import inspect
from typing import Any, TypeVar
from browser_use.llm.messages import AssistantMessage, BaseMessage, SystemMessage, UserMessage
from browser_use.llm.views import ChatInvokeCompletion, ChatInvokeUsage
from any_llm import acompletion
# Type variable for the structured-output type requested through ``output_format``.
T = TypeVar("T")

# Per-call keyword names that may be forwarded to ``acompletion``: every named
# (non-``*args`` / non-``**kwargs``) parameter in its signature, minus the
# ones this module supplies itself (explicitly on each call or from the
# adapter's constructor arguments).
ANY_LLM_RUNTIME_KWARG_NAMES = {
    parameter.name
    for parameter in inspect.signature(acompletion).parameters.values()
    if parameter.kind
    not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
} - {
    "model",
    "messages",
    "response_format",
    "temperature",
    "provider",
    "api_key",
    "api_base",
}

# Keyword names used by browser-use callers, mapped to the ``acompletion``
# parameter name they are forwarded as.
BROWSER_USE_KWARG_MAP = {"session_id": "session_label"}
def _as_plain_text(content: Any) -> Any:
    """Return *content* as plain text when it is a list of content parts.

    Strings and ``None`` are returned unchanged.  Any other value is treated
    as an iterable of content parts: the ``text`` of every part whose ``type``
    is ``"text"`` is joined with newlines and all other parts are skipped.
    """
    if content is None or isinstance(content, str):
        return content
    return "\n".join(
        part.text for part in content if getattr(part, "type", None) == "text"
    )


def to_any_llm_message(message: BaseMessage) -> dict[str, Any]:
    """Convert a browser-use message into a chat-completions style dict.

    System and assistant content is always emitted as a string.  If the
    content is a list of parts instead of a string (browser-use's message
    models appear to permit this for both roles -- confirm against the
    installed version), the text parts are joined with newlines rather than
    forwarding the raw part objects.  User content stays a string when it is
    one; otherwise each text / image part becomes a typed dict.

    Args:
        message: A ``SystemMessage``, ``UserMessage`` or ``AssistantMessage``.

    Returns:
        A dict with ``role`` and ``content`` keys, plus ``tool_calls`` for an
        assistant message that carries tool calls.

    Raises:
        TypeError: If *message* is none of the three supported types.
    """
    if isinstance(message, SystemMessage):
        return {"role": "system", "content": _as_plain_text(message.content)}

    if isinstance(message, UserMessage):
        if isinstance(message.content, str):
            return {"role": "user", "content": message.content}

        content: list[dict[str, Any]] = []
        for part in message.content:
            if part.type == "text":
                content.append({"type": "text", "text": part.text})
            elif part.type == "image_url":
                image_url: dict[str, Any] = {"url": part.image_url.url}
                # Only forward ``detail`` when the caller actually set it.
                if part.image_url.detail is not None:
                    image_url["detail"] = part.image_url.detail
                content.append({"type": "image_url", "image_url": image_url})
            # Parts of any other type are skipped.
        return {"role": "user", "content": content}

    if isinstance(message, AssistantMessage):
        assistant_message: dict[str, Any] = {
            "role": "assistant",
            # Missing or empty content is sent as an empty string.
            "content": _as_plain_text(message.content) or "",
        }
        if message.tool_calls:
            assistant_message["tool_calls"] = [
                {
                    "id": call.id,
                    "type": "function",
                    "function": {
                        "name": call.function.name,
                        "arguments": call.function.arguments,
                    },
                }
                for call in message.tool_calls
            ]
        return assistant_message

    message_type = type(message)
    error_message = f"Unsupported message type: {message_type!r}"
    raise TypeError(error_message)
def to_usage(response: Any) -> ChatInvokeUsage | None:
    """Build a ``ChatInvokeUsage`` from the ``usage`` attribute of *response*.

    Args:
        response: Any object; only its optional ``usage`` attribute is read.

    Returns:
        ``None`` when *response* has no ``usage`` attribute or it is ``None``.
        Otherwise a ``ChatInvokeUsage`` whose prompt, completion and total
        token counts fall back to ``0`` when falsy.  The cached-token count is
        taken from ``usage.prompt_tokens_details.cached_tokens`` when those
        details are present and is ``None`` otherwise.  Cache-creation and
        image token counts are always ``None``.
    """
    token_usage = getattr(response, "usage", None)
    if token_usage is None:
        return None

    details = getattr(token_usage, "prompt_tokens_details", None)
    cached = details.cached_tokens if details is not None else None

    return ChatInvokeUsage(
        prompt_tokens=token_usage.prompt_tokens or 0,
        prompt_cached_tokens=cached,
        prompt_cache_creation_tokens=None,
        prompt_image_tokens=None,
        completion_tokens=token_usage.completion_tokens or 0,
        total_tokens=token_usage.total_tokens or 0,
    )
def to_any_llm_runtime_kwargs(kwargs: dict[str, Any]) -> dict[str, Any]:
    """Select and rename the per-call keyword arguments that may be forwarded.

    Each key is first renamed through ``BROWSER_USE_KWARG_MAP`` (keys without
    an entry keep their name).  Only entries whose resulting name is in
    ``ANY_LLM_RUNTIME_KWARG_NAMES`` are kept; everything else is dropped.  If
    two input keys map to the same name, the later one wins.

    Args:
        kwargs: Keyword arguments received by the caller.

    Returns:
        A new dict; *kwargs* itself is not modified.
    """
    renamed = (
        (BROWSER_USE_KWARG_MAP.get(key, key), value)
        for key, value in kwargs.items()
    )
    return {
        name: value
        for name, value in renamed
        if name in ANY_LLM_RUNTIME_KWARG_NAMES
    }
class BrowserUseAnyLLM:
    """Chat-model adapter that sends browser-use requests through ``acompletion``.

    Constructing an instance only records configuration; a request is made
    each time :meth:`ainvoke` is awaited.
    """

    # browser-use checks this attribute to skip its own key validation step;
    # credential handling is delegated to any-llm.
    _verified_api_keys: bool = True

    def __init__(
        self,
        *,
        model: str,
        provider: str | None = None,
        api_key: str | None = None,
        api_base: str | None = None,
        temperature: float | None = 0.0,
        **model_kwargs: Any,
    ) -> None:
        """Record the settings that accompany every completion call.

        Args:
            model: Model identifier passed to ``acompletion`` as ``model``.
            provider: Optional provider name.  Forwarded to ``acompletion``
                only when given.  When omitted, :attr:`provider` reports the
                text before the first ``":"`` in *model* (the whole string if
                it contains no ``":"``).
            api_key: Forwarded to ``acompletion`` only when not ``None``.
            api_base: Forwarded to ``acompletion`` only when not ``None``.
            temperature: Passed to ``acompletion`` on every call.
            **model_kwargs: Extra keyword arguments sent with every call.
                Values passed to :meth:`ainvoke` for the same name take
                precedence for that call.
        """
        self.model = model
        self.temperature = temperature
        self._provider = provider or model.split(":", 1)[0]

        self._completion_kwargs: dict[str, Any] = dict(model_kwargs)
        optional_settings = {
            "provider": provider,
            "api_key": api_key,
            "api_base": api_base,
        }
        # Leave unset options out entirely instead of sending explicit Nones.
        self._completion_kwargs.update(
            (key, value)
            for key, value in optional_settings.items()
            if value is not None
        )

    @property
    def provider(self) -> str:
        """Provider name given at construction, or derived from the model string."""
        return self._provider

    @property
    def name(self) -> str:
        """The model identifier (same value as :attr:`model_name`)."""
        return self.model

    @property
    def model_name(self) -> str:
        """The model identifier (same value as :attr:`name`)."""
        return self.model

    def _merged_completion_kwargs(
        self, runtime_kwargs: dict[str, Any]
    ) -> dict[str, Any]:
        """Combine constructor-level options with per-call options.

        Per-call values override constructor values of the same name.  The
        two dicts are merged before the call because unpacking both with
        ``**`` into one call raises ``TypeError`` when a key occurs in both.
        The stored constructor options are not modified.
        """
        return {**self._completion_kwargs, **runtime_kwargs}

    async def ainvoke(
        self,
        messages: list[BaseMessage],
        output_format: type[T] | None = None,
        **kwargs: Any,
    ) -> ChatInvokeCompletion[T] | ChatInvokeCompletion[str]:
        """Run one completion and wrap the result for browser-use.

        Args:
            messages: Conversation to send; each message is converted with
                ``to_any_llm_message``.
            output_format: When given, passed as ``response_format`` and the
                parsed payload of the first choice is returned.  When
                ``None``, the text content of the first choice is returned.
            **kwargs: Per-call options.  They are filtered and renamed by
                ``to_any_llm_runtime_kwargs``; options that survive override
                constructor options of the same name.

        Returns:
            A ``ChatInvokeCompletion`` holding the text (``""`` if the model
            returned none) or the parsed payload, together with the usage
            produced by ``to_usage``.

        Raises:
            ValueError: If *output_format* was given but the first choice has
                no parsed payload.
        """
        completion_kwargs = self._merged_completion_kwargs(
            to_any_llm_runtime_kwargs(kwargs)
        )
        response = await acompletion(
            model=self.model,
            messages=[to_any_llm_message(message) for message in messages],
            temperature=self.temperature,
            response_format=output_format,
            **completion_kwargs,
        )
        usage = to_usage(response)
        reply = response.choices[0].message

        if output_format is None:
            completion = reply.content or ""
            return ChatInvokeCompletion(completion=completion, usage=usage)

        parsed = reply.parsed
        if parsed is None:
            msg = "Expected structured browser-use output, but the model returned no parsed payload."
            raise ValueError(msg)
        return ChatInvokeCompletion(completion=parsed, usage=usage)