Files
Home_assistant/backend/app/llm/client.py
T
2026-06-16 04:38:23 +00:00

375 lines
13 KiB
Python

import json
import logging
from collections.abc import AsyncIterator
from typing import Any
from openai import AsyncOpenAI
from app.config import get_settings
logger = logging.getLogger(__name__)
class LLMClient:
def __init__(self) -> None:
settings = get_settings()
self.tools_enabled = settings.openrouter_tools_enabled
self.client = AsyncOpenAI(
api_key=settings.openrouter_api_key,
base_url=settings.openrouter_base_url,
)
def _runtime(self) -> tuple[str, str, str]:
from app.db.base import SessionLocal
from app.settings.service import SettingsService
settings = get_settings()
db = SessionLocal()
try:
svc = SettingsService(db)
model = str(svc.get_effective("openrouter_model"))
extract = str(svc.get_effective("memory_extract_model"))
effort = str(svc.get_effective("openrouter_reasoning_effort")).strip().lower()
return model, extract, effort
finally:
db.close()
def _vision_model_runtime(self) -> str:
from app.db.base import SessionLocal
from app.settings.service import SettingsService
db = SessionLocal()
try:
return str(SettingsService(db).get_effective("openrouter_vision_model"))
finally:
db.close()
@property
def model(self) -> str:
return self._runtime()[0]
@property
def memory_extract_model(self) -> str:
return self._runtime()[1]
@property
def reasoning_effort(self) -> str:
return self._runtime()[2]
@property
def vision_model(self) -> str:
return self._vision_model_runtime()
def _reasoning_extra_body(self) -> dict[str, Any] | None:
if not self.reasoning_effort:
return None
if self.reasoning_effort == "none":
return {"reasoning": {"effort": "none", "exclude": True}}
return {"reasoning": {"effort": self.reasoning_effort}}
@staticmethod
def _delta_reasoning(delta: Any) -> tuple[str, list[Any]]:
parts: list[str] = []
for attr in ("reasoning", "reasoning_content"):
value = getattr(delta, attr, None)
if value:
parts.append(str(value))
details: list[Any] = []
raw_details = getattr(delta, "reasoning_details", None)
if raw_details:
if isinstance(raw_details, list):
details.extend(raw_details)
else:
details.append(raw_details)
return "".join(parts), details
@staticmethod
def _normalize_reasoning_details(details: Any) -> list[Any] | None:
if not details:
return None
items = details if isinstance(details, list) else [details]
normalized: list[Any] = []
for item in items:
if hasattr(item, "model_dump"):
normalized.append(item.model_dump())
elif isinstance(item, dict):
normalized.append(item)
else:
normalized.append(item)
return normalized or None
@staticmethod
def attach_reasoning_to_message(
message: dict[str, Any],
*,
reasoning: str = "",
reasoning_details: list[Any] | None = None,
) -> dict[str, Any]:
if reasoning:
message["reasoning"] = reasoning
message["reasoning_content"] = reasoning
normalized = LLMClient._normalize_reasoning_details(reasoning_details)
if normalized:
message["reasoning_details"] = normalized
return message
async def stream_chat(
self,
messages: list[dict[str, Any]],
tools: list[dict[str, Any]] | None = None,
*,
model: str | None = None,
) -> AsyncIterator[dict[str, Any]]:
use_tools = bool(tools) and self.tools_enabled
kwargs: dict[str, Any] = {
"model": model or self.model,
"messages": messages,
"stream": True,
"temperature": 0.7,
}
if use_tools:
kwargs["tools"] = tools
extra_body = self._reasoning_extra_body()
if extra_body:
kwargs["extra_body"] = extra_body
try:
stream = await self.client.chat.completions.create(**kwargs)
except Exception as exc:
logger.exception("LLM stream failed: %s", exc)
yield {"type": "error", "content": str(exc)}
yield {"type": "done", "finish_reason": "error"}
return
tool_calls: dict[int, dict[str, Any]] = {}
reasoning_parts: list[str] = []
reasoning_details: list[Any] = []
try:
async for chunk in stream:
if not chunk.choices:
continue
choice = chunk.choices[0]
delta = choice.delta
if delta.content:
yield {"type": "content", "content": delta.content}
reasoning_text, details = self._delta_reasoning(delta)
if reasoning_text:
reasoning_parts.append(reasoning_text)
if details:
reasoning_details.extend(details)
if delta.tool_calls:
for tool_call in delta.tool_calls:
idx = tool_call.index
if idx not in tool_calls:
tool_calls[idx] = {
"id": tool_call.id or "",
"type": "function",
"function": {"name": "", "arguments": ""},
}
if tool_call.id:
tool_calls[idx]["id"] = tool_call.id
if tool_call.function:
if tool_call.function.name:
tool_calls[idx]["function"]["name"] = tool_call.function.name
if tool_call.function.arguments:
tool_calls[idx]["function"]["arguments"] += tool_call.function.arguments
usage = getattr(chunk, "usage", None)
if usage is not None:
logger.info(
"LLM stream usage: prompt=%s completion=%s total=%s",
getattr(usage, "prompt_tokens", None),
getattr(usage, "completion_tokens", None),
getattr(usage, "total_tokens", None),
)
if choice.finish_reason:
reasoning = "".join(reasoning_parts)
normalized_details = self._normalize_reasoning_details(reasoning_details)
if reasoning or normalized_details:
yield {
"type": "reasoning",
"reasoning": reasoning,
"reasoning_details": normalized_details,
}
if tool_calls:
yield {"type": "tool_calls", "tool_calls": list(tool_calls.values())}
logger.info(
"LLM stream done: model=%s finish_reason=%s tool_calls=%d "
"content_in_stream=%d reasoning_len=%d",
model or self.model,
choice.finish_reason,
len(tool_calls),
len(reasoning_parts),
len(reasoning),
)
yield {"type": "done", "finish_reason": choice.finish_reason}
except Exception as exc:
logger.exception("LLM stream read failed: %s", exc)
yield {"type": "error", "content": str(exc)}
yield {"type": "done", "finish_reason": "error"}
async def complete(
self,
messages: list[dict[str, Any]],
tools: list[dict[str, Any]] | None = None,
*,
temperature: float = 0.7,
model: str | None = None,
for_extraction: bool = False,
visible_reply: bool = False,
) -> dict[str, Any]:
use_tools = bool(tools) and self.tools_enabled and not for_extraction
kwargs: dict[str, Any] = {
"model": model or self.model,
"messages": messages,
"temperature": temperature,
}
if use_tools:
kwargs["tools"] = tools
if for_extraction:
kwargs["extra_body"] = {"reasoning": {"effort": "none"}}
else:
extra_body = self._reasoning_extra_body()
if extra_body:
kwargs["extra_body"] = extra_body
response = await self.client.chat.completions.create(**kwargs)
usage = getattr(response, "usage", None)
if usage is not None:
logger.info(
"LLM complete usage: prompt=%s completion=%s total=%s model=%s",
getattr(usage, "prompt_tokens", None),
getattr(usage, "completion_tokens", None),
getattr(usage, "total_tokens", None),
kwargs.get("model"),
)
message = response.choices[0].message
content = message.content or ""
reasoning = ""
for attr in ("reasoning", "reasoning_content"):
value = getattr(message, attr, None)
if value:
reasoning = str(value)
break
if not content and reasoning and not visible_reply:
content = reasoning
result: dict[str, Any] = {
"content": content,
"tool_calls": [],
"reasoning": reasoning,
"reasoning_details": getattr(message, "reasoning_details", None),
}
if message.tool_calls:
result["tool_calls"] = [
{
"id": tc.id,
"type": "function",
"function": {
"name": tc.function.name,
"arguments": tc.function.arguments,
},
}
for tc in message.tool_calls
]
return result
async def complete_vision(
self,
messages: list[dict[str, Any]],
*,
temperature: float = 0.1,
model: str | None = None,
) -> dict[str, Any]:
use_model = model or self.vision_model
kwargs: dict[str, Any] = {
"model": use_model,
"messages": messages,
"temperature": temperature,
"extra_body": {"reasoning": {"effort": "none", "exclude": True}},
}
response = await self.client.chat.completions.create(**kwargs)
usage = getattr(response, "usage", None)
usage_dict: dict[str, Any] = {}
if usage is not None:
usage_dict = {
"prompt_tokens": getattr(usage, "prompt_tokens", None),
"completion_tokens": getattr(usage, "completion_tokens", None),
"total_tokens": getattr(usage, "total_tokens", None),
}
logger.info(
"LLM vision usage: prompt=%s completion=%s total=%s model=%s",
usage_dict.get("prompt_tokens"),
usage_dict.get("completion_tokens"),
usage_dict.get("total_tokens"),
use_model,
)
message = response.choices[0].message
return {
"content": message.content or "",
"model": use_model,
"usage": usage_dict,
}
@staticmethod
def parse_tool_arguments(arguments: str) -> dict[str, Any]:
if not arguments:
return {}
try:
return json.loads(arguments)
except json.JSONDecodeError:
return {}
@staticmethod
def serialize_reasoning(
*,
reasoning: str = "",
reasoning_details: list[Any] | None = None,
) -> str | None:
payload: dict[str, Any] = {}
if reasoning:
payload["reasoning"] = reasoning
payload["reasoning_content"] = reasoning
if reasoning_details:
payload["reasoning_details"] = reasoning_details
if not payload:
return None
return json.dumps(payload, ensure_ascii=False)
@staticmethod
def deserialize_reasoning(raw: str | None) -> dict[str, Any]:
if not raw:
return {}
try:
data = json.loads(raw)
except json.JSONDecodeError:
return {"reasoning": raw}
if isinstance(data, str):
return {"reasoning": data, "reasoning_content": data}
if isinstance(data, dict):
return data
return {}
async def embed(self, texts: list[str]) -> list[list[float]]:
settings = get_settings()
if not texts:
return []
response = await self.client.embeddings.create(
model=settings.embedding_model,
input=texts,
)
return [item.embedding for item in response.data]