# Home_assistant/backend/app/llm/client.py

import json
import logging
from collections.abc import AsyncIterator
from typing import Any

from openai import AsyncOpenAI

from app.config import get_settings

logger = logging.getLogger(__name__)


class LLMClient:
    def __init__(self) -> None:
        settings = get_settings()
        self.tools_enabled = settings.openrouter_tools_enabled
        self.client = AsyncOpenAI(
            api_key=settings.openrouter_api_key,
            base_url=settings.openrouter_base_url,
        )

    def _runtime(self) -> tuple[str, str, str]:
        from app.db.base import SessionLocal
        from app.settings.service import SettingsService

        settings = get_settings()
        db = SessionLocal()
        try:
            svc = SettingsService(db)
            model = str(svc.get_effective("openrouter_model"))
            extract = str(svc.get_effective("memory_extract_model"))
            effort = str(svc.get_effective("openrouter_reasoning_effort")).strip().lower()
            return model, extract, effort
        finally:
            db.close()

    def _vision_model_runtime(self) -> str:
        from app.db.base import SessionLocal
        from app.settings.service import SettingsService

        db = SessionLocal()
        try:
            return str(SettingsService(db).get_effective("openrouter_vision_model"))
        finally:
            db.close()

    @property
    def model(self) -> str:
        return self._runtime()[0]

    @property
    def memory_extract_model(self) -> str:
        return self._runtime()[1]

    @property
    def reasoning_effort(self) -> str:
        return self._runtime()[2]

    @property
    def vision_model(self) -> str:
        return self._vision_model_runtime()

    def _reasoning_extra_body(self) -> dict[str, Any] | None:
        if not self.reasoning_effort:
            return None
        if self.reasoning_effort == "none":
            return {"reasoning": {"effort": "none", "exclude": True}}
        return {"reasoning": {"effort": self.reasoning_effort}}

    @staticmethod
    def _delta_reasoning(delta: Any) -> tuple[str, list[Any]]:
        parts: list[str] = []
        for attr in ("reasoning", "reasoning_content"):
            value = getattr(delta, attr, None)
            if value:
                parts.append(str(value))

        details: list[Any] = []
        raw_details = getattr(delta, "reasoning_details", None)
        if raw_details:
            if isinstance(raw_details, list):
                details.extend(raw_details)
            else:
                details.append(raw_details)

        return "".join(parts), details

    @staticmethod
    def _normalize_reasoning_details(details: Any) -> list[Any] | None:
        if not details:
            return None
        items = details if isinstance(details, list) else [details]
        normalized: list[Any] = []
        for item in items:
            if hasattr(item, "model_dump"):
                normalized.append(item.model_dump())
            elif isinstance(item, dict):
                normalized.append(item)
            else:
                normalized.append(item)
        return normalized or None

    @staticmethod
    def attach_reasoning_to_message(
        message: dict[str, Any],
        *,
        reasoning: str = "",
        reasoning_details: list[Any] | None = None,
    ) -> dict[str, Any]:
        if reasoning:
            message["reasoning"] = reasoning
            message["reasoning_content"] = reasoning
        normalized = LLMClient._normalize_reasoning_details(reasoning_details)
        if normalized:
            message["reasoning_details"] = normalized
        return message

    async def stream_chat(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]] | None = None,
        *,
        model: str | None = None,
    ) -> AsyncIterator[dict[str, Any]]:
        use_tools = bool(tools) and self.tools_enabled
        kwargs: dict[str, Any] = {
            "model": model or self.model,
            "messages": messages,
            "stream": True,
            "temperature": 0.7,
        }
        if use_tools:
            kwargs["tools"] = tools
        extra_body = self._reasoning_extra_body()
        if extra_body:
            kwargs["extra_body"] = extra_body

        try:
            stream = await self.client.chat.completions.create(**kwargs)
        except Exception as exc:
            logger.exception("LLM stream failed: %s", exc)
            yield {"type": "error", "content": str(exc)}
            yield {"type": "done", "finish_reason": "error"}
            return

        tool_calls: dict[int, dict[str, Any]] = {}
        reasoning_parts: list[str] = []
        reasoning_details: list[Any] = []

        try:
            async for chunk in stream:
                if not chunk.choices:
                    continue

                choice = chunk.choices[0]
                delta = choice.delta

                if delta.content:
                    yield {"type": "content", "content": delta.content}

                reasoning_text, details = self._delta_reasoning(delta)
                if reasoning_text:
                    reasoning_parts.append(reasoning_text)
                if details:
                    reasoning_details.extend(details)

                if delta.tool_calls:
                    for tool_call in delta.tool_calls:
                        idx = tool_call.index
                        if idx not in tool_calls:
                            tool_calls[idx] = {
                                "id": tool_call.id or "",
                                "type": "function",
                                "function": {"name": "", "arguments": ""},
                            }
                        if tool_call.id:
                            tool_calls[idx]["id"] = tool_call.id
                        if tool_call.function:
                            if tool_call.function.name:
                                tool_calls[idx]["function"]["name"] = tool_call.function.name
                            if tool_call.function.arguments:
                                tool_calls[idx]["function"]["arguments"] += tool_call.function.arguments

                usage = getattr(chunk, "usage", None)
                if usage is not None:
                    logger.info(
                        "LLM stream usage: prompt=%s completion=%s total=%s",
                        getattr(usage, "prompt_tokens", None),
                        getattr(usage, "completion_tokens", None),
                        getattr(usage, "total_tokens", None),
                    )

                if choice.finish_reason:
                    reasoning = "".join(reasoning_parts)
                    normalized_details = self._normalize_reasoning_details(reasoning_details)
                    if reasoning or normalized_details:
                        yield {
                            "type": "reasoning",
                            "reasoning": reasoning,
                            "reasoning_details": normalized_details,
                        }
                    if tool_calls:
                        yield {"type": "tool_calls", "tool_calls": list(tool_calls.values())}
                    logger.info(
                        "LLM stream done: model=%s finish_reason=%s tool_calls=%d "
                        "content_in_stream=%d reasoning_len=%d",
                        model or self.model,
                        choice.finish_reason,
                        len(tool_calls),
                        len(reasoning_parts),
                        len(reasoning),
                    )
                    yield {"type": "done", "finish_reason": choice.finish_reason}
        except Exception as exc:
            logger.exception("LLM stream read failed: %s", exc)
            yield {"type": "error", "content": str(exc)}
            yield {"type": "done", "finish_reason": "error"}

    async def complete(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]] | None = None,
        *,
        temperature: float = 0.7,
        model: str | None = None,
        for_extraction: bool = False,
        visible_reply: bool = False,
    ) -> dict[str, Any]:
        use_tools = bool(tools) and self.tools_enabled and not for_extraction
        kwargs: dict[str, Any] = {
            "model": model or self.model,
            "messages": messages,
            "temperature": temperature,
        }
        if use_tools:
            kwargs["tools"] = tools
        if for_extraction:
            kwargs["extra_body"] = {"reasoning": {"effort": "none"}}
        else:
            extra_body = self._reasoning_extra_body()
            if extra_body:
                kwargs["extra_body"] = extra_body

        response = await self.client.chat.completions.create(**kwargs)
        usage = getattr(response, "usage", None)
        if usage is not None:
            logger.info(
                "LLM complete usage: prompt=%s completion=%s total=%s model=%s",
                getattr(usage, "prompt_tokens", None),
                getattr(usage, "completion_tokens", None),
                getattr(usage, "total_tokens", None),
                kwargs.get("model"),
            )
        message = response.choices[0].message

        content = message.content or ""
        reasoning = ""
        for attr in ("reasoning", "reasoning_content"):
            value = getattr(message, attr, None)
            if value:
                reasoning = str(value)
                break

        if not content and reasoning and not visible_reply:
            content = reasoning

        result: dict[str, Any] = {
            "content": content,
            "tool_calls": [],
            "reasoning": reasoning,
            "reasoning_details": getattr(message, "reasoning_details", None),
        }

        if message.tool_calls:
            result["tool_calls"] = [
                {
                    "id": tc.id,
                    "type": "function",
                    "function": {
                        "name": tc.function.name,
                        "arguments": tc.function.arguments,
                    },
                }
                for tc in message.tool_calls
            ]

        return result

    async def complete_vision(
        self,
        messages: list[dict[str, Any]],
        *,
        temperature: float = 0.1,
        model: str | None = None,
    ) -> dict[str, Any]:
        use_model = model or self.vision_model
        kwargs: dict[str, Any] = {
            "model": use_model,
            "messages": messages,
            "temperature": temperature,
            "extra_body": {"reasoning": {"effort": "none", "exclude": True}},
        }
        response = await self.client.chat.completions.create(**kwargs)
        usage = getattr(response, "usage", None)
        usage_dict: dict[str, Any] = {}
        if usage is not None:
            usage_dict = {
                "prompt_tokens": getattr(usage, "prompt_tokens", None),
                "completion_tokens": getattr(usage, "completion_tokens", None),
                "total_tokens": getattr(usage, "total_tokens", None),
            }
            logger.info(
                "LLM vision usage: prompt=%s completion=%s total=%s model=%s",
                usage_dict.get("prompt_tokens"),
                usage_dict.get("completion_tokens"),
                usage_dict.get("total_tokens"),
                use_model,
            )
        message = response.choices[0].message
        return {
            "content": message.content or "",
            "model": use_model,
            "usage": usage_dict,
        }

    @staticmethod
    def parse_tool_arguments(arguments: str) -> dict[str, Any]:
        if not arguments:
            return {}
        try:
            return json.loads(arguments)
        except json.JSONDecodeError:
            return {}

    @staticmethod
    def serialize_reasoning(
        *,
        reasoning: str = "",
        reasoning_details: list[Any] | None = None,
    ) -> str | None:
        payload: dict[str, Any] = {}
        if reasoning:
            payload["reasoning"] = reasoning
            payload["reasoning_content"] = reasoning
        if reasoning_details:
            payload["reasoning_details"] = reasoning_details
        if not payload:
            return None
        return json.dumps(payload, ensure_ascii=False)

    @staticmethod
    def deserialize_reasoning(raw: str | None) -> dict[str, Any]:
        if not raw:
            return {}
        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            return {"reasoning": raw}
        if isinstance(data, str):
            return {"reasoning": data, "reasoning_content": data}
        if isinstance(data, dict):
            return data
        return {}

    async def embed(self, texts: list[str]) -> list[list[float]]:
        settings = get_settings()
        if not texts:
            return []
        response = await self.client.embeddings.create(
            model=settings.embedding_model,
            input=texts,
        )
        return [item.embedding for item in response.data]