# ChatAIBot/services/llm.py

import codecs
import json
import logging
import os

import httpx
from dotenv import load_dotenv

# Load variables from a .env file (if one is found) into the process environment
# before the os.getenv() lookups below read their settings.
load_dotenv()

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# OpenRouter endpoint and API key (the key is read from the ROUTER_KEY variable).
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
OPENROUTER_KEY = os.environ.get("ROUTER_KEY")

# Model identifiers; each can be overridden through the environment.
CHAT_MODEL = os.environ.get("CHAT_MODEL", "mistralai/mistral-nemo")
SYSTEM_MODEL = os.environ.get("SYSTEM_MODEL", "google/gemini-2.5-flash")

# Softer model used when the primary returns content_filter / empty / API errors.
# An unset or blank LLM_FALLBACK_MODEL falls back to CHAT_MODEL.
LLM_FALLBACK_MODEL = os.environ.get("LLM_FALLBACK_MODEL", "").strip() or CHAT_MODEL

# Headers sent with every OpenRouter request; built once at import time.
HEADERS = {
    "Authorization": "Bearer {}".format(OPENROUTER_KEY),
    "Content-Type": "application/json",
    "HTTP-Referer": "http://localhost:8000",
}


class LLMError(Exception):
    """Raised when OpenRouter reports an error or its response has an unexpected shape."""


def _parse_completion_body(data: dict) -> str:
    """Extract the assistant text from a JSON-decoded chat-completion body.

    Args:
        data: The decoded response body (expected to be a JSON object).

    Returns:
        The first choice's message content as a string. The text is returned
        as-is (not stripped); it only has to be non-blank.

    Raises:
        LLMError: If the body is not an object, carries an ``error`` entry,
            has missing, empty or non-list ``choices``, was blocked by a
            content filter, contains a refusal, or has no usable content.
    """
    if not isinstance(data, dict):
        raise LLMError(f"Invalid API response: expected object, got {type(data).__name__}")

    if data.get("error"):
        err = data["error"]
        if isinstance(err, dict):
            msg = err.get("message") or str(err)
            code = err.get("code")
        else:
            msg = str(err)
            code = None
        suffix = f" (code={code})" if code is not None else ""
        raise LLMError(f"OpenRouter error{suffix}: {msg}")

    choices = data.get("choices")
    if not choices:
        preview = str(data)[:400]
        raise LLMError(f"OpenRouter response has no 'choices'. Body preview: {preview}")
    if not isinstance(choices, list):
        # Indexing a non-list (e.g. a dict) would raise KeyError/TypeError, which
        # callers that only handle LLMError would not catch.
        preview = str(data)[:400]
        raise LLMError(
            "OpenRouter response has invalid 'choices' "
            f"(expected list, got {type(choices).__name__}). Body preview: {preview}"
        )

    first = choices[0] if isinstance(choices[0], dict) else {}
    message = first.get("message") or {}
    if not isinstance(message, dict):
        raise LLMError("OpenRouter choice has no message object")

    # Coerced to str so an unexpected unhashable value cannot break the set lookup.
    finish = str(first.get("finish_reason") or "")
    native_finish = first.get("native_finish_reason") or ""
    blocked_reasons = {"content_filter", "safety", "moderation"}
    blocked_native = ("PROHIBITED_CONTENT", "SAFETY", "BLOCKED")
    if finish in blocked_reasons or str(native_finish).upper() in blocked_native:
        raise LLMError(
            f"Content blocked by provider (finish_reason={finish}, native={native_finish})"
        )

    content = message.get("content")
    if content is not None and str(content).strip():
        return str(content)

    # No usable content from here on: report the most specific reason available.
    refusal = message.get("refusal")
    if refusal:
        raise LLMError(f"Model refused the request: {refusal}")

    if finish and finish not in ("stop", "length", "tool_calls", "function_call"):
        raise LLMError(
            f"OpenRouter finished without content (finish_reason={finish}, native={native_finish})"
        )

    raise LLMError("OpenRouter returned empty message content")


def _clean(messages: list) -> list:
    """Filter out messages with empty content.

    String content counts as empty when it is blank after stripping
    whitespace. Non-string content (for example a list of content parts) is
    kept whenever it is truthy, instead of failing on a ``.strip()`` call.

    Args:
        messages: Chat message dicts; each may or may not have a "content" key.

    Returns:
        A new list with the same message objects, in order, minus the empty ones.
    """

    def has_content(message: dict) -> bool:
        content = message.get("content")
        if isinstance(content, str):
            return bool(content.strip())
        return bool(content)

    return [m for m in messages if has_content(m)]


async def _post_once(model: str, messages: list, extra: dict | None = None) -> str:
    """Send one non-streaming completion request and return the reply text.

    Args:
        model: Model identifier placed in the request body.
        messages: Chat messages; passed through ``_clean`` before sending.
        extra: Optional additional request-body fields, merged over the defaults.

    Returns:
        The text extracted by ``_parse_completion_body``.

    Raises:
        LLMError: If ROUTER_KEY is unset, the response is not JSON, the HTTP
            status is 400 or above, or the body cannot be parsed into a reply.
    """
    if not OPENROUTER_KEY:
        raise LLMError("ROUTER_KEY is not set in environment")

    request_body = {"model": model, "messages": _clean(messages)}
    request_body.update(extra or {})

    async with httpx.AsyncClient(timeout=90) as client:
        response = await client.post(OPENROUTER_URL, headers=HEADERS, json=request_body)
        status = response.status_code
        try:
            body = response.json()
        except Exception as exc:
            raise LLMError(f"Non-JSON response (HTTP {status}): {response.text[:300]}") from exc

        if status >= 400:
            # If the body describes the failure, the parser raises the more
            # specific LLMError; otherwise fall through to the generic one.
            _parse_completion_body(body)
            raise LLMError(f"HTTP {status}: {body}")

        try:
            return _parse_completion_body(body)
        except LLMError:
            logger.warning(
                "OpenRouter completion failed model=%s status=%s body=%.500s",
                model,
                status,
                body,
            )
            raise


async def _post(model: str, messages: list, extra: dict | None = None) -> str:
    """Request a completion, retrying once on LLM_FALLBACK_MODEL if the first try fails.

    No retry happens when the fallback model is empty or equal to ``model``;
    the original LLMError is re-raised in that case. If the retry fails too,
    a combined LLMError describing both failures is raised.
    """
    try:
        return await _post_once(model, messages, extra)
    except LLMError as first_failure:
        backup_model = LLM_FALLBACK_MODEL
        can_retry = bool(backup_model) and backup_model != model
        if not can_retry:
            raise

        logger.info(
            "LLM fallback: %s failed (%s) → retrying with %s",
            model,
            first_failure,
            backup_model,
        )
        try:
            return await _post_once(backup_model, messages, extra)
        except LLMError as second_failure:
            combined = (
                f"{first_failure} (fallback {backup_model} also failed: {second_failure})"
            )
            raise LLMError(combined) from second_failure


async def send_message(messages: list) -> str:
    """Complete ``messages`` on SYSTEM_MODEL; ``_post`` falls back to LLM_FALLBACK_MODEL."""
    reply = await _post(SYSTEM_MODEL, messages)
    return reply


async def send_message_with_model(messages: list, model: str) -> str:
    """Complete ``messages`` on an explicitly named model (RPG_*, SD_*).

    ``_post`` retries on LLM_FALLBACK_MODEL automatically if the named model fails.
    """
    return await _post(model=model, messages=messages)


async def stream_message(messages: list):
    """Chat model stream — roleplay dialogue.

    Sends a streaming completion request for CHAT_MODEL and parses the
    server-sent-events response line by line.

    Args:
        messages: Chat messages; passed through ``_clean`` before sending.

    Yields:
        Each non-empty ``delta.content`` value of the first choice, in arrival order.

    Raises:
        LLMError: If ROUTER_KEY is not set, or the stream carries an ``error`` payload.
        httpx.HTTPStatusError: If the response status is 4xx/5xx.
        Other exceptions raised by httpx propagate after being logged.
    """
    if not OPENROUTER_KEY:
        # Same fail-fast guard as _post_once: skip a request that cannot authenticate.
        raise LLMError("ROUTER_KEY is not set in environment")

    payload = {
        "model": CHAT_MODEL,
        "messages": _clean(messages),
        "stream": True,
    }
    timeout = httpx.Timeout(connect=10, read=120, write=10, pool=5)
    # A multi-byte UTF-8 character may be split across two network chunks.
    # Decoding each chunk on its own would turn both halves into U+FFFD, so an
    # incremental decoder carries the partial bytes over to the next chunk.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
    chunk_count = 0
    first_bytes_logged = False
    async with httpx.AsyncClient(timeout=timeout) as client:
        try:
            async with client.stream("POST", OPENROUTER_URL, headers=HEADERS, json=payload) as response:
                response.raise_for_status()
                buf = ""
                async for raw in response.aiter_bytes():
                    text = decoder.decode(raw)
                    if text and not first_bytes_logged:
                        # Logged once, to help diagnose unexpected response formats.
                        logger.info("stream first bytes: %.200s", text)
                        first_bytes_logged = True
                    buf += text
                    while "\n" in buf:
                        line, buf = buf.split("\n", 1)
                        line = line.rstrip("\r")
                        # SSE allows "data:" with or without a following space;
                        # comment and other field lines are ignored.
                        if not line.startswith("data:"):
                            continue
                        data = line[5:].strip()
                        if data == "[DONE]":
                            return
                        try:
                            chunk = json.loads(data)
                        except ValueError:
                            # Best effort: skip a malformed event, but leave a trace.
                            logger.debug("stream_message: skipping non-JSON data: %.200s", data)
                            continue
                        if not isinstance(chunk, dict):
                            continue
                        err = chunk.get("error")
                        if err:
                            msg = err.get("message", err) if isinstance(err, dict) else err
                            raise LLMError(f"OpenRouter stream error: {msg}")
                        choices = chunk.get("choices")
                        if not isinstance(choices, list) or not choices:
                            continue
                        first = choices[0]
                        if not isinstance(first, dict):
                            continue
                        delta = first.get("delta")
                        content = delta.get("content") if isinstance(delta, dict) else None
                        if content:
                            chunk_count += 1
                            # Yield outside any try/except so an exception thrown
                            # in by the consumer is never silently swallowed.
                            yield content
        except Exception as e:
            logger.error("stream_message error after %d chunks: %s", chunk_count, e)
            raise
        finally:
            logger.info("stream_message finished: %d chunks", chunk_count)