"""OpenRouter chat-completions client.

Provides one-shot completions with an automatic retry on a fallback model
(`send_message`, `send_message_with_model`) and a server-sent-events stream
of content deltas (`stream_message`).
"""

import codecs
import json
import logging
import os
from collections.abc import AsyncIterator

import httpx
from dotenv import load_dotenv

load_dotenv()

logger = logging.getLogger(__name__)

OPENROUTER_KEY = os.getenv("ROUTER_KEY")
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
CHAT_MODEL = os.getenv("CHAT_MODEL", "mistralai/mistral-nemo")
SYSTEM_MODEL = os.getenv("SYSTEM_MODEL", "google/gemini-2.5-flash")
# Softer model when primary returns content_filter / empty / API errors (default: CHAT_MODEL).
LLM_FALLBACK_MODEL = (os.getenv("LLM_FALLBACK_MODEL") or "").strip() or CHAT_MODEL

HEADERS = {
    "Authorization": f"Bearer {OPENROUTER_KEY}",
    "Content-Type": "application/json",
    "HTTP-Referer": "http://localhost:8000",
}

# finish_reason values that mean the provider blocked the output.
_BLOCKED_FINISH_REASONS = frozenset({"content_filter", "safety", "moderation"})
# native_finish_reason values (compared upper-cased) that mean the same.
_BLOCKED_NATIVE_REASONS = frozenset({"PROHIBITED_CONTENT", "SAFETY", "BLOCKED"})
# finish_reason values that are not reported as a distinct failure when content is empty.
_ORDINARY_FINISH_REASONS = frozenset({"stop", "length", "tool_calls", "function_call"})


class LLMError(Exception):
    """OpenRouter returned an error or an unexpected response shape."""


def _parse_completion_body(data: dict) -> str:
    """Extract the assistant text from a decoded chat-completion response.

    Args:
        data: The JSON-decoded response body.

    Returns:
        The first choice's message content, as a string.

    Raises:
        LLMError: If the body is not an object, carries an ``error`` field,
            has no usable ``choices`` list, was blocked by the provider,
            contains a refusal, or has empty content.
    """
    if not isinstance(data, dict):
        raise LLMError(f"Invalid API response: expected object, got {type(data).__name__}")

    err = data.get("error")
    if err:
        if isinstance(err, dict):
            msg = err.get("message") or str(err)
            code = err.get("code")
        else:
            msg = str(err)
            code = None
        suffix = f" (code={code})" if code is not None else ""
        raise LLMError(f"OpenRouter error{suffix}: {msg}")

    choices = data.get("choices")
    # A non-list value (e.g. a dict) would make choices[0] raise KeyError/TypeError
    # instead of LLMError, so treat it the same as a missing list.
    if not isinstance(choices, list) or not choices:
        preview = str(data)[:400]
        raise LLMError(f"OpenRouter response has no 'choices'. Body preview: {preview}")

    first = choices[0] if isinstance(choices[0], dict) else {}
    message = first.get("message") or {}
    if not isinstance(message, dict):
        raise LLMError("OpenRouter choice has no message object")

    finish = first.get("finish_reason") or ""
    native_finish = first.get("native_finish_reason") or ""
    if (
        finish in _BLOCKED_FINISH_REASONS
        or str(native_finish).upper() in _BLOCKED_NATIVE_REASONS
    ):
        raise LLMError(
            f"Content blocked by provider (finish_reason={finish}, native={native_finish})"
        )

    content = message.get("content")
    if content is not None and str(content).strip():
        return str(content)

    # No usable content from here on: report the most specific reason available.
    refusal = message.get("refusal")
    if refusal:
        raise LLMError(f"Model refused the request: {refusal}")
    if finish and finish not in _ORDINARY_FINISH_REASONS:
        raise LLMError(
            f"OpenRouter finished without content (finish_reason={finish}, native={native_finish})"
        )
    raise LLMError("OpenRouter returned empty message content")


def _has_content(message: dict) -> bool:
    """Return True if *message* carries non-empty content."""
    content = message.get("content")
    if isinstance(content, str):
        return bool(content.strip())
    # Non-string content (e.g. a list of content parts) cannot be stripped;
    # keep the message whenever the value is non-empty.
    return bool(content)


def _clean(messages: list) -> list:
    """Filter out messages with empty content.

    String content counts as empty when it is blank after stripping; any
    other content value counts as empty when it is falsy.
    """
    return [m for m in messages if _has_content(m)]


async def _post_once(model: str, messages: list, extra: dict | None = None) -> str:
    """Send one non-streaming completion request and return the reply text.

    Args:
        model: Model identifier placed in the request payload.
        messages: Chat messages; those with empty content are dropped.
        extra: Optional additional payload fields, merged over the defaults.

    Returns:
        The assistant message content.

    Raises:
        LLMError: If the API key is missing, the response is not JSON, the
            HTTP status is >= 400, or the body cannot be parsed into content.
    """
    if not OPENROUTER_KEY:
        raise LLMError("ROUTER_KEY is not set in environment")

    payload = {"model": model, "messages": _clean(messages), **(extra or {})}
    async with httpx.AsyncClient(timeout=90) as client:
        r = await client.post(OPENROUTER_URL, headers=HEADERS, json=payload)

    try:
        data = r.json()
    except ValueError as e:  # JSON and text-decoding errors are both ValueError subclasses
        raise LLMError(f"Non-JSON response (HTTP {r.status_code}): {r.text[:300]}") from e

    if r.status_code >= 400:
        # Prefer the specific LLMError raised from the body; if the body
        # happens to parse cleanly, still fail because of the HTTP status.
        _parse_completion_body(data)
        raise LLMError(f"HTTP {r.status_code}: {data}")

    try:
        return _parse_completion_body(data)
    except LLMError:
        logger.warning(
            "OpenRouter completion failed model=%s status=%s body=%.500s",
            model,
            r.status_code,
            data,
        )
        raise


async def _post(model: str, messages: list, extra: dict | None = None) -> str:
    """POST completion; on failure retries once with LLM_FALLBACK_MODEL (usually CHAT_MODEL).

    Only `LLMError` triggers the retry. If no distinct fallback model is
    configured, the primary error is re-raised unchanged.

    Raises:
        LLMError: If the primary attempt fails with no distinct fallback, or
            if both attempts fail (the message then combines both errors).
    """
    try:
        return await _post_once(model, messages, extra)
    except LLMError as primary_err:
        fallback = LLM_FALLBACK_MODEL
        if not fallback or fallback == model:
            raise
        logger.info(
            "LLM fallback: %s failed (%s) → retrying with %s",
            model,
            primary_err,
            fallback,
        )
        try:
            return await _post_once(fallback, messages, extra)
        except LLMError as fallback_err:
            raise LLMError(
                f"{primary_err} (fallback {fallback} also failed: {fallback_err})"
            ) from fallback_err


async def send_message(messages: list) -> str:
    """SYSTEM_MODEL with automatic fallback to LLM_FALLBACK_MODEL."""
    return await _post(SYSTEM_MODEL, messages)


async def send_message_with_model(messages: list, model: str) -> str:
    """Named model (RPG_*, SD_*) with automatic fallback to LLM_FALLBACK_MODEL."""
    return await _post(model, messages)


def _stream_delta(data: str) -> str:
    """Return the content delta carried by one SSE ``data`` payload.

    Malformed or content-free payloads yield an empty string so the stream
    can skip them.

    Raises:
        LLMError: If the payload carries an ``error`` field.
    """
    try:
        chunk = json.loads(data)
    except ValueError:
        return ""
    if not isinstance(chunk, dict):
        return ""

    err = chunk.get("error")
    if err:
        msg = err.get("message", err) if isinstance(err, dict) else err
        raise LLMError(f"OpenRouter stream error: {msg}")

    choices = chunk.get("choices")
    if not isinstance(choices, list) or not choices or not isinstance(choices[0], dict):
        return ""
    delta = choices[0].get("delta") or {}
    if not isinstance(delta, dict):
        return ""
    content = delta.get("content", "")
    return content if isinstance(content, str) else ""


async def stream_message(messages: list) -> AsyncIterator[str]:
    """Chat model stream — roleplay dialogue.

    Posts a streaming request for CHAT_MODEL and yields each non-empty
    content delta as it arrives, stopping at the ``[DONE]`` sentinel or when
    the response ends.

    Args:
        messages: Chat messages; those with empty content are dropped.

    Yields:
        Content fragments in arrival order.

    Raises:
        LLMError: If a stream chunk carries an ``error`` field.
        httpx.HTTPStatusError: If the response status is an error
            (raised by ``raise_for_status``).
    """
    payload = {
        "model": CHAT_MODEL,
        "messages": _clean(messages),
        "stream": True,
    }
    timeout = httpx.Timeout(connect=10, read=120, write=10, pool=5)
    chunk_count = 0
    first_logged = False
    # Decode incrementally: a multi-byte UTF-8 character split across two
    # network chunks must not be turned into replacement characters.
    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")

    async with httpx.AsyncClient(timeout=timeout) as client:
        try:
            async with client.stream(
                "POST", OPENROUTER_URL, headers=HEADERS, json=payload
            ) as response:
                response.raise_for_status()
                buf = ""
                async for raw in response.aiter_bytes():
                    text = decoder.decode(raw)
                    if text and not first_logged:
                        first_logged = True
                        logger.info("stream first bytes: %.200s", text)
                    buf += text
                    while "\n" in buf:
                        line, buf = buf.split("\n", 1)
                        line = line.rstrip("\r")
                        if not line.startswith("data:"):
                            continue
                        # The single space after "data:" is optional in SSE.
                        data = line[5:].removeprefix(" ")
                        if data == "[DONE]":
                            return
                        content = _stream_delta(data)
                        if content:
                            chunk_count += 1
                            yield content
        except Exception as e:
            logger.error("stream_message error after %d chunks: %s", chunk_count, e)
            raise
        finally:
            logger.info("stream_message finished: %d chunks", chunk_count)