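Fix handling of reasoning-model output: stream reasoning deltas as text, make tool calling optional, and report empty model replies as errors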

2026-06-10 13:06:44 +03:00
parent 8eb6505724
commit 07e9ef6e04
5 changed files with 105 additions and 36 deletions
@@ -12,6 +12,10 @@ VITE_DEV_PORT=5173
 OPENROUTER_API_KEY=sk-or-v1-your-key-here
 OPENROUTER_MODEL=deepseek/deepseek-chat
 OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
 # Для reasoning-моделей без tools: OPENROUTER_TOOLS_ENABLED=false
 OPENROUTER_TOOLS_ENABLED=true
 # JSON-экстракция памяти отдельной моделью (если основная капризничает):
 # MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat
 # App
 DATABASE_URL=sqlite:///./data/assistant.db
@@ -148,6 +148,9 @@ class ChatService:
                if event["type"] == "content":
                    content_parts.append(event["content"])
                    yield self._sse("token", {"content": event["content"]})
                elif event["type"] == "error":
                    yield self._sse("error", {"message": event.get("content", "LLM error")})
                    return
                elif event["type"] == "tool_calls":
                    tool_calls = event["tool_calls"]
@@ -193,7 +196,19 @@ class ChatService:
                continue
            final_content = "".join(content_parts)
-            if final_content:
+            if not final_content.strip():
                yield self._sse(
                    "error",
                    {
                        "message": (
                            "Модель не вернула текст. Проверь OPENROUTER_MODEL: "
                            "нужна поддержка tool calling или отключи OPENROUTER_TOOLS_ENABLED=false. "
                            "Для памяти можно задать MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat."
                        ),
                    },
                )
                return
            self._save_message(session_id, "assistant", final_content)
            memory_meta: dict[str, Any] = {}
@@ -17,6 +17,10 @@ class Settings(BaseSettings):
    openrouter_api_key: str = ""
    openrouter_model: str = "deepseek/deepseek-chat"
    openrouter_base_url: str = "https://openrouter.ai/api/v1"
    # Отдельная модель для JSON-задач (память, фитнес). Пусто = та же, что OPENROUTER_MODEL.
    memory_extract_model: str = ""
    # Некоторые модели (reasoning / без function calling) — выключить tools.
    openrouter_tools_enabled: bool = True
    database_url: str = "sqlite:///./data/assistant.db"
    cors_origins: str = "http://localhost:5173,http://localhost:8080,http://localhost:3000"
@@ -1,4 +1,5 @@
 import json
 import logging
 from collections.abc import AsyncIterator
 from typing import Any
@@ -6,34 +7,58 @@ from openai import AsyncOpenAI
 from app.config import get_settings
 logger = logging.getLogger(__name__)
 class LLMClient:
    def __init__(self) -> None:
        """Load the application settings and build the async OpenAI-compatible client.

        The default model name and the tools on/off flag are copied from the
        settings onto the instance; the client is created with the configured
        API key and base URL.
        """
        cfg = get_settings()
        self.model, self.tools_enabled = (
            cfg.openrouter_model,
            cfg.openrouter_tools_enabled,
        )
        client_options: dict[str, Any] = {
            "api_key": cfg.openrouter_api_key,
            "base_url": cfg.openrouter_base_url,
        }
        self.client = AsyncOpenAI(**client_options)
    def _delta_text(self, delta: Any) -> str:
        parts: list[str] = []
        if getattr(delta, "content", None):
            parts.append(delta.content)
        # Reasoning-модели (OpenRouter / o-series) иногда пишут сюда, а не в content.
        for attr in ("reasoning", "reasoning_content"):
            value = getattr(delta, attr, None)
            if value:
                parts.append(str(value))
        return "".join(parts)
    async def stream_chat(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]] | None = None,
        *,
        model: str | None = None,
    ) -> AsyncIterator[dict[str, Any]]:
        use_tools = bool(tools) and self.tools_enabled
        kwargs: dict[str, Any] = {
-            "model": self.model,
+            "model": model or self.model,
            "messages": messages,
            "stream": True,
            "temperature": 0.7,
        }
-        if tools:
+        if use_tools:
            kwargs["tools"] = tools
        try:
            stream = await self.client.chat.completions.create(**kwargs)
        except Exception as exc:
            logger.exception("LLM stream failed: %s", exc)
            yield {"type": "error", "content": str(exc)}
            yield {"type": "done", "finish_reason": "error"}
            return
        tool_calls: dict[int, dict[str, Any]] = {}
        try:
            async for chunk in stream:
                if not chunk.choices:
                    continue
@@ -41,8 +66,9 @@ class LLMClient:
                choice = chunk.choices[0]
                delta = choice.delta
-            if delta.content:
+                text = self._delta_text(delta)
-                yield {"type": "content", "content": delta.content}
+                if text:
                    yield {"type": "content", "content": text}
                if delta.tool_calls:
                    for tool_call in delta.tool_calls:
@@ -65,6 +91,10 @@ class LLMClient:
                    if tool_calls:
                        yield {"type": "tool_calls", "tool_calls": list(tool_calls.values())}
                    yield {"type": "done", "finish_reason": choice.finish_reason}
        except Exception as exc:
            logger.exception("LLM stream read failed: %s", exc)
            yield {"type": "error", "content": str(exc)}
            yield {"type": "done", "finish_reason": "error"}
    async def complete(
        self,
@@ -72,20 +102,28 @@ class LLMClient:
        tools: list[dict[str, Any]] | None = None,
        *,
        temperature: float = 0.7,
        model: str | None = None,
    ) -> dict[str, Any]:
        use_tools = bool(tools) and self.tools_enabled
        kwargs: dict[str, Any] = {
-            "model": self.model,
+            "model": model or self.model,
            "messages": messages,
            "temperature": temperature,
        }
-        if tools:
+        if use_tools:
            kwargs["tools"] = tools
        response = await self.client.chat.completions.create(**kwargs)
        message = response.choices[0].message
        content = message.content or ""
        for attr in ("reasoning", "reasoning_content"):
            value = getattr(message, attr, None)
            if value and not content:
                content = str(value)
        result: dict[str, Any] = {
-            "content": message.content or "",
+            "content": content,
            "tool_calls": [],
        }
@@ -5,6 +5,7 @@ from typing import Any
 from sqlalchemy.orm import Session
 from app.config import get_settings
 from app.llm.client import LLMClient
 from app.memory.service import MemoryService
 from app.projects.structuring import strip_markdown_json
@@ -61,6 +62,9 @@ async def _call_extractor(
        *[f"- {f.get('content')}" for f in facts[:30]],
    ]
    settings = get_settings()
    extract_model = settings.memory_extract_model.strip() or None
    llm = LLMClient()
    result = await llm.complete(
        [
@@ -72,11 +76,12 @@ async def _call_extractor(
                    + "\n\n---\nДиалог:\nПользователь: "
                    + user_text
                    + "\nАссистент: "
-                    + (assistant_text[:1500] if assistant_text else "(нет ответа)")
+                    + assistant_text[:1500]
                ),
            },
        ],
        temperature=0.2,
        model=extract_model,
    )
    raw = strip_markdown_json(result.get("content") or "")
    if not raw:
@@ -98,6 +103,9 @@ async def extract_after_turn(
    if not force and _should_skip_extraction(user_text):
        return {"ok": True, "skipped": "short_message", "saved": []}
    if not (assistant_text or "").strip():
        return {"ok": True, "skipped": "no_assistant_reply", "saved": []}
    memory = MemoryService(db)
    snapshot = memory.snapshot(session_id)