From 07e9ef6e042bbf0ad18a4f72f6684f39c06a2a24 Mon Sep 17 00:00:00 2001
From: grigo <blacksmoke576@gmail.com>
Date: Wed, 10 Jun 2026 13:06:44 +0300
Subject: [PATCH] Handle reasoning-model output, surface LLM errors, make tools optional

---
 .env.example                  |   4 ++
 backend/app/chat/service.py   |  19 ++++++-
 backend/app/config.py         |   4 ++
 backend/app/llm/client.py     | 104 +++++++++++++++++++++++-----------
 backend/app/memory/extract.py |  10 +++-
 5 files changed, 105 insertions(+), 36 deletions(-)

diff --git a/.env.example b/.env.example
index b5e8e4c..ecb83fa 100644
--- a/.env.example
+++ b/.env.example
@@ -12,6 +12,10 @@ VITE_DEV_PORT=5173
 OPENROUTER_API_KEY=sk-or-v1-your-key-here
 OPENROUTER_MODEL=deepseek/deepseek-chat
 OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
+# Для reasoning-моделей без tools: OPENROUTER_TOOLS_ENABLED=false
+OPENROUTER_TOOLS_ENABLED=true
+# JSON-экстракция памяти отдельной моделью (если основная капризничает):
+# MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat
 
 # App
 DATABASE_URL=sqlite:///./data/assistant.db
diff --git a/backend/app/chat/service.py b/backend/app/chat/service.py
index c0a31bf..fdf2c81 100644
--- a/backend/app/chat/service.py
+++ b/backend/app/chat/service.py
@@ -148,6 +148,9 @@ class ChatService:
                 if event["type"] == "content":
                     content_parts.append(event["content"])
                     yield self._sse("token", {"content": event["content"]})
+                elif event["type"] == "error":
+                    yield self._sse("error", {"message": event.get("content", "LLM error")})
+                    return
                 elif event["type"] == "tool_calls":
                     tool_calls = event["tool_calls"]
 
@@ -193,8 +196,20 @@ class ChatService:
                 continue
 
             final_content = "".join(content_parts)
-            if final_content:
-                self._save_message(session_id, "assistant", final_content)
+            if not final_content.strip():
+                yield self._sse(
+                    "error",
+                    {
+                        "message": (
+                            "Модель не вернула текст. Проверь OPENROUTER_MODEL: "
+                            "нужна поддержка tool calling или отключи OPENROUTER_TOOLS_ENABLED=false. "
+                            "Для памяти можно задать MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat."
+                        ),
+                    },
+                )
+                return
+
+            self._save_message(session_id, "assistant", final_content)
 
             memory_meta: dict[str, Any] = {}
             if get_settings().memory_auto_extract:
diff --git a/backend/app/config.py b/backend/app/config.py
index 326831f..0c18c5c 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -17,6 +17,10 @@ class Settings(BaseSettings):
     openrouter_api_key: str = ""
     openrouter_model: str = "deepseek/deepseek-chat"
     openrouter_base_url: str = "https://openrouter.ai/api/v1"
+    # Отдельная модель для JSON-задач (память, фитнес). Пусто = та же, что OPENROUTER_MODEL.
+    memory_extract_model: str = ""
+    # Некоторые модели (reasoning / без function calling) — выключить tools.
+    openrouter_tools_enabled: bool = True
 
     database_url: str = "sqlite:///./data/assistant.db"
     cors_origins: str = "http://localhost:5173,http://localhost:8080,http://localhost:3000"
diff --git a/backend/app/llm/client.py b/backend/app/llm/client.py
index b839672..be80f63 100644
--- a/backend/app/llm/client.py
+++ b/backend/app/llm/client.py
@@ -1,4 +1,5 @@
 import json
+import logging
 from collections.abc import AsyncIterator
 from typing import Any
 
@@ -6,65 +7,94 @@ from openai import AsyncOpenAI
 
 from app.config import get_settings
 
+logger = logging.getLogger(__name__)
+
 
 class LLMClient:
     def __init__(self) -> None:
         settings = get_settings()
         self.model = settings.openrouter_model
+        self.tools_enabled = settings.openrouter_tools_enabled
         self.client = AsyncOpenAI(
             api_key=settings.openrouter_api_key,
             base_url=settings.openrouter_base_url,
         )
 
+    def _delta_text(self, delta: Any) -> str:
+        """Return the text of one streamed delta: content, else reasoning."""
+        # Reasoning models (OpenRouter / o-series) sometimes write to `reasoning`
+        # or `reasoning_content` instead of `content`. Use the first populated
+        # field, like complete(), so mirrored reasoning aliases are not doubled.
+        for attr in ("content", "reasoning", "reasoning_content"):
+            value = getattr(delta, attr, None)
+            if value:
+                return str(value)
+        return ""
+
     async def stream_chat(
         self,
         messages: list[dict[str, Any]],
         tools: list[dict[str, Any]] | None = None,
+        *,
+        model: str | None = None,
     ) -> AsyncIterator[dict[str, Any]]:
+        use_tools = bool(tools) and self.tools_enabled
         kwargs: dict[str, Any] = {
-            "model": self.model,
+            "model": model or self.model,
             "messages": messages,
             "stream": True,
             "temperature": 0.7,
         }
-        if tools:
+        if use_tools:
             kwargs["tools"] = tools
 
-        stream = await self.client.chat.completions.create(**kwargs)
+        try:
+            stream = await self.client.chat.completions.create(**kwargs)
+        except Exception as exc:
+            logger.exception("LLM stream failed: %s", exc)
+            yield {"type": "error", "content": str(exc)}
+            yield {"type": "done", "finish_reason": "error"}
+            return
 
         tool_calls: dict[int, dict[str, Any]] = {}
 
-        async for chunk in stream:
-            if not chunk.choices:
-                continue
+        try:
+            async for chunk in stream:
+                if not chunk.choices:
+                    continue
 
-            choice = chunk.choices[0]
-            delta = choice.delta
+                choice = chunk.choices[0]
+                delta = choice.delta
 
-            if delta.content:
-                yield {"type": "content", "content": delta.content}
+                text = self._delta_text(delta)
+                if text:
+                    yield {"type": "content", "content": text}
 
-            if delta.tool_calls:
-                for tool_call in delta.tool_calls:
-                    idx = tool_call.index
-                    if idx not in tool_calls:
-                        tool_calls[idx] = {
-                            "id": tool_call.id or "",
-                            "type": "function",
-                            "function": {"name": "", "arguments": ""},
-                        }
-                    if tool_call.id:
-                        tool_calls[idx]["id"] = tool_call.id
-                    if tool_call.function:
-                        if tool_call.function.name:
-                            tool_calls[idx]["function"]["name"] = tool_call.function.name
-                        if tool_call.function.arguments:
-                            tool_calls[idx]["function"]["arguments"] += tool_call.function.arguments
+                if delta.tool_calls:
+                    for tool_call in delta.tool_calls:
+                        idx = tool_call.index
+                        if idx not in tool_calls:
+                            tool_calls[idx] = {
+                                "id": tool_call.id or "",
+                                "type": "function",
+                                "function": {"name": "", "arguments": ""},
+                            }
+                        if tool_call.id:
+                            tool_calls[idx]["id"] = tool_call.id
+                        if tool_call.function:
+                            if tool_call.function.name:
+                                tool_calls[idx]["function"]["name"] = tool_call.function.name
+                            if tool_call.function.arguments:
+                                tool_calls[idx]["function"]["arguments"] += tool_call.function.arguments
 
-            if choice.finish_reason:
-                if tool_calls:
-                    yield {"type": "tool_calls", "tool_calls": list(tool_calls.values())}
-                yield {"type": "done", "finish_reason": choice.finish_reason}
+                if choice.finish_reason:
+                    if tool_calls:
+                        yield {"type": "tool_calls", "tool_calls": list(tool_calls.values())}
+                    yield {"type": "done", "finish_reason": choice.finish_reason}
+        except Exception as exc:
+            logger.exception("LLM stream read failed: %s", exc)
+            yield {"type": "error", "content": str(exc)}
+            yield {"type": "done", "finish_reason": "error"}
 
     async def complete(
         self,
@@ -72,20 +102,28 @@ class LLMClient:
         tools: list[dict[str, Any]] | None = None,
         *,
         temperature: float = 0.7,
+        model: str | None = None,
     ) -> dict[str, Any]:
+        use_tools = bool(tools) and self.tools_enabled
         kwargs: dict[str, Any] = {
-            "model": self.model,
+            "model": model or self.model,
             "messages": messages,
             "temperature": temperature,
         }
-        if tools:
+        if use_tools:
             kwargs["tools"] = tools
 
         response = await self.client.chat.completions.create(**kwargs)
         message = response.choices[0].message
 
+        content = message.content or ""
+        for attr in ("reasoning", "reasoning_content"):
+            value = getattr(message, attr, None)
+            if value and not content:
+                content = str(value)
+
         result: dict[str, Any] = {
-            "content": message.content or "",
+            "content": content,
             "tool_calls": [],
         }
 
diff --git a/backend/app/memory/extract.py b/backend/app/memory/extract.py
index 9117319..053aa80 100644
--- a/backend/app/memory/extract.py
+++ b/backend/app/memory/extract.py
@@ -5,6 +5,7 @@ from typing import Any
 
 from sqlalchemy.orm import Session
 
+from app.config import get_settings
 from app.llm.client import LLMClient
 from app.memory.service import MemoryService
 from app.projects.structuring import strip_markdown_json
@@ -61,6 +62,9 @@ async def _call_extractor(
         *[f"- {f.get('content')}" for f in facts[:30]],
     ]
 
+    settings = get_settings()
+    extract_model = settings.memory_extract_model.strip() or None
+
     llm = LLMClient()
     result = await llm.complete(
         [
@@ -72,11 +76,12 @@ async def _call_extractor(
                     + "\n\n---\nДиалог:\nПользователь: "
                     + user_text
                     + "\nАссистент: "
-                    + (assistant_text[:1500] if assistant_text else "(нет ответа)")
+                    + assistant_text[:1500]
                 ),
             },
         ],
         temperature=0.2,
+        model=extract_model,
     )
     raw = strip_markdown_json(result.get("content") or "")
     if not raw:
@@ -98,6 +103,9 @@ async def extract_after_turn(
     if not force and _should_skip_extraction(user_text):
         return {"ok": True, "skipped": "short_message", "saved": []}
 
+    if not (assistant_text or "").strip():
+        return {"ok": True, "skipped": "no_assistant_reply", "saved": []}
+
     memory = MemoryService(db)
     snapshot = memory.snapshot(session_id)