From 07e9ef6e042bbf0ad18a4f72f6684f39c06a2a24 Mon Sep 17 00:00:00 2001 From: grigo Date: Wed, 10 Jun 2026 13:06:44 +0300 Subject: [PATCH] fixed reasoning --- .env.example | 4 ++ backend/app/chat/service.py | 19 ++++++- backend/app/config.py | 4 ++ backend/app/llm/client.py | 104 +++++++++++++++++++++++----------- backend/app/memory/extract.py | 10 +++- 5 files changed, 105 insertions(+), 36 deletions(-) diff --git a/.env.example b/.env.example index b5e8e4c..ecb83fa 100644 --- a/.env.example +++ b/.env.example @@ -12,6 +12,10 @@ VITE_DEV_PORT=5173 OPENROUTER_API_KEY=sk-or-v1-your-key-here OPENROUTER_MODEL=deepseek/deepseek-chat OPENROUTER_BASE_URL=https://openrouter.ai/api/v1 +# Для reasoning-моделей без tools: OPENROUTER_TOOLS_ENABLED=false +OPENROUTER_TOOLS_ENABLED=true +# JSON-экстракция памяти отдельной моделью (если основная капризничает): +# MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat # App DATABASE_URL=sqlite:///./data/assistant.db diff --git a/backend/app/chat/service.py b/backend/app/chat/service.py index c0a31bf..fdf2c81 100644 --- a/backend/app/chat/service.py +++ b/backend/app/chat/service.py @@ -148,6 +148,9 @@ class ChatService: if event["type"] == "content": content_parts.append(event["content"]) yield self._sse("token", {"content": event["content"]}) + elif event["type"] == "error": + yield self._sse("error", {"message": event.get("content", "LLM error")}) + return elif event["type"] == "tool_calls": tool_calls = event["tool_calls"] @@ -193,8 +196,20 @@ class ChatService: continue final_content = "".join(content_parts) - if final_content: - self._save_message(session_id, "assistant", final_content) + if not final_content.strip(): + yield self._sse( + "error", + { + "message": ( + "Модель не вернула текст. Проверь OPENROUTER_MODEL: " + "нужна поддержка tool calling или отключи OPENROUTER_TOOLS_ENABLED=false. " + "Для памяти можно задать MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat." + ), + }, + ) + return + + self._save_message(session_id, "assistant", final_content) memory_meta: dict[str, Any] = {} if get_settings().memory_auto_extract: diff --git a/backend/app/config.py b/backend/app/config.py index 326831f..0c18c5c 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -17,6 +17,10 @@ class Settings(BaseSettings): openrouter_api_key: str = "" openrouter_model: str = "deepseek/deepseek-chat" openrouter_base_url: str = "https://openrouter.ai/api/v1" + # Отдельная модель для JSON-задач (память, фитнес). Пусто = та же, что OPENROUTER_MODEL. + memory_extract_model: str = "" + # Некоторые модели (reasoning / без function calling) — выключить tools. + openrouter_tools_enabled: bool = True database_url: str = "sqlite:///./data/assistant.db" cors_origins: str = "http://localhost:5173,http://localhost:8080,http://localhost:3000" diff --git a/backend/app/llm/client.py b/backend/app/llm/client.py index b839672..be80f63 100644 --- a/backend/app/llm/client.py +++ b/backend/app/llm/client.py @@ -1,4 +1,5 @@ import json +import logging from collections.abc import AsyncIterator from typing import Any @@ -6,65 +7,94 @@ from openai import AsyncOpenAI from app.config import get_settings +logger = logging.getLogger(__name__) + class LLMClient: def __init__(self) -> None: settings = get_settings() self.model = settings.openrouter_model + self.tools_enabled = settings.openrouter_tools_enabled self.client = AsyncOpenAI( api_key=settings.openrouter_api_key, base_url=settings.openrouter_base_url, ) + def _delta_text(self, delta: Any) -> str: + parts: list[str] = [] + if getattr(delta, "content", None): + parts.append(delta.content) + # Reasoning-модели (OpenRouter / o-series) иногда пишут сюда, а не в content. + for attr in ("reasoning", "reasoning_content"): + value = getattr(delta, attr, None) + if value: + parts.append(str(value)) + return "".join(parts) + async def stream_chat( self, messages: list[dict[str, Any]], tools: list[dict[str, Any]] | None = None, + *, + model: str | None = None, ) -> AsyncIterator[dict[str, Any]]: + use_tools = bool(tools) and self.tools_enabled kwargs: dict[str, Any] = { - "model": self.model, + "model": model or self.model, "messages": messages, "stream": True, "temperature": 0.7, } - if tools: + if use_tools: kwargs["tools"] = tools - stream = await self.client.chat.completions.create(**kwargs) + try: + stream = await self.client.chat.completions.create(**kwargs) + except Exception as exc: + logger.exception("LLM stream failed: %s", exc) + yield {"type": "error", "content": str(exc)} + yield {"type": "done", "finish_reason": "error"} + return tool_calls: dict[int, dict[str, Any]] = {} - async for chunk in stream: - if not chunk.choices: - continue + try: + async for chunk in stream: + if not chunk.choices: + continue - choice = chunk.choices[0] - delta = choice.delta + choice = chunk.choices[0] + delta = choice.delta - if delta.content: - yield {"type": "content", "content": delta.content} + text = self._delta_text(delta) + if text: + yield {"type": "content", "content": text} - if delta.tool_calls: - for tool_call in delta.tool_calls: - idx = tool_call.index - if idx not in tool_calls: - tool_calls[idx] = { - "id": tool_call.id or "", - "type": "function", - "function": {"name": "", "arguments": ""}, - } - if tool_call.id: - tool_calls[idx]["id"] = tool_call.id - if tool_call.function: - if tool_call.function.name: - tool_calls[idx]["function"]["name"] = tool_call.function.name - if tool_call.function.arguments: - tool_calls[idx]["function"]["arguments"] += tool_call.function.arguments + if delta.tool_calls: + for tool_call in delta.tool_calls: + idx = tool_call.index + if idx not in tool_calls: + tool_calls[idx] = { + "id": tool_call.id or "", + "type": "function", + "function": {"name": "", "arguments": ""}, + } + if tool_call.id: + tool_calls[idx]["id"] = tool_call.id + if tool_call.function: + if tool_call.function.name: + tool_calls[idx]["function"]["name"] = tool_call.function.name + if tool_call.function.arguments: + tool_calls[idx]["function"]["arguments"] += tool_call.function.arguments - if choice.finish_reason: - if tool_calls: - yield {"type": "tool_calls", "tool_calls": list(tool_calls.values())} - yield {"type": "done", "finish_reason": choice.finish_reason} + if choice.finish_reason: + if tool_calls: + yield {"type": "tool_calls", "tool_calls": list(tool_calls.values())} + yield {"type": "done", "finish_reason": choice.finish_reason} + except Exception as exc: + logger.exception("LLM stream read failed: %s", exc) + yield {"type": "error", "content": str(exc)} + yield {"type": "done", "finish_reason": "error"} async def complete( self, @@ -72,20 +102,28 @@ class LLMClient: tools: list[dict[str, Any]] | None = None, *, temperature: float = 0.7, + model: str | None = None, ) -> dict[str, Any]: + use_tools = bool(tools) and self.tools_enabled kwargs: dict[str, Any] = { - "model": self.model, + "model": model or self.model, "messages": messages, "temperature": temperature, } - if tools: + if use_tools: kwargs["tools"] = tools response = await self.client.chat.completions.create(**kwargs) message = response.choices[0].message + content = message.content or "" + for attr in ("reasoning", "reasoning_content"): + value = getattr(message, attr, None) + if value and not content: + content = str(value) + result: dict[str, Any] = { - "content": message.content or "", + "content": content, "tool_calls": [], } diff --git a/backend/app/memory/extract.py b/backend/app/memory/extract.py index 9117319..053aa80 100644 --- a/backend/app/memory/extract.py +++ b/backend/app/memory/extract.py @@ -5,6 +5,7 @@ from typing import Any from sqlalchemy.orm import Session +from app.config import get_settings from app.llm.client import LLMClient from app.memory.service import MemoryService from app.projects.structuring import strip_markdown_json @@ -61,6 +62,9 @@ async def _call_extractor( *[f"- {f.get('content')}" for f in facts[:30]], ] + settings = get_settings() + extract_model = settings.memory_extract_model.strip() or None + llm = LLMClient() result = await llm.complete( [ @@ -72,11 +76,12 @@ async def _call_extractor( + "\n\n---\nДиалог:\nПользователь: " + user_text + "\nАссистент: " - + (assistant_text[:1500] if assistant_text else "(нет ответа)") + + assistant_text[:1500] ), }, ], temperature=0.2, + model=extract_model, ) raw = strip_markdown_json(result.get("content") or "") if not raw: @@ -98,6 +103,9 @@ async def extract_after_turn( if not force and _should_skip_extraction(user_text): return {"ok": True, "skipped": "short_message", "saved": []} + if not (assistant_text or "").strip(): + return {"ok": True, "skipped": "no_assistant_reply", "saved": []} + memory = MemoryService(db) snapshot = memory.snapshot(session_id)