Add OpenRouter reasoning support and persist model reasoning per message.

What this patch does:
  * .env.example, backend/app/config.py: new OPENROUTER_REASONING_EFFORT
    setting (default "none").
  * backend/app/db: new nullable messages.reasoning_json column, plus an
    ALTER TABLE step in run_migrations() for existing databases.
  * backend/app/llm/client.py: the configured effort is sent as
    extra_body={"reasoning": {"effort": ...}}; stream_chat() yields content
    and reasoning as separate events instead of mixing reasoning into the
    content stream; chat() returns "reasoning" and "reasoning_details";
    helpers attach/serialize/deserialize reasoning.
  * backend/app/chat/service.py: reasoning is stored with each assistant
    message, re-attached when history is rebuilt, and used as the final
    text when the model returned no content.

Review changes relative to the submitted patch:
  * LLMClient._delta_reasoning stops at the first non-empty field among
    "reasoning" / "reasoning_content", matching LLMClient.chat(). Before,
    a delta exposing both fields had its reasoning text appended twice.
  * LLMClient.deserialize_reasoning returns both "reasoning" and
    "reasoning_content" for a non-JSON value, matching the JSON-string
    branch.
  * New-side hunk offsets in client.py are adjusted for the two added lines.
    The "index" hash of client.py still refers to the submitted patch.

NOTE(review): the submitted patch had its line breaks and indentation
collapsed. Line structure was rebuilt from the hunk counts; indentation of
context lines is inferred from Python syntax and should be confirmed against
the target files before applying.

diff --git a/.env.example b/.env.example
index ecb83fa..019b58b 100644
--- a/.env.example
+++ b/.env.example
@@ -11,9 +11,12 @@ VITE_DEV_PORT=5173
 # OpenRouter
 OPENROUTER_API_KEY=sk-or-v1-your-key-here
 OPENROUTER_MODEL=deepseek/deepseek-chat
+# deepseek/deepseek-v4-pro — сильная модель, tools поддерживаются:
+# OPENROUTER_MODEL=deepseek/deepseek-v4-pro
 OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
-# Для reasoning-моделей без tools: OPENROUTER_TOOLS_ENABLED=false
 OPENROUTER_TOOLS_ENABLED=true
+# none = без thinking (быстрее, стабильнее с tools). low|medium|high|xhigh — reasoning.
+OPENROUTER_REASONING_EFFORT=none
 
 # JSON-экстракция памяти отдельной моделью (если основная капризничает):
 # MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat
diff --git a/backend/app/chat/service.py b/backend/app/chat/service.py
index fdf2c81..6ea5f3b 100644
--- a/backend/app/chat/service.py
+++ b/backend/app/chat/service.py
@@ -103,6 +103,13 @@ class ChatService:
                 entry["tool_calls"] = json.loads(msg.tool_calls_json)
                 if not content:
                     entry["content"] = None
+            reasoning_data = LLMClient.deserialize_reasoning(msg.reasoning_json)
+            if reasoning_data:
+                LLMClient.attach_reasoning_to_message(
+                    entry,
+                    reasoning=reasoning_data.get("reasoning", ""),
+                    reasoning_details=reasoning_data.get("reasoning_details"),
+                )
             if msg.role == "tool" and msg.tool_call_id:
                 entry["tool_call_id"] = msg.tool_call_id
             messages.append(entry)
@@ -115,12 +122,14 @@ class ChatService:
         content: str = "",
         tool_calls: list[dict[str, Any]] | None = None,
         tool_call_id: str | None = None,
+        reasoning_json: str | None = None,
     ) -> Message:
         message = Message(
             session_id=session_id,
             role=role,
             content=content,
             tool_calls_json=json.dumps(tool_calls, ensure_ascii=False) if tool_calls else None,
+            reasoning_json=reasoning_json,
             tool_call_id=tool_call_id,
         )
         self.db.add(message)
@@ -143,11 +152,17 @@ class ChatService:
         for _ in range(MAX_TOOL_ROUNDS):
             content_parts: list[str] = []
             tool_calls: list[dict[str, Any]] = []
+            reasoning = ""
+            reasoning_details: list[Any] | None = None
 
             async for event in self.llm.stream_chat(messages, tools=TOOL_DEFINITIONS):
                 if event["type"] == "content":
                     content_parts.append(event["content"])
                     yield self._sse("token", {"content": event["content"]})
+                elif event["type"] == "reasoning":
+                    reasoning = event.get("reasoning", "") or reasoning
+                    if event.get("reasoning_details"):
+                        reasoning_details = event["reasoning_details"]
                 elif event["type"] == "error":
                     yield self._sse("error", {"message": event.get("content", "LLM error")})
                     return
@@ -160,12 +175,22 @@ class ChatService:
                     "content": "".join(content_parts) or None,
                     "tool_calls": tool_calls,
                 }
+                LLMClient.attach_reasoning_to_message(
+                    assistant_msg,
+                    reasoning=reasoning,
+                    reasoning_details=reasoning_details,
+                )
+                reasoning_json = LLMClient.serialize_reasoning(
+                    reasoning=reasoning,
+                    reasoning_details=reasoning_details,
+                )
                 messages.append(assistant_msg)
                 self._save_message(
                     session_id,
                     "assistant",
                     "".join(content_parts),
                     tool_calls=tool_calls,
+                    reasoning_json=reasoning_json,
                 )
 
                 for tool_call in tool_calls:
@@ -196,14 +221,16 @@ class ChatService:
                 continue
 
             final_content = "".join(content_parts)
+            if not final_content.strip() and reasoning:
+                final_content = reasoning
             if not final_content.strip():
                 yield self._sse(
                     "error",
                     {
                         "message": (
-                            "Модель не вернула текст. Проверь OPENROUTER_MODEL: "
-                            "нужна поддержка tool calling или отключи OPENROUTER_TOOLS_ENABLED=false. "
-                            "Для памяти можно задать MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat."
+                            "Модель не вернула текст. Для deepseek-v4-pro: "
+                            "OPENROUTER_TOOLS_ENABLED=true и OPENROUTER_REASONING_EFFORT=none. "
+                            "Для памяти: MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat."
                         ),
                     },
                 )
diff --git a/backend/app/config.py b/backend/app/config.py
index 0c18c5c..01e614d 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -21,6 +21,8 @@ class Settings(BaseSettings):
     memory_extract_model: str = ""
     # Некоторые модели (reasoning / без function calling) — выключить tools.
     openrouter_tools_enabled: bool = True
+    # DeepSeek V4 / reasoning: none | low | medium | high | xhigh. none = без thinking.
+    openrouter_reasoning_effort: str = "none"
 
     database_url: str = "sqlite:///./data/assistant.db"
     cors_origins: str = "http://localhost:5173,http://localhost:8080,http://localhost:3000"
diff --git a/backend/app/db/migrate.py b/backend/app/db/migrate.py
index 9cf973f..a5b328f 100644
--- a/backend/app/db/migrate.py
+++ b/backend/app/db/migrate.py
@@ -21,6 +21,12 @@ def run_migrations() -> None:
                 )
             )
 
+    if "messages" in inspector.get_table_names():
+        columns = {col["name"] for col in inspector.get_columns("messages")}
+        with engine.begin() as conn:
+            if "reasoning_json" not in columns:
+                conn.execute(text("ALTER TABLE messages ADD COLUMN reasoning_json TEXT"))
+
     if "pomodoro_cycles" not in inspector.get_table_names():
         return
 
diff --git a/backend/app/db/models.py b/backend/app/db/models.py
index 009d492..c31f4a1 100644
--- a/backend/app/db/models.py
+++ b/backend/app/db/models.py
@@ -29,6 +29,7 @@ class Message(Base):
     role: Mapped[str] = mapped_column(String(32))
     content: Mapped[str] = mapped_column(Text, default="")
     tool_calls_json: Mapped[str | None] = mapped_column(Text, nullable=True)
+    reasoning_json: Mapped[str | None] = mapped_column(Text, nullable=True)
     tool_call_id: Mapped[str | None] = mapped_column(String(64), nullable=True)
     created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
 
diff --git a/backend/app/llm/client.py b/backend/app/llm/client.py
index be80f63..7d4aab1 100644
--- a/backend/app/llm/client.py
+++ b/backend/app/llm/client.py
@@ -15,21 +15,50 @@ class LLMClient:
         settings = get_settings()
         self.model = settings.openrouter_model
         self.tools_enabled = settings.openrouter_tools_enabled
+        self.reasoning_effort = settings.openrouter_reasoning_effort.strip().lower()
         self.client = AsyncOpenAI(
             api_key=settings.openrouter_api_key,
             base_url=settings.openrouter_base_url,
         )
 
-    def _delta_text(self, delta: Any) -> str:
+    def _reasoning_extra_body(self) -> dict[str, Any] | None:
+        if not self.reasoning_effort:
+            return None
+        return {"reasoning": {"effort": self.reasoning_effort}}
+
+    @staticmethod
+    def _delta_reasoning(delta: Any) -> tuple[str, list[Any]]:
         parts: list[str] = []
-        if getattr(delta, "content", None):
-            parts.append(delta.content)
-        # Reasoning-модели (OpenRouter / o-series) иногда пишут сюда, а не в content.
         for attr in ("reasoning", "reasoning_content"):
             value = getattr(delta, attr, None)
             if value:
                 parts.append(str(value))
-        return "".join(parts)
+                # Same precedence as chat(): first non-empty field wins, so text is not duplicated.
+                break
+
+        details: list[Any] = []
+        raw_details = getattr(delta, "reasoning_details", None)
+        if raw_details:
+            if isinstance(raw_details, list):
+                details.extend(raw_details)
+            else:
+                details.append(raw_details)
+
+        return "".join(parts), details
+
+    @staticmethod
+    def attach_reasoning_to_message(
+        message: dict[str, Any],
+        *,
+        reasoning: str = "",
+        reasoning_details: list[Any] | None = None,
+    ) -> dict[str, Any]:
+        if reasoning:
+            message["reasoning"] = reasoning
+            message["reasoning_content"] = reasoning
+        if reasoning_details:
+            message["reasoning_details"] = reasoning_details
+        return message
 
     async def stream_chat(
         self,
@@ -47,6 +76,9 @@ class LLMClient:
         }
         if use_tools:
             kwargs["tools"] = tools
+        extra_body = self._reasoning_extra_body()
+        if extra_body:
+            kwargs["extra_body"] = extra_body
 
         try:
             stream = await self.client.chat.completions.create(**kwargs)
@@ -57,6 +89,8 @@ class LLMClient:
             return
 
         tool_calls: dict[int, dict[str, Any]] = {}
+        reasoning_parts: list[str] = []
+        reasoning_details: list[Any] = []
 
         try:
             async for chunk in stream:
@@ -66,9 +100,14 @@ class LLMClient:
                 choice = chunk.choices[0]
                 delta = choice.delta
 
-                text = self._delta_text(delta)
-                if text:
-                    yield {"type": "content", "content": text}
+                if delta.content:
+                    yield {"type": "content", "content": delta.content}
+
+                reasoning_text, details = self._delta_reasoning(delta)
+                if reasoning_text:
+                    reasoning_parts.append(reasoning_text)
+                if details:
+                    reasoning_details.extend(details)
 
                 if delta.tool_calls:
                     for tool_call in delta.tool_calls:
@@ -88,6 +127,13 @@ class LLMClient:
                             tool_calls[idx]["function"]["arguments"] += tool_call.function.arguments
 
                 if choice.finish_reason:
+                    reasoning = "".join(reasoning_parts)
+                    if reasoning or reasoning_details:
+                        yield {
+                            "type": "reasoning",
+                            "reasoning": reasoning,
+                            "reasoning_details": reasoning_details or None,
+                        }
                     if tool_calls:
                         yield {"type": "tool_calls", "tool_calls": list(tool_calls.values())}
                     yield {"type": "done", "finish_reason": choice.finish_reason}
@@ -112,19 +158,29 @@ class LLMClient:
         }
         if use_tools:
             kwargs["tools"] = tools
+        extra_body = self._reasoning_extra_body()
+        if extra_body:
+            kwargs["extra_body"] = extra_body
 
         response = await self.client.chat.completions.create(**kwargs)
         message = response.choices[0].message
 
         content = message.content or ""
+        reasoning = ""
         for attr in ("reasoning", "reasoning_content"):
             value = getattr(message, attr, None)
-            if value and not content:
-                content = str(value)
+            if value:
+                reasoning = str(value)
+                break
+
+        if not content and reasoning:
+            content = reasoning
 
         result: dict[str, Any] = {
             "content": content,
             "tool_calls": [],
+            "reasoning": reasoning,
+            "reasoning_details": getattr(message, "reasoning_details", None),
         }
 
         if message.tool_calls:
@@ -150,3 +206,33 @@ class LLMClient:
             return json.loads(arguments)
         except json.JSONDecodeError:
             return {}
+
+    @staticmethod
+    def serialize_reasoning(
+        *,
+        reasoning: str = "",
+        reasoning_details: list[Any] | None = None,
+    ) -> str | None:
+        payload: dict[str, Any] = {}
+        if reasoning:
+            payload["reasoning"] = reasoning
+            payload["reasoning_content"] = reasoning
+        if reasoning_details:
+            payload["reasoning_details"] = reasoning_details
+        if not payload:
+            return None
+        return json.dumps(payload, ensure_ascii=False)
+
+    @staticmethod
+    def deserialize_reasoning(raw: str | None) -> dict[str, Any]:
+        if not raw:
+            return {}
+        try:
+            data = json.loads(raw)
+        except json.JSONDecodeError:
+            return {"reasoning": raw, "reasoning_content": raw}
+        if isinstance(data, str):
+            return {"reasoning": data, "reasoning_content": data}
+        if isinstance(data, dict):
+            return data
+        return {}