fixed reasoning

This commit is contained in:
2026-06-10 13:11:15 +03:00
parent 07e9ef6e04
commit 320f7c7195
6 changed files with 137 additions and 14 deletions
+4 -1
View File
@@ -11,9 +11,12 @@ VITE_DEV_PORT=5173
# OpenRouter # OpenRouter
OPENROUTER_API_KEY=sk-or-v1-your-key-here OPENROUTER_API_KEY=sk-or-v1-your-key-here
OPENROUTER_MODEL=deepseek/deepseek-chat OPENROUTER_MODEL=deepseek/deepseek-chat
# deepseek/deepseek-v4-pro — сильная модель, tools поддерживаются:
# OPENROUTER_MODEL=deepseek/deepseek-v4-pro
OPENROUTER_BASE_URL=https://openrouter.ai/api/v1 OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
# Для reasoning-моделей без tools: OPENROUTER_TOOLS_ENABLED=false
OPENROUTER_TOOLS_ENABLED=true OPENROUTER_TOOLS_ENABLED=true
# none = без thinking (быстрее, стабильнее с tools). low|medium|high|xhigh — reasoning.
OPENROUTER_REASONING_EFFORT=none
# JSON-экстракция памяти отдельной моделью (если основная капризничает): # JSON-экстракция памяти отдельной моделью (если основная капризничает):
# MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat # MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat
+30 -3
View File
@@ -103,6 +103,13 @@ class ChatService:
entry["tool_calls"] = json.loads(msg.tool_calls_json) entry["tool_calls"] = json.loads(msg.tool_calls_json)
if not content: if not content:
entry["content"] = None entry["content"] = None
reasoning_data = LLMClient.deserialize_reasoning(msg.reasoning_json)
if reasoning_data:
LLMClient.attach_reasoning_to_message(
entry,
reasoning=reasoning_data.get("reasoning", ""),
reasoning_details=reasoning_data.get("reasoning_details"),
)
if msg.role == "tool" and msg.tool_call_id: if msg.role == "tool" and msg.tool_call_id:
entry["tool_call_id"] = msg.tool_call_id entry["tool_call_id"] = msg.tool_call_id
messages.append(entry) messages.append(entry)
@@ -115,12 +122,14 @@ class ChatService:
content: str = "", content: str = "",
tool_calls: list[dict[str, Any]] | None = None, tool_calls: list[dict[str, Any]] | None = None,
tool_call_id: str | None = None, tool_call_id: str | None = None,
reasoning_json: str | None = None,
) -> Message: ) -> Message:
message = Message( message = Message(
session_id=session_id, session_id=session_id,
role=role, role=role,
content=content, content=content,
tool_calls_json=json.dumps(tool_calls, ensure_ascii=False) if tool_calls else None, tool_calls_json=json.dumps(tool_calls, ensure_ascii=False) if tool_calls else None,
reasoning_json=reasoning_json,
tool_call_id=tool_call_id, tool_call_id=tool_call_id,
) )
self.db.add(message) self.db.add(message)
@@ -143,11 +152,17 @@ class ChatService:
for _ in range(MAX_TOOL_ROUNDS): for _ in range(MAX_TOOL_ROUNDS):
content_parts: list[str] = [] content_parts: list[str] = []
tool_calls: list[dict[str, Any]] = [] tool_calls: list[dict[str, Any]] = []
reasoning = ""
reasoning_details: list[Any] | None = None
async for event in self.llm.stream_chat(messages, tools=TOOL_DEFINITIONS): async for event in self.llm.stream_chat(messages, tools=TOOL_DEFINITIONS):
if event["type"] == "content": if event["type"] == "content":
content_parts.append(event["content"]) content_parts.append(event["content"])
yield self._sse("token", {"content": event["content"]}) yield self._sse("token", {"content": event["content"]})
elif event["type"] == "reasoning":
reasoning = event.get("reasoning", "") or reasoning
if event.get("reasoning_details"):
reasoning_details = event["reasoning_details"]
elif event["type"] == "error": elif event["type"] == "error":
yield self._sse("error", {"message": event.get("content", "LLM error")}) yield self._sse("error", {"message": event.get("content", "LLM error")})
return return
@@ -160,12 +175,22 @@ class ChatService:
"content": "".join(content_parts) or None, "content": "".join(content_parts) or None,
"tool_calls": tool_calls, "tool_calls": tool_calls,
} }
LLMClient.attach_reasoning_to_message(
assistant_msg,
reasoning=reasoning,
reasoning_details=reasoning_details,
)
reasoning_json = LLMClient.serialize_reasoning(
reasoning=reasoning,
reasoning_details=reasoning_details,
)
messages.append(assistant_msg) messages.append(assistant_msg)
self._save_message( self._save_message(
session_id, session_id,
"assistant", "assistant",
"".join(content_parts), "".join(content_parts),
tool_calls=tool_calls, tool_calls=tool_calls,
reasoning_json=reasoning_json,
) )
for tool_call in tool_calls: for tool_call in tool_calls:
@@ -196,14 +221,16 @@ class ChatService:
continue continue
final_content = "".join(content_parts) final_content = "".join(content_parts)
if not final_content.strip() and reasoning:
final_content = reasoning
if not final_content.strip(): if not final_content.strip():
yield self._sse( yield self._sse(
"error", "error",
{ {
"message": ( "message": (
"Модель не вернула текст. Проверь OPENROUTER_MODEL: " "Модель не вернула текст. Для deepseek-v4-pro: "
"нужна поддержка tool calling или отключи OPENROUTER_TOOLS_ENABLED=false. " "OPENROUTER_TOOLS_ENABLED=true и OPENROUTER_REASONING_EFFORT=none. "
"Для памяти можно задать MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat." "Для памяти: MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat."
), ),
}, },
) )
+2
View File
@@ -21,6 +21,8 @@ class Settings(BaseSettings):
memory_extract_model: str = "" memory_extract_model: str = ""
# Некоторые модели (reasoning / без function calling) — выключить tools. # Некоторые модели (reasoning / без function calling) — выключить tools.
openrouter_tools_enabled: bool = True openrouter_tools_enabled: bool = True
# DeepSeek V4 / reasoning: none | low | medium | high | xhigh. none = без thinking.
openrouter_reasoning_effort: str = "none"
database_url: str = "sqlite:///./data/assistant.db" database_url: str = "sqlite:///./data/assistant.db"
cors_origins: str = "http://localhost:5173,http://localhost:8080,http://localhost:3000" cors_origins: str = "http://localhost:5173,http://localhost:8080,http://localhost:3000"
+6
View File
@@ -21,6 +21,12 @@ def run_migrations() -> None:
) )
) )
if "messages" in inspector.get_table_names():
columns = {col["name"] for col in inspector.get_columns("messages")}
with engine.begin() as conn:
if "reasoning_json" not in columns:
conn.execute(text("ALTER TABLE messages ADD COLUMN reasoning_json TEXT"))
if "pomodoro_cycles" not in inspector.get_table_names(): if "pomodoro_cycles" not in inspector.get_table_names():
return return
+1
View File
@@ -29,6 +29,7 @@ class Message(Base):
role: Mapped[str] = mapped_column(String(32)) role: Mapped[str] = mapped_column(String(32))
content: Mapped[str] = mapped_column(Text, default="") content: Mapped[str] = mapped_column(Text, default="")
tool_calls_json: Mapped[str | None] = mapped_column(Text, nullable=True) tool_calls_json: Mapped[str | None] = mapped_column(Text, nullable=True)
reasoning_json: Mapped[str | None] = mapped_column(Text, nullable=True)
tool_call_id: Mapped[str | None] = mapped_column(String(64), nullable=True) tool_call_id: Mapped[str | None] = mapped_column(String(64), nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
+94 -10
View File
@@ -15,21 +15,48 @@ class LLMClient:
settings = get_settings() settings = get_settings()
self.model = settings.openrouter_model self.model = settings.openrouter_model
self.tools_enabled = settings.openrouter_tools_enabled self.tools_enabled = settings.openrouter_tools_enabled
self.reasoning_effort = settings.openrouter_reasoning_effort.strip().lower()
self.client = AsyncOpenAI( self.client = AsyncOpenAI(
api_key=settings.openrouter_api_key, api_key=settings.openrouter_api_key,
base_url=settings.openrouter_base_url, base_url=settings.openrouter_base_url,
) )
def _delta_text(self, delta: Any) -> str: def _reasoning_extra_body(self) -> dict[str, Any] | None:
if not self.reasoning_effort:
return None
return {"reasoning": {"effort": self.reasoning_effort}}
@staticmethod
def _delta_reasoning(delta: Any) -> tuple[str, list[Any]]:
parts: list[str] = [] parts: list[str] = []
if getattr(delta, "content", None):
parts.append(delta.content)
# Reasoning-модели (OpenRouter / o-series) иногда пишут сюда, а не в content.
for attr in ("reasoning", "reasoning_content"): for attr in ("reasoning", "reasoning_content"):
value = getattr(delta, attr, None) value = getattr(delta, attr, None)
if value: if value:
parts.append(str(value)) parts.append(str(value))
return "".join(parts)
details: list[Any] = []
raw_details = getattr(delta, "reasoning_details", None)
if raw_details:
if isinstance(raw_details, list):
details.extend(raw_details)
else:
details.append(raw_details)
return "".join(parts), details
@staticmethod
def attach_reasoning_to_message(
message: dict[str, Any],
*,
reasoning: str = "",
reasoning_details: list[Any] | None = None,
) -> dict[str, Any]:
if reasoning:
message["reasoning"] = reasoning
message["reasoning_content"] = reasoning
if reasoning_details:
message["reasoning_details"] = reasoning_details
return message
async def stream_chat( async def stream_chat(
self, self,
@@ -47,6 +74,9 @@ class LLMClient:
} }
if use_tools: if use_tools:
kwargs["tools"] = tools kwargs["tools"] = tools
extra_body = self._reasoning_extra_body()
if extra_body:
kwargs["extra_body"] = extra_body
try: try:
stream = await self.client.chat.completions.create(**kwargs) stream = await self.client.chat.completions.create(**kwargs)
@@ -57,6 +87,8 @@ class LLMClient:
return return
tool_calls: dict[int, dict[str, Any]] = {} tool_calls: dict[int, dict[str, Any]] = {}
reasoning_parts: list[str] = []
reasoning_details: list[Any] = []
try: try:
async for chunk in stream: async for chunk in stream:
@@ -66,9 +98,14 @@ class LLMClient:
choice = chunk.choices[0] choice = chunk.choices[0]
delta = choice.delta delta = choice.delta
text = self._delta_text(delta) if delta.content:
if text: yield {"type": "content", "content": delta.content}
yield {"type": "content", "content": text}
reasoning_text, details = self._delta_reasoning(delta)
if reasoning_text:
reasoning_parts.append(reasoning_text)
if details:
reasoning_details.extend(details)
if delta.tool_calls: if delta.tool_calls:
for tool_call in delta.tool_calls: for tool_call in delta.tool_calls:
@@ -88,6 +125,13 @@ class LLMClient:
tool_calls[idx]["function"]["arguments"] += tool_call.function.arguments tool_calls[idx]["function"]["arguments"] += tool_call.function.arguments
if choice.finish_reason: if choice.finish_reason:
reasoning = "".join(reasoning_parts)
if reasoning or reasoning_details:
yield {
"type": "reasoning",
"reasoning": reasoning,
"reasoning_details": reasoning_details or None,
}
if tool_calls: if tool_calls:
yield {"type": "tool_calls", "tool_calls": list(tool_calls.values())} yield {"type": "tool_calls", "tool_calls": list(tool_calls.values())}
yield {"type": "done", "finish_reason": choice.finish_reason} yield {"type": "done", "finish_reason": choice.finish_reason}
@@ -112,19 +156,29 @@ class LLMClient:
} }
if use_tools: if use_tools:
kwargs["tools"] = tools kwargs["tools"] = tools
extra_body = self._reasoning_extra_body()
if extra_body:
kwargs["extra_body"] = extra_body
response = await self.client.chat.completions.create(**kwargs) response = await self.client.chat.completions.create(**kwargs)
message = response.choices[0].message message = response.choices[0].message
content = message.content or "" content = message.content or ""
reasoning = ""
for attr in ("reasoning", "reasoning_content"): for attr in ("reasoning", "reasoning_content"):
value = getattr(message, attr, None) value = getattr(message, attr, None)
if value and not content: if value:
content = str(value) reasoning = str(value)
break
if not content and reasoning:
content = reasoning
result: dict[str, Any] = { result: dict[str, Any] = {
"content": content, "content": content,
"tool_calls": [], "tool_calls": [],
"reasoning": reasoning,
"reasoning_details": getattr(message, "reasoning_details", None),
} }
if message.tool_calls: if message.tool_calls:
@@ -150,3 +204,33 @@ class LLMClient:
return json.loads(arguments) return json.loads(arguments)
except json.JSONDecodeError: except json.JSONDecodeError:
return {} return {}
@staticmethod
def serialize_reasoning(
*,
reasoning: str = "",
reasoning_details: list[Any] | None = None,
) -> str | None:
payload: dict[str, Any] = {}
if reasoning:
payload["reasoning"] = reasoning
payload["reasoning_content"] = reasoning
if reasoning_details:
payload["reasoning_details"] = reasoning_details
if not payload:
return None
return json.dumps(payload, ensure_ascii=False)
@staticmethod
def deserialize_reasoning(raw: str | None) -> dict[str, Any]:
if not raw:
return {}
try:
data = json.loads(raw)
except json.JSONDecodeError:
return {"reasoning": raw}
if isinstance(data, str):
return {"reasoning": data, "reasoning_content": data}
if isinstance(data, dict):
return data
return {}