fixed reasoning
This commit is contained in:
+4
-1
@@ -11,9 +11,12 @@ VITE_DEV_PORT=5173
|
|||||||
# OpenRouter
|
# OpenRouter
|
||||||
OPENROUTER_API_KEY=sk-or-v1-your-key-here
|
OPENROUTER_API_KEY=sk-or-v1-your-key-here
|
||||||
OPENROUTER_MODEL=deepseek/deepseek-chat
|
OPENROUTER_MODEL=deepseek/deepseek-chat
|
||||||
|
# deepseek/deepseek-v4-pro — сильная модель, tools поддерживаются:
|
||||||
|
# OPENROUTER_MODEL=deepseek/deepseek-v4-pro
|
||||||
OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
|
OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
|
||||||
# Для reasoning-моделей без tools: OPENROUTER_TOOLS_ENABLED=false
|
|
||||||
OPENROUTER_TOOLS_ENABLED=true
|
OPENROUTER_TOOLS_ENABLED=true
|
||||||
|
# none = без thinking (быстрее, стабильнее с tools). low|medium|high|xhigh — reasoning.
|
||||||
|
OPENROUTER_REASONING_EFFORT=none
|
||||||
# JSON-экстракция памяти отдельной моделью (если основная капризничает):
|
# JSON-экстракция памяти отдельной моделью (если основная капризничает):
|
||||||
# MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat
|
# MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat
|
||||||
|
|
||||||
|
|||||||
@@ -103,6 +103,13 @@ class ChatService:
|
|||||||
entry["tool_calls"] = json.loads(msg.tool_calls_json)
|
entry["tool_calls"] = json.loads(msg.tool_calls_json)
|
||||||
if not content:
|
if not content:
|
||||||
entry["content"] = None
|
entry["content"] = None
|
||||||
|
reasoning_data = LLMClient.deserialize_reasoning(msg.reasoning_json)
|
||||||
|
if reasoning_data:
|
||||||
|
LLMClient.attach_reasoning_to_message(
|
||||||
|
entry,
|
||||||
|
reasoning=reasoning_data.get("reasoning", ""),
|
||||||
|
reasoning_details=reasoning_data.get("reasoning_details"),
|
||||||
|
)
|
||||||
if msg.role == "tool" and msg.tool_call_id:
|
if msg.role == "tool" and msg.tool_call_id:
|
||||||
entry["tool_call_id"] = msg.tool_call_id
|
entry["tool_call_id"] = msg.tool_call_id
|
||||||
messages.append(entry)
|
messages.append(entry)
|
||||||
@@ -115,12 +122,14 @@ class ChatService:
|
|||||||
content: str = "",
|
content: str = "",
|
||||||
tool_calls: list[dict[str, Any]] | None = None,
|
tool_calls: list[dict[str, Any]] | None = None,
|
||||||
tool_call_id: str | None = None,
|
tool_call_id: str | None = None,
|
||||||
|
reasoning_json: str | None = None,
|
||||||
) -> Message:
|
) -> Message:
|
||||||
message = Message(
|
message = Message(
|
||||||
session_id=session_id,
|
session_id=session_id,
|
||||||
role=role,
|
role=role,
|
||||||
content=content,
|
content=content,
|
||||||
tool_calls_json=json.dumps(tool_calls, ensure_ascii=False) if tool_calls else None,
|
tool_calls_json=json.dumps(tool_calls, ensure_ascii=False) if tool_calls else None,
|
||||||
|
reasoning_json=reasoning_json,
|
||||||
tool_call_id=tool_call_id,
|
tool_call_id=tool_call_id,
|
||||||
)
|
)
|
||||||
self.db.add(message)
|
self.db.add(message)
|
||||||
@@ -143,11 +152,17 @@ class ChatService:
|
|||||||
for _ in range(MAX_TOOL_ROUNDS):
|
for _ in range(MAX_TOOL_ROUNDS):
|
||||||
content_parts: list[str] = []
|
content_parts: list[str] = []
|
||||||
tool_calls: list[dict[str, Any]] = []
|
tool_calls: list[dict[str, Any]] = []
|
||||||
|
reasoning = ""
|
||||||
|
reasoning_details: list[Any] | None = None
|
||||||
|
|
||||||
async for event in self.llm.stream_chat(messages, tools=TOOL_DEFINITIONS):
|
async for event in self.llm.stream_chat(messages, tools=TOOL_DEFINITIONS):
|
||||||
if event["type"] == "content":
|
if event["type"] == "content":
|
||||||
content_parts.append(event["content"])
|
content_parts.append(event["content"])
|
||||||
yield self._sse("token", {"content": event["content"]})
|
yield self._sse("token", {"content": event["content"]})
|
||||||
|
elif event["type"] == "reasoning":
|
||||||
|
reasoning = event.get("reasoning", "") or reasoning
|
||||||
|
if event.get("reasoning_details"):
|
||||||
|
reasoning_details = event["reasoning_details"]
|
||||||
elif event["type"] == "error":
|
elif event["type"] == "error":
|
||||||
yield self._sse("error", {"message": event.get("content", "LLM error")})
|
yield self._sse("error", {"message": event.get("content", "LLM error")})
|
||||||
return
|
return
|
||||||
@@ -160,12 +175,22 @@ class ChatService:
|
|||||||
"content": "".join(content_parts) or None,
|
"content": "".join(content_parts) or None,
|
||||||
"tool_calls": tool_calls,
|
"tool_calls": tool_calls,
|
||||||
}
|
}
|
||||||
|
LLMClient.attach_reasoning_to_message(
|
||||||
|
assistant_msg,
|
||||||
|
reasoning=reasoning,
|
||||||
|
reasoning_details=reasoning_details,
|
||||||
|
)
|
||||||
|
reasoning_json = LLMClient.serialize_reasoning(
|
||||||
|
reasoning=reasoning,
|
||||||
|
reasoning_details=reasoning_details,
|
||||||
|
)
|
||||||
messages.append(assistant_msg)
|
messages.append(assistant_msg)
|
||||||
self._save_message(
|
self._save_message(
|
||||||
session_id,
|
session_id,
|
||||||
"assistant",
|
"assistant",
|
||||||
"".join(content_parts),
|
"".join(content_parts),
|
||||||
tool_calls=tool_calls,
|
tool_calls=tool_calls,
|
||||||
|
reasoning_json=reasoning_json,
|
||||||
)
|
)
|
||||||
|
|
||||||
for tool_call in tool_calls:
|
for tool_call in tool_calls:
|
||||||
@@ -196,14 +221,16 @@ class ChatService:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
final_content = "".join(content_parts)
|
final_content = "".join(content_parts)
|
||||||
|
if not final_content.strip() and reasoning:
|
||||||
|
final_content = reasoning
|
||||||
if not final_content.strip():
|
if not final_content.strip():
|
||||||
yield self._sse(
|
yield self._sse(
|
||||||
"error",
|
"error",
|
||||||
{
|
{
|
||||||
"message": (
|
"message": (
|
||||||
"Модель не вернула текст. Проверь OPENROUTER_MODEL: "
|
"Модель не вернула текст. Для deepseek-v4-pro: "
|
||||||
"нужна поддержка tool calling или отключи OPENROUTER_TOOLS_ENABLED=false. "
|
"OPENROUTER_TOOLS_ENABLED=true и OPENROUTER_REASONING_EFFORT=none. "
|
||||||
"Для памяти можно задать MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat."
|
"Для памяти: MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat."
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -21,6 +21,8 @@ class Settings(BaseSettings):
|
|||||||
memory_extract_model: str = ""
|
memory_extract_model: str = ""
|
||||||
# Некоторые модели (reasoning / без function calling) — выключить tools.
|
# Некоторые модели (reasoning / без function calling) — выключить tools.
|
||||||
openrouter_tools_enabled: bool = True
|
openrouter_tools_enabled: bool = True
|
||||||
|
# DeepSeek V4 / reasoning: none | low | medium | high | xhigh. none = без thinking.
|
||||||
|
openrouter_reasoning_effort: str = "none"
|
||||||
|
|
||||||
database_url: str = "sqlite:///./data/assistant.db"
|
database_url: str = "sqlite:///./data/assistant.db"
|
||||||
cors_origins: str = "http://localhost:5173,http://localhost:8080,http://localhost:3000"
|
cors_origins: str = "http://localhost:5173,http://localhost:8080,http://localhost:3000"
|
||||||
|
|||||||
@@ -21,6 +21,12 @@ def run_migrations() -> None:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if "messages" in inspector.get_table_names():
|
||||||
|
columns = {col["name"] for col in inspector.get_columns("messages")}
|
||||||
|
with engine.begin() as conn:
|
||||||
|
if "reasoning_json" not in columns:
|
||||||
|
conn.execute(text("ALTER TABLE messages ADD COLUMN reasoning_json TEXT"))
|
||||||
|
|
||||||
if "pomodoro_cycles" not in inspector.get_table_names():
|
if "pomodoro_cycles" not in inspector.get_table_names():
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ class Message(Base):
|
|||||||
role: Mapped[str] = mapped_column(String(32))
|
role: Mapped[str] = mapped_column(String(32))
|
||||||
content: Mapped[str] = mapped_column(Text, default="")
|
content: Mapped[str] = mapped_column(Text, default="")
|
||||||
tool_calls_json: Mapped[str | None] = mapped_column(Text, nullable=True)
|
tool_calls_json: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||||
|
reasoning_json: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||||
tool_call_id: Mapped[str | None] = mapped_column(String(64), nullable=True)
|
tool_call_id: Mapped[str | None] = mapped_column(String(64), nullable=True)
|
||||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
|
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
|
||||||
|
|
||||||
|
|||||||
+94
-10
@@ -15,21 +15,48 @@ class LLMClient:
|
|||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
self.model = settings.openrouter_model
|
self.model = settings.openrouter_model
|
||||||
self.tools_enabled = settings.openrouter_tools_enabled
|
self.tools_enabled = settings.openrouter_tools_enabled
|
||||||
|
self.reasoning_effort = settings.openrouter_reasoning_effort.strip().lower()
|
||||||
self.client = AsyncOpenAI(
|
self.client = AsyncOpenAI(
|
||||||
api_key=settings.openrouter_api_key,
|
api_key=settings.openrouter_api_key,
|
||||||
base_url=settings.openrouter_base_url,
|
base_url=settings.openrouter_base_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _delta_text(self, delta: Any) -> str:
|
def _reasoning_extra_body(self) -> dict[str, Any] | None:
|
||||||
|
if not self.reasoning_effort:
|
||||||
|
return None
|
||||||
|
return {"reasoning": {"effort": self.reasoning_effort}}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _delta_reasoning(delta: Any) -> tuple[str, list[Any]]:
|
||||||
parts: list[str] = []
|
parts: list[str] = []
|
||||||
if getattr(delta, "content", None):
|
|
||||||
parts.append(delta.content)
|
|
||||||
# Reasoning-модели (OpenRouter / o-series) иногда пишут сюда, а не в content.
|
|
||||||
for attr in ("reasoning", "reasoning_content"):
|
for attr in ("reasoning", "reasoning_content"):
|
||||||
value = getattr(delta, attr, None)
|
value = getattr(delta, attr, None)
|
||||||
if value:
|
if value:
|
||||||
parts.append(str(value))
|
parts.append(str(value))
|
||||||
return "".join(parts)
|
|
||||||
|
details: list[Any] = []
|
||||||
|
raw_details = getattr(delta, "reasoning_details", None)
|
||||||
|
if raw_details:
|
||||||
|
if isinstance(raw_details, list):
|
||||||
|
details.extend(raw_details)
|
||||||
|
else:
|
||||||
|
details.append(raw_details)
|
||||||
|
|
||||||
|
return "".join(parts), details
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def attach_reasoning_to_message(
|
||||||
|
message: dict[str, Any],
|
||||||
|
*,
|
||||||
|
reasoning: str = "",
|
||||||
|
reasoning_details: list[Any] | None = None,
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
if reasoning:
|
||||||
|
message["reasoning"] = reasoning
|
||||||
|
message["reasoning_content"] = reasoning
|
||||||
|
if reasoning_details:
|
||||||
|
message["reasoning_details"] = reasoning_details
|
||||||
|
return message
|
||||||
|
|
||||||
async def stream_chat(
|
async def stream_chat(
|
||||||
self,
|
self,
|
||||||
@@ -47,6 +74,9 @@ class LLMClient:
|
|||||||
}
|
}
|
||||||
if use_tools:
|
if use_tools:
|
||||||
kwargs["tools"] = tools
|
kwargs["tools"] = tools
|
||||||
|
extra_body = self._reasoning_extra_body()
|
||||||
|
if extra_body:
|
||||||
|
kwargs["extra_body"] = extra_body
|
||||||
|
|
||||||
try:
|
try:
|
||||||
stream = await self.client.chat.completions.create(**kwargs)
|
stream = await self.client.chat.completions.create(**kwargs)
|
||||||
@@ -57,6 +87,8 @@ class LLMClient:
|
|||||||
return
|
return
|
||||||
|
|
||||||
tool_calls: dict[int, dict[str, Any]] = {}
|
tool_calls: dict[int, dict[str, Any]] = {}
|
||||||
|
reasoning_parts: list[str] = []
|
||||||
|
reasoning_details: list[Any] = []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async for chunk in stream:
|
async for chunk in stream:
|
||||||
@@ -66,9 +98,14 @@ class LLMClient:
|
|||||||
choice = chunk.choices[0]
|
choice = chunk.choices[0]
|
||||||
delta = choice.delta
|
delta = choice.delta
|
||||||
|
|
||||||
text = self._delta_text(delta)
|
if delta.content:
|
||||||
if text:
|
yield {"type": "content", "content": delta.content}
|
||||||
yield {"type": "content", "content": text}
|
|
||||||
|
reasoning_text, details = self._delta_reasoning(delta)
|
||||||
|
if reasoning_text:
|
||||||
|
reasoning_parts.append(reasoning_text)
|
||||||
|
if details:
|
||||||
|
reasoning_details.extend(details)
|
||||||
|
|
||||||
if delta.tool_calls:
|
if delta.tool_calls:
|
||||||
for tool_call in delta.tool_calls:
|
for tool_call in delta.tool_calls:
|
||||||
@@ -88,6 +125,13 @@ class LLMClient:
|
|||||||
tool_calls[idx]["function"]["arguments"] += tool_call.function.arguments
|
tool_calls[idx]["function"]["arguments"] += tool_call.function.arguments
|
||||||
|
|
||||||
if choice.finish_reason:
|
if choice.finish_reason:
|
||||||
|
reasoning = "".join(reasoning_parts)
|
||||||
|
if reasoning or reasoning_details:
|
||||||
|
yield {
|
||||||
|
"type": "reasoning",
|
||||||
|
"reasoning": reasoning,
|
||||||
|
"reasoning_details": reasoning_details or None,
|
||||||
|
}
|
||||||
if tool_calls:
|
if tool_calls:
|
||||||
yield {"type": "tool_calls", "tool_calls": list(tool_calls.values())}
|
yield {"type": "tool_calls", "tool_calls": list(tool_calls.values())}
|
||||||
yield {"type": "done", "finish_reason": choice.finish_reason}
|
yield {"type": "done", "finish_reason": choice.finish_reason}
|
||||||
@@ -112,19 +156,29 @@ class LLMClient:
|
|||||||
}
|
}
|
||||||
if use_tools:
|
if use_tools:
|
||||||
kwargs["tools"] = tools
|
kwargs["tools"] = tools
|
||||||
|
extra_body = self._reasoning_extra_body()
|
||||||
|
if extra_body:
|
||||||
|
kwargs["extra_body"] = extra_body
|
||||||
|
|
||||||
response = await self.client.chat.completions.create(**kwargs)
|
response = await self.client.chat.completions.create(**kwargs)
|
||||||
message = response.choices[0].message
|
message = response.choices[0].message
|
||||||
|
|
||||||
content = message.content or ""
|
content = message.content or ""
|
||||||
|
reasoning = ""
|
||||||
for attr in ("reasoning", "reasoning_content"):
|
for attr in ("reasoning", "reasoning_content"):
|
||||||
value = getattr(message, attr, None)
|
value = getattr(message, attr, None)
|
||||||
if value and not content:
|
if value:
|
||||||
content = str(value)
|
reasoning = str(value)
|
||||||
|
break
|
||||||
|
|
||||||
|
if not content and reasoning:
|
||||||
|
content = reasoning
|
||||||
|
|
||||||
result: dict[str, Any] = {
|
result: dict[str, Any] = {
|
||||||
"content": content,
|
"content": content,
|
||||||
"tool_calls": [],
|
"tool_calls": [],
|
||||||
|
"reasoning": reasoning,
|
||||||
|
"reasoning_details": getattr(message, "reasoning_details", None),
|
||||||
}
|
}
|
||||||
|
|
||||||
if message.tool_calls:
|
if message.tool_calls:
|
||||||
@@ -150,3 +204,33 @@ class LLMClient:
|
|||||||
return json.loads(arguments)
|
return json.loads(arguments)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def serialize_reasoning(
|
||||||
|
*,
|
||||||
|
reasoning: str = "",
|
||||||
|
reasoning_details: list[Any] | None = None,
|
||||||
|
) -> str | None:
|
||||||
|
payload: dict[str, Any] = {}
|
||||||
|
if reasoning:
|
||||||
|
payload["reasoning"] = reasoning
|
||||||
|
payload["reasoning_content"] = reasoning
|
||||||
|
if reasoning_details:
|
||||||
|
payload["reasoning_details"] = reasoning_details
|
||||||
|
if not payload:
|
||||||
|
return None
|
||||||
|
return json.dumps(payload, ensure_ascii=False)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def deserialize_reasoning(raw: str | None) -> dict[str, Any]:
|
||||||
|
if not raw:
|
||||||
|
return {}
|
||||||
|
try:
|
||||||
|
data = json.loads(raw)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
return {"reasoning": raw}
|
||||||
|
if isinstance(data, str):
|
||||||
|
return {"reasoning": data, "reasoning_content": data}
|
||||||
|
if isinstance(data, dict):
|
||||||
|
return data
|
||||||
|
return {}
|
||||||
|
|||||||
Reference in New Issue
Block a user