fixed reasoning
This commit is contained in:
@@ -12,6 +12,10 @@ VITE_DEV_PORT=5173
|
|||||||
OPENROUTER_API_KEY=sk-or-v1-your-key-here
|
OPENROUTER_API_KEY=sk-or-v1-your-key-here
|
||||||
OPENROUTER_MODEL=deepseek/deepseek-chat
|
OPENROUTER_MODEL=deepseek/deepseek-chat
|
||||||
OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
|
OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
|
||||||
|
# Для reasoning-моделей без tools: OPENROUTER_TOOLS_ENABLED=false
|
||||||
|
OPENROUTER_TOOLS_ENABLED=true
|
||||||
|
# JSON-экстракция памяти отдельной моделью (если основная капризничает):
|
||||||
|
# MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat
|
||||||
|
|
||||||
# App
|
# App
|
||||||
DATABASE_URL=sqlite:///./data/assistant.db
|
DATABASE_URL=sqlite:///./data/assistant.db
|
||||||
|
|||||||
@@ -148,6 +148,9 @@ class ChatService:
|
|||||||
if event["type"] == "content":
|
if event["type"] == "content":
|
||||||
content_parts.append(event["content"])
|
content_parts.append(event["content"])
|
||||||
yield self._sse("token", {"content": event["content"]})
|
yield self._sse("token", {"content": event["content"]})
|
||||||
|
elif event["type"] == "error":
|
||||||
|
yield self._sse("error", {"message": event.get("content", "LLM error")})
|
||||||
|
return
|
||||||
elif event["type"] == "tool_calls":
|
elif event["type"] == "tool_calls":
|
||||||
tool_calls = event["tool_calls"]
|
tool_calls = event["tool_calls"]
|
||||||
|
|
||||||
@@ -193,7 +196,19 @@ class ChatService:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
final_content = "".join(content_parts)
|
final_content = "".join(content_parts)
|
||||||
if final_content:
|
if not final_content.strip():
|
||||||
|
yield self._sse(
|
||||||
|
"error",
|
||||||
|
{
|
||||||
|
"message": (
|
||||||
|
"Модель не вернула текст. Проверь OPENROUTER_MODEL: "
|
||||||
|
"нужна поддержка tool calling или отключи OPENROUTER_TOOLS_ENABLED=false. "
|
||||||
|
"Для памяти можно задать MEMORY_EXTRACT_MODEL=deepseek/deepseek-chat."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
self._save_message(session_id, "assistant", final_content)
|
self._save_message(session_id, "assistant", final_content)
|
||||||
|
|
||||||
memory_meta: dict[str, Any] = {}
|
memory_meta: dict[str, Any] = {}
|
||||||
|
|||||||
@@ -17,6 +17,10 @@ class Settings(BaseSettings):
|
|||||||
openrouter_api_key: str = ""
|
openrouter_api_key: str = ""
|
||||||
openrouter_model: str = "deepseek/deepseek-chat"
|
openrouter_model: str = "deepseek/deepseek-chat"
|
||||||
openrouter_base_url: str = "https://openrouter.ai/api/v1"
|
openrouter_base_url: str = "https://openrouter.ai/api/v1"
|
||||||
|
# Отдельная модель для JSON-задач (память, фитнес). Пусто = та же, что OPENROUTER_MODEL.
|
||||||
|
memory_extract_model: str = ""
|
||||||
|
# Некоторые модели (reasoning / без function calling) — выключить tools.
|
||||||
|
openrouter_tools_enabled: bool = True
|
||||||
|
|
||||||
database_url: str = "sqlite:///./data/assistant.db"
|
database_url: str = "sqlite:///./data/assistant.db"
|
||||||
cors_origins: str = "http://localhost:5173,http://localhost:8080,http://localhost:3000"
|
cors_origins: str = "http://localhost:5173,http://localhost:8080,http://localhost:3000"
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
from collections.abc import AsyncIterator
|
from collections.abc import AsyncIterator
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
@@ -6,34 +7,58 @@ from openai import AsyncOpenAI
|
|||||||
|
|
||||||
from app.config import get_settings
|
from app.config import get_settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class LLMClient:
|
class LLMClient:
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
self.model = settings.openrouter_model
|
self.model = settings.openrouter_model
|
||||||
|
self.tools_enabled = settings.openrouter_tools_enabled
|
||||||
self.client = AsyncOpenAI(
|
self.client = AsyncOpenAI(
|
||||||
api_key=settings.openrouter_api_key,
|
api_key=settings.openrouter_api_key,
|
||||||
base_url=settings.openrouter_base_url,
|
base_url=settings.openrouter_base_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _delta_text(self, delta: Any) -> str:
|
||||||
|
parts: list[str] = []
|
||||||
|
if getattr(delta, "content", None):
|
||||||
|
parts.append(delta.content)
|
||||||
|
# Reasoning-модели (OpenRouter / o-series) иногда пишут сюда, а не в content.
|
||||||
|
for attr in ("reasoning", "reasoning_content"):
|
||||||
|
value = getattr(delta, attr, None)
|
||||||
|
if value:
|
||||||
|
parts.append(str(value))
|
||||||
|
return "".join(parts)
|
||||||
|
|
||||||
async def stream_chat(
|
async def stream_chat(
|
||||||
self,
|
self,
|
||||||
messages: list[dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
tools: list[dict[str, Any]] | None = None,
|
tools: list[dict[str, Any]] | None = None,
|
||||||
|
*,
|
||||||
|
model: str | None = None,
|
||||||
) -> AsyncIterator[dict[str, Any]]:
|
) -> AsyncIterator[dict[str, Any]]:
|
||||||
|
use_tools = bool(tools) and self.tools_enabled
|
||||||
kwargs: dict[str, Any] = {
|
kwargs: dict[str, Any] = {
|
||||||
"model": self.model,
|
"model": model or self.model,
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
"stream": True,
|
"stream": True,
|
||||||
"temperature": 0.7,
|
"temperature": 0.7,
|
||||||
}
|
}
|
||||||
if tools:
|
if use_tools:
|
||||||
kwargs["tools"] = tools
|
kwargs["tools"] = tools
|
||||||
|
|
||||||
|
try:
|
||||||
stream = await self.client.chat.completions.create(**kwargs)
|
stream = await self.client.chat.completions.create(**kwargs)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.exception("LLM stream failed: %s", exc)
|
||||||
|
yield {"type": "error", "content": str(exc)}
|
||||||
|
yield {"type": "done", "finish_reason": "error"}
|
||||||
|
return
|
||||||
|
|
||||||
tool_calls: dict[int, dict[str, Any]] = {}
|
tool_calls: dict[int, dict[str, Any]] = {}
|
||||||
|
|
||||||
|
try:
|
||||||
async for chunk in stream:
|
async for chunk in stream:
|
||||||
if not chunk.choices:
|
if not chunk.choices:
|
||||||
continue
|
continue
|
||||||
@@ -41,8 +66,9 @@ class LLMClient:
|
|||||||
choice = chunk.choices[0]
|
choice = chunk.choices[0]
|
||||||
delta = choice.delta
|
delta = choice.delta
|
||||||
|
|
||||||
if delta.content:
|
text = self._delta_text(delta)
|
||||||
yield {"type": "content", "content": delta.content}
|
if text:
|
||||||
|
yield {"type": "content", "content": text}
|
||||||
|
|
||||||
if delta.tool_calls:
|
if delta.tool_calls:
|
||||||
for tool_call in delta.tool_calls:
|
for tool_call in delta.tool_calls:
|
||||||
@@ -65,6 +91,10 @@ class LLMClient:
|
|||||||
if tool_calls:
|
if tool_calls:
|
||||||
yield {"type": "tool_calls", "tool_calls": list(tool_calls.values())}
|
yield {"type": "tool_calls", "tool_calls": list(tool_calls.values())}
|
||||||
yield {"type": "done", "finish_reason": choice.finish_reason}
|
yield {"type": "done", "finish_reason": choice.finish_reason}
|
||||||
|
except Exception as exc:
|
||||||
|
logger.exception("LLM stream read failed: %s", exc)
|
||||||
|
yield {"type": "error", "content": str(exc)}
|
||||||
|
yield {"type": "done", "finish_reason": "error"}
|
||||||
|
|
||||||
async def complete(
|
async def complete(
|
||||||
self,
|
self,
|
||||||
@@ -72,20 +102,28 @@ class LLMClient:
|
|||||||
tools: list[dict[str, Any]] | None = None,
|
tools: list[dict[str, Any]] | None = None,
|
||||||
*,
|
*,
|
||||||
temperature: float = 0.7,
|
temperature: float = 0.7,
|
||||||
|
model: str | None = None,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
|
use_tools = bool(tools) and self.tools_enabled
|
||||||
kwargs: dict[str, Any] = {
|
kwargs: dict[str, Any] = {
|
||||||
"model": self.model,
|
"model": model or self.model,
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
"temperature": temperature,
|
"temperature": temperature,
|
||||||
}
|
}
|
||||||
if tools:
|
if use_tools:
|
||||||
kwargs["tools"] = tools
|
kwargs["tools"] = tools
|
||||||
|
|
||||||
response = await self.client.chat.completions.create(**kwargs)
|
response = await self.client.chat.completions.create(**kwargs)
|
||||||
message = response.choices[0].message
|
message = response.choices[0].message
|
||||||
|
|
||||||
|
content = message.content or ""
|
||||||
|
for attr in ("reasoning", "reasoning_content"):
|
||||||
|
value = getattr(message, attr, None)
|
||||||
|
if value and not content:
|
||||||
|
content = str(value)
|
||||||
|
|
||||||
result: dict[str, Any] = {
|
result: dict[str, Any] = {
|
||||||
"content": message.content or "",
|
"content": content,
|
||||||
"tool_calls": [],
|
"tool_calls": [],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ from typing import Any
|
|||||||
|
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from app.config import get_settings
|
||||||
from app.llm.client import LLMClient
|
from app.llm.client import LLMClient
|
||||||
from app.memory.service import MemoryService
|
from app.memory.service import MemoryService
|
||||||
from app.projects.structuring import strip_markdown_json
|
from app.projects.structuring import strip_markdown_json
|
||||||
@@ -61,6 +62,9 @@ async def _call_extractor(
|
|||||||
*[f"- {f.get('content')}" for f in facts[:30]],
|
*[f"- {f.get('content')}" for f in facts[:30]],
|
||||||
]
|
]
|
||||||
|
|
||||||
|
settings = get_settings()
|
||||||
|
extract_model = settings.memory_extract_model.strip() or None
|
||||||
|
|
||||||
llm = LLMClient()
|
llm = LLMClient()
|
||||||
result = await llm.complete(
|
result = await llm.complete(
|
||||||
[
|
[
|
||||||
@@ -72,11 +76,12 @@ async def _call_extractor(
|
|||||||
+ "\n\n---\nДиалог:\nПользователь: "
|
+ "\n\n---\nДиалог:\nПользователь: "
|
||||||
+ user_text
|
+ user_text
|
||||||
+ "\nАссистент: "
|
+ "\nАссистент: "
|
||||||
+ (assistant_text[:1500] if assistant_text else "(нет ответа)")
|
+ assistant_text[:1500]
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
temperature=0.2,
|
temperature=0.2,
|
||||||
|
model=extract_model,
|
||||||
)
|
)
|
||||||
raw = strip_markdown_json(result.get("content") or "")
|
raw = strip_markdown_json(result.get("content") or "")
|
||||||
if not raw:
|
if not raw:
|
||||||
@@ -98,6 +103,9 @@ async def extract_after_turn(
|
|||||||
if not force and _should_skip_extraction(user_text):
|
if not force and _should_skip_extraction(user_text):
|
||||||
return {"ok": True, "skipped": "short_message", "saved": []}
|
return {"ok": True, "skipped": "short_message", "saved": []}
|
||||||
|
|
||||||
|
if not (assistant_text or "").strip():
|
||||||
|
return {"ok": True, "skipped": "no_assistant_reply", "saved": []}
|
||||||
|
|
||||||
memory = MemoryService(db)
|
memory = MemoryService(db)
|
||||||
snapshot = memory.snapshot(session_id)
|
snapshot = memory.snapshot(session_id)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user