fixed injection watcher

This commit is contained in:
2026-06-11 08:23:45 +03:00
parent b5a1831b8e
commit 0ccf19a1cc
3 changed files with 180 additions and 6 deletions
+71
View File
@@ -0,0 +1,71 @@
from typing import Any
def _tool_call_ids(tool_calls: list[dict[str, Any]]) -> list[str]:
return [tc.get("id", "") for tc in tool_calls if tc.get("id")]
def sanitize_openai_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Убирает битые tool-цепочки и подряд идущих assistant без user между ними."""
if not messages:
return messages
system = messages[0] if messages[0].get("role") == "system" else None
rest = messages[1:] if system else list(messages)
cleaned: list[dict[str, Any]] = []
i = 0
while i < len(rest):
msg = rest[i]
role = msg.get("role")
if role == "assistant" and msg.get("tool_calls"):
tool_calls = msg["tool_calls"]
needed_ids = set(_tool_call_ids(tool_calls))
if not needed_ids:
i += 1
continue
block = [msg]
i += 1
found_ids: set[str] = set()
while i < len(rest) and rest[i].get("role") == "tool":
tool_id = rest[i].get("tool_call_id", "")
if tool_id in needed_ids:
block.append(rest[i])
found_ids.add(tool_id)
i += 1
if found_ids == needed_ids:
cleaned.extend(block)
continue
if role == "tool":
# осиротевший tool без assistant tool_calls
i += 1
continue
if role == "assistant" and cleaned and cleaned[-1].get("role") == "assistant":
# два assistant подряд ломают API (старый баг pomodoro)
i += 1
continue
cleaned.append(msg)
i += 1
if system:
return [system, *cleaned]
return cleaned
def strip_historical_reasoning(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return shallow copies of *messages* without assistant reasoning fields.

    Reasoning loaded from the DB is often incomplete, so for old messages it
    is removed. Only entries whose ``role`` is ``"assistant"`` lose their
    ``reasoning``, ``reasoning_content`` and ``reasoning_details`` keys; every
    other entry is copied as-is. The input dicts are never modified.
    """
    reasoning_keys = ("reasoning", "reasoning_content", "reasoning_details")

    def _without_reasoning(message: dict[str, Any]) -> dict[str, Any]:
        if message.get("role") != "assistant":
            return dict(message)
        return {
            key: value
            for key, value in message.items()
            if key not in reasoning_keys
        }

    return [_without_reasoning(message) for message in messages]
+102 -4
View File
@@ -11,6 +11,7 @@ from sqlalchemy.orm import Session
from app.config import get_settings from app.config import get_settings
from app.db.base import SessionLocal from app.db.base import SessionLocal
from app.character.service import CharacterService from app.character.service import CharacterService
from app.chat.history import sanitize_openai_messages, strip_historical_reasoning
from app.chat.notices import ( from app.chat.notices import (
POMODORO_TOOL_NAMES, POMODORO_TOOL_NAMES,
format_pomodoro_context, format_pomodoro_context,
@@ -145,6 +146,8 @@ class ChatService:
if msg.role == "tool" and msg.tool_call_id: if msg.role == "tool" and msg.tool_call_id:
entry["tool_call_id"] = msg.tool_call_id entry["tool_call_id"] = msg.tool_call_id
messages.append(entry) messages.append(entry)
messages = sanitize_openai_messages(messages)
messages = strip_historical_reasoning(messages)
return messages return messages
def _save_message( def _save_message(
@@ -175,6 +178,83 @@ class ChatService:
def save_user_message(self, session_id: int, user_text: str) -> None: def save_user_message(self, session_id: int, user_text: str) -> None:
self._save_message(session_id, "user", user_text) self._save_message(session_id, "user", user_text)
async def _fallback_complete(
self,
messages: list[dict[str, Any]],
session_id: int,
) -> tuple[str, list[str], list[dict[str, Any]]]:
"""Non-streaming fallback path for when the stream came back empty.

Calls ``self.llm.complete`` instead of the streaming API, executes any
tool calls the model requests, and returns a 3-tuple of
``(reply_text, notices, pomodoro_events)``.

``messages`` is extended in place with the assistant tool-call message
and the tool results.
"""
logger.info("chat session=%s fallback complete", session_id)
result: dict[str, Any] = {"content": "", "tool_calls": []}
# First attempt offers the tool definitions, the second retries without
# tools; stop at the first result that has non-blank text or tool calls.
for with_tools in (True, False):
result = await self.llm.complete(
messages,
tools=TOOL_DEFINITIONS if with_tools else None,
temperature=0.5,
visible_reply=True,
)
if (result.get("content") or "").strip() or result.get("tool_calls"):
break
tool_calls = result.get("tool_calls") or []
content = (result.get("content") or "").strip()
notices: list[str] = []
pomodoro_events: list[dict[str, Any]] = []
if tool_calls:
# The in-memory message carries None for empty text, while the call to
# _save_message receives the (possibly empty) stripped string.
assistant_msg: dict[str, Any] = {
"role": "assistant",
"content": content or None,
"tool_calls": tool_calls,
}
messages.append(assistant_msg)
self._save_message(
session_id,
"assistant",
content,
tool_calls=tool_calls,
)
# Execute every requested tool; each result is appended to `messages`
# as a tool message and passed to _save_message under the call's id.
for tool_call in tool_calls:
fn = tool_call["function"]
args = LLMClient.parse_tool_arguments(fn.get("arguments", ""))
tool_result = await execute_tool(
self.db, fn["name"], args, session_id=session_id
)
messages.append(
{
"role": "tool",
"tool_call_id": tool_call["id"],
"content": tool_result,
}
)
self._save_message(
session_id,
"tool",
tool_result,
tool_call_id=tool_call["id"],
)
notice = format_tool_notice(fn["name"], tool_result)
if notice:
self._save_message(session_id, "notice", notice)
notices.append(notice)
# NOTE(review): json.loads assumes tool_result is a JSON string for
# pomodoro tools — confirm against execute_tool's contract.
if fn["name"] in POMODORO_TOOL_NAMES:
pomodoro_events.append(
{"name": fn["name"], "result": json.loads(tool_result)}
)
# When any tool produced a notice, the joined notices are returned as the
# reply text and no follow-up completion is requested.
if notices:
return "\n\n".join(notices), notices, pomodoro_events
# Otherwise ask the model once more, without tools, for the reply text.
followup = await self.llm.complete(
messages,
tools=None,
temperature=0.4,
visible_reply=True,
)
return (followup.get("content") or "").strip(), notices, pomodoro_events
return content, notices, pomodoro_events
async def stream_response( async def stream_response(
self, self,
session_id: int, session_id: int,
@@ -209,6 +289,7 @@ class ChatService:
tool_calls: list[dict[str, Any]] = [] tool_calls: list[dict[str, Any]] = []
reasoning = "" reasoning = ""
reasoning_details: list[Any] | None = None reasoning_details: list[Any] | None = None
finish_reason = ""
async for event in self.llm.stream_chat(messages, tools=TOOL_DEFINITIONS): async for event in self.llm.stream_chat(messages, tools=TOOL_DEFINITIONS):
if event["type"] == "content": if event["type"] == "content":
@@ -230,16 +311,20 @@ class ChatService:
return return
elif event["type"] == "tool_calls": elif event["type"] == "tool_calls":
tool_calls = event["tool_calls"] tool_calls = event["tool_calls"]
elif event["type"] == "done":
finish_reason = event.get("finish_reason", "")
logger.info( logger.info(
"chat session=%s round=%d prepare=%.2fs llm=%.2fs " "chat session=%s round=%d prepare=%.2fs llm=%.2fs "
"content_len=%d tool_calls=%d", "content_len=%d tool_calls=%d finish_reason=%s reasoning_len=%d",
session_id, session_id,
tool_round, tool_round,
prepare_sec, prepare_sec,
time.monotonic() - t_round, time.monotonic() - t_round,
len("".join(content_parts)), len("".join(content_parts)),
len(tool_calls), len(tool_calls),
finish_reason,
len(reasoning),
) )
if tool_calls: if tool_calls:
@@ -321,19 +406,32 @@ class ChatService:
final_content = (retry.get("content") or "").strip() final_content = (retry.get("content") or "").strip()
if final_content: if final_content:
yield self._sse("token", {"content": final_content}) yield self._sse("token", {"content": final_content})
if not final_content:
final_content, fb_notices, fb_pomodoro = await self._fallback_complete(
messages, session_id
)
if final_content:
yield self._sse("token", {"content": final_content})
for notice in fb_notices:
yield self._sse("notice", {"content": notice})
for event in fb_pomodoro:
yield self._sse("pomodoro", event)
if not final_content: if not final_content:
logger.warning( logger.warning(
"chat session=%s empty_reply tools=%d rounds=%d", "chat session=%s empty_reply tools=%d rounds=%d finish_reason=%s",
session_id, session_id,
tools_executed, tools_executed,
tool_round, tool_round,
finish_reason,
) )
yield self._sse( yield self._sse(
"error", "error",
{ {
"message": ( "message": (
"Модель не вернула текст после выполнения команд. " "Модель не вернула ответ (finish_reason="
"Проверь OPENROUTER_MODEL и OPENROUTER_REASONING_EFFORT=none." f"{finish_reason or 'unknown'}). "
"Попробуй новый чат или проверь OPENROUTER_MODEL."
), ),
}, },
) )
+7 -2
View File
@@ -24,6 +24,8 @@ class LLMClient:
def _reasoning_extra_body(self) -> dict[str, Any] | None: def _reasoning_extra_body(self) -> dict[str, Any] | None:
if not self.reasoning_effort: if not self.reasoning_effort:
return None return None
if self.reasoning_effort == "none":
return {"reasoning": {"effort": "none", "exclude": True}}
return {"reasoning": {"effort": self.reasoning_effort}} return {"reasoning": {"effort": self.reasoning_effort}}
@staticmethod @staticmethod
@@ -151,10 +153,13 @@ class LLMClient:
} }
if tool_calls: if tool_calls:
yield {"type": "tool_calls", "tool_calls": list(tool_calls.values())} yield {"type": "tool_calls", "tool_calls": list(tool_calls.values())}
logger.debug( logger.info(
"LLM stream done: finish_reason=%s tool_calls=%d reasoning_len=%d", "LLM stream done: model=%s finish_reason=%s tool_calls=%d "
"content_in_stream=%d reasoning_len=%d",
model or self.model,
choice.finish_reason, choice.finish_reason,
len(tool_calls), len(tool_calls),
len(reasoning_parts),
len(reasoning), len(reasoning),
) )
yield {"type": "done", "finish_reason": choice.finish_reason} yield {"type": "done", "finish_reason": choice.finish_reason}