"""Helpers that clean a chat history before it is sent to the LLM.

Source path per the patch header: backend/app/chat/history.py.
"""

from typing import Any

# Keys that carry model reasoning on stored assistant messages.
_REASONING_KEYS = ("reasoning", "reasoning_content", "reasoning_details")


def _tool_call_ids(tool_calls: list[dict[str, Any]]) -> list[str]:
    """Return the non-empty ``id`` values of *tool_calls*, in order."""
    return [tc["id"] for tc in tool_calls if tc.get("id")]


def sanitize_openai_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Drop broken tool chains and back-to-back plain assistant messages.

    Rules applied to everything after an optional leading system message:

    * An assistant message with ``tool_calls`` is kept, together with its
      ``tool`` replies, only when every call has a unique non-empty ``id``
      and each id is answered by a directly following ``tool`` message.
      Otherwise the assistant message and its replies are all dropped.
    * Within a kept chain only the first reply per ``tool_call_id`` is kept.
    * A ``tool`` message that does not follow such an assistant message is
      dropped.
    * A plain assistant message directly after another assistant message is
      dropped (the earlier one wins).

    Args:
        messages: Chat messages as dicts with at least a ``role`` key.

    Returns:
        A new list with the surviving messages (the same message dict
        objects, not copies). A falsy input is returned unchanged.
    """
    if not messages:
        return messages

    has_system = messages[0].get("role") == "system"
    rest = messages[1:] if has_system else list(messages)

    cleaned: list[dict[str, Any]] = []
    index = 0
    total = len(rest)
    while index < total:
        msg = rest[index]
        role = msg.get("role")
        index += 1

        if role == "assistant" and msg.get("tool_calls"):
            tool_calls = msg["tool_calls"]
            needed_ids = set(_tool_call_ids(tool_calls))

            # Consume every directly following tool message, keeping only the
            # first reply for each expected id (dict keeps arrival order).
            replies: dict[str, dict[str, Any]] = {}
            while index < total and rest[index].get("role") == "tool":
                reply = rest[index]
                reply_id = reply.get("tool_call_id", "")
                if reply_id in needed_ids and reply_id not in replies:
                    replies[reply_id] = reply
                index += 1

            # A call without an id (or sharing an id with another call) can
            # never be matched to a reply, so the whole chain is unusable.
            every_call_identified = len(needed_ids) == len(tool_calls)
            if every_call_identified and set(replies) == needed_ids:
                cleaned.append(msg)
                cleaned.extend(replies.values())
            continue

        if role == "tool":
            # Orphaned tool message with no preceding assistant tool_calls.
            continue

        if role == "assistant" and cleaned and cleaned[-1].get("role") == "assistant":
            # Two assistant messages in a row break the API.
            continue

        cleaned.append(msg)

    if has_system:
        return [messages[0], *cleaned]
    return cleaned


def strip_historical_reasoning(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return shallow copies of *messages* with reasoning removed from assistants.

    Stored reasoning is often incomplete, so it is stripped from historical
    assistant messages. The input dicts are not modified.
    """
    result: list[dict[str, Any]] = []
    for msg in messages:
        entry = dict(msg)
        if entry.get("role") == "assistant":
            for key in _REASONING_KEYS:
                entry.pop(key, None)
        result.append(entry)
    return result
result.get("tool_calls"): + break + + tool_calls = result.get("tool_calls") or [] + content = (result.get("content") or "").strip() + notices: list[str] = [] + pomodoro_events: list[dict[str, Any]] = [] + + if tool_calls: + assistant_msg: dict[str, Any] = { + "role": "assistant", + "content": content or None, + "tool_calls": tool_calls, + } + messages.append(assistant_msg) + self._save_message( + session_id, + "assistant", + content, + tool_calls=tool_calls, + ) + for tool_call in tool_calls: + fn = tool_call["function"] + args = LLMClient.parse_tool_arguments(fn.get("arguments", "")) + tool_result = await execute_tool( + self.db, fn["name"], args, session_id=session_id + ) + messages.append( + { + "role": "tool", + "tool_call_id": tool_call["id"], + "content": tool_result, + } + ) + self._save_message( + session_id, + "tool", + tool_result, + tool_call_id=tool_call["id"], + ) + notice = format_tool_notice(fn["name"], tool_result) + if notice: + self._save_message(session_id, "notice", notice) + notices.append(notice) + if fn["name"] in POMODORO_TOOL_NAMES: + pomodoro_events.append( + {"name": fn["name"], "result": json.loads(tool_result)} + ) + + if notices: + return "\n\n".join(notices), notices, pomodoro_events + + followup = await self.llm.complete( + messages, + tools=None, + temperature=0.4, + visible_reply=True, + ) + return (followup.get("content") or "").strip(), notices, pomodoro_events + + return content, notices, pomodoro_events + async def stream_response( self, session_id: int, @@ -209,6 +289,7 @@ class ChatService: tool_calls: list[dict[str, Any]] = [] reasoning = "" reasoning_details: list[Any] | None = None + finish_reason = "" async for event in self.llm.stream_chat(messages, tools=TOOL_DEFINITIONS): if event["type"] == "content": @@ -230,16 +311,20 @@ class ChatService: return elif event["type"] == "tool_calls": tool_calls = event["tool_calls"] + elif event["type"] == "done": + finish_reason = event.get("finish_reason", "") logger.info( "chat 
session=%s round=%d prepare=%.2fs llm=%.2fs " - "content_len=%d tool_calls=%d", + "content_len=%d tool_calls=%d finish_reason=%s reasoning_len=%d", session_id, tool_round, prepare_sec, time.monotonic() - t_round, len("".join(content_parts)), len(tool_calls), + finish_reason, + len(reasoning), ) if tool_calls: @@ -321,19 +406,32 @@ class ChatService: final_content = (retry.get("content") or "").strip() if final_content: yield self._sse("token", {"content": final_content}) + if not final_content: + final_content, fb_notices, fb_pomodoro = await self._fallback_complete( + messages, session_id + ) + if final_content: + yield self._sse("token", {"content": final_content}) + for notice in fb_notices: + yield self._sse("notice", {"content": notice}) + for event in fb_pomodoro: + yield self._sse("pomodoro", event) + if not final_content: logger.warning( - "chat session=%s empty_reply tools=%d rounds=%d", + "chat session=%s empty_reply tools=%d rounds=%d finish_reason=%s", session_id, tools_executed, tool_round, + finish_reason, ) yield self._sse( "error", { "message": ( - "Модель не вернула текст после выполнения команд. " - "Проверь OPENROUTER_MODEL и OPENROUTER_REASONING_EFFORT=none." + "Модель не вернула ответ (finish_reason=" + f"{finish_reason or 'unknown'}). " + "Попробуй новый чат или проверь OPENROUTER_MODEL." 
), }, ) diff --git a/backend/app/llm/client.py b/backend/app/llm/client.py index 2e522a7..7c67fc8 100644 --- a/backend/app/llm/client.py +++ b/backend/app/llm/client.py @@ -24,6 +24,8 @@ class LLMClient: def _reasoning_extra_body(self) -> dict[str, Any] | None: if not self.reasoning_effort: return None + if self.reasoning_effort == "none": + return {"reasoning": {"effort": "none", "exclude": True}} return {"reasoning": {"effort": self.reasoning_effort}} @staticmethod @@ -151,10 +153,13 @@ class LLMClient: } if tool_calls: yield {"type": "tool_calls", "tool_calls": list(tool_calls.values())} - logger.debug( - "LLM stream done: finish_reason=%s tool_calls=%d reasoning_len=%d", + logger.info( + "LLM stream done: model=%s finish_reason=%s tool_calls=%d " + "content_in_stream=%d reasoning_len=%d", + model or self.model, choice.finish_reason, len(tool_calls), + len(reasoning_parts), len(reasoning), ) yield {"type": "done", "finish_reason": choice.finish_reason}