added RAG, Multiuser, TG bot

This commit is contained in:
2026-06-13 20:20:56 +00:00
parent 66e1b0e29e
commit c8a9429bed
142 changed files with 19901 additions and 8790 deletions
+468
View File
@@ -0,0 +1,468 @@
import asyncio
import json
import logging
import time
from collections.abc import AsyncIterator
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import Session
from app.config import get_settings
from app.db.base import SessionLocal
from app.character.service import CharacterService
from app.chat.history import sanitize_openai_messages, strip_historical_reasoning
from app.chat.notice_inbox import DISPLAY_ONLY_ROLES
from app.chat.notices import (
POMODORO_TOOL_NAMES,
format_pomodoro_context,
format_tool_notice,
)
from app.fitness.context import format_fitness_context, get_fitness_snapshot
from app.homelab.context import format_datetime_context
from app.homelab.openmeteo import format_weather_snapshot
from app.memory.context import (
format_identity_hint,
format_memory_context,
get_memory_snapshot,
)
from app.memory.extract import extract_after_turn
from app.projects.context import format_projects_context, get_projects_snapshot
from app.reminders.context import format_reminders_context, get_reminders_snapshot
from app.shopping.context import format_shopping_context, get_shopping_snapshot
from app.db.models import ChatSession, Message
from app.llm.client import LLMClient
from app.pomodoro.service import PomodoroService
from app.tools.registry import TOOL_DEFINITIONS, execute_tool
MAX_TOOL_ROUNDS = 5
MAX_HISTORY_MESSAGES = 40
logger = logging.getLogger(__name__)
def _build_messages_for_session(session_id: int) -> list[dict[str, Any]]:
db = SessionLocal()
try:
service = ChatService(db)
session = service.get_session(session_id)
if not session:
return []
return service._build_messages(session)
finally:
db.close()
async def _extract_memory_background(
session_id: int,
user_text: str,
assistant_text: str,
) -> None:
db = SessionLocal()
try:
await extract_after_turn(db, session_id, user_text, assistant_text)
except Exception as exc:
logger.warning("Background memory extraction failed: %s", exc)
finally:
db.close()
class ChatService:
def __init__(self, db: Session):
self.db = db
self.llm = LLMClient()
self.character = CharacterService()
def list_sessions(self) -> list[ChatSession]:
stmt = select(ChatSession).order_by(ChatSession.updated_at.desc())
return list(self.db.scalars(stmt).all())
def get_session(self, session_id: int) -> ChatSession | None:
return self.db.get(ChatSession, session_id)
def create_session(self, title: str = "Новый чат") -> ChatSession:
session = ChatSession(title=title)
self.db.add(session)
self.db.commit()
self.db.refresh(session)
return session
def delete_session(self, session_id: int) -> bool:
session = self.get_session(session_id)
if not session:
return False
self.db.delete(session)
self.db.commit()
return True
def _build_system_prompt(self, session_id: int | None = None) -> str:
status = PomodoroService(self.db).get_status()
memory_snapshot = get_memory_snapshot(self.db, session_id)
fitness_snapshot = get_fitness_snapshot(self.db)
shopping_snapshot = get_shopping_snapshot(self.db)
reminders_snapshot = get_reminders_snapshot(self.db)
projects_snapshot = get_projects_snapshot(self.db)
return (
f"{self.character.get_system_prompt()}\n\n"
f"{format_datetime_context(self.db)}\n\n"
f"{format_memory_context(memory_snapshot)}\n\n"
f"{format_fitness_context(fitness_snapshot)}\n\n"
f"{format_shopping_context(shopping_snapshot)}\n\n"
f"{format_reminders_context(reminders_snapshot)}\n\n"
f"{format_weather_snapshot()}\n\n"
f"{format_pomodoro_context(status)}\n\n"
f"{format_projects_context(projects_snapshot)}"
)
def _build_messages(self, session: ChatSession) -> list[dict[str, Any]]:
system_prompt = self._build_system_prompt(session.id)
all_chat = [m for m in session.messages if m.role not in DISPLAY_ONLY_ROLES]
last_user = next((m.content for m in reversed(all_chat) if m.role == "user"), "")
if last_user:
memory_snapshot = get_memory_snapshot(self.db, session.id)
identity_hint = format_identity_hint(memory_snapshot, last_user)
if identity_hint:
system_prompt += f"\n\n{identity_hint}"
if len(all_chat) > MAX_HISTORY_MESSAGES:
system_prompt += (
f"\n\n[История чата: в контексте последние {MAX_HISTORY_MESSAGES} "
f"из {len(all_chat)} сообщений. Раннее — в сводке сессии, если сохранена.]"
)
messages: list[dict[str, Any]] = [
{"role": "system", "content": system_prompt}
]
chat_messages = all_chat[-MAX_HISTORY_MESSAGES:] if len(all_chat) > MAX_HISTORY_MESSAGES else all_chat
for msg in chat_messages:
content = msg.content or None
entry: dict[str, Any] = {"role": msg.role, "content": content}
if msg.tool_calls_json:
entry["tool_calls"] = json.loads(msg.tool_calls_json)
if not content:
entry["content"] = None
reasoning_data = LLMClient.deserialize_reasoning(msg.reasoning_json)
if reasoning_data:
LLMClient.attach_reasoning_to_message(
entry,
reasoning=reasoning_data.get("reasoning", ""),
reasoning_details=reasoning_data.get("reasoning_details"),
)
if msg.role == "tool" and msg.tool_call_id:
entry["tool_call_id"] = msg.tool_call_id
messages.append(entry)
messages = sanitize_openai_messages(messages)
messages = strip_historical_reasoning(messages)
return messages
def _save_message(
self,
session_id: int,
role: str,
content: str = "",
tool_calls: list[dict[str, Any]] | None = None,
tool_call_id: str | None = None,
reasoning_json: str | None = None,
) -> Message:
message = Message(
session_id=session_id,
role=role,
content=content,
tool_calls_json=json.dumps(tool_calls, ensure_ascii=False) if tool_calls else None,
reasoning_json=reasoning_json,
tool_call_id=tool_call_id,
)
self.db.add(message)
session = self.get_session(session_id)
if session and role == "user" and session.title == "Новый чат" and content:
session.title = content[:60] + ("..." if len(content) > 60 else "")
self.db.commit()
self.db.refresh(message)
return message
def save_user_message(self, session_id: int, user_text: str) -> None:
self._save_message(session_id, "user", user_text)
async def _fallback_complete(
self,
messages: list[dict[str, Any]],
session_id: int,
) -> tuple[str, list[str], list[dict[str, Any]]]:
"""Нестриминговый запасной путь, если stream вернул пустоту."""
logger.info("chat session=%s fallback complete", session_id)
result: dict[str, Any] = {"content": "", "tool_calls": []}
for with_tools in (True, False):
result = await self.llm.complete(
messages,
tools=TOOL_DEFINITIONS if with_tools else None,
temperature=0.5,
visible_reply=True,
)
if (result.get("content") or "").strip() or result.get("tool_calls"):
break
tool_calls = result.get("tool_calls") or []
content = (result.get("content") or "").strip()
notices: list[str] = []
pomodoro_events: list[dict[str, Any]] = []
if tool_calls:
assistant_msg: dict[str, Any] = {
"role": "assistant",
"content": content or None,
"tool_calls": tool_calls,
}
messages.append(assistant_msg)
self._save_message(
session_id,
"assistant",
content,
tool_calls=tool_calls,
)
for tool_call in tool_calls:
fn = tool_call["function"]
args = LLMClient.parse_tool_arguments(fn.get("arguments", ""))
tool_result = await execute_tool(
self.db, fn["name"], args, session_id=session_id
)
messages.append(
{
"role": "tool",
"tool_call_id": tool_call["id"],
"content": tool_result,
}
)
self._save_message(
session_id,
"tool",
tool_result,
tool_call_id=tool_call["id"],
)
notice = format_tool_notice(fn["name"], tool_result)
if notice:
self._save_message(session_id, "notice", notice)
notices.append(notice)
if fn["name"] in POMODORO_TOOL_NAMES:
pomodoro_events.append(
{"name": fn["name"], "result": json.loads(tool_result)}
)
followup = await self.llm.complete(
messages,
tools=None,
temperature=0.4,
visible_reply=True,
)
return (followup.get("content") or "").strip(), notices, pomodoro_events
return content, notices, pomodoro_events
async def stream_response(
self,
session_id: int,
user_text: str,
*,
user_message_saved: bool = False,
) -> AsyncIterator[str]:
session = self.get_session(session_id)
if not session:
yield self._sse("error", {"message": "Session not found"})
return
if not user_message_saved:
self._save_message(session_id, "user", user_text)
yield self._sse("status", {"phase": "preparing"})
t0 = time.monotonic()
messages = await asyncio.to_thread(_build_messages_for_session, session_id)
prepare_sec = time.monotonic() - t0
if not messages:
yield self._sse("error", {"message": "Session not found"})
return
yield self._sse("status", {"phase": "generating"})
streamed_reply_parts: list[str] = []
all_tool_notices: list[str] = []
tools_executed = 0
tool_round = 0
for _ in range(MAX_TOOL_ROUNDS):
tool_round += 1
t_round = time.monotonic()
content_parts: list[str] = []
tool_calls: list[dict[str, Any]] = []
reasoning = ""
reasoning_details: list[Any] | None = None
finish_reason = ""
# После tool-раунда стримим вживую; до tools — буфер (иначе текст «переписывает» notice).
stream_live = tools_executed > 0
async for event in self.llm.stream_chat(messages, tools=TOOL_DEFINITIONS):
if event["type"] == "content":
content_parts.append(event["content"])
if stream_live:
yield self._sse("token", {"content": event["content"]})
elif event["type"] == "reasoning":
reasoning = event.get("reasoning", "") or reasoning
if event.get("reasoning_details"):
reasoning_details = event["reasoning_details"]
elif event["type"] == "error":
logger.warning(
"chat session=%s llm_error round=%d prepare=%.2fs: %s",
session_id,
tool_round,
prepare_sec,
event.get("content"),
)
yield self._sse("error", {"message": event.get("content", "LLM error")})
return
elif event["type"] == "tool_calls":
tool_calls = event["tool_calls"]
elif event["type"] == "done":
finish_reason = event.get("finish_reason", "")
logger.info(
"chat session=%s round=%d prepare=%.2fs llm=%.2fs "
"content_len=%d tool_calls=%d finish_reason=%s reasoning_len=%d",
session_id,
tool_round,
prepare_sec,
time.monotonic() - t_round,
len("".join(content_parts)),
len(tool_calls),
finish_reason,
len(reasoning),
)
if tool_calls:
round_text = "".join(content_parts)
if round_text.strip():
streamed_reply_parts.append(round_text)
assistant_msg: dict[str, Any] = {
"role": "assistant",
"content": round_text or None,
"tool_calls": tool_calls,
}
LLMClient.attach_reasoning_to_message(
assistant_msg,
reasoning=reasoning,
reasoning_details=reasoning_details,
)
reasoning_json = LLMClient.serialize_reasoning(
reasoning=reasoning,
reasoning_details=reasoning_details,
)
messages.append(assistant_msg)
self._save_message(
session_id,
"assistant",
round_text,
tool_calls=tool_calls,
reasoning_json=reasoning_json,
)
round_notices: list[str] = []
for tool_call in tool_calls:
fn = tool_call["function"]
args = LLMClient.parse_tool_arguments(fn.get("arguments", ""))
result = await execute_tool(
self.db, fn["name"], args, session_id=session_id
)
tools_executed += 1
tool_message = {
"role": "tool",
"tool_call_id": tool_call["id"],
"content": result,
}
messages.append(tool_message)
self._save_message(session_id, "tool", result, tool_call_id=tool_call["id"])
notice = format_tool_notice(fn["name"], result)
if notice:
self._save_message(session_id, "notice", notice)
round_notices.append(notice)
all_tool_notices.append(notice)
if fn["name"] in POMODORO_TOOL_NAMES:
yield self._sse(
"pomodoro",
{"name": fn["name"], "result": json.loads(result)},
)
yield self._sse("status", {"phase": "tools"})
for notice in round_notices:
yield self._sse("notice", {"content": notice})
continue
if content_parts and not stream_live:
for part in content_parts:
yield self._sse("token", {"content": part})
final_content = "".join(content_parts).strip()
if not final_content and streamed_reply_parts and tools_executed == 0:
final_content = "".join(streamed_reply_parts).strip()
if not final_content and reasoning:
final_content = reasoning.strip()
if not final_content and tools_executed:
retry = await self.llm.complete(
messages,
tools=None,
temperature=0.4,
visible_reply=True,
)
final_content = (retry.get("content") or "").strip()
if final_content:
yield self._sse("token", {"content": final_content})
# Notices уже в чате как role=notice — не дублируем в assistant.
if not final_content:
final_content, fb_notices, fb_pomodoro = await self._fallback_complete(
messages, session_id
)
if final_content:
yield self._sse("token", {"content": final_content})
for notice in fb_notices:
yield self._sse("notice", {"content": notice})
for event in fb_pomodoro:
yield self._sse("pomodoro", event)
if not final_content:
logger.warning(
"chat session=%s empty_reply tools=%d rounds=%d finish_reason=%s",
session_id,
tools_executed,
tool_round,
finish_reason,
)
yield self._sse(
"error",
{
"message": (
"Модель не вернула ответ (finish_reason="
f"{finish_reason or 'unknown'}). "
"Попробуй новый чат или проверь OPENROUTER_MODEL."
),
},
)
return
self._save_message(session_id, "assistant", final_content)
logger.info(
"chat session=%s done tools=%d reply_len=%d total=%.2fs",
session_id,
tools_executed,
len(final_content),
time.monotonic() - t0,
)
yield self._sse("done", {})
if get_settings().memory_auto_extract:
asyncio.create_task(
_extract_memory_background(session_id, user_text, final_content)
)
return
yield self._sse("error", {"message": "Too many tool call rounds"})
@staticmethod
def _sse(event: str, data: dict[str, Any]) -> str:
return f"event: {event}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n"