added RAG, Multiuser, TG bot

This commit is contained in:
2026-06-13 20:20:56 +00:00
parent 66e1b0e29e
commit c8a9429bed
142 changed files with 19901 additions and 8790 deletions
+152
View File
@@ -0,0 +1,152 @@
from __future__ import annotations
import hashlib
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from qdrant_client.http import models as qm
from sqlalchemy import select
from sqlalchemy.orm import Session
from app.config import get_settings
from app.db.models import ChatSession, Document, DocumentChunk, MemoryFact
from app.rag import embeddings
from app.rag.chunker import chunk_text
from app.rag.store import (
COLLECTION_DOC_CHUNKS,
COLLECTION_FACTS,
COLLECTION_SUMMARIES,
delete_by_filter,
upsert_points,
)
async def index_memory_fact(fact: MemoryFact) -> None:
    """Embed a single memory fact and upsert it into the facts collection.

    The call is a no-op when RAG is disabled in the settings, when the fact
    is not active, or when the embedding call returns nothing.

    Args:
        fact: The fact to index. ``fact.id`` (converted to ``int``) becomes
            the point id; ``user_id``, ``id``, ``category``, ``content`` and
            ``importance`` are copied into the point payload.
    """
    settings = get_settings()
    if not (settings.rag_enabled and fact.active):
        return

    embedded = await embeddings.embed_texts([fact.content])
    if not embedded:
        return

    # Exactly one text was submitted, so only the first vector is used.
    point = qm.PointStruct(
        id=int(fact.id),
        vector=embedded[0],
        payload={
            "user_id": fact.user_id,
            "fact_id": fact.id,
            "category": fact.category,
            "content": fact.content,
            "importance": fact.importance,
        },
    )
    upsert_points(COLLECTION_FACTS, [point])
async def deactivate_memory_fact(fact_id: int) -> None:
    """Remove the indexed entry of a memory fact from the facts collection.

    Does nothing when RAG is disabled in the settings.

    Args:
        fact_id: Value matched against the ``fact_id`` payload field of the
            points to delete.
    """
    if get_settings().rag_enabled:
        by_fact_id = qm.FieldCondition(key="fact_id", match=qm.MatchValue(value=fact_id))
        delete_by_filter(COLLECTION_FACTS, [by_fact_id])
async def index_session_summary(session_id: int, summary: str) -> None:
    """Embed a chat-session summary and upsert it into the summaries collection.

    The call is a no-op when RAG is disabled, when ``summary`` is empty or
    whitespace-only, or when the embedding call returns nothing.

    The owning user is read from the ``ChatSession`` row with the given id,
    using a short-lived database session opened and closed inside this
    function. If no such row exists, user id ``1`` is used.

    NOTE(review): the fallback attributes summaries of unknown sessions to
    user 1 -- confirm this is intended when several users exist.

    Args:
        session_id: Chat session id; also used (as ``int``) for the point id.
        summary: Summary text. The full text is embedded, while the payload
            stores at most its first 4000 characters.
    """
    settings = get_settings()
    if not (settings.rag_enabled and summary.strip()):
        return

    # Function-scope import kept from the original (presumably to avoid an
    # import cycle with the DB package -- TODO confirm).
    from app.db.base import SessionLocal

    db = SessionLocal()
    try:
        chat_session = db.get(ChatSession, session_id)
        owner_id = chat_session.user_id if chat_session else 1
    finally:
        db.close()

    embedded = await embeddings.embed_texts([summary])
    if not embedded:
        return

    point = qm.PointStruct(
        id=int(session_id),
        vector=embedded[0],
        payload={
            "user_id": owner_id,
            "session_id": session_id,
            "summary": summary[:4000],
        },
    )
    upsert_points(COLLECTION_SUMMARIES, [point])
async def ingest_document_file(
    db: Session,
    *,
    user_id: int,
    title: str,
    filename: str,
    raw_bytes: bytes,
) -> dict[str, Any]:
    """Persist an uploaded text document, chunk it and index the chunks.

    The bytes are decoded as UTF-8 (undecodable bytes are replaced), a
    ``Document`` row and one ``DocumentChunk`` row per chunk are committed,
    and -- when RAG is enabled and there is at least one chunk -- the chunks
    are embedded and upserted into the document-chunk collection.

    The database commit happens before embedding, so a failure in the
    embedding/upsert step leaves the document stored but not indexed.

    Args:
        db: Open SQLAlchemy session used for all inserts; committed here.
        user_id: Owner of the document; stored on the row and in payloads.
        title: Display title; ``filename`` is used when it is empty.
        filename: Original file name.
        raw_bytes: Raw file content; hashed with SHA-256 and measured as-is.

    Returns:
        A dict with ``id``, ``title``, ``filename``, ``chunk_count``,
        ``size_bytes`` and ``created_at`` (ISO string or ``None``).

    Raises:
        ValueError: If the decoded text is empty after stripping whitespace.
    """
    settings = get_settings()
    text = raw_bytes.decode("utf-8", errors="replace").strip()
    if not text:
        raise ValueError("Пустой документ")

    doc = Document(
        user_id=user_id,
        title=title or filename,
        filename=filename,
        content_hash=hashlib.sha256(raw_bytes).hexdigest(),
        size_bytes=len(raw_bytes),
    )
    db.add(doc)
    # Flush before building chunks: the rows below reference doc.id.
    db.flush()

    chunks = chunk_text(text)
    chunk_rows: list[DocumentChunk] = [
        DocumentChunk(document_id=doc.id, chunk_index=idx, content=piece)
        for idx, piece in enumerate(chunks)
    ]
    db.add_all(chunk_rows)
    db.commit()
    db.refresh(doc)

    if settings.rag_enabled and chunks:
        vectors = await embeddings.embed_texts(chunks)
        # Same guard as the other indexers in this module: when the embedder
        # returns nothing, skip indexing instead of sending an empty upsert.
        if vectors:
            points: list[qm.PointStruct] = []
            # strict=False: if fewer vectors than chunks come back, only the
            # leading pairs are indexed rather than raising.
            for row, vector in zip(chunk_rows, vectors, strict=False):
                db.refresh(row)  # reload the row's attributes after the commit
                points.append(
                    qm.PointStruct(
                        id=int(row.id),
                        vector=vector,
                        payload={
                            "user_id": user_id,
                            "document_id": doc.id,
                            "chunk_id": row.id,
                            "chunk_index": row.chunk_index,
                            "title": doc.title,
                            "content": row.content,
                        },
                    )
                )
            upsert_points(COLLECTION_DOC_CHUNKS, points)

    return {
        "id": doc.id,
        "title": doc.title,
        "filename": doc.filename,
        "chunk_count": len(chunks),
        "size_bytes": doc.size_bytes,
        "created_at": doc.created_at.isoformat() if doc.created_at else None,
    }