added RAG, Multiuser, TG bot
This commit is contained in:
@@ -0,0 +1,152 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from qdrant_client.http import models as qm
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import get_settings
|
||||
from app.db.models import ChatSession, Document, DocumentChunk, MemoryFact
|
||||
from app.rag import embeddings
|
||||
from app.rag.chunker import chunk_text
|
||||
from app.rag.store import (
|
||||
COLLECTION_DOC_CHUNKS,
|
||||
COLLECTION_FACTS,
|
||||
COLLECTION_SUMMARIES,
|
||||
delete_by_filter,
|
||||
upsert_points,
|
||||
)
|
||||
|
||||
|
||||
async def index_memory_fact(fact: MemoryFact) -> None:
    """Embed a memory fact and upsert it into the facts collection.

    Nothing is written when RAG is disabled in settings, when the fact is
    not active, or when the embedding call returns no vectors.

    Args:
        fact: The fact row to index. Its ``id`` is used as the point id and
            is also stored in the payload as ``fact_id``.
    """
    rag_on = get_settings().rag_enabled
    if not (rag_on and fact.active):
        return

    embedded = await embeddings.embed_texts([fact.content])
    if not embedded:
        return

    # Payload stored next to the vector in the facts collection.
    payload = {
        "user_id": fact.user_id,
        "fact_id": fact.id,
        "category": fact.category,
        "content": fact.content,
        "importance": fact.importance,
    }
    point = qm.PointStruct(id=int(fact.id), vector=embedded[0], payload=payload)
    upsert_points(COLLECTION_FACTS, [point])
|
||||
|
||||
|
||||
async def deactivate_memory_fact(fact_id: int) -> None:
    """Request deletion of facts-collection points matching ``fact_id``.

    The deletion is expressed as a filter on the ``fact_id`` payload key.
    No-op when RAG is disabled in settings.

    Args:
        fact_id: Identifier of the fact whose points should be removed.
    """
    if get_settings().rag_enabled:
        by_fact_id = qm.FieldCondition(
            key="fact_id",
            match=qm.MatchValue(value=fact_id),
        )
        delete_by_filter(COLLECTION_FACTS, [by_fact_id])
|
||||
|
||||
|
||||
async def index_session_summary(session_id: int, summary: str) -> None:
    """Embed a chat-session summary and upsert it into the summaries collection.

    The point is keyed by ``session_id`` and tagged with the ``user_id`` read
    from the matching ``ChatSession`` row.

    Nothing is indexed when RAG is disabled in settings, when ``summary`` is
    blank, when no ``ChatSession`` row exists for ``session_id``, or when the
    embedding call returns no vectors.

    Args:
        session_id: Primary key of the chat session being summarized.
        summary: Summary text. The full text is embedded; only the first
            4000 characters are stored in the point payload.
    """
    settings = get_settings()
    if not settings.rag_enabled or not summary.strip():
        return

    # Kept as a function-scope import, as in the original code
    # (presumably to avoid an import cycle -- TODO confirm).
    from app.db.base import SessionLocal

    db = SessionLocal()
    try:
        session = db.get(ChatSession, session_id)
        if session is None:
            # Do not fall back to a hard-coded user id: in a multi-user
            # deployment that would attribute this summary to an unrelated
            # user. Without an owner there is nothing safe to index.
            return
        user_id = session.user_id
    finally:
        db.close()

    vectors = await embeddings.embed_texts([summary])
    if not vectors:
        return

    upsert_points(
        COLLECTION_SUMMARIES,
        [
            qm.PointStruct(
                id=int(session_id),
                vector=vectors[0],
                payload={
                    "user_id": user_id,
                    "session_id": session_id,
                    "summary": summary[:4000],
                },
            )
        ],
    )
|
||||
|
||||
|
||||
async def ingest_document_file(
    db: Session,
    *,
    user_id: int,
    title: str,
    filename: str,
    raw_bytes: bytes,
) -> dict[str, Any]:
    """Store an uploaded document, split it into chunks, and index the chunks.

    The bytes are decoded as UTF-8 (undecodable bytes are replaced), a
    ``Document`` row and one ``DocumentChunk`` row per chunk are committed,
    and, when RAG is enabled, each chunk is embedded and upserted into the
    document-chunks collection with the chunk's row id as the point id.

    Args:
        db: SQLAlchemy session used for all writes; this function commits it.
        user_id: Owner of the document; stored on the row and in each payload.
        title: Display title; ``filename`` is used when this is empty.
        filename: Original file name.
        raw_bytes: Raw file content. Its SHA-256 hex digest and length are
            stored on the ``Document`` row.

    Returns:
        A dict with ``id``, ``title``, ``filename``, ``chunk_count``,
        ``size_bytes`` and ``created_at`` (ISO-8601 string or ``None``).

    Raises:
        ValueError: If the decoded text is empty after stripping whitespace.
    """
    settings = get_settings()
    text = raw_bytes.decode("utf-8", errors="replace").strip()
    if not text:
        raise ValueError("Пустой документ")

    doc = Document(
        user_id=user_id,
        title=title or filename,
        filename=filename,
        content_hash=hashlib.sha256(raw_bytes).hexdigest(),
        size_bytes=len(raw_bytes),
    )
    db.add(doc)
    db.flush()  # doc.id is needed below to link the chunk rows

    chunks = chunk_text(text)
    chunk_rows = [
        DocumentChunk(document_id=doc.id, chunk_index=idx, content=piece)
        for idx, piece in enumerate(chunks)
    ]
    db.add_all(chunk_rows)
    # Flush once and snapshot the generated ids *before* commit, so building
    # the points later does not need one refresh round-trip per chunk row.
    db.flush()
    chunk_meta = [(row.id, row.chunk_index, row.content) for row in chunk_rows]
    db.commit()
    db.refresh(doc)

    if settings.rag_enabled and chunks:
        vectors = await embeddings.embed_texts(chunks)
        # Same guard as the other indexers in this module: an empty embedding
        # result means there is nothing to upsert.
        if vectors:
            doc_id = doc.id
            doc_title = doc.title
            # strict=False keeps the original best-effort behaviour: if fewer
            # vectors than chunks come back, only the paired chunks are indexed.
            points = [
                qm.PointStruct(
                    id=int(chunk_id),
                    vector=vector,
                    payload={
                        "user_id": user_id,
                        "document_id": doc_id,
                        "chunk_id": chunk_id,
                        "chunk_index": chunk_index,
                        "title": doc_title,
                        "content": content,
                    },
                )
                for (chunk_id, chunk_index, content), vector in zip(
                    chunk_meta, vectors, strict=False
                )
            ]
            upsert_points(COLLECTION_DOC_CHUNKS, points)

    return {
        "id": doc.id,
        "title": doc.title,
        "filename": doc.filename,
        "chunk_count": len(chunks),
        "size_bytes": doc.size_bytes,
        "created_at": doc.created_at.isoformat() if doc.created_at else None,
    }
|
||||
Reference in New Issue
Block a user