"""Indexing helpers that embed application data and push it to the vector store.

Three kinds of content are handled: memory facts, chat-session summaries and
uploaded document chunks. Vector-store writes are skipped whenever
``settings.rag_enabled`` is false.
"""

from __future__ import annotations

import hashlib
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

from qdrant_client.http import models as qm
from sqlalchemy import select
from sqlalchemy.orm import Session

from app.config import get_settings
from app.db.models import ChatSession, Document, DocumentChunk
from app.rag import embeddings
from app.rag.chunker import chunk_text
from app.rag.store import (
    COLLECTION_DOC_CHUNKS,
    COLLECTION_FACTS,
    COLLECTION_SUMMARIES,
    delete_by_filter,
    upsert_points,
)


async def index_memory_fact(
    *,
    fact_id: int,
    user_id: int,
    content: str,
    category: str,
    importance: int,
    active: bool = True,
) -> None:
    """Embed a memory fact and upsert it into the facts collection.

    Does nothing when RAG is disabled, when the fact is not active, or when
    the embedding call returns no vectors.

    Args:
        fact_id: Identifier of the fact; also used as the point id.
        user_id: Owner of the fact, stored in the payload.
        content: Text that is embedded and stored in the payload.
        category: Category label stored in the payload.
        importance: Importance value stored in the payload.
        active: When false the fact is not indexed.
    """
    settings = get_settings()
    if not settings.rag_enabled or not active:
        return

    vectors = await embeddings.embed_texts([content])
    if not vectors:
        return

    point = qm.PointStruct(
        id=int(fact_id),
        vector=vectors[0],
        payload={
            "user_id": user_id,
            "fact_id": fact_id,
            "category": category,
            "content": content,
            "importance": importance,
        },
    )
    upsert_points(COLLECTION_FACTS, [point])


async def deactivate_memory_fact(fact_id: int) -> None:
    """Delete the points whose ``fact_id`` payload matches from the facts collection.

    Does nothing when RAG is disabled.

    Args:
        fact_id: Identifier of the fact to remove from the index.
    """
    settings = get_settings()
    if not settings.rag_enabled:
        return

    by_fact_id = qm.FieldCondition(key="fact_id", match=qm.MatchValue(value=fact_id))
    delete_by_filter(COLLECTION_FACTS, [by_fact_id])


async def index_session_summary(session_id: int, summary: str) -> None:
    """Embed a chat-session summary and upsert it into the summaries collection.

    Does nothing when RAG is disabled, when ``summary`` is blank, or when the
    embedding call returns no vectors.

    Args:
        session_id: Identifier of the chat session; also used as the point id.
        summary: Summary text. The full text is embedded; the payload keeps at
            most the first 4000 characters.
    """
    settings = get_settings()
    if not settings.rag_enabled or not summary.strip():
        return

    # Imported locally, as in the original; presumably to avoid an import
    # cycle with app.db.base -- TODO confirm.
    from app.db.base import SessionLocal

    # NOTE(review): falls back to user 1 when the session row is missing.
    # Kept for backward compatibility; confirm this default is intended.
    user_id = 1
    db = SessionLocal()
    try:
        session = db.get(ChatSession, session_id)
        if session:
            user_id = session.user_id
    finally:
        db.close()

    vectors = await embeddings.embed_texts([summary])
    if not vectors:
        return

    point = qm.PointStruct(
        id=int(session_id),
        vector=vectors[0],
        payload={
            "user_id": user_id,
            "session_id": session_id,
            "summary": summary[:4000],
        },
    )
    upsert_points(COLLECTION_SUMMARIES, [point])


async def ingest_document_file(
    db: Session,
    *,
    user_id: int,
    title: str,
    filename: str,
    raw_bytes: bytes,
) -> dict[str, Any]:
    """Store an uploaded document with its chunks and index the chunks.

    The bytes are decoded as UTF-8 (undecodable bytes are replaced), the text
    is split with ``chunk_text``, and a ``Document`` plus one ``DocumentChunk``
    per piece are committed. When RAG is enabled the chunks are embedded and
    upserted into the document-chunks collection.

    Args:
        db: SQLAlchemy session used to persist the document and its chunks.
        user_id: Owner of the document.
        title: Display title; ``filename`` is used when this is empty.
        filename: Original file name.
        raw_bytes: Raw file content.

    Returns:
        A dict with ``id``, ``title``, ``filename``, ``chunk_count``,
        ``size_bytes`` and ``created_at`` (ISO-8601 string or ``None``).

    Raises:
        ValueError: If the decoded text is empty after stripping whitespace.
    """
    settings = get_settings()

    text = raw_bytes.decode("utf-8", errors="replace").strip()
    if not text:
        raise ValueError("Пустой документ")

    doc = Document(
        user_id=user_id,
        title=title or filename,
        filename=filename,
        content_hash=hashlib.sha256(raw_bytes).hexdigest(),
        size_bytes=len(raw_bytes),
    )
    db.add(doc)
    # Flush before building the chunk rows, which reference doc.id.
    db.flush()

    chunks = chunk_text(text)
    chunk_rows: list[DocumentChunk] = [
        DocumentChunk(document_id=doc.id, chunk_index=idx, content=piece)
        for idx, piece in enumerate(chunks)
    ]
    db.add_all(chunk_rows)
    db.commit()
    db.refresh(doc)

    if settings.rag_enabled and chunks:
        vectors = await embeddings.embed_texts(chunks)
        # Same guard as the other indexers in this module: the embedding call
        # may produce nothing, and the rows are already committed at this
        # point, so skip indexing instead of failing or sending an empty upsert.
        if vectors:
            points: list[qm.PointStruct] = []
            # strict=False: if fewer vectors than chunks come back, only the
            # leading chunks that received a vector are indexed.
            for row, vector in zip(chunk_rows, vectors, strict=False):
                db.refresh(row)
                points.append(
                    qm.PointStruct(
                        id=int(row.id),
                        vector=vector,
                        payload={
                            "user_id": user_id,
                            "document_id": doc.id,
                            "chunk_id": row.id,
                            "chunk_index": row.chunk_index,
                            "title": doc.title,
                            "content": row.content,
                        },
                    )
                )
            if points:
                upsert_points(COLLECTION_DOC_CHUNKS, points)

    return {
        "id": doc.id,
        "title": doc.title,
        "filename": doc.filename,
        "chunk_count": len(chunks),
        "size_bytes": doc.size_bytes,
        "created_at": doc.created_at.isoformat() if doc.created_at else None,
    }