31 lines
928 B
Python
31 lines
928 B
Python
import os
|
|
|
|
# Context budget used as the denominator when estimating context fill.
# Read once at import time from the CHAT_CONTEXT_MAX environment variable;
# falls back to 128000 when the variable is unset. A non-integer value
# raises ValueError on import.
CHAT_CONTEXT_MAX = int(os.environ.get("CHAT_CONTEXT_MAX", "128000"))
|
|
|
|
|
|
def estimate_tokens(text: str) -> int:
    """Return a rough token count for *text*, assuming four characters per token.

    Falsy input (``None`` or an empty string) counts as zero tokens. The
    result is floor-divided, so fewer than four characters also yields zero.
    """
    if not text:
        return 0
    return len(text) // 4
|
|
|
|
|
|
def compute_payload_usage(history: list, llm_system: str) -> dict:
    """Estimate context fill for the payload messages_for_llm would send.

    Counts the characters of the system prompt plus the ``content`` of every
    history entry whose ``role`` is ``"user"`` or ``"assistant"``; entries
    with any other role are ignored. Missing or falsy prompt/content values
    count as zero characters. Tokens are approximated as characters // 4.

    Returns a dict with keys ``chars``, ``tokens_est``, ``max_tokens_est``
    (the module-level ``CHAT_CONTEXT_MAX``) and ``percent`` (fill percentage
    rounded to one decimal place, or 0.0 when the budget is zero).
    """
    system_chars = len(llm_system) if llm_system else 0
    message_chars = sum(
        len(entry.get("content") or "")
        for entry in history
        if entry.get("role") in ("user", "assistant")
    )
    total_chars = system_chars + message_chars

    approx_tokens = total_chars // 4
    budget = CHAT_CONTEXT_MAX

    # Guard against a zero budget so the division cannot fail.
    if budget:
        fill_percent = round(100.0 * approx_tokens / budget, 1)
    else:
        fill_percent = 0.0

    return {
        "chars": total_chars,
        "tokens_est": approx_tokens,
        "max_tokens_est": budget,
        "percent": fill_percent,
    }
|
|
|
|
|
|
def context_warning_line(percent: float) -> str:
    """Return a context-budget warning line, or "" when *percent* is 85 or below.

    The warning starts with a newline and shows the percentage truncated to
    an integer.
    """
    within_budget = percent <= 85
    if within_budget:
        return ""
    whole_percent = int(percent)
    return f"\n[Context: ~{whole_percent}% of budget — keep replies focused]"
|