"""Rough estimates of how much of the chat context budget a payload uses."""

import os

# Context budget in (estimated) tokens; overridable via the environment.
CHAT_CONTEXT_MAX = int(os.getenv("CHAT_CONTEXT_MAX", "128000"))

# Heuristic shared by every estimate in this module: ~4 characters per token.
_CHARS_PER_TOKEN = 4

# Only messages with these roles are counted towards the payload size.
_COUNTED_ROLES = ("user", "assistant")

# Usage percentage above which a warning line is produced.
_WARN_ABOVE_PERCENT = 85


def estimate_tokens(text: str) -> int:
    """Return a rough token estimate for *text* (characters // 4).

    ``None`` or an empty string yields 0.
    """
    return len(text or "") // _CHARS_PER_TOKEN


def compute_payload_usage(history: list, llm_system: str) -> dict:
    """Estimate context fill for the payload messages_for_llm would send.

    Args:
        history: Message dicts with ``"role"`` and ``"content"`` keys. Only
            ``"user"`` and ``"assistant"`` messages are counted. ``None`` is
            treated as an empty history.
        llm_system: System prompt text; ``None`` counts as empty.

    Returns:
        A dict with ``chars`` (counted characters), ``tokens_est``
        (``chars // 4``), ``max_tokens_est`` (``CHAT_CONTEXT_MAX``) and
        ``percent`` (``tokens_est`` as a percentage of the budget, rounded to
        one decimal; ``0.0`` when the budget is not positive).
    """
    # NOTE(review): ``len`` is applied to whatever ``content`` holds; this
    # assumes string content -- confirm no caller passes structured content.
    chars = len(llm_system or "") + sum(
        len(message.get("content") or "")
        for message in (history or [])
        if message.get("role") in _COUNTED_ROLES
    )
    tokens_est = chars // _CHARS_PER_TOKEN
    max_tokens = CHAT_CONTEXT_MAX
    # A zero or negative budget cannot be meaningfully "filled"; report 0.0
    # instead of dividing by zero or returning a negative percentage.
    if max_tokens > 0:
        percent = round(100.0 * tokens_est / max_tokens, 1)
    else:
        percent = 0.0
    return {
        "chars": chars,
        "tokens_est": tokens_est,
        "max_tokens_est": max_tokens,
        "percent": percent,
    }


def context_warning_line(percent: float) -> str:
    """Return a warning line when *percent* exceeds 85, else an empty string.

    The percentage shown in the message is truncated to an integer.
    """
    if percent <= _WARN_ABOVE_PERCENT:
        return ""
    return f"\n[Context: ~{int(percent)}% of budget — keep replies focused]"