52 lines
1.4 KiB
Python
52 lines
1.4 KiB
Python
"""Infer RP session language from recent chat for narrator/plot prompts."""
|
|
|
|
import re
|
|
|
|
_CYRILLIC = re.compile(r"[\u0400-\u04FF]")
|
|
|
|
|
|
def infer_rp_language(messages: list | None, *, sample: int = 8) -> str:
|
|
"""
|
|
Return 'ru' if recent user/assistant text is mostly Cyrillic, else 'en'.
|
|
"""
|
|
if not messages:
|
|
return "ru"
|
|
texts: list[str] = []
|
|
for m in reversed(messages):
|
|
if not isinstance(m, dict):
|
|
continue
|
|
if m.get("role") not in ("user", "assistant"):
|
|
continue
|
|
c = (m.get("content") or "").strip()
|
|
if c:
|
|
texts.append(c)
|
|
if len(texts) >= sample:
|
|
break
|
|
if not texts:
|
|
return "ru"
|
|
combined = " ".join(texts)
|
|
cyr = len(_CYRILLIC.findall(combined))
|
|
lat = len(re.findall(r"[A-Za-z]", combined))
|
|
if cyr == 0 and lat > 0:
|
|
return "en"
|
|
if cyr >= lat:
|
|
return "ru"
|
|
return "ru" if cyr > lat * 0.3 else "en"
|
|
|
|
|
|
def locale_instruction(lang: str) -> str:
    """Build the prompt directive that pins generated prose to one language.

    Only ``"ru"`` selects the Russian directive; every other value yields the
    English one.
    """
    if lang != "ru":
        english_parts = (
            "Session language: English. ",
            "All prose you generate MUST be in English.",
        )
        return "".join(english_parts)

    russian_parts = (
        "Session language: Russian. ",
        "All prose you generate (injections, titles, resolution_text, status_quo, choice labels) ",
        "MUST be in Russian.",
    )
    return "".join(russian_parts)
|
|
|
|
|
|
def locale_label(lang: str) -> str:
    """Return the English display name for a language code.

    ``"ru"`` maps to ``"Russian"``; any other value maps to ``"English"``.
    """
    if lang == "ru":
        return "Russian"
    return "English"
|