Files
2026-06-10 08:32:20 +03:00

41 lines
1.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import re
# Matches Russian questions in which the user asks about their own identity:
#   "кто я"                                   - who am I
#   "как меня зовут"                          - what is my name
#   "сколько мне лет"                         - how old am I
#   "что ты помнишь/знаешь обо мне/про меня"  - what do you remember/know about me
#   "напомни кто я/про меня"                  - remind me who I am / about me
# The surrounding \b anchors stop a phrase from firing inside longer words
# (e.g. "кто я" inside "кто является" or "никто я").  The capturing groups are
# kept exactly as before so group numbering stays the same.
IDENTITY_QUESTION = re.compile(
    r"\b(кто\s+я|как\s+меня\s+зовут|сколько\s+мне\s+лет|"
    r"что\s+ты\s+(помнишь|знаешь)\s+(обо\s+мне|про\s+меня)|"
    r"напомни\s+(кто\s+я|про\s+меня))\b",
    re.IGNORECASE,
)
# Extracts a self-reported name; group 1 is the name itself.
# Accepted lead-ins: "меня зовут <name>", "зовут <name>", and "имя" followed by
# a colon and/or whitespace ("имя: <name>", "имя <name>", "имя:<name>").
# - \b keeps "зовут"/"имя" from matching inside longer words ("назовут").
# - The separator after "имя" is consumed once, so a single space or a bare
#   colon is enough (previously a second whitespace character was required).
# - A name is letters with optional inner hyphens ("Анна-Мария"); a dangling
#   trailing hyphen is not captured.
NAME_PATTERN = re.compile(
    r"\b(?:(?:меня\s+)?зовут\s+|имя[:\s]+)"
    r"([A-Za-zА-Яа-яЁё]+(?:-[A-Za-zА-Яа-яЁё]+)*)",
    re.IGNORECASE,
)
# Extracts a self-reported age as a 1-3 digit string.
#   group 1: "мне <N> лет/год/года"  (all three Russian plural forms of "year")
#   group 2: "возраст: <N>" / "возраст <N>"
# Exactly one of the two groups is set on a match.
# (?!\d) rejects longer digit runs instead of silently truncating them
# ("возраст: 1234" no longer yields "123").
AGE_PATTERN = re.compile(
    r"(?:\bмне\s+(\d{1,3})\s+(?:лет|года?)\b|\bвозраст[:\s]+(\d{1,3})(?!\d))",
    re.IGNORECASE,
)
def normalize_text(text: str) -> str:
    """Return *text* case-folded with every whitespace run collapsed to one space.

    Leading and trailing whitespace disappears because the text is split on
    whitespace and re-joined.
    """
    folded = text.casefold()
    words = folded.split()
    return " ".join(words)
def is_identity_question(text: str) -> bool:
    """Return True when IDENTITY_QUESTION matches anywhere in *text*."""
    found = IDENTITY_QUESTION.search(text)
    return found is not None
def parse_identity(text: str) -> dict[str, str]:
    """Collect the identity facts that can be found in *text*.

    Returns:
        A dict holding "name" when NAME_PATTERN matches and "age" when
        AGE_PATTERN matches; a key is absent when its pattern does not match.
        Both values are the matched substrings (the age is not converted to int).
    """
    found: dict[str, str] = {}

    name_hit = NAME_PATTERN.search(text)
    if name_hit is not None:
        found["name"] = name_hit.group(1)

    age_hit = AGE_PATTERN.search(text)
    if age_hit is not None:
        # The age sits in group 1 or group 2 depending on which alternative
        # of the pattern matched; take whichever one is set.
        primary, fallback = age_hit.group(1, 2)
        found["age"] = primary or fallback

    return found
def texts_are_similar(a: str, b: str) -> bool:
    """Report whether two texts are equal or one contains the other.

    Both texts are first passed through normalize_text, so the comparison
    ignores case and whitespace differences.

    Args:
        a: First text.
        b: Second text.

    Returns:
        True when the normalized texts are equal, or when one normalized text
        is a substring of the other. A text that normalizes to the empty
        string is similar only to another such text.
    """
    na, nb = normalize_text(a), normalize_text(b)
    if na == nb:
        return True
    # "" is a substring of every string, so without this guard an empty or
    # whitespace-only text would be reported as similar to any other text.
    if not na or not nb:
        return False
    return na in nb or nb in na