41 lines
1.2 KiB
Python
41 lines
1.2 KiB
Python
import re
|
||
|
||
# Detects Russian phrases in which the user asks about their own identity:
# "who am I", "what is my name", "how old am I",
# "what do you remember/know about me", "remind me who I am / about me".
# The surrounding \b anchors make the phrases match only as whole words, so
# "кто я" no longer fires inside "кто является ..." and "сколько мне лет"
# no longer fires inside "сколько мне летать ...".
# The capture groups are kept exactly as before for callers that read them.
IDENTITY_QUESTION = re.compile(
    r"\b(кто\s+я|как\s+меня\s+зовут|сколько\s+мне\s+лет|"
    r"что\s+ты\s+(помнишь|знаешь)\s+(обо\s+мне|про\s+меня)|"
    r"напомни\s+(кто\s+я|про\s+меня))\b",
    re.IGNORECASE,
)
|
||
|
||
# Captures a stated name in group 1 after one of the introducers
# "меня зовут" / "зовут" (followed by whitespace) or "имя" (followed by any
# mix of colons and whitespace).
# Each introducer carries its own separator: previously a shared mandatory
# \s+ came after "имя[:\s]+", which demanded two separator characters and
# rejected "имя Иван" and "имя:Иван".
# The leading \b stops the introducer from matching inside a longer word
# such as "назовут".
# A name is a run of Latin/Cyrillic letters that may contain hyphens.
NAME_PATTERN = re.compile(
    r"\b(?:меня\s+зовут\s+|имя[:\s]+|зовут\s+)"
    r"([A-Za-zА-Яа-яЁё][A-Za-zА-Яа-яЁё\-]*)",
    re.IGNORECASE,
)
|
||
# Captures a stated age of one to three digits.
# Group 1 is filled by the "мне <N> лет/год/года" form, group 2 by the
# "возраст: <N>" form; exactly one of them is set on a match.
# Russian uses different noun forms depending on the number
# ("21 год", "32 года", "25 лет"), so all three are accepted here.
AGE_PATTERN = re.compile(
    r"(?:мне\s+(\d{1,3})\s+(?:лет|года?)|возраст[:\s]+(\d{1,3}))",
    re.IGNORECASE,
)
|
||
|
||
|
||
def normalize_text(text: str) -> str:
    """Return *text* case-folded with whitespace collapsed.

    Leading and trailing whitespace is dropped and every internal run of
    whitespace becomes a single space.
    """
    folded = text.casefold()
    words = folded.split()
    return " ".join(words)
|
||
|
||
|
||
def is_identity_question(text: str) -> bool:
    """Report whether ``IDENTITY_QUESTION`` matches anywhere in *text*."""
    found = IDENTITY_QUESTION.search(text)
    return found is not None
|
||
|
||
|
||
def parse_identity(text: str) -> dict[str, str]:
    """Extract the name and/or age stated in *text*.

    Returns a dict that holds a ``"name"`` entry when ``NAME_PATTERN``
    matches and an ``"age"`` entry (as a digit string) when ``AGE_PATTERN``
    matches. The dict is empty when neither pattern matches.
    """
    identity: dict[str, str] = {}

    if (name_hit := NAME_PATTERN.search(text)) is not None:
        identity["name"] = name_hit.group(1)

    if (age_hit := AGE_PATTERN.search(text)) is not None:
        # The age lands in group 1 or group 2 depending on which phrasing
        # matched; take whichever one is set.
        primary, fallback = age_hit.group(1, 2)
        identity["age"] = primary or fallback

    return identity
|
||
|
||
|
||
def texts_are_similar(a: str, b: str) -> bool:
    """Report whether two texts are equal or one contains the other.

    Both inputs are passed through ``normalize_text`` before comparison.

    Returns:
        True when the normalized texts are identical, or when one is a
        non-empty substring of the other; False otherwise.
    """
    na, nb = normalize_text(a), normalize_text(b)
    if na == nb:
        return True
    # An empty string is a substring of every string, so without this guard
    # a blank input would be reported as similar to any text at all.
    if not na or not nb:
        return False
    return na in nb or nb in na
|