first commit

2026-05-28 08:42:46 +03:00
commit e5c0df308f
38 changed files with 2753 additions and 0 deletions
@@ -0,0 +1,125 @@
+import json
+import logging
+import os
+import re
+
+from services.llm import send_message
+from services.personas import get_persona
+
+# System prompt for the "prompt builder" LLM call made by generate_sd_prompt().
+# It asks the model to answer with a bare JSON object whose keys are
+# should_generate, shot_type, appearance_tags, action_tags and environment_tags.
+# This text is sent to the model verbatim, so any edit changes runtime behaviour.
+PROMPT_BUILDER_SYSTEM = """You are a Stable Diffusion prompt engineer for anime illustration models.
+Given a roleplay chat excerpt and character appearance hints, output ONLY valid JSON (no markdown):
+{
+  "should_generate": true,
+  "shot_type": "first_person_pov" | "landscape" | "third_person",
+  "appearance_tags": "booru-style tags for character appearance extracted from hints, e.g. 'white hair, wolf ears, wolf tail, yellow eyes'",
+  "action_tags": "booru-style tags for pose/action, e.g. 'sitting, smiling, looking at viewer'",
+  "environment_tags": "booru-style tags for location/lighting, e.g. 'indoors, kitchen, sunlight'"
+}
+Rules:
+- ONLY use real danbooru/e621 tags. Multi-word concepts MUST be written as single tags: 'white hair' not 'white, hair'. 'wolf ears' not 'wolf, ears'.
+- Do NOT include quality tags, model names, style words, 'pov', or category/metadata words.
+- Do NOT invent tags. If unsure — omit.
+- Keep each field to 3-6 tags."""
+
+
+def extract_image_prompt_tag(text: str) -> str | None:
+    """Return the payload of the first ``[IMAGE_PROMPT: ...]`` tag in *text*.
+
+    The payload is everything between the opening marker and the next ``]``,
+    with surrounding whitespace removed. ``None`` is returned when the marker
+    is absent or is never closed.
+    """
+    _before, marker, remainder = text.partition("[IMAGE_PROMPT:")
+    if not marker:
+        return None
+
+    payload, closer, _after = remainder.partition("]")
+    if not closer:
+        # Opening marker without a closing bracket: treat as "no tag".
+        return None
+    return payload.strip()
+
+
+# Matches one whole ``[IMAGE_PROMPT: ...]`` tag; DOTALL lets the payload span lines.
+_IMAGE_PROMPT_TAG_RE = re.compile(r"\[IMAGE_PROMPT:.*?\]", flags=re.DOTALL)
+
+
+def strip_image_prompt_tag(text: str) -> str:
+    """Return *text* with every closed ``[IMAGE_PROMPT: ...]`` tag removed.
+
+    Leading and trailing whitespace of the result is trimmed; an unclosed
+    marker is left untouched because the pattern requires a closing ``]``.
+    """
+    without_tags = _IMAGE_PROMPT_TAG_RE.sub("", text)
+    return without_tags.strip()
+
+
+# Checkpoint file names that get the "score_*" quality tags and PONY_NEGATIVE.
+PONY_CHECKPOINTS = {
+    "ponyDiffusionV6XL_v6StartWithThisOne.safetensors",
+}
+# Active checkpoint name, read once at import time; empty string when unset.
+SD_CHECKPOINT = os.environ.get("SD_CHECKPOINT", "")
+# Negative prompt used when SD_CHECKPOINT is one of PONY_CHECKPOINTS.
+PONY_NEGATIVE = (
+    "score_1, score_2, score_3, score_4, "
+    "worst quality, low quality, blurry, bad anatomy, "
+    "watermark, text, censored"
+)
+
+def _as_tag_string(value) -> str:
+    """Coerce one tag field (str, list/tuple of tags, or None) to a stripped string."""
+    if value is None:
+        return ""
+    if isinstance(value, (list, tuple)):
+        return ", ".join(str(item) for item in value)
+    return str(value).strip()
+
+
+def build_positive_prompt(scene: dict, persona: dict | None) -> str:
+    """Assemble the comma-separated positive prompt for one scene.
+
+    Tag order is: quality tags, appearance, optional first-person tags,
+    action, environment, then the persona's LoRA reference. Duplicate tags
+    are dropped, keeping the first occurrence.
+
+    Args:
+        scene: Parsed prompt-builder reply. The keys read are ``shot_type``,
+            ``appearance_tags``, ``action_tags`` and ``environment_tags``;
+            all are optional. Values come from an LLM, so lists and ``None``
+            are tolerated as well as strings.
+        persona: Persona record or ``None``. The keys read are
+            ``appearance_tags``, ``lora_name`` and ``lora_weight``.
+
+    Returns:
+        The positive prompt as a single ``", "``-joined string.
+    """
+    persona = persona or {}
+    is_pony = SD_CHECKPOINT in PONY_CHECKPOINTS
+    quality = (
+        "score_9, score_8_up, score_7_up, source_anime, highres"
+        if is_pony
+        else "masterpiece, best quality, highres"
+    )
+    parts = [quality]
+
+    # Prefer LLM-extracted appearance over raw persona tags.
+    parts.append(
+        _as_tag_string(scene.get("appearance_tags"))
+        or _as_tag_string(persona.get("appearance_tags"))
+    )
+
+    shot_type = scene.get("shot_type")
+    if shot_type != "landscape":
+        # Landscape shots carry no pose/action tags.
+        if shot_type == "first_person_pov":
+            parts.append("pov, first-person view, looking at viewer")
+        parts.append(_as_tag_string(scene.get("action_tags")))
+    parts.append(_as_tag_string(scene.get("environment_tags")))
+
+    lora = persona.get("lora_name")
+    if lora:
+        weight = persona.get("lora_weight")
+        if weight is None:
+            # A stored null would otherwise render as "<lora:name:None>".
+            weight = 0.8
+        parts.append(f"<lora:{lora}:{weight}>")
+
+    # Split on bare commas (not ", ") so "a,b" and "a, b" dedupe identically.
+    seen: set[str] = set()
+    deduped: list[str] = []
+    for part in parts:
+        for tag in part.split(","):
+            tag = tag.strip()
+            if tag and tag not in seen:
+                seen.add(tag)
+                deduped.append(tag)
+    return ", ".join(deduped)
+
+
+def _parse_scene_json(raw: str) -> dict | None:
+    """Decode the prompt-builder reply; return None if it is not a JSON object.
+
+    A surrounding Markdown code fence (with or without a language word) is
+    removed first. Raises ``json.JSONDecodeError`` on malformed JSON.
+    """
+    raw = raw.strip()
+    if raw.startswith("```"):
+        raw = re.sub(r"^```\w*\n?", "", raw)
+        raw = re.sub(r"\n?```$", "", raw)
+    scene = json.loads(raw)
+    return scene if isinstance(scene, dict) else None
+
+
+async def generate_sd_prompt(
+    messages: list,
+    persona_id: str,
+) -> tuple[str | None, str | None]:
+    """Build a Stable Diffusion prompt from the recent chat, via an LLM call.
+
+    Args:
+        messages: Chat history as dicts with ``role`` and ``content`` keys.
+            Only the last six ``user``/``assistant`` entries are used.
+        persona_id: Persona identifier. Ids starting with ``card_`` also pull
+            the character card description into the appearance hints.
+
+    Returns:
+        ``(full, negative)`` where ``full`` is the positive prompt followed by
+        a ``Negative prompt:`` section, and ``negative`` is the negative
+        prompt alone. ``(None, None)`` is returned when the persona is missing
+        or has ``sd_enabled`` falsy, when there are no usable messages, when
+        the LLM call or JSON parsing fails, when the reply is not a JSON
+        object, or when the reply sets ``should_generate`` to ``false``.
+    """
+    persona = await get_persona(persona_id)
+    if not persona or not persona.get("sd_enabled"):
+        return None, None
+
+    recent = [m for m in messages if m.get("role") in ("user", "assistant")][-6:]
+    if not recent:
+        return None, None
+
+    # A message whose content is None must not reach the regex in
+    # strip_image_prompt_tag, so it is treated as empty text.
+    excerpt = "\n".join(
+        f"{m['role']}: {strip_image_prompt_tag(m.get('content') or '')}"
+        for m in recent
+    )
+
+    appearance = persona.get("appearance_tags") or ""
+    # For card personas, also include description for better visual context
+    if persona_id.startswith("card_"):
+        # Imported here rather than at module top, as in the original code
+        # (presumably to avoid an import cycle; not confirmed from this file).
+        from services.character_card import get_character
+        card = await get_character(persona_id.removeprefix("card_"))
+        if card and card.get("description"):
+            appearance = f"{appearance}\nCharacter description: {card['description'][:400]}"
+
+    builder_messages = [
+        {"role": "system", "content": PROMPT_BUILDER_SYSTEM},
+        {
+            "role": "user",
+            "content": f"Persona appearance hints: {appearance}\n\nChat:\n{excerpt}",
+        },
+    ]
+
+    try:
+        raw = await send_message(builder_messages)
+        scene = _parse_scene_json(raw)
+    except Exception:
+        # Image generation is best-effort: never let it break the chat flow,
+        # but leave a trace so failures are diagnosable.
+        logging.getLogger(__name__).warning(
+            "SD prompt builder failed for persona %s", persona_id, exc_info=True
+        )
+        return None, None
+
+    # Reject non-object replies and honour an explicit "should_generate": false.
+    if scene is None or scene.get("should_generate") is False:
+        return None, None
+
+    positive = build_positive_prompt(scene, persona)
+    is_pony = SD_CHECKPOINT in PONY_CHECKPOINTS
+    negative = PONY_NEGATIVE if is_pony else "low quality, blurry, bad anatomy, watermark, text"
+    if scene.get("shot_type") == "first_person_pov":
+        negative += ", third person, over the shoulder"
+
+    full = positive
+    if negative:
+        full += f"\n\nNegative prompt: {negative}"
+    return full, negative