"""Build Stable Diffusion prompts from roleplay chat excerpts.

An LLM turns the latest chat beat into a small JSON "scene" (shot type, action
tags, environment tags, optional prose).  The scene is sanitised and combined
with the persona's appearance tags, outfit tags and LoRA into a positive and a
negative prompt.  Which layout is used depends on the ``SD_CHECKPOINT`` /
``SD_UNET`` environment variables (Pony, "Anima", or a generic fallback).
"""

import json
import logging
import os
import re
from dataclasses import dataclass

from services.llm import send_message, send_message_with_model
from services.personas import get_persona

logger = logging.getLogger(__name__)

# Separator placed between the positive and the negative prompt in "*_full" strings.
NEGATIVE_PROMPT_SEPARATOR = "\n\n__NEGATIVE_PROMPT__\n\n"

PROMPT_BUILDER_SYSTEM = """You are a Stable Diffusion prompt engineer for anime illustration models.
Given a roleplay chat excerpt, output ONLY valid JSON (no markdown):
{
  "should_generate": true,
  "shot_type": "first_person_pov" | "landscape" | "third_person",
  "action_tags": "booru-style tags for pose/action/expression",
  "environment_tags": "booru-style tags for location/lighting/time"
}

Rules:
- ONLY use real danbooru/e621 tags. Multi-word concepts MUST be underscore_joined.
- Do NOT include appearance/character tags — those are provided separately.
- Do NOT include quality tags, model names, style words, 'pov', or category/metadata words.
- Do NOT invent tags. If unsure — omit.
- Keep action_tags and environment_tags to 3-6 tags each.
- shot_type: default "first_person_pov" for dialogue/intimacy at arm's length. "third_person" only for wide action (fight, chase). "landscape" only when environment is the focus.
- should_generate: false for non-visual beats (pure internal monologue, time skips with no new pose, empty lines).
- NEVER use negative words in tag fields (not, without, naked, nsfw, etc.)."""

ANIMA_BUILDER_EXTRA = """

Anima hybrid mode — ALSO include:
  "pov_cue": "face_to_face" | "walking_together" | "doorway_invite" | "reach_to_viewer" | "dialogue_close",
  "viewer_body_visible": false,
  "scene_description": "ONE short English sentence (max 40 words). Camera POV: what the viewer sees. Mood/atmosphere only — do NOT repeat tags from action_tags/environment_tags. Do NOT list comma-separated booru tags."

POV / interaction rules:
- Default viewer_body_visible: false. The viewer's body, hands, or face must NOT appear in the image — only the character toward the camera.
- For hugs, embraces: use arms_out, reaching_towards_viewer, inviting_hug — NOT holding_hands, lifting, carrying, nose_rub (these draw a second body in POV).
- For long messages with time skips ("About an hour later..."), illustrate ONLY the final visible beat (usually the last paragraph).
- scene_description: describe HER toward the camera only — NEVER "someone", "both", "with you", "hand in hand with", or another person's body.
- NEVER use tags: looking_at_each_other, couple, 2girls, 2boys, multiple_girls. For POV walking together omit holding_hands (use walking, smiling, reaching_towards_viewer instead).
- pov_cue: pick the framing that matches the CURRENT beat (walking_together for strolling side by side, doorway_invite for doorway with arms open, reach_to_viewer when she reaches toward camera, face_to_face for close dialogue).
- Illustrate ONLY the beat under === ILLUSTRATE ===; use === Context === for outfit/location hints only.
- Do NOT put English sentences in action_tags or environment_tags — tags only."""

POV_CUE_PHRASES: dict[str, str] = {
    "face_to_face": "POV: close face-to-face, she looks directly at you",
    "walking_together": "POV: walking beside you, profile and shared path visible",
    "doorway_invite": "POV: she blocks the doorway, arms open toward you",
    "reach_to_viewer": "POV: she reaches toward the camera",
    "dialogue_close": "POV: close conversation, she faces you at arm's length",
}
POV_CUE_DEFAULT = "POV: she stands before you, facing the camera"

POV_INTERACTION_NEGATIVE = (
    "duplicate, clone, multiple_girls, 2girls, extra_person, pov hands, "
    "disembodied hands, extra arms, second person"
)

# Substrings of action tags that imply physical contact with the viewer.
_CONTACT_ACTION_KEYWORDS = (
    "hug",
    "holding_hands",
    "hand_holding",
    "arms_out",
    "embrace",
    "reaching",
    "inviting_hug",
    "arm_around",
    "cuddling",
)

# Single-word tags dropped when they stand alone (no underscore).
_JUNK_STANDALONE_TAGS = frozenset({
    "white", "black", "skin", "ear", "ears", "girl", "boy", "fox", "wolf",
    "cat", "short", "tall", "slim", "golden", "silver", "red", "blue",
    "green", "purple", "pink", "brown", "blonde", "eye", "eyes", "hair",
})

# Tags that are always removed.
_INVALID_TAGS = frozenset({
    "pumped_up", "pumped", "looking_at_each_other", "couple", "2girls",
    "2boys", "multiple_girls", "multiple_boys", "duo",
})

# Action tags removed for first-person POV shots only.
_POV_DROP_ACTION_TAGS = frozenset({
    "holding_hands", "hand_holding", "looking_at_each_other", "couple",
    "lifting", "carry", "carrying", "princess_carry", "nose_rub", "nose_boop",
})

_TIME_SKIP_RE = re.compile(
    r"(?i)\b(?:about an hour later|hours later|later that (?:day|evening|night)|"
    r"the next (?:day|morning|evening)|meanwhile|after (?:some )?time)\b[.…\s]*",
)

_POV_MOOD_FALLBACK: dict[str, str] = {
    "walking_together": "Easy warmth and quiet laughter in the afternoon light.",
    "doorway_invite": "Cool air and playful tension as she waits in the doorway.",
    "reach_to_viewer": "A charged moment as she reaches toward the camera.",
    "face_to_face": "Her expression softens in close focus toward the camera.",
    "dialogue_close": "Intimate calm in the space between you.",
}

_INDOOR_ENV_MARKERS = frozenset({"doorway", "indoors", "indoor", "apartment", "inside", "room"})
_OUTDOOR_ENV_MARKERS = frozenset({"outdoor", "outdoors", "outside", "street"})

_POV_PROSE_BANNED = re.compile(
    r"\b(someone|both|together with|hand in hand with|another person|second person|"
    r"your hands|your fingers|your embrace|your heat|intertwined|with you|"
    r"demands your|before you)\b",
    re.IGNORECASE,
)

SD_ANIMA_DUAL_COMPARE = os.getenv("SD_ANIMA_DUAL_COMPARE", "false").lower() in ("1", "true", "yes")


@dataclass
class SdPromptBundle:
    """Prompts produced for one scene (see ``_bundle_from_scene``)."""

    # Positive prompt + NEGATIVE_PROMPT_SEPARATOR + negative prompt.
    tag_full: str
    # Negative prompt on its own.
    negative: str
    # Anima dual-compare only: tags-only positive + separator + negative; otherwise None.
    desc_full: str | None = None


def extract_image_prompt_tag(text: str) -> str | None:
    """Return the stripped payload of the first ``[IMAGE_PROMPT:...]`` tag, or None.

    None is returned when the marker is absent or has no closing ``]``.
    """
    marker = "[IMAGE_PROMPT:"
    start = text.find(marker)
    if start == -1:
        return None
    start += len(marker)
    end = text.find("]", start)
    if end == -1:
        return None
    return text[start:end].strip()


def strip_image_prompt_tag(text: str) -> str:
    """Remove every ``[IMAGE_PROMPT:...]`` tag from *text* and strip the result."""
    return re.sub(r"\[IMAGE_PROMPT:.*?\]", "", text, flags=re.DOTALL).strip()


SD_CHECKPOINT = os.getenv("SD_CHECKPOINT", "")
SD_UNET = os.getenv("SD_UNET", "")
SD_PROMPT_MODEL = os.getenv("SD_PROMPT_MODEL", "").strip()

PONY_CHECKPOINTS = {"ponyDiffusionV6XL_v6StartWithThisOne.safetensors"}

PONY_NEGATIVE = "score_1, score_2, score_3, score_4, worst quality, low quality, blurry, bad anatomy, watermark, text, censored"
ANIMA_NEGATIVE = "worst quality, low quality, score_1, score_2, score_3, blurry, jpeg artifacts, sepia"


def _is_pony() -> bool:
    """True when the configured checkpoint is one of ``PONY_CHECKPOINTS``."""
    return SD_CHECKPOINT in PONY_CHECKPOINTS


def _is_anima() -> bool:
    """True when ``SD_UNET`` is set and ``SD_CHECKPOINT`` is empty."""
    return bool(SD_UNET) and not SD_CHECKPOINT


def anima_dual_enabled() -> bool:
    """True when running Anima with ``SD_ANIMA_DUAL_COMPARE`` switched on."""
    return _is_anima() and SD_ANIMA_DUAL_COMPARE


def _builder_system() -> str:
    """Return the system prompt for the scene-builder LLM (extended for Anima)."""
    if _is_anima():
        return PROMPT_BUILDER_SYSTEM + ANIMA_BUILDER_EXTRA
    return PROMPT_BUILDER_SYSTEM


def _tag_key(tag: str) -> str:
    """Normalise a tag for comparisons: lowercase, spaces replaced by underscores."""
    return tag.lower().replace(" ", "_")


def _as_tag_string(value: object) -> str:
    """Coerce an LLM tag field to a comma-separated string.

    The builder is asked for strings, but a JSON list of tags is accepted too;
    any other type (including None) yields an empty string.
    """
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        return ", ".join(str(item) for item in value if item is not None)
    return ""


def _normalize_shot_type(scene: dict) -> dict:
    """Return a sanitised copy of *scene* with ``shot_type`` forced to a known value.

    ``landscape`` is kept as is.  ``third_person`` is kept only when the action
    tags contain a wide-action keyword.  Everything else becomes
    ``first_person_pov``, with ``viewer_body_visible`` defaulting to False.
    """
    scene = dict(scene)  # never mutate the caller's dict
    shot = str(scene.get("shot_type") or "").strip().lower()

    if shot == "landscape":
        scene["shot_type"] = "landscape"
        return _sanitize_scene_fields(scene)

    if shot == "third_person":
        action = _as_tag_string(scene.get("action_tags")).lower()
        wide = ("battle", "fight", "chase", "running", "crowd", "wide_shot", "group_shot")
        if any(w in action for w in wide):
            scene["shot_type"] = "third_person"
            return _sanitize_scene_fields(scene)

    scene["shot_type"] = "first_person_pov"
    if scene.get("viewer_body_visible") is None:
        scene["viewer_body_visible"] = False
    return _sanitize_scene_fields(scene)


def _split_tag_input(tag_str: str) -> list[str]:
    """Split a comma-separated tag string into stripped, non-empty tags."""
    return [t.strip() for t in (tag_str or "").split(",") if t.strip()]


def _is_sentence_like_tag(tag: str) -> bool:
    """Heuristic: True when *tag* looks like prose rather than a booru tag."""
    t = tag.strip()
    if len(t) > 45:
        return True
    if re.search(r"[.!?]", t):
        return True
    words = t.split()
    return len(words) >= 5 and "_" not in t


def _filter_tag_field(tag_str: str, *, for_pov: bool, field: str) -> str:
    """Drop invalid, sentence-like and (for POV action fields) POV-breaking tags."""
    kept: list[str] = []
    for raw in _split_tag_input(tag_str):
        key = _tag_key(raw)
        if key in _INVALID_TAGS:
            continue
        if _is_sentence_like_tag(raw):
            continue
        if for_pov and field == "action" and key in _POV_DROP_ACTION_TAGS:
            continue
        # Tags that already use underscores keep their original spelling.
        kept.append(raw if "_" in raw else key)
    return ", ".join(kept)


def _reconcile_environment_tags(env_str: str) -> str:
    """Resolve indoor/outdoor contradictions in favour of indoor.

    Detection is by substring (e.g. ``bedroom`` counts as indoor), but only
    tags that exactly equal an outdoor marker are removed.
    """
    tags = _split_tag_input(env_str)
    keys = {_tag_key(t) for t in tags}
    has_indoor = any(m in k for k in keys for m in _INDOOR_ENV_MARKERS)
    has_outdoor = any(m in k for k in keys for m in _OUTDOOR_ENV_MARKERS)
    if has_indoor and has_outdoor:
        tags = [t for t in tags if _tag_key(t) not in _OUTDOOR_ENV_MARKERS]
    return ", ".join(tags)


def _sanitize_pov_prose(desc: str, scene: dict) -> str:
    """Remove sentences that would put a second person into a POV image.

    Non-POV scenes are returned stripped but otherwise untouched.
    """
    if not desc or not desc.strip():
        return ""
    if scene.get("shot_type") != "first_person_pov":
        return desc.strip()

    kept: list[str] = []
    for sentence in re.split(r"(?<=[.!?])\s+", desc.strip()):
        s = sentence.strip()
        if not s:
            continue
        if _POV_PROSE_BANNED.search(s):
            continue
        if re.search(r"\bwolfgirl\b", s, re.I) and re.search(
            r"\b(walks|walking|stands)\b", s, re.I
        ):
            continue
        kept.append(s)

    out = " ".join(kept).strip()
    return re.sub(r"\bat the viewer\b", "at the camera", out, flags=re.IGNORECASE)


def _sanitize_scene_fields(scene: dict) -> dict:
    """Return a copy of *scene* with tag fields and scene_description cleaned."""
    scene = dict(scene)
    for_pov = scene.get("shot_type") == "first_person_pov"

    scene["action_tags"] = _filter_tag_field(
        _as_tag_string(scene.get("action_tags")), for_pov=for_pov, field="action"
    )
    env = _filter_tag_field(
        _as_tag_string(scene.get("environment_tags")), for_pov=False, field="env"
    )
    scene["environment_tags"] = _reconcile_environment_tags(env)

    desc = scene.get("scene_description")
    scene["scene_description"] = _sanitize_pov_prose(
        desc.strip() if isinstance(desc, str) else "", scene
    )
    return scene


def _scene_should_generate(scene: dict) -> bool:
    """False only when the scene explicitly sets ``should_generate`` to ``False``."""
    return scene.get("should_generate") is not False


def _sanitize_tags_string(tag_str: str) -> str:
    """De-duplicate tags and drop invalid, junk and very short (<= 2 chars) ones."""
    if not tag_str:
        return ""
    out: list[str] = []
    seen: set[str] = set()
    for raw in tag_str.split(","):
        t = raw.strip()
        if not t:
            continue
        key = _tag_key(t)
        if key in seen or key in _INVALID_TAGS:
            continue
        if "_" not in key and key in _JUNK_STANDALONE_TAGS:
            continue
        if len(key) <= 2:
            continue
        seen.add(key)
        out.append(t if "_" in t else key)
    return ", ".join(out)


def _quality_prefix() -> str:
    """Return the quality tags matching the configured model family."""
    if _is_pony():
        return "score_9, score_8_up, score_7_up, source_anime, highres"
    if _is_anima():
        return "masterpiece, best quality, score_7, anime"
    return "masterpiece, best quality, highres"


def _appearance_for_persona(persona: dict | None) -> str:
    """Tag core uses appearance_tags only (prose is for LLM context, not Comfy tag line)."""
    return _sanitize_tags_string((persona or {}).get("appearance_tags", ""))


def _dedupe_outfit_tags(outfit_tags: str) -> str:
    """Drop plain ``jeans`` when a more specific jeans tag is also present."""
    tags = _split_tag_input(outfit_tags)
    keys = {_tag_key(t) for t in tags}
    if len(keys & {"jeans", "ripped_jeans", "black_jeans"}) > 1 and "jeans" in keys:
        tags = [t for t in tags if _tag_key(t) != "jeans"]
    return ", ".join(tags)


def _outfit_tags_from_json(outfit_json: str) -> str:
    """Decode a JSON list of outfit tags into a comma-separated string.

    Best effort: anything that is not a JSON list of strings yields ``""``.
    """
    try:
        outfit_list = json.loads(outfit_json or "[]")
        if not isinstance(outfit_list, list):
            return ""
        return ", ".join(outfit_list)
    except (TypeError, ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError; TypeError covers non-str
        # input and non-str list items.
        return ""


def _scene_has_physical_contact(scene: dict) -> bool:
    """True when the action tags contain any contact keyword (substring match)."""
    action = (scene.get("action_tags") or "").lower()
    return any(k in action for k in _CONTACT_ACTION_KEYWORDS)


def _infer_pov_cue_from_action(action_tags: str) -> str:
    """Guess a ``pov_cue`` key from action tags; falls back to ``face_to_face``."""
    action = (action_tags or "").lower()

    def has(*keywords: str) -> bool:
        return any(k in action for k in keywords)

    if has("holding_hands", "hand_holding", "walking", "strolling"):
        return "walking_together"
    # A doorway alone is not enough; it needs an inviting gesture as well.
    if has("doorway", "door", "entry", "threshold") and has(
        "arms_out", "hug", "embrace", "inviting"
    ):
        return "doorway_invite"
    if has("arms_out", "reaching", "inviting_hug", "hug", "embrace"):
        return "reach_to_viewer"
    if has("sitting", "lying", "bed"):
        return "dialogue_close"
    return "face_to_face"


def _normalized_pov_cue(scene: dict) -> str:
    """Return the scene's ``pov_cue`` as a lowercase underscore-joined key."""
    cue = str(scene.get("pov_cue") or "")
    return cue.strip().lower().replace("-", "_").replace(" ", "_")


def _build_pov_phrase(scene: dict) -> str:
    """Return the POV framing phrase for a first-person scene, else ``""``."""
    if scene.get("shot_type") != "first_person_pov":
        return ""
    cue = _normalized_pov_cue(scene)
    if cue in POV_CUE_PHRASES:
        return POV_CUE_PHRASES[cue]
    inferred = _infer_pov_cue_from_action(scene.get("action_tags", ""))
    return POV_CUE_PHRASES.get(inferred, POV_CUE_DEFAULT)


def _lora_tag(persona: dict | None) -> str:
    """Return the persona's LoRA prompt tag, or ``""`` when no LoRA is set.

    NOTE(review): the tag text was lost in the source; it is rebuilt here in
    the common ``<lora:name:weight>`` form — confirm against the workflow's
    prompt parser.
    """
    lora = (persona or {}).get("lora_name", "")
    if not lora:
        return ""
    weight = (persona or {}).get("lora_weight", 0.8)
    return f"<lora:{lora}:{weight}>"


def _append_lora(parts: list[str], persona: dict | None) -> None:
    """Append the persona's LoRA tag to *parts* in place, if it has one."""
    tag = _lora_tag(persona)
    if tag:
        parts.append(tag)


def _dedupe_comma_join(parts: list[str]) -> str:
    """Join prompt fragments with ``", "`` and drop exact duplicate tags."""
    positive = ", ".join(p.strip() for p in parts if p and p.strip())
    seen: set[str] = set()
    deduped: list[str] = []
    for tag in positive.split(", "):
        t = tag.strip()
        if t and t not in seen:
            seen.add(t)
            deduped.append(t)
    return ", ".join(deduped)


def _build_tag_core(scene: dict, persona: dict | None, outfit_tags: str = "") -> str:
    """Anchor + structure: quality, appearance, outfit, action/env tags, LoRA.

    No POV prose, no scene_description.  Landscape shots use environment tags
    only; non-Anima POV shots get explicit ``pov`` tags.
    """
    parts = [_quality_prefix()]
    appearance = _appearance_for_persona(persona)
    if appearance:
        parts.append(appearance)
    if outfit_tags:
        parts.append(_sanitize_tags_string(_dedupe_outfit_tags(outfit_tags)))

    if scene.get("shot_type") == "landscape":
        parts.append(_sanitize_tags_string(scene.get("environment_tags", "")))
    else:
        if not _is_anima() and scene.get("shot_type") == "first_person_pov":
            parts.append("pov, first-person view, looking at viewer")
        parts.append(_sanitize_tags_string(scene.get("action_tags", "")))
        parts.append(_sanitize_tags_string(scene.get("environment_tags", "")))

    _append_lora(parts, persona)
    return _dedupe_comma_join(parts)


def build_positive_prompt_tags_only(scene: dict, persona: dict | None, outfit_tags: str = "") -> str:
    """Tags + contextual POV phrase (Anima) or legacy Pony path."""
    if not _is_anima():
        return build_positive_prompt(scene, persona, outfit_tags)
    core = _build_tag_core(scene, persona, outfit_tags)
    pov = _build_pov_phrase(scene)
    if pov:
        return f"{core}, {pov}" if core else pov
    return core


def _tag_tokens_for_dedupe(tag_line: str) -> set[str]:
    """Return the lowercase words (4+ letters) that occur in a tag line.

    Angle-bracket chunks such as LoRA tags are skipped.  Tokenisation mirrors
    ``_trim_redundant_scene_description`` so that both sides are comparable.
    NOTE(review): the original body was truncated in the source; this is a
    reconstruction — confirm.
    """
    without_markup = re.sub(r"<[^>]*>", " ", tag_line or "")
    return set(re.findall(r"[a-z]{4,}", without_markup.lower()))


def _trim_redundant_scene_description(desc: str, tag_line: str) -> str:
    """Drop sentences whose words mostly repeat what the tag line already says.

    A sentence is removed when at least 62% of its 4+ letter words occur in
    *tag_line*.
    """
    tag_tokens = _tag_tokens_for_dedupe(tag_line)
    if not tag_tokens or not desc.strip():
        return desc.strip()

    kept: list[str] = []
    for sentence in re.split(r"(?<=[.!?])\s+", desc.strip()):
        s = sentence.strip()
        if not s:
            continue
        words = [w.lower() for w in re.findall(r"[a-zA-Z]{4,}", s)]
        if not words:
            kept.append(s)
            continue
        overlap = sum(1 for w in words if w in tag_tokens) / len(words)
        if overlap < 0.62:
            kept.append(s)
    return " ".join(kept).strip()


def _extract_illustrate_content(content: str, max_chars: int = 1400) -> str:
    """Long assistant posts (first_mes): use final beat after time-skip, last paragraphs.

    The text after the last time-skip phrase is used; if that is still longer
    than *max_chars*, the longest run of trailing paragraphs (up to three)
    that fits is returned, or the tail of the last paragraph.
    """
    text = strip_image_prompt_tag(content).strip()
    if not text:
        return ""

    # A time-skip phrase at the very end leaves an empty final chunk, so take
    # the last chunk that still has content.
    chunks = [c.strip() for c in _TIME_SKIP_RE.split(text)]
    non_empty = [c for c in chunks if c]
    if len(chunks) > 1 and non_empty:
        text = non_empty[-1]

    if len(text) <= max_chars:
        return text

    paragraphs = [p.strip() for p in re.split(r"\n\s*\n", text) if p.strip()]
    if paragraphs:
        for n in (1, 2, 3):
            tail = "\n\n".join(paragraphs[-n:])
            if len(tail) <= max_chars:
                return tail
        return paragraphs[-1][-max_chars:]
    return text[-max_chars:]


def _fallback_mood_prose(scene: dict) -> str:
    """Return a canned mood sentence for the scene's (explicit or inferred) POV cue."""
    cue = _normalized_pov_cue(scene)
    if cue in _POV_MOOD_FALLBACK:
        return _POV_MOOD_FALLBACK[cue]
    inferred = _infer_pov_cue_from_action(scene.get("action_tags", ""))
    return _POV_MOOD_FALLBACK.get(inferred, "Soft atmosphere; her expression toward the camera.")


def _cap_scene_description(desc: str, max_words: int = 40, max_chars: int = 220) -> str:
    """Limit *desc* to *max_words* words and *max_chars* characters."""
    words = desc.split()
    if len(words) > max_words:
        desc = " ".join(words[:max_words])
    if len(desc) > max_chars:
        desc = desc[: max_chars - 3] + "..."
    return desc


def build_positive_prompt_hybrid(scene: dict, persona: dict | None, outfit_tags: str = "") -> str:
    """Production Anima prompt: tag core + POV cue + short mood prose.

    Non-Anima configurations fall back to ``build_positive_prompt``.
    """
    if not _is_anima():
        return build_positive_prompt(scene, persona, outfit_tags)

    base = build_positive_prompt_tags_only(scene, persona, outfit_tags)
    desc = _trim_redundant_scene_description(
        (scene.get("scene_description") or "").strip(),
        base,
    )
    desc = _cap_scene_description(desc)
    if not desc:
        desc = _cap_scene_description(_fallback_mood_prose(scene))
    if not desc:
        return base

    # Keep the LoRA tag at the very end, after the prose sentence.
    lora_tag = _lora_tag(persona)
    lora_suffix = f" {lora_tag}" if lora_tag else ""
    if lora_suffix and base.endswith(lora_suffix):
        base = base[: -len(lora_suffix)].rstrip(", ")
        return f"{base}. {desc}{lora_suffix}"
    return f"{base}. {desc}"


def build_positive_prompt(scene: dict, persona: dict | None, outfit_tags: str = "") -> str:
    """Legacy entry: Pony/non-Anima full prompt; Anima delegates to tags-only."""
    if _is_anima():
        return build_positive_prompt_tags_only(scene, persona, outfit_tags)
    # Outside Anima the tag core already includes the explicit POV tags.
    return _build_tag_core(scene, persona, outfit_tags)


def _negative_for_scene(scene: dict) -> str:
    """Build the negative prompt for the model family and shot type."""
    if _is_pony():
        negative = PONY_NEGATIVE
    elif _is_anima():
        negative = ANIMA_NEGATIVE
    else:
        negative = "low quality, blurry, bad anatomy, watermark, text"

    if scene.get("shot_type") == "first_person_pov":
        negative += ", third person, over the shoulder"
        viewer_visible = scene.get("viewer_body_visible") is True
        if not viewer_visible or _scene_has_physical_contact(scene):
            negative += ", " + POV_INTERACTION_NEGATIVE
    return negative


def _format_builder_user_block(persona: dict, messages: list[dict], outfit_json: str) -> str:
    """Render the user message for the scene-builder LLM.

    Only the last six user/assistant messages are considered.  The final
    message, plus the one before it when it comes from the other speaker,
    forms the ILLUSTRATE section; earlier messages become Context, each
    truncated to 800 characters.
    """
    lines: list[str] = []
    tags = (persona.get("appearance_tags") or "").strip()
    lines.append(f"Character appearance (tags): {tags}")

    prose = (persona.get("appearance_prose") or "").strip()
    if _is_anima() and prose and prose != tags:
        snippet = prose[:300] + ("..." if len(prose) > 300 else "")
        lines.append(f"Character notes (do not copy into tags or scene_description): {snippet}")

    outfit_ref = _outfit_tags_from_json(outfit_json)
    if outfit_ref:
        lines.append(f"Current outfit (tags): {outfit_ref}")

    recent = [m for m in messages if m.get("role") in ("user", "assistant")][-6:]
    if not recent:
        lines.append("\nChat:\n(no messages — return should_generate=false)")
        return "\n".join(lines)

    other_role = "user" if recent[-1]["role"] == "assistant" else "assistant"
    pair = len(recent) >= 2 and recent[-2]["role"] == other_role
    n_illustrate = 2 if pair else 1
    # Split by position, not equality, so an earlier message with identical
    # text is still kept as context.
    illustrate = recent[-n_illustrate:]
    context = recent[:-n_illustrate]

    lines.append("\n=== ILLUSTRATE (draw THIS beat only) ===")
    for m in illustrate:
        raw = m.get("content") or ""
        if m.get("role") == "assistant":
            content = _extract_illustrate_content(raw)
        else:
            content = strip_image_prompt_tag(raw)
        lines.append(f"{m['role']}: {content}")

    if context:
        lines.append("\n=== Context (outfit/location hints only — do not illustrate old beats) ===")
        for m in context:
            content = strip_image_prompt_tag(m.get("content") or "")
            if len(content) > 800:
                content = content[:797] + "..."
            lines.append(f"{m['role']}: {content}")

    return "\n".join(lines)


def _parse_scene_json(raw: str) -> dict:
    """Parse the builder LLM's reply into a normalised scene dict.

    Markdown code fences are stripped.  If the reply still is not valid JSON,
    the outermost ``{...}`` span is tried before giving up.

    Raises:
        ValueError: the reply is not JSON (``json.JSONDecodeError``) or the
            decoded value is not an object.
    """
    cleaned = raw.strip()
    if cleaned.startswith("```"):
        cleaned = re.sub(r"^```\w*\n?", "", cleaned)
        cleaned = re.sub(r"\n?```$", "", cleaned)

    try:
        scene = json.loads(cleaned)
    except json.JSONDecodeError:
        # Models sometimes wrap the object in a sentence of prose.
        start, end = cleaned.find("{"), cleaned.rfind("}")
        if start == -1 or end <= start:
            raise
        scene = json.loads(cleaned[start : end + 1])

    if not isinstance(scene, dict):
        raise ValueError("LLM returned non-object JSON")
    return _normalize_shot_type(scene)


def _bundle_from_scene(scene: dict, persona: dict, outfit_tags: str) -> SdPromptBundle:
    """Assemble the prompt bundle for a scene.

    Anima uses the hybrid prompt as ``tag_full`` and, when dual compare is
    enabled, the tags-only prompt as ``desc_full``.
    """
    negative = _negative_for_scene(scene)

    if _is_anima():
        hybrid = build_positive_prompt_hybrid(scene, persona, outfit_tags)
        desc_full = None
        if anima_dual_enabled():
            tags_only = build_positive_prompt_tags_only(scene, persona, outfit_tags)
            desc_full = tags_only + NEGATIVE_PROMPT_SEPARATOR + negative
        return SdPromptBundle(
            tag_full=hybrid + NEGATIVE_PROMPT_SEPARATOR + negative,
            negative=negative,
            desc_full=desc_full,
        )

    positive = build_positive_prompt(scene, persona, outfit_tags)
    return SdPromptBundle(
        tag_full=positive + NEGATIVE_PROMPT_SEPARATOR + negative,
        negative=negative,
        desc_full=None,
    )


def _parse_chat_excerpt(excerpt: str) -> list[dict]:
    """Parse ``role: text`` lines into message dicts.

    Recognised prefixes (case-insensitive) are ``user:``, ``assistant:`` and
    ``system:``.  Blank lines are skipped; a line without a known prefix
    becomes its own ``user`` message.
    """
    prefixes = (("user:", "user"), ("assistant:", "assistant"), ("system:", "system"))
    messages: list[dict] = []
    for line in (excerpt or "").splitlines():
        line = line.strip()
        if not line:
            continue
        lower = line.lower()
        for prefix, role in prefixes:
            if lower.startswith(prefix):
                messages.append({"role": role, "content": line[len(prefix):].strip()})
                break
        else:
            messages.append({"role": "user", "content": line})
    return messages


def _builder_messages(user_block: str) -> list[dict]:
    """Return the system + user messages sent to the scene-builder LLM."""
    return [
        {"role": "system", "content": _builder_system()},
        {"role": "user", "content": user_block},
    ]


async def _call_builder_llm(builder_messages: list[dict]) -> str:
    """Send the builder messages via ``SD_PROMPT_MODEL`` when set, else the default sender."""
    if SD_PROMPT_MODEL:
        return await send_message_with_model(builder_messages, SD_PROMPT_MODEL)
    return await send_message(builder_messages)


async def run_prompt_builder(
    persona_id: str,
    *,
    messages: list[dict] | None = None,
    chat_excerpt: str = "",
    outfit_json: str = "[]",
    appearance_override: str | None = None,
    use_prose: bool = False,
) -> dict:
    """Debug: full SD prompt builder pipeline with LLM raw output.

    Args:
        persona_id: persona to load; a missing persona is treated as ``{}``.
        messages: chat messages; when None, *chat_excerpt* is parsed instead.
        chat_excerpt: ``role: text`` lines, used only when *messages* is None.
        outfit_json: JSON list of outfit tags.
        appearance_override: replaces the persona's ``appearance_tags`` when not None.
        use_prose: accepted for compatibility; not read by this function.

    Returns:
        A dict with the builder inputs, the raw LLM output, the parsed scene
        and the resulting prompts.  Failures are reported under ``"error"``
        rather than raised.
    """
    persona = await get_persona(persona_id) or {}
    if appearance_override is not None:
        persona = {**persona, "appearance_tags": appearance_override}

    recent = messages if messages is not None else _parse_chat_excerpt(chat_excerpt)
    recent = [m for m in recent if m.get("role") in ("user", "assistant")]
    user_block = _format_builder_user_block(persona, recent, outfit_json)
    builder_messages = _builder_messages(user_block)

    result: dict = {
        "persona_id": persona_id,
        "sd_prompt_model": SD_PROMPT_MODEL or "SYSTEM_MODEL",
        "builder_system": _builder_system(),
        "builder_user": user_block,
        "anima_dual": anima_dual_enabled(),
    }

    raw = ""
    try:
        raw = await _call_builder_llm(builder_messages)
        result["llm_raw"] = raw
        scene = _parse_scene_json(raw)
        result["scene"] = scene
        if not _scene_should_generate(scene):
            result["skipped"] = True
            result["error"] = "should_generate=false"
            return result

        outfit_tags = _outfit_tags_from_json(outfit_json)
        negative = _negative_for_scene(scene)
        result["negative"] = negative

        if _is_anima():
            tags_only = build_positive_prompt_tags_only(scene, persona, outfit_tags)
            hybrid = build_positive_prompt_hybrid(scene, persona, outfit_tags)
            result["tag_positive"] = tags_only
            result["hybrid_positive"] = hybrid
            result["tags_only_full"] = tags_only + NEGATIVE_PROMPT_SEPARATOR + negative
            result["hybrid_full"] = hybrid + NEGATIVE_PROMPT_SEPARATOR + negative
            result["tag_full"] = result["hybrid_full"]
        else:
            positive = build_positive_prompt(scene, persona, outfit_tags)
            result["tag_positive"] = positive
            result["tag_full"] = positive + NEGATIVE_PROMPT_SEPARATOR + negative
    except Exception as e:
        # Debug endpoint: report the failure to the caller instead of raising.
        result["error"] = str(e)
        result["llm_raw"] = raw or result.get("llm_raw", "")
    return result


async def generate_sd_prompt(
    messages: list,
    persona_id: str,
    outfit_json: str = "[]",
) -> SdPromptBundle | None:
    """Generate the SD prompt bundle for the latest chat beat.

    Returns None when the persona is unknown, there are no user/assistant
    messages, the LLM call or JSON parsing fails, or the scene sets
    ``should_generate`` to false.
    """
    persona = await get_persona(persona_id)
    if not persona:
        return None

    recent = [m for m in messages if m.get("role") in ("user", "assistant")]
    if not recent:
        return None

    user_block = _format_builder_user_block(persona, recent, outfit_json)
    builder_messages = _builder_messages(user_block)

    raw = ""
    try:
        raw = await _call_builder_llm(builder_messages)
        scene = _parse_scene_json(raw)
    except Exception as e:
        logger.warning("sd_prompt failed: %s raw=%.200s", e, raw)
        return None

    if not _scene_should_generate(scene):
        logger.info("sd_prompt: skipped (should_generate=false)")
        return None

    outfit_tags = _outfit_tags_from_json(outfit_json)
    bundle = _bundle_from_scene(scene, persona, outfit_tags)

    if anima_dual_enabled() and bundle.desc_full:
        logger.info(
            "Anima prompts: hybrid=%.80s | tags_only=%.80s",
            bundle.tag_full.split(NEGATIVE_PROMPT_SEPARATOR)[0],
            bundle.desc_full.split(NEGATIVE_PROMPT_SEPARATOR)[0],
        )
    return bundle