Fixed SD Prompt
This commit is contained in:
+641
-67
@@ -2,26 +2,115 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
from services.llm import send_message, send_message_with_model
|
||||
from services.personas import get_persona
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
NEGATIVE_PROMPT_SEPARATOR = "\n\n__NEGATIVE_PROMPT__\n\n"
|
||||
|
||||
PROMPT_BUILDER_SYSTEM = """You are a Stable Diffusion prompt engineer for anime illustration models.
|
||||
Given a roleplay chat excerpt, output ONLY valid JSON (no markdown):
|
||||
{
|
||||
"should_generate": true,
|
||||
"shot_type": "first_person_pov" | "landscape" | "third_person",
|
||||
"action_tags": "booru-style tags for pose/action/expression, e.g. 'sitting, smiling, holding_cup'",
|
||||
"environment_tags": "booru-style tags for location/lighting/time, e.g. 'indoors, kitchen, sunlight, daytime'"
|
||||
"action_tags": "booru-style tags for pose/action/expression",
|
||||
"environment_tags": "booru-style tags for location/lighting/time"
|
||||
}
|
||||
Rules:
|
||||
- ONLY use real danbooru/e621 tags. Multi-word concepts MUST be underscore_joined: 'fox_ears' not 'fox ears'.
|
||||
- ONLY use real danbooru/e621 tags. Multi-word concepts MUST be underscore_joined.
|
||||
- Do NOT include appearance/character tags — those are provided separately.
|
||||
- Do NOT include quality tags, model names, style words, 'pov', or category/metadata words.
|
||||
- Do NOT invent tags. If unsure — omit.
|
||||
- Keep each field to 3-6 tags."""
|
||||
- Keep action_tags and environment_tags to 3-6 tags each.
|
||||
- shot_type: default "first_person_pov" for dialogue/intimacy at arm's length. "third_person" only for wide action (fight, chase). "landscape" only when environment is the focus.
|
||||
- should_generate: false for non-visual beats (pure internal monologue, time skips with no new pose, empty lines).
|
||||
- NEVER use negative words in tag fields (not, without, naked, nsfw, etc.)."""
|
||||
|
||||
ANIMA_BUILDER_EXTRA = """
|
||||
Anima hybrid mode — ALSO include:
|
||||
"pov_cue": "face_to_face" | "walking_together" | "doorway_invite" | "reach_to_viewer" | "dialogue_close",
|
||||
"viewer_body_visible": false,
|
||||
"scene_description": "ONE short English sentence (max 40 words). Camera POV: what the viewer sees. Mood/atmosphere only — do NOT repeat tags from action_tags/environment_tags. Do NOT list comma-separated booru tags."
|
||||
POV / interaction rules:
|
||||
- Default viewer_body_visible: false. The viewer's body, hands, or face must NOT appear in the image — only the character toward the camera.
|
||||
- For hugs, embraces: use arms_out, reaching_towards_viewer, inviting_hug — NOT holding_hands, lifting, carrying, nose_rub (these draw a second body in POV).
|
||||
- For long messages with time skips ("About an hour later..."), illustrate ONLY the final visible beat (usually the last paragraph).
|
||||
- scene_description: describe HER toward the camera only — NEVER "someone", "both", "with you", "hand in hand with", or another person's body.
|
||||
- NEVER use tags: looking_at_each_other, couple, 2girls, 2boys, multiple_girls. For POV walking together omit holding_hands (use walking, smiling, reaching_towards_viewer instead).
|
||||
- pov_cue: pick the framing that matches the CURRENT beat (walking_together for strolling side by side, doorway_invite for doorway with arms open, reach_to_viewer when she reaches toward camera, face_to_face for close dialogue).
|
||||
- Illustrate ONLY the beat under === ILLUSTRATE ===; use === Context === for outfit/location hints only.
|
||||
- Do NOT put English sentences in action_tags or environment_tags — tags only."""
|
||||
|
||||
POV_CUE_PHRASES: dict[str, str] = {
|
||||
"face_to_face": "POV: close face-to-face, she looks directly at you",
|
||||
"walking_together": "POV: walking beside you, profile and shared path visible",
|
||||
"doorway_invite": "POV: she blocks the doorway, arms open toward you",
|
||||
"reach_to_viewer": "POV: she reaches toward the camera",
|
||||
"dialogue_close": "POV: close conversation, she faces you at arm's length",
|
||||
}
|
||||
|
||||
POV_CUE_DEFAULT = "POV: she stands before you, facing the camera"
|
||||
|
||||
POV_INTERACTION_NEGATIVE = (
|
||||
"duplicate, clone, multiple_girls, 2girls, extra_person, pov hands, "
|
||||
"disembodied hands, extra arms, second person"
|
||||
)
|
||||
|
||||
_CONTACT_ACTION_KEYWORDS = (
|
||||
"hug", "holding_hands", "hand_holding", "arms_out", "embrace",
|
||||
"reaching", "inviting_hug", "arm_around", "cuddling",
|
||||
)
|
||||
|
||||
_JUNK_STANDALONE_TAGS = frozenset({
|
||||
"white", "black", "skin", "ear", "ears", "girl", "boy", "fox", "wolf", "cat",
|
||||
"short", "tall", "slim", "golden", "silver", "red", "blue", "green", "purple",
|
||||
"pink", "brown", "blonde", "eye", "eyes", "hair",
|
||||
})
|
||||
|
||||
_INVALID_TAGS = frozenset({
|
||||
"pumped_up", "pumped", "looking_at_each_other", "couple",
|
||||
"2girls", "2boys", "multiple_girls", "multiple_boys", "duo",
|
||||
})
|
||||
|
||||
_POV_DROP_ACTION_TAGS = frozenset({
|
||||
"holding_hands", "hand_holding", "looking_at_each_other", "couple",
|
||||
"lifting", "carry", "carrying", "princess_carry", "nose_rub", "nose_boop",
|
||||
})
|
||||
|
||||
_TIME_SKIP_RE = re.compile(
|
||||
r"(?i)\b(?:about an hour later|hours later|later that (?:day|evening|night)|"
|
||||
r"the next (?:day|morning|evening)|meanwhile|after (?:some )?time)\b[.…\s]*",
|
||||
)
|
||||
|
||||
_POV_MOOD_FALLBACK: dict[str, str] = {
|
||||
"walking_together": "Easy warmth and quiet laughter in the afternoon light.",
|
||||
"doorway_invite": "Cool air and playful tension as she waits in the doorway.",
|
||||
"reach_to_viewer": "A charged moment as she reaches toward the camera.",
|
||||
"face_to_face": "Her expression softens in close focus toward the camera.",
|
||||
"dialogue_close": "Intimate calm in the space between you.",
|
||||
}
|
||||
|
||||
_INDOOR_ENV_MARKERS = frozenset({"doorway", "indoors", "indoor", "apartment", "inside", "room"})
|
||||
_OUTDOOR_ENV_MARKERS = frozenset({"outdoor", "outdoors", "outside", "street"})
|
||||
|
||||
_POV_PROSE_BANNED = re.compile(
|
||||
r"\b(someone|both|together with|hand in hand with|another person|second person|"
|
||||
r"your hands|your fingers|your embrace|your heat|intertwined|with you|"
|
||||
r"demands your|before you)\b",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
SD_ANIMA_DUAL_COMPARE = os.getenv("SD_ANIMA_DUAL_COMPARE", "false").lower() in ("1", "true", "yes")
|
||||
|
||||
|
||||
@dataclass
|
||||
class SdPromptBundle:
|
||||
tag_full: str
|
||||
negative: str
|
||||
desc_full: str | None = None
|
||||
|
||||
|
||||
def extract_image_prompt_tag(text: str) -> str | None:
|
||||
@@ -44,7 +133,7 @@ SD_UNET = os.getenv("SD_UNET", "")
|
||||
SD_PROMPT_MODEL = os.getenv("SD_PROMPT_MODEL", "").strip()
|
||||
|
||||
PONY_CHECKPOINTS = {"ponyDiffusionV6XL_v6StartWithThisOne.safetensors"}
|
||||
PONY_NEGATIVE = "score_1, score_2, score_3, score_4, worst quality, low quality, blurry, bad anatomy, watermark, text, censored"
|
||||
PONY_NEGATIVE = "score_1, score_2, score_3, score_4, worst quality, low quality, blurry, bad anatomy, watermark, text, censored"
|
||||
ANIMA_NEGATIVE = "worst quality, low quality, score_1, score_2, score_3, blurry, jpeg artifacts, sepia"
|
||||
|
||||
|
||||
@@ -56,37 +145,201 @@ def _is_anima() -> bool:
|
||||
return bool(SD_UNET) and not SD_CHECKPOINT
|
||||
|
||||
|
||||
def build_positive_prompt(scene: dict, persona: dict | None, outfit_tags: str = "") -> str:
|
||||
def anima_dual_enabled() -> bool:
    """Return True only when the Anima backend is active and SD_ANIMA_DUAL_COMPARE is set."""
    if not _is_anima():
        return False
    return SD_ANIMA_DUAL_COMPARE
|
||||
|
||||
|
||||
def _builder_system() -> str:
    """Return the system prompt for the scene-builder LLM call.

    The Anima-specific instructions are appended only when the Anima backend is active.
    """
    anima_extra = ANIMA_BUILDER_EXTRA if _is_anima() else ""
    return PROMPT_BUILDER_SYSTEM + anima_extra
|
||||
|
||||
|
||||
def _normalize_shot_type(scene: dict) -> dict:
    """Resolve ``shot_type`` to one of the three supported values and sanitize the scene.

    "landscape" is kept as is. "third_person" is kept only when the action tags
    mention a wide-action keyword; anything else becomes "first_person_pov".
    The passed dict is updated in place before a sanitized copy is returned.
    """
    wide_markers = ("battle", "fight", "chase", "running", "crowd", "wide_shot", "group_shot")
    requested = (scene.get("shot_type") or "").strip().lower()

    if requested == "landscape":
        resolved = "landscape"
    elif requested == "third_person" and any(
        marker in (scene.get("action_tags") or "").lower() for marker in wide_markers
    ):
        resolved = "third_person"
    else:
        resolved = "first_person_pov"

    scene["shot_type"] = resolved
    # POV shots default to hiding the viewer's body unless the LLM said otherwise.
    if resolved == "first_person_pov" and scene.get("viewer_body_visible") is None:
        scene["viewer_body_visible"] = False
    return _sanitize_scene_fields(scene)
|
||||
|
||||
|
||||
def _split_tag_input(tag_str: str) -> list[str]:
|
||||
return [t.strip() for t in (tag_str or "").split(",") if t.strip()]
|
||||
|
||||
|
||||
def _is_sentence_like_tag(tag: str) -> bool:
|
||||
t = tag.strip()
|
||||
if len(t) > 45:
|
||||
return True
|
||||
if re.search(r"[.!?]", t):
|
||||
return True
|
||||
words = t.split()
|
||||
return len(words) >= 5 and "_" not in t
|
||||
|
||||
|
||||
def _filter_tag_field(tag_str: str, *, for_pov: bool, field: str) -> str:
    """Drop invalid, sentence-like and (for POV action fields) POV-breaking tags.

    Tags that already contain an underscore are kept verbatim; the rest are
    lowercased with spaces replaced by underscores. Returns a ", "-joined string.
    """
    drop_pov_actions = for_pov and field == "action"

    def _normalize(raw: str) -> str:
        return raw.lower().replace(" ", "_")

    def _keep(raw: str) -> bool:
        key = _normalize(raw)
        if key in _INVALID_TAGS or _is_sentence_like_tag(raw):
            return False
        return not (drop_pov_actions and key in _POV_DROP_ACTION_TAGS)

    return ", ".join(
        raw if "_" in raw else _normalize(raw)
        for raw in _split_tag_input(tag_str)
        if _keep(raw)
    )
|
||||
|
||||
|
||||
def _reconcile_environment_tags(env_str: str) -> str:
    """Remove outdoor marker tags when the environment also reads as indoors.

    Detection is by substring (a marker appearing anywhere inside a tag),
    while removal only affects tags that exactly equal an outdoor marker.
    """
    tags = _split_tag_input(env_str)
    normalized = [tag.lower().replace(" ", "_") for tag in tags]

    def _mentions(markers) -> bool:
        return any(marker in key for key in normalized for marker in markers)

    if _mentions(_INDOOR_ENV_MARKERS) and _mentions(_OUTDOOR_ENV_MARKERS):
        tags = [
            tag for tag, key in zip(tags, normalized) if key not in _OUTDOOR_ENV_MARKERS
        ]
    return ", ".join(tags)
|
||||
|
||||
|
||||
def _sanitize_pov_prose(desc: str, scene: dict) -> str:
    """Clean ``scene_description`` prose for first-person POV scenes.

    Non-POV scenes only get their description stripped. For POV scenes,
    sentences matching ``_POV_PROSE_BANNED`` are dropped, as are sentences
    that mention "wolfgirl" together with walks/walking/stands; finally
    "at the viewer" is rewritten to "at the camera".
    """
    text = (desc or "").strip()
    if not text:
        return ""
    if scene.get("shot_type") != "first_person_pov":
        return text

    def _is_allowed(sentence: str) -> bool:
        if _POV_PROSE_BANNED.search(sentence):
            return False
        names_wolfgirl = re.search(r"\bwolfgirl\b", sentence, re.I)
        has_stance_verb = re.search(r"\b(walks|walking|stands)\b", sentence, re.I)
        return not (names_wolfgirl and has_stance_verb)

    sentences = (part.strip() for part in re.split(r"(?<=[.!?])\s+", text))
    joined = " ".join(s for s in sentences if s and _is_allowed(s)).strip()
    return re.sub(r"\bat the viewer\b", "at the camera", joined, flags=re.IGNORECASE)
|
||||
|
||||
|
||||
def _sanitize_scene_fields(scene: dict) -> dict:
    """Return a shallow copy of ``scene`` with its tag and prose fields cleaned.

    Action tags are filtered with the POV rules when the shot is first-person;
    environment tags are filtered and then reconciled for indoor/outdoor
    conflicts; the scene description goes through the POV prose sanitizer.
    """
    cleaned = dict(scene)
    is_pov = cleaned.get("shot_type") == "first_person_pov"

    raw_action = cleaned.get("action_tags") or ""
    cleaned["action_tags"] = _filter_tag_field(raw_action, for_pov=is_pov, field="action")

    raw_env = cleaned.get("environment_tags") or ""
    filtered_env = _filter_tag_field(raw_env, for_pov=False, field="env")
    cleaned["environment_tags"] = _reconcile_environment_tags(filtered_env)

    raw_desc = (cleaned.get("scene_description") or "").strip()
    cleaned["scene_description"] = _sanitize_pov_prose(raw_desc, cleaned)
    return cleaned
|
||||
|
||||
|
||||
def _scene_should_generate(scene: dict) -> bool:
|
||||
if scene.get("should_generate") is False:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _sanitize_tags_string(tag_str: str) -> str:
    """De-duplicate and filter a comma-separated tag string.

    Tags are compared by their lowercased, underscore-joined form. Duplicates,
    entries in ``_INVALID_TAGS``, single-word entries in
    ``_JUNK_STANDALONE_TAGS`` and keys of two characters or fewer are dropped.
    Tags already containing an underscore are emitted verbatim; others are
    emitted in normalized form. First occurrence order is preserved.
    """
    if not tag_str:
        return ""

    emitted: dict[str, str] = {}  # normalized key -> text to output, in insertion order
    for piece in tag_str.split(","):
        tag = piece.strip()
        key = tag.lower().replace(" ", "_")
        rejected = (
            not tag
            or key in emitted
            or key in _INVALID_TAGS
            or ("_" not in key and key in _JUNK_STANDALONE_TAGS)
            or len(key) <= 2
        )
        if rejected:
            continue
        emitted[key] = tag if "_" in tag else key
    return ", ".join(emitted.values())
|
||||
|
||||
|
||||
def _quality_prefix() -> str:
|
||||
if _is_pony():
|
||||
quality = "score_9, score_8_up, score_7_up, source_anime, highres"
|
||||
elif _is_anima():
|
||||
quality = "masterpiece, best quality, score_7, anime"
|
||||
else:
|
||||
quality = "masterpiece, best quality, highres"
|
||||
return "score_9, score_8_up, score_7_up, source_anime, highres"
|
||||
if _is_anima():
|
||||
return "masterpiece, best quality, score_7, anime"
|
||||
return "masterpiece, best quality, highres"
|
||||
|
||||
parts = [quality]
|
||||
|
||||
appearance = (persona or {}).get("appearance_tags", "")
|
||||
if appearance:
|
||||
parts.append(appearance)
|
||||
if outfit_tags:
|
||||
parts.append(outfit_tags)
|
||||
def _appearance_for_persona(persona: dict | None) -> str:
    """Return the persona's sanitized ``appearance_tags`` ("" when there is no persona).

    Only the tag field is used here; appearance prose is not part of the tag line.
    """
    raw_tags = persona.get("appearance_tags", "") if persona else ""
    return _sanitize_tags_string(raw_tags)
|
||||
|
||||
if scene.get("shot_type") == "landscape":
|
||||
parts.append(scene.get("environment_tags", ""))
|
||||
else:
|
||||
if scene.get("shot_type") == "first_person_pov":
|
||||
parts.append("pov, first-person view, looking at viewer")
|
||||
parts.append(scene.get("action_tags", ""))
|
||||
parts.append(scene.get("environment_tags", ""))
|
||||
|
||||
def _dedupe_outfit_tags(outfit_tags: str) -> str:
    """Drop the generic "jeans" tag when a more specific jeans tag is also present.

    Specific variants recognized are "ripped_jeans" and "black_jeans".
    """
    tags = _split_tag_input(outfit_tags)
    normalized = [tag.lower().replace(" ", "_") for tag in tags]
    present = set(normalized)

    has_specific_jeans = bool(present & {"ripped_jeans", "black_jeans"})
    if "jeans" in present and has_specific_jeans:
        tags = [tag for tag, key in zip(tags, normalized) if key != "jeans"]
    return ", ".join(tags)
|
||||
|
||||
|
||||
def _scene_has_physical_contact(scene: dict) -> bool:
    """Return True when the action tags contain any contact keyword (substring match)."""
    action_text = (scene.get("action_tags") or "").lower()
    for keyword in _CONTACT_ACTION_KEYWORDS:
        if keyword in action_text:
            return True
    return False
|
||||
|
||||
|
||||
def _infer_pov_cue_from_action(action_tags: str) -> str:
|
||||
action = (action_tags or "").lower()
|
||||
if any(k in action for k in ("holding_hands", "hand_holding", "walking", "strolling")):
|
||||
return "walking_together"
|
||||
if any(k in action for k in ("doorway", "door", "entry", "threshold")):
|
||||
if any(k in action for k in ("arms_out", "hug", "embrace", "inviting")):
|
||||
return "doorway_invite"
|
||||
if any(k in action for k in ("arms_out", "reaching", "inviting_hug", "hug", "embrace")):
|
||||
return "reach_to_viewer"
|
||||
if any(k in action for k in ("sitting", "lying", "bed")):
|
||||
return "dialogue_close"
|
||||
return "face_to_face"
|
||||
|
||||
|
||||
def _build_pov_phrase(scene: dict) -> str:
    """Return the POV framing phrase for a first-person scene, or "" for other shots.

    The scene's ``pov_cue`` is used when it names a known cue (after
    normalizing case, hyphens and spaces); otherwise the cue is inferred from
    the action tags.
    """
    if scene.get("shot_type") != "first_person_pov":
        return ""
    cue = (scene.get("pov_cue") or "").strip().lower().replace("-", "_").replace(" ", "_")
    if cue not in POV_CUE_PHRASES:
        cue = _infer_pov_cue_from_action(scene.get("action_tags", ""))
    return POV_CUE_PHRASES.get(cue, POV_CUE_DEFAULT)
|
||||
|
||||
|
||||
def _append_lora(parts: list[str], persona: dict | None) -> None:
|
||||
lora = (persona or {}).get("lora_name", "")
|
||||
weight = (persona or {}).get("lora_weight", 0.8)
|
||||
if lora:
|
||||
parts.append(f"<lora:{lora}:{weight}>")
|
||||
|
||||
|
||||
def _dedupe_comma_join(parts: list[str]) -> str:
|
||||
positive = ", ".join(p.strip() for p in parts if p and p.strip())
|
||||
seen, deduped = set(), []
|
||||
seen: set[str] = set()
|
||||
deduped: list[str] = []
|
||||
for tag in positive.split(", "):
|
||||
t = tag.strip()
|
||||
if t and t not in seen:
|
||||
@@ -95,53 +348,152 @@ def build_positive_prompt(scene: dict, persona: dict | None, outfit_tags: str =
|
||||
return ", ".join(deduped)
|
||||
|
||||
|
||||
async def generate_sd_prompt(
|
||||
messages: list,
|
||||
persona_id: str,
|
||||
outfit_json: str = "[]",
|
||||
) -> tuple[str | None, str | None]:
|
||||
persona = await get_persona(persona_id)
|
||||
# Generate only if persona has appearance tags
|
||||
if not persona or not (persona.get("appearance_tags") or "").strip():
|
||||
logger.debug("sd_prompt skip: persona=%s no appearance_tags", persona_id)
|
||||
return None, None
|
||||
def _build_tag_core(scene: dict, persona: dict | None, outfit_tags: str = "") -> str:
    """Assemble the tag-only core of the positive prompt.

    Order: quality prefix, persona appearance, outfit, then for landscape
    shots only the environment tags, otherwise (optional POV tags,) action
    tags and environment tags, and finally the LoRA token. No POV prose and
    no scene description are included. The fixed POV tag string is added only
    on non-Anima backends.
    """
    shot = scene.get("shot_type")
    segments = [_quality_prefix()]

    appearance = _appearance_for_persona(persona)
    if appearance:
        segments.append(appearance)
    if outfit_tags:
        segments.append(_sanitize_tags_string(_dedupe_outfit_tags(outfit_tags)))

    if shot != "landscape":
        if shot == "first_person_pov" and not _is_anima():
            segments.append("pov, first-person view, looking at viewer")
        segments.append(_sanitize_tags_string(scene.get("action_tags", "")))
    segments.append(_sanitize_tags_string(scene.get("environment_tags", "")))

    _append_lora(segments, persona)
    return _dedupe_comma_join(segments)
|
||||
|
||||
recent = [m for m in messages if m["role"] in ("user", "assistant")][-6:]
|
||||
if not recent:
|
||||
return None, None
|
||||
|
||||
excerpt = "\n".join(f"{m['role']}: {strip_image_prompt_tag(m['content'])}" for m in recent)
|
||||
def build_positive_prompt_tags_only(scene: dict, persona: dict | None, outfit_tags: str = "") -> str:
    """Return the tag core followed by the contextual POV phrase (Anima).

    On non-Anima backends this delegates to ``build_positive_prompt``.
    """
    if not _is_anima():
        return build_positive_prompt(scene, persona, outfit_tags)
    core = _build_tag_core(scene, persona, outfit_tags)
    pov_phrase = _build_pov_phrase(scene)
    # Join only the non-empty pieces so no stray separator is produced.
    return ", ".join(piece for piece in (core, pov_phrase) if piece)
|
||||
|
||||
builder_messages = [
|
||||
{"role": "system", "content": PROMPT_BUILDER_SYSTEM},
|
||||
{"role": "user", "content": f"Chat:\n{excerpt}"},
|
||||
]
|
||||
|
||||
try:
|
||||
if SD_PROMPT_MODEL:
|
||||
raw = await send_message_with_model(builder_messages, SD_PROMPT_MODEL)
|
||||
else:
|
||||
raw = await send_message(builder_messages)
|
||||
raw = raw.strip()
|
||||
if raw.startswith("```"):
|
||||
raw = re.sub(r"^```\w*\n?", "", raw)
|
||||
raw = re.sub(r"\n?```$", "", raw)
|
||||
scene = json.loads(raw)
|
||||
if not isinstance(scene, dict):
|
||||
logger.warning("sd_prompt: LLM returned non-dict: %.100s", raw)
|
||||
return None, None
|
||||
except Exception as e:
|
||||
logger.warning("sd_prompt failed: %s raw=%.200s", e, locals().get("raw", ""))
|
||||
return None, None
|
||||
def _tag_tokens_for_dedupe(tag_line: str) -> set[str]:
|
||||
tokens: set[str] = set()
|
||||
for part in tag_line.replace("<lora:", " ").split(","):
|
||||
for word in re.split(r"[\s_./]+", part.lower()):
|
||||
w = word.strip()
|
||||
if len(w) >= 4:
|
||||
tokens.add(w)
|
||||
return tokens
|
||||
|
||||
try:
|
||||
outfit_list = json.loads(outfit_json or "[]")
|
||||
outfit_tags = ", ".join(outfit_list) if isinstance(outfit_list, list) else ""
|
||||
except Exception:
|
||||
outfit_tags = ""
|
||||
|
||||
positive = build_positive_prompt(scene, persona, outfit_tags)
|
||||
def _trim_redundant_scene_description(desc: str, tag_line: str) -> str:
    """Drop description sentences that mostly repeat words already in the tag line.

    A sentence is removed when at least 62% of its alphabetic words of four or
    more letters also appear among the tag-line tokens. Sentences with no such
    words are always kept.
    """
    tag_tokens = _tag_tokens_for_dedupe(tag_line)
    text = desc.strip()
    if not tag_tokens or not text:
        return text

    def _is_novel(sentence: str) -> bool:
        words = [w.lower() for w in re.findall(r"[a-zA-Z]{4,}", sentence)]
        if not words:
            return True
        shared = sum(1 for w in words if w in tag_tokens)
        return shared / len(words) < 0.62

    sentences = (chunk.strip() for chunk in re.split(r"(?<=[.!?])\s+", text))
    return " ".join(s for s in sentences if s and _is_novel(s)).strip()
|
||||
|
||||
|
||||
def _extract_illustrate_content(content: str, max_chars: int = 1400) -> str:
    """Reduce a long assistant post to the final beat worth illustrating.

    The image-prompt tag is stripped first. If the text contains a time-skip
    marker, only the part after the last marker is kept. Text that still
    exceeds ``max_chars`` is cut down to its trailing paragraphs.

    Args:
        content: Raw message content.
        max_chars: Maximum length of the returned excerpt.

    Returns:
        The excerpt, or "" when nothing remains after stripping.
    """
    text = strip_image_prompt_tag(content).strip()
    if not text:
        return ""

    chunks = _TIME_SKIP_RE.split(text)
    if len(chunks) > 1:
        text = chunks[-1].strip()
    if len(text) <= max_chars:
        return text

    paragraphs = [p.strip() for p in re.split(r"\n\s*\n", text) if p.strip()]
    if not paragraphs:
        return text[-max_chars:]

    # Try the longest tail first. Ascending order returned as soon as the last
    # paragraph fit, so the two- and three-paragraph tails were never reached.
    for count in (3, 2, 1):
        tail = "\n\n".join(paragraphs[-count:])
        if len(tail) <= max_chars:
            return tail
    # Even the last paragraph alone is too long: keep its final characters.
    return paragraphs[-1][-max_chars:]
|
||||
|
||||
|
||||
def _fallback_mood_prose(scene: dict) -> str:
    """Return a canned mood sentence for the scene's POV cue.

    The scene's ``pov_cue`` is used when it has a fallback entry (after
    normalizing case, hyphens and spaces); otherwise the cue is inferred from
    the action tags, with a generic sentence as the last resort.
    """
    cue = (scene.get("pov_cue") or "").strip().lower().replace("-", "_").replace(" ", "_")
    if cue not in _POV_MOOD_FALLBACK:
        cue = _infer_pov_cue_from_action(scene.get("action_tags", ""))
    return _POV_MOOD_FALLBACK.get(cue, "Soft atmosphere; her expression toward the camera.")
|
||||
|
||||
|
||||
def _cap_scene_description(desc: str, max_words: int = 40, max_chars: int = 220) -> str:
|
||||
words = desc.split()
|
||||
if len(words) > max_words:
|
||||
desc = " ".join(words[:max_words])
|
||||
if len(desc) > max_chars:
|
||||
desc = desc[: max_chars - 3] + "..."
|
||||
return desc
|
||||
|
||||
|
||||
def build_positive_prompt_hybrid(scene: dict, persona: dict | None, outfit_tags: str = "") -> str:
    """Build the production Anima prompt: tag core + POV cue + short mood prose.

    On non-Anima backends this delegates to ``build_positive_prompt``.

    The scene description is trimmed of sentences that repeat the tags and
    capped in length; when nothing remains, a canned mood sentence is used.
    If the tag line ends with the persona's LoRA token, that token is moved
    behind the prose so the sentence directly follows the tags.

    Args:
        scene: Sanitized scene dict from the builder LLM.
        persona: Persona record, or None.
        outfit_tags: Comma-separated outfit tags.

    Returns:
        The positive prompt string.
    """
    if not _is_anima():
        return build_positive_prompt(scene, persona, outfit_tags)

    base = build_positive_prompt_tags_only(scene, persona, outfit_tags)
    desc = _trim_redundant_scene_description(
        (scene.get("scene_description") or "").strip(),
        base,
    )
    desc = _cap_scene_description(desc)
    if not desc:
        desc = _cap_scene_description(_fallback_mood_prose(scene))
    if not desc:
        return base

    lora = (persona or {}).get("lora_name", "")
    weight = (persona or {}).get("lora_weight", 0.8)
    lora_suffix = f" <lora:{lora}:{weight}>" if lora else ""
    if lora_suffix and base.endswith(lora_suffix):
        # The tag line is ", "-joined, so cutting off " <lora:...>" leaves the
        # comma that preceded it; strip it to avoid emitting "tag,. prose".
        base = base[: -len(lora_suffix)].rstrip(", ")
        return f"{base}. {desc}{lora_suffix}"
    return f"{base}. {desc}"
|
||||
|
||||
|
||||
def build_positive_prompt(scene: dict, persona: dict | None, outfit_tags: str = "") -> str:
    """Legacy entry point for the positive prompt.

    Anima backends delegate to ``build_positive_prompt_tags_only``. Otherwise
    the prompt is: quality prefix, appearance, outfit, then environment only
    (landscape) or optional POV tags + action + environment, then the LoRA
    token, all de-duplicated and comma-joined.
    """
    if _is_anima():
        return build_positive_prompt_tags_only(scene, persona, outfit_tags)

    shot = scene.get("shot_type")
    segments = [_quality_prefix()]

    appearance = _appearance_for_persona(persona)
    if appearance:
        segments.append(appearance)
    if outfit_tags:
        segments.append(_sanitize_tags_string(_dedupe_outfit_tags(outfit_tags)))

    if shot != "landscape":
        if shot == "first_person_pov":
            segments.append("pov, first-person view, looking at viewer")
        segments.append(_sanitize_tags_string(scene.get("action_tags", "")))
    segments.append(_sanitize_tags_string(scene.get("environment_tags", "")))

    _append_lora(segments, persona)
    return _dedupe_comma_join(segments)
|
||||
|
||||
|
||||
def _negative_for_scene(scene: dict) -> str:
|
||||
if _is_pony():
|
||||
negative = PONY_NEGATIVE
|
||||
elif _is_anima():
|
||||
@@ -151,6 +503,228 @@ async def generate_sd_prompt(
|
||||
|
||||
if scene.get("shot_type") == "first_person_pov":
|
||||
negative += ", third person, over the shoulder"
|
||||
viewer_visible = scene.get("viewer_body_visible") is True
|
||||
if not viewer_visible or _scene_has_physical_contact(scene):
|
||||
negative += ", " + POV_INTERACTION_NEGATIVE
|
||||
|
||||
full = positive + f"\n\nNegative prompt: {negative}"
|
||||
return full, negative
|
||||
return negative
|
||||
|
||||
|
||||
def _format_builder_user_block(persona: dict, messages: list[dict], outfit_json: str) -> str:
    """Build the user message sent to the scene-builder LLM.

    The block lists the persona's appearance tags, optional character notes
    (Anima only, first 300 characters), the current outfit, and then the last
    six user/assistant messages split into an ILLUSTRATE section (the latest
    message plus the directly preceding message of the other role) and a
    Context section (the rest, each capped at 800 characters).
    """
    appearance = (persona.get("appearance_tags") or "").strip()
    block: list[str] = [f"Character appearance (tags): {appearance}"]

    notes = (persona.get("appearance_prose") or "").strip()
    if _is_anima() and notes and notes != appearance:
        ellipsis = "..." if len(notes) > 300 else ""
        block.append(
            f"Character notes (do not copy into tags or scene_description): {notes[:300]}{ellipsis}"
        )

    # Outfit JSON is best-effort: anything that is not a list of strings is ignored.
    try:
        outfit_items = json.loads(outfit_json or "[]")
        outfit_ref = ", ".join(outfit_items) if isinstance(outfit_items, list) else ""
    except Exception:
        outfit_ref = ""
    if outfit_ref:
        block.append(f"Current outfit (tags): {outfit_ref}")

    recent = [m for m in messages if m.get("role") in ("user", "assistant")][-6:]
    if not recent:
        block.append("\nChat:\n(no messages — return should_generate=false)")
        return "\n".join(block)

    # The beat to draw is the latest message, preceded by the message right
    # before it when that one came from the other speaker.
    latest = recent[-1]
    partner_role = "user" if latest["role"] == "assistant" else "assistant"
    illustrate: list[dict] = [latest]
    if len(recent) >= 2 and recent[-2]["role"] == partner_role:
        illustrate.insert(0, recent[-2])
    context = [m for m in recent if m not in illustrate]

    block.append("\n=== ILLUSTRATE (draw THIS beat only) ===")
    for msg in illustrate:
        body = msg.get("content", "")
        if msg.get("role") == "assistant":
            shown = _extract_illustrate_content(body)
        else:
            shown = strip_image_prompt_tag(body)
        block.append(f"{msg['role']}: {shown}")

    if context:
        block.append("\n=== Context (outfit/location hints only — do not illustrate old beats) ===")
        for msg in context:
            shown = strip_image_prompt_tag(msg.get("content", ""))
            if len(shown) > 800:
                shown = shown[:797] + "..."
            block.append(f"{msg['role']}: {shown}")

    return "\n".join(block)
|
||||
|
||||
|
||||
def _parse_scene_json(raw: str) -> dict:
    """Parse the builder LLM reply into a normalized scene dict.

    A surrounding Markdown code fence is removed before decoding.

    Raises:
        ValueError: if the reply is not valid JSON (``json.JSONDecodeError``)
            or decodes to something other than an object.
    """
    payload = raw.strip()
    if payload.startswith("```"):
        payload = re.sub(r"^```\w*\n?", "", payload)
        payload = re.sub(r"\n?```$", "", payload)
    parsed = json.loads(payload)
    if isinstance(parsed, dict):
        return _normalize_shot_type(parsed)
    raise ValueError("LLM returned non-object JSON")
|
||||
|
||||
|
||||
def _bundle_from_scene(scene: dict, persona: dict, outfit_tags: str) -> SdPromptBundle:
    """Turn a scene into the prompt bundle handed to the image backend.

    ``tag_full`` is the positive prompt (hybrid on Anima, legacy otherwise)
    followed by the separator and the negative prompt. ``desc_full`` carries
    the tags-only variant and is set only when Anima dual-compare is enabled.
    """
    negative = _negative_for_scene(scene)
    negative_tail = NEGATIVE_PROMPT_SEPARATOR + negative

    if not _is_anima():
        positive = build_positive_prompt(scene, persona, outfit_tags)
        return SdPromptBundle(tag_full=positive + negative_tail, negative=negative, desc_full=None)

    hybrid = build_positive_prompt_hybrid(scene, persona, outfit_tags)
    desc_full = None
    if anima_dual_enabled():
        desc_full = build_positive_prompt_tags_only(scene, persona, outfit_tags) + negative_tail
    return SdPromptBundle(tag_full=hybrid + negative_tail, negative=negative, desc_full=desc_full)
|
||||
|
||||
|
||||
def _parse_chat_excerpt(excerpt: str) -> list[dict]:
|
||||
messages: list[dict] = []
|
||||
for line in (excerpt or "").splitlines():
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
lower = line.lower()
|
||||
if lower.startswith("user:"):
|
||||
messages.append({"role": "user", "content": line[5:].strip()})
|
||||
elif lower.startswith("assistant:"):
|
||||
messages.append({"role": "assistant", "content": line[10:].strip()})
|
||||
elif lower.startswith("system:"):
|
||||
messages.append({"role": "system", "content": line[7:].strip()})
|
||||
else:
|
||||
messages.append({"role": "user", "content": line})
|
||||
return messages
|
||||
|
||||
|
||||
async def run_prompt_builder(
    persona_id: str,
    *,
    messages: list[dict] | None = None,
    chat_excerpt: str = "",
    outfit_json: str = "[]",
    appearance_override: str | None = None,
    use_prose: bool = False,
) -> dict:
    """Debug: run the full SD prompt builder pipeline and expose every stage.

    Args:
        persona_id: Persona to load; a missing persona is treated as empty.
        messages: Chat messages; when None, ``chat_excerpt`` is parsed instead.
        chat_excerpt: Plain-text chat used when ``messages`` is None.
        outfit_json: JSON list of outfit tags.
        appearance_override: Replaces the persona's ``appearance_tags`` if given.
        use_prose: Accepted for interface compatibility; not read by this function.

    Returns:
        A dict with the builder inputs, the raw LLM reply (``llm_raw``), the
        parsed ``scene`` and the assembled prompts. On failure ``error`` holds
        the exception text; when the scene opts out, ``skipped`` is True.
    """
    persona = await get_persona(persona_id) or {}
    if appearance_override is not None:
        persona = {**persona, "appearance_tags": appearance_override}

    recent = messages if messages is not None else _parse_chat_excerpt(chat_excerpt)
    recent = [m for m in recent if m.get("role") in ("user", "assistant")]

    # Build the system prompt once so the reported and the sent prompt cannot differ.
    system_prompt = _builder_system()
    user_block = _format_builder_user_block(persona, recent, outfit_json)
    builder_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_block},
    ]
    model_used = SD_PROMPT_MODEL or "SYSTEM_MODEL"
    result: dict = {
        "persona_id": persona_id,
        "sd_prompt_model": model_used,
        "builder_system": system_prompt,
        "builder_user": user_block,
        "anima_dual": anima_dual_enabled(),
    }

    raw = ""
    try:
        if SD_PROMPT_MODEL:
            raw = await send_message_with_model(builder_messages, SD_PROMPT_MODEL)
        else:
            raw = await send_message(builder_messages)
        result["llm_raw"] = raw
        scene = _parse_scene_json(raw)
        result["scene"] = scene

        if not _scene_should_generate(scene):
            result["skipped"] = True
            result["error"] = "should_generate=false"
            return result

        # Same best-effort outfit parsing as generate_sd_prompt: only a JSON
        # list is accepted, so a JSON string or object is not joined per
        # character or per key.
        try:
            outfit_list = json.loads(outfit_json or "[]")
            outfit_tags = ", ".join(outfit_list) if isinstance(outfit_list, list) else ""
        except Exception:
            outfit_tags = ""

        negative = _negative_for_scene(scene)
        if _is_anima():
            tags_only = build_positive_prompt_tags_only(scene, persona, outfit_tags)
            hybrid = build_positive_prompt_hybrid(scene, persona, outfit_tags)
            result["tag_positive"] = tags_only
            result["hybrid_positive"] = hybrid
            result["negative"] = negative
            result["tags_only_full"] = tags_only + NEGATIVE_PROMPT_SEPARATOR + negative
            result["hybrid_full"] = hybrid + NEGATIVE_PROMPT_SEPARATOR + negative
            result["tag_full"] = result["hybrid_full"]
        else:
            positive = build_positive_prompt(scene, persona, outfit_tags)
            result["tag_positive"] = positive
            result["negative"] = negative
            result["tag_full"] = positive + NEGATIVE_PROMPT_SEPARATOR + negative
    except Exception as e:
        # Debug endpoint: report the failure in the result instead of raising.
        result["error"] = str(e)
        result["llm_raw"] = raw or result.get("llm_raw", "")

    return result
|
||||
|
||||
|
||||
async def generate_sd_prompt(
    messages: list,
    persona_id: str,
    outfit_json: str = "[]",
) -> SdPromptBundle | None:
    """Ask the builder LLM for a scene and turn it into an SD prompt bundle.

    Args:
        messages: Chat history as role/content dicts.
        persona_id: Persona whose appearance and LoRA settings are used.
        outfit_json: JSON list of outfit tags; invalid JSON is ignored.

    Returns:
        The prompt bundle, or None when the persona is missing, there are no
        user/assistant messages, the LLM call or JSON parsing fails, or the
        scene sets ``should_generate`` to false.
    """
    persona = await get_persona(persona_id)
    if not persona:
        return None

    # .get() keeps a message without a "role" key from raising KeyError; such
    # messages are skipped, matching the other helpers in this module.
    recent = [m for m in messages if m.get("role") in ("user", "assistant")]
    if not recent:
        return None

    user_block = _format_builder_user_block(persona, recent, outfit_json)
    builder_messages = [
        {"role": "system", "content": _builder_system()},
        {"role": "user", "content": user_block},
    ]

    raw = ""
    try:
        if SD_PROMPT_MODEL:
            raw = await send_message_with_model(builder_messages, SD_PROMPT_MODEL)
        else:
            raw = await send_message(builder_messages)
        scene = _parse_scene_json(raw)
    except Exception as e:
        # Image prompts are optional: log and skip rather than fail the chat turn.
        logger.warning("sd_prompt failed: %s raw=%.200s", e, raw)
        return None

    if not _scene_should_generate(scene):
        logger.info("sd_prompt: skipped (should_generate=false)")
        return None

    try:
        outfit_list = json.loads(outfit_json or "[]")
        outfit_tags = ", ".join(outfit_list) if isinstance(outfit_list, list) else ""
    except Exception:
        outfit_tags = ""

    bundle = _bundle_from_scene(scene, persona, outfit_tags)
    if anima_dual_enabled() and bundle.desc_full:
        logger.info(
            "Anima prompts: hybrid=%.80s | tags_only=%.80s",
            bundle.tag_full.split(NEGATIVE_PROMPT_SEPARATOR)[0],
            bundle.desc_full.split(NEGATIVE_PROMPT_SEPARATOR)[0],
        )
    return bundle
|
||||
|
||||
Reference in New Issue
Block a user