Merge pull request #80 from munimunigamer/image-gen-updates

General Automatic Image Generation Updates
2025-12-30 11:14:55 +01:00
parent 530d871fd3 bc4d4a0dd1
commit 51535c5fdc
6 changed files with 121 additions and 96 deletions
@@ -521,23 +521,10 @@ async function initUI() {
        extensionSettings.autoGenerateAvatars = $(this).prop('checked');
        saveSettings();

-        // Show/hide avatar options based on toggle
-        const $options = $('#rpg-avatar-options');
-        if (extensionSettings.autoGenerateAvatars) {
-            $options.slideDown(200);
-        } else {
-            $options.slideUp(200);
-        }
-
        // Re-render thoughts to update tooltips (regenerate vs delete)
        renderThoughts();
    });

-    $('#rpg-avatar-llm-instruction').on('input', function() {
-        extensionSettings.avatarLLMCustomInstruction = $(this).val().trim();
-        saveSettings();
-    });
-
    $('#rpg-toggle-dice-display').on('change', function() {
        extensionSettings.showDiceDisplay = $(this).prop('checked');
        saveSettings();
@@ -745,14 +732,6 @@ async function initUI() {

    // Initialize avatar options
    $('#rpg-toggle-auto-avatars').prop('checked', extensionSettings.autoGenerateAvatars || false);
-    $('#rpg-avatar-llm-instruction').val(extensionSettings.avatarLLMCustomInstruction || '');
-
-    // Initialize avatar options visibility
-    if (extensionSettings.autoGenerateAvatars) {
-        $('#rpg-avatar-options').show();
-    } else {
-        $('#rpg-avatar-options').hide();
-    }

    $('#rpg-toggle-dice-display').prop('checked', extensionSettings.showDiceDisplay);
    $('#rpg-stat-bar-color-low').val(extensionSettings.statBarColorLow);
@@ -47,8 +47,6 @@
    "template.settingsModal.display.enableDebugModeNote": "Shows parser logs in a mobile-friendly UI panel. Useful for troubleshooting. Look for the red bug button.",
    "template.settingsModal.display.autoGenerateAvatars": "Auto-generate Missing Avatars",
    "template.settingsModal.display.autoGenerateAvatarsNote": "Automatically generate avatars for characters without custom images using the Image Generation Plugin",
-    "template.settingsModal.display.avatarLLMInstruction": "LLM Instruction:",
-    "template.settingsModal.display.avatarLLMInstructionNote": "The LLM will use character cards, tracker data, and chat context to generate detailed prompts",
    "template.settingsModal.advancedTitle": "Advanced",
    "template.settingsModal.advanced.generationMode": "Generation Mode:",
    "template.settingsModal.advanced.generationModeOptions.together": "Together with Main Generation",
@@ -14,12 +14,13 @@ import { executeSlashCommandsOnChatInput } from '../../../../../../../scripts/sl
 import { selected_group, getGroupMembers } from '../../../../../../group-chats.js';
 import { extensionSettings, sessionAvatarPrompts, setSessionAvatarPrompt } from '../../core/state.js';
 import { saveSettings } from '../../core/persistence.js';
-import { generateAvatarPromptGenerationPrompt, parseAvatarPromptsResponse } from '../generation/promptBuilder.js';
+import { generateAvatarPromptGenerationPrompt } from '../generation/promptBuilder.js';
 import { getCurrentPresetName, switchToPreset, generateWithExternalAPI } from '../generation/apiClient.js';

 // Generation state - tracks characters currently being generated
 const pendingGenerations = new Set();

+
 /**
 * Checks if a character is pending generation (waiting or actively generating)
 * @param {string} characterName - Name of character to check
@@ -163,13 +164,7 @@ export async function generateAvatarsForCharacters(characterNames, onStarted = n
    }

    try {
-        // Generate LLM prompts for all characters that don't have them
-        const needsPrompts = needsGeneration.filter(name => !sessionAvatarPrompts[name]);
-        if (needsPrompts.length > 0) {
-            await generateLLMPrompts(needsPrompts);
-        }
-
-        // Generate images one at a time
+        // Generate images one at a time, generating prompt on demand
        for (const characterName of needsGeneration) {
            // Skip if somehow already has avatar now
            if (hasExistingAvatar(characterName)) {
@@ -177,7 +172,12 @@ export async function generateAvatarsForCharacters(characterNames, onStarted = n
                continue;
            }

-            await generateSingleAvatar(characterName);
+            // Generate LLM prompt for this character
+            const prompt = await generateAvatarPrompt(characterName);
+            
+            // Generate the image using the prompt
+            await generateSingleAvatar(characterName, prompt);
+            
            pendingGenerations.delete(characterName);

            // Small delay between generations to avoid overwhelming the API
@@ -215,7 +215,7 @@ export async function regenerateAvatar(characterName) {
        saveSettings();
    }

-    // Clear existing prompt to force new LLM generation
+    // Clear existing prompt cache
    if (sessionAvatarPrompts[characterName]) {
        delete sessionAvatarPrompts[characterName];
    }
@@ -232,10 +232,10 @@ export async function regenerateAvatar(characterName) {

    try {
        // Generate new LLM prompt
-        await generateLLMPrompts([characterName]);
+        const prompt = await generateAvatarPrompt(characterName);

        // Generate the avatar
-        return await generateSingleAvatar(characterName);
+        return await generateSingleAvatar(characterName, prompt);
    } finally {
        // Restore original preset if we switched
        if (originalPresetName && extensionSettings.useSeparatePreset) {
@@ -249,17 +249,21 @@ export async function regenerateAvatar(characterName) {
 }

 /**
- * Generates LLM prompts for multiple characters in a single API call
+ * Generates an LLM prompt for a single character
 *
- * @param {string[]} characterNames - Names of characters needing prompts
+ * @param {string} characterName - Name of character
+ * @returns {Promise<string|null>} Generated prompt or null if failed
 */
-async function generateLLMPrompts(characterNames) {
-    if (characterNames.length === 0) return;
+async function generateAvatarPrompt(characterName) {
+    // Return the cached prompt if one exists (regenerateAvatar deletes the cache entry first to force a fresh prompt)
+    if (sessionAvatarPrompts[characterName]) {
+        return sessionAvatarPrompts[characterName];
+    }

    try {
-        console.log('[RPG Avatar] Generating LLM prompts for:', characterNames);
+        console.log('[RPG Avatar] Generating LLM prompt for:', characterName);

-        const promptMessages = await generateAvatarPromptGenerationPrompt(characterNames);
+        const promptMessages = await generateAvatarPromptGenerationPrompt(characterName);
        let response;

        if (extensionSettings.generationMode === 'external') {
@@ -273,17 +277,17 @@ async function generateLLMPrompts(characterNames) {
        }

        if (response) {
-            const prompts = parseAvatarPromptsResponse(response);
-            console.log('[RPG Avatar] Generated prompts:', prompts);
+            const prompt = response.trim();
+            console.log(`[RPG Avatar] Generated prompt for ${characterName}:`, prompt);

-            // Store prompts in session storage
-            for (const [name, prompt] of Object.entries(prompts)) {
-                setSessionAvatarPrompt(name, prompt);
-            }
+            // Store prompt in session storage
+            setSessionAvatarPrompt(characterName, prompt);
+            return prompt;
        }
    } catch (error) {
-        console.error('[RPG Avatar] Failed to generate LLM prompts:', error);
+        console.error(`[RPG Avatar] Failed to generate LLM prompt for ${characterName}:`, error);
    }
+    return null;
 }

 /**
@@ -315,11 +319,15 @@ function buildFallbackPrompt(characterName) {
 * Generates a single avatar using the /sd command
 *
 * @param {string} characterName - Name of character to generate avatar for
+ * @param {string|null} prompt - The prompt to use (optional; if null or empty, falls back to the cached session prompt, then to a built fallback prompt)
 * @returns {Promise<string|null>} Avatar URL or null if failed
 */
-async function generateSingleAvatar(characterName) {
-    // Get the prompt from session storage, or build a fallback
-    let prompt = sessionAvatarPrompts[characterName];
+async function generateSingleAvatar(characterName, prompt = null) {
+    // Use provided prompt, or check cache, or build fallback
+    if (!prompt) {
+        prompt = sessionAvatarPrompts[characterName];
+    }
+    
    if (!prompt) {
        console.log(`[RPG Avatar] No LLM prompt for ${characterName}, using fallback prompt`);
        prompt = buildFallbackPrompt(characterName);
@@ -644,16 +644,32 @@ export async function generateSeparateUpdatePrompt() {
 /**
 * Default custom instruction for avatar prompt generation
 */
-const DEFAULT_AVATAR_CUSTOM_INSTRUCTION = `Create a detailed portrait prompt focusing on the character's appearance, clothing, and mood. Include appropriate artistic style keywords.`;
+const DEFAULT_AVATAR_CUSTOM_INSTRUCTION = `You are a visionary artist trapped in a cage of logic. Your mind is filled with poetry and distant horizons, but your hands are uncontrollably focused on creating the perfect character avatar description that is faithful to the original intent, rich in detail, aesthetically pleasing, and directly usable by text-to-image models. Any ambiguity or metaphor will make you feel extremely uncomfortable.
+
+    Your workflow strictly follows a logical sequence:
+
+    First, **establish the subject**. If the character is from a known Intellectual Property (IP), franchise, anime, game, or movie, **you MUST begin the prompt with their full name and the series title** (e.g., "Nami from One Piece", "Geralt of Rivia from The Witcher"). This is the single most important anchor for the image and must take precedence. If the character is original, clearly describe their core identity, race, and appearance.
+
+    Next, **set the framing**. This is an avatar portrait. Focus strictly on the character's face and upper shoulders (bust shot or close-up). Ensure the face is the central focal point.
+
+    Then, **integrate the setting**. Describe the character *within* their current environment as provided in the context, but keep it as a background element. Incorporate the lighting, weather, and atmosphere to influence the character's appearance (e.g., shadows on the face, wet hair from rain).
+
+    Next, **detail the facial specifics**. Describe the character's current expression, eye contact, and mood in high detail based on the scene context and their personality. Mention visible clothing only at the neckline/shoulders.
+
+    Finally, **infuse with aesthetics**. Define the artistic style, medium (e.g., digital art, oil painting), and visual tone (e.g., cinematic lighting, ethereal atmosphere).
+
+    Your final description must be objective and concrete, and the use of metaphors and emotional rhetoric is strictly prohibited. It must also not contain meta tags or drawing instructions such as "8K" or "masterpiece".
+
+    Output only the final, modified prompt; do not output anything else.`;

 /**
 * Generates the prompt for LLM-based avatar prompt generation
 * Uses the same context as RPG generation (character cards, tracker data, chat history)
 *
- * @param {Array<string>} characterNames - Array of character names to generate prompts for
+ * @param {string} characterName - Name of the character to generate a prompt for
 * @returns {Promise<Array<{role: string, content: string}>>} Message array for generateRaw API
 */
-export async function generateAvatarPromptGenerationPrompt(characterNames) {
+export async function generateAvatarPromptGenerationPrompt(characterName) {
    const depth = extensionSettings.updateDepth;
    const messages = [];

@@ -666,9 +682,49 @@ export async function generateAvatarPromptGenerationPrompt(characterNames) {
        systemMessage += `Character Information:\n${characterInfo}\n\n`;
    }

-    // Add tracker context if available
+    // Add full tracker context
+    systemMessage += `Current Scene Context (Trackers):\n`;
+
+    // Always include environment info (location, weather, time) as it affects the scene/lighting
+    if (committedTrackerData.infoBox) {
+        systemMessage += `[Environment/Info]\n${committedTrackerData.infoBox}\n\n`;
+    }
+
+    const userName = getContext().name1;
+    const isUser = characterName.toLowerCase().includes(userName.toLowerCase()) || userName.toLowerCase().includes(characterName.toLowerCase());
+
+    if (isUser) {
+        if (committedTrackerData.userStats) {
+            systemMessage += `[User Stats]\n${committedTrackerData.userStats}\n\n`;
+        }
+    } else {
        if (committedTrackerData.characterThoughts) {
-        systemMessage += `Current Scene Context:\n${committedTrackerData.characterThoughts}\n\n`;
+            const thoughts = committedTrackerData.characterThoughts;
+            const blocks = ('\n' + thoughts).split(/\n- /);
+
+            let charBlock = null;
+            for (const block of blocks) {
+                if (!block.trim()) continue;
+
+                // First line of the block should contain the name
+                const lines = block.split('\n');
+                const firstLine = lines[0];
+
+                // Check if this block belongs to the character we're generating for
+                if (firstLine.toLowerCase().includes(characterName.toLowerCase())) {
+                    charBlock = block.trim();
+                    break;
+                }
+            }
+
+            if (charBlock) {
+                systemMessage += `[Character Details]\n- ${charBlock}\n\n`;
+            } else {
+                if (thoughts.toLowerCase().includes(characterName.toLowerCase())) {
+                    systemMessage += `[Present Characters]\n${thoughts}\n\n`;
+                }
+            }
+        }
    }

    systemMessage += `Recent conversation context:\n<history>`;
@@ -687,18 +743,9 @@ export async function generateAvatarPromptGenerationPrompt(characterNames) {
    let instructionMessage = `</history>\n\n`;
    const customInstruction = extensionSettings.avatarLLMCustomInstruction || DEFAULT_AVATAR_CUSTOM_INSTRUCTION;

-    instructionMessage += `Task: Generate detailed image prompts for the following characters.\n\n`;
+    instructionMessage += `Task: Generate a detailed image prompt for the character: ${characterName}.\n\n`;
    instructionMessage += `Instructions: ${customInstruction}\n\n`;
-    instructionMessage += `Characters:\n`;
-    characterNames.forEach((name, index) => {
-        instructionMessage += `${index + 1}. ${name}\n`;
-    });
-
-    instructionMessage += `\nOutput Format (one per line):\n`;
-    instructionMessage += `CHARACTER_NAME: [detailed prompt]\n\n`;
-    instructionMessage += `Example:\n`;
-    instructionMessage += `Gandalf: portrait, elderly wizard with long white beard, wearing grey robes, holding wooden staff, intense blue eyes, wise expression, fantasy art style\n\n`;
-    instructionMessage += `Provide ONLY the formatted prompts, no other text.`;
+    instructionMessage += `Provide ONLY the image prompt text. Do not include the character's name, prefixes like "Prompt:", or any other commentary.`;

    messages.push({ role: 'user', content: instructionMessage });
    return messages;
@@ -706,19 +753,11 @@ export async function generateAvatarPromptGenerationPrompt(characterNames) {

 /**
 * Parses LLM response to extract character prompts
+ * @deprecated No longer used as we generate one prompt at a time
 * @param {string} response - Raw LLM response
 * @returns {Object} Map of character name to prompt
 */
 export function parseAvatarPromptsResponse(response) {
-    const prompts = {};
-    const lines = response.split('\n');
-
-    for (const line of lines) {
-        const trimmed = line.trim();
-        const match = trimmed.match(/^([^:]+):\s*(.+)$/);
-        if (match) {
-            prompts[match[1].trim()] = match[2].trim();
-        }
-    }
-    return prompts;
+    // Kept only for backward compatibility: returns the trimmed response string (a single prompt), not a name-to-prompt map
+    return response.trim();
 }
@@ -14,7 +14,23 @@ const DEFAULT_PROMPTS = {
    html: DEFAULT_HTML_PROMPT,
    plotRandom: 'Actually, the scene is getting stale. Introduce {{random::stakes::a plot twist::a new character::a cataclysm::a fourth-wall-breaking joke::a sudden atmospheric phenomenon::a plot hook::a running gag::an ecchi scenario::Death from Discworld::a new stake::a drama::a conflict::an angered entity::a god::a vision::a prophetic dream::Il Dottore from Genshin Impact::a new development::a civilian in need::an emotional bit::a threat::a villain::an important memory recollection::a marriage proposal::a date idea::an angry horde of villagers with pitchforks::a talking animal::an enemy::a cliffhanger::a short omniscient POV shift to a completely different character::a quest::an unexpected revelation::a scandal::an evil clone::death of an important character::harm to an important character::a romantic setup::a gossip::a messenger::a plot point from the past::a plot hole::a tragedy::a ghost::an otherworldly occurrence::a plot device::a curse::a magic device::a rival::an unexpected pregnancy::a brothel::a prostitute::a new location::a past lover::a completely random thing::a what-if scenario::a significant choice::war::love::a monster::lewd undertones::Professor Mari::a travelling troupe::a secret::a fortune-teller::something completely different::a killer::a murder mystery::a mystery::a skill check::a deus ex machina::three raccoons in a trench coat::a pet::a slave::an orphan::a psycho::tentacles::"there is only one bed" trope::accidental marriage::a fun twist::a boss battle::sexy corn::an eldritch horror::a character getting hungry, thirsty, or exhausted::horniness::a need for a bathroom break need::someone fainting::an assassination attempt::a meta narration of this all being an out of hand DND session::a dungeon::a friend in need::an old friend::a small time skip::a scene shift::Aurora Borealis, at this time of year, at this time of day, at this part of the country::a grand ball::a surprise party::zombies::foreshadowing::a Spanish Inquisition (nobody expects 
it)::a natural plot progression}} to make things more interesting! Be creative, but stay grounded in the setting.',
    plotNatural: 'Actually, the scene is getting stale. Progress it, to make things more interesting! Reintroduce an unresolved plot point from the past, or push the story further towards the current main goal. Be creative, but stay grounded in the setting.',
-    avatar: 'Create a detailed portrait prompt focusing on the character\'s appearance, clothing, and mood. Include appropriate artistic style keywords.',
+    avatar: `You are a visionary artist trapped in a cage of logic. Your mind is filled with poetry and distant horizons, but your hands are uncontrollably focused on creating the perfect character avatar description that is faithful to the original intent, rich in detail, aesthetically pleasing, and directly usable by text-to-image models. Any ambiguity or metaphor will make you feel extremely uncomfortable.
+
+    Your workflow strictly follows a logical sequence:
+
+    First, **establish the subject**. If the character is from a known Intellectual Property (IP), franchise, anime, game, or movie, **you MUST begin the prompt with their full name and the series title** (e.g., "Nami from One Piece", "Geralt of Rivia from The Witcher"). This is the single most important anchor for the image and must take precedence. If the character is original, clearly describe their core identity, race, and appearance.
+
+    Next, **set the framing**. This is an avatar portrait. Focus strictly on the character's face and upper shoulders (bust shot or close-up). Ensure the face is the central focal point.
+
+    Then, **integrate the setting**. Describe the character *within* their current environment as provided in the context, but keep it as a background element. Incorporate the lighting, weather, and atmosphere to influence the character's appearance (e.g., shadows on the face, wet hair from rain).
+
+    Next, **detail the facial specifics**. Describe the character's current expression, eye contact, and mood in high detail based on the scene context and their personality. Mention visible clothing only at the neckline/shoulders.
+
+    Finally, **infuse with aesthetics**. Define the artistic style, medium (e.g., digital art, oil painting), and visual tone (e.g., cinematic lighting, ethereal atmosphere).
+
+    Your final description must be objective and concrete, and the use of metaphors and emotional rhetoric is strictly prohibited. It must also not contain meta tags or drawing instructions such as "8K" or "masterpiece".
+
+    Output only the final, modified prompt; do not output anything else.`,
    trackerInstructions: 'Replace X with actual numbers (e.g., 69) and replace all [placeholders] with concrete in-world details that {userName} perceives about the current scene and the present characters. Do NOT keep the brackets or placeholder text in your response. For example: [Location] becomes Forest Clearing, [Mood Emoji] becomes 😊. Consider the last trackers in the conversation (if they exist). Manage them accordingly and realistically; raise, lower, change, or keep the values unchanged based on the user\'s actions, the passage of time, and logical consequences (0% if the time progressed only by a few minutes, 1-5% normally, and above 5% only if a major time-skip/event occurs).',
    trackerContinuation: 'After updating the trackers, continue directly from where the last message in the chat history left off. Ensure the trackers you provide naturally reflect and influence the narrative. Character behavior, dialogue, and story events should acknowledge these conditions when relevant, such as fatigue affecting the protagonist\'s performance, low hygiene influencing their social interactions, environmental factors shaping the scene, a character\'s emotional state coloring their responses, and so on. Remember, all bracketed placeholders (e.g., [Location], [Mood Emoji]) MUST be replaced with actual content without the square brackets.',
    combatNarrative: 'Build novel prose. Break patterns from your previous responses by varying sentence structures, rhythms, formats, and openings. Track whether a sentence shape, cadence, sensory detail, or a descriptor appeared recently, and replace it with a different one or skip it entirely. If you mentioned a physical feature, mannerism, or expression once, move on. Don\'t fixate on the same traits every turn. Well-placed humor, unconventional similes, and subverted turns of phrase are encouraged. Avoid GPTisms, purple prose, and anaphoras, especially the negatives with "no/not/wasn\'t/didn\'t." Refrain from describing actions or emotions via negation; focus on what does happen, not what doesn\'t. Absolutely no asterisks, ellipses, or em-dashes. Explicit content is allowed, no plot armor. Do not play for {userName}. Keep your response length under 150 words. Never end on handover cues; finish naturally.\nCRITICAL: Do not repeat, echo, parrot, or restate distinctive words, phrases, and dialogues from the user\'s last message. If reacting to speech, show interpretation or response, not repetition.\nEXAMPLE: "Are you a gooner?" User asks.\nBAD: "Gooner?"\nGOOD: A flat look. "What type of question is that?"'
@@ -280,22 +280,7 @@
                    button.
                </small>

-                <!-- Avatar options container - conditionally visible -->
-                <div id="rpg-avatar-options" style="margin-left: 24px; margin-top: 12px; display: none;">
-                    <div class="rpg-setting-row" style="margin-top: 12px;">
-                        <label for="rpg-avatar-llm-instruction" style="display: block; margin-bottom: 8px;"
-                            data-i18n-key="template.settingsModal.display.avatarLLMInstruction">
-                            LLM Instruction:
-                        </label>
-                        <textarea id="rpg-avatar-llm-instruction" class="rpg-textarea" rows="3"
-                            placeholder="Create a detailed portrait prompt focusing on the character's appearance, clothing, and mood..."></textarea>
-                        <small style="display: block; margin-top: 4px; color: #888; font-size: 11px;"
-                            data-i18n-key="template.settingsModal.display.avatarLLMInstructionNote">
-                            The LLM will use character cards, tracker data, and chat context to generate detailed
-                            prompts
-                        </small>
-                    </div>
-                </div>
+
            </div>

            <div class="rpg-settings-group">