From 3a84e24c0aa1e828e143d7e8e9fd91c198c6c06e Mon Sep 17 00:00:00 2001 From: Lucas 'Paperboy' Rose-Winters Date: Mon, 20 Oct 2025 07:14:18 +1100 Subject: [PATCH] feat(inventory): enhance parser for AI formatting edge cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completely rewrote parseItems() to robustly handle diverse AI output formats without requiring JSON mode (not all local models support it). New capabilities: 1. Multiple bracket types: [], {}, [[]] 2. Wrapping quotes: "...", '...' 3. Newline-based lists: "Sword\nShield" → ["Sword", "Shield"] 4. Markdown stripping: **bold**, *italic*, `code`, ~~strike~~ 5. List markers: "- Sword", "1. Item", "• Item" 6. Graceful unmatched parentheses (warns but doesn't crash) 7. Per-item quote stripping: ["Sword", "Shield"] Implementation: - 6-step processing pipeline with clear documentation - Helper function cleanSingleItem() for per-item cleanup - Preserves commas inside parentheses (existing feature) - Console warnings for malformed input (unmatched parens) Examples now supported: - Standard: "Sword, Shield" ✓ - Newlines: "Sword\nShield\nPotion" ✓ - Bulleted: "- Sword\n- Shield" ✓ - Numbered: "1. Sword\n2. Shield" ✓ - Markdown: "**Sword** (equipped)" → "Sword (equipped)" ✓ - Complex: "Potato (Cursed, Sexy, Etc), **Shield**" ✓ This future-proofs the parser against varied AI model behaviors. --- src/utils/itemParser.js | 197 ++++++++++++++++++++++++++++++---------- 1 file changed, 150 insertions(+), 47 deletions(-) diff --git a/src/utils/itemParser.js b/src/utils/itemParser.js index e35a901..42679e9 100644 --- a/src/utils/itemParser.js +++ b/src/utils/itemParser.js @@ -4,21 +4,43 @@ */ /** - * Parses a comma-separated item string into an array of trimmed item names. - * Filters out empty strings and handles "None" gracefully. - * Smart handling: - * - Strips wrapping square brackets that AI sometimes adds - * - Collapses newlines inside parentheses to spaces - * - Only splits on commas OUTSIDE parentheses (commas inside parentheses are preserved) + * Parses item strings from AI responses into clean arrays. + * Handles numerous AI formatting quirks and edge cases. * - * @param {string} itemString - Comma-separated items (e.g., "Sword, Shield, 3x Potions") - * @returns {string[]} Array of item names, or empty array if none + * Smart handling: + * - Strips wrapping brackets/braces: [], {}, [[]] + * - Strips wrapping quotes: "...", '...' + * - Converts newlines to commas (newline-based lists) + * - Strips markdown: **bold**, *italic*, `code`, ~~strikethrough~~ + * - Strips list markers: -, •, 1., 2., etc. + * - Collapses newlines inside parentheses to spaces + * - Only splits on commas OUTSIDE parentheses (preserves commas in descriptions) + * - Gracefully handles unmatched parentheses + * + * @param {string} itemString - Item string from AI (various formats supported) + * @returns {string[]} Array of clean item names, or empty array if none * * @example + * // Standard comma-separated * parseItems("Sword, Shield, 3x Potions") // ["Sword", "Shield", "3x Potions"] - * parseItems("Books (magical\ntomes), Sword") // ["Books (magical tomes)", "Sword"] - * parseItems("Potato (Cursed, Sexy, Your Mum & Dick, Etc), Sword") // ["Potato (Cursed, Sexy, Your Mum & Dick, Etc)", "Sword"] + * + * // Newline-based lists + * parseItems("Sword\nShield\nPotion") // ["Sword", "Shield", "Potion"] + * parseItems("- Sword\n- Shield") // ["Sword", "Shield"] + * parseItems("1. Sword\n2. Shield") // ["Sword", "Shield"] + * + * // Commas in parentheses (preserved) + * parseItems("Potato (Cursed, Sexy, Your Mum & Dick, Etc), Sword") + * // → ["Potato (Cursed, Sexy, Your Mum & Dick, Etc)", "Sword"] + * + * // Markdown formatting (stripped) + * parseItems("**Sword** (equipped), *Shield*") // ["Sword (equipped)", "Shield"] + * + * // Various brackets (stripped) * parseItems("[Sword, Shield]") // ["Sword", "Shield"] + * parseItems("{Sword, Shield}") // ["Sword", "Shield"] + * + * // Edge cases * parseItems("None") // [] * parseItems("") // [] * parseItems(null) // [] @@ -29,49 +51,80 @@ export function parseItems(itemString) { return []; } - // Trim and check for "None" (case-insensitive) - let trimmed = itemString.trim(); - if (trimmed === '' || trimmed.toLowerCase() === 'none') { + let processed = itemString.trim(); + + // Quick check for "None" or empty + if (processed === '' || processed.toLowerCase() === 'none') { return []; } - // Strip wrapping square brackets if present (AI sometimes adds these) - if (trimmed.startsWith('[') && trimmed.endsWith(']')) { - trimmed = trimmed.slice(1, -1).trim(); - // Check again for empty after stripping brackets - if (trimmed === '' || trimmed.toLowerCase() === 'none') { + // STEP 1: Strip wrapping brackets/braces (AI sometimes wraps entire lists) + // Handle: [], {}, [[]], etc. + while ( + (processed.startsWith('[') && processed.endsWith(']')) || + (processed.startsWith('{') && processed.endsWith('}')) + ) { + processed = processed.slice(1, -1).trim(); + if (processed === '' || processed.toLowerCase() === 'none') { return []; } } - // First pass: Collapse newlines inside parentheses - let processed = ''; - let parenDepth = 0; - - for (let i = 0; i < trimmed.length; i++) { - const char = trimmed[i]; - - if (char === '(') { - parenDepth++; - processed += char; - } else if (char === ')') { - parenDepth--; - processed += char; - } else if ((char === '\n' || char === '\r') && parenDepth > 0) { - // Inside parentheses: replace newline with space - // Skip if previous char was already a space - if (processed[processed.length - 1] !== ' ') { - processed += ' '; - } - } else { - processed += char; + // STEP 2: Strip wrapping quotes (AI sometimes quotes entire lists) + // Handle: "...", '...' + if ((processed.startsWith('"') && processed.endsWith('"')) || + (processed.startsWith("'") && processed.endsWith("'"))) { + processed = processed.slice(1, -1).trim(); + if (processed === '' || processed.toLowerCase() === 'none') { + return []; } } - // Clean up multiple consecutive spaces + // STEP 3: Convert newlines to commas (OUTSIDE parentheses) + // Handles newline-based lists: "Sword\nShield\nPotion" → "Sword, Shield, Potion" + let withCommas = ''; + let parenDepth = 0; + + for (let i = 0; i < processed.length; i++) { + const char = processed[i]; + + if (char === '(') { + parenDepth++; + withCommas += char; + } else if (char === ')') { + parenDepth--; + withCommas += char; + } else if ((char === '\n' || char === '\r') && parenDepth === 0) { + // Newline outside parentheses - convert to comma separator + // Don't add if previous char was already a separator + const prevChar = withCommas[withCommas.length - 1]; + if (prevChar && prevChar !== ',' && prevChar !== '\n') { + withCommas += ','; + } + } else if ((char === '\n' || char === '\r') && parenDepth > 0) { + // Newline inside parentheses - convert to space + if (withCommas[withCommas.length - 1] !== ' ') { + withCommas += ' '; + } + } else { + withCommas += char; + } + } + processed = withCommas; + + // STEP 4: Strip markdown formatting + // Remove: **bold**, *italic*, `code`, ~~strikethrough~~ + processed = processed + .replace(/\*\*(.+?)\*\*/g, '$1') // **bold** → bold + .replace(/\*(.+?)\*/g, '$1') // *italic* → italic + .replace(/`(.+?)`/g, '$1') // `code` → code + .replace(/~~(.+?)~~/g, '$1'); // ~~strike~~ → strike + + // STEP 5: Normalize whitespace processed = processed.replace(/\s+/g, ' '); - // Second pass: Smart comma splitting (only split on commas outside parentheses) + // STEP 6: Smart comma splitting (only split on commas OUTSIDE parentheses) + // Also handles list markers and quotes per-item const items = []; let currentItem = ''; parenDepth = 0; @@ -84,12 +137,17 @@ export function parseItems(itemString) { currentItem += char; } else if (char === ')') { parenDepth--; + // Graceful handling: don't let depth go negative + if (parenDepth < 0) { + console.warn('[RPG Companion] Unmatched closing parenthesis in item parsing'); + parenDepth = 0; + } currentItem += char; } else if (char === ',' && parenDepth === 0) { // Comma outside parentheses - this is a separator - const trimmedItem = currentItem.trim(); - if (trimmedItem !== '' && trimmedItem.toLowerCase() !== 'none') { - items.push(trimmedItem); + const cleaned = cleanSingleItem(currentItem); + if (cleaned) { + items.push(cleaned); } currentItem = ''; // Start new item } else { @@ -98,14 +156,59 @@ export function parseItems(itemString) { } // Don't forget the last item - const trimmedItem = currentItem.trim(); - if (trimmedItem !== '' && trimmedItem.toLowerCase() !== 'none') { - items.push(trimmedItem); + const cleaned = cleanSingleItem(currentItem); + if (cleaned) { + items.push(cleaned); + } + + // Warn if parentheses were unmatched + if (parenDepth > 0) { + console.warn('[RPG Companion] Unmatched opening parenthesis in item parsing'); } return items; } +/** + * Cleans a single item string (helper for parseItems) + * Removes list markers, wrapping quotes, and trims + * + * @param {string} item - Single item string to clean + * @returns {string|null} Cleaned item or null if empty/invalid + * @private + */ +function cleanSingleItem(item) { + if (!item || typeof item !== 'string') { + return null; + } + + let cleaned = item.trim(); + + // Filter "None" + if (cleaned === '' || cleaned.toLowerCase() === 'none') { + return null; + } + + // Strip list markers: "- Item", "• Item", "1. Item", "2. Item", etc. + // Matches: -, •, *, 1., 2., a), etc. + cleaned = cleaned.replace(/^[-•*]\s+/, ''); // "- Item" → "Item" + cleaned = cleaned.replace(/^\d+\.\s+/, ''); // "1. Item" → "Item" + cleaned = cleaned.replace(/^[a-z]\)\s+/i, ''); // "a) Item" → "Item" + + // Strip wrapping quotes from individual items + if ((cleaned.startsWith('"') && cleaned.endsWith('"')) || + (cleaned.startsWith("'") && cleaned.endsWith("'"))) { + cleaned = cleaned.slice(1, -1).trim(); + } + + // Final empty check + if (cleaned === '' || cleaned.toLowerCase() === 'none') { + return null; + } + + return cleaned; +} + /** * Serializes an array of items back into a comma-separated string. * Returns "None" for empty arrays.