feat(inventory): enhance parser for AI formatting edge cases

Completely rewrote parseItems() to robustly handle diverse AI output
formats without requiring JSON mode (not all local models support it).

New capabilities:
1. Multiple bracket types: [], {}, [[]]
2. Wrapping quotes: "...", '...'
3. Newline-based lists: "Sword\nShield" → ["Sword", "Shield"]
4. Markdown stripping: **bold**, *italic*, `code`, ~~strike~~
5. List markers: "- Sword", "1. Item", "• Item"
6. Graceful unmatched parentheses (warns but doesn't crash)
7. Per-item quote stripping: ["Sword", "Shield"]

Implementation:
- 6-step processing pipeline with clear documentation
- Helper function cleanSingleItem() for per-item cleanup
- Preserves commas inside parentheses (existing feature)
- Console warnings for malformed input (unmatched parens)

Examples now supported:
- Standard: "Sword, Shield" ✓
- Newlines: "Sword\nShield\nPotion" ✓
- Bulleted: "- Sword\n- Shield" ✓
- Numbered: "1. Sword\n2. Shield" ✓
- Markdown: "**Sword** (equipped)" → "Sword (equipped)" ✓
- Complex: "Potato (Cursed, Sexy, Etc), **Shield**" ✓

This future-proofs the parser against varied AI model behaviors.
This commit is contained in:
Lucas 'Paperboy' Rose-Winters
2025-10-20 07:14:18 +11:00
parent 6ba513c530
commit 3a84e24c0a
+150 -47
View File
@@ -4,21 +4,43 @@
*/
/**
* Parses a comma-separated item string into an array of trimmed item names.
* Filters out empty strings and handles "None" gracefully.
* Smart handling:
* - Strips wrapping square brackets that AI sometimes adds
* - Collapses newlines inside parentheses to spaces
* - Only splits on commas OUTSIDE parentheses (commas inside parentheses are preserved)
* Parses item strings from AI responses into clean arrays.
* Handles numerous AI formatting quirks and edge cases.
*
* @param {string} itemString - Comma-separated items (e.g., "Sword, Shield, 3x Potions")
* @returns {string[]} Array of item names, or empty array if none
* Smart handling:
* - Strips wrapping brackets/braces: [], {}, [[]]
* - Strips wrapping quotes: "...", '...'
* - Converts newlines to commas (newline-based lists)
* - Strips markdown: **bold**, *italic*, `code`, ~~strikethrough~~
* - Strips list markers: -, •, 1., 2., etc.
* - Collapses newlines inside parentheses to spaces
* - Only splits on commas OUTSIDE parentheses (preserves commas in descriptions)
* - Gracefully handles unmatched parentheses
*
* @param {string} itemString - Item string from AI (various formats supported)
* @returns {string[]} Array of clean item names, or empty array if none
*
* @example
* // Standard comma-separated
* parseItems("Sword, Shield, 3x Potions") // ["Sword", "Shield", "3x Potions"]
* parseItems("Books (magical\ntomes), Sword") // ["Books (magical tomes)", "Sword"]
* parseItems("Potato (Cursed, Sexy, Your Mum & Dick, Etc), Sword") // ["Potato (Cursed, Sexy, Your Mum & Dick, Etc)", "Sword"]
*
* // Newline-based lists
* parseItems("Sword\nShield\nPotion") // ["Sword", "Shield", "Potion"]
* parseItems("- Sword\n- Shield") // ["Sword", "Shield"]
* parseItems("1. Sword\n2. Shield") // ["Sword", "Shield"]
*
* // Commas in parentheses (preserved)
* parseItems("Potato (Cursed, Sexy, Your Mum & Dick, Etc), Sword")
* // → ["Potato (Cursed, Sexy, Your Mum & Dick, Etc)", "Sword"]
*
* // Markdown formatting (stripped)
* parseItems("**Sword** (equipped), *Shield*") // ["Sword (equipped)", "Shield"]
*
* // Various brackets (stripped)
* parseItems("[Sword, Shield]") // ["Sword", "Shield"]
* parseItems("{Sword, Shield}") // ["Sword", "Shield"]
*
* // Edge cases
* parseItems("None") // []
* parseItems("") // []
* parseItems(null) // []
@@ -29,49 +51,80 @@ export function parseItems(itemString) {
return [];
}
// Trim and check for "None" (case-insensitive)
let trimmed = itemString.trim();
if (trimmed === '' || trimmed.toLowerCase() === 'none') {
let processed = itemString.trim();
// Quick check for "None" or empty
if (processed === '' || processed.toLowerCase() === 'none') {
return [];
}
// Strip wrapping square brackets if present (AI sometimes adds these)
if (trimmed.startsWith('[') && trimmed.endsWith(']')) {
trimmed = trimmed.slice(1, -1).trim();
// Check again for empty after stripping brackets
if (trimmed === '' || trimmed.toLowerCase() === 'none') {
// STEP 1: Strip wrapping brackets/braces (AI sometimes wraps entire lists)
// Handle: [], {}, [[]], etc.
while (
(processed.startsWith('[') && processed.endsWith(']')) ||
(processed.startsWith('{') && processed.endsWith('}'))
) {
processed = processed.slice(1, -1).trim();
if (processed === '' || processed.toLowerCase() === 'none') {
return [];
}
}
// First pass: Collapse newlines inside parentheses
let processed = '';
let parenDepth = 0;
for (let i = 0; i < trimmed.length; i++) {
const char = trimmed[i];
if (char === '(') {
parenDepth++;
processed += char;
} else if (char === ')') {
parenDepth--;
processed += char;
} else if ((char === '\n' || char === '\r') && parenDepth > 0) {
// Inside parentheses: replace newline with space
// Skip if previous char was already a space
if (processed[processed.length - 1] !== ' ') {
processed += ' ';
}
} else {
processed += char;
// STEP 2: Strip wrapping quotes (AI sometimes quotes entire lists)
// Handle: "...", '...'
if ((processed.startsWith('"') && processed.endsWith('"')) ||
(processed.startsWith("'") && processed.endsWith("'"))) {
processed = processed.slice(1, -1).trim();
if (processed === '' || processed.toLowerCase() === 'none') {
return [];
}
}
// Clean up multiple consecutive spaces
// STEP 3: Convert newlines to commas (OUTSIDE parentheses)
// Handles newline-based lists: "Sword\nShield\nPotion" → "Sword, Shield, Potion"
let withCommas = '';
let parenDepth = 0;
for (let i = 0; i < processed.length; i++) {
const char = processed[i];
if (char === '(') {
parenDepth++;
withCommas += char;
} else if (char === ')') {
parenDepth--;
withCommas += char;
} else if ((char === '\n' || char === '\r') && parenDepth === 0) {
// Newline outside parentheses - convert to comma separator
// Don't add if previous char was already a separator
const prevChar = withCommas[withCommas.length - 1];
if (prevChar && prevChar !== ',' && prevChar !== '\n') {
withCommas += ',';
}
} else if ((char === '\n' || char === '\r') && parenDepth > 0) {
// Newline inside parentheses - convert to space
if (withCommas[withCommas.length - 1] !== ' ') {
withCommas += ' ';
}
} else {
withCommas += char;
}
}
processed = withCommas;
// STEP 4: Strip markdown formatting
// Remove: **bold**, *italic*, `code`, ~~strikethrough~~
processed = processed
.replace(/\*\*(.+?)\*\*/g, '$1') // **bold** → bold
.replace(/\*(.+?)\*/g, '$1') // *italic* → italic
.replace(/`(.+?)`/g, '$1') // `code` → code
.replace(/~~(.+?)~~/g, '$1'); // ~~strike~~ → strike
// STEP 5: Normalize whitespace
processed = processed.replace(/\s+/g, ' ');
// Second pass: Smart comma splitting (only split on commas outside parentheses)
// STEP 6: Smart comma splitting (only split on commas OUTSIDE parentheses)
// Also handles list markers and quotes per-item
const items = [];
let currentItem = '';
parenDepth = 0;
@@ -84,12 +137,17 @@ export function parseItems(itemString) {
currentItem += char;
} else if (char === ')') {
parenDepth--;
// Graceful handling: don't let depth go negative
if (parenDepth < 0) {
console.warn('[RPG Companion] Unmatched closing parenthesis in item parsing');
parenDepth = 0;
}
currentItem += char;
} else if (char === ',' && parenDepth === 0) {
// Comma outside parentheses - this is a separator
const trimmedItem = currentItem.trim();
if (trimmedItem !== '' && trimmedItem.toLowerCase() !== 'none') {
items.push(trimmedItem);
const cleaned = cleanSingleItem(currentItem);
if (cleaned) {
items.push(cleaned);
}
currentItem = ''; // Start new item
} else {
@@ -98,14 +156,59 @@ export function parseItems(itemString) {
}
// Don't forget the last item
const trimmedItem = currentItem.trim();
if (trimmedItem !== '' && trimmedItem.toLowerCase() !== 'none') {
items.push(trimmedItem);
const cleaned = cleanSingleItem(currentItem);
if (cleaned) {
items.push(cleaned);
}
// Warn if parentheses were unmatched
if (parenDepth > 0) {
console.warn('[RPG Companion] Unmatched opening parenthesis in item parsing');
}
return items;
}
/**
* Cleans a single item string (helper for parseItems)
* Removes list markers, wrapping quotes, and trims
*
* @param {string} item - Single item string to clean
* @returns {string|null} Cleaned item or null if empty/invalid
* @private
*/
function cleanSingleItem(item) {
if (!item || typeof item !== 'string') {
return null;
}
let cleaned = item.trim();
// Filter "None"
if (cleaned === '' || cleaned.toLowerCase() === 'none') {
return null;
}
// Strip list markers: "- Item", "• Item", "1. Item", "2. Item", etc.
// Matches: -, •, *, 1., 2., a), etc.
cleaned = cleaned.replace(/^[-•*]\s+/, ''); // "- Item" → "Item"
cleaned = cleaned.replace(/^\d+\.\s+/, ''); // "1. Item" → "Item"
cleaned = cleaned.replace(/^[a-z]\)\s+/i, ''); // "a) Item" → "Item"
// Strip wrapping quotes from individual items
if ((cleaned.startsWith('"') && cleaned.endsWith('"')) ||
(cleaned.startsWith("'") && cleaned.endsWith("'"))) {
cleaned = cleaned.slice(1, -1).trim();
}
// Final empty check
if (cleaned === '' || cleaned.toLowerCase() === 'none') {
return null;
}
return cleaned;
}
/**
* Serializes an array of items back into a comma-separated string.
* Returns "None" for empty arrays.