/** * MI Sanitization — defenses against prompt injection in user content. * * sanitizeForPrompt() — strips injection vectors from user-supplied text. * wrapUserContent() — wraps sanitized text with boundary markers so the * LLM treats it as data, not instructions. */ /** Patterns commonly used in prompt injection attempts. */ const INJECTION_PATTERNS = [ /\[MI_ACTION:[^\]]*\]/gi, /\[System:[^\]]*\]/gi, /\[INST\]/gi, /\[\/INST\]/gi, /<\/s>/gi, /<>/gi, /<<\/SYS>>/gi, /IGNORE PREVIOUS INSTRUCTIONS?/gi, /DISREGARD (?:ALL )?PREVIOUS/gi, /YOU ARE NOW/gi, /NEW INSTRUCTIONS?:/gi, /OVERRIDE:/gi, /SYSTEM PROMPT:/gi, /<\|(?:im_start|im_end|system|user|assistant)\|>/gi, ]; /** Maximum lengths for different field types. */ export const MAX_TITLE_LENGTH = 500; export const MAX_CONTENT_LENGTH = 2000; /** * Strip/escape injection vectors from user-supplied text. * Does NOT alter legitimate content — only known attack patterns. */ export function sanitizeForPrompt( text: string, maxLength = MAX_CONTENT_LENGTH, ): string { if (!text || typeof text !== "string") return ""; let cleaned = text; for (const pattern of INJECTION_PATTERNS) { cleaned = cleaned.replace(pattern, ""); } // Truncate to max length if (cleaned.length > maxLength) { cleaned = cleaned.slice(0, maxLength) + "…"; } return cleaned; } /** * Wrap user-provided data with clear boundary markers. * Makes it explicit to the LLM that the enclosed text is user data, * not system instructions. */ export function wrapUserContent( label: string, content: string, field = "content", ): string { const sanitized = sanitizeForPrompt(content); if (!sanitized) return ""; return `${sanitized}`; }