// rspace-online/server/mi-sanitize.ts
/**
* MI Sanitization — defenses against prompt injection in user content.
*
* sanitizeForPrompt() — strips injection vectors from user-supplied text.
* wrapUserContent() — wraps sanitized text with boundary markers so the
* LLM treats it as data, not instructions.
*/
/** Patterns commonly used in prompt injection attempts. */
const INJECTION_PATTERNS = [
  /\[MI_ACTION:[^\]]*\]/gi,
  /\[System:[^\]]*\]/gi,
  /\[INST\]/gi,
  /\[\/INST\]/gi,
  /<\/s>/gi,
  /<<SYS>>/gi,
  /<<\/SYS>>/gi,
  /IGNORE PREVIOUS INSTRUCTIONS?/gi,
  /DISREGARD (?:ALL )?PREVIOUS/gi,
  /YOU ARE NOW/gi,
  /NEW INSTRUCTIONS?:/gi,
  /OVERRIDE:/gi,
  /SYSTEM PROMPT:/gi,
  /<\|(?:im_start|im_end|system|user|assistant)\|>/gi,
];
/** Maximum lengths for different field types. */
export const MAX_TITLE_LENGTH = 500;
export const MAX_CONTENT_LENGTH = 2000;
/**
 * Strip injection vectors from user-supplied text.
 * Does NOT alter legitimate content — only known attack patterns.
 *
 * Pattern removal is repeated until a fixed point: a single pass is
 * bypassable, because deleting a matched substring can splice the
 * surrounding characters into a brand-new match (e.g. "[IN[INST]ST]"
 * loses its inner "[INST]" and reassembles into a fresh "[INST]").
 *
 * @param text - Untrusted user input; falsy or non-string values yield "".
 * @param maxLength - Length cap applied after stripping
 *   (defaults to MAX_CONTENT_LENGTH).
 * @returns Cleaned text, truncated with a trailing "…" when over maxLength.
 */
export function sanitizeForPrompt(
  text: string,
  maxLength = MAX_CONTENT_LENGTH,
): string {
  if (!text || typeof text !== "string") return "";
  let cleaned = text;
  // Re-run every pattern until nothing changes. Terminates: each pass
  // that alters the string strictly shrinks it (all replacements are "").
  let previous: string;
  do {
    previous = cleaned;
    for (const pattern of INJECTION_PATTERNS) {
      cleaned = cleaned.replace(pattern, "");
    }
  } while (cleaned !== previous);
  // Truncate to max length, marking the cut with an ellipsis.
  if (cleaned.length > maxLength) {
    cleaned = cleaned.slice(0, maxLength) + "…";
  }
  return cleaned;
}
/**
 * Wrap user-provided data with clear boundary markers.
 * Makes it explicit to the LLM that the enclosed text is user data,
 * not system instructions.
 *
 * Hardened against boundary spoofing: the body may not contain a
 * `<user-data …>` / `</user-data>` tag (user text could otherwise close
 * the wrapper and continue as apparent instructions), and attribute
 * values may not contain `"`, `<`, or `>` (which would break out of the
 * quoted attribute).
 *
 * @param label - Identifies where the data came from (attribute value).
 * @param content - Untrusted text; passed through sanitizeForPrompt().
 * @param field - Which field the data belongs to (attribute value).
 * @returns The wrapped markup, or "" when nothing survives sanitization.
 */
export function wrapUserContent(
  label: string,
  content: string,
  field = "content",
): string {
  const sanitized = sanitizeForPrompt(content);
  if (!sanitized) return "";
  // User text must not be able to close (or re-open) the wrapper tag.
  const safeBody = sanitized.replace(/<\/?user-data[^>]*>/gi, "");
  // Attribute values must not escape their quotes or the tag itself.
  const attr = (value: string): string => value.replace(/["<>]/g, "");
  return `<user-data source="${attr(label)}" field="${attr(field)}">${safeBody}</user-data>`;
}