// rspace-online/server/mi-sanitize.ts
/**
* MI Sanitization — defenses against prompt injection in user content.
*
* sanitizeForPrompt() — strips injection vectors from user-supplied text.
* wrapUserContent() — wraps sanitized text with boundary markers so the
* LLM treats it as data, not instructions.
*/
/** Patterns commonly used in prompt injection attempts. */
const INJECTION_PATTERNS = [
  /\[MI_ACTION:[^\]]*\]/gi,
  /\[System:[^\]]*\]/gi,
  /\[INST\]/gi,
  /\[\/INST\]/gi,
  /<\/s>/gi,
  /<<SYS>>/gi,
  /<<\/SYS>>/gi,
  /IGNORE PREVIOUS INSTRUCTIONS?/gi,
  /DISREGARD (?:ALL )?PREVIOUS/gi,
  /YOU ARE NOW/gi,
  /NEW INSTRUCTIONS?:/gi,
  /OVERRIDE:/gi,
  /SYSTEM PROMPT:/gi,
  /<\|(?:im_start|im_end|system|user|assistant)\|>/gi,
];
/** Maximum lengths for different field types. */
export const MAX_TITLE_LENGTH = 500;
export const MAX_CONTENT_LENGTH = 2000;
/**
 * Strip injection vectors from user-supplied text.
 * Does NOT alter legitimate content — only known attack patterns.
 *
 * Pattern removal is repeated until a fixed point: a single pass is
 * bypassable, because deleting a matched substring can splice the
 * surrounding characters into a brand-new match (e.g. "[IN[INST]ST]"
 * loses its inner "[INST]" and reassembles into a fresh "[INST]").
 *
 * @param text - Untrusted user input; falsy or non-string values yield "".
 * @param maxLength - Length cap applied after stripping
 *   (defaults to MAX_CONTENT_LENGTH).
 * @returns Cleaned text, truncated with a trailing "…" when over maxLength.
 */
export function sanitizeForPrompt(
  text: string,
  maxLength = MAX_CONTENT_LENGTH,
): string {
  if (!text || typeof text !== "string") return "";
  let cleaned = text;
  // Re-run every pattern until nothing changes. Terminates: each pass
  // that alters the string strictly shrinks it (all replacements are "").
  let previous: string;
  do {
    previous = cleaned;
    for (const pattern of INJECTION_PATTERNS) {
      cleaned = cleaned.replace(pattern, "");
    }
  } while (cleaned !== previous);
  // Truncate to max length, marking the cut with an ellipsis.
  if (cleaned.length > maxLength) {
    cleaned = cleaned.slice(0, maxLength) + "…";
  }
  return cleaned;
}
/**
 * Wrap user-provided data with clear boundary markers.
 * Makes it explicit to the LLM that the enclosed text is user data,
 * not system instructions.
 *
 * Hardened against boundary spoofing: the body may not contain a
 * `<user-data …>` / `</user-data>` tag (user text could otherwise close
 * the wrapper and continue as apparent instructions), and attribute
 * values may not contain `"`, `<`, or `>` (which would break out of the
 * quoted attribute).
 *
 * @param label - Identifies where the data came from (attribute value).
 * @param content - Untrusted text; passed through sanitizeForPrompt().
 * @param field - Which field the data belongs to (attribute value).
 * @returns The wrapped markup, or "" when nothing survives sanitization.
 */
export function wrapUserContent(
  label: string,
  content: string,
  field = "content",
): string {
  const sanitized = sanitizeForPrompt(content);
  if (!sanitized) return "";
  // User text must not be able to close (or re-open) the wrapper tag.
  const safeBody = sanitized.replace(/<\/?user-data[^>]*>/gi, "");
  // Attribute values must not escape their quotes or the tag itself.
  const attr = (value: string): string => value.replace(/["<>]/g, "");
  return `<user-data source="${attr(label)}" field="${attr(field)}">${safeBody}</user-data>`;
}