rspace-online/server/mi-trigrams.ts

26 lines
763 B
TypeScript

/**
* Trigram + Jaccard similarity utilities for MI knowledge ranking.
* Pure functions, zero dependencies.
*/
/**
 * Extract the set of distinct character trigrams from `text`.
 *
 * Normalisation: the text is lowercased, then every character that is not a
 * letter, combining mark, digit, underscore, or whitespace is removed.
 * Whitespace is kept as-is, so trigrams may span word boundaries.
 *
 * Letters and digits are matched with Unicode property escapes rather than
 * the ASCII-only `\w`, so non-ASCII text (accented Latin, Cyrillic, CJK, ...)
 * contributes trigrams instead of being stripped. For pure-ASCII input the
 * result is the same as with `\w`.
 *
 * @param text - Raw input text.
 * @returns The distinct 3-character windows of the normalised text; empty
 *   when fewer than three characters remain after normalisation.
 */
export function trigrams(text: string): Set<string> {
  const normalized = text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, "");
  // Window over code points, not UTF-16 code units, so a character encoded
  // as a surrogate pair is never cut in half.
  const chars = Array.from(normalized);
  const set = new Set<string>();
  for (let i = 0; i + 3 <= chars.length; i++) {
    set.add(chars.slice(i, i + 3).join(""));
  }
  return set;
}
/**
 * Jaccard similarity of two trigram sets: |a ∩ b| / |a ∪ b|.
 *
 * @param a - First trigram set.
 * @param b - Second trigram set.
 * @returns A value in the range 0..1; 0 when both sets are empty.
 */
export function jaccardSimilarity(a: Set<string>, b: Set<string>): number {
  // Count the members of `a` that also appear in `b`.
  let shared = 0;
  a.forEach((gram) => {
    if (b.has(gram)) {
      shared += 1;
    }
  });

  // |a ∪ b| by inclusion-exclusion; it is zero only when both sets are empty.
  const total = a.size + b.size - shared;
  if (total === 0) {
    return 0;
  }
  return shared / total;
}