// rspace-online/shared/local-first/query.ts
//
// 349 lines
// 9.3 KiB
// TypeScript

/**
* Layer 6: Query — Materialized views + full-text search over Automerge documents.
*
* All computation is client-side. Views are automatically recomputed when their
* source documents change (via DocumentManager change subscriptions).
*/
import type { DocumentId } from './document';
import type { DocumentManager } from './document';
// ============================================================================
// TYPES
// ============================================================================
/**
* A materialized view: a named projection of one document into a derived shape.
*
* ViewEngine caches the computed value per view id and recomputes it each time
* the source document reports a change (not lazily on read).
*
* T is the source document type; V is the type of the projected value.
*/
export interface MaterializedView<T = any, V = any> {
/** Unique view identifier; ViewEngine keys its cache and subscribers by this id */
id: string;
/** Which document this view is derived from */
docId: DocumentId;
/**
* Project the document into the view.
* ViewEngine catches and logs anything thrown here, leaving the cached value untouched.
*/
compute(doc: T): V;
}
/**
* One hit returned by LocalSearchEngine.search(): a single indexed field of a
* single document that matched at least one query token.
*/
export interface SearchResult {
/** Document the matching field belongs to */
docId: DocumentId;
/**
* Path of the matching field. For entries created by indexMap() this has the
* form `<mapField>.<itemId>.<key>`.
*/
field: string;
/** The matched text snippet */
snippet: string;
/** Relevance score (higher = better match); the fraction of query tokens that matched */
score: number;
}
/**
* Internal search-index record: the text of one field of one document,
* together with its pre-computed tokens.
*/
interface IndexEntry {
/** Document the text was read from */
docId: DocumentId;
/** Field path the text was read from */
field: string;
/** Original, unmodified field text (used to build result snippets) */
text: string;
/** Lowercase tokens for matching */
tokens: string[];
}
// ============================================================================
// ViewEngine
// ============================================================================
/**
 * Maintains materialized views over documents held by a DocumentManager.
 *
 * Every registered view is computed from its source document, cached under the
 * view id, and recomputed whenever the DocumentManager reports a change to that
 * document. Subscribers of a view receive each freshly computed value.
 */
export class ViewEngine {
  /** Registered view definitions, by view id. */
  #views = new Map<string, MaterializedView>();
  /** Last successfully computed value per view id. */
  #cache = new Map<string, any>();
  #documents: DocumentManager;
  /** Unsubscribe handles for the document-change subscriptions, by view id. */
  #unsubs = new Map<string, () => void>();
  /** View-change listeners, by view id. */
  #subscribers = new Map<string, Set<(v: any) => void>>();

  constructor(documents: DocumentManager) {
    this.#documents = documents;
  }

  /**
   * Register a materialized view. Computes it immediately if the
   * DocumentManager currently returns the source document, then recomputes it
   * on every change of that document.
   *
   * Registering an id that is already registered replaces the previous view;
   * existing subscribers of that id are kept.
   */
  register<T, V>(view: MaterializedView<T, V>): void {
    // Release the previous registration first. Otherwise its document
    // subscription would leak and the old view would keep overwriting the
    // cache entry that now belongs to the new one.
    const previousUnsub = this.#unsubs.get(view.id);
    if (previousUnsub) {
      previousUnsub();
      this.#unsubs.delete(view.id);
      // The cached value was produced by the replaced definition.
      this.#cache.delete(view.id);
    }

    this.#views.set(view.id, view);

    // Compute initial value if doc is available
    const doc = this.#documents.get<T>(view.docId);
    if (doc) {
      this.#recompute(view);
    }

    // Subscribe to document changes
    const unsub = this.#documents.onChange<T>(view.docId, () => {
      this.#recompute(view);
    });
    this.#unsubs.set(view.id, unsub);
  }

  /**
   * Unregister a view: drops its definition, cached value and subscribers, and
   * releases its document-change subscription. Unknown ids are ignored.
   */
  unregister(viewId: string): void {
    this.#views.delete(viewId);
    this.#cache.delete(viewId);
    this.#subscribers.delete(viewId);
    const unsub = this.#unsubs.get(viewId);
    if (unsub) {
      unsub();
      this.#unsubs.delete(viewId);
    }
  }

  /**
   * Get the current value of a view (cached).
   *
   * @returns The last computed value, or `undefined` if the view is unknown or
   *          has not been computed successfully yet.
   */
  get<V>(viewId: string): V | undefined {
    return this.#cache.get(viewId) as V | undefined;
  }

  /**
   * Subscribe to view changes.
   *
   * If a cached value exists, `cb` is invoked synchronously with it before this
   * method returns. Subscribing to an id that is not (yet) registered is
   * allowed.
   *
   * @returns A function that removes the subscription; calling it more than
   *          once is harmless.
   */
  subscribe<V>(viewId: string, cb: (v: V) => void): () => void {
    let listeners = this.#subscribers.get(viewId);
    if (!listeners) {
      listeners = new Set();
      this.#subscribers.set(viewId, listeners);
    }
    const ownSet = listeners;
    ownSet.add(cb);

    // Immediately call with current value if available
    const current = this.#cache.get(viewId);
    if (current !== undefined) {
      cb(current as V);
    }

    return () => {
      ownSet.delete(cb);
      // Drop the now-empty set, but only if it is still the one stored for this
      // id (unregister/destroy may already have replaced or removed it).
      if (ownSet.size === 0 && this.#subscribers.get(viewId) === ownSet) {
        this.#subscribers.delete(viewId);
      }
    };
  }

  /**
   * Force recompute a view. Unknown ids are ignored.
   */
  refresh(viewId: string): void {
    const view = this.#views.get(viewId);
    if (view) this.#recompute(view);
  }

  /**
   * Destroy all views and clean up subscriptions.
   */
  destroy(): void {
    for (const unsub of this.#unsubs.values()) {
      unsub();
    }
    this.#views.clear();
    this.#cache.clear();
    this.#unsubs.clear();
    this.#subscribers.clear();
  }

  /**
   * Compute `view` from its source document, cache the result and notify
   * subscribers. Does nothing when the document is unavailable. A throwing
   * `compute` is logged and leaves the previous cached value in place.
   */
  #recompute(view: MaterializedView): void {
    const doc = this.#documents.get(view.docId);
    if (!doc) return;

    let value: unknown;
    try {
      value = view.compute(doc);
    } catch (e) {
      console.error(`[ViewEngine] Error computing view "${view.id}":`, e);
      return;
    }
    this.#cache.set(view.id, value);

    const listeners = this.#subscribers.get(view.id);
    if (!listeners) return;

    // Iterate a snapshot: a callback that subscribes during notification has
    // already received this value from subscribe() and must not get it twice.
    for (const cb of [...listeners]) {
      // Skip callbacks that an earlier callback unsubscribed in this round.
      if (!listeners.has(cb)) continue;
      try {
        cb(value);
      } catch (e) {
        // One failing subscriber must not block the others, but the failure
        // should be visible rather than silently dropped.
        console.error(`[ViewEngine] Subscriber for view "${view.id}" threw:`, e);
      }
    }
  }
}
// ============================================================================
// LocalSearchEngine
// ============================================================================
/**
 * Client-side full-text search over Automerge documents.
 * Simple token-based matching — not a full inverted index, but fast enough
 * for the expected data sizes (hundreds, not millions of documents).
 *
 * The index holds at most one entry per (document, field path). Re-indexing a
 * field replaces its entry.
 */
export class LocalSearchEngine {
  #index: IndexEntry[] = [];
  #documents: DocumentManager;
  /**
   * docId → mapField → field paths written by the last indexMap() call, so that
   * entries for items that have since disappeared can be purged.
   */
  #mapFields = new Map<DocumentId, Map<string, Set<string>>>();

  constructor(documents: DocumentManager) {
    this.#documents = documents;
  }

  /**
   * Index specific fields of a document for searching.
   * Call this when a document is opened or changes.
   *
   * Each field is a dot-separated path. Existing entries for the listed fields
   * are dropped first; a field whose current value is missing, empty, or not a
   * string is therefore removed from the index. Does nothing if the
   * DocumentManager does not return the document.
   */
  index(docId: DocumentId, fields: string[]): void {
    const doc = this.#documents.get(docId);
    if (!doc) return;

    const targets = new Set(fields);
    // Single pass over the index instead of one full filter per field.
    this.#index = this.#index.filter((e) => !(e.docId === docId && targets.has(e.field)));

    for (const field of targets) {
      const text = extractText(doc, field);
      if (!text) continue;
      this.#index.push({ docId, field, text, tokens: tokenize(text) });
    }
  }

  /**
   * Index all text fields from a map/object structure.
   * Walks one level of keys, indexes any non-empty string values under the
   * field path `<mapField>.<itemId>.<key>`.
   *
   * Entries written by an earlier call for the same document and map are
   * removed when the item or key no longer exists or no longer holds a string,
   * so deleted items stop showing up in search results. Does nothing if the
   * DocumentManager does not return the document.
   */
  indexMap(docId: DocumentId, mapField: string): void {
    const doc = this.#documents.get(docId);
    if (!doc) return;

    // Collect the entries the map currently yields.
    const fresh: IndexEntry[] = [];
    const map = (doc as any)[mapField];
    if (map && typeof map === 'object') {
      for (const [itemId, item] of Object.entries(map)) {
        if (!item || typeof item !== 'object') continue;
        for (const [key, value] of Object.entries(item as Record<string, unknown>)) {
          // Empty strings produce no tokens and can never match.
          if (typeof value !== 'string' || value.length === 0) continue;
          fresh.push({
            docId,
            field: `${mapField}.${itemId}.${key}`,
            text: value,
            tokens: tokenize(value),
          });
        }
      }
    }

    // Drop, in one pass, everything this map wrote before plus anything about
    // to be rewritten, then append the fresh entries.
    const freshFields = new Set(fresh.map((e) => e.field));
    const previousFields = this.#mapFields.get(docId)?.get(mapField);
    const kept = this.#index.filter((e) => {
      if (e.docId !== docId) return true;
      if (freshFields.has(e.field)) return false;
      return !(previousFields?.has(e.field) ?? false);
    });
    this.#index = kept.concat(fresh);

    // Remember what was written for the next call.
    let perDoc = this.#mapFields.get(docId);
    if (freshFields.size === 0) {
      perDoc?.delete(mapField);
      if (perDoc && perDoc.size === 0) this.#mapFields.delete(docId);
      return;
    }
    if (!perDoc) {
      perDoc = new Map();
      this.#mapFields.set(docId, perDoc);
    }
    perDoc.set(mapField, freshFields);
  }

  /**
   * Remove all index entries for a document.
   */
  removeDoc(docId: DocumentId): void {
    this.#index = this.#index.filter((e) => e.docId !== docId);
    this.#mapFields.delete(docId);
  }

  /**
   * Search across all indexed documents.
   *
   * @param query Free text; it is tokenized the same way as indexed text.
   * @param opts.module When set, only entries whose docId has this value as its
   *        second `:`-separated segment are considered.
   *        NOTE(review): assumes ids are shaped like `<prefix>:<module>:…` — confirm.
   * @param opts.maxResults Maximum number of hits to return (default 50);
   *        values below zero are treated as zero.
   * @returns Hits with a score above zero, best score first.
   */
  search(query: string, opts?: { module?: string; maxResults?: number }): SearchResult[] {
    const queryTokens = tokenize(query);
    if (queryTokens.length === 0) return [];

    const moduleFilter = opts?.module;
    // A negative limit would make slice() drop hits from the end instead.
    const limit = Math.max(0, opts?.maxResults ?? 50);

    const hits: { entry: IndexEntry; score: number }[] = [];
    for (const entry of this.#index) {
      // Optional module filter
      if (moduleFilter && entry.docId.split(':')[1] !== moduleFilter) continue;
      const score = computeScore(queryTokens, entry.tokens);
      if (score > 0) hits.push({ entry, score });
    }

    // Sort by score descending
    hits.sort((a, b) => b.score - a.score);

    // Build snippets only for the hits that are actually returned.
    return hits.slice(0, limit).map(({ entry, score }) => ({
      docId: entry.docId,
      field: entry.field,
      snippet: createSnippet(entry.text, query),
      score,
    }));
  }

  /**
   * Clear the entire index.
   */
  clear(): void {
    this.#index = [];
    this.#mapFields.clear();
  }
}
// ============================================================================
// UTILITIES
// ============================================================================
/**
 * Split text into lowercase search tokens.
 *
 * Everything that is not a letter, combining mark, digit, underscore or
 * whitespace is treated as a separator. Letters and digits are matched with
 * Unicode properties rather than the ASCII-only `\w`, so accented and non-Latin
 * words survive intact. Tokens shorter than two characters are dropped.
 *
 * @param text Raw text to tokenize.
 * @returns Tokens in order of appearance (duplicates are kept).
 */
function tokenize(text: string): string[] {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
    .split(/\s+/)
    .filter((t) => t.length > 1);
}
/**
 * Resolve a dot-separated field path inside a document and return the string
 * found there.
 *
 * @param doc Root value to walk.
 * @param fieldPath Path such as `meta.title`; each segment is used as a
 *        property key on the value reached so far.
 * @returns The string at the end of the path, or `null` when a step along the
 *          way is not an object or the final value is not a string.
 */
function extractText(doc: any, fieldPath: string): string | null {
  let cursor: any = doc;
  for (const segment of fieldPath.split('.')) {
    // Only objects (including arrays) can be descended into.
    const canDescend = cursor !== null && cursor !== undefined && typeof cursor === 'object';
    if (!canDescend) {
      return null;
    }
    cursor = cursor[segment];
  }
  if (typeof cursor !== 'string') {
    return null;
  }
  return cursor;
}
/**
 * Score a document's tokens against a query.
 *
 * A query token counts as matched when it is a substring of at least one
 * document token.
 *
 * @param queryTokens Tokens of the search query.
 * @param docTokens Tokens of the indexed text.
 * @returns The fraction of query tokens matched, between 0 and 1; 0 when there
 *          are no query tokens (instead of the NaN that 0/0 would give).
 */
function computeScore(queryTokens: string[], docTokens: string[]): number {
  if (queryTokens.length === 0) return 0;

  const matched = queryTokens.filter((qt) => docTokens.some((dt) => dt.includes(qt))).length;

  // Score: fraction of query tokens matched
  return matched / queryTokens.length;
}
/**
 * Build a short excerpt of `text` around the place where `query` matches.
 *
 * The whole (trimmed, case-insensitive) query is looked for first. If that
 * phrase does not occur, the earliest occurrence of any individual query word
 * is used instead, so multi-word queries that matched word by word still get an
 * excerpt around a real match. When nothing is found, the beginning of the text
 * is returned.
 *
 * @param text Full field text.
 * @param query Search query as typed by the user.
 * @param contextChars Characters of context to keep on each side of the match.
 * @returns The excerpt, with `...` marking any side that was cut off.
 */
function createSnippet(text: string, query: string, contextChars = 60): string {
  // NOTE: offsets are found in the lowercased copy and applied to the original.
  // Lowercasing can change the length of a few characters (e.g. U+0130), in
  // which case the excerpt may be shifted slightly.
  const lowerText = text.toLowerCase();
  const phrase = query.trim().toLowerCase();

  // An empty phrase would "match" at offset 0; treat it as no match instead.
  let matchIndex = phrase.length > 0 ? lowerText.indexOf(phrase) : -1;
  let matchLength = phrase.length;

  if (matchIndex === -1) {
    // Fall back to the earliest single word of the query that occurs in the text.
    for (const word of phrase.split(/[^\p{L}\p{M}\p{N}_]+/u)) {
      // Words shorter than two characters are never search tokens; skip them.
      if (word.length < 2) continue;
      const at = lowerText.indexOf(word);
      if (at !== -1 && (matchIndex === -1 || at < matchIndex)) {
        matchIndex = at;
        matchLength = word.length;
      }
    }
  }

  if (matchIndex === -1) {
    // No match at all; return beginning of text
    return text.length > contextChars * 2
      ? text.slice(0, contextChars * 2) + '...'
      : text;
  }

  const start = Math.max(0, matchIndex - contextChars);
  const end = Math.min(text.length, matchIndex + matchLength + contextChars);
  let snippet = text.slice(start, end);
  if (start > 0) snippet = '...' + snippet;
  if (end < text.length) snippet = snippet + '...';
  return snippet;
}