rspace-online/modules/rnotes/converters/evernote.ts

/**
 * Evernote ENEX → rNotes converter.
 *
 * Import: Parse .enex XML (ENML — strict HTML subset inside <en-note>)
 * Convert ENML → markdown via Turndown.
 * Extract <resource> base64 attachments, save to /data/files/uploads/.
 * File-based import (.enex), no auth needed.
 */

import { createHash } from 'node:crypto';

import TurndownService from 'turndown';
import { markdownToTiptap, extractPlainTextFromTiptap } from './markdown-tiptap';
import { registerConverter, hashContent } from './index';
import type { ConvertedNote, ImportInput, ImportResult, ExportOptions, ExportResult, NoteConverter } from './index';
import type { NoteItem } from '../schemas';

const turndown = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });

// Custom Turndown rules for ENML-specific elements
turndown.addRule('enMedia', {
	filter: (node) => node.nodeName === 'EN-MEDIA',
	replacement: (_content, node) => {
		const el = node as Element;
		const hash = el.getAttribute('hash') || '';
		const type = el.getAttribute('type') || '';
		if (type.startsWith('image/')) {
			return `![image](resource:${hash})`;
		}
		return `[attachment](resource:${hash})`;
	},
});

turndown.addRule('enTodo', {
	filter: (node) => node.nodeName === 'EN-TODO',
	replacement: (_content, node) => {
		const el = node as Element;
		const checked = el.getAttribute('checked') === 'true';
		return checked ? '[x] ' : '[ ] ';
	},
});

/** Simple XML tag content extractor (avoids needing a full DOM parser on server). */
function extractTagContent(xml: string, tagName: string): string[] {
	const results: string[] = [];
	const openTag = `<${tagName}`;
	const closeTag = `</${tagName}>`;
	let pos = 0;

	while (true) {
		const start = xml.indexOf(openTag, pos);
		if (start === -1) break;

		// Find end of opening tag (handles attributes)
		const tagEnd = xml.indexOf('>', start);
		if (tagEnd === -1) break;

		const end = xml.indexOf(closeTag, tagEnd);
		if (end === -1) break;

		results.push(xml.substring(tagEnd + 1, end));
		pos = end + closeTag.length;
	}

	return results;
}

/** Extract a single tag's text content. */
function extractSingleTag(xml: string, tagName: string): string {
	const results = extractTagContent(xml, tagName);
	return results[0]?.trim() || '';
}

/** Extract attribute value from a tag. */
function extractAttribute(xml: string, attrName: string): string {
	const match = xml.match(new RegExp(`${attrName}="([^"]*)"`, 'i'));
	return match?.[1] || '';
}

/** Parse a single <note> element from ENEX. */
function parseNote(noteXml: string): {
	title: string;
	content: string;
	tags: string[];
	created?: string;
	updated?: string;
	resources: { hash: string; mime: string; data: Uint8Array; filename?: string }[];
} {
	const title = extractSingleTag(noteXml, 'title') || 'Untitled';

	// Extract ENML content (inside <content> CDATA)
	let enml = extractSingleTag(noteXml, 'content');
	// Strip CDATA wrapper if present
	enml = enml.replace(/^\s*<!\[CDATA\[/, '').replace(/\]\]>\s*$/, '');

	const tags: string[] = [];
	const tagMatches = extractTagContent(noteXml, 'tag');
	for (const t of tagMatches) {
		tags.push(t.trim().toLowerCase().replace(/\s+/g, '-'));
	}

	const created = extractSingleTag(noteXml, 'created');
	const updated = extractSingleTag(noteXml, 'updated');

	// Extract resources (attachments)
	const resources: { hash: string; mime: string; data: Uint8Array; filename?: string }[] = [];
	const resourceBlocks = extractTagContent(noteXml, 'resource');
	for (const resXml of resourceBlocks) {
		const mime = extractSingleTag(resXml, 'mime');
		const b64Data = extractSingleTag(resXml, 'data');
		const encoding = extractAttribute(resXml, 'encoding') || 'base64';

		// Extract recognition hash or compute from data
		let hash = '';
		const recognition = extractSingleTag(resXml, 'recognition');
		if (recognition) {
			// Try to get hash from recognition XML
			const hashMatch = recognition.match(/objID="([^"]+)"/);
			if (hashMatch) hash = hashMatch[1];
		}

		// Extract resource attributes
		const resAttrs = extractSingleTag(resXml, 'resource-attributes');
		const filename = resAttrs ? extractSingleTag(resAttrs, 'file-name') : undefined;

		if (b64Data && encoding === 'base64') {
			try {
				// Decode base64
				const cleaned = b64Data.replace(/\s/g, '');
				const binary = atob(cleaned);
				const bytes = new Uint8Array(binary.length);
				for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);

				// Compute MD5-like hash for matching en-media tags
				if (!hash) {
					hash = simpleHash(bytes);
				}

				resources.push({ hash, mime, data: bytes, filename });
			} catch { /* skip malformed base64 */ }
		}
	}

	return { title, content: enml, tags, created, updated, resources };
}

/** Simple hash for resource matching when recognition hash is missing. */
function simpleHash(data: Uint8Array): string {
	let h = 0;
	for (let i = 0; i < Math.min(data.length, 1024); i++) {
		h = ((h << 5) - h) + data[i];
		h |= 0;
	}
	return Math.abs(h).toString(16);
}

const evernoteConverter: NoteConverter = {
	id: 'evernote',
	name: 'Evernote',
	requiresAuth: false,

	async import(input: ImportInput): Promise<ImportResult> {
		if (!input.fileData) {
			throw new Error('Evernote import requires an .enex file');
		}

		const enexXml = new TextDecoder().decode(input.fileData);
		const noteBlocks = extractTagContent(enexXml, 'note');

		if (noteBlocks.length === 0) {
			return { notes: [], notebookTitle: 'Evernote Import', warnings: ['No notes found in ENEX file'] };
		}

		const notes: ConvertedNote[] = [];
		const warnings: string[] = [];

		for (const noteXml of noteBlocks) {
			try {
				const parsed = parseNote(noteXml);

				// Build resource hash→filename map for en-media replacement
				const resourceMap = new Map<string, { filename: string; data: Uint8Array; mimeType: string }>();
				for (const res of parsed.resources) {
					const ext = res.mime.includes('jpeg') || res.mime.includes('jpg') ? 'jpg'
						: res.mime.includes('png') ? 'png'
						: res.mime.includes('gif') ? 'gif'
						: res.mime.includes('webp') ? 'webp'
						: res.mime.includes('pdf') ? 'pdf'
						: 'bin';
					const fname = res.filename || `evernote-${res.hash}.${ext}`;
					resourceMap.set(res.hash, { filename: fname, data: res.data, mimeType: res.mime });
				}

				// Convert ENML to markdown
				let markdown = turndown.turndown(parsed.content);

				// Resolve resource: references to actual file paths
				const attachments: { filename: string; data: Uint8Array; mimeType: string }[] = [];
				markdown = markdown.replace(/resource:([a-f0-9]+)/g, (_match, hash) => {
					const res = resourceMap.get(hash);
					if (res) {
						attachments.push(res);
						return `/data/files/uploads/${res.filename}`;
					}
					return `resource:${hash}`;
				});

				const tiptapJson = markdownToTiptap(markdown);
				const contentPlain = extractPlainTextFromTiptap(tiptapJson);

				notes.push({
					title: parsed.title,
					content: tiptapJson,
					contentPlain,
					markdown,
					tags: parsed.tags,
					attachments: attachments.length > 0 ? attachments : undefined,
					sourceRef: {
						source: 'evernote',
						externalId: `enex:${parsed.title}`,
						lastSyncedAt: Date.now(),
						contentHash: hashContent(markdown),
					},
				});
			} catch (err) {
				warnings.push(`Failed to parse note: ${(err as Error).message}`);
			}
		}

		return { notes, notebookTitle: 'Evernote Import', warnings };
	},

	async export(): Promise<ExportResult> {
		throw new Error('Evernote export is not supported — use Evernote\'s native import');
	},
};

registerConverter(evernoteConverter);