rspace-online/modules/rnotes/converters/evernote.ts

237 lines
7.4 KiB
TypeScript

/**
* Evernote ENEX → rNotes converter.
*
* Import: Parse .enex XML (ENML — strict HTML subset inside <en-note>)
* Convert ENML → markdown via Turndown.
* Extract <resource> base64 attachments, save to /data/files/uploads/.
* File-based import (.enex), no auth needed.
*/
import { createHash } from 'node:crypto';
import TurndownService from 'turndown';
import { markdownToTiptap, extractPlainTextFromTiptap } from './markdown-tiptap';
import { registerConverter, hashContent } from './index';
import type { ConvertedNote, ImportInput, ImportResult, ExportOptions, ExportResult, NoteConverter } from './index';
import type { NoteItem } from '../schemas';
const turndown = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });

// ENML defines elements that plain HTML does not; the rules below teach Turndown to render them.

// <en-media>: emit a markdown image (image/* types) or link whose target is the
// placeholder `resource:<hash>`, built from the element's `hash` attribute.
turndown.addRule('enMedia', {
  filter: (node) => node.nodeName === 'EN-MEDIA',
  replacement: (_content, node) => {
    const media = node as Element;
    const resourceHash = media.getAttribute('hash') || '';
    const mimeType = media.getAttribute('type') || '';
    const label = mimeType.startsWith('image/') ? '![image]' : '[attachment]';
    return `${label}(resource:${resourceHash})`;
  },
});

// <en-todo>: render as a task-list checkbox; only checked="true" counts as ticked.
turndown.addRule('enTodo', {
  filter: (node) => node.nodeName === 'EN-TODO',
  replacement: (_content, node) =>
    (node as Element).getAttribute('checked') === 'true' ? '[x] ' : '[ ] ',
});
/**
 * Simple XML element content extractor (avoids needing a full DOM parser on server).
 *
 * Scans `xml` left to right and returns the raw text found between each
 * `<tagName ...>` and the next `</tagName>`, in document order.
 *
 * - The tag name must match exactly: `note` does not match `<notes>` or `<note-attributes>`.
 * - A self-closing element (`<tagName/>`) contributes an empty string.
 * - Scanning stops at the first unterminated element; results collected so far are returned.
 * - Nested elements with the same name are not supported (the first closing tag wins).
 *
 * @param xml     XML text to scan.
 * @param tagName Element name, without angle brackets.
 * @returns Inner content of every matching element (untrimmed, entities not decoded).
 */
function extractTagContent(xml: string, tagName: string): string[] {
  const results: string[] = [];
  const openTag = `<${tagName}`;
  const closeTag = `</${tagName}>`;
  let pos = 0;
  while (true) {
    const start = xml.indexOf(openTag, pos);
    if (start === -1) break;
    // The name has to end right here; otherwise this is a longer tag that merely shares the prefix.
    const boundary = xml.charAt(start + openTag.length);
    if (boundary !== '>' && boundary !== '/' && !/\s/.test(boundary)) {
      pos = start + openTag.length;
      continue;
    }
    // Find end of opening tag (handles attributes)
    const tagEnd = xml.indexOf('>', start);
    if (tagEnd === -1) break;
    if (xml.charAt(tagEnd - 1) === '/') {
      // Self-closing element: it has no closing tag to search for.
      results.push('');
      pos = tagEnd + 1;
      continue;
    }
    const end = xml.indexOf(closeTag, tagEnd);
    if (end === -1) break;
    results.push(xml.substring(tagEnd + 1, end));
    pos = end + closeTag.length;
  }
  return results;
}
/**
 * Return the trimmed content of the first `tagName` element found in `xml`.
 * Yields an empty string when there is no such element or its content is blank.
 */
function extractSingleTag(xml: string, tagName: string): string {
  const [first] = extractTagContent(xml, tagName);
  if (first === undefined) return '';
  return first.trim();
}
/**
 * Extract the value of the first `attrName="..."` (or `attrName='...'`) attribute in `xml`.
 *
 * The name is matched literally (regex metacharacters are escaped) and must be preceded
 * by whitespace or the start of the string, so `hash` does not match inside `data-hash`.
 * Whitespace around `=` is tolerated. Matching is case-insensitive.
 *
 * @param xml      Text containing the attribute, typically an opening tag.
 * @param attrName Attribute name to look up.
 * @returns The raw attribute value (entities not decoded), or '' when absent.
 */
function extractAttribute(xml: string, attrName: string): string {
  const escapedName = attrName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const pattern = new RegExp(`(?:^|\\s)${escapedName}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`, 'i');
  const match = pattern.exec(xml);
  // Group 1 holds a double-quoted value, group 2 a single-quoted one.
  return match?.[1] ?? match?.[2] ?? '';
}
/** The five entity names predefined by XML. */
const XML_NAMED_ENTITIES: Readonly<Record<string, string>> = {
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
};

/**
 * Turn raw XML character data into plain text: a CDATA section is unwrapped verbatim,
 * anything else has its predefined entities and numeric character references expanded.
 * A single replace pass is used so `&amp;lt;` becomes `&lt;`, not `<`.
 */
function decodeXmlText(raw: string): string {
  const cdata = /^\s*<!\[CDATA\[([\s\S]*?)\]\]>\s*$/.exec(raw);
  if (cdata) return cdata[1] ?? '';
  return raw.replace(/&(#[xX][0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (whole, body: string) => {
    if (body.charAt(0) !== '#') return XML_NAMED_ENTITIES[body] ?? whole;
    const isHex = body.charAt(1) === 'x' || body.charAt(1) === 'X';
    const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    // Leave out-of-range references untouched rather than letting fromCodePoint throw.
    return Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff
      ? String.fromCodePoint(codePoint)
      : whole;
  });
}

/**
 * Parse a single <note> element from ENEX.
 *
 * @param noteXml Inner XML of one `<note>` element.
 * @returns
 *  - `title`: decoded note title, or 'Untitled' when missing/blank.
 *  - `content`: the ENML markup from `<content>` with its CDATA wrapper removed.
 *  - `tags`: decoded `<tag>` values, lower-cased, whitespace runs replaced by '-', blanks dropped.
 *  - `created` / `updated`: raw text of the corresponding elements ('' when absent).
 *  - `resources`: every `<resource>` whose base64 `<data>` decoded successfully. `hash` is the
 *    recognition `objID` when present, otherwise the result of `simpleHash` over the bytes.
 */
function parseNote(noteXml: string): {
  title: string;
  content: string;
  tags: string[];
  created?: string;
  updated?: string;
  resources: { hash: string; mime: string; data: Uint8Array; filename?: string }[];
} {
  const title = decodeXmlText(extractSingleTag(noteXml, 'title')).trim() || 'Untitled';

  // Extract ENML content (inside <content> CDATA) and strip the CDATA wrapper if present.
  const enml = extractSingleTag(noteXml, 'content')
    .replace(/^\s*<!\[CDATA\[/, '')
    .replace(/\]\]>\s*$/, '');

  const tags = extractTagContent(noteXml, 'tag')
    .map((rawTag) => decodeXmlText(rawTag).trim().toLowerCase().replace(/\s+/g, '-'))
    .filter((tag) => tag !== '');

  const created = extractSingleTag(noteXml, 'created');
  const updated = extractSingleTag(noteXml, 'updated');

  // Extract resources (attachments)
  const resources: { hash: string; mime: string; data: Uint8Array; filename?: string }[] = [];
  for (const resXml of extractTagContent(noteXml, 'resource')) {
    const mime = extractSingleTag(resXml, 'mime');
    const b64Data = extractSingleTag(resXml, 'data');

    // Read `encoding` from the <data> opening tag only: the recognition payload embeds an
    // XML declaration that carries its own, unrelated encoding="..." attribute.
    const dataOpenTag = /<data(?=[\s>])[^>]*>/.exec(resXml)?.[0] ?? '';
    const encoding = extractAttribute(dataOpenTag, 'encoding') || 'base64';
    if (!b64Data || encoding !== 'base64') continue;

    // Prefer the hash recorded in the recognition XML (objID), when there is one.
    const recognition = extractSingleTag(resXml, 'recognition');
    let hash = recognition ? recognition.match(/objID="([^"]+)"/)?.[1] ?? '' : '';

    // Optional original file name from <resource-attributes>.
    const resAttrs = extractSingleTag(resXml, 'resource-attributes');
    const rawName = resAttrs ? extractSingleTag(resAttrs, 'file-name') : '';
    const filename = rawName ? decodeXmlText(rawName) : undefined;

    try {
      // Decode base64 (line breaks inside the payload are not part of the data).
      const binary = atob(b64Data.replace(/\s/g, ''));
      const bytes = new Uint8Array(binary.length);
      for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
      // No recognition hash: derive one from the bytes so en-media references can be matched.
      if (!hash) hash = simpleHash(bytes);
      resources.push({ hash, mime, data: bytes, filename });
    } catch {
      // Best effort: a resource with malformed base64 is skipped, the note itself is kept.
    }
  }

  return { title, content: enml, tags, created, updated, resources };
}
/**
 * Hash used to match a resource to its `<en-media hash="...">` reference when the
 * resource carries no recognition hash.
 *
 * ENML identifies a resource by the MD5 digest of its body, so this returns exactly that
 * (32 lowercase hex characters). A non-MD5 value could never equal an en-media hash, which
 * would leave every such attachment unresolved. MD5 is used for identification only here,
 * not for any security purpose.
 *
 * @param data Decoded resource bytes.
 * @returns Lowercase hexadecimal MD5 digest of `data`.
 */
function simpleHash(data: Uint8Array): string {
  return createHash('md5').update(data).digest('hex');
}
/** An attachment extracted from a note, in the shape stored on ConvertedNote.attachments. */
type EvernoteAttachment = { filename: string; data: Uint8Array; mimeType: string };

const EVERNOTE_NOTEBOOK_TITLE = 'Evernote Import';

/** MIME substring → file extension, checked in order; the first hit wins. */
const MIME_EXTENSIONS: ReadonlyArray<readonly [string, string]> = [
  ['jpeg', 'jpg'],
  ['jpg', 'jpg'],
  ['png', 'png'],
  ['gif', 'gif'],
  ['webp', 'webp'],
  ['pdf', 'pdf'],
];

/** Pick a file extension for a MIME type, falling back to 'bin'. */
function extensionForMime(mime: string): string {
  return MIME_EXTENSIONS.find(([needle]) => mime.includes(needle))?.[1] ?? 'bin';
}

/**
 * Reduce a file name taken from the ENEX file (untrusted input) to something safe to place
 * under the uploads directory and inside a markdown link destination: directory components
 * are dropped, and whitespace, control characters, parentheses and angle brackets become '_'.
 * Returns '' when nothing usable remains (e.g. '..').
 */
function sanitizeAttachmentName(name: string): string {
  const base = name.split(/[\\/]/).pop() ?? '';
  const cleaned = base.replace(/[\s()<>\x00-\x1f]+/g, '_');
  return cleaned === '' || /^\.+$/.test(cleaned) ? '' : cleaned;
}

/**
 * Evernote (.enex) converter.
 *
 * `import` decodes the uploaded file as text, parses every `<note>`, converts its ENML to
 * markdown with Turndown, rewrites `resource:<hash>` placeholders to
 * `/data/files/uploads/<filename>` for resources found in the note, and returns the notes
 * together with any warnings. A note that fails to convert is reported as a warning and
 * skipped; the rest of the import continues. `export` is not supported and always rejects.
 */
const evernoteConverter: NoteConverter = {
  id: 'evernote',
  name: 'Evernote',
  requiresAuth: false,

  async import(input: ImportInput): Promise<ImportResult> {
    if (!input.fileData) {
      throw new Error('Evernote import requires an .enex file');
    }
    const enexXml = new TextDecoder().decode(input.fileData);
    const noteBlocks = extractTagContent(enexXml, 'note');
    if (noteBlocks.length === 0) {
      return { notes: [], notebookTitle: EVERNOTE_NOTEBOOK_TITLE, warnings: ['No notes found in ENEX file'] };
    }

    const notes: ConvertedNote[] = [];
    const warnings: string[] = [];
    // filename → hash of the resource that owns it, across the whole import, so two
    // different resources never end up sharing (and overwriting) one upload name.
    const claimedNames = new Map<string, string>();

    for (const noteXml of noteBlocks) {
      try {
        const parsed = parseNote(noteXml);

        // Build resource hash→attachment map for en-media replacement
        const resourceMap = new Map<string, EvernoteAttachment>();
        for (const res of parsed.resources) {
          const generated = `evernote-${res.hash}.${extensionForMime(res.mime)}`;
          let fname = (res.filename ? sanitizeAttachmentName(res.filename) : '') || generated;
          const owner = claimedNames.get(fname);
          if (owner !== undefined && owner !== res.hash) fname = `${res.hash}-${fname}`;
          claimedNames.set(fname, res.hash);
          resourceMap.set(res.hash, { filename: fname, data: res.data, mimeType: res.mime });
        }

        // Convert ENML to markdown
        let markdown = turndown.turndown(parsed.content);

        // Resolve resource: references to actual file paths
        const attachments: EvernoteAttachment[] = [];
        const attachedHashes = new Set<string>();
        const unresolvedHashes = new Set<string>();
        markdown = markdown.replace(/resource:([a-f0-9]+)/g, (placeholder, hash: string) => {
          const res = resourceMap.get(hash);
          if (!res) {
            unresolvedHashes.add(hash);
            return placeholder;
          }
          // A resource referenced several times is still attached only once.
          if (!attachedHashes.has(hash)) {
            attachedHashes.add(hash);
            attachments.push(res);
          }
          return `/data/files/uploads/${res.filename}`;
        });
        if (unresolvedHashes.size > 0) {
          warnings.push(
            `Note "${parsed.title}": ${unresolvedHashes.size} resource reference(s) could not be matched to an attachment`,
          );
        }

        const tiptapJson = markdownToTiptap(markdown);
        const contentPlain = extractPlainTextFromTiptap(tiptapJson);
        notes.push({
          title: parsed.title,
          content: tiptapJson,
          contentPlain,
          markdown,
          tags: parsed.tags,
          attachments: attachments.length > 0 ? attachments : undefined,
          sourceRef: {
            source: 'evernote',
            externalId: `enex:${parsed.title}`,
            lastSyncedAt: Date.now(),
            contentHash: hashContent(markdown),
          },
        });
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        warnings.push(`Failed to parse note: ${reason}`);
      }
    }

    return { notes, notebookTitle: EVERNOTE_NOTEBOOK_TITLE, warnings };
  },

  async export(): Promise<ExportResult> {
    throw new Error('Evernote export is not supported — use Evernote\'s native import');
  },
};

registerConverter(evernoteConverter);