237 lines
7.4 KiB
TypeScript
237 lines
7.4 KiB
TypeScript
/**
|
|
* Evernote ENEX → rNotes converter.
|
|
*
|
|
* Import: Parse .enex XML (ENML — strict HTML subset inside <en-note>)
|
|
* Convert ENML → markdown via Turndown.
|
|
* Extract <resource> base64 attachments, save to /data/files/uploads/.
|
|
* File-based import (.enex), no auth needed.
|
|
*/
|
|
|
|
import { createHash } from 'node:crypto';

import TurndownService from 'turndown';

import { markdownToTiptap, extractPlainTextFromTiptap } from './markdown-tiptap';
import { registerConverter, hashContent } from './index';
import type { ConvertedNote, ImportInput, ImportResult, ExportOptions, ExportResult, NoteConverter } from './index';
import type { NoteItem } from '../schemas';
|
|
|
|
/** Shared Turndown instance used to turn ENML (Evernote's HTML subset) into markdown. */
const turndown = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });

// Custom Turndown rules for ENML-specific elements

/**
 * `<en-media hash="…" type="…"/>` → a markdown reference to `resource:<hash>`.
 *
 * Images are emitted with image syntax, every other MIME type as a plain
 * link. The `resource:<hash>` destination is only a placeholder: the import
 * step later rewrites it to the uploaded file path, so BOTH branches must
 * emit it — returning an empty string for images would silently drop them.
 */
turndown.addRule('enMedia', {
  filter: (node) => node.nodeName === 'EN-MEDIA',
  replacement: (_content, node) => {
    const el = node as Element;
    const hash = el.getAttribute('hash') || '';
    const type = el.getAttribute('type') || '';
    if (type.startsWith('image/')) {
      return `![image](resource:${hash})`;
    }
    return `[attachment](resource:${hash})`;
  },
});
|
|
|
|
/**
 * `<en-todo/>` → literal checkbox marker text.
 * Emits `[x] ` only when the `checked` attribute is exactly "true";
 * any other value (or a missing attribute) yields `[ ] `.
 */
turndown.addRule('enTodo', {
  filter: (candidate) => candidate.nodeName === 'EN-TODO',
  replacement: (_inner, candidate) => {
    const isDone = (candidate as Element).getAttribute('checked') === 'true';
    if (isDone) {
      return '[x] ';
    }
    return '[ ] ';
  },
});
|
|
|
|
/**
 * Simple XML tag content extractor (avoids needing a full DOM parser on server).
 *
 * Scans `xml` left to right and returns the raw inner text of every
 * `<tagName …>…</tagName>` element, in document order. Nested elements of the
 * same name are not supported: the first closing tag ends the element.
 *
 * @param xml     XML text to scan.
 * @param tagName Exact element name; `note` does NOT match `<note-attributes>`.
 * @returns Inner text of each element found. Self-closing elements
 *          (`<tagName/>`) contribute an empty string. An element whose
 *          closing tag is missing ends the scan.
 */
function extractTagContent(xml: string, tagName: string): string[] {
  const results: string[] = [];
  const openTag = `<${tagName}`;
  const closeTag = `</${tagName}>`;
  let pos = 0;

  while (true) {
    const start = xml.indexOf(openTag, pos);
    if (start === -1) break;

    // The name must end here: otherwise `<note` would also match
    // `<note-attributes>` and `<resource` would match `<resource-attributes>`.
    const afterName = xml.charAt(start + openTag.length);
    if (afterName !== '>' && afterName !== '/' && !/\s/.test(afterName)) {
      pos = start + openTag.length;
      continue;
    }

    // Find end of opening tag (handles attributes)
    const tagEnd = xml.indexOf('>', start);
    if (tagEnd === -1) break;

    // Self-closing element: it has no body and no closing tag, so do not go
    // looking for one (that would swallow unrelated markup further on).
    if (xml.charAt(tagEnd - 1) === '/') {
      results.push('');
      pos = tagEnd + 1;
      continue;
    }

    const end = xml.indexOf(closeTag, tagEnd);
    if (end === -1) break;

    results.push(xml.substring(tagEnd + 1, end));
    pos = end + closeTag.length;
  }

  return results;
}
|
|
|
|
/**
 * Return the trimmed inner text of the first `<tagName>` element in `xml`.
 * Yields an empty string when there is no such element or when its content
 * is empty after trimming.
 */
function extractSingleTag(xml: string, tagName: string): string {
  const [first] = extractTagContent(xml, tagName);
  return first?.trim() || '';
}
|
|
|
|
/**
 * Extract the value of the first attribute named `attrName` found in `xml`.
 *
 * The name is matched case-insensitively and as a whole attribute name (it
 * must start the string or follow whitespace), so `encoding` does not match
 * `x-encoding="…"`. Both double- and single-quoted values are accepted.
 *
 * @param xml      Markup to search (typically one opening tag or element).
 * @param attrName Attribute name; treated literally, not as a regex.
 * @returns The attribute value, or '' when the attribute is absent.
 */
function extractAttribute(xml: string, attrName: string): string {
  // Escape regex metacharacters so names such as `a.b` match literally.
  const escapedName = attrName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const pattern = new RegExp(`(?:^|\\s)${escapedName}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`, 'i');
  const match = xml.match(pattern);
  // Group 1 holds a double-quoted value, group 2 a single-quoted one.
  return match?.[1] ?? match?.[2] ?? '';
}
|
|
|
|
/**
 * Decode the five predefined XML entities and numeric character references.
 * Anything that is not a recognised, valid reference is left untouched.
 * Done in a single pass so `&amp;lt;` becomes `&lt;`, not `<`.
 */
function decodeXmlEntities(text: string): string {
  const named: Record<string, string> = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };
  return text.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (whole: string, ref: string) => {
    if (ref.charAt(0) !== '#') return named[ref] ?? whole;
    const codePoint = ref.charAt(1) === 'x' ? parseInt(ref.slice(2), 16) : parseInt(ref.slice(1), 10);
    // String.fromCodePoint throws on values above U+10FFFF; keep those as-is.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
  });
}

/**
 * Parse a single <note> element from ENEX.
 *
 * @param noteXml Inner XML of one `<note>` element.
 * @returns
 *  - `title`: entity-decoded `<title>`, or 'Untitled' when missing/empty.
 *  - `content`: the ENML markup from `<content>` with its CDATA wrapper removed.
 *  - `tags`: each `<tag>` entity-decoded, lower-cased, whitespace runs turned
 *    into '-', empty results dropped.
 *  - `created` / `updated`: raw text of those elements, `undefined` if absent.
 *  - `resources`: one entry per `<resource>` whose base64 `<data>` decoded
 *    successfully; resources with malformed or non-base64 data are skipped.
 */
function parseNote(noteXml: string): {
  title: string;
  content: string;
  tags: string[];
  created?: string;
  updated?: string;
  resources: { hash: string; mime: string; data: Uint8Array; filename?: string }[];
} {
  // Titles are plain XML text, so `&amp;` etc. must be decoded.
  const title = decodeXmlEntities(extractSingleTag(noteXml, 'title')) || 'Untitled';

  // Extract ENML content (inside <content> CDATA)
  let enml = extractSingleTag(noteXml, 'content');
  // Strip CDATA wrapper if present
  enml = enml.replace(/^\s*<!\[CDATA\[/, '').replace(/\]\]>\s*$/, '');

  const tags = extractTagContent(noteXml, 'tag')
    .map((t) => decodeXmlEntities(t).trim().toLowerCase().replace(/\s+/g, '-'))
    .filter((t) => t.length > 0);

  // Report missing timestamps as undefined, matching the optional return fields.
  const created = extractSingleTag(noteXml, 'created') || undefined;
  const updated = extractSingleTag(noteXml, 'updated') || undefined;

  // Extract resources (attachments)
  const resources: { hash: string; mime: string; data: Uint8Array; filename?: string }[] = [];
  const resourceBlocks = extractTagContent(noteXml, 'resource');
  for (const resXml of resourceBlocks) {
    const mime = extractSingleTag(resXml, 'mime');
    const b64Data = extractSingleTag(resXml, 'data');
    const encoding = extractAttribute(resXml, 'encoding') || 'base64';

    // Prefer the hash recorded in the recognition XML (its objID attribute);
    // it is the key that <en-media hash="…"> references are looked up by.
    let hash = '';
    const recognition = extractSingleTag(resXml, 'recognition');
    if (recognition) {
      const hashMatch = recognition.match(/objID="([^"]+)"/);
      if (hashMatch) hash = hashMatch[1];
    }

    // Extract resource attributes
    const resAttrs = extractSingleTag(resXml, 'resource-attributes');
    const filename = resAttrs
      ? decodeXmlEntities(extractSingleTag(resAttrs, 'file-name')) || undefined
      : undefined;

    if (b64Data && encoding === 'base64') {
      try {
        // Decode base64 (ENEX wraps the payload across lines, so drop whitespace first)
        const cleaned = b64Data.replace(/\s/g, '');
        const binary = atob(cleaned);
        const bytes = new Uint8Array(binary.length);
        for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);

        // No recognition hash: derive one from the decoded bytes instead.
        if (!hash) {
          hash = simpleHash(bytes);
        }

        resources.push({ hash, mime, data: bytes, filename });
      } catch { /* skip malformed base64 — best-effort import */ }
    }
  }

  return { title, content: enml, tags, created, updated, resources };
}
|
|
|
|
/**
 * Hash for resource matching when the recognition hash is missing.
 *
 * ENML `<en-media hash="…">` identifies its resource by the lowercase hex MD5
 * digest of the decoded resource body, so the fallback key has to be that
 * same digest — any other hash can never be matched by an en-media reference.
 * Hashing the full body (not just a prefix) also keeps resources that share a
 * common file header from colliding.
 *
 * @param data Decoded resource bytes.
 * @returns 32-character lowercase hex MD5 digest of `data`.
 */
function simpleHash(data: Uint8Array): string {
  return createHash('md5').update(data).digest('hex');
}
|
|
|
|
/** Pick the file extension used for generated attachment names; 'bin' when the MIME type is not recognised. */
function extensionForMime(mime: string): string {
  if (mime.includes('jpeg') || mime.includes('jpg')) return 'jpg';
  for (const ext of ['png', 'gif', 'webp', 'pdf']) {
    if (mime.includes(ext)) return ext;
  }
  return 'bin';
}

/**
 * Reduce a file name taken from the (untrusted) ENEX file to one safe path segment.
 * Directory parts are dropped, leading dots removed, and every run of
 * characters other than letters, digits, '.', '_' and '-' becomes '_' — so the
 * name can neither escape the uploads directory nor break the markdown link
 * destination it is embedded in. Returns '' when nothing usable remains.
 */
function sanitizeAttachmentName(name: string | undefined): string {
  if (!name) return '';
  const base = name.split(/[\\/]/).pop() ?? '';
  const cleaned = base.replace(/[^\p{L}\p{N}._-]+/gu, '_').replace(/^\.+/, '');
  return cleaned === '_' ? '' : cleaned;
}

/**
 * Evernote converter: imports `.enex` exports; export is not supported.
 * Import is file-based and needs no authentication.
 */
const evernoteConverter: NoteConverter = {
  id: 'evernote',
  name: 'Evernote',
  requiresAuth: false,

  /**
   * Convert every `<note>` in an ENEX file into a ConvertedNote.
   *
   * A note that fails to convert is skipped and reported in `warnings`;
   * the remaining notes are still returned.
   *
   * @throws Error when `input.fileData` is missing.
   */
  async import(input: ImportInput): Promise<ImportResult> {
    if (!input.fileData) {
      throw new Error('Evernote import requires an .enex file');
    }

    const enexXml = new TextDecoder().decode(input.fileData);
    const noteBlocks = extractTagContent(enexXml, 'note');

    if (noteBlocks.length === 0) {
      return { notes: [], notebookTitle: 'Evernote Import', warnings: ['No notes found in ENEX file'] };
    }

    const notes: ConvertedNote[] = [];
    const warnings: string[] = [];

    for (const noteXml of noteBlocks) {
      try {
        const parsed = parseNote(noteXml);

        // Build resource hash→file map for en-media replacement
        const resourceMap = new Map<string, { filename: string; data: Uint8Array; mimeType: string }>();
        for (const res of parsed.resources) {
          const fname =
            sanitizeAttachmentName(res.filename) || `evernote-${res.hash}.${extensionForMime(res.mime)}`;
          resourceMap.set(res.hash, { filename: fname, data: res.data, mimeType: res.mime });
        }

        // Convert ENML to markdown
        let markdown = turndown.turndown(parsed.content);

        // Resolve resource: references to actual file paths. A Set keeps a
        // resource that is referenced several times from being attached twice.
        const used = new Set<{ filename: string; data: Uint8Array; mimeType: string }>();
        const missing = new Set<string>();
        markdown = markdown.replace(/resource:([a-f0-9]+)/g, (match, hash: string) => {
          const res = resourceMap.get(hash);
          if (!res) {
            // Leave the placeholder in place, but do not lose the fact silently.
            missing.add(hash);
            return match;
          }
          used.add(res);
          return `/data/files/uploads/${res.filename}`;
        });
        for (const hash of missing) {
          warnings.push(`Note "${parsed.title}": attachment ${hash} not found in ENEX file`);
        }
        const attachments = [...used];

        const tiptapJson = markdownToTiptap(markdown);
        const contentPlain = extractPlainTextFromTiptap(tiptapJson);

        notes.push({
          title: parsed.title,
          content: tiptapJson,
          contentPlain,
          markdown,
          tags: parsed.tags,
          attachments: attachments.length > 0 ? attachments : undefined,
          sourceRef: {
            source: 'evernote',
            externalId: `enex:${parsed.title}`,
            lastSyncedAt: Date.now(),
            contentHash: hashContent(markdown),
          },
        });
      } catch (err) {
        // Thrown values are not guaranteed to be Error instances.
        const reason = err instanceof Error ? err.message : String(err);
        warnings.push(`Failed to parse note: ${reason}`);
      }
    }

    return { notes, notebookTitle: 'Evernote Import', warnings };
  },

  /** Always rejects: exporting to Evernote is not implemented. */
  async export(): Promise<ExportResult> {
    throw new Error('Evernote export is not supported — use Evernote\'s native import');
  },
};

// Make the converter available through the shared converter registry.
registerConverter(evernoteConverter);
|