// rspace-online/modules/rnotes/converters/logseq.ts

/**
 * Logseq graph ↔ rNotes converter.
 *
 * Import: ZIP of pages/ + journals/ dirs, property:: value syntax, bullet outliner blocks
 * Export: ZIP with Logseq-compatible page files + properties
 */

import JSZip from 'jszip';
import { markdownToTiptap, tiptapToMarkdown, extractPlainTextFromTiptap } from './markdown-tiptap';
import { registerConverter } from './index';
import type { ConvertedNote, ImportInput, ImportResult, ExportOptions, ExportResult, NoteConverter } from './index';
import type { NoteItem } from '../schemas';

/**
 * Fingerprint `content` for conflict detection.
 *
 * Folds each UTF-16 code unit into a 32-bit signed accumulator
 * (`acc * 31 + unit`, wrapped to int32) and renders the absolute value in base 36.
 * Not cryptographic.
 */
function hashContent(content: string): string {
	let acc = 0;
	let idx = 0;
	while (idx < content.length) {
		// Math.imul wraps the multiplication to 32 bits; `| 0` wraps the addition.
		acc = (Math.imul(acc, 31) + content.charCodeAt(idx)) | 0;
		idx += 1;
	}
	return Math.abs(acc).toString(36);
}

/**
 * Split the leading Logseq `key:: value` property lines off a page.
 *
 * Scanning starts at the first line and stops at the first line that is neither
 * a property nor blank; blank lines inside that leading region are skipped.
 * Accepts LF and CRLF line endings and ignores a leading byte-order mark.
 *
 * @param content Raw page text.
 * @returns `properties` keyed by lower-cased name with trimmed values, and
 *   `body`: the remaining lines joined with `\n`.
 */
function parseLogseqProperties(content: string): { properties: Record<string, string>; body: string } {
	// Keys may contain digits and dots (e.g. `logseq.order-list-type`, `card-last-score`).
	const propertyLine = /^([a-zA-Z_-][a-zA-Z0-9_.-]*)::\s*(.*)$/;

	// Split on CRLF as well as LF: a trailing `\r` would stop `.*$` from matching
	// and every property in a Windows-authored file would be silently dropped.
	// A BOM glued to the first key would have the same effect on the first line.
	const lines = content.replace(/^\uFEFF/, '').split(/\r?\n/);
	const properties: Record<string, string> = {};
	let bodyStart = 0;

	for (let i = 0; i < lines.length; i++) {
		const line = lines[i];
		const match = propertyLine.exec(line);
		if (match) {
			properties[match[1].toLowerCase()] = match[2].trim();
		} else if (line.trim() !== '') {
			// First real content line: the property block is over.
			break;
		}
		// Property and blank lines are both consumed from the body.
		bodyStart = i + 1;
	}

	return { properties, body: lines.slice(bodyStart).join('\n') };
}

/**
 * Convert Logseq outliner bullet format to regular markdown.
 *
 * Logseq writes every block as `- content`, nested by indentation (one tab or
 * two spaces per level). Each bullet line is rewritten as follows:
 * - a top-level bullet whose text is an ATX heading (`#` to `######` followed
 *   by a space) becomes a bare heading line;
 * - any other bullet becomes a list item indented two spaces per level.
 * Lines that are not bullets are passed through unchanged. CRLF input is
 * accepted; the result always uses `\n`.
 *
 * @param content Page body in Logseq outliner form.
 * @returns The body as plain markdown.
 */
function convertOutlinerToMarkdown(content: string): string {
	return content
		.split(/\r?\n/)
		.map((line) => {
			const bullet = /^(\s*)- (.*)$/.exec(line);
			if (!bullet) return line;

			const [, indent, text] = bullet;
			// One level per tab; each pair of spaces counts as one tab.
			const level = indent.replace(/ {2}/g, '\t').split('\t').length - 1;

			// Headings of any level are unwrapped, not only `# `.
			if (level === 0 && /^#{1,6} /.test(text)) return text;

			return `${'  '.repeat(level)}- ${text}`;
		})
		.join('\n');
}

/**
 * Convert Logseq page references to standard markdown links.
 *
 * - `[[Page]]` becomes `[Page](Page)`.
 * - The alias form `[label]([[Page]])` becomes `[label](Page)`; without this
 *   case the inner reference would be expanded in place and yield the broken
 *   nested link `[label]([Page](Page))`.
 *
 * @param md Markdown text that may contain `[[...]]` references.
 * @returns The text with every reference rewritten.
 */
function convertPageRefs(md: string): string {
	// The alias alternative comes first so it wins when both could match at a position.
	return md.replace(
		/\]\(\[\[([^\]]+)\]\]\)|\[\[([^\]]+)\]\]/g,
		(_whole: string, aliasTarget: string | undefined, page: string | undefined) =>
			aliasTarget !== undefined ? `](${aliasTarget})` : `[${page}](${page})`,
	);
}

/**
 * Collect Logseq tags (`#tag` and `#[[multi word tag]]`) from page text.
 *
 * A simple `#tag` only counts when the `#` is at the start of the text or
 * follows whitespace or opening punctuation, so URL fragments
 * (`https://host/page#section`) and mid-word hashes are not reported as tags.
 *
 * @param content Raw page text.
 * @returns Unique lower-cased tags in order of first appearance: simple tags
 *   first, then bracketed tags with whitespace runs collapsed to `-`.
 */
function extractLogseqTags(content: string): string[] {
	const tags = new Set<string>();
	let hit: RegExpExecArray | null;

	// #tag — the non-capturing prefix enforces the boundary before `#`.
	const simpleTag = /(?:^|[\s([{,;"'])#([a-zA-Z0-9_-]+)/g;
	while ((hit = simpleTag.exec(content)) !== null) {
		tags.add(hit[1].toLowerCase());
	}

	// #[[multi word tag]]
	const bracketTag = /#\[\[([^\]]+)\]\]/g;
	while ((hit = bracketTag.exec(content)) !== null) {
		tags.add(hit[1].toLowerCase().replace(/\s+/g, '-'));
	}

	return [...tags];
}

/**
 * Derive an ISO-style date (`YYYY-MM-DD`) from a Logseq journal filename.
 *
 * Recognises names ending in `YYYY_MM_DD.md` or `YYYY-MM-DD.md` (the two
 * separators may be mixed). The digits are not range-checked.
 *
 * @returns The date string, or `null` when the filename does not match.
 */
function parseJournalDate(filename: string): string | null {
	const parts = /(\d{4})[_-](\d{2})[_-](\d{2})\.md$/.exec(filename);
	if (parts === null) {
		return null;
	}
	const [, year, month, day] = parts;
	return [year, month, day].join('-');
}

/**
 * Derive a page title from a file path.
 *
 * Takes the last path segment, drops a trailing `.md` (any case) and then:
 * - turns the `___` namespace separator into `/`;
 * - turns remaining underscores into spaces;
 * - decodes percent-escapes (`%2F`, `%3A`, ...). If the name contains a
 *   malformed escape (e.g. a literal `%`), only `%2F`/`%2f` is decoded.
 *
 * @param filePath Path of the page file inside the graph.
 * @returns The title, or `'Untitled'` when nothing usable remains.
 */
function titleFromPath(filePath: string): string {
	const filename = filePath.split('/').pop() || 'Untitled';
	const stem = filename
		.replace(/\.md$/i, '')
		.replace(/___/g, '/')
		.replace(/_/g, ' ');

	let title: string;
	try {
		title = decodeURIComponent(stem);
	} catch {
		// decodeURIComponent throws URIError on malformed sequences such as "100% done".
		title = stem.replace(/%2F/gi, '/');
	}

	// A file literally named ".md" would otherwise produce an empty title.
	return title.trim() === '' ? 'Untitled' : title;
}

/** Graph directories that hold Markdown; these are never treated as a wrapping root folder. */
const LOGSEQ_CONTENT_DIRS: ReadonlySet<string> = new Set(['pages', 'journals']);

/**
 * Render markdown as Logseq outliner blocks: one `- ` bullet per non-blank line.
 * List items (`- `, `* `, `1. `) lose their marker and keep their nesting as
 * tab indentation (one level per tab or pair of spaces).
 */
function markdownToOutliner(md: string): string[] {
	const blocks: string[] = [];
	for (const line of md.split('\n')) {
		if (line.trim() === '') continue;
		const item = /^([ \t]*)(?:[-*] |\d+\.\s)([\s\S]*)$/.exec(line);
		if (item) {
			const level = item[1].replace(/ {2}/g, '\t').split('\t').length - 1;
			blocks.push(`${'\t'.repeat(level)}- ${item[2]}`);
		} else {
			// Headings, paragraphs and anything else become a block as-is.
			blocks.push(`- ${line}`);
		}
	}
	return blocks;
}

/**
 * Build a filename (without extension) for a note title and reserve it in `taken`.
 * `/` is encoded as `%2F`, characters invalid in filenames are dropped, and a
 * ` (n)` suffix is appended when the name is already in use (case-insensitive).
 */
function uniqueLogseqFilename(title: string, taken: Set<string>): string {
	// Encode `/` BEFORE stripping invalid characters; stripping first would
	// delete the slash and flatten namespaced pages ("a/b" -> "ab").
	const base = title
		.replace(/\//g, '%2F')
		.replace(/[<>:"\\|?*]/g, '')
		.trim() || 'Untitled';

	let candidate = base;
	for (let n = 2; taken.has(candidate.toLowerCase()); n++) {
		candidate = `${base} (${n})`;
	}
	taken.add(candidate.toLowerCase());
	return candidate;
}

/** Converter between Logseq graph ZIP archives and rNotes notes. */
const logseqConverter: NoteConverter = {
	id: 'logseq',
	name: 'Logseq',
	requiresAuth: false,

	/**
	 * Import every Markdown page and journal from a zipped Logseq graph.
	 *
	 * Files that fail to read or convert are reported in `warnings` and skipped;
	 * the rest of the import continues.
	 *
	 * @param input Import request; `fileData` must hold the ZIP archive.
	 * @returns Converted notes, the notebook title (the wrapping folder name when
	 *   the archive has one, otherwise `'Logseq Import'`) and any warnings.
	 * @throws Error when `input.fileData` is missing.
	 */
	async import(input: ImportInput): Promise<ImportResult> {
		if (!input.fileData) {
			throw new Error('Logseq import requires a ZIP file');
		}

		const zip = await JSZip.loadAsync(input.fileData);
		const notes: ConvertedNote[] = [];
		const warnings: string[] = [];
		let graphName = 'Logseq Import';

		// Collect all .md files
		const mdFiles: { path: string; file: JSZip.JSZipObject; isJournal: boolean }[] = [];
		zip.forEach((path, file) => {
			if (file.dir || !path.endsWith('.md')) return;
			// Skip config/hidden files
			if (path.includes('logseq/') && !path.includes('pages/') && !path.includes('journals/')) return;
			// Skip deleted pages and backup copies; backups live under
			// logseq/bak/pages/... and would otherwise pass the check above.
			if (path.includes('.recycle/') || path.includes('logseq/bak/')) return;

			mdFiles.push({ path, file, isJournal: path.includes('journals/') });
		});

		if (mdFiles.length === 0) {
			warnings.push('No .md files found in pages/ or journals/ directories');
			return { notes, notebookTitle: graphName, warnings };
		}

		// Detect graph name from a common wrapping folder. `pages`/`journals` are
		// part of the graph itself: an archive containing only one of them must
		// not be named after it nor have it stripped from the paths.
		const rootFolder = mdFiles[0].path.split('/')[0];
		const hasWrappingRoot =
			rootFolder !== '' &&
			!LOGSEQ_CONTENT_DIRS.has(rootFolder) &&
			mdFiles.every(f => f.path.startsWith(rootFolder + '/'));
		if (hasWrappingRoot) {
			graphName = rootFolder;
			for (const f of mdFiles) {
				f.path = f.path.slice(rootFolder.length + 1);
			}
		}

		for (const { path, file, isJournal } of mdFiles) {
			try {
				const raw = await file.async('string');
				const { properties, body } = parseLogseqProperties(raw);

				// Convert Logseq format to standard markdown
				const md = convertPageRefs(convertOutlinerToMarkdown(body));

				let title: string;
				if (isJournal) {
					const date = parseJournalDate(path.split('/').pop() || '');
					title = date ? `Journal: ${date}` : titleFromPath(path);
				} else {
					title = properties.title || titleFromPath(path);
				}

				const tiptapJson = markdownToTiptap(md);
				const contentPlain = extractPlainTextFromTiptap(tiptapJson);

				// Collect tags: `tags::` property, inline #tags, plus a journal marker.
				const tags: string[] = [];
				if (properties.tags) {
					const tagStr = properties.tags.replace(/\[\[|\]\]/g, '');
					tags.push(...tagStr.split(',').map(t => t.trim().toLowerCase()).filter(Boolean));
				}
				tags.push(...extractLogseqTags(raw));
				if (isJournal) tags.push('journal');

				notes.push({
					title,
					content: tiptapJson,
					contentPlain,
					markdown: md,
					tags: [...new Set(tags)],
					sourceRef: {
						source: 'logseq',
						externalId: path,
						lastSyncedAt: Date.now(),
						// Hash of the untouched file text, not of the converted markdown.
						contentHash: hashContent(raw),
					},
				});
			} catch (err: unknown) {
				const reason = err instanceof Error ? err.message : String(err);
				warnings.push(`Failed to parse ${path}: ${reason}`);
			}
		}

		return { notes, notebookTitle: graphName, warnings };
	},

	/**
	 * Export notes as a ZIP containing one Logseq page file per note under `pages/`.
	 *
	 * Each file starts with a property block (`tags::`, `type::` for non-`NOTE`
	 * types, `created::` when the timestamp is valid) followed by the note body
	 * as outliner bullets.
	 *
	 * @param notes Notes to export.
	 * @param opts Export options; `notebookTitle` names the archive.
	 * @returns The ZIP bytes, a download filename and the MIME type.
	 */
	async export(notes: NoteItem[], opts: ExportOptions): Promise<ExportResult> {
		const zip = new JSZip();
		const graphName = opts.notebookTitle || 'rNotes Export';
		const pagesDir = zip.folder('pages');
		if (!pagesDir) {
			throw new Error('Failed to create pages/ folder in export archive');
		}
		// Filenames already written, so equally titled notes do not overwrite each other.
		const usedFilenames = new Set<string>();

		for (const note of notes) {
			// Convert content to markdown
			let md: string;
			if (note.contentFormat === 'tiptap-json' && note.content) {
				md = tiptapToMarkdown(note.content);
			} else if (note.content) {
				// Non-tiptap content: strip markup tags and keep the text.
				md = note.content.replace(/<[^>]*>/g, '').trim();
			} else {
				md = '';
			}

			// Build Logseq properties block
			const props: string[] = [];
			if (note.tags.length > 0) {
				props.push(`tags:: ${note.tags.map(t => `[[${t}]]`).join(', ')}`);
			}
			if (note.type !== 'NOTE') {
				props.push(`type:: ${note.type.toLowerCase()}`);
			}
			// toISOString() throws on an invalid date, which would abort the whole
			// export; omit the property for such notes.
			const created = new Date(note.createdAt);
			if (!Number.isNaN(created.getTime())) {
				props.push(`created:: ${created.toISOString().split('T')[0]}`);
			}

			const outliner = markdownToOutliner(md);
			const propsBlock = props.length > 0 ? props.join('\n') + '\n\n' : '';
			const fileContent = `${propsBlock}${outliner.join('\n')}\n`;

			// Sanitize filename for Logseq (uses %2F for namespaced pages)
			const filename = uniqueLogseqFilename(note.title, usedFilenames);
			pagesDir.file(`${filename}.md`, fileContent);
		}

		const data = await zip.generateAsync({ type: 'uint8array' });
		return {
			data,
			filename: `${graphName.replace(/\s+/g, '-').toLowerCase()}-logseq.zip`,
			mimeType: 'application/zip',
		};
	},
};

// Register the Logseq converter as a side effect of loading this module.
// NOTE(review): presumably this makes it discoverable by its `id` ('logseq')
// through the converters index — confirm against './index'.
registerConverter(logseqConverter);