// rspace-online/modules/rnotes/converters/markdown-tiptap.ts

/**
 * Core Markdown ↔ TipTap JSON conversion utility.
 *
 * All import/export converters pass through this module.
 * - Import: source format → markdown → TipTap JSON
 * - Export: TipTap JSON → markdown → source format
 */

import { marked } from 'marked';

// ── Markdown → TipTap JSON ──

/**
 * Turn a markdown string into a serialized TipTap document.
 *
 * The markdown is tokenized with `marked.lexer`, the resulting block tokens
 * are mapped to TipTap nodes, and the nodes are wrapped in a root `doc` node.
 *
 * @param md - Markdown source text.
 * @returns The TipTap document as a JSON string.
 */
export function markdownToTiptap(md: string): string {
	const content = tokensToTiptap(marked.lexer(md));
	return JSON.stringify({ type: 'doc', content });
}

/**
 * Convert block-level marked tokens to an array of TipTap JSON nodes.
 *
 * Handles headings, paragraphs, blockquotes, lists (bullet, ordered, task),
 * fenced/indented code, horizontal rules, tables, images and raw HTML.
 * `space` tokens are dropped. Any other token type falls back to a paragraph
 * built from its inline `tokens` (or its `text`, when it has no tokens).
 *
 * Token field names (`depth`, `items`, `header`, `rows`, ...) are read as-is;
 * the tokens are assumed to come from `marked.lexer`.
 *
 * @param tokens - Block tokens to convert.
 * @returns TipTap nodes in the same order as the input tokens.
 */
function tokensToTiptap(tokens: any[]): any[] {
	const nodes: any[] = [];

	for (const token of tokens) {
		switch (token.type) {
			case 'heading':
				nodes.push({
					type: 'heading',
					attrs: { level: token.depth },
					content: inlineToTiptap(token.tokens || []),
				});
				break;

			case 'paragraph':
				nodes.push({
					type: 'paragraph',
					content: inlineToTiptap(token.tokens || []),
				});
				break;

			case 'blockquote':
				nodes.push({
					type: 'blockquote',
					content: tokensToTiptap(token.tokens || []),
				});
				break;

			case 'list': {
				const items: any[] = token.items || [];
				// A single task item turns the whole list into a task list. Every
				// child must then be a taskItem: a taskList may not hold plain
				// listItem nodes, so non-task siblings become unchecked task items.
				const isTaskList = items.some((item: any) => item.task);
				const listType = isTaskList
					? 'taskList'
					: token.ordered ? 'orderedList' : 'bulletList';
				const attrs: any = {};
				if (token.ordered && token.start !== 1) attrs.start = token.start;
				nodes.push({
					type: listType,
					...(Object.keys(attrs).length ? { attrs } : {}),
					content: items.map((item: any) => {
						const content = tokensToTiptap(item.tokens || []);
						if (isTaskList) {
							return {
								type: 'taskItem',
								attrs: { checked: item.checked || false },
								content,
							};
						}
						return { type: 'listItem', content };
					}),
				});
				break;
			}

			case 'code': {
				const text = typeof token.text === 'string' ? token.text : '';
				nodes.push({
					type: 'codeBlock',
					attrs: { language: token.lang || null },
					// Empty text nodes are rejected by ProseMirror, so an empty
					// code block is emitted without any content.
					...(text ? { content: [{ type: 'text', text }] } : {}),
				});
				break;
			}

			case 'hr':
				nodes.push({ type: 'horizontalRule' });
				break;

			case 'table': {
				const rows: any[] = [];
				// Header row
				if (token.header && token.header.length > 0) {
					rows.push({
						type: 'tableRow',
						content: token.header.map((cell: any) => ({
							type: 'tableHeader',
							content: [{
								type: 'paragraph',
								content: inlineToTiptap(cell.tokens || []),
							}],
						})),
					});
				}
				// Body rows
				if (token.rows) {
					for (const row of token.rows) {
						rows.push({
							type: 'tableRow',
							content: row.map((cell: any) => ({
								type: 'tableCell',
								content: [{
									type: 'paragraph',
									content: inlineToTiptap(cell.tokens || []),
								}],
							})),
						});
					}
				}
				nodes.push({ type: 'table', content: rows });
				break;
			}

			case 'image':
				nodes.push({
					type: 'image',
					attrs: {
						src: token.href,
						alt: token.text || null,
						title: token.title || null,
					},
				});
				break;

			case 'html': {
				// Raw HTML is kept as literal text inside a paragraph.
				const raw = typeof token.text === 'string' ? token.text.trim() : '';
				if (raw) {
					nodes.push({
						type: 'paragraph',
						content: [{ type: 'text', text: raw }],
					});
				}
				break;
			}

			case 'space':
				// Ignore whitespace-only tokens
				break;

			default:
				// Fallback: treat as paragraph if there are inline tokens or text
				if ((token as any).tokens) {
					nodes.push({
						type: 'paragraph',
						content: inlineToTiptap((token as any).tokens),
					});
				} else if ((token as any).text) {
					nodes.push({
						type: 'paragraph',
						content: [{ type: 'text', text: (token as any).text }],
					});
				}
		}
	}

	return nodes;
}

/**
 * Convert inline marked tokens to TipTap inline content.
 *
 * - `strong` / `em` / `del` / `link` are flattened: their children are
 *   converted recursively and the corresponding mark is appended to each child.
 * - `codespan` becomes a text node carrying a `code` mark.
 * - `image` is kept as literal markdown text (`![alt](href "title")`) rather
 *   than an image node, so it is re-parsed as an image on the next markdown pass.
 * - `br` becomes a `hardBreak` node.
 * - Any other token with a non-empty `text` becomes a plain text node.
 *
 * Tokens whose text is empty are skipped, because empty text nodes are not
 * valid in a ProseMirror document.
 *
 * @param tokens - Inline tokens (assumed to come from `marked`).
 * @returns TipTap inline nodes in source order.
 */
function inlineToTiptap(tokens: any[]): any[] {
	const result: any[] = [];

	for (const token of tokens) {
		switch (token.type) {
			case 'text':
				if (token.text) {
					result.push({ type: 'text', text: token.text });
				}
				break;

			case 'strong':
				for (const child of inlineToTiptap(token.tokens || [])) {
					result.push(addMark(child, { type: 'bold' }));
				}
				break;

			case 'em':
				for (const child of inlineToTiptap(token.tokens || [])) {
					result.push(addMark(child, { type: 'italic' }));
				}
				break;

			case 'del':
				for (const child of inlineToTiptap(token.tokens || [])) {
					result.push(addMark(child, { type: 'strike' }));
				}
				break;

			case 'codespan':
				if (token.text) {
					result.push({
						type: 'text',
						text: token.text,
						marks: [{ type: 'code' }],
					});
				}
				break;

			case 'link':
				for (const child of inlineToTiptap(token.tokens || [])) {
					result.push(addMark(child, {
						type: 'link',
						attrs: { href: token.href, target: '_blank' },
					}));
				}
				break;

			case 'image': {
				// Inline images are preserved as their markdown source text,
				// including the optional title so it is not lost.
				const title = token.title
					? ` "${String(token.title).replace(/"/g, '\\"')}"`
					: '';
				result.push({
					type: 'text',
					text: `![${token.text || ''}](${token.href || ''}${title})`,
				});
				break;
			}

			case 'br':
				result.push({ type: 'hardBreak' });
				break;

			case 'escape':
				if (token.text) {
					result.push({ type: 'text', text: token.text });
				}
				break;

			default:
				if ((token as any).text) {
					result.push({ type: 'text', text: (token as any).text });
				}
		}
	}

	return result;
}

/**
 * Return a copy of `node` with `mark` appended after any marks it already has.
 * The input node and its marks array are left untouched.
 */
function addMark(node: any, mark: any): any {
	const combined: any[] = [];
	for (const existing of node.marks || []) {
		combined.push(existing);
	}
	combined.push(mark);
	return { ...node, marks: combined };
}

// ── TipTap JSON → Markdown ──

/**
 * Convert a TipTap JSON string to markdown.
 * Walks the TipTap node tree and produces CommonMark-compatible output.
 *
 * Input that is not a JSON object is returned unchanged, on the assumption
 * that it is already markdown or plain text. This includes text that merely
 * happens to parse as a JSON scalar or array (`123`, `true`, `"quoted"`,
 * `[1, 2]`), which would otherwise be silently converted to an empty string.
 *
 * @param json - Serialized TipTap document (or legacy plain text/markdown).
 * @returns Markdown text; `''` for a JSON object without a `content` array.
 */
export function tiptapToMarkdown(json: string): string {
	try {
		const doc: unknown = JSON.parse(json);
		// Only a JSON object can be a TipTap document.
		if (doc === null || typeof doc !== 'object' || Array.isArray(doc)) {
			return json;
		}
		const content = (doc as { content?: unknown }).content;
		if (!Array.isArray(content)) return '';
		return nodesToMarkdown(content).trim();
	} catch {
		// Not valid JSON (or conversion failed): return as-is, it might
		// already be markdown/plain text.
		return json;
	}
}

/**
 * Convert an array of TipTap block nodes to markdown.
 *
 * Each block is emitted followed by a blank line. Nested structures
 * (blockquotes, list items, table cells) are rendered recursively and then
 * prefixed line by line, so continuation lines stay inside their container.
 *
 * @param nodes - TipTap nodes to render.
 * @param indent - Prefix put in front of the first line of each paragraph.
 *   Kept for compatibility; list rendering no longer relies on it because
 *   list continuation lines are padded to the item's content column instead.
 * @returns Markdown text (not trimmed).
 */
function nodesToMarkdown(nodes: any[], indent = ''): string {
	const parts: string[] = [];

	/**
	 * Emit one list item. `marker` is the list marker including its trailing
	 * space (`- `, `10. `); `lead` is extra first-line text such as a task
	 * checkbox. Continuation lines are padded to the marker width so nested
	 * blocks remain part of the item; blank lines are left empty.
	 */
	const pushListItem = (marker: string, lead: string, item: any): void => {
		const pad = ' '.repeat(marker.length);
		const lines = nodesToMarkdown(item?.content || []).trim().split('\n');
		parts.push(`${marker}${lead}${lines[0]}`);
		for (const line of lines.slice(1)) {
			parts.push(line ? `${pad}${line}` : '');
		}
	};

	for (const node of nodes) {
		switch (node.type) {
			case 'heading': {
				// ATX headings only exist for levels 1-6.
				const level = Math.min(6, Math.max(1, Number(node.attrs?.level) || 1));
				const prefix = '#'.repeat(level);
				parts.push(`${prefix} ${inlineToMarkdown(node.content || [])}`);
				parts.push('');
				break;
			}

			case 'paragraph': {
				const text = inlineToMarkdown(node.content || []);
				parts.push(`${indent}${text}`);
				parts.push('');
				break;
			}

			case 'blockquote': {
				// Quote every line; blank separators become a bare `>` so that
				// multiple inner blocks stay separate regardless of where the
				// blockquote sits in the document.
				const inner = nodesToMarkdown(node.content || []).trim();
				for (const line of inner.split('\n')) {
					parts.push(line.trim() ? `> ${line}` : '>');
				}
				parts.push('');
				break;
			}

			case 'bulletList': {
				for (const item of node.content || []) {
					pushListItem('- ', '', item);
				}
				parts.push('');
				break;
			}

			case 'orderedList': {
				// Respect an explicit start of 0; default to 1 otherwise.
				const rawStart = node.attrs?.start;
				const start = typeof rawStart === 'number' ? rawStart : 1;
				const items = node.content || [];
				for (let i = 0; i < items.length; i++) {
					pushListItem(`${start + i}. `, '', items[i]);
				}
				parts.push('');
				break;
			}

			case 'taskList': {
				for (const item of node.content || []) {
					const checked = item.attrs?.checked ? 'x' : ' ';
					// The checkbox is item content, so continuation lines are
					// padded to the width of the `- ` marker only.
					pushListItem('- ', `[${checked}] `, item);
				}
				parts.push('');
				break;
			}

			case 'codeBlock': {
				const lang = node.attrs?.language || '';
				const text: string = (node.content || []).map((c: any) => c.text || '').join('');
				// The fence must be longer than any backtick run inside the code,
				// otherwise the code would close the block early.
				const runs: string[] = text.match(/`+/g) || [];
				const longest = runs.reduce((max, run) => Math.max(max, run.length), 0);
				const fence = '`'.repeat(Math.max(3, longest + 1));
				parts.push(`${fence}${lang}`);
				if (text) parts.push(text);
				parts.push(fence);
				parts.push('');
				break;
			}

			case 'horizontalRule':
				parts.push('---');
				parts.push('');
				break;

			case 'image': {
				const alt = node.attrs?.alt || '';
				const src = node.attrs?.src || '';
				const title = node.attrs?.title ? ` "${node.attrs.title}"` : '';
				parts.push(`![${alt}](${src}${title})`);
				parts.push('');
				break;
			}

			case 'table': {
				const rows = node.content || [];
				if (rows.length === 0) break;

				for (let r = 0; r < rows.length; r++) {
					const cells = rows[r].content || [];
					const cellTexts = cells.map((cell: any) => {
						// A table row must stay on one line and `|` delimits
						// cells, so collapse line breaks and escape pipes.
						const inner = nodesToMarkdown(cell.content || [])
							.trim()
							.replace(/\s*\n\s*/g, ' ')
							.replace(/\|/g, '\\|');
						return inner || ' ';
					});
					parts.push(`| ${cellTexts.join(' | ')} |`);

					// Add separator after header row
					if (r === 0) {
						parts.push(`| ${cellTexts.map(() => '---').join(' | ')} |`);
					}
				}
				parts.push('');
				break;
			}

			case 'hardBreak':
				parts.push('  ');
				break;

			default:
				// Unknown node type — try to extract text
				if (node.content) {
					parts.push(nodesToMarkdown(node.content, indent));
				} else if (node.text) {
					parts.push(node.text);
				}
		}
	}

	return parts.join('\n');
}

/**
 * Convert TipTap inline content nodes to a markdown string.
 *
 * - `hardBreak` becomes two trailing spaces plus a newline.
 * - A `code` mark is always applied innermost, with a delimiter longer than
 *   any backtick run in the text, so other marks wrap the code span instead
 *   of ending up as literal characters inside it.
 * - For every other mark, leading/trailing whitespace is moved outside the
 *   delimiters (`**bold** ` rather than `**bold **`), because emphasis that
 *   is flanked by whitespace on the inside does not render.
 * - Text that is empty or whitespace-only is emitted without marks.
 *
 * Markdown special characters in plain text are not escaped.
 *
 * @param nodes - Inline TipTap nodes.
 * @returns The concatenated markdown for all nodes.
 */
function inlineToMarkdown(nodes: any[]): string {
	/** Wrap `code` in a backtick delimiter that cannot collide with its content. */
	const wrapCode = (code: string): string => {
		const runs: string[] = code.match(/`+/g) || [];
		const longest = runs.reduce((max, run) => Math.max(max, run.length), 0);
		const fence = '`'.repeat(longest + 1);
		// A space is required when the content itself touches the delimiter.
		const pad = code.startsWith('`') || code.endsWith('`') ? ' ' : '';
		return `${fence}${pad}${code}${pad}${fence}`;
	};

	return nodes.map((node) => {
		if (node.type === 'hardBreak') return '  \n';

		let text: string = node.text || '';
		if (!text && node.content) {
			text = inlineToMarkdown(node.content);
		}

		const marks: any[] = Array.isArray(node.marks) ? node.marks : [];
		if (marks.length === 0 || text === '') return text;

		let lead = '';
		let trail = '';
		if (marks.some((mark) => mark.type === 'code')) {
			// Whitespace inside a code span is literal, so it is kept in place.
			text = wrapCode(text);
		} else {
			const core = text.trim();
			if (core === '') return text;
			lead = text.slice(0, text.length - text.trimStart().length);
			trail = text.slice(text.trimEnd().length);
			text = core;
		}

		for (const mark of marks) {
			switch (mark.type) {
				case 'bold':
					text = `**${text}**`;
					break;
				case 'italic':
					text = `*${text}*`;
					break;
				case 'strike':
					text = `~~${text}~~`;
					break;
				case 'link':
					text = `[${text}](${mark.attrs?.href || ''})`;
					break;
				case 'underline':
					// No standard markdown for underline, use HTML
					text = `<u>${text}</u>`;
					break;
				// 'code' was already applied above; unknown marks are ignored.
			}
		}

		return `${lead}${text}${trail}`;
	}).join('');
}

// ── Utility: extract plain text from TipTap JSON ──

/**
 * Extract plain text from a TipTap JSON string.
 *
 * When the input is not a JSON object — invalid JSON, or text that merely
 * parses as a JSON scalar/array such as `42` — it is treated as plain text or
 * HTML instead: tags are replaced by spaces and whitespace is collapsed.
 *
 * @param json - Serialized TipTap document, or legacy plain text/HTML.
 * @returns The trimmed plain-text content.
 */
export function extractPlainTextFromTiptap(json: string): string {
	const stripMarkup = (raw: string): string =>
		raw.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim();

	try {
		const doc: unknown = JSON.parse(json);
		// Only a JSON object can be a TipTap document; anything else is text.
		if (doc === null || typeof doc !== 'object' || Array.isArray(doc)) {
			return stripMarkup(json);
		}
		return walkPlainText(doc).trim();
	} catch {
		return stripMarkup(json);
	}
}

/**
 * Recursively collect the text of a TipTap node.
 *
 * Inline children (text nodes and hard breaks) are concatenated directly;
 * block-level children are separated by a newline so adjacent paragraphs,
 * list items, etc. do not run together. A hard break yields a newline.
 *
 * @param node - A TipTap node (any depth); non-object values yield `''`.
 * @returns The node's text content.
 */
function walkPlainText(node: any): string {
	if (node === null || typeof node !== 'object') return '';
	if (typeof node.text === 'string' && node.text !== '') return node.text;
	if (node.type === 'hardBreak') return '\n';
	if (!Array.isArray(node.content)) return '';

	const isInline = (child: any): boolean =>
		child !== null &&
		typeof child === 'object' &&
		(typeof child.text === 'string' || child.type === 'hardBreak');

	// Text blocks hold inline content; everything else holds blocks.
	const holdsInline =
		node.type === 'paragraph' ||
		node.type === 'heading' ||
		node.type === 'codeBlock' ||
		node.content.some(isInline);

	return node.content.map(walkPlainText).join(holdsInline ? '' : '\n');
}