// rspace-online/modules/rnotes/converters/markdown-tiptap.ts
//
// 459 lines · 11 KiB · TypeScript

/**
* Core Markdown ↔ TipTap JSON conversion utility.
*
* All import/export converters pass through this module.
* - Import: source format → markdown → TipTap JSON
* - Export: TipTap JSON → markdown → source format
*/
import { marked } from 'marked';
// ── Markdown → TipTap JSON ──
/**
 * Parse a markdown string and serialize it as a TipTap document.
 *
 * The markdown is tokenized with `marked.lexer`; the resulting block tokens
 * are mapped to TipTap nodes and wrapped in a root `doc` node.
 *
 * @param md - Markdown source text.
 * @returns The TipTap document as a JSON *string* (not a parsed object).
 */
export function markdownToTiptap(md: string): string {
  const content = tokensToTiptap(marked.lexer(md));
  return JSON.stringify({ type: 'doc', content });
}
/**
 * Convert block-level `marked` tokens to an array of TipTap JSON nodes.
 *
 * Mapping: heading → heading, paragraph → paragraph, blockquote → blockquote,
 * list → bulletList / orderedList / taskList, code → codeBlock, hr →
 * horizontalRule, table → table, html → paragraph holding the raw text.
 * `space` tokens are dropped; any other token becomes a paragraph built from
 * its inline `tokens` or, failing that, its `text`.
 *
 * @param tokens - Block tokens from `marked.lexer`, or the nested `tokens` of
 *   a blockquote / list item.
 * @param promoteImages - When true (default) a paragraph that contains nothing
 *   but images is emitted as `image` nodes rather than a paragraph of literal
 *   `![alt](src)` text, so images written by the exporter survive a re-import.
 *   List items are converted with this disabled so that their first child
 *   stays a paragraph (TipTap's stock ListItem content is `paragraph block*`).
 * @returns TipTap nodes in document order.
 */
function tokensToTiptap(tokens: any[], promoteImages = true): any[] {
  const nodes: any[] = [];

  // Build one table row whose cells each wrap their inline content in a paragraph.
  const buildRow = (cells: any[], cellType: string): any => ({
    type: 'tableRow',
    content: cells.map((cell: any) => ({
      type: cellType,
      content: [{
        type: 'paragraph',
        content: inlineToTiptap(cell.tokens || []),
      }],
    })),
  });

  for (const token of tokens) {
    switch (token.type) {
      case 'heading':
        nodes.push({
          type: 'heading',
          attrs: { level: token.depth },
          content: inlineToTiptap(token.tokens || []),
        });
        break;

      case 'paragraph': {
        const inline: any[] = token.tokens || [];
        if (promoteImages && isImageOnlyInline(inline)) {
          // marked only ever reports images as inline tokens; lift standalone
          // ones to block-level image nodes.
          for (const child of inline) {
            if (child.type === 'image') nodes.push(imageNodeFromToken(child));
          }
        } else {
          nodes.push({
            type: 'paragraph',
            content: inlineToTiptap(inline),
          });
        }
        break;
      }

      case 'blockquote':
        nodes.push({
          type: 'blockquote',
          content: tokensToTiptap(token.tokens || []),
        });
        break;

      case 'list': {
        const items: any[] = token.items || [];
        // Decide the list kind once, up front: if any item carries a checkbox
        // the whole list is a task list and every item is a taskItem, so a
        // taskList never ends up with listItem children.
        const isTaskList = items.some((item: any) => item.task);
        const listNode: any = {
          type: isTaskList ? 'taskList' : token.ordered ? 'orderedList' : 'bulletList',
        };
        if (token.ordered && token.start !== 1) {
          listNode.attrs = { start: token.start };
        }
        listNode.content = items.map((item: any) => {
          const content = tokensToTiptap(item.tokens || [], false);
          return isTaskList
            ? { type: 'taskItem', attrs: { checked: item.checked || false }, content }
            : { type: 'listItem', content };
        });
        nodes.push(listNode);
        break;
      }

      case 'code': {
        const codeNode: any = {
          type: 'codeBlock',
          attrs: { language: token.lang || null },
        };
        // ProseMirror rejects empty text nodes, so an empty fence gets no content.
        if (token.text) {
          codeNode.content = [{ type: 'text', text: token.text }];
        }
        nodes.push(codeNode);
        break;
      }

      case 'hr':
        nodes.push({ type: 'horizontalRule' });
        break;

      case 'table': {
        const rows: any[] = [];
        if (token.header && token.header.length > 0) {
          rows.push(buildRow(token.header, 'tableHeader'));
        }
        if (token.rows) {
          for (const row of token.rows) {
            rows.push(buildRow(row, 'tableCell'));
          }
        }
        nodes.push({ type: 'table', content: rows });
        break;
      }

      case 'image':
        nodes.push(imageNodeFromToken(token));
        break;

      case 'html':
        // Raw HTML is kept verbatim as the text of a paragraph.
        if (token.text.trim()) {
          nodes.push({
            type: 'paragraph',
            content: [{ type: 'text', text: token.text.trim() }],
          });
        }
        break;

      case 'space':
        // Whitespace-only tokens carry no content.
        break;

      default:
        // Fallback: prefer the token's inline children, otherwise its text.
        if ((token as any).tokens) {
          nodes.push({
            type: 'paragraph',
            content: inlineToTiptap((token as any).tokens),
          });
        } else if ((token as any).text) {
          nodes.push({
            type: 'paragraph',
            content: [{ type: 'text', text: (token as any).text }],
          });
        }
    }
  }
  return nodes;
}

/** Build a TipTap `image` node from a marked image token. */
function imageNodeFromToken(token: any): any {
  return {
    type: 'image',
    attrs: {
      src: token.href,
      alt: token.text || null,
      title: token.title || null,
    },
  };
}

/**
 * True when the inline tokens contain at least one image and otherwise only
 * line breaks or whitespace-only text.
 */
function isImageOnlyInline(inline: any[]): boolean {
  let sawImage = false;
  for (const child of inline) {
    if (child.type === 'image') {
      sawImage = true;
    } else if (child.type === 'br') {
      continue;
    } else if (child.type === 'text' && !String(child.text ?? '').trim()) {
      continue;
    } else {
      return false;
    }
  }
  return sawImage;
}
/**
 * Convert inline `marked` tokens to TipTap inline content.
 *
 * - `strong` / `em` / `del` / `link` recurse into their children and add a
 *   `bold` / `italic` / `strike` / `link` mark to every resulting node.
 * - `codespan` becomes a text node carrying a `code` mark.
 * - `image` is kept as literal `![alt](href)` text (no image node is created
 *   here).
 * - `br` becomes a `hardBreak`; `escape` becomes plain text.
 * - `text` and any unrecognised token contribute a text node only when they
 *   have non-empty `text`.
 *
 * @param tokens - Inline tokens (the `tokens` array of a block token).
 * @returns TipTap inline nodes in order.
 */
function inlineToTiptap(tokens: any[]): any[] {
  const out: any[] = [];

  // Convert a token's children and tag each one with a freshly built mark.
  const wrapChildren = (parent: any, makeMark: () => any): void => {
    const children = inlineToTiptap(parent.tokens || []);
    children.forEach((child) => {
      out.push(addMark(child, makeMark()));
    });
  };

  tokens.forEach((tok) => {
    const kind = tok.type;

    if (kind === 'strong') {
      wrapChildren(tok, () => ({ type: 'bold' }));
    } else if (kind === 'em') {
      wrapChildren(tok, () => ({ type: 'italic' }));
    } else if (kind === 'del') {
      wrapChildren(tok, () => ({ type: 'strike' }));
    } else if (kind === 'link') {
      wrapChildren(tok, () => ({
        type: 'link',
        attrs: { href: tok.href, target: '_blank' },
      }));
    } else if (kind === 'codespan') {
      out.push({
        type: 'text',
        text: tok.text,
        marks: [{ type: 'code' }],
      });
    } else if (kind === 'image') {
      // Kept as markdown source text rather than an image node.
      out.push({
        type: 'text',
        text: `![${tok.text || ''}](${tok.href})`,
      });
    } else if (kind === 'br') {
      out.push({ type: 'hardBreak' });
    } else if (kind === 'escape') {
      out.push({ type: 'text', text: tok.text });
    } else if (tok.text) {
      // Plain `text` tokens and unknown token types: emit only non-empty text.
      out.push({ type: 'text', text: tok.text });
    }
  });

  return out;
}
/**
 * Return a copy of `node` with `mark` appended to its marks.
 *
 * The input node and its existing `marks` array are left untouched; marks
 * already present keep their order and the new mark goes last.
 */
function addMark(node: any, mark: any): any {
  const existing: any[] = node.marks || [];
  const copy: any = { ...node };
  copy.marks = [...existing, mark];
  return copy;
}
// ── TipTap JSON → Markdown ──
/**
 * Convert a TipTap JSON string to markdown.
 *
 * @param json - Serialized TipTap document. Input that is not a JSON object
 *   (invalid JSON, or JSON that parses to a primitive or an array, e.g. a
 *   plain-text note consisting of `123` or `true`) is treated as already
 *   being markdown / plain text and returned unchanged.
 * @returns Markdown for the document's `content`, trimmed; `''` when the
 *   document object has no `content` array. If rendering throws on a
 *   malformed node tree, the input is returned as-is.
 */
export function tiptapToMarkdown(json: string): string {
  try {
    const doc: unknown = JSON.parse(json);
    // JSON.parse happily accepts bare numbers, booleans, strings, null and
    // arrays; none of those is a TipTap document.
    if (doc === null || typeof doc !== 'object' || Array.isArray(doc)) {
      return json;
    }
    const content = (doc as { content?: unknown }).content;
    if (!Array.isArray(content)) return '';
    return nodesToMarkdown(content).trim();
  } catch {
    // Not valid JSON (or not renderable): hand back the original text.
    return json;
  }
}
/**
 * Render an array of TipTap block nodes as markdown.
 *
 * Every block is followed by an empty line. Nested content (blockquotes, list
 * items, table cells) is rendered recursively and then prefixed or indented:
 * - blockquote lines get `> ` (`>` for blank lines), so paragraph breaks
 *   inside a quote are kept;
 * - list item continuation lines are indented by the width of the item marker
 *   (`- ` → 2, `10. ` → 4) so nested lists and extra paragraphs stay inside
 *   their item;
 * - code fences grow beyond three backticks when the code contains a backtick
 *   run of three or more;
 * - table cells are flattened to one line and `|` is escaped.
 *
 * @param nodes - TipTap block nodes.
 * @param indent - Prefix put in front of paragraph text; it is forwarded when
 *   descending into unknown wrapper nodes.
 * @returns Markdown text (not trimmed).
 */
function nodesToMarkdown(nodes: any[], indent = ''): string {
  const parts: string[] = [];
  for (const node of nodes) {
    switch (node.type) {
      case 'heading': {
        const level = node.attrs?.level || 1;
        const prefix = '#'.repeat(level);
        parts.push(`${prefix} ${inlineToMarkdown(node.content || [])}`);
        parts.push('');
        break;
      }
      case 'paragraph': {
        const text = inlineToMarkdown(node.content || []);
        parts.push(`${indent}${text}`);
        parts.push('');
        break;
      }
      case 'blockquote': {
        // Drop only the trailing separator; interior blank lines become a
        // bare `>` so separate paragraphs do not merge into one.
        const inner = nodesToMarkdown(node.content || []).trimEnd();
        for (const line of inner.split('\n')) {
          parts.push(line ? `> ${line}` : '>');
        }
        parts.push('');
        break;
      }
      case 'bulletList': {
        for (const item of node.content || []) {
          for (const line of listItemLines('- ', item.content || [])) {
            parts.push(line);
          }
        }
        parts.push('');
        break;
      }
      case 'orderedList': {
        // `??` rather than `||`: a list may legitimately start at 0.
        const start = node.attrs?.start ?? 1;
        const items = node.content || [];
        for (let i = 0; i < items.length; i++) {
          for (const line of listItemLines(`${start + i}. `, items[i].content || [])) {
            parts.push(line);
          }
        }
        parts.push('');
        break;
      }
      case 'taskList': {
        for (const item of node.content || []) {
          const box = item.attrs?.checked ? '[x] ' : '[ ] ';
          // The checkbox is part of the item's text, so continuation lines
          // align with the `- ` marker only.
          for (const line of listItemLines('- ', item.content || [], box)) {
            parts.push(line);
          }
        }
        parts.push('');
        break;
      }
      case 'codeBlock': {
        const lang = node.attrs?.language || '';
        const text = node.content?.map((c: any) => c.text || '').join('') || '';
        const fence = codeFenceFor(text);
        parts.push(`${fence}${lang}`);
        parts.push(text);
        parts.push(fence);
        parts.push('');
        break;
      }
      case 'horizontalRule':
        parts.push('---');
        parts.push('');
        break;
      case 'image': {
        const alt = node.attrs?.alt || '';
        const src = node.attrs?.src || '';
        const title = node.attrs?.title ? ` "${node.attrs.title}"` : '';
        parts.push(`![${alt}](${src}${title})`);
        parts.push('');
        break;
      }
      case 'table': {
        const rows = node.content || [];
        if (rows.length === 0) break;
        for (let r = 0; r < rows.length; r++) {
          const cells = rows[r].content || [];
          const cellTexts = cells.map((cell: any) =>
            tableCellText(nodesToMarkdown(cell.content || [])),
          );
          parts.push(`| ${cellTexts.join(' | ')} |`);
          // The first row is always treated as the header row.
          if (r === 0) {
            parts.push(`| ${cellTexts.map(() => '---').join(' | ')} |`);
          }
        }
        parts.push('');
        break;
      }
      case 'hardBreak':
        parts.push(' ');
        break;
      default:
        // Unknown node type: descend into children, or fall back to raw text.
        if (node.content) {
          parts.push(nodesToMarkdown(node.content, indent));
        } else if (node.text) {
          parts.push(node.text);
        }
    }
  }
  return parts.join('\n');
}

/**
 * Render one list item: the first line carries `marker` + `firstLinePrefix`,
 * later non-blank lines are indented by the width of `marker`.
 */
function listItemLines(marker: string, children: any[], firstLinePrefix = ''): string[] {
  const body = nodesToMarkdown(children).trim();
  const pad = ' '.repeat(marker.length);
  return body.split('\n').map((line, index) => {
    if (index === 0) return `${marker}${firstLinePrefix}${line}`;
    return line ? `${pad}${line}` : '';
  });
}

/** Pick a backtick fence strictly longer than any backtick run in `code` (minimum 3). */
function codeFenceFor(code: string): string {
  let longest = 0;
  for (const run of code.match(/`+/g) ?? []) {
    if (run.length > longest) longest = run.length;
  }
  return '`'.repeat(Math.max(3, longest + 1));
}

/** Flatten rendered cell markdown to a single line and escape pipes. */
function tableCellText(rendered: string): string {
  const flat = rendered
    .trim()
    .replace(/\s*\n\s*/g, ' ')
    .replace(/\|/g, '\\|');
  return flat || ' ';
}
/**
 * Convert TipTap inline nodes to a markdown string.
 *
 * - `hardBreak` is written as two spaces plus a newline (a single trailing
 *   space would only be a soft break in CommonMark).
 * - A node without `text` but with `content` is rendered from its children.
 * - Marks are applied with `code` innermost, so emphasis delimiters wrap the
 *   code span instead of ending up inside it as literal characters.
 * - For bold / italic / strike, leading and trailing whitespace is moved
 *   outside the delimiters, because `**word **` is not parsed as emphasis.
 * - Underline has no markdown syntax and is written as `<u>…</u>`.
 * - Unknown marks are ignored; a node with no text produces no output even
 *   if it has marks.
 *
 * Text is not escaped: markdown-significant characters are written as-is.
 *
 * @param nodes - TipTap inline nodes.
 * @returns The concatenated markdown.
 */
function inlineToMarkdown(nodes: any[]): string {
  return nodes.map((node) => {
    if (node.type === 'hardBreak') return '  \n';

    let text: string = node.text || '';
    if (!text && node.content) {
      text = inlineToMarkdown(node.content);
    }
    const marks: any[] = node.marks || [];
    if (!text || marks.length === 0) return text;

    // Code must wrap the raw text; everything else keeps its original order.
    const ordered = [
      ...marks.filter((m: any) => m.type === 'code'),
      ...marks.filter((m: any) => m.type !== 'code'),
    ];
    for (const mark of ordered) {
      switch (mark.type) {
        case 'code':
          text = wrapCodeSpan(text);
          break;
        case 'bold':
          text = wrapDelimited(text, '**');
          break;
        case 'italic':
          text = wrapDelimited(text, '*');
          break;
        case 'strike':
          text = wrapDelimited(text, '~~');
          break;
        case 'link':
          text = `[${text}](${mark.attrs?.href || ''})`;
          break;
        case 'underline':
          // No standard markdown for underline, use HTML.
          text = `<u>${text}</u>`;
          break;
      }
    }
    return text;
  }).join('');
}

/** Wrap the non-whitespace core of `text` in `delim`, keeping outer whitespace outside. */
function wrapDelimited(text: string, delim: string): string {
  const core = text.trim();
  if (!core) return text;
  const lead = text.slice(0, text.length - text.trimStart().length);
  const trail = text.slice(text.trimEnd().length);
  return `${lead}${delim}${core}${delim}${trail}`;
}

/**
 * Wrap `text` in a code span whose backtick fence is longer than any backtick
 * run inside it, padding with a space where the content would otherwise touch
 * the fence or have its own edge spaces stripped.
 */
function wrapCodeSpan(text: string): string {
  let longest = 0;
  for (const run of text.match(/`+/g) ?? []) {
    if (run.length > longest) longest = run.length;
  }
  const fence = '`'.repeat(longest + 1);
  const edgeBacktick = text.startsWith('`') || text.endsWith('`');
  const edgeSpaces = text.startsWith(' ') && text.endsWith(' ') && text.trim() !== '';
  const pad = edgeBacktick || edgeSpaces ? ' ' : '';
  return `${fence}${pad}${text}${pad}${fence}`;
}
// ── Utility: extract plain text from TipTap JSON ──
/**
 * Extract plain text from a TipTap JSON string.
 *
 * @param json - Serialized TipTap document. When it is not a JSON object
 *   (invalid JSON, or JSON that parses to a primitive or an array such as
 *   `123` or `true`), or when walking the tree throws, the input is treated
 *   as markup / plain text instead: anything that looks like a tag is
 *   replaced by a space and whitespace runs are collapsed.
 * @returns The trimmed plain text.
 */
export function extractPlainTextFromTiptap(json: string): string {
  try {
    const doc: unknown = JSON.parse(json);
    // Bare numbers, booleans, strings, null and arrays are valid JSON but are
    // not documents; they take the plain-text path below.
    if (doc !== null && typeof doc === 'object' && !Array.isArray(doc)) {
      return walkPlainText(doc).trim();
    }
  } catch {
    // Fall through to the plain-text fallback.
  }
  return json.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim();
}
/**
 * Recursively collect the text of a TipTap node.
 *
 * - A node with non-empty `text` returns that text.
 * - `hardBreak` contributes a newline.
 * - Children of a text block (paragraph, heading, codeBlock, or any node whose
 *   children include text / hardBreak nodes) are concatenated directly, so a
 *   sentence split across differently-marked runs stays on one line.
 * - Children of every other container are joined with a newline, so sibling
 *   blocks do not run together.
 * - `null`, non-objects and nodes without a `content` array yield `''`.
 */
function walkPlainText(node: any): string {
  if (node === null || typeof node !== 'object') return '';
  if (node.text) return node.text;
  if (node.type === 'hardBreak') return '\n';
  if (!Array.isArray(node.content)) return '';

  const isTextBlock =
    node.type === 'paragraph' ||
    node.type === 'heading' ||
    node.type === 'codeBlock' ||
    node.content.some(
      (child: any) =>
        child !== null &&
        typeof child === 'object' &&
        (typeof child.text === 'string' || child.type === 'hardBreak'),
    );

  const pieces: string[] = node.content.map((child: any) => walkPlainText(child));
  return pieces.join(isTextBlock ? '' : '\n');
}