/** A run of content blocks, optionally introduced by a heading. */
export interface DocumentSection {
  /** Heading text; absent for content that precedes the first heading. */
  heading?: string;
  /** Number of leading `#` characters of the heading (1–3). */
  level?: number;
  blocks: DocumentBlock[];
}

/** One block-level element of a parsed document, discriminated by `type`. */
export type DocumentBlock =
  | { type: "paragraph"; text: string }
  | { type: "quote"; text: string; attribution?: string }
  | { type: "list"; ordered: boolean; items: string[] }
  | { type: "code"; language?: string; code: string }
  | { type: "separator" };

/** Result of {@link parseMarkdown}. */
export interface ParsedDocument {
  title: string;
  subtitle?: string;
  author?: string;
  sections: DocumentSection[];
}

/**
 * Parses a subset of Markdown into sections of block-level elements.
 *
 * Recognised constructs: `#`–`###` headings, fenced code blocks (```),
 * horizontal rules, `>` block quotes, ordered and unordered lists, and
 * paragraphs. Inline markup is left untouched in the resulting text.
 *
 * Every heading starts a new section, with one exception: when no `title`
 * is supplied, the first level-1 heading is consumed as the document title
 * instead of opening a section.
 *
 * @param content - Markdown source; both LF and CRLF line endings are accepted.
 * @param title - Explicit title. When omitted or empty, the first `# heading`
 *   is used, falling back to `"Untitled"`.
 * @param author - Optional author; an empty string is normalised to `undefined`.
 * @returns The parsed document. Sections without heading and blocks are omitted.
 */
export function parseMarkdown(
  content: string,
  title?: string,
  author?: string,
): ParsedDocument {
  // Loop-invariant patterns, hoisted out of the per-line loop.
  const headingPattern = /^(#{1,3})\s+(.+)$/;
  const rulePattern = /^(-{3,}|_{3,}|\*{3,})\s*$/;
  const orderedItemPattern = /^\s*\d+\.\s+(.+)$/;
  const unorderedItemPattern = /^\s*[-*+]\s+(.+)$/;

  // Split on CRLF as well as LF: a trailing "\r" would otherwise defeat the
  // `(.+)$` tail of the heading/list patterns and leak into code lines.
  const lines = content.split(/\r?\n/);
  const sections: DocumentSection[] = [];
  let currentSection: DocumentSection = { blocks: [] };
  let detectedTitle = title;

  let inCodeBlock = false;
  let codeBlockLang = "";
  let codeLines: string[] = [];

  let inBlockquote = false;
  let quoteLines: string[] = [];

  let inList = false;
  let listItems: string[] = [];
  let listOrdered = false;

  // True only while the previous line was paragraph text, so that a blank
  // line (or any other construct) ends the paragraph instead of merging.
  let paragraphOpen = false;

  function flushQuote() {
    if (quoteLines.length > 0) {
      const text = quoteLines.join("\n").trim();
      currentSection.blocks.push({ type: "quote", text });
    }
    quoteLines = [];
    inBlockquote = false;
  }

  function flushList() {
    if (listItems.length > 0) {
      currentSection.blocks.push({
        type: "list",
        ordered: listOrdered,
        items: listItems,
      });
    }
    listItems = [];
    inList = false;
  }

  function flushCodeBlock() {
    if (codeLines.length > 0) {
      currentSection.blocks.push({
        type: "code",
        language: codeBlockLang || undefined,
        code: codeLines.join("\n"),
      });
    }
    // Reset unconditionally: an empty fenced block must still leave code
    // mode, otherwise the rest of the document is swallowed as code.
    codeLines = [];
    inCodeBlock = false;
    codeBlockLang = "";
  }

  for (const line of lines) {
    // Every branch except "regular paragraph text" terminates an open
    // paragraph, so clear the flag up front and re-set it only there.
    const continuesParagraph = paragraphOpen;
    paragraphOpen = false;

    const leftTrimmed = line.trimStart();

    // Code fence: toggles code mode; the opening fence may name a language.
    if (leftTrimmed.startsWith("```")) {
      if (inCodeBlock) {
        flushCodeBlock();
      } else {
        flushQuote();
        flushList();
        inCodeBlock = true;
        codeBlockLang = leftTrimmed.slice(3).trim();
      }
      continue;
    }

    // Inside a fence every line is kept verbatim.
    if (inCodeBlock) {
      codeLines.push(line);
      continue;
    }

    // Heading
    const headingMatch = headingPattern.exec(line);
    if (headingMatch) {
      flushQuote();
      flushList();
      const [, hashes = "", rawHeading = ""] = headingMatch;
      const level = hashes.length;
      const headingText = rawHeading.trim();

      // Use first h1 as title if none provided
      if (level === 1 && !detectedTitle) {
        detectedTitle = headingText;
        continue;
      }

      // Start a new section
      if (currentSection.blocks.length > 0 || currentSection.heading) {
        sections.push(currentSection);
      }
      currentSection = { heading: headingText, level, blocks: [] };
      continue;
    }

    // Horizontal rule (checked before lists so "---" / "***" are not items)
    if (rulePattern.test(line.trim())) {
      flushQuote();
      flushList();
      currentSection.blocks.push({ type: "separator" });
      continue;
    }

    // Blockquote: ">" followed by at most one optional space. Accepting a
    // bare ">" lets it act as a blank line inside a multi-paragraph quote.
    if (leftTrimmed.startsWith(">")) {
      flushList();
      inBlockquote = true;
      const quoted = leftTrimmed.slice(1);
      quoteLines.push(quoted.startsWith(" ") ? quoted.slice(1) : quoted);
      continue;
    } else if (inBlockquote) {
      // Lazy continuation: unmarked lines extend the quote until a blank line.
      if (line.trim() === "") {
        flushQuote();
      } else {
        quoteLines.push(line);
      }
      continue;
    }

    // Ordered list
    const orderedMatch = orderedItemPattern.exec(line);
    if (orderedMatch) {
      flushQuote();
      // Switching list kind ends the previous list.
      if (inList && !listOrdered) {
        flushList();
      }
      inList = true;
      listOrdered = true;
      listItems.push(orderedMatch[1] ?? "");
      continue;
    }

    // Unordered list
    const unorderedMatch = unorderedItemPattern.exec(line);
    if (unorderedMatch) {
      flushQuote();
      if (inList && listOrdered) {
        flushList();
      }
      inList = true;
      listOrdered = false;
      listItems.push(unorderedMatch[1] ?? "");
      continue;
    }

    // Empty line: ends any open quote, list, or paragraph.
    if (line.trim() === "") {
      flushQuote();
      flushList();
      continue;
    }

    // Regular paragraph text
    flushQuote();
    flushList();

    const text = line.trim();
    const lastBlock = currentSection.blocks[currentSection.blocks.length - 1];
    if (continuesParagraph && lastBlock && lastBlock.type === "paragraph") {
      // Directly adjacent text lines are soft-wrapped into one paragraph.
      lastBlock.text += " " + text;
    } else {
      currentSection.blocks.push({ type: "paragraph", text });
    }
    paragraphOpen = true;
  }

  // Flush remaining state (including an unterminated code fence)
  flushQuote();
  flushList();
  flushCodeBlock();

  if (currentSection.blocks.length > 0 || currentSection.heading) {
    sections.push(currentSection);
  }

  return {
    title: detectedTitle || "Untitled",
    author: author || undefined,
    sections,
  };
}