rspace-online/modules/pubs/parse-document.ts

/**
 * A run of content blocks, optionally introduced by a heading.
 */
export interface DocumentSection {
  /** Heading text. Absent for content that precedes the first section heading. */
  heading?: string;
  /**
   * Heading depth. `parseMarkdown` sets this to the number of leading `#`
   * characters of the heading line (1–3).
   */
  level?: number;
  /** Content blocks of the section, in document order. */
  blocks: DocumentBlock[];
}

/**
 * A single block of content inside a section, discriminated by `type`.
 *
 * - `paragraph`: plain text.
 * - `quote`: quoted text. `attribution` is optional; `parseMarkdown` in this
 *   file never sets it.
 * - `list`: list items as plain strings; `ordered` distinguishes numbered
 *   lists from bulleted ones.
 * - `code`: contents of a fenced code block, with the optional language tag
 *   taken from the opening fence.
 * - `separator`: a horizontal rule.
 */
export type DocumentBlock =
  | { type: "paragraph"; text: string }
  | { type: "quote"; text: string; attribution?: string }
  | { type: "list"; ordered: boolean; items: string[] }
  | { type: "code"; language?: string; code: string }
  | { type: "separator" };

/**
 * Structured representation of a document as returned by `parseMarkdown`.
 */
export interface ParsedDocument {
  /** Document title. `parseMarkdown` falls back to `"Untitled"` when none is found. */
  title: string;
  /** Optional subtitle. Not populated by `parseMarkdown` in this file. */
  subtitle?: string;
  /** Optional author name, passed through from the caller by `parseMarkdown`. */
  author?: string;
  /** Sections in document order. */
  sections: DocumentSection[];
}

/**
 * Parse a Markdown string into a structured {@link ParsedDocument}.
 *
 * Recognised constructs (everything else is treated as paragraph text):
 * - `#`, `##`, `###` headings — each starts a new section. The first `#`
 *   heading is consumed as the document title when no `title` is supplied.
 * - Fenced code blocks delimited by lines starting with three backticks; the
 *   text after the opening fence becomes the block's `language`.
 * - Horizontal rules (`---`, `___`, `***`).
 * - Blockquotes (lines starting with `>`); non-blank lines directly following
 *   a quote line are folded into the quote until a blank line ends it.
 * - Ordered (`1. item`) and unordered (`-`, `*`, `+`) lists. Items are kept
 *   as flat strings; nesting is not modelled.
 * - Paragraphs: consecutive text lines are joined with a single space; a
 *   blank line (or any other construct) ends the paragraph.
 *
 * Inline markup is not interpreted. Both LF and CRLF line endings are
 * accepted. Empty code fences and empty quotes produce no block.
 *
 * @param content - Markdown source text.
 * @param title - Explicit title; when empty or omitted the first `#` heading
 *   is used, falling back to `"Untitled"`.
 * @param author - Optional author, copied to the result (empty string is
 *   treated as absent).
 * @returns The parsed document with its sections in source order.
 */
export function parseMarkdown(
  content: string,
  title?: string,
  author?: string,
): ParsedDocument {
  // Patterns are hoisted so they are not re-created for every line.
  const headingPattern = /^(#{1,3})\s+(.+)$/;
  const horizontalRulePattern = /^(-{3,}|_{3,}|\*{3,})\s*$/;
  const orderedItemPattern = /^\s*\d+\.\s+(.+)$/;
  const unorderedItemPattern = /^\s*[-*+]\s+(.+)$/;

  // Split on CRLF, lone CR, or LF. A leftover "\r" would defeat the `(.+)$`
  // patterns above (`.` does not match "\r") and leak into code/quote text.
  const lines = content.split(/\r\n?|\n/);

  const sections: DocumentSection[] = [];
  let currentSection: DocumentSection = { blocks: [] };
  let detectedTitle = title;

  let inCodeBlock = false;
  let codeBlockLang = "";
  let codeLines: string[] = [];

  let inBlockquote = false;
  let quoteLines: string[] = [];

  let listItems: string[] = [];
  let listOrdered = false;

  // Paragraph lines are buffered so that a blank line reliably terminates the
  // paragraph instead of the next text line being glued onto the previous one.
  let paragraphLines: string[] = [];

  /** Emit the buffered paragraph, if any. */
  function flushParagraph(): void {
    if (paragraphLines.length > 0) {
      currentSection.blocks.push({
        type: "paragraph",
        text: paragraphLines.join(" "),
      });
      paragraphLines = [];
    }
  }

  /** Emit the buffered blockquote (unless it is empty) and leave quote mode. */
  function flushQuote(): void {
    if (quoteLines.length > 0) {
      const text = quoteLines.join("\n").trim();
      if (text !== "") {
        currentSection.blocks.push({ type: "quote", text });
      }
    }
    quoteLines = [];
    inBlockquote = false;
  }

  /** Emit the buffered list, if any. */
  function flushList(): void {
    if (listItems.length > 0) {
      currentSection.blocks.push({
        type: "list",
        ordered: listOrdered,
        items: listItems,
      });
      listItems = [];
    }
  }

  /**
   * Emit the buffered code block (unless it is empty) and leave code mode.
   * The state is reset unconditionally: an empty fence pair must still close
   * the block, otherwise all following text would be swallowed as code.
   */
  function flushCodeBlock(): void {
    if (codeLines.length > 0) {
      currentSection.blocks.push({
        type: "code",
        language: codeBlockLang || undefined,
        code: codeLines.join("\n"),
      });
    }
    codeLines = [];
    codeBlockLang = "";
    inCodeBlock = false;
  }

  /** Close every open non-code construct before a structural element. */
  function flushPending(): void {
    flushParagraph();
    flushQuote();
    flushList();
  }

  for (const line of lines) {
    const leftTrimmed = line.trimStart();

    // Code fence: toggles code mode.
    if (leftTrimmed.startsWith("```")) {
      if (inCodeBlock) {
        flushCodeBlock();
      } else {
        flushPending();
        inCodeBlock = true;
        codeBlockLang = leftTrimmed.slice(3).trim();
      }
      continue;
    }

    // Inside a fence every line is taken verbatim.
    if (inCodeBlock) {
      codeLines.push(line);
      continue;
    }

    // Heading
    const headingMatch = headingPattern.exec(line);
    if (headingMatch) {
      flushPending();

      const [, hashes = "", rawHeading = ""] = headingMatch;
      const level = hashes.length;
      const headingText = rawHeading.trim();

      // Use the first h1 as the title if none was provided.
      if (level === 1 && !detectedTitle) {
        detectedTitle = headingText;
        continue;
      }

      // Start a new section, keeping the previous one unless it is empty.
      if (currentSection.blocks.length > 0 || currentSection.heading) {
        sections.push(currentSection);
      }
      currentSection = { heading: headingText, level, blocks: [] };
      continue;
    }

    // Horizontal rule
    if (horizontalRulePattern.test(line.trim())) {
      flushPending();
      currentSection.blocks.push({ type: "separator" });
      continue;
    }

    // Blockquote line: ">" optionally followed by a single space.
    if (leftTrimmed.startsWith(">")) {
      flushParagraph();
      flushList();
      inBlockquote = true;
      const quoted = leftTrimmed.slice(1);
      quoteLines.push(quoted.startsWith(" ") ? quoted.slice(1) : quoted);
      continue;
    }

    // Continuation of an open quote: a blank line ends it, anything else is
    // folded into the quote text.
    if (inBlockquote) {
      if (line.trim() === "") {
        flushQuote();
      } else {
        quoteLines.push(line);
      }
      continue;
    }

    // From here on no quote can be open (handled above), so only the
    // paragraph and list buffers need attention.

    // Ordered list item
    const orderedMatch = orderedItemPattern.exec(line);
    if (orderedMatch) {
      flushParagraph();
      if (!listOrdered) {
        // Switching from a bulleted list: close it first.
        flushList();
      }
      listOrdered = true;
      listItems.push(orderedMatch[1] ?? "");
      continue;
    }

    // Unordered list item
    const unorderedMatch = unorderedItemPattern.exec(line);
    if (unorderedMatch) {
      flushParagraph();
      if (listOrdered) {
        // Switching from a numbered list: close it first.
        flushList();
      }
      listOrdered = false;
      listItems.push(unorderedMatch[1] ?? "");
      continue;
    }

    // Blank line: terminates the current paragraph or list.
    if (line.trim() === "") {
      flushParagraph();
      flushList();
      continue;
    }

    // Regular paragraph text; consecutive lines are joined on flush.
    flushList();
    paragraphLines.push(line.trim());
  }

  // Flush whatever is still open at end of input (including an unterminated
  // code fence).
  flushPending();
  flushCodeBlock();

  if (currentSection.blocks.length > 0 || currentSection.heading) {
    sections.push(currentSection);
  }

  return {
    title: detectedTitle || "Untitled",
    author: author || undefined,
    sections,
  };
}