Add EPUB converter pipeline and generated ebooks

Adds a PDF-to-fixed-layout-EPUB converter that preserves the visual design of each page for Kindle and other ebook readers. Includes the generated ebooks Exploring MycoFi (84 pages, 30 MB) and PsiloCybernetics (38 pages, 44 MB), with full TOC.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-16 14:49:48 -04:00 · 2026-04-16 14:49:48 -04:00 · 14ee7f61fb
parent 97d14c47a7
commit 14ee7f61fb
9 changed files with 508 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,3 @@
+__pycache__/
+*.pyc
+.venv/
--- a/converter/add_toc.py
+++ b/converter/add_toc.py
@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+"""
+Add or update Table of Contents for an EPUB built by pdf_to_epub.
+
+Since the PDF has no embedded TOC, chapter markers are defined manually
+in a JSON config file. This script patches an existing EPUB's navigation.
+
+Usage:
+    python3 converter/add_toc.py output/ExploringMycoFiBook.epub --toc toc_mycofi.json
+
+TOC JSON format:
+[
+    {"title": "Cover", "page": 1},
+    {"title": "Introduction", "page": 5},
+    {"title": "Chapter 1: Mycelial Networks", "page": 12},
+    ...
+]
+"""
+
+import argparse
+import json
+import os
+import sys
+import zipfile
+import tempfile
+import shutil
+from pathlib import Path
+
+from ebooklib import epub
+
+
+def patch_toc(epub_path: str, toc_entries: list[dict]) -> str:
+    """Patch the TOC of an existing EPUB with manual chapter markers.
+
+    Args:
+        epub_path: Path to the EPUB file
+        toc_entries: List of {"title": str, "page": int} dicts (1-indexed pages)
+
+    Returns:
+        Path to the patched EPUB
+    """
+    book = epub.read_epub(epub_path)
+
+    # Find all page items sorted by filename
+    pages = sorted(
+        [item for item in book.get_items() if item.file_name.startswith("pages/")],
+        key=lambda x: x.file_name,
+    )
+
+    if not pages:
+        print("Error: No page items found in EPUB")
+        sys.exit(1)
+
+    # Build new TOC from entries
+    new_toc = []
+    for entry in toc_entries:
+        page_idx = entry["page"] - 1  # Convert 1-indexed to 0-indexed
+        if 0 <= page_idx < len(pages):
+            page_item = pages[page_idx]
+            # Create a link using the page's file_name
+            new_toc.append(epub.Link(page_item.file_name, entry["title"], f"toc_{page_idx}"))
+        else:
+            print(f"Warning: Page {entry['page']} out of range (1-{len(pages)}), skipping: {entry['title']}")
+
+    book.toc = new_toc
+
+    # Re-add navigation items
+    for item in list(book.get_items()):
+        if isinstance(item, (epub.EpubNcx, epub.EpubNav)):
+            book.items.remove(item)
+    book.add_item(epub.EpubNcx())
+    book.add_item(epub.EpubNav())
+
+    # Write back
+    epub.write_epub(epub_path, book, {})
+    print(f"Updated TOC with {len(new_toc)} entries in {epub_path}")
+    return epub_path
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Add/update EPUB table of contents")
+    parser.add_argument("epub", help="Path to EPUB file")
+    parser.add_argument("--toc", required=True, help="Path to TOC JSON file")
+
+    args = parser.parse_args()
+
+    with open(args.toc) as f:
+        toc_entries = json.load(f)
+
+    patch_toc(args.epub, toc_entries)
+
+
+if __name__ == "__main__":
+    main()
--- a/converter/batch_convert.py
+++ b/converter/batch_convert.py
@ -0,0 +1,106 @@
+#!/usr/bin/env python3
+"""
+Batch converter — finds all PDFs in a directory and converts them to EPUB.
+
+Usage:
+    python3 converter/batch_convert.py /path/to/pdfs/ --output-dir output/
+    python3 converter/batch_convert.py . --dpi 150  # lower DPI for smaller files
+"""
+
+import argparse
+import os
+from pathlib import Path
+
+from pdf_to_epub import convert_pdf_to_epub
+
+
+# Known book metadata — add entries as we convert more flipbooks
+BOOK_METADATA = {
+    "ExploringMycoFiBook.pdf": {
+        "title": "Exploring MycoFi: Mycelial Design Patterns for Web3 and Beyond",
+        "author": "Jeff Emmett & Contributors",
+        "description": (
+            "A Mycopunk publication from the Greenpill Network exploring "
+            "how mycelial networks can inform the design of decentralized "
+            "economic systems, DAOs, and Web3 infrastructure."
+        ),
+    },
+    "psilocybernetics.pdf": {
+        "title": "Psilocybernetics",
+        "author": "Jeff Emmett",
+        "description": "An exploration of psychedelic-informed cybernetics.",
+    },
+}
+
+
+def find_pdfs(directory: str) -> list[Path]:
+    """Find all PDF files in a directory (non-recursive)."""
+    return sorted(Path(directory).glob("*.pdf"))
+
+
+def batch_convert(
+    input_dir: str,
+    output_dir: str = "output",
+    dpi: int = 200,
+):
+    """Convert all PDFs found in input_dir to EPUBs in output_dir."""
+    os.makedirs(output_dir, exist_ok=True)
+    pdfs = find_pdfs(input_dir)
+
+    if not pdfs:
+        print(f"No PDFs found in {input_dir}")
+        return
+
+    print(f"Found {len(pdfs)} PDF(s) to convert:\n")
+    for pdf in pdfs:
+        print(f"  - {pdf.name}")
+    print()
+
+    results = []
+    for pdf in pdfs:
+        meta = BOOK_METADATA.get(pdf.name, {})
+        output_path = os.path.join(output_dir, pdf.stem + ".epub")
+
+        print(f"{'=' * 60}")
+        print(f"Converting: {pdf.name}")
+        print(f"{'=' * 60}\n")
+
+        try:
+            result = convert_pdf_to_epub(
+                pdf_path=str(pdf),
+                output_path=output_path,
+                title=meta.get("title"),
+                author=meta.get("author"),
+                dpi=dpi,
+                description=meta.get("description", ""),
+            )
+            results.append((pdf.name, result, "OK"))
+        except Exception as e:
+            print(f"ERROR converting {pdf.name}: {e}")
+            results.append((pdf.name, None, str(e)))
+        print()
+
+    # Summary
+    print(f"\n{'=' * 60}")
+    print("BATCH CONVERSION SUMMARY")
+    print(f"{'=' * 60}")
+    for name, path, status in results:
+        if status == "OK":
+            size = os.path.getsize(path) / (1024 * 1024)
+            print(f"  OK  {name} → {path} ({size:.1f} MB)")
+        else:
+            print(f"  ERR {name}: {status}")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Batch convert PDFs to fixed-layout EPUB")
+    parser.add_argument("input_dir", help="Directory containing PDF files")
+    parser.add_argument("--output-dir", "-o", default="output", help="Output directory (default: output/)")
+    parser.add_argument("--dpi", type=int, default=200, help="Render DPI (default: 200)")
+
+    args = parser.parse_args()
+    batch_convert(args.input_dir, args.output_dir, args.dpi)
+
+
+if __name__ == "__main__":
+    main()
--- a/converter/pdf_to_epub.py
+++ b/converter/pdf_to_epub.py
@ -0,0 +1,275 @@
+#!/usr/bin/env python3
+"""
+PDF to Fixed-Layout EPUB Converter
+
+Converts visually-rich PDFs (like designed books from InDesign) into
+fixed-layout EPUB3 files suitable for Kindle and ebook readers.
+
+Each PDF page becomes a full-page image in the EPUB, preserving the
+original design, typography, and layout.
+
+Usage:
+    python3 converter/pdf_to_epub.py input.pdf [--output output.epub] [--dpi 200]
+    python3 converter/pdf_to_epub.py input.pdf --title "My Book" --author "Author Name"
+"""
+
+import argparse
+import io
+import os
+import sys
+import uuid
+from pathlib import Path
+
+import fitz  # PyMuPDF
+from ebooklib import epub
+
+
+def extract_pages_as_images(pdf_path: str, dpi: int = 200) -> list[tuple[bytes, int, int]]:
+    """Extract each PDF page as a JPEG image.
+
+    Returns list of (image_bytes, width_px, height_px) tuples.
+    """
+    doc = fitz.open(pdf_path)
+    pages = []
+    zoom = dpi / 72  # PDF is 72 DPI by default
+    matrix = fitz.Matrix(zoom, zoom)
+
+    for i, page in enumerate(doc):
+        pix = page.get_pixmap(matrix=matrix)
+        img_bytes = pix.tobytes("jpeg", jpg_quality=92)
+        pages.append((img_bytes, pix.width, pix.height))
+        print(f"  Extracted page {i + 1}/{doc.page_count} ({pix.width}x{pix.height})")
+
+    doc.close()
+    return pages
+
+
+def extract_metadata(pdf_path: str) -> dict:
+    """Pull whatever metadata we can from the PDF."""
+    doc = fitz.open(pdf_path)
+    meta = doc.metadata
+    doc.close()
+    return {
+        "title": meta.get("title", ""),
+        "author": meta.get("author", ""),
+        "subject": meta.get("subject", ""),
+    }
+
+
+def build_fixed_layout_epub(
+    pages: list[tuple[bytes, int, int]],
+    title: str,
+    author: str,
+    output_path: str,
+    language: str = "en",
+    cover_page: int = 0,
+    description: str = "",
+) -> str:
+    """Build a fixed-layout EPUB3 from page images.
+
+    Args:
+        pages: List of (jpeg_bytes, width, height) per page
+        title: Book title
+        author: Book author
+        output_path: Where to save the .epub
+        language: Language code
+        cover_page: Which page index to use as cover (default 0)
+        description: Book description for metadata
+
+    Returns:
+        Path to the created EPUB file
+    """
+    book = epub.EpubBook()
+    book_id = str(uuid.uuid4())
+
+    # -- Metadata --
+    book.set_identifier(book_id)
+    book.set_title(title)
+    book.set_language(language)
+    book.add_author(author)
+    if description:
+        book.add_metadata("DC", "description", description)
+
+    # Fixed-layout metadata (EPUB3 rendition properties)
+    book.add_metadata(
+        None,
+        "meta",
+        "pre-paginated",
+        {"property": "rendition:layout"},
+    )
+    book.add_metadata(
+        None,
+        "meta",
+        "auto",
+        {"property": "rendition:orientation"},
+    )
+    book.add_metadata(
+        None,
+        "meta",
+        "none",
+        {"property": "rendition:spread"},
+    )
+
+    # Use first page dimensions as viewport default
+    _, vp_w, vp_h = pages[0] if pages else (None, 1024, 1366)
+
+    # -- Add cover image (metadata only, actual image added in page loop) --
+    cover_bytes, _, _ = pages[cover_page]
+    book.set_cover("images/cover.jpg", cover_bytes, create_page=False)
+
+    # -- CSS for fixed-layout pages --
+    page_css = epub.EpubItem(
+        uid="page_css",
+        file_name="style/page.css",
+        media_type="text/css",
+        content=b"""
+body {
+    margin: 0;
+    padding: 0;
+    overflow: hidden;
+}
+.page-image {
+    width: 100%;
+    height: 100%;
+    object-fit: contain;
+    display: block;
+}
+""",
+    )
+    book.add_item(page_css)
+
+    # -- Build page chapters --
+    chapters = []
+    for i, (img_bytes, w, h) in enumerate(pages):
+        # Add image
+        img_item = epub.EpubImage()
+        img_item.file_name = f"images/page_{i:04d}.jpg"
+        img_item.media_type = "image/jpeg"
+        img_item.content = img_bytes
+        book.add_item(img_item)
+
+        # Create HTML page with viewport matching image dimensions
+        chapter = epub.EpubHtml(
+            title=f"Page {i + 1}",
+            file_name=f"pages/page_{i:04d}.xhtml",
+            lang=language,
+        )
+        chapter.content = f"""<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
+<head>
+    <meta charset="utf-8"/>
+    <meta name="viewport" content="width={w}, height={h}"/>
+    <title>Page {i + 1}</title>
+    <link rel="stylesheet" type="text/css" href="../style/page.css"/>
+</head>
+<body>
+    <div><img class="page-image" src="../images/page_{i:04d}.jpg" alt="Page {i + 1}"/></div>
+</body>
+</html>""".encode("utf-8")
+        chapter.add_item(page_css)
+        book.add_item(chapter)
+        chapters.append(chapter)
+
+    # -- Spine & TOC --
+    book.spine = chapters
+    # Simple TOC — just first, middle, last for now
+    # Can be enhanced with actual chapter markers
+    book.toc = [chapters[0]]
+    if len(chapters) > 2:
+        book.toc.append(chapters[len(chapters) // 2])
+    if len(chapters) > 1:
+        book.toc.append(chapters[-1])
+
+    # Required EPUB3 navigation
+    book.add_item(epub.EpubNcx())
+    book.add_item(epub.EpubNav())
+
+    # -- Write --
+    epub.write_epub(output_path, book, {})
+    return output_path
+
+
+def convert_pdf_to_epub(
+    pdf_path: str,
+    output_path: str | None = None,
+    title: str | None = None,
+    author: str | None = None,
+    dpi: int = 200,
+    description: str = "",
+) -> str:
+    """Main conversion function. Takes a PDF, produces a fixed-layout EPUB.
+
+    Args:
+        pdf_path: Path to input PDF
+        output_path: Path for output EPUB (default: same name as PDF with .epub)
+        title: Override title (otherwise extracted from PDF metadata)
+        author: Override author
+        dpi: Resolution for page rendering (higher = sharper but larger file)
+        description: Book description
+
+    Returns:
+        Path to created EPUB
+    """
+    pdf_path = str(Path(pdf_path).resolve())
+    if not os.path.exists(pdf_path):
+        print(f"Error: PDF not found: {pdf_path}")
+        sys.exit(1)
+
+    # Default output path
+    if output_path is None:
+        output_path = str(Path(pdf_path).with_suffix(".epub"))
+
+    # Extract metadata from PDF as fallback
+    meta = extract_metadata(pdf_path)
+    title = title or meta["title"] or Path(pdf_path).stem
+    author = author or meta["author"] or "Unknown"
+
+    print(f"Converting: {pdf_path}")
+    print(f"  Title:  {title}")
+    print(f"  Author: {author}")
+    print(f"  DPI:    {dpi}")
+    print()
+
+    # Extract pages
+    print("Extracting pages...")
+    pages = extract_pages_as_images(pdf_path, dpi=dpi)
+    print(f"\n{len(pages)} pages extracted.")
+
+    # Build EPUB
+    print(f"\nBuilding EPUB: {output_path}")
+    result = build_fixed_layout_epub(
+        pages=pages,
+        title=title,
+        author=author,
+        output_path=output_path,
+        description=description,
+    )
+
+    file_size = os.path.getsize(result) / (1024 * 1024)
+    print(f"\nDone! {result} ({file_size:.1f} MB)")
+    return result
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Convert PDF to fixed-layout EPUB for Kindle/ebook readers"
+    )
+    parser.add_argument("pdf", help="Path to input PDF file")
+    parser.add_argument("--output", "-o", help="Output EPUB path (default: same name as PDF)")
+    parser.add_argument("--title", "-t", help="Book title (overrides PDF metadata)")
+    parser.add_argument("--author", "-a", help="Book author (overrides PDF metadata)")
+    parser.add_argument("--dpi", type=int, default=200, help="Render DPI (default: 200)")
+    parser.add_argument("--description", "-d", default="", help="Book description")
+
+    args = parser.parse_args()
+    convert_pdf_to_epub(
+        pdf_path=args.pdf,
+        output_path=args.output,
+        title=args.title,
+        author=args.author,
+        dpi=args.dpi,
+        description=args.description,
+    )
+
+
+if __name__ == "__main__":
+    main()
--- a/output/ExploringMycoFiBook.epub
+++ b/output/ExploringMycoFiBook.epub
--- a/output/Psilocybernetics.epub
+++ b/output/Psilocybernetics.epub
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,3 @@
+PyMuPDF>=1.24.0
+ebooklib>=0.18
+Pillow>=10.0
--- a/Pages_Full_Draft.idml
+++ b/Pages_Full_Draft.idml
--- a/toc_mycofi.json
+++ b/toc_mycofi.json
@ -0,0 +1,27 @@
+[
+    {"title": "Cover", "page": 1},
+    {"title": "Synopsis", "page": 2},
+    {"title": "Title Page", "page": 6},
+    {"title": "Credits", "page": 7},
+    {"title": "Contents", "page": 10},
+    {"title": "Endorsements", "page": 11},
+    {"title": "A Note from the Creators", "page": 13},
+    {"title": "Foreword", "page": 17},
+    {"title": "Introduction: Uncovering Nature's Economic Blueprints", "page": 21},
+    {"title": "Design Pattern 1: Network Infrastructure", "page": 27},
+    {"title": "Imagining Fungal Futures: Mesh Stability", "page": 31},
+    {"title": "Design Pattern 2: Fractal Nature", "page": 35},
+    {"title": "Imagining Fungal Futures: Endosymbiotic Finance", "page": 41},
+    {"title": "Design Pattern 3: Emergent Coordination", "page": 45},
+    {"title": "Imagining Fungal Futures: Adaptive Myco-Organizations", "page": 49},
+    {"title": "Design Pattern 4: Dynamic Flow", "page": 53},
+    {"title": "Imagining Fungal Futures: Vote Streaming", "page": 57},
+    {"title": "Design Pattern 5: Mutual Reciprocity", "page": 61},
+    {"title": "Imagining Fungal Futures: Generosity Networks", "page": 65},
+    {"title": "Design Pattern 6: Polycentric Pluralism", "page": 69},
+    {"title": "Imagining Fungal Futures: Collective Flourishing", "page": 73},
+    {"title": "Join the Mycelial Revolution", "page": 75},
+    {"title": "Let's Get Rooted, Mycopunk", "page": 79},
+    {"title": "Gratitude & Acknowledgments", "page": 82},
+    {"title": "Appendix: References", "page": 83}
+]