#!/usr/bin/env python3
"""
Add or update the Table of Contents for an EPUB built by pdf_to_epub.

The source PDF has no embedded TOC, so chapter markers are supplied
manually in a JSON config file; this script patches an existing EPUB's
navigation documents (NCX + EPUB3 nav) in place.

Usage:
    python3 converter/add_toc.py output/ExploringMycoFiBook.epub --toc toc_mycofi.json

TOC JSON format:
[
    {"title": "Cover", "page": 1},
    {"title": "Introduction", "page": 5},
    {"title": "Chapter 1: Mycelial Networks", "page": 12},
    ...
]
"""

import argparse
import json
import sys

from ebooklib import epub


def patch_toc(epub_path: str, toc_entries: list[dict]) -> str:
    """Patch the TOC of an existing EPUB with manual chapter markers.

    Args:
        epub_path: Path to the EPUB file (rewritten in place).
        toc_entries: List of {"title": str, "page": int} dicts (1-indexed pages).

    Returns:
        Path to the patched EPUB.
    """
    book = epub.read_epub(epub_path)

    # Page documents were written as pages/page_0000.xhtml etc. by
    # pdf_to_epub, so a plain lexicographic sort restores reading order.
    pages = sorted(
        (item for item in book.get_items() if item.file_name.startswith("pages/")),
        key=lambda item: item.file_name,
    )

    if not pages:
        print("Error: No page items found in EPUB")
        sys.exit(1)

    # Translate {"title", "page"} entries into epub.Link objects, warning
    # about (and skipping) anything that points outside the page range.
    new_toc = []
    for entry in toc_entries:
        page_idx = entry["page"] - 1  # Convert 1-indexed config to 0-indexed list
        if 0 <= page_idx < len(pages):
            target = pages[page_idx]
            new_toc.append(epub.Link(target.file_name, entry["title"], f"toc_{page_idx}"))
        else:
            print(f"Warning: Page {entry['page']} out of range (1-{len(pages)}), skipping: {entry['title']}")

    if not new_toc:
        # Every entry was out of range; writing an empty TOC would silently
        # produce a broken nav document, so fail loudly instead.
        print("Error: no valid TOC entries; EPUB left unchanged")
        sys.exit(1)

    book.toc = new_toc

    # Drop the stale navigation documents and regenerate them so the new
    # TOC is reflected in both the NCX and the EPUB3 nav.
    for item in list(book.get_items()):
        if isinstance(item, (epub.EpubNcx, epub.EpubNav)):
            book.items.remove(item)
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Write back over the original file.
    epub.write_epub(epub_path, book, {})
    print(f"Updated TOC with {len(new_toc)} entries in {epub_path}")
    return epub_path


def main():
    """CLI entry point: load the JSON marker file and patch the EPUB."""
    parser = argparse.ArgumentParser(description="Add/update EPUB table of contents")
    parser.add_argument("epub", help="Path to EPUB file")
    parser.add_argument("--toc", required=True, help="Path to TOC JSON file")

    args = parser.parse_args()

    with open(args.toc, encoding="utf-8") as f:
        toc_entries = json.load(f)

    patch_toc(args.epub, toc_entries)


if __name__ == "__main__":
    main()
import argparse
import os
from pathlib import Path

from pdf_to_epub import convert_pdf_to_epub


# Known book metadata — add entries as we convert more flipbooks.
BOOK_METADATA = {
    "ExploringMycoFiBook.pdf": {
        "title": "Exploring MycoFi: Mycelial Design Patterns for Web3 and Beyond",
        "author": "Jeff Emmett & Contributors",
        "description": (
            "A Mycopunk publication from the Greenpill Network exploring "
            "how mycelial networks can inform the design of decentralized "
            "economic systems, DAOs, and Web3 infrastructure."
        ),
    },
    "psilocybernetics.pdf": {
        "title": "Psilocybernetics",
        "author": "Jeff Emmett",
        "description": "An exploration of psychedelic-informed cybernetics.",
    },
}


def _lookup_metadata(filename: str) -> dict:
    """Return known metadata for *filename*, or {} if unknown.

    NOTE(review): the repo commits output/Psilocybernetics.epub (capital P)
    while the key above is lowercase, so an exact-case lookup would miss
    that book — fall back to a casefolded comparison. Confirm the actual
    PDF filenames on disk.
    """
    meta = BOOK_METADATA.get(filename)
    if meta is not None:
        return meta
    wanted = filename.casefold()
    for key, value in BOOK_METADATA.items():
        if key.casefold() == wanted:
            return value
    return {}


def find_pdfs(directory: str) -> list[Path]:
    """Find all PDF files in a directory (non-recursive)."""
    return sorted(Path(directory).glob("*.pdf"))


def batch_convert(
    input_dir: str,
    output_dir: str = "output",
    dpi: int = 200,
):
    """Convert all PDFs found in input_dir to EPUBs in output_dir.

    Args:
        input_dir: Directory scanned (non-recursively) for *.pdf files.
        output_dir: Destination directory, created if missing.
        dpi: Render resolution forwarded to convert_pdf_to_epub.
    """
    os.makedirs(output_dir, exist_ok=True)
    pdfs = find_pdfs(input_dir)

    if not pdfs:
        print(f"No PDFs found in {input_dir}")
        return

    print(f"Found {len(pdfs)} PDF(s) to convert:\n")
    for pdf in pdfs:
        print(f" - {pdf.name}")
    print()

    # Each result is (pdf name, output path or None, "OK" or error text).
    results = []
    for pdf in pdfs:
        meta = _lookup_metadata(pdf.name)
        output_path = os.path.join(output_dir, pdf.stem + ".epub")

        print(f"{'=' * 60}")
        print(f"Converting: {pdf.name}")
        print(f"{'=' * 60}\n")

        try:
            result = convert_pdf_to_epub(
                pdf_path=str(pdf),
                output_path=output_path,
                title=meta.get("title"),
                author=meta.get("author"),
                dpi=dpi,
                description=meta.get("description", ""),
            )
            results.append((pdf.name, result, "OK"))
        except Exception as e:
            # Batch boundary: one bad PDF must not abort the whole run;
            # the failure is reported here and again in the summary.
            print(f"ERROR converting {pdf.name}: {e}")
            results.append((pdf.name, None, str(e)))
        print()

    # Summary
    print(f"\n{'=' * 60}")
    print("BATCH CONVERSION SUMMARY")
    print(f"{'=' * 60}")
    for name, path, status in results:
        if status == "OK":
            size = os.path.getsize(path) / (1024 * 1024)
            print(f" OK {name} → {path} ({size:.1f} MB)")
        else:
            print(f" ERR {name}: {status}")


def main():
    """CLI entry point for batch conversion."""
    parser = argparse.ArgumentParser(description="Batch convert PDFs to fixed-layout EPUB")
    parser.add_argument("input_dir", help="Directory containing PDF files")
    parser.add_argument("--output-dir", "-o", default="output", help="Output directory (default: output/)")
    parser.add_argument("--dpi", type=int, default=200, help="Render DPI (default: 200)")

    args = parser.parse_args()
    batch_convert(args.input_dir, args.output_dir, args.dpi)


if __name__ == "__main__":
    main()
+ """ + doc = fitz.open(pdf_path) + pages = [] + zoom = dpi / 72 # PDF is 72 DPI by default + matrix = fitz.Matrix(zoom, zoom) + + for i, page in enumerate(doc): + pix = page.get_pixmap(matrix=matrix) + img_bytes = pix.tobytes("jpeg", jpg_quality=92) + pages.append((img_bytes, pix.width, pix.height)) + print(f" Extracted page {i + 1}/{doc.page_count} ({pix.width}x{pix.height})") + + doc.close() + return pages + + +def extract_metadata(pdf_path: str) -> dict: + """Pull whatever metadata we can from the PDF.""" + doc = fitz.open(pdf_path) + meta = doc.metadata + doc.close() + return { + "title": meta.get("title", ""), + "author": meta.get("author", ""), + "subject": meta.get("subject", ""), + } + + +def build_fixed_layout_epub( + pages: list[tuple[bytes, int, int]], + title: str, + author: str, + output_path: str, + language: str = "en", + cover_page: int = 0, + description: str = "", +) -> str: + """Build a fixed-layout EPUB3 from page images. + + Args: + pages: List of (jpeg_bytes, width, height) per page + title: Book title + author: Book author + output_path: Where to save the .epub + language: Language code + cover_page: Which page index to use as cover (default 0) + description: Book description for metadata + + Returns: + Path to the created EPUB file + """ + book = epub.EpubBook() + book_id = str(uuid.uuid4()) + + # -- Metadata -- + book.set_identifier(book_id) + book.set_title(title) + book.set_language(language) + book.add_author(author) + if description: + book.add_metadata("DC", "description", description) + + # Fixed-layout metadata (EPUB3 rendition properties) + book.add_metadata( + None, + "meta", + "pre-paginated", + {"property": "rendition:layout"}, + ) + book.add_metadata( + None, + "meta", + "auto", + {"property": "rendition:orientation"}, + ) + book.add_metadata( + None, + "meta", + "none", + {"property": "rendition:spread"}, + ) + + # Use first page dimensions as viewport default + _, vp_w, vp_h = pages[0] if pages else (None, 1024, 1366) + + # 
-- Add cover image (metadata only, actual image added in page loop) -- + cover_bytes, _, _ = pages[cover_page] + book.set_cover("images/cover.jpg", cover_bytes, create_page=False) + + # -- CSS for fixed-layout pages -- + page_css = epub.EpubItem( + uid="page_css", + file_name="style/page.css", + media_type="text/css", + content=b""" +body { + margin: 0; + padding: 0; + overflow: hidden; +} +.page-image { + width: 100%; + height: 100%; + object-fit: contain; + display: block; +} +""", + ) + book.add_item(page_css) + + # -- Build page chapters -- + chapters = [] + for i, (img_bytes, w, h) in enumerate(pages): + # Add image + img_item = epub.EpubImage() + img_item.file_name = f"images/page_{i:04d}.jpg" + img_item.media_type = "image/jpeg" + img_item.content = img_bytes + book.add_item(img_item) + + # Create HTML page with viewport matching image dimensions + chapter = epub.EpubHtml( + title=f"Page {i + 1}", + file_name=f"pages/page_{i:04d}.xhtml", + lang=language, + ) + chapter.content = f""" + + + + Page {i + 1} + + + +
Page {i + 1}
+ +""".encode("utf-8") + chapter.add_item(page_css) + book.add_item(chapter) + chapters.append(chapter) + + # -- Spine & TOC -- + book.spine = chapters + # Simple TOC — just first, middle, last for now + # Can be enhanced with actual chapter markers + book.toc = [chapters[0]] + if len(chapters) > 2: + book.toc.append(chapters[len(chapters) // 2]) + if len(chapters) > 1: + book.toc.append(chapters[-1]) + + # Required EPUB3 navigation + book.add_item(epub.EpubNcx()) + book.add_item(epub.EpubNav()) + + # -- Write -- + epub.write_epub(output_path, book, {}) + return output_path + + +def convert_pdf_to_epub( + pdf_path: str, + output_path: str | None = None, + title: str | None = None, + author: str | None = None, + dpi: int = 200, + description: str = "", +) -> str: + """Main conversion function. Takes a PDF, produces a fixed-layout EPUB. + + Args: + pdf_path: Path to input PDF + output_path: Path for output EPUB (default: same name as PDF with .epub) + title: Override title (otherwise extracted from PDF metadata) + author: Override author + dpi: Resolution for page rendering (higher = sharper but larger file) + description: Book description + + Returns: + Path to created EPUB + """ + pdf_path = str(Path(pdf_path).resolve()) + if not os.path.exists(pdf_path): + print(f"Error: PDF not found: {pdf_path}") + sys.exit(1) + + # Default output path + if output_path is None: + output_path = str(Path(pdf_path).with_suffix(".epub")) + + # Extract metadata from PDF as fallback + meta = extract_metadata(pdf_path) + title = title or meta["title"] or Path(pdf_path).stem + author = author or meta["author"] or "Unknown" + + print(f"Converting: {pdf_path}") + print(f" Title: {title}") + print(f" Author: {author}") + print(f" DPI: {dpi}") + print() + + # Extract pages + print("Extracting pages...") + pages = extract_pages_as_images(pdf_path, dpi=dpi) + print(f"\n{len(pages)} pages extracted.") + + # Build EPUB + print(f"\nBuilding EPUB: {output_path}") + result = 
build_fixed_layout_epub( + pages=pages, + title=title, + author=author, + output_path=output_path, + description=description, + ) + + file_size = os.path.getsize(result) / (1024 * 1024) + print(f"\nDone! {result} ({file_size:.1f} MB)") + return result + + +def main(): + parser = argparse.ArgumentParser( + description="Convert PDF to fixed-layout EPUB for Kindle/ebook readers" + ) + parser.add_argument("pdf", help="Path to input PDF file") + parser.add_argument("--output", "-o", help="Output EPUB path (default: same name as PDF)") + parser.add_argument("--title", "-t", help="Book title (overrides PDF metadata)") + parser.add_argument("--author", "-a", help="Book author (overrides PDF metadata)") + parser.add_argument("--dpi", type=int, default=200, help="Render DPI (default: 200)") + parser.add_argument("--description", "-d", default="", help="Book description") + + args = parser.parse_args() + convert_pdf_to_epub( + pdf_path=args.pdf, + output_path=args.output, + title=args.title, + author=args.author, + dpi=args.dpi, + description=args.description, + ) + + +if __name__ == "__main__": + main() diff --git a/output/ExploringMycoFiBook.epub b/output/ExploringMycoFiBook.epub new file mode 100644 index 0000000..0e6913a Binary files /dev/null and b/output/ExploringMycoFiBook.epub differ diff --git a/output/Psilocybernetics.epub b/output/Psilocybernetics.epub new file mode 100644 index 0000000..4cb112b Binary files /dev/null and b/output/Psilocybernetics.epub differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..5e18e1a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +PyMuPDF>=1.24.0 +ebooklib>=0.18 +Pillow>=10.0 diff --git a/source/Mycofi Pages_Full_Draft.idml b/source/Mycofi Pages_Full_Draft.idml new file mode 100755 index 0000000..773b4b0 Binary files /dev/null and b/source/Mycofi Pages_Full_Draft.idml differ diff --git a/toc_mycofi.json b/toc_mycofi.json new file mode 100644 index 0000000..3694a14 --- /dev/null +++ 
b/toc_mycofi.json @@ -0,0 +1,27 @@ +[ + {"title": "Cover", "page": 1}, + {"title": "Synopsis", "page": 2}, + {"title": "Title Page", "page": 6}, + {"title": "Credits", "page": 7}, + {"title": "Contents", "page": 10}, + {"title": "Endorsements", "page": 11}, + {"title": "A Note from the Creators", "page": 13}, + {"title": "Foreword", "page": 17}, + {"title": "Introduction: Uncovering Nature's Economic Blueprints", "page": 21}, + {"title": "Design Pattern 1: Network Infrastructure", "page": 27}, + {"title": "Imagining Fungal Futures: Mesh Stability", "page": 31}, + {"title": "Design Pattern 2: Fractal Nature", "page": 35}, + {"title": "Imagining Fungal Futures: Endosymbiotic Finance", "page": 41}, + {"title": "Design Pattern 3: Emergent Coordination", "page": 45}, + {"title": "Imagining Fungal Futures: Adaptive Myco-Organizations", "page": 49}, + {"title": "Design Pattern 4: Dynamic Flow", "page": 53}, + {"title": "Imagining Fungal Futures: Vote Streaming", "page": 57}, + {"title": "Design Pattern 5: Mutual Reciprocity", "page": 61}, + {"title": "Imagining Fungal Futures: Generosity Networks", "page": 65}, + {"title": "Design Pattern 6: Polycentric Pluralism", "page": 69}, + {"title": "Imagining Fungal Futures: Collective Flourishing", "page": 73}, + {"title": "Join the Mycelial Revolution", "page": 75}, + {"title": "Let's Get Rooted, Mycopunk", "page": 79}, + {"title": "Gratitude & Acknowledgments", "page": 82}, + {"title": "Appendix: References", "page": 83} +]