383 lines
11 KiB
Rust
383 lines
11 KiB
Rust
//! Exporting into PDF documents.
|
|
|
|
mod color;
|
|
mod extg;
|
|
mod font;
|
|
mod gradient;
|
|
mod image;
|
|
mod outline;
|
|
mod page;
|
|
|
|
use std::cmp::Eq;
|
|
use std::collections::{BTreeMap, HashMap};
|
|
use std::hash::Hash;
|
|
|
|
use base64::Engine;
|
|
use ecow::{eco_format, EcoString};
|
|
use pdf_writer::types::Direction;
|
|
use pdf_writer::{Finish, Name, Pdf, Ref, TextStr};
|
|
use typst::doc::{Document, Lang};
|
|
use typst::eval::Datetime;
|
|
use typst::font::Font;
|
|
use typst::geom::{Abs, Dir, Em};
|
|
use typst::image::Image;
|
|
use typst::model::Introspector;
|
|
use typst::util::Deferred;
|
|
use xmp_writer::{DateTime, LangId, RenditionClass, Timezone, XmpWriter};
|
|
|
|
use crate::color::ColorSpaces;
|
|
use crate::extg::ExtGState;
|
|
use crate::gradient::PdfGradient;
|
|
use crate::image::EncodedImage;
|
|
use crate::page::Page;
|
|
|
|
/// Export a document into a PDF file.
|
|
///
|
|
/// Returns the raw bytes making up the PDF file.
|
|
///
|
|
/// The `ident` parameter shall be a string that uniquely and stably identifies
|
|
/// the document. It should not change between compilations of the same
|
|
/// document. Its hash will be used to create a PDF document identifier (the
|
|
/// identifier itself is not leaked). If `ident` is `None`, a hash of the
|
|
/// document is used instead (which means that it _will_ change across
|
|
/// compilations).
|
|
///
|
|
/// The `timestamp`, if given, is expected to be the creation date of the
|
|
/// document as a UTC datetime. It will only be used if `set document(date: ..)`
|
|
/// is `auto`.
|
|
#[tracing::instrument(skip_all)]
|
|
pub fn pdf(
|
|
document: &Document,
|
|
ident: Option<&str>,
|
|
timestamp: Option<Datetime>,
|
|
) -> Vec<u8> {
|
|
let mut ctx = PdfContext::new(document);
|
|
page::construct_pages(&mut ctx, &document.pages);
|
|
font::write_fonts(&mut ctx);
|
|
image::write_images(&mut ctx);
|
|
gradient::write_gradients(&mut ctx);
|
|
extg::write_external_graphics_states(&mut ctx);
|
|
page::write_page_tree(&mut ctx);
|
|
write_catalog(&mut ctx, ident, timestamp);
|
|
ctx.pdf.finish()
|
|
}
|
|
|
|
/// Context for exporting a whole PDF document.
|
|
struct PdfContext<'a> {
|
|
/// The document that we're currently exporting.
|
|
document: &'a Document,
|
|
/// An introspector for the document, used to resolve locations links and
|
|
/// the document outline.
|
|
introspector: Introspector,
|
|
|
|
/// The writer we are writing the PDF into.
|
|
pdf: Pdf,
|
|
/// Content of exported pages.
|
|
pages: Vec<Page>,
|
|
/// For each font a mapping from used glyphs to their text representation.
|
|
/// May contain multiple chars in case of ligatures or similar things. The
|
|
/// same glyph can have a different text representation within one document,
|
|
/// then we just save the first one. The resulting strings are used for the
|
|
/// PDF's /ToUnicode map for glyphs that don't have an entry in the font's
|
|
/// cmap. This is important for copy-paste and searching.
|
|
glyph_sets: HashMap<Font, BTreeMap<u16, EcoString>>,
|
|
/// The number of glyphs for all referenced languages in the document.
|
|
/// We keep track of this to determine the main document language.
|
|
languages: HashMap<Lang, usize>,
|
|
|
|
/// Allocator for indirect reference IDs.
|
|
alloc: Ref,
|
|
/// The ID of the page tree.
|
|
page_tree_ref: Ref,
|
|
/// The IDs of written pages.
|
|
page_refs: Vec<Ref>,
|
|
/// The IDs of written fonts.
|
|
font_refs: Vec<Ref>,
|
|
/// The IDs of written images.
|
|
image_refs: Vec<Ref>,
|
|
/// The IDs of written gradients.
|
|
gradient_refs: Vec<Ref>,
|
|
/// The IDs of written external graphics states.
|
|
ext_gs_refs: Vec<Ref>,
|
|
/// Handles color space writing.
|
|
colors: ColorSpaces,
|
|
|
|
/// Deduplicates fonts used across the document.
|
|
font_map: Remapper<Font>,
|
|
/// Deduplicates images used across the document.
|
|
image_map: Remapper<Image>,
|
|
/// Handles to deferred image conversions.
|
|
image_deferred_map: HashMap<usize, Deferred<EncodedImage>>,
|
|
/// Deduplicates gradients used across the document.
|
|
gradient_map: Remapper<PdfGradient>,
|
|
/// Deduplicates external graphics states used across the document.
|
|
extg_map: Remapper<ExtGState>,
|
|
}
|
|
|
|
impl<'a> PdfContext<'a> {
|
|
fn new(document: &'a Document) -> Self {
|
|
let mut alloc = Ref::new(1);
|
|
let page_tree_ref = alloc.bump();
|
|
Self {
|
|
document,
|
|
introspector: Introspector::new(&document.pages),
|
|
pdf: Pdf::new(),
|
|
pages: vec![],
|
|
glyph_sets: HashMap::new(),
|
|
languages: HashMap::new(),
|
|
alloc,
|
|
page_tree_ref,
|
|
page_refs: vec![],
|
|
font_refs: vec![],
|
|
image_refs: vec![],
|
|
gradient_refs: vec![],
|
|
ext_gs_refs: vec![],
|
|
colors: ColorSpaces::default(),
|
|
font_map: Remapper::new(),
|
|
image_map: Remapper::new(),
|
|
image_deferred_map: HashMap::default(),
|
|
gradient_map: Remapper::new(),
|
|
extg_map: Remapper::new(),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Write the document catalog.
|
|
#[tracing::instrument(skip_all)]
|
|
fn write_catalog(ctx: &mut PdfContext, ident: Option<&str>, timestamp: Option<Datetime>) {
|
|
let lang = ctx
|
|
.languages
|
|
.iter()
|
|
.max_by_key(|(&lang, &count)| (count, lang))
|
|
.map(|(&k, _)| k);
|
|
|
|
let dir = if lang.map(Lang::dir) == Some(Dir::RTL) {
|
|
Direction::R2L
|
|
} else {
|
|
Direction::L2R
|
|
};
|
|
|
|
// Write the outline tree.
|
|
let outline_root_id = outline::write_outline(ctx);
|
|
|
|
// Write the page labels.
|
|
let page_labels = page::write_page_labels(ctx);
|
|
|
|
// Write the document information.
|
|
let mut info = ctx.pdf.document_info(ctx.alloc.bump());
|
|
let mut xmp = XmpWriter::new();
|
|
if let Some(title) = &ctx.document.title {
|
|
info.title(TextStr(title));
|
|
xmp.title([(None, title.as_str())]);
|
|
}
|
|
|
|
let authors = &ctx.document.author;
|
|
if !authors.is_empty() {
|
|
info.author(TextStr(&authors.join(", ")));
|
|
xmp.creator(authors.iter().map(|s| s.as_str()));
|
|
}
|
|
|
|
let creator = eco_format!("Typst {}", env!("CARGO_PKG_VERSION"));
|
|
info.creator(TextStr(&creator));
|
|
xmp.creator_tool(&creator);
|
|
|
|
let keywords = &ctx.document.keywords;
|
|
if !keywords.is_empty() {
|
|
let joined = keywords.join(", ");
|
|
info.keywords(TextStr(&joined));
|
|
xmp.pdf_keywords(&joined);
|
|
}
|
|
|
|
if let Some(date) = ctx.document.date.unwrap_or(timestamp) {
|
|
let tz = ctx.document.date.is_auto();
|
|
if let Some(pdf_date) = pdf_date(date, tz) {
|
|
info.creation_date(pdf_date);
|
|
info.modified_date(pdf_date);
|
|
}
|
|
if let Some(xmp_date) = xmp_date(date, tz) {
|
|
xmp.create_date(xmp_date);
|
|
xmp.modify_date(xmp_date);
|
|
}
|
|
}
|
|
|
|
info.finish();
|
|
xmp.num_pages(ctx.document.pages.len() as u32);
|
|
xmp.format("application/pdf");
|
|
xmp.language(ctx.languages.keys().map(|lang| LangId(lang.as_str())));
|
|
|
|
// A unique ID for this instance of the document. Changes if anything
|
|
// changes in the frames.
|
|
let instance_id = hash_base64(&ctx.pdf.as_bytes());
|
|
|
|
if let Some(ident) = ident {
|
|
// A unique ID for the document that stays stable across compilations.
|
|
let doc_id = hash_base64(&("PDF-1.7", ident));
|
|
xmp.document_id(&doc_id);
|
|
xmp.instance_id(&instance_id);
|
|
ctx.pdf
|
|
.set_file_id((doc_id.clone().into_bytes(), instance_id.into_bytes()));
|
|
} else {
|
|
// This is not spec-compliant, but some PDF readers really want an ID.
|
|
let bytes = instance_id.into_bytes();
|
|
ctx.pdf.set_file_id((bytes.clone(), bytes));
|
|
}
|
|
|
|
xmp.rendition_class(RenditionClass::Proof);
|
|
xmp.pdf_version("1.7");
|
|
|
|
let xmp_buf = xmp.finish(None);
|
|
let meta_ref = ctx.alloc.bump();
|
|
ctx.pdf
|
|
.stream(meta_ref, xmp_buf.as_bytes())
|
|
.pair(Name(b"Type"), Name(b"Metadata"))
|
|
.pair(Name(b"Subtype"), Name(b"XML"));
|
|
|
|
// Write the document catalog.
|
|
let mut catalog = ctx.pdf.catalog(ctx.alloc.bump());
|
|
catalog.pages(ctx.page_tree_ref);
|
|
catalog.viewer_preferences().direction(dir);
|
|
catalog.metadata(meta_ref);
|
|
|
|
// Insert the page labels.
|
|
if !page_labels.is_empty() {
|
|
let mut num_tree = catalog.page_labels();
|
|
let mut entries = num_tree.nums();
|
|
for (n, r) in &page_labels {
|
|
entries.insert(n.get() as i32 - 1, *r);
|
|
}
|
|
}
|
|
|
|
if let Some(outline_root_id) = outline_root_id {
|
|
catalog.outlines(outline_root_id);
|
|
}
|
|
|
|
if let Some(lang) = lang {
|
|
catalog.lang(TextStr(lang.as_str()));
|
|
}
|
|
}
|
|
|
|
/// Compress data with the DEFLATE algorithm.
|
|
#[tracing::instrument(skip_all)]
|
|
fn deflate(data: &[u8]) -> Vec<u8> {
|
|
const COMPRESSION_LEVEL: u8 = 6;
|
|
miniz_oxide::deflate::compress_to_vec_zlib(data, COMPRESSION_LEVEL)
|
|
}
|
|
|
|
/// Create a base64-encoded hash of the value.
|
|
fn hash_base64<T: Hash>(value: &T) -> String {
|
|
base64::engine::general_purpose::STANDARD
|
|
.encode(typst::util::hash128(value).to_be_bytes())
|
|
}
|
|
|
|
/// Converts a datetime to a pdf-writer date.
|
|
fn pdf_date(datetime: Datetime, tz: bool) -> Option<pdf_writer::Date> {
|
|
let year = datetime.year().filter(|&y| y >= 0)? as u16;
|
|
|
|
let mut pdf_date = pdf_writer::Date::new(year);
|
|
|
|
if let Some(month) = datetime.month() {
|
|
pdf_date = pdf_date.month(month);
|
|
}
|
|
|
|
if let Some(day) = datetime.day() {
|
|
pdf_date = pdf_date.day(day);
|
|
}
|
|
|
|
if let Some(h) = datetime.hour() {
|
|
pdf_date = pdf_date.hour(h);
|
|
}
|
|
|
|
if let Some(m) = datetime.minute() {
|
|
pdf_date = pdf_date.minute(m);
|
|
}
|
|
|
|
if let Some(s) = datetime.second() {
|
|
pdf_date = pdf_date.second(s);
|
|
}
|
|
|
|
if tz {
|
|
pdf_date = pdf_date.utc_offset_hour(0).utc_offset_minute(0);
|
|
}
|
|
|
|
Some(pdf_date)
|
|
}
|
|
|
|
/// Converts a datetime to an xmp-writer datetime.
|
|
fn xmp_date(datetime: Datetime, tz: bool) -> Option<xmp_writer::DateTime> {
|
|
let year = datetime.year().filter(|&y| y >= 0)? as u16;
|
|
Some(DateTime {
|
|
year,
|
|
month: datetime.month(),
|
|
day: datetime.day(),
|
|
hour: datetime.hour(),
|
|
minute: datetime.minute(),
|
|
second: datetime.second(),
|
|
timezone: if tz { Some(Timezone::Utc) } else { None },
|
|
})
|
|
}
|
|
|
|
/// Assigns new, consecutive PDF-internal indices to items.
|
|
struct Remapper<T> {
|
|
/// Forwards from the items to the pdf indices.
|
|
to_pdf: HashMap<T, usize>,
|
|
/// Backwards from the pdf indices to the items.
|
|
to_items: Vec<T>,
|
|
}
|
|
|
|
impl<T> Remapper<T>
|
|
where
|
|
T: Eq + Hash + Clone,
|
|
{
|
|
fn new() -> Self {
|
|
Self { to_pdf: HashMap::new(), to_items: vec![] }
|
|
}
|
|
|
|
fn insert(&mut self, item: T) -> usize {
|
|
let to_layout = &mut self.to_items;
|
|
*self.to_pdf.entry(item.clone()).or_insert_with(|| {
|
|
let pdf_index = to_layout.len();
|
|
to_layout.push(item);
|
|
pdf_index
|
|
})
|
|
}
|
|
|
|
fn map(&self, item: &T) -> usize {
|
|
self.to_pdf[item]
|
|
}
|
|
|
|
fn pdf_indices<'a>(
|
|
&'a self,
|
|
refs: &'a [Ref],
|
|
) -> impl Iterator<Item = (Ref, usize)> + 'a {
|
|
refs.iter().copied().zip(0..self.to_pdf.len())
|
|
}
|
|
|
|
fn items(&self) -> impl Iterator<Item = &T> + '_ {
|
|
self.to_items.iter()
|
|
}
|
|
}
|
|
|
|
/// Additional methods for [`Abs`].
|
|
trait AbsExt {
|
|
/// Convert an to a number of points.
|
|
fn to_f32(self) -> f32;
|
|
}
|
|
|
|
impl AbsExt for Abs {
|
|
fn to_f32(self) -> f32 {
|
|
self.to_pt() as f32
|
|
}
|
|
}
|
|
|
|
/// Additional methods for [`Em`].
|
|
trait EmExt {
|
|
/// Convert an em length to a number of PDF font units.
|
|
fn to_font_units(self) -> f32;
|
|
}
|
|
|
|
impl EmExt for Em {
|
|
fn to_font_units(self) -> f32 {
|
|
1000.0 * self.get() as f32
|
|
}
|
|
}
|