Better handle large numbers (u128/i128) in deserialization (#6836)

2025-08-30 00:16:36 +08:00 · 2025-08-30 00:16:36 +08:00 · 5546ebe13e
parent b08593bf26
commit 5546ebe13e
7 changed files with 129 additions and 4 deletions
--- a/crates/typst-library/src/foundations/int.rs
+++ b/crates/typst-library/src/foundations/int.rs
@ -400,7 +400,17 @@ macro_rules! signed_int {
    ($($ty:ty)*) => {
        $(cast! {
            $ty,
-            self => Value::Int(self as _),
+            self => {
+                #[allow(irrefutable_let_patterns)]
+                if let Ok(int) = i64::try_from(self) {
+                    Value::Int(int)
+                } else {
+                    // Some i128 values lie outside the i64 range (too large
+                    // or too small). In that case, we accept that there may
+                    // be a precision loss and use a floating-point number.
+                    Value::Float(self as _)
+                }
+            },
            v: i64 => v.try_into().map_err(|_| "number too large")?,
        })*
    }
@ -415,7 +425,7 @@ macro_rules! unsigned_int {
                if let Ok(int) = i64::try_from(self) {
                    Value::Int(int)
                } else {
-                    // Some u64 are too large to be cast as i64
+                    // Some numbers (u64, u128) are too large to be cast as i64
                    // In that case, we accept that there may be a
                    // precision loss, and use a floating point number
                    Value::Float(self as _)
@ -432,8 +442,8 @@ macro_rules! unsigned_int {
    }
 }

-signed_int! { i8 i16 i32 isize }
-unsigned_int! { u8 u16 u32 u64 usize }
+signed_int! { i8 i16 i32 i128 isize }
+unsigned_int! { u8 u16 u32 u64 u128 usize }

 cast! {
    NonZeroI64,
--- a/crates/typst-library/src/foundations/value.rs
+++ b/crates/typst-library/src/foundations/value.rs
@ -404,6 +404,10 @@ impl<'de> Visitor<'de> for ValueVisitor {
        Ok(v.into_value())
    }

+    fn visit_i128<E: Error>(self, v: i128) -> Result<Self::Value, E> {
+        Ok(v.into_value())
+    }
+
    fn visit_u8<E: Error>(self, v: u8) -> Result<Self::Value, E> {
        Ok(v.into_value())
    }
@ -420,6 +424,10 @@ impl<'de> Visitor<'de> for ValueVisitor {
        Ok(v.into_value())
    }

+    fn visit_u128<E: Error>(self, v: u128) -> Result<Self::Value, E> {
+        Ok(v.into_value())
+    }
+
    fn visit_f32<E: Error>(self, v: f32) -> Result<Self::Value, E> {
        Ok((v as f64).into_value())
    }
--- a/tests/suite/loading/cbor.typ
+++ b/tests/suite/loading/cbor.typ
@ -2,3 +2,23 @@
 // Warning: 15-21 `cbor.decode` is deprecated, directly pass bytes to `cbor` instead
 // Hint: 15-21 it will be removed in Typst 0.15.0
 #let _ = cbor.decode
+
+--- cbor-decode-integer ---
+#import "edge-case.typ": cbor-integers
+#let data = cbor(cbor-integers)
+
+#for (name, result) in data.representable {
+  assert.eq(
+    type(result),
+    int,
+    message: "failed to decode " + name,
+  )
+}
+
+#for (name, result) in data.large {
+  assert.eq(
+    type(result),
+    float,
+    message: "failed to approximately decode " + name,
+  )
+}
--- a/tests/suite/loading/edge-case.typ
+++ b/tests/suite/loading/edge-case.typ
@ -0,0 +1,24 @@
+// SKIP
+// Edge cases in deserialization.
+// They can be imported when testing decoding functions.
+
+#let representable-integer = (
+  i64-max: "9223372036854775807",
+  i64-min: "-9223372036854775808",
+)
+
+#let large-integer = (
+  i64-max-plus-one: "9223372036854775808",
+  i64-min-minus-one: "-9223372036854775809",
+  u64-max: "18446744073709551615",
+  u64-max-plus-one: "18446744073709551616",
+  i128-max: "170141183460469231731687303715884105727",
+  i128-min: "-170141183460469231731687303715884105728",
+  u128-max: "340282366920938463463374607431768211455",
+)
+
+// The following bytes were generated by the Rust script in PR 6836.
+#let cbor-integers = bytes(
+  (162, 109, 114, 101, 112, 114, 101, 115, 101, 110, 116, 97, 98, 108, 101, 162, 103, 105, 54, 52, 95, 109, 97, 120, 27, 127, 255, 255, 255, 255, 255, 255, 255, 103, 105, 54, 52, 95, 109, 105, 110, 59, 127, 255, 255, 255, 255, 255, 255, 255, 101, 108, 97, 114, 103, 101, 167, 112, 105, 54, 52, 95, 109, 97, 120, 95, 112, 108, 117, 115, 95, 111, 110, 101, 27, 128, 0, 0, 0, 0, 0, 0, 0, 113, 105, 54, 52, 95, 109, 105, 110, 95, 109, 105, 110, 117, 115, 95, 111, 110, 101, 59, 128, 0, 0, 0, 0, 0, 0, 0, 103, 117, 54, 52, 95, 109, 97, 120, 27, 255, 255, 255, 255, 255, 255, 255, 255, 112, 117, 54, 52, 95, 109, 97, 120, 95, 112, 108, 117, 115, 95, 111, 110, 101, 194, 73, 1, 0, 0, 0, 0, 0, 0, 0, 0, 104, 105, 49, 50, 56, 95, 109, 97, 120, 194, 80, 
+127, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 104, 105, 49, 50, 56, 95, 109, 105, 110, 195, 80, 127, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 104, 117, 49, 50, 56, 95, 109, 97, 120, 194, 80, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255),
+)
--- a/tests/suite/loading/json.typ
+++ b/tests/suite/loading/json.typ
@ -19,3 +19,22 @@
 // but not overflow
 #let bignum = json("/assets/data/big-number.json")
 #bignum
+
+--- json-decode-number ---
+#import "edge-case.typ": large-integer, representable-integer
+
+#for (name, source) in representable-integer {
+  assert.eq(
+    type(json(bytes(source))),
+    int,
+    message: "failed to decode " + name,
+  )
+}
+
+#for (name, source) in large-integer {
+  assert.eq(
+    type(json(bytes(source))),
+    float,
+    message: "failed to approximately decode " + name,
+  )
+}
--- a/tests/suite/loading/toml.typ
+++ b/tests/suite/loading/toml.typ
@ -44,3 +44,28 @@
 // Warning: 15-21 `toml.decode` is deprecated, directly pass bytes to `toml` instead
 // Hint: 15-21 it will be removed in Typst 0.15.0
 #let _ = toml.decode
+
+--- toml-decode-integer ---
+#import "edge-case.typ": representable-integer
+
+#for (name, source) in representable-integer {
+  assert.eq(
+    // The `key` trick is necessary because a TOML document must be a table.
+    type(toml(bytes("key = " + source)).key),
+    int,
+    message: "failed to decode " + name,
+  )
+}
+
+--- toml-decode-integer-too-large ---
+// If an integer cannot be represented losslessly, an error must be thrown.
+// https://toml.io/en/v1.0.0#integer
+
+#import "edge-case.typ": large-integer
+// Error: 7-55 failed to parse TOML (number too large to fit in target type at 1:7)
+#toml(bytes("key = " + large-integer.i64-max-plus-one))
+
+--- toml-decode-integer-too-small ---
+#import "edge-case.typ": large-integer
+// Error: 7-56 failed to parse TOML (number too small to fit in target type at 1:7)
+#toml(bytes("key = " + large-integer.i64-min-minus-one))
--- a/tests/suite/loading/yaml.typ
+++ b/tests/suite/loading/yaml.typ
@ -20,3 +20,22 @@
 // Warning: 15-21 `yaml.decode` is deprecated, directly pass bytes to `yaml` instead
 // Hint: 15-21 it will be removed in Typst 0.15.0
 #let _ = yaml.decode
+
+--- yaml-decode-number ---
+#import "edge-case.typ": large-integer, representable-integer
+
+#for (name, source) in representable-integer {
+  assert.eq(
+    type(yaml(bytes(source))),
+    int,
+    message: "failed to decode " + name,
+  )
+}
+
+#for (name, source) in large-integer {
+  assert.eq(
+    type(yaml(bytes(source))),
+    float,
+    message: "failed to approximately decode " + name,
+  )
+}