Better handle large numbers (u128/i128) in deserialization (#6836)

This commit is contained in:
Y.D.X. 2025-08-30 00:16:36 +08:00 committed by GitHub
parent b08593bf26
commit 5546ebe13e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 129 additions and 4 deletions

View File

@ -400,7 +400,17 @@ macro_rules! signed_int {
($($ty:ty)*) => {
$(cast! {
$ty,
self => Value::Int(self as _),
self => {
#[allow(irrefutable_let_patterns)]
if let Ok(int) = i64::try_from(self) {
Value::Int(int)
} else {
// Some numbers (i128) are outside the range of i64.
// In that case, we accept that there may be a
// precision loss, and use a floating point number.
Value::Float(self as _)
}
},
v: i64 => v.try_into().map_err(|_| "number too large")?,
})*
}
@ -415,7 +425,7 @@ macro_rules! unsigned_int {
if let Ok(int) = i64::try_from(self) {
Value::Int(int)
} else {
// Some u64 are too large to be cast as i64
// Some numbers (u64, u128) are too large to be cast as i64
// In that case, we accept that there may be a
// precision loss, and use a floating point number
Value::Float(self as _)
@ -432,8 +442,8 @@ macro_rules! unsigned_int {
}
}
signed_int! { i8 i16 i32 isize }
unsigned_int! { u8 u16 u32 u64 usize }
signed_int! { i8 i16 i32 i128 isize }
unsigned_int! { u8 u16 u32 u64 u128 usize }
cast! {
NonZeroI64,

View File

@ -404,6 +404,10 @@ impl<'de> Visitor<'de> for ValueVisitor {
Ok(v.into_value())
}
/// Visits a 128-bit signed integer and returns whatever its `into_value`
/// conversion produces.
fn visit_i128<E: Error>(self, v: i128) -> Result<Self::Value, E> {
    let value = v.into_value();
    Ok(value)
}
/// Visits an 8-bit unsigned integer and returns whatever its `into_value`
/// conversion produces.
fn visit_u8<E: Error>(self, v: u8) -> Result<Self::Value, E> {
    let value = v.into_value();
    Ok(value)
}
@ -420,6 +424,10 @@ impl<'de> Visitor<'de> for ValueVisitor {
Ok(v.into_value())
}
/// Visits a 128-bit unsigned integer and returns whatever its `into_value`
/// conversion produces.
fn visit_u128<E: Error>(self, v: u128) -> Result<Self::Value, E> {
    let value = v.into_value();
    Ok(value)
}
/// Visits a 32-bit float by widening it to `f64` (a lossless conversion)
/// before converting it into a value.
fn visit_f32<E: Error>(self, v: f32) -> Result<Self::Value, E> {
    let widened = f64::from(v);
    Ok(widened.into_value())
}

View File

@ -2,3 +2,23 @@
// Warning: 15-21 `cbor.decode` is deprecated, directly pass bytes to `cbor` instead
// Hint: 15-21 it will be removed in Typst 0.15.0
#let _ = cbor.decode
--- cbor-decode-integer ---
// Decodes the shared CBOR fixture and checks the resulting types:
// integers within the i64 range must stay `int`, while integers outside of
// it can only be approximated and must become `float`.
#import "edge-case.typ": cbor-integers
#let data = cbor(cbor-integers)
// Entries of the `representable` map fit into an i64.
#for (name, result) in data.representable {
assert.eq(
type(result),
int,
message: "failed to decode " + name,
)
}
// Entries of the `large` map lie outside the i64 range.
#for (name, result) in data.large {
assert.eq(
type(result),
float,
message: "failed to approximately decode " + name,
)
}

View File

@ -0,0 +1,24 @@
// SKIP
// Edge cases in deserialization.
// They can be imported when testing decoding functions.
#let representable-integer = (
i64-max: "9223372036854775807",
i64-min: "-9223372036854775808",
)
#let large-integer = (
i64-max-plus-one: "9223372036854775808",
i64-min-minus-one: "-9223372036854775809",
u64-max: "18446744073709551615",
u64-max-plus-one: "18446744073709551616",
i128-max: "170141183460469231731687303715884105727",
i128-min: "-170141183460469231731687303715884105728",
u128-max: "340282366920938463463374607431768211455",
)
// The following bytes were generated by the Rust script in PR 6836.
#let cbor-integers = bytes(
(162, 109, 114, 101, 112, 114, 101, 115, 101, 110, 116, 97, 98, 108, 101, 162, 103, 105, 54, 52, 95, 109, 97, 120, 27, 127, 255, 255, 255, 255, 255, 255, 255, 103, 105, 54, 52, 95, 109, 105, 110, 59, 127, 255, 255, 255, 255, 255, 255, 255, 101, 108, 97, 114, 103, 101, 167, 112, 105, 54, 52, 95, 109, 97, 120, 95, 112, 108, 117, 115, 95, 111, 110, 101, 27, 128, 0, 0, 0, 0, 0, 0, 0, 113, 105, 54, 52, 95, 109, 105, 110, 95, 109, 105, 110, 117, 115, 95, 111, 110, 101, 59, 128, 0, 0, 0, 0, 0, 0, 0, 103, 117, 54, 52, 95, 109, 97, 120, 27, 255, 255, 255, 255, 255, 255, 255, 255, 112, 117, 54, 52, 95, 109, 97, 120, 95, 112, 108, 117, 115, 95, 111, 110, 101, 194, 73, 1, 0, 0, 0, 0, 0, 0, 0, 0, 104, 105, 49, 50, 56, 95, 109, 97, 120, 194, 80,
127, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 104, 105, 49, 50, 56, 95, 109, 105, 110, 195, 80, 127, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 104, 117, 49, 50, 56, 95, 109, 97, 120, 194, 80, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255),
)

View File

@ -19,3 +19,22 @@
// but not overflow
#let bignum = json("/assets/data/big-number.json")
#bignum
--- json-decode-number ---
// Parses each edge-case literal as a standalone JSON document and checks
// the resulting type.
#import "edge-case.typ": large-integer, representable-integer
// Literals within the i64 range must decode to `int`.
#for (name, source) in representable-integer {
assert.eq(
type(json(bytes(source))),
int,
message: "failed to decode " + name,
)
}
// Literals outside the i64 range must decode to `float` rather than fail.
#for (name, source) in large-integer {
assert.eq(
type(json(bytes(source))),
float,
message: "failed to approximately decode " + name,
)
}

View File

@ -44,3 +44,28 @@
// Warning: 15-21 `toml.decode` is deprecated, directly pass bytes to `toml` instead
// Hint: 15-21 it will be removed in Typst 0.15.0
#let _ = toml.decode
--- toml-decode-integer ---
// TOML integers at the boundaries of the i64 range must decode to `int`.
#import "edge-case.typ": representable-integer
#for (name, source) in representable-integer {
assert.eq(
// The `key` trick is necessary because a TOML document must be a table.
type(toml(bytes("key = " + source)).key),
int,
message: "failed to decode " + name,
)
}
--- toml-decode-integer-too-large ---
// If an integer cannot be represented losslessly, an error must be thrown.
// https://toml.io/en/v1.0.0#integer
// This case uses the value one above the largest 64-bit signed integer.
#import "edge-case.typ": large-integer
// Error: 7-55 failed to parse TOML (number too large to fit in target type at 1:7)
#toml(bytes("key = " + large-integer.i64-max-plus-one))
--- toml-decode-integer-too-small ---
// An integer one below the smallest 64-bit signed integer must be rejected
// with an error instead of being approximated.
#import "edge-case.typ": large-integer
// Error: 7-56 failed to parse TOML (number too small to fit in target type at 1:7)
#toml(bytes("key = " + large-integer.i64-min-minus-one))

View File

@ -20,3 +20,22 @@
// Warning: 15-21 `yaml.decode` is deprecated, directly pass bytes to `yaml` instead
// Hint: 15-21 it will be removed in Typst 0.15.0
#let _ = yaml.decode
--- yaml-decode-number ---
// Parses each edge-case literal as a standalone YAML document and checks
// the resulting type.
#import "edge-case.typ": large-integer, representable-integer
// Literals within the i64 range must decode to `int`.
#for (name, source) in representable-integer {
assert.eq(
type(yaml(bytes(source))),
int,
message: "failed to decode " + name,
)
}
// Literals outside the i64 range must decode to `float` rather than fail.
#for (name, source) in large-integer {
assert.eq(
type(yaml(bytes(source))),
float,
message: "failed to approximately decode " + name,
)
}