lexical_parse_float/bellerophon.rs
//! An implementation of Clinger's Bellerophon algorithm.
//!
//! This is a moderate-path algorithm that uses an extended-precision
//! float, represented in 80 bits, by calculating the bits of slop
//! and determining if those bits could prevent unambiguous rounding.
//!
//! This algorithm requires less static storage than the Lemire algorithm,
//! and has decent performance, and is therefore used when non-decimal,
//! non-power-of-two strings need to be parsed. Clinger's algorithm
//! is described in depth in "How to Read Floating Point Numbers Accurately",
//! available online [here](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.45.4152&rep=rep1&type=pdf).
//!
//! This implementation is loosely based on the Golang implementation,
//! found [here](https://github.com/golang/go/blob/b10849fbb97a2244c086991b4623ae9f32c212d0/src/strconv/extfloat.go).
//! This code is therefore subject to a 3-clause BSD license.
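//!
//! At a high level, the 80-bit extended-precision value here is a 64-bit
//! significand paired with a binary exponent. Parsing scales the parsed
//! digits by precomputed radix powers while accumulating a worst-case
//! error bound, and signals a fallback to slower, exact parsing whenever
//! that bound could change the rounded result.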

#![cfg(any(feature = "compact", feature = "radix"))]
#![doc(hidden)]

use lexical_util::format::NumberFormat;

use crate::float::{ExtendedFloat80, RawFloat};
use crate::mask::{lower_n_halfway, lower_n_mask};
use crate::number::Number;
use crate::shared;
use crate::table::bellerophon_powers;

// ALGORITHM
// ---------

/// Core implementation of the Bellerophon algorithm.
///
/// Create an extended-precision float, scale it to the proper radix power,
/// calculate the bits of slop, and return the representation. The value
/// is always guaranteed to be within 1 bit, rounded-down, of the real
/// value. If a negative exponent is returned, it signals that we were
/// unable to unambiguously round the significant digits.
///
/// This has been modified to return a biased, rather than unbiased, exponent.
pub fn bellerophon<F: RawFloat, const FORMAT: u128>(num: &Number, lossy: bool) -> ExtendedFloat80 {
    let format = NumberFormat::<{ FORMAT }> {};
    debug_assert!(
        !matches!(format.radix(), 2 | 4 | 8 | 16 | 32),
        "power-of-two radixes have a dedicated, faster algorithm"
    );
    debug_assert!(
        format.mantissa_radix() == format.exponent_base(),
        "only works if digits and exponent have the same base"
    );

    let fp_zero = ExtendedFloat80 {
        mant: 0,
        exp: 0,
    };
    let fp_inf = ExtendedFloat80 {
        mant: 0,
        exp: F::INFINITE_POWER,
    };

    // Early short-circuit, in case of literal 0 or infinity.
    // This allows us to avoid narrow casts causing numeric overflow,
    // and is a quick check for any radix.
    if num.mantissa == 0 || num.exponent <= -0x1000 {
        return fp_zero;
    } else if num.exponent >= 0x1000 {
        return fp_inf;
    }
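
    // Illustrative bound for the short-circuit above: in decimal, an input
    // like "1e-5000" has an exponent of -5000 <= -0x1000 (-4096), far below
    // the smallest f64 subnormal (~5e-324), so assigning zero is always
    // correct; the symmetric bound at 0x1000 guarantees overflow to infinity.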

    // Calculate our indexes for our extended-precision multiplication.
    let powers = bellerophon_powers(format.radix());
    // This narrowing cast is safe, since the exponent must be in a valid range.
    let exponent = num.exponent as i32 + powers.bias;
    let small_index = exponent % powers.step;
    let large_index = exponent / powers.step;

    if exponent < 0 {
        // Guaranteed underflow (assign 0).
        return fp_zero;
    }
    if large_index as usize >= powers.large.len() {
        // Overflow (assign infinity).
        return fp_inf;
    }

    // Within the valid exponent range, multiply by the large and small
    // exponents and return the resulting value.

    // Track errors as a factor of the unit in the last place (ULP).
    let mut errors: u32 = 0;
    if num.many_digits {
        errors += error_halfscale();
    }

    // Multiply by the small power.
    // Check if we can directly multiply by an integer; if not,
    // use extended-precision multiplication.
    let mut fp = ExtendedFloat80 {
        mant: num.mantissa,
        exp: 0,
    };
    match fp.mant.overflowing_mul(powers.get_small_int(small_index as usize)) {
        // Overflow, multiplication unsuccessful, go slow path.
        (_, true) => {
            normalize(&mut fp);
            fp = mul(&fp, &powers.get_small(small_index as usize));
            errors += error_halfscale();
        },
        // No overflow, multiplication successful.
        (mant, false) => {
            fp.mant = mant;
            normalize(&mut fp);
        },
    }
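
    // Illustrative fast path: a mantissa of 123456789 times the small
    // integer power 10^4 fits comfortably in a u64, so the plain integer
    // multiply above succeeds and no error is accrued; a mantissa near
    // u64::MAX would overflow instead, taking the extended-precision
    // multiply and charging half a ULP of error.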

    // Multiply by the large power.
    fp = mul(&fp, &powers.get_large(large_index as usize));
    if errors > 0 {
        errors += 1;
    }
    errors += error_halfscale();

    // Normalize the floating point (and the errors).
    let shift = normalize(&mut fp);
    errors <<= shift;
    fp.exp += F::EXPONENT_BIAS;

    // Check for literal underflow, even with halfway cases.
    if -fp.exp + 1 > 65 {
        return fp_zero;
    }

    // Too many errors accumulated, return an error.
    if !lossy && !error_is_accurate::<F>(errors, &fp) {
        // Bias the exponent so we know it's invalid.
        fp.exp += shared::INVALID_FP;
        return fp;
    }

    // Check if we have a literal 0 or underflow here.
    // If we have an exponent of -63, we can still have a valid shift,
    // giving a case where we have too many errors and need to round up.
    if -fp.exp + 1 == 65 {
        // We have more than 64 bits below the minimum exponent, so it must be 0.
        return fp_zero;
    }

    shared::round::<F, _>(&mut fp, |f, s| {
        shared::round_nearest_tie_even(f, s, |is_odd, is_halfway, is_above| {
            is_above || (is_odd && is_halfway)
        });
    });
    fp
}
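
// Hypothetical caller sketch (the real dispatch lives elsewhere in the
// crate). Because invalid results are biased by `shared::INVALID_FP`,
// a caller can detect the ambiguous case from the negative exponent:
//
//     let fp = bellerophon::<f64, FORMAT>(&num, false);
//     if fp.exp < 0 {
//         // Fall back to exact, slow-path parsing.
//     }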

// ERRORS
// ------

// Calculate whether the errors accumulated in computing the
// extended-precision float prevent unambiguous rounding.
//
// Specifically, we want to know if we are close to a halfway representation,
// or halfway between `b` and `b+1`, or `b+h`. The halfway representation
// has the form:
//      `SEEEEEEEHMMMMMMMMMMMMMMMMMMMMMMM100...`
// where:
//      S = Sign Bit
//      E = Exponent Bits
//      H = Hidden Bit
//      M = Mantissa Bits
//
// The halfway representation has a bit set 1-after the mantissa digits,
// and no bits set immediately afterward, making it impossible to
// round between `b` and `b+1` with this representation.
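
// A minimal sketch of the mask helpers used below (the real ones live in
// `crate::mask`; the exact bodies here are assumptions): `lower_n_mask(n)`
// selects the low `n` bits, and `lower_n_halfway(n)` is the rounding point
// `b1000...` within those `n` bits.
//
//     const fn lower_n_mask(n: u64) -> u64 {
//         if n == 64 { u64::MAX } else { (1 << n) - 1 }
//     }
//
//     const fn lower_n_halfway(n: u64) -> u64 {
//         if n == 0 { 0 } else { 1 << (n - 1) }
//     }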

/// Get the full error scale.
#[inline(always)]
const fn error_scale() -> u32 {
    8
}

/// Get the half error scale.
#[inline(always)]
const fn error_halfscale() -> u32 {
    error_scale() / 2
}
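
// Interpretive note (an assumption from the constants above, matching the
// Golang `extfloat` ancestry): errors are counted in eighths of a ULP, so
// `error_scale()` (8) is one full ULP of uncertainty and `error_halfscale()`
// (4) is the half-ULP charged by each truncating extended-precision multiply.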

/// Determine if the number of errors is tolerable for float precision.
#[cfg_attr(not(feature = "compact"), inline(always))]
fn error_is_accurate<F: RawFloat>(errors: u32, fp: &ExtendedFloat80) -> bool {
    // Check we can't have a literal 0 denormal float.
    debug_assert!(fp.exp >= -64, "cannot have a literal 0 float");

    // Determine if the extended-precision float is a good approximation.
    // The float will be inaccurate if the error has affected too many
    // units, or if the representation is so close to halfway that any
    // operation could push it across the rounding point.
    // See the documentation for dtoa for more information.

    // This is always a valid u32, since `64 - F::MANTISSA_SIZE - 1`
    // will always be positive and the significand size is `{23, 52}`.
    let mantissa_shift = 64 - F::MANTISSA_SIZE - 1;

    // The unbiased exponent check is `unbiased_exp <= F::MANTISSA_SIZE
    // - F::EXPONENT_BIAS - 64 + 1`, or `biased_exp <= F::MANTISSA_SIZE - 63`,
    // or `biased_exp <= -mantissa_shift`.
    let extrabits = match fp.exp <= -mantissa_shift {
        // Denormal, since shifting to the hidden bit still has a negative exponent.
        // The unbiased bit calculation is `1 - F::EXPONENT_BIAS - unbiased_exp`,
        // or `1 - biased_exp` in biased terms.
        true => 1 - fp.exp,
        false => 64 - F::MANTISSA_SIZE - 1,
    };

    // Our logic is as follows: we want to determine if the actual
    // mantissa and the errors during calculation differ significantly
    // from the rounding point. The rounding point for round-nearest
    // is the halfway point, IE, this is when the truncated bits start
    // with b1000..., while the rounding point for round-toward
    // is when the truncated bits are equal to 0.
    // To do so, we can check whether the rounding point +/- the error
    // is >/< the actual lower n bits.
    //
    // To see whether we need signed or unsigned types for this
    // analysis, consider this example, using u8 rather than u64 to
    // simplify things.
    //
    // # Comparisons
    //      cmp1 = (halfway - errors) < extra
    //      cmp2 = extra < (halfway + errors)
    //
    // # Large Extrabits, Low Errors
    //
    //      extrabits = 8
    //      halfway          = 0b10000000
    //      extra            = 0b10000010
    //      errors           = 0b00000100
    //      halfway - errors = 0b01111100
    //      halfway + errors = 0b10000100
    //
    //      Unsigned:
    //          halfway - errors = 124
    //          halfway + errors = 132
    //          extra            = 130
    //          cmp1             = true
    //          cmp2             = true
    //      Signed:
    //          halfway - errors = 124
    //          halfway + errors = -124
    //          extra            = -126
    //          cmp1             = false
    //          cmp2             = true
    //
    // # Conclusion
    //
    // Since errors will always be small, and since we want to detect
    // if the representation is accurate, we need to use an **unsigned**
    // type for comparisons.
    let maskbits = extrabits as u64;
    let errors = errors as u64;

    // Round-to-nearest, need to use the halfway point.
    if extrabits > 64 {
        // Underflow, we have a shift larger than the mantissa.
        // The representation is valid **only** if adding the errors
        // cannot overflow into the next bit. If it overflows,
        // the representation is **not** valid.
        !fp.mant.overflowing_add(errors).1
    } else {
        let mask = lower_n_mask(maskbits);
        let extra = fp.mant & mask;

        // Round-to-nearest, need to check if we're close to halfway.
        // IE, b10100 | 100000, where `|` signifies the truncation point.
        let halfway = lower_n_halfway(maskbits);
        let cmp1 = halfway.wrapping_sub(errors) < extra;
        let cmp2 = extra < halfway.wrapping_add(errors);

        // If both comparisons are true, we have significant rounding error,
        // and the value cannot be exactly represented. Otherwise, the
        // representation is valid.
        !(cmp1 && cmp2)
    }
}
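
// A small sanity check of the unsigned-comparison reasoning above, using
// `u8` wrapping arithmetic as in the worked example (illustrative only;
// the test module and names are hypothetical).
#[cfg(test)]
mod halfway_tests {
    #[test]
    fn unsigned_wrapping_comparisons() {
        let halfway: u8 = 0b1000_0000;
        let extra: u8 = 0b1000_0010;
        let errors: u8 = 0b0000_0100;
        // Both comparisons hold in unsigned arithmetic, so the value is
        // flagged as too close to the rounding point to round unambiguously.
        assert!(halfway.wrapping_sub(errors) < extra);
        assert!(extra < halfway.wrapping_add(errors));
    }
}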

// MATH
// ----

/// Normalize a floating-point number.
///
/// Shift the mantissa so the number of leading zeros is 0, or the value
/// itself is 0.
///
/// Returns the number of bits shifted.
#[cfg_attr(not(feature = "compact"), inline(always))]
pub fn normalize(fp: &mut ExtendedFloat80) -> i32 {
    // Note:
    // Using the ctlz intrinsic via `leading_zeros` is way faster (~10x)
    // than shifting 1 bit at a time via a while loop, and also way
    // faster (~2x) than an unrolled loop that checks at 32, 16, 8, 4,
    // 2, and 1 bit.
    //
    // Using a modulus of pow2 (which will get optimized to a bitwise
    // and with 0x3F or faster) is slightly slower than an if/then,
    // however, removing the if/then will likely optimize more branched
    // code, as it removes conditional logic.

    // Calculate the number of leading zeros, and then zero-out
    // any overflowing bits, to avoid shl overflow when `fp.mant == 0`.
    if fp.mant != 0 {
        let shift = fp.mant.leading_zeros() as i32;
        fp.mant <<= shift;
        fp.exp -= shift;
        shift
    } else {
        0
    }
}
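
// A minimal sanity check of `normalize` (illustrative; the test module is
// hypothetical): a mantissa of 1 shifts its single set bit up to bit 63,
// and the exponent decreases by the same 63 to keep the value unchanged.
#[cfg(test)]
mod normalize_tests {
    use super::*;

    #[test]
    fn normalizes_one() {
        let mut fp = ExtendedFloat80 {
            mant: 1,
            exp: 0,
        };
        assert_eq!(normalize(&mut fp), 63);
        assert_eq!(fp.mant, 1 << 63);
        assert_eq!(fp.exp, -63);
    }
}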

/// Multiply two normalized extended-precision floats, as if by `a*b`.
///
/// The precision is maximal when the numbers are normalized; however,
/// decent precision will occur as long as both values have their high
/// bits set. The result is not normalized.
///
/// Algorithm:
///   1. Non-signed multiplication of mantissas (requires 2x as many bits as
///      input).
///   2. Normalization of the result (not done here).
///   3. Addition of exponents.
#[cfg_attr(not(feature = "compact"), inline(always))]
pub fn mul(x: &ExtendedFloat80, y: &ExtendedFloat80) -> ExtendedFloat80 {
    // Logic check, values must be decently normalized prior to multiplication.
    debug_assert!(x.mant >> 32 != 0, "x must have its high bits set");
    debug_assert!(y.mant >> 32 != 0, "y must have its high bits set");

    // Extract high-and-low masks.
    const LOMASK: u64 = u32::MAX as u64;
    let x1 = x.mant >> 32;
    let x0 = x.mant & LOMASK;
    let y1 = y.mant >> 32;
    let y0 = y.mant & LOMASK;

    // Get our products.
    let x1_y0 = x1 * y0;
    let x0_y1 = x0 * y1;
    let x0_y0 = x0 * y0;
    let x1_y1 = x1 * y1;

    let mut tmp = (x1_y0 & LOMASK) + (x0_y1 & LOMASK) + (x0_y0 >> 32);
    // Round up.
    tmp += 1 << (32 - 1);

    ExtendedFloat80 {
        mant: x1_y1 + (x1_y0 >> 32) + (x0_y1 >> 32) + (tmp >> 32),
        exp: x.exp + y.exp + 64,
    }
}
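
// A minimal sanity check of `mul` (illustrative; the test module is
// hypothetical): multiplying 1.0 by itself, where 1.0 is the normalized
// extended float `mant = 1 << 63, exp = -63`. The product's significand is
// `1 << 62`, so `(1 << 62) * 2^-62 == 1.0`, with the result left unnormalized.
#[cfg(test)]
mod mul_tests {
    use super::*;

    #[test]
    fn one_times_one() {
        let one = ExtendedFloat80 {
            mant: 1 << 63,
            exp: -63,
        };
        let product = mul(&one, &one);
        assert_eq!(product.mant, 1 << 62);
        assert_eq!(product.exp, -62);
    }
}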

// POWERS
// ------

/// Pre-calculated powers of base N for the Bellerophon algorithm.
pub struct BellerophonPowers {
    /// Pre-calculated small powers.
    pub small: &'static [u64],
    /// Pre-calculated large powers.
    pub large: &'static [u64],
    /// Pre-calculated small powers as 64-bit integers.
    pub small_int: &'static [u64],
    /// Step between large powers and number of small powers.
    pub step: i32,
    /// Exponent bias for the large powers.
    pub bias: i32,
    /// `ceil(log2(radix))` scaled as a multiplier.
    pub log2: i64,
    /// Bit shift for the log2 multiplier.
    pub log2_shift: i32,
}
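
// Illustrative note on the fixed-point `log2`/`log2_shift` pair (the
// concrete numbers here are assumptions, not the table's actual values):
// with a shift of 16, the multiplier for radix 10 would be
// `round(log2(10) * 2^16) = 217706`, and the binary exponent of `10^n` is
// approximated as `(217706 * n) >> 16`. For `n = 10` this gives 33,
// matching `log2(10^10) ~= 33.2`.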

/// Allow simple indexing of the pre-calculated powers.
impl BellerophonPowers {
    #[inline(always)]
    pub const fn get_small(&self, index: usize) -> ExtendedFloat80 {
        let mant = self.small[index];
        let exp = (1 - 64) + ((self.log2 * index as i64) >> self.log2_shift);
        ExtendedFloat80 {
            mant,
            exp: exp as i32,
        }
    }

    #[inline(always)]
    pub const fn get_large(&self, index: usize) -> ExtendedFloat80 {
        let mant = self.large[index];
        let biased_e = index as i64 * self.step as i64 - self.bias as i64;
        let exp = (1 - 64) + ((self.log2 * biased_e) >> self.log2_shift);
        ExtendedFloat80 {
            mant,
            exp: exp as i32,
        }
    }

    #[inline(always)]
    pub const fn get_small_int(&self, index: usize) -> u64 {
        self.small_int[index]
    }
}