qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

softfloat-parts.c.inc (44581B)


      1 /*
      2  * QEMU float support
      3  *
      4  * The code in this source file is derived from release 2a of the SoftFloat
      5  * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
      6  * some later contributions) are provided under that license, as detailed below.
      7  * It has subsequently been modified by contributors to the QEMU Project,
      8  * so some portions are provided under:
      9  *  the SoftFloat-2a license
     10  *  the BSD license
     11  *  GPL-v2-or-later
     12  *
     13  * Any future contributions to this file after December 1st 2014 will be
     14  * taken to be licensed under the Softfloat-2a license unless specifically
     15  * indicated otherwise.
     16  */
     17 
     18 static void partsN(return_nan)(FloatPartsN *a, float_status *s)
     19 {
     20     switch (a->cls) {
     21     case float_class_snan:
     22         float_raise(float_flag_invalid | float_flag_invalid_snan, s);
     23         if (s->default_nan_mode) {
     24             parts_default_nan(a, s);
     25         } else {
     26             parts_silence_nan(a, s);
     27         }
     28         break;
     29     case float_class_qnan:
     30         if (s->default_nan_mode) {
     31             parts_default_nan(a, s);
     32         }
     33         break;
     34     default:
     35         g_assert_not_reached();
     36     }
     37 }
     38 
     39 static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
     40                                      float_status *s)
     41 {
     42     if (is_snan(a->cls) || is_snan(b->cls)) {
     43         float_raise(float_flag_invalid | float_flag_invalid_snan, s);
     44     }
     45 
     46     if (s->default_nan_mode) {
     47         parts_default_nan(a, s);
     48     } else {
     49         int cmp = frac_cmp(a, b);
     50         if (cmp == 0) {
     51             cmp = a->sign < b->sign;
     52         }
     53 
     54         if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
     55             a = b;
     56         }
     57         if (is_snan(a->cls)) {
     58             parts_silence_nan(a, s);
     59         }
     60     }
     61     return a;
     62 }
     63 
     64 static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
     65                                             FloatPartsN *c, float_status *s,
     66                                             int ab_mask, int abc_mask)
     67 {
     68     int which;
     69 
     70     if (unlikely(abc_mask & float_cmask_snan)) {
     71         float_raise(float_flag_invalid | float_flag_invalid_snan, s);
     72     }
     73 
     74     which = pickNaNMulAdd(a->cls, b->cls, c->cls,
     75                           ab_mask == float_cmask_infzero, s);
     76 
     77     if (s->default_nan_mode || which == 3) {
     78         /*
     79          * Note that this check is after pickNaNMulAdd so that function
     80          * has an opportunity to set the Invalid flag for infzero.
     81          */
     82         parts_default_nan(a, s);
     83         return a;
     84     }
     85 
     86     switch (which) {
     87     case 0:
     88         break;
     89     case 1:
     90         a = b;
     91         break;
     92     case 2:
     93         a = c;
     94         break;
     95     default:
     96         g_assert_not_reached();
     97     }
     98     if (is_snan(a->cls)) {
     99         parts_silence_nan(a, s);
    100     }
    101     return a;
    102 }
    103 
    104 /*
    105  * Canonicalize the FloatParts structure.  Determine the class,
    106  * unbias the exponent, and normalize the fraction.
    107  */
    108 static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
    109                                  const FloatFmt *fmt)
    110 {
    111     if (unlikely(p->exp == 0)) {
    112         if (likely(frac_eqz(p))) {
    113             p->cls = float_class_zero;
    114         } else if (status->flush_inputs_to_zero) {
    115             float_raise(float_flag_input_denormal, status);
    116             p->cls = float_class_zero;
    117             frac_clear(p);
    118         } else {
    119             int shift = frac_normalize(p);
    120             p->cls = float_class_normal;
    121             p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
    122         }
    123     } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
    124         p->cls = float_class_normal;
    125         p->exp -= fmt->exp_bias;
    126         frac_shl(p, fmt->frac_shift);
    127         p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
    128     } else if (likely(frac_eqz(p))) {
    129         p->cls = float_class_inf;
    130     } else {
    131         frac_shl(p, fmt->frac_shift);
    132         p->cls = (parts_is_snan_frac(p->frac_hi, status)
    133                   ? float_class_snan : float_class_qnan);
    134     }
    135 }
    136 
/*
 * Round and uncanonicalize a floating-point number by parts. There
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 * fraction; these bits will be removed. The exponent will be biased
 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
 *
 * Only called for float_class_normal inputs.  On return p->exp holds the
 * biased exponent field, the fraction has been shifted right by
 * FRAC_SHIFT, and p->cls may have been changed to float_class_inf
 * (overflow) or float_class_zero (flush or rounding to zero).  All
 * exception flags accumulated here are raised once, at the end.
 */
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
                                   const FloatFmt *fmt)
{
    const int exp_max = fmt->exp_max;
    const int frac_shift = fmt->frac_shift;
    /* Bits below the result's least significant bit, i.e. to be rounded. */
    const uint64_t round_mask = fmt->round_mask;
    /* Result lsb; wraps to 0 when round_mask covers all 64 bits of frac_lo. */
    const uint64_t frac_lsb = round_mask + 1;
    /* Top bit of round_mask (assuming a contiguous low mask): half an ulp. */
    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
    const uint64_t roundeven_mask = round_mask | frac_lsb;
    /* Value added to the fraction before truncation to implement rounding. */
    uint64_t inc;
    /* When set, overflow produces the maximum normal rather than infinity. */
    bool overflow_norm = false;
    int exp, flags = 0;

    switch (s->float_rounding_mode) {
    case float_round_nearest_even:
        /*
         * Add half an ulp unless the value is exactly halfway and the
         * result lsb is already even.  When frac_lsb wrapped to 0 the
         * result lsb is bit 0 of frac_hi rather than a bit of frac_lo.
         */
        if (N > 64 && frac_lsb == 0) {
            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        } else {
            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        }
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        overflow_norm = true;
        inc = 0;
        break;
    case float_round_up:
        inc = p->sign ? 0 : round_mask;
        overflow_norm = p->sign;
        break;
    case float_round_down:
        inc = p->sign ? round_mask : 0;
        overflow_norm = !p->sign;
        break;
    case float_round_to_odd:
        overflow_norm = true;
        /* fall through */
    case float_round_to_odd_inf:
        /* Round up only when the result lsb is currently even. */
        if (N > 64 && frac_lsb == 0) {
            inc = p->frac_hi & 1 ? 0 : round_mask;
        } else {
            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
        }
        break;
    default:
        g_assert_not_reached();
    }

    exp = p->exp + fmt->exp_bias;
    if (likely(exp > 0)) {
        /* Result is normal, unless it overflows below. */
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            /* A carry out of the fraction means rounding bumped the exponent. */
            if (frac_addi(p, p, inc)) {
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }

        if (fmt->arm_althp) {
            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
            if (unlikely(exp > exp_max)) {
                /* Overflow.  Return the maximum normal.  */
                /* Note: this replaces, rather than adds to, earlier flags. */
                flags = float_flag_invalid;
                exp = exp_max;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            }
        } else if (unlikely(exp >= exp_max)) {
            flags |= float_flag_overflow;
            if (s->rebias_overflow) {
                /* Deliver the result with a re-biased (wrapped) exponent. */
                exp -= fmt->exp_re_bias;
            } else if (overflow_norm) {
                flags |= float_flag_inexact;
                exp = exp_max - 1;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            } else {
                flags |= float_flag_inexact;
                p->cls = float_class_inf;
                exp = exp_max;
                frac_clear(p);
            }
        }
        frac_shr(p, frac_shift);
    } else if (unlikely(s->rebias_underflow)) {
        /* Underflow delivered as a normal with a re-biased exponent. */
        flags |= float_flag_underflow;
        exp += fmt->exp_re_bias;
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }
        frac_shr(p, frac_shift);
    } else if (s->flush_to_zero) {
        /* Denormal results are replaced by zero. */
        flags |= float_flag_output_denormal;
        p->cls = float_class_zero;
        exp = 0;
        frac_clear(p);
    } else {
        /* Produce a denormal (or a value that rounds up to a normal). */
        bool is_tiny = s->tininess_before_rounding || exp < 0;

        if (!is_tiny) {
            /*
             * Tininess is detected after rounding and exp == 0: the value
             * is not tiny if applying the increment carries out of the
             * fraction.  The sum itself is not needed.
             */
            FloatPartsN discard;
            is_tiny = !frac_addi(&discard, p, inc);
        }

        /* Denormalize, folding shifted-out bits into a sticky bit. */
        frac_shrjam(p, 1 - exp);

        if (p->frac_lo & round_mask) {
            /* Need to recompute round-to-even/round-to-odd. */
            switch (s->float_rounding_mode) {
            case float_round_nearest_even:
                if (N > 64 && frac_lsb == 0) {
                    inc = ((p->frac_hi & 1) ||
                           (p->frac_lo & round_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                } else {
                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                }
                break;
            case float_round_to_odd:
            case float_round_to_odd_inf:
                if (N > 64 && frac_lsb == 0) {
                    inc = p->frac_hi & 1 ? 0 : round_mask;
                } else {
                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
                }
                break;
            default:
                /* The other modes' increments do not depend on the lsb. */
                break;
            }
            flags |= float_flag_inexact;
            frac_addi(p, p, inc);
            p->frac_lo &= ~round_mask;
        }

        /* Exponent field is 1 if rounding produced the implicit bit, else 0. */
        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
        frac_shr(p, frac_shift);

        /* Underflow is only signalled when the tiny result is also inexact. */
        if (is_tiny && (flags & float_flag_inexact)) {
            flags |= float_flag_underflow;
        }
        if (exp == 0 && frac_eqz(p)) {
            p->cls = float_class_zero;
        }
    }
    p->exp = exp;
    float_raise(flags, s);
}
    303 
    304 static void partsN(uncanon)(FloatPartsN *p, float_status *s,
    305                             const FloatFmt *fmt)
    306 {
    307     if (likely(p->cls == float_class_normal)) {
    308         parts_uncanon_normal(p, s, fmt);
    309     } else {
    310         switch (p->cls) {
    311         case float_class_zero:
    312             p->exp = 0;
    313             frac_clear(p);
    314             return;
    315         case float_class_inf:
    316             g_assert(!fmt->arm_althp);
    317             p->exp = fmt->exp_max;
    318             frac_clear(p);
    319             return;
    320         case float_class_qnan:
    321         case float_class_snan:
    322             g_assert(!fmt->arm_althp);
    323             p->exp = fmt->exp_max;
    324             frac_shr(p, fmt->frac_shift);
    325             return;
    326         default:
    327             break;
    328         }
    329         g_assert_not_reached();
    330     }
    331 }
    332 
    333 /*
    334  * Returns the result of adding or subtracting the values of the
    335  * floating-point values `a' and `b'. The operation is performed
    336  * according to the IEC/IEEE Standard for Binary Floating-Point
    337  * Arithmetic.
    338  */
    339 static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
    340                                    float_status *s, bool subtract)
    341 {
    342     bool b_sign = b->sign ^ subtract;
    343     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    344 
    345     if (a->sign != b_sign) {
    346         /* Subtraction */
    347         if (likely(ab_mask == float_cmask_normal)) {
    348             if (parts_sub_normal(a, b)) {
    349                 return a;
    350             }
    351             /* Subtract was exact, fall through to set sign. */
    352             ab_mask = float_cmask_zero;
    353         }
    354 
    355         if (ab_mask == float_cmask_zero) {
    356             a->sign = s->float_rounding_mode == float_round_down;
    357             return a;
    358         }
    359 
    360         if (unlikely(ab_mask & float_cmask_anynan)) {
    361             goto p_nan;
    362         }
    363 
    364         if (ab_mask & float_cmask_inf) {
    365             if (a->cls != float_class_inf) {
    366                 /* N - Inf */
    367                 goto return_b;
    368             }
    369             if (b->cls != float_class_inf) {
    370                 /* Inf - N */
    371                 return a;
    372             }
    373             /* Inf - Inf */
    374             float_raise(float_flag_invalid | float_flag_invalid_isi, s);
    375             parts_default_nan(a, s);
    376             return a;
    377         }
    378     } else {
    379         /* Addition */
    380         if (likely(ab_mask == float_cmask_normal)) {
    381             parts_add_normal(a, b);
    382             return a;
    383         }
    384 
    385         if (ab_mask == float_cmask_zero) {
    386             return a;
    387         }
    388 
    389         if (unlikely(ab_mask & float_cmask_anynan)) {
    390             goto p_nan;
    391         }
    392 
    393         if (ab_mask & float_cmask_inf) {
    394             a->cls = float_class_inf;
    395             return a;
    396         }
    397     }
    398 
    399     if (b->cls == float_class_zero) {
    400         g_assert(a->cls == float_class_normal);
    401         return a;
    402     }
    403 
    404     g_assert(a->cls == float_class_zero);
    405     g_assert(b->cls == float_class_normal);
    406  return_b:
    407     b->sign = b_sign;
    408     return b;
    409 
    410  p_nan:
    411     return parts_pick_nan(a, b, s);
    412 }
    413 
    414 /*
    415  * Returns the result of multiplying the floating-point values `a' and
    416  * `b'. The operation is performed according to the IEC/IEEE Standard
    417  * for Binary Floating-Point Arithmetic.
    418  */
    419 static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
    420                                 float_status *s)
    421 {
    422     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    423     bool sign = a->sign ^ b->sign;
    424 
    425     if (likely(ab_mask == float_cmask_normal)) {
    426         FloatPartsW tmp;
    427 
    428         frac_mulw(&tmp, a, b);
    429         frac_truncjam(a, &tmp);
    430 
    431         a->exp += b->exp + 1;
    432         if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
    433             frac_add(a, a, a);
    434             a->exp -= 1;
    435         }
    436 
    437         a->sign = sign;
    438         return a;
    439     }
    440 
    441     /* Inf * Zero == NaN */
    442     if (unlikely(ab_mask == float_cmask_infzero)) {
    443         float_raise(float_flag_invalid | float_flag_invalid_imz, s);
    444         parts_default_nan(a, s);
    445         return a;
    446     }
    447 
    448     if (unlikely(ab_mask & float_cmask_anynan)) {
    449         return parts_pick_nan(a, b, s);
    450     }
    451 
    452     /* Multiply by 0 or Inf */
    453     if (ab_mask & float_cmask_inf) {
    454         a->cls = float_class_inf;
    455         a->sign = sign;
    456         return a;
    457     }
    458 
    459     g_assert(ab_mask & float_cmask_zero);
    460     a->cls = float_class_zero;
    461     a->sign = sign;
    462     return a;
    463 }
    464 
/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b' then adding 'c', with no intermediate rounding step after the
 * multiplication. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 * The flags argument allows the caller to select negation of the
 * addend, the intermediate product, or the final result. (The
 * difference between this and having the caller do a separate
 * negation is that negating externally will flip the sign bit on NaNs.)
 *
 * The widening multiply and the widened addend are held in the local
 * double-width temporaries p_widen and c_widen; the result is narrowed
 * back into A with a sticky bit.
 *
 * Returns A, except when a NaN operand is propagated, in which case the
 * operand selected by parts_pick_nan_muladd() is returned.  Note that
 * C's sign is modified in place when float_muladd_negate_c is set.
 */
static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
                                   FloatPartsN *c, int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;

    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    abc_mask = float_cmask(c->cls) | ab_mask;

    /*
     * It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
    }

    if (flags & float_muladd_negate_c) {
        c->sign ^= 1;
    }

    /* Compute the sign of the product into A. */
    a->sign ^= b->sign;
    if (flags & float_muladd_negate_product) {
        a->sign ^= 1;
    }

    /* Product involves an Inf or a zero: no multiplication is needed. */
    if (unlikely(ab_mask != float_cmask_normal)) {
        /* Inf * 0 is invalid regardless of the addend. */
        if (unlikely(ab_mask == float_cmask_infzero)) {
            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
            goto d_nan;
        }

        if (ab_mask & float_cmask_inf) {
            /* Infinite product plus an opposite-signed infinity is invalid. */
            if (c->cls == float_class_inf && a->sign != c->sign) {
                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
                goto d_nan;
            }
            goto return_inf;
        }

        /* Zero product: the result is determined by the addend alone. */
        g_assert(ab_mask & float_cmask_zero);
        if (c->cls == float_class_normal) {
            *a = *c;
            goto return_normal;
        }
        if (c->cls == float_class_zero) {
            if (a->sign != c->sign) {
                goto return_sub_zero;
            }
            goto return_zero;
        }
        /* Falls out of this block to the infinite-addend case below. */
        g_assert(c->cls == float_class_inf);
    }

    /* Finite product plus infinity: the result takes the addend's sign. */
    if (unlikely(c->cls == float_class_inf)) {
        a->sign = c->sign;
        goto return_inf;
    }

    /* Perform the multiplication step. */
    p_widen.sign = a->sign;
    p_widen.exp = a->exp + b->exp + 1;
    frac_mulw(&p_widen, a, b);
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        /* Renormalize the product: double it and drop the exponent by one. */
        frac_add(&p_widen, &p_widen, &p_widen);
        p_widen.exp -= 1;
    }

    /* Perform the addition step. */
    if (c->cls != float_class_zero) {
        /* Zero-extend C to less significant bits. */
        frac_widen(&c_widen, c);
        c_widen.exp = c->exp;

        if (a->sign == c->sign) {
            parts_add_normal(&p_widen, &c_widen);
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
            /* Exact cancellation. */
            goto return_sub_zero;
        }
    }

    /* Narrow with sticky bit, for proper rounding later. */
    frac_truncjam(a, &p_widen);
    a->sign = p_widen.sign;
    a->exp = p_widen.exp;

 return_normal:
    if (flags & float_muladd_halve_result) {
        a->exp -= 1;
    }
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
    }
    return a;

 return_sub_zero:
    /* Zero from cancellation: the sign depends on the rounding mode. */
    a->sign = s->float_rounding_mode == float_round_down;
 return_zero:
    a->cls = float_class_zero;
    goto finish_sign;

 return_inf:
    a->cls = float_class_inf;
    goto finish_sign;

 d_nan:
    /* The default NaN is returned as-is; negate_result is not applied. */
    parts_default_nan(a, s);
    return a;
}
    591 
    592 /*
    593  * Returns the result of dividing the floating-point value `a' by the
    594  * corresponding value `b'. The operation is performed according to
    595  * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
    596  */
    597 static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
    598                                 float_status *s)
    599 {
    600     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    601     bool sign = a->sign ^ b->sign;
    602 
    603     if (likely(ab_mask == float_cmask_normal)) {
    604         a->sign = sign;
    605         a->exp -= b->exp + frac_div(a, b);
    606         return a;
    607     }
    608 
    609     /* 0/0 or Inf/Inf => NaN */
    610     if (unlikely(ab_mask == float_cmask_zero)) {
    611         float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
    612         goto d_nan;
    613     }
    614     if (unlikely(ab_mask == float_cmask_inf)) {
    615         float_raise(float_flag_invalid | float_flag_invalid_idi, s);
    616         goto d_nan;
    617     }
    618 
    619     /* All the NaN cases */
    620     if (unlikely(ab_mask & float_cmask_anynan)) {
    621         return parts_pick_nan(a, b, s);
    622     }
    623 
    624     a->sign = sign;
    625 
    626     /* Inf / X */
    627     if (a->cls == float_class_inf) {
    628         return a;
    629     }
    630 
    631     /* 0 / X */
    632     if (a->cls == float_class_zero) {
    633         return a;
    634     }
    635 
    636     /* X / Inf */
    637     if (b->cls == float_class_inf) {
    638         a->cls = float_class_zero;
    639         return a;
    640     }
    641 
    642     /* X / 0 => Inf */
    643     g_assert(b->cls == float_class_zero);
    644     float_raise(float_flag_divbyzero, s);
    645     a->cls = float_class_inf;
    646     return a;
    647 
    648  d_nan:
    649     parts_default_nan(a, s);
    650     return a;
    651 }
    652 
    653 /*
    654  * Floating point remainder, per IEC/IEEE, or modulus.
    655  */
    656 static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
    657                                    uint64_t *mod_quot, float_status *s)
    658 {
    659     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    660 
    661     if (likely(ab_mask == float_cmask_normal)) {
    662         frac_modrem(a, b, mod_quot);
    663         return a;
    664     }
    665 
    666     if (mod_quot) {
    667         *mod_quot = 0;
    668     }
    669 
    670     /* All the NaN cases */
    671     if (unlikely(ab_mask & float_cmask_anynan)) {
    672         return parts_pick_nan(a, b, s);
    673     }
    674 
    675     /* Inf % N; N % 0 */
    676     if (a->cls == float_class_inf || b->cls == float_class_zero) {
    677         float_raise(float_flag_invalid, s);
    678         parts_default_nan(a, s);
    679         return a;
    680     }
    681 
    682     /* N % Inf; 0 % N */
    683     g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
    684     return a;
    685 }
    686 
/*
 * Square Root
 *
 * The base algorithm is lifted from
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
 * and is thus MIT licenced.
 *
 * The result replaces A in place.  NaN inputs go through
 * parts_return_nan(); zero and +Inf are returned unchanged; any other
 * negative input (including -Inf) raises Invalid with the invalid_sqrt
 * detail flag and yields the default NaN.  For normal inputs the
 * fraction is computed to enough precision for FMT and nudged by one
 * unit when inexact; final rounding is left to the caller.
 */
static void partsN(sqrt)(FloatPartsN *a, float_status *status,
                         const FloatFmt *fmt)
{
    const uint32_t three32 = 3u << 30;
    const uint64_t three64 = 3ull << 62;
    uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
    uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
    uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
    uint64_t discard;
    bool exp_odd;
    size_t index;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, status);
            return;
        case float_class_zero:
            /* sqrt(+-0) is returned with its sign unchanged. */
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            g_assert_not_reached();
        }
    }

    /* Negative normal input. */
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /*
     * Argument reduction.
     * x = 4^e frac; with integer e, and frac in [1, 4)
     * m = frac fixed point at bit 62, since we're in base 4.
     * If base-2 exponent is odd, exchange that for multiply by 2,
     * which results in no shift.
     */
    exp_odd = a->exp & 1;
    /* Table index: 6 fraction bits from bit 57 up, plus exponent parity. */
    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
    if (!exp_odd) {
        frac_shr(a, 1);
    }

    /*
     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
     *
     * Initial estimate:
     * 7-bit lookup table (1-bit exponent and 6-bit significand).
     *
     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
     * a table lookup is faster and needs one less iteration.
     * The 7-bit table gives |e| < 0x1.fdp-9.
     *
     * A Newton-Raphson iteration for r is
     *   s = m*r
     *   d = s*r
     *   u = 3 - d
     *   r = r*u/2
     *
     * Fixed point representations:
     *   m, s, d, u, three are all 2.30; r is 0.32
     */
    m64 = a->frac_hi;
    m32 = m64 >> 32;

    r32 = rsqrt_tab[index] << 16;
    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */

    /* First iteration, shared by all formats. */
    s32 = ((uint64_t)m32 * r32) >> 32;
    d32 = ((uint64_t)s32 * r32) >> 32;
    u32 = three32 - d32;

    if (N == 64) {
        /* float64 or smaller */

        r32 = ((uint64_t)r32 * u32) >> 31;
        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */

        s32 = ((uint64_t)m32 * r32) >> 32;
        d32 = ((uint64_t)s32 * r32) >> 32;
        u32 = three32 - d32;

        if (fmt->frac_size <= 23) {
            /* float32 or smaller */

            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
            s32 = (s32 - 1) >> 6;               /* 9.23 */
            /* s < sqrt(m) < s + 0x1.08p-23 */

            /* compute nearest rounded result to 2.23 bits */
            uint32_t d0 = (m32 << 16) - s32 * s32;
            uint32_t d1 = s32 - d0;
            uint32_t d2 = d1 + s32 + 1;
            s32 += d1 >> 31;
            a->frac_hi = (uint64_t)s32 << (64 - 25);

            /* increment or decrement for inexact */
            if (d2 != 0) {
                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
            }
            goto done;
        }

        /* float64 */

        r64 = (uint64_t)r32 * u32 * 2;
        /* |r*sqrt(m) - 1| < 0x1.37p-29; convert to 64-bit arithmetic */
        mul64To128(m64, r64, &s64, &discard);
        mul64To128(s64, r64, &d64, &discard);
        u64 = three64 - d64;

        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
        s64 = (s64 - 2) >> 9;                  /* 12.52 */

        /* Compute nearest rounded result */
        uint64_t d0 = (m64 << 42) - s64 * s64;
        uint64_t d1 = s64 - d0;
        uint64_t d2 = d1 + s64 + 1;
        s64 += d1 >> 63;
        a->frac_hi = s64 << (64 - 54);

        /* increment or decrement for inexact */
        if (d2 != 0) {
            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
        }
        goto done;
    }

    /* N > 64: keep iterating, first in 64-bit and then 128-bit arithmetic. */
    r64 = (uint64_t)r32 * u32 * 2;
    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &r64, &discard);
    r64 <<= 1;
    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &rh, &rl);
    add128(rh, rl, rh, rl, &rh, &rl);
    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */

    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
    sub128(three64, 0, dh, dl, &uh, &ul);
    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */

    sub128(sh, sl, 0, 4, &sh, &sl);
    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
    /* s < sqrt(m) < s + 1ulp */

    /* Compute nearest rounded result */
    mul64To128(sl, sl, &d0h, &d0l);
    d0h += 2 * sh * sl;
    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
    add128(sh, sl, 0, 1, &d2h, &d2l);
    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
    shift128Left(sh, sl, 128 - 114, &sh, &sl);

    /* increment or decrement for inexact */
    if (d2h | d2l) {
        if ((int64_t)(d1h ^ d2h) < 0) {
            sub128(sh, sl, 0, 1, &sh, &sl);
        } else {
            add128(sh, sl, 0, 1, &sh, &sl);
        }
    }
    a->frac_lo = sl;
    a->frac_hi = sh;

 done:
    /* Convert back from base 4 to base 2. */
    a->exp >>= 1;
    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        /* Result fraction is below the implicit bit: double it instead. */
        frac_add(a, a, a);
    } else {
        a->exp += 1;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
    parts_default_nan(a, status);
}
    892 
    893 /*
    894  * Rounds the floating-point value `a' to an integer, and returns the
    895  * result as a floating-point value. The operation is performed
    896  * according to the IEC/IEEE Standard for Binary Floating-Point
    897  * Arithmetic.
    898  *
    899  * parts_round_to_int_normal is an internal helper function for
    900  * normal numbers only, returning true for inexact but not directly
    901  * raising float_flag_inexact.
    902  */
    903 static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
    904                                         int scale, int frac_size)
    905 {
    906     uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
    907     int shift_adj;
    908 
    909     scale = MIN(MAX(scale, -0x10000), 0x10000);
    910     a->exp += scale;
    911 
    912     if (a->exp < 0) {
    913         bool one;
    914 
    915         /* All fractional */
    916         switch (rmode) {
    917         case float_round_nearest_even:
    918             one = false;
    919             if (a->exp == -1) {
    920                 FloatPartsN tmp;
    921                 /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
    922                 frac_add(&tmp, a, a);
    923                 /* Anything remaining means frac > 0.5. */
    924                 one = !frac_eqz(&tmp);
    925             }
    926             break;
    927         case float_round_ties_away:
    928             one = a->exp == -1;
    929             break;
    930         case float_round_to_zero:
    931             one = false;
    932             break;
    933         case float_round_up:
    934             one = !a->sign;
    935             break;
    936         case float_round_down:
    937             one = a->sign;
    938             break;
    939         case float_round_to_odd:
    940             one = true;
    941             break;
    942         default:
    943             g_assert_not_reached();
    944         }
    945 
    946         frac_clear(a);
    947         a->exp = 0;
    948         if (one) {
    949             a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
    950         } else {
    951             a->cls = float_class_zero;
    952         }
    953         return true;
    954     }
    955 
    956     if (a->exp >= frac_size) {
    957         /* All integral */
    958         return false;
    959     }
    960 
    961     if (N > 64 && a->exp < N - 64) {
    962         /*
    963          * Rounding is not in the low word -- shift lsb to bit 2,
    964          * which leaves room for sticky and rounding bit.
    965          */
    966         shift_adj = (N - 1) - (a->exp + 2);
    967         frac_shrjam(a, shift_adj);
    968         frac_lsb = 1 << 2;
    969     } else {
    970         shift_adj = 0;
    971         frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
    972     }
    973 
    974     frac_lsbm1 = frac_lsb >> 1;
    975     rnd_mask = frac_lsb - 1;
    976     rnd_even_mask = rnd_mask | frac_lsb;
    977 
    978     if (!(a->frac_lo & rnd_mask)) {
    979         /* Fractional bits already clear, undo the shift above. */
    980         frac_shl(a, shift_adj);
    981         return false;
    982     }
    983 
    984     switch (rmode) {
    985     case float_round_nearest_even:
    986         inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
    987         break;
    988     case float_round_ties_away:
    989         inc = frac_lsbm1;
    990         break;
    991     case float_round_to_zero:
    992         inc = 0;
    993         break;
    994     case float_round_up:
    995         inc = a->sign ? 0 : rnd_mask;
    996         break;
    997     case float_round_down:
    998         inc = a->sign ? rnd_mask : 0;
    999         break;
   1000     case float_round_to_odd:
   1001         inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
   1002         break;
   1003     default:
   1004         g_assert_not_reached();
   1005     }
   1006 
   1007     if (shift_adj == 0) {
   1008         if (frac_addi(a, a, inc)) {
   1009             frac_shr(a, 1);
   1010             a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
   1011             a->exp++;
   1012         }
   1013         a->frac_lo &= ~rnd_mask;
   1014     } else {
   1015         frac_addi(a, a, inc);
   1016         a->frac_lo &= ~rnd_mask;
   1017         /* Be careful shifting back, not to overflow */
   1018         frac_shl(a, shift_adj - 1);
   1019         if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
   1020             a->exp++;
   1021         } else {
   1022             frac_add(a, a, a);
   1023         }
   1024     }
   1025     return true;
   1026 }
   1027 
   1028 static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
   1029                                  int scale, float_status *s,
   1030                                  const FloatFmt *fmt)
   1031 {
   1032     switch (a->cls) {
   1033     case float_class_qnan:
   1034     case float_class_snan:
   1035         parts_return_nan(a, s);
   1036         break;
   1037     case float_class_zero:
   1038     case float_class_inf:
   1039         break;
   1040     case float_class_normal:
   1041         if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
   1042             float_raise(float_flag_inexact, s);
   1043         }
   1044         break;
   1045     default:
   1046         g_assert_not_reached();
   1047     }
   1048 }
   1049 
   1050 /*
   1051  * Returns the result of converting the floating-point value `a' to
   1052  * the two's complement integer format. The conversion is performed
   1053  * according to the IEC/IEEE Standard for Binary Floating-Point
   1054  * Arithmetic---which means in particular that the conversion is
   1055  * rounded according to the current rounding mode. If `a' is a NaN,
   1056  * the largest positive integer is returned. Otherwise, if the
   1057  * conversion overflows, the largest integer with the same sign as `a'
   1058  * is returned.
   1059  */
   1060 static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
   1061                                      int scale, int64_t min, int64_t max,
   1062                                      float_status *s)
   1063 {
   1064     int flags = 0;
   1065     uint64_t r;
   1066 
   1067     switch (p->cls) {
   1068     case float_class_snan:
   1069         flags |= float_flag_invalid_snan;
   1070         /* fall through */
   1071     case float_class_qnan:
   1072         flags |= float_flag_invalid;
   1073         r = max;
   1074         break;
   1075 
   1076     case float_class_inf:
   1077         flags = float_flag_invalid | float_flag_invalid_cvti;
   1078         r = p->sign ? min : max;
   1079         break;
   1080 
   1081     case float_class_zero:
   1082         return 0;
   1083 
   1084     case float_class_normal:
   1085         /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
   1086         if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
   1087             flags = float_flag_inexact;
   1088         }
   1089 
   1090         if (p->exp <= DECOMPOSED_BINARY_POINT) {
   1091             r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
   1092         } else {
   1093             r = UINT64_MAX;
   1094         }
   1095         if (p->sign) {
   1096             if (r <= -(uint64_t)min) {
   1097                 r = -r;
   1098             } else {
   1099                 flags = float_flag_invalid | float_flag_invalid_cvti;
   1100                 r = min;
   1101             }
   1102         } else if (r > max) {
   1103             flags = float_flag_invalid | float_flag_invalid_cvti;
   1104             r = max;
   1105         }
   1106         break;
   1107 
   1108     default:
   1109         g_assert_not_reached();
   1110     }
   1111 
   1112     float_raise(flags, s);
   1113     return r;
   1114 }
   1115 
   1116 /*
   1117  *  Returns the result of converting the floating-point value `a' to
   1118  *  the unsigned integer format. The conversion is performed according
   1119  *  to the IEC/IEEE Standard for Binary Floating-Point
   1120  *  Arithmetic---which means in particular that the conversion is
   1121  *  rounded according to the current rounding mode. If `a' is a NaN,
   1122  *  the largest unsigned integer is returned. Otherwise, if the
   1123  *  conversion overflows, the largest unsigned integer is returned. If
   1124  *  the 'a' is negative, the result is rounded and zero is returned;
   1125  *  values that do not round to zero will raise the inexact exception
   1126  *  flag.
   1127  */
   1128 static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
   1129                                       int scale, uint64_t max, float_status *s)
   1130 {
   1131     int flags = 0;
   1132     uint64_t r;
   1133 
   1134     switch (p->cls) {
   1135     case float_class_snan:
   1136         flags |= float_flag_invalid_snan;
   1137         /* fall through */
   1138     case float_class_qnan:
   1139         flags |= float_flag_invalid;
   1140         r = max;
   1141         break;
   1142 
   1143     case float_class_inf:
   1144         flags = float_flag_invalid | float_flag_invalid_cvti;
   1145         r = p->sign ? 0 : max;
   1146         break;
   1147 
   1148     case float_class_zero:
   1149         return 0;
   1150 
   1151     case float_class_normal:
   1152         /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
   1153         if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
   1154             flags = float_flag_inexact;
   1155             if (p->cls == float_class_zero) {
   1156                 r = 0;
   1157                 break;
   1158             }
   1159         }
   1160 
   1161         if (p->sign) {
   1162             flags = float_flag_invalid | float_flag_invalid_cvti;
   1163             r = 0;
   1164         } else if (p->exp > DECOMPOSED_BINARY_POINT) {
   1165             flags = float_flag_invalid | float_flag_invalid_cvti;
   1166             r = max;
   1167         } else {
   1168             r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
   1169             if (r > max) {
   1170                 flags = float_flag_invalid | float_flag_invalid_cvti;
   1171                 r = max;
   1172             }
   1173         }
   1174         break;
   1175 
   1176     default:
   1177         g_assert_not_reached();
   1178     }
   1179 
   1180     float_raise(flags, s);
   1181     return r;
   1182 }
   1183 
   1184 /*
   1185  * Integer to float conversions
   1186  *
   1187  * Returns the result of converting the two's complement integer `a'
   1188  * to the floating-point format. The conversion is performed according
   1189  * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1190  */
   1191 static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
   1192                                   int scale, float_status *s)
   1193 {
   1194     uint64_t f = a;
   1195     int shift;
   1196 
   1197     memset(p, 0, sizeof(*p));
   1198 
   1199     if (a == 0) {
   1200         p->cls = float_class_zero;
   1201         return;
   1202     }
   1203 
   1204     p->cls = float_class_normal;
   1205     if (a < 0) {
   1206         f = -f;
   1207         p->sign = true;
   1208     }
   1209     shift = clz64(f);
   1210     scale = MIN(MAX(scale, -0x10000), 0x10000);
   1211 
   1212     p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
   1213     p->frac_hi = f << shift;
   1214 }
   1215 
   1216 /*
   1217  * Unsigned Integer to float conversions
   1218  *
   1219  * Returns the result of converting the unsigned integer `a' to the
   1220  * floating-point format. The conversion is performed according to the
   1221  * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
   1222  */
   1223 static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
   1224                                   int scale, float_status *status)
   1225 {
   1226     memset(p, 0, sizeof(*p));
   1227 
   1228     if (a == 0) {
   1229         p->cls = float_class_zero;
   1230     } else {
   1231         int shift = clz64(a);
   1232         scale = MIN(MAX(scale, -0x10000), 0x10000);
   1233         p->cls = float_class_normal;
   1234         p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
   1235         p->frac_hi = a << shift;
   1236     }
   1237 }
   1238 
   1239 /*
   1240  * Float min/max.
   1241  */
   1242 static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
   1243                                    float_status *s, int flags)
   1244 {
   1245     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
   1246     int a_exp, b_exp, cmp;
   1247 
   1248     if (unlikely(ab_mask & float_cmask_anynan)) {
   1249         /*
   1250          * For minNum/maxNum (IEEE 754-2008)
   1251          * or minimumNumber/maximumNumber (IEEE 754-2019),
   1252          * if one operand is a QNaN, and the other
   1253          * operand is numerical, then return numerical argument.
   1254          */
   1255         if ((flags & (minmax_isnum | minmax_isnumber))
   1256             && !(ab_mask & float_cmask_snan)
   1257             && (ab_mask & ~float_cmask_qnan)) {
   1258             return is_nan(a->cls) ? b : a;
   1259         }
   1260 
   1261         /*
   1262          * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
   1263          * are removed and replaced with minimum, minimumNumber, maximum
   1264          * and maximumNumber.
   1265          * minimumNumber/maximumNumber behavior for SNaN is changed to:
   1266          *   If both operands are NaNs, a QNaN is returned.
   1267          *   If either operand is a SNaN,
   1268          *   an invalid operation exception is signaled,
   1269          *   but unless both operands are NaNs,
   1270          *   the SNaN is otherwise ignored and not converted to a QNaN.
   1271          */
   1272         if ((flags & minmax_isnumber)
   1273             && (ab_mask & float_cmask_snan)
   1274             && (ab_mask & ~float_cmask_anynan)) {
   1275             float_raise(float_flag_invalid, s);
   1276             return is_nan(a->cls) ? b : a;
   1277         }
   1278 
   1279         return parts_pick_nan(a, b, s);
   1280     }
   1281 
   1282     a_exp = a->exp;
   1283     b_exp = b->exp;
   1284 
   1285     if (unlikely(ab_mask != float_cmask_normal)) {
   1286         switch (a->cls) {
   1287         case float_class_normal:
   1288             break;
   1289         case float_class_inf:
   1290             a_exp = INT16_MAX;
   1291             break;
   1292         case float_class_zero:
   1293             a_exp = INT16_MIN;
   1294             break;
   1295         default:
   1296             g_assert_not_reached();
   1297             break;
   1298         }
   1299         switch (b->cls) {
   1300         case float_class_normal:
   1301             break;
   1302         case float_class_inf:
   1303             b_exp = INT16_MAX;
   1304             break;
   1305         case float_class_zero:
   1306             b_exp = INT16_MIN;
   1307             break;
   1308         default:
   1309             g_assert_not_reached();
   1310             break;
   1311         }
   1312     }
   1313 
   1314     /* Compare magnitudes. */
   1315     cmp = a_exp - b_exp;
   1316     if (cmp == 0) {
   1317         cmp = frac_cmp(a, b);
   1318     }
   1319 
   1320     /*
   1321      * Take the sign into account.
   1322      * For ismag, only do this if the magnitudes are equal.
   1323      */
   1324     if (!(flags & minmax_ismag) || cmp == 0) {
   1325         if (a->sign != b->sign) {
   1326             /* For differing signs, the negative operand is less. */
   1327             cmp = a->sign ? -1 : 1;
   1328         } else if (a->sign) {
   1329             /* For two negative operands, invert the magnitude comparison. */
   1330             cmp = -cmp;
   1331         }
   1332     }
   1333 
   1334     if (flags & minmax_ismin) {
   1335         cmp = -cmp;
   1336     }
   1337     return cmp < 0 ? b : a;
   1338 }
   1339 
   1340 /*
   1341  * Floating point compare
   1342  */
   1343 static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
   1344                                      float_status *s, bool is_quiet)
   1345 {
   1346     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
   1347 
   1348     if (likely(ab_mask == float_cmask_normal)) {
   1349         FloatRelation cmp;
   1350 
   1351         if (a->sign != b->sign) {
   1352             goto a_sign;
   1353         }
   1354         if (a->exp == b->exp) {
   1355             cmp = frac_cmp(a, b);
   1356         } else if (a->exp < b->exp) {
   1357             cmp = float_relation_less;
   1358         } else {
   1359             cmp = float_relation_greater;
   1360         }
   1361         if (a->sign) {
   1362             cmp = -cmp;
   1363         }
   1364         return cmp;
   1365     }
   1366 
   1367     if (unlikely(ab_mask & float_cmask_anynan)) {
   1368         if (ab_mask & float_cmask_snan) {
   1369             float_raise(float_flag_invalid | float_flag_invalid_snan, s);
   1370         } else if (!is_quiet) {
   1371             float_raise(float_flag_invalid, s);
   1372         }
   1373         return float_relation_unordered;
   1374     }
   1375 
   1376     if (ab_mask & float_cmask_zero) {
   1377         if (ab_mask == float_cmask_zero) {
   1378             return float_relation_equal;
   1379         } else if (a->cls == float_class_zero) {
   1380             goto b_sign;
   1381         } else {
   1382             goto a_sign;
   1383         }
   1384     }
   1385 
   1386     if (ab_mask == float_cmask_inf) {
   1387         if (a->sign == b->sign) {
   1388             return float_relation_equal;
   1389         }
   1390     } else if (b->cls == float_class_inf) {
   1391         goto b_sign;
   1392     } else {
   1393         g_assert(a->cls == float_class_inf);
   1394     }
   1395 
   1396  a_sign:
   1397     return a->sign ? float_relation_less : float_relation_greater;
   1398  b_sign:
   1399     return b->sign ? float_relation_greater : float_relation_less;
   1400 }
   1401 
   1402 /*
   1403  * Multiply A by 2 raised to the power N.
   1404  */
   1405 static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
   1406 {
   1407     switch (a->cls) {
   1408     case float_class_snan:
   1409     case float_class_qnan:
   1410         parts_return_nan(a, s);
   1411         break;
   1412     case float_class_zero:
   1413     case float_class_inf:
   1414         break;
   1415     case float_class_normal:
   1416         a->exp += MIN(MAX(n, -0x10000), 0x10000);
   1417         break;
   1418     default:
   1419         g_assert_not_reached();
   1420     }
   1421 }
   1422 
/*
 * Return log2(A)
 *
 * The result replaces `a' in place.  Special cases, as handled below:
 *   NaN        -> parts_return_nan
 *   zero       -> -inf, raising float_flag_divbyzero
 *   +inf       -> returned unchanged
 *   -inf, or any negative normal
 *              -> default NaN, raising float_flag_invalid
 *
 * For positive normals the integer part of the result is the exponent
 * of `a'.  The fraction bits are produced one per loop iteration by
 * squaring the fraction.  `fmt' supplies frac_size, which sets how many
 * result bits are computed.  Only N == 64 is supported (asserted).
 */
static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
{
    uint64_t a0, a1, r, t, ign;
    FloatPartsN f;
    int i, n, a_exp, f_exp;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, s);
            return;
        case float_class_zero:
            float_raise(float_flag_divbyzero, s);
            /* log2(0) = -inf */
            a->cls = float_class_inf;
            a->sign = 1;
            return;
        case float_class_inf:
            /* log2(-inf) is invalid; log2(+inf) is +inf, left as is. */
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            break;
        }
        g_assert_not_reached();
    }
    /* Logarithm of a negative normal number is invalid. */
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /* TODO: This algorithm loses bits too quickly for float128. */
    g_assert(N == 64);

    a_exp = a->exp;
    f_exp = -1;

    /*
     * r accumulates the fraction bits of the result, t is the bit that
     * the current iteration may set, and a0:a1 is the 128-bit working
     * value that gets squared each round.
     */
    r = 0;
    t = DECOMPOSED_IMPLICIT_BIT;
    a0 = a->frac_hi;
    a1 = 0;

    n = fmt->frac_size + 2;
    if (unlikely(a_exp == -1)) {
        /*
         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
         * When the value is very close to 1.0, there are lots of 1's in
         * the msb parts of the fraction.  At the end, when we subtract
         * this value from -1.0, we can see a catastrophic loss of precision,
         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
         * bits of y in the final result.  To minimize this, compute as many
         * digits as we can.
         * ??? This case needs another algorithm to avoid this.
         */
        n = fmt->frac_size * 2 + 2;
        /* Don't compute a value overlapping the sticky bit */
        n = MIN(n, 62);
    }

    for (i = 0; i < n; i++) {
        /*
         * Square the working value, using the cheapest multiply that
         * covers the bits currently in use.
         */
        if (a1) {
            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
        } else if (a0 & 0xffffffffull) {
            mul64To128(a0, a0, &a0, &a1);
        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
            /* Low 32 bits are clear: a plain 64-bit multiply suffices. */
            a0 >>= 32;
            a0 *= a0;
        } else {
            /*
             * Only DECOMPOSED_IMPLICIT_BIT is left in the working value:
             * finish now, skipping the sticky-bit update below.
             */
            goto exact;
        }

        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
            if (unlikely(a_exp == 0 && r == 0)) {
                /*
                 * When a_exp == 0, we're computing the log2 of a value
                 * [1.0,2.0).  When the value is very close to 1.0, there
                 * are lots of 0's in the msb parts of the fraction.
                 * We need to compute more digits to produce a correct
                 * result -- restart at the top of the fraction.
                 * ??? This is likely to lose precision quickly, as for
                 * float128; we may need another method.
                 */
                f_exp -= i;
                t = r = DECOMPOSED_IMPLICIT_BIT;
                /* The loop increment follows, so counting resumes at 1. */
                i = 0;
            } else {
                r |= t;
            }
        } else {
            /* Top bit clear after squaring: double the working value. */
            add128(a0, a1, a0, a1, &a0, &a1);
        }
        t >>= 1;
    }

    /* Set sticky for inexact. */
    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);

 exact:
    /* Integer part of the result: the original exponent as a float. */
    parts_sint_to_float(a, a_exp, 0, s);
    if (r == 0) {
        return;
    }

    /* Build the fractional part from the accumulated bits and normalize. */
    memset(&f, 0, sizeof(f));
    f.cls = float_class_normal;
    f.frac_hi = r;
    f.exp = f_exp - frac_normalize(&f);

    /*
     * Combine integer and fractional parts.  With a zero integer part
     * the fraction alone is the result.
     * NOTE(review): presumably parts_sub_normal/parts_add_normal operate
     * on magnitudes, leaving the sign set by parts_sint_to_float -- confirm.
     */
    if (a_exp < 0) {
        parts_sub_normal(a, &f);
    } else if (a_exp > 0) {
        parts_add_normal(a, &f);
    } else {
        *a = f;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid, s);
    parts_default_nan(a, s);
}