qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git

vfp_helper.c (41901B)


      1 /*
      2  * ARM VFP floating-point operations
      3  *
      4  *  Copyright (c) 2003 Fabrice Bellard
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Lesser General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2.1 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Lesser General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Lesser General Public
     17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     18  */
     19 
     20 #include "qemu/osdep.h"
     21 #include "cpu.h"
     22 #include "exec/helper-proto.h"
     23 #include "internals.h"
     24 #ifdef CONFIG_TCG
     25 #include "qemu/log.h"
     26 #include "fpu/softfloat.h"
     27 #endif
     28 
     29 /* VFP support.  We follow the convention used for VFP instructions:
     30    Single precision routines have an "s" suffix, double precision a
     31    "d" suffix and half precision an "h" suffix.  */
     32 
     33 #ifdef CONFIG_TCG
     34 
     35 /* Convert host exception flags to vfp form.  */
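        /*
         * For reference, the target bits set here are the FPSCR cumulative
         * exception flags: IOC (bit 0), DZC (bit 1), OFC (bit 2), UFC (bit 3),
         * IXC (bit 4) and IDC (bit 7).
         */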
     36 static inline int vfp_exceptbits_from_host(int host_bits)
     37 {
     38     int target_bits = 0;
     39 
     40     if (host_bits & float_flag_invalid) {
     41         target_bits |= 1;
     42     }
     43     if (host_bits & float_flag_divbyzero) {
     44         target_bits |= 2;
     45     }
     46     if (host_bits & float_flag_overflow) {
     47         target_bits |= 4;
     48     }
     49     if (host_bits & (float_flag_underflow | float_flag_output_denormal)) {
     50         target_bits |= 8;
     51     }
     52     if (host_bits & float_flag_inexact) {
     53         target_bits |= 0x10;
     54     }
     55     if (host_bits & float_flag_input_denormal) {
     56         target_bits |= 0x80;
     57     }
     58     return target_bits;
     59 }
     60 
     61 /* Convert vfp exception flags to host form.  */
     62 static inline int vfp_exceptbits_to_host(int target_bits)
     63 {
     64     int host_bits = 0;
     65 
     66     if (target_bits & 1) {
     67         host_bits |= float_flag_invalid;
     68     }
     69     if (target_bits & 2) {
     70         host_bits |= float_flag_divbyzero;
     71     }
     72     if (target_bits & 4) {
     73         host_bits |= float_flag_overflow;
     74     }
     75     if (target_bits & 8) {
     76         host_bits |= float_flag_underflow;
     77     }
     78     if (target_bits & 0x10) {
     79         host_bits |= float_flag_inexact;
     80     }
     81     if (target_bits & 0x80) {
     82         host_bits |= float_flag_input_denormal;
     83     }
     84     return host_bits;
     85 }
     86 
     87 static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
     88 {
     89     uint32_t i;
     90 
     91     i = get_float_exception_flags(&env->vfp.fp_status);
     92     i |= get_float_exception_flags(&env->vfp.standard_fp_status);
     93     /* FZ16 does not generate an input denormal exception.  */
     94     i |= (get_float_exception_flags(&env->vfp.fp_status_f16)
     95           & ~float_flag_input_denormal);
     96     i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16)
     97           & ~float_flag_input_denormal);
     98     return vfp_exceptbits_from_host(i);
     99 }
    100 
    101 static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
    102 {
    103     int i;
    104     uint32_t changed = env->vfp.xregs[ARM_VFP_FPSCR];
    105 
    106     changed ^= val;
    107     if (changed & (3 << 22)) {
    108         i = (val >> 22) & 3;
    109         switch (i) {
    110         case FPROUNDING_TIEEVEN:
    111             i = float_round_nearest_even;
    112             break;
    113         case FPROUNDING_POSINF:
    114             i = float_round_up;
    115             break;
    116         case FPROUNDING_NEGINF:
    117             i = float_round_down;
    118             break;
    119         case FPROUNDING_ZERO:
    120             i = float_round_to_zero;
    121             break;
    122         }
    123         set_float_rounding_mode(i, &env->vfp.fp_status);
    124         set_float_rounding_mode(i, &env->vfp.fp_status_f16);
    125     }
    126     if (changed & FPCR_FZ16) {
    127         bool ftz_enabled = val & FPCR_FZ16;
    128         set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
    129         set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
    130         set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
    131         set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
    132     }
    133     if (changed & FPCR_FZ) {
    134         bool ftz_enabled = val & FPCR_FZ;
    135         set_flush_to_zero(ftz_enabled, &env->vfp.fp_status);
    136         set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status);
    137     }
    138     if (changed & FPCR_DN) {
    139         bool dnan_enabled = val & FPCR_DN;
    140         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status);
    141         set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16);
    142     }
    143 
    144     /*
    145      * The exception flags are ORed together when we read fpscr so we
    146      * only need to preserve the current state in one of our
    147      * float_status values.
    148      */
    149     i = vfp_exceptbits_to_host(val);
    150     set_float_exception_flags(i, &env->vfp.fp_status);
    151     set_float_exception_flags(0, &env->vfp.fp_status_f16);
    152     set_float_exception_flags(0, &env->vfp.standard_fp_status);
    153     set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
    154 }
    155 
    156 #else
    157 
    158 static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
    159 {
    160     return 0;
    161 }
    162 
    163 static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
    164 {
    165 }
    166 
    167 #endif
    168 
    169 uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
    170 {
    171     uint32_t i, fpscr;
    172 
    173     fpscr = env->vfp.xregs[ARM_VFP_FPSCR]
    174             | (env->vfp.vec_len << 16)
    175             | (env->vfp.vec_stride << 20);
    176 
    177     /*
    178      * M-profile LTPSIZE overlaps A-profile Stride; whichever of the
    179      * two is not applicable to this CPU will always be zero.
    180      */
    181     fpscr |= env->v7m.ltpsize << 16;
    182 
    183     fpscr |= vfp_get_fpscr_from_host(env);
    184 
    185     i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
    186     fpscr |= i ? FPCR_QC : 0;
    187 
    188     return fpscr;
    189 }
    190 
    191 uint32_t vfp_get_fpscr(CPUARMState *env)
    192 {
    193     return HELPER(vfp_get_fpscr)(env);
    194 }
    195 
    196 void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
    197 {
    198     ARMCPU *cpu = env_archcpu(env);
    199 
    200     /* When ARMv8.2-FP16 is not supported, FZ16 is RES0.  */
    201     if (!cpu_isar_feature(any_fp16, cpu)) {
    202         val &= ~FPCR_FZ16;
    203     }
    204 
    205     vfp_set_fpscr_to_host(env, val);
    206 
    207     if (!arm_feature(env, ARM_FEATURE_M)) {
    208         /*
    209          * Short-vector length and stride; on M-profile these bits
    210          * are used for different purposes.
    211          * We can't make this conditional be "if MVFR0.FPShVec != 0",
    212          * because in v7A no-short-vector-support cores still had to
    213          * allow Stride/Len to be written with the only effect that
    214          * some insns are required to UNDEF if the guest sets them.
    215          */
    216         env->vfp.vec_len = extract32(val, 16, 3);
    217         env->vfp.vec_stride = extract32(val, 20, 2);
    218     } else if (cpu_isar_feature(aa32_mve, cpu)) {
    219         env->v7m.ltpsize = extract32(val, FPCR_LTPSIZE_SHIFT,
    220                                      FPCR_LTPSIZE_LENGTH);
    221     }
    222 
    223     if (arm_feature(env, ARM_FEATURE_NEON) ||
    224         cpu_isar_feature(aa32_mve, cpu)) {
    225         /*
    226          * The bit we set within fpscr_q is arbitrary; the register as a
    227          * whole being zero/non-zero is what counts.
    228          * TODO: M-profile MVE also has a QC bit.
    229          */
    230         env->vfp.qc[0] = val & FPCR_QC;
    231         env->vfp.qc[1] = 0;
    232         env->vfp.qc[2] = 0;
    233         env->vfp.qc[3] = 0;
    234     }
    235 
    236     /*
    237      * We don't implement trapped exception handling, so the
    238      * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
    239      *
    240      * The exception flags IOC|DZC|OFC|UFC|IXC|IDC are stored in
    241      * fp_status; QC, Len and Stride are stored separately earlier.
    242      * Clear out all of those and the RES0 bits: only NZCV, AHP, DN,
    243      * FZ, RMode and FZ16 are kept in vfp.xregs[FPSCR].
    244      */
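            /*
             * As a quick check of the mask: NZCV is bits [31:28] (0xf0000000),
             * AHP/DN/FZ/RMode are bits [26:22] (0x07c00000) and FZ16 is
             * bit 19 (0x00080000), which together give 0xf7c80000.
             */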
    245     env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000;
    246 }
    247 
    248 void vfp_set_fpscr(CPUARMState *env, uint32_t val)
    249 {
    250     HELPER(vfp_set_fpscr)(env, val);
    251 }
    252 
    253 #ifdef CONFIG_TCG
    254 
    255 #define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
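        /*
         * As an example of the expansion: VFP_HELPER(add, s) becomes
         * HELPER(vfp_adds), i.e. the function helper_vfp_adds().
         */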
    256 
    257 #define VFP_BINOP(name) \
    258 dh_ctype_f16 VFP_HELPER(name, h)(dh_ctype_f16 a, dh_ctype_f16 b, void *fpstp) \
    259 { \
    260     float_status *fpst = fpstp; \
    261     return float16_ ## name(a, b, fpst); \
    262 } \
    263 float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \
    264 { \
    265     float_status *fpst = fpstp; \
    266     return float32_ ## name(a, b, fpst); \
    267 } \
    268 float64 VFP_HELPER(name, d)(float64 a, float64 b, void *fpstp) \
    269 { \
    270     float_status *fpst = fpstp; \
    271     return float64_ ## name(a, b, fpst); \
    272 }
    273 VFP_BINOP(add)
    274 VFP_BINOP(sub)
    275 VFP_BINOP(mul)
    276 VFP_BINOP(div)
    277 VFP_BINOP(min)
    278 VFP_BINOP(max)
    279 VFP_BINOP(minnum)
    280 VFP_BINOP(maxnum)
    281 #undef VFP_BINOP
    282 
    283 dh_ctype_f16 VFP_HELPER(neg, h)(dh_ctype_f16 a)
    284 {
    285     return float16_chs(a);
    286 }
    287 
    288 float32 VFP_HELPER(neg, s)(float32 a)
    289 {
    290     return float32_chs(a);
    291 }
    292 
    293 float64 VFP_HELPER(neg, d)(float64 a)
    294 {
    295     return float64_chs(a);
    296 }
    297 
    298 dh_ctype_f16 VFP_HELPER(abs, h)(dh_ctype_f16 a)
    299 {
    300     return float16_abs(a);
    301 }
    302 
    303 float32 VFP_HELPER(abs, s)(float32 a)
    304 {
    305     return float32_abs(a);
    306 }
    307 
    308 float64 VFP_HELPER(abs, d)(float64 a)
    309 {
    310     return float64_abs(a);
    311 }
    312 
    313 dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, CPUARMState *env)
    314 {
    315     return float16_sqrt(a, &env->vfp.fp_status_f16);
    316 }
    317 
    318 float32 VFP_HELPER(sqrt, s)(float32 a, CPUARMState *env)
    319 {
    320     return float32_sqrt(a, &env->vfp.fp_status);
    321 }
    322 
    323 float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env)
    324 {
    325     return float64_sqrt(a, &env->vfp.fp_status);
    326 }
    327 
    328 static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
    329 {
    330     uint32_t flags;
    331     switch (cmp) {
    332     case float_relation_equal:
    333         flags = 0x6;
    334         break;
    335     case float_relation_less:
    336         flags = 0x8;
    337         break;
    338     case float_relation_greater:
    339         flags = 0x2;
    340         break;
    341     case float_relation_unordered:
    342         flags = 0x3;
    343         break;
    344     default:
    345         g_assert_not_reached();
    346     }
    347     env->vfp.xregs[ARM_VFP_FPSCR] =
    348         deposit32(env->vfp.xregs[ARM_VFP_FPSCR], 28, 4, flags);
    349 }
    350 
    351 /* XXX: check quiet/signaling case */
    352 #define DO_VFP_cmp(P, FLOATTYPE, ARGTYPE, FPST) \
    353 void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env)  \
    354 { \
    355     softfloat_to_vfp_compare(env, \
    356         FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \
    357 } \
    358 void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
    359 { \
    360     softfloat_to_vfp_compare(env, \
    361         FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
    362 }
    363 DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16)
    364 DO_VFP_cmp(s, float32, float32, fp_status)
    365 DO_VFP_cmp(d, float64, float64, fp_status)
    366 #undef DO_VFP_cmp
    367 
    368 /* Integer to float and float to integer conversions */
    369 
    370 #define CONV_ITOF(name, ftype, fsz, sign)                           \
    371 ftype HELPER(name)(uint32_t x, void *fpstp)                         \
    372 {                                                                   \
    373     float_status *fpst = fpstp;                                     \
    374     return sign##int32_to_##float##fsz((sign##int32_t)x, fpst);     \
    375 }
    376 
    377 #define CONV_FTOI(name, ftype, fsz, sign, round)                \
    378 sign##int32_t HELPER(name)(ftype x, void *fpstp)                \
    379 {                                                               \
    380     float_status *fpst = fpstp;                                 \
    381     if (float##fsz##_is_any_nan(x)) {                           \
    382         float_raise(float_flag_invalid, fpst);                  \
    383         return 0;                                               \
    384     }                                                           \
    385     return float##fsz##_to_##sign##int32##round(x, fpst);       \
    386 }
    387 
    388 #define FLOAT_CONVS(name, p, ftype, fsz, sign)            \
    389     CONV_ITOF(vfp_##name##to##p, ftype, fsz, sign)        \
    390     CONV_FTOI(vfp_to##name##p, ftype, fsz, sign, )        \
    391     CONV_FTOI(vfp_to##name##z##p, ftype, fsz, sign, _round_to_zero)
    392 
    393 FLOAT_CONVS(si, h, uint32_t, 16, )
    394 FLOAT_CONVS(si, s, float32, 32, )
    395 FLOAT_CONVS(si, d, float64, 64, )
    396 FLOAT_CONVS(ui, h, uint32_t, 16, u)
    397 FLOAT_CONVS(ui, s, float32, 32, u)
    398 FLOAT_CONVS(ui, d, float64, 64, u)
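        /*
         * For example, FLOAT_CONVS(si, s, float32, 32, ) above expands into
         * helper_vfp_sitos (int to float), helper_vfp_tosis (float to int
         * using the current rounding mode) and helper_vfp_tosizs (float to
         * int, round to zero).
         */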
    399 
    400 #undef CONV_ITOF
    401 #undef CONV_FTOI
    402 #undef FLOAT_CONVS
    403 
    404 /* floating point conversion */
    405 float64 VFP_HELPER(fcvtd, s)(float32 x, CPUARMState *env)
    406 {
    407     return float32_to_float64(x, &env->vfp.fp_status);
    408 }
    409 
    410 float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env)
    411 {
    412     return float64_to_float32(x, &env->vfp.fp_status);
    413 }
    414 
    415 uint32_t HELPER(bfcvt)(float32 x, void *status)
    416 {
    417     return float32_to_bfloat16(x, status);
    418 }
    419 
    420 uint32_t HELPER(bfcvt_pair)(uint64_t pair, void *status)
    421 {
    422     bfloat16 lo = float32_to_bfloat16(extract64(pair, 0, 32), status);
    423     bfloat16 hi = float32_to_bfloat16(extract64(pair, 32, 32), status);
    424     return deposit32(lo, 16, 16, hi);
    425 }
    426 
    427 /*
    428  * VFP3 fixed point conversion. The AArch32 versions of fix-to-float
    429  * must always round-to-nearest; the AArch64 ones honour the FPSCR
    430  * rounding mode. (For AArch32 Neon the standard-FPSCR is set to
    431  * round-to-nearest so either helper will work.) AArch32 float-to-fix
    432  * must round-to-zero.
    433  */
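        /*
         * In these helpers "shift" is the number of fraction bits: the
         * fix-to-float direction computes x * 2^-shift and the float-to-fix
         * direction computes x * 2^shift.  As an illustration, converting the
         * fixed-point value 0x180 to double with shift 8 yields 384/256 = 1.5.
         */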
    434 #define VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype)            \
    435 ftype HELPER(vfp_##name##to##p)(uint##isz##_t  x, uint32_t shift,      \
    436                                      void *fpstp) \
    437 { return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); }
    438 
    439 #define VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype)      \
    440     ftype HELPER(vfp_##name##to##p##_round_to_nearest)(uint##isz##_t  x, \
    441                                                      uint32_t shift,   \
    442                                                      void *fpstp)      \
    443     {                                                                  \
    444         ftype ret;                                                     \
    445         float_status *fpst = fpstp;                                    \
    446         FloatRoundMode oldmode = fpst->float_rounding_mode;            \
    447         fpst->float_rounding_mode = float_round_nearest_even;          \
    448         ret = itype##_to_##float##fsz##_scalbn(x, -shift, fpstp);      \
    449         fpst->float_rounding_mode = oldmode;                           \
    450         return ret;                                                    \
    451     }
    452 
    453 #define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype, ROUND, suff) \
    454 uint##isz##_t HELPER(vfp_to##name##p##suff)(ftype x, uint32_t shift,      \
    455                                             void *fpst)                   \
    456 {                                                                         \
    457     if (unlikely(float##fsz##_is_any_nan(x))) {                           \
    458         float_raise(float_flag_invalid, fpst);                            \
    459         return 0;                                                         \
    460     }                                                                     \
    461     return float##fsz##_to_##itype##_scalbn(x, ROUND, shift, fpst);       \
    462 }
    463 
    464 #define VFP_CONV_FIX(name, p, fsz, ftype, isz, itype)            \
    465 VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype)              \
    466 VFP_CONV_FIX_FLOAT_ROUND(name, p, fsz, ftype, isz, itype)        \
    467 VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype,        \
    468                          float_round_to_zero, _round_to_zero)    \
    469 VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype,        \
    470                          get_float_rounding_mode(fpst), )
    471 
    472 #define VFP_CONV_FIX_A64(name, p, fsz, ftype, isz, itype)        \
    473 VFP_CONV_FIX_FLOAT(name, p, fsz, ftype, isz, itype)              \
    474 VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, ftype, isz, itype,        \
    475                          get_float_rounding_mode(fpst), )
    476 
    477 VFP_CONV_FIX(sh, d, 64, float64, 64, int16)
    478 VFP_CONV_FIX(sl, d, 64, float64, 64, int32)
    479 VFP_CONV_FIX_A64(sq, d, 64, float64, 64, int64)
    480 VFP_CONV_FIX(uh, d, 64, float64, 64, uint16)
    481 VFP_CONV_FIX(ul, d, 64, float64, 64, uint32)
    482 VFP_CONV_FIX_A64(uq, d, 64, float64, 64, uint64)
    483 VFP_CONV_FIX(sh, s, 32, float32, 32, int16)
    484 VFP_CONV_FIX(sl, s, 32, float32, 32, int32)
    485 VFP_CONV_FIX_A64(sq, s, 32, float32, 64, int64)
    486 VFP_CONV_FIX(uh, s, 32, float32, 32, uint16)
    487 VFP_CONV_FIX(ul, s, 32, float32, 32, uint32)
    488 VFP_CONV_FIX_A64(uq, s, 32, float32, 64, uint64)
    489 VFP_CONV_FIX(sh, h, 16, dh_ctype_f16, 32, int16)
    490 VFP_CONV_FIX(sl, h, 16, dh_ctype_f16, 32, int32)
    491 VFP_CONV_FIX_A64(sq, h, 16, dh_ctype_f16, 64, int64)
    492 VFP_CONV_FIX(uh, h, 16, dh_ctype_f16, 32, uint16)
    493 VFP_CONV_FIX(ul, h, 16, dh_ctype_f16, 32, uint32)
    494 VFP_CONV_FIX_A64(uq, h, 16, dh_ctype_f16, 64, uint64)
    495 
    496 #undef VFP_CONV_FIX
    497 #undef VFP_CONV_FIX_FLOAT
    498 #undef VFP_CONV_FLOAT_FIX_ROUND
    499 #undef VFP_CONV_FIX_A64
    500 
    501 /* Set the current fp rounding mode and return the old one.
    502  * The argument is a softfloat float_round_ value.
    503  */
    504 uint32_t HELPER(set_rmode)(uint32_t rmode, void *fpstp)
    505 {
    506     float_status *fp_status = fpstp;
    507 
    508     uint32_t prev_rmode = get_float_rounding_mode(fp_status);
    509     set_float_rounding_mode(rmode, fp_status);
    510 
    511     return prev_rmode;
    512 }
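        /*
         * This is meant for a save/restore pattern: set a specific mode for
         * one operation, then write the returned previous mode back,
         * schematically:
         *
         *     old = set_rmode(float_round_to_zero, fpst);
         *     ... operation ...
         *     set_rmode(old, fpst);
         */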
    513 
    514 /* Half precision conversions.  */
    515 float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode)
    516 {
    517     /* Squash FZ16 to 0 for the duration of the conversion; otherwise
    518      * it would incorrectly flush input denormals.
    519      */
    520     float_status *fpst = fpstp;
    521     bool save = get_flush_inputs_to_zero(fpst);
    522     set_flush_inputs_to_zero(false, fpst);
    523     float32 r = float16_to_float32(a, !ahp_mode, fpst);
    524     set_flush_inputs_to_zero(save, fpst);
    525     return r;
    526 }
    527 
    528 uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode)
    529 {
    530     /* Squash FZ16 to 0 for the duration of the conversion; otherwise
    531      * it would incorrectly flush output denormals.
    532      */
    533     float_status *fpst = fpstp;
    534     bool save = get_flush_to_zero(fpst);
    535     set_flush_to_zero(false, fpst);
    536     float16 r = float32_to_float16(a, !ahp_mode, fpst);
    537     set_flush_to_zero(save, fpst);
    538     return r;
    539 }
    540 
    541 float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode)
    542 {
    543     /* Squash FZ16 to 0 for the duration of the conversion; otherwise
    544      * it would incorrectly flush input denormals.
    545      */
    546     float_status *fpst = fpstp;
    547     bool save = get_flush_inputs_to_zero(fpst);
    548     set_flush_inputs_to_zero(false, fpst);
    549     float64 r = float16_to_float64(a, !ahp_mode, fpst);
    550     set_flush_inputs_to_zero(save, fpst);
    551     return r;
    552 }
    553 
    554 uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
    555 {
    556     /* Squash FZ16 to 0 for the duration of the conversion; otherwise
    557      * it would incorrectly flush output denormals.
    558      */
    559     float_status *fpst = fpstp;
    560     bool save = get_flush_to_zero(fpst);
    561     set_flush_to_zero(false, fpst);
    562     float16 r = float64_to_float16(a, !ahp_mode, fpst);
    563     set_flush_to_zero(save, fpst);
    564     return r;
    565 }
    566 
    567 /* NEON helpers.  */
    568 
    569 /* Constants 256 and 512 are used in some helpers; we avoid relying on
    570  * int->float conversions at run-time.  */
    571 #define float64_256 make_float64(0x4070000000000000LL)
    572 #define float64_512 make_float64(0x4080000000000000LL)
    573 #define float16_maxnorm make_float16(0x7bff)
    574 #define float32_maxnorm make_float32(0x7f7fffff)
    575 #define float64_maxnorm make_float64(0x7fefffffffffffffLL)
    576 
    577 /* Reciprocal functions
    578  *
    579  * The algorithm that must be used to calculate the estimate
    580  * is specified by the ARM ARM, see FPRecipEstimate()/RecipEstimate
    581  */
    582 
    583 /* See RecipEstimate()
    584  *
    585  * input is a 9 bit fixed point number
    586  * input range 256 .. 511 for a number from 0.5 <= x < 1.0.
    587  * result range 256 .. 511 for a number from 1.0 to 511/256.
    588  */
    589 
    590 static int recip_estimate(int input)
    591 {
    592     int a, b, r;
    593     assert(256 <= input && input < 512);
    594     a = (input * 2) + 1;
    595     b = (1 << 19) / a;
    596     r = (b + 1) >> 1;
    597     assert(256 <= r && r < 512);
    598     return r;
    599 }
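        /*
         * A worked example: for input 256 (i.e. 0.5), a = 513,
         * b = (1 << 19) / 513 = 1022 and r = 511; 511/256 is roughly 2.0,
         * as expected for the reciprocal of 0.5.
         */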
    600 
    601 /*
    602  * Common wrapper to call recip_estimate
    603  *
    604  * The parameters are the exponent and the 64 bit fraction (without
    605  * the implicit bit), with the binary point nominally at bit 52.
    606  * Returns the result fraction and updates *exp; the caller then
    607  * rounds and packs the result into a float of the appropriate size.
    608  */
    609 
    610 static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
    611 {
    612     uint32_t scaled, estimate;
    613     uint64_t result_frac;
    614     int result_exp;
    615 
    616     /* Handle sub-normals */
    617     if (*exp == 0) {
    618         if (extract64(frac, 51, 1) == 0) {
    619             *exp = -1;
    620             frac <<= 2;
    621         } else {
    622             frac <<= 1;
    623         }
    624     }
    625 
    626     /* scaled = UInt('1':fraction<51:44>) */
    627     scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
    628     estimate = recip_estimate(scaled);
    629 
    630     result_exp = exp_off - *exp;
    631     result_frac = deposit64(0, 44, 8, estimate);
    632     if (result_exp == 0) {
    633         result_frac = deposit64(result_frac >> 1, 51, 1, 1);
    634     } else if (result_exp == -1) {
    635         result_frac = deposit64(result_frac >> 2, 50, 2, 1);
    636         result_exp = 0;
    637     }
    638 
    639     *exp = result_exp;
    640 
    641     return result_frac;
    642 }
    643 
    644 static bool round_to_inf(float_status *fpst, bool sign_bit)
    645 {
    646     switch (fpst->float_rounding_mode) {
    647     case float_round_nearest_even: /* Round to Nearest */
    648         return true;
    649     case float_round_up: /* Round to +Inf */
    650         return !sign_bit;
    651     case float_round_down: /* Round to -Inf */
    652         return sign_bit;
    653     case float_round_to_zero: /* Round to Zero */
    654         return false;
    655     default:
    656         g_assert_not_reached();
    657     }
    658 }
    659 
    660 uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp)
    661 {
    662     float_status *fpst = fpstp;
    663     float16 f16 = float16_squash_input_denormal(input, fpst);
    664     uint32_t f16_val = float16_val(f16);
    665     uint32_t f16_sign = float16_is_neg(f16);
    666     int f16_exp = extract32(f16_val, 10, 5);
    667     uint32_t f16_frac = extract32(f16_val, 0, 10);
    668     uint64_t f64_frac;
    669 
    670     if (float16_is_any_nan(f16)) {
    671         float16 nan = f16;
    672         if (float16_is_signaling_nan(f16, fpst)) {
    673             float_raise(float_flag_invalid, fpst);
    674             if (!fpst->default_nan_mode) {
    675                 nan = float16_silence_nan(f16, fpst);
    676             }
    677         }
    678         if (fpst->default_nan_mode) {
    679             nan =  float16_default_nan(fpst);
    680         }
    681         return nan;
    682     } else if (float16_is_infinity(f16)) {
    683         return float16_set_sign(float16_zero, float16_is_neg(f16));
    684     } else if (float16_is_zero(f16)) {
    685         float_raise(float_flag_divbyzero, fpst);
    686         return float16_set_sign(float16_infinity, float16_is_neg(f16));
    687     } else if (float16_abs(f16) < (1 << 8)) {
    688         /* Abs(value) < 2.0^-16 */
    689         float_raise(float_flag_overflow | float_flag_inexact, fpst);
    690         if (round_to_inf(fpst, f16_sign)) {
    691             return float16_set_sign(float16_infinity, f16_sign);
    692         } else {
    693             return float16_set_sign(float16_maxnorm, f16_sign);
    694         }
    695     } else if (f16_exp >= 29 && fpst->flush_to_zero) {
    696         float_raise(float_flag_underflow, fpst);
    697         return float16_set_sign(float16_zero, float16_is_neg(f16));
    698     }
    699 
    700     f64_frac = call_recip_estimate(&f16_exp, 29,
    701                                    ((uint64_t) f16_frac) << (52 - 10));
    702 
    703     /* result = sign : result_exp<4:0> : fraction<51:42> */
    704     f16_val = deposit32(0, 15, 1, f16_sign);
    705     f16_val = deposit32(f16_val, 10, 5, f16_exp);
    706     f16_val = deposit32(f16_val, 0, 10, extract64(f64_frac, 52 - 10, 10));
    707     return make_float16(f16_val);
    708 }
    709 
    710 float32 HELPER(recpe_f32)(float32 input, void *fpstp)
    711 {
    712     float_status *fpst = fpstp;
    713     float32 f32 = float32_squash_input_denormal(input, fpst);
    714     uint32_t f32_val = float32_val(f32);
    715     bool f32_sign = float32_is_neg(f32);
    716     int f32_exp = extract32(f32_val, 23, 8);
    717     uint32_t f32_frac = extract32(f32_val, 0, 23);
    718     uint64_t f64_frac;
    719 
    720     if (float32_is_any_nan(f32)) {
    721         float32 nan = f32;
    722         if (float32_is_signaling_nan(f32, fpst)) {
    723             float_raise(float_flag_invalid, fpst);
    724             if (!fpst->default_nan_mode) {
    725                 nan = float32_silence_nan(f32, fpst);
    726             }
    727         }
    728         if (fpst->default_nan_mode) {
    729             nan =  float32_default_nan(fpst);
    730         }
    731         return nan;
    732     } else if (float32_is_infinity(f32)) {
    733         return float32_set_sign(float32_zero, float32_is_neg(f32));
    734     } else if (float32_is_zero(f32)) {
    735         float_raise(float_flag_divbyzero, fpst);
    736         return float32_set_sign(float32_infinity, float32_is_neg(f32));
    737     } else if (float32_abs(f32) < (1ULL << 21)) {
    738         /* Abs(value) < 2.0^-128 */
    739         float_raise(float_flag_overflow | float_flag_inexact, fpst);
    740         if (round_to_inf(fpst, f32_sign)) {
    741             return float32_set_sign(float32_infinity, f32_sign);
    742         } else {
    743             return float32_set_sign(float32_maxnorm, f32_sign);
    744         }
    745     } else if (f32_exp >= 253 && fpst->flush_to_zero) {
    746         float_raise(float_flag_underflow, fpst);
    747         return float32_set_sign(float32_zero, float32_is_neg(f32));
    748     }
    749 
    750     f64_frac = call_recip_estimate(&f32_exp, 253,
    751                                    ((uint64_t) f32_frac) << (52 - 23));
    752 
    753     /* result = sign : result_exp<7:0> : fraction<51:29> */
    754     f32_val = deposit32(0, 31, 1, f32_sign);
    755     f32_val = deposit32(f32_val, 23, 8, f32_exp);
    756     f32_val = deposit32(f32_val, 0, 23, extract64(f64_frac, 52 - 23, 23));
    757     return make_float32(f32_val);
    758 }
    759 
    760 float64 HELPER(recpe_f64)(float64 input, void *fpstp)
    761 {
    762     float_status *fpst = fpstp;
    763     float64 f64 = float64_squash_input_denormal(input, fpst);
    764     uint64_t f64_val = float64_val(f64);
    765     bool f64_sign = float64_is_neg(f64);
    766     int f64_exp = extract64(f64_val, 52, 11);
    767     uint64_t f64_frac = extract64(f64_val, 0, 52);
    768 
    769     /* Deal with any special cases */
    770     if (float64_is_any_nan(f64)) {
    771         float64 nan = f64;
    772         if (float64_is_signaling_nan(f64, fpst)) {
    773             float_raise(float_flag_invalid, fpst);
    774             if (!fpst->default_nan_mode) {
    775                 nan = float64_silence_nan(f64, fpst);
    776             }
    777         }
    778         if (fpst->default_nan_mode) {
    779             nan =  float64_default_nan(fpst);
    780         }
    781         return nan;
    782     } else if (float64_is_infinity(f64)) {
    783         return float64_set_sign(float64_zero, float64_is_neg(f64));
    784     } else if (float64_is_zero(f64)) {
    785         float_raise(float_flag_divbyzero, fpst);
    786         return float64_set_sign(float64_infinity, float64_is_neg(f64));
    787     } else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) {
    788         /* Abs(value) < 2.0^-1024 */
    789         float_raise(float_flag_overflow | float_flag_inexact, fpst);
    790         if (round_to_inf(fpst, f64_sign)) {
    791             return float64_set_sign(float64_infinity, f64_sign);
    792         } else {
    793             return float64_set_sign(float64_maxnorm, f64_sign);
    794         }
    795     } else if (f64_exp >= 2045 && fpst->flush_to_zero) {
    796         float_raise(float_flag_underflow, fpst);
    797         return float64_set_sign(float64_zero, float64_is_neg(f64));
    798     }
    799 
    800     f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac);
    801 
    802     /* result = sign : result_exp<10:0> : fraction<51:0>; */
    803     f64_val = deposit64(0, 63, 1, f64_sign);
    804     f64_val = deposit64(f64_val, 52, 11, f64_exp);
    805     f64_val = deposit64(f64_val, 0, 52, f64_frac);
    806     return make_float64(f64_val);
    807 }
    808 
    809 /* The algorithm that must be used to calculate the estimate
    810  * is specified by the ARM ARM.
    811  */
    812 
    813 static int do_recip_sqrt_estimate(int a)
    814 {
    815     int b, estimate;
    816 
    817     assert(128 <= a && a < 512);
    818     if (a < 256) {
    819         a = a * 2 + 1;
    820     } else {
    821         a = (a >> 1) << 1;
    822         a = (a + 1) * 2;
    823     }
    824     b = 512;
    825     while (a * (b + 1) * (b + 1) < (1 << 28)) {
    826         b += 1;
    827     }
    828     estimate = (b + 1) / 2;
    829     assert(256 <= estimate && estimate < 512);
    830 
    831     return estimate;
    832 }
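        /*
         * A worked example: for a = 256 (representing 0.5), a becomes 514,
         * the loop stops with b = 722 and the estimate is 723 / 2 = 361;
         * 361/256 is about 1.41, close to 1/sqrt(0.5).
         */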
    833 
    834 
    835 static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
    836 {
    837     int estimate;
    838     uint32_t scaled;
    839 
    840     if (*exp == 0) {
    841         while (extract64(frac, 51, 1) == 0) {
    842             frac = frac << 1;
    843             *exp -= 1;
    844         }
    845         frac = extract64(frac, 0, 51) << 1;
    846     }
    847 
    848     if (*exp & 1) {
    849         /* scaled = UInt('01':fraction<51:45>) */
    850         scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
    851     } else {
    852         /* scaled = UInt('1':fraction<51:44>) */
    853         scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
    854     }
    855     estimate = do_recip_sqrt_estimate(scaled);
    856 
    857     *exp = (exp_off - *exp) / 2;
    858     return extract64(estimate, 0, 8) << 44;
    859 }
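        /*
         * The parity of the exponent matters because the result exponent is
         * (exp_off - *exp) / 2: for an odd exponent the fraction is indexed
         * over the 0.25 .. 0.5 range rather than 0.5 .. 1.0, so the halving
         * stays consistent.
         */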
    860 
    861 uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
    862 {
    863     float_status *s = fpstp;
    864     float16 f16 = float16_squash_input_denormal(input, s);
    865     uint16_t val = float16_val(f16);
    866     bool f16_sign = float16_is_neg(f16);
    867     int f16_exp = extract32(val, 10, 5);
    868     uint16_t f16_frac = extract32(val, 0, 10);
    869     uint64_t f64_frac;
    870 
    871     if (float16_is_any_nan(f16)) {
    872         float16 nan = f16;
    873         if (float16_is_signaling_nan(f16, s)) {
    874             float_raise(float_flag_invalid, s);
    875             if (!s->default_nan_mode) {
    876                 nan = float16_silence_nan(f16, fpstp);
    877             }
    878         }
    879         if (s->default_nan_mode) {
    880             nan =  float16_default_nan(s);
    881         }
    882         return nan;
    883     } else if (float16_is_zero(f16)) {
    884         float_raise(float_flag_divbyzero, s);
    885         return float16_set_sign(float16_infinity, f16_sign);
    886     } else if (f16_sign) {
    887         float_raise(float_flag_invalid, s);
    888         return float16_default_nan(s);
    889     } else if (float16_is_infinity(f16)) {
    890         return float16_zero;
    891     }
    892 
    893     /* Scale and normalize to a double-precision value between 0.25 and 1.0,
    894      * preserving the parity of the exponent.  */
    895 
    896     f64_frac = ((uint64_t) f16_frac) << (52 - 10);
    897 
    898     f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
    899 
    900     /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
    901     val = deposit32(0, 15, 1, f16_sign);
    902     val = deposit32(val, 10, 5, f16_exp);
    903     val = deposit32(val, 2, 8, extract64(f64_frac, 52 - 8, 8));
    904     return make_float16(val);
    905 }
    906 
    907 float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
    908 {
    909     float_status *s = fpstp;
    910     float32 f32 = float32_squash_input_denormal(input, s);
    911     uint32_t val = float32_val(f32);
    912     uint32_t f32_sign = float32_is_neg(f32);
    913     int f32_exp = extract32(val, 23, 8);
    914     uint32_t f32_frac = extract32(val, 0, 23);
    915     uint64_t f64_frac;
    916 
    917     if (float32_is_any_nan(f32)) {
    918         float32 nan = f32;
    919         if (float32_is_signaling_nan(f32, s)) {
    920             float_raise(float_flag_invalid, s);
    921             if (!s->default_nan_mode) {
    922                 nan = float32_silence_nan(f32, fpstp);
    923             }
    924         }
    925         if (s->default_nan_mode) {
    926             nan =  float32_default_nan(s);
    927         }
    928         return nan;
    929     } else if (float32_is_zero(f32)) {
    930         float_raise(float_flag_divbyzero, s);
    931         return float32_set_sign(float32_infinity, float32_is_neg(f32));
    932     } else if (float32_is_neg(f32)) {
    933         float_raise(float_flag_invalid, s);
    934         return float32_default_nan(s);
    935     } else if (float32_is_infinity(f32)) {
    936         return float32_zero;
    937     }
    938 
    939     /* Scale and normalize to a double-precision value between 0.25 and 1.0,
    940      * preserving the parity of the exponent.  */
    941 
    942     f64_frac = ((uint64_t) f32_frac) << 29;
    943 
    944     f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
    945 
    946     /* result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15) */
    947     val = deposit32(0, 31, 1, f32_sign);
    948     val = deposit32(val, 23, 8, f32_exp);
    949     val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
    950     return make_float32(val);
    951 }
    952 
    953 float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
    954 {
    955     float_status *s = fpstp;
    956     float64 f64 = float64_squash_input_denormal(input, s);
    957     uint64_t val = float64_val(f64);
    958     bool f64_sign = float64_is_neg(f64);
    959     int f64_exp = extract64(val, 52, 11);
    960     uint64_t f64_frac = extract64(val, 0, 52);
    961 
    962     if (float64_is_any_nan(f64)) {
    963         float64 nan = f64;
    964         if (float64_is_signaling_nan(f64, s)) {
    965             float_raise(float_flag_invalid, s);
    966             if (!s->default_nan_mode) {
    967                 nan = float64_silence_nan(f64, fpstp);
    968             }
    969         }
    970         if (s->default_nan_mode) {
    971             nan =  float64_default_nan(s);
    972         }
    973         return nan;
    974     } else if (float64_is_zero(f64)) {
    975         float_raise(float_flag_divbyzero, s);
    976         return float64_set_sign(float64_infinity, float64_is_neg(f64));
    977     } else if (float64_is_neg(f64)) {
    978         float_raise(float_flag_invalid, s);
    979         return float64_default_nan(s);
    980     } else if (float64_is_infinity(f64)) {
    981         return float64_zero;
    982     }
    983 
    984     f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
    985 
    986     /* result = sign : result_exp<10:0> : estimate<7:0> : Zeros(44) */
    987     val = deposit64(0, 63, 1, f64_sign);
    988     val = deposit64(val, 52, 11, f64_exp);
    989     val = deposit64(val, 44, 8, extract64(f64_frac, 52 - 8, 8));
    990     return make_float64(val);
    991 }
    992 
    993 uint32_t HELPER(recpe_u32)(uint32_t a)
    994 {
    995     int input, estimate;
    996 
    997     if ((a & 0x80000000) == 0) {
    998         return 0xffffffff;
    999     }
   1000 
   1001     input = extract32(a, 23, 9);
   1002     estimate = recip_estimate(input);
   1003 
   1004     return deposit32(0, (32 - 9), 9, estimate);
   1005 }
   1006 
   1007 uint32_t HELPER(rsqrte_u32)(uint32_t a)
   1008 {
   1009     int estimate;
   1010 
   1011     if ((a & 0xc0000000) == 0) {
   1012         return 0xffffffff;
   1013     }
   1014 
   1015     estimate = do_recip_sqrt_estimate(extract32(a, 23, 9));
   1016 
   1017     return deposit32(0, 23, 9, estimate);
   1018 }
   1019 
   1020 /* VFPv4 fused multiply-accumulate */
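        /*
         * These wrap the softfloat muladd routines with flags 0, i.e. a fused
         * (a * b) + c with a single rounding at the end, which is what the
         * VFPv4 fused multiply-accumulate instructions require.
         */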
   1021 dh_ctype_f16 VFP_HELPER(muladd, h)(dh_ctype_f16 a, dh_ctype_f16 b,
   1022                                    dh_ctype_f16 c, void *fpstp)
   1023 {
   1024     float_status *fpst = fpstp;
   1025     return float16_muladd(a, b, c, 0, fpst);
   1026 }
   1027 
   1028 float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp)
   1029 {
   1030     float_status *fpst = fpstp;
   1031     return float32_muladd(a, b, c, 0, fpst);
   1032 }
   1033 
   1034 float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
   1035 {
   1036     float_status *fpst = fpstp;
   1037     return float64_muladd(a, b, c, 0, fpst);
   1038 }
   1039 
   1040 /* ARMv8 round to integral */
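        /*
         * The "_exact" helpers below signal Inexact as usual (as VRINTX/FRINTX
         * require), while the plain rinth/rints/rintd helpers further down
         * suppress any Inexact flag raised by the rounding itself.
         */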
   1041 dh_ctype_f16 HELPER(rinth_exact)(dh_ctype_f16 x, void *fp_status)
   1042 {
   1043     return float16_round_to_int(x, fp_status);
   1044 }
   1045 
   1046 float32 HELPER(rints_exact)(float32 x, void *fp_status)
   1047 {
   1048     return float32_round_to_int(x, fp_status);
   1049 }
   1050 
   1051 float64 HELPER(rintd_exact)(float64 x, void *fp_status)
   1052 {
   1053     return float64_round_to_int(x, fp_status);
   1054 }
   1055 
   1056 dh_ctype_f16 HELPER(rinth)(dh_ctype_f16 x, void *fp_status)
   1057 {
   1058     int old_flags = get_float_exception_flags(fp_status), new_flags;
   1059     float16 ret;
   1060 
   1061     ret = float16_round_to_int(x, fp_status);
   1062 
   1063     /* Suppress any inexact exceptions the conversion produced */
   1064     if (!(old_flags & float_flag_inexact)) {
   1065         new_flags = get_float_exception_flags(fp_status);
   1066         set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
   1067     }
   1068 
   1069     return ret;
   1070 }
   1071 
   1072 float32 HELPER(rints)(float32 x, void *fp_status)
   1073 {
   1074     int old_flags = get_float_exception_flags(fp_status), new_flags;
   1075     float32 ret;
   1076 
   1077     ret = float32_round_to_int(x, fp_status);
   1078 
   1079     /* Suppress any inexact exceptions the conversion produced */
   1080     if (!(old_flags & float_flag_inexact)) {
   1081         new_flags = get_float_exception_flags(fp_status);
   1082         set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
   1083     }
   1084 
   1085     return ret;
   1086 }
   1087 
   1088 float64 HELPER(rintd)(float64 x, void *fp_status)
   1089 {
   1090     int old_flags = get_float_exception_flags(fp_status), new_flags;
   1091     float64 ret;
   1092 
   1093     ret = float64_round_to_int(x, fp_status);
   1094 
   1095     new_flags = get_float_exception_flags(fp_status);
   1096 
   1097     /* Suppress any inexact exceptions the conversion produced */
   1098     if (!(old_flags & float_flag_inexact)) {
   1099         new_flags = get_float_exception_flags(fp_status);
   1100         set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
   1101     }
   1102 
   1103     return ret;
   1104 }
   1105 
   1106 /* Convert ARM rounding mode to softfloat */
   1107 int arm_rmode_to_sf(int rmode)
   1108 {
   1109     switch (rmode) {
   1110     case FPROUNDING_TIEAWAY:
   1111         rmode = float_round_ties_away;
   1112         break;
   1113     case FPROUNDING_ODD:
   1114         /* FIXME: add support for ODD */
   1115         qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n",
   1116                       rmode);
   1117         /* fall through for now */
   1118     case FPROUNDING_TIEEVEN:
   1119     default:
   1120         rmode = float_round_nearest_even;
   1121         break;
   1122     case FPROUNDING_POSINF:
   1123         rmode = float_round_up;
   1124         break;
   1125     case FPROUNDING_NEGINF:
   1126         rmode = float_round_down;
   1127         break;
   1128     case FPROUNDING_ZERO:
   1129         rmode = float_round_to_zero;
   1130         break;
   1131     }
   1132     return rmode;
   1133 }
   1134 
   1135 /*
   1136  * Implement float64 to int32_t conversion without saturation;
   1137  * the result is supplied modulo 2^32.
   1138  */
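        /*
         * For example, converting 2^32 + 5 (exactly representable as a
         * float64) raises Invalid for the overflow and returns 5 in the low
         * 32 bits, with the !Z indicator (1) in the upper 32 bits.
         */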
   1139 uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus)
   1140 {
   1141     float_status *status = vstatus;
   1142     uint32_t exp, sign;
   1143     uint64_t frac;
   1144     uint32_t inexact = 1; /* !Z */
   1145 
   1146     sign = extract64(value, 63, 1);
   1147     exp = extract64(value, 52, 11);
   1148     frac = extract64(value, 0, 52);
   1149 
   1150     if (exp == 0) {
   1151         /* While not inexact for IEEE FP, -0.0 is inexact for JavaScript.  */
   1152         inexact = sign;
   1153         if (frac != 0) {
   1154             if (status->flush_inputs_to_zero) {
   1155                 float_raise(float_flag_input_denormal, status);
   1156             } else {
   1157                 float_raise(float_flag_inexact, status);
   1158                 inexact = 1;
   1159             }
   1160         }
   1161         frac = 0;
   1162     } else if (exp == 0x7ff) {
   1163         /* This operation raises Invalid for both NaN and overflow (Inf).  */
   1164         float_raise(float_flag_invalid, status);
   1165         frac = 0;
   1166     } else {
   1167         int true_exp = exp - 1023;
   1168         int shift = true_exp - 52;
   1169 
   1170         /* Restore implicit bit.  */
   1171         frac |= 1ull << 52;
   1172 
   1173         /* Shift the fraction into place.  */
   1174         if (shift >= 0) {
   1175             /* The number is so large we must shift the fraction left.  */
   1176             if (shift >= 64) {
   1177                 /* The fraction is shifted out entirely.  */
   1178                 frac = 0;
   1179             } else {
   1180                 frac <<= shift;
   1181             }
   1182         } else if (shift > -64) {
   1183             /* Normal case -- shift right and notice if bits shift out.  */
   1184             inexact = (frac << (64 + shift)) != 0;
   1185             frac >>= -shift;
   1186         } else {
   1187             /* The fraction is shifted out entirely.  */
   1188             frac = 0;
   1189         }
   1190 
   1191         /* Notice overflow or inexact exceptions.  */
   1192         if (true_exp > 31 || frac > (sign ? 0x80000000ull : 0x7fffffff)) {
   1193             /* Overflow, for which this operation raises invalid.  */
   1194             float_raise(float_flag_invalid, status);
   1195             inexact = 1;
   1196         } else if (inexact) {
   1197             float_raise(float_flag_inexact, status);
   1198         }
   1199 
   1200         /* Honor the sign.  */
   1201         if (sign) {
   1202             frac = -frac;
   1203         }
   1204     }
   1205 
   1206     /* Pack the result and the env->ZF representation of Z together.  */
   1207     return deposit64(frac, 32, 32, inexact);
   1208 }
   1209 
   1210 uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
   1211 {
   1212     uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status);
   1213     uint32_t result = pair;
   1214     uint32_t z = (pair >> 32) == 0;
   1215 
   1216     /* Store Z, clear NCV, in FPSCR.NZCV.  */
   1217     env->vfp.xregs[ARM_VFP_FPSCR]
   1218         = (env->vfp.xregs[ARM_VFP_FPSCR] & ~CPSR_NZCV) | (z * CPSR_Z);
   1219 
   1220     return result;
   1221 }
   1222 
   1223 /* Round a float32 to an integer that fits in int32_t or int64_t.  */
   1224 static float32 frint_s(float32 f, float_status *fpst, int intsize)
   1225 {
   1226     int old_flags = get_float_exception_flags(fpst);
   1227     uint32_t exp = extract32(f, 23, 8);
   1228 
   1229     if (unlikely(exp == 0xff)) {
   1230         /* NaN or Inf.  */
   1231         goto overflow;
   1232     }
   1233 
   1234     /* Round and re-extract the exponent.  */
   1235     f = float32_round_to_int(f, fpst);
   1236     exp = extract32(f, 23, 8);
   1237 
   1238     /* Validate the range of the result.  */
   1239     if (exp < 126 + intsize) {
   1240         /* abs(F) <= INT{N}_MAX */
   1241         return f;
   1242     }
   1243     if (exp == 126 + intsize) {
   1244         uint32_t sign = extract32(f, 31, 1);
   1245         uint32_t frac = extract32(f, 0, 23);
   1246         if (sign && frac == 0) {
   1247             /* F == INT{N}_MIN */
   1248             return f;
   1249         }
   1250     }
   1251 
   1252  overflow:
   1253     /*
   1254      * Raise Invalid and return INT{N}_MIN as a float.  Revert any
   1255      * inexact exception float32_round_to_int may have raised.
   1256      */
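            /*
             * The return expression below packs INT{N}_MIN directly: for
             * intsize 32 it is sign 1, exponent 126 + 32 = 158 and a zero
             * fraction, i.e. -2^31 as a float32.
             */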
   1257     set_float_exception_flags(old_flags | float_flag_invalid, fpst);
   1258     return (0x100u + 126u + intsize) << 23;
   1259 }
   1260 
   1261 float32 HELPER(frint32_s)(float32 f, void *fpst)
   1262 {
   1263     return frint_s(f, fpst, 32);
   1264 }
   1265 
   1266 float32 HELPER(frint64_s)(float32 f, void *fpst)
   1267 {
   1268     return frint_s(f, fpst, 64);
   1269 }
   1270 
   1271 /* Round a float64 to an integer that fits in int32_t or int64_t.  */
   1272 static float64 frint_d(float64 f, float_status *fpst, int intsize)
   1273 {
   1274     int old_flags = get_float_exception_flags(fpst);
   1275     uint32_t exp = extract64(f, 52, 11);
   1276 
   1277     if (unlikely(exp == 0x7ff)) {
   1278         /* NaN or Inf.  */
   1279         goto overflow;
   1280     }
   1281 
   1282     /* Round and re-extract the exponent.  */
   1283     f = float64_round_to_int(f, fpst);
   1284     exp = extract64(f, 52, 11);
   1285 
   1286     /* Validate the range of the result.  */
   1287     if (exp < 1022 + intsize) {
   1288         /* abs(F) <= INT{N}_MAX */
   1289         return f;
   1290     }
   1291     if (exp == 1022 + intsize) {
   1292         uint64_t sign = extract64(f, 63, 1);
   1293         uint64_t frac = extract64(f, 0, 52);
   1294         if (sign && frac == 0) {
   1295             /* F == INT{N}_MIN */
   1296             return f;
   1297         }
   1298     }
   1299 
   1300  overflow:
   1301     /*
   1302      * Raise Invalid and return INT{N}_MIN as a float.  Revert any
   1303      * inexact exception float64_round_to_int may have raised.
   1304      */
   1305     set_float_exception_flags(old_flags | float_flag_invalid, fpst);
   1306     return (uint64_t)(0x800 + 1022 + intsize) << 52;
   1307 }
   1308 
   1309 float64 HELPER(frint32_d)(float64 f, void *fpst)
   1310 {
   1311     return frint_d(f, fpst, 32);
   1312 }
   1313 
   1314 float64 HELPER(frint64_d)(float64 f, void *fpst)
   1315 {
   1316     return frint_d(f, fpst, 64);
   1317 }
   1318 
   1319 void HELPER(check_hcr_el2_trap)(CPUARMState *env, uint32_t rt, uint32_t reg)
   1320 {
   1321     uint32_t syndrome;
   1322 
   1323     switch (reg) {
   1324     case ARM_VFP_MVFR0:
   1325     case ARM_VFP_MVFR1:
   1326     case ARM_VFP_MVFR2:
   1327         if (!(arm_hcr_el2_eff(env) & HCR_TID3)) {
   1328             return;
   1329         }
   1330         break;
   1331     case ARM_VFP_FPSID:
   1332         if (!(arm_hcr_el2_eff(env) & HCR_TID0)) {
   1333             return;
   1334         }
   1335         break;
   1336     default:
   1337         g_assert_not_reached();
   1338     }
   1339 
   1340     syndrome = ((EC_FPIDTRAP << ARM_EL_EC_SHIFT)
   1341                 | ARM_EL_IL
   1342                 | (1 << 24) | (0xe << 20) | (7 << 14)
   1343                 | (reg << 10) | (rt << 5) | 1);
   1344 
   1345     raise_exception(env, EXCP_HYP_TRAP, syndrome, 2);
   1346 }
   1347 
   1348 #endif