qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

fpu_helper.c (108449B)


      1 /*
      2  *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
      3  *
      4  *  Copyright (c) 2003 Fabrice Bellard
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Lesser General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2.1 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Lesser General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Lesser General Public
     17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     18  */
     19 
     20 #include "qemu/osdep.h"
     21 #include <math.h>
     22 #include "cpu.h"
     23 #include "tcg-cpu.h"
     24 #include "exec/helper-proto.h"
     25 #include "fpu/softfloat.h"
     26 #include "fpu/softfloat-macros.h"
     27 #include "helper-tcg.h"
     28 
     29 /* float macros */
     30 #define FT0    (env->ft0)
     31 #define ST0    (env->fpregs[env->fpstt].d)
     32 #define ST(n)  (env->fpregs[(env->fpstt + (n)) & 7].d)
     33 #define ST1    ST(1)
     34 
     35 #define FPU_RC_SHIFT        10
     36 #define FPU_RC_MASK         (3 << FPU_RC_SHIFT)
     37 #define FPU_RC_NEAR         0x000
     38 #define FPU_RC_DOWN         0x400
     39 #define FPU_RC_UP           0x800
     40 #define FPU_RC_CHOP         0xc00
     41 
     42 #define MAXTAN 9223372036854775808.0
     43 
     44 /* the following deal with x86 long double-precision numbers */
     45 #define MAXEXPD 0x7fff
     46 #define EXPBIAS 16383
     47 #define EXPD(fp)        (fp.l.upper & 0x7fff)
     48 #define SIGND(fp)       ((fp.l.upper) & 0x8000)
     49 #define MANTD(fp)       (fp.l.lower)
     50 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
     51 
     52 #define FPUS_IE (1 << 0)
     53 #define FPUS_DE (1 << 1)
     54 #define FPUS_ZE (1 << 2)
     55 #define FPUS_OE (1 << 3)
     56 #define FPUS_UE (1 << 4)
     57 #define FPUS_PE (1 << 5)
     58 #define FPUS_SF (1 << 6)
     59 #define FPUS_SE (1 << 7)
     60 #define FPUS_B  (1 << 15)
     61 
     62 #define FPUC_EM 0x3f
     63 
     64 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
     65 #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
     66 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
     67 #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
     68 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
     69 #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
     70 #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
     71 #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
     72 
     73 static inline void fpush(CPUX86State *env)
     74 {
     75     env->fpstt = (env->fpstt - 1) & 7;
     76     env->fptags[env->fpstt] = 0; /* validate stack entry */
     77 }
     78 
     79 static inline void fpop(CPUX86State *env)
     80 {
     81     env->fptags[env->fpstt] = 1; /* invalidate stack entry */
     82     env->fpstt = (env->fpstt + 1) & 7;
     83 }
     84 
     85 static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr)
     86 {
     87     CPU_LDoubleU temp;
     88 
     89     temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
     90     temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
     91     return temp.d;
     92 }
     93 
     94 static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
     95                     uintptr_t retaddr)
     96 {
     97     CPU_LDoubleU temp;
     98 
     99     temp.d = f;
    100     cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
    101     cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
    102 }
    103 
    104 /* x87 FPU helpers */
    105 
    106 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
    107 {
    108     union {
    109         float64 f64;
    110         double d;
    111     } u;
    112 
    113     u.f64 = floatx80_to_float64(a, &env->fp_status);
    114     return u.d;
    115 }
    116 
    117 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
    118 {
    119     union {
    120         float64 f64;
    121         double d;
    122     } u;
    123 
    124     u.d = a;
    125     return float64_to_floatx80(u.f64, &env->fp_status);
    126 }
    127 
    128 static void fpu_set_exception(CPUX86State *env, int mask)
    129 {
    130     env->fpus |= mask;
    131     if (env->fpus & (~env->fpuc & FPUC_EM)) {
    132         env->fpus |= FPUS_SE | FPUS_B;
    133     }
    134 }
    135 
    136 static inline uint8_t save_exception_flags(CPUX86State *env)
    137 {
    138     uint8_t old_flags = get_float_exception_flags(&env->fp_status);
    139     set_float_exception_flags(0, &env->fp_status);
    140     return old_flags;
    141 }
    142 
    143 static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
    144 {
    145     uint8_t new_flags = get_float_exception_flags(&env->fp_status);
    146     float_raise(old_flags, &env->fp_status);
    147     fpu_set_exception(env,
    148                       ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
    149                        (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
    150                        (new_flags & float_flag_overflow ? FPUS_OE : 0) |
    151                        (new_flags & float_flag_underflow ? FPUS_UE : 0) |
    152                        (new_flags & float_flag_inexact ? FPUS_PE : 0) |
    153                        (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
    154 }
    155 
    156 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
    157 {
    158     uint8_t old_flags = save_exception_flags(env);
    159     floatx80 ret = floatx80_div(a, b, &env->fp_status);
    160     merge_exception_flags(env, old_flags);
    161     return ret;
    162 }
    163 
    164 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
    165 {
    166     if (env->cr[0] & CR0_NE_MASK) {
    167         raise_exception_ra(env, EXCP10_COPR, retaddr);
    168     }
    169 #if !defined(CONFIG_USER_ONLY)
    170     else {
    171         fpu_check_raise_ferr_irq(env);
    172     }
    173 #endif
    174 }
    175 
    176 void helper_flds_FT0(CPUX86State *env, uint32_t val)
    177 {
    178     uint8_t old_flags = save_exception_flags(env);
    179     union {
    180         float32 f;
    181         uint32_t i;
    182     } u;
    183 
    184     u.i = val;
    185     FT0 = float32_to_floatx80(u.f, &env->fp_status);
    186     merge_exception_flags(env, old_flags);
    187 }
    188 
    189 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
    190 {
    191     uint8_t old_flags = save_exception_flags(env);
    192     union {
    193         float64 f;
    194         uint64_t i;
    195     } u;
    196 
    197     u.i = val;
    198     FT0 = float64_to_floatx80(u.f, &env->fp_status);
    199     merge_exception_flags(env, old_flags);
    200 }
    201 
    202 void helper_fildl_FT0(CPUX86State *env, int32_t val)
    203 {
    204     FT0 = int32_to_floatx80(val, &env->fp_status);
    205 }
    206 
    207 void helper_flds_ST0(CPUX86State *env, uint32_t val)
    208 {
    209     uint8_t old_flags = save_exception_flags(env);
    210     int new_fpstt;
    211     union {
    212         float32 f;
    213         uint32_t i;
    214     } u;
    215 
    216     new_fpstt = (env->fpstt - 1) & 7;
    217     u.i = val;
    218     env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
    219     env->fpstt = new_fpstt;
    220     env->fptags[new_fpstt] = 0; /* validate stack entry */
    221     merge_exception_flags(env, old_flags);
    222 }
    223 
    224 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
    225 {
    226     uint8_t old_flags = save_exception_flags(env);
    227     int new_fpstt;
    228     union {
    229         float64 f;
    230         uint64_t i;
    231     } u;
    232 
    233     new_fpstt = (env->fpstt - 1) & 7;
    234     u.i = val;
    235     env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
    236     env->fpstt = new_fpstt;
    237     env->fptags[new_fpstt] = 0; /* validate stack entry */
    238     merge_exception_flags(env, old_flags);
    239 }
    240 
    241 static FloatX80RoundPrec tmp_maximise_precision(float_status *st)
    242 {
    243     FloatX80RoundPrec old = get_floatx80_rounding_precision(st);
    244     set_floatx80_rounding_precision(floatx80_precision_x, st);
    245     return old;
    246 }
    247 
    248 void helper_fildl_ST0(CPUX86State *env, int32_t val)
    249 {
    250     int new_fpstt;
    251     FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);
    252 
    253     new_fpstt = (env->fpstt - 1) & 7;
    254     env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
    255     env->fpstt = new_fpstt;
    256     env->fptags[new_fpstt] = 0; /* validate stack entry */
    257 
    258     set_floatx80_rounding_precision(old, &env->fp_status);
    259 }
    260 
    261 void helper_fildll_ST0(CPUX86State *env, int64_t val)
    262 {
    263     int new_fpstt;
    264     FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);
    265 
    266     new_fpstt = (env->fpstt - 1) & 7;
    267     env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
    268     env->fpstt = new_fpstt;
    269     env->fptags[new_fpstt] = 0; /* validate stack entry */
    270 
    271     set_floatx80_rounding_precision(old, &env->fp_status);
    272 }
    273 
    274 uint32_t helper_fsts_ST0(CPUX86State *env)
    275 {
    276     uint8_t old_flags = save_exception_flags(env);
    277     union {
    278         float32 f;
    279         uint32_t i;
    280     } u;
    281 
    282     u.f = floatx80_to_float32(ST0, &env->fp_status);
    283     merge_exception_flags(env, old_flags);
    284     return u.i;
    285 }
    286 
    287 uint64_t helper_fstl_ST0(CPUX86State *env)
    288 {
    289     uint8_t old_flags = save_exception_flags(env);
    290     union {
    291         float64 f;
    292         uint64_t i;
    293     } u;
    294 
    295     u.f = floatx80_to_float64(ST0, &env->fp_status);
    296     merge_exception_flags(env, old_flags);
    297     return u.i;
    298 }
    299 
    300 int32_t helper_fist_ST0(CPUX86State *env)
    301 {
    302     uint8_t old_flags = save_exception_flags(env);
    303     int32_t val;
    304 
    305     val = floatx80_to_int32(ST0, &env->fp_status);
    306     if (val != (int16_t)val) {
    307         set_float_exception_flags(float_flag_invalid, &env->fp_status);
    308         val = -32768;
    309     }
    310     merge_exception_flags(env, old_flags);
    311     return val;
    312 }
    313 
    314 int32_t helper_fistl_ST0(CPUX86State *env)
    315 {
    316     uint8_t old_flags = save_exception_flags(env);
    317     int32_t val;
    318 
    319     val = floatx80_to_int32(ST0, &env->fp_status);
    320     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
    321         val = 0x80000000;
    322     }
    323     merge_exception_flags(env, old_flags);
    324     return val;
    325 }
    326 
    327 int64_t helper_fistll_ST0(CPUX86State *env)
    328 {
    329     uint8_t old_flags = save_exception_flags(env);
    330     int64_t val;
    331 
    332     val = floatx80_to_int64(ST0, &env->fp_status);
    333     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
    334         val = 0x8000000000000000ULL;
    335     }
    336     merge_exception_flags(env, old_flags);
    337     return val;
    338 }
    339 
    340 int32_t helper_fistt_ST0(CPUX86State *env)
    341 {
    342     uint8_t old_flags = save_exception_flags(env);
    343     int32_t val;
    344 
    345     val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    346     if (val != (int16_t)val) {
    347         set_float_exception_flags(float_flag_invalid, &env->fp_status);
    348         val = -32768;
    349     }
    350     merge_exception_flags(env, old_flags);
    351     return val;
    352 }
    353 
    354 int32_t helper_fisttl_ST0(CPUX86State *env)
    355 {
    356     uint8_t old_flags = save_exception_flags(env);
    357     int32_t val;
    358 
    359     val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    360     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
    361         val = 0x80000000;
    362     }
    363     merge_exception_flags(env, old_flags);
    364     return val;
    365 }
    366 
    367 int64_t helper_fisttll_ST0(CPUX86State *env)
    368 {
    369     uint8_t old_flags = save_exception_flags(env);
    370     int64_t val;
    371 
    372     val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
    373     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
    374         val = 0x8000000000000000ULL;
    375     }
    376     merge_exception_flags(env, old_flags);
    377     return val;
    378 }
    379 
    380 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
    381 {
    382     int new_fpstt;
    383 
    384     new_fpstt = (env->fpstt - 1) & 7;
    385     env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC());
    386     env->fpstt = new_fpstt;
    387     env->fptags[new_fpstt] = 0; /* validate stack entry */
    388 }
    389 
    390 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
    391 {
    392     do_fstt(env, ST0, ptr, GETPC());
    393 }
    394 
    395 void helper_fpush(CPUX86State *env)
    396 {
    397     fpush(env);
    398 }
    399 
    400 void helper_fpop(CPUX86State *env)
    401 {
    402     fpop(env);
    403 }
    404 
    405 void helper_fdecstp(CPUX86State *env)
    406 {
    407     env->fpstt = (env->fpstt - 1) & 7;
    408     env->fpus &= ~0x4700;
    409 }
    410 
    411 void helper_fincstp(CPUX86State *env)
    412 {
    413     env->fpstt = (env->fpstt + 1) & 7;
    414     env->fpus &= ~0x4700;
    415 }
    416 
    417 /* FPU move */
    418 
    419 void helper_ffree_STN(CPUX86State *env, int st_index)
    420 {
    421     env->fptags[(env->fpstt + st_index) & 7] = 1;
    422 }
    423 
    424 void helper_fmov_ST0_FT0(CPUX86State *env)
    425 {
    426     ST0 = FT0;
    427 }
    428 
    429 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
    430 {
    431     FT0 = ST(st_index);
    432 }
    433 
    434 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
    435 {
    436     ST0 = ST(st_index);
    437 }
    438 
    439 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
    440 {
    441     ST(st_index) = ST0;
    442 }
    443 
    444 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
    445 {
    446     floatx80 tmp;
    447 
    448     tmp = ST(st_index);
    449     ST(st_index) = ST0;
    450     ST0 = tmp;
    451 }
    452 
    453 /* FPU operations */
    454 
    455 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
    456 
    457 void helper_fcom_ST0_FT0(CPUX86State *env)
    458 {
    459     uint8_t old_flags = save_exception_flags(env);
    460     FloatRelation ret;
    461 
    462     ret = floatx80_compare(ST0, FT0, &env->fp_status);
    463     env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    464     merge_exception_flags(env, old_flags);
    465 }
    466 
    467 void helper_fucom_ST0_FT0(CPUX86State *env)
    468 {
    469     uint8_t old_flags = save_exception_flags(env);
    470     FloatRelation ret;
    471 
    472     ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    473     env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    474     merge_exception_flags(env, old_flags);
    475 }
    476 
    477 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
    478 
    479 void helper_fcomi_ST0_FT0(CPUX86State *env)
    480 {
    481     uint8_t old_flags = save_exception_flags(env);
    482     int eflags;
    483     FloatRelation ret;
    484 
    485     ret = floatx80_compare(ST0, FT0, &env->fp_status);
    486     eflags = cpu_cc_compute_all(env, CC_OP);
    487     eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    488     CC_SRC = eflags;
    489     merge_exception_flags(env, old_flags);
    490 }
    491 
    492 void helper_fucomi_ST0_FT0(CPUX86State *env)
    493 {
    494     uint8_t old_flags = save_exception_flags(env);
    495     int eflags;
    496     FloatRelation ret;
    497 
    498     ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    499     eflags = cpu_cc_compute_all(env, CC_OP);
    500     eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    501     CC_SRC = eflags;
    502     merge_exception_flags(env, old_flags);
    503 }
    504 
    505 void helper_fadd_ST0_FT0(CPUX86State *env)
    506 {
    507     uint8_t old_flags = save_exception_flags(env);
    508     ST0 = floatx80_add(ST0, FT0, &env->fp_status);
    509     merge_exception_flags(env, old_flags);
    510 }
    511 
    512 void helper_fmul_ST0_FT0(CPUX86State *env)
    513 {
    514     uint8_t old_flags = save_exception_flags(env);
    515     ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
    516     merge_exception_flags(env, old_flags);
    517 }
    518 
    519 void helper_fsub_ST0_FT0(CPUX86State *env)
    520 {
    521     uint8_t old_flags = save_exception_flags(env);
    522     ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
    523     merge_exception_flags(env, old_flags);
    524 }
    525 
    526 void helper_fsubr_ST0_FT0(CPUX86State *env)
    527 {
    528     uint8_t old_flags = save_exception_flags(env);
    529     ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
    530     merge_exception_flags(env, old_flags);
    531 }
    532 
    533 void helper_fdiv_ST0_FT0(CPUX86State *env)
    534 {
    535     ST0 = helper_fdiv(env, ST0, FT0);
    536 }
    537 
    538 void helper_fdivr_ST0_FT0(CPUX86State *env)
    539 {
    540     ST0 = helper_fdiv(env, FT0, ST0);
    541 }
    542 
    543 /* fp operations between STN and ST0 */
    544 
    545 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
    546 {
    547     uint8_t old_flags = save_exception_flags(env);
    548     ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
    549     merge_exception_flags(env, old_flags);
    550 }
    551 
    552 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
    553 {
    554     uint8_t old_flags = save_exception_flags(env);
    555     ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
    556     merge_exception_flags(env, old_flags);
    557 }
    558 
    559 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
    560 {
    561     uint8_t old_flags = save_exception_flags(env);
    562     ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
    563     merge_exception_flags(env, old_flags);
    564 }
    565 
    566 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
    567 {
    568     uint8_t old_flags = save_exception_flags(env);
    569     ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
    570     merge_exception_flags(env, old_flags);
    571 }
    572 
    573 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
    574 {
    575     floatx80 *p;
    576 
    577     p = &ST(st_index);
    578     *p = helper_fdiv(env, *p, ST0);
    579 }
    580 
    581 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
    582 {
    583     floatx80 *p;
    584 
    585     p = &ST(st_index);
    586     *p = helper_fdiv(env, ST0, *p);
    587 }
    588 
    589 /* misc FPU operations */
    590 void helper_fchs_ST0(CPUX86State *env)
    591 {
    592     ST0 = floatx80_chs(ST0);
    593 }
    594 
    595 void helper_fabs_ST0(CPUX86State *env)
    596 {
    597     ST0 = floatx80_abs(ST0);
    598 }
    599 
    600 void helper_fld1_ST0(CPUX86State *env)
    601 {
    602     ST0 = floatx80_one;
    603 }
    604 
    605 void helper_fldl2t_ST0(CPUX86State *env)
    606 {
    607     switch (env->fpuc & FPU_RC_MASK) {
    608     case FPU_RC_UP:
    609         ST0 = floatx80_l2t_u;
    610         break;
    611     default:
    612         ST0 = floatx80_l2t;
    613         break;
    614     }
    615 }
    616 
    617 void helper_fldl2e_ST0(CPUX86State *env)
    618 {
    619     switch (env->fpuc & FPU_RC_MASK) {
    620     case FPU_RC_DOWN:
    621     case FPU_RC_CHOP:
    622         ST0 = floatx80_l2e_d;
    623         break;
    624     default:
    625         ST0 = floatx80_l2e;
    626         break;
    627     }
    628 }
    629 
    630 void helper_fldpi_ST0(CPUX86State *env)
    631 {
    632     switch (env->fpuc & FPU_RC_MASK) {
    633     case FPU_RC_DOWN:
    634     case FPU_RC_CHOP:
    635         ST0 = floatx80_pi_d;
    636         break;
    637     default:
    638         ST0 = floatx80_pi;
    639         break;
    640     }
    641 }
    642 
    643 void helper_fldlg2_ST0(CPUX86State *env)
    644 {
    645     switch (env->fpuc & FPU_RC_MASK) {
    646     case FPU_RC_DOWN:
    647     case FPU_RC_CHOP:
    648         ST0 = floatx80_lg2_d;
    649         break;
    650     default:
    651         ST0 = floatx80_lg2;
    652         break;
    653     }
    654 }
    655 
    656 void helper_fldln2_ST0(CPUX86State *env)
    657 {
    658     switch (env->fpuc & FPU_RC_MASK) {
    659     case FPU_RC_DOWN:
    660     case FPU_RC_CHOP:
    661         ST0 = floatx80_ln2_d;
    662         break;
    663     default:
    664         ST0 = floatx80_ln2;
    665         break;
    666     }
    667 }
    668 
    669 void helper_fldz_ST0(CPUX86State *env)
    670 {
    671     ST0 = floatx80_zero;
    672 }
    673 
    674 void helper_fldz_FT0(CPUX86State *env)
    675 {
    676     FT0 = floatx80_zero;
    677 }
    678 
    679 uint32_t helper_fnstsw(CPUX86State *env)
    680 {
    681     return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    682 }
    683 
    684 uint32_t helper_fnstcw(CPUX86State *env)
    685 {
    686     return env->fpuc;
    687 }
    688 
    689 static void set_x86_rounding_mode(unsigned mode, float_status *status)
    690 {
    691     static FloatRoundMode x86_round_mode[4] = {
    692         float_round_nearest_even,
    693         float_round_down,
    694         float_round_up,
    695         float_round_to_zero
    696     };
    697     assert(mode < ARRAY_SIZE(x86_round_mode));
    698     set_float_rounding_mode(x86_round_mode[mode], status);
    699 }
    700 
    701 void update_fp_status(CPUX86State *env)
    702 {
    703     int rnd_mode;
    704     FloatX80RoundPrec rnd_prec;
    705 
    706     /* set rounding mode */
    707     rnd_mode = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT;
    708     set_x86_rounding_mode(rnd_mode, &env->fp_status);
    709 
    710     switch ((env->fpuc >> 8) & 3) {
    711     case 0:
    712         rnd_prec = floatx80_precision_s;
    713         break;
    714     case 2:
    715         rnd_prec = floatx80_precision_d;
    716         break;
    717     case 3:
    718     default:
    719         rnd_prec = floatx80_precision_x;
    720         break;
    721     }
    722     set_floatx80_rounding_precision(rnd_prec, &env->fp_status);
    723 }
    724 
/* Install @val as the new FPU control word by way of cpu_set_fpuc(). */
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}
    729 
    730 void helper_fclex(CPUX86State *env)
    731 {
    732     env->fpus &= 0x7f00;
    733 }
    734 
    735 void helper_fwait(CPUX86State *env)
    736 {
    737     if (env->fpus & FPUS_SE) {
    738         fpu_raise_exception(env, GETPC());
    739     }
    740 }
    741 
    742 static void do_fninit(CPUX86State *env)
    743 {
    744     env->fpus = 0;
    745     env->fpstt = 0;
    746     env->fpcs = 0;
    747     env->fpds = 0;
    748     env->fpip = 0;
    749     env->fpdp = 0;
    750     cpu_set_fpuc(env, 0x37f);
    751     env->fptags[0] = 1;
    752     env->fptags[1] = 1;
    753     env->fptags[2] = 1;
    754     env->fptags[3] = 1;
    755     env->fptags[4] = 1;
    756     env->fptags[5] = 1;
    757     env->fptags[6] = 1;
    758     env->fptags[7] = 1;
    759 }
    760 
/* Externally visible wrapper around do_fninit(): reset the x87 state. */
void helper_fninit(CPUX86State *env)
{
    do_fninit(env);
}
    765 
    766 /* BCD ops */
    767 
    768 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
    769 {
    770     floatx80 tmp;
    771     uint64_t val;
    772     unsigned int v;
    773     int i;
    774 
    775     val = 0;
    776     for (i = 8; i >= 0; i--) {
    777         v = cpu_ldub_data_ra(env, ptr + i, GETPC());
    778         val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
    779     }
    780     tmp = int64_to_floatx80(val, &env->fp_status);
    781     if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
    782         tmp = floatx80_chs(tmp);
    783     }
    784     fpush(env);
    785     ST0 = tmp;
    786 }
    787 
    788 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
    789 {
    790     uint8_t old_flags = save_exception_flags(env);
    791     int v;
    792     target_ulong mem_ref, mem_end;
    793     int64_t val;
    794     CPU_LDoubleU temp;
    795 
    796     temp.d = ST0;
    797 
    798     val = floatx80_to_int64(ST0, &env->fp_status);
    799     mem_ref = ptr;
    800     if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
    801         set_float_exception_flags(float_flag_invalid, &env->fp_status);
    802         while (mem_ref < ptr + 7) {
    803             cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
    804         }
    805         cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
    806         cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
    807         cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
    808         merge_exception_flags(env, old_flags);
    809         return;
    810     }
    811     mem_end = mem_ref + 9;
    812     if (SIGND(temp)) {
    813         cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
    814         val = -val;
    815     } else {
    816         cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
    817     }
    818     while (mem_ref < mem_end) {
    819         if (val == 0) {
    820             break;
    821         }
    822         v = val % 100;
    823         val = val / 100;
    824         v = ((v / 10) << 4) | (v % 10);
    825         cpu_stb_data_ra(env, mem_ref++, v, GETPC());
    826     }
    827     while (mem_ref < mem_end) {
    828         cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
    829     }
    830     merge_exception_flags(env, old_flags);
    831 }
    832 
/*
 * 128-bit significand of log(2), split into two 64-bit halves.  The high
 * half is the same bit pattern as the significand of floatx80_ln2_d.
 */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL
    836 
/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].  Coefficient 0 is given as a pair:
 * f2xm1_coeff_0 plus the much smaller f2xm1_coeff_0_low.
 */
#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
    850 
/*
 * One entry of the lookup table for 2^x - 1 (f2xm1_table).  The member
 * order must not change: the table uses positional initializers.
 */
struct f2xm1_data {
    /*
     * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
     * are very close to exact floatx80 values.
     */
    floatx80 t;
    /* The value of 2^t.  */
    floatx80 exp2;
    /* The value of 2^t - 1.  */
    floatx80 exp2m1;
};
    862 
    863 static const struct f2xm1_data f2xm1_table[65] = {
    864     { make_floatx80_init(0xbfff, 0x8000000000000000ULL),
    865       make_floatx80_init(0x3ffe, 0x8000000000000000ULL),
    866       make_floatx80_init(0xbffe, 0x8000000000000000ULL) },
    867     { make_floatx80_init(0xbffe, 0xf800000000002e7eULL),
    868       make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL),
    869       make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) },
    870     { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL),
    871       make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL),
    872       make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) },
    873     { make_floatx80_init(0xbffe, 0xe800000000006f10ULL),
    874       make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL),
    875       make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) },
    876     { make_floatx80_init(0xbffe, 0xe000000000008a45ULL),
    877       make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
    878       make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) },
    879     { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL),
    880       make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL),
    881       make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) },
    882     { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL),
    883       make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL),
    884       make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) },
    885     { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL),
    886       make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL),
    887       make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) },
    888     { make_floatx80_init(0xbffe, 0xc000000000006530ULL),
    889       make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL),
    890       make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) },
    891     { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL),
    892       make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL),
    893       make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) },
    894     { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL),
    895       make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL),
    896       make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) },
    897     { make_floatx80_init(0xbffe, 0xa800000000006f8aULL),
    898       make_floatx80_init(0x3ffe, 0xa27043030c49370aULL),
    899       make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) },
    900     { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL),
    901       make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL),
    902       make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) },
    903     { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL),
    904       make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
    905       make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) },
    906     { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL),
    907       make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL),
    908       make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) },
    909     { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL),
    910       make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL),
    911       make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) },
    912     { make_floatx80_init(0xbffe, 0x800000000000227dULL),
    913       make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL),
    914       make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) },
    915     { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL),
    916       make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
    917       make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) },
    918     { make_floatx80_init(0xbffd, 0xe00000000000df81ULL),
    919       make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL),
    920       make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) },
    921     { make_floatx80_init(0xbffd, 0xd00000000000bccfULL),
    922       make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL),
    923       make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) },
    924     { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL),
    925       make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL),
    926       make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) },
    927     { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL),
    928       make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL),
    929       make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) },
    930     { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL),
    931       make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL),
    932       make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) },
    933     { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL),
    934       make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL),
    935       make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) },
    936     { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL),
    937       make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL),
    938       make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
    939     { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL),
    940       make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL),
    941       make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) },
    942     { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL),
    943       make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL),
    944       make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) },
    945     { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL),
    946       make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL),
    947       make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) },
    948     { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL),
    949       make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL),
    950       make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) },
    951     { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL),
    952       make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL),
    953       make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) },
    954     { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL),
    955       make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL),
    956       make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) },
    957     { make_floatx80_init(0xbff9, 0xffffffffffff11feULL),
    958       make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL),
    959       make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) },
    960     { floatx80_zero_init,
    961       make_floatx80_init(0x3fff, 0x8000000000000000ULL),
    962       floatx80_zero_init },
    963     { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL),
    964       make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL),
    965       make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
    966     { make_floatx80_init(0x3ffb, 0x800000000000b500ULL),
    967       make_floatx80_init(0x3fff, 0x85aac367cc488345ULL),
    968       make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) },
    969     { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL),
    970       make_floatx80_init(0x3fff, 0x88980e8092da7cceULL),
    971       make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) },
    972     { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL),
    973       make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL),
    974       make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
    975     { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL),
    976       make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL),
    977       make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) },
    978     { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL),
    979       make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL),
    980       make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
    981     { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL),
    982       make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL),
    983       make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) },
    984     { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL),
    985       make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL),
    986       make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) },
    987     { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL),
    988       make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL),
    989       make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
    990     { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL),
    991       make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL),
    992       make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) },
    993     { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL),
    994       make_floatx80_init(0x3fff, 0xa27043030c49370aULL),
    995       make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) },
    996     { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL),
    997       make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL),
    998       make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) },
    999     { make_floatx80_init(0x3ffd, 0xd0000000000093beULL),
   1000       make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL),
   1001       make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) },
   1002     { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL),
   1003       make_floatx80_init(0x3fff, 0xad583eea42a17876ULL),
   1004       make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) },
   1005     { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL),
   1006       make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL),
   1007       make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) },
   1008     { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL),
   1009       make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL),
   1010       make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) },
   1011     { make_floatx80_init(0x3ffe, 0x8800000000006344ULL),
   1012       make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL),
   1013       make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) },
   1014     { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL),
   1015       make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL),
   1016       make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) },
   1017     { make_floatx80_init(0x3ffe, 0x9800000000009127ULL),
   1018       make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL),
   1019       make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) },
   1020     { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL),
   1021       make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL),
   1022       make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) },
   1023     { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL),
   1024       make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL),
   1025       make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) },
   1026     { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL),
   1027       make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL),
   1028       make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) },
   1029     { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL),
   1030       make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL),
   1031       make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) },
   1032     { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL),
   1033       make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL),
   1034       make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) },
   1035     { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL),
   1036       make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL),
   1037       make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
   1038     { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL),
   1039       make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
   1040       make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
   1041     { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
   1042       make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
   1043       make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
   1044     { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
   1045       make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
   1046       make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
   1047     { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
   1048       make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
   1049       make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
   1050     { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
   1051       make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
   1052       make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
   1053     { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
   1054       make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
   1055       make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
   1056     { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
   1057       make_floatx80_init(0x4000, 0x8000000000000000ULL),
   1058       make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
   1059 };
   1060 
   1061 void helper_f2xm1(CPUX86State *env)
   1062 {
   1063     uint8_t old_flags = save_exception_flags(env);
   1064     uint64_t sig = extractFloatx80Frac(ST0);
   1065     int32_t exp = extractFloatx80Exp(ST0);
   1066     bool sign = extractFloatx80Sign(ST0);
   1067 
   1068     if (floatx80_invalid_encoding(ST0)) {
   1069         float_raise(float_flag_invalid, &env->fp_status);
   1070         ST0 = floatx80_default_nan(&env->fp_status);
   1071     } else if (floatx80_is_any_nan(ST0)) {
   1072         if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
   1073             float_raise(float_flag_invalid, &env->fp_status);
   1074             ST0 = floatx80_silence_nan(ST0, &env->fp_status);
   1075         }
   1076     } else if (exp > 0x3fff ||
   1077                (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
   1078         /* Out of range for the instruction, treat as invalid.  */
   1079         float_raise(float_flag_invalid, &env->fp_status);
   1080         ST0 = floatx80_default_nan(&env->fp_status);
   1081     } else if (exp == 0x3fff) {
   1082         /* Argument 1 or -1, exact result 1 or -0.5.  */
   1083         if (sign) {
   1084             ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
   1085         }
   1086     } else if (exp < 0x3fb0) {
   1087         if (!floatx80_is_zero(ST0)) {
   1088             /*
   1089              * Multiplying the argument by an extra-precision version
   1090              * of log(2) is sufficiently precise.  Zero arguments are
   1091              * returned unchanged.
   1092              */
   1093             uint64_t sig0, sig1, sig2;
   1094             if (exp == 0) {
   1095                 normalizeFloatx80Subnormal(sig, &exp, &sig);
   1096             }
   1097             mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
   1098                             &sig2);
   1099             /* This result is inexact.  */
   1100             sig1 |= 1;
   1101             ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
   1102                                                 sign, exp, sig0, sig1,
   1103                                                 &env->fp_status);
   1104         }
   1105     } else {
   1106         floatx80 tmp, y, accum;
   1107         bool asign, bsign;
   1108         int32_t n, aexp, bexp;
   1109         uint64_t asig0, asig1, asig2, bsig0, bsig1;
   1110         FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
   1111         FloatX80RoundPrec save_prec =
   1112             env->fp_status.floatx80_rounding_precision;
   1113         env->fp_status.float_rounding_mode = float_round_nearest_even;
   1114         env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
   1115 
   1116         /* Find the nearest multiple of 1/32 to the argument.  */
   1117         tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
   1118         n = 32 + floatx80_to_int32(tmp, &env->fp_status);
   1119         y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);
   1120 
   1121         if (floatx80_is_zero(y)) {
   1122             /*
   1123              * Use the value of 2^t - 1 from the table, to avoid
   1124              * needing to special-case zero as a result of
   1125              * multiplication below.
   1126              */
   1127             ST0 = f2xm1_table[n].t;
   1128             set_float_exception_flags(float_flag_inexact, &env->fp_status);
   1129             env->fp_status.float_rounding_mode = save_mode;
   1130         } else {
   1131             /*
   1132              * Compute the lower parts of a polynomial expansion for
   1133              * (2^y - 1) / y.
   1134              */
   1135             accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
   1136             accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
   1137             accum = floatx80_mul(accum, y, &env->fp_status);
   1138             accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
   1139             accum = floatx80_mul(accum, y, &env->fp_status);
   1140             accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
   1141             accum = floatx80_mul(accum, y, &env->fp_status);
   1142             accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
   1143             accum = floatx80_mul(accum, y, &env->fp_status);
   1144             accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
   1145             accum = floatx80_mul(accum, y, &env->fp_status);
   1146             accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
   1147             accum = floatx80_mul(accum, y, &env->fp_status);
   1148             accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);
   1149 
   1150             /*
   1151              * The full polynomial expansion is f2xm1_coeff_0 + accum
   1152              * (where accum has much lower magnitude, and so, in
   1153              * particular, carry out of the addition is not possible).
   1154              * (This expansion is only accurate to about 70 bits, not
   1155              * 128 bits.)
   1156              */
   1157             aexp = extractFloatx80Exp(f2xm1_coeff_0);
   1158             asign = extractFloatx80Sign(f2xm1_coeff_0);
   1159             shift128RightJamming(extractFloatx80Frac(accum), 0,
   1160                                  aexp - extractFloatx80Exp(accum),
   1161                                  &asig0, &asig1);
   1162             bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
   1163             bsig1 = 0;
   1164             if (asign == extractFloatx80Sign(accum)) {
   1165                 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
   1166             } else {
   1167                 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
   1168             }
   1169             /* And thus compute an approximation to 2^y - 1.  */
   1170             mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
   1171                             &asig0, &asig1, &asig2);
   1172             aexp += extractFloatx80Exp(y) - 0x3ffe;
   1173             asign ^= extractFloatx80Sign(y);
   1174             if (n != 32) {
   1175                 /*
   1176                  * Multiply this by the precomputed value of 2^t and
   1177                  * add that of 2^t - 1.
   1178                  */
   1179                 mul128By64To192(asig0, asig1,
   1180                                 extractFloatx80Frac(f2xm1_table[n].exp2),
   1181                                 &asig0, &asig1, &asig2);
   1182                 aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
   1183                 bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
   1184                 bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
   1185                 bsig1 = 0;
   1186                 if (bexp < aexp) {
   1187                     shift128RightJamming(bsig0, bsig1, aexp - bexp,
   1188                                          &bsig0, &bsig1);
   1189                 } else if (aexp < bexp) {
   1190                     shift128RightJamming(asig0, asig1, bexp - aexp,
   1191                                          &asig0, &asig1);
   1192                     aexp = bexp;
   1193                 }
   1194                 /* The sign of 2^t - 1 is always that of the result.  */
   1195                 bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
   1196                 if (asign == bsign) {
   1197                     /* Avoid possible carry out of the addition.  */
   1198                     shift128RightJamming(asig0, asig1, 1,
   1199                                          &asig0, &asig1);
   1200                     shift128RightJamming(bsig0, bsig1, 1,
   1201                                          &bsig0, &bsig1);
   1202                     ++aexp;
   1203                     add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
   1204                 } else {
   1205                     sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
   1206                     asign = bsign;
   1207                 }
   1208             }
   1209             env->fp_status.float_rounding_mode = save_mode;
   1210             /* This result is inexact.  */
   1211             asig1 |= 1;
   1212             ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
   1213                                                 asign, aexp, asig0, asig1,
   1214                                                 &env->fp_status);
   1215         }
   1216 
   1217         env->fp_status.floatx80_rounding_precision = save_prec;
   1218     }
   1219     merge_exception_flags(env, old_flags);
   1220 }
   1221 
   1222 void helper_fptan(CPUX86State *env)
   1223 {
   1224     double fptemp = floatx80_to_double(env, ST0);
   1225 
   1226     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
   1227         env->fpus |= 0x400;
   1228     } else {
   1229         fptemp = tan(fptemp);
   1230         ST0 = double_to_floatx80(env, fptemp);
   1231         fpush(env);
   1232         ST0 = floatx80_one;
   1233         env->fpus &= ~0x400; /* C2 <-- 0 */
   1234         /* the above code is for |arg| < 2**52 only */
   1235     }
   1236 }
   1237 
   1238 /* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.  */
   1239 #define pi_4_exp 0x3ffe
   1240 #define pi_4_sig_high 0xc90fdaa22168c234ULL
   1241 #define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
   1242 #define pi_2_exp 0x3fff
   1243 #define pi_2_sig_high 0xc90fdaa22168c234ULL
   1244 #define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
   1245 #define pi_34_exp 0x4000
   1246 #define pi_34_sig_high 0x96cbe3f9990e91a7ULL
   1247 #define pi_34_sig_low 0x9394c9e8a0a5159dULL
   1248 #define pi_exp 0x4000
   1249 #define pi_sig_high 0xc90fdaa22168c234ULL
   1250 #define pi_sig_low 0xc4c6628b80dc1cd1ULL
   1251 
/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 *
 * fpatan_coeff_k multiplies x^(2k+1).  The values are close to, but
 * not exactly, the Taylor series coefficients 1, -1/3, 1/5, -1/7,
 * 1/9, -1/11 and 1/13; the signs alternate accordingly.
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)
   1267 
   1268 struct fpatan_data {
   1269     /* High and low parts of atan(x).  */
   1270     floatx80 atan_high, atan_low;
   1271 };
   1272 
   1273 static const struct fpatan_data fpatan_table[9] = {
   1274     { floatx80_zero_init,
   1275       floatx80_zero_init },
   1276     { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
   1277       make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
   1278     { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
   1279       make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
   1280     { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
   1281       make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
   1282     { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
   1283       make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
   1284     { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
   1285       make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
   1286     { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
   1287       make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
   1288     { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
   1289       make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
   1290     { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
   1291       make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
   1292 };
   1293 
   1294 void helper_fpatan(CPUX86State *env)
   1295 {
   1296     uint8_t old_flags = save_exception_flags(env);
   1297     uint64_t arg0_sig = extractFloatx80Frac(ST0);
   1298     int32_t arg0_exp = extractFloatx80Exp(ST0);
   1299     bool arg0_sign = extractFloatx80Sign(ST0);
   1300     uint64_t arg1_sig = extractFloatx80Frac(ST1);
   1301     int32_t arg1_exp = extractFloatx80Exp(ST1);
   1302     bool arg1_sign = extractFloatx80Sign(ST1);
   1303 
   1304     if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
   1305         float_raise(float_flag_invalid, &env->fp_status);
   1306         ST1 = floatx80_silence_nan(ST0, &env->fp_status);
   1307     } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
   1308         float_raise(float_flag_invalid, &env->fp_status);
   1309         ST1 = floatx80_silence_nan(ST1, &env->fp_status);
   1310     } else if (floatx80_invalid_encoding(ST0) ||
   1311                floatx80_invalid_encoding(ST1)) {
   1312         float_raise(float_flag_invalid, &env->fp_status);
   1313         ST1 = floatx80_default_nan(&env->fp_status);
   1314     } else if (floatx80_is_any_nan(ST0)) {
   1315         ST1 = ST0;
   1316     } else if (floatx80_is_any_nan(ST1)) {
   1317         /* Pass this NaN through.  */
   1318     } else if (floatx80_is_zero(ST1) && !arg0_sign) {
   1319         /* Pass this zero through.  */
   1320     } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
   1321                  arg0_exp - arg1_exp >= 80) &&
   1322                !arg0_sign) {
   1323         /*
   1324          * Dividing ST1 by ST0 gives the correct result up to
   1325          * rounding, and avoids spurious underflow exceptions that
   1326          * might result from passing some small values through the
   1327          * polynomial approximation, but if a finite nonzero result of
   1328          * division is exact, the result of fpatan is still inexact
   1329          * (and underflowing where appropriate).
   1330          */
   1331         FloatX80RoundPrec save_prec =
   1332             env->fp_status.floatx80_rounding_precision;
   1333         env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
   1334         ST1 = floatx80_div(ST1, ST0, &env->fp_status);
   1335         env->fp_status.floatx80_rounding_precision = save_prec;
   1336         if (!floatx80_is_zero(ST1) &&
   1337             !(get_float_exception_flags(&env->fp_status) &
   1338               float_flag_inexact)) {
   1339             /*
   1340              * The mathematical result is very slightly closer to zero
   1341              * than this exact result.  Round a value with the
   1342              * significand adjusted accordingly to get the correct
   1343              * exceptions, and possibly an adjusted result depending
   1344              * on the rounding mode.
   1345              */
   1346             uint64_t sig = extractFloatx80Frac(ST1);
   1347             int32_t exp = extractFloatx80Exp(ST1);
   1348             bool sign = extractFloatx80Sign(ST1);
   1349             if (exp == 0) {
   1350                 normalizeFloatx80Subnormal(sig, &exp, &sig);
   1351             }
   1352             ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
   1353                                                 sign, exp, sig - 1,
   1354                                                 -1, &env->fp_status);
   1355         }
   1356     } else {
   1357         /* The result is inexact.  */
   1358         bool rsign = arg1_sign;
   1359         int32_t rexp;
   1360         uint64_t rsig0, rsig1;
   1361         if (floatx80_is_zero(ST1)) {
   1362             /*
   1363              * ST0 is negative.  The result is pi with the sign of
   1364              * ST1.
   1365              */
   1366             rexp = pi_exp;
   1367             rsig0 = pi_sig_high;
   1368             rsig1 = pi_sig_low;
   1369         } else if (floatx80_is_infinity(ST1)) {
   1370             if (floatx80_is_infinity(ST0)) {
   1371                 if (arg0_sign) {
   1372                     rexp = pi_34_exp;
   1373                     rsig0 = pi_34_sig_high;
   1374                     rsig1 = pi_34_sig_low;
   1375                 } else {
   1376                     rexp = pi_4_exp;
   1377                     rsig0 = pi_4_sig_high;
   1378                     rsig1 = pi_4_sig_low;
   1379                 }
   1380             } else {
   1381                 rexp = pi_2_exp;
   1382                 rsig0 = pi_2_sig_high;
   1383                 rsig1 = pi_2_sig_low;
   1384             }
   1385         } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
   1386             rexp = pi_2_exp;
   1387             rsig0 = pi_2_sig_high;
   1388             rsig1 = pi_2_sig_low;
   1389         } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
   1390             /* ST0 is negative.  */
   1391             rexp = pi_exp;
   1392             rsig0 = pi_sig_high;
   1393             rsig1 = pi_sig_low;
   1394         } else {
   1395             /*
   1396              * ST0 and ST1 are finite, nonzero and with exponents not
   1397              * too far apart.
   1398              */
   1399             int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
   1400             int32_t azexp, axexp;
   1401             bool adj_sub, ysign, zsign;
   1402             uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
   1403             uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
   1404             uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
   1405             uint64_t azsig0, azsig1;
   1406             uint64_t azsig2, azsig3, axsig0, axsig1;
   1407             floatx80 x8;
   1408             FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
   1409             FloatX80RoundPrec save_prec =
   1410                 env->fp_status.floatx80_rounding_precision;
   1411             env->fp_status.float_rounding_mode = float_round_nearest_even;
   1412             env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
   1413 
   1414             if (arg0_exp == 0) {
   1415                 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
   1416             }
   1417             if (arg1_exp == 0) {
   1418                 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
   1419             }
   1420             if (arg0_exp > arg1_exp ||
   1421                 (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
   1422                 /* Work with abs(ST1) / abs(ST0).  */
   1423                 num_exp = arg1_exp;
   1424                 num_sig = arg1_sig;
   1425                 den_exp = arg0_exp;
   1426                 den_sig = arg0_sig;
   1427                 if (arg0_sign) {
   1428                     /* The result is subtracted from pi.  */
   1429                     adj_exp = pi_exp;
   1430                     adj_sig0 = pi_sig_high;
   1431                     adj_sig1 = pi_sig_low;
   1432                     adj_sub = true;
   1433                 } else {
   1434                     /* The result is used as-is.  */
   1435                     adj_exp = 0;
   1436                     adj_sig0 = 0;
   1437                     adj_sig1 = 0;
   1438                     adj_sub = false;
   1439                 }
   1440             } else {
   1441                 /* Work with abs(ST0) / abs(ST1).  */
   1442                 num_exp = arg0_exp;
   1443                 num_sig = arg0_sig;
   1444                 den_exp = arg1_exp;
   1445                 den_sig = arg1_sig;
   1446                 /* The result is added to or subtracted from pi/2.  */
   1447                 adj_exp = pi_2_exp;
   1448                 adj_sig0 = pi_2_sig_high;
   1449                 adj_sig1 = pi_2_sig_low;
   1450                 adj_sub = !arg0_sign;
   1451             }
   1452 
   1453             /*
   1454              * Compute x = num/den, where 0 < x <= 1 and x is not too
   1455              * small.
   1456              */
   1457             xexp = num_exp - den_exp + 0x3ffe;
   1458             remsig0 = num_sig;
   1459             remsig1 = 0;
   1460             if (den_sig <= remsig0) {
   1461                 shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
   1462                 ++xexp;
   1463             }
   1464             xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
   1465             mul64To128(den_sig, xsig0, &msig0, &msig1);
   1466             sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
   1467             while ((int64_t) remsig0 < 0) {
   1468                 --xsig0;
   1469                 add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
   1470             }
   1471             xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
   1472             /*
   1473              * No need to correct any estimation error in xsig1; even
   1474              * with such error, it is accurate enough.
   1475              */
   1476 
   1477             /*
   1478              * Split x as x = t + y, where t = n/8 is the nearest
   1479              * multiple of 1/8 to x.
   1480              */
   1481             x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
   1482                                                false, xexp + 3, xsig0,
   1483                                                xsig1, &env->fp_status);
   1484             n = floatx80_to_int32(x8, &env->fp_status);
   1485             if (n == 0) {
   1486                 ysign = false;
   1487                 yexp = xexp;
   1488                 ysig0 = xsig0;
   1489                 ysig1 = xsig1;
   1490                 texp = 0;
   1491                 tsig = 0;
   1492             } else {
   1493                 int shift = clz32(n) + 32;
   1494                 texp = 0x403b - shift;
   1495                 tsig = n;
   1496                 tsig <<= shift;
   1497                 if (texp == xexp) {
   1498                     sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
   1499                     if ((int64_t) ysig0 >= 0) {
   1500                         ysign = false;
   1501                         if (ysig0 == 0) {
   1502                             if (ysig1 == 0) {
   1503                                 yexp = 0;
   1504                             } else {
   1505                                 shift = clz64(ysig1) + 64;
   1506                                 yexp = xexp - shift;
   1507                                 shift128Left(ysig0, ysig1, shift,
   1508                                              &ysig0, &ysig1);
   1509                             }
   1510                         } else {
   1511                             shift = clz64(ysig0);
   1512                             yexp = xexp - shift;
   1513                             shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
   1514                         }
   1515                     } else {
   1516                         ysign = true;
   1517                         sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
   1518                         if (ysig0 == 0) {
   1519                             shift = clz64(ysig1) + 64;
   1520                         } else {
   1521                             shift = clz64(ysig0);
   1522                         }
   1523                         yexp = xexp - shift;
   1524                         shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
   1525                     }
   1526                 } else {
   1527                     /*
   1528                      * t's exponent must be greater than x's because t
   1529                      * is positive and the nearest multiple of 1/8 to
   1530                      * x, and if x has a greater exponent, the power
   1531                      * of 2 with that exponent is also a multiple of
   1532                      * 1/8.
   1533                      */
   1534                     uint64_t usig0, usig1;
   1535                     shift128RightJamming(xsig0, xsig1, texp - xexp,
   1536                                          &usig0, &usig1);
   1537                     ysign = true;
   1538                     sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
   1539                     if (ysig0 == 0) {
   1540                         shift = clz64(ysig1) + 64;
   1541                     } else {
   1542                         shift = clz64(ysig0);
   1543                     }
   1544                     yexp = texp - shift;
   1545                     shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
   1546                 }
   1547             }
   1548 
   1549             /*
   1550              * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
   1551              * arctan(z).
   1552              */
   1553             zsign = ysign;
   1554             if (texp == 0 || yexp == 0) {
   1555                 zexp = yexp;
   1556                 zsig0 = ysig0;
   1557                 zsig1 = ysig1;
   1558             } else {
   1559                 /*
   1560                  * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
   1561                  */
   1562                 int32_t dexp = texp + xexp - 0x3ffe;
   1563                 uint64_t dsig0, dsig1, dsig2;
   1564                 mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
   1565                 /*
   1566                  * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
   1567                  * bit).  Add 1 to produce the denominator 1+tx.
   1568                  */
   1569                 shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
   1570                                      &dsig0, &dsig1);
   1571                 dsig0 |= 0x8000000000000000ULL;
   1572                 zexp = yexp - 1;
   1573                 remsig0 = ysig0;
   1574                 remsig1 = ysig1;
   1575                 remsig2 = 0;
   1576                 if (dsig0 <= remsig0) {
   1577                     shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
   1578                     ++zexp;
   1579                 }
   1580                 zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
   1581                 mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
   1582                 sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
   1583                        &remsig0, &remsig1, &remsig2);
   1584                 while ((int64_t) remsig0 < 0) {
   1585                     --zsig0;
   1586                     add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
   1587                            &remsig0, &remsig1, &remsig2);
   1588                 }
   1589                 zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
   1590                 /* No need to correct any estimation error in zsig1.  */
   1591             }
   1592 
   1593             if (zexp == 0) {
   1594                 azexp = 0;
   1595                 azsig0 = 0;
   1596                 azsig1 = 0;
   1597             } else {
   1598                 floatx80 z2, accum;
   1599                 uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
   1600                 /* Compute z^2.  */
   1601                 mul128To256(zsig0, zsig1, zsig0, zsig1,
   1602                             &z2sig0, &z2sig1, &z2sig2, &z2sig3);
   1603                 z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
   1604                                                    zexp + zexp - 0x3ffe,
   1605                                                    z2sig0, z2sig1,
   1606                                                    &env->fp_status);
   1607 
   1608                 /* Compute the lower parts of the polynomial expansion.  */
   1609                 accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
   1610                 accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
   1611                 accum = floatx80_mul(accum, z2, &env->fp_status);
   1612                 accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
   1613                 accum = floatx80_mul(accum, z2, &env->fp_status);
   1614                 accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
   1615                 accum = floatx80_mul(accum, z2, &env->fp_status);
   1616                 accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
   1617                 accum = floatx80_mul(accum, z2, &env->fp_status);
   1618                 accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
   1619                 accum = floatx80_mul(accum, z2, &env->fp_status);
   1620 
   1621                 /*
   1622                  * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
   1623                  * fpatan_coeff_0 is 1, and accum is negative and much smaller.
   1624                  */
   1625                 aexp = extractFloatx80Exp(fpatan_coeff_0);
   1626                 shift128RightJamming(extractFloatx80Frac(accum), 0,
   1627                                      aexp - extractFloatx80Exp(accum),
   1628                                      &asig0, &asig1);
   1629                 sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
   1630                        &asig0, &asig1);
   1631                 /* Multiply by z to compute arctan(z).  */
   1632                 azexp = aexp + zexp - 0x3ffe;
   1633                 mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
   1634                             &azsig2, &azsig3);
   1635             }
   1636 
   1637             /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign).  */
   1638             if (texp == 0) {
   1639                 /* z is positive.  */
   1640                 axexp = azexp;
   1641                 axsig0 = azsig0;
   1642                 axsig1 = azsig1;
   1643             } else {
   1644                 bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
   1645                 int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
   1646                 uint64_t low_sig0 =
   1647                     extractFloatx80Frac(fpatan_table[n].atan_low);
   1648                 uint64_t low_sig1 = 0;
   1649                 axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
   1650                 axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
   1651                 axsig1 = 0;
   1652                 shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
   1653                                      &low_sig0, &low_sig1);
   1654                 if (low_sign) {
   1655                     sub128(axsig0, axsig1, low_sig0, low_sig1,
   1656                            &axsig0, &axsig1);
   1657                 } else {
   1658                     add128(axsig0, axsig1, low_sig0, low_sig1,
   1659                            &axsig0, &axsig1);
   1660                 }
   1661                 if (azexp >= axexp) {
   1662                     shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
   1663                                          &axsig0, &axsig1);
   1664                     axexp = azexp + 1;
   1665                     shift128RightJamming(azsig0, azsig1, 1,
   1666                                          &azsig0, &azsig1);
   1667                 } else {
   1668                     shift128RightJamming(axsig0, axsig1, 1,
   1669                                          &axsig0, &axsig1);
   1670                     shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
   1671                                          &azsig0, &azsig1);
   1672                     ++axexp;
   1673                 }
   1674                 if (zsign) {
   1675                     sub128(axsig0, axsig1, azsig0, azsig1,
   1676                            &axsig0, &axsig1);
   1677                 } else {
   1678                     add128(axsig0, axsig1, azsig0, azsig1,
   1679                            &axsig0, &axsig1);
   1680                 }
   1681             }
   1682 
   1683             if (adj_exp == 0) {
   1684                 rexp = axexp;
   1685                 rsig0 = axsig0;
   1686                 rsig1 = axsig1;
   1687             } else {
   1688                 /*
   1689                  * Add or subtract arctan(x) (exponent axexp,
   1690                  * significand axsig0 and axsig1, positive, not
   1691                  * necessarily normalized) to the number given by
   1692                  * adj_exp, adj_sig0 and adj_sig1, according to
   1693                  * adj_sub.
   1694                  */
   1695                 if (adj_exp >= axexp) {
   1696                     shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
   1697                                          &axsig0, &axsig1);
   1698                     rexp = adj_exp + 1;
   1699                     shift128RightJamming(adj_sig0, adj_sig1, 1,
   1700                                          &adj_sig0, &adj_sig1);
   1701                 } else {
   1702                     shift128RightJamming(axsig0, axsig1, 1,
   1703                                          &axsig0, &axsig1);
   1704                     shift128RightJamming(adj_sig0, adj_sig1,
   1705                                          axexp - adj_exp + 1,
   1706                                          &adj_sig0, &adj_sig1);
   1707                     rexp = axexp + 1;
   1708                 }
   1709                 if (adj_sub) {
   1710                     sub128(adj_sig0, adj_sig1, axsig0, axsig1,
   1711                            &rsig0, &rsig1);
   1712                 } else {
   1713                     add128(adj_sig0, adj_sig1, axsig0, axsig1,
   1714                            &rsig0, &rsig1);
   1715                 }
   1716             }
   1717 
   1718             env->fp_status.float_rounding_mode = save_mode;
   1719             env->fp_status.floatx80_rounding_precision = save_prec;
   1720         }
   1721         /* This result is inexact.  */
   1722         rsig1 |= 1;
   1723         ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
   1724                                             rsig0, rsig1, &env->fp_status);
   1725     }
   1726 
   1727     fpop(env);
   1728     merge_exception_flags(env, old_flags);
   1729 }
   1730 
   1731 void helper_fxtract(CPUX86State *env)
   1732 {
   1733     uint8_t old_flags = save_exception_flags(env);
   1734     CPU_LDoubleU temp;
   1735 
   1736     temp.d = ST0;
   1737 
   1738     if (floatx80_is_zero(ST0)) {
   1739         /* Easy way to generate -inf and raising division by 0 exception */
   1740         ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
   1741                            &env->fp_status);
   1742         fpush(env);
   1743         ST0 = temp.d;
   1744     } else if (floatx80_invalid_encoding(ST0)) {
   1745         float_raise(float_flag_invalid, &env->fp_status);
   1746         ST0 = floatx80_default_nan(&env->fp_status);
   1747         fpush(env);
   1748         ST0 = ST1;
   1749     } else if (floatx80_is_any_nan(ST0)) {
   1750         if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
   1751             float_raise(float_flag_invalid, &env->fp_status);
   1752             ST0 = floatx80_silence_nan(ST0, &env->fp_status);
   1753         }
   1754         fpush(env);
   1755         ST0 = ST1;
   1756     } else if (floatx80_is_infinity(ST0)) {
   1757         fpush(env);
   1758         ST0 = ST1;
   1759         ST1 = floatx80_infinity;
   1760     } else {
   1761         int expdif;
   1762 
   1763         if (EXPD(temp) == 0) {
   1764             int shift = clz64(temp.l.lower);
   1765             temp.l.lower <<= shift;
   1766             expdif = 1 - EXPBIAS - shift;
   1767             float_raise(float_flag_input_denormal, &env->fp_status);
   1768         } else {
   1769             expdif = EXPD(temp) - EXPBIAS;
   1770         }
   1771         /* DP exponent bias */
   1772         ST0 = int32_to_floatx80(expdif, &env->fp_status);
   1773         fpush(env);
   1774         BIASEXPONENT(temp);
   1775         ST0 = temp.d;
   1776     }
   1777     merge_exception_flags(env, old_flags);
   1778 }
   1779 
   1780 static void helper_fprem_common(CPUX86State *env, bool mod)
   1781 {
   1782     uint8_t old_flags = save_exception_flags(env);
   1783     uint64_t quotient;
   1784     CPU_LDoubleU temp0, temp1;
   1785     int exp0, exp1, expdiff;
   1786 
   1787     temp0.d = ST0;
   1788     temp1.d = ST1;
   1789     exp0 = EXPD(temp0);
   1790     exp1 = EXPD(temp1);
   1791 
   1792     env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
   1793     if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
   1794         exp0 == 0x7fff || exp1 == 0x7fff ||
   1795         floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
   1796         ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
   1797     } else {
   1798         if (exp0 == 0) {
   1799             exp0 = 1 - clz64(temp0.l.lower);
   1800         }
   1801         if (exp1 == 0) {
   1802             exp1 = 1 - clz64(temp1.l.lower);
   1803         }
   1804         expdiff = exp0 - exp1;
   1805         if (expdiff < 64) {
   1806             ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
   1807             env->fpus |= (quotient & 0x4) << (8 - 2);  /* (C0) <-- q2 */
   1808             env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
   1809             env->fpus |= (quotient & 0x1) << (9 - 0);  /* (C1) <-- q0 */
   1810         } else {
   1811             /*
   1812              * Partial remainder.  This choice of how many bits to
   1813              * process at once is specified in AMD instruction set
   1814              * manuals, and empirically is followed by Intel
   1815              * processors as well; it ensures that the final remainder
   1816              * operation in a loop does produce the correct low three
   1817              * bits of the quotient.  AMD manuals specify that the
   1818              * flags other than C2 are cleared, and empirically Intel
   1819              * processors clear them as well.
   1820              */
   1821             int n = 32 + (expdiff % 32);
   1822             temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
   1823             ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
   1824             env->fpus |= 0x400;  /* C2 <-- 1 */
   1825         }
   1826     }
   1827     merge_exception_flags(env, old_flags);
   1828 }
   1829 
   1830 void helper_fprem1(CPUX86State *env)
   1831 {
   1832     helper_fprem_common(env, false);
   1833 }
   1834 
   1835 void helper_fprem(CPUX86State *env)
   1836 {
   1837     helper_fprem_common(env, true);
   1838 }
   1839 
   1840 /* 128-bit significand of log2(e).  */
   1841 #define log2_e_sig_high 0xb8aa3b295c17f0bbULL
   1842 #define log2_e_sig_low 0xbe87fed0691d3e89ULL
   1843 
   1844 /*
   1845  * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
   1846  * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
   1847  * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
   1848  * interval [sqrt(2)/2, sqrt(2)].
   1849  */
   1850 #define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
   1851 #define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
   1852 #define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
   1853 #define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
   1854 #define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
   1855 #define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
   1856 #define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
   1857 #define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
   1858 #define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
   1859 #define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
   1860 #define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
   1861 
   1862 /*
   1863  * Compute an approximation of log2(1+arg), where 1+arg is in the
   1864  * interval [sqrt(2)/2, sqrt(2)].  It is assumed that when this
   1865  * function is called, rounding precision is set to 80 and the
   1866  * round-to-nearest mode is in effect.  arg must not be exactly zero,
   1867  * and must not be so close to zero that underflow might occur.
   1868  */
   1869 static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
   1870                                 uint64_t *sig0, uint64_t *sig1)
   1871 {
   1872     uint64_t arg0_sig = extractFloatx80Frac(arg);
   1873     int32_t arg0_exp = extractFloatx80Exp(arg);
   1874     bool arg0_sign = extractFloatx80Sign(arg);
   1875     bool asign;
   1876     int32_t dexp, texp, aexp;
   1877     uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
   1878     uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
   1879     uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
   1880     floatx80 t2, accum;
   1881 
   1882     /*
   1883      * Compute an approximation of arg/(2+arg), with extra precision,
   1884      * as the argument to a polynomial approximation.  The extra
   1885      * precision is only needed for the first term of the
   1886      * approximation, with subsequent terms being significantly
   1887      * smaller; the approximation only uses odd exponents, and the
   1888      * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
   1889      */
   1890     if (arg0_sign) {
   1891         dexp = 0x3fff;
   1892         shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
   1893         sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
   1894     } else {
   1895         dexp = 0x4000;
   1896         shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
   1897         dsig0 |= 0x8000000000000000ULL;
   1898     }
   1899     texp = arg0_exp - dexp + 0x3ffe;
   1900     rsig0 = arg0_sig;
   1901     rsig1 = 0;
   1902     rsig2 = 0;
   1903     if (dsig0 <= rsig0) {
   1904         shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
   1905         ++texp;
   1906     }
   1907     tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
   1908     mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
   1909     sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
   1910            &rsig0, &rsig1, &rsig2);
   1911     while ((int64_t) rsig0 < 0) {
   1912         --tsig0;
   1913         add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
   1914                &rsig0, &rsig1, &rsig2);
   1915     }
   1916     tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
   1917     /*
   1918      * No need to correct any estimation error in tsig1; even with
   1919      * such error, it is accurate enough.  Now compute the square of
   1920      * that approximation.
   1921      */
   1922     mul128To256(tsig0, tsig1, tsig0, tsig1,
   1923                 &t2sig0, &t2sig1, &t2sig2, &t2sig3);
   1924     t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
   1925                                        texp + texp - 0x3ffe,
   1926                                        t2sig0, t2sig1, &env->fp_status);
   1927 
   1928     /* Compute the lower parts of the polynomial expansion.  */
   1929     accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
   1930     accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
   1931     accum = floatx80_mul(accum, t2, &env->fp_status);
   1932     accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
   1933     accum = floatx80_mul(accum, t2, &env->fp_status);
   1934     accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
   1935     accum = floatx80_mul(accum, t2, &env->fp_status);
   1936     accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
   1937     accum = floatx80_mul(accum, t2, &env->fp_status);
   1938     accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
   1939     accum = floatx80_mul(accum, t2, &env->fp_status);
   1940     accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
   1941     accum = floatx80_mul(accum, t2, &env->fp_status);
   1942     accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
   1943     accum = floatx80_mul(accum, t2, &env->fp_status);
   1944     accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
   1945     accum = floatx80_mul(accum, t2, &env->fp_status);
   1946     accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);
   1947 
   1948     /*
   1949      * The full polynomial expansion is fyl2x_coeff_0 + accum (where
   1950      * accum has much lower magnitude, and so, in particular, carry
   1951      * out of the addition is not possible), multiplied by t.  (This
   1952      * expansion is only accurate to about 70 bits, not 128 bits.)
   1953      */
   1954     aexp = extractFloatx80Exp(fyl2x_coeff_0);
   1955     asign = extractFloatx80Sign(fyl2x_coeff_0);
   1956     shift128RightJamming(extractFloatx80Frac(accum), 0,
   1957                          aexp - extractFloatx80Exp(accum),
   1958                          &asig0, &asig1);
   1959     bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
   1960     bsig1 = 0;
   1961     if (asign == extractFloatx80Sign(accum)) {
   1962         add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
   1963     } else {
   1964         sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
   1965     }
   1966     /* Multiply by t to compute the required result.  */
   1967     mul128To256(asig0, asig1, tsig0, tsig1,
   1968                 &asig0, &asig1, &asig2, &asig3);
   1969     aexp += texp - 0x3ffe;
   1970     *exp = aexp;
   1971     *sig0 = asig0;
   1972     *sig1 = asig1;
   1973 }
   1974 
   1975 void helper_fyl2xp1(CPUX86State *env)
   1976 {
   1977     uint8_t old_flags = save_exception_flags(env);
   1978     uint64_t arg0_sig = extractFloatx80Frac(ST0);
   1979     int32_t arg0_exp = extractFloatx80Exp(ST0);
   1980     bool arg0_sign = extractFloatx80Sign(ST0);
   1981     uint64_t arg1_sig = extractFloatx80Frac(ST1);
   1982     int32_t arg1_exp = extractFloatx80Exp(ST1);
   1983     bool arg1_sign = extractFloatx80Sign(ST1);
   1984 
   1985     if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
   1986         float_raise(float_flag_invalid, &env->fp_status);
   1987         ST1 = floatx80_silence_nan(ST0, &env->fp_status);
   1988     } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
   1989         float_raise(float_flag_invalid, &env->fp_status);
   1990         ST1 = floatx80_silence_nan(ST1, &env->fp_status);
   1991     } else if (floatx80_invalid_encoding(ST0) ||
   1992                floatx80_invalid_encoding(ST1)) {
   1993         float_raise(float_flag_invalid, &env->fp_status);
   1994         ST1 = floatx80_default_nan(&env->fp_status);
   1995     } else if (floatx80_is_any_nan(ST0)) {
   1996         ST1 = ST0;
   1997     } else if (floatx80_is_any_nan(ST1)) {
   1998         /* Pass this NaN through.  */
   1999     } else if (arg0_exp > 0x3ffd ||
   2000                (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
   2001                                                   0x95f619980c4336f7ULL :
   2002                                                   0xd413cccfe7799211ULL))) {
   2003         /*
   2004          * Out of range for the instruction (ST0 must have absolute
   2005          * value less than 1 - sqrt(2)/2 = 0.292..., according to
   2006          * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
   2007          * to sqrt(2) - 1, which we allow here), treat as invalid.
   2008          */
   2009         float_raise(float_flag_invalid, &env->fp_status);
   2010         ST1 = floatx80_default_nan(&env->fp_status);
   2011     } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
   2012                arg1_exp == 0x7fff) {
   2013         /*
   2014          * One argument is zero, or multiplying by infinity; correct
   2015          * result is exact and can be obtained by multiplying the
   2016          * arguments.
   2017          */
   2018         ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
   2019     } else if (arg0_exp < 0x3fb0) {
   2020         /*
   2021          * Multiplying both arguments and an extra-precision version
   2022          * of log2(e) is sufficiently precise.
   2023          */
   2024         uint64_t sig0, sig1, sig2;
   2025         int32_t exp;
   2026         if (arg0_exp == 0) {
   2027             normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
   2028         }
   2029         if (arg1_exp == 0) {
   2030             normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
   2031         }
   2032         mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
   2033                         &sig0, &sig1, &sig2);
   2034         exp = arg0_exp + 1;
   2035         mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
   2036         exp += arg1_exp - 0x3ffe;
   2037         /* This result is inexact.  */
   2038         sig1 |= 1;
   2039         ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
   2040                                             arg0_sign ^ arg1_sign, exp,
   2041                                             sig0, sig1, &env->fp_status);
   2042     } else {
   2043         int32_t aexp;
   2044         uint64_t asig0, asig1, asig2;
   2045         FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
   2046         FloatX80RoundPrec save_prec =
   2047             env->fp_status.floatx80_rounding_precision;
   2048         env->fp_status.float_rounding_mode = float_round_nearest_even;
   2049         env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
   2050 
   2051         helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
   2052         /*
   2053          * Multiply by the second argument to compute the required
   2054          * result.
   2055          */
   2056         if (arg1_exp == 0) {
   2057             normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
   2058         }
   2059         mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
   2060         aexp += arg1_exp - 0x3ffe;
   2061         /* This result is inexact.  */
   2062         asig1 |= 1;
   2063         env->fp_status.float_rounding_mode = save_mode;
   2064         ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
   2065                                             arg0_sign ^ arg1_sign, aexp,
   2066                                             asig0, asig1, &env->fp_status);
   2067         env->fp_status.floatx80_rounding_precision = save_prec;
   2068     }
   2069     fpop(env);
   2070     merge_exception_flags(env, old_flags);
   2071 }
   2072 
   2073 void helper_fyl2x(CPUX86State *env)
   2074 {
   2075     uint8_t old_flags = save_exception_flags(env);
   2076     uint64_t arg0_sig = extractFloatx80Frac(ST0);
   2077     int32_t arg0_exp = extractFloatx80Exp(ST0);
   2078     bool arg0_sign = extractFloatx80Sign(ST0);
   2079     uint64_t arg1_sig = extractFloatx80Frac(ST1);
   2080     int32_t arg1_exp = extractFloatx80Exp(ST1);
   2081     bool arg1_sign = extractFloatx80Sign(ST1);
   2082 
   2083     if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
   2084         float_raise(float_flag_invalid, &env->fp_status);
   2085         ST1 = floatx80_silence_nan(ST0, &env->fp_status);
   2086     } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
   2087         float_raise(float_flag_invalid, &env->fp_status);
   2088         ST1 = floatx80_silence_nan(ST1, &env->fp_status);
   2089     } else if (floatx80_invalid_encoding(ST0) ||
   2090                floatx80_invalid_encoding(ST1)) {
   2091         float_raise(float_flag_invalid, &env->fp_status);
   2092         ST1 = floatx80_default_nan(&env->fp_status);
   2093     } else if (floatx80_is_any_nan(ST0)) {
   2094         ST1 = ST0;
   2095     } else if (floatx80_is_any_nan(ST1)) {
   2096         /* Pass this NaN through.  */
   2097     } else if (arg0_sign && !floatx80_is_zero(ST0)) {
   2098         float_raise(float_flag_invalid, &env->fp_status);
   2099         ST1 = floatx80_default_nan(&env->fp_status);
   2100     } else if (floatx80_is_infinity(ST1)) {
   2101         FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
   2102                                              &env->fp_status);
   2103         switch (cmp) {
   2104         case float_relation_less:
   2105             ST1 = floatx80_chs(ST1);
   2106             break;
   2107         case float_relation_greater:
   2108             /* Result is infinity of the same sign as ST1.  */
   2109             break;
   2110         default:
   2111             float_raise(float_flag_invalid, &env->fp_status);
   2112             ST1 = floatx80_default_nan(&env->fp_status);
   2113             break;
   2114         }
   2115     } else if (floatx80_is_infinity(ST0)) {
   2116         if (floatx80_is_zero(ST1)) {
   2117             float_raise(float_flag_invalid, &env->fp_status);
   2118             ST1 = floatx80_default_nan(&env->fp_status);
   2119         } else if (arg1_sign) {
   2120             ST1 = floatx80_chs(ST0);
   2121         } else {
   2122             ST1 = ST0;
   2123         }
   2124     } else if (floatx80_is_zero(ST0)) {
   2125         if (floatx80_is_zero(ST1)) {
   2126             float_raise(float_flag_invalid, &env->fp_status);
   2127             ST1 = floatx80_default_nan(&env->fp_status);
   2128         } else {
   2129             /* Result is infinity with opposite sign to ST1.  */
   2130             float_raise(float_flag_divbyzero, &env->fp_status);
   2131             ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
   2132                                 0x8000000000000000ULL);
   2133         }
   2134     } else if (floatx80_is_zero(ST1)) {
   2135         if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
   2136             ST1 = floatx80_chs(ST1);
   2137         }
   2138         /* Otherwise, ST1 is already the correct result.  */
   2139     } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
   2140         if (arg1_sign) {
   2141             ST1 = floatx80_chs(floatx80_zero);
   2142         } else {
   2143             ST1 = floatx80_zero;
   2144         }
   2145     } else {
   2146         int32_t int_exp;
   2147         floatx80 arg0_m1;
   2148         FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
   2149         FloatX80RoundPrec save_prec =
   2150             env->fp_status.floatx80_rounding_precision;
   2151         env->fp_status.float_rounding_mode = float_round_nearest_even;
   2152         env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
   2153 
   2154         if (arg0_exp == 0) {
   2155             normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
   2156         }
   2157         if (arg1_exp == 0) {
   2158             normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
   2159         }
   2160         int_exp = arg0_exp - 0x3fff;
   2161         if (arg0_sig > 0xb504f333f9de6484ULL) {
   2162             ++int_exp;
   2163         }
   2164         arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
   2165                                                &env->fp_status),
   2166                                floatx80_one, &env->fp_status);
   2167         if (floatx80_is_zero(arg0_m1)) {
   2168             /* Exact power of 2; multiply by ST1.  */
   2169             env->fp_status.float_rounding_mode = save_mode;
   2170             ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
   2171                                ST1, &env->fp_status);
   2172         } else {
   2173             bool asign = extractFloatx80Sign(arg0_m1);
   2174             int32_t aexp;
   2175             uint64_t asig0, asig1, asig2;
   2176             helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
   2177             if (int_exp != 0) {
   2178                 bool isign = (int_exp < 0);
   2179                 int32_t iexp;
   2180                 uint64_t isig;
   2181                 int shift;
   2182                 int_exp = isign ? -int_exp : int_exp;
   2183                 shift = clz32(int_exp) + 32;
   2184                 isig = int_exp;
   2185                 isig <<= shift;
   2186                 iexp = 0x403e - shift;
   2187                 shift128RightJamming(asig0, asig1, iexp - aexp,
   2188                                      &asig0, &asig1);
   2189                 if (asign == isign) {
   2190                     add128(isig, 0, asig0, asig1, &asig0, &asig1);
   2191                 } else {
   2192                     sub128(isig, 0, asig0, asig1, &asig0, &asig1);
   2193                 }
   2194                 aexp = iexp;
   2195                 asign = isign;
   2196             }
   2197             /*
   2198              * Multiply by the second argument to compute the required
   2199              * result.
   2200              */
   2201             if (arg1_exp == 0) {
   2202                 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
   2203             }
   2204             mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
   2205             aexp += arg1_exp - 0x3ffe;
   2206             /* This result is inexact.  */
   2207             asig1 |= 1;
   2208             env->fp_status.float_rounding_mode = save_mode;
   2209             ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
   2210                                                 asign ^ arg1_sign, aexp,
   2211                                                 asig0, asig1, &env->fp_status);
   2212         }
   2213 
   2214         env->fp_status.floatx80_rounding_precision = save_prec;
   2215     }
   2216     fpop(env);
   2217     merge_exception_flags(env, old_flags);
   2218 }
   2219 
   2220 void helper_fsqrt(CPUX86State *env)
   2221 {
   2222     uint8_t old_flags = save_exception_flags(env);
   2223     if (floatx80_is_neg(ST0)) {
   2224         env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
   2225         env->fpus |= 0x400;
   2226     }
   2227     ST0 = floatx80_sqrt(ST0, &env->fp_status);
   2228     merge_exception_flags(env, old_flags);
   2229 }
   2230 
   2231 void helper_fsincos(CPUX86State *env)
   2232 {
   2233     double fptemp = floatx80_to_double(env, ST0);
   2234 
   2235     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
   2236         env->fpus |= 0x400;
   2237     } else {
   2238         ST0 = double_to_floatx80(env, sin(fptemp));
   2239         fpush(env);
   2240         ST0 = double_to_floatx80(env, cos(fptemp));
   2241         env->fpus &= ~0x400;  /* C2 <-- 0 */
   2242         /* the above code is for |arg| < 2**63 only */
   2243     }
   2244 }
   2245 
   2246 void helper_frndint(CPUX86State *env)
   2247 {
   2248     uint8_t old_flags = save_exception_flags(env);
   2249     ST0 = floatx80_round_to_int(ST0, &env->fp_status);
   2250     merge_exception_flags(env, old_flags);
   2251 }
   2252 
   2253 void helper_fscale(CPUX86State *env)
   2254 {
   2255     uint8_t old_flags = save_exception_flags(env);
   2256     if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
   2257         float_raise(float_flag_invalid, &env->fp_status);
   2258         ST0 = floatx80_default_nan(&env->fp_status);
   2259     } else if (floatx80_is_any_nan(ST1)) {
   2260         if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
   2261             float_raise(float_flag_invalid, &env->fp_status);
   2262         }
   2263         ST0 = ST1;
   2264         if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
   2265             float_raise(float_flag_invalid, &env->fp_status);
   2266             ST0 = floatx80_silence_nan(ST0, &env->fp_status);
   2267         }
   2268     } else if (floatx80_is_infinity(ST1) &&
   2269                !floatx80_invalid_encoding(ST0) &&
   2270                !floatx80_is_any_nan(ST0)) {
   2271         if (floatx80_is_neg(ST1)) {
   2272             if (floatx80_is_infinity(ST0)) {
   2273                 float_raise(float_flag_invalid, &env->fp_status);
   2274                 ST0 = floatx80_default_nan(&env->fp_status);
   2275             } else {
   2276                 ST0 = (floatx80_is_neg(ST0) ?
   2277                        floatx80_chs(floatx80_zero) :
   2278                        floatx80_zero);
   2279             }
   2280         } else {
   2281             if (floatx80_is_zero(ST0)) {
   2282                 float_raise(float_flag_invalid, &env->fp_status);
   2283                 ST0 = floatx80_default_nan(&env->fp_status);
   2284             } else {
   2285                 ST0 = (floatx80_is_neg(ST0) ?
   2286                        floatx80_chs(floatx80_infinity) :
   2287                        floatx80_infinity);
   2288             }
   2289         }
   2290     } else {
   2291         int n;
   2292         FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision;
   2293         uint8_t save_flags = get_float_exception_flags(&env->fp_status);
   2294         set_float_exception_flags(0, &env->fp_status);
   2295         n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
   2296         set_float_exception_flags(save_flags, &env->fp_status);
   2297         env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
   2298         ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
   2299         env->fp_status.floatx80_rounding_precision = save;
   2300     }
   2301     merge_exception_flags(env, old_flags);
   2302 }
   2303 
   2304 void helper_fsin(CPUX86State *env)
   2305 {
   2306     double fptemp = floatx80_to_double(env, ST0);
   2307 
   2308     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
   2309         env->fpus |= 0x400;
   2310     } else {
   2311         ST0 = double_to_floatx80(env, sin(fptemp));
   2312         env->fpus &= ~0x400;  /* C2 <-- 0 */
   2313         /* the above code is for |arg| < 2**53 only */
   2314     }
   2315 }
   2316 
   2317 void helper_fcos(CPUX86State *env)
   2318 {
   2319     double fptemp = floatx80_to_double(env, ST0);
   2320 
   2321     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
   2322         env->fpus |= 0x400;
   2323     } else {
   2324         ST0 = double_to_floatx80(env, cos(fptemp));
   2325         env->fpus &= ~0x400;  /* C2 <-- 0 */
   2326         /* the above code is for |arg| < 2**63 only */
   2327     }
   2328 }
   2329 
   2330 void helper_fxam_ST0(CPUX86State *env)
   2331 {
   2332     CPU_LDoubleU temp;
   2333     int expdif;
   2334 
   2335     temp.d = ST0;
   2336 
   2337     env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
   2338     if (SIGND(temp)) {
   2339         env->fpus |= 0x200; /* C1 <-- 1 */
   2340     }
   2341 
   2342     if (env->fptags[env->fpstt]) {
   2343         env->fpus |= 0x4100; /* Empty */
   2344         return;
   2345     }
   2346 
   2347     expdif = EXPD(temp);
   2348     if (expdif == MAXEXPD) {
   2349         if (MANTD(temp) == 0x8000000000000000ULL) {
   2350             env->fpus |= 0x500; /* Infinity */
   2351         } else if (MANTD(temp) & 0x8000000000000000ULL) {
   2352             env->fpus |= 0x100; /* NaN */
   2353         }
   2354     } else if (expdif == 0) {
   2355         if (MANTD(temp) == 0) {
   2356             env->fpus |=  0x4000; /* Zero */
   2357         } else {
   2358             env->fpus |= 0x4400; /* Denormal */
   2359         }
   2360     } else if (MANTD(temp) & 0x8000000000000000ULL) {
   2361         env->fpus |= 0x400;
   2362     }
   2363 }
   2364 
   2365 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
   2366                       uintptr_t retaddr)
   2367 {
   2368     int fpus, fptag, exp, i;
   2369     uint64_t mant;
   2370     CPU_LDoubleU tmp;
   2371 
   2372     fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
   2373     fptag = 0;
   2374     for (i = 7; i >= 0; i--) {
   2375         fptag <<= 2;
   2376         if (env->fptags[i]) {
   2377             fptag |= 3;
   2378         } else {
   2379             tmp.d = env->fpregs[i].d;
   2380             exp = EXPD(tmp);
   2381             mant = MANTD(tmp);
   2382             if (exp == 0 && mant == 0) {
   2383                 /* zero */
   2384                 fptag |= 1;
   2385             } else if (exp == 0 || exp == MAXEXPD
   2386                        || (mant & (1LL << 63)) == 0) {
   2387                 /* NaNs, infinity, denormal */
   2388                 fptag |= 2;
   2389             }
   2390         }
   2391     }
   2392     if (data32) {
   2393         /* 32 bit */
   2394         cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
   2395         cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
   2396         cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
   2397         cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */
   2398         cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */
   2399         cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */
   2400         cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */
   2401     } else {
   2402         /* 16 bit */
   2403         cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
   2404         cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
   2405         cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
   2406         cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr);
   2407         cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr);
   2408         cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr);
   2409         cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr);
   2410     }
   2411 }
   2412 
   2413 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
   2414 {
   2415     do_fstenv(env, ptr, data32, GETPC());
   2416 }
   2417 
   2418 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
   2419 {
   2420     env->fpstt = (fpus >> 11) & 7;
   2421     env->fpus = fpus & ~0x3800 & ~FPUS_B;
   2422     env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
   2423 #if !defined(CONFIG_USER_ONLY)
   2424     if (!(env->fpus & FPUS_SE)) {
   2425         /*
   2426          * Here the processor deasserts FERR#; in response, the chipset deasserts
   2427          * IGNNE#.
   2428          */
   2429         cpu_clear_ignne();
   2430     }
   2431 #endif
   2432 }
   2433 
   2434 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
   2435                       uintptr_t retaddr)
   2436 {
   2437     int i, fpus, fptag;
   2438 
   2439     if (data32) {
   2440         cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
   2441         fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
   2442         fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
   2443     } else {
   2444         cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
   2445         fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
   2446         fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
   2447     }
   2448     cpu_set_fpus(env, fpus);
   2449     for (i = 0; i < 8; i++) {
   2450         env->fptags[i] = ((fptag & 3) == 3);
   2451         fptag >>= 2;
   2452     }
   2453 }
   2454 
   2455 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
   2456 {
   2457     do_fldenv(env, ptr, data32, GETPC());
   2458 }
   2459 
   2460 static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
   2461                      uintptr_t retaddr)
   2462 {
   2463     floatx80 tmp;
   2464     int i;
   2465 
   2466     do_fstenv(env, ptr, data32, retaddr);
   2467 
   2468     ptr += (target_ulong)14 << data32;
   2469     for (i = 0; i < 8; i++) {
   2470         tmp = ST(i);
   2471         do_fstt(env, tmp, ptr, retaddr);
   2472         ptr += 10;
   2473     }
   2474 
   2475     do_fninit(env);
   2476 }
   2477 
   2478 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
   2479 {
   2480     do_fsave(env, ptr, data32, GETPC());
   2481 }
   2482 
   2483 static void do_frstor(CPUX86State *env, target_ulong ptr, int data32,
   2484                       uintptr_t retaddr)
   2485 {
   2486     floatx80 tmp;
   2487     int i;
   2488 
   2489     do_fldenv(env, ptr, data32, retaddr);
   2490     ptr += (target_ulong)14 << data32;
   2491 
   2492     for (i = 0; i < 8; i++) {
   2493         tmp = do_fldt(env, ptr, retaddr);
   2494         ST(i) = tmp;
   2495         ptr += 10;
   2496     }
   2497 }
   2498 
   2499 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
   2500 {
   2501     do_frstor(env, ptr, data32, GETPC());
   2502 }
   2503 
   2504 #define XO(X)  offsetof(X86XSaveArea, X)
   2505 
   2506 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
   2507 {
   2508     int fpus, fptag, i;
   2509     target_ulong addr;
   2510 
   2511     fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
   2512     fptag = 0;
   2513     for (i = 0; i < 8; i++) {
   2514         fptag |= (env->fptags[i] << i);
   2515     }
   2516 
   2517     cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
   2518     cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
   2519     cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
   2520 
   2521     /* In 32-bit mode this is eip, sel, dp, sel.
   2522        In 64-bit mode this is rip, rdp.
   2523        But in either case we don't write actual data, just zeros.  */
   2524     cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
   2525     cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
   2526 
   2527     addr = ptr + XO(legacy.fpregs);
   2528     for (i = 0; i < 8; i++) {
   2529         floatx80 tmp = ST(i);
   2530         do_fstt(env, tmp, addr, ra);
   2531         addr += 16;
   2532     }
   2533 }
   2534 
   2535 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
   2536 {
   2537     update_mxcsr_from_sse_status(env);
   2538     cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
   2539     cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
   2540 }
   2541 
   2542 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
   2543 {
   2544     int i, nb_xmm_regs;
   2545     target_ulong addr;
   2546 
   2547     if (env->hflags & HF_CS64_MASK) {
   2548         nb_xmm_regs = 16;
   2549     } else {
   2550         nb_xmm_regs = 8;
   2551     }
   2552 
   2553     addr = ptr + XO(legacy.xmm_regs);
   2554     for (i = 0; i < nb_xmm_regs; i++) {
   2555         cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
   2556         cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
   2557         addr += 16;
   2558     }
   2559 }
   2560 
   2561 static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra)
   2562 {
   2563     int i, nb_xmm_regs;
   2564 
   2565     if (env->hflags & HF_CS64_MASK) {
   2566         nb_xmm_regs = 16;
   2567     } else {
   2568         nb_xmm_regs = 8;
   2569     }
   2570 
   2571     for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
   2572         cpu_stq_data_ra(env, ptr, env->xmm_regs[i].ZMM_Q(2), ra);
   2573         cpu_stq_data_ra(env, ptr + 8, env->xmm_regs[i].ZMM_Q(3), ra);
   2574     }
   2575 }
   2576 
   2577 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
   2578 {
   2579     target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
   2580     int i;
   2581 
   2582     for (i = 0; i < 4; i++, addr += 16) {
   2583         cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
   2584         cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
   2585     }
   2586 }
   2587 
   2588 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
   2589 {
   2590     cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
   2591                     env->bndcs_regs.cfgu, ra);
   2592     cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
   2593                     env->bndcs_regs.sts, ra);
   2594 }
   2595 
   2596 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
   2597 {
   2598     cpu_stq_data_ra(env, ptr, env->pkru, ra);
   2599 }
   2600 
   2601 static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra)
   2602 {
   2603     /* The operand must be 16 byte aligned */
   2604     if (ptr & 0xf) {
   2605         raise_exception_ra(env, EXCP0D_GPF, ra);
   2606     }
   2607 
   2608     do_xsave_fpu(env, ptr, ra);
   2609 
   2610     if (env->cr[4] & CR4_OSFXSR_MASK) {
   2611         do_xsave_mxcsr(env, ptr, ra);
   2612         /* Fast FXSAVE leaves out the XMM registers */
   2613         if (!(env->efer & MSR_EFER_FFXSR)
   2614             || (env->hflags & HF_CPL_MASK)
   2615             || !(env->hflags & HF_LMA_MASK)) {
   2616             do_xsave_sse(env, ptr, ra);
   2617         }
   2618     }
   2619 }
   2620 
   2621 void helper_fxsave(CPUX86State *env, target_ulong ptr)
   2622 {
   2623     do_fxsave(env, ptr, GETPC());
   2624 }
   2625 
   2626 static uint64_t get_xinuse(CPUX86State *env)
   2627 {
   2628     uint64_t inuse = -1;
   2629 
   2630     /* For the most part, we don't track XINUSE.  We could calculate it
   2631        here for all components, but it's probably less work to simply
   2632        indicate in use.  That said, the state of BNDREGS is important
   2633        enough to track in HFLAGS, so we might as well use that here.  */
   2634     if ((env->hflags & HF_MPX_IU_MASK) == 0) {
   2635        inuse &= ~XSTATE_BNDREGS_MASK;
   2636     }
   2637     return inuse;
   2638 }
   2639 
   2640 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
   2641                      uint64_t inuse, uint64_t opt, uintptr_t ra)
   2642 {
   2643     uint64_t old_bv, new_bv;
   2644 
   2645     /* The OS must have enabled XSAVE.  */
   2646     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
   2647         raise_exception_ra(env, EXCP06_ILLOP, ra);
   2648     }
   2649 
   2650     /* The operand must be 64 byte aligned.  */
   2651     if (ptr & 63) {
   2652         raise_exception_ra(env, EXCP0D_GPF, ra);
   2653     }
   2654 
   2655     /* Never save anything not enabled by XCR0.  */
   2656     rfbm &= env->xcr0;
   2657     opt &= rfbm;
   2658 
   2659     if (opt & XSTATE_FP_MASK) {
   2660         do_xsave_fpu(env, ptr, ra);
   2661     }
   2662     if (rfbm & XSTATE_SSE_MASK) {
   2663         /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
   2664         do_xsave_mxcsr(env, ptr, ra);
   2665     }
   2666     if (opt & XSTATE_SSE_MASK) {
   2667         do_xsave_sse(env, ptr, ra);
   2668     }
   2669     if (opt & XSTATE_YMM_MASK) {
   2670         do_xsave_ymmh(env, ptr + XO(avx_state), ra);
   2671     }
   2672     if (opt & XSTATE_BNDREGS_MASK) {
   2673         do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
   2674     }
   2675     if (opt & XSTATE_BNDCSR_MASK) {
   2676         do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
   2677     }
   2678     if (opt & XSTATE_PKRU_MASK) {
   2679         do_xsave_pkru(env, ptr + XO(pkru_state), ra);
   2680     }
   2681 
   2682     /* Update the XSTATE_BV field.  */
   2683     old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
   2684     new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
   2685     cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
   2686 }
   2687 
   2688 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
   2689 {
   2690     do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
   2691 }
   2692 
   2693 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
   2694 {
   2695     uint64_t inuse = get_xinuse(env);
   2696     do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
   2697 }
   2698 
   2699 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
   2700 {
   2701     int i, fpuc, fpus, fptag;
   2702     target_ulong addr;
   2703 
   2704     fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
   2705     fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
   2706     fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
   2707     cpu_set_fpuc(env, fpuc);
   2708     cpu_set_fpus(env, fpus);
   2709     fptag ^= 0xff;
   2710     for (i = 0; i < 8; i++) {
   2711         env->fptags[i] = ((fptag >> i) & 1);
   2712     }
   2713 
   2714     addr = ptr + XO(legacy.fpregs);
   2715     for (i = 0; i < 8; i++) {
   2716         floatx80 tmp = do_fldt(env, addr, ra);
   2717         ST(i) = tmp;
   2718         addr += 16;
   2719     }
   2720 }
   2721 
   2722 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
   2723 {
   2724     cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
   2725 }
   2726 
   2727 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
   2728 {
   2729     int i, nb_xmm_regs;
   2730     target_ulong addr;
   2731 
   2732     if (env->hflags & HF_CS64_MASK) {
   2733         nb_xmm_regs = 16;
   2734     } else {
   2735         nb_xmm_regs = 8;
   2736     }
   2737 
   2738     addr = ptr + XO(legacy.xmm_regs);
   2739     for (i = 0; i < nb_xmm_regs; i++) {
   2740         env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
   2741         env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
   2742         addr += 16;
   2743     }
   2744 }
   2745 
   2746 static void do_clear_sse(CPUX86State *env)
   2747 {
   2748     int i, nb_xmm_regs;
   2749 
   2750     if (env->hflags & HF_CS64_MASK) {
   2751         nb_xmm_regs = 16;
   2752     } else {
   2753         nb_xmm_regs = 8;
   2754     }
   2755 
   2756     for (i = 0; i < nb_xmm_regs; i++) {
   2757         env->xmm_regs[i].ZMM_Q(0) = 0;
   2758         env->xmm_regs[i].ZMM_Q(1) = 0;
   2759     }
   2760 }
   2761 
   2762 static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra)
   2763 {
   2764     int i, nb_xmm_regs;
   2765 
   2766     if (env->hflags & HF_CS64_MASK) {
   2767         nb_xmm_regs = 16;
   2768     } else {
   2769         nb_xmm_regs = 8;
   2770     }
   2771 
   2772     for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
   2773         env->xmm_regs[i].ZMM_Q(2) = cpu_ldq_data_ra(env, ptr, ra);
   2774         env->xmm_regs[i].ZMM_Q(3) = cpu_ldq_data_ra(env, ptr + 8, ra);
   2775     }
   2776 }
   2777 
   2778 static void do_clear_ymmh(CPUX86State *env)
   2779 {
   2780     int i, nb_xmm_regs;
   2781 
   2782     if (env->hflags & HF_CS64_MASK) {
   2783         nb_xmm_regs = 16;
   2784     } else {
   2785         nb_xmm_regs = 8;
   2786     }
   2787 
   2788     for (i = 0; i < nb_xmm_regs; i++) {
   2789         env->xmm_regs[i].ZMM_Q(2) = 0;
   2790         env->xmm_regs[i].ZMM_Q(3) = 0;
   2791     }
   2792 }
   2793 
   2794 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
   2795 {
   2796     target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
   2797     int i;
   2798 
   2799     for (i = 0; i < 4; i++, addr += 16) {
   2800         env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
   2801         env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
   2802     }
   2803 }
   2804 
   2805 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
   2806 {
   2807     /* FIXME: Extend highest implemented bit of linear address.  */
   2808     env->bndcs_regs.cfgu
   2809         = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
   2810     env->bndcs_regs.sts
   2811         = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
   2812 }
   2813 
   2814 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
   2815 {
   2816     env->pkru = cpu_ldq_data_ra(env, ptr, ra);
   2817 }
   2818 
   2819 static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra)
   2820 {
   2821     /* The operand must be 16 byte aligned */
   2822     if (ptr & 0xf) {
   2823         raise_exception_ra(env, EXCP0D_GPF, ra);
   2824     }
   2825 
   2826     do_xrstor_fpu(env, ptr, ra);
   2827 
   2828     if (env->cr[4] & CR4_OSFXSR_MASK) {
   2829         do_xrstor_mxcsr(env, ptr, ra);
   2830         /* Fast FXRSTOR leaves out the XMM registers */
   2831         if (!(env->efer & MSR_EFER_FFXSR)
   2832             || (env->hflags & HF_CPL_MASK)
   2833             || !(env->hflags & HF_LMA_MASK)) {
   2834             do_xrstor_sse(env, ptr, ra);
   2835         }
   2836     }
   2837 }
   2838 
   2839 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
   2840 {
   2841     do_fxrstor(env, ptr, GETPC());
   2842 }
   2843 
   2844 static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra)
   2845 {
   2846     uint64_t xstate_bv, xcomp_bv, reserve0;
   2847 
   2848     rfbm &= env->xcr0;
   2849 
   2850     /* The OS must have enabled XSAVE.  */
   2851     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
   2852         raise_exception_ra(env, EXCP06_ILLOP, ra);
   2853     }
   2854 
   2855     /* The operand must be 64 byte aligned.  */
   2856     if (ptr & 63) {
   2857         raise_exception_ra(env, EXCP0D_GPF, ra);
   2858     }
   2859 
   2860     xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
   2861 
   2862     if ((int64_t)xstate_bv < 0) {
   2863         /* FIXME: Compact form.  */
   2864         raise_exception_ra(env, EXCP0D_GPF, ra);
   2865     }
   2866 
   2867     /* Standard form.  */
   2868 
   2869     /* The XSTATE_BV field must not set bits not present in XCR0.  */
   2870     if (xstate_bv & ~env->xcr0) {
   2871         raise_exception_ra(env, EXCP0D_GPF, ra);
   2872     }
   2873 
   2874     /* The XCOMP_BV field must be zero.  Note that, as of the April 2016
   2875        revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
   2876        describes only XCOMP_BV, but the description of the standard form
   2877        of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
   2878        includes the next 64-bit field.  */
   2879     xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
   2880     reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
   2881     if (xcomp_bv || reserve0) {
   2882         raise_exception_ra(env, EXCP0D_GPF, ra);
   2883     }
   2884 
   2885     if (rfbm & XSTATE_FP_MASK) {
   2886         if (xstate_bv & XSTATE_FP_MASK) {
   2887             do_xrstor_fpu(env, ptr, ra);
   2888         } else {
   2889             do_fninit(env);
   2890             memset(env->fpregs, 0, sizeof(env->fpregs));
   2891         }
   2892     }
   2893     if (rfbm & XSTATE_SSE_MASK) {
   2894         /* Note that the standard form of XRSTOR loads MXCSR from memory
   2895            whether or not the XSTATE_BV bit is set.  */
   2896         do_xrstor_mxcsr(env, ptr, ra);
   2897         if (xstate_bv & XSTATE_SSE_MASK) {
   2898             do_xrstor_sse(env, ptr, ra);
   2899         } else {
   2900             do_clear_sse(env);
   2901         }
   2902     }
   2903     if (rfbm & XSTATE_YMM_MASK) {
   2904         if (xstate_bv & XSTATE_YMM_MASK) {
   2905             do_xrstor_ymmh(env, ptr + XO(avx_state), ra);
   2906         } else {
   2907             do_clear_ymmh(env);
   2908         }
   2909     }
   2910     if (rfbm & XSTATE_BNDREGS_MASK) {
   2911         if (xstate_bv & XSTATE_BNDREGS_MASK) {
   2912             do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
   2913             env->hflags |= HF_MPX_IU_MASK;
   2914         } else {
   2915             memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
   2916             env->hflags &= ~HF_MPX_IU_MASK;
   2917         }
   2918     }
   2919     if (rfbm & XSTATE_BNDCSR_MASK) {
   2920         if (xstate_bv & XSTATE_BNDCSR_MASK) {
   2921             do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
   2922         } else {
   2923             memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
   2924         }
   2925         cpu_sync_bndcs_hflags(env);
   2926     }
   2927     if (rfbm & XSTATE_PKRU_MASK) {
   2928         uint64_t old_pkru = env->pkru;
   2929         if (xstate_bv & XSTATE_PKRU_MASK) {
   2930             do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
   2931         } else {
   2932             env->pkru = 0;
   2933         }
   2934         if (env->pkru != old_pkru) {
   2935             CPUState *cs = env_cpu(env);
   2936             tlb_flush(cs);
   2937         }
   2938     }
   2939 }
   2940 
   2941 #undef XO
   2942 
   2943 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
   2944 {
   2945     do_xrstor(env, ptr, rfbm, GETPC());
   2946 }
   2947 
   2948 #if defined(CONFIG_USER_ONLY)
   2949 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
   2950 {
   2951     do_fsave(env, ptr, data32, 0);
   2952 }
   2953 
   2954 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
   2955 {
   2956     do_frstor(env, ptr, data32, 0);
   2957 }
   2958 
   2959 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
   2960 {
   2961     do_fxsave(env, ptr, 0);
   2962 }
   2963 
   2964 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
   2965 {
   2966     do_fxrstor(env, ptr, 0);
   2967 }
   2968 
   2969 void cpu_x86_xsave(CPUX86State *env, target_ulong ptr)
   2970 {
   2971     do_xsave(env, ptr, -1, get_xinuse(env), -1, 0);
   2972 }
   2973 
   2974 void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr)
   2975 {
   2976     do_xrstor(env, ptr, -1, 0);
   2977 }
   2978 #endif
   2979 
   2980 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
   2981 {
   2982     /* The OS must have enabled XSAVE.  */
   2983     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
   2984         raise_exception_ra(env, EXCP06_ILLOP, GETPC());
   2985     }
   2986 
   2987     switch (ecx) {
   2988     case 0:
   2989         return env->xcr0;
   2990     case 1:
   2991         if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
   2992             return env->xcr0 & get_xinuse(env);
   2993         }
   2994         break;
   2995     }
   2996     raise_exception_ra(env, EXCP0D_GPF, GETPC());
   2997 }
   2998 
   2999 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
   3000 {
   3001     uint32_t dummy, ena_lo, ena_hi;
   3002     uint64_t ena;
   3003 
   3004     /* The OS must have enabled XSAVE.  */
   3005     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
   3006         raise_exception_ra(env, EXCP06_ILLOP, GETPC());
   3007     }
   3008 
   3009     /* Only XCR0 is defined at present; the FPU may not be disabled.  */
   3010     if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
   3011         goto do_gpf;
   3012     }
   3013 
   3014     /* Disallow enabling unimplemented features.  */
   3015     cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
   3016     ena = ((uint64_t)ena_hi << 32) | ena_lo;
   3017     if (mask & ~ena) {
   3018         goto do_gpf;
   3019     }
   3020 
   3021     /* Disallow enabling only half of MPX.  */
   3022     if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
   3023         & XSTATE_BNDCSR_MASK) {
   3024         goto do_gpf;
   3025     }
   3026 
   3027     env->xcr0 = mask;
   3028     cpu_sync_bndcs_hflags(env);
   3029     cpu_sync_avx_hflag(env);
   3030     return;
   3031 
   3032  do_gpf:
   3033     raise_exception_ra(env, EXCP0D_GPF, GETPC());
   3034 }
   3035 
   3036 /* MMX/SSE */
   3037 /* XXX: optimize by storing fptt and fptags in the static cpu state */
   3038 
   3039 #define SSE_DAZ             0x0040
   3040 #define SSE_RC_SHIFT        13
   3041 #define SSE_RC_MASK         (3 << SSE_RC_SHIFT)
   3042 #define SSE_FZ              0x8000
   3043 
   3044 void update_mxcsr_status(CPUX86State *env)
   3045 {
   3046     uint32_t mxcsr = env->mxcsr;
   3047     int rnd_type;
   3048 
   3049     /* set rounding mode */
   3050     rnd_type = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT;
   3051     set_x86_rounding_mode(rnd_type, &env->sse_status);
   3052 
   3053     /* Set exception flags.  */
   3054     set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
   3055                               (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
   3056                               (mxcsr & FPUS_OE ? float_flag_overflow : 0) |
   3057                               (mxcsr & FPUS_UE ? float_flag_underflow : 0) |
   3058                               (mxcsr & FPUS_PE ? float_flag_inexact : 0),
   3059                               &env->sse_status);
   3060 
   3061     /* set denormals are zero */
   3062     set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
   3063 
   3064     /* set flush to zero */
   3065     set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
   3066 }
   3067 
   3068 void update_mxcsr_from_sse_status(CPUX86State *env)
   3069 {
   3070     uint8_t flags = get_float_exception_flags(&env->sse_status);
   3071     /*
   3072      * The MXCSR denormal flag has opposite semantics to
   3073      * float_flag_input_denormal (the softfloat code sets that flag
   3074      * only when flushing input denormals to zero, but SSE sets it
   3075      * only when not flushing them to zero), so is not converted
   3076      * here.
   3077      */
   3078     env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
   3079                    (flags & float_flag_divbyzero ? FPUS_ZE : 0) |
   3080                    (flags & float_flag_overflow ? FPUS_OE : 0) |
   3081                    (flags & float_flag_underflow ? FPUS_UE : 0) |
   3082                    (flags & float_flag_inexact ? FPUS_PE : 0) |
   3083                    (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE :
   3084                     0));
   3085 }
   3086 
   3087 void helper_update_mxcsr(CPUX86State *env)
   3088 {
   3089     update_mxcsr_from_sse_status(env);
   3090 }
   3091 
   3092 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
   3093 {
   3094     cpu_set_mxcsr(env, val);
   3095 }
   3096 
   3097 void helper_enter_mmx(CPUX86State *env)
   3098 {
   3099     env->fpstt = 0;
   3100     *(uint32_t *)(env->fptags) = 0;
   3101     *(uint32_t *)(env->fptags + 4) = 0;
   3102 }
   3103 
   3104 void helper_emms(CPUX86State *env)
   3105 {
   3106     /* set to empty state */
   3107     *(uint32_t *)(env->fptags) = 0x01010101;
   3108     *(uint32_t *)(env->fptags + 4) = 0x01010101;
   3109 }
   3110 
   3111 #define SHIFT 0
   3112 #include "ops_sse.h"
   3113 
   3114 #define SHIFT 1
   3115 #include "ops_sse.h"
   3116 
   3117 #define SHIFT 2
   3118 #include "ops_sse.h"