qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

vec_fpu_helper.c (38083B)


      1 /*
      2  * QEMU TCG support -- s390x vector floating point instruction support
      3  *
      4  * Copyright (C) 2019 Red Hat Inc
      5  *
      6  * Authors:
      7  *   David Hildenbrand <david@redhat.com>
      8  *
      9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
     10  * See the COPYING file in the top-level directory.
     11  */
     12 #include "qemu/osdep.h"
     13 #include "cpu.h"
     14 #include "s390x-internal.h"
     15 #include "vec.h"
     16 #include "tcg_s390x.h"
     17 #include "tcg/tcg-gvec-desc.h"
     18 #include "exec/exec-all.h"
     19 #include "exec/helper-proto.h"
     20 #include "fpu/softfloat.h"
     21 
     22 #define VIC_INVALID         0x1
     23 #define VIC_DIVBYZERO       0x2
     24 #define VIC_OVERFLOW        0x3
     25 #define VIC_UNDERFLOW       0x4
     26 #define VIC_INEXACT         0x5
     27 
     28 /* returns the VEX. If the VEX is 0, there is no trap */
     29 static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC,
     30                               uint8_t *vec_exc)
     31 {
     32     uint8_t vece_exc = 0, trap_exc;
     33     unsigned qemu_exc;
     34 
     35     /* Retrieve and clear the softfloat exceptions */
     36     qemu_exc = env->fpu_status.float_exception_flags;
     37     if (qemu_exc == 0) {
     38         return 0;
     39     }
     40     env->fpu_status.float_exception_flags = 0;
     41 
     42     vece_exc = s390_softfloat_exc_to_ieee(qemu_exc);
     43 
     44     /* Add them to the vector-wide s390x exception bits */
     45     *vec_exc |= vece_exc;
     46 
     47     /* Check for traps and construct the VXC */
     48     trap_exc = vece_exc & env->fpc >> 24;
     49     if (trap_exc) {
     50         if (trap_exc & S390_IEEE_MASK_INVALID) {
     51             return enr << 4 | VIC_INVALID;
     52         } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) {
     53             return enr << 4 | VIC_DIVBYZERO;
     54         } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) {
     55             return enr << 4 | VIC_OVERFLOW;
     56         } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) {
     57             return enr << 4 | VIC_UNDERFLOW;
     58         } else if (!XxC) {
     59             g_assert(trap_exc & S390_IEEE_MASK_INEXACT);
     60             /* inexact has lowest priority on traps */
     61             return enr << 4 | VIC_INEXACT;
     62         }
     63     }
     64     return 0;
     65 }
     66 
     67 static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc,
     68                             uintptr_t retaddr)
     69 {
     70     if (vxc) {
     71         /* on traps, the fpc flags are not updated, instruction is suppressed */
     72         tcg_s390_vector_exception(env, vxc, retaddr);
     73     }
     74     if (vec_exc) {
     75         /* indicate exceptions for all elements combined */
     76         env->fpc |= vec_exc << 16;
     77     }
     78 }
     79 
     80 static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr)
     81 {
     82     return make_float32(s390_vec_read_element32(v, enr));
     83 }
     84 
     85 static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr)
     86 {
     87     return make_float64(s390_vec_read_element64(v, enr));
     88 }
     89 
     90 static float128 s390_vec_read_float128(const S390Vector *v)
     91 {
     92     return make_float128(s390_vec_read_element64(v, 0),
     93                          s390_vec_read_element64(v, 1));
     94 }
     95 
     96 static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data)
     97 {
     98     return s390_vec_write_element32(v, enr, data);
     99 }
    100 
    101 static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data)
    102 {
    103     return s390_vec_write_element64(v, enr, data);
    104 }
    105 
    106 static void s390_vec_write_float128(S390Vector *v, float128 data)
    107 {
    108     s390_vec_write_element64(v, 0, data.high);
    109     s390_vec_write_element64(v, 1, data.low);
    110 }
    111 
    112 typedef float32 (*vop32_2_fn)(float32 a, float_status *s);
    113 static void vop32_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
    114                     bool s, bool XxC, uint8_t erm, vop32_2_fn fn,
    115                     uintptr_t retaddr)
    116 {
    117     uint8_t vxc, vec_exc = 0;
    118     S390Vector tmp = {};
    119     int i, old_mode;
    120 
    121     old_mode = s390_swap_bfp_rounding_mode(env, erm);
    122     for (i = 0; i < 4; i++) {
    123         const float32 a = s390_vec_read_float32(v2, i);
    124 
    125         s390_vec_write_float32(&tmp, i, fn(a, &env->fpu_status));
    126         vxc = check_ieee_exc(env, i, XxC, &vec_exc);
    127         if (s || vxc) {
    128             break;
    129         }
    130     }
    131     s390_restore_bfp_rounding_mode(env, old_mode);
    132     handle_ieee_exc(env, vxc, vec_exc, retaddr);
    133     *v1 = tmp;
    134 }
    135 
    136 typedef float64 (*vop64_2_fn)(float64 a, float_status *s);
    137 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
    138                     bool s, bool XxC, uint8_t erm, vop64_2_fn fn,
    139                     uintptr_t retaddr)
    140 {
    141     uint8_t vxc, vec_exc = 0;
    142     S390Vector tmp = {};
    143     int i, old_mode;
    144 
    145     old_mode = s390_swap_bfp_rounding_mode(env, erm);
    146     for (i = 0; i < 2; i++) {
    147         const float64 a = s390_vec_read_float64(v2, i);
    148 
    149         s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status));
    150         vxc = check_ieee_exc(env, i, XxC, &vec_exc);
    151         if (s || vxc) {
    152             break;
    153         }
    154     }
    155     s390_restore_bfp_rounding_mode(env, old_mode);
    156     handle_ieee_exc(env, vxc, vec_exc, retaddr);
    157     *v1 = tmp;
    158 }
    159 
    160 typedef float128 (*vop128_2_fn)(float128 a, float_status *s);
    161 static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
    162                     bool s, bool XxC, uint8_t erm, vop128_2_fn fn,
    163                     uintptr_t retaddr)
    164 {
    165     const float128 a = s390_vec_read_float128(v2);
    166     uint8_t vxc, vec_exc = 0;
    167     S390Vector tmp = {};
    168     int old_mode;
    169 
    170     old_mode = s390_swap_bfp_rounding_mode(env, erm);
    171     s390_vec_write_float128(&tmp, fn(a, &env->fpu_status));
    172     vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
    173     s390_restore_bfp_rounding_mode(env, old_mode);
    174     handle_ieee_exc(env, vxc, vec_exc, retaddr);
    175     *v1 = tmp;
    176 }
    177 
    178 static float32 vcdg32(float32 a, float_status *s)
    179 {
    180     return int32_to_float32(a, s);
    181 }
    182 
    183 static float32 vcdlg32(float32 a, float_status *s)
    184 {
    185     return uint32_to_float32(a, s);
    186 }
    187 
    188 static float32 vcgd32(float32 a, float_status *s)
    189 {
    190     const float32 tmp = float32_to_int32(a, s);
    191 
    192     return float32_is_any_nan(a) ? INT32_MIN : tmp;
    193 }
    194 
    195 static float32 vclgd32(float32 a, float_status *s)
    196 {
    197     const float32 tmp = float32_to_uint32(a, s);
    198 
    199     return float32_is_any_nan(a) ? 0 : tmp;
    200 }
    201 
    202 static float64 vcdg64(float64 a, float_status *s)
    203 {
    204     return int64_to_float64(a, s);
    205 }
    206 
    207 static float64 vcdlg64(float64 a, float_status *s)
    208 {
    209     return uint64_to_float64(a, s);
    210 }
    211 
    212 static float64 vcgd64(float64 a, float_status *s)
    213 {
    214     const float64 tmp = float64_to_int64(a, s);
    215 
    216     return float64_is_any_nan(a) ? INT64_MIN : tmp;
    217 }
    218 
    219 static float64 vclgd64(float64 a, float_status *s)
    220 {
    221     const float64 tmp = float64_to_uint64(a, s);
    222 
    223     return float64_is_any_nan(a) ? 0 : tmp;
    224 }
    225 
    226 #define DEF_GVEC_VOP2_FN(NAME, FN, BITS)                                       \
    227 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env,   \
    228                                uint32_t desc)                                  \
    229 {                                                                              \
    230     const uint8_t erm = extract32(simd_data(desc), 4, 4);                      \
    231     const bool se = extract32(simd_data(desc), 3, 1);                          \
    232     const bool XxC = extract32(simd_data(desc), 2, 1);                         \
    233                                                                                \
    234     vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC());                     \
    235 }
    236 
    237 #define DEF_GVEC_VOP2_32(NAME)                                                 \
    238 DEF_GVEC_VOP2_FN(NAME, NAME##32, 32)
    239 
    240 #define DEF_GVEC_VOP2_64(NAME)                                                 \
    241 DEF_GVEC_VOP2_FN(NAME, NAME##64, 64)
    242 
    243 #define DEF_GVEC_VOP2(NAME, OP)                                                \
    244 DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32)                                       \
    245 DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64)                                       \
    246 DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128)
    247 
    248 DEF_GVEC_VOP2_32(vcdg)
    249 DEF_GVEC_VOP2_32(vcdlg)
    250 DEF_GVEC_VOP2_32(vcgd)
    251 DEF_GVEC_VOP2_32(vclgd)
    252 DEF_GVEC_VOP2_64(vcdg)
    253 DEF_GVEC_VOP2_64(vcdlg)
    254 DEF_GVEC_VOP2_64(vcgd)
    255 DEF_GVEC_VOP2_64(vclgd)
    256 DEF_GVEC_VOP2(vfi, round_to_int)
    257 DEF_GVEC_VOP2(vfsq, sqrt)
    258 
    259 typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s);
    260 static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
    261                     CPUS390XState *env, bool s, vop32_3_fn fn,
    262                     uintptr_t retaddr)
    263 {
    264     uint8_t vxc, vec_exc = 0;
    265     S390Vector tmp = {};
    266     int i;
    267 
    268     for (i = 0; i < 4; i++) {
    269         const float32 a = s390_vec_read_float32(v2, i);
    270         const float32 b = s390_vec_read_float32(v3, i);
    271 
    272         s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status));
    273         vxc = check_ieee_exc(env, i, false, &vec_exc);
    274         if (s || vxc) {
    275             break;
    276         }
    277     }
    278     handle_ieee_exc(env, vxc, vec_exc, retaddr);
    279     *v1 = tmp;
    280 }
    281 
    282 typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s);
    283 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
    284                     CPUS390XState *env, bool s, vop64_3_fn fn,
    285                     uintptr_t retaddr)
    286 {
    287     uint8_t vxc, vec_exc = 0;
    288     S390Vector tmp = {};
    289     int i;
    290 
    291     for (i = 0; i < 2; i++) {
    292         const float64 a = s390_vec_read_float64(v2, i);
    293         const float64 b = s390_vec_read_float64(v3, i);
    294 
    295         s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status));
    296         vxc = check_ieee_exc(env, i, false, &vec_exc);
    297         if (s || vxc) {
    298             break;
    299         }
    300     }
    301     handle_ieee_exc(env, vxc, vec_exc, retaddr);
    302     *v1 = tmp;
    303 }
    304 
    305 typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s);
    306 static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
    307                      CPUS390XState *env, bool s, vop128_3_fn fn,
    308                      uintptr_t retaddr)
    309 {
    310     const float128 a = s390_vec_read_float128(v2);
    311     const float128 b = s390_vec_read_float128(v3);
    312     uint8_t vxc, vec_exc = 0;
    313     S390Vector tmp = {};
    314 
    315     s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status));
    316     vxc = check_ieee_exc(env, 0, false, &vec_exc);
    317     handle_ieee_exc(env, vxc, vec_exc, retaddr);
    318     *v1 = tmp;
    319 }
    320 
    321 #define DEF_GVEC_VOP3_B(NAME, OP, BITS)                                        \
    322 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
    323                               CPUS390XState *env, uint32_t desc)               \
    324 {                                                                              \
    325     const bool se = extract32(simd_data(desc), 3, 1);                          \
    326                                                                                \
    327     vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC());           \
    328 }
    329 
    330 #define DEF_GVEC_VOP3(NAME, OP)                                                \
    331 DEF_GVEC_VOP3_B(NAME, OP, 32)                                                  \
    332 DEF_GVEC_VOP3_B(NAME, OP, 64)                                                  \
    333 DEF_GVEC_VOP3_B(NAME, OP, 128)
    334 
    335 DEF_GVEC_VOP3(vfa, add)
    336 DEF_GVEC_VOP3(vfs, sub)
    337 DEF_GVEC_VOP3(vfd, div)
    338 DEF_GVEC_VOP3(vfm, mul)
    339 
    340 static int wfc32(const S390Vector *v1, const S390Vector *v2,
    341                  CPUS390XState *env, bool signal, uintptr_t retaddr)
    342 {
    343     /* only the zero-indexed elements are compared */
    344     const float32 a = s390_vec_read_float32(v1, 0);
    345     const float32 b = s390_vec_read_float32(v2, 0);
    346     uint8_t vxc, vec_exc = 0;
    347     int cmp;
    348 
    349     if (signal) {
    350         cmp = float32_compare(a, b, &env->fpu_status);
    351     } else {
    352         cmp = float32_compare_quiet(a, b, &env->fpu_status);
    353     }
    354     vxc = check_ieee_exc(env, 0, false, &vec_exc);
    355     handle_ieee_exc(env, vxc, vec_exc, retaddr);
    356 
    357     return float_comp_to_cc(env, cmp);
    358 }
    359 
    360 static int wfc64(const S390Vector *v1, const S390Vector *v2,
    361                  CPUS390XState *env, bool signal, uintptr_t retaddr)
    362 {
    363     /* only the zero-indexed elements are compared */
    364     const float64 a = s390_vec_read_float64(v1, 0);
    365     const float64 b = s390_vec_read_float64(v2, 0);
    366     uint8_t vxc, vec_exc = 0;
    367     int cmp;
    368 
    369     if (signal) {
    370         cmp = float64_compare(a, b, &env->fpu_status);
    371     } else {
    372         cmp = float64_compare_quiet(a, b, &env->fpu_status);
    373     }
    374     vxc = check_ieee_exc(env, 0, false, &vec_exc);
    375     handle_ieee_exc(env, vxc, vec_exc, retaddr);
    376 
    377     return float_comp_to_cc(env, cmp);
    378 }
    379 
    380 static int wfc128(const S390Vector *v1, const S390Vector *v2,
    381                   CPUS390XState *env, bool signal, uintptr_t retaddr)
    382 {
    383     /* only the zero-indexed elements are compared */
    384     const float128 a = s390_vec_read_float128(v1);
    385     const float128 b = s390_vec_read_float128(v2);
    386     uint8_t vxc, vec_exc = 0;
    387     int cmp;
    388 
    389     if (signal) {
    390         cmp = float128_compare(a, b, &env->fpu_status);
    391     } else {
    392         cmp = float128_compare_quiet(a, b, &env->fpu_status);
    393     }
    394     vxc = check_ieee_exc(env, 0, false, &vec_exc);
    395     handle_ieee_exc(env, vxc, vec_exc, retaddr);
    396 
    397     return float_comp_to_cc(env, cmp);
    398 }
    399 
    400 #define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS)                                     \
    401 void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2,                 \
    402                                CPUS390XState *env, uint32_t desc)              \
    403 {                                                                              \
    404     env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC());                      \
    405 }
    406 
    407 #define DEF_GVEC_WFC(NAME, SIGNAL)                                             \
    408      DEF_GVEC_WFC_B(NAME, SIGNAL, 32)                                          \
    409      DEF_GVEC_WFC_B(NAME, SIGNAL, 64)                                          \
    410      DEF_GVEC_WFC_B(NAME, SIGNAL, 128)
    411 
    412 DEF_GVEC_WFC(wfc, false)
    413 DEF_GVEC_WFC(wfk, true)
    414 
    415 typedef bool (*vfc32_fn)(float32 a, float32 b, float_status *status);
    416 static int vfc32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
    417                  CPUS390XState *env, bool s, vfc32_fn fn, uintptr_t retaddr)
    418 {
    419     uint8_t vxc, vec_exc = 0;
    420     S390Vector tmp = {};
    421     int match = 0;
    422     int i;
    423 
    424     for (i = 0; i < 4; i++) {
    425         const float32 a = s390_vec_read_float32(v2, i);
    426         const float32 b = s390_vec_read_float32(v3, i);
    427 
    428         /* swap the order of the parameters, so we can use existing functions */
    429         if (fn(b, a, &env->fpu_status)) {
    430             match++;
    431             s390_vec_write_element32(&tmp, i, -1u);
    432         }
    433         vxc = check_ieee_exc(env, i, false, &vec_exc);
    434         if (s || vxc) {
    435             break;
    436         }
    437     }
    438 
    439     handle_ieee_exc(env, vxc, vec_exc, retaddr);
    440     *v1 = tmp;
    441     if (match) {
    442         return s || match == 4 ? 0 : 1;
    443     }
    444     return 3;
    445 }
    446 
    447 typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status);
    448 static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
    449                  CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr)
    450 {
    451     uint8_t vxc, vec_exc = 0;
    452     S390Vector tmp = {};
    453     int match = 0;
    454     int i;
    455 
    456     for (i = 0; i < 2; i++) {
    457         const float64 a = s390_vec_read_float64(v2, i);
    458         const float64 b = s390_vec_read_float64(v3, i);
    459 
    460         /* swap the order of the parameters, so we can use existing functions */
    461         if (fn(b, a, &env->fpu_status)) {
    462             match++;
    463             s390_vec_write_element64(&tmp, i, -1ull);
    464         }
    465         vxc = check_ieee_exc(env, i, false, &vec_exc);
    466         if (s || vxc) {
    467             break;
    468         }
    469     }
    470 
    471     handle_ieee_exc(env, vxc, vec_exc, retaddr);
    472     *v1 = tmp;
    473     if (match) {
    474         return s || match == 2 ? 0 : 1;
    475     }
    476     return 3;
    477 }
    478 
    479 typedef bool (*vfc128_fn)(float128 a, float128 b, float_status *status);
    480 static int vfc128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
    481                  CPUS390XState *env, bool s, vfc128_fn fn, uintptr_t retaddr)
    482 {
    483     const float128 a = s390_vec_read_float128(v2);
    484     const float128 b = s390_vec_read_float128(v3);
    485     uint8_t vxc, vec_exc = 0;
    486     S390Vector tmp = {};
    487     bool match = false;
    488 
    489     /* swap the order of the parameters, so we can use existing functions */
    490     if (fn(b, a, &env->fpu_status)) {
    491         match = true;
    492         s390_vec_write_element64(&tmp, 0, -1ull);
    493         s390_vec_write_element64(&tmp, 1, -1ull);
    494     }
    495     vxc = check_ieee_exc(env, 0, false, &vec_exc);
    496     handle_ieee_exc(env, vxc, vec_exc, retaddr);
    497     *v1 = tmp;
    498     return match ? 0 : 3;
    499 }
    500 
    501 #define DEF_GVEC_VFC_B(NAME, OP, BITS)                                         \
    502 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
    503                                CPUS390XState *env, uint32_t desc)              \
    504 {                                                                              \
    505     const bool se = extract32(simd_data(desc), 3, 1);                          \
    506     const bool sq = extract32(simd_data(desc), 2, 1);                          \
    507     vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet;  \
    508                                                                                \
    509     vfc##BITS(v1, v2, v3, env, se, fn, GETPC());                               \
    510 }                                                                              \
    511                                                                                \
    512 void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3,  \
    513                                     CPUS390XState *env, uint32_t desc)         \
    514 {                                                                              \
    515     const bool se = extract32(simd_data(desc), 3, 1);                          \
    516     const bool sq = extract32(simd_data(desc), 2, 1);                          \
    517     vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet;  \
    518                                                                                \
    519     env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC());                  \
    520 }
    521 
    522 #define DEF_GVEC_VFC(NAME, OP)                                                 \
    523 DEF_GVEC_VFC_B(NAME, OP, 32)                                                   \
    524 DEF_GVEC_VFC_B(NAME, OP, 64)                                                   \
    525 DEF_GVEC_VFC_B(NAME, OP, 128)                                                  \
    526 
    527 DEF_GVEC_VFC(vfce, eq)
    528 DEF_GVEC_VFC(vfch, lt)
    529 DEF_GVEC_VFC(vfche, le)
    530 
    531 void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env,
    532                          uint32_t desc)
    533 {
    534     const bool s = extract32(simd_data(desc), 3, 1);
    535     uint8_t vxc, vec_exc = 0;
    536     S390Vector tmp = {};
    537     int i;
    538 
    539     for (i = 0; i < 2; i++) {
    540         /* load from even element */
    541         const float32 a = s390_vec_read_element32(v2, i * 2);
    542         const uint64_t ret = float32_to_float64(a, &env->fpu_status);
    543 
    544         s390_vec_write_element64(&tmp, i, ret);
    545         /* indicate the source element */
    546         vxc = check_ieee_exc(env, i * 2, false, &vec_exc);
    547         if (s || vxc) {
    548             break;
    549         }
    550     }
    551     handle_ieee_exc(env, vxc, vec_exc, GETPC());
    552     *(S390Vector *)v1 = tmp;
    553 }
    554 
    555 void HELPER(gvec_vfll64)(void *v1, const void *v2, CPUS390XState *env,
    556                          uint32_t desc)
    557 {
    558     /* load from even element */
    559     const float128 ret = float64_to_float128(s390_vec_read_float64(v2, 0),
    560                                              &env->fpu_status);
    561     uint8_t vxc, vec_exc = 0;
    562 
    563     vxc = check_ieee_exc(env, 0, false, &vec_exc);
    564     handle_ieee_exc(env, vxc, vec_exc, GETPC());
    565     s390_vec_write_float128(v1, ret);
    566 }
    567 
    568 void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
    569                          uint32_t desc)
    570 {
    571     const uint8_t erm = extract32(simd_data(desc), 4, 4);
    572     const bool s = extract32(simd_data(desc), 3, 1);
    573     const bool XxC = extract32(simd_data(desc), 2, 1);
    574     uint8_t vxc, vec_exc = 0;
    575     S390Vector tmp = {};
    576     int i, old_mode;
    577 
    578     old_mode = s390_swap_bfp_rounding_mode(env, erm);
    579     for (i = 0; i < 2; i++) {
    580         float64 a = s390_vec_read_element64(v2, i);
    581         uint32_t ret = float64_to_float32(a, &env->fpu_status);
    582 
    583         /* place at even element */
    584         s390_vec_write_element32(&tmp, i * 2, ret);
    585         /* indicate the source element */
    586         vxc = check_ieee_exc(env, i, XxC, &vec_exc);
    587         if (s || vxc) {
    588             break;
    589         }
    590     }
    591     s390_restore_bfp_rounding_mode(env, old_mode);
    592     handle_ieee_exc(env, vxc, vec_exc, GETPC());
    593     *(S390Vector *)v1 = tmp;
    594 }
    595 
    596 void HELPER(gvec_vflr128)(void *v1, const void *v2, CPUS390XState *env,
    597                           uint32_t desc)
    598 {
    599     const uint8_t erm = extract32(simd_data(desc), 4, 4);
    600     const bool XxC = extract32(simd_data(desc), 2, 1);
    601     uint8_t vxc, vec_exc = 0;
    602     int old_mode;
    603     float64 ret;
    604 
    605     old_mode = s390_swap_bfp_rounding_mode(env, erm);
    606     ret = float128_to_float64(s390_vec_read_float128(v2), &env->fpu_status);
    607     vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
    608     s390_restore_bfp_rounding_mode(env, old_mode);
    609     handle_ieee_exc(env, vxc, vec_exc, GETPC());
    610 
    611     /* place at even element, odd element is unpredictable */
    612     s390_vec_write_float64(v1, 0, ret);
    613 }
    614 
    615 static void vfma32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
    616                    const S390Vector *v4, CPUS390XState *env, bool s, int flags,
    617                    uintptr_t retaddr)
    618 {
    619     uint8_t vxc, vec_exc = 0;
    620     S390Vector tmp = {};
    621     int i;
    622 
    623     for (i = 0; i < 4; i++) {
    624         const float32 a = s390_vec_read_float32(v2, i);
    625         const float32 b = s390_vec_read_float32(v3, i);
    626         const float32 c = s390_vec_read_float32(v4, i);
    627         float32 ret = float32_muladd(a, b, c, flags, &env->fpu_status);
    628 
    629         s390_vec_write_float32(&tmp, i, ret);
    630         vxc = check_ieee_exc(env, i, false, &vec_exc);
    631         if (s || vxc) {
    632             break;
    633         }
    634     }
    635     handle_ieee_exc(env, vxc, vec_exc, retaddr);
    636     *v1 = tmp;
    637 }
    638 
    639 static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
    640                    const S390Vector *v4, CPUS390XState *env, bool s, int flags,
    641                    uintptr_t retaddr)
    642 {
    643     uint8_t vxc, vec_exc = 0;
    644     S390Vector tmp = {};
    645     int i;
    646 
    647     for (i = 0; i < 2; i++) {
    648         const float64 a = s390_vec_read_float64(v2, i);
    649         const float64 b = s390_vec_read_float64(v3, i);
    650         const float64 c = s390_vec_read_float64(v4, i);
    651         const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status);
    652 
    653         s390_vec_write_float64(&tmp, i, ret);
    654         vxc = check_ieee_exc(env, i, false, &vec_exc);
    655         if (s || vxc) {
    656             break;
    657         }
    658     }
    659     handle_ieee_exc(env, vxc, vec_exc, retaddr);
    660     *v1 = tmp;
    661 }
    662 
    663 static void vfma128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
    664                     const S390Vector *v4, CPUS390XState *env, bool s, int flags,
    665                     uintptr_t retaddr)
    666 {
    667     const float128 a = s390_vec_read_float128(v2);
    668     const float128 b = s390_vec_read_float128(v3);
    669     const float128 c = s390_vec_read_float128(v4);
    670     uint8_t vxc, vec_exc = 0;
    671     float128 ret;
    672 
    673     ret = float128_muladd(a, b, c, flags, &env->fpu_status);
    674     vxc = check_ieee_exc(env, 0, false, &vec_exc);
    675     handle_ieee_exc(env, vxc, vec_exc, retaddr);
    676     s390_vec_write_float128(v1, ret);
    677 }
    678 
    679 #define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS)                                     \
    680 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
    681                                const void *v4, CPUS390XState *env,             \
    682                                uint32_t desc)                                  \
    683 {                                                                              \
    684     const bool se = extract32(simd_data(desc), 3, 1);                          \
    685                                                                                \
    686     vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC());                       \
    687 }
    688 
    689 #define DEF_GVEC_VFMA(NAME, FLAGS)                                             \
    690     DEF_GVEC_VFMA_B(NAME, FLAGS, 32)                                           \
    691     DEF_GVEC_VFMA_B(NAME, FLAGS, 64)                                           \
    692     DEF_GVEC_VFMA_B(NAME, FLAGS, 128)
    693 
    694 DEF_GVEC_VFMA(vfma, 0)
    695 DEF_GVEC_VFMA(vfms, float_muladd_negate_c)
    696 DEF_GVEC_VFMA(vfnma, float_muladd_negate_result)
    697 DEF_GVEC_VFMA(vfnms, float_muladd_negate_c | float_muladd_negate_result)
    698 
    699 void HELPER(gvec_vftci32)(void *v1, const void *v2, CPUS390XState *env,
    700                           uint32_t desc)
    701 {
    702     uint16_t i3 = extract32(simd_data(desc), 4, 12);
    703     bool s = extract32(simd_data(desc), 3, 1);
    704     int i, match = 0;
    705 
    706     for (i = 0; i < 4; i++) {
    707         float32 a = s390_vec_read_float32(v2, i);
    708 
    709         if (float32_dcmask(env, a) & i3) {
    710             match++;
    711             s390_vec_write_element32(v1, i, -1u);
    712         } else {
    713             s390_vec_write_element32(v1, i, 0);
    714         }
    715         if (s) {
    716             break;
    717         }
    718     }
    719 
    720     if (match == 4 || (s && match)) {
    721         env->cc_op = 0;
    722     } else if (match) {
    723         env->cc_op = 1;
    724     } else {
    725         env->cc_op = 3;
    726     }
    727 }
    728 
    729 void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
    730                           uint32_t desc)
    731 {
    732     const uint16_t i3 = extract32(simd_data(desc), 4, 12);
    733     const bool s = extract32(simd_data(desc), 3, 1);
    734     int i, match = 0;
    735 
    736     for (i = 0; i < 2; i++) {
    737         const float64 a = s390_vec_read_float64(v2, i);
    738 
    739         if (float64_dcmask(env, a) & i3) {
    740             match++;
    741             s390_vec_write_element64(v1, i, -1ull);
    742         } else {
    743             s390_vec_write_element64(v1, i, 0);
    744         }
    745         if (s) {
    746             break;
    747         }
    748     }
    749 
    750     if (match == 2 || (s && match)) {
    751         env->cc_op = 0;
    752     } else if (match) {
    753         env->cc_op = 1;
    754     } else {
    755         env->cc_op = 3;
    756     }
    757 }
    758 
    759 void HELPER(gvec_vftci128)(void *v1, const void *v2, CPUS390XState *env,
    760                            uint32_t desc)
    761 {
    762     const float128 a = s390_vec_read_float128(v2);
    763     uint16_t i3 = extract32(simd_data(desc), 4, 12);
    764 
    765     if (float128_dcmask(env, a) & i3) {
    766         env->cc_op = 0;
    767         s390_vec_write_element64(v1, 0, -1ull);
    768         s390_vec_write_element64(v1, 1, -1ull);
    769     } else {
    770         env->cc_op = 3;
    771         s390_vec_write_element64(v1, 0, 0);
    772         s390_vec_write_element64(v1, 1, 0);
    773     }
    774 }
    775 
    776 typedef enum S390MinMaxType {
    777     S390_MINMAX_TYPE_IEEE = 0,
    778     S390_MINMAX_TYPE_JAVA,
    779     S390_MINMAX_TYPE_C_MACRO,
    780     S390_MINMAX_TYPE_CPP,
    781     S390_MINMAX_TYPE_F,
    782 } S390MinMaxType;
    783 
    784 typedef enum S390MinMaxRes {
    785     S390_MINMAX_RES_MINMAX = 0,
    786     S390_MINMAX_RES_A,
    787     S390_MINMAX_RES_B,
    788     S390_MINMAX_RES_SILENCE_A,
    789     S390_MINMAX_RES_SILENCE_B,
    790 } S390MinMaxRes;
    791 
    792 static S390MinMaxRes vfmin_res(uint16_t dcmask_a, uint16_t dcmask_b,
    793                                S390MinMaxType type, float_status *s)
    794 {
    795     const bool neg_a = dcmask_a & DCMASK_NEGATIVE;
    796     const bool nan_a = dcmask_a & DCMASK_NAN;
    797     const bool nan_b = dcmask_b & DCMASK_NAN;
    798 
    799     g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F);
    800 
    801     if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) {
    802         const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN;
    803         const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN;
    804 
    805         if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) {
    806             s->float_exception_flags |= float_flag_invalid;
    807         }
    808         switch (type) {
    809         case S390_MINMAX_TYPE_JAVA:
    810             if (sig_a) {
    811                 return S390_MINMAX_RES_SILENCE_A;
    812             } else if (sig_b) {
    813                 return S390_MINMAX_RES_SILENCE_B;
    814             }
    815             return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
    816         case S390_MINMAX_TYPE_F:
    817             return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
    818         case S390_MINMAX_TYPE_C_MACRO:
    819             s->float_exception_flags |= float_flag_invalid;
    820             return S390_MINMAX_RES_B;
    821         case S390_MINMAX_TYPE_CPP:
    822             s->float_exception_flags |= float_flag_invalid;
    823             return S390_MINMAX_RES_A;
    824         default:
    825             g_assert_not_reached();
    826         }
    827     } else if (unlikely((dcmask_a & DCMASK_ZERO) && (dcmask_b & DCMASK_ZERO))) {
    828         switch (type) {
    829         case S390_MINMAX_TYPE_JAVA:
    830             return neg_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
    831         case S390_MINMAX_TYPE_C_MACRO:
    832             return S390_MINMAX_RES_B;
    833         case S390_MINMAX_TYPE_F:
    834             return !neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A;
    835         case S390_MINMAX_TYPE_CPP:
    836             return S390_MINMAX_RES_A;
    837         default:
    838             g_assert_not_reached();
    839         }
    840     }
    841     return S390_MINMAX_RES_MINMAX;
    842 }
    843 
    844 static S390MinMaxRes vfmax_res(uint16_t dcmask_a, uint16_t dcmask_b,
    845                                S390MinMaxType type, float_status *s)
    846 {
    847     g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F);
    848 
    849     if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) {
    850         const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN;
    851         const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN;
    852         const bool nan_a = dcmask_a & DCMASK_NAN;
    853         const bool nan_b = dcmask_b & DCMASK_NAN;
    854 
    855         if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) {
    856             s->float_exception_flags |= float_flag_invalid;
    857         }
    858         switch (type) {
    859         case S390_MINMAX_TYPE_JAVA:
    860             if (sig_a) {
    861                 return S390_MINMAX_RES_SILENCE_A;
    862             } else if (sig_b) {
    863                 return S390_MINMAX_RES_SILENCE_B;
    864             }
    865             return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
    866         case S390_MINMAX_TYPE_F:
    867             return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
    868         case S390_MINMAX_TYPE_C_MACRO:
    869             s->float_exception_flags |= float_flag_invalid;
    870             return S390_MINMAX_RES_B;
    871         case S390_MINMAX_TYPE_CPP:
    872             s->float_exception_flags |= float_flag_invalid;
    873             return S390_MINMAX_RES_A;
    874         default:
    875             g_assert_not_reached();
    876         }
    877     } else if (unlikely((dcmask_a & DCMASK_ZERO) && (dcmask_b & DCMASK_ZERO))) {
    878         const bool neg_a = dcmask_a & DCMASK_NEGATIVE;
    879 
    880         switch (type) {
    881         case S390_MINMAX_TYPE_JAVA:
    882         case S390_MINMAX_TYPE_F:
    883             return neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A;
    884         case S390_MINMAX_TYPE_C_MACRO:
    885             return S390_MINMAX_RES_B;
    886         case S390_MINMAX_TYPE_CPP:
    887             return S390_MINMAX_RES_A;
    888         default:
    889             g_assert_not_reached();
    890         }
    891     }
    892     return S390_MINMAX_RES_MINMAX;
    893 }
    894 
    895 static S390MinMaxRes vfminmax_res(uint16_t dcmask_a, uint16_t dcmask_b,
    896                                   S390MinMaxType type, bool is_min,
    897                                   float_status *s)
    898 {
    899     return is_min ? vfmin_res(dcmask_a, dcmask_b, type, s) :
    900                     vfmax_res(dcmask_a, dcmask_b, type, s);
    901 }
    902 
    903 static void vfminmax32(S390Vector *v1, const S390Vector *v2,
    904                        const S390Vector *v3, CPUS390XState *env,
    905                        S390MinMaxType type, bool is_min, bool is_abs, bool se,
    906                        uintptr_t retaddr)
    907 {
    908     float_status *s = &env->fpu_status;
    909     uint8_t vxc, vec_exc = 0;
    910     S390Vector tmp = {};
    911     int i;
    912 
    913     for (i = 0; i < 4; i++) {
    914         float32 a = s390_vec_read_float32(v2, i);
    915         float32 b = s390_vec_read_float32(v3, i);
    916         float32 result;
    917 
    918         if (type != S390_MINMAX_TYPE_IEEE) {
    919             S390MinMaxRes res;
    920 
    921             if (is_abs) {
    922                 a = float32_abs(a);
    923                 b = float32_abs(b);
    924             }
    925 
    926             res = vfminmax_res(float32_dcmask(env, a), float32_dcmask(env, b),
    927                                type, is_min, s);
    928             switch (res) {
    929             case S390_MINMAX_RES_MINMAX:
    930                 result = is_min ? float32_min(a, b, s) : float32_max(a, b, s);
    931                 break;
    932             case S390_MINMAX_RES_A:
    933                 result = a;
    934                 break;
    935             case S390_MINMAX_RES_B:
    936                 result = b;
    937                 break;
    938             case S390_MINMAX_RES_SILENCE_A:
    939                 result = float32_silence_nan(a, s);
    940                 break;
    941             case S390_MINMAX_RES_SILENCE_B:
    942                 result = float32_silence_nan(b, s);
    943                 break;
    944             default:
    945                 g_assert_not_reached();
    946             }
    947         } else if (!is_abs) {
    948             result = is_min ? float32_minnum(a, b, &env->fpu_status) :
    949                               float32_maxnum(a, b, &env->fpu_status);
    950         } else {
    951             result = is_min ? float32_minnummag(a, b, &env->fpu_status) :
    952                               float32_maxnummag(a, b, &env->fpu_status);
    953         }
    954 
    955         s390_vec_write_float32(&tmp, i, result);
    956         vxc = check_ieee_exc(env, i, false, &vec_exc);
    957         if (se || vxc) {
    958             break;
    959         }
    960     }
    961     handle_ieee_exc(env, vxc, vec_exc, retaddr);
    962     *v1 = tmp;
    963 }
    964 
    965 static void vfminmax64(S390Vector *v1, const S390Vector *v2,
    966                        const S390Vector *v3, CPUS390XState *env,
    967                        S390MinMaxType type, bool is_min, bool is_abs, bool se,
    968                        uintptr_t retaddr)
    969 {
    970     float_status *s = &env->fpu_status;
    971     uint8_t vxc, vec_exc = 0;
    972     S390Vector tmp = {};
    973     int i;
    974 
    975     for (i = 0; i < 2; i++) {
    976         float64 a = s390_vec_read_float64(v2, i);
    977         float64 b = s390_vec_read_float64(v3, i);
    978         float64 result;
    979 
    980         if (type != S390_MINMAX_TYPE_IEEE) {
    981             S390MinMaxRes res;
    982 
    983             if (is_abs) {
    984                 a = float64_abs(a);
    985                 b = float64_abs(b);
    986             }
    987 
    988             res = vfminmax_res(float64_dcmask(env, a), float64_dcmask(env, b),
    989                                type, is_min, s);
    990             switch (res) {
    991             case S390_MINMAX_RES_MINMAX:
    992                 result = is_min ? float64_min(a, b, s) : float64_max(a, b, s);
    993                 break;
    994             case S390_MINMAX_RES_A:
    995                 result = a;
    996                 break;
    997             case S390_MINMAX_RES_B:
    998                 result = b;
    999                 break;
   1000             case S390_MINMAX_RES_SILENCE_A:
   1001                 result = float64_silence_nan(a, s);
   1002                 break;
   1003             case S390_MINMAX_RES_SILENCE_B:
   1004                 result = float64_silence_nan(b, s);
   1005                 break;
   1006             default:
   1007                 g_assert_not_reached();
   1008             }
   1009         } else if (!is_abs) {
   1010             result = is_min ? float64_minnum(a, b, &env->fpu_status) :
   1011                               float64_maxnum(a, b, &env->fpu_status);
   1012         } else {
   1013             result = is_min ? float64_minnummag(a, b, &env->fpu_status) :
   1014                               float64_maxnummag(a, b, &env->fpu_status);
   1015         }
   1016 
   1017         s390_vec_write_float64(&tmp, i, result);
   1018         vxc = check_ieee_exc(env, i, false, &vec_exc);
   1019         if (se || vxc) {
   1020             break;
   1021         }
   1022     }
   1023     handle_ieee_exc(env, vxc, vec_exc, retaddr);
   1024     *v1 = tmp;
   1025 }
   1026 
   1027 static void vfminmax128(S390Vector *v1, const S390Vector *v2,
   1028                         const S390Vector *v3, CPUS390XState *env,
   1029                         S390MinMaxType type, bool is_min, bool is_abs, bool se,
   1030                         uintptr_t retaddr)
   1031 {
   1032     float128 a = s390_vec_read_float128(v2);
   1033     float128 b = s390_vec_read_float128(v3);
   1034     float_status *s = &env->fpu_status;
   1035     uint8_t vxc, vec_exc = 0;
   1036     float128 result;
   1037 
   1038     if (type != S390_MINMAX_TYPE_IEEE) {
   1039         S390MinMaxRes res;
   1040 
   1041         if (is_abs) {
   1042             a = float128_abs(a);
   1043             b = float128_abs(b);
   1044         }
   1045 
   1046         res = vfminmax_res(float128_dcmask(env, a), float128_dcmask(env, b),
   1047                            type, is_min, s);
   1048         switch (res) {
   1049         case S390_MINMAX_RES_MINMAX:
   1050             result = is_min ? float128_min(a, b, s) : float128_max(a, b, s);
   1051             break;
   1052         case S390_MINMAX_RES_A:
   1053             result = a;
   1054             break;
   1055         case S390_MINMAX_RES_B:
   1056             result = b;
   1057             break;
   1058         case S390_MINMAX_RES_SILENCE_A:
   1059             result = float128_silence_nan(a, s);
   1060             break;
   1061         case S390_MINMAX_RES_SILENCE_B:
   1062             result = float128_silence_nan(b, s);
   1063             break;
   1064         default:
   1065             g_assert_not_reached();
   1066         }
   1067     } else if (!is_abs) {
   1068         result = is_min ? float128_minnum(a, b, &env->fpu_status) :
   1069                           float128_maxnum(a, b, &env->fpu_status);
   1070     } else {
   1071         result = is_min ? float128_minnummag(a, b, &env->fpu_status) :
   1072                           float128_maxnummag(a, b, &env->fpu_status);
   1073     }
   1074 
   1075     vxc = check_ieee_exc(env, 0, false, &vec_exc);
   1076     handle_ieee_exc(env, vxc, vec_exc, retaddr);
   1077     s390_vec_write_float128(v1, result);
   1078 }
   1079 
   1080 #define DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, BITS)                                \
   1081 void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
   1082                                CPUS390XState *env, uint32_t desc)              \
   1083 {                                                                              \
   1084     const bool se = extract32(simd_data(desc), 3, 1);                          \
   1085     uint8_t type = extract32(simd_data(desc), 4, 4);                           \
   1086     bool is_abs = false;                                                       \
   1087                                                                                \
   1088     if (type >= 8) {                                                           \
   1089         is_abs = true;                                                         \
   1090         type -= 8;                                                             \
   1091     }                                                                          \
   1092                                                                                \
   1093     vfminmax##BITS(v1, v2, v3, env, type, IS_MIN, is_abs, se, GETPC());        \
   1094 }
   1095 
   1096 #define DEF_GVEC_VFMINMAX(NAME, IS_MIN)                                        \
   1097     DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 32)                                      \
   1098     DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 64)                                      \
   1099     DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 128)
   1100 
   1101 DEF_GVEC_VFMINMAX(vfmax, false)
   1102 DEF_GVEC_VFMINMAX(vfmin, true)