qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git

int_helper.c (98462B)


      1 /*
      2  *  PowerPC integer and vector emulation helpers for QEMU.
      3  *
      4  *  Copyright (c) 2003-2007 Jocelyn Mayer
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Lesser General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2.1 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Lesser General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Lesser General Public
     17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     18  */
     19 
     20 #include "qemu/osdep.h"
     21 #include "cpu.h"
     22 #include "internal.h"
     23 #include "qemu/host-utils.h"
     24 #include "qemu/main-loop.h"
     25 #include "qemu/log.h"
     26 #include "exec/helper-proto.h"
     27 #include "crypto/aes.h"
     28 #include "fpu/softfloat.h"
     29 #include "qapi/error.h"
     30 #include "qemu/guest-random.h"
     31 #include "tcg/tcg-gvec-desc.h"
     32 
     33 #include "helper_regs.h"
     34 /*****************************************************************************/
     35 /* Fixed point operations helpers */
     36 
     37 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
     38 {
     39     if (unlikely(ov)) {
     40         env->so = env->ov = env->ov32 = 1;
     41     } else {
     42         env->ov = env->ov32 = 0;
     43     }
     44 }
     45 
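         /*
          * Divide Word Extended (divweu/divwe): the 64-bit dividend is formed
          * from the low 32 bits of ra followed by 32 zero bits.  If the
          * divisor is zero or the quotient does not fit in 32 bits, the
          * result is undefined and OV/OV32 are set when OE is in effect.
          */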
     46 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
     47                            uint32_t oe)
     48 {
     49     uint64_t rt = 0;
     50     int overflow = 0;
     51 
     52     uint64_t dividend = (uint64_t)ra << 32;
     53     uint64_t divisor = (uint32_t)rb;
     54 
     55     if (unlikely(divisor == 0)) {
     56         overflow = 1;
     57     } else {
     58         rt = dividend / divisor;
     59         overflow = rt > UINT32_MAX;
     60     }
     61 
     62     if (unlikely(overflow)) {
     63         rt = 0; /* Undefined */
     64     }
     65 
     66     if (oe) {
     67         helper_update_ov_legacy(env, overflow);
     68     }
     69 
     70     return (target_ulong)rt;
     71 }
     72 
     73 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
     74                           uint32_t oe)
     75 {
     76     int64_t rt = 0;
     77     int overflow = 0;
     78 
     79     int64_t dividend = (int64_t)ra << 32;
     80     int64_t divisor = (int64_t)((int32_t)rb);
     81 
     82     if (unlikely((divisor == 0) ||
     83                  ((divisor == -1ull) && (dividend == INT64_MIN)))) {
     84         overflow = 1;
     85     } else {
     86         rt = dividend / divisor;
     87         overflow = rt != (int32_t)rt;
     88     }
     89 
     90     if (unlikely(overflow)) {
     91         rt = 0; /* Undefined */
     92     }
     93 
     94     if (oe) {
     95         helper_update_ov_legacy(env, overflow);
     96     }
     97 
     98     return (target_ulong)rt;
     99 }
    100 
    101 #if defined(TARGET_PPC64)
    102 
    103 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
    104 {
    105     uint64_t rt = 0;
    106     int overflow = 0;
    107 
    108     if (unlikely(rb == 0 || ra >= rb)) {
    109         overflow = 1;
    110         rt = 0; /* Undefined */
    111     } else {
    112         divu128(&rt, &ra, rb);
    113     }
    114 
    115     if (oe) {
    116         helper_update_ov_legacy(env, overflow);
    117     }
    118 
    119     return rt;
    120 }
    121 
    122 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
    123 {
    124     uint64_t rt = 0;
    125     int64_t ra = (int64_t)rau;
    126     int64_t rb = (int64_t)rbu;
    127     int overflow = 0;
    128 
    129     if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
    130         overflow = 1;
    131         rt = 0; /* Undefined */
    132     } else {
    133         divs128(&rt, &ra, rb);
    134     }
    135 
    136     if (oe) {
    137         helper_update_ov_legacy(env, overflow);
    138     }
    139 
    140     return rt;
    141 }
    142 
    143 #endif
    144 
    145 
    146 #if defined(TARGET_PPC64)
     147 /* if x = 0xab, returns 0xabababababababab */
    148 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
    149 
    150 /*
     151  * Subtract 1 from each byte, AND with the inverse of the original value,
     152  * and check whether the MSB of each byte is set.
    153  * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
    154  *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
    155  */
    156 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
    157 
    158 /* When you XOR the pattern and there is a match, that byte will be zero */
    159 #define hasvalue(x, n)  (haszero((x) ^ pattern(n)))
    160 
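         /*
          * cmpeqb: set the CR-field GT bit if any byte of rb equals the
          * least-significant byte of ra.
          */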
    161 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
    162 {
    163     return hasvalue(rb, ra) ? CRF_GT : 0;
    164 }
    165 
    166 #undef pattern
    167 #undef haszero
    168 #undef hasvalue
    169 
    170 /*
    171  * Return a random number.
    172  */
    173 uint64_t helper_darn32(void)
    174 {
    175     Error *err = NULL;
    176     uint32_t ret;
    177 
    178     if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
    179         qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
    180                       error_get_pretty(err));
    181         error_free(err);
    182         return -1;
    183     }
    184 
    185     return ret;
    186 }
    187 
    188 uint64_t helper_darn64(void)
    189 {
    190     Error *err = NULL;
    191     uint64_t ret;
    192 
    193     if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
    194         qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
    195                       error_get_pretty(err));
    196         error_free(err);
    197         return -1;
    198     }
    199 
    200     return ret;
    201 }
    202 
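         /*
          * bpermd: each byte i (counting from the least-significant byte) of
          * rs is a bit index into rb in big-endian numbering (PPC_BIT); the
          * selected bit becomes bit i of the result, and indices >= 64
          * contribute 0.
          */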
    203 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
    204 {
    205     int i;
    206     uint64_t ra = 0;
    207 
    208     for (i = 0; i < 8; i++) {
    209         int index = (rs >> (i * 8)) & 0xFF;
    210         if (index < 64) {
    211             if (rb & PPC_BIT(index)) {
    212                 ra |= 1 << i;
    213             }
    214         }
    215     }
    216     return ra;
    217 }
    218 
    219 #endif
    220 
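         /*
          * cmpb: each byte of the result is 0xff where the corresponding
          * bytes of rs and rb are equal, and 0x00 otherwise.
          */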
    221 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
    222 {
    223     target_ulong mask = 0xff;
    224     target_ulong ra = 0;
    225     int i;
    226 
    227     for (i = 0; i < sizeof(target_ulong); i++) {
    228         if ((rs & mask) == (rb & mask)) {
    229             ra |= mask;
    230         }
    231         mask <<= 8;
    232     }
    233     return ra;
    234 }
    235 
    236 /* shift right arithmetic helper */
    237 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
    238                          target_ulong shift)
    239 {
    240     int32_t ret;
    241 
    242     if (likely(!(shift & 0x20))) {
    243         if (likely((uint32_t)shift != 0)) {
    244             shift &= 0x1f;
    245             ret = (int32_t)value >> shift;
    246             if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
    247                 env->ca32 = env->ca = 0;
    248             } else {
    249                 env->ca32 = env->ca = 1;
    250             }
    251         } else {
    252             ret = (int32_t)value;
    253             env->ca32 = env->ca = 0;
    254         }
    255     } else {
    256         ret = (int32_t)value >> 31;
    257         env->ca32 = env->ca = (ret != 0);
    258     }
    259     return (target_long)ret;
    260 }
    261 
    262 #if defined(TARGET_PPC64)
    263 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
    264                          target_ulong shift)
    265 {
    266     int64_t ret;
    267 
    268     if (likely(!(shift & 0x40))) {
    269         if (likely((uint64_t)shift != 0)) {
    270             shift &= 0x3f;
    271             ret = (int64_t)value >> shift;
    272             if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
    273                 env->ca32 = env->ca = 0;
    274             } else {
    275                 env->ca32 = env->ca = 1;
    276             }
    277         } else {
    278             ret = (int64_t)value;
    279             env->ca32 = env->ca = 0;
    280         }
    281     } else {
    282         ret = (int64_t)value >> 63;
    283         env->ca32 = env->ca = (ret != 0);
    284     }
    285     return ret;
    286 }
    287 #endif
    288 
    289 #if defined(TARGET_PPC64)
    290 target_ulong helper_popcntb(target_ulong val)
    291 {
    292     /* Note that we don't fold past bytes */
    293     val = (val & 0x5555555555555555ULL) + ((val >>  1) &
    294                                            0x5555555555555555ULL);
    295     val = (val & 0x3333333333333333ULL) + ((val >>  2) &
    296                                            0x3333333333333333ULL);
    297     val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
    298                                            0x0f0f0f0f0f0f0f0fULL);
    299     return val;
    300 }
    301 
    302 target_ulong helper_popcntw(target_ulong val)
    303 {
    304     /* Note that we don't fold past words.  */
    305     val = (val & 0x5555555555555555ULL) + ((val >>  1) &
    306                                            0x5555555555555555ULL);
    307     val = (val & 0x3333333333333333ULL) + ((val >>  2) &
    308                                            0x3333333333333333ULL);
    309     val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
    310                                            0x0f0f0f0f0f0f0f0fULL);
    311     val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
    312                                            0x00ff00ff00ff00ffULL);
    313     val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
    314                                            0x0000ffff0000ffffULL);
    315     return val;
    316 }
    317 #else
    318 target_ulong helper_popcntb(target_ulong val)
    319 {
    320     /* Note that we don't fold past bytes */
    321     val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
    322     val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
    323     val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
    324     return val;
    325 }
    326 #endif
    327 
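         /*
          * cfuged (centrifuge doubleword): the bits of src selected by 1 bits
          * of mask are gathered, order preserved, into the low-order end of
          * the result; the bits selected by 0 bits of mask fill the
          * high-order end.
          */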
    328 uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
    329 {
    330     /*
    331      * Instead of processing the mask bit-by-bit from the most significant to
    332      * the least significant bit, as described in PowerISA, we'll handle it in
    333      * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
    334      * ctz or cto, we negate the mask at the end of the loop.
    335      */
    336     target_ulong m, left = 0, right = 0;
    337     unsigned int n, i = 64;
    338     bool bit = false; /* tracks if we are processing zeros or ones */
    339 
    340     if (mask == 0 || mask == -1) {
    341         return src;
    342     }
    343 
    344     /* Processes the mask in blocks, from LSB to MSB */
    345     while (i) {
    346         /* Find how many bits we should take */
    347         n = ctz64(mask);
    348         if (n > i) {
    349             n = i;
    350         }
    351 
    352         /*
    353          * Extracts 'n' trailing bits of src and put them on the leading 'n'
    354          * bits of 'right' or 'left', pushing down the previously extracted
    355          * values.
    356          */
    357         m = (1ll << n) - 1;
    358         if (bit) {
    359             right = ror64(right | (src & m), n);
    360         } else {
    361             left = ror64(left | (src & m), n);
    362         }
    363 
    364         /*
    365          * Discards the processed bits from 'src' and 'mask'. Note that we are
    366          * removing 'n' trailing zeros from 'mask', but the logical shift will
    367          * add 'n' leading zeros back, so the population count of 'mask' is kept
    368          * the same.
    369          */
    370         src >>= n;
    371         mask >>= n;
    372         i -= n;
    373         bit = !bit;
    374         mask = ~mask;
    375     }
    376 
    377     /*
     378      * At the end, right was rotated right by ctpop(mask) bits. To put it
     379      * back in place, we shift it right by a further 64 - ctpop(mask) bits.
    380      */
    381     if (bit) {
    382         n = ctpop64(mask);
    383     } else {
    384         n = 64 - ctpop64(mask);
    385     }
    386 
    387     return left | (right >> n);
    388 }
    389 
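         /*
          * pdepd (parallel bits deposit): successive low-order bits of src
          * are scattered to the positions of the set bits in mask.
          */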
    390 uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
    391 {
    392     int i, o;
    393     uint64_t result = 0;
    394 
    395     if (mask == -1) {
    396         return src;
    397     }
    398 
    399     for (i = 0; mask != 0; i++) {
    400         o = ctz64(mask);
    401         mask &= mask - 1;
    402         result |= ((src >> i) & 1) << o;
    403     }
    404 
    405     return result;
    406 }
    407 
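         /*
          * pextd (parallel bits extract): the bits of src at the positions of
          * the set bits in mask are gathered into the low-order bits of the
          * result.
          */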
    408 uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
    409 {
    410     int i, o;
    411     uint64_t result = 0;
    412 
    413     if (mask == -1) {
    414         return src;
    415     }
    416 
    417     for (o = 0; mask != 0; o++) {
    418         i = ctz64(mask);
    419         mask &= mask - 1;
    420         result |= ((src >> i) & 1) << o;
    421     }
    422 
    423     return result;
    424 }
    425 
    426 /*****************************************************************************/
    427 /* Altivec extension helpers */
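         /*
          * Iterate over the elements of a vector register in PowerISA element
          * order (element 0 first), regardless of host byte order.
          */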
    428 #if HOST_BIG_ENDIAN
    429 #define VECTOR_FOR_INORDER_I(index, element)                    \
    430     for (index = 0; index < ARRAY_SIZE(r->element); index++)
    431 #else
    432 #define VECTOR_FOR_INORDER_I(index, element)                    \
    433     for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
    434 #endif
    435 
    436 /* Saturating arithmetic helpers.  */
    437 #define SATCVT(from, to, from_type, to_type, min, max)          \
    438     static inline to_type cvt##from##to(from_type x, int *sat)  \
    439     {                                                           \
    440         to_type r;                                              \
    441                                                                 \
    442         if (x < (from_type)min) {                               \
    443             r = min;                                            \
    444             *sat = 1;                                           \
    445         } else if (x > (from_type)max) {                        \
    446             r = max;                                            \
    447             *sat = 1;                                           \
    448         } else {                                                \
    449             r = x;                                              \
    450         }                                                       \
    451         return r;                                               \
    452     }
    453 #define SATCVTU(from, to, from_type, to_type, min, max)         \
    454     static inline to_type cvt##from##to(from_type x, int *sat)  \
    455     {                                                           \
    456         to_type r;                                              \
    457                                                                 \
    458         if (x > (from_type)max) {                               \
    459             r = max;                                            \
    460             *sat = 1;                                           \
    461         } else {                                                \
    462             r = x;                                              \
    463         }                                                       \
    464         return r;                                               \
    465     }
    466 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
    467 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
    468 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
    469 
    470 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
    471 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
    472 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
    473 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
    474 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
    475 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
    476 #undef SATCVT
    477 #undef SATCVTU
    478 
    479 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
    480 {
    481     ppc_store_vscr(env, vscr);
    482 }
    483 
    484 uint32_t helper_mfvscr(CPUPPCState *env)
    485 {
    486     return ppc_get_vscr(env);
    487 }
    488 
    489 static inline void set_vscr_sat(CPUPPCState *env)
    490 {
    491     /* The choice of non-zero value is arbitrary.  */
    492     env->vscr_sat.u32[0] = 1;
    493 }
    494 
     496 /* vprtybq: parity (XOR) of bit 0 of each byte of b */
    496 void helper_VPRTYBQ(ppc_avr_t *r, ppc_avr_t *b, uint32_t v)
    497 {
    498     uint64_t res = b->u64[0] ^ b->u64[1];
    499     res ^= res >> 32;
    500     res ^= res >> 16;
    501     res ^= res >> 8;
    502     r->VsrD(1) = res & 1;
    503     r->VsrD(0) = 0;
    504 }
    505 
    506 #define VARITHFP(suffix, func)                                          \
    507     void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
    508                           ppc_avr_t *b)                                 \
    509     {                                                                   \
    510         int i;                                                          \
    511                                                                         \
    512         for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
    513             r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
    514         }                                                               \
    515     }
    516 VARITHFP(addfp, float32_add)
    517 VARITHFP(subfp, float32_sub)
    518 VARITHFP(minfp, float32_min)
    519 VARITHFP(maxfp, float32_max)
    520 #undef VARITHFP
    521 
    522 #define VARITHFPFMA(suffix, type)                                       \
    523     void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
    524                            ppc_avr_t *b, ppc_avr_t *c)                  \
    525     {                                                                   \
    526         int i;                                                          \
    527         for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
    528             r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
    529                                        type, &env->vec_status);         \
    530         }                                                               \
    531     }
    532 VARITHFPFMA(maddfp, 0);
    533 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
    534 #undef VARITHFPFMA
    535 
    536 #define VARITHSAT_CASE(type, op, cvt, element)                          \
    537     {                                                                   \
    538         type result = (type)a->element[i] op (type)b->element[i];       \
    539         r->element[i] = cvt(result, &sat);                              \
    540     }
    541 
    542 #define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    543     void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat,              \
    544                         ppc_avr_t *a, ppc_avr_t *b, uint32_t desc)      \
    545     {                                                                   \
    546         int sat = 0;                                                    \
    547         int i;                                                          \
    548                                                                         \
    549         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
    550             VARITHSAT_CASE(optype, op, cvt, element);                   \
    551         }                                                               \
    552         if (sat) {                                                      \
    553             vscr_sat->u32[0] = 1;                                       \
    554         }                                                               \
    555     }
    556 #define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    557     VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    558     VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
    559 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    560     VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    561     VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
    562 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
    563 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
    564 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
    565 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
    566 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
    567 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
    568 #undef VARITHSAT_CASE
    569 #undef VARITHSAT_DO
    570 #undef VARITHSAT_SIGNED
    571 #undef VARITHSAT_UNSIGNED
    572 
    573 #define VAVG(name, element, etype)                                          \
    574     void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
    575     {                                                                       \
    576         int i;                                                              \
    577                                                                             \
    578         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
    579             etype x = (etype)a->element[i] + (etype)b->element[i] + 1;      \
    580             r->element[i] = x >> 1;                                         \
    581         }                                                                   \
    582     }
    583 
    584 VAVG(VAVGSB, s8, int16_t)
    585 VAVG(VAVGUB, u8, uint16_t)
    586 VAVG(VAVGSH, s16, int32_t)
    587 VAVG(VAVGUH, u16, uint32_t)
    588 VAVG(VAVGSW, s32, int64_t)
    589 VAVG(VAVGUW, u32, uint64_t)
    590 #undef VAVG
    591 
    592 #define VABSDU(name, element)                                           \
    593 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
    594 {                                                                       \
    595     int i;                                                              \
    596                                                                         \
    597     for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
    598         r->element[i] = (a->element[i] > b->element[i]) ?               \
    599             (a->element[i] - b->element[i]) :                           \
    600             (b->element[i] - a->element[i]);                            \
    601     }                                                                   \
    602 }
    603 
    604 /*
    605  * VABSDU - Vector absolute difference unsigned
    606  *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
    607  *   element - element type to access from vector
    608  */
    609 VABSDU(VABSDUB, u8)
    610 VABSDU(VABSDUH, u16)
    611 VABSDU(VABSDUW, u32)
    612 #undef VABSDU
    613 
    614 #define VCF(suffix, cvt, element)                                       \
    615     void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
    616                             ppc_avr_t *b, uint32_t uim)                 \
    617     {                                                                   \
    618         int i;                                                          \
    619                                                                         \
    620         for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
    621             float32 t = cvt(b->element[i], &env->vec_status);           \
    622             r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
    623         }                                                               \
    624     }
    625 VCF(ux, uint32_to_float32, u32)
    626 VCF(sx, int32_to_float32, s32)
    627 #undef VCF
    628 
    629 #define VCMPNEZ(NAME, ELEM) \
    630 void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
    631 {                                                                           \
    632     for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) {                         \
    633         t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) ||             \
    634                       (a->ELEM[i] != b->ELEM[i])) ? -1 : 0;                 \
    635     }                                                                       \
    636 }
    637 VCMPNEZ(VCMPNEZB, u8)
    638 VCMPNEZ(VCMPNEZH, u16)
    639 VCMPNEZ(VCMPNEZW, u32)
    640 #undef VCMPNEZ
    641 
    642 #define VCMPFP_DO(suffix, compare, order, record)                       \
    643     void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
    644                              ppc_avr_t *a, ppc_avr_t *b)                \
    645     {                                                                   \
    646         uint32_t ones = (uint32_t)-1;                                   \
    647         uint32_t all = ones;                                            \
    648         uint32_t none = 0;                                              \
    649         int i;                                                          \
    650                                                                         \
    651         for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
    652             uint32_t result;                                            \
    653             FloatRelation rel =                                         \
    654                 float32_compare_quiet(a->f32[i], b->f32[i],             \
    655                                       &env->vec_status);                \
    656             if (rel == float_relation_unordered) {                      \
    657                 result = 0;                                             \
    658             } else if (rel compare order) {                             \
    659                 result = ones;                                          \
    660             } else {                                                    \
    661                 result = 0;                                             \
    662             }                                                           \
    663             r->u32[i] = result;                                         \
    664             all &= result;                                              \
    665             none |= result;                                             \
    666         }                                                               \
    667         if (record) {                                                   \
    668             env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
    669         }                                                               \
    670     }
    671 #define VCMPFP(suffix, compare, order)          \
    672     VCMPFP_DO(suffix, compare, order, 0)        \
    673     VCMPFP_DO(suffix##_dot, compare, order, 1)
    674 VCMPFP(eqfp, ==, float_relation_equal)
    675 VCMPFP(gefp, !=, float_relation_less)
    676 VCMPFP(gtfp, ==, float_relation_greater)
    677 #undef VCMPFP_DO
    678 #undef VCMPFP
    679 
    680 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
    681                                     ppc_avr_t *a, ppc_avr_t *b, int record)
    682 {
    683     int i;
    684     int all_in = 0;
    685 
    686     for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
    687         FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
    688                                                      &env->vec_status);
    689         if (le_rel == float_relation_unordered) {
    690             r->u32[i] = 0xc0000000;
    691             all_in = 1;
    692         } else {
    693             float32 bneg = float32_chs(b->f32[i]);
    694             FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
    695                                                          &env->vec_status);
    696             int le = le_rel != float_relation_greater;
    697             int ge = ge_rel != float_relation_less;
    698 
    699             r->u32[i] = ((!le) << 31) | ((!ge) << 30);
    700             all_in |= (!le | !ge);
    701         }
    702     }
    703     if (record) {
    704         env->crf[6] = (all_in == 0) << 1;
    705     }
    706 }
    707 
    708 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
    709 {
    710     vcmpbfp_internal(env, r, a, b, 0);
    711 }
    712 
    713 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
    714                         ppc_avr_t *b)
    715 {
    716     vcmpbfp_internal(env, r, a, b, 1);
    717 }
    718 
    719 #define VCT(suffix, satcvt, element)                                    \
    720     void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
    721                             ppc_avr_t *b, uint32_t uim)                 \
    722     {                                                                   \
    723         int i;                                                          \
    724         int sat = 0;                                                    \
    725         float_status s = env->vec_status;                               \
    726                                                                         \
    727         set_float_rounding_mode(float_round_to_zero, &s);               \
    728         for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
    729             if (float32_is_any_nan(b->f32[i])) {                        \
    730                 r->element[i] = 0;                                      \
    731             } else {                                                    \
    732                 float64 t = float32_to_float64(b->f32[i], &s);          \
    733                 int64_t j;                                              \
    734                                                                         \
    735                 t = float64_scalbn(t, uim, &s);                         \
    736                 j = float64_to_int64(t, &s);                            \
    737                 r->element[i] = satcvt(j, &sat);                        \
    738             }                                                           \
    739         }                                                               \
    740         if (sat) {                                                      \
    741             set_vscr_sat(env);                                          \
    742         }                                                               \
    743     }
    744 VCT(uxs, cvtsduw, u32)
    745 VCT(sxs, cvtsdsw, s32)
    746 #undef VCT
    747 
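         /*
          * Partial dot products for the VSX integer GER (rank-k outer product
          * and accumulate) instructions: ger_rank8 multiplies signed 4-bit
          * nibbles, ger_rank4 multiplies signed bytes of 'a' by unsigned
          * bytes of 'b', and ger_rank2 multiplies signed halfwords, summing
          * only the products enabled by the product mask.
          */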
    748 typedef int64_t do_ger(uint32_t, uint32_t, uint32_t);
    749 
    750 static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask)
    751 {
    752     int64_t psum = 0;
    753     for (int i = 0; i < 8; i++, mask >>= 1) {
    754         if (mask & 1) {
    755             psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4);
    756         }
    757     }
    758     return psum;
    759 }
    760 
    761 static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask)
    762 {
    763     int64_t psum = 0;
    764     for (int i = 0; i < 4; i++, mask >>= 1) {
    765         if (mask & 1) {
    766             psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8);
    767         }
    768     }
    769     return psum;
    770 }
    771 
    772 static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask)
    773 {
    774     int64_t psum = 0;
    775     for (int i = 0; i < 2; i++, mask >>= 1) {
    776         if (mask & 1) {
    777             psum += (int64_t)sextract32(a, 16 * i, 16) *
    778                              sextract32(b, 16 * i, 16);
    779         }
    780     }
    781     return psum;
    782 }
    783 
    784 static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t  *at,
    785                    uint32_t mask, bool sat, bool acc, do_ger ger)
    786 {
    787     uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK),
    788             xmsk = FIELD_EX32(mask, GER_MSK, XMSK),
    789             ymsk = FIELD_EX32(mask, GER_MSK, YMSK);
    790     uint8_t xmsk_bit, ymsk_bit;
    791     int64_t psum;
    792     int i, j;
    793     for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
    794         for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
    795             if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
    796                 psum = ger(a->VsrW(i), b->VsrW(j), pmsk);
    797                 if (acc) {
    798                     psum += at[i].VsrSW(j);
    799                 }
    800                 if (sat && psum > INT32_MAX) {
    801                     set_vscr_sat(env);
    802                     at[i].VsrSW(j) = INT32_MAX;
    803                 } else if (sat && psum < INT32_MIN) {
    804                     set_vscr_sat(env);
    805                     at[i].VsrSW(j) = INT32_MIN;
    806                 } else {
    807                     at[i].VsrSW(j) = (int32_t) psum;
    808                 }
    809             } else {
    810                 at[i].VsrSW(j) = 0;
    811             }
    812         }
    813     }
    814 }
    815 
    816 QEMU_FLATTEN
    817 void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
    818                      ppc_acc_t *at, uint32_t mask)
    819 {
    820     xviger(env, a, b, at, mask, false, false, ger_rank8);
    821 }
    822 
    823 QEMU_FLATTEN
    824 void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
    825                        ppc_acc_t *at, uint32_t mask)
    826 {
    827     xviger(env, a, b, at, mask, false, true, ger_rank8);
    828 }
    829 
    830 QEMU_FLATTEN
    831 void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
    832                      ppc_acc_t *at, uint32_t mask)
    833 {
    834     xviger(env, a, b, at, mask, false, false, ger_rank4);
    835 }
    836 
    837 QEMU_FLATTEN
    838 void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
    839                        ppc_acc_t *at, uint32_t mask)
    840 {
    841     xviger(env, a, b, at, mask, false, true, ger_rank4);
    842 }
    843 
    844 QEMU_FLATTEN
    845 void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
    846                         ppc_acc_t *at, uint32_t mask)
    847 {
    848     xviger(env, a, b, at, mask, true, true, ger_rank4);
    849 }
    850 
    851 QEMU_FLATTEN
    852 void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
    853                       ppc_acc_t *at, uint32_t mask)
    854 {
    855     xviger(env, a, b, at, mask, false, false, ger_rank2);
    856 }
    857 
    858 QEMU_FLATTEN
    859 void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
    860                        ppc_acc_t *at, uint32_t mask)
    861 {
    862     xviger(env, a, b, at, mask, true, false, ger_rank2);
    863 }
    864 
    865 QEMU_FLATTEN
    866 void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
    867                         ppc_acc_t *at, uint32_t mask)
    868 {
    869     xviger(env, a, b, at, mask, false, true, ger_rank2);
    870 }
    871 
    872 QEMU_FLATTEN
    873 void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
    874                          ppc_acc_t *at, uint32_t mask)
    875 {
    876     xviger(env, a, b, at, mask, true, true, ger_rank2);
    877 }
    878 
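         /*
          * vclzlsbb/vctzlsbb: count leading/trailing bytes (in element order)
          * whose least-significant bit is zero, stopping at the first byte
          * whose least-significant bit is set.
          */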
    879 target_ulong helper_vclzlsbb(ppc_avr_t *r)
    880 {
    881     target_ulong count = 0;
    882     int i;
    883     for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
    884         if (r->VsrB(i) & 0x01) {
    885             break;
    886         }
    887         count++;
    888     }
    889     return count;
    890 }
    891 
    892 target_ulong helper_vctzlsbb(ppc_avr_t *r)
    893 {
    894     target_ulong count = 0;
    895     int i;
    896     for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
    897         if (r->VsrB(i) & 0x01) {
    898             break;
    899         }
    900         count++;
    901     }
    902     return count;
    903 }
    904 
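         /*
          * VMHADDSHS: for each halfword, (a * b) >> 15 plus c, saturated to
          * signed 16 bits.  VMHRADDSHS below additionally rounds the product
          * by adding 0x4000 before the shift.
          */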
    905 void helper_VMHADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
    906                       ppc_avr_t *b, ppc_avr_t *c)
    907 {
    908     int sat = 0;
    909     int i;
    910 
    911     for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
    912         int32_t prod = a->s16[i] * b->s16[i];
    913         int32_t t = (int32_t)c->s16[i] + (prod >> 15);
    914 
    915         r->s16[i] = cvtswsh(t, &sat);
    916     }
    917 
    918     if (sat) {
    919         set_vscr_sat(env);
    920     }
    921 }
    922 
    923 void helper_VMHRADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
    924                        ppc_avr_t *b, ppc_avr_t *c)
    925 {
    926     int sat = 0;
    927     int i;
    928 
    929     for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
    930         int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
    931         int32_t t = (int32_t)c->s16[i] + (prod >> 15);
    932         r->s16[i] = cvtswsh(t, &sat);
    933     }
    934 
    935     if (sat) {
    936         set_vscr_sat(env);
    937     }
    938 }
    939 
    940 void helper_VMLADDUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
    941                       uint32_t v)
    942 {
    943     int i;
    944 
    945     for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
    946         int32_t prod = a->s16[i] * b->s16[i];
    947         r->s16[i] = (int16_t) (prod + c->s16[i]);
    948     }
    949 }
    950 
    951 #define VMRG_DO(name, element, access, ofs)                                  \
    952     void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
    953     {                                                                        \
    954         ppc_avr_t result;                                                    \
    955         int i, half = ARRAY_SIZE(r->element) / 2;                            \
    956                                                                              \
    957         for (i = 0; i < half; i++) {                                         \
    958             result.access(i * 2 + 0) = a->access(i + ofs);                   \
    959             result.access(i * 2 + 1) = b->access(i + ofs);                   \
    960         }                                                                    \
    961         *r = result;                                                         \
    962     }
    963 
    964 #define VMRG(suffix, element, access)          \
    965     VMRG_DO(mrgl##suffix, element, access, half)   \
    966     VMRG_DO(mrgh##suffix, element, access, 0)
    967 VMRG(b, u8, VsrB)
    968 VMRG(h, u16, VsrH)
    969 VMRG(w, u32, VsrW)
    970 #undef VMRG_DO
    971 #undef VMRG
    972 
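         /*
          * VMSUM*: each word of the result is the corresponding word of c
          * plus the sum of the products of the sub-elements of a and b lying
          * within that word; the *S variants saturate and set VSCR[SAT].
          */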
    973 void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
    974 {
    975     int32_t prod[16];
    976     int i;
    977 
    978     for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
    979         prod[i] = (int32_t)a->s8[i] * b->u8[i];
    980     }
    981 
    982     VECTOR_FOR_INORDER_I(i, s32) {
    983         r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
    984             prod[4 * i + 2] + prod[4 * i + 3];
    985     }
    986 }
    987 
    988 void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
    989 {
    990     int32_t prod[8];
    991     int i;
    992 
    993     for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
    994         prod[i] = a->s16[i] * b->s16[i];
    995     }
    996 
    997     VECTOR_FOR_INORDER_I(i, s32) {
    998         r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    999     }
   1000 }
   1001 
   1002 void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
   1003                      ppc_avr_t *b, ppc_avr_t *c)
   1004 {
   1005     int32_t prod[8];
   1006     int i;
   1007     int sat = 0;
   1008 
   1009     for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
   1010         prod[i] = (int32_t)a->s16[i] * b->s16[i];
   1011     }
   1012 
   1013     VECTOR_FOR_INORDER_I(i, s32) {
   1014         int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
   1015 
   1016         r->u32[i] = cvtsdsw(t, &sat);
   1017     }
   1018 
   1019     if (sat) {
   1020         set_vscr_sat(env);
   1021     }
   1022 }
   1023 
   1024 void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
   1025 {
   1026     uint16_t prod[16];
   1027     int i;
   1028 
   1029     for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
   1030         prod[i] = a->u8[i] * b->u8[i];
   1031     }
   1032 
   1033     VECTOR_FOR_INORDER_I(i, u32) {
   1034         r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
   1035             prod[4 * i + 2] + prod[4 * i + 3];
   1036     }
   1037 }
   1038 
   1039 void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
   1040 {
   1041     uint32_t prod[8];
   1042     int i;
   1043 
   1044     for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
   1045         prod[i] = a->u16[i] * b->u16[i];
   1046     }
   1047 
   1048     VECTOR_FOR_INORDER_I(i, u32) {
   1049         r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
   1050     }
   1051 }
   1052 
   1053 void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
   1054                      ppc_avr_t *b, ppc_avr_t *c)
   1055 {
   1056     uint32_t prod[8];
   1057     int i;
   1058     int sat = 0;
   1059 
   1060     for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
   1061         prod[i] = a->u16[i] * b->u16[i];
   1062     }
   1063 
   1064     VECTOR_FOR_INORDER_I(i, s32) {
   1065         uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
   1066 
   1067         r->u32[i] = cvtuduw(t, &sat);
   1068     }
   1069 
   1070     if (sat) {
   1071         set_vscr_sat(env);
   1072     }
   1073 }
   1074 
   1075 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
   1076     void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
   1077     {                                                                   \
   1078         int i;                                                          \
   1079                                                                         \
   1080         for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
   1081             r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
   1082                                      (cast)b->mul_access(i);            \
   1083         }                                                               \
   1084     }
   1085 
   1086 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
   1087     void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
   1088     {                                                                   \
   1089         int i;                                                          \
   1090                                                                         \
   1091         for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
   1092             r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
   1093                                      (cast)b->mul_access(i + 1);        \
   1094         }                                                               \
   1095     }
   1096 
   1097 #define VMUL(suffix, mul_element, mul_access, prod_access, cast)       \
   1098     VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast)  \
   1099     VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
   1100 VMUL(SB, s8, VsrSB, VsrSH, int16_t)
   1101 VMUL(SH, s16, VsrSH, VsrSW, int32_t)
   1102 VMUL(SW, s32, VsrSW, VsrSD, int64_t)
   1103 VMUL(UB, u8, VsrB, VsrH, uint16_t)
   1104 VMUL(UH, u16, VsrH, VsrW, uint32_t)
   1105 VMUL(UW, u32, VsrW, VsrD, uint64_t)
   1106 #undef VMUL_DO_EVN
   1107 #undef VMUL_DO_ODD
   1108 #undef VMUL
   1109 
   1110 void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
   1111                     target_ulong uim)
   1112 {
   1113     int i, idx;
   1114     ppc_vsr_t tmp = { .u64 = {0, 0} };
   1115 
   1116     for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
   1117         if ((pcv->VsrB(i) >> 5) == uim) {
   1118             idx = pcv->VsrB(i) & 0x1f;
   1119             if (idx < ARRAY_SIZE(t->u8)) {
   1120                 tmp.VsrB(i) = s0->VsrB(idx);
   1121             } else {
   1122                 tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
   1123             }
   1124         }
   1125     }
   1126 
   1127     *t = tmp;
   1128 }
   1129 
   1130 void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
   1131 {
   1132     Int128 neg1 = int128_makes64(-1);
   1133     Int128 int128_min = int128_make128(0, INT64_MIN);
   1134     if (likely(int128_nz(b->s128) &&
   1135               (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
   1136         t->s128 = int128_divs(a->s128, b->s128);
   1137     } else {
   1138         t->s128 = a->s128; /* Undefined behavior */
   1139     }
   1140 }
   1141 
   1142 void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
   1143 {
   1144     if (int128_nz(b->s128)) {
   1145         t->s128 = int128_divu(a->s128, b->s128);
   1146     } else {
   1147         t->s128 = a->s128; /* Undefined behavior */
   1148     }
   1149 }
   1150 
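         /*
          * Divide extended: the 128-bit dividend is the element of a in the
          * high half and 64 zero bits in the low half.  Division by zero (and
          * INT64_MIN / -1 for the signed form) is architecturally undefined;
          * the element of a is returned in that case.
          */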
   1151 void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
   1152 {
   1153     int i;
   1154     int64_t high;
   1155     uint64_t low;
   1156     for (i = 0; i < 2; i++) {
   1157         high = a->s64[i];
   1158         low = 0;
   1159         if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) {
   1160             t->s64[i] = a->s64[i]; /* Undefined behavior */
   1161         } else {
   1162             divs128(&low, &high, b->s64[i]);
   1163             t->s64[i] = low;
   1164         }
   1165     }
   1166 }
   1167 
   1168 void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
   1169 {
   1170     int i;
   1171     uint64_t high, low;
   1172     for (i = 0; i < 2; i++) {
   1173         high = a->u64[i];
   1174         low = 0;
   1175         if (unlikely(!b->u64[i])) {
   1176             t->u64[i] = a->u64[i]; /* Undefined behavior */
   1177         } else {
   1178             divu128(&low, &high, b->u64[i]);
   1179             t->u64[i] = low;
   1180         }
   1181     }
   1182 }
   1183 
   1184 void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
   1185 {
   1186     Int128 high, low;
   1187     Int128 int128_min = int128_make128(0, INT64_MIN);
   1188     Int128 neg1 = int128_makes64(-1);
   1189 
   1190     high = a->s128;
   1191     low = int128_zero();
   1192     if (unlikely(!int128_nz(b->s128) ||
   1193                  (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) {
   1194         t->s128 = a->s128; /* Undefined behavior */
   1195     } else {
   1196         divs256(&low, &high, b->s128);
   1197         t->s128 = low;
   1198     }
   1199 }
   1200 
   1201 void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
   1202 {
   1203     Int128 high, low;
   1204 
   1205     high = a->s128;
   1206     low = int128_zero();
   1207     if (unlikely(!int128_nz(b->s128))) {
   1208         t->s128 = a->s128; /* Undefined behavior */
   1209     } else {
   1210         divu256(&low, &high, b->s128);
   1211         t->s128 = low;
   1212     }
   1213 }
   1214 
   1215 void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
   1216 {
   1217     Int128 neg1 = int128_makes64(-1);
   1218     Int128 int128_min = int128_make128(0, INT64_MIN);
   1219     if (likely(int128_nz(b->s128) &&
   1220               (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
   1221         t->s128 = int128_rems(a->s128, b->s128);
   1222     } else {
   1223         t->s128 = int128_zero(); /* Undefined behavior */
   1224     }
   1225 }
   1226 
   1227 void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
   1228 {
   1229     if (likely(int128_nz(b->s128))) {
   1230         t->s128 = int128_remu(a->s128, b->s128);
   1231     } else {
   1232         t->s128 = int128_zero(); /* Undefined behavior */
   1233     }
   1234 }
   1235 
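         /*
          * vperm: each byte of the result is selected from the 32-byte
          * concatenation of a and b, indexed by the low 5 bits of the
          * corresponding byte of c.  vpermr below uses the reversed index,
          * 31 - (c byte & 0x1f).
          */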
   1236 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
   1237 {
   1238     ppc_avr_t result;
   1239     int i;
   1240 
   1241     for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
   1242         int s = c->VsrB(i) & 0x1f;
   1243         int index = s & 0xf;
   1244 
   1245         if (s & 0x10) {
   1246             result.VsrB(i) = b->VsrB(index);
   1247         } else {
   1248             result.VsrB(i) = a->VsrB(index);
   1249         }
   1250     }
   1251     *r = result;
   1252 }
   1253 
   1254 void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
   1255 {
   1256     ppc_avr_t result;
   1257     int i;
   1258 
   1259     for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
   1260         int s = c->VsrB(i) & 0x1f;
   1261         int index = 15 - (s & 0xf);
   1262 
   1263         if (s & 0x10) {
   1264             result.VsrB(i) = a->VsrB(index);
   1265         } else {
   1266             result.VsrB(i) = b->VsrB(index);
   1267         }
   1268     }
   1269     *r = result;
   1270 }
   1271 
   1272 #define XXGENPCV_BE_EXP(NAME, SZ) \
   1273 void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
   1274 {                                                                   \
   1275     ppc_vsr_t tmp;                                                  \
   1276                                                                     \
   1277     /* Initialize tmp with the result of an all-zeros mask */       \
   1278     tmp.VsrD(0) = 0x1011121314151617;                               \
   1279     tmp.VsrD(1) = 0x18191A1B1C1D1E1F;                               \
   1280                                                                     \
   1281     /* Iterate over the most significant byte of each element */    \
   1282     for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
   1283         if (b->VsrB(i) & 0x80) {                                    \
   1284             /* Update each byte of the element */                   \
   1285             for (int k = 0; k < SZ; k++) {                          \
   1286                 tmp.VsrB(i + k) = j + k;                            \
   1287             }                                                       \
   1288             j += SZ;                                                \
   1289         }                                                           \
   1290     }                                                               \
   1291                                                                     \
   1292     *t = tmp;                                                       \
   1293 }
   1294 
   1295 #define XXGENPCV_BE_COMP(NAME, SZ) \
   1296 void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
   1297 {                                                                   \
   1298     ppc_vsr_t tmp = { .u64 = { 0, 0 } };                            \
   1299                                                                     \
   1300     /* Iterate over the most significant byte of each element */    \
   1301     for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
   1302         if (b->VsrB(i) & 0x80) {                                    \
   1303             /* Update each byte of the element */                   \
   1304             for (int k = 0; k < SZ; k++) {                          \
   1305                 tmp.VsrB(j + k) = i + k;                            \
   1306             }                                                       \
   1307             j += SZ;                                                \
   1308         }                                                           \
   1309     }                                                               \
   1310                                                                     \
   1311     *t = tmp;                                                       \
   1312 }
   1313 
   1314 #define XXGENPCV_LE_EXP(NAME, SZ) \
   1315 void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
   1316 {                                                                   \
   1317     ppc_vsr_t tmp;                                                  \
   1318                                                                     \
   1319     /* Initialize tmp with the result of an all-zeros mask */       \
   1320     tmp.VsrD(0) = 0x1F1E1D1C1B1A1918;                               \
   1321     tmp.VsrD(1) = 0x1716151413121110;                               \
   1322                                                                     \
   1323     /* Iterate over the most significant byte of each element */    \
   1324     for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
   1325         /* Reverse indexing of "i" */                               \
   1326         const int idx = ARRAY_SIZE(b->u8) - i - SZ;                 \
   1327         if (b->VsrB(idx) & 0x80) {                                  \
   1328             /* Update each byte of the element */                   \
   1329             for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) {       \
   1330                 tmp.VsrB(idx + rk) = j + k;                         \
   1331             }                                                       \
   1332             j += SZ;                                                \
   1333         }                                                           \
   1334     }                                                               \
   1335                                                                     \
   1336     *t = tmp;                                                       \
   1337 }
   1338 
   1339 #define XXGENPCV_LE_COMP(NAME, SZ) \
   1340 void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
   1341 {                                                                   \
   1342     ppc_vsr_t tmp = { .u64 = { 0, 0 } };                            \
   1343                                                                     \
   1344     /* Iterate over the most significant byte of each element */    \
   1345     for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
   1346         if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) {           \
   1347             /* Update each byte of the element */                   \
   1348             for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) {       \
   1349                 /* Reverse indexing of "j" */                       \
   1350                 const int idx = ARRAY_SIZE(b->u8) - j - SZ;         \
   1351                 tmp.VsrB(idx + rk) = i + k;                         \
   1352             }                                                       \
   1353             j += SZ;                                                \
   1354         }                                                           \
   1355     }                                                               \
   1356                                                                     \
   1357     *t = tmp;                                                       \
   1358 }
   1359 
   1360 #define XXGENPCV(NAME, SZ) \
   1361     XXGENPCV_BE_EXP(NAME, SZ)  \
   1362     XXGENPCV_BE_COMP(NAME, SZ) \
   1363     XXGENPCV_LE_EXP(NAME, SZ)  \
   1364     XXGENPCV_LE_COMP(NAME, SZ) \
   1365 
   1366 XXGENPCV(XXGENPCVBM, 1)
   1367 XXGENPCV(XXGENPCVHM, 2)
   1368 XXGENPCV(XXGENPCVWM, 4)
   1369 XXGENPCV(XXGENPCVDM, 8)
   1370 
   1371 #undef XXGENPCV_BE_EXP
   1372 #undef XXGENPCV_BE_COMP
   1373 #undef XXGENPCV_LE_EXP
   1374 #undef XXGENPCV_LE_COMP
   1375 #undef XXGENPCV
   1376 
   1377 #if HOST_BIG_ENDIAN
   1378 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
   1379 #define VBPERMD_INDEX(i) (i)
   1380 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
   1381 #else
   1382 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
   1383 #define VBPERMD_INDEX(i) (1 - i)
   1384 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
   1385 #endif
   1386 #define EXTRACT_BIT(avr, i, index) \
   1387         (extract64((avr)->VsrD(i), 63 - index, 1))
   1388 
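         /*
          * Vector Bit Permute: each index byte of b selects one bit of a,
          * numbered from the most significant bit.  vbpermd gathers 8 bits
          * into the low byte of each doubleword; vbpermq gathers 16 bits
          * into the low halfword of doubleword 0.  Out-of-range indexes
          * contribute a zero bit.
          */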
   1389 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1390 {
   1391     int i, j;
   1392     ppc_avr_t result = { .u64 = { 0, 0 } };
   1393     VECTOR_FOR_INORDER_I(i, u64) {
   1394         for (j = 0; j < 8; j++) {
   1395             int index = VBPERMQ_INDEX(b, (i * 8) + j);
   1396             if (index < 64 && EXTRACT_BIT(a, i, index)) {
   1397                 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
   1398             }
   1399         }
   1400     }
   1401     *r = result;
   1402 }
   1403 
   1404 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1405 {
   1406     int i;
   1407     uint64_t perm = 0;
   1408 
   1409     VECTOR_FOR_INORDER_I(i, u8) {
   1410         int index = VBPERMQ_INDEX(b, i);
   1411 
   1412         if (index < 128) {
   1413             uint64_t mask = (1ull << (63 - (index & 0x3F)));
   1414             if (a->u64[VBPERMQ_DW(index)] & mask) {
   1415                 perm |= (0x8000 >> i);
   1416             }
   1417         }
   1418     }
   1419 
   1420     r->VsrD(0) = perm;
   1421     r->VsrD(1) = 0;
   1422 }
   1423 
   1424 #undef VBPERMQ_INDEX
   1425 #undef VBPERMQ_DW
   1426 
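         /*
          * Vector Polynomial Multiply-Sum: carry-less (GF(2)) multiply of
          * each pair of source elements into a double-width product, then
          * XOR adjacent products together into one target element.
          */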
   1427 #define PMSUM(name, srcfld, trgfld, trgtyp)                   \
   1428 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
   1429 {                                                             \
   1430     int i, j;                                                 \
   1431     trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];    \
   1432                                                               \
   1433     VECTOR_FOR_INORDER_I(i, srcfld) {                         \
   1434         prod[i] = 0;                                          \
   1435         for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {      \
   1436             if (a->srcfld[i] & (1ull << j)) {                 \
   1437                 prod[i] ^= ((trgtyp)b->srcfld[i] << j);       \
   1438             }                                                 \
   1439         }                                                     \
   1440     }                                                         \
   1441                                                               \
   1442     VECTOR_FOR_INORDER_I(i, trgfld) {                         \
   1443         r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];         \
   1444     }                                                         \
   1445 }
   1446 
   1447 PMSUM(vpmsumb, u8, u16, uint16_t)
   1448 PMSUM(vpmsumh, u16, u32, uint32_t)
   1449 PMSUM(vpmsumw, u32, u64, uint64_t)
   1450 
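         /* As above, for doublewords: the two 128-bit products are XORed. */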
   1451 void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1452 {
   1453     int i, j;
   1454     Int128 tmp, prod[2] = {int128_zero(), int128_zero()};
   1455 
   1456     for (j = 0; j < 64; j++) {
   1457         for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
   1458             if (a->VsrD(i) & (1ull << j)) {
   1459                 tmp = int128_make64(b->VsrD(i));
   1460                 tmp = int128_lshift(tmp, j);
   1461                 prod[i] = int128_xor(prod[i], tmp);
   1462             }
   1463         }
   1464     }
   1465 
   1466     r->s128 = int128_xor(prod[0], prod[1]);
   1467 }
   1468 
   1469 #if HOST_BIG_ENDIAN
   1470 #define PKBIG 1
   1471 #else
   1472 #define PKBIG 0
   1473 #endif
   1474 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1475 {
   1476     int i, j;
   1477     ppc_avr_t result;
   1478 #if HOST_BIG_ENDIAN
   1479     const ppc_avr_t *x[2] = { a, b };
   1480 #else
   1481     const ppc_avr_t *x[2] = { b, a };
   1482 #endif
   1483 
   1484     VECTOR_FOR_INORDER_I(i, u64) {
   1485         VECTOR_FOR_INORDER_I(j, u32) {
   1486             uint32_t e = x[i]->u32[j];
   1487 
   1488             result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
   1489                                      ((e >> 6) & 0x3e0) |
   1490                                      ((e >> 3) & 0x1f));
   1491         }
   1492     }
   1493     *r = result;
   1494 }
   1495 
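         /*
          * Vector Pack: narrow the elements of two source vectors into one
          * result, using "cvt" to convert (and optionally saturate) each
          * element.  VSCR[SAT] is set when any element saturated.
          */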
   1496 #define VPK(suffix, from, to, cvt, dosat)                               \
   1497     void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
   1498                             ppc_avr_t *a, ppc_avr_t *b)                 \
   1499     {                                                                   \
   1500         int i;                                                          \
   1501         int sat = 0;                                                    \
   1502         ppc_avr_t result;                                               \
   1503         ppc_avr_t *a0 = PKBIG ? a : b;                                  \
   1504         ppc_avr_t *a1 = PKBIG ? b : a;                                  \
   1505                                                                         \
   1506         VECTOR_FOR_INORDER_I(i, from) {                                 \
   1507             result.to[i] = cvt(a0->from[i], &sat);                      \
   1508             result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
   1509         }                                                               \
   1510         *r = result;                                                    \
   1511         if (dosat && sat) {                                             \
   1512             set_vscr_sat(env);                                          \
   1513         }                                                               \
   1514     }
   1515 #define I(x, y) (x)
   1516 VPK(shss, s16, s8, cvtshsb, 1)
   1517 VPK(shus, s16, u8, cvtshub, 1)
   1518 VPK(swss, s32, s16, cvtswsh, 1)
   1519 VPK(swus, s32, u16, cvtswuh, 1)
   1520 VPK(sdss, s64, s32, cvtsdsw, 1)
   1521 VPK(sdus, s64, u32, cvtsduw, 1)
   1522 VPK(uhus, u16, u8, cvtuhub, 1)
   1523 VPK(uwus, u32, u16, cvtuwuh, 1)
   1524 VPK(udus, u64, u32, cvtuduw, 1)
   1525 VPK(uhum, u16, u8, I, 0)
   1526 VPK(uwum, u32, u16, I, 0)
   1527 VPK(udum, u64, u32, I, 0)
   1528 #undef I
   1529 #undef VPK
   1530 #undef PKBIG
   1531 
   1532 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
   1533 {
   1534     int i;
   1535 
   1536     for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
   1537         r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
   1538     }
   1539 }
   1540 
   1541 #define VRFI(suffix, rounding)                                  \
   1542     void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
   1543                              ppc_avr_t *b)                      \
   1544     {                                                           \
   1545         int i;                                                  \
   1546         float_status s = env->vec_status;                       \
   1547                                                                 \
   1548         set_float_rounding_mode(rounding, &s);                  \
   1549         for (i = 0; i < ARRAY_SIZE(r->f32); i++) {              \
    1550             r->f32[i] = float32_round_to_int(b->f32[i], &s);    \
   1551         }                                                       \
   1552     }
   1553 VRFI(n, float_round_nearest_even)
   1554 VRFI(m, float_round_down)
   1555 VRFI(p, float_round_up)
   1556 VRFI(z, float_round_to_zero)
   1557 #undef VRFI
   1558 
   1559 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
   1560 {
   1561     int i;
   1562 
   1563     for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
   1564         float32 t = float32_sqrt(b->f32[i], &env->vec_status);
   1565 
   1566         r->f32[i] = float32_div(float32_one, t, &env->vec_status);
   1567     }
   1568 }
   1569 
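         /*
          * Vector Rotate Left then Mask Insert / Mask: each element of b
          * packs a rotate count plus mask "begin" and "end" positions.  The
          * element of a is rotated left and either inserted into r under
          * the mask (insert=1) or simply ANDed with the mask (insert=0).
          */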
   1570 #define VRLMI(name, size, element, insert)                                  \
   1571 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
   1572 {                                                                           \
   1573     int i;                                                                  \
   1574     for (i = 0; i < ARRAY_SIZE(r->element); i++) {                          \
   1575         uint##size##_t src1 = a->element[i];                                \
   1576         uint##size##_t src2 = b->element[i];                                \
   1577         uint##size##_t src3 = r->element[i];                                \
   1578         uint##size##_t begin, end, shift, mask, rot_val;                    \
   1579                                                                             \
   1580         shift = extract##size(src2, 0, 6);                                  \
   1581         end   = extract##size(src2, 8, 6);                                  \
   1582         begin = extract##size(src2, 16, 6);                                 \
   1583         rot_val = rol##size(src1, shift);                                   \
   1584         mask = mask_u##size(begin, end);                                    \
   1585         if (insert) {                                                       \
   1586             r->element[i] = (rot_val & mask) | (src3 & ~mask);              \
   1587         } else {                                                            \
   1588             r->element[i] = (rot_val & mask);                               \
   1589         }                                                                   \
   1590     }                                                                       \
   1591 }
   1592 
   1593 VRLMI(VRLDMI, 64, u64, 1);
   1594 VRLMI(VRLWMI, 32, u32, 1);
   1595 VRLMI(VRLDNM, 64, u64, 0);
   1596 VRLMI(VRLWNM, 32, u32, 0);
   1597 
   1598 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
   1599 {
   1600     int i;
   1601 
   1602     for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
   1603         r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
   1604     }
   1605 }
   1606 
   1607 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
   1608 {
   1609     int i;
   1610 
   1611     for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
   1612         r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
   1613     }
   1614 }
   1615 
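         /*
          * Vector Extract Unsigned Left/Right-Indexed: return the element
          * of b at byte offset (a & 0xf), counted from the left (most
          * significant) or right (least significant) end of the vector,
          * zero-extended to 64 bits.
          */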
   1616 #define VEXTU_X_DO(name, size, left)                            \
   1617 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
   1618 {                                                               \
   1619     int index = (a & 0xf) * 8;                                  \
   1620     if (left) {                                                 \
   1621         index = 128 - index - size;                             \
   1622     }                                                           \
   1623     return int128_getlo(int128_rshift(b->s128, index)) &        \
   1624         MAKE_64BIT_MASK(0, size);                               \
   1625 }
   1626 VEXTU_X_DO(vextublx,  8, 1)
   1627 VEXTU_X_DO(vextuhlx, 16, 1)
   1628 VEXTU_X_DO(vextuwlx, 32, 1)
   1629 VEXTU_X_DO(vextubrx,  8, 0)
   1630 VEXTU_X_DO(vextuhrx, 16, 0)
   1631 VEXTU_X_DO(vextuwrx, 32, 0)
   1632 #undef VEXTU_X_DO
   1633 
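         /*
          * Vector Shift Left/Right Variable: each byte of a is shifted by
          * the count in the low 3 bits of the corresponding byte of b, with
          * bits shifted in from the neighbouring byte of a.
          */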
   1634 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1635 {
   1636     int i;
   1637     unsigned int shift, bytes, size;
   1638 
   1639     size = ARRAY_SIZE(r->u8);
   1640     for (i = 0; i < size; i++) {
   1641         shift = b->VsrB(i) & 0x7;             /* extract shift value */
   1642         bytes = (a->VsrB(i) << 8) +           /* extract adjacent bytes */
   1643             (((i + 1) < size) ? a->VsrB(i + 1) : 0);
   1644         r->VsrB(i) = (bytes << shift) >> 8;   /* shift and store result */
   1645     }
   1646 }
   1647 
   1648 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1649 {
   1650     int i;
   1651     unsigned int shift, bytes;
   1652 
    1653     /*
    1654      * Use reverse order, as the destination and source registers can
    1655      * be the same.  The vector is modified in place, saving a temporary;
    1656      * reverse order guarantees that a computed result is not fed back.
    1657      */
   1658     for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
   1659         shift = b->VsrB(i) & 0x7;               /* extract shift value */
   1660         bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
   1661                                                 /* extract adjacent bytes */
   1662         r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
   1663     }
   1664 }
   1665 
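         /*
          * Vector Shift Left Double by Octet Immediate: take 16 consecutive
          * bytes from the concatenation a || b, starting at byte "shift".
          */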
   1666 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
   1667 {
   1668     int sh = shift & 0xf;
   1669     int i;
   1670     ppc_avr_t result;
   1671 
   1672     for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
   1673         int index = sh + i;
   1674         if (index > 0xf) {
   1675             result.VsrB(i) = b->VsrB(index - 0x10);
   1676         } else {
   1677             result.VsrB(i) = a->VsrB(index);
   1678         }
   1679     }
   1680     *r = result;
   1681 }
   1682 
   1683 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1684 {
   1685     int sh = (b->VsrB(0xf) >> 3) & 0xf;
   1686 
   1687 #if HOST_BIG_ENDIAN
   1688     memmove(&r->u8[0], &a->u8[sh], 16 - sh);
   1689     memset(&r->u8[16 - sh], 0, sh);
   1690 #else
   1691     memmove(&r->u8[sh], &a->u8[0], 16 - sh);
   1692     memset(&r->u8[0], 0, sh);
   1693 #endif
   1694 }
   1695 
   1696 #if HOST_BIG_ENDIAN
   1697 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
   1698 #else
   1699 #define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
   1700 #endif
   1701 
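         /*
          * Vector Insert Element: store "val" into vector t at byte offset
          * "index", counted from the most significant byte.  Out-of-range
          * indexes leave t unchanged and log a guest error.
          */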
   1702 #define VINSX(SUFFIX, TYPE) \
   1703 void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t,       \
   1704                                          uint64_t val, target_ulong index)     \
   1705 {                                                                              \
   1706     const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE);                       \
   1707     target_long idx = index;                                                   \
   1708                                                                                \
   1709     if (idx < 0 || idx > maxidx) {                                             \
    1710         idx = idx < 0 ? sizeof(TYPE) - idx : idx;                              \
   1711         qemu_log_mask(LOG_GUEST_ERROR,                                         \
   1712             "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx   \
   1713             ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx);         \
   1714     } else {                                                                   \
   1715         TYPE src = val;                                                        \
   1716         memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE));           \
   1717     }                                                                          \
   1718 }
   1719 VINSX(B, uint8_t)
   1720 VINSX(H, uint16_t)
   1721 VINSX(W, uint32_t)
   1722 VINSX(D, uint64_t)
   1723 #undef ELEM_ADDR
   1724 #undef VINSX
   1725 #if HOST_BIG_ENDIAN
   1726 #define VEXTDVLX(NAME, SIZE) \
   1727 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
   1728                    target_ulong index)                                         \
   1729 {                                                                              \
   1730     const target_long idx = index;                                             \
   1731     ppc_avr_t tmp[2] = { *a, *b };                                             \
   1732     memset(t, 0, sizeof(*t));                                                  \
   1733     if (idx >= 0 && idx + SIZE <= sizeof(tmp)) {                               \
   1734         memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
   1735     } else {                                                                   \
   1736         qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x"  \
   1737                       TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n",         \
   1738                       env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE);        \
   1739     }                                                                          \
   1740 }
   1741 #else
   1742 #define VEXTDVLX(NAME, SIZE) \
   1743 void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
   1744                    target_ulong index)                                         \
   1745 {                                                                              \
   1746     const target_long idx = index;                                             \
   1747     ppc_avr_t tmp[2] = { *b, *a };                                             \
   1748     memset(t, 0, sizeof(*t));                                                  \
   1749     if (idx >= 0 && idx + SIZE <= sizeof(tmp)) {                               \
   1750         memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2],                                  \
   1751                (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE);                  \
   1752     } else {                                                                   \
   1753         qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x"  \
   1754                       TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n",         \
   1755                       env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE);        \
   1756     }                                                                          \
   1757 }
   1758 #endif
   1759 VEXTDVLX(VEXTDUBVLX, 1)
   1760 VEXTDVLX(VEXTDUHVLX, 2)
   1761 VEXTDVLX(VEXTDUWVLX, 4)
   1762 VEXTDVLX(VEXTDDVLX, 8)
   1763 #undef VEXTDVLX
   1764 #if HOST_BIG_ENDIAN
   1765 #define VEXTRACT(suffix, element)                                            \
   1766     void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
   1767     {                                                                        \
   1768         uint32_t es = sizeof(r->element[0]);                                 \
   1769         memmove(&r->u8[8 - es], &b->u8[index], es);                          \
   1770         memset(&r->u8[8], 0, 8);                                             \
   1771         memset(&r->u8[0], 0, 8 - es);                                        \
   1772     }
   1773 #else
   1774 #define VEXTRACT(suffix, element)                                            \
   1775     void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
   1776     {                                                                        \
   1777         uint32_t es = sizeof(r->element[0]);                                 \
   1778         uint32_t s = (16 - index) - es;                                      \
   1779         memmove(&r->u8[8], &b->u8[s], es);                                   \
   1780         memset(&r->u8[0], 0, 8);                                             \
   1781         memset(&r->u8[8 + es], 0, 8 - es);                                   \
   1782     }
   1783 #endif
   1784 VEXTRACT(ub, u8)
   1785 VEXTRACT(uh, u16)
   1786 VEXTRACT(uw, u32)
   1787 VEXTRACT(d, u64)
   1788 #undef VEXTRACT
   1789 
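         /*
          * Vector String Isolate: copy the elements of b up to (but not
          * including) the first zero element, clear the remaining elements,
          * and report in the returned CR field whether a zero terminator
          * was found.
          */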
   1790 #define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
   1791 uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
   1792 {                                                   \
   1793     int i, idx, crf = 0;                            \
   1794                                                     \
   1795     for (i = 0; i < NUM_ELEMS; i++) {               \
   1796         idx = LEFT ? i : NUM_ELEMS - i - 1;         \
   1797         if (b->Vsr##ELEM(idx)) {                    \
   1798             t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx);  \
   1799         } else {                                    \
   1800             crf = 0b0010;                           \
   1801             break;                                  \
   1802         }                                           \
   1803     }                                               \
   1804                                                     \
   1805     for (; i < NUM_ELEMS; i++) {                    \
   1806         idx = LEFT ? i : NUM_ELEMS - i - 1;         \
   1807         t->Vsr##ELEM(idx) = 0;                      \
   1808     }                                               \
   1809                                                     \
   1810     return crf;                                     \
   1811 }
   1812 VSTRI(VSTRIBL, B, 16, true)
   1813 VSTRI(VSTRIBR, B, 16, false)
   1814 VSTRI(VSTRIHL, H, 8, true)
   1815 VSTRI(VSTRIHR, H, 8, false)
   1816 #undef VSTRI
   1817 
   1818 void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
   1819 {
   1820     ppc_vsr_t t = { };
   1821     size_t es = sizeof(uint32_t);
   1822     uint32_t ext_index;
   1823     int i;
   1824 
   1825     ext_index = index;
   1826     for (i = 0; i < es; i++, ext_index++) {
   1827         t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
   1828     }
   1829 
   1830     *xt = t;
   1831 }
   1832 
   1833 void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
   1834 {
   1835     ppc_vsr_t t = *xt;
   1836     size_t es = sizeof(uint32_t);
   1837     int ins_index, i = 0;
   1838 
   1839     ins_index = index;
   1840     for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
   1841         t.VsrB(ins_index) = xb->VsrB(8 - es + i);
   1842     }
   1843 
   1844     *xt = t;
   1845 }
   1846 
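         /*
          * XXEVAL: imm is an 8-entry truth table over each bit triple taken
          * from (a, b, c).  Every set bit of imm enables one conjunction
          * (minterm); the result is the bitwise OR of the enabled minterms.
          */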
   1847 void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
   1848                    uint32_t desc)
   1849 {
   1850     /*
   1851      * Instead of processing imm bit-by-bit, we'll skip the computation of
   1852      * conjunctions whose corresponding bit is unset.
   1853      */
   1854     int bit, imm = simd_data(desc);
   1855     Int128 conj, disj = int128_zero();
   1856 
   1857     /* Iterate over set bits from the least to the most significant bit */
   1858     while (imm) {
   1859         /*
    1860          * Get the next bit to be processed with ctzl. Invert the result of
    1861          * ctzl to match the indexing used by PowerISA.
   1862          */
   1863         bit = 7 - ctzl(imm);
   1864         if (bit & 0x4) {
   1865             conj = a->s128;
   1866         } else {
   1867             conj = int128_not(a->s128);
   1868         }
   1869         if (bit & 0x2) {
   1870             conj = int128_and(conj, b->s128);
   1871         } else {
   1872             conj = int128_and(conj, int128_not(b->s128));
   1873         }
   1874         if (bit & 0x1) {
   1875             conj = int128_and(conj, c->s128);
   1876         } else {
   1877             conj = int128_and(conj, int128_not(c->s128));
   1878         }
   1879         disj = int128_or(disj, conj);
   1880 
   1881         /* Unset the least significant bit that is set */
   1882         imm &= imm - 1;
   1883     }
   1884 
   1885     t->s128 = disj;
   1886 }
   1887 
   1888 #define XXBLEND(name, sz) \
   1889 void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b,  \
   1890                                  ppc_avr_t *c, uint32_t desc)               \
   1891 {                                                                           \
   1892     for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) {                  \
   1893         t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ?               \
   1894             b->glue(u, sz)[i] : a->glue(u, sz)[i];                          \
   1895     }                                                                       \
   1896 }
   1897 XXBLEND(B, 8)
   1898 XXBLEND(H, 16)
   1899 XXBLEND(W, 32)
   1900 XXBLEND(D, 64)
   1901 #undef XXBLEND
   1902 
   1903 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1904 {
   1905     int sh = (b->VsrB(0xf) >> 3) & 0xf;
   1906 
   1907 #if HOST_BIG_ENDIAN
   1908     memmove(&r->u8[sh], &a->u8[0], 16 - sh);
   1909     memset(&r->u8[0], 0, sh);
   1910 #else
   1911     memmove(&r->u8[0], &a->u8[sh], 16 - sh);
   1912     memset(&r->u8[16 - sh], 0, sh);
   1913 #endif
   1914 }
   1915 
   1916 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1917 {
   1918     int64_t t;
   1919     int i, upper;
   1920     ppc_avr_t result;
   1921     int sat = 0;
   1922 
   1923     upper = ARRAY_SIZE(r->s32) - 1;
   1924     t = (int64_t)b->VsrSW(upper);
   1925     for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
   1926         t += a->VsrSW(i);
   1927         result.VsrSW(i) = 0;
   1928     }
   1929     result.VsrSW(upper) = cvtsdsw(t, &sat);
   1930     *r = result;
   1931 
   1932     if (sat) {
   1933         set_vscr_sat(env);
   1934     }
   1935 }
   1936 
   1937 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1938 {
   1939     int i, j, upper;
   1940     ppc_avr_t result;
   1941     int sat = 0;
   1942 
   1943     upper = 1;
   1944     for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
   1945         int64_t t = (int64_t)b->VsrSW(upper + i * 2);
   1946 
   1947         result.VsrD(i) = 0;
   1948         for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
   1949             t += a->VsrSW(2 * i + j);
   1950         }
   1951         result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
   1952     }
   1953 
   1954     *r = result;
   1955     if (sat) {
   1956         set_vscr_sat(env);
   1957     }
   1958 }
   1959 
   1960 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1961 {
   1962     int i, j;
   1963     int sat = 0;
   1964 
   1965     for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
   1966         int64_t t = (int64_t)b->s32[i];
   1967 
   1968         for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
   1969             t += a->s8[4 * i + j];
   1970         }
   1971         r->s32[i] = cvtsdsw(t, &sat);
   1972     }
   1973 
   1974     if (sat) {
   1975         set_vscr_sat(env);
   1976     }
   1977 }
   1978 
   1979 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1980 {
   1981     int sat = 0;
   1982     int i;
   1983 
   1984     for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
   1985         int64_t t = (int64_t)b->s32[i];
   1986 
   1987         t += a->s16[2 * i] + a->s16[2 * i + 1];
   1988         r->s32[i] = cvtsdsw(t, &sat);
   1989     }
   1990 
   1991     if (sat) {
   1992         set_vscr_sat(env);
   1993     }
   1994 }
   1995 
   1996 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   1997 {
   1998     int i, j;
   1999     int sat = 0;
   2000 
   2001     for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
   2002         uint64_t t = (uint64_t)b->u32[i];
   2003 
   2004         for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
   2005             t += a->u8[4 * i + j];
   2006         }
   2007         r->u32[i] = cvtuduw(t, &sat);
   2008     }
   2009 
   2010     if (sat) {
   2011         set_vscr_sat(env);
   2012     }
   2013 }
   2014 
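         /*
          * Vector Unpack: VUPKPX expands 16-bit 1/5/5/5 pixels from the
          * high or low half of b into 32-bit pixels, replicating the 1-bit
          * channel to 0xff/0x00 and zero-extending the 5-bit channels (the
          * per-pixel a/r/g/b locals deliberately shadow the operands inside
          * the loop).  VUPK sign-extends the packed elements to double width.
          */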
   2015 #if HOST_BIG_ENDIAN
   2016 #define UPKHI 1
   2017 #define UPKLO 0
   2018 #else
   2019 #define UPKHI 0
   2020 #define UPKLO 1
   2021 #endif
   2022 #define VUPKPX(suffix, hi)                                              \
   2023     void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
   2024     {                                                                   \
   2025         int i;                                                          \
   2026         ppc_avr_t result;                                               \
   2027                                                                         \
   2028         for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
   2029             uint16_t e = b->u16[hi ? i : i + 4];                        \
   2030             uint8_t a = (e >> 15) ? 0xff : 0;                           \
   2031             uint8_t r = (e >> 10) & 0x1f;                               \
   2032             uint8_t g = (e >> 5) & 0x1f;                                \
   2033             uint8_t b = e & 0x1f;                                       \
   2034                                                                         \
   2035             result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
   2036         }                                                               \
   2037         *r = result;                                                    \
   2038     }
   2039 VUPKPX(lpx, UPKLO)
   2040 VUPKPX(hpx, UPKHI)
   2041 #undef VUPKPX
   2042 
   2043 #define VUPK(suffix, unpacked, packee, hi)                              \
   2044     void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
   2045     {                                                                   \
   2046         int i;                                                          \
   2047         ppc_avr_t result;                                               \
   2048                                                                         \
   2049         if (hi) {                                                       \
   2050             for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
   2051                 result.unpacked[i] = b->packee[i];                      \
   2052             }                                                           \
   2053         } else {                                                        \
   2054             for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
   2055                  i++) {                                                 \
   2056                 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
   2057             }                                                           \
   2058         }                                                               \
   2059         *r = result;                                                    \
   2060     }
   2061 VUPK(hsb, s16, s8, UPKHI)
   2062 VUPK(hsh, s32, s16, UPKHI)
   2063 VUPK(hsw, s64, s32, UPKHI)
   2064 VUPK(lsb, s16, s8, UPKLO)
   2065 VUPK(lsh, s32, s16, UPKLO)
   2066 VUPK(lsw, s64, s32, UPKLO)
   2067 #undef VUPK
   2068 #undef UPKHI
   2069 #undef UPKLO
   2070 
   2071 #define VGENERIC_DO(name, element)                                      \
   2072     void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
   2073     {                                                                   \
   2074         int i;                                                          \
   2075                                                                         \
   2076         for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
   2077             r->element[i] = name(b->element[i]);                        \
   2078         }                                                               \
   2079     }
   2080 
   2081 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
   2082 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
   2083 
   2084 VGENERIC_DO(clzb, u8)
   2085 VGENERIC_DO(clzh, u16)
   2086 
   2087 #undef clzb
   2088 #undef clzh
   2089 
   2090 #define ctzb(v) ((v) ? ctz32(v) : 8)
   2091 #define ctzh(v) ((v) ? ctz32(v) : 16)
   2092 #define ctzw(v) ctz32((v))
   2093 #define ctzd(v) ctz64((v))
   2094 
   2095 VGENERIC_DO(ctzb, u8)
   2096 VGENERIC_DO(ctzh, u16)
   2097 VGENERIC_DO(ctzw, u32)
   2098 VGENERIC_DO(ctzd, u64)
   2099 
   2100 #undef ctzb
   2101 #undef ctzh
   2102 #undef ctzw
   2103 #undef ctzd
   2104 
   2105 #define popcntb(v) ctpop8(v)
   2106 #define popcnth(v) ctpop16(v)
   2107 #define popcntw(v) ctpop32(v)
   2108 #define popcntd(v) ctpop64(v)
   2109 
   2110 VGENERIC_DO(popcntb, u8)
   2111 VGENERIC_DO(popcnth, u16)
   2112 VGENERIC_DO(popcntw, u32)
   2113 VGENERIC_DO(popcntd, u64)
   2114 
   2115 #undef popcntb
   2116 #undef popcnth
   2117 #undef popcntw
   2118 #undef popcntd
   2119 
   2120 #undef VGENERIC_DO
   2121 
   2122 void helper_VADDUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   2123 {
   2124     r->s128 = int128_add(a->s128, b->s128);
   2125 }
   2126 
   2127 void helper_VADDEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
   2128 {
   2129     r->s128 = int128_add(int128_add(a->s128, b->s128),
   2130                          int128_make64(int128_getlo(c->s128) & 1));
   2131 }
   2132 
   2133 void helper_VADDCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   2134 {
   2135     r->VsrD(1) = int128_ult(int128_not(a->s128), b->s128);
   2136     r->VsrD(0) = 0;
   2137 }
   2138 
   2139 void helper_VADDECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
   2140 {
   2141     bool carry_out = int128_ult(int128_not(a->s128), b->s128),
   2142          carry_in = int128_getlo(c->s128) & 1;
   2143 
   2144     if (!carry_out && carry_in) {
   2145         carry_out = (int128_nz(a->s128) || int128_nz(b->s128)) &&
   2146                     int128_eq(int128_add(a->s128, b->s128), int128_makes64(-1));
   2147     }
   2148 
   2149     r->VsrD(0) = 0;
   2150     r->VsrD(1) = carry_out;
   2151 }
   2152 
   2153 void helper_VSUBUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   2154 {
   2155     r->s128 = int128_sub(a->s128, b->s128);
   2156 }
   2157 
   2158 void helper_VSUBEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
   2159 {
   2160     r->s128 = int128_add(int128_add(a->s128, int128_not(b->s128)),
   2161                          int128_make64(int128_getlo(c->s128) & 1));
   2162 }
   2163 
   2164 void helper_VSUBCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   2165 {
   2166     Int128 tmp = int128_not(b->s128);
   2167 
   2168     r->VsrD(1) = int128_ult(int128_not(a->s128), tmp) ||
   2169                  int128_eq(int128_add(a->s128, tmp), int128_makes64(-1));
   2170     r->VsrD(0) = 0;
   2171 }
   2172 
   2173 void helper_VSUBECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
   2174 {
   2175     Int128 tmp = int128_not(b->s128);
   2176     bool carry_out = int128_ult(int128_not(a->s128), tmp),
   2177          carry_in = int128_getlo(c->s128) & 1;
   2178 
   2179     r->VsrD(1) = carry_out || (carry_in && int128_eq(int128_add(a->s128, tmp),
   2180                                                      int128_makes64(-1)));
   2181     r->VsrD(0) = 0;
   2182 }
   2183 
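         /*
          * Signed BCD values are 31 4-bit digits packed into a 128-bit
          * vector, with digit 1 the least significant digit and the sign
          * code in the lowest nibble (digit 0).  Codes 0xA/0xC/0xE/0xF are
          * positive, 0xB/0xD negative; anything else is invalid.
          */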
   2184 #define BCD_PLUS_PREF_1 0xC
   2185 #define BCD_PLUS_PREF_2 0xF
   2186 #define BCD_PLUS_ALT_1  0xA
   2187 #define BCD_NEG_PREF    0xD
   2188 #define BCD_NEG_ALT     0xB
   2189 #define BCD_PLUS_ALT_2  0xE
   2190 #define NATIONAL_PLUS   0x2B
   2191 #define NATIONAL_NEG    0x2D
   2192 
   2193 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
   2194 
   2195 static int bcd_get_sgn(ppc_avr_t *bcd)
   2196 {
   2197     switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
   2198     case BCD_PLUS_PREF_1:
   2199     case BCD_PLUS_PREF_2:
   2200     case BCD_PLUS_ALT_1:
   2201     case BCD_PLUS_ALT_2:
   2202     {
   2203         return 1;
   2204     }
   2205 
   2206     case BCD_NEG_PREF:
   2207     case BCD_NEG_ALT:
   2208     {
   2209         return -1;
   2210     }
   2211 
   2212     default:
   2213     {
   2214         return 0;
   2215     }
   2216     }
   2217 }
   2218 
   2219 static int bcd_preferred_sgn(int sgn, int ps)
   2220 {
   2221     if (sgn >= 0) {
   2222         return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
   2223     } else {
   2224         return BCD_NEG_PREF;
   2225     }
   2226 }
   2227 
   2228 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
   2229 {
   2230     uint8_t result;
   2231     if (n & 1) {
   2232         result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
   2233     } else {
   2234        result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
   2235     }
   2236 
   2237     if (unlikely(result > 9)) {
   2238         *invalid = true;
   2239     }
   2240     return result;
   2241 }
   2242 
   2243 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
   2244 {
   2245     if (n & 1) {
   2246         bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
   2247         bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
   2248     } else {
   2249         bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
   2250         bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
   2251     }
   2252 }
   2253 
   2254 static bool bcd_is_valid(ppc_avr_t *bcd)
   2255 {
   2256     int i;
   2257     int invalid = 0;
   2258 
   2259     if (bcd_get_sgn(bcd) == 0) {
   2260         return false;
   2261     }
   2262 
   2263     for (i = 1; i < 32; i++) {
   2264         bcd_get_digit(bcd, i, &invalid);
   2265         if (unlikely(invalid)) {
   2266             return false;
   2267         }
   2268     }
   2269     return true;
   2270 }
   2271 
   2272 static int bcd_cmp_zero(ppc_avr_t *bcd)
   2273 {
   2274     if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
   2275         return CRF_EQ;
   2276     } else {
   2277         return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
   2278     }
   2279 }
   2280 
   2281 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
   2282 {
   2283     return reg->VsrH(7 - n);
   2284 }
   2285 
   2286 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
   2287 {
   2288     reg->VsrH(7 - n) = val;
   2289 }
   2290 
   2291 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
   2292 {
   2293     int i;
   2294     int invalid = 0;
   2295     for (i = 31; i > 0; i--) {
   2296         uint8_t dig_a = bcd_get_digit(a, i, &invalid);
   2297         uint8_t dig_b = bcd_get_digit(b, i, &invalid);
   2298         if (unlikely(invalid)) {
   2299             return 0; /* doesn't matter */
   2300         } else if (dig_a > dig_b) {
   2301             return 1;
   2302         } else if (dig_a < dig_b) {
   2303             return -1;
   2304         }
   2305     }
   2306 
   2307     return 0;
   2308 }
   2309 
   2310 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
   2311                        int *overflow)
   2312 {
   2313     int carry = 0;
   2314     int i;
   2315     int is_zero = 1;
   2316 
   2317     for (i = 1; i <= 31; i++) {
   2318         uint8_t digit = bcd_get_digit(a, i, invalid) +
   2319                         bcd_get_digit(b, i, invalid) + carry;
   2320         is_zero &= (digit == 0);
   2321         if (digit > 9) {
   2322             carry = 1;
   2323             digit -= 10;
   2324         } else {
   2325             carry = 0;
   2326         }
   2327 
   2328         bcd_put_digit(t, digit, i);
   2329     }
   2330 
   2331     *overflow = carry;
   2332     return is_zero;
   2333 }
   2334 
   2335 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
   2336                        int *overflow)
   2337 {
   2338     int carry = 0;
   2339     int i;
   2340 
   2341     for (i = 1; i <= 31; i++) {
   2342         uint8_t digit = bcd_get_digit(a, i, invalid) -
   2343                         bcd_get_digit(b, i, invalid) + carry;
   2344         if (digit & 0x80) {
   2345             carry = -1;
   2346             digit += 10;
   2347         } else {
   2348             carry = 0;
   2349         }
   2350 
   2351         bcd_put_digit(t, digit, i);
   2352     }
   2353 
   2354     *overflow = carry;
   2355 }
   2356 
   2357 uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
   2358 {
   2359 
   2360     int sgna = bcd_get_sgn(a);
   2361     int sgnb = bcd_get_sgn(b);
   2362     int invalid = (sgna == 0) || (sgnb == 0);
   2363     int overflow = 0;
   2364     int zero = 0;
   2365     uint32_t cr = 0;
   2366     ppc_avr_t result = { .u64 = { 0, 0 } };
   2367 
   2368     if (!invalid) {
   2369         if (sgna == sgnb) {
   2370             result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
   2371             zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
   2372             cr = (sgna > 0) ? CRF_GT : CRF_LT;
   2373         } else {
   2374             int magnitude = bcd_cmp_mag(a, b);
   2375             if (magnitude > 0) {
   2376                 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
   2377                 bcd_sub_mag(&result, a, b, &invalid, &overflow);
   2378                 cr = (sgna > 0) ? CRF_GT : CRF_LT;
   2379             } else if (magnitude < 0) {
   2380                 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
   2381                 bcd_sub_mag(&result, b, a, &invalid, &overflow);
   2382                 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
   2383             } else {
   2384                 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
   2385                 cr = CRF_EQ;
   2386             }
   2387         }
   2388     }
   2389 
   2390     if (unlikely(invalid)) {
   2391         result.VsrD(0) = result.VsrD(1) = -1;
   2392         cr = CRF_SO;
   2393     } else if (overflow) {
   2394         cr |= CRF_SO;
   2395     } else if (zero) {
   2396         cr |= CRF_EQ;
   2397     }
   2398 
   2399     *r = result;
   2400 
   2401     return cr;
   2402 }
   2403 
   2404 uint32_t helper_bcdsub(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
   2405 {
   2406     ppc_avr_t bcopy = *b;
   2407     int sgnb = bcd_get_sgn(b);
   2408     if (sgnb < 0) {
   2409         bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
   2410     } else if (sgnb > 0) {
   2411         bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
   2412     }
   2413     /* else invalid ... defer to bcdadd code for proper handling */
   2414 
   2415     return helper_bcdadd(r, a, &bcopy, ps);
   2416 }
   2417 
   2418 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
   2419 {
   2420     int i;
   2421     int cr = 0;
   2422     uint16_t national = 0;
   2423     uint16_t sgnb = get_national_digit(b, 0);
   2424     ppc_avr_t ret = { .u64 = { 0, 0 } };
   2425     int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
   2426 
   2427     for (i = 1; i < 8; i++) {
   2428         national = get_national_digit(b, i);
   2429         if (unlikely(national < 0x30 || national > 0x39)) {
   2430             invalid = 1;
   2431             break;
   2432         }
   2433 
   2434         bcd_put_digit(&ret, national & 0xf, i);
   2435     }
   2436 
   2437     if (sgnb == NATIONAL_PLUS) {
   2438         bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
   2439     } else {
   2440         bcd_put_digit(&ret, BCD_NEG_PREF, 0);
   2441     }
   2442 
   2443     cr = bcd_cmp_zero(&ret);
   2444 
   2445     if (unlikely(invalid)) {
   2446         cr = CRF_SO;
   2447     }
   2448 
   2449     *r = ret;
   2450 
   2451     return cr;
   2452 }
   2453 
   2454 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
   2455 {
   2456     int i;
   2457     int cr = 0;
   2458     int sgnb = bcd_get_sgn(b);
   2459     int invalid = (sgnb == 0);
   2460     ppc_avr_t ret = { .u64 = { 0, 0 } };
   2461 
   2462     int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
   2463 
   2464     for (i = 1; i < 8; i++) {
   2465         set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
   2466 
   2467         if (unlikely(invalid)) {
   2468             break;
   2469         }
   2470     }
   2471     set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
   2472 
   2473     cr = bcd_cmp_zero(b);
   2474 
   2475     if (ox_flag) {
   2476         cr |= CRF_SO;
   2477     }
   2478 
   2479     if (unlikely(invalid)) {
   2480         cr = CRF_SO;
   2481     }
   2482 
   2483     *r = ret;
   2484 
   2485     return cr;
   2486 }
   2487 
   2488 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
   2489 {
   2490     int i;
   2491     int cr = 0;
   2492     int invalid = 0;
   2493     int zone_digit = 0;
   2494     int zone_lead = ps ? 0xF : 0x3;
   2495     int digit = 0;
   2496     ppc_avr_t ret = { .u64 = { 0, 0 } };
   2497     int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
   2498 
   2499     if (unlikely((sgnb < 0xA) && ps)) {
   2500         invalid = 1;
   2501     }
   2502 
   2503     for (i = 0; i < 16; i++) {
   2504         zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
   2505         digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
   2506         if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
   2507             invalid = 1;
   2508             break;
   2509         }
   2510 
   2511         bcd_put_digit(&ret, digit, i + 1);
   2512     }
   2513 
   2514     if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
   2515             (!ps && (sgnb & 0x4))) {
   2516         bcd_put_digit(&ret, BCD_NEG_PREF, 0);
   2517     } else {
   2518         bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
   2519     }
   2520 
   2521     cr = bcd_cmp_zero(&ret);
   2522 
   2523     if (unlikely(invalid)) {
   2524         cr = CRF_SO;
   2525     }
   2526 
   2527     *r = ret;
   2528 
   2529     return cr;
   2530 }
   2531 
   2532 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
   2533 {
   2534     int i;
   2535     int cr = 0;
   2536     uint8_t digit = 0;
   2537     int sgnb = bcd_get_sgn(b);
   2538     int zone_lead = (ps) ? 0xF0 : 0x30;
   2539     int invalid = (sgnb == 0);
   2540     ppc_avr_t ret = { .u64 = { 0, 0 } };
   2541 
   2542     int ox_flag = ((b->VsrD(0) >> 4) != 0);
   2543 
   2544     for (i = 0; i < 16; i++) {
   2545         digit = bcd_get_digit(b, i + 1, &invalid);
   2546 
   2547         if (unlikely(invalid)) {
   2548             break;
   2549         }
   2550 
   2551         ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
   2552     }
   2553 
   2554     if (ps) {
   2555         bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
   2556     } else {
   2557         bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
   2558     }
   2559 
   2560     cr = bcd_cmp_zero(b);
   2561 
   2562     if (ox_flag) {
   2563         cr |= CRF_SO;
   2564     }
   2565 
   2566     if (unlikely(invalid)) {
   2567         cr = CRF_SO;
   2568     }
   2569 
   2570     *r = ret;
   2571 
   2572     return cr;
   2573 }
   2574 
   2575 /**
   2576  * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
   2577  *
   2578  * Returns:
   2579  * > 0 if ahi|alo > bhi|blo,
   2580  * 0 if ahi|alo == bhi|blo,
   2581  * < 0 if ahi|alo < bhi|blo
   2582  */
   2583 static inline int ucmp128(uint64_t alo, uint64_t ahi,
   2584                           uint64_t blo, uint64_t bhi)
   2585 {
   2586     return (ahi == bhi) ?
   2587         (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
   2588         (ahi > bhi ? 1 : -1);
   2589 }
   2590 
   2591 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
   2592 {
   2593     int i;
   2594     int cr;
   2595     uint64_t lo_value;
   2596     uint64_t hi_value;
   2597     uint64_t rem;
   2598     ppc_avr_t ret = { .u64 = { 0, 0 } };
   2599 
   2600     if (b->VsrSD(0) < 0) {
   2601         lo_value = -b->VsrSD(1);
   2602         hi_value = ~b->VsrD(0) + !lo_value;
   2603         bcd_put_digit(&ret, 0xD, 0);
   2604 
   2605         cr = CRF_LT;
   2606     } else {
   2607         lo_value = b->VsrD(1);
   2608         hi_value = b->VsrD(0);
   2609         bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
   2610 
   2611         if (hi_value == 0 && lo_value == 0) {
   2612             cr = CRF_EQ;
   2613         } else {
   2614             cr = CRF_GT;
   2615         }
   2616     }
   2617 
   2618     /*
   2619      * Check src limits: abs(src) <= 10^31 - 1
   2620      *
   2621      * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
   2622      */
   2623     if (ucmp128(lo_value, hi_value,
   2624                 0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
   2625         cr |= CRF_SO;
   2626 
   2627         /*
   2628          * According to the ISA, if src wouldn't fit in the destination
   2629          * register, the result is undefined.
   2630          * In that case, we leave r unchanged.
   2631          */
   2632     } else {
   2633         rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
   2634 
   2635         for (i = 1; i < 16; rem /= 10, i++) {
   2636             bcd_put_digit(&ret, rem % 10, i);
   2637         }
   2638 
   2639         for (; i < 32; lo_value /= 10, i++) {
   2640             bcd_put_digit(&ret, lo_value % 10, i);
   2641         }
   2642 
   2643         *r = ret;
   2644     }
   2645 
   2646     return cr;
   2647 }
   2648 
   2649 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
   2650 {
   2651     uint8_t i;
   2652     int cr;
   2653     uint64_t carry;
   2654     uint64_t unused;
   2655     uint64_t lo_value;
   2656     uint64_t hi_value = 0;
   2657     int sgnb = bcd_get_sgn(b);
   2658     int invalid = (sgnb == 0);
   2659 
   2660     lo_value = bcd_get_digit(b, 31, &invalid);
   2661     for (i = 30; i > 0; i--) {
   2662         mulu64(&lo_value, &carry, lo_value, 10ULL);
   2663         mulu64(&hi_value, &unused, hi_value, 10ULL);
   2664         lo_value += bcd_get_digit(b, i, &invalid);
   2665         hi_value += carry;
   2666 
   2667         if (unlikely(invalid)) {
   2668             break;
   2669         }
   2670     }
   2671 
   2672     if (sgnb == -1) {
   2673         r->VsrSD(1) = -lo_value;
   2674         r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
   2675     } else {
   2676         r->VsrSD(1) = lo_value;
   2677         r->VsrSD(0) = hi_value;
   2678     }
   2679 
   2680     cr = bcd_cmp_zero(b);
   2681 
   2682     if (unlikely(invalid)) {
   2683         cr = CRF_SO;
   2684     }
   2685 
   2686     return cr;
   2687 }
   2688 
   2689 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
   2690 {
   2691     int i;
   2692     int invalid = 0;
   2693 
   2694     if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
   2695         return CRF_SO;
   2696     }
   2697 
   2698     *r = *a;
   2699     bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
   2700 
   2701     for (i = 1; i < 32; i++) {
   2702         bcd_get_digit(a, i, &invalid);
   2703         bcd_get_digit(b, i, &invalid);
   2704         if (unlikely(invalid)) {
   2705             return CRF_SO;
   2706         }
   2707     }
   2708 
   2709     return bcd_cmp_zero(r);
   2710 }
   2711 
   2712 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
   2713 {
   2714     int sgnb = bcd_get_sgn(b);
   2715 
   2716     *r = *b;
   2717     bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
   2718 
   2719     if (bcd_is_valid(b) == false) {
   2720         return CRF_SO;
   2721     }
   2722 
   2723     return bcd_cmp_zero(r);
   2724 }
   2725 
   2726 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
   2727 {
   2728     int cr;
   2729     int i = a->VsrSB(7);
   2730     bool ox_flag = false;
   2731     int sgnb = bcd_get_sgn(b);
   2732     ppc_avr_t ret = *b;
   2733     ret.VsrD(1) &= ~0xf;
   2734 
   2735     if (bcd_is_valid(b) == false) {
   2736         return CRF_SO;
   2737     }
   2738 
   2739     if (unlikely(i > 31)) {
   2740         i = 31;
   2741     } else if (unlikely(i < -31)) {
   2742         i = -31;
   2743     }
   2744 
   2745     if (i > 0) {
   2746         ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
   2747     } else {
   2748         urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
   2749     }
   2750     bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
   2751 
   2752     *r = ret;
   2753 
   2754     cr = bcd_cmp_zero(r);
   2755     if (ox_flag) {
   2756         cr |= CRF_SO;
   2757     }
   2758 
   2759     return cr;
   2760 }
   2761 
   2762 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
   2763 {
   2764     int cr;
   2765     int i;
   2766     int invalid = 0;
   2767     bool ox_flag = false;
   2768     ppc_avr_t ret = *b;
   2769 
   2770     for (i = 0; i < 32; i++) {
   2771         bcd_get_digit(b, i, &invalid);
   2772 
   2773         if (unlikely(invalid)) {
   2774             return CRF_SO;
   2775         }
   2776     }
   2777 
   2778     i = a->VsrSB(7);
   2779     if (i >= 32) {
   2780         ox_flag = true;
   2781         ret.VsrD(1) = ret.VsrD(0) = 0;
   2782     } else if (i <= -32) {
   2783         ret.VsrD(1) = ret.VsrD(0) = 0;
   2784     } else if (i > 0) {
   2785         ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
   2786     } else {
   2787         urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
   2788     }
   2789     *r = ret;
   2790 
   2791     cr = bcd_cmp_zero(r);
   2792     if (ox_flag) {
   2793         cr |= CRF_SO;
   2794     }
   2795 
   2796     return cr;
   2797 }
   2798 
   2799 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
   2800 {
   2801     int cr;
   2802     int unused = 0;
   2803     int invalid = 0;
   2804     bool ox_flag = false;
   2805     int sgnb = bcd_get_sgn(b);
   2806     ppc_avr_t ret = *b;
   2807     ret.VsrD(1) &= ~0xf;
   2808 
   2809     int i = a->VsrSB(7);
   2810     ppc_avr_t bcd_one;
   2811 
   2812     bcd_one.VsrD(0) = 0;
   2813     bcd_one.VsrD(1) = 0x10;
   2814 
   2815     if (bcd_is_valid(b) == false) {
   2816         return CRF_SO;
   2817     }
   2818 
   2819     if (unlikely(i > 31)) {
   2820         i = 31;
   2821     } else if (unlikely(i < -31)) {
   2822         i = -31;
   2823     }
   2824 
   2825     if (i > 0) {
   2826         ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
   2827     } else {
   2828         urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
   2829 
   2830         if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
   2831             bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
   2832         }
   2833     }
   2834     bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
   2835 
   2836     cr = bcd_cmp_zero(&ret);
   2837     if (ox_flag) {
   2838         cr |= CRF_SO;
   2839     }
   2840     *r = ret;
   2841 
   2842     return cr;
   2843 }
   2844 
   2845 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
   2846 {
   2847     uint64_t mask;
   2848     uint32_t ox_flag = 0;
   2849     int i = a->VsrSH(3) + 1;
   2850     ppc_avr_t ret = *b;
   2851 
   2852     if (bcd_is_valid(b) == false) {
   2853         return CRF_SO;
   2854     }
   2855 
   2856     if (i > 16 && i < 32) {
   2857         mask = (uint64_t)-1 >> (128 - i * 4);
   2858         if (ret.VsrD(0) & ~mask) {
   2859             ox_flag = CRF_SO;
   2860         }
   2861 
   2862         ret.VsrD(0) &= mask;
   2863     } else if (i >= 0 && i <= 16) {
   2864         mask = (uint64_t)-1 >> (64 - i * 4);
   2865         if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
   2866             ox_flag = CRF_SO;
   2867         }
   2868 
   2869         ret.VsrD(1) &= mask;
   2870         ret.VsrD(0) = 0;
   2871     }
   2872     bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
   2873     *r = ret;
   2874 
   2875     return bcd_cmp_zero(&ret) | ox_flag;
   2876 }
   2877 
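/*
 * bcdutrunc. - Decimal Unsigned Truncate.  Like bcdtrunc. but for an
 * unsigned BCD operand: all 32 nibbles of b are digits, there is no
 * sign nibble to preserve, and the CR result only distinguishes zero
 * (EQ) from nonzero (GT), ORed with SO when nonzero digits are
 * discarded.
 */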
   2878 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
   2879 {
   2880     int i;
   2881     uint64_t mask;
   2882     uint32_t ox_flag = 0;
   2883     int invalid = 0;
   2884     ppc_avr_t ret = *b;
   2885 
   2886     for (i = 0; i < 32; i++) {
   2887         bcd_get_digit(b, i, &invalid);
   2888 
   2889         if (unlikely(invalid)) {
   2890             return CRF_SO;
   2891         }
   2892     }
   2893 
   2894     i = a->VsrSH(3);
   2895     if (i > 16 && i < 33) {
   2896         mask = (uint64_t)-1 >> (128 - i * 4);
   2897         if (ret.VsrD(0) & ~mask) {
   2898             ox_flag = CRF_SO;
   2899         }
   2900 
   2901         ret.VsrD(0) &= mask;
   2902     } else if (i > 0 && i <= 16) {
   2903         mask = (uint64_t)-1 >> (64 - i * 4);
   2904         if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
   2905             ox_flag = CRF_SO;
   2906         }
   2907 
   2908         ret.VsrD(1) &= mask;
   2909         ret.VsrD(0) = 0;
   2910     } else if (i == 0) {
   2911         if (ret.VsrD(0) || ret.VsrD(1)) {
   2912             ox_flag = CRF_SO;
   2913         }
   2914         ret.VsrD(0) = ret.VsrD(1) = 0;
   2915     }
   2916 
   2917     *r = ret;
   2918     if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
   2919         return ox_flag | CRF_EQ;
   2920     }
   2921 
   2922     return ox_flag | CRF_GT;
   2923 }
   2924 
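/* vsbox: apply the AES SubBytes S-box to each byte of a independently. */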
   2925 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
   2926 {
   2927     int i;
   2928     VECTOR_FOR_INORDER_I(i, u8) {
   2929         r->u8[i] = AES_sbox[a->u8[i]];
   2930     }
   2931 }
   2932 
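/*
 * vcipher: one full AES encryption round.  The combined Te tables
 * provide SubBytes and MixColumns, AES_shifts provides ShiftRows, and
 * b supplies the round key that is XORed into the result.
 * vcipherlast below is the final round, which omits MixColumns.
 */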
   2933 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   2934 {
   2935     ppc_avr_t result;
   2936     int i;
   2937 
   2938     VECTOR_FOR_INORDER_I(i, u32) {
   2939         result.VsrW(i) = b->VsrW(i) ^
   2940             (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
   2941              AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
   2942              AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
   2943              AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
   2944     }
   2945     *r = result;
   2946 }
   2947 
   2948 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   2949 {
   2950     ppc_avr_t result;
   2951     int i;
   2952 
   2953     VECTOR_FOR_INORDER_I(i, u8) {
   2954         result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
   2955     }
   2956     *r = result;
   2957 }
   2958 
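/*
 * vncipher: one AES decryption round - inverse ShiftRows and inverse
 * SubBytes on a, XOR with the round key in b, then InvMixColumns via
 * the AES_imc table.  vncipherlast below is the final decryption
 * round, which omits InvMixColumns.
 */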
   2959 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   2960 {
    /*
     * This differs from what is written in ISA V2.07.  The RTL is
     * incorrect and will be fixed in V2.07B.
     */
   2963     int i;
   2964     ppc_avr_t tmp;
   2965 
   2966     VECTOR_FOR_INORDER_I(i, u8) {
   2967         tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
   2968     }
   2969 
   2970     VECTOR_FOR_INORDER_I(i, u32) {
   2971         r->VsrW(i) =
   2972             AES_imc[tmp.VsrB(4 * i + 0)][0] ^
   2973             AES_imc[tmp.VsrB(4 * i + 1)][1] ^
   2974             AES_imc[tmp.VsrB(4 * i + 2)][2] ^
   2975             AES_imc[tmp.VsrB(4 * i + 3)][3];
   2976     }
   2977 }
   2978 
   2979 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
   2980 {
   2981     ppc_avr_t result;
   2982     int i;
   2983 
   2984     VECTOR_FOR_INORDER_I(i, u8) {
   2985         result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
   2986     }
   2987     *r = result;
   2988 }
   2989 
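/*
 * vshasigmaw: SHA-256 sigma functions.  Bit 4 of st_six selects the
 * lower-case sigma (message schedule) or upper-case Sigma
 * (compression) functions; the low four bits choose sigma-0 or
 * sigma-1 per word element.  The rotate and shift amounts below are
 * the SHA-256 constants from FIPS 180-4.
 */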
   2990 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
   2991 {
   2992     int st = (st_six & 0x10) != 0;
   2993     int six = st_six & 0xF;
   2994     int i;
   2995 
   2996     for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
   2997         if (st == 0) {
   2998             if ((six & (0x8 >> i)) == 0) {
   2999                 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
   3000                              ror32(a->VsrW(i), 18) ^
   3001                              (a->VsrW(i) >> 3);
   3002             } else { /* six.bit[i] == 1 */
   3003                 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
   3004                              ror32(a->VsrW(i), 19) ^
   3005                              (a->VsrW(i) >> 10);
   3006             }
   3007         } else { /* st == 1 */
   3008             if ((six & (0x8 >> i)) == 0) {
   3009                 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
   3010                              ror32(a->VsrW(i), 13) ^
   3011                              ror32(a->VsrW(i), 22);
   3012             } else { /* six.bit[i] == 1 */
   3013                 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
   3014                              ror32(a->VsrW(i), 11) ^
   3015                              ror32(a->VsrW(i), 25);
   3016             }
   3017         }
   3018     }
   3019 }
   3020 
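/*
 * vshasigmad: SHA-512 sigma functions on the two doubleword elements,
 * with st_six decoded analogously to vshasigmaw (one selector bit per
 * doubleword element).
 */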
   3021 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
   3022 {
   3023     int st = (st_six & 0x10) != 0;
   3024     int six = st_six & 0xF;
   3025     int i;
   3026 
   3027     for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
   3028         if (st == 0) {
   3029             if ((six & (0x8 >> (2 * i))) == 0) {
   3030                 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
   3031                              ror64(a->VsrD(i), 8) ^
   3032                              (a->VsrD(i) >> 7);
   3033             } else { /* six.bit[2*i] == 1 */
   3034                 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
   3035                              ror64(a->VsrD(i), 61) ^
   3036                              (a->VsrD(i) >> 6);
   3037             }
   3038         } else { /* st == 1 */
   3039             if ((six & (0x8 >> (2 * i))) == 0) {
   3040                 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
   3041                              ror64(a->VsrD(i), 34) ^
   3042                              ror64(a->VsrD(i), 39);
   3043             } else { /* six.bit[2*i] == 1 */
   3044                 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
   3045                              ror64(a->VsrD(i), 18) ^
   3046                              ror64(a->VsrD(i), 41);
   3047             }
   3048         }
   3049     }
   3050 }
   3051 
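/*
 * vpermxor: for each result byte, the high nibble of c selects a byte
 * of a and the low nibble selects a byte of b; the two are XORed.
 */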
   3052 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
   3053 {
   3054     ppc_avr_t result;
   3055     int i;
   3056 
   3057     for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
   3058         int indexA = c->VsrB(i) >> 4;
   3059         int indexB = c->VsrB(i) & 0xF;
   3060 
   3061         result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
   3062     }
   3063     *r = result;
   3064 }
   3065 
   3066 #undef VECTOR_FOR_INORDER_I
   3067 
   3068 /*****************************************************************************/
   3069 /* SPE extension helpers */
   3070 /* Use a table to make this quicker */
   3071 static const uint8_t hbrev[16] = {
   3072     0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
   3073     0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
   3074 };
   3075 
   3076 static inline uint8_t byte_reverse(uint8_t val)
   3077 {
   3078     return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
   3079 }
   3080 
   3081 static inline uint32_t word_reverse(uint32_t val)
   3082 {
   3083     return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
   3084         (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
   3085 }
   3086 
   3087 #define MASKBITS 16 /* Arbitrary value - the real width is implementation dependent */
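/*
 * brinc: bit-reversed increment, used by SPE code to step through
 * bit-reversed (e.g. FFT butterfly) address sequences.  The result is
 * a bit-reversed increment of arg1 under the mask arg2, confined to
 * the low MASKBITS bits; the bits of arg1 above MASKBITS pass through
 * unchanged.
 */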
   3088 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
   3089 {
   3090     uint32_t a, b, d, mask;
   3091 
   3092     mask = UINT32_MAX >> (32 - MASKBITS);
   3093     a = arg1 & mask;
   3094     b = arg2 & mask;
   3095     d = word_reverse(1 + word_reverse(a | ~b));
   3096     return (arg1 & ~mask) | (d & b);
   3097 }
   3098 
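/*
 * cntlsw32 counts the leading bits equal to the sign bit, cntlzw32
 * the leading zero bits, for the SPE count-leading helpers.
 */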
   3099 uint32_t helper_cntlsw32(uint32_t val)
   3100 {
   3101     if (val & 0x80000000) {
   3102         return clz32(~val);
   3103     } else {
   3104         return clz32(val);
   3105     }
   3106 }
   3107 
   3108 uint32_t helper_cntlzw32(uint32_t val)
   3109 {
   3110     return clz32(val);
   3111 }
   3112 
   3113 /* 440 specific */
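/*
 * dlmzb: scan the eight bytes of high:low from the most significant
 * byte down and return the 1-based position of the first zero byte
 * (8 if there is none).  The count is also written to the low 7 bits
 * of XER; with Rc, CR0 reports whether the zero byte was in the high
 * word, the low word, or absent, plus SO.
 */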
   3114 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
   3115                           target_ulong low, uint32_t update_Rc)
   3116 {
   3117     target_ulong mask;
   3118     int i;
   3119 
   3120     i = 1;
   3121     for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
   3122         if ((high & mask) == 0) {
   3123             if (update_Rc) {
   3124                 env->crf[0] = 0x4;
   3125             }
   3126             goto done;
   3127         }
   3128         i++;
   3129     }
   3130     for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
   3131         if ((low & mask) == 0) {
   3132             if (update_Rc) {
   3133                 env->crf[0] = 0x8;
   3134             }
   3135             goto done;
   3136         }
   3137         i++;
   3138     }
   3139     i = 8;
   3140     if (update_Rc) {
   3141         env->crf[0] = 0x2;
   3142     }
   3143  done:
   3144     env->xer = (env->xer & ~0x7F) | i;
   3145     if (update_Rc) {
   3146         env->crf[0] |= xer_so;
   3147     }
   3148     return i;
   3149 }