qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

msa_helper.c (322624B)


      1 /*
      2  * MIPS SIMD Architecture Module Instruction emulation helpers for QEMU.
      3  *
      4  * Copyright (c) 2014 Imagination Technologies
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Lesser General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2.1 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Lesser General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Lesser General Public
     17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     18  */
     19 
     20 #include "qemu/osdep.h"
     21 #include "cpu.h"
     22 #include "internal.h"
     23 #include "tcg/tcg.h"
     24 #include "exec/exec-all.h"
     25 #include "exec/helper-proto.h"
     26 #include "exec/memop.h"
     27 #include "fpu/softfloat.h"
     28 #include "fpu_helper.h"
     29 
     30 /* Data format min and max values */
     31 #define DF_BITS(df) (1 << ((df) + 3))
     32 
     33 #define DF_MAX_INT(df)  (int64_t)((1LL << (DF_BITS(df) - 1)) - 1)
     34 #define M_MAX_INT(m)    (int64_t)((1LL << ((m)         - 1)) - 1)
     35 
     36 #define DF_MIN_INT(df)  (int64_t)(-(1LL << (DF_BITS(df) - 1)))
     37 #define M_MIN_INT(m)    (int64_t)(-(1LL << ((m)         - 1)))
     38 
     39 #define DF_MAX_UINT(df) (uint64_t)(-1ULL >> (64 - DF_BITS(df)))
     40 #define M_MAX_UINT(m)   (uint64_t)(-1ULL >> (64 - (m)))
     41 
     42 #define UNSIGNED(x, df) ((x) & DF_MAX_UINT(df))
     43 #define SIGNED(x, df)                                                   \
     44     ((((int64_t)x) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)))
     45 
     46 /* Element-by-element access macros */
     47 #define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
     48 
     49 
     50 
     51 /*
     52  * Bit Count
     53  * ---------
     54  *
     55  * +---------------+----------------------------------------------------------+
     56  * | NLOC.B        | Vector Leading Ones Count (byte)                         |
     57  * | NLOC.H        | Vector Leading Ones Count (halfword)                     |
     58  * | NLOC.W        | Vector Leading Ones Count (word)                         |
     59  * | NLOC.D        | Vector Leading Ones Count (doubleword)                   |
     60  * | NLZC.B        | Vector Leading Zeros Count (byte)                        |
     61  * | NLZC.H        | Vector Leading Zeros Count (halfword)                    |
     62  * | NLZC.W        | Vector Leading Zeros Count (word)                        |
     63  * | NLZC.D        | Vector Leading Zeros Count (doubleword)                  |
     64  * | PCNT.B        | Vector Population Count (byte)                           |
     65  * | PCNT.H        | Vector Population Count (halfword)                       |
     66  * | PCNT.W        | Vector Population Count (word)                           |
     67  * | PCNT.D        | Vector Population Count (doubleword)                     |
     68  * +---------------+----------------------------------------------------------+
     69  */
     70 
     71 static inline int64_t msa_nlzc_df(uint32_t df, int64_t arg)
     72 {
     73     uint64_t x, y;
     74     int n, c;
     75 
     76     x = UNSIGNED(arg, df);
     77     n = DF_BITS(df);
     78     c = DF_BITS(df) / 2;
     79 
     80     do {
     81         y = x >> c;
     82         if (y != 0) {
     83             n = n - c;
     84             x = y;
     85         }
     86         c = c >> 1;
     87     } while (c != 0);
     88 
     89     return n - x;
     90 }
     91 
     92 static inline int64_t msa_nloc_df(uint32_t df, int64_t arg)
     93 {
     94     return msa_nlzc_df(df, UNSIGNED((~arg), df));
     95 }
     96 
     97 void helper_msa_nloc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
     98 {
     99     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    100     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    101 
    102     pwd->b[0]  = msa_nloc_df(DF_BYTE, pws->b[0]);
    103     pwd->b[1]  = msa_nloc_df(DF_BYTE, pws->b[1]);
    104     pwd->b[2]  = msa_nloc_df(DF_BYTE, pws->b[2]);
    105     pwd->b[3]  = msa_nloc_df(DF_BYTE, pws->b[3]);
    106     pwd->b[4]  = msa_nloc_df(DF_BYTE, pws->b[4]);
    107     pwd->b[5]  = msa_nloc_df(DF_BYTE, pws->b[5]);
    108     pwd->b[6]  = msa_nloc_df(DF_BYTE, pws->b[6]);
    109     pwd->b[7]  = msa_nloc_df(DF_BYTE, pws->b[7]);
    110     pwd->b[8]  = msa_nloc_df(DF_BYTE, pws->b[8]);
    111     pwd->b[9]  = msa_nloc_df(DF_BYTE, pws->b[9]);
    112     pwd->b[10] = msa_nloc_df(DF_BYTE, pws->b[10]);
    113     pwd->b[11] = msa_nloc_df(DF_BYTE, pws->b[11]);
    114     pwd->b[12] = msa_nloc_df(DF_BYTE, pws->b[12]);
    115     pwd->b[13] = msa_nloc_df(DF_BYTE, pws->b[13]);
    116     pwd->b[14] = msa_nloc_df(DF_BYTE, pws->b[14]);
    117     pwd->b[15] = msa_nloc_df(DF_BYTE, pws->b[15]);
    118 }
    119 
    120 void helper_msa_nloc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    121 {
    122     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    123     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    124 
    125     pwd->h[0]  = msa_nloc_df(DF_HALF, pws->h[0]);
    126     pwd->h[1]  = msa_nloc_df(DF_HALF, pws->h[1]);
    127     pwd->h[2]  = msa_nloc_df(DF_HALF, pws->h[2]);
    128     pwd->h[3]  = msa_nloc_df(DF_HALF, pws->h[3]);
    129     pwd->h[4]  = msa_nloc_df(DF_HALF, pws->h[4]);
    130     pwd->h[5]  = msa_nloc_df(DF_HALF, pws->h[5]);
    131     pwd->h[6]  = msa_nloc_df(DF_HALF, pws->h[6]);
    132     pwd->h[7]  = msa_nloc_df(DF_HALF, pws->h[7]);
    133 }
    134 
    135 void helper_msa_nloc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    136 {
    137     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    138     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    139 
    140     pwd->w[0]  = msa_nloc_df(DF_WORD, pws->w[0]);
    141     pwd->w[1]  = msa_nloc_df(DF_WORD, pws->w[1]);
    142     pwd->w[2]  = msa_nloc_df(DF_WORD, pws->w[2]);
    143     pwd->w[3]  = msa_nloc_df(DF_WORD, pws->w[3]);
    144 }
    145 
    146 void helper_msa_nloc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    147 {
    148     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    149     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    150 
    151     pwd->d[0]  = msa_nloc_df(DF_DOUBLE, pws->d[0]);
    152     pwd->d[1]  = msa_nloc_df(DF_DOUBLE, pws->d[1]);
    153 }
    154 
    155 void helper_msa_nlzc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    156 {
    157     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    158     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    159 
    160     pwd->b[0]  = msa_nlzc_df(DF_BYTE, pws->b[0]);
    161     pwd->b[1]  = msa_nlzc_df(DF_BYTE, pws->b[1]);
    162     pwd->b[2]  = msa_nlzc_df(DF_BYTE, pws->b[2]);
    163     pwd->b[3]  = msa_nlzc_df(DF_BYTE, pws->b[3]);
    164     pwd->b[4]  = msa_nlzc_df(DF_BYTE, pws->b[4]);
    165     pwd->b[5]  = msa_nlzc_df(DF_BYTE, pws->b[5]);
    166     pwd->b[6]  = msa_nlzc_df(DF_BYTE, pws->b[6]);
    167     pwd->b[7]  = msa_nlzc_df(DF_BYTE, pws->b[7]);
    168     pwd->b[8]  = msa_nlzc_df(DF_BYTE, pws->b[8]);
    169     pwd->b[9]  = msa_nlzc_df(DF_BYTE, pws->b[9]);
    170     pwd->b[10] = msa_nlzc_df(DF_BYTE, pws->b[10]);
    171     pwd->b[11] = msa_nlzc_df(DF_BYTE, pws->b[11]);
    172     pwd->b[12] = msa_nlzc_df(DF_BYTE, pws->b[12]);
    173     pwd->b[13] = msa_nlzc_df(DF_BYTE, pws->b[13]);
    174     pwd->b[14] = msa_nlzc_df(DF_BYTE, pws->b[14]);
    175     pwd->b[15] = msa_nlzc_df(DF_BYTE, pws->b[15]);
    176 }
    177 
    178 void helper_msa_nlzc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    179 {
    180     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    181     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    182 
    183     pwd->h[0]  = msa_nlzc_df(DF_HALF, pws->h[0]);
    184     pwd->h[1]  = msa_nlzc_df(DF_HALF, pws->h[1]);
    185     pwd->h[2]  = msa_nlzc_df(DF_HALF, pws->h[2]);
    186     pwd->h[3]  = msa_nlzc_df(DF_HALF, pws->h[3]);
    187     pwd->h[4]  = msa_nlzc_df(DF_HALF, pws->h[4]);
    188     pwd->h[5]  = msa_nlzc_df(DF_HALF, pws->h[5]);
    189     pwd->h[6]  = msa_nlzc_df(DF_HALF, pws->h[6]);
    190     pwd->h[7]  = msa_nlzc_df(DF_HALF, pws->h[7]);
    191 }
    192 
    193 void helper_msa_nlzc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    194 {
    195     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    196     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    197 
    198     pwd->w[0]  = msa_nlzc_df(DF_WORD, pws->w[0]);
    199     pwd->w[1]  = msa_nlzc_df(DF_WORD, pws->w[1]);
    200     pwd->w[2]  = msa_nlzc_df(DF_WORD, pws->w[2]);
    201     pwd->w[3]  = msa_nlzc_df(DF_WORD, pws->w[3]);
    202 }
    203 
    204 void helper_msa_nlzc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    205 {
    206     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    207     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    208 
    209     pwd->d[0]  = msa_nlzc_df(DF_DOUBLE, pws->d[0]);
    210     pwd->d[1]  = msa_nlzc_df(DF_DOUBLE, pws->d[1]);
    211 }
    212 
    213 static inline int64_t msa_pcnt_df(uint32_t df, int64_t arg)
    214 {
    215     uint64_t x;
    216 
    217     x = UNSIGNED(arg, df);
    218 
    219     x = (x & 0x5555555555555555ULL) + ((x >>  1) & 0x5555555555555555ULL);
    220     x = (x & 0x3333333333333333ULL) + ((x >>  2) & 0x3333333333333333ULL);
    221     x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x >>  4) & 0x0F0F0F0F0F0F0F0FULL);
    222     x = (x & 0x00FF00FF00FF00FFULL) + ((x >>  8) & 0x00FF00FF00FF00FFULL);
    223     x = (x & 0x0000FFFF0000FFFFULL) + ((x >> 16) & 0x0000FFFF0000FFFFULL);
    224     x = (x & 0x00000000FFFFFFFFULL) + ((x >> 32));
    225 
    226     return x;
    227 }
    228 
    229 void helper_msa_pcnt_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    230 {
    231     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    232     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    233 
    234     pwd->b[0]  = msa_pcnt_df(DF_BYTE, pws->b[0]);
    235     pwd->b[1]  = msa_pcnt_df(DF_BYTE, pws->b[1]);
    236     pwd->b[2]  = msa_pcnt_df(DF_BYTE, pws->b[2]);
    237     pwd->b[3]  = msa_pcnt_df(DF_BYTE, pws->b[3]);
    238     pwd->b[4]  = msa_pcnt_df(DF_BYTE, pws->b[4]);
    239     pwd->b[5]  = msa_pcnt_df(DF_BYTE, pws->b[5]);
    240     pwd->b[6]  = msa_pcnt_df(DF_BYTE, pws->b[6]);
    241     pwd->b[7]  = msa_pcnt_df(DF_BYTE, pws->b[7]);
    242     pwd->b[8]  = msa_pcnt_df(DF_BYTE, pws->b[8]);
    243     pwd->b[9]  = msa_pcnt_df(DF_BYTE, pws->b[9]);
    244     pwd->b[10] = msa_pcnt_df(DF_BYTE, pws->b[10]);
    245     pwd->b[11] = msa_pcnt_df(DF_BYTE, pws->b[11]);
    246     pwd->b[12] = msa_pcnt_df(DF_BYTE, pws->b[12]);
    247     pwd->b[13] = msa_pcnt_df(DF_BYTE, pws->b[13]);
    248     pwd->b[14] = msa_pcnt_df(DF_BYTE, pws->b[14]);
    249     pwd->b[15] = msa_pcnt_df(DF_BYTE, pws->b[15]);
    250 }
    251 
    252 void helper_msa_pcnt_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    253 {
    254     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    255     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    256 
    257     pwd->h[0]  = msa_pcnt_df(DF_HALF, pws->h[0]);
    258     pwd->h[1]  = msa_pcnt_df(DF_HALF, pws->h[1]);
    259     pwd->h[2]  = msa_pcnt_df(DF_HALF, pws->h[2]);
    260     pwd->h[3]  = msa_pcnt_df(DF_HALF, pws->h[3]);
    261     pwd->h[4]  = msa_pcnt_df(DF_HALF, pws->h[4]);
    262     pwd->h[5]  = msa_pcnt_df(DF_HALF, pws->h[5]);
    263     pwd->h[6]  = msa_pcnt_df(DF_HALF, pws->h[6]);
    264     pwd->h[7]  = msa_pcnt_df(DF_HALF, pws->h[7]);
    265 }
    266 
    267 void helper_msa_pcnt_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    268 {
    269     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    270     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    271 
    272     pwd->w[0]  = msa_pcnt_df(DF_WORD, pws->w[0]);
    273     pwd->w[1]  = msa_pcnt_df(DF_WORD, pws->w[1]);
    274     pwd->w[2]  = msa_pcnt_df(DF_WORD, pws->w[2]);
    275     pwd->w[3]  = msa_pcnt_df(DF_WORD, pws->w[3]);
    276 }
    277 
    278 void helper_msa_pcnt_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
    279 {
    280     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    281     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    282 
    283     pwd->d[0]  = msa_pcnt_df(DF_DOUBLE, pws->d[0]);
    284     pwd->d[1]  = msa_pcnt_df(DF_DOUBLE, pws->d[1]);
    285 }
    286 
    287 
    288 /*
    289  * Bit Move
    290  * --------
    291  *
    292  * +---------------+----------------------------------------------------------+
    293  * | BINSL.B       | Vector Bit Insert Left (byte)                            |
    294  * | BINSL.H       | Vector Bit Insert Left (halfword)                        |
    295  * | BINSL.W       | Vector Bit Insert Left (word)                            |
    296  * | BINSL.D       | Vector Bit Insert Left (doubleword)                      |
    297  * | BINSR.B       | Vector Bit Insert Right (byte)                           |
    298  * | BINSR.H       | Vector Bit Insert Right (halfword)                       |
    299  * | BINSR.W       | Vector Bit Insert Right (word)                           |
    300  * | BINSR.D       | Vector Bit Insert Right (doubleword)                     |
    301  * | BMNZ.V        | Vector Bit Move If Not Zero                              |
    302  * | BMZ.V         | Vector Bit Move If Zero                                  |
    303  * | BSEL.V        | Vector Bit Select                                        |
    304  * +---------------+----------------------------------------------------------+
    305  */
    306 
    307 /* Data format bit position and unsigned values */
    308 #define BIT_POSITION(x, df) ((uint64_t)(x) % DF_BITS(df))
    309 
    310 static inline int64_t msa_binsl_df(uint32_t df,
    311                                    int64_t dest, int64_t arg1, int64_t arg2)
    312 {
    313     uint64_t u_arg1 = UNSIGNED(arg1, df);
    314     uint64_t u_dest = UNSIGNED(dest, df);
    315     int32_t sh_d = BIT_POSITION(arg2, df) + 1;
    316     int32_t sh_a = DF_BITS(df) - sh_d;
    317     if (sh_d == DF_BITS(df)) {
    318         return u_arg1;
    319     } else {
    320         return UNSIGNED(UNSIGNED(u_dest << sh_d, df) >> sh_d, df) |
    321                UNSIGNED(UNSIGNED(u_arg1 >> sh_a, df) << sh_a, df);
    322     }
    323 }
    324 
    325 void helper_msa_binsl_b(CPUMIPSState *env,
    326                         uint32_t wd, uint32_t ws, uint32_t wt)
    327 {
    328     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    329     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    330     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    331 
    332     pwd->b[0]  = msa_binsl_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
    333     pwd->b[1]  = msa_binsl_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
    334     pwd->b[2]  = msa_binsl_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
    335     pwd->b[3]  = msa_binsl_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
    336     pwd->b[4]  = msa_binsl_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
    337     pwd->b[5]  = msa_binsl_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
    338     pwd->b[6]  = msa_binsl_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
    339     pwd->b[7]  = msa_binsl_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
    340     pwd->b[8]  = msa_binsl_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
    341     pwd->b[9]  = msa_binsl_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
    342     pwd->b[10] = msa_binsl_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
    343     pwd->b[11] = msa_binsl_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
    344     pwd->b[12] = msa_binsl_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
    345     pwd->b[13] = msa_binsl_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
    346     pwd->b[14] = msa_binsl_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
    347     pwd->b[15] = msa_binsl_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
    348 }
    349 
    350 void helper_msa_binsl_h(CPUMIPSState *env,
    351                         uint32_t wd, uint32_t ws, uint32_t wt)
    352 {
    353     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    354     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    355     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    356 
    357     pwd->h[0]  = msa_binsl_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
    358     pwd->h[1]  = msa_binsl_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
    359     pwd->h[2]  = msa_binsl_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
    360     pwd->h[3]  = msa_binsl_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
    361     pwd->h[4]  = msa_binsl_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
    362     pwd->h[5]  = msa_binsl_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
    363     pwd->h[6]  = msa_binsl_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
    364     pwd->h[7]  = msa_binsl_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
    365 }
    366 
    367 void helper_msa_binsl_w(CPUMIPSState *env,
    368                         uint32_t wd, uint32_t ws, uint32_t wt)
    369 {
    370     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    371     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    372     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    373 
    374     pwd->w[0]  = msa_binsl_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
    375     pwd->w[1]  = msa_binsl_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
    376     pwd->w[2]  = msa_binsl_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
    377     pwd->w[3]  = msa_binsl_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
    378 }
    379 
    380 void helper_msa_binsl_d(CPUMIPSState *env,
    381                         uint32_t wd, uint32_t ws, uint32_t wt)
    382 {
    383     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    384     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    385     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    386 
    387     pwd->d[0]  = msa_binsl_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
    388     pwd->d[1]  = msa_binsl_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
    389 }
    390 
    391 static inline int64_t msa_binsr_df(uint32_t df,
    392                                    int64_t dest, int64_t arg1, int64_t arg2)
    393 {
    394     uint64_t u_arg1 = UNSIGNED(arg1, df);
    395     uint64_t u_dest = UNSIGNED(dest, df);
    396     int32_t sh_d = BIT_POSITION(arg2, df) + 1;
    397     int32_t sh_a = DF_BITS(df) - sh_d;
    398     if (sh_d == DF_BITS(df)) {
    399         return u_arg1;
    400     } else {
    401         return UNSIGNED(UNSIGNED(u_dest >> sh_d, df) << sh_d, df) |
    402                UNSIGNED(UNSIGNED(u_arg1 << sh_a, df) >> sh_a, df);
    403     }
    404 }
    405 
    406 void helper_msa_binsr_b(CPUMIPSState *env,
    407                         uint32_t wd, uint32_t ws, uint32_t wt)
    408 {
    409     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    410     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    411     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    412 
    413     pwd->b[0]  = msa_binsr_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
    414     pwd->b[1]  = msa_binsr_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
    415     pwd->b[2]  = msa_binsr_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
    416     pwd->b[3]  = msa_binsr_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
    417     pwd->b[4]  = msa_binsr_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
    418     pwd->b[5]  = msa_binsr_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
    419     pwd->b[6]  = msa_binsr_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
    420     pwd->b[7]  = msa_binsr_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
    421     pwd->b[8]  = msa_binsr_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
    422     pwd->b[9]  = msa_binsr_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
    423     pwd->b[10] = msa_binsr_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
    424     pwd->b[11] = msa_binsr_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
    425     pwd->b[12] = msa_binsr_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
    426     pwd->b[13] = msa_binsr_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
    427     pwd->b[14] = msa_binsr_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
    428     pwd->b[15] = msa_binsr_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
    429 }
    430 
    431 void helper_msa_binsr_h(CPUMIPSState *env,
    432                         uint32_t wd, uint32_t ws, uint32_t wt)
    433 {
    434     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    435     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    436     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    437 
    438     pwd->h[0]  = msa_binsr_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
    439     pwd->h[1]  = msa_binsr_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
    440     pwd->h[2]  = msa_binsr_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
    441     pwd->h[3]  = msa_binsr_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
    442     pwd->h[4]  = msa_binsr_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
    443     pwd->h[5]  = msa_binsr_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
    444     pwd->h[6]  = msa_binsr_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
    445     pwd->h[7]  = msa_binsr_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
    446 }
    447 
    448 void helper_msa_binsr_w(CPUMIPSState *env,
    449                         uint32_t wd, uint32_t ws, uint32_t wt)
    450 {
    451     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    452     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    453     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    454 
    455     pwd->w[0]  = msa_binsr_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
    456     pwd->w[1]  = msa_binsr_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
    457     pwd->w[2]  = msa_binsr_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
    458     pwd->w[3]  = msa_binsr_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
    459 }
    460 
    461 void helper_msa_binsr_d(CPUMIPSState *env,
    462                         uint32_t wd, uint32_t ws, uint32_t wt)
    463 {
    464     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    465     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    466     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    467 
    468     pwd->d[0]  = msa_binsr_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
    469     pwd->d[1]  = msa_binsr_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
    470 }
    471 
    472 void helper_msa_bmnz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    473 {
    474     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    475     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    476     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    477 
    478     pwd->d[0] = UNSIGNED(                                                     \
    479         ((pwd->d[0] & (~pwt->d[0])) | (pws->d[0] & pwt->d[0])), DF_DOUBLE);
    480     pwd->d[1] = UNSIGNED(                                                     \
    481         ((pwd->d[1] & (~pwt->d[1])) | (pws->d[1] & pwt->d[1])), DF_DOUBLE);
    482 }
    483 
    484 void helper_msa_bmz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    485 {
    486     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    487     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    488     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    489 
    490     pwd->d[0] = UNSIGNED(                                                     \
    491         ((pwd->d[0] & pwt->d[0]) | (pws->d[0] & (~pwt->d[0]))), DF_DOUBLE);
    492     pwd->d[1] = UNSIGNED(                                                     \
    493         ((pwd->d[1] & pwt->d[1]) | (pws->d[1] & (~pwt->d[1]))), DF_DOUBLE);
    494 }
    495 
    496 void helper_msa_bsel_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    497 {
    498     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    499     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    500     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    501 
    502     pwd->d[0] = UNSIGNED(                                                     \
    503         (pws->d[0] & (~pwd->d[0])) | (pwt->d[0] & pwd->d[0]), DF_DOUBLE);
    504     pwd->d[1] = UNSIGNED(                                                     \
    505         (pws->d[1] & (~pwd->d[1])) | (pwt->d[1] & pwd->d[1]), DF_DOUBLE);
    506 }
    507 
    508 
    509 /*
    510  * Bit Set
    511  * -------
    512  *
    513  * +---------------+----------------------------------------------------------+
    514  * | BCLR.B        | Vector Bit Clear (byte)                                  |
    515  * | BCLR.H        | Vector Bit Clear (halfword)                              |
    516  * | BCLR.W        | Vector Bit Clear (word)                                  |
    517  * | BCLR.D        | Vector Bit Clear (doubleword)                            |
    518  * | BNEG.B        | Vector Bit Negate (byte)                                 |
    519  * | BNEG.H        | Vector Bit Negate (halfword)                             |
    520  * | BNEG.W        | Vector Bit Negate (word)                                 |
    521  * | BNEG.D        | Vector Bit Negate (doubleword)                           |
    522  * | BSET.B        | Vector Bit Set (byte)                                    |
    523  * | BSET.H        | Vector Bit Set (halfword)                                |
    524  * | BSET.W        | Vector Bit Set (word)                                    |
    525  * | BSET.D        | Vector Bit Set (doubleword)                              |
    526  * +---------------+----------------------------------------------------------+
    527  */
    528 
    529 static inline int64_t msa_bclr_df(uint32_t df, int64_t arg1, int64_t arg2)
    530 {
    531     int32_t b_arg2 = BIT_POSITION(arg2, df);
    532     return UNSIGNED(arg1 & (~(1LL << b_arg2)), df);
    533 }
    534 
    535 void helper_msa_bclr_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    536 {
    537     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    538     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    539     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    540 
    541     pwd->b[0]  = msa_bclr_df(DF_BYTE, pws->b[0],  pwt->b[0]);
    542     pwd->b[1]  = msa_bclr_df(DF_BYTE, pws->b[1],  pwt->b[1]);
    543     pwd->b[2]  = msa_bclr_df(DF_BYTE, pws->b[2],  pwt->b[2]);
    544     pwd->b[3]  = msa_bclr_df(DF_BYTE, pws->b[3],  pwt->b[3]);
    545     pwd->b[4]  = msa_bclr_df(DF_BYTE, pws->b[4],  pwt->b[4]);
    546     pwd->b[5]  = msa_bclr_df(DF_BYTE, pws->b[5],  pwt->b[5]);
    547     pwd->b[6]  = msa_bclr_df(DF_BYTE, pws->b[6],  pwt->b[6]);
    548     pwd->b[7]  = msa_bclr_df(DF_BYTE, pws->b[7],  pwt->b[7]);
    549     pwd->b[8]  = msa_bclr_df(DF_BYTE, pws->b[8],  pwt->b[8]);
    550     pwd->b[9]  = msa_bclr_df(DF_BYTE, pws->b[9],  pwt->b[9]);
    551     pwd->b[10] = msa_bclr_df(DF_BYTE, pws->b[10], pwt->b[10]);
    552     pwd->b[11] = msa_bclr_df(DF_BYTE, pws->b[11], pwt->b[11]);
    553     pwd->b[12] = msa_bclr_df(DF_BYTE, pws->b[12], pwt->b[12]);
    554     pwd->b[13] = msa_bclr_df(DF_BYTE, pws->b[13], pwt->b[13]);
    555     pwd->b[14] = msa_bclr_df(DF_BYTE, pws->b[14], pwt->b[14]);
    556     pwd->b[15] = msa_bclr_df(DF_BYTE, pws->b[15], pwt->b[15]);
    557 }
    558 
    559 void helper_msa_bclr_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    560 {
    561     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    562     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    563     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    564 
    565     pwd->h[0]  = msa_bclr_df(DF_HALF, pws->h[0],  pwt->h[0]);
    566     pwd->h[1]  = msa_bclr_df(DF_HALF, pws->h[1],  pwt->h[1]);
    567     pwd->h[2]  = msa_bclr_df(DF_HALF, pws->h[2],  pwt->h[2]);
    568     pwd->h[3]  = msa_bclr_df(DF_HALF, pws->h[3],  pwt->h[3]);
    569     pwd->h[4]  = msa_bclr_df(DF_HALF, pws->h[4],  pwt->h[4]);
    570     pwd->h[5]  = msa_bclr_df(DF_HALF, pws->h[5],  pwt->h[5]);
    571     pwd->h[6]  = msa_bclr_df(DF_HALF, pws->h[6],  pwt->h[6]);
    572     pwd->h[7]  = msa_bclr_df(DF_HALF, pws->h[7],  pwt->h[7]);
    573 }
    574 
    575 void helper_msa_bclr_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    576 {
    577     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    578     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    579     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    580 
    581     pwd->w[0]  = msa_bclr_df(DF_WORD, pws->w[0],  pwt->w[0]);
    582     pwd->w[1]  = msa_bclr_df(DF_WORD, pws->w[1],  pwt->w[1]);
    583     pwd->w[2]  = msa_bclr_df(DF_WORD, pws->w[2],  pwt->w[2]);
    584     pwd->w[3]  = msa_bclr_df(DF_WORD, pws->w[3],  pwt->w[3]);
    585 }
    586 
    587 void helper_msa_bclr_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    588 {
    589     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    590     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    591     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    592 
    593     pwd->d[0]  = msa_bclr_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
    594     pwd->d[1]  = msa_bclr_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
    595 }
    596 
    597 static inline int64_t msa_bneg_df(uint32_t df, int64_t arg1, int64_t arg2)
    598 {
    599     int32_t b_arg2 = BIT_POSITION(arg2, df);
    600     return UNSIGNED(arg1 ^ (1LL << b_arg2), df);
    601 }
    602 
    603 void helper_msa_bneg_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    604 {
    605     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    606     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    607     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    608 
    609     pwd->b[0]  = msa_bneg_df(DF_BYTE, pws->b[0],  pwt->b[0]);
    610     pwd->b[1]  = msa_bneg_df(DF_BYTE, pws->b[1],  pwt->b[1]);
    611     pwd->b[2]  = msa_bneg_df(DF_BYTE, pws->b[2],  pwt->b[2]);
    612     pwd->b[3]  = msa_bneg_df(DF_BYTE, pws->b[3],  pwt->b[3]);
    613     pwd->b[4]  = msa_bneg_df(DF_BYTE, pws->b[4],  pwt->b[4]);
    614     pwd->b[5]  = msa_bneg_df(DF_BYTE, pws->b[5],  pwt->b[5]);
    615     pwd->b[6]  = msa_bneg_df(DF_BYTE, pws->b[6],  pwt->b[6]);
    616     pwd->b[7]  = msa_bneg_df(DF_BYTE, pws->b[7],  pwt->b[7]);
    617     pwd->b[8]  = msa_bneg_df(DF_BYTE, pws->b[8],  pwt->b[8]);
    618     pwd->b[9]  = msa_bneg_df(DF_BYTE, pws->b[9],  pwt->b[9]);
    619     pwd->b[10] = msa_bneg_df(DF_BYTE, pws->b[10], pwt->b[10]);
    620     pwd->b[11] = msa_bneg_df(DF_BYTE, pws->b[11], pwt->b[11]);
    621     pwd->b[12] = msa_bneg_df(DF_BYTE, pws->b[12], pwt->b[12]);
    622     pwd->b[13] = msa_bneg_df(DF_BYTE, pws->b[13], pwt->b[13]);
    623     pwd->b[14] = msa_bneg_df(DF_BYTE, pws->b[14], pwt->b[14]);
    624     pwd->b[15] = msa_bneg_df(DF_BYTE, pws->b[15], pwt->b[15]);
    625 }
    626 
    627 void helper_msa_bneg_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    628 {
    629     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    630     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    631     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    632 
    633     pwd->h[0]  = msa_bneg_df(DF_HALF, pws->h[0],  pwt->h[0]);
    634     pwd->h[1]  = msa_bneg_df(DF_HALF, pws->h[1],  pwt->h[1]);
    635     pwd->h[2]  = msa_bneg_df(DF_HALF, pws->h[2],  pwt->h[2]);
    636     pwd->h[3]  = msa_bneg_df(DF_HALF, pws->h[3],  pwt->h[3]);
    637     pwd->h[4]  = msa_bneg_df(DF_HALF, pws->h[4],  pwt->h[4]);
    638     pwd->h[5]  = msa_bneg_df(DF_HALF, pws->h[5],  pwt->h[5]);
    639     pwd->h[6]  = msa_bneg_df(DF_HALF, pws->h[6],  pwt->h[6]);
    640     pwd->h[7]  = msa_bneg_df(DF_HALF, pws->h[7],  pwt->h[7]);
    641 }
    642 
    643 void helper_msa_bneg_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    644 {
    645     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    646     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    647     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    648 
    649     pwd->w[0]  = msa_bneg_df(DF_WORD, pws->w[0],  pwt->w[0]);
    650     pwd->w[1]  = msa_bneg_df(DF_WORD, pws->w[1],  pwt->w[1]);
    651     pwd->w[2]  = msa_bneg_df(DF_WORD, pws->w[2],  pwt->w[2]);
    652     pwd->w[3]  = msa_bneg_df(DF_WORD, pws->w[3],  pwt->w[3]);
    653 }
    654 
    655 void helper_msa_bneg_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    656 {
    657     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    658     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    659     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    660 
    661     pwd->d[0]  = msa_bneg_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
    662     pwd->d[1]  = msa_bneg_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
    663 }
    664 
    665 static inline int64_t msa_bset_df(uint32_t df, int64_t arg1,
    666         int64_t arg2)
    667 {
    668     int32_t b_arg2 = BIT_POSITION(arg2, df);
    669     return UNSIGNED(arg1 | (1LL << b_arg2), df);
    670 }
    671 
    672 void helper_msa_bset_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    673 {
    674     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    675     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    676     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    677 
    678     pwd->b[0]  = msa_bset_df(DF_BYTE, pws->b[0],  pwt->b[0]);
    679     pwd->b[1]  = msa_bset_df(DF_BYTE, pws->b[1],  pwt->b[1]);
    680     pwd->b[2]  = msa_bset_df(DF_BYTE, pws->b[2],  pwt->b[2]);
    681     pwd->b[3]  = msa_bset_df(DF_BYTE, pws->b[3],  pwt->b[3]);
    682     pwd->b[4]  = msa_bset_df(DF_BYTE, pws->b[4],  pwt->b[4]);
    683     pwd->b[5]  = msa_bset_df(DF_BYTE, pws->b[5],  pwt->b[5]);
    684     pwd->b[6]  = msa_bset_df(DF_BYTE, pws->b[6],  pwt->b[6]);
    685     pwd->b[7]  = msa_bset_df(DF_BYTE, pws->b[7],  pwt->b[7]);
    686     pwd->b[8]  = msa_bset_df(DF_BYTE, pws->b[8],  pwt->b[8]);
    687     pwd->b[9]  = msa_bset_df(DF_BYTE, pws->b[9],  pwt->b[9]);
    688     pwd->b[10] = msa_bset_df(DF_BYTE, pws->b[10], pwt->b[10]);
    689     pwd->b[11] = msa_bset_df(DF_BYTE, pws->b[11], pwt->b[11]);
    690     pwd->b[12] = msa_bset_df(DF_BYTE, pws->b[12], pwt->b[12]);
    691     pwd->b[13] = msa_bset_df(DF_BYTE, pws->b[13], pwt->b[13]);
    692     pwd->b[14] = msa_bset_df(DF_BYTE, pws->b[14], pwt->b[14]);
    693     pwd->b[15] = msa_bset_df(DF_BYTE, pws->b[15], pwt->b[15]);
    694 }
    695 
    696 void helper_msa_bset_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    697 {
    698     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    699     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    700     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    701 
    702     pwd->h[0]  = msa_bset_df(DF_HALF, pws->h[0],  pwt->h[0]);
    703     pwd->h[1]  = msa_bset_df(DF_HALF, pws->h[1],  pwt->h[1]);
    704     pwd->h[2]  = msa_bset_df(DF_HALF, pws->h[2],  pwt->h[2]);
    705     pwd->h[3]  = msa_bset_df(DF_HALF, pws->h[3],  pwt->h[3]);
    706     pwd->h[4]  = msa_bset_df(DF_HALF, pws->h[4],  pwt->h[4]);
    707     pwd->h[5]  = msa_bset_df(DF_HALF, pws->h[5],  pwt->h[5]);
    708     pwd->h[6]  = msa_bset_df(DF_HALF, pws->h[6],  pwt->h[6]);
    709     pwd->h[7]  = msa_bset_df(DF_HALF, pws->h[7],  pwt->h[7]);
    710 }
    711 
    712 void helper_msa_bset_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    713 {
    714     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    715     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    716     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    717 
    718     pwd->w[0]  = msa_bset_df(DF_WORD, pws->w[0],  pwt->w[0]);
    719     pwd->w[1]  = msa_bset_df(DF_WORD, pws->w[1],  pwt->w[1]);
    720     pwd->w[2]  = msa_bset_df(DF_WORD, pws->w[2],  pwt->w[2]);
    721     pwd->w[3]  = msa_bset_df(DF_WORD, pws->w[3],  pwt->w[3]);
    722 }
    723 
    724 void helper_msa_bset_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
    725 {
    726     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    727     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    728     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    729 
    730     pwd->d[0]  = msa_bset_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
    731     pwd->d[1]  = msa_bset_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
    732 }
    733 
    734 
    735 /*
    736  * Fixed Multiply
    737  * --------------
    738  *
    739  * +---------------+----------------------------------------------------------+
    740  * | MADD_Q.H      | Vector Fixed-Point Multiply and Add (halfword)           |
    741  * | MADD_Q.W      | Vector Fixed-Point Multiply and Add (word)               |
    742  * | MADDR_Q.H     | Vector Fixed-Point Multiply and Add Rounded (halfword)   |
    743  * | MADDR_Q.W     | Vector Fixed-Point Multiply and Add Rounded (word)       |
    744  * | MSUB_Q.H      | Vector Fixed-Point Multiply and Subtr. (halfword)        |
    745  * | MSUB_Q.W      | Vector Fixed-Point Multiply and Subtr. (word)            |
    746  * | MSUBR_Q.H     | Vector Fixed-Point Multiply and Subtr. Rounded (halfword)|
    747  * | MSUBR_Q.W     | Vector Fixed-Point Multiply and Subtr. Rounded (word)    |
    748  * | MUL_Q.H       | Vector Fixed-Point Multiply (halfword)                   |
    749  * | MUL_Q.W       | Vector Fixed-Point Multiply (word)                       |
    750  * | MULR_Q.H      | Vector Fixed-Point Multiply Rounded (halfword)           |
    751  * | MULR_Q.W      | Vector Fixed-Point Multiply Rounded (word)               |
    752  * +---------------+----------------------------------------------------------+
    753  */
    754 
    755 /* TODO: insert Fixed Multiply group helpers here */
    756 
    757 
    758 /*
    759  * Float Max Min
    760  * -------------
    761  *
    762  * +---------------+----------------------------------------------------------+
    763  * | FMAX_A.W      | Vector Floating-Point Maximum (Absolute) (word)          |
    764  * | FMAX_A.D      | Vector Floating-Point Maximum (Absolute) (doubleword)    |
    765  * | FMAX.W        | Vector Floating-Point Maximum (word)                     |
    766  * | FMAX.D        | Vector Floating-Point Maximum (doubleword)               |
    767  * | FMIN_A.W      | Vector Floating-Point Minimum (Absolute) (word)          |
    768  * | FMIN_A.D      | Vector Floating-Point Minimum (Absolute) (doubleword)    |
    769  * | FMIN.W        | Vector Floating-Point Minimum (word)                     |
    770  * | FMIN.D        | Vector Floating-Point Minimum (doubleword)               |
    771  * +---------------+----------------------------------------------------------+
    772  */
    773 
    774 /* TODO: insert Float Max Min group helpers here */
    775 
    776 
    777 /*
    778  * Int Add
    779  * -------
    780  *
    781  * +---------------+----------------------------------------------------------+
    782  * | ADD_A.B       | Vector Add Absolute Values (byte)                        |
    783  * | ADD_A.H       | Vector Add Absolute Values (halfword)                    |
    784  * | ADD_A.W       | Vector Add Absolute Values (word)                        |
    785  * | ADD_A.D       | Vector Add Absolute Values (doubleword)                  |
    786  * | ADDS_A.B      | Vector Signed Saturated Add (of Absolute) (byte)         |
    787  * | ADDS_A.H      | Vector Signed Saturated Add (of Absolute) (halfword)     |
    788  * | ADDS_A.W      | Vector Signed Saturated Add (of Absolute) (word)         |
    789  * | ADDS_A.D      | Vector Signed Saturated Add (of Absolute) (doubleword)   |
    790  * | ADDS_S.B      | Vector Signed Saturated Add (of Signed) (byte)           |
    791  * | ADDS_S.H      | Vector Signed Saturated Add (of Signed) (halfword)       |
    792  * | ADDS_S.W      | Vector Signed Saturated Add (of Signed) (word)           |
    793  * | ADDS_S.D      | Vector Signed Saturated Add (of Signed) (doubleword)     |
    794  * | ADDS_U.B      | Vector Unsigned Saturated Add (of Unsigned) (byte)       |
    795  * | ADDS_U.H      | Vector Unsigned Saturated Add (of Unsigned) (halfword)   |
    796  * | ADDS_U.W      | Vector Unsigned Saturated Add (of Unsigned) (word)       |
    797  * | ADDS_U.D      | Vector Unsigned Saturated Add (of Unsigned) (doubleword) |
    798  * | ADDV.B        | Vector Add (byte)                                        |
    799  * | ADDV.H        | Vector Add (halfword)                                    |
    800  * | ADDV.W        | Vector Add (word)                                        |
    801  * | ADDV.D        | Vector Add (doubleword)                                  |
    802  * | HADD_S.H      | Vector Signed Horizontal Add (halfword)                  |
    803  * | HADD_S.W      | Vector Signed Horizontal Add (word)                      |
    804  * | HADD_S.D      | Vector Signed Horizontal Add (doubleword)                |
 * | HADD_U.H      | Vector Unsigned Horizontal Add (halfword)                |
 * | HADD_U.W      | Vector Unsigned Horizontal Add (word)                    |
 * | HADD_U.D      | Vector Unsigned Horizontal Add (doubleword)              |
    808  * +---------------+----------------------------------------------------------+
    809  */
    810 
    811 
    812 static inline int64_t msa_add_a_df(uint32_t df, int64_t arg1, int64_t arg2)
    813 {
    814     uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
    815     uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
    816     return abs_arg1 + abs_arg2;
    817 }
    818 
    819 void helper_msa_add_a_b(CPUMIPSState *env,
    820                         uint32_t wd, uint32_t ws, uint32_t wt)
    821 {
    822     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    823     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    824     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    825 
    826     pwd->b[0]  = msa_add_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
    827     pwd->b[1]  = msa_add_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
    828     pwd->b[2]  = msa_add_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
    829     pwd->b[3]  = msa_add_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
    830     pwd->b[4]  = msa_add_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
    831     pwd->b[5]  = msa_add_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
    832     pwd->b[6]  = msa_add_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
    833     pwd->b[7]  = msa_add_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
    834     pwd->b[8]  = msa_add_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
    835     pwd->b[9]  = msa_add_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
    836     pwd->b[10] = msa_add_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
    837     pwd->b[11] = msa_add_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
    838     pwd->b[12] = msa_add_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
    839     pwd->b[13] = msa_add_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
    840     pwd->b[14] = msa_add_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
    841     pwd->b[15] = msa_add_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
    842 }
    843 
    844 void helper_msa_add_a_h(CPUMIPSState *env,
    845                         uint32_t wd, uint32_t ws, uint32_t wt)
    846 {
    847     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    848     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    849     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    850 
    851     pwd->h[0]  = msa_add_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
    852     pwd->h[1]  = msa_add_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
    853     pwd->h[2]  = msa_add_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
    854     pwd->h[3]  = msa_add_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
    855     pwd->h[4]  = msa_add_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
    856     pwd->h[5]  = msa_add_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
    857     pwd->h[6]  = msa_add_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
    858     pwd->h[7]  = msa_add_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
    859 }
    860 
    861 void helper_msa_add_a_w(CPUMIPSState *env,
    862                         uint32_t wd, uint32_t ws, uint32_t wt)
    863 {
    864     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    865     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    866     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    867 
    868     pwd->w[0]  = msa_add_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
    869     pwd->w[1]  = msa_add_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
    870     pwd->w[2]  = msa_add_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
    871     pwd->w[3]  = msa_add_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
    872 }
    873 
    874 void helper_msa_add_a_d(CPUMIPSState *env,
    875                         uint32_t wd, uint32_t ws, uint32_t wt)
    876 {
    877     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    878     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    879     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    880 
    881     pwd->d[0]  = msa_add_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
    882     pwd->d[1]  = msa_add_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
    883 }
    884 
    885 
    886 static inline int64_t msa_adds_a_df(uint32_t df, int64_t arg1, int64_t arg2)
    887 {
    888     uint64_t max_int = (uint64_t)DF_MAX_INT(df);
    889     uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
    890     uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
    891     if (abs_arg1 > max_int || abs_arg2 > max_int) {
    892         return (int64_t)max_int;
    893     } else {
    894         return (abs_arg1 < max_int - abs_arg2) ? abs_arg1 + abs_arg2 : max_int;
    895     }
    896 }
    897 
    898 void helper_msa_adds_a_b(CPUMIPSState *env,
    899                          uint32_t wd, uint32_t ws, uint32_t wt)
    900 {
    901     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    902     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    903     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    904 
    905     pwd->b[0]  = msa_adds_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
    906     pwd->b[1]  = msa_adds_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
    907     pwd->b[2]  = msa_adds_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
    908     pwd->b[3]  = msa_adds_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
    909     pwd->b[4]  = msa_adds_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
    910     pwd->b[5]  = msa_adds_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
    911     pwd->b[6]  = msa_adds_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
    912     pwd->b[7]  = msa_adds_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
    913     pwd->b[8]  = msa_adds_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
    914     pwd->b[9]  = msa_adds_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
    915     pwd->b[10] = msa_adds_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
    916     pwd->b[11] = msa_adds_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
    917     pwd->b[12] = msa_adds_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
    918     pwd->b[13] = msa_adds_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
    919     pwd->b[14] = msa_adds_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
    920     pwd->b[15] = msa_adds_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
    921 }
    922 
    923 void helper_msa_adds_a_h(CPUMIPSState *env,
    924                          uint32_t wd, uint32_t ws, uint32_t wt)
    925 {
    926     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    927     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    928     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    929 
    930     pwd->h[0]  = msa_adds_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
    931     pwd->h[1]  = msa_adds_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
    932     pwd->h[2]  = msa_adds_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
    933     pwd->h[3]  = msa_adds_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
    934     pwd->h[4]  = msa_adds_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
    935     pwd->h[5]  = msa_adds_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
    936     pwd->h[6]  = msa_adds_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
    937     pwd->h[7]  = msa_adds_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
    938 }
    939 
    940 void helper_msa_adds_a_w(CPUMIPSState *env,
    941                          uint32_t wd, uint32_t ws, uint32_t wt)
    942 {
    943     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    944     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    945     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    946 
    947     pwd->w[0]  = msa_adds_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
    948     pwd->w[1]  = msa_adds_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
    949     pwd->w[2]  = msa_adds_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
    950     pwd->w[3]  = msa_adds_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
    951 }
    952 
    953 void helper_msa_adds_a_d(CPUMIPSState *env,
    954                          uint32_t wd, uint32_t ws, uint32_t wt)
    955 {
    956     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    957     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    958     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    959 
    960     pwd->d[0]  = msa_adds_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
    961     pwd->d[1]  = msa_adds_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
    962 }
    963 
    964 
    965 static inline int64_t msa_adds_s_df(uint32_t df, int64_t arg1, int64_t arg2)
    966 {
    967     int64_t max_int = DF_MAX_INT(df);
    968     int64_t min_int = DF_MIN_INT(df);
    969     if (arg1 < 0) {
    970         return (min_int - arg1 < arg2) ? arg1 + arg2 : min_int;
    971     } else {
    972         return (arg2 < max_int - arg1) ? arg1 + arg2 : max_int;
    973     }
    974 }
    975 
    976 void helper_msa_adds_s_b(CPUMIPSState *env,
    977                          uint32_t wd, uint32_t ws, uint32_t wt)
    978 {
    979     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
    980     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
    981     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
    982 
    983     pwd->b[0]  = msa_adds_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
    984     pwd->b[1]  = msa_adds_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
    985     pwd->b[2]  = msa_adds_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
    986     pwd->b[3]  = msa_adds_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
    987     pwd->b[4]  = msa_adds_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
    988     pwd->b[5]  = msa_adds_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
    989     pwd->b[6]  = msa_adds_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
    990     pwd->b[7]  = msa_adds_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
    991     pwd->b[8]  = msa_adds_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
    992     pwd->b[9]  = msa_adds_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
    993     pwd->b[10] = msa_adds_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
    994     pwd->b[11] = msa_adds_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
    995     pwd->b[12] = msa_adds_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
    996     pwd->b[13] = msa_adds_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
    997     pwd->b[14] = msa_adds_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
    998     pwd->b[15] = msa_adds_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
    999 }
   1000 
   1001 void helper_msa_adds_s_h(CPUMIPSState *env,
   1002                          uint32_t wd, uint32_t ws, uint32_t wt)
   1003 {
   1004     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1005     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1006     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1007 
   1008     pwd->h[0]  = msa_adds_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1009     pwd->h[1]  = msa_adds_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1010     pwd->h[2]  = msa_adds_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1011     pwd->h[3]  = msa_adds_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1012     pwd->h[4]  = msa_adds_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1013     pwd->h[5]  = msa_adds_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1014     pwd->h[6]  = msa_adds_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1015     pwd->h[7]  = msa_adds_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1016 }
   1017 
   1018 void helper_msa_adds_s_w(CPUMIPSState *env,
   1019                          uint32_t wd, uint32_t ws, uint32_t wt)
   1020 {
   1021     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1022     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1023     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1024 
   1025     pwd->w[0]  = msa_adds_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1026     pwd->w[1]  = msa_adds_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1027     pwd->w[2]  = msa_adds_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1028     pwd->w[3]  = msa_adds_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1029 }
   1030 
   1031 void helper_msa_adds_s_d(CPUMIPSState *env,
   1032                          uint32_t wd, uint32_t ws, uint32_t wt)
   1033 {
   1034     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1035     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1036     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1037 
   1038     pwd->d[0]  = msa_adds_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1039     pwd->d[1]  = msa_adds_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1040 }
   1041 
   1042 
   1043 static inline uint64_t msa_adds_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
   1044 {
   1045     uint64_t max_uint = DF_MAX_UINT(df);
   1046     uint64_t u_arg1 = UNSIGNED(arg1, df);
   1047     uint64_t u_arg2 = UNSIGNED(arg2, df);
   1048     return (u_arg1 < max_uint - u_arg2) ? u_arg1 + u_arg2 : max_uint;
   1049 }
   1050 
   1051 void helper_msa_adds_u_b(CPUMIPSState *env,
   1052                          uint32_t wd, uint32_t ws, uint32_t wt)
   1053 {
   1054     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1055     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1056     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1057 
   1058     pwd->b[0]  = msa_adds_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1059     pwd->b[1]  = msa_adds_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1060     pwd->b[2]  = msa_adds_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1061     pwd->b[3]  = msa_adds_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1062     pwd->b[4]  = msa_adds_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1063     pwd->b[5]  = msa_adds_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1064     pwd->b[6]  = msa_adds_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1065     pwd->b[7]  = msa_adds_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1066     pwd->b[8]  = msa_adds_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1067     pwd->b[9]  = msa_adds_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1068     pwd->b[10] = msa_adds_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1069     pwd->b[11] = msa_adds_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1070     pwd->b[12] = msa_adds_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1071     pwd->b[13] = msa_adds_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1072     pwd->b[14] = msa_adds_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1073     pwd->b[15] = msa_adds_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1074 }
   1075 
   1076 void helper_msa_adds_u_h(CPUMIPSState *env,
   1077                          uint32_t wd, uint32_t ws, uint32_t wt)
   1078 {
   1079     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1080     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1081     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1082 
   1083     pwd->h[0]  = msa_adds_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1084     pwd->h[1]  = msa_adds_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1085     pwd->h[2]  = msa_adds_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1086     pwd->h[3]  = msa_adds_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1087     pwd->h[4]  = msa_adds_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1088     pwd->h[5]  = msa_adds_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1089     pwd->h[6]  = msa_adds_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1090     pwd->h[7]  = msa_adds_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1091 }
   1092 
   1093 void helper_msa_adds_u_w(CPUMIPSState *env,
   1094                          uint32_t wd, uint32_t ws, uint32_t wt)
   1095 {
   1096     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1097     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1098     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1099 
   1100     pwd->w[0]  = msa_adds_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1101     pwd->w[1]  = msa_adds_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1102     pwd->w[2]  = msa_adds_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1103     pwd->w[3]  = msa_adds_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1104 }
   1105 
   1106 void helper_msa_adds_u_d(CPUMIPSState *env,
   1107                          uint32_t wd, uint32_t ws, uint32_t wt)
   1108 {
   1109     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1110     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1111     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1112 
   1113     pwd->d[0]  = msa_adds_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1114     pwd->d[1]  = msa_adds_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1115 }
   1116 
   1117 
   1118 static inline int64_t msa_addv_df(uint32_t df, int64_t arg1, int64_t arg2)
   1119 {
   1120     return arg1 + arg2;
   1121 }
   1122 
   1123 void helper_msa_addv_b(CPUMIPSState *env,
   1124                        uint32_t wd, uint32_t ws, uint32_t wt)
   1125 {
   1126     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1127     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1128     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1129 
   1130     pwd->b[0]  = msa_addv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1131     pwd->b[1]  = msa_addv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1132     pwd->b[2]  = msa_addv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1133     pwd->b[3]  = msa_addv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1134     pwd->b[4]  = msa_addv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1135     pwd->b[5]  = msa_addv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1136     pwd->b[6]  = msa_addv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1137     pwd->b[7]  = msa_addv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1138     pwd->b[8]  = msa_addv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1139     pwd->b[9]  = msa_addv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1140     pwd->b[10] = msa_addv_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1141     pwd->b[11] = msa_addv_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1142     pwd->b[12] = msa_addv_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1143     pwd->b[13] = msa_addv_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1144     pwd->b[14] = msa_addv_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1145     pwd->b[15] = msa_addv_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1146 }
   1147 
   1148 void helper_msa_addv_h(CPUMIPSState *env,
   1149                        uint32_t wd, uint32_t ws, uint32_t wt)
   1150 {
   1151     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1152     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1153     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1154 
   1155     pwd->h[0]  = msa_addv_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1156     pwd->h[1]  = msa_addv_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1157     pwd->h[2]  = msa_addv_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1158     pwd->h[3]  = msa_addv_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1159     pwd->h[4]  = msa_addv_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1160     pwd->h[5]  = msa_addv_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1161     pwd->h[6]  = msa_addv_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1162     pwd->h[7]  = msa_addv_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1163 }
   1164 
   1165 void helper_msa_addv_w(CPUMIPSState *env,
   1166                        uint32_t wd, uint32_t ws, uint32_t wt)
   1167 {
   1168     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1169     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1170     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1171 
   1172     pwd->w[0]  = msa_addv_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1173     pwd->w[1]  = msa_addv_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1174     pwd->w[2]  = msa_addv_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1175     pwd->w[3]  = msa_addv_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1176 }
   1177 
   1178 void helper_msa_addv_d(CPUMIPSState *env,
   1179                        uint32_t wd, uint32_t ws, uint32_t wt)
   1180 {
   1181     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1182     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1183     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1184 
   1185     pwd->d[0]  = msa_addv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1186     pwd->d[1]  = msa_addv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1187 }
   1188 
   1189 
   1190 #define SIGNED_EVEN(a, df) \
   1191         ((((int64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))
   1192 
   1193 #define UNSIGNED_EVEN(a, df) \
   1194         ((((uint64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))
   1195 
   1196 #define SIGNED_ODD(a, df) \
   1197         ((((int64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))
   1198 
   1199 #define UNSIGNED_ODD(a, df) \
   1200         ((((uint64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))
   1201 
   1202 
   1203 static inline int64_t msa_hadd_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   1204 {
   1205     return SIGNED_ODD(arg1, df) + SIGNED_EVEN(arg2, df);
   1206 }
   1207 
   1208 void helper_msa_hadd_s_h(CPUMIPSState *env,
   1209                          uint32_t wd, uint32_t ws, uint32_t wt)
   1210 {
   1211     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1212     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1213     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1214 
   1215     pwd->h[0]  = msa_hadd_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1216     pwd->h[1]  = msa_hadd_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1217     pwd->h[2]  = msa_hadd_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1218     pwd->h[3]  = msa_hadd_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1219     pwd->h[4]  = msa_hadd_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1220     pwd->h[5]  = msa_hadd_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1221     pwd->h[6]  = msa_hadd_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1222     pwd->h[7]  = msa_hadd_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1223 }
   1224 
   1225 void helper_msa_hadd_s_w(CPUMIPSState *env,
   1226                          uint32_t wd, uint32_t ws, uint32_t wt)
   1227 {
   1228     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1229     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1230     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1231 
   1232     pwd->w[0]  = msa_hadd_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1233     pwd->w[1]  = msa_hadd_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1234     pwd->w[2]  = msa_hadd_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1235     pwd->w[3]  = msa_hadd_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1236 }
   1237 
   1238 void helper_msa_hadd_s_d(CPUMIPSState *env,
   1239                          uint32_t wd, uint32_t ws, uint32_t wt)
   1240 {
   1241     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1242     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1243     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1244 
   1245     pwd->d[0]  = msa_hadd_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1246     pwd->d[1]  = msa_hadd_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1247 }
   1248 
   1249 
   1250 static inline int64_t msa_hadd_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   1251 {
   1252     return UNSIGNED_ODD(arg1, df) + UNSIGNED_EVEN(arg2, df);
   1253 }
   1254 
   1255 void helper_msa_hadd_u_h(CPUMIPSState *env,
   1256                          uint32_t wd, uint32_t ws, uint32_t wt)
   1257 {
   1258     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1259     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1260     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1261 
   1262     pwd->h[0]  = msa_hadd_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1263     pwd->h[1]  = msa_hadd_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1264     pwd->h[2]  = msa_hadd_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1265     pwd->h[3]  = msa_hadd_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1266     pwd->h[4]  = msa_hadd_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1267     pwd->h[5]  = msa_hadd_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1268     pwd->h[6]  = msa_hadd_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1269     pwd->h[7]  = msa_hadd_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1270 }
   1271 
   1272 void helper_msa_hadd_u_w(CPUMIPSState *env,
   1273                          uint32_t wd, uint32_t ws, uint32_t wt)
   1274 {
   1275     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1276     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1277     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1278 
   1279     pwd->w[0]  = msa_hadd_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1280     pwd->w[1]  = msa_hadd_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1281     pwd->w[2]  = msa_hadd_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1282     pwd->w[3]  = msa_hadd_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1283 }
   1284 
   1285 void helper_msa_hadd_u_d(CPUMIPSState *env,
   1286                          uint32_t wd, uint32_t ws, uint32_t wt)
   1287 {
   1288     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1289     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1290     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1291 
   1292     pwd->d[0]  = msa_hadd_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1293     pwd->d[1]  = msa_hadd_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1294 }
   1295 
   1296 
   1297 /*
   1298  * Int Average
   1299  * -----------
   1300  *
   1301  * +---------------+----------------------------------------------------------+
   1302  * | AVE_S.B       | Vector Signed Average (byte)                             |
   1303  * | AVE_S.H       | Vector Signed Average (halfword)                         |
   1304  * | AVE_S.W       | Vector Signed Average (word)                             |
   1305  * | AVE_S.D       | Vector Signed Average (doubleword)                       |
   1306  * | AVE_U.B       | Vector Unsigned Average (byte)                           |
   1307  * | AVE_U.H       | Vector Unsigned Average (halfword)                       |
   1308  * | AVE_U.W       | Vector Unsigned Average (word)                           |
   1309  * | AVE_U.D       | Vector Unsigned Average (doubleword)                     |
   1310  * | AVER_S.B      | Vector Signed Average Rounded (byte)                     |
   1311  * | AVER_S.H      | Vector Signed Average Rounded (halfword)                 |
   1312  * | AVER_S.W      | Vector Signed Average Rounded (word)                     |
   1313  * | AVER_S.D      | Vector Signed Average Rounded (doubleword)               |
   1314  * | AVER_U.B      | Vector Unsigned Average Rounded (byte)                   |
   1315  * | AVER_U.H      | Vector Unsigned Average Rounded (halfword)               |
   1316  * | AVER_U.W      | Vector Unsigned Average Rounded (word)                   |
   1317  * | AVER_U.D      | Vector Unsigned Average Rounded (doubleword)             |
   1318  * +---------------+----------------------------------------------------------+
   1319  */
   1320 
   1321 static inline int64_t msa_ave_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   1322 {
   1323     /* signed shift */
   1324     return (arg1 >> 1) + (arg2 >> 1) + (arg1 & arg2 & 1);
   1325 }
   1326 
   1327 void helper_msa_ave_s_b(CPUMIPSState *env,
   1328                         uint32_t wd, uint32_t ws, uint32_t wt)
   1329 {
   1330     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1331     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1332     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1333 
   1334     pwd->b[0]  = msa_ave_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1335     pwd->b[1]  = msa_ave_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1336     pwd->b[2]  = msa_ave_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1337     pwd->b[3]  = msa_ave_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1338     pwd->b[4]  = msa_ave_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1339     pwd->b[5]  = msa_ave_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1340     pwd->b[6]  = msa_ave_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1341     pwd->b[7]  = msa_ave_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1342     pwd->b[8]  = msa_ave_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1343     pwd->b[9]  = msa_ave_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1344     pwd->b[10] = msa_ave_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1345     pwd->b[11] = msa_ave_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1346     pwd->b[12] = msa_ave_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1347     pwd->b[13] = msa_ave_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1348     pwd->b[14] = msa_ave_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1349     pwd->b[15] = msa_ave_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1350 }
   1351 
   1352 void helper_msa_ave_s_h(CPUMIPSState *env,
   1353                         uint32_t wd, uint32_t ws, uint32_t wt)
   1354 {
   1355     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1356     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1357     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1358 
   1359     pwd->h[0]  = msa_ave_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1360     pwd->h[1]  = msa_ave_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1361     pwd->h[2]  = msa_ave_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1362     pwd->h[3]  = msa_ave_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1363     pwd->h[4]  = msa_ave_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1364     pwd->h[5]  = msa_ave_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1365     pwd->h[6]  = msa_ave_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1366     pwd->h[7]  = msa_ave_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1367 }
   1368 
   1369 void helper_msa_ave_s_w(CPUMIPSState *env,
   1370                         uint32_t wd, uint32_t ws, uint32_t wt)
   1371 {
   1372     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1373     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1374     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1375 
   1376     pwd->w[0]  = msa_ave_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1377     pwd->w[1]  = msa_ave_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1378     pwd->w[2]  = msa_ave_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1379     pwd->w[3]  = msa_ave_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1380 }
   1381 
   1382 void helper_msa_ave_s_d(CPUMIPSState *env,
   1383                         uint32_t wd, uint32_t ws, uint32_t wt)
   1384 {
   1385     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1386     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1387     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1388 
   1389     pwd->d[0]  = msa_ave_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1390     pwd->d[1]  = msa_ave_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1391 }
   1392 
   1393 static inline uint64_t msa_ave_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
   1394 {
   1395     uint64_t u_arg1 = UNSIGNED(arg1, df);
   1396     uint64_t u_arg2 = UNSIGNED(arg2, df);
   1397     /* unsigned shift */
   1398     return (u_arg1 >> 1) + (u_arg2 >> 1) + (u_arg1 & u_arg2 & 1);
   1399 }
   1400 
   1401 void helper_msa_ave_u_b(CPUMIPSState *env,
   1402                         uint32_t wd, uint32_t ws, uint32_t wt)
   1403 {
   1404     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1405     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1406     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1407 
   1408     pwd->b[0]  = msa_ave_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1409     pwd->b[1]  = msa_ave_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1410     pwd->b[2]  = msa_ave_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1411     pwd->b[3]  = msa_ave_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1412     pwd->b[4]  = msa_ave_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1413     pwd->b[5]  = msa_ave_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1414     pwd->b[6]  = msa_ave_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1415     pwd->b[7]  = msa_ave_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1416     pwd->b[8]  = msa_ave_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1417     pwd->b[9]  = msa_ave_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1418     pwd->b[10] = msa_ave_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1419     pwd->b[11] = msa_ave_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1420     pwd->b[12] = msa_ave_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1421     pwd->b[13] = msa_ave_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1422     pwd->b[14] = msa_ave_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1423     pwd->b[15] = msa_ave_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1424 }
   1425 
   1426 void helper_msa_ave_u_h(CPUMIPSState *env,
   1427                         uint32_t wd, uint32_t ws, uint32_t wt)
   1428 {
   1429     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1430     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1431     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1432 
   1433     pwd->h[0]  = msa_ave_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1434     pwd->h[1]  = msa_ave_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1435     pwd->h[2]  = msa_ave_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1436     pwd->h[3]  = msa_ave_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1437     pwd->h[4]  = msa_ave_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1438     pwd->h[5]  = msa_ave_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1439     pwd->h[6]  = msa_ave_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1440     pwd->h[7]  = msa_ave_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1441 }
   1442 
   1443 void helper_msa_ave_u_w(CPUMIPSState *env,
   1444                         uint32_t wd, uint32_t ws, uint32_t wt)
   1445 {
   1446     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1447     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1448     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1449 
   1450     pwd->w[0]  = msa_ave_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1451     pwd->w[1]  = msa_ave_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1452     pwd->w[2]  = msa_ave_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1453     pwd->w[3]  = msa_ave_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1454 }
   1455 
   1456 void helper_msa_ave_u_d(CPUMIPSState *env,
   1457                         uint32_t wd, uint32_t ws, uint32_t wt)
   1458 {
   1459     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1460     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1461     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1462 
   1463     pwd->d[0]  = msa_ave_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1464     pwd->d[1]  = msa_ave_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1465 }
   1466 
   1467 static inline int64_t msa_aver_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   1468 {
   1469     /* signed shift */
   1470     return (arg1 >> 1) + (arg2 >> 1) + ((arg1 | arg2) & 1);
   1471 }
   1472 
   1473 void helper_msa_aver_s_b(CPUMIPSState *env,
   1474                          uint32_t wd, uint32_t ws, uint32_t wt)
   1475 {
   1476     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1477     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1478     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1479 
   1480     pwd->b[0]  = msa_aver_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1481     pwd->b[1]  = msa_aver_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1482     pwd->b[2]  = msa_aver_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1483     pwd->b[3]  = msa_aver_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1484     pwd->b[4]  = msa_aver_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1485     pwd->b[5]  = msa_aver_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1486     pwd->b[6]  = msa_aver_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1487     pwd->b[7]  = msa_aver_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1488     pwd->b[8]  = msa_aver_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1489     pwd->b[9]  = msa_aver_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1490     pwd->b[10] = msa_aver_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1491     pwd->b[11] = msa_aver_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1492     pwd->b[12] = msa_aver_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1493     pwd->b[13] = msa_aver_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1494     pwd->b[14] = msa_aver_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1495     pwd->b[15] = msa_aver_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1496 }
   1497 
   1498 void helper_msa_aver_s_h(CPUMIPSState *env,
   1499                          uint32_t wd, uint32_t ws, uint32_t wt)
   1500 {
   1501     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1502     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1503     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1504 
   1505     pwd->h[0]  = msa_aver_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1506     pwd->h[1]  = msa_aver_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1507     pwd->h[2]  = msa_aver_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1508     pwd->h[3]  = msa_aver_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1509     pwd->h[4]  = msa_aver_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1510     pwd->h[5]  = msa_aver_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1511     pwd->h[6]  = msa_aver_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1512     pwd->h[7]  = msa_aver_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1513 }
   1514 
   1515 void helper_msa_aver_s_w(CPUMIPSState *env,
   1516                          uint32_t wd, uint32_t ws, uint32_t wt)
   1517 {
   1518     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1519     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1520     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1521 
   1522     pwd->w[0]  = msa_aver_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1523     pwd->w[1]  = msa_aver_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1524     pwd->w[2]  = msa_aver_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1525     pwd->w[3]  = msa_aver_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1526 }
   1527 
   1528 void helper_msa_aver_s_d(CPUMIPSState *env,
   1529                          uint32_t wd, uint32_t ws, uint32_t wt)
   1530 {
   1531     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1532     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1533     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1534 
   1535     pwd->d[0]  = msa_aver_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1536     pwd->d[1]  = msa_aver_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1537 }
   1538 
   1539 static inline uint64_t msa_aver_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
   1540 {
   1541     uint64_t u_arg1 = UNSIGNED(arg1, df);
   1542     uint64_t u_arg2 = UNSIGNED(arg2, df);
   1543     /* unsigned shift */
   1544     return (u_arg1 >> 1) + (u_arg2 >> 1) + ((u_arg1 | u_arg2) & 1);
   1545 }
   1546 
   1547 void helper_msa_aver_u_b(CPUMIPSState *env,
   1548                          uint32_t wd, uint32_t ws, uint32_t wt)
   1549 {
   1550     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1551     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1552     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1553 
   1554     pwd->b[0]  = msa_aver_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1555     pwd->b[1]  = msa_aver_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1556     pwd->b[2]  = msa_aver_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1557     pwd->b[3]  = msa_aver_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1558     pwd->b[4]  = msa_aver_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1559     pwd->b[5]  = msa_aver_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1560     pwd->b[6]  = msa_aver_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1561     pwd->b[7]  = msa_aver_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1562     pwd->b[8]  = msa_aver_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1563     pwd->b[9]  = msa_aver_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1564     pwd->b[10] = msa_aver_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1565     pwd->b[11] = msa_aver_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1566     pwd->b[12] = msa_aver_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1567     pwd->b[13] = msa_aver_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1568     pwd->b[14] = msa_aver_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1569     pwd->b[15] = msa_aver_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1570 }
   1571 
   1572 void helper_msa_aver_u_h(CPUMIPSState *env,
   1573                          uint32_t wd, uint32_t ws, uint32_t wt)
   1574 {
   1575     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1576     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1577     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1578 
   1579     pwd->h[0]  = msa_aver_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1580     pwd->h[1]  = msa_aver_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1581     pwd->h[2]  = msa_aver_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1582     pwd->h[3]  = msa_aver_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1583     pwd->h[4]  = msa_aver_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1584     pwd->h[5]  = msa_aver_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1585     pwd->h[6]  = msa_aver_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1586     pwd->h[7]  = msa_aver_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1587 }
   1588 
   1589 void helper_msa_aver_u_w(CPUMIPSState *env,
   1590                          uint32_t wd, uint32_t ws, uint32_t wt)
   1591 {
   1592     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1593     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1594     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1595 
   1596     pwd->w[0]  = msa_aver_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1597     pwd->w[1]  = msa_aver_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1598     pwd->w[2]  = msa_aver_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1599     pwd->w[3]  = msa_aver_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1600 }
   1601 
   1602 void helper_msa_aver_u_d(CPUMIPSState *env,
   1603                          uint32_t wd, uint32_t ws, uint32_t wt)
   1604 {
   1605     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1606     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1607     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1608 
   1609     pwd->d[0]  = msa_aver_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1610     pwd->d[1]  = msa_aver_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1611 }
   1612 
   1613 
   1614 /*
   1615  * Int Compare
   1616  * -----------
   1617  *
   1618  * +---------------+----------------------------------------------------------+
   1619  * | CEQ.B         | Vector Compare Equal (byte)                              |
   1620  * | CEQ.H         | Vector Compare Equal (halfword)                          |
   1621  * | CEQ.W         | Vector Compare Equal (word)                              |
   1622  * | CEQ.D         | Vector Compare Equal (doubleword)                        |
   1623  * | CLE_S.B       | Vector Compare Signed Less Than or Equal (byte)          |
   1624  * | CLE_S.H       | Vector Compare Signed Less Than or Equal (halfword)      |
   1625  * | CLE_S.W       | Vector Compare Signed Less Than or Equal (word)          |
   1626  * | CLE_S.D       | Vector Compare Signed Less Than or Equal (doubleword)    |
   1627  * | CLE_U.B       | Vector Compare Unsigned Less Than or Equal (byte)        |
   1628  * | CLE_U.H       | Vector Compare Unsigned Less Than or Equal (halfword)    |
   1629  * | CLE_U.W       | Vector Compare Unsigned Less Than or Equal (word)        |
   1630  * | CLE_U.D       | Vector Compare Unsigned Less Than or Equal (doubleword)  |
   1631  * | CLT_S.B       | Vector Compare Signed Less Than (byte)                   |
   1632  * | CLT_S.H       | Vector Compare Signed Less Than (halfword)               |
   1633  * | CLT_S.W       | Vector Compare Signed Less Than (word)                   |
   1634  * | CLT_S.D       | Vector Compare Signed Less Than (doubleword)             |
   1635  * | CLT_U.B       | Vector Compare Unsigned Less Than (byte)                 |
   1636  * | CLT_U.H       | Vector Compare Unsigned Less Than (halfword)             |
   1637  * | CLT_U.W       | Vector Compare Unsigned Less Than (word)                 |
   1638  * | CLT_U.D       | Vector Compare Unsigned Less Than (doubleword)           |
   1639  * +---------------+----------------------------------------------------------+
   1640  */
   1641 
   1642 static inline int64_t msa_ceq_df(uint32_t df, int64_t arg1, int64_t arg2)
   1643 {
   1644     return arg1 == arg2 ? -1 : 0;
   1645 }
   1646 
   1647 static inline int8_t msa_ceq_b(int8_t arg1, int8_t arg2)
   1648 {
   1649     return arg1 == arg2 ? -1 : 0;
   1650 }
   1651 
   1652 void helper_msa_ceq_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
   1653 {
   1654     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1655     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1656     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1657 
   1658     pwd->b[0]  = msa_ceq_b(pws->b[0],  pwt->b[0]);
   1659     pwd->b[1]  = msa_ceq_b(pws->b[1],  pwt->b[1]);
   1660     pwd->b[2]  = msa_ceq_b(pws->b[2],  pwt->b[2]);
   1661     pwd->b[3]  = msa_ceq_b(pws->b[3],  pwt->b[3]);
   1662     pwd->b[4]  = msa_ceq_b(pws->b[4],  pwt->b[4]);
   1663     pwd->b[5]  = msa_ceq_b(pws->b[5],  pwt->b[5]);
   1664     pwd->b[6]  = msa_ceq_b(pws->b[6],  pwt->b[6]);
   1665     pwd->b[7]  = msa_ceq_b(pws->b[7],  pwt->b[7]);
   1666     pwd->b[8]  = msa_ceq_b(pws->b[8],  pwt->b[8]);
   1667     pwd->b[9]  = msa_ceq_b(pws->b[9],  pwt->b[9]);
   1668     pwd->b[10] = msa_ceq_b(pws->b[10], pwt->b[10]);
   1669     pwd->b[11] = msa_ceq_b(pws->b[11], pwt->b[11]);
   1670     pwd->b[12] = msa_ceq_b(pws->b[12], pwt->b[12]);
   1671     pwd->b[13] = msa_ceq_b(pws->b[13], pwt->b[13]);
   1672     pwd->b[14] = msa_ceq_b(pws->b[14], pwt->b[14]);
   1673     pwd->b[15] = msa_ceq_b(pws->b[15], pwt->b[15]);
   1674 }
   1675 
   1676 static inline int16_t msa_ceq_h(int16_t arg1, int16_t arg2)
   1677 {
   1678     return arg1 == arg2 ? -1 : 0;
   1679 }
   1680 
   1681 void helper_msa_ceq_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
   1682 {
   1683     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1684     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1685     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1686 
   1687     pwd->h[0]  = msa_ceq_h(pws->h[0],  pwt->h[0]);
   1688     pwd->h[1]  = msa_ceq_h(pws->h[1],  pwt->h[1]);
   1689     pwd->h[2]  = msa_ceq_h(pws->h[2],  pwt->h[2]);
   1690     pwd->h[3]  = msa_ceq_h(pws->h[3],  pwt->h[3]);
   1691     pwd->h[4]  = msa_ceq_h(pws->h[4],  pwt->h[4]);
   1692     pwd->h[5]  = msa_ceq_h(pws->h[5],  pwt->h[5]);
   1693     pwd->h[6]  = msa_ceq_h(pws->h[6],  pwt->h[6]);
   1694     pwd->h[7]  = msa_ceq_h(pws->h[7],  pwt->h[7]);
   1695 }
   1696 
   1697 static inline int32_t msa_ceq_w(int32_t arg1, int32_t arg2)
   1698 {
   1699     return arg1 == arg2 ? -1 : 0;
   1700 }
   1701 
   1702 void helper_msa_ceq_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
   1703 {
   1704     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1705     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1706     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1707 
   1708     pwd->w[0]  = msa_ceq_w(pws->w[0],  pwt->w[0]);
   1709     pwd->w[1]  = msa_ceq_w(pws->w[1],  pwt->w[1]);
   1710     pwd->w[2]  = msa_ceq_w(pws->w[2],  pwt->w[2]);
   1711     pwd->w[3]  = msa_ceq_w(pws->w[3],  pwt->w[3]);
   1712 }
   1713 
   1714 static inline int64_t msa_ceq_d(int64_t arg1, int64_t arg2)
   1715 {
   1716     return arg1 == arg2 ? -1 : 0;
   1717 }
   1718 
   1719 void helper_msa_ceq_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
   1720 {
   1721     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1722     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1723     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1724 
   1725     pwd->d[0]  = msa_ceq_d(pws->d[0],  pwt->d[0]);
   1726     pwd->d[1]  = msa_ceq_d(pws->d[1],  pwt->d[1]);
   1727 }
   1728 
   1729 static inline int64_t msa_cle_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   1730 {
   1731     return arg1 <= arg2 ? -1 : 0;
   1732 }
   1733 
   1734 void helper_msa_cle_s_b(CPUMIPSState *env,
   1735                         uint32_t wd, uint32_t ws, uint32_t wt)
   1736 {
   1737     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1738     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1739     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1740 
   1741     pwd->b[0]  = msa_cle_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1742     pwd->b[1]  = msa_cle_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1743     pwd->b[2]  = msa_cle_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1744     pwd->b[3]  = msa_cle_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1745     pwd->b[4]  = msa_cle_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1746     pwd->b[5]  = msa_cle_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1747     pwd->b[6]  = msa_cle_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1748     pwd->b[7]  = msa_cle_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1749     pwd->b[8]  = msa_cle_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1750     pwd->b[9]  = msa_cle_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1751     pwd->b[10] = msa_cle_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1752     pwd->b[11] = msa_cle_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1753     pwd->b[12] = msa_cle_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1754     pwd->b[13] = msa_cle_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1755     pwd->b[14] = msa_cle_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1756     pwd->b[15] = msa_cle_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1757 }
   1758 
   1759 void helper_msa_cle_s_h(CPUMIPSState *env,
   1760                         uint32_t wd, uint32_t ws, uint32_t wt)
   1761 {
   1762     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1763     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1764     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1765 
   1766     pwd->h[0]  = msa_cle_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1767     pwd->h[1]  = msa_cle_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1768     pwd->h[2]  = msa_cle_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1769     pwd->h[3]  = msa_cle_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1770     pwd->h[4]  = msa_cle_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1771     pwd->h[5]  = msa_cle_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1772     pwd->h[6]  = msa_cle_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1773     pwd->h[7]  = msa_cle_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1774 }
   1775 
   1776 void helper_msa_cle_s_w(CPUMIPSState *env,
   1777                         uint32_t wd, uint32_t ws, uint32_t wt)
   1778 {
   1779     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1780     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1781     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1782 
   1783     pwd->w[0]  = msa_cle_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1784     pwd->w[1]  = msa_cle_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1785     pwd->w[2]  = msa_cle_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1786     pwd->w[3]  = msa_cle_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1787 }
   1788 
   1789 void helper_msa_cle_s_d(CPUMIPSState *env,
   1790                         uint32_t wd, uint32_t ws, uint32_t wt)
   1791 {
   1792     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1793     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1794     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1795 
   1796     pwd->d[0]  = msa_cle_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1797     pwd->d[1]  = msa_cle_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1798 }
   1799 
   1800 static inline int64_t msa_cle_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   1801 {
   1802     uint64_t u_arg1 = UNSIGNED(arg1, df);
   1803     uint64_t u_arg2 = UNSIGNED(arg2, df);
   1804     return u_arg1 <= u_arg2 ? -1 : 0;
   1805 }
   1806 
   1807 void helper_msa_cle_u_b(CPUMIPSState *env,
   1808                         uint32_t wd, uint32_t ws, uint32_t wt)
   1809 {
   1810     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1811     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1812     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1813 
   1814     pwd->b[0]  = msa_cle_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1815     pwd->b[1]  = msa_cle_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1816     pwd->b[2]  = msa_cle_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1817     pwd->b[3]  = msa_cle_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1818     pwd->b[4]  = msa_cle_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1819     pwd->b[5]  = msa_cle_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1820     pwd->b[6]  = msa_cle_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1821     pwd->b[7]  = msa_cle_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1822     pwd->b[8]  = msa_cle_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1823     pwd->b[9]  = msa_cle_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1824     pwd->b[10] = msa_cle_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1825     pwd->b[11] = msa_cle_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1826     pwd->b[12] = msa_cle_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1827     pwd->b[13] = msa_cle_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1828     pwd->b[14] = msa_cle_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1829     pwd->b[15] = msa_cle_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1830 }
   1831 
   1832 void helper_msa_cle_u_h(CPUMIPSState *env,
   1833                         uint32_t wd, uint32_t ws, uint32_t wt)
   1834 {
   1835     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1836     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1837     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1838 
   1839     pwd->h[0]  = msa_cle_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   1840     pwd->h[1]  = msa_cle_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   1841     pwd->h[2]  = msa_cle_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   1842     pwd->h[3]  = msa_cle_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   1843     pwd->h[4]  = msa_cle_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   1844     pwd->h[5]  = msa_cle_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   1845     pwd->h[6]  = msa_cle_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   1846     pwd->h[7]  = msa_cle_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   1847 }
   1848 
   1849 void helper_msa_cle_u_w(CPUMIPSState *env,
   1850                         uint32_t wd, uint32_t ws, uint32_t wt)
   1851 {
   1852     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1853     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1854     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1855 
   1856     pwd->w[0]  = msa_cle_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   1857     pwd->w[1]  = msa_cle_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   1858     pwd->w[2]  = msa_cle_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   1859     pwd->w[3]  = msa_cle_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   1860 }
   1861 
   1862 void helper_msa_cle_u_d(CPUMIPSState *env,
   1863                         uint32_t wd, uint32_t ws, uint32_t wt)
   1864 {
   1865     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1866     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1867     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1868 
   1869     pwd->d[0]  = msa_cle_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   1870     pwd->d[1]  = msa_cle_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   1871 }
   1872 
   1873 static inline int64_t msa_clt_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   1874 {
   1875     return arg1 < arg2 ? -1 : 0;
   1876 }
   1877 
   1878 static inline int8_t msa_clt_s_b(int8_t arg1, int8_t arg2)
   1879 {
   1880     return arg1 < arg2 ? -1 : 0;
   1881 }
   1882 
   1883 void helper_msa_clt_s_b(CPUMIPSState *env,
   1884                         uint32_t wd, uint32_t ws, uint32_t wt)
   1885 {
   1886     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1887     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1888     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1889 
   1890     pwd->b[0]  = msa_clt_s_b(pws->b[0],  pwt->b[0]);
   1891     pwd->b[1]  = msa_clt_s_b(pws->b[1],  pwt->b[1]);
   1892     pwd->b[2]  = msa_clt_s_b(pws->b[2],  pwt->b[2]);
   1893     pwd->b[3]  = msa_clt_s_b(pws->b[3],  pwt->b[3]);
   1894     pwd->b[4]  = msa_clt_s_b(pws->b[4],  pwt->b[4]);
   1895     pwd->b[5]  = msa_clt_s_b(pws->b[5],  pwt->b[5]);
   1896     pwd->b[6]  = msa_clt_s_b(pws->b[6],  pwt->b[6]);
   1897     pwd->b[7]  = msa_clt_s_b(pws->b[7],  pwt->b[7]);
   1898     pwd->b[8]  = msa_clt_s_b(pws->b[8],  pwt->b[8]);
   1899     pwd->b[9]  = msa_clt_s_b(pws->b[9],  pwt->b[9]);
   1900     pwd->b[10] = msa_clt_s_b(pws->b[10], pwt->b[10]);
   1901     pwd->b[11] = msa_clt_s_b(pws->b[11], pwt->b[11]);
   1902     pwd->b[12] = msa_clt_s_b(pws->b[12], pwt->b[12]);
   1903     pwd->b[13] = msa_clt_s_b(pws->b[13], pwt->b[13]);
   1904     pwd->b[14] = msa_clt_s_b(pws->b[14], pwt->b[14]);
   1905     pwd->b[15] = msa_clt_s_b(pws->b[15], pwt->b[15]);
   1906 }
   1907 
   1908 static inline int16_t msa_clt_s_h(int16_t arg1, int16_t arg2)
   1909 {
   1910     return arg1 < arg2 ? -1 : 0;
   1911 }
   1912 
   1913 void helper_msa_clt_s_h(CPUMIPSState *env,
   1914                         uint32_t wd, uint32_t ws, uint32_t wt)
   1915 {
   1916     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1917     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1918     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1919 
   1920     pwd->h[0]  = msa_clt_s_h(pws->h[0],  pwt->h[0]);
   1921     pwd->h[1]  = msa_clt_s_h(pws->h[1],  pwt->h[1]);
   1922     pwd->h[2]  = msa_clt_s_h(pws->h[2],  pwt->h[2]);
   1923     pwd->h[3]  = msa_clt_s_h(pws->h[3],  pwt->h[3]);
   1924     pwd->h[4]  = msa_clt_s_h(pws->h[4],  pwt->h[4]);
   1925     pwd->h[5]  = msa_clt_s_h(pws->h[5],  pwt->h[5]);
   1926     pwd->h[6]  = msa_clt_s_h(pws->h[6],  pwt->h[6]);
   1927     pwd->h[7]  = msa_clt_s_h(pws->h[7],  pwt->h[7]);
   1928 }
   1929 
   1930 static inline int32_t msa_clt_s_w(int32_t arg1, int32_t arg2)
   1931 {
   1932     return arg1 < arg2 ? -1 : 0;
   1933 }
   1934 
   1935 void helper_msa_clt_s_w(CPUMIPSState *env,
   1936                         uint32_t wd, uint32_t ws, uint32_t wt)
   1937 {
   1938     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1939     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1940     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1941 
   1942     pwd->w[0]  = msa_clt_s_w(pws->w[0],  pwt->w[0]);
   1943     pwd->w[1]  = msa_clt_s_w(pws->w[1],  pwt->w[1]);
   1944     pwd->w[2]  = msa_clt_s_w(pws->w[2],  pwt->w[2]);
   1945     pwd->w[3]  = msa_clt_s_w(pws->w[3],  pwt->w[3]);
   1946 }
   1947 
   1948 static inline int64_t msa_clt_s_d(int64_t arg1, int64_t arg2)
   1949 {
   1950     return arg1 < arg2 ? -1 : 0;
   1951 }
   1952 
   1953 void helper_msa_clt_s_d(CPUMIPSState *env,
   1954                         uint32_t wd, uint32_t ws, uint32_t wt)
   1955 {
   1956     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1957     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1958     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1959 
   1960     pwd->d[0]  = msa_clt_s_d(pws->d[0],  pwt->d[0]);
   1961     pwd->d[1]  = msa_clt_s_d(pws->d[1],  pwt->d[1]);
   1962 }
   1963 
   1964 static inline int64_t msa_clt_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   1965 {
   1966     uint64_t u_arg1 = UNSIGNED(arg1, df);
   1967     uint64_t u_arg2 = UNSIGNED(arg2, df);
   1968     return u_arg1 < u_arg2 ? -1 : 0;
   1969 }
   1970 
   1971 void helper_msa_clt_u_b(CPUMIPSState *env,
   1972                         uint32_t wd, uint32_t ws, uint32_t wt)
   1973 {
   1974     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   1975     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   1976     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   1977 
   1978     pwd->b[0]  = msa_clt_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   1979     pwd->b[1]  = msa_clt_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   1980     pwd->b[2]  = msa_clt_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   1981     pwd->b[3]  = msa_clt_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   1982     pwd->b[4]  = msa_clt_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   1983     pwd->b[5]  = msa_clt_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   1984     pwd->b[6]  = msa_clt_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   1985     pwd->b[7]  = msa_clt_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   1986     pwd->b[8]  = msa_clt_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   1987     pwd->b[9]  = msa_clt_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   1988     pwd->b[10] = msa_clt_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   1989     pwd->b[11] = msa_clt_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   1990     pwd->b[12] = msa_clt_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   1991     pwd->b[13] = msa_clt_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   1992     pwd->b[14] = msa_clt_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   1993     pwd->b[15] = msa_clt_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   1994 }
   1995 
   1996 void helper_msa_clt_u_h(CPUMIPSState *env,
   1997                         uint32_t wd, uint32_t ws, uint32_t wt)
   1998 {
   1999     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2000     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2001     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2002 
   2003     pwd->h[0]  = msa_clt_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2004     pwd->h[1]  = msa_clt_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2005     pwd->h[2]  = msa_clt_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2006     pwd->h[3]  = msa_clt_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2007     pwd->h[4]  = msa_clt_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2008     pwd->h[5]  = msa_clt_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2009     pwd->h[6]  = msa_clt_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2010     pwd->h[7]  = msa_clt_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2011 }
   2012 
   2013 void helper_msa_clt_u_w(CPUMIPSState *env,
   2014                         uint32_t wd, uint32_t ws, uint32_t wt)
   2015 {
   2016     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2017     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2018     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2019 
   2020     pwd->w[0]  = msa_clt_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2021     pwd->w[1]  = msa_clt_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2022     pwd->w[2]  = msa_clt_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2023     pwd->w[3]  = msa_clt_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2024 }
   2025 
   2026 void helper_msa_clt_u_d(CPUMIPSState *env,
   2027                         uint32_t wd, uint32_t ws, uint32_t wt)
   2028 {
   2029     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2030     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2031     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2032 
   2033     pwd->d[0]  = msa_clt_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2034     pwd->d[1]  = msa_clt_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2035 }
   2036 
   2037 
   2038 /*
   2039  * Int Divide
   2040  * ----------
   2041  *
   2042  * +---------------+----------------------------------------------------------+
   2043  * | DIV_S.B       | Vector Signed Divide (byte)                              |
   2044  * | DIV_S.H       | Vector Signed Divide (halfword)                          |
   2045  * | DIV_S.W       | Vector Signed Divide (word)                              |
   2046  * | DIV_S.D       | Vector Signed Divide (doubleword)                        |
   2047  * | DIV_U.B       | Vector Unsigned Divide (byte)                            |
   2048  * | DIV_U.H       | Vector Unsigned Divide (halfword)                        |
   2049  * | DIV_U.W       | Vector Unsigned Divide (word)                            |
   2050  * | DIV_U.D       | Vector Unsigned Divide (doubleword)                      |
   2051  * +---------------+----------------------------------------------------------+
   2052  */
   2053 
   2054 
   2055 static inline int64_t msa_div_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   2056 {
   2057     if (arg1 == DF_MIN_INT(df) && arg2 == -1) {
   2058         return DF_MIN_INT(df);
   2059     }
   2060     return arg2 ? arg1 / arg2
   2061                 : arg1 >= 0 ? -1 : 1;
   2062 }
   2063 
   2064 void helper_msa_div_s_b(CPUMIPSState *env,
   2065                         uint32_t wd, uint32_t ws, uint32_t wt)
   2066 {
   2067     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2068     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2069     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2070 
   2071     pwd->b[0]  = msa_div_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   2072     pwd->b[1]  = msa_div_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   2073     pwd->b[2]  = msa_div_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   2074     pwd->b[3]  = msa_div_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   2075     pwd->b[4]  = msa_div_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   2076     pwd->b[5]  = msa_div_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   2077     pwd->b[6]  = msa_div_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   2078     pwd->b[7]  = msa_div_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   2079     pwd->b[8]  = msa_div_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   2080     pwd->b[9]  = msa_div_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   2081     pwd->b[10] = msa_div_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   2082     pwd->b[11] = msa_div_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   2083     pwd->b[12] = msa_div_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   2084     pwd->b[13] = msa_div_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   2085     pwd->b[14] = msa_div_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   2086     pwd->b[15] = msa_div_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   2087 }
   2088 
   2089 void helper_msa_div_s_h(CPUMIPSState *env,
   2090                         uint32_t wd, uint32_t ws, uint32_t wt)
   2091 {
   2092     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2093     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2094     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2095 
   2096     pwd->h[0]  = msa_div_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2097     pwd->h[1]  = msa_div_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2098     pwd->h[2]  = msa_div_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2099     pwd->h[3]  = msa_div_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2100     pwd->h[4]  = msa_div_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2101     pwd->h[5]  = msa_div_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2102     pwd->h[6]  = msa_div_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2103     pwd->h[7]  = msa_div_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2104 }
   2105 
   2106 void helper_msa_div_s_w(CPUMIPSState *env,
   2107                         uint32_t wd, uint32_t ws, uint32_t wt)
   2108 {
   2109     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2110     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2111     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2112 
   2113     pwd->w[0]  = msa_div_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2114     pwd->w[1]  = msa_div_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2115     pwd->w[2]  = msa_div_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2116     pwd->w[3]  = msa_div_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2117 }
   2118 
   2119 void helper_msa_div_s_d(CPUMIPSState *env,
   2120                         uint32_t wd, uint32_t ws, uint32_t wt)
   2121 {
   2122     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2123     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2124     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2125 
   2126     pwd->d[0]  = msa_div_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2127     pwd->d[1]  = msa_div_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2128 }
   2129 
   2130 static inline int64_t msa_div_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   2131 {
   2132     uint64_t u_arg1 = UNSIGNED(arg1, df);
   2133     uint64_t u_arg2 = UNSIGNED(arg2, df);
   2134     return arg2 ? u_arg1 / u_arg2 : -1;
   2135 }
   2136 
   2137 void helper_msa_div_u_b(CPUMIPSState *env,
   2138                         uint32_t wd, uint32_t ws, uint32_t wt)
   2139 {
   2140     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2141     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2142     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2143 
   2144     pwd->b[0]  = msa_div_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   2145     pwd->b[1]  = msa_div_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   2146     pwd->b[2]  = msa_div_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   2147     pwd->b[3]  = msa_div_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   2148     pwd->b[4]  = msa_div_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   2149     pwd->b[5]  = msa_div_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   2150     pwd->b[6]  = msa_div_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   2151     pwd->b[7]  = msa_div_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   2152     pwd->b[8]  = msa_div_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   2153     pwd->b[9]  = msa_div_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   2154     pwd->b[10] = msa_div_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   2155     pwd->b[11] = msa_div_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   2156     pwd->b[12] = msa_div_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   2157     pwd->b[13] = msa_div_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   2158     pwd->b[14] = msa_div_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   2159     pwd->b[15] = msa_div_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   2160 }
   2161 
   2162 void helper_msa_div_u_h(CPUMIPSState *env,
   2163                         uint32_t wd, uint32_t ws, uint32_t wt)
   2164 {
   2165     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2166     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2167     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2168 
   2169     pwd->h[0]  = msa_div_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2170     pwd->h[1]  = msa_div_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2171     pwd->h[2]  = msa_div_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2172     pwd->h[3]  = msa_div_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2173     pwd->h[4]  = msa_div_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2174     pwd->h[5]  = msa_div_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2175     pwd->h[6]  = msa_div_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2176     pwd->h[7]  = msa_div_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2177 }
   2178 
   2179 void helper_msa_div_u_w(CPUMIPSState *env,
   2180                         uint32_t wd, uint32_t ws, uint32_t wt)
   2181 {
   2182     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2183     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2184     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2185 
   2186     pwd->w[0]  = msa_div_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2187     pwd->w[1]  = msa_div_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2188     pwd->w[2]  = msa_div_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2189     pwd->w[3]  = msa_div_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2190 }
   2191 
   2192 void helper_msa_div_u_d(CPUMIPSState *env,
   2193                         uint32_t wd, uint32_t ws, uint32_t wt)
   2194 {
   2195     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2196     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2197     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2198 
   2199     pwd->d[0]  = msa_div_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2200     pwd->d[1]  = msa_div_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2201 }
   2202 
   2203 
   2204 /*
   2205  * Int Dot Product
   2206  * ---------------
   2207  *
   2208  * +---------------+----------------------------------------------------------+
   2209  * | DOTP_S.H      | Vector Signed Dot Product (halfword)                     |
   2210  * | DOTP_S.W      | Vector Signed Dot Product (word)                         |
   2211  * | DOTP_S.D      | Vector Signed Dot Product (doubleword)                   |
   2212  * | DOTP_U.H      | Vector Unsigned Dot Product (halfword)                   |
   2213  * | DOTP_U.W      | Vector Unsigned Dot Product (word)                       |
   2214  * | DOTP_U.D      | Vector Unsigned Dot Product (doubleword)                 |
 * | DPADD_S.H     | Vector Signed Dot Product and Add (halfword)             |
 * | DPADD_S.W     | Vector Signed Dot Product and Add (word)                 |
 * | DPADD_S.D     | Vector Signed Dot Product and Add (doubleword)           |
 * | DPADD_U.H     | Vector Unsigned Dot Product and Add (halfword)           |
 * | DPADD_U.W     | Vector Unsigned Dot Product and Add (word)               |
 * | DPADD_U.D     | Vector Unsigned Dot Product and Add (doubleword)         |
 * | DPSUB_S.H     | Vector Signed Dot Product and Subtract (halfword)        |
 * | DPSUB_S.W     | Vector Signed Dot Product and Subtract (word)            |
 * | DPSUB_S.D     | Vector Signed Dot Product and Subtract (doubleword)      |
 * | DPSUB_U.H     | Vector Unsigned Dot Product and Subtract (halfword)      |
 * | DPSUB_U.W     | Vector Unsigned Dot Product and Subtract (word)          |
 * | DPSUB_U.D     | Vector Unsigned Dot Product and Subtract (doubleword)    |
   2227  * +---------------+----------------------------------------------------------+
   2228  */
   2229 
   2230 #define SIGNED_EXTRACT(e, o, a, df)     \
   2231     do {                                \
   2232         e = SIGNED_EVEN(a, df);         \
   2233         o = SIGNED_ODD(a, df);          \
   2234     } while (0)
   2235 
   2236 #define UNSIGNED_EXTRACT(e, o, a, df)   \
   2237     do {                                \
   2238         e = UNSIGNED_EVEN(a, df);       \
   2239         o = UNSIGNED_ODD(a, df);        \
   2240     } while (0)
   2241 
   2242 
   2243 static inline int64_t msa_dotp_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   2244 {
   2245     int64_t even_arg1;
   2246     int64_t even_arg2;
   2247     int64_t odd_arg1;
   2248     int64_t odd_arg2;
   2249     SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
   2250     SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
   2251     return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
   2252 }
   2253 
   2254 void helper_msa_dotp_s_h(CPUMIPSState *env,
   2255                          uint32_t wd, uint32_t ws, uint32_t wt)
   2256 {
   2257     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2258     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2259     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2260 
   2261     pwd->h[0]  = msa_dotp_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2262     pwd->h[1]  = msa_dotp_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2263     pwd->h[2]  = msa_dotp_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2264     pwd->h[3]  = msa_dotp_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2265     pwd->h[4]  = msa_dotp_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2266     pwd->h[5]  = msa_dotp_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2267     pwd->h[6]  = msa_dotp_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2268     pwd->h[7]  = msa_dotp_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2269 }
   2270 
   2271 void helper_msa_dotp_s_w(CPUMIPSState *env,
   2272                          uint32_t wd, uint32_t ws, uint32_t wt)
   2273 {
   2274     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2275     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2276     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2277 
   2278     pwd->w[0]  = msa_dotp_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2279     pwd->w[1]  = msa_dotp_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2280     pwd->w[2]  = msa_dotp_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2281     pwd->w[3]  = msa_dotp_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2282 }
   2283 
   2284 void helper_msa_dotp_s_d(CPUMIPSState *env,
   2285                          uint32_t wd, uint32_t ws, uint32_t wt)
   2286 {
   2287     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2288     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2289     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2290 
   2291     pwd->d[0]  = msa_dotp_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2292     pwd->d[1]  = msa_dotp_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2293 }
   2294 
   2295 
   2296 static inline int64_t msa_dotp_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   2297 {
   2298     int64_t even_arg1;
   2299     int64_t even_arg2;
   2300     int64_t odd_arg1;
   2301     int64_t odd_arg2;
   2302     UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
   2303     UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
   2304     return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
   2305 }
   2306 
   2307 void helper_msa_dotp_u_h(CPUMIPSState *env,
   2308                          uint32_t wd, uint32_t ws, uint32_t wt)
   2309 {
   2310     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2311     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2312     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2313 
   2314     pwd->h[0]  = msa_dotp_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2315     pwd->h[1]  = msa_dotp_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2316     pwd->h[2]  = msa_dotp_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2317     pwd->h[3]  = msa_dotp_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2318     pwd->h[4]  = msa_dotp_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2319     pwd->h[5]  = msa_dotp_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2320     pwd->h[6]  = msa_dotp_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2321     pwd->h[7]  = msa_dotp_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2322 }
   2323 
   2324 void helper_msa_dotp_u_w(CPUMIPSState *env,
   2325                          uint32_t wd, uint32_t ws, uint32_t wt)
   2326 {
   2327     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2328     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2329     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2330 
   2331     pwd->w[0]  = msa_dotp_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2332     pwd->w[1]  = msa_dotp_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2333     pwd->w[2]  = msa_dotp_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2334     pwd->w[3]  = msa_dotp_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2335 }
   2336 
   2337 void helper_msa_dotp_u_d(CPUMIPSState *env,
   2338                          uint32_t wd, uint32_t ws, uint32_t wt)
   2339 {
   2340     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2341     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2342     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2343 
   2344     pwd->d[0]  = msa_dotp_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2345     pwd->d[1]  = msa_dotp_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2346 }
   2347 
   2348 
   2349 static inline int64_t msa_dpadd_s_df(uint32_t df, int64_t dest, int64_t arg1,
   2350                                      int64_t arg2)
   2351 {
   2352     int64_t even_arg1;
   2353     int64_t even_arg2;
   2354     int64_t odd_arg1;
   2355     int64_t odd_arg2;
   2356     SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
   2357     SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
   2358     return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
   2359 }
   2360 
   2361 void helper_msa_dpadd_s_h(CPUMIPSState *env,
   2362                           uint32_t wd, uint32_t ws, uint32_t wt)
   2363 {
   2364     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2365     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2366     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2367 
   2368     pwd->h[0]  = msa_dpadd_s_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
   2369     pwd->h[1]  = msa_dpadd_s_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
   2370     pwd->h[2]  = msa_dpadd_s_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
   2371     pwd->h[3]  = msa_dpadd_s_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
   2372     pwd->h[4]  = msa_dpadd_s_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
   2373     pwd->h[5]  = msa_dpadd_s_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
   2374     pwd->h[6]  = msa_dpadd_s_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
   2375     pwd->h[7]  = msa_dpadd_s_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
   2376 }
   2377 
   2378 void helper_msa_dpadd_s_w(CPUMIPSState *env,
   2379                           uint32_t wd, uint32_t ws, uint32_t wt)
   2380 {
   2381     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2382     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2383     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2384 
   2385     pwd->w[0]  = msa_dpadd_s_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
   2386     pwd->w[1]  = msa_dpadd_s_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
   2387     pwd->w[2]  = msa_dpadd_s_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
   2388     pwd->w[3]  = msa_dpadd_s_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
   2389 }
   2390 
   2391 void helper_msa_dpadd_s_d(CPUMIPSState *env,
   2392                           uint32_t wd, uint32_t ws, uint32_t wt)
   2393 {
   2394     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2395     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2396     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2397 
   2398     pwd->d[0]  = msa_dpadd_s_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
   2399     pwd->d[1]  = msa_dpadd_s_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
   2400 }
   2401 
   2402 
   2403 static inline int64_t msa_dpadd_u_df(uint32_t df, int64_t dest, int64_t arg1,
   2404                                      int64_t arg2)
   2405 {
   2406     int64_t even_arg1;
   2407     int64_t even_arg2;
   2408     int64_t odd_arg1;
   2409     int64_t odd_arg2;
   2410     UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
   2411     UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
   2412     return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
   2413 }
   2414 
   2415 void helper_msa_dpadd_u_h(CPUMIPSState *env,
   2416                           uint32_t wd, uint32_t ws, uint32_t wt)
   2417 {
   2418     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2419     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2420     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2421 
   2422     pwd->h[0]  = msa_dpadd_u_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
   2423     pwd->h[1]  = msa_dpadd_u_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
   2424     pwd->h[2]  = msa_dpadd_u_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
   2425     pwd->h[3]  = msa_dpadd_u_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
   2426     pwd->h[4]  = msa_dpadd_u_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
   2427     pwd->h[5]  = msa_dpadd_u_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
   2428     pwd->h[6]  = msa_dpadd_u_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
   2429     pwd->h[7]  = msa_dpadd_u_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
   2430 }
   2431 
   2432 void helper_msa_dpadd_u_w(CPUMIPSState *env,
   2433                           uint32_t wd, uint32_t ws, uint32_t wt)
   2434 {
   2435     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2436     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2437     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2438 
   2439     pwd->w[0]  = msa_dpadd_u_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
   2440     pwd->w[1]  = msa_dpadd_u_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
   2441     pwd->w[2]  = msa_dpadd_u_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
   2442     pwd->w[3]  = msa_dpadd_u_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
   2443 }
   2444 
   2445 void helper_msa_dpadd_u_d(CPUMIPSState *env,
   2446                           uint32_t wd, uint32_t ws, uint32_t wt)
   2447 {
   2448     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2449     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2450     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2451 
   2452     pwd->d[0]  = msa_dpadd_u_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
   2453     pwd->d[1]  = msa_dpadd_u_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
   2454 }
   2455 
   2456 
   2457 static inline int64_t msa_dpsub_s_df(uint32_t df, int64_t dest, int64_t arg1,
   2458                                      int64_t arg2)
   2459 {
   2460     int64_t even_arg1;
   2461     int64_t even_arg2;
   2462     int64_t odd_arg1;
   2463     int64_t odd_arg2;
   2464     SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
   2465     SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
   2466     return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2));
   2467 }
   2468 
   2469 void helper_msa_dpsub_s_h(CPUMIPSState *env,
   2470                           uint32_t wd, uint32_t ws, uint32_t wt)
   2471 {
   2472     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2473     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2474     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2475 
   2476     pwd->h[0]  = msa_dpsub_s_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
   2477     pwd->h[1]  = msa_dpsub_s_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
   2478     pwd->h[2]  = msa_dpsub_s_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
   2479     pwd->h[3]  = msa_dpsub_s_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
   2480     pwd->h[4]  = msa_dpsub_s_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
   2481     pwd->h[5]  = msa_dpsub_s_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
   2482     pwd->h[6]  = msa_dpsub_s_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
   2483     pwd->h[7]  = msa_dpsub_s_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
   2484 }
   2485 
   2486 void helper_msa_dpsub_s_w(CPUMIPSState *env,
   2487                           uint32_t wd, uint32_t ws, uint32_t wt)
   2488 {
   2489     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2490     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2491     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2492 
   2493     pwd->w[0]  = msa_dpsub_s_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
   2494     pwd->w[1]  = msa_dpsub_s_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
   2495     pwd->w[2]  = msa_dpsub_s_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
   2496     pwd->w[3]  = msa_dpsub_s_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
   2497 }
   2498 
   2499 void helper_msa_dpsub_s_d(CPUMIPSState *env,
   2500                           uint32_t wd, uint32_t ws, uint32_t wt)
   2501 {
   2502     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2503     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2504     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2505 
   2506     pwd->d[0]  = msa_dpsub_s_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
   2507     pwd->d[1]  = msa_dpsub_s_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
   2508 }
   2509 
   2510 
   2511 static inline int64_t msa_dpsub_u_df(uint32_t df, int64_t dest, int64_t arg1,
   2512                                      int64_t arg2)
   2513 {
   2514     int64_t even_arg1;
   2515     int64_t even_arg2;
   2516     int64_t odd_arg1;
   2517     int64_t odd_arg2;
   2518     UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
   2519     UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
   2520     return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2));
   2521 }
   2522 
   2523 void helper_msa_dpsub_u_h(CPUMIPSState *env,
   2524                           uint32_t wd, uint32_t ws, uint32_t wt)
   2525 {
   2526     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2527     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2528     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2529 
   2530     pwd->h[0]  = msa_dpsub_u_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
   2531     pwd->h[1]  = msa_dpsub_u_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
   2532     pwd->h[2]  = msa_dpsub_u_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
   2533     pwd->h[3]  = msa_dpsub_u_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
   2534     pwd->h[4]  = msa_dpsub_u_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
   2535     pwd->h[5]  = msa_dpsub_u_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
   2536     pwd->h[6]  = msa_dpsub_u_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
   2537     pwd->h[7]  = msa_dpsub_u_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
   2538 }
   2539 
   2540 void helper_msa_dpsub_u_w(CPUMIPSState *env,
   2541                           uint32_t wd, uint32_t ws, uint32_t wt)
   2542 {
   2543     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2544     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2545     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2546 
   2547     pwd->w[0]  = msa_dpsub_u_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
   2548     pwd->w[1]  = msa_dpsub_u_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
   2549     pwd->w[2]  = msa_dpsub_u_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
   2550     pwd->w[3]  = msa_dpsub_u_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
   2551 }
   2552 
   2553 void helper_msa_dpsub_u_d(CPUMIPSState *env,
   2554                           uint32_t wd, uint32_t ws, uint32_t wt)
   2555 {
   2556     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2557     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2558     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2559 
   2560     pwd->d[0]  = msa_dpsub_u_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
   2561     pwd->d[1]  = msa_dpsub_u_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
   2562 }
   2563 
   2564 
   2565 /*
   2566  * Int Max Min
   2567  * -----------
   2568  *
   2569  * +---------------+----------------------------------------------------------+
   2570  * | MAX_A.B       | Vector Maximum Based on Absolute Value (byte)            |
   2571  * | MAX_A.H       | Vector Maximum Based on Absolute Value (halfword)        |
   2572  * | MAX_A.W       | Vector Maximum Based on Absolute Value (word)            |
   2573  * | MAX_A.D       | Vector Maximum Based on Absolute Value (doubleword)      |
   2574  * | MAX_S.B       | Vector Signed Maximum (byte)                             |
   2575  * | MAX_S.H       | Vector Signed Maximum (halfword)                         |
   2576  * | MAX_S.W       | Vector Signed Maximum (word)                             |
   2577  * | MAX_S.D       | Vector Signed Maximum (doubleword)                       |
   2578  * | MAX_U.B       | Vector Unsigned Maximum (byte)                           |
   2579  * | MAX_U.H       | Vector Unsigned Maximum (halfword)                       |
   2580  * | MAX_U.W       | Vector Unsigned Maximum (word)                           |
   2581  * | MAX_U.D       | Vector Unsigned Maximum (doubleword)                     |
   2582  * | MIN_A.B       | Vector Minimum Based on Absolute Value (byte)            |
   2583  * | MIN_A.H       | Vector Minimum Based on Absolute Value (halfword)        |
   2584  * | MIN_A.W       | Vector Minimum Based on Absolute Value (word)            |
   2585  * | MIN_A.D       | Vector Minimum Based on Absolute Value (doubleword)      |
   2586  * | MIN_S.B       | Vector Signed Minimum (byte)                             |
   2587  * | MIN_S.H       | Vector Signed Minimum (halfword)                         |
   2588  * | MIN_S.W       | Vector Signed Minimum (word)                             |
   2589  * | MIN_S.D       | Vector Signed Minimum (doubleword)                       |
   2590  * | MIN_U.B       | Vector Unsigned Minimum (byte)                           |
   2591  * | MIN_U.H       | Vector Unsigned Minimum (halfword)                       |
   2592  * | MIN_U.W       | Vector Unsigned Minimum (word)                           |
   2593  * | MIN_U.D       | Vector Unsigned Minimum (doubleword)                     |
   2594  * +---------------+----------------------------------------------------------+
   2595  */
   2596 
   2597 static inline int64_t msa_max_a_df(uint32_t df, int64_t arg1, int64_t arg2)
   2598 {
   2599     uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
   2600     uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
   2601     return abs_arg1 > abs_arg2 ? arg1 : arg2;
   2602 }
   2603 
   2604 void helper_msa_max_a_b(CPUMIPSState *env,
   2605                         uint32_t wd, uint32_t ws, uint32_t wt)
   2606 {
   2607     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2608     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2609     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2610 
   2611     pwd->b[0]  = msa_max_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   2612     pwd->b[1]  = msa_max_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   2613     pwd->b[2]  = msa_max_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   2614     pwd->b[3]  = msa_max_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   2615     pwd->b[4]  = msa_max_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   2616     pwd->b[5]  = msa_max_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   2617     pwd->b[6]  = msa_max_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   2618     pwd->b[7]  = msa_max_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   2619     pwd->b[8]  = msa_max_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   2620     pwd->b[9]  = msa_max_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   2621     pwd->b[10] = msa_max_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
   2622     pwd->b[11] = msa_max_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
   2623     pwd->b[12] = msa_max_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
   2624     pwd->b[13] = msa_max_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
   2625     pwd->b[14] = msa_max_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
   2626     pwd->b[15] = msa_max_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
   2627 }
   2628 
   2629 void helper_msa_max_a_h(CPUMIPSState *env,
   2630                         uint32_t wd, uint32_t ws, uint32_t wt)
   2631 {
   2632     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2633     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2634     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2635 
   2636     pwd->h[0]  = msa_max_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2637     pwd->h[1]  = msa_max_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2638     pwd->h[2]  = msa_max_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2639     pwd->h[3]  = msa_max_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2640     pwd->h[4]  = msa_max_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2641     pwd->h[5]  = msa_max_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2642     pwd->h[6]  = msa_max_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2643     pwd->h[7]  = msa_max_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2644 }
   2645 
   2646 void helper_msa_max_a_w(CPUMIPSState *env,
   2647                         uint32_t wd, uint32_t ws, uint32_t wt)
   2648 {
   2649     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2650     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2651     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2652 
   2653     pwd->w[0]  = msa_max_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2654     pwd->w[1]  = msa_max_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2655     pwd->w[2]  = msa_max_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2656     pwd->w[3]  = msa_max_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2657 }
   2658 
   2659 void helper_msa_max_a_d(CPUMIPSState *env,
   2660                         uint32_t wd, uint32_t ws, uint32_t wt)
   2661 {
   2662     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2663     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2664     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2665 
   2666     pwd->d[0]  = msa_max_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2667     pwd->d[1]  = msa_max_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2668 }
   2669 
   2670 
   2671 static inline int64_t msa_max_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   2672 {
   2673     return arg1 > arg2 ? arg1 : arg2;
   2674 }
   2675 
   2676 void helper_msa_max_s_b(CPUMIPSState *env,
   2677                         uint32_t wd, uint32_t ws, uint32_t wt)
   2678 {
   2679     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2680     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2681     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2682 
   2683     pwd->b[0]  = msa_max_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   2684     pwd->b[1]  = msa_max_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   2685     pwd->b[2]  = msa_max_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   2686     pwd->b[3]  = msa_max_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   2687     pwd->b[4]  = msa_max_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   2688     pwd->b[5]  = msa_max_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   2689     pwd->b[6]  = msa_max_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   2690     pwd->b[7]  = msa_max_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   2691     pwd->b[8]  = msa_max_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   2692     pwd->b[9]  = msa_max_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   2693     pwd->b[10] = msa_max_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   2694     pwd->b[11] = msa_max_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   2695     pwd->b[12] = msa_max_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   2696     pwd->b[13] = msa_max_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   2697     pwd->b[14] = msa_max_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   2698     pwd->b[15] = msa_max_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   2699 }
   2700 
   2701 void helper_msa_max_s_h(CPUMIPSState *env,
   2702                         uint32_t wd, uint32_t ws, uint32_t wt)
   2703 {
   2704     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2705     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2706     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2707 
   2708     pwd->h[0]  = msa_max_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2709     pwd->h[1]  = msa_max_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2710     pwd->h[2]  = msa_max_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2711     pwd->h[3]  = msa_max_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2712     pwd->h[4]  = msa_max_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2713     pwd->h[5]  = msa_max_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2714     pwd->h[6]  = msa_max_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2715     pwd->h[7]  = msa_max_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2716 }
   2717 
   2718 void helper_msa_max_s_w(CPUMIPSState *env,
   2719                         uint32_t wd, uint32_t ws, uint32_t wt)
   2720 {
   2721     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2722     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2723     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2724 
   2725     pwd->w[0]  = msa_max_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2726     pwd->w[1]  = msa_max_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2727     pwd->w[2]  = msa_max_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2728     pwd->w[3]  = msa_max_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2729 }
   2730 
   2731 void helper_msa_max_s_d(CPUMIPSState *env,
   2732                         uint32_t wd, uint32_t ws, uint32_t wt)
   2733 {
   2734     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2735     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2736     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2737 
   2738     pwd->d[0]  = msa_max_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2739     pwd->d[1]  = msa_max_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2740 }
   2741 
   2742 
   2743 static inline int64_t msa_max_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   2744 {
   2745     uint64_t u_arg1 = UNSIGNED(arg1, df);
   2746     uint64_t u_arg2 = UNSIGNED(arg2, df);
   2747     return u_arg1 > u_arg2 ? arg1 : arg2;
   2748 }
   2749 
   2750 void helper_msa_max_u_b(CPUMIPSState *env,
   2751                         uint32_t wd, uint32_t ws, uint32_t wt)
   2752 {
   2753     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2754     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2755     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2756 
   2757     pwd->b[0]  = msa_max_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   2758     pwd->b[1]  = msa_max_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   2759     pwd->b[2]  = msa_max_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   2760     pwd->b[3]  = msa_max_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   2761     pwd->b[4]  = msa_max_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   2762     pwd->b[5]  = msa_max_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   2763     pwd->b[6]  = msa_max_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   2764     pwd->b[7]  = msa_max_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   2765     pwd->b[8]  = msa_max_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   2766     pwd->b[9]  = msa_max_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   2767     pwd->b[10] = msa_max_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   2768     pwd->b[11] = msa_max_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   2769     pwd->b[12] = msa_max_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   2770     pwd->b[13] = msa_max_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   2771     pwd->b[14] = msa_max_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   2772     pwd->b[15] = msa_max_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   2773 }
   2774 
   2775 void helper_msa_max_u_h(CPUMIPSState *env,
   2776                         uint32_t wd, uint32_t ws, uint32_t wt)
   2777 {
   2778     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2779     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2780     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2781 
   2782     pwd->h[0]  = msa_max_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2783     pwd->h[1]  = msa_max_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2784     pwd->h[2]  = msa_max_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2785     pwd->h[3]  = msa_max_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2786     pwd->h[4]  = msa_max_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2787     pwd->h[5]  = msa_max_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2788     pwd->h[6]  = msa_max_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2789     pwd->h[7]  = msa_max_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2790 }
   2791 
   2792 void helper_msa_max_u_w(CPUMIPSState *env,
   2793                         uint32_t wd, uint32_t ws, uint32_t wt)
   2794 {
   2795     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2796     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2797     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2798 
   2799     pwd->w[0]  = msa_max_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2800     pwd->w[1]  = msa_max_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2801     pwd->w[2]  = msa_max_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2802     pwd->w[3]  = msa_max_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2803 }
   2804 
   2805 void helper_msa_max_u_d(CPUMIPSState *env,
   2806                         uint32_t wd, uint32_t ws, uint32_t wt)
   2807 {
   2808     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2809     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2810     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2811 
   2812     pwd->d[0]  = msa_max_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2813     pwd->d[1]  = msa_max_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2814 }
   2815 
   2816 
   2817 static inline int64_t msa_min_a_df(uint32_t df, int64_t arg1, int64_t arg2)
   2818 {
   2819     uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
   2820     uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
   2821     return abs_arg1 < abs_arg2 ? arg1 : arg2;
   2822 }
   2823 
   2824 void helper_msa_min_a_b(CPUMIPSState *env,
   2825                         uint32_t wd, uint32_t ws, uint32_t wt)
   2826 {
   2827     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2828     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2829     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2830 
   2831     pwd->b[0]  = msa_min_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   2832     pwd->b[1]  = msa_min_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   2833     pwd->b[2]  = msa_min_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   2834     pwd->b[3]  = msa_min_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   2835     pwd->b[4]  = msa_min_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   2836     pwd->b[5]  = msa_min_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   2837     pwd->b[6]  = msa_min_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   2838     pwd->b[7]  = msa_min_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   2839     pwd->b[8]  = msa_min_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   2840     pwd->b[9]  = msa_min_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   2841     pwd->b[10] = msa_min_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
   2842     pwd->b[11] = msa_min_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
   2843     pwd->b[12] = msa_min_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
   2844     pwd->b[13] = msa_min_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
   2845     pwd->b[14] = msa_min_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
   2846     pwd->b[15] = msa_min_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
   2847 }
   2848 
   2849 void helper_msa_min_a_h(CPUMIPSState *env,
   2850                         uint32_t wd, uint32_t ws, uint32_t wt)
   2851 {
   2852     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2853     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2854     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2855 
   2856     pwd->h[0]  = msa_min_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2857     pwd->h[1]  = msa_min_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2858     pwd->h[2]  = msa_min_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2859     pwd->h[3]  = msa_min_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2860     pwd->h[4]  = msa_min_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2861     pwd->h[5]  = msa_min_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2862     pwd->h[6]  = msa_min_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2863     pwd->h[7]  = msa_min_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2864 }
   2865 
   2866 void helper_msa_min_a_w(CPUMIPSState *env,
   2867                         uint32_t wd, uint32_t ws, uint32_t wt)
   2868 {
   2869     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2870     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2871     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2872 
   2873     pwd->w[0]  = msa_min_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2874     pwd->w[1]  = msa_min_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2875     pwd->w[2]  = msa_min_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2876     pwd->w[3]  = msa_min_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2877 }
   2878 
   2879 void helper_msa_min_a_d(CPUMIPSState *env,
   2880                         uint32_t wd, uint32_t ws, uint32_t wt)
   2881 {
   2882     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2883     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2884     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2885 
   2886     pwd->d[0]  = msa_min_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2887     pwd->d[1]  = msa_min_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2888 }
   2889 
   2890 
   2891 static inline int64_t msa_min_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   2892 {
   2893     return arg1 < arg2 ? arg1 : arg2;
   2894 }
   2895 
   2896 void helper_msa_min_s_b(CPUMIPSState *env,
   2897                         uint32_t wd, uint32_t ws, uint32_t wt)
   2898 {
   2899     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2900     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2901     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2902 
   2903     pwd->b[0]  = msa_min_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   2904     pwd->b[1]  = msa_min_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   2905     pwd->b[2]  = msa_min_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   2906     pwd->b[3]  = msa_min_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   2907     pwd->b[4]  = msa_min_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   2908     pwd->b[5]  = msa_min_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   2909     pwd->b[6]  = msa_min_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   2910     pwd->b[7]  = msa_min_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   2911     pwd->b[8]  = msa_min_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   2912     pwd->b[9]  = msa_min_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   2913     pwd->b[10] = msa_min_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   2914     pwd->b[11] = msa_min_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   2915     pwd->b[12] = msa_min_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   2916     pwd->b[13] = msa_min_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   2917     pwd->b[14] = msa_min_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   2918     pwd->b[15] = msa_min_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   2919 }
   2920 
   2921 void helper_msa_min_s_h(CPUMIPSState *env,
   2922                         uint32_t wd, uint32_t ws, uint32_t wt)
   2923 {
   2924     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2925     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2926     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2927 
   2928     pwd->h[0]  = msa_min_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   2929     pwd->h[1]  = msa_min_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   2930     pwd->h[2]  = msa_min_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   2931     pwd->h[3]  = msa_min_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   2932     pwd->h[4]  = msa_min_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   2933     pwd->h[5]  = msa_min_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   2934     pwd->h[6]  = msa_min_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   2935     pwd->h[7]  = msa_min_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   2936 }
   2937 
   2938 void helper_msa_min_s_w(CPUMIPSState *env,
   2939                         uint32_t wd, uint32_t ws, uint32_t wt)
   2940 {
   2941     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2942     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2943     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2944 
   2945     pwd->w[0]  = msa_min_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   2946     pwd->w[1]  = msa_min_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   2947     pwd->w[2]  = msa_min_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   2948     pwd->w[3]  = msa_min_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   2949 }
   2950 
   2951 void helper_msa_min_s_d(CPUMIPSState *env,
   2952                         uint32_t wd, uint32_t ws, uint32_t wt)
   2953 {
   2954     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2955     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2956     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2957 
   2958     pwd->d[0]  = msa_min_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   2959     pwd->d[1]  = msa_min_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   2960 }
   2961 
   2962 
   2963 static inline int64_t msa_min_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   2964 {
   2965     uint64_t u_arg1 = UNSIGNED(arg1, df);
   2966     uint64_t u_arg2 = UNSIGNED(arg2, df);
   2967     return u_arg1 < u_arg2 ? arg1 : arg2;
   2968 }
   2969 
   2970 void helper_msa_min_u_b(CPUMIPSState *env,
   2971                         uint32_t wd, uint32_t ws, uint32_t wt)
   2972 {
   2973     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2974     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   2975     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   2976 
   2977     pwd->b[0]  = msa_min_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   2978     pwd->b[1]  = msa_min_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   2979     pwd->b[2]  = msa_min_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   2980     pwd->b[3]  = msa_min_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   2981     pwd->b[4]  = msa_min_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   2982     pwd->b[5]  = msa_min_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   2983     pwd->b[6]  = msa_min_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   2984     pwd->b[7]  = msa_min_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   2985     pwd->b[8]  = msa_min_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   2986     pwd->b[9]  = msa_min_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   2987     pwd->b[10] = msa_min_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   2988     pwd->b[11] = msa_min_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   2989     pwd->b[12] = msa_min_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   2990     pwd->b[13] = msa_min_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   2991     pwd->b[14] = msa_min_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   2992     pwd->b[15] = msa_min_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   2993 }
   2994 
   2995 void helper_msa_min_u_h(CPUMIPSState *env,
   2996                         uint32_t wd, uint32_t ws, uint32_t wt)
   2997 {
   2998     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   2999     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3000     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3001 
   3002     pwd->h[0]  = msa_min_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3003     pwd->h[1]  = msa_min_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3004     pwd->h[2]  = msa_min_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3005     pwd->h[3]  = msa_min_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3006     pwd->h[4]  = msa_min_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3007     pwd->h[5]  = msa_min_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3008     pwd->h[6]  = msa_min_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3009     pwd->h[7]  = msa_min_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3010 }
   3011 
   3012 void helper_msa_min_u_w(CPUMIPSState *env,
   3013                         uint32_t wd, uint32_t ws, uint32_t wt)
   3014 {
   3015     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3016     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3017     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3018 
   3019     pwd->w[0]  = msa_min_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3020     pwd->w[1]  = msa_min_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3021     pwd->w[2]  = msa_min_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3022     pwd->w[3]  = msa_min_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3023 }
   3024 
   3025 void helper_msa_min_u_d(CPUMIPSState *env,
   3026                         uint32_t wd, uint32_t ws, uint32_t wt)
   3027 {
   3028     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3029     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3030     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3031 
   3032     pwd->d[0]  = msa_min_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3033     pwd->d[1]  = msa_min_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3034 }
   3035 
   3036 
   3037 /*
   3038  * Int Modulo
   3039  * ----------
   3040  *
   3041  * +---------------+----------------------------------------------------------+
   3042  * | MOD_S.B       | Vector Signed Modulo (byte)                              |
   3043  * | MOD_S.H       | Vector Signed Modulo (halfword)                          |
   3044  * | MOD_S.W       | Vector Signed Modulo (word)                              |
   3045  * | MOD_S.D       | Vector Signed Modulo (doubleword)                        |
   3046  * | MOD_U.B       | Vector Unsigned Modulo (byte)                            |
   3047  * | MOD_U.H       | Vector Unsigned Modulo (halfword)                        |
   3048  * | MOD_U.W       | Vector Unsigned Modulo (word)                            |
   3049  * | MOD_U.D       | Vector Unsigned Modulo (doubleword)                      |
   3050  * +---------------+----------------------------------------------------------+
   3051  */
   3052 
   3053 static inline int64_t msa_mod_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   3054 {
   3055     if (arg1 == DF_MIN_INT(df) && arg2 == -1) {
   3056         return 0;
   3057     }
   3058     return arg2 ? arg1 % arg2 : arg1;
   3059 }
   3060 
   3061 void helper_msa_mod_s_b(CPUMIPSState *env,
   3062                         uint32_t wd, uint32_t ws, uint32_t wt)
   3063 {
   3064     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3065     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3066     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3067 
   3068     pwd->b[0]  = msa_mod_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3069     pwd->b[1]  = msa_mod_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3070     pwd->b[2]  = msa_mod_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3071     pwd->b[3]  = msa_mod_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3072     pwd->b[4]  = msa_mod_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3073     pwd->b[5]  = msa_mod_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3074     pwd->b[6]  = msa_mod_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3075     pwd->b[7]  = msa_mod_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3076     pwd->b[8]  = msa_mod_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3077     pwd->b[9]  = msa_mod_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3078     pwd->b[10] = msa_mod_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3079     pwd->b[11] = msa_mod_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3080     pwd->b[12] = msa_mod_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3081     pwd->b[13] = msa_mod_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3082     pwd->b[14] = msa_mod_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   3083     pwd->b[15] = msa_mod_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   3084 }
   3085 
   3086 void helper_msa_mod_s_h(CPUMIPSState *env,
   3087                         uint32_t wd, uint32_t ws, uint32_t wt)
   3088 {
   3089     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3090     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3091     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3092 
   3093     pwd->h[0]  = msa_mod_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3094     pwd->h[1]  = msa_mod_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3095     pwd->h[2]  = msa_mod_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3096     pwd->h[3]  = msa_mod_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3097     pwd->h[4]  = msa_mod_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3098     pwd->h[5]  = msa_mod_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3099     pwd->h[6]  = msa_mod_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3100     pwd->h[7]  = msa_mod_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3101 }
   3102 
   3103 void helper_msa_mod_s_w(CPUMIPSState *env,
   3104                         uint32_t wd, uint32_t ws, uint32_t wt)
   3105 {
   3106     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3107     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3108     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3109 
   3110     pwd->w[0]  = msa_mod_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3111     pwd->w[1]  = msa_mod_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3112     pwd->w[2]  = msa_mod_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3113     pwd->w[3]  = msa_mod_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3114 }
   3115 
   3116 void helper_msa_mod_s_d(CPUMIPSState *env,
   3117                         uint32_t wd, uint32_t ws, uint32_t wt)
   3118 {
   3119     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3120     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3121     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3122 
   3123     pwd->d[0]  = msa_mod_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3124     pwd->d[1]  = msa_mod_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3125 }
   3126 
   3127 static inline int64_t msa_mod_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   3128 {
   3129     uint64_t u_arg1 = UNSIGNED(arg1, df);
   3130     uint64_t u_arg2 = UNSIGNED(arg2, df);
   3131     return u_arg2 ? u_arg1 % u_arg2 : u_arg1;
   3132 }
   3133 
   3134 void helper_msa_mod_u_b(CPUMIPSState *env,
   3135                         uint32_t wd, uint32_t ws, uint32_t wt)
   3136 {
   3137     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3138     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3139     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3140 
   3141     pwd->b[0]  = msa_mod_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3142     pwd->b[1]  = msa_mod_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3143     pwd->b[2]  = msa_mod_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3144     pwd->b[3]  = msa_mod_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3145     pwd->b[4]  = msa_mod_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3146     pwd->b[5]  = msa_mod_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3147     pwd->b[6]  = msa_mod_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3148     pwd->b[7]  = msa_mod_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3149     pwd->b[8]  = msa_mod_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3150     pwd->b[9]  = msa_mod_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3151     pwd->b[10] = msa_mod_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3152     pwd->b[11] = msa_mod_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3153     pwd->b[12] = msa_mod_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3154     pwd->b[13] = msa_mod_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3155     pwd->b[14] = msa_mod_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   3156     pwd->b[15] = msa_mod_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   3157 }
   3158 
   3159 void helper_msa_mod_u_h(CPUMIPSState *env,
   3160                         uint32_t wd, uint32_t ws, uint32_t wt)
   3161 {
   3162     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3163     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3164     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3165 
   3166     pwd->h[0]  = msa_mod_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3167     pwd->h[1]  = msa_mod_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3168     pwd->h[2]  = msa_mod_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3169     pwd->h[3]  = msa_mod_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3170     pwd->h[4]  = msa_mod_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3171     pwd->h[5]  = msa_mod_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3172     pwd->h[6]  = msa_mod_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3173     pwd->h[7]  = msa_mod_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3174 }
   3175 
   3176 void helper_msa_mod_u_w(CPUMIPSState *env,
   3177                         uint32_t wd, uint32_t ws, uint32_t wt)
   3178 {
   3179     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3180     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3181     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3182 
   3183     pwd->w[0]  = msa_mod_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3184     pwd->w[1]  = msa_mod_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3185     pwd->w[2]  = msa_mod_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3186     pwd->w[3]  = msa_mod_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3187 }
   3188 
   3189 void helper_msa_mod_u_d(CPUMIPSState *env,
   3190                         uint32_t wd, uint32_t ws, uint32_t wt)
   3191 {
   3192     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3193     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3194     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3195 
   3196     pwd->d[0]  = msa_mod_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3197     pwd->d[1]  = msa_mod_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3198 }
   3199 
   3200 
   3201 /*
   3202  * Int Multiply
   3203  * ------------
   3204  *
   3205  * +---------------+----------------------------------------------------------+
   3206  * | MADDV.B       | Vector Multiply and Add (byte)                           |
   3207  * | MADDV.H       | Vector Multiply and Add (halfword)                       |
   3208  * | MADDV.W       | Vector Multiply and Add (word)                           |
   3209  * | MADDV.D       | Vector Multiply and Add (doubleword)                     |
   3210  * | MSUBV.B       | Vector Multiply and Subtract (byte)                      |
   3211  * | MSUBV.H       | Vector Multiply and Subtract (halfword)                  |
   3212  * | MSUBV.W       | Vector Multiply and Subtract (word)                      |
   3213  * | MSUBV.D       | Vector Multiply and Subtract (doubleword)                |
   3214  * | MULV.B        | Vector Multiply (byte)                                   |
   3215  * | MULV.H        | Vector Multiply (halfword)                               |
   3216  * | MULV.W        | Vector Multiply (word)                                   |
   3217  * | MULV.D        | Vector Multiply (doubleword)                             |
   3218  * +---------------+----------------------------------------------------------+
   3219  */
   3220 
   3221 static inline int64_t msa_maddv_df(uint32_t df, int64_t dest, int64_t arg1,
   3222                                    int64_t arg2)
   3223 {
   3224     return dest + arg1 * arg2;
   3225 }
   3226 
   3227 void helper_msa_maddv_b(CPUMIPSState *env,
   3228                         uint32_t wd, uint32_t ws, uint32_t wt)
   3229 {
   3230     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3231     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3232     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3233 
   3234     pwd->b[0]  = msa_maddv_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
   3235     pwd->b[1]  = msa_maddv_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
   3236     pwd->b[2]  = msa_maddv_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
   3237     pwd->b[3]  = msa_maddv_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
   3238     pwd->b[4]  = msa_maddv_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
   3239     pwd->b[5]  = msa_maddv_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
   3240     pwd->b[6]  = msa_maddv_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
   3241     pwd->b[7]  = msa_maddv_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
   3242     pwd->b[8]  = msa_maddv_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
   3243     pwd->b[9]  = msa_maddv_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
   3244     pwd->b[10] = msa_maddv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
   3245     pwd->b[11] = msa_maddv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
   3246     pwd->b[12] = msa_maddv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
   3247     pwd->b[13] = msa_maddv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
   3248     pwd->b[14] = msa_maddv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
   3249     pwd->b[15] = msa_maddv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
   3250 }
   3251 
   3252 void helper_msa_maddv_h(CPUMIPSState *env,
   3253                         uint32_t wd, uint32_t ws, uint32_t wt)
   3254 {
   3255     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3256     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3257     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3258 
   3259     pwd->h[0]  = msa_maddv_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
   3260     pwd->h[1]  = msa_maddv_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
   3261     pwd->h[2]  = msa_maddv_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
   3262     pwd->h[3]  = msa_maddv_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
   3263     pwd->h[4]  = msa_maddv_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
   3264     pwd->h[5]  = msa_maddv_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
   3265     pwd->h[6]  = msa_maddv_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
   3266     pwd->h[7]  = msa_maddv_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
   3267 }
   3268 
   3269 void helper_msa_maddv_w(CPUMIPSState *env,
   3270                         uint32_t wd, uint32_t ws, uint32_t wt)
   3271 {
   3272     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3273     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3274     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3275 
   3276     pwd->w[0]  = msa_maddv_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
   3277     pwd->w[1]  = msa_maddv_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
   3278     pwd->w[2]  = msa_maddv_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
   3279     pwd->w[3]  = msa_maddv_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
   3280 }
   3281 
   3282 void helper_msa_maddv_d(CPUMIPSState *env,
   3283                         uint32_t wd, uint32_t ws, uint32_t wt)
   3284 {
   3285     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3286     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3287     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3288 
   3289     pwd->d[0]  = msa_maddv_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
   3290     pwd->d[1]  = msa_maddv_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
   3291 }
   3292 
   3293 static inline int64_t msa_msubv_df(uint32_t df, int64_t dest, int64_t arg1,
   3294                                    int64_t arg2)
   3295 {
   3296     return dest - arg1 * arg2;
   3297 }
   3298 
   3299 void helper_msa_msubv_b(CPUMIPSState *env,
   3300                         uint32_t wd, uint32_t ws, uint32_t wt)
   3301 {
   3302     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3303     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3304     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3305 
   3306     pwd->b[0]  = msa_msubv_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
   3307     pwd->b[1]  = msa_msubv_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
   3308     pwd->b[2]  = msa_msubv_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
   3309     pwd->b[3]  = msa_msubv_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
   3310     pwd->b[4]  = msa_msubv_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
   3311     pwd->b[5]  = msa_msubv_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
   3312     pwd->b[6]  = msa_msubv_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
   3313     pwd->b[7]  = msa_msubv_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
   3314     pwd->b[8]  = msa_msubv_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
   3315     pwd->b[9]  = msa_msubv_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
   3316     pwd->b[10] = msa_msubv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
   3317     pwd->b[11] = msa_msubv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
   3318     pwd->b[12] = msa_msubv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
   3319     pwd->b[13] = msa_msubv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
   3320     pwd->b[14] = msa_msubv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
   3321     pwd->b[15] = msa_msubv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
   3322 }
   3323 
   3324 void helper_msa_msubv_h(CPUMIPSState *env,
   3325                         uint32_t wd, uint32_t ws, uint32_t wt)
   3326 {
   3327     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3328     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3329     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3330 
   3331     pwd->h[0]  = msa_msubv_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
   3332     pwd->h[1]  = msa_msubv_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
   3333     pwd->h[2]  = msa_msubv_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
   3334     pwd->h[3]  = msa_msubv_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
   3335     pwd->h[4]  = msa_msubv_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
   3336     pwd->h[5]  = msa_msubv_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
   3337     pwd->h[6]  = msa_msubv_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
   3338     pwd->h[7]  = msa_msubv_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
   3339 }
   3340 
   3341 void helper_msa_msubv_w(CPUMIPSState *env,
   3342                         uint32_t wd, uint32_t ws, uint32_t wt)
   3343 {
   3344     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3345     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3346     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3347 
   3348     pwd->w[0]  = msa_msubv_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
   3349     pwd->w[1]  = msa_msubv_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
   3350     pwd->w[2]  = msa_msubv_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
   3351     pwd->w[3]  = msa_msubv_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
   3352 }
   3353 
   3354 void helper_msa_msubv_d(CPUMIPSState *env,
   3355                         uint32_t wd, uint32_t ws, uint32_t wt)
   3356 {
   3357     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3358     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3359     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3360 
   3361     pwd->d[0]  = msa_msubv_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
   3362     pwd->d[1]  = msa_msubv_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
   3363 }
   3364 
   3365 
   3366 static inline int64_t msa_mulv_df(uint32_t df, int64_t arg1, int64_t arg2)
   3367 {
   3368     return arg1 * arg2;
   3369 }
   3370 
   3371 void helper_msa_mulv_b(CPUMIPSState *env,
   3372                        uint32_t wd, uint32_t ws, uint32_t wt)
   3373 {
   3374     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3375     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3376     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3377 
   3378     pwd->b[0]  = msa_mulv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3379     pwd->b[1]  = msa_mulv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3380     pwd->b[2]  = msa_mulv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3381     pwd->b[3]  = msa_mulv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3382     pwd->b[4]  = msa_mulv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3383     pwd->b[5]  = msa_mulv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3384     pwd->b[6]  = msa_mulv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3385     pwd->b[7]  = msa_mulv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3386     pwd->b[8]  = msa_mulv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3387     pwd->b[9]  = msa_mulv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3388     pwd->b[10] = msa_mulv_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3389     pwd->b[11] = msa_mulv_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3390     pwd->b[12] = msa_mulv_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3391     pwd->b[13] = msa_mulv_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3392     pwd->b[14] = msa_mulv_df(DF_BYTE, pws->b[14], pwt->b[14]);
   3393     pwd->b[15] = msa_mulv_df(DF_BYTE, pws->b[15], pwt->b[15]);
   3394 }
   3395 
   3396 void helper_msa_mulv_h(CPUMIPSState *env,
   3397                        uint32_t wd, uint32_t ws, uint32_t wt)
   3398 {
   3399     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3400     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3401     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3402 
   3403     pwd->h[0]  = msa_mulv_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3404     pwd->h[1]  = msa_mulv_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3405     pwd->h[2]  = msa_mulv_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3406     pwd->h[3]  = msa_mulv_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3407     pwd->h[4]  = msa_mulv_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3408     pwd->h[5]  = msa_mulv_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3409     pwd->h[6]  = msa_mulv_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3410     pwd->h[7]  = msa_mulv_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3411 }
   3412 
   3413 void helper_msa_mulv_w(CPUMIPSState *env,
   3414                        uint32_t wd, uint32_t ws, uint32_t wt)
   3415 {
   3416     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3417     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3418     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3419 
   3420     pwd->w[0]  = msa_mulv_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3421     pwd->w[1]  = msa_mulv_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3422     pwd->w[2]  = msa_mulv_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3423     pwd->w[3]  = msa_mulv_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3424 }
   3425 
   3426 void helper_msa_mulv_d(CPUMIPSState *env,
   3427                        uint32_t wd, uint32_t ws, uint32_t wt)
   3428 {
   3429     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3430     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3431     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3432 
   3433     pwd->d[0]  = msa_mulv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3434     pwd->d[1]  = msa_mulv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3435 }
   3436 
   3437 
   3438 /*
   3439  * Int Subtract
   3440  * ------------
   3441  *
   3442  * +---------------+----------------------------------------------------------+
   3443  * | ASUB_S.B      | Vector Absolute Values of Signed Subtract (byte)         |
   3444  * | ASUB_S.H      | Vector Absolute Values of Signed Subtract (halfword)     |
   3445  * | ASUB_S.W      | Vector Absolute Values of Signed Subtract (word)         |
   3446  * | ASUB_S.D      | Vector Absolute Values of Signed Subtract (doubleword)   |
   3447  * | ASUB_U.B      | Vector Absolute Values of Unsigned Subtract (byte)       |
   3448  * | ASUB_U.H      | Vector Absolute Values of Unsigned Subtract (halfword)   |
   3449  * | ASUB_U.W      | Vector Absolute Values of Unsigned Subtract (word)       |
   3450  * | ASUB_U.D      | Vector Absolute Values of Unsigned Subtract (doubleword) |
   3451  * | HSUB_S.H      | Vector Signed Horizontal Subtract (halfword)             |
   3452  * | HSUB_S.W      | Vector Signed Horizontal Subtract (word)                 |
   3453  * | HSUB_S.D      | Vector Signed Horizontal Subtract (doubleword)           |
   3454  * | HSUB_U.H      | Vector Unsigned Horizontal Subtract (halfword)           |
   3455  * | HSUB_U.W      | Vector Unsigned Horizontal Subtract (word)               |
   3456  * | HSUB_U.D      | Vector Unsigned Horizontal Subtract (doubleword)         |
   3457  * | SUBS_S.B      | Vector Signed Saturated Subtract (of Signed) (byte)      |
   3458  * | SUBS_S.H      | Vector Signed Saturated Subtract (of Signed) (halfword)  |
   3459  * | SUBS_S.W      | Vector Signed Saturated Subtract (of Signed) (word)      |
   3460  * | SUBS_S.D      | Vector Signed Saturated Subtract (of Signed) (doubleword)|
   3461  * | SUBS_U.B      | Vector Unsigned Saturated Subtract (of Uns.) (byte)      |
   3462  * | SUBS_U.H      | Vector Unsigned Saturated Subtract (of Uns.) (halfword)  |
   3463  * | SUBS_U.W      | Vector Unsigned Saturated Subtract (of Uns.) (word)      |
   3464  * | SUBS_U.D      | Vector Unsigned Saturated Subtract (of Uns.) (doubleword)|
   3465  * | SUBSUS_U.B    | Vector Uns. Sat. Subtract (of S. from Uns.) (byte)       |
   3466  * | SUBSUS_U.H    | Vector Uns. Sat. Subtract (of S. from Uns.) (halfword)   |
   3467  * | SUBSUS_U.W    | Vector Uns. Sat. Subtract (of S. from Uns.) (word)       |
   3468  * | SUBSUS_U.D    | Vector Uns. Sat. Subtract (of S. from Uns.) (doubleword) |
   3469  * | SUBSUU_S.B    | Vector Signed Saturated Subtract (of Uns.) (byte)        |
   3470  * | SUBSUU_S.H    | Vector Signed Saturated Subtract (of Uns.) (halfword)    |
   3471  * | SUBSUU_S.W    | Vector Signed Saturated Subtract (of Uns.) (word)        |
   3472  * | SUBSUU_S.D    | Vector Signed Saturated Subtract (of Uns.) (doubleword)  |
   3473  * | SUBV.B        | Vector Subtract (byte)                                   |
   3474  * | SUBV.H        | Vector Subtract (halfword)                               |
   3475  * | SUBV.W        | Vector Subtract (word)                                   |
   3476  * | SUBV.D        | Vector Subtract (doubleword)                             |
   3477  * +---------------+----------------------------------------------------------+
   3478  */
   3479 
   3480 
   3481 static inline int64_t msa_asub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   3482 {
   3483     /* signed compare */
   3484     return (arg1 < arg2) ?
   3485         (uint64_t)(arg2 - arg1) : (uint64_t)(arg1 - arg2);
   3486 }
   3487 
   3488 void helper_msa_asub_s_b(CPUMIPSState *env,
   3489                          uint32_t wd, uint32_t ws, uint32_t wt)
   3490 {
   3491     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3492     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3493     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3494 
   3495     pwd->b[0]  = msa_asub_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3496     pwd->b[1]  = msa_asub_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3497     pwd->b[2]  = msa_asub_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3498     pwd->b[3]  = msa_asub_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3499     pwd->b[4]  = msa_asub_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3500     pwd->b[5]  = msa_asub_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3501     pwd->b[6]  = msa_asub_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3502     pwd->b[7]  = msa_asub_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3503     pwd->b[8]  = msa_asub_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3504     pwd->b[9]  = msa_asub_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3505     pwd->b[10] = msa_asub_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3506     pwd->b[11] = msa_asub_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3507     pwd->b[12] = msa_asub_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3508     pwd->b[13] = msa_asub_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3509     pwd->b[14] = msa_asub_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   3510     pwd->b[15] = msa_asub_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   3511 }
   3512 
   3513 void helper_msa_asub_s_h(CPUMIPSState *env,
   3514                          uint32_t wd, uint32_t ws, uint32_t wt)
   3515 {
   3516     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3517     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3518     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3519 
   3520     pwd->h[0]  = msa_asub_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3521     pwd->h[1]  = msa_asub_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3522     pwd->h[2]  = msa_asub_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3523     pwd->h[3]  = msa_asub_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3524     pwd->h[4]  = msa_asub_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3525     pwd->h[5]  = msa_asub_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3526     pwd->h[6]  = msa_asub_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3527     pwd->h[7]  = msa_asub_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3528 }
   3529 
   3530 void helper_msa_asub_s_w(CPUMIPSState *env,
   3531                          uint32_t wd, uint32_t ws, uint32_t wt)
   3532 {
   3533     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3534     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3535     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3536 
   3537     pwd->w[0]  = msa_asub_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3538     pwd->w[1]  = msa_asub_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3539     pwd->w[2]  = msa_asub_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3540     pwd->w[3]  = msa_asub_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3541 }
   3542 
   3543 void helper_msa_asub_s_d(CPUMIPSState *env,
   3544                          uint32_t wd, uint32_t ws, uint32_t wt)
   3545 {
   3546     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3547     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3548     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3549 
   3550     pwd->d[0]  = msa_asub_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3551     pwd->d[1]  = msa_asub_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3552 }
   3553 
   3554 
   3555 static inline uint64_t msa_asub_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
   3556 {
   3557     uint64_t u_arg1 = UNSIGNED(arg1, df);
   3558     uint64_t u_arg2 = UNSIGNED(arg2, df);
   3559     /* unsigned compare */
   3560     return (u_arg1 < u_arg2) ?
   3561         (uint64_t)(u_arg2 - u_arg1) : (uint64_t)(u_arg1 - u_arg2);
   3562 }
   3563 
   3564 void helper_msa_asub_u_b(CPUMIPSState *env,
   3565                          uint32_t wd, uint32_t ws, uint32_t wt)
   3566 {
   3567     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3568     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3569     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3570 
   3571     pwd->b[0]  = msa_asub_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3572     pwd->b[1]  = msa_asub_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3573     pwd->b[2]  = msa_asub_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3574     pwd->b[3]  = msa_asub_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3575     pwd->b[4]  = msa_asub_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3576     pwd->b[5]  = msa_asub_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3577     pwd->b[6]  = msa_asub_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3578     pwd->b[7]  = msa_asub_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3579     pwd->b[8]  = msa_asub_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3580     pwd->b[9]  = msa_asub_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3581     pwd->b[10] = msa_asub_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3582     pwd->b[11] = msa_asub_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3583     pwd->b[12] = msa_asub_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3584     pwd->b[13] = msa_asub_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3585     pwd->b[14] = msa_asub_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   3586     pwd->b[15] = msa_asub_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   3587 }
   3588 
   3589 void helper_msa_asub_u_h(CPUMIPSState *env,
   3590                          uint32_t wd, uint32_t ws, uint32_t wt)
   3591 {
   3592     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3593     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3594     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3595 
   3596     pwd->h[0]  = msa_asub_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3597     pwd->h[1]  = msa_asub_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3598     pwd->h[2]  = msa_asub_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3599     pwd->h[3]  = msa_asub_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3600     pwd->h[4]  = msa_asub_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3601     pwd->h[5]  = msa_asub_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3602     pwd->h[6]  = msa_asub_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3603     pwd->h[7]  = msa_asub_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3604 }
   3605 
   3606 void helper_msa_asub_u_w(CPUMIPSState *env,
   3607                          uint32_t wd, uint32_t ws, uint32_t wt)
   3608 {
   3609     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3610     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3611     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3612 
   3613     pwd->w[0]  = msa_asub_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3614     pwd->w[1]  = msa_asub_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3615     pwd->w[2]  = msa_asub_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3616     pwd->w[3]  = msa_asub_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3617 }
   3618 
   3619 void helper_msa_asub_u_d(CPUMIPSState *env,
   3620                          uint32_t wd, uint32_t ws, uint32_t wt)
   3621 {
   3622     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3623     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3624     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3625 
   3626     pwd->d[0]  = msa_asub_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3627     pwd->d[1]  = msa_asub_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3628 }
   3629 
   3630 
   3631 static inline int64_t msa_hsub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   3632 {
   3633     return SIGNED_ODD(arg1, df) - SIGNED_EVEN(arg2, df);
   3634 }
   3635 
   3636 void helper_msa_hsub_s_h(CPUMIPSState *env,
   3637                          uint32_t wd, uint32_t ws, uint32_t wt)
   3638 {
   3639     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3640     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3641     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3642 
   3643     pwd->h[0]  = msa_hsub_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3644     pwd->h[1]  = msa_hsub_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3645     pwd->h[2]  = msa_hsub_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3646     pwd->h[3]  = msa_hsub_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3647     pwd->h[4]  = msa_hsub_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3648     pwd->h[5]  = msa_hsub_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3649     pwd->h[6]  = msa_hsub_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3650     pwd->h[7]  = msa_hsub_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3651 }
   3652 
   3653 void helper_msa_hsub_s_w(CPUMIPSState *env,
   3654                          uint32_t wd, uint32_t ws, uint32_t wt)
   3655 {
   3656     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3657     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3658     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3659 
   3660     pwd->w[0]  = msa_hsub_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3661     pwd->w[1]  = msa_hsub_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3662     pwd->w[2]  = msa_hsub_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3663     pwd->w[3]  = msa_hsub_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3664 }
   3665 
   3666 void helper_msa_hsub_s_d(CPUMIPSState *env,
   3667                          uint32_t wd, uint32_t ws, uint32_t wt)
   3668 {
   3669     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3670     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3671     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3672 
   3673     pwd->d[0]  = msa_hsub_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3674     pwd->d[1]  = msa_hsub_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3675 }
   3676 
   3677 
   3678 static inline int64_t msa_hsub_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   3679 {
   3680     return UNSIGNED_ODD(arg1, df) - UNSIGNED_EVEN(arg2, df);
   3681 }
   3682 
   3683 void helper_msa_hsub_u_h(CPUMIPSState *env,
   3684                          uint32_t wd, uint32_t ws, uint32_t wt)
   3685 {
   3686     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3687     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3688     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3689 
   3690     pwd->h[0]  = msa_hsub_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3691     pwd->h[1]  = msa_hsub_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3692     pwd->h[2]  = msa_hsub_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3693     pwd->h[3]  = msa_hsub_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3694     pwd->h[4]  = msa_hsub_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3695     pwd->h[5]  = msa_hsub_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3696     pwd->h[6]  = msa_hsub_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3697     pwd->h[7]  = msa_hsub_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3698 }
   3699 
   3700 void helper_msa_hsub_u_w(CPUMIPSState *env,
   3701                          uint32_t wd, uint32_t ws, uint32_t wt)
   3702 {
   3703     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3704     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3705     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3706 
   3707     pwd->w[0]  = msa_hsub_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3708     pwd->w[1]  = msa_hsub_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3709     pwd->w[2]  = msa_hsub_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3710     pwd->w[3]  = msa_hsub_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3711 }
   3712 
   3713 void helper_msa_hsub_u_d(CPUMIPSState *env,
   3714                          uint32_t wd, uint32_t ws, uint32_t wt)
   3715 {
   3716     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3717     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3718     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3719 
   3720     pwd->d[0]  = msa_hsub_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3721     pwd->d[1]  = msa_hsub_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3722 }
   3723 
   3724 
   3725 static inline int64_t msa_subs_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   3726 {
   3727     int64_t max_int = DF_MAX_INT(df);
   3728     int64_t min_int = DF_MIN_INT(df);
   3729     if (arg2 > 0) {
   3730         return (min_int + arg2 < arg1) ? arg1 - arg2 : min_int;
   3731     } else {
   3732         return (arg1 < max_int + arg2) ? arg1 - arg2 : max_int;
   3733     }
   3734 }
   3735 
   3736 void helper_msa_subs_s_b(CPUMIPSState *env,
   3737                          uint32_t wd, uint32_t ws, uint32_t wt)
   3738 {
   3739     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3740     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3741     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3742 
   3743     pwd->b[0]  = msa_subs_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3744     pwd->b[1]  = msa_subs_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3745     pwd->b[2]  = msa_subs_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3746     pwd->b[3]  = msa_subs_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3747     pwd->b[4]  = msa_subs_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3748     pwd->b[5]  = msa_subs_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3749     pwd->b[6]  = msa_subs_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3750     pwd->b[7]  = msa_subs_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3751     pwd->b[8]  = msa_subs_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3752     pwd->b[9]  = msa_subs_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3753     pwd->b[10] = msa_subs_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3754     pwd->b[11] = msa_subs_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3755     pwd->b[12] = msa_subs_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3756     pwd->b[13] = msa_subs_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3757     pwd->b[14] = msa_subs_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   3758     pwd->b[15] = msa_subs_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   3759 }
   3760 
   3761 void helper_msa_subs_s_h(CPUMIPSState *env,
   3762                          uint32_t wd, uint32_t ws, uint32_t wt)
   3763 {
   3764     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3765     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3766     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3767 
   3768     pwd->h[0]  = msa_subs_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3769     pwd->h[1]  = msa_subs_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3770     pwd->h[2]  = msa_subs_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3771     pwd->h[3]  = msa_subs_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3772     pwd->h[4]  = msa_subs_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3773     pwd->h[5]  = msa_subs_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3774     pwd->h[6]  = msa_subs_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3775     pwd->h[7]  = msa_subs_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3776 }
   3777 
   3778 void helper_msa_subs_s_w(CPUMIPSState *env,
   3779                          uint32_t wd, uint32_t ws, uint32_t wt)
   3780 {
   3781     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3782     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3783     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3784 
   3785     pwd->w[0]  = msa_subs_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3786     pwd->w[1]  = msa_subs_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3787     pwd->w[2]  = msa_subs_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3788     pwd->w[3]  = msa_subs_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3789 }
   3790 
   3791 void helper_msa_subs_s_d(CPUMIPSState *env,
   3792                          uint32_t wd, uint32_t ws, uint32_t wt)
   3793 {
   3794     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3795     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3796     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3797 
   3798     pwd->d[0]  = msa_subs_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3799     pwd->d[1]  = msa_subs_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3800 }
   3801 
   3802 
   3803 static inline int64_t msa_subs_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   3804 {
   3805     uint64_t u_arg1 = UNSIGNED(arg1, df);
   3806     uint64_t u_arg2 = UNSIGNED(arg2, df);
   3807     return (u_arg1 > u_arg2) ? u_arg1 - u_arg2 : 0;
   3808 }
   3809 
   3810 void helper_msa_subs_u_b(CPUMIPSState *env,
   3811                          uint32_t wd, uint32_t ws, uint32_t wt)
   3812 {
   3813     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3814     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3815     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3816 
   3817     pwd->b[0]  = msa_subs_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3818     pwd->b[1]  = msa_subs_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3819     pwd->b[2]  = msa_subs_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3820     pwd->b[3]  = msa_subs_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3821     pwd->b[4]  = msa_subs_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3822     pwd->b[5]  = msa_subs_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3823     pwd->b[6]  = msa_subs_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3824     pwd->b[7]  = msa_subs_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3825     pwd->b[8]  = msa_subs_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3826     pwd->b[9]  = msa_subs_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3827     pwd->b[10] = msa_subs_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3828     pwd->b[11] = msa_subs_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3829     pwd->b[12] = msa_subs_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3830     pwd->b[13] = msa_subs_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3831     pwd->b[14] = msa_subs_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   3832     pwd->b[15] = msa_subs_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   3833 }
   3834 
   3835 void helper_msa_subs_u_h(CPUMIPSState *env,
   3836                          uint32_t wd, uint32_t ws, uint32_t wt)
   3837 {
   3838     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3839     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3840     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3841 
   3842     pwd->h[0]  = msa_subs_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3843     pwd->h[1]  = msa_subs_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3844     pwd->h[2]  = msa_subs_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3845     pwd->h[3]  = msa_subs_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3846     pwd->h[4]  = msa_subs_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3847     pwd->h[5]  = msa_subs_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3848     pwd->h[6]  = msa_subs_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3849     pwd->h[7]  = msa_subs_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3850 }
   3851 
   3852 void helper_msa_subs_u_w(CPUMIPSState *env,
   3853                          uint32_t wd, uint32_t ws, uint32_t wt)
   3854 {
   3855     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3856     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3857     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3858 
   3859     pwd->w[0]  = msa_subs_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3860     pwd->w[1]  = msa_subs_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3861     pwd->w[2]  = msa_subs_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3862     pwd->w[3]  = msa_subs_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3863 }
   3864 
   3865 void helper_msa_subs_u_d(CPUMIPSState *env,
   3866                          uint32_t wd, uint32_t ws, uint32_t wt)
   3867 {
   3868     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3869     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3870     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3871 
   3872     pwd->d[0]  = msa_subs_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3873     pwd->d[1]  = msa_subs_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3874 }
   3875 
   3876 
   3877 static inline int64_t msa_subsus_u_df(uint32_t df, int64_t arg1, int64_t arg2)
   3878 {
   3879     uint64_t u_arg1 = UNSIGNED(arg1, df);
   3880     uint64_t max_uint = DF_MAX_UINT(df);
   3881     if (arg2 >= 0) {
   3882         uint64_t u_arg2 = (uint64_t)arg2;
   3883         return (u_arg1 > u_arg2) ?
   3884             (int64_t)(u_arg1 - u_arg2) :
   3885             0;
   3886     } else {
   3887         uint64_t u_arg2 = (uint64_t)(-arg2);
   3888         return (u_arg1 < max_uint - u_arg2) ?
   3889             (int64_t)(u_arg1 + u_arg2) :
   3890             (int64_t)max_uint;
   3891     }
   3892 }
   3893 
   3894 void helper_msa_subsus_u_b(CPUMIPSState *env,
   3895                            uint32_t wd, uint32_t ws, uint32_t wt)
   3896 {
   3897     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3898     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3899     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3900 
   3901     pwd->b[0]  = msa_subsus_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3902     pwd->b[1]  = msa_subsus_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3903     pwd->b[2]  = msa_subsus_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3904     pwd->b[3]  = msa_subsus_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3905     pwd->b[4]  = msa_subsus_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3906     pwd->b[5]  = msa_subsus_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3907     pwd->b[6]  = msa_subsus_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3908     pwd->b[7]  = msa_subsus_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3909     pwd->b[8]  = msa_subsus_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3910     pwd->b[9]  = msa_subsus_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3911     pwd->b[10] = msa_subsus_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3912     pwd->b[11] = msa_subsus_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3913     pwd->b[12] = msa_subsus_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3914     pwd->b[13] = msa_subsus_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3915     pwd->b[14] = msa_subsus_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
   3916     pwd->b[15] = msa_subsus_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
   3917 }
   3918 
   3919 void helper_msa_subsus_u_h(CPUMIPSState *env,
   3920                            uint32_t wd, uint32_t ws, uint32_t wt)
   3921 {
   3922     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3923     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3924     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3925 
   3926     pwd->h[0]  = msa_subsus_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
   3927     pwd->h[1]  = msa_subsus_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
   3928     pwd->h[2]  = msa_subsus_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
   3929     pwd->h[3]  = msa_subsus_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
   3930     pwd->h[4]  = msa_subsus_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
   3931     pwd->h[5]  = msa_subsus_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
   3932     pwd->h[6]  = msa_subsus_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
   3933     pwd->h[7]  = msa_subsus_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
   3934 }
   3935 
   3936 void helper_msa_subsus_u_w(CPUMIPSState *env,
   3937                            uint32_t wd, uint32_t ws, uint32_t wt)
   3938 {
   3939     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3940     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3941     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3942 
   3943     pwd->w[0]  = msa_subsus_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
   3944     pwd->w[1]  = msa_subsus_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
   3945     pwd->w[2]  = msa_subsus_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
   3946     pwd->w[3]  = msa_subsus_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
   3947 }
   3948 
   3949 void helper_msa_subsus_u_d(CPUMIPSState *env,
   3950                            uint32_t wd, uint32_t ws, uint32_t wt)
   3951 {
   3952     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3953     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3954     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3955 
   3956     pwd->d[0]  = msa_subsus_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   3957     pwd->d[1]  = msa_subsus_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   3958 }
   3959 
   3960 
   3961 static inline int64_t msa_subsuu_s_df(uint32_t df, int64_t arg1, int64_t arg2)
   3962 {
   3963     uint64_t u_arg1 = UNSIGNED(arg1, df);
   3964     uint64_t u_arg2 = UNSIGNED(arg2, df);
   3965     int64_t max_int = DF_MAX_INT(df);
   3966     int64_t min_int = DF_MIN_INT(df);
   3967     if (u_arg1 > u_arg2) {
   3968         return u_arg1 - u_arg2 < (uint64_t)max_int ?
   3969             (int64_t)(u_arg1 - u_arg2) :
   3970             max_int;
   3971     } else {
   3972         return u_arg2 - u_arg1 < (uint64_t)(-min_int) ?
   3973             (int64_t)(u_arg1 - u_arg2) :
   3974             min_int;
   3975     }
   3976 }
   3977 
   3978 void helper_msa_subsuu_s_b(CPUMIPSState *env,
   3979                            uint32_t wd, uint32_t ws, uint32_t wt)
   3980 {
   3981     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   3982     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   3983     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   3984 
   3985     pwd->b[0]  = msa_subsuu_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   3986     pwd->b[1]  = msa_subsuu_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   3987     pwd->b[2]  = msa_subsuu_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   3988     pwd->b[3]  = msa_subsuu_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   3989     pwd->b[4]  = msa_subsuu_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   3990     pwd->b[5]  = msa_subsuu_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   3991     pwd->b[6]  = msa_subsuu_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   3992     pwd->b[7]  = msa_subsuu_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   3993     pwd->b[8]  = msa_subsuu_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   3994     pwd->b[9]  = msa_subsuu_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   3995     pwd->b[10] = msa_subsuu_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
   3996     pwd->b[11] = msa_subsuu_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
   3997     pwd->b[12] = msa_subsuu_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
   3998     pwd->b[13] = msa_subsuu_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
   3999     pwd->b[14] = msa_subsuu_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
   4000     pwd->b[15] = msa_subsuu_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
   4001 }
   4002 
   4003 void helper_msa_subsuu_s_h(CPUMIPSState *env,
   4004                            uint32_t wd, uint32_t ws, uint32_t wt)
   4005 {
   4006     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4007     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4008     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4009 
   4010     pwd->h[0]  = msa_subsuu_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
   4011     pwd->h[1]  = msa_subsuu_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
   4012     pwd->h[2]  = msa_subsuu_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
   4013     pwd->h[3]  = msa_subsuu_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
   4014     pwd->h[4]  = msa_subsuu_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
   4015     pwd->h[5]  = msa_subsuu_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
   4016     pwd->h[6]  = msa_subsuu_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
   4017     pwd->h[7]  = msa_subsuu_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
   4018 }
   4019 
   4020 void helper_msa_subsuu_s_w(CPUMIPSState *env,
   4021                            uint32_t wd, uint32_t ws, uint32_t wt)
   4022 {
   4023     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4024     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4025     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4026 
   4027     pwd->w[0]  = msa_subsuu_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
   4028     pwd->w[1]  = msa_subsuu_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
   4029     pwd->w[2]  = msa_subsuu_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
   4030     pwd->w[3]  = msa_subsuu_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
   4031 }
   4032 
   4033 void helper_msa_subsuu_s_d(CPUMIPSState *env,
   4034                            uint32_t wd, uint32_t ws, uint32_t wt)
   4035 {
   4036     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4037     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4038     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4039 
   4040     pwd->d[0]  = msa_subsuu_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   4041     pwd->d[1]  = msa_subsuu_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   4042 }
   4043 
   4044 
   4045 static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2)
   4046 {
   4047     return arg1 - arg2;
   4048 }
   4049 
   4050 void helper_msa_subv_b(CPUMIPSState *env,
   4051                        uint32_t wd, uint32_t ws, uint32_t wt)
   4052 {
   4053     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4054     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4055     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4056 
   4057     pwd->b[0]  = msa_subv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   4058     pwd->b[1]  = msa_subv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   4059     pwd->b[2]  = msa_subv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   4060     pwd->b[3]  = msa_subv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   4061     pwd->b[4]  = msa_subv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   4062     pwd->b[5]  = msa_subv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   4063     pwd->b[6]  = msa_subv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   4064     pwd->b[7]  = msa_subv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   4065     pwd->b[8]  = msa_subv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   4066     pwd->b[9]  = msa_subv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   4067     pwd->b[10] = msa_subv_df(DF_BYTE, pws->b[10], pwt->b[10]);
   4068     pwd->b[11] = msa_subv_df(DF_BYTE, pws->b[11], pwt->b[11]);
   4069     pwd->b[12] = msa_subv_df(DF_BYTE, pws->b[12], pwt->b[12]);
   4070     pwd->b[13] = msa_subv_df(DF_BYTE, pws->b[13], pwt->b[13]);
   4071     pwd->b[14] = msa_subv_df(DF_BYTE, pws->b[14], pwt->b[14]);
   4072     pwd->b[15] = msa_subv_df(DF_BYTE, pws->b[15], pwt->b[15]);
   4073 }
   4074 
   4075 void helper_msa_subv_h(CPUMIPSState *env,
   4076                        uint32_t wd, uint32_t ws, uint32_t wt)
   4077 {
   4078     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4079     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4080     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4081 
   4082     pwd->h[0]  = msa_subv_df(DF_HALF, pws->h[0],  pwt->h[0]);
   4083     pwd->h[1]  = msa_subv_df(DF_HALF, pws->h[1],  pwt->h[1]);
   4084     pwd->h[2]  = msa_subv_df(DF_HALF, pws->h[2],  pwt->h[2]);
   4085     pwd->h[3]  = msa_subv_df(DF_HALF, pws->h[3],  pwt->h[3]);
   4086     pwd->h[4]  = msa_subv_df(DF_HALF, pws->h[4],  pwt->h[4]);
   4087     pwd->h[5]  = msa_subv_df(DF_HALF, pws->h[5],  pwt->h[5]);
   4088     pwd->h[6]  = msa_subv_df(DF_HALF, pws->h[6],  pwt->h[6]);
   4089     pwd->h[7]  = msa_subv_df(DF_HALF, pws->h[7],  pwt->h[7]);
   4090 }
   4091 
   4092 void helper_msa_subv_w(CPUMIPSState *env,
   4093                        uint32_t wd, uint32_t ws, uint32_t wt)
   4094 {
   4095     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4096     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4097     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4098 
   4099     pwd->w[0]  = msa_subv_df(DF_WORD, pws->w[0],  pwt->w[0]);
   4100     pwd->w[1]  = msa_subv_df(DF_WORD, pws->w[1],  pwt->w[1]);
   4101     pwd->w[2]  = msa_subv_df(DF_WORD, pws->w[2],  pwt->w[2]);
   4102     pwd->w[3]  = msa_subv_df(DF_WORD, pws->w[3],  pwt->w[3]);
   4103 }
   4104 
   4105 void helper_msa_subv_d(CPUMIPSState *env,
   4106                        uint32_t wd, uint32_t ws, uint32_t wt)
   4107 {
   4108     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4109     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4110     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4111 
   4112     pwd->d[0]  = msa_subv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   4113     pwd->d[1]  = msa_subv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   4114 }
   4115 
   4116 
   4117 /*
   4118  * Interleave
   4119  * ----------
   4120  *
   4121  * +---------------+----------------------------------------------------------+
   4122  * | ILVEV.B       | Vector Interleave Even (byte)                            |
   4123  * | ILVEV.H       | Vector Interleave Even (halfword)                        |
   4124  * | ILVEV.W       | Vector Interleave Even (word)                            |
   4125  * | ILVEV.D       | Vector Interleave Even (doubleword)                      |
   4126  * | ILVOD.B       | Vector Interleave Odd (byte)                             |
   4127  * | ILVOD.H       | Vector Interleave Odd (halfword)                         |
   4128  * | ILVOD.W       | Vector Interleave Odd (word)                             |
   4129  * | ILVOD.D       | Vector Interleave Odd (doubleword)                       |
   4130  * | ILVL.B        | Vector Interleave Left (byte)                            |
   4131  * | ILVL.H        | Vector Interleave Left (halfword)                        |
   4132  * | ILVL.W        | Vector Interleave Left (word)                            |
   4133  * | ILVL.D        | Vector Interleave Left (doubleword)                      |
   4134  * | ILVR.B        | Vector Interleave Right (byte)                           |
   4135  * | ILVR.H        | Vector Interleave Right (halfword)                       |
   4136  * | ILVR.W        | Vector Interleave Right (word)                           |
   4137  * | ILVR.D        | Vector Interleave Right (doubleword)                     |
   4138  * +---------------+----------------------------------------------------------+
   4139  */
   4140 
   4141 
   4142 void helper_msa_ilvev_b(CPUMIPSState *env,
   4143                         uint32_t wd, uint32_t ws, uint32_t wt)
   4144 {
   4145     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4146     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4147     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4148 
   4149 #if HOST_BIG_ENDIAN
   4150     pwd->b[8]  = pws->b[9];
   4151     pwd->b[9]  = pwt->b[9];
   4152     pwd->b[10] = pws->b[11];
   4153     pwd->b[11] = pwt->b[11];
   4154     pwd->b[12] = pws->b[13];
   4155     pwd->b[13] = pwt->b[13];
   4156     pwd->b[14] = pws->b[15];
   4157     pwd->b[15] = pwt->b[15];
   4158     pwd->b[0]  = pws->b[1];
   4159     pwd->b[1]  = pwt->b[1];
   4160     pwd->b[2]  = pws->b[3];
   4161     pwd->b[3]  = pwt->b[3];
   4162     pwd->b[4]  = pws->b[5];
   4163     pwd->b[5]  = pwt->b[5];
   4164     pwd->b[6]  = pws->b[7];
   4165     pwd->b[7]  = pwt->b[7];
   4166 #else
   4167     pwd->b[15] = pws->b[14];
   4168     pwd->b[14] = pwt->b[14];
   4169     pwd->b[13] = pws->b[12];
   4170     pwd->b[12] = pwt->b[12];
   4171     pwd->b[11] = pws->b[10];
   4172     pwd->b[10] = pwt->b[10];
   4173     pwd->b[9]  = pws->b[8];
   4174     pwd->b[8]  = pwt->b[8];
   4175     pwd->b[7]  = pws->b[6];
   4176     pwd->b[6]  = pwt->b[6];
   4177     pwd->b[5]  = pws->b[4];
   4178     pwd->b[4]  = pwt->b[4];
   4179     pwd->b[3]  = pws->b[2];
   4180     pwd->b[2]  = pwt->b[2];
   4181     pwd->b[1]  = pws->b[0];
   4182     pwd->b[0]  = pwt->b[0];
   4183 #endif
   4184 }
   4185 
   4186 void helper_msa_ilvev_h(CPUMIPSState *env,
   4187                         uint32_t wd, uint32_t ws, uint32_t wt)
   4188 {
   4189     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4190     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4191     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4192 
   4193 #if HOST_BIG_ENDIAN
   4194     pwd->h[4] = pws->h[5];
   4195     pwd->h[5] = pwt->h[5];
   4196     pwd->h[6] = pws->h[7];
   4197     pwd->h[7] = pwt->h[7];
   4198     pwd->h[0] = pws->h[1];
   4199     pwd->h[1] = pwt->h[1];
   4200     pwd->h[2] = pws->h[3];
   4201     pwd->h[3] = pwt->h[3];
   4202 #else
   4203     pwd->h[7] = pws->h[6];
   4204     pwd->h[6] = pwt->h[6];
   4205     pwd->h[5] = pws->h[4];
   4206     pwd->h[4] = pwt->h[4];
   4207     pwd->h[3] = pws->h[2];
   4208     pwd->h[2] = pwt->h[2];
   4209     pwd->h[1] = pws->h[0];
   4210     pwd->h[0] = pwt->h[0];
   4211 #endif
   4212 }
   4213 
   4214 void helper_msa_ilvev_w(CPUMIPSState *env,
   4215                         uint32_t wd, uint32_t ws, uint32_t wt)
   4216 {
   4217     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4218     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4219     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4220 
   4221 #if HOST_BIG_ENDIAN
   4222     pwd->w[2] = pws->w[3];
   4223     pwd->w[3] = pwt->w[3];
   4224     pwd->w[0] = pws->w[1];
   4225     pwd->w[1] = pwt->w[1];
   4226 #else
   4227     pwd->w[3] = pws->w[2];
   4228     pwd->w[2] = pwt->w[2];
   4229     pwd->w[1] = pws->w[0];
   4230     pwd->w[0] = pwt->w[0];
   4231 #endif
   4232 }
   4233 
   4234 void helper_msa_ilvev_d(CPUMIPSState *env,
   4235                         uint32_t wd, uint32_t ws, uint32_t wt)
   4236 {
   4237     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4238     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4239     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4240 
   4241     pwd->d[1] = pws->d[0];
   4242     pwd->d[0] = pwt->d[0];
   4243 }
   4244 
   4245 
   4246 void helper_msa_ilvod_b(CPUMIPSState *env,
   4247                         uint32_t wd, uint32_t ws, uint32_t wt)
   4248 {
   4249     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4250     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4251     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4252 
   4253 #if HOST_BIG_ENDIAN
   4254     pwd->b[7]  = pwt->b[6];
   4255     pwd->b[6]  = pws->b[6];
   4256     pwd->b[5]  = pwt->b[4];
   4257     pwd->b[4]  = pws->b[4];
   4258     pwd->b[3]  = pwt->b[2];
   4259     pwd->b[2]  = pws->b[2];
   4260     pwd->b[1]  = pwt->b[0];
   4261     pwd->b[0]  = pws->b[0];
   4262     pwd->b[15] = pwt->b[14];
   4263     pwd->b[14] = pws->b[14];
   4264     pwd->b[13] = pwt->b[12];
   4265     pwd->b[12] = pws->b[12];
   4266     pwd->b[11] = pwt->b[10];
   4267     pwd->b[10] = pws->b[10];
   4268     pwd->b[9]  = pwt->b[8];
   4269     pwd->b[8]  = pws->b[8];
   4270 #else
   4271     pwd->b[0]  = pwt->b[1];
   4272     pwd->b[1]  = pws->b[1];
   4273     pwd->b[2]  = pwt->b[3];
   4274     pwd->b[3]  = pws->b[3];
   4275     pwd->b[4]  = pwt->b[5];
   4276     pwd->b[5]  = pws->b[5];
   4277     pwd->b[6]  = pwt->b[7];
   4278     pwd->b[7]  = pws->b[7];
   4279     pwd->b[8]  = pwt->b[9];
   4280     pwd->b[9]  = pws->b[9];
   4281     pwd->b[10] = pwt->b[11];
   4282     pwd->b[11] = pws->b[11];
   4283     pwd->b[12] = pwt->b[13];
   4284     pwd->b[13] = pws->b[13];
   4285     pwd->b[14] = pwt->b[15];
   4286     pwd->b[15] = pws->b[15];
   4287 #endif
   4288 }
   4289 
   4290 void helper_msa_ilvod_h(CPUMIPSState *env,
   4291                         uint32_t wd, uint32_t ws, uint32_t wt)
   4292 {
   4293     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4294     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4295     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4296 
   4297 #if HOST_BIG_ENDIAN
   4298     pwd->h[3] = pwt->h[2];
   4299     pwd->h[2] = pws->h[2];
   4300     pwd->h[1] = pwt->h[0];
   4301     pwd->h[0] = pws->h[0];
   4302     pwd->h[7] = pwt->h[6];
   4303     pwd->h[6] = pws->h[6];
   4304     pwd->h[5] = pwt->h[4];
   4305     pwd->h[4] = pws->h[4];
   4306 #else
   4307     pwd->h[0] = pwt->h[1];
   4308     pwd->h[1] = pws->h[1];
   4309     pwd->h[2] = pwt->h[3];
   4310     pwd->h[3] = pws->h[3];
   4311     pwd->h[4] = pwt->h[5];
   4312     pwd->h[5] = pws->h[5];
   4313     pwd->h[6] = pwt->h[7];
   4314     pwd->h[7] = pws->h[7];
   4315 #endif
   4316 }
   4317 
   4318 void helper_msa_ilvod_w(CPUMIPSState *env,
   4319                         uint32_t wd, uint32_t ws, uint32_t wt)
   4320 {
   4321     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4322     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4323     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4324 
   4325 #if HOST_BIG_ENDIAN
   4326     pwd->w[1] = pwt->w[0];
   4327     pwd->w[0] = pws->w[0];
   4328     pwd->w[3] = pwt->w[2];
   4329     pwd->w[2] = pws->w[2];
   4330 #else
   4331     pwd->w[0] = pwt->w[1];
   4332     pwd->w[1] = pws->w[1];
   4333     pwd->w[2] = pwt->w[3];
   4334     pwd->w[3] = pws->w[3];
   4335 #endif
   4336 }
   4337 
   4338 void helper_msa_ilvod_d(CPUMIPSState *env,
   4339                         uint32_t wd, uint32_t ws, uint32_t wt)
   4340 {
   4341     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4342     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4343     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4344 
   4345     pwd->d[0] = pwt->d[1];
   4346     pwd->d[1] = pws->d[1];
   4347 }
   4348 
   4349 
   4350 void helper_msa_ilvl_b(CPUMIPSState *env,
   4351                        uint32_t wd, uint32_t ws, uint32_t wt)
   4352 {
   4353     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4354     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4355     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4356 
   4357 #if HOST_BIG_ENDIAN
   4358     pwd->b[7]  = pwt->b[15];
   4359     pwd->b[6]  = pws->b[15];
   4360     pwd->b[5]  = pwt->b[14];
   4361     pwd->b[4]  = pws->b[14];
   4362     pwd->b[3]  = pwt->b[13];
   4363     pwd->b[2]  = pws->b[13];
   4364     pwd->b[1]  = pwt->b[12];
   4365     pwd->b[0]  = pws->b[12];
   4366     pwd->b[15] = pwt->b[11];
   4367     pwd->b[14] = pws->b[11];
   4368     pwd->b[13] = pwt->b[10];
   4369     pwd->b[12] = pws->b[10];
   4370     pwd->b[11] = pwt->b[9];
   4371     pwd->b[10] = pws->b[9];
   4372     pwd->b[9]  = pwt->b[8];
   4373     pwd->b[8]  = pws->b[8];
   4374 #else
   4375     pwd->b[0]  = pwt->b[8];
   4376     pwd->b[1]  = pws->b[8];
   4377     pwd->b[2]  = pwt->b[9];
   4378     pwd->b[3]  = pws->b[9];
   4379     pwd->b[4]  = pwt->b[10];
   4380     pwd->b[5]  = pws->b[10];
   4381     pwd->b[6]  = pwt->b[11];
   4382     pwd->b[7]  = pws->b[11];
   4383     pwd->b[8]  = pwt->b[12];
   4384     pwd->b[9]  = pws->b[12];
   4385     pwd->b[10] = pwt->b[13];
   4386     pwd->b[11] = pws->b[13];
   4387     pwd->b[12] = pwt->b[14];
   4388     pwd->b[13] = pws->b[14];
   4389     pwd->b[14] = pwt->b[15];
   4390     pwd->b[15] = pws->b[15];
   4391 #endif
   4392 }
   4393 
   4394 void helper_msa_ilvl_h(CPUMIPSState *env,
   4395                        uint32_t wd, uint32_t ws, uint32_t wt)
   4396 {
   4397     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4398     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4399     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4400 
   4401 #if HOST_BIG_ENDIAN
   4402     pwd->h[3] = pwt->h[7];
   4403     pwd->h[2] = pws->h[7];
   4404     pwd->h[1] = pwt->h[6];
   4405     pwd->h[0] = pws->h[6];
   4406     pwd->h[7] = pwt->h[5];
   4407     pwd->h[6] = pws->h[5];
   4408     pwd->h[5] = pwt->h[4];
   4409     pwd->h[4] = pws->h[4];
   4410 #else
   4411     pwd->h[0] = pwt->h[4];
   4412     pwd->h[1] = pws->h[4];
   4413     pwd->h[2] = pwt->h[5];
   4414     pwd->h[3] = pws->h[5];
   4415     pwd->h[4] = pwt->h[6];
   4416     pwd->h[5] = pws->h[6];
   4417     pwd->h[6] = pwt->h[7];
   4418     pwd->h[7] = pws->h[7];
   4419 #endif
   4420 }
   4421 
   4422 void helper_msa_ilvl_w(CPUMIPSState *env,
   4423                        uint32_t wd, uint32_t ws, uint32_t wt)
   4424 {
   4425     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4426     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4427     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4428 
   4429 #if HOST_BIG_ENDIAN
   4430     pwd->w[1] = pwt->w[3];
   4431     pwd->w[0] = pws->w[3];
   4432     pwd->w[3] = pwt->w[2];
   4433     pwd->w[2] = pws->w[2];
   4434 #else
   4435     pwd->w[0] = pwt->w[2];
   4436     pwd->w[1] = pws->w[2];
   4437     pwd->w[2] = pwt->w[3];
   4438     pwd->w[3] = pws->w[3];
   4439 #endif
   4440 }
   4441 
   4442 void helper_msa_ilvl_d(CPUMIPSState *env,
   4443                        uint32_t wd, uint32_t ws, uint32_t wt)
   4444 {
   4445     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4446     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4447     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4448 
   4449     pwd->d[0] = pwt->d[1];
   4450     pwd->d[1] = pws->d[1];
   4451 }
   4452 
   4453 
   4454 void helper_msa_ilvr_b(CPUMIPSState *env,
   4455                        uint32_t wd, uint32_t ws, uint32_t wt)
   4456 {
   4457     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4458     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4459     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4460 
   4461 #if HOST_BIG_ENDIAN
   4462     pwd->b[8]  = pws->b[0];
   4463     pwd->b[9]  = pwt->b[0];
   4464     pwd->b[10] = pws->b[1];
   4465     pwd->b[11] = pwt->b[1];
   4466     pwd->b[12] = pws->b[2];
   4467     pwd->b[13] = pwt->b[2];
   4468     pwd->b[14] = pws->b[3];
   4469     pwd->b[15] = pwt->b[3];
   4470     pwd->b[0]  = pws->b[4];
   4471     pwd->b[1]  = pwt->b[4];
   4472     pwd->b[2]  = pws->b[5];
   4473     pwd->b[3]  = pwt->b[5];
   4474     pwd->b[4]  = pws->b[6];
   4475     pwd->b[5]  = pwt->b[6];
   4476     pwd->b[6]  = pws->b[7];
   4477     pwd->b[7]  = pwt->b[7];
   4478 #else
   4479     pwd->b[15] = pws->b[7];
   4480     pwd->b[14] = pwt->b[7];
   4481     pwd->b[13] = pws->b[6];
   4482     pwd->b[12] = pwt->b[6];
   4483     pwd->b[11] = pws->b[5];
   4484     pwd->b[10] = pwt->b[5];
   4485     pwd->b[9]  = pws->b[4];
   4486     pwd->b[8]  = pwt->b[4];
   4487     pwd->b[7]  = pws->b[3];
   4488     pwd->b[6]  = pwt->b[3];
   4489     pwd->b[5]  = pws->b[2];
   4490     pwd->b[4]  = pwt->b[2];
   4491     pwd->b[3]  = pws->b[1];
   4492     pwd->b[2]  = pwt->b[1];
   4493     pwd->b[1]  = pws->b[0];
   4494     pwd->b[0]  = pwt->b[0];
   4495 #endif
   4496 }
   4497 
   4498 void helper_msa_ilvr_h(CPUMIPSState *env,
   4499                        uint32_t wd, uint32_t ws, uint32_t wt)
   4500 {
   4501     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4502     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4503     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4504 
   4505 #if HOST_BIG_ENDIAN
   4506     pwd->h[4] = pws->h[0];
   4507     pwd->h[5] = pwt->h[0];
   4508     pwd->h[6] = pws->h[1];
   4509     pwd->h[7] = pwt->h[1];
   4510     pwd->h[0] = pws->h[2];
   4511     pwd->h[1] = pwt->h[2];
   4512     pwd->h[2] = pws->h[3];
   4513     pwd->h[3] = pwt->h[3];
   4514 #else
   4515     pwd->h[7] = pws->h[3];
   4516     pwd->h[6] = pwt->h[3];
   4517     pwd->h[5] = pws->h[2];
   4518     pwd->h[4] = pwt->h[2];
   4519     pwd->h[3] = pws->h[1];
   4520     pwd->h[2] = pwt->h[1];
   4521     pwd->h[1] = pws->h[0];
   4522     pwd->h[0] = pwt->h[0];
   4523 #endif
   4524 }
   4525 
   4526 void helper_msa_ilvr_w(CPUMIPSState *env,
   4527                        uint32_t wd, uint32_t ws, uint32_t wt)
   4528 {
   4529     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4530     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4531     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4532 
   4533 #if HOST_BIG_ENDIAN
   4534     pwd->w[2] = pws->w[0];
   4535     pwd->w[3] = pwt->w[0];
   4536     pwd->w[0] = pws->w[1];
   4537     pwd->w[1] = pwt->w[1];
   4538 #else
   4539     pwd->w[3] = pws->w[1];
   4540     pwd->w[2] = pwt->w[1];
   4541     pwd->w[1] = pws->w[0];
   4542     pwd->w[0] = pwt->w[0];
   4543 #endif
   4544 }
   4545 
   4546 void helper_msa_ilvr_d(CPUMIPSState *env,
   4547                        uint32_t wd, uint32_t ws, uint32_t wt)
   4548 {
   4549     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4550     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4551     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4552 
   4553     pwd->d[1] = pws->d[0];
   4554     pwd->d[0] = pwt->d[0];
   4555 }
   4556 
   4557 
   4558 /*
   4559  * Logic
   4560  * -----
   4561  *
   4562  * +---------------+----------------------------------------------------------+
   4563  * | AND.V         | Vector Logical And                                       |
   4564  * | NOR.V         | Vector Logical Negated Or                                |
   4565  * | OR.V          | Vector Logical Or                                        |
   4566  * | XOR.V         | Vector Logical Exclusive Or                              |
   4567  * +---------------+----------------------------------------------------------+
   4568  */
   4569 
   4570 
   4571 void helper_msa_and_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
   4572 {
   4573     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4574     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4575     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4576 
   4577     pwd->d[0] = pws->d[0] & pwt->d[0];
   4578     pwd->d[1] = pws->d[1] & pwt->d[1];
   4579 }
   4580 
   4581 void helper_msa_nor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
   4582 {
   4583     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4584     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4585     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4586 
   4587     pwd->d[0] = ~(pws->d[0] | pwt->d[0]);
   4588     pwd->d[1] = ~(pws->d[1] | pwt->d[1]);
   4589 }
   4590 
   4591 void helper_msa_or_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
   4592 {
   4593     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4594     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4595     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4596 
   4597     pwd->d[0] = pws->d[0] | pwt->d[0];
   4598     pwd->d[1] = pws->d[1] | pwt->d[1];
   4599 }
   4600 
   4601 void helper_msa_xor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
   4602 {
   4603     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4604     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4605     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4606 
   4607     pwd->d[0] = pws->d[0] ^ pwt->d[0];
   4608     pwd->d[1] = pws->d[1] ^ pwt->d[1];
   4609 }
   4610 
   4611 
   4612 /*
   4613  * Move
   4614  * ----
   4615  *
   4616  * +---------------+----------------------------------------------------------+
   4617  * | MOVE.V        | Vector Move                                              |
   4618  * +---------------+----------------------------------------------------------+
   4619  */
   4620 
   4621 static inline void msa_move_v(wr_t *pwd, wr_t *pws)
   4622 {
   4623     pwd->d[0] = pws->d[0];
   4624     pwd->d[1] = pws->d[1];
   4625 }
   4626 
   4627 void helper_msa_move_v(CPUMIPSState *env, uint32_t wd, uint32_t ws)
   4628 {
   4629     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4630     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4631 
   4632     msa_move_v(pwd, pws);
   4633 }
   4634 
   4635 
   4636 /*
   4637  * Pack
   4638  * ----
   4639  *
   4640  * +---------------+----------------------------------------------------------+
   4641  * | PCKEV.B       | Vector Pack Even (byte)                                  |
   4642  * | PCKEV.H       | Vector Pack Even (halfword)                              |
   4643  * | PCKEV.W       | Vector Pack Even (word)                                  |
   4644  * | PCKEV.D       | Vector Pack Even (doubleword)                            |
   4645  * | PCKOD.B       | Vector Pack Odd (byte)                                   |
   4646  * | PCKOD.H       | Vector Pack Odd (halfword)                               |
   4647  * | PCKOD.W       | Vector Pack Odd (word)                                   |
   4648  * | PCKOD.D       | Vector Pack Odd (doubleword)                             |
   4649  * | VSHF.B        | Vector Data Preserving Shuffle (byte)                    |
   4650  * | VSHF.H        | Vector Data Preserving Shuffle (halfword)                |
   4651  * | VSHF.W        | Vector Data Preserving Shuffle (word)                    |
   4652  * | VSHF.D        | Vector Data Preserving Shuffle (doubleword)              |
   4653  * +---------------+----------------------------------------------------------+
   4654  */
   4655 
   4656 
   4657 void helper_msa_pckev_b(CPUMIPSState *env,
   4658                         uint32_t wd, uint32_t ws, uint32_t wt)
   4659 {
   4660     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4661     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4662     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4663 
   4664 #if HOST_BIG_ENDIAN
   4665     pwd->b[8]  = pws->b[9];
   4666     pwd->b[10] = pws->b[13];
   4667     pwd->b[12] = pws->b[1];
   4668     pwd->b[14] = pws->b[5];
   4669     pwd->b[0]  = pwt->b[9];
   4670     pwd->b[2]  = pwt->b[13];
   4671     pwd->b[4]  = pwt->b[1];
   4672     pwd->b[6]  = pwt->b[5];
   4673     pwd->b[9]  = pws->b[11];
   4674     pwd->b[13] = pws->b[3];
   4675     pwd->b[1]  = pwt->b[11];
   4676     pwd->b[5]  = pwt->b[3];
   4677     pwd->b[11] = pws->b[15];
   4678     pwd->b[3]  = pwt->b[15];
   4679     pwd->b[15] = pws->b[7];
   4680     pwd->b[7]  = pwt->b[7];
   4681 #else
   4682     pwd->b[15] = pws->b[14];
   4683     pwd->b[13] = pws->b[10];
   4684     pwd->b[11] = pws->b[6];
   4685     pwd->b[9]  = pws->b[2];
   4686     pwd->b[7]  = pwt->b[14];
   4687     pwd->b[5]  = pwt->b[10];
   4688     pwd->b[3]  = pwt->b[6];
   4689     pwd->b[1]  = pwt->b[2];
   4690     pwd->b[14] = pws->b[12];
   4691     pwd->b[10] = pws->b[4];
   4692     pwd->b[6]  = pwt->b[12];
   4693     pwd->b[2]  = pwt->b[4];
   4694     pwd->b[12] = pws->b[8];
   4695     pwd->b[4]  = pwt->b[8];
   4696     pwd->b[8]  = pws->b[0];
   4697     pwd->b[0]  = pwt->b[0];
   4698 #endif
   4699 }
   4700 
   4701 void helper_msa_pckev_h(CPUMIPSState *env,
   4702                         uint32_t wd, uint32_t ws, uint32_t wt)
   4703 {
   4704     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4705     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4706     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4707 
   4708 #if HOST_BIG_ENDIAN
   4709     pwd->h[4] = pws->h[5];
   4710     pwd->h[6] = pws->h[1];
   4711     pwd->h[0] = pwt->h[5];
   4712     pwd->h[2] = pwt->h[1];
   4713     pwd->h[5] = pws->h[7];
   4714     pwd->h[1] = pwt->h[7];
   4715     pwd->h[7] = pws->h[3];
   4716     pwd->h[3] = pwt->h[3];
   4717 #else
   4718     pwd->h[7] = pws->h[6];
   4719     pwd->h[5] = pws->h[2];
   4720     pwd->h[3] = pwt->h[6];
   4721     pwd->h[1] = pwt->h[2];
   4722     pwd->h[6] = pws->h[4];
   4723     pwd->h[2] = pwt->h[4];
   4724     pwd->h[4] = pws->h[0];
   4725     pwd->h[0] = pwt->h[0];
   4726 #endif
   4727 }
   4728 
   4729 void helper_msa_pckev_w(CPUMIPSState *env,
   4730                         uint32_t wd, uint32_t ws, uint32_t wt)
   4731 {
   4732     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4733     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4734     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4735 
   4736 #if HOST_BIG_ENDIAN
   4737     pwd->w[2] = pws->w[3];
   4738     pwd->w[0] = pwt->w[3];
   4739     pwd->w[3] = pws->w[1];
   4740     pwd->w[1] = pwt->w[1];
   4741 #else
   4742     pwd->w[3] = pws->w[2];
   4743     pwd->w[1] = pwt->w[2];
   4744     pwd->w[2] = pws->w[0];
   4745     pwd->w[0] = pwt->w[0];
   4746 #endif
   4747 }
   4748 
   4749 void helper_msa_pckev_d(CPUMIPSState *env,
   4750                         uint32_t wd, uint32_t ws, uint32_t wt)
   4751 {
   4752     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4753     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4754     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4755 
   4756     pwd->d[1] = pws->d[0];
   4757     pwd->d[0] = pwt->d[0];
   4758 }
   4759 
   4760 
   4761 void helper_msa_pckod_b(CPUMIPSState *env,
   4762                         uint32_t wd, uint32_t ws, uint32_t wt)
   4763 {
   4764     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4765     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4766     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4767 
   4768 #if HOST_BIG_ENDIAN
   4769     pwd->b[7]  = pwt->b[6];
   4770     pwd->b[5]  = pwt->b[2];
   4771     pwd->b[3]  = pwt->b[14];
   4772     pwd->b[1]  = pwt->b[10];
   4773     pwd->b[15] = pws->b[6];
   4774     pwd->b[13] = pws->b[2];
   4775     pwd->b[11] = pws->b[14];
   4776     pwd->b[9]  = pws->b[10];
   4777     pwd->b[6]  = pwt->b[4];
   4778     pwd->b[2]  = pwt->b[12];
   4779     pwd->b[14] = pws->b[4];
   4780     pwd->b[10] = pws->b[12];
   4781     pwd->b[4]  = pwt->b[0];
   4782     pwd->b[12] = pws->b[0];
   4783     pwd->b[0]  = pwt->b[8];
   4784     pwd->b[8]  = pws->b[8];
   4785 #else
   4786     pwd->b[0]  = pwt->b[1];
   4787     pwd->b[2]  = pwt->b[5];
   4788     pwd->b[4]  = pwt->b[9];
   4789     pwd->b[6]  = pwt->b[13];
   4790     pwd->b[8]  = pws->b[1];
   4791     pwd->b[10] = pws->b[5];
   4792     pwd->b[12] = pws->b[9];
   4793     pwd->b[14] = pws->b[13];
   4794     pwd->b[1]  = pwt->b[3];
   4795     pwd->b[5]  = pwt->b[11];
   4796     pwd->b[9]  = pws->b[3];
   4797     pwd->b[13] = pws->b[11];
   4798     pwd->b[3]  = pwt->b[7];
   4799     pwd->b[11] = pws->b[7];
   4800     pwd->b[7]  = pwt->b[15];
   4801     pwd->b[15] = pws->b[15];
   4802 #endif
   4803 
   4804 }
   4805 
   4806 void helper_msa_pckod_h(CPUMIPSState *env,
   4807                         uint32_t wd, uint32_t ws, uint32_t wt)
   4808 {
   4809     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4810     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4811     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4812 
   4813 #if HOST_BIG_ENDIAN
   4814     pwd->h[3] = pwt->h[2];
   4815     pwd->h[1] = pwt->h[6];
   4816     pwd->h[7] = pws->h[2];
   4817     pwd->h[5] = pws->h[6];
   4818     pwd->h[2] = pwt->h[0];
   4819     pwd->h[6] = pws->h[0];
   4820     pwd->h[0] = pwt->h[4];
   4821     pwd->h[4] = pws->h[4];
   4822 #else
   4823     pwd->h[0] = pwt->h[1];
   4824     pwd->h[2] = pwt->h[5];
   4825     pwd->h[4] = pws->h[1];
   4826     pwd->h[6] = pws->h[5];
   4827     pwd->h[1] = pwt->h[3];
   4828     pwd->h[5] = pws->h[3];
   4829     pwd->h[3] = pwt->h[7];
   4830     pwd->h[7] = pws->h[7];
   4831 #endif
   4832 }
   4833 
   4834 void helper_msa_pckod_w(CPUMIPSState *env,
   4835                         uint32_t wd, uint32_t ws, uint32_t wt)
   4836 {
   4837     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4838     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4839     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4840 
   4841 #if HOST_BIG_ENDIAN
   4842     pwd->w[1] = pwt->w[0];
   4843     pwd->w[3] = pws->w[0];
   4844     pwd->w[0] = pwt->w[2];
   4845     pwd->w[2] = pws->w[2];
   4846 #else
   4847     pwd->w[0] = pwt->w[1];
   4848     pwd->w[2] = pws->w[1];
   4849     pwd->w[1] = pwt->w[3];
   4850     pwd->w[3] = pws->w[3];
   4851 #endif
   4852 }
   4853 
   4854 void helper_msa_pckod_d(CPUMIPSState *env,
   4855                         uint32_t wd, uint32_t ws, uint32_t wt)
   4856 {
   4857     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4858     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4859     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4860 
   4861     pwd->d[0] = pwt->d[1];
   4862     pwd->d[1] = pws->d[1];
   4863 }
   4864 
   4865 
   4866 /*
   4867  * Shift
   4868  * -----
   4869  *
   4870  * +---------------+----------------------------------------------------------+
   4871  * | SLL.B         | Vector Shift Left (byte)                                 |
   4872  * | SLL.H         | Vector Shift Left (halfword)                             |
   4873  * | SLL.W         | Vector Shift Left (word)                                 |
   4874  * | SLL.D         | Vector Shift Left (doubleword)                           |
   4875  * | SRA.B         | Vector Shift Right Arithmetic (byte)                     |
   4876  * | SRA.H         | Vector Shift Right Arithmetic (halfword)                 |
   4877  * | SRA.W         | Vector Shift Right Arithmetic (word)                     |
   4878  * | SRA.D         | Vector Shift Right Arithmetic (doubleword)               |
   4879  * | SRAR.B        | Vector Shift Right Arithmetic Rounded (byte)             |
   4880  * | SRAR.H        | Vector Shift Right Arithmetic Rounded (halfword)         |
   4881  * | SRAR.W        | Vector Shift Right Arithmetic Rounded (word)             |
   4882  * | SRAR.D        | Vector Shift Right Arithmetic Rounded (doubleword)       |
   4883  * | SRL.B         | Vector Shift Right Logical (byte)                        |
   4884  * | SRL.H         | Vector Shift Right Logical (halfword)                    |
   4885  * | SRL.W         | Vector Shift Right Logical (word)                        |
   4886  * | SRL.D         | Vector Shift Right Logical (doubleword)                  |
   4887  * | SRLR.B        | Vector Shift Right Logical Rounded (byte)                |
   4888  * | SRLR.H        | Vector Shift Right Logical Rounded (halfword)            |
   4889  * | SRLR.W        | Vector Shift Right Logical Rounded (word)                |
   4890  * | SRLR.D        | Vector Shift Right Logical Rounded (doubleword)          |
   4891  * +---------------+----------------------------------------------------------+
   4892  */
   4893 
   4894 
   4895 static inline int64_t msa_sll_df(uint32_t df, int64_t arg1, int64_t arg2)
   4896 {
   4897     int32_t b_arg2 = BIT_POSITION(arg2, df);
   4898     return arg1 << b_arg2;
   4899 }
   4900 
   4901 void helper_msa_sll_b(CPUMIPSState *env,
   4902                       uint32_t wd, uint32_t ws, uint32_t wt)
   4903 {
   4904     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4905     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4906     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4907 
   4908     pwd->b[0]  = msa_sll_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   4909     pwd->b[1]  = msa_sll_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   4910     pwd->b[2]  = msa_sll_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   4911     pwd->b[3]  = msa_sll_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   4912     pwd->b[4]  = msa_sll_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   4913     pwd->b[5]  = msa_sll_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   4914     pwd->b[6]  = msa_sll_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   4915     pwd->b[7]  = msa_sll_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   4916     pwd->b[8]  = msa_sll_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   4917     pwd->b[9]  = msa_sll_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   4918     pwd->b[10] = msa_sll_df(DF_BYTE, pws->b[10], pwt->b[10]);
   4919     pwd->b[11] = msa_sll_df(DF_BYTE, pws->b[11], pwt->b[11]);
   4920     pwd->b[12] = msa_sll_df(DF_BYTE, pws->b[12], pwt->b[12]);
   4921     pwd->b[13] = msa_sll_df(DF_BYTE, pws->b[13], pwt->b[13]);
   4922     pwd->b[14] = msa_sll_df(DF_BYTE, pws->b[14], pwt->b[14]);
   4923     pwd->b[15] = msa_sll_df(DF_BYTE, pws->b[15], pwt->b[15]);
   4924 }
   4925 
   4926 void helper_msa_sll_h(CPUMIPSState *env,
   4927                       uint32_t wd, uint32_t ws, uint32_t wt)
   4928 {
   4929     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4930     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4931     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4932 
   4933     pwd->h[0]  = msa_sll_df(DF_HALF, pws->h[0],  pwt->h[0]);
   4934     pwd->h[1]  = msa_sll_df(DF_HALF, pws->h[1],  pwt->h[1]);
   4935     pwd->h[2]  = msa_sll_df(DF_HALF, pws->h[2],  pwt->h[2]);
   4936     pwd->h[3]  = msa_sll_df(DF_HALF, pws->h[3],  pwt->h[3]);
   4937     pwd->h[4]  = msa_sll_df(DF_HALF, pws->h[4],  pwt->h[4]);
   4938     pwd->h[5]  = msa_sll_df(DF_HALF, pws->h[5],  pwt->h[5]);
   4939     pwd->h[6]  = msa_sll_df(DF_HALF, pws->h[6],  pwt->h[6]);
   4940     pwd->h[7]  = msa_sll_df(DF_HALF, pws->h[7],  pwt->h[7]);
   4941 }
   4942 
   4943 void helper_msa_sll_w(CPUMIPSState *env,
   4944                       uint32_t wd, uint32_t ws, uint32_t wt)
   4945 {
   4946     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4947     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4948     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4949 
   4950     pwd->w[0]  = msa_sll_df(DF_WORD, pws->w[0],  pwt->w[0]);
   4951     pwd->w[1]  = msa_sll_df(DF_WORD, pws->w[1],  pwt->w[1]);
   4952     pwd->w[2]  = msa_sll_df(DF_WORD, pws->w[2],  pwt->w[2]);
   4953     pwd->w[3]  = msa_sll_df(DF_WORD, pws->w[3],  pwt->w[3]);
   4954 }
   4955 
   4956 void helper_msa_sll_d(CPUMIPSState *env,
   4957                       uint32_t wd, uint32_t ws, uint32_t wt)
   4958 {
   4959     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4960     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4961     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4962 
   4963     pwd->d[0]  = msa_sll_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   4964     pwd->d[1]  = msa_sll_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   4965 }
   4966 
   4967 
   4968 static inline int64_t msa_sra_df(uint32_t df, int64_t arg1, int64_t arg2)
   4969 {
   4970     int32_t b_arg2 = BIT_POSITION(arg2, df);
   4971     return arg1 >> b_arg2;
   4972 }
   4973 
   4974 void helper_msa_sra_b(CPUMIPSState *env,
   4975                       uint32_t wd, uint32_t ws, uint32_t wt)
   4976 {
   4977     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   4978     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   4979     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   4980 
   4981     pwd->b[0]  = msa_sra_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   4982     pwd->b[1]  = msa_sra_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   4983     pwd->b[2]  = msa_sra_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   4984     pwd->b[3]  = msa_sra_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   4985     pwd->b[4]  = msa_sra_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   4986     pwd->b[5]  = msa_sra_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   4987     pwd->b[6]  = msa_sra_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   4988     pwd->b[7]  = msa_sra_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   4989     pwd->b[8]  = msa_sra_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   4990     pwd->b[9]  = msa_sra_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   4991     pwd->b[10] = msa_sra_df(DF_BYTE, pws->b[10], pwt->b[10]);
   4992     pwd->b[11] = msa_sra_df(DF_BYTE, pws->b[11], pwt->b[11]);
   4993     pwd->b[12] = msa_sra_df(DF_BYTE, pws->b[12], pwt->b[12]);
   4994     pwd->b[13] = msa_sra_df(DF_BYTE, pws->b[13], pwt->b[13]);
   4995     pwd->b[14] = msa_sra_df(DF_BYTE, pws->b[14], pwt->b[14]);
   4996     pwd->b[15] = msa_sra_df(DF_BYTE, pws->b[15], pwt->b[15]);
   4997 }
   4998 
   4999 void helper_msa_sra_h(CPUMIPSState *env,
   5000                       uint32_t wd, uint32_t ws, uint32_t wt)
   5001 {
   5002     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5003     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5004     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5005 
   5006     pwd->h[0]  = msa_sra_df(DF_HALF, pws->h[0],  pwt->h[0]);
   5007     pwd->h[1]  = msa_sra_df(DF_HALF, pws->h[1],  pwt->h[1]);
   5008     pwd->h[2]  = msa_sra_df(DF_HALF, pws->h[2],  pwt->h[2]);
   5009     pwd->h[3]  = msa_sra_df(DF_HALF, pws->h[3],  pwt->h[3]);
   5010     pwd->h[4]  = msa_sra_df(DF_HALF, pws->h[4],  pwt->h[4]);
   5011     pwd->h[5]  = msa_sra_df(DF_HALF, pws->h[5],  pwt->h[5]);
   5012     pwd->h[6]  = msa_sra_df(DF_HALF, pws->h[6],  pwt->h[6]);
   5013     pwd->h[7]  = msa_sra_df(DF_HALF, pws->h[7],  pwt->h[7]);
   5014 }
   5015 
   5016 void helper_msa_sra_w(CPUMIPSState *env,
   5017                       uint32_t wd, uint32_t ws, uint32_t wt)
   5018 {
   5019     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5020     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5021     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5022 
   5023     pwd->w[0]  = msa_sra_df(DF_WORD, pws->w[0],  pwt->w[0]);
   5024     pwd->w[1]  = msa_sra_df(DF_WORD, pws->w[1],  pwt->w[1]);
   5025     pwd->w[2]  = msa_sra_df(DF_WORD, pws->w[2],  pwt->w[2]);
   5026     pwd->w[3]  = msa_sra_df(DF_WORD, pws->w[3],  pwt->w[3]);
   5027 }
   5028 
   5029 void helper_msa_sra_d(CPUMIPSState *env,
   5030                       uint32_t wd, uint32_t ws, uint32_t wt)
   5031 {
   5032     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5033     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5034     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5035 
   5036     pwd->d[0]  = msa_sra_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   5037     pwd->d[1]  = msa_sra_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   5038 }
   5039 
   5040 
   5041 static inline int64_t msa_srar_df(uint32_t df, int64_t arg1, int64_t arg2)
   5042 {
   5043     int32_t b_arg2 = BIT_POSITION(arg2, df);
   5044     if (b_arg2 == 0) {
   5045         return arg1;
   5046     } else {
   5047         int64_t r_bit = (arg1 >> (b_arg2 - 1)) & 1;
   5048         return (arg1 >> b_arg2) + r_bit;
   5049     }
   5050 }
   5051 
   5052 void helper_msa_srar_b(CPUMIPSState *env,
   5053                        uint32_t wd, uint32_t ws, uint32_t wt)
   5054 {
   5055     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5056     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5057     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5058 
   5059     pwd->b[0]  = msa_srar_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   5060     pwd->b[1]  = msa_srar_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   5061     pwd->b[2]  = msa_srar_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   5062     pwd->b[3]  = msa_srar_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   5063     pwd->b[4]  = msa_srar_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   5064     pwd->b[5]  = msa_srar_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   5065     pwd->b[6]  = msa_srar_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   5066     pwd->b[7]  = msa_srar_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   5067     pwd->b[8]  = msa_srar_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   5068     pwd->b[9]  = msa_srar_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   5069     pwd->b[10] = msa_srar_df(DF_BYTE, pws->b[10], pwt->b[10]);
   5070     pwd->b[11] = msa_srar_df(DF_BYTE, pws->b[11], pwt->b[11]);
   5071     pwd->b[12] = msa_srar_df(DF_BYTE, pws->b[12], pwt->b[12]);
   5072     pwd->b[13] = msa_srar_df(DF_BYTE, pws->b[13], pwt->b[13]);
   5073     pwd->b[14] = msa_srar_df(DF_BYTE, pws->b[14], pwt->b[14]);
   5074     pwd->b[15] = msa_srar_df(DF_BYTE, pws->b[15], pwt->b[15]);
   5075 }
   5076 
   5077 void helper_msa_srar_h(CPUMIPSState *env,
   5078                        uint32_t wd, uint32_t ws, uint32_t wt)
   5079 {
   5080     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5081     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5082     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5083 
   5084     pwd->h[0]  = msa_srar_df(DF_HALF, pws->h[0],  pwt->h[0]);
   5085     pwd->h[1]  = msa_srar_df(DF_HALF, pws->h[1],  pwt->h[1]);
   5086     pwd->h[2]  = msa_srar_df(DF_HALF, pws->h[2],  pwt->h[2]);
   5087     pwd->h[3]  = msa_srar_df(DF_HALF, pws->h[3],  pwt->h[3]);
   5088     pwd->h[4]  = msa_srar_df(DF_HALF, pws->h[4],  pwt->h[4]);
   5089     pwd->h[5]  = msa_srar_df(DF_HALF, pws->h[5],  pwt->h[5]);
   5090     pwd->h[6]  = msa_srar_df(DF_HALF, pws->h[6],  pwt->h[6]);
   5091     pwd->h[7]  = msa_srar_df(DF_HALF, pws->h[7],  pwt->h[7]);
   5092 }
   5093 
   5094 void helper_msa_srar_w(CPUMIPSState *env,
   5095                        uint32_t wd, uint32_t ws, uint32_t wt)
   5096 {
   5097     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5098     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5099     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5100 
   5101     pwd->w[0]  = msa_srar_df(DF_WORD, pws->w[0],  pwt->w[0]);
   5102     pwd->w[1]  = msa_srar_df(DF_WORD, pws->w[1],  pwt->w[1]);
   5103     pwd->w[2]  = msa_srar_df(DF_WORD, pws->w[2],  pwt->w[2]);
   5104     pwd->w[3]  = msa_srar_df(DF_WORD, pws->w[3],  pwt->w[3]);
   5105 }
   5106 
   5107 void helper_msa_srar_d(CPUMIPSState *env,
   5108                        uint32_t wd, uint32_t ws, uint32_t wt)
   5109 {
   5110     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5111     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5112     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5113 
   5114     pwd->d[0]  = msa_srar_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   5115     pwd->d[1]  = msa_srar_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   5116 }
   5117 
   5118 
   5119 static inline int64_t msa_srl_df(uint32_t df, int64_t arg1, int64_t arg2)
   5120 {
   5121     uint64_t u_arg1 = UNSIGNED(arg1, df);
   5122     int32_t b_arg2 = BIT_POSITION(arg2, df);
   5123     return u_arg1 >> b_arg2;
   5124 }
   5125 
   5126 void helper_msa_srl_b(CPUMIPSState *env,
   5127                       uint32_t wd, uint32_t ws, uint32_t wt)
   5128 {
   5129     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5130     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5131     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5132 
   5133     pwd->b[0]  = msa_srl_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   5134     pwd->b[1]  = msa_srl_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   5135     pwd->b[2]  = msa_srl_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   5136     pwd->b[3]  = msa_srl_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   5137     pwd->b[4]  = msa_srl_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   5138     pwd->b[5]  = msa_srl_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   5139     pwd->b[6]  = msa_srl_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   5140     pwd->b[7]  = msa_srl_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   5141     pwd->b[8]  = msa_srl_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   5142     pwd->b[9]  = msa_srl_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   5143     pwd->b[10] = msa_srl_df(DF_BYTE, pws->b[10], pwt->b[10]);
   5144     pwd->b[11] = msa_srl_df(DF_BYTE, pws->b[11], pwt->b[11]);
   5145     pwd->b[12] = msa_srl_df(DF_BYTE, pws->b[12], pwt->b[12]);
   5146     pwd->b[13] = msa_srl_df(DF_BYTE, pws->b[13], pwt->b[13]);
   5147     pwd->b[14] = msa_srl_df(DF_BYTE, pws->b[14], pwt->b[14]);
   5148     pwd->b[15] = msa_srl_df(DF_BYTE, pws->b[15], pwt->b[15]);
   5149 }
   5150 
   5151 void helper_msa_srl_h(CPUMIPSState *env,
   5152                       uint32_t wd, uint32_t ws, uint32_t wt)
   5153 {
   5154     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5155     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5156     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5157 
   5158     pwd->h[0]  = msa_srl_df(DF_HALF, pws->h[0],  pwt->h[0]);
   5159     pwd->h[1]  = msa_srl_df(DF_HALF, pws->h[1],  pwt->h[1]);
   5160     pwd->h[2]  = msa_srl_df(DF_HALF, pws->h[2],  pwt->h[2]);
   5161     pwd->h[3]  = msa_srl_df(DF_HALF, pws->h[3],  pwt->h[3]);
   5162     pwd->h[4]  = msa_srl_df(DF_HALF, pws->h[4],  pwt->h[4]);
   5163     pwd->h[5]  = msa_srl_df(DF_HALF, pws->h[5],  pwt->h[5]);
   5164     pwd->h[6]  = msa_srl_df(DF_HALF, pws->h[6],  pwt->h[6]);
   5165     pwd->h[7]  = msa_srl_df(DF_HALF, pws->h[7],  pwt->h[7]);
   5166 }
   5167 
   5168 void helper_msa_srl_w(CPUMIPSState *env,
   5169                       uint32_t wd, uint32_t ws, uint32_t wt)
   5170 {
   5171     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5172     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5173     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5174 
   5175     pwd->w[0]  = msa_srl_df(DF_WORD, pws->w[0],  pwt->w[0]);
   5176     pwd->w[1]  = msa_srl_df(DF_WORD, pws->w[1],  pwt->w[1]);
   5177     pwd->w[2]  = msa_srl_df(DF_WORD, pws->w[2],  pwt->w[2]);
   5178     pwd->w[3]  = msa_srl_df(DF_WORD, pws->w[3],  pwt->w[3]);
   5179 }
   5180 
   5181 void helper_msa_srl_d(CPUMIPSState *env,
   5182                       uint32_t wd, uint32_t ws, uint32_t wt)
   5183 {
   5184     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5185     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5186     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5187 
   5188     pwd->d[0]  = msa_srl_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   5189     pwd->d[1]  = msa_srl_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   5190 }
   5191 
   5192 
   5193 static inline int64_t msa_srlr_df(uint32_t df, int64_t arg1, int64_t arg2)
   5194 {
   5195     uint64_t u_arg1 = UNSIGNED(arg1, df);
   5196     int32_t b_arg2 = BIT_POSITION(arg2, df);
   5197     if (b_arg2 == 0) {
   5198         return u_arg1;
   5199     } else {
   5200         uint64_t r_bit = (u_arg1 >> (b_arg2 - 1)) & 1;
   5201         return (u_arg1 >> b_arg2) + r_bit;
   5202     }
   5203 }
   5204 
   5205 void helper_msa_srlr_b(CPUMIPSState *env,
   5206                        uint32_t wd, uint32_t ws, uint32_t wt)
   5207 {
   5208     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5209     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5210     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5211 
   5212     pwd->b[0]  = msa_srlr_df(DF_BYTE, pws->b[0],  pwt->b[0]);
   5213     pwd->b[1]  = msa_srlr_df(DF_BYTE, pws->b[1],  pwt->b[1]);
   5214     pwd->b[2]  = msa_srlr_df(DF_BYTE, pws->b[2],  pwt->b[2]);
   5215     pwd->b[3]  = msa_srlr_df(DF_BYTE, pws->b[3],  pwt->b[3]);
   5216     pwd->b[4]  = msa_srlr_df(DF_BYTE, pws->b[4],  pwt->b[4]);
   5217     pwd->b[5]  = msa_srlr_df(DF_BYTE, pws->b[5],  pwt->b[5]);
   5218     pwd->b[6]  = msa_srlr_df(DF_BYTE, pws->b[6],  pwt->b[6]);
   5219     pwd->b[7]  = msa_srlr_df(DF_BYTE, pws->b[7],  pwt->b[7]);
   5220     pwd->b[8]  = msa_srlr_df(DF_BYTE, pws->b[8],  pwt->b[8]);
   5221     pwd->b[9]  = msa_srlr_df(DF_BYTE, pws->b[9],  pwt->b[9]);
   5222     pwd->b[10] = msa_srlr_df(DF_BYTE, pws->b[10], pwt->b[10]);
   5223     pwd->b[11] = msa_srlr_df(DF_BYTE, pws->b[11], pwt->b[11]);
   5224     pwd->b[12] = msa_srlr_df(DF_BYTE, pws->b[12], pwt->b[12]);
   5225     pwd->b[13] = msa_srlr_df(DF_BYTE, pws->b[13], pwt->b[13]);
   5226     pwd->b[14] = msa_srlr_df(DF_BYTE, pws->b[14], pwt->b[14]);
   5227     pwd->b[15] = msa_srlr_df(DF_BYTE, pws->b[15], pwt->b[15]);
   5228 }
   5229 
   5230 void helper_msa_srlr_h(CPUMIPSState *env,
   5231                        uint32_t wd, uint32_t ws, uint32_t wt)
   5232 {
   5233     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5234     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5235     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5236 
   5237     pwd->h[0]  = msa_srlr_df(DF_HALF, pws->h[0],  pwt->h[0]);
   5238     pwd->h[1]  = msa_srlr_df(DF_HALF, pws->h[1],  pwt->h[1]);
   5239     pwd->h[2]  = msa_srlr_df(DF_HALF, pws->h[2],  pwt->h[2]);
   5240     pwd->h[3]  = msa_srlr_df(DF_HALF, pws->h[3],  pwt->h[3]);
   5241     pwd->h[4]  = msa_srlr_df(DF_HALF, pws->h[4],  pwt->h[4]);
   5242     pwd->h[5]  = msa_srlr_df(DF_HALF, pws->h[5],  pwt->h[5]);
   5243     pwd->h[6]  = msa_srlr_df(DF_HALF, pws->h[6],  pwt->h[6]);
   5244     pwd->h[7]  = msa_srlr_df(DF_HALF, pws->h[7],  pwt->h[7]);
   5245 }
   5246 
   5247 void helper_msa_srlr_w(CPUMIPSState *env,
   5248                        uint32_t wd, uint32_t ws, uint32_t wt)
   5249 {
   5250     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5251     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5252     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5253 
   5254     pwd->w[0]  = msa_srlr_df(DF_WORD, pws->w[0],  pwt->w[0]);
   5255     pwd->w[1]  = msa_srlr_df(DF_WORD, pws->w[1],  pwt->w[1]);
   5256     pwd->w[2]  = msa_srlr_df(DF_WORD, pws->w[2],  pwt->w[2]);
   5257     pwd->w[3]  = msa_srlr_df(DF_WORD, pws->w[3],  pwt->w[3]);
   5258 }
   5259 
   5260 void helper_msa_srlr_d(CPUMIPSState *env,
   5261                        uint32_t wd, uint32_t ws, uint32_t wt)
   5262 {
   5263     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5264     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5265     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   5266 
   5267     pwd->d[0]  = msa_srlr_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
   5268     pwd->d[1]  = msa_srlr_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
   5269 }
   5270 
   5271 
   5272 #define MSA_FN_IMM8(FUNC, DEST, OPERATION)                              \
   5273 void helper_msa_ ## FUNC(CPUMIPSState *env, uint32_t wd, uint32_t ws,   \
   5274         uint32_t i8)                                                    \
   5275 {                                                                       \
   5276     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
   5277     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
   5278     uint32_t i;                                                         \
   5279     for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                        \
   5280         DEST = OPERATION;                                               \
   5281     }                                                                   \
   5282 }
   5283 
   5284 MSA_FN_IMM8(andi_b, pwd->b[i], pws->b[i] & i8)
   5285 MSA_FN_IMM8(ori_b, pwd->b[i], pws->b[i] | i8)
   5286 MSA_FN_IMM8(nori_b, pwd->b[i], ~(pws->b[i] | i8))
   5287 MSA_FN_IMM8(xori_b, pwd->b[i], pws->b[i] ^ i8)
   5288 
   5289 #define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
   5290             UNSIGNED(((dest & (~arg2)) | (arg1 & arg2)), df)
   5291 MSA_FN_IMM8(bmnzi_b, pwd->b[i],
   5292         BIT_MOVE_IF_NOT_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
   5293 
   5294 #define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
   5295             UNSIGNED((dest & arg2) | (arg1 & (~arg2)), df)
   5296 MSA_FN_IMM8(bmzi_b, pwd->b[i],
   5297         BIT_MOVE_IF_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
   5298 
   5299 #define BIT_SELECT(dest, arg1, arg2, df) \
   5300             UNSIGNED((arg1 & (~dest)) | (arg2 & dest), df)
   5301 MSA_FN_IMM8(bseli_b, pwd->b[i],
   5302         BIT_SELECT(pwd->b[i], pws->b[i], i8, DF_BYTE))
   5303 
   5304 #undef BIT_SELECT
   5305 #undef BIT_MOVE_IF_ZERO
   5306 #undef BIT_MOVE_IF_NOT_ZERO
   5307 #undef MSA_FN_IMM8
   5308 
   5309 #define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
   5310 
   5311 void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   5312                        uint32_t ws, uint32_t imm)
   5313 {
   5314     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5315     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5316     wr_t wx, *pwx = &wx;
   5317     uint32_t i;
   5318 
   5319     switch (df) {
   5320     case DF_BYTE:
   5321         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
   5322             pwx->b[i] = pws->b[SHF_POS(i, imm)];
   5323         }
   5324         break;
   5325     case DF_HALF:
   5326         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
   5327             pwx->h[i] = pws->h[SHF_POS(i, imm)];
   5328         }
   5329         break;
   5330     case DF_WORD:
   5331         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   5332             pwx->w[i] = pws->w[SHF_POS(i, imm)];
   5333         }
   5334         break;
   5335     default:
   5336         assert(0);
   5337     }
   5338     msa_move_v(pwd, pwx);
   5339 }
   5340 
   5341 #define MSA_BINOP_IMM_DF(helper, func)                                  \
   5342 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
   5343                         uint32_t wd, uint32_t ws, int32_t u5)           \
   5344 {                                                                       \
   5345     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
   5346     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
   5347     uint32_t i;                                                         \
   5348                                                                         \
   5349     switch (df) {                                                       \
   5350     case DF_BYTE:                                                       \
   5351         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
   5352             pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
   5353         }                                                               \
   5354         break;                                                          \
   5355     case DF_HALF:                                                       \
   5356         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
   5357             pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
   5358         }                                                               \
   5359         break;                                                          \
   5360     case DF_WORD:                                                       \
   5361         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
   5362             pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
   5363         }                                                               \
   5364         break;                                                          \
   5365     case DF_DOUBLE:                                                     \
   5366         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
   5367             pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
   5368         }                                                               \
   5369         break;                                                          \
   5370     default:                                                            \
   5371         assert(0);                                                      \
   5372     }                                                                   \
   5373 }
   5374 
   5375 MSA_BINOP_IMM_DF(addvi, addv)
   5376 MSA_BINOP_IMM_DF(subvi, subv)
   5377 MSA_BINOP_IMM_DF(ceqi, ceq)
   5378 MSA_BINOP_IMM_DF(clei_s, cle_s)
   5379 MSA_BINOP_IMM_DF(clei_u, cle_u)
   5380 MSA_BINOP_IMM_DF(clti_s, clt_s)
   5381 MSA_BINOP_IMM_DF(clti_u, clt_u)
   5382 MSA_BINOP_IMM_DF(maxi_s, max_s)
   5383 MSA_BINOP_IMM_DF(maxi_u, max_u)
   5384 MSA_BINOP_IMM_DF(mini_s, min_s)
   5385 MSA_BINOP_IMM_DF(mini_u, min_u)
   5386 #undef MSA_BINOP_IMM_DF
   5387 
   5388 void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   5389                        int32_t s10)
   5390 {
   5391     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5392     uint32_t i;
   5393 
   5394     switch (df) {
   5395     case DF_BYTE:
   5396         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
   5397             pwd->b[i] = (int8_t)s10;
   5398         }
   5399         break;
   5400     case DF_HALF:
   5401         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
   5402             pwd->h[i] = (int16_t)s10;
   5403         }
   5404         break;
   5405     case DF_WORD:
   5406         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   5407             pwd->w[i] = (int32_t)s10;
   5408         }
   5409         break;
   5410     case DF_DOUBLE:
   5411         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   5412             pwd->d[i] = (int64_t)s10;
   5413         }
   5414        break;
   5415     default:
   5416         assert(0);
   5417     }
   5418 }
   5419 
   5420 static inline int64_t msa_sat_s_df(uint32_t df, int64_t arg, uint32_t m)
   5421 {
   5422     return arg < M_MIN_INT(m + 1) ? M_MIN_INT(m + 1) :
   5423                                     arg > M_MAX_INT(m + 1) ? M_MAX_INT(m + 1) :
   5424                                                              arg;
   5425 }
   5426 
   5427 static inline int64_t msa_sat_u_df(uint32_t df, int64_t arg, uint32_t m)
   5428 {
   5429     uint64_t u_arg = UNSIGNED(arg, df);
   5430     return  u_arg < M_MAX_UINT(m + 1) ? u_arg :
   5431                                         M_MAX_UINT(m + 1);
   5432 }
   5433 
   5434 #define MSA_BINOP_IMMU_DF(helper, func)                                  \
   5435 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \
   5436                        uint32_t ws, uint32_t u5)                        \
   5437 {                                                                       \
   5438     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
   5439     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
   5440     uint32_t i;                                                         \
   5441                                                                         \
   5442     switch (df) {                                                       \
   5443     case DF_BYTE:                                                       \
   5444         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
   5445             pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
   5446         }                                                               \
   5447         break;                                                          \
   5448     case DF_HALF:                                                       \
   5449         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
   5450             pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
   5451         }                                                               \
   5452         break;                                                          \
   5453     case DF_WORD:                                                       \
   5454         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
   5455             pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
   5456         }                                                               \
   5457         break;                                                          \
   5458     case DF_DOUBLE:                                                     \
   5459         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
   5460             pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
   5461         }                                                               \
   5462         break;                                                          \
   5463     default:                                                            \
   5464         assert(0);                                                      \
   5465     }                                                                   \
   5466 }
   5467 
   5468 MSA_BINOP_IMMU_DF(slli, sll)
   5469 MSA_BINOP_IMMU_DF(srai, sra)
   5470 MSA_BINOP_IMMU_DF(srli, srl)
   5471 MSA_BINOP_IMMU_DF(bclri, bclr)
   5472 MSA_BINOP_IMMU_DF(bseti, bset)
   5473 MSA_BINOP_IMMU_DF(bnegi, bneg)
   5474 MSA_BINOP_IMMU_DF(sat_s, sat_s)
   5475 MSA_BINOP_IMMU_DF(sat_u, sat_u)
   5476 MSA_BINOP_IMMU_DF(srari, srar)
   5477 MSA_BINOP_IMMU_DF(srlri, srlr)
   5478 #undef MSA_BINOP_IMMU_DF
   5479 
   5480 #define MSA_TEROP_IMMU_DF(helper, func)                                  \
   5481 void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
   5482                                   uint32_t wd, uint32_t ws, uint32_t u5) \
   5483 {                                                                       \
   5484     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
   5485     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
   5486     uint32_t i;                                                         \
   5487                                                                         \
   5488     switch (df) {                                                       \
   5489     case DF_BYTE:                                                       \
   5490         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
   5491             pwd->b[i] = msa_ ## func ## _df(df, pwd->b[i], pws->b[i],   \
   5492                                             u5);                        \
   5493         }                                                               \
   5494         break;                                                          \
   5495     case DF_HALF:                                                       \
   5496         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
   5497             pwd->h[i] = msa_ ## func ## _df(df, pwd->h[i], pws->h[i],   \
   5498                                             u5);                        \
   5499         }                                                               \
   5500         break;                                                          \
   5501     case DF_WORD:                                                       \
   5502         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
   5503             pwd->w[i] = msa_ ## func ## _df(df, pwd->w[i], pws->w[i],   \
   5504                                             u5);                        \
   5505         }                                                               \
   5506         break;                                                          \
   5507     case DF_DOUBLE:                                                     \
   5508         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
   5509             pwd->d[i] = msa_ ## func ## _df(df, pwd->d[i], pws->d[i],   \
   5510                                             u5);                        \
   5511         }                                                               \
   5512         break;                                                          \
   5513     default:                                                            \
   5514         assert(0);                                                      \
   5515     }                                                                   \
   5516 }
   5517 
   5518 MSA_TEROP_IMMU_DF(binsli, binsl)
   5519 MSA_TEROP_IMMU_DF(binsri, binsr)
   5520 #undef MSA_TEROP_IMMU_DF
   5521 
   5522 #define CONCATENATE_AND_SLIDE(s, k)             \
   5523     do {                                        \
   5524         for (i = 0; i < s; i++) {               \
   5525             v[i]     = pws->b[s * k + i];       \
   5526             v[i + s] = pwd->b[s * k + i];       \
   5527         }                                       \
   5528         for (i = 0; i < s; i++) {               \
   5529             pwd->b[s * k + i] = v[i + n];       \
   5530         }                                       \
   5531     } while (0)
   5532 
   5533 static inline void msa_sld_df(uint32_t df, wr_t *pwd,
   5534                               wr_t *pws, target_ulong rt)
   5535 {
   5536     uint32_t n = rt % DF_ELEMENTS(df);
   5537     uint8_t v[64];
   5538     uint32_t i, k;
   5539 
   5540     switch (df) {
   5541     case DF_BYTE:
   5542         CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_BYTE), 0);
   5543         break;
   5544     case DF_HALF:
   5545         for (k = 0; k < 2; k++) {
   5546             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_HALF), k);
   5547         }
   5548         break;
   5549     case DF_WORD:
   5550         for (k = 0; k < 4; k++) {
   5551             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_WORD), k);
   5552         }
   5553         break;
   5554     case DF_DOUBLE:
   5555         for (k = 0; k < 8; k++) {
   5556             CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_DOUBLE), k);
   5557         }
   5558         break;
   5559     default:
   5560         assert(0);
   5561     }
   5562 }
   5563 
   5564 static inline int64_t msa_mul_q_df(uint32_t df, int64_t arg1, int64_t arg2)
   5565 {
   5566     int64_t q_min = DF_MIN_INT(df);
   5567     int64_t q_max = DF_MAX_INT(df);
   5568 
   5569     if (arg1 == q_min && arg2 == q_min) {
   5570         return q_max;
   5571     }
   5572     return (arg1 * arg2) >> (DF_BITS(df) - 1);
   5573 }
   5574 
   5575 static inline int64_t msa_mulr_q_df(uint32_t df, int64_t arg1, int64_t arg2)
   5576 {
   5577     int64_t q_min = DF_MIN_INT(df);
   5578     int64_t q_max = DF_MAX_INT(df);
   5579     int64_t r_bit = 1 << (DF_BITS(df) - 2);
   5580 
   5581     if (arg1 == q_min && arg2 == q_min) {
   5582         return q_max;
   5583     }
   5584     return (arg1 * arg2 + r_bit) >> (DF_BITS(df) - 1);
   5585 }
   5586 
   5587 #define MSA_BINOP_DF(func) \
   5588 void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df,         \
   5589                                 uint32_t wd, uint32_t ws, uint32_t wt)  \
   5590 {                                                                       \
   5591     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
   5592     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
   5593     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                          \
   5594                                                                         \
   5595     switch (df) {                                                       \
   5596     case DF_BYTE:                                                       \
   5597         pwd->b[0]  = msa_ ## func ## _df(df, pws->b[0],  pwt->b[0]);    \
   5598         pwd->b[1]  = msa_ ## func ## _df(df, pws->b[1],  pwt->b[1]);    \
   5599         pwd->b[2]  = msa_ ## func ## _df(df, pws->b[2],  pwt->b[2]);    \
   5600         pwd->b[3]  = msa_ ## func ## _df(df, pws->b[3],  pwt->b[3]);    \
   5601         pwd->b[4]  = msa_ ## func ## _df(df, pws->b[4],  pwt->b[4]);    \
   5602         pwd->b[5]  = msa_ ## func ## _df(df, pws->b[5],  pwt->b[5]);    \
   5603         pwd->b[6]  = msa_ ## func ## _df(df, pws->b[6],  pwt->b[6]);    \
   5604         pwd->b[7]  = msa_ ## func ## _df(df, pws->b[7],  pwt->b[7]);    \
   5605         pwd->b[8]  = msa_ ## func ## _df(df, pws->b[8],  pwt->b[8]);    \
   5606         pwd->b[9]  = msa_ ## func ## _df(df, pws->b[9],  pwt->b[9]);    \
   5607         pwd->b[10] = msa_ ## func ## _df(df, pws->b[10], pwt->b[10]);   \
   5608         pwd->b[11] = msa_ ## func ## _df(df, pws->b[11], pwt->b[11]);   \
   5609         pwd->b[12] = msa_ ## func ## _df(df, pws->b[12], pwt->b[12]);   \
   5610         pwd->b[13] = msa_ ## func ## _df(df, pws->b[13], pwt->b[13]);   \
   5611         pwd->b[14] = msa_ ## func ## _df(df, pws->b[14], pwt->b[14]);   \
   5612         pwd->b[15] = msa_ ## func ## _df(df, pws->b[15], pwt->b[15]);   \
   5613         break;                                                          \
   5614     case DF_HALF:                                                       \
   5615         pwd->h[0] = msa_ ## func ## _df(df, pws->h[0], pwt->h[0]);      \
   5616         pwd->h[1] = msa_ ## func ## _df(df, pws->h[1], pwt->h[1]);      \
   5617         pwd->h[2] = msa_ ## func ## _df(df, pws->h[2], pwt->h[2]);      \
   5618         pwd->h[3] = msa_ ## func ## _df(df, pws->h[3], pwt->h[3]);      \
   5619         pwd->h[4] = msa_ ## func ## _df(df, pws->h[4], pwt->h[4]);      \
   5620         pwd->h[5] = msa_ ## func ## _df(df, pws->h[5], pwt->h[5]);      \
   5621         pwd->h[6] = msa_ ## func ## _df(df, pws->h[6], pwt->h[6]);      \
   5622         pwd->h[7] = msa_ ## func ## _df(df, pws->h[7], pwt->h[7]);      \
   5623         break;                                                          \
   5624     case DF_WORD:                                                       \
   5625         pwd->w[0] = msa_ ## func ## _df(df, pws->w[0], pwt->w[0]);      \
   5626         pwd->w[1] = msa_ ## func ## _df(df, pws->w[1], pwt->w[1]);      \
   5627         pwd->w[2] = msa_ ## func ## _df(df, pws->w[2], pwt->w[2]);      \
   5628         pwd->w[3] = msa_ ## func ## _df(df, pws->w[3], pwt->w[3]);      \
   5629         break;                                                          \
   5630     case DF_DOUBLE:                                                     \
   5631         pwd->d[0] = msa_ ## func ## _df(df, pws->d[0], pwt->d[0]);      \
   5632         pwd->d[1] = msa_ ## func ## _df(df, pws->d[1], pwt->d[1]);      \
   5633         break;                                                          \
   5634     default:                                                            \
   5635         assert(0);                                                      \
   5636     }                                                                   \
   5637 }
   5638 
   5639 MSA_BINOP_DF(mul_q)
   5640 MSA_BINOP_DF(mulr_q)
   5641 #undef MSA_BINOP_DF
   5642 
   5643 void helper_msa_sld_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   5644                        uint32_t ws, uint32_t rt)
   5645 {
   5646     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5647     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5648 
   5649     msa_sld_df(df, pwd, pws, env->active_tc.gpr[rt]);
   5650 }
   5651 
   5652 static inline int64_t msa_madd_q_df(uint32_t df, int64_t dest, int64_t arg1,
   5653                                     int64_t arg2)
   5654 {
   5655     int64_t q_prod, q_ret;
   5656 
   5657     int64_t q_max = DF_MAX_INT(df);
   5658     int64_t q_min = DF_MIN_INT(df);
   5659 
   5660     q_prod = arg1 * arg2;
   5661     q_ret = ((dest << (DF_BITS(df) - 1)) + q_prod) >> (DF_BITS(df) - 1);
   5662 
   5663     return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
   5664 }
   5665 
   5666 static inline int64_t msa_msub_q_df(uint32_t df, int64_t dest, int64_t arg1,
   5667                                     int64_t arg2)
   5668 {
   5669     int64_t q_prod, q_ret;
   5670 
   5671     int64_t q_max = DF_MAX_INT(df);
   5672     int64_t q_min = DF_MIN_INT(df);
   5673 
   5674     q_prod = arg1 * arg2;
   5675     q_ret = ((dest << (DF_BITS(df) - 1)) - q_prod) >> (DF_BITS(df) - 1);
   5676 
   5677     return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
   5678 }
   5679 
   5680 static inline int64_t msa_maddr_q_df(uint32_t df, int64_t dest, int64_t arg1,
   5681                                      int64_t arg2)
   5682 {
   5683     int64_t q_prod, q_ret;
   5684 
   5685     int64_t q_max = DF_MAX_INT(df);
   5686     int64_t q_min = DF_MIN_INT(df);
   5687     int64_t r_bit = 1 << (DF_BITS(df) - 2);
   5688 
   5689     q_prod = arg1 * arg2;
   5690     q_ret = ((dest << (DF_BITS(df) - 1)) + q_prod + r_bit) >> (DF_BITS(df) - 1);
   5691 
   5692     return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
   5693 }
   5694 
   5695 static inline int64_t msa_msubr_q_df(uint32_t df, int64_t dest, int64_t arg1,
   5696                                      int64_t arg2)
   5697 {
   5698     int64_t q_prod, q_ret;
   5699 
   5700     int64_t q_max = DF_MAX_INT(df);
   5701     int64_t q_min = DF_MIN_INT(df);
   5702     int64_t r_bit = 1 << (DF_BITS(df) - 2);
   5703 
   5704     q_prod = arg1 * arg2;
   5705     q_ret = ((dest << (DF_BITS(df) - 1)) - q_prod + r_bit) >> (DF_BITS(df) - 1);
   5706 
   5707     return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
   5708 }
   5709 
   5710 #define MSA_TEROP_DF(func) \
   5711 void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd,  \
   5712                                 uint32_t ws, uint32_t wt)                     \
   5713 {                                                                             \
   5714     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                                \
   5715     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                                \
   5716     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                                \
   5717                                                                               \
   5718     switch (df) {                                                             \
   5719     case DF_BYTE:                                                             \
   5720         pwd->b[0]  = msa_ ## func ## _df(df, pwd->b[0],  pws->b[0],           \
   5721                                              pwt->b[0]);                      \
   5722         pwd->b[1]  = msa_ ## func ## _df(df, pwd->b[1],  pws->b[1],           \
   5723                                              pwt->b[1]);                      \
   5724         pwd->b[2]  = msa_ ## func ## _df(df, pwd->b[2],  pws->b[2],           \
   5725                                              pwt->b[2]);                      \
   5726         pwd->b[3]  = msa_ ## func ## _df(df, pwd->b[3],  pws->b[3],           \
   5727                                              pwt->b[3]);                      \
   5728         pwd->b[4]  = msa_ ## func ## _df(df, pwd->b[4],  pws->b[4],           \
   5729                                              pwt->b[4]);                      \
   5730         pwd->b[5]  = msa_ ## func ## _df(df, pwd->b[5],  pws->b[5],           \
   5731                                              pwt->b[5]);                      \
   5732         pwd->b[6]  = msa_ ## func ## _df(df, pwd->b[6],  pws->b[6],           \
   5733                                              pwt->b[6]);                      \
   5734         pwd->b[7]  = msa_ ## func ## _df(df, pwd->b[7],  pws->b[7],           \
   5735                                              pwt->b[7]);                      \
   5736         pwd->b[8]  = msa_ ## func ## _df(df, pwd->b[8],  pws->b[8],           \
   5737                                              pwt->b[8]);                      \
   5738         pwd->b[9]  = msa_ ## func ## _df(df, pwd->b[9],  pws->b[9],           \
   5739                                              pwt->b[9]);                      \
   5740         pwd->b[10] = msa_ ## func ## _df(df, pwd->b[10], pws->b[10],          \
   5741                                              pwt->b[10]);                     \
   5742         pwd->b[11] = msa_ ## func ## _df(df, pwd->b[11], pws->b[11],          \
   5743                                              pwt->b[11]);                     \
   5744         pwd->b[12] = msa_ ## func ## _df(df, pwd->b[12], pws->b[12],          \
   5745                                              pwt->b[12]);                     \
   5746         pwd->b[13] = msa_ ## func ## _df(df, pwd->b[13], pws->b[13],          \
   5747                                              pwt->b[13]);                     \
   5748         pwd->b[14] = msa_ ## func ## _df(df, pwd->b[14], pws->b[14],          \
   5749                                              pwt->b[14]);                     \
   5750         pwd->b[15] = msa_ ## func ## _df(df, pwd->b[15], pws->b[15],          \
   5751                                              pwt->b[15]);                     \
   5752         break;                                                                \
   5753     case DF_HALF:                                                             \
   5754         pwd->h[0] = msa_ ## func ## _df(df, pwd->h[0], pws->h[0], pwt->h[0]); \
   5755         pwd->h[1] = msa_ ## func ## _df(df, pwd->h[1], pws->h[1], pwt->h[1]); \
   5756         pwd->h[2] = msa_ ## func ## _df(df, pwd->h[2], pws->h[2], pwt->h[2]); \
   5757         pwd->h[3] = msa_ ## func ## _df(df, pwd->h[3], pws->h[3], pwt->h[3]); \
   5758         pwd->h[4] = msa_ ## func ## _df(df, pwd->h[4], pws->h[4], pwt->h[4]); \
   5759         pwd->h[5] = msa_ ## func ## _df(df, pwd->h[5], pws->h[5], pwt->h[5]); \
   5760         pwd->h[6] = msa_ ## func ## _df(df, pwd->h[6], pws->h[6], pwt->h[6]); \
   5761         pwd->h[7] = msa_ ## func ## _df(df, pwd->h[7], pws->h[7], pwt->h[7]); \
   5762         break;                                                                \
   5763     case DF_WORD:                                                             \
   5764         pwd->w[0] = msa_ ## func ## _df(df, pwd->w[0], pws->w[0], pwt->w[0]); \
   5765         pwd->w[1] = msa_ ## func ## _df(df, pwd->w[1], pws->w[1], pwt->w[1]); \
   5766         pwd->w[2] = msa_ ## func ## _df(df, pwd->w[2], pws->w[2], pwt->w[2]); \
   5767         pwd->w[3] = msa_ ## func ## _df(df, pwd->w[3], pws->w[3], pwt->w[3]); \
   5768         break;                                                                \
   5769     case DF_DOUBLE:                                                           \
   5770         pwd->d[0] = msa_ ## func ## _df(df, pwd->d[0], pws->d[0], pwt->d[0]); \
   5771         pwd->d[1] = msa_ ## func ## _df(df, pwd->d[1], pws->d[1], pwt->d[1]); \
   5772         break;                                                                \
   5773     default:                                                                  \
   5774         assert(0);                                                            \
   5775     }                                                                         \
   5776 }
   5777 
   5778 MSA_TEROP_DF(binsl)
   5779 MSA_TEROP_DF(binsr)
   5780 MSA_TEROP_DF(madd_q)
   5781 MSA_TEROP_DF(msub_q)
   5782 MSA_TEROP_DF(maddr_q)
   5783 MSA_TEROP_DF(msubr_q)
   5784 #undef MSA_TEROP_DF
   5785 
   5786 static inline void msa_splat_df(uint32_t df, wr_t *pwd,
   5787                                 wr_t *pws, target_ulong rt)
   5788 {
   5789     uint32_t n = rt % DF_ELEMENTS(df);
   5790     uint32_t i;
   5791 
   5792     switch (df) {
   5793     case DF_BYTE:
   5794         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
   5795             pwd->b[i] = pws->b[n];
   5796         }
   5797         break;
   5798     case DF_HALF:
   5799         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
   5800             pwd->h[i] = pws->h[n];
   5801         }
   5802         break;
   5803     case DF_WORD:
   5804         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   5805             pwd->w[i] = pws->w[n];
   5806         }
   5807         break;
   5808     case DF_DOUBLE:
   5809         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   5810             pwd->d[i] = pws->d[n];
   5811         }
   5812        break;
   5813     default:
   5814         assert(0);
   5815     }
   5816 }
   5817 
   5818 void helper_msa_splat_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   5819                          uint32_t ws, uint32_t rt)
   5820 {
   5821     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5822     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5823 
   5824     msa_splat_df(df, pwd, pws, env->active_tc.gpr[rt]);
   5825 }
   5826 
/*
 * Per-format aliases used by MSA_LOOP() below.
 *
 * MSA_DO_<X> expands to MSA_DO() with the lower-case format letter.  MSA_DO
 * is not defined at this point; it has to be defined by whoever expands
 * these macros.
 */
#define MSA_DO_B MSA_DO(b)
#define MSA_DO_H MSA_DO(h)
#define MSA_DO_W MSA_DO(w)
#define MSA_DO_D MSA_DO(d)

/* MSA_LOOP_<X> expands to the loop for one format letter. */
#define MSA_LOOP_B MSA_LOOP(B)
#define MSA_LOOP_H MSA_LOOP(H)
#define MSA_LOOP_W MSA_LOOP(W)
#define MSA_LOOP_D MSA_LOOP(D)

/*
 * MSA_LOOP_COND_<X> expands to MSA_LOOP_COND() with the DF_* constant of the
 * format; it is used as the upper bound of the loop.  MSA_LOOP_COND is
 * likewise expected to be supplied by the user of these macros.
 */
#define MSA_LOOP_COND_B MSA_LOOP_COND(DF_BYTE)
#define MSA_LOOP_COND_H MSA_LOOP_COND(DF_HALF)
#define MSA_LOOP_COND_W MSA_LOOP_COND(DF_WORD)
#define MSA_LOOP_COND_D MSA_LOOP_COND(DF_DOUBLE)

/*
 * MSA_LOOP(DF): run MSA_DO_<DF> for i = 0 .. MSA_LOOP_COND_<DF> - 1.
 * DF is a format letter (B, H, W or D) that is token-pasted onto the two
 * macro names.  The counter i must be declared in the expanding scope.
 */
#define MSA_LOOP(DF) \
    do { \
        for (i = 0; i < (MSA_LOOP_COND_ ## DF) ; i++) { \
            MSA_DO_ ## DF; \
        } \
    } while (0)
   5848 
   5849 #define MSA_FN_DF(FUNC)                                             \
   5850 void helper_msa_##FUNC(CPUMIPSState *env, uint32_t df, uint32_t wd, \
   5851         uint32_t ws, uint32_t wt)                                   \
   5852 {                                                                   \
   5853     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                      \
   5854     wr_t *pws = &(env->active_fpu.fpr[ws].wr);                      \
   5855     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                      \
   5856     wr_t wx, *pwx = &wx;                                            \
   5857     uint32_t i;                                                     \
   5858     switch (df) {                                                   \
   5859     case DF_BYTE:                                                   \
   5860         MSA_LOOP_B;                                                 \
   5861         break;                                                      \
   5862     case DF_HALF:                                                   \
   5863         MSA_LOOP_H;                                                 \
   5864         break;                                                      \
   5865     case DF_WORD:                                                   \
   5866         MSA_LOOP_W;                                                 \
   5867         break;                                                      \
   5868     case DF_DOUBLE:                                                 \
   5869         MSA_LOOP_D;                                                 \
   5870         break;                                                      \
   5871     default:                                                        \
   5872         assert(0);                                                  \
   5873     }                                                               \
   5874     msa_move_v(pwd, pwx);                                           \
   5875 }
   5876 
   5877 #define MSA_LOOP_COND(DF) \
   5878             (DF_ELEMENTS(DF) / 2)
   5879 
   5880 #define Rb(pwr, i) (pwr->b[i])
   5881 #define Lb(pwr, i) (pwr->b[i + DF_ELEMENTS(DF_BYTE) / 2])
   5882 #define Rh(pwr, i) (pwr->h[i])
   5883 #define Lh(pwr, i) (pwr->h[i + DF_ELEMENTS(DF_HALF) / 2])
   5884 #define Rw(pwr, i) (pwr->w[i])
   5885 #define Lw(pwr, i) (pwr->w[i + DF_ELEMENTS(DF_WORD) / 2])
   5886 #define Rd(pwr, i) (pwr->d[i])
   5887 #define Ld(pwr, i) (pwr->d[i + DF_ELEMENTS(DF_DOUBLE) / 2])
   5888 
   5889 #undef MSA_LOOP_COND
   5890 
   5891 #define MSA_LOOP_COND(DF) \
   5892             (DF_ELEMENTS(DF))
   5893 
   5894 #define MSA_DO(DF)                                                          \
   5895     do {                                                                    \
   5896         uint32_t n = DF_ELEMENTS(df);                                       \
   5897         uint32_t k = (pwd->DF[i] & 0x3f) % (2 * n);                         \
   5898         pwx->DF[i] =                                                        \
   5899             (pwd->DF[i] & 0xc0) ? 0 : k < n ? pwt->DF[k] : pws->DF[k - n];  \
   5900     } while (0)
   5901 MSA_FN_DF(vshf_df)
   5902 #undef MSA_DO
   5903 #undef MSA_LOOP_COND
   5904 #undef MSA_FN_DF
   5905 
   5906 
   5907 void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   5908                         uint32_t ws, uint32_t n)
   5909 {
   5910     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5911     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5912 
   5913     msa_sld_df(df, pwd, pws, n);
   5914 }
   5915 
   5916 void helper_msa_splati_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   5917                           uint32_t ws, uint32_t n)
   5918 {
   5919     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   5920     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   5921 
   5922     msa_splat_df(df, pwd, pws, n);
   5923 }
   5924 
   5925 void helper_msa_copy_s_b(CPUMIPSState *env, uint32_t rd,
   5926                          uint32_t ws, uint32_t n)
   5927 {
   5928     n %= 16;
   5929 #if HOST_BIG_ENDIAN
   5930     if (n < 8) {
   5931         n = 8 - n - 1;
   5932     } else {
   5933         n = 24 - n - 1;
   5934     }
   5935 #endif
   5936     env->active_tc.gpr[rd] = (int8_t)env->active_fpu.fpr[ws].wr.b[n];
   5937 }
   5938 
   5939 void helper_msa_copy_s_h(CPUMIPSState *env, uint32_t rd,
   5940                          uint32_t ws, uint32_t n)
   5941 {
   5942     n %= 8;
   5943 #if HOST_BIG_ENDIAN
   5944     if (n < 4) {
   5945         n = 4 - n - 1;
   5946     } else {
   5947         n = 12 - n - 1;
   5948     }
   5949 #endif
   5950     env->active_tc.gpr[rd] = (int16_t)env->active_fpu.fpr[ws].wr.h[n];
   5951 }
   5952 
   5953 void helper_msa_copy_s_w(CPUMIPSState *env, uint32_t rd,
   5954                          uint32_t ws, uint32_t n)
   5955 {
   5956     n %= 4;
   5957 #if HOST_BIG_ENDIAN
   5958     if (n < 2) {
   5959         n = 2 - n - 1;
   5960     } else {
   5961         n = 6 - n - 1;
   5962     }
   5963 #endif
   5964     env->active_tc.gpr[rd] = (int32_t)env->active_fpu.fpr[ws].wr.w[n];
   5965 }
   5966 
   5967 void helper_msa_copy_s_d(CPUMIPSState *env, uint32_t rd,
   5968                          uint32_t ws, uint32_t n)
   5969 {
   5970     n %= 2;
   5971     env->active_tc.gpr[rd] = (int64_t)env->active_fpu.fpr[ws].wr.d[n];
   5972 }
   5973 
   5974 void helper_msa_copy_u_b(CPUMIPSState *env, uint32_t rd,
   5975                          uint32_t ws, uint32_t n)
   5976 {
   5977     n %= 16;
   5978 #if HOST_BIG_ENDIAN
   5979     if (n < 8) {
   5980         n = 8 - n - 1;
   5981     } else {
   5982         n = 24 - n - 1;
   5983     }
   5984 #endif
   5985     env->active_tc.gpr[rd] = (uint8_t)env->active_fpu.fpr[ws].wr.b[n];
   5986 }
   5987 
   5988 void helper_msa_copy_u_h(CPUMIPSState *env, uint32_t rd,
   5989                          uint32_t ws, uint32_t n)
   5990 {
   5991     n %= 8;
   5992 #if HOST_BIG_ENDIAN
   5993     if (n < 4) {
   5994         n = 4 - n - 1;
   5995     } else {
   5996         n = 12 - n - 1;
   5997     }
   5998 #endif
   5999     env->active_tc.gpr[rd] = (uint16_t)env->active_fpu.fpr[ws].wr.h[n];
   6000 }
   6001 
   6002 void helper_msa_copy_u_w(CPUMIPSState *env, uint32_t rd,
   6003                          uint32_t ws, uint32_t n)
   6004 {
   6005     n %= 4;
   6006 #if HOST_BIG_ENDIAN
   6007     if (n < 2) {
   6008         n = 2 - n - 1;
   6009     } else {
   6010         n = 6 - n - 1;
   6011     }
   6012 #endif
   6013     env->active_tc.gpr[rd] = (uint32_t)env->active_fpu.fpr[ws].wr.w[n];
   6014 }
   6015 
   6016 void helper_msa_insert_b(CPUMIPSState *env, uint32_t wd,
   6017                           uint32_t rs_num, uint32_t n)
   6018 {
   6019     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6020     target_ulong rs = env->active_tc.gpr[rs_num];
   6021     n %= 16;
   6022 #if HOST_BIG_ENDIAN
   6023     if (n < 8) {
   6024         n = 8 - n - 1;
   6025     } else {
   6026         n = 24 - n - 1;
   6027     }
   6028 #endif
   6029     pwd->b[n] = (int8_t)rs;
   6030 }
   6031 
   6032 void helper_msa_insert_h(CPUMIPSState *env, uint32_t wd,
   6033                           uint32_t rs_num, uint32_t n)
   6034 {
   6035     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6036     target_ulong rs = env->active_tc.gpr[rs_num];
   6037     n %= 8;
   6038 #if HOST_BIG_ENDIAN
   6039     if (n < 4) {
   6040         n = 4 - n - 1;
   6041     } else {
   6042         n = 12 - n - 1;
   6043     }
   6044 #endif
   6045     pwd->h[n] = (int16_t)rs;
   6046 }
   6047 
   6048 void helper_msa_insert_w(CPUMIPSState *env, uint32_t wd,
   6049                           uint32_t rs_num, uint32_t n)
   6050 {
   6051     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6052     target_ulong rs = env->active_tc.gpr[rs_num];
   6053     n %= 4;
   6054 #if HOST_BIG_ENDIAN
   6055     if (n < 2) {
   6056         n = 2 - n - 1;
   6057     } else {
   6058         n = 6 - n - 1;
   6059     }
   6060 #endif
   6061     pwd->w[n] = (int32_t)rs;
   6062 }
   6063 
   6064 void helper_msa_insert_d(CPUMIPSState *env, uint32_t wd,
   6065                           uint32_t rs_num, uint32_t n)
   6066 {
   6067     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6068     target_ulong rs = env->active_tc.gpr[rs_num];
   6069     n %= 2;
   6070     pwd->d[n] = (int64_t)rs;
   6071 }
   6072 
   6073 void helper_msa_insve_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6074                          uint32_t ws, uint32_t n)
   6075 {
   6076     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6077     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6078 
   6079     switch (df) {
   6080     case DF_BYTE:
   6081         pwd->b[n] = (int8_t)pws->b[0];
   6082         break;
   6083     case DF_HALF:
   6084         pwd->h[n] = (int16_t)pws->h[0];
   6085         break;
   6086     case DF_WORD:
   6087         pwd->w[n] = (int32_t)pws->w[0];
   6088         break;
   6089     case DF_DOUBLE:
   6090         pwd->d[n] = (int64_t)pws->d[0];
   6091         break;
   6092     default:
   6093         assert(0);
   6094     }
   6095 }
   6096 
   6097 void helper_msa_ctcmsa(CPUMIPSState *env, target_ulong elm, uint32_t cd)
   6098 {
   6099     switch (cd) {
   6100     case 0:
   6101         break;
   6102     case 1:
   6103         env->active_tc.msacsr = (int32_t)elm & MSACSR_MASK;
   6104         restore_msa_fp_status(env);
   6105         /* check exception */
   6106         if ((GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)
   6107             & GET_FP_CAUSE(env->active_tc.msacsr)) {
   6108             do_raise_exception(env, EXCP_MSAFPE, GETPC());
   6109         }
   6110         break;
   6111     }
   6112 }
   6113 
   6114 target_ulong helper_msa_cfcmsa(CPUMIPSState *env, uint32_t cs)
   6115 {
   6116     switch (cs) {
   6117     case 0:
   6118         return env->msair;
   6119     case 1:
   6120         return env->active_tc.msacsr & MSACSR_MASK;
   6121     }
   6122     return 0;
   6123 }
   6124 
   6125 void helper_msa_fill_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6126                         uint32_t rs)
   6127 {
   6128     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6129     uint32_t i;
   6130 
   6131     switch (df) {
   6132     case DF_BYTE:
   6133         for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
   6134             pwd->b[i] = (int8_t)env->active_tc.gpr[rs];
   6135         }
   6136         break;
   6137     case DF_HALF:
   6138         for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
   6139             pwd->h[i] = (int16_t)env->active_tc.gpr[rs];
   6140         }
   6141         break;
   6142     case DF_WORD:
   6143         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6144             pwd->w[i] = (int32_t)env->active_tc.gpr[rs];
   6145         }
   6146         break;
   6147     case DF_DOUBLE:
   6148         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6149             pwd->d[i] = (int64_t)env->active_tc.gpr[rs];
   6150         }
   6151        break;
   6152     default:
   6153         assert(0);
   6154     }
   6155 }
   6156 
   6157 
   6158 #define FLOAT_ONE32 make_float32(0x3f8 << 20)
   6159 #define FLOAT_ONE64 make_float64(0x3ffULL << 52)
   6160 
   6161 #define FLOAT_SNAN16(s) (float16_default_nan(s) ^ 0x0220)
   6162         /* 0x7c20 */
   6163 #define FLOAT_SNAN32(s) (float32_default_nan(s) ^ 0x00400020)
   6164         /* 0x7f800020 */
   6165 #define FLOAT_SNAN64(s) (float64_default_nan(s) ^ 0x0008000000000020ULL)
   6166         /* 0x7ff0000000000020 */
   6167 
/* Reset the Cause field of MSACSR to 0; the rest of MSACSR is left to SET_FP_CAUSE(). */
static inline void clear_msacsr_cause(CPUMIPSState *env)
{
    SET_FP_CAUSE(env->active_tc.msacsr, 0);
}
   6172 
   6173 static inline void check_msacsr_cause(CPUMIPSState *env, uintptr_t retaddr)
   6174 {
   6175     if ((GET_FP_CAUSE(env->active_tc.msacsr) &
   6176             (GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)) == 0) {
   6177         UPDATE_FP_FLAGS(env->active_tc.msacsr,
   6178                 GET_FP_CAUSE(env->active_tc.msacsr));
   6179     } else {
   6180         do_raise_exception(env, EXCP_MSAFPE, retaddr);
   6181     }
   6182 }
   6183 
   6184 /* Flush-to-zero use cases for update_msacsr() */
   6185 #define CLEAR_FS_UNDERFLOW 1
   6186 #define CLEAR_IS_INEXACT   2
   6187 #define RECIPROCAL_INEXACT 4
   6188 
   6189 
   6190 static inline int ieee_to_mips_xcpt_msa(int ieee_xcpt)
   6191 {
   6192     int mips_xcpt = 0;
   6193 
   6194     if (ieee_xcpt & float_flag_invalid) {
   6195         mips_xcpt |= FP_INVALID;
   6196     }
   6197     if (ieee_xcpt & float_flag_overflow) {
   6198         mips_xcpt |= FP_OVERFLOW;
   6199     }
   6200     if (ieee_xcpt & float_flag_underflow) {
   6201         mips_xcpt |= FP_UNDERFLOW;
   6202     }
   6203     if (ieee_xcpt & float_flag_divbyzero) {
   6204         mips_xcpt |= FP_DIV0;
   6205     }
   6206     if (ieee_xcpt & float_flag_inexact) {
   6207         mips_xcpt |= FP_INEXACT;
   6208     }
   6209 
   6210     return mips_xcpt;
   6211 }
   6212 
   6213 static inline int update_msacsr(CPUMIPSState *env, int action, int denormal)
   6214 {
   6215     int ieee_exception_flags;
   6216     int mips_exception_flags = 0;
   6217     int cause;
   6218     int enable;
   6219 
   6220     ieee_exception_flags = get_float_exception_flags(
   6221                                &env->active_tc.msa_fp_status);
   6222 
   6223     /* QEMU softfloat does not signal all underflow cases */
   6224     if (denormal) {
   6225         ieee_exception_flags |= float_flag_underflow;
   6226     }
   6227     if (ieee_exception_flags) {
   6228         mips_exception_flags = ieee_to_mips_xcpt_msa(ieee_exception_flags);
   6229     }
   6230     enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
   6231 
   6232     /* Set Inexact (I) when flushing inputs to zero */
   6233     if ((ieee_exception_flags & float_flag_input_denormal) &&
   6234             (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
   6235         if (action & CLEAR_IS_INEXACT) {
   6236             mips_exception_flags &= ~FP_INEXACT;
   6237         } else {
   6238             mips_exception_flags |= FP_INEXACT;
   6239         }
   6240     }
   6241 
   6242     /* Set Inexact (I) and Underflow (U) when flushing outputs to zero */
   6243     if ((ieee_exception_flags & float_flag_output_denormal) &&
   6244             (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
   6245         mips_exception_flags |= FP_INEXACT;
   6246         if (action & CLEAR_FS_UNDERFLOW) {
   6247             mips_exception_flags &= ~FP_UNDERFLOW;
   6248         } else {
   6249             mips_exception_flags |= FP_UNDERFLOW;
   6250         }
   6251     }
   6252 
   6253     /* Set Inexact (I) when Overflow (O) is not enabled */
   6254     if ((mips_exception_flags & FP_OVERFLOW) != 0 &&
   6255            (enable & FP_OVERFLOW) == 0) {
   6256         mips_exception_flags |= FP_INEXACT;
   6257     }
   6258 
   6259     /* Clear Exact Underflow when Underflow (U) is not enabled */
   6260     if ((mips_exception_flags & FP_UNDERFLOW) != 0 &&
   6261            (enable & FP_UNDERFLOW) == 0 &&
   6262            (mips_exception_flags & FP_INEXACT) == 0) {
   6263         mips_exception_flags &= ~FP_UNDERFLOW;
   6264     }
   6265 
   6266     /*
   6267      * Reciprocal operations set only Inexact when valid and not
   6268      * divide by zero
   6269      */
   6270     if ((action & RECIPROCAL_INEXACT) &&
   6271             (mips_exception_flags & (FP_INVALID | FP_DIV0)) == 0) {
   6272         mips_exception_flags = FP_INEXACT;
   6273     }
   6274 
   6275     cause = mips_exception_flags & enable; /* all current enabled exceptions */
   6276 
   6277     if (cause == 0) {
   6278         /*
   6279          * No enabled exception, update the MSACSR Cause
   6280          * with all current exceptions
   6281          */
   6282         SET_FP_CAUSE(env->active_tc.msacsr,
   6283             (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags));
   6284     } else {
   6285         /* Current exceptions are enabled */
   6286         if ((env->active_tc.msacsr & MSACSR_NX_MASK) == 0) {
   6287             /*
   6288              * Exception(s) will trap, update MSACSR Cause
   6289              * with all enabled exceptions
   6290              */
   6291             SET_FP_CAUSE(env->active_tc.msacsr,
   6292                 (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags));
   6293         }
   6294     }
   6295 
   6296     return mips_exception_flags;
   6297 }
   6298 
   6299 static inline int get_enabled_exceptions(const CPUMIPSState *env, int c)
   6300 {
   6301     int enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
   6302     return c & enable;
   6303 }
   6304 
   6305 static inline float16 float16_from_float32(int32_t a, bool ieee,
   6306                                            float_status *status)
   6307 {
   6308       float16 f_val;
   6309 
   6310       f_val = float32_to_float16((float32)a, ieee, status);
   6311 
   6312       return a < 0 ? (f_val | (1 << 15)) : f_val;
   6313 }
   6314 
   6315 static inline float32 float32_from_float64(int64_t a, float_status *status)
   6316 {
   6317       float32 f_val;
   6318 
   6319       f_val = float64_to_float32((float64)a, status);
   6320 
   6321       return a < 0 ? (f_val | (1 << 31)) : f_val;
   6322 }
   6323 
   6324 static inline float32 float32_from_float16(int16_t a, bool ieee,
   6325                                            float_status *status)
   6326 {
   6327       float32 f_val;
   6328 
   6329       f_val = float16_to_float32((float16)a, ieee, status);
   6330 
   6331       return a < 0 ? (f_val | (1 << 31)) : f_val;
   6332 }
   6333 
   6334 static inline float64 float64_from_float32(int32_t a, float_status *status)
   6335 {
   6336       float64 f_val;
   6337 
   6338       f_val = float32_to_float64((float64)a, status);
   6339 
   6340       return a < 0 ? (f_val | (1ULL << 63)) : f_val;
   6341 }
   6342 
   6343 static inline float32 float32_from_q16(int16_t a, float_status *status)
   6344 {
   6345     float32 f_val;
   6346 
   6347     /* conversion as integer and scaling */
   6348     f_val = int32_to_float32(a, status);
   6349     f_val = float32_scalbn(f_val, -15, status);
   6350 
   6351     return f_val;
   6352 }
   6353 
   6354 static inline float64 float64_from_q32(int32_t a, float_status *status)
   6355 {
   6356     float64 f_val;
   6357 
   6358     /* conversion as integer and scaling */
   6359     f_val = int32_to_float64(a, status);
   6360     f_val = float64_scalbn(f_val, -31, status);
   6361 
   6362     return f_val;
   6363 }
   6364 
   6365 static inline int16_t float32_to_q16(float32 a, float_status *status)
   6366 {
   6367     int32_t q_val;
   6368     int32_t q_min = 0xffff8000;
   6369     int32_t q_max = 0x00007fff;
   6370 
   6371     int ieee_ex;
   6372 
   6373     if (float32_is_any_nan(a)) {
   6374         float_raise(float_flag_invalid, status);
   6375         return 0;
   6376     }
   6377 
   6378     /* scaling */
   6379     a = float32_scalbn(a, 15, status);
   6380 
   6381     ieee_ex = get_float_exception_flags(status);
   6382     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
   6383                              , status);
   6384 
   6385     if (ieee_ex & float_flag_overflow) {
   6386         float_raise(float_flag_inexact, status);
   6387         return (int32_t)a < 0 ? q_min : q_max;
   6388     }
   6389 
   6390     /* conversion to int */
   6391     q_val = float32_to_int32(a, status);
   6392 
   6393     ieee_ex = get_float_exception_flags(status);
   6394     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
   6395                              , status);
   6396 
   6397     if (ieee_ex & float_flag_invalid) {
   6398         set_float_exception_flags(ieee_ex & (~float_flag_invalid)
   6399                                , status);
   6400         float_raise(float_flag_overflow | float_flag_inexact, status);
   6401         return (int32_t)a < 0 ? q_min : q_max;
   6402     }
   6403 
   6404     if (q_val < q_min) {
   6405         float_raise(float_flag_overflow | float_flag_inexact, status);
   6406         return (int16_t)q_min;
   6407     }
   6408 
   6409     if (q_max < q_val) {
   6410         float_raise(float_flag_overflow | float_flag_inexact, status);
   6411         return (int16_t)q_max;
   6412     }
   6413 
   6414     return (int16_t)q_val;
   6415 }
   6416 
   6417 static inline int32_t float64_to_q32(float64 a, float_status *status)
   6418 {
   6419     int64_t q_val;
   6420     int64_t q_min = 0xffffffff80000000LL;
   6421     int64_t q_max = 0x000000007fffffffLL;
   6422 
   6423     int ieee_ex;
   6424 
   6425     if (float64_is_any_nan(a)) {
   6426         float_raise(float_flag_invalid, status);
   6427         return 0;
   6428     }
   6429 
   6430     /* scaling */
   6431     a = float64_scalbn(a, 31, status);
   6432 
   6433     ieee_ex = get_float_exception_flags(status);
   6434     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
   6435            , status);
   6436 
   6437     if (ieee_ex & float_flag_overflow) {
   6438         float_raise(float_flag_inexact, status);
   6439         return (int64_t)a < 0 ? q_min : q_max;
   6440     }
   6441 
   6442     /* conversion to integer */
   6443     q_val = float64_to_int64(a, status);
   6444 
   6445     ieee_ex = get_float_exception_flags(status);
   6446     set_float_exception_flags(ieee_ex & (~float_flag_underflow)
   6447            , status);
   6448 
   6449     if (ieee_ex & float_flag_invalid) {
   6450         set_float_exception_flags(ieee_ex & (~float_flag_invalid)
   6451                , status);
   6452         float_raise(float_flag_overflow | float_flag_inexact, status);
   6453         return (int64_t)a < 0 ? q_min : q_max;
   6454     }
   6455 
   6456     if (q_val < q_min) {
   6457         float_raise(float_flag_overflow | float_flag_inexact, status);
   6458         return (int32_t)q_min;
   6459     }
   6460 
   6461     if (q_max < q_val) {
   6462         float_raise(float_flag_overflow | float_flag_inexact, status);
   6463         return (int32_t)q_max;
   6464     }
   6465 
   6466     return (int32_t)q_val;
   6467 }
   6468 
   6469 #define MSA_FLOAT_COND(DEST, OP, ARG1, ARG2, BITS, QUIET)                   \
   6470     do {                                                                    \
   6471         float_status *status = &env->active_tc.msa_fp_status;               \
   6472         int c;                                                              \
   6473         int64_t cond;                                                       \
   6474         set_float_exception_flags(0, status);                               \
   6475         if (!QUIET) {                                                       \
   6476             cond = float ## BITS ## _ ## OP(ARG1, ARG2, status);            \
   6477         } else {                                                            \
   6478             cond = float ## BITS ## _ ## OP ## _quiet(ARG1, ARG2, status);  \
   6479         }                                                                   \
   6480         DEST = cond ? M_MAX_UINT(BITS) : 0;                                 \
   6481         c = update_msacsr(env, CLEAR_IS_INEXACT, 0);                        \
   6482                                                                             \
   6483         if (get_enabled_exceptions(env, c)) {                               \
   6484             DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
   6485         }                                                                   \
   6486     } while (0)
   6487 
   6488 #define MSA_FLOAT_AF(DEST, ARG1, ARG2, BITS, QUIET)                 \
   6489     do {                                                            \
   6490         MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);          \
   6491         if ((DEST & M_MAX_UINT(BITS)) == M_MAX_UINT(BITS)) {        \
   6492             DEST = 0;                                               \
   6493         }                                                           \
   6494     } while (0)
   6495 
   6496 #define MSA_FLOAT_UEQ(DEST, ARG1, ARG2, BITS, QUIET)                \
   6497     do {                                                            \
   6498         MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
   6499         if (DEST == 0) {                                            \
   6500             MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);      \
   6501         }                                                           \
   6502     } while (0)
   6503 
   6504 #define MSA_FLOAT_NE(DEST, ARG1, ARG2, BITS, QUIET)                 \
   6505     do {                                                            \
   6506         MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);          \
   6507         if (DEST == 0) {                                            \
   6508             MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);      \
   6509         }                                                           \
   6510     } while (0)
   6511 
   6512 #define MSA_FLOAT_UNE(DEST, ARG1, ARG2, BITS, QUIET)                \
   6513     do {                                                            \
   6514         MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
   6515         if (DEST == 0) {                                            \
   6516             MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
   6517             if (DEST == 0) {                                        \
   6518                 MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);  \
   6519             }                                                       \
   6520         }                                                           \
   6521     } while (0)
   6522 
   6523 #define MSA_FLOAT_ULE(DEST, ARG1, ARG2, BITS, QUIET)                \
   6524     do {                                                            \
   6525         MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
   6526         if (DEST == 0) {                                            \
   6527             MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);      \
   6528         }                                                           \
   6529     } while (0)
   6530 
   6531 #define MSA_FLOAT_ULT(DEST, ARG1, ARG2, BITS, QUIET)                \
   6532     do {                                                            \
   6533         MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
   6534         if (DEST == 0) {                                            \
   6535             MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
   6536         }                                                           \
   6537     } while (0)
   6538 
   6539 #define MSA_FLOAT_OR(DEST, ARG1, ARG2, BITS, QUIET)                 \
   6540     do {                                                            \
   6541         MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);          \
   6542         if (DEST == 0) {                                            \
   6543             MSA_FLOAT_COND(DEST, le, ARG2, ARG1, BITS, QUIET);      \
   6544         }                                                           \
   6545     } while (0)
   6546 
   6547 static inline void compare_af(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6548                               wr_t *pwt, uint32_t df, int quiet,
   6549                               uintptr_t retaddr)
   6550 {
   6551     wr_t wx, *pwx = &wx;
   6552     uint32_t i;
   6553 
   6554     clear_msacsr_cause(env);
   6555 
   6556     switch (df) {
   6557     case DF_WORD:
   6558         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6559             MSA_FLOAT_AF(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
   6560         }
   6561         break;
   6562     case DF_DOUBLE:
   6563         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6564             MSA_FLOAT_AF(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
   6565         }
   6566         break;
   6567     default:
   6568         assert(0);
   6569     }
   6570 
   6571     check_msacsr_cause(env, retaddr);
   6572 
   6573     msa_move_v(pwd, pwx);
   6574 }
   6575 
   6576 static inline void compare_un(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6577                               wr_t *pwt, uint32_t df, int quiet,
   6578                               uintptr_t retaddr)
   6579 {
   6580     wr_t wx, *pwx = &wx;
   6581     uint32_t i;
   6582 
   6583     clear_msacsr_cause(env);
   6584 
   6585     switch (df) {
   6586     case DF_WORD:
   6587         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6588             MSA_FLOAT_COND(pwx->w[i], unordered, pws->w[i], pwt->w[i], 32,
   6589                     quiet);
   6590         }
   6591         break;
   6592     case DF_DOUBLE:
   6593         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6594             MSA_FLOAT_COND(pwx->d[i], unordered, pws->d[i], pwt->d[i], 64,
   6595                     quiet);
   6596         }
   6597         break;
   6598     default:
   6599         assert(0);
   6600     }
   6601 
   6602     check_msacsr_cause(env, retaddr);
   6603 
   6604     msa_move_v(pwd, pwx);
   6605 }
   6606 
   6607 static inline void compare_eq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6608                               wr_t *pwt, uint32_t df, int quiet,
   6609                               uintptr_t retaddr)
   6610 {
   6611     wr_t wx, *pwx = &wx;
   6612     uint32_t i;
   6613 
   6614     clear_msacsr_cause(env);
   6615 
   6616     switch (df) {
   6617     case DF_WORD:
   6618         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6619             MSA_FLOAT_COND(pwx->w[i], eq, pws->w[i], pwt->w[i], 32, quiet);
   6620         }
   6621         break;
   6622     case DF_DOUBLE:
   6623         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6624             MSA_FLOAT_COND(pwx->d[i], eq, pws->d[i], pwt->d[i], 64, quiet);
   6625         }
   6626         break;
   6627     default:
   6628         assert(0);
   6629     }
   6630 
   6631     check_msacsr_cause(env, retaddr);
   6632 
   6633     msa_move_v(pwd, pwx);
   6634 }
   6635 
   6636 static inline void compare_ueq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6637                                wr_t *pwt, uint32_t df, int quiet,
   6638                                uintptr_t retaddr)
   6639 {
   6640     wr_t wx, *pwx = &wx;
   6641     uint32_t i;
   6642 
   6643     clear_msacsr_cause(env);
   6644 
   6645     switch (df) {
   6646     case DF_WORD:
   6647         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6648             MSA_FLOAT_UEQ(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
   6649         }
   6650         break;
   6651     case DF_DOUBLE:
   6652         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6653             MSA_FLOAT_UEQ(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
   6654         }
   6655         break;
   6656     default:
   6657         assert(0);
   6658     }
   6659 
   6660     check_msacsr_cause(env, retaddr);
   6661 
   6662     msa_move_v(pwd, pwx);
   6663 }
   6664 
   6665 static inline void compare_lt(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6666                               wr_t *pwt, uint32_t df, int quiet,
   6667                               uintptr_t retaddr)
   6668 {
   6669     wr_t wx, *pwx = &wx;
   6670     uint32_t i;
   6671 
   6672     clear_msacsr_cause(env);
   6673 
   6674     switch (df) {
   6675     case DF_WORD:
   6676         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6677             MSA_FLOAT_COND(pwx->w[i], lt, pws->w[i], pwt->w[i], 32, quiet);
   6678         }
   6679         break;
   6680     case DF_DOUBLE:
   6681         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6682             MSA_FLOAT_COND(pwx->d[i], lt, pws->d[i], pwt->d[i], 64, quiet);
   6683         }
   6684         break;
   6685     default:
   6686         assert(0);
   6687     }
   6688 
   6689     check_msacsr_cause(env, retaddr);
   6690 
   6691     msa_move_v(pwd, pwx);
   6692 }
   6693 
   6694 static inline void compare_ult(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6695                                wr_t *pwt, uint32_t df, int quiet,
   6696                                uintptr_t retaddr)
   6697 {
   6698     wr_t wx, *pwx = &wx;
   6699     uint32_t i;
   6700 
   6701     clear_msacsr_cause(env);
   6702 
   6703     switch (df) {
   6704     case DF_WORD:
   6705         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6706             MSA_FLOAT_ULT(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
   6707         }
   6708         break;
   6709     case DF_DOUBLE:
   6710         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6711             MSA_FLOAT_ULT(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
   6712         }
   6713         break;
   6714     default:
   6715         assert(0);
   6716     }
   6717 
   6718     check_msacsr_cause(env, retaddr);
   6719 
   6720     msa_move_v(pwd, pwx);
   6721 }
   6722 
   6723 static inline void compare_le(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6724                               wr_t *pwt, uint32_t df, int quiet,
   6725                               uintptr_t retaddr)
   6726 {
   6727     wr_t wx, *pwx = &wx;
   6728     uint32_t i;
   6729 
   6730     clear_msacsr_cause(env);
   6731 
   6732     switch (df) {
   6733     case DF_WORD:
   6734         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6735             MSA_FLOAT_COND(pwx->w[i], le, pws->w[i], pwt->w[i], 32, quiet);
   6736         }
   6737         break;
   6738     case DF_DOUBLE:
   6739         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6740             MSA_FLOAT_COND(pwx->d[i], le, pws->d[i], pwt->d[i], 64, quiet);
   6741         }
   6742         break;
   6743     default:
   6744         assert(0);
   6745     }
   6746 
   6747     check_msacsr_cause(env, retaddr);
   6748 
   6749     msa_move_v(pwd, pwx);
   6750 }
   6751 
   6752 static inline void compare_ule(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6753                                wr_t *pwt, uint32_t df, int quiet,
   6754                                uintptr_t retaddr)
   6755 {
   6756     wr_t wx, *pwx = &wx;
   6757     uint32_t i;
   6758 
   6759     clear_msacsr_cause(env);
   6760 
   6761     switch (df) {
   6762     case DF_WORD:
   6763         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6764             MSA_FLOAT_ULE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
   6765         }
   6766         break;
   6767     case DF_DOUBLE:
   6768         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6769             MSA_FLOAT_ULE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
   6770         }
   6771         break;
   6772     default:
   6773         assert(0);
   6774     }
   6775 
   6776     check_msacsr_cause(env, retaddr);
   6777 
   6778     msa_move_v(pwd, pwx);
   6779 }
   6780 
   6781 static inline void compare_or(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6782                               wr_t *pwt, uint32_t df, int quiet,
   6783                               uintptr_t retaddr)
   6784 {
   6785     wr_t wx, *pwx = &wx;
   6786     uint32_t i;
   6787 
   6788     clear_msacsr_cause(env);
   6789 
   6790     switch (df) {
   6791     case DF_WORD:
   6792         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6793             MSA_FLOAT_OR(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
   6794         }
   6795         break;
   6796     case DF_DOUBLE:
   6797         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6798             MSA_FLOAT_OR(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
   6799         }
   6800         break;
   6801     default:
   6802         assert(0);
   6803     }
   6804 
   6805     check_msacsr_cause(env, retaddr);
   6806 
   6807     msa_move_v(pwd, pwx);
   6808 }
   6809 
   6810 static inline void compare_une(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6811                                wr_t *pwt, uint32_t df, int quiet,
   6812                                uintptr_t retaddr)
   6813 {
   6814     wr_t wx, *pwx = &wx;
   6815     uint32_t i;
   6816 
   6817     clear_msacsr_cause(env);
   6818 
   6819     switch (df) {
   6820     case DF_WORD:
   6821         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6822             MSA_FLOAT_UNE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
   6823         }
   6824         break;
   6825     case DF_DOUBLE:
   6826         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6827             MSA_FLOAT_UNE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
   6828         }
   6829         break;
   6830     default:
   6831         assert(0);
   6832     }
   6833 
   6834     check_msacsr_cause(env, retaddr);
   6835 
   6836     msa_move_v(pwd, pwx);
   6837 }
   6838 
   6839 static inline void compare_ne(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
   6840                               wr_t *pwt, uint32_t df, int quiet,
   6841                               uintptr_t retaddr)
   6842 {
   6843     wr_t wx, *pwx = &wx;
   6844     uint32_t i;
   6845 
   6846     clear_msacsr_cause(env);
   6847 
   6848     switch (df) {
   6849     case DF_WORD:
   6850         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   6851             MSA_FLOAT_NE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
   6852         }
   6853         break;
   6854     case DF_DOUBLE:
   6855         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   6856             MSA_FLOAT_NE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
   6857         }
   6858         break;
   6859     default:
   6860         assert(0);
   6861     }
   6862 
   6863     check_msacsr_cause(env, retaddr);
   6864 
   6865     msa_move_v(pwd, pwx);
   6866 }
   6867 
   6868 void helper_msa_fcaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6869                         uint32_t ws, uint32_t wt)
   6870 {
   6871     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6872     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6873     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6874     compare_af(env, pwd, pws, pwt, df, 1, GETPC());
   6875 }
   6876 
   6877 void helper_msa_fcun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6878                         uint32_t ws, uint32_t wt)
   6879 {
   6880     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6881     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6882     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6883     compare_un(env, pwd, pws, pwt, df, 1, GETPC());
   6884 }
   6885 
   6886 void helper_msa_fceq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6887                         uint32_t ws, uint32_t wt)
   6888 {
   6889     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6890     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6891     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6892     compare_eq(env, pwd, pws, pwt, df, 1, GETPC());
   6893 }
   6894 
   6895 void helper_msa_fcueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6896                          uint32_t ws, uint32_t wt)
   6897 {
   6898     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6899     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6900     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6901     compare_ueq(env, pwd, pws, pwt, df, 1, GETPC());
   6902 }
   6903 
   6904 void helper_msa_fclt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6905                         uint32_t ws, uint32_t wt)
   6906 {
   6907     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6908     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6909     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6910     compare_lt(env, pwd, pws, pwt, df, 1, GETPC());
   6911 }
   6912 
   6913 void helper_msa_fcult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6914                          uint32_t ws, uint32_t wt)
   6915 {
   6916     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6917     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6918     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6919     compare_ult(env, pwd, pws, pwt, df, 1, GETPC());
   6920 }
   6921 
   6922 void helper_msa_fcle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6923                         uint32_t ws, uint32_t wt)
   6924 {
   6925     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6926     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6927     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6928     compare_le(env, pwd, pws, pwt, df, 1, GETPC());
   6929 }
   6930 
   6931 void helper_msa_fcule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6932                          uint32_t ws, uint32_t wt)
   6933 {
   6934     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6935     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6936     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6937     compare_ule(env, pwd, pws, pwt, df, 1, GETPC());
   6938 }
   6939 
   6940 void helper_msa_fsaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6941                         uint32_t ws, uint32_t wt)
   6942 {
   6943     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6944     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6945     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6946     compare_af(env, pwd, pws, pwt, df, 0, GETPC());
   6947 }
   6948 
   6949 void helper_msa_fsun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6950                         uint32_t ws, uint32_t wt)
   6951 {
   6952     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6953     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6954     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6955     compare_un(env, pwd, pws, pwt, df, 0, GETPC());
   6956 }
   6957 
   6958 void helper_msa_fseq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6959                         uint32_t ws, uint32_t wt)
   6960 {
   6961     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6962     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6963     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6964     compare_eq(env, pwd, pws, pwt, df, 0, GETPC());
   6965 }
   6966 
   6967 void helper_msa_fsueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6968                          uint32_t ws, uint32_t wt)
   6969 {
   6970     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6971     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6972     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6973     compare_ueq(env, pwd, pws, pwt, df, 0, GETPC());
   6974 }
   6975 
   6976 void helper_msa_fslt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6977                         uint32_t ws, uint32_t wt)
   6978 {
   6979     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6980     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6981     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6982     compare_lt(env, pwd, pws, pwt, df, 0, GETPC());
   6983 }
   6984 
   6985 void helper_msa_fsult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6986                          uint32_t ws, uint32_t wt)
   6987 {
   6988     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6989     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6990     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   6991     compare_ult(env, pwd, pws, pwt, df, 0, GETPC());
   6992 }
   6993 
   6994 void helper_msa_fsle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   6995                         uint32_t ws, uint32_t wt)
   6996 {
   6997     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   6998     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   6999     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7000     compare_le(env, pwd, pws, pwt, df, 0, GETPC());
   7001 }
   7002 
   7003 void helper_msa_fsule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7004                          uint32_t ws, uint32_t wt)
   7005 {
   7006     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7007     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7008     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7009     compare_ule(env, pwd, pws, pwt, df, 0, GETPC());
   7010 }
   7011 
   7012 void helper_msa_fcor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7013                         uint32_t ws, uint32_t wt)
   7014 {
   7015     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7016     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7017     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7018     compare_or(env, pwd, pws, pwt, df, 1, GETPC());
   7019 }
   7020 
   7021 void helper_msa_fcune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7022                          uint32_t ws, uint32_t wt)
   7023 {
   7024     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7025     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7026     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7027     compare_une(env, pwd, pws, pwt, df, 1, GETPC());
   7028 }
   7029 
   7030 void helper_msa_fcne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7031                         uint32_t ws, uint32_t wt)
   7032 {
   7033     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7034     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7035     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7036     compare_ne(env, pwd, pws, pwt, df, 1, GETPC());
   7037 }
   7038 
   7039 void helper_msa_fsor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7040                         uint32_t ws, uint32_t wt)
   7041 {
   7042     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7043     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7044     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7045     compare_or(env, pwd, pws, pwt, df, 0, GETPC());
   7046 }
   7047 
   7048 void helper_msa_fsune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7049                          uint32_t ws, uint32_t wt)
   7050 {
   7051     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7052     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7053     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7054     compare_une(env, pwd, pws, pwt, df, 0, GETPC());
   7055 }
   7056 
   7057 void helper_msa_fsne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7058                         uint32_t ws, uint32_t wt)
   7059 {
   7060     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7061     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7062     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7063     compare_ne(env, pwd, pws, pwt, df, 0, GETPC());
   7064 }
   7065 
   7066 #define float16_is_zero(ARG) 0
   7067 #define float16_is_zero_or_denormal(ARG) 0
   7068 
   7069 #define IS_DENORMAL(ARG, BITS)                      \
   7070     (!float ## BITS ## _is_zero(ARG)                \
   7071     && float ## BITS ## _is_zero_or_denormal(ARG))
   7072 
   7073 #define MSA_FLOAT_BINOP(DEST, OP, ARG1, ARG2, BITS)                         \
   7074     do {                                                                    \
   7075         float_status *status = &env->active_tc.msa_fp_status;               \
   7076         int c;                                                              \
   7077                                                                             \
   7078         set_float_exception_flags(0, status);                               \
   7079         DEST = float ## BITS ## _ ## OP(ARG1, ARG2, status);                \
   7080         c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
   7081                                                                             \
   7082         if (get_enabled_exceptions(env, c)) {                               \
   7083             DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
   7084         }                                                                   \
   7085     } while (0)
   7086 
   7087 void helper_msa_fadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7088         uint32_t ws, uint32_t wt)
   7089 {
   7090     wr_t wx, *pwx = &wx;
   7091     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7092     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7093     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7094     uint32_t i;
   7095 
   7096     clear_msacsr_cause(env);
   7097 
   7098     switch (df) {
   7099     case DF_WORD:
   7100         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7101             MSA_FLOAT_BINOP(pwx->w[i], add, pws->w[i], pwt->w[i], 32);
   7102         }
   7103         break;
   7104     case DF_DOUBLE:
   7105         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7106             MSA_FLOAT_BINOP(pwx->d[i], add, pws->d[i], pwt->d[i], 64);
   7107         }
   7108         break;
   7109     default:
   7110         assert(0);
   7111     }
   7112 
   7113     check_msacsr_cause(env, GETPC());
   7114     msa_move_v(pwd, pwx);
   7115 }
   7116 
   7117 void helper_msa_fsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7118         uint32_t ws, uint32_t wt)
   7119 {
   7120     wr_t wx, *pwx = &wx;
   7121     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7122     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7123     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7124     uint32_t i;
   7125 
   7126     clear_msacsr_cause(env);
   7127 
   7128     switch (df) {
   7129     case DF_WORD:
   7130         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7131             MSA_FLOAT_BINOP(pwx->w[i], sub, pws->w[i], pwt->w[i], 32);
   7132         }
   7133         break;
   7134     case DF_DOUBLE:
   7135         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7136             MSA_FLOAT_BINOP(pwx->d[i], sub, pws->d[i], pwt->d[i], 64);
   7137         }
   7138         break;
   7139     default:
   7140         assert(0);
   7141     }
   7142 
   7143     check_msacsr_cause(env, GETPC());
   7144     msa_move_v(pwd, pwx);
   7145 }
   7146 
   7147 void helper_msa_fmul_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7148         uint32_t ws, uint32_t wt)
   7149 {
   7150     wr_t wx, *pwx = &wx;
   7151     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7152     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7153     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7154     uint32_t i;
   7155 
   7156     clear_msacsr_cause(env);
   7157 
   7158     switch (df) {
   7159     case DF_WORD:
   7160         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7161             MSA_FLOAT_BINOP(pwx->w[i], mul, pws->w[i], pwt->w[i], 32);
   7162         }
   7163         break;
   7164     case DF_DOUBLE:
   7165         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7166             MSA_FLOAT_BINOP(pwx->d[i], mul, pws->d[i], pwt->d[i], 64);
   7167         }
   7168         break;
   7169     default:
   7170         assert(0);
   7171     }
   7172 
   7173     check_msacsr_cause(env, GETPC());
   7174 
   7175     msa_move_v(pwd, pwx);
   7176 }
   7177 
   7178 void helper_msa_fdiv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7179         uint32_t ws, uint32_t wt)
   7180 {
   7181     wr_t wx, *pwx = &wx;
   7182     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7183     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7184     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7185     uint32_t i;
   7186 
   7187     clear_msacsr_cause(env);
   7188 
   7189     switch (df) {
   7190     case DF_WORD:
   7191         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7192             MSA_FLOAT_BINOP(pwx->w[i], div, pws->w[i], pwt->w[i], 32);
   7193         }
   7194         break;
   7195     case DF_DOUBLE:
   7196         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7197             MSA_FLOAT_BINOP(pwx->d[i], div, pws->d[i], pwt->d[i], 64);
   7198         }
   7199         break;
   7200     default:
   7201         assert(0);
   7202     }
   7203 
   7204     check_msacsr_cause(env, GETPC());
   7205 
   7206     msa_move_v(pwd, pwx);
   7207 }
   7208 
   7209 #define MSA_FLOAT_MULADD(DEST, ARG1, ARG2, ARG3, NEGATE, BITS)              \
   7210     do {                                                                    \
   7211         float_status *status = &env->active_tc.msa_fp_status;               \
   7212         int c;                                                              \
   7213                                                                             \
   7214         set_float_exception_flags(0, status);                               \
   7215         DEST = float ## BITS ## _muladd(ARG2, ARG3, ARG1, NEGATE, status);  \
   7216         c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
   7217                                                                             \
   7218         if (get_enabled_exceptions(env, c)) {                               \
   7219             DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
   7220         }                                                                   \
   7221     } while (0)
   7222 
   7223 void helper_msa_fmadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7224         uint32_t ws, uint32_t wt)
   7225 {
   7226     wr_t wx, *pwx = &wx;
   7227     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7228     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7229     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7230     uint32_t i;
   7231 
   7232     clear_msacsr_cause(env);
   7233 
   7234     switch (df) {
   7235     case DF_WORD:
   7236         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7237             MSA_FLOAT_MULADD(pwx->w[i], pwd->w[i],
   7238                            pws->w[i], pwt->w[i], 0, 32);
   7239         }
   7240         break;
   7241     case DF_DOUBLE:
   7242         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7243             MSA_FLOAT_MULADD(pwx->d[i], pwd->d[i],
   7244                            pws->d[i], pwt->d[i], 0, 64);
   7245         }
   7246         break;
   7247     default:
   7248         assert(0);
   7249     }
   7250 
   7251     check_msacsr_cause(env, GETPC());
   7252 
   7253     msa_move_v(pwd, pwx);
   7254 }
   7255 
   7256 void helper_msa_fmsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7257         uint32_t ws, uint32_t wt)
   7258 {
   7259     wr_t wx, *pwx = &wx;
   7260     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7261     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7262     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7263     uint32_t i;
   7264 
   7265     clear_msacsr_cause(env);
   7266 
   7267     switch (df) {
   7268     case DF_WORD:
   7269         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7270             MSA_FLOAT_MULADD(pwx->w[i], pwd->w[i],
   7271                            pws->w[i], pwt->w[i],
   7272                            float_muladd_negate_product, 32);
   7273       }
   7274       break;
   7275     case DF_DOUBLE:
   7276         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7277             MSA_FLOAT_MULADD(pwx->d[i], pwd->d[i],
   7278                            pws->d[i], pwt->d[i],
   7279                            float_muladd_negate_product, 64);
   7280         }
   7281         break;
   7282     default:
   7283         assert(0);
   7284     }
   7285 
   7286     check_msacsr_cause(env, GETPC());
   7287 
   7288     msa_move_v(pwd, pwx);
   7289 }
   7290 
   7291 void helper_msa_fexp2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7292         uint32_t ws, uint32_t wt)
   7293 {
   7294     wr_t wx, *pwx = &wx;
   7295     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7296     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7297     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7298     uint32_t i;
   7299 
   7300     clear_msacsr_cause(env);
   7301 
   7302     switch (df) {
   7303     case DF_WORD:
   7304         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7305             MSA_FLOAT_BINOP(pwx->w[i], scalbn, pws->w[i],
   7306                             pwt->w[i] >  0x200 ?  0x200 :
   7307                             pwt->w[i] < -0x200 ? -0x200 : pwt->w[i],
   7308                             32);
   7309         }
   7310         break;
   7311     case DF_DOUBLE:
   7312         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7313             MSA_FLOAT_BINOP(pwx->d[i], scalbn, pws->d[i],
   7314                             pwt->d[i] >  0x1000 ?  0x1000 :
   7315                             pwt->d[i] < -0x1000 ? -0x1000 : pwt->d[i],
   7316                             64);
   7317         }
   7318         break;
   7319     default:
   7320         assert(0);
   7321     }
   7322 
   7323     check_msacsr_cause(env, GETPC());
   7324 
   7325     msa_move_v(pwd, pwx);
   7326 }
   7327 
   7328 #define MSA_FLOAT_UNOP(DEST, OP, ARG, BITS)                                 \
   7329     do {                                                                    \
   7330         float_status *status = &env->active_tc.msa_fp_status;               \
   7331         int c;                                                              \
   7332                                                                             \
   7333         set_float_exception_flags(0, status);                               \
   7334         DEST = float ## BITS ## _ ## OP(ARG, status);                       \
   7335         c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
   7336                                                                             \
   7337         if (get_enabled_exceptions(env, c)) {                               \
   7338             DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
   7339         }                                                                   \
   7340     } while (0)
   7341 
   7342 void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7343                          uint32_t ws, uint32_t wt)
   7344 {
   7345     wr_t wx, *pwx = &wx;
   7346     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7347     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7348     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7349     uint32_t i;
   7350 
   7351     clear_msacsr_cause(env);
   7352 
   7353     switch (df) {
   7354     case DF_WORD:
   7355         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7356             /*
   7357              * Half precision floats come in two formats: standard
   7358              * IEEE and "ARM" format.  The latter gains extra exponent
   7359              * range by omitting the NaN/Inf encodings.
   7360              */
   7361             bool ieee = true;
   7362 
   7363             MSA_FLOAT_BINOP(Lh(pwx, i), from_float32, pws->w[i], ieee, 16);
   7364             MSA_FLOAT_BINOP(Rh(pwx, i), from_float32, pwt->w[i], ieee, 16);
   7365         }
   7366         break;
   7367     case DF_DOUBLE:
   7368         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7369             MSA_FLOAT_UNOP(Lw(pwx, i), from_float64, pws->d[i], 32);
   7370             MSA_FLOAT_UNOP(Rw(pwx, i), from_float64, pwt->d[i], 32);
   7371         }
   7372         break;
   7373     default:
   7374         assert(0);
   7375     }
   7376 
   7377     check_msacsr_cause(env, GETPC());
   7378     msa_move_v(pwd, pwx);
   7379 }
   7380 
   7381 #define MSA_FLOAT_UNOP_XD(DEST, OP, ARG, BITS, XBITS)                       \
   7382     do {                                                                    \
   7383         float_status *status = &env->active_tc.msa_fp_status;               \
   7384         int c;                                                              \
   7385                                                                             \
   7386         set_float_exception_flags(0, status);                               \
   7387         DEST = float ## BITS ## _ ## OP(ARG, status);                       \
   7388         c = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                      \
   7389                                                                             \
   7390         if (get_enabled_exceptions(env, c)) {                               \
   7391             DEST = ((FLOAT_SNAN ## XBITS(status) >> 6) << 6) | c;           \
   7392         }                                                                   \
   7393     } while (0)
   7394 
   7395 void helper_msa_ftq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7396                        uint32_t ws, uint32_t wt)
   7397 {
   7398     wr_t wx, *pwx = &wx;
   7399     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7400     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7401     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7402     uint32_t i;
   7403 
   7404     clear_msacsr_cause(env);
   7405 
   7406     switch (df) {
   7407     case DF_WORD:
   7408         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7409             MSA_FLOAT_UNOP_XD(Lh(pwx, i), to_q16, pws->w[i], 32, 16);
   7410             MSA_FLOAT_UNOP_XD(Rh(pwx, i), to_q16, pwt->w[i], 32, 16);
   7411         }
   7412         break;
   7413     case DF_DOUBLE:
   7414         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7415             MSA_FLOAT_UNOP_XD(Lw(pwx, i), to_q32, pws->d[i], 64, 32);
   7416             MSA_FLOAT_UNOP_XD(Rw(pwx, i), to_q32, pwt->d[i], 64, 32);
   7417         }
   7418         break;
   7419     default:
   7420         assert(0);
   7421     }
   7422 
   7423     check_msacsr_cause(env, GETPC());
   7424 
   7425     msa_move_v(pwd, pwx);
   7426 }
   7427 
   7428 #define NUMBER_QNAN_PAIR(ARG1, ARG2, BITS, STATUS)      \
   7429     !float ## BITS ## _is_any_nan(ARG1)                 \
   7430     && float ## BITS ## _is_quiet_nan(ARG2, STATUS)
   7431 
   7432 #define MSA_FLOAT_MAXOP(DEST, OP, ARG1, ARG2, BITS)                         \
   7433     do {                                                                    \
   7434         float_status *status = &env->active_tc.msa_fp_status;               \
   7435         int c;                                                              \
   7436                                                                             \
   7437         set_float_exception_flags(0, status);                               \
   7438         DEST = float ## BITS ## _ ## OP(ARG1, ARG2, status);                \
   7439         c = update_msacsr(env, 0, 0);                                       \
   7440                                                                             \
   7441         if (get_enabled_exceptions(env, c)) {                               \
   7442             DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
   7443         }                                                                   \
   7444     } while (0)
   7445 
   /*
    * FMAXMIN_A(F, G, X, _S, _T, BITS, STATUS)
    *
    * Select between _S and _T by magnitude and store the result in X.
    * F and G are the softfloat operation suffixes passed to MSA_FLOAT_MAXOP
    * (the callers pass min/max or max/min).
    *
    * Steps, as written below:
    *  - _S and _T are copied into unsigned <BITS>-bit locals S and T;
    *  - if one of them is a non-NaN and the other a quiet NaN
    *    (NUMBER_QNAN_PAIR), the quiet NaN is replaced by the other operand;
    *  - as/at hold float<BITS>_abs() of S/T;
    *  - xs = F(S, T), xt = G(S, T), xd = F(as, at);
    *  - X is xs when as and at are bit-identical or when xd equals
    *    float<BITS>_abs(xs); otherwise X is xt.
    *
    * STATUS is only used by the NUMBER_QNAN_PAIR tests; MSA_FLOAT_MAXOP
    * declares its own `status` from `env`, which must be in scope.
    */
   7446 #define FMAXMIN_A(F, G, X, _S, _T, BITS, STATUS)                    \
   7447     do {                                                            \
   7448         uint## BITS ##_t S = _S, T = _T;                            \
   7449         uint## BITS ##_t as, at, xs, xt, xd;                        \
   7450         if (NUMBER_QNAN_PAIR(S, T, BITS, STATUS)) {                 \
   7451             T = S;                                                  \
   7452         }                                                           \
   7453         else if (NUMBER_QNAN_PAIR(T, S, BITS, STATUS)) {            \
   7454             S = T;                                                  \
   7455         }                                                           \
   7456         as = float## BITS ##_abs(S);                                \
   7457         at = float## BITS ##_abs(T);                                \
   7458         MSA_FLOAT_MAXOP(xs, F,  S,  T, BITS);                       \
   7459         MSA_FLOAT_MAXOP(xt, G,  S,  T, BITS);                       \
   7460         MSA_FLOAT_MAXOP(xd, F, as, at, BITS);                       \
   7461         X = (as == at || xd == float## BITS ##_abs(xs)) ? xs : xt;  \
   7462     } while (0)
   7463 
   /*
    * Element-wise floating-point minimum of vector registers ws and wt,
    * written to vector register wd.
    *
    * env:        CPU state; supplies the vector registers and msa_fp_status.
    * df:         DF_WORD (four 32-bit elements) or DF_DOUBLE (two 64-bit
    *             elements); any other value hits assert(0).
    * wd, ws, wt: indices into env->active_fpu.fpr[].
    *
    * For each element: if one operand is not a NaN and the other is a quiet
    * NaN, float<BITS>_min is applied to the non-NaN operand paired with
    * itself; otherwise it is applied to the (ws, wt) pair.  The loops are
    * written out element by element.
    */
   7464 void helper_msa_fmin_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7465         uint32_t ws, uint32_t wt)
   7466 {
   7467     float_status *status = &env->active_tc.msa_fp_status;
   7468     wr_t wx, *pwx = &wx;
   7469     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7470     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7471     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7472 
   7473     clear_msacsr_cause(env);
   7474 
   7475     if (df == DF_WORD) {
   7476 
   7477         if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
   7478             MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pws->w[0], 32);
   7479         } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
   7480             MSA_FLOAT_MAXOP(pwx->w[0], min, pwt->w[0], pwt->w[0], 32);
   7481         } else {
   7482             MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pwt->w[0], 32);
   7483         }
   7484 
   7485         if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
   7486             MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pws->w[1], 32);
   7487         } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
   7488             MSA_FLOAT_MAXOP(pwx->w[1], min, pwt->w[1], pwt->w[1], 32);
   7489         } else {
   7490             MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pwt->w[1], 32);
   7491         }
   7492 
   7493         if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
   7494             MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pws->w[2], 32);
   7495         } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
   7496             MSA_FLOAT_MAXOP(pwx->w[2], min, pwt->w[2], pwt->w[2], 32);
   7497         } else {
   7498             MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pwt->w[2], 32);
   7499         }
   7500 
   7501         if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
   7502             MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pws->w[3], 32);
   7503         } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
   7504             MSA_FLOAT_MAXOP(pwx->w[3], min, pwt->w[3], pwt->w[3], 32);
   7505         } else {
   7506             MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pwt->w[3], 32);
   7507         }
   7508 
   7509     } else if (df == DF_DOUBLE) {
   7510 
   7511         if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
   7512             MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pws->d[0], 64);
   7513         } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
   7514             MSA_FLOAT_MAXOP(pwx->d[0], min, pwt->d[0], pwt->d[0], 64);
   7515         } else {
   7516             MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pwt->d[0], 64);
   7517         }
   7518 
   7519         if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
   7520             MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pws->d[1], 64);
   7521         } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
   7522             MSA_FLOAT_MAXOP(pwx->d[1], min, pwt->d[1], pwt->d[1], 64);
   7523         } else {
   7524             MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pwt->d[1], 64);
   7525         }
   7526 
   7527     } else {
   7528 
   7529         assert(0);
   7530 
   7531     }
   7532 
            /*
             * Results were staged in the local wx; wd is written only after
             * the cause check (presumably check_msacsr_cause() does not
             * return when an enabled exception is pending - confirm).
             */
   7533     check_msacsr_cause(env, GETPC());
   7534 
   7535     msa_move_v(pwd, pwx);
   7536 }
   7537 
   /*
    * Element-wise "minimum by magnitude" of vector registers ws and wt,
    * written to vector register wd.  Each element is computed by
    * FMAXMIN_A(min, max, ...).
    *
    * env:        CPU state; supplies the vector registers and msa_fp_status.
    * df:         DF_WORD (four 32-bit elements) or DF_DOUBLE (two 64-bit
    *             elements); any other value hits assert(0).
    * wd, ws, wt: indices into env->active_fpu.fpr[].
    *
    * Results are staged in a local wr_t and copied to wd after
    * check_msacsr_cause().
    */
   7538 void helper_msa_fmin_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7539         uint32_t ws, uint32_t wt)
   7540 {
   7541     float_status *status = &env->active_tc.msa_fp_status;
   7542     wr_t wx, *pwx = &wx;
   7543     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7544     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7545     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7546 
   7547     clear_msacsr_cause(env);
   7548 
   7549     if (df == DF_WORD) {
   7550         FMAXMIN_A(min, max, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
   7551         FMAXMIN_A(min, max, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
   7552         FMAXMIN_A(min, max, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
   7553         FMAXMIN_A(min, max, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
   7554     } else if (df == DF_DOUBLE) {
   7555         FMAXMIN_A(min, max, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
   7556         FMAXMIN_A(min, max, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
   7557     } else {
   7558         assert(0);
   7559     }
   7560 
   7561     check_msacsr_cause(env, GETPC());
   7562 
   7563     msa_move_v(pwd, pwx);
   7564 }
   7565 
   /*
    * Element-wise floating-point maximum of vector registers ws and wt,
    * written to vector register wd.
    *
    * env:        CPU state; supplies the vector registers and msa_fp_status.
    * df:         DF_WORD (four 32-bit elements) or DF_DOUBLE (two 64-bit
    *             elements); any other value hits assert(0).
    * wd, ws, wt: indices into env->active_fpu.fpr[].
    *
    * For each element: if one operand is not a NaN and the other is a quiet
    * NaN, float<BITS>_max is applied to the non-NaN operand paired with
    * itself; otherwise it is applied to the (ws, wt) pair.  The loops are
    * written out element by element.
    */
   7566 void helper_msa_fmax_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7567         uint32_t ws, uint32_t wt)
   7568 {
   7569     float_status *status = &env->active_tc.msa_fp_status;
   7570     wr_t wx, *pwx = &wx;
   7571     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7572     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7573     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7574 
   7575     clear_msacsr_cause(env);
   7576 
   7577     if (df == DF_WORD) {
   7578 
   7579         if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
   7580             MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pws->w[0], 32);
   7581         } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
   7582             MSA_FLOAT_MAXOP(pwx->w[0], max, pwt->w[0], pwt->w[0], 32);
   7583         } else {
   7584             MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pwt->w[0], 32);
   7585         }
   7586 
   7587         if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
   7588             MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pws->w[1], 32);
   7589         } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
   7590             MSA_FLOAT_MAXOP(pwx->w[1], max, pwt->w[1], pwt->w[1], 32);
   7591         } else {
   7592             MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pwt->w[1], 32);
   7593         }
   7594 
   7595         if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
   7596             MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pws->w[2], 32);
   7597         } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
   7598             MSA_FLOAT_MAXOP(pwx->w[2], max, pwt->w[2], pwt->w[2], 32);
   7599         } else {
   7600             MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pwt->w[2], 32);
   7601         }
   7602 
   7603         if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
   7604             MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pws->w[3], 32);
   7605         } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
   7606             MSA_FLOAT_MAXOP(pwx->w[3], max, pwt->w[3], pwt->w[3], 32);
   7607         } else {
   7608             MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pwt->w[3], 32);
   7609         }
   7610 
   7611     } else if (df == DF_DOUBLE) {
   7612 
   7613         if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
   7614             MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pws->d[0], 64);
   7615         } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
   7616             MSA_FLOAT_MAXOP(pwx->d[0], max, pwt->d[0], pwt->d[0], 64);
   7617         } else {
   7618             MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pwt->d[0], 64);
   7619         }
   7620 
   7621         if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
   7622             MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pws->d[1], 64);
   7623         } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
   7624             MSA_FLOAT_MAXOP(pwx->d[1], max, pwt->d[1], pwt->d[1], 64);
   7625         } else {
   7626             MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pwt->d[1], 64);
   7627         }
   7628 
   7629     } else {
   7630 
   7631         assert(0);
   7632 
   7633     }
   7634 
            /*
             * Results were staged in the local wx; wd is written only after
             * the cause check (presumably check_msacsr_cause() does not
             * return when an enabled exception is pending - confirm).
             */
   7635     check_msacsr_cause(env, GETPC());
   7636 
   7637     msa_move_v(pwd, pwx);
   7638 }
   7639 
   /*
    * Element-wise "maximum by magnitude" of vector registers ws and wt,
    * written to vector register wd.  Each element is computed by
    * FMAXMIN_A(max, min, ...).
    *
    * env:        CPU state; supplies the vector registers and msa_fp_status.
    * df:         DF_WORD (four 32-bit elements) or DF_DOUBLE (two 64-bit
    *             elements); any other value hits assert(0).
    * wd, ws, wt: indices into env->active_fpu.fpr[].
    *
    * Results are staged in a local wr_t and copied to wd after
    * check_msacsr_cause().
    */
   7640 void helper_msa_fmax_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7641         uint32_t ws, uint32_t wt)
   7642 {
   7643     float_status *status = &env->active_tc.msa_fp_status;
   7644     wr_t wx, *pwx = &wx;
   7645     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7646     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7647     wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
   7648 
   7649     clear_msacsr_cause(env);
   7650 
   7651     if (df == DF_WORD) {
   7652         FMAXMIN_A(max, min, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
   7653         FMAXMIN_A(max, min, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
   7654         FMAXMIN_A(max, min, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
   7655         FMAXMIN_A(max, min, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
   7656     } else if (df == DF_DOUBLE) {
   7657         FMAXMIN_A(max, min, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
   7658         FMAXMIN_A(max, min, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
   7659     } else {
   7660         assert(0);
   7661     }
   7662 
   7663     check_msacsr_cause(env, GETPC());
   7664 
   7665     msa_move_v(pwd, pwx);
   7666 }
   7667 
   /*
    * Classify every floating-point element of vector register ws and store
    * the per-element result of float_class_s() / float_class_d() in the
    * matching element of vector register wd.
    *
    * env:    CPU state; supplies the vector registers and msa_fp_status.
    * df:     DF_WORD (four 32-bit elements) or DF_DOUBLE (two 64-bit
    *         elements); any other value hits assert(0).
    * wd, ws: indices into env->active_fpu.fpr[].
    *
    * Unlike the arithmetic helpers in this file, wd is written directly
    * (no staging copy) and the MSACSR cause is neither cleared nor checked.
    */
   7668 void helper_msa_fclass_df(CPUMIPSState *env, uint32_t df,
   7669         uint32_t wd, uint32_t ws)
   7670 {
   7671     float_status *status = &env->active_tc.msa_fp_status;
   7672 
   7673     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7674     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7675     if (df == DF_WORD) {
   7676         pwd->w[0] = float_class_s(pws->w[0], status);
   7677         pwd->w[1] = float_class_s(pws->w[1], status);
   7678         pwd->w[2] = float_class_s(pws->w[2], status);
   7679         pwd->w[3] = float_class_s(pws->w[3], status);
   7680     } else if (df == DF_DOUBLE) {
   7681         pwd->d[0] = float_class_d(pws->d[0], status);
   7682         pwd->d[1] = float_class_d(pws->d[1], status);
   7683     } else {
   7684         assert(0);
   7685     }
   7686 }
   7687 
   /*
    * MSA_FLOAT_UNOP0(DEST, OP, ARG, BITS)
    *
    * Store float<BITS>_<OP>(ARG, status) in DEST, where status is
    * &env->active_tc.msa_fp_status (`env` must be in scope).
    *
    * The softfloat exception flags are zeroed before the operation and
    * update_msacsr(env, CLEAR_FS_UNDERFLOW, 0) is called after it.  If
    * get_enabled_exceptions() is non-zero for the returned value c, DEST
    * becomes FLOAT_SNAN<BITS>(status) with its low six bits replaced by c.
    * Otherwise, if ARG is any kind of NaN, DEST is forced to 0.
    *
    * ARG is expanded twice (the operation and the NaN test).
    */
   7688 #define MSA_FLOAT_UNOP0(DEST, OP, ARG, BITS)                                \
   7689     do {                                                                    \
   7690         float_status *status = &env->active_tc.msa_fp_status;               \
   7691         int c;                                                              \
   7692                                                                             \
   7693         set_float_exception_flags(0, status);                               \
   7694         DEST = float ## BITS ## _ ## OP(ARG, status);                       \
   7695         c = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                      \
   7696                                                                             \
   7697         if (get_enabled_exceptions(env, c)) {                               \
   7698             DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
   7699         } else if (float ## BITS ## _is_any_nan(ARG)) {                     \
   7700             DEST = 0;                                                       \
   7701         }                                                                   \
   7702     } while (0)
   7703 
   /*
    * Convert each floating-point element of vector register ws to a signed
    * integer of the same width, rounding toward zero, and write the result
    * to vector register wd.
    *
    * env:    CPU state.
    * df:     DF_WORD uses float32_to_int32_round_to_zero, DF_DOUBLE uses
    *         float64_to_int64_round_to_zero; any other value hits assert(0).
    * wd, ws: indices into env->active_fpu.fpr[].
    *
    * Per MSA_FLOAT_UNOP0, a NaN input yields 0 when no enabled exception is
    * reported.  Results are staged in a local wr_t and copied to wd after
    * check_msacsr_cause().
    */
   7704 void helper_msa_ftrunc_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7705                             uint32_t ws)
   7706 {
   7707     wr_t wx, *pwx = &wx;
   7708     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7709     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7710     uint32_t i;
   7711 
   7712     clear_msacsr_cause(env);
   7713 
   7714     switch (df) {
   7715     case DF_WORD:
   7716         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7717             MSA_FLOAT_UNOP0(pwx->w[i], to_int32_round_to_zero, pws->w[i], 32);
   7718         }
   7719         break;
   7720     case DF_DOUBLE:
   7721         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7722             MSA_FLOAT_UNOP0(pwx->d[i], to_int64_round_to_zero, pws->d[i], 64);
   7723         }
   7724         break;
   7725     default:
   7726         assert(0);
   7727     }
   7728 
   7729     check_msacsr_cause(env, GETPC());
   7730 
   7731     msa_move_v(pwd, pwx);
   7732 }
   7733 
   /*
    * Convert each floating-point element of vector register ws to an
    * unsigned integer of the same width, rounding toward zero, and write the
    * result to vector register wd.
    *
    * env:    CPU state.
    * df:     DF_WORD uses float32_to_uint32_round_to_zero, DF_DOUBLE uses
    *         float64_to_uint64_round_to_zero; any other value hits assert(0).
    * wd, ws: indices into env->active_fpu.fpr[].
    *
    * Per MSA_FLOAT_UNOP0, a NaN input yields 0 when no enabled exception is
    * reported.  Results are staged in a local wr_t and copied to wd after
    * check_msacsr_cause().
    */
   7734 void helper_msa_ftrunc_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7735                             uint32_t ws)
   7736 {
   7737     wr_t wx, *pwx = &wx;
   7738     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7739     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7740     uint32_t i;
   7741 
   7742     clear_msacsr_cause(env);
   7743 
   7744     switch (df) {
   7745     case DF_WORD:
   7746         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7747             MSA_FLOAT_UNOP0(pwx->w[i], to_uint32_round_to_zero, pws->w[i], 32);
   7748         }
   7749         break;
   7750     case DF_DOUBLE:
   7751         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7752             MSA_FLOAT_UNOP0(pwx->d[i], to_uint64_round_to_zero, pws->d[i], 64);
   7753         }
   7754         break;
   7755     default:
   7756         assert(0);
   7757     }
   7758 
   7759     check_msacsr_cause(env, GETPC());
   7760 
   7761     msa_move_v(pwd, pwx);
   7762 }
   7763 
   /*
    * Element-wise square root of vector register ws, written to vector
    * register wd.
    *
    * env:    CPU state.
    * df:     DF_WORD or DF_DOUBLE; any other value hits assert(0).
    * wd, ws: indices into env->active_fpu.fpr[].
    *
    * Each element goes through MSA_FLOAT_UNOP with the `sqrt` operation
    * (MSA_FLOAT_UNOP is defined outside this excerpt; presumably it calls
    * float<BITS>_sqrt and updates MSACSR - confirm).  Results are staged in a
    * local wr_t and copied to wd after check_msacsr_cause().
    */
   7764 void helper_msa_fsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7765                          uint32_t ws)
   7766 {
   7767     wr_t wx, *pwx = &wx;
   7768     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7769     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7770     uint32_t i;
   7771 
   7772     clear_msacsr_cause(env);
   7773 
   7774     switch (df) {
   7775     case DF_WORD:
   7776         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7777             MSA_FLOAT_UNOP(pwx->w[i], sqrt, pws->w[i], 32);
   7778         }
   7779         break;
   7780     case DF_DOUBLE:
   7781         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7782             MSA_FLOAT_UNOP(pwx->d[i], sqrt, pws->d[i], 64);
   7783         }
   7784         break;
   7785     default:
   7786         assert(0);
   7787     }
   7788 
   7789     check_msacsr_cause(env, GETPC());
   7790 
   7791     msa_move_v(pwd, pwx);
   7792 }
   7793 
   /*
    * MSA_FLOAT_RECIPROCAL(DEST, ARG, BITS)
    *
    * Store float<BITS>_div(FLOAT_ONE<BITS>, ARG, status) in DEST, i.e. the
    * reciprocal of ARG, using &env->active_tc.msa_fp_status (`env` must be
    * in scope).
    *
    * The second argument passed to update_msacsr() is 0 when ARG is an
    * infinity or DEST is a quiet NaN, and RECIPROCAL_INEXACT otherwise
    * (the `||` binds tighter than `?:`).  The third argument is
    * IS_DENORMAL(DEST, BITS).  If get_enabled_exceptions() is non-zero for
    * the returned value c, DEST becomes FLOAT_SNAN<BITS>(status) with its
    * low six bits replaced by c.
    *
    * NOTE: ARG is expanded twice (in the division and in the infinity
    * test), so an ARG that is itself a function call is evaluated twice.
    */
   7794 #define MSA_FLOAT_RECIPROCAL(DEST, ARG, BITS)                               \
   7795     do {                                                                    \
   7796         float_status *status = &env->active_tc.msa_fp_status;               \
   7797         int c;                                                              \
   7798                                                                             \
   7799         set_float_exception_flags(0, status);                               \
   7800         DEST = float ## BITS ## _ ## div(FLOAT_ONE ## BITS, ARG, status);   \
   7801         c = update_msacsr(env, float ## BITS ## _is_infinity(ARG) ||        \
   7802                           float ## BITS ## _is_quiet_nan(DEST, status) ?    \
   7803                           0 : RECIPROCAL_INEXACT,                           \
   7804                           IS_DENORMAL(DEST, BITS));                         \
   7805                                                                             \
   7806         if (get_enabled_exceptions(env, c)) {                               \
   7807             DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
   7808         }                                                                   \
   7809     } while (0)
   7810 
   /*
    * Element-wise reciprocal square root of vector register ws, written to
    * vector register wd.
    *
    * env:    CPU state.
    * df:     DF_WORD or DF_DOUBLE; any other value hits assert(0).
    * wd, ws: indices into env->active_fpu.fpr[].
    *
    * Each element is computed as MSA_FLOAT_RECIPROCAL applied to
    * float<BITS>_sqrt(element).  Because that macro expands its ARG twice,
    * the square root call appears twice per element in the expansion.
    * Results are staged in a local wr_t and copied to wd after
    * check_msacsr_cause().
    */
   7811 void helper_msa_frsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7812                           uint32_t ws)
   7813 {
   7814     wr_t wx, *pwx = &wx;
   7815     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7816     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7817     uint32_t i;
   7818 
   7819     clear_msacsr_cause(env);
   7820 
   7821     switch (df) {
   7822     case DF_WORD:
   7823         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7824             MSA_FLOAT_RECIPROCAL(pwx->w[i], float32_sqrt(pws->w[i],
   7825                     &env->active_tc.msa_fp_status), 32);
   7826         }
   7827         break;
   7828     case DF_DOUBLE:
   7829         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7830             MSA_FLOAT_RECIPROCAL(pwx->d[i], float64_sqrt(pws->d[i],
   7831                     &env->active_tc.msa_fp_status), 64);
   7832         }
   7833         break;
   7834     default:
   7835         assert(0);
   7836     }
   7837 
   7838     check_msacsr_cause(env, GETPC());
   7839 
   7840     msa_move_v(pwd, pwx);
   7841 }
   7842 
   /*
    * Element-wise reciprocal (1 / x) of vector register ws, written to
    * vector register wd, using MSA_FLOAT_RECIPROCAL for every element.
    *
    * env:    CPU state.
    * df:     DF_WORD or DF_DOUBLE; any other value hits assert(0).
    * wd, ws: indices into env->active_fpu.fpr[].
    *
    * Results are staged in a local wr_t and copied to wd after
    * check_msacsr_cause().
    */
   7843 void helper_msa_frcp_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7844                         uint32_t ws)
   7845 {
   7846     wr_t wx, *pwx = &wx;
   7847     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7848     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7849     uint32_t i;
   7850 
   7851     clear_msacsr_cause(env);
   7852 
   7853     switch (df) {
   7854     case DF_WORD:
   7855         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7856             MSA_FLOAT_RECIPROCAL(pwx->w[i], pws->w[i], 32);
   7857         }
   7858         break;
   7859     case DF_DOUBLE:
   7860         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7861             MSA_FLOAT_RECIPROCAL(pwx->d[i], pws->d[i], 64);
   7862         }
   7863         break;
   7864     default:
   7865         assert(0);
   7866     }
   7867 
   7868     check_msacsr_cause(env, GETPC());
   7869 
   7870     msa_move_v(pwd, pwx);
   7871 }
   7872 
   /*
    * Round each floating-point element of vector register ws to an integral
    * value kept in floating-point format, and write it to vector register
    * wd.
    *
    * env:    CPU state.
    * df:     DF_WORD or DF_DOUBLE; any other value hits assert(0).
    * wd, ws: indices into env->active_fpu.fpr[].
    *
    * Each element goes through MSA_FLOAT_UNOP with the `round_to_int`
    * operation (MSA_FLOAT_UNOP is defined outside this excerpt; the rounding
    * mode is presumably the one held in msa_fp_status - confirm).  Results
    * are staged in a local wr_t and copied to wd after check_msacsr_cause().
    */
   7873 void helper_msa_frint_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7874                          uint32_t ws)
   7875 {
   7876     wr_t wx, *pwx = &wx;
   7877     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7878     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7879     uint32_t i;
   7880 
   7881     clear_msacsr_cause(env);
   7882 
   7883     switch (df) {
   7884     case DF_WORD:
   7885         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7886             MSA_FLOAT_UNOP(pwx->w[i], round_to_int, pws->w[i], 32);
   7887         }
   7888         break;
   7889     case DF_DOUBLE:
   7890         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7891             MSA_FLOAT_UNOP(pwx->d[i], round_to_int, pws->d[i], 64);
   7892         }
   7893         break;
   7894     default:
   7895         assert(0);
   7896     }
   7897 
   7898     check_msacsr_cause(env, GETPC());
   7899 
   7900     msa_move_v(pwd, pwx);
   7901 }
   7902 
   /*
    * MSA_FLOAT_LOGB(DEST, ARG, BITS)
    *
    * Store in DEST the base-2 logarithm of ARG rounded down to an integral
    * floating-point value.  Uses &env->active_tc.msa_fp_status (`env` must
    * be in scope).
    *
    * Sequence, as written below:
    *  - zero the exception flags and force rounding mode float_round_down;
    *  - DEST = float<BITS>_log2(ARG), then float<BITS>_round_to_int(DEST);
    *  - restore the rounding mode selected by the RM field of
    *    env->active_tc.msacsr through the ieee_rm[] table;
    *  - drop float_flag_inexact from the accumulated flags;
    *  - call update_msacsr(env, 0, IS_DENORMAL(DEST, BITS)); if
    *    get_enabled_exceptions() is non-zero for the returned value c, DEST
    *    becomes FLOAT_SNAN<BITS>(status) with its low six bits replaced
    *    by c.
    */
   7903 #define MSA_FLOAT_LOGB(DEST, ARG, BITS)                                     \
   7904     do {                                                                    \
   7905         float_status *status = &env->active_tc.msa_fp_status;               \
   7906         int c;                                                              \
   7907                                                                             \
   7908         set_float_exception_flags(0, status);                               \
   7909         set_float_rounding_mode(float_round_down, status);                  \
   7910         DEST = float ## BITS ## _ ## log2(ARG, status);                     \
   7911         DEST = float ## BITS ## _ ## round_to_int(DEST, status);            \
   7912         set_float_rounding_mode(ieee_rm[(env->active_tc.msacsr &            \
   7913                                          MSACSR_RM_MASK) >> MSACSR_RM],     \
   7914                                 status);                                    \
   7915                                                                             \
   7916         set_float_exception_flags(get_float_exception_flags(status) &       \
   7917                                   (~float_flag_inexact),                    \
   7918                                   status);                                  \
   7919                                                                             \
   7920         c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
   7921                                                                             \
   7922         if (get_enabled_exceptions(env, c)) {                               \
   7923             DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
   7924         }                                                                   \
   7925     } while (0)
   7926 
   /*
    * Element-wise base-2 logarithm, rounded down to an integral value, of
    * vector register ws, written to vector register wd.  Every element is
    * computed by MSA_FLOAT_LOGB.
    *
    * env:    CPU state.
    * df:     DF_WORD or DF_DOUBLE; any other value hits assert(0).
    * wd, ws: indices into env->active_fpu.fpr[].
    *
    * Results are staged in a local wr_t and copied to wd after
    * check_msacsr_cause().
    */
   7927 void helper_msa_flog2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7928                          uint32_t ws)
   7929 {
   7930     wr_t wx, *pwx = &wx;
   7931     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7932     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7933     uint32_t i;
   7934 
   7935     clear_msacsr_cause(env);
   7936 
   7937     switch (df) {
   7938     case DF_WORD:
   7939         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7940             MSA_FLOAT_LOGB(pwx->w[i], pws->w[i], 32);
   7941         }
   7942         break;
   7943     case DF_DOUBLE:
   7944         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7945             MSA_FLOAT_LOGB(pwx->d[i], pws->d[i], 64);
   7946         }
   7947         break;
   7948     default:
   7949         assert(0);
   7950     }
   7951 
   7952     check_msacsr_cause(env, GETPC());
   7953 
   7954     msa_move_v(pwd, pwx);
   7955 }
   7956 
   /*
    * Widen floating-point elements of vector register ws to the next larger
    * format and write them to vector register wd.  Source elements are
    * fetched with the Lh()/Lw() accessors (defined outside this excerpt;
    * presumably they select the "left" half of the register - confirm).
    *
    * env:    CPU state.
    * df:     destination format.  DF_WORD converts half-precision values
    *         read with Lh() to 32-bit floats via `from_float16`; DF_DOUBLE
    *         converts 32-bit floats read with Lw() to 64-bit floats via
    *         `from_float32`.  Any other value hits assert(0).
    * wd, ws: indices into env->active_fpu.fpr[].
    *
    * Results are staged in a local wr_t and copied to wd after
    * check_msacsr_cause().
    */
   7957 void helper_msa_fexupl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7958                           uint32_t ws)
   7959 {
   7960     wr_t wx, *pwx = &wx;
   7961     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7962     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7963     uint32_t i;
   7964 
   7965     clear_msacsr_cause(env);
   7966 
   7967     switch (df) {
   7968     case DF_WORD:
   7969         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   7970             /*
   7971              * Half precision floats come in two formats: standard
   7972              * IEEE and "ARM" format.  The latter gains extra exponent
   7973              * range by omitting the NaN/Inf encodings.
   7974              */
   7975             bool ieee = true;
   7976 
                    /* `ieee` is handed to the conversion as its second operand. */
   7977             MSA_FLOAT_BINOP(pwx->w[i], from_float16, Lh(pws, i), ieee, 32);
   7978         }
   7979         break;
   7980     case DF_DOUBLE:
   7981         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   7982             MSA_FLOAT_UNOP(pwx->d[i], from_float32, Lw(pws, i), 64);
   7983         }
   7984         break;
   7985     default:
   7986         assert(0);
   7987     }
   7988 
   7989     check_msacsr_cause(env, GETPC());
   7990     msa_move_v(pwd, pwx);
   7991 }
   7992 
   /*
    * Widen floating-point elements of vector register ws to the next larger
    * format and write them to vector register wd.  Source elements are
    * fetched with the Rh()/Rw() accessors (defined outside this excerpt;
    * presumably they select the "right" half of the register - confirm).
    *
    * env:    CPU state.
    * df:     destination format.  DF_WORD converts half-precision values
    *         read with Rh() to 32-bit floats via `from_float16`; DF_DOUBLE
    *         converts 32-bit floats read with Rw() to 64-bit floats via
    *         `from_float32`.  Any other value hits assert(0).
    * wd, ws: indices into env->active_fpu.fpr[].
    *
    * Results are staged in a local wr_t and copied to wd after
    * check_msacsr_cause().
    */
   7993 void helper_msa_fexupr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   7994                           uint32_t ws)
   7995 {
   7996     wr_t wx, *pwx = &wx;
   7997     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   7998     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   7999     uint32_t i;
   8000 
   8001     clear_msacsr_cause(env);
   8002 
   8003     switch (df) {
   8004     case DF_WORD:
   8005         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   8006             /*
   8007              * Half precision floats come in two formats: standard
   8008              * IEEE and "ARM" format.  The latter gains extra exponent
   8009              * range by omitting the NaN/Inf encodings.
   8010              */
   8011             bool ieee = true;
   8012 
                    /* `ieee` is handed to the conversion as its second operand. */
   8013             MSA_FLOAT_BINOP(pwx->w[i], from_float16, Rh(pws, i), ieee, 32);
   8014         }
   8015         break;
   8016     case DF_DOUBLE:
   8017         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   8018             MSA_FLOAT_UNOP(pwx->d[i], from_float32, Rw(pws, i), 64);
   8019         }
   8020         break;
   8021     default:
   8022         assert(0);
   8023     }
   8024 
   8025     check_msacsr_cause(env, GETPC());
   8026     msa_move_v(pwd, pwx);
   8027 }
   8028 
   /*
    * Convert fixed-point elements of vector register ws to floating point
    * and write them to vector register wd.  Source elements are fetched with
    * the Lh()/Lw() accessors (defined outside this excerpt; presumably the
    * "left" half of the register - confirm).
    *
    * env:    CPU state.
    * df:     destination format.  DF_WORD applies `from_q16` to values read
    *         with Lh(); DF_DOUBLE applies `from_q32` to values read with
    *         Lw().  Any other value hits assert(0).
    * wd, ws: indices into env->active_fpu.fpr[].
    *
    * NOTE(review): unlike the neighbouring helpers, this one calls neither
    * clear_msacsr_cause() nor check_msacsr_cause().  Presumably the
    * conversion cannot raise an exception - confirm against the MSA
    * specification.
    */
   8029 void helper_msa_ffql_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   8030                         uint32_t ws)
   8031 {
   8032     wr_t wx, *pwx = &wx;
   8033     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8034     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   8035     uint32_t i;
   8036 
   8037     switch (df) {
   8038     case DF_WORD:
   8039         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   8040             MSA_FLOAT_UNOP(pwx->w[i], from_q16, Lh(pws, i), 32);
   8041         }
   8042         break;
   8043     case DF_DOUBLE:
   8044         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   8045             MSA_FLOAT_UNOP(pwx->d[i], from_q32, Lw(pws, i), 64);
   8046         }
   8047         break;
   8048     default:
   8049         assert(0);
   8050     }
   8051 
   8052     msa_move_v(pwd, pwx);
   8053 }
   8054 
   /*
    * Convert fixed-point elements of vector register ws to floating point
    * and write them to vector register wd.  Source elements are fetched with
    * the Rh()/Rw() accessors (defined outside this excerpt; presumably the
    * "right" half of the register - confirm).
    *
    * env:    CPU state.
    * df:     destination format.  DF_WORD applies `from_q16` to values read
    *         with Rh(); DF_DOUBLE applies `from_q32` to values read with
    *         Rw().  Any other value hits assert(0).
    * wd, ws: indices into env->active_fpu.fpr[].
    *
    * NOTE(review): unlike the neighbouring helpers, this one calls neither
    * clear_msacsr_cause() nor check_msacsr_cause().  Presumably the
    * conversion cannot raise an exception - confirm against the MSA
    * specification.
    */
   8055 void helper_msa_ffqr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   8056                         uint32_t ws)
   8057 {
   8058     wr_t wx, *pwx = &wx;
   8059     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8060     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   8061     uint32_t i;
   8062 
   8063     switch (df) {
   8064     case DF_WORD:
   8065         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   8066             MSA_FLOAT_UNOP(pwx->w[i], from_q16, Rh(pws, i), 32);
   8067         }
   8068         break;
   8069     case DF_DOUBLE:
   8070         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   8071             MSA_FLOAT_UNOP(pwx->d[i], from_q32, Rw(pws, i), 64);
   8072         }
   8073         break;
   8074     default:
   8075         assert(0);
   8076     }
   8077 
   8078     msa_move_v(pwd, pwx);
   8079 }
   8080 
   /*
    * Convert each floating-point element of vector register ws to a signed
    * integer of the same width and write the result to vector register wd.
    *
    * env:    CPU state.
    * df:     DF_WORD uses float32_to_int32, DF_DOUBLE uses float64_to_int64
    *         (rounding is whatever msa_fp_status currently selects); any
    *         other value hits assert(0).
    * wd, ws: indices into env->active_fpu.fpr[].
    *
    * Per MSA_FLOAT_UNOP0, a NaN input yields 0 when no enabled exception is
    * reported.  Results are staged in a local wr_t and copied to wd after
    * check_msacsr_cause().
    */
   8081 void helper_msa_ftint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   8082                            uint32_t ws)
   8083 {
   8084     wr_t wx, *pwx = &wx;
   8085     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8086     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   8087     uint32_t i;
   8088 
   8089     clear_msacsr_cause(env);
   8090 
   8091     switch (df) {
   8092     case DF_WORD:
   8093         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   8094             MSA_FLOAT_UNOP0(pwx->w[i], to_int32, pws->w[i], 32);
   8095         }
   8096         break;
   8097     case DF_DOUBLE:
   8098         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   8099             MSA_FLOAT_UNOP0(pwx->d[i], to_int64, pws->d[i], 64);
   8100         }
   8101         break;
   8102     default:
   8103         assert(0);
   8104     }
   8105 
   8106     check_msacsr_cause(env, GETPC());
   8107 
   8108     msa_move_v(pwd, pwx);
   8109 }
   8110 
   /*
    * Convert each floating-point element of vector register ws to an
    * unsigned integer of the same width and write the result to vector
    * register wd.
    *
    * env:    CPU state.
    * df:     DF_WORD uses float32_to_uint32, DF_DOUBLE uses
    *         float64_to_uint64 (rounding is whatever msa_fp_status currently
    *         selects); any other value hits assert(0).
    * wd, ws: indices into env->active_fpu.fpr[].
    *
    * Per MSA_FLOAT_UNOP0, a NaN input yields 0 when no enabled exception is
    * reported.  Results are staged in a local wr_t and copied to wd after
    * check_msacsr_cause().
    */
   8111 void helper_msa_ftint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   8112                            uint32_t ws)
   8113 {
   8114     wr_t wx, *pwx = &wx;
   8115     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8116     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   8117     uint32_t i;
   8118 
   8119     clear_msacsr_cause(env);
   8120 
   8121     switch (df) {
   8122     case DF_WORD:
   8123         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   8124             MSA_FLOAT_UNOP0(pwx->w[i], to_uint32, pws->w[i], 32);
   8125         }
   8126         break;
   8127     case DF_DOUBLE:
   8128         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   8129             MSA_FLOAT_UNOP0(pwx->d[i], to_uint64, pws->d[i], 64);
   8130         }
   8131         break;
   8132     default:
   8133         assert(0);
   8134     }
   8135 
   8136     check_msacsr_cause(env, GETPC());
   8137 
   8138     msa_move_v(pwd, pwx);
   8139 }
   8140 
   /*
    * Name aliases for the integer-to-float converters.  The helpers below
    * pass `from_int32`, `from_uint32`, `from_int64` and `from_uint64` as the
    * OP argument of MSA_FLOAT_UNOP; presumably that macro pastes
    * float<BITS>_<OP> the same way MSA_FLOAT_UNOP0 does, which would make
    * these aliases the names it resolves to - confirm.
    */
   8141 #define float32_from_int32 int32_to_float32
   8142 #define float32_from_uint32 uint32_to_float32
   8143 
   8144 #define float64_from_int64 int64_to_float64
   8145 #define float64_from_uint64 uint64_to_float64
   8146 
   /*
    * Convert each signed integer element of vector register ws to a
    * floating-point value of the same width and write it to vector register
    * wd.
    *
    * env:    CPU state.
    * df:     DF_WORD uses the `from_int32` operation, DF_DOUBLE uses
    *         `from_int64`; any other value hits assert(0).
    * wd, ws: indices into env->active_fpu.fpr[].
    *
    * Results are staged in a local wr_t and copied to wd after
    * check_msacsr_cause().
    */
   8147 void helper_msa_ffint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   8148                            uint32_t ws)
   8149 {
   8150     wr_t wx, *pwx = &wx;
   8151     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8152     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   8153     uint32_t i;
   8154 
   8155     clear_msacsr_cause(env);
   8156 
   8157     switch (df) {
   8158     case DF_WORD:
   8159         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   8160             MSA_FLOAT_UNOP(pwx->w[i], from_int32, pws->w[i], 32);
   8161         }
   8162         break;
   8163     case DF_DOUBLE:
   8164         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   8165             MSA_FLOAT_UNOP(pwx->d[i], from_int64, pws->d[i], 64);
   8166         }
   8167         break;
   8168     default:
   8169         assert(0);
   8170     }
   8171 
   8172     check_msacsr_cause(env, GETPC());
   8173 
   8174     msa_move_v(pwd, pwx);
   8175 }
   8176 
   /*
    * Convert each unsigned integer element of vector register ws to a
    * floating-point value of the same width and write it to vector register
    * wd.
    *
    * env:    CPU state.
    * df:     DF_WORD uses the `from_uint32` operation, DF_DOUBLE uses
    *         `from_uint64`; any other value hits assert(0).
    * wd, ws: indices into env->active_fpu.fpr[].
    *
    * Results are staged in a local wr_t and copied to wd after
    * check_msacsr_cause().
    */
   8177 void helper_msa_ffint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
   8178                            uint32_t ws)
   8179 {
   8180     wr_t wx, *pwx = &wx;
   8181     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8182     wr_t *pws = &(env->active_fpu.fpr[ws].wr);
   8183     uint32_t i;
   8184 
   8185     clear_msacsr_cause(env);
   8186 
   8187     switch (df) {
   8188     case DF_WORD:
   8189         for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
   8190             MSA_FLOAT_UNOP(pwx->w[i], from_uint32, pws->w[i], 32);
   8191         }
   8192         break;
   8193     case DF_DOUBLE:
   8194         for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
   8195             MSA_FLOAT_UNOP(pwx->d[i], from_uint64, pws->d[i], 64);
   8196         }
   8197         break;
   8198     default:
   8199         assert(0);
   8200     }
   8201 
   8202     check_msacsr_cause(env, GETPC());
   8203 
   8204     msa_move_v(pwd, pwx);
   8205 }
   8206 
   8207 /* Element width in bits for data format df: 1 << (df + 3), so df 0 is 8 */
   8208 #define DF_BITS(df) (1 << ((df) + 3))
   8209 
   8210 /* Number of df-sized elements in one MSA_WRLEN-bit vector register */
   8211 #define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
        /*
         * NOTE: DF_BITS repeats, token for token, the definition near the top
         * of this file, and DF_ELEMENTS is already used by code above this
         * point, so both lines are redefinitions of existing macros.
         */
   8212 
   /*
    * MEMOP_IDX(DF)
    *
    * When CONFIG_USER_ONLY is not defined, expands to a declaration of a
    * local `MemOpIdx oi` built from MO_TE | DF | MO_UNALN and
    * cpu_mmu_index(env, false); `env` must be in scope.  The trailing
    * semicolon is part of the expansion.  With CONFIG_USER_ONLY defined the
    * macro expands to nothing.
    */
   8213 #if !defined(CONFIG_USER_ONLY)
   8214 #define MEMOP_IDX(DF)                                                   \
   8215     MemOpIdx oi = make_memop_idx(MO_TE | DF | MO_UNALN,                 \
   8216                                  cpu_mmu_index(env, false));
   8217 #else
   8218 #define MEMOP_IDX(DF)
   8219 #endif
   8220 
   /*
    * Lane-wise byte-swap helpers, compiled only when TARGET_BIG_ENDIAN is
    * non-zero.  The vector load helpers in this file apply them to data
    * fetched with little-endian 64-bit loads.
    */
   8221 #if TARGET_BIG_ENDIAN
        /* Swap the two bytes inside each of the four 16-bit lanes of x. */
   8222 static inline uint64_t bswap16x4(uint64_t x)
   8223 {
            /* m keeps the low byte of every 16-bit lane. */
   8224     uint64_t m = 0x00ff00ff00ff00ffull;
   8225     return ((x & m) << 8) | ((x >> 8) & m);
   8226 }
   8227 
        /*
         * Byte-swap each of the two 32-bit lanes of x: reverse the whole
         * 64-bit value with bswap64(), then rotate by 32 bits with ror64() so
         * the two lanes return to their original positions.
         */
   8228 static inline uint64_t bswap32x2(uint64_t x)
   8229 {
   8230     return ror64(bswap64(x), 32);
   8231 }
   8232 #endif
   8233 
   /*
    * Load 16 bytes of guest memory starting at addr into vector register wd,
    * for the byte data format.
    *
    * env:  CPU state.
    * wd:   index into env->active_fpu.fpr[].
    * addr: guest address of the first byte.
    *
    * Both 64-bit loads are issued before wd is written (presumably so that
    * a faulting access leaves the register unchanged - confirm).
    */
   8234 void helper_msa_ld_b(CPUMIPSState *env, uint32_t wd,
   8235                      target_ulong addr)
   8236 {
   8237     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8238     uintptr_t ra = GETPC();
   8239     uint64_t d0, d1;
   8240 
   8241     /* Load 8 bytes at a time.  Vector element ordering makes this LE.  */
   8242     d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
   8243     d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
   8244     pwd->d[0] = d0;
   8245     pwd->d[1] = d1;
   8246 }
   8247 
   8248 void helper_msa_ld_h(CPUMIPSState *env, uint32_t wd,
   8249                      target_ulong addr)
   8250 {
   8251     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8252     uintptr_t ra = GETPC();
   8253     uint64_t d0, d1;
   8254 
   8255     /*
   8256      * Load 8 bytes at a time.  Use little-endian load, then for
   8257      * big-endian target, we must then swap the four halfwords.
   8258      */
   8259     d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
   8260     d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
   8261 #if TARGET_BIG_ENDIAN
   8262     d0 = bswap16x4(d0);
   8263     d1 = bswap16x4(d1);
   8264 #endif
   8265     pwd->d[0] = d0;
   8266     pwd->d[1] = d1;
   8267 }
   8268 
   8269 void helper_msa_ld_w(CPUMIPSState *env, uint32_t wd,
   8270                      target_ulong addr)
   8271 {
   8272     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8273     uintptr_t ra = GETPC();
   8274     uint64_t d0, d1;
   8275 
   8276     /*
   8277      * Load 8 bytes at a time.  Use little-endian load, then for
   8278      * big-endian target, we must then bswap the two words.
   8279      */
   8280     d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
   8281     d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
   8282 #if TARGET_BIG_ENDIAN
   8283     d0 = bswap32x2(d0);
   8284     d1 = bswap32x2(d1);
   8285 #endif
   8286     pwd->d[0] = d0;
   8287     pwd->d[1] = d1;
   8288 }
   8289 
   8290 void helper_msa_ld_d(CPUMIPSState *env, uint32_t wd,
   8291                      target_ulong addr)
   8292 {
   8293     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8294     uintptr_t ra = GETPC();
   8295     uint64_t d0, d1;
   8296 
   8297     d0 = cpu_ldq_data_ra(env, addr + 0, ra);
   8298     d1 = cpu_ldq_data_ra(env, addr + 8, ra);
   8299     pwd->d[0] = d0;
   8300     pwd->d[1] = d1;
   8301 }
   8302 
   8303 #define MSA_PAGESPAN(x) \
   8304         ((((x) & ~TARGET_PAGE_MASK) + MSA_WRLEN / 8 - 1) >= TARGET_PAGE_SIZE)
   8305 
   8306 static inline void ensure_writable_pages(CPUMIPSState *env,
   8307                                          target_ulong addr,
   8308                                          int mmu_idx,
   8309                                          uintptr_t retaddr)
   8310 {
   8311     /* FIXME: Probe the actual accesses (pass and use a size) */
   8312     if (unlikely(MSA_PAGESPAN(addr))) {
   8313         /* first page */
   8314         probe_write(env, addr, 0, mmu_idx, retaddr);
   8315         /* second page */
   8316         addr = (addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
   8317         probe_write(env, addr, 0, mmu_idx, retaddr);
   8318     }
   8319 }
   8320 
   8321 void helper_msa_st_b(CPUMIPSState *env, uint32_t wd,
   8322                      target_ulong addr)
   8323 {
   8324     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8325     int mmu_idx = cpu_mmu_index(env, false);
   8326     uintptr_t ra = GETPC();
   8327 
   8328     ensure_writable_pages(env, addr, mmu_idx, ra);
   8329 
   8330     /* Store 8 bytes at a time.  Vector element ordering makes this LE.  */
   8331     cpu_stq_le_data_ra(env, addr + 0, pwd->d[0], ra);
   8332     cpu_stq_le_data_ra(env, addr + 8, pwd->d[1], ra);
   8333 }
   8334 
   8335 void helper_msa_st_h(CPUMIPSState *env, uint32_t wd,
   8336                      target_ulong addr)
   8337 {
   8338     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8339     int mmu_idx = cpu_mmu_index(env, false);
   8340     uintptr_t ra = GETPC();
   8341     uint64_t d0, d1;
   8342 
   8343     ensure_writable_pages(env, addr, mmu_idx, ra);
   8344 
   8345     /* Store 8 bytes at a time.  See helper_msa_ld_h. */
   8346     d0 = pwd->d[0];
   8347     d1 = pwd->d[1];
   8348 #if TARGET_BIG_ENDIAN
   8349     d0 = bswap16x4(d0);
   8350     d1 = bswap16x4(d1);
   8351 #endif
   8352     cpu_stq_le_data_ra(env, addr + 0, d0, ra);
   8353     cpu_stq_le_data_ra(env, addr + 8, d1, ra);
   8354 }
   8355 
   8356 void helper_msa_st_w(CPUMIPSState *env, uint32_t wd,
   8357                      target_ulong addr)
   8358 {
   8359     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8360     int mmu_idx = cpu_mmu_index(env, false);
   8361     uintptr_t ra = GETPC();
   8362     uint64_t d0, d1;
   8363 
   8364     ensure_writable_pages(env, addr, mmu_idx, ra);
   8365 
   8366     /* Store 8 bytes at a time.  See helper_msa_ld_w. */
   8367     d0 = pwd->d[0];
   8368     d1 = pwd->d[1];
   8369 #if TARGET_BIG_ENDIAN
   8370     d0 = bswap32x2(d0);
   8371     d1 = bswap32x2(d1);
   8372 #endif
   8373     cpu_stq_le_data_ra(env, addr + 0, d0, ra);
   8374     cpu_stq_le_data_ra(env, addr + 8, d1, ra);
   8375 }
   8376 
   8377 void helper_msa_st_d(CPUMIPSState *env, uint32_t wd,
   8378                      target_ulong addr)
   8379 {
   8380     wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
   8381     int mmu_idx = cpu_mmu_index(env, false);
   8382     uintptr_t ra = GETPC();
   8383 
   8384     ensure_writable_pages(env, addr, mmu_idx, GETPC());
   8385 
   8386     cpu_stq_data_ra(env, addr + 0, pwd->d[0], ra);
   8387     cpu_stq_data_ra(env, addr + 8, pwd->d[1], ra);
   8388 }