qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git

translate-mve.c (76274B)


/*
 *  ARM translation: M-profile MVE instructions
 *
 *  Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}
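
/*
 * For illustration: the decoder calls this to turn the encoded field x
 * into the actual VIDUP/VDDUP-family increment, 1 << x, i.e. a step of
 * 1, 2, 4 or 8.
 */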

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_no_predication(DisasContext *s)
{
    /*
     * Return true if we are executing the entire MVE instruction
     * with no predication or partial-execution, and so we can safely
     * use an inline TCG vector implementation.
     */
    return s->eci == 0 && s->mve_no_pred;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
        return false;
    }
}
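
/*
 * A rough worked example of the ECI states: PSR.ECI records which beats
 * of an interrupted beatwise insn have already executed, so e.g.
 * ECI_A0A1 means beats A0 and A1 are already done and only the later
 * beats may have architectural effect. The switch above accepts exactly
 * the valid encodings; any other value is reserved and faults.
 */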

void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}
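
/*
 * For illustration: the translator caches ECI in bits [7:4] of
 * condexec_bits (the low IT-state bits are zero whenever ECI is in
 * use), which is why s->eci is shifted left by 4 before being stored.
 */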

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}
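
/*
 * A worked example of the addressing logic above: with msize MO_16 an
 * encoded imm of 3 gives offset = 3 << 1 = 6; the 'a' bit selects
 * add vs subtract, 'p' selects pre-indexing, and 'w' writeback, so
 * e.g. p=0 w=1 is the post-indexed form [Rn], #+6.
 */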

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
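
/*
 * For illustration: VLDSTB_H covers the widening/narrowing byte forms,
 * e.g. VLDRB.S16 (load bytes, sign-extend to halfword elements) and
 * VSTRB.16 (store halfword elements back as bytes). MSIZE is the
 * memory element size, so the immediate here is still byte-scaled.
 */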

static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
{
    TCGv_i32 addr;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn || a->rn == 15) {
        /* Rn case is UNPREDICTABLE */
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    addr = load_reg(s, a->rn);

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, addr);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_i32(addr);
    mve_update_eci(s);
    return true;
}
/*
 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
 * sign-extended to halfword elements in register". _os_ indicates that
 * the offsets in Qm should be scaled by the element size.
 */
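/*
 * So, for example, vldrw_sg_os_uw is a gather of 32-bit words using
 * 32-bit offsets from Qm, each offset being scaled by 4.
 */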
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
            { NULL, NULL,           F(vldrh_sg_sw), NULL },
            { NULL, NULL,           NULL,           NULL },
            { NULL, NULL,           NULL,           NULL }
        }, {
            { NULL, NULL,              NULL,              NULL },
            { NULL, NULL,              F(vldrh_sg_os_sw), NULL },
            { NULL, NULL,              NULL,              NULL },
            { NULL, NULL,              NULL,              NULL }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
            { NULL,           F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
            { NULL,           NULL,           F(vldrw_sg_uw), NULL },
            { NULL,           NULL,           NULL,           F(vldrd_sg_ud) }
        }, {
            { NULL, NULL,              NULL,              NULL },
            { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
            { NULL, NULL,              F(vldrw_sg_os_uw), NULL },
            { NULL, NULL,              NULL,              F(vldrd_sg_os_ud) }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
            { NULL,           F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
            { NULL,           NULL,           F(vstrw_sg_uw), NULL },
            { NULL,           NULL,           NULL,           F(vstrd_sg_ud) }
        }, {
            { NULL, NULL,              NULL,              NULL },
            { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
            { NULL, NULL,              F(vstrw_sg_os_uw), NULL },
            { NULL, NULL,              NULL,              F(vstrd_sg_os_ud) }
        }
    };
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

#undef F

static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
                           MVEGenLdStSGFn *fn, unsigned msize)
{
    uint32_t offset;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(offset));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}
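
/*
 * For illustration: these are the [Qm, #imm] forms, where the immediate
 * is scaled by the memory size (e.g. imm 2 at MO_64 gives offset 16) and
 * applied to every address element in Qm; the _wb_ helper variants also
 * write the updated addresses back to Qm.
 */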

static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrw_sg_uw,
        gen_helper_mve_vldrw_sg_wb_uw,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrd_sg_ud,
        gen_helper_mve_vldrd_sg_wb_ud,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrw_sg_uw,
        gen_helper_mve_vstrw_sg_wb_uw,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrd_sg_ud,
        gen_helper_mve_vstrd_sg_wb_ud,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
                        int addrinc)
{
    TCGv_i32 rn;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn || (a->rn == 13 && a->w) || a->rn == 15) {
        /* Variously UNPREDICTABLE or UNDEF or related-encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rn = load_reg(s, a->rn);
    /*
     * We pass the index of Qd, not a pointer, because the helper must
     * access multiple Q registers starting at Qd and working up.
     */
    fn(cpu_env, tcg_constant_i32(a->qd), rn);

    if (a->w) {
        tcg_gen_addi_i32(rn, rn, addrinc);
        store_reg(s, a->rn, rn);
    } else {
        tcg_temp_free_i32(rn);
    }
    mve_update_and_store_eci(s);
    return true;
}
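
/*
 * For illustration: addrinc is the size of the whole interleaved
 * structure (32 bytes for VLD2/VST2, 64 for VLD4/VST4) and is applied
 * only on writeback. The qd range checks in the callers below exist
 * because the helpers access Qd..Qd+1 or Qd..Qd+3, which must stay
 * within Q0..Q7.
 */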

/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld20b), F(vld20h), F(vld20w), NULL, },
        { F(vld21b), F(vld21h), F(vld21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld40b), F(vld40h), F(vld40w), NULL, },
        { F(vld41b), F(vld41h), F(vld41w), NULL, },
        { F(vld42b), F(vld42h), F(vld42w), NULL, },
        { F(vld43b), F(vld43h), F(vld43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst20b), F(vst20h), F(vst20w), NULL, },
        { F(vst21b), F(vst21h), F(vst21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst40b), F(vst40h), F(vst40w), NULL, },
        { F(vst41b), F(vst41h), F(vst41w), NULL, },
        { F(vst42b), F(vst42h), F(vst42w), NULL, },
        { F(vst43b), F(vst43h), F(vst43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

#undef F

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rt = load_reg(s, a->rt);
    if (mve_no_predication(s)) {
        tcg_gen_gvec_dup_i32(a->size, mve_qreg_offset(a->qd), 16, 16, rt);
    } else {
        qd = mve_qreg_ptr(a->qd);
        tcg_gen_dup_i32(a->size, rt, rt);
        gen_helper_mve_vdup(cpu_env, qd, rt);
        tcg_temp_free_ptr(qd);
    }
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}
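
/*
 * For illustration: VDUP.8 Q0, R1 replicates the low byte of R1 into
 * all 16 byte lanes of Q0. In the unpredicated case this is the single
 * gvec dup above; otherwise tcg_gen_dup_i32() first replicates the
 * element within the 32-bit word and the helper then stores it beat by
 * beat under the current predicate mask.
 */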

static bool do_1op_vec(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn,
                       GVecGen2Fn vecfn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qm);
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    return do_1op_vec(s, a, fn, NULL);
}

#define DO_1OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL)

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP_VEC(VABS, vabs, tcg_gen_gvec_abs)
DO_1OP_VEC(VNEG, vneg, tcg_gen_gvec_neg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)
DO_1OP(VMAXA, vmaxa)
DO_1OP(VMINA, vmina)

/*
 * For simple float/int conversions we use the fixed-point
 * conversion helpers with a zero shift count
 */
#define DO_VCVT(INSN, HFN, SFN)                                         \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf)
DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf)
DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs)
DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu)

static bool do_vcvt_rmode(DisasContext *s, arg_1op *a,
                          enum arm_fprounding rmode, bool u)
{
    /*
     * Handle VCVT fp to int with specified rounding mode.
     * This is a 1op fn but we must pass the rounding mode as
     * an immediate to the helper.
     */
    TCGv_ptr qd, qm;
    static MVEGenVCVTRmodeFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh },
        { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us },
        { NULL, NULL },
    };
    MVEGenVCVTRmodeFn *fn = fns[a->size][u];

    if (!dc_isar_feature(aa32_mve_fp, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode)));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VCVT_RMODE(INSN, RMODE, U)                           \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        return do_vcvt_rmode(s, a, RMODE, U);                   \
    }                                                           \

DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false)
DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true)
DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false)
DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true)
DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false)
DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true)
DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false)
DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true)

#define DO_VCVT_SH(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_1op(s, a, gen_helper_mve_##FN);               \
    }                                                           \

DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
DO_VCVT_SH(VCVTT_SH, vcvtt_sh)
DO_VCVT_SH(VCVTB_HS, vcvtb_hs)
DO_VCVT_SH(VCVTT_HS, vcvtt_hs)

#define DO_VRINT(INSN, RMODE)                                           \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_h(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_s(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
DO_VRINT(VRINTZ, FPROUNDING_ZERO)
DO_VRINT(VRINTM, FPROUNDING_NEGINF)
DO_VRINT(VRINTP, FPROUNDING_POSINF)

static bool trans_VRINTX(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vrintx_h,
        gen_helper_mve_vrintx_s,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)
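
/*
 * For illustration: the B forms narrow each element of Qm into the
 * even-numbered (bottom) half-width lanes of Qd and the T forms into
 * the odd (top) lanes, leaving the other lanes of Qd unchanged; the VQ
 * variants saturate instead of truncating.
 */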

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op_vec(s, a, gen_helper_mve_vmvn, tcg_gen_gvec_not);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn,
                       GVecGen3Fn *vecfn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qn),
              mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qn = mve_qreg_ptr(a->qn);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qn, qm);
        tcg_temp_free_ptr(qd);
        tcg_temp_free_ptr(qn);
        tcg_temp_free_ptr(qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn *fn)
{
    return do_2op_vec(s, a, fn, NULL);
}

#define DO_LOGIC(INSN, HELPER, VECFN)                           \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op_vec(s, a, HELPER, VECFN);                 \
    }

DO_LOGIC(VAND, gen_helper_mve_vand, tcg_gen_gvec_and)
DO_LOGIC(VBIC, gen_helper_mve_vbic, tcg_gen_gvec_andc)
DO_LOGIC(VORR, gen_helper_mve_vorr, tcg_gen_gvec_or)
DO_LOGIC(VORN, gen_helper_mve_vorn, tcg_gen_gvec_orc)
DO_LOGIC(VEOR, gen_helper_mve_veor, tcg_gen_gvec_xor)

static bool trans_VPSEL(DisasContext *s, arg_2op *a)
{
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    return do_2op(s, a, gen_helper_mve_vpsel);
}

#define DO_2OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL)

DO_2OP_VEC(VADD, vadd, tcg_gen_gvec_add)
DO_2OP_VEC(VSUB, vsub, tcg_gen_gvec_sub)
DO_2OP_VEC(VMUL, vmul, tcg_gen_gvec_mul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP_VEC(VMAX_S, vmaxs, tcg_gen_gvec_smax)
DO_2OP_VEC(VMAX_U, vmaxu, tcg_gen_gvec_umax)
DO_2OP_VEC(VMIN_S, vmins, tcg_gen_gvec_smin)
DO_2OP_VEC(VMIN_U, vminu, tcg_gen_gvec_umin)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next.  The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution.  Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}
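
/*
 * A rough worked example of the carry chain for VADCI Qd, Qn, Qm:
 *     Qd[0] = Qn[0] + Qm[0] + 0          (fixed initial carry-in)
 *     Qd[1] = Qn[1] + Qm[1] + carry-out of element 0
 * and so on, with the final carry-out left in FPSCR.C. VSBCI starts
 * with carry-in 1 and effectively computes Qn[i] + ~Qm[i] + carry.
 */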

#define DO_2OP_FP(INSN, FN)                                     \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP_FP(VADD_fp, vfadd)
DO_2OP_FP(VSUB_fp, vfsub)
DO_2OP_FP(VMUL_fp, vfmul)
DO_2OP_FP(VABD_fp, vfabd)
DO_2OP_FP(VMAXNM, vmaxnm)
DO_2OP_FP(VMINNM, vminnm)
DO_2OP_FP(VCADD90_fp, vfcadd90)
DO_2OP_FP(VCADD270_fp, vfcadd270)
DO_2OP_FP(VFMA, vfma)
DO_2OP_FP(VFMS, vfms)
DO_2OP_FP(VCMUL0, vcmul0)
DO_2OP_FP(VCMUL90, vcmul90)
DO_2OP_FP(VCMUL180, vcmul180)
DO_2OP_FP(VCMUL270, vcmul270)
DO_2OP_FP(VCMLA0, vcmla0)
DO_2OP_FP(VCMLA90, vcmla90)
DO_2OP_FP(VCMLA180, vcmla180)
DO_2OP_FP(VCMLA270, vcmla270)
DO_2OP_FP(VMAXNMA, vmaxnma)
DO_2OP_FP(VMINNMA, vminnma)

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}


#define DO_2OP_FP_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar)
DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar)
DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar)
DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar)
DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar)

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}
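
/*
 * For illustration: the 64-bit accumulator travels as RdaHi:RdaLo, so
 * the concat above builds rda = (RdaHi << 32) | RdaLo and the
 * extrl/extrh pair at the end splits the result back into the two
 * general-purpose registers.
 */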

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = load_reg(s, a->rda);
    } else {
        rda = tcg_const_i32(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}
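
/*
 * For illustration: with s->eci == ECI_A0A1, beat 1 has already run and
 * already updated MASK01, so the deposit must rewrite only the MASK23
 * field; in the ECI_NONE and ECI_A0 cases beat 1 is still to come and
 * both adjacent mask fields are written in one deposit.
 */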

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
{
    /*
     * Invert the predicate in VPR.P0. We have to call out to
     * a helper because this insn itself is beatwise and can
     * be predicated.
     */
   1404     if (!dc_isar_feature(aa32_mve, s)) {
   1405         return false;
   1406     }
   1407     if (!mve_eci_check(s) || !vfp_access_check(s)) {
   1408         return true;
   1409     }
   1410 
   1411     gen_helper_mve_vpnot(cpu_env);
   1412     /* This insn updates predication bits */
   1413     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
   1414     mve_update_eci(s);
   1415     return true;
   1416 }
   1417 
   1418 static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
   1419 {
   1420     /* VADDV: vector add across vector */
   1421     static MVEGenVADDVFn * const fns[4][2] = {
   1422         { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
   1423         { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
   1424         { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
   1425         { NULL, NULL }
   1426     };
   1427     TCGv_ptr qm;
   1428     TCGv_i32 rda;
   1429 
   1430     if (!dc_isar_feature(aa32_mve, s) ||
   1431         a->size == 3) {
   1432         return false;
   1433     }
   1434     if (!mve_eci_check(s) || !vfp_access_check(s)) {
   1435         return true;
   1436     }
   1437 
   1438     /*
   1439      * This insn is subject to beat-wise execution. Partial execution
   1440      * of an A=0 (no-accumulate) insn which does not execute the first
   1441      * beat must start with the current value of Rda, not zero.
   1442      */
   1443     if (a->a || mve_skip_first_beat(s)) {
   1444         /* Accumulate input from Rda */
   1445         rda = load_reg(s, a->rda);
   1446     } else {
   1447         /* Accumulate starting at zero */
   1448         rda = tcg_const_i32(0);
   1449     }
   1450 
   1451     qm = mve_qreg_ptr(a->qm);
   1452     fns[a->size][a->u](rda, cpu_env, qm, rda);
   1453     store_reg(s, a->rda, rda);
   1454     tcg_temp_free_ptr(qm);
   1455 
   1456     mve_update_eci(s);
   1457     return true;
   1458 }
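
        /*
         * As a concrete example, VADDV.S16 Rda, Qm sums the eight 16-bit
         * lanes of Qm (sign-extended) into a 32-bit total, wrapping modulo
         * 2^32; the A=1 form (VADDVA) seeds the sum with the old Rda value
         * rather than zero, as set up above.
         */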
   1459 
   1460 static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
   1461 {
   1462     /*
   1463      * Vector Add Long Across Vector: accumulate the 32-bit
   1464      * elements of the vector into a 64-bit result stored in
   1465      * a pair of general-purpose registers.
   1466      * No need to check Qm's bank: it is only 3 bits in decode.
   1467      */
   1468     TCGv_ptr qm;
   1469     TCGv_i64 rda;
   1470     TCGv_i32 rdalo, rdahi;
   1471 
   1472     if (!dc_isar_feature(aa32_mve, s)) {
   1473         return false;
   1474     }
   1475     /*
   1476      * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
   1477      * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
   1478      */
   1479     if (a->rdahi == 13 || a->rdahi == 15) {
   1480         return false;
   1481     }
   1482     if (!mve_eci_check(s) || !vfp_access_check(s)) {
   1483         return true;
   1484     }
   1485 
   1486     /*
   1487      * This insn is subject to beat-wise execution. Partial execution
   1488      * of an A=0 (no-accumulate) insn which does not execute the first
   1489      * beat must start with the current value of RdaHi:RdaLo, not zero.
   1490      */
   1491     if (a->a || mve_skip_first_beat(s)) {
   1492         /* Accumulate input from RdaHi:RdaLo */
   1493         rda = tcg_temp_new_i64();
   1494         rdalo = load_reg(s, a->rdalo);
   1495         rdahi = load_reg(s, a->rdahi);
   1496         tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
   1497         tcg_temp_free_i32(rdalo);
   1498         tcg_temp_free_i32(rdahi);
   1499     } else {
   1500         /* Accumulate starting at zero */
   1501         rda = tcg_const_i64(0);
   1502     }
   1503 
   1504     qm = mve_qreg_ptr(a->qm);
   1505     if (a->u) {
   1506         gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
   1507     } else {
   1508         gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
   1509     }
   1510     tcg_temp_free_ptr(qm);
   1511 
   1512     rdalo = tcg_temp_new_i32();
   1513     rdahi = tcg_temp_new_i32();
   1514     tcg_gen_extrl_i64_i32(rdalo, rda);
   1515     tcg_gen_extrh_i64_i32(rdahi, rda);
   1516     store_reg(s, a->rdalo, rdalo);
   1517     store_reg(s, a->rdahi, rdahi);
   1518     tcg_temp_free_i64(rda);
   1519     mve_update_eci(s);
   1520     return true;
   1521 }
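
        /*
         * The 64-bit accumulator round-trips through the usual TCG idiom:
         * tcg_gen_concat_i32_i64() packs RdaHi:RdaLo into one i64 before
         * the helper runs, and tcg_gen_extrl/extrh_i64_i32() split the
         * result back out for write-back to the two GP registers.
         */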
   1522 
   1523 static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn,
   1524                     GVecGen2iFn *vecfn)
   1525 {
   1526     TCGv_ptr qd;
   1527     uint64_t imm;
   1528 
   1529     if (!dc_isar_feature(aa32_mve, s) ||
   1530         !mve_check_qreg_bank(s, a->qd) ||
   1531         !fn) {
   1532         return false;
   1533     }
   1534     if (!mve_eci_check(s) || !vfp_access_check(s)) {
   1535         return true;
   1536     }
   1537 
   1538     imm = asimd_imm_const(a->imm, a->cmode, a->op);
   1539 
   1540     if (vecfn && mve_no_predication(s)) {
   1541         vecfn(MO_64, mve_qreg_offset(a->qd), mve_qreg_offset(a->qd),
   1542               imm, 16, 16);
   1543     } else {
   1544         qd = mve_qreg_ptr(a->qd);
   1545         fn(cpu_env, qd, tcg_constant_i64(imm));
   1546         tcg_temp_free_ptr(qd);
   1547     }
   1548     mve_update_eci(s);
   1549     return true;
   1550 }
   1551 
   1552 static void gen_gvec_vmovi(unsigned vece, uint32_t dofs, uint32_t aofs,
   1553                            int64_t c, uint32_t oprsz, uint32_t maxsz)
   1554 {
   1555     tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, c);
   1556 }
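
        /*
         * This wrapper exists only to give VMOV-immediate the GVecGen2iFn
         * signature that do_1imm() expects: a move of an immediate has no
         * vector source operand, so 'aofs' is deliberately unused.
         */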
   1557 
   1558 static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
   1559 {
   1560     /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
   1561     MVEGenOneOpImmFn *fn;
   1562     GVecGen2iFn *vecfn;
   1563 
   1564     if ((a->cmode & 1) && a->cmode < 12) {
   1565         if (a->op) {
   1566             /*
   1567              * For op=1, the immediate will be inverted by asimd_imm_const(),
   1568              * so the VBIC becomes a logical AND operation.
   1569              */
   1570             fn = gen_helper_mve_vandi;
   1571             vecfn = tcg_gen_gvec_andi;
   1572         } else {
   1573             fn = gen_helper_mve_vorri;
   1574             vecfn = tcg_gen_gvec_ori;
   1575         }
   1576     } else {
   1577         /* There is one unallocated cmode/op combination in this space */
   1578         if (a->cmode == 15 && a->op == 1) {
   1579             return false;
   1580         }
   1581         /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
   1582         fn = gen_helper_mve_vmovi;
   1583         vecfn = gen_gvec_vmovi;
   1584     }
   1585     return do_1imm(s, a, fn, vecfn);
   1586 }
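
        /*
         * Rough decode example (per asimd_imm_const()): cmode=0b0011 with
         * op=0 is VORR.I32 with the imm8 expanded to imm8 << 8, while the
         * same cmode with op=1 is VBIC.I32, implemented by the vandi
         * helper as an AND with the inverted immediate.
         */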
   1587 
   1588 static bool do_2shift_vec(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
   1589                           bool negateshift, GVecGen2iFn vecfn)
   1590 {
   1591     TCGv_ptr qd, qm;
   1592     int shift = a->shift;
   1593 
   1594     if (!dc_isar_feature(aa32_mve, s) ||
   1595         !mve_check_qreg_bank(s, a->qd | a->qm) ||
   1596         !fn) {
   1597         return false;
   1598     }
   1599     if (!mve_eci_check(s) || !vfp_access_check(s)) {
   1600         return true;
   1601     }
   1602 
   1603     /*
   1604      * When we handle a right shift insn using a left-shift helper
   1605      * which permits a negative shift count to indicate a right-shift,
   1606      * we must negate the shift count.
   1607      */
   1608     if (negateshift) {
   1609         shift = -shift;
   1610     }
   1611 
   1612     if (vecfn && mve_no_predication(s)) {
   1613         vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm),
   1614               shift, 16, 16);
   1615     } else {
   1616         qd = mve_qreg_ptr(a->qd);
   1617         qm = mve_qreg_ptr(a->qm);
   1618         fn(cpu_env, qd, qm, tcg_constant_i32(shift));
   1619         tcg_temp_free_ptr(qd);
   1620         tcg_temp_free_ptr(qm);
   1621     }
   1622     mve_update_eci(s);
   1623     return true;
   1624 }
   1625 
   1626 static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
   1627                       bool negateshift)
   1628 {
   1629     return do_2shift_vec(s, a, fn, negateshift, NULL);
   1630 }
   1631 
   1632 #define DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, VECFN)                     \
   1633     static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
   1634     {                                                                   \
   1635         static MVEGenTwoOpShiftFn * const fns[] = {                     \
   1636             gen_helper_mve_##FN##b,                                     \
   1637             gen_helper_mve_##FN##h,                                     \
   1638             gen_helper_mve_##FN##w,                                     \
   1639             NULL,                                                       \
   1640         };                                                              \
   1641         return do_2shift_vec(s, a, fns[a->size], NEGATESHIFT, VECFN);   \
   1642     }
   1643 
   1644 #define DO_2SHIFT(INSN, FN, NEGATESHIFT)        \
   1645     DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, NULL)
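
        /*
         * For instance, DO_2SHIFT_VEC(VSHLI, vshli_u, false, tcg_gen_gvec_shli)
         * below expands to a trans_VSHLI() that dispatches on a->size between
         * gen_helper_mve_vshli_u{b,h,w} (size 3 is NULL and so UNDEFs) and
         * passes tcg_gen_gvec_shli as the inline fast path for the
         * unpredicated case.
         */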
   1646 
   1647 static void do_gvec_shri_s(unsigned vece, uint32_t dofs, uint32_t aofs,
   1648                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
   1649 {
   1650     /*
   1651      * We get here with a negated shift count, and we must handle
   1652      * shifts by the element size, which tcg_gen_gvec_sari() does not do.
   1653      */
   1654     shift = -shift;
   1655     if (shift == (8 << vece)) {
   1656         shift--;
   1657     }
   1658     tcg_gen_gvec_sari(vece, dofs, aofs, shift, oprsz, maxsz);
   1659 }
   1660 
   1661 static void do_gvec_shri_u(unsigned vece, uint32_t dofs, uint32_t aofs,
   1662                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
   1663 {
   1664     /*
   1665      * We get here with a negated shift count, and we must handle
   1666      * shifts by the element size, which tcg_gen_gvec_shri() does not do.
   1667      */
   1668     shift = -shift;
   1669     if (shift == (8 << vece)) {
   1670         tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, 0);
   1671     } else {
   1672         tcg_gen_gvec_shri(vece, dofs, aofs, shift, oprsz, maxsz);
   1673     }
   1674 }
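
        /*
         * Concretely, for 8-bit lanes a right shift by 8 is clamped to 7 in
         * the signed case (both just replicate the sign bit across the lane)
         * and becomes a dup of zero in the unsigned case, matching the
         * architectural shift-by-element-size behaviour.
         */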
   1675 
   1676 DO_2SHIFT_VEC(VSHLI, vshli_u, false, tcg_gen_gvec_shli)
   1677 DO_2SHIFT(VQSHLI_S, vqshli_s, false)
   1678 DO_2SHIFT(VQSHLI_U, vqshli_u, false)
   1679 DO_2SHIFT(VQSHLUI, vqshlui_s, false)
   1680 /* These right shifts use a left-shift helper with negated shift count */
   1681 DO_2SHIFT_VEC(VSHRI_S, vshli_s, true, do_gvec_shri_s)
   1682 DO_2SHIFT_VEC(VSHRI_U, vshli_u, true, do_gvec_shri_u)
   1683 DO_2SHIFT(VRSHRI_S, vrshli_s, true)
   1684 DO_2SHIFT(VRSHRI_U, vrshli_u, true)
   1685 
   1686 DO_2SHIFT_VEC(VSRI, vsri, false, gen_gvec_sri)
   1687 DO_2SHIFT_VEC(VSLI, vsli, false, gen_gvec_sli)
   1688 
   1689 #define DO_2SHIFT_FP(INSN, FN)                                  \
   1690     static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
   1691     {                                                           \
   1692         if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
   1693             return false;                                       \
   1694         }                                                       \
   1695         return do_2shift(s, a, gen_helper_mve_##FN, false);     \
   1696     }
   1697 
   1698 DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh)
   1699 DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh)
   1700 DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs)
   1701 DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu)
   1702 DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf)
   1703 DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf)
   1704 DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs)
   1705 DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu)
   1706 
   1707 static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
   1708                              MVEGenTwoOpShiftFn *fn)
   1709 {
   1710     TCGv_ptr qda;
   1711     TCGv_i32 rm;
   1712 
   1713     if (!dc_isar_feature(aa32_mve, s) ||
   1714         !mve_check_qreg_bank(s, a->qda) ||
   1715         a->rm == 13 || a->rm == 15 || !fn) {
   1716         /* Rm cases are UNPREDICTABLE */
   1717         return false;
   1718     }
   1719     if (!mve_eci_check(s) || !vfp_access_check(s)) {
   1720         return true;
   1721     }
   1722 
   1723     qda = mve_qreg_ptr(a->qda);
   1724     rm = load_reg(s, a->rm);
   1725     fn(cpu_env, qda, qda, rm);
   1726     tcg_temp_free_ptr(qda);
   1727     tcg_temp_free_i32(rm);
   1728     mve_update_eci(s);
   1729     return true;
   1730 }
   1731 
   1732 #define DO_2SHIFT_SCALAR(INSN, FN)                                      \
   1733     static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)        \
   1734     {                                                                   \
   1735         static MVEGenTwoOpShiftFn * const fns[] = {                     \
   1736             gen_helper_mve_##FN##b,                                     \
   1737             gen_helper_mve_##FN##h,                                     \
   1738             gen_helper_mve_##FN##w,                                     \
   1739             NULL,                                                       \
   1740         };                                                              \
   1741         return do_2shift_scalar(s, a, fns[a->size]);                    \
   1742     }
   1743 
   1744 DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
   1745 DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
   1746 DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
   1747 DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
   1748 DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
   1749 DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
   1750 DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
   1751 DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)
   1752 
   1753 #define DO_VSHLL(INSN, FN)                                              \
   1754     static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
   1755     {                                                                   \
   1756         static MVEGenTwoOpShiftFn * const fns[] = {                     \
   1757             gen_helper_mve_##FN##b,                                     \
   1758             gen_helper_mve_##FN##h,                                     \
   1759         };                                                              \
   1760         return do_2shift_vec(s, a, fns[a->size], false, do_gvec_##FN);  \
   1761     }
   1762 
   1763 /*
   1764  * For the VSHLL vector helpers, the vece is the size of the input
   1765  * (ie MO_8 or MO_16); the helpers want to work in the output size.
   1766  * The shift count can be 0..<input size>, inclusive. (0 is VMOVL.)
   1767  */
   1768 static void do_gvec_vshllbs(unsigned vece, uint32_t dofs, uint32_t aofs,
   1769                             int64_t shift, uint32_t oprsz, uint32_t maxsz)
   1770 {
   1771     unsigned ovece = vece + 1;
   1772     unsigned ibits = vece == MO_8 ? 8 : 16;
   1773     tcg_gen_gvec_shli(ovece, dofs, aofs, ibits, oprsz, maxsz);
   1774     tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
   1775 }
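
        /*
         * The bottom-half signed widening above is a two-step trick: the
         * left shift by ibits moves the narrow input lane into the top half
         * of the wide lane, and the arithmetic right shift by (ibits - shift)
         * then sign-extends it while leaving the requested left shift
         * applied. E.g. an 8-bit lane 0x80 with shift=1 goes
         * 0x..80 -> 0x8000 -> 0xff00, i.e. -128 << 1 = -256.
         */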
   1776 
   1777 static void do_gvec_vshllbu(unsigned vece, uint32_t dofs, uint32_t aofs,
   1778                             int64_t shift, uint32_t oprsz, uint32_t maxsz)
   1779 {
   1780     unsigned ovece = vece + 1;
   1781     tcg_gen_gvec_andi(ovece, dofs, aofs,
   1782                       ovece == MO_16 ? 0xff : 0xffff, oprsz, maxsz);
   1783     tcg_gen_gvec_shli(ovece, dofs, dofs, shift, oprsz, maxsz);
   1784 }
   1785 
   1786 static void do_gvec_vshllts(unsigned vece, uint32_t dofs, uint32_t aofs,
   1787                             int64_t shift, uint32_t oprsz, uint32_t maxsz)
   1788 {
   1789     unsigned ovece = vece + 1;
   1790     unsigned ibits = vece == MO_8 ? 8 : 16;
   1791     if (shift == 0) {
   1792         tcg_gen_gvec_sari(ovece, dofs, aofs, ibits, oprsz, maxsz);
   1793     } else {
   1794         tcg_gen_gvec_andi(ovece, dofs, aofs,
   1795                           ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
   1796         tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
   1797     }
   1798 }
   1799 
   1800 static void do_gvec_vshlltu(unsigned vece, uint32_t dofs, uint32_t aofs,
   1801                             int64_t shift, uint32_t oprsz, uint32_t maxsz)
   1802 {
   1803     unsigned ovece = vece + 1;
   1804     unsigned ibits = vece == MO_8 ? 8 : 16;
   1805     if (shift == 0) {
   1806         tcg_gen_gvec_shri(ovece, dofs, aofs, ibits, oprsz, maxsz);
   1807     } else {
   1808         tcg_gen_gvec_andi(ovece, dofs, aofs,
   1809                           ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
   1810         tcg_gen_gvec_shri(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
   1811     }
   1812 }
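
        /*
         * The _T (top-half) variants work the same way but start from the
         * high half of each input lane: the andi keeps only that half in
         * place, and the right shift by (ibits - shift) both moves it down
         * and leaves the requested left shift applied; shift == 0 (the
         * VMOVL case) reduces to a plain shift by the input size.
         */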
   1813 
   1814 DO_VSHLL(VSHLL_BS, vshllbs)
   1815 DO_VSHLL(VSHLL_BU, vshllbu)
   1816 DO_VSHLL(VSHLL_TS, vshllts)
   1817 DO_VSHLL(VSHLL_TU, vshlltu)
   1818 
   1819 #define DO_2SHIFT_N(INSN, FN)                                   \
   1820     static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
   1821     {                                                           \
   1822         static MVEGenTwoOpShiftFn * const fns[] = {             \
   1823             gen_helper_mve_##FN##b,                             \
   1824             gen_helper_mve_##FN##h,                             \
   1825         };                                                      \
   1826         return do_2shift(s, a, fns[a->size], false);            \
   1827     }
   1828 
   1829 DO_2SHIFT_N(VSHRNB, vshrnb)
   1830 DO_2SHIFT_N(VSHRNT, vshrnt)
   1831 DO_2SHIFT_N(VRSHRNB, vrshrnb)
   1832 DO_2SHIFT_N(VRSHRNT, vrshrnt)
   1833 DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
   1834 DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
   1835 DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
   1836 DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
   1837 DO_2SHIFT_N(VQSHRUNB, vqshrunb)
   1838 DO_2SHIFT_N(VQSHRUNT, vqshrunt)
   1839 DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
   1840 DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
   1841 DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
   1842 DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
   1843 DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
   1844 DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)
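
        /*
         * Mnemonic key for the narrowing shifts above: B/T write the bottom
         * or top half of each destination lane, R rounds, Q saturates, and
         * the UN forms saturate a signed input to an unsigned result. Only
         * two table entries are needed because the input element can only
         * be 16 or 32 bits (the result is half the input width).
         */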
   1845 
   1846 static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
   1847 {
   1848     /*
   1849      * Whole Vector Left Shift with Carry. The carry is taken
   1850      * from a general purpose register and written back there.
   1851      * An imm of 0 means "shift by 32".
   1852      */
   1853     TCGv_ptr qd;
   1854     TCGv_i32 rdm;
   1855 
   1856     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
   1857         return false;
   1858     }
   1859     if (a->rdm == 13 || a->rdm == 15) {
   1860         /* CONSTRAINED UNPREDICTABLE: we UNDEF */
   1861         return false;
   1862     }
   1863     if (!mve_eci_check(s) || !vfp_access_check(s)) {
   1864         return true;
   1865     }
   1866 
   1867     qd = mve_qreg_ptr(a->qd);
   1868     rdm = load_reg(s, a->rdm);
   1869     gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
   1870     store_reg(s, a->rdm, rdm);
   1871     tcg_temp_free_ptr(qd);
   1872     mve_update_eci(s);
   1873     return true;
   1874 }
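
        /*
         * For example, VSHLC Qd, Rdm, #8 shifts the whole 128-bit Qd left
         * by one byte: the low 8 bits of Rdm are shifted in at the bottom
         * and the 8 bits shifted out at the top are written back to Rdm.
         */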
   1875 
   1876 static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
   1877 {
   1878     TCGv_ptr qd;
   1879     TCGv_i32 rn;
   1880 
   1881     /*
   1882      * Vector increment/decrement and duplicate (VIDUP, VDDUP).
   1883      * This fills the vector with elements of successively increasing
   1884      * or decreasing values, starting from Rn.
   1885      */
   1886     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
   1887         return false;
   1888     }
   1889     if (a->size == MO_64) {
   1890         /* size 0b11 is another encoding */
   1891         return false;
   1892     }
   1893     if (!mve_eci_check(s) || !vfp_access_check(s)) {
   1894         return true;
   1895     }
   1896 
   1897     qd = mve_qreg_ptr(a->qd);
   1898     rn = load_reg(s, a->rn);
   1899     fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
   1900     store_reg(s, a->rn, rn);
   1901     tcg_temp_free_ptr(qd);
   1902     mve_update_eci(s);
   1903     return true;
   1904 }
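
        /*
         * As an illustration, VIDUP.U16 Qd, Rn, #2 with Rn=10 writes
         * 10, 12, ..., 24 into the eight 16-bit lanes of Qd and then
         * writes 26 (Rn + 8 * 2) back to Rn.
         */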
   1905 
   1906 static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
   1907 {
   1908     TCGv_ptr qd;
   1909     TCGv_i32 rn, rm;
   1910 
   1911     /*
   1912      * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
   1913      * This fills the vector with elements of successively increasing
   1914      * or decreasing values, starting from Rn. Rm specifies a point where
   1915      * the count wraps back around to 0. The updated offset is written back
   1916      * to Rn.
   1917      */
   1918     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
   1919         return false;
   1920     }
   1921     if (!fn || a->rm == 13 || a->rm == 15) {
   1922         /*
   1923          * size 0b11 is another encoding; Rm == 13 and Rm == 15 are
   1924          * UNPREDICTABLE.
   1925          */
   1926         return false;
   1927     }
   1928     if (!mve_eci_check(s) || !vfp_access_check(s)) {
   1929         return true;
   1930     }
   1931 
   1932     qd = mve_qreg_ptr(a->qd);
   1933     rn = load_reg(s, a->rn);
   1934     rm = load_reg(s, a->rm);
   1935     fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
   1936     store_reg(s, a->rn, rn);
   1937     tcg_temp_free_ptr(qd);
   1938     tcg_temp_free_i32(rm);
   1939     mve_update_eci(s);
   1940     return true;
   1941 }
   1942 
   1943 static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
   1944 {
   1945     static MVEGenVIDUPFn * const fns[] = {
   1946         gen_helper_mve_vidupb,
   1947         gen_helper_mve_viduph,
   1948         gen_helper_mve_vidupw,
   1949         NULL,
   1950     };
   1951     return do_vidup(s, a, fns[a->size]);
   1952 }
   1953 
   1954 static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
   1955 {
   1956     static MVEGenVIDUPFn * const fns[] = {
   1957         gen_helper_mve_vidupb,
   1958         gen_helper_mve_viduph,
   1959         gen_helper_mve_vidupw,
   1960         NULL,
   1961     };
   1962     /* VDDUP is just like VIDUP but with a negative immediate */
   1963     a->imm = -a->imm;
   1964     return do_vidup(s, a, fns[a->size]);
   1965 }
   1966 
   1967 static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
   1968 {
   1969     static MVEGenVIWDUPFn * const fns[] = {
   1970         gen_helper_mve_viwdupb,
   1971         gen_helper_mve_viwduph,
   1972         gen_helper_mve_viwdupw,
   1973         NULL,
   1974     };
   1975     return do_viwdup(s, a, fns[a->size]);
   1976 }
   1977 
   1978 static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
   1979 {
   1980     static MVEGenVIWDUPFn * const fns[] = {
   1981         gen_helper_mve_vdwdupb,
   1982         gen_helper_mve_vdwduph,
   1983         gen_helper_mve_vdwdupw,
   1984         NULL,
   1985     };
   1986     return do_viwdup(s, a, fns[a->size]);
   1987 }
   1988 
   1989 static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
   1990 {
   1991     TCGv_ptr qn, qm;
   1992 
   1993     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
   1994         !fn) {
   1995         return false;
   1996     }
   1997     if (!mve_eci_check(s) || !vfp_access_check(s)) {
   1998         return true;
   1999     }
   2000 
   2001     qn = mve_qreg_ptr(a->qn);
   2002     qm = mve_qreg_ptr(a->qm);
   2003     fn(cpu_env, qn, qm);
   2004     tcg_temp_free_ptr(qn);
   2005     tcg_temp_free_ptr(qm);
   2006     if (a->mask) {
   2007         /* VPT */
   2008         gen_vpst(s, a->mask);
   2009     }
   2010     /* This insn updates predication bits */
   2011     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
   2012     mve_update_eci(s);
   2013     return true;
   2014 }
   2015 
   2016 static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
   2017                            MVEGenScalarCmpFn *fn)
   2018 {
   2019     TCGv_ptr qn;
   2020     TCGv_i32 rm;
   2021 
   2022     if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
   2023         return false;
   2024     }
   2025     if (!mve_eci_check(s) || !vfp_access_check(s)) {
   2026         return true;
   2027     }
   2028 
   2029     qn = mve_qreg_ptr(a->qn);
   2030     if (a->rm == 15) {
   2031         /* Encoding Rm=0b1111 means "constant zero" */
   2032         rm = tcg_constant_i32(0);
   2033     } else {
   2034         rm = load_reg(s, a->rm);
   2035     }
   2036     fn(cpu_env, qn, rm);
   2037     tcg_temp_free_ptr(qn);
   2038     tcg_temp_free_i32(rm);
   2039     if (a->mask) {
   2040         /* VPT */
   2041         gen_vpst(s, a->mask);
   2042     }
   2043     /* This insn updates predication bits */
   2044     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
   2045     mve_update_eci(s);
   2046     return true;
   2047 }
   2048 
   2049 #define DO_VCMP(INSN, FN)                                       \
   2050     static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
   2051     {                                                           \
   2052         static MVEGenCmpFn * const fns[] = {                    \
   2053             gen_helper_mve_##FN##b,                             \
   2054             gen_helper_mve_##FN##h,                             \
   2055             gen_helper_mve_##FN##w,                             \
   2056             NULL,                                               \
   2057         };                                                      \
   2058         return do_vcmp(s, a, fns[a->size]);                     \
   2059     }                                                           \
   2060     static bool trans_##INSN##_scalar(DisasContext *s,          \
   2061                                       arg_vcmp_scalar *a)       \
   2062     {                                                           \
   2063         static MVEGenScalarCmpFn * const fns[] = {              \
   2064             gen_helper_mve_##FN##_scalarb,                      \
   2065             gen_helper_mve_##FN##_scalarh,                      \
   2066             gen_helper_mve_##FN##_scalarw,                      \
   2067             NULL,                                               \
   2068         };                                                      \
   2069         return do_vcmp_scalar(s, a, fns[a->size]);              \
   2070     }
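
        /*
         * Each DO_VCMP invocation below therefore defines both the
         * vector/vector form (e.g. trans_VCMPEQ) and the vector/scalar
         * form (trans_VCMPEQ_scalar), each dispatching on a->size with
         * the size 3 slot left NULL.
         */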
   2071 
   2072 DO_VCMP(VCMPEQ, vcmpeq)
   2073 DO_VCMP(VCMPNE, vcmpne)
   2074 DO_VCMP(VCMPCS, vcmpcs)
   2075 DO_VCMP(VCMPHI, vcmphi)
   2076 DO_VCMP(VCMPGE, vcmpge)
   2077 DO_VCMP(VCMPLT, vcmplt)
   2078 DO_VCMP(VCMPGT, vcmpgt)
   2079 DO_VCMP(VCMPLE, vcmple)
   2080 
   2081 #define DO_VCMP_FP(INSN, FN)                                    \
   2082     static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
   2083     {                                                           \
   2084         static MVEGenCmpFn * const fns[] = {                    \
   2085             NULL,                                               \
   2086             gen_helper_mve_##FN##h,                             \
   2087             gen_helper_mve_##FN##s,                             \
   2088             NULL,                                               \
   2089         };                                                      \
   2090         if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
   2091             return false;                                       \
   2092         }                                                       \
   2093         return do_vcmp(s, a, fns[a->size]);                     \
   2094     }                                                           \
   2095     static bool trans_##INSN##_scalar(DisasContext *s,          \
   2096                                       arg_vcmp_scalar *a)       \
   2097     {                                                           \
   2098         static MVEGenScalarCmpFn * const fns[] = {              \
   2099             NULL,                                               \
   2100             gen_helper_mve_##FN##_scalarh,                      \
   2101             gen_helper_mve_##FN##_scalars,                      \
   2102             NULL,                                               \
   2103         };                                                      \
   2104         if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
   2105             return false;                                       \
   2106         }                                                       \
   2107         return do_vcmp_scalar(s, a, fns[a->size]);              \
   2108     }
   2109 
   2110 DO_VCMP_FP(VCMPEQ_fp, vfcmpeq)
   2111 DO_VCMP_FP(VCMPNE_fp, vfcmpne)
   2112 DO_VCMP_FP(VCMPGE_fp, vfcmpge)
   2113 DO_VCMP_FP(VCMPLT_fp, vfcmplt)
   2114 DO_VCMP_FP(VCMPGT_fp, vfcmpgt)
   2115 DO_VCMP_FP(VCMPLE_fp, vfcmple)
   2116 
   2117 static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
   2118 {
   2119     /*
   2120      * MIN/MAX operations across a vector: compute the min or
   2121      * max of the initial value in a general purpose register
   2122      * and all the elements in the vector, and store it back
   2123      * into the general purpose register.
   2124      */
   2125     TCGv_ptr qm;
   2126     TCGv_i32 rda;
   2127 
   2128     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
   2129         !fn || a->rda == 13 || a->rda == 15) {
   2130         /* Rda cases are UNPREDICTABLE */
   2131         return false;
   2132     }
   2133     if (!mve_eci_check(s) || !vfp_access_check(s)) {
   2134         return true;
   2135     }
   2136 
   2137     qm = mve_qreg_ptr(a->qm);
   2138     rda = load_reg(s, a->rda);
   2139     fn(rda, cpu_env, qm, rda);
   2140     store_reg(s, a->rda, rda);
   2141     tcg_temp_free_ptr(qm);
   2142     mve_update_eci(s);
   2143     return true;
   2144 }
   2145 
   2146 #define DO_VMAXV(INSN, FN)                                      \
   2147     static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
   2148     {                                                           \
   2149         static MVEGenVADDVFn * const fns[] = {                  \
   2150             gen_helper_mve_##FN##b,                             \
   2151             gen_helper_mve_##FN##h,                             \
   2152             gen_helper_mve_##FN##w,                             \
   2153             NULL,                                               \
   2154         };                                                      \
   2155         return do_vmaxv(s, a, fns[a->size]);                    \
   2156     }
   2157 
   2158 DO_VMAXV(VMAXV_S, vmaxvs)
   2159 DO_VMAXV(VMAXV_U, vmaxvu)
   2160 DO_VMAXV(VMAXAV, vmaxav)
   2161 DO_VMAXV(VMINV_S, vminvs)
   2162 DO_VMAXV(VMINV_U, vminvu)
   2163 DO_VMAXV(VMINAV, vminav)
   2164 
   2165 #define DO_VMAXV_FP(INSN, FN)                                   \
   2166     static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
   2167     {                                                           \
   2168         static MVEGenVADDVFn * const fns[] = {                  \
   2169             NULL,                                               \
   2170             gen_helper_mve_##FN##h,                             \
   2171             gen_helper_mve_##FN##s,                             \
   2172             NULL,                                               \
   2173         };                                                      \
   2174         if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
   2175             return false;                                       \
   2176         }                                                       \
   2177         return do_vmaxv(s, a, fns[a->size]);                    \
   2178     }
   2179 
   2180 DO_VMAXV_FP(VMAXNMV, vmaxnmv)
   2181 DO_VMAXV_FP(VMINNMV, vminnmv)
   2182 DO_VMAXV_FP(VMAXNMAV, vmaxnmav)
   2183 DO_VMAXV_FP(VMINNMAV, vminnmav)
   2184 
   2185 static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
   2186 {
   2187     /* Absolute difference accumulated across vector */
   2188     TCGv_ptr qn, qm;
   2189     TCGv_i32 rda;
   2190 
   2191     if (!dc_isar_feature(aa32_mve, s) ||
   2192         !mve_check_qreg_bank(s, a->qm | a->qn) ||
   2193         !fn || a->rda == 13 || a->rda == 15) {
   2194         /* Rda cases are UNPREDICTABLE */
   2195         return false;
   2196     }
   2197     if (!mve_eci_check(s) || !vfp_access_check(s)) {
   2198         return true;
   2199     }
   2200 
   2201     qm = mve_qreg_ptr(a->qm);
   2202     qn = mve_qreg_ptr(a->qn);
   2203     rda = load_reg(s, a->rda);
   2204     fn(rda, cpu_env, qn, qm, rda);
   2205     store_reg(s, a->rda, rda);
   2206     tcg_temp_free_ptr(qm);
   2207     tcg_temp_free_ptr(qn);
   2208     mve_update_eci(s);
   2209     return true;
   2210 }
   2211 
   2212 #define DO_VABAV(INSN, FN)                                      \
   2213     static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
   2214     {                                                           \
   2215         static MVEGenVABAVFn * const fns[] = {                  \
   2216             gen_helper_mve_##FN##b,                             \
   2217             gen_helper_mve_##FN##h,                             \
   2218             gen_helper_mve_##FN##w,                             \
   2219             NULL,                                               \
   2220         };                                                      \
   2221         return do_vabav(s, a, fns[a->size]);                    \
   2222     }
   2223 
   2224 DO_VABAV(VABAV_S, vabavs)
   2225 DO_VABAV(VABAV_U, vabavu)
   2226 
   2227 static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
   2228 {
   2229     /*
   2230      * VMOV two 32-bit vector lanes to two general-purpose registers.
   2231      * This insn is not predicated but it is subject to beat-wise
   2232      * execution if it is not in an IT block. For us this means
   2233      * only that if PSR.ECI says we should not be executing the beat
   2234      * corresponding to the lane of the vector register being accessed
   2235      * then we should skip performing the move, and that we need to do
   2236      * the usual check for bad ECI state and advance of ECI state.
   2237      * (If PSR.ECI is non-zero then we cannot be in an IT block.)
   2238      */
   2239     TCGv_i32 tmp;
   2240     int vd;
   2241 
   2242     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
   2243         a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
   2244         a->rt == a->rt2) {
   2245         /* Rt/Rt2 cases are UNPREDICTABLE */
   2246         return false;
   2247     }
   2248     if (!mve_eci_check(s) || !vfp_access_check(s)) {
   2249         return true;
   2250     }
   2251 
   2252     /* Convert Qreg index to Dreg for read_neon_element32() etc */
   2253     vd = a->qd * 2;
   2254 
   2255     if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
   2256         tmp = tcg_temp_new_i32();
   2257         read_neon_element32(tmp, vd, a->idx, MO_32);
   2258         store_reg(s, a->rt, tmp);
   2259     }
   2260     if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
   2261         tmp = tcg_temp_new_i32();
   2262         read_neon_element32(tmp, vd + 1, a->idx, MO_32);
   2263         store_reg(s, a->rt2, tmp);
   2264     }
   2265 
   2266     mve_update_and_store_eci(s);
   2267     return true;
   2268 }
   2269 
   2270 static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
   2271 {
   2272     /*
   2273      * VMOV two general-purpose registers to two 32-bit vector lanes.
   2274      * This insn is not predicated but it is subject to beat-wise
   2275      * execution if it is not in an IT block. For us this means
   2276      * only that if PSR.ECI says we should not be executing the beat
   2277      * corresponding to the lane of the vector register being accessed
   2278      * then we should skip performing the move, and that we need to do
   2279      * the usual check for bad ECI state and advance of ECI state.
   2280      * (If PSR.ECI is non-zero then we cannot be in an IT block.)
   2281      */
   2282     TCGv_i32 tmp;
   2283     int vd;
   2284 
   2285     if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
   2286         a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
   2287         /* Rt/Rt2 cases are UNPREDICTABLE */
   2288         return false;
   2289     }
   2290     if (!mve_eci_check(s) || !vfp_access_check(s)) {
   2291         return true;
   2292     }
   2293 
   2294     /* Convert Qreg idx to Dreg for read_neon_element32() etc */
   2295     vd = a->qd * 2;
   2296 
   2297     if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
   2298         tmp = load_reg(s, a->rt);
   2299         write_neon_element32(tmp, vd, a->idx, MO_32);
   2300         tcg_temp_free_i32(tmp);
   2301     }
   2302     if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
   2303         tmp = load_reg(s, a->rt2);
   2304         write_neon_element32(tmp, vd + 1, a->idx, MO_32);
   2305         tcg_temp_free_i32(tmp);
   2306     }
   2307 
   2308     mve_update_and_store_eci(s);
   2309     return true;
   2310 }