qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git

translate-neon.c (126369B)


/*
 *  ARM translation: AArch32 Neon instructions
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *  Copyright (c) 2005-2007 CodeSourcery
 *  Copyright (c) 2007 OpenedHand, Ltd.
 *  Copyright (c) 2020 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

/* Include the generated Neon decoder */
#include "decode-neon-dp.c.inc"
#include "decode-neon-ls.c.inc"
#include "decode-neon-shared.c.inc"

static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
    return ret;
}

static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i32(var, cpu_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i32(var, cpu_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld_i32(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
{
    long offset = neon_element_offset(reg, ele, mop & MO_SIZE);

    switch (mop) {
    case MO_UB:
        tcg_gen_ld8u_i64(var, cpu_env, offset);
        break;
    case MO_UW:
        tcg_gen_ld16u_i64(var, cpu_env, offset);
        break;
    case MO_UL:
        tcg_gen_ld32u_i64(var, cpu_env, offset);
        break;
    case MO_UQ:
        tcg_gen_ld_i64(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
{
    long offset = neon_element_offset(reg, ele, size);

    switch (size) {
    case MO_8:
        tcg_gen_st8_i32(var, cpu_env, offset);
        break;
    case MO_16:
        tcg_gen_st16_i32(var, cpu_env, offset);
        break;
    case MO_32:
        tcg_gen_st_i32(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
{
    long offset = neon_element_offset(reg, ele, size);

    switch (size) {
    case MO_8:
        tcg_gen_st8_i64(var, cpu_env, offset);
        break;
    case MO_16:
        tcg_gen_st16_i64(var, cpu_env, offset);
        break;
    case MO_32:
        tcg_gen_st32_i64(var, cpu_env, offset);
        break;
    case MO_64:
        tcg_gen_st_i64(var, cpu_env, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static bool do_neon_ddda(DisasContext *s, int q, int vd, int vn, int vm,
                         int data, gen_helper_gvec_4 *fn_gvec)
{
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /*
     * UNDEF accesses to odd registers for each bit of Q.
     * Q will be 0b111 for all Q-reg instructions; otherwise it
     * indicates which of the operands are Q-regs when we have
     * mixed Q- and D-reg inputs.
     */
    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    int opr_sz = q ? 16 : 8;
    tcg_gen_gvec_4_ool(vfp_reg_offset(1, vd),
                       vfp_reg_offset(1, vn),
                       vfp_reg_offset(1, vm),
                       vfp_reg_offset(1, vd),
                       opr_sz, opr_sz, data, fn_gvec);
    return true;
}
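
/*
 * For example: trans_VSDOT and trans_VCMLA below pass q * 7 (all three
 * operands are Q-regs), so any odd vd/vn/vm UNDEFs when Q is set, while
 * the by-scalar forms pass q * 6, leaving vm unchecked because the
 * scalar operand always lives in a D-reg.
 */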

static bool do_neon_ddda_fpst(DisasContext *s, int q, int vd, int vn, int vm,
                              int data, ARMFPStatusFlavour fp_flavour,
                              gen_helper_gvec_4_ptr *fn_gvec_ptr)
{
    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (((vd | vn | vm) & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    /*
     * UNDEF accesses to odd registers for each bit of Q.
     * Q will be 0b111 for all Q-reg instructions; otherwise it
     * indicates which of the operands are Q-regs when we have
     * mixed Q- and D-reg inputs.
     */
    if (((vd & 1) * 4 | (vn & 1) * 2 | (vm & 1)) & q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    int opr_sz = q ? 16 : 8;
    TCGv_ptr fpst = fpstatus_ptr(fp_flavour);

    tcg_gen_gvec_4_ptr(vfp_reg_offset(1, vd),
                       vfp_reg_offset(1, vn),
                       vfp_reg_offset(1, vm),
                       vfp_reg_offset(1, vd),
                       fpst, opr_sz, opr_sz, data, fn_gvec_ptr);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a)
{
    if (!dc_isar_feature(aa32_vcma, s)) {
        return false;
    }
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
                                 FPST_STD_F16, gen_helper_gvec_fcmlah);
    }
    return do_neon_ddda_fpst(s, a->q * 7, a->vd, a->vn, a->vm, a->rot,
                             FPST_STD, gen_helper_gvec_fcmlas);
}

static bool trans_VCADD(DisasContext *s, arg_VCADD *a)
{
    int opr_sz;
    TCGv_ptr fpst;
    gen_helper_gvec_3_ptr *fn_gvec_ptr;

    if (!dc_isar_feature(aa32_vcma, s)
        || (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s))) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    fn_gvec_ptr = (a->size == MO_16) ?
        gen_helper_gvec_fcaddh : gen_helper_gvec_fcadds;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpst, opr_sz, opr_sz, a->rot,
                       fn_gvec_ptr);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VSDOT(DisasContext *s, arg_VSDOT *a)
{
    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
                        gen_helper_gvec_sdot_b);
}

static bool trans_VUDOT(DisasContext *s, arg_VUDOT *a)
{
    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
                        gen_helper_gvec_udot_b);
}

static bool trans_VUSDOT(DisasContext *s, arg_VUSDOT *a)
{
    if (!dc_isar_feature(aa32_i8mm, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
                        gen_helper_gvec_usdot_b);
}

static bool trans_VDOT_b16(DisasContext *s, arg_VDOT_b16 *a)
{
    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 7, a->vd, a->vn, a->vm, 0,
                        gen_helper_gvec_bfdot);
}

static bool trans_VFML(DisasContext *s, arg_VFML *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        (a->vd & 0x10)) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->vm),
                       cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_a32);
    return true;
}

static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a)
{
    int data = (a->index << 2) | a->rot;

    if (!dc_isar_feature(aa32_vcma, s)) {
        return false;
    }
    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
                                 FPST_STD_F16, gen_helper_gvec_fcmlah_idx);
    }
    return do_neon_ddda_fpst(s, a->q * 6, a->vd, a->vn, a->vm, data,
                             FPST_STD, gen_helper_gvec_fcmlas_idx);
}

static bool trans_VSDOT_scalar(DisasContext *s, arg_VSDOT_scalar *a)
{
    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_sdot_idx_b);
}

static bool trans_VUDOT_scalar(DisasContext *s, arg_VUDOT_scalar *a)
{
    if (!dc_isar_feature(aa32_dp, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_udot_idx_b);
}

static bool trans_VUSDOT_scalar(DisasContext *s, arg_VUSDOT_scalar *a)
{
    if (!dc_isar_feature(aa32_i8mm, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_usdot_idx_b);
}

static bool trans_VSUDOT_scalar(DisasContext *s, arg_VSUDOT_scalar *a)
{
    if (!dc_isar_feature(aa32_i8mm, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_sudot_idx_b);
}

static bool trans_VDOT_b16_scal(DisasContext *s, arg_VDOT_b16_scal *a)
{
    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }
    return do_neon_ddda(s, a->q * 6, a->vd, a->vn, a->vm, a->index,
                        gen_helper_gvec_bfdot_idx);
}

static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a)
{
    int opr_sz;

    if (!dc_isar_feature(aa32_fhm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) {
        return false;
    }

    if (a->vd & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    opr_sz = (1 + a->q) * 8;
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(a->q, a->vn),
                       vfp_reg_offset(a->q, a->rm),
                       cpu_env, opr_sz, opr_sz,
                       (a->index << 2) | a->s, /* is_2 == 0 */
                       gen_helper_gvec_fmlal_idx_a32);
    return true;
}

static struct {
    int nregs;
    int interleave;
    int spacing;
} const neon_ls_element_type[11] = {
    {1, 4, 1},
    {1, 4, 2},
    {4, 1, 1},
    {2, 2, 2},
    {1, 3, 1},
    {1, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {1, 2, 1},
    {1, 2, 2},
    {2, 1, 1}
};
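
/*
 * Rough guide to the table above, which is indexed by the itype field
 * of the instruction: e.g. itype 7 is VLD1/VST1 with one register
 * ({1, 1, 1}), itype 2 is the four-register VLD1/VST1 ({4, 1, 1}),
 * itype 3 is VLD2/VST2 with two register pairs ({2, 2, 2}), and
 * itypes 0/1 are the VLD4/VST4 forms with register spacing 1 or 2.
 */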

static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn,
                                      int stride)
{
    if (rm != 15) {
        TCGv_i32 base;

        base = load_reg(s, rn);
        if (rm == 13) {
            tcg_gen_addi_i32(base, base, stride);
        } else {
            TCGv_i32 index;
            index = load_reg(s, rm);
            tcg_gen_add_i32(base, base, index);
            tcg_temp_free_i32(index);
        }
        store_reg(s, rn, base);
    }
}

static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
{
    /* Neon load/store multiple structures */
    int nregs, interleave, spacing, reg, n;
    MemOp mop, align, endian;
    int mmu_idx = get_mem_index(s);
    int size = a->size;
    TCGv_i64 tmp64;
    TCGv_i32 addr;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }
    if (a->itype > 10) {
        return false;
    }
    /* Catch UNDEF cases for bad values of align field */
    switch (a->itype & 0xc) {
    case 4:
        if (a->align >= 2) {
            return false;
        }
        break;
    case 8:
        if (a->align == 3) {
            return false;
        }
        break;
    default:
        break;
    }
    nregs = neon_ls_element_type[a->itype].nregs;
    interleave = neon_ls_element_type[a->itype].interleave;
    spacing = neon_ls_element_type[a->itype].spacing;
    if (size == 3 && (interleave | spacing) != 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For our purposes, bytes are always little-endian.  */
    endian = s->be_data;
    if (size == 0) {
        endian = MO_LE;
    }

    /* Enforce alignment requested by the instruction */
    if (a->align) {
        align = pow2_align(a->align + 2); /* 4 ** a->align */
    } else {
        align = s->align_mem ? MO_ALIGN : 0;
    }

    /*
     * Consecutive little-endian elements from a single register
     * can be promoted to a larger little-endian operation.
     */
    if (interleave == 1 && endian == MO_LE) {
        /* Retain any natural alignment. */
        if (align == MO_ALIGN) {
            align = pow2_align(size);
        }
        size = 3;
    }
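
    /*
     * E.g. a little-endian VLD1.8 {d0}, [r0] takes the path above: size
     * is promoted from 0 to 3, so the loop below issues one 8-byte load
     * per D-reg instead of eight single-byte loads.
     */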

    tmp64 = tcg_temp_new_i64();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);

    mop = endian | size | align;
    for (reg = 0; reg < nregs; reg++) {
        for (n = 0; n < 8 >> size; n++) {
            int xs;
            for (xs = 0; xs < interleave; xs++) {
                int tt = a->vd + reg + spacing * xs;

                if (a->l) {
                    gen_aa32_ld_internal_i64(s, tmp64, addr, mmu_idx, mop);
                    neon_store_element64(tt, n, size, tmp64);
                } else {
                    neon_load_element64(tmp64, tt, n, size);
                    gen_aa32_st_internal_i64(s, tmp64, addr, mmu_idx, mop);
                }
                tcg_gen_addi_i32(addr, addr, 1 << size);

                /* Subsequent memory operations inherit alignment */
                mop &= ~MO_AMASK;
            }
        }
    }
    tcg_temp_free_i32(addr);
    tcg_temp_free_i64(tmp64);

    gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
    return true;
}

static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
{
    /* Neon load single structure to all lanes */
    int reg, stride, vec_size;
    int vd = a->vd;
    int size = a->size;
    int nregs = a->n + 1;
    TCGv_i32 addr, tmp;
    MemOp mop, align;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    align = 0;
    if (size == 3) {
        if (nregs != 4 || a->a == 0) {
            return false;
        }
        /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
        size = MO_32;
        align = MO_ALIGN_16;
    } else if (a->a) {
        switch (nregs) {
        case 1:
            if (size == 0) {
                return false;
            }
            align = MO_ALIGN;
            break;
        case 2:
            align = pow2_align(size + 1);
            break;
        case 3:
            return false;
        case 4:
            if (size == 2) {
                align = pow2_align(3);
            } else {
                align = pow2_align(size + 2);
            }
            break;
        default:
            g_assert_not_reached();
        }
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * VLD1 to all lanes: T bit indicates how many Dregs to write.
     * VLD2/3/4 to all lanes: T bit indicates register stride.
     */
    stride = a->t ? 2 : 1;
    vec_size = nregs == 1 ? stride * 8 : 8;
    mop = size | align;
    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);
    for (reg = 0; reg < nregs; reg++) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop);
        if ((vd & 1) && vec_size == 16) {
            /*
             * We cannot write 16 bytes at once because the
             * destination is unaligned.
             */
            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
                                 8, 8, tmp);
            tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1),
                             neon_full_reg_offset(vd), 8, 8);
        } else {
            tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
                                 vec_size, vec_size, tmp);
        }
        tcg_gen_addi_i32(addr, addr, 1 << size);
        vd += stride;

        /* Subsequent memory operations inherit alignment */
        mop &= ~MO_AMASK;
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);

    return true;
}

static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
{
    /* Neon load/store single structure to one lane */
    int reg;
    int nregs = a->n + 1;
    int vd = a->vd;
    TCGv_i32 addr, tmp;
    MemOp mop;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    /* Catch the UNDEF cases. This is unavoidably a bit messy. */
    switch (nregs) {
    case 1:
        if (a->stride != 1) {
            return false;
        }
        if (((a->align & (1 << a->size)) != 0) ||
            (a->size == 2 && (a->align == 1 || a->align == 2))) {
            return false;
        }
        break;
    case 2:
        if (a->size == 2 && (a->align & 2) != 0) {
            return false;
        }
        break;
    case 3:
        if (a->align != 0) {
            return false;
        }
        break;
    case 4:
        if (a->size == 2 && a->align == 3) {
            return false;
        }
        break;
    default:
        g_assert_not_reached();
    }
    if ((vd + a->stride * (nregs - 1)) > 31) {
        /*
         * Attempts to write off the end of the register file are
         * UNPREDICTABLE; we choose to UNDEF because otherwise we would
         * access off the end of the array that holds the register data.
         */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Pick up SCTLR settings */
    mop = finalize_memop(s, a->size);

    if (a->align) {
        MemOp align_op;

        switch (nregs) {
        case 1:
            /* For VLD1, use natural alignment. */
            align_op = MO_ALIGN;
            break;
        case 2:
            /* For VLD2, use double alignment. */
            align_op = pow2_align(a->size + 1);
            break;
        case 4:
            if (a->size == MO_32) {
                /*
                 * For VLD4.32, align = 1 is double alignment, align = 2 is
                 * quad alignment; align = 3 is rejected above.
                 */
                align_op = pow2_align(a->size + a->align);
            } else {
                /* For VLD4.8 and VLD4.16, we want quad alignment. */
                align_op = pow2_align(a->size + 2);
            }
            break;
        default:
            /* For VLD3, the alignment field is zero and rejected above. */
            g_assert_not_reached();
        }

        mop = (mop & ~MO_AMASK) | align_op;
    }

    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);

    for (reg = 0; reg < nregs; reg++) {
        if (a->l) {
            gen_aa32_ld_internal_i32(s, tmp, addr, get_mem_index(s), mop);
            neon_store_element(vd, a->reg_idx, a->size, tmp);
        } else { /* Store */
            neon_load_element(tmp, vd, a->reg_idx, a->size);
            gen_aa32_st_internal_i32(s, tmp, addr, get_mem_index(s), mop);
        }
        vd += a->stride;
        tcg_gen_addi_i32(addr, addr, 1 << a->size);

        /* Subsequent memory operations inherit alignment */
        mop &= ~MO_AMASK;
    }
    tcg_temp_free_i32(addr);
    tcg_temp_free_i32(tmp);

    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);

    return true;
}

static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
{
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rn_ofs = neon_full_reg_offset(a->vn);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vn | a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
    return true;
}

#define DO_3SAME(INSN, FUNC)                                            \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        return do_3same(s, a, FUNC);                                    \
    }
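
/*
 * For instance, DO_3SAME(VADD, tcg_gen_gvec_add) below expands to:
 *
 *     static bool trans_VADD_3s(DisasContext *s, arg_3same *a)
 *     {
 *         return do_3same(s, a, tcg_gen_gvec_add);
 *     }
 */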

DO_3SAME(VADD, tcg_gen_gvec_add)
DO_3SAME(VSUB, tcg_gen_gvec_sub)
DO_3SAME(VAND, tcg_gen_gvec_and)
DO_3SAME(VBIC, tcg_gen_gvec_andc)
DO_3SAME(VORR, tcg_gen_gvec_or)
DO_3SAME(VORN, tcg_gen_gvec_orc)
DO_3SAME(VEOR, tcg_gen_gvec_xor)
DO_3SAME(VSHL_S, gen_gvec_sshl)
DO_3SAME(VSHL_U, gen_gvec_ushl)
DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)

/* These insns are all gvec_bitsel but with the inputs in various orders. */
#define DO_3SAME_BITSEL(INSN, O1, O2, O3)                               \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz);    \
    }                                                                   \
    DO_3SAME(INSN, gen_##INSN##_3s)

DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
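
/*
 * Concretely, with bitsel(sel, x, y) = (x & sel) | (y & ~sel) and the
 * first macro operand acting as the selector:
 *   VBSL: rd = (rn & rd) | (rm & ~rd)
 *   VBIT: rd = (rn & rm) | (rd & ~rm)   (insert rn bits where rm is set)
 *   VBIF: rd = (rd & rm) | (rn & ~rm)   (insert rn bits where rm is clear)
 */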

#define DO_3SAME_NO_SZ_3(INSN, FUNC)                                    \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size == 3) {                                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, FUNC);                                    \
    }

DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)

#define DO_3SAME_CMP(INSN, COND)                                        \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
    }                                                                   \
    DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)

DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
DO_3SAME_CMP(VCEQ, TCG_COND_EQ)

#define WRAP_OOL_FN(WRAPNAME, FUNC)                                        \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,  \
                         uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz)  \
    {                                                                      \
        tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, 0, FUNC); \
    }

WRAP_OOL_FN(gen_VMUL_p_3s, gen_helper_gvec_pmul_b)

static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a)
{
    if (a->size != 0) {
        return false;
    }
    return do_3same(s, a, gen_VMUL_p_3s);
}

#define DO_VQRDMLAH(INSN, FUNC)                                         \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_rdm, s)) {                            \
            return false;                                               \
        }                                                               \
        if (a->size != 1 && a->size != 2) {                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, FUNC);                                    \
    }

DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)

#define DO_SHA1(NAME, FUNC)                                             \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_sha1, s)) {                           \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##NAME##_3s);                         \
    }

DO_SHA1(SHA1C, gen_helper_crypto_sha1c)
DO_SHA1(SHA1P, gen_helper_crypto_sha1p)
DO_SHA1(SHA1M, gen_helper_crypto_sha1m)
DO_SHA1(SHA1SU0, gen_helper_crypto_sha1su0)

#define DO_SHA2(NAME, FUNC)                                             \
    WRAP_OOL_FN(gen_##NAME##_3s, FUNC)                                  \
    static bool trans_##NAME##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_sha2, s)) {                           \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##NAME##_3s);                         \
    }

DO_SHA2(SHA256H, gen_helper_crypto_sha256h)
DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2)
DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1)

#define DO_3SAME_64(INSN, FUNC)                                         \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 op = { .fni8 = FUNC };                    \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op);      \
    }                                                                   \
    DO_3SAME(INSN, gen_##INSN##_3s)

#define DO_3SAME_64_ENV(INSN, FUNC)                                     \
    static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)    \
    {                                                                   \
        FUNC(d, cpu_env, n, m);                                         \
    }                                                                   \
    DO_3SAME_64(INSN, gen_##INSN##_elt)

DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)

#define DO_3SAME_32(INSN, FUNC)                                         \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[4] = {                                \
            { .fni4 = gen_helper_neon_##FUNC##8 },                      \
            { .fni4 = gen_helper_neon_##FUNC##16 },                     \
            { .fni4 = gen_helper_neon_##FUNC##32 },                     \
            { 0 },                                                      \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }

/*
 * Some helper functions need to be passed the cpu_env. In order
 * to use those with the gvec APIs like tcg_gen_gvec_3() we need
 * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
 * and which call a NeonGenTwoOpEnvFn().
 */
#define WRAP_ENV_FN(WRAPNAME, FUNC)                                     \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m)            \
    {                                                                   \
        FUNC(d, cpu_env, n, m);                                         \
    }
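
/*
 * E.g. via DO_3SAME_32_ENV(VQSHL_S, qshl_s) below,
 * WRAP_ENV_FN(gen_VQSHL_S_tramp8, gen_helper_neon_qshl_s8) expands to:
 *
 *     static void gen_VQSHL_S_tramp8(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m)
 *     {
 *         gen_helper_neon_qshl_s8(d, cpu_env, n, m);
 *     }
 */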

#define DO_3SAME_32_ENV(INSN, FUNC)                                     \
    WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8);        \
    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16);      \
    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32);      \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[4] = {                                \
            { .fni4 = gen_##INSN##_tramp8 },                            \
            { .fni4 = gen_##INSN##_tramp16 },                           \
            { .fni4 = gen_##INSN##_tramp32 },                           \
            { 0 },                                                      \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }

DO_3SAME_32(VHADD_S, hadd_s)
DO_3SAME_32(VHADD_U, hadd_u)
DO_3SAME_32(VHSUB_S, hsub_s)
DO_3SAME_32(VHSUB_U, hsub_u)
DO_3SAME_32(VRHADD_S, rhadd_s)
DO_3SAME_32(VRHADD_U, rhadd_u)
DO_3SAME_32(VRSHL_S, rshl_s)
DO_3SAME_32(VRSHL_U, rshl_u)

DO_3SAME_32_ENV(VQSHL_S, qshl_s)
DO_3SAME_32_ENV(VQSHL_U, qshl_u)
DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)

static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
{
    /* Operations handled pairwise 32 bits at a time */
    TCGv_i32 tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    assert(a->q == 0); /* enforced by decode patterns */

    /*
     * Note that we have to be careful not to clobber the source operands
     * in the "vm == vd" case by storing the result of the first pass too
     * early. Since Q is 0 there are always just two passes, so instead
     * of a complicated loop over each pass we just unroll.
     */
    tmp = tcg_temp_new_i32();
    tmp2 = tcg_temp_new_i32();
    tmp3 = tcg_temp_new_i32();

    read_neon_element32(tmp, a->vn, 0, MO_32);
    read_neon_element32(tmp2, a->vn, 1, MO_32);
    fn(tmp, tmp, tmp2);

    read_neon_element32(tmp3, a->vm, 0, MO_32);
    read_neon_element32(tmp2, a->vm, 1, MO_32);
    fn(tmp3, tmp3, tmp2);

    write_neon_element32(tmp, a->vd, 0, MO_32);
    write_neon_element32(tmp3, a->vd, 1, MO_32);

    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(tmp2);
    tcg_temp_free_i32(tmp3);
    return true;
}

#define DO_3SAME_PAIR(INSN, func)                                       \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        static NeonGenTwoOpFn * const fns[] = {                         \
            gen_helper_neon_##func##8,                                  \
            gen_helper_neon_##func##16,                                 \
            gen_helper_neon_##func##32,                                 \
        };                                                              \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same_pair(s, a, fns[a->size]);                       \
    }

/* 32-bit pairwise ops end up the same as the elementwise versions.  */
#define gen_helper_neon_pmax_s32  tcg_gen_smax_i32
#define gen_helper_neon_pmax_u32  tcg_gen_umax_i32
#define gen_helper_neon_pmin_s32  tcg_gen_smin_i32
#define gen_helper_neon_pmin_u32  tcg_gen_umin_i32
#define gen_helper_neon_padd_u32  tcg_gen_add_i32

DO_3SAME_PAIR(VPMAX_S, pmax_s)
DO_3SAME_PAIR(VPMIN_S, pmin_s)
DO_3SAME_PAIR(VPMAX_U, pmax_u)
DO_3SAME_PAIR(VPMIN_U, pmin_u)
DO_3SAME_PAIR(VPADD, padd_u)
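
/*
 * Pairwise semantics, for reference: VPADD.I32 d0, d1, d2 computes
 * d0[0] = d1[0] + d1[1] and d0[1] = d2[0] + d2[1], matching the two
 * unrolled passes in do_3same_pair() above.
 */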

#define DO_3SAME_VQDMULH(INSN, FUNC)                                    \
    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16);    \
    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32);    \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[2] = {                                \
            { .fni4 = gen_##INSN##_tramp16 },                           \
            { .fni4 = gen_##INSN##_tramp32 },                           \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size != 1 && a->size != 2) {                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }

DO_3SAME_VQDMULH(VQDMULH, qdmulh)
DO_3SAME_VQDMULH(VQRDMULH, qrdmulh)

#define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC)                              \
    static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
                         uint32_t rn_ofs, uint32_t rm_ofs,              \
                         uint32_t oprsz, uint32_t maxsz)                \
    {                                                                   \
        TCGv_ptr fpst = fpstatus_ptr(FPST);                             \
        tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpst,                \
                           oprsz, maxsz, 0, FUNC);                      \
        tcg_temp_free_ptr(fpst);                                        \
    }

#define DO_3S_FP_GVEC(INSN,SFUNC,HFUNC)                                 \
    WRAP_FP_GVEC(gen_##INSN##_fp32_3s, FPST_STD, SFUNC)                 \
    WRAP_FP_GVEC(gen_##INSN##_fp16_3s, FPST_STD_F16, HFUNC)             \
    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a)     \
    {                                                                   \
        if (a->size == MO_16) {                                         \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
                return false;                                           \
            }                                                           \
            return do_3same(s, a, gen_##INSN##_fp16_3s);                \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_fp32_3s);                    \
    }


DO_3S_FP_GVEC(VADD, gen_helper_gvec_fadd_s, gen_helper_gvec_fadd_h)
DO_3S_FP_GVEC(VSUB, gen_helper_gvec_fsub_s, gen_helper_gvec_fsub_h)
DO_3S_FP_GVEC(VABD, gen_helper_gvec_fabd_s, gen_helper_gvec_fabd_h)
DO_3S_FP_GVEC(VMUL, gen_helper_gvec_fmul_s, gen_helper_gvec_fmul_h)
DO_3S_FP_GVEC(VCEQ, gen_helper_gvec_fceq_s, gen_helper_gvec_fceq_h)
DO_3S_FP_GVEC(VCGE, gen_helper_gvec_fcge_s, gen_helper_gvec_fcge_h)
DO_3S_FP_GVEC(VCGT, gen_helper_gvec_fcgt_s, gen_helper_gvec_fcgt_h)
DO_3S_FP_GVEC(VACGE, gen_helper_gvec_facge_s, gen_helper_gvec_facge_h)
DO_3S_FP_GVEC(VACGT, gen_helper_gvec_facgt_s, gen_helper_gvec_facgt_h)
DO_3S_FP_GVEC(VMAX, gen_helper_gvec_fmax_s, gen_helper_gvec_fmax_h)
DO_3S_FP_GVEC(VMIN, gen_helper_gvec_fmin_s, gen_helper_gvec_fmin_h)
DO_3S_FP_GVEC(VMLA, gen_helper_gvec_fmla_s, gen_helper_gvec_fmla_h)
DO_3S_FP_GVEC(VMLS, gen_helper_gvec_fmls_s, gen_helper_gvec_fmls_h)
DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h)
DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h)
DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h)
DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h)

WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s)
WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h)
WRAP_FP_GVEC(gen_VMINNM_fp32_3s, FPST_STD, gen_helper_gvec_fminnum_s)
WRAP_FP_GVEC(gen_VMINNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fminnum_h)

static bool trans_VMAXNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_3same(s, a, gen_VMAXNM_fp16_3s);
    }
    return do_3same(s, a, gen_VMAXNM_fp32_3s);
}

static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->size == MO_16) {
        if (!dc_isar_feature(aa32_fp16_arith, s)) {
            return false;
        }
        return do_3same(s, a, gen_VMINNM_fp16_3s);
    }
    return do_3same(s, a, gen_VMINNM_fp32_3s);
}

static bool do_3same_fp_pair(DisasContext *s, arg_3same *a,
                             gen_helper_gvec_3_ptr *fn)
{
    /* FP pairwise operations */
    TCGv_ptr fpstatus;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    assert(a->q == 0); /* enforced by decode patterns */


    fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd),
                       vfp_reg_offset(1, a->vn),
                       vfp_reg_offset(1, a->vm),
                       fpstatus, 8, 8, 0, fn);
    tcg_temp_free_ptr(fpstatus);

    return true;
}

/*
 * For all the functions using this macro, size == 1 means fp16,
 * which is an architecture extension we don't implement yet.
 */
#define DO_3S_FP_PAIR(INSN,FUNC)                                    \
    static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \
    {                                                               \
        if (a->size == MO_16) {                                     \
            if (!dc_isar_feature(aa32_fp16_arith, s)) {             \
                return false;                                       \
            }                                                       \
            return do_3same_fp_pair(s, a, FUNC##h);                 \
        }                                                           \
        return do_3same_fp_pair(s, a, FUNC##s);                     \
    }

DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd)
DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax)
DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin)

static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
{
    /* Handle a 2-reg-shift insn which can be vectorized. */
    int vec_size = a->q ? 16 : 8;
    int rd_ofs = neon_full_reg_offset(a->vd);
    int rm_ofs = neon_full_reg_offset(a->vm);

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if ((a->vm | a->vd) & a->q) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fn(a->size, rd_ofs, rm_ofs, a->shift, vec_size, vec_size);
    return true;
}

#define DO_2SH(INSN, FUNC)                                              \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_vector_2sh(s, a, FUNC);                               \
    }                                                                   \

DO_2SH(VSHL, tcg_gen_gvec_shli)
DO_2SH(VSLI, gen_gvec_sli)
DO_2SH(VSRI, gen_gvec_sri)
DO_2SH(VSRA_S, gen_gvec_ssra)
DO_2SH(VSRA_U, gen_gvec_usra)
DO_2SH(VRSHR_S, gen_gvec_srshr)
DO_2SH(VRSHR_U, gen_gvec_urshr)
DO_2SH(VRSRA_S, gen_gvec_srsra)
DO_2SH(VRSRA_U, gen_gvec_ursra)

static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* Signed shift out of range results in all-sign-bits */
    a->shift = MIN(a->shift, (8 << a->size) - 1);
    return do_vector_2sh(s, a, tcg_gen_gvec_sari);
}

static void gen_zero_rd_2sh(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(vece, rd_ofs, oprsz, maxsz, 0);
}

static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* Shift out of range is architecturally valid and results in zero. */
    if (a->shift >= (8 << a->size)) {
        return do_vector_2sh(s, a, gen_zero_rd_2sh);
    } else {
        return do_vector_2sh(s, a, tcg_gen_gvec_shri);
    }
}
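
/*
 * Example of the two out-of-range behaviours above: VSHR.S32 with
 * shift == 32 is clamped to 31, so each element becomes all-sign-bits,
 * while VSHR.U32 with shift == 32 zeroes the destination via
 * gen_zero_rd_2sh().
 */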
   1320 
   1321 static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
   1322                              NeonGenTwo64OpEnvFn *fn)
   1323 {
   1324     /*
   1325      * 2-reg-and-shift operations, size == 3 case, where the
   1326      * function needs to be passed cpu_env.
   1327      */
   1328     TCGv_i64 constimm;
   1329     int pass;
   1330 
   1331     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1332         return false;
   1333     }
   1334 
   1335     /* UNDEF accesses to D16-D31 if they don't exist. */
   1336     if (!dc_isar_feature(aa32_simd_r32, s) &&
   1337         ((a->vd | a->vm) & 0x10)) {
   1338         return false;
   1339     }
   1340 
   1341     if ((a->vm | a->vd) & a->q) {
   1342         return false;
   1343     }
   1344 
   1345     if (!vfp_access_check(s)) {
   1346         return true;
   1347     }
   1348 
   1349     /*
   1350      * To avoid excessive duplication of ops we implement shift
   1351      * by immediate using the variable shift operations.
   1352      */
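             /* Here size == 3, so dup_const() leaves the shift count unchanged. */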
   1353     constimm = tcg_constant_i64(dup_const(a->size, a->shift));
   1354 
   1355     for (pass = 0; pass < a->q + 1; pass++) {
   1356         TCGv_i64 tmp = tcg_temp_new_i64();
   1357 
   1358         read_neon_element64(tmp, a->vm, pass, MO_64);
   1359         fn(tmp, cpu_env, tmp, constimm);
   1360         write_neon_element64(tmp, a->vd, pass, MO_64);
   1361         tcg_temp_free_i64(tmp);
   1362     }
   1363     return true;
   1364 }
   1365 
   1366 static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
   1367                              NeonGenTwoOpEnvFn *fn)
   1368 {
   1369     /*
   1370      * 2-reg-and-shift operations, size < 3 case, where the
   1371      * helper needs to be passed cpu_env.
   1372      */
   1373     TCGv_i32 constimm, tmp;
   1374     int pass;
   1375 
   1376     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1377         return false;
   1378     }
   1379 
   1380     /* UNDEF accesses to D16-D31 if they don't exist. */
   1381     if (!dc_isar_feature(aa32_simd_r32, s) &&
   1382         ((a->vd | a->vm) & 0x10)) {
   1383         return false;
   1384     }
   1385 
   1386     if ((a->vm | a->vd) & a->q) {
   1387         return false;
   1388     }
   1389 
   1390     if (!vfp_access_check(s)) {
   1391         return true;
   1392     }
   1393 
   1394     /*
   1395      * To avoid excessive duplication of ops we implement shift
   1396      * by immediate using the variable shift operations.
   1397      */
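             /*
              * e.g. size == MO_8 with shift == 2 yields 0x02020202: the
              * same count replicated into every byte lane.
              */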
   1398     constimm = tcg_constant_i32(dup_const(a->size, a->shift));
   1399     tmp = tcg_temp_new_i32();
   1400 
   1401     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
   1402         read_neon_element32(tmp, a->vm, pass, MO_32);
   1403         fn(tmp, cpu_env, tmp, constimm);
   1404         write_neon_element32(tmp, a->vd, pass, MO_32);
   1405     }
   1406     tcg_temp_free_i32(tmp);
   1407     return true;
   1408 }
   1409 
   1410 #define DO_2SHIFT_ENV(INSN, FUNC)                                       \
   1411     static bool trans_##INSN##_64_2sh(DisasContext *s, arg_2reg_shift *a) \
   1412     {                                                                   \
   1413         return do_2shift_env_64(s, a, gen_helper_neon_##FUNC##64);      \
   1414     }                                                                   \
   1415     static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
   1416     {                                                                   \
   1417         static NeonGenTwoOpEnvFn * const fns[] = {                      \
   1418             gen_helper_neon_##FUNC##8,                                  \
   1419             gen_helper_neon_##FUNC##16,                                 \
   1420             gen_helper_neon_##FUNC##32,                                 \
   1421         };                                                              \
   1422         assert(a->size < ARRAY_SIZE(fns));                              \
   1423         return do_2shift_env_32(s, a, fns[a->size]);                    \
   1424     }
   1425 
   1426 DO_2SHIFT_ENV(VQSHLU, qshlu_s)
   1427 DO_2SHIFT_ENV(VQSHL_U, qshl_u)
   1428 DO_2SHIFT_ENV(VQSHL_S, qshl_s)
   1429 
   1430 static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
   1431                                 NeonGenTwo64OpFn *shiftfn,
   1432                                 NeonGenNarrowEnvFn *narrowfn)
   1433 {
   1434     /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
   1435     TCGv_i64 constimm, rm1, rm2;
   1436     TCGv_i32 rd;
   1437 
   1438     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1439         return false;
   1440     }
   1441 
   1442     /* UNDEF accesses to D16-D31 if they don't exist. */
   1443     if (!dc_isar_feature(aa32_simd_r32, s) &&
   1444         ((a->vd | a->vm) & 0x10)) {
   1445         return false;
   1446     }
   1447 
   1448     if (a->vm & 1) {
   1449         return false;
   1450     }
   1451 
   1452     if (!vfp_access_check(s)) {
   1453         return true;
   1454     }
   1455 
   1456     /*
   1457      * This is always a right shift, and the shiftfn is always a
   1458      * left-shift helper, which thus needs the negated shift count.
   1459      */
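             /*
              * e.g. an encoded shift of 5 loads constimm = -5; the shift
              * helper treats the negative count as a right shift by 5.
              */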
   1460     constimm = tcg_constant_i64(-a->shift);
   1461     rm1 = tcg_temp_new_i64();
   1462     rm2 = tcg_temp_new_i64();
   1463     rd = tcg_temp_new_i32();
   1464 
   1465     /* Load both inputs first to avoid potential overwrite if rm == rd */
   1466     read_neon_element64(rm1, a->vm, 0, MO_64);
   1467     read_neon_element64(rm2, a->vm, 1, MO_64);
   1468 
   1469     shiftfn(rm1, rm1, constimm);
   1470     narrowfn(rd, cpu_env, rm1);
   1471     write_neon_element32(rd, a->vd, 0, MO_32);
   1472 
   1473     shiftfn(rm2, rm2, constimm);
   1474     narrowfn(rd, cpu_env, rm2);
   1475     write_neon_element32(rd, a->vd, 1, MO_32);
   1476 
   1477     tcg_temp_free_i32(rd);
   1478     tcg_temp_free_i64(rm1);
   1479     tcg_temp_free_i64(rm2);
   1480 
   1481     return true;
   1482 }
   1483 
   1484 static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
   1485                                 NeonGenTwoOpFn *shiftfn,
   1486                                 NeonGenNarrowEnvFn *narrowfn)
   1487 {
   1488     /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
   1489     TCGv_i32 constimm, rm1, rm2, rm3, rm4;
   1490     TCGv_i64 rtmp;
   1491     uint32_t imm;
   1492 
   1493     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1494         return false;
   1495     }
   1496 
   1497     /* UNDEF accesses to D16-D31 if they don't exist. */
   1498     if (!dc_isar_feature(aa32_simd_r32, s) &&
   1499         ((a->vd | a->vm) & 0x10)) {
   1500         return false;
   1501     }
   1502 
   1503     if (a->vm & 1) {
   1504         return false;
   1505     }
   1506 
   1507     if (!vfp_access_check(s)) {
   1508         return true;
   1509     }
   1510 
   1511     /*
   1512      * This is always a right shift, and the shiftfn is always a
   1513      * left-shift helper, which thus needs the negated shift count
   1514      * duplicated into each lane of the immediate value.
   1515      */
   1516     if (a->size == 1) {
   1517         imm = (uint16_t)(-a->shift);
   1518         imm |= imm << 16;
   1519     } else {
   1520         /* size == 2 */
   1521         imm = -a->shift;
   1522     }
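             /*
              * e.g. size == 1 with shift == 4 gives imm == 0xfffcfffc:
              * the 16-bit value -4 replicated into both halfword lanes.
              */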
   1523     constimm = tcg_constant_i32(imm);
   1524 
   1525     /* Load all inputs first to avoid potential overwrite */
   1526     rm1 = tcg_temp_new_i32();
   1527     rm2 = tcg_temp_new_i32();
   1528     rm3 = tcg_temp_new_i32();
   1529     rm4 = tcg_temp_new_i32();
   1530     read_neon_element32(rm1, a->vm, 0, MO_32);
   1531     read_neon_element32(rm2, a->vm, 1, MO_32);
   1532     read_neon_element32(rm3, a->vm, 2, MO_32);
   1533     read_neon_element32(rm4, a->vm, 3, MO_32);
   1534     rtmp = tcg_temp_new_i64();
   1535 
   1536     shiftfn(rm1, rm1, constimm);
   1537     shiftfn(rm2, rm2, constimm);
   1538 
   1539     tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
   1540     tcg_temp_free_i32(rm2);
   1541 
   1542     narrowfn(rm1, cpu_env, rtmp);
   1543     write_neon_element32(rm1, a->vd, 0, MO_32);
   1544     tcg_temp_free_i32(rm1);
   1545 
   1546     shiftfn(rm3, rm3, constimm);
   1547     shiftfn(rm4, rm4, constimm);
   1548 
   1549     tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
   1550     tcg_temp_free_i32(rm4);
   1551 
   1552     narrowfn(rm3, cpu_env, rtmp);
   1553     tcg_temp_free_i64(rtmp);
   1554     write_neon_element32(rm3, a->vd, 1, MO_32);
   1555     tcg_temp_free_i32(rm3);
   1556     return true;
   1557 }
   1558 
   1559 #define DO_2SN_64(INSN, FUNC, NARROWFUNC)                               \
   1560     static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
   1561     {                                                                   \
   1562         return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC);             \
   1563     }
   1564 #define DO_2SN_32(INSN, FUNC, NARROWFUNC)                               \
   1565     static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
   1566     {                                                                   \
   1567         return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC);             \
   1568     }
   1569 
   1570 static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
   1571 {
   1572     tcg_gen_extrl_i64_i32(dest, src);
   1573 }
   1574 
   1575 static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
   1576 {
   1577     gen_helper_neon_narrow_u16(dest, src);
   1578 }
   1579 
   1580 static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
   1581 {
   1582     gen_helper_neon_narrow_u8(dest, src);
   1583 }
   1584 
   1585 DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
   1586 DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
   1587 DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)
   1588 
   1589 DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
   1590 DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
   1591 DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)
   1592 
   1593 DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
   1594 DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
   1595 DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)
   1596 
   1597 DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
   1598 DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
   1599 DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
   1600 DO_2SN_64(VQSHRN_S64, gen_sshl_i64, gen_helper_neon_narrow_sat_s32)
   1601 DO_2SN_32(VQSHRN_S32, gen_sshl_i32, gen_helper_neon_narrow_sat_s16)
   1602 DO_2SN_32(VQSHRN_S16, gen_helper_neon_shl_s16, gen_helper_neon_narrow_sat_s8)
   1603 
   1604 DO_2SN_64(VQRSHRN_S64, gen_helper_neon_rshl_s64, gen_helper_neon_narrow_sat_s32)
   1605 DO_2SN_32(VQRSHRN_S32, gen_helper_neon_rshl_s32, gen_helper_neon_narrow_sat_s16)
   1606 DO_2SN_32(VQRSHRN_S16, gen_helper_neon_rshl_s16, gen_helper_neon_narrow_sat_s8)
   1607 
   1608 DO_2SN_64(VQSHRN_U64, gen_ushl_i64, gen_helper_neon_narrow_sat_u32)
   1609 DO_2SN_32(VQSHRN_U32, gen_ushl_i32, gen_helper_neon_narrow_sat_u16)
   1610 DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)
   1611 
   1612 DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
   1613 DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
   1614 DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)
   1615 
   1616 static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
   1617                          NeonGenWidenFn *widenfn, bool u)
   1618 {
   1619     TCGv_i64 tmp;
   1620     TCGv_i32 rm0, rm1;
   1621     uint64_t widen_mask = 0;
   1622 
   1623     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1624         return false;
   1625     }
   1626 
   1627     /* UNDEF accesses to D16-D31 if they don't exist. */
   1628     if (!dc_isar_feature(aa32_simd_r32, s) &&
   1629         ((a->vd | a->vm) & 0x10)) {
   1630         return false;
   1631     }
   1632 
   1633     if (a->vd & 1) {
   1634         return false;
   1635     }
   1636 
   1637     if (!vfp_access_check(s)) {
   1638         return true;
   1639     }
   1640 
    1641     /*
    1642      * This is a widen-and-shift operation. The shift is always less
    1643      * than the width of the source type, so after widening the input
    1644      * vector we can simply shift the whole 64-bit widened register,
    1645      * then clear the bits the shift carried across lane boundaries:
    1646      * the high bits of each narrow input would otherwise appear as
    1647      * low bits of its left-hand neighbour. Calculate a mask to clear.
    1648      */
   1649     if ((a->shift != 0) && (a->size < 2 || u)) {
   1650         int esize = 8 << a->size;
   1651         widen_mask = MAKE_64BIT_MASK(0, esize);
   1652         widen_mask >>= esize - a->shift;
   1653         widen_mask = dup_const(a->size + 1, widen_mask);
   1654     }
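             /*
              * e.g. size == 0, shift == 3: esize == 8, so widen_mask is
              * 0xff >> 5 == 0x07 replicated into each 16-bit widened lane,
              * i.e. 0x0007000700070007; clearing those low bits removes
              * whatever leaked in from the right-hand neighbour.
              */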
   1655 
   1656     rm0 = tcg_temp_new_i32();
   1657     rm1 = tcg_temp_new_i32();
   1658     read_neon_element32(rm0, a->vm, 0, MO_32);
   1659     read_neon_element32(rm1, a->vm, 1, MO_32);
   1660     tmp = tcg_temp_new_i64();
   1661 
   1662     widenfn(tmp, rm0);
   1663     tcg_temp_free_i32(rm0);
   1664     if (a->shift != 0) {
   1665         tcg_gen_shli_i64(tmp, tmp, a->shift);
   1666         tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
   1667     }
   1668     write_neon_element64(tmp, a->vd, 0, MO_64);
   1669 
   1670     widenfn(tmp, rm1);
   1671     tcg_temp_free_i32(rm1);
   1672     if (a->shift != 0) {
   1673         tcg_gen_shli_i64(tmp, tmp, a->shift);
   1674         tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
   1675     }
   1676     write_neon_element64(tmp, a->vd, 1, MO_64);
   1677     tcg_temp_free_i64(tmp);
   1678     return true;
   1679 }
   1680 
   1681 static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
   1682 {
   1683     static NeonGenWidenFn * const widenfn[] = {
   1684         gen_helper_neon_widen_s8,
   1685         gen_helper_neon_widen_s16,
   1686         tcg_gen_ext_i32_i64,
   1687     };
   1688     return do_vshll_2sh(s, a, widenfn[a->size], false);
   1689 }
   1690 
   1691 static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
   1692 {
   1693     static NeonGenWidenFn * const widenfn[] = {
   1694         gen_helper_neon_widen_u8,
   1695         gen_helper_neon_widen_u16,
   1696         tcg_gen_extu_i32_i64,
   1697     };
   1698     return do_vshll_2sh(s, a, widenfn[a->size], true);
   1699 }
   1700 
   1701 static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
   1702                       gen_helper_gvec_2_ptr *fn)
   1703 {
   1704     /* FP operations in 2-reg-and-shift group */
   1705     int vec_size = a->q ? 16 : 8;
   1706     int rd_ofs = neon_full_reg_offset(a->vd);
   1707     int rm_ofs = neon_full_reg_offset(a->vm);
   1708     TCGv_ptr fpst;
   1709 
   1710     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1711         return false;
   1712     }
   1713 
   1714     if (a->size == MO_16) {
   1715         if (!dc_isar_feature(aa32_fp16_arith, s)) {
   1716             return false;
   1717         }
   1718     }
   1719 
   1720     /* UNDEF accesses to D16-D31 if they don't exist. */
   1721     if (!dc_isar_feature(aa32_simd_r32, s) &&
   1722         ((a->vd | a->vm) & 0x10)) {
   1723         return false;
   1724     }
   1725 
   1726     if ((a->vm | a->vd) & a->q) {
   1727         return false;
   1728     }
   1729 
   1730     if (!vfp_access_check(s)) {
   1731         return true;
   1732     }
   1733 
   1734     fpst = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD);
   1735     tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, vec_size, vec_size, a->shift, fn);
   1736     tcg_temp_free_ptr(fpst);
   1737     return true;
   1738 }
   1739 
   1740 #define DO_FP_2SH(INSN, FUNC)                                           \
   1741     static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
   1742     {                                                                   \
   1743         return do_fp_2sh(s, a, FUNC);                                   \
   1744     }
   1745 
   1746 DO_FP_2SH(VCVT_SF, gen_helper_gvec_vcvt_sf)
   1747 DO_FP_2SH(VCVT_UF, gen_helper_gvec_vcvt_uf)
   1748 DO_FP_2SH(VCVT_FS, gen_helper_gvec_vcvt_fs)
   1749 DO_FP_2SH(VCVT_FU, gen_helper_gvec_vcvt_fu)
   1750 
   1751 DO_FP_2SH(VCVT_SH, gen_helper_gvec_vcvt_sh)
   1752 DO_FP_2SH(VCVT_UH, gen_helper_gvec_vcvt_uh)
   1753 DO_FP_2SH(VCVT_HS, gen_helper_gvec_vcvt_hs)
   1754 DO_FP_2SH(VCVT_HU, gen_helper_gvec_vcvt_hu)
   1755 
   1756 static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
   1757                         GVecGen2iFn *fn)
   1758 {
   1759     uint64_t imm;
   1760     int reg_ofs, vec_size;
   1761 
   1762     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1763         return false;
   1764     }
   1765 
   1766     /* UNDEF accesses to D16-D31 if they don't exist. */
   1767     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
   1768         return false;
   1769     }
   1770 
   1771     if (a->vd & a->q) {
   1772         return false;
   1773     }
   1774 
   1775     if (!vfp_access_check(s)) {
   1776         return true;
   1777     }
   1778 
   1779     reg_ofs = neon_full_reg_offset(a->vd);
   1780     vec_size = a->q ? 16 : 8;
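             /*
              * e.g. cmode == 0, op == 0 replicates the 8-bit immediate
              * into each 32-bit lane: 0x12 expands to 0x0000001200000012.
              */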
   1781     imm = asimd_imm_const(a->imm, a->cmode, a->op);
   1782 
   1783     fn(MO_64, reg_ofs, reg_ofs, imm, vec_size, vec_size);
   1784     return true;
   1785 }
   1786 
   1787 static void gen_VMOV_1r(unsigned vece, uint32_t dofs, uint32_t aofs,
   1788                         int64_t c, uint32_t oprsz, uint32_t maxsz)
   1789 {
   1790     tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
   1791 }
   1792 
   1793 static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
   1794 {
   1795     /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
   1796     GVecGen2iFn *fn;
   1797 
   1798     if ((a->cmode & 1) && a->cmode < 12) {
   1799         /* for op=1, the imm will be inverted, so BIC becomes AND. */
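                 /* e.g. VBIC.I32 #0xff becomes an AND with 0xffffff00 in each lane */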
   1800         fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
   1801     } else {
   1802         /* There is one unallocated cmode/op combination in this space */
   1803         if (a->cmode == 15 && a->op == 1) {
   1804             return false;
   1805         }
   1806         fn = gen_VMOV_1r;
   1807     }
   1808     return do_1reg_imm(s, a, fn);
   1809 }
   1810 
   1811 static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
   1812                            NeonGenWidenFn *widenfn,
   1813                            NeonGenTwo64OpFn *opfn,
   1814                            int src1_mop, int src2_mop)
   1815 {
    1816     /* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
   1817     TCGv_i64 rn0_64, rn1_64, rm_64;
   1818 
   1819     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1820         return false;
   1821     }
   1822 
   1823     /* UNDEF accesses to D16-D31 if they don't exist. */
   1824     if (!dc_isar_feature(aa32_simd_r32, s) &&
   1825         ((a->vd | a->vn | a->vm) & 0x10)) {
   1826         return false;
   1827     }
   1828 
   1829     if (!opfn) {
   1830         /* size == 3 case, which is an entirely different insn group */
   1831         return false;
   1832     }
   1833 
   1834     if ((a->vd & 1) || (src1_mop == MO_UQ && (a->vn & 1))) {
   1835         return false;
   1836     }
   1837 
   1838     if (!vfp_access_check(s)) {
   1839         return true;
   1840     }
   1841 
   1842     rn0_64 = tcg_temp_new_i64();
   1843     rn1_64 = tcg_temp_new_i64();
   1844     rm_64 = tcg_temp_new_i64();
   1845 
   1846     if (src1_mop >= 0) {
   1847         read_neon_element64(rn0_64, a->vn, 0, src1_mop);
   1848     } else {
   1849         TCGv_i32 tmp = tcg_temp_new_i32();
   1850         read_neon_element32(tmp, a->vn, 0, MO_32);
   1851         widenfn(rn0_64, tmp);
   1852         tcg_temp_free_i32(tmp);
   1853     }
   1854     if (src2_mop >= 0) {
   1855         read_neon_element64(rm_64, a->vm, 0, src2_mop);
   1856     } else {
   1857         TCGv_i32 tmp = tcg_temp_new_i32();
   1858         read_neon_element32(tmp, a->vm, 0, MO_32);
   1859         widenfn(rm_64, tmp);
   1860         tcg_temp_free_i32(tmp);
   1861     }
   1862 
   1863     opfn(rn0_64, rn0_64, rm_64);
   1864 
   1865     /*
   1866      * Load second pass inputs before storing the first pass result, to
   1867      * avoid incorrect results if a narrow input overlaps with the result.
   1868      */
   1869     if (src1_mop >= 0) {
   1870         read_neon_element64(rn1_64, a->vn, 1, src1_mop);
   1871     } else {
   1872         TCGv_i32 tmp = tcg_temp_new_i32();
   1873         read_neon_element32(tmp, a->vn, 1, MO_32);
   1874         widenfn(rn1_64, tmp);
   1875         tcg_temp_free_i32(tmp);
   1876     }
   1877     if (src2_mop >= 0) {
   1878         read_neon_element64(rm_64, a->vm, 1, src2_mop);
   1879     } else {
   1880         TCGv_i32 tmp = tcg_temp_new_i32();
   1881         read_neon_element32(tmp, a->vm, 1, MO_32);
   1882         widenfn(rm_64, tmp);
   1883         tcg_temp_free_i32(tmp);
   1884     }
   1885 
   1886     write_neon_element64(rn0_64, a->vd, 0, MO_64);
   1887 
   1888     opfn(rn1_64, rn1_64, rm_64);
   1889     write_neon_element64(rn1_64, a->vd, 1, MO_64);
   1890 
   1891     tcg_temp_free_i64(rn0_64);
   1892     tcg_temp_free_i64(rn1_64);
   1893     tcg_temp_free_i64(rm_64);
   1894 
   1895     return true;
   1896 }
   1897 
   1898 #define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN)                        \
   1899     static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
   1900     {                                                                   \
   1901         static NeonGenWidenFn * const widenfn[] = {                     \
   1902             gen_helper_neon_widen_##S##8,                               \
   1903             gen_helper_neon_widen_##S##16,                              \
   1904             NULL, NULL,                                                 \
   1905         };                                                              \
   1906         static NeonGenTwo64OpFn * const addfn[] = {                     \
   1907             gen_helper_neon_##OP##l_u16,                                \
   1908             gen_helper_neon_##OP##l_u32,                                \
   1909             tcg_gen_##OP##_i64,                                         \
   1910             NULL,                                                       \
   1911         };                                                              \
   1912         int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1;          \
   1913         return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size],   \
   1914                               SRC1WIDE ? MO_UQ : narrow_mop,             \
   1915                               narrow_mop);                              \
   1916     }
   1917 
   1918 DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
   1919 DO_PREWIDEN(VADDL_U, u, add, false, 0)
   1920 DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
   1921 DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
   1922 DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
   1923 DO_PREWIDEN(VADDW_U, u, add, true, 0)
   1924 DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
   1925 DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
   1926 
   1927 static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
   1928                          NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
   1929 {
   1930     /* 3-regs different lengths, narrowing (VADDHN/VSUBHN/VRADDHN/VRSUBHN) */
   1931     TCGv_i64 rn_64, rm_64;
   1932     TCGv_i32 rd0, rd1;
   1933 
   1934     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   1935         return false;
   1936     }
   1937 
   1938     /* UNDEF accesses to D16-D31 if they don't exist. */
   1939     if (!dc_isar_feature(aa32_simd_r32, s) &&
   1940         ((a->vd | a->vn | a->vm) & 0x10)) {
   1941         return false;
   1942     }
   1943 
   1944     if (!opfn || !narrowfn) {
   1945         /* size == 3 case, which is an entirely different insn group */
   1946         return false;
   1947     }
   1948 
   1949     if ((a->vn | a->vm) & 1) {
   1950         return false;
   1951     }
   1952 
   1953     if (!vfp_access_check(s)) {
   1954         return true;
   1955     }
   1956 
   1957     rn_64 = tcg_temp_new_i64();
   1958     rm_64 = tcg_temp_new_i64();
   1959     rd0 = tcg_temp_new_i32();
   1960     rd1 = tcg_temp_new_i32();
   1961 
   1962     read_neon_element64(rn_64, a->vn, 0, MO_64);
   1963     read_neon_element64(rm_64, a->vm, 0, MO_64);
   1964 
   1965     opfn(rn_64, rn_64, rm_64);
   1966 
   1967     narrowfn(rd0, rn_64);
   1968 
   1969     read_neon_element64(rn_64, a->vn, 1, MO_64);
   1970     read_neon_element64(rm_64, a->vm, 1, MO_64);
   1971 
   1972     opfn(rn_64, rn_64, rm_64);
   1973 
   1974     narrowfn(rd1, rn_64);
   1975 
   1976     write_neon_element32(rd0, a->vd, 0, MO_32);
   1977     write_neon_element32(rd1, a->vd, 1, MO_32);
   1978 
   1979     tcg_temp_free_i32(rd0);
   1980     tcg_temp_free_i32(rd1);
   1981     tcg_temp_free_i64(rn_64);
   1982     tcg_temp_free_i64(rm_64);
   1983 
   1984     return true;
   1985 }
   1986 
   1987 #define DO_NARROW_3D(INSN, OP, NARROWTYPE, EXTOP)                       \
   1988     static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
   1989     {                                                                   \
   1990         static NeonGenTwo64OpFn * const addfn[] = {                     \
   1991             gen_helper_neon_##OP##l_u16,                                \
   1992             gen_helper_neon_##OP##l_u32,                                \
   1993             tcg_gen_##OP##_i64,                                         \
   1994             NULL,                                                       \
   1995         };                                                              \
   1996         static NeonGenNarrowFn * const narrowfn[] = {                   \
   1997             gen_helper_neon_##NARROWTYPE##_high_u8,                     \
   1998             gen_helper_neon_##NARROWTYPE##_high_u16,                    \
   1999             EXTOP,                                                      \
   2000             NULL,                                                       \
   2001         };                                                              \
   2002         return do_narrow_3d(s, a, addfn[a->size], narrowfn[a->size]);   \
   2003     }
   2004 
   2005 static void gen_narrow_round_high_u32(TCGv_i32 rd, TCGv_i64 rn)
   2006 {
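             /* Round to nearest by adding half the weight of the discarded bits */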
   2007     tcg_gen_addi_i64(rn, rn, 1u << 31);
   2008     tcg_gen_extrh_i64_i32(rd, rn);
   2009 }
   2010 
   2011 DO_NARROW_3D(VADDHN, add, narrow, tcg_gen_extrh_i64_i32)
   2012 DO_NARROW_3D(VSUBHN, sub, narrow, tcg_gen_extrh_i64_i32)
   2013 DO_NARROW_3D(VRADDHN, add, narrow_round, gen_narrow_round_high_u32)
   2014 DO_NARROW_3D(VRSUBHN, sub, narrow_round, gen_narrow_round_high_u32)
   2015 
   2016 static bool do_long_3d(DisasContext *s, arg_3diff *a,
   2017                        NeonGenTwoOpWidenFn *opfn,
   2018                        NeonGenTwo64OpFn *accfn)
   2019 {
   2020     /*
   2021      * 3-regs different lengths, long operations.
   2022      * These perform an operation on two inputs that returns a double-width
   2023      * result, and then possibly perform an accumulation operation of
   2024      * that result into the double-width destination.
   2025      */
   2026     TCGv_i64 rd0, rd1, tmp;
   2027     TCGv_i32 rn, rm;
   2028 
   2029     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2030         return false;
   2031     }
   2032 
   2033     /* UNDEF accesses to D16-D31 if they don't exist. */
   2034     if (!dc_isar_feature(aa32_simd_r32, s) &&
   2035         ((a->vd | a->vn | a->vm) & 0x10)) {
   2036         return false;
   2037     }
   2038 
   2039     if (!opfn) {
   2040         /* size == 3 case, which is an entirely different insn group */
   2041         return false;
   2042     }
   2043 
   2044     if (a->vd & 1) {
   2045         return false;
   2046     }
   2047 
   2048     if (!vfp_access_check(s)) {
   2049         return true;
   2050     }
   2051 
   2052     rd0 = tcg_temp_new_i64();
   2053     rd1 = tcg_temp_new_i64();
   2054 
   2055     rn = tcg_temp_new_i32();
   2056     rm = tcg_temp_new_i32();
   2057     read_neon_element32(rn, a->vn, 0, MO_32);
   2058     read_neon_element32(rm, a->vm, 0, MO_32);
   2059     opfn(rd0, rn, rm);
   2060 
   2061     read_neon_element32(rn, a->vn, 1, MO_32);
   2062     read_neon_element32(rm, a->vm, 1, MO_32);
   2063     opfn(rd1, rn, rm);
   2064     tcg_temp_free_i32(rn);
   2065     tcg_temp_free_i32(rm);
   2066 
   2067     /* Don't store results until after all loads: they might overlap */
   2068     if (accfn) {
   2069         tmp = tcg_temp_new_i64();
   2070         read_neon_element64(tmp, a->vd, 0, MO_64);
   2071         accfn(rd0, tmp, rd0);
   2072         read_neon_element64(tmp, a->vd, 1, MO_64);
   2073         accfn(rd1, tmp, rd1);
   2074         tcg_temp_free_i64(tmp);
   2075     }
   2076 
   2077     write_neon_element64(rd0, a->vd, 0, MO_64);
   2078     write_neon_element64(rd1, a->vd, 1, MO_64);
   2079     tcg_temp_free_i64(rd0);
   2080     tcg_temp_free_i64(rd1);
   2081 
   2082     return true;
   2083 }
   2084 
   2085 static bool trans_VABDL_S_3d(DisasContext *s, arg_3diff *a)
   2086 {
   2087     static NeonGenTwoOpWidenFn * const opfn[] = {
   2088         gen_helper_neon_abdl_s16,
   2089         gen_helper_neon_abdl_s32,
   2090         gen_helper_neon_abdl_s64,
   2091         NULL,
   2092     };
   2093 
   2094     return do_long_3d(s, a, opfn[a->size], NULL);
   2095 }
   2096 
   2097 static bool trans_VABDL_U_3d(DisasContext *s, arg_3diff *a)
   2098 {
   2099     static NeonGenTwoOpWidenFn * const opfn[] = {
   2100         gen_helper_neon_abdl_u16,
   2101         gen_helper_neon_abdl_u32,
   2102         gen_helper_neon_abdl_u64,
   2103         NULL,
   2104     };
   2105 
   2106     return do_long_3d(s, a, opfn[a->size], NULL);
   2107 }
   2108 
   2109 static bool trans_VABAL_S_3d(DisasContext *s, arg_3diff *a)
   2110 {
   2111     static NeonGenTwoOpWidenFn * const opfn[] = {
   2112         gen_helper_neon_abdl_s16,
   2113         gen_helper_neon_abdl_s32,
   2114         gen_helper_neon_abdl_s64,
   2115         NULL,
   2116     };
   2117     static NeonGenTwo64OpFn * const addfn[] = {
   2118         gen_helper_neon_addl_u16,
   2119         gen_helper_neon_addl_u32,
   2120         tcg_gen_add_i64,
   2121         NULL,
   2122     };
   2123 
   2124     return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
   2125 }
   2126 
   2127 static bool trans_VABAL_U_3d(DisasContext *s, arg_3diff *a)
   2128 {
   2129     static NeonGenTwoOpWidenFn * const opfn[] = {
   2130         gen_helper_neon_abdl_u16,
   2131         gen_helper_neon_abdl_u32,
   2132         gen_helper_neon_abdl_u64,
   2133         NULL,
   2134     };
   2135     static NeonGenTwo64OpFn * const addfn[] = {
   2136         gen_helper_neon_addl_u16,
   2137         gen_helper_neon_addl_u32,
   2138         tcg_gen_add_i64,
   2139         NULL,
   2140     };
   2141 
   2142     return do_long_3d(s, a, opfn[a->size], addfn[a->size]);
   2143 }
   2144 
   2145 static void gen_mull_s32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
   2146 {
   2147     TCGv_i32 lo = tcg_temp_new_i32();
   2148     TCGv_i32 hi = tcg_temp_new_i32();
   2149 
   2150     tcg_gen_muls2_i32(lo, hi, rn, rm);
   2151     tcg_gen_concat_i32_i64(rd, lo, hi);
   2152 
   2153     tcg_temp_free_i32(lo);
   2154     tcg_temp_free_i32(hi);
   2155 }
   2156 
   2157 static void gen_mull_u32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
   2158 {
   2159     TCGv_i32 lo = tcg_temp_new_i32();
   2160     TCGv_i32 hi = tcg_temp_new_i32();
   2161 
   2162     tcg_gen_mulu2_i32(lo, hi, rn, rm);
   2163     tcg_gen_concat_i32_i64(rd, lo, hi);
   2164 
   2165     tcg_temp_free_i32(lo);
   2166     tcg_temp_free_i32(hi);
   2167 }
   2168 
   2169 static bool trans_VMULL_S_3d(DisasContext *s, arg_3diff *a)
   2170 {
   2171     static NeonGenTwoOpWidenFn * const opfn[] = {
   2172         gen_helper_neon_mull_s8,
   2173         gen_helper_neon_mull_s16,
   2174         gen_mull_s32,
   2175         NULL,
   2176     };
   2177 
   2178     return do_long_3d(s, a, opfn[a->size], NULL);
   2179 }
   2180 
   2181 static bool trans_VMULL_U_3d(DisasContext *s, arg_3diff *a)
   2182 {
   2183     static NeonGenTwoOpWidenFn * const opfn[] = {
   2184         gen_helper_neon_mull_u8,
   2185         gen_helper_neon_mull_u16,
   2186         gen_mull_u32,
   2187         NULL,
   2188     };
   2189 
   2190     return do_long_3d(s, a, opfn[a->size], NULL);
   2191 }
   2192 
   2193 #define DO_VMLAL(INSN,MULL,ACC)                                         \
   2194     static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a)        \
   2195     {                                                                   \
   2196         static NeonGenTwoOpWidenFn * const opfn[] = {                   \
   2197             gen_helper_neon_##MULL##8,                                  \
   2198             gen_helper_neon_##MULL##16,                                 \
   2199             gen_##MULL##32,                                             \
   2200             NULL,                                                       \
   2201         };                                                              \
   2202         static NeonGenTwo64OpFn * const accfn[] = {                     \
   2203             gen_helper_neon_##ACC##l_u16,                               \
   2204             gen_helper_neon_##ACC##l_u32,                               \
   2205             tcg_gen_##ACC##_i64,                                        \
   2206             NULL,                                                       \
   2207         };                                                              \
   2208         return do_long_3d(s, a, opfn[a->size], accfn[a->size]);         \
   2209     }
   2210 
   2211 DO_VMLAL(VMLAL_S,mull_s,add)
   2212 DO_VMLAL(VMLAL_U,mull_u,add)
   2213 DO_VMLAL(VMLSL_S,mull_s,sub)
   2214 DO_VMLAL(VMLSL_U,mull_u,sub)
   2215 
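             /* VQDMULL: widening multiply, then double via a saturating self-add */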
   2216 static void gen_VQDMULL_16(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
   2217 {
   2218     gen_helper_neon_mull_s16(rd, rn, rm);
   2219     gen_helper_neon_addl_saturate_s32(rd, cpu_env, rd, rd);
   2220 }
   2221 
   2222 static void gen_VQDMULL_32(TCGv_i64 rd, TCGv_i32 rn, TCGv_i32 rm)
   2223 {
   2224     gen_mull_s32(rd, rn, rm);
   2225     gen_helper_neon_addl_saturate_s64(rd, cpu_env, rd, rd);
   2226 }
   2227 
   2228 static bool trans_VQDMULL_3d(DisasContext *s, arg_3diff *a)
   2229 {
   2230     static NeonGenTwoOpWidenFn * const opfn[] = {
   2231         NULL,
   2232         gen_VQDMULL_16,
   2233         gen_VQDMULL_32,
   2234         NULL,
   2235     };
   2236 
   2237     return do_long_3d(s, a, opfn[a->size], NULL);
   2238 }
   2239 
   2240 static void gen_VQDMLAL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
   2241 {
   2242     gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
   2243 }
   2244 
   2245 static void gen_VQDMLAL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
   2246 {
   2247     gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
   2248 }
   2249 
   2250 static bool trans_VQDMLAL_3d(DisasContext *s, arg_3diff *a)
   2251 {
   2252     static NeonGenTwoOpWidenFn * const opfn[] = {
   2253         NULL,
   2254         gen_VQDMULL_16,
   2255         gen_VQDMULL_32,
   2256         NULL,
   2257     };
   2258     static NeonGenTwo64OpFn * const accfn[] = {
   2259         NULL,
   2260         gen_VQDMLAL_acc_16,
   2261         gen_VQDMLAL_acc_32,
   2262         NULL,
   2263     };
   2264 
   2265     return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
   2266 }
   2267 
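             /* VQDMLSL accumulation: negate the product, then saturating-add */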
   2268 static void gen_VQDMLSL_acc_16(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
   2269 {
   2270     gen_helper_neon_negl_u32(rm, rm);
   2271     gen_helper_neon_addl_saturate_s32(rd, cpu_env, rn, rm);
   2272 }
   2273 
   2274 static void gen_VQDMLSL_acc_32(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
   2275 {
   2276     tcg_gen_neg_i64(rm, rm);
   2277     gen_helper_neon_addl_saturate_s64(rd, cpu_env, rn, rm);
   2278 }
   2279 
   2280 static bool trans_VQDMLSL_3d(DisasContext *s, arg_3diff *a)
   2281 {
   2282     static NeonGenTwoOpWidenFn * const opfn[] = {
   2283         NULL,
   2284         gen_VQDMULL_16,
   2285         gen_VQDMULL_32,
   2286         NULL,
   2287     };
   2288     static NeonGenTwo64OpFn * const accfn[] = {
   2289         NULL,
   2290         gen_VQDMLSL_acc_16,
   2291         gen_VQDMLSL_acc_32,
   2292         NULL,
   2293     };
   2294 
   2295     return do_long_3d(s, a, opfn[a->size], accfn[a->size]);
   2296 }
   2297 
   2298 static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
   2299 {
   2300     gen_helper_gvec_3 *fn_gvec;
   2301 
   2302     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2303         return false;
   2304     }
   2305 
   2306     /* UNDEF accesses to D16-D31 if they don't exist. */
   2307     if (!dc_isar_feature(aa32_simd_r32, s) &&
   2308         ((a->vd | a->vn | a->vm) & 0x10)) {
   2309         return false;
   2310     }
   2311 
   2312     if (a->vd & 1) {
   2313         return false;
   2314     }
   2315 
   2316     switch (a->size) {
   2317     case 0:
   2318         fn_gvec = gen_helper_neon_pmull_h;
   2319         break;
   2320     case 2:
   2321         if (!dc_isar_feature(aa32_pmull, s)) {
   2322             return false;
   2323         }
   2324         fn_gvec = gen_helper_gvec_pmull_q;
   2325         break;
   2326     default:
   2327         return false;
   2328     }
   2329 
   2330     if (!vfp_access_check(s)) {
   2331         return true;
   2332     }
   2333 
   2334     tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd),
   2335                        neon_full_reg_offset(a->vn),
   2336                        neon_full_reg_offset(a->vm),
   2337                        16, 16, 0, fn_gvec);
   2338     return true;
   2339 }
   2340 
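             /* Duplicate the low 16 bits of var into both halves of it */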
   2341 static void gen_neon_dup_low16(TCGv_i32 var)
   2342 {
   2343     TCGv_i32 tmp = tcg_temp_new_i32();
   2344     tcg_gen_ext16u_i32(var, var);
   2345     tcg_gen_shli_i32(tmp, var, 16);
   2346     tcg_gen_or_i32(var, var, tmp);
   2347     tcg_temp_free_i32(tmp);
   2348 }
   2349 
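             /* Duplicate the high 16 bits of var into both halves of it */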
   2350 static void gen_neon_dup_high16(TCGv_i32 var)
   2351 {
   2352     TCGv_i32 tmp = tcg_temp_new_i32();
   2353     tcg_gen_andi_i32(var, var, 0xffff0000);
   2354     tcg_gen_shri_i32(tmp, var, 16);
   2355     tcg_gen_or_i32(var, var, tmp);
   2356     tcg_temp_free_i32(tmp);
   2357 }
   2358 
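             /*
              * Fetch a scalar operand, duplicated across a 32-bit value where
              * needed. For 16-bit scalars, Vm<2:0> selects the D register and
              * M:Vm<3> one of its four halfwords; for 32-bit scalars, Vm<3:0>
              * selects the register and M the word.
              */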
   2359 static inline TCGv_i32 neon_get_scalar(int size, int reg)
   2360 {
   2361     TCGv_i32 tmp = tcg_temp_new_i32();
   2362     if (size == MO_16) {
   2363         read_neon_element32(tmp, reg & 7, reg >> 4, MO_32);
   2364         if (reg & 8) {
   2365             gen_neon_dup_high16(tmp);
   2366         } else {
   2367             gen_neon_dup_low16(tmp);
   2368         }
   2369     } else {
   2370         read_neon_element32(tmp, reg & 15, reg >> 4, MO_32);
   2371     }
   2372     return tmp;
   2373 }
   2374 
   2375 static bool do_2scalar(DisasContext *s, arg_2scalar *a,
   2376                        NeonGenTwoOpFn *opfn, NeonGenTwoOpFn *accfn)
   2377 {
   2378     /*
   2379      * Two registers and a scalar: perform an operation between
   2380      * the input elements and the scalar, and then possibly
   2381      * perform an accumulation operation of that result into the
   2382      * destination.
   2383      */
   2384     TCGv_i32 scalar, tmp;
   2385     int pass;
   2386 
   2387     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2388         return false;
   2389     }
   2390 
   2391     /* UNDEF accesses to D16-D31 if they don't exist. */
   2392     if (!dc_isar_feature(aa32_simd_r32, s) &&
   2393         ((a->vd | a->vn | a->vm) & 0x10)) {
   2394         return false;
   2395     }
   2396 
   2397     if (!opfn) {
   2398         /* Bad size (including size == 3, which is a different insn group) */
   2399         return false;
   2400     }
   2401 
   2402     if (a->q && ((a->vd | a->vn) & 1)) {
   2403         return false;
   2404     }
   2405 
   2406     if (!vfp_access_check(s)) {
   2407         return true;
   2408     }
   2409 
   2410     scalar = neon_get_scalar(a->size, a->vm);
   2411     tmp = tcg_temp_new_i32();
   2412 
   2413     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
   2414         read_neon_element32(tmp, a->vn, pass, MO_32);
   2415         opfn(tmp, tmp, scalar);
   2416         if (accfn) {
   2417             TCGv_i32 rd = tcg_temp_new_i32();
   2418             read_neon_element32(rd, a->vd, pass, MO_32);
   2419             accfn(tmp, rd, tmp);
   2420             tcg_temp_free_i32(rd);
   2421         }
   2422         write_neon_element32(tmp, a->vd, pass, MO_32);
   2423     }
   2424     tcg_temp_free_i32(tmp);
   2425     tcg_temp_free_i32(scalar);
   2426     return true;
   2427 }
   2428 
   2429 static bool trans_VMUL_2sc(DisasContext *s, arg_2scalar *a)
   2430 {
   2431     static NeonGenTwoOpFn * const opfn[] = {
   2432         NULL,
   2433         gen_helper_neon_mul_u16,
   2434         tcg_gen_mul_i32,
   2435         NULL,
   2436     };
   2437 
   2438     return do_2scalar(s, a, opfn[a->size], NULL);
   2439 }
   2440 
   2441 static bool trans_VMLA_2sc(DisasContext *s, arg_2scalar *a)
   2442 {
   2443     static NeonGenTwoOpFn * const opfn[] = {
   2444         NULL,
   2445         gen_helper_neon_mul_u16,
   2446         tcg_gen_mul_i32,
   2447         NULL,
   2448     };
   2449     static NeonGenTwoOpFn * const accfn[] = {
   2450         NULL,
   2451         gen_helper_neon_add_u16,
   2452         tcg_gen_add_i32,
   2453         NULL,
   2454     };
   2455 
   2456     return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
   2457 }
   2458 
   2459 static bool trans_VMLS_2sc(DisasContext *s, arg_2scalar *a)
   2460 {
   2461     static NeonGenTwoOpFn * const opfn[] = {
   2462         NULL,
   2463         gen_helper_neon_mul_u16,
   2464         tcg_gen_mul_i32,
   2465         NULL,
   2466     };
   2467     static NeonGenTwoOpFn * const accfn[] = {
   2468         NULL,
   2469         gen_helper_neon_sub_u16,
   2470         tcg_gen_sub_i32,
   2471         NULL,
   2472     };
   2473 
   2474     return do_2scalar(s, a, opfn[a->size], accfn[a->size]);
   2475 }
   2476 
   2477 static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
   2478                               gen_helper_gvec_3_ptr *fn)
   2479 {
   2480     /* Two registers and a scalar, using gvec */
   2481     int vec_size = a->q ? 16 : 8;
   2482     int rd_ofs = neon_full_reg_offset(a->vd);
   2483     int rn_ofs = neon_full_reg_offset(a->vn);
   2484     int rm_ofs;
   2485     int idx;
   2486     TCGv_ptr fpstatus;
   2487 
   2488     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2489         return false;
   2490     }
   2491 
   2492     /* UNDEF accesses to D16-D31 if they don't exist. */
   2493     if (!dc_isar_feature(aa32_simd_r32, s) &&
   2494         ((a->vd | a->vn | a->vm) & 0x10)) {
   2495         return false;
   2496     }
   2497 
   2498     if (!fn) {
   2499         /* Bad size (including size == 3, which is a different insn group) */
   2500         return false;
   2501     }
   2502 
   2503     if (a->q && ((a->vd | a->vn) & 1)) {
   2504         return false;
   2505     }
   2506 
   2507     if (!vfp_access_check(s)) {
   2508         return true;
   2509     }
   2510 
   2511     /* a->vm is M:Vm, which encodes both register and index */
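             /* e.g. for size == 1 (fp16), Vm<2:0> is the register and M:Vm<3> the index */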
   2512     idx = extract32(a->vm, a->size + 2, 2);
   2513     a->vm = extract32(a->vm, 0, a->size + 2);
   2514     rm_ofs = neon_full_reg_offset(a->vm);
   2515 
   2516     fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
   2517     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
   2518                        vec_size, vec_size, idx, fn);
   2519     tcg_temp_free_ptr(fpstatus);
   2520     return true;
   2521 }
   2522 
   2523 #define DO_VMUL_F_2sc(NAME, FUNC)                                       \
   2524     static bool trans_##NAME##_F_2sc(DisasContext *s, arg_2scalar *a)   \
   2525     {                                                                   \
   2526         static gen_helper_gvec_3_ptr * const opfn[] = {                 \
   2527             NULL,                                                       \
   2528             gen_helper_##FUNC##_h,                                      \
   2529             gen_helper_##FUNC##_s,                                      \
   2530             NULL,                                                       \
   2531         };                                                              \
   2532         if (a->size == MO_16 && !dc_isar_feature(aa32_fp16_arith, s)) { \
   2533             return false;                                               \
   2534         }                                                               \
   2535         return do_2scalar_fp_vec(s, a, opfn[a->size]);                  \
   2536     }
   2537 
   2538 DO_VMUL_F_2sc(VMUL, gvec_fmul_idx)
   2539 DO_VMUL_F_2sc(VMLA, gvec_fmla_nf_idx)
   2540 DO_VMUL_F_2sc(VMLS, gvec_fmls_nf_idx)
   2541 
   2542 WRAP_ENV_FN(gen_VQDMULH_16, gen_helper_neon_qdmulh_s16)
   2543 WRAP_ENV_FN(gen_VQDMULH_32, gen_helper_neon_qdmulh_s32)
   2544 WRAP_ENV_FN(gen_VQRDMULH_16, gen_helper_neon_qrdmulh_s16)
   2545 WRAP_ENV_FN(gen_VQRDMULH_32, gen_helper_neon_qrdmulh_s32)
   2546 
   2547 static bool trans_VQDMULH_2sc(DisasContext *s, arg_2scalar *a)
   2548 {
   2549     static NeonGenTwoOpFn * const opfn[] = {
   2550         NULL,
   2551         gen_VQDMULH_16,
   2552         gen_VQDMULH_32,
   2553         NULL,
   2554     };
   2555 
   2556     return do_2scalar(s, a, opfn[a->size], NULL);
   2557 }
   2558 
   2559 static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)
   2560 {
   2561     static NeonGenTwoOpFn * const opfn[] = {
   2562         NULL,
   2563         gen_VQRDMULH_16,
   2564         gen_VQRDMULH_32,
   2565         NULL,
   2566     };
   2567 
   2568     return do_2scalar(s, a, opfn[a->size], NULL);
   2569 }
   2570 
   2571 static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
   2572                             NeonGenThreeOpEnvFn *opfn)
   2573 {
   2574     /*
   2575      * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
   2576      * performs a kind of fused op-then-accumulate using a helper
   2577      * function that takes all of rd, rn and the scalar at once.
   2578      */
   2579     TCGv_i32 scalar, rn, rd;
   2580     int pass;
   2581 
   2582     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2583         return false;
   2584     }
   2585 
   2586     if (!dc_isar_feature(aa32_rdm, s)) {
   2587         return false;
   2588     }
   2589 
   2590     /* UNDEF accesses to D16-D31 if they don't exist. */
   2591     if (!dc_isar_feature(aa32_simd_r32, s) &&
   2592         ((a->vd | a->vn | a->vm) & 0x10)) {
   2593         return false;
   2594     }
   2595 
   2596     if (!opfn) {
   2597         /* Bad size (including size == 3, which is a different insn group) */
   2598         return false;
   2599     }
   2600 
   2601     if (a->q && ((a->vd | a->vn) & 1)) {
   2602         return false;
   2603     }
   2604 
   2605     if (!vfp_access_check(s)) {
   2606         return true;
   2607     }
   2608 
   2609     scalar = neon_get_scalar(a->size, a->vm);
   2610     rn = tcg_temp_new_i32();
   2611     rd = tcg_temp_new_i32();
   2612 
   2613     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
   2614         read_neon_element32(rn, a->vn, pass, MO_32);
   2615         read_neon_element32(rd, a->vd, pass, MO_32);
   2616         opfn(rd, cpu_env, rn, scalar, rd);
   2617         write_neon_element32(rd, a->vd, pass, MO_32);
   2618     }
   2619     tcg_temp_free_i32(rn);
   2620     tcg_temp_free_i32(rd);
   2621     tcg_temp_free_i32(scalar);
   2622 
   2623     return true;
   2624 }
   2625 
   2626 static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
   2627 {
    2628     static NeonGenThreeOpEnvFn * const opfn[] = {
   2629         NULL,
   2630         gen_helper_neon_qrdmlah_s16,
   2631         gen_helper_neon_qrdmlah_s32,
   2632         NULL,
   2633     };
   2634     return do_vqrdmlah_2sc(s, a, opfn[a->size]);
   2635 }
   2636 
   2637 static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
   2638 {
    2639     static NeonGenThreeOpEnvFn * const opfn[] = {
   2640         NULL,
   2641         gen_helper_neon_qrdmlsh_s16,
   2642         gen_helper_neon_qrdmlsh_s32,
   2643         NULL,
   2644     };
   2645     return do_vqrdmlah_2sc(s, a, opfn[a->size]);
   2646 }
   2647 
   2648 static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
   2649                             NeonGenTwoOpWidenFn *opfn,
   2650                             NeonGenTwo64OpFn *accfn)
   2651 {
   2652     /*
   2653      * Two registers and a scalar, long operations: perform an
   2654      * operation on the input elements and the scalar which produces
   2655      * a double-width result, and then possibly perform an accumulation
   2656      * operation of that result into the destination.
   2657      */
   2658     TCGv_i32 scalar, rn;
   2659     TCGv_i64 rn0_64, rn1_64;
   2660 
   2661     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2662         return false;
   2663     }
   2664 
   2665     /* UNDEF accesses to D16-D31 if they don't exist. */
   2666     if (!dc_isar_feature(aa32_simd_r32, s) &&
   2667         ((a->vd | a->vn | a->vm) & 0x10)) {
   2668         return false;
   2669     }
   2670 
   2671     if (!opfn) {
   2672         /* Bad size (including size == 3, which is a different insn group) */
   2673         return false;
   2674     }
   2675 
   2676     if (a->vd & 1) {
   2677         return false;
   2678     }
   2679 
   2680     if (!vfp_access_check(s)) {
   2681         return true;
   2682     }
   2683 
   2684     scalar = neon_get_scalar(a->size, a->vm);
   2685 
   2686     /* Load all inputs before writing any outputs, in case of overlap */
   2687     rn = tcg_temp_new_i32();
   2688     read_neon_element32(rn, a->vn, 0, MO_32);
   2689     rn0_64 = tcg_temp_new_i64();
   2690     opfn(rn0_64, rn, scalar);
   2691 
   2692     read_neon_element32(rn, a->vn, 1, MO_32);
   2693     rn1_64 = tcg_temp_new_i64();
   2694     opfn(rn1_64, rn, scalar);
   2695     tcg_temp_free_i32(rn);
   2696     tcg_temp_free_i32(scalar);
   2697 
   2698     if (accfn) {
   2699         TCGv_i64 t64 = tcg_temp_new_i64();
   2700         read_neon_element64(t64, a->vd, 0, MO_64);
   2701         accfn(rn0_64, t64, rn0_64);
   2702         read_neon_element64(t64, a->vd, 1, MO_64);
   2703         accfn(rn1_64, t64, rn1_64);
   2704         tcg_temp_free_i64(t64);
   2705     }
   2706 
   2707     write_neon_element64(rn0_64, a->vd, 0, MO_64);
   2708     write_neon_element64(rn1_64, a->vd, 1, MO_64);
   2709     tcg_temp_free_i64(rn0_64);
   2710     tcg_temp_free_i64(rn1_64);
   2711     return true;
   2712 }
   2713 
   2714 static bool trans_VMULL_S_2sc(DisasContext *s, arg_2scalar *a)
   2715 {
   2716     static NeonGenTwoOpWidenFn * const opfn[] = {
   2717         NULL,
   2718         gen_helper_neon_mull_s16,
   2719         gen_mull_s32,
   2720         NULL,
   2721     };
   2722 
   2723     return do_2scalar_long(s, a, opfn[a->size], NULL);
   2724 }
   2725 
   2726 static bool trans_VMULL_U_2sc(DisasContext *s, arg_2scalar *a)
   2727 {
   2728     static NeonGenTwoOpWidenFn * const opfn[] = {
   2729         NULL,
   2730         gen_helper_neon_mull_u16,
   2731         gen_mull_u32,
   2732         NULL,
   2733     };
   2734 
   2735     return do_2scalar_long(s, a, opfn[a->size], NULL);
   2736 }
   2737 
   2738 #define DO_VMLAL_2SC(INSN, MULL, ACC)                                   \
   2739     static bool trans_##INSN##_2sc(DisasContext *s, arg_2scalar *a)     \
   2740     {                                                                   \
   2741         static NeonGenTwoOpWidenFn * const opfn[] = {                   \
   2742             NULL,                                                       \
   2743             gen_helper_neon_##MULL##16,                                 \
   2744             gen_##MULL##32,                                             \
   2745             NULL,                                                       \
   2746         };                                                              \
   2747         static NeonGenTwo64OpFn * const accfn[] = {                     \
   2748             NULL,                                                       \
   2749             gen_helper_neon_##ACC##l_u32,                               \
   2750             tcg_gen_##ACC##_i64,                                        \
   2751             NULL,                                                       \
   2752         };                                                              \
   2753         return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);    \
   2754     }
   2755 
   2756 DO_VMLAL_2SC(VMLAL_S, mull_s, add)
   2757 DO_VMLAL_2SC(VMLAL_U, mull_u, add)
   2758 DO_VMLAL_2SC(VMLSL_S, mull_s, sub)
   2759 DO_VMLAL_2SC(VMLSL_U, mull_u, sub)
   2760 
   2761 static bool trans_VQDMULL_2sc(DisasContext *s, arg_2scalar *a)
   2762 {
   2763     static NeonGenTwoOpWidenFn * const opfn[] = {
   2764         NULL,
   2765         gen_VQDMULL_16,
   2766         gen_VQDMULL_32,
   2767         NULL,
   2768     };
   2769 
   2770     return do_2scalar_long(s, a, opfn[a->size], NULL);
   2771 }
   2772 
   2773 static bool trans_VQDMLAL_2sc(DisasContext *s, arg_2scalar *a)
   2774 {
   2775     static NeonGenTwoOpWidenFn * const opfn[] = {
   2776         NULL,
   2777         gen_VQDMULL_16,
   2778         gen_VQDMULL_32,
   2779         NULL,
   2780     };
   2781     static NeonGenTwo64OpFn * const accfn[] = {
   2782         NULL,
   2783         gen_VQDMLAL_acc_16,
   2784         gen_VQDMLAL_acc_32,
   2785         NULL,
   2786     };
   2787 
   2788     return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
   2789 }
   2790 
   2791 static bool trans_VQDMLSL_2sc(DisasContext *s, arg_2scalar *a)
   2792 {
   2793     static NeonGenTwoOpWidenFn * const opfn[] = {
   2794         NULL,
   2795         gen_VQDMULL_16,
   2796         gen_VQDMULL_32,
   2797         NULL,
   2798     };
   2799     static NeonGenTwo64OpFn * const accfn[] = {
   2800         NULL,
   2801         gen_VQDMLSL_acc_16,
   2802         gen_VQDMLSL_acc_32,
   2803         NULL,
   2804     };
   2805 
   2806     return do_2scalar_long(s, a, opfn[a->size], accfn[a->size]);
   2807 }
   2808 
   2809 static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
   2810 {
   2811     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2812         return false;
   2813     }
   2814 
   2815     /* UNDEF accesses to D16-D31 if they don't exist. */
   2816     if (!dc_isar_feature(aa32_simd_r32, s) &&
   2817         ((a->vd | a->vn | a->vm) & 0x10)) {
   2818         return false;
   2819     }
   2820 
   2821     if ((a->vn | a->vm | a->vd) & a->q) {
   2822         return false;
   2823     }
   2824 
   2825     if (a->imm > 7 && !a->q) {
   2826         return false;
   2827     }
   2828 
   2829     if (!vfp_access_check(s)) {
   2830         return true;
   2831     }
   2832 
   2833     if (!a->q) {
   2834         /* Extract 64 bits from <Vm:Vn> */
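                 /*
                  * i.e. dest = (Vm:Vn) >> (imm * 8): for imm == 3 the result
                  * holds bytes Vn[7:3] with Vm[2:0] above them.
                  */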
   2835         TCGv_i64 left, right, dest;
   2836 
   2837         left = tcg_temp_new_i64();
   2838         right = tcg_temp_new_i64();
   2839         dest = tcg_temp_new_i64();
   2840 
   2841         read_neon_element64(right, a->vn, 0, MO_64);
   2842         read_neon_element64(left, a->vm, 0, MO_64);
   2843         tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
   2844         write_neon_element64(dest, a->vd, 0, MO_64);
   2845 
   2846         tcg_temp_free_i64(left);
   2847         tcg_temp_free_i64(right);
   2848         tcg_temp_free_i64(dest);
   2849     } else {
   2850         /* Extract 128 bits from <Vm+1:Vm:Vn+1:Vn> */
   2851         TCGv_i64 left, middle, right, destleft, destright;
   2852 
   2853         left = tcg_temp_new_i64();
   2854         middle = tcg_temp_new_i64();
   2855         right = tcg_temp_new_i64();
   2856         destleft = tcg_temp_new_i64();
   2857         destright = tcg_temp_new_i64();
   2858 
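                /*
                 * The 128-bit window starts a->imm bytes into
                 * <Vm+1:Vm:Vn+1:Vn>.  For imm < 8 it begins in Vn[0],
                 * so the result is built from Vn and the low half of
                 * Vm; otherwise it begins in Vn[1] and the missing
                 * bytes come from both halves of Vm.
                 */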
   2859         if (a->imm < 8) {
   2860             read_neon_element64(right, a->vn, 0, MO_64);
   2861             read_neon_element64(middle, a->vn, 1, MO_64);
   2862             tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
   2863             read_neon_element64(left, a->vm, 0, MO_64);
   2864             tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
   2865         } else {
   2866             read_neon_element64(right, a->vn, 1, MO_64);
   2867             read_neon_element64(middle, a->vm, 0, MO_64);
   2868             tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
   2869             read_neon_element64(left, a->vm, 1, MO_64);
   2870             tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
   2871         }
   2872 
   2873         write_neon_element64(destright, a->vd, 0, MO_64);
   2874         write_neon_element64(destleft, a->vd, 1, MO_64);
   2875 
   2876         tcg_temp_free_i64(destright);
   2877         tcg_temp_free_i64(destleft);
   2878         tcg_temp_free_i64(right);
   2879         tcg_temp_free_i64(middle);
   2880         tcg_temp_free_i64(left);
   2881     }
   2882     return true;
   2883 }
   2884 
   2885 static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
   2886 {
   2887     TCGv_i64 val, def;
   2888     TCGv_i32 desc;
   2889 
   2890     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2891         return false;
   2892     }
   2893 
   2894     /* UNDEF accesses to D16-D31 if they don't exist. */
   2895     if (!dc_isar_feature(aa32_simd_r32, s) &&
   2896         ((a->vd | a->vn | a->vm) & 0x10)) {
   2897         return false;
   2898     }
   2899 
   2900     if ((a->vn + a->len + 1) > 32) {
   2901         /*
   2902          * This is UNPREDICTABLE; we choose to UNDEF to avoid the
   2903          * helper function running off the end of the register file.
   2904          */
   2905         return false;
   2906     }
   2907 
   2908     if (!vfp_access_check(s)) {
   2909         return true;
   2910     }
   2911 
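            /*
             * Pack the table base register and the number of list
             * registers into the descriptor for the helper to decode.
             */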
   2912     desc = tcg_constant_i32((a->vn << 2) | a->len);
   2913     def = tcg_temp_new_i64();
   2914     if (a->op) {
   2915         read_neon_element64(def, a->vd, 0, MO_64);
   2916     } else {
   2917         tcg_gen_movi_i64(def, 0);
   2918     }
   2919     val = tcg_temp_new_i64();
   2920     read_neon_element64(val, a->vm, 0, MO_64);
   2921 
   2922     gen_helper_neon_tbl(val, cpu_env, desc, val, def);
   2923     write_neon_element64(val, a->vd, 0, MO_64);
   2924 
   2925     tcg_temp_free_i64(def);
   2926     tcg_temp_free_i64(val);
   2927     return true;
   2928 }
   2929 
   2930 static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
   2931 {
   2932     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2933         return false;
   2934     }
   2935 
   2936     /* UNDEF accesses to D16-D31 if they don't exist. */
   2937     if (!dc_isar_feature(aa32_simd_r32, s) &&
   2938         ((a->vd | a->vm) & 0x10)) {
   2939         return false;
   2940     }
   2941 
   2942     if (a->vd & a->q) {
   2943         return false;
   2944     }
   2945 
   2946     if (!vfp_access_check(s)) {
   2947         return true;
   2948     }
   2949 
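            /* Replicate the selected scalar element across all lanes of Dd/Qd. */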
   2950     tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd),
   2951                          neon_element_offset(a->vm, a->index, a->size),
   2952                          a->q ? 16 : 8, a->q ? 16 : 8);
   2953     return true;
   2954 }
   2955 
   2956 static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
   2957 {
   2958     int pass, half;
   2959     TCGv_i32 tmp[2];
   2960 
   2961     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   2962         return false;
   2963     }
   2964 
   2965     /* UNDEF accesses to D16-D31 if they don't exist. */
   2966     if (!dc_isar_feature(aa32_simd_r32, s) &&
   2967         ((a->vd | a->vm) & 0x10)) {
   2968         return false;
   2969     }
   2970 
   2971     if ((a->vd | a->vm) & a->q) {
   2972         return false;
   2973     }
   2974 
   2975     if (a->size == 3) {
   2976         return false;
   2977     }
   2978 
   2979     if (!vfp_access_check(s)) {
   2980         return true;
   2981     }
   2982 
   2983     tmp[0] = tcg_temp_new_i32();
   2984     tmp[1] = tcg_temp_new_i32();
   2985 
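            /*
             * Reverse one 64-bit half per pass: element-reverse each
             * 32-bit word as required by the element size (nothing to
             * do for 32-bit elements), then write the two words back
             * in swapped order.
             */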
   2986     for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
   2987         for (half = 0; half < 2; half++) {
   2988             read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
   2989             switch (a->size) {
   2990             case 0:
   2991                 tcg_gen_bswap32_i32(tmp[half], tmp[half]);
   2992                 break;
   2993             case 1:
   2994                 gen_swap_half(tmp[half], tmp[half]);
   2995                 break;
   2996             case 2:
   2997                 break;
   2998             default:
   2999                 g_assert_not_reached();
   3000             }
   3001         }
   3002         write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
   3003         write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
   3004     }
   3005 
   3006     tcg_temp_free_i32(tmp[0]);
   3007     tcg_temp_free_i32(tmp[1]);
   3008     return true;
   3009 }
   3010 
   3011 static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
   3012                               NeonGenWidenFn *widenfn,
   3013                               NeonGenTwo64OpFn *opfn,
   3014                               NeonGenTwo64OpFn *accfn)
   3015 {
   3016     /*
   3017      * Pairwise long operations: widen both halves of the pair,
   3018      * combine the pairs with the opfn, and then possibly accumulate
   3019      * into the destination with the accfn.
   3020      */
   3021     int pass;
   3022 
   3023     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   3024         return false;
   3025     }
   3026 
   3027     /* UNDEF accesses to D16-D31 if they don't exist. */
   3028     if (!dc_isar_feature(aa32_simd_r32, s) &&
   3029         ((a->vd | a->vm) & 0x10)) {
   3030         return false;
   3031     }
   3032 
   3033     if ((a->vd | a->vm) & a->q) {
   3034         return false;
   3035     }
   3036 
   3037     if (!widenfn) {
   3038         return false;
   3039     }
   3040 
   3041     if (!vfp_access_check(s)) {
   3042         return true;
   3043     }
   3044 
   3045     for (pass = 0; pass < a->q + 1; pass++) {
   3046         TCGv_i32 tmp;
   3047         TCGv_i64 rm0_64, rm1_64, rd_64;
   3048 
   3049         rm0_64 = tcg_temp_new_i64();
   3050         rm1_64 = tcg_temp_new_i64();
   3051         rd_64 = tcg_temp_new_i64();
   3052 
   3053         tmp = tcg_temp_new_i32();
   3054         read_neon_element32(tmp, a->vm, pass * 2, MO_32);
   3055         widenfn(rm0_64, tmp);
   3056         read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
   3057         widenfn(rm1_64, tmp);
   3058         tcg_temp_free_i32(tmp);
   3059 
   3060         opfn(rd_64, rm0_64, rm1_64);
   3061         tcg_temp_free_i64(rm0_64);
   3062         tcg_temp_free_i64(rm1_64);
   3063 
   3064         if (accfn) {
   3065             TCGv_i64 tmp64 = tcg_temp_new_i64();
   3066             read_neon_element64(tmp64, a->vd, pass, MO_64);
   3067             accfn(rd_64, tmp64, rd_64);
   3068             tcg_temp_free_i64(tmp64);
   3069         }
   3070         write_neon_element64(rd_64, a->vd, pass, MO_64);
   3071         tcg_temp_free_i64(rd_64);
   3072     }
   3073     return true;
   3074 }
   3075 
   3076 static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
   3077 {
   3078     static NeonGenWidenFn * const widenfn[] = {
   3079         gen_helper_neon_widen_s8,
   3080         gen_helper_neon_widen_s16,
   3081         tcg_gen_ext_i32_i64,
   3082         NULL,
   3083     };
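            /*
             * Signedness only matters in the widening step: once
             * widened, two's-complement addition is the same for both,
             * so the unsigned paddl helpers are shared with VPADDL_U.
             */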
   3084     static NeonGenTwo64OpFn * const opfn[] = {
   3085         gen_helper_neon_paddl_u16,
   3086         gen_helper_neon_paddl_u32,
   3087         tcg_gen_add_i64,
   3088         NULL,
   3089     };
   3090 
   3091     return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
   3092 }
   3093 
   3094 static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
   3095 {
   3096     static NeonGenWidenFn * const widenfn[] = {
   3097         gen_helper_neon_widen_u8,
   3098         gen_helper_neon_widen_u16,
   3099         tcg_gen_extu_i32_i64,
   3100         NULL,
   3101     };
   3102     static NeonGenTwo64OpFn * const opfn[] = {
   3103         gen_helper_neon_paddl_u16,
   3104         gen_helper_neon_paddl_u32,
   3105         tcg_gen_add_i64,
   3106         NULL,
   3107     };
   3108 
   3109     return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
   3110 }
   3111 
   3112 static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
   3113 {
   3114     static NeonGenWidenFn * const widenfn[] = {
   3115         gen_helper_neon_widen_s8,
   3116         gen_helper_neon_widen_s16,
   3117         tcg_gen_ext_i32_i64,
   3118         NULL,
   3119     };
   3120     static NeonGenTwo64OpFn * const opfn[] = {
   3121         gen_helper_neon_paddl_u16,
   3122         gen_helper_neon_paddl_u32,
   3123         tcg_gen_add_i64,
   3124         NULL,
   3125     };
   3126     static NeonGenTwo64OpFn * const accfn[] = {
   3127         gen_helper_neon_addl_u16,
   3128         gen_helper_neon_addl_u32,
   3129         tcg_gen_add_i64,
   3130         NULL,
   3131     };
   3132 
   3133     return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
   3134                              accfn[a->size]);
   3135 }
   3136 
   3137 static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
   3138 {
   3139     static NeonGenWidenFn * const widenfn[] = {
   3140         gen_helper_neon_widen_u8,
   3141         gen_helper_neon_widen_u16,
   3142         tcg_gen_extu_i32_i64,
   3143         NULL,
   3144     };
   3145     static NeonGenTwo64OpFn * const opfn[] = {
   3146         gen_helper_neon_paddl_u16,
   3147         gen_helper_neon_paddl_u32,
   3148         tcg_gen_add_i64,
   3149         NULL,
   3150     };
   3151     static NeonGenTwo64OpFn * const accfn[] = {
   3152         gen_helper_neon_addl_u16,
   3153         gen_helper_neon_addl_u32,
   3154         tcg_gen_add_i64,
   3155         NULL,
   3156     };
   3157 
   3158     return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
   3159                              accfn[a->size]);
   3160 }
   3161 
   3162 typedef void ZipFn(TCGv_ptr, TCGv_ptr);
   3163 
   3164 static bool do_zip_uzp(DisasContext *s, arg_2misc *a,
   3165                        ZipFn *fn)
   3166 {
   3167     TCGv_ptr pd, pm;
   3168 
   3169     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   3170         return false;
   3171     }
   3172 
   3173     /* UNDEF accesses to D16-D31 if they don't exist. */
   3174     if (!dc_isar_feature(aa32_simd_r32, s) &&
   3175         ((a->vd | a->vm) & 0x10)) {
   3176         return false;
   3177     }
   3178 
   3179     if ((a->vd | a->vm) & a->q) {
   3180         return false;
   3181     }
   3182 
   3183     if (!fn) {
   3184         /* Bad size or size/q combination */
   3185         return false;
   3186     }
   3187 
   3188     if (!vfp_access_check(s)) {
   3189         return true;
   3190     }
   3191 
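            /* The helpers read and write both registers in place. */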
   3192     pd = vfp_reg_ptr(true, a->vd);
   3193     pm = vfp_reg_ptr(true, a->vm);
   3194     fn(pd, pm);
   3195     tcg_temp_free_ptr(pd);
   3196     tcg_temp_free_ptr(pm);
   3197     return true;
   3198 }
   3199 
   3200 static bool trans_VUZP(DisasContext *s, arg_2misc *a)
   3201 {
   3202     static ZipFn * const fn[2][4] = {
   3203         {
   3204             gen_helper_neon_unzip8,
   3205             gen_helper_neon_unzip16,
   3206             NULL,
   3207             NULL,
   3208         }, {
   3209             gen_helper_neon_qunzip8,
   3210             gen_helper_neon_qunzip16,
   3211             gen_helper_neon_qunzip32,
   3212             NULL,
   3213         }
   3214     };
   3215     return do_zip_uzp(s, a, fn[a->q][a->size]);
   3216 }
   3217 
   3218 static bool trans_VZIP(DisasContext *s, arg_2misc *a)
   3219 {
   3220     static ZipFn * const fn[2][4] = {
   3221         {
   3222             gen_helper_neon_zip8,
   3223             gen_helper_neon_zip16,
   3224             NULL,
   3225             NULL,
   3226         }, {
   3227             gen_helper_neon_qzip8,
   3228             gen_helper_neon_qzip16,
   3229             gen_helper_neon_qzip32,
   3230             NULL,
   3231         }
   3232     };
   3233     return do_zip_uzp(s, a, fn[a->q][a->size]);
   3234 }
   3235 
   3236 static bool do_vmovn(DisasContext *s, arg_2misc *a,
   3237                      NeonGenNarrowEnvFn *narrowfn)
   3238 {
   3239     TCGv_i64 rm;
   3240     TCGv_i32 rd0, rd1;
   3241 
   3242     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   3243         return false;
   3244     }
   3245 
   3246     /* UNDEF accesses to D16-D31 if they don't exist. */
   3247     if (!dc_isar_feature(aa32_simd_r32, s) &&
   3248         ((a->vd | a->vm) & 0x10)) {
   3249         return false;
   3250     }
   3251 
   3252     if (a->vm & 1) {
   3253         return false;
   3254     }
   3255 
   3256     if (!narrowfn) {
   3257         return false;
   3258     }
   3259 
   3260     if (!vfp_access_check(s)) {
   3261         return true;
   3262     }
   3263 
   3264     rm = tcg_temp_new_i64();
   3265     rd0 = tcg_temp_new_i32();
   3266     rd1 = tcg_temp_new_i32();
   3267 
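            /*
             * Narrow each half of Qm to 32 bits; both halves are read
             * before anything is written, as Vd may overlap Vm.
             */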
   3268     read_neon_element64(rm, a->vm, 0, MO_64);
   3269     narrowfn(rd0, cpu_env, rm);
   3270     read_neon_element64(rm, a->vm, 1, MO_64);
   3271     narrowfn(rd1, cpu_env, rm);
   3272     write_neon_element32(rd0, a->vd, 0, MO_32);
   3273     write_neon_element32(rd1, a->vd, 1, MO_32);
   3274     tcg_temp_free_i32(rd0);
   3275     tcg_temp_free_i32(rd1);
   3276     tcg_temp_free_i64(rm);
   3277     return true;
   3278 }
   3279 
   3280 #define DO_VMOVN(INSN, FUNC)                                    \
   3281     static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
   3282     {                                                           \
   3283         static NeonGenNarrowEnvFn * const narrowfn[] = {        \
   3284             FUNC##8,                                            \
   3285             FUNC##16,                                           \
   3286             FUNC##32,                                           \
   3287             NULL,                                               \
   3288         };                                                      \
   3289         return do_vmovn(s, a, narrowfn[a->size]);               \
   3290     }
   3291 
   3292 DO_VMOVN(VMOVN, gen_neon_narrow_u)
   3293 DO_VMOVN(VQMOVUN, gen_helper_neon_unarrow_sat)
   3294 DO_VMOVN(VQMOVN_S, gen_helper_neon_narrow_sat_s)
   3295 DO_VMOVN(VQMOVN_U, gen_helper_neon_narrow_sat_u)
   3296 
   3297 static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
   3298 {
   3299     TCGv_i32 rm0, rm1;
   3300     TCGv_i64 rd;
   3301     static NeonGenWidenFn * const widenfns[] = {
   3302         gen_helper_neon_widen_u8,
   3303         gen_helper_neon_widen_u16,
   3304         tcg_gen_extu_i32_i64,
   3305         NULL,
   3306     };
   3307     NeonGenWidenFn *widenfn = widenfns[a->size];
   3308 
   3309     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   3310         return false;
   3311     }
   3312 
   3313     /* UNDEF accesses to D16-D31 if they don't exist. */
   3314     if (!dc_isar_feature(aa32_simd_r32, s) &&
   3315         ((a->vd | a->vm) & 0x10)) {
   3316         return false;
   3317     }
   3318 
   3319     if (a->vd & 1) {
   3320         return false;
   3321     }
   3322 
   3323     if (!widenfn) {
   3324         return false;
   3325     }
   3326 
   3327     if (!vfp_access_check(s)) {
   3328         return true;
   3329     }
   3330 
   3331     rd = tcg_temp_new_i64();
   3332     rm0 = tcg_temp_new_i32();
   3333     rm1 = tcg_temp_new_i32();
   3334 
   3335     read_neon_element32(rm0, a->vm, 0, MO_32);
   3336     read_neon_element32(rm1, a->vm, 1, MO_32);
   3337 
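            /*
             * Both inputs were read above before any write, as Vd can
             * overlap Vm.  The shift by 8 << size moves each widened
             * element up by one source element width, which is how the
             * 2-reg-misc VSHLL (maximum shift amount) is defined.
             */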
   3338     widenfn(rd, rm0);
   3339     tcg_gen_shli_i64(rd, rd, 8 << a->size);
   3340     write_neon_element64(rd, a->vd, 0, MO_64);
   3341     widenfn(rd, rm1);
   3342     tcg_gen_shli_i64(rd, rd, 8 << a->size);
   3343     write_neon_element64(rd, a->vd, 1, MO_64);
   3344 
   3345     tcg_temp_free_i64(rd);
   3346     tcg_temp_free_i32(rm0);
   3347     tcg_temp_free_i32(rm1);
   3348     return true;
   3349 }
   3350 
   3351 static bool trans_VCVT_B16_F32(DisasContext *s, arg_2misc *a)
   3352 {
   3353     TCGv_ptr fpst;
   3354     TCGv_i64 tmp;
   3355     TCGv_i32 dst0, dst1;
   3356 
   3357     if (!dc_isar_feature(aa32_bf16, s)) {
   3358         return false;
   3359     }
   3360 
   3361     /* UNDEF accesses to D16-D31 if they don't exist. */
   3362     if (!dc_isar_feature(aa32_simd_r32, s) &&
   3363         ((a->vd | a->vm) & 0x10)) {
   3364         return false;
   3365     }
   3366 
   3367     if ((a->vm & 1) || (a->size != 1)) {
   3368         return false;
   3369     }
   3370 
   3371     if (!vfp_access_check(s)) {
   3372         return true;
   3373     }
   3374 
   3375     fpst = fpstatus_ptr(FPST_STD);
   3376     tmp = tcg_temp_new_i64();
   3377     dst0 = tcg_temp_new_i32();
   3378     dst1 = tcg_temp_new_i32();
   3379 
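            /* bfcvt_pair narrows a pair of f32 inputs to two packed bf16 results. */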
   3380     read_neon_element64(tmp, a->vm, 0, MO_64);
   3381     gen_helper_bfcvt_pair(dst0, tmp, fpst);
   3382 
   3383     read_neon_element64(tmp, a->vm, 1, MO_64);
   3384     gen_helper_bfcvt_pair(dst1, tmp, fpst);
   3385 
   3386     write_neon_element32(dst0, a->vd, 0, MO_32);
   3387     write_neon_element32(dst1, a->vd, 1, MO_32);
   3388 
   3389     tcg_temp_free_i64(tmp);
   3390     tcg_temp_free_i32(dst0);
   3391     tcg_temp_free_i32(dst1);
   3392     tcg_temp_free_ptr(fpst);
   3393     return true;
   3394 }
   3395 
   3396 static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
   3397 {
   3398     TCGv_ptr fpst;
   3399     TCGv_i32 ahp, tmp, tmp2, tmp3;
   3400 
   3401     if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
   3402         !dc_isar_feature(aa32_fp16_spconv, s)) {
   3403         return false;
   3404     }
   3405 
   3406     /* UNDEF accesses to D16-D31 if they don't exist. */
   3407     if (!dc_isar_feature(aa32_simd_r32, s) &&
   3408         ((a->vd | a->vm) & 0x10)) {
   3409         return false;
   3410     }
   3411 
   3412     if ((a->vm & 1) || (a->size != 1)) {
   3413         return false;
   3414     }
   3415 
   3416     if (!vfp_access_check(s)) {
   3417         return true;
   3418     }
   3419 
   3420     fpst = fpstatus_ptr(FPST_STD);
   3421     ahp = get_ahp_flag();
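            /*
             * FPSCR.AHP selects between IEEE half-precision and the Arm
             * alternative format.  All four source words are read before
             * the first write, as Vd may overlap Qm.
             */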
   3422     tmp = tcg_temp_new_i32();
   3423     read_neon_element32(tmp, a->vm, 0, MO_32);
   3424     gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
   3425     tmp2 = tcg_temp_new_i32();
   3426     read_neon_element32(tmp2, a->vm, 1, MO_32);
   3427     gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
   3428     tcg_gen_shli_i32(tmp2, tmp2, 16);
   3429     tcg_gen_or_i32(tmp2, tmp2, tmp);
   3430     read_neon_element32(tmp, a->vm, 2, MO_32);
   3431     gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
   3432     tmp3 = tcg_temp_new_i32();
   3433     read_neon_element32(tmp3, a->vm, 3, MO_32);
   3434     write_neon_element32(tmp2, a->vd, 0, MO_32);
   3435     tcg_temp_free_i32(tmp2);
   3436     gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
   3437     tcg_gen_shli_i32(tmp3, tmp3, 16);
   3438     tcg_gen_or_i32(tmp3, tmp3, tmp);
   3439     write_neon_element32(tmp3, a->vd, 1, MO_32);
   3440     tcg_temp_free_i32(tmp3);
   3441     tcg_temp_free_i32(tmp);
   3442     tcg_temp_free_i32(ahp);
   3443     tcg_temp_free_ptr(fpst);
   3444 
   3445     return true;
   3446 }
   3447 
   3448 static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
   3449 {
   3450     TCGv_ptr fpst;
   3451     TCGv_i32 ahp, tmp, tmp2, tmp3;
   3452 
   3453     if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
   3454         !dc_isar_feature(aa32_fp16_spconv, s)) {
   3455         return false;
   3456     }
   3457 
   3458     /* UNDEF accesses to D16-D31 if they don't exist. */
   3459     if (!dc_isar_feature(aa32_simd_r32, s) &&
   3460         ((a->vd | a->vm) & 0x10)) {
   3461         return false;
   3462     }
   3463 
   3464     if ((a->vd & 1) || (a->size != 1)) {
   3465         return false;
   3466     }
   3467 
   3468     if (!vfp_access_check(s)) {
   3469         return true;
   3470     }
   3471 
   3472     fpst = fpstatus_ptr(FPST_STD);
   3473     ahp = get_ahp_flag();
   3474     tmp3 = tcg_temp_new_i32();
   3475     tmp2 = tcg_temp_new_i32();
   3476     tmp = tcg_temp_new_i32();
   3477     read_neon_element32(tmp, a->vm, 0, MO_32);
   3478     read_neon_element32(tmp2, a->vm, 1, MO_32);
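            /* Convert the low 16 bits of each source word first, then the high 16. */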
   3479     tcg_gen_ext16u_i32(tmp3, tmp);
   3480     gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
   3481     write_neon_element32(tmp3, a->vd, 0, MO_32);
   3482     tcg_gen_shri_i32(tmp, tmp, 16);
   3483     gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
   3484     write_neon_element32(tmp, a->vd, 1, MO_32);
   3485     tcg_temp_free_i32(tmp);
   3486     tcg_gen_ext16u_i32(tmp3, tmp2);
   3487     gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
   3488     write_neon_element32(tmp3, a->vd, 2, MO_32);
   3489     tcg_temp_free_i32(tmp3);
   3490     tcg_gen_shri_i32(tmp2, tmp2, 16);
   3491     gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
   3492     write_neon_element32(tmp2, a->vd, 3, MO_32);
   3493     tcg_temp_free_i32(tmp2);
   3494     tcg_temp_free_i32(ahp);
   3495     tcg_temp_free_ptr(fpst);
   3496 
   3497     return true;
   3498 }
   3499 
   3500 static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
   3501 {
   3502     int vec_size = a->q ? 16 : 8;
   3503     int rd_ofs = neon_full_reg_offset(a->vd);
   3504     int rm_ofs = neon_full_reg_offset(a->vm);
   3505 
   3506     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   3507         return false;
   3508     }
   3509 
   3510     /* UNDEF accesses to D16-D31 if they don't exist. */
   3511     if (!dc_isar_feature(aa32_simd_r32, s) &&
   3512         ((a->vd | a->vm) & 0x10)) {
   3513         return false;
   3514     }
   3515 
   3516     if (a->size == 3) {
   3517         return false;
   3518     }
   3519 
   3520     if ((a->vd | a->vm) & a->q) {
   3521         return false;
   3522     }
   3523 
   3524     if (!vfp_access_check(s)) {
   3525         return true;
   3526     }
   3527 
   3528     fn(a->size, rd_ofs, rm_ofs, vec_size, vec_size);
   3529 
   3530     return true;
   3531 }
   3532 
   3533 #define DO_2MISC_VEC(INSN, FN)                                  \
   3534     static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
   3535     {                                                           \
   3536         return do_2misc_vec(s, a, FN);                          \
   3537     }
   3538 
   3539 DO_2MISC_VEC(VNEG, tcg_gen_gvec_neg)
   3540 DO_2MISC_VEC(VABS, tcg_gen_gvec_abs)
   3541 DO_2MISC_VEC(VCEQ0, gen_gvec_ceq0)
   3542 DO_2MISC_VEC(VCGT0, gen_gvec_cgt0)
   3543 DO_2MISC_VEC(VCLE0, gen_gvec_cle0)
   3544 DO_2MISC_VEC(VCGE0, gen_gvec_cge0)
   3545 DO_2MISC_VEC(VCLT0, gen_gvec_clt0)
   3546 
   3547 static bool trans_VMVN(DisasContext *s, arg_2misc *a)
   3548 {
   3549     if (a->size != 0) {
   3550         return false;
   3551     }
   3552     return do_2misc_vec(s, a, tcg_gen_gvec_not);
   3553 }
   3554 
   3555 #define WRAP_2M_3_OOL_FN(WRAPNAME, FUNC, DATA)                          \
   3556     static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
   3557                          uint32_t rm_ofs, uint32_t oprsz,               \
   3558                          uint32_t maxsz)                                \
   3559     {                                                                   \
   3560         tcg_gen_gvec_3_ool(rd_ofs, rd_ofs, rm_ofs, oprsz, maxsz,        \
   3561                            DATA, FUNC);                                 \
   3562     }
   3563 
   3564 #define WRAP_2M_2_OOL_FN(WRAPNAME, FUNC, DATA)                          \
   3565     static void WRAPNAME(unsigned vece, uint32_t rd_ofs,                \
   3566                          uint32_t rm_ofs, uint32_t oprsz,               \
   3567                          uint32_t maxsz)                                \
   3568     {                                                                   \
   3569         tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, oprsz, maxsz, DATA, FUNC);   \
   3570     }
   3571 
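        /*
         * Pairs of operations share a helper: the DATA immediate selects
         * the decrypt variant (AESD, AESIMC) when set to 1.
         */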
   3572 WRAP_2M_3_OOL_FN(gen_AESE, gen_helper_crypto_aese, 0)
   3573 WRAP_2M_3_OOL_FN(gen_AESD, gen_helper_crypto_aese, 1)
   3574 WRAP_2M_2_OOL_FN(gen_AESMC, gen_helper_crypto_aesmc, 0)
   3575 WRAP_2M_2_OOL_FN(gen_AESIMC, gen_helper_crypto_aesmc, 1)
   3576 WRAP_2M_2_OOL_FN(gen_SHA1H, gen_helper_crypto_sha1h, 0)
   3577 WRAP_2M_2_OOL_FN(gen_SHA1SU1, gen_helper_crypto_sha1su1, 0)
   3578 WRAP_2M_2_OOL_FN(gen_SHA256SU0, gen_helper_crypto_sha256su0, 0)
   3579 
   3580 #define DO_2M_CRYPTO(INSN, FEATURE, SIZE)                       \
   3581     static bool trans_##INSN(DisasContext *s, arg_2misc *a)     \
   3582     {                                                           \
   3583         if (!dc_isar_feature(FEATURE, s) || a->size != SIZE) {  \
   3584             return false;                                       \
   3585         }                                                       \
   3586         return do_2misc_vec(s, a, gen_##INSN);                  \
   3587     }
   3588 
   3589 DO_2M_CRYPTO(AESE, aa32_aes, 0)
   3590 DO_2M_CRYPTO(AESD, aa32_aes, 0)
   3591 DO_2M_CRYPTO(AESMC, aa32_aes, 0)
   3592 DO_2M_CRYPTO(AESIMC, aa32_aes, 0)
   3593 DO_2M_CRYPTO(SHA1H, aa32_sha1, 2)
   3594 DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2)
   3595 DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
   3596 
   3597 static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
   3598 {
   3599     TCGv_i32 tmp;
   3600     int pass;
   3601 
   3602     /* Handle a 2-reg-misc operation by iterating 32 bits at a time */
   3603     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   3604         return false;
   3605     }
   3606 
   3607     /* UNDEF accesses to D16-D31 if they don't exist. */
   3608     if (!dc_isar_feature(aa32_simd_r32, s) &&
   3609         ((a->vd | a->vm) & 0x10)) {
   3610         return false;
   3611     }
   3612 
   3613     if (!fn) {
   3614         return false;
   3615     }
   3616 
   3617     if ((a->vd | a->vm) & a->q) {
   3618         return false;
   3619     }
   3620 
   3621     if (!vfp_access_check(s)) {
   3622         return true;
   3623     }
   3624 
   3625     tmp = tcg_temp_new_i32();
   3626     for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
   3627         read_neon_element32(tmp, a->vm, pass, MO_32);
   3628         fn(tmp, tmp);
   3629         write_neon_element32(tmp, a->vd, pass, MO_32);
   3630     }
   3631     tcg_temp_free_i32(tmp);
   3632 
   3633     return true;
   3634 }
   3635 
   3636 static bool trans_VREV32(DisasContext *s, arg_2misc *a)
   3637 {
   3638     static NeonGenOneOpFn * const fn[] = {
   3639         tcg_gen_bswap32_i32,
   3640         gen_swap_half,
   3641         NULL,
   3642         NULL,
   3643     };
   3644     return do_2misc(s, a, fn[a->size]);
   3645 }
   3646 
   3647 static bool trans_VREV16(DisasContext *s, arg_2misc *a)
   3648 {
   3649     if (a->size != 0) {
   3650         return false;
   3651     }
   3652     return do_2misc(s, a, gen_rev16);
   3653 }
   3654 
   3655 static bool trans_VCLS(DisasContext *s, arg_2misc *a)
   3656 {
   3657     static NeonGenOneOpFn * const fn[] = {
   3658         gen_helper_neon_cls_s8,
   3659         gen_helper_neon_cls_s16,
   3660         gen_helper_neon_cls_s32,
   3661         NULL,
   3662     };
   3663     return do_2misc(s, a, fn[a->size]);
   3664 }
   3665 
   3666 static void do_VCLZ_32(TCGv_i32 rd, TCGv_i32 rm)
   3667 {
   3668     tcg_gen_clzi_i32(rd, rm, 32);
   3669 }
   3670 
   3671 static bool trans_VCLZ(DisasContext *s, arg_2misc *a)
   3672 {
   3673     static NeonGenOneOpFn * const fn[] = {
   3674         gen_helper_neon_clz_u8,
   3675         gen_helper_neon_clz_u16,
   3676         do_VCLZ_32,
   3677         NULL,
   3678     };
   3679     return do_2misc(s, a, fn[a->size]);
   3680 }
   3681 
   3682 static bool trans_VCNT(DisasContext *s, arg_2misc *a)
   3683 {
   3684     if (a->size != 0) {
   3685         return false;
   3686     }
   3687     return do_2misc(s, a, gen_helper_neon_cnt_u8);
   3688 }
   3689 
   3690 static void gen_VABS_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3691                        uint32_t oprsz, uint32_t maxsz)
   3692 {
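            /* FP abs is a pure bit operation: clear the sign bit of each element. */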
   3693     tcg_gen_gvec_andi(vece, rd_ofs, rm_ofs,
   3694                       vece == MO_16 ? 0x7fff : 0x7fffffff,
   3695                       oprsz, maxsz);
   3696 }
   3697 
   3698 static bool trans_VABS_F(DisasContext *s, arg_2misc *a)
   3699 {
   3700     if (a->size == MO_16) {
   3701         if (!dc_isar_feature(aa32_fp16_arith, s)) {
   3702             return false;
   3703         }
   3704     } else if (a->size != MO_32) {
   3705         return false;
   3706     }
   3707     return do_2misc_vec(s, a, gen_VABS_F);
   3708 }
   3709 
   3710 static void gen_VNEG_F(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3711                        uint32_t oprsz, uint32_t maxsz)
   3712 {
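            /* FP negation likewise just flips the sign bit; no fpstatus needed. */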
   3713     tcg_gen_gvec_xori(vece, rd_ofs, rm_ofs,
   3714                       vece == MO_16 ? 0x8000 : 0x80000000,
   3715                       oprsz, maxsz);
   3716 }
   3717 
   3718 static bool trans_VNEG_F(DisasContext *s, arg_2misc *a)
   3719 {
   3720     if (a->size == MO_16) {
   3721         if (!dc_isar_feature(aa32_fp16_arith, s)) {
   3722             return false;
   3723         }
   3724     } else if (a->size != MO_32) {
   3725         return false;
   3726     }
   3727     return do_2misc_vec(s, a, gen_VNEG_F);
   3728 }
   3729 
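        /* The integer VRECPE/VRSQRTE estimates exist only for 32-bit elements. */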
   3730 static bool trans_VRECPE(DisasContext *s, arg_2misc *a)
   3731 {
   3732     if (a->size != 2) {
   3733         return false;
   3734     }
   3735     return do_2misc(s, a, gen_helper_recpe_u32);
   3736 }
   3737 
   3738 static bool trans_VRSQRTE(DisasContext *s, arg_2misc *a)
   3739 {
   3740     if (a->size != 2) {
   3741         return false;
   3742     }
   3743     return do_2misc(s, a, gen_helper_rsqrte_u32);
   3744 }
   3745 
   3746 #define WRAP_1OP_ENV_FN(WRAPNAME, FUNC) \
   3747     static void WRAPNAME(TCGv_i32 d, TCGv_i32 m)        \
   3748     {                                                   \
   3749         FUNC(d, cpu_env, m);                            \
   3750     }
   3751 
   3752 WRAP_1OP_ENV_FN(gen_VQABS_s8, gen_helper_neon_qabs_s8)
   3753 WRAP_1OP_ENV_FN(gen_VQABS_s16, gen_helper_neon_qabs_s16)
   3754 WRAP_1OP_ENV_FN(gen_VQABS_s32, gen_helper_neon_qabs_s32)
   3755 WRAP_1OP_ENV_FN(gen_VQNEG_s8, gen_helper_neon_qneg_s8)
   3756 WRAP_1OP_ENV_FN(gen_VQNEG_s16, gen_helper_neon_qneg_s16)
   3757 WRAP_1OP_ENV_FN(gen_VQNEG_s32, gen_helper_neon_qneg_s32)
   3758 
   3759 static bool trans_VQABS(DisasContext *s, arg_2misc *a)
   3760 {
   3761     static NeonGenOneOpFn * const fn[] = {
   3762         gen_VQABS_s8,
   3763         gen_VQABS_s16,
   3764         gen_VQABS_s32,
   3765         NULL,
   3766     };
   3767     return do_2misc(s, a, fn[a->size]);
   3768 }
   3769 
   3770 static bool trans_VQNEG(DisasContext *s, arg_2misc *a)
   3771 {
   3772     static NeonGenOneOpFn * const fn[] = {
   3773         gen_VQNEG_s8,
   3774         gen_VQNEG_s16,
   3775         gen_VQNEG_s32,
   3776         NULL,
   3777     };
   3778     return do_2misc(s, a, fn[a->size]);
   3779 }
   3780 
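        /*
         * Neon 2-reg-misc FP ops always use the "standard FPSCR" float
         * status, with the FP16 variant for half-precision elements.
         */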
   3781 #define DO_2MISC_FP_VEC(INSN, HFUNC, SFUNC)                             \
   3782     static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
   3783                            uint32_t rm_ofs,                             \
   3784                            uint32_t oprsz, uint32_t maxsz)              \
   3785     {                                                                   \
   3786         static gen_helper_gvec_2_ptr * const fns[4] = {                 \
   3787             NULL, HFUNC, SFUNC, NULL,                                   \
   3788         };                                                              \
   3789         TCGv_ptr fpst;                                                  \
   3790         fpst = fpstatus_ptr(vece == MO_16 ? FPST_STD_F16 : FPST_STD);   \
   3791         tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz, 0,       \
   3792                            fns[vece]);                                  \
   3793         tcg_temp_free_ptr(fpst);                                        \
   3794     }                                                                   \
   3795     static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
   3796     {                                                                   \
   3797         if (a->size == MO_16) {                                         \
   3798             if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
   3799                 return false;                                           \
   3800             }                                                           \
   3801         } else if (a->size != MO_32) {                                  \
   3802             return false;                                               \
   3803         }                                                               \
   3804         return do_2misc_vec(s, a, gen_##INSN);                          \
   3805     }
   3806 
   3807 DO_2MISC_FP_VEC(VRECPE_F, gen_helper_gvec_frecpe_h, gen_helper_gvec_frecpe_s)
   3808 DO_2MISC_FP_VEC(VRSQRTE_F, gen_helper_gvec_frsqrte_h, gen_helper_gvec_frsqrte_s)
   3809 DO_2MISC_FP_VEC(VCGT0_F, gen_helper_gvec_fcgt0_h, gen_helper_gvec_fcgt0_s)
   3810 DO_2MISC_FP_VEC(VCGE0_F, gen_helper_gvec_fcge0_h, gen_helper_gvec_fcge0_s)
   3811 DO_2MISC_FP_VEC(VCEQ0_F, gen_helper_gvec_fceq0_h, gen_helper_gvec_fceq0_s)
   3812 DO_2MISC_FP_VEC(VCLT0_F, gen_helper_gvec_fclt0_h, gen_helper_gvec_fclt0_s)
   3813 DO_2MISC_FP_VEC(VCLE0_F, gen_helper_gvec_fcle0_h, gen_helper_gvec_fcle0_s)
   3814 DO_2MISC_FP_VEC(VCVT_FS, gen_helper_gvec_sstoh, gen_helper_gvec_sitos)
   3815 DO_2MISC_FP_VEC(VCVT_FU, gen_helper_gvec_ustoh, gen_helper_gvec_uitos)
   3816 DO_2MISC_FP_VEC(VCVT_SF, gen_helper_gvec_tosszh, gen_helper_gvec_tosizs)
   3817 DO_2MISC_FP_VEC(VCVT_UF, gen_helper_gvec_touszh, gen_helper_gvec_touizs)
   3818 
   3819 DO_2MISC_FP_VEC(VRINTX_impl, gen_helper_gvec_vrintx_h, gen_helper_gvec_vrintx_s)
   3820 
   3821 static bool trans_VRINTX(DisasContext *s, arg_2misc *a)
   3822 {
   3823     if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
   3824         return false;
   3825     }
   3826     return trans_VRINTX_impl(s, a);
   3827 }
   3828 
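        /*
         * The explicit-rounding-mode variants convert the ARM rounding
         * mode to its softfloat encoding and pass it to the helper via
         * the gvec data argument.
         */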
   3829 #define DO_VEC_RMODE(INSN, RMODE, OP)                                   \
   3830     static void gen_##INSN(unsigned vece, uint32_t rd_ofs,              \
   3831                            uint32_t rm_ofs,                             \
   3832                            uint32_t oprsz, uint32_t maxsz)              \
   3833     {                                                                   \
   3834         static gen_helper_gvec_2_ptr * const fns[4] = {                 \
   3835             NULL,                                                       \
   3836             gen_helper_gvec_##OP##h,                                    \
   3837             gen_helper_gvec_##OP##s,                                    \
   3838             NULL,                                                       \
   3839         };                                                              \
   3840         TCGv_ptr fpst;                                                  \
   3841         fpst = fpstatus_ptr(vece == 1 ? FPST_STD_F16 : FPST_STD);       \
   3842         tcg_gen_gvec_2_ptr(rd_ofs, rm_ofs, fpst, oprsz, maxsz,          \
   3843                            arm_rmode_to_sf(RMODE), fns[vece]);          \
   3844         tcg_temp_free_ptr(fpst);                                        \
   3845     }                                                                   \
   3846     static bool trans_##INSN(DisasContext *s, arg_2misc *a)             \
   3847     {                                                                   \
   3848         if (!arm_dc_feature(s, ARM_FEATURE_V8)) {                       \
   3849             return false;                                               \
   3850         }                                                               \
   3851         if (a->size == MO_16) {                                         \
   3852             if (!dc_isar_feature(aa32_fp16_arith, s)) {                 \
   3853                 return false;                                           \
   3854             }                                                           \
   3855         } else if (a->size != MO_32) {                                  \
   3856             return false;                                               \
   3857         }                                                               \
   3858         return do_2misc_vec(s, a, gen_##INSN);                          \
   3859     }
   3860 
   3861 DO_VEC_RMODE(VCVTAU, FPROUNDING_TIEAWAY, vcvt_rm_u)
   3862 DO_VEC_RMODE(VCVTAS, FPROUNDING_TIEAWAY, vcvt_rm_s)
   3863 DO_VEC_RMODE(VCVTNU, FPROUNDING_TIEEVEN, vcvt_rm_u)
   3864 DO_VEC_RMODE(VCVTNS, FPROUNDING_TIEEVEN, vcvt_rm_s)
   3865 DO_VEC_RMODE(VCVTPU, FPROUNDING_POSINF, vcvt_rm_u)
   3866 DO_VEC_RMODE(VCVTPS, FPROUNDING_POSINF, vcvt_rm_s)
   3867 DO_VEC_RMODE(VCVTMU, FPROUNDING_NEGINF, vcvt_rm_u)
   3868 DO_VEC_RMODE(VCVTMS, FPROUNDING_NEGINF, vcvt_rm_s)
   3869 
   3870 DO_VEC_RMODE(VRINTN, FPROUNDING_TIEEVEN, vrint_rm_)
   3871 DO_VEC_RMODE(VRINTA, FPROUNDING_TIEAWAY, vrint_rm_)
   3872 DO_VEC_RMODE(VRINTZ, FPROUNDING_ZERO, vrint_rm_)
   3873 DO_VEC_RMODE(VRINTM, FPROUNDING_NEGINF, vrint_rm_)
   3874 DO_VEC_RMODE(VRINTP, FPROUNDING_POSINF, vrint_rm_)
   3875 
   3876 static bool trans_VSWP(DisasContext *s, arg_2misc *a)
   3877 {
   3878     TCGv_i64 rm, rd;
   3879     int pass;
   3880 
   3881     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   3882         return false;
   3883     }
   3884 
   3885     /* UNDEF accesses to D16-D31 if they don't exist. */
   3886     if (!dc_isar_feature(aa32_simd_r32, s) &&
   3887         ((a->vd | a->vm) & 0x10)) {
   3888         return false;
   3889     }
   3890 
   3891     if (a->size != 0) {
   3892         return false;
   3893     }
   3894 
   3895     if ((a->vd | a->vm) & a->q) {
   3896         return false;
   3897     }
   3898 
   3899     if (!vfp_access_check(s)) {
   3900         return true;
   3901     }
   3902 
   3903     rm = tcg_temp_new_i64();
   3904     rd = tcg_temp_new_i64();
   3905     for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
   3906         read_neon_element64(rm, a->vm, pass, MO_64);
   3907         read_neon_element64(rd, a->vd, pass, MO_64);
   3908         write_neon_element64(rm, a->vd, pass, MO_64);
   3909         write_neon_element64(rd, a->vm, pass, MO_64);
   3910     }
   3911     tcg_temp_free_i64(rm);
   3912     tcg_temp_free_i64(rd);
   3913 
   3914     return true;
   3915 }
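
        /*
         * Scalar cores of VTRN: treat adjacent elements of t0 and t1 as
         * 2x2 matrices and transpose them in place.
         */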
   3916 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
   3917 {
   3918     TCGv_i32 rd, tmp;
   3919 
   3920     rd = tcg_temp_new_i32();
   3921     tmp = tcg_temp_new_i32();
   3922 
   3923     tcg_gen_shli_i32(rd, t0, 8);
   3924     tcg_gen_andi_i32(rd, rd, 0xff00ff00);
   3925     tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
   3926     tcg_gen_or_i32(rd, rd, tmp);
   3927 
   3928     tcg_gen_shri_i32(t1, t1, 8);
   3929     tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
   3930     tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
   3931     tcg_gen_or_i32(t1, t1, tmp);
   3932     tcg_gen_mov_i32(t0, rd);
   3933 
   3934     tcg_temp_free_i32(tmp);
   3935     tcg_temp_free_i32(rd);
   3936 }
   3937 
   3938 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
   3939 {
   3940     TCGv_i32 rd, tmp;
   3941 
   3942     rd = tcg_temp_new_i32();
   3943     tmp = tcg_temp_new_i32();
   3944 
   3945     tcg_gen_shli_i32(rd, t0, 16);
   3946     tcg_gen_andi_i32(tmp, t1, 0xffff);
   3947     tcg_gen_or_i32(rd, rd, tmp);
   3948     tcg_gen_shri_i32(t1, t1, 16);
   3949     tcg_gen_andi_i32(tmp, t0, 0xffff0000);
   3950     tcg_gen_or_i32(t1, t1, tmp);
   3951     tcg_gen_mov_i32(t0, rd);
   3952 
   3953     tcg_temp_free_i32(tmp);
   3954     tcg_temp_free_i32(rd);
   3955 }
   3956 
   3957 static bool trans_VTRN(DisasContext *s, arg_2misc *a)
   3958 {
   3959     TCGv_i32 tmp, tmp2;
   3960     int pass;
   3961 
   3962     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
   3963         return false;
   3964     }
   3965 
   3966     /* UNDEF accesses to D16-D31 if they don't exist. */
   3967     if (!dc_isar_feature(aa32_simd_r32, s) &&
   3968         ((a->vd | a->vm) & 0x10)) {
   3969         return false;
   3970     }
   3971 
   3972     if ((a->vd | a->vm) & a->q) {
   3973         return false;
   3974     }
   3975 
   3976     if (a->size == 3) {
   3977         return false;
   3978     }
   3979 
   3980     if (!vfp_access_check(s)) {
   3981         return true;
   3982     }
   3983 
   3984     tmp = tcg_temp_new_i32();
   3985     tmp2 = tcg_temp_new_i32();
   3986     if (a->size == MO_32) {
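                /*
                 * For 32-bit elements the transpose is a plain exchange
                 * of Vd[odd] with Vm[even].
                 */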
   3987         for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
   3988             read_neon_element32(tmp, a->vm, pass, MO_32);
   3989             read_neon_element32(tmp2, a->vd, pass + 1, MO_32);
   3990             write_neon_element32(tmp2, a->vm, pass, MO_32);
   3991             write_neon_element32(tmp, a->vd, pass + 1, MO_32);
   3992         }
   3993     } else {
   3994         for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
   3995             read_neon_element32(tmp, a->vm, pass, MO_32);
   3996             read_neon_element32(tmp2, a->vd, pass, MO_32);
   3997             if (a->size == MO_8) {
   3998                 gen_neon_trn_u8(tmp, tmp2);
   3999             } else {
   4000                 gen_neon_trn_u16(tmp, tmp2);
   4001             }
   4002             write_neon_element32(tmp2, a->vm, pass, MO_32);
   4003             write_neon_element32(tmp, a->vd, pass, MO_32);
   4004         }
   4005     }
   4006     tcg_temp_free_i32(tmp);
   4007     tcg_temp_free_i32(tmp2);
   4008     return true;
   4009 }
   4010 
   4011 static bool trans_VSMMLA(DisasContext *s, arg_VSMMLA *a)
   4012 {
   4013     if (!dc_isar_feature(aa32_i8mm, s)) {
   4014         return false;
   4015     }
   4016     return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
   4017                         gen_helper_gvec_smmla_b);
   4018 }
   4019 
   4020 static bool trans_VUMMLA(DisasContext *s, arg_VUMMLA *a)
   4021 {
   4022     if (!dc_isar_feature(aa32_i8mm, s)) {
   4023         return false;
   4024     }
   4025     return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
   4026                         gen_helper_gvec_ummla_b);
   4027 }
   4028 
   4029 static bool trans_VUSMMLA(DisasContext *s, arg_VUSMMLA *a)
   4030 {
   4031     if (!dc_isar_feature(aa32_i8mm, s)) {
   4032         return false;
   4033     }
   4034     return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
   4035                         gen_helper_gvec_usmmla_b);
   4036 }
   4037 
   4038 static bool trans_VMMLA_b16(DisasContext *s, arg_VMMLA_b16 *a)
   4039 {
   4040     if (!dc_isar_feature(aa32_bf16, s)) {
   4041         return false;
   4042     }
   4043     return do_neon_ddda(s, 7, a->vd, a->vn, a->vm, 0,
   4044                         gen_helper_gvec_bfmmla);
   4045 }
   4046 
   4047 static bool trans_VFMA_b16(DisasContext *s, arg_VFMA_b16 *a)
   4048 {
   4049     if (!dc_isar_feature(aa32_bf16, s)) {
   4050         return false;
   4051     }
   4052     return do_neon_ddda_fpst(s, 7, a->vd, a->vn, a->vm, a->q, FPST_STD,
   4053                              gen_helper_gvec_bfmlal);
   4054 }
   4055 
   4056 static bool trans_VFMA_b16_scal(DisasContext *s, arg_VFMA_b16_scal *a)
   4057 {
   4058     if (!dc_isar_feature(aa32_bf16, s)) {
   4059         return false;
   4060     }
   4061     return do_neon_ddda_fpst(s, 6, a->vd, a->vn, a->vm,
   4062                              (a->index << 1) | a->q, FPST_STD,
   4063                              gen_helper_gvec_bfmlal_idx);
   4064 }