qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git

translate-a64.c (476260B)


      1 /*
      2  *  AArch64 translation
      3  *
      4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Lesser General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2.1 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Lesser General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Lesser General Public
     17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     18  */
     19 #include "qemu/osdep.h"
     20 
     21 #include "cpu.h"
     22 #include "exec/exec-all.h"
     23 #include "tcg/tcg-op.h"
     24 #include "tcg/tcg-op-gvec.h"
     25 #include "qemu/log.h"
     26 #include "arm_ldst.h"
     27 #include "translate.h"
     28 #include "internals.h"
     29 #include "qemu/host-utils.h"
     30 #include "semihosting/semihost.h"
     31 #include "exec/gen-icount.h"
     32 #include "exec/helper-proto.h"
     33 #include "exec/helper-gen.h"
     34 #include "exec/log.h"
     35 #include "cpregs.h"
     36 #include "translate-a64.h"
     37 #include "qemu/atomic128.h"
     38 
     39 static TCGv_i64 cpu_X[32];
     40 static TCGv_i64 cpu_pc;
     41 
     42 /* Load/store exclusive handling */
     43 static TCGv_i64 cpu_exclusive_high;
     44 
     45 static const char *regnames[] = {
     46     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
     47     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
     48     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
     49     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
     50 };
     51 
     52 enum a64_shift_type {
     53     A64_SHIFT_TYPE_LSL = 0,
     54     A64_SHIFT_TYPE_LSR = 1,
     55     A64_SHIFT_TYPE_ASR = 2,
     56     A64_SHIFT_TYPE_ROR = 3
     57 };
     58 
      59 /* Table-based decoder typedefs - used when the relevant bits for decode
      60  * are too awkwardly scattered across the instruction (e.g. SIMD).
     61  */
     62 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
     63 
     64 typedef struct AArch64DecodeTable {
     65     uint32_t pattern;
     66     uint32_t mask;
     67     AArch64DecodeFn *disas_fn;
     68 } AArch64DecodeTable;
     69 
     70 /* initialize TCG globals.  */
     71 void a64_translate_init(void)
     72 {
     73     int i;
     74 
     75     cpu_pc = tcg_global_mem_new_i64(cpu_env,
     76                                     offsetof(CPUARMState, pc),
     77                                     "pc");
     78     for (i = 0; i < 32; i++) {
     79         cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
     80                                           offsetof(CPUARMState, xregs[i]),
     81                                           regnames[i]);
     82     }
     83 
     84     cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
     85         offsetof(CPUARMState, exclusive_high), "exclusive_high");
     86 }
     87 
     88 /*
     89  * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
     90  */
     91 static int get_a64_user_mem_index(DisasContext *s)
     92 {
     93     /*
     94      * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     95      * which is the usual mmu_idx for this cpu state.
     96      */
     97     ARMMMUIdx useridx = s->mmu_idx;
     98 
     99     if (s->unpriv) {
    100         /*
    101          * We have pre-computed the condition for AccType_UNPRIV.
    102          * Therefore we should never get here with a mmu_idx for
    103          * which we do not know the corresponding user mmu_idx.
    104          */
    105         switch (useridx) {
    106         case ARMMMUIdx_E10_1:
    107         case ARMMMUIdx_E10_1_PAN:
    108             useridx = ARMMMUIdx_E10_0;
    109             break;
    110         case ARMMMUIdx_E20_2:
    111         case ARMMMUIdx_E20_2_PAN:
    112             useridx = ARMMMUIdx_E20_0;
    113             break;
    114         default:
    115             g_assert_not_reached();
    116         }
    117     }
    118     return arm_to_core_mmu_idx(useridx);
    119 }
    120 
    121 static void set_btype_raw(int val)
    122 {
    123     tcg_gen_st_i32(tcg_constant_i32(val), cpu_env,
    124                    offsetof(CPUARMState, btype));
    125 }
    126 
    127 static void set_btype(DisasContext *s, int val)
    128 {
    129     /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    130     tcg_debug_assert(val >= 1 && val <= 3);
    131     set_btype_raw(val);
    132     s->btype = -1;
    133 }
    134 
    135 static void reset_btype(DisasContext *s)
    136 {
    137     if (s->btype != 0) {
    138         set_btype_raw(0);
    139         s->btype = 0;
    140     }
    141 }
    142 
    143 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
    144 {
    145     assert(s->pc_save != -1);
    146     if (TARGET_TB_PCREL) {
    147         tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    148     } else {
    149         tcg_gen_movi_i64(dest, s->pc_curr + diff);
    150     }
    151 }
    152 
    153 void gen_a64_update_pc(DisasContext *s, target_long diff)
    154 {
    155     gen_pc_plus_diff(s, cpu_pc, diff);
    156     s->pc_save = s->pc_curr + diff;
    157 }
    158 
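/*
 * Worked example (illustrative, not from the source): with TARGET_TB_PCREL,
 * cpu_pc holds the value recorded in s->pc_save.  If pc_save = 0x1000,
 * pc_curr = 0x1008 and diff = 8, gen_pc_plus_diff() emits
 * cpu_pc + (0x1008 - 0x1000) + 8, i.e. cpu_pc + 0x10, so the absolute PC
 * is never needed at translation time.
 */
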
    159 /*
    160  * Handle Top Byte Ignore (TBI) bits.
    161  *
    162  * If address tagging is enabled via the TCR TBI bits:
    163  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
    164  *    then the address is zero-extended, clearing bits [63:56]
    165  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
     166  *    and TBI1 controls addresses with bit 55 == 1.
    167  *    If the appropriate TBI bit is set for the address then
    168  *    the address is sign-extended from bit 55 into bits [63:56]
    169  *
     170  * Here we have concatenated TBI{1,0} into tbi.
    171  */
    172 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
    173                                 TCGv_i64 src, int tbi)
    174 {
    175     if (tbi == 0) {
    176         /* Load unmodified address */
    177         tcg_gen_mov_i64(dst, src);
    178     } else if (!regime_has_2_ranges(s->mmu_idx)) {
    179         /* Force tag byte to all zero */
    180         tcg_gen_extract_i64(dst, src, 0, 56);
    181     } else {
    182         /* Sign-extend from bit 55.  */
    183         tcg_gen_sextract_i64(dst, src, 0, 56);
    184 
    185         switch (tbi) {
    186         case 1:
    187             /* tbi0 but !tbi1: only use the extension if positive */
    188             tcg_gen_and_i64(dst, dst, src);
    189             break;
    190         case 2:
    191             /* !tbi0 but tbi1: only use the extension if negative */
    192             tcg_gen_or_i64(dst, dst, src);
    193             break;
    194         case 3:
    195             /* tbi0 and tbi1: always use the extension */
    196             break;
    197         default:
    198             g_assert_not_reached();
    199         }
    200     }
    201 }
    202 
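/*
 * Illustrative sketch (not QEMU code): the tag-byte arithmetic of
 * gen_top_byte_ignore() above, evaluated on plain host integers for the
 * two-range (EL0/EL1) regime.  All names here are invented for the example.
 */
static uint64_t model_top_byte_ignore(uint64_t addr, int tbi)
{
    /* sextract64(addr, 0, 56): sign-extend from bit 55 into [63:56] */
    uint64_t ext = (addr & (1ULL << 55)) ? (addr | 0xff00000000000000ull)
                                         : (addr & 0x00ffffffffffffffull);

    switch (tbi) {
    case 0:
        return addr;        /* no tagging: address unmodified */
    case 1:
        return ext & addr;  /* TBI0 only: extension used when bit 55 == 0 */
    case 2:
        return ext | addr;  /* TBI1 only: extension used when bit 55 == 1 */
    default:
        return ext;         /* TBI0 and TBI1: always use the extension */
    }
}
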
    203 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
    204 {
    205     /*
    206      * If address tagging is enabled for instructions via the TCR TBI bits,
    207      * then loading an address into the PC will clear out any tag.
    208      */
    209     gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    210     s->pc_save = -1;
    211 }
    212 
    213 /*
    214  * Handle MTE and/or TBI.
    215  *
    216  * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
    217  * for the tag to be present in the FAR_ELx register.  But for user-only
    218  * mode we do not have a TLB with which to implement this, so we must
    219  * remove the top byte now.
    220  *
    221  * Always return a fresh temporary that we can increment independently
    222  * of the write-back address.
    223  */
    224 
    225 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
    226 {
    227     TCGv_i64 clean = new_tmp_a64(s);
    228 #ifdef CONFIG_USER_ONLY
    229     gen_top_byte_ignore(s, clean, addr, s->tbid);
    230 #else
    231     tcg_gen_mov_i64(clean, addr);
    232 #endif
    233     return clean;
    234 }
    235 
    236 /* Insert a zero tag into src, with the result at dst. */
    237 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
    238 {
    239     tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
    240 }
    241 
    242 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
    243                              MMUAccessType acc, int log2_size)
    244 {
    245     gen_helper_probe_access(cpu_env, ptr,
    246                             tcg_constant_i32(acc),
    247                             tcg_constant_i32(get_mem_index(s)),
    248                             tcg_constant_i32(1 << log2_size));
    249 }
    250 
    251 /*
    252  * For MTE, check a single logical or atomic access.  This probes a single
    253  * address, the exact one specified.  The size and alignment of the access
    254  * is not relevant to MTE, per se, but watchpoints do require the size,
    255  * and we want to recognize those before making any other changes to state.
    256  */
    257 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
    258                                       bool is_write, bool tag_checked,
    259                                       int log2_size, bool is_unpriv,
    260                                       int core_idx)
    261 {
    262     if (tag_checked && s->mte_active[is_unpriv]) {
    263         TCGv_i64 ret;
    264         int desc = 0;
    265 
    266         desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
    267         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
    268         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
    269         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
    270         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << log2_size) - 1);
    271 
    272         ret = new_tmp_a64(s);
    273         gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);
    274 
    275         return ret;
    276     }
    277     return clean_data_tbi(s, addr);
    278 }
    279 
    280 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
    281                         bool tag_checked, int log2_size)
    282 {
    283     return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
    284                                  false, get_mem_index(s));
    285 }
    286 
    287 /*
    288  * For MTE, check multiple logical sequential accesses.
    289  */
    290 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
    291                         bool tag_checked, int size)
    292 {
    293     if (tag_checked && s->mte_active[0]) {
    294         TCGv_i64 ret;
    295         int desc = 0;
    296 
    297         desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
    298         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
    299         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
    300         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
    301         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, size - 1);
    302 
    303         ret = new_tmp_a64(s);
    304         gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);
    305 
    306         return ret;
    307     }
    308     return clean_data_tbi(s, addr);
    309 }
    310 
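/*
 * Illustrative sketch (not QEMU code): how a 32-bit descriptor such as
 * MTEDESC above packs several small fields.  The field positions below are
 * invented for the example; the real layout comes from the FIELD()
 * declarations that FIELD_DP32() uses.
 */
static uint32_t model_pack_desc(unsigned midx, unsigned tbi, bool is_write,
                                unsigned sizem1)
{
    uint32_t desc = 0;

    desc |= (midx & 0xf) << 0;          /* hypothetical MIDX at bits [3:0] */
    desc |= (tbi & 0x3) << 4;           /* hypothetical TBI at bits [5:4] */
    desc |= (uint32_t)is_write << 6;    /* hypothetical WRITE at bit 6 */
    desc |= (sizem1 & 0xff) << 7;       /* hypothetical SIZEM1 at [14:7] */
    return desc;
}
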
    311 typedef struct DisasCompare64 {
    312     TCGCond cond;
    313     TCGv_i64 value;
    314 } DisasCompare64;
    315 
    316 static void a64_test_cc(DisasCompare64 *c64, int cc)
    317 {
    318     DisasCompare c32;
    319 
    320     arm_test_cc(&c32, cc);
    321 
    322     /* Sign-extend the 32-bit value so that the GE/LT comparisons work
     323      * properly.  The NE/EQ comparisons are also fine with this choice.  */
    324     c64->cond = c32.cond;
    325     c64->value = tcg_temp_new_i64();
    326     tcg_gen_ext_i32_i64(c64->value, c32.value);
    327 
    328     arm_free_cc(&c32);
    329 }
    330 
    331 static void a64_free_cc(DisasCompare64 *c64)
    332 {
    333     tcg_temp_free_i64(c64->value);
    334 }
    335 
    336 static void gen_rebuild_hflags(DisasContext *s)
    337 {
    338     gen_helper_rebuild_hflags_a64(cpu_env, tcg_constant_i32(s->current_el));
    339 }
    340 
    341 static void gen_exception_internal(int excp)
    342 {
    343     assert(excp_is_internal(excp));
    344     gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
    345 }
    346 
    347 static void gen_exception_internal_insn(DisasContext *s, int excp)
    348 {
    349     gen_a64_update_pc(s, 0);
    350     gen_exception_internal(excp);
    351     s->base.is_jmp = DISAS_NORETURN;
    352 }
    353 
    354 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
    355 {
    356     gen_a64_update_pc(s, 0);
    357     gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syndrome));
    358     s->base.is_jmp = DISAS_NORETURN;
    359 }
    360 
    361 static void gen_step_complete_exception(DisasContext *s)
    362 {
     363     /* We have just completed a step of an insn. Move from Active-not-pending
    364      * to Active-pending, and then also take the swstep exception.
    365      * This corresponds to making the (IMPDEF) choice to prioritize
    366      * swstep exceptions over asynchronous exceptions taken to an exception
    367      * level where debug is disabled. This choice has the advantage that
    368      * we do not need to maintain internal state corresponding to the
    369      * ISV/EX syndrome bits between completion of the step and generation
    370      * of the exception, and our syndrome information is always correct.
    371      */
    372     gen_ss_advance(s);
    373     gen_swstep_exception(s, 1, s->is_ldex);
    374     s->base.is_jmp = DISAS_NORETURN;
    375 }
    376 
    377 static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
    378 {
    379     if (s->ss_active) {
    380         return false;
    381     }
    382     return translator_use_goto_tb(&s->base, dest);
    383 }
    384 
    385 static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
    386 {
    387     if (use_goto_tb(s, s->pc_curr + diff)) {
    388         /*
    389          * For pcrel, the pc must always be up-to-date on entry to
    390          * the linked TB, so that it can use simple additions for all
    391          * further adjustments.  For !pcrel, the linked TB is compiled
    392          * to know its full virtual address, so we can delay the
    393          * update to pc to the unlinked path.  A long chain of links
    394          * can thus avoid many updates to the PC.
    395          */
    396         if (TARGET_TB_PCREL) {
    397             gen_a64_update_pc(s, diff);
    398             tcg_gen_goto_tb(n);
    399         } else {
    400             tcg_gen_goto_tb(n);
    401             gen_a64_update_pc(s, diff);
    402         }
    403         tcg_gen_exit_tb(s->base.tb, n);
    404         s->base.is_jmp = DISAS_NORETURN;
    405     } else {
    406         gen_a64_update_pc(s, diff);
    407         if (s->ss_active) {
    408             gen_step_complete_exception(s);
    409         } else {
    410             tcg_gen_lookup_and_goto_ptr();
    411             s->base.is_jmp = DISAS_NORETURN;
    412         }
    413     }
    414 }
    415 
    416 static void init_tmp_a64_array(DisasContext *s)
    417 {
    418 #ifdef CONFIG_DEBUG_TCG
    419     memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
    420 #endif
    421     s->tmp_a64_count = 0;
    422 }
    423 
    424 static void free_tmp_a64(DisasContext *s)
    425 {
    426     int i;
    427     for (i = 0; i < s->tmp_a64_count; i++) {
    428         tcg_temp_free_i64(s->tmp_a64[i]);
    429     }
    430     init_tmp_a64_array(s);
    431 }
    432 
    433 TCGv_i64 new_tmp_a64(DisasContext *s)
    434 {
    435     assert(s->tmp_a64_count < TMP_A64_MAX);
    436     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
    437 }
    438 
    439 TCGv_i64 new_tmp_a64_local(DisasContext *s)
    440 {
    441     assert(s->tmp_a64_count < TMP_A64_MAX);
    442     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_local_new_i64();
    443 }
    444 
    445 TCGv_i64 new_tmp_a64_zero(DisasContext *s)
    446 {
    447     TCGv_i64 t = new_tmp_a64(s);
    448     tcg_gen_movi_i64(t, 0);
    449     return t;
    450 }
    451 
    452 /*
    453  * Register access functions
    454  *
     455  * These functions are used for directly accessing a register where
    456  * changes to the final register value are likely to be made. If you
    457  * need to use a register for temporary calculation (e.g. index type
    458  * operations) use the read_* form.
    459  *
    460  * B1.2.1 Register mappings
    461  *
     462  * In the instruction encoding, register 31 can refer to ZR (zero register) or
    463  * the SP (stack pointer) depending on context. In QEMU's case we map SP
    464  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
    465  * This is the point of the _sp forms.
    466  */
    467 TCGv_i64 cpu_reg(DisasContext *s, int reg)
    468 {
    469     if (reg == 31) {
    470         return new_tmp_a64_zero(s);
    471     } else {
    472         return cpu_X[reg];
    473     }
    474 }
    475 
    476 /* register access for when 31 == SP */
    477 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
    478 {
    479     return cpu_X[reg];
    480 }
    481 
    482 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
    483  * representing the register contents. This TCGv is an auto-freed
    484  * temporary so it need not be explicitly freed, and may be modified.
    485  */
    486 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
    487 {
    488     TCGv_i64 v = new_tmp_a64(s);
    489     if (reg != 31) {
    490         if (sf) {
    491             tcg_gen_mov_i64(v, cpu_X[reg]);
    492         } else {
    493             tcg_gen_ext32u_i64(v, cpu_X[reg]);
    494         }
    495     } else {
    496         tcg_gen_movi_i64(v, 0);
    497     }
    498     return v;
    499 }
    500 
    501 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
    502 {
    503     TCGv_i64 v = new_tmp_a64(s);
    504     if (sf) {
    505         tcg_gen_mov_i64(v, cpu_X[reg]);
    506     } else {
    507         tcg_gen_ext32u_i64(v, cpu_X[reg]);
    508     }
    509     return v;
    510 }
    511 
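/*
 * Usage sketch (illustrative, not QEMU code): the encoding-31 split in
 * plain C.  xregs[31] holds SP; whether encoding 31 names SP or XZR is a
 * property of the individual instruction, which is why both accessors exist.
 */
static uint64_t model_read_reg(const uint64_t xregs[32], int reg,
                               bool reg31_is_sp)
{
    if (reg == 31 && !reg31_is_sp) {
        return 0;           /* XZR always reads as zero */
    }
    return xregs[reg];      /* includes SP when reg31_is_sp */
}
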
    512 /* Return the offset into CPUARMState of a slice (from
     513  * the least significant end) of FP register Qn (i.e.
    514  * Dn, Sn, Hn or Bn).
    515  * (Note that this is not the same mapping as for A32; see cpu.h)
    516  */
    517 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
    518 {
    519     return vec_reg_offset(s, regno, 0, size);
    520 }
    521 
    522 /* Offset of the high half of the 128 bit vector Qn */
    523 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
    524 {
    525     return vec_reg_offset(s, regno, 1, MO_64);
    526 }
    527 
    528 /* Convenience accessors for reading and writing single and double
    529  * FP registers. Writing clears the upper parts of the associated
    530  * 128 bit vector register, as required by the architecture.
    531  * Note that unlike the GP register accessors, the values returned
    532  * by the read functions must be manually freed.
    533  */
    534 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
    535 {
    536     TCGv_i64 v = tcg_temp_new_i64();
    537 
    538     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    539     return v;
    540 }
    541 
    542 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
    543 {
    544     TCGv_i32 v = tcg_temp_new_i32();
    545 
    546     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    547     return v;
    548 }
    549 
    550 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
    551 {
    552     TCGv_i32 v = tcg_temp_new_i32();
    553 
    554     tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
    555     return v;
    556 }
    557 
    558 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
    559  * If SVE is not enabled, then there are only 128 bits in the vector.
    560  */
    561 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
    562 {
    563     unsigned ofs = fp_reg_offset(s, rd, MO_64);
    564     unsigned vsz = vec_full_reg_size(s);
    565 
    566     /* Nop move, with side effect of clearing the tail. */
    567     tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
    568 }
    569 
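/*
 * Illustrative note: tcg_gen_gvec_* operations zero the bytes between the
 * operation size (oprsz) and the maximum size (maxsz).  Moving the register
 * onto itself with oprsz = 8 or 16 and maxsz = the full vector size is thus
 * a no-op whose side effect clears the high bytes.
 */
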
    570 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
    571 {
    572     unsigned ofs = fp_reg_offset(s, reg, MO_64);
    573 
    574     tcg_gen_st_i64(v, cpu_env, ofs);
    575     clear_vec_high(s, false, reg);
    576 }
    577 
    578 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
    579 {
    580     TCGv_i64 tmp = tcg_temp_new_i64();
    581 
    582     tcg_gen_extu_i32_i64(tmp, v);
    583     write_fp_dreg(s, reg, tmp);
    584     tcg_temp_free_i64(tmp);
    585 }
    586 
    587 /* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
    588 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
    589                          GVecGen2Fn *gvec_fn, int vece)
    590 {
    591     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
    592             is_q ? 16 : 8, vec_full_reg_size(s));
    593 }
    594 
    595 /* Expand a 2-operand + immediate AdvSIMD vector operation using
    596  * an expander function.
    597  */
    598 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
    599                           int64_t imm, GVecGen2iFn *gvec_fn, int vece)
    600 {
    601     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
    602             imm, is_q ? 16 : 8, vec_full_reg_size(s));
    603 }
    604 
    605 /* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
    606 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
    607                          GVecGen3Fn *gvec_fn, int vece)
    608 {
    609     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
    610             vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
    611 }
    612 
    613 /* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
    614 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
    615                          int rx, GVecGen4Fn *gvec_fn, int vece)
    616 {
    617     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
    618             vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
    619             is_q ? 16 : 8, vec_full_reg_size(s));
    620 }
    621 
    622 /* Expand a 2-operand operation using an out-of-line helper.  */
    623 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
    624                              int rn, int data, gen_helper_gvec_2 *fn)
    625 {
    626     tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
    627                        vec_full_reg_offset(s, rn),
    628                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    629 }
    630 
    631 /* Expand a 3-operand operation using an out-of-line helper.  */
    632 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
    633                              int rn, int rm, int data, gen_helper_gvec_3 *fn)
    634 {
    635     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
    636                        vec_full_reg_offset(s, rn),
    637                        vec_full_reg_offset(s, rm),
    638                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    639 }
    640 
    641 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
    642  * an out-of-line helper.
    643  */
    644 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
    645                               int rm, bool is_fp16, int data,
    646                               gen_helper_gvec_3_ptr *fn)
    647 {
    648     TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    649     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
    650                        vec_full_reg_offset(s, rn),
    651                        vec_full_reg_offset(s, rm), fpst,
    652                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    653     tcg_temp_free_ptr(fpst);
    654 }
    655 
    656 /* Expand a 3-operand + qc + operation using an out-of-line helper.  */
    657 static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
    658                             int rm, gen_helper_gvec_3_ptr *fn)
    659 {
    660     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
    661 
    662     tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
    663     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
    664                        vec_full_reg_offset(s, rn),
    665                        vec_full_reg_offset(s, rm), qc_ptr,
    666                        is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
    667     tcg_temp_free_ptr(qc_ptr);
    668 }
    669 
    670 /* Expand a 4-operand operation using an out-of-line helper.  */
    671 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
    672                              int rm, int ra, int data, gen_helper_gvec_4 *fn)
    673 {
    674     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
    675                        vec_full_reg_offset(s, rn),
    676                        vec_full_reg_offset(s, rm),
    677                        vec_full_reg_offset(s, ra),
    678                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    679 }
    680 
    681 /*
    682  * Expand a 4-operand + fpstatus pointer + simd data value operation using
    683  * an out-of-line helper.
    684  */
    685 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
    686                               int rm, int ra, bool is_fp16, int data,
    687                               gen_helper_gvec_4_ptr *fn)
    688 {
    689     TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    690     tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
    691                        vec_full_reg_offset(s, rn),
    692                        vec_full_reg_offset(s, rm),
    693                        vec_full_reg_offset(s, ra), fpst,
    694                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    695     tcg_temp_free_ptr(fpst);
    696 }
    697 
    698 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
    699  * than the 32 bit equivalent.
    700  */
    701 static inline void gen_set_NZ64(TCGv_i64 result)
    702 {
    703     tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    704     tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
    705 }
    706 
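/*
 * Illustrative note: QEMU stores Z as "cpu_ZF == 0 means Z is set" and N as
 * the sign bit of cpu_NF.  Splitting the 64-bit result into halves therefore
 * works: the OR of the halves is zero exactly when the whole result is zero,
 * and the high half's top bit is bit 63 of the result.
 */
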
    707 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
    708 static inline void gen_logic_CC(int sf, TCGv_i64 result)
    709 {
    710     if (sf) {
    711         gen_set_NZ64(result);
    712     } else {
    713         tcg_gen_extrl_i64_i32(cpu_ZF, result);
    714         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    715     }
    716     tcg_gen_movi_i32(cpu_CF, 0);
    717     tcg_gen_movi_i32(cpu_VF, 0);
    718 }
    719 
    720 /* dest = T0 + T1; compute C, N, V and Z flags */
    721 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
    722 {
    723     if (sf) {
    724         TCGv_i64 result, flag, tmp;
    725         result = tcg_temp_new_i64();
    726         flag = tcg_temp_new_i64();
    727         tmp = tcg_temp_new_i64();
    728 
    729         tcg_gen_movi_i64(tmp, 0);
    730         tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
    731 
    732         tcg_gen_extrl_i64_i32(cpu_CF, flag);
    733 
    734         gen_set_NZ64(result);
    735 
    736         tcg_gen_xor_i64(flag, result, t0);
    737         tcg_gen_xor_i64(tmp, t0, t1);
    738         tcg_gen_andc_i64(flag, flag, tmp);
    739         tcg_temp_free_i64(tmp);
    740         tcg_gen_extrh_i64_i32(cpu_VF, flag);
    741 
    742         tcg_gen_mov_i64(dest, result);
    743         tcg_temp_free_i64(result);
    744         tcg_temp_free_i64(flag);
    745     } else {
    746         /* 32 bit arithmetic */
    747         TCGv_i32 t0_32 = tcg_temp_new_i32();
    748         TCGv_i32 t1_32 = tcg_temp_new_i32();
    749         TCGv_i32 tmp = tcg_temp_new_i32();
    750 
    751         tcg_gen_movi_i32(tmp, 0);
    752         tcg_gen_extrl_i64_i32(t0_32, t0);
    753         tcg_gen_extrl_i64_i32(t1_32, t1);
    754         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
    755         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    756         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    757         tcg_gen_xor_i32(tmp, t0_32, t1_32);
    758         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    759         tcg_gen_extu_i32_i64(dest, cpu_NF);
    760 
    761         tcg_temp_free_i32(tmp);
    762         tcg_temp_free_i32(t0_32);
    763         tcg_temp_free_i32(t1_32);
    764     }
    765 }
    766 
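/*
 * Illustrative sketch (not QEMU code): the flag algebra generated above,
 * evaluated directly for the 64-bit case.  C is the carry out of bit 63;
 * V is bit 63 of (result ^ t0) & ~(t0 ^ t1), i.e. the operands agreed in
 * sign but the result's sign differs.
 */
static void model_add_cc64(uint64_t t0, uint64_t t1,
                           bool *n, bool *z, bool *c, bool *v)
{
    uint64_t result = t0 + t1;

    *n = result >> 63;
    *z = result == 0;
    *c = result < t0;                           /* unsigned carry out */
    *v = ((result ^ t0) & ~(t0 ^ t1)) >> 63;    /* signed overflow */
}
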
    767 /* dest = T0 - T1; compute C, N, V and Z flags */
    768 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
    769 {
    770     if (sf) {
    771         /* 64 bit arithmetic */
    772         TCGv_i64 result, flag, tmp;
    773 
    774         result = tcg_temp_new_i64();
    775         flag = tcg_temp_new_i64();
    776         tcg_gen_sub_i64(result, t0, t1);
    777 
    778         gen_set_NZ64(result);
    779 
    780         tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
    781         tcg_gen_extrl_i64_i32(cpu_CF, flag);
    782 
    783         tcg_gen_xor_i64(flag, result, t0);
    784         tmp = tcg_temp_new_i64();
    785         tcg_gen_xor_i64(tmp, t0, t1);
    786         tcg_gen_and_i64(flag, flag, tmp);
    787         tcg_temp_free_i64(tmp);
    788         tcg_gen_extrh_i64_i32(cpu_VF, flag);
    789         tcg_gen_mov_i64(dest, result);
    790         tcg_temp_free_i64(flag);
    791         tcg_temp_free_i64(result);
    792     } else {
    793         /* 32 bit arithmetic */
    794         TCGv_i32 t0_32 = tcg_temp_new_i32();
    795         TCGv_i32 t1_32 = tcg_temp_new_i32();
    796         TCGv_i32 tmp;
    797 
    798         tcg_gen_extrl_i64_i32(t0_32, t0);
    799         tcg_gen_extrl_i64_i32(t1_32, t1);
    800         tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
    801         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    802         tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
    803         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    804         tmp = tcg_temp_new_i32();
    805         tcg_gen_xor_i32(tmp, t0_32, t1_32);
    806         tcg_temp_free_i32(t0_32);
    807         tcg_temp_free_i32(t1_32);
    808         tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    809         tcg_temp_free_i32(tmp);
    810         tcg_gen_extu_i32_i64(dest, cpu_NF);
    811     }
    812 }
    813 
    814 /* dest = T0 + T1 + CF; do not compute flags. */
    815 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
    816 {
    817     TCGv_i64 flag = tcg_temp_new_i64();
    818     tcg_gen_extu_i32_i64(flag, cpu_CF);
    819     tcg_gen_add_i64(dest, t0, t1);
    820     tcg_gen_add_i64(dest, dest, flag);
    821     tcg_temp_free_i64(flag);
    822 
    823     if (!sf) {
    824         tcg_gen_ext32u_i64(dest, dest);
    825     }
    826 }
    827 
    828 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
    829 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
    830 {
    831     if (sf) {
    832         TCGv_i64 result = tcg_temp_new_i64();
    833         TCGv_i64 cf_64 = tcg_temp_new_i64();
    834         TCGv_i64 vf_64 = tcg_temp_new_i64();
    835         TCGv_i64 tmp = tcg_temp_new_i64();
    836         TCGv_i64 zero = tcg_constant_i64(0);
    837 
    838         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
    839         tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
    840         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
    841         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
    842         gen_set_NZ64(result);
    843 
    844         tcg_gen_xor_i64(vf_64, result, t0);
    845         tcg_gen_xor_i64(tmp, t0, t1);
    846         tcg_gen_andc_i64(vf_64, vf_64, tmp);
    847         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
    848 
    849         tcg_gen_mov_i64(dest, result);
    850 
    851         tcg_temp_free_i64(tmp);
    852         tcg_temp_free_i64(vf_64);
    853         tcg_temp_free_i64(cf_64);
    854         tcg_temp_free_i64(result);
    855     } else {
    856         TCGv_i32 t0_32 = tcg_temp_new_i32();
    857         TCGv_i32 t1_32 = tcg_temp_new_i32();
    858         TCGv_i32 tmp = tcg_temp_new_i32();
    859         TCGv_i32 zero = tcg_constant_i32(0);
    860 
    861         tcg_gen_extrl_i64_i32(t0_32, t0);
    862         tcg_gen_extrl_i64_i32(t1_32, t1);
    863         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
    864         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);
    865 
    866         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    867         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    868         tcg_gen_xor_i32(tmp, t0_32, t1_32);
    869         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    870         tcg_gen_extu_i32_i64(dest, cpu_NF);
    871 
    872         tcg_temp_free_i32(tmp);
    873         tcg_temp_free_i32(t1_32);
    874         tcg_temp_free_i32(t0_32);
    875     }
    876 }
    877 
    878 /*
    879  * Load/Store generators
    880  */
    881 
    882 /*
    883  * Store from GPR register to memory.
    884  */
    885 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
    886                              TCGv_i64 tcg_addr, MemOp memop, int memidx,
    887                              bool iss_valid,
    888                              unsigned int iss_srt,
    889                              bool iss_sf, bool iss_ar)
    890 {
    891     memop = finalize_memop(s, memop);
    892     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
    893 
    894     if (iss_valid) {
    895         uint32_t syn;
    896 
    897         syn = syn_data_abort_with_iss(0,
    898                                       (memop & MO_SIZE),
    899                                       false,
    900                                       iss_srt,
    901                                       iss_sf,
    902                                       iss_ar,
    903                                       0, 0, 0, 0, 0, false);
    904         disas_set_insn_syndrome(s, syn);
    905     }
    906 }
    907 
    908 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
    909                       TCGv_i64 tcg_addr, MemOp memop,
    910                       bool iss_valid,
    911                       unsigned int iss_srt,
    912                       bool iss_sf, bool iss_ar)
    913 {
    914     do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
    915                      iss_valid, iss_srt, iss_sf, iss_ar);
    916 }
    917 
    918 /*
    919  * Load from memory to GPR register
    920  */
    921 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
    922                              MemOp memop, bool extend, int memidx,
    923                              bool iss_valid, unsigned int iss_srt,
    924                              bool iss_sf, bool iss_ar)
    925 {
    926     memop = finalize_memop(s, memop);
    927     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
    928 
    929     if (extend && (memop & MO_SIGN)) {
    930         g_assert((memop & MO_SIZE) <= MO_32);
    931         tcg_gen_ext32u_i64(dest, dest);
    932     }
    933 
    934     if (iss_valid) {
    935         uint32_t syn;
    936 
    937         syn = syn_data_abort_with_iss(0,
    938                                       (memop & MO_SIZE),
    939                                       (memop & MO_SIGN) != 0,
    940                                       iss_srt,
    941                                       iss_sf,
    942                                       iss_ar,
    943                                       0, 0, 0, 0, 0, false);
    944         disas_set_insn_syndrome(s, syn);
    945     }
    946 }
    947 
    948 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
    949                       MemOp memop, bool extend,
    950                       bool iss_valid, unsigned int iss_srt,
    951                       bool iss_sf, bool iss_ar)
    952 {
    953     do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
    954                      iss_valid, iss_srt, iss_sf, iss_ar);
    955 }
    956 
    957 /*
    958  * Store from FP register to memory
    959  */
    960 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
    961 {
    962     /* This writes the bottom N bits of a 128 bit wide vector to memory */
    963     TCGv_i64 tmplo = tcg_temp_new_i64();
    964     MemOp mop;
    965 
    966     tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));
    967 
    968     if (size < 4) {
    969         mop = finalize_memop(s, size);
    970         tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    971     } else {
    972         bool be = s->be_data == MO_BE;
    973         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
    974         TCGv_i64 tmphi = tcg_temp_new_i64();
    975 
    976         tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));
    977 
    978         mop = s->be_data | MO_UQ;
    979         tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
    980                             mop | (s->align_mem ? MO_ALIGN_16 : 0));
    981         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
    982         tcg_gen_qemu_st_i64(be ? tmplo : tmphi, tcg_hiaddr,
    983                             get_mem_index(s), mop);
    984 
    985         tcg_temp_free_i64(tcg_hiaddr);
    986         tcg_temp_free_i64(tmphi);
    987     }
    988 
    989     tcg_temp_free_i64(tmplo);
    990 }
    991 
    992 /*
    993  * Load from memory to FP register
    994  */
    995 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
    996 {
    997     /* This always zero-extends and writes to a full 128 bit wide vector */
    998     TCGv_i64 tmplo = tcg_temp_new_i64();
    999     TCGv_i64 tmphi = NULL;
   1000     MemOp mop;
   1001 
   1002     if (size < 4) {
   1003         mop = finalize_memop(s, size);
   1004         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
   1005     } else {
   1006         bool be = s->be_data == MO_BE;
   1007         TCGv_i64 tcg_hiaddr;
   1008 
   1009         tmphi = tcg_temp_new_i64();
   1010         tcg_hiaddr = tcg_temp_new_i64();
   1011 
   1012         mop = s->be_data | MO_UQ;
   1013         tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
   1014                             mop | (s->align_mem ? MO_ALIGN_16 : 0));
   1015         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
   1016         tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr,
   1017                             get_mem_index(s), mop);
   1018         tcg_temp_free_i64(tcg_hiaddr);
   1019     }
   1020 
   1021     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
   1022     tcg_temp_free_i64(tmplo);
   1023 
   1024     if (tmphi) {
   1025         tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
   1026         tcg_temp_free_i64(tmphi);
   1027     }
   1028     clear_vec_high(s, tmphi != NULL, destidx);
   1029 }
   1030 
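/*
 * Illustrative sketch (not QEMU code): which half of a 128-bit value the
 * two 64-bit accesses above place at the lower address.  On a big-endian
 * data setting the high half goes first.  The real code also byte-swaps
 * within each 64-bit half via the MemOp; this model keeps host byte order.
 */
static void model_st128(uint8_t *mem, uint64_t lo, uint64_t hi,
                        bool big_endian)
{
    uint64_t first = big_endian ? hi : lo;
    uint64_t second = big_endian ? lo : hi;

    memcpy(mem, &first, 8);         /* at the base address */
    memcpy(mem + 8, &second, 8);    /* at base + 8 */
}
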
   1031 /*
   1032  * Vector load/store helpers.
   1033  *
   1034  * The principal difference between this and a FP load is that we don't
   1035  * zero extend as we are filling a partial chunk of the vector register.
   1036  * These functions don't support 128 bit loads/stores, which would be
   1037  * normal load/store operations.
   1038  *
   1039  * The _i32 versions are useful when operating on 32 bit quantities
    1040  * (e.g. for floating point single or using Neon helper functions).
   1041  */
   1042 
   1043 /* Get value of an element within a vector register */
   1044 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
   1045                              int element, MemOp memop)
   1046 {
   1047     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
   1048     switch ((unsigned)memop) {
   1049     case MO_8:
   1050         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
   1051         break;
   1052     case MO_16:
   1053         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
   1054         break;
   1055     case MO_32:
   1056         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
   1057         break;
   1058     case MO_8|MO_SIGN:
   1059         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
   1060         break;
   1061     case MO_16|MO_SIGN:
   1062         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
   1063         break;
   1064     case MO_32|MO_SIGN:
   1065         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
   1066         break;
   1067     case MO_64:
   1068     case MO_64|MO_SIGN:
   1069         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
   1070         break;
   1071     default:
   1072         g_assert_not_reached();
   1073     }
   1074 }
   1075 
   1076 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
   1077                                  int element, MemOp memop)
   1078 {
   1079     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
   1080     switch (memop) {
   1081     case MO_8:
   1082         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
   1083         break;
   1084     case MO_16:
   1085         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
   1086         break;
   1087     case MO_8|MO_SIGN:
   1088         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
   1089         break;
   1090     case MO_16|MO_SIGN:
   1091         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
   1092         break;
   1093     case MO_32:
   1094     case MO_32|MO_SIGN:
   1095         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
   1096         break;
   1097     default:
   1098         g_assert_not_reached();
   1099     }
   1100 }
   1101 
   1102 /* Set value of an element within a vector register */
   1103 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
   1104                               int element, MemOp memop)
   1105 {
   1106     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
   1107     switch (memop) {
   1108     case MO_8:
   1109         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
   1110         break;
   1111     case MO_16:
   1112         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
   1113         break;
   1114     case MO_32:
   1115         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
   1116         break;
   1117     case MO_64:
   1118         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
   1119         break;
   1120     default:
   1121         g_assert_not_reached();
   1122     }
   1123 }
   1124 
   1125 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
   1126                                   int destidx, int element, MemOp memop)
   1127 {
   1128     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
   1129     switch (memop) {
   1130     case MO_8:
   1131         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
   1132         break;
   1133     case MO_16:
   1134         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
   1135         break;
   1136     case MO_32:
   1137         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
   1138         break;
   1139     default:
   1140         g_assert_not_reached();
   1141     }
   1142 }
   1143 
   1144 /* Store from vector register to memory */
   1145 static void do_vec_st(DisasContext *s, int srcidx, int element,
   1146                       TCGv_i64 tcg_addr, MemOp mop)
   1147 {
   1148     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
   1149 
   1150     read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
   1151     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
   1152 
   1153     tcg_temp_free_i64(tcg_tmp);
   1154 }
   1155 
   1156 /* Load from memory to vector register */
   1157 static void do_vec_ld(DisasContext *s, int destidx, int element,
   1158                       TCGv_i64 tcg_addr, MemOp mop)
   1159 {
   1160     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
   1161 
   1162     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
   1163     write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
   1164 
   1165     tcg_temp_free_i64(tcg_tmp);
   1166 }
   1167 
   1168 /* Check that FP/Neon access is enabled. If it is, return
   1169  * true. If not, emit code to generate an appropriate exception,
   1170  * and return false; the caller should not emit any code for
   1171  * the instruction. Note that this check must happen after all
   1172  * unallocated-encoding checks (otherwise the syndrome information
   1173  * for the resulting exception will be incorrect).
   1174  */
   1175 static bool fp_access_check_only(DisasContext *s)
   1176 {
   1177     if (s->fp_excp_el) {
   1178         assert(!s->fp_access_checked);
   1179         s->fp_access_checked = true;
   1180 
   1181         gen_exception_insn_el(s, 0, EXCP_UDEF,
   1182                               syn_fp_access_trap(1, 0xe, false, 0),
   1183                               s->fp_excp_el);
   1184         return false;
   1185     }
   1186     s->fp_access_checked = true;
   1187     return true;
   1188 }
   1189 
   1190 static bool fp_access_check(DisasContext *s)
   1191 {
   1192     if (!fp_access_check_only(s)) {
   1193         return false;
   1194     }
   1195     if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
   1196         gen_exception_insn(s, 0, EXCP_UDEF,
   1197                            syn_smetrap(SME_ET_Streaming, false));
   1198         return false;
   1199     }
   1200     return true;
   1201 }
   1202 
   1203 /*
   1204  * Check that SVE access is enabled.  If it is, return true.
   1205  * If not, emit code to generate an appropriate exception and return false.
   1206  * This function corresponds to CheckSVEEnabled().
   1207  */
   1208 bool sve_access_check(DisasContext *s)
   1209 {
   1210     if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
   1211         assert(dc_isar_feature(aa64_sme, s));
   1212         if (!sme_sm_enabled_check(s)) {
   1213             goto fail_exit;
   1214         }
   1215     } else if (s->sve_excp_el) {
   1216         gen_exception_insn_el(s, 0, EXCP_UDEF,
   1217                               syn_sve_access_trap(), s->sve_excp_el);
   1218         goto fail_exit;
   1219     }
   1220     s->sve_access_checked = true;
   1221     return fp_access_check(s);
   1222 
   1223  fail_exit:
   1224     /* Assert that we only raise one exception per instruction. */
   1225     assert(!s->sve_access_checked);
   1226     s->sve_access_checked = true;
   1227     return false;
   1228 }
   1229 
   1230 /*
   1231  * Check that SME access is enabled, raise an exception if not.
   1232  * Note that this function corresponds to CheckSMEAccess and is
   1233  * only used directly for cpregs.
   1234  */
   1235 static bool sme_access_check(DisasContext *s)
   1236 {
   1237     if (s->sme_excp_el) {
   1238         gen_exception_insn_el(s, 0, EXCP_UDEF,
   1239                               syn_smetrap(SME_ET_AccessTrap, false),
   1240                               s->sme_excp_el);
   1241         return false;
   1242     }
   1243     return true;
   1244 }
   1245 
   1246 /* This function corresponds to CheckSMEEnabled. */
   1247 bool sme_enabled_check(DisasContext *s)
   1248 {
   1249     /*
   1250      * Note that unlike sve_excp_el, we have not constrained sme_excp_el
   1251      * to be zero when fp_excp_el has priority.  This is because we need
   1252      * sme_excp_el by itself for cpregs access checks.
   1253      */
   1254     if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
   1255         s->fp_access_checked = true;
   1256         return sme_access_check(s);
   1257     }
   1258     return fp_access_check_only(s);
   1259 }
   1260 
   1261 /* Common subroutine for CheckSMEAnd*Enabled. */
   1262 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
   1263 {
   1264     if (!sme_enabled_check(s)) {
   1265         return false;
   1266     }
   1267     if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
   1268         gen_exception_insn(s, 0, EXCP_UDEF,
   1269                            syn_smetrap(SME_ET_NotStreaming, false));
   1270         return false;
   1271     }
   1272     if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
   1273         gen_exception_insn(s, 0, EXCP_UDEF,
   1274                            syn_smetrap(SME_ET_InactiveZA, false));
   1275         return false;
   1276     }
   1277     return true;
   1278 }
   1279 
   1280 /*
   1281  * This utility function is for doing register extension with an
   1282  * optional shift. You will likely want to pass a temporary for the
   1283  * destination register. See DecodeRegExtend() in the ARM ARM.
   1284  */
   1285 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
   1286                               int option, unsigned int shift)
   1287 {
   1288     int extsize = extract32(option, 0, 2);
   1289     bool is_signed = extract32(option, 2, 1);
   1290 
   1291     if (is_signed) {
   1292         switch (extsize) {
   1293         case 0:
   1294             tcg_gen_ext8s_i64(tcg_out, tcg_in);
   1295             break;
   1296         case 1:
   1297             tcg_gen_ext16s_i64(tcg_out, tcg_in);
   1298             break;
   1299         case 2:
   1300             tcg_gen_ext32s_i64(tcg_out, tcg_in);
   1301             break;
   1302         case 3:
   1303             tcg_gen_mov_i64(tcg_out, tcg_in);
   1304             break;
   1305         }
   1306     } else {
   1307         switch (extsize) {
   1308         case 0:
   1309             tcg_gen_ext8u_i64(tcg_out, tcg_in);
   1310             break;
   1311         case 1:
   1312             tcg_gen_ext16u_i64(tcg_out, tcg_in);
   1313             break;
   1314         case 2:
   1315             tcg_gen_ext32u_i64(tcg_out, tcg_in);
   1316             break;
   1317         case 3:
   1318             tcg_gen_mov_i64(tcg_out, tcg_in);
   1319             break;
   1320         }
   1321     }
   1322 
   1323     if (shift) {
   1324         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
   1325     }
   1326 }
   1327 
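/*
 * Plain-C model of the extend-and-shift above (illustrative, not QEMU
 * code): option<1:0> selects the source width, option<2> the signedness,
 * and shift is the 0-4 left shift from the instruction.
 */
static uint64_t model_ext_and_shift(uint64_t in, int option, unsigned shift)
{
    bool is_signed = (option >> 2) & 1;
    uint64_t out;

    switch (option & 3) {
    case 0: /* byte */
        out = is_signed ? (uint64_t)(int64_t)(int8_t)in : (uint8_t)in;
        break;
    case 1: /* halfword */
        out = is_signed ? (uint64_t)(int64_t)(int16_t)in : (uint16_t)in;
        break;
    case 2: /* word */
        out = is_signed ? (uint64_t)(int64_t)(int32_t)in : (uint32_t)in;
        break;
    default: /* doubleword: no extension */
        out = in;
        break;
    }
    return out << shift;
}
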
   1328 static inline void gen_check_sp_alignment(DisasContext *s)
   1329 {
   1330     /* The AArch64 architecture mandates that (if enabled via PSTATE
   1331      * or SCTLR bits) there is a check that SP is 16-aligned on every
   1332      * SP-relative load or store (with an exception generated if it is not).
   1333      * In line with general QEMU practice regarding misaligned accesses,
   1334      * we omit these checks for the sake of guest program performance.
   1335      * This function is provided as a hook so we can more easily add these
   1336      * checks in future (possibly as a "favour catching guest program bugs
   1337      * over speed" user selectable option).
   1338      */
   1339 }
   1340 
   1341 /*
    1342  * This provides a simple table-based lookup decoder. It is
   1343  * intended to be used when the relevant bits for decode are too
   1344  * awkwardly placed and switch/if based logic would be confusing and
   1345  * deeply nested. Since it's a linear search through the table, tables
   1346  * should be kept small.
   1347  *
   1348  * It returns the first handler where insn & mask == pattern, or
   1349  * NULL if there is no match.
    1350  * The table is terminated by an empty mask (i.e. 0).
   1351  */
   1352 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
   1353                                                uint32_t insn)
   1354 {
   1355     const AArch64DecodeTable *tptr = table;
   1356 
   1357     while (tptr->mask) {
   1358         if ((insn & tptr->mask) == tptr->pattern) {
   1359             return tptr->disas_fn;
   1360         }
   1361         tptr++;
   1362     }
   1363     return NULL;
   1364 }
   1365 
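/*
 * Usage sketch (illustrative, not from this file): tables are scanned in
 * order and terminated by a zero mask, so more specific patterns must come
 * before more general ones.  The patterns and handlers below are invented.
 */
static void example_trans_a(DisasContext *s, uint32_t insn) { /* ... */ }
static void example_trans_b(DisasContext *s, uint32_t insn) { /* ... */ }

static const AArch64DecodeTable example_table[] = {
    /* pattern      mask          handler       */
    { 0x0e200400, 0x9f200400, example_trans_a }, /* more specific first */
    { 0x0e000000, 0x9e000000, example_trans_b },
    { 0x00000000, 0x00000000, NULL },            /* terminator: zero mask */
};

static void example_disas_group(DisasContext *s, uint32_t insn)
{
    AArch64DecodeFn *fn = lookup_disas_fn(&example_table[0], insn);

    if (fn) {
        fn(s, insn);
    } else {
        unallocated_encoding(s);
    }
}
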
   1366 /*
   1367  * The instruction disassembly implemented here matches
   1368  * the instruction encoding classifications in chapter C4
   1369  * of the ARM Architecture Reference Manual (DDI0487B_a);
   1370  * classification names and decode diagrams here should generally
   1371  * match up with those in the manual.
   1372  */
   1373 
   1374 /* Unconditional branch (immediate)
   1375  *   31  30       26 25                                  0
   1376  * +----+-----------+-------------------------------------+
   1377  * | op | 0 0 1 0 1 |                 imm26               |
   1378  * +----+-----------+-------------------------------------+
   1379  */
   1380 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
   1381 {
   1382     int64_t diff = sextract32(insn, 0, 26) * 4;
   1383 
   1384     if (insn & (1U << 31)) {
   1385         /* BL Branch with link */
   1386         gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
   1387     }
   1388 
   1389     /* B Branch / BL Branch with link */
   1390     reset_btype(s);
   1391     gen_goto_tb(s, 0, diff);
   1392 }
   1393 
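/*
 * Worked example (illustrative, not QEMU code): decoding the imm26 branch
 * offset with plain arithmetic.  0x17ffffff ("B .-4") has imm26 = 0x3ffffff,
 * which sign-extends to -1, giving an offset of -4 bytes.
 */
static int64_t model_b_offset(uint32_t insn)
{
    int64_t imm26 = insn & 0x03ffffff;  /* bits [25:0] */

    if (imm26 & 0x02000000) {           /* bit 25 is the sign bit */
        imm26 -= 0x04000000;
    }
    return imm26 * 4;                   /* scale by the 4-byte insn size */
}
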
   1394 /* Compare and branch (immediate)
   1395  *   31  30         25  24  23                  5 4      0
   1396  * +----+-------------+----+---------------------+--------+
   1397  * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
   1398  * +----+-------------+----+---------------------+--------+
   1399  */
   1400 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
   1401 {
   1402     unsigned int sf, op, rt;
   1403     int64_t diff;
   1404     DisasLabel match;
   1405     TCGv_i64 tcg_cmp;
   1406 
   1407     sf = extract32(insn, 31, 1);
   1408     op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
   1409     rt = extract32(insn, 0, 5);
   1410     diff = sextract32(insn, 5, 19) * 4;
   1411 
   1412     tcg_cmp = read_cpu_reg(s, rt, sf);
   1413     reset_btype(s);
   1414 
   1415     match = gen_disas_label(s);
   1416     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
   1417                         tcg_cmp, 0, match.label);
   1418     gen_goto_tb(s, 0, 4);
   1419     set_disas_label(s, match);
   1420     gen_goto_tb(s, 1, diff);
   1421 }
   1422 
   1423 /* Test and branch (immediate)
   1424  *   31  30         25  24  23   19 18          5 4    0
   1425  * +----+-------------+----+-------+-------------+------+
   1426  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
   1427  * +----+-------------+----+-------+-------------+------+
   1428  */
   1429 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
   1430 {
   1431     unsigned int bit_pos, op, rt;
   1432     int64_t diff;
   1433     DisasLabel match;
   1434     TCGv_i64 tcg_cmp;
   1435 
   1436     bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
   1437     op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
   1438     diff = sextract32(insn, 5, 14) * 4;
   1439     rt = extract32(insn, 0, 5);
   1440 
   1441     tcg_cmp = tcg_temp_new_i64();
   1442     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
   1443 
   1444     reset_btype(s);
   1445 
   1446     match = gen_disas_label(s);
   1447     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
   1448                         tcg_cmp, 0, match.label);
   1449     tcg_temp_free_i64(tcg_cmp);
   1450     gen_goto_tb(s, 0, 4);
   1451     set_disas_label(s, match);
   1452     gen_goto_tb(s, 1, diff);
   1453 }
   1454 
   1455 /* Conditional branch (immediate)
   1456  *  31           25  24  23                  5   4  3    0
   1457  * +---------------+----+---------------------+----+------+
   1458  * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
   1459  * +---------------+----+---------------------+----+------+
   1460  */
   1461 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
   1462 {
   1463     unsigned int cond;
   1464     int64_t diff;
   1465 
   1466     if ((insn & (1 << 4)) || (insn & (1 << 24))) {
   1467         unallocated_encoding(s);
   1468         return;
   1469     }
   1470     diff = sextract32(insn, 5, 19) * 4;
   1471     cond = extract32(insn, 0, 4);
   1472 
   1473     reset_btype(s);
   1474     if (cond < 0x0e) {
   1475         /* genuinely conditional branches */
   1476         DisasLabel match = gen_disas_label(s);
   1477         arm_gen_test_cc(cond, match.label);
   1478         gen_goto_tb(s, 0, 4);
   1479         set_disas_label(s, match);
   1480         gen_goto_tb(s, 1, diff);
   1481     } else {
   1482         /* 0xe and 0xf are both "always" conditions */
   1483         gen_goto_tb(s, 0, diff);
   1484     }
   1485 }
   1486 
   1487 /* HINT instruction group, including various allocated HINTs */
   1488 static void handle_hint(DisasContext *s, uint32_t insn,
   1489                         unsigned int op1, unsigned int op2, unsigned int crm)
   1490 {
   1491     unsigned int selector = crm << 3 | op2;
   1492 
   1493     if (op1 != 3) {
   1494         unallocated_encoding(s);
   1495         return;
   1496     }
   1497 
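             /*
              * The selector is simply the HINT immediate, CRm:op2; e.g. NOP is
              * hint #0 (CRm=0b0000, op2=0b000) and WFI is hint #3 (op2=0b011).
              */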
   1498     switch (selector) {
   1499     case 0b00000: /* NOP */
   1500         break;
   1501     case 0b00011: /* WFI */
   1502         s->base.is_jmp = DISAS_WFI;
   1503         break;
   1504     case 0b00001: /* YIELD */
    1505         /* When running in MTTCG we don't generate jumps to the yield and
    1506          * WFE helpers, since doing so won't affect the scheduling of other
    1507          * vCPUs. If we wanted to model WFE/SEV more completely so we don't
    1508          * busy-spin unnecessarily we would need to do something more involved.
   1509          */
   1510         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
   1511             s->base.is_jmp = DISAS_YIELD;
   1512         }
   1513         break;
   1514     case 0b00010: /* WFE */
   1515         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
   1516             s->base.is_jmp = DISAS_WFE;
   1517         }
   1518         break;
   1519     case 0b00100: /* SEV */
   1520     case 0b00101: /* SEVL */
   1521     case 0b00110: /* DGH */
    1522         /* we treat all of these as NOPs, at least for now */
   1523         break;
   1524     case 0b00111: /* XPACLRI */
   1525         if (s->pauth_active) {
   1526             gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
   1527         }
   1528         break;
   1529     case 0b01000: /* PACIA1716 */
   1530         if (s->pauth_active) {
   1531             gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
   1532         }
   1533         break;
   1534     case 0b01010: /* PACIB1716 */
   1535         if (s->pauth_active) {
   1536             gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
   1537         }
   1538         break;
   1539     case 0b01100: /* AUTIA1716 */
   1540         if (s->pauth_active) {
   1541             gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
   1542         }
   1543         break;
   1544     case 0b01110: /* AUTIB1716 */
   1545         if (s->pauth_active) {
   1546             gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
   1547         }
   1548         break;
   1549     case 0b10000: /* ESB */
   1550         /* Without RAS, we must implement this as NOP. */
   1551         if (dc_isar_feature(aa64_ras, s)) {
   1552             /*
   1553              * QEMU does not have a source of physical SErrors,
   1554              * so we are only concerned with virtual SErrors.
   1555              * The pseudocode in the ARM for this case is
   1556              *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
   1557              *      AArch64.vESBOperation();
   1558              * Most of the condition can be evaluated at translation time.
   1559              * Test for EL2 present, and defer test for SEL2 to runtime.
   1560              */
   1561             if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
   1562                 gen_helper_vesb(cpu_env);
   1563             }
   1564         }
   1565         break;
   1566     case 0b11000: /* PACIAZ */
   1567         if (s->pauth_active) {
   1568             gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
   1569                                 new_tmp_a64_zero(s));
   1570         }
   1571         break;
   1572     case 0b11001: /* PACIASP */
   1573         if (s->pauth_active) {
   1574             gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
   1575         }
   1576         break;
   1577     case 0b11010: /* PACIBZ */
   1578         if (s->pauth_active) {
   1579             gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
   1580                                 new_tmp_a64_zero(s));
   1581         }
   1582         break;
   1583     case 0b11011: /* PACIBSP */
   1584         if (s->pauth_active) {
   1585             gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
   1586         }
   1587         break;
   1588     case 0b11100: /* AUTIAZ */
   1589         if (s->pauth_active) {
   1590             gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
   1591                               new_tmp_a64_zero(s));
   1592         }
   1593         break;
   1594     case 0b11101: /* AUTIASP */
   1595         if (s->pauth_active) {
   1596             gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
   1597         }
   1598         break;
   1599     case 0b11110: /* AUTIBZ */
   1600         if (s->pauth_active) {
   1601             gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
   1602                               new_tmp_a64_zero(s));
   1603         }
   1604         break;
   1605     case 0b11111: /* AUTIBSP */
   1606         if (s->pauth_active) {
   1607             gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
   1608         }
   1609         break;
   1610     default:
   1611         /* default specified as NOP equivalent */
   1612         break;
   1613     }
   1614 }
   1615 
   1616 static void gen_clrex(DisasContext *s, uint32_t insn)
   1617 {
   1618     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
   1619 }
   1620 
   1621 /* CLREX, DSB, DMB, ISB */
   1622 static void handle_sync(DisasContext *s, uint32_t insn,
   1623                         unsigned int op1, unsigned int op2, unsigned int crm)
   1624 {
   1625     TCGBar bar;
   1626 
   1627     if (op1 != 3) {
   1628         unallocated_encoding(s);
   1629         return;
   1630     }
   1631 
   1632     switch (op2) {
   1633     case 2: /* CLREX */
   1634         gen_clrex(s, insn);
   1635         return;
   1636     case 4: /* DSB */
   1637     case 5: /* DMB */
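                 /*
                  * The low two bits of CRm encode the required access types;
                  * e.g. DMB ISHLD has CRm=0b1001, so (crm & 3) == 1 selects
                  * the load-ordering barrier below.
                  */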
   1638         switch (crm & 3) {
   1639         case 1: /* MBReqTypes_Reads */
   1640             bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
   1641             break;
   1642         case 2: /* MBReqTypes_Writes */
   1643             bar = TCG_BAR_SC | TCG_MO_ST_ST;
   1644             break;
   1645         default: /* MBReqTypes_All */
   1646             bar = TCG_BAR_SC | TCG_MO_ALL;
   1647             break;
   1648         }
   1649         tcg_gen_mb(bar);
   1650         return;
   1651     case 6: /* ISB */
    1652         /* We need to break the TB after this insn to execute
    1653          * self-modifying code correctly and also to take
   1654          * any pending interrupts immediately.
   1655          */
   1656         reset_btype(s);
   1657         gen_goto_tb(s, 0, 4);
   1658         return;
   1659 
   1660     case 7: /* SB */
   1661         if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
   1662             goto do_unallocated;
   1663         }
   1664         /*
   1665          * TODO: There is no speculation barrier opcode for TCG;
    1666          * emit a full memory barrier and end the TB instead.
   1667          */
   1668         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
   1669         gen_goto_tb(s, 0, 4);
   1670         return;
   1671 
   1672     default:
   1673     do_unallocated:
   1674         unallocated_encoding(s);
   1675         return;
   1676     }
   1677 }
   1678 
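         /*
          * Reminder of QEMU's condition-flag representation (see
          * target/arm/cpu.h): cpu_NF and cpu_VF hold the flag in bit 31,
          * cpu_CF holds 0 or 1, and cpu_ZF holds a value that is zero iff
          * the Z flag is set.
          */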
   1679 static void gen_xaflag(void)
   1680 {
   1681     TCGv_i32 z = tcg_temp_new_i32();
   1682 
   1683     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
   1684 
   1685     /*
   1686      * (!C & !Z) << 31
   1687      * (!(C | Z)) << 31
   1688      * ~((C | Z) << 31)
   1689      * ~-(C | Z)
   1690      * (C | Z) - 1
   1691      */
   1692     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
   1693     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
   1694 
   1695     /* !(Z & C) */
   1696     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
   1697     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
   1698 
   1699     /* (!C & Z) << 31 -> -(Z & ~C) */
   1700     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
   1701     tcg_gen_neg_i32(cpu_VF, cpu_VF);
   1702 
   1703     /* C | Z */
   1704     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
   1705 
   1706     tcg_temp_free_i32(z);
   1707 }
   1708 
   1709 static void gen_axflag(void)
   1710 {
   1711     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
   1712     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
   1713 
   1714     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
   1715     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
   1716 
   1717     tcg_gen_movi_i32(cpu_NF, 0);
   1718     tcg_gen_movi_i32(cpu_VF, 0);
   1719 }
   1720 
   1721 /* MSR (immediate) - move immediate to processor state field */
   1722 static void handle_msr_i(DisasContext *s, uint32_t insn,
   1723                          unsigned int op1, unsigned int op2, unsigned int crm)
   1724 {
   1725     int op = op1 << 3 | op2;
   1726 
    1727     /* End the TB by default; chaining is OK.  */
   1728     s->base.is_jmp = DISAS_TOO_MANY;
   1729 
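             /*
              * op is op1:op2 and selects the PSTATE field; e.g. MSR DAIFSet, #imm
              * has op1=0b011, op2=0b110, giving op == 0x1e below.
              */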
   1730     switch (op) {
   1731     case 0x00: /* CFINV */
   1732         if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
   1733             goto do_unallocated;
   1734         }
   1735         tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
   1736         s->base.is_jmp = DISAS_NEXT;
   1737         break;
   1738 
   1739     case 0x01: /* XAFlag */
   1740         if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
   1741             goto do_unallocated;
   1742         }
   1743         gen_xaflag();
   1744         s->base.is_jmp = DISAS_NEXT;
   1745         break;
   1746 
   1747     case 0x02: /* AXFlag */
   1748         if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
   1749             goto do_unallocated;
   1750         }
   1751         gen_axflag();
   1752         s->base.is_jmp = DISAS_NEXT;
   1753         break;
   1754 
   1755     case 0x03: /* UAO */
   1756         if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
   1757             goto do_unallocated;
   1758         }
   1759         if (crm & 1) {
   1760             set_pstate_bits(PSTATE_UAO);
   1761         } else {
   1762             clear_pstate_bits(PSTATE_UAO);
   1763         }
   1764         gen_rebuild_hflags(s);
   1765         break;
   1766 
   1767     case 0x04: /* PAN */
   1768         if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
   1769             goto do_unallocated;
   1770         }
   1771         if (crm & 1) {
   1772             set_pstate_bits(PSTATE_PAN);
   1773         } else {
   1774             clear_pstate_bits(PSTATE_PAN);
   1775         }
   1776         gen_rebuild_hflags(s);
   1777         break;
   1778 
   1779     case 0x05: /* SPSel */
   1780         if (s->current_el == 0) {
   1781             goto do_unallocated;
   1782         }
   1783         gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(crm & PSTATE_SP));
   1784         break;
   1785 
   1786     case 0x19: /* SSBS */
   1787         if (!dc_isar_feature(aa64_ssbs, s)) {
   1788             goto do_unallocated;
   1789         }
   1790         if (crm & 1) {
   1791             set_pstate_bits(PSTATE_SSBS);
   1792         } else {
   1793             clear_pstate_bits(PSTATE_SSBS);
   1794         }
   1795         /* Don't need to rebuild hflags since SSBS is a nop */
   1796         break;
   1797 
   1798     case 0x1a: /* DIT */
   1799         if (!dc_isar_feature(aa64_dit, s)) {
   1800             goto do_unallocated;
   1801         }
   1802         if (crm & 1) {
   1803             set_pstate_bits(PSTATE_DIT);
   1804         } else {
   1805             clear_pstate_bits(PSTATE_DIT);
   1806         }
   1807         /* There's no need to rebuild hflags because DIT is a nop */
   1808         break;
   1809 
   1810     case 0x1e: /* DAIFSet */
   1811         gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(crm));
   1812         break;
   1813 
   1814     case 0x1f: /* DAIFClear */
   1815         gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(crm));
   1816         /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
   1817         s->base.is_jmp = DISAS_UPDATE_EXIT;
   1818         break;
   1819 
   1820     case 0x1c: /* TCO */
   1821         if (dc_isar_feature(aa64_mte, s)) {
   1822             /* Full MTE is enabled -- set the TCO bit as directed. */
   1823             if (crm & 1) {
   1824                 set_pstate_bits(PSTATE_TCO);
   1825             } else {
   1826                 clear_pstate_bits(PSTATE_TCO);
   1827             }
   1828             gen_rebuild_hflags(s);
   1829             /* Many factors, including TCO, go into MTE_ACTIVE. */
   1830             s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
   1831         } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
   1832             /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
   1833             s->base.is_jmp = DISAS_NEXT;
   1834         } else {
   1835             goto do_unallocated;
   1836         }
   1837         break;
   1838 
   1839     case 0x1b: /* SVCR* */
   1840         if (!dc_isar_feature(aa64_sme, s) || crm < 2 || crm > 7) {
   1841             goto do_unallocated;
   1842         }
   1843         if (sme_access_check(s)) {
   1844             bool i = crm & 1;
   1845             bool changed = false;
   1846 
   1847             if ((crm & 2) && i != s->pstate_sm) {
   1848                 gen_helper_set_pstate_sm(cpu_env, tcg_constant_i32(i));
   1849                 changed = true;
   1850             }
   1851             if ((crm & 4) && i != s->pstate_za) {
   1852                 gen_helper_set_pstate_za(cpu_env, tcg_constant_i32(i));
   1853                 changed = true;
   1854             }
   1855             if (changed) {
   1856                 gen_rebuild_hflags(s);
   1857             } else {
   1858                 s->base.is_jmp = DISAS_NEXT;
   1859             }
   1860         }
   1861         break;
   1862 
   1863     default:
   1864     do_unallocated:
   1865         unallocated_encoding(s);
   1866         return;
   1867     }
   1868 }
   1869 
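         /*
          * For example, with N and C set but Z and V clear, gen_get_nzcv()
          * yields Rt == 0xa0000000: N in bit 31, Z in bit 30, C in bit 29,
          * V in bit 28.
          */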
   1870 static void gen_get_nzcv(TCGv_i64 tcg_rt)
   1871 {
   1872     TCGv_i32 tmp = tcg_temp_new_i32();
   1873     TCGv_i32 nzcv = tcg_temp_new_i32();
   1874 
   1875     /* build bit 31, N */
   1876     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
   1877     /* build bit 30, Z */
   1878     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
   1879     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
   1880     /* build bit 29, C */
   1881     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
   1882     /* build bit 28, V */
   1883     tcg_gen_shri_i32(tmp, cpu_VF, 31);
   1884     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
   1885     /* generate result */
   1886     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
   1887 
   1888     tcg_temp_free_i32(nzcv);
   1889     tcg_temp_free_i32(tmp);
   1890 }
   1891 
   1892 static void gen_set_nzcv(TCGv_i64 tcg_rt)
   1893 {
   1894     TCGv_i32 nzcv = tcg_temp_new_i32();
   1895 
   1896     /* take NZCV from R[t] */
   1897     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
   1898 
   1899     /* bit 31, N */
   1900     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
   1901     /* bit 30, Z */
   1902     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
   1903     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
   1904     /* bit 29, C */
   1905     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
   1906     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
   1907     /* bit 28, V */
   1908     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
   1909     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
   1910     tcg_temp_free_i32(nzcv);
   1911 }
   1912 
   1913 static void gen_sysreg_undef(DisasContext *s, bool isread,
   1914                              uint8_t op0, uint8_t op1, uint8_t op2,
   1915                              uint8_t crn, uint8_t crm, uint8_t rt)
   1916 {
   1917     /*
   1918      * Generate code to emit an UNDEF with correct syndrome
   1919      * information for a failed system register access.
   1920      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
   1921      * but if FEAT_IDST is implemented then read accesses to registers
   1922      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
   1923      * syndrome.
   1924      */
   1925     uint32_t syndrome;
   1926 
   1927     if (isread && dc_isar_feature(aa64_ids, s) &&
   1928         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
   1929         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
   1930     } else {
   1931         syndrome = syn_uncategorized();
   1932     }
   1933     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
   1934 }
   1935 
   1936 /* MRS - move from system register
   1937  * MSR (register) - move to system register
   1938  * SYS
   1939  * SYSL
   1940  * These are all essentially the same insn in 'read' and 'write'
   1941  * versions, with varying op0 fields.
   1942  */
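         /*
          * For example, MRS x0, CNTVCT_EL0 arrives here with isread=true, rt=0
          * and op0=3, op1=3, crn=14, crm=0, op2=2 (the S3_3_C14_C0_2 encoding).
          */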
   1943 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
   1944                        unsigned int op0, unsigned int op1, unsigned int op2,
   1945                        unsigned int crn, unsigned int crm, unsigned int rt)
   1946 {
   1947     const ARMCPRegInfo *ri;
   1948     TCGv_i64 tcg_rt;
   1949 
   1950     ri = get_arm_cp_reginfo(s->cp_regs,
   1951                             ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
   1952                                                crn, crm, op0, op1, op2));
   1953 
   1954     if (!ri) {
   1955         /* Unknown register; this might be a guest error or a QEMU
   1956          * unimplemented feature.
   1957          */
   1958         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
   1959                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
   1960                       isread ? "read" : "write", op0, op1, crn, crm, op2);
   1961         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
   1962         return;
   1963     }
   1964 
   1965     /* Check access permissions */
   1966     if (!cp_access_ok(s->current_el, ri, isread)) {
   1967         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
   1968         return;
   1969     }
   1970 
   1971     if (ri->accessfn) {
   1972         /* Emit code to perform further access permissions checks at
   1973          * runtime; this may result in an exception.
   1974          */
   1975         uint32_t syndrome;
   1976 
   1977         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
   1978         gen_a64_update_pc(s, 0);
   1979         gen_helper_access_check_cp_reg(cpu_env,
   1980                                        tcg_constant_ptr(ri),
   1981                                        tcg_constant_i32(syndrome),
   1982                                        tcg_constant_i32(isread));
   1983     } else if (ri->type & ARM_CP_RAISES_EXC) {
   1984         /*
   1985          * The readfn or writefn might raise an exception;
   1986          * synchronize the CPU state in case it does.
   1987          */
   1988         gen_a64_update_pc(s, 0);
   1989     }
   1990 
   1991     /* Handle special cases first */
   1992     switch (ri->type & ARM_CP_SPECIAL_MASK) {
   1993     case 0:
   1994         break;
   1995     case ARM_CP_NOP:
   1996         return;
   1997     case ARM_CP_NZCV:
   1998         tcg_rt = cpu_reg(s, rt);
   1999         if (isread) {
   2000             gen_get_nzcv(tcg_rt);
   2001         } else {
   2002             gen_set_nzcv(tcg_rt);
   2003         }
   2004         return;
   2005     case ARM_CP_CURRENTEL:
   2006         /* Reads as current EL value from pstate, which is
   2007          * guaranteed to be constant by the tb flags.
   2008          */
   2009         tcg_rt = cpu_reg(s, rt);
   2010         tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
   2011         return;
   2012     case ARM_CP_DC_ZVA:
   2013         /* Writes clear the aligned block of memory which rt points into. */
   2014         if (s->mte_active[0]) {
   2015             int desc = 0;
   2016 
   2017             desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
   2018             desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
   2019             desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
   2020 
   2021             tcg_rt = new_tmp_a64(s);
   2022             gen_helper_mte_check_zva(tcg_rt, cpu_env,
   2023                                      tcg_constant_i32(desc), cpu_reg(s, rt));
   2024         } else {
   2025             tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
   2026         }
   2027         gen_helper_dc_zva(cpu_env, tcg_rt);
   2028         return;
   2029     case ARM_CP_DC_GVA:
   2030         {
   2031             TCGv_i64 clean_addr, tag;
   2032 
   2033             /*
   2034              * DC_GVA, like DC_ZVA, requires that we supply the original
   2035              * pointer for an invalid page.  Probe that address first.
   2036              */
   2037             tcg_rt = cpu_reg(s, rt);
   2038             clean_addr = clean_data_tbi(s, tcg_rt);
   2039             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
   2040 
   2041             if (s->ata) {
   2042                 /* Extract the tag from the register to match STZGM.  */
   2043                 tag = tcg_temp_new_i64();
   2044                 tcg_gen_shri_i64(tag, tcg_rt, 56);
   2045                 gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
   2046                 tcg_temp_free_i64(tag);
   2047             }
   2048         }
   2049         return;
   2050     case ARM_CP_DC_GZVA:
   2051         {
   2052             TCGv_i64 clean_addr, tag;
   2053 
   2054             /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
   2055             tcg_rt = cpu_reg(s, rt);
   2056             clean_addr = clean_data_tbi(s, tcg_rt);
   2057             gen_helper_dc_zva(cpu_env, clean_addr);
   2058 
   2059             if (s->ata) {
   2060                 /* Extract the tag from the register to match STZGM.  */
   2061                 tag = tcg_temp_new_i64();
   2062                 tcg_gen_shri_i64(tag, tcg_rt, 56);
   2063                 gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
   2064                 tcg_temp_free_i64(tag);
   2065             }
   2066         }
   2067         return;
   2068     default:
   2069         g_assert_not_reached();
   2070     }
   2071     if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
   2072         return;
   2073     } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
   2074         return;
   2075     } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
   2076         return;
   2077     }
   2078 
   2079     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
   2080         gen_io_start();
   2081     }
   2082 
   2083     tcg_rt = cpu_reg(s, rt);
   2084 
   2085     if (isread) {
   2086         if (ri->type & ARM_CP_CONST) {
   2087             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
   2088         } else if (ri->readfn) {
   2089             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tcg_constant_ptr(ri));
   2090         } else {
   2091             tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
   2092         }
   2093     } else {
   2094         if (ri->type & ARM_CP_CONST) {
   2095             /* If not forbidden by access permissions, treat as WI */
   2096             return;
   2097         } else if (ri->writefn) {
   2098             gen_helper_set_cp_reg64(cpu_env, tcg_constant_ptr(ri), tcg_rt);
   2099         } else {
   2100             tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
   2101         }
   2102     }
   2103 
   2104     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
   2105         /* I/O operations must end the TB here (whether read or write) */
   2106         s->base.is_jmp = DISAS_UPDATE_EXIT;
   2107     }
   2108     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
   2109         /*
    2110      * A write to any coprocessor register that ends a TB
   2111          * must rebuild the hflags for the next TB.
   2112          */
   2113         gen_rebuild_hflags(s);
   2114         /*
   2115          * We default to ending the TB on a coprocessor register write,
   2116          * but allow this to be suppressed by the register definition
   2117          * (usually only necessary to work around guest bugs).
   2118          */
   2119         s->base.is_jmp = DISAS_UPDATE_EXIT;
   2120     }
   2121 }
   2122 
   2123 /* System
   2124  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
   2125  * +---------------------+---+-----+-----+-------+-------+-----+------+
   2126  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
   2127  * +---------------------+---+-----+-----+-------+-------+-----+------+
   2128  */
   2129 static void disas_system(DisasContext *s, uint32_t insn)
   2130 {
   2131     unsigned int l, op0, op1, crn, crm, op2, rt;
   2132     l = extract32(insn, 21, 1);
   2133     op0 = extract32(insn, 19, 2);
   2134     op1 = extract32(insn, 16, 3);
   2135     crn = extract32(insn, 12, 4);
   2136     crm = extract32(insn, 8, 4);
   2137     op2 = extract32(insn, 5, 3);
   2138     rt = extract32(insn, 0, 5);
   2139 
   2140     if (op0 == 0) {
   2141         if (l || rt != 31) {
   2142             unallocated_encoding(s);
   2143             return;
   2144         }
   2145         switch (crn) {
   2146         case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
   2147             handle_hint(s, insn, op1, op2, crm);
   2148             break;
   2149         case 3: /* CLREX, DSB, DMB, ISB */
   2150             handle_sync(s, insn, op1, op2, crm);
   2151             break;
   2152         case 4: /* MSR (immediate) */
   2153             handle_msr_i(s, insn, op1, op2, crm);
   2154             break;
   2155         default:
   2156             unallocated_encoding(s);
   2157             break;
   2158         }
   2159         return;
   2160     }
   2161     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
   2162 }
   2163 
   2164 /* Exception generation
   2165  *
   2166  *  31             24 23 21 20                     5 4   2 1  0
   2167  * +-----------------+-----+------------------------+-----+----+
   2168  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
    2169  * +-----------------+-----+------------------------+-----+----+
   2170  */
   2171 static void disas_exc(DisasContext *s, uint32_t insn)
   2172 {
   2173     int opc = extract32(insn, 21, 3);
   2174     int op2_ll = extract32(insn, 0, 5);
   2175     int imm16 = extract32(insn, 5, 16);
   2176 
   2177     switch (opc) {
   2178     case 0:
   2179         /* For SVC, HVC and SMC we advance the single-step state
   2180          * machine before taking the exception. This is architecturally
   2181          * mandated, to ensure that single-stepping a system call
   2182          * instruction works properly.
   2183          */
   2184         switch (op2_ll) {
   2185         case 1:                                                     /* SVC */
   2186             gen_ss_advance(s);
   2187             gen_exception_insn(s, 4, EXCP_SWI, syn_aa64_svc(imm16));
   2188             break;
   2189         case 2:                                                     /* HVC */
   2190             if (s->current_el == 0) {
   2191                 unallocated_encoding(s);
   2192                 break;
   2193             }
   2194             /* The pre HVC helper handles cases when HVC gets trapped
   2195              * as an undefined insn by runtime configuration.
   2196              */
   2197             gen_a64_update_pc(s, 0);
   2198             gen_helper_pre_hvc(cpu_env);
   2199             gen_ss_advance(s);
   2200             gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(imm16), 2);
   2201             break;
   2202         case 3:                                                     /* SMC */
   2203             if (s->current_el == 0) {
   2204                 unallocated_encoding(s);
   2205                 break;
   2206             }
   2207             gen_a64_update_pc(s, 0);
   2208             gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(imm16)));
   2209             gen_ss_advance(s);
   2210             gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(imm16), 3);
   2211             break;
   2212         default:
   2213             unallocated_encoding(s);
   2214             break;
   2215         }
   2216         break;
   2217     case 1:
   2218         if (op2_ll != 0) {
   2219             unallocated_encoding(s);
   2220             break;
   2221         }
   2222         /* BRK */
   2223         gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16));
   2224         break;
   2225     case 2:
   2226         if (op2_ll != 0) {
   2227             unallocated_encoding(s);
   2228             break;
   2229         }
   2230         /* HLT. This has two purposes.
   2231          * Architecturally, it is an external halting debug instruction.
   2232          * Since QEMU doesn't implement external debug, we treat this as
    2233          * required when halting debug is disabled: it will UNDEF.
   2234          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
   2235          */
   2236         if (semihosting_enabled(s->current_el == 0) && imm16 == 0xf000) {
   2237             gen_exception_internal_insn(s, EXCP_SEMIHOST);
   2238         } else {
   2239             unallocated_encoding(s);
   2240         }
   2241         break;
   2242     case 5:
   2243         if (op2_ll < 1 || op2_ll > 3) {
   2244             unallocated_encoding(s);
   2245             break;
   2246         }
   2247         /* DCPS1, DCPS2, DCPS3 */
   2248         unallocated_encoding(s);
   2249         break;
   2250     default:
   2251         unallocated_encoding(s);
   2252         break;
   2253     }
   2254 }
   2255 
   2256 /* Unconditional branch (register)
   2257  *  31           25 24   21 20   16 15   10 9    5 4     0
   2258  * +---------------+-------+-------+-------+------+-------+
   2259  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
   2260  * +---------------+-------+-------+-------+------+-------+
   2261  */
   2262 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
   2263 {
   2264     unsigned int opc, op2, op3, rn, op4;
   2265     unsigned btype_mod = 2;   /* 0: BR, 1: BLR, 2: other */
   2266     TCGv_i64 dst;
   2267     TCGv_i64 modifier;
   2268 
   2269     opc = extract32(insn, 21, 4);
   2270     op2 = extract32(insn, 16, 5);
   2271     op3 = extract32(insn, 10, 6);
   2272     rn = extract32(insn, 5, 5);
   2273     op4 = extract32(insn, 0, 5);
   2274 
   2275     if (op2 != 0x1f) {
   2276         goto do_unallocated;
   2277     }
   2278 
   2279     switch (opc) {
   2280     case 0: /* BR */
   2281     case 1: /* BLR */
   2282     case 2: /* RET */
   2283         btype_mod = opc;
   2284         switch (op3) {
   2285         case 0:
   2286             /* BR, BLR, RET */
   2287             if (op4 != 0) {
   2288                 goto do_unallocated;
   2289             }
   2290             dst = cpu_reg(s, rn);
   2291             break;
   2292 
   2293         case 2:
   2294         case 3:
   2295             if (!dc_isar_feature(aa64_pauth, s)) {
   2296                 goto do_unallocated;
   2297             }
   2298             if (opc == 2) {
   2299                 /* RETAA, RETAB */
   2300                 if (rn != 0x1f || op4 != 0x1f) {
   2301                     goto do_unallocated;
   2302                 }
   2303                 rn = 30;
   2304                 modifier = cpu_X[31];
   2305             } else {
   2306                 /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */
   2307                 if (op4 != 0x1f) {
   2308                     goto do_unallocated;
   2309                 }
   2310                 modifier = new_tmp_a64_zero(s);
   2311             }
   2312             if (s->pauth_active) {
   2313                 dst = new_tmp_a64(s);
   2314                 if (op3 == 2) {
   2315                     gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
   2316                 } else {
   2317                     gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
   2318                 }
   2319             } else {
   2320                 dst = cpu_reg(s, rn);
   2321             }
   2322             break;
   2323 
   2324         default:
   2325             goto do_unallocated;
   2326         }
   2327         /* BLR also needs to load return address */
   2328         if (opc == 1) {
   2329             TCGv_i64 lr = cpu_reg(s, 30);
   2330             if (dst == lr) {
   2331                 TCGv_i64 tmp = new_tmp_a64(s);
   2332                 tcg_gen_mov_i64(tmp, dst);
   2333                 dst = tmp;
   2334             }
   2335             gen_pc_plus_diff(s, lr, curr_insn_len(s));
   2336         }
   2337         gen_a64_set_pc(s, dst);
   2338         break;
   2339 
   2340     case 8: /* BRAA */
   2341     case 9: /* BLRAA */
   2342         if (!dc_isar_feature(aa64_pauth, s)) {
   2343             goto do_unallocated;
   2344         }
   2345         if ((op3 & ~1) != 2) {
   2346             goto do_unallocated;
   2347         }
   2348         btype_mod = opc & 1;
   2349         if (s->pauth_active) {
   2350             dst = new_tmp_a64(s);
   2351             modifier = cpu_reg_sp(s, op4);
   2352             if (op3 == 2) {
   2353                 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
   2354             } else {
   2355                 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
   2356             }
   2357         } else {
   2358             dst = cpu_reg(s, rn);
   2359         }
   2360         /* BLRAA also needs to load return address */
   2361         if (opc == 9) {
   2362             TCGv_i64 lr = cpu_reg(s, 30);
   2363             if (dst == lr) {
   2364                 TCGv_i64 tmp = new_tmp_a64(s);
   2365                 tcg_gen_mov_i64(tmp, dst);
   2366                 dst = tmp;
   2367             }
   2368             gen_pc_plus_diff(s, lr, curr_insn_len(s));
   2369         }
   2370         gen_a64_set_pc(s, dst);
   2371         break;
   2372 
   2373     case 4: /* ERET */
   2374         if (s->current_el == 0) {
   2375             goto do_unallocated;
   2376         }
   2377         switch (op3) {
   2378         case 0: /* ERET */
   2379             if (op4 != 0) {
   2380                 goto do_unallocated;
   2381             }
   2382             dst = tcg_temp_new_i64();
   2383             tcg_gen_ld_i64(dst, cpu_env,
   2384                            offsetof(CPUARMState, elr_el[s->current_el]));
   2385             break;
   2386 
   2387         case 2: /* ERETAA */
   2388         case 3: /* ERETAB */
   2389             if (!dc_isar_feature(aa64_pauth, s)) {
   2390                 goto do_unallocated;
   2391             }
   2392             if (rn != 0x1f || op4 != 0x1f) {
   2393                 goto do_unallocated;
   2394             }
   2395             dst = tcg_temp_new_i64();
   2396             tcg_gen_ld_i64(dst, cpu_env,
   2397                            offsetof(CPUARMState, elr_el[s->current_el]));
   2398             if (s->pauth_active) {
   2399                 modifier = cpu_X[31];
   2400                 if (op3 == 2) {
   2401                     gen_helper_autia(dst, cpu_env, dst, modifier);
   2402                 } else {
   2403                     gen_helper_autib(dst, cpu_env, dst, modifier);
   2404                 }
   2405             }
   2406             break;
   2407 
   2408         default:
   2409             goto do_unallocated;
   2410         }
   2411         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
   2412             gen_io_start();
   2413         }
   2414 
   2415         gen_helper_exception_return(cpu_env, dst);
   2416         tcg_temp_free_i64(dst);
   2417         /* Must exit loop to check un-masked IRQs */
   2418         s->base.is_jmp = DISAS_EXIT;
   2419         return;
   2420 
   2421     case 5: /* DRPS */
   2422         if (op3 != 0 || op4 != 0 || rn != 0x1f) {
   2423             goto do_unallocated;
   2424         } else {
   2425             unallocated_encoding(s);
   2426         }
   2427         return;
   2428 
   2429     default:
   2430     do_unallocated:
   2431         unallocated_encoding(s);
   2432         return;
   2433     }
   2434 
   2435     switch (btype_mod) {
   2436     case 0: /* BR */
   2437         if (dc_isar_feature(aa64_bti, s)) {
   2438             /* BR to {x16,x17} or !guard -> 1, else 3.  */
   2439             set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
   2440         }
   2441         break;
   2442 
   2443     case 1: /* BLR */
   2444         if (dc_isar_feature(aa64_bti, s)) {
   2445             /* BLR sets BTYPE to 2, regardless of source guarded page.  */
   2446             set_btype(s, 2);
   2447         }
   2448         break;
   2449 
   2450     default: /* RET or none of the above.  */
   2451         /* BTYPE will be set to 0 by normal end-of-insn processing.  */
   2452         break;
   2453     }
   2454 
   2455     s->base.is_jmp = DISAS_JUMP;
   2456 }
   2457 
   2458 /* Branches, exception generating and system instructions */
   2459 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
   2460 {
   2461     switch (extract32(insn, 25, 7)) {
   2462     case 0x0a: case 0x0b:
   2463     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
   2464         disas_uncond_b_imm(s, insn);
   2465         break;
   2466     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
   2467         disas_comp_b_imm(s, insn);
   2468         break;
   2469     case 0x1b: case 0x5b: /* Test & branch (immediate) */
   2470         disas_test_b_imm(s, insn);
   2471         break;
   2472     case 0x2a: /* Conditional branch (immediate) */
   2473         disas_cond_b_imm(s, insn);
   2474         break;
   2475     case 0x6a: /* Exception generation / System */
   2476         if (insn & (1 << 24)) {
   2477             if (extract32(insn, 22, 2) == 0) {
   2478                 disas_system(s, insn);
   2479             } else {
   2480                 unallocated_encoding(s);
   2481             }
   2482         } else {
   2483             disas_exc(s, insn);
   2484         }
   2485         break;
   2486     case 0x6b: /* Unconditional branch (register) */
   2487         disas_uncond_b_reg(s, insn);
   2488         break;
   2489     default:
   2490         unallocated_encoding(s);
   2491         break;
   2492     }
   2493 }
   2494 
   2495 /*
   2496  * Load/Store exclusive instructions are implemented by remembering
   2497  * the value/address loaded, and seeing if these are the same
   2498  * when the store is performed. This is not actually the architecturally
   2499  * mandated semantics, but it works for typical guest code sequences
   2500  * and avoids having to monitor regular stores.
   2501  *
   2502  * The store exclusive uses the atomic cmpxchg primitives to avoid
   2503  * races in multi-threaded linux-user and when MTTCG softmmu is
   2504  * enabled.
   2505  */
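         /*
          * A typical guest retry loop that this scheme handles (A64 assembly):
          *
          *     retry:  ldxr  x0, [x1]       <- gen_load_exclusive()
          *             add   x0, x0, #1
          *             stxr  w2, x0, [x1]   <- gen_store_exclusive(); w2=0 on success
          *             cbnz  w2, retry
          */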
   2506 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
   2507                                TCGv_i64 addr, int size, bool is_pair)
   2508 {
   2509     int idx = get_mem_index(s);
   2510     MemOp memop = s->be_data;
   2511 
   2512     g_assert(size <= 3);
   2513     if (is_pair) {
   2514         g_assert(size >= 2);
   2515         if (size == 2) {
   2516             /* The pair must be single-copy atomic for the doubleword.  */
   2517             memop |= MO_64 | MO_ALIGN;
   2518             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
   2519             if (s->be_data == MO_LE) {
   2520                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
   2521                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
   2522             } else {
   2523                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
   2524                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
   2525             }
   2526         } else {
   2527             /* The pair must be single-copy atomic for *each* doubleword, not
    2528                the entire quadword; however, it must be quadword aligned.  */
   2529             memop |= MO_64;
   2530             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
   2531                                 memop | MO_ALIGN_16);
   2532 
   2533             TCGv_i64 addr2 = tcg_temp_new_i64();
   2534             tcg_gen_addi_i64(addr2, addr, 8);
   2535             tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
   2536             tcg_temp_free_i64(addr2);
   2537 
   2538             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
   2539             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
   2540         }
   2541     } else {
   2542         memop |= size | MO_ALIGN;
   2543         tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
   2544         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
   2545     }
   2546     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
   2547 }
   2548 
   2549 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
   2550                                 TCGv_i64 addr, int size, int is_pair)
   2551 {
   2552     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
   2553      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
   2554      *     [addr] = {Rt};
   2555      *     if (is_pair) {
   2556      *         [addr + datasize] = {Rt2};
   2557      *     }
   2558      *     {Rd} = 0;
   2559      * } else {
   2560      *     {Rd} = 1;
   2561      * }
   2562      * env->exclusive_addr = -1;
   2563      */
   2564     TCGLabel *fail_label = gen_new_label();
   2565     TCGLabel *done_label = gen_new_label();
   2566     TCGv_i64 tmp;
   2567 
   2568     tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
   2569 
   2570     tmp = tcg_temp_new_i64();
   2571     if (is_pair) {
   2572         if (size == 2) {
   2573             if (s->be_data == MO_LE) {
   2574                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
   2575             } else {
   2576                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
   2577             }
   2578             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
   2579                                        cpu_exclusive_val, tmp,
   2580                                        get_mem_index(s),
   2581                                        MO_64 | MO_ALIGN | s->be_data);
   2582             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
   2583         } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
   2584             if (!HAVE_CMPXCHG128) {
   2585                 gen_helper_exit_atomic(cpu_env);
   2586                 /*
   2587                  * Produce a result so we have a well-formed opcode
   2588                  * stream when the following (dead) code uses 'tmp'.
   2589                  * TCG will remove the dead ops for us.
   2590                  */
   2591                 tcg_gen_movi_i64(tmp, 0);
   2592             } else if (s->be_data == MO_LE) {
   2593                 gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
   2594                                                         cpu_exclusive_addr,
   2595                                                         cpu_reg(s, rt),
   2596                                                         cpu_reg(s, rt2));
   2597             } else {
   2598                 gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
   2599                                                         cpu_exclusive_addr,
   2600                                                         cpu_reg(s, rt),
   2601                                                         cpu_reg(s, rt2));
   2602             }
   2603         } else if (s->be_data == MO_LE) {
   2604             gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
   2605                                            cpu_reg(s, rt), cpu_reg(s, rt2));
   2606         } else {
   2607             gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
   2608                                            cpu_reg(s, rt), cpu_reg(s, rt2));
   2609         }
   2610     } else {
   2611         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
   2612                                    cpu_reg(s, rt), get_mem_index(s),
   2613                                    size | MO_ALIGN | s->be_data);
   2614         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
   2615     }
   2616     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
   2617     tcg_temp_free_i64(tmp);
   2618     tcg_gen_br(done_label);
   2619 
   2620     gen_set_label(fail_label);
   2621     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
   2622     gen_set_label(done_label);
   2623     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
   2624 }
   2625 
   2626 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
   2627                                  int rn, int size)
   2628 {
   2629     TCGv_i64 tcg_rs = cpu_reg(s, rs);
   2630     TCGv_i64 tcg_rt = cpu_reg(s, rt);
   2631     int memidx = get_mem_index(s);
   2632     TCGv_i64 clean_addr;
   2633 
   2634     if (rn == 31) {
   2635         gen_check_sp_alignment(s);
   2636     }
   2637     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size);
   2638     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
   2639                                size | MO_ALIGN | s->be_data);
   2640 }
   2641 
   2642 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
   2643                                       int rn, int size)
   2644 {
   2645     TCGv_i64 s1 = cpu_reg(s, rs);
   2646     TCGv_i64 s2 = cpu_reg(s, rs + 1);
   2647     TCGv_i64 t1 = cpu_reg(s, rt);
   2648     TCGv_i64 t2 = cpu_reg(s, rt + 1);
   2649     TCGv_i64 clean_addr;
   2650     int memidx = get_mem_index(s);
   2651 
   2652     if (rn == 31) {
   2653         gen_check_sp_alignment(s);
   2654     }
   2655 
   2656     /* This is a single atomic access, despite the "pair". */
   2657     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1);
   2658 
   2659     if (size == 2) {
   2660         TCGv_i64 cmp = tcg_temp_new_i64();
   2661         TCGv_i64 val = tcg_temp_new_i64();
   2662 
   2663         if (s->be_data == MO_LE) {
   2664             tcg_gen_concat32_i64(val, t1, t2);
   2665             tcg_gen_concat32_i64(cmp, s1, s2);
   2666         } else {
   2667             tcg_gen_concat32_i64(val, t2, t1);
   2668             tcg_gen_concat32_i64(cmp, s2, s1);
   2669         }
   2670 
   2671         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
   2672                                    MO_64 | MO_ALIGN | s->be_data);
   2673         tcg_temp_free_i64(val);
   2674 
   2675         if (s->be_data == MO_LE) {
   2676             tcg_gen_extr32_i64(s1, s2, cmp);
   2677         } else {
   2678             tcg_gen_extr32_i64(s2, s1, cmp);
   2679         }
   2680         tcg_temp_free_i64(cmp);
   2681     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
   2682         if (HAVE_CMPXCHG128) {
   2683             TCGv_i32 tcg_rs = tcg_constant_i32(rs);
   2684             if (s->be_data == MO_LE) {
   2685                 gen_helper_casp_le_parallel(cpu_env, tcg_rs,
   2686                                             clean_addr, t1, t2);
   2687             } else {
   2688                 gen_helper_casp_be_parallel(cpu_env, tcg_rs,
   2689                                             clean_addr, t1, t2);
   2690             }
   2691         } else {
   2692             gen_helper_exit_atomic(cpu_env);
   2693             s->base.is_jmp = DISAS_NORETURN;
   2694         }
   2695     } else {
   2696         TCGv_i64 d1 = tcg_temp_new_i64();
   2697         TCGv_i64 d2 = tcg_temp_new_i64();
   2698         TCGv_i64 a2 = tcg_temp_new_i64();
   2699         TCGv_i64 c1 = tcg_temp_new_i64();
   2700         TCGv_i64 c2 = tcg_temp_new_i64();
   2701         TCGv_i64 zero = tcg_constant_i64(0);
   2702 
   2703         /* Load the two words, in memory order.  */
   2704         tcg_gen_qemu_ld_i64(d1, clean_addr, memidx,
   2705                             MO_64 | MO_ALIGN_16 | s->be_data);
   2706         tcg_gen_addi_i64(a2, clean_addr, 8);
   2707         tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data);
   2708 
   2709         /* Compare the two words, also in memory order.  */
   2710         tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
   2711         tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
   2712         tcg_gen_and_i64(c2, c2, c1);
   2713 
   2714         /* If compare equal, write back new data, else write back old data.  */
   2715         tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
   2716         tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
   2717         tcg_gen_qemu_st_i64(c1, clean_addr, memidx, MO_64 | s->be_data);
   2718         tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
   2719         tcg_temp_free_i64(a2);
   2720         tcg_temp_free_i64(c1);
   2721         tcg_temp_free_i64(c2);
   2722 
   2723         /* Write back the data from memory to Rs.  */
   2724         tcg_gen_mov_i64(s1, d1);
   2725         tcg_gen_mov_i64(s2, d2);
   2726         tcg_temp_free_i64(d1);
   2727         tcg_temp_free_i64(d2);
   2728     }
   2729 }
   2730 
    2731 /* Compute the Sixty-Four bit (SF) register size. This logic is derived
   2732  * from the ARMv8 specs for LDR (Shared decode for all encodings).
   2733  */
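         /*
          * For example LDRSW (size=2, is_signed, opc=0b10) targets a 64-bit
          * register and returns true, while LDRSB Wt (opc=0b11) returns false.
          */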
   2734 static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
   2735 {
   2736     int opc0 = extract32(opc, 0, 1);
   2737     int regsize;
   2738 
   2739     if (is_signed) {
   2740         regsize = opc0 ? 32 : 64;
   2741     } else {
   2742         regsize = size == 3 ? 64 : 32;
   2743     }
   2744     return regsize == 64;
   2745 }
   2746 
   2747 /* Load/store exclusive
   2748  *
   2749  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
   2750  * +-----+-------------+----+---+----+------+----+-------+------+------+
   2751  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
   2752  * +-----+-------------+----+---+----+------+----+-------+------+------+
   2753  *
   2754  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
   2755  *   L: 0 -> store, 1 -> load
   2756  *  o2: 0 -> exclusive, 1 -> not
   2757  *  o1: 0 -> single register, 1 -> register pair
   2758  *  o0: 1 -> load-acquire/store-release, 0 -> not
   2759  */
   2760 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
   2761 {
   2762     int rt = extract32(insn, 0, 5);
   2763     int rn = extract32(insn, 5, 5);
   2764     int rt2 = extract32(insn, 10, 5);
   2765     int rs = extract32(insn, 16, 5);
   2766     int is_lasr = extract32(insn, 15, 1);
   2767     int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
   2768     int size = extract32(insn, 30, 2);
   2769     TCGv_i64 clean_addr;
   2770 
   2771     switch (o2_L_o1_o0) {
   2772     case 0x0: /* STXR */
   2773     case 0x1: /* STLXR */
   2774         if (rn == 31) {
   2775             gen_check_sp_alignment(s);
   2776         }
   2777         if (is_lasr) {
   2778             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
   2779         }
   2780         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
   2781                                     true, rn != 31, size);
   2782         gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
   2783         return;
   2784 
   2785     case 0x4: /* LDXR */
   2786     case 0x5: /* LDAXR */
   2787         if (rn == 31) {
   2788             gen_check_sp_alignment(s);
   2789         }
   2790         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
   2791                                     false, rn != 31, size);
   2792         s->is_ldex = true;
   2793         gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
   2794         if (is_lasr) {
   2795             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
   2796         }
   2797         return;
   2798 
   2799     case 0x8: /* STLLR */
   2800         if (!dc_isar_feature(aa64_lor, s)) {
   2801             break;
   2802         }
   2803         /* StoreLORelease is the same as Store-Release for QEMU.  */
   2804         /* fall through */
   2805     case 0x9: /* STLR */
   2806         /* Generate ISS for non-exclusive accesses including LASR.  */
   2807         if (rn == 31) {
   2808             gen_check_sp_alignment(s);
   2809         }
   2810         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
   2811         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
   2812                                     true, rn != 31, size);
   2813         /* TODO: ARMv8.4-LSE SCTLR.nAA */
   2814         do_gpr_st(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, true, rt,
   2815                   disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
   2816         return;
   2817 
   2818     case 0xc: /* LDLAR */
   2819         if (!dc_isar_feature(aa64_lor, s)) {
   2820             break;
   2821         }
   2822         /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
   2823         /* fall through */
   2824     case 0xd: /* LDAR */
   2825         /* Generate ISS for non-exclusive accesses including LASR.  */
   2826         if (rn == 31) {
   2827             gen_check_sp_alignment(s);
   2828         }
   2829         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
   2830                                     false, rn != 31, size);
   2831         /* TODO: ARMv8.4-LSE SCTLR.nAA */
   2832         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, false, true,
   2833                   rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
   2834         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
   2835         return;
   2836 
   2837     case 0x2: case 0x3: /* CASP / STXP */
   2838         if (size & 2) { /* STXP / STLXP */
   2839             if (rn == 31) {
   2840                 gen_check_sp_alignment(s);
   2841             }
   2842             if (is_lasr) {
   2843                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
   2844             }
   2845             clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
   2846                                         true, rn != 31, size);
   2847             gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
   2848             return;
   2849         }
   2850         if (rt2 == 31
   2851             && ((rt | rs) & 1) == 0
   2852             && dc_isar_feature(aa64_atomics, s)) {
   2853             /* CASP / CASPL */
   2854             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
   2855             return;
   2856         }
   2857         break;
   2858 
   2859     case 0x6: case 0x7: /* CASPA / LDXP */
   2860         if (size & 2) { /* LDXP / LDAXP */
   2861             if (rn == 31) {
   2862                 gen_check_sp_alignment(s);
   2863             }
   2864             clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
   2865                                         false, rn != 31, size);
   2866             s->is_ldex = true;
   2867             gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
   2868             if (is_lasr) {
   2869                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
   2870             }
   2871             return;
   2872         }
   2873         if (rt2 == 31
   2874             && ((rt | rs) & 1) == 0
   2875             && dc_isar_feature(aa64_atomics, s)) {
   2876             /* CASPA / CASPAL */
   2877             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
   2878             return;
   2879         }
   2880         break;
   2881 
   2882     case 0xa: /* CAS */
   2883     case 0xb: /* CASL */
   2884     case 0xe: /* CASA */
   2885     case 0xf: /* CASAL */
   2886         if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
   2887             gen_compare_and_swap(s, rs, rt, rn, size);
   2888             return;
   2889         }
   2890         break;
   2891     }
   2892     unallocated_encoding(s);
   2893 }
   2894 
   2895 /*
   2896  * Load register (literal)
   2897  *
   2898  *  31 30 29   27  26 25 24 23                5 4     0
   2899  * +-----+-------+---+-----+-------------------+-------+
   2900  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
   2901  * +-----+-------+---+-----+-------------------+-------+
   2902  *
   2903  * V: 1 -> vector (simd/fp)
   2904  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
   2905  *                   10-> 32 bit signed, 11 -> prefetch
   2906  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
   2907  */
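         /* As a worked example, the word 0x58000041 decodes as opc=01
          * (64-bit), V=0, imm19=2, Rt=1, i.e. "LDR X1, <pc + 8>": the byte
          * offset is the sign-extended imm19 shifted left by 2.
          */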
   2908 static void disas_ld_lit(DisasContext *s, uint32_t insn)
   2909 {
   2910     int rt = extract32(insn, 0, 5);
   2911     int64_t imm = sextract32(insn, 5, 19) << 2;
   2912     bool is_vector = extract32(insn, 26, 1);
   2913     int opc = extract32(insn, 30, 2);
   2914     bool is_signed = false;
   2915     int size = 2;
   2916     TCGv_i64 tcg_rt, clean_addr;
   2917 
   2918     if (is_vector) {
   2919         if (opc == 3) {
   2920             unallocated_encoding(s);
   2921             return;
   2922         }
   2923         size = 2 + opc;
   2924         if (!fp_access_check(s)) {
   2925             return;
   2926         }
   2927     } else {
   2928         if (opc == 3) {
   2929             /* PRFM (literal) : prefetch */
   2930             return;
   2931         }
   2932         size = 2 + extract32(opc, 0, 1);
   2933         is_signed = extract32(opc, 1, 1);
   2934     }
   2935 
   2936     tcg_rt = cpu_reg(s, rt);
   2937 
   2938     clean_addr = new_tmp_a64(s);
   2939     gen_pc_plus_diff(s, clean_addr, imm);
   2940     if (is_vector) {
   2941         do_fp_ld(s, rt, clean_addr, size);
   2942     } else {
   2943         /* Only unsigned 32bit loads target 32bit registers.  */
   2944         bool iss_sf = opc != 0;
   2945 
   2946         do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
   2947                   false, true, rt, iss_sf, false);
   2948     }
   2949 }
   2950 
   2951 /*
   2952  * LDNP (Load Pair - non-temporal hint)
   2953  * LDP (Load Pair - non vector)
   2954  * LDPSW (Load Pair Signed Word - non vector)
   2955  * STNP (Store Pair - non-temporal hint)
   2956  * STP (Store Pair - non vector)
   2957  * LDNP (Load Pair of SIMD&FP - non-temporal hint)
   2958  * LDP (Load Pair of SIMD&FP)
   2959  * STNP (Store Pair of SIMD&FP - non-temporal hint)
   2960  * STP (Store Pair of SIMD&FP)
   2961  *
   2962  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
    2963  * +-----+-------+---+---+-------+---+-------+-------+------+------+
   2964  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
   2965  * +-----+-------+---+---+-------+---+-------+-------+------+------+
   2966  *
   2967  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
   2968  *      LDPSW/STGP               01
   2969  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
   2970  *   V: 0 -> GPR, 1 -> Vector
   2971  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
   2972  *      10 -> signed offset, 11 -> pre-index
   2973  *   L: 0 -> Store 1 -> Load
   2974  *
    2975  * Rt, Rt2 = GPR or SIMD registers to be transferred
   2976  * Rn = general purpose register containing address
   2977  * imm7 = signed offset (multiple of 4 or 8 depending on size)
   2978  */
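         /* As a worked example, "STP X0, X1, [SP, #-16]!" has opc=10
          * (size=3), idx=11 (pre-index with writeback) and imm7=0b1111110
          * (-2); the offset is scaled below by the element size, giving
          * -16 bytes.
          */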
   2979 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
   2980 {
   2981     int rt = extract32(insn, 0, 5);
   2982     int rn = extract32(insn, 5, 5);
   2983     int rt2 = extract32(insn, 10, 5);
   2984     uint64_t offset = sextract64(insn, 15, 7);
   2985     int index = extract32(insn, 23, 2);
   2986     bool is_vector = extract32(insn, 26, 1);
   2987     bool is_load = extract32(insn, 22, 1);
   2988     int opc = extract32(insn, 30, 2);
   2989 
   2990     bool is_signed = false;
   2991     bool postindex = false;
   2992     bool wback = false;
   2993     bool set_tag = false;
   2994 
   2995     TCGv_i64 clean_addr, dirty_addr;
   2996 
   2997     int size;
   2998 
   2999     if (opc == 3) {
   3000         unallocated_encoding(s);
   3001         return;
   3002     }
   3003 
   3004     if (is_vector) {
   3005         size = 2 + opc;
   3006     } else if (opc == 1 && !is_load) {
   3007         /* STGP */
   3008         if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) {
   3009             unallocated_encoding(s);
   3010             return;
   3011         }
   3012         size = 3;
   3013         set_tag = true;
   3014     } else {
   3015         size = 2 + extract32(opc, 1, 1);
   3016         is_signed = extract32(opc, 0, 1);
   3017         if (!is_load && is_signed) {
   3018             unallocated_encoding(s);
   3019             return;
   3020         }
   3021     }
   3022 
   3023     switch (index) {
   3024     case 1: /* post-index */
   3025         postindex = true;
   3026         wback = true;
   3027         break;
   3028     case 0:
    3029         /* Signed offset with "non-temporal" hint.  Since we don't
    3030          * emulate caches, we don't care about hints to the cache
    3031          * system about data access patterns, and handle this
    3032          * identically to plain signed offset.
    3033          */
   3034         if (is_signed) {
   3035             /* There is no non-temporal-hint version of LDPSW */
   3036             unallocated_encoding(s);
   3037             return;
   3038         }
   3039         postindex = false;
   3040         break;
   3041     case 2: /* signed offset, rn not updated */
   3042         postindex = false;
   3043         break;
   3044     case 3: /* pre-index */
   3045         postindex = false;
   3046         wback = true;
   3047         break;
   3048     }
   3049 
   3050     if (is_vector && !fp_access_check(s)) {
   3051         return;
   3052     }
   3053 
   3054     offset <<= (set_tag ? LOG2_TAG_GRANULE : size);
   3055 
   3056     if (rn == 31) {
   3057         gen_check_sp_alignment(s);
   3058     }
   3059 
   3060     dirty_addr = read_cpu_reg_sp(s, rn, 1);
   3061     if (!postindex) {
   3062         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
   3063     }
   3064 
   3065     if (set_tag) {
   3066         if (!s->ata) {
   3067             /*
   3068              * TODO: We could rely on the stores below, at least for
   3069              * system mode, if we arrange to add MO_ALIGN_16.
   3070              */
   3071             gen_helper_stg_stub(cpu_env, dirty_addr);
   3072         } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
   3073             gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr);
   3074         } else {
   3075             gen_helper_stg(cpu_env, dirty_addr, dirty_addr);
   3076         }
   3077     }
   3078 
   3079     clean_addr = gen_mte_checkN(s, dirty_addr, !is_load,
   3080                                 (wback || rn != 31) && !set_tag, 2 << size);
   3081 
   3082     if (is_vector) {
   3083         if (is_load) {
   3084             do_fp_ld(s, rt, clean_addr, size);
   3085         } else {
   3086             do_fp_st(s, rt, clean_addr, size);
   3087         }
   3088         tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
   3089         if (is_load) {
   3090             do_fp_ld(s, rt2, clean_addr, size);
   3091         } else {
   3092             do_fp_st(s, rt2, clean_addr, size);
   3093         }
   3094     } else {
   3095         TCGv_i64 tcg_rt = cpu_reg(s, rt);
   3096         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
   3097 
   3098         if (is_load) {
   3099             TCGv_i64 tmp = tcg_temp_new_i64();
   3100 
   3101             /* Do not modify tcg_rt before recognizing any exception
   3102              * from the second load.
   3103              */
   3104             do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN,
   3105                       false, false, 0, false, false);
   3106             tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
   3107             do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN,
   3108                       false, false, 0, false, false);
   3109 
   3110             tcg_gen_mov_i64(tcg_rt, tmp);
   3111             tcg_temp_free_i64(tmp);
   3112         } else {
   3113             do_gpr_st(s, tcg_rt, clean_addr, size,
   3114                       false, 0, false, false);
   3115             tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
   3116             do_gpr_st(s, tcg_rt2, clean_addr, size,
   3117                       false, 0, false, false);
   3118         }
   3119     }
   3120 
   3121     if (wback) {
   3122         if (postindex) {
   3123             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
   3124         }
   3125         tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
   3126     }
   3127 }
   3128 
   3129 /*
   3130  * Load/store (immediate post-indexed)
   3131  * Load/store (immediate pre-indexed)
   3132  * Load/store (unscaled immediate)
   3133  *
   3134  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
   3135  * +----+-------+---+-----+-----+---+--------+-----+------+------+
   3136  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
   3137  * +----+-------+---+-----+-----+---+--------+-----+------+------+
   3138  *
    3139  * idx: 01 -> post-indexed, 11 -> pre-indexed, 00 -> unscaled imm. (no writeback),
    3140  *      10 -> unprivileged
   3141  * V = 0 -> non-vector
   3142  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
    3143  * opc: 00 -> store, 01 -> load unsigned, 10 -> load signed 64, 11 -> load signed 32
   3144  */
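         /* For instance, "LDR X0, [X1], #8" is idx=01 (post-index): the load
          * uses X1 unmodified and then writes back X1+8, while
          * "LDR X0, [X1, #8]!" is idx=11 (pre-index): the load uses X1+8 and
          * writes that address back.
          */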
   3145 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
   3146                                 int opc,
   3147                                 int size,
   3148                                 int rt,
   3149                                 bool is_vector)
   3150 {
   3151     int rn = extract32(insn, 5, 5);
   3152     int imm9 = sextract32(insn, 12, 9);
   3153     int idx = extract32(insn, 10, 2);
   3154     bool is_signed = false;
   3155     bool is_store = false;
   3156     bool is_extended = false;
   3157     bool is_unpriv = (idx == 2);
   3158     bool iss_valid;
   3159     bool post_index;
   3160     bool writeback;
   3161     int memidx;
   3162 
   3163     TCGv_i64 clean_addr, dirty_addr;
   3164 
   3165     if (is_vector) {
   3166         size |= (opc & 2) << 1;
   3167         if (size > 4 || is_unpriv) {
   3168             unallocated_encoding(s);
   3169             return;
   3170         }
   3171         is_store = ((opc & 1) == 0);
   3172         if (!fp_access_check(s)) {
   3173             return;
   3174         }
   3175     } else {
   3176         if (size == 3 && opc == 2) {
   3177             /* PRFM - prefetch */
   3178             if (idx != 0) {
   3179                 unallocated_encoding(s);
   3180                 return;
   3181             }
   3182             return;
   3183         }
   3184         if (opc == 3 && size > 1) {
   3185             unallocated_encoding(s);
   3186             return;
   3187         }
   3188         is_store = (opc == 0);
   3189         is_signed = extract32(opc, 1, 1);
   3190         is_extended = (size < 3) && extract32(opc, 0, 1);
   3191     }
   3192 
   3193     switch (idx) {
   3194     case 0:
   3195     case 2:
   3196         post_index = false;
   3197         writeback = false;
   3198         break;
   3199     case 1:
   3200         post_index = true;
   3201         writeback = true;
   3202         break;
   3203     case 3:
   3204         post_index = false;
   3205         writeback = true;
   3206         break;
   3207     default:
   3208         g_assert_not_reached();
   3209     }
   3210 
   3211     iss_valid = !is_vector && !writeback;
   3212 
   3213     if (rn == 31) {
   3214         gen_check_sp_alignment(s);
   3215     }
   3216 
   3217     dirty_addr = read_cpu_reg_sp(s, rn, 1);
   3218     if (!post_index) {
   3219         tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
   3220     }
   3221 
   3222     memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
   3223     clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store,
   3224                                        writeback || rn != 31,
   3225                                        size, is_unpriv, memidx);
   3226 
   3227     if (is_vector) {
   3228         if (is_store) {
   3229             do_fp_st(s, rt, clean_addr, size);
   3230         } else {
   3231             do_fp_ld(s, rt, clean_addr, size);
   3232         }
   3233     } else {
   3234         TCGv_i64 tcg_rt = cpu_reg(s, rt);
   3235         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
   3236 
   3237         if (is_store) {
   3238             do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
   3239                              iss_valid, rt, iss_sf, false);
   3240         } else {
   3241             do_gpr_ld_memidx(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
   3242                              is_extended, memidx,
   3243                              iss_valid, rt, iss_sf, false);
   3244         }
   3245     }
   3246 
   3247     if (writeback) {
   3248         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
   3249         if (post_index) {
   3250             tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
   3251         }
   3252         tcg_gen_mov_i64(tcg_rn, dirty_addr);
   3253     }
   3254 }
   3255 
   3256 /*
   3257  * Load/store (register offset)
   3258  *
   3259  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
   3260  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
   3261  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
   3262  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
   3263  *
   3264  * For non-vector:
   3265  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
    3266  *   opc: 00 -> store, 01 -> load unsigned, 10 -> load signed 64, 11 -> load signed 32
   3267  * For vector:
   3268  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
   3269  *   opc<0>: 0 -> store, 1 -> load
   3270  * V: 1 -> vector/simd
   3271  * opt: extend encoding (see DecodeRegExtend)
   3272  * S: if S=1 then scale (essentially index by sizeof(size))
   3273  * Rt: register to transfer into/out of
   3274  * Rn: address register or SP for base
   3275  * Rm: offset register or ZR for offset
   3276  */
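         /* For instance, "LDR W0, [X1, W2, UXTW #2]" has opt=010 (UXTW) and
          * S=1, so the zero-extended W2 is shifted left by size (here 2)
          * before being added to the base; opt values with bit 1 clear are
          * rejected below as unallocated.
          */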
   3277 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
   3278                                    int opc,
   3279                                    int size,
   3280                                    int rt,
   3281                                    bool is_vector)
   3282 {
   3283     int rn = extract32(insn, 5, 5);
   3284     int shift = extract32(insn, 12, 1);
   3285     int rm = extract32(insn, 16, 5);
   3286     int opt = extract32(insn, 13, 3);
   3287     bool is_signed = false;
   3288     bool is_store = false;
   3289     bool is_extended = false;
   3290 
   3291     TCGv_i64 tcg_rm, clean_addr, dirty_addr;
   3292 
   3293     if (extract32(opt, 1, 1) == 0) {
   3294         unallocated_encoding(s);
   3295         return;
   3296     }
   3297 
   3298     if (is_vector) {
   3299         size |= (opc & 2) << 1;
   3300         if (size > 4) {
   3301             unallocated_encoding(s);
   3302             return;
   3303         }
   3304         is_store = !extract32(opc, 0, 1);
   3305         if (!fp_access_check(s)) {
   3306             return;
   3307         }
   3308     } else {
   3309         if (size == 3 && opc == 2) {
   3310             /* PRFM - prefetch */
   3311             return;
   3312         }
   3313         if (opc == 3 && size > 1) {
   3314             unallocated_encoding(s);
   3315             return;
   3316         }
   3317         is_store = (opc == 0);
   3318         is_signed = extract32(opc, 1, 1);
   3319         is_extended = (size < 3) && extract32(opc, 0, 1);
   3320     }
   3321 
   3322     if (rn == 31) {
   3323         gen_check_sp_alignment(s);
   3324     }
   3325     dirty_addr = read_cpu_reg_sp(s, rn, 1);
   3326 
   3327     tcg_rm = read_cpu_reg(s, rm, 1);
   3328     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
   3329 
   3330     tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
   3331     clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size);
   3332 
   3333     if (is_vector) {
   3334         if (is_store) {
   3335             do_fp_st(s, rt, clean_addr, size);
   3336         } else {
   3337             do_fp_ld(s, rt, clean_addr, size);
   3338         }
   3339     } else {
   3340         TCGv_i64 tcg_rt = cpu_reg(s, rt);
   3341         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
   3342         if (is_store) {
   3343             do_gpr_st(s, tcg_rt, clean_addr, size,
   3344                       true, rt, iss_sf, false);
   3345         } else {
   3346             do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
   3347                       is_extended, true, rt, iss_sf, false);
   3348         }
   3349     }
   3350 }
   3351 
   3352 /*
   3353  * Load/store (unsigned immediate)
   3354  *
    3355  * 31 30 29   27  26 25 24 23 22 21        10 9     5 4    0
   3356  * +----+-------+---+-----+-----+------------+-------+------+
   3357  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
   3358  * +----+-------+---+-----+-----+------------+-------+------+
   3359  *
   3360  * For non-vector:
   3361  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
    3362  *   opc: 00 -> store, 01 -> load unsigned, 10 -> load signed 64, 11 -> load signed 32
   3363  * For vector:
   3364  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
   3365  *   opc<0>: 0 -> store, 1 -> load
   3366  * Rn: base address register (inc SP)
   3367  * Rt: target register
   3368  */
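         /* For instance, "LDR X0, [X1, #32]" has size=11 and imm12=4; the
          * offset is the unsigned immediate scaled by the access size,
          * 4 << 3 = 32 bytes, so only non-negative multiples of the size fit
          * this form (an assembler will typically pick the unscaled imm9
          * form otherwise).
          */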
   3369 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
   3370                                         int opc,
   3371                                         int size,
   3372                                         int rt,
   3373                                         bool is_vector)
   3374 {
   3375     int rn = extract32(insn, 5, 5);
   3376     unsigned int imm12 = extract32(insn, 10, 12);
   3377     unsigned int offset;
   3378 
   3379     TCGv_i64 clean_addr, dirty_addr;
   3380 
   3381     bool is_store;
   3382     bool is_signed = false;
   3383     bool is_extended = false;
   3384 
   3385     if (is_vector) {
   3386         size |= (opc & 2) << 1;
   3387         if (size > 4) {
   3388             unallocated_encoding(s);
   3389             return;
   3390         }
   3391         is_store = !extract32(opc, 0, 1);
   3392         if (!fp_access_check(s)) {
   3393             return;
   3394         }
   3395     } else {
   3396         if (size == 3 && opc == 2) {
   3397             /* PRFM - prefetch */
   3398             return;
   3399         }
   3400         if (opc == 3 && size > 1) {
   3401             unallocated_encoding(s);
   3402             return;
   3403         }
   3404         is_store = (opc == 0);
   3405         is_signed = extract32(opc, 1, 1);
   3406         is_extended = (size < 3) && extract32(opc, 0, 1);
   3407     }
   3408 
   3409     if (rn == 31) {
   3410         gen_check_sp_alignment(s);
   3411     }
   3412     dirty_addr = read_cpu_reg_sp(s, rn, 1);
   3413     offset = imm12 << size;
   3414     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
   3415     clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size);
   3416 
   3417     if (is_vector) {
   3418         if (is_store) {
   3419             do_fp_st(s, rt, clean_addr, size);
   3420         } else {
   3421             do_fp_ld(s, rt, clean_addr, size);
   3422         }
   3423     } else {
   3424         TCGv_i64 tcg_rt = cpu_reg(s, rt);
   3425         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
   3426         if (is_store) {
   3427             do_gpr_st(s, tcg_rt, clean_addr, size,
   3428                       true, rt, iss_sf, false);
   3429         } else {
   3430             do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
   3431                       is_extended, true, rt, iss_sf, false);
   3432         }
   3433     }
   3434 }
   3435 
   3436 /* Atomic memory operations
   3437  *
   3438  *  31  30      27  26    24    22  21   16   15    12    10    5     0
   3439  * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
   3440  * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn |  Rt |
    3441  * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
   3442  *
   3443  * Rt: the result register
   3444  * Rn: base address or SP
   3445  * Rs: the source register for the operation
   3446  * V: vector flag (always 0 as of v8.3)
   3447  * A: acquire flag
   3448  * R: release flag
   3449  */
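         /* Note that the case labels below are octal so that they read as
          * o3:opc, the 4 bits extracted from insn[15:12]: e.g. 014 is
          * 0b1100, i.e. o3=1 opc=100, the LDAPR family.
          */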
   3450 static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
   3451                               int size, int rt, bool is_vector)
   3452 {
   3453     int rs = extract32(insn, 16, 5);
   3454     int rn = extract32(insn, 5, 5);
   3455     int o3_opc = extract32(insn, 12, 4);
   3456     bool r = extract32(insn, 22, 1);
   3457     bool a = extract32(insn, 23, 1);
   3458     TCGv_i64 tcg_rs, tcg_rt, clean_addr;
   3459     AtomicThreeOpFn *fn = NULL;
   3460     MemOp mop = s->be_data | size | MO_ALIGN;
   3461 
   3462     if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
   3463         unallocated_encoding(s);
   3464         return;
   3465     }
   3466     switch (o3_opc) {
   3467     case 000: /* LDADD */
   3468         fn = tcg_gen_atomic_fetch_add_i64;
   3469         break;
   3470     case 001: /* LDCLR */
   3471         fn = tcg_gen_atomic_fetch_and_i64;
   3472         break;
   3473     case 002: /* LDEOR */
   3474         fn = tcg_gen_atomic_fetch_xor_i64;
   3475         break;
   3476     case 003: /* LDSET */
   3477         fn = tcg_gen_atomic_fetch_or_i64;
   3478         break;
   3479     case 004: /* LDSMAX */
   3480         fn = tcg_gen_atomic_fetch_smax_i64;
   3481         mop |= MO_SIGN;
   3482         break;
   3483     case 005: /* LDSMIN */
   3484         fn = tcg_gen_atomic_fetch_smin_i64;
   3485         mop |= MO_SIGN;
   3486         break;
   3487     case 006: /* LDUMAX */
   3488         fn = tcg_gen_atomic_fetch_umax_i64;
   3489         break;
   3490     case 007: /* LDUMIN */
   3491         fn = tcg_gen_atomic_fetch_umin_i64;
   3492         break;
   3493     case 010: /* SWP */
   3494         fn = tcg_gen_atomic_xchg_i64;
   3495         break;
   3496     case 014: /* LDAPR, LDAPRH, LDAPRB */
   3497         if (!dc_isar_feature(aa64_rcpc_8_3, s) ||
   3498             rs != 31 || a != 1 || r != 0) {
   3499             unallocated_encoding(s);
   3500             return;
   3501         }
   3502         break;
   3503     default:
   3504         unallocated_encoding(s);
   3505         return;
   3506     }
   3507 
   3508     if (rn == 31) {
   3509         gen_check_sp_alignment(s);
   3510     }
   3511     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size);
   3512 
   3513     if (o3_opc == 014) {
   3514         /*
   3515          * LDAPR* are a special case because they are a simple load, not a
   3516          * fetch-and-do-something op.
   3517          * The architectural consistency requirements here are weaker than
   3518          * full load-acquire (we only need "load-acquire processor consistent"),
   3519          * but we choose to implement them as full LDAQ.
   3520          */
   3521         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false,
   3522                   true, rt, disas_ldst_compute_iss_sf(size, false, 0), true);
   3523         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
   3524         return;
   3525     }
   3526 
   3527     tcg_rs = read_cpu_reg(s, rs, true);
   3528     tcg_rt = cpu_reg(s, rt);
   3529 
   3530     if (o3_opc == 1) { /* LDCLR */
   3531         tcg_gen_not_i64(tcg_rs, tcg_rs);
   3532     }
   3533 
   3534     /* The tcg atomic primitives are all full barriers.  Therefore we
   3535      * can ignore the Acquire and Release bits of this instruction.
   3536      */
   3537     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
   3538 
   3539     if ((mop & MO_SIGN) && size != MO_64) {
   3540         tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
   3541     }
   3542 }
   3543 
   3544 /*
   3545  * PAC memory operations
   3546  *
   3547  *  31  30      27  26    24    22  21       12  11  10    5     0
   3548  * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
   3549  * | size | 1 1 1 | V | 0 0 | M S | 1 |  imm9  | W | 1 | Rn |  Rt |
   3550  * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
   3551  *
   3552  * Rt: the result register
   3553  * Rn: base address or SP
   3554  * V: vector flag (always 0 as of v8.3)
   3555  * M: clear for key DA, set for key DB
   3556  * W: pre-indexing flag
   3557  * S: sign for imm9.
   3558  */
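         /* With size fixed at 3 below, S:imm9 scales to a 13-bit signed byte
          * offset: e.g. "LDRAA X0, [X1, #-8]" has S=1 and imm9=0x1ff, giving
          * (0x3ff << 3) sign-extended from 13 bits = -8.
          */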
   3559 static void disas_ldst_pac(DisasContext *s, uint32_t insn,
   3560                            int size, int rt, bool is_vector)
   3561 {
   3562     int rn = extract32(insn, 5, 5);
   3563     bool is_wback = extract32(insn, 11, 1);
   3564     bool use_key_a = !extract32(insn, 23, 1);
   3565     int offset;
   3566     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
   3567 
   3568     if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
   3569         unallocated_encoding(s);
   3570         return;
   3571     }
   3572 
   3573     if (rn == 31) {
   3574         gen_check_sp_alignment(s);
   3575     }
   3576     dirty_addr = read_cpu_reg_sp(s, rn, 1);
   3577 
   3578     if (s->pauth_active) {
   3579         if (use_key_a) {
   3580             gen_helper_autda(dirty_addr, cpu_env, dirty_addr,
   3581                              new_tmp_a64_zero(s));
   3582         } else {
   3583             gen_helper_autdb(dirty_addr, cpu_env, dirty_addr,
   3584                              new_tmp_a64_zero(s));
   3585         }
   3586     }
   3587 
   3588     /* Form the 10-bit signed, scaled offset.  */
   3589     offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
   3590     offset = sextract32(offset << size, 0, 10 + size);
   3591     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
   3592 
   3593     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
   3594     clean_addr = gen_mte_check1(s, dirty_addr, false,
   3595                                 is_wback || rn != 31, size);
   3596 
   3597     tcg_rt = cpu_reg(s, rt);
   3598     do_gpr_ld(s, tcg_rt, clean_addr, size,
   3599               /* extend */ false, /* iss_valid */ !is_wback,
   3600               /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);
   3601 
   3602     if (is_wback) {
   3603         tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
   3604     }
   3605 }
   3606 
   3607 /*
   3608  * LDAPR/STLR (unscaled immediate)
   3609  *
   3610  *  31  30            24    22  21       12    10    5     0
   3611  * +------+-------------+-----+---+--------+-----+----+-----+
   3612  * | size | 0 1 1 0 0 1 | opc | 0 |  imm9  | 0 0 | Rn |  Rt |
   3613  * +------+-------------+-----+---+--------+-----+----+-----+
   3614  *
   3615  * Rt: source or destination register
   3616  * Rn: base register
   3617  * imm9: unscaled immediate offset
   3618  * opc: 00: STLUR*, 01/10/11: various LDAPUR*
   3619  * size: size of load/store
   3620  */
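         /* For instance, "LDAPURSB W0, [X1]" is size=00 opc=11: the byte is
          * sign-extended to 32 bits and then zero-extended into the 64-bit
          * register, per the usual W-register semantics.
          */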
   3621 static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn)
   3622 {
   3623     int rt = extract32(insn, 0, 5);
   3624     int rn = extract32(insn, 5, 5);
   3625     int offset = sextract32(insn, 12, 9);
   3626     int opc = extract32(insn, 22, 2);
   3627     int size = extract32(insn, 30, 2);
   3628     TCGv_i64 clean_addr, dirty_addr;
   3629     bool is_store = false;
   3630     bool extend = false;
   3631     bool iss_sf;
   3632     MemOp mop;
   3633 
   3634     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
   3635         unallocated_encoding(s);
   3636         return;
   3637     }
   3638 
   3639     /* TODO: ARMv8.4-LSE SCTLR.nAA */
   3640     mop = size | MO_ALIGN;
   3641 
   3642     switch (opc) {
    3643     case 0: /* STLUR* (all sizes) */
   3644         is_store = true;
   3645         break;
   3646     case 1: /* LDAPUR* */
   3647         break;
   3648     case 2: /* LDAPURS* 64-bit variant */
   3649         if (size == 3) {
   3650             unallocated_encoding(s);
   3651             return;
   3652         }
   3653         mop |= MO_SIGN;
   3654         break;
   3655     case 3: /* LDAPURS* 32-bit variant */
   3656         if (size > 1) {
   3657             unallocated_encoding(s);
   3658             return;
   3659         }
   3660         mop |= MO_SIGN;
   3661         extend = true; /* zero-extend 32->64 after signed load */
   3662         break;
   3663     default:
   3664         g_assert_not_reached();
   3665     }
   3666 
   3667     iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc);
   3668 
   3669     if (rn == 31) {
   3670         gen_check_sp_alignment(s);
   3671     }
   3672 
   3673     dirty_addr = read_cpu_reg_sp(s, rn, 1);
   3674     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
   3675     clean_addr = clean_data_tbi(s, dirty_addr);
   3676 
   3677     if (is_store) {
   3678         /* Store-Release semantics */
   3679         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
   3680         do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true);
   3681     } else {
   3682         /*
   3683          * Load-AcquirePC semantics; we implement as the slightly more
   3684          * restrictive Load-Acquire.
   3685          */
   3686         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop,
   3687                   extend, true, rt, iss_sf, true);
   3688         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
   3689     }
   3690 }
   3691 
   3692 /* Load/store register (all forms) */
   3693 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
   3694 {
   3695     int rt = extract32(insn, 0, 5);
   3696     int opc = extract32(insn, 22, 2);
   3697     bool is_vector = extract32(insn, 26, 1);
   3698     int size = extract32(insn, 30, 2);
   3699 
   3700     switch (extract32(insn, 24, 2)) {
   3701     case 0:
   3702         if (extract32(insn, 21, 1) == 0) {
   3703             /* Load/store register (unscaled immediate)
   3704              * Load/store immediate pre/post-indexed
   3705              * Load/store register unprivileged
   3706              */
   3707             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
   3708             return;
   3709         }
   3710         switch (extract32(insn, 10, 2)) {
   3711         case 0:
   3712             disas_ldst_atomic(s, insn, size, rt, is_vector);
   3713             return;
   3714         case 2:
   3715             disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
   3716             return;
   3717         default:
   3718             disas_ldst_pac(s, insn, size, rt, is_vector);
   3719             return;
   3720         }
   3721         break;
   3722     case 1:
   3723         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
   3724         return;
   3725     }
   3726     unallocated_encoding(s);
   3727 }
   3728 
   3729 /* AdvSIMD load/store multiple structures
   3730  *
   3731  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
   3732  * +---+---+---------------+---+-------------+--------+------+------+------+
   3733  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
   3734  * +---+---+---------------+---+-------------+--------+------+------+------+
   3735  *
   3736  * AdvSIMD load/store multiple structures (post-indexed)
   3737  *
   3738  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
   3739  * +---+---+---------------+---+---+---------+--------+------+------+------+
   3740  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
   3741  * +---+---+---------------+---+---+---------+--------+------+------+------+
   3742  *
   3743  * Rt: first (or only) SIMD&FP register to be transferred
   3744  * Rn: base address or SP
    3745  * Rm (post-index only): post-index register (when not 31) or size dependent #imm
   3746  */
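         /* As a worked example, "LD4 {V0.16B-V3.16B}, [X0]" has opcode=0000
          * (rpt=1, selem=4), size=00 and Q=1, so the loop below transfers
          * 16 elements x 4 structure registers = 64 bytes, de-interleaving
          * byte lanes into V0..V3.
          */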
   3747 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
   3748 {
   3749     int rt = extract32(insn, 0, 5);
   3750     int rn = extract32(insn, 5, 5);
   3751     int rm = extract32(insn, 16, 5);
   3752     int size = extract32(insn, 10, 2);
   3753     int opcode = extract32(insn, 12, 4);
   3754     bool is_store = !extract32(insn, 22, 1);
   3755     bool is_postidx = extract32(insn, 23, 1);
   3756     bool is_q = extract32(insn, 30, 1);
   3757     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
   3758     MemOp endian, align, mop;
   3759 
   3760     int total;    /* total bytes */
   3761     int elements; /* elements per vector */
   3762     int rpt;    /* num iterations */
   3763     int selem;  /* structure elements */
   3764     int r;
   3765 
   3766     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
   3767         unallocated_encoding(s);
   3768         return;
   3769     }
   3770 
   3771     if (!is_postidx && rm != 0) {
   3772         unallocated_encoding(s);
   3773         return;
   3774     }
   3775 
   3776     /* From the shared decode logic */
   3777     switch (opcode) {
   3778     case 0x0:
   3779         rpt = 1;
   3780         selem = 4;
   3781         break;
   3782     case 0x2:
   3783         rpt = 4;
   3784         selem = 1;
   3785         break;
   3786     case 0x4:
   3787         rpt = 1;
   3788         selem = 3;
   3789         break;
   3790     case 0x6:
   3791         rpt = 3;
   3792         selem = 1;
   3793         break;
   3794     case 0x7:
   3795         rpt = 1;
   3796         selem = 1;
   3797         break;
   3798     case 0x8:
   3799         rpt = 1;
   3800         selem = 2;
   3801         break;
   3802     case 0xa:
   3803         rpt = 2;
   3804         selem = 1;
   3805         break;
   3806     default:
   3807         unallocated_encoding(s);
   3808         return;
   3809     }
   3810 
   3811     if (size == 3 && !is_q && selem != 1) {
   3812         /* reserved */
   3813         unallocated_encoding(s);
   3814         return;
   3815     }
   3816 
   3817     if (!fp_access_check(s)) {
   3818         return;
   3819     }
   3820 
   3821     if (rn == 31) {
   3822         gen_check_sp_alignment(s);
   3823     }
   3824 
   3825     /* For our purposes, bytes are always little-endian.  */
   3826     endian = s->be_data;
   3827     if (size == 0) {
   3828         endian = MO_LE;
   3829     }
   3830 
   3831     total = rpt * selem * (is_q ? 16 : 8);
   3832     tcg_rn = cpu_reg_sp(s, rn);
   3833 
   3834     /*
   3835      * Issue the MTE check vs the logical repeat count, before we
   3836      * promote consecutive little-endian elements below.
   3837      */
   3838     clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31,
   3839                                 total);
   3840 
   3841     /*
   3842      * Consecutive little-endian elements from a single register
   3843      * can be promoted to a larger little-endian operation.
   3844      */
   3845     align = MO_ALIGN;
   3846     if (selem == 1 && endian == MO_LE) {
   3847         align = pow2_align(size);
   3848         size = 3;
   3849     }
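             /*
              * E.g. "LD1 {V0.16B}, [X0]" (selem=1, size=0) was just promoted
              * to two 64-bit accesses; pow2_align(0) keeps the original
              * byte-element (i.e. no) alignment requirement.
              */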
   3850     if (!s->align_mem) {
   3851         align = 0;
   3852     }
   3853     mop = endian | size | align;
   3854 
   3855     elements = (is_q ? 16 : 8) >> size;
   3856     tcg_ebytes = tcg_constant_i64(1 << size);
   3857     for (r = 0; r < rpt; r++) {
   3858         int e;
   3859         for (e = 0; e < elements; e++) {
   3860             int xs;
   3861             for (xs = 0; xs < selem; xs++) {
   3862                 int tt = (rt + r + xs) % 32;
   3863                 if (is_store) {
   3864                     do_vec_st(s, tt, e, clean_addr, mop);
   3865                 } else {
   3866                     do_vec_ld(s, tt, e, clean_addr, mop);
   3867                 }
   3868                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
   3869             }
   3870         }
   3871     }
   3872 
   3873     if (!is_store) {
   3874         /* For non-quad operations, setting a slice of the low
   3875          * 64 bits of the register clears the high 64 bits (in
   3876          * the ARM ARM pseudocode this is implicit in the fact
   3877          * that 'rval' is a 64 bit wide variable).
    3878          * For quad operations, we might still need to zero the
    3879          * high bits of the SVE register.
   3880          */
   3881         for (r = 0; r < rpt * selem; r++) {
   3882             int tt = (rt + r) % 32;
   3883             clear_vec_high(s, is_q, tt);
   3884         }
   3885     }
   3886 
   3887     if (is_postidx) {
   3888         if (rm == 31) {
   3889             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
   3890         } else {
   3891             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
   3892         }
   3893     }
   3894 }
   3895 
   3896 /* AdvSIMD load/store single structure
   3897  *
   3898  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
   3899  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
   3900  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
   3901  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
   3902  *
   3903  * AdvSIMD load/store single structure (post-indexed)
   3904  *
   3905  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
   3906  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
   3907  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
   3908  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
   3909  *
   3910  * Rt: first (or only) SIMD&FP register to be transferred
   3911  * Rn: base address or SP
    3912  * Rm (post-index only): post-index register (when not 31) or size dependent #imm
   3913  * index = encoded in Q:S:size dependent on size
   3914  *
   3915  * lane_size = encoded in R, opc
   3916  * transfer width = encoded in opc, S, size
   3917  */
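         /* For instance, "LD1 {V1.S}[3], [X0]" has opc=10x -> scale=2
          * (32-bit lanes); lane number 3 comes from Q:S after the low index
          * bits are shifted out below (Q=1, S=1, size=00 -> index 12 >> 2 = 3).
          */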
   3918 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
   3919 {
   3920     int rt = extract32(insn, 0, 5);
   3921     int rn = extract32(insn, 5, 5);
   3922     int rm = extract32(insn, 16, 5);
   3923     int size = extract32(insn, 10, 2);
   3924     int S = extract32(insn, 12, 1);
   3925     int opc = extract32(insn, 13, 3);
   3926     int R = extract32(insn, 21, 1);
   3927     int is_load = extract32(insn, 22, 1);
   3928     int is_postidx = extract32(insn, 23, 1);
   3929     int is_q = extract32(insn, 30, 1);
   3930 
   3931     int scale = extract32(opc, 1, 2);
   3932     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
   3933     bool replicate = false;
   3934     int index = is_q << 3 | S << 2 | size;
   3935     int xs, total;
   3936     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
   3937     MemOp mop;
   3938 
   3939     if (extract32(insn, 31, 1)) {
   3940         unallocated_encoding(s);
   3941         return;
   3942     }
   3943     if (!is_postidx && rm != 0) {
   3944         unallocated_encoding(s);
   3945         return;
   3946     }
   3947 
   3948     switch (scale) {
   3949     case 3:
   3950         if (!is_load || S) {
   3951             unallocated_encoding(s);
   3952             return;
   3953         }
   3954         scale = size;
   3955         replicate = true;
   3956         break;
   3957     case 0:
   3958         break;
   3959     case 1:
   3960         if (extract32(size, 0, 1)) {
   3961             unallocated_encoding(s);
   3962             return;
   3963         }
   3964         index >>= 1;
   3965         break;
   3966     case 2:
   3967         if (extract32(size, 1, 1)) {
   3968             unallocated_encoding(s);
   3969             return;
   3970         }
   3971         if (!extract32(size, 0, 1)) {
   3972             index >>= 2;
   3973         } else {
   3974             if (S) {
   3975                 unallocated_encoding(s);
   3976                 return;
   3977             }
   3978             index >>= 3;
   3979             scale = 3;
   3980         }
   3981         break;
   3982     default:
   3983         g_assert_not_reached();
   3984     }
   3985 
   3986     if (!fp_access_check(s)) {
   3987         return;
   3988     }
   3989 
   3990     if (rn == 31) {
   3991         gen_check_sp_alignment(s);
   3992     }
   3993 
   3994     total = selem << scale;
   3995     tcg_rn = cpu_reg_sp(s, rn);
   3996 
   3997     clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31,
   3998                                 total);
   3999     mop = finalize_memop(s, scale);
   4000 
   4001     tcg_ebytes = tcg_constant_i64(1 << scale);
   4002     for (xs = 0; xs < selem; xs++) {
   4003         if (replicate) {
   4004             /* Load and replicate to all elements */
   4005             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
   4006 
   4007             tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
   4008             tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
   4009                                  (is_q + 1) * 8, vec_full_reg_size(s),
   4010                                  tcg_tmp);
   4011             tcg_temp_free_i64(tcg_tmp);
   4012         } else {
   4013             /* Load/store one element per register */
   4014             if (is_load) {
   4015                 do_vec_ld(s, rt, index, clean_addr, mop);
   4016             } else {
   4017                 do_vec_st(s, rt, index, clean_addr, mop);
   4018             }
   4019         }
   4020         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
   4021         rt = (rt + 1) % 32;
   4022     }
   4023 
   4024     if (is_postidx) {
   4025         if (rm == 31) {
   4026             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
   4027         } else {
   4028             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
   4029         }
   4030     }
   4031 }
   4032 
   4033 /*
   4034  * Load/Store memory tags
   4035  *
   4036  *  31 30 29         24     22  21     12    10      5      0
   4037  * +-----+-------------+-----+---+------+-----+------+------+
   4038  * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 |  Rn  |  Rt  |
   4039  * +-----+-------------+-----+---+------+-----+------+------+
   4040  */
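         /* For the single-register forms below, op2 selects the addressing
          * mode: e.g. for STG (op1=0), op2=01 is post-index, op2=10 is signed
          * offset and op2=11 is pre-index (hence index = op2 - 2), with the
          * offset always a multiple of the 16-byte tag granule.
          */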
   4041 static void disas_ldst_tag(DisasContext *s, uint32_t insn)
   4042 {
   4043     int rt = extract32(insn, 0, 5);
   4044     int rn = extract32(insn, 5, 5);
   4045     uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
   4046     int op2 = extract32(insn, 10, 2);
   4047     int op1 = extract32(insn, 22, 2);
   4048     bool is_load = false, is_pair = false, is_zero = false, is_mult = false;
   4049     int index = 0;
   4050     TCGv_i64 addr, clean_addr, tcg_rt;
   4051 
   4052     /* We checked insn bits [29:24,21] in the caller.  */
   4053     if (extract32(insn, 30, 2) != 3) {
   4054         goto do_unallocated;
   4055     }
   4056 
   4057     /*
    4058      * @index is a tri-state value:
   4059      * < 0 : post-index, writeback
   4060      * = 0 : signed offset
   4061      * > 0 : pre-index, writeback
   4062      */
   4063     switch (op1) {
   4064     case 0:
   4065         if (op2 != 0) {
   4066             /* STG */
   4067             index = op2 - 2;
   4068         } else {
   4069             /* STZGM */
   4070             if (s->current_el == 0 || offset != 0) {
   4071                 goto do_unallocated;
   4072             }
   4073             is_mult = is_zero = true;
   4074         }
   4075         break;
   4076     case 1:
   4077         if (op2 != 0) {
   4078             /* STZG */
   4079             is_zero = true;
   4080             index = op2 - 2;
   4081         } else {
   4082             /* LDG */
   4083             is_load = true;
   4084         }
   4085         break;
   4086     case 2:
   4087         if (op2 != 0) {
   4088             /* ST2G */
   4089             is_pair = true;
   4090             index = op2 - 2;
   4091         } else {
   4092             /* STGM */
   4093             if (s->current_el == 0 || offset != 0) {
   4094                 goto do_unallocated;
   4095             }
   4096             is_mult = true;
   4097         }
   4098         break;
   4099     case 3:
   4100         if (op2 != 0) {
   4101             /* STZ2G */
   4102             is_pair = is_zero = true;
   4103             index = op2 - 2;
   4104         } else {
   4105             /* LDGM */
   4106             if (s->current_el == 0 || offset != 0) {
   4107                 goto do_unallocated;
   4108             }
   4109             is_mult = is_load = true;
   4110         }
   4111         break;
   4112 
   4113     default:
   4114     do_unallocated:
   4115         unallocated_encoding(s);
   4116         return;
   4117     }
   4118 
   4119     if (is_mult
   4120         ? !dc_isar_feature(aa64_mte, s)
   4121         : !dc_isar_feature(aa64_mte_insn_reg, s)) {
   4122         goto do_unallocated;
   4123     }
   4124 
   4125     if (rn == 31) {
   4126         gen_check_sp_alignment(s);
   4127     }
   4128 
   4129     addr = read_cpu_reg_sp(s, rn, true);
   4130     if (index >= 0) {
   4131         /* pre-index or signed offset */
   4132         tcg_gen_addi_i64(addr, addr, offset);
   4133     }
   4134 
   4135     if (is_mult) {
   4136         tcg_rt = cpu_reg(s, rt);
   4137 
   4138         if (is_zero) {
   4139             int size = 4 << s->dcz_blocksize;
   4140 
   4141             if (s->ata) {
   4142                 gen_helper_stzgm_tags(cpu_env, addr, tcg_rt);
   4143             }
   4144             /*
   4145              * The non-tags portion of STZGM is mostly like DC_ZVA,
   4146              * except the alignment happens before the access.
   4147              */
   4148             clean_addr = clean_data_tbi(s, addr);
   4149             tcg_gen_andi_i64(clean_addr, clean_addr, -size);
   4150             gen_helper_dc_zva(cpu_env, clean_addr);
   4151         } else if (s->ata) {
   4152             if (is_load) {
   4153                 gen_helper_ldgm(tcg_rt, cpu_env, addr);
   4154             } else {
   4155                 gen_helper_stgm(cpu_env, addr, tcg_rt);
   4156             }
   4157         } else {
   4158             MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
   4159             int size = 4 << GMID_EL1_BS;
   4160 
   4161             clean_addr = clean_data_tbi(s, addr);
   4162             tcg_gen_andi_i64(clean_addr, clean_addr, -size);
   4163             gen_probe_access(s, clean_addr, acc, size);
   4164 
   4165             if (is_load) {
   4166                 /* The result tags are zeros.  */
   4167                 tcg_gen_movi_i64(tcg_rt, 0);
   4168             }
   4169         }
   4170         return;
   4171     }
   4172 
   4173     if (is_load) {
   4174         tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
   4175         tcg_rt = cpu_reg(s, rt);
   4176         if (s->ata) {
   4177             gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt);
   4178         } else {
   4179             clean_addr = clean_data_tbi(s, addr);
   4180             gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
   4181             gen_address_with_allocation_tag0(tcg_rt, addr);
   4182         }
   4183     } else {
   4184         tcg_rt = cpu_reg_sp(s, rt);
   4185         if (!s->ata) {
   4186             /*
   4187              * For STG and ST2G, we need to check alignment and probe memory.
   4188              * TODO: For STZG and STZ2G, we could rely on the stores below,
   4189              * at least for system mode; user-only won't enforce alignment.
   4190              */
   4191             if (is_pair) {
   4192                 gen_helper_st2g_stub(cpu_env, addr);
   4193             } else {
   4194                 gen_helper_stg_stub(cpu_env, addr);
   4195             }
   4196         } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
   4197             if (is_pair) {
   4198                 gen_helper_st2g_parallel(cpu_env, addr, tcg_rt);
   4199             } else {
   4200                 gen_helper_stg_parallel(cpu_env, addr, tcg_rt);
   4201             }
   4202         } else {
   4203             if (is_pair) {
   4204                 gen_helper_st2g(cpu_env, addr, tcg_rt);
   4205             } else {
   4206                 gen_helper_stg(cpu_env, addr, tcg_rt);
   4207             }
   4208         }
   4209     }
   4210 
   4211     if (is_zero) {
   4212         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
   4213         TCGv_i64 tcg_zero = tcg_constant_i64(0);
   4214         int mem_index = get_mem_index(s);
   4215         int i, n = (1 + is_pair) << LOG2_TAG_GRANULE;
   4216 
   4217         tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index,
   4218                             MO_UQ | MO_ALIGN_16);
   4219         for (i = 8; i < n; i += 8) {
   4220             tcg_gen_addi_i64(clean_addr, clean_addr, 8);
   4221             tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_UQ);
   4222         }
   4223     }
   4224 
   4225     if (index != 0) {
   4226         /* pre-index or post-index */
   4227         if (index < 0) {
   4228             /* post-index */
   4229             tcg_gen_addi_i64(addr, addr, offset);
   4230         }
   4231         tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr);
   4232     }
   4233 }
   4234 
   4235 /* Loads and stores */
   4236 static void disas_ldst(DisasContext *s, uint32_t insn)
   4237 {
   4238     switch (extract32(insn, 24, 6)) {
   4239     case 0x08: /* Load/store exclusive */
   4240         disas_ldst_excl(s, insn);
   4241         break;
   4242     case 0x18: case 0x1c: /* Load register (literal) */
   4243         disas_ld_lit(s, insn);
   4244         break;
   4245     case 0x28: case 0x29:
   4246     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
   4247         disas_ldst_pair(s, insn);
   4248         break;
   4249     case 0x38: case 0x39:
   4250     case 0x3c: case 0x3d: /* Load/store register (all forms) */
   4251         disas_ldst_reg(s, insn);
   4252         break;
   4253     case 0x0c: /* AdvSIMD load/store multiple structures */
   4254         disas_ldst_multiple_struct(s, insn);
   4255         break;
   4256     case 0x0d: /* AdvSIMD load/store single structure */
   4257         disas_ldst_single_struct(s, insn);
   4258         break;
   4259     case 0x19:
   4260         if (extract32(insn, 21, 1) != 0) {
   4261             disas_ldst_tag(s, insn);
   4262         } else if (extract32(insn, 10, 2) == 0) {
   4263             disas_ldst_ldapr_stlr(s, insn);
   4264         } else {
   4265             unallocated_encoding(s);
   4266         }
   4267         break;
   4268     default:
   4269         unallocated_encoding(s);
   4270         break;
   4271     }
   4272 }
   4273 
   4274 /* PC-rel. addressing
   4275  *   31  30   29 28       24 23                5 4    0
   4276  * +----+-------+-----------+-------------------+------+
   4277  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
   4278  * +----+-------+-----------+-------------------+------+
   4279  */
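         /* As a worked example: with pc_curr = 0x40001234, an ADRP with
          * immhi:immlo = 1 computes offset = (1 << 12) - 0x234 = 0xdcc, so
          * Rd = 0x40002000: the immediate selects a 4KB page relative to the
          * page containing the instruction.
          */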
   4280 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
   4281 {
   4282     unsigned int page, rd;
   4283     int64_t offset;
   4284 
   4285     page = extract32(insn, 31, 1);
   4286     /* SignExtend(immhi:immlo) -> offset */
   4287     offset = sextract64(insn, 5, 19);
   4288     offset = offset << 2 | extract32(insn, 29, 2);
   4289     rd = extract32(insn, 0, 5);
   4290 
   4291     if (page) {
   4292         /* ADRP (page based) */
   4293         offset <<= 12;
   4294         /* The page offset is ok for TARGET_TB_PCREL. */
   4295         offset -= s->pc_curr & 0xfff;
   4296     }
   4297 
   4298     gen_pc_plus_diff(s, cpu_reg(s, rd), offset);
   4299 }
   4300 
   4301 /*
   4302  * Add/subtract (immediate)
   4303  *
   4304  *  31 30 29 28         23 22 21         10 9   5 4   0
   4305  * +--+--+--+-------------+--+-------------+-----+-----+
   4306  * |sf|op| S| 1 0 0 0 1 0 |sh|    imm12    |  Rn | Rd  |
   4307  * +--+--+--+-------------+--+-------------+-----+-----+
   4308  *
   4309  *    sf: 0 -> 32bit, 1 -> 64bit
   4310  *    op: 0 -> add  , 1 -> sub
   4311  *     S: 1 -> set flags
   4312  *    sh: 1 -> LSL imm by 12
   4313  */
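         /* For instance, "ADD X0, X1, #1, LSL #12" has sh=1, so the 12-bit
          * immediate is shifted to give 0x1000; "CMP X1, #4" is the same
          * encoding as "SUBS XZR, X1, #4" (op=1, S=1, Rd=31).
          */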
   4314 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
   4315 {
   4316     int rd = extract32(insn, 0, 5);
   4317     int rn = extract32(insn, 5, 5);
   4318     uint64_t imm = extract32(insn, 10, 12);
   4319     bool shift = extract32(insn, 22, 1);
   4320     bool setflags = extract32(insn, 29, 1);
   4321     bool sub_op = extract32(insn, 30, 1);
   4322     bool is_64bit = extract32(insn, 31, 1);
   4323 
   4324     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
   4325     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
   4326     TCGv_i64 tcg_result;
   4327 
   4328     if (shift) {
   4329         imm <<= 12;
   4330     }
   4331 
   4332     tcg_result = tcg_temp_new_i64();
   4333     if (!setflags) {
   4334         if (sub_op) {
   4335             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
   4336         } else {
   4337             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
   4338         }
   4339     } else {
   4340         TCGv_i64 tcg_imm = tcg_constant_i64(imm);
   4341         if (sub_op) {
   4342             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
   4343         } else {
   4344             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
   4345         }
   4346     }
   4347 
   4348     if (is_64bit) {
   4349         tcg_gen_mov_i64(tcg_rd, tcg_result);
   4350     } else {
   4351         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
   4352     }
   4353 
   4354     tcg_temp_free_i64(tcg_result);
   4355 }
   4356 
   4357 /*
   4358  * Add/subtract (immediate, with tags)
   4359  *
   4360  *  31 30 29 28         23 22 21     16 14      10 9   5 4   0
   4361  * +--+--+--+-------------+--+---------+--+-------+-----+-----+
   4362  * |sf|op| S| 1 0 0 0 1 1 |o2|  uimm6  |o3| uimm4 |  Rn | Rd  |
   4363  * +--+--+--+-------------+--+---------+--+-------+-----+-----+
   4364  *
   4365  *    op: 0 -> add, 1 -> sub
   4366  */
   4367 static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn)
   4368 {
   4369     int rd = extract32(insn, 0, 5);
   4370     int rn = extract32(insn, 5, 5);
   4371     int uimm4 = extract32(insn, 10, 4);
   4372     int uimm6 = extract32(insn, 16, 6);
   4373     bool sub_op = extract32(insn, 30, 1);
   4374     TCGv_i64 tcg_rn, tcg_rd;
   4375     int imm;
   4376 
   4377     /* Test all of sf=1, S=0, o2=0, o3=0.  */
   4378     if ((insn & 0xa040c000u) != 0x80000000u ||
   4379         !dc_isar_feature(aa64_mte_insn_reg, s)) {
   4380         unallocated_encoding(s);
   4381         return;
   4382     }
   4383 
   4384     imm = uimm6 << LOG2_TAG_GRANULE;
   4385     if (sub_op) {
   4386         imm = -imm;
   4387     }
   4388 
   4389     tcg_rn = cpu_reg_sp(s, rn);
   4390     tcg_rd = cpu_reg_sp(s, rd);
   4391 
   4392     if (s->ata) {
   4393         gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn,
   4394                            tcg_constant_i32(imm),
   4395                            tcg_constant_i32(uimm4));
   4396     } else {
   4397         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
   4398         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
   4399     }
   4400 }
   4401 
   4402 /* The input should be a value in the bottom e bits (with higher
   4403  * bits zero); returns that value replicated into every element
   4404  * of size e in a 64 bit integer.
   4405  */
   4406 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
   4407 {
   4408     assert(e != 0);
   4409     while (e < 64) {
   4410         mask |= mask << e;
   4411         e *= 2;
   4412     }
   4413     return mask;
   4414 }
   4415 
   4416 /* Return a value with the bottom len bits set (where 0 < len <= 64) */
   4417 static inline uint64_t bitmask64(unsigned int length)
   4418 {
   4419     assert(length > 0 && length <= 64);
   4420     return ~0ULL >> (64 - length);
   4421 }
   4422 
   4423 /* Simplified variant of pseudocode DecodeBitMasks() for the case where we
   4424  * only require the wmask. Returns false if the imms/immr/immn are a reserved
   4425  * value (ie should cause a guest UNDEF exception), and true if they are
   4426  * valid, in which case the decoded bit pattern is written to result.
   4427  */
   4428 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
   4429                             unsigned int imms, unsigned int immr)
   4430 {
   4431     uint64_t mask;
   4432     unsigned e, levels, s, r;
   4433     int len;
   4434 
   4435     assert(immn < 2 && imms < 64 && immr < 64);
   4436 
   4437     /* The bit patterns we create here are 64 bit patterns which
   4438      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
   4439      * 64 bits each. Each element contains the same value: a run
   4440      * of between 1 and e-1 non-zero bits, rotated within the
   4441      * element by between 0 and e-1 bits.
   4442      *
   4443      * The element size and run length are encoded into immn (1 bit)
   4444      * and imms (6 bits) as follows:
   4445      * 64 bit elements: immn = 1, imms = <length of run - 1>
   4446      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
   4447      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
   4448      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
   4449      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
   4450      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
   4451      * Notice that immn = 0, imms = 11111x is the only combination
   4452      * not covered by one of the above options; this is reserved.
   4453      * Further, <length of run - 1> all-ones is a reserved pattern.
   4454      *
   4455      * In all cases the rotation is by immr % e (and immr is 6 bits).
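              *
              * Worked example: immn = 0, imms = 0b111100, immr = 0 gives
              * len = 1, so e = 2, s = 0, r = 0: each 2-bit element is 0b01,
              * and replication yields 0x5555555555555555.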
   4456      */
   4457 
   4458     /* First determine the element size */
   4459     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
   4460     if (len < 1) {
    4461         /* This is the immn == 0, imms == 11111x case */
   4462         return false;
   4463     }
   4464     e = 1 << len;
   4465 
   4466     levels = e - 1;
   4467     s = imms & levels;
   4468     r = immr & levels;
   4469 
   4470     if (s == levels) {
   4471         /* <length of run - 1> mustn't be all-ones. */
   4472         return false;
   4473     }
   4474 
   4475     /* Create the value of one element: s+1 set bits rotated
   4476      * by r within the element (which is e bits wide)...
   4477      */
   4478     mask = bitmask64(s + 1);
   4479     if (r) {
   4480         mask = (mask >> r) | (mask << (e - r));
   4481         mask &= bitmask64(e);
   4482     }
   4483     /* ...then replicate the element over the whole 64 bit value */
   4484     mask = bitfield_replicate(mask, e);
   4485     *result = mask;
   4486     return true;
   4487 }
   4488 
   4489 /* Logical (immediate)
   4490  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
   4491  * +----+-----+-------------+---+------+------+------+------+
   4492  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
   4493  * +----+-----+-------------+---+------+------+------+------+
   4494  */
   4495 static void disas_logic_imm(DisasContext *s, uint32_t insn)
   4496 {
   4497     unsigned int sf, opc, is_n, immr, imms, rn, rd;
   4498     TCGv_i64 tcg_rd, tcg_rn;
   4499     uint64_t wmask;
   4500     bool is_and = false;
   4501 
   4502     sf = extract32(insn, 31, 1);
   4503     opc = extract32(insn, 29, 2);
   4504     is_n = extract32(insn, 22, 1);
   4505     immr = extract32(insn, 16, 6);
   4506     imms = extract32(insn, 10, 6);
   4507     rn = extract32(insn, 5, 5);
   4508     rd = extract32(insn, 0, 5);
   4509 
   4510     if (!sf && is_n) {
   4511         unallocated_encoding(s);
   4512         return;
   4513     }
   4514 
   4515     if (opc == 0x3) { /* ANDS */
   4516         tcg_rd = cpu_reg(s, rd);
   4517     } else {
   4518         tcg_rd = cpu_reg_sp(s, rd);
   4519     }
   4520     tcg_rn = cpu_reg(s, rn);
   4521 
   4522     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
   4523         /* some immediate field values are reserved */
   4524         unallocated_encoding(s);
   4525         return;
   4526     }
   4527 
   4528     if (!sf) {
   4529         wmask &= 0xffffffff;
   4530     }
   4531 
   4532     switch (opc) {
   4533     case 0x3: /* ANDS */
   4534     case 0x0: /* AND */
   4535         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
   4536         is_and = true;
   4537         break;
   4538     case 0x1: /* ORR */
   4539         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
   4540         break;
   4541     case 0x2: /* EOR */
   4542         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
   4543         break;
   4544     default:
   4545         assert(FALSE); /* must handle all above */
   4546         break;
   4547     }
   4548 
   4549     if (!sf && !is_and) {
   4550         /* zero extend final result; we know we can skip this for AND
   4551          * since the immediate had the high 32 bits clear.
   4552          */
   4553         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
   4554     }
   4555 
   4556     if (opc == 3) { /* ANDS */
   4557         gen_logic_CC(sf, tcg_rd);
   4558     }
   4559 }
   4560 
   4561 /*
   4562  * Move wide (immediate)
   4563  *
   4564  *  31 30 29 28         23 22 21 20             5 4    0
   4565  * +--+-----+-------------+-----+----------------+------+
   4566  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
   4567  * +--+-----+-------------+-----+----------------+------+
   4568  *
   4569  * sf: 0 -> 32 bit, 1 -> 64 bit
    4570  * opc: 00 -> MOVN, 10 -> MOVZ, 11 -> MOVK (01 -> unallocated)
    4571  * hw: shift/16 (0 or 16; 32 and 48 only valid when sf=1)
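          *
          * Worked example: MOVZ x0, #0x1234, LSL #48 followed by
          * MOVK x0, #0x5678, LSL #32 leaves 0x1234567800000000 in x0.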
   4572  */
   4573 static void disas_movw_imm(DisasContext *s, uint32_t insn)
   4574 {
   4575     int rd = extract32(insn, 0, 5);
   4576     uint64_t imm = extract32(insn, 5, 16);
   4577     int sf = extract32(insn, 31, 1);
   4578     int opc = extract32(insn, 29, 2);
   4579     int pos = extract32(insn, 21, 2) << 4;
   4580     TCGv_i64 tcg_rd = cpu_reg(s, rd);
   4581 
   4582     if (!sf && (pos >= 32)) {
   4583         unallocated_encoding(s);
   4584         return;
   4585     }
   4586 
   4587     switch (opc) {
   4588     case 0: /* MOVN */
   4589     case 2: /* MOVZ */
   4590         imm <<= pos;
   4591         if (opc == 0) {
   4592             imm = ~imm;
   4593         }
   4594         if (!sf) {
   4595             imm &= 0xffffffffu;
   4596         }
   4597         tcg_gen_movi_i64(tcg_rd, imm);
   4598         break;
   4599     case 3: /* MOVK */
   4600         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_constant_i64(imm), pos, 16);
   4601         if (!sf) {
   4602             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
   4603         }
   4604         break;
   4605     default:
   4606         unallocated_encoding(s);
   4607         break;
   4608     }
   4609 }
   4610 
   4611 /* Bitfield
   4612  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
   4613  * +----+-----+-------------+---+------+------+------+------+
   4614  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
   4615  * +----+-----+-------------+---+------+------+------+------+
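          *
          * Worked example: LSR x0, x1, #8 is UBFM x0, x1, #8, #63
          * (opc=2, ri=8, si=63); since si >= ri this becomes a single
          * 56-bit extract starting at bit 8.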
   4616  */
   4617 static void disas_bitfield(DisasContext *s, uint32_t insn)
   4618 {
   4619     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
   4620     TCGv_i64 tcg_rd, tcg_tmp;
   4621 
   4622     sf = extract32(insn, 31, 1);
   4623     opc = extract32(insn, 29, 2);
   4624     n = extract32(insn, 22, 1);
   4625     ri = extract32(insn, 16, 6);
   4626     si = extract32(insn, 10, 6);
   4627     rn = extract32(insn, 5, 5);
   4628     rd = extract32(insn, 0, 5);
   4629     bitsize = sf ? 64 : 32;
   4630 
   4631     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
   4632         unallocated_encoding(s);
   4633         return;
   4634     }
   4635 
   4636     tcg_rd = cpu_reg(s, rd);
   4637 
   4638     /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
   4639        to be smaller than bitsize, we'll never reference data outside the
   4640        low 32-bits anyway.  */
   4641     tcg_tmp = read_cpu_reg(s, rn, 1);
   4642 
   4643     /* Recognize simple(r) extractions.  */
   4644     if (si >= ri) {
   4645         /* Wd<s-r:0> = Wn<s:r> */
   4646         len = (si - ri) + 1;
   4647         if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
   4648             tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
   4649             goto done;
   4650         } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
   4651             tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
   4652             return;
   4653         }
    4654         /* opc == 1, BFXIL: fall through to deposit */
   4655         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
   4656         pos = 0;
   4657     } else {
   4658         /* Handle the ri > si case with a deposit
    4659          * Wd<32+s-r:32-r> = Wn<s:0>
   4660          */
   4661         len = si + 1;
   4662         pos = (bitsize - ri) & (bitsize - 1);
   4663     }
   4664 
   4665     if (opc == 0 && len < ri) {
   4666         /* SBFM: sign extend the destination field from len to fill
   4667            the balance of the word.  Let the deposit below insert all
   4668            of those sign bits.  */
   4669         tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
   4670         len = ri;
   4671     }
   4672 
   4673     if (opc == 1) { /* BFM, BFXIL */
   4674         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
   4675     } else {
   4676         /* SBFM or UBFM: We start with zero, and we haven't modified
   4677            any bits outside bitsize, therefore the zero-extension
   4678            below is unneeded.  */
   4679         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
   4680         return;
   4681     }
   4682 
   4683  done:
   4684     if (!sf) { /* zero extend final result */
   4685         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
   4686     }
   4687 }
   4688 
   4689 /* Extract
   4690  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
   4691  * +----+------+-------------+---+----+------+--------+------+------+
   4692  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
   4693  * +----+------+-------------+---+----+------+--------+------+------+
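          *
          * Worked example: ROR x0, x1, #12 is EXTR x0, x1, x1, #12
          * (Rm == Rn), handled below via extract2 (sf=1) or rotri (sf=0).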
   4694  */
   4695 static void disas_extract(DisasContext *s, uint32_t insn)
   4696 {
   4697     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
   4698 
   4699     sf = extract32(insn, 31, 1);
   4700     n = extract32(insn, 22, 1);
   4701     rm = extract32(insn, 16, 5);
   4702     imm = extract32(insn, 10, 6);
   4703     rn = extract32(insn, 5, 5);
   4704     rd = extract32(insn, 0, 5);
   4705     op21 = extract32(insn, 29, 2);
   4706     op0 = extract32(insn, 21, 1);
   4707     bitsize = sf ? 64 : 32;
   4708 
   4709     if (sf != n || op21 || op0 || imm >= bitsize) {
   4710         unallocated_encoding(s);
   4711     } else {
   4712         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
   4713 
   4714         tcg_rd = cpu_reg(s, rd);
   4715 
   4716         if (unlikely(imm == 0)) {
   4717             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
   4718              * so an extract from bit 0 is a special case.
   4719              */
   4720             if (sf) {
   4721                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
   4722             } else {
   4723                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
   4724             }
   4725         } else {
   4726             tcg_rm = cpu_reg(s, rm);
   4727             tcg_rn = cpu_reg(s, rn);
   4728 
   4729             if (sf) {
   4730                 /* Specialization to ROR happens in EXTRACT2.  */
   4731                 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, imm);
   4732             } else {
   4733                 TCGv_i32 t0 = tcg_temp_new_i32();
   4734 
   4735                 tcg_gen_extrl_i64_i32(t0, tcg_rm);
   4736                 if (rm == rn) {
   4737                     tcg_gen_rotri_i32(t0, t0, imm);
   4738                 } else {
   4739                     TCGv_i32 t1 = tcg_temp_new_i32();
   4740                     tcg_gen_extrl_i64_i32(t1, tcg_rn);
   4741                     tcg_gen_extract2_i32(t0, t0, t1, imm);
   4742                     tcg_temp_free_i32(t1);
   4743                 }
   4744                 tcg_gen_extu_i32_i64(tcg_rd, t0);
   4745                 tcg_temp_free_i32(t0);
   4746             }
   4747         }
   4748     }
   4749 }
   4750 
   4751 /* Data processing - immediate */
   4752 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
   4753 {
   4754     switch (extract32(insn, 23, 6)) {
   4755     case 0x20: case 0x21: /* PC-rel. addressing */
   4756         disas_pc_rel_adr(s, insn);
   4757         break;
   4758     case 0x22: /* Add/subtract (immediate) */
   4759         disas_add_sub_imm(s, insn);
   4760         break;
   4761     case 0x23: /* Add/subtract (immediate, with tags) */
   4762         disas_add_sub_imm_with_tags(s, insn);
   4763         break;
   4764     case 0x24: /* Logical (immediate) */
   4765         disas_logic_imm(s, insn);
   4766         break;
   4767     case 0x25: /* Move wide (immediate) */
   4768         disas_movw_imm(s, insn);
   4769         break;
   4770     case 0x26: /* Bitfield */
   4771         disas_bitfield(s, insn);
   4772         break;
   4773     case 0x27: /* Extract */
   4774         disas_extract(s, insn);
   4775         break;
   4776     default:
   4777         unallocated_encoding(s);
   4778         break;
   4779     }
   4780 }
   4781 
   4782 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
   4783  * Note that it is the caller's responsibility to ensure that the
   4784  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
   4785  * mandated semantics for out of range shifts.
   4786  */
   4787 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
   4788                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
   4789 {
   4790     switch (shift_type) {
   4791     case A64_SHIFT_TYPE_LSL:
   4792         tcg_gen_shl_i64(dst, src, shift_amount);
   4793         break;
   4794     case A64_SHIFT_TYPE_LSR:
   4795         tcg_gen_shr_i64(dst, src, shift_amount);
   4796         break;
   4797     case A64_SHIFT_TYPE_ASR:
   4798         if (!sf) {
   4799             tcg_gen_ext32s_i64(dst, src);
   4800         }
   4801         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
   4802         break;
   4803     case A64_SHIFT_TYPE_ROR:
   4804         if (sf) {
   4805             tcg_gen_rotr_i64(dst, src, shift_amount);
   4806         } else {
   4807             TCGv_i32 t0, t1;
   4808             t0 = tcg_temp_new_i32();
   4809             t1 = tcg_temp_new_i32();
   4810             tcg_gen_extrl_i64_i32(t0, src);
   4811             tcg_gen_extrl_i64_i32(t1, shift_amount);
   4812             tcg_gen_rotr_i32(t0, t0, t1);
   4813             tcg_gen_extu_i32_i64(dst, t0);
   4814             tcg_temp_free_i32(t0);
   4815             tcg_temp_free_i32(t1);
   4816         }
   4817         break;
   4818     default:
   4819         assert(FALSE); /* all shift types should be handled */
   4820         break;
   4821     }
   4822 
   4823     if (!sf) { /* zero extend final result */
   4824         tcg_gen_ext32u_i64(dst, dst);
   4825     }
   4826 }
   4827 
   4828 /* Shift a TCGv src by immediate, put result in dst.
   4829  * The shift amount must be in range (this should always be true as the
   4830  * relevant instructions will UNDEF on bad shift immediates).
   4831  */
   4832 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
   4833                           enum a64_shift_type shift_type, unsigned int shift_i)
   4834 {
   4835     assert(shift_i < (sf ? 64 : 32));
   4836 
   4837     if (shift_i == 0) {
   4838         tcg_gen_mov_i64(dst, src);
   4839     } else {
   4840         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
   4841     }
   4842 }
   4843 
   4844 /* Logical (shifted register)
   4845  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
   4846  * +----+-----+-----------+-------+---+------+--------+------+------+
   4847  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
   4848  * +----+-----+-----------+-------+---+------+--------+------+------+
   4849  */
   4850 static void disas_logic_reg(DisasContext *s, uint32_t insn)
   4851 {
   4852     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
   4853     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
   4854 
   4855     sf = extract32(insn, 31, 1);
   4856     opc = extract32(insn, 29, 2);
   4857     shift_type = extract32(insn, 22, 2);
   4858     invert = extract32(insn, 21, 1);
   4859     rm = extract32(insn, 16, 5);
   4860     shift_amount = extract32(insn, 10, 6);
   4861     rn = extract32(insn, 5, 5);
   4862     rd = extract32(insn, 0, 5);
   4863 
   4864     if (!sf && (shift_amount & (1 << 5))) {
   4865         unallocated_encoding(s);
   4866         return;
   4867     }
   4868 
   4869     tcg_rd = cpu_reg(s, rd);
   4870 
   4871     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
   4872         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
   4873          * register-register MOV and MVN, so it is worth special casing.
   4874          */
   4875         tcg_rm = cpu_reg(s, rm);
   4876         if (invert) {
   4877             tcg_gen_not_i64(tcg_rd, tcg_rm);
   4878             if (!sf) {
   4879                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
   4880             }
   4881         } else {
   4882             if (sf) {
   4883                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
   4884             } else {
   4885                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
   4886             }
   4887         }
   4888         return;
   4889     }
   4890 
   4891     tcg_rm = read_cpu_reg(s, rm, sf);
   4892 
   4893     if (shift_amount) {
   4894         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
   4895     }
   4896 
   4897     tcg_rn = cpu_reg(s, rn);
   4898 
   4899     switch (opc | (invert << 2)) {
   4900     case 0: /* AND */
   4901     case 3: /* ANDS */
   4902         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
   4903         break;
   4904     case 1: /* ORR */
   4905         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
   4906         break;
   4907     case 2: /* EOR */
   4908         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
   4909         break;
   4910     case 4: /* BIC */
   4911     case 7: /* BICS */
   4912         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
   4913         break;
   4914     case 5: /* ORN */
   4915         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
   4916         break;
   4917     case 6: /* EON */
   4918         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
   4919         break;
   4920     default:
   4921         assert(FALSE);
   4922         break;
   4923     }
   4924 
   4925     if (!sf) {
   4926         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
   4927     }
   4928 
   4929     if (opc == 3) {
   4930         gen_logic_CC(sf, tcg_rd);
   4931     }
   4932 }
   4933 
   4934 /*
   4935  * Add/subtract (extended register)
   4936  *
   4937  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
   4938  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
   4939  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
   4940  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
   4941  *
   4942  *  sf: 0 -> 32bit, 1 -> 64bit
   4943  *  op: 0 -> add  , 1 -> sub
   4944  *   S: 1 -> set flags
   4945  * opt: 00
   4946  * option: extension type (see DecodeRegExtend)
   4947  * imm3: optional shift to Rm
   4948  *
   4949  * Rd = Rn + LSL(extend(Rm), amount)
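          *
          * Worked example: ADD x0, sp, w1, UXTW #2 (option=0b010, imm3=2)
          * zero-extends w1, shifts it left by 2 and adds it to SP; the
          * extended-register form is what permits SP as the base operand.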
   4950  */
   4951 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
   4952 {
   4953     int rd = extract32(insn, 0, 5);
   4954     int rn = extract32(insn, 5, 5);
   4955     int imm3 = extract32(insn, 10, 3);
   4956     int option = extract32(insn, 13, 3);
   4957     int rm = extract32(insn, 16, 5);
   4958     int opt = extract32(insn, 22, 2);
   4959     bool setflags = extract32(insn, 29, 1);
   4960     bool sub_op = extract32(insn, 30, 1);
   4961     bool sf = extract32(insn, 31, 1);
   4962 
   4963     TCGv_i64 tcg_rm, tcg_rn; /* temps */
   4964     TCGv_i64 tcg_rd;
   4965     TCGv_i64 tcg_result;
   4966 
   4967     if (imm3 > 4 || opt != 0) {
   4968         unallocated_encoding(s);
   4969         return;
   4970     }
   4971 
   4972     /* non-flag setting ops may use SP */
   4973     if (!setflags) {
   4974         tcg_rd = cpu_reg_sp(s, rd);
   4975     } else {
   4976         tcg_rd = cpu_reg(s, rd);
   4977     }
   4978     tcg_rn = read_cpu_reg_sp(s, rn, sf);
   4979 
   4980     tcg_rm = read_cpu_reg(s, rm, sf);
   4981     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
   4982 
   4983     tcg_result = tcg_temp_new_i64();
   4984 
   4985     if (!setflags) {
   4986         if (sub_op) {
   4987             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
   4988         } else {
   4989             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
   4990         }
   4991     } else {
   4992         if (sub_op) {
   4993             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
   4994         } else {
   4995             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
   4996         }
   4997     }
   4998 
   4999     if (sf) {
   5000         tcg_gen_mov_i64(tcg_rd, tcg_result);
   5001     } else {
   5002         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
   5003     }
   5004 
   5005     tcg_temp_free_i64(tcg_result);
   5006 }
   5007 
   5008 /*
   5009  * Add/subtract (shifted register)
   5010  *
   5011  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
   5012  * +--+--+--+-----------+-----+--+-------+---------+------+------+
   5013  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
   5014  * +--+--+--+-----------+-----+--+-------+---------+------+------+
   5015  *
   5016  *    sf: 0 -> 32bit, 1 -> 64bit
   5017  *    op: 0 -> add  , 1 -> sub
   5018  *     S: 1 -> set flags
   5019  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
   5020  *  imm6: Shift amount to apply to Rm before the add/sub
   5021  */
   5022 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
   5023 {
   5024     int rd = extract32(insn, 0, 5);
   5025     int rn = extract32(insn, 5, 5);
   5026     int imm6 = extract32(insn, 10, 6);
   5027     int rm = extract32(insn, 16, 5);
   5028     int shift_type = extract32(insn, 22, 2);
   5029     bool setflags = extract32(insn, 29, 1);
   5030     bool sub_op = extract32(insn, 30, 1);
   5031     bool sf = extract32(insn, 31, 1);
   5032 
   5033     TCGv_i64 tcg_rd = cpu_reg(s, rd);
   5034     TCGv_i64 tcg_rn, tcg_rm;
   5035     TCGv_i64 tcg_result;
   5036 
   5037     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
   5038         unallocated_encoding(s);
   5039         return;
   5040     }
   5041 
   5042     tcg_rn = read_cpu_reg(s, rn, sf);
   5043     tcg_rm = read_cpu_reg(s, rm, sf);
   5044 
   5045     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
   5046 
   5047     tcg_result = tcg_temp_new_i64();
   5048 
   5049     if (!setflags) {
   5050         if (sub_op) {
   5051             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
   5052         } else {
   5053             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
   5054         }
   5055     } else {
   5056         if (sub_op) {
   5057             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
   5058         } else {
   5059             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
   5060         }
   5061     }
   5062 
   5063     if (sf) {
   5064         tcg_gen_mov_i64(tcg_rd, tcg_result);
   5065     } else {
   5066         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
   5067     }
   5068 
   5069     tcg_temp_free_i64(tcg_result);
   5070 }
   5071 
   5072 /* Data-processing (3 source)
   5073  *
   5074  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
   5075  *  +--+------+-----------+------+------+----+------+------+------+
   5076  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
   5077  *  +--+------+-----------+------+------+----+------+------+------+
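          *
          *  Worked example: MUL x0, x1, x2 is MADD x0, x1, x2, xzr
          *  (Ra == 31), lowered below to a single multiply; SMULH and
          *  UMULH instead keep only the high 64 bits of the 128-bit
          *  product.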
   5078  */
   5079 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
   5080 {
   5081     int rd = extract32(insn, 0, 5);
   5082     int rn = extract32(insn, 5, 5);
   5083     int ra = extract32(insn, 10, 5);
   5084     int rm = extract32(insn, 16, 5);
   5085     int op_id = (extract32(insn, 29, 3) << 4) |
   5086         (extract32(insn, 21, 3) << 1) |
   5087         extract32(insn, 15, 1);
   5088     bool sf = extract32(insn, 31, 1);
   5089     bool is_sub = extract32(op_id, 0, 1);
   5090     bool is_high = extract32(op_id, 2, 1);
   5091     bool is_signed = false;
   5092     TCGv_i64 tcg_op1;
   5093     TCGv_i64 tcg_op2;
   5094     TCGv_i64 tcg_tmp;
   5095 
   5096     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
   5097     switch (op_id) {
   5098     case 0x42: /* SMADDL */
   5099     case 0x43: /* SMSUBL */
   5100     case 0x44: /* SMULH */
   5101         is_signed = true;
   5102         break;
   5103     case 0x0: /* MADD (32bit) */
   5104     case 0x1: /* MSUB (32bit) */
   5105     case 0x40: /* MADD (64bit) */
   5106     case 0x41: /* MSUB (64bit) */
   5107     case 0x4a: /* UMADDL */
   5108     case 0x4b: /* UMSUBL */
   5109     case 0x4c: /* UMULH */
   5110         break;
   5111     default:
   5112         unallocated_encoding(s);
   5113         return;
   5114     }
   5115 
   5116     if (is_high) {
   5117         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
   5118         TCGv_i64 tcg_rd = cpu_reg(s, rd);
   5119         TCGv_i64 tcg_rn = cpu_reg(s, rn);
   5120         TCGv_i64 tcg_rm = cpu_reg(s, rm);
   5121 
   5122         if (is_signed) {
   5123             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
   5124         } else {
   5125             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
   5126         }
   5127 
   5128         tcg_temp_free_i64(low_bits);
   5129         return;
   5130     }
   5131 
   5132     tcg_op1 = tcg_temp_new_i64();
   5133     tcg_op2 = tcg_temp_new_i64();
   5134     tcg_tmp = tcg_temp_new_i64();
   5135 
   5136     if (op_id < 0x42) {
   5137         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
   5138         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
   5139     } else {
   5140         if (is_signed) {
   5141             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
   5142             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
   5143         } else {
   5144             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
   5145             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
   5146         }
   5147     }
   5148 
   5149     if (ra == 31 && !is_sub) {
   5150         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
   5151         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
   5152     } else {
   5153         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
   5154         if (is_sub) {
   5155             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
   5156         } else {
   5157             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
   5158         }
   5159     }
   5160 
   5161     if (!sf) {
   5162         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
   5163     }
   5164 
   5165     tcg_temp_free_i64(tcg_op1);
   5166     tcg_temp_free_i64(tcg_op2);
   5167     tcg_temp_free_i64(tcg_tmp);
   5168 }
   5169 
   5170 /* Add/subtract (with carry)
   5171  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
   5172  * +--+--+--+------------------------+------+-------------+------+-----+
   5173  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
   5174  * +--+--+--+------------------------+------+-------------+------+-----+
   5175  */
   5176 
   5177 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
   5178 {
   5179     unsigned int sf, op, setflags, rm, rn, rd;
   5180     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
   5181 
   5182     sf = extract32(insn, 31, 1);
   5183     op = extract32(insn, 30, 1);
   5184     setflags = extract32(insn, 29, 1);
   5185     rm = extract32(insn, 16, 5);
   5186     rn = extract32(insn, 5, 5);
   5187     rd = extract32(insn, 0, 5);
   5188 
   5189     tcg_rd = cpu_reg(s, rd);
   5190     tcg_rn = cpu_reg(s, rn);
   5191 
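             /*
              * For SBC/SBCS (op == 1) the second operand is NOT(Rm):
              * the ARM pseudocode computes AddWithCarry(Rn, NOT(Rm), C).
              */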
   5192     if (op) {
   5193         tcg_y = new_tmp_a64(s);
   5194         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
   5195     } else {
   5196         tcg_y = cpu_reg(s, rm);
   5197     }
   5198 
   5199     if (setflags) {
   5200         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
   5201     } else {
   5202         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
   5203     }
   5204 }
   5205 
   5206 /*
   5207  * Rotate right into flags
   5208  *  31 30 29                21       15          10      5  4      0
   5209  * +--+--+--+-----------------+--------+-----------+------+--+------+
   5210  * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
   5211  * +--+--+--+-----------------+--------+-----------+------+--+------+
   5212  */
   5213 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
   5214 {
   5215     int mask = extract32(insn, 0, 4);
   5216     int o2 = extract32(insn, 4, 1);
   5217     int rn = extract32(insn, 5, 5);
   5218     int imm6 = extract32(insn, 15, 6);
   5219     int sf_op_s = extract32(insn, 29, 3);
   5220     TCGv_i64 tcg_rn;
   5221     TCGv_i32 nzcv;
   5222 
   5223     if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
   5224         unallocated_encoding(s);
   5225         return;
   5226     }
   5227 
   5228     tcg_rn = read_cpu_reg(s, rn, 1);
   5229     tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
   5230 
   5231     nzcv = tcg_temp_new_i32();
   5232     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
   5233 
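             /*
              * QEMU stores the flags unpacked: N is bit 31 of NF, Z is
              * set iff ZF == 0, C is the low bit of CF, and V is bit 31
              * of VF.  The shifts and masks below move each selected bit
              * of nzcv into that representation.
              */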
   5234     if (mask & 8) { /* N */
   5235         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
   5236     }
   5237     if (mask & 4) { /* Z */
   5238         tcg_gen_not_i32(cpu_ZF, nzcv);
   5239         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
   5240     }
   5241     if (mask & 2) { /* C */
   5242         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
   5243     }
   5244     if (mask & 1) { /* V */
   5245         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
   5246     }
   5247 
   5248     tcg_temp_free_i32(nzcv);
   5249 }
   5250 
   5251 /*
   5252  * Evaluate into flags
   5253  *  31 30 29                21        15   14        10      5  4      0
   5254  * +--+--+--+-----------------+---------+----+---------+------+--+------+
   5255  * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
   5256  * +--+--+--+-----------------+---------+----+---------+------+--+------+
   5257  */
   5258 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
   5259 {
   5260     int o3_mask = extract32(insn, 0, 5);
   5261     int rn = extract32(insn, 5, 5);
   5262     int o2 = extract32(insn, 15, 6);
   5263     int sz = extract32(insn, 14, 1);
   5264     int sf_op_s = extract32(insn, 29, 3);
   5265     TCGv_i32 tmp;
   5266     int shift;
   5267 
   5268     if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
   5269         !dc_isar_feature(aa64_condm_4, s)) {
   5270         unallocated_encoding(s);
   5271         return;
   5272     }
   5273     shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
   5274 
   5275     tmp = tcg_temp_new_i32();
   5276     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
   5277     tcg_gen_shli_i32(cpu_NF, tmp, shift);
   5278     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
   5279     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
   5280     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
   5281     tcg_temp_free_i32(tmp);
   5282 }
   5283 
   5284 /* Conditional compare (immediate / register)
   5285  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
   5286  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
   5287  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
   5288  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
   5289  *        [1]                             y                [0]       [0]
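          *
          * Worked example: CCMP x0, #5, #8, EQ sets NZCV from x0 - 5 when
          * EQ holds, and forces NZCV to 0b1000 (N set) otherwise; the
          * T1/T2 masks computed below implement that select branchlessly.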
   5290  */
   5291 static void disas_cc(DisasContext *s, uint32_t insn)
   5292 {
   5293     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
   5294     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
   5295     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
   5296     DisasCompare c;
   5297 
   5298     if (!extract32(insn, 29, 1)) {
   5299         unallocated_encoding(s);
   5300         return;
   5301     }
   5302     if (insn & (1 << 10 | 1 << 4)) {
   5303         unallocated_encoding(s);
   5304         return;
   5305     }
   5306     sf = extract32(insn, 31, 1);
   5307     op = extract32(insn, 30, 1);
   5308     is_imm = extract32(insn, 11, 1);
   5309     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
   5310     cond = extract32(insn, 12, 4);
   5311     rn = extract32(insn, 5, 5);
   5312     nzcv = extract32(insn, 0, 4);
   5313 
   5314     /* Set T0 = !COND.  */
   5315     tcg_t0 = tcg_temp_new_i32();
   5316     arm_test_cc(&c, cond);
   5317     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
   5318     arm_free_cc(&c);
   5319 
   5320     /* Load the arguments for the new comparison.  */
   5321     if (is_imm) {
   5322         tcg_y = new_tmp_a64(s);
   5323         tcg_gen_movi_i64(tcg_y, y);
   5324     } else {
   5325         tcg_y = cpu_reg(s, y);
   5326     }
   5327     tcg_rn = cpu_reg(s, rn);
   5328 
   5329     /* Set the flags for the new comparison.  */
   5330     tcg_tmp = tcg_temp_new_i64();
   5331     if (op) {
   5332         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
   5333     } else {
   5334         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
   5335     }
   5336     tcg_temp_free_i64(tcg_tmp);
   5337 
   5338     /* If COND was false, force the flags to #nzcv.  Compute two masks
   5339      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
   5340      * For tcg hosts that support ANDC, we can make do with just T1.
   5341      * In either case, allow the tcg optimizer to delete any unused mask.
   5342      */
   5343     tcg_t1 = tcg_temp_new_i32();
   5344     tcg_t2 = tcg_temp_new_i32();
   5345     tcg_gen_neg_i32(tcg_t1, tcg_t0);
   5346     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
   5347 
   5348     if (nzcv & 8) { /* N */
   5349         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
   5350     } else {
   5351         if (TCG_TARGET_HAS_andc_i32) {
   5352             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
   5353         } else {
   5354             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
   5355         }
   5356     }
   5357     if (nzcv & 4) { /* Z */
   5358         if (TCG_TARGET_HAS_andc_i32) {
   5359             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
   5360         } else {
   5361             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
   5362         }
   5363     } else {
   5364         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
   5365     }
   5366     if (nzcv & 2) { /* C */
   5367         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
   5368     } else {
   5369         if (TCG_TARGET_HAS_andc_i32) {
   5370             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
   5371         } else {
   5372             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
   5373         }
   5374     }
   5375     if (nzcv & 1) { /* V */
   5376         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
   5377     } else {
   5378         if (TCG_TARGET_HAS_andc_i32) {
   5379             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
   5380         } else {
   5381             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
   5382         }
   5383     }
   5384     tcg_temp_free_i32(tcg_t0);
   5385     tcg_temp_free_i32(tcg_t1);
   5386     tcg_temp_free_i32(tcg_t2);
   5387 }
   5388 
   5389 /* Conditional select
   5390  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
   5391  * +----+----+---+-----------------+------+------+-----+------+------+
   5392  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
   5393  * +----+----+---+-----------------+------+------+-----+------+------+
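          *
          * Worked example: CSET x0, cond assembles to CSINC x0, xzr, xzr,
          * invert(cond), which the Rn == Rm == 31 special case below
          * lowers to a single setcond.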
   5394  */
   5395 static void disas_cond_select(DisasContext *s, uint32_t insn)
   5396 {
   5397     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
   5398     TCGv_i64 tcg_rd, zero;
   5399     DisasCompare64 c;
   5400 
   5401     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
   5402         /* S == 1 or op2<1> == 1 */
   5403         unallocated_encoding(s);
   5404         return;
   5405     }
   5406     sf = extract32(insn, 31, 1);
   5407     else_inv = extract32(insn, 30, 1);
   5408     rm = extract32(insn, 16, 5);
   5409     cond = extract32(insn, 12, 4);
   5410     else_inc = extract32(insn, 10, 1);
   5411     rn = extract32(insn, 5, 5);
   5412     rd = extract32(insn, 0, 5);
   5413 
   5414     tcg_rd = cpu_reg(s, rd);
   5415 
   5416     a64_test_cc(&c, cond);
   5417     zero = tcg_constant_i64(0);
   5418 
   5419     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
   5420         /* CSET & CSETM.  */
   5421         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
   5422         if (else_inv) {
   5423             tcg_gen_neg_i64(tcg_rd, tcg_rd);
   5424         }
   5425     } else {
   5426         TCGv_i64 t_true = cpu_reg(s, rn);
   5427         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
   5428         if (else_inv && else_inc) {
   5429             tcg_gen_neg_i64(t_false, t_false);
   5430         } else if (else_inv) {
   5431             tcg_gen_not_i64(t_false, t_false);
   5432         } else if (else_inc) {
   5433             tcg_gen_addi_i64(t_false, t_false, 1);
   5434         }
   5435         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
   5436     }
   5437 
   5438     a64_free_cc(&c);
   5439 
   5440     if (!sf) {
   5441         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
   5442     }
   5443 }
   5444 
   5445 static void handle_clz(DisasContext *s, unsigned int sf,
   5446                        unsigned int rn, unsigned int rd)
   5447 {
   5448     TCGv_i64 tcg_rd, tcg_rn;
   5449     tcg_rd = cpu_reg(s, rd);
   5450     tcg_rn = cpu_reg(s, rn);
   5451 
   5452     if (sf) {
   5453         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
   5454     } else {
   5455         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
   5456         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
   5457         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
   5458         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
   5459         tcg_temp_free_i32(tcg_tmp32);
   5460     }
   5461 }
   5462 
   5463 static void handle_cls(DisasContext *s, unsigned int sf,
   5464                        unsigned int rn, unsigned int rd)
   5465 {
   5466     TCGv_i64 tcg_rd, tcg_rn;
   5467     tcg_rd = cpu_reg(s, rd);
   5468     tcg_rn = cpu_reg(s, rn);
   5469 
   5470     if (sf) {
   5471         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
   5472     } else {
   5473         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
   5474         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
   5475         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
   5476         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
   5477         tcg_temp_free_i32(tcg_tmp32);
   5478     }
   5479 }
   5480 
   5481 static void handle_rbit(DisasContext *s, unsigned int sf,
   5482                         unsigned int rn, unsigned int rd)
   5483 {
   5484     TCGv_i64 tcg_rd, tcg_rn;
   5485     tcg_rd = cpu_reg(s, rd);
   5486     tcg_rn = cpu_reg(s, rn);
   5487 
   5488     if (sf) {
   5489         gen_helper_rbit64(tcg_rd, tcg_rn);
   5490     } else {
   5491         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
   5492         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
   5493         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
   5494         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
   5495         tcg_temp_free_i32(tcg_tmp32);
   5496     }
   5497 }
   5498 
   5499 /* REV with sf==1, opcode==3 ("REV64") */
   5500 static void handle_rev64(DisasContext *s, unsigned int sf,
   5501                          unsigned int rn, unsigned int rd)
   5502 {
   5503     if (!sf) {
   5504         unallocated_encoding(s);
   5505         return;
   5506     }
   5507     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
   5508 }
   5509 
   5510 /* REV with sf==0, opcode==2
   5511  * REV32 (sf==1, opcode==2)
   5512  */
   5513 static void handle_rev32(DisasContext *s, unsigned int sf,
   5514                          unsigned int rn, unsigned int rd)
   5515 {
   5516     TCGv_i64 tcg_rd = cpu_reg(s, rd);
   5517     TCGv_i64 tcg_rn = cpu_reg(s, rn);
   5518 
   5519     if (sf) {
   5520         tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
   5521         tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
   5522     } else {
   5523         tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
   5524     }
   5525 }
   5526 
   5527 /* REV16 (opcode==1) */
   5528 static void handle_rev16(DisasContext *s, unsigned int sf,
   5529                          unsigned int rn, unsigned int rd)
   5530 {
   5531     TCGv_i64 tcg_rd = cpu_reg(s, rd);
   5532     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
   5533     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
   5534     TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
   5535 
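             /*
              * Swap the bytes within each 16-bit lane:
              * rd = ((rn & mask) << 8) | ((rn >> 8) & mask), so e.g.
              * 0x1122334455667788 becomes 0x2211443366558877.
              */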
   5536     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
   5537     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
   5538     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
   5539     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
   5540     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
   5541 
   5542     tcg_temp_free_i64(tcg_tmp);
   5543 }
   5544 
   5545 /* Data-processing (1 source)
   5546  *   31  30  29  28             21 20     16 15    10 9    5 4    0
   5547  * +----+---+---+-----------------+---------+--------+------+------+
   5548  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
   5549  * +----+---+---+-----------------+---------+--------+------+------+
   5550  */
   5551 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
   5552 {
   5553     unsigned int sf, opcode, opcode2, rn, rd;
   5554     TCGv_i64 tcg_rd;
   5555 
   5556     if (extract32(insn, 29, 1)) {
   5557         unallocated_encoding(s);
   5558         return;
   5559     }
   5560 
   5561     sf = extract32(insn, 31, 1);
   5562     opcode = extract32(insn, 10, 6);
   5563     opcode2 = extract32(insn, 16, 5);
   5564     rn = extract32(insn, 5, 5);
   5565     rd = extract32(insn, 0, 5);
   5566 
   5567 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
   5568 
   5569     switch (MAP(sf, opcode2, opcode)) {
   5570     case MAP(0, 0x00, 0x00): /* RBIT */
   5571     case MAP(1, 0x00, 0x00):
   5572         handle_rbit(s, sf, rn, rd);
   5573         break;
   5574     case MAP(0, 0x00, 0x01): /* REV16 */
   5575     case MAP(1, 0x00, 0x01):
   5576         handle_rev16(s, sf, rn, rd);
   5577         break;
   5578     case MAP(0, 0x00, 0x02): /* REV/REV32 */
   5579     case MAP(1, 0x00, 0x02):
   5580         handle_rev32(s, sf, rn, rd);
   5581         break;
   5582     case MAP(1, 0x00, 0x03): /* REV64 */
   5583         handle_rev64(s, sf, rn, rd);
   5584         break;
   5585     case MAP(0, 0x00, 0x04): /* CLZ */
   5586     case MAP(1, 0x00, 0x04):
   5587         handle_clz(s, sf, rn, rd);
   5588         break;
   5589     case MAP(0, 0x00, 0x05): /* CLS */
   5590     case MAP(1, 0x00, 0x05):
   5591         handle_cls(s, sf, rn, rd);
   5592         break;
   5593     case MAP(1, 0x01, 0x00): /* PACIA */
   5594         if (s->pauth_active) {
   5595             tcg_rd = cpu_reg(s, rd);
   5596             gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
   5597         } else if (!dc_isar_feature(aa64_pauth, s)) {
   5598             goto do_unallocated;
   5599         }
   5600         break;
   5601     case MAP(1, 0x01, 0x01): /* PACIB */
   5602         if (s->pauth_active) {
   5603             tcg_rd = cpu_reg(s, rd);
   5604             gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
   5605         } else if (!dc_isar_feature(aa64_pauth, s)) {
   5606             goto do_unallocated;
   5607         }
   5608         break;
   5609     case MAP(1, 0x01, 0x02): /* PACDA */
   5610         if (s->pauth_active) {
   5611             tcg_rd = cpu_reg(s, rd);
   5612             gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
   5613         } else if (!dc_isar_feature(aa64_pauth, s)) {
   5614             goto do_unallocated;
   5615         }
   5616         break;
   5617     case MAP(1, 0x01, 0x03): /* PACDB */
   5618         if (s->pauth_active) {
   5619             tcg_rd = cpu_reg(s, rd);
   5620             gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
   5621         } else if (!dc_isar_feature(aa64_pauth, s)) {
   5622             goto do_unallocated;
   5623         }
   5624         break;
   5625     case MAP(1, 0x01, 0x04): /* AUTIA */
   5626         if (s->pauth_active) {
   5627             tcg_rd = cpu_reg(s, rd);
   5628             gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
   5629         } else if (!dc_isar_feature(aa64_pauth, s)) {
   5630             goto do_unallocated;
   5631         }
   5632         break;
   5633     case MAP(1, 0x01, 0x05): /* AUTIB */
   5634         if (s->pauth_active) {
   5635             tcg_rd = cpu_reg(s, rd);
   5636             gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
   5637         } else if (!dc_isar_feature(aa64_pauth, s)) {
   5638             goto do_unallocated;
   5639         }
   5640         break;
   5641     case MAP(1, 0x01, 0x06): /* AUTDA */
   5642         if (s->pauth_active) {
   5643             tcg_rd = cpu_reg(s, rd);
   5644             gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
   5645         } else if (!dc_isar_feature(aa64_pauth, s)) {
   5646             goto do_unallocated;
   5647         }
   5648         break;
   5649     case MAP(1, 0x01, 0x07): /* AUTDB */
   5650         if (s->pauth_active) {
   5651             tcg_rd = cpu_reg(s, rd);
   5652             gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
   5653         } else if (!dc_isar_feature(aa64_pauth, s)) {
   5654             goto do_unallocated;
   5655         }
   5656         break;
   5657     case MAP(1, 0x01, 0x08): /* PACIZA */
   5658         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
   5659             goto do_unallocated;
   5660         } else if (s->pauth_active) {
   5661             tcg_rd = cpu_reg(s, rd);
   5662             gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
   5663         }
   5664         break;
   5665     case MAP(1, 0x01, 0x09): /* PACIZB */
   5666         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
   5667             goto do_unallocated;
   5668         } else if (s->pauth_active) {
   5669             tcg_rd = cpu_reg(s, rd);
   5670             gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
   5671         }
   5672         break;
   5673     case MAP(1, 0x01, 0x0a): /* PACDZA */
   5674         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
   5675             goto do_unallocated;
   5676         } else if (s->pauth_active) {
   5677             tcg_rd = cpu_reg(s, rd);
   5678             gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
   5679         }
   5680         break;
   5681     case MAP(1, 0x01, 0x0b): /* PACDZB */
   5682         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
   5683             goto do_unallocated;
   5684         } else if (s->pauth_active) {
   5685             tcg_rd = cpu_reg(s, rd);
   5686             gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
   5687         }
   5688         break;
   5689     case MAP(1, 0x01, 0x0c): /* AUTIZA */
   5690         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
   5691             goto do_unallocated;
   5692         } else if (s->pauth_active) {
   5693             tcg_rd = cpu_reg(s, rd);
   5694             gen_helper_autia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
   5695         }
   5696         break;
   5697     case MAP(1, 0x01, 0x0d): /* AUTIZB */
   5698         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
   5699             goto do_unallocated;
   5700         } else if (s->pauth_active) {
   5701             tcg_rd = cpu_reg(s, rd);
   5702             gen_helper_autib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
   5703         }
   5704         break;
   5705     case MAP(1, 0x01, 0x0e): /* AUTDZA */
   5706         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
   5707             goto do_unallocated;
   5708         } else if (s->pauth_active) {
   5709             tcg_rd = cpu_reg(s, rd);
   5710             gen_helper_autda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
   5711         }
   5712         break;
   5713     case MAP(1, 0x01, 0x0f): /* AUTDZB */
   5714         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
   5715             goto do_unallocated;
   5716         } else if (s->pauth_active) {
   5717             tcg_rd = cpu_reg(s, rd);
   5718             gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
   5719         }
   5720         break;
   5721     case MAP(1, 0x01, 0x10): /* XPACI */
   5722         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
   5723             goto do_unallocated;
   5724         } else if (s->pauth_active) {
   5725             tcg_rd = cpu_reg(s, rd);
   5726             gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
   5727         }
   5728         break;
   5729     case MAP(1, 0x01, 0x11): /* XPACD */
   5730         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
   5731             goto do_unallocated;
   5732         } else if (s->pauth_active) {
   5733             tcg_rd = cpu_reg(s, rd);
   5734             gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
   5735         }
   5736         break;
   5737     default:
   5738     do_unallocated:
   5739         unallocated_encoding(s);
   5740         break;
   5741     }
   5742 
   5743 #undef MAP
   5744 }
   5745 
   5746 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
   5747                        unsigned int rm, unsigned int rn, unsigned int rd)
   5748 {
   5749     TCGv_i64 tcg_n, tcg_m, tcg_rd;
   5750     tcg_rd = cpu_reg(s, rd);
   5751 
   5752     if (!sf && is_signed) {
   5753         tcg_n = new_tmp_a64(s);
   5754         tcg_m = new_tmp_a64(s);
   5755         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
   5756         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
   5757     } else {
   5758         tcg_n = read_cpu_reg(s, rn, sf);
   5759         tcg_m = read_cpu_reg(s, rm, sf);
   5760     }
   5761 
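             /*
              * The division helpers implement the ARM semantics directly:
              * division by zero yields 0, and the signed overflow case
              * INT64_MIN / -1 yields INT64_MIN.
              */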
   5762     if (is_signed) {
   5763         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
   5764     } else {
   5765         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
   5766     }
   5767 
   5768     if (!sf) { /* zero extend final result */
   5769         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
   5770     }
   5771 }
   5772 
   5773 /* LSLV, LSRV, ASRV, RORV */
   5774 static void handle_shift_reg(DisasContext *s,
   5775                              enum a64_shift_type shift_type, unsigned int sf,
   5776                              unsigned int rm, unsigned int rn, unsigned int rd)
   5777 {
   5778     TCGv_i64 tcg_shift = tcg_temp_new_i64();
   5779     TCGv_i64 tcg_rd = cpu_reg(s, rd);
   5780     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
   5781 
   5782     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
   5783     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
   5784     tcg_temp_free_i64(tcg_shift);
   5785 }
   5786 
   5787 /* CRC32[BHWX], CRC32C[BHWX] */
   5788 static void handle_crc32(DisasContext *s,
   5789                          unsigned int sf, unsigned int sz, bool crc32c,
   5790                          unsigned int rm, unsigned int rn, unsigned int rd)
   5791 {
   5792     TCGv_i64 tcg_acc, tcg_val;
   5793     TCGv_i32 tcg_bytes;
   5794 
   5795     if (!dc_isar_feature(aa64_crc32, s)
   5796         || (sf == 1 && sz != 3)
   5797         || (sf == 0 && sz == 3)) {
   5798         unallocated_encoding(s);
   5799         return;
   5800     }
   5801 
   5802     if (sz == 3) {
   5803         tcg_val = cpu_reg(s, rm);
   5804     } else {
   5805         uint64_t mask;
   5806         switch (sz) {
   5807         case 0:
   5808             mask = 0xFF;
   5809             break;
   5810         case 1:
   5811             mask = 0xFFFF;
   5812             break;
   5813         case 2:
   5814             mask = 0xFFFFFFFF;
   5815             break;
   5816         default:
   5817             g_assert_not_reached();
   5818         }
   5819         tcg_val = new_tmp_a64(s);
   5820         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
   5821     }
   5822 
   5823     tcg_acc = cpu_reg(s, rn);
   5824     tcg_bytes = tcg_constant_i32(1 << sz);
   5825 
   5826     if (crc32c) {
   5827         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
   5828     } else {
   5829         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
   5830     }
   5831 }
   5832 
   5833 /* Data-processing (2 source)
   5834  *   31   30  29 28             21 20  16 15    10 9    5 4    0
   5835  * +----+---+---+-----------------+------+--------+------+------+
   5836  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
   5837  * +----+---+---+-----------------+------+--------+------+------+
   5838  */
   5839 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
   5840 {
   5841     unsigned int sf, rm, opcode, rn, rd, setflag;
   5842     sf = extract32(insn, 31, 1);
   5843     setflag = extract32(insn, 29, 1);
   5844     rm = extract32(insn, 16, 5);
   5845     opcode = extract32(insn, 10, 6);
   5846     rn = extract32(insn, 5, 5);
   5847     rd = extract32(insn, 0, 5);
   5848 
   5849     if (setflag && opcode != 0) {
   5850         unallocated_encoding(s);
   5851         return;
   5852     }
   5853 
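             /* Decode example (illustrative): UDIV W0, W1, W2 encodes as
              * 0x1ac20820, i.e. sf == 0, opcode == 2, rm == 2, rn == 1,
              * rd == 0.
              */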
   5854     switch (opcode) {
   5855     case 0: /* SUBP(S) */
   5856         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
   5857             goto do_unallocated;
   5858         } else {
   5859             TCGv_i64 tcg_n, tcg_m, tcg_d;
   5860 
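                     /* SUBP operates on the 56-bit virtual addresses: the
                      * two sign-extractions strip the tag byte before
                      * subtracting.
                      */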
   5861             tcg_n = read_cpu_reg_sp(s, rn, true);
   5862             tcg_m = read_cpu_reg_sp(s, rm, true);
   5863             tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
   5864             tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
   5865             tcg_d = cpu_reg(s, rd);
   5866 
   5867             if (setflag) {
   5868                 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
   5869             } else {
   5870                 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
   5871             }
   5872         }
   5873         break;
   5874     case 2: /* UDIV */
   5875         handle_div(s, false, sf, rm, rn, rd);
   5876         break;
   5877     case 3: /* SDIV */
   5878         handle_div(s, true, sf, rm, rn, rd);
   5879         break;
   5880     case 4: /* IRG */
   5881         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
   5882             goto do_unallocated;
   5883         }
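                 /* With tags enabled the helper picks a random allocation
                  * tag, honouring the exclusion mask formed from Xm and
                  * GCR_EL1; otherwise IRG degenerates to writing back the
                  * address with tag 0.
                  */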
   5884         if (s->ata) {
   5885             gen_helper_irg(cpu_reg_sp(s, rd), cpu_env,
   5886                            cpu_reg_sp(s, rn), cpu_reg(s, rm));
   5887         } else {
   5888             gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
   5889                                              cpu_reg_sp(s, rn));
   5890         }
   5891         break;
   5892     case 5: /* GMI */
   5893         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
   5894             goto do_unallocated;
   5895         } else {
   5896             TCGv_i64 t = tcg_temp_new_i64();
   5897 
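                     /* GMI: set the bit indexed by the allocation tag of Xn
                      * (bits 59:56) in the exclusion mask, e.g. tag 3 ORs in
                      * 1 << 3.
                      */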
   5898             tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
   5899             tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
   5900             tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
   5901 
   5902             tcg_temp_free_i64(t);
   5903         }
   5904         break;
   5905     case 8: /* LSLV */
   5906         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
   5907         break;
   5908     case 9: /* LSRV */
   5909         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
   5910         break;
   5911     case 10: /* ASRV */
   5912         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
   5913         break;
   5914     case 11: /* RORV */
   5915         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
   5916         break;
   5917     case 12: /* PACGA */
   5918         if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
   5919             goto do_unallocated;
   5920         }
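                 /* PACGA: the helper returns the generic authentication
                  * code in the top 32 bits of Xd, with the low half zeroed.
                  */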
   5921         gen_helper_pacga(cpu_reg(s, rd), cpu_env,
   5922                          cpu_reg(s, rn), cpu_reg_sp(s, rm));
   5923         break;
   5924     case 16:
   5925     case 17:
   5926     case 18:
   5927     case 19:
   5928     case 20:
   5929     case 21:
   5930     case 22:
   5931     case 23: /* CRC32 */
   5932     {
   5933         int sz = extract32(opcode, 0, 2);
   5934         bool crc32c = extract32(opcode, 2, 1);
   5935         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
   5936         break;
   5937     }
   5938     default:
   5939     do_unallocated:
   5940         unallocated_encoding(s);
   5941         break;
   5942     }
   5943 }
   5944 
   5945 /*
   5946  * Data processing - register
   5947  *  31  30 29  28      25    21  20  16      10         0
   5948  * +--+---+--+---+-------+-----+-------+-------+---------+
   5949  * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
   5950  * +--+---+--+---+-------+-----+-------+-------+---------+
   5951  */
   5952 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
   5953 {
   5954     int op0 = extract32(insn, 30, 1);
   5955     int op1 = extract32(insn, 28, 1);
   5956     int op2 = extract32(insn, 21, 4);
   5957     int op3 = extract32(insn, 10, 6);
   5958 
   5959     if (!op1) {
   5960         if (op2 & 8) {
   5961             if (op2 & 1) {
   5962                 /* Add/sub (extended register) */
   5963                 disas_add_sub_ext_reg(s, insn);
   5964             } else {
   5965                 /* Add/sub (shifted register) */
   5966                 disas_add_sub_reg(s, insn);
   5967             }
   5968         } else {
   5969             /* Logical (shifted register) */
   5970             disas_logic_reg(s, insn);
   5971         }
   5972         return;
   5973     }
   5974 
   5975     switch (op2) {
   5976     case 0x0:
   5977         switch (op3) {
   5978         case 0x00: /* Add/subtract (with carry) */
   5979             disas_adc_sbc(s, insn);
   5980             break;
   5981 
   5982         case 0x01: /* Rotate right into flags */
   5983         case 0x21:
   5984             disas_rotate_right_into_flags(s, insn);
   5985             break;
   5986 
   5987         case 0x02: /* Evaluate into flags */
   5988         case 0x12:
   5989         case 0x22:
   5990         case 0x32:
   5991             disas_evaluate_into_flags(s, insn);
   5992             break;
   5993 
   5994         default:
   5995             goto do_unallocated;
   5996         }
   5997         break;
   5998 
   5999     case 0x2: /* Conditional compare */
   6000         disas_cc(s, insn); /* both imm and reg forms */
   6001         break;
   6002 
   6003     case 0x4: /* Conditional select */
   6004         disas_cond_select(s, insn);
   6005         break;
   6006 
   6007     case 0x6: /* Data-processing */
   6008         if (op0) {    /* (1 source) */
   6009             disas_data_proc_1src(s, insn);
   6010         } else {      /* (2 source) */
   6011             disas_data_proc_2src(s, insn);
   6012         }
   6013         break;
   6014     case 0x8 ... 0xf: /* (3 source) */
   6015         disas_data_proc_3src(s, insn);
   6016         break;
   6017 
   6018     default:
   6019     do_unallocated:
   6020         unallocated_encoding(s);
   6021         break;
   6022     }
   6023 }
   6024 
   6025 static void handle_fp_compare(DisasContext *s, int size,
   6026                               unsigned int rn, unsigned int rm,
   6027                               bool cmp_with_zero, bool signal_all_nans)
   6028 {
   6029     TCGv_i64 tcg_flags = tcg_temp_new_i64();
   6030     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
   6031 
   6032     if (size == MO_64) {
   6033         TCGv_i64 tcg_vn, tcg_vm;
   6034 
   6035         tcg_vn = read_fp_dreg(s, rn);
   6036         if (cmp_with_zero) {
   6037             tcg_vm = tcg_constant_i64(0);
   6038         } else {
   6039             tcg_vm = read_fp_dreg(s, rm);
   6040         }
   6041         if (signal_all_nans) {
   6042             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
   6043         } else {
   6044             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
   6045         }
   6046         tcg_temp_free_i64(tcg_vn);
   6047         tcg_temp_free_i64(tcg_vm);
   6048     } else {
   6049         TCGv_i32 tcg_vn = tcg_temp_new_i32();
   6050         TCGv_i32 tcg_vm = tcg_temp_new_i32();
   6051 
   6052         read_vec_element_i32(s, tcg_vn, rn, 0, size);
   6053         if (cmp_with_zero) {
   6054             tcg_gen_movi_i32(tcg_vm, 0);
   6055         } else {
   6056             read_vec_element_i32(s, tcg_vm, rm, 0, size);
   6057         }
   6058 
   6059         switch (size) {
   6060         case MO_32:
   6061             if (signal_all_nans) {
   6062                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
   6063             } else {
   6064                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
   6065             }
   6066             break;
   6067         case MO_16:
   6068             if (signal_all_nans) {
   6069                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
   6070             } else {
   6071                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
   6072             }
   6073             break;
   6074         default:
   6075             g_assert_not_reached();
   6076         }
   6077 
   6078         tcg_temp_free_i32(tcg_vn);
   6079         tcg_temp_free_i32(tcg_vm);
   6080     }
   6081 
   6082     tcg_temp_free_ptr(fpst);
   6083 
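             /* The compare helpers return the architectural flag settings
              * for gen_set_nzcv: EQ -> 0110, LT -> 1000, GT -> 0010,
              * unordered -> 0011 (NZCV).
              */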
   6084     gen_set_nzcv(tcg_flags);
   6085 
   6086     tcg_temp_free_i64(tcg_flags);
   6087 }
   6088 
   6089 /* Floating point compare
   6090  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
   6091  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
   6092  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
   6093  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
   6094  */
   6095 static void disas_fp_compare(DisasContext *s, uint32_t insn)
   6096 {
   6097     unsigned int mos, type, rm, op, rn, opc, op2r;
   6098     int size;
   6099 
   6100     mos = extract32(insn, 29, 3);
   6101     type = extract32(insn, 22, 2);
   6102     rm = extract32(insn, 16, 5);
   6103     op = extract32(insn, 14, 2);
   6104     rn = extract32(insn, 5, 5);
   6105     opc = extract32(insn, 3, 2);
   6106     op2r = extract32(insn, 0, 3);
   6107 
   6108     if (mos || op || op2r) {
   6109         unallocated_encoding(s);
   6110         return;
   6111     }
   6112 
   6113     switch (type) {
   6114     case 0:
   6115         size = MO_32;
   6116         break;
   6117     case 1:
   6118         size = MO_64;
   6119         break;
   6120     case 3:
   6121         size = MO_16;
   6122         if (dc_isar_feature(aa64_fp16, s)) {
   6123             break;
   6124         }
   6125         /* fallthru */
   6126     default:
   6127         unallocated_encoding(s);
   6128         return;
   6129     }
   6130 
   6131     if (!fp_access_check(s)) {
   6132         return;
   6133     }
   6134 
   6135     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
   6136 }
   6137 
   6138 /* Floating point conditional compare
   6139  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
   6140  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
   6141  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
   6142  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
   6143  */
   6144 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
   6145 {
   6146     unsigned int mos, type, rm, cond, rn, op, nzcv;
   6147     TCGLabel *label_continue = NULL;
   6148     int size;
   6149 
   6150     mos = extract32(insn, 29, 3);
   6151     type = extract32(insn, 22, 2);
   6152     rm = extract32(insn, 16, 5);
   6153     cond = extract32(insn, 12, 4);
   6154     rn = extract32(insn, 5, 5);
   6155     op = extract32(insn, 4, 1);
   6156     nzcv = extract32(insn, 0, 4);
   6157 
   6158     if (mos) {
   6159         unallocated_encoding(s);
   6160         return;
   6161     }
   6162 
   6163     switch (type) {
   6164     case 0:
   6165         size = MO_32;
   6166         break;
   6167     case 1:
   6168         size = MO_64;
   6169         break;
   6170     case 3:
   6171         size = MO_16;
   6172         if (dc_isar_feature(aa64_fp16, s)) {
   6173             break;
   6174         }
   6175         /* fallthru */
   6176     default:
   6177         unallocated_encoding(s);
   6178         return;
   6179     }
   6180 
   6181     if (!fp_access_check(s)) {
   6182         return;
   6183     }
   6184 
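             /* When the condition fails, FCCMP simply installs the nzcv
              * immediate as the flags; e.g. nzcv == 0b0011 leaves the
              * "unordered" flag pattern on a failed test.
              */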
   6185     if (cond < 0x0e) { /* not always */
   6186         TCGLabel *label_match = gen_new_label();
   6187         label_continue = gen_new_label();
   6188         arm_gen_test_cc(cond, label_match);
   6189         /* nomatch: */
   6190         gen_set_nzcv(tcg_constant_i64(nzcv << 28));
   6191         tcg_gen_br(label_continue);
   6192         gen_set_label(label_match);
   6193     }
   6194 
   6195     handle_fp_compare(s, size, rn, rm, false, op);
   6196 
   6197     if (cond < 0x0e) {
   6198         gen_set_label(label_continue);
   6199     }
   6200 }
   6201 
   6202 /* Floating point conditional select
   6203  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
   6204  * +---+---+---+-----------+------+---+------+------+-----+------+------+
   6205  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
   6206  * +---+---+---+-----------+------+---+------+------+-----+------+------+
   6207  */
   6208 static void disas_fp_csel(DisasContext *s, uint32_t insn)
   6209 {
   6210     unsigned int mos, type, rm, cond, rn, rd;
   6211     TCGv_i64 t_true, t_false;
   6212     DisasCompare64 c;
   6213     MemOp sz;
   6214 
   6215     mos = extract32(insn, 29, 3);
   6216     type = extract32(insn, 22, 2);
   6217     rm = extract32(insn, 16, 5);
   6218     cond = extract32(insn, 12, 4);
   6219     rn = extract32(insn, 5, 5);
   6220     rd = extract32(insn, 0, 5);
   6221 
   6222     if (mos) {
   6223         unallocated_encoding(s);
   6224         return;
   6225     }
   6226 
   6227     switch (type) {
   6228     case 0:
   6229         sz = MO_32;
   6230         break;
   6231     case 1:
   6232         sz = MO_64;
   6233         break;
   6234     case 3:
   6235         sz = MO_16;
   6236         if (dc_isar_feature(aa64_fp16, s)) {
   6237             break;
   6238         }
   6239         /* fallthru */
   6240     default:
   6241         unallocated_encoding(s);
   6242         return;
   6243     }
   6244 
   6245     if (!fp_access_check(s)) {
   6246         return;
   6247     }
   6248 
   6249     /* Zero extend sreg & hreg inputs to 64 bits now.  */
   6250     t_true = tcg_temp_new_i64();
   6251     t_false = tcg_temp_new_i64();
   6252     read_vec_element(s, t_true, rn, 0, sz);
   6253     read_vec_element(s, t_false, rm, 0, sz);
   6254 
   6255     a64_test_cc(&c, cond);
   6256     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
   6257                         t_true, t_false);
   6258     tcg_temp_free_i64(t_false);
   6259     a64_free_cc(&c);
   6260 
   6261     /* Note that sregs & hregs write back zeros to the high bits,
   6262        and we've already done the zero-extension.  */
   6263     write_fp_dreg(s, rd, t_true);
   6264     tcg_temp_free_i64(t_true);
   6265 }
   6266 
   6267 /* Floating-point data-processing (1 source) - half precision */
   6268 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
   6269 {
   6270     TCGv_ptr fpst = NULL;
   6271     TCGv_i32 tcg_op = read_fp_hreg(s, rn);
   6272     TCGv_i32 tcg_res = tcg_temp_new_i32();
   6273 
   6274     switch (opcode) {
   6275     case 0x0: /* FMOV */
   6276         tcg_gen_mov_i32(tcg_res, tcg_op);
   6277         break;
   6278     case 0x1: /* FABS */
   6279         tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
   6280         break;
   6281     case 0x2: /* FNEG */
   6282         tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
   6283         break;
   6284     case 0x3: /* FSQRT */
   6285         fpst = fpstatus_ptr(FPST_FPCR_F16);
   6286         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
   6287         break;
   6288     case 0x8: /* FRINTN */
   6289     case 0x9: /* FRINTP */
   6290     case 0xa: /* FRINTM */
   6291     case 0xb: /* FRINTZ */
   6292     case 0xc: /* FRINTA */
   6293     {
   6294         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
   6295         fpst = fpstatus_ptr(FPST_FPCR_F16);
   6296 
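                 /* gen_helper_set_rmode installs the new rounding mode and
                  * returns the previous one, so the second call below
                  * restores the original mode.
                  */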
   6297         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
   6298         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
   6299 
   6300         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
   6301         tcg_temp_free_i32(tcg_rmode);
   6302         break;
   6303     }
   6304     case 0xe: /* FRINTX */
   6305         fpst = fpstatus_ptr(FPST_FPCR_F16);
   6306         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
   6307         break;
   6308     case 0xf: /* FRINTI */
   6309         fpst = fpstatus_ptr(FPST_FPCR_F16);
   6310         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
   6311         break;
   6312     default:
   6313         g_assert_not_reached();
   6314     }
   6315 
   6316     write_fp_sreg(s, rd, tcg_res);
   6317 
   6318     if (fpst) {
   6319         tcg_temp_free_ptr(fpst);
   6320     }
   6321     tcg_temp_free_i32(tcg_op);
   6322     tcg_temp_free_i32(tcg_res);
   6323 }
   6324 
   6325 /* Floating-point data-processing (1 source) - single precision */
   6326 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
   6327 {
   6328     void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
   6329     TCGv_i32 tcg_op, tcg_res;
   6330     TCGv_ptr fpst;
   6331     int rmode = -1;
   6332 
   6333     tcg_op = read_fp_sreg(s, rn);
   6334     tcg_res = tcg_temp_new_i32();
   6335 
   6336     switch (opcode) {
   6337     case 0x0: /* FMOV */
   6338         tcg_gen_mov_i32(tcg_res, tcg_op);
   6339         goto done;
   6340     case 0x1: /* FABS */
   6341         gen_helper_vfp_abss(tcg_res, tcg_op);
   6342         goto done;
   6343     case 0x2: /* FNEG */
   6344         gen_helper_vfp_negs(tcg_res, tcg_op);
   6345         goto done;
   6346     case 0x3: /* FSQRT */
   6347         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
   6348         goto done;
   6349     case 0x6: /* BFCVT */
   6350         gen_fpst = gen_helper_bfcvt;
   6351         break;
   6352     case 0x8: /* FRINTN */
   6353     case 0x9: /* FRINTP */
   6354     case 0xa: /* FRINTM */
   6355     case 0xb: /* FRINTZ */
   6356     case 0xc: /* FRINTA */
   6357         rmode = arm_rmode_to_sf(opcode & 7);
   6358         gen_fpst = gen_helper_rints;
   6359         break;
   6360     case 0xe: /* FRINTX */
   6361         gen_fpst = gen_helper_rints_exact;
   6362         break;
   6363     case 0xf: /* FRINTI */
   6364         gen_fpst = gen_helper_rints;
   6365         break;
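             /* FRINT32x/FRINT64x (FEAT_FRINTTS) round to an integral value
              * that fits in 32 or 64 bits; the Z forms force round-to-zero
              * while the X forms use the FPCR rounding mode.
              */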
   6366     case 0x10: /* FRINT32Z */
   6367         rmode = float_round_to_zero;
   6368         gen_fpst = gen_helper_frint32_s;
   6369         break;
   6370     case 0x11: /* FRINT32X */
   6371         gen_fpst = gen_helper_frint32_s;
   6372         break;
   6373     case 0x12: /* FRINT64Z */
   6374         rmode = float_round_to_zero;
   6375         gen_fpst = gen_helper_frint64_s;
   6376         break;
   6377     case 0x13: /* FRINT64X */
   6378         gen_fpst = gen_helper_frint64_s;
   6379         break;
   6380     default:
   6381         g_assert_not_reached();
   6382     }
   6383 
   6384     fpst = fpstatus_ptr(FPST_FPCR);
   6385     if (rmode >= 0) {
   6386         TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
   6387         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
   6388         gen_fpst(tcg_res, tcg_op, fpst);
   6389         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
   6390         tcg_temp_free_i32(tcg_rmode);
   6391     } else {
   6392         gen_fpst(tcg_res, tcg_op, fpst);
   6393     }
   6394     tcg_temp_free_ptr(fpst);
   6395 
   6396  done:
   6397     write_fp_sreg(s, rd, tcg_res);
   6398     tcg_temp_free_i32(tcg_op);
   6399     tcg_temp_free_i32(tcg_res);
   6400 }
   6401 
   6402 /* Floating-point data-processing (1 source) - double precision */
   6403 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
   6404 {
   6405     void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
   6406     TCGv_i64 tcg_op, tcg_res;
   6407     TCGv_ptr fpst;
   6408     int rmode = -1;
   6409 
   6410     switch (opcode) {
   6411     case 0x0: /* FMOV */
   6412         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
   6413         return;
   6414     }
   6415 
   6416     tcg_op = read_fp_dreg(s, rn);
   6417     tcg_res = tcg_temp_new_i64();
   6418 
   6419     switch (opcode) {
   6420     case 0x1: /* FABS */
   6421         gen_helper_vfp_absd(tcg_res, tcg_op);
   6422         goto done;
   6423     case 0x2: /* FNEG */
   6424         gen_helper_vfp_negd(tcg_res, tcg_op);
   6425         goto done;
   6426     case 0x3: /* FSQRT */
   6427         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
   6428         goto done;
   6429     case 0x8: /* FRINTN */
   6430     case 0x9: /* FRINTP */
   6431     case 0xa: /* FRINTM */
   6432     case 0xb: /* FRINTZ */
   6433     case 0xc: /* FRINTA */
   6434         rmode = arm_rmode_to_sf(opcode & 7);
   6435         gen_fpst = gen_helper_rintd;
   6436         break;
   6437     case 0xe: /* FRINTX */
   6438         gen_fpst = gen_helper_rintd_exact;
   6439         break;
   6440     case 0xf: /* FRINTI */
   6441         gen_fpst = gen_helper_rintd;
   6442         break;
   6443     case 0x10: /* FRINT32Z */
   6444         rmode = float_round_to_zero;
   6445         gen_fpst = gen_helper_frint32_d;
   6446         break;
   6447     case 0x11: /* FRINT32X */
   6448         gen_fpst = gen_helper_frint32_d;
   6449         break;
   6450     case 0x12: /* FRINT64Z */
   6451         rmode = float_round_to_zero;
   6452         gen_fpst = gen_helper_frint64_d;
   6453         break;
   6454     case 0x13: /* FRINT64X */
   6455         gen_fpst = gen_helper_frint64_d;
   6456         break;
   6457     default:
   6458         g_assert_not_reached();
   6459     }
   6460 
   6461     fpst = fpstatus_ptr(FPST_FPCR);
   6462     if (rmode >= 0) {
   6463         TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
   6464         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
   6465         gen_fpst(tcg_res, tcg_op, fpst);
   6466         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
   6467         tcg_temp_free_i32(tcg_rmode);
   6468     } else {
   6469         gen_fpst(tcg_res, tcg_op, fpst);
   6470     }
   6471     tcg_temp_free_ptr(fpst);
   6472 
   6473  done:
   6474     write_fp_dreg(s, rd, tcg_res);
   6475     tcg_temp_free_i64(tcg_op);
   6476     tcg_temp_free_i64(tcg_res);
   6477 }
   6478 
   6479 static void handle_fp_fcvt(DisasContext *s, int opcode,
   6480                            int rd, int rn, int dtype, int ntype)
   6481 {
   6482     switch (ntype) {
   6483     case 0x0:
   6484     {
   6485         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
   6486         if (dtype == 1) {
   6487             /* Single to double */
   6488             TCGv_i64 tcg_rd = tcg_temp_new_i64();
   6489             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
   6490             write_fp_dreg(s, rd, tcg_rd);
   6491             tcg_temp_free_i64(tcg_rd);
   6492         } else {
   6493             /* Single to half */
   6494             TCGv_i32 tcg_rd = tcg_temp_new_i32();
   6495             TCGv_i32 ahp = get_ahp_flag();
   6496             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
   6497 
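                     /* ahp passes FPCR.AHP to the helper so the alternative
                      * half-precision format (no infinities or NaNs) is
                      * honoured.
                      */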
   6498             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
   6499             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
   6500             write_fp_sreg(s, rd, tcg_rd);
   6501             tcg_temp_free_i32(tcg_rd);
   6502             tcg_temp_free_i32(ahp);
   6503             tcg_temp_free_ptr(fpst);
   6504         }
   6505         tcg_temp_free_i32(tcg_rn);
   6506         break;
   6507     }
   6508     case 0x1:
   6509     {
   6510         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
   6511         TCGv_i32 tcg_rd = tcg_temp_new_i32();
   6512         if (dtype == 0) {
   6513             /* Double to single */
   6514             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
   6515         } else {
   6516             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
   6517             TCGv_i32 ahp = get_ahp_flag();
   6518             /* Double to half */
   6519             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
   6520             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
   6521             tcg_temp_free_ptr(fpst);
   6522             tcg_temp_free_i32(ahp);
   6523         }
   6524         write_fp_sreg(s, rd, tcg_rd);
   6525         tcg_temp_free_i32(tcg_rd);
   6526         tcg_temp_free_i64(tcg_rn);
   6527         break;
   6528     }
   6529     case 0x3:
   6530     {
   6531         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
   6532         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
   6533         TCGv_i32 tcg_ahp = get_ahp_flag();
   6534         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
   6535         if (dtype == 0) {
   6536             /* Half to single */
   6537             TCGv_i32 tcg_rd = tcg_temp_new_i32();
   6538             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
   6539             write_fp_sreg(s, rd, tcg_rd);
   6540             tcg_temp_free_i32(tcg_rd);
   6541         } else {
   6542             /* Half to double */
   6543             TCGv_i64 tcg_rd = tcg_temp_new_i64();
   6544             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
   6545             write_fp_dreg(s, rd, tcg_rd);
   6546             tcg_temp_free_i64(tcg_rd);
   6547         }
   6548         tcg_temp_free_i32(tcg_rn);
   6549         tcg_temp_free_ptr(tcg_fpst);
   6550         tcg_temp_free_i32(tcg_ahp);
   6551         break;
   6552     }
   6553     default:
   6554         g_assert_not_reached();
   6555     }
   6556 }
   6557 
   6558 /* Floating point data-processing (1 source)
   6559  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
   6560  * +---+---+---+-----------+------+---+--------+-----------+------+------+
   6561  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
   6562  * +---+---+---+-----------+------+---+--------+-----------+------+------+
   6563  */
   6564 static void disas_fp_1src(DisasContext *s, uint32_t insn)
   6565 {
   6566     int mos = extract32(insn, 29, 3);
   6567     int type = extract32(insn, 22, 2);
   6568     int opcode = extract32(insn, 15, 6);
   6569     int rn = extract32(insn, 5, 5);
   6570     int rd = extract32(insn, 0, 5);
   6571 
   6572     if (mos) {
   6573         goto do_unallocated;
   6574     }
   6575 
   6576     switch (opcode) {
   6577     case 0x4: case 0x5: case 0x7:
   6578     {
   6579         /* FCVT between half, single and double precision */
   6580         int dtype = extract32(opcode, 0, 2);
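                 /* type 2 is reserved, and converting to the same precision
                  * (dtype == type) is unallocated.
                  */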
   6581         if (type == 2 || dtype == type) {
   6582             goto do_unallocated;
   6583         }
   6584         if (!fp_access_check(s)) {
   6585             return;
   6586         }
   6587 
   6588         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
   6589         break;
   6590     }
   6591 
   6592     case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
   6593         if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
   6594             goto do_unallocated;
   6595         }
   6596         /* fall through */
   6597     case 0x0 ... 0x3:
   6598     case 0x8 ... 0xc:
   6599     case 0xe ... 0xf:
   6600         /* 32-to-32 and 64-to-64 ops */
   6601         switch (type) {
   6602         case 0:
   6603             if (!fp_access_check(s)) {
   6604                 return;
   6605             }
   6606             handle_fp_1src_single(s, opcode, rd, rn);
   6607             break;
   6608         case 1:
   6609             if (!fp_access_check(s)) {
   6610                 return;
   6611             }
   6612             handle_fp_1src_double(s, opcode, rd, rn);
   6613             break;
   6614         case 3:
   6615             if (!dc_isar_feature(aa64_fp16, s)) {
   6616                 goto do_unallocated;
   6617             }
   6618 
   6619             if (!fp_access_check(s)) {
   6620                 return;
   6621             }
   6622             handle_fp_1src_half(s, opcode, rd, rn);
   6623             break;
   6624         default:
   6625             goto do_unallocated;
   6626         }
   6627         break;
   6628 
   6629     case 0x6:
   6630         switch (type) {
   6631         case 1: /* BFCVT */
   6632             if (!dc_isar_feature(aa64_bf16, s)) {
   6633                 goto do_unallocated;
   6634             }
   6635             if (!fp_access_check(s)) {
   6636                 return;
   6637             }
   6638             handle_fp_1src_single(s, opcode, rd, rn);
   6639             break;
   6640         default:
   6641             goto do_unallocated;
   6642         }
   6643         break;
   6644 
   6645     default:
   6646     do_unallocated:
   6647         unallocated_encoding(s);
   6648         break;
   6649     }
   6650 }
   6651 
   6652 /* Floating-point data-processing (2 source) - single precision */
   6653 static void handle_fp_2src_single(DisasContext *s, int opcode,
   6654                                   int rd, int rn, int rm)
   6655 {
   6656     TCGv_i32 tcg_op1;
   6657     TCGv_i32 tcg_op2;
   6658     TCGv_i32 tcg_res;
   6659     TCGv_ptr fpst;
   6660 
   6661     tcg_res = tcg_temp_new_i32();
   6662     fpst = fpstatus_ptr(FPST_FPCR);
   6663     tcg_op1 = read_fp_sreg(s, rn);
   6664     tcg_op2 = read_fp_sreg(s, rm);
   6665 
   6666     switch (opcode) {
   6667     case 0x0: /* FMUL */
   6668         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
   6669         break;
   6670     case 0x1: /* FDIV */
   6671         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
   6672         break;
   6673     case 0x2: /* FADD */
   6674         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
   6675         break;
   6676     case 0x3: /* FSUB */
   6677         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
   6678         break;
   6679     case 0x4: /* FMAX */
   6680         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
   6681         break;
   6682     case 0x5: /* FMIN */
   6683         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
   6684         break;
   6685     case 0x6: /* FMAXNM */
   6686         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
   6687         break;
   6688     case 0x7: /* FMINNM */
   6689         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
   6690         break;
   6691     case 0x8: /* FNMUL */
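                 /* FNMUL is a negated multiply, not a fused operation:
                  * negate the already-rounded product.
                  */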
   6692         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
   6693         gen_helper_vfp_negs(tcg_res, tcg_res);
   6694         break;
   6695     }
   6696 
   6697     write_fp_sreg(s, rd, tcg_res);
   6698 
   6699     tcg_temp_free_ptr(fpst);
   6700     tcg_temp_free_i32(tcg_op1);
   6701     tcg_temp_free_i32(tcg_op2);
   6702     tcg_temp_free_i32(tcg_res);
   6703 }
   6704 
   6705 /* Floating-point data-processing (2 source) - double precision */
   6706 static void handle_fp_2src_double(DisasContext *s, int opcode,
   6707                                   int rd, int rn, int rm)
   6708 {
   6709     TCGv_i64 tcg_op1;
   6710     TCGv_i64 tcg_op2;
   6711     TCGv_i64 tcg_res;
   6712     TCGv_ptr fpst;
   6713 
   6714     tcg_res = tcg_temp_new_i64();
   6715     fpst = fpstatus_ptr(FPST_FPCR);
   6716     tcg_op1 = read_fp_dreg(s, rn);
   6717     tcg_op2 = read_fp_dreg(s, rm);
   6718 
   6719     switch (opcode) {
   6720     case 0x0: /* FMUL */
   6721         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
   6722         break;
   6723     case 0x1: /* FDIV */
   6724         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
   6725         break;
   6726     case 0x2: /* FADD */
   6727         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
   6728         break;
   6729     case 0x3: /* FSUB */
   6730         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
   6731         break;
   6732     case 0x4: /* FMAX */
   6733         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
   6734         break;
   6735     case 0x5: /* FMIN */
   6736         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
   6737         break;
   6738     case 0x6: /* FMAXNM */
   6739         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
   6740         break;
   6741     case 0x7: /* FMINNM */
   6742         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
   6743         break;
   6744     case 0x8: /* FNMUL */
   6745         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
   6746         gen_helper_vfp_negd(tcg_res, tcg_res);
   6747         break;
   6748     }
   6749 
   6750     write_fp_dreg(s, rd, tcg_res);
   6751 
   6752     tcg_temp_free_ptr(fpst);
   6753     tcg_temp_free_i64(tcg_op1);
   6754     tcg_temp_free_i64(tcg_op2);
   6755     tcg_temp_free_i64(tcg_res);
   6756 }
   6757 
   6758 /* Floating-point data-processing (2 source) - half precision */
   6759 static void handle_fp_2src_half(DisasContext *s, int opcode,
   6760                                 int rd, int rn, int rm)
   6761 {
   6762     TCGv_i32 tcg_op1;
   6763     TCGv_i32 tcg_op2;
   6764     TCGv_i32 tcg_res;
   6765     TCGv_ptr fpst;
   6766 
   6767     tcg_res = tcg_temp_new_i32();
   6768     fpst = fpstatus_ptr(FPST_FPCR_F16);
   6769     tcg_op1 = read_fp_hreg(s, rn);
   6770     tcg_op2 = read_fp_hreg(s, rm);
   6771 
   6772     switch (opcode) {
   6773     case 0x0: /* FMUL */
   6774         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
   6775         break;
   6776     case 0x1: /* FDIV */
   6777         gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
   6778         break;
   6779     case 0x2: /* FADD */
   6780         gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
   6781         break;
   6782     case 0x3: /* FSUB */
   6783         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
   6784         break;
   6785     case 0x4: /* FMAX */
   6786         gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
   6787         break;
   6788     case 0x5: /* FMIN */
   6789         gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
   6790         break;
   6791     case 0x6: /* FMAXNM */
   6792         gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
   6793         break;
   6794     case 0x7: /* FMINNM */
   6795         gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
   6796         break;
   6797     case 0x8: /* FNMUL */
   6798         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
   6799         tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
   6800         break;
   6801     default:
   6802         g_assert_not_reached();
   6803     }
   6804 
   6805     write_fp_sreg(s, rd, tcg_res);
   6806 
   6807     tcg_temp_free_ptr(fpst);
   6808     tcg_temp_free_i32(tcg_op1);
   6809     tcg_temp_free_i32(tcg_op2);
   6810     tcg_temp_free_i32(tcg_res);
   6811 }
   6812 
   6813 /* Floating point data-processing (2 source)
   6814  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
   6815  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
   6816  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
   6817  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
   6818  */
   6819 static void disas_fp_2src(DisasContext *s, uint32_t insn)
   6820 {
   6821     int mos = extract32(insn, 29, 3);
   6822     int type = extract32(insn, 22, 2);
   6823     int rd = extract32(insn, 0, 5);
   6824     int rn = extract32(insn, 5, 5);
   6825     int rm = extract32(insn, 16, 5);
   6826     int opcode = extract32(insn, 12, 4);
   6827 
   6828     if (opcode > 8 || mos) {
   6829         unallocated_encoding(s);
   6830         return;
   6831     }
   6832 
   6833     switch (type) {
   6834     case 0:
   6835         if (!fp_access_check(s)) {
   6836             return;
   6837         }
   6838         handle_fp_2src_single(s, opcode, rd, rn, rm);
   6839         break;
   6840     case 1:
   6841         if (!fp_access_check(s)) {
   6842             return;
   6843         }
   6844         handle_fp_2src_double(s, opcode, rd, rn, rm);
   6845         break;
   6846     case 3:
   6847         if (!dc_isar_feature(aa64_fp16, s)) {
   6848             unallocated_encoding(s);
   6849             return;
   6850         }
   6851         if (!fp_access_check(s)) {
   6852             return;
   6853         }
   6854         handle_fp_2src_half(s, opcode, rd, rn, rm);
   6855         break;
   6856     default:
   6857         unallocated_encoding(s);
   6858     }
   6859 }
   6860 
   6861 /* Floating-point data-processing (3 source) - single precision */
   6862 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
   6863                                   int rd, int rn, int rm, int ra)
   6864 {
   6865     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
   6866     TCGv_i32 tcg_res = tcg_temp_new_i32();
   6867     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
   6868 
   6869     tcg_op1 = read_fp_sreg(s, rn);
   6870     tcg_op2 = read_fp_sreg(s, rm);
   6871     tcg_op3 = read_fp_sreg(s, ra);
   6872 
   6873     /* These are fused multiply-add, and must be done as one
   6874      * floating point operation with no rounding between the
   6875      * multiplication and addition steps.
   6876      * NB that doing the negations here as separate steps is
    6877      * correct: an input NaN should come out with its sign bit
    6878      * flipped if it is a negated input.
   6879      */
   6880     if (o1 == true) {
   6881         gen_helper_vfp_negs(tcg_op3, tcg_op3);
   6882     }
   6883 
   6884     if (o0 != o1) {
   6885         gen_helper_vfp_negs(tcg_op1, tcg_op1);
   6886     }
   6887 
   6888     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
   6889 
   6890     write_fp_sreg(s, rd, tcg_res);
   6891 
   6892     tcg_temp_free_ptr(fpst);
   6893     tcg_temp_free_i32(tcg_op1);
   6894     tcg_temp_free_i32(tcg_op2);
   6895     tcg_temp_free_i32(tcg_op3);
   6896     tcg_temp_free_i32(tcg_res);
   6897 }
   6898 
   6899 /* Floating-point data-processing (3 source) - double precision */
   6900 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
   6901                                   int rd, int rn, int rm, int ra)
   6902 {
   6903     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
   6904     TCGv_i64 tcg_res = tcg_temp_new_i64();
   6905     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
   6906 
   6907     tcg_op1 = read_fp_dreg(s, rn);
   6908     tcg_op2 = read_fp_dreg(s, rm);
   6909     tcg_op3 = read_fp_dreg(s, ra);
   6910 
   6911     /* These are fused multiply-add, and must be done as one
   6912      * floating point operation with no rounding between the
   6913      * multiplication and addition steps.
   6914      * NB that doing the negations here as separate steps is
    6915      * correct: an input NaN should come out with its sign bit
    6916      * flipped if it is a negated input.
   6917      */
   6918     if (o1 == true) {
   6919         gen_helper_vfp_negd(tcg_op3, tcg_op3);
   6920     }
   6921 
   6922     if (o0 != o1) {
   6923         gen_helper_vfp_negd(tcg_op1, tcg_op1);
   6924     }
   6925 
   6926     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
   6927 
   6928     write_fp_dreg(s, rd, tcg_res);
   6929 
   6930     tcg_temp_free_ptr(fpst);
   6931     tcg_temp_free_i64(tcg_op1);
   6932     tcg_temp_free_i64(tcg_op2);
   6933     tcg_temp_free_i64(tcg_op3);
   6934     tcg_temp_free_i64(tcg_res);
   6935 }
   6936 
   6937 /* Floating-point data-processing (3 source) - half precision */
   6938 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
   6939                                 int rd, int rn, int rm, int ra)
   6940 {
   6941     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
   6942     TCGv_i32 tcg_res = tcg_temp_new_i32();
   6943     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);
   6944 
   6945     tcg_op1 = read_fp_hreg(s, rn);
   6946     tcg_op2 = read_fp_hreg(s, rm);
   6947     tcg_op3 = read_fp_hreg(s, ra);
   6948 
   6949     /* These are fused multiply-add, and must be done as one
   6950      * floating point operation with no rounding between the
   6951      * multiplication and addition steps.
   6952      * NB that doing the negations here as separate steps is
    6953      * correct: an input NaN should come out with its sign bit
    6954      * flipped if it is a negated input.
   6955      */
   6956     if (o1 == true) {
   6957         tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
   6958     }
   6959 
   6960     if (o0 != o1) {
   6961         tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
   6962     }
   6963 
   6964     gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
   6965 
   6966     write_fp_sreg(s, rd, tcg_res);
   6967 
   6968     tcg_temp_free_ptr(fpst);
   6969     tcg_temp_free_i32(tcg_op1);
   6970     tcg_temp_free_i32(tcg_op2);
   6971     tcg_temp_free_i32(tcg_op3);
   6972     tcg_temp_free_i32(tcg_res);
   6973 }
   6974 
   6975 /* Floating point data-processing (3 source)
   6976  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
   6977  * +---+---+---+-----------+------+----+------+----+------+------+------+
   6978  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
   6979  * +---+---+---+-----------+------+----+------+----+------+------+------+
   6980  */
   6981 static void disas_fp_3src(DisasContext *s, uint32_t insn)
   6982 {
   6983     int mos = extract32(insn, 29, 3);
   6984     int type = extract32(insn, 22, 2);
   6985     int rd = extract32(insn, 0, 5);
   6986     int rn = extract32(insn, 5, 5);
   6987     int ra = extract32(insn, 10, 5);
   6988     int rm = extract32(insn, 16, 5);
   6989     bool o0 = extract32(insn, 15, 1);
   6990     bool o1 = extract32(insn, 21, 1);
   6991 
   6992     if (mos) {
   6993         unallocated_encoding(s);
   6994         return;
   6995     }
   6996 
   6997     switch (type) {
   6998     case 0:
   6999         if (!fp_access_check(s)) {
   7000             return;
   7001         }
   7002         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
   7003         break;
   7004     case 1:
   7005         if (!fp_access_check(s)) {
   7006             return;
   7007         }
   7008         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
   7009         break;
   7010     case 3:
   7011         if (!dc_isar_feature(aa64_fp16, s)) {
   7012             unallocated_encoding(s);
   7013             return;
   7014         }
   7015         if (!fp_access_check(s)) {
   7016             return;
   7017         }
   7018         handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
   7019         break;
   7020     default:
   7021         unallocated_encoding(s);
   7022     }
   7023 }
   7024 
   7025 /* Floating point immediate
   7026  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
   7027  * +---+---+---+-----------+------+---+------------+-------+------+------+
   7028  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
   7029  * +---+---+---+-----------+------+---+------------+-------+------+------+
   7030  */
   7031 static void disas_fp_imm(DisasContext *s, uint32_t insn)
   7032 {
   7033     int rd = extract32(insn, 0, 5);
   7034     int imm5 = extract32(insn, 5, 5);
   7035     int imm8 = extract32(insn, 13, 8);
   7036     int type = extract32(insn, 22, 2);
   7037     int mos = extract32(insn, 29, 3);
   7038     uint64_t imm;
   7039     MemOp sz;
   7040 
   7041     if (mos || imm5) {
   7042         unallocated_encoding(s);
   7043         return;
   7044     }
   7045 
   7046     switch (type) {
   7047     case 0:
   7048         sz = MO_32;
   7049         break;
   7050     case 1:
   7051         sz = MO_64;
   7052         break;
   7053     case 3:
   7054         sz = MO_16;
   7055         if (dc_isar_feature(aa64_fp16, s)) {
   7056             break;
   7057         }
   7058         /* fallthru */
   7059     default:
   7060         unallocated_encoding(s);
   7061         return;
   7062     }
   7063 
   7064     if (!fp_access_check(s)) {
   7065         return;
   7066     }
   7067 
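             /* imm8 packs sign, 3 exponent bits and 4 fraction bits; e.g.
              * imm8 == 0x70 expands to 1.0 in every size
              * (0x3ff0000000000000 for MO_64).
              */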
   7068     imm = vfp_expand_imm(sz, imm8);
   7069     write_fp_dreg(s, rd, tcg_constant_i64(imm));
   7070 }
   7071 
   7072 /* Handle floating point <=> fixed point conversions. Note that we can
    7073  * also deal with fp <=> integer conversions as a special case (scale == 64).
   7074  * OPTME: consider handling that special case specially or at least skipping
   7075  * the call to scalbn in the helpers for zero shifts.
   7076  */
   7077 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
   7078                            bool itof, int rmode, int scale, int sf, int type)
   7079 {
   7080     bool is_signed = !(opcode & 1);
   7081     TCGv_ptr tcg_fpstatus;
   7082     TCGv_i32 tcg_shift, tcg_single;
   7083     TCGv_i64 tcg_double;
   7084 
   7085     tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);
   7086 
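             /* The fixed-point fraction width is 64 - scale, so the integer
              * conversions (called with scale == 64) use a shift of zero.
              */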
   7087     tcg_shift = tcg_constant_i32(64 - scale);
   7088 
   7089     if (itof) {
   7090         TCGv_i64 tcg_int = cpu_reg(s, rn);
   7091         if (!sf) {
   7092             TCGv_i64 tcg_extend = new_tmp_a64(s);
   7093 
   7094             if (is_signed) {
   7095                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
   7096             } else {
   7097                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
   7098             }
   7099 
   7100             tcg_int = tcg_extend;
   7101         }
   7102 
   7103         switch (type) {
   7104         case 1: /* float64 */
   7105             tcg_double = tcg_temp_new_i64();
   7106             if (is_signed) {
   7107                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
   7108                                      tcg_shift, tcg_fpstatus);
   7109             } else {
   7110                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
   7111                                      tcg_shift, tcg_fpstatus);
   7112             }
   7113             write_fp_dreg(s, rd, tcg_double);
   7114             tcg_temp_free_i64(tcg_double);
   7115             break;
   7116 
   7117         case 0: /* float32 */
   7118             tcg_single = tcg_temp_new_i32();
   7119             if (is_signed) {
   7120                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
   7121                                      tcg_shift, tcg_fpstatus);
   7122             } else {
   7123                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
   7124                                      tcg_shift, tcg_fpstatus);
   7125             }
   7126             write_fp_sreg(s, rd, tcg_single);
   7127             tcg_temp_free_i32(tcg_single);
   7128             break;
   7129 
   7130         case 3: /* float16 */
   7131             tcg_single = tcg_temp_new_i32();
   7132             if (is_signed) {
   7133                 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
   7134                                      tcg_shift, tcg_fpstatus);
   7135             } else {
   7136                 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
   7137                                      tcg_shift, tcg_fpstatus);
   7138             }
   7139             write_fp_sreg(s, rd, tcg_single);
   7140             tcg_temp_free_i32(tcg_single);
   7141             break;
   7142 
   7143         default:
   7144             g_assert_not_reached();
   7145         }
   7146     } else {
   7147         TCGv_i64 tcg_int = cpu_reg(s, rd);
   7148         TCGv_i32 tcg_rmode;
   7149 
   7150         if (extract32(opcode, 2, 1)) {
   7151             /* There are too many rounding modes to all fit into rmode,
   7152              * so FCVTA[US] is a special case.
   7153              */
   7154             rmode = FPROUNDING_TIEAWAY;
   7155         }
   7156 
   7157         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
   7158 
   7159         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
   7160 
   7161         switch (type) {
   7162         case 1: /* float64 */
   7163             tcg_double = read_fp_dreg(s, rn);
   7164             if (is_signed) {
   7165                 if (!sf) {
   7166                     gen_helper_vfp_tosld(tcg_int, tcg_double,
   7167                                          tcg_shift, tcg_fpstatus);
   7168                 } else {
   7169                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
   7170                                          tcg_shift, tcg_fpstatus);
   7171                 }
   7172             } else {
   7173                 if (!sf) {
   7174                     gen_helper_vfp_tould(tcg_int, tcg_double,
   7175                                          tcg_shift, tcg_fpstatus);
   7176                 } else {
   7177                     gen_helper_vfp_touqd(tcg_int, tcg_double,
   7178                                          tcg_shift, tcg_fpstatus);
   7179                 }
   7180             }
   7181             if (!sf) {
   7182                 tcg_gen_ext32u_i64(tcg_int, tcg_int);
   7183             }
   7184             tcg_temp_free_i64(tcg_double);
   7185             break;
   7186 
   7187         case 0: /* float32 */
   7188             tcg_single = read_fp_sreg(s, rn);
   7189             if (sf) {
   7190                 if (is_signed) {
   7191                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
   7192                                          tcg_shift, tcg_fpstatus);
   7193                 } else {
   7194                     gen_helper_vfp_touqs(tcg_int, tcg_single,
   7195                                          tcg_shift, tcg_fpstatus);
   7196                 }
   7197             } else {
   7198                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
   7199                 if (is_signed) {
   7200                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
   7201                                          tcg_shift, tcg_fpstatus);
   7202                 } else {
   7203                     gen_helper_vfp_touls(tcg_dest, tcg_single,
   7204                                          tcg_shift, tcg_fpstatus);
   7205                 }
   7206                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
   7207                 tcg_temp_free_i32(tcg_dest);
   7208             }
   7209             tcg_temp_free_i32(tcg_single);
   7210             break;
   7211 
   7212         case 3: /* float16 */
   7213             tcg_single = read_fp_sreg(s, rn);
   7214             if (sf) {
   7215                 if (is_signed) {
   7216                     gen_helper_vfp_tosqh(tcg_int, tcg_single,
   7217                                          tcg_shift, tcg_fpstatus);
   7218                 } else {
   7219                     gen_helper_vfp_touqh(tcg_int, tcg_single,
   7220                                          tcg_shift, tcg_fpstatus);
   7221                 }
   7222             } else {
   7223                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
   7224                 if (is_signed) {
   7225                     gen_helper_vfp_toslh(tcg_dest, tcg_single,
   7226                                          tcg_shift, tcg_fpstatus);
   7227                 } else {
   7228                     gen_helper_vfp_toulh(tcg_dest, tcg_single,
   7229                                          tcg_shift, tcg_fpstatus);
   7230                 }
   7231                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
   7232                 tcg_temp_free_i32(tcg_dest);
   7233             }
   7234             tcg_temp_free_i32(tcg_single);
   7235             break;
   7236 
   7237         default:
   7238             g_assert_not_reached();
   7239         }
   7240 
   7241         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
   7242         tcg_temp_free_i32(tcg_rmode);
   7243     }
   7244 
   7245     tcg_temp_free_ptr(tcg_fpstatus);
   7246 }
   7247 
   7248 /* Floating point <-> fixed point conversions
   7249  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
   7250  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
   7251  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
   7252  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
   7253  */
   7254 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
   7255 {
   7256     int rd = extract32(insn, 0, 5);
   7257     int rn = extract32(insn, 5, 5);
   7258     int scale = extract32(insn, 10, 6);
   7259     int opcode = extract32(insn, 16, 3);
   7260     int rmode = extract32(insn, 19, 2);
   7261     int type = extract32(insn, 22, 2);
   7262     bool sbit = extract32(insn, 29, 1);
   7263     bool sf = extract32(insn, 31, 1);
   7264     bool itof;
   7265 
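             /* For the 32-bit forms the fraction width 64 - scale must not
              * exceed 32, i.e. scale must be at least 32.
              */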
   7266     if (sbit || (!sf && scale < 32)) {
   7267         unallocated_encoding(s);
   7268         return;
   7269     }
   7270 
   7271     switch (type) {
   7272     case 0: /* float32 */
   7273     case 1: /* float64 */
   7274         break;
   7275     case 3: /* float16 */
   7276         if (dc_isar_feature(aa64_fp16, s)) {
   7277             break;
   7278         }
   7279         /* fallthru */
   7280     default:
   7281         unallocated_encoding(s);
   7282         return;
   7283     }
   7284 
   7285     switch ((rmode << 3) | opcode) {
   7286     case 0x2: /* SCVTF */
   7287     case 0x3: /* UCVTF */
   7288         itof = true;
   7289         break;
   7290     case 0x18: /* FCVTZS */
   7291     case 0x19: /* FCVTZU */
   7292         itof = false;
   7293         break;
   7294     default:
   7295         unallocated_encoding(s);
   7296         return;
   7297     }
   7298 
   7299     if (!fp_access_check(s)) {
   7300         return;
   7301     }
   7302 
   7303     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
   7304 }
   7305 
   7306 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
   7307 {
   7308     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
   7309      * without conversion.
   7310      */
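             /* type 2 is the FMOV Xd, Vn.D[1] / FMOV Vd.D[1], Xn form that
              * accesses the top half of a 128-bit vector register.
              */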
   7311 
   7312     if (itof) {
   7313         TCGv_i64 tcg_rn = cpu_reg(s, rn);
   7314         TCGv_i64 tmp;
   7315 
   7316         switch (type) {
   7317         case 0:
   7318             /* 32 bit */
   7319             tmp = tcg_temp_new_i64();
   7320             tcg_gen_ext32u_i64(tmp, tcg_rn);
   7321             write_fp_dreg(s, rd, tmp);
   7322             tcg_temp_free_i64(tmp);
   7323             break;
   7324         case 1:
   7325             /* 64 bit */
   7326             write_fp_dreg(s, rd, tcg_rn);
   7327             break;
   7328         case 2:
   7329             /* 64 bit to top half. */
   7330             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
   7331             clear_vec_high(s, true, rd);
   7332             break;
   7333         case 3:
   7334             /* 16 bit */
   7335             tmp = tcg_temp_new_i64();
   7336             tcg_gen_ext16u_i64(tmp, tcg_rn);
   7337             write_fp_dreg(s, rd, tmp);
   7338             tcg_temp_free_i64(tmp);
   7339             break;
   7340         default:
   7341             g_assert_not_reached();
   7342         }
   7343     } else {
   7344         TCGv_i64 tcg_rd = cpu_reg(s, rd);
   7345 
   7346         switch (type) {
   7347         case 0:
   7348             /* 32 bit */
   7349             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
   7350             break;
   7351         case 1:
   7352             /* 64 bit */
   7353             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
   7354             break;
   7355         case 2:
   7356             /* 64 bits from top half */
   7357             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
   7358             break;
   7359         case 3:
   7360             /* 16 bit */
   7361             tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
   7362             break;
   7363         default:
   7364             g_assert_not_reached();
   7365         }
   7366     }
   7367 }
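
         /*
          * Illustrative sketch, not part of the translator: "without
          * conversion" above means FMOV is a raw bit copy.  A minimal host
          * model of the 64-bit direction, assuming IEEE-754 doubles and kept
          * out of the build (model_fmov_bits is a hypothetical name):
          */
         #if 0
         static double model_fmov_bits(uint64_t xreg)
         {
             double d;
             /* reinterpret the pattern, e.g. 0x3ff0000000000000 -> 1.0 ... */
             memcpy(&d, &xreg, sizeof(d));
             /* ... whereas (double)xreg would be an integer-to-FP conversion */
             return d;
         }
         #endif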
   7368 
   7369 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
   7370 {
   7371     TCGv_i64 t = read_fp_dreg(s, rn);
   7372     TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
   7373 
   7374     gen_helper_fjcvtzs(t, t, fpstatus);
   7375 
   7376     tcg_temp_free_ptr(fpstatus);
   7377 
   7378     tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
   7379     tcg_gen_extrh_i64_i32(cpu_ZF, t);
   7380     tcg_gen_movi_i32(cpu_CF, 0);
   7381     tcg_gen_movi_i32(cpu_NF, 0);
   7382     tcg_gen_movi_i32(cpu_VF, 0);
   7383 
   7384     tcg_temp_free_i64(t);
   7385 }
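
         /*
          * Illustrative sketch: FJCVTZS implements the ECMAScript ToInt32()
          * truncation - round toward zero, wrap modulo 2^32.  The helper packs
          * flag information into the high 32 bits, which the code above copies
          * into cpu_ZF (QEMU's Z flag reads as set when cpu_ZF == 0).  A
          * minimal host model, valid only for non-NaN |d| < 2^63 and kept out
          * of the build (model_fjcvtzs is a hypothetical name; the real helper
          * also handles NaN and out-of-range values, and reports exactness
          * via Z):
          */
         #if 0
         static int32_t model_fjcvtzs(double d)
         {
             int64_t t = (int64_t)d;          /* truncate toward zero */
             return (int32_t)(uint32_t)t;     /* wrap modulo 2^32 */
         }
         #endif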
   7386 
   7387 /* Floating point <-> integer conversions
   7388  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
   7389  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
   7390  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
   7391  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
   7392  */
   7393 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
   7394 {
   7395     int rd = extract32(insn, 0, 5);
   7396     int rn = extract32(insn, 5, 5);
   7397     int opcode = extract32(insn, 16, 3);
   7398     int rmode = extract32(insn, 19, 2);
   7399     int type = extract32(insn, 22, 2);
   7400     bool sbit = extract32(insn, 29, 1);
   7401     bool sf = extract32(insn, 31, 1);
   7402     bool itof = false;
   7403 
   7404     if (sbit) {
   7405         goto do_unallocated;
   7406     }
   7407 
   7408     switch (opcode) {
   7409     case 2: /* SCVTF */
   7410     case 3: /* UCVTF */
   7411         itof = true;
   7412         /* fallthru */
   7413     case 4: /* FCVTAS */
   7414     case 5: /* FCVTAU */
   7415         if (rmode != 0) {
   7416             goto do_unallocated;
   7417         }
   7418         /* fallthru */
   7419     case 0: /* FCVT[NPMZ]S */
   7420     case 1: /* FCVT[NPMZ]U */
   7421         switch (type) {
   7422         case 0: /* float32 */
   7423         case 1: /* float64 */
   7424             break;
   7425         case 3: /* float16 */
   7426             if (!dc_isar_feature(aa64_fp16, s)) {
   7427                 goto do_unallocated;
   7428             }
   7429             break;
   7430         default:
   7431             goto do_unallocated;
   7432         }
   7433         if (!fp_access_check(s)) {
   7434             return;
   7435         }
   7436         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
   7437         break;
   7438 
   7439     default:
   7440         switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
   7441         case 0b01100110: /* FMOV half <-> 32-bit int */
   7442         case 0b01100111:
   7443         case 0b11100110: /* FMOV half <-> 64-bit int */
   7444         case 0b11100111:
   7445             if (!dc_isar_feature(aa64_fp16, s)) {
   7446                 goto do_unallocated;
   7447             }
   7448             /* fallthru */
   7449         case 0b00000110: /* FMOV 32-bit */
   7450         case 0b00000111:
   7451         case 0b10100110: /* FMOV 64-bit */
   7452         case 0b10100111:
   7453         case 0b11001110: /* FMOV top half of 128-bit */
   7454         case 0b11001111:
   7455             if (!fp_access_check(s)) {
   7456                 return;
   7457             }
   7458             itof = opcode & 1;
   7459             handle_fmov(s, rd, rn, type, itof);
   7460             break;
   7461 
   7462         case 0b00111110: /* FJCVTZS */
   7463             if (!dc_isar_feature(aa64_jscvt, s)) {
   7464                 goto do_unallocated;
   7465             } else if (fp_access_check(s)) {
   7466                 handle_fjcvtzs(s, rd, rn);
   7467             }
   7468             break;
   7469 
   7470         default:
   7471         do_unallocated:
   7472             unallocated_encoding(s);
   7473             return;
   7474         }
   7475         break;
   7476     }
   7477 }
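
         /*
          * Worked example: the packed key above is sf:type:rmode:opcode.
          * FMOV Xd, Dn has sf=1, type=01, rmode=00, opcode=110, giving
          * 0b10100110 (the "FMOV 64-bit" case); opcode bit 0 then selects the
          * direction, so 0b10100111 is FMOV Dd, Xn.
          */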
   7478 
   7479 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
   7480  *   31  30  29 28     25 24                          0
   7481  * +---+---+---+---------+-----------------------------+
   7482  * |   | 0 |   | 1 1 1 1 |                             |
   7483  * +---+---+---+---------+-----------------------------+
   7484  */
   7485 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
   7486 {
   7487     if (extract32(insn, 24, 1)) {
   7488         /* Floating point data-processing (3 source) */
   7489         disas_fp_3src(s, insn);
   7490     } else if (extract32(insn, 21, 1) == 0) {
   7491         /* Floating point to fixed point conversions */
   7492         disas_fp_fixed_conv(s, insn);
   7493     } else {
   7494         switch (extract32(insn, 10, 2)) {
   7495         case 1:
   7496             /* Floating point conditional compare */
   7497             disas_fp_ccomp(s, insn);
   7498             break;
   7499         case 2:
   7500             /* Floating point data-processing (2 source) */
   7501             disas_fp_2src(s, insn);
   7502             break;
   7503         case 3:
   7504             /* Floating point conditional select */
   7505             disas_fp_csel(s, insn);
   7506             break;
   7507         case 0:
   7508             switch (ctz32(extract32(insn, 12, 4))) {
   7509             case 0: /* [15:12] == xxx1 */
   7510                 /* Floating point immediate */
   7511                 disas_fp_imm(s, insn);
   7512                 break;
   7513             case 1: /* [15:12] == xx10 */
   7514                 /* Floating point compare */
   7515                 disas_fp_compare(s, insn);
   7516                 break;
   7517             case 2: /* [15:12] == x100 */
   7518                 /* Floating point data-processing (1 source) */
   7519                 disas_fp_1src(s, insn);
   7520                 break;
   7521             case 3: /* [15:12] == 1000 */
   7522                 unallocated_encoding(s);
   7523                 break;
   7524             default: /* [15:12] == 0000 */
   7525                 /* Floating point <-> integer conversions */
   7526                 disas_fp_int_conv(s, insn);
   7527                 break;
   7528             }
   7529             break;
   7530         }
   7531     }
   7532 }
   7533 
   7534 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
   7535                      int pos)
   7536 {
   7537     /* Extract 64 bits from the middle of two concatenated 64 bit
   7538      * vector register slices left:right. The extracted bits start
   7539      * at 'pos' bits into the right (least significant) side.
   7540      * We return the result in tcg_right, and guarantee not to
   7541      * trash tcg_left.
   7542      */
   7543     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
   7544     assert(pos > 0 && pos < 64);
   7545 
   7546     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
   7547     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
   7548     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
   7549 
   7550     tcg_temp_free_i64(tcg_tmp);
   7551 }
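
         /*
          * Illustrative sketch: a host model of do_ext64() above, kept out of
          * the build (model_ext64 is a hypothetical name).  For pos == 24 the
          * result is (right >> 24) | (left << 40): the low 40 bits come from
          * the top of 'right', the high 24 bits from the bottom of 'left'.
          */
         #if 0
         static uint64_t model_ext64(uint64_t left, uint64_t right, int pos)
         {
             /* valid for 0 < pos < 64, matching the assert above */
             return (right >> pos) | (left << (64 - pos));
         }
         #endif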
   7552 
   7553 /* EXT
   7554  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
   7555  * +---+---+-------------+-----+---+------+---+------+---+------+------+
   7556  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
   7557  * +---+---+-------------+-----+---+------+---+------+---+------+------+
   7558  */
   7559 static void disas_simd_ext(DisasContext *s, uint32_t insn)
   7560 {
   7561     int is_q = extract32(insn, 30, 1);
   7562     int op2 = extract32(insn, 22, 2);
   7563     int imm4 = extract32(insn, 11, 4);
   7564     int rm = extract32(insn, 16, 5);
   7565     int rn = extract32(insn, 5, 5);
   7566     int rd = extract32(insn, 0, 5);
   7567     int pos = imm4 << 3;
   7568     TCGv_i64 tcg_resl, tcg_resh;
   7569 
   7570     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
   7571         unallocated_encoding(s);
   7572         return;
   7573     }
   7574 
   7575     if (!fp_access_check(s)) {
   7576         return;
   7577     }
   7578 
   7579     tcg_resh = tcg_temp_new_i64();
   7580     tcg_resl = tcg_temp_new_i64();
   7581 
   7582     /* Vd gets bits starting at pos bits into Vm:Vn. This is
   7583      * either extracting 128 bits from a 128:128 concatenation, or
   7584      * extracting 64 bits from a 64:64 concatenation.
   7585      */
   7586     if (!is_q) {
   7587         read_vec_element(s, tcg_resl, rn, 0, MO_64);
   7588         if (pos != 0) {
   7589             read_vec_element(s, tcg_resh, rm, 0, MO_64);
   7590             do_ext64(s, tcg_resh, tcg_resl, pos);
   7591         }
   7592     } else {
   7593         TCGv_i64 tcg_hh;
   7594         typedef struct {
   7595             int reg;
   7596             int elt;
   7597         } EltPosns;
   7598         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
   7599         EltPosns *elt = eltposns;
   7600 
   7601         if (pos >= 64) {
   7602             elt++;
   7603             pos -= 64;
   7604         }
   7605 
   7606         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
   7607         elt++;
   7608         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
   7609         elt++;
   7610         if (pos != 0) {
   7611             do_ext64(s, tcg_resh, tcg_resl, pos);
   7612             tcg_hh = tcg_temp_new_i64();
   7613             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
   7614             do_ext64(s, tcg_hh, tcg_resh, pos);
   7615             tcg_temp_free_i64(tcg_hh);
   7616         }
   7617     }
   7618 
   7619     write_vec_element(s, tcg_resl, rd, 0, MO_64);
   7620     tcg_temp_free_i64(tcg_resl);
   7621     if (is_q) {
   7622         write_vec_element(s, tcg_resh, rd, 1, MO_64);
   7623     }
   7624     tcg_temp_free_i64(tcg_resh);
   7625     clear_vec_high(s, is_q, rd);
   7626 }
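
         /*
          * Illustrative sketch: a host model of the is_q path above, kept out
          * of the build (model_ext_q is a hypothetical name).  For imm4 = 9
          * (pos = 72) the first element of Vm:Vn is skipped and pos becomes 8,
          * giving resl = (Vn[1] >> 8) | (Vm[0] << 56) and
          * resh = (Vm[0] >> 8) | (Vm[1] << 56): 128 bits taken starting
          * 72 bits into the concatenation.
          */
         #if 0
         static void model_ext_q(const uint64_t vn[2], const uint64_t vm[2],
                                 int pos, uint64_t vd[2])
         {
             const uint64_t cat[4] = { vn[0], vn[1], vm[0], vm[1] };
             int base = pos / 64, sh = pos % 64, i;

             for (i = 0; i < 2; i++) {
                 vd[i] = cat[base + i] >> sh;
                 if (sh != 0) {
                     vd[i] |= cat[base + i + 1] << (64 - sh);
                 }
             }
         }
         #endif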
   7627 
   7628 /* TBL/TBX
   7629  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
   7630  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
   7631  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
   7632  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
   7633  */
   7634 static void disas_simd_tb(DisasContext *s, uint32_t insn)
   7635 {
   7636     int op2 = extract32(insn, 22, 2);
   7637     int is_q = extract32(insn, 30, 1);
   7638     int rm = extract32(insn, 16, 5);
   7639     int rn = extract32(insn, 5, 5);
   7640     int rd = extract32(insn, 0, 5);
   7641     int is_tbx = extract32(insn, 12, 1);
   7642     int len = (extract32(insn, 13, 2) + 1) * 16;
   7643 
   7644     if (op2 != 0) {
   7645         unallocated_encoding(s);
   7646         return;
   7647     }
   7648 
   7649     if (!fp_access_check(s)) {
   7650         return;
   7651     }
   7652 
   7653     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
   7654                        vec_full_reg_offset(s, rm), cpu_env,
   7655                        is_q ? 16 : 8, vec_full_reg_size(s),
   7656                        (len << 6) | (is_tbx << 5) | rn,
   7657                        gen_helper_simd_tblx);
   7658 }
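
         /*
          * Illustrative sketch: the immediate 'data' word handed to the helper
          * above packs three fields, which the helper recovers from its
          * simd_desc.  A sketch of the unpacking, kept out of the build
          * (model_tblx_fields is a hypothetical name):
          */
         #if 0
         static void model_tblx_fields(uint32_t data)
         {
             int rn     = data & 0x1f;       /* first table register, bits [4:0] */
             int is_tbx = (data >> 5) & 1;   /* TBX keeps unselected dest bytes  */
             int len    = data >> 6;         /* table length in bytes, 16..64    */
         }
         #endif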
   7659 
   7660 /* ZIP/UZP/TRN
   7661  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
    7662  * +---+---+-------------+------+---+------+---+-----+-----+------+------+
    7663  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
    7664  * +---+---+-------------+------+---+------+---+-----+-----+------+------+
   7665  */
   7666 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
   7667 {
   7668     int rd = extract32(insn, 0, 5);
   7669     int rn = extract32(insn, 5, 5);
   7670     int rm = extract32(insn, 16, 5);
   7671     int size = extract32(insn, 22, 2);
   7672     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
   7673      * bit 2 indicates 1 vs 2 variant of the insn.
   7674      */
   7675     int opcode = extract32(insn, 12, 2);
   7676     bool part = extract32(insn, 14, 1);
   7677     bool is_q = extract32(insn, 30, 1);
   7678     int esize = 8 << size;
   7679     int i, ofs;
   7680     int datasize = is_q ? 128 : 64;
   7681     int elements = datasize / esize;
   7682     TCGv_i64 tcg_res, tcg_resl, tcg_resh;
   7683 
   7684     if (opcode == 0 || (size == 3 && !is_q)) {
   7685         unallocated_encoding(s);
   7686         return;
   7687     }
   7688 
   7689     if (!fp_access_check(s)) {
   7690         return;
   7691     }
   7692 
   7693     tcg_resl = tcg_const_i64(0);
   7694     tcg_resh = is_q ? tcg_const_i64(0) : NULL;
   7695     tcg_res = tcg_temp_new_i64();
   7696 
   7697     for (i = 0; i < elements; i++) {
   7698         switch (opcode) {
   7699         case 1: /* UZP1/2 */
   7700         {
   7701             int midpoint = elements / 2;
   7702             if (i < midpoint) {
   7703                 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
   7704             } else {
   7705                 read_vec_element(s, tcg_res, rm,
   7706                                  2 * (i - midpoint) + part, size);
   7707             }
   7708             break;
   7709         }
   7710         case 2: /* TRN1/2 */
   7711             if (i & 1) {
   7712                 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
   7713             } else {
   7714                 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
   7715             }
   7716             break;
   7717         case 3: /* ZIP1/2 */
   7718         {
   7719             int base = part * elements / 2;
   7720             if (i & 1) {
   7721                 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
   7722             } else {
   7723                 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
   7724             }
   7725             break;
   7726         }
   7727         default:
   7728             g_assert_not_reached();
   7729         }
   7730 
   7731         ofs = i * esize;
   7732         if (ofs < 64) {
   7733             tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
   7734             tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
   7735         } else {
   7736             tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
   7737             tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
   7738         }
   7739     }
   7740 
   7741     tcg_temp_free_i64(tcg_res);
   7742 
   7743     write_vec_element(s, tcg_resl, rd, 0, MO_64);
   7744     tcg_temp_free_i64(tcg_resl);
   7745 
   7746     if (is_q) {
   7747         write_vec_element(s, tcg_resh, rd, 1, MO_64);
   7748         tcg_temp_free_i64(tcg_resh);
   7749     }
   7750     clear_vec_high(s, is_q, rd);
   7751 }
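
         /*
          * Worked example: lane selection in the loop above for elements = 4
          * and part = 0 (the "1" forms):
          *   UZP1: Rn[0], Rn[2], Rm[0], Rm[2]   (even lanes of Rn, then of Rm)
          *   TRN1: Rn[0], Rm[0], Rn[2], Rm[2]   (even lanes interleaved)
          *   ZIP1: Rn[0], Rm[0], Rn[1], Rm[1]   (low halves interleaved)
          * The "2" forms (part = 1) select the odd/high counterparts.
          */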
   7752 
   7753 /*
   7754  * do_reduction_op helper
   7755  *
   7756  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
   7757  * important for correct NaN propagation that we do these
   7758  * operations in exactly the order specified by the pseudocode.
   7759  *
    7760  * This is a recursive function; the TCG temps it returns should be
    7761  * freed by the calling function once it is done with the values.
   7762  */
   7763 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
   7764                                 int esize, int size, int vmap, TCGv_ptr fpst)
   7765 {
   7766     if (esize == size) {
   7767         int element;
   7768         MemOp msize = esize == 16 ? MO_16 : MO_32;
   7769         TCGv_i32 tcg_elem;
   7770 
    7771         /* We should have exactly one element left here */
   7772         assert(ctpop8(vmap) == 1);
   7773         element = ctz32(vmap);
   7774         assert(element < 8);
   7775 
   7776         tcg_elem = tcg_temp_new_i32();
   7777         read_vec_element_i32(s, tcg_elem, rn, element, msize);
   7778         return tcg_elem;
   7779     } else {
   7780         int bits = size / 2;
   7781         int shift = ctpop8(vmap) / 2;
   7782         int vmap_lo = (vmap >> shift) & vmap;
   7783         int vmap_hi = (vmap & ~vmap_lo);
   7784         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
   7785 
   7786         tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
   7787         tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
   7788         tcg_res = tcg_temp_new_i32();
   7789 
   7790         switch (fpopcode) {
   7791         case 0x0c: /* fmaxnmv half-precision */
   7792             gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
   7793             break;
   7794         case 0x0f: /* fmaxv half-precision */
   7795             gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
   7796             break;
   7797         case 0x1c: /* fminnmv half-precision */
   7798             gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
   7799             break;
   7800         case 0x1f: /* fminv half-precision */
   7801             gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
   7802             break;
   7803         case 0x2c: /* fmaxnmv */
   7804             gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
   7805             break;
   7806         case 0x2f: /* fmaxv */
   7807             gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
   7808             break;
   7809         case 0x3c: /* fminnmv */
   7810             gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
   7811             break;
   7812         case 0x3f: /* fminv */
   7813             gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
   7814             break;
   7815         default:
   7816             g_assert_not_reached();
   7817         }
   7818 
   7819         tcg_temp_free_i32(tcg_hi);
   7820         tcg_temp_free_i32(tcg_lo);
   7821         return tcg_res;
   7822     }
   7823 }
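
         /*
          * Illustrative sketch: a host model of the vmap recursion above, kept
          * out of the build (model_reduce is a hypothetical name).  For a
          * 4-element reduction vmap starts at 0b1111 and splits into 0b0011
          * and 0b1100, reproducing the pairwise evaluation order of the
          * ARM ARM Reduce() pseudocode.
          */
         #if 0
         static float model_reduce(float (*op)(float, float),
                                   const float *v, int vmap)
         {
             if (ctpop8(vmap) == 1) {
                 return v[ctz32(vmap)];
             } else {
                 int shift = ctpop8(vmap) / 2;
                 int lo = (vmap >> shift) & vmap;
                 int hi = vmap & ~lo;

                 return op(model_reduce(op, v, lo), model_reduce(op, v, hi));
             }
         }
         #endif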
   7824 
   7825 /* AdvSIMD across lanes
   7826  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
   7827  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
   7828  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
   7829  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
   7830  */
   7831 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
   7832 {
   7833     int rd = extract32(insn, 0, 5);
   7834     int rn = extract32(insn, 5, 5);
   7835     int size = extract32(insn, 22, 2);
   7836     int opcode = extract32(insn, 12, 5);
   7837     bool is_q = extract32(insn, 30, 1);
   7838     bool is_u = extract32(insn, 29, 1);
   7839     bool is_fp = false;
   7840     bool is_min = false;
   7841     int esize;
   7842     int elements;
   7843     int i;
   7844     TCGv_i64 tcg_res, tcg_elt;
   7845 
   7846     switch (opcode) {
   7847     case 0x1b: /* ADDV */
   7848         if (is_u) {
   7849             unallocated_encoding(s);
   7850             return;
   7851         }
   7852         /* fall through */
   7853     case 0x3: /* SADDLV, UADDLV */
   7854     case 0xa: /* SMAXV, UMAXV */
   7855     case 0x1a: /* SMINV, UMINV */
   7856         if (size == 3 || (size == 2 && !is_q)) {
   7857             unallocated_encoding(s);
   7858             return;
   7859         }
   7860         break;
   7861     case 0xc: /* FMAXNMV, FMINNMV */
   7862     case 0xf: /* FMAXV, FMINV */
   7863         /* Bit 1 of size field encodes min vs max and the actual size
   7864          * depends on the encoding of the U bit. If not set (and FP16
   7865          * enabled) then we do half-precision float instead of single
   7866          * precision.
   7867          */
   7868         is_min = extract32(size, 1, 1);
   7869         is_fp = true;
   7870         if (!is_u && dc_isar_feature(aa64_fp16, s)) {
   7871             size = 1;
   7872         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
   7873             unallocated_encoding(s);
   7874             return;
   7875         } else {
   7876             size = 2;
   7877         }
   7878         break;
   7879     default:
   7880         unallocated_encoding(s);
   7881         return;
   7882     }
   7883 
   7884     if (!fp_access_check(s)) {
   7885         return;
   7886     }
   7887 
   7888     esize = 8 << size;
   7889     elements = (is_q ? 128 : 64) / esize;
   7890 
   7891     tcg_res = tcg_temp_new_i64();
   7892     tcg_elt = tcg_temp_new_i64();
   7893 
   7894     /* These instructions operate across all lanes of a vector
   7895      * to produce a single result. We can guarantee that a 64
   7896      * bit intermediate is sufficient:
   7897      *  + for [US]ADDLV the maximum element size is 32 bits, and
   7898      *    the result type is 64 bits
   7899      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
   7900      *    same as the element size, which is 32 bits at most
   7901      * For the integer operations we can choose to work at 64
   7902      * or 32 bits and truncate at the end; for simplicity
   7903      * we use 64 bits always. The floating point
   7904      * ops do require 32 bit intermediates, though.
   7905      */
   7906     if (!is_fp) {
   7907         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
   7908 
   7909         for (i = 1; i < elements; i++) {
   7910             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
   7911 
   7912             switch (opcode) {
   7913             case 0x03: /* SADDLV / UADDLV */
   7914             case 0x1b: /* ADDV */
   7915                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
   7916                 break;
   7917             case 0x0a: /* SMAXV / UMAXV */
   7918                 if (is_u) {
   7919                     tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
   7920                 } else {
   7921                     tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
   7922                 }
   7923                 break;
   7924             case 0x1a: /* SMINV / UMINV */
   7925                 if (is_u) {
   7926                     tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
   7927                 } else {
   7928                     tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
   7929                 }
   7930                 break;
   7931             default:
   7932                 g_assert_not_reached();
   7933             }
   7934 
   7935         }
   7936     } else {
   7937         /* Floating point vector reduction ops which work across 32
   7938          * bit (single) or 16 bit (half-precision) intermediates.
   7939          * Note that correct NaN propagation requires that we do these
   7940          * operations in exactly the order specified by the pseudocode.
   7941          */
   7942         TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
   7943         int fpopcode = opcode | is_min << 4 | is_u << 5;
   7944         int vmap = (1 << elements) - 1;
   7945         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
   7946                                              (is_q ? 128 : 64), vmap, fpst);
   7947         tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
   7948         tcg_temp_free_i32(tcg_res32);
   7949         tcg_temp_free_ptr(fpst);
   7950     }
   7951 
   7952     tcg_temp_free_i64(tcg_elt);
   7953 
   7954     /* Now truncate the result to the width required for the final output */
   7955     if (opcode == 0x03) {
   7956         /* SADDLV, UADDLV: result is 2*esize */
   7957         size++;
   7958     }
   7959 
   7960     switch (size) {
   7961     case 0:
   7962         tcg_gen_ext8u_i64(tcg_res, tcg_res);
   7963         break;
   7964     case 1:
   7965         tcg_gen_ext16u_i64(tcg_res, tcg_res);
   7966         break;
   7967     case 2:
   7968         tcg_gen_ext32u_i64(tcg_res, tcg_res);
   7969         break;
   7970     case 3:
   7971         break;
   7972     default:
   7973         g_assert_not_reached();
   7974     }
   7975 
   7976     write_fp_dreg(s, rd, tcg_res);
   7977     tcg_temp_free_i64(tcg_res);
   7978 }
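
         /*
          * Worked example: SADDLV Hd, Vn.8B sums eight sign-extended bytes in
          * the 64-bit accumulator; the size++ above then widens the final
          * truncation from 8 to 16 bits, i.e. the 2*esize result element.
          */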
   7979 
   7980 /* DUP (Element, Vector)
   7981  *
   7982  *  31  30   29              21 20    16 15        10  9    5 4    0
   7983  * +---+---+-------------------+--------+-------------+------+------+
   7984  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
   7985  * +---+---+-------------------+--------+-------------+------+------+
   7986  *
   7987  * size: encoded in imm5 (see ARM ARM LowestSetBit())
   7988  */
   7989 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
   7990                              int imm5)
   7991 {
   7992     int size = ctz32(imm5);
   7993     int index;
   7994 
   7995     if (size > 3 || (size == 3 && !is_q)) {
   7996         unallocated_encoding(s);
   7997         return;
   7998     }
   7999 
   8000     if (!fp_access_check(s)) {
   8001         return;
   8002     }
   8003 
   8004     index = imm5 >> (size + 1);
   8005     tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
   8006                          vec_reg_offset(s, rn, index, size),
   8007                          is_q ? 16 : 8, vec_full_reg_size(s));
   8008 }
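
         /*
          * Worked example: imm5 = 0b10010 has its lowest set bit at position
          * 1, so size = 1 (16-bit elements) and index = imm5 >> 2 = 4, i.e.
          * DUP Vd.8H, Vn.H[4] for the is_q form.
          */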
   8009 
   8010 /* DUP (element, scalar)
   8011  *  31                   21 20    16 15        10  9    5 4    0
   8012  * +-----------------------+--------+-------------+------+------+
   8013  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
   8014  * +-----------------------+--------+-------------+------+------+
   8015  */
   8016 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
   8017                               int imm5)
   8018 {
   8019     int size = ctz32(imm5);
   8020     int index;
   8021     TCGv_i64 tmp;
   8022 
   8023     if (size > 3) {
   8024         unallocated_encoding(s);
   8025         return;
   8026     }
   8027 
   8028     if (!fp_access_check(s)) {
   8029         return;
   8030     }
   8031 
   8032     index = imm5 >> (size + 1);
   8033 
   8034     /* This instruction just extracts the specified element and
   8035      * zero-extends it into the bottom of the destination register.
   8036      */
   8037     tmp = tcg_temp_new_i64();
   8038     read_vec_element(s, tmp, rn, index, size);
   8039     write_fp_dreg(s, rd, tmp);
   8040     tcg_temp_free_i64(tmp);
   8041 }
   8042 
   8043 /* DUP (General)
   8044  *
   8045  *  31  30   29              21 20    16 15        10  9    5 4    0
   8046  * +---+---+-------------------+--------+-------------+------+------+
   8047  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
   8048  * +---+---+-------------------+--------+-------------+------+------+
   8049  *
   8050  * size: encoded in imm5 (see ARM ARM LowestSetBit())
   8051  */
   8052 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
   8053                              int imm5)
   8054 {
   8055     int size = ctz32(imm5);
   8056     uint32_t dofs, oprsz, maxsz;
   8057 
   8058     if (size > 3 || ((size == 3) && !is_q)) {
   8059         unallocated_encoding(s);
   8060         return;
   8061     }
   8062 
   8063     if (!fp_access_check(s)) {
   8064         return;
   8065     }
   8066 
   8067     dofs = vec_full_reg_offset(s, rd);
   8068     oprsz = is_q ? 16 : 8;
   8069     maxsz = vec_full_reg_size(s);
   8070 
   8071     tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
   8072 }
   8073 
   8074 /* INS (Element)
   8075  *
   8076  *  31                   21 20    16 15  14    11  10 9    5 4    0
   8077  * +-----------------------+--------+------------+---+------+------+
   8078  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
   8079  * +-----------------------+--------+------------+---+------+------+
   8080  *
   8081  * size: encoded in imm5 (see ARM ARM LowestSetBit())
   8082  * index: encoded in imm5<4:size+1>
   8083  */
   8084 static void handle_simd_inse(DisasContext *s, int rd, int rn,
   8085                              int imm4, int imm5)
   8086 {
   8087     int size = ctz32(imm5);
   8088     int src_index, dst_index;
   8089     TCGv_i64 tmp;
   8090 
   8091     if (size > 3) {
   8092         unallocated_encoding(s);
   8093         return;
   8094     }
   8095 
   8096     if (!fp_access_check(s)) {
   8097         return;
   8098     }
   8099 
   8100     dst_index = extract32(imm5, 1+size, 5);
   8101     src_index = extract32(imm4, size, 4);
   8102 
   8103     tmp = tcg_temp_new_i64();
   8104 
   8105     read_vec_element(s, tmp, rn, src_index, size);
   8106     write_vec_element(s, tmp, rd, dst_index, size);
   8107 
   8108     tcg_temp_free_i64(tmp);
   8109 
   8110     /* INS is considered a 128-bit write for SVE. */
   8111     clear_vec_high(s, true, rd);
   8112 }
   8113 
   8114 
   8115 /* INS (General)
   8116  *
   8117  *  31                   21 20    16 15        10  9    5 4    0
   8118  * +-----------------------+--------+-------------+------+------+
   8119  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
   8120  * +-----------------------+--------+-------------+------+------+
   8121  *
   8122  * size: encoded in imm5 (see ARM ARM LowestSetBit())
   8123  * index: encoded in imm5<4:size+1>
   8124  */
   8125 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
   8126 {
   8127     int size = ctz32(imm5);
   8128     int idx;
   8129 
   8130     if (size > 3) {
   8131         unallocated_encoding(s);
   8132         return;
   8133     }
   8134 
   8135     if (!fp_access_check(s)) {
   8136         return;
   8137     }
   8138 
   8139     idx = extract32(imm5, 1 + size, 4 - size);
   8140     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
   8141 
   8142     /* INS is considered a 128-bit write for SVE. */
   8143     clear_vec_high(s, true, rd);
   8144 }
   8145 
   8146 /*
   8147  * UMOV (General)
   8148  * SMOV (General)
   8149  *
   8150  *  31  30   29              21 20    16 15    12   10 9    5 4    0
   8151  * +---+---+-------------------+--------+-------------+------+------+
   8152  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
   8153  * +---+---+-------------------+--------+-------------+------+------+
   8154  *
   8155  * U: unsigned when set
   8156  * size: encoded in imm5 (see ARM ARM LowestSetBit())
   8157  */
   8158 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
   8159                                   int rn, int rd, int imm5)
   8160 {
   8161     int size = ctz32(imm5);
   8162     int element;
   8163     TCGv_i64 tcg_rd;
   8164 
   8165     /* Check for UnallocatedEncodings */
   8166     if (is_signed) {
   8167         if (size > 2 || (size == 2 && !is_q)) {
   8168             unallocated_encoding(s);
   8169             return;
   8170         }
   8171     } else {
   8172         if (size > 3
   8173             || (size < 3 && is_q)
   8174             || (size == 3 && !is_q)) {
   8175             unallocated_encoding(s);
   8176             return;
   8177         }
   8178     }
   8179 
   8180     if (!fp_access_check(s)) {
   8181         return;
   8182     }
   8183 
   8184     element = extract32(imm5, 1+size, 4);
   8185 
   8186     tcg_rd = cpu_reg(s, rd);
   8187     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
   8188     if (is_signed && !is_q) {
   8189         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
   8190     }
   8191 }
   8192 
   8193 /* AdvSIMD copy
   8194  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
   8195  * +---+---+----+-----------------+------+---+------+---+------+------+
   8196  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
   8197  * +---+---+----+-----------------+------+---+------+---+------+------+
   8198  */
   8199 static void disas_simd_copy(DisasContext *s, uint32_t insn)
   8200 {
   8201     int rd = extract32(insn, 0, 5);
   8202     int rn = extract32(insn, 5, 5);
   8203     int imm4 = extract32(insn, 11, 4);
   8204     int op = extract32(insn, 29, 1);
   8205     int is_q = extract32(insn, 30, 1);
   8206     int imm5 = extract32(insn, 16, 5);
   8207 
   8208     if (op) {
   8209         if (is_q) {
   8210             /* INS (element) */
   8211             handle_simd_inse(s, rd, rn, imm4, imm5);
   8212         } else {
   8213             unallocated_encoding(s);
   8214         }
   8215     } else {
   8216         switch (imm4) {
   8217         case 0:
   8218             /* DUP (element - vector) */
   8219             handle_simd_dupe(s, is_q, rd, rn, imm5);
   8220             break;
   8221         case 1:
   8222             /* DUP (general) */
   8223             handle_simd_dupg(s, is_q, rd, rn, imm5);
   8224             break;
   8225         case 3:
   8226             if (is_q) {
   8227                 /* INS (general) */
   8228                 handle_simd_insg(s, rd, rn, imm5);
   8229             } else {
   8230                 unallocated_encoding(s);
   8231             }
   8232             break;
   8233         case 5:
   8234         case 7:
   8235             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
   8236             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
   8237             break;
   8238         default:
   8239             unallocated_encoding(s);
   8240             break;
   8241         }
   8242     }
   8243 }
   8244 
   8245 /* AdvSIMD modified immediate
   8246  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
   8247  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
   8248  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
   8249  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
   8250  *
   8251  * There are a number of operations that can be carried out here:
   8252  *   MOVI - move (shifted) imm into register
   8253  *   MVNI - move inverted (shifted) imm into register
   8254  *   ORR  - bitwise OR of (shifted) imm with register
   8255  *   BIC  - bitwise clear of (shifted) imm with register
   8256  * With ARMv8.2 we also have:
   8257  *   FMOV half-precision
   8258  */
   8259 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
   8260 {
   8261     int rd = extract32(insn, 0, 5);
   8262     int cmode = extract32(insn, 12, 4);
   8263     int o2 = extract32(insn, 11, 1);
   8264     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
   8265     bool is_neg = extract32(insn, 29, 1);
   8266     bool is_q = extract32(insn, 30, 1);
   8267     uint64_t imm = 0;
   8268 
   8269     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
   8270         /* Check for FMOV (vector, immediate) - half-precision */
   8271         if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
   8272             unallocated_encoding(s);
   8273             return;
   8274         }
   8275     }
   8276 
   8277     if (!fp_access_check(s)) {
   8278         return;
   8279     }
   8280 
   8281     if (cmode == 15 && o2 && !is_neg) {
   8282         /* FMOV (vector, immediate) - half-precision */
   8283         imm = vfp_expand_imm(MO_16, abcdefgh);
   8284         /* now duplicate across the lanes */
   8285         imm = dup_const(MO_16, imm);
   8286     } else {
   8287         imm = asimd_imm_const(abcdefgh, cmode, is_neg);
   8288     }
   8289 
   8290     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
   8291         /* MOVI or MVNI, with MVNI negation handled above.  */
   8292         tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
   8293                              vec_full_reg_size(s), imm);
   8294     } else {
   8295         /* ORR or BIC, with BIC negation to AND handled above.  */
   8296         if (is_neg) {
   8297             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
   8298         } else {
   8299             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
   8300         }
   8301     }
   8302 }
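
         /*
          * Worked example: for cmode = 0b1110 with op = 1 (64-bit MOVI),
          * asimd_imm_const() expands each abcdefgh bit to a full byte, e.g.
          * abcdefgh = 0b10100101 -> imm = 0xff00ff0000ff00ff.
          */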
   8303 
   8304 /* AdvSIMD scalar copy
   8305  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
   8306  * +-----+----+-----------------+------+---+------+---+------+------+
   8307  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
   8308  * +-----+----+-----------------+------+---+------+---+------+------+
   8309  */
   8310 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
   8311 {
   8312     int rd = extract32(insn, 0, 5);
   8313     int rn = extract32(insn, 5, 5);
   8314     int imm4 = extract32(insn, 11, 4);
   8315     int imm5 = extract32(insn, 16, 5);
   8316     int op = extract32(insn, 29, 1);
   8317 
   8318     if (op != 0 || imm4 != 0) {
   8319         unallocated_encoding(s);
   8320         return;
   8321     }
   8322 
   8323     /* DUP (element, scalar) */
   8324     handle_simd_dupes(s, rd, rn, imm5);
   8325 }
   8326 
   8327 /* AdvSIMD scalar pairwise
   8328  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
   8329  * +-----+---+-----------+------+-----------+--------+-----+------+------+
   8330  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
   8331  * +-----+---+-----------+------+-----------+--------+-----+------+------+
   8332  */
   8333 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
   8334 {
   8335     int u = extract32(insn, 29, 1);
   8336     int size = extract32(insn, 22, 2);
   8337     int opcode = extract32(insn, 12, 5);
   8338     int rn = extract32(insn, 5, 5);
   8339     int rd = extract32(insn, 0, 5);
   8340     TCGv_ptr fpst;
   8341 
   8342     /* For some ops (the FP ones), size[1] is part of the encoding.
   8343      * For ADDP strictly it is not but size[1] is always 1 for valid
    8344      * For ADDP strictly it is not, but size[1] is always 1 for valid
   8345      */
   8346     opcode |= (extract32(size, 1, 1) << 5);
   8347 
   8348     switch (opcode) {
   8349     case 0x3b: /* ADDP */
   8350         if (u || size != 3) {
   8351             unallocated_encoding(s);
   8352             return;
   8353         }
   8354         if (!fp_access_check(s)) {
   8355             return;
   8356         }
   8357 
   8358         fpst = NULL;
   8359         break;
   8360     case 0xc: /* FMAXNMP */
   8361     case 0xd: /* FADDP */
   8362     case 0xf: /* FMAXP */
   8363     case 0x2c: /* FMINNMP */
   8364     case 0x2f: /* FMINP */
    8365         /* FP op; size[0] selects 32 vs 64 bit */
   8366         if (!u) {
   8367             if (!dc_isar_feature(aa64_fp16, s)) {
   8368                 unallocated_encoding(s);
   8369                 return;
   8370             } else {
   8371                 size = MO_16;
   8372             }
   8373         } else {
   8374             size = extract32(size, 0, 1) ? MO_64 : MO_32;
   8375         }
   8376 
   8377         if (!fp_access_check(s)) {
   8378             return;
   8379         }
   8380 
   8381         fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
   8382         break;
   8383     default:
   8384         unallocated_encoding(s);
   8385         return;
   8386     }
   8387 
   8388     if (size == MO_64) {
   8389         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
   8390         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
   8391         TCGv_i64 tcg_res = tcg_temp_new_i64();
   8392 
   8393         read_vec_element(s, tcg_op1, rn, 0, MO_64);
   8394         read_vec_element(s, tcg_op2, rn, 1, MO_64);
   8395 
   8396         switch (opcode) {
   8397         case 0x3b: /* ADDP */
   8398             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
   8399             break;
   8400         case 0xc: /* FMAXNMP */
   8401             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
   8402             break;
   8403         case 0xd: /* FADDP */
   8404             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
   8405             break;
   8406         case 0xf: /* FMAXP */
   8407             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
   8408             break;
   8409         case 0x2c: /* FMINNMP */
   8410             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
   8411             break;
   8412         case 0x2f: /* FMINP */
   8413             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
   8414             break;
   8415         default:
   8416             g_assert_not_reached();
   8417         }
   8418 
   8419         write_fp_dreg(s, rd, tcg_res);
   8420 
   8421         tcg_temp_free_i64(tcg_op1);
   8422         tcg_temp_free_i64(tcg_op2);
   8423         tcg_temp_free_i64(tcg_res);
   8424     } else {
   8425         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
   8426         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
   8427         TCGv_i32 tcg_res = tcg_temp_new_i32();
   8428 
   8429         read_vec_element_i32(s, tcg_op1, rn, 0, size);
   8430         read_vec_element_i32(s, tcg_op2, rn, 1, size);
   8431 
   8432         if (size == MO_16) {
   8433             switch (opcode) {
   8434             case 0xc: /* FMAXNMP */
   8435                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
   8436                 break;
   8437             case 0xd: /* FADDP */
   8438                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
   8439                 break;
   8440             case 0xf: /* FMAXP */
   8441                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
   8442                 break;
   8443             case 0x2c: /* FMINNMP */
   8444                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
   8445                 break;
   8446             case 0x2f: /* FMINP */
   8447                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
   8448                 break;
   8449             default:
   8450                 g_assert_not_reached();
   8451             }
   8452         } else {
   8453             switch (opcode) {
   8454             case 0xc: /* FMAXNMP */
   8455                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
   8456                 break;
   8457             case 0xd: /* FADDP */
   8458                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
   8459                 break;
   8460             case 0xf: /* FMAXP */
   8461                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
   8462                 break;
   8463             case 0x2c: /* FMINNMP */
   8464                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
   8465                 break;
   8466             case 0x2f: /* FMINP */
   8467                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
   8468                 break;
   8469             default:
   8470                 g_assert_not_reached();
   8471             }
   8472         }
   8473 
   8474         write_fp_sreg(s, rd, tcg_res);
   8475 
   8476         tcg_temp_free_i32(tcg_op1);
   8477         tcg_temp_free_i32(tcg_op2);
   8478         tcg_temp_free_i32(tcg_res);
   8479     }
   8480 
   8481     if (fpst) {
   8482         tcg_temp_free_ptr(fpst);
   8483     }
   8484 }
   8485 
   8486 /*
   8487  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
   8488  *
    8489  * This code handles the common shifting logic and is used by both
    8490  * the vector and scalar code.
   8491  */
   8492 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
   8493                                     TCGv_i64 tcg_rnd, bool accumulate,
   8494                                     bool is_u, int size, int shift)
   8495 {
   8496     bool extended_result = false;
   8497     bool round = tcg_rnd != NULL;
   8498     int ext_lshift = 0;
   8499     TCGv_i64 tcg_src_hi;
   8500 
   8501     if (round && size == 3) {
   8502         extended_result = true;
   8503         ext_lshift = 64 - shift;
   8504         tcg_src_hi = tcg_temp_new_i64();
   8505     } else if (shift == 64) {
   8506         if (!accumulate && is_u) {
   8507             /* result is zero */
   8508             tcg_gen_movi_i64(tcg_res, 0);
   8509             return;
   8510         }
   8511     }
   8512 
   8513     /* Deal with the rounding step */
   8514     if (round) {
   8515         if (extended_result) {
   8516             TCGv_i64 tcg_zero = tcg_constant_i64(0);
   8517             if (!is_u) {
   8518                 /* take care of sign extending tcg_res */
   8519                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
   8520                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
   8521                                  tcg_src, tcg_src_hi,
   8522                                  tcg_rnd, tcg_zero);
   8523             } else {
   8524                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
   8525                                  tcg_src, tcg_zero,
   8526                                  tcg_rnd, tcg_zero);
   8527             }
   8528         } else {
   8529             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
   8530         }
   8531     }
   8532 
   8533     /* Now do the shift right */
   8534     if (round && extended_result) {
   8535         /* extended case, >64 bit precision required */
   8536         if (ext_lshift == 0) {
   8537             /* special case, only high bits matter */
   8538             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
   8539         } else {
   8540             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
   8541             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
   8542             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
   8543         }
   8544     } else {
   8545         if (is_u) {
   8546             if (shift == 64) {
   8547                 /* essentially shifting in 64 zeros */
   8548                 tcg_gen_movi_i64(tcg_src, 0);
   8549             } else {
   8550                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
   8551             }
   8552         } else {
   8553             if (shift == 64) {
   8554                 /* effectively extending the sign-bit */
   8555                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
   8556             } else {
   8557                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
   8558             }
   8559         }
   8560     }
   8561 
   8562     if (accumulate) {
   8563         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
   8564     } else {
   8565         tcg_gen_mov_i64(tcg_res, tcg_src);
   8566     }
   8567 
   8568     if (extended_result) {
   8569         tcg_temp_free_i64(tcg_src_hi);
   8570     }
   8571 }
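
         /*
          * Illustrative sketch: in the non-extended case the rounding step
          * above is simply "add half an LSB of the result, then shift"; the
          * extended_result path exists because for size == 3 that addition can
          * carry out of 64 bits.  A host model of an unsigned rounding shift
          * right, kept out of the build (model_urshr is a hypothetical name):
          */
         #if 0
         static uint64_t model_urshr(uint64_t x, int shift)
         {
             /* valid for 1 <= shift < 64; e.g. x = 7, shift = 2: (7+2)>>2 = 2 */
             return (x + (1ULL << (shift - 1))) >> shift;
         }
         #endif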
   8572 
   8573 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
   8574 static void handle_scalar_simd_shri(DisasContext *s,
   8575                                     bool is_u, int immh, int immb,
   8576                                     int opcode, int rn, int rd)
   8577 {
   8578     const int size = 3;
   8579     int immhb = immh << 3 | immb;
   8580     int shift = 2 * (8 << size) - immhb;
   8581     bool accumulate = false;
   8582     bool round = false;
   8583     bool insert = false;
   8584     TCGv_i64 tcg_rn;
   8585     TCGv_i64 tcg_rd;
   8586     TCGv_i64 tcg_round;
   8587 
   8588     if (!extract32(immh, 3, 1)) {
   8589         unallocated_encoding(s);
   8590         return;
   8591     }
   8592 
   8593     if (!fp_access_check(s)) {
   8594         return;
   8595     }
   8596 
   8597     switch (opcode) {
   8598     case 0x02: /* SSRA / USRA (accumulate) */
   8599         accumulate = true;
   8600         break;
   8601     case 0x04: /* SRSHR / URSHR (rounding) */
   8602         round = true;
   8603         break;
   8604     case 0x06: /* SRSRA / URSRA (accum + rounding) */
   8605         accumulate = round = true;
   8606         break;
   8607     case 0x08: /* SRI */
   8608         insert = true;
   8609         break;
   8610     }
   8611 
   8612     if (round) {
   8613         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
   8614     } else {
   8615         tcg_round = NULL;
   8616     }
   8617 
   8618     tcg_rn = read_fp_dreg(s, rn);
   8619     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
   8620 
   8621     if (insert) {
   8622         /* shift count same as element size is valid but does nothing;
   8623          * special case to avoid potential shift by 64.
   8624          */
   8625         int esize = 8 << size;
   8626         if (shift != esize) {
   8627             tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
   8628             tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
   8629         }
   8630     } else {
   8631         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
   8632                                 accumulate, is_u, size, shift);
   8633     }
   8634 
   8635     write_fp_dreg(s, rd, tcg_rd);
   8636 
   8637     tcg_temp_free_i64(tcg_rn);
   8638     tcg_temp_free_i64(tcg_rd);
   8639 }
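
         /*
          * Worked example: immh:immb is a 7-bit field and the right-shift
          * amount is 2*esize - immhb.  For this 64-bit scalar case
          * (immh = 1xxx) immhb runs 64..127, so shift runs 64 down to 1;
          * e.g. immh:immb = 0b1111000 (120) encodes shift = 128 - 120 = 8.
          */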
   8640 
   8641 /* SHL/SLI - Scalar shift left */
   8642 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
   8643                                     int immh, int immb, int opcode,
   8644                                     int rn, int rd)
   8645 {
   8646     int size = 32 - clz32(immh) - 1;
   8647     int immhb = immh << 3 | immb;
   8648     int shift = immhb - (8 << size);
   8649     TCGv_i64 tcg_rn;
   8650     TCGv_i64 tcg_rd;
   8651 
   8652     if (!extract32(immh, 3, 1)) {
   8653         unallocated_encoding(s);
   8654         return;
   8655     }
   8656 
   8657     if (!fp_access_check(s)) {
   8658         return;
   8659     }
   8660 
   8661     tcg_rn = read_fp_dreg(s, rn);
   8662     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
   8663 
   8664     if (insert) {
   8665         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
   8666     } else {
   8667         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
   8668     }
   8669 
   8670     write_fp_dreg(s, rd, tcg_rd);
   8671 
   8672     tcg_temp_free_i64(tcg_rn);
   8673     tcg_temp_free_i64(tcg_rd);
   8674 }
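
         /*
          * Worked example: left shifts count the other way,
          * shift = immhb - esize; e.g. immh:immb = 0b1001000 (72) with
          * esize = 64 encodes SHL #8.
          */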
   8675 
   8676 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
   8677  * (signed/unsigned) narrowing */
   8678 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
   8679                                    bool is_u_shift, bool is_u_narrow,
   8680                                    int immh, int immb, int opcode,
   8681                                    int rn, int rd)
   8682 {
   8683     int immhb = immh << 3 | immb;
   8684     int size = 32 - clz32(immh) - 1;
   8685     int esize = 8 << size;
   8686     int shift = (2 * esize) - immhb;
   8687     int elements = is_scalar ? 1 : (64 / esize);
   8688     bool round = extract32(opcode, 0, 1);
   8689     MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
   8690     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
   8691     TCGv_i32 tcg_rd_narrowed;
   8692     TCGv_i64 tcg_final;
   8693 
   8694     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
   8695         { gen_helper_neon_narrow_sat_s8,
   8696           gen_helper_neon_unarrow_sat8 },
   8697         { gen_helper_neon_narrow_sat_s16,
   8698           gen_helper_neon_unarrow_sat16 },
   8699         { gen_helper_neon_narrow_sat_s32,
   8700           gen_helper_neon_unarrow_sat32 },
   8701         { NULL, NULL },
   8702     };
   8703     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
   8704         gen_helper_neon_narrow_sat_u8,
   8705         gen_helper_neon_narrow_sat_u16,
   8706         gen_helper_neon_narrow_sat_u32,
   8707         NULL
   8708     };
   8709     NeonGenNarrowEnvFn *narrowfn;
   8710 
   8711     int i;
   8712 
   8713     assert(size < 4);
   8714 
   8715     if (extract32(immh, 3, 1)) {
   8716         unallocated_encoding(s);
   8717         return;
   8718     }
   8719 
   8720     if (!fp_access_check(s)) {
   8721         return;
   8722     }
   8723 
   8724     if (is_u_shift) {
   8725         narrowfn = unsigned_narrow_fns[size];
   8726     } else {
   8727         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
   8728     }
   8729 
   8730     tcg_rn = tcg_temp_new_i64();
   8731     tcg_rd = tcg_temp_new_i64();
   8732     tcg_rd_narrowed = tcg_temp_new_i32();
   8733     tcg_final = tcg_const_i64(0);
   8734 
   8735     if (round) {
   8736         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
   8737     } else {
   8738         tcg_round = NULL;
   8739     }
   8740 
   8741     for (i = 0; i < elements; i++) {
   8742         read_vec_element(s, tcg_rn, rn, i, ldop);
   8743         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
   8744                                 false, is_u_shift, size+1, shift);
   8745         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
   8746         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
   8747         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
   8748     }
   8749 
   8750     if (!is_q) {
   8751         write_vec_element(s, tcg_final, rd, 0, MO_64);
   8752     } else {
   8753         write_vec_element(s, tcg_final, rd, 1, MO_64);
   8754     }
   8755 
   8756     tcg_temp_free_i64(tcg_rn);
   8757     tcg_temp_free_i64(tcg_rd);
   8758     tcg_temp_free_i32(tcg_rd_narrowed);
   8759     tcg_temp_free_i64(tcg_final);
   8760 
   8761     clear_vec_high(s, is_q, rd);
   8762 }
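
         /*
          * Worked example: the is_q forms here are the "2" narrowing variants
          * (e.g. SQSHRN2), which deposit the narrowed 64 bits into the high
          * half of Vd and leave the low half intact; the non-q forms write the
          * low half and zero the upper bits.
          */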
   8763 
   8764 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
   8765 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
   8766                              bool src_unsigned, bool dst_unsigned,
   8767                              int immh, int immb, int rn, int rd)
   8768 {
   8769     int immhb = immh << 3 | immb;
   8770     int size = 32 - clz32(immh) - 1;
   8771     int shift = immhb - (8 << size);
   8772     int pass;
   8773 
   8774     assert(immh != 0);
   8775     assert(!(scalar && is_q));
   8776 
   8777     if (!scalar) {
   8778         if (!is_q && extract32(immh, 3, 1)) {
   8779             unallocated_encoding(s);
   8780             return;
   8781         }
   8782 
   8783         /* Since we use the variable-shift helpers we must
   8784          * replicate the shift count into each element of
   8785          * the tcg_shift value.
   8786          */
   8787         switch (size) {
   8788         case 0:
   8789             shift |= shift << 8;
   8790             /* fall through */
   8791         case 1:
   8792             shift |= shift << 16;
   8793             break;
   8794         case 2:
   8795         case 3:
   8796             break;
   8797         default:
   8798             g_assert_not_reached();
   8799         }
   8800     }
   8801 
   8802     if (!fp_access_check(s)) {
   8803         return;
   8804     }
   8805 
   8806     if (size == 3) {
   8807         TCGv_i64 tcg_shift = tcg_constant_i64(shift);
   8808         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
   8809             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
   8810             { NULL, gen_helper_neon_qshl_u64 },
   8811         };
   8812         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
   8813         int maxpass = is_q ? 2 : 1;
   8814 
   8815         for (pass = 0; pass < maxpass; pass++) {
   8816             TCGv_i64 tcg_op = tcg_temp_new_i64();
   8817 
   8818             read_vec_element(s, tcg_op, rn, pass, MO_64);
   8819             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
   8820             write_vec_element(s, tcg_op, rd, pass, MO_64);
   8821 
   8822             tcg_temp_free_i64(tcg_op);
   8823         }
   8824         clear_vec_high(s, is_q, rd);
   8825     } else {
   8826         TCGv_i32 tcg_shift = tcg_constant_i32(shift);
   8827         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
   8828             {
   8829                 { gen_helper_neon_qshl_s8,
   8830                   gen_helper_neon_qshl_s16,
   8831                   gen_helper_neon_qshl_s32 },
   8832                 { gen_helper_neon_qshlu_s8,
   8833                   gen_helper_neon_qshlu_s16,
   8834                   gen_helper_neon_qshlu_s32 }
   8835             }, {
   8836                 { NULL, NULL, NULL },
   8837                 { gen_helper_neon_qshl_u8,
   8838                   gen_helper_neon_qshl_u16,
   8839                   gen_helper_neon_qshl_u32 }
   8840             }
   8841         };
   8842         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
   8843         MemOp memop = scalar ? size : MO_32;
   8844         int maxpass = scalar ? 1 : is_q ? 4 : 2;
   8845 
   8846         for (pass = 0; pass < maxpass; pass++) {
   8847             TCGv_i32 tcg_op = tcg_temp_new_i32();
   8848 
   8849             read_vec_element_i32(s, tcg_op, rn, pass, memop);
   8850             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
   8851             if (scalar) {
   8852                 switch (size) {
   8853                 case 0:
   8854                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
   8855                     break;
   8856                 case 1:
   8857                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
   8858                     break;
   8859                 case 2:
   8860                     break;
   8861                 default:
   8862                     g_assert_not_reached();
   8863                 }
   8864                 write_fp_sreg(s, rd, tcg_op);
   8865             } else {
   8866                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
   8867             }
   8868 
   8869             tcg_temp_free_i32(tcg_op);
   8870         }
   8871 
   8872         if (!scalar) {
   8873             clear_vec_high(s, is_q, rd);
   8874         }
   8875     }
   8876 }
   8877 
   8878 /* Common vector code for handling integer to FP conversion */
   8879 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
   8880                                    int elements, int is_signed,
   8881                                    int fracbits, int size)
   8882 {
   8883     TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
   8884     TCGv_i32 tcg_shift = NULL;
   8885 
   8886     MemOp mop = size | (is_signed ? MO_SIGN : 0);
   8887     int pass;
   8888 
   8889     if (fracbits || size == MO_64) {
   8890         tcg_shift = tcg_constant_i32(fracbits);
   8891     }
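             /* The MO_64 helpers below always take a shift argument, which
              * is why tcg_shift is created even when fracbits is 0 for
              * MO_64; the 32- and 16-bit paths have separate no-shift
              * helpers.
              */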
   8892 
   8893     if (size == MO_64) {
   8894         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
   8895         TCGv_i64 tcg_double = tcg_temp_new_i64();
   8896 
   8897         for (pass = 0; pass < elements; pass++) {
   8898             read_vec_element(s, tcg_int64, rn, pass, mop);
   8899 
   8900             if (is_signed) {
   8901                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
   8902                                      tcg_shift, tcg_fpst);
   8903             } else {
   8904                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
   8905                                      tcg_shift, tcg_fpst);
   8906             }
   8907             if (elements == 1) {
   8908                 write_fp_dreg(s, rd, tcg_double);
   8909             } else {
   8910                 write_vec_element(s, tcg_double, rd, pass, MO_64);
   8911             }
   8912         }
   8913 
   8914         tcg_temp_free_i64(tcg_int64);
   8915         tcg_temp_free_i64(tcg_double);
   8916 
   8917     } else {
   8918         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
   8919         TCGv_i32 tcg_float = tcg_temp_new_i32();
   8920 
   8921         for (pass = 0; pass < elements; pass++) {
   8922             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
   8923 
   8924             switch (size) {
   8925             case MO_32:
   8926                 if (fracbits) {
   8927                     if (is_signed) {
   8928                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
   8929                                              tcg_shift, tcg_fpst);
   8930                     } else {
   8931                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
   8932                                              tcg_shift, tcg_fpst);
   8933                     }
   8934                 } else {
   8935                     if (is_signed) {
   8936                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
   8937                     } else {
   8938                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
   8939                     }
   8940                 }
   8941                 break;
   8942             case MO_16:
   8943                 if (fracbits) {
   8944                     if (is_signed) {
   8945                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
   8946                                              tcg_shift, tcg_fpst);
   8947                     } else {
   8948                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
   8949                                              tcg_shift, tcg_fpst);
   8950                     }
   8951                 } else {
   8952                     if (is_signed) {
   8953                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
   8954                     } else {
   8955                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
   8956                     }
   8957                 }
   8958                 break;
   8959             default:
   8960                 g_assert_not_reached();
   8961             }
   8962 
   8963             if (elements == 1) {
   8964                 write_fp_sreg(s, rd, tcg_float);
   8965             } else {
   8966                 write_vec_element_i32(s, tcg_float, rd, pass, size);
   8967             }
   8968         }
   8969 
   8970         tcg_temp_free_i32(tcg_int32);
   8971         tcg_temp_free_i32(tcg_float);
   8972     }
   8973 
   8974     tcg_temp_free_ptr(tcg_fpst);
   8975 
    8976     clear_vec_high(s, (elements << size) == 16, rd);
   8977 }
   8978 
   8979 /* UCVTF/SCVTF - Integer to FP conversion */
   8980 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
   8981                                          bool is_q, bool is_u,
   8982                                          int immh, int immb, int opcode,
   8983                                          int rn, int rd)
   8984 {
   8985     int size, elements, fracbits;
   8986     int immhb = immh << 3 | immb;
   8987 
   8988     if (immh & 8) {
   8989         size = MO_64;
   8990         if (!is_scalar && !is_q) {
   8991             unallocated_encoding(s);
   8992             return;
   8993         }
   8994     } else if (immh & 4) {
   8995         size = MO_32;
   8996     } else if (immh & 2) {
   8997         size = MO_16;
   8998         if (!dc_isar_feature(aa64_fp16, s)) {
   8999             unallocated_encoding(s);
   9000             return;
   9001         }
   9002     } else {
   9003         /* immh == 0 would be a failure of the decode logic */
   9004         g_assert(immh == 1);
   9005         unallocated_encoding(s);
   9006         return;
   9007     }
   9008 
   9009     if (is_scalar) {
   9010         elements = 1;
   9011     } else {
   9012         elements = (8 << is_q) >> size;
   9013     }
   9014     fracbits = (16 << size) - immhb;
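             /* immhb encodes (2 * esize) - fracbits: e.g. for size == MO_32,
              * immh:immb == 0b0100:000 gives immhb == 32 and so
              * fracbits == 32 (illustrative decode).
              */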
   9015 
   9016     if (!fp_access_check(s)) {
   9017         return;
   9018     }
   9019 
   9020     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
   9021 }
   9022 
    9023 /* FCVTZS, FCVTZU - FP to fixed-point conversion */
   9024 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
   9025                                          bool is_q, bool is_u,
   9026                                          int immh, int immb, int rn, int rd)
   9027 {
   9028     int immhb = immh << 3 | immb;
   9029     int pass, size, fracbits;
   9030     TCGv_ptr tcg_fpstatus;
   9031     TCGv_i32 tcg_rmode, tcg_shift;
   9032 
   9033     if (immh & 0x8) {
   9034         size = MO_64;
   9035         if (!is_scalar && !is_q) {
   9036             unallocated_encoding(s);
   9037             return;
   9038         }
   9039     } else if (immh & 0x4) {
   9040         size = MO_32;
   9041     } else if (immh & 0x2) {
   9042         size = MO_16;
   9043         if (!dc_isar_feature(aa64_fp16, s)) {
   9044             unallocated_encoding(s);
   9045             return;
   9046         }
   9047     } else {
   9048         /* Should have split out AdvSIMD modified immediate earlier.  */
   9049         assert(immh == 1);
   9050         unallocated_encoding(s);
   9051         return;
   9052     }
   9053 
   9054     if (!fp_access_check(s)) {
   9055         return;
   9056     }
   9057 
   9058     assert(!(is_scalar && is_q));
   9059 
   9060     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
   9061     tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
   9062     gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
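             /* gen_helper_set_rmode installs the new rounding mode and hands
              * the old one back in tcg_rmode, so the identical call at the
              * end of this function restores the original FPCR mode.
              */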
   9063     fracbits = (16 << size) - immhb;
   9064     tcg_shift = tcg_constant_i32(fracbits);
   9065 
   9066     if (size == MO_64) {
   9067         int maxpass = is_scalar ? 1 : 2;
   9068 
   9069         for (pass = 0; pass < maxpass; pass++) {
   9070             TCGv_i64 tcg_op = tcg_temp_new_i64();
   9071 
   9072             read_vec_element(s, tcg_op, rn, pass, MO_64);
   9073             if (is_u) {
   9074                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
   9075             } else {
   9076                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
   9077             }
   9078             write_vec_element(s, tcg_op, rd, pass, MO_64);
   9079             tcg_temp_free_i64(tcg_op);
   9080         }
   9081         clear_vec_high(s, is_q, rd);
   9082     } else {
   9083         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
   9084         int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
   9085 
   9086         switch (size) {
   9087         case MO_16:
   9088             if (is_u) {
   9089                 fn = gen_helper_vfp_touhh;
   9090             } else {
   9091                 fn = gen_helper_vfp_toshh;
   9092             }
   9093             break;
   9094         case MO_32:
   9095             if (is_u) {
   9096                 fn = gen_helper_vfp_touls;
   9097             } else {
   9098                 fn = gen_helper_vfp_tosls;
   9099             }
   9100             break;
   9101         default:
   9102             g_assert_not_reached();
   9103         }
   9104 
   9105         for (pass = 0; pass < maxpass; pass++) {
   9106             TCGv_i32 tcg_op = tcg_temp_new_i32();
   9107 
   9108             read_vec_element_i32(s, tcg_op, rn, pass, size);
   9109             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
   9110             if (is_scalar) {
   9111                 write_fp_sreg(s, rd, tcg_op);
   9112             } else {
   9113                 write_vec_element_i32(s, tcg_op, rd, pass, size);
   9114             }
   9115             tcg_temp_free_i32(tcg_op);
   9116         }
   9117         if (!is_scalar) {
   9118             clear_vec_high(s, is_q, rd);
   9119         }
   9120     }
   9121 
   9122     gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
   9123     tcg_temp_free_ptr(tcg_fpstatus);
   9124     tcg_temp_free_i32(tcg_rmode);
   9125 }
   9126 
   9127 /* AdvSIMD scalar shift by immediate
   9128  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
   9129  * +-----+---+-------------+------+------+--------+---+------+------+
   9130  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
   9131  * +-----+---+-------------+------+------+--------+---+------+------+
   9132  *
    9133  * This is the scalar version so it works on fixed-size registers
   9134  */
   9135 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
   9136 {
   9137     int rd = extract32(insn, 0, 5);
   9138     int rn = extract32(insn, 5, 5);
   9139     int opcode = extract32(insn, 11, 5);
   9140     int immb = extract32(insn, 16, 3);
   9141     int immh = extract32(insn, 19, 4);
   9142     bool is_u = extract32(insn, 29, 1);
   9143 
   9144     if (immh == 0) {
   9145         unallocated_encoding(s);
   9146         return;
   9147     }
   9148 
   9149     switch (opcode) {
   9150     case 0x08: /* SRI */
   9151         if (!is_u) {
   9152             unallocated_encoding(s);
   9153             return;
   9154         }
   9155         /* fall through */
   9156     case 0x00: /* SSHR / USHR */
   9157     case 0x02: /* SSRA / USRA */
   9158     case 0x04: /* SRSHR / URSHR */
   9159     case 0x06: /* SRSRA / URSRA */
   9160         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
   9161         break;
   9162     case 0x0a: /* SHL / SLI */
   9163         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
   9164         break;
   9165     case 0x1c: /* SCVTF, UCVTF */
   9166         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
   9167                                      opcode, rn, rd);
   9168         break;
   9169     case 0x10: /* SQSHRUN, SQSHRUN2 */
   9170     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
   9171         if (!is_u) {
   9172             unallocated_encoding(s);
   9173             return;
   9174         }
   9175         handle_vec_simd_sqshrn(s, true, false, false, true,
   9176                                immh, immb, opcode, rn, rd);
   9177         break;
    9178     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN, UQSHRN2 */
   9179     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
   9180         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
   9181                                immh, immb, opcode, rn, rd);
   9182         break;
   9183     case 0xc: /* SQSHLU */
   9184         if (!is_u) {
   9185             unallocated_encoding(s);
   9186             return;
   9187         }
   9188         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
   9189         break;
   9190     case 0xe: /* SQSHL, UQSHL */
   9191         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
   9192         break;
   9193     case 0x1f: /* FCVTZS, FCVTZU */
   9194         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
   9195         break;
   9196     default:
   9197         unallocated_encoding(s);
   9198         break;
   9199     }
   9200 }
   9201 
   9202 /* AdvSIMD scalar three different
   9203  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
   9204  * +-----+---+-----------+------+---+------+--------+-----+------+------+
   9205  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
   9206  * +-----+---+-----------+------+---+------+--------+-----+------+------+
   9207  */
   9208 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
   9209 {
   9210     bool is_u = extract32(insn, 29, 1);
   9211     int size = extract32(insn, 22, 2);
   9212     int opcode = extract32(insn, 12, 4);
   9213     int rm = extract32(insn, 16, 5);
   9214     int rn = extract32(insn, 5, 5);
   9215     int rd = extract32(insn, 0, 5);
   9216 
   9217     if (is_u) {
   9218         unallocated_encoding(s);
   9219         return;
   9220     }
   9221 
   9222     switch (opcode) {
   9223     case 0x9: /* SQDMLAL, SQDMLAL2 */
   9224     case 0xb: /* SQDMLSL, SQDMLSL2 */
   9225     case 0xd: /* SQDMULL, SQDMULL2 */
   9226         if (size == 0 || size == 3) {
   9227             unallocated_encoding(s);
   9228             return;
   9229         }
   9230         break;
   9231     default:
   9232         unallocated_encoding(s);
   9233         return;
   9234     }
   9235 
   9236     if (!fp_access_check(s)) {
   9237         return;
   9238     }
   9239 
   9240     if (size == 2) {
   9241         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
   9242         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
   9243         TCGv_i64 tcg_res = tcg_temp_new_i64();
   9244 
   9245         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
   9246         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
   9247 
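                 /* The doubling step of SQDMULL is implemented as a
                  * saturating add of the 64-bit product to itself, which
                  * also sets QC if 2 * product overflows.
                  */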
   9248         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
   9249         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
   9250 
   9251         switch (opcode) {
   9252         case 0xd: /* SQDMULL, SQDMULL2 */
   9253             break;
   9254         case 0xb: /* SQDMLSL, SQDMLSL2 */
   9255             tcg_gen_neg_i64(tcg_res, tcg_res);
   9256             /* fall through */
   9257         case 0x9: /* SQDMLAL, SQDMLAL2 */
   9258             read_vec_element(s, tcg_op1, rd, 0, MO_64);
   9259             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
   9260                                               tcg_res, tcg_op1);
   9261             break;
   9262         default:
   9263             g_assert_not_reached();
   9264         }
   9265 
   9266         write_fp_dreg(s, rd, tcg_res);
   9267 
   9268         tcg_temp_free_i64(tcg_op1);
   9269         tcg_temp_free_i64(tcg_op2);
   9270         tcg_temp_free_i64(tcg_res);
   9271     } else {
   9272         TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
   9273         TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
   9274         TCGv_i64 tcg_res = tcg_temp_new_i64();
   9275 
   9276         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
   9277         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
   9278 
   9279         switch (opcode) {
   9280         case 0xd: /* SQDMULL, SQDMULL2 */
   9281             break;
   9282         case 0xb: /* SQDMLSL, SQDMLSL2 */
   9283             gen_helper_neon_negl_u32(tcg_res, tcg_res);
   9284             /* fall through */
   9285         case 0x9: /* SQDMLAL, SQDMLAL2 */
   9286         {
   9287             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
   9288             read_vec_element(s, tcg_op3, rd, 0, MO_32);
   9289             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
   9290                                               tcg_res, tcg_op3);
   9291             tcg_temp_free_i64(tcg_op3);
   9292             break;
   9293         }
   9294         default:
   9295             g_assert_not_reached();
   9296         }
   9297 
   9298         tcg_gen_ext32u_i64(tcg_res, tcg_res);
   9299         write_fp_dreg(s, rd, tcg_res);
   9300 
   9301         tcg_temp_free_i32(tcg_op1);
   9302         tcg_temp_free_i32(tcg_op2);
   9303         tcg_temp_free_i64(tcg_res);
   9304     }
   9305 }
   9306 
   9307 static void handle_3same_64(DisasContext *s, int opcode, bool u,
   9308                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
   9309 {
   9310     /* Handle 64x64->64 opcodes which are shared between the scalar
   9311      * and vector 3-same groups. We cover every opcode where size == 3
   9312      * is valid in either the three-reg-same (integer, not pairwise)
   9313      * or scalar-three-reg-same groups.
   9314      */
   9315     TCGCond cond;
   9316 
   9317     switch (opcode) {
   9318     case 0x1: /* SQADD */
   9319         if (u) {
   9320             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
   9321         } else {
   9322             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
   9323         }
   9324         break;
   9325     case 0x5: /* SQSUB */
   9326         if (u) {
   9327             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
   9328         } else {
   9329             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
   9330         }
   9331         break;
   9332     case 0x6: /* CMGT, CMHI */
   9333         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
    9334          * We implement this using setcond (test) and then negating:
    9335          * setcond yields 0 or 1, and negation turns 1 into all-ones. */
   9336         cond = u ? TCG_COND_GTU : TCG_COND_GT;
   9337     do_cmop:
   9338         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
   9339         tcg_gen_neg_i64(tcg_rd, tcg_rd);
   9340         break;
   9341     case 0x7: /* CMGE, CMHS */
   9342         cond = u ? TCG_COND_GEU : TCG_COND_GE;
   9343         goto do_cmop;
   9344     case 0x11: /* CMTST, CMEQ */
   9345         if (u) {
   9346             cond = TCG_COND_EQ;
   9347             goto do_cmop;
   9348         }
   9349         gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
   9350         break;
   9351     case 0x8: /* SSHL, USHL */
   9352         if (u) {
   9353             gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
   9354         } else {
   9355             gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
   9356         }
   9357         break;
   9358     case 0x9: /* SQSHL, UQSHL */
   9359         if (u) {
   9360             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
   9361         } else {
   9362             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
   9363         }
   9364         break;
   9365     case 0xa: /* SRSHL, URSHL */
   9366         if (u) {
   9367             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
   9368         } else {
   9369             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
   9370         }
   9371         break;
   9372     case 0xb: /* SQRSHL, UQRSHL */
   9373         if (u) {
   9374             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
   9375         } else {
   9376             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
   9377         }
   9378         break;
   9379     case 0x10: /* ADD, SUB */
   9380         if (u) {
   9381             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
   9382         } else {
   9383             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
   9384         }
   9385         break;
   9386     default:
   9387         g_assert_not_reached();
   9388     }
   9389 }
   9390 
   9391 /* Handle the 3-same-operands float operations; shared by the scalar
   9392  * and vector encodings. The caller must filter out any encodings
   9393  * not allocated for the encoding it is dealing with.
   9394  */
   9395 static void handle_3same_float(DisasContext *s, int size, int elements,
   9396                                int fpopcode, int rd, int rn, int rm)
   9397 {
   9398     int pass;
   9399     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
   9400 
   9401     for (pass = 0; pass < elements; pass++) {
   9402         if (size) {
   9403             /* Double */
   9404             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
   9405             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
   9406             TCGv_i64 tcg_res = tcg_temp_new_i64();
   9407 
   9408             read_vec_element(s, tcg_op1, rn, pass, MO_64);
   9409             read_vec_element(s, tcg_op2, rm, pass, MO_64);
   9410 
   9411             switch (fpopcode) {
   9412             case 0x39: /* FMLS */
   9413                 /* As usual for ARM, separate negation for fused multiply-add */
   9414                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
   9415                 /* fall through */
   9416             case 0x19: /* FMLA */
   9417                 read_vec_element(s, tcg_res, rd, pass, MO_64);
   9418                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
   9419                                        tcg_res, fpst);
   9420                 break;
   9421             case 0x18: /* FMAXNM */
   9422                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
   9423                 break;
   9424             case 0x1a: /* FADD */
   9425                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
   9426                 break;
   9427             case 0x1b: /* FMULX */
   9428                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
   9429                 break;
   9430             case 0x1c: /* FCMEQ */
   9431                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
   9432                 break;
   9433             case 0x1e: /* FMAX */
   9434                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
   9435                 break;
   9436             case 0x1f: /* FRECPS */
   9437                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
   9438                 break;
   9439             case 0x38: /* FMINNM */
   9440                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
   9441                 break;
   9442             case 0x3a: /* FSUB */
   9443                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
   9444                 break;
   9445             case 0x3e: /* FMIN */
   9446                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
   9447                 break;
   9448             case 0x3f: /* FRSQRTS */
   9449                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
   9450                 break;
   9451             case 0x5b: /* FMUL */
   9452                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
   9453                 break;
   9454             case 0x5c: /* FCMGE */
   9455                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
   9456                 break;
   9457             case 0x5d: /* FACGE */
   9458                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
   9459                 break;
   9460             case 0x5f: /* FDIV */
   9461                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
   9462                 break;
   9463             case 0x7a: /* FABD */
   9464                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
   9465                 gen_helper_vfp_absd(tcg_res, tcg_res);
   9466                 break;
   9467             case 0x7c: /* FCMGT */
   9468                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
   9469                 break;
   9470             case 0x7d: /* FACGT */
   9471                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
   9472                 break;
   9473             default:
   9474                 g_assert_not_reached();
   9475             }
   9476 
   9477             write_vec_element(s, tcg_res, rd, pass, MO_64);
   9478 
   9479             tcg_temp_free_i64(tcg_res);
   9480             tcg_temp_free_i64(tcg_op1);
   9481             tcg_temp_free_i64(tcg_op2);
   9482         } else {
   9483             /* Single */
   9484             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
   9485             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
   9486             TCGv_i32 tcg_res = tcg_temp_new_i32();
   9487 
   9488             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
   9489             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
   9490 
   9491             switch (fpopcode) {
   9492             case 0x39: /* FMLS */
   9493                 /* As usual for ARM, separate negation for fused multiply-add */
   9494                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
   9495                 /* fall through */
   9496             case 0x19: /* FMLA */
   9497                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
   9498                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
   9499                                        tcg_res, fpst);
   9500                 break;
   9501             case 0x1a: /* FADD */
   9502                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
   9503                 break;
   9504             case 0x1b: /* FMULX */
   9505                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
   9506                 break;
   9507             case 0x1c: /* FCMEQ */
   9508                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
   9509                 break;
   9510             case 0x1e: /* FMAX */
   9511                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
   9512                 break;
   9513             case 0x1f: /* FRECPS */
   9514                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
   9515                 break;
   9516             case 0x18: /* FMAXNM */
   9517                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
   9518                 break;
   9519             case 0x38: /* FMINNM */
   9520                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
   9521                 break;
   9522             case 0x3a: /* FSUB */
   9523                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
   9524                 break;
   9525             case 0x3e: /* FMIN */
   9526                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
   9527                 break;
   9528             case 0x3f: /* FRSQRTS */
   9529                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
   9530                 break;
   9531             case 0x5b: /* FMUL */
   9532                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
   9533                 break;
   9534             case 0x5c: /* FCMGE */
   9535                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
   9536                 break;
   9537             case 0x5d: /* FACGE */
   9538                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
   9539                 break;
   9540             case 0x5f: /* FDIV */
   9541                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
   9542                 break;
   9543             case 0x7a: /* FABD */
   9544                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
   9545                 gen_helper_vfp_abss(tcg_res, tcg_res);
   9546                 break;
   9547             case 0x7c: /* FCMGT */
   9548                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
   9549                 break;
   9550             case 0x7d: /* FACGT */
   9551                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
   9552                 break;
   9553             default:
   9554                 g_assert_not_reached();
   9555             }
   9556 
   9557             if (elements == 1) {
   9558                 /* scalar single so clear high part */
   9559                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
   9560 
   9561                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
   9562                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
   9563                 tcg_temp_free_i64(tcg_tmp);
   9564             } else {
   9565                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
   9566             }
   9567 
   9568             tcg_temp_free_i32(tcg_res);
   9569             tcg_temp_free_i32(tcg_op1);
   9570             tcg_temp_free_i32(tcg_op2);
   9571         }
   9572     }
   9573 
   9574     tcg_temp_free_ptr(fpst);
   9575 
   9576     clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
   9577 }
   9578 
   9579 /* AdvSIMD scalar three same
   9580  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
   9581  * +-----+---+-----------+------+---+------+--------+---+------+------+
   9582  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
   9583  * +-----+---+-----------+------+---+------+--------+---+------+------+
   9584  */
   9585 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
   9586 {
   9587     int rd = extract32(insn, 0, 5);
   9588     int rn = extract32(insn, 5, 5);
   9589     int opcode = extract32(insn, 11, 5);
   9590     int rm = extract32(insn, 16, 5);
   9591     int size = extract32(insn, 22, 2);
   9592     bool u = extract32(insn, 29, 1);
   9593     TCGv_i64 tcg_rd;
   9594 
   9595     if (opcode >= 0x18) {
   9596         /* Floating point: U, size[1] and opcode indicate operation */
   9597         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
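                 /* e.g. FABD: opcode 0x1a with size[1] == 1 and U == 1
                  * decodes to fpopcode 0x7a (illustrative).
                  */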
   9598         switch (fpopcode) {
   9599         case 0x1b: /* FMULX */
   9600         case 0x1f: /* FRECPS */
   9601         case 0x3f: /* FRSQRTS */
   9602         case 0x5d: /* FACGE */
   9603         case 0x7d: /* FACGT */
   9604         case 0x1c: /* FCMEQ */
   9605         case 0x5c: /* FCMGE */
   9606         case 0x7c: /* FCMGT */
   9607         case 0x7a: /* FABD */
   9608             break;
   9609         default:
   9610             unallocated_encoding(s);
   9611             return;
   9612         }
   9613 
   9614         if (!fp_access_check(s)) {
   9615             return;
   9616         }
   9617 
   9618         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
   9619         return;
   9620     }
   9621 
   9622     switch (opcode) {
   9623     case 0x1: /* SQADD, UQADD */
   9624     case 0x5: /* SQSUB, UQSUB */
   9625     case 0x9: /* SQSHL, UQSHL */
   9626     case 0xb: /* SQRSHL, UQRSHL */
   9627         break;
   9628     case 0x8: /* SSHL, USHL */
   9629     case 0xa: /* SRSHL, URSHL */
   9630     case 0x6: /* CMGT, CMHI */
   9631     case 0x7: /* CMGE, CMHS */
   9632     case 0x11: /* CMTST, CMEQ */
   9633     case 0x10: /* ADD, SUB (vector) */
   9634         if (size != 3) {
   9635             unallocated_encoding(s);
   9636             return;
   9637         }
   9638         break;
   9639     case 0x16: /* SQDMULH, SQRDMULH (vector) */
   9640         if (size != 1 && size != 2) {
   9641             unallocated_encoding(s);
   9642             return;
   9643         }
   9644         break;
   9645     default:
   9646         unallocated_encoding(s);
   9647         return;
   9648     }
   9649 
   9650     if (!fp_access_check(s)) {
   9651         return;
   9652     }
   9653 
   9654     tcg_rd = tcg_temp_new_i64();
   9655 
   9656     if (size == 3) {
   9657         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
   9658         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
   9659 
   9660         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
   9661         tcg_temp_free_i64(tcg_rn);
   9662         tcg_temp_free_i64(tcg_rm);
   9663     } else {
   9664         /* Do a single operation on the lowest element in the vector.
   9665          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
   9666          * no side effects for all these operations.
   9667          * OPTME: special-purpose helpers would avoid doing some
   9668          * unnecessary work in the helper for the 8 and 16 bit cases.
   9669          */
   9670         NeonGenTwoOpEnvFn *genenvfn;
   9671         TCGv_i32 tcg_rn = tcg_temp_new_i32();
   9672         TCGv_i32 tcg_rm = tcg_temp_new_i32();
   9673         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
   9674 
   9675         read_vec_element_i32(s, tcg_rn, rn, 0, size);
   9676         read_vec_element_i32(s, tcg_rm, rm, 0, size);
   9677 
   9678         switch (opcode) {
   9679         case 0x1: /* SQADD, UQADD */
   9680         {
   9681             static NeonGenTwoOpEnvFn * const fns[3][2] = {
   9682                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
   9683                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
   9684                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
   9685             };
   9686             genenvfn = fns[size][u];
   9687             break;
   9688         }
   9689         case 0x5: /* SQSUB, UQSUB */
   9690         {
   9691             static NeonGenTwoOpEnvFn * const fns[3][2] = {
   9692                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
   9693                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
   9694                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
   9695             };
   9696             genenvfn = fns[size][u];
   9697             break;
   9698         }
   9699         case 0x9: /* SQSHL, UQSHL */
   9700         {
   9701             static NeonGenTwoOpEnvFn * const fns[3][2] = {
   9702                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
   9703                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
   9704                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
   9705             };
   9706             genenvfn = fns[size][u];
   9707             break;
   9708         }
   9709         case 0xb: /* SQRSHL, UQRSHL */
   9710         {
   9711             static NeonGenTwoOpEnvFn * const fns[3][2] = {
   9712                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
   9713                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
   9714                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
   9715             };
   9716             genenvfn = fns[size][u];
   9717             break;
   9718         }
   9719         case 0x16: /* SQDMULH, SQRDMULH */
   9720         {
   9721             static NeonGenTwoOpEnvFn * const fns[2][2] = {
   9722                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
   9723                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
   9724             };
   9725             assert(size == 1 || size == 2);
   9726             genenvfn = fns[size - 1][u];
   9727             break;
   9728         }
   9729         default:
   9730             g_assert_not_reached();
   9731         }
   9732 
   9733         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
   9734         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
   9735         tcg_temp_free_i32(tcg_rd32);
   9736         tcg_temp_free_i32(tcg_rn);
   9737         tcg_temp_free_i32(tcg_rm);
   9738     }
   9739 
   9740     write_fp_dreg(s, rd, tcg_rd);
   9741 
   9742     tcg_temp_free_i64(tcg_rd);
   9743 }
   9744 
   9745 /* AdvSIMD scalar three same FP16
   9746  *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
   9747  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
   9748  * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
   9749  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
   9750  * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
   9751  * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
   9752  */
   9753 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
   9754                                                   uint32_t insn)
   9755 {
   9756     int rd = extract32(insn, 0, 5);
   9757     int rn = extract32(insn, 5, 5);
   9758     int opcode = extract32(insn, 11, 3);
   9759     int rm = extract32(insn, 16, 5);
   9760     bool u = extract32(insn, 29, 1);
   9761     bool a = extract32(insn, 23, 1);
    9762     int fpopcode = opcode | (a << 3) | (u << 4);
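             /* e.g. FABD: opcode 0x2 with a == 1 and U == 1 decodes to
              * fpopcode 0x1a (illustrative).
              */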
   9763     TCGv_ptr fpst;
   9764     TCGv_i32 tcg_op1;
   9765     TCGv_i32 tcg_op2;
   9766     TCGv_i32 tcg_res;
   9767 
   9768     switch (fpopcode) {
   9769     case 0x03: /* FMULX */
   9770     case 0x04: /* FCMEQ (reg) */
   9771     case 0x07: /* FRECPS */
   9772     case 0x0f: /* FRSQRTS */
   9773     case 0x14: /* FCMGE (reg) */
   9774     case 0x15: /* FACGE */
   9775     case 0x1a: /* FABD */
   9776     case 0x1c: /* FCMGT (reg) */
   9777     case 0x1d: /* FACGT */
   9778         break;
   9779     default:
   9780         unallocated_encoding(s);
   9781         return;
   9782     }
   9783 
   9784     if (!dc_isar_feature(aa64_fp16, s)) {
    9785         unallocated_encoding(s);
                 return;
    9786     }
   9787 
   9788     if (!fp_access_check(s)) {
   9789         return;
   9790     }
   9791 
   9792     fpst = fpstatus_ptr(FPST_FPCR_F16);
   9793 
   9794     tcg_op1 = read_fp_hreg(s, rn);
   9795     tcg_op2 = read_fp_hreg(s, rm);
   9796     tcg_res = tcg_temp_new_i32();
   9797 
   9798     switch (fpopcode) {
   9799     case 0x03: /* FMULX */
   9800         gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
   9801         break;
   9802     case 0x04: /* FCMEQ (reg) */
   9803         gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
   9804         break;
   9805     case 0x07: /* FRECPS */
   9806         gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
   9807         break;
   9808     case 0x0f: /* FRSQRTS */
   9809         gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
   9810         break;
   9811     case 0x14: /* FCMGE (reg) */
   9812         gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
   9813         break;
   9814     case 0x15: /* FACGE */
   9815         gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
   9816         break;
   9817     case 0x1a: /* FABD */
   9818         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
   9819         tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
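                 /* Clearing bit 15 of the half-precision difference is the
                  * fabs step of FABD.
                  */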
   9820         break;
   9821     case 0x1c: /* FCMGT (reg) */
   9822         gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
   9823         break;
   9824     case 0x1d: /* FACGT */
   9825         gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
   9826         break;
   9827     default:
   9828         g_assert_not_reached();
   9829     }
   9830 
   9831     write_fp_sreg(s, rd, tcg_res);
    9832 
   9834     tcg_temp_free_i32(tcg_res);
   9835     tcg_temp_free_i32(tcg_op1);
   9836     tcg_temp_free_i32(tcg_op2);
   9837     tcg_temp_free_ptr(fpst);
   9838 }
   9839 
   9840 /* AdvSIMD scalar three same extra
   9841  *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
   9842  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
   9843  * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
   9844  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
   9845  */
   9846 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
   9847                                                    uint32_t insn)
   9848 {
   9849     int rd = extract32(insn, 0, 5);
   9850     int rn = extract32(insn, 5, 5);
   9851     int opcode = extract32(insn, 11, 4);
   9852     int rm = extract32(insn, 16, 5);
   9853     int size = extract32(insn, 22, 2);
   9854     bool u = extract32(insn, 29, 1);
   9855     TCGv_i32 ele1, ele2, ele3;
   9856     TCGv_i64 res;
   9857     bool feature;
   9858 
   9859     switch (u * 16 + opcode) {
   9860     case 0x10: /* SQRDMLAH (vector) */
   9861     case 0x11: /* SQRDMLSH (vector) */
   9862         if (size != 1 && size != 2) {
   9863             unallocated_encoding(s);
   9864             return;
   9865         }
   9866         feature = dc_isar_feature(aa64_rdm, s);
   9867         break;
   9868     default:
   9869         unallocated_encoding(s);
   9870         return;
   9871     }
   9872     if (!feature) {
   9873         unallocated_encoding(s);
   9874         return;
   9875     }
   9876     if (!fp_access_check(s)) {
   9877         return;
   9878     }
   9879 
   9880     /* Do a single operation on the lowest element in the vector.
   9881      * We use the standard Neon helpers and rely on 0 OP 0 == 0
   9882      * with no side effects for all these operations.
   9883      * OPTME: special-purpose helpers would avoid doing some
   9884      * unnecessary work in the helper for the 16 bit cases.
   9885      */
   9886     ele1 = tcg_temp_new_i32();
   9887     ele2 = tcg_temp_new_i32();
   9888     ele3 = tcg_temp_new_i32();
   9889 
   9890     read_vec_element_i32(s, ele1, rn, 0, size);
   9891     read_vec_element_i32(s, ele2, rm, 0, size);
   9892     read_vec_element_i32(s, ele3, rd, 0, size);
   9893 
   9894     switch (opcode) {
   9895     case 0x0: /* SQRDMLAH */
   9896         if (size == 1) {
   9897             gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
   9898         } else {
   9899             gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
   9900         }
   9901         break;
   9902     case 0x1: /* SQRDMLSH */
   9903         if (size == 1) {
   9904             gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
   9905         } else {
   9906             gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
   9907         }
   9908         break;
   9909     default:
   9910         g_assert_not_reached();
   9911     }
   9912     tcg_temp_free_i32(ele1);
   9913     tcg_temp_free_i32(ele2);
   9914 
   9915     res = tcg_temp_new_i64();
   9916     tcg_gen_extu_i32_i64(res, ele3);
   9917     tcg_temp_free_i32(ele3);
   9918 
   9919     write_fp_dreg(s, rd, res);
   9920     tcg_temp_free_i64(res);
   9921 }
   9922 
   9923 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
   9924                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
   9925                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
   9926 {
   9927     /* Handle 64->64 opcodes which are shared between the scalar and
   9928      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
   9929      * is valid in either group and also the double-precision fp ops.
    9930      * The caller need only provide tcg_rmode and tcg_fpstatus if the op
   9931      * requires them.
   9932      */
   9933     TCGCond cond;
   9934 
   9935     switch (opcode) {
   9936     case 0x4: /* CLS, CLZ */
   9937         if (u) {
   9938             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
   9939         } else {
   9940             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
   9941         }
   9942         break;
   9943     case 0x5: /* NOT */
   9944         /* This opcode is shared with CNT and RBIT but we have earlier
   9945          * enforced that size == 3 if and only if this is the NOT insn.
   9946          */
   9947         tcg_gen_not_i64(tcg_rd, tcg_rn);
   9948         break;
   9949     case 0x7: /* SQABS, SQNEG */
   9950         if (u) {
   9951             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
   9952         } else {
   9953             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
   9954         }
   9955         break;
   9956     case 0xa: /* CMLT */
   9957         /* 64 bit integer comparison against zero, result is
    9958          * test ? (2^64 - 1) : 0. We implement via setcond(test) and
    9959          * then negating.
   9960          */
   9961         cond = TCG_COND_LT;
   9962     do_cmop:
   9963         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
   9964         tcg_gen_neg_i64(tcg_rd, tcg_rd);
   9965         break;
   9966     case 0x8: /* CMGT, CMGE */
   9967         cond = u ? TCG_COND_GE : TCG_COND_GT;
   9968         goto do_cmop;
   9969     case 0x9: /* CMEQ, CMLE */
   9970         cond = u ? TCG_COND_LE : TCG_COND_EQ;
   9971         goto do_cmop;
   9972     case 0xb: /* ABS, NEG */
   9973         if (u) {
   9974             tcg_gen_neg_i64(tcg_rd, tcg_rn);
   9975         } else {
   9976             tcg_gen_abs_i64(tcg_rd, tcg_rn);
   9977         }
   9978         break;
   9979     case 0x2f: /* FABS */
   9980         gen_helper_vfp_absd(tcg_rd, tcg_rn);
   9981         break;
   9982     case 0x6f: /* FNEG */
   9983         gen_helper_vfp_negd(tcg_rd, tcg_rn);
   9984         break;
   9985     case 0x7f: /* FSQRT */
   9986         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
   9987         break;
   9988     case 0x1a: /* FCVTNS */
   9989     case 0x1b: /* FCVTMS */
   9990     case 0x1c: /* FCVTAS */
   9991     case 0x3a: /* FCVTPS */
   9992     case 0x3b: /* FCVTZS */
   9993         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
   9994         break;
   9995     case 0x5a: /* FCVTNU */
   9996     case 0x5b: /* FCVTMU */
   9997     case 0x5c: /* FCVTAU */
   9998     case 0x7a: /* FCVTPU */
   9999     case 0x7b: /* FCVTZU */
  10000         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
  10001         break;
  10002     case 0x18: /* FRINTN */
  10003     case 0x19: /* FRINTM */
  10004     case 0x38: /* FRINTP */
  10005     case 0x39: /* FRINTZ */
  10006     case 0x58: /* FRINTA */
  10007     case 0x79: /* FRINTI */
  10008         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
  10009         break;
  10010     case 0x59: /* FRINTX */
  10011         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
  10012         break;
  10013     case 0x1e: /* FRINT32Z */
  10014     case 0x5e: /* FRINT32X */
  10015         gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
  10016         break;
  10017     case 0x1f: /* FRINT64Z */
  10018     case 0x5f: /* FRINT64X */
  10019         gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
  10020         break;
  10021     default:
  10022         g_assert_not_reached();
  10023     }
  10024 }
  10025 
  10026 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
  10027                                    bool is_scalar, bool is_u, bool is_q,
  10028                                    int size, int rn, int rd)
  10029 {
  10030     bool is_double = (size == MO_64);
  10031     TCGv_ptr fpst;
  10032 
  10033     if (!fp_access_check(s)) {
  10034         return;
  10035     }
  10036 
  10037     fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
  10038 
  10039     if (is_double) {
  10040         TCGv_i64 tcg_op = tcg_temp_new_i64();
  10041         TCGv_i64 tcg_zero = tcg_constant_i64(0);
  10042         TCGv_i64 tcg_res = tcg_temp_new_i64();
  10043         NeonGenTwoDoubleOpFn *genfn;
  10044         bool swap = false;
  10045         int pass;
  10046 
  10047         switch (opcode) {
  10048         case 0x2e: /* FCMLT (zero) */
  10049             swap = true;
   10050             /* fall through */
  10051         case 0x2c: /* FCMGT (zero) */
  10052             genfn = gen_helper_neon_cgt_f64;
  10053             break;
  10054         case 0x2d: /* FCMEQ (zero) */
  10055             genfn = gen_helper_neon_ceq_f64;
  10056             break;
  10057         case 0x6d: /* FCMLE (zero) */
  10058             swap = true;
  10059             /* fall through */
  10060         case 0x6c: /* FCMGE (zero) */
  10061             genfn = gen_helper_neon_cge_f64;
  10062             break;
  10063         default:
  10064             g_assert_not_reached();
  10065         }
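                 /* There are no dedicated LT/LE helpers; with swap set, the
                  * zero operand goes first, so FCMLT (zero) is computed as
                  * 0 > x and FCMLE (zero) as 0 >= x.
                  */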
  10066 
  10067         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
  10068             read_vec_element(s, tcg_op, rn, pass, MO_64);
  10069             if (swap) {
  10070                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
  10071             } else {
  10072                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
  10073             }
  10074             write_vec_element(s, tcg_res, rd, pass, MO_64);
  10075         }
  10076         tcg_temp_free_i64(tcg_res);
  10077         tcg_temp_free_i64(tcg_op);
  10078 
  10079         clear_vec_high(s, !is_scalar, rd);
  10080     } else {
  10081         TCGv_i32 tcg_op = tcg_temp_new_i32();
  10082         TCGv_i32 tcg_zero = tcg_constant_i32(0);
  10083         TCGv_i32 tcg_res = tcg_temp_new_i32();
  10084         NeonGenTwoSingleOpFn *genfn;
  10085         bool swap = false;
  10086         int pass, maxpasses;
  10087 
  10088         if (size == MO_16) {
  10089             switch (opcode) {
  10090             case 0x2e: /* FCMLT (zero) */
  10091                 swap = true;
  10092                 /* fall through */
  10093             case 0x2c: /* FCMGT (zero) */
  10094                 genfn = gen_helper_advsimd_cgt_f16;
  10095                 break;
  10096             case 0x2d: /* FCMEQ (zero) */
  10097                 genfn = gen_helper_advsimd_ceq_f16;
  10098                 break;
  10099             case 0x6d: /* FCMLE (zero) */
  10100                 swap = true;
  10101                 /* fall through */
  10102             case 0x6c: /* FCMGE (zero) */
  10103                 genfn = gen_helper_advsimd_cge_f16;
  10104                 break;
  10105             default:
  10106                 g_assert_not_reached();
  10107             }
  10108         } else {
  10109             switch (opcode) {
  10110             case 0x2e: /* FCMLT (zero) */
  10111                 swap = true;
  10112                 /* fall through */
  10113             case 0x2c: /* FCMGT (zero) */
  10114                 genfn = gen_helper_neon_cgt_f32;
  10115                 break;
  10116             case 0x2d: /* FCMEQ (zero) */
  10117                 genfn = gen_helper_neon_ceq_f32;
  10118                 break;
  10119             case 0x6d: /* FCMLE (zero) */
  10120                 swap = true;
  10121                 /* fall through */
  10122             case 0x6c: /* FCMGE (zero) */
  10123                 genfn = gen_helper_neon_cge_f32;
  10124                 break;
  10125             default:
  10126                 g_assert_not_reached();
  10127             }
  10128         }
  10129 
  10130         if (is_scalar) {
  10131             maxpasses = 1;
  10132         } else {
  10133             int vector_size = 8 << is_q;
  10134             maxpasses = vector_size >> size;
  10135         }
  10136 
  10137         for (pass = 0; pass < maxpasses; pass++) {
  10138             read_vec_element_i32(s, tcg_op, rn, pass, size);
  10139             if (swap) {
  10140                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
  10141             } else {
  10142                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
  10143             }
  10144             if (is_scalar) {
  10145                 write_fp_sreg(s, rd, tcg_res);
  10146             } else {
  10147                 write_vec_element_i32(s, tcg_res, rd, pass, size);
  10148             }
  10149         }
  10150         tcg_temp_free_i32(tcg_res);
  10151         tcg_temp_free_i32(tcg_op);
  10152         if (!is_scalar) {
  10153             clear_vec_high(s, is_q, rd);
  10154         }
  10155     }
  10156 
  10157     tcg_temp_free_ptr(fpst);
  10158 }
  10159 
  10160 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
  10161                                     bool is_scalar, bool is_u, bool is_q,
  10162                                     int size, int rn, int rd)
  10163 {
  10164     bool is_double = (size == 3);
  10165     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
  10166 
  10167     if (is_double) {
  10168         TCGv_i64 tcg_op = tcg_temp_new_i64();
  10169         TCGv_i64 tcg_res = tcg_temp_new_i64();
  10170         int pass;
  10171 
  10172         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
  10173             read_vec_element(s, tcg_op, rn, pass, MO_64);
  10174             switch (opcode) {
  10175             case 0x3d: /* FRECPE */
  10176                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
  10177                 break;
  10178             case 0x3f: /* FRECPX */
  10179                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
  10180                 break;
  10181             case 0x7d: /* FRSQRTE */
  10182                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
  10183                 break;
  10184             default:
  10185                 g_assert_not_reached();
  10186             }
  10187             write_vec_element(s, tcg_res, rd, pass, MO_64);
  10188         }
  10189         tcg_temp_free_i64(tcg_res);
  10190         tcg_temp_free_i64(tcg_op);
  10191         clear_vec_high(s, !is_scalar, rd);
  10192     } else {
  10193         TCGv_i32 tcg_op = tcg_temp_new_i32();
  10194         TCGv_i32 tcg_res = tcg_temp_new_i32();
  10195         int pass, maxpasses;
  10196 
  10197         if (is_scalar) {
  10198             maxpasses = 1;
  10199         } else {
  10200             maxpasses = is_q ? 4 : 2;
  10201         }
  10202 
  10203         for (pass = 0; pass < maxpasses; pass++) {
  10204             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
  10205 
  10206             switch (opcode) {
  10207             case 0x3c: /* URECPE */
  10208                 gen_helper_recpe_u32(tcg_res, tcg_op);
  10209                 break;
  10210             case 0x3d: /* FRECPE */
  10211                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
  10212                 break;
  10213             case 0x3f: /* FRECPX */
  10214                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
  10215                 break;
  10216             case 0x7d: /* FRSQRTE */
  10217                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
  10218                 break;
  10219             default:
  10220                 g_assert_not_reached();
  10221             }
  10222 
  10223             if (is_scalar) {
  10224                 write_fp_sreg(s, rd, tcg_res);
  10225             } else {
  10226                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
  10227             }
  10228         }
  10229         tcg_temp_free_i32(tcg_res);
  10230         tcg_temp_free_i32(tcg_op);
  10231         if (!is_scalar) {
  10232             clear_vec_high(s, is_q, rd);
  10233         }
  10234     }
  10235     tcg_temp_free_ptr(fpst);
  10236 }
  10237 
  10238 static void handle_2misc_narrow(DisasContext *s, bool scalar,
  10239                                 int opcode, bool u, bool is_q,
  10240                                 int size, int rn, int rd)
  10241 {
  10242     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
  10243      * in the source becomes a size element in the destination).
  10244      */
  10245     int pass;
  10246     TCGv_i32 tcg_res[2];
  10247     int destelt = is_q ? 2 : 0;
  10248     int passes = scalar ? 1 : 2;
  10249 
  10250     if (scalar) {
  10251         tcg_res[1] = tcg_constant_i32(0);
  10252     }
  10253 
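            /*
             * The Q bit selects the destination half: the "2" forms
             * (is_q) write elements 2 and 3 of Rd.  The write-back loop
             * below always stores both words, so for scalar ops the
             * constant zero in tcg_res[1] clears bits [63:32].
             */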
  10254     for (pass = 0; pass < passes; pass++) {
  10255         TCGv_i64 tcg_op = tcg_temp_new_i64();
  10256         NeonGenNarrowFn *genfn = NULL;
  10257         NeonGenNarrowEnvFn *genenvfn = NULL;
  10258 
  10259         if (scalar) {
  10260             read_vec_element(s, tcg_op, rn, pass, size + 1);
  10261         } else {
  10262             read_vec_element(s, tcg_op, rn, pass, MO_64);
  10263         }
  10264         tcg_res[pass] = tcg_temp_new_i32();
  10265 
  10266         switch (opcode) {
  10267         case 0x12: /* XTN, SQXTUN */
  10268         {
  10269             static NeonGenNarrowFn * const xtnfns[3] = {
  10270                 gen_helper_neon_narrow_u8,
  10271                 gen_helper_neon_narrow_u16,
  10272                 tcg_gen_extrl_i64_i32,
  10273             };
  10274             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
  10275                 gen_helper_neon_unarrow_sat8,
  10276                 gen_helper_neon_unarrow_sat16,
  10277                 gen_helper_neon_unarrow_sat32,
  10278             };
  10279             if (u) {
  10280                 genenvfn = sqxtunfns[size];
  10281             } else {
  10282                 genfn = xtnfns[size];
  10283             }
  10284             break;
  10285         }
  10286         case 0x14: /* SQXTN, UQXTN */
  10287         {
  10288             static NeonGenNarrowEnvFn * const fns[3][2] = {
  10289                 { gen_helper_neon_narrow_sat_s8,
  10290                   gen_helper_neon_narrow_sat_u8 },
  10291                 { gen_helper_neon_narrow_sat_s16,
  10292                   gen_helper_neon_narrow_sat_u16 },
  10293                 { gen_helper_neon_narrow_sat_s32,
  10294                   gen_helper_neon_narrow_sat_u32 },
  10295             };
  10296             genenvfn = fns[size][u];
  10297             break;
  10298         }
  10299         case 0x16: /* FCVTN, FCVTN2 */
  10300             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
  10301             if (size == 2) {
  10302                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
  10303             } else {
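                        /* Two f32 -> f16 conversions, packed into one word */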
  10304                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
  10305                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
  10306                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
  10307                 TCGv_i32 ahp = get_ahp_flag();
  10308 
  10309                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
  10310                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
  10311                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
  10312                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
  10313                 tcg_temp_free_i32(tcg_lo);
  10314                 tcg_temp_free_i32(tcg_hi);
  10315                 tcg_temp_free_ptr(fpst);
  10316                 tcg_temp_free_i32(ahp);
  10317             }
  10318             break;
  10319         case 0x36: /* BFCVTN, BFCVTN2 */
  10320             {
  10321                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
  10322                 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
  10323                 tcg_temp_free_ptr(fpst);
  10324             }
  10325             break;
  10326         case 0x56:  /* FCVTXN, FCVTXN2 */
  10327             /* 64 bit to 32 bit float conversion
  10328              * with von Neumann rounding (round to odd)
  10329              */
  10330             assert(size == 2);
  10331             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
  10332             break;
  10333         default:
  10334             g_assert_not_reached();
  10335         }
  10336 
  10337         if (genfn) {
  10338             genfn(tcg_res[pass], tcg_op);
  10339         } else if (genenvfn) {
  10340             genenvfn(tcg_res[pass], cpu_env, tcg_op);
  10341         }
  10342 
  10343         tcg_temp_free_i64(tcg_op);
  10344     }
  10345 
  10346     for (pass = 0; pass < 2; pass++) {
  10347         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
  10348         tcg_temp_free_i32(tcg_res[pass]);
  10349     }
  10350     clear_vec_high(s, is_q, rd);
  10351 }
  10352 
  10353 /* Remaining saturating accumulating ops */
  10354 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
  10355                                 bool is_q, int size, int rn, int rd)
  10356 {
  10357     bool is_double = (size == 3);
  10358 
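            /*
             * USQADD adds a signed value to the unsigned accumulator in
             * Rd, saturating to the unsigned range; SUQADD adds an
             * unsigned value to the signed accumulator, saturating to
             * the signed range.
             */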
  10359     if (is_double) {
  10360         TCGv_i64 tcg_rn = tcg_temp_new_i64();
  10361         TCGv_i64 tcg_rd = tcg_temp_new_i64();
  10362         int pass;
  10363 
  10364         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
  10365             read_vec_element(s, tcg_rn, rn, pass, MO_64);
  10366             read_vec_element(s, tcg_rd, rd, pass, MO_64);
  10367 
  10368             if (is_u) { /* USQADD */
  10369                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
  10370             } else { /* SUQADD */
  10371                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
  10372             }
  10373             write_vec_element(s, tcg_rd, rd, pass, MO_64);
  10374         }
  10375         tcg_temp_free_i64(tcg_rd);
  10376         tcg_temp_free_i64(tcg_rn);
  10377         clear_vec_high(s, !is_scalar, rd);
  10378     } else {
  10379         TCGv_i32 tcg_rn = tcg_temp_new_i32();
  10380         TCGv_i32 tcg_rd = tcg_temp_new_i32();
  10381         int pass, maxpasses;
  10382 
  10383         if (is_scalar) {
  10384             maxpasses = 1;
  10385         } else {
  10386             maxpasses = is_q ? 4 : 2;
  10387         }
  10388 
  10389         for (pass = 0; pass < maxpasses; pass++) {
  10390             if (is_scalar) {
  10391                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
  10392                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
  10393             } else {
  10394                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
  10395                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
  10396             }
  10397 
  10398             if (is_u) { /* USQADD */
  10399                 switch (size) {
  10400                 case 0:
  10401                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
  10402                     break;
  10403                 case 1:
  10404                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
  10405                     break;
  10406                 case 2:
  10407                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
  10408                     break;
  10409                 default:
  10410                     g_assert_not_reached();
  10411                 }
  10412             } else { /* SUQADD */
  10413                 switch (size) {
  10414                 case 0:
  10415                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
  10416                     break;
  10417                 case 1:
  10418                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
  10419                     break;
  10420                 case 2:
  10421                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
  10422                     break;
  10423                 default:
  10424                     g_assert_not_reached();
  10425                 }
  10426             }
  10427 
  10428             if (is_scalar) {
  10429                 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64);
  10430             }
  10431             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
  10432         }
  10433         tcg_temp_free_i32(tcg_rd);
  10434         tcg_temp_free_i32(tcg_rn);
  10435         clear_vec_high(s, is_q, rd);
  10436     }
  10437 }
  10438 
  10439 /* AdvSIMD scalar two reg misc
  10440  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
  10441  * +-----+---+-----------+------+-----------+--------+-----+------+------+
  10442  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
  10443  * +-----+---+-----------+------+-----------+--------+-----+------+------+
  10444  */
  10445 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
  10446 {
  10447     int rd = extract32(insn, 0, 5);
  10448     int rn = extract32(insn, 5, 5);
  10449     int opcode = extract32(insn, 12, 5);
  10450     int size = extract32(insn, 22, 2);
  10451     bool u = extract32(insn, 29, 1);
  10452     bool is_fcvt = false;
  10453     int rmode;
  10454     TCGv_i32 tcg_rmode;
  10455     TCGv_ptr tcg_fpstatus;
  10456 
  10457     switch (opcode) {
  10458     case 0x3: /* USQADD / SUQADD */
  10459         if (!fp_access_check(s)) {
  10460             return;
  10461         }
  10462         handle_2misc_satacc(s, true, u, false, size, rn, rd);
  10463         return;
  10464     case 0x7: /* SQABS / SQNEG */
  10465         break;
  10466     case 0xa: /* CMLT */
  10467         if (u) {
  10468             unallocated_encoding(s);
  10469             return;
  10470         }
  10471         /* fall through */
  10472     case 0x8: /* CMGT, CMGE */
  10473     case 0x9: /* CMEQ, CMLE */
  10474     case 0xb: /* ABS, NEG */
  10475         if (size != 3) {
  10476             unallocated_encoding(s);
  10477             return;
  10478         }
  10479         break;
  10480     case 0x12: /* SQXTUN */
  10481         if (!u) {
  10482             unallocated_encoding(s);
  10483             return;
  10484         }
  10485         /* fall through */
  10486     case 0x14: /* SQXTN, UQXTN */
  10487         if (size == 3) {
  10488             unallocated_encoding(s);
  10489             return;
  10490         }
  10491         if (!fp_access_check(s)) {
  10492             return;
  10493         }
  10494         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
  10495         return;
  10496     case 0xc ... 0xf:
  10497     case 0x16 ... 0x1d:
  10498     case 0x1f:
  10499         /* Floating point: U, size[1] and opcode indicate operation;
  10500          * size[0] indicates single or double precision.
  10501          */
  10502         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
  10503         size = extract32(size, 0, 1) ? 3 : 2;
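                /*
                 * e.g. FRECPE (U == 0, size == 1x, opcode == 0x1d) becomes
                 * 0x3d under this combined encoding.
                 */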
  10504         switch (opcode) {
  10505         case 0x2c: /* FCMGT (zero) */
  10506         case 0x2d: /* FCMEQ (zero) */
  10507         case 0x2e: /* FCMLT (zero) */
  10508         case 0x6c: /* FCMGE (zero) */
  10509         case 0x6d: /* FCMLE (zero) */
  10510             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
  10511             return;
  10512         case 0x1d: /* SCVTF */
  10513         case 0x5d: /* UCVTF */
  10514         {
  10515             bool is_signed = (opcode == 0x1d);
  10516             if (!fp_access_check(s)) {
  10517                 return;
  10518             }
  10519             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
  10520             return;
  10521         }
  10522         case 0x3d: /* FRECPE */
  10523         case 0x3f: /* FRECPX */
  10524         case 0x7d: /* FRSQRTE */
  10525             if (!fp_access_check(s)) {
  10526                 return;
  10527             }
  10528             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
  10529             return;
  10530         case 0x1a: /* FCVTNS */
  10531         case 0x1b: /* FCVTMS */
  10532         case 0x3a: /* FCVTPS */
  10533         case 0x3b: /* FCVTZS */
  10534         case 0x5a: /* FCVTNU */
  10535         case 0x5b: /* FCVTMU */
  10536         case 0x7a: /* FCVTPU */
  10537         case 0x7b: /* FCVTZU */
  10538             is_fcvt = true;
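                    /*
                     * The rounding mode lives in opcode bits 0 and 5:
                     * N/P/M/Z map to FPROUNDING_TIEEVEN/POSINF/NEGINF/ZERO,
                     * converted for softfloat by arm_rmode_to_sf() below.
                     */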
  10539             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
  10540             break;
  10541         case 0x1c: /* FCVTAS */
  10542         case 0x5c: /* FCVTAU */
  10543             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
  10544             is_fcvt = true;
  10545             rmode = FPROUNDING_TIEAWAY;
  10546             break;
  10547         case 0x56: /* FCVTXN, FCVTXN2 */
  10548             if (size == 2) {
  10549                 unallocated_encoding(s);
  10550                 return;
  10551             }
  10552             if (!fp_access_check(s)) {
  10553                 return;
  10554             }
  10555             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
  10556             return;
  10557         default:
  10558             unallocated_encoding(s);
  10559             return;
  10560         }
  10561         break;
  10562     default:
  10563         unallocated_encoding(s);
  10564         return;
  10565     }
  10566 
  10567     if (!fp_access_check(s)) {
  10568         return;
  10569     }
  10570 
  10571     if (is_fcvt) {
  10572         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
  10573         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
  10574         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
  10575     } else {
  10576         tcg_rmode = NULL;
  10577         tcg_fpstatus = NULL;
  10578     }
  10579 
  10580     if (size == 3) {
  10581         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
  10582         TCGv_i64 tcg_rd = tcg_temp_new_i64();
  10583 
  10584         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
  10585         write_fp_dreg(s, rd, tcg_rd);
  10586         tcg_temp_free_i64(tcg_rd);
  10587         tcg_temp_free_i64(tcg_rn);
  10588     } else {
  10589         TCGv_i32 tcg_rn = tcg_temp_new_i32();
  10590         TCGv_i32 tcg_rd = tcg_temp_new_i32();
  10591 
  10592         read_vec_element_i32(s, tcg_rn, rn, 0, size);
  10593 
  10594         switch (opcode) {
  10595         case 0x7: /* SQABS, SQNEG */
  10596         {
  10597             NeonGenOneOpEnvFn *genfn;
  10598             static NeonGenOneOpEnvFn * const fns[3][2] = {
  10599                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
  10600                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
  10601                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
  10602             };
  10603             genfn = fns[size][u];
  10604             genfn(tcg_rd, cpu_env, tcg_rn);
  10605             break;
  10606         }
  10607         case 0x1a: /* FCVTNS */
  10608         case 0x1b: /* FCVTMS */
  10609         case 0x1c: /* FCVTAS */
  10610         case 0x3a: /* FCVTPS */
  10611         case 0x3b: /* FCVTZS */
  10612             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
  10613                                  tcg_fpstatus);
  10614             break;
  10615         case 0x5a: /* FCVTNU */
  10616         case 0x5b: /* FCVTMU */
  10617         case 0x5c: /* FCVTAU */
  10618         case 0x7a: /* FCVTPU */
  10619         case 0x7b: /* FCVTZU */
  10620             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
  10621                                  tcg_fpstatus);
  10622             break;
  10623         default:
  10624             g_assert_not_reached();
  10625         }
  10626 
  10627         write_fp_sreg(s, rd, tcg_rd);
  10628         tcg_temp_free_i32(tcg_rd);
  10629         tcg_temp_free_i32(tcg_rn);
  10630     }
  10631 
  10632     if (is_fcvt) {
  10633         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
  10634         tcg_temp_free_i32(tcg_rmode);
  10635         tcg_temp_free_ptr(tcg_fpstatus);
  10636     }
  10637 }
  10638 
  10639 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
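        /*
         * The shift amount decodes as (2 * esize) - immh:immb; e.g. for
         * 32-bit elements (immh == 01xx) immh:immb runs from 32 to 63,
         * encoding right shifts of 32 down to 1.
         */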
  10640 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
  10641                                  int immh, int immb, int opcode, int rn, int rd)
  10642 {
  10643     int size = 32 - clz32(immh) - 1;
  10644     int immhb = immh << 3 | immb;
  10645     int shift = 2 * (8 << size) - immhb;
  10646     GVecGen2iFn *gvec_fn;
  10647 
  10648     if (extract32(immh, 3, 1) && !is_q) {
  10649         unallocated_encoding(s);
  10650         return;
  10651     }
  10652     tcg_debug_assert(size <= 3);
  10653 
  10654     if (!fp_access_check(s)) {
  10655         return;
  10656     }
  10657 
  10658     switch (opcode) {
  10659     case 0x02: /* SSRA / USRA (accumulate) */
  10660         gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
  10661         break;
  10662 
  10663     case 0x08: /* SRI */
  10664         gvec_fn = gen_gvec_sri;
  10665         break;
  10666 
  10667     case 0x00: /* SSHR / USHR */
  10668         if (is_u) {
  10669             if (shift == 8 << size) {
  10670                 /* Shift count the same size as element size produces zero.  */
  10671                 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
  10672                                      is_q ? 16 : 8, vec_full_reg_size(s), 0);
  10673                 return;
  10674             }
  10675             gvec_fn = tcg_gen_gvec_shri;
  10676         } else {
  10677             /* Shift count the same size as element size produces all sign.  */
  10678             if (shift == 8 << size) {
  10679                 shift -= 1;
  10680             }
  10681             gvec_fn = tcg_gen_gvec_sari;
  10682         }
  10683         break;
  10684 
  10685     case 0x04: /* SRSHR / URSHR (rounding) */
  10686         gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
  10687         break;
  10688 
  10689     case 0x06: /* SRSRA / URSRA (accum + rounding) */
  10690         gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
  10691         break;
  10692 
  10693     default:
  10694         g_assert_not_reached();
  10695     }
  10696 
  10697     gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
  10698 }
  10699 
  10700 /* SHL/SLI - Vector shift left */
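        /*
         * Here the shift amount decodes as immh:immb - esize, i.e. left
         * shifts of 0 to esize - 1; e.g. 8-bit elements (immh == 0001)
         * encode shifts of 0 to 7.
         */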
  10701 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
  10702                                  int immh, int immb, int opcode, int rn, int rd)
  10703 {
  10704     int size = 32 - clz32(immh) - 1;
  10705     int immhb = immh << 3 | immb;
  10706     int shift = immhb - (8 << size);
  10707 
  10708     /* Range of size is limited by decode: immh is a non-zero 4 bit field */
  10709     assert(size >= 0 && size <= 3);
  10710 
  10711     if (extract32(immh, 3, 1) && !is_q) {
  10712         unallocated_encoding(s);
  10713         return;
  10714     }
  10715 
  10716     if (!fp_access_check(s)) {
  10717         return;
  10718     }
  10719 
  10720     if (insert) {
  10721         gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
  10722     } else {
  10723         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
  10724     }
  10725 }
  10726 
  10727 /* USHLL/SHLL - Vector shift left with widening */
  10728 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
  10729                                   int immh, int immb, int opcode, int rn, int rd)
  10730 {
  10731     int size = 32 - clz32(immh) - 1;
  10732     int immhb = immh << 3 | immb;
  10733     int shift = immhb - (8 << size);
  10734     int dsize = 64;
  10735     int esize = 8 << size;
  10736     int elements = dsize / esize;
  10737     TCGv_i64 tcg_rn = new_tmp_a64(s);
  10738     TCGv_i64 tcg_rd = new_tmp_a64(s);
  10739     int i;
  10740 
  10741     if (size >= 3) {
  10742         unallocated_encoding(s);
  10743         return;
  10744     }
  10745 
  10746     if (!fp_access_check(s)) {
  10747         return;
  10748     }
  10749 
  10750     /* For the LL variants the store is larger than the load, so if
  10751      * rd == rn we would overwrite parts of our input. Load everything
  10752      * up front and extract the elements with shifts in the main loop.
  10753      */
  10754     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
  10755 
  10756     for (i = 0; i < elements; i++) {
  10757         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
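                /*
                 * size | (!is_u << 2) forms the 'option' argument of
                 * ext_and_shift_reg(): bits [1:0] give the source width,
                 * bit 2 requests sign extension.
                 */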
  10758         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
  10759         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
  10760         write_vec_element(s, tcg_rd, rd, i, size + 1);
  10761     }
  10762 }
  10763 
  10764 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
  10765 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
  10766                                  int immh, int immb, int opcode, int rn, int rd)
  10767 {
  10768     int immhb = immh << 3 | immb;
  10769     int size = 32 - clz32(immh) - 1;
  10770     int dsize = 64;
  10771     int esize = 8 << size;
  10772     int elements = dsize / esize;
  10773     int shift = (2 * esize) - immhb;
  10774     bool round = extract32(opcode, 0, 1);
  10775     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
  10776     TCGv_i64 tcg_round;
  10777     int i;
  10778 
  10779     if (extract32(immh, 3, 1)) {
  10780         unallocated_encoding(s);
  10781         return;
  10782     }
  10783 
  10784     if (!fp_access_check(s)) {
  10785         return;
  10786     }
  10787 
  10788     tcg_rn = tcg_temp_new_i64();
  10789     tcg_rd = tcg_temp_new_i64();
  10790     tcg_final = tcg_temp_new_i64();
  10791     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
  10792 
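            /*
             * Rounding adds half of the output LSB's weight,
             * i.e. 1 << (shift - 1), before the right shift.
             */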
  10793     if (round) {
  10794         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
  10795     } else {
  10796         tcg_round = NULL;
  10797     }
  10798 
  10799     for (i = 0; i < elements; i++) {
  10800         read_vec_element(s, tcg_rn, rn, i, size + 1);
  10801         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
  10802                                 false, true, size + 1, shift);
  10803 
  10804         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
  10805     }
  10806 
  10807     if (!is_q) {
  10808         write_vec_element(s, tcg_final, rd, 0, MO_64);
  10809     } else {
  10810         write_vec_element(s, tcg_final, rd, 1, MO_64);
  10811     }
  10812     tcg_temp_free_i64(tcg_rn);
  10813     tcg_temp_free_i64(tcg_rd);
  10814     tcg_temp_free_i64(tcg_final);
  10815 
  10816     clear_vec_high(s, is_q, rd);
  10817 }
  10818 
  10819 
  10820 /* AdvSIMD shift by immediate
  10821  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
  10822  * +---+---+---+-------------+------+------+--------+---+------+------+
  10823  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
  10824  * +---+---+---+-------------+------+------+--------+---+------+------+
  10825  */
  10826 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
  10827 {
  10828     int rd = extract32(insn, 0, 5);
  10829     int rn = extract32(insn, 5, 5);
  10830     int opcode = extract32(insn, 11, 5);
  10831     int immb = extract32(insn, 16, 3);
  10832     int immh = extract32(insn, 19, 4);
  10833     bool is_u = extract32(insn, 29, 1);
  10834     bool is_q = extract32(insn, 30, 1);
  10835 
  10836     /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
  10837     assert(immh != 0);
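            /*
             * The position of the topmost set bit of immh selects the
             * element size for the handlers below.
             */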
  10838 
  10839     switch (opcode) {
  10840     case 0x08: /* SRI */
  10841         if (!is_u) {
  10842             unallocated_encoding(s);
  10843             return;
  10844         }
  10845         /* fall through */
  10846     case 0x00: /* SSHR / USHR */
  10847     case 0x02: /* SSRA / USRA (accumulate) */
  10848     case 0x04: /* SRSHR / URSHR (rounding) */
  10849     case 0x06: /* SRSRA / URSRA (accum + rounding) */
  10850         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
  10851         break;
  10852     case 0x0a: /* SHL / SLI */
  10853         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
  10854         break;
  10855     case 0x10: /* SHRN / SQSHRUN */
  10856     case 0x11: /* RSHRN / SQRSHRUN */
  10857         if (is_u) {
  10858             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
  10859                                    opcode, rn, rd);
  10860         } else {
  10861             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
  10862         }
  10863         break;
  10864     case 0x12: /* SQSHRN / UQSHRN */
  10865     case 0x13: /* SQRSHRN / UQRSHRN */
  10866         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
  10867                                opcode, rn, rd);
  10868         break;
  10869     case 0x14: /* SSHLL / USHLL */
  10870         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
  10871         break;
  10872     case 0x1c: /* SCVTF / UCVTF */
  10873         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
  10874                                      opcode, rn, rd);
  10875         break;
  10876     case 0xc: /* SQSHLU */
  10877         if (!is_u) {
  10878             unallocated_encoding(s);
  10879             return;
  10880         }
  10881         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
  10882         break;
  10883     case 0xe: /* SQSHL, UQSHL */
  10884         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
  10885         break;
  10886     case 0x1f: /* FCVTZS/ FCVTZU */
  10887         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
  10888         return;
  10889     default:
  10890         unallocated_encoding(s);
  10891         return;
  10892     }
  10893 }
  10894 
  10895 /* Generate code to do a "long" addition or subtraction, i.e. one done in
  10896  * TCGv_i64 on vector lanes twice the width specified by size.
  10897  */
  10898 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
  10899                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
  10900 {
  10901     static NeonGenTwo64OpFn * const fns[3][2] = {
  10902         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
  10903         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
  10904         { tcg_gen_add_i64, tcg_gen_sub_i64 },
  10905     };
  10906     NeonGenTwo64OpFn *genfn;
  10907     assert(size < 3);
  10908 
  10909     genfn = fns[size][is_sub];
  10910     genfn(tcg_res, tcg_op1, tcg_op2);
  10911 }
  10912 
  10913 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
  10914                                 int opcode, int rd, int rn, int rm)
  10915 {
  10916     /* 3-reg-different widening insns: 64 x 64 -> 128 */
  10917     TCGv_i64 tcg_res[2];
  10918     int pass, accop;
  10919 
  10920     tcg_res[0] = tcg_temp_new_i64();
  10921     tcg_res[1] = tcg_temp_new_i64();
  10922 
  10923     /* Does this op do an adding accumulate, a subtracting accumulate,
  10924      * or no accumulate at all?
  10925      */
  10926     switch (opcode) {
  10927     case 5:
  10928     case 8:
  10929     case 9:
  10930         accop = 1;
  10931         break;
  10932     case 10:
  10933     case 11:
  10934         accop = -1;
  10935         break;
  10936     default:
  10937         accop = 0;
  10938         break;
  10939     }
  10940 
  10941     if (accop != 0) {
  10942         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
  10943         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
  10944     }
  10945 
  10946     /* size == 2 means two 32x32->64 operations; this is worth special
  10947      * casing because we can generally handle it inline.
  10948      */
  10949     if (size == 2) {
  10950         for (pass = 0; pass < 2; pass++) {
  10951             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
  10952             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
  10953             TCGv_i64 tcg_passres;
  10954             MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
  10955 
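                    /* The "2" forms take their inputs from the high half */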
  10956             int elt = pass + is_q * 2;
  10957 
  10958             read_vec_element(s, tcg_op1, rn, elt, memop);
  10959             read_vec_element(s, tcg_op2, rm, elt, memop);
  10960 
  10961             if (accop == 0) {
  10962                 tcg_passres = tcg_res[pass];
  10963             } else {
  10964                 tcg_passres = tcg_temp_new_i64();
  10965             }
  10966 
  10967             switch (opcode) {
  10968             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
  10969                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
  10970                 break;
  10971             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
  10972                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
  10973                 break;
  10974             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
  10975             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
  10976             {
  10977                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
  10978                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
  10979 
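                        /*
                         * Absolute difference: compute both differences
                         * and movcond-select the non-negative one.
                         */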
  10980                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
  10981                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
  10982                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
  10983                                     tcg_passres,
  10984                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
  10985                 tcg_temp_free_i64(tcg_tmp1);
  10986                 tcg_temp_free_i64(tcg_tmp2);
  10987                 break;
  10988             }
  10989             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
  10990             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
  10991             case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
  10992                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
  10993                 break;
  10994             case 9: /* SQDMLAL, SQDMLAL2 */
  10995             case 11: /* SQDMLSL, SQDMLSL2 */
  10996             case 13: /* SQDMULL, SQDMULL2 */
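                        /*
                         * The doubling in these saturating ops is done by
                         * saturating-adding the product to itself.
                         */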
  10997                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
  10998                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
  10999                                                   tcg_passres, tcg_passres);
  11000                 break;
  11001             default:
  11002                 g_assert_not_reached();
  11003             }
  11004 
  11005             if (opcode == 9 || opcode == 11) {
  11006                 /* saturating accumulate ops */
  11007                 if (accop < 0) {
  11008                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
  11009                 }
  11010                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
  11011                                                   tcg_res[pass], tcg_passres);
  11012             } else if (accop > 0) {
  11013                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
  11014             } else if (accop < 0) {
  11015                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
  11016             }
  11017 
  11018             if (accop != 0) {
  11019                 tcg_temp_free_i64(tcg_passres);
  11020             }
  11021 
  11022             tcg_temp_free_i64(tcg_op1);
  11023             tcg_temp_free_i64(tcg_op2);
  11024         }
  11025     } else {
  11026         /* size 0 or 1, generally helper functions */
  11027         for (pass = 0; pass < 2; pass++) {
  11028             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
  11029             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
  11030             TCGv_i64 tcg_passres;
  11031             int elt = pass + is_q * 2;
  11032 
  11033             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
  11034             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
  11035 
  11036             if (accop == 0) {
  11037                 tcg_passres = tcg_res[pass];
  11038             } else {
  11039                 tcg_passres = tcg_temp_new_i64();
  11040             }
  11041 
  11042             switch (opcode) {
  11043             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
  11044             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
  11045             {
  11046                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
  11047                 static NeonGenWidenFn * const widenfns[2][2] = {
  11048                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
  11049                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
  11050                 };
  11051                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
  11052 
  11053                 widenfn(tcg_op2_64, tcg_op2);
  11054                 widenfn(tcg_passres, tcg_op1);
  11055                 gen_neon_addl(size, (opcode == 2), tcg_passres,
  11056                               tcg_passres, tcg_op2_64);
  11057                 tcg_temp_free_i64(tcg_op2_64);
  11058                 break;
  11059             }
  11060             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
  11061             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
  11062                 if (size == 0) {
  11063                     if (is_u) {
  11064                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
  11065                     } else {
  11066                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
  11067                     }
  11068                 } else {
  11069                     if (is_u) {
  11070                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
  11071                     } else {
  11072                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
  11073                     }
  11074                 }
  11075                 break;
  11076             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
  11077             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
  11078             case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
  11079                 if (size == 0) {
  11080                     if (is_u) {
  11081                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
  11082                     } else {
  11083                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
  11084                     }
  11085                 } else {
  11086                     if (is_u) {
  11087                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
  11088                     } else {
  11089                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
  11090                     }
  11091                 }
  11092                 break;
  11093             case 9: /* SQDMLAL, SQDMLAL2 */
  11094             case 11: /* SQDMLSL, SQDMLSL2 */
  11095             case 13: /* SQDMULL, SQDMULL2 */
  11096                 assert(size == 1);
  11097                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
  11098                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
  11099                                                   tcg_passres, tcg_passres);
  11100                 break;
  11101             default:
  11102                 g_assert_not_reached();
  11103             }
  11104             tcg_temp_free_i32(tcg_op1);
  11105             tcg_temp_free_i32(tcg_op2);
  11106 
  11107             if (accop != 0) {
  11108                 if (opcode == 9 || opcode == 11) {
  11109                     /* saturating accumulate ops */
  11110                     if (accop < 0) {
  11111                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
  11112                     }
  11113                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
  11114                                                       tcg_res[pass],
  11115                                                       tcg_passres);
  11116                 } else {
  11117                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
  11118                                   tcg_res[pass], tcg_passres);
  11119                 }
  11120                 tcg_temp_free_i64(tcg_passres);
  11121             }
  11122         }
  11123     }
  11124 
  11125     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
  11126     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
  11127     tcg_temp_free_i64(tcg_res[0]);
  11128     tcg_temp_free_i64(tcg_res[1]);
  11129 }
  11130 
  11131 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
  11132                             int opcode, int rd, int rn, int rm)
  11133 {
  11134     TCGv_i64 tcg_res[2];
  11135     int part = is_q ? 2 : 0;
  11136     int pass;
  11137 
  11138     for (pass = 0; pass < 2; pass++) {
  11139         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
  11140         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
  11141         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
  11142         static NeonGenWidenFn * const widenfns[3][2] = {
  11143             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
  11144             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
  11145             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
  11146         };
  11147         NeonGenWidenFn *widenfn = widenfns[size][is_u];
  11148 
  11149         read_vec_element(s, tcg_op1, rn, pass, MO_64);
  11150         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
  11151         widenfn(tcg_op2_wide, tcg_op2);
  11152         tcg_temp_free_i32(tcg_op2);
  11153         tcg_res[pass] = tcg_temp_new_i64();
  11154         gen_neon_addl(size, (opcode == 3),
  11155                       tcg_res[pass], tcg_op1, tcg_op2_wide);
  11156         tcg_temp_free_i64(tcg_op1);
  11157         tcg_temp_free_i64(tcg_op2_wide);
  11158     }
  11159 
  11160     for (pass = 0; pass < 2; pass++) {
  11161         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
  11162         tcg_temp_free_i64(tcg_res[pass]);
  11163     }
  11164 }
  11165 
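        /*
         * Narrow to the high half with rounding: add half the weight of
         * the discarded low 32 bits (1 << 31) before taking bits [63:32].
         */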
  11166 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
  11167 {
  11168     tcg_gen_addi_i64(in, in, 1U << 31);
  11169     tcg_gen_extrh_i64_i32(res, in);
  11170 }
  11171 
  11172 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
  11173                                  int opcode, int rd, int rn, int rm)
  11174 {
  11175     TCGv_i32 tcg_res[2];
  11176     int part = is_q ? 2 : 0;
  11177     int pass;
  11178 
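            /*
             * The "2" forms (is_q) write the narrowed results to the
             * high half of Rd.
             */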
  11179     for (pass = 0; pass < 2; pass++) {
  11180         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
  11181         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
  11182         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
  11183         static NeonGenNarrowFn * const narrowfns[3][2] = {
  11184             { gen_helper_neon_narrow_high_u8,
  11185               gen_helper_neon_narrow_round_high_u8 },
  11186             { gen_helper_neon_narrow_high_u16,
  11187               gen_helper_neon_narrow_round_high_u16 },
  11188             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
  11189         };
  11190         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
  11191 
  11192         read_vec_element(s, tcg_op1, rn, pass, MO_64);
  11193         read_vec_element(s, tcg_op2, rm, pass, MO_64);
  11194 
  11195         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
  11196 
  11197         tcg_temp_free_i64(tcg_op1);
  11198         tcg_temp_free_i64(tcg_op2);
  11199 
  11200         tcg_res[pass] = tcg_temp_new_i32();
  11201         gennarrow(tcg_res[pass], tcg_wideres);
  11202         tcg_temp_free_i64(tcg_wideres);
  11203     }
  11204 
  11205     for (pass = 0; pass < 2; pass++) {
  11206         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
  11207         tcg_temp_free_i32(tcg_res[pass]);
  11208     }
  11209     clear_vec_high(s, is_q, rd);
  11210 }
  11211 
  11212 /* AdvSIMD three different
  11213  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
  11214  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
  11215  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
  11216  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
  11217  */
  11218 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
  11219 {
  11220     /* Instructions in this group fall into three basic classes
  11221      * (in each case with the operation working on each element in
  11222      * the input vectors):
  11223      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
  11224      *     128 bit input)
  11225      * (2) wide 64 x 128 -> 128
  11226      * (3) narrowing 128 x 128 -> 64
  11227      * Here we do initial decode, catch unallocated cases and
  11228      * dispatch to separate functions for each class.
  11229      */
  11230     int is_q = extract32(insn, 30, 1);
  11231     int is_u = extract32(insn, 29, 1);
  11232     int size = extract32(insn, 22, 2);
  11233     int opcode = extract32(insn, 12, 4);
  11234     int rm = extract32(insn, 16, 5);
  11235     int rn = extract32(insn, 5, 5);
  11236     int rd = extract32(insn, 0, 5);
  11237 
  11238     switch (opcode) {
  11239     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
  11240     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
  11241         /* 64 x 128 -> 128 */
  11242         if (size == 3) {
  11243             unallocated_encoding(s);
  11244             return;
  11245         }
  11246         if (!fp_access_check(s)) {
  11247             return;
  11248         }
  11249         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
  11250         break;
  11251     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
  11252     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
  11253         /* 128 x 128 -> 64 */
  11254         if (size == 3) {
  11255             unallocated_encoding(s);
  11256             return;
  11257         }
  11258         if (!fp_access_check(s)) {
  11259             return;
  11260         }
  11261         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
  11262         break;
  11263     case 14: /* PMULL, PMULL2 */
  11264         if (is_u) {
  11265             unallocated_encoding(s);
  11266             return;
  11267         }
  11268         switch (size) {
  11269         case 0: /* PMULL.P8 */
  11270             if (!fp_access_check(s)) {
  11271                 return;
  11272             }
  11273             /* The Q field specifies lo/hi half input for this insn.  */
  11274             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
  11275                              gen_helper_neon_pmull_h);
  11276             break;
  11277 
  11278         case 3: /* PMULL.P64 */
  11279             if (!dc_isar_feature(aa64_pmull, s)) {
  11280                 unallocated_encoding(s);
  11281                 return;
  11282             }
  11283             if (!fp_access_check(s)) {
  11284                 return;
  11285             }
  11286             /* The Q field specifies lo/hi half input for this insn.  */
  11287             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
  11288                              gen_helper_gvec_pmull_q);
  11289             break;
  11290 
  11291         default:
  11292             unallocated_encoding(s);
  11293             break;
  11294         }
  11295         return;
  11296     case 9: /* SQDMLAL, SQDMLAL2 */
  11297     case 11: /* SQDMLSL, SQDMLSL2 */
  11298     case 13: /* SQDMULL, SQDMULL2 */
  11299         if (is_u || size == 0) {
  11300             unallocated_encoding(s);
  11301             return;
  11302         }
  11303         /* fall through */
  11304     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
  11305     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
  11306     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
  11307     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
  11308     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
  11309     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
  11310     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
  11311         /* 64 x 64 -> 128 */
  11312         if (size == 3) {
  11313             unallocated_encoding(s);
  11314             return;
  11315         }
  11316         if (!fp_access_check(s)) {
  11317             return;
  11318         }
  11319 
  11320         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
  11321         break;
  11322     default:
  11323         /* opcode 15 not allocated */
  11324         unallocated_encoding(s);
  11325         break;
  11326     }
  11327 }
  11328 
  11329 /* Logic op (opcode == 3) subgroup of C3.6.16. */
  11330 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
  11331 {
  11332     int rd = extract32(insn, 0, 5);
  11333     int rn = extract32(insn, 5, 5);
  11334     int rm = extract32(insn, 16, 5);
  11335     int size = extract32(insn, 22, 2);
  11336     bool is_u = extract32(insn, 29, 1);
  11337     bool is_q = extract32(insn, 30, 1);
  11338 
  11339     if (!fp_access_check(s)) {
  11340         return;
  11341     }
  11342 
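            /* U and size form a 3-bit index (U on top) selecting the op */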
  11343     switch (size + 4 * is_u) {
  11344     case 0: /* AND */
  11345         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
  11346         return;
  11347     case 1: /* BIC */
  11348         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
  11349         return;
  11350     case 2: /* ORR */
  11351         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
  11352         return;
  11353     case 3: /* ORN */
  11354         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
  11355         return;
  11356     case 4: /* EOR */
  11357         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
  11358         return;
  11359 
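            /*
             * BSL, BIT and BIF are all bitwise selects differing only in
             * which register supplies the mask: tcg_gen_gvec_bitsel()
             * computes d = (b & a) | (c & ~a), with the mask (Rd, Rm and
             * Rm respectively) passed in the 'a' slot below.
             */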
  11360     case 5: /* BSL bitwise select */
  11361         gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
  11362         return;
  11363     case 6: /* BIT, bitwise insert if true */
  11364         gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
  11365         return;
  11366     case 7: /* BIF, bitwise insert if false */
  11367         gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
  11368         return;
  11369 
  11370     default:
  11371         g_assert_not_reached();
  11372     }
  11373 }
  11374 
  11375 /* Pairwise op subgroup of C3.6.16.
  11376  *
  11377  * This is called directly or via disas_simd_3same_float for float pairwise
  11378  * operations where the opcode and size are calculated differently.
  11379  */
  11380 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
  11381                                    int size, int rn, int rm, int rd)
  11382 {
  11383     TCGv_ptr fpst;
  11384     int pass;
  11385 
  11386     /* Floating point operations need fpst */
  11387     if (opcode >= 0x58) {
  11388         fpst = fpstatus_ptr(FPST_FPCR);
  11389     } else {
  11390         fpst = NULL;
  11391     }
  11392 
  11393     if (!fp_access_check(s)) {
  11394         return;
  11395     }
  11396 
  11397     /* These operations work on the concatenated rm:rn, with each pair of
  11398      * adjacent elements being operated on to produce an element in the result.
  11399      */
  11400     if (size == 3) {
  11401         TCGv_i64 tcg_res[2];
  11402 
  11403         for (pass = 0; pass < 2; pass++) {
  11404             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
  11405             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
  11406             int passreg = (pass == 0) ? rn : rm;
  11407 
  11408             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
  11409             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
  11410             tcg_res[pass] = tcg_temp_new_i64();
  11411 
  11412             switch (opcode) {
  11413             case 0x17: /* ADDP */
  11414                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
  11415                 break;
  11416             case 0x58: /* FMAXNMP */
  11417                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
  11418                 break;
  11419             case 0x5a: /* FADDP */
  11420                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
  11421                 break;
  11422             case 0x5e: /* FMAXP */
  11423                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
  11424                 break;
  11425             case 0x78: /* FMINNMP */
  11426                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
  11427                 break;
  11428             case 0x7e: /* FMINP */
  11429                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
  11430                 break;
  11431             default:
  11432                 g_assert_not_reached();
  11433             }
  11434 
  11435             tcg_temp_free_i64(tcg_op1);
  11436             tcg_temp_free_i64(tcg_op2);
  11437         }
  11438 
  11439         for (pass = 0; pass < 2; pass++) {
  11440             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
  11441             tcg_temp_free_i64(tcg_res[pass]);
  11442         }
  11443     } else {
  11444         int maxpass = is_q ? 4 : 2;
  11445         TCGv_i32 tcg_res[4];
  11446 
  11447         for (pass = 0; pass < maxpass; pass++) {
  11448             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
  11449             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
  11450             NeonGenTwoOpFn *genfn = NULL;
  11451             int passreg = pass < (maxpass / 2) ? rn : rm;
  11452             int passelt = (is_q && (pass & 1)) ? 2 : 0;
  11453 
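                    /*
                     * The first maxpass/2 passes read from Rn, the rest
                     * from Rm; for Q the odd passes take elements 2 and 3
                     * of their source register.
                     */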
  11454             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
  11455             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
  11456             tcg_res[pass] = tcg_temp_new_i32();
  11457 
  11458             switch (opcode) {
  11459             case 0x17: /* ADDP */
  11460             {
  11461                 static NeonGenTwoOpFn * const fns[3] = {
  11462                     gen_helper_neon_padd_u8,
  11463                     gen_helper_neon_padd_u16,
  11464                     tcg_gen_add_i32,
  11465                 };
  11466                 genfn = fns[size];
  11467                 break;
  11468             }
  11469             case 0x14: /* SMAXP, UMAXP */
  11470             {
  11471                 static NeonGenTwoOpFn * const fns[3][2] = {
  11472                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
  11473                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
  11474                     { tcg_gen_smax_i32, tcg_gen_umax_i32 },
  11475                 };
  11476                 genfn = fns[size][u];
  11477                 break;
  11478             }
  11479             case 0x15: /* SMINP, UMINP */
  11480             {
  11481                 static NeonGenTwoOpFn * const fns[3][2] = {
  11482                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
  11483                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
  11484                     { tcg_gen_smin_i32, tcg_gen_umin_i32 },
  11485                 };
  11486                 genfn = fns[size][u];
  11487                 break;
  11488             }
  11489             /* The FP operations are all on single floats (32 bit) */
  11490             case 0x58: /* FMAXNMP */
  11491                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
  11492                 break;
  11493             case 0x5a: /* FADDP */
  11494                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
  11495                 break;
  11496             case 0x5e: /* FMAXP */
  11497                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
  11498                 break;
  11499             case 0x78: /* FMINNMP */
  11500                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
  11501                 break;
  11502             case 0x7e: /* FMINP */
  11503                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
  11504                 break;
  11505             default:
  11506                 g_assert_not_reached();
  11507             }
  11508 
  11509             /* The FP ops above were emitted directly; integer ops use genfn */
  11510             if (genfn) {
  11511                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
  11512             }
  11513 
  11514             tcg_temp_free_i32(tcg_op1);
  11515             tcg_temp_free_i32(tcg_op2);
  11516         }
  11517 
  11518         for (pass = 0; pass < maxpass; pass++) {
  11519             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
  11520             tcg_temp_free_i32(tcg_res[pass]);
  11521         }
  11522         clear_vec_high(s, is_q, rd);
  11523     }
  11524 
  11525     if (fpst) {
  11526         tcg_temp_free_ptr(fpst);
  11527     }
  11528 }
  11529 
  11530 /* Floating point op subgroup of C3.6.16. */
  11531 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
  11532 {
  11533     /* For floating point ops, the U, size[1] and opcode bits
  11534      * together indicate the operation. size[0] indicates single
  11535      * or double.
  11536      */
  11537     int fpopcode = extract32(insn, 11, 5)
  11538         | (extract32(insn, 23, 1) << 5)
  11539         | (extract32(insn, 29, 1) << 6);
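            /* e.g. FADDP: U == 1, size[1] == 0, opcode == 0x1a -> 0x5a */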
  11540     int is_q = extract32(insn, 30, 1);
  11541     int size = extract32(insn, 22, 1);
  11542     int rm = extract32(insn, 16, 5);
  11543     int rn = extract32(insn, 5, 5);
  11544     int rd = extract32(insn, 0, 5);
  11545 
  11546     int datasize = is_q ? 128 : 64;
  11547     int esize = 32 << size;
  11548     int elements = datasize / esize;
  11549 
  11550     if (size == 1 && !is_q) {
  11551         unallocated_encoding(s);
  11552         return;
  11553     }
  11554 
  11555     switch (fpopcode) {
  11556     case 0x58: /* FMAXNMP */
  11557     case 0x5a: /* FADDP */
  11558     case 0x5e: /* FMAXP */
  11559     case 0x78: /* FMINNMP */
  11560     case 0x7e: /* FMINP */
  11561         if (size && !is_q) {
  11562             unallocated_encoding(s);
  11563             return;
  11564         }
  11565         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
  11566                                rn, rm, rd);
  11567         return;
  11568     case 0x1b: /* FMULX */
  11569     case 0x1f: /* FRECPS */
  11570     case 0x3f: /* FRSQRTS */
  11571     case 0x5d: /* FACGE */
  11572     case 0x7d: /* FACGT */
  11573     case 0x19: /* FMLA */
  11574     case 0x39: /* FMLS */
  11575     case 0x18: /* FMAXNM */
  11576     case 0x1a: /* FADD */
  11577     case 0x1c: /* FCMEQ */
  11578     case 0x1e: /* FMAX */
  11579     case 0x38: /* FMINNM */
  11580     case 0x3a: /* FSUB */
  11581     case 0x3e: /* FMIN */
  11582     case 0x5b: /* FMUL */
  11583     case 0x5c: /* FCMGE */
  11584     case 0x5f: /* FDIV */
  11585     case 0x7a: /* FABD */
  11586     case 0x7c: /* FCMGT */
  11587         if (!fp_access_check(s)) {
  11588             return;
  11589         }
  11590         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
  11591         return;
  11592 
  11593     case 0x1d: /* FMLAL  */
  11594     case 0x3d: /* FMLSL  */
  11595     case 0x59: /* FMLAL2 */
  11596     case 0x79: /* FMLSL2 */
  11597         if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
  11598             unallocated_encoding(s);
  11599             return;
  11600         }
  11601         if (fp_access_check(s)) {
  11602             int is_s = extract32(insn, 23, 1);
  11603             int is_2 = extract32(insn, 29, 1);
  11604             int data = (is_2 << 1) | is_s;
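                    /*
                     * Pack the FMLSL (is_s) and '2'/upper-half (is_2)
                     * selectors into the data bits consumed by the helper.
                     */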
  11605             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
  11606                                vec_full_reg_offset(s, rn),
  11607                                vec_full_reg_offset(s, rm), cpu_env,
  11608                                is_q ? 16 : 8, vec_full_reg_size(s),
  11609                                data, gen_helper_gvec_fmlal_a64);
  11610         }
  11611         return;
  11612 
  11613     default:
  11614         unallocated_encoding(s);
  11615         return;
  11616     }
  11617 }
  11618 
  11619 /* Integer op subgroup of C3.6.16. */
  11620 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
  11621 {
  11622     int is_q = extract32(insn, 30, 1);
  11623     int u = extract32(insn, 29, 1);
  11624     int size = extract32(insn, 22, 2);
  11625     int opcode = extract32(insn, 11, 5);
  11626     int rm = extract32(insn, 16, 5);
  11627     int rn = extract32(insn, 5, 5);
  11628     int rd = extract32(insn, 0, 5);
  11629     int pass;
  11630     TCGCond cond;
  11631 
  11632     switch (opcode) {
  11633     case 0x13: /* MUL, PMUL */
  11634         if (u && size != 0) {
  11635             unallocated_encoding(s);
  11636             return;
  11637         }
  11638         /* fall through */
  11639     case 0x0: /* SHADD, UHADD */
  11640     case 0x2: /* SRHADD, URHADD */
  11641     case 0x4: /* SHSUB, UHSUB */
  11642     case 0xc: /* SMAX, UMAX */
  11643     case 0xd: /* SMIN, UMIN */
  11644     case 0xe: /* SABD, UABD */
  11645     case 0xf: /* SABA, UABA */
  11646     case 0x12: /* MLA, MLS */
  11647         if (size == 3) {
  11648             unallocated_encoding(s);
  11649             return;
  11650         }
  11651         break;
  11652     case 0x16: /* SQDMULH, SQRDMULH */
  11653         if (size == 0 || size == 3) {
  11654             unallocated_encoding(s);
  11655             return;
  11656         }
  11657         break;
  11658     default:
  11659         if (size == 3 && !is_q) {
  11660             unallocated_encoding(s);
  11661             return;
  11662         }
  11663         break;
  11664     }
  11665 
  11666     if (!fp_access_check(s)) {
  11667         return;
  11668     }
  11669 
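            /*
             * Ops with a full-vector gvec expansion are handled by this
             * switch and return; anything left falls through to the
             * per-element loops below.
             */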
  11670     switch (opcode) {
  11671     case 0x01: /* SQADD, UQADD */
  11672         if (u) {
  11673             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
  11674         } else {
  11675             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
  11676         }
  11677         return;
  11678     case 0x05: /* SQSUB, UQSUB */
  11679         if (u) {
  11680             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
  11681         } else {
  11682             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
  11683         }
  11684         return;
  11685     case 0x08: /* SSHL, USHL */
  11686         if (u) {
  11687             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
  11688         } else {
  11689             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
  11690         }
  11691         return;
  11692     case 0x0c: /* SMAX, UMAX */
  11693         if (u) {
  11694             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
  11695         } else {
  11696             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
  11697         }
  11698         return;
  11699     case 0x0d: /* SMIN, UMIN */
  11700         if (u) {
  11701             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
  11702         } else {
  11703             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
  11704         }
  11705         return;
  11706     case 0xe: /* SABD, UABD */
  11707         if (u) {
  11708             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
  11709         } else {
  11710             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
  11711         }
  11712         return;
  11713     case 0xf: /* SABA, UABA */
  11714         if (u) {
  11715             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
  11716         } else {
  11717             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
  11718         }
  11719         return;
  11720     case 0x10: /* ADD, SUB */
  11721         if (u) {
  11722             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
  11723         } else {
  11724             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
  11725         }
  11726         return;
  11727     case 0x13: /* MUL, PMUL */
  11728         if (!u) { /* MUL */
  11729             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
  11730         } else {  /* PMUL */
  11731             gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
  11732         }
  11733         return;
  11734     case 0x12: /* MLA, MLS */
  11735         if (u) {
  11736             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
  11737         } else {
  11738             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
  11739         }
  11740         return;
  11741     case 0x16: /* SQDMULH, SQRDMULH */
  11742         {
  11743             static gen_helper_gvec_3_ptr * const fns[2][2] = {
  11744                 { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
  11745                 { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
  11746             };
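                    /* size 0 and 3 were rejected above, so size is 1 or 2 here */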
  11747             gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
  11748         }
  11749         return;
  11750     case 0x11:
  11751         if (!u) { /* CMTST */
  11752             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
  11753             return;
  11754         }
  11755         /* else CMEQ */
  11756         cond = TCG_COND_EQ;
  11757         goto do_gvec_cmp;
  11758     case 0x06: /* CMGT, CMHI */
  11759         cond = u ? TCG_COND_GTU : TCG_COND_GT;
  11760         goto do_gvec_cmp;
  11761     case 0x07: /* CMGE, CMHS */
  11762         cond = u ? TCG_COND_GEU : TCG_COND_GE;
  11763     do_gvec_cmp:
  11764         tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
  11765                          vec_full_reg_offset(s, rn),
  11766                          vec_full_reg_offset(s, rm),
  11767                          is_q ? 16 : 8, vec_full_reg_size(s));
  11768         return;
  11769     }
  11770 
  11771     if (size == 3) {
  11772         assert(is_q);
  11773         for (pass = 0; pass < 2; pass++) {
  11774             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
  11775             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
  11776             TCGv_i64 tcg_res = tcg_temp_new_i64();
  11777 
  11778             read_vec_element(s, tcg_op1, rn, pass, MO_64);
  11779             read_vec_element(s, tcg_op2, rm, pass, MO_64);
  11780 
  11781             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
  11782 
  11783             write_vec_element(s, tcg_res, rd, pass, MO_64);
  11784 
  11785             tcg_temp_free_i64(tcg_res);
  11786             tcg_temp_free_i64(tcg_op1);
  11787             tcg_temp_free_i64(tcg_op2);
  11788         }
  11789     } else {
  11790         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
  11791             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
  11792             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
  11793             TCGv_i32 tcg_res = tcg_temp_new_i32();
  11794             NeonGenTwoOpFn *genfn = NULL;
  11795             NeonGenTwoOpEnvFn *genenvfn = NULL;
  11796 
  11797             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
  11798             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
  11799 
  11800             switch (opcode) {
  11801             case 0x0: /* SHADD, UHADD */
  11802             {
  11803                 static NeonGenTwoOpFn * const fns[3][2] = {
  11804                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
  11805                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
  11806                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
  11807                 };
  11808                 genfn = fns[size][u];
  11809                 break;
  11810             }
  11811             case 0x2: /* SRHADD, URHADD */
  11812             {
  11813                 static NeonGenTwoOpFn * const fns[3][2] = {
  11814                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
  11815                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
  11816                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
  11817                 };
  11818                 genfn = fns[size][u];
  11819                 break;
  11820             }
  11821             case 0x4: /* SHSUB, UHSUB */
  11822             {
  11823                 static NeonGenTwoOpFn * const fns[3][2] = {
  11824                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
  11825                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
  11826                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
  11827                 };
  11828                 genfn = fns[size][u];
  11829                 break;
  11830             }
  11831             case 0x9: /* SQSHL, UQSHL */
  11832             {
  11833                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
  11834                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
  11835                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
  11836                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
  11837                 };
  11838                 genenvfn = fns[size][u];
  11839                 break;
  11840             }
  11841             case 0xa: /* SRSHL, URSHL */
  11842             {
  11843                 static NeonGenTwoOpFn * const fns[3][2] = {
  11844                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
  11845                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
  11846                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
  11847                 };
  11848                 genfn = fns[size][u];
  11849                 break;
  11850             }
  11851             case 0xb: /* SQRSHL, UQRSHL */
  11852             {
  11853                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
  11854                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
  11855                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
  11856                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
  11857                 };
  11858                 genenvfn = fns[size][u];
  11859                 break;
  11860             }
  11861             default:
  11862                 g_assert_not_reached();
  11863             }
  11864 
  11865             if (genenvfn) {
  11866                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
  11867             } else {
  11868                 genfn(tcg_res, tcg_op1, tcg_op2);
  11869             }
  11870 
  11871             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
  11872 
  11873             tcg_temp_free_i32(tcg_res);
  11874             tcg_temp_free_i32(tcg_op1);
  11875             tcg_temp_free_i32(tcg_op2);
  11876         }
  11877     }
  11878     clear_vec_high(s, is_q, rd);
  11879 }
  11880 
  11881 /* AdvSIMD three same
  11882  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
  11883  * +---+---+---+-----------+------+---+------+--------+---+------+------+
  11884  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
  11885  * +---+---+---+-----------+------+---+------+--------+---+------+------+
  11886  */
  11887 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
  11888 {
  11889     int opcode = extract32(insn, 11, 5);
  11890 
  11891     switch (opcode) {
  11892     case 0x3: /* logic ops */
  11893         disas_simd_3same_logic(s, insn);
  11894         break;
  11895     case 0x17: /* ADDP */
  11896     case 0x14: /* SMAXP, UMAXP */
  11897     case 0x15: /* SMINP, UMINP */
  11898     {
  11899         /* Pairwise operations */
  11900         int is_q = extract32(insn, 30, 1);
  11901         int u = extract32(insn, 29, 1);
  11902         int size = extract32(insn, 22, 2);
  11903         int rm = extract32(insn, 16, 5);
  11904         int rn = extract32(insn, 5, 5);
  11905         int rd = extract32(insn, 0, 5);
  11906         if (opcode == 0x17) {
  11907             if (u || (size == 3 && !is_q)) {
  11908                 unallocated_encoding(s);
  11909                 return;
  11910             }
  11911         } else {
  11912             if (size == 3) {
  11913                 unallocated_encoding(s);
  11914                 return;
  11915             }
  11916         }
  11917         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
  11918         break;
  11919     }
  11920     case 0x18 ... 0x31:
  11921         /* floating point ops: size[1] and U are folded into the opcode */
  11922         disas_simd_3same_float(s, insn);
  11923         break;
  11924     default:
  11925         disas_simd_3same_int(s, insn);
  11926         break;
  11927     }
  11928 }
  11929 
  11930 /*
  11931  * Advanced SIMD three same (ARMv8.2 FP16 variants)
  11932  *
  11933  *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
  11934  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
  11935  * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
  11936  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
  11937  *
  11938  * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
  11939  * (register), FACGE, FABD, FCMGT (register) and FACGT.
  11940  *
  11941  */
  11943 {
  11944     int opcode = extract32(insn, 11, 3);
  11945     int u = extract32(insn, 29, 1);
  11946     int a = extract32(insn, 23, 1);
  11947     int is_q = extract32(insn, 30, 1);
  11948     int rm = extract32(insn, 16, 5);
  11949     int rn = extract32(insn, 5, 5);
  11950     int rd = extract32(insn, 0, 5);
  11951     /*
  11952      * For these floating point ops, the U, a and opcode bits
  11953      * together indicate the operation.
  11954      */
  11955     int fpopcode = opcode | (a << 3) | (u << 4);
  11956     int datasize = is_q ? 128 : 64;
  11957     int elements = datasize / 16;
  11958     bool pairwise;
  11959     TCGv_ptr fpst;
  11960     int pass;
  11961 
  11962     switch (fpopcode) {
  11963     case 0x0: /* FMAXNM */
  11964     case 0x1: /* FMLA */
  11965     case 0x2: /* FADD */
  11966     case 0x3: /* FMULX */
  11967     case 0x4: /* FCMEQ */
  11968     case 0x6: /* FMAX */
  11969     case 0x7: /* FRECPS */
  11970     case 0x8: /* FMINNM */
  11971     case 0x9: /* FMLS */
  11972     case 0xa: /* FSUB */
  11973     case 0xe: /* FMIN */
  11974     case 0xf: /* FRSQRTS */
  11975     case 0x13: /* FMUL */
  11976     case 0x14: /* FCMGE */
  11977     case 0x15: /* FACGE */
  11978     case 0x17: /* FDIV */
  11979     case 0x1a: /* FABD */
  11980     case 0x1c: /* FCMGT */
  11981     case 0x1d: /* FACGT */
  11982         pairwise = false;
  11983         break;
  11984     case 0x10: /* FMAXNMP */
  11985     case 0x12: /* FADDP */
  11986     case 0x16: /* FMAXP */
  11987     case 0x18: /* FMINNMP */
  11988     case 0x1e: /* FMINP */
  11989         pairwise = true;
  11990         break;
  11991     default:
  11992         unallocated_encoding(s);
  11993         return;
  11994     }
  11995 
  11996     if (!dc_isar_feature(aa64_fp16, s)) {
  11997         unallocated_encoding(s);
  11998         return;
  11999     }
  12000 
  12001     if (!fp_access_check(s)) {
  12002         return;
  12003     }
  12004 
  12005     fpst = fpstatus_ptr(FPST_FPCR_F16);
  12006 
  12007     if (pairwise) {
  12008         int maxpass = is_q ? 8 : 4;
  12009         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
  12010         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
  12011         TCGv_i32 tcg_res[8];
  12012 
  12013         for (pass = 0; pass < maxpass; pass++) {
  12014             int passreg = pass < (maxpass / 2) ? rn : rm;
  12015             int passelt = (pass << 1) & (maxpass - 1);
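                    /*
                     * The first half of the results come from adjacent
                     * pairs in Rn, the second half from Rm: e.g. with Q=1
                     * (maxpass 8), passes 0-3 read Rn element pairs
                     * {0,1},{2,3},{4,5},{6,7}.
                     */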
  12016 
  12017             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
  12018             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
  12019             tcg_res[pass] = tcg_temp_new_i32();
  12020 
  12021             switch (fpopcode) {
  12022             case 0x10: /* FMAXNMP */
  12023                 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
  12024                                            fpst);
  12025                 break;
  12026             case 0x12: /* FADDP */
  12027                 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
  12028                 break;
  12029             case 0x16: /* FMAXP */
  12030                 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
  12031                 break;
  12032             case 0x18: /* FMINNMP */
  12033                 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
  12034                                            fpst);
  12035                 break;
  12036             case 0x1e: /* FMINP */
  12037                 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
  12038                 break;
  12039             default:
  12040                 g_assert_not_reached();
  12041             }
  12042         }
  12043 
  12044         for (pass = 0; pass < maxpass; pass++) {
  12045             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
  12046             tcg_temp_free_i32(tcg_res[pass]);
  12047         }
  12048 
  12049         tcg_temp_free_i32(tcg_op1);
  12050         tcg_temp_free_i32(tcg_op2);
  12051 
  12052     } else {
  12053         for (pass = 0; pass < elements; pass++) {
  12054             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
  12055             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
  12056             TCGv_i32 tcg_res = tcg_temp_new_i32();
  12057 
  12058             read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
  12059             read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
  12060 
  12061             switch (fpopcode) {
  12062             case 0x0: /* FMAXNM */
  12063                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
  12064                 break;
  12065             case 0x1: /* FMLA */
  12066                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
  12067                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
  12068                                            fpst);
  12069                 break;
  12070             case 0x2: /* FADD */
  12071                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
  12072                 break;
  12073             case 0x3: /* FMULX */
  12074                 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
  12075                 break;
  12076             case 0x4: /* FCMEQ */
  12077                 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
  12078                 break;
  12079             case 0x6: /* FMAX */
  12080                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
  12081                 break;
  12082             case 0x7: /* FRECPS */
  12083                 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
  12084                 break;
  12085             case 0x8: /* FMINNM */
  12086                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
  12087                 break;
  12088             case 0x9: /* FMLS */
  12089                 /* As usual for ARM, separate negation for fused multiply-add */
  12090                 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
  12091                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
  12092                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
  12093                                            fpst);
  12094                 break;
  12095             case 0xa: /* FSUB */
  12096                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
  12097                 break;
  12098             case 0xe: /* FMIN */
  12099                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
  12100                 break;
  12101             case 0xf: /* FRSQRTS */
  12102                 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
  12103                 break;
  12104             case 0x13: /* FMUL */
  12105                 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
  12106                 break;
  12107             case 0x14: /* FCMGE */
  12108                 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
  12109                 break;
  12110             case 0x15: /* FACGE */
  12111                 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
  12112                 break;
  12113             case 0x17: /* FDIV */
  12114                 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
  12115                 break;
  12116             case 0x1a: /* FABD */
  12117                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
  12118                 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
  12119                 break;
  12120             case 0x1c: /* FCMGT */
  12121                 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
  12122                 break;
  12123             case 0x1d: /* FACGT */
  12124                 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
  12125                 break;
  12126             default:
  12127                 g_assert_not_reached();
  12128             }
  12129 
  12130             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
  12131             tcg_temp_free_i32(tcg_res);
  12132             tcg_temp_free_i32(tcg_op1);
  12133             tcg_temp_free_i32(tcg_op2);
  12134         }
  12135     }
  12136 
  12137     tcg_temp_free_ptr(fpst);
  12138 
  12139     clear_vec_high(s, is_q, rd);
  12140 }
  12141 
  12142 /* AdvSIMD three same extra
  12143  *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
  12144  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
  12145  * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
  12146  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
  12147  */
  12148 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
  12149 {
  12150     int rd = extract32(insn, 0, 5);
  12151     int rn = extract32(insn, 5, 5);
  12152     int opcode = extract32(insn, 11, 4);
  12153     int rm = extract32(insn, 16, 5);
  12154     int size = extract32(insn, 22, 2);
  12155     bool u = extract32(insn, 29, 1);
  12156     bool is_q = extract32(insn, 30, 1);
  12157     bool feature;
  12158     int rot;
  12159 
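            /*
             * Fold U into the switch value: e.g. SDOT is U=0, opcode 0x2
             * (case 0x02) while UDOT is U=1, opcode 0x2 (case 0x12).
             */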
  12160     switch (u * 16 + opcode) {
  12161     case 0x10: /* SQRDMLAH (vector) */
  12162     case 0x11: /* SQRDMLSH (vector) */
  12163         if (size != 1 && size != 2) {
  12164             unallocated_encoding(s);
  12165             return;
  12166         }
  12167         feature = dc_isar_feature(aa64_rdm, s);
  12168         break;
  12169     case 0x02: /* SDOT (vector) */
  12170     case 0x12: /* UDOT (vector) */
  12171         if (size != MO_32) {
  12172             unallocated_encoding(s);
  12173             return;
  12174         }
  12175         feature = dc_isar_feature(aa64_dp, s);
  12176         break;
  12177     case 0x03: /* USDOT */
  12178         if (size != MO_32) {
  12179             unallocated_encoding(s);
  12180             return;
  12181         }
  12182         feature = dc_isar_feature(aa64_i8mm, s);
  12183         break;
  12184     case 0x04: /* SMMLA */
  12185     case 0x14: /* UMMLA */
  12186     case 0x05: /* USMMLA */
  12187         if (!is_q || size != MO_32) {
  12188             unallocated_encoding(s);
  12189             return;
  12190         }
  12191         feature = dc_isar_feature(aa64_i8mm, s);
  12192         break;
  12193     case 0x18: /* FCMLA, #0 */
  12194     case 0x19: /* FCMLA, #90 */
  12195     case 0x1a: /* FCMLA, #180 */
  12196     case 0x1b: /* FCMLA, #270 */
  12197     case 0x1c: /* FCADD, #90 */
  12198     case 0x1e: /* FCADD, #270 */
  12199         if (size == 0
  12200             || (size == 1 && !dc_isar_feature(aa64_fp16, s))
  12201             || (size == 3 && !is_q)) {
  12202             unallocated_encoding(s);
  12203             return;
  12204         }
  12205         feature = dc_isar_feature(aa64_fcma, s);
  12206         break;
  12207     case 0x1d: /* BFMMLA */
  12208         if (size != MO_16 || !is_q) {
  12209             unallocated_encoding(s);
  12210             return;
  12211         }
  12212         feature = dc_isar_feature(aa64_bf16, s);
  12213         break;
  12214     case 0x1f:
  12215         switch (size) {
  12216         case 1: /* BFDOT */
  12217         case 3: /* BFMLAL{B,T} */
  12218             feature = dc_isar_feature(aa64_bf16, s);
  12219             break;
  12220         default:
  12221             unallocated_encoding(s);
  12222             return;
  12223         }
  12224         break;
  12225     default:
  12226         unallocated_encoding(s);
  12227         return;
  12228     }
  12229     if (!feature) {
  12230         unallocated_encoding(s);
  12231         return;
  12232     }
  12233     if (!fp_access_check(s)) {
  12234         return;
  12235     }
  12236 
  12237     switch (opcode) {
  12238     case 0x0: /* SQRDMLAH (vector) */
  12239         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
  12240         return;
  12241 
  12242     case 0x1: /* SQRDMLSH (vector) */
  12243         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
  12244         return;
  12245 
  12246     case 0x2: /* SDOT / UDOT */
  12247         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
  12248                          u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
  12249         return;
  12250 
  12251     case 0x3: /* USDOT */
  12252         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
  12253         return;
  12254 
  12255     case 0x04: /* SMMLA, UMMLA */
  12256         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
  12257                          u ? gen_helper_gvec_ummla_b
  12258                          : gen_helper_gvec_smmla_b);
  12259         return;
  12260     case 0x05: /* USMMLA */
  12261         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
  12262         return;
  12263 
  12264     case 0x8: /* FCMLA, #0 */
  12265     case 0x9: /* FCMLA, #90 */
  12266     case 0xa: /* FCMLA, #180 */
  12267     case 0xb: /* FCMLA, #270 */
  12268         rot = extract32(opcode, 0, 2);
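                /* rot values 0..3 select the #0/#90/#180/#270 rotation */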
  12269         switch (size) {
  12270         case 1:
  12271             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
  12272                               gen_helper_gvec_fcmlah);
  12273             break;
  12274         case 2:
  12275             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
  12276                               gen_helper_gvec_fcmlas);
  12277             break;
  12278         case 3:
  12279             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
  12280                               gen_helper_gvec_fcmlad);
  12281             break;
  12282         default:
  12283             g_assert_not_reached();
  12284         }
  12285         return;
  12286 
  12287     case 0xc: /* FCADD, #90 */
  12288     case 0xe: /* FCADD, #270 */
  12289         rot = extract32(opcode, 1, 1);
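                /* rot 0 is FCADD #90, rot 1 is FCADD #270 */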
  12290         switch (size) {
  12291         case 1:
  12292             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
  12293                               gen_helper_gvec_fcaddh);
  12294             break;
  12295         case 2:
  12296             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
  12297                               gen_helper_gvec_fcadds);
  12298             break;
  12299         case 3:
  12300             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
  12301                               gen_helper_gvec_fcaddd);
  12302             break;
  12303         default:
  12304             g_assert_not_reached();
  12305         }
  12306         return;
  12307 
  12308     case 0xd: /* BFMMLA */
  12309         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
  12310         return;
  12311     case 0xf:
  12312         switch (size) {
  12313         case 1: /* BFDOT */
  12314             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
  12315             break;
  12316         case 3: /* BFMLAL{B,T} */
  12317             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
  12318                               gen_helper_gvec_bfmlal);
  12319             break;
  12320         default:
  12321             g_assert_not_reached();
  12322         }
  12323         return;
  12324 
  12325     default:
  12326         g_assert_not_reached();
  12327     }
  12328 }
  12329 
  12330 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
  12331                                   int size, int rn, int rd)
  12332 {
  12333     /* Handle 2-reg-misc ops which are widening (so each size element
  12334      * in the source becomes a 2*size element in the destination).
  12335      * The only instruction like this is FCVTL.
  12336      */
  12337     int pass;
  12338 
  12339     if (size == 3) {
  12340         /* 32 -> 64 bit fp conversion */
  12341         TCGv_i64 tcg_res[2];
  12342         int srcelt = is_q ? 2 : 0;
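                /* FCVTL2 (Q=1) converts the high half of the source register */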
  12343 
  12344         for (pass = 0; pass < 2; pass++) {
  12345             TCGv_i32 tcg_op = tcg_temp_new_i32();
  12346             tcg_res[pass] = tcg_temp_new_i64();
  12347 
  12348             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
  12349             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
  12350             tcg_temp_free_i32(tcg_op);
  12351         }
  12352         for (pass = 0; pass < 2; pass++) {
  12353             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
  12354             tcg_temp_free_i64(tcg_res[pass]);
  12355         }
  12356     } else {
  12357         /* 16 -> 32 bit fp conversion */
  12358         int srcelt = is_q ? 4 : 0;
  12359         TCGv_i32 tcg_res[4];
  12360         TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
  12361         TCGv_i32 ahp = get_ahp_flag();
  12362 
  12363         for (pass = 0; pass < 4; pass++) {
  12364             tcg_res[pass] = tcg_temp_new_i32();
  12365 
  12366             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
  12367             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
  12368                                            fpst, ahp);
  12369         }
  12370         for (pass = 0; pass < 4; pass++) {
  12371             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
  12372             tcg_temp_free_i32(tcg_res[pass]);
  12373         }
  12374 
  12375         tcg_temp_free_ptr(fpst);
  12376         tcg_temp_free_i32(ahp);
  12377     }
  12378 }
  12379 
  12380 static void handle_rev(DisasContext *s, int opcode, bool u,
  12381                        bool is_q, int size, int rn, int rd)
  12382 {
  12383     int op = (opcode << 1) | u;
  12384     int opsz = op + size;
  12385     int grp_size = 3 - opsz;
  12386     int dsize = is_q ? 128 : 64;
  12387     int i;
  12388 
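            /*
             * grp_size is the log2 of the number of elements in each
             * reversal group: e.g. REV64 (op 0) on 16-bit elements
             * (size 1) gives grp_size 2, so elements are reversed in
             * groups of four, i.e. within each 64-bit chunk.
             */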
  12389     if (opsz >= 3) {
  12390         unallocated_encoding(s);
  12391         return;
  12392     }
  12393 
  12394     if (!fp_access_check(s)) {
  12395         return;
  12396     }
  12397 
  12398     if (size == 0) {
  12399         /* Special case bytes, use bswap op on each group of elements */
  12400         int groups = dsize / (8 << grp_size);
  12401 
  12402         for (i = 0; i < groups; i++) {
  12403             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
  12404 
  12405             read_vec_element(s, tcg_tmp, rn, i, grp_size);
  12406             switch (grp_size) {
  12407             case MO_16:
  12408                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
  12409                 break;
  12410             case MO_32:
  12411                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
  12412                 break;
  12413             case MO_64:
  12414                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
  12415                 break;
  12416             default:
  12417                 g_assert_not_reached();
  12418             }
  12419             write_vec_element(s, tcg_tmp, rd, i, grp_size);
  12420             tcg_temp_free_i64(tcg_tmp);
  12421         }
  12422         clear_vec_high(s, is_q, rd);
  12423     } else {
  12424         int revmask = (1 << grp_size) - 1;
  12425         int esize = 8 << size;
  12426         int elements = dsize / esize;
  12427         TCGv_i64 tcg_rn = tcg_temp_new_i64();
  12428         TCGv_i64 tcg_rd = tcg_const_i64(0);
  12429         TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
  12430 
  12431         for (i = 0; i < elements; i++) {
  12432             int e_rev = (i & 0xf) ^ revmask;
  12433             int off = e_rev * esize;
  12434             read_vec_element(s, tcg_rn, rn, i, size);
  12435             if (off >= 64) {
  12436                 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
  12437                                     tcg_rn, off - 64, esize);
  12438             } else {
  12439                 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
  12440             }
  12441         }
  12442         write_vec_element(s, tcg_rd, rd, 0, MO_64);
  12443         write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
  12444 
  12445         tcg_temp_free_i64(tcg_rd_hi);
  12446         tcg_temp_free_i64(tcg_rd);
  12447         tcg_temp_free_i64(tcg_rn);
  12448     }
  12449 }
  12450 
  12451 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
  12452                                   bool is_q, int size, int rn, int rd)
  12453 {
  12454     /* Implement the pairwise operations from 2-misc:
  12455      * SADDLP, UADDLP, SADALP, UADALP.
  12456      * These all add pairs of elements in the input to produce a
  12457      * double-width result element in the output (possibly accumulating).
  12458      */
  12459     bool accum = (opcode == 0x6);
  12460     int maxpass = is_q ? 2 : 1;
  12461     int pass;
  12462     TCGv_i64 tcg_res[2];
  12463 
  12464     if (size == 2) {
  12465         /* 32 + 32 -> 64 op */
  12466         MemOp memop = size + (u ? 0 : MO_SIGN);
  12467 
  12468         for (pass = 0; pass < maxpass; pass++) {
  12469             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
  12470             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
  12471 
  12472             tcg_res[pass] = tcg_temp_new_i64();
  12473 
  12474             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
  12475             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
  12476             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
  12477             if (accum) {
  12478                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
  12479                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
  12480             }
  12481 
  12482             tcg_temp_free_i64(tcg_op1);
  12483             tcg_temp_free_i64(tcg_op2);
  12484         }
  12485     } else {
  12486         for (pass = 0; pass < maxpass; pass++) {
  12487             TCGv_i64 tcg_op = tcg_temp_new_i64();
  12488             NeonGenOne64OpFn *genfn;
  12489             static NeonGenOne64OpFn * const fns[2][2] = {
  12490                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
  12491                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
  12492             };
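                    /* Each addlp helper pairwise-adds all the element
                     * pairs within one 64-bit lane of the input.
                     */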
  12493 
  12494             genfn = fns[size][u];
  12495 
  12496             tcg_res[pass] = tcg_temp_new_i64();
  12497 
  12498             read_vec_element(s, tcg_op, rn, pass, MO_64);
  12499             genfn(tcg_res[pass], tcg_op);
  12500 
  12501             if (accum) {
  12502                 read_vec_element(s, tcg_op, rd, pass, MO_64);
  12503                 if (size == 0) {
  12504                     gen_helper_neon_addl_u16(tcg_res[pass],
  12505                                              tcg_res[pass], tcg_op);
  12506                 } else {
  12507                     gen_helper_neon_addl_u32(tcg_res[pass],
  12508                                              tcg_res[pass], tcg_op);
  12509                 }
  12510             }
  12511             tcg_temp_free_i64(tcg_op);
  12512         }
  12513     }
  12514     if (!is_q) {
  12515         tcg_res[1] = tcg_constant_i64(0);
  12516     }
  12517     for (pass = 0; pass < 2; pass++) {
  12518         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
  12519         tcg_temp_free_i64(tcg_res[pass]);
  12520     }
  12521 }
  12522 
  12523 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
  12524 {
  12525     /* Implement SHLL and SHLL2 */
  12526     int pass;
  12527     int part = is_q ? 2 : 0;
  12528     TCGv_i64 tcg_res[2];
  12529 
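            /*
             * SHLL2 (Q=1) widens the high half of the source; the shift
             * amount is fixed at the element size (8 << size).
             */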
  12530     for (pass = 0; pass < 2; pass++) {
  12531         static NeonGenWidenFn * const widenfns[3] = {
  12532             gen_helper_neon_widen_u8,
  12533             gen_helper_neon_widen_u16,
  12534             tcg_gen_extu_i32_i64,
  12535         };
  12536         NeonGenWidenFn *widenfn = widenfns[size];
  12537         TCGv_i32 tcg_op = tcg_temp_new_i32();
  12538 
  12539         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
  12540         tcg_res[pass] = tcg_temp_new_i64();
  12541         widenfn(tcg_res[pass], tcg_op);
  12542         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
  12543 
  12544         tcg_temp_free_i32(tcg_op);
  12545     }
  12546 
  12547     for (pass = 0; pass < 2; pass++) {
  12548         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
  12549         tcg_temp_free_i64(tcg_res[pass]);
  12550     }
  12551 }
  12552 
  12553 /* AdvSIMD two reg misc
  12554  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
  12555  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
  12556  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
  12557  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
  12558  */
  12559 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
  12560 {
  12561     int size = extract32(insn, 22, 2);
  12562     int opcode = extract32(insn, 12, 5);
  12563     bool u = extract32(insn, 29, 1);
  12564     bool is_q = extract32(insn, 30, 1);
  12565     int rn = extract32(insn, 5, 5);
  12566     int rd = extract32(insn, 0, 5);
  12567     bool need_fpstatus = false;
  12568     bool need_rmode = false;
  12569     int rmode = -1;
  12570     TCGv_i32 tcg_rmode;
  12571     TCGv_ptr tcg_fpstatus;
  12572 
  12573     switch (opcode) {
  12574     case 0x0: /* REV64, REV32 */
  12575     case 0x1: /* REV16 */
  12576         handle_rev(s, opcode, u, is_q, size, rn, rd);
  12577         return;
  12578     case 0x5: /* CNT, NOT, RBIT */
  12579         if (u && size == 0) {
  12580             /* NOT */
  12581             break;
  12582         } else if (u && size == 1) {
  12583             /* RBIT */
  12584             break;
  12585         } else if (!u && size == 0) {
  12586             /* CNT */
  12587             break;
  12588         }
  12589         unallocated_encoding(s);
  12590         return;
  12591     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
  12592     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
  12593         if (size == 3) {
  12594             unallocated_encoding(s);
  12595             return;
  12596         }
  12597         if (!fp_access_check(s)) {
  12598             return;
  12599         }
  12600 
  12601         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
  12602         return;
  12603     case 0x4: /* CLS, CLZ */
  12604         if (size == 3) {
  12605             unallocated_encoding(s);
  12606             return;
  12607         }
  12608         break;
  12609     case 0x2: /* SADDLP, UADDLP */
  12610     case 0x6: /* SADALP, UADALP */
  12611         if (size == 3) {
  12612             unallocated_encoding(s);
  12613             return;
  12614         }
  12615         if (!fp_access_check(s)) {
  12616             return;
  12617         }
  12618         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
  12619         return;
  12620     case 0x13: /* SHLL, SHLL2 */
  12621         if (u == 0 || size == 3) {
  12622             unallocated_encoding(s);
  12623             return;
  12624         }
  12625         if (!fp_access_check(s)) {
  12626             return;
  12627         }
  12628         handle_shll(s, is_q, size, rn, rd);
  12629         return;
  12630     case 0xa: /* CMLT */
  12631         if (u == 1) {
  12632             unallocated_encoding(s);
  12633             return;
  12634         }
  12635         /* fall through */
  12636     case 0x8: /* CMGT, CMGE */
  12637     case 0x9: /* CMEQ, CMLE */
  12638     case 0xb: /* ABS, NEG */
  12639         if (size == 3 && !is_q) {
  12640             unallocated_encoding(s);
  12641             return;
  12642         }
  12643         break;
  12644     case 0x3: /* SUQADD, USQADD */
  12645         if (size == 3 && !is_q) {
  12646             unallocated_encoding(s);
  12647             return;
  12648         }
  12649         if (!fp_access_check(s)) {
  12650             return;
  12651         }
  12652         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
  12653         return;
  12654     case 0x7: /* SQABS, SQNEG */
  12655         if (size == 3 && !is_q) {
  12656             unallocated_encoding(s);
  12657             return;
  12658         }
  12659         break;
  12660     case 0xc ... 0xf:
  12661     case 0x16 ... 0x1f:
  12662     {
  12663         /* Floating point: U, size[1] and opcode indicate operation;
  12664          * size[0] indicates single or double precision.
  12665          */
  12666         int is_double = extract32(size, 0, 1);
  12667         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
  12668         size = is_double ? 3 : 2;
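                /*
                 * e.g. FABS is base opcode 0xf with size[1]=1 and U=0,
                 * giving 0x2f below; with U=1 the same encoding becomes
                 * FNEG (0x6f).
                 */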
  12669         switch (opcode) {
  12670         case 0x2f: /* FABS */
  12671         case 0x6f: /* FNEG */
  12672             if (size == 3 && !is_q) {
  12673                 unallocated_encoding(s);
  12674                 return;
  12675             }
  12676             break;
  12677         case 0x1d: /* SCVTF */
  12678         case 0x5d: /* UCVTF */
  12679         {
  12680             bool is_signed = (opcode == 0x1d);
  12681             int elements = is_double ? 2 : is_q ? 4 : 2;
  12682             if (is_double && !is_q) {
  12683                 unallocated_encoding(s);
  12684                 return;
  12685             }
  12686             if (!fp_access_check(s)) {
  12687                 return;
  12688             }
  12689             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
  12690             return;
  12691         }
  12692         case 0x2c: /* FCMGT (zero) */
  12693         case 0x2d: /* FCMEQ (zero) */
  12694         case 0x2e: /* FCMLT (zero) */
  12695         case 0x6c: /* FCMGE (zero) */
  12696         case 0x6d: /* FCMLE (zero) */
  12697             if (size == 3 && !is_q) {
  12698                 unallocated_encoding(s);
  12699                 return;
  12700             }
  12701             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
  12702             return;
  12703         case 0x7f: /* FSQRT */
  12704             if (size == 3 && !is_q) {
  12705                 unallocated_encoding(s);
  12706                 return;
  12707             }
  12708             break;
  12709         case 0x1a: /* FCVTNS */
  12710         case 0x1b: /* FCVTMS */
  12711         case 0x3a: /* FCVTPS */
  12712         case 0x3b: /* FCVTZS */
  12713         case 0x5a: /* FCVTNU */
  12714         case 0x5b: /* FCVTMU */
  12715         case 0x7a: /* FCVTPU */
  12716         case 0x7b: /* FCVTZU */
  12717             need_fpstatus = true;
  12718             need_rmode = true;
  12719             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
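                    /*
                     * Bits 5 and 0 of the opcode select the rounding mode:
                     * FCVTN* -> 0 (tie-even), FCVTP* -> 1 (+inf),
                     * FCVTM* -> 2 (-inf), FCVTZ* -> 3 (zero), matching
                     * the FPROUNDING_* enumeration.
                     */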
  12720             if (size == 3 && !is_q) {
  12721                 unallocated_encoding(s);
  12722                 return;
  12723             }
  12724             break;
  12725         case 0x5c: /* FCVTAU */
  12726         case 0x1c: /* FCVTAS */
  12727             need_fpstatus = true;
  12728             need_rmode = true;
  12729             rmode = FPROUNDING_TIEAWAY;
  12730             if (size == 3 && !is_q) {
  12731                 unallocated_encoding(s);
  12732                 return;
  12733             }
  12734             break;
  12735         case 0x3c: /* URECPE */
  12736             if (size == 3) {
  12737                 unallocated_encoding(s);
  12738                 return;
  12739             }
  12740             /* fall through */
  12741         case 0x3d: /* FRECPE */
  12742         case 0x7d: /* FRSQRTE */
  12743             if (size == 3 && !is_q) {
  12744                 unallocated_encoding(s);
  12745                 return;
  12746             }
  12747             if (!fp_access_check(s)) {
  12748                 return;
  12749             }
  12750             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
  12751             return;
  12752         case 0x56: /* FCVTXN, FCVTXN2 */
  12753             if (size == 2) {
  12754                 unallocated_encoding(s);
  12755                 return;
  12756             }
  12757             /* fall through */
  12758         case 0x16: /* FCVTN, FCVTN2 */
  12759             /* handle_2misc_narrow does a 2*size -> size operation, but these
  12760              * instructions encode the source size rather than dest size.
  12761              */
  12762             if (!fp_access_check(s)) {
  12763                 return;
  12764             }
  12765             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
  12766             return;
  12767         case 0x36: /* BFCVTN, BFCVTN2 */
  12768             if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
  12769                 unallocated_encoding(s);
  12770                 return;
  12771             }
  12772             if (!fp_access_check(s)) {
  12773                 return;
  12774             }
  12775             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
  12776             return;
  12777         case 0x17: /* FCVTL, FCVTL2 */
  12778             if (!fp_access_check(s)) {
  12779                 return;
  12780             }
  12781             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
  12782             return;
  12783         case 0x18: /* FRINTN */
  12784         case 0x19: /* FRINTM */
  12785         case 0x38: /* FRINTP */
  12786         case 0x39: /* FRINTZ */
  12787             need_rmode = true;
  12788             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
  12789             /* fall through */
  12790         case 0x59: /* FRINTX */
  12791         case 0x79: /* FRINTI */
  12792             need_fpstatus = true;
  12793             if (size == 3 && !is_q) {
  12794                 unallocated_encoding(s);
  12795                 return;
  12796             }
  12797             break;
  12798         case 0x58: /* FRINTA */
  12799             need_rmode = true;
  12800             rmode = FPROUNDING_TIEAWAY;
  12801             need_fpstatus = true;
  12802             if (size == 3 && !is_q) {
  12803                 unallocated_encoding(s);
  12804                 return;
  12805             }
  12806             break;
  12807         case 0x7c: /* URSQRTE */
  12808             if (size == 3) {
  12809                 unallocated_encoding(s);
  12810                 return;
  12811             }
  12812             break;
  12813         case 0x1e: /* FRINT32Z */
  12814         case 0x1f: /* FRINT64Z */
  12815             need_rmode = true;
  12816             rmode = FPROUNDING_ZERO;
  12817             /* fall through */
  12818         case 0x5e: /* FRINT32X */
  12819         case 0x5f: /* FRINT64X */
  12820             need_fpstatus = true;
  12821             if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
  12822                 unallocated_encoding(s);
  12823                 return;
  12824             }
  12825             break;
  12826         default:
  12827             unallocated_encoding(s);
  12828             return;
  12829         }
  12830         break;
  12831     }
  12832     default:
  12833         unallocated_encoding(s);
  12834         return;
  12835     }
  12836 
  12837     if (!fp_access_check(s)) {
  12838         return;
  12839     }
  12840 
  12841     if (need_fpstatus || need_rmode) {
  12842         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
  12843     } else {
  12844         tcg_fpstatus = NULL;
  12845     }
  12846     if (need_rmode) {
  12847         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
  12848         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
  12849     } else {
  12850         tcg_rmode = NULL;
  12851     }
  12852 
  12853     switch (opcode) {
  12854     case 0x5:
  12855         if (u && size == 0) { /* NOT */
  12856             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
  12857             return;
  12858         }
  12859         break;
  12860     case 0x8: /* CMGT, CMGE */
  12861         if (u) {
  12862             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
  12863         } else {
  12864             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
  12865         }
  12866         return;
  12867     case 0x9: /* CMEQ, CMLE */
  12868         if (u) {
  12869             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
  12870         } else {
  12871             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
  12872         }
  12873         return;
  12874     case 0xa: /* CMLT */
  12875         gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
  12876         return;
  12877     case 0xb:
  12878         if (u) { /* ABS, NEG */
  12879             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
  12880         } else {
  12881             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
  12882         }
  12883         return;
  12884     }
  12885 
  12886     if (size == 3) {
  12887         /* All 64-bit element operations can be shared with scalar 2misc */
  12888         int pass;
  12889 
  12890         /* Coverity claims (size == 3 && !is_q) has been eliminated
  12891          * from all paths leading to here.
  12892          */
  12893         tcg_debug_assert(is_q);
  12894         for (pass = 0; pass < 2; pass++) {
  12895             TCGv_i64 tcg_op = tcg_temp_new_i64();
  12896             TCGv_i64 tcg_res = tcg_temp_new_i64();
  12897 
  12898             read_vec_element(s, tcg_op, rn, pass, MO_64);
  12899 
  12900             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
  12901                             tcg_rmode, tcg_fpstatus);
  12902 
  12903             write_vec_element(s, tcg_res, rd, pass, MO_64);
  12904 
  12905             tcg_temp_free_i64(tcg_res);
  12906             tcg_temp_free_i64(tcg_op);
  12907         }
  12908     } else {
  12909         int pass;
  12910 
  12911         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
  12912             TCGv_i32 tcg_op = tcg_temp_new_i32();
  12913             TCGv_i32 tcg_res = tcg_temp_new_i32();
  12914 
  12915             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
  12916 
  12917             if (size == 2) {
  12918                 /* Special cases for 32 bit elements */
  12919                 switch (opcode) {
  12920                 case 0x4: /* CLS */
  12921                     if (u) {
  12922                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
  12923                     } else {
  12924                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
  12925                     }
  12926                     break;
  12927                 case 0x7: /* SQABS, SQNEG */
  12928                     if (u) {
  12929                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
  12930                     } else {
  12931                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
  12932                     }
  12933                     break;
  12934                 case 0x2f: /* FABS */
  12935                     gen_helper_vfp_abss(tcg_res, tcg_op);
  12936                     break;
  12937                 case 0x6f: /* FNEG */
  12938                     gen_helper_vfp_negs(tcg_res, tcg_op);
  12939                     break;
  12940                 case 0x7f: /* FSQRT */
  12941                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
  12942                     break;
  12943                 case 0x1a: /* FCVTNS */
  12944                 case 0x1b: /* FCVTMS */
  12945                 case 0x1c: /* FCVTAS */
  12946                 case 0x3a: /* FCVTPS */
  12947                 case 0x3b: /* FCVTZS */
  12948                     gen_helper_vfp_tosls(tcg_res, tcg_op,
  12949                                          tcg_constant_i32(0), tcg_fpstatus);
  12950                     break;
  12951                 case 0x5a: /* FCVTNU */
  12952                 case 0x5b: /* FCVTMU */
  12953                 case 0x5c: /* FCVTAU */
  12954                 case 0x7a: /* FCVTPU */
  12955                 case 0x7b: /* FCVTZU */
  12956                     gen_helper_vfp_touls(tcg_res, tcg_op,
  12957                                          tcg_constant_i32(0), tcg_fpstatus);
  12958                     break;
  12959                 case 0x18: /* FRINTN */
  12960                 case 0x19: /* FRINTM */
  12961                 case 0x38: /* FRINTP */
  12962                 case 0x39: /* FRINTZ */
  12963                 case 0x58: /* FRINTA */
  12964                 case 0x79: /* FRINTI */
  12965                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
  12966                     break;
  12967                 case 0x59: /* FRINTX */
  12968                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
  12969                     break;
  12970                 case 0x7c: /* URSQRTE */
  12971                     gen_helper_rsqrte_u32(tcg_res, tcg_op);
  12972                     break;
  12973                 case 0x1e: /* FRINT32Z */
  12974                 case 0x5e: /* FRINT32X */
  12975                     gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
  12976                     break;
  12977                 case 0x1f: /* FRINT64Z */
  12978                 case 0x5f: /* FRINT64X */
  12979                     gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
  12980                     break;
  12981                 default:
  12982                     g_assert_not_reached();
  12983                 }
  12984             } else {
  12985                 /* Use helpers for 8 and 16 bit elements */
  12986                 switch (opcode) {
  12987                 case 0x5: /* CNT, RBIT */
  12988                     /* For these two insns size is part of the opcode specifier
  12989                      * (handled earlier); they always operate on byte elements.
  12990                      */
  12991                     if (u) {
  12992                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
  12993                     } else {
  12994                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
  12995                     }
  12996                     break;
  12997                 case 0x7: /* SQABS, SQNEG */
  12998                 {
  12999                     NeonGenOneOpEnvFn *genfn;
  13000                     static NeonGenOneOpEnvFn * const fns[2][2] = {
  13001                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
  13002                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
  13003                     };
  13004                     genfn = fns[size][u];
  13005                     genfn(tcg_res, cpu_env, tcg_op);
  13006                     break;
  13007                 }
  13008                 case 0x4: /* CLS, CLZ */
  13009                     if (u) {
  13010                         if (size == 0) {
  13011                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
  13012                         } else {
  13013                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
  13014                         }
  13015                     } else {
  13016                         if (size == 0) {
  13017                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
  13018                         } else {
  13019                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
  13020                         }
  13021                     }
  13022                     break;
  13023                 default:
  13024                     g_assert_not_reached();
  13025                 }
  13026             }
  13027 
  13028             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
  13029 
  13030             tcg_temp_free_i32(tcg_res);
  13031             tcg_temp_free_i32(tcg_op);
  13032         }
  13033     }
  13034     clear_vec_high(s, is_q, rd);
  13035 
  13036     if (need_rmode) {
  13037         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
  13038         tcg_temp_free_i32(tcg_rmode);
  13039     }
  13040     if (need_fpstatus) {
  13041         tcg_temp_free_ptr(tcg_fpstatus);
  13042     }
  13043 }
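
        /*
         * A note on the rounding-mode save/restore just above:
         * helper_set_rmode() installs the mode it is given and returns
         * the previously active mode in the same temporary, so issuing
         * the call a second time with that temporary restores the
         * original FPCR rounding mode. The FP16 variant below uses the
         * same idiom.
         */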
  13044 
  13045 /* AdvSIMD [scalar] two register miscellaneous (FP16)
  13046  *
  13047  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
  13048  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
  13049  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
  13050  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
  13051  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
  13052  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
  13053  *
  13054  * This actually covers two groups where scalar access is governed by
  13055  * bit 28. Several of the instructions (float to integral) only exist
  13056  * in the vector form and are unallocated for the scalar decode. Also,
  13057  * in the scalar decode Q is always 1.
  13058  */
  13059 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
  13060 {
  13061     int fpop, opcode, a, u;
  13062     int rn, rd;
  13063     bool is_q;
  13064     bool is_scalar;
  13065     bool only_in_vector = false;
  13066 
  13067     int pass;
  13068     TCGv_i32 tcg_rmode = NULL;
  13069     TCGv_ptr tcg_fpstatus = NULL;
  13070     bool need_rmode = false;
  13071     bool need_fpst = true;
  13072     int rmode;
  13073 
  13074     if (!dc_isar_feature(aa64_fp16, s)) {
  13075         unallocated_encoding(s);
  13076         return;
  13077     }
  13078 
  13079     rd = extract32(insn, 0, 5);
  13080     rn = extract32(insn, 5, 5);
  13081 
  13082     a = extract32(insn, 23, 1);
  13083     u = extract32(insn, 29, 1);
  13084     is_scalar = extract32(insn, 28, 1);
  13085     is_q = extract32(insn, 30, 1);
  13086 
  13087     opcode = extract32(insn, 12, 5);
  13088     fpop = deposit32(opcode, 5, 1, a);
  13089     fpop = deposit32(fpop, 6, 1, u);
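            /*
             * Worked example of the composition above: half-precision
             * FCVTZU has opcode 0x1b with a == 1 and u == 1, so
             *     fpop = deposit32(0x1b, 5, 1, 1)   -> 0x3b
             *     fpop = deposit32(0x3b, 6, 1, 1)   -> 0x7b
             * which selects the 0x7b case below.
             */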
  13090 
  13091     switch (fpop) {
  13092     case 0x1d: /* SCVTF */
  13093     case 0x5d: /* UCVTF */
  13094     {
  13095         int elements;
  13096 
  13097         if (is_scalar) {
  13098             elements = 1;
  13099         } else {
  13100             elements = (is_q ? 8 : 4);
  13101         }
  13102 
  13103         if (!fp_access_check(s)) {
  13104             return;
  13105         }
  13106         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
  13107         return;
  13108     }
  13110     case 0x2c: /* FCMGT (zero) */
  13111     case 0x2d: /* FCMEQ (zero) */
  13112     case 0x2e: /* FCMLT (zero) */
  13113     case 0x6c: /* FCMGE (zero) */
  13114     case 0x6d: /* FCMLE (zero) */
  13115         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
  13116         return;
  13117     case 0x3d: /* FRECPE */
  13118     case 0x3f: /* FRECPX */
  13119         break;
  13120     case 0x18: /* FRINTN */
  13121         need_rmode = true;
  13122         only_in_vector = true;
  13123         rmode = FPROUNDING_TIEEVEN;
  13124         break;
  13125     case 0x19: /* FRINTM */
  13126         need_rmode = true;
  13127         only_in_vector = true;
  13128         rmode = FPROUNDING_NEGINF;
  13129         break;
  13130     case 0x38: /* FRINTP */
  13131         need_rmode = true;
  13132         only_in_vector = true;
  13133         rmode = FPROUNDING_POSINF;
  13134         break;
  13135     case 0x39: /* FRINTZ */
  13136         need_rmode = true;
  13137         only_in_vector = true;
  13138         rmode = FPROUNDING_ZERO;
  13139         break;
  13140     case 0x58: /* FRINTA */
  13141         need_rmode = true;
  13142         only_in_vector = true;
  13143         rmode = FPROUNDING_TIEAWAY;
  13144         break;
  13145     case 0x59: /* FRINTX */
  13146     case 0x79: /* FRINTI */
  13147         only_in_vector = true;
  13148         /* current rounding mode */
  13149         break;
  13150     case 0x1a: /* FCVTNS */
  13151         need_rmode = true;
  13152         rmode = FPROUNDING_TIEEVEN;
  13153         break;
  13154     case 0x1b: /* FCVTMS */
  13155         need_rmode = true;
  13156         rmode = FPROUNDING_NEGINF;
  13157         break;
  13158     case 0x1c: /* FCVTAS */
  13159         need_rmode = true;
  13160         rmode = FPROUNDING_TIEAWAY;
  13161         break;
  13162     case 0x3a: /* FCVTPS */
  13163         need_rmode = true;
  13164         rmode = FPROUNDING_POSINF;
  13165         break;
  13166     case 0x3b: /* FCVTZS */
  13167         need_rmode = true;
  13168         rmode = FPROUNDING_ZERO;
  13169         break;
  13170     case 0x5a: /* FCVTNU */
  13171         need_rmode = true;
  13172         rmode = FPROUNDING_TIEEVEN;
  13173         break;
  13174     case 0x5b: /* FCVTMU */
  13175         need_rmode = true;
  13176         rmode = FPROUNDING_NEGINF;
  13177         break;
  13178     case 0x5c: /* FCVTAU */
  13179         need_rmode = true;
  13180         rmode = FPROUNDING_TIEAWAY;
  13181         break;
  13182     case 0x7a: /* FCVTPU */
  13183         need_rmode = true;
  13184         rmode = FPROUNDING_POSINF;
  13185         break;
  13186     case 0x7b: /* FCVTZU */
  13187         need_rmode = true;
  13188         rmode = FPROUNDING_ZERO;
  13189         break;
  13190     case 0x2f: /* FABS */
  13191     case 0x6f: /* FNEG */
  13192         need_fpst = false;
  13193         break;
  13194     case 0x7d: /* FRSQRTE */
  13195     case 0x7f: /* FSQRT (vector) */
  13196         break;
  13197     default:
  13198         unallocated_encoding(s);
  13199         return;
  13200     }
  13201 
  13203     /* Check additional constraints for the scalar encoding */
  13204     if (is_scalar) {
  13205         if (!is_q) {
  13206             unallocated_encoding(s);
  13207             return;
  13208         }
  13209         /* FRINTxx is only in the vector form */
  13210         if (only_in_vector) {
  13211             unallocated_encoding(s);
  13212             return;
  13213         }
  13214     }
  13215 
  13216     if (!fp_access_check(s)) {
  13217         return;
  13218     }
  13219 
  13220     if (need_rmode || need_fpst) {
  13221         tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
  13222     }
  13223 
  13224     if (need_rmode) {
  13225         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
  13226         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
  13227     }
  13228 
  13229     if (is_scalar) {
  13230         TCGv_i32 tcg_op = read_fp_hreg(s, rn);
  13231         TCGv_i32 tcg_res = tcg_temp_new_i32();
  13232 
  13233         switch (fpop) {
  13234         case 0x1a: /* FCVTNS */
  13235         case 0x1b: /* FCVTMS */
  13236         case 0x1c: /* FCVTAS */
  13237         case 0x3a: /* FCVTPS */
  13238         case 0x3b: /* FCVTZS */
  13239             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
  13240             break;
  13241         case 0x3d: /* FRECPE */
  13242             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
  13243             break;
  13244         case 0x3f: /* FRECPX */
  13245             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
  13246             break;
  13247         case 0x5a: /* FCVTNU */
  13248         case 0x5b: /* FCVTMU */
  13249         case 0x5c: /* FCVTAU */
  13250         case 0x7a: /* FCVTPU */
  13251         case 0x7b: /* FCVTZU */
  13252             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
  13253             break;
  13254         case 0x6f: /* FNEG */
  13255             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
  13256             break;
  13257         case 0x7d: /* FRSQRTE */
  13258             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
  13259             break;
  13260         default:
  13261             g_assert_not_reached();
  13262         }
  13263 
  13264         /* limit any sign extension going on */
  13265         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
  13266         write_fp_sreg(s, rd, tcg_res);
  13267 
  13268         tcg_temp_free_i32(tcg_res);
  13269         tcg_temp_free_i32(tcg_op);
  13270     } else {
  13271         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
  13272             TCGv_i32 tcg_op = tcg_temp_new_i32();
  13273             TCGv_i32 tcg_res = tcg_temp_new_i32();
  13274 
  13275             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
  13276 
  13277             switch (fpop) {
  13278             case 0x1a: /* FCVTNS */
  13279             case 0x1b: /* FCVTMS */
  13280             case 0x1c: /* FCVTAS */
  13281             case 0x3a: /* FCVTPS */
  13282             case 0x3b: /* FCVTZS */
  13283                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
  13284                 break;
  13285             case 0x3d: /* FRECPE */
  13286                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
  13287                 break;
  13288             case 0x5a: /* FCVTNU */
  13289             case 0x5b: /* FCVTMU */
  13290             case 0x5c: /* FCVTAU */
  13291             case 0x7a: /* FCVTPU */
  13292             case 0x7b: /* FCVTZU */
  13293                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
  13294                 break;
  13295             case 0x18: /* FRINTN */
  13296             case 0x19: /* FRINTM */
  13297             case 0x38: /* FRINTP */
  13298             case 0x39: /* FRINTZ */
  13299             case 0x58: /* FRINTA */
  13300             case 0x79: /* FRINTI */
  13301                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
  13302                 break;
  13303             case 0x59: /* FRINTX */
  13304                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
  13305                 break;
  13306             case 0x2f: /* FABS */
  13307                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
  13308                 break;
  13309             case 0x6f: /* FNEG */
  13310                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
  13311                 break;
  13312             case 0x7d: /* FRSQRTE */
  13313                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
  13314                 break;
  13315             case 0x7f: /* FSQRT */
  13316                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
  13317                 break;
  13318             default:
  13319                 g_assert_not_reached();
  13320             }
  13321 
  13322             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
  13323 
  13324             tcg_temp_free_i32(tcg_res);
  13325             tcg_temp_free_i32(tcg_op);
  13326         }
  13327 
  13328         clear_vec_high(s, is_q, rd);
  13329     }
  13330 
  13331     if (tcg_rmode) {
  13332         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
  13333         tcg_temp_free_i32(tcg_rmode);
  13334     }
  13335 
  13336     if (tcg_fpstatus) {
  13337         tcg_temp_free_ptr(tcg_fpstatus);
  13338     }
  13339 }
  13340 
  13341 /* AdvSIMD scalar x indexed element
  13342  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
  13343  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
  13344  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
  13345  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
  13346  * AdvSIMD vector x indexed element
  13347  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
  13348  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
  13349  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
  13350  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
  13351  */
  13352 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
  13353 {
  13354     /* This encoding has two kinds of instruction:
  13355      *  normal, where we perform elt x idxelt => elt for each
  13356      *     element in the vector
  13357      *  long, where we perform elt x idxelt and generate a result of
  13358      *     double the width of the input element
  13359      * The long ops have a 'part' specifier (i.e. they come in INSN, INSN2 pairs).
  13360      */
  13361     bool is_scalar = extract32(insn, 28, 1);
  13362     bool is_q = extract32(insn, 30, 1);
  13363     bool u = extract32(insn, 29, 1);
  13364     int size = extract32(insn, 22, 2);
  13365     int l = extract32(insn, 21, 1);
  13366     int m = extract32(insn, 20, 1);
  13367     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
  13368     int rm = extract32(insn, 16, 4);
  13369     int opcode = extract32(insn, 12, 4);
  13370     int h = extract32(insn, 11, 1);
  13371     int rn = extract32(insn, 5, 5);
  13372     int rd = extract32(insn, 0, 5);
  13373     bool is_long = false;
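            /* is_fp: 0 = integer op, 1 = normal fp op, 2 = complex fp (FCMLA) */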
  13374     int is_fp = 0;
  13375     bool is_fp16 = false;
  13376     int index;
  13377     TCGv_ptr fpst;
  13378 
  13379     switch (16 * u + opcode) {
  13380     case 0x08: /* MUL */
  13381     case 0x10: /* MLA */
  13382     case 0x14: /* MLS */
  13383         if (is_scalar) {
  13384             unallocated_encoding(s);
  13385             return;
  13386         }
  13387         break;
  13388     case 0x02: /* SMLAL, SMLAL2 */
  13389     case 0x12: /* UMLAL, UMLAL2 */
  13390     case 0x06: /* SMLSL, SMLSL2 */
  13391     case 0x16: /* UMLSL, UMLSL2 */
  13392     case 0x0a: /* SMULL, SMULL2 */
  13393     case 0x1a: /* UMULL, UMULL2 */
  13394         if (is_scalar) {
  13395             unallocated_encoding(s);
  13396             return;
  13397         }
  13398         is_long = true;
  13399         break;
  13400     case 0x03: /* SQDMLAL, SQDMLAL2 */
  13401     case 0x07: /* SQDMLSL, SQDMLSL2 */
  13402     case 0x0b: /* SQDMULL, SQDMULL2 */
  13403         is_long = true;
  13404         break;
  13405     case 0x0c: /* SQDMULH */
  13406     case 0x0d: /* SQRDMULH */
  13407         break;
  13408     case 0x01: /* FMLA */
  13409     case 0x05: /* FMLS */
  13410     case 0x09: /* FMUL */
  13411     case 0x19: /* FMULX */
  13412         is_fp = 1;
  13413         break;
  13414     case 0x1d: /* SQRDMLAH */
  13415     case 0x1f: /* SQRDMLSH */
  13416         if (!dc_isar_feature(aa64_rdm, s)) {
  13417             unallocated_encoding(s);
  13418             return;
  13419         }
  13420         break;
  13421     case 0x0e: /* SDOT */
  13422     case 0x1e: /* UDOT */
  13423         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
  13424             unallocated_encoding(s);
  13425             return;
  13426         }
  13427         break;
  13428     case 0x0f:
  13429         switch (size) {
  13430         case 0: /* SUDOT */
  13431         case 2: /* USDOT */
  13432             if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) {
  13433                 unallocated_encoding(s);
  13434                 return;
  13435             }
  13436             size = MO_32;
  13437             break;
  13438         case 1: /* BFDOT */
  13439             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
  13440                 unallocated_encoding(s);
  13441                 return;
  13442             }
  13443             size = MO_32;
  13444             break;
  13445         case 3: /* BFMLAL{B,T} */
  13446             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
  13447                 unallocated_encoding(s);
  13448                 return;
  13449             }
  13450             /* don't set is_fp: the fp size checks below don't apply here */
  13451             size = MO_16;
  13452             break;
  13453         default:
  13454             unallocated_encoding(s);
  13455             return;
  13456         }
  13457         break;
  13458     case 0x11: /* FCMLA #0 */
  13459     case 0x13: /* FCMLA #90 */
  13460     case 0x15: /* FCMLA #180 */
  13461     case 0x17: /* FCMLA #270 */
  13462         if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
  13463             unallocated_encoding(s);
  13464             return;
  13465         }
  13466         is_fp = 2;
  13467         break;
  13468     case 0x00: /* FMLAL */
  13469     case 0x04: /* FMLSL */
  13470     case 0x18: /* FMLAL2 */
  13471     case 0x1c: /* FMLSL2 */
  13472         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
  13473             unallocated_encoding(s);
  13474             return;
  13475         }
  13476         size = MO_16;
  13477         /* is_fp, but we pass cpu_env not fp_status.  */
  13478         break;
  13479     default:
  13480         unallocated_encoding(s);
  13481         return;
  13482     }
  13483 
  13484     switch (is_fp) {
  13485     case 1: /* normal fp */
  13486         /* convert insn encoded size to MemOp size */
  13487         switch (size) {
  13488         case 0: /* half-precision */
  13489             size = MO_16;
  13490             is_fp16 = true;
  13491             break;
  13492         case MO_32: /* single precision */
  13493         case MO_64: /* double precision */
  13494             break;
  13495         default:
  13496             unallocated_encoding(s);
  13497             return;
  13498         }
  13499         break;
  13500 
  13501     case 2: /* complex fp */
  13502         /* Each indexable element is a complex pair.  */
  13503         size += 1;
  13504         switch (size) {
  13505         case MO_32:
  13506             if (h && !is_q) {
  13507                 unallocated_encoding(s);
  13508                 return;
  13509             }
  13510             is_fp16 = true;
  13511             break;
  13512         case MO_64:
  13513             break;
  13514         default:
  13515             unallocated_encoding(s);
  13516             return;
  13517         }
  13518         break;
  13519 
  13520     default: /* integer */
  13521         switch (size) {
  13522         case MO_8:
  13523         case MO_64:
  13524             unallocated_encoding(s);
  13525             return;
  13526         }
  13527         break;
  13528     }
  13529     if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
  13530         unallocated_encoding(s);
  13531         return;
  13532     }
  13533 
  13534     /* Given MemOp size, adjust register and indexing.  */
  13535     switch (size) {
  13536     case MO_16:
  13537         index = h << 2 | l << 1 | m;
  13538         break;
  13539     case MO_32:
  13540         index = h << 1 | l;
  13541         rm |= m << 4;
  13542         break;
  13543     case MO_64:
  13544         if (l || !is_q) {
  13545             unallocated_encoding(s);
  13546             return;
  13547         }
  13548         index = h;
  13549         rm |= m << 4;
  13550         break;
  13551     default:
  13552         g_assert_not_reached();
  13553     }
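            /*
             * Worked example of the adjustment above: for MO_16 the
             * element index is h:l:m, so h == 1, l == 0, m == 1 selects
             * element 5, and Rm stays a 4-bit register number (V0-V15);
             * for MO_32/MO_64, m instead supplies bit 4 of a full 5-bit Rm.
             */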
  13554 
  13555     if (!fp_access_check(s)) {
  13556         return;
  13557     }
  13558 
  13559     if (is_fp) {
  13560         fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
  13561     } else {
  13562         fpst = NULL;
  13563     }
  13564 
  13565     switch (16 * u + opcode) {
  13566     case 0x0e: /* SDOT */
  13567     case 0x1e: /* UDOT */
  13568         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
  13569                          u ? gen_helper_gvec_udot_idx_b
  13570                          : gen_helper_gvec_sdot_idx_b);
  13571         return;
  13572     case 0x0f:
  13573         switch (extract32(insn, 22, 2)) {
  13574         case 0: /* SUDOT */
  13575             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
  13576                              gen_helper_gvec_sudot_idx_b);
  13577             return;
  13578         case 1: /* BFDOT */
  13579             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
  13580                              gen_helper_gvec_bfdot_idx);
  13581             return;
  13582         case 2: /* USDOT */
  13583             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
  13584                              gen_helper_gvec_usdot_idx_b);
  13585             return;
  13586         case 3: /* BFMLAL{B,T} */
  13587             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
  13588                               gen_helper_gvec_bfmlal_idx);
  13589             return;
  13590         }
  13591         g_assert_not_reached();
  13592     case 0x11: /* FCMLA #0 */
  13593     case 0x13: /* FCMLA #90 */
  13594     case 0x15: /* FCMLA #180 */
  13595     case 0x17: /* FCMLA #270 */
  13596         {
  13597             int rot = extract32(insn, 13, 2);
  13598             int data = (index << 2) | rot;
  13599             tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
  13600                                vec_full_reg_offset(s, rn),
  13601                                vec_full_reg_offset(s, rm),
  13602                                vec_full_reg_offset(s, rd), fpst,
  13603                                is_q ? 16 : 8, vec_full_reg_size(s), data,
  13604                                size == MO_64
  13605                                ? gen_helper_gvec_fcmlas_idx
  13606                                : gen_helper_gvec_fcmlah_idx);
  13607             tcg_temp_free_ptr(fpst);
  13608         }
  13609         return;
  13610 
  13611     case 0x00: /* FMLAL */
  13612     case 0x04: /* FMLSL */
  13613     case 0x18: /* FMLAL2 */
  13614     case 0x1c: /* FMLSL2 */
  13615         {
  13616             int is_s = extract32(opcode, 2, 1);
  13617             int is_2 = u;
  13618             int data = (index << 2) | (is_2 << 1) | is_s;
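                    /*
                     * The gvec immediate packs is_s into bit 0 (FMLSL vs
                     * FMLAL), is_2 into bit 1 (the "2", i.e. upper-half,
                     * forms) and the element index above them.
                     */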
  13619             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
  13620                                vec_full_reg_offset(s, rn),
  13621                                vec_full_reg_offset(s, rm), cpu_env,
  13622                                is_q ? 16 : 8, vec_full_reg_size(s),
  13623                                data, gen_helper_gvec_fmlal_idx_a64);
  13624         }
  13625         return;
  13626 
  13627     case 0x08: /* MUL */
  13628         if (!is_long && !is_scalar) {
  13629             static gen_helper_gvec_3 * const fns[3] = {
  13630                 gen_helper_gvec_mul_idx_h,
  13631                 gen_helper_gvec_mul_idx_s,
  13632                 gen_helper_gvec_mul_idx_d,
  13633             };
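                    /*
                     * size is MO_16/MO_32/MO_64 here (MO_8 was rejected
                     * by the integer-size check earlier), so size - 1
                     * indexes the _h/_s/_d helpers.
                     */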
  13634             tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
  13635                                vec_full_reg_offset(s, rn),
  13636                                vec_full_reg_offset(s, rm),
  13637                                is_q ? 16 : 8, vec_full_reg_size(s),
  13638                                index, fns[size - 1]);
  13639             return;
  13640         }
  13641         break;
  13642 
  13643     case 0x10: /* MLA */
  13644         if (!is_long && !is_scalar) {
  13645             static gen_helper_gvec_4 * const fns[3] = {
  13646                 gen_helper_gvec_mla_idx_h,
  13647                 gen_helper_gvec_mla_idx_s,
  13648                 gen_helper_gvec_mla_idx_d,
  13649             };
  13650             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
  13651                                vec_full_reg_offset(s, rn),
  13652                                vec_full_reg_offset(s, rm),
  13653                                vec_full_reg_offset(s, rd),
  13654                                is_q ? 16 : 8, vec_full_reg_size(s),
  13655                                index, fns[size - 1]);
  13656             return;
  13657         }
  13658         break;
  13659 
  13660     case 0x14: /* MLS */
  13661         if (!is_long && !is_scalar) {
  13662             static gen_helper_gvec_4 * const fns[3] = {
  13663                 gen_helper_gvec_mls_idx_h,
  13664                 gen_helper_gvec_mls_idx_s,
  13665                 gen_helper_gvec_mls_idx_d,
  13666             };
  13667             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
  13668                                vec_full_reg_offset(s, rn),
  13669                                vec_full_reg_offset(s, rm),
  13670                                vec_full_reg_offset(s, rd),
  13671                                is_q ? 16 : 8, vec_full_reg_size(s),
  13672                                index, fns[size - 1]);
  13673             return;
  13674         }
  13675         break;
  13676     }
  13677 
  13678     if (size == 3) {
  13679         TCGv_i64 tcg_idx = tcg_temp_new_i64();
  13680         int pass;
  13681 
  13682         assert(is_fp && is_q && !is_long);
  13683 
  13684         read_vec_element(s, tcg_idx, rm, index, MO_64);
  13685 
  13686         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
  13687             TCGv_i64 tcg_op = tcg_temp_new_i64();
  13688             TCGv_i64 tcg_res = tcg_temp_new_i64();
  13689 
  13690             read_vec_element(s, tcg_op, rn, pass, MO_64);
  13691 
  13692             switch (16 * u + opcode) {
  13693             case 0x05: /* FMLS */
  13694                 /* As usual for ARM, separate negation for fused multiply-add */
  13695                 gen_helper_vfp_negd(tcg_op, tcg_op);
  13696                 /* fall through */
  13697             case 0x01: /* FMLA */
  13698                 read_vec_element(s, tcg_res, rd, pass, MO_64);
  13699                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
  13700                 break;
  13701             case 0x09: /* FMUL */
  13702                 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
  13703                 break;
  13704             case 0x19: /* FMULX */
  13705                 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
  13706                 break;
  13707             default:
  13708                 g_assert_not_reached();
  13709             }
  13710 
  13711             write_vec_element(s, tcg_res, rd, pass, MO_64);
  13712             tcg_temp_free_i64(tcg_op);
  13713             tcg_temp_free_i64(tcg_res);
  13714         }
  13715 
  13716         tcg_temp_free_i64(tcg_idx);
  13717         clear_vec_high(s, !is_scalar, rd);
  13718     } else if (!is_long) {
  13719         /* 32 bit floating point, or 16 or 32 bit integer.
  13720          * For the 16 bit scalar case we use the usual Neon helpers and
  13721          * rely on the fact that 0 op 0 == 0 with no side effects.
  13722          */
  13723         TCGv_i32 tcg_idx = tcg_temp_new_i32();
  13724         int pass, maxpasses;
  13725 
  13726         if (is_scalar) {
  13727             maxpasses = 1;
  13728         } else {
  13729             maxpasses = is_q ? 4 : 2;
  13730         }
  13731 
  13732         read_vec_element_i32(s, tcg_idx, rm, index, size);
  13733 
  13734         if (size == 1 && !is_scalar) {
  13735             /* The simplest way to handle the 16x16 indexed ops is to duplicate
  13736              * the index into both halves of the 32 bit tcg_idx and then use
  13737              * the usual Neon helpers.
  13738              */
  13739             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
  13740         }
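                /*
                 * e.g. a 16-bit index element 0x1234 becomes 0x12341234,
                 * so each 32-bit Neon helper call below multiplies both
                 * 16-bit lanes of tcg_op by the indexed element at once.
                 */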
  13741 
  13742         for (pass = 0; pass < maxpasses; pass++) {
  13743             TCGv_i32 tcg_op = tcg_temp_new_i32();
  13744             TCGv_i32 tcg_res = tcg_temp_new_i32();
  13745 
  13746             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
  13747 
  13748             switch (16 * u + opcode) {
  13749             case 0x08: /* MUL */
  13750             case 0x10: /* MLA */
  13751             case 0x14: /* MLS */
  13752             {
  13753                 static NeonGenTwoOpFn * const fns[2][2] = {
  13754                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
  13755                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
  13756                 };
  13757                 NeonGenTwoOpFn *genfn;
  13758                 bool is_sub = opcode == 0x4;
  13759 
  13760                 if (size == 1) {
  13761                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
  13762                 } else {
  13763                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
  13764                 }
  13765                 if (opcode == 0x8) {
  13766                     break;
  13767                 }
  13768                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
  13769                 genfn = fns[size - 1][is_sub];
  13770                 genfn(tcg_res, tcg_op, tcg_res);
  13771                 break;
  13772             }
  13773             case 0x05: /* FMLS */
  13774             case 0x01: /* FMLA */
  13775                 read_vec_element_i32(s, tcg_res, rd, pass,
  13776                                      is_scalar ? size : MO_32);
  13777                 switch (size) {
  13778                 case 1:
  13779                     if (opcode == 0x5) {
  13780                         /* As usual for ARM, separate negation for fused
  13781                          * multiply-add */
  13782                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
  13783                     }
  13784                     if (is_scalar) {
  13785                         gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
  13786                                                    tcg_res, fpst);
  13787                     } else {
  13788                         gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
  13789                                                     tcg_res, fpst);
  13790                     }
  13791                     break;
  13792                 case 2:
  13793                     if (opcode == 0x5) {
  13794                         /* As usual for ARM, separate negation for
  13795                          * fused multiply-add */
  13796                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
  13797                     }
  13798                     gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
  13799                                            tcg_res, fpst);
  13800                     break;
  13801                 default:
  13802                     g_assert_not_reached();
  13803                 }
  13804                 break;
  13805             case 0x09: /* FMUL */
  13806                 switch (size) {
  13807                 case 1:
  13808                     if (is_scalar) {
  13809                         gen_helper_advsimd_mulh(tcg_res, tcg_op,
  13810                                                 tcg_idx, fpst);
  13811                     } else {
  13812                         gen_helper_advsimd_mul2h(tcg_res, tcg_op,
  13813                                                  tcg_idx, fpst);
  13814                     }
  13815                     break;
  13816                 case 2:
  13817                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
  13818                     break;
  13819                 default:
  13820                     g_assert_not_reached();
  13821                 }
  13822                 break;
  13823             case 0x19: /* FMULX */
  13824                 switch (size) {
  13825                 case 1:
  13826                     if (is_scalar) {
  13827                         gen_helper_advsimd_mulxh(tcg_res, tcg_op,
  13828                                                  tcg_idx, fpst);
  13829                     } else {
  13830                         gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
  13831                                                   tcg_idx, fpst);
  13832                     }
  13833                     break;
  13834                 case 2:
  13835                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
  13836                     break;
  13837                 default:
  13838                     g_assert_not_reached();
  13839                 }
  13840                 break;
  13841             case 0x0c: /* SQDMULH */
  13842                 if (size == 1) {
  13843                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
  13844                                                tcg_op, tcg_idx);
  13845                 } else {
  13846                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
  13847                                                tcg_op, tcg_idx);
  13848                 }
  13849                 break;
  13850             case 0x0d: /* SQRDMULH */
  13851                 if (size == 1) {
  13852                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
  13853                                                 tcg_op, tcg_idx);
  13854                 } else {
  13855                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
  13856                                                 tcg_op, tcg_idx);
  13857                 }
  13858                 break;
  13859             case 0x1d: /* SQRDMLAH */
  13860                 read_vec_element_i32(s, tcg_res, rd, pass,
  13861                                      is_scalar ? size : MO_32);
  13862                 if (size == 1) {
  13863                     gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
  13864                                                 tcg_op, tcg_idx, tcg_res);
  13865                 } else {
  13866                     gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
  13867                                                 tcg_op, tcg_idx, tcg_res);
  13868                 }
  13869                 break;
  13870             case 0x1f: /* SQRDMLSH */
  13871                 read_vec_element_i32(s, tcg_res, rd, pass,
  13872                                      is_scalar ? size : MO_32);
  13873                 if (size == 1) {
  13874                     gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
  13875                                                 tcg_op, tcg_idx, tcg_res);
  13876                 } else {
  13877                     gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
  13878                                                 tcg_op, tcg_idx, tcg_res);
  13879                 }
  13880                 break;
  13881             default:
  13882                 g_assert_not_reached();
  13883             }
  13884 
  13885             if (is_scalar) {
  13886                 write_fp_sreg(s, rd, tcg_res);
  13887             } else {
  13888                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
  13889             }
  13890 
  13891             tcg_temp_free_i32(tcg_op);
  13892             tcg_temp_free_i32(tcg_res);
  13893         }
  13894 
  13895         tcg_temp_free_i32(tcg_idx);
  13896         clear_vec_high(s, is_q, rd);
  13897     } else {
  13898         /* long ops: 16x16->32 or 32x32->64 */
  13899         TCGv_i64 tcg_res[2];
  13900         int pass;
  13901         bool satop = extract32(opcode, 0, 1);
  13902         MemOp memop = MO_32;
  13903 
  13904         if (satop || !u) {
  13905             memop |= MO_SIGN;
  13906         }
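                /*
                 * The saturating doubling ops (SQDMULL/SQDMLAL/SQDMLSL)
                 * are always signed; for the plain widening ops the U
                 * bit selects unsigned (UMULL etc) over signed.
                 */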
  13907 
  13908         if (size == 2) {
  13909             TCGv_i64 tcg_idx = tcg_temp_new_i64();
  13910 
  13911             read_vec_element(s, tcg_idx, rm, index, memop);
  13912 
  13913             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
  13914                 TCGv_i64 tcg_op = tcg_temp_new_i64();
  13915                 TCGv_i64 tcg_passres;
  13916                 int passelt;
  13917 
  13918                 if (is_scalar) {
  13919                     passelt = 0;
  13920                 } else {
  13921                     passelt = pass + (is_q * 2);
  13922                 }
  13923 
  13924                 read_vec_element(s, tcg_op, rn, passelt, memop);
  13925 
  13926                 tcg_res[pass] = tcg_temp_new_i64();
  13927 
  13928                 if (opcode == 0xa || opcode == 0xb) {
  13929                     /* Non-accumulating ops */
  13930                     tcg_passres = tcg_res[pass];
  13931                 } else {
  13932                     tcg_passres = tcg_temp_new_i64();
  13933                 }
  13934 
  13935                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
  13936                 tcg_temp_free_i64(tcg_op);
  13937 
  13938                 if (satop) {
  13939                     /* doubling via a saturating add of the product to itself */
  13940                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
  13941                                                       tcg_passres, tcg_passres);
  13942                 }
  13943 
  13944                 if (opcode == 0xa || opcode == 0xb) {
  13945                     continue;
  13946                 }
  13947 
  13948                 /* Accumulating op: handle accumulate step */
  13949                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
  13950 
  13951                 switch (opcode) {
  13952                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
  13953                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
  13954                     break;
  13955                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
  13956                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
  13957                     break;
  13958                 case 0x7: /* SQDMLSL, SQDMLSL2 */
  13959                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
  13960                     /* fall through */
  13961                 case 0x3: /* SQDMLAL, SQDMLAL2 */
  13962                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
  13963                                                       tcg_res[pass],
  13964                                                       tcg_passres);
  13965                     break;
  13966                 default:
  13967                     g_assert_not_reached();
  13968                 }
  13969                 tcg_temp_free_i64(tcg_passres);
  13970             }
  13971             tcg_temp_free_i64(tcg_idx);
  13972 
  13973             clear_vec_high(s, !is_scalar, rd);
  13974         } else {
  13975             TCGv_i32 tcg_idx = tcg_temp_new_i32();
  13976 
  13977             assert(size == 1);
  13978             read_vec_element_i32(s, tcg_idx, rm, index, size);
  13979 
  13980             if (!is_scalar) {
  13981                 /* The simplest way to handle the 16x16 indexed ops is to
  13982                  * duplicate the index into both halves of the 32 bit tcg_idx
  13983                  * and then use the usual Neon helpers.
  13984                  */
  13985                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
  13986             }
  13987 
  13988             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
  13989                 TCGv_i32 tcg_op = tcg_temp_new_i32();
  13990                 TCGv_i64 tcg_passres;
  13991 
  13992                 if (is_scalar) {
  13993                     read_vec_element_i32(s, tcg_op, rn, pass, size);
  13994                 } else {
  13995                     read_vec_element_i32(s, tcg_op, rn,
  13996                                          pass + (is_q * 2), MO_32);
  13997                 }
  13998 
  13999                 tcg_res[pass] = tcg_temp_new_i64();
  14000 
  14001                 if (opcode == 0xa || opcode == 0xb) {
  14002                     /* Non-accumulating ops */
  14003                     tcg_passres = tcg_res[pass];
  14004                 } else {
  14005                     tcg_passres = tcg_temp_new_i64();
  14006                 }
  14007 
  14008                 if (memop & MO_SIGN) {
  14009                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
  14010                 } else {
  14011                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
  14012                 }
  14013                 if (satop) {
  14014                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
  14015                                                       tcg_passres, tcg_passres);
  14016                 }
  14017                 tcg_temp_free_i32(tcg_op);
  14018 
  14019                 if (opcode == 0xa || opcode == 0xb) {
  14020                     continue;
  14021                 }
  14022 
  14023                 /* Accumulating op: handle accumulate step */
  14024                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
  14025 
  14026                 switch (opcode) {
  14027                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
  14028                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
  14029                                              tcg_passres);
  14030                     break;
  14031                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
  14032                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
  14033                                              tcg_passres);
  14034                     break;
  14035                 case 0x7: /* SQDMLSL, SQDMLSL2 */
  14036                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
  14037                     /* fall through */
  14038                 case 0x3: /* SQDMLAL, SQDMLAL2 */
  14039                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
  14040                                                       tcg_res[pass],
  14041                                                       tcg_passres);
  14042                     break;
  14043                 default:
  14044                     g_assert_not_reached();
  14045                 }
  14046                 tcg_temp_free_i64(tcg_passres);
  14047             }
  14048             tcg_temp_free_i32(tcg_idx);
  14049 
  14050             if (is_scalar) {
  14051                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
  14052             }
  14053         }
  14054 
  14055         if (is_scalar) {
  14056             tcg_res[1] = tcg_constant_i64(0);
  14057         }
  14058 
  14059         for (pass = 0; pass < 2; pass++) {
  14060             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
  14061             tcg_temp_free_i64(tcg_res[pass]);
  14062         }
  14063     }
  14064 
  14065     if (fpst) {
  14066         tcg_temp_free_ptr(fpst);
  14067     }
  14068 }
  14069 
  14070 /* Crypto AES
  14071  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
  14072  * +-----------------+------+-----------+--------+-----+------+------+
  14073  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
  14074  * +-----------------+------+-----------+--------+-----+------+------+
  14075  */
  14076 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
  14077 {
  14078     int size = extract32(insn, 22, 2);
  14079     int opcode = extract32(insn, 12, 5);
  14080     int rn = extract32(insn, 5, 5);
  14081     int rd = extract32(insn, 0, 5);
  14082     int decrypt;
  14083     gen_helper_gvec_2 *genfn2 = NULL;
  14084     gen_helper_gvec_3 *genfn3 = NULL;
  14085 
  14086     if (!dc_isar_feature(aa64_aes, s) || size != 0) {
  14087         unallocated_encoding(s);
  14088         return;
  14089     }
  14090 
  14091     switch (opcode) {
  14092     case 0x4: /* AESE */
  14093         decrypt = 0;
  14094         genfn3 = gen_helper_crypto_aese;
  14095         break;
  14096     case 0x6: /* AESMC */
  14097         decrypt = 0;
  14098         genfn2 = gen_helper_crypto_aesmc;
  14099         break;
  14100     case 0x5: /* AESD */
  14101         decrypt = 1;
  14102         genfn3 = gen_helper_crypto_aese;
  14103         break;
  14104     case 0x7: /* AESIMC */
  14105         decrypt = 1;
  14106         genfn2 = gen_helper_crypto_aesmc;
  14107         break;
  14108     default:
  14109         unallocated_encoding(s);
  14110         return;
  14111     }
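            /*
             * AESE/AESD share one helper, as do AESMC/AESIMC; the decrypt
             * immediate selects the inverse transform. AESE/AESD take a
             * third operand because those insns XOR Vd with Vn (the
             * AddRoundKey step) before the rest of the round, while
             * AESMC/AESIMC purely transform Rn.
             */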
  14112 
  14113     if (!fp_access_check(s)) {
  14114         return;
  14115     }
  14116     if (genfn2) {
  14117         gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2);
  14118     } else {
  14119         gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3);
  14120     }
  14121 }
  14122 
  14123 /* Crypto three-reg SHA
  14124  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
  14125  * +-----------------+------+---+------+---+--------+-----+------+------+
  14126  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
  14127  * +-----------------+------+---+------+---+--------+-----+------+------+
  14128  */
  14129 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
  14130 {
  14131     int size = extract32(insn, 22, 2);
  14132     int opcode = extract32(insn, 12, 3);
  14133     int rm = extract32(insn, 16, 5);
  14134     int rn = extract32(insn, 5, 5);
  14135     int rd = extract32(insn, 0, 5);
  14136     gen_helper_gvec_3 *genfn;
  14137     bool feature;
  14138 
  14139     if (size != 0) {
  14140         unallocated_encoding(s);
  14141         return;
  14142     }
  14143 
  14144     switch (opcode) {
  14145     case 0: /* SHA1C */
  14146         genfn = gen_helper_crypto_sha1c;
  14147         feature = dc_isar_feature(aa64_sha1, s);
  14148         break;
  14149     case 1: /* SHA1P */
  14150         genfn = gen_helper_crypto_sha1p;
  14151         feature = dc_isar_feature(aa64_sha1, s);
  14152         break;
  14153     case 2: /* SHA1M */
  14154         genfn = gen_helper_crypto_sha1m;
  14155         feature = dc_isar_feature(aa64_sha1, s);
  14156         break;
  14157     case 3: /* SHA1SU0 */
  14158         genfn = gen_helper_crypto_sha1su0;
  14159         feature = dc_isar_feature(aa64_sha1, s);
  14160         break;
  14161     case 4: /* SHA256H */
  14162         genfn = gen_helper_crypto_sha256h;
  14163         feature = dc_isar_feature(aa64_sha256, s);
  14164         break;
  14165     case 5: /* SHA256H2 */
  14166         genfn = gen_helper_crypto_sha256h2;
  14167         feature = dc_isar_feature(aa64_sha256, s);
  14168         break;
  14169     case 6: /* SHA256SU1 */
  14170         genfn = gen_helper_crypto_sha256su1;
  14171         feature = dc_isar_feature(aa64_sha256, s);
  14172         break;
  14173     default:
  14174         unallocated_encoding(s);
  14175         return;
  14176     }
  14177 
  14178     if (!feature) {
  14179         unallocated_encoding(s);
  14180         return;
  14181     }
  14182 
  14183     if (!fp_access_check(s)) {
  14184         return;
  14185     }
  14186     gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
  14187 }
  14188 
  14189 /* Crypto two-reg SHA
  14190  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
  14191  * +-----------------+------+-----------+--------+-----+------+------+
  14192  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
  14193  * +-----------------+------+-----------+--------+-----+------+------+
  14194  */
  14195 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
  14196 {
  14197     int size = extract32(insn, 22, 2);
  14198     int opcode = extract32(insn, 12, 5);
  14199     int rn = extract32(insn, 5, 5);
  14200     int rd = extract32(insn, 0, 5);
  14201     gen_helper_gvec_2 *genfn;
  14202     bool feature;
  14203 
  14204     if (size != 0) {
  14205         unallocated_encoding(s);
  14206         return;
  14207     }
  14208 
  14209     switch (opcode) {
  14210     case 0: /* SHA1H */
  14211         feature = dc_isar_feature(aa64_sha1, s);
  14212         genfn = gen_helper_crypto_sha1h;
  14213         break;
  14214     case 1: /* SHA1SU1 */
  14215         feature = dc_isar_feature(aa64_sha1, s);
  14216         genfn = gen_helper_crypto_sha1su1;
  14217         break;
  14218     case 2: /* SHA256SU0 */
  14219         feature = dc_isar_feature(aa64_sha256, s);
  14220         genfn = gen_helper_crypto_sha256su0;
  14221         break;
  14222     default:
  14223         unallocated_encoding(s);
  14224         return;
  14225     }
  14226 
  14227     if (!feature) {
  14228         unallocated_encoding(s);
  14229         return;
  14230     }
  14231 
  14232     if (!fp_access_check(s)) {
  14233         return;
  14234     }
  14235     gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
  14236 }
  14237 
  14238 static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
  14239 {
  14240     tcg_gen_rotli_i64(d, m, 1);
  14241     tcg_gen_xor_i64(d, d, n);
  14242 }
  14243 
  14244 static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
  14245 {
  14246     tcg_gen_rotli_vec(vece, d, m, 1);
  14247     tcg_gen_xor_vec(vece, d, d, n);
  14248 }
  14249 
  14250 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
  14251                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
  14252 {
  14253     static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
  14254     static const GVecGen3 op = {
  14255         .fni8 = gen_rax1_i64,
  14256         .fniv = gen_rax1_vec,
  14257         .opt_opc = vecop_list,
  14258         .fno = gen_helper_crypto_rax1,
  14259         .vece = MO_64,
  14260     };
  14261     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
  14262 }
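
        /*
         * RAX1 computes d = n ^ rol64(m, 1) in each 64-bit lane, matching
         * gen_rax1_i64() above. As a minimal scalar sketch of the same
         * operation (assuming plain uint64_t values rather than vector
         * registers):
         *
         *     uint64_t rax1(uint64_t n, uint64_t m)
         *     {
         *         return n ^ ((m << 1) | (m >> 63));
         *     }
         */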
  14263 
  14264 /* Crypto three-reg SHA512
  14265  *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
  14266  * +-----------------------+------+---+---+-----+--------+------+------+
  14267  * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
  14268  * +-----------------------+------+---+---+-----+--------+------+------+
  14269  */
  14270 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
  14271 {
  14272     int opcode = extract32(insn, 10, 2);
  14273     int o = extract32(insn, 14, 1);
  14274     int rm = extract32(insn, 16, 5);
  14275     int rn = extract32(insn, 5, 5);
  14276     int rd = extract32(insn, 0, 5);
  14277     bool feature;
  14278     gen_helper_gvec_3 *oolfn = NULL;
  14279     GVecGen3Fn *gvecfn = NULL;
  14280 
  14281     if (o == 0) {
  14282         switch (opcode) {
  14283         case 0: /* SHA512H */
  14284             feature = dc_isar_feature(aa64_sha512, s);
  14285             oolfn = gen_helper_crypto_sha512h;
  14286             break;
  14287         case 1: /* SHA512H2 */
  14288             feature = dc_isar_feature(aa64_sha512, s);
  14289             oolfn = gen_helper_crypto_sha512h2;
  14290             break;
  14291         case 2: /* SHA512SU1 */
  14292             feature = dc_isar_feature(aa64_sha512, s);
  14293             oolfn = gen_helper_crypto_sha512su1;
  14294             break;
  14295         case 3: /* RAX1 */
  14296             feature = dc_isar_feature(aa64_sha3, s);
  14297             gvecfn = gen_gvec_rax1;
  14298             break;
  14299         default:
  14300             g_assert_not_reached();
  14301         }
  14302     } else {
  14303         switch (opcode) {
  14304         case 0: /* SM3PARTW1 */
  14305             feature = dc_isar_feature(aa64_sm3, s);
  14306             oolfn = gen_helper_crypto_sm3partw1;
  14307             break;
  14308         case 1: /* SM3PARTW2 */
  14309             feature = dc_isar_feature(aa64_sm3, s);
  14310             oolfn = gen_helper_crypto_sm3partw2;
  14311             break;
  14312         case 2: /* SM4EKEY */
  14313             feature = dc_isar_feature(aa64_sm4, s);
  14314             oolfn = gen_helper_crypto_sm4ekey;
  14315             break;
  14316         default:
  14317             unallocated_encoding(s);
  14318             return;
  14319         }
  14320     }
  14321 
  14322     if (!feature) {
  14323         unallocated_encoding(s);
  14324         return;
  14325     }
  14326 
  14327     if (!fp_access_check(s)) {
  14328         return;
  14329     }
  14330 
  14331     if (oolfn) {
  14332         gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
  14333     } else {
  14334         gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
  14335     }
  14336 }
  14337 
  14338 /* Crypto two-reg SHA512
  14339  *  31                                     12  11  10  9    5 4    0
  14340  * +-----------------------------------------+--------+------+------+
  14341  * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
  14342  * +-----------------------------------------+--------+------+------+
  14343  */
  14344 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
  14345 {
  14346     int opcode = extract32(insn, 10, 2);
  14347     int rn = extract32(insn, 5, 5);
  14348     int rd = extract32(insn, 0, 5);
  14349     bool feature;
  14350 
  14351     switch (opcode) {
  14352     case 0: /* SHA512SU0 */
  14353         feature = dc_isar_feature(aa64_sha512, s);
  14354         break;
  14355     case 1: /* SM4E */
  14356         feature = dc_isar_feature(aa64_sm4, s);
  14357         break;
  14358     default:
  14359         unallocated_encoding(s);
  14360         return;
  14361     }
  14362 
  14363     if (!feature) {
  14364         unallocated_encoding(s);
  14365         return;
  14366     }
  14367 
  14368     if (!fp_access_check(s)) {
  14369         return;
  14370     }
  14371 
  14372     switch (opcode) {
  14373     case 0: /* SHA512SU0 */
  14374         gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
  14375         break;
  14376     case 1: /* SM4E */
  14377         gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
  14378         break;
  14379     default:
  14380         g_assert_not_reached();
  14381     }
  14382 }
  14383 
  14384 /* Crypto four-register
  14385  *  31               23 22 21 20  16 15  14  10 9    5 4    0
  14386  * +-------------------+-----+------+---+------+------+------+
  14387  * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
  14388  * +-------------------+-----+------+---+------+------+------+
  14389  */
  14390 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
  14391 {
  14392     int op0 = extract32(insn, 21, 2);
  14393     int rm = extract32(insn, 16, 5);
  14394     int ra = extract32(insn, 10, 5);
  14395     int rn = extract32(insn, 5, 5);
  14396     int rd = extract32(insn, 0, 5);
  14397     bool feature;
  14398 
  14399     switch (op0) {
  14400     case 0: /* EOR3 */
  14401     case 1: /* BCAX */
  14402         feature = dc_isar_feature(aa64_sha3, s);
  14403         break;
  14404     case 2: /* SM3SS1 */
  14405         feature = dc_isar_feature(aa64_sm3, s);
  14406         break;
  14407     default:
  14408         unallocated_encoding(s);
  14409         return;
  14410     }
  14411 
  14412     if (!feature) {
  14413         unallocated_encoding(s);
  14414         return;
  14415     }
  14416 
  14417     if (!fp_access_check(s)) {
  14418         return;
  14419     }
  14420 
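            /*
             * Both sha3 ops are expanded inline, one 64-bit pass per half
             * of the vector: EOR3 computes Vd = Vn ^ Vm ^ Va, and BCAX
             * computes Vd = Vn ^ (Vm & ~Va).
             */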
  14421     if (op0 < 2) {
  14422         TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
  14423         int pass;
  14424 
  14425         tcg_op1 = tcg_temp_new_i64();
  14426         tcg_op2 = tcg_temp_new_i64();
  14427         tcg_op3 = tcg_temp_new_i64();
  14428         tcg_res[0] = tcg_temp_new_i64();
  14429         tcg_res[1] = tcg_temp_new_i64();
  14430 
  14431         for (pass = 0; pass < 2; pass++) {
  14432             read_vec_element(s, tcg_op1, rn, pass, MO_64);
  14433             read_vec_element(s, tcg_op2, rm, pass, MO_64);
  14434             read_vec_element(s, tcg_op3, ra, pass, MO_64);
  14435 
  14436             if (op0 == 0) {
  14437                 /* EOR3 */
  14438                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
  14439             } else {
  14440                 /* BCAX */
  14441                 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
  14442             }
  14443             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
  14444         }
  14445         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
  14446         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
  14447 
  14448         tcg_temp_free_i64(tcg_op1);
  14449         tcg_temp_free_i64(tcg_op2);
  14450         tcg_temp_free_i64(tcg_op3);
  14451         tcg_temp_free_i64(tcg_res[0]);
  14452         tcg_temp_free_i64(tcg_res[1]);
  14453     } else {
  14454         TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
  14455 
  14456         tcg_op1 = tcg_temp_new_i32();
  14457         tcg_op2 = tcg_temp_new_i32();
  14458         tcg_op3 = tcg_temp_new_i32();
  14459         tcg_res = tcg_temp_new_i32();
  14460         tcg_zero = tcg_constant_i32(0);
  14461 
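                /*
                 * SM3SS1 produces a single 32-bit result:
                 *   Vd.S[3] = ROL32(ROL32(Vn.S[3], 12) + Vm.S[3] + Va.S[3], 7)
                 * (rotate right by 20 == rotate left by 12, right by 25 ==
                 * left by 7); the other three elements of Vd are zeroed.
                 */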
  14462         read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
  14463         read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
  14464         read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
  14465 
  14466         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
  14467         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
  14468         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
  14469         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
  14470 
  14471         write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
  14472         write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
  14473         write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
  14474         write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
  14475 
  14476         tcg_temp_free_i32(tcg_op1);
  14477         tcg_temp_free_i32(tcg_op2);
  14478         tcg_temp_free_i32(tcg_op3);
  14479         tcg_temp_free_i32(tcg_res);
  14480     }
  14481 }
  14482 
  14483 /* Crypto XAR
  14484  *  31                   21 20  16 15    10 9    5 4    0
  14485  * +-----------------------+------+--------+------+------+
  14486  * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
  14487  * +-----------------------+------+--------+------+------+
  14488  */
  14489 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
  14490 {
  14491     int rm = extract32(insn, 16, 5);
  14492     int imm6 = extract32(insn, 10, 6);
  14493     int rn = extract32(insn, 5, 5);
  14494     int rd = extract32(insn, 0, 5);
  14495 
  14496     if (!dc_isar_feature(aa64_sha3, s)) {
  14497         unallocated_encoding(s);
  14498         return;
  14499     }
  14500 
  14501     if (!fp_access_check(s)) {
  14502         return;
  14503     }
  14504 
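            /*
             * XAR computes Vd = ROR64(Vn ^ Vm, imm6) per 64-bit lane;
             * gen_gvec_xar expands this inline, using host vector rotates
             * where the TCG backend provides them.
             */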
  14505     gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
  14506                  vec_full_reg_offset(s, rn),
  14507                  vec_full_reg_offset(s, rm), imm6, 16,
  14508                  vec_full_reg_size(s));
  14509 }
  14510 
  14511 /* Crypto three-reg imm2
  14512  *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
  14513  * +-----------------------+------+-----+------+--------+------+------+
  14514  * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
  14515  * +-----------------------+------+-----+------+--------+------+------+
  14516  */
  14517 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
  14518 {
  14519     static gen_helper_gvec_3 * const fns[4] = {
  14520         gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
  14521         gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
  14522     };
  14523     int opcode = extract32(insn, 10, 2);
  14524     int imm2 = extract32(insn, 12, 2);
  14525     int rm = extract32(insn, 16, 5);
  14526     int rn = extract32(insn, 5, 5);
  14527     int rd = extract32(insn, 0, 5);
  14528 
  14529     if (!dc_isar_feature(aa64_sm3, s)) {
  14530         unallocated_encoding(s);
  14531         return;
  14532     }
  14533 
  14534     if (!fp_access_check(s)) {
  14535         return;
  14536     }
  14537 
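            /*
             * opcode selects one of the four SM3TT helper variants above;
             * imm2 is passed through as the gvec helper data and picks
             * which 32-bit element of Vm feeds the TT round.
             */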
  14538     gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
  14539 }
  14540 
  14541 /* C3.6 Data processing - SIMD, inc Crypto
  14542  *
  14543  * As the decode gets a little complex, we use a table-based
  14544  * approach for this part of the decode.
  14545  */
  14546 static const AArch64DecodeTable data_proc_simd[] = {
  14547     /* pattern  ,  mask     ,  fn                        */
  14548     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
  14549     { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
  14550     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
  14551     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
  14552     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
  14553     { 0x0e000400, 0x9fe08400, disas_simd_copy },
  14554     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
  14555     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
  14556     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
  14557     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
  14558     { 0x0e000000, 0xbf208c00, disas_simd_tb },
  14559     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
  14560     { 0x2e000000, 0xbf208400, disas_simd_ext },
  14561     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
  14562     { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
  14563     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
  14564     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
  14565     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
  14566     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
  14567     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
  14568     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
  14569     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
  14570     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
  14571     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
  14572     { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
  14573     { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
  14574     { 0xce000000, 0xff808000, disas_crypto_four_reg },
  14575     { 0xce800000, 0xffe00000, disas_crypto_xar },
  14576     { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
  14577     { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
  14578     { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
  14579     { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
  14580     { 0x00000000, 0x00000000, NULL }
  14581 };
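        /*
         * Sketch of how this table is consumed (cf. lookup_disas_fn):
         * entries are tried in order and the first whose masked bits
         * match wins, so more specific patterns (e.g. simd_mod_imm vs
         * simd_shift_imm above) must come first; the all-zero mask
         * entry terminates the scan.  Roughly:
         *
         *     for (tptr = table; tptr->mask != 0; tptr++) {
         *         if ((insn & tptr->mask) == tptr->pattern) {
         *             return tptr->disas_fn;
         *         }
         *     }
         *     return NULL;
         */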
  14582 
  14583 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
  14584 {
  14585     /* Note that this is called with all non-FP cases from
  14586      * table C3-6, so it must UNDEF for entries not specifically
  14587      * allocated to instructions in that table.
  14588      */
  14589     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
  14590     if (fn) {
  14591         fn(s, insn);
  14592     } else {
  14593         unallocated_encoding(s);
  14594     }
  14595 }
  14596 
  14597 /* C3.6 Data processing - SIMD and floating point */
  14598 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
  14599 {
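            /*
             * Bit 28 set with bit 30 clear selects the scalar floating
             * point and FP<->integer conversion space; everything else in
             * this encoding group, including crypto, decodes as SIMD.
             */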
  14600     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
  14601         disas_data_proc_fp(s, insn);
  14602     } else {
  14603         /* SIMD, including crypto */
  14604         disas_data_proc_simd(s, insn);
  14605     }
  14606 }
  14607 
  14608 /*
  14609  * Include the generated SME FA64 decoder.
  14610  */
  14611 
  14612 #include "decode-sme-fa64.c.inc"
  14613 
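        /*
         * trans_OK and trans_FAIL are the callbacks for that decoder
         * (invoked from aarch64_tr_translate_insn when streaming-mode
         * traps are in force): patterns matched as OK are legal in
         * streaming SVE mode, while FAIL patterns flag the insn as
         * non-streaming so the access checks can raise an SME trap.
         */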
  14614 static bool trans_OK(DisasContext *s, arg_OK *a)
  14615 {
  14616     return true;
  14617 }
  14618 
  14619 static bool trans_FAIL(DisasContext *s, arg_OK *a)
  14620 {
  14621     s->is_nonstreaming = true;
  14622     return true;
  14623 }
  14624 
  14625 /**
  14626  * is_guarded_page:
  14627  * @env: The cpu environment
  14628  * @s: The DisasContext
  14629  *
  14630  * Return true if the page is guarded.
  14631  */
  14632 static bool is_guarded_page(CPUARMState *env, DisasContext *s)
  14633 {
  14634     uint64_t addr = s->base.pc_first;
  14635 #ifdef CONFIG_USER_ONLY
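            /*
             * For user-only, the GP bit is tracked in the page flags kept
             * by the user-mode mmap layer: PAGE_BTI is set for pages the
             * guest maps with PROT_BTI.
             */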
  14636     return page_get_flags(addr) & PAGE_BTI;
  14637 #else
  14638     CPUTLBEntryFull *full;
  14639     void *host;
  14640     int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
  14641     int flags;
  14642 
  14643     /*
  14644      * We test this immediately after reading an insn, which means
  14645      * that the TLB entry must be present and valid, and thus this
  14646      * access will never raise an exception.
  14647      */
  14648     flags = probe_access_full(env, addr, MMU_INST_FETCH, mmu_idx,
  14649                               false, &host, &full, 0);
  14650     assert(!(flags & TLB_INVALID_MASK));
  14651 
  14652     return full->guarded;
  14653 #endif
  14654 }
  14655 
  14656 /**
  14657  * btype_destination_ok:
  14658  * @insn: The instruction at the branch destination
  14659  * @bt: SCTLR_ELx.BT
  14660  * @btype: PSTATE.BTYPE; known to be non-zero here
  14661  *
  14662  * On a guarded page, only a limited set of insns may be
  14663  * present at the branch target:
  14664  *   - branch target identifiers,
  14665  *   - paciasp, pacibsp,
  14666  *   - BRK insn
  14667  *   - HLT insn
  14668  * Anything else causes a Branch Target Exception.
  14669  *
  14670  * Return true if the branch is compatible, false to raise BTITRAP.
  14671  */
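        /*
         * Summarising the checks below, for a non-zero btype on a
         * guarded page:
         *   btype 1: BTI c, BTI j, BTI jc and paciasp/pacibsp accepted;
         *   btype 2: BTI c, BTI jc and paciasp/pacibsp accepted;
         *   btype 3: BTI j and BTI jc accepted, paciasp/pacibsp only
         *            when SCTLR_ELx.BT is clear;
         *   BRK and HLT are accepted for any btype.
         */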
  14672 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
  14673 {
  14674     if ((insn & 0xfffff01fu) == 0xd503201fu) {
  14675         /* HINT space */
  14676         switch (extract32(insn, 5, 7)) {
  14677         case 0b011001: /* PACIASP */
  14678         case 0b011011: /* PACIBSP */
  14679             /*
  14680              * If SCTLR_ELx.BT, then PACI*SP are not compatible
  14681              * with btype == 3.  Otherwise all btype are ok.
  14682              */
  14683             return !bt || btype != 3;
  14684         case 0b100000: /* BTI */
  14685             /* Not compatible with any btype.  */
  14686             return false;
  14687         case 0b100010: /* BTI c */
  14688             /* Not compatible with btype == 3 */
  14689             return btype != 3;
  14690         case 0b100100: /* BTI j */
  14691             /* Not compatible with btype == 2 */
  14692             return btype != 2;
  14693         case 0b100110: /* BTI jc */
  14694             /* Compatible with any btype.  */
  14695             return true;
  14696         }
  14697     } else {
  14698         switch (insn & 0xffe0001fu) {
  14699         case 0xd4200000u: /* BRK */
  14700         case 0xd4400000u: /* HLT */
  14701             /* Give priority to the breakpoint exception.  */
  14702             return true;
  14703         }
  14704     }
  14705     return false;
  14706 }
  14707 
  14708 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
  14709                                           CPUState *cpu)
  14710 {
  14711     DisasContext *dc = container_of(dcbase, DisasContext, base);
  14712     CPUARMState *env = cpu->env_ptr;
  14713     ARMCPU *arm_cpu = env_archcpu(env);
  14714     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
  14715     int bound, core_mmu_idx;
  14716 
  14717     dc->isar = &arm_cpu->isar;
  14718     dc->condjmp = 0;
  14719     dc->pc_save = dc->base.pc_first;
  14720     dc->aarch64 = true;
  14721     dc->thumb = false;
  14722     dc->sctlr_b = 0;
  14723     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
  14724     dc->condexec_mask = 0;
  14725     dc->condexec_cond = 0;
  14726     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
  14727     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
  14728     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
  14729     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
  14730     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
  14731     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
  14732 #if !defined(CONFIG_USER_ONLY)
  14733     dc->user = (dc->current_el == 0);
  14734 #endif
  14735     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
  14736     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
  14737     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
  14738     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
  14739     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
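            /* VL/SVL are stored in tb_flags as (length in bytes / 16) - 1. */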
  14740     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
  14741     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
  14742     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
  14743     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
  14744     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
  14745     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
  14746     dc->ata = EX_TBFLAG_A64(tb_flags, ATA);
  14747     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
  14748     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
  14749     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
  14750     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
  14751     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
  14752     dc->vec_len = 0;
  14753     dc->vec_stride = 0;
  14754     dc->cp_regs = arm_cpu->cp_regs;
  14755     dc->features = env->features;
  14756     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
  14757 
  14758 #ifdef CONFIG_USER_ONLY
  14759     /* In sve_probe_page, we assume TBI is enabled. */
  14760     tcg_debug_assert(dc->tbid & 1);
  14761 #endif
  14762 
  14763     /* Single step state. The code-generation logic here is:
  14764      *  SS_ACTIVE == 0:
  14765      *   generate code with no special handling for single-stepping (except
  14766      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
  14767      *   this happens anyway because those changes are all system register or
  14768      *   PSTATE writes).
  14769      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
  14770      *   emit code for one insn
  14771      *   emit code to clear PSTATE.SS
  14772      *   emit code to generate software step exception for completed step
  14773      *   end TB (as usual for having generated an exception)
  14774      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
  14775      *   emit code to generate a software step exception
  14776      *   end the TB
  14777      */
  14778     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
  14779     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
  14780     dc->is_ldex = false;
  14781 
  14782     /* Bound the number of insns to execute to those left on the page.  */
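            /*
             * TARGET_PAGE_MASK has all high bits set, so the OR yields
             * (page offset - page size) as a signed value and the negation
             * gives the bytes remaining on the page.  E.g. with 4 KiB pages
             * and pc_first 8 bytes short of a boundary: -(-8) / 4 == 2.
             */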
  14783     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
  14784 
  14785     /* If architectural single step active, limit to 1.  */
  14786     if (dc->ss_active) {
  14787         bound = 1;
  14788     }
  14789     dc->base.max_insns = MIN(dc->base.max_insns, bound);
  14790 
  14791     init_tmp_a64_array(dc);
  14792 }
  14793 
  14794 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
  14795 {
  14796 }
  14797 
  14798 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
  14799 {
  14800     DisasContext *dc = container_of(dcbase, DisasContext, base);
  14801     target_ulong pc_arg = dc->base.pc_next;
  14802 
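            /*
             * With PC-relative TBs the same code block may run at several
             * virtual addresses, so only the page offset is recorded here;
             * on exception the full PC is rebuilt from the page base of
             * the current PC (see the CPU's restore_state_to_opc hook).
             */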
  14803     if (TARGET_TB_PCREL) {
  14804         pc_arg &= ~TARGET_PAGE_MASK;
  14805     }
  14806     tcg_gen_insn_start(pc_arg, 0, 0);
  14807     dc->insn_start = tcg_last_op();
  14808 }
  14809 
  14810 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
  14811 {
  14812     DisasContext *s = container_of(dcbase, DisasContext, base);
  14813     CPUARMState *env = cpu->env_ptr;
  14814     uint64_t pc = s->base.pc_next;
  14815     uint32_t insn;
  14816 
  14817     /* Singlestep exceptions have the highest priority. */
  14818     if (s->ss_active && !s->pstate_ss) {
  14819         /* Singlestep state is Active-pending.
  14820          * If we're in this state at the start of a TB then either
  14821          *  a) we just took an exception to an EL which is being debugged
  14822          *     and this is the first insn in the exception handler
  14823          *  b) debug exceptions were masked and we just unmasked them
  14824          *     without changing EL (e.g. by clearing PSTATE.D)
  14825          * In either case we're going to take a swstep exception in the
  14826          * "did not step an insn" case, and so the syndrome ISV and EX
  14827          * bits should be zero.
  14828          */
  14829         assert(s->base.num_insns == 1);
  14830         gen_swstep_exception(s, 0, 0);
  14831         s->base.is_jmp = DISAS_NORETURN;
  14832         s->base.pc_next = pc + 4;
  14833         return;
  14834     }
  14835 
  14836     if (pc & 3) {
  14837         /*
  14838          * PC alignment fault.  This has priority over the instruction abort
  14839          * that we would receive from a translation fault via arm_ldl_code.
  14840          * This should only be possible after an indirect branch, at the
  14841          * start of the TB.
  14842          */
  14843         assert(s->base.num_insns == 1);
  14844         gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
  14845         s->base.is_jmp = DISAS_NORETURN;
  14846         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
  14847         return;
  14848     }
  14849 
  14850     s->pc_curr = pc;
  14851     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
  14852     s->insn = insn;
  14853     s->base.pc_next = pc + 4;
  14854 
  14855     s->fp_access_checked = false;
  14856     s->sve_access_checked = false;
  14857 
  14858     if (s->pstate_il) {
  14859         /*
  14860          * Illegal execution state. This has priority over BTI
  14861          * exceptions, but comes after instruction abort exceptions.
  14862          */
  14863         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
  14864         return;
  14865     }
  14866 
  14867     if (dc_isar_feature(aa64_bti, s)) {
  14868         if (s->base.num_insns == 1) {
  14869             /*
  14870              * At the first insn of the TB, compute s->guarded_page.
  14871              * We delayed computing this until successfully reading
  14872              * the first insn of the TB, above.  This (mostly) ensures
  14873              * that the softmmu tlb entry has been populated, and the
  14874              * page table GP bit is available.
  14875              *
  14876              * Note that we need to compute this even if btype == 0,
  14877              * because this value is used for BR instructions later
  14878              * where ENV is not available.
  14879              */
  14880             s->guarded_page = is_guarded_page(env, s);
  14881 
  14882             /* First insn can have btype set to non-zero.  */
  14883             tcg_debug_assert(s->btype >= 0);
  14884 
  14885             /*
  14886              * Note that the Branch Target Exception has fairly high
  14887              * priority: below debugging exceptions but above almost
  14888              * everything else.  This allows us to handle it now
  14889              * instead of waiting until the insn is otherwise decoded.
  14890              */
  14891             if (s->btype != 0
  14892                 && s->guarded_page
  14893                 && !btype_destination_ok(insn, s->bt, s->btype)) {
  14894                 gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
  14895                 return;
  14896             }
  14897         } else {
  14898             /* Not the first insn: btype must be 0.  */
  14899             tcg_debug_assert(s->btype == 0);
  14900         }
  14901     }
  14902 
  14903     s->is_nonstreaming = false;
  14904     if (s->sme_trap_nonstreaming) {
  14905         disas_sme_fa64(s, insn);
  14906     }
  14907 
  14908     switch (extract32(insn, 25, 4)) {
  14909     case 0x0:
  14910         if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
  14911             unallocated_encoding(s);
  14912         }
  14913         break;
  14914     case 0x1: case 0x3: /* UNALLOCATED */
  14915         unallocated_encoding(s);
  14916         break;
  14917     case 0x2:
  14918         if (!disas_sve(s, insn)) {
  14919             unallocated_encoding(s);
  14920         }
  14921         break;
  14922     case 0x8: case 0x9: /* Data processing - immediate */
  14923         disas_data_proc_imm(s, insn);
  14924         break;
  14925     case 0xa: case 0xb: /* Branch, exception generation and system insns */
  14926         disas_b_exc_sys(s, insn);
  14927         break;
  14928     case 0x4:
  14929     case 0x6:
  14930     case 0xc:
  14931     case 0xe:      /* Loads and stores */
  14932         disas_ldst(s, insn);
  14933         break;
  14934     case 0x5:
  14935     case 0xd:      /* Data processing - register */
  14936         disas_data_proc_reg(s, insn);
  14937         break;
  14938     case 0x7:
  14939     case 0xf:      /* Data processing - SIMD and floating point */
  14940         disas_data_proc_simd_fp(s, insn);
  14941         break;
  14942     default:
  14943         assert(FALSE); /* all 16 cases should be handled above */
  14944         break;
  14945     }
  14946 
  14947     /* if we allocated any temporaries, free them here */
  14948     free_tmp_a64(s);
  14949 
  14950     /*
  14951      * After execution of most insns, btype is reset to 0.
  14952      * Note that we set btype == -1 when the insn sets btype.
  14953      */
  14954     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
  14955         reset_btype(s);
  14956     }
  14957 
  14958     translator_loop_temp_check(&s->base);
  14959 }
  14960 
  14961 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
  14962 {
  14963     DisasContext *dc = container_of(dcbase, DisasContext, base);
  14964 
  14965     if (unlikely(dc->ss_active)) {
  14966         /* Note that this means single-stepping a WFI doesn't halt the CPU.
  14967          * For conditional branch insns this is harmless unreachable code as
  14968          * gen_goto_tb() has already handled emitting the debug exception
  14969          * (and thus a tb-jump is not possible when singlestepping).
  14970          */
  14971         switch (dc->base.is_jmp) {
  14972         default:
  14973             gen_a64_update_pc(dc, 4);
  14974             /* fall through */
  14975         case DISAS_EXIT:
  14976         case DISAS_JUMP:
  14977             gen_step_complete_exception(dc);
  14978             break;
  14979         case DISAS_NORETURN:
  14980             break;
  14981         }
  14982     } else {
  14983         switch (dc->base.is_jmp) {
  14984         case DISAS_NEXT:
  14985         case DISAS_TOO_MANY:
  14986             gen_goto_tb(dc, 1, 4);
  14987             break;
  14988         default:
  14989         case DISAS_UPDATE_EXIT:
  14990             gen_a64_update_pc(dc, 4);
  14991             /* fall through */
  14992         case DISAS_EXIT:
  14993             tcg_gen_exit_tb(NULL, 0);
  14994             break;
  14995         case DISAS_UPDATE_NOCHAIN:
  14996             gen_a64_update_pc(dc, 4);
  14997             /* fall through */
  14998         case DISAS_JUMP:
  14999             tcg_gen_lookup_and_goto_ptr();
  15000             break;
  15001         case DISAS_NORETURN:
  15002         case DISAS_SWI:
  15003             break;
  15004         case DISAS_WFE:
  15005             gen_a64_update_pc(dc, 4);
  15006             gen_helper_wfe(cpu_env);
  15007             break;
  15008         case DISAS_YIELD:
  15009             gen_a64_update_pc(dc, 4);
  15010             gen_helper_yield(cpu_env);
  15011             break;
  15012         case DISAS_WFI:
  15013             /*
  15014              * This is a special case because we don't want to just halt
  15015              * the CPU if trying to debug across a WFI.
  15016              */
  15017             gen_a64_update_pc(dc, 4);
  15018             gen_helper_wfi(cpu_env, tcg_constant_i32(4));
  15019             /*
  15020              * The helper doesn't necessarily throw an exception, but we
  15021              * must go back to the main loop to check for interrupts anyway.
  15022              */
  15023             tcg_gen_exit_tb(NULL, 0);
  15024             break;
  15025         }
  15026     }
  15027 }
  15028 
  15029 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
  15030                                  CPUState *cpu, FILE *logfile)
  15031 {
  15032     DisasContext *dc = container_of(dcbase, DisasContext, base);
  15033 
  15034     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
  15035     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
  15036 }
  15037 
  15038 const TranslatorOps aarch64_translator_ops = {
  15039     .init_disas_context = aarch64_tr_init_disas_context,
  15040     .tb_start           = aarch64_tr_tb_start,
  15041     .insn_start         = aarch64_tr_insn_start,
  15042     .translate_insn     = aarch64_tr_translate_insn,
  15043     .tb_stop            = aarch64_tr_tb_stop,
  15044     .disas_log          = aarch64_tr_disas_log,
  15045 };