qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git

translate.c (297505B)


      1 /*
      2  *  ARM translation
      3  *
      4  *  Copyright (c) 2003 Fabrice Bellard
      5  *  Copyright (c) 2005-2007 CodeSourcery
      6  *  Copyright (c) 2007 OpenedHand, Ltd.
      7  *
      8  * This library is free software; you can redistribute it and/or
      9  * modify it under the terms of the GNU Lesser General Public
     10  * License as published by the Free Software Foundation; either
     11  * version 2.1 of the License, or (at your option) any later version.
     12  *
     13  * This library is distributed in the hope that it will be useful,
     14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     16  * Lesser General Public License for more details.
     17  *
     18  * You should have received a copy of the GNU Lesser General Public
     19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     20  */
     21 #include "qemu/osdep.h"
     22 
     23 #include "cpu.h"
     24 #include "internals.h"
     25 #include "disas/disas.h"
     26 #include "exec/exec-all.h"
     27 #include "tcg/tcg-op.h"
     28 #include "tcg/tcg-op-gvec.h"
     29 #include "qemu/log.h"
     30 #include "qemu/bitops.h"
     31 #include "arm_ldst.h"
     32 #include "semihosting/semihost.h"
     33 #include "exec/helper-proto.h"
     34 #include "exec/helper-gen.h"
     35 #include "exec/log.h"
     36 #include "cpregs.h"
     37 
     38 
     39 #define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
     40 #define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
     41 /* currently all emulated v5 cores are also v5TE, so don't bother */
     42 #define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
     43 #define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
     44 #define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
     45 #define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
     46 #define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
     47 #define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
     48 #define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
     49 
     50 #include "translate.h"
     51 #include "translate-a32.h"
     52 
     53 /* These are TCG temporaries used only by the legacy iwMMXt decoder */
     54 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
     55 /* These are TCG globals which alias CPUARMState fields */
     56 static TCGv_i32 cpu_R[16];
     57 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
     58 TCGv_i64 cpu_exclusive_addr;
     59 TCGv_i64 cpu_exclusive_val;
     60 
     61 #include "exec/gen-icount.h"
     62 
     63 static const char * const regnames[] =
     64     { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
     65       "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
     66 
     67 
     68 /* initialize TCG globals.  */
     69 void arm_translate_init(void)
     70 {
     71     int i;
     72 
     73     for (i = 0; i < 16; i++) {
     74         cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
     75                                           offsetof(CPUARMState, regs[i]),
     76                                           regnames[i]);
     77     }
     78     cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
     79     cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
     80     cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
     81     cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
     82 
     83     cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
     84         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
     85     cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
     86         offsetof(CPUARMState, exclusive_val), "exclusive_val");
     87 
     88     a64_translate_init();
     89 }
     90 
     91 uint64_t asimd_imm_const(uint32_t imm, int cmode, int op)
     92 {
     93     /* Expand the encoded constant as per AdvSIMDExpandImm pseudocode */
     94     switch (cmode) {
     95     case 0: case 1:
     96         /* no-op */
     97         break;
     98     case 2: case 3:
     99         imm <<= 8;
    100         break;
    101     case 4: case 5:
    102         imm <<= 16;
    103         break;
    104     case 6: case 7:
    105         imm <<= 24;
    106         break;
    107     case 8: case 9:
    108         imm |= imm << 16;
    109         break;
    110     case 10: case 11:
    111         imm = (imm << 8) | (imm << 24);
    112         break;
    113     case 12:
    114         imm = (imm << 8) | 0xff;
    115         break;
    116     case 13:
    117         imm = (imm << 16) | 0xffff;
    118         break;
    119     case 14:
    120         if (op) {
    121             /*
    122              * This and cmode == 15 op == 1 are the only cases where
    123              * the top and bottom 32 bits of the encoded constant differ.
    124              */
    125             uint64_t imm64 = 0;
    126             int n;
    127 
    128             for (n = 0; n < 8; n++) {
    129                 if (imm & (1 << n)) {
    130                     imm64 |= (0xffULL << (n * 8));
    131                 }
    132             }
    133             return imm64;
    134         }
    135         imm |= (imm << 8) | (imm << 16) | (imm << 24);
    136         break;
    137     case 15:
    138         if (op) {
    139             /* Reserved encoding for AArch32; valid for AArch64 */
    140             uint64_t imm64 = (uint64_t)(imm & 0x3f) << 48;
    141             if (imm & 0x80) {
    142                 imm64 |= 0x8000000000000000ULL;
    143             }
    144             if (imm & 0x40) {
    145                 imm64 |= 0x3fc0000000000000ULL;
    146             } else {
    147                 imm64 |= 0x4000000000000000ULL;
    148             }
    149             return imm64;
    150         }
    151         imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
    152             | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
    153         break;
    154     }
    155     if (op) {
    156         imm = ~imm;
    157     }
    158     return dup_const(MO_32, imm);
    159 }
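         /*
          * Worked example of the expansion above: imm = 0x55 with
          * cmode = 14, op = 1 sets byte lanes 0, 2, 4 and 6, giving
          * 0x00ff00ff00ff00ff; the same imm with cmode = 3, op = 0
          * becomes 0x5500 and is replicated per 32-bit lane to
          * 0x0000550000005500.
          */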
    160 
    161 /* Generate a label used for skipping this instruction */
    162 void arm_gen_condlabel(DisasContext *s)
    163 {
    164     if (!s->condjmp) {
    165         s->condlabel = gen_disas_label(s);
    166         s->condjmp = 1;
    167     }
    168 }
    169 
    170 /* Flags for the disas_set_da_iss info argument:
    171  * lower bits hold the Rt register number, higher bits are flags.
    172  */
    173 typedef enum ISSInfo {
    174     ISSNone = 0,
    175     ISSRegMask = 0x1f,
    176     ISSInvalid = (1 << 5),
    177     ISSIsAcqRel = (1 << 6),
    178     ISSIsWrite = (1 << 7),
    179     ISSIs16Bit = (1 << 8),
    180 } ISSInfo;
    181 
    182 /*
    183  * Store var into env + offset to a member with size bytes.
    184  * Free var after use.
    185  */
    186 void store_cpu_offset(TCGv_i32 var, int offset, int size)
    187 {
    188     switch (size) {
    189     case 1:
    190         tcg_gen_st8_i32(var, cpu_env, offset);
    191         break;
    192     case 4:
    193         tcg_gen_st_i32(var, cpu_env, offset);
    194         break;
    195     default:
    196         g_assert_not_reached();
    197     }
    198     tcg_temp_free_i32(var);
    199 }
    200 
    201 /* Save the syndrome information for a Data Abort */
    202 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
    203 {
    204     uint32_t syn;
    205     int sas = memop & MO_SIZE;
    206     bool sse = memop & MO_SIGN;
    207     bool is_acqrel = issinfo & ISSIsAcqRel;
    208     bool is_write = issinfo & ISSIsWrite;
    209     bool is_16bit = issinfo & ISSIs16Bit;
    210     int srt = issinfo & ISSRegMask;
    211 
    212     if (issinfo & ISSInvalid) {
    213         /* Some callsites want to conditionally provide ISS info,
    214          * eg "only if this was not a writeback"
    215          */
    216         return;
    217     }
    218 
    219     if (srt == 15) {
    220         /* For AArch32, insns where the src/dest is R15 never generate
    221          * ISS information. Catching that here saves checking at all
    222          * the call sites.
    223          */
    224         return;
    225     }
    226 
    227     syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
    228                                   0, 0, 0, is_write, 0, is_16bit);
    229     disas_set_insn_syndrome(s, syn);
    230 }
    231 
    232 static inline int get_a32_user_mem_index(DisasContext *s)
    233 {
    234     /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
    235      * insns:
    236      *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
    237      *  otherwise, access as if at PL0.
    238      */
    239     switch (s->mmu_idx) {
    240     case ARMMMUIdx_E3:
    241     case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
    242     case ARMMMUIdx_E10_0:
    243     case ARMMMUIdx_E10_1:
    244     case ARMMMUIdx_E10_1_PAN:
    245         return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
    246     case ARMMMUIdx_MUser:
    247     case ARMMMUIdx_MPriv:
    248         return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
    249     case ARMMMUIdx_MUserNegPri:
    250     case ARMMMUIdx_MPrivNegPri:
    251         return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
    252     case ARMMMUIdx_MSUser:
    253     case ARMMMUIdx_MSPriv:
    254         return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
    255     case ARMMMUIdx_MSUserNegPri:
    256     case ARMMMUIdx_MSPrivNegPri:
    257         return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
    258     default:
    259         g_assert_not_reached();
    260     }
    261 }
    262 
    263 /* The pc_curr difference for an architectural jump. */
    264 static target_long jmp_diff(DisasContext *s, target_long diff)
    265 {
    266     return diff + (s->thumb ? 4 : 8);
    267 }
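         /*
          * Example: a branch at 0x1000 with diff = 0x20 resolves to
          * 0x1028 in ARM state (PC reads as insn address + 8) and to
          * 0x1024 in Thumb state (insn address + 4).
          */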
    268 
    269 static void gen_pc_plus_diff(DisasContext *s, TCGv_i32 var, target_long diff)
    270 {
    271     assert(s->pc_save != -1);
    272     if (TARGET_TB_PCREL) {
    273         tcg_gen_addi_i32(var, cpu_R[15], (s->pc_curr - s->pc_save) + diff);
    274     } else {
    275         tcg_gen_movi_i32(var, s->pc_curr + diff);
    276     }
    277 }
    278 
    279 /* Set a variable to the value of a CPU register.  */
    280 void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
    281 {
    282     if (reg == 15) {
    283         gen_pc_plus_diff(s, var, jmp_diff(s, 0));
    284     } else {
    285         tcg_gen_mov_i32(var, cpu_R[reg]);
    286     }
    287 }
    288 
    289 /*
    290  * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
    291  * This is used for load/store for which use of PC implies (literal),
    292  * or ADD that implies ADR.
    293  */
    294 TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
    295 {
    296     TCGv_i32 tmp = tcg_temp_new_i32();
    297 
    298     if (reg == 15) {
    299         /*
    300          * This address is computed from an aligned PC:
    301          * subtract off the low bits.
    302          */
    303         gen_pc_plus_diff(s, tmp, jmp_diff(s, ofs - (s->pc_curr & 3)));
    304     } else {
    305         tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
    306     }
    307     return tmp;
    308 }
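         /*
          * Example: for a Thumb LDR (literal) at pc_curr = 0x1002 with
          * ofs = 8, the temp holds Align(PC, 4) + 8 = Align(0x1006, 4)
          * + 8 = 0x100c, matching the ALIGN(PC, 4) base described above.
          */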
    309 
    310 /* Set a CPU register.  The source must be a temporary and will be
    311    marked as dead.  */
    312 void store_reg(DisasContext *s, int reg, TCGv_i32 var)
    313 {
    314     if (reg == 15) {
    315         /* In Thumb mode, we must ignore bit 0.
    316          * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
    317          * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
    318          * We choose to ignore [1:0] in ARM mode for all architecture versions.
    319          */
    320         tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
    321         s->base.is_jmp = DISAS_JUMP;
    322         s->pc_save = -1;
    323     } else if (reg == 13 && arm_dc_feature(s, ARM_FEATURE_M)) {
    324         /* For M-profile SP bits [1:0] are always zero */
    325         tcg_gen_andi_i32(var, var, ~3);
    326     }
    327     tcg_gen_mov_i32(cpu_R[reg], var);
    328     tcg_temp_free_i32(var);
    329 }
    330 
    331 /*
    332  * Variant of store_reg which applies v8M stack-limit checks before updating
    333  * SP. If the check fails this will result in an exception being taken.
    334  * We disable the stack checks for CONFIG_USER_ONLY because we have
    335  * no idea what the stack limits should be in that case.
    336  * If stack checking is not being done this just acts like store_reg().
    337  */
    338 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
    339 {
    340 #ifndef CONFIG_USER_ONLY
    341     if (s->v8m_stackcheck) {
    342         gen_helper_v8m_stackcheck(cpu_env, var);
    343     }
    344 #endif
    345     store_reg(s, 13, var);
    346 }
    347 
    348 /* Value extensions.  */
    349 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
    350 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
    351 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
    352 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
    353 
    354 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
    355 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
    356 
    357 void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
    358 {
    359     gen_helper_cpsr_write(cpu_env, var, tcg_constant_i32(mask));
    360 }
    361 
    362 static void gen_rebuild_hflags(DisasContext *s, bool new_el)
    363 {
    364     bool m_profile = arm_dc_feature(s, ARM_FEATURE_M);
    365 
    366     if (new_el) {
    367         if (m_profile) {
    368             gen_helper_rebuild_hflags_m32_newel(cpu_env);
    369         } else {
    370             gen_helper_rebuild_hflags_a32_newel(cpu_env);
    371         }
    372     } else {
    373         TCGv_i32 tcg_el = tcg_constant_i32(s->current_el);
    374         if (m_profile) {
    375             gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
    376         } else {
    377             gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
    378         }
    379     }
    380 }
    381 
    382 static void gen_exception_internal(int excp)
    383 {
    384     assert(excp_is_internal(excp));
    385     gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
    386 }
    387 
    388 static void gen_singlestep_exception(DisasContext *s)
    389 {
    390     /* We just completed step of an insn. Move from Active-not-pending
    391      * to Active-pending, and then also take the swstep exception.
    392      * This corresponds to making the (IMPDEF) choice to prioritize
    393      * swstep exceptions over asynchronous exceptions taken to an exception
    394      * level where debug is disabled. This choice has the advantage that
    395      * we do not need to maintain internal state corresponding to the
    396      * ISV/EX syndrome bits between completion of the step and generation
    397      * of the exception, and our syndrome information is always correct.
    398      */
    399     gen_ss_advance(s);
    400     gen_swstep_exception(s, 1, s->is_ldex);
    401     s->base.is_jmp = DISAS_NORETURN;
    402 }
    403 
    404 void clear_eci_state(DisasContext *s)
    405 {
    406     /*
    407      * Clear any ECI/ICI state: used when a load multiple/store
    408      * multiple insn executes.
    409      */
    410     if (s->eci) {
    411         store_cpu_field_constant(0, condexec_bits);
    412         s->eci = 0;
    413     }
    414 }
    415 
    416 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
    417 {
    418     TCGv_i32 tmp1 = tcg_temp_new_i32();
    419     TCGv_i32 tmp2 = tcg_temp_new_i32();
    420     tcg_gen_ext16s_i32(tmp1, a);
    421     tcg_gen_ext16s_i32(tmp2, b);
    422     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
    423     tcg_temp_free_i32(tmp2);
    424     tcg_gen_sari_i32(a, a, 16);
    425     tcg_gen_sari_i32(b, b, 16);
    426     tcg_gen_mul_i32(b, b, a);
    427     tcg_gen_mov_i32(a, tmp1);
    428     tcg_temp_free_i32(tmp1);
    429 }
    430 
    431 /* Byteswap each halfword.  */
    432 void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
    433 {
    434     TCGv_i32 tmp = tcg_temp_new_i32();
    435     TCGv_i32 mask = tcg_constant_i32(0x00ff00ff);
    436     tcg_gen_shri_i32(tmp, var, 8);
    437     tcg_gen_and_i32(tmp, tmp, mask);
    438     tcg_gen_and_i32(var, var, mask);
    439     tcg_gen_shli_i32(var, var, 8);
    440     tcg_gen_or_i32(dest, var, tmp);
    441     tcg_temp_free_i32(tmp);
    442 }
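         /*
          * Example: var = 0xaabbccdd produces dest = 0xbbaaddcc: each
          * 16-bit half is byteswapped independently, as REV16 requires.
          */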
    443 
    444 /* Byteswap low halfword and sign extend.  */
    445 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
    446 {
     447     tcg_gen_bswap16_i32(dest, var, TCG_BSWAP_OS);
    448 }
    449 
     450 /* Dual 16-bit add.  Result placed in dest; t1 is marked as dead.
    451     tmp = (t0 ^ t1) & 0x8000;
    452     t0 &= ~0x8000;
    453     t1 &= ~0x8000;
    454     t0 = (t0 + t1) ^ tmp;
    455  */
    456 
    457 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
    458 {
    459     TCGv_i32 tmp = tcg_temp_new_i32();
    460     tcg_gen_xor_i32(tmp, t0, t1);
    461     tcg_gen_andi_i32(tmp, tmp, 0x8000);
    462     tcg_gen_andi_i32(t0, t0, ~0x8000);
    463     tcg_gen_andi_i32(t1, t1, ~0x8000);
    464     tcg_gen_add_i32(t0, t0, t1);
    465     tcg_gen_xor_i32(dest, t0, tmp);
    466     tcg_temp_free_i32(tmp);
    467 }
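         /*
          * Example: t0 = t1 = 0x00018000. The low halves sum to
          * 0x10000, but the mask/XOR trick suppresses the carry into
          * the high half, so the result is 0x00020000 rather than the
          * 0x00030000 a plain 32-bit add would give.
          */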
    468 
    469 /* Set N and Z flags from var.  */
    470 static inline void gen_logic_CC(TCGv_i32 var)
    471 {
    472     tcg_gen_mov_i32(cpu_NF, var);
    473     tcg_gen_mov_i32(cpu_ZF, var);
    474 }
    475 
    476 /* dest = T0 + T1 + CF. */
    477 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
    478 {
    479     tcg_gen_add_i32(dest, t0, t1);
    480     tcg_gen_add_i32(dest, dest, cpu_CF);
    481 }
    482 
    483 /* dest = T0 - T1 + CF - 1.  */
    484 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
    485 {
    486     tcg_gen_sub_i32(dest, t0, t1);
    487     tcg_gen_add_i32(dest, dest, cpu_CF);
    488     tcg_gen_subi_i32(dest, dest, 1);
    489 }
    490 
    491 /* dest = T0 + T1. Compute C, N, V and Z flags */
    492 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
    493 {
    494     TCGv_i32 tmp = tcg_temp_new_i32();
    495     tcg_gen_movi_i32(tmp, 0);
    496     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
    497     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    498     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    499     tcg_gen_xor_i32(tmp, t0, t1);
    500     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    501     tcg_temp_free_i32(tmp);
    502     tcg_gen_mov_i32(dest, cpu_NF);
    503 }
    504 
    505 /* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
    506 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
    507 {
    508     TCGv_i32 tmp = tcg_temp_new_i32();
    509     if (TCG_TARGET_HAS_add2_i32) {
    510         tcg_gen_movi_i32(tmp, 0);
    511         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
    512         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
    513     } else {
    514         TCGv_i64 q0 = tcg_temp_new_i64();
    515         TCGv_i64 q1 = tcg_temp_new_i64();
    516         tcg_gen_extu_i32_i64(q0, t0);
    517         tcg_gen_extu_i32_i64(q1, t1);
    518         tcg_gen_add_i64(q0, q0, q1);
    519         tcg_gen_extu_i32_i64(q1, cpu_CF);
    520         tcg_gen_add_i64(q0, q0, q1);
    521         tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
    522         tcg_temp_free_i64(q0);
    523         tcg_temp_free_i64(q1);
    524     }
    525     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    526     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    527     tcg_gen_xor_i32(tmp, t0, t1);
    528     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    529     tcg_temp_free_i32(tmp);
    530     tcg_gen_mov_i32(dest, cpu_NF);
    531 }
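         /*
          * The add2 path above accumulates t0 + CF and then + t1,
          * keeping the carry-out in cpu_CF at each step; e.g.
          * t0 = 0xffffffff, t1 = 0, CF = 1 ends with NF = 0 and
          * CF = 1, as ADCS requires.
          */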
    532 
    533 /* dest = T0 - T1. Compute C, N, V and Z flags */
    534 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
    535 {
    536     TCGv_i32 tmp;
    537     tcg_gen_sub_i32(cpu_NF, t0, t1);
    538     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    539     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
    540     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    541     tmp = tcg_temp_new_i32();
    542     tcg_gen_xor_i32(tmp, t0, t1);
    543     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    544     tcg_temp_free_i32(tmp);
    545     tcg_gen_mov_i32(dest, cpu_NF);
    546 }
    547 
    548 /* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
    549 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
    550 {
    551     TCGv_i32 tmp = tcg_temp_new_i32();
    552     tcg_gen_not_i32(tmp, t1);
    553     gen_adc_CC(dest, t0, tmp);
    554     tcg_temp_free_i32(tmp);
    555 }
    556 
    557 #define GEN_SHIFT(name)                                               \
    558 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
    559 {                                                                     \
    560     TCGv_i32 tmpd = tcg_temp_new_i32();                               \
    561     TCGv_i32 tmp1 = tcg_temp_new_i32();                               \
    562     TCGv_i32 zero = tcg_constant_i32(0);                              \
    563     tcg_gen_andi_i32(tmp1, t1, 0x1f);                                 \
    564     tcg_gen_##name##_i32(tmpd, t0, tmp1);                             \
    565     tcg_gen_andi_i32(tmp1, t1, 0xe0);                                 \
    566     tcg_gen_movcond_i32(TCG_COND_NE, dest, tmp1, zero, zero, tmpd);   \
    567     tcg_temp_free_i32(tmpd);                                          \
    568     tcg_temp_free_i32(tmp1);                                          \
    569 }
    570 GEN_SHIFT(shl)
    571 GEN_SHIFT(shr)
    572 #undef GEN_SHIFT
    573 
    574 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
    575 {
    576     TCGv_i32 tmp1 = tcg_temp_new_i32();
    577 
    578     tcg_gen_andi_i32(tmp1, t1, 0xff);
    579     tcg_gen_umin_i32(tmp1, tmp1, tcg_constant_i32(31));
    580     tcg_gen_sar_i32(dest, t0, tmp1);
    581     tcg_temp_free_i32(tmp1);
    582 }
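         /*
          * Example: for a register-specified ASR with t1 = 0x127, only
          * the low byte (39) is used and then clamped to 31, which
          * replicates the sign bit as the architecture requires for
          * shifts >= 32; the LSL/LSR variants above instead force a
          * zero result via the movcond when any of bits [7:5] are set.
          */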
    583 
    584 static void shifter_out_im(TCGv_i32 var, int shift)
    585 {
    586     tcg_gen_extract_i32(cpu_CF, var, shift, 1);
    587 }
    588 
    589 /* Shift by immediate.  Includes special handling for shift == 0.  */
    590 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
    591                                     int shift, int flags)
    592 {
    593     switch (shiftop) {
    594     case 0: /* LSL */
    595         if (shift != 0) {
    596             if (flags)
    597                 shifter_out_im(var, 32 - shift);
    598             tcg_gen_shli_i32(var, var, shift);
    599         }
    600         break;
    601     case 1: /* LSR */
    602         if (shift == 0) {
    603             if (flags) {
    604                 tcg_gen_shri_i32(cpu_CF, var, 31);
    605             }
    606             tcg_gen_movi_i32(var, 0);
    607         } else {
    608             if (flags)
    609                 shifter_out_im(var, shift - 1);
    610             tcg_gen_shri_i32(var, var, shift);
    611         }
    612         break;
    613     case 2: /* ASR */
    614         if (shift == 0)
    615             shift = 32;
    616         if (flags)
    617             shifter_out_im(var, shift - 1);
    618         if (shift == 32)
    619           shift = 31;
    620         tcg_gen_sari_i32(var, var, shift);
    621         break;
    622     case 3: /* ROR/RRX */
    623         if (shift != 0) {
    624             if (flags)
    625                 shifter_out_im(var, shift - 1);
    626             tcg_gen_rotri_i32(var, var, shift); break;
    627         } else {
    628             TCGv_i32 tmp = tcg_temp_new_i32();
    629             tcg_gen_shli_i32(tmp, cpu_CF, 31);
    630             if (flags)
    631                 shifter_out_im(var, 0);
    632             tcg_gen_shri_i32(var, var, 1);
    633             tcg_gen_or_i32(var, var, tmp);
    634             tcg_temp_free_i32(tmp);
    635         }
    636     }
     637 }
    638 
    639 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
    640                                      TCGv_i32 shift, int flags)
    641 {
    642     if (flags) {
    643         switch (shiftop) {
    644         case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
    645         case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
    646         case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
    647         case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
    648         }
    649     } else {
    650         switch (shiftop) {
    651         case 0:
    652             gen_shl(var, var, shift);
    653             break;
    654         case 1:
    655             gen_shr(var, var, shift);
    656             break;
    657         case 2:
    658             gen_sar(var, var, shift);
    659             break;
    660         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
    661                 tcg_gen_rotr_i32(var, var, shift); break;
    662         }
    663     }
    664     tcg_temp_free_i32(shift);
    665 }
    666 
    667 /*
    668  * Generate a conditional based on ARM condition code cc.
     669  * This is common between ARM and AArch64 targets.
    670  */
    671 void arm_test_cc(DisasCompare *cmp, int cc)
    672 {
    673     TCGv_i32 value;
    674     TCGCond cond;
    675     bool global = true;
    676 
    677     switch (cc) {
    678     case 0: /* eq: Z */
    679     case 1: /* ne: !Z */
    680         cond = TCG_COND_EQ;
    681         value = cpu_ZF;
    682         break;
    683 
    684     case 2: /* cs: C */
    685     case 3: /* cc: !C */
    686         cond = TCG_COND_NE;
    687         value = cpu_CF;
    688         break;
    689 
    690     case 4: /* mi: N */
    691     case 5: /* pl: !N */
    692         cond = TCG_COND_LT;
    693         value = cpu_NF;
    694         break;
    695 
    696     case 6: /* vs: V */
    697     case 7: /* vc: !V */
    698         cond = TCG_COND_LT;
    699         value = cpu_VF;
    700         break;
    701 
    702     case 8: /* hi: C && !Z */
    703     case 9: /* ls: !C || Z -> !(C && !Z) */
    704         cond = TCG_COND_NE;
    705         value = tcg_temp_new_i32();
    706         global = false;
    707         /* CF is 1 for C, so -CF is an all-bits-set mask for C;
    708            ZF is non-zero for !Z; so AND the two subexpressions.  */
    709         tcg_gen_neg_i32(value, cpu_CF);
    710         tcg_gen_and_i32(value, value, cpu_ZF);
    711         break;
    712 
    713     case 10: /* ge: N == V -> N ^ V == 0 */
    714     case 11: /* lt: N != V -> N ^ V != 0 */
    715         /* Since we're only interested in the sign bit, == 0 is >= 0.  */
    716         cond = TCG_COND_GE;
    717         value = tcg_temp_new_i32();
    718         global = false;
    719         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
    720         break;
    721 
    722     case 12: /* gt: !Z && N == V */
    723     case 13: /* le: Z || N != V */
    724         cond = TCG_COND_NE;
    725         value = tcg_temp_new_i32();
    726         global = false;
    727         /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
    728          * the sign bit then AND with ZF to yield the result.  */
    729         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
    730         tcg_gen_sari_i32(value, value, 31);
    731         tcg_gen_andc_i32(value, cpu_ZF, value);
    732         break;
    733 
    734     case 14: /* always */
    735     case 15: /* always */
    736         /* Use the ALWAYS condition, which will fold early.
    737          * It doesn't matter what we use for the value.  */
    738         cond = TCG_COND_ALWAYS;
    739         value = cpu_ZF;
    740         goto no_invert;
    741 
    742     default:
    743         fprintf(stderr, "Bad condition code 0x%x\n", cc);
    744         abort();
    745     }
    746 
    747     if (cc & 1) {
    748         cond = tcg_invert_cond(cond);
    749     }
    750 
    751  no_invert:
    752     cmp->cond = cond;
    753     cmp->value = value;
    754     cmp->value_global = global;
    755 }
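         /*
          * Example: cc = 8 (hi) builds value = -CF & ZF with cond NE,
          * which is nonzero exactly when C is set and Z is clear;
          * cc = 9 (ls) reuses the same value with the condition
          * inverted by tcg_invert_cond().
          */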
    756 
    757 void arm_free_cc(DisasCompare *cmp)
    758 {
    759     if (!cmp->value_global) {
    760         tcg_temp_free_i32(cmp->value);
    761     }
    762 }
    763 
    764 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
    765 {
    766     tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
    767 }
    768 
    769 void arm_gen_test_cc(int cc, TCGLabel *label)
    770 {
    771     DisasCompare cmp;
    772     arm_test_cc(&cmp, cc);
    773     arm_jump_cc(&cmp, label);
    774     arm_free_cc(&cmp);
    775 }
    776 
    777 void gen_set_condexec(DisasContext *s)
    778 {
    779     if (s->condexec_mask) {
    780         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
    781 
    782         store_cpu_field_constant(val, condexec_bits);
    783     }
    784 }
    785 
    786 void gen_update_pc(DisasContext *s, target_long diff)
    787 {
    788     gen_pc_plus_diff(s, cpu_R[15], diff);
    789     s->pc_save = s->pc_curr + diff;
    790 }
    791 
    792 /* Set PC and Thumb state from var.  var is marked as dead.  */
    793 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
    794 {
    795     s->base.is_jmp = DISAS_JUMP;
    796     tcg_gen_andi_i32(cpu_R[15], var, ~1);
    797     tcg_gen_andi_i32(var, var, 1);
    798     store_cpu_field(var, thumb);
    799     s->pc_save = -1;
    800 }
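         /*
          * Example: BX with Rm = 0x00008001 branches to 0x8000 in
          * Thumb state, while Rm = 0x00008000 branches to the same
          * address in ARM state: bit 0 selects the instruction set.
          */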
    801 
    802 /*
    803  * Set PC and Thumb state from var. var is marked as dead.
    804  * For M-profile CPUs, include logic to detect exception-return
    805  * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
    806  * and BX reg, and no others, and happens only for code in Handler mode.
    807  * The Security Extension also requires us to check for the FNC_RETURN
    808  * which signals a function return from non-secure state; this can happen
    809  * in both Handler and Thread mode.
    810  * To avoid having to do multiple comparisons in inline generated code,
    811  * we make the check we do here loose, so it will match for EXC_RETURN
    812  * in Thread mode. For system emulation do_v7m_exception_exit() checks
    813  * for these spurious cases and returns without doing anything (giving
    814  * the same behaviour as for a branch to a non-magic address).
    815  *
    816  * In linux-user mode it is unclear what the right behaviour for an
    817  * attempted FNC_RETURN should be, because in real hardware this will go
    818  * directly to Secure code (ie not the Linux kernel) which will then treat
    819  * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
    820  * attempt behave the way it would on a CPU without the security extension,
    821  * which is to say "like a normal branch". That means we can simply treat
    822  * all branches as normal with no magic address behaviour.
    823  */
    824 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
    825 {
    826     /* Generate the same code here as for a simple bx, but flag via
    827      * s->base.is_jmp that we need to do the rest of the work later.
    828      */
    829     gen_bx(s, var);
    830 #ifndef CONFIG_USER_ONLY
    831     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
    832         (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
    833         s->base.is_jmp = DISAS_BX_EXCRET;
    834     }
    835 #endif
    836 }
    837 
    838 static inline void gen_bx_excret_final_code(DisasContext *s)
    839 {
    840     /* Generate the code to finish possible exception return and end the TB */
    841     DisasLabel excret_label = gen_disas_label(s);
    842     uint32_t min_magic;
    843 
    844     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
    845         /* Covers FNC_RETURN and EXC_RETURN magic */
    846         min_magic = FNC_RETURN_MIN_MAGIC;
    847     } else {
    848         /* EXC_RETURN magic only */
    849         min_magic = EXC_RETURN_MIN_MAGIC;
    850     }
    851 
    852     /* Is the new PC value in the magic range indicating exception return? */
    853     tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label.label);
    854     /* No: end the TB as we would for a DISAS_JMP */
    855     if (s->ss_active) {
    856         gen_singlestep_exception(s);
    857     } else {
    858         tcg_gen_exit_tb(NULL, 0);
    859     }
    860     set_disas_label(s, excret_label);
    861     /* Yes: this is an exception return.
    862      * At this point in runtime env->regs[15] and env->thumb will hold
    863      * the exception-return magic number, which do_v7m_exception_exit()
    864      * will read. Nothing else will be able to see those values because
    865      * the cpu-exec main loop guarantees that we will always go straight
    866      * from raising the exception to the exception-handling code.
    867      *
    868      * gen_ss_advance(s) does nothing on M profile currently but
    869      * calling it is conceptually the right thing as we have executed
    870      * this instruction (compare SWI, HVC, SMC handling).
    871      */
    872     gen_ss_advance(s);
    873     gen_exception_internal(EXCP_EXCEPTION_EXIT);
    874 }
    875 
    876 static inline void gen_bxns(DisasContext *s, int rm)
    877 {
    878     TCGv_i32 var = load_reg(s, rm);
    879 
    880     /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
    881      * we need to sync state before calling it, but:
    882      *  - we don't need to do gen_update_pc() because the bxns helper will
    883      *    always set the PC itself
    884      *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
    885      *    unless it's outside an IT block or the last insn in an IT block,
    886      *    so we know that condexec == 0 (already set at the top of the TB)
    887      *    is correct in the non-UNPREDICTABLE cases, and we can choose
    888      *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
    889      */
    890     gen_helper_v7m_bxns(cpu_env, var);
    891     tcg_temp_free_i32(var);
    892     s->base.is_jmp = DISAS_EXIT;
    893 }
    894 
    895 static inline void gen_blxns(DisasContext *s, int rm)
    896 {
    897     TCGv_i32 var = load_reg(s, rm);
    898 
    899     /* We don't need to sync condexec state, for the same reason as bxns.
    900      * We do however need to set the PC, because the blxns helper reads it.
    901      * The blxns helper may throw an exception.
    902      */
    903     gen_update_pc(s, curr_insn_len(s));
    904     gen_helper_v7m_blxns(cpu_env, var);
    905     tcg_temp_free_i32(var);
    906     s->base.is_jmp = DISAS_EXIT;
    907 }
    908 
    909 /* Variant of store_reg which uses branch&exchange logic when storing
    910    to r15 in ARM architecture v7 and above. The source must be a temporary
    911    and will be marked as dead. */
    912 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
    913 {
    914     if (reg == 15 && ENABLE_ARCH_7) {
    915         gen_bx(s, var);
    916     } else {
    917         store_reg(s, reg, var);
    918     }
    919 }
    920 
    921 /* Variant of store_reg which uses branch&exchange logic when storing
    922  * to r15 in ARM architecture v5T and above. This is used for storing
    923  * the results of a LDR/LDM/POP into r15, and corresponds to the cases
    924  * in the ARM ARM which use the LoadWritePC() pseudocode function. */
    925 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
    926 {
    927     if (reg == 15 && ENABLE_ARCH_5) {
    928         gen_bx_excret(s, var);
    929     } else {
    930         store_reg(s, reg, var);
    931     }
    932 }
    933 
    934 #ifdef CONFIG_USER_ONLY
    935 #define IS_USER_ONLY 1
    936 #else
    937 #define IS_USER_ONLY 0
    938 #endif
    939 
    940 MemOp pow2_align(unsigned i)
    941 {
    942     static const MemOp mop_align[] = {
    943         0, MO_ALIGN_2, MO_ALIGN_4, MO_ALIGN_8, MO_ALIGN_16,
    944         /*
    945          * FIXME: TARGET_PAGE_BITS_MIN affects TLB_FLAGS_MASK such
    946          * that 256-bit alignment (MO_ALIGN_32) cannot be supported:
    947          * see get_alignment_bits(). Enforce only 128-bit alignment for now.
    948          */
    949         MO_ALIGN_16
    950     };
    951     g_assert(i < ARRAY_SIZE(mop_align));
    952     return mop_align[i];
    953 }
    954 
    955 /*
    956  * Abstractions of "generate code to do a guest load/store for
    957  * AArch32", where a vaddr is always 32 bits (and is zero
     958  * extended if we're a 64 bit core) and data is also
    959  * 32 bits unless specifically doing a 64 bit access.
    960  * These functions work like tcg_gen_qemu_{ld,st}* except
    961  * that the address argument is TCGv_i32 rather than TCGv.
    962  */
    963 
    964 static TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
    965 {
    966     TCGv addr = tcg_temp_new();
    967     tcg_gen_extu_i32_tl(addr, a32);
    968 
    969     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
    970     if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
    971         tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
    972     }
    973     return addr;
    974 }
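         /*
          * Example of the BE32 (SCTLR.B) address munging above: a byte
          * access to 0x1001 is XORed with 3 and goes to 0x1002, and a
          * halfword access to 0x1002 is XORed with 2 and goes to
          * 0x1000; word and larger accesses are unchanged.
          */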
    975 
    976 /*
    977  * Internal routines are used for NEON cases where the endianness
    978  * and/or alignment has already been taken into account and manipulated.
    979  */
    980 void gen_aa32_ld_internal_i32(DisasContext *s, TCGv_i32 val,
    981                               TCGv_i32 a32, int index, MemOp opc)
    982 {
    983     TCGv addr = gen_aa32_addr(s, a32, opc);
    984     tcg_gen_qemu_ld_i32(val, addr, index, opc);
    985     tcg_temp_free(addr);
    986 }
    987 
    988 void gen_aa32_st_internal_i32(DisasContext *s, TCGv_i32 val,
    989                               TCGv_i32 a32, int index, MemOp opc)
    990 {
    991     TCGv addr = gen_aa32_addr(s, a32, opc);
    992     tcg_gen_qemu_st_i32(val, addr, index, opc);
    993     tcg_temp_free(addr);
    994 }
    995 
    996 void gen_aa32_ld_internal_i64(DisasContext *s, TCGv_i64 val,
    997                               TCGv_i32 a32, int index, MemOp opc)
    998 {
    999     TCGv addr = gen_aa32_addr(s, a32, opc);
   1000 
   1001     tcg_gen_qemu_ld_i64(val, addr, index, opc);
   1002 
   1003     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
   1004     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
   1005         tcg_gen_rotri_i64(val, val, 32);
   1006     }
   1007     tcg_temp_free(addr);
   1008 }
   1009 
   1010 void gen_aa32_st_internal_i64(DisasContext *s, TCGv_i64 val,
   1011                               TCGv_i32 a32, int index, MemOp opc)
   1012 {
   1013     TCGv addr = gen_aa32_addr(s, a32, opc);
   1014 
   1015     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
   1016     if (!IS_USER_ONLY && s->sctlr_b && (opc & MO_SIZE) == MO_64) {
   1017         TCGv_i64 tmp = tcg_temp_new_i64();
   1018         tcg_gen_rotri_i64(tmp, val, 32);
   1019         tcg_gen_qemu_st_i64(tmp, addr, index, opc);
   1020         tcg_temp_free_i64(tmp);
   1021     } else {
   1022         tcg_gen_qemu_st_i64(val, addr, index, opc);
   1023     }
   1024     tcg_temp_free(addr);
   1025 }
   1026 
   1027 void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
   1028                      int index, MemOp opc)
   1029 {
   1030     gen_aa32_ld_internal_i32(s, val, a32, index, finalize_memop(s, opc));
   1031 }
   1032 
   1033 void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
   1034                      int index, MemOp opc)
   1035 {
   1036     gen_aa32_st_internal_i32(s, val, a32, index, finalize_memop(s, opc));
   1037 }
   1038 
   1039 void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
   1040                      int index, MemOp opc)
   1041 {
   1042     gen_aa32_ld_internal_i64(s, val, a32, index, finalize_memop(s, opc));
   1043 }
   1044 
   1045 void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
   1046                      int index, MemOp opc)
   1047 {
   1048     gen_aa32_st_internal_i64(s, val, a32, index, finalize_memop(s, opc));
   1049 }
   1050 
   1051 #define DO_GEN_LD(SUFF, OPC)                                            \
   1052     static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
   1053                                          TCGv_i32 a32, int index)       \
   1054     {                                                                   \
   1055         gen_aa32_ld_i32(s, val, a32, index, OPC);                       \
   1056     }
   1057 
   1058 #define DO_GEN_ST(SUFF, OPC)                                            \
   1059     static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
   1060                                          TCGv_i32 a32, int index)       \
   1061     {                                                                   \
   1062         gen_aa32_st_i32(s, val, a32, index, OPC);                       \
   1063     }
   1064 
   1065 static inline void gen_hvc(DisasContext *s, int imm16)
   1066 {
   1067     /* The pre HVC helper handles cases when HVC gets trapped
   1068      * as an undefined insn by runtime configuration (ie before
   1069      * the insn really executes).
   1070      */
   1071     gen_update_pc(s, 0);
   1072     gen_helper_pre_hvc(cpu_env);
   1073     /* Otherwise we will treat this as a real exception which
   1074      * happens after execution of the insn. (The distinction matters
   1075      * for the PC value reported to the exception handler and also
   1076      * for single stepping.)
   1077      */
   1078     s->svc_imm = imm16;
   1079     gen_update_pc(s, curr_insn_len(s));
   1080     s->base.is_jmp = DISAS_HVC;
   1081 }
   1082 
   1083 static inline void gen_smc(DisasContext *s)
   1084 {
   1085     /* As with HVC, we may take an exception either before or after
   1086      * the insn executes.
   1087      */
   1088     gen_update_pc(s, 0);
   1089     gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa32_smc()));
   1090     gen_update_pc(s, curr_insn_len(s));
   1091     s->base.is_jmp = DISAS_SMC;
   1092 }
   1093 
   1094 static void gen_exception_internal_insn(DisasContext *s, int excp)
   1095 {
   1096     gen_set_condexec(s);
   1097     gen_update_pc(s, 0);
   1098     gen_exception_internal(excp);
   1099     s->base.is_jmp = DISAS_NORETURN;
   1100 }
   1101 
   1102 static void gen_exception_el_v(int excp, uint32_t syndrome, TCGv_i32 tcg_el)
   1103 {
   1104     gen_helper_exception_with_syndrome_el(cpu_env, tcg_constant_i32(excp),
   1105                                           tcg_constant_i32(syndrome), tcg_el);
   1106 }
   1107 
   1108 static void gen_exception_el(int excp, uint32_t syndrome, uint32_t target_el)
   1109 {
   1110     gen_exception_el_v(excp, syndrome, tcg_constant_i32(target_el));
   1111 }
   1112 
   1113 static void gen_exception(int excp, uint32_t syndrome)
   1114 {
   1115     gen_helper_exception_with_syndrome(cpu_env, tcg_constant_i32(excp),
   1116                                        tcg_constant_i32(syndrome));
   1117 }
   1118 
   1119 static void gen_exception_insn_el_v(DisasContext *s, target_long pc_diff,
   1120                                     int excp, uint32_t syn, TCGv_i32 tcg_el)
   1121 {
   1122     if (s->aarch64) {
   1123         gen_a64_update_pc(s, pc_diff);
   1124     } else {
   1125         gen_set_condexec(s);
   1126         gen_update_pc(s, pc_diff);
   1127     }
   1128     gen_exception_el_v(excp, syn, tcg_el);
   1129     s->base.is_jmp = DISAS_NORETURN;
   1130 }
   1131 
   1132 void gen_exception_insn_el(DisasContext *s, target_long pc_diff, int excp,
   1133                            uint32_t syn, uint32_t target_el)
   1134 {
   1135     gen_exception_insn_el_v(s, pc_diff, excp, syn,
   1136                             tcg_constant_i32(target_el));
   1137 }
   1138 
   1139 void gen_exception_insn(DisasContext *s, target_long pc_diff,
   1140                         int excp, uint32_t syn)
   1141 {
   1142     if (s->aarch64) {
   1143         gen_a64_update_pc(s, pc_diff);
   1144     } else {
   1145         gen_set_condexec(s);
   1146         gen_update_pc(s, pc_diff);
   1147     }
   1148     gen_exception(excp, syn);
   1149     s->base.is_jmp = DISAS_NORETURN;
   1150 }
   1151 
   1152 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
   1153 {
   1154     gen_set_condexec(s);
   1155     gen_update_pc(s, 0);
   1156     gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syn));
   1157     s->base.is_jmp = DISAS_NORETURN;
   1158 }
   1159 
   1160 void unallocated_encoding(DisasContext *s)
   1161 {
   1162     /* Unallocated and reserved encodings are uncategorized */
   1163     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
   1164 }
   1165 
   1166 /* Force a TB lookup after an instruction that changes the CPU state.  */
   1167 void gen_lookup_tb(DisasContext *s)
   1168 {
   1169     gen_pc_plus_diff(s, cpu_R[15], curr_insn_len(s));
   1170     s->base.is_jmp = DISAS_EXIT;
   1171 }
   1172 
   1173 static inline void gen_hlt(DisasContext *s, int imm)
   1174 {
   1175     /* HLT. This has two purposes.
   1176      * Architecturally, it is an external halting debug instruction.
    1177      * Since QEMU doesn't implement external debug, we treat this as
    1178      * the architecture requires when halting debug is disabled: it will UNDEF.
   1179      * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
   1180      * and "HLT 0xF000" is an A32 semihosting syscall. These traps
   1181      * must trigger semihosting even for ARMv7 and earlier, where
   1182      * HLT was an undefined encoding.
   1183      * In system mode, we don't allow userspace access to
   1184      * semihosting, to provide some semblance of security
    1185      * (and for consistency with our 64-bit semihosting).
   1186      */
    1187     if (semihosting_enabled(s->current_el == 0) &&
   1188         (imm == (s->thumb ? 0x3c : 0xf000))) {
   1189         gen_exception_internal_insn(s, EXCP_SEMIHOST);
   1190         return;
   1191     }
   1192 
   1193     unallocated_encoding(s);
   1194 }
   1195 
   1196 /*
   1197  * Return the offset of a "full" NEON Dreg.
   1198  */
   1199 long neon_full_reg_offset(unsigned reg)
   1200 {
   1201     return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
   1202 }
   1203 
   1204 /*
   1205  * Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
   1206  * where 0 is the least significant end of the register.
   1207  */
   1208 long neon_element_offset(int reg, int element, MemOp memop)
   1209 {
   1210     int element_size = 1 << (memop & MO_SIZE);
   1211     int ofs = element * element_size;
   1212 #if HOST_BIG_ENDIAN
   1213     /*
   1214      * Calculate the offset assuming fully little-endian,
   1215      * then XOR to account for the order of the 8-byte units.
   1216      */
   1217     if (element_size < 8) {
   1218         ofs ^= 8 - element_size;
   1219     }
   1220 #endif
   1221     return neon_full_reg_offset(reg) + ofs;
   1222 }
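         /*
          * Example: on a big-endian host the MO_16 element 0 of a Dreg
          * lands at byte offset 6 within its 8-byte unit (0 ^ (8 - 2));
          * on a little-endian host it is at offset 0.
          */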
   1223 
   1224 /* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
   1225 long vfp_reg_offset(bool dp, unsigned reg)
   1226 {
   1227     if (dp) {
   1228         return neon_element_offset(reg, 0, MO_64);
   1229     } else {
   1230         return neon_element_offset(reg >> 1, reg & 1, MO_32);
   1231     }
   1232 }
   1233 
   1234 void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
   1235 {
   1236     long off = neon_element_offset(reg, ele, memop);
   1237 
   1238     switch (memop) {
   1239     case MO_SB:
   1240         tcg_gen_ld8s_i32(dest, cpu_env, off);
   1241         break;
   1242     case MO_UB:
   1243         tcg_gen_ld8u_i32(dest, cpu_env, off);
   1244         break;
   1245     case MO_SW:
   1246         tcg_gen_ld16s_i32(dest, cpu_env, off);
   1247         break;
   1248     case MO_UW:
   1249         tcg_gen_ld16u_i32(dest, cpu_env, off);
   1250         break;
   1251     case MO_UL:
   1252     case MO_SL:
   1253         tcg_gen_ld_i32(dest, cpu_env, off);
   1254         break;
   1255     default:
   1256         g_assert_not_reached();
   1257     }
   1258 }
   1259 
   1260 void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
   1261 {
   1262     long off = neon_element_offset(reg, ele, memop);
   1263 
   1264     switch (memop) {
   1265     case MO_SL:
   1266         tcg_gen_ld32s_i64(dest, cpu_env, off);
   1267         break;
   1268     case MO_UL:
   1269         tcg_gen_ld32u_i64(dest, cpu_env, off);
   1270         break;
   1271     case MO_UQ:
   1272         tcg_gen_ld_i64(dest, cpu_env, off);
   1273         break;
   1274     default:
   1275         g_assert_not_reached();
   1276     }
   1277 }
   1278 
   1279 void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
   1280 {
   1281     long off = neon_element_offset(reg, ele, memop);
   1282 
   1283     switch (memop) {
   1284     case MO_8:
   1285         tcg_gen_st8_i32(src, cpu_env, off);
   1286         break;
   1287     case MO_16:
   1288         tcg_gen_st16_i32(src, cpu_env, off);
   1289         break;
   1290     case MO_32:
   1291         tcg_gen_st_i32(src, cpu_env, off);
   1292         break;
   1293     default:
   1294         g_assert_not_reached();
   1295     }
   1296 }
   1297 
   1298 void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
   1299 {
   1300     long off = neon_element_offset(reg, ele, memop);
   1301 
   1302     switch (memop) {
   1303     case MO_32:
   1304         tcg_gen_st32_i64(src, cpu_env, off);
   1305         break;
   1306     case MO_64:
   1307         tcg_gen_st_i64(src, cpu_env, off);
   1308         break;
   1309     default:
   1310         g_assert_not_reached();
   1311     }
   1312 }
   1313 
   1314 #define ARM_CP_RW_BIT   (1 << 20)
   1315 
   1316 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
   1317 {
   1318     tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
   1319 }
   1320 
   1321 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
   1322 {
   1323     tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
   1324 }
   1325 
   1326 static inline TCGv_i32 iwmmxt_load_creg(int reg)
   1327 {
   1328     TCGv_i32 var = tcg_temp_new_i32();
   1329     tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
   1330     return var;
   1331 }
   1332 
   1333 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
   1334 {
   1335     tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
   1336     tcg_temp_free_i32(var);
   1337 }
   1338 
   1339 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
   1340 {
   1341     iwmmxt_store_reg(cpu_M0, rn);
   1342 }
   1343 
   1344 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
   1345 {
   1346     iwmmxt_load_reg(cpu_M0, rn);
   1347 }
   1348 
   1349 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
   1350 {
   1351     iwmmxt_load_reg(cpu_V1, rn);
   1352     tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
   1353 }
   1354 
   1355 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
   1356 {
   1357     iwmmxt_load_reg(cpu_V1, rn);
   1358     tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
   1359 }
   1360 
   1361 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
   1362 {
   1363     iwmmxt_load_reg(cpu_V1, rn);
   1364     tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
   1365 }
   1366 
   1367 #define IWMMXT_OP(name) \
   1368 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
   1369 { \
   1370     iwmmxt_load_reg(cpu_V1, rn); \
   1371     gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
   1372 }
   1373 
   1374 #define IWMMXT_OP_ENV(name) \
   1375 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
   1376 { \
   1377     iwmmxt_load_reg(cpu_V1, rn); \
   1378     gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
   1379 }
   1380 
   1381 #define IWMMXT_OP_ENV_SIZE(name) \
   1382 IWMMXT_OP_ENV(name##b) \
   1383 IWMMXT_OP_ENV(name##w) \
   1384 IWMMXT_OP_ENV(name##l)
   1385 
   1386 #define IWMMXT_OP_ENV1(name) \
   1387 static inline void gen_op_iwmmxt_##name##_M0(void) \
   1388 { \
   1389     gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
   1390 }
   1391 
   1392 IWMMXT_OP(maddsq)
   1393 IWMMXT_OP(madduq)
   1394 IWMMXT_OP(sadb)
   1395 IWMMXT_OP(sadw)
   1396 IWMMXT_OP(mulslw)
   1397 IWMMXT_OP(mulshw)
   1398 IWMMXT_OP(mululw)
   1399 IWMMXT_OP(muluhw)
   1400 IWMMXT_OP(macsw)
   1401 IWMMXT_OP(macuw)
   1402 
   1403 IWMMXT_OP_ENV_SIZE(unpackl)
   1404 IWMMXT_OP_ENV_SIZE(unpackh)
   1405 
   1406 IWMMXT_OP_ENV1(unpacklub)
   1407 IWMMXT_OP_ENV1(unpackluw)
   1408 IWMMXT_OP_ENV1(unpacklul)
   1409 IWMMXT_OP_ENV1(unpackhub)
   1410 IWMMXT_OP_ENV1(unpackhuw)
   1411 IWMMXT_OP_ENV1(unpackhul)
   1412 IWMMXT_OP_ENV1(unpacklsb)
   1413 IWMMXT_OP_ENV1(unpacklsw)
   1414 IWMMXT_OP_ENV1(unpacklsl)
   1415 IWMMXT_OP_ENV1(unpackhsb)
   1416 IWMMXT_OP_ENV1(unpackhsw)
   1417 IWMMXT_OP_ENV1(unpackhsl)
   1418 
   1419 IWMMXT_OP_ENV_SIZE(cmpeq)
   1420 IWMMXT_OP_ENV_SIZE(cmpgtu)
   1421 IWMMXT_OP_ENV_SIZE(cmpgts)
   1422 
   1423 IWMMXT_OP_ENV_SIZE(mins)
   1424 IWMMXT_OP_ENV_SIZE(minu)
   1425 IWMMXT_OP_ENV_SIZE(maxs)
   1426 IWMMXT_OP_ENV_SIZE(maxu)
   1427 
   1428 IWMMXT_OP_ENV_SIZE(subn)
   1429 IWMMXT_OP_ENV_SIZE(addn)
   1430 IWMMXT_OP_ENV_SIZE(subu)
   1431 IWMMXT_OP_ENV_SIZE(addu)
   1432 IWMMXT_OP_ENV_SIZE(subs)
   1433 IWMMXT_OP_ENV_SIZE(adds)
   1434 
   1435 IWMMXT_OP_ENV(avgb0)
   1436 IWMMXT_OP_ENV(avgb1)
   1437 IWMMXT_OP_ENV(avgw0)
   1438 IWMMXT_OP_ENV(avgw1)
   1439 
   1440 IWMMXT_OP_ENV(packuw)
   1441 IWMMXT_OP_ENV(packul)
   1442 IWMMXT_OP_ENV(packuq)
   1443 IWMMXT_OP_ENV(packsw)
   1444 IWMMXT_OP_ENV(packsl)
   1445 IWMMXT_OP_ENV(packsq)
   1446 
   1447 static void gen_op_iwmmxt_set_mup(void)
   1448 {
   1449     TCGv_i32 tmp;
   1450     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
   1451     tcg_gen_ori_i32(tmp, tmp, 2);
   1452     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
   1453 }
   1454 
   1455 static void gen_op_iwmmxt_set_cup(void)
   1456 {
   1457     TCGv_i32 tmp;
   1458     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
   1459     tcg_gen_ori_i32(tmp, tmp, 1);
   1460     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
   1461 }
   1462 
   1463 static void gen_op_iwmmxt_setpsr_nz(void)
   1464 {
   1465     TCGv_i32 tmp = tcg_temp_new_i32();
   1466     gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
   1467     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
   1468 }
   1469 
   1470 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
   1471 {
   1472     iwmmxt_load_reg(cpu_V1, rn);
   1473     tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
   1474     tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
   1475 }
   1476 
   1477 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
   1478                                      TCGv_i32 dest)
   1479 {
   1480     int rd;
   1481     uint32_t offset;
   1482     TCGv_i32 tmp;
   1483 
   1484     rd = (insn >> 16) & 0xf;
   1485     tmp = load_reg(s, rd);
   1486 
   1487     offset = (insn & 0xff) << ((insn >> 7) & 2);
   1488     if (insn & (1 << 24)) {
   1489         /* Pre indexed */
   1490         if (insn & (1 << 23))
   1491             tcg_gen_addi_i32(tmp, tmp, offset);
   1492         else
   1493             tcg_gen_addi_i32(tmp, tmp, -offset);
   1494         tcg_gen_mov_i32(dest, tmp);
   1495         if (insn & (1 << 21))
   1496             store_reg(s, rd, tmp);
   1497         else
   1498             tcg_temp_free_i32(tmp);
   1499     } else if (insn & (1 << 21)) {
   1500         /* Post indexed */
   1501         tcg_gen_mov_i32(dest, tmp);
   1502         if (insn & (1 << 23))
   1503             tcg_gen_addi_i32(tmp, tmp, offset);
   1504         else
   1505             tcg_gen_addi_i32(tmp, tmp, -offset);
   1506         store_reg(s, rd, tmp);
   1507     } else if (!(insn & (1 << 23)))
   1508         return 1;
   1509     return 0;
   1510 }
   1511 
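         /*
          * Fetch a shift count for the W-shift ops: from one of wCGR0..wCGR3
          * when insn bit 8 is set, otherwise from the low 32 bits of the wRn
          * named in insn[3:0]; the count is masked to the caller-given width.
          */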
   1512 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
   1513 {
   1514     int rd = (insn >> 0) & 0xf;
   1515     TCGv_i32 tmp;
   1516 
   1517     if (insn & (1 << 8)) {
   1518         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
   1519             return 1;
   1520         } else {
   1521             tmp = iwmmxt_load_creg(rd);
   1522         }
   1523     } else {
   1524         tmp = tcg_temp_new_i32();
   1525         iwmmxt_load_reg(cpu_V0, rd);
   1526         tcg_gen_extrl_i64_i32(tmp, cpu_V0);
   1527     }
   1528     tcg_gen_andi_i32(tmp, tmp, mask);
   1529     tcg_gen_mov_i32(dest, tmp);
   1530     tcg_temp_free_i32(tmp);
   1531     return 0;
   1532 }
   1533 
    1534 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
    1535    (i.e. an undefined instruction).  */
   1536 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
   1537 {
   1538     int rd, wrd;
   1539     int rdhi, rdlo, rd0, rd1, i;
   1540     TCGv_i32 addr;
   1541     TCGv_i32 tmp, tmp2, tmp3;
   1542 
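             /*
              * iwMMXt claims coprocessors 0 and 1, so the LDC/STC-format
              * encodings (insn[27:25] == 0b110 with insn[11:9] == 0) cover
              * its loads, stores and 64-bit register transfers.
              */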
   1543     if ((insn & 0x0e000e00) == 0x0c000000) {
   1544         if ((insn & 0x0fe00ff0) == 0x0c400000) {
   1545             wrd = insn & 0xf;
   1546             rdlo = (insn >> 12) & 0xf;
   1547             rdhi = (insn >> 16) & 0xf;
   1548             if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
   1549                 iwmmxt_load_reg(cpu_V0, wrd);
   1550                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
   1551                 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
   1552             } else {                                    /* TMCRR */
   1553                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
   1554                 iwmmxt_store_reg(cpu_V0, wrd);
   1555                 gen_op_iwmmxt_set_mup();
   1556             }
   1557             return 0;
   1558         }
   1559 
   1560         wrd = (insn >> 12) & 0xf;
   1561         addr = tcg_temp_new_i32();
   1562         if (gen_iwmmxt_address(s, insn, addr)) {
   1563             tcg_temp_free_i32(addr);
   1564             return 1;
   1565         }
   1566         if (insn & ARM_CP_RW_BIT) {
   1567             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
   1568                 tmp = tcg_temp_new_i32();
   1569                 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
   1570                 iwmmxt_store_creg(wrd, tmp);
   1571             } else {
   1572                 i = 1;
   1573                 if (insn & (1 << 8)) {
   1574                     if (insn & (1 << 22)) {             /* WLDRD */
   1575                         gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
   1576                         i = 0;
   1577                     } else {                            /* WLDRW wRd */
   1578                         tmp = tcg_temp_new_i32();
   1579                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
   1580                     }
   1581                 } else {
   1582                     tmp = tcg_temp_new_i32();
   1583                     if (insn & (1 << 22)) {             /* WLDRH */
   1584                         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
   1585                     } else {                            /* WLDRB */
   1586                         gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
   1587                     }
   1588                 }
   1589                 if (i) {
   1590                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
   1591                     tcg_temp_free_i32(tmp);
   1592                 }
   1593                 gen_op_iwmmxt_movq_wRn_M0(wrd);
   1594             }
   1595         } else {
   1596             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
   1597                 tmp = iwmmxt_load_creg(wrd);
   1598                 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
   1599             } else {
   1600                 gen_op_iwmmxt_movq_M0_wRn(wrd);
   1601                 tmp = tcg_temp_new_i32();
   1602                 if (insn & (1 << 8)) {
   1603                     if (insn & (1 << 22)) {             /* WSTRD */
   1604                         gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
   1605                     } else {                            /* WSTRW wRd */
   1606                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
   1607                         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
   1608                     }
   1609                 } else {
   1610                     if (insn & (1 << 22)) {             /* WSTRH */
   1611                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
   1612                         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
   1613                     } else {                            /* WSTRB */
   1614                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
   1615                         gen_aa32_st8(s, tmp, addr, get_mem_index(s));
   1616                     }
   1617                 }
   1618             }
   1619             tcg_temp_free_i32(tmp);
   1620         }
   1621         tcg_temp_free_i32(addr);
   1622         return 0;
   1623     }
   1624 
   1625     if ((insn & 0x0f000000) != 0x0e000000)
   1626         return 1;
   1627 
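             /*
              * Everything else is keyed on a 12-bit opcode formed from
              * insn[23:20] (the high nibble) and insn[11:4].
              */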
   1628     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
   1629     case 0x000:                                                 /* WOR */
   1630         wrd = (insn >> 12) & 0xf;
   1631         rd0 = (insn >> 0) & 0xf;
   1632         rd1 = (insn >> 16) & 0xf;
   1633         gen_op_iwmmxt_movq_M0_wRn(rd0);
   1634         gen_op_iwmmxt_orq_M0_wRn(rd1);
   1635         gen_op_iwmmxt_setpsr_nz();
   1636         gen_op_iwmmxt_movq_wRn_M0(wrd);
   1637         gen_op_iwmmxt_set_mup();
   1638         gen_op_iwmmxt_set_cup();
   1639         break;
   1640     case 0x011:                                                 /* TMCR */
   1641         if (insn & 0xf)
   1642             return 1;
   1643         rd = (insn >> 12) & 0xf;
   1644         wrd = (insn >> 16) & 0xf;
   1645         switch (wrd) {
   1646         case ARM_IWMMXT_wCID:
   1647         case ARM_IWMMXT_wCASF:
   1648             break;
   1649         case ARM_IWMMXT_wCon:
   1650             gen_op_iwmmxt_set_cup();
   1651             /* Fall through.  */
   1652         case ARM_IWMMXT_wCSSF:
   1653             tmp = iwmmxt_load_creg(wrd);
   1654             tmp2 = load_reg(s, rd);
   1655             tcg_gen_andc_i32(tmp, tmp, tmp2);
   1656             tcg_temp_free_i32(tmp2);
   1657             iwmmxt_store_creg(wrd, tmp);
   1658             break;
   1659         case ARM_IWMMXT_wCGR0:
   1660         case ARM_IWMMXT_wCGR1:
   1661         case ARM_IWMMXT_wCGR2:
   1662         case ARM_IWMMXT_wCGR3:
   1663             gen_op_iwmmxt_set_cup();
   1664             tmp = load_reg(s, rd);
   1665             iwmmxt_store_creg(wrd, tmp);
   1666             break;
   1667         default:
   1668             return 1;
   1669         }
   1670         break;
   1671     case 0x100:                                                 /* WXOR */
   1672         wrd = (insn >> 12) & 0xf;
   1673         rd0 = (insn >> 0) & 0xf;
   1674         rd1 = (insn >> 16) & 0xf;
   1675         gen_op_iwmmxt_movq_M0_wRn(rd0);
   1676         gen_op_iwmmxt_xorq_M0_wRn(rd1);
   1677         gen_op_iwmmxt_setpsr_nz();
   1678         gen_op_iwmmxt_movq_wRn_M0(wrd);
   1679         gen_op_iwmmxt_set_mup();
   1680         gen_op_iwmmxt_set_cup();
   1681         break;
   1682     case 0x111:                                                 /* TMRC */
   1683         if (insn & 0xf)
   1684             return 1;
   1685         rd = (insn >> 12) & 0xf;
   1686         wrd = (insn >> 16) & 0xf;
   1687         tmp = iwmmxt_load_creg(wrd);
   1688         store_reg(s, rd, tmp);
   1689         break;
   1690     case 0x300:                                                 /* WANDN */
   1691         wrd = (insn >> 12) & 0xf;
   1692         rd0 = (insn >> 0) & 0xf;
   1693         rd1 = (insn >> 16) & 0xf;
   1694         gen_op_iwmmxt_movq_M0_wRn(rd0);
    1695         tcg_gen_not_i64(cpu_M0, cpu_M0);    /* AND-NOT wants ~, not unary minus */
   1696         gen_op_iwmmxt_andq_M0_wRn(rd1);
   1697         gen_op_iwmmxt_setpsr_nz();
   1698         gen_op_iwmmxt_movq_wRn_M0(wrd);
   1699         gen_op_iwmmxt_set_mup();
   1700         gen_op_iwmmxt_set_cup();
   1701         break;
   1702     case 0x200:                                                 /* WAND */
   1703         wrd = (insn >> 12) & 0xf;
   1704         rd0 = (insn >> 0) & 0xf;
   1705         rd1 = (insn >> 16) & 0xf;
   1706         gen_op_iwmmxt_movq_M0_wRn(rd0);
   1707         gen_op_iwmmxt_andq_M0_wRn(rd1);
   1708         gen_op_iwmmxt_setpsr_nz();
   1709         gen_op_iwmmxt_movq_wRn_M0(wrd);
   1710         gen_op_iwmmxt_set_mup();
   1711         gen_op_iwmmxt_set_cup();
   1712         break;
   1713     case 0x810: case 0xa10:                             /* WMADD */
   1714         wrd = (insn >> 12) & 0xf;
   1715         rd0 = (insn >> 0) & 0xf;
   1716         rd1 = (insn >> 16) & 0xf;
   1717         gen_op_iwmmxt_movq_M0_wRn(rd0);
   1718         if (insn & (1 << 21))
   1719             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
   1720         else
   1721             gen_op_iwmmxt_madduq_M0_wRn(rd1);
   1722         gen_op_iwmmxt_movq_wRn_M0(wrd);
   1723         gen_op_iwmmxt_set_mup();
   1724         break;
   1725     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
   1726         wrd = (insn >> 12) & 0xf;
   1727         rd0 = (insn >> 16) & 0xf;
   1728         rd1 = (insn >> 0) & 0xf;
   1729         gen_op_iwmmxt_movq_M0_wRn(rd0);
   1730         switch ((insn >> 22) & 3) {
   1731         case 0:
   1732             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
   1733             break;
   1734         case 1:
   1735             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
   1736             break;
   1737         case 2:
   1738             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
   1739             break;
   1740         case 3:
   1741             return 1;
   1742         }
   1743         gen_op_iwmmxt_movq_wRn_M0(wrd);
   1744         gen_op_iwmmxt_set_mup();
   1745         gen_op_iwmmxt_set_cup();
   1746         break;
   1747     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
   1748         wrd = (insn >> 12) & 0xf;
   1749         rd0 = (insn >> 16) & 0xf;
   1750         rd1 = (insn >> 0) & 0xf;
   1751         gen_op_iwmmxt_movq_M0_wRn(rd0);
   1752         switch ((insn >> 22) & 3) {
   1753         case 0:
   1754             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
   1755             break;
   1756         case 1:
   1757             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
   1758             break;
   1759         case 2:
   1760             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
   1761             break;
   1762         case 3:
   1763             return 1;
   1764         }
   1765         gen_op_iwmmxt_movq_wRn_M0(wrd);
   1766         gen_op_iwmmxt_set_mup();
   1767         gen_op_iwmmxt_set_cup();
   1768         break;
   1769     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
   1770         wrd = (insn >> 12) & 0xf;
   1771         rd0 = (insn >> 16) & 0xf;
   1772         rd1 = (insn >> 0) & 0xf;
   1773         gen_op_iwmmxt_movq_M0_wRn(rd0);
   1774         if (insn & (1 << 22))
   1775             gen_op_iwmmxt_sadw_M0_wRn(rd1);
   1776         else
   1777             gen_op_iwmmxt_sadb_M0_wRn(rd1);
   1778         if (!(insn & (1 << 20)))
   1779             gen_op_iwmmxt_addl_M0_wRn(wrd);
   1780         gen_op_iwmmxt_movq_wRn_M0(wrd);
   1781         gen_op_iwmmxt_set_mup();
   1782         break;
   1783     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
   1784         wrd = (insn >> 12) & 0xf;
   1785         rd0 = (insn >> 16) & 0xf;
   1786         rd1 = (insn >> 0) & 0xf;
   1787         gen_op_iwmmxt_movq_M0_wRn(rd0);
   1788         if (insn & (1 << 21)) {
   1789             if (insn & (1 << 20))
   1790                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
   1791             else
   1792                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
   1793         } else {
   1794             if (insn & (1 << 20))
   1795                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
   1796             else
   1797                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
   1798         }
   1799         gen_op_iwmmxt_movq_wRn_M0(wrd);
   1800         gen_op_iwmmxt_set_mup();
   1801         break;
   1802     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
   1803         wrd = (insn >> 12) & 0xf;
   1804         rd0 = (insn >> 16) & 0xf;
   1805         rd1 = (insn >> 0) & 0xf;
   1806         gen_op_iwmmxt_movq_M0_wRn(rd0);
   1807         if (insn & (1 << 21))
   1808             gen_op_iwmmxt_macsw_M0_wRn(rd1);
   1809         else
   1810             gen_op_iwmmxt_macuw_M0_wRn(rd1);
   1811         if (!(insn & (1 << 20))) {
   1812             iwmmxt_load_reg(cpu_V1, wrd);
   1813             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
   1814         }
   1815         gen_op_iwmmxt_movq_wRn_M0(wrd);
   1816         gen_op_iwmmxt_set_mup();
   1817         break;
   1818     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
   1819         wrd = (insn >> 12) & 0xf;
   1820         rd0 = (insn >> 16) & 0xf;
   1821         rd1 = (insn >> 0) & 0xf;
   1822         gen_op_iwmmxt_movq_M0_wRn(rd0);
   1823         switch ((insn >> 22) & 3) {
   1824         case 0:
   1825             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
   1826             break;
   1827         case 1:
   1828             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
   1829             break;
   1830         case 2:
   1831             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
   1832             break;
   1833         case 3:
   1834             return 1;
   1835         }
   1836         gen_op_iwmmxt_movq_wRn_M0(wrd);
   1837         gen_op_iwmmxt_set_mup();
   1838         gen_op_iwmmxt_set_cup();
   1839         break;
   1840     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
   1841         wrd = (insn >> 12) & 0xf;
   1842         rd0 = (insn >> 16) & 0xf;
   1843         rd1 = (insn >> 0) & 0xf;
   1844         gen_op_iwmmxt_movq_M0_wRn(rd0);
   1845         if (insn & (1 << 22)) {
   1846             if (insn & (1 << 20))
   1847                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
   1848             else
   1849                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
   1850         } else {
   1851             if (insn & (1 << 20))
   1852                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
   1853             else
   1854                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
   1855         }
   1856         gen_op_iwmmxt_movq_wRn_M0(wrd);
   1857         gen_op_iwmmxt_set_mup();
   1858         gen_op_iwmmxt_set_cup();
   1859         break;
   1860     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
   1861         wrd = (insn >> 12) & 0xf;
   1862         rd0 = (insn >> 16) & 0xf;
   1863         rd1 = (insn >> 0) & 0xf;
   1864         gen_op_iwmmxt_movq_M0_wRn(rd0);
   1865         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
   1866         tcg_gen_andi_i32(tmp, tmp, 7);
   1867         iwmmxt_load_reg(cpu_V1, rd1);
   1868         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
   1869         tcg_temp_free_i32(tmp);
   1870         gen_op_iwmmxt_movq_wRn_M0(wrd);
   1871         gen_op_iwmmxt_set_mup();
   1872         break;
   1873     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
   1874         if (((insn >> 6) & 3) == 3)
   1875             return 1;
   1876         rd = (insn >> 12) & 0xf;
   1877         wrd = (insn >> 16) & 0xf;
   1878         tmp = load_reg(s, rd);
   1879         gen_op_iwmmxt_movq_M0_wRn(wrd);
   1880         switch ((insn >> 6) & 3) {
   1881         case 0:
   1882             tmp2 = tcg_constant_i32(0xff);
   1883             tmp3 = tcg_constant_i32((insn & 7) << 3);
   1884             break;
   1885         case 1:
   1886             tmp2 = tcg_constant_i32(0xffff);
   1887             tmp3 = tcg_constant_i32((insn & 3) << 4);
   1888             break;
   1889         case 2:
   1890             tmp2 = tcg_constant_i32(0xffffffff);
   1891             tmp3 = tcg_constant_i32((insn & 1) << 5);
   1892             break;
   1893         default:
   1894             g_assert_not_reached();
   1895         }
   1896         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
   1897         tcg_temp_free_i32(tmp);
   1898         gen_op_iwmmxt_movq_wRn_M0(wrd);
   1899         gen_op_iwmmxt_set_mup();
   1900         break;
   1901     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
   1902         rd = (insn >> 12) & 0xf;
   1903         wrd = (insn >> 16) & 0xf;
   1904         if (rd == 15 || ((insn >> 22) & 3) == 3)
   1905             return 1;
   1906         gen_op_iwmmxt_movq_M0_wRn(wrd);
   1907         tmp = tcg_temp_new_i32();
   1908         switch ((insn >> 22) & 3) {
   1909         case 0:
   1910             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
   1911             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
   1912             if (insn & 8) {
   1913                 tcg_gen_ext8s_i32(tmp, tmp);
   1914             } else {
   1915                 tcg_gen_andi_i32(tmp, tmp, 0xff);
   1916             }
   1917             break;
   1918         case 1:
   1919             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
   1920             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
   1921             if (insn & 8) {
   1922                 tcg_gen_ext16s_i32(tmp, tmp);
   1923             } else {
   1924                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
   1925             }
   1926             break;
   1927         case 2:
   1928             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
   1929             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
   1930             break;
   1931         }
   1932         store_reg(s, rd, tmp);
   1933         break;
   1934     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
   1935         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
   1936             return 1;
   1937         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
   1938         switch ((insn >> 22) & 3) {
   1939         case 0:
   1940             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
   1941             break;
   1942         case 1:
   1943             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
   1944             break;
   1945         case 2:
   1946             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
   1947             break;
   1948         }
   1949         tcg_gen_shli_i32(tmp, tmp, 28);
   1950         gen_set_nzcv(tmp);
   1951         tcg_temp_free_i32(tmp);
   1952         break;
   1953     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
   1954         if (((insn >> 6) & 3) == 3)
   1955             return 1;
   1956         rd = (insn >> 12) & 0xf;
   1957         wrd = (insn >> 16) & 0xf;
   1958         tmp = load_reg(s, rd);
   1959         switch ((insn >> 6) & 3) {
   1960         case 0:
   1961             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
   1962             break;
   1963         case 1:
   1964             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
   1965             break;
   1966         case 2:
   1967             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
   1968             break;
   1969         }
   1970         tcg_temp_free_i32(tmp);
   1971         gen_op_iwmmxt_movq_wRn_M0(wrd);
   1972         gen_op_iwmmxt_set_mup();
   1973         break;
   1974     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
   1975         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
   1976             return 1;
   1977         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
   1978         tmp2 = tcg_temp_new_i32();
   1979         tcg_gen_mov_i32(tmp2, tmp);
   1980         switch ((insn >> 22) & 3) {
   1981         case 0:
   1982             for (i = 0; i < 7; i ++) {
   1983                 tcg_gen_shli_i32(tmp2, tmp2, 4);
   1984                 tcg_gen_and_i32(tmp, tmp, tmp2);
   1985             }
   1986             break;
   1987         case 1:
   1988             for (i = 0; i < 3; i ++) {
   1989                 tcg_gen_shli_i32(tmp2, tmp2, 8);
   1990                 tcg_gen_and_i32(tmp, tmp, tmp2);
   1991             }
   1992             break;
   1993         case 2:
   1994             tcg_gen_shli_i32(tmp2, tmp2, 16);
   1995             tcg_gen_and_i32(tmp, tmp, tmp2);
   1996             break;
   1997         }
   1998         gen_set_nzcv(tmp);
   1999         tcg_temp_free_i32(tmp2);
   2000         tcg_temp_free_i32(tmp);
   2001         break;
   2002     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
   2003         wrd = (insn >> 12) & 0xf;
   2004         rd0 = (insn >> 16) & 0xf;
   2005         gen_op_iwmmxt_movq_M0_wRn(rd0);
   2006         switch ((insn >> 22) & 3) {
   2007         case 0:
   2008             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
   2009             break;
   2010         case 1:
   2011             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
   2012             break;
   2013         case 2:
   2014             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
   2015             break;
   2016         case 3:
   2017             return 1;
   2018         }
   2019         gen_op_iwmmxt_movq_wRn_M0(wrd);
   2020         gen_op_iwmmxt_set_mup();
   2021         break;
   2022     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
   2023         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
   2024             return 1;
   2025         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
   2026         tmp2 = tcg_temp_new_i32();
   2027         tcg_gen_mov_i32(tmp2, tmp);
   2028         switch ((insn >> 22) & 3) {
   2029         case 0:
   2030             for (i = 0; i < 7; i ++) {
   2031                 tcg_gen_shli_i32(tmp2, tmp2, 4);
   2032                 tcg_gen_or_i32(tmp, tmp, tmp2);
   2033             }
   2034             break;
   2035         case 1:
   2036             for (i = 0; i < 3; i ++) {
   2037                 tcg_gen_shli_i32(tmp2, tmp2, 8);
   2038                 tcg_gen_or_i32(tmp, tmp, tmp2);
   2039             }
   2040             break;
   2041         case 2:
   2042             tcg_gen_shli_i32(tmp2, tmp2, 16);
   2043             tcg_gen_or_i32(tmp, tmp, tmp2);
   2044             break;
   2045         }
   2046         gen_set_nzcv(tmp);
   2047         tcg_temp_free_i32(tmp2);
   2048         tcg_temp_free_i32(tmp);
   2049         break;
   2050     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
   2051         rd = (insn >> 12) & 0xf;
   2052         rd0 = (insn >> 16) & 0xf;
   2053         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
   2054             return 1;
   2055         gen_op_iwmmxt_movq_M0_wRn(rd0);
   2056         tmp = tcg_temp_new_i32();
   2057         switch ((insn >> 22) & 3) {
   2058         case 0:
   2059             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
   2060             break;
   2061         case 1:
   2062             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
   2063             break;
   2064         case 2:
   2065             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
   2066             break;
   2067         }
   2068         store_reg(s, rd, tmp);
   2069         break;
   2070     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
   2071     case 0x906: case 0xb06: case 0xd06: case 0xf06:
   2072         wrd = (insn >> 12) & 0xf;
   2073         rd0 = (insn >> 16) & 0xf;
   2074         rd1 = (insn >> 0) & 0xf;
   2075         gen_op_iwmmxt_movq_M0_wRn(rd0);
   2076         switch ((insn >> 22) & 3) {
   2077         case 0:
   2078             if (insn & (1 << 21))
   2079                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
   2080             else
   2081                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
   2082             break;
   2083         case 1:
   2084             if (insn & (1 << 21))
   2085                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
   2086             else
   2087                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
   2088             break;
   2089         case 2:
   2090             if (insn & (1 << 21))
   2091                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
   2092             else
   2093                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
   2094             break;
   2095         case 3:
   2096             return 1;
   2097         }
   2098         gen_op_iwmmxt_movq_wRn_M0(wrd);
   2099         gen_op_iwmmxt_set_mup();
   2100         gen_op_iwmmxt_set_cup();
   2101         break;
   2102     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
   2103     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
   2104         wrd = (insn >> 12) & 0xf;
   2105         rd0 = (insn >> 16) & 0xf;
   2106         gen_op_iwmmxt_movq_M0_wRn(rd0);
   2107         switch ((insn >> 22) & 3) {
   2108         case 0:
   2109             if (insn & (1 << 21))
   2110                 gen_op_iwmmxt_unpacklsb_M0();
   2111             else
   2112                 gen_op_iwmmxt_unpacklub_M0();
   2113             break;
   2114         case 1:
   2115             if (insn & (1 << 21))
   2116                 gen_op_iwmmxt_unpacklsw_M0();
   2117             else
   2118                 gen_op_iwmmxt_unpackluw_M0();
   2119             break;
   2120         case 2:
   2121             if (insn & (1 << 21))
   2122                 gen_op_iwmmxt_unpacklsl_M0();
   2123             else
   2124                 gen_op_iwmmxt_unpacklul_M0();
   2125             break;
   2126         case 3:
   2127             return 1;
   2128         }
   2129         gen_op_iwmmxt_movq_wRn_M0(wrd);
   2130         gen_op_iwmmxt_set_mup();
   2131         gen_op_iwmmxt_set_cup();
   2132         break;
   2133     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
   2134     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
   2135         wrd = (insn >> 12) & 0xf;
   2136         rd0 = (insn >> 16) & 0xf;
   2137         gen_op_iwmmxt_movq_M0_wRn(rd0);
   2138         switch ((insn >> 22) & 3) {
   2139         case 0:
   2140             if (insn & (1 << 21))
   2141                 gen_op_iwmmxt_unpackhsb_M0();
   2142             else
   2143                 gen_op_iwmmxt_unpackhub_M0();
   2144             break;
   2145         case 1:
   2146             if (insn & (1 << 21))
   2147                 gen_op_iwmmxt_unpackhsw_M0();
   2148             else
   2149                 gen_op_iwmmxt_unpackhuw_M0();
   2150             break;
   2151         case 2:
   2152             if (insn & (1 << 21))
   2153                 gen_op_iwmmxt_unpackhsl_M0();
   2154             else
   2155                 gen_op_iwmmxt_unpackhul_M0();
   2156             break;
   2157         case 3:
   2158             return 1;
   2159         }
   2160         gen_op_iwmmxt_movq_wRn_M0(wrd);
   2161         gen_op_iwmmxt_set_mup();
   2162         gen_op_iwmmxt_set_cup();
   2163         break;
   2164     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
   2165     case 0x214: case 0x614: case 0xa14: case 0xe14:
   2166         if (((insn >> 22) & 3) == 0)
   2167             return 1;
   2168         wrd = (insn >> 12) & 0xf;
   2169         rd0 = (insn >> 16) & 0xf;
   2170         gen_op_iwmmxt_movq_M0_wRn(rd0);
   2171         tmp = tcg_temp_new_i32();
   2172         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
   2173             tcg_temp_free_i32(tmp);
   2174             return 1;
   2175         }
   2176         switch ((insn >> 22) & 3) {
   2177         case 1:
   2178             gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
   2179             break;
   2180         case 2:
   2181             gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
   2182             break;
   2183         case 3:
   2184             gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
   2185             break;
   2186         }
   2187         tcg_temp_free_i32(tmp);
   2188         gen_op_iwmmxt_movq_wRn_M0(wrd);
   2189         gen_op_iwmmxt_set_mup();
   2190         gen_op_iwmmxt_set_cup();
   2191         break;
   2192     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
   2193     case 0x014: case 0x414: case 0x814: case 0xc14:
   2194         if (((insn >> 22) & 3) == 0)
   2195             return 1;
   2196         wrd = (insn >> 12) & 0xf;
   2197         rd0 = (insn >> 16) & 0xf;
   2198         gen_op_iwmmxt_movq_M0_wRn(rd0);
   2199         tmp = tcg_temp_new_i32();
   2200         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
   2201             tcg_temp_free_i32(tmp);
   2202             return 1;
   2203         }
   2204         switch ((insn >> 22) & 3) {
   2205         case 1:
   2206             gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
   2207             break;
   2208         case 2:
   2209             gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
   2210             break;
   2211         case 3:
   2212             gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
   2213             break;
   2214         }
   2215         tcg_temp_free_i32(tmp);
   2216         gen_op_iwmmxt_movq_wRn_M0(wrd);
   2217         gen_op_iwmmxt_set_mup();
   2218         gen_op_iwmmxt_set_cup();
   2219         break;
   2220     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
   2221     case 0x114: case 0x514: case 0x914: case 0xd14:
   2222         if (((insn >> 22) & 3) == 0)
   2223             return 1;
   2224         wrd = (insn >> 12) & 0xf;
   2225         rd0 = (insn >> 16) & 0xf;
   2226         gen_op_iwmmxt_movq_M0_wRn(rd0);
   2227         tmp = tcg_temp_new_i32();
   2228         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
   2229             tcg_temp_free_i32(tmp);
   2230             return 1;
   2231         }
   2232         switch ((insn >> 22) & 3) {
   2233         case 1:
   2234             gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
   2235             break;
   2236         case 2:
   2237             gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
   2238             break;
   2239         case 3:
   2240             gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
   2241             break;
   2242         }
   2243         tcg_temp_free_i32(tmp);
   2244         gen_op_iwmmxt_movq_wRn_M0(wrd);
   2245         gen_op_iwmmxt_set_mup();
   2246         gen_op_iwmmxt_set_cup();
   2247         break;
   2248     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
   2249     case 0x314: case 0x714: case 0xb14: case 0xf14:
   2250         if (((insn >> 22) & 3) == 0)
   2251             return 1;
   2252         wrd = (insn >> 12) & 0xf;
   2253         rd0 = (insn >> 16) & 0xf;
   2254         gen_op_iwmmxt_movq_M0_wRn(rd0);
   2255         tmp = tcg_temp_new_i32();
   2256         switch ((insn >> 22) & 3) {
   2257         case 1:
   2258             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
   2259                 tcg_temp_free_i32(tmp);
   2260                 return 1;
   2261             }
   2262             gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
   2263             break;
   2264         case 2:
   2265             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
   2266                 tcg_temp_free_i32(tmp);
   2267                 return 1;
   2268             }
   2269             gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
   2270             break;
   2271         case 3:
   2272             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
   2273                 tcg_temp_free_i32(tmp);
   2274                 return 1;
   2275             }
   2276             gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
   2277             break;
   2278         }
   2279         tcg_temp_free_i32(tmp);
   2280         gen_op_iwmmxt_movq_wRn_M0(wrd);
   2281         gen_op_iwmmxt_set_mup();
   2282         gen_op_iwmmxt_set_cup();
   2283         break;
   2284     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
   2285     case 0x916: case 0xb16: case 0xd16: case 0xf16:
   2286         wrd = (insn >> 12) & 0xf;
   2287         rd0 = (insn >> 16) & 0xf;
   2288         rd1 = (insn >> 0) & 0xf;
   2289         gen_op_iwmmxt_movq_M0_wRn(rd0);
   2290         switch ((insn >> 22) & 3) {
   2291         case 0:
   2292             if (insn & (1 << 21))
   2293                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
   2294             else
   2295                 gen_op_iwmmxt_minub_M0_wRn(rd1);
   2296             break;
   2297         case 1:
   2298             if (insn & (1 << 21))
   2299                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
   2300             else
   2301                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
   2302             break;
   2303         case 2:
   2304             if (insn & (1 << 21))
   2305                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
   2306             else
   2307                 gen_op_iwmmxt_minul_M0_wRn(rd1);
   2308             break;
   2309         case 3:
   2310             return 1;
   2311         }
   2312         gen_op_iwmmxt_movq_wRn_M0(wrd);
   2313         gen_op_iwmmxt_set_mup();
   2314         break;
   2315     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
   2316     case 0x816: case 0xa16: case 0xc16: case 0xe16:
   2317         wrd = (insn >> 12) & 0xf;
   2318         rd0 = (insn >> 16) & 0xf;
   2319         rd1 = (insn >> 0) & 0xf;
   2320         gen_op_iwmmxt_movq_M0_wRn(rd0);
   2321         switch ((insn >> 22) & 3) {
   2322         case 0:
   2323             if (insn & (1 << 21))
   2324                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
   2325             else
   2326                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
   2327             break;
   2328         case 1:
   2329             if (insn & (1 << 21))
   2330                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
   2331             else
   2332                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
   2333             break;
   2334         case 2:
   2335             if (insn & (1 << 21))
   2336                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
   2337             else
   2338                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
   2339             break;
   2340         case 3:
   2341             return 1;
   2342         }
   2343         gen_op_iwmmxt_movq_wRn_M0(wrd);
   2344         gen_op_iwmmxt_set_mup();
   2345         break;
   2346     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
   2347     case 0x402: case 0x502: case 0x602: case 0x702:
   2348         wrd = (insn >> 12) & 0xf;
   2349         rd0 = (insn >> 16) & 0xf;
   2350         rd1 = (insn >> 0) & 0xf;
   2351         gen_op_iwmmxt_movq_M0_wRn(rd0);
   2352         iwmmxt_load_reg(cpu_V1, rd1);
   2353         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1,
   2354                                 tcg_constant_i32((insn >> 20) & 3));
   2355         gen_op_iwmmxt_movq_wRn_M0(wrd);
   2356         gen_op_iwmmxt_set_mup();
   2357         break;
   2358     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
   2359     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
   2360     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
   2361     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
   2362         wrd = (insn >> 12) & 0xf;
   2363         rd0 = (insn >> 16) & 0xf;
   2364         rd1 = (insn >> 0) & 0xf;
   2365         gen_op_iwmmxt_movq_M0_wRn(rd0);
   2366         switch ((insn >> 20) & 0xf) {
   2367         case 0x0:
   2368             gen_op_iwmmxt_subnb_M0_wRn(rd1);
   2369             break;
   2370         case 0x1:
   2371             gen_op_iwmmxt_subub_M0_wRn(rd1);
   2372             break;
   2373         case 0x3:
   2374             gen_op_iwmmxt_subsb_M0_wRn(rd1);
   2375             break;
   2376         case 0x4:
   2377             gen_op_iwmmxt_subnw_M0_wRn(rd1);
   2378             break;
   2379         case 0x5:
   2380             gen_op_iwmmxt_subuw_M0_wRn(rd1);
   2381             break;
   2382         case 0x7:
   2383             gen_op_iwmmxt_subsw_M0_wRn(rd1);
   2384             break;
   2385         case 0x8:
   2386             gen_op_iwmmxt_subnl_M0_wRn(rd1);
   2387             break;
   2388         case 0x9:
   2389             gen_op_iwmmxt_subul_M0_wRn(rd1);
   2390             break;
   2391         case 0xb:
   2392             gen_op_iwmmxt_subsl_M0_wRn(rd1);
   2393             break;
   2394         default:
   2395             return 1;
   2396         }
   2397         gen_op_iwmmxt_movq_wRn_M0(wrd);
   2398         gen_op_iwmmxt_set_mup();
   2399         gen_op_iwmmxt_set_cup();
   2400         break;
   2401     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
   2402     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
   2403     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
   2404     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
   2405         wrd = (insn >> 12) & 0xf;
   2406         rd0 = (insn >> 16) & 0xf;
   2407         gen_op_iwmmxt_movq_M0_wRn(rd0);
   2408         tmp = tcg_constant_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
   2409         gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
   2410         gen_op_iwmmxt_movq_wRn_M0(wrd);
   2411         gen_op_iwmmxt_set_mup();
   2412         gen_op_iwmmxt_set_cup();
   2413         break;
   2414     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
   2415     case 0x418: case 0x518: case 0x618: case 0x718:
   2416     case 0x818: case 0x918: case 0xa18: case 0xb18:
   2417     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
   2418         wrd = (insn >> 12) & 0xf;
   2419         rd0 = (insn >> 16) & 0xf;
   2420         rd1 = (insn >> 0) & 0xf;
   2421         gen_op_iwmmxt_movq_M0_wRn(rd0);
   2422         switch ((insn >> 20) & 0xf) {
   2423         case 0x0:
   2424             gen_op_iwmmxt_addnb_M0_wRn(rd1);
   2425             break;
   2426         case 0x1:
   2427             gen_op_iwmmxt_addub_M0_wRn(rd1);
   2428             break;
   2429         case 0x3:
   2430             gen_op_iwmmxt_addsb_M0_wRn(rd1);
   2431             break;
   2432         case 0x4:
   2433             gen_op_iwmmxt_addnw_M0_wRn(rd1);
   2434             break;
   2435         case 0x5:
   2436             gen_op_iwmmxt_adduw_M0_wRn(rd1);
   2437             break;
   2438         case 0x7:
   2439             gen_op_iwmmxt_addsw_M0_wRn(rd1);
   2440             break;
   2441         case 0x8:
   2442             gen_op_iwmmxt_addnl_M0_wRn(rd1);
   2443             break;
   2444         case 0x9:
   2445             gen_op_iwmmxt_addul_M0_wRn(rd1);
   2446             break;
   2447         case 0xb:
   2448             gen_op_iwmmxt_addsl_M0_wRn(rd1);
   2449             break;
   2450         default:
   2451             return 1;
   2452         }
   2453         gen_op_iwmmxt_movq_wRn_M0(wrd);
   2454         gen_op_iwmmxt_set_mup();
   2455         gen_op_iwmmxt_set_cup();
   2456         break;
   2457     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
   2458     case 0x408: case 0x508: case 0x608: case 0x708:
   2459     case 0x808: case 0x908: case 0xa08: case 0xb08:
   2460     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
   2461         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
   2462             return 1;
   2463         wrd = (insn >> 12) & 0xf;
   2464         rd0 = (insn >> 16) & 0xf;
   2465         rd1 = (insn >> 0) & 0xf;
   2466         gen_op_iwmmxt_movq_M0_wRn(rd0);
   2467         switch ((insn >> 22) & 3) {
   2468         case 1:
   2469             if (insn & (1 << 21))
   2470                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
   2471             else
   2472                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
   2473             break;
   2474         case 2:
   2475             if (insn & (1 << 21))
   2476                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
   2477             else
   2478                 gen_op_iwmmxt_packul_M0_wRn(rd1);
   2479             break;
   2480         case 3:
   2481             if (insn & (1 << 21))
   2482                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
   2483             else
   2484                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
   2485             break;
   2486         }
   2487         gen_op_iwmmxt_movq_wRn_M0(wrd);
   2488         gen_op_iwmmxt_set_mup();
   2489         gen_op_iwmmxt_set_cup();
   2490         break;
   2491     case 0x201: case 0x203: case 0x205: case 0x207:
   2492     case 0x209: case 0x20b: case 0x20d: case 0x20f:
   2493     case 0x211: case 0x213: case 0x215: case 0x217:
   2494     case 0x219: case 0x21b: case 0x21d: case 0x21f:
   2495         wrd = (insn >> 5) & 0xf;
   2496         rd0 = (insn >> 12) & 0xf;
   2497         rd1 = (insn >> 0) & 0xf;
   2498         if (rd0 == 0xf || rd1 == 0xf)
   2499             return 1;
   2500         gen_op_iwmmxt_movq_M0_wRn(wrd);
   2501         tmp = load_reg(s, rd0);
   2502         tmp2 = load_reg(s, rd1);
   2503         switch ((insn >> 16) & 0xf) {
   2504         case 0x0:                                       /* TMIA */
   2505             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
   2506             break;
   2507         case 0x8:                                       /* TMIAPH */
   2508             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
   2509             break;
   2510         case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
   2511             if (insn & (1 << 16))
   2512                 tcg_gen_shri_i32(tmp, tmp, 16);
   2513             if (insn & (1 << 17))
   2514                 tcg_gen_shri_i32(tmp2, tmp2, 16);
   2515             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
   2516             break;
   2517         default:
   2518             tcg_temp_free_i32(tmp2);
   2519             tcg_temp_free_i32(tmp);
   2520             return 1;
   2521         }
   2522         tcg_temp_free_i32(tmp2);
   2523         tcg_temp_free_i32(tmp);
   2524         gen_op_iwmmxt_movq_wRn_M0(wrd);
   2525         gen_op_iwmmxt_set_mup();
   2526         break;
   2527     default:
   2528         return 1;
   2529     }
   2530 
   2531     return 0;
   2532 }
   2533 
    2534 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
    2535    (i.e. an undefined instruction).  */
   2536 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
   2537 {
   2538     int acc, rd0, rd1, rdhi, rdlo;
   2539     TCGv_i32 tmp, tmp2;
   2540 
   2541     if ((insn & 0x0ff00f10) == 0x0e200010) {
   2542         /* Multiply with Internal Accumulate Format */
   2543         rd0 = (insn >> 12) & 0xf;
   2544         rd1 = insn & 0xf;
   2545         acc = (insn >> 5) & 7;
   2546 
   2547         if (acc != 0)
   2548             return 1;
   2549 
   2550         tmp = load_reg(s, rd0);
   2551         tmp2 = load_reg(s, rd1);
   2552         switch ((insn >> 16) & 0xf) {
   2553         case 0x0:                                       /* MIA */
   2554             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
   2555             break;
   2556         case 0x8:                                       /* MIAPH */
   2557             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
   2558             break;
   2559         case 0xc:                                       /* MIABB */
   2560         case 0xd:                                       /* MIABT */
   2561         case 0xe:                                       /* MIATB */
   2562         case 0xf:                                       /* MIATT */
   2563             if (insn & (1 << 16))
   2564                 tcg_gen_shri_i32(tmp, tmp, 16);
   2565             if (insn & (1 << 17))
   2566                 tcg_gen_shri_i32(tmp2, tmp2, 16);
   2567             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
   2568             break;
    2569         default:
                     tcg_temp_free_i32(tmp2);
                     tcg_temp_free_i32(tmp);
    2570             return 1;
    2571         }
   2572         tcg_temp_free_i32(tmp2);
   2573         tcg_temp_free_i32(tmp);
   2574 
   2575         gen_op_iwmmxt_movq_wRn_M0(acc);
   2576         return 0;
   2577     }
   2578 
   2579     if ((insn & 0x0fe00ff8) == 0x0c400000) {
   2580         /* Internal Accumulator Access Format */
   2581         rdhi = (insn >> 16) & 0xf;
   2582         rdlo = (insn >> 12) & 0xf;
   2583         acc = insn & 7;
   2584 
   2585         if (acc != 0)
   2586             return 1;
   2587 
   2588         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
   2589             iwmmxt_load_reg(cpu_V0, acc);
   2590             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
   2591             tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
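                     /* The accumulator is 40 bits wide; keep only acc[39:32] in rdhi. */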
   2592             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
   2593         } else {                                        /* MAR */
   2594             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
   2595             iwmmxt_store_reg(cpu_V0, acc);
   2596         }
   2597         return 0;
   2598     }
   2599 
   2600     return 1;
   2601 }
   2602 
   2603 static void gen_goto_ptr(void)
   2604 {
   2605     tcg_gen_lookup_and_goto_ptr();
   2606 }
   2607 
   2608 /* This will end the TB but doesn't guarantee we'll return to
   2609  * cpu_loop_exec. Any live exit_requests will be processed as we
   2610  * enter the next TB.
   2611  */
   2612 static void gen_goto_tb(DisasContext *s, int n, target_long diff)
   2613 {
   2614     if (translator_use_goto_tb(&s->base, s->pc_curr + diff)) {
   2615         /*
   2616          * For pcrel, the pc must always be up-to-date on entry to
   2617          * the linked TB, so that it can use simple additions for all
   2618          * further adjustments.  For !pcrel, the linked TB is compiled
   2619          * to know its full virtual address, so we can delay the
   2620          * update to pc to the unlinked path.  A long chain of links
   2621          * can thus avoid many updates to the PC.
   2622          */
   2623         if (TARGET_TB_PCREL) {
   2624             gen_update_pc(s, diff);
   2625             tcg_gen_goto_tb(n);
   2626         } else {
   2627             tcg_gen_goto_tb(n);
   2628             gen_update_pc(s, diff);
   2629         }
   2630         tcg_gen_exit_tb(s->base.tb, n);
   2631     } else {
   2632         gen_update_pc(s, diff);
   2633         gen_goto_ptr();
   2634     }
   2635     s->base.is_jmp = DISAS_NORETURN;
   2636 }
   2637 
   2638 /* Jump, specifying which TB number to use if we gen_goto_tb() */
   2639 static void gen_jmp_tb(DisasContext *s, target_long diff, int tbno)
   2640 {
   2641     if (unlikely(s->ss_active)) {
   2642         /* An indirect jump so that we still trigger the debug exception.  */
   2643         gen_update_pc(s, diff);
   2644         s->base.is_jmp = DISAS_JUMP;
   2645         return;
   2646     }
   2647     switch (s->base.is_jmp) {
   2648     case DISAS_NEXT:
   2649     case DISAS_TOO_MANY:
   2650     case DISAS_NORETURN:
   2651         /*
   2652          * The normal case: just go to the destination TB.
   2653          * NB: NORETURN happens if we generate code like
   2654          *    gen_brcondi(l);
   2655          *    gen_jmp();
   2656          *    gen_set_label(l);
   2657          *    gen_jmp();
   2658          * on the second call to gen_jmp().
   2659          */
   2660         gen_goto_tb(s, tbno, diff);
   2661         break;
   2662     case DISAS_UPDATE_NOCHAIN:
   2663     case DISAS_UPDATE_EXIT:
   2664         /*
   2665          * We already decided we're leaving the TB for some other reason.
   2666          * Avoid using goto_tb so we really do exit back to the main loop
   2667          * and don't chain to another TB.
   2668          */
   2669         gen_update_pc(s, diff);
   2670         gen_goto_ptr();
   2671         s->base.is_jmp = DISAS_NORETURN;
   2672         break;
   2673     default:
   2674         /*
   2675          * We shouldn't be emitting code for a jump and also have
   2676          * is_jmp set to one of the special cases like DISAS_SWI.
   2677          */
   2678         g_assert_not_reached();
   2679     }
   2680 }
   2681 
   2682 static inline void gen_jmp(DisasContext *s, target_long diff)
   2683 {
   2684     gen_jmp_tb(s, diff, 0);
   2685 }
   2686 
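         /*
          * Signed 16x16->32 multiply for the SMULxy/SMLAxy family: x and y
          * select the top (1) or bottom (0) halfword of t0 and t1.
          */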
   2687 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
   2688 {
   2689     if (x)
   2690         tcg_gen_sari_i32(t0, t0, 16);
   2691     else
   2692         gen_sxth(t0);
   2693     if (y)
   2694         tcg_gen_sari_i32(t1, t1, 16);
   2695     else
   2696         gen_sxth(t1);
   2697     tcg_gen_mul_i32(t0, t0, t1);
   2698 }
   2699 
   2700 /* Return the mask of PSR bits set by a MSR instruction.  */
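         /* flags holds the instruction's <fields> mask: bit 0 = c (PSR[7:0]),
            bit 1 = x (PSR[15:8]), bit 2 = s (PSR[23:16]), bit 3 = f (PSR[31:24]). */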
   2701 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
   2702 {
   2703     uint32_t mask = 0;
   2704 
   2705     if (flags & (1 << 0)) {
   2706         mask |= 0xff;
   2707     }
   2708     if (flags & (1 << 1)) {
   2709         mask |= 0xff00;
   2710     }
   2711     if (flags & (1 << 2)) {
   2712         mask |= 0xff0000;
   2713     }
   2714     if (flags & (1 << 3)) {
   2715         mask |= 0xff000000;
   2716     }
   2717 
   2718     /* Mask out undefined and reserved bits.  */
   2719     mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
   2720 
   2721     /* Mask out execution state.  */
   2722     if (!spsr) {
   2723         mask &= ~CPSR_EXEC;
   2724     }
   2725 
   2726     /* Mask out privileged bits.  */
   2727     if (IS_USER(s)) {
   2728         mask &= CPSR_USER;
   2729     }
   2730     return mask;
   2731 }
   2732 
   2733 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
   2734 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
   2735 {
   2736     TCGv_i32 tmp;
   2737     if (spsr) {
   2738         /* ??? This is also undefined in system mode.  */
   2739         if (IS_USER(s))
   2740             return 1;
   2741 
   2742         tmp = load_cpu_field(spsr);
   2743         tcg_gen_andi_i32(tmp, tmp, ~mask);
   2744         tcg_gen_andi_i32(t0, t0, mask);
   2745         tcg_gen_or_i32(tmp, tmp, t0);
   2746         store_cpu_field(tmp, spsr);
   2747     } else {
   2748         gen_set_cpsr(t0, mask);
   2749     }
   2750     tcg_temp_free_i32(t0);
   2751     gen_lookup_tb(s);
   2752     return 0;
   2753 }
   2754 
   2755 /* Returns nonzero if access to the PSR is not permitted.  */
   2756 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
   2757 {
   2758     TCGv_i32 tmp;
   2759     tmp = tcg_temp_new_i32();
   2760     tcg_gen_movi_i32(tmp, val);
   2761     return gen_set_psr(s, mask, spsr, tmp);
   2762 }
   2763 
   2764 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
   2765                                      int *tgtmode, int *regno)
   2766 {
   2767     /* Decode the r and sysm fields of MSR/MRS banked accesses into
   2768      * the target mode and register number, and identify the various
   2769      * unpredictable cases.
   2770      * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
   2771      *  + executed in user mode
   2772      *  + using R15 as the src/dest register
   2773      *  + accessing an unimplemented register
    2774      *  + accessing a register that's inaccessible at the current PL/security state
   2775      *  + accessing a register that you could access with a different insn
   2776      * We choose to UNDEF in all these cases.
   2777      * Since we don't know which of the various AArch32 modes we are in
   2778      * we have to defer some checks to runtime.
    2779      * Accesses to Monitor mode registers from Secure EL1 (which implies
    2780      * that EL3 is AArch64) must trap to Secure EL2 if enabled, else EL3.
   2781      *
   2782      * If the access checks fail this function will emit code to take
   2783      * an exception and return false. Otherwise it will return true,
   2784      * and set *tgtmode and *regno appropriately.
   2785      */
   2786     /* These instructions are present only in ARMv8, or in ARMv7 with the
   2787      * Virtualization Extensions.
   2788      */
   2789     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
   2790         !arm_dc_feature(s, ARM_FEATURE_EL2)) {
   2791         goto undef;
   2792     }
   2793 
   2794     if (IS_USER(s) || rn == 15) {
   2795         goto undef;
   2796     }
   2797 
   2798     /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
   2799      * of registers into (r, sysm).
   2800      */
   2801     if (r) {
   2802         /* SPSRs for other modes */
   2803         switch (sysm) {
   2804         case 0xe: /* SPSR_fiq */
   2805             *tgtmode = ARM_CPU_MODE_FIQ;
   2806             break;
   2807         case 0x10: /* SPSR_irq */
   2808             *tgtmode = ARM_CPU_MODE_IRQ;
   2809             break;
   2810         case 0x12: /* SPSR_svc */
   2811             *tgtmode = ARM_CPU_MODE_SVC;
   2812             break;
   2813         case 0x14: /* SPSR_abt */
   2814             *tgtmode = ARM_CPU_MODE_ABT;
   2815             break;
   2816         case 0x16: /* SPSR_und */
   2817             *tgtmode = ARM_CPU_MODE_UND;
   2818             break;
   2819         case 0x1c: /* SPSR_mon */
   2820             *tgtmode = ARM_CPU_MODE_MON;
   2821             break;
   2822         case 0x1e: /* SPSR_hyp */
   2823             *tgtmode = ARM_CPU_MODE_HYP;
   2824             break;
   2825         default: /* unallocated */
   2826             goto undef;
   2827         }
   2828         /* We arbitrarily assign SPSR a register number of 16. */
   2829         *regno = 16;
   2830     } else {
   2831         /* general purpose registers for other modes */
   2832         switch (sysm) {
   2833         case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
   2834             *tgtmode = ARM_CPU_MODE_USR;
   2835             *regno = sysm + 8;
   2836             break;
   2837         case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
   2838             *tgtmode = ARM_CPU_MODE_FIQ;
   2839             *regno = sysm;
   2840             break;
   2841         case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
   2842             *tgtmode = ARM_CPU_MODE_IRQ;
   2843             *regno = sysm & 1 ? 13 : 14;
   2844             break;
   2845         case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
   2846             *tgtmode = ARM_CPU_MODE_SVC;
   2847             *regno = sysm & 1 ? 13 : 14;
   2848             break;
   2849         case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
   2850             *tgtmode = ARM_CPU_MODE_ABT;
   2851             *regno = sysm & 1 ? 13 : 14;
   2852             break;
   2853         case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
   2854             *tgtmode = ARM_CPU_MODE_UND;
   2855             *regno = sysm & 1 ? 13 : 14;
   2856             break;
   2857         case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
   2858             *tgtmode = ARM_CPU_MODE_MON;
   2859             *regno = sysm & 1 ? 13 : 14;
   2860             break;
   2861         case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
   2862             *tgtmode = ARM_CPU_MODE_HYP;
   2863             /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
   2864             *regno = sysm & 1 ? 13 : 17;
   2865             break;
   2866         default: /* unallocated */
   2867             goto undef;
   2868         }
   2869     }
   2870 
   2871     /* Catch the 'accessing inaccessible register' cases we can detect
   2872      * at translate time.
   2873      */
   2874     switch (*tgtmode) {
   2875     case ARM_CPU_MODE_MON:
   2876         if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
   2877             goto undef;
   2878         }
   2879         if (s->current_el == 1) {
   2880             /* If we're in Secure EL1 (which implies that EL3 is AArch64)
   2881              * then accesses to Mon registers trap to Secure EL2, if it exists,
   2882              * otherwise EL3.
   2883              */
   2884             TCGv_i32 tcg_el;
   2885 
   2886             if (arm_dc_feature(s, ARM_FEATURE_AARCH64) &&
   2887                 dc_isar_feature(aa64_sel2, s)) {
   2888                 /* Target EL is EL<3 minus SCR_EL3.EEL2> */
   2889                 tcg_el = load_cpu_field(cp15.scr_el3);
   2890                 tcg_gen_sextract_i32(tcg_el, tcg_el, ctz32(SCR_EEL2), 1);
   2891                 tcg_gen_addi_i32(tcg_el, tcg_el, 3);
   2892             } else {
   2893                 tcg_el = tcg_constant_i32(3);
   2894             }
   2895 
   2896             gen_exception_insn_el_v(s, 0, EXCP_UDEF,
   2897                                     syn_uncategorized(), tcg_el);
   2898             tcg_temp_free_i32(tcg_el);
   2899             return false;
   2900         }
   2901         break;
   2902     case ARM_CPU_MODE_HYP:
   2903         /*
   2904          * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
   2905          * (and so we can forbid accesses from EL2 or below). elr_hyp
   2906          * can also be accessed from Hyp mode, so forbid accesses from
   2907          * EL0 or EL1.
   2908          */
   2909         if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
   2910             (s->current_el < 3 && *regno != 17)) {
   2911             goto undef;
   2912         }
   2913         break;
   2914     default:
   2915         break;
   2916     }
   2917 
   2918     return true;
   2919 
   2920 undef:
   2921     /* If we get here then some access check did not pass */
   2922     gen_exception_insn(s, 0, EXCP_UDEF, syn_uncategorized());
   2923     return false;
   2924 }
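
        /*
         * Worked example of the decode above (values traced through the
         * switch statements; the instruction spellings are illustrative):
         * "MRS r2, SPSR_irq" encodes r == 1, sysm == 0x10, so we return
         * tgtmode == ARM_CPU_MODE_IRQ with regno == 16 (our arbitrary
         * SPSR number), while "MRS r2, r13_irq" encodes r == 0,
         * sysm == 0x11 and yields the same tgtmode with regno == 13.
         */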
   2925 
   2926 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
   2927 {
   2928     TCGv_i32 tcg_reg;
   2929     int tgtmode = 0, regno = 0;
   2930 
   2931     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
   2932         return;
   2933     }
   2934 
   2935     /* Sync state because msr_banked() can raise exceptions */
   2936     gen_set_condexec(s);
   2937     gen_update_pc(s, 0);
   2938     tcg_reg = load_reg(s, rn);
   2939     gen_helper_msr_banked(cpu_env, tcg_reg,
   2940                           tcg_constant_i32(tgtmode),
   2941                           tcg_constant_i32(regno));
   2942     tcg_temp_free_i32(tcg_reg);
   2943     s->base.is_jmp = DISAS_UPDATE_EXIT;
   2944 }
   2945 
   2946 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
   2947 {
   2948     TCGv_i32 tcg_reg;
   2949     int tgtmode = 0, regno = 0;
   2950 
   2951     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
   2952         return;
   2953     }
   2954 
   2955     /* Sync state because mrs_banked() can raise exceptions */
   2956     gen_set_condexec(s);
   2957     gen_update_pc(s, 0);
   2958     tcg_reg = tcg_temp_new_i32();
   2959     gen_helper_mrs_banked(tcg_reg, cpu_env,
   2960                           tcg_constant_i32(tgtmode),
   2961                           tcg_constant_i32(regno));
   2962     store_reg(s, rn, tcg_reg);
   2963     s->base.is_jmp = DISAS_UPDATE_EXIT;
   2964 }
   2965 
   2966 /* Store value to PC as for an exception return (i.e. don't
   2967  * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
   2968  * will do the masking based on the new value of the Thumb bit.
   2969  */
   2970 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
   2971 {
   2972     tcg_gen_mov_i32(cpu_R[15], pc);
   2973     tcg_temp_free_i32(pc);
   2974 }
   2975 
   2976 /* Generate a v6 exception return.  Marks both values as dead.  */
   2977 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
   2978 {
   2979     store_pc_exc_ret(s, pc);
   2980     /* The cpsr_write_eret helper will mask the low bits of PC
   2981      * appropriately depending on the new Thumb bit, so it must
   2982      * be called after storing the new PC.
   2983      */
   2984     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
   2985         gen_io_start();
   2986     }
   2987     gen_helper_cpsr_write_eret(cpu_env, cpsr);
   2988     tcg_temp_free_i32(cpsr);
   2989     /* Must exit loop to check un-masked IRQs */
   2990     s->base.is_jmp = DISAS_EXIT;
   2991 }
   2992 
   2993 /* Generate an old-style exception return. Marks pc as dead. */
   2994 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
   2995 {
   2996     gen_rfe(s, pc, load_cpu_field(spsr));
   2997 }
   2998 
   2999 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
   3000                             uint32_t opr_sz, uint32_t max_sz,
   3001                             gen_helper_gvec_3_ptr *fn)
   3002 {
   3003     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
   3004 
   3005     tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
   3006     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
   3007                        opr_sz, max_sz, 0, fn);
   3008     tcg_temp_free_ptr(qc_ptr);
   3009 }
   3010 
   3011 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   3012                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   3013 {
   3014     static gen_helper_gvec_3_ptr * const fns[2] = {
   3015         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
   3016     };
   3017     tcg_debug_assert(vece >= 1 && vece <= 2);
   3018     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
   3019 }
   3020 
   3021 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   3022                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   3023 {
   3024     static gen_helper_gvec_3_ptr * const fns[2] = {
   3025         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
   3026     };
   3027     tcg_debug_assert(vece >= 1 && vece <= 2);
   3028     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
   3029 }
   3030 
   3031 #define GEN_CMP0(NAME, COND)                                            \
   3032     static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
   3033     {                                                                   \
   3034         tcg_gen_setcondi_i32(COND, d, a, 0);                            \
   3035         tcg_gen_neg_i32(d, d);                                          \
   3036     }                                                                   \
   3037     static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
   3038     {                                                                   \
   3039         tcg_gen_setcondi_i64(COND, d, a, 0);                            \
   3040         tcg_gen_neg_i64(d, d);                                          \
   3041     }                                                                   \
   3042     static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
   3043     {                                                                   \
   3044         TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0);          \
   3045         tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
   3046     }                                                                   \
   3047     void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
   3048                             uint32_t opr_sz, uint32_t max_sz)           \
   3049     {                                                                   \
   3050         const GVecGen2 op[4] = {                                        \
   3051             { .fno = gen_helper_gvec_##NAME##0_b,                       \
   3052               .fniv = gen_##NAME##0_vec,                                \
   3053               .opt_opc = vecop_list_cmp,                                \
   3054               .vece = MO_8 },                                           \
   3055             { .fno = gen_helper_gvec_##NAME##0_h,                       \
   3056               .fniv = gen_##NAME##0_vec,                                \
   3057               .opt_opc = vecop_list_cmp,                                \
   3058               .vece = MO_16 },                                          \
   3059             { .fni4 = gen_##NAME##0_i32,                                \
   3060               .fniv = gen_##NAME##0_vec,                                \
   3061               .opt_opc = vecop_list_cmp,                                \
   3062               .vece = MO_32 },                                          \
   3063             { .fni8 = gen_##NAME##0_i64,                                \
   3064               .fniv = gen_##NAME##0_vec,                                \
   3065               .opt_opc = vecop_list_cmp,                                \
   3066               .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
   3067               .vece = MO_64 },                                          \
   3068         };                                                              \
   3069         tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
   3070     }
   3071 
   3072 static const TCGOpcode vecop_list_cmp[] = {
   3073     INDEX_op_cmp_vec, 0
   3074 };
   3075 
   3076 GEN_CMP0(ceq, TCG_COND_EQ)
   3077 GEN_CMP0(cle, TCG_COND_LE)
   3078 GEN_CMP0(cge, TCG_COND_GE)
   3079 GEN_CMP0(clt, TCG_COND_LT)
   3080 GEN_CMP0(cgt, TCG_COND_GT)
   3081 
   3082 #undef GEN_CMP0
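
        /*
         * In the expansions above, setcond produces 0 or 1 and the negate
         * widens that into the 0 / all-ones element mask that the NEON
         * compare instructions define.  A scalar sketch of one 32-bit lane
         * (illustrative only; not a helper used elsewhere):
         *
         *     static uint32_t ceq0_lane(uint32_t a)
         *     {
         *         return -(uint32_t)(a == 0);    // 0xffffffff iff a == 0
         *     }
         */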
   3083 
   3084 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3085 {
   3086     tcg_gen_vec_sar8i_i64(a, a, shift);
   3087     tcg_gen_vec_add8_i64(d, d, a);
   3088 }
   3089 
   3090 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3091 {
   3092     tcg_gen_vec_sar16i_i64(a, a, shift);
   3093     tcg_gen_vec_add16_i64(d, d, a);
   3094 }
   3095 
   3096 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
   3097 {
   3098     tcg_gen_sari_i32(a, a, shift);
   3099     tcg_gen_add_i32(d, d, a);
   3100 }
   3101 
   3102 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3103 {
   3104     tcg_gen_sari_i64(a, a, shift);
   3105     tcg_gen_add_i64(d, d, a);
   3106 }
   3107 
   3108 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
   3109 {
   3110     tcg_gen_sari_vec(vece, a, a, sh);
   3111     tcg_gen_add_vec(vece, d, d, a);
   3112 }
   3113 
   3114 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3115                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
   3116 {
   3117     static const TCGOpcode vecop_list[] = {
   3118         INDEX_op_sari_vec, INDEX_op_add_vec, 0
   3119     };
   3120     static const GVecGen2i ops[4] = {
   3121         { .fni8 = gen_ssra8_i64,
   3122           .fniv = gen_ssra_vec,
   3123           .fno = gen_helper_gvec_ssra_b,
   3124           .load_dest = true,
   3125           .opt_opc = vecop_list,
   3126           .vece = MO_8 },
   3127         { .fni8 = gen_ssra16_i64,
   3128           .fniv = gen_ssra_vec,
   3129           .fno = gen_helper_gvec_ssra_h,
   3130           .load_dest = true,
   3131           .opt_opc = vecop_list,
   3132           .vece = MO_16 },
   3133         { .fni4 = gen_ssra32_i32,
   3134           .fniv = gen_ssra_vec,
   3135           .fno = gen_helper_gvec_ssra_s,
   3136           .load_dest = true,
   3137           .opt_opc = vecop_list,
   3138           .vece = MO_32 },
   3139         { .fni8 = gen_ssra64_i64,
   3140           .fniv = gen_ssra_vec,
   3141           .fno = gen_helper_gvec_ssra_d,
   3142           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3143           .opt_opc = vecop_list,
   3144           .load_dest = true,
   3145           .vece = MO_64 },
   3146     };
   3147 
   3148     /* tszimm encoding produces immediates in the range [1..esize]. */
   3149     tcg_debug_assert(shift > 0);
   3150     tcg_debug_assert(shift <= (8 << vece));
   3151 
   3152     /*
   3153      * Shifts larger than the element size are architecturally valid.
   3154      * A signed shift then yields all sign bits.
   3155      */
   3156     shift = MIN(shift, (8 << vece) - 1);
   3157     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
   3158 }
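
        /*
         * Per element, SSRA is "d += (signed)a >> shift", with the sum
         * wrapping modulo the element size.  A scalar sketch of the clamp
         * above for a hypothetical 8-bit lane (illustrative only):
         *
         *     static int8_t ssra8_lane(int8_t d, int8_t a, int shift)
         *     {
         *         if (shift == 8) {
         *             shift = 7;     // >> 7 already yields all sign bits
         *         }
         *         return d + (a >> shift);
         *     }
         */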
   3159 
   3160 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3161 {
   3162     tcg_gen_vec_shr8i_i64(a, a, shift);
   3163     tcg_gen_vec_add8_i64(d, d, a);
   3164 }
   3165 
   3166 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3167 {
   3168     tcg_gen_vec_shr16i_i64(a, a, shift);
   3169     tcg_gen_vec_add16_i64(d, d, a);
   3170 }
   3171 
   3172 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
   3173 {
   3174     tcg_gen_shri_i32(a, a, shift);
   3175     tcg_gen_add_i32(d, d, a);
   3176 }
   3177 
   3178 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3179 {
   3180     tcg_gen_shri_i64(a, a, shift);
   3181     tcg_gen_add_i64(d, d, a);
   3182 }
   3183 
   3184 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
   3185 {
   3186     tcg_gen_shri_vec(vece, a, a, sh);
   3187     tcg_gen_add_vec(vece, d, d, a);
   3188 }
   3189 
   3190 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3191                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
   3192 {
   3193     static const TCGOpcode vecop_list[] = {
   3194         INDEX_op_shri_vec, INDEX_op_add_vec, 0
   3195     };
   3196     static const GVecGen2i ops[4] = {
   3197         { .fni8 = gen_usra8_i64,
   3198           .fniv = gen_usra_vec,
   3199           .fno = gen_helper_gvec_usra_b,
   3200           .load_dest = true,
   3201           .opt_opc = vecop_list,
   3202           .vece = MO_8, },
   3203         { .fni8 = gen_usra16_i64,
   3204           .fniv = gen_usra_vec,
   3205           .fno = gen_helper_gvec_usra_h,
   3206           .load_dest = true,
   3207           .opt_opc = vecop_list,
   3208           .vece = MO_16, },
   3209         { .fni4 = gen_usra32_i32,
   3210           .fniv = gen_usra_vec,
   3211           .fno = gen_helper_gvec_usra_s,
   3212           .load_dest = true,
   3213           .opt_opc = vecop_list,
   3214           .vece = MO_32, },
   3215         { .fni8 = gen_usra64_i64,
   3216           .fniv = gen_usra_vec,
   3217           .fno = gen_helper_gvec_usra_d,
   3218           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3219           .load_dest = true,
   3220           .opt_opc = vecop_list,
   3221           .vece = MO_64, },
   3222     };
   3223 
   3224     /* tszimm encoding produces immediates in the range [1..esize]. */
   3225     tcg_debug_assert(shift > 0);
   3226     tcg_debug_assert(shift <= (8 << vece));
   3227 
   3228     /*
   3229      * Shifts larger than the element size are architecturally valid.
   3230      * An unsigned shift then yields all zeros as input to the accumulate: a nop.
   3231      */
   3232     if (shift < (8 << vece)) {
   3233         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
   3234     } else {
   3235         /* Nop, but we do need to clear the tail. */
   3236         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
   3237     }
   3238 }
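
        /*
         * For contrast with SSRA: per element, USRA is
         * "d += (unsigned)a >> shift", and an unsigned shift by the full
         * element size contributes 0, e.g. for bytes
         * d + (a >> 8) == d + 0 == d -- hence the plain move above, which
         * exists only to clear the tail of the vector register.
         */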
   3239 
   3240 /*
   3241  * Shift by one less than the requested amount and use the low bit as
   3242  * the rounding bit.  For the 8 and 16-bit operations, masking down to
   3243  * that low bit discards anything shifted in across an element boundary,
   3244  * so a normal integer shift can be used instead of a vector shift.
   3245  */
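        /*
         * A scalar sketch of that trick for one 8-bit lane (illustrative
         * only; the usual integer promotions keep the sum exact):
         *
         *     static int8_t srshr8_lane(int8_t a, int sh)    // 1 <= sh <= 7
         *     {
         *         int round = (a >> (sh - 1)) & 1;  // last bit shifted out
         *         return (a >> sh) + round;  // == (a + (1 << (sh - 1))) >> sh
         *     }
         *
         * E.g. a == 7, sh == 2: 7 / 4 == 1.75, and (7 >> 2) + 1 == 2.
         */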
   3246 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3247 {
   3248     TCGv_i64 t = tcg_temp_new_i64();
   3249 
   3250     tcg_gen_shri_i64(t, a, sh - 1);
   3251     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
   3252     tcg_gen_vec_sar8i_i64(d, a, sh);
   3253     tcg_gen_vec_add8_i64(d, d, t);
   3254     tcg_temp_free_i64(t);
   3255 }
   3256 
   3257 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3258 {
   3259     TCGv_i64 t = tcg_temp_new_i64();
   3260 
   3261     tcg_gen_shri_i64(t, a, sh - 1);
   3262     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
   3263     tcg_gen_vec_sar16i_i64(d, a, sh);
   3264     tcg_gen_vec_add16_i64(d, d, t);
   3265     tcg_temp_free_i64(t);
   3266 }
   3267 
   3268 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
   3269 {
   3270     TCGv_i32 t;
   3271 
   3272     /* Handle shift by the input size for the benefit of trans_SRSHR_ri */
   3273     if (sh == 32) {
   3274         tcg_gen_movi_i32(d, 0);
   3275         return;
   3276     }
   3277     t = tcg_temp_new_i32();
   3278     tcg_gen_extract_i32(t, a, sh - 1, 1);
   3279     tcg_gen_sari_i32(d, a, sh);
   3280     tcg_gen_add_i32(d, d, t);
   3281     tcg_temp_free_i32(t);
   3282 }
   3283 
   3284 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3285 {
   3286     TCGv_i64 t = tcg_temp_new_i64();
   3287 
   3288     tcg_gen_extract_i64(t, a, sh - 1, 1);
   3289     tcg_gen_sari_i64(d, a, sh);
   3290     tcg_gen_add_i64(d, d, t);
   3291     tcg_temp_free_i64(t);
   3292 }
   3293 
   3294 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
   3295 {
   3296     TCGv_vec t = tcg_temp_new_vec_matching(d);
   3297     TCGv_vec ones = tcg_temp_new_vec_matching(d);
   3298 
   3299     tcg_gen_shri_vec(vece, t, a, sh - 1);
   3300     tcg_gen_dupi_vec(vece, ones, 1);
   3301     tcg_gen_and_vec(vece, t, t, ones);
   3302     tcg_gen_sari_vec(vece, d, a, sh);
   3303     tcg_gen_add_vec(vece, d, d, t);
   3304 
   3305     tcg_temp_free_vec(t);
   3306     tcg_temp_free_vec(ones);
   3307 }
   3308 
   3309 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3310                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
   3311 {
   3312     static const TCGOpcode vecop_list[] = {
   3313         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
   3314     };
   3315     static const GVecGen2i ops[4] = {
   3316         { .fni8 = gen_srshr8_i64,
   3317           .fniv = gen_srshr_vec,
   3318           .fno = gen_helper_gvec_srshr_b,
   3319           .opt_opc = vecop_list,
   3320           .vece = MO_8 },
   3321         { .fni8 = gen_srshr16_i64,
   3322           .fniv = gen_srshr_vec,
   3323           .fno = gen_helper_gvec_srshr_h,
   3324           .opt_opc = vecop_list,
   3325           .vece = MO_16 },
   3326         { .fni4 = gen_srshr32_i32,
   3327           .fniv = gen_srshr_vec,
   3328           .fno = gen_helper_gvec_srshr_s,
   3329           .opt_opc = vecop_list,
   3330           .vece = MO_32 },
   3331         { .fni8 = gen_srshr64_i64,
   3332           .fniv = gen_srshr_vec,
   3333           .fno = gen_helper_gvec_srshr_d,
   3334           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3335           .opt_opc = vecop_list,
   3336           .vece = MO_64 },
   3337     };
   3338 
   3339     /* tszimm encoding produces immediates in the range [1..esize] */
   3340     tcg_debug_assert(shift > 0);
   3341     tcg_debug_assert(shift <= (8 << vece));
   3342 
   3343     if (shift == (8 << vece)) {
   3344         /*
   3345          * Shifts larger than the element size are architecturally valid.
   3346          * A signed shift yields all sign bits.  With rounding, this produces
   3347          *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
   3348          * I.e. always zero.
   3349          */
   3350         tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
   3351     } else {
   3352         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
   3353     }
   3354 }
   3355 
   3356 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3357 {
   3358     TCGv_i64 t = tcg_temp_new_i64();
   3359 
   3360     gen_srshr8_i64(t, a, sh);
   3361     tcg_gen_vec_add8_i64(d, d, t);
   3362     tcg_temp_free_i64(t);
   3363 }
   3364 
   3365 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3366 {
   3367     TCGv_i64 t = tcg_temp_new_i64();
   3368 
   3369     gen_srshr16_i64(t, a, sh);
   3370     tcg_gen_vec_add16_i64(d, d, t);
   3371     tcg_temp_free_i64(t);
   3372 }
   3373 
   3374 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
   3375 {
   3376     TCGv_i32 t = tcg_temp_new_i32();
   3377 
   3378     gen_srshr32_i32(t, a, sh);
   3379     tcg_gen_add_i32(d, d, t);
   3380     tcg_temp_free_i32(t);
   3381 }
   3382 
   3383 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3384 {
   3385     TCGv_i64 t = tcg_temp_new_i64();
   3386 
   3387     gen_srshr64_i64(t, a, sh);
   3388     tcg_gen_add_i64(d, d, t);
   3389     tcg_temp_free_i64(t);
   3390 }
   3391 
   3392 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
   3393 {
   3394     TCGv_vec t = tcg_temp_new_vec_matching(d);
   3395 
   3396     gen_srshr_vec(vece, t, a, sh);
   3397     tcg_gen_add_vec(vece, d, d, t);
   3398     tcg_temp_free_vec(t);
   3399 }
   3400 
   3401 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3402                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
   3403 {
   3404     static const TCGOpcode vecop_list[] = {
   3405         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
   3406     };
   3407     static const GVecGen2i ops[4] = {
   3408         { .fni8 = gen_srsra8_i64,
   3409           .fniv = gen_srsra_vec,
   3410           .fno = gen_helper_gvec_srsra_b,
   3411           .opt_opc = vecop_list,
   3412           .load_dest = true,
   3413           .vece = MO_8 },
   3414         { .fni8 = gen_srsra16_i64,
   3415           .fniv = gen_srsra_vec,
   3416           .fno = gen_helper_gvec_srsra_h,
   3417           .opt_opc = vecop_list,
   3418           .load_dest = true,
   3419           .vece = MO_16 },
   3420         { .fni4 = gen_srsra32_i32,
   3421           .fniv = gen_srsra_vec,
   3422           .fno = gen_helper_gvec_srsra_s,
   3423           .opt_opc = vecop_list,
   3424           .load_dest = true,
   3425           .vece = MO_32 },
   3426         { .fni8 = gen_srsra64_i64,
   3427           .fniv = gen_srsra_vec,
   3428           .fno = gen_helper_gvec_srsra_d,
   3429           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3430           .opt_opc = vecop_list,
   3431           .load_dest = true,
   3432           .vece = MO_64 },
   3433     };
   3434 
   3435     /* tszimm encoding produces immediates in the range [1..esize] */
   3436     tcg_debug_assert(shift > 0);
   3437     tcg_debug_assert(shift <= (8 << vece));
   3438 
   3439     /*
   3440      * Shifts larger than the element size are architecturally valid.
   3441      * A signed shift yields all sign bits.  With rounding, this produces
   3442      *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
   3443      * I.e. always zero.  With accumulation, this leaves D unchanged.
   3444      */
   3445     if (shift == (8 << vece)) {
   3446         /* Nop, but we do need to clear the tail. */
   3447         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
   3448     } else {
   3449         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
   3450     }
   3451 }
   3452 
   3453 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3454 {
   3455     TCGv_i64 t = tcg_temp_new_i64();
   3456 
   3457     tcg_gen_shri_i64(t, a, sh - 1);
   3458     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
   3459     tcg_gen_vec_shr8i_i64(d, a, sh);
   3460     tcg_gen_vec_add8_i64(d, d, t);
   3461     tcg_temp_free_i64(t);
   3462 }
   3463 
   3464 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3465 {
   3466     TCGv_i64 t = tcg_temp_new_i64();
   3467 
   3468     tcg_gen_shri_i64(t, a, sh - 1);
   3469     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
   3470     tcg_gen_vec_shr16i_i64(d, a, sh);
   3471     tcg_gen_vec_add16_i64(d, d, t);
   3472     tcg_temp_free_i64(t);
   3473 }
   3474 
   3475 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
   3476 {
   3477     TCGv_i32 t;
   3478 
   3479     /* Handle shift by the input size for the benefit of trans_URSHR_ri */
   3480     if (sh == 32) {
   3481         tcg_gen_extract_i32(d, a, sh - 1, 1);
   3482         return;
   3483     }
   3484     t = tcg_temp_new_i32();
   3485     tcg_gen_extract_i32(t, a, sh - 1, 1);
   3486     tcg_gen_shri_i32(d, a, sh);
   3487     tcg_gen_add_i32(d, d, t);
   3488     tcg_temp_free_i32(t);
   3489 }
   3490 
   3491 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3492 {
   3493     TCGv_i64 t = tcg_temp_new_i64();
   3494 
   3495     tcg_gen_extract_i64(t, a, sh - 1, 1);
   3496     tcg_gen_shri_i64(d, a, sh);
   3497     tcg_gen_add_i64(d, d, t);
   3498     tcg_temp_free_i64(t);
   3499 }
   3500 
   3501 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
   3502 {
   3503     TCGv_vec t = tcg_temp_new_vec_matching(d);
   3504     TCGv_vec ones = tcg_temp_new_vec_matching(d);
   3505 
   3506     tcg_gen_shri_vec(vece, t, a, shift - 1);
   3507     tcg_gen_dupi_vec(vece, ones, 1);
   3508     tcg_gen_and_vec(vece, t, t, ones);
   3509     tcg_gen_shri_vec(vece, d, a, shift);
   3510     tcg_gen_add_vec(vece, d, d, t);
   3511 
   3512     tcg_temp_free_vec(t);
   3513     tcg_temp_free_vec(ones);
   3514 }
   3515 
   3516 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3517                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
   3518 {
   3519     static const TCGOpcode vecop_list[] = {
   3520         INDEX_op_shri_vec, INDEX_op_add_vec, 0
   3521     };
   3522     static const GVecGen2i ops[4] = {
   3523         { .fni8 = gen_urshr8_i64,
   3524           .fniv = gen_urshr_vec,
   3525           .fno = gen_helper_gvec_urshr_b,
   3526           .opt_opc = vecop_list,
   3527           .vece = MO_8 },
   3528         { .fni8 = gen_urshr16_i64,
   3529           .fniv = gen_urshr_vec,
   3530           .fno = gen_helper_gvec_urshr_h,
   3531           .opt_opc = vecop_list,
   3532           .vece = MO_16 },
   3533         { .fni4 = gen_urshr32_i32,
   3534           .fniv = gen_urshr_vec,
   3535           .fno = gen_helper_gvec_urshr_s,
   3536           .opt_opc = vecop_list,
   3537           .vece = MO_32 },
   3538         { .fni8 = gen_urshr64_i64,
   3539           .fniv = gen_urshr_vec,
   3540           .fno = gen_helper_gvec_urshr_d,
   3541           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3542           .opt_opc = vecop_list,
   3543           .vece = MO_64 },
   3544     };
   3545 
   3546     /* tszimm encoding produces immediates in the range [1..esize] */
   3547     tcg_debug_assert(shift > 0);
   3548     tcg_debug_assert(shift <= (8 << vece));
   3549 
   3550     if (shift == (8 << vece)) {
   3551         /*
   3552          * Shifts larger than the element size are architecturally valid.
   3553          * An unsigned shift yields zero.  With rounding, this produces a
   3554          * copy of the most significant bit.
   3555          */
   3556         tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
   3557     } else {
   3558         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
   3559     }
   3560 }
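
        /*
         * Worked example of the shift == esize case above, for an 8-bit
         * lane: with rounding, the result is (a + 0x80) >> 8, which is 1
         * exactly when bit 7 of a is set -- i.e. a plain shift right by 7.
         */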
   3561 
   3562 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3563 {
   3564     TCGv_i64 t = tcg_temp_new_i64();
   3565 
   3566     if (sh == 8) {
   3567         tcg_gen_vec_shr8i_i64(t, a, 7);
   3568     } else {
   3569         gen_urshr8_i64(t, a, sh);
   3570     }
   3571     tcg_gen_vec_add8_i64(d, d, t);
   3572     tcg_temp_free_i64(t);
   3573 }
   3574 
   3575 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3576 {
   3577     TCGv_i64 t = tcg_temp_new_i64();
   3578 
   3579     if (sh == 16) {
   3580         tcg_gen_vec_shr16i_i64(t, a, 15);
   3581     } else {
   3582         gen_urshr16_i64(t, a, sh);
   3583     }
   3584     tcg_gen_vec_add16_i64(d, d, t);
   3585     tcg_temp_free_i64(t);
   3586 }
   3587 
   3588 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
   3589 {
   3590     TCGv_i32 t = tcg_temp_new_i32();
   3591 
   3592     if (sh == 32) {
   3593         tcg_gen_shri_i32(t, a, 31);
   3594     } else {
   3595         gen_urshr32_i32(t, a, sh);
   3596     }
   3597     tcg_gen_add_i32(d, d, t);
   3598     tcg_temp_free_i32(t);
   3599 }
   3600 
   3601 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
   3602 {
   3603     TCGv_i64 t = tcg_temp_new_i64();
   3604 
   3605     if (sh == 64) {
   3606         tcg_gen_shri_i64(t, a, 63);
   3607     } else {
   3608         gen_urshr64_i64(t, a, sh);
   3609     }
   3610     tcg_gen_add_i64(d, d, t);
   3611     tcg_temp_free_i64(t);
   3612 }
   3613 
   3614 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
   3615 {
   3616     TCGv_vec t = tcg_temp_new_vec_matching(d);
   3617 
   3618     if (sh == (8 << vece)) {
   3619         tcg_gen_shri_vec(vece, t, a, sh - 1);
   3620     } else {
   3621         gen_urshr_vec(vece, t, a, sh);
   3622     }
   3623     tcg_gen_add_vec(vece, d, d, t);
   3624     tcg_temp_free_vec(t);
   3625 }
   3626 
   3627 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3628                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
   3629 {
   3630     static const TCGOpcode vecop_list[] = {
   3631         INDEX_op_shri_vec, INDEX_op_add_vec, 0
   3632     };
   3633     static const GVecGen2i ops[4] = {
   3634         { .fni8 = gen_ursra8_i64,
   3635           .fniv = gen_ursra_vec,
   3636           .fno = gen_helper_gvec_ursra_b,
   3637           .opt_opc = vecop_list,
   3638           .load_dest = true,
   3639           .vece = MO_8 },
   3640         { .fni8 = gen_ursra16_i64,
   3641           .fniv = gen_ursra_vec,
   3642           .fno = gen_helper_gvec_ursra_h,
   3643           .opt_opc = vecop_list,
   3644           .load_dest = true,
   3645           .vece = MO_16 },
   3646         { .fni4 = gen_ursra32_i32,
   3647           .fniv = gen_ursra_vec,
   3648           .fno = gen_helper_gvec_ursra_s,
   3649           .opt_opc = vecop_list,
   3650           .load_dest = true,
   3651           .vece = MO_32 },
   3652         { .fni8 = gen_ursra64_i64,
   3653           .fniv = gen_ursra_vec,
   3654           .fno = gen_helper_gvec_ursra_d,
   3655           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3656           .opt_opc = vecop_list,
   3657           .load_dest = true,
   3658           .vece = MO_64 },
   3659     };
   3660 
   3661     /* tszimm encoding produces immediates in the range [1..esize] */
   3662     tcg_debug_assert(shift > 0);
   3663     tcg_debug_assert(shift <= (8 << vece));
   3664 
   3665     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
   3666 }
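
        /*
         * Note the sh == esize special case in the helpers above: the
         * rounded shift then degenerates to just the rounding bit, i.e.
         * the most significant bit of each element, so for bytes the
         * accumulation is d += (a >> 7).
         */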
   3667 
   3668 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3669 {
   3670     uint64_t mask = dup_const(MO_8, 0xff >> shift);
   3671     TCGv_i64 t = tcg_temp_new_i64();
   3672 
   3673     tcg_gen_shri_i64(t, a, shift);
   3674     tcg_gen_andi_i64(t, t, mask);
   3675     tcg_gen_andi_i64(d, d, ~mask);
   3676     tcg_gen_or_i64(d, d, t);
   3677     tcg_temp_free_i64(t);
   3678 }
   3679 
   3680 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3681 {
   3682     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
   3683     TCGv_i64 t = tcg_temp_new_i64();
   3684 
   3685     tcg_gen_shri_i64(t, a, shift);
   3686     tcg_gen_andi_i64(t, t, mask);
   3687     tcg_gen_andi_i64(d, d, ~mask);
   3688     tcg_gen_or_i64(d, d, t);
   3689     tcg_temp_free_i64(t);
   3690 }
   3691 
   3692 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
   3693 {
   3694     tcg_gen_shri_i32(a, a, shift);
   3695     tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
   3696 }
   3697 
   3698 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3699 {
   3700     tcg_gen_shri_i64(a, a, shift);
   3701     tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
   3702 }
   3703 
   3704 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
   3705 {
   3706     TCGv_vec t = tcg_temp_new_vec_matching(d);
   3707     TCGv_vec m = tcg_temp_new_vec_matching(d);
   3708 
   3709     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
   3710     tcg_gen_shri_vec(vece, t, a, sh);
   3711     tcg_gen_and_vec(vece, d, d, m);
   3712     tcg_gen_or_vec(vece, d, d, t);
   3713 
   3714     tcg_temp_free_vec(t);
   3715     tcg_temp_free_vec(m);
   3716 }
   3717 
   3718 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3719                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
   3720 {
   3721     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
   3722     static const GVecGen2i ops[4] = {
   3723         { .fni8 = gen_shr8_ins_i64,
   3724           .fniv = gen_shr_ins_vec,
   3725           .fno = gen_helper_gvec_sri_b,
   3726           .load_dest = true,
   3727           .opt_opc = vecop_list,
   3728           .vece = MO_8 },
   3729         { .fni8 = gen_shr16_ins_i64,
   3730           .fniv = gen_shr_ins_vec,
   3731           .fno = gen_helper_gvec_sri_h,
   3732           .load_dest = true,
   3733           .opt_opc = vecop_list,
   3734           .vece = MO_16 },
   3735         { .fni4 = gen_shr32_ins_i32,
   3736           .fniv = gen_shr_ins_vec,
   3737           .fno = gen_helper_gvec_sri_s,
   3738           .load_dest = true,
   3739           .opt_opc = vecop_list,
   3740           .vece = MO_32 },
   3741         { .fni8 = gen_shr64_ins_i64,
   3742           .fniv = gen_shr_ins_vec,
   3743           .fno = gen_helper_gvec_sri_d,
   3744           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3745           .load_dest = true,
   3746           .opt_opc = vecop_list,
   3747           .vece = MO_64 },
   3748     };
   3749 
   3750     /* tszimm encoding produces immediates in the range [1..esize]. */
   3751     tcg_debug_assert(shift > 0);
   3752     tcg_debug_assert(shift <= (8 << vece));
   3753 
   3754     /* A shift of esize leaves the destination unchanged. */
   3755     if (shift < (8 << vece)) {
   3756         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
   3757     } else {
   3758         /* Nop, but we do need to clear the tail. */
   3759         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
   3760     }
   3761 }
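
        /*
         * Per element, SRI keeps the top "shift" bits of the destination
         * and inserts the shifted value below them.  A scalar sketch for
         * a hypothetical 8-bit lane (illustrative only):
         *
         *     static uint8_t sri8_lane(uint8_t d, uint8_t a, int sh)
         *     {
         *         uint8_t mask = 0xff >> sh;         // bits being written
         *         return (d & ~mask) | ((a >> sh) & mask);
         *     }
         *
         * E.g. d == 0xab, a == 0xff, sh == 4 gives 0xa0 | 0x0f == 0xaf.
         */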
   3762 
   3763 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3764 {
   3765     uint64_t mask = dup_const(MO_8, 0xff << shift);
   3766     TCGv_i64 t = tcg_temp_new_i64();
   3767 
   3768     tcg_gen_shli_i64(t, a, shift);
   3769     tcg_gen_andi_i64(t, t, mask);
   3770     tcg_gen_andi_i64(d, d, ~mask);
   3771     tcg_gen_or_i64(d, d, t);
   3772     tcg_temp_free_i64(t);
   3773 }
   3774 
   3775 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3776 {
   3777     uint64_t mask = dup_const(MO_16, 0xffff << shift);
   3778     TCGv_i64 t = tcg_temp_new_i64();
   3779 
   3780     tcg_gen_shli_i64(t, a, shift);
   3781     tcg_gen_andi_i64(t, t, mask);
   3782     tcg_gen_andi_i64(d, d, ~mask);
   3783     tcg_gen_or_i64(d, d, t);
   3784     tcg_temp_free_i64(t);
   3785 }
   3786 
   3787 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
   3788 {
   3789     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
   3790 }
   3791 
   3792 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
   3793 {
   3794     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
   3795 }
   3796 
   3797 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
   3798 {
   3799     TCGv_vec t = tcg_temp_new_vec_matching(d);
   3800     TCGv_vec m = tcg_temp_new_vec_matching(d);
   3801 
   3802     tcg_gen_shli_vec(vece, t, a, sh);
   3803     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
   3804     tcg_gen_and_vec(vece, d, d, m);
   3805     tcg_gen_or_vec(vece, d, d, t);
   3806 
   3807     tcg_temp_free_vec(t);
   3808     tcg_temp_free_vec(m);
   3809 }
   3810 
   3811 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
   3812                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
   3813 {
   3814     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
   3815     static const GVecGen2i ops[4] = {
   3816         { .fni8 = gen_shl8_ins_i64,
   3817           .fniv = gen_shl_ins_vec,
   3818           .fno = gen_helper_gvec_sli_b,
   3819           .load_dest = true,
   3820           .opt_opc = vecop_list,
   3821           .vece = MO_8 },
   3822         { .fni8 = gen_shl16_ins_i64,
   3823           .fniv = gen_shl_ins_vec,
   3824           .fno = gen_helper_gvec_sli_h,
   3825           .load_dest = true,
   3826           .opt_opc = vecop_list,
   3827           .vece = MO_16 },
   3828         { .fni4 = gen_shl32_ins_i32,
   3829           .fniv = gen_shl_ins_vec,
   3830           .fno = gen_helper_gvec_sli_s,
   3831           .load_dest = true,
   3832           .opt_opc = vecop_list,
   3833           .vece = MO_32 },
   3834         { .fni8 = gen_shl64_ins_i64,
   3835           .fniv = gen_shl_ins_vec,
   3836           .fno = gen_helper_gvec_sli_d,
   3837           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3838           .load_dest = true,
   3839           .opt_opc = vecop_list,
   3840           .vece = MO_64 },
   3841     };
   3842 
   3843     /* tszimm encoding produces immediates in the range [0..esize-1]. */
   3844     tcg_debug_assert(shift >= 0);
   3845     tcg_debug_assert(shift < (8 << vece));
   3846 
   3847     if (shift == 0) {
   3848         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
   3849     } else {
   3850         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
   3851     }
   3852 }
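
        /*
         * SLI is the mirror image: the shifted value lands in the high
         * bits and the low "shift" bits of the destination are kept.  A
         * scalar sketch for a hypothetical 8-bit lane (illustrative only):
         *
         *     static uint8_t sli8_lane(uint8_t d, uint8_t a, int sh)
         *     {
         *         uint8_t mask = 0xff << sh;         // bits being written
         *         return (d & ~mask) | ((a << sh) & mask);
         *     }
         *
         * E.g. d == 0xab, a == 0x0f, sh == 4 gives 0x0b | 0xf0 == 0xfb.
         */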
   3853 
   3854 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   3855 {
   3856     gen_helper_neon_mul_u8(a, a, b);
   3857     gen_helper_neon_add_u8(d, d, a);
   3858 }
   3859 
   3860 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   3861 {
   3862     gen_helper_neon_mul_u8(a, a, b);
   3863     gen_helper_neon_sub_u8(d, d, a);
   3864 }
   3865 
   3866 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   3867 {
   3868     gen_helper_neon_mul_u16(a, a, b);
   3869     gen_helper_neon_add_u16(d, d, a);
   3870 }
   3871 
   3872 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   3873 {
   3874     gen_helper_neon_mul_u16(a, a, b);
   3875     gen_helper_neon_sub_u16(d, d, a);
   3876 }
   3877 
   3878 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   3879 {
   3880     tcg_gen_mul_i32(a, a, b);
   3881     tcg_gen_add_i32(d, d, a);
   3882 }
   3883 
   3884 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   3885 {
   3886     tcg_gen_mul_i32(a, a, b);
   3887     tcg_gen_sub_i32(d, d, a);
   3888 }
   3889 
   3890 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
   3891 {
   3892     tcg_gen_mul_i64(a, a, b);
   3893     tcg_gen_add_i64(d, d, a);
   3894 }
   3895 
   3896 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
   3897 {
   3898     tcg_gen_mul_i64(a, a, b);
   3899     tcg_gen_sub_i64(d, d, a);
   3900 }
   3901 
   3902 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
   3903 {
   3904     tcg_gen_mul_vec(vece, a, a, b);
   3905     tcg_gen_add_vec(vece, d, d, a);
   3906 }
   3907 
   3908 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
   3909 {
   3910     tcg_gen_mul_vec(vece, a, a, b);
   3911     tcg_gen_sub_vec(vece, d, d, a);
   3912 }
   3913 
   3914 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
   3915  * these tables are shared with AArch64, which does support them.
   3916  */
   3917 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   3918                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   3919 {
   3920     static const TCGOpcode vecop_list[] = {
   3921         INDEX_op_mul_vec, INDEX_op_add_vec, 0
   3922     };
   3923     static const GVecGen3 ops[4] = {
   3924         { .fni4 = gen_mla8_i32,
   3925           .fniv = gen_mla_vec,
   3926           .load_dest = true,
   3927           .opt_opc = vecop_list,
   3928           .vece = MO_8 },
   3929         { .fni4 = gen_mla16_i32,
   3930           .fniv = gen_mla_vec,
   3931           .load_dest = true,
   3932           .opt_opc = vecop_list,
   3933           .vece = MO_16 },
   3934         { .fni4 = gen_mla32_i32,
   3935           .fniv = gen_mla_vec,
   3936           .load_dest = true,
   3937           .opt_opc = vecop_list,
   3938           .vece = MO_32 },
   3939         { .fni8 = gen_mla64_i64,
   3940           .fniv = gen_mla_vec,
   3941           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3942           .load_dest = true,
   3943           .opt_opc = vecop_list,
   3944           .vece = MO_64 },
   3945     };
   3946     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   3947 }
   3948 
   3949 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   3950                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   3951 {
   3952     static const TCGOpcode vecop_list[] = {
   3953         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
   3954     };
   3955     static const GVecGen3 ops[4] = {
   3956         { .fni4 = gen_mls8_i32,
   3957           .fniv = gen_mls_vec,
   3958           .load_dest = true,
   3959           .opt_opc = vecop_list,
   3960           .vece = MO_8 },
   3961         { .fni4 = gen_mls16_i32,
   3962           .fniv = gen_mls_vec,
   3963           .load_dest = true,
   3964           .opt_opc = vecop_list,
   3965           .vece = MO_16 },
   3966         { .fni4 = gen_mls32_i32,
   3967           .fniv = gen_mls_vec,
   3968           .load_dest = true,
   3969           .opt_opc = vecop_list,
   3970           .vece = MO_32 },
   3971         { .fni8 = gen_mls64_i64,
   3972           .fniv = gen_mls_vec,
   3973           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   3974           .load_dest = true,
   3975           .opt_opc = vecop_list,
   3976           .vece = MO_64 },
   3977     };
   3978     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   3979 }
   3980 
   3981 /* CMTST: the test is "if ((X & Y) != 0)". */
   3982 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   3983 {
   3984     tcg_gen_and_i32(d, a, b);
   3985     tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
   3986     tcg_gen_neg_i32(d, d);
   3987 }
   3988 
   3989 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
   3990 {
   3991     tcg_gen_and_i64(d, a, b);
   3992     tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
   3993     tcg_gen_neg_i64(d, d);
   3994 }
   3995 
   3996 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
   3997 {
   3998     tcg_gen_and_vec(vece, d, a, b);
   3999     tcg_gen_dupi_vec(vece, a, 0);
   4000     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
   4001 }
   4002 
   4003 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4004                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4005 {
   4006     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
   4007     static const GVecGen3 ops[4] = {
   4008         { .fni4 = gen_helper_neon_tst_u8,
   4009           .fniv = gen_cmtst_vec,
   4010           .opt_opc = vecop_list,
   4011           .vece = MO_8 },
   4012         { .fni4 = gen_helper_neon_tst_u16,
   4013           .fniv = gen_cmtst_vec,
   4014           .opt_opc = vecop_list,
   4015           .vece = MO_16 },
   4016         { .fni4 = gen_cmtst_i32,
   4017           .fniv = gen_cmtst_vec,
   4018           .opt_opc = vecop_list,
   4019           .vece = MO_32 },
   4020         { .fni8 = gen_cmtst_i64,
   4021           .fniv = gen_cmtst_vec,
   4022           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   4023           .opt_opc = vecop_list,
   4024           .vece = MO_64 },
   4025     };
   4026     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4027 }
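
        /*
         * E.g. per byte, CMTST of a == 0x01 and b == 0x03 yields 0xff
         * (the AND is nonzero), while a == 0x01 and b == 0x02 yields
         * 0x00 -- a bitwise analogue of the compare-to-zero masks above.
         */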
   4028 
   4029 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
   4030 {
   4031     TCGv_i32 lval = tcg_temp_new_i32();
   4032     TCGv_i32 rval = tcg_temp_new_i32();
   4033     TCGv_i32 lsh = tcg_temp_new_i32();
   4034     TCGv_i32 rsh = tcg_temp_new_i32();
   4035     TCGv_i32 zero = tcg_constant_i32(0);
   4036     TCGv_i32 max = tcg_constant_i32(32);
   4037 
   4038     /*
   4039      * Rely on the TCG guarantee that out of range shifts produce
   4040      * unspecified results, not undefined behaviour (i.e. no trap).
   4041      * Discard out-of-range results after the fact.
   4042      */
   4043     tcg_gen_ext8s_i32(lsh, shift);
   4044     tcg_gen_neg_i32(rsh, lsh);
   4045     tcg_gen_shl_i32(lval, src, lsh);
   4046     tcg_gen_shr_i32(rval, src, rsh);
   4047     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
   4048     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
   4049 
   4050     tcg_temp_free_i32(lval);
   4051     tcg_temp_free_i32(rval);
   4052     tcg_temp_free_i32(lsh);
   4053     tcg_temp_free_i32(rsh);
   4054 }
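
        /*
         * The semantics implemented above: the low byte of the shift
         * operand is a signed count, a negative count shifts right, and a
         * count of 32 or more in either direction produces zero.  A scalar
         * sketch (illustrative only):
         *
         *     static uint32_t ushl32_lane(uint32_t src, int8_t sh)
         *     {
         *         if (sh <= -32 || sh >= 32) {
         *             return 0;
         *         }
         *         return sh < 0 ? src >> -sh : src << sh;
         *     }
         */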
   4055 
   4056 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
   4057 {
   4058     TCGv_i64 lval = tcg_temp_new_i64();
   4059     TCGv_i64 rval = tcg_temp_new_i64();
   4060     TCGv_i64 lsh = tcg_temp_new_i64();
   4061     TCGv_i64 rsh = tcg_temp_new_i64();
   4062     TCGv_i64 zero = tcg_constant_i64(0);
   4063     TCGv_i64 max = tcg_constant_i64(64);
   4064 
   4065     /*
   4066      * Rely on the TCG guarantee that out of range shifts produce
   4067      * unspecified results, not undefined behaviour (i.e. no trap).
   4068      * Discard out-of-range results after the fact.
   4069      */
   4070     tcg_gen_ext8s_i64(lsh, shift);
   4071     tcg_gen_neg_i64(rsh, lsh);
   4072     tcg_gen_shl_i64(lval, src, lsh);
   4073     tcg_gen_shr_i64(rval, src, rsh);
   4074     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
   4075     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
   4076 
   4077     tcg_temp_free_i64(lval);
   4078     tcg_temp_free_i64(rval);
   4079     tcg_temp_free_i64(lsh);
   4080     tcg_temp_free_i64(rsh);
   4081 }
   4082 
   4083 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
   4084                          TCGv_vec src, TCGv_vec shift)
   4085 {
   4086     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
   4087     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
   4088     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
   4089     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
   4090     TCGv_vec msk, max;
   4091 
   4092     tcg_gen_neg_vec(vece, rsh, shift);
   4093     if (vece == MO_8) {
   4094         tcg_gen_mov_vec(lsh, shift);
   4095     } else {
   4096         msk = tcg_temp_new_vec_matching(dst);
   4097         tcg_gen_dupi_vec(vece, msk, 0xff);
   4098         tcg_gen_and_vec(vece, lsh, shift, msk);
   4099         tcg_gen_and_vec(vece, rsh, rsh, msk);
   4100         tcg_temp_free_vec(msk);
   4101     }
   4102 
   4103     /*
   4104      * Rely on the TCG guarantee that out of range shifts produce
   4105      * unspecified results, not undefined behaviour (i.e. no trap).
   4106      * Discard out-of-range results after the fact.
   4107      */
   4108     tcg_gen_shlv_vec(vece, lval, src, lsh);
   4109     tcg_gen_shrv_vec(vece, rval, src, rsh);
   4110 
   4111     max = tcg_temp_new_vec_matching(dst);
   4112     tcg_gen_dupi_vec(vece, max, 8 << vece);
   4113 
   4114     /*
   4115      * The choice of LT (signed) and GEU (unsigned) is biased toward
   4116      * the instructions of the x86_64 host.  For MO_8, the whole byte
   4117      * is significant so we must use an unsigned compare; otherwise we
   4118      * have already masked to a byte and so a signed compare works.
   4119      * Other tcg hosts have a full set of comparisons and do not care.
   4120      */
   4121     if (vece == MO_8) {
   4122         tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
   4123         tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
   4124         tcg_gen_andc_vec(vece, lval, lval, lsh);
   4125         tcg_gen_andc_vec(vece, rval, rval, rsh);
   4126     } else {
   4127         tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
   4128         tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
   4129         tcg_gen_and_vec(vece, lval, lval, lsh);
   4130         tcg_gen_and_vec(vece, rval, rval, rsh);
   4131     }
   4132     tcg_gen_or_vec(vece, dst, lval, rval);
   4133 
   4134     tcg_temp_free_vec(max);
   4135     tcg_temp_free_vec(lval);
   4136     tcg_temp_free_vec(rval);
   4137     tcg_temp_free_vec(lsh);
   4138     tcg_temp_free_vec(rsh);
   4139 }
   4140 
   4141 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4142                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4143 {
   4144     static const TCGOpcode vecop_list[] = {
   4145         INDEX_op_neg_vec, INDEX_op_shlv_vec,
   4146         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
   4147     };
   4148     static const GVecGen3 ops[4] = {
   4149         { .fniv = gen_ushl_vec,
   4150           .fno = gen_helper_gvec_ushl_b,
   4151           .opt_opc = vecop_list,
   4152           .vece = MO_8 },
   4153         { .fniv = gen_ushl_vec,
   4154           .fno = gen_helper_gvec_ushl_h,
   4155           .opt_opc = vecop_list,
   4156           .vece = MO_16 },
   4157         { .fni4 = gen_ushl_i32,
   4158           .fniv = gen_ushl_vec,
   4159           .opt_opc = vecop_list,
   4160           .vece = MO_32 },
   4161         { .fni8 = gen_ushl_i64,
   4162           .fniv = gen_ushl_vec,
   4163           .opt_opc = vecop_list,
   4164           .vece = MO_64 },
   4165     };
   4166     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4167 }
   4168 
   4169 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
   4170 {
   4171     TCGv_i32 lval = tcg_temp_new_i32();
   4172     TCGv_i32 rval = tcg_temp_new_i32();
   4173     TCGv_i32 lsh = tcg_temp_new_i32();
   4174     TCGv_i32 rsh = tcg_temp_new_i32();
   4175     TCGv_i32 zero = tcg_constant_i32(0);
   4176     TCGv_i32 max = tcg_constant_i32(31);
   4177 
   4178     /*
   4179      * Rely on the TCG guarantee that out of range shifts produce
   4180      * unspecified results, not undefined behaviour (i.e. no trap).
   4181      * Discard out-of-range results after the fact.
   4182      */
   4183     tcg_gen_ext8s_i32(lsh, shift);
   4184     tcg_gen_neg_i32(rsh, lsh);
   4185     tcg_gen_shl_i32(lval, src, lsh);
   4186     tcg_gen_umin_i32(rsh, rsh, max);
   4187     tcg_gen_sar_i32(rval, src, rsh);
   4188     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
   4189     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
   4190 
   4191     tcg_temp_free_i32(lval);
   4192     tcg_temp_free_i32(rval);
   4193     tcg_temp_free_i32(lsh);
   4194     tcg_temp_free_i32(rsh);
   4195 }
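
        /*
         * As for USHL, but right shifts are arithmetic and the right-shift
         * count is clamped to 31, so a large negative count still fills
         * with sign bits rather than producing zero.  Scalar sketch
         * (illustrative only; MIN as in qemu/osdep.h):
         *
         *     static int32_t sshl32_lane(int32_t src, int8_t sh)
         *     {
         *         if (sh >= 32) {
         *             return 0;
         *         }
         *         return sh < 0 ? src >> MIN(-sh, 31) : src << sh;
         *     }
         */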
   4196 
   4197 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
   4198 {
   4199     TCGv_i64 lval = tcg_temp_new_i64();
   4200     TCGv_i64 rval = tcg_temp_new_i64();
   4201     TCGv_i64 lsh = tcg_temp_new_i64();
   4202     TCGv_i64 rsh = tcg_temp_new_i64();
   4203     TCGv_i64 zero = tcg_constant_i64(0);
   4204     TCGv_i64 max = tcg_constant_i64(63);
   4205 
   4206     /*
   4207      * Rely on the TCG guarantee that out of range shifts produce
   4208      * unspecified results, not undefined behaviour (i.e. no trap).
   4209      * Discard out-of-range results after the fact.
   4210      */
   4211     tcg_gen_ext8s_i64(lsh, shift);
   4212     tcg_gen_neg_i64(rsh, lsh);
   4213     tcg_gen_shl_i64(lval, src, lsh);
   4214     tcg_gen_umin_i64(rsh, rsh, max);
   4215     tcg_gen_sar_i64(rval, src, rsh);
   4216     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
   4217     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
   4218 
   4219     tcg_temp_free_i64(lval);
   4220     tcg_temp_free_i64(rval);
   4221     tcg_temp_free_i64(lsh);
   4222     tcg_temp_free_i64(rsh);
   4223 }
   4224 
   4225 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
   4226                          TCGv_vec src, TCGv_vec shift)
   4227 {
   4228     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
   4229     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
   4230     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
   4231     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
   4232     TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
   4233 
   4234     /*
   4235      * Rely on the TCG guarantee that out of range shifts produce
   4236      * unspecified results, not undefined behaviour (i.e. no trap).
   4237      * Discard out-of-range results after the fact.
   4238      */
   4239     tcg_gen_neg_vec(vece, rsh, shift);
   4240     if (vece == MO_8) {
   4241         tcg_gen_mov_vec(lsh, shift);
   4242     } else {
   4243         tcg_gen_dupi_vec(vece, tmp, 0xff);
   4244         tcg_gen_and_vec(vece, lsh, shift, tmp);
   4245         tcg_gen_and_vec(vece, rsh, rsh, tmp);
   4246     }
   4247 
   4248     /* Bound rsh so an out-of-range right shift yields all sign bits.  */
   4249     tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
   4250     tcg_gen_umin_vec(vece, rsh, rsh, tmp);
   4251     tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
   4252 
   4253     tcg_gen_shlv_vec(vece, lval, src, lsh);
   4254     tcg_gen_sarv_vec(vece, rval, src, rsh);
   4255 
   4256     /* Select in-bound left shift.  */
   4257     tcg_gen_andc_vec(vece, lval, lval, tmp);
   4258 
   4259     /* Select between left and right shift.  */
   4260     if (vece == MO_8) {
   4261         tcg_gen_dupi_vec(vece, tmp, 0);
   4262         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
   4263     } else {
   4264         tcg_gen_dupi_vec(vece, tmp, 0x80);
   4265         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
   4266     }
   4267 
   4268     tcg_temp_free_vec(lval);
   4269     tcg_temp_free_vec(rval);
   4270     tcg_temp_free_vec(lsh);
   4271     tcg_temp_free_vec(rsh);
   4272     tcg_temp_free_vec(tmp);
   4273 }
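
        /*
         * The final select above distinguishes negative shift counts.  For
         * MO_8 the count is the raw byte, so "lsh < 0" is a plain signed
         * compare against zero.  For wider elements the count has been
         * masked into [0, 0xff], so an originally negative byte now lies
         * in [0x80, 0xff] and the test becomes a signed "lsh < 0x80",
         * with the cmpsel operands swapped to match.
         */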
   4274 
   4275 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4276                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4277 {
   4278     static const TCGOpcode vecop_list[] = {
   4279         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
   4280         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
   4281     };
   4282     static const GVecGen3 ops[4] = {
   4283         { .fniv = gen_sshl_vec,
   4284           .fno = gen_helper_gvec_sshl_b,
   4285           .opt_opc = vecop_list,
   4286           .vece = MO_8 },
   4287         { .fniv = gen_sshl_vec,
   4288           .fno = gen_helper_gvec_sshl_h,
   4289           .opt_opc = vecop_list,
   4290           .vece = MO_16 },
   4291         { .fni4 = gen_sshl_i32,
   4292           .fniv = gen_sshl_vec,
   4293           .opt_opc = vecop_list,
   4294           .vece = MO_32 },
   4295         { .fni8 = gen_sshl_i64,
   4296           .fniv = gen_sshl_vec,
   4297           .opt_opc = vecop_list,
   4298           .vece = MO_64 },
   4299     };
   4300     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4301 }
   4302 
   4303 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
   4304                           TCGv_vec a, TCGv_vec b)
   4305 {
   4306     TCGv_vec x = tcg_temp_new_vec_matching(t);
   4307     tcg_gen_add_vec(vece, x, a, b);
   4308     tcg_gen_usadd_vec(vece, t, a, b);
   4309     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
   4310     tcg_gen_or_vec(vece, sat, sat, x);
   4311     tcg_temp_free_vec(x);
   4312 }
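         /*
          * Saturation detection worked through for MO_8 (illustrative):
          * a = 0xf0, b = 0x20 gives x = 0x10 from the wrapping add but
          * t = 0xff from the saturating add; the NE compare then yields
          * all-ones for that element, which is ORed into the QC
          * accumulator (vfp.qc).  The sqadd/uqsub/sqsub generators below
          * reuse the same add/compare/OR pattern.
          */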
   4313 
   4314 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4315                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4316 {
   4317     static const TCGOpcode vecop_list[] = {
   4318         INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
   4319     };
   4320     static const GVecGen4 ops[4] = {
   4321         { .fniv = gen_uqadd_vec,
   4322           .fno = gen_helper_gvec_uqadd_b,
   4323           .write_aofs = true,
   4324           .opt_opc = vecop_list,
   4325           .vece = MO_8 },
   4326         { .fniv = gen_uqadd_vec,
   4327           .fno = gen_helper_gvec_uqadd_h,
   4328           .write_aofs = true,
   4329           .opt_opc = vecop_list,
   4330           .vece = MO_16 },
   4331         { .fniv = gen_uqadd_vec,
   4332           .fno = gen_helper_gvec_uqadd_s,
   4333           .write_aofs = true,
   4334           .opt_opc = vecop_list,
   4335           .vece = MO_32 },
   4336         { .fniv = gen_uqadd_vec,
   4337           .fno = gen_helper_gvec_uqadd_d,
   4338           .write_aofs = true,
   4339           .opt_opc = vecop_list,
   4340           .vece = MO_64 },
   4341     };
   4342     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
   4343                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4344 }
   4345 
   4346 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
   4347                           TCGv_vec a, TCGv_vec b)
   4348 {
   4349     TCGv_vec x = tcg_temp_new_vec_matching(t);
   4350     tcg_gen_add_vec(vece, x, a, b);
   4351     tcg_gen_ssadd_vec(vece, t, a, b);
   4352     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
   4353     tcg_gen_or_vec(vece, sat, sat, x);
   4354     tcg_temp_free_vec(x);
   4355 }
   4356 
   4357 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4358                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4359 {
   4360     static const TCGOpcode vecop_list[] = {
   4361         INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
   4362     };
   4363     static const GVecGen4 ops[4] = {
   4364         { .fniv = gen_sqadd_vec,
   4365           .fno = gen_helper_gvec_sqadd_b,
   4366           .opt_opc = vecop_list,
   4367           .write_aofs = true,
   4368           .vece = MO_8 },
   4369         { .fniv = gen_sqadd_vec,
   4370           .fno = gen_helper_gvec_sqadd_h,
   4371           .opt_opc = vecop_list,
   4372           .write_aofs = true,
   4373           .vece = MO_16 },
   4374         { .fniv = gen_sqadd_vec,
   4375           .fno = gen_helper_gvec_sqadd_s,
   4376           .opt_opc = vecop_list,
   4377           .write_aofs = true,
   4378           .vece = MO_32 },
   4379         { .fniv = gen_sqadd_vec,
   4380           .fno = gen_helper_gvec_sqadd_d,
   4381           .opt_opc = vecop_list,
   4382           .write_aofs = true,
   4383           .vece = MO_64 },
   4384     };
   4385     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
   4386                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4387 }
   4388 
   4389 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
   4390                           TCGv_vec a, TCGv_vec b)
   4391 {
   4392     TCGv_vec x = tcg_temp_new_vec_matching(t);
   4393     tcg_gen_sub_vec(vece, x, a, b);
   4394     tcg_gen_ussub_vec(vece, t, a, b);
   4395     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
   4396     tcg_gen_or_vec(vece, sat, sat, x);
   4397     tcg_temp_free_vec(x);
   4398 }
   4399 
   4400 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4401                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4402 {
   4403     static const TCGOpcode vecop_list[] = {
   4404         INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
   4405     };
   4406     static const GVecGen4 ops[4] = {
   4407         { .fniv = gen_uqsub_vec,
   4408           .fno = gen_helper_gvec_uqsub_b,
   4409           .opt_opc = vecop_list,
   4410           .write_aofs = true,
   4411           .vece = MO_8 },
   4412         { .fniv = gen_uqsub_vec,
   4413           .fno = gen_helper_gvec_uqsub_h,
   4414           .opt_opc = vecop_list,
   4415           .write_aofs = true,
   4416           .vece = MO_16 },
   4417         { .fniv = gen_uqsub_vec,
   4418           .fno = gen_helper_gvec_uqsub_s,
   4419           .opt_opc = vecop_list,
   4420           .write_aofs = true,
   4421           .vece = MO_32 },
   4422         { .fniv = gen_uqsub_vec,
   4423           .fno = gen_helper_gvec_uqsub_d,
   4424           .opt_opc = vecop_list,
   4425           .write_aofs = true,
   4426           .vece = MO_64 },
   4427     };
   4428     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
   4429                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4430 }
   4431 
   4432 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
   4433                           TCGv_vec a, TCGv_vec b)
   4434 {
   4435     TCGv_vec x = tcg_temp_new_vec_matching(t);
   4436     tcg_gen_sub_vec(vece, x, a, b);
   4437     tcg_gen_sssub_vec(vece, t, a, b);
   4438     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
   4439     tcg_gen_or_vec(vece, sat, sat, x);
   4440     tcg_temp_free_vec(x);
   4441 }
   4442 
   4443 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4444                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4445 {
   4446     static const TCGOpcode vecop_list[] = {
   4447         INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
   4448     };
   4449     static const GVecGen4 ops[4] = {
   4450         { .fniv = gen_sqsub_vec,
   4451           .fno = gen_helper_gvec_sqsub_b,
   4452           .opt_opc = vecop_list,
   4453           .write_aofs = true,
   4454           .vece = MO_8 },
   4455         { .fniv = gen_sqsub_vec,
   4456           .fno = gen_helper_gvec_sqsub_h,
   4457           .opt_opc = vecop_list,
   4458           .write_aofs = true,
   4459           .vece = MO_16 },
   4460         { .fniv = gen_sqsub_vec,
   4461           .fno = gen_helper_gvec_sqsub_s,
   4462           .opt_opc = vecop_list,
   4463           .write_aofs = true,
   4464           .vece = MO_32 },
   4465         { .fniv = gen_sqsub_vec,
   4466           .fno = gen_helper_gvec_sqsub_d,
   4467           .opt_opc = vecop_list,
   4468           .write_aofs = true,
   4469           .vece = MO_64 },
   4470     };
   4471     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
   4472                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4473 }
   4474 
   4475 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   4476 {
   4477     TCGv_i32 t = tcg_temp_new_i32();
   4478 
   4479     tcg_gen_sub_i32(t, a, b);
   4480     tcg_gen_sub_i32(d, b, a);
   4481     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
   4482     tcg_temp_free_i32(t);
   4483 }
   4484 
   4485 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
   4486 {
   4487     TCGv_i64 t = tcg_temp_new_i64();
   4488 
   4489     tcg_gen_sub_i64(t, a, b);
   4490     tcg_gen_sub_i64(d, b, a);
   4491     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
   4492     tcg_temp_free_i64(t);
   4493 }
   4494 
   4495 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
   4496 {
   4497     TCGv_vec t = tcg_temp_new_vec_matching(d);
   4498 
   4499     tcg_gen_smin_vec(vece, t, a, b);
   4500     tcg_gen_smax_vec(vece, d, a, b);
   4501     tcg_gen_sub_vec(vece, d, d, t);
   4502     tcg_temp_free_vec(t);
   4503 }
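         /*
          * Example (illustrative): for a = -3, b = 10 the vector form
          * computes smax - smin = 10 - (-3) = 13, and the i32/i64 forms
          * above reach the same result by selecting between a - b and
          * b - a with a signed-less-than movcond.  The unsigned (uabd)
          * and accumulating (saba/uaba) variants below follow the same
          * shapes.
          */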
   4504 
   4505 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4506                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4507 {
   4508     static const TCGOpcode vecop_list[] = {
   4509         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
   4510     };
   4511     static const GVecGen3 ops[4] = {
   4512         { .fniv = gen_sabd_vec,
   4513           .fno = gen_helper_gvec_sabd_b,
   4514           .opt_opc = vecop_list,
   4515           .vece = MO_8 },
   4516         { .fniv = gen_sabd_vec,
   4517           .fno = gen_helper_gvec_sabd_h,
   4518           .opt_opc = vecop_list,
   4519           .vece = MO_16 },
   4520         { .fni4 = gen_sabd_i32,
   4521           .fniv = gen_sabd_vec,
   4522           .fno = gen_helper_gvec_sabd_s,
   4523           .opt_opc = vecop_list,
   4524           .vece = MO_32 },
   4525         { .fni8 = gen_sabd_i64,
   4526           .fniv = gen_sabd_vec,
   4527           .fno = gen_helper_gvec_sabd_d,
   4528           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   4529           .opt_opc = vecop_list,
   4530           .vece = MO_64 },
   4531     };
   4532     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4533 }
   4534 
   4535 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   4536 {
   4537     TCGv_i32 t = tcg_temp_new_i32();
   4538 
   4539     tcg_gen_sub_i32(t, a, b);
   4540     tcg_gen_sub_i32(d, b, a);
   4541     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
   4542     tcg_temp_free_i32(t);
   4543 }
   4544 
   4545 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
   4546 {
   4547     TCGv_i64 t = tcg_temp_new_i64();
   4548 
   4549     tcg_gen_sub_i64(t, a, b);
   4550     tcg_gen_sub_i64(d, b, a);
   4551     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
   4552     tcg_temp_free_i64(t);
   4553 }
   4554 
   4555 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
   4556 {
   4557     TCGv_vec t = tcg_temp_new_vec_matching(d);
   4558 
   4559     tcg_gen_umin_vec(vece, t, a, b);
   4560     tcg_gen_umax_vec(vece, d, a, b);
   4561     tcg_gen_sub_vec(vece, d, d, t);
   4562     tcg_temp_free_vec(t);
   4563 }
   4564 
   4565 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4566                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4567 {
   4568     static const TCGOpcode vecop_list[] = {
   4569         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
   4570     };
   4571     static const GVecGen3 ops[4] = {
   4572         { .fniv = gen_uabd_vec,
   4573           .fno = gen_helper_gvec_uabd_b,
   4574           .opt_opc = vecop_list,
   4575           .vece = MO_8 },
   4576         { .fniv = gen_uabd_vec,
   4577           .fno = gen_helper_gvec_uabd_h,
   4578           .opt_opc = vecop_list,
   4579           .vece = MO_16 },
   4580         { .fni4 = gen_uabd_i32,
   4581           .fniv = gen_uabd_vec,
   4582           .fno = gen_helper_gvec_uabd_s,
   4583           .opt_opc = vecop_list,
   4584           .vece = MO_32 },
   4585         { .fni8 = gen_uabd_i64,
   4586           .fniv = gen_uabd_vec,
   4587           .fno = gen_helper_gvec_uabd_d,
   4588           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   4589           .opt_opc = vecop_list,
   4590           .vece = MO_64 },
   4591     };
   4592     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4593 }
   4594 
   4595 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   4596 {
   4597     TCGv_i32 t = tcg_temp_new_i32();
   4598     gen_sabd_i32(t, a, b);
   4599     tcg_gen_add_i32(d, d, t);
   4600     tcg_temp_free_i32(t);
   4601 }
   4602 
   4603 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
   4604 {
   4605     TCGv_i64 t = tcg_temp_new_i64();
   4606     gen_sabd_i64(t, a, b);
   4607     tcg_gen_add_i64(d, d, t);
   4608     tcg_temp_free_i64(t);
   4609 }
   4610 
   4611 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
   4612 {
   4613     TCGv_vec t = tcg_temp_new_vec_matching(d);
   4614     gen_sabd_vec(vece, t, a, b);
   4615     tcg_gen_add_vec(vece, d, d, t);
   4616     tcg_temp_free_vec(t);
   4617 }
   4618 
   4619 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4620                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4621 {
   4622     static const TCGOpcode vecop_list[] = {
   4623         INDEX_op_sub_vec, INDEX_op_add_vec,
   4624         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
   4625     };
   4626     static const GVecGen3 ops[4] = {
   4627         { .fniv = gen_saba_vec,
   4628           .fno = gen_helper_gvec_saba_b,
   4629           .opt_opc = vecop_list,
   4630           .load_dest = true,
   4631           .vece = MO_8 },
   4632         { .fniv = gen_saba_vec,
   4633           .fno = gen_helper_gvec_saba_h,
   4634           .opt_opc = vecop_list,
   4635           .load_dest = true,
   4636           .vece = MO_16 },
   4637         { .fni4 = gen_saba_i32,
   4638           .fniv = gen_saba_vec,
   4639           .fno = gen_helper_gvec_saba_s,
   4640           .opt_opc = vecop_list,
   4641           .load_dest = true,
   4642           .vece = MO_32 },
   4643         { .fni8 = gen_saba_i64,
   4644           .fniv = gen_saba_vec,
   4645           .fno = gen_helper_gvec_saba_d,
   4646           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   4647           .opt_opc = vecop_list,
   4648           .load_dest = true,
   4649           .vece = MO_64 },
   4650     };
   4651     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4652 }
   4653 
   4654 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
   4655 {
   4656     TCGv_i32 t = tcg_temp_new_i32();
   4657     gen_uabd_i32(t, a, b);
   4658     tcg_gen_add_i32(d, d, t);
   4659     tcg_temp_free_i32(t);
   4660 }
   4661 
   4662 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
   4663 {
   4664     TCGv_i64 t = tcg_temp_new_i64();
   4665     gen_uabd_i64(t, a, b);
   4666     tcg_gen_add_i64(d, d, t);
   4667     tcg_temp_free_i64(t);
   4668 }
   4669 
   4670 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
   4671 {
   4672     TCGv_vec t = tcg_temp_new_vec_matching(d);
   4673     gen_uabd_vec(vece, t, a, b);
   4674     tcg_gen_add_vec(vece, d, d, t);
   4675     tcg_temp_free_vec(t);
   4676 }
   4677 
   4678 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
   4679                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
   4680 {
   4681     static const TCGOpcode vecop_list[] = {
   4682         INDEX_op_sub_vec, INDEX_op_add_vec,
   4683         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
   4684     };
   4685     static const GVecGen3 ops[4] = {
   4686         { .fniv = gen_uaba_vec,
   4687           .fno = gen_helper_gvec_uaba_b,
   4688           .opt_opc = vecop_list,
   4689           .load_dest = true,
   4690           .vece = MO_8 },
   4691         { .fniv = gen_uaba_vec,
   4692           .fno = gen_helper_gvec_uaba_h,
   4693           .opt_opc = vecop_list,
   4694           .load_dest = true,
   4695           .vece = MO_16 },
   4696         { .fni4 = gen_uaba_i32,
   4697           .fniv = gen_uaba_vec,
   4698           .fno = gen_helper_gvec_uaba_s,
   4699           .opt_opc = vecop_list,
   4700           .load_dest = true,
   4701           .vece = MO_32 },
   4702         { .fni8 = gen_uaba_i64,
   4703           .fniv = gen_uaba_vec,
   4704           .fno = gen_helper_gvec_uaba_d,
   4705           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
   4706           .opt_opc = vecop_list,
   4707           .load_dest = true,
   4708           .vece = MO_64 },
   4709     };
   4710     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
   4711 }
   4712 
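         /*
          * Emit code for a coprocessor register access.  As a concrete
          * illustration, MRC p15, 0, r0, c1, c0, 0 (a SCTLR read)
          * arrives here as cpnum = 15, is64 = 0, opc1 = 0, crn = 1,
          * crm = 0, opc2 = 0, isread = true, rt = 0.
          */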
   4713 static void do_coproc_insn(DisasContext *s, int cpnum, int is64,
   4714                            int opc1, int crn, int crm, int opc2,
   4715                            bool isread, int rt, int rt2)
   4716 {
   4717     const ARMCPRegInfo *ri;
   4718 
   4719     ri = get_arm_cp_reginfo(s->cp_regs,
   4720             ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
   4721     if (ri) {
   4722         bool need_exit_tb;
   4723 
   4724         /* Check access permissions */
   4725         if (!cp_access_ok(s->current_el, ri, isread)) {
   4726             unallocated_encoding(s);
   4727             return;
   4728         }
   4729 
   4730         if (s->hstr_active || ri->accessfn ||
   4731             (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
   4732             /* Emit code to perform further access permissions checks at
   4733              * runtime; this may result in an exception.
    4734              * Note that on XScale all cp0..cp13 registers do an access check
   4735              * call in order to handle c15_cpar.
   4736              */
   4737             uint32_t syndrome;
   4738 
   4739             /* Note that since we are an implementation which takes an
   4740              * exception on a trapped conditional instruction only if the
   4741              * instruction passes its condition code check, we can take
   4742              * advantage of the clause in the ARM ARM that allows us to set
   4743              * the COND field in the instruction to 0xE in all cases.
   4744              * We could fish the actual condition out of the insn (ARM)
   4745              * or the condexec bits (Thumb) but it isn't necessary.
   4746              */
   4747             switch (cpnum) {
   4748             case 14:
   4749                 if (is64) {
   4750                     syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
   4751                                                  isread, false);
   4752                 } else {
   4753                     syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
   4754                                                 rt, isread, false);
   4755                 }
   4756                 break;
   4757             case 15:
   4758                 if (is64) {
   4759                     syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
   4760                                                  isread, false);
   4761                 } else {
   4762                     syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
   4763                                                 rt, isread, false);
   4764                 }
   4765                 break;
   4766             default:
   4767                 /* ARMv8 defines that only coprocessors 14 and 15 exist,
   4768                  * so this can only happen if this is an ARMv7 or earlier CPU,
   4769                  * in which case the syndrome information won't actually be
   4770                  * guest visible.
   4771                  */
   4772                 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
   4773                 syndrome = syn_uncategorized();
   4774                 break;
   4775             }
   4776 
   4777             gen_set_condexec(s);
   4778             gen_update_pc(s, 0);
   4779             gen_helper_access_check_cp_reg(cpu_env,
   4780                                            tcg_constant_ptr(ri),
   4781                                            tcg_constant_i32(syndrome),
   4782                                            tcg_constant_i32(isread));
   4783         } else if (ri->type & ARM_CP_RAISES_EXC) {
   4784             /*
   4785              * The readfn or writefn might raise an exception;
   4786              * synchronize the CPU state in case it does.
   4787              */
   4788             gen_set_condexec(s);
   4789             gen_update_pc(s, 0);
   4790         }
   4791 
   4792         /* Handle special cases first */
   4793         switch (ri->type & ARM_CP_SPECIAL_MASK) {
   4794         case 0:
   4795             break;
   4796         case ARM_CP_NOP:
   4797             return;
   4798         case ARM_CP_WFI:
   4799             if (isread) {
   4800                 unallocated_encoding(s);
   4801                 return;
   4802             }
   4803             gen_update_pc(s, curr_insn_len(s));
   4804             s->base.is_jmp = DISAS_WFI;
   4805             return;
   4806         default:
   4807             g_assert_not_reached();
   4808         }
   4809 
   4810         if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
   4811             gen_io_start();
   4812         }
   4813 
   4814         if (isread) {
   4815             /* Read */
   4816             if (is64) {
   4817                 TCGv_i64 tmp64;
   4818                 TCGv_i32 tmp;
   4819                 if (ri->type & ARM_CP_CONST) {
   4820                     tmp64 = tcg_constant_i64(ri->resetvalue);
   4821                 } else if (ri->readfn) {
   4822                     tmp64 = tcg_temp_new_i64();
   4823                     gen_helper_get_cp_reg64(tmp64, cpu_env,
   4824                                             tcg_constant_ptr(ri));
   4825                 } else {
   4826                     tmp64 = tcg_temp_new_i64();
   4827                     tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
   4828                 }
   4829                 tmp = tcg_temp_new_i32();
   4830                 tcg_gen_extrl_i64_i32(tmp, tmp64);
   4831                 store_reg(s, rt, tmp);
   4832                 tmp = tcg_temp_new_i32();
   4833                 tcg_gen_extrh_i64_i32(tmp, tmp64);
   4834                 tcg_temp_free_i64(tmp64);
   4835                 store_reg(s, rt2, tmp);
   4836             } else {
   4837                 TCGv_i32 tmp;
   4838                 if (ri->type & ARM_CP_CONST) {
   4839                     tmp = tcg_constant_i32(ri->resetvalue);
   4840                 } else if (ri->readfn) {
   4841                     tmp = tcg_temp_new_i32();
   4842                     gen_helper_get_cp_reg(tmp, cpu_env, tcg_constant_ptr(ri));
   4843                 } else {
   4844                     tmp = load_cpu_offset(ri->fieldoffset);
   4845                 }
   4846                 if (rt == 15) {
    4847                     /* A destination register of r15 for 32-bit loads sets
    4848                      * the condition codes from the high 4 bits of the value.
   4849                      */
   4850                     gen_set_nzcv(tmp);
   4851                     tcg_temp_free_i32(tmp);
   4852                 } else {
   4853                     store_reg(s, rt, tmp);
   4854                 }
   4855             }
   4856         } else {
   4857             /* Write */
   4858             if (ri->type & ARM_CP_CONST) {
   4859                 /* If not forbidden by access permissions, treat as WI */
   4860                 return;
   4861             }
   4862 
   4863             if (is64) {
   4864                 TCGv_i32 tmplo, tmphi;
   4865                 TCGv_i64 tmp64 = tcg_temp_new_i64();
   4866                 tmplo = load_reg(s, rt);
   4867                 tmphi = load_reg(s, rt2);
   4868                 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
   4869                 tcg_temp_free_i32(tmplo);
   4870                 tcg_temp_free_i32(tmphi);
   4871                 if (ri->writefn) {
   4872                     gen_helper_set_cp_reg64(cpu_env, tcg_constant_ptr(ri),
   4873                                             tmp64);
   4874                 } else {
   4875                     tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
   4876                 }
   4877                 tcg_temp_free_i64(tmp64);
   4878             } else {
   4879                 TCGv_i32 tmp = load_reg(s, rt);
   4880                 if (ri->writefn) {
   4881                     gen_helper_set_cp_reg(cpu_env, tcg_constant_ptr(ri), tmp);
   4882                     tcg_temp_free_i32(tmp);
   4883                 } else {
   4884                     store_cpu_offset(tmp, ri->fieldoffset, 4);
   4885                 }
   4886             }
   4887         }
   4888 
   4889         /* I/O operations must end the TB here (whether read or write) */
   4890         need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
   4891                         (ri->type & ARM_CP_IO));
   4892 
   4893         if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
   4894             /*
   4895              * A write to any coprocessor register that ends a TB
   4896              * must rebuild the hflags for the next TB.
   4897              */
   4898             gen_rebuild_hflags(s, ri->type & ARM_CP_NEWEL);
   4899             /*
   4900              * We default to ending the TB on a coprocessor register write,
   4901              * but allow this to be suppressed by the register definition
   4902              * (usually only necessary to work around guest bugs).
   4903              */
   4904             need_exit_tb = true;
   4905         }
   4906         if (need_exit_tb) {
   4907             gen_lookup_tb(s);
   4908         }
   4909 
   4910         return;
   4911     }
   4912 
    4913     /* Unknown register; this might be a guest error or an
    4914      * unimplemented QEMU feature.
   4915      */
   4916     if (is64) {
   4917         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
    4918                       "64-bit system register cp:%d opc1:%d crm:%d "
   4919                       "(%s)\n",
   4920                       isread ? "read" : "write", cpnum, opc1, crm,
   4921                       s->ns ? "non-secure" : "secure");
   4922     } else {
   4923         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
   4924                       "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
   4925                       "(%s)\n",
   4926                       isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
   4927                       s->ns ? "non-secure" : "secure");
   4928     }
   4929 
   4930     unallocated_encoding(s);
   4931     return;
   4932 }
   4933 
   4934 /* Decode XScale DSP or iWMMXt insn (in the copro space, cp=0 or 1) */
   4935 static void disas_xscale_insn(DisasContext *s, uint32_t insn)
   4936 {
   4937     int cpnum = (insn >> 8) & 0xf;
   4938 
   4939     if (extract32(s->c15_cpar, cpnum, 1) == 0) {
   4940         unallocated_encoding(s);
   4941     } else if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
   4942         if (disas_iwmmxt_insn(s, insn)) {
   4943             unallocated_encoding(s);
   4944         }
   4945     } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
   4946         if (disas_dsp_insn(s, insn)) {
   4947             unallocated_encoding(s);
   4948         }
   4949     }
   4950 }
   4951 
   4952 /* Store a 64-bit value to a register pair.  Clobbers val.  */
   4953 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
   4954 {
   4955     TCGv_i32 tmp;
   4956     tmp = tcg_temp_new_i32();
   4957     tcg_gen_extrl_i64_i32(tmp, val);
   4958     store_reg(s, rlow, tmp);
   4959     tmp = tcg_temp_new_i32();
   4960     tcg_gen_extrh_i64_i32(tmp, val);
   4961     store_reg(s, rhigh, tmp);
   4962 }
   4963 
    4964 /* Load and add a 64-bit value from a register pair.  */
   4965 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
   4966 {
   4967     TCGv_i64 tmp;
   4968     TCGv_i32 tmpl;
   4969     TCGv_i32 tmph;
   4970 
    4971     /* Load the 64-bit value rhigh:rlow.  */
   4972     tmpl = load_reg(s, rlow);
   4973     tmph = load_reg(s, rhigh);
   4974     tmp = tcg_temp_new_i64();
   4975     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
   4976     tcg_temp_free_i32(tmpl);
   4977     tcg_temp_free_i32(tmph);
   4978     tcg_gen_add_i64(val, val, tmp);
   4979     tcg_temp_free_i64(tmp);
   4980 }
   4981 
   4982 /* Set N and Z flags from hi|lo.  */
   4983 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
   4984 {
   4985     tcg_gen_mov_i32(cpu_NF, hi);
   4986     tcg_gen_or_i32(cpu_ZF, lo, hi);
   4987 }
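         /*
          * These are QEMU's lazy flag variables: Z reads as set only when
          * cpu_ZF == 0, so lo|hi is zero exactly when the whole 64-bit
          * result is zero, and N is bit 31 of cpu_NF, i.e. bit 63 of the
          * result.
          */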
   4988 
   4989 /* Load/Store exclusive instructions are implemented by remembering
   4990    the value/address loaded, and seeing if these are the same
   4991    when the store is performed.  This should be sufficient to implement
   4992    the architecturally mandated semantics, and avoids having to monitor
   4993    regular stores.  The compare vs the remembered value is done during
   4994    the cmpxchg operation, but we must compare the addresses manually.  */
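         /*
          * Guest-visible sketch of the result (not generated code):
          *
          *   LDREX Rt, [Rn]     @ exclusive_addr = Rn; exclusive_val = [Rn]
          *   ...
          *   STREX Rd, Rt, [Rn] @ Rd = (Rn == exclusive_addr &&
          *                      @       cmpxchg([Rn], exclusive_val, Rt)
          *                      @           succeeds) ? 0 : 1
          *
          * Either way STREX clears the monitor, and CLREX (below) sets
          * exclusive_addr to -1, which can never match a zero-extended
          * 32-bit address, so a later lone STREX fails.
          */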
   4995 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
   4996                                TCGv_i32 addr, int size)
   4997 {
   4998     TCGv_i32 tmp = tcg_temp_new_i32();
   4999     MemOp opc = size | MO_ALIGN | s->be_data;
   5000 
   5001     s->is_ldex = true;
   5002 
   5003     if (size == 3) {
   5004         TCGv_i32 tmp2 = tcg_temp_new_i32();
   5005         TCGv_i64 t64 = tcg_temp_new_i64();
   5006 
   5007         /*
   5008          * For AArch32, architecturally the 32-bit word at the lowest
   5009          * address is always Rt and the one at addr+4 is Rt2, even if
   5010          * the CPU is big-endian. That means we don't want to do a
   5011          * gen_aa32_ld_i64(), which checks SCTLR_B as if for an
   5012          * architecturally 64-bit access, but instead do a 64-bit access
   5013          * using MO_BE if appropriate and then split the two halves.
   5014          */
   5015         TCGv taddr = gen_aa32_addr(s, addr, opc);
   5016 
   5017         tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
   5018         tcg_temp_free(taddr);
   5019         tcg_gen_mov_i64(cpu_exclusive_val, t64);
   5020         if (s->be_data == MO_BE) {
   5021             tcg_gen_extr_i64_i32(tmp2, tmp, t64);
   5022         } else {
   5023             tcg_gen_extr_i64_i32(tmp, tmp2, t64);
   5024         }
   5025         tcg_temp_free_i64(t64);
   5026 
   5027         store_reg(s, rt2, tmp2);
   5028     } else {
   5029         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
   5030         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
   5031     }
   5032 
   5033     store_reg(s, rt, tmp);
   5034     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
   5035 }
   5036 
   5037 static void gen_clrex(DisasContext *s)
   5038 {
   5039     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
   5040 }
   5041 
   5042 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
   5043                                 TCGv_i32 addr, int size)
   5044 {
   5045     TCGv_i32 t0, t1, t2;
   5046     TCGv_i64 extaddr;
   5047     TCGv taddr;
   5048     TCGLabel *done_label;
   5049     TCGLabel *fail_label;
   5050     MemOp opc = size | MO_ALIGN | s->be_data;
   5051 
   5052     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
   5053          [addr] = {Rt};
   5054          {Rd} = 0;
   5055        } else {
   5056          {Rd} = 1;
   5057        } */
   5058     fail_label = gen_new_label();
   5059     done_label = gen_new_label();
   5060     extaddr = tcg_temp_new_i64();
   5061     tcg_gen_extu_i32_i64(extaddr, addr);
   5062     tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
   5063     tcg_temp_free_i64(extaddr);
   5064 
   5065     taddr = gen_aa32_addr(s, addr, opc);
   5066     t0 = tcg_temp_new_i32();
   5067     t1 = load_reg(s, rt);
   5068     if (size == 3) {
   5069         TCGv_i64 o64 = tcg_temp_new_i64();
   5070         TCGv_i64 n64 = tcg_temp_new_i64();
   5071 
   5072         t2 = load_reg(s, rt2);
   5073 
   5074         /*
   5075          * For AArch32, architecturally the 32-bit word at the lowest
   5076          * address is always Rt and the one at addr+4 is Rt2, even if
   5077          * the CPU is big-endian. Since we're going to treat this as a
   5078          * single 64-bit BE store, we need to put the two halves in the
   5079          * opposite order for BE to LE, so that they end up in the right
   5080          * places.  We don't want gen_aa32_st_i64, because that checks
   5081          * SCTLR_B as if for an architectural 64-bit access.
   5082          */
   5083         if (s->be_data == MO_BE) {
   5084             tcg_gen_concat_i32_i64(n64, t2, t1);
   5085         } else {
   5086             tcg_gen_concat_i32_i64(n64, t1, t2);
   5087         }
   5088         tcg_temp_free_i32(t2);
   5089 
   5090         tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
   5091                                    get_mem_index(s), opc);
   5092         tcg_temp_free_i64(n64);
   5093 
   5094         tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
   5095         tcg_gen_extrl_i64_i32(t0, o64);
   5096 
   5097         tcg_temp_free_i64(o64);
   5098     } else {
   5099         t2 = tcg_temp_new_i32();
   5100         tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
   5101         tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
   5102         tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
   5103         tcg_temp_free_i32(t2);
   5104     }
   5105     tcg_temp_free_i32(t1);
   5106     tcg_temp_free(taddr);
   5107     tcg_gen_mov_i32(cpu_R[rd], t0);
   5108     tcg_temp_free_i32(t0);
   5109     tcg_gen_br(done_label);
   5110 
   5111     gen_set_label(fail_label);
   5112     tcg_gen_movi_i32(cpu_R[rd], 1);
   5113     gen_set_label(done_label);
   5114     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
   5115 }
   5116 
   5117 /* gen_srs:
   5119  * @s: DisasContext
   5120  * @mode: mode field from insn (which stack to store to)
   5121  * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
   5122  * @writeback: true if writeback bit set
   5123  *
   5124  * Generate code for the SRS (Store Return State) insn.
   5125  */
   5126 static void gen_srs(DisasContext *s,
   5127                     uint32_t mode, uint32_t amode, bool writeback)
   5128 {
   5129     int32_t offset;
   5130     TCGv_i32 addr, tmp;
   5131     bool undef = false;
   5132 
   5133     /* SRS is:
   5134      * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
   5135      *   and specified mode is monitor mode
   5136      * - UNDEFINED in Hyp mode
   5137      * - UNPREDICTABLE in User or System mode
   5138      * - UNPREDICTABLE if the specified mode is:
   5139      * -- not implemented
   5140      * -- not a valid mode number
   5141      * -- a mode that's at a higher exception level
   5142      * -- Monitor, if we are Non-secure
   5143      * For the UNPREDICTABLE cases we choose to UNDEF.
   5144      */
   5145     if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
   5146         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_uncategorized(), 3);
   5147         return;
   5148     }
   5149 
   5150     if (s->current_el == 0 || s->current_el == 2) {
   5151         undef = true;
   5152     }
   5153 
   5154     switch (mode) {
   5155     case ARM_CPU_MODE_USR:
   5156     case ARM_CPU_MODE_FIQ:
   5157     case ARM_CPU_MODE_IRQ:
   5158     case ARM_CPU_MODE_SVC:
   5159     case ARM_CPU_MODE_ABT:
   5160     case ARM_CPU_MODE_UND:
   5161     case ARM_CPU_MODE_SYS:
   5162         break;
   5163     case ARM_CPU_MODE_HYP:
   5164         if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
   5165             undef = true;
   5166         }
   5167         break;
   5168     case ARM_CPU_MODE_MON:
   5169         /* No need to check specifically for "are we non-secure" because
   5170          * we've already made EL0 UNDEF and handled the trap for S-EL1;
   5171          * so if this isn't EL3 then we must be non-secure.
   5172          */
   5173         if (s->current_el != 3) {
   5174             undef = true;
   5175         }
   5176         break;
   5177     default:
   5178         undef = true;
   5179     }
   5180 
   5181     if (undef) {
   5182         unallocated_encoding(s);
   5183         return;
   5184     }
   5185 
   5186     addr = tcg_temp_new_i32();
   5187     /* get_r13_banked() will raise an exception if called from System mode */
   5188     gen_set_condexec(s);
   5189     gen_update_pc(s, 0);
   5190     gen_helper_get_r13_banked(addr, cpu_env, tcg_constant_i32(mode));
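             /*
              * The two words (LR, then SPSR) are stored at addr and
              * addr + 4, so this first adjustment places them per the
              * addressing mode, e.g. at SP-8 and SP-4 for DB; the second
              * switch below computes the writeback value, SP-8 for DB.
              */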
   5191     switch (amode) {
   5192     case 0: /* DA */
   5193         offset = -4;
   5194         break;
   5195     case 1: /* IA */
   5196         offset = 0;
   5197         break;
   5198     case 2: /* DB */
   5199         offset = -8;
   5200         break;
   5201     case 3: /* IB */
   5202         offset = 4;
   5203         break;
   5204     default:
   5205         g_assert_not_reached();
   5206     }
   5207     tcg_gen_addi_i32(addr, addr, offset);
   5208     tmp = load_reg(s, 14);
   5209     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
   5210     tcg_temp_free_i32(tmp);
   5211     tmp = load_cpu_field(spsr);
   5212     tcg_gen_addi_i32(addr, addr, 4);
   5213     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
   5214     tcg_temp_free_i32(tmp);
   5215     if (writeback) {
   5216         switch (amode) {
   5217         case 0:
   5218             offset = -8;
   5219             break;
   5220         case 1:
   5221             offset = 4;
   5222             break;
   5223         case 2:
   5224             offset = -4;
   5225             break;
   5226         case 3:
   5227             offset = 0;
   5228             break;
   5229         default:
   5230             g_assert_not_reached();
   5231         }
   5232         tcg_gen_addi_i32(addr, addr, offset);
   5233         gen_helper_set_r13_banked(cpu_env, tcg_constant_i32(mode), addr);
   5234     }
   5235     tcg_temp_free_i32(addr);
   5236     s->base.is_jmp = DISAS_UPDATE_EXIT;
   5237 }
   5238 
   5239 /* Skip this instruction if the ARM condition is false */
   5240 static void arm_skip_unless(DisasContext *s, uint32_t cond)
   5241 {
   5242     arm_gen_condlabel(s);
   5243     arm_gen_test_cc(cond ^ 1, s->condlabel.label);
   5244 }
   5245 
   5246 
   5247 /*
   5248  * Constant expanders used by T16/T32 decode
   5249  */
   5250 
   5251 /* Return only the rotation part of T32ExpandImm.  */
   5252 static int t32_expandimm_rot(DisasContext *s, int x)
   5253 {
   5254     return x & 0xc00 ? extract32(x, 7, 5) : 0;
   5255 }
   5256 
   5257 /* Return the unrotated immediate from T32ExpandImm.  */
   5258 static int t32_expandimm_imm(DisasContext *s, int x)
   5259 {
   5260     int imm = extract32(x, 0, 8);
   5261 
   5262     switch (extract32(x, 8, 4)) {
   5263     case 0: /* XY */
   5264         /* Nothing to do.  */
   5265         break;
   5266     case 1: /* 00XY00XY */
   5267         imm *= 0x00010001;
   5268         break;
   5269     case 2: /* XY00XY00 */
   5270         imm *= 0x01000100;
   5271         break;
   5272     case 3: /* XYXYXYXY */
   5273         imm *= 0x01010101;
   5274         break;
   5275     default:
   5276         /* Rotated constant.  */
   5277         imm |= 0x80;
   5278         break;
   5279     }
   5280     return imm;
   5281 }
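         /*
          * Worked T32ExpandImm examples (illustrative): imm12 = 0x3ab
          * replicates the byte, 0xab * 0x01010101 = 0xabababab.  imm12 =
          * 0x42f is a rotated constant: the unrotated value is
          * 0x2f | 0x80 = 0xaf, t32_expandimm_rot() extracts the rotation
          * 8 from bits [11:7], and ror32(0xaf, 8) = 0xaf000000.
          */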
   5282 
   5283 static int t32_branch24(DisasContext *s, int x)
   5284 {
   5285     /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
   5286     x ^= !(x < 0) * (3 << 21);
   5287     /* Append the final zero.  */
   5288     return x << 1;
   5289 }
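         /*
          * Example (illustrative): the field arrives with S already in
          * the sign bit and J1:J2 at [22:21].  For a forward branch
          * (S = 0, so x >= 0) the xor flips both J bits, implementing
          * I = NOT(J EOR S); for a backward branch (x < 0) they are left
          * alone.  The final shift appends the halfword-alignment zero.
          */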
   5290 
   5291 static int t16_setflags(DisasContext *s)
   5292 {
   5293     return s->condexec_mask == 0;
   5294 }
   5295 
   5296 static int t16_push_list(DisasContext *s, int x)
   5297 {
   5298     return (x & 0xff) | (x & 0x100) << (14 - 8);
   5299 }
   5300 
   5301 static int t16_pop_list(DisasContext *s, int x)
   5302 {
   5303     return (x & 0xff) | (x & 0x100) << (15 - 8);
   5304 }
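         /*
          * E.g. (illustrative) an encoded list of 0x1ff expands to 0x40ff
          * for PUSH (bit 8 is the M bit, i.e. LR/r14) and to 0x80ff for
          * POP (bit 8 is the P bit, i.e. PC/r15).
          */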
   5305 
   5306 /*
   5307  * Include the generated decoders.
   5308  */
   5309 
   5310 #include "decode-a32.c.inc"
   5311 #include "decode-a32-uncond.c.inc"
   5312 #include "decode-t32.c.inc"
   5313 #include "decode-t16.c.inc"
   5314 
   5315 static bool valid_cp(DisasContext *s, int cp)
   5316 {
   5317     /*
   5318      * Return true if this coprocessor field indicates something
   5319      * that's really a possible coprocessor.
   5320      * For v7 and earlier, coprocessors 8..15 were reserved for Arm use,
   5321      * and of those only cp14 and cp15 were used for registers.
   5322      * cp10 and cp11 were used for VFP and Neon, whose decode is
   5323      * dealt with elsewhere. With the advent of fp16, cp9 is also
   5324      * now part of VFP.
   5325      * For v8A and later, the encoding has been tightened so that
   5326      * only cp14 and cp15 are valid, and other values aren't considered
   5327      * to be in the coprocessor-instruction space at all. v8M still
   5328      * permits coprocessors 0..7.
   5329      * For XScale, we must not decode the XScale cp0, cp1 space as
   5330      * a standard coprocessor insn, because we want to fall through to
   5331      * the legacy disas_xscale_insn() decoder after decodetree is done.
   5332      */
   5333     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cp == 0 || cp == 1)) {
   5334         return false;
   5335     }
   5336 
   5337     if (arm_dc_feature(s, ARM_FEATURE_V8) &&
   5338         !arm_dc_feature(s, ARM_FEATURE_M)) {
   5339         return cp >= 14;
   5340     }
   5341     return cp < 8 || cp >= 14;
   5342 }
   5343 
   5344 static bool trans_MCR(DisasContext *s, arg_MCR *a)
   5345 {
   5346     if (!valid_cp(s, a->cp)) {
   5347         return false;
   5348     }
   5349     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
   5350                    false, a->rt, 0);
   5351     return true;
   5352 }
   5353 
   5354 static bool trans_MRC(DisasContext *s, arg_MRC *a)
   5355 {
   5356     if (!valid_cp(s, a->cp)) {
   5357         return false;
   5358     }
   5359     do_coproc_insn(s, a->cp, false, a->opc1, a->crn, a->crm, a->opc2,
   5360                    true, a->rt, 0);
   5361     return true;
   5362 }
   5363 
   5364 static bool trans_MCRR(DisasContext *s, arg_MCRR *a)
   5365 {
   5366     if (!valid_cp(s, a->cp)) {
   5367         return false;
   5368     }
   5369     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
   5370                    false, a->rt, a->rt2);
   5371     return true;
   5372 }
   5373 
   5374 static bool trans_MRRC(DisasContext *s, arg_MRRC *a)
   5375 {
   5376     if (!valid_cp(s, a->cp)) {
   5377         return false;
   5378     }
   5379     do_coproc_insn(s, a->cp, true, a->opc1, 0, a->crm, 0,
   5380                    true, a->rt, a->rt2);
   5381     return true;
   5382 }
   5383 
   5384 /* Helpers to swap operands for reverse-subtract.  */
   5385 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
   5386 {
   5387     tcg_gen_sub_i32(dst, b, a);
   5388 }
   5389 
   5390 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
   5391 {
   5392     gen_sub_CC(dst, b, a);
   5393 }
   5394 
   5395 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
   5396 {
   5397     gen_sub_carry(dest, b, a);
   5398 }
   5399 
   5400 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
   5401 {
   5402     gen_sbc_CC(dest, b, a);
   5403 }
   5404 
   5405 /*
   5406  * Helpers for the data processing routines.
   5407  *
    5408  * After the computation, store the result back.
    5409  * This may be suppressed altogether (STREG_NONE), stored into a
    5410  * register (STREG_NORMAL), checked against the stack limits
    5411  * (STREG_SP_CHECK), or used for an exception return (STREG_EXC_RET).
   5412  *
   5413  * Always return true, indicating success for a trans_* function.
   5414  */
   5415 typedef enum {
   5416    STREG_NONE,
   5417    STREG_NORMAL,
   5418    STREG_SP_CHECK,
   5419    STREG_EXC_RET,
   5420 } StoreRegKind;
   5421 
   5422 static bool store_reg_kind(DisasContext *s, int rd,
   5423                             TCGv_i32 val, StoreRegKind kind)
   5424 {
   5425     switch (kind) {
   5426     case STREG_NONE:
   5427         tcg_temp_free_i32(val);
   5428         return true;
   5429     case STREG_NORMAL:
   5430         /* See ALUWritePC: Interworking only from a32 mode. */
   5431         if (s->thumb) {
   5432             store_reg(s, rd, val);
   5433         } else {
   5434             store_reg_bx(s, rd, val);
   5435         }
   5436         return true;
   5437     case STREG_SP_CHECK:
   5438         store_sp_checked(s, val);
   5439         return true;
   5440     case STREG_EXC_RET:
   5441         gen_exception_return(s, val);
   5442         return true;
   5443     }
   5444     g_assert_not_reached();
   5445 }
   5446 
   5447 /*
   5448  * Data Processing (register)
   5449  *
    5450  * Operate, optionally setting flags, on one register source,
    5451  * one immediate-shifted register source, and a destination.
   5452  */
   5453 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
   5454                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
   5455                          int logic_cc, StoreRegKind kind)
   5456 {
   5457     TCGv_i32 tmp1, tmp2;
   5458 
   5459     tmp2 = load_reg(s, a->rm);
   5460     gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
   5461     tmp1 = load_reg(s, a->rn);
   5462 
   5463     gen(tmp1, tmp1, tmp2);
   5464     tcg_temp_free_i32(tmp2);
   5465 
   5466     if (logic_cc) {
   5467         gen_logic_CC(tmp1);
   5468     }
   5469     return store_reg_kind(s, a->rd, tmp1, kind);
   5470 }
   5471 
   5472 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
   5473                          void (*gen)(TCGv_i32, TCGv_i32),
   5474                          int logic_cc, StoreRegKind kind)
   5475 {
   5476     TCGv_i32 tmp;
   5477 
   5478     tmp = load_reg(s, a->rm);
   5479     gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
   5480 
   5481     gen(tmp, tmp);
   5482     if (logic_cc) {
   5483         gen_logic_CC(tmp);
   5484     }
   5485     return store_reg_kind(s, a->rd, tmp, kind);
   5486 }
   5487 
   5488 /*
   5489  * Data-processing (register-shifted register)
   5490  *
    5491  * Operate, optionally setting flags, on one register source,
    5492  * one register-shifted register source, and a destination.
   5493  */
   5494 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
   5495                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
   5496                          int logic_cc, StoreRegKind kind)
   5497 {
   5498     TCGv_i32 tmp1, tmp2;
   5499 
   5500     tmp1 = load_reg(s, a->rs);
   5501     tmp2 = load_reg(s, a->rm);
   5502     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
   5503     tmp1 = load_reg(s, a->rn);
   5504 
   5505     gen(tmp1, tmp1, tmp2);
   5506     tcg_temp_free_i32(tmp2);
   5507 
   5508     if (logic_cc) {
   5509         gen_logic_CC(tmp1);
   5510     }
   5511     return store_reg_kind(s, a->rd, tmp1, kind);
   5512 }
   5513 
   5514 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
   5515                          void (*gen)(TCGv_i32, TCGv_i32),
   5516                          int logic_cc, StoreRegKind kind)
   5517 {
   5518     TCGv_i32 tmp1, tmp2;
   5519 
   5520     tmp1 = load_reg(s, a->rs);
   5521     tmp2 = load_reg(s, a->rm);
   5522     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
   5523 
   5524     gen(tmp2, tmp2);
   5525     if (logic_cc) {
   5526         gen_logic_CC(tmp2);
   5527     }
   5528     return store_reg_kind(s, a->rd, tmp2, kind);
   5529 }
   5530 
   5531 /*
   5532  * Data-processing (immediate)
   5533  *
    5534  * Operate, optionally setting flags, on one register source,
    5535  * one rotated immediate, and a destination.
   5536  *
    5537  * Note that when logic_cc && a->rot, CF is set from the msb of the
    5538  * rotated immediate; this is why the immediate must be passed in
    5539  * its unrotated form.
   5540  */
   5541 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
   5542                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
   5543                          int logic_cc, StoreRegKind kind)
   5544 {
   5545     TCGv_i32 tmp1;
   5546     uint32_t imm;
   5547 
   5548     imm = ror32(a->imm, a->rot);
   5549     if (logic_cc && a->rot) {
   5550         tcg_gen_movi_i32(cpu_CF, imm >> 31);
   5551     }
   5552     tmp1 = load_reg(s, a->rn);
   5553 
   5554     gen(tmp1, tmp1, tcg_constant_i32(imm));
   5555 
   5556     if (logic_cc) {
   5557         gen_logic_CC(tmp1);
   5558     }
   5559     return store_reg_kind(s, a->rd, tmp1, kind);
   5560 }
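         /*
          * E.g. (illustrative) imm = 0x80 with rot = 8 expands to
          * ror32(0x80, 8) = 0x80000000, and with logic_cc the carry flag
          * is loaded from its msb (CF = 1), which is the carry-out of the
          * immediate expansion in the architectural pseudocode.
          */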
   5561 
   5562 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
   5563                          void (*gen)(TCGv_i32, TCGv_i32),
   5564                          int logic_cc, StoreRegKind kind)
   5565 {
   5566     TCGv_i32 tmp;
   5567     uint32_t imm;
   5568 
   5569     imm = ror32(a->imm, a->rot);
   5570     if (logic_cc && a->rot) {
   5571         tcg_gen_movi_i32(cpu_CF, imm >> 31);
   5572     }
   5573 
   5574     tmp = tcg_temp_new_i32();
   5575     gen(tmp, tcg_constant_i32(imm));
   5576 
   5577     if (logic_cc) {
   5578         gen_logic_CC(tmp);
   5579     }
   5580     return store_reg_kind(s, a->rd, tmp, kind);
   5581 }
   5582 
   5583 #define DO_ANY3(NAME, OP, L, K)                                         \
   5584     static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
   5585     { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
   5586     static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
   5587     { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
   5588     static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
   5589     { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
   5590 
   5591 #define DO_ANY2(NAME, OP, L, K)                                         \
   5592     static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
   5593     { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
   5594     static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
   5595     { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
   5596     static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
   5597     { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
   5598 
   5599 #define DO_CMP2(NAME, OP, L)                                            \
   5600     static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
   5601     { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
   5602     static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
   5603     { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
   5604     static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
   5605     { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
   5606 
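         /*
          * As a concrete illustration, DO_ANY3(AND, ...) below expands
          * (modulo whitespace) to:
          *
          *   static bool trans_AND_rrri(DisasContext *s, arg_s_rrr_shi *a)
          *   { StoreRegKind k = STREG_NORMAL;
          *     return op_s_rrr_shi(s, a, tcg_gen_and_i32, a->s, k); }
          *
          * plus the analogous _rrrr and _rri forms, which are the entry
          * points the generated decoders call.
          */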
   5607 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
   5608 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
   5609 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
   5610 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
   5611 
   5612 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
   5613 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
   5614 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
   5615 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
   5616 
   5617 DO_CMP2(TST, tcg_gen_and_i32, true)
   5618 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
   5619 DO_CMP2(CMN, gen_add_CC, false)
   5620 DO_CMP2(CMP, gen_sub_CC, false)
   5621 
   5622 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
   5623         a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
   5624 
   5625 /*
    5626  * Note that for the computation of StoreRegKind we may return out
    5627  * of the middle of the functions expanded by DO_ANY3, and that we
    5628  * modify a->s via that parameter before it is used by OP.
   5629  */
   5630 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
   5631         ({
   5632             StoreRegKind ret = STREG_NORMAL;
   5633             if (a->rd == 15 && a->s) {
   5634                 /*
   5635                  * See ALUExceptionReturn:
   5636                  * In User mode, UNPREDICTABLE; we choose UNDEF.
   5637                  * In Hyp mode, UNDEFINED.
   5638                  */
   5639                 if (IS_USER(s) || s->current_el == 2) {
   5640                     unallocated_encoding(s);
   5641                     return true;
   5642                 }
   5643                 /* There is no writeback of nzcv to PSTATE.  */
   5644                 a->s = 0;
   5645                 ret = STREG_EXC_RET;
   5646             } else if (a->rd == 13 && a->rn == 13) {
   5647                 ret = STREG_SP_CHECK;
   5648             }
   5649             ret;
   5650         }))
   5651 
   5652 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
   5653         ({
   5654             StoreRegKind ret = STREG_NORMAL;
   5655             if (a->rd == 15 && a->s) {
   5656                 /*
   5657                  * See ALUExceptionReturn:
   5658                  * In User mode, UNPREDICTABLE; we choose UNDEF.
   5659                  * In Hyp mode, UNDEFINED.
   5660                  */
   5661                 if (IS_USER(s) || s->current_el == 2) {
   5662                     unallocated_encoding(s);
   5663                     return true;
   5664                 }
   5665                 /* There is no writeback of nzcv to PSTATE.  */
   5666                 a->s = 0;
   5667                 ret = STREG_EXC_RET;
   5668             } else if (a->rd == 13) {
   5669                 ret = STREG_SP_CHECK;
   5670             }
   5671             ret;
   5672         }))
   5673 
   5674 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
   5675 
   5676 /*
   5677  * ORN is only available with T32, so there is no register-shifted-register
   5678  * form of the insn.  Using the DO_ANY3 macro would create an unused function.
   5679  */
   5680 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
   5681 {
   5682     return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
   5683 }
   5684 
   5685 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
   5686 {
   5687     return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
   5688 }
   5689 
   5690 #undef DO_ANY3
   5691 #undef DO_ANY2
   5692 #undef DO_CMP2
   5693 
   5694 static bool trans_ADR(DisasContext *s, arg_ri *a)
   5695 {
   5696     store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
   5697     return true;
   5698 }
   5699 
   5700 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
   5701 {
   5702     if (!ENABLE_ARCH_6T2) {
   5703         return false;
   5704     }
   5705 
   5706     store_reg(s, a->rd, tcg_constant_i32(a->imm));
   5707     return true;
   5708 }
   5709 
   5710 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
   5711 {
   5712     TCGv_i32 tmp;
   5713 
   5714     if (!ENABLE_ARCH_6T2) {
   5715         return false;
   5716     }
   5717 
   5718     tmp = load_reg(s, a->rd);
   5719     tcg_gen_ext16u_i32(tmp, tmp);
   5720     tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
   5721     store_reg(s, a->rd, tmp);
   5722     return true;
   5723 }
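         /*
          * MOVW zeroes the high half and MOVT preserves the low half, so
          * the usual pair, e.g. MOVW r0,#0x5678 then MOVT r0,#0x1234
          * (illustrative values), builds the full constant 0x12345678.
          */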
   5724 
   5725 /*
   5726  * v8.1M MVE wide-shifts
   5727  */
   5728 static bool do_mve_shl_ri(DisasContext *s, arg_mve_shl_ri *a,
   5729                           WideShiftImmFn *fn)
   5730 {
   5731     TCGv_i64 rda;
   5732     TCGv_i32 rdalo, rdahi;
   5733 
   5734     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
   5735         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
   5736         return false;
   5737     }
   5738     if (a->rdahi == 15) {
   5739         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
   5740         return false;
   5741     }
   5742     if (!dc_isar_feature(aa32_mve, s) ||
   5743         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
   5744         a->rdahi == 13) {
   5745         /* RdaHi == 13 is UNPREDICTABLE; we choose to UNDEF */
   5746         unallocated_encoding(s);
   5747         return true;
   5748     }
   5749 
   5750     if (a->shim == 0) {
   5751         a->shim = 32;
   5752     }
   5753 
   5754     rda = tcg_temp_new_i64();
   5755     rdalo = load_reg(s, a->rdalo);
   5756     rdahi = load_reg(s, a->rdahi);
   5757     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
   5758 
   5759     fn(rda, rda, a->shim);
   5760 
   5761     tcg_gen_extrl_i64_i32(rdalo, rda);
   5762     tcg_gen_extrh_i64_i32(rdahi, rda);
   5763     store_reg(s, a->rdalo, rdalo);
   5764     store_reg(s, a->rdahi, rdahi);
   5765     tcg_temp_free_i64(rda);
   5766 
   5767     return true;
   5768 }
   5769 
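/*
 * A sketch of the semantics in plain C (not the TCG execution path):
 * the wide shifts treat RdaHi:RdaLo as one 64-bit value, so e.g.
 * ASRL Rda, #n behaves roughly as
 *
 *     uint64_t rda = ((uint64_t)rdahi << 32) | rdalo;
 *     rda = (uint64_t)((int64_t)rda >> n);
 *     rdalo = (uint32_t)rda;
 *     rdahi = (uint32_t)(rda >> 32);
 *
 * which is exactly the concat/shift/extrl/extrh sequence emitted by
 * do_mve_shl_ri() above.
 */
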
   5770 static bool trans_ASRL_ri(DisasContext *s, arg_mve_shl_ri *a)
   5771 {
   5772     return do_mve_shl_ri(s, a, tcg_gen_sari_i64);
   5773 }
   5774 
   5775 static bool trans_LSLL_ri(DisasContext *s, arg_mve_shl_ri *a)
   5776 {
   5777     return do_mve_shl_ri(s, a, tcg_gen_shli_i64);
   5778 }
   5779 
   5780 static bool trans_LSRL_ri(DisasContext *s, arg_mve_shl_ri *a)
   5781 {
   5782     return do_mve_shl_ri(s, a, tcg_gen_shri_i64);
   5783 }
   5784 
   5785 static void gen_mve_sqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
   5786 {
   5787     gen_helper_mve_sqshll(r, cpu_env, n, tcg_constant_i32(shift));
   5788 }
   5789 
   5790 static bool trans_SQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
   5791 {
   5792     return do_mve_shl_ri(s, a, gen_mve_sqshll);
   5793 }
   5794 
   5795 static void gen_mve_uqshll(TCGv_i64 r, TCGv_i64 n, int64_t shift)
   5796 {
   5797     gen_helper_mve_uqshll(r, cpu_env, n, tcg_constant_i32(shift));
   5798 }
   5799 
   5800 static bool trans_UQSHLL_ri(DisasContext *s, arg_mve_shl_ri *a)
   5801 {
   5802     return do_mve_shl_ri(s, a, gen_mve_uqshll);
   5803 }
   5804 
   5805 static bool trans_SRSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
   5806 {
   5807     return do_mve_shl_ri(s, a, gen_srshr64_i64);
   5808 }
   5809 
   5810 static bool trans_URSHRL_ri(DisasContext *s, arg_mve_shl_ri *a)
   5811 {
   5812     return do_mve_shl_ri(s, a, gen_urshr64_i64);
   5813 }
   5814 
   5815 static bool do_mve_shl_rr(DisasContext *s, arg_mve_shl_rr *a, WideShiftFn *fn)
   5816 {
   5817     TCGv_i64 rda;
   5818     TCGv_i32 rdalo, rdahi;
   5819 
   5820     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
   5821         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
   5822         return false;
   5823     }
   5824     if (a->rdahi == 15) {
   5825         /* These are a different encoding (SQSHL/SRSHR/UQSHL/URSHR) */
   5826         return false;
   5827     }
   5828     if (!dc_isar_feature(aa32_mve, s) ||
   5829         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
   5830         a->rdahi == 13 || a->rm == 13 || a->rm == 15 ||
   5831         a->rm == a->rdahi || a->rm == a->rdalo) {
   5832         /* These rdahi/rdalo/rm cases are UNPREDICTABLE; we choose to UNDEF */
   5833         unallocated_encoding(s);
   5834         return true;
   5835     }
   5836 
   5837     rda = tcg_temp_new_i64();
   5838     rdalo = load_reg(s, a->rdalo);
   5839     rdahi = load_reg(s, a->rdahi);
   5840     tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
   5841 
   5842     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
   5843     fn(rda, cpu_env, rda, cpu_R[a->rm]);
   5844 
   5845     tcg_gen_extrl_i64_i32(rdalo, rda);
   5846     tcg_gen_extrh_i64_i32(rdahi, rda);
   5847     store_reg(s, a->rdalo, rdalo);
   5848     store_reg(s, a->rdahi, rdahi);
   5849     tcg_temp_free_i64(rda);
   5850 
   5851     return true;
   5852 }
   5853 
   5854 static bool trans_LSLL_rr(DisasContext *s, arg_mve_shl_rr *a)
   5855 {
   5856     return do_mve_shl_rr(s, a, gen_helper_mve_ushll);
   5857 }
   5858 
   5859 static bool trans_ASRL_rr(DisasContext *s, arg_mve_shl_rr *a)
   5860 {
   5861     return do_mve_shl_rr(s, a, gen_helper_mve_sshrl);
   5862 }
   5863 
   5864 static bool trans_UQRSHLL64_rr(DisasContext *s, arg_mve_shl_rr *a)
   5865 {
   5866     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll);
   5867 }
   5868 
   5869 static bool trans_SQRSHRL64_rr(DisasContext *s, arg_mve_shl_rr *a)
   5870 {
   5871     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl);
   5872 }
   5873 
   5874 static bool trans_UQRSHLL48_rr(DisasContext *s, arg_mve_shl_rr *a)
   5875 {
   5876     return do_mve_shl_rr(s, a, gen_helper_mve_uqrshll48);
   5877 }
   5878 
   5879 static bool trans_SQRSHRL48_rr(DisasContext *s, arg_mve_shl_rr *a)
   5880 {
   5881     return do_mve_shl_rr(s, a, gen_helper_mve_sqrshrl48);
   5882 }
   5883 
   5884 static bool do_mve_sh_ri(DisasContext *s, arg_mve_sh_ri *a, ShiftImmFn *fn)
   5885 {
   5886     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
   5887         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
   5888         return false;
   5889     }
   5890     if (!dc_isar_feature(aa32_mve, s) ||
   5891         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
   5892         a->rda == 13 || a->rda == 15) {
   5893         /* These rda cases are UNPREDICTABLE; we choose to UNDEF */
   5894         unallocated_encoding(s);
   5895         return true;
   5896     }
   5897 
   5898     if (a->shim == 0) {
   5899         a->shim = 32;
   5900     }
   5901     fn(cpu_R[a->rda], cpu_R[a->rda], a->shim);
   5902 
   5903     return true;
   5904 }
   5905 
   5906 static bool trans_URSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
   5907 {
   5908     return do_mve_sh_ri(s, a, gen_urshr32_i32);
   5909 }
   5910 
   5911 static bool trans_SRSHR_ri(DisasContext *s, arg_mve_sh_ri *a)
   5912 {
   5913     return do_mve_sh_ri(s, a, gen_srshr32_i32);
   5914 }
   5915 
   5916 static void gen_mve_sqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
   5917 {
   5918     gen_helper_mve_sqshl(r, cpu_env, n, tcg_constant_i32(shift));
   5919 }
   5920 
   5921 static bool trans_SQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
   5922 {
   5923     return do_mve_sh_ri(s, a, gen_mve_sqshl);
   5924 }
   5925 
   5926 static void gen_mve_uqshl(TCGv_i32 r, TCGv_i32 n, int32_t shift)
   5927 {
   5928     gen_helper_mve_uqshl(r, cpu_env, n, tcg_constant_i32(shift));
   5929 }
   5930 
   5931 static bool trans_UQSHL_ri(DisasContext *s, arg_mve_sh_ri *a)
   5932 {
   5933     return do_mve_sh_ri(s, a, gen_mve_uqshl);
   5934 }
   5935 
   5936 static bool do_mve_sh_rr(DisasContext *s, arg_mve_sh_rr *a, ShiftFn *fn)
   5937 {
   5938     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
   5939         /* Decode falls through to ORR/MOV UNPREDICTABLE handling */
   5940         return false;
   5941     }
   5942     if (!dc_isar_feature(aa32_mve, s) ||
   5943         !arm_dc_feature(s, ARM_FEATURE_M_MAIN) ||
   5944         a->rda == 13 || a->rda == 15 || a->rm == 13 || a->rm == 15 ||
   5945         a->rm == a->rda) {
   5946         /* These rda/rm cases are UNPREDICTABLE; we choose to UNDEF */
   5947         unallocated_encoding(s);
   5948         return true;
   5949     }
   5950 
   5951     /* The helper takes care of the sign-extension of the low 8 bits of Rm */
   5952     fn(cpu_R[a->rda], cpu_env, cpu_R[a->rda], cpu_R[a->rm]);
   5953     return true;
   5954 }
   5955 
   5956 static bool trans_SQRSHR_rr(DisasContext *s, arg_mve_sh_rr *a)
   5957 {
   5958     return do_mve_sh_rr(s, a, gen_helper_mve_sqrshr);
   5959 }
   5960 
   5961 static bool trans_UQRSHL_rr(DisasContext *s, arg_mve_sh_rr *a)
   5962 {
   5963     return do_mve_sh_rr(s, a, gen_helper_mve_uqrshl);
   5964 }
   5965 
   5966 /*
   5967  * Multiply and multiply accumulate
   5968  */
   5969 
   5970 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
   5971 {
   5972     TCGv_i32 t1, t2;
   5973 
   5974     t1 = load_reg(s, a->rn);
   5975     t2 = load_reg(s, a->rm);
   5976     tcg_gen_mul_i32(t1, t1, t2);
   5977     tcg_temp_free_i32(t2);
   5978     if (add) {
   5979         t2 = load_reg(s, a->ra);
   5980         tcg_gen_add_i32(t1, t1, t2);
   5981         tcg_temp_free_i32(t2);
   5982     }
   5983     if (a->s) {
   5984         gen_logic_CC(t1);
   5985     }
   5986     store_reg(s, a->rd, t1);
   5987     return true;
   5988 }
   5989 
   5990 static bool trans_MUL(DisasContext *s, arg_MUL *a)
   5991 {
   5992     return op_mla(s, a, false);
   5993 }
   5994 
   5995 static bool trans_MLA(DisasContext *s, arg_MLA *a)
   5996 {
   5997     return op_mla(s, a, true);
   5998 }
   5999 
   6000 static bool trans_MLS(DisasContext *s, arg_MLS *a)
   6001 {
   6002     TCGv_i32 t1, t2;
   6003 
   6004     if (!ENABLE_ARCH_6T2) {
   6005         return false;
   6006     }
   6007     t1 = load_reg(s, a->rn);
   6008     t2 = load_reg(s, a->rm);
   6009     tcg_gen_mul_i32(t1, t1, t2);
   6010     tcg_temp_free_i32(t2);
   6011     t2 = load_reg(s, a->ra);
   6012     tcg_gen_sub_i32(t1, t2, t1);
   6013     tcg_temp_free_i32(t2);
   6014     store_reg(s, a->rd, t1);
   6015     return true;
   6016 }
   6017 
   6018 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
   6019 {
   6020     TCGv_i32 t0, t1, t2, t3;
   6021 
   6022     t0 = load_reg(s, a->rm);
   6023     t1 = load_reg(s, a->rn);
   6024     if (uns) {
   6025         tcg_gen_mulu2_i32(t0, t1, t0, t1);
   6026     } else {
   6027         tcg_gen_muls2_i32(t0, t1, t0, t1);
   6028     }
   6029     if (add) {
   6030         t2 = load_reg(s, a->ra);
   6031         t3 = load_reg(s, a->rd);
   6032         tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
   6033         tcg_temp_free_i32(t2);
   6034         tcg_temp_free_i32(t3);
   6035     }
   6036     if (a->s) {
   6037         gen_logicq_cc(t0, t1);
   6038     }
   6039     store_reg(s, a->ra, t0);
   6040     store_reg(s, a->rd, t1);
   6041     return true;
   6042 }
   6043 
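/*
 * Worked example: UMULL with Rn = Rm = 0xffffffff produces the 64-bit
 * product 0xfffffffe00000001; tcg_gen_mulu2_i32 leaves the low half
 * (0x00000001) in t0, written back to Ra (RdLo), and the high half
 * (0xfffffffe) in t1, written back to Rd (RdHi). For UMLAL/SMLAL the
 * previous RdHi:RdLo value is added in first via tcg_gen_add2_i32.
 */
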
   6044 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
   6045 {
   6046     return op_mlal(s, a, true, false);
   6047 }
   6048 
   6049 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
   6050 {
   6051     return op_mlal(s, a, false, false);
   6052 }
   6053 
   6054 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
   6055 {
   6056     return op_mlal(s, a, true, true);
   6057 }
   6058 
   6059 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
   6060 {
   6061     return op_mlal(s, a, false, true);
   6062 }
   6063 
   6064 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
   6065 {
   6066     TCGv_i32 t0, t1, t2, zero;
   6067 
   6068     if (s->thumb
   6069         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
   6070         : !ENABLE_ARCH_6) {
   6071         return false;
   6072     }
   6073 
   6074     t0 = load_reg(s, a->rm);
   6075     t1 = load_reg(s, a->rn);
   6076     tcg_gen_mulu2_i32(t0, t1, t0, t1);
   6077     zero = tcg_constant_i32(0);
   6078     t2 = load_reg(s, a->ra);
   6079     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
   6080     tcg_temp_free_i32(t2);
   6081     t2 = load_reg(s, a->rd);
   6082     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
   6083     tcg_temp_free_i32(t2);
   6084     store_reg(s, a->ra, t0);
   6085     store_reg(s, a->rd, t1);
   6086     return true;
   6087 }
   6088 
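/*
 * UMAAL computes Rm * Rn + Ra + Rd as an unsigned 64-bit result, and
 * the two add2 steps above cannot overflow it: with N = 2**32 - 1 the
 * worst case is N*N + N + N = (2**64 - 2**33 + 1) + (2**33 - 2)
 * = 2**64 - 1, which still fits, so no carry beyond the high word
 * needs to be tracked.
 */
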
   6089 /*
   6090  * Saturating addition and subtraction
   6091  */
   6092 
   6093 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
   6094 {
   6095     TCGv_i32 t0, t1;
   6096 
   6097     if (s->thumb
   6098         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
   6099         : !ENABLE_ARCH_5TE) {
   6100         return false;
   6101     }
   6102 
   6103     t0 = load_reg(s, a->rm);
   6104     t1 = load_reg(s, a->rn);
   6105     if (doub) {
   6106         gen_helper_add_saturate(t1, cpu_env, t1, t1);
   6107     }
   6108     if (add) {
   6109         gen_helper_add_saturate(t0, cpu_env, t0, t1);
   6110     } else {
   6111         gen_helper_sub_saturate(t0, cpu_env, t0, t1);
   6112     }
   6113     tcg_temp_free_i32(t1);
   6114     store_reg(s, a->rd, t0);
   6115     return true;
   6116 }
   6117 
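/*
 * For the doubling forms the doubling itself saturates first: e.g.
 * QDADD with Rn = 0x40000000 evaluates sat(2 * 0x40000000)
 * = 0x7fffffff (setting Q), and only then performs the saturating add
 * with Rm. Hence gen_helper_add_saturate is applied to t1 with itself
 * rather than the operand simply being shifted left.
 */
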
   6118 #define DO_QADDSUB(NAME, ADD, DOUB) \
   6119 static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
   6120 {                                                        \
   6121     return op_qaddsub(s, a, ADD, DOUB);                  \
   6122 }
   6123 
   6124 DO_QADDSUB(QADD, true, false)
   6125 DO_QADDSUB(QSUB, false, false)
   6126 DO_QADDSUB(QDADD, true, true)
   6127 DO_QADDSUB(QDSUB, false, true)
   6128 
   6129 #undef DO_QADDSUB
   6130 
   6131 /*
   6132  * Halfword multiply and multiply accumulate
   6133  */
   6134 
   6135 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
   6136                        int add_long, bool nt, bool mt)
   6137 {
   6138     TCGv_i32 t0, t1, tl, th;
   6139 
   6140     if (s->thumb
   6141         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
   6142         : !ENABLE_ARCH_5TE) {
   6143         return false;
   6144     }
   6145 
   6146     t0 = load_reg(s, a->rn);
   6147     t1 = load_reg(s, a->rm);
   6148     gen_mulxy(t0, t1, nt, mt);
   6149     tcg_temp_free_i32(t1);
   6150 
   6151     switch (add_long) {
   6152     case 0:
   6153         store_reg(s, a->rd, t0);
   6154         break;
   6155     case 1:
   6156         t1 = load_reg(s, a->ra);
   6157         gen_helper_add_setq(t0, cpu_env, t0, t1);
   6158         tcg_temp_free_i32(t1);
   6159         store_reg(s, a->rd, t0);
   6160         break;
   6161     case 2:
   6162         tl = load_reg(s, a->ra);
   6163         th = load_reg(s, a->rd);
   6164         /* Sign-extend the 32-bit product to 64 bits.  */
   6165         t1 = tcg_temp_new_i32();
   6166         tcg_gen_sari_i32(t1, t0, 31);
   6167         tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
   6168         tcg_temp_free_i32(t0);
   6169         tcg_temp_free_i32(t1);
   6170         store_reg(s, a->ra, tl);
   6171         store_reg(s, a->rd, th);
   6172         break;
   6173     default:
   6174         g_assert_not_reached();
   6175     }
   6176     return true;
   6177 }
   6178 
   6179 #define DO_SMLAX(NAME, add, nt, mt) \
   6180 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
   6181 {                                                          \
   6182     return op_smlaxxx(s, a, add, nt, mt);                  \
   6183 }
   6184 
   6185 DO_SMLAX(SMULBB, 0, 0, 0)
   6186 DO_SMLAX(SMULBT, 0, 0, 1)
   6187 DO_SMLAX(SMULTB, 0, 1, 0)
   6188 DO_SMLAX(SMULTT, 0, 1, 1)
   6189 
   6190 DO_SMLAX(SMLABB, 1, 0, 0)
   6191 DO_SMLAX(SMLABT, 1, 0, 1)
   6192 DO_SMLAX(SMLATB, 1, 1, 0)
   6193 DO_SMLAX(SMLATT, 1, 1, 1)
   6194 
   6195 DO_SMLAX(SMLALBB, 2, 0, 0)
   6196 DO_SMLAX(SMLALBT, 2, 0, 1)
   6197 DO_SMLAX(SMLALTB, 2, 1, 0)
   6198 DO_SMLAX(SMLALTT, 2, 1, 1)
   6199 
   6200 #undef DO_SMLAX
   6201 
   6202 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
   6203 {
   6204     TCGv_i32 t0, t1;
   6205 
   6206     if (!ENABLE_ARCH_5TE) {
   6207         return false;
   6208     }
   6209 
   6210     t0 = load_reg(s, a->rn);
   6211     t1 = load_reg(s, a->rm);
   6212     /*
   6213      * Since the nominal result is product<47:16>, shift the 16-bit
   6214      * input up by 16 bits, so that the result is at product<63:32>.
   6215      */
   6216     if (mt) {
   6217         tcg_gen_andi_i32(t1, t1, 0xffff0000);
   6218     } else {
   6219         tcg_gen_shli_i32(t1, t1, 16);
   6220     }
   6221     tcg_gen_muls2_i32(t0, t1, t0, t1);
   6222     tcg_temp_free_i32(t0);
   6223     if (add) {
   6224         t0 = load_reg(s, a->ra);
   6225         gen_helper_add_setq(t1, cpu_env, t1, t0);
   6226         tcg_temp_free_i32(t0);
   6227     }
   6228     store_reg(s, a->rd, t1);
   6229     return true;
   6230 }
   6231 
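/*
 * Derivation of the shift trick: SMULW<B,T> wants bits <47:16> of the
 * 48-bit product Rn * sext(Rm<15:0>). Shifting the halfword operand
 * left by 16 first turns that into bits <63:32> of an ordinary 32x32
 * signed multiply, i.e. precisely the high word that
 * tcg_gen_muls2_i32 already computes, so no 64-bit shift is needed.
 */
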
   6232 #define DO_SMLAWX(NAME, add, mt) \
   6233 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
   6234 {                                                          \
   6235     return op_smlawx(s, a, add, mt);                       \
   6236 }
   6237 
   6238 DO_SMLAWX(SMULWB, 0, 0)
   6239 DO_SMLAWX(SMULWT, 0, 1)
   6240 DO_SMLAWX(SMLAWB, 1, 0)
   6241 DO_SMLAWX(SMLAWT, 1, 1)
   6242 
   6243 #undef DO_SMLAWX
   6244 
   6245 /*
   6246  * MSR (immediate) and hints
   6247  */
   6248 
   6249 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
   6250 {
   6251     /*
   6252      * When running single-threaded TCG code, use the helper to ensure that
   6253      * the next round-robin scheduled vCPU gets a crack.  When running in
   6254      * MTTCG we don't generate jumps to the helper as it won't affect the
   6255      * scheduling of other vCPUs.
   6256      */
   6257     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
   6258         gen_update_pc(s, curr_insn_len(s));
   6259         s->base.is_jmp = DISAS_YIELD;
   6260     }
   6261     return true;
   6262 }
   6263 
   6264 static bool trans_WFE(DisasContext *s, arg_WFE *a)
   6265 {
   6266     /*
   6267      * When running single-threaded TCG code, use the helper to ensure that
   6268      * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
   6269      * just skip this instruction.  Currently the SEV/SEVL instructions,
   6270      * which are *one* of many ways to wake the CPU from WFE, are not
   6271      * implemented so we can't sleep like WFI does.
   6272      */
   6273     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
   6274         gen_update_pc(s, curr_insn_len(s));
   6275         s->base.is_jmp = DISAS_WFE;
   6276     }
   6277     return true;
   6278 }
   6279 
   6280 static bool trans_WFI(DisasContext *s, arg_WFI *a)
   6281 {
   6282     /* For WFI, halt the vCPU until an IRQ. */
   6283     gen_update_pc(s, curr_insn_len(s));
   6284     s->base.is_jmp = DISAS_WFI;
   6285     return true;
   6286 }
   6287 
   6288 static bool trans_ESB(DisasContext *s, arg_ESB *a)
   6289 {
   6290     /*
   6291      * For M-profile, minimal-RAS ESB can be a NOP.
   6292      * Without RAS, we must implement this as NOP.
   6293      */
   6294     if (!arm_dc_feature(s, ARM_FEATURE_M) && dc_isar_feature(aa32_ras, s)) {
   6295         /*
   6296          * QEMU does not have a source of physical SErrors,
   6297          * so we are only concerned with virtual SErrors.
   6298          * The pseudocode in the ARM for this case is
   6299          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
   6300          *      AArch32.vESBOperation();
   6301          * Most of the condition can be evaluated at translation time.
   6302          * Test for EL2 present, and defer test for SEL2 to runtime.
   6303          */
   6304         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
   6305             gen_helper_vesb(cpu_env);
   6306         }
   6307     }
   6308     return true;
   6309 }
   6310 
   6311 static bool trans_NOP(DisasContext *s, arg_NOP *a)
   6312 {
   6313     return true;
   6314 }
   6315 
   6316 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
   6317 {
   6318     uint32_t val = ror32(a->imm, a->rot * 2);
   6319     uint32_t mask = msr_mask(s, a->mask, a->r);
   6320 
   6321     if (gen_set_psr_im(s, mask, a->r, val)) {
   6322         unallocated_encoding(s);
   6323     }
   6324     return true;
   6325 }
   6326 
   6327 /*
   6328  * Cyclic Redundancy Check
   6329  */
   6330 
   6331 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
   6332 {
   6333     TCGv_i32 t1, t2, t3;
   6334 
   6335     if (!dc_isar_feature(aa32_crc32, s)) {
   6336         return false;
   6337     }
   6338 
   6339     t1 = load_reg(s, a->rn);
   6340     t2 = load_reg(s, a->rm);
   6341     switch (sz) {
   6342     case MO_8:
   6343         gen_uxtb(t2);
   6344         break;
   6345     case MO_16:
   6346         gen_uxth(t2);
   6347         break;
   6348     case MO_32:
   6349         break;
   6350     default:
   6351         g_assert_not_reached();
   6352     }
   6353     t3 = tcg_constant_i32(1 << sz);
   6354     if (c) {
   6355         gen_helper_crc32c(t1, t1, t2, t3);
   6356     } else {
   6357         gen_helper_crc32(t1, t1, t2, t3);
   6358     }
   6359     tcg_temp_free_i32(t2);
   6360     store_reg(s, a->rd, t1);
   6361     return true;
   6362 }
   6363 
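/*
 * The third helper argument is the operand size in bytes: since
 * MO_8/MO_16/MO_32 encode as 0/1/2, "1 << sz" yields 1, 2 or 4,
 * telling the crc32/crc32c helper how many bytes of t2 to fold into
 * the running CRC held in t1.
 */
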
   6364 #define DO_CRC32(NAME, c, sz) \
   6365 static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
   6366     { return op_crc32(s, a, c, sz); }
   6367 
   6368 DO_CRC32(CRC32B, false, MO_8)
   6369 DO_CRC32(CRC32H, false, MO_16)
   6370 DO_CRC32(CRC32W, false, MO_32)
   6371 DO_CRC32(CRC32CB, true, MO_8)
   6372 DO_CRC32(CRC32CH, true, MO_16)
   6373 DO_CRC32(CRC32CW, true, MO_32)
   6374 
   6375 #undef DO_CRC32
   6376 
   6377 /*
   6378  * Miscellaneous instructions
   6379  */
   6380 
   6381 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
   6382 {
   6383     if (arm_dc_feature(s, ARM_FEATURE_M)) {
   6384         return false;
   6385     }
   6386     gen_mrs_banked(s, a->r, a->sysm, a->rd);
   6387     return true;
   6388 }
   6389 
   6390 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
   6391 {
   6392     if (arm_dc_feature(s, ARM_FEATURE_M)) {
   6393         return false;
   6394     }
   6395     gen_msr_banked(s, a->r, a->sysm, a->rn);
   6396     return true;
   6397 }
   6398 
   6399 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
   6400 {
   6401     TCGv_i32 tmp;
   6402 
   6403     if (arm_dc_feature(s, ARM_FEATURE_M)) {
   6404         return false;
   6405     }
   6406     if (a->r) {
   6407         if (IS_USER(s)) {
   6408             unallocated_encoding(s);
   6409             return true;
   6410         }
   6411         tmp = load_cpu_field(spsr);
   6412     } else {
   6413         tmp = tcg_temp_new_i32();
   6414         gen_helper_cpsr_read(tmp, cpu_env);
   6415     }
   6416     store_reg(s, a->rd, tmp);
   6417     return true;
   6418 }
   6419 
   6420 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
   6421 {
   6422     TCGv_i32 tmp;
   6423     uint32_t mask = msr_mask(s, a->mask, a->r);
   6424 
   6425     if (arm_dc_feature(s, ARM_FEATURE_M)) {
   6426         return false;
   6427     }
   6428     tmp = load_reg(s, a->rn);
   6429     if (gen_set_psr(s, mask, a->r, tmp)) {
   6430         unallocated_encoding(s);
   6431     }
   6432     return true;
   6433 }
   6434 
   6435 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
   6436 {
   6437     TCGv_i32 tmp;
   6438 
   6439     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
   6440         return false;
   6441     }
   6442     tmp = tcg_temp_new_i32();
   6443     gen_helper_v7m_mrs(tmp, cpu_env, tcg_constant_i32(a->sysm));
   6444     store_reg(s, a->rd, tmp);
   6445     return true;
   6446 }
   6447 
   6448 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
   6449 {
   6450     TCGv_i32 addr, reg;
   6451 
   6452     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
   6453         return false;
   6454     }
   6455     addr = tcg_constant_i32((a->mask << 10) | a->sysm);
   6456     reg = load_reg(s, a->rn);
   6457     gen_helper_v7m_msr(cpu_env, addr, reg);
   6458     tcg_temp_free_i32(reg);
   6459     /* If we wrote to CONTROL, the EL might have changed */
   6460     gen_rebuild_hflags(s, true);
   6461     gen_lookup_tb(s);
   6462     return true;
   6463 }
   6464 
   6465 static bool trans_BX(DisasContext *s, arg_BX *a)
   6466 {
   6467     if (!ENABLE_ARCH_4T) {
   6468         return false;
   6469     }
   6470     gen_bx_excret(s, load_reg(s, a->rm));
   6471     return true;
   6472 }
   6473 
   6474 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
   6475 {
   6476     if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
   6477         return false;
   6478     }
   6479     /*
   6480      * v7A allows BXJ to be trapped via HSTR.TJDBX. We don't waste a
   6481      * TBFLAGS bit on a basically-never-happens case, so call a helper
   6482      * function to check for the trap and raise the exception if needed
   6483      * (passing it the register number for the syndrome value).
   6484      * v8A doesn't have this HSTR bit.
   6485      */
   6486     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
   6487         arm_dc_feature(s, ARM_FEATURE_EL2) &&
   6488         s->current_el < 2 && s->ns) {
   6489         gen_helper_check_bxj_trap(cpu_env, tcg_constant_i32(a->rm));
   6490     }
   6491     /* Trivial implementation equivalent to bx.  */
   6492     gen_bx(s, load_reg(s, a->rm));
   6493     return true;
   6494 }
   6495 
   6496 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
   6497 {
   6498     TCGv_i32 tmp;
   6499 
   6500     if (!ENABLE_ARCH_5) {
   6501         return false;
   6502     }
   6503     tmp = load_reg(s, a->rm);
   6504     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
   6505     gen_bx(s, tmp);
   6506     return true;
   6507 }
   6508 
   6509 /*
   6510  * BXNS/BLXNS: only exist for v8M with the security extensions,
   6511  * and always UNDEF if NonSecure.  We don't implement these in
   6512  * the user-only mode either (in theory you can use them from
   6513  * Secure User mode but they are too tied in to system emulation).
   6514  */
   6515 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
   6516 {
   6517     if (!s->v8m_secure || IS_USER_ONLY) {
   6518         unallocated_encoding(s);
   6519     } else {
   6520         gen_bxns(s, a->rm);
   6521     }
   6522     return true;
   6523 }
   6524 
   6525 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
   6526 {
   6527     if (!s->v8m_secure || IS_USER_ONLY) {
   6528         unallocated_encoding(s);
   6529     } else {
   6530         gen_blxns(s, a->rm);
   6531     }
   6532     return true;
   6533 }
   6534 
   6535 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
   6536 {
   6537     TCGv_i32 tmp;
   6538 
   6539     if (!ENABLE_ARCH_5) {
   6540         return false;
   6541     }
   6542     tmp = load_reg(s, a->rm);
   6543     tcg_gen_clzi_i32(tmp, tmp, 32);
   6544     store_reg(s, a->rd, tmp);
   6545     return true;
   6546 }
   6547 
   6548 static bool trans_ERET(DisasContext *s, arg_ERET *a)
   6549 {
   6550     TCGv_i32 tmp;
   6551 
   6552     if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
   6553         return false;
   6554     }
   6555     if (IS_USER(s)) {
   6556         unallocated_encoding(s);
   6557         return true;
   6558     }
   6559     if (s->current_el == 2) {
   6560         /* ERET from Hyp uses ELR_Hyp, not LR */
   6561         tmp = load_cpu_field(elr_el[2]);
   6562     } else {
   6563         tmp = load_reg(s, 14);
   6564     }
   6565     gen_exception_return(s, tmp);
   6566     return true;
   6567 }
   6568 
   6569 static bool trans_HLT(DisasContext *s, arg_HLT *a)
   6570 {
   6571     gen_hlt(s, a->imm);
   6572     return true;
   6573 }
   6574 
   6575 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
   6576 {
   6577     if (!ENABLE_ARCH_5) {
   6578         return false;
   6579     }
   6580     /* BKPT is OK with ECI set and leaves it untouched */
   6581     s->eci_handled = true;
   6582     if (arm_dc_feature(s, ARM_FEATURE_M) &&
   6583         semihosting_enabled(s->current_el == 0) &&
   6584         (a->imm == 0xab)) {
   6585         gen_exception_internal_insn(s, EXCP_SEMIHOST);
   6586     } else {
   6587         gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
   6588     }
   6589     return true;
   6590 }
   6591 
   6592 static bool trans_HVC(DisasContext *s, arg_HVC *a)
   6593 {
   6594     if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
   6595         return false;
   6596     }
   6597     if (IS_USER(s)) {
   6598         unallocated_encoding(s);
   6599     } else {
   6600         gen_hvc(s, a->imm);
   6601     }
   6602     return true;
   6603 }
   6604 
   6605 static bool trans_SMC(DisasContext *s, arg_SMC *a)
   6606 {
   6607     if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
   6608         return false;
   6609     }
   6610     if (IS_USER(s)) {
   6611         unallocated_encoding(s);
   6612     } else {
   6613         gen_smc(s);
   6614     }
   6615     return true;
   6616 }
   6617 
   6618 static bool trans_SG(DisasContext *s, arg_SG *a)
   6619 {
   6620     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
   6621         !arm_dc_feature(s, ARM_FEATURE_V8)) {
   6622         return false;
   6623     }
   6624     /*
   6625      * SG (v8M only)
   6626      * The bulk of the behaviour for this instruction is implemented
   6627      * in v7m_handle_execute_nsc(), which deals with the insn when
   6628      * it is executed by a CPU in non-secure state from memory
   6629      * which is Secure & NonSecure-Callable.
   6630      * Here we only need to handle the remaining cases:
   6631      *  * in NS memory (including the "security extension not
   6632      *    implemented" case) : NOP
   6633      *  * in S memory but CPU already secure (clear IT bits)
   6634      * We know that the attribute for the memory this insn is
   6635      * in must match the current CPU state, because otherwise
   6636      * get_phys_addr_pmsav8 would have generated an exception.
   6637      */
   6638     if (s->v8m_secure) {
   6639         /* Like the IT insn, we don't need to generate any code */
   6640         s->condexec_cond = 0;
   6641         s->condexec_mask = 0;
   6642     }
   6643     return true;
   6644 }
   6645 
   6646 static bool trans_TT(DisasContext *s, arg_TT *a)
   6647 {
   6648     TCGv_i32 addr, tmp;
   6649 
   6650     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
   6651         !arm_dc_feature(s, ARM_FEATURE_V8)) {
   6652         return false;
   6653     }
   6654     if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
   6655         /* We UNDEF for these UNPREDICTABLE cases */
   6656         unallocated_encoding(s);
   6657         return true;
   6658     }
   6659     if (a->A && !s->v8m_secure) {
   6660         /* This case is UNDEFINED.  */
   6661         unallocated_encoding(s);
   6662         return true;
   6663     }
   6664 
   6665     addr = load_reg(s, a->rn);
   6666     tmp = tcg_temp_new_i32();
   6667     gen_helper_v7m_tt(tmp, cpu_env, addr, tcg_constant_i32((a->A << 1) | a->T));
   6668     tcg_temp_free_i32(addr);
   6669     store_reg(s, a->rd, tmp);
   6670     return true;
   6671 }
   6672 
   6673 /*
   6674  * Load/store register index
   6675  */
   6676 
   6677 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
   6678 {
   6679     ISSInfo ret;
   6680 
   6681     /* ISS not valid if writeback */
   6682     if (p && !w) {
   6683         ret = rd;
   6684         if (curr_insn_len(s) == 2) {
   6685             ret |= ISSIs16Bit;
   6686         }
   6687     } else {
   6688         ret = ISSInvalid;
   6689     }
   6690     return ret;
   6691 }
   6692 
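/*
 * Example: a 16-bit Thumb LDR with P=1, W=0 and Rt=3 yields
 * (3 | ISSIs16Bit); disas_set_da_iss() folds this into the syndrome
 * reported on a data abort, so a hypervisor can emulate the access.
 * Any writeback form yields ISSInvalid, as no valid ISS can be
 * reported for it.
 */
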
   6693 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
   6694 {
   6695     TCGv_i32 addr = load_reg(s, a->rn);
   6696 
   6697     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
   6698         gen_helper_v8m_stackcheck(cpu_env, addr);
   6699     }
   6700 
   6701     if (a->p) {
   6702         TCGv_i32 ofs = load_reg(s, a->rm);
   6703         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
   6704         if (a->u) {
   6705             tcg_gen_add_i32(addr, addr, ofs);
   6706         } else {
   6707             tcg_gen_sub_i32(addr, addr, ofs);
   6708         }
   6709         tcg_temp_free_i32(ofs);
   6710     }
   6711     return addr;
   6712 }
   6713 
   6714 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
   6715                             TCGv_i32 addr, int address_offset)
   6716 {
   6717     if (!a->p) {
   6718         TCGv_i32 ofs = load_reg(s, a->rm);
   6719         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
   6720         if (a->u) {
   6721             tcg_gen_add_i32(addr, addr, ofs);
   6722         } else {
   6723             tcg_gen_sub_i32(addr, addr, ofs);
   6724         }
   6725         tcg_temp_free_i32(ofs);
   6726     } else if (!a->w) {
   6727         tcg_temp_free_i32(addr);
   6728         return;
   6729     }
   6730     tcg_gen_addi_i32(addr, addr, address_offset);
   6731     store_reg(s, a->rn, addr);
   6732 }
   6733 
   6734 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
   6735                        MemOp mop, int mem_idx)
   6736 {
   6737     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
   6738     TCGv_i32 addr, tmp;
   6739 
   6740     addr = op_addr_rr_pre(s, a);
   6741 
   6742     tmp = tcg_temp_new_i32();
   6743     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
   6744     disas_set_da_iss(s, mop, issinfo);
   6745 
   6746     /*
   6747      * Perform base writeback before the loaded value to
   6748      * ensure correct behavior with overlapping index registers.
   6749      */
   6750     op_addr_rr_post(s, a, addr, 0);
   6751     store_reg_from_load(s, a->rt, tmp);
   6752     return true;
   6753 }
   6754 
   6755 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
   6756                         MemOp mop, int mem_idx)
   6757 {
   6758     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
   6759     TCGv_i32 addr, tmp;
   6760 
   6761     /*
   6762      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
   6763      * is either UNPREDICTABLE or has defined behaviour
   6764      */
   6765     if (s->thumb && a->rn == 15) {
   6766         return false;
   6767     }
   6768 
   6769     addr = op_addr_rr_pre(s, a);
   6770 
   6771     tmp = load_reg(s, a->rt);
   6772     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
   6773     disas_set_da_iss(s, mop, issinfo);
   6774     tcg_temp_free_i32(tmp);
   6775 
   6776     op_addr_rr_post(s, a, addr, 0);
   6777     return true;
   6778 }
   6779 
   6780 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
   6781 {
   6782     int mem_idx = get_mem_index(s);
   6783     TCGv_i32 addr, tmp;
   6784 
   6785     if (!ENABLE_ARCH_5TE) {
   6786         return false;
   6787     }
   6788     if (a->rt & 1) {
   6789         unallocated_encoding(s);
   6790         return true;
   6791     }
   6792     addr = op_addr_rr_pre(s, a);
   6793 
   6794     tmp = tcg_temp_new_i32();
   6795     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   6796     store_reg(s, a->rt, tmp);
   6797 
   6798     tcg_gen_addi_i32(addr, addr, 4);
   6799 
   6800     tmp = tcg_temp_new_i32();
   6801     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   6802     store_reg(s, a->rt + 1, tmp);
   6803 
   6804     /* LDRD w/ base writeback is undefined if the registers overlap.  */
   6805     op_addr_rr_post(s, a, addr, -4);
   6806     return true;
   6807 }
   6808 
   6809 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
   6810 {
   6811     int mem_idx = get_mem_index(s);
   6812     TCGv_i32 addr, tmp;
   6813 
   6814     if (!ENABLE_ARCH_5TE) {
   6815         return false;
   6816     }
   6817     if (a->rt & 1) {
   6818         unallocated_encoding(s);
   6819         return true;
   6820     }
   6821     addr = op_addr_rr_pre(s, a);
   6822 
   6823     tmp = load_reg(s, a->rt);
   6824     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   6825     tcg_temp_free_i32(tmp);
   6826 
   6827     tcg_gen_addi_i32(addr, addr, 4);
   6828 
   6829     tmp = load_reg(s, a->rt + 1);
   6830     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   6831     tcg_temp_free_i32(tmp);
   6832 
   6833     op_addr_rr_post(s, a, addr, -4);
   6834     return true;
   6835 }
   6836 
   6837 /*
   6838  * Load/store immediate index
   6839  */
   6840 
   6841 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
   6842 {
   6843     int ofs = a->imm;
   6844 
   6845     if (!a->u) {
   6846         ofs = -ofs;
   6847     }
   6848 
   6849     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
   6850         /*
   6851          * Stackcheck. Here we know 'addr' is the current SP;
   6852          * U is set if we're moving SP up, else down. It is
   6853          * UNKNOWN whether the limit check triggers when SP starts
   6854          * below the limit and ends up above it; we chose to do so.
   6855          */
   6856         if (!a->u) {
   6857             TCGv_i32 newsp = tcg_temp_new_i32();
   6858             tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
   6859             gen_helper_v8m_stackcheck(cpu_env, newsp);
   6860             tcg_temp_free_i32(newsp);
   6861         } else {
   6862             gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
   6863         }
   6864     }
   6865 
   6866     return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
   6867 }
   6868 
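/*
 * Example: for "str r0, [sp, #-8]!" with v8M stack checking enabled,
 * the code above computes newsp = sp - 8 and lets
 * gen_helper_v8m_stackcheck raise the fault if that would descend
 * below SPLIM, before any store is emitted; for an upward adjustment
 * checking the current sp is sufficient.
 */
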
   6869 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
   6870                             TCGv_i32 addr, int address_offset)
   6871 {
   6872     if (!a->p) {
   6873         if (a->u) {
   6874             address_offset += a->imm;
   6875         } else {
   6876             address_offset -= a->imm;
   6877         }
   6878     } else if (!a->w) {
   6879         tcg_temp_free_i32(addr);
   6880         return;
   6881     }
   6882     tcg_gen_addi_i32(addr, addr, address_offset);
   6883     store_reg(s, a->rn, addr);
   6884 }
   6885 
   6886 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
   6887                        MemOp mop, int mem_idx)
   6888 {
   6889     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
   6890     TCGv_i32 addr, tmp;
   6891 
   6892     addr = op_addr_ri_pre(s, a);
   6893 
   6894     tmp = tcg_temp_new_i32();
   6895     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop);
   6896     disas_set_da_iss(s, mop, issinfo);
   6897 
   6898     /*
   6899      * Perform base writeback before the loaded value to
   6900      * ensure correct behavior with overlapping index registers.
   6901      */
   6902     op_addr_ri_post(s, a, addr, 0);
   6903     store_reg_from_load(s, a->rt, tmp);
   6904     return true;
   6905 }
   6906 
   6907 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
   6908                         MemOp mop, int mem_idx)
   6909 {
   6910     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
   6911     TCGv_i32 addr, tmp;
   6912 
   6913     /*
   6914      * In Thumb encodings of stores Rn=1111 is UNDEF; for Arm it
   6915      * is either UNPREDICTABLE or has defined behaviour
   6916      */
   6917     if (s->thumb && a->rn == 15) {
   6918         return false;
   6919     }
   6920 
   6921     addr = op_addr_ri_pre(s, a);
   6922 
   6923     tmp = load_reg(s, a->rt);
   6924     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop);
   6925     disas_set_da_iss(s, mop, issinfo);
   6926     tcg_temp_free_i32(tmp);
   6927 
   6928     op_addr_ri_post(s, a, addr, 0);
   6929     return true;
   6930 }
   6931 
   6932 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
   6933 {
   6934     int mem_idx = get_mem_index(s);
   6935     TCGv_i32 addr, tmp;
   6936 
   6937     addr = op_addr_ri_pre(s, a);
   6938 
   6939     tmp = tcg_temp_new_i32();
   6940     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   6941     store_reg(s, a->rt, tmp);
   6942 
   6943     tcg_gen_addi_i32(addr, addr, 4);
   6944 
   6945     tmp = tcg_temp_new_i32();
   6946     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   6947     store_reg(s, rt2, tmp);
   6948 
   6949     /* LDRD w/ base writeback is undefined if the registers overlap.  */
   6950     op_addr_ri_post(s, a, addr, -4);
   6951     return true;
   6952 }
   6953 
   6954 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
   6955 {
   6956     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
   6957         return false;
   6958     }
   6959     return op_ldrd_ri(s, a, a->rt + 1);
   6960 }
   6961 
   6962 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
   6963 {
   6964     arg_ldst_ri b = {
   6965         .u = a->u, .w = a->w, .p = a->p,
   6966         .rn = a->rn, .rt = a->rt, .imm = a->imm
   6967     };
   6968     return op_ldrd_ri(s, &b, a->rt2);
   6969 }
   6970 
   6971 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
   6972 {
   6973     int mem_idx = get_mem_index(s);
   6974     TCGv_i32 addr, tmp;
   6975 
   6976     addr = op_addr_ri_pre(s, a);
   6977 
   6978     tmp = load_reg(s, a->rt);
   6979     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   6980     tcg_temp_free_i32(tmp);
   6981 
   6982     tcg_gen_addi_i32(addr, addr, 4);
   6983 
   6984     tmp = load_reg(s, rt2);
   6985     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   6986     tcg_temp_free_i32(tmp);
   6987 
   6988     op_addr_ri_post(s, a, addr, -4);
   6989     return true;
   6990 }
   6991 
   6992 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
   6993 {
   6994     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
   6995         return false;
   6996     }
   6997     return op_strd_ri(s, a, a->rt + 1);
   6998 }
   6999 
   7000 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
   7001 {
   7002     arg_ldst_ri b = {
   7003         .u = a->u, .w = a->w, .p = a->p,
   7004         .rn = a->rn, .rt = a->rt, .imm = a->imm
   7005     };
   7006     return op_strd_ri(s, &b, a->rt2);
   7007 }
   7008 
   7009 #define DO_LDST(NAME, WHICH, MEMOP) \
   7010 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
   7011 {                                                                     \
   7012     return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
   7013 }                                                                     \
   7014 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
   7015 {                                                                     \
   7016     return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
   7017 }                                                                     \
   7018 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
   7019 {                                                                     \
   7020     return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
   7021 }                                                                     \
   7022 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
   7023 {                                                                     \
   7024     return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
   7025 }
   7026 
   7027 DO_LDST(LDR, load, MO_UL)
   7028 DO_LDST(LDRB, load, MO_UB)
   7029 DO_LDST(LDRH, load, MO_UW)
   7030 DO_LDST(LDRSB, load, MO_SB)
   7031 DO_LDST(LDRSH, load, MO_SW)
   7032 
   7033 DO_LDST(STR, store, MO_UL)
   7034 DO_LDST(STRB, store, MO_UB)
   7035 DO_LDST(STRH, store, MO_UW)
   7036 
   7037 #undef DO_LDST
   7038 
   7039 /*
   7040  * Synchronization primitives
   7041  */
   7042 
   7043 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
   7044 {
   7045     TCGv_i32 addr, tmp;
   7046     TCGv taddr;
   7047 
   7048     opc |= s->be_data;
   7049     addr = load_reg(s, a->rn);
   7050     taddr = gen_aa32_addr(s, addr, opc);
   7051     tcg_temp_free_i32(addr);
   7052 
   7053     tmp = load_reg(s, a->rt2);
   7054     tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
   7055     tcg_temp_free(taddr);
   7056 
   7057     store_reg(s, a->rt, tmp);
   7058     return true;
   7059 }
   7060 
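/*
 * The legacy SWP/SWPB semantics (atomically: tmp = [Rn]; [Rn] = Rt2;
 * Rt = tmp) map directly onto a single tcg_gen_atomic_xchg_i32, so no
 * explicit load/store pair or global lock is needed here.
 */
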
   7061 static bool trans_SWP(DisasContext *s, arg_SWP *a)
   7062 {
   7063     return op_swp(s, a, MO_UL | MO_ALIGN);
   7064 }
   7065 
   7066 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
   7067 {
   7068     return op_swp(s, a, MO_UB);
   7069 }
   7070 
   7071 /*
   7072  * Load/Store Exclusive and Load-Acquire/Store-Release
   7073  */
   7074 
   7075 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
   7076 {
   7077     TCGv_i32 addr;
   7078     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
   7079     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
   7080 
   7081     /* We UNDEF for these UNPREDICTABLE cases.  */
   7082     if (a->rd == 15 || a->rn == 15 || a->rt == 15
   7083         || a->rd == a->rn || a->rd == a->rt
   7084         || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
   7085         || (mop == MO_64
   7086             && (a->rt2 == 15
   7087                 || a->rd == a->rt2
   7088                 || (!v8a && s->thumb && a->rt2 == 13)))) {
   7089         unallocated_encoding(s);
   7090         return true;
   7091     }
   7092 
   7093     if (rel) {
   7094         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
   7095     }
   7096 
   7097     addr = tcg_temp_local_new_i32();
   7098     load_reg_var(s, addr, a->rn);
   7099     tcg_gen_addi_i32(addr, addr, a->imm);
   7100 
   7101     gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
   7102     tcg_temp_free_i32(addr);
   7103     return true;
   7104 }
   7105 
   7106 static bool trans_STREX(DisasContext *s, arg_STREX *a)
   7107 {
   7108     if (!ENABLE_ARCH_6) {
   7109         return false;
   7110     }
   7111     return op_strex(s, a, MO_32, false);
   7112 }
   7113 
   7114 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
   7115 {
   7116     if (!ENABLE_ARCH_6K) {
   7117         return false;
   7118     }
   7119     /* We UNDEF for these UNPREDICTABLE cases.  */
   7120     if (a->rt & 1) {
   7121         unallocated_encoding(s);
   7122         return true;
   7123     }
   7124     a->rt2 = a->rt + 1;
   7125     return op_strex(s, a, MO_64, false);
   7126 }
   7127 
   7128 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
   7129 {
   7130     return op_strex(s, a, MO_64, false);
   7131 }
   7132 
   7133 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
   7134 {
   7135     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
   7136         return false;
   7137     }
   7138     return op_strex(s, a, MO_8, false);
   7139 }
   7140 
   7141 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
   7142 {
   7143     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
   7144         return false;
   7145     }
   7146     return op_strex(s, a, MO_16, false);
   7147 }
   7148 
   7149 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
   7150 {
   7151     if (!ENABLE_ARCH_8) {
   7152         return false;
   7153     }
   7154     return op_strex(s, a, MO_32, true);
   7155 }
   7156 
   7157 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
   7158 {
   7159     if (!ENABLE_ARCH_8) {
   7160         return false;
   7161     }
   7162     /* We UNDEF for these UNPREDICTABLE cases.  */
   7163     if (a->rt & 1) {
   7164         unallocated_encoding(s);
   7165         return true;
   7166     }
   7167     a->rt2 = a->rt + 1;
   7168     return op_strex(s, a, MO_64, true);
   7169 }
   7170 
   7171 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
   7172 {
   7173     if (!ENABLE_ARCH_8) {
   7174         return false;
   7175     }
   7176     return op_strex(s, a, MO_64, true);
   7177 }
   7178 
   7179 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
   7180 {
   7181     if (!ENABLE_ARCH_8) {
   7182         return false;
   7183     }
   7184     return op_strex(s, a, MO_8, true);
   7185 }
   7186 
   7187 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
   7188 {
   7189     if (!ENABLE_ARCH_8) {
   7190         return false;
   7191     }
   7192     return op_strex(s, a, MO_16, true);
   7193 }
   7194 
   7195 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
   7196 {
   7197     TCGv_i32 addr, tmp;
   7198 
   7199     if (!ENABLE_ARCH_8) {
   7200         return false;
   7201     }
   7202     /* We UNDEF for these UNPREDICTABLE cases.  */
   7203     if (a->rn == 15 || a->rt == 15) {
   7204         unallocated_encoding(s);
   7205         return true;
   7206     }
   7207 
   7208     addr = load_reg(s, a->rn);
   7209     tmp = load_reg(s, a->rt);
   7210     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
   7211     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
   7212     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
   7213 
   7214     tcg_temp_free_i32(tmp);
   7215     tcg_temp_free_i32(addr);
   7216     return true;
   7217 }
   7218 
   7219 static bool trans_STL(DisasContext *s, arg_STL *a)
   7220 {
   7221     return op_stl(s, a, MO_UL);
   7222 }
   7223 
   7224 static bool trans_STLB(DisasContext *s, arg_STL *a)
   7225 {
   7226     return op_stl(s, a, MO_UB);
   7227 }
   7228 
   7229 static bool trans_STLH(DisasContext *s, arg_STL *a)
   7230 {
   7231     return op_stl(s, a, MO_UW);
   7232 }
   7233 
   7234 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
   7235 {
   7236     TCGv_i32 addr;
   7237     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
   7238     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
   7239 
   7240     /* We UNDEF for these UNPREDICTABLE cases.  */
   7241     if (a->rn == 15 || a->rt == 15
   7242         || (!v8a && s->thumb && a->rt == 13)
   7243         || (mop == MO_64
   7244             && (a->rt2 == 15 || a->rt == a->rt2
   7245                 || (!v8a && s->thumb && a->rt2 == 13)))) {
   7246         unallocated_encoding(s);
   7247         return true;
   7248     }
   7249 
   7250     addr = tcg_temp_local_new_i32();
   7251     load_reg_var(s, addr, a->rn);
   7252     tcg_gen_addi_i32(addr, addr, a->imm);
   7253 
   7254     gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
   7255     tcg_temp_free_i32(addr);
   7256 
   7257     if (acq) {
   7258         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
   7259     }
   7260     return true;
   7261 }
   7262 
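/*
 * These implement the guest side of a load/store-exclusive loop, e.g.
 * the canonical atomic increment:
 *
 *     retry:
 *         ldrex   r1, [r0]
 *         add     r1, r1, #1
 *         strex   r2, r1, [r0]      @ r2 = 0 on success
 *         cmp     r2, #0
 *         bne     retry
 *
 * gen_load_exclusive() latches the address and data into
 * cpu_exclusive_addr/cpu_exclusive_val; gen_store_exclusive() then
 * only performs the store if both still match.
 */
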
   7263 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
   7264 {
   7265     if (!ENABLE_ARCH_6) {
   7266         return false;
   7267     }
   7268     return op_ldrex(s, a, MO_32, false);
   7269 }
   7270 
   7271 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
   7272 {
   7273     if (!ENABLE_ARCH_6K) {
   7274         return false;
   7275     }
   7276     /* We UNDEF for these UNPREDICTABLE cases.  */
   7277     if (a->rt & 1) {
   7278         unallocated_encoding(s);
   7279         return true;
   7280     }
   7281     a->rt2 = a->rt + 1;
   7282     return op_ldrex(s, a, MO_64, false);
   7283 }
   7284 
   7285 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
   7286 {
   7287     return op_ldrex(s, a, MO_64, false);
   7288 }
   7289 
   7290 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
   7291 {
   7292     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
   7293         return false;
   7294     }
   7295     return op_ldrex(s, a, MO_8, false);
   7296 }
   7297 
   7298 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
   7299 {
   7300     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
   7301         return false;
   7302     }
   7303     return op_ldrex(s, a, MO_16, false);
   7304 }
   7305 
   7306 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
   7307 {
   7308     if (!ENABLE_ARCH_8) {
   7309         return false;
   7310     }
   7311     return op_ldrex(s, a, MO_32, true);
   7312 }
   7313 
   7314 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
   7315 {
   7316     if (!ENABLE_ARCH_8) {
   7317         return false;
   7318     }
   7319     /* We UNDEF for these UNPREDICTABLE cases.  */
   7320     if (a->rt & 1) {
   7321         unallocated_encoding(s);
   7322         return true;
   7323     }
   7324     a->rt2 = a->rt + 1;
   7325     return op_ldrex(s, a, MO_64, true);
   7326 }
   7327 
   7328 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
   7329 {
   7330     if (!ENABLE_ARCH_8) {
   7331         return false;
   7332     }
   7333     return op_ldrex(s, a, MO_64, true);
   7334 }
   7335 
   7336 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
   7337 {
   7338     if (!ENABLE_ARCH_8) {
   7339         return false;
   7340     }
   7341     return op_ldrex(s, a, MO_8, true);
   7342 }
   7343 
   7344 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
   7345 {
   7346     if (!ENABLE_ARCH_8) {
   7347         return false;
   7348     }
   7349     return op_ldrex(s, a, MO_16, true);
   7350 }
   7351 
   7352 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
   7353 {
   7354     TCGv_i32 addr, tmp;
   7355 
   7356     if (!ENABLE_ARCH_8) {
   7357         return false;
   7358     }
   7359     /* We UNDEF for these UNPREDICTABLE cases.  */
   7360     if (a->rn == 15 || a->rt == 15) {
   7361         unallocated_encoding(s);
   7362         return true;
   7363     }
   7364 
   7365     addr = load_reg(s, a->rn);
   7366     tmp = tcg_temp_new_i32();
   7367     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | MO_ALIGN);
   7368     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
   7369     tcg_temp_free_i32(addr);
   7370 
   7371     store_reg(s, a->rt, tmp);
    7372     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);

   7373     return true;
   7374 }
   7375 
   7376 static bool trans_LDA(DisasContext *s, arg_LDA *a)
   7377 {
   7378     return op_lda(s, a, MO_UL);
   7379 }
   7380 
   7381 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
   7382 {
   7383     return op_lda(s, a, MO_UB);
   7384 }
   7385 
   7386 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
   7387 {
   7388     return op_lda(s, a, MO_UW);
   7389 }
   7390 
   7391 /*
   7392  * Media instructions
   7393  */
   7394 
   7395 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
   7396 {
   7397     TCGv_i32 t1, t2;
   7398 
   7399     if (!ENABLE_ARCH_6) {
   7400         return false;
   7401     }
   7402 
   7403     t1 = load_reg(s, a->rn);
   7404     t2 = load_reg(s, a->rm);
   7405     gen_helper_usad8(t1, t1, t2);
   7406     tcg_temp_free_i32(t2);
   7407     if (a->ra != 15) {
   7408         t2 = load_reg(s, a->ra);
   7409         tcg_gen_add_i32(t1, t1, t2);
   7410         tcg_temp_free_i32(t2);
   7411     }
   7412     store_reg(s, a->rd, t1);
   7413     return true;
   7414 }
   7415 
   7416 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
   7417 {
   7418     TCGv_i32 tmp;
   7419     int width = a->widthm1 + 1;
   7420     int shift = a->lsb;
   7421 
   7422     if (!ENABLE_ARCH_6T2) {
   7423         return false;
   7424     }
   7425     if (shift + width > 32) {
   7426         /* UNPREDICTABLE; we choose to UNDEF */
   7427         unallocated_encoding(s);
   7428         return true;
   7429     }
   7430 
   7431     tmp = load_reg(s, a->rn);
   7432     if (u) {
   7433         tcg_gen_extract_i32(tmp, tmp, shift, width);
   7434     } else {
   7435         tcg_gen_sextract_i32(tmp, tmp, shift, width);
   7436     }
   7437     store_reg(s, a->rd, tmp);
   7438     return true;
   7439 }
   7440 
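/*
 * Example: UBFX r0, r1, #8, #4 (lsb = 8, widthm1 = 3) extracts bits
 * [11:8] of r1 zero-extended into r0, so r1 = 0x00000a50 gives
 * r0 = 0x0000000a; SBFX would sign-extend from bit 11 instead. The
 * "shift + width > 32" test rejects fields that would run past bit 31.
 */
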
   7441 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
   7442 {
   7443     return op_bfx(s, a, false);
   7444 }
   7445 
   7446 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
   7447 {
   7448     return op_bfx(s, a, true);
   7449 }
   7450 
   7451 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
   7452 {
   7453     TCGv_i32 tmp;
   7454     int msb = a->msb, lsb = a->lsb;
   7455     int width;
   7456 
   7457     if (!ENABLE_ARCH_6T2) {
   7458         return false;
   7459     }
   7460     if (msb < lsb) {
   7461         /* UNPREDICTABLE; we choose to UNDEF */
   7462         unallocated_encoding(s);
   7463         return true;
   7464     }
   7465 
   7466     width = msb + 1 - lsb;
   7467     if (a->rn == 15) {
   7468         /* BFC */
   7469         tmp = tcg_const_i32(0);
   7470     } else {
   7471         /* BFI */
   7472         tmp = load_reg(s, a->rn);
   7473     }
   7474     if (width != 32) {
   7475         TCGv_i32 tmp2 = load_reg(s, a->rd);
   7476         tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
   7477         tcg_temp_free_i32(tmp2);
   7478     }
   7479     store_reg(s, a->rd, tmp);
   7480     return true;
   7481 }
   7482 
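/*
 * Example: BFI r0, r1, #4, #8 decodes to lsb = 4, msb = 11, so
 * width = 8 and the deposit writes r1<7:0> into r0<11:4>, leaving the
 * other bits of r0 unchanged. BFC is the same operation with a zero
 * source, which is why Rn == 15 substitutes a zeroed temp.
 */
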
   7483 static bool trans_UDF(DisasContext *s, arg_UDF *a)
   7484 {
   7485     unallocated_encoding(s);
   7486     return true;
   7487 }
   7488 
   7489 /*
   7490  * Parallel addition and subtraction
   7491  */
   7492 
   7493 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
   7494                           void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
   7495 {
   7496     TCGv_i32 t0, t1;
   7497 
   7498     if (s->thumb
   7499         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
   7500         : !ENABLE_ARCH_6) {
   7501         return false;
   7502     }
   7503 
   7504     t0 = load_reg(s, a->rn);
   7505     t1 = load_reg(s, a->rm);
   7506 
   7507     gen(t0, t0, t1);
   7508 
   7509     tcg_temp_free_i32(t1);
   7510     store_reg(s, a->rd, t0);
   7511     return true;
   7512 }
   7513 
   7514 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
   7515                              void (*gen)(TCGv_i32, TCGv_i32,
   7516                                          TCGv_i32, TCGv_ptr))
   7517 {
   7518     TCGv_i32 t0, t1;
   7519     TCGv_ptr ge;
   7520 
   7521     if (s->thumb
   7522         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
   7523         : !ENABLE_ARCH_6) {
   7524         return false;
   7525     }
   7526 
   7527     t0 = load_reg(s, a->rn);
   7528     t1 = load_reg(s, a->rm);
   7529 
   7530     ge = tcg_temp_new_ptr();
   7531     tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
   7532     gen(t0, t0, t1, ge);
   7533 
   7534     tcg_temp_free_ptr(ge);
   7535     tcg_temp_free_i32(t1);
   7536     store_reg(s, a->rd, t0);
   7537     return true;
   7538 }
   7539 
   7540 #define DO_PAR_ADDSUB(NAME, helper) \
   7541 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
   7542 {                                                       \
   7543     return op_par_addsub(s, a, helper);                 \
   7544 }
   7545 
   7546 #define DO_PAR_ADDSUB_GE(NAME, helper) \
   7547 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
   7548 {                                                       \
   7549     return op_par_addsub_ge(s, a, helper);              \
   7550 }
   7551 
   7552 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
   7553 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
   7554 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
   7555 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
   7556 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
   7557 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
   7558 
   7559 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
   7560 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
   7561 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
   7562 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
   7563 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
   7564 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
   7565 
   7566 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
   7567 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
   7568 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
   7569 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
   7570 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
   7571 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
   7572 
   7573 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
   7574 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
   7575 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
   7576 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
   7577 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
   7578 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
   7579 
   7580 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
   7581 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
   7582 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
   7583 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
   7584 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
   7585 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
   7586 
   7587 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
   7588 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
   7589 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
   7590 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
   7591 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
   7592 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
   7593 
   7594 #undef DO_PAR_ADDSUB
   7595 #undef DO_PAR_ADDSUB_GE
   7596 
   7597 /*
   7598  * Packing, unpacking, saturation, and reversal
   7599  */
   7600 
   7601 static bool trans_PKH(DisasContext *s, arg_PKH *a)
   7602 {
   7603     TCGv_i32 tn, tm;
   7604     int shift = a->imm;
   7605 
   7606     if (s->thumb
   7607         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
   7608         : !ENABLE_ARCH_6) {
   7609         return false;
   7610     }
   7611 
   7612     tn = load_reg(s, a->rn);
   7613     tm = load_reg(s, a->rm);
   7614     if (a->tb) {
   7615         /* PKHTB */
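                 /*
                  * An imm field of 0 encodes ASR #32, which TCG cannot
                  * express directly; ASR #31 gives the same result, as
                  * both leave only copies of the sign bit.
                  */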
   7616         if (shift == 0) {
   7617             shift = 31;
   7618         }
   7619         tcg_gen_sari_i32(tm, tm, shift);
   7620         tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
   7621     } else {
   7622         /* PKHBT */
   7623         tcg_gen_shli_i32(tm, tm, shift);
   7624         tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
   7625     }
   7626     tcg_temp_free_i32(tm);
   7627     store_reg(s, a->rd, tn);
   7628     return true;
   7629 }
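
         /*
          * Worked example (illustrative): PKHBT r0, r1, r2 with
          * r1 = 0x0000aaaa and r2 = 0x55550000 packs r1[15:0] below
          * r2[31:16], giving 0x5555aaaa; PKHTB instead keeps Rn[31:16]
          * and takes the low half from the shifted Rm.
          */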
   7630 
   7631 static bool op_sat(DisasContext *s, arg_sat *a,
   7632                    void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
   7633 {
   7634     TCGv_i32 tmp;
   7635     int shift = a->imm;
   7636 
   7637     if (!ENABLE_ARCH_6) {
   7638         return false;
   7639     }
   7640 
   7641     tmp = load_reg(s, a->rn);
   7642     if (a->sh) {
   7643         tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
   7644     } else {
   7645         tcg_gen_shli_i32(tmp, tmp, shift);
   7646     }
   7647 
   7648     gen(tmp, cpu_env, tmp, tcg_constant_i32(a->satimm));
   7649 
   7650     store_reg(s, a->rd, tmp);
   7651     return true;
   7652 }
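
         /*
          * Worked example (illustrative): saturating 300 to a signed
          * 8-bit range clamps it to 127 and sets QF; -300 clamps to
          * -128. In-range values pass through unchanged with QF
          * untouched.
          */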
   7653 
   7654 static bool trans_SSAT(DisasContext *s, arg_sat *a)
   7655 {
   7656     return op_sat(s, a, gen_helper_ssat);
   7657 }
   7658 
   7659 static bool trans_USAT(DisasContext *s, arg_sat *a)
   7660 {
   7661     return op_sat(s, a, gen_helper_usat);
   7662 }
   7663 
   7664 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
   7665 {
   7666     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
   7667         return false;
   7668     }
   7669     return op_sat(s, a, gen_helper_ssat16);
   7670 }
   7671 
   7672 static bool trans_USAT16(DisasContext *s, arg_sat *a)
   7673 {
   7674     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
   7675         return false;
   7676     }
   7677     return op_sat(s, a, gen_helper_usat16);
   7678 }
   7679 
   7680 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
   7681                    void (*gen_extract)(TCGv_i32, TCGv_i32),
   7682                    void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
   7683 {
   7684     TCGv_i32 tmp;
   7685 
   7686     if (!ENABLE_ARCH_6) {
   7687         return false;
   7688     }
   7689 
   7690     tmp = load_reg(s, a->rm);
   7691     /*
   7692      * TODO: In many cases we could do a shift instead of a rotate.
   7693      * Combined with a simple extend, that becomes an extract.
   7694      */
   7695     tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
   7696     gen_extract(tmp, tmp);
   7697 
   7698     if (a->rn != 15) {
   7699         TCGv_i32 tmp2 = load_reg(s, a->rn);
   7700         gen_add(tmp, tmp, tmp2);
   7701         tcg_temp_free_i32(tmp2);
   7702     }
   7703     store_reg(s, a->rd, tmp);
   7704     return true;
   7705 }
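
         /*
          * Worked example (illustrative): SXTAB with rot = 1 rotates
          * Rm right by 8 before the byte extend, so Rm = 0x00008000
          * becomes 0x00000080, sign-extends to 0xffffff80, and is then
          * added to Rn (the rn == 15 forms are the plain extends).
          */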
   7706 
   7707 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
   7708 {
   7709     return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
   7710 }
   7711 
   7712 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
   7713 {
   7714     return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
   7715 }
   7716 
   7717 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
   7718 {
   7719     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
   7720         return false;
   7721     }
   7722     return op_xta(s, a, gen_helper_sxtb16, gen_add16);
   7723 }
   7724 
   7725 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
   7726 {
   7727     return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
   7728 }
   7729 
   7730 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
   7731 {
   7732     return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
   7733 }
   7734 
   7735 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
   7736 {
   7737     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
   7738         return false;
   7739     }
   7740     return op_xta(s, a, gen_helper_uxtb16, gen_add16);
   7741 }
   7742 
   7743 static bool trans_SEL(DisasContext *s, arg_rrr *a)
   7744 {
   7745     TCGv_i32 t1, t2, t3;
   7746 
   7747     if (s->thumb
   7748         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
   7749         : !ENABLE_ARCH_6) {
   7750         return false;
   7751     }
   7752 
   7753     t1 = load_reg(s, a->rn);
   7754     t2 = load_reg(s, a->rm);
   7755     t3 = tcg_temp_new_i32();
   7756     tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
   7757     gen_helper_sel_flags(t1, t3, t1, t2);
   7758     tcg_temp_free_i32(t3);
   7759     tcg_temp_free_i32(t2);
   7760     store_reg(s, a->rd, t1);
   7761     return true;
   7762 }
   7763 
   7764 static bool op_rr(DisasContext *s, arg_rr *a,
   7765                   void (*gen)(TCGv_i32, TCGv_i32))
   7766 {
   7767     TCGv_i32 tmp;
   7768 
   7769     tmp = load_reg(s, a->rm);
   7770     gen(tmp, tmp);
   7771     store_reg(s, a->rd, tmp);
   7772     return true;
   7773 }
   7774 
   7775 static bool trans_REV(DisasContext *s, arg_rr *a)
   7776 {
   7777     if (!ENABLE_ARCH_6) {
   7778         return false;
   7779     }
   7780     return op_rr(s, a, tcg_gen_bswap32_i32);
   7781 }
   7782 
   7783 static bool trans_REV16(DisasContext *s, arg_rr *a)
   7784 {
   7785     if (!ENABLE_ARCH_6) {
   7786         return false;
   7787     }
   7788     return op_rr(s, a, gen_rev16);
   7789 }
   7790 
   7791 static bool trans_REVSH(DisasContext *s, arg_rr *a)
   7792 {
   7793     if (!ENABLE_ARCH_6) {
   7794         return false;
   7795     }
   7796     return op_rr(s, a, gen_revsh);
   7797 }
   7798 
   7799 static bool trans_RBIT(DisasContext *s, arg_rr *a)
   7800 {
   7801     if (!ENABLE_ARCH_6T2) {
   7802         return false;
   7803     }
   7804     return op_rr(s, a, gen_helper_rbit);
   7805 }
   7806 
   7807 /*
   7808  * Signed multiply, signed and unsigned divide
   7809  */
   7810 
   7811 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
   7812 {
   7813     TCGv_i32 t1, t2;
   7814 
   7815     if (!ENABLE_ARCH_6) {
   7816         return false;
   7817     }
   7818 
   7819     t1 = load_reg(s, a->rn);
   7820     t2 = load_reg(s, a->rm);
   7821     if (m_swap) {
   7822         gen_swap_half(t2, t2);
   7823     }
   7824     gen_smul_dual(t1, t2);
   7825 
   7826     if (sub) {
   7827         /*
   7828          * This subtraction cannot overflow, so we can do a simple
   7829          * 32-bit subtraction and then a possible 32-bit saturating
   7830          * addition of Ra.
   7831          */
   7832         tcg_gen_sub_i32(t1, t1, t2);
   7833         tcg_temp_free_i32(t2);
   7834 
   7835         if (a->ra != 15) {
   7836             t2 = load_reg(s, a->ra);
   7837             gen_helper_add_setq(t1, cpu_env, t1, t2);
   7838             tcg_temp_free_i32(t2);
   7839         }
   7840     } else if (a->ra == 15) {
   7841         /* Single saturation-checking addition */
   7842         gen_helper_add_setq(t1, cpu_env, t1, t2);
   7843         tcg_temp_free_i32(t2);
   7844     } else {
   7845         /*
   7846          * We need to add the products and Ra together and then
   7847          * determine whether the final result overflowed. Doing
   7848          * this as two separate add-and-check-overflow steps incorrectly
   7849          * sets Q for cases like (-32768 * -32768) + (-32768 * -32768) + -1.
   7850          * Do all the arithmetic at 64-bits and then check for overflow.
   7851          */
   7852         TCGv_i64 p64, q64;
   7853         TCGv_i32 t3, qf, one;
   7854 
   7855         p64 = tcg_temp_new_i64();
   7856         q64 = tcg_temp_new_i64();
   7857         tcg_gen_ext_i32_i64(p64, t1);
   7858         tcg_gen_ext_i32_i64(q64, t2);
   7859         tcg_gen_add_i64(p64, p64, q64);
   7860         load_reg_var(s, t2, a->ra);
   7861         tcg_gen_ext_i32_i64(q64, t2);
   7862         tcg_gen_add_i64(p64, p64, q64);
   7863         tcg_temp_free_i64(q64);
   7864 
   7865         tcg_gen_extr_i64_i32(t1, t2, p64);
   7866         tcg_temp_free_i64(p64);
   7867         /*
   7868          * t1 is the low half of the result which goes into Rd.
   7869          * We have overflow and must set Q if the high half (t2)
   7870          * is different from the sign-extension of t1.
   7871          */
   7872         t3 = tcg_temp_new_i32();
   7873         tcg_gen_sari_i32(t3, t1, 31);
   7874         qf = load_cpu_field(QF);
   7875         one = tcg_constant_i32(1);
   7876         tcg_gen_movcond_i32(TCG_COND_NE, qf, t2, t3, one, qf);
   7877         store_cpu_field(qf, QF);
   7878         tcg_temp_free_i32(t3);
   7879         tcg_temp_free_i32(t2);
   7880     }
   7881     store_reg(s, a->rd, t1);
   7882     return true;
   7883 }
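
         /*
          * Worked example (illustrative): if the 64-bit sum is
          * 0x00000000_80000000 then t1 = 0x80000000, whose
          * sign-extension t3 is 0xffffffff, while the high half t2 is
          * 0; t2 != t3, so the movcond above sets QF, as required for
          * a sum that does not fit in 32 signed bits.
          */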
   7884 
   7885 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
   7886 {
   7887     return op_smlad(s, a, false, false);
   7888 }
   7889 
   7890 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
   7891 {
   7892     return op_smlad(s, a, true, false);
   7893 }
   7894 
   7895 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
   7896 {
   7897     return op_smlad(s, a, false, true);
   7898 }
   7899 
   7900 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
   7901 {
   7902     return op_smlad(s, a, true, true);
   7903 }
   7904 
   7905 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
   7906 {
   7907     TCGv_i32 t1, t2;
   7908     TCGv_i64 l1, l2;
   7909 
   7910     if (!ENABLE_ARCH_6) {
   7911         return false;
   7912     }
   7913 
   7914     t1 = load_reg(s, a->rn);
   7915     t2 = load_reg(s, a->rm);
   7916     if (m_swap) {
   7917         gen_swap_half(t2, t2);
   7918     }
   7919     gen_smul_dual(t1, t2);
   7920 
   7921     l1 = tcg_temp_new_i64();
   7922     l2 = tcg_temp_new_i64();
   7923     tcg_gen_ext_i32_i64(l1, t1);
   7924     tcg_gen_ext_i32_i64(l2, t2);
   7925     tcg_temp_free_i32(t1);
   7926     tcg_temp_free_i32(t2);
   7927 
   7928     if (sub) {
   7929         tcg_gen_sub_i64(l1, l1, l2);
   7930     } else {
   7931         tcg_gen_add_i64(l1, l1, l2);
   7932     }
   7933     tcg_temp_free_i64(l2);
   7934 
   7935     gen_addq(s, l1, a->ra, a->rd);
   7936     gen_storeq_reg(s, a->ra, a->rd, l1);
   7937     tcg_temp_free_i64(l1);
   7938     return true;
   7939 }
   7940 
   7941 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
   7942 {
   7943     return op_smlald(s, a, false, false);
   7944 }
   7945 
   7946 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
   7947 {
   7948     return op_smlald(s, a, true, false);
   7949 }
   7950 
   7951 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
   7952 {
   7953     return op_smlald(s, a, false, true);
   7954 }
   7955 
   7956 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
   7957 {
   7958     return op_smlald(s, a, true, true);
   7959 }
   7960 
   7961 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
   7962 {
   7963     TCGv_i32 t1, t2;
   7964 
   7965     if (s->thumb
   7966         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
   7967         : !ENABLE_ARCH_6) {
   7968         return false;
   7969     }
   7970 
   7971     t1 = load_reg(s, a->rn);
   7972     t2 = load_reg(s, a->rm);
   7973     tcg_gen_muls2_i32(t2, t1, t1, t2);
   7974 
   7975     if (a->ra != 15) {
   7976         TCGv_i32 t3 = load_reg(s, a->ra);
   7977         if (sub) {
   7978             /*
    7979              * For SMMLS we need a 64-bit subtract, so that we get the
    7980              * borrow caused by a non-zero multiply lowpart, and the
    7981              * correct result lowpart for rounding.
   7982              */
   7983             tcg_gen_sub2_i32(t2, t1, tcg_constant_i32(0), t3, t2, t1);
   7984         } else {
   7985             tcg_gen_add_i32(t1, t1, t3);
   7986         }
   7987         tcg_temp_free_i32(t3);
   7988     }
   7989     if (round) {
   7990         /*
   7991          * Adding 0x80000000 to the 64-bit quantity means that we have
    7992          * a carry into the high word when the low word has the msb set.
   7993          */
   7994         tcg_gen_shri_i32(t2, t2, 31);
   7995         tcg_gen_add_i32(t1, t1, t2);
   7996     }
   7997     tcg_temp_free_i32(t2);
   7998     store_reg(s, a->rd, t1);
   7999     return true;
   8000 }
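
         /*
          * Worked example (illustrative): for SMMLAR a 64-bit value of
          * 0x00000001_80000000 has the low-word msb set, so t2 >> 31
          * contributes 1 and Rd becomes 0x00000002, i.e. the high word
          * of the value plus 0x80000000.
          */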
   8001 
   8002 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
   8003 {
   8004     return op_smmla(s, a, false, false);
   8005 }
   8006 
   8007 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
   8008 {
   8009     return op_smmla(s, a, true, false);
   8010 }
   8011 
   8012 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
   8013 {
   8014     return op_smmla(s, a, false, true);
   8015 }
   8016 
   8017 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
   8018 {
   8019     return op_smmla(s, a, true, true);
   8020 }
   8021 
   8022 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
   8023 {
   8024     TCGv_i32 t1, t2;
   8025 
   8026     if (s->thumb
   8027         ? !dc_isar_feature(aa32_thumb_div, s)
   8028         : !dc_isar_feature(aa32_arm_div, s)) {
   8029         return false;
   8030     }
   8031 
   8032     t1 = load_reg(s, a->rn);
   8033     t2 = load_reg(s, a->rm);
   8034     if (u) {
   8035         gen_helper_udiv(t1, cpu_env, t1, t2);
   8036     } else {
   8037         gen_helper_sdiv(t1, cpu_env, t1, t2);
   8038     }
   8039     tcg_temp_free_i32(t2);
   8040     store_reg(s, a->rd, t1);
   8041     return true;
   8042 }
   8043 
   8044 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
   8045 {
   8046     return op_div(s, a, false);
   8047 }
   8048 
   8049 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
   8050 {
   8051     return op_div(s, a, true);
   8052 }
   8053 
   8054 /*
   8055  * Block data transfer
   8056  */
   8057 
   8058 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
   8059 {
   8060     TCGv_i32 addr = load_reg(s, a->rn);
   8061 
   8062     if (a->b) {
   8063         if (a->i) {
   8064             /* pre increment */
   8065             tcg_gen_addi_i32(addr, addr, 4);
   8066         } else {
   8067             /* pre decrement */
   8068             tcg_gen_addi_i32(addr, addr, -(n * 4));
   8069         }
   8070     } else if (!a->i && n != 1) {
   8071         /* post decrement */
   8072         tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
   8073     }
   8074 
   8075     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
   8076         /*
   8077          * If the writeback is incrementing SP rather than
   8078          * decrementing it, and the initial SP is below the
   8079          * stack limit but the final written-back SP would
   8080          * be above, then we must not perform any memory
   8081          * accesses, but it is IMPDEF whether we generate
   8082          * an exception. We choose to do so in this case.
   8083          * At this point 'addr' is the lowest address, so
   8084          * either the original SP (if incrementing) or our
   8085          * final SP (if decrementing), so that's what we check.
   8086          */
   8087         gen_helper_v8m_stackcheck(cpu_env, addr);
   8088     }
   8089 
   8090     return addr;
   8091 }
   8092 
   8093 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
   8094                                TCGv_i32 addr, int n)
   8095 {
   8096     if (a->w) {
   8097         /* write back */
   8098         if (!a->b) {
   8099             if (a->i) {
   8100                 /* post increment */
   8101                 tcg_gen_addi_i32(addr, addr, 4);
   8102             } else {
   8103                 /* post decrement */
   8104                 tcg_gen_addi_i32(addr, addr, -(n * 4));
   8105             }
   8106         } else if (!a->i && n != 1) {
   8107             /* pre decrement */
   8108             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
   8109         }
   8110         store_reg(s, a->rn, addr);
   8111     } else {
   8112         tcg_temp_free_i32(addr);
   8113     }
   8114 }
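
         /*
          * Worked example (illustrative): LDMDB r0!, {r1-r3} has
          * n = 3, b = 1, i = 0. The pre hook lowers addr to r0 - 12,
          * the words are read at r0 - 12, r0 - 8 and r0 - 4, and the
          * post hook steps back down so r0 - 12 is written back.
          */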
   8115 
   8116 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
   8117 {
   8118     int i, j, n, list, mem_idx;
   8119     bool user = a->u;
   8120     TCGv_i32 addr, tmp;
   8121 
   8122     if (user) {
   8123         /* STM (user) */
   8124         if (IS_USER(s)) {
   8125             /* Only usable in supervisor mode.  */
   8126             unallocated_encoding(s);
   8127             return true;
   8128         }
   8129     }
   8130 
   8131     list = a->list;
   8132     n = ctpop16(list);
   8133     if (n < min_n || a->rn == 15) {
   8134         unallocated_encoding(s);
   8135         return true;
   8136     }
   8137 
   8138     s->eci_handled = true;
   8139 
   8140     addr = op_addr_block_pre(s, a, n);
   8141     mem_idx = get_mem_index(s);
   8142 
   8143     for (i = j = 0; i < 16; i++) {
   8144         if (!(list & (1 << i))) {
   8145             continue;
   8146         }
   8147 
   8148         if (user && i != 15) {
   8149             tmp = tcg_temp_new_i32();
   8150             gen_helper_get_user_reg(tmp, cpu_env, tcg_constant_i32(i));
   8151         } else {
   8152             tmp = load_reg(s, i);
   8153         }
   8154         gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   8155         tcg_temp_free_i32(tmp);
   8156 
   8157         /* No need to add after the last transfer.  */
   8158         if (++j != n) {
   8159             tcg_gen_addi_i32(addr, addr, 4);
   8160         }
   8161     }
   8162 
   8163     op_addr_block_post(s, a, addr, n);
   8164     clear_eci_state(s);
   8165     return true;
   8166 }
   8167 
   8168 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
   8169 {
   8170     /* BitCount(list) < 1 is UNPREDICTABLE */
   8171     return op_stm(s, a, 1);
   8172 }
   8173 
   8174 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
   8175 {
   8176     /* Writeback register in register list is UNPREDICTABLE for T32.  */
   8177     if (a->w && (a->list & (1 << a->rn))) {
   8178         unallocated_encoding(s);
   8179         return true;
   8180     }
   8181     /* BitCount(list) < 2 is UNPREDICTABLE */
   8182     return op_stm(s, a, 2);
   8183 }
   8184 
   8185 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
   8186 {
   8187     int i, j, n, list, mem_idx;
   8188     bool loaded_base;
   8189     bool user = a->u;
   8190     bool exc_return = false;
   8191     TCGv_i32 addr, tmp, loaded_var;
   8192 
   8193     if (user) {
   8194         /* LDM (user), LDM (exception return) */
   8195         if (IS_USER(s)) {
   8196             /* Only usable in supervisor mode.  */
   8197             unallocated_encoding(s);
   8198             return true;
   8199         }
   8200         if (extract32(a->list, 15, 1)) {
   8201             exc_return = true;
   8202             user = false;
   8203         } else {
   8204             /* LDM (user) does not allow writeback.  */
   8205             if (a->w) {
   8206                 unallocated_encoding(s);
   8207                 return true;
   8208             }
   8209         }
   8210     }
   8211 
   8212     list = a->list;
   8213     n = ctpop16(list);
   8214     if (n < min_n || a->rn == 15) {
   8215         unallocated_encoding(s);
   8216         return true;
   8217     }
   8218 
   8219     s->eci_handled = true;
   8220 
   8221     addr = op_addr_block_pre(s, a, n);
   8222     mem_idx = get_mem_index(s);
   8223     loaded_base = false;
   8224     loaded_var = NULL;
   8225 
   8226     for (i = j = 0; i < 16; i++) {
   8227         if (!(list & (1 << i))) {
   8228             continue;
   8229         }
   8230 
   8231         tmp = tcg_temp_new_i32();
   8232         gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
   8233         if (user) {
   8234             gen_helper_set_user_reg(cpu_env, tcg_constant_i32(i), tmp);
   8235             tcg_temp_free_i32(tmp);
   8236         } else if (i == a->rn) {
   8237             loaded_var = tmp;
   8238             loaded_base = true;
   8239         } else if (i == 15 && exc_return) {
   8240             store_pc_exc_ret(s, tmp);
   8241         } else {
   8242             store_reg_from_load(s, i, tmp);
   8243         }
   8244 
   8245         /* No need to add after the last transfer.  */
   8246         if (++j != n) {
   8247             tcg_gen_addi_i32(addr, addr, 4);
   8248         }
   8249     }
   8250 
   8251     op_addr_block_post(s, a, addr, n);
   8252 
   8253     if (loaded_base) {
   8254         /* Note that we reject base == pc above.  */
   8255         store_reg(s, a->rn, loaded_var);
   8256     }
   8257 
   8258     if (exc_return) {
   8259         /* Restore CPSR from SPSR.  */
   8260         tmp = load_cpu_field(spsr);
   8261         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
   8262             gen_io_start();
   8263         }
   8264         gen_helper_cpsr_write_eret(cpu_env, tmp);
   8265         tcg_temp_free_i32(tmp);
   8266         /* Must exit loop to check un-masked IRQs */
   8267         s->base.is_jmp = DISAS_EXIT;
   8268     }
   8269     clear_eci_state(s);
   8270     return true;
   8271 }
   8272 
   8273 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
   8274 {
   8275     /*
   8276      * Writeback register in register list is UNPREDICTABLE
   8277      * for ArchVersion() >= 7.  Prior to v7, A32 would write
   8278      * an UNKNOWN value to the base register.
   8279      */
   8280     if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
   8281         unallocated_encoding(s);
   8282         return true;
   8283     }
   8284     /* BitCount(list) < 1 is UNPREDICTABLE */
   8285     return do_ldm(s, a, 1);
   8286 }
   8287 
   8288 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
   8289 {
   8290     /* Writeback register in register list is UNPREDICTABLE for T32. */
   8291     if (a->w && (a->list & (1 << a->rn))) {
   8292         unallocated_encoding(s);
   8293         return true;
   8294     }
   8295     /* BitCount(list) < 2 is UNPREDICTABLE */
   8296     return do_ldm(s, a, 2);
   8297 }
   8298 
   8299 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
   8300 {
   8301     /* Writeback is conditional on the base register not being loaded.  */
   8302     a->w = !(a->list & (1 << a->rn));
   8303     /* BitCount(list) < 1 is UNPREDICTABLE */
   8304     return do_ldm(s, a, 1);
   8305 }
   8306 
   8307 static bool trans_CLRM(DisasContext *s, arg_CLRM *a)
   8308 {
   8309     int i;
   8310     TCGv_i32 zero;
   8311 
   8312     if (!dc_isar_feature(aa32_m_sec_state, s)) {
   8313         return false;
   8314     }
   8315 
   8316     if (extract32(a->list, 13, 1)) {
   8317         return false;
   8318     }
   8319 
   8320     if (!a->list) {
   8321         /* UNPREDICTABLE; we choose to UNDEF */
   8322         return false;
   8323     }
   8324 
   8325     s->eci_handled = true;
   8326 
   8327     zero = tcg_constant_i32(0);
   8328     for (i = 0; i < 15; i++) {
   8329         if (extract32(a->list, i, 1)) {
   8330             /* Clear R[i] */
   8331             tcg_gen_mov_i32(cpu_R[i], zero);
   8332         }
   8333     }
   8334     if (extract32(a->list, 15, 1)) {
   8335         /*
   8336          * Clear APSR (by calling the MSR helper with the same argument
   8337          * as for "MSR APSR_nzcvqg, Rn": mask = 0b1100, SYSM=0)
   8338          */
   8339         gen_helper_v7m_msr(cpu_env, tcg_constant_i32(0xc00), zero);
   8340     }
   8341     clear_eci_state(s);
   8342     return true;
   8343 }
   8344 
   8345 /*
   8346  * Branch, branch with link
   8347  */
   8348 
   8349 static bool trans_B(DisasContext *s, arg_i *a)
   8350 {
   8351     gen_jmp(s, jmp_diff(s, a->imm));
   8352     return true;
   8353 }
   8354 
   8355 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
   8356 {
   8357     /* This has cond from encoding, required to be outside IT block.  */
   8358     if (a->cond >= 0xe) {
   8359         return false;
   8360     }
   8361     if (s->condexec_mask) {
   8362         unallocated_encoding(s);
   8363         return true;
   8364     }
   8365     arm_skip_unless(s, a->cond);
   8366     gen_jmp(s, jmp_diff(s, a->imm));
   8367     return true;
   8368 }
   8369 
   8370 static bool trans_BL(DisasContext *s, arg_i *a)
   8371 {
   8372     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
   8373     gen_jmp(s, jmp_diff(s, a->imm));
   8374     return true;
   8375 }
   8376 
   8377 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
   8378 {
   8379     /*
   8380      * BLX <imm> would be useless on M-profile; the encoding space
   8381      * is used for other insns from v8.1M onward, and UNDEFs before that.
   8382      */
   8383     if (arm_dc_feature(s, ARM_FEATURE_M)) {
   8384         return false;
   8385     }
   8386 
   8387     /* For A32, ARM_FEATURE_V5 is checked near the start of the uncond block. */
   8388     if (s->thumb && (a->imm & 2)) {
   8389         return false;
   8390     }
   8391     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | s->thumb);
   8392     store_cpu_field_constant(!s->thumb, thumb);
   8393     /* This jump is computed from an aligned PC: subtract off the low bits. */
   8394     gen_jmp(s, jmp_diff(s, a->imm - (s->pc_curr & 3)));
   8395     return true;
   8396 }
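
         /*
          * Illustrative: a Thumb BLX <imm> at pc_curr = 0x8006 reads
          * PC as 0x800a but must target Align(PC, 4) + imm, which is
          * 0x8008 + imm; subtracting pc_curr & 3 (here 2) from the
          * branch diff achieves exactly that.
          */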
   8397 
   8398 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
   8399 {
   8400     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
   8401     gen_pc_plus_diff(s, cpu_R[14], jmp_diff(s, a->imm << 12));
   8402     return true;
   8403 }
   8404 
   8405 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
   8406 {
   8407     TCGv_i32 tmp = tcg_temp_new_i32();
   8408 
   8409     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
   8410     tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
   8411     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
   8412     gen_bx(s, tmp);
   8413     return true;
   8414 }
   8415 
   8416 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
   8417 {
   8418     TCGv_i32 tmp;
   8419 
   8420     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
   8421     if (!ENABLE_ARCH_5) {
   8422         return false;
   8423     }
   8424     tmp = tcg_temp_new_i32();
   8425     tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
   8426     tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
   8427     gen_pc_plus_diff(s, cpu_R[14], curr_insn_len(s) | 1);
   8428     gen_bx(s, tmp);
   8429     return true;
   8430 }
   8431 
   8432 static bool trans_BF(DisasContext *s, arg_BF *a)
   8433 {
   8434     /*
   8435      * M-profile branch future insns. The architecture permits an
   8436      * implementation to implement these as NOPs (equivalent to
   8437      * discarding the LO_BRANCH_INFO cache immediately), and we
   8438      * take that IMPDEF option because for QEMU a "real" implementation
   8439      * would be complicated and wouldn't execute any faster.
   8440      */
   8441     if (!dc_isar_feature(aa32_lob, s)) {
   8442         return false;
   8443     }
   8444     if (a->boff == 0) {
   8445         /* SEE "Related encodings" (loop insns) */
   8446         return false;
   8447     }
   8448     /* Handle as NOP */
   8449     return true;
   8450 }
   8451 
   8452 static bool trans_DLS(DisasContext *s, arg_DLS *a)
   8453 {
   8454     /* M-profile low-overhead loop start */
   8455     TCGv_i32 tmp;
   8456 
   8457     if (!dc_isar_feature(aa32_lob, s)) {
   8458         return false;
   8459     }
   8460     if (a->rn == 13 || a->rn == 15) {
   8461         /*
   8462          * For DLSTP rn == 15 is a related encoding (LCTP); the
   8463          * other cases caught by this condition are all
   8464          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
   8465          */
   8466         return false;
   8467     }
   8468 
   8469     if (a->size != 4) {
   8470         /* DLSTP */
   8471         if (!dc_isar_feature(aa32_mve, s)) {
   8472             return false;
   8473         }
   8474         if (!vfp_access_check(s)) {
   8475             return true;
   8476         }
   8477     }
   8478 
   8479     /* Not a while loop: set LR to the count, and set LTPSIZE for DLSTP */
   8480     tmp = load_reg(s, a->rn);
   8481     store_reg(s, 14, tmp);
   8482     if (a->size != 4) {
   8483         /* DLSTP: set FPSCR.LTPSIZE */
   8484         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
   8485         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
   8486     }
   8487     return true;
   8488 }
   8489 
   8490 static bool trans_WLS(DisasContext *s, arg_WLS *a)
   8491 {
   8492     /* M-profile low-overhead while-loop start */
   8493     TCGv_i32 tmp;
   8494     DisasLabel nextlabel;
   8495 
   8496     if (!dc_isar_feature(aa32_lob, s)) {
   8497         return false;
   8498     }
   8499     if (a->rn == 13 || a->rn == 15) {
   8500         /*
   8501          * For WLSTP rn == 15 is a related encoding (LE); the
   8502          * other cases caught by this condition are all
   8503          * CONSTRAINED UNPREDICTABLE: we choose to UNDEF
   8504          */
   8505         return false;
   8506     }
   8507     if (s->condexec_mask) {
   8508         /*
   8509          * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
   8510          * we choose to UNDEF, because otherwise our use of
   8511          * gen_goto_tb(1) would clash with the use of TB exit 1
   8512          * in the dc->condjmp condition-failed codepath in
   8513          * arm_tr_tb_stop() and we'd get an assertion.
   8514          */
   8515         return false;
   8516     }
   8517     if (a->size != 4) {
   8518         /* WLSTP */
   8519         if (!dc_isar_feature(aa32_mve, s)) {
   8520             return false;
   8521         }
   8522         /*
   8523          * We need to check that the FPU is enabled here, but mustn't
   8524          * call vfp_access_check() to do that because we don't want to
   8525          * do the lazy state preservation in the "loop count is zero" case.
   8526          * Do the check-and-raise-exception by hand.
   8527          */
   8528         if (s->fp_excp_el) {
   8529             gen_exception_insn_el(s, 0, EXCP_NOCP,
   8530                                   syn_uncategorized(), s->fp_excp_el);
   8531             return true;
   8532         }
   8533     }
   8534 
   8535     nextlabel = gen_disas_label(s);
   8536     tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel.label);
   8537     tmp = load_reg(s, a->rn);
   8538     store_reg(s, 14, tmp);
   8539     if (a->size != 4) {
   8540         /*
   8541          * WLSTP: set FPSCR.LTPSIZE. This requires that we do the
   8542          * lazy state preservation, new FP context creation, etc,
   8543          * that vfp_access_check() does. We know that the actual
   8544          * access check will succeed (ie it won't generate code that
   8545          * throws an exception) because we did that check by hand earlier.
   8546          */
   8547         bool ok = vfp_access_check(s);
   8548         assert(ok);
   8549         store_cpu_field(tcg_constant_i32(a->size), v7m.ltpsize);
   8550         /*
   8551          * LTPSIZE updated, but MVE_NO_PRED will always be the same thing (0)
   8552          * when we take this upcoming exit from this TB, so gen_jmp_tb() is OK.
   8553          */
   8554     }
   8555     gen_jmp_tb(s, curr_insn_len(s), 1);
   8556 
   8557     set_disas_label(s, nextlabel);
   8558     gen_jmp(s, jmp_diff(s, a->imm));
   8559     return true;
   8560 }
   8561 
   8562 static bool trans_LE(DisasContext *s, arg_LE *a)
   8563 {
   8564     /*
   8565      * M-profile low-overhead loop end. The architecture permits an
   8566      * implementation to discard the LO_BRANCH_INFO cache at any time,
   8567      * and we take the IMPDEF option to never set it in the first place
   8568      * (equivalent to always discarding it immediately), because for QEMU
   8569      * a "real" implementation would be complicated and wouldn't execute
   8570      * any faster.
   8571      */
   8572     TCGv_i32 tmp;
   8573     DisasLabel loopend;
   8574     bool fpu_active;
   8575 
   8576     if (!dc_isar_feature(aa32_lob, s)) {
   8577         return false;
   8578     }
   8579     if (a->f && a->tp) {
   8580         return false;
   8581     }
   8582     if (s->condexec_mask) {
   8583         /*
   8584          * LE in an IT block is CONSTRAINED UNPREDICTABLE;
   8585          * we choose to UNDEF, because otherwise our use of
   8586          * gen_goto_tb(1) would clash with the use of TB exit 1
   8587          * in the dc->condjmp condition-failed codepath in
   8588          * arm_tr_tb_stop() and we'd get an assertion.
   8589          */
   8590         return false;
   8591     }
   8592     if (a->tp) {
   8593         /* LETP */
   8594         if (!dc_isar_feature(aa32_mve, s)) {
   8595             return false;
   8596         }
   8597         if (!vfp_access_check(s)) {
   8598             s->eci_handled = true;
   8599             return true;
   8600         }
   8601     }
   8602 
   8603     /* LE/LETP is OK with ECI set and leaves it untouched */
   8604     s->eci_handled = true;
   8605 
   8606     /*
   8607      * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
   8608      * UsageFault exception for the LE insn in that case. Note that we
   8609      * are not directly checking FPSCR.LTPSIZE but instead check the
   8610      * pseudocode LTPSIZE() function, which returns 4 if the FPU is
   8611      * not currently active (ie ActiveFPState() returns false). We
   8612      * can identify not-active purely from our TB state flags, as the
   8613      * FPU is active only if:
   8614      *  the FPU is enabled
   8615      *  AND lazy state preservation is not active
   8616      *  AND we do not need a new fp context (this is the ASPEN/FPCA check)
   8617      *
   8618      * Usually we don't need to care about this distinction between
   8619      * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
   8620      * will either take an exception or clear the conditions that make
   8621      * the FPU not active. But LE is an unusual case of a non-FP insn
   8622      * that looks at LTPSIZE.
   8623      */
   8624     fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
   8625 
   8626     if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
   8627         /* Need to do a runtime check for LTPSIZE != 4 */
   8628         DisasLabel skipexc = gen_disas_label(s);
   8629         tmp = load_cpu_field(v7m.ltpsize);
   8630         tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc.label);
   8631         tcg_temp_free_i32(tmp);
   8632         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
   8633         set_disas_label(s, skipexc);
   8634     }
   8635 
   8636     if (a->f) {
   8637         /* Loop-forever: just jump back to the loop start */
   8638         gen_jmp(s, jmp_diff(s, -a->imm));
   8639         return true;
   8640     }
   8641 
   8642     /*
   8643      * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
   8644      * For LE, we know at this point that LTPSIZE must be 4 and the
   8645      * loop decrement value is 1. For LETP we need to calculate the decrement
   8646      * value from LTPSIZE.
   8647      */
   8648     loopend = gen_disas_label(s);
   8649     if (!a->tp) {
   8650         tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend.label);
   8651         tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
   8652     } else {
   8653         /*
   8654          * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
   8655          * so that decr stays live after the brcondi.
   8656          */
   8657         TCGv_i32 decr = tcg_temp_local_new_i32();
   8658         TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
   8659         tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
   8660         tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
   8661         tcg_temp_free_i32(ltpsize);
   8662 
   8663         tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend.label);
   8664 
   8665         tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
   8666         tcg_temp_free_i32(decr);
   8667     }
   8668     /* Jump back to the loop start */
   8669     gen_jmp(s, jmp_diff(s, -a->imm));
   8670 
   8671     set_disas_label(s, loopend);
   8672     if (a->tp) {
   8673         /* Exits from tail-pred loops must reset LTPSIZE to 4 */
   8674         store_cpu_field(tcg_constant_i32(4), v7m.ltpsize);
   8675     }
   8676     /* End TB, continuing to following insn */
   8677     gen_jmp_tb(s, curr_insn_len(s), 1);
   8678     return true;
   8679 }
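
         /*
          * Illustrative: for LETP, LR counts elements, so with
          * LTPSIZE = 2 (32-bit elements) the decrement computed above
          * is 1 << (4 - 2) = 4 elements per iteration; plain LE always
          * decrements LR by exactly 1.
          */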
   8680 
   8681 static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
   8682 {
   8683     /*
   8684      * M-profile Loop Clear with Tail Predication. Since our implementation
   8685      * doesn't cache branch information, all we need to do is reset
   8686      * FPSCR.LTPSIZE to 4.
   8687      */
   8688 
   8689     if (!dc_isar_feature(aa32_lob, s) ||
   8690         !dc_isar_feature(aa32_mve, s)) {
   8691         return false;
   8692     }
   8693 
   8694     if (!vfp_access_check(s)) {
   8695         return true;
   8696     }
   8697 
   8698     store_cpu_field_constant(4, v7m.ltpsize);
   8699     return true;
   8700 }
   8701 
   8702 static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
   8703 {
   8704     /*
   8705      * M-profile Create Vector Tail Predicate. This insn is itself
   8706      * predicated and is subject to beatwise execution.
   8707      */
   8708     TCGv_i32 rn_shifted, masklen;
   8709 
   8710     if (!dc_isar_feature(aa32_mve, s) || a->rn == 13 || a->rn == 15) {
   8711         return false;
   8712     }
   8713 
   8714     if (!mve_eci_check(s) || !vfp_access_check(s)) {
   8715         return true;
   8716     }
   8717 
   8718     /*
   8719      * We pre-calculate the mask length here to avoid having
   8720      * to have multiple helpers specialized for size.
   8721      * We pass the helper "rn <= (1 << (4 - size)) ? (rn << size) : 16".
   8722      */
   8723     rn_shifted = tcg_temp_new_i32();
   8724     masklen = load_reg(s, a->rn);
   8725     tcg_gen_shli_i32(rn_shifted, masklen, a->size);
   8726     tcg_gen_movcond_i32(TCG_COND_LEU, masklen,
   8727                         masklen, tcg_constant_i32(1 << (4 - a->size)),
   8728                         rn_shifted, tcg_constant_i32(16));
   8729     gen_helper_mve_vctp(cpu_env, masklen);
   8730     tcg_temp_free_i32(masklen);
   8731     tcg_temp_free_i32(rn_shifted);
   8732     /* This insn updates predication bits */
   8733     s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
   8734     mve_update_eci(s);
   8735     return true;
   8736 }
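
         /*
          * Worked example (illustrative): VCTP.16 has size = 1, so the
          * cutoff is 1 << (4 - 1) = 8 elements. Rn = 3 gives a mask
          * length of 3 << 1 = 6 bytes (three 16-bit lanes predicated);
          * any Rn above 8 saturates to the full 16 bytes.
          */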
   8737 
   8738 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
   8739 {
   8740     TCGv_i32 addr, tmp;
   8741 
   8742     tmp = load_reg(s, a->rm);
   8743     if (half) {
   8744         tcg_gen_add_i32(tmp, tmp, tmp);
   8745     }
   8746     addr = load_reg(s, a->rn);
   8747     tcg_gen_add_i32(addr, addr, tmp);
   8748 
   8749     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), half ? MO_UW : MO_UB);
   8750 
   8751     tcg_gen_add_i32(tmp, tmp, tmp);
   8752     gen_pc_plus_diff(s, addr, jmp_diff(s, 0));
   8753     tcg_gen_add_i32(tmp, tmp, addr);
   8754     tcg_temp_free_i32(addr);
   8755     store_reg(s, 15, tmp);
   8756     return true;
   8757 }
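
         /*
          * Worked example (illustrative): for TBB [r0, r1] the byte at
          * r0 + r1 is an offset in halfwords, so a table entry of 5
          * branches to PC + 10, where PC is the usual Thumb read value
          * (the address of the TBB plus 4).
          */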
   8758 
   8759 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
   8760 {
   8761     return op_tbranch(s, a, false);
   8762 }
   8763 
   8764 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
   8765 {
   8766     return op_tbranch(s, a, true);
   8767 }
   8768 
   8769 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
   8770 {
   8771     TCGv_i32 tmp = load_reg(s, a->rn);
   8772 
   8773     arm_gen_condlabel(s);
   8774     tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
   8775                         tmp, 0, s->condlabel.label);
   8776     tcg_temp_free_i32(tmp);
   8777     gen_jmp(s, jmp_diff(s, a->imm));
   8778     return true;
   8779 }
   8780 
   8781 /*
   8782  * Supervisor call - both T32 & A32 come here so we need to check
   8783  * which mode we are in when checking for semihosting.
   8784  */
   8785 
   8786 static bool trans_SVC(DisasContext *s, arg_SVC *a)
   8787 {
   8788     const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
   8789 
   8790     if (!arm_dc_feature(s, ARM_FEATURE_M) &&
   8791         semihosting_enabled(s->current_el == 0) &&
   8792         (a->imm == semihost_imm)) {
   8793         gen_exception_internal_insn(s, EXCP_SEMIHOST);
   8794     } else {
   8795         gen_update_pc(s, curr_insn_len(s));
   8796         s->svc_imm = a->imm;
   8797         s->base.is_jmp = DISAS_SWI;
   8798     }
   8799     return true;
   8800 }
   8801 
   8802 /*
   8803  * Unconditional system instructions
   8804  */
   8805 
   8806 static bool trans_RFE(DisasContext *s, arg_RFE *a)
   8807 {
   8808     static const int8_t pre_offset[4] = {
   8809         /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
   8810     };
   8811     static const int8_t post_offset[4] = {
   8812         /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
   8813     };
   8814     TCGv_i32 addr, t1, t2;
   8815 
   8816     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
   8817         return false;
   8818     }
   8819     if (IS_USER(s)) {
   8820         unallocated_encoding(s);
   8821         return true;
   8822     }
   8823 
   8824     addr = load_reg(s, a->rn);
   8825     tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
   8826 
   8827     /* Load PC into tmp and CPSR into tmp2.  */
   8828     t1 = tcg_temp_new_i32();
   8829     gen_aa32_ld_i32(s, t1, addr, get_mem_index(s), MO_UL | MO_ALIGN);
   8830     tcg_gen_addi_i32(addr, addr, 4);
   8831     t2 = tcg_temp_new_i32();
   8832     gen_aa32_ld_i32(s, t2, addr, get_mem_index(s), MO_UL | MO_ALIGN);
   8833 
   8834     if (a->w) {
   8835         /* Base writeback.  */
   8836         tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
   8837         store_reg(s, a->rn, addr);
   8838     } else {
   8839         tcg_temp_free_i32(addr);
   8840     }
   8841     gen_rfe(s, t1, t2);
   8842     return true;
   8843 }
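
         /*
          * Worked example (illustrative): RFEIA r0! has pu = 0b01, so
          * pre_offset is 0: the return PC and CPSR are read from r0
          * and r0 + 4. post_offset 4 then leaves r0 + 8 in the base
          * register, just past the two words consumed.
          */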
   8844 
   8845 static bool trans_SRS(DisasContext *s, arg_SRS *a)
   8846 {
   8847     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
   8848         return false;
   8849     }
   8850     gen_srs(s, a->mode, a->pu, a->w);
   8851     return true;
   8852 }
   8853 
   8854 static bool trans_CPS(DisasContext *s, arg_CPS *a)
   8855 {
   8856     uint32_t mask, val;
   8857 
   8858     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
   8859         return false;
   8860     }
   8861     if (IS_USER(s)) {
   8862         /* Implemented as NOP in user mode.  */
   8863         return true;
   8864     }
   8865     /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
   8866 
   8867     mask = val = 0;
   8868     if (a->imod & 2) {
   8869         if (a->A) {
   8870             mask |= CPSR_A;
   8871         }
   8872         if (a->I) {
   8873             mask |= CPSR_I;
   8874         }
   8875         if (a->F) {
   8876             mask |= CPSR_F;
   8877         }
   8878         if (a->imod & 1) {
   8879             val |= mask;
   8880         }
   8881     }
   8882     if (a->M) {
   8883         mask |= CPSR_M;
   8884         val |= a->mode;
   8885     }
   8886     if (mask) {
   8887         gen_set_psr_im(s, mask, 0, val);
   8888     }
   8889     return true;
   8890 }
   8891 
   8892 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
   8893 {
   8894     TCGv_i32 tmp, addr;
   8895 
   8896     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
   8897         return false;
   8898     }
   8899     if (IS_USER(s)) {
   8900         /* Implemented as NOP in user mode.  */
   8901         return true;
   8902     }
   8903 
   8904     tmp = tcg_constant_i32(a->im);
   8905     /* FAULTMASK */
   8906     if (a->F) {
   8907         addr = tcg_constant_i32(19);
   8908         gen_helper_v7m_msr(cpu_env, addr, tmp);
   8909     }
   8910     /* PRIMASK */
   8911     if (a->I) {
   8912         addr = tcg_constant_i32(16);
   8913         gen_helper_v7m_msr(cpu_env, addr, tmp);
   8914     }
   8915     gen_rebuild_hflags(s, false);
   8916     gen_lookup_tb(s);
   8917     return true;
   8918 }
   8919 
   8920 /*
   8921  * Clear-Exclusive, Barriers
   8922  */
   8923 
   8924 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
   8925 {
   8926     if (s->thumb
   8927         ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
   8928         : !ENABLE_ARCH_6K) {
   8929         return false;
   8930     }
   8931     gen_clrex(s);
   8932     return true;
   8933 }
   8934 
   8935 static bool trans_DSB(DisasContext *s, arg_DSB *a)
   8936 {
   8937     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
   8938         return false;
   8939     }
   8940     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
   8941     return true;
   8942 }
   8943 
   8944 static bool trans_DMB(DisasContext *s, arg_DMB *a)
   8945 {
   8946     return trans_DSB(s, NULL);
   8947 }
   8948 
   8949 static bool trans_ISB(DisasContext *s, arg_ISB *a)
   8950 {
   8951     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
   8952         return false;
   8953     }
   8954     /*
   8955      * We need to break the TB after this insn to execute
   8956      * self-modifying code correctly and also to take
   8957      * any pending interrupts immediately.
   8958      */
   8959     s->base.is_jmp = DISAS_TOO_MANY;
   8960     return true;
   8961 }
   8962 
   8963 static bool trans_SB(DisasContext *s, arg_SB *a)
   8964 {
   8965     if (!dc_isar_feature(aa32_sb, s)) {
   8966         return false;
   8967     }
   8968     /*
   8969      * TODO: There is no speculation barrier opcode
   8970      * for TCG; MB and end the TB instead.
   8971      */
   8972     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
   8973     s->base.is_jmp = DISAS_TOO_MANY;
   8974     return true;
   8975 }
   8976 
   8977 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
   8978 {
   8979     if (!ENABLE_ARCH_6) {
   8980         return false;
   8981     }
   8982     if (a->E != (s->be_data == MO_BE)) {
   8983         gen_helper_setend(cpu_env);
   8984         s->base.is_jmp = DISAS_UPDATE_EXIT;
   8985     }
   8986     return true;
   8987 }
   8988 
   8989 /*
   8990  * Preload instructions
   8991  * All are nops, contingent on the appropriate arch level.
   8992  */
   8993 
   8994 static bool trans_PLD(DisasContext *s, arg_PLD *a)
   8995 {
   8996     return ENABLE_ARCH_5TE;
   8997 }
   8998 
   8999 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
   9000 {
   9001     return arm_dc_feature(s, ARM_FEATURE_V7MP);
   9002 }
   9003 
   9004 static bool trans_PLI(DisasContext *s, arg_PLD *a)
   9005 {
   9006     return ENABLE_ARCH_7;
   9007 }
   9008 
   9009 /*
   9010  * If-then
   9011  */
   9012 
   9013 static bool trans_IT(DisasContext *s, arg_IT *a)
   9014 {
   9015     int cond_mask = a->cond_mask;
   9016 
   9017     /*
   9018      * No actual code generated for this insn, just setup state.
   9019      *
   9020      * Combinations of firstcond and mask which set up an 0b1111
   9021      * condition are UNPREDICTABLE; we take the CONSTRAINED
   9022      * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
   9023      * i.e. both meaning "execute always".
   9024      */
   9025     s->condexec_cond = (cond_mask >> 4) & 0xe;
   9026     s->condexec_mask = cond_mask & 0x1f;
   9027     return true;
   9028 }
   9029 
   9030 /* v8.1M CSEL/CSINC/CSNEG/CSINV */
   9031 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
   9032 {
   9033     TCGv_i32 rn, rm, zero;
   9034     DisasCompare c;
   9035 
   9036     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
   9037         return false;
   9038     }
   9039 
   9040     if (a->rm == 13) {
   9041         /* SEE "Related encodings" (MVE shifts) */
   9042         return false;
   9043     }
   9044 
   9045     if (a->rd == 13 || a->rd == 15 || a->rn == 13 || a->fcond >= 14) {
   9046         /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
   9047         return false;
   9048     }
   9049 
   9050     /* In this insn input reg fields of 0b1111 mean "zero", not "PC" */
   9051     zero = tcg_constant_i32(0);
   9052     if (a->rn == 15) {
   9053         rn = zero;
   9054     } else {
   9055         rn = load_reg(s, a->rn);
   9056     }
   9057     if (a->rm == 15) {
   9058         rm = zero;
   9059     } else {
   9060         rm = load_reg(s, a->rm);
   9061     }
   9062 
   9063     switch (a->op) {
   9064     case 0: /* CSEL */
   9065         break;
   9066     case 1: /* CSINC */
   9067         tcg_gen_addi_i32(rm, rm, 1);
   9068         break;
   9069     case 2: /* CSINV */
   9070         tcg_gen_not_i32(rm, rm);
   9071         break;
   9072     case 3: /* CSNEG */
   9073         tcg_gen_neg_i32(rm, rm);
   9074         break;
   9075     default:
   9076         g_assert_not_reached();
   9077     }
   9078 
   9079     arm_test_cc(&c, a->fcond);
   9080     tcg_gen_movcond_i32(c.cond, rn, c.value, zero, rn, rm);
   9081     arm_free_cc(&c);
   9082 
   9083     store_reg(s, a->rd, rn);
   9084     tcg_temp_free_i32(rm);
   9085 
   9086     return true;
   9087 }
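
         /*
          * Illustrative: CSINC r0, r1, r2, EQ writes r1 to r0 when Z
          * is set and r2 + 1 otherwise; an rn or rm field of 0b1111
          * selects the zero operand rather than the PC, as noted
          * above.
          */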
   9088 
   9089 /*
   9090  * Legacy decoder.
   9091  */
   9092 
   9093 static void disas_arm_insn(DisasContext *s, unsigned int insn)
   9094 {
   9095     unsigned int cond = insn >> 28;
   9096 
   9097     /* M variants do not implement ARM mode; this must raise the INVSTATE
   9098      * UsageFault exception.
   9099      */
   9100     if (arm_dc_feature(s, ARM_FEATURE_M)) {
   9101         gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
   9102         return;
   9103     }
   9104 
   9105     if (s->pstate_il) {
   9106         /*
   9107          * Illegal execution state. This has priority over BTI
   9108          * exceptions, but comes after instruction abort exceptions.
   9109          */
   9110         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
   9111         return;
   9112     }
   9113 
   9114     if (cond == 0xf) {
   9115         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
   9116          * choose to UNDEF. In ARMv5 and above the space is used
   9117          * for miscellaneous unconditional instructions.
   9118          */
   9119         if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
   9120             unallocated_encoding(s);
   9121             return;
   9122         }
   9123 
   9124         /* Unconditional instructions.  */
   9125         /* TODO: Perhaps merge these into one decodetree output file.  */
   9126         if (disas_a32_uncond(s, insn) ||
   9127             disas_vfp_uncond(s, insn) ||
   9128             disas_neon_dp(s, insn) ||
   9129             disas_neon_ls(s, insn) ||
   9130             disas_neon_shared(s, insn)) {
   9131             return;
   9132         }
   9133         /* fall back to legacy decoder */
   9134 
   9135         if ((insn & 0x0e000f00) == 0x0c000100) {
   9136             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
   9137                 /* iWMMXt register transfer.  */
   9138                 if (extract32(s->c15_cpar, 1, 1)) {
   9139                     if (!disas_iwmmxt_insn(s, insn)) {
   9140                         return;
   9141                     }
   9142                 }
   9143             }
   9144         }
   9145         goto illegal_op;
   9146     }
   9147     if (cond != 0xe) {
   9148         /* If the condition is not "always execute", generate a
   9149            conditional jump to the next instruction. */
   9150         arm_skip_unless(s, cond);
   9151     }
   9152 
   9153     /* TODO: Perhaps merge these into one decodetree output file.  */
   9154     if (disas_a32(s, insn) ||
   9155         disas_vfp(s, insn)) {
   9156         return;
   9157     }
   9158     /* fall back to legacy decoder */
   9159     /* TODO: convert the xscale/iwmmxt decoder to decodetree? */
   9160     if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
   9161         if (((insn & 0x0c000e00) == 0x0c000000)
   9162             && ((insn & 0x03000000) != 0x03000000)) {
   9163             /* Coprocessor insn, coprocessor 0 or 1 */
   9164             disas_xscale_insn(s, insn);
   9165             return;
   9166         }
   9167     }
   9168 
   9169 illegal_op:
   9170     unallocated_encoding(s);
   9171 }
   9172 
   9173 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
   9174 {
   9175     /*
   9176      * Return true if this is a 16-bit instruction. We must be precise
   9177      * about this (matching the decode).
   9178      */
   9179     if ((insn >> 11) < 0x1d) {
   9180         /* Definitely a 16-bit instruction */
   9181         return true;
   9182     }
   9183 
   9184     /* Top five bits 0b11101 / 0b11110 / 0b11111: this is the
   9185      * first half of a 32-bit Thumb insn. Thumb-1 cores might
   9186      * end up actually treating this as two 16-bit insns, though,
   9187      * if it's half of a bl/blx pair that might span a page boundary.
   9188      */
   9189     if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
   9190         arm_dc_feature(s, ARM_FEATURE_M)) {
   9191         /* Thumb2 cores (including all M profile ones) always treat
   9192          * 32-bit insns as 32-bit.
   9193          */
   9194         return false;
   9195     }
   9196 
   9197     if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
   9198         /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
   9199          * is not on the next page; we merge this into a 32-bit
   9200          * insn.
   9201          */
   9202         return false;
   9203     }
   9204     /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
   9205      * 0b1111_1xxx_xxxx_xxxx : BL suffix;
   9206      * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
   9207      *  -- handle as single 16 bit insn
   9208      */
   9209     return true;
   9210 }
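
        /*
         * Worked examples for the checks above (illustrative):
         *  - 0x4770 (BX lr): 0x4770 >> 11 = 0x08 < 0x1d, so 16-bit.
         *  - 0xe92d (first half of PUSH.W): 0xe92d >> 11 = 0x1d, so a
         *    Thumb-2 core treats it as half of a 32-bit insn.
         *  - 0xf7ff (BL prefix): 0xf7ff >> 11 = 0x1e, merged into a
         *    32-bit insn unless a Thumb-1 core sees it on the last
         *    halfword of a page.
         */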
   9211 
   9212 /* Translate a 32-bit thumb instruction. */
   9213 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
   9214 {
   9215     /*
   9216      * ARMv6-M supports a limited subset of Thumb2 instructions.
   9217      * Other Thumb1 architectures allow only 32-bit
   9218      * combined BL/BLX prefix and suffix.
   9219      */
   9220     if (arm_dc_feature(s, ARM_FEATURE_M) &&
   9221         !arm_dc_feature(s, ARM_FEATURE_V7)) {
   9222         int i;
   9223         bool found = false;
   9224         static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
   9225                                                0xf3b08040 /* dsb */,
   9226                                                0xf3b08050 /* dmb */,
   9227                                                0xf3b08060 /* isb */,
   9228                                                0xf3e08000 /* mrs */,
   9229                                                0xf000d000 /* bl */};
   9230         static const uint32_t armv6m_mask[] = {0xffe0d000,
   9231                                                0xfff0d0f0,
   9232                                                0xfff0d0f0,
   9233                                                0xfff0d0f0,
   9234                                                0xffe0d000,
   9235                                                0xf800d000};
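
                /*
                 * Worked example (illustrative): the canonical DSB SY
                 * encoding 0xf3bf8f4f ANDed with armv6m_mask[1]
                 * (0xfff0d0f0) gives 0xf3b08040 == armv6m_insn[1], so
                 * the loop below accepts it.
                 */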
   9236 
   9237         for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
   9238             if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
   9239                 found = true;
   9240                 break;
   9241             }
   9242         }
   9243         if (!found) {
   9244             goto illegal_op;
   9245         }
   9246     } else if ((insn & 0xf800e800) != 0xf000e800)  {
   9247         if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
   9248             unallocated_encoding(s);
   9249             return;
   9250         }
   9251     }
   9252 
   9253     if (arm_dc_feature(s, ARM_FEATURE_M)) {
   9254         /*
   9255          * NOCP takes precedence over any UNDEF for (almost) the
   9256          * entire wide range of coprocessor-space encodings, so check
   9257          * for it first before proceeding to actually decode eg VFP
   9258          * insns. This decode also handles the few insns which are
   9259          * in copro space but do not have NOCP checks (eg VLLDM, VLSTM).
   9260          */
   9261         if (disas_m_nocp(s, insn)) {
   9262             return;
   9263         }
   9264     }
   9265 
   9266     if ((insn & 0xef000000) == 0xef000000) {
   9267         /*
   9268          * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
   9269          * transform into
   9270          * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
   9271          */
   9272         uint32_t a32_insn = (insn & 0xe2ffffff) |
   9273             ((insn & (1 << 28)) >> 4) | (1 << 28);
   9274 
   9275         if (disas_neon_dp(s, a32_insn)) {
   9276             return;
   9277         }
   9278     }
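
            /*
             * Worked example (illustrative): T32 0xef012345 (p = 0)
             * becomes A32 0xf2012345, and T32 0xff012345 (p = 1)
             * becomes A32 0xf3012345 under the transform above.
             */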
   9279 
   9280     if ((insn & 0xff100000) == 0xf9000000) {
   9281         /*
   9282          * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
   9283          * transform into
   9284          * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
   9285          */
   9286         uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
   9287 
   9288         if (disas_neon_ls(s, a32_insn)) {
   9289             return;
   9290         }
   9291     }
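
            /*
             * Worked example (illustrative): T32 0xf9001234 becomes
             * A32 0xf4001234 under the transform above.
             */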
   9292 
   9293     /*
   9294      * TODO: Perhaps merge these into one decodetree output file.
   9295      * Note disas_vfp is written for a32 with cond field in the
   9296      * top nibble.  The t32 encoding requires 0xe in the top nibble.
   9297      */
   9298     if (disas_t32(s, insn) ||
   9299         disas_vfp_uncond(s, insn) ||
   9300         disas_neon_shared(s, insn) ||
   9301         disas_mve(s, insn) ||
   9302         ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
   9303         return;
   9304     }
   9305 
   9306 illegal_op:
   9307     unallocated_encoding(s);
   9308 }
   9309 
   9310 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
   9311 {
   9312     if (!disas_t16(s, insn)) {
   9313         unallocated_encoding(s);
   9314     }
   9315 }
   9316 
   9317 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
   9318 {
   9319     /* Return true if the insn at dc->base.pc_next might cross a page boundary.
   9320      * (False positives are OK, false negatives are not.)
   9321      * We know this is a Thumb insn, and our caller ensures we are
   9322      * only called if dc->base.pc_next is less than 4 bytes from the page
   9323      * boundary, so we cross the page if the first 16 bits indicate
   9324      * that this is a 32 bit insn.
   9325      */
   9326     uint16_t insn = arm_lduw_code(env, &s->base, s->base.pc_next, s->sctlr_b);
   9327 
   9328     return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
   9329 }
   9330 
   9331 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
   9332 {
   9333     DisasContext *dc = container_of(dcbase, DisasContext, base);
   9334     CPUARMState *env = cs->env_ptr;
   9335     ARMCPU *cpu = env_archcpu(env);
   9336     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
   9337     uint32_t condexec, core_mmu_idx;
   9338 
   9339     dc->isar = &cpu->isar;
   9340     dc->condjmp = 0;
   9341     dc->pc_save = dc->base.pc_first;
   9342     dc->aarch64 = false;
   9343     dc->thumb = EX_TBFLAG_AM32(tb_flags, THUMB);
   9344     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
   9345     condexec = EX_TBFLAG_AM32(tb_flags, CONDEXEC);
   9346     /*
   9347      * The CONDEXEC TB flags are CPSR bits [15:10][26:25]. On A-profile this
   9348      * is always the IT bits. On M-profile, some of the reserved encodings
   9349      * of IT are used instead to indicate either ICI or ECI, which
   9350      * indicate partial progress of a restartable insn that was interrupted
   9351      * partway through by an exception:
   9352      *  * if CONDEXEC[3:0] != 0b0000 : CONDEXEC is IT bits
   9353      *  * if CONDEXEC[3:0] == 0b0000 : CONDEXEC is ICI or ECI bits
   9354      * In all cases CONDEXEC == 0 means "not in IT block or restartable
   9355      * insn, behave normally".
   9356      */
   9357     dc->eci = dc->condexec_mask = dc->condexec_cond = 0;
   9358     dc->eci_handled = false;
   9359     if (condexec & 0xf) {
   9360         dc->condexec_mask = (condexec & 0xf) << 1;
   9361         dc->condexec_cond = condexec >> 4;
   9362     } else {
   9363         if (arm_feature(env, ARM_FEATURE_M)) {
   9364             dc->eci = condexec >> 4;
   9365         }
   9366     }
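
            /*
             * Worked example (illustrative): after "ITE EQ" executes,
             * ITSTATE is 0b0000_1100 (firstcond EQ, mask 0b1100), so the
             * decode above yields condexec_cond = 0x0 (EQ) and
             * condexec_mask = 0x18; see the advance logic in
             * thumb_tr_translate_insn() below.
             */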
   9367 
   9368     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
   9369     dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
   9370     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
   9371 #if !defined(CONFIG_USER_ONLY)
   9372     dc->user = (dc->current_el == 0);
   9373 #endif
   9374     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
   9375     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
   9376     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
   9377 
   9378     if (arm_feature(env, ARM_FEATURE_M)) {
   9379         dc->vfp_enabled = 1;
   9380         dc->be_data = MO_TE;
   9381         dc->v7m_handler_mode = EX_TBFLAG_M32(tb_flags, HANDLER);
   9382         dc->v8m_secure = EX_TBFLAG_M32(tb_flags, SECURE);
   9383         dc->v8m_stackcheck = EX_TBFLAG_M32(tb_flags, STACKCHECK);
   9384         dc->v8m_fpccr_s_wrong = EX_TBFLAG_M32(tb_flags, FPCCR_S_WRONG);
   9385         dc->v7m_new_fp_ctxt_needed =
   9386             EX_TBFLAG_M32(tb_flags, NEW_FP_CTXT_NEEDED);
   9387         dc->v7m_lspact = EX_TBFLAG_M32(tb_flags, LSPACT);
   9388         dc->mve_no_pred = EX_TBFLAG_M32(tb_flags, MVE_NO_PRED);
   9389     } else {
   9390         dc->sctlr_b = EX_TBFLAG_A32(tb_flags, SCTLR__B);
   9391         dc->hstr_active = EX_TBFLAG_A32(tb_flags, HSTR_ACTIVE);
   9392         dc->ns = EX_TBFLAG_A32(tb_flags, NS);
   9393         dc->vfp_enabled = EX_TBFLAG_A32(tb_flags, VFPEN);
   9394         if (arm_feature(env, ARM_FEATURE_XSCALE)) {
   9395             dc->c15_cpar = EX_TBFLAG_A32(tb_flags, XSCALE_CPAR);
   9396         } else {
   9397             dc->vec_len = EX_TBFLAG_A32(tb_flags, VECLEN);
   9398             dc->vec_stride = EX_TBFLAG_A32(tb_flags, VECSTRIDE);
   9399         }
   9400         dc->sme_trap_nonstreaming =
   9401             EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
   9402     }
   9403     dc->cp_regs = cpu->cp_regs;
   9404     dc->features = env->features;
   9405 
   9406     /* Single step state. The code-generation logic here is:
   9407      *  SS_ACTIVE == 0:
   9408      *   generate code with no special handling for single-stepping (except
   9409      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
   9410      *   this happens anyway because those changes are all system register or
   9411      *   PSTATE writes).
   9412      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
   9413      *   emit code for one insn
   9414      *   emit code to clear PSTATE.SS
   9415      *   emit code to generate software step exception for completed step
   9416      *   end TB (as usual for having generated an exception)
   9417      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
   9418      *   emit code to generate a software step exception
   9419      *   end the TB
   9420      */
   9421     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
   9422     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
   9423     dc->is_ldex = false;
   9424 
   9425     dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
   9426 
   9427     /* If architectural single step active, limit to 1.  */
   9428     if (dc->ss_active) {
   9429         dc->base.max_insns = 1;
   9430     }
   9431 
   9432     /* ARM is a fixed-length ISA.  Bound the number of insns to execute
   9433        to those left on the page.  */
   9434     if (!dc->thumb) {
   9435         int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
   9436         dc->base.max_insns = MIN(dc->base.max_insns, bound);
   9437     }
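
            /*
             * Worked example (illustrative, assuming a 32-bit target_ulong
             * and 4K pages): pc_first = 0x00101ff8 gives
             * bound = -(0x00101ff8 | 0xfffff000) / 4 = 8 / 4 = 2, i.e.
             * two more A32 insns (at page offsets 0xff8 and 0xffc).
             */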
   9438 
   9439     cpu_V0 = tcg_temp_new_i64();
   9440     cpu_V1 = tcg_temp_new_i64();
   9441     cpu_M0 = tcg_temp_new_i64();
   9442 }
   9443 
   9444 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
   9445 {
   9446     DisasContext *dc = container_of(dcbase, DisasContext, base);
   9447 
   9448     /* A note on handling of the condexec (IT) bits:
   9449      *
   9450      * We want to avoid the overhead of having to write the updated condexec
   9451      * bits back to the CPUARMState for every instruction in an IT block. So:
   9452      * (1) if the condexec bits are not already zero then we write
   9453      * zero back into the CPUARMState now. This avoids complications trying
   9454      * to do it at the end of the block. (For example if we don't do this
   9455      * it's hard to identify whether we can safely skip writing condexec
   9456      * at the end of the TB, which we definitely want to do for the case
   9457      * where a TB doesn't do anything with the IT state at all.)
   9458      * (2) if we are going to leave the TB then we call gen_set_condexec()
   9459      * which will write the correct value into CPUARMState if zero is wrong.
   9460      * This is done both for leaving the TB at the end, and for leaving
   9461      * it because of an exception we know will happen, which is done in
   9462      * gen_exception_insn(). The latter is necessary because we need to
   9463      * leave the TB with the PC/IT state just prior to execution of the
   9464      * instruction which caused the exception.
   9465      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
   9466      * then the CPUARMState will be wrong and we need to reset it.
   9467      * This is handled in the same way as restoration of the
   9468      * PC in these situations; we save the value of the condexec bits
   9469      * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
   9470      * then uses this to restore them after an exception.
   9471      *
   9472      * Note that there are no instructions which can read the condexec
   9473      * bits, and none which can write non-static values to them, so
   9474      * we don't need to care about whether CPUARMState is correct in the
   9475      * middle of a TB.
   9476      */
   9477 
   9478     /* Reset the conditional execution bits immediately. This avoids
   9479        complications trying to do it at the end of the block.  */
   9480     if (dc->condexec_mask || dc->condexec_cond) {
   9481         store_cpu_field_constant(0, condexec_bits);
   9482     }
   9483 }
   9484 
   9485 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
   9486 {
   9487     DisasContext *dc = container_of(dcbase, DisasContext, base);
   9488     /*
   9489      * The ECI/ICI bits share PSR bits with the IT bits, so we
   9490      * need to reconstitute the bits from the split-out DisasContext
   9491      * fields here.
   9492      */
   9493     uint32_t condexec_bits;
   9494     target_ulong pc_arg = dc->base.pc_next;
   9495 
   9496     if (TARGET_TB_PCREL) {
   9497         pc_arg &= ~TARGET_PAGE_MASK;
   9498     }
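
            /*
             * Illustrative example, assuming 4K pages: with TARGET_TB_PCREL
             * set, pc_next = 0x00101ff8 is recorded as pc_arg = 0xff8,
             * i.e. only the offset within the page.
             */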
   9499     if (dc->eci) {
   9500         condexec_bits = dc->eci << 4;
   9501     } else {
   9502         condexec_bits = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
   9503     }
   9504     tcg_gen_insn_start(pc_arg, condexec_bits, 0);
   9505     dc->insn_start = tcg_last_op();
   9506 }
   9507 
   9508 static bool arm_check_kernelpage(DisasContext *dc)
   9509 {
   9510 #ifdef CONFIG_USER_ONLY
   9511     /* Intercept jump to the magic kernel page.  */
   9512     if (dc->base.pc_next >= 0xffff0000) {
   9513         /* We always get here via a jump, so we know we are not in a
   9514            conditional execution block.  */
   9515         gen_exception_internal(EXCP_KERNEL_TRAP);
   9516         dc->base.is_jmp = DISAS_NORETURN;
   9517         return true;
   9518     }
   9519 #endif
   9520     return false;
   9521 }
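
        /*
         * Illustrative note: the magic kernel page intercepted above is,
         * for linux-user, the kuser helper page; e.g. a guest branch to
         * __kuser_cmpxchg at 0xffff0fc0 ends up here and is completed
         * via EXCP_KERNEL_TRAP.
         */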
   9522 
   9523 static bool arm_check_ss_active(DisasContext *dc)
   9524 {
   9525     if (dc->ss_active && !dc->pstate_ss) {
   9526         /* Singlestep state is Active-pending.
   9527          * If we're in this state at the start of a TB then either
   9528          *  a) we just took an exception to an EL which is being debugged
   9529          *     and this is the first insn in the exception handler
   9530          *  b) debug exceptions were masked and we just unmasked them
   9531          *     without changing EL (eg by clearing PSTATE.D)
   9532          * In either case we're going to take a swstep exception in the
   9533          * "did not step an insn" case, and so the syndrome ISV and EX
   9534          * bits should be zero.
   9535          */
   9536         assert(dc->base.num_insns == 1);
   9537         gen_swstep_exception(dc, 0, 0);
   9538         dc->base.is_jmp = DISAS_NORETURN;
   9539         return true;
   9540     }
   9541 
   9542     return false;
   9543 }
   9544 
   9545 static void arm_post_translate_insn(DisasContext *dc)
   9546 {
   9547     if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) {
   9548         if (dc->pc_save != dc->condlabel.pc_save) {
   9549             gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save);
   9550         }
   9551         gen_set_label(dc->condlabel.label);
   9552         dc->condjmp = 0;
   9553     }
   9554     translator_loop_temp_check(&dc->base);
   9555 }
   9556 
   9557 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
   9558 {
   9559     DisasContext *dc = container_of(dcbase, DisasContext, base);
   9560     CPUARMState *env = cpu->env_ptr;
   9561     uint32_t pc = dc->base.pc_next;
   9562     unsigned int insn;
   9563 
   9564     /* Singlestep exceptions have the highest priority. */
   9565     if (arm_check_ss_active(dc)) {
   9566         dc->base.pc_next = pc + 4;
   9567         return;
   9568     }
   9569 
   9570     if (pc & 3) {
   9571         /*
   9572          * PC alignment fault.  This has priority over the instruction abort
   9573          * that we would receive from a translation fault via arm_ldl_code
   9574          * (or the execution of the kernelpage entrypoint). This should only
   9575          * be possible after an indirect branch, at the start of the TB.
   9576          */
   9577         assert(dc->base.num_insns == 1);
   9578         gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
   9579         dc->base.is_jmp = DISAS_NORETURN;
   9580         dc->base.pc_next = QEMU_ALIGN_UP(pc, 4);
   9581         return;
   9582     }
   9583 
   9584     if (arm_check_kernelpage(dc)) {
   9585         dc->base.pc_next = pc + 4;
   9586         return;
   9587     }
   9588 
   9589     dc->pc_curr = pc;
   9590     insn = arm_ldl_code(env, &dc->base, pc, dc->sctlr_b);
   9591     dc->insn = insn;
   9592     dc->base.pc_next = pc + 4;
   9593     disas_arm_insn(dc, insn);
   9594 
   9595     arm_post_translate_insn(dc);
   9596 
   9597     /* ARM is a fixed-length ISA.  We performed the cross-page check
   9598        in init_disas_context by adjusting max_insns.  */
   9599 }
   9600 
   9601 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
   9602 {
   9603     /* Return true if this Thumb insn is always unconditional,
   9604      * even inside an IT block. This is true of only a very few
   9605      * instructions: BKPT, HLT, and SG.
   9606      *
   9607      * A larger class of instructions are UNPREDICTABLE if used
   9608      * inside an IT block; we do not need to detect those here, because
   9609      * what we do by default (perform the cc check and update the IT
   9610      * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
   9611      * choice for those situations.
   9612      *
   9613      * insn is either a 16-bit or a 32-bit instruction; the two are
   9614      * distinguishable because for the 16-bit case the top 16 bits
   9615      * are zeroes, and that isn't a valid 32-bit encoding.
   9616      */
   9617     if ((insn & 0xffffff00) == 0xbe00) {
   9618         /* BKPT */
   9619         return true;
   9620     }
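
            /*
             * Illustrative example: 0xbe2a, i.e. BKPT #42 (0xbe00 | imm8),
             * matches the mask above.
             */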
   9621 
   9622     if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
   9623         !arm_dc_feature(s, ARM_FEATURE_M)) {
   9624         /* HLT: v8A only. This is unconditional even when it is going to
   9625          * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
   9626          * For v7 cores this was a plain old undefined encoding and so
   9627          * honours its cc check. (We might be using the encoding as
   9628          * a semihosting trap, but we don't change the cc check behaviour
   9629          * on that account, because a debugger connected to a real v7A
   9630          * core and emulating semihosting traps by catching the UNDEF
   9631          * exception would also only see cases where the cc check passed.
   9632          * No guest code should be trying to do a HLT semihosting trap
   9633          * in an IT block anyway.
   9634          */
   9635         return true;
   9636     }
   9637 
   9638     if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
   9639         arm_dc_feature(s, ARM_FEATURE_M)) {
   9640         /* SG: v8M only */
   9641         return true;
   9642     }
   9643 
   9644     return false;
   9645 }
   9646 
   9647 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
   9648 {
   9649     DisasContext *dc = container_of(dcbase, DisasContext, base);
   9650     CPUARMState *env = cpu->env_ptr;
   9651     uint32_t pc = dc->base.pc_next;
   9652     uint32_t insn;
   9653     bool is_16bit;
   9654     /* TCG op to rewind to if this turns out to be an invalid ECI state */
   9655     TCGOp *insn_eci_rewind = NULL;
   9656     target_ulong insn_eci_pc_save = -1;
   9657 
   9658     /* Misaligned Thumb PC is architecturally impossible. */
   9659     assert((dc->base.pc_next & 1) == 0);
   9660 
   9661     if (arm_check_ss_active(dc) || arm_check_kernelpage(dc)) {
   9662         dc->base.pc_next = pc + 2;
   9663         return;
   9664     }
   9665 
   9666     dc->pc_curr = pc;
   9667     insn = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
   9668     is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
   9669     pc += 2;
   9670     if (!is_16bit) {
   9671         uint32_t insn2 = arm_lduw_code(env, &dc->base, pc, dc->sctlr_b);
   9672         insn = insn << 16 | insn2;
   9673         pc += 2;
   9674     }
   9675     dc->base.pc_next = pc;
   9676     dc->insn = insn;
   9677 
   9678     if (dc->pstate_il) {
   9679         /*
   9680          * Illegal execution state. This has priority over BTI
   9681          * exceptions, but comes after instruction abort exceptions.
   9682          */
   9683         gen_exception_insn(dc, 0, EXCP_UDEF, syn_illegalstate());
   9684         return;
   9685     }
   9686 
   9687     if (dc->eci) {
   9688         /*
   9689          * For M-profile continuable instructions, ECI/ICI handling
   9690          * falls into these cases:
   9691          *  - interrupt-continuable instructions
   9692          *     These are the various load/store multiple insns (both
   9693          *     integer and fp). The ICI bits indicate the register
   9694          *     where the load/store can resume. We make the IMPDEF
   9695          *     choice to always do "instruction restart", ie ignore
   9696          *     the ICI value and always execute the ldm/stm from the
   9697          *     start. So all we need to do is zero PSR.ICI if the
   9698          *     insn executes.
   9699          *  - MVE instructions subject to beat-wise execution
   9700          *     Here the ECI bits indicate which beats have already been
   9701          *     executed, and we must honour this. Each insn of this
   9702          *     type will handle it correctly. We will update PSR.ECI
   9703          *     in the helper function for the insn (some ECI values
   9704          *     mean that the following insn also has been partially
   9705          *     executed).
   9706          *  - Special cases which don't advance ECI
   9707          *     The insns LE, LETP and BKPT leave the ECI/ICI state
   9708          *     bits untouched.
   9709          *  - all other insns (the common case)
   9710          *     Non-zero ECI/ICI means an INVSTATE UsageFault.
   9711          *     We place a rewind-marker here. Insns in the previous
   9712          *     three categories will set a flag in the DisasContext.
   9713          *     If the flag isn't set after we call disas_thumb_insn()
   9714          *     or disas_thumb2_insn() then we know we have a "some other
   9715          *     insn" case. We will rewind to the marker (ie throwing away
   9716          *     all the generated code) and instead emit "take exception".
   9717          */
   9718         insn_eci_rewind = tcg_last_op();
   9719         insn_eci_pc_save = dc->pc_save;
   9720     }
   9721 
   9722     if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
   9723         uint32_t cond = dc->condexec_cond;
   9724 
   9725         /*
   9726          * Conditionally skip the insn. Note that both 0xe and 0xf mean
   9727          * "always"; 0xf is not "never".
   9728          */
   9729         if (cond < 0x0e) {
   9730             arm_skip_unless(dc, cond);
   9731         }
   9732     }
   9733 
   9734     if (is_16bit) {
   9735         disas_thumb_insn(dc, insn);
   9736     } else {
   9737         disas_thumb2_insn(dc, insn);
   9738     }
   9739 
   9740     /* Advance the Thumb condexec condition.  */
   9741     if (dc->condexec_mask) {
   9742         dc->condexec_cond = ((dc->condexec_cond & 0xe) |
   9743                              ((dc->condexec_mask >> 4) & 1));
   9744         dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
   9745         if (dc->condexec_mask == 0) {
   9746             dc->condexec_cond = 0;
   9747         }
   9748     }
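
            /*
             * Worked example (illustrative, continuing the "ITE EQ" example
             * from arm_tr_init_disas_context()): starting from cond = 0x0,
             * mask = 0x18, the first (T) insn runs as EQ; the advance
             * shifts mask bit 4 into cond bit 0, giving cond = 0x1 (NE),
             * mask = 0x10, so the second (E) insn runs as NE; the next
             * advance clears the mask and ends the block.
             */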
   9749 
   9750     if (dc->eci && !dc->eci_handled) {
   9751         /*
   9752          * Insn wasn't valid for ECI/ICI at all: undo what we
   9753          * just generated and instead emit an exception
   9754          */
   9755         tcg_remove_ops_after(insn_eci_rewind);
   9756         dc->pc_save = insn_eci_pc_save;
   9757         dc->condjmp = 0;
   9758         gen_exception_insn(dc, 0, EXCP_INVSTATE, syn_uncategorized());
   9759     }
   9760 
   9761     arm_post_translate_insn(dc);
   9762 
   9763     /* Thumb is a variable-length ISA.  Stop translation when the next insn
   9764      * will touch a new page.  This ensures that prefetch aborts occur at
   9765      * the right place.
   9766      *
   9767      * We want to stop the TB if the next insn starts in a new page,
   9768      * or if it spans between this page and the next. This means that
   9769      * if we're looking at the last halfword in the page we need to
   9770      * see if it's a 16-bit Thumb insn (which will fit in this TB)
   9771      * or a 32-bit Thumb insn (which won't).
   9772      * This is to avoid generating a silly TB with a single 16-bit insn
   9773      * in it at the end of this page (which would execute correctly
   9774      * but isn't very efficient).
   9775      */
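            /*
             * Worked example (illustrative, assuming 4K pages): an insn
             * starting at page offset 0xffe is the last halfword, so we
             * only stop here if insn_crosses_page() reports that its first
             * 16 bits decode as a 32-bit insn.
             */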
   9776     if (dc->base.is_jmp == DISAS_NEXT
   9777         && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
   9778             || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
   9779                 && insn_crosses_page(env, dc)))) {
   9780         dc->base.is_jmp = DISAS_TOO_MANY;
   9781     }
   9782 }
   9783 
   9784 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
   9785 {
   9786     DisasContext *dc = container_of(dcbase, DisasContext, base);
   9787 
   9788     /* At this stage dc->condjmp will only be set when the skipped
   9789        instruction was a conditional branch or trap, and the PC has
   9790        already been written.  */
   9791     gen_set_condexec(dc);
   9792     if (dc->base.is_jmp == DISAS_BX_EXCRET) {
   9793         /* Exception return branches need some special case code at the
   9794          * end of the TB, which is complex enough that it has to
   9795          * handle the single-step vs not and the condition-failed
   9796          * insn codepath itself.
   9797          */
   9798         gen_bx_excret_final_code(dc);
   9799     } else if (unlikely(dc->ss_active)) {
   9800         /* Unconditional and "condition passed" instruction codepath. */
   9801         switch (dc->base.is_jmp) {
   9802         case DISAS_SWI:
   9803             gen_ss_advance(dc);
   9804             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
   9805             break;
   9806         case DISAS_HVC:
   9807             gen_ss_advance(dc);
   9808             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
   9809             break;
   9810         case DISAS_SMC:
   9811             gen_ss_advance(dc);
   9812             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
   9813             break;
   9814         case DISAS_NEXT:
   9815         case DISAS_TOO_MANY:
   9816         case DISAS_UPDATE_EXIT:
   9817         case DISAS_UPDATE_NOCHAIN:
   9818             gen_update_pc(dc, curr_insn_len(dc));
   9819             /* fall through */
   9820         default:
   9821             /* FIXME: Single stepping a WFI insn will not halt the CPU. */
   9822             gen_singlestep_exception(dc);
   9823             break;
   9824         case DISAS_NORETURN:
   9825             break;
   9826         }
   9827     } else {
   9828         /* While branches must always occur at the end of an IT block,
   9829            there are a few other things that can cause us to terminate
   9830            the TB in the middle of an IT block:
   9831             - Exception generating instructions (bkpt, swi, undefined).
   9832             - Page boundaries.
   9833             - Hardware watchpoints.
   9834            Hardware breakpoints have already been handled and skip this code.
   9835          */
   9836         switch (dc->base.is_jmp) {
   9837         case DISAS_NEXT:
   9838         case DISAS_TOO_MANY:
   9839             gen_goto_tb(dc, 1, curr_insn_len(dc));
   9840             break;
   9841         case DISAS_UPDATE_NOCHAIN:
   9842             gen_update_pc(dc, curr_insn_len(dc));
   9843             /* fall through */
   9844         case DISAS_JUMP:
   9845             gen_goto_ptr();
   9846             break;
   9847         case DISAS_UPDATE_EXIT:
   9848             gen_update_pc(dc, curr_insn_len(dc));
   9849             /* fall through */
   9850         default:
   9851             /* indicate that the hash table must be used to find the next TB */
   9852             tcg_gen_exit_tb(NULL, 0);
   9853             break;
   9854         case DISAS_NORETURN:
   9855             /* nothing more to generate */
   9856             break;
   9857         case DISAS_WFI:
   9858             gen_helper_wfi(cpu_env, tcg_constant_i32(curr_insn_len(dc)));
   9859             /*
   9860              * The helper doesn't necessarily throw an exception, but we
   9861              * must go back to the main loop to check for interrupts anyway.
   9862              */
   9863             tcg_gen_exit_tb(NULL, 0);
   9864             break;
   9865         case DISAS_WFE:
   9866             gen_helper_wfe(cpu_env);
   9867             break;
   9868         case DISAS_YIELD:
   9869             gen_helper_yield(cpu_env);
   9870             break;
   9871         case DISAS_SWI:
   9872             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
   9873             break;
   9874         case DISAS_HVC:
   9875             gen_exception_el(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
   9876             break;
   9877         case DISAS_SMC:
   9878             gen_exception_el(EXCP_SMC, syn_aa32_smc(), 3);
   9879             break;
   9880         }
   9881     }
   9882 
   9883     if (dc->condjmp) {
   9884         /* "Condition failed" instruction codepath for the branch/trap insn */
   9885         set_disas_label(dc, dc->condlabel);
   9886         gen_set_condexec(dc);
   9887         if (unlikely(dc->ss_active)) {
   9888             gen_update_pc(dc, curr_insn_len(dc));
   9889             gen_singlestep_exception(dc);
   9890         } else {
   9891             gen_goto_tb(dc, 1, curr_insn_len(dc));
   9892         }
   9893     }
   9894 }
   9895 
   9896 static void arm_tr_disas_log(const DisasContextBase *dcbase,
   9897                              CPUState *cpu, FILE *logfile)
   9898 {
   9899     DisasContext *dc = container_of(dcbase, DisasContext, base);
   9900 
   9901     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
   9902     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
   9903 }
   9904 
   9905 static const TranslatorOps arm_translator_ops = {
   9906     .init_disas_context = arm_tr_init_disas_context,
   9907     .tb_start           = arm_tr_tb_start,
   9908     .insn_start         = arm_tr_insn_start,
   9909     .translate_insn     = arm_tr_translate_insn,
   9910     .tb_stop            = arm_tr_tb_stop,
   9911     .disas_log          = arm_tr_disas_log,
   9912 };
   9913 
   9914 static const TranslatorOps thumb_translator_ops = {
   9915     .init_disas_context = arm_tr_init_disas_context,
   9916     .tb_start           = arm_tr_tb_start,
   9917     .insn_start         = arm_tr_insn_start,
   9918     .translate_insn     = thumb_tr_translate_insn,
   9919     .tb_stop            = arm_tr_tb_stop,
   9920     .disas_log          = arm_tr_disas_log,
   9921 };
   9922 
   9923 /* Generate intermediate code for basic block 'tb'.  */
   9924 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
   9925                            target_ulong pc, void *host_pc)
   9926 {
   9927     DisasContext dc = { };
   9928     const TranslatorOps *ops = &arm_translator_ops;
   9929     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);
   9930 
   9931     if (EX_TBFLAG_AM32(tb_flags, THUMB)) {
   9932         ops = &thumb_translator_ops;
   9933     }
   9934 #ifdef TARGET_AARCH64
   9935     if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
   9936         ops = &aarch64_translator_ops;
   9937     }
   9938 #endif
   9939 
   9940     translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
   9941 }