qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git

tcg-target.c.inc (102049B)


      1 /*
      2  * Tiny Code Generator for QEMU
      3  *
      4  * Copyright (c) 2008 Andrzej Zaborowski
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a copy
      7  * of this software and associated documentation files (the "Software"), to deal
      8  * in the Software without restriction, including without limitation the rights
      9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     10  * copies of the Software, and to permit persons to whom the Software is
     11  * furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included in
     14  * all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     22  * THE SOFTWARE.
     23  */
     24 
     25 #include "elf.h"
     26 #include "../tcg-ldst.c.inc"
     27 #include "../tcg-pool.c.inc"
     28 
     29 int arm_arch = __ARM_ARCH;
     30 
     31 #ifndef use_idiv_instructions
     32 bool use_idiv_instructions;
     33 #endif
     34 #ifndef use_neon_instructions
     35 bool use_neon_instructions;
     36 #endif
     37 
     38 #ifdef CONFIG_DEBUG_TCG
     39 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
     40     "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
     41     "%r8",  "%r9",  "%r10", "%r11", "%r12", "%sp",  "%r14", "%pc",
     42     "%q0",  "%q1",  "%q2",  "%q3",  "%q4",  "%q5",  "%q6",  "%q7",
     43     "%q8",  "%q9",  "%q10", "%q11", "%q12", "%q13", "%q14", "%q15",
     44 };
     45 #endif
     46 
     47 static const int tcg_target_reg_alloc_order[] = {
     48     TCG_REG_R4,
     49     TCG_REG_R5,
     50     TCG_REG_R6,
     51     TCG_REG_R7,
     52     TCG_REG_R8,
     53     TCG_REG_R9,
     54     TCG_REG_R10,
     55     TCG_REG_R11,
     56     TCG_REG_R13,
     57     TCG_REG_R0,
     58     TCG_REG_R1,
     59     TCG_REG_R2,
     60     TCG_REG_R3,
     61     TCG_REG_R12,
     62     TCG_REG_R14,
     63 
     64     TCG_REG_Q0,
     65     TCG_REG_Q1,
     66     TCG_REG_Q2,
     67     TCG_REG_Q3,
     68     /* Q4 - Q7 are call-saved, and skipped. */
     69     TCG_REG_Q8,
     70     TCG_REG_Q9,
     71     TCG_REG_Q10,
     72     TCG_REG_Q11,
     73     TCG_REG_Q12,
     74     TCG_REG_Q13,
     75     TCG_REG_Q14,
     76     TCG_REG_Q15,
     77 };
     78 
     79 static const int tcg_target_call_iarg_regs[4] = {
     80     TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
     81 };
     82 static const int tcg_target_call_oarg_regs[2] = {
     83     TCG_REG_R0, TCG_REG_R1
     84 };
     85 
     86 #define TCG_REG_TMP  TCG_REG_R12
     87 #define TCG_VEC_TMP  TCG_REG_Q15
     88 #ifndef CONFIG_SOFTMMU
     89 #define TCG_REG_GUEST_BASE  TCG_REG_R11
     90 #endif
     91 
     92 typedef enum {
     93     COND_EQ = 0x0,
     94     COND_NE = 0x1,
     95     COND_CS = 0x2,	/* Unsigned greater or equal */
     96     COND_CC = 0x3,	/* Unsigned less than */
     97     COND_MI = 0x4,	/* Negative */
     98     COND_PL = 0x5,	/* Zero or greater */
     99     COND_VS = 0x6,	/* Overflow */
    100     COND_VC = 0x7,	/* No overflow */
    101     COND_HI = 0x8,	/* Unsigned greater than */
    102     COND_LS = 0x9,	/* Unsigned less or equal */
    103     COND_GE = 0xa,
    104     COND_LT = 0xb,
    105     COND_GT = 0xc,
    106     COND_LE = 0xd,
    107     COND_AL = 0xe,
    108 } ARMCond;
    109 
    110 #define TO_CPSR (1 << 20)
    111 
    112 #define SHIFT_IMM_LSL(im)	(((im) << 7) | 0x00)
    113 #define SHIFT_IMM_LSR(im)	(((im) << 7) | 0x20)
    114 #define SHIFT_IMM_ASR(im)	(((im) << 7) | 0x40)
    115 #define SHIFT_IMM_ROR(im)	(((im) << 7) | 0x60)
    116 #define SHIFT_REG_LSL(rs)	(((rs) << 8) | 0x10)
    117 #define SHIFT_REG_LSR(rs)	(((rs) << 8) | 0x30)
    118 #define SHIFT_REG_ASR(rs)	(((rs) << 8) | 0x50)
    119 #define SHIFT_REG_ROR(rs)	(((rs) << 8) | 0x70)
    120 
    121 typedef enum {
    122     ARITH_AND = 0x0 << 21,
    123     ARITH_EOR = 0x1 << 21,
    124     ARITH_SUB = 0x2 << 21,
    125     ARITH_RSB = 0x3 << 21,
    126     ARITH_ADD = 0x4 << 21,
    127     ARITH_ADC = 0x5 << 21,
    128     ARITH_SBC = 0x6 << 21,
    129     ARITH_RSC = 0x7 << 21,
    130     ARITH_TST = 0x8 << 21 | TO_CPSR,
    131     ARITH_CMP = 0xa << 21 | TO_CPSR,
    132     ARITH_CMN = 0xb << 21 | TO_CPSR,
    133     ARITH_ORR = 0xc << 21,
    134     ARITH_MOV = 0xd << 21,
    135     ARITH_BIC = 0xe << 21,
    136     ARITH_MVN = 0xf << 21,
    137 
    138     INSN_CLZ       = 0x016f0f10,
    139     INSN_RBIT      = 0x06ff0f30,
    140 
    141     INSN_LDMIA     = 0x08b00000,
    142     INSN_STMDB     = 0x09200000,
    143 
    144     INSN_LDR_IMM   = 0x04100000,
    145     INSN_LDR_REG   = 0x06100000,
    146     INSN_STR_IMM   = 0x04000000,
    147     INSN_STR_REG   = 0x06000000,
    148 
    149     INSN_LDRH_IMM  = 0x005000b0,
    150     INSN_LDRH_REG  = 0x001000b0,
    151     INSN_LDRSH_IMM = 0x005000f0,
    152     INSN_LDRSH_REG = 0x001000f0,
    153     INSN_STRH_IMM  = 0x004000b0,
    154     INSN_STRH_REG  = 0x000000b0,
    155 
    156     INSN_LDRB_IMM  = 0x04500000,
    157     INSN_LDRB_REG  = 0x06500000,
    158     INSN_LDRSB_IMM = 0x005000d0,
    159     INSN_LDRSB_REG = 0x001000d0,
    160     INSN_STRB_IMM  = 0x04400000,
    161     INSN_STRB_REG  = 0x06400000,
    162 
    163     INSN_LDRD_IMM  = 0x004000d0,
    164     INSN_LDRD_REG  = 0x000000d0,
    165     INSN_STRD_IMM  = 0x004000f0,
    166     INSN_STRD_REG  = 0x000000f0,
    167 
    168     INSN_DMB_ISH   = 0xf57ff05b,
    169     INSN_DMB_MCR   = 0xee070fba,
    170 
    171     /* Architected nop introduced in v6k.  */
    172     /* ??? This is an MSR (imm) 0,0,0 insn.  Anyone know if this
    173        also Just So Happened to do nothing on pre-v6k so that we
    174        don't need to conditionalize it?  */
    175     INSN_NOP_v6k   = 0xe320f000,
    176     /* Otherwise the assembler uses mov r0,r0 */
    177     INSN_NOP_v4    = (COND_AL << 28) | ARITH_MOV,
    178 
    179     INSN_VADD      = 0xf2000800,
    180     INSN_VAND      = 0xf2000110,
    181     INSN_VBIC      = 0xf2100110,
    182     INSN_VEOR      = 0xf3000110,
    183     INSN_VORN      = 0xf2300110,
    184     INSN_VORR      = 0xf2200110,
    185     INSN_VSUB      = 0xf3000800,
    186     INSN_VMUL      = 0xf2000910,
    187     INSN_VQADD     = 0xf2000010,
    188     INSN_VQADD_U   = 0xf3000010,
    189     INSN_VQSUB     = 0xf2000210,
    190     INSN_VQSUB_U   = 0xf3000210,
    191     INSN_VMAX      = 0xf2000600,
    192     INSN_VMAX_U    = 0xf3000600,
    193     INSN_VMIN      = 0xf2000610,
    194     INSN_VMIN_U    = 0xf3000610,
    195 
    196     INSN_VABS      = 0xf3b10300,
    197     INSN_VMVN      = 0xf3b00580,
    198     INSN_VNEG      = 0xf3b10380,
    199 
    200     INSN_VCEQ0     = 0xf3b10100,
    201     INSN_VCGT0     = 0xf3b10000,
    202     INSN_VCGE0     = 0xf3b10080,
    203     INSN_VCLE0     = 0xf3b10180,
    204     INSN_VCLT0     = 0xf3b10200,
    205 
    206     INSN_VCEQ      = 0xf3000810,
    207     INSN_VCGE      = 0xf2000310,
    208     INSN_VCGT      = 0xf2000300,
    209     INSN_VCGE_U    = 0xf3000310,
    210     INSN_VCGT_U    = 0xf3000300,
    211 
    212     INSN_VSHLI     = 0xf2800510,  /* VSHL (immediate) */
    213     INSN_VSARI     = 0xf2800010,  /* VSHR.S */
    214     INSN_VSHRI     = 0xf3800010,  /* VSHR.U */
    215     INSN_VSLI      = 0xf3800510,
    216     INSN_VSHL_S    = 0xf2000400,  /* VSHL.S (register) */
    217     INSN_VSHL_U    = 0xf3000400,  /* VSHL.U (register) */
    218 
    219     INSN_VBSL      = 0xf3100110,
    220     INSN_VBIT      = 0xf3200110,
    221     INSN_VBIF      = 0xf3300110,
    222 
    223     INSN_VTST      = 0xf2000810,
    224 
    225     INSN_VDUP_G    = 0xee800b10,  /* VDUP (ARM core register) */
    226     INSN_VDUP_S    = 0xf3b00c00,  /* VDUP (scalar) */
    227     INSN_VLDR_D    = 0xed100b00,  /* VLDR.64 */
    228     INSN_VLD1      = 0xf4200000,  /* VLD1 (multiple single elements) */
    229     INSN_VLD1R     = 0xf4a00c00,  /* VLD1 (single element to all lanes) */
    230     INSN_VST1      = 0xf4000000,  /* VST1 (multiple single elements) */
    231     INSN_VMOVI     = 0xf2800010,  /* VMOV (immediate) */
    232 } ARMInsn;
    233 
    234 #define INSN_NOP   (use_armv7_instructions ? INSN_NOP_v6k : INSN_NOP_v4)
    235 
    236 static const uint8_t tcg_cond_to_arm_cond[] = {
    237     [TCG_COND_EQ] = COND_EQ,
    238     [TCG_COND_NE] = COND_NE,
    239     [TCG_COND_LT] = COND_LT,
    240     [TCG_COND_GE] = COND_GE,
    241     [TCG_COND_LE] = COND_LE,
    242     [TCG_COND_GT] = COND_GT,
    243     /* unsigned */
    244     [TCG_COND_LTU] = COND_CC,
    245     [TCG_COND_GEU] = COND_CS,
    246     [TCG_COND_LEU] = COND_LS,
    247     [TCG_COND_GTU] = COND_HI,
    248 };
    249 
    250 static int encode_imm(uint32_t imm);
    251 
    252 /* TCG private relocation type: add with pc+imm8 */
    253 #define R_ARM_PC8  11
    254 
    255 /* TCG private relocation type: vldr with imm8 << 2 */
    256 #define R_ARM_PC11 12
    257 
    258 static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
    259 {
    260     const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    261     ptrdiff_t offset = (tcg_ptr_byte_diff(target, src_rx) - 8) >> 2;
    262 
    263     if (offset == sextract32(offset, 0, 24)) {
    264         *src_rw = deposit32(*src_rw, 0, 24, offset);
    265         return true;
    266     }
    267     return false;
    268 }
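
         /*
          * For illustration: the B/BL imm24 field holds a signed word offset
          * relative to PC, and PC reads as the branch address + 8.  A target
          * 0x1000 bytes past the branch therefore encodes as
          * (0x1000 - 8) >> 2 = 0x3fe in the low 24 bits.
          */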
    269 
    270 static bool reloc_pc13(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
    271 {
    272     const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    273     ptrdiff_t offset = tcg_ptr_byte_diff(target, src_rx) - 8;
    274 
    275     if (offset >= -0xfff && offset <= 0xfff) {
    276         tcg_insn_unit insn = *src_rw;
    277         bool u = (offset >= 0);
    278         if (!u) {
    279             offset = -offset;
    280         }
    281         insn = deposit32(insn, 23, 1, u);
    282         insn = deposit32(insn, 0, 12, offset);
    283         *src_rw = insn;
    284         return true;
    285     }
    286     return false;
    287 }
    288 
    289 static bool reloc_pc11(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
    290 {
    291     const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    292     ptrdiff_t offset = (tcg_ptr_byte_diff(target, src_rx) - 8) / 4;
    293 
    294     if (offset >= -0xff && offset <= 0xff) {
    295         tcg_insn_unit insn = *src_rw;
    296         bool u = (offset >= 0);
    297         if (!u) {
    298             offset = -offset;
    299         }
    300         insn = deposit32(insn, 23, 1, u);
    301         insn = deposit32(insn, 0, 8, offset);
    302         *src_rw = insn;
    303         return true;
    304     }
    305     return false;
    306 }
    307 
    308 static bool reloc_pc8(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
    309 {
    310     const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    311     ptrdiff_t offset = tcg_ptr_byte_diff(target, src_rx) - 8;
    312     int imm12 = encode_imm(offset);
    313 
    314     if (imm12 >= 0) {
    315         *src_rw = deposit32(*src_rw, 0, 12, imm12);
    316         return true;
    317     }
    318     return false;
    319 }
    320 
    321 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
    322                         intptr_t value, intptr_t addend)
    323 {
    324     tcg_debug_assert(addend == 0);
    325     switch (type) {
    326     case R_ARM_PC24:
    327         return reloc_pc24(code_ptr, (const tcg_insn_unit *)value);
    328     case R_ARM_PC13:
    329         return reloc_pc13(code_ptr, (const tcg_insn_unit *)value);
    330     case R_ARM_PC11:
    331         return reloc_pc11(code_ptr, (const tcg_insn_unit *)value);
    332     case R_ARM_PC8:
    333         return reloc_pc8(code_ptr, (const tcg_insn_unit *)value);
    334     default:
    335         g_assert_not_reached();
    336     }
    337 }
    338 
    339 #define TCG_CT_CONST_ARM  0x100
    340 #define TCG_CT_CONST_INV  0x200
    341 #define TCG_CT_CONST_NEG  0x400
    342 #define TCG_CT_CONST_ZERO 0x800
    343 #define TCG_CT_CONST_ORRI 0x1000
    344 #define TCG_CT_CONST_ANDI 0x2000
    345 
    346 #define ALL_GENERAL_REGS  0xffffu
    347 #define ALL_VECTOR_REGS   0xffff0000u
    348 
    349 /*
    350  * r0-r2 will be overwritten when reading the tlb entry (softmmu only)
     351  * and r0-r1 when doing the byte swapping, so don't use these.
    352  * r3 is removed for softmmu to avoid clashes with helper arguments.
    353  */
    354 #ifdef CONFIG_SOFTMMU
    355 #define ALL_QLOAD_REGS \
    356     (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
    357                           (1 << TCG_REG_R2) | (1 << TCG_REG_R3) | \
    358                           (1 << TCG_REG_R14)))
    359 #define ALL_QSTORE_REGS \
    360     (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
    361                           (1 << TCG_REG_R2) | (1 << TCG_REG_R14) | \
    362                           ((TARGET_LONG_BITS == 64) << TCG_REG_R3)))
    363 #else
    364 #define ALL_QLOAD_REGS   ALL_GENERAL_REGS
    365 #define ALL_QSTORE_REGS \
    366     (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1)))
    367 #endif
    368 
    369 /*
    370  * ARM immediates for ALU instructions are made of an unsigned 8-bit
    371  * right-rotated by an even amount between 0 and 30.
    372  *
    373  * Return < 0 if @imm cannot be encoded, else the entire imm12 field.
    374  */
    375 static int encode_imm(uint32_t imm)
    376 {
    377     uint32_t rot, imm8;
    378 
    379     /* Simple case, no rotation required. */
    380     if ((imm & ~0xff) == 0) {
    381         return imm;
    382     }
    383 
    384     /* Next, try a simple even shift.  */
    385     rot = ctz32(imm) & ~1;
    386     imm8 = imm >> rot;
    387     rot = 32 - rot;
    388     if ((imm8 & ~0xff) == 0) {
    389         goto found;
    390     }
    391 
    392     /*
    393      * Finally, try harder with rotations.
    394      * The ctz test above will have taken care of rotates >= 8.
    395      */
    396     for (rot = 2; rot < 8; rot += 2) {
    397         imm8 = rol32(imm, rot);
    398         if ((imm8 & ~0xff) == 0) {
    399             goto found;
    400         }
    401     }
    402     /* Fail: imm cannot be encoded. */
    403     return -1;
    404 
    405  found:
    406     /* Note that rot is even, and we discard bit 0 by shifting by 7. */
    407     return rot << 7 | imm8;
    408 }
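
         /*
          * For example, encode_imm(0x00ff0000): ctz32 gives rot = 16 and
          * imm8 = 0xff, so the result is (32 - 16) << 7 | 0xff = 0x8ff,
          * i.e. rotate-right-by-16 applied to 0xff.  A value such as
          * 0x00ff00ff spans more than one rotated byte and returns -1.
          */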
    409 
    410 static int encode_imm_nofail(uint32_t imm)
    411 {
    412     int ret = encode_imm(imm);
    413     tcg_debug_assert(ret >= 0);
    414     return ret;
    415 }
    416 
    417 static bool check_fit_imm(uint32_t imm)
    418 {
    419     return encode_imm(imm) >= 0;
    420 }
    421 
    422 /* Return true if v16 is a valid 16-bit shifted immediate.  */
    423 static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
    424 {
    425     if (v16 == (v16 & 0xff)) {
    426         *cmode = 0x8;
    427         *imm8 = v16 & 0xff;
    428         return true;
    429     } else if (v16 == (v16 & 0xff00)) {
    430         *cmode = 0xa;
    431         *imm8 = v16 >> 8;
    432         return true;
    433     }
    434     return false;
    435 }
    436 
    437 /* Return true if v32 is a valid 32-bit shifted immediate.  */
    438 static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
    439 {
    440     if (v32 == (v32 & 0xff)) {
    441         *cmode = 0x0;
    442         *imm8 = v32 & 0xff;
    443         return true;
    444     } else if (v32 == (v32 & 0xff00)) {
    445         *cmode = 0x2;
    446         *imm8 = (v32 >> 8) & 0xff;
    447         return true;
    448     } else if (v32 == (v32 & 0xff0000)) {
    449         *cmode = 0x4;
    450         *imm8 = (v32 >> 16) & 0xff;
    451         return true;
    452     } else if (v32 == (v32 & 0xff000000)) {
    453         *cmode = 0x6;
    454         *imm8 = v32 >> 24;
    455         return true;
    456     }
    457     return false;
    458 }
    459 
    460 /* Return true if v32 is a valid 32-bit shifting ones immediate.  */
    461 static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
    462 {
    463     if ((v32 & 0xffff00ff) == 0xff) {
    464         *cmode = 0xc;
    465         *imm8 = (v32 >> 8) & 0xff;
    466         return true;
    467     } else if ((v32 & 0xff00ffff) == 0xffff) {
    468         *cmode = 0xd;
    469         *imm8 = (v32 >> 16) & 0xff;
    470         return true;
    471     }
    472     return false;
    473 }
    474 
    475 /*
    476  * Return non-zero if v32 can be formed by MOVI+ORR.
    477  * Place the parameters for MOVI in (cmode, imm8).
    478  * Return the cmode for ORR; the imm8 can be had via extraction from v32.
    479  */
    480 static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
    481 {
    482     int i;
    483 
    484     for (i = 6; i > 0; i -= 2) {
    485         /* Mask out one byte we can add with ORR.  */
    486         uint32_t tmp = v32 & ~(0xffu << (i * 4));
    487         if (is_shimm32(tmp, cmode, imm8) ||
    488             is_soimm32(tmp, cmode, imm8)) {
    489             break;
    490         }
    491     }
    492     return i;
    493 }
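
         /*
          * For example, v32 = 0x00ff00ff: masking out the byte at bits 23:16
          * leaves 0x000000ff, which is_shimm32 accepts (cmode 0, imm8 0xff),
          * so the function returns 4; the ORR then supplies 0x00ff0000 via
          * cmode 4 with imm8 extracted from bits 23:16 of v32.
          */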
    494 
    495 /* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
    496 static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
    497 {
    498     if (v32 == deposit32(v32, 16, 16, v32)) {
    499         return is_shimm16(v32, cmode, imm8);
    500     } else {
    501         return is_shimm32(v32, cmode, imm8);
    502     }
    503 }
    504 
    505 /* Test if a constant matches the constraint.
    506  * TODO: define constraints for:
    507  *
    508  * ldr/str offset:   between -0xfff and 0xfff
    509  * ldrh/strh offset: between -0xff and 0xff
    510  * mov operand2:     values represented with x << (2 * y), x < 0x100
    511  * add, sub, eor...: ditto
    512  */
    513 static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
    514 {
    515     if (ct & TCG_CT_CONST) {
    516         return 1;
    517     } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
    518         return 1;
    519     } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
    520         return 1;
    521     } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
    522         return 1;
    523     } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
    524         return 1;
    525     }
    526 
    527     switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    528     case 0:
    529         break;
    530     case TCG_CT_CONST_ANDI:
    531         val = ~val;
    532         /* fallthru */
    533     case TCG_CT_CONST_ORRI:
    534         if (val == deposit64(val, 32, 32, val)) {
    535             int cmode, imm8;
    536             return is_shimm1632(val, &cmode, &imm8);
    537         }
    538         break;
    539     default:
    540         /* Both bits should not be set for the same insn.  */
    541         g_assert_not_reached();
    542     }
    543 
    544     return 0;
    545 }
    546 
    547 static void tcg_out_b_imm(TCGContext *s, ARMCond cond, int32_t offset)
    548 {
    549     tcg_out32(s, (cond << 28) | 0x0a000000 |
    550                     (((offset - 8) >> 2) & 0x00ffffff));
    551 }
    552 
    553 static void tcg_out_bl_imm(TCGContext *s, ARMCond cond, int32_t offset)
    554 {
    555     tcg_out32(s, (cond << 28) | 0x0b000000 |
    556                     (((offset - 8) >> 2) & 0x00ffffff));
    557 }
    558 
    559 static void tcg_out_blx_reg(TCGContext *s, ARMCond cond, TCGReg rn)
    560 {
    561     tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
    562 }
    563 
    564 static void tcg_out_blx_imm(TCGContext *s, int32_t offset)
    565 {
    566     tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
    567                 (((offset - 8) >> 2) & 0x00ffffff));
    568 }
    569 
    570 static void tcg_out_dat_reg(TCGContext *s, ARMCond cond, ARMInsn opc,
    571                             TCGReg rd, TCGReg rn, TCGReg rm, int shift)
    572 {
    573     tcg_out32(s, (cond << 28) | (0 << 25) | opc |
    574                     (rn << 16) | (rd << 12) | shift | rm);
    575 }
    576 
    577 static void tcg_out_mov_reg(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rm)
    578 {
    579     /* Simple reg-reg move, optimising out the 'do nothing' case */
    580     if (rd != rm) {
    581         tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
    582     }
    583 }
    584 
    585 static void tcg_out_bx_reg(TCGContext *s, ARMCond cond, TCGReg rn)
    586 {
    587     tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
    588 }
    589 
    590 static void tcg_out_b_reg(TCGContext *s, ARMCond cond, TCGReg rn)
    591 {
    592     /*
    593      * Unless the C portion of QEMU is compiled as thumb, we don't need
    594      * true BX semantics; merely a branch to an address held in a register.
    595      */
    596     tcg_out_bx_reg(s, cond, rn);
    597 }
    598 
    599 static void tcg_out_dat_imm(TCGContext *s, ARMCond cond, ARMInsn opc,
    600                             TCGReg rd, TCGReg rn, int im)
    601 {
    602     tcg_out32(s, (cond << 28) | (1 << 25) | opc |
    603                     (rn << 16) | (rd << 12) | im);
    604 }
    605 
    606 static void tcg_out_ldstm(TCGContext *s, ARMCond cond, ARMInsn opc,
    607                           TCGReg rn, uint16_t mask)
    608 {
    609     tcg_out32(s, (cond << 28) | opc | (rn << 16) | mask);
    610 }
    611 
    612 /* Note that this routine is used for both LDR and LDRH formats, so we do
    613    not wish to include an immediate shift at this point.  */
    614 static void tcg_out_memop_r(TCGContext *s, ARMCond cond, ARMInsn opc, TCGReg rt,
    615                             TCGReg rn, TCGReg rm, bool u, bool p, bool w)
    616 {
    617     tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
    618               | (w << 21) | (rn << 16) | (rt << 12) | rm);
    619 }
    620 
    621 static void tcg_out_memop_8(TCGContext *s, ARMCond cond, ARMInsn opc, TCGReg rt,
    622                             TCGReg rn, int imm8, bool p, bool w)
    623 {
    624     bool u = 1;
    625     if (imm8 < 0) {
    626         imm8 = -imm8;
    627         u = 0;
    628     }
    629     tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
    630               (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
    631 }
    632 
    633 static void tcg_out_memop_12(TCGContext *s, ARMCond cond, ARMInsn opc,
    634                              TCGReg rt, TCGReg rn, int imm12, bool p, bool w)
    635 {
    636     bool u = 1;
    637     if (imm12 < 0) {
    638         imm12 = -imm12;
    639         u = 0;
    640     }
    641     tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
    642               (rn << 16) | (rt << 12) | imm12);
    643 }
    644 
    645 static void tcg_out_ld32_12(TCGContext *s, ARMCond cond, TCGReg rt,
    646                             TCGReg rn, int imm12)
    647 {
    648     tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
    649 }
    650 
    651 static void tcg_out_st32_12(TCGContext *s, ARMCond cond, TCGReg rt,
    652                             TCGReg rn, int imm12)
    653 {
    654     tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
    655 }
    656 
    657 static void tcg_out_ld32_r(TCGContext *s, ARMCond cond, TCGReg rt,
    658                            TCGReg rn, TCGReg rm)
    659 {
    660     tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
    661 }
    662 
    663 static void tcg_out_st32_r(TCGContext *s, ARMCond cond, TCGReg rt,
    664                            TCGReg rn, TCGReg rm)
    665 {
    666     tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
    667 }
    668 
    669 static void tcg_out_ldrd_8(TCGContext *s, ARMCond cond, TCGReg rt,
    670                            TCGReg rn, int imm8)
    671 {
    672     tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
    673 }
    674 
    675 static void tcg_out_ldrd_r(TCGContext *s, ARMCond cond, TCGReg rt,
    676                            TCGReg rn, TCGReg rm)
    677 {
    678     tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
    679 }
    680 
    681 static void __attribute__((unused))
    682 tcg_out_ldrd_rwb(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, TCGReg rm)
    683 {
    684     tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 1);
    685 }
    686 
    687 static void tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt,
    688                            TCGReg rn, int imm8)
    689 {
    690     tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
    691 }
    692 
    693 static void tcg_out_strd_r(TCGContext *s, ARMCond cond, TCGReg rt,
    694                            TCGReg rn, TCGReg rm)
    695 {
    696     tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
    697 }
    698 
    699 /* Register pre-increment with base writeback.  */
    700 static void tcg_out_ld32_rwb(TCGContext *s, ARMCond cond, TCGReg rt,
    701                              TCGReg rn, TCGReg rm)
    702 {
    703     tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
    704 }
    705 
    706 static void tcg_out_st32_rwb(TCGContext *s, ARMCond cond, TCGReg rt,
    707                              TCGReg rn, TCGReg rm)
    708 {
    709     tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
    710 }
    711 
    712 static void tcg_out_ld16u_8(TCGContext *s, ARMCond cond, TCGReg rt,
    713                             TCGReg rn, int imm8)
    714 {
    715     tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
    716 }
    717 
    718 static void tcg_out_st16_8(TCGContext *s, ARMCond cond, TCGReg rt,
    719                            TCGReg rn, int imm8)
    720 {
    721     tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
    722 }
    723 
    724 static void tcg_out_ld16u_r(TCGContext *s, ARMCond cond, TCGReg rt,
    725                             TCGReg rn, TCGReg rm)
    726 {
    727     tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
    728 }
    729 
    730 static void tcg_out_st16_r(TCGContext *s, ARMCond cond, TCGReg rt,
    731                            TCGReg rn, TCGReg rm)
    732 {
    733     tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
    734 }
    735 
    736 static void tcg_out_ld16s_8(TCGContext *s, ARMCond cond, TCGReg rt,
    737                             TCGReg rn, int imm8)
    738 {
    739     tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
    740 }
    741 
    742 static void tcg_out_ld16s_r(TCGContext *s, ARMCond cond, TCGReg rt,
    743                             TCGReg rn, TCGReg rm)
    744 {
    745     tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
    746 }
    747 
    748 static void tcg_out_ld8_12(TCGContext *s, ARMCond cond, TCGReg rt,
    749                            TCGReg rn, int imm12)
    750 {
    751     tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
    752 }
    753 
    754 static void tcg_out_st8_12(TCGContext *s, ARMCond cond, TCGReg rt,
    755                            TCGReg rn, int imm12)
    756 {
    757     tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
    758 }
    759 
    760 static void tcg_out_ld8_r(TCGContext *s, ARMCond cond, TCGReg rt,
    761                           TCGReg rn, TCGReg rm)
    762 {
    763     tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
    764 }
    765 
    766 static void tcg_out_st8_r(TCGContext *s, ARMCond cond, TCGReg rt,
    767                           TCGReg rn, TCGReg rm)
    768 {
    769     tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
    770 }
    771 
    772 static void tcg_out_ld8s_8(TCGContext *s, ARMCond cond, TCGReg rt,
    773                            TCGReg rn, int imm8)
    774 {
    775     tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
    776 }
    777 
    778 static void tcg_out_ld8s_r(TCGContext *s, ARMCond cond, TCGReg rt,
    779                            TCGReg rn, TCGReg rm)
    780 {
    781     tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
    782 }
    783 
    784 static void tcg_out_movi_pool(TCGContext *s, ARMCond cond,
    785                               TCGReg rd, uint32_t arg)
    786 {
    787     new_pool_label(s, arg, R_ARM_PC13, s->code_ptr, 0);
    788     tcg_out_ld32_12(s, cond, rd, TCG_REG_PC, 0);
    789 }
    790 
    791 static void tcg_out_movi32(TCGContext *s, ARMCond cond,
    792                            TCGReg rd, uint32_t arg)
    793 {
    794     int imm12, diff, opc, sh1, sh2;
    795     uint32_t tt0, tt1, tt2;
    796 
    797     /* Check a single MOV/MVN before anything else.  */
    798     imm12 = encode_imm(arg);
    799     if (imm12 >= 0) {
    800         tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, imm12);
    801         return;
    802     }
    803     imm12 = encode_imm(~arg);
    804     if (imm12 >= 0) {
    805         tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, imm12);
    806         return;
    807     }
    808 
    809     /* Check for a pc-relative address.  This will usually be the TB,
    810        or within the TB, which is immediately before the code block.  */
    811     diff = tcg_pcrel_diff(s, (void *)arg) - 8;
    812     if (diff >= 0) {
    813         imm12 = encode_imm(diff);
    814         if (imm12 >= 0) {
    815             tcg_out_dat_imm(s, cond, ARITH_ADD, rd, TCG_REG_PC, imm12);
    816             return;
    817         }
    818     } else {
    819         imm12 = encode_imm(-diff);
    820         if (imm12 >= 0) {
    821             tcg_out_dat_imm(s, cond, ARITH_SUB, rd, TCG_REG_PC, imm12);
    822             return;
    823         }
    824     }
    825 
    826     /* Use movw + movt.  */
    827     if (use_armv7_instructions) {
    828         /* movw */
    829         tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
    830                   | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
    831         if (arg & 0xffff0000) {
    832             /* movt */
    833             tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
    834                       | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
    835         }
    836         return;
    837     }
    838 
    839     /* Look for sequences of two insns.  If we have lots of 1's, we can
    840        shorten the sequence by beginning with mvn and then clearing
    841        higher bits with eor.  */
    842     tt0 = arg;
    843     opc = ARITH_MOV;
    844     if (ctpop32(arg) > 16) {
    845         tt0 = ~arg;
    846         opc = ARITH_MVN;
    847     }
    848     sh1 = ctz32(tt0) & ~1;
    849     tt1 = tt0 & ~(0xff << sh1);
    850     sh2 = ctz32(tt1) & ~1;
    851     tt2 = tt1 & ~(0xff << sh2);
    852     if (tt2 == 0) {
    853         int rot;
    854 
    855         rot = ((32 - sh1) << 7) & 0xf00;
    856         tcg_out_dat_imm(s, cond, opc, rd,  0, ((tt0 >> sh1) & 0xff) | rot);
    857         rot = ((32 - sh2) << 7) & 0xf00;
    858         tcg_out_dat_imm(s, cond, ARITH_EOR, rd, rd,
    859                         ((tt0 >> sh2) & 0xff) | rot);
    860         return;
    861     }
    862 
    863     /* Otherwise, drop it into the constant pool.  */
    864     tcg_out_movi_pool(s, cond, rd, arg);
    865 }
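
         /*
          * Example of the two-insn path (pre-v7 host, assuming the value is
          * neither a single rotated byte nor reachable pc-relative):
          * arg = 0x00ff00ff has 16 set bits, so MOV is kept; sh1 = 0 and
          * sh2 = 16 split it into two rotated bytes, emitting
          *     mov  rd, #0x000000ff
          *     eor  rd, rd, #0x00ff0000
          */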
    866 
    867 /*
    868  * Emit either the reg,imm or reg,reg form of a data-processing insn.
    869  * rhs must satisfy the "rI" constraint.
    870  */
    871 static void tcg_out_dat_rI(TCGContext *s, ARMCond cond, ARMInsn opc,
    872                            TCGReg dst, TCGReg lhs, TCGArg rhs, int rhs_is_const)
    873 {
    874     if (rhs_is_const) {
    875         tcg_out_dat_imm(s, cond, opc, dst, lhs, encode_imm_nofail(rhs));
    876     } else {
    877         tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    878     }
    879 }
    880 
    881 /*
    882  * Emit either the reg,imm or reg,reg form of a data-processing insn.
    883  * rhs must satisfy the "rIK" constraint.
    884  */
    885 static void tcg_out_dat_rIK(TCGContext *s, ARMCond cond, ARMInsn opc,
    886                             ARMInsn opinv, TCGReg dst, TCGReg lhs, TCGArg rhs,
    887                             bool rhs_is_const)
    888 {
    889     if (rhs_is_const) {
    890         int imm12 = encode_imm(rhs);
    891         if (imm12 < 0) {
    892             imm12 = encode_imm_nofail(~rhs);
    893             opc = opinv;
    894         }
    895         tcg_out_dat_imm(s, cond, opc, dst, lhs, imm12);
    896     } else {
    897         tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    898     }
    899 }
    900 
    901 static void tcg_out_dat_rIN(TCGContext *s, ARMCond cond, ARMInsn opc,
    902                             ARMInsn opneg, TCGReg dst, TCGReg lhs, TCGArg rhs,
    903                             bool rhs_is_const)
    904 {
    905     /* Emit either the reg,imm or reg,reg form of a data-processing insn.
    906      * rhs must satisfy the "rIN" constraint.
    907      */
    908     if (rhs_is_const) {
    909         int imm12 = encode_imm(rhs);
    910         if (imm12 < 0) {
    911             imm12 = encode_imm_nofail(-rhs);
    912             opc = opneg;
    913         }
    914         tcg_out_dat_imm(s, cond, opc, dst, lhs, imm12);
    915     } else {
    916         tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
    917     }
    918 }
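
         /*
          * For example, with opc = ARITH_ADD, opneg = ARITH_SUB and a
          * constant rhs of -4: encode_imm(0xfffffffc) fails, so the routine
          * encodes +4 instead and emits "sub dst, lhs, #4".
          */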
    919 
    920 static void tcg_out_mul32(TCGContext *s, ARMCond cond, TCGReg rd,
    921                           TCGReg rn, TCGReg rm)
    922 {
    923     /* mul */
    924     tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
    925 }
    926 
    927 static void tcg_out_umull32(TCGContext *s, ARMCond cond, TCGReg rd0,
    928                             TCGReg rd1, TCGReg rn, TCGReg rm)
    929 {
    930     /* umull */
    931     tcg_out32(s, (cond << 28) | 0x00800090 |
    932               (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
    933 }
    934 
    935 static void tcg_out_smull32(TCGContext *s, ARMCond cond, TCGReg rd0,
    936                             TCGReg rd1, TCGReg rn, TCGReg rm)
    937 {
    938     /* smull */
    939     tcg_out32(s, (cond << 28) | 0x00c00090 |
    940               (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
    941 }
    942 
    943 static void tcg_out_sdiv(TCGContext *s, ARMCond cond,
    944                          TCGReg rd, TCGReg rn, TCGReg rm)
    945 {
    946     tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
    947 }
    948 
    949 static void tcg_out_udiv(TCGContext *s, ARMCond cond,
    950                          TCGReg rd, TCGReg rn, TCGReg rm)
    951 {
    952     tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
    953 }
    954 
    955 static void tcg_out_ext8s(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
    956 {
    957     /* sxtb */
    958     tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
    959 }
    960 
    961 static void __attribute__((unused))
    962 tcg_out_ext8u(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
    963 {
    964     tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
    965 }
    966 
    967 static void tcg_out_ext16s(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
    968 {
    969     /* sxth */
    970     tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
    971 }
    972 
    973 static void tcg_out_ext16u(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
    974 {
    975     /* uxth */
    976     tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
    977 }
    978 
    979 static void tcg_out_bswap16(TCGContext *s, ARMCond cond,
    980                             TCGReg rd, TCGReg rn, int flags)
    981 {
    982     if (flags & TCG_BSWAP_OS) {
    983         /* revsh */
    984         tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
    985         return;
    986     }
    987 
    988     /* rev16 */
    989     tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
    990     if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
    991         /* uxth */
    992         tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rd);
    993     }
    994 }
    995 
    996 static void tcg_out_bswap32(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
    997 {
    998     /* rev */
    999     tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
   1000 }
   1001 
   1002 static void tcg_out_deposit(TCGContext *s, ARMCond cond, TCGReg rd,
   1003                             TCGArg a1, int ofs, int len, bool const_a1)
   1004 {
   1005     if (const_a1) {
   1006         /* bfi becomes bfc with rn == 15.  */
   1007         a1 = 15;
   1008     }
   1009     /* bfi/bfc */
   1010     tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
   1011               | (ofs << 7) | ((ofs + len - 1) << 16));
   1012 }
   1013 
   1014 static void tcg_out_extract(TCGContext *s, ARMCond cond, TCGReg rd,
   1015                             TCGReg rn, int ofs, int len)
   1016 {
   1017     /* ubfx */
   1018     tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | rn
   1019               | (ofs << 7) | ((len - 1) << 16));
   1020 }
   1021 
   1022 static void tcg_out_sextract(TCGContext *s, ARMCond cond, TCGReg rd,
   1023                              TCGReg rn, int ofs, int len)
   1024 {
   1025     /* sbfx */
   1026     tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | rn
   1027               | (ofs << 7) | ((len - 1) << 16));
   1028 }
   1029 
   1030 static void tcg_out_ld32u(TCGContext *s, ARMCond cond,
   1031                           TCGReg rd, TCGReg rn, int32_t offset)
   1032 {
   1033     if (offset > 0xfff || offset < -0xfff) {
   1034         tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
   1035         tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
   1036     } else
   1037         tcg_out_ld32_12(s, cond, rd, rn, offset);
   1038 }
   1039 
   1040 static void tcg_out_st32(TCGContext *s, ARMCond cond,
   1041                          TCGReg rd, TCGReg rn, int32_t offset)
   1042 {
   1043     if (offset > 0xfff || offset < -0xfff) {
   1044         tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
   1045         tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
   1046     } else
   1047         tcg_out_st32_12(s, cond, rd, rn, offset);
   1048 }
   1049 
   1050 static void tcg_out_ld16u(TCGContext *s, ARMCond cond,
   1051                           TCGReg rd, TCGReg rn, int32_t offset)
   1052 {
   1053     if (offset > 0xff || offset < -0xff) {
   1054         tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
   1055         tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
   1056     } else
   1057         tcg_out_ld16u_8(s, cond, rd, rn, offset);
   1058 }
   1059 
   1060 static void tcg_out_ld16s(TCGContext *s, ARMCond cond,
   1061                           TCGReg rd, TCGReg rn, int32_t offset)
   1062 {
   1063     if (offset > 0xff || offset < -0xff) {
   1064         tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
   1065         tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
   1066     } else
   1067         tcg_out_ld16s_8(s, cond, rd, rn, offset);
   1068 }
   1069 
   1070 static void tcg_out_st16(TCGContext *s, ARMCond cond,
   1071                          TCGReg rd, TCGReg rn, int32_t offset)
   1072 {
   1073     if (offset > 0xff || offset < -0xff) {
   1074         tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
   1075         tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
   1076     } else
   1077         tcg_out_st16_8(s, cond, rd, rn, offset);
   1078 }
   1079 
   1080 static void tcg_out_ld8u(TCGContext *s, ARMCond cond,
   1081                          TCGReg rd, TCGReg rn, int32_t offset)
   1082 {
   1083     if (offset > 0xfff || offset < -0xfff) {
   1084         tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
   1085         tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
   1086     } else
   1087         tcg_out_ld8_12(s, cond, rd, rn, offset);
   1088 }
   1089 
   1090 static void tcg_out_ld8s(TCGContext *s, ARMCond cond,
   1091                          TCGReg rd, TCGReg rn, int32_t offset)
   1092 {
   1093     if (offset > 0xff || offset < -0xff) {
   1094         tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
   1095         tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
   1096     } else
   1097         tcg_out_ld8s_8(s, cond, rd, rn, offset);
   1098 }
   1099 
   1100 static void tcg_out_st8(TCGContext *s, ARMCond cond,
   1101                         TCGReg rd, TCGReg rn, int32_t offset)
   1102 {
   1103     if (offset > 0xfff || offset < -0xfff) {
   1104         tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
   1105         tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
   1106     } else
   1107         tcg_out_st8_12(s, cond, rd, rn, offset);
   1108 }
   1109 
   1110 /*
   1111  * The _goto case is normally between TBs within the same code buffer, and
   1112  * with the code buffer limited to 16MB we wouldn't need the long case.
    1113  * But we also use it for the tail-call to the qemu_ld/st helpers, which does need it.
   1114  */
   1115 static void tcg_out_goto(TCGContext *s, ARMCond cond, const tcg_insn_unit *addr)
   1116 {
   1117     intptr_t addri = (intptr_t)addr;
   1118     ptrdiff_t disp = tcg_pcrel_diff(s, addr);
   1119     bool arm_mode = !(addri & 1);
   1120 
   1121     if (arm_mode && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
   1122         tcg_out_b_imm(s, cond, disp);
   1123         return;
   1124     }
   1125 
   1126     /* LDR is interworking from v5t. */
   1127     tcg_out_movi_pool(s, cond, TCG_REG_PC, addri);
   1128 }
   1129 
   1130 /*
   1131  * The call case is mostly used for helpers - so it's not unreasonable
   1132  * for them to be beyond branch range.
   1133  */
   1134 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr)
   1135 {
   1136     intptr_t addri = (intptr_t)addr;
   1137     ptrdiff_t disp = tcg_pcrel_diff(s, addr);
   1138     bool arm_mode = !(addri & 1);
   1139 
   1140     if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
   1141         if (arm_mode) {
   1142             tcg_out_bl_imm(s, COND_AL, disp);
   1143         } else {
   1144             tcg_out_blx_imm(s, disp);
   1145         }
   1146         return;
   1147     }
   1148 
   1149     tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
   1150     tcg_out_blx_reg(s, COND_AL, TCG_REG_TMP);
   1151 }
   1152 
   1153 static void tcg_out_goto_label(TCGContext *s, ARMCond cond, TCGLabel *l)
   1154 {
   1155     if (l->has_value) {
   1156         tcg_out_goto(s, cond, l->u.value_ptr);
   1157     } else {
   1158         tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
   1159         tcg_out_b_imm(s, cond, 0);
   1160     }
   1161 }
   1162 
   1163 static void tcg_out_mb(TCGContext *s, TCGArg a0)
   1164 {
   1165     if (use_armv7_instructions) {
   1166         tcg_out32(s, INSN_DMB_ISH);
   1167     } else {
   1168         tcg_out32(s, INSN_DMB_MCR);
   1169     }
   1170 }
   1171 
   1172 static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
   1173                             const int *const_args)
   1174 {
   1175     TCGReg al = args[0];
   1176     TCGReg ah = args[1];
   1177     TCGArg bl = args[2];
   1178     TCGArg bh = args[3];
   1179     TCGCond cond = args[4];
   1180     int const_bl = const_args[2];
   1181     int const_bh = const_args[3];
   1182 
   1183     switch (cond) {
   1184     case TCG_COND_EQ:
   1185     case TCG_COND_NE:
   1186     case TCG_COND_LTU:
   1187     case TCG_COND_LEU:
   1188     case TCG_COND_GTU:
   1189     case TCG_COND_GEU:
    1190         /* We perform a conditional comparison.  If the high half is
   1191            equal, then overwrite the flags with the comparison of the
   1192            low half.  The resulting flags cover the whole.  */
   1193         tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, ah, bh, const_bh);
   1194         tcg_out_dat_rI(s, COND_EQ, ARITH_CMP, 0, al, bl, const_bl);
   1195         return cond;
   1196 
   1197     case TCG_COND_LT:
   1198     case TCG_COND_GE:
   1199         /* We perform a double-word subtraction and examine the result.
   1200            We do not actually need the result of the subtract, so the
   1201            low part "subtract" is a compare.  For the high half we have
   1202            no choice but to compute into a temporary.  */
   1203         tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, al, bl, const_bl);
   1204         tcg_out_dat_rI(s, COND_AL, ARITH_SBC | TO_CPSR,
   1205                        TCG_REG_TMP, ah, bh, const_bh);
   1206         return cond;
   1207 
   1208     case TCG_COND_LE:
   1209     case TCG_COND_GT:
   1210         /* Similar, but with swapped arguments, via reversed subtract.  */
   1211         tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR,
   1212                        TCG_REG_TMP, al, bl, const_bl);
   1213         tcg_out_dat_rI(s, COND_AL, ARITH_RSC | TO_CPSR,
   1214                        TCG_REG_TMP, ah, bh, const_bh);
   1215         return tcg_swap_cond(cond);
   1216 
   1217     default:
   1218         g_assert_not_reached();
   1219     }
   1220 }
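
         /*
          * For example, a 64-bit TCG_COND_EQ comparison emits "cmp ah, bh"
          * followed by "cmpeq al, bl", while TCG_COND_LT emits "cmp al, bl"
          * then "sbcs tmp, ah, bh" so that the signed flags describe the
          * full 64-bit subtraction.
          */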
   1221 
   1222 /*
   1223  * Note that TCGReg references Q-registers.
    1224  * Q-regno = 2 * D-regno, so shift left by 1 while inserting.
   1225  */
   1226 static uint32_t encode_vd(TCGReg rd)
   1227 {
   1228     tcg_debug_assert(rd >= TCG_REG_Q0);
   1229     return (extract32(rd, 3, 1) << 22) | (extract32(rd, 0, 3) << 13);
   1230 }
   1231 
   1232 static uint32_t encode_vn(TCGReg rn)
   1233 {
   1234     tcg_debug_assert(rn >= TCG_REG_Q0);
   1235     return (extract32(rn, 3, 1) << 7) | (extract32(rn, 0, 3) << 17);
   1236 }
   1237 
   1238 static uint32_t encode_vm(TCGReg rm)
   1239 {
   1240     tcg_debug_assert(rm >= TCG_REG_Q0);
   1241     return (extract32(rm, 3, 1) << 5) | (extract32(rm, 0, 3) << 1);
   1242 }
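
         /*
          * For example, TCG_REG_Q9 maps to D18: encode_vd places the low
          * three bits of the Q number (1) at bits 15:13 and bit 3 of the
          * Q number (1) at bit 22, giving D:Vd = 0b10010 = 18, with Vd
          * bit 12 clear because Q registers always map to even D registers.
          */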
   1243 
   1244 static void tcg_out_vreg2(TCGContext *s, ARMInsn insn, int q, int vece,
   1245                           TCGReg d, TCGReg m)
   1246 {
   1247     tcg_out32(s, insn | (vece << 18) | (q << 6) |
   1248               encode_vd(d) | encode_vm(m));
   1249 }
   1250 
   1251 static void tcg_out_vreg3(TCGContext *s, ARMInsn insn, int q, int vece,
   1252                           TCGReg d, TCGReg n, TCGReg m)
   1253 {
   1254     tcg_out32(s, insn | (vece << 20) | (q << 6) |
   1255               encode_vd(d) | encode_vn(n) | encode_vm(m));
   1256 }
   1257 
   1258 static void tcg_out_vmovi(TCGContext *s, TCGReg rd,
   1259                           int q, int op, int cmode, uint8_t imm8)
   1260 {
   1261     tcg_out32(s, INSN_VMOVI | encode_vd(rd) | (q << 6) | (op << 5)
   1262               | (cmode << 8) | extract32(imm8, 0, 4)
   1263               | (extract32(imm8, 4, 3) << 16)
   1264               | (extract32(imm8, 7, 1) << 24));
   1265 }
   1266 
   1267 static void tcg_out_vshifti(TCGContext *s, ARMInsn insn, int q,
   1268                             TCGReg rd, TCGReg rm, int l_imm6)
   1269 {
   1270     tcg_out32(s, insn | (q << 6) | encode_vd(rd) | encode_vm(rm) |
   1271               (extract32(l_imm6, 6, 1) << 7) |
   1272               (extract32(l_imm6, 0, 6) << 16));
   1273 }
   1274 
   1275 static void tcg_out_vldst(TCGContext *s, ARMInsn insn,
   1276                           TCGReg rd, TCGReg rn, int offset)
   1277 {
   1278     if (offset != 0) {
   1279         if (check_fit_imm(offset) || check_fit_imm(-offset)) {
   1280             tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
   1281                             TCG_REG_TMP, rn, offset, true);
   1282         } else {
   1283             tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, offset);
   1284             tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
   1285                             TCG_REG_TMP, TCG_REG_TMP, rn, 0);
   1286         }
   1287         rn = TCG_REG_TMP;
   1288     }
   1289     tcg_out32(s, insn | (rn << 16) | encode_vd(rd) | 0xf);
   1290 }
   1291 
   1292 #ifdef CONFIG_SOFTMMU
   1293 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
   1294  *                                     int mmu_idx, uintptr_t ra)
   1295  */
   1296 static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
   1297     [MO_UB]   = helper_ret_ldub_mmu,
   1298     [MO_SB]   = helper_ret_ldsb_mmu,
   1299 #if HOST_BIG_ENDIAN
   1300     [MO_UW] = helper_be_lduw_mmu,
   1301     [MO_UL] = helper_be_ldul_mmu,
   1302     [MO_UQ] = helper_be_ldq_mmu,
   1303     [MO_SW] = helper_be_ldsw_mmu,
   1304     [MO_SL] = helper_be_ldul_mmu,
   1305 #else
   1306     [MO_UW] = helper_le_lduw_mmu,
   1307     [MO_UL] = helper_le_ldul_mmu,
   1308     [MO_UQ] = helper_le_ldq_mmu,
   1309     [MO_SW] = helper_le_ldsw_mmu,
   1310     [MO_SL] = helper_le_ldul_mmu,
   1311 #endif
   1312 };
   1313 
   1314 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
   1315  *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
   1316  */
   1317 static void * const qemu_st_helpers[MO_SIZE + 1] = {
   1318     [MO_8]   = helper_ret_stb_mmu,
   1319 #if HOST_BIG_ENDIAN
   1320     [MO_16] = helper_be_stw_mmu,
   1321     [MO_32] = helper_be_stl_mmu,
   1322     [MO_64] = helper_be_stq_mmu,
   1323 #else
   1324     [MO_16] = helper_le_stw_mmu,
   1325     [MO_32] = helper_le_stl_mmu,
   1326     [MO_64] = helper_le_stq_mmu,
   1327 #endif
   1328 };
   1329 
   1330 /* Helper routines for marshalling helper function arguments into
   1331  * the correct registers and stack.
   1332  * argreg is where we want to put this argument, arg is the argument itself.
   1333  * Return value is the updated argreg ready for the next call.
    1334  * Note that argregs 0..3 are real registers; 4 and up go on the stack.
   1335  *
   1336  * We provide routines for arguments which are: immediate, 32 bit
   1337  * value in register, 16 and 8 bit values in register (which must be zero
   1338  * extended before use) and 64 bit value in a lo:hi register pair.
   1339  */
   1340 #define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)                \
   1341 static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)              \
   1342 {                                                                          \
   1343     if (argreg < 4) {                                                      \
   1344         MOV_ARG(s, COND_AL, argreg, arg);                                  \
   1345     } else {                                                               \
   1346         int ofs = (argreg - 4) * 4;                                        \
   1347         EXT_ARG;                                                           \
   1348         tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);            \
   1349         tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);         \
   1350     }                                                                      \
   1351     return argreg + 1;                                                     \
   1352 }
   1353 
   1354 DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
   1355     (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
   1356 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
   1357     (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
   1358 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
   1359     (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
   1360 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
   1361 
   1362 static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
   1363                                 TCGReg arglo, TCGReg arghi)
   1364 {
   1365     /* 64 bit arguments must go in even/odd register pairs
   1366      * and in 8-aligned stack slots.
   1367      */
   1368     if (argreg & 1) {
   1369         argreg++;
   1370     }
   1371     if (argreg >= 4 && (arglo & 1) == 0 && arghi == arglo + 1) {
   1372         tcg_out_strd_8(s, COND_AL, arglo,
   1373                        TCG_REG_CALL_STACK, (argreg - 4) * 4);
   1374         return argreg + 2;
   1375     } else {
   1376         argreg = tcg_out_arg_reg32(s, argreg, arglo);
   1377         argreg = tcg_out_arg_reg32(s, argreg, arghi);
   1378         return argreg;
   1379     }
   1380 }
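
         /*
          * For example, once env has taken r0 (argreg = 1), a 64-bit guest
          * address is bumped to the next even pair and lands in r2:r3, and
          * argreg = 4 is returned so that any following argument spills to
          * the stack.
          */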
   1381 
   1382 #define TLB_SHIFT	(CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
   1383 
    1384 /* We expect to use a 9-bit sign-magnitude negative offset from ENV.  */
   1385 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
   1386 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
   1387 
   1388 /* These offsets are built into the LDRD below.  */
   1389 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
   1390 QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
   1391 
   1392 /* Load and compare a TLB entry, leaving the flags set.  Returns the register
   1393    containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */
   1394 
   1395 static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
   1396                                MemOp opc, int mem_index, bool is_load)
   1397 {
   1398     int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
   1399                    : offsetof(CPUTLBEntry, addr_write));
   1400     int fast_off = TLB_MASK_TABLE_OFS(mem_index);
   1401     unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
   1402     unsigned a_mask = (1 << get_alignment_bits(opc)) - 1;
   1403     TCGReg t_addr;
   1404 
   1405     /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}.  */
   1406     tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
   1407 
   1408     /* Extract the tlb index from the address into R0.  */
   1409     tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
   1410                     SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
   1411 
   1412     /*
   1413      * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
   1414      * Load the tlb comparator into R2/R3 and the fast path addend into R1.
   1415      */
   1416     if (cmp_off == 0) {
   1417         if (TARGET_LONG_BITS == 64) {
   1418             tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
   1419         } else {
   1420             tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
   1421         }
   1422     } else {
   1423         tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
   1424                         TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
   1425         if (TARGET_LONG_BITS == 64) {
   1426             tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
   1427         } else {
   1428             tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
   1429         }
   1430     }
   1431 
   1432     /* Load the tlb addend.  */
   1433     tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
   1434                     offsetof(CPUTLBEntry, addend));
   1435 
   1436     /*
   1437      * Check alignment, check comparators.
   1438      * Do this in 2-4 insns.  Use MOVW for v7, if possible,
   1439      * to reduce the number of sequential conditional instructions.
   1440      * Almost all guests have at least 4k pages, which means that we need
    1441      * to clear at least 9 bits even for an 8-byte memory operation,
    1442      * which means it isn't worth checking for an immediate operand for BIC.
   1443      *
   1444      * For unaligned accesses, test the page of the last unit of alignment.
    1445      * This leaves the least significant alignment bits unchanged, and
    1446      * these must of course be zero.
   1447      */
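             /*
              * Worked example (illustrative): for a 4-byte access with no
              * alignment requirement (a_mask = 0, s_mask = 3), we add 3 to
              * the address first, so the page comparison also fails when the
              * access would spill into the next page.
              */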
   1448     t_addr = addrlo;
   1449     if (a_mask < s_mask) {
   1450         t_addr = TCG_REG_R0;
   1451         tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
   1452                         addrlo, s_mask - a_mask);
   1453     }
   1454     if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
   1455         tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
   1456         tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
   1457                         t_addr, TCG_REG_TMP, 0);
   1458         tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
   1459     } else {
   1460         if (a_mask) {
   1461             tcg_debug_assert(a_mask <= 0xff);
   1462             tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
   1463         }
   1464         tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
   1465                         SHIFT_IMM_LSR(TARGET_PAGE_BITS));
   1466         tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
   1467                         0, TCG_REG_R2, TCG_REG_TMP,
   1468                         SHIFT_IMM_LSL(TARGET_PAGE_BITS));
   1469     }
   1470 
   1471     if (TARGET_LONG_BITS == 64) {
   1472         tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
   1473     }
   1474 
   1475     return TCG_REG_R1;
   1476 }
   1477 
    1478 /* Record the context of a call to the out-of-line helper code for the slow
    1479    path of a load or store, so that we can later generate the correct
    1480    helper code.  */
   1481 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
   1482                                 TCGReg datalo, TCGReg datahi, TCGReg addrlo,
   1483                                 TCGReg addrhi, tcg_insn_unit *raddr,
   1484                                 tcg_insn_unit *label_ptr)
   1485 {
   1486     TCGLabelQemuLdst *label = new_ldst_label(s);
   1487 
   1488     label->is_ld = is_ld;
   1489     label->oi = oi;
   1490     label->datalo_reg = datalo;
   1491     label->datahi_reg = datahi;
   1492     label->addrlo_reg = addrlo;
   1493     label->addrhi_reg = addrhi;
   1494     label->raddr = tcg_splitwx_to_rx(raddr);
   1495     label->label_ptr[0] = label_ptr;
   1496 }
   1497 
   1498 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
   1499 {
   1500     TCGReg argreg, datalo, datahi;
   1501     MemOpIdx oi = lb->oi;
   1502     MemOp opc = get_memop(oi);
   1503 
   1504     if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
   1505         return false;
   1506     }
   1507 
   1508     argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
   1509     if (TARGET_LONG_BITS == 64) {
   1510         argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
   1511     } else {
   1512         argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
   1513     }
   1514     argreg = tcg_out_arg_imm32(s, argreg, oi);
   1515     argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
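             /*
              * The marshalling above matches the (env, addr, oi, retaddr)
              * signature of the softmmu load helpers; R14 still holds the
              * in-opcode return address set up by the conditional BL on the
              * fast path.
              */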
   1516 
   1517     /* Use the canonical unsigned helpers and minimize icache usage. */
   1518     tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]);
   1519 
   1520     datalo = lb->datalo_reg;
   1521     datahi = lb->datahi_reg;
   1522     switch (opc & MO_SSIZE) {
   1523     case MO_SB:
   1524         tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
   1525         break;
   1526     case MO_SW:
   1527         tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
   1528         break;
   1529     default:
   1530         tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
   1531         break;
   1532     case MO_UQ:
   1533         if (datalo != TCG_REG_R1) {
   1534             tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
   1535             tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
   1536         } else if (datahi != TCG_REG_R0) {
   1537             tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
   1538             tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
   1539         } else {
   1540             tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
   1541             tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
   1542             tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
   1543         }
   1544         break;
   1545     }
   1546 
   1547     tcg_out_goto(s, COND_AL, lb->raddr);
   1548     return true;
   1549 }
   1550 
   1551 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
   1552 {
   1553     TCGReg argreg, datalo, datahi;
   1554     MemOpIdx oi = lb->oi;
   1555     MemOp opc = get_memop(oi);
   1556 
   1557     if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
   1558         return false;
   1559     }
   1560 
   1561     argreg = TCG_REG_R0;
   1562     argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
   1563     if (TARGET_LONG_BITS == 64) {
   1564         argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
   1565     } else {
   1566         argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
   1567     }
   1568 
   1569     datalo = lb->datalo_reg;
   1570     datahi = lb->datahi_reg;
   1571     switch (opc & MO_SIZE) {
   1572     case MO_8:
   1573         argreg = tcg_out_arg_reg8(s, argreg, datalo);
   1574         break;
   1575     case MO_16:
   1576         argreg = tcg_out_arg_reg16(s, argreg, datalo);
   1577         break;
   1578     case MO_32:
   1579     default:
   1580         argreg = tcg_out_arg_reg32(s, argreg, datalo);
   1581         break;
   1582     case MO_64:
   1583         argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
   1584         break;
   1585     }
   1586 
   1587     argreg = tcg_out_arg_imm32(s, argreg, oi);
   1588     argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
   1589 
   1590     /* Tail-call to the helper, which will return to the fast path.  */
   1591     tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
   1592     return true;
   1593 }
   1594 #else
   1595 
   1596 static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
   1597                                    TCGReg addrhi, unsigned a_bits)
   1598 {
   1599     unsigned a_mask = (1 << a_bits) - 1;
   1600     TCGLabelQemuLdst *label = new_ldst_label(s);
   1601 
   1602     label->is_ld = is_ld;
   1603     label->addrlo_reg = addrlo;
   1604     label->addrhi_reg = addrhi;
   1605 
   1606     /* We are expecting a_bits to max out at 7, and can easily support 8. */
   1607     tcg_debug_assert(a_mask <= 0xff);
   1608     /* tst addr, #mask */
   1609     tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
   1610 
   1611     /* blne slow_path */
   1612     label->label_ptr[0] = s->code_ptr;
   1613     tcg_out_bl_imm(s, COND_NE, 0);
   1614 
   1615     label->raddr = tcg_splitwx_to_rx(s->code_ptr);
   1616 }
   1617 
   1618 static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
   1619 {
   1620     if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
   1621         return false;
   1622     }
   1623 
   1624     if (TARGET_LONG_BITS == 64) {
    1625         /* The 64-bit target address goes in the aligned pair R2:R3. */
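                 /*
                  * The three cases below only order the moves so that no
                  * source register is clobbered before it has been copied;
                  * R1 is not used by the helper here, so it is free as a
                  * scratch register in the final case.
                  */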
   1626         if (l->addrhi_reg != TCG_REG_R2) {
   1627             tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
   1628             tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
   1629         } else if (l->addrlo_reg != TCG_REG_R3) {
   1630             tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
   1631             tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
   1632         } else {
   1633             tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, TCG_REG_R2);
   1634             tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, TCG_REG_R3);
   1635             tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, TCG_REG_R1);
   1636         }
   1637     } else {
   1638         tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
   1639     }
   1640     tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_AREG0);
   1641 
   1642     /*
   1643      * Tail call to the helper, with the return address back inline,
   1644      * just for the clarity of the debugging traceback -- the helper
   1645      * cannot return.  We have used BLNE to arrive here, so LR is
   1646      * already set.
   1647      */
   1648     tcg_out_goto(s, COND_AL, (const void *)
   1649                  (l->is_ld ? helper_unaligned_ld : helper_unaligned_st));
   1650     return true;
   1651 }
   1652 
   1653 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
   1654 {
   1655     return tcg_out_fail_alignment(s, l);
   1656 }
   1657 
   1658 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
   1659 {
   1660     return tcg_out_fail_alignment(s, l);
   1661 }
   1662 #endif /* SOFTMMU */
   1663 
   1664 static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
   1665                                   TCGReg datalo, TCGReg datahi,
   1666                                   TCGReg addrlo, TCGReg addend,
   1667                                   bool scratch_addend)
   1668 {
   1669     /* Byte swapping is left to middle-end expansion. */
   1670     tcg_debug_assert((opc & MO_BSWAP) == 0);
   1671 
   1672     switch (opc & MO_SSIZE) {
   1673     case MO_UB:
   1674         tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
   1675         break;
   1676     case MO_SB:
   1677         tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
   1678         break;
   1679     case MO_UW:
   1680         tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
   1681         break;
   1682     case MO_SW:
   1683         tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
   1684         break;
   1685     case MO_UL:
   1686         tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
   1687         break;
   1688     case MO_UQ:
   1689         /* LDRD requires alignment; double-check that. */
   1690         if (get_alignment_bits(opc) >= MO_64
   1691             && (datalo & 1) == 0 && datahi == datalo + 1) {
   1692             /*
   1693              * Rm (the second address op) must not overlap Rt or Rt + 1.
   1694              * Since datalo is aligned, we can simplify the test via alignment.
   1695              * Flip the two address arguments if that works.
   1696              */
   1697             if ((addend & ~1) != datalo) {
   1698                 tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend);
   1699                 break;
   1700             }
   1701             if ((addrlo & ~1) != datalo) {
   1702                 tcg_out_ldrd_r(s, COND_AL, datalo, addend, addrlo);
   1703                 break;
   1704             }
   1705         }
   1706         if (scratch_addend) {
   1707             tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo);
   1708             tcg_out_ld32_12(s, COND_AL, datahi, addend, 4);
   1709         } else {
   1710             tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
   1711                             addend, addrlo, SHIFT_IMM_LSL(0));
   1712             tcg_out_ld32_12(s, COND_AL, datalo, TCG_REG_TMP, 0);
   1713             tcg_out_ld32_12(s, COND_AL, datahi, TCG_REG_TMP, 4);
   1714         }
   1715         break;
   1716     default:
   1717         g_assert_not_reached();
   1718     }
   1719 }
   1720 
   1721 #ifndef CONFIG_SOFTMMU
   1722 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
   1723                                    TCGReg datahi, TCGReg addrlo)
   1724 {
   1725     /* Byte swapping is left to middle-end expansion. */
   1726     tcg_debug_assert((opc & MO_BSWAP) == 0);
   1727 
   1728     switch (opc & MO_SSIZE) {
   1729     case MO_UB:
   1730         tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
   1731         break;
   1732     case MO_SB:
   1733         tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
   1734         break;
   1735     case MO_UW:
   1736         tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
   1737         break;
   1738     case MO_SW:
   1739         tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
   1740         break;
   1741     case MO_UL:
   1742         tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
   1743         break;
   1744     case MO_UQ:
   1745         /* LDRD requires alignment; double-check that. */
   1746         if (get_alignment_bits(opc) >= MO_64
   1747             && (datalo & 1) == 0 && datahi == datalo + 1) {
   1748             tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0);
   1749         } else if (datalo == addrlo) {
   1750             tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
   1751             tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
   1752         } else {
   1753             tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
   1754             tcg_out_ld32_12(s, COND_AL, datahi, addrlo, 4);
   1755         }
   1756         break;
   1757     default:
   1758         g_assert_not_reached();
   1759     }
   1760 }
   1761 #endif
   1762 
   1763 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
   1764 {
   1765     TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
   1766     MemOpIdx oi;
   1767     MemOp opc;
   1768 #ifdef CONFIG_SOFTMMU
   1769     int mem_index;
   1770     TCGReg addend;
   1771     tcg_insn_unit *label_ptr;
   1772 #else
   1773     unsigned a_bits;
   1774 #endif
   1775 
   1776     datalo = *args++;
   1777     datahi = (is64 ? *args++ : 0);
   1778     addrlo = *args++;
   1779     addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
   1780     oi = *args++;
   1781     opc = get_memop(oi);
   1782 
   1783 #ifdef CONFIG_SOFTMMU
   1784     mem_index = get_mmuidx(oi);
   1785     addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);
   1786 
    1787     /* This is a conditional BL only to load a pointer within this opcode
    1788        into LR for the slow path.  We will not be using the value for a tail call.  */
   1789     label_ptr = s->code_ptr;
   1790     tcg_out_bl_imm(s, COND_NE, 0);
   1791 
   1792     tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend, true);
   1793 
   1794     add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
   1795                         s->code_ptr, label_ptr);
   1796 #else /* !CONFIG_SOFTMMU */
   1797     a_bits = get_alignment_bits(opc);
   1798     if (a_bits) {
   1799         tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
   1800     }
   1801     if (guest_base) {
   1802         tcg_out_qemu_ld_index(s, opc, datalo, datahi,
   1803                               addrlo, TCG_REG_GUEST_BASE, false);
   1804     } else {
   1805         tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
   1806     }
   1807 #endif
   1808 }
   1809 
   1810 static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc,
   1811                                   TCGReg datalo, TCGReg datahi,
   1812                                   TCGReg addrlo, TCGReg addend,
   1813                                   bool scratch_addend)
   1814 {
   1815     /* Byte swapping is left to middle-end expansion. */
   1816     tcg_debug_assert((opc & MO_BSWAP) == 0);
   1817 
   1818     switch (opc & MO_SIZE) {
   1819     case MO_8:
   1820         tcg_out_st8_r(s, cond, datalo, addrlo, addend);
   1821         break;
   1822     case MO_16:
   1823         tcg_out_st16_r(s, cond, datalo, addrlo, addend);
   1824         break;
   1825     case MO_32:
   1826         tcg_out_st32_r(s, cond, datalo, addrlo, addend);
   1827         break;
   1828     case MO_64:
   1829         /* STRD requires alignment; double-check that. */
   1830         if (get_alignment_bits(opc) >= MO_64
   1831             && (datalo & 1) == 0 && datahi == datalo + 1) {
   1832             tcg_out_strd_r(s, cond, datalo, addrlo, addend);
   1833         } else if (scratch_addend) {
   1834             tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
   1835             tcg_out_st32_12(s, cond, datahi, addend, 4);
   1836         } else {
   1837             tcg_out_dat_reg(s, cond, ARITH_ADD, TCG_REG_TMP,
   1838                             addend, addrlo, SHIFT_IMM_LSL(0));
   1839             tcg_out_st32_12(s, cond, datalo, TCG_REG_TMP, 0);
   1840             tcg_out_st32_12(s, cond, datahi, TCG_REG_TMP, 4);
   1841         }
   1842         break;
   1843     default:
   1844         g_assert_not_reached();
   1845     }
   1846 }
   1847 
   1848 #ifndef CONFIG_SOFTMMU
   1849 static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
   1850                                    TCGReg datahi, TCGReg addrlo)
   1851 {
   1852     /* Byte swapping is left to middle-end expansion. */
   1853     tcg_debug_assert((opc & MO_BSWAP) == 0);
   1854 
   1855     switch (opc & MO_SIZE) {
   1856     case MO_8:
   1857         tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
   1858         break;
   1859     case MO_16:
   1860         tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
   1861         break;
   1862     case MO_32:
   1863         tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
   1864         break;
   1865     case MO_64:
   1866         /* STRD requires alignment; double-check that. */
   1867         if (get_alignment_bits(opc) >= MO_64
   1868             && (datalo & 1) == 0 && datahi == datalo + 1) {
   1869             tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
   1870         } else {
   1871             tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
   1872             tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
   1873         }
   1874         break;
   1875     default:
   1876         g_assert_not_reached();
   1877     }
   1878 }
   1879 #endif
   1880 
   1881 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
   1882 {
   1883     TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
   1884     MemOpIdx oi;
   1885     MemOp opc;
   1886 #ifdef CONFIG_SOFTMMU
   1887     int mem_index;
   1888     TCGReg addend;
   1889     tcg_insn_unit *label_ptr;
   1890 #else
   1891     unsigned a_bits;
   1892 #endif
   1893 
   1894     datalo = *args++;
   1895     datahi = (is64 ? *args++ : 0);
   1896     addrlo = *args++;
   1897     addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
   1898     oi = *args++;
   1899     opc = get_memop(oi);
   1900 
   1901 #ifdef CONFIG_SOFTMMU
   1902     mem_index = get_mmuidx(oi);
   1903     addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);
   1904 
   1905     tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi,
   1906                           addrlo, addend, true);
   1907 
   1908     /* The conditional call must come last, as we're going to return here.  */
   1909     label_ptr = s->code_ptr;
   1910     tcg_out_bl_imm(s, COND_NE, 0);
   1911 
   1912     add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
   1913                         s->code_ptr, label_ptr);
   1914 #else /* !CONFIG_SOFTMMU */
   1915     a_bits = get_alignment_bits(opc);
   1916     if (a_bits) {
   1917         tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
   1918     }
   1919     if (guest_base) {
   1920         tcg_out_qemu_st_index(s, COND_AL, opc, datalo, datahi,
   1921                               addrlo, TCG_REG_GUEST_BASE, false);
   1922     } else {
   1923         tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
   1924     }
   1925 #endif
   1926 }
   1927 
   1928 static void tcg_out_epilogue(TCGContext *s);
   1929 
   1930 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
   1931                        const TCGArg args[TCG_MAX_OP_ARGS],
   1932                        const int const_args[TCG_MAX_OP_ARGS])
   1933 {
   1934     TCGArg a0, a1, a2, a3, a4, a5;
   1935     int c;
   1936 
   1937     switch (opc) {
   1938     case INDEX_op_exit_tb:
   1939         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]);
   1940         tcg_out_epilogue(s);
   1941         break;
   1942     case INDEX_op_goto_tb:
   1943         {
   1944             /* Indirect jump method */
   1945             intptr_t ptr, dif, dil;
   1946             TCGReg base = TCG_REG_PC;
   1947 
   1948             tcg_debug_assert(s->tb_jmp_insn_offset == 0);
   1949             ptr = (intptr_t)tcg_splitwx_to_rx(s->tb_jmp_target_addr + args[0]);
   1950             dif = tcg_pcrel_diff(s, (void *)ptr) - 8;
   1951             dil = sextract32(dif, 0, 12);
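                     /*
                      * The -8 above accounts for the ARM pipeline: a load
                      * that uses PC as its base sees the address of the
                      * current instruction plus 8.
                      */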
   1952             if (dif != dil) {
   1953                 /* The TB is close, but outside the 12 bits addressable by
   1954                    the load.  We can extend this to 20 bits with a sub of a
   1955                    shifted immediate from pc.  In the vastly unlikely event
   1956                    the code requires more than 1MB, we'll use 2 insns and
   1957                    be no worse off.  */
   1958                 base = TCG_REG_R0;
   1959                 tcg_out_movi32(s, COND_AL, base, ptr - dil);
   1960             }
   1961             tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil);
   1962             set_jmp_reset_offset(s, args[0]);
   1963         }
   1964         break;
   1965     case INDEX_op_goto_ptr:
   1966         tcg_out_b_reg(s, COND_AL, args[0]);
   1967         break;
   1968     case INDEX_op_br:
   1969         tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
   1970         break;
   1971 
   1972     case INDEX_op_ld8u_i32:
   1973         tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
   1974         break;
   1975     case INDEX_op_ld8s_i32:
   1976         tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
   1977         break;
   1978     case INDEX_op_ld16u_i32:
   1979         tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
   1980         break;
   1981     case INDEX_op_ld16s_i32:
   1982         tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
   1983         break;
   1984     case INDEX_op_ld_i32:
   1985         tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
   1986         break;
   1987     case INDEX_op_st8_i32:
   1988         tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
   1989         break;
   1990     case INDEX_op_st16_i32:
   1991         tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
   1992         break;
   1993     case INDEX_op_st_i32:
   1994         tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
   1995         break;
   1996 
   1997     case INDEX_op_movcond_i32:
   1998         /* Constraints mean that v2 is always in the same register as dest,
   1999          * so we only need to do "if condition passed, move v1 to dest".
   2000          */
   2001         tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
   2002                         args[1], args[2], const_args[2]);
   2003         tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
   2004                         ARITH_MVN, args[0], 0, args[3], const_args[3]);
   2005         break;
   2006     case INDEX_op_add_i32:
   2007         tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
   2008                         args[0], args[1], args[2], const_args[2]);
   2009         break;
   2010     case INDEX_op_sub_i32:
   2011         if (const_args[1]) {
   2012             if (const_args[2]) {
   2013                 tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
   2014             } else {
   2015                 tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
   2016                                args[0], args[2], args[1], 1);
   2017             }
   2018         } else {
   2019             tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
   2020                             args[0], args[1], args[2], const_args[2]);
   2021         }
   2022         break;
   2023     case INDEX_op_and_i32:
   2024         tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
   2025                         args[0], args[1], args[2], const_args[2]);
   2026         break;
   2027     case INDEX_op_andc_i32:
   2028         tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
   2029                         args[0], args[1], args[2], const_args[2]);
   2030         break;
   2031     case INDEX_op_or_i32:
   2032         c = ARITH_ORR;
   2033         goto gen_arith;
   2034     case INDEX_op_xor_i32:
   2035         c = ARITH_EOR;
   2036         /* Fall through.  */
   2037     gen_arith:
   2038         tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
   2039         break;
   2040     case INDEX_op_add2_i32:
   2041         a0 = args[0], a1 = args[1], a2 = args[2];
   2042         a3 = args[3], a4 = args[4], a5 = args[5];
   2043         if (a0 == a3 || (a0 == a5 && !const_args[5])) {
   2044             a0 = TCG_REG_TMP;
   2045         }
   2046         tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
   2047                         a0, a2, a4, const_args[4]);
   2048         tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
   2049                         a1, a3, a5, const_args[5]);
   2050         tcg_out_mov_reg(s, COND_AL, args[0], a0);
   2051         break;
   2052     case INDEX_op_sub2_i32:
   2053         a0 = args[0], a1 = args[1], a2 = args[2];
   2054         a3 = args[3], a4 = args[4], a5 = args[5];
   2055         if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
   2056             a0 = TCG_REG_TMP;
   2057         }
   2058         if (const_args[2]) {
   2059             if (const_args[4]) {
   2060                 tcg_out_movi32(s, COND_AL, a0, a4);
   2061                 a4 = a0;
   2062             }
   2063             tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
   2064         } else {
   2065             tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
   2066                             ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
   2067         }
   2068         if (const_args[3]) {
   2069             if (const_args[5]) {
   2070                 tcg_out_movi32(s, COND_AL, a1, a5);
   2071                 a5 = a1;
   2072             }
   2073             tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
   2074         } else {
   2075             tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
   2076                             a1, a3, a5, const_args[5]);
   2077         }
   2078         tcg_out_mov_reg(s, COND_AL, args[0], a0);
   2079         break;
   2080     case INDEX_op_neg_i32:
   2081         tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
   2082         break;
   2083     case INDEX_op_not_i32:
   2084         tcg_out_dat_reg(s, COND_AL,
   2085                         ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
   2086         break;
   2087     case INDEX_op_mul_i32:
   2088         tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
   2089         break;
   2090     case INDEX_op_mulu2_i32:
   2091         tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
   2092         break;
   2093     case INDEX_op_muls2_i32:
   2094         tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
   2095         break;
   2096     /* XXX: Perhaps args[2] & 0x1f is wrong */
   2097     case INDEX_op_shl_i32:
   2098         c = const_args[2] ?
   2099                 SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
   2100         goto gen_shift32;
   2101     case INDEX_op_shr_i32:
   2102         c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
   2103                 SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
   2104         goto gen_shift32;
   2105     case INDEX_op_sar_i32:
   2106         c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
   2107                 SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
   2108         goto gen_shift32;
   2109     case INDEX_op_rotr_i32:
   2110         c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
   2111                 SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
   2112         /* Fall through.  */
   2113     gen_shift32:
   2114         tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
   2115         break;
   2116 
   2117     case INDEX_op_rotl_i32:
   2118         if (const_args[2]) {
   2119             tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
   2120                             ((0x20 - args[2]) & 0x1f) ?
   2121                             SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
   2122                             SHIFT_IMM_LSL(0));
   2123         } else {
   2124             tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
   2125             tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
   2126                             SHIFT_REG_ROR(TCG_REG_TMP));
   2127         }
   2128         break;
   2129 
   2130     case INDEX_op_ctz_i32:
   2131         tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, args[1], 0);
   2132         a1 = TCG_REG_TMP;
   2133         goto do_clz;
   2134 
   2135     case INDEX_op_clz_i32:
   2136         a1 = args[1];
   2137     do_clz:
   2138         a0 = args[0];
   2139         a2 = args[2];
   2140         c = const_args[2];
   2141         if (c && a2 == 32) {
   2142             tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0);
   2143             break;
   2144         }
   2145         tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, a1, 0);
   2146         tcg_out_dat_reg(s, COND_NE, INSN_CLZ, a0, 0, a1, 0);
   2147         if (c || a0 != a2) {
   2148             tcg_out_dat_rIK(s, COND_EQ, ARITH_MOV, ARITH_MVN, a0, 0, a2, c);
   2149         }
   2150         break;
   2151 
   2152     case INDEX_op_brcond_i32:
   2153         tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
   2154                        args[0], args[1], const_args[1]);
   2155         tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
   2156                            arg_label(args[3]));
   2157         break;
   2158     case INDEX_op_setcond_i32:
   2159         tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
   2160                         args[1], args[2], const_args[2]);
   2161         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
   2162                         ARITH_MOV, args[0], 0, 1);
   2163         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
   2164                         ARITH_MOV, args[0], 0, 0);
   2165         break;
   2166 
   2167     case INDEX_op_brcond2_i32:
   2168         c = tcg_out_cmp2(s, args, const_args);
   2169         tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[5]));
   2170         break;
   2171     case INDEX_op_setcond2_i32:
   2172         c = tcg_out_cmp2(s, args + 1, const_args + 1);
   2173         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, args[0], 0, 1);
   2174         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)],
   2175                         ARITH_MOV, args[0], 0, 0);
   2176         break;
   2177 
   2178     case INDEX_op_qemu_ld_i32:
   2179         tcg_out_qemu_ld(s, args, 0);
   2180         break;
   2181     case INDEX_op_qemu_ld_i64:
   2182         tcg_out_qemu_ld(s, args, 1);
   2183         break;
   2184     case INDEX_op_qemu_st_i32:
   2185         tcg_out_qemu_st(s, args, 0);
   2186         break;
   2187     case INDEX_op_qemu_st_i64:
   2188         tcg_out_qemu_st(s, args, 1);
   2189         break;
   2190 
   2191     case INDEX_op_bswap16_i32:
   2192         tcg_out_bswap16(s, COND_AL, args[0], args[1], args[2]);
   2193         break;
   2194     case INDEX_op_bswap32_i32:
   2195         tcg_out_bswap32(s, COND_AL, args[0], args[1]);
   2196         break;
   2197 
   2198     case INDEX_op_ext8s_i32:
   2199         tcg_out_ext8s(s, COND_AL, args[0], args[1]);
   2200         break;
   2201     case INDEX_op_ext16s_i32:
   2202         tcg_out_ext16s(s, COND_AL, args[0], args[1]);
   2203         break;
   2204     case INDEX_op_ext16u_i32:
   2205         tcg_out_ext16u(s, COND_AL, args[0], args[1]);
   2206         break;
   2207 
   2208     case INDEX_op_deposit_i32:
   2209         tcg_out_deposit(s, COND_AL, args[0], args[2],
   2210                         args[3], args[4], const_args[2]);
   2211         break;
   2212     case INDEX_op_extract_i32:
   2213         tcg_out_extract(s, COND_AL, args[0], args[1], args[2], args[3]);
   2214         break;
   2215     case INDEX_op_sextract_i32:
   2216         tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]);
   2217         break;
   2218     case INDEX_op_extract2_i32:
    2219         /* ??? These optimizations vs zero should be generic.  */
   2220         /* ??? But we can't substitute 2 for 1 in the opcode stream yet.  */
   2221         if (const_args[1]) {
   2222             if (const_args[2]) {
   2223                 tcg_out_movi(s, TCG_TYPE_REG, args[0], 0);
   2224             } else {
   2225                 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0,
   2226                                 args[2], SHIFT_IMM_LSL(32 - args[3]));
   2227             }
   2228         } else if (const_args[2]) {
   2229             tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0,
   2230                             args[1], SHIFT_IMM_LSR(args[3]));
   2231         } else {
   2232             /* We can do extract2 in 2 insns, vs the 3 required otherwise.  */
   2233             tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0,
   2234                             args[2], SHIFT_IMM_LSL(32 - args[3]));
   2235             tcg_out_dat_reg(s, COND_AL, ARITH_ORR, args[0], TCG_REG_TMP,
   2236                             args[1], SHIFT_IMM_LSR(args[3]));
   2237         }
   2238         break;
   2239 
   2240     case INDEX_op_div_i32:
   2241         tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
   2242         break;
   2243     case INDEX_op_divu_i32:
   2244         tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
   2245         break;
   2246 
   2247     case INDEX_op_mb:
   2248         tcg_out_mb(s, args[0]);
   2249         break;
   2250 
   2251     case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
   2252     case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
   2253     default:
   2254         tcg_abort();
   2255     }
   2256 }
   2257 
   2258 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
   2259 {
   2260     switch (op) {
   2261     case INDEX_op_goto_ptr:
   2262         return C_O0_I1(r);
   2263 
   2264     case INDEX_op_ld8u_i32:
   2265     case INDEX_op_ld8s_i32:
   2266     case INDEX_op_ld16u_i32:
   2267     case INDEX_op_ld16s_i32:
   2268     case INDEX_op_ld_i32:
   2269     case INDEX_op_neg_i32:
   2270     case INDEX_op_not_i32:
   2271     case INDEX_op_bswap16_i32:
   2272     case INDEX_op_bswap32_i32:
   2273     case INDEX_op_ext8s_i32:
   2274     case INDEX_op_ext16s_i32:
   2275     case INDEX_op_ext16u_i32:
   2276     case INDEX_op_extract_i32:
   2277     case INDEX_op_sextract_i32:
   2278         return C_O1_I1(r, r);
   2279 
   2280     case INDEX_op_st8_i32:
   2281     case INDEX_op_st16_i32:
   2282     case INDEX_op_st_i32:
   2283         return C_O0_I2(r, r);
   2284 
   2285     case INDEX_op_add_i32:
   2286     case INDEX_op_sub_i32:
   2287     case INDEX_op_setcond_i32:
   2288         return C_O1_I2(r, r, rIN);
   2289 
   2290     case INDEX_op_and_i32:
   2291     case INDEX_op_andc_i32:
   2292     case INDEX_op_clz_i32:
   2293     case INDEX_op_ctz_i32:
   2294         return C_O1_I2(r, r, rIK);
   2295 
   2296     case INDEX_op_mul_i32:
   2297     case INDEX_op_div_i32:
   2298     case INDEX_op_divu_i32:
   2299         return C_O1_I2(r, r, r);
   2300 
   2301     case INDEX_op_mulu2_i32:
   2302     case INDEX_op_muls2_i32:
   2303         return C_O2_I2(r, r, r, r);
   2304 
   2305     case INDEX_op_or_i32:
   2306     case INDEX_op_xor_i32:
   2307         return C_O1_I2(r, r, rI);
   2308 
   2309     case INDEX_op_shl_i32:
   2310     case INDEX_op_shr_i32:
   2311     case INDEX_op_sar_i32:
   2312     case INDEX_op_rotl_i32:
   2313     case INDEX_op_rotr_i32:
   2314         return C_O1_I2(r, r, ri);
   2315 
   2316     case INDEX_op_brcond_i32:
   2317         return C_O0_I2(r, rIN);
   2318     case INDEX_op_deposit_i32:
   2319         return C_O1_I2(r, 0, rZ);
   2320     case INDEX_op_extract2_i32:
   2321         return C_O1_I2(r, rZ, rZ);
   2322     case INDEX_op_movcond_i32:
   2323         return C_O1_I4(r, r, rIN, rIK, 0);
   2324     case INDEX_op_add2_i32:
   2325         return C_O2_I4(r, r, r, r, rIN, rIK);
   2326     case INDEX_op_sub2_i32:
   2327         return C_O2_I4(r, r, rI, rI, rIN, rIK);
   2328     case INDEX_op_brcond2_i32:
   2329         return C_O0_I4(r, r, rI, rI);
   2330     case INDEX_op_setcond2_i32:
   2331         return C_O1_I4(r, r, r, rI, rI);
   2332 
   2333     case INDEX_op_qemu_ld_i32:
   2334         return TARGET_LONG_BITS == 32 ? C_O1_I1(r, l) : C_O1_I2(r, l, l);
   2335     case INDEX_op_qemu_ld_i64:
   2336         return TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, l) : C_O2_I2(r, r, l, l);
   2337     case INDEX_op_qemu_st_i32:
   2338         return TARGET_LONG_BITS == 32 ? C_O0_I2(s, s) : C_O0_I3(s, s, s);
   2339     case INDEX_op_qemu_st_i64:
   2340         return TARGET_LONG_BITS == 32 ? C_O0_I3(s, s, s) : C_O0_I4(s, s, s, s);
   2341 
   2342     case INDEX_op_st_vec:
   2343         return C_O0_I2(w, r);
   2344     case INDEX_op_ld_vec:
   2345     case INDEX_op_dupm_vec:
   2346         return C_O1_I1(w, r);
   2347     case INDEX_op_dup_vec:
   2348         return C_O1_I1(w, wr);
   2349     case INDEX_op_abs_vec:
   2350     case INDEX_op_neg_vec:
   2351     case INDEX_op_not_vec:
   2352     case INDEX_op_shli_vec:
   2353     case INDEX_op_shri_vec:
   2354     case INDEX_op_sari_vec:
   2355         return C_O1_I1(w, w);
   2356     case INDEX_op_dup2_vec:
   2357     case INDEX_op_add_vec:
   2358     case INDEX_op_mul_vec:
   2359     case INDEX_op_smax_vec:
   2360     case INDEX_op_smin_vec:
   2361     case INDEX_op_ssadd_vec:
   2362     case INDEX_op_sssub_vec:
   2363     case INDEX_op_sub_vec:
   2364     case INDEX_op_umax_vec:
   2365     case INDEX_op_umin_vec:
   2366     case INDEX_op_usadd_vec:
   2367     case INDEX_op_ussub_vec:
   2368     case INDEX_op_xor_vec:
   2369     case INDEX_op_arm_sshl_vec:
   2370     case INDEX_op_arm_ushl_vec:
   2371         return C_O1_I2(w, w, w);
   2372     case INDEX_op_arm_sli_vec:
   2373         return C_O1_I2(w, 0, w);
   2374     case INDEX_op_or_vec:
   2375     case INDEX_op_andc_vec:
   2376         return C_O1_I2(w, w, wO);
   2377     case INDEX_op_and_vec:
   2378     case INDEX_op_orc_vec:
   2379         return C_O1_I2(w, w, wV);
   2380     case INDEX_op_cmp_vec:
   2381         return C_O1_I2(w, w, wZ);
   2382     case INDEX_op_bitsel_vec:
   2383         return C_O1_I3(w, w, w, w);
   2384     default:
   2385         g_assert_not_reached();
   2386     }
   2387 }
   2388 
   2389 static void tcg_target_init(TCGContext *s)
   2390 {
   2391     /*
   2392      * Only probe for the platform and capabilities if we haven't already
   2393      * determined maximum values at compile time.
   2394      */
   2395 #if !defined(use_idiv_instructions) || !defined(use_neon_instructions)
   2396     {
   2397         unsigned long hwcap = qemu_getauxval(AT_HWCAP);
   2398 #ifndef use_idiv_instructions
   2399         use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
   2400 #endif
   2401 #ifndef use_neon_instructions
   2402         use_neon_instructions = (hwcap & HWCAP_ARM_NEON) != 0;
   2403 #endif
   2404     }
   2405 #endif
   2406 
   2407     if (__ARM_ARCH < 7) {
   2408         const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
   2409         if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
   2410             arm_arch = pl[1] - '0';
   2411         }
   2412 
   2413         if (arm_arch < 6) {
   2414             error_report("TCG: ARMv%d is unsupported; exiting", arm_arch);
   2415             exit(EXIT_FAILURE);
   2416         }
   2417     }
   2418 
   2419     tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
   2420 
   2421     tcg_target_call_clobber_regs = 0;
   2422     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
   2423     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
   2424     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
   2425     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
   2426     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
   2427     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
   2428 
   2429     if (use_neon_instructions) {
   2430         tcg_target_available_regs[TCG_TYPE_V64]  = ALL_VECTOR_REGS;
   2431         tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
   2432 
   2433         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q0);
   2434         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q1);
   2435         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q2);
   2436         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q3);
   2437         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q8);
   2438         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q9);
   2439         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q10);
   2440         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q11);
   2441         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q12);
   2442         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q13);
   2443         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q14);
   2444         tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_Q15);
   2445     }
   2446 
   2447     s->reserved_regs = 0;
   2448     tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
   2449     tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
   2450     tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
   2451     tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
   2452 }
   2453 
   2454 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
   2455                        TCGReg arg1, intptr_t arg2)
   2456 {
   2457     switch (type) {
   2458     case TCG_TYPE_I32:
   2459         tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
   2460         return;
   2461     case TCG_TYPE_V64:
   2462         /* regs 1; size 8; align 8 */
   2463         tcg_out_vldst(s, INSN_VLD1 | 0x7d0, arg, arg1, arg2);
   2464         return;
   2465     case TCG_TYPE_V128:
   2466         /*
   2467          * We have only 8-byte alignment for the stack per the ABI.
   2468          * Rather than dynamically re-align the stack, it's easier
   2469          * to simply not request alignment beyond that.  So:
   2470          * regs 2; size 8; align 8
   2471          */
   2472         tcg_out_vldst(s, INSN_VLD1 | 0xad0, arg, arg1, arg2);
   2473         return;
   2474     default:
   2475         g_assert_not_reached();
   2476     }
   2477 }
   2478 
   2479 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
   2480                        TCGReg arg1, intptr_t arg2)
   2481 {
   2482     switch (type) {
   2483     case TCG_TYPE_I32:
   2484         tcg_out_st32(s, COND_AL, arg, arg1, arg2);
   2485         return;
   2486     case TCG_TYPE_V64:
   2487         /* regs 1; size 8; align 8 */
   2488         tcg_out_vldst(s, INSN_VST1 | 0x7d0, arg, arg1, arg2);
   2489         return;
   2490     case TCG_TYPE_V128:
   2491         /* See tcg_out_ld re alignment: regs 2; size 8; align 8 */
   2492         tcg_out_vldst(s, INSN_VST1 | 0xad0, arg, arg1, arg2);
   2493         return;
   2494     default:
   2495         g_assert_not_reached();
   2496     }
   2497 }
   2498 
   2499 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
   2500                         TCGReg base, intptr_t ofs)
   2501 {
   2502     return false;
   2503 }
   2504 
   2505 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
   2506 {
   2507     if (ret == arg) {
   2508         return true;
   2509     }
   2510     switch (type) {
   2511     case TCG_TYPE_I32:
   2512         if (ret < TCG_REG_Q0 && arg < TCG_REG_Q0) {
   2513             tcg_out_mov_reg(s, COND_AL, ret, arg);
   2514             return true;
   2515         }
   2516         return false;
   2517 
   2518     case TCG_TYPE_V64:
   2519     case TCG_TYPE_V128:
   2520         /* "VMOV D,N" is an alias for "VORR D,N,N". */
   2521         tcg_out_vreg3(s, INSN_VORR, type - TCG_TYPE_V64, 0, ret, arg, arg);
   2522         return true;
   2523 
   2524     default:
   2525         g_assert_not_reached();
   2526     }
   2527 }
   2528 
   2529 static void tcg_out_movi(TCGContext *s, TCGType type,
   2530                          TCGReg ret, tcg_target_long arg)
   2531 {
   2532     tcg_debug_assert(type == TCG_TYPE_I32);
   2533     tcg_debug_assert(ret < TCG_REG_Q0);
   2534     tcg_out_movi32(s, COND_AL, ret, arg);
   2535 }
   2536 
   2537 /* Type is always V128, with I64 elements.  */
   2538 static void tcg_out_dup2_vec(TCGContext *s, TCGReg rd, TCGReg rl, TCGReg rh)
   2539 {
   2540     /* Move high element into place first. */
   2541     /* VMOV Dd+1, Ds */
   2542     tcg_out_vreg3(s, INSN_VORR | (1 << 12), 0, 0, rd, rh, rh);
   2543     /* Move low element into place; tcg_out_mov will check for nop. */
   2544     tcg_out_mov(s, TCG_TYPE_V64, rd, rl);
   2545 }
   2546 
   2547 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
   2548                             TCGReg rd, TCGReg rs)
   2549 {
   2550     int q = type - TCG_TYPE_V64;
   2551 
   2552     if (vece == MO_64) {
   2553         if (type == TCG_TYPE_V128) {
   2554             tcg_out_dup2_vec(s, rd, rs, rs);
   2555         } else {
   2556             tcg_out_mov(s, TCG_TYPE_V64, rd, rs);
   2557         }
   2558     } else if (rs < TCG_REG_Q0) {
   2559         int b = (vece == MO_8);
   2560         int e = (vece == MO_16);
   2561         tcg_out32(s, INSN_VDUP_G | (b << 22) | (q << 21) | (e << 5) |
   2562                   encode_vn(rd) | (rs << 12));
   2563     } else {
   2564         int imm4 = 1 << vece;
   2565         tcg_out32(s, INSN_VDUP_S | (imm4 << 16) | (q << 6) |
   2566                   encode_vd(rd) | encode_vm(rs));
   2567     }
   2568     return true;
   2569 }
   2570 
   2571 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
   2572                              TCGReg rd, TCGReg base, intptr_t offset)
   2573 {
   2574     if (vece == MO_64) {
   2575         tcg_out_ld(s, TCG_TYPE_V64, rd, base, offset);
   2576         if (type == TCG_TYPE_V128) {
   2577             tcg_out_dup2_vec(s, rd, rd, rd);
   2578         }
   2579     } else {
   2580         int q = type - TCG_TYPE_V64;
   2581         tcg_out_vldst(s, INSN_VLD1R | (vece << 6) | (q << 5),
   2582                       rd, base, offset);
   2583     }
   2584     return true;
   2585 }
   2586 
   2587 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
   2588                              TCGReg rd, int64_t v64)
   2589 {
   2590     int q = type - TCG_TYPE_V64;
   2591     int cmode, imm8, i;
   2592 
   2593     /* Test all bytes equal first.  */
   2594     if (vece == MO_8) {
   2595         tcg_out_vmovi(s, rd, q, 0, 0xe, v64);
   2596         return;
   2597     }
   2598 
   2599     /*
   2600      * Test all bytes 0x00 or 0xff second.  This can match cases that
   2601      * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
   2602      */
   2603     for (i = imm8 = 0; i < 8; i++) {
   2604         uint8_t byte = v64 >> (i * 8);
   2605         if (byte == 0xff) {
   2606             imm8 |= 1 << i;
   2607         } else if (byte != 0) {
   2608             goto fail_bytes;
   2609         }
   2610     }
   2611     tcg_out_vmovi(s, rd, q, 1, 0xe, imm8);
   2612     return;
   2613  fail_bytes:
   2614 
   2615     /*
   2616      * Tests for various replications.  For each element width, if we
   2617      * cannot find an expansion there's no point checking a larger
   2618      * width because we already know by replication it cannot match.
   2619      */
   2620     if (vece == MO_16) {
   2621         uint16_t v16 = v64;
   2622 
   2623         if (is_shimm16(v16, &cmode, &imm8)) {
   2624             tcg_out_vmovi(s, rd, q, 0, cmode, imm8);
   2625             return;
   2626         }
   2627         if (is_shimm16(~v16, &cmode, &imm8)) {
   2628             tcg_out_vmovi(s, rd, q, 1, cmode, imm8);
   2629             return;
   2630         }
   2631 
   2632         /*
   2633          * Otherwise, all remaining constants can be loaded in two insns:
   2634          * rd = v16 & 0xff, rd |= v16 & 0xff00.
   2635          */
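                 /*
                  * For example, v16 == 0x1234 becomes a VMOVI of 0x34 into
                  * each halfword followed by a VORR-immediate of 0x1200.
                  */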
   2636         tcg_out_vmovi(s, rd, q, 0, 0x8, v16 & 0xff);
   2637         tcg_out_vmovi(s, rd, q, 0, 0xb, v16 >> 8);   /* VORRI */
   2638         return;
   2639     }
   2640 
   2641     if (vece == MO_32) {
   2642         uint32_t v32 = v64;
   2643 
   2644         if (is_shimm32(v32, &cmode, &imm8) ||
   2645             is_soimm32(v32, &cmode, &imm8)) {
   2646             tcg_out_vmovi(s, rd, q, 0, cmode, imm8);
   2647             return;
   2648         }
   2649         if (is_shimm32(~v32, &cmode, &imm8) ||
   2650             is_soimm32(~v32, &cmode, &imm8)) {
   2651             tcg_out_vmovi(s, rd, q, 1, cmode, imm8);
   2652             return;
   2653         }
   2654 
   2655         /*
   2656          * Restrict the set of constants to those we can load with
   2657          * two instructions.  Others we load from the pool.
   2658          */
   2659         i = is_shimm32_pair(v32, &cmode, &imm8);
   2660         if (i) {
   2661             tcg_out_vmovi(s, rd, q, 0, cmode, imm8);
   2662             tcg_out_vmovi(s, rd, q, 0, i | 1, extract32(v32, i * 4, 8));
   2663             return;
   2664         }
   2665         i = is_shimm32_pair(~v32, &cmode, &imm8);
   2666         if (i) {
   2667             tcg_out_vmovi(s, rd, q, 1, cmode, imm8);
   2668             tcg_out_vmovi(s, rd, q, 1, i | 1, extract32(~v32, i * 4, 8));
   2669             return;
   2670         }
   2671     }
   2672 
   2673     /*
   2674      * As a last resort, load from the constant pool.
   2675      */
   2676     if (!q || vece == MO_64) {
   2677         new_pool_l2(s, R_ARM_PC11, s->code_ptr, 0, v64, v64 >> 32);
   2678         /* VLDR Dd, [pc + offset] */
   2679         tcg_out32(s, INSN_VLDR_D | encode_vd(rd) | (0xf << 16));
   2680         if (q) {
   2681             tcg_out_dup2_vec(s, rd, rd, rd);
   2682         }
   2683     } else {
   2684         new_pool_label(s, (uint32_t)v64, R_ARM_PC8, s->code_ptr, 0);
   2685         /* add tmp, pc, offset */
   2686         tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_TMP, TCG_REG_PC, 0);
   2687         tcg_out_dupm_vec(s, type, MO_32, rd, TCG_REG_TMP, 0);
   2688     }
   2689 }
   2690 
   2691 static const ARMInsn vec_cmp_insn[16] = {
   2692     [TCG_COND_EQ] = INSN_VCEQ,
   2693     [TCG_COND_GT] = INSN_VCGT,
   2694     [TCG_COND_GE] = INSN_VCGE,
   2695     [TCG_COND_GTU] = INSN_VCGT_U,
   2696     [TCG_COND_GEU] = INSN_VCGE_U,
   2697 };
   2698 
   2699 static const ARMInsn vec_cmp0_insn[16] = {
   2700     [TCG_COND_EQ] = INSN_VCEQ0,
   2701     [TCG_COND_GT] = INSN_VCGT0,
   2702     [TCG_COND_GE] = INSN_VCGE0,
   2703     [TCG_COND_LT] = INSN_VCLT0,
   2704     [TCG_COND_LE] = INSN_VCLE0,
   2705 };
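         /*
          * Conditions missing from these tables are synthesized in
          * tcg_out_vec_op below: NE via VTST or VCEQ+VMVN, the others by
          * swapping the operands and condition.
          */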
   2706 
   2707 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
   2708                            unsigned vecl, unsigned vece,
   2709                            const TCGArg args[TCG_MAX_OP_ARGS],
   2710                            const int const_args[TCG_MAX_OP_ARGS])
   2711 {
   2712     TCGType type = vecl + TCG_TYPE_V64;
   2713     unsigned q = vecl;
   2714     TCGArg a0, a1, a2, a3;
   2715     int cmode, imm8;
   2716 
   2717     a0 = args[0];
   2718     a1 = args[1];
   2719     a2 = args[2];
   2720 
   2721     switch (opc) {
   2722     case INDEX_op_ld_vec:
   2723         tcg_out_ld(s, type, a0, a1, a2);
   2724         return;
   2725     case INDEX_op_st_vec:
   2726         tcg_out_st(s, type, a0, a1, a2);
   2727         return;
   2728     case INDEX_op_dupm_vec:
   2729         tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
   2730         return;
   2731     case INDEX_op_dup2_vec:
   2732         tcg_out_dup2_vec(s, a0, a1, a2);
   2733         return;
   2734     case INDEX_op_abs_vec:
   2735         tcg_out_vreg2(s, INSN_VABS, q, vece, a0, a1);
   2736         return;
   2737     case INDEX_op_neg_vec:
   2738         tcg_out_vreg2(s, INSN_VNEG, q, vece, a0, a1);
   2739         return;
   2740     case INDEX_op_not_vec:
   2741         tcg_out_vreg2(s, INSN_VMVN, q, 0, a0, a1);
   2742         return;
   2743     case INDEX_op_add_vec:
   2744         tcg_out_vreg3(s, INSN_VADD, q, vece, a0, a1, a2);
   2745         return;
   2746     case INDEX_op_mul_vec:
   2747         tcg_out_vreg3(s, INSN_VMUL, q, vece, a0, a1, a2);
   2748         return;
   2749     case INDEX_op_smax_vec:
   2750         tcg_out_vreg3(s, INSN_VMAX, q, vece, a0, a1, a2);
   2751         return;
   2752     case INDEX_op_smin_vec:
   2753         tcg_out_vreg3(s, INSN_VMIN, q, vece, a0, a1, a2);
   2754         return;
   2755     case INDEX_op_sub_vec:
   2756         tcg_out_vreg3(s, INSN_VSUB, q, vece, a0, a1, a2);
   2757         return;
   2758     case INDEX_op_ssadd_vec:
   2759         tcg_out_vreg3(s, INSN_VQADD, q, vece, a0, a1, a2);
   2760         return;
   2761     case INDEX_op_sssub_vec:
   2762         tcg_out_vreg3(s, INSN_VQSUB, q, vece, a0, a1, a2);
   2763         return;
   2764     case INDEX_op_umax_vec:
   2765         tcg_out_vreg3(s, INSN_VMAX_U, q, vece, a0, a1, a2);
   2766         return;
   2767     case INDEX_op_umin_vec:
   2768         tcg_out_vreg3(s, INSN_VMIN_U, q, vece, a0, a1, a2);
   2769         return;
   2770     case INDEX_op_usadd_vec:
   2771         tcg_out_vreg3(s, INSN_VQADD_U, q, vece, a0, a1, a2);
   2772         return;
   2773     case INDEX_op_ussub_vec:
   2774         tcg_out_vreg3(s, INSN_VQSUB_U, q, vece, a0, a1, a2);
   2775         return;
   2776     case INDEX_op_xor_vec:
   2777         tcg_out_vreg3(s, INSN_VEOR, q, 0, a0, a1, a2);
   2778         return;
   2779     case INDEX_op_arm_sshl_vec:
   2780         /*
   2781          * Note that Vm is the data and Vn is the shift count,
   2782          * therefore the arguments appear reversed.
   2783          */
   2784         tcg_out_vreg3(s, INSN_VSHL_S, q, vece, a0, a2, a1);
   2785         return;
   2786     case INDEX_op_arm_ushl_vec:
   2787         /* See above. */
   2788         tcg_out_vreg3(s, INSN_VSHL_U, q, vece, a0, a2, a1);
   2789         return;
   2790     case INDEX_op_shli_vec:
   2791         tcg_out_vshifti(s, INSN_VSHLI, q, a0, a1, a2 + (8 << vece));
   2792         return;
   2793     case INDEX_op_shri_vec:
   2794         tcg_out_vshifti(s, INSN_VSHRI, q, a0, a1, (16 << vece) - a2);
   2795         return;
   2796     case INDEX_op_sari_vec:
   2797         tcg_out_vshifti(s, INSN_VSARI, q, a0, a1, (16 << vece) - a2);
   2798         return;
   2799     case INDEX_op_arm_sli_vec:
   2800         tcg_out_vshifti(s, INSN_VSLI, q, a0, a2, args[3] + (8 << vece));
   2801         return;
   2802 
   2803     case INDEX_op_andc_vec:
   2804         if (!const_args[2]) {
   2805             tcg_out_vreg3(s, INSN_VBIC, q, 0, a0, a1, a2);
   2806             return;
   2807         }
   2808         a2 = ~a2;
   2809         /* fall through */
   2810     case INDEX_op_and_vec:
   2811         if (const_args[2]) {
   2812             is_shimm1632(~a2, &cmode, &imm8);
   2813             if (a0 == a1) {
   2814                 tcg_out_vmovi(s, a0, q, 1, cmode | 1, imm8); /* VBICI */
   2815                 return;
   2816             }
   2817             tcg_out_vmovi(s, a0, q, 1, cmode, imm8); /* VMVNI */
   2818             a2 = a0;
   2819         }
   2820         tcg_out_vreg3(s, INSN_VAND, q, 0, a0, a1, a2);
   2821         return;
   2822 
   2823     case INDEX_op_orc_vec:
   2824         if (!const_args[2]) {
   2825             tcg_out_vreg3(s, INSN_VORN, q, 0, a0, a1, a2);
   2826             return;
   2827         }
   2828         a2 = ~a2;
   2829         /* fall through */
   2830     case INDEX_op_or_vec:
   2831         if (const_args[2]) {
   2832             is_shimm1632(a2, &cmode, &imm8);
   2833             if (a0 == a1) {
   2834                 tcg_out_vmovi(s, a0, q, 0, cmode | 1, imm8); /* VORRI */
   2835                 return;
   2836             }
   2837             tcg_out_vmovi(s, a0, q, 0, cmode, imm8); /* VMOVI */
   2838             a2 = a0;
   2839         }
   2840         tcg_out_vreg3(s, INSN_VORR, q, 0, a0, a1, a2);
   2841         return;
   2842 
   2843     case INDEX_op_cmp_vec:
   2844         {
   2845             TCGCond cond = args[3];
   2846 
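                    /*
                     * There is no direct "not equal" compare: x != 0 uses
                     * VTST (set lanes where x & x is nonzero) and the
                     * general case is VCEQ followed by VMVN.  Comparisons
                     * against constant zero use the dedicated
                     * compare-with-zero encodings when available; otherwise
                     * zero is materialized into a temp, and the operands
                     * are swapped when only the swapped condition has an
                     * encoding.
                     */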
   2847             if (cond == TCG_COND_NE) {
   2848                 if (const_args[2]) {
   2849                     tcg_out_vreg3(s, INSN_VTST, q, vece, a0, a1, a1);
   2850                 } else {
   2851                     tcg_out_vreg3(s, INSN_VCEQ, q, vece, a0, a1, a2);
   2852                     tcg_out_vreg2(s, INSN_VMVN, q, 0, a0, a0);
   2853                 }
   2854             } else {
   2855                 ARMInsn insn;
   2856 
   2857                 if (const_args[2]) {
   2858                     insn = vec_cmp0_insn[cond];
   2859                     if (insn) {
   2860                         tcg_out_vreg2(s, insn, q, vece, a0, a1);
   2861                         return;
   2862                     }
   2863                     tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
   2864                     a2 = TCG_VEC_TMP;
   2865                 }
   2866                 insn = vec_cmp_insn[cond];
   2867                 if (insn == 0) {
   2868                     TCGArg t;
   2869                     t = a1, a1 = a2, a2 = t;
   2870                     cond = tcg_swap_cond(cond);
   2871                     insn = vec_cmp_insn[cond];
   2872                     tcg_debug_assert(insn != 0);
   2873                 }
   2874                 tcg_out_vreg3(s, insn, q, vece, a0, a1, a2);
   2875             }
   2876         }
   2877         return;
   2878 
   2879     case INDEX_op_bitsel_vec:
   2880         a3 = args[3];
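                /*
                 * a1 selects, a2/a3 are the true/false operands.  VBSL,
                 * VBIT and VBIF compute the same blend but overwrite
                 * different operands, so pick whichever form lets the
                 * result reuse an input, falling back to a move plus VBSL:
                 *   VBSL d,n,m: d = (n & d) | (m & ~d)   (d is the selector)
                 *   VBIT d,n,m: d = (n & m) | (d & ~m)   (insert n where m set)
                 *   VBIF d,n,m: d = (d & m) | (n & ~m)   (insert n where m clear)
                 */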
   2881         if (a0 == a3) {
   2882             tcg_out_vreg3(s, INSN_VBIT, q, 0, a0, a2, a1);
   2883         } else if (a0 == a2) {
   2884             tcg_out_vreg3(s, INSN_VBIF, q, 0, a0, a3, a1);
   2885         } else {
   2886             tcg_out_mov(s, type, a0, a1);
   2887             tcg_out_vreg3(s, INSN_VBSL, q, 0, a0, a2, a3);
   2888         }
   2889         return;
   2890 
   2891     case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
   2892     case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
   2893     default:
   2894         g_assert_not_reached();
   2895     }
   2896 }
   2897 
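        /*
         * Return 1 when the opcode is implemented directly above, -1 when
         * it is legal but must be expanded by tcg_expand_vec_op below,
         * and 0 when it is not supported at all.
         */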
   2898 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
   2899 {
   2900     switch (opc) {
   2901     case INDEX_op_add_vec:
   2902     case INDEX_op_sub_vec:
   2903     case INDEX_op_and_vec:
   2904     case INDEX_op_andc_vec:
   2905     case INDEX_op_or_vec:
   2906     case INDEX_op_orc_vec:
   2907     case INDEX_op_xor_vec:
   2908     case INDEX_op_not_vec:
   2909     case INDEX_op_shli_vec:
   2910     case INDEX_op_shri_vec:
   2911     case INDEX_op_sari_vec:
   2912     case INDEX_op_ssadd_vec:
   2913     case INDEX_op_sssub_vec:
   2914     case INDEX_op_usadd_vec:
   2915     case INDEX_op_ussub_vec:
   2916     case INDEX_op_bitsel_vec:
   2917         return 1;
   2918     case INDEX_op_abs_vec:
   2919     case INDEX_op_cmp_vec:
   2920     case INDEX_op_mul_vec:
   2921     case INDEX_op_neg_vec:
   2922     case INDEX_op_smax_vec:
   2923     case INDEX_op_smin_vec:
   2924     case INDEX_op_umax_vec:
   2925     case INDEX_op_umin_vec:
   2926         return vece < MO_64;  /* no 64-bit element forms */
   2927     case INDEX_op_shlv_vec:
   2928     case INDEX_op_shrv_vec:
   2929     case INDEX_op_sarv_vec:
   2930     case INDEX_op_rotli_vec:
   2931     case INDEX_op_rotlv_vec:
   2932     case INDEX_op_rotrv_vec:
   2933         return -1;
   2934     default:
   2935         return 0;
   2936     }
   2937 }
   2938 
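        /*
         * Expand the shift-by-vector and rotate opcodes for which
         * tcg_can_emit_vec_op returned -1 above.
         */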
   2939 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
   2940                        TCGArg a0, ...)
   2941 {
   2942     va_list va;
   2943     TCGv_vec v0, v1, v2, t1, t2, c1;
   2944     TCGArg a2;
   2945 
   2946     va_start(va, a0);
   2947     v0 = temp_tcgv_vec(arg_temp(a0));
   2948     v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
   2949     a2 = va_arg(va, TCGArg);
   2950     va_end(va);
   2951 
   2952     switch (opc) {
   2953     case INDEX_op_shlv_vec:
   2954         /*
   2955          * Merely propagate shlv_vec to arm_ushl_vec.  We do not
   2956          * set TCG_TARGET_HAS_shv_vec; everything is handled by
   2957          * this expansion instead.
   2958          */
   2959         v2 = temp_tcgv_vec(arg_temp(a2));
   2960         vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(v0),
   2961                   tcgv_vec_arg(v1), tcgv_vec_arg(v2));
   2962         break;
   2963 
   2964     case INDEX_op_shrv_vec:
   2965     case INDEX_op_sarv_vec:
   2966         /* Right shifts are negative left shifts for NEON.  */
   2967         v2 = temp_tcgv_vec(arg_temp(a2));
   2968         t1 = tcg_temp_new_vec(type);
   2969         tcg_gen_neg_vec(vece, t1, v2);
   2970         if (opc == INDEX_op_shrv_vec) {
   2971             opc = INDEX_op_arm_ushl_vec;
   2972         } else {
   2973             opc = INDEX_op_arm_sshl_vec;
   2974         }
   2975         vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
   2976                   tcgv_vec_arg(v1), tcgv_vec_arg(t1));
   2977         tcg_temp_free_vec(t1);
   2978         break;
   2979 
   2980     case INDEX_op_rotli_vec:
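                /*
                 * rotl(x, n) == (x << n) | (x >> (esize - n)): shift right
                 * into a temp, then VSLI shifts x left by n and inserts it
                 * over the temp, whose low n bits are preserved.
                 */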
   2981         t1 = tcg_temp_new_vec(type);
   2982         tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
   2983         vec_gen_4(INDEX_op_arm_sli_vec, type, vece,
   2984                   tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
   2985         tcg_temp_free_vec(t1);
   2986         break;
   2987 
   2988     case INDEX_op_rotlv_vec:
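                /*
                 * rotl(x, n) == (x << n) | (x >> (esize - n)); the right
                 * shift is performed as a left shift by n - esize and the
                 * two halves are ORed together.
                 */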
   2989         v2 = temp_tcgv_vec(arg_temp(a2));
   2990         t1 = tcg_temp_new_vec(type);
   2991         c1 = tcg_constant_vec(type, vece, 8 << vece);
   2992         tcg_gen_sub_vec(vece, t1, v2, c1);
   2993         /* Right shifts are negative left shifts for NEON.  */
   2994         vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(t1),
   2995                   tcgv_vec_arg(v1), tcgv_vec_arg(t1));
   2996         vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(v0),
   2997                   tcgv_vec_arg(v1), tcgv_vec_arg(v2));
   2998         tcg_gen_or_vec(vece, v0, v0, t1);
   2999         tcg_temp_free_vec(t1);
   3000         break;
   3001 
   3002     case INDEX_op_rotrv_vec:
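                /*
                 * rotr(x, n) == (x >> n) | (x << (esize - n)), with both
                 * halves performed as left shifts, by -n and esize - n.
                 */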
   3003         v2 = temp_tcgv_vec(arg_temp(a2));
   3004         t1 = tcg_temp_new_vec(type);
   3005         t2 = tcg_temp_new_vec(type);
   3006         c1 = tcg_constant_vec(type, vece, 8 << vece);
   3007         tcg_gen_neg_vec(vece, t1, v2);
   3008         tcg_gen_sub_vec(vece, t2, c1, v2);
   3009         /* Right shifts are negative left shifts for NEON.  */
   3010         vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(t1),
   3011                   tcgv_vec_arg(v1), tcgv_vec_arg(t1));
   3012         vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(t2),
   3013                   tcgv_vec_arg(v1), tcgv_vec_arg(t2));
   3014         tcg_gen_or_vec(vece, v0, t1, t2);
   3015         tcg_temp_free_vec(t1);
   3016         tcg_temp_free_vec(t2);
   3017         break;
   3018 
   3019     default:
   3020         g_assert_not_reached();
   3021     }
   3022 }
   3023 
   3024 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
   3025 {
   3026     int i;
   3027     for (i = 0; i < count; ++i) {
   3028         p[i] = INSN_NOP;
   3029     }
   3030 }
   3031 
   3032 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
   3033    and tcg_register_jit.  */
   3034 
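        /* The prologue pushes r4-r11 and lr: (11 - 4 + 1) + 1 = 9 words. */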
   3035 #define PUSH_SIZE  ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))
   3036 
   3037 #define FRAME_SIZE \
   3038     ((PUSH_SIZE \
   3039       + TCG_STATIC_CALL_ARGS_SIZE \
   3040       + CPU_TEMP_BUF_NLONGS * sizeof(long) \
   3041       + TCG_TARGET_STACK_ALIGN - 1) \
   3042      & -TCG_TARGET_STACK_ALIGN)
   3043 
   3044 #define STACK_ADDEND  (FRAME_SIZE - PUSH_SIZE)
   3045 
   3046 static void tcg_target_qemu_prologue(TCGContext *s)
   3047 {
   3048     /* Calling convention requires us to save r4-r11 and lr.  */
   3049     /* stmdb sp!, { r4 - r11, lr } */
   3050     tcg_out_ldstm(s, COND_AL, INSN_STMDB, TCG_REG_CALL_STACK,
   3051                   (1 << TCG_REG_R4) | (1 << TCG_REG_R5) | (1 << TCG_REG_R6) |
   3052                   (1 << TCG_REG_R7) | (1 << TCG_REG_R8) | (1 << TCG_REG_R9) |
   3053                   (1 << TCG_REG_R10) | (1 << TCG_REG_R11) | (1 << TCG_REG_R14));
   3054 
   3055     /* Reserve callee argument and tcg temp space.  */
   3056     tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
   3057                    TCG_REG_CALL_STACK, STACK_ADDEND, 1);
   3058     tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
   3059                   CPU_TEMP_BUF_NLONGS * sizeof(long));
   3060 
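            /* The first call argument (r0) is the cpu env pointer; keep it in AREG0. */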
   3061     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
   3062 
   3063 #ifndef CONFIG_SOFTMMU
   3064     if (guest_base) {
   3065         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
   3066         tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
   3067     }
   3068 #endif
   3069 
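            /* Branch to the code pointer passed as the second call argument (r1). */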
   3070     tcg_out_b_reg(s, COND_AL, tcg_target_call_iarg_regs[1]);
   3071 
   3072     /*
   3073      * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
   3074      * and fall through to the rest of the epilogue.
   3075      */
   3076     tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
   3077     tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);
   3078     tcg_out_epilogue(s);
   3079 }
   3080 
   3081 static void tcg_out_epilogue(TCGContext *s)
   3082 {
   3083     /* Release local stack frame.  */
   3084     tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
   3085                    TCG_REG_CALL_STACK, STACK_ADDEND, 1);
   3086 
   3087     /* ldmia sp!, { r4 - r11, pc } */
   3088     tcg_out_ldstm(s, COND_AL, INSN_LDMIA, TCG_REG_CALL_STACK,
   3089                   (1 << TCG_REG_R4) | (1 << TCG_REG_R5) | (1 << TCG_REG_R6) |
   3090                   (1 << TCG_REG_R7) | (1 << TCG_REG_R8) | (1 << TCG_REG_R9) |
   3091                   (1 << TCG_REG_R10) | (1 << TCG_REG_R11) | (1 << TCG_REG_PC));
   3092 }
   3093 
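        /*
         * DWARF unwind description for the generated code; consumed by
         * tcg_register_jit below.
         */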
   3094 typedef struct {
   3095     DebugFrameHeader h;
   3096     uint8_t fde_def_cfa[4];
   3097     uint8_t fde_reg_ofs[18];
   3098 } DebugFrame;
   3099 
   3100 #define ELF_HOST_MACHINE EM_ARM
   3101 
   3102 /* We're expecting a 2 byte uleb128 encoded value.  */
   3103 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
   3104 
   3105 static const DebugFrame debug_frame = {
   3106     .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
   3107     .h.cie.id = -1,
   3108     .h.cie.version = 1,
   3109     .h.cie.code_align = 1,
   3110     .h.cie.data_align = 0x7c,             /* sleb128 -4 */
   3111     .h.cie.return_column = 14,            /* lr */
   3112 
   3113     /* Total FDE size does not include the "len" member.  */
   3114     .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
   3115 
   3116     .fde_def_cfa = {
   3117         12, 13,                         /* DW_CFA_def_cfa sp, ... */
   3118         (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
   3119         (FRAME_SIZE >> 7)
   3120     },
   3121     .fde_reg_ofs = {
   3122         /* The following must match the stmdb in the prologue.  */
   3123         0x8e, 1,                        /* DW_CFA_offset, lr, -4 */
   3124         0x8b, 2,                        /* DW_CFA_offset, r11, -8 */
   3125         0x8a, 3,                        /* DW_CFA_offset, r10, -12 */
   3126         0x89, 4,                        /* DW_CFA_offset, r9, -16 */
   3127         0x88, 5,                        /* DW_CFA_offset, r8, -20 */
   3128         0x87, 6,                        /* DW_CFA_offset, r7, -24 */
   3129         0x86, 7,                        /* DW_CFA_offset, r6, -28 */
   3130         0x85, 8,                        /* DW_CFA_offset, r5, -32 */
   3131         0x84, 9,                        /* DW_CFA_offset, r4, -36 */
   3132     }
   3133 };
   3134 
   3135 void tcg_register_jit(const void *buf, size_t buf_size)
   3136 {
   3137     tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
   3138 }