qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

tcg-target.c.inc (125143B)


      1 /*
      2  * Tiny Code Generator for QEMU
      3  *
      4  * Copyright (c) 2008 Fabrice Bellard
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a copy
      7  * of this software and associated documentation files (the "Software"), to deal
      8  * in the Software without restriction, including without limitation the rights
      9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     10  * copies of the Software, and to permit persons to whom the Software is
     11  * furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included in
     14  * all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     22  * THE SOFTWARE.
     23  */
     24 
     25 #include "elf.h"
     26 #include "../tcg-pool.c.inc"
     27 #include "../tcg-ldst.c.inc"
     28 
     29 /*
     30  * Standardize on the _CALL_FOO symbols used by GCC:
     31  * Apple XCode does not define _CALL_DARWIN.
     32  * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV (32-bit).
     33  */
     34 #if !defined(_CALL_SYSV) && \
     35     !defined(_CALL_DARWIN) && \
     36     !defined(_CALL_AIX) && \
     37     !defined(_CALL_ELF)
     38 # if defined(__APPLE__)
     39 #  define _CALL_DARWIN
     40 # elif defined(__ELF__) && TCG_TARGET_REG_BITS == 32
     41 #  define _CALL_SYSV
     42 # else
     43 #  error "Unknown ABI"
     44 # endif
     45 #endif 
     46 
     47 #ifdef _CALL_SYSV
     48 # define TCG_TARGET_CALL_ALIGN_ARGS   1
     49 #endif
     50 
     51 /* For some memory operations, we need a scratch that isn't R0.  For the AIX
     52    calling convention, we can re-use the TOC register since we'll be reloading
     53    it at every call.  Otherwise R12 will do nicely as neither a call-saved
     54    register nor a parameter register.  */
     55 #ifdef _CALL_AIX
     56 # define TCG_REG_TMP1   TCG_REG_R2
     57 #else
     58 # define TCG_REG_TMP1   TCG_REG_R12
     59 #endif
     60 
     61 #define TCG_VEC_TMP1    TCG_REG_V0
     62 #define TCG_VEC_TMP2    TCG_REG_V1
     63 
     64 #define TCG_REG_TB     TCG_REG_R31
     65 #define USE_REG_TB     (TCG_TARGET_REG_BITS == 64)
     66 
     67 /* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
     68 #define SZP  ((int)sizeof(void *))
     69 
     70 /* Shorthand for size of a register.  */
     71 #define SZR  (TCG_TARGET_REG_BITS / 8)
     72 
     73 #define TCG_CT_CONST_S16  0x100
     74 #define TCG_CT_CONST_U16  0x200
     75 #define TCG_CT_CONST_S32  0x400
     76 #define TCG_CT_CONST_U32  0x800
     77 #define TCG_CT_CONST_ZERO 0x1000
     78 #define TCG_CT_CONST_MONE 0x2000
     79 #define TCG_CT_CONST_WSZ  0x4000
     80 
     81 #define ALL_GENERAL_REGS  0xffffffffu
     82 #define ALL_VECTOR_REGS   0xffffffff00000000ull
     83 
     84 #ifdef CONFIG_SOFTMMU
     85 #define ALL_QLOAD_REGS \
     86     (ALL_GENERAL_REGS & \
     87      ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5)))
     88 #define ALL_QSTORE_REGS \
     89     (ALL_GENERAL_REGS & ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | \
     90                           (1 << TCG_REG_R5) | (1 << TCG_REG_R6)))
     91 #else
     92 #define ALL_QLOAD_REGS  (ALL_GENERAL_REGS & ~(1 << TCG_REG_R3))
     93 #define ALL_QSTORE_REGS ALL_QLOAD_REGS
     94 #endif
     95 
     96 TCGPowerISA have_isa;
     97 static bool have_isel;
     98 bool have_altivec;
     99 bool have_vsx;
    100 
    101 #ifndef CONFIG_SOFTMMU
    102 #define TCG_GUEST_BASE_REG 30
    103 #endif
    104 
    105 #ifdef CONFIG_DEBUG_TCG
    106 static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    107     "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    108     "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    109     "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    110     "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    111     "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    112     "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    113     "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    114     "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
    115 };
    116 #endif
    117 
    118 static const int tcg_target_reg_alloc_order[] = {
    119     TCG_REG_R14,  /* call saved registers */
    120     TCG_REG_R15,
    121     TCG_REG_R16,
    122     TCG_REG_R17,
    123     TCG_REG_R18,
    124     TCG_REG_R19,
    125     TCG_REG_R20,
    126     TCG_REG_R21,
    127     TCG_REG_R22,
    128     TCG_REG_R23,
    129     TCG_REG_R24,
    130     TCG_REG_R25,
    131     TCG_REG_R26,
    132     TCG_REG_R27,
    133     TCG_REG_R28,
    134     TCG_REG_R29,
    135     TCG_REG_R30,
    136     TCG_REG_R31,
    137     TCG_REG_R12,  /* call clobbered, non-arguments */
    138     TCG_REG_R11,
    139     TCG_REG_R2,
    140     TCG_REG_R13,
    141     TCG_REG_R10,  /* call clobbered, arguments */
    142     TCG_REG_R9,
    143     TCG_REG_R8,
    144     TCG_REG_R7,
    145     TCG_REG_R6,
    146     TCG_REG_R5,
    147     TCG_REG_R4,
    148     TCG_REG_R3,
    149 
    150     /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    151     TCG_REG_V2,   /* call clobbered, vectors */
    152     TCG_REG_V3,
    153     TCG_REG_V4,
    154     TCG_REG_V5,
    155     TCG_REG_V6,
    156     TCG_REG_V7,
    157     TCG_REG_V8,
    158     TCG_REG_V9,
    159     TCG_REG_V10,
    160     TCG_REG_V11,
    161     TCG_REG_V12,
    162     TCG_REG_V13,
    163     TCG_REG_V14,
    164     TCG_REG_V15,
    165     TCG_REG_V16,
    166     TCG_REG_V17,
    167     TCG_REG_V18,
    168     TCG_REG_V19,
    169 };
    170 
    171 static const int tcg_target_call_iarg_regs[] = {
    172     TCG_REG_R3,
    173     TCG_REG_R4,
    174     TCG_REG_R5,
    175     TCG_REG_R6,
    176     TCG_REG_R7,
    177     TCG_REG_R8,
    178     TCG_REG_R9,
    179     TCG_REG_R10
    180 };
    181 
    182 static const int tcg_target_call_oarg_regs[] = {
    183     TCG_REG_R3,
    184     TCG_REG_R4
    185 };
    186 
    187 static const int tcg_target_callee_save_regs[] = {
    188 #ifdef _CALL_DARWIN
    189     TCG_REG_R11,
    190 #endif
    191     TCG_REG_R14,
    192     TCG_REG_R15,
    193     TCG_REG_R16,
    194     TCG_REG_R17,
    195     TCG_REG_R18,
    196     TCG_REG_R19,
    197     TCG_REG_R20,
    198     TCG_REG_R21,
    199     TCG_REG_R22,
    200     TCG_REG_R23,
    201     TCG_REG_R24,
    202     TCG_REG_R25,
    203     TCG_REG_R26,
    204     TCG_REG_R27, /* currently used for the global env */
    205     TCG_REG_R28,
    206     TCG_REG_R29,
    207     TCG_REG_R30,
    208     TCG_REG_R31
    209 };
    210 
    211 static inline bool in_range_b(tcg_target_long target)
    212 {
    213     return target == sextract64(target, 0, 26);
    214 }
    215 
    216 static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
    217 			       const tcg_insn_unit *target)
    218 {
    219     ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    220     tcg_debug_assert(in_range_b(disp));
    221     return disp & 0x3fffffc;
    222 }
    223 
    224 static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
    225 {
    226     const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    227     ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
    228 
    229     if (in_range_b(disp)) {
    230         *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
    231         return true;
    232     }
    233     return false;
    234 }
    235 
    236 static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
    237 			       const tcg_insn_unit *target)
    238 {
    239     ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    240     tcg_debug_assert(disp == (int16_t) disp);
    241     return disp & 0xfffc;
    242 }
    243 
    244 static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
    245 {
    246     const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    247     ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
    248 
    249     if (disp == (int16_t) disp) {
    250         *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
    251         return true;
    252     }
    253     return false;
    254 }
    255 
    256 /* test if a constant matches the constraint */
    257 static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
    258 {
    259     if (ct & TCG_CT_CONST) {
    260         return 1;
    261     }
    262 
    263     /* The only 32-bit constraint we use aside from
    264        TCG_CT_CONST is TCG_CT_CONST_S16.  */
    265     if (type == TCG_TYPE_I32) {
    266         val = (int32_t)val;
    267     }
    268 
    269     if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
    270         return 1;
    271     } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
    272         return 1;
    273     } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
    274         return 1;
    275     } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
    276         return 1;
    277     } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
    278         return 1;
    279     } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
    280         return 1;
    281     } else if ((ct & TCG_CT_CONST_WSZ)
    282                && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
    283         return 1;
    284     }
    285     return 0;
    286 }
    287 
    288 #define OPCD(opc) ((opc)<<26)
    289 #define XO19(opc) (OPCD(19)|((opc)<<1))
    290 #define MD30(opc) (OPCD(30)|((opc)<<2))
    291 #define MDS30(opc) (OPCD(30)|((opc)<<1))
    292 #define XO31(opc) (OPCD(31)|((opc)<<1))
    293 #define XO58(opc) (OPCD(58)|(opc))
    294 #define XO62(opc) (OPCD(62)|(opc))
    295 #define VX4(opc)  (OPCD(4)|(opc))
    296 
    297 #define B      OPCD( 18)
    298 #define BC     OPCD( 16)
    299 #define LBZ    OPCD( 34)
    300 #define LHZ    OPCD( 40)
    301 #define LHA    OPCD( 42)
    302 #define LWZ    OPCD( 32)
    303 #define LWZUX  XO31( 55)
    304 #define STB    OPCD( 38)
    305 #define STH    OPCD( 44)
    306 #define STW    OPCD( 36)
    307 
    308 #define STD    XO62(  0)
    309 #define STDU   XO62(  1)
    310 #define STDX   XO31(149)
    311 
    312 #define LD     XO58(  0)
    313 #define LDX    XO31( 21)
    314 #define LDU    XO58(  1)
    315 #define LDUX   XO31( 53)
    316 #define LWA    XO58(  2)
    317 #define LWAX   XO31(341)
    318 
    319 #define ADDIC  OPCD( 12)
    320 #define ADDI   OPCD( 14)
    321 #define ADDIS  OPCD( 15)
    322 #define ORI    OPCD( 24)
    323 #define ORIS   OPCD( 25)
    324 #define XORI   OPCD( 26)
    325 #define XORIS  OPCD( 27)
    326 #define ANDI   OPCD( 28)
    327 #define ANDIS  OPCD( 29)
    328 #define MULLI  OPCD(  7)
    329 #define CMPLI  OPCD( 10)
    330 #define CMPI   OPCD( 11)
    331 #define SUBFIC OPCD( 8)
    332 
    333 #define LWZU   OPCD( 33)
    334 #define STWU   OPCD( 37)
    335 
    336 #define RLWIMI OPCD( 20)
    337 #define RLWINM OPCD( 21)
    338 #define RLWNM  OPCD( 23)
    339 
    340 #define RLDICL MD30(  0)
    341 #define RLDICR MD30(  1)
    342 #define RLDIMI MD30(  3)
    343 #define RLDCL  MDS30( 8)
    344 
    345 #define BCLR   XO19( 16)
    346 #define BCCTR  XO19(528)
    347 #define CRAND  XO19(257)
    348 #define CRANDC XO19(129)
    349 #define CRNAND XO19(225)
    350 #define CROR   XO19(449)
    351 #define CRNOR  XO19( 33)
    352 
    353 #define EXTSB  XO31(954)
    354 #define EXTSH  XO31(922)
    355 #define EXTSW  XO31(986)
    356 #define ADD    XO31(266)
    357 #define ADDE   XO31(138)
    358 #define ADDME  XO31(234)
    359 #define ADDZE  XO31(202)
    360 #define ADDC   XO31( 10)
    361 #define AND    XO31( 28)
    362 #define SUBF   XO31( 40)
    363 #define SUBFC  XO31(  8)
    364 #define SUBFE  XO31(136)
    365 #define SUBFME XO31(232)
    366 #define SUBFZE XO31(200)
    367 #define OR     XO31(444)
    368 #define XOR    XO31(316)
    369 #define MULLW  XO31(235)
    370 #define MULHW  XO31( 75)
    371 #define MULHWU XO31( 11)
    372 #define DIVW   XO31(491)
    373 #define DIVWU  XO31(459)
    374 #define MODSW  XO31(779)
    375 #define MODUW  XO31(267)
    376 #define CMP    XO31(  0)
    377 #define CMPL   XO31( 32)
    378 #define LHBRX  XO31(790)
    379 #define LWBRX  XO31(534)
    380 #define LDBRX  XO31(532)
    381 #define STHBRX XO31(918)
    382 #define STWBRX XO31(662)
    383 #define STDBRX XO31(660)
    384 #define MFSPR  XO31(339)
    385 #define MTSPR  XO31(467)
    386 #define SRAWI  XO31(824)
    387 #define NEG    XO31(104)
    388 #define MFCR   XO31( 19)
    389 #define MFOCRF (MFCR | (1u << 20))
    390 #define NOR    XO31(124)
    391 #define CNTLZW XO31( 26)
    392 #define CNTLZD XO31( 58)
    393 #define CNTTZW XO31(538)
    394 #define CNTTZD XO31(570)
    395 #define CNTPOPW XO31(378)
    396 #define CNTPOPD XO31(506)
    397 #define ANDC   XO31( 60)
    398 #define ORC    XO31(412)
    399 #define EQV    XO31(284)
    400 #define NAND   XO31(476)
    401 #define ISEL   XO31( 15)
    402 
    403 #define MULLD  XO31(233)
    404 #define MULHD  XO31( 73)
    405 #define MULHDU XO31(  9)
    406 #define DIVD   XO31(489)
    407 #define DIVDU  XO31(457)
    408 #define MODSD  XO31(777)
    409 #define MODUD  XO31(265)
    410 
    411 #define LBZX   XO31( 87)
    412 #define LHZX   XO31(279)
    413 #define LHAX   XO31(343)
    414 #define LWZX   XO31( 23)
    415 #define STBX   XO31(215)
    416 #define STHX   XO31(407)
    417 #define STWX   XO31(151)
    418 
    419 #define EIEIO  XO31(854)
    420 #define HWSYNC XO31(598)
    421 #define LWSYNC (HWSYNC | (1u << 21))
    422 
    423 #define SPR(a, b) ((((a)<<5)|(b))<<11)
    424 #define LR     SPR(8, 0)
    425 #define CTR    SPR(9, 0)
    426 
    427 #define SLW    XO31( 24)
    428 #define SRW    XO31(536)
    429 #define SRAW   XO31(792)
    430 
    431 #define SLD    XO31( 27)
    432 #define SRD    XO31(539)
    433 #define SRAD   XO31(794)
    434 #define SRADI  XO31(413<<1)
    435 
    436 #define BRH    XO31(219)
    437 #define BRW    XO31(155)
    438 #define BRD    XO31(187)
    439 
    440 #define TW     XO31( 4)
    441 #define TRAP   (TW | TO(31))
    442 
    443 #define NOP    ORI  /* ori 0,0,0 */
    444 
    445 #define LVX        XO31(103)
    446 #define LVEBX      XO31(7)
    447 #define LVEHX      XO31(39)
    448 #define LVEWX      XO31(71)
    449 #define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
    450 #define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
    451 #define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
    452 #define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
    453 #define LXSD       (OPCD(57) | 2)   /* v3.00 */
    454 #define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */
    455 
    456 #define STVX       XO31(231)
    457 #define STVEWX     XO31(199)
    458 #define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
    459 #define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
    460 #define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
    461 #define STXSD      (OPCD(61) | 2)   /* v3.00 */
    462 
    463 #define VADDSBS    VX4(768)
    464 #define VADDUBS    VX4(512)
    465 #define VADDUBM    VX4(0)
    466 #define VADDSHS    VX4(832)
    467 #define VADDUHS    VX4(576)
    468 #define VADDUHM    VX4(64)
    469 #define VADDSWS    VX4(896)
    470 #define VADDUWS    VX4(640)
    471 #define VADDUWM    VX4(128)
    472 #define VADDUDM    VX4(192)       /* v2.07 */
    473 
    474 #define VSUBSBS    VX4(1792)
    475 #define VSUBUBS    VX4(1536)
    476 #define VSUBUBM    VX4(1024)
    477 #define VSUBSHS    VX4(1856)
    478 #define VSUBUHS    VX4(1600)
    479 #define VSUBUHM    VX4(1088)
    480 #define VSUBSWS    VX4(1920)
    481 #define VSUBUWS    VX4(1664)
    482 #define VSUBUWM    VX4(1152)
    483 #define VSUBUDM    VX4(1216)      /* v2.07 */
    484 
    485 #define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
    486 #define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */
    487 
    488 #define VMAXSB     VX4(258)
    489 #define VMAXSH     VX4(322)
    490 #define VMAXSW     VX4(386)
    491 #define VMAXSD     VX4(450)       /* v2.07 */
    492 #define VMAXUB     VX4(2)
    493 #define VMAXUH     VX4(66)
    494 #define VMAXUW     VX4(130)
    495 #define VMAXUD     VX4(194)       /* v2.07 */
    496 #define VMINSB     VX4(770)
    497 #define VMINSH     VX4(834)
    498 #define VMINSW     VX4(898)
    499 #define VMINSD     VX4(962)       /* v2.07 */
    500 #define VMINUB     VX4(514)
    501 #define VMINUH     VX4(578)
    502 #define VMINUW     VX4(642)
    503 #define VMINUD     VX4(706)       /* v2.07 */
    504 
    505 #define VCMPEQUB   VX4(6)
    506 #define VCMPEQUH   VX4(70)
    507 #define VCMPEQUW   VX4(134)
    508 #define VCMPEQUD   VX4(199)       /* v2.07 */
    509 #define VCMPGTSB   VX4(774)
    510 #define VCMPGTSH   VX4(838)
    511 #define VCMPGTSW   VX4(902)
    512 #define VCMPGTSD   VX4(967)       /* v2.07 */
    513 #define VCMPGTUB   VX4(518)
    514 #define VCMPGTUH   VX4(582)
    515 #define VCMPGTUW   VX4(646)
    516 #define VCMPGTUD   VX4(711)       /* v2.07 */
    517 #define VCMPNEB    VX4(7)         /* v3.00 */
    518 #define VCMPNEH    VX4(71)        /* v3.00 */
    519 #define VCMPNEW    VX4(135)       /* v3.00 */
    520 
    521 #define VSLB       VX4(260)
    522 #define VSLH       VX4(324)
    523 #define VSLW       VX4(388)
    524 #define VSLD       VX4(1476)      /* v2.07 */
    525 #define VSRB       VX4(516)
    526 #define VSRH       VX4(580)
    527 #define VSRW       VX4(644)
    528 #define VSRD       VX4(1732)      /* v2.07 */
    529 #define VSRAB      VX4(772)
    530 #define VSRAH      VX4(836)
    531 #define VSRAW      VX4(900)
    532 #define VSRAD      VX4(964)       /* v2.07 */
    533 #define VRLB       VX4(4)
    534 #define VRLH       VX4(68)
    535 #define VRLW       VX4(132)
    536 #define VRLD       VX4(196)       /* v2.07 */
    537 
    538 #define VMULEUB    VX4(520)
    539 #define VMULEUH    VX4(584)
    540 #define VMULEUW    VX4(648)       /* v2.07 */
    541 #define VMULOUB    VX4(8)
    542 #define VMULOUH    VX4(72)
    543 #define VMULOUW    VX4(136)       /* v2.07 */
    544 #define VMULUWM    VX4(137)       /* v2.07 */
    545 #define VMULLD     VX4(457)       /* v3.10 */
    546 #define VMSUMUHM   VX4(38)
    547 
    548 #define VMRGHB     VX4(12)
    549 #define VMRGHH     VX4(76)
    550 #define VMRGHW     VX4(140)
    551 #define VMRGLB     VX4(268)
    552 #define VMRGLH     VX4(332)
    553 #define VMRGLW     VX4(396)
    554 
    555 #define VPKUHUM    VX4(14)
    556 #define VPKUWUM    VX4(78)
    557 
    558 #define VAND       VX4(1028)
    559 #define VANDC      VX4(1092)
    560 #define VNOR       VX4(1284)
    561 #define VOR        VX4(1156)
    562 #define VXOR       VX4(1220)
    563 #define VEQV       VX4(1668)      /* v2.07 */
    564 #define VNAND      VX4(1412)      /* v2.07 */
    565 #define VORC       VX4(1348)      /* v2.07 */
    566 
    567 #define VSPLTB     VX4(524)
    568 #define VSPLTH     VX4(588)
    569 #define VSPLTW     VX4(652)
    570 #define VSPLTISB   VX4(780)
    571 #define VSPLTISH   VX4(844)
    572 #define VSPLTISW   VX4(908)
    573 
    574 #define VSLDOI     VX4(44)
    575 
    576 #define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
    577 #define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
    578 #define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */
    579 
    580 #define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
    581 #define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
    582 #define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
    583 #define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
    584 #define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
    585 #define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */
    586 
    587 #define RT(r) ((r)<<21)
    588 #define RS(r) ((r)<<21)
    589 #define RA(r) ((r)<<16)
    590 #define RB(r) ((r)<<11)
    591 #define TO(t) ((t)<<21)
    592 #define SH(s) ((s)<<11)
    593 #define MB(b) ((b)<<6)
    594 #define ME(e) ((e)<<1)
    595 #define BO(o) ((o)<<21)
    596 #define MB64(b) ((b)<<5)
    597 #define FXM(b) (1 << (19 - (b)))
    598 
    599 #define VRT(r)  (((r) & 31) << 21)
    600 #define VRA(r)  (((r) & 31) << 16)
    601 #define VRB(r)  (((r) & 31) << 11)
    602 #define VRC(r)  (((r) & 31) <<  6)
    603 
    604 #define LK    1
    605 
    606 #define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
    607 #define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
    608 #define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
    609 #define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))
    610 
    611 #define BF(n)    ((n)<<23)
    612 #define BI(n, c) (((c)+((n)*4))<<16)
    613 #define BT(n, c) (((c)+((n)*4))<<21)
    614 #define BA(n, c) (((c)+((n)*4))<<16)
    615 #define BB(n, c) (((c)+((n)*4))<<11)
    616 #define BC_(n, c) (((c)+((n)*4))<<6)
    617 
    618 #define BO_COND_TRUE  BO(12)
    619 #define BO_COND_FALSE BO( 4)
    620 #define BO_ALWAYS     BO(20)
    621 
    622 enum {
    623     CR_LT,
    624     CR_GT,
    625     CR_EQ,
    626     CR_SO
    627 };
    628 
    629 static const uint32_t tcg_to_bc[] = {
    630     [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
    631     [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
    632     [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
    633     [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
    634     [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
    635     [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
    636     [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
    637     [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
    638     [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
    639     [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
    640 };
    641 
    642 /* The low bit here is set if the RA and RB fields must be inverted.  */
    643 static const uint32_t tcg_to_isel[] = {
    644     [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
    645     [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
    646     [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
    647     [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
    648     [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
    649     [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
    650     [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
    651     [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
    652     [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
    653     [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
    654 };
    655 
    656 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
    657                         intptr_t value, intptr_t addend)
    658 {
    659     const tcg_insn_unit *target;
    660     int16_t lo;
    661     int32_t hi;
    662 
    663     value += addend;
    664     target = (const tcg_insn_unit *)value;
    665 
    666     switch (type) {
    667     case R_PPC_REL14:
    668         return reloc_pc14(code_ptr, target);
    669     case R_PPC_REL24:
    670         return reloc_pc24(code_ptr, target);
    671     case R_PPC_ADDR16:
    672         /*
    673          * We are (slightly) abusing this relocation type.  In particular,
    674          * assert that the low 2 bits are zero, and do not modify them.
    675          * That way we can use this with LD et al that have opcode bits
    676          * in the low 2 bits of the insn.
    677          */
    678         if ((value & 3) || value != (int16_t)value) {
    679             return false;
    680         }
    681         *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
    682         break;
    683     case R_PPC_ADDR32:
    684         /*
    685          * We are abusing this relocation type.  Again, this points to
    686          * a pair of insns, lis + load.  This is an absolute address
    687          * relocation for PPC32 so the lis cannot be removed.
    688          */
    689         lo = value;
    690         hi = value - lo;
    691         if (hi + lo != value) {
    692             return false;
    693         }
    694         code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
    695         code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
    696         break;
    697     default:
    698         g_assert_not_reached();
    699     }
    700     return true;
    701 }
    702 
    703 static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
    704                              TCGReg base, tcg_target_long offset);
    705 
    706 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
    707 {
    708     if (ret == arg) {
    709         return true;
    710     }
    711     switch (type) {
    712     case TCG_TYPE_I64:
    713         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    714         /* fallthru */
    715     case TCG_TYPE_I32:
    716         if (ret < TCG_REG_V0) {
    717             if (arg < TCG_REG_V0) {
    718                 tcg_out32(s, OR | SAB(arg, ret, arg));
    719                 break;
    720             } else if (have_isa_2_07) {
    721                 tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
    722                           | VRT(arg) | RA(ret));
    723                 break;
    724             } else {
    725                 /* Altivec does not support vector->integer moves.  */
    726                 return false;
    727             }
    728         } else if (arg < TCG_REG_V0) {
    729             if (have_isa_2_07) {
    730                 tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
    731                           | VRT(ret) | RA(arg));
    732                 break;
    733             } else {
    734                 /* Altivec does not support integer->vector moves.  */
    735                 return false;
    736             }
    737         }
    738         /* fallthru */
    739     case TCG_TYPE_V64:
    740     case TCG_TYPE_V128:
    741         tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
    742         tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
    743         break;
    744     default:
    745         g_assert_not_reached();
    746     }
    747     return true;
    748 }
    749 
    750 static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
    751                                int sh, int mb)
    752 {
    753     tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    754     sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    755     mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    756     tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
    757 }
    758 
    759 static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
    760                                int sh, int mb, int me)
    761 {
    762     tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
    763 }
    764 
    765 static inline void tcg_out_ext8s(TCGContext *s, TCGReg dst, TCGReg src)
    766 {
    767     tcg_out32(s, EXTSB | RA(dst) | RS(src));
    768 }
    769 
    770 static inline void tcg_out_ext16s(TCGContext *s, TCGReg dst, TCGReg src)
    771 {
    772     tcg_out32(s, EXTSH | RA(dst) | RS(src));
    773 }
    774 
    775 static inline void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
    776 {
    777     tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
    778 }
    779 
    780 static inline void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
    781 {
    782     tcg_out32(s, EXTSW | RA(dst) | RS(src));
    783 }
    784 
    785 static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
    786 {
    787     tcg_out_rld(s, RLDICL, dst, src, 0, 32);
    788 }
    789 
    790 static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
    791 {
    792     tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
    793 }
    794 
    795 static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
    796 {
    797     tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
    798 }
    799 
    800 static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
    801 {
    802     /* Limit immediate shift count lest we create an illegal insn.  */
    803     tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
    804 }
    805 
    806 static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
    807 {
    808     tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
    809 }
    810 
    811 static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
    812 {
    813     tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
    814 }
    815 
    816 static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
    817 {
    818     tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
    819 }
    820 
    821 static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
    822 {
    823     TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
    824 
    825     if (have_isa_3_10) {
    826         tcg_out32(s, BRH | RA(dst) | RS(src));
    827         if (flags & TCG_BSWAP_OS) {
    828             tcg_out_ext16s(s, dst, dst);
    829         } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
    830             tcg_out_ext16u(s, dst, dst);
    831         }
    832         return;
    833     }
    834 
    835     /*
    836      * In the following,
    837      *   dep(a, b, m) -> (a & ~m) | (b & m)
    838      *
    839      * Begin with:                              src = xxxxabcd
    840      */
    841     /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
    842     tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
    843     /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
    844     tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);
    845 
    846     if (flags & TCG_BSWAP_OS) {
    847         tcg_out_ext16s(s, dst, tmp);
    848     } else {
    849         tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    850     }
    851 }
    852 
    853 static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
    854 {
    855     TCGReg tmp = dst == src ? TCG_REG_R0 : dst;
    856 
    857     if (have_isa_3_10) {
    858         tcg_out32(s, BRW | RA(dst) | RS(src));
    859         if (flags & TCG_BSWAP_OS) {
    860             tcg_out_ext32s(s, dst, dst);
    861         } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
    862             tcg_out_ext32u(s, dst, dst);
    863         }
    864         return;
    865     }
    866 
    867     /*
    868      * Stolen from gcc's builtin_bswap32.
    869      * In the following,
    870      *   dep(a, b, m) -> (a & ~m) | (b & m)
    871      *
    872      * Begin with:                              src = xxxxabcd
    873      */
    874     /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
    875     tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
    876     /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
    877     tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
    878     /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
    879     tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);
    880 
    881     if (flags & TCG_BSWAP_OS) {
    882         tcg_out_ext32s(s, dst, tmp);
    883     } else {
    884         tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    885     }
    886 }
    887 
    888 static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
    889 {
    890     TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
    891     TCGReg t1 = dst == src ? dst : TCG_REG_R0;
    892 
    893     if (have_isa_3_10) {
    894         tcg_out32(s, BRD | RA(dst) | RS(src));
    895         return;
    896     }
    897 
    898     /*
    899      * In the following,
    900      *   dep(a, b, m) -> (a & ~m) | (b & m)
    901      *
    902      * Begin with:                              src = abcdefgh
    903      */
    904     /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
    905     tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
    906     /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
    907     tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
    908     /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
    909     tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);
    910 
    911     /* t0 = rol64(t0, 32)                           = hgfe0000 */
    912     tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
    913     /* t1 = rol64(src, 32)                          = efghabcd */
    914     tcg_out_rld(s, RLDICL, t1, src, 32, 0);
    915 
    916     /* t0 = dep(t0, rol32(t1, 24), 0xffffffff)      = hgfebcda */
    917     tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
    918     /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
    919     tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
    920     /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
    921     tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);
    922 
    923     tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
    924 }
    925 
    926 /* Emit a move into ret of arg, if it can be done in one insn.  */
    927 static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
    928 {
    929     if (arg == (int16_t)arg) {
    930         tcg_out32(s, ADDI | TAI(ret, 0, arg));
    931         return true;
    932     }
    933     if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
    934         tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
    935         return true;
    936     }
    937     return false;
    938 }
    939 
    940 static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
    941                              tcg_target_long arg, bool in_prologue)
    942 {
    943     intptr_t tb_diff;
    944     tcg_target_long tmp;
    945     int shift;
    946 
    947     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
    948 
    949     if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
    950         arg = (int32_t)arg;
    951     }
    952 
    953     /* Load 16-bit immediates with one insn.  */
    954     if (tcg_out_movi_one(s, ret, arg)) {
    955         return;
    956     }
    957 
    958     /* Load addresses within the TB with one insn.  */
    959     tb_diff = tcg_tbrel_diff(s, (void *)arg);
    960     if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
    961         tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
    962         return;
    963     }
    964 
    965     /* Load 32-bit immediates with two insns.  Note that we've already
    966        eliminated bare ADDIS, so we know both insns are required.  */
    967     if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
    968         tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
    969         tcg_out32(s, ORI | SAI(ret, ret, arg));
    970         return;
    971     }
    972     if (arg == (uint32_t)arg && !(arg & 0x8000)) {
    973         tcg_out32(s, ADDI | TAI(ret, 0, arg));
    974         tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    975         return;
    976     }
    977 
    978     /* Load masked 16-bit value.  */
    979     if (arg > 0 && (arg & 0x8000)) {
    980         tmp = arg | 0x7fff;
    981         if ((tmp & (tmp + 1)) == 0) {
    982             int mb = clz64(tmp + 1) + 1;
    983             tcg_out32(s, ADDI | TAI(ret, 0, arg));
    984             tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
    985             return;
    986         }
    987     }
    988 
    989     /* Load common masks with 2 insns.  */
    990     shift = ctz64(arg);
    991     tmp = arg >> shift;
    992     if (tmp == (int16_t)tmp) {
    993         tcg_out32(s, ADDI | TAI(ret, 0, tmp));
    994         tcg_out_shli64(s, ret, ret, shift);
    995         return;
    996     }
    997     shift = clz64(arg);
    998     if (tcg_out_movi_one(s, ret, arg << shift)) {
    999         tcg_out_shri64(s, ret, ret, shift);
   1000         return;
   1001     }
   1002 
   1003     /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
   1004     if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
   1005         tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
   1006         return;
   1007     }
   1008 
   1009     /* Use the constant pool, if possible.  */
   1010     if (!in_prologue && USE_REG_TB) {
   1011         new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
   1012                        tcg_tbrel_diff(s, NULL));
   1013         tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
   1014         return;
   1015     }
   1016 
   1017     tmp = arg >> 31 >> 1;
   1018     tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
   1019     if (tmp) {
   1020         tcg_out_shli64(s, ret, ret, 32);
   1021     }
   1022     if (arg & 0xffff0000) {
   1023         tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
   1024     }
   1025     if (arg & 0xffff) {
   1026         tcg_out32(s, ORI | SAI(ret, ret, arg));
   1027     }
   1028 }
   1029 
   1030 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
   1031                              TCGReg ret, int64_t val)
   1032 {
   1033     uint32_t load_insn;
   1034     int rel, low;
   1035     intptr_t add;
   1036 
   1037     switch (vece) {
   1038     case MO_8:
   1039         low = (int8_t)val;
   1040         if (low >= -16 && low < 16) {
   1041             tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
   1042             return;
   1043         }
   1044         if (have_isa_3_00) {
   1045             tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
   1046             return;
   1047         }
   1048         break;
   1049 
   1050     case MO_16:
   1051         low = (int16_t)val;
   1052         if (low >= -16 && low < 16) {
   1053             tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
   1054             return;
   1055         }
   1056         break;
   1057 
   1058     case MO_32:
   1059         low = (int32_t)val;
   1060         if (low >= -16 && low < 16) {
   1061             tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
   1062             return;
   1063         }
   1064         break;
   1065     }
   1066 
   1067     /*
   1068      * Otherwise we must load the value from the constant pool.
   1069      */
   1070     if (USE_REG_TB) {
   1071         rel = R_PPC_ADDR16;
   1072         add = tcg_tbrel_diff(s, NULL);
   1073     } else {
   1074         rel = R_PPC_ADDR32;
   1075         add = 0;
   1076     }
   1077 
   1078     if (have_vsx) {
   1079         load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
   1080         load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
   1081         if (TCG_TARGET_REG_BITS == 64) {
   1082             new_pool_label(s, val, rel, s->code_ptr, add);
   1083         } else {
   1084             new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
   1085         }
   1086     } else {
   1087         load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
   1088         if (TCG_TARGET_REG_BITS == 64) {
   1089             new_pool_l2(s, rel, s->code_ptr, add, val, val);
   1090         } else {
   1091             new_pool_l4(s, rel, s->code_ptr, add,
   1092                         val >> 32, val, val >> 32, val);
   1093         }
   1094     }
   1095 
   1096     if (USE_REG_TB) {
   1097         tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
   1098         load_insn |= RA(TCG_REG_TB);
   1099     } else {
   1100         tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
   1101         tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
   1102     }
   1103     tcg_out32(s, load_insn);
   1104 }
   1105 
   1106 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
   1107                          tcg_target_long arg)
   1108 {
   1109     switch (type) {
   1110     case TCG_TYPE_I32:
   1111     case TCG_TYPE_I64:
   1112         tcg_debug_assert(ret < TCG_REG_V0);
   1113         tcg_out_movi_int(s, type, ret, arg, false);
   1114         break;
   1115 
   1116     default:
   1117         g_assert_not_reached();
   1118     }
   1119 }
   1120 
   1121 static bool mask_operand(uint32_t c, int *mb, int *me)
   1122 {
   1123     uint32_t lsb, test;
   1124 
   1125     /* Accept a bit pattern like:
   1126            0....01....1
   1127            1....10....0
   1128            0..01..10..0
   1129        Keep track of the transitions.  */
   1130     if (c == 0 || c == -1) {
   1131         return false;
   1132     }
   1133     test = c;
   1134     lsb = test & -test;
   1135     test += lsb;
   1136     if (test & (test - 1)) {
   1137         return false;
   1138     }
   1139 
   1140     *me = clz32(lsb);
   1141     *mb = test ? clz32(test & -test) + 1 : 0;
   1142     return true;
   1143 }
   1144 
   1145 static bool mask64_operand(uint64_t c, int *mb, int *me)
   1146 {
   1147     uint64_t lsb;
   1148 
   1149     if (c == 0) {
   1150         return false;
   1151     }
   1152 
   1153     lsb = c & -c;
   1154     /* Accept 1..10..0.  */
   1155     if (c == -lsb) {
   1156         *mb = 0;
   1157         *me = clz64(lsb);
   1158         return true;
   1159     }
   1160     /* Accept 0..01..1.  */
   1161     if (lsb == 1 && (c & (c + 1)) == 0) {
   1162         *mb = clz64(c + 1) + 1;
   1163         *me = 63;
   1164         return true;
   1165     }
   1166     return false;
   1167 }
   1168 
   1169 static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
   1170 {
   1171     int mb, me;
   1172 
   1173     if (mask_operand(c, &mb, &me)) {
   1174         tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
   1175     } else if ((c & 0xffff) == c) {
   1176         tcg_out32(s, ANDI | SAI(src, dst, c));
   1177         return;
   1178     } else if ((c & 0xffff0000) == c) {
   1179         tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
   1180         return;
   1181     } else {
   1182         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
   1183         tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
   1184     }
   1185 }
   1186 
   1187 static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
   1188 {
   1189     int mb, me;
   1190 
   1191     tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
   1192     if (mask64_operand(c, &mb, &me)) {
   1193         if (mb == 0) {
   1194             tcg_out_rld(s, RLDICR, dst, src, 0, me);
   1195         } else {
   1196             tcg_out_rld(s, RLDICL, dst, src, 0, mb);
   1197         }
   1198     } else if ((c & 0xffff) == c) {
   1199         tcg_out32(s, ANDI | SAI(src, dst, c));
   1200         return;
   1201     } else if ((c & 0xffff0000) == c) {
   1202         tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
   1203         return;
   1204     } else {
   1205         tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
   1206         tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
   1207     }
   1208 }
   1209 
   1210 static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
   1211                            int op_lo, int op_hi)
   1212 {
   1213     if (c >> 16) {
   1214         tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
   1215         src = dst;
   1216     }
   1217     if (c & 0xffff) {
   1218         tcg_out32(s, op_lo | SAI(src, dst, c));
   1219         src = dst;
   1220     }
   1221 }
   1222 
   1223 static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
   1224 {
   1225     tcg_out_zori32(s, dst, src, c, ORI, ORIS);
   1226 }
   1227 
   1228 static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
   1229 {
   1230     tcg_out_zori32(s, dst, src, c, XORI, XORIS);
   1231 }
   1232 
   1233 static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
   1234 {
   1235     ptrdiff_t disp = tcg_pcrel_diff(s, target);
   1236     if (in_range_b(disp)) {
   1237         tcg_out32(s, B | (disp & 0x3fffffc) | mask);
   1238     } else {
   1239         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
   1240         tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
   1241         tcg_out32(s, BCCTR | BO_ALWAYS | mask);
   1242     }
   1243 }
   1244 
   1245 static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
   1246                              TCGReg base, tcg_target_long offset)
   1247 {
   1248     tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
   1249     bool is_int_store = false;
   1250     TCGReg rs = TCG_REG_TMP1;
   1251 
   1252     switch (opi) {
   1253     case LD: case LWA:
   1254         align = 3;
   1255         /* FALLTHRU */
   1256     default:
   1257         if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
   1258             rs = rt;
   1259             break;
   1260         }
   1261         break;
   1262     case LXSD:
   1263     case STXSD:
   1264         align = 3;
   1265         break;
   1266     case LXV:
   1267     case STXV:
   1268         align = 15;
   1269         break;
   1270     case STD:
   1271         align = 3;
   1272         /* FALLTHRU */
   1273     case STB: case STH: case STW:
   1274         is_int_store = true;
   1275         break;
   1276     }
   1277 
   1278     /* For unaligned, or very large offsets, use the indexed form.  */
   1279     if (offset & align || offset != (int32_t)offset || opi == 0) {
   1280         if (rs == base) {
   1281             rs = TCG_REG_R0;
   1282         }
   1283         tcg_debug_assert(!is_int_store || rs != rt);
   1284         tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
   1285         tcg_out32(s, opx | TAB(rt & 31, base, rs));
   1286         return;
   1287     }
   1288 
   1289     l0 = (int16_t)offset;
   1290     offset = (offset - l0) >> 16;
   1291     l1 = (int16_t)offset;
   1292 
   1293     if (l1 < 0 && orig >= 0) {
   1294         extra = 0x4000;
   1295         l1 = (int16_t)(offset - 0x4000);
   1296     }
   1297     if (l1) {
   1298         tcg_out32(s, ADDIS | TAI(rs, base, l1));
   1299         base = rs;
   1300     }
   1301     if (extra) {
   1302         tcg_out32(s, ADDIS | TAI(rs, base, extra));
   1303         base = rs;
   1304     }
   1305     if (opi != ADDI || base != rt || l0 != 0) {
   1306         tcg_out32(s, opi | TAI(rt & 31, base, l0));
   1307     }
   1308 }
   1309 
   1310 static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
   1311                            TCGReg va, TCGReg vb, int shb)
   1312 {
   1313     tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
   1314 }
   1315 
   1316 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
   1317                        TCGReg base, intptr_t offset)
   1318 {
   1319     int shift;
   1320 
   1321     switch (type) {
   1322     case TCG_TYPE_I32:
   1323         if (ret < TCG_REG_V0) {
   1324             tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
   1325             break;
   1326         }
   1327         if (have_isa_2_07 && have_vsx) {
   1328             tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
   1329             break;
   1330         }
   1331         tcg_debug_assert((offset & 3) == 0);
   1332         tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
   1333         shift = (offset - 4) & 0xc;
   1334         if (shift) {
   1335             tcg_out_vsldoi(s, ret, ret, ret, shift);
   1336         }
   1337         break;
   1338     case TCG_TYPE_I64:
   1339         if (ret < TCG_REG_V0) {
   1340             tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
   1341             tcg_out_mem_long(s, LD, LDX, ret, base, offset);
   1342             break;
   1343         }
   1344         /* fallthru */
   1345     case TCG_TYPE_V64:
   1346         tcg_debug_assert(ret >= TCG_REG_V0);
   1347         if (have_vsx) {
   1348             tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
   1349                              ret, base, offset);
   1350             break;
   1351         }
   1352         tcg_debug_assert((offset & 7) == 0);
   1353         tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
   1354         if (offset & 8) {
   1355             tcg_out_vsldoi(s, ret, ret, ret, 8);
   1356         }
   1357         break;
   1358     case TCG_TYPE_V128:
   1359         tcg_debug_assert(ret >= TCG_REG_V0);
   1360         tcg_debug_assert((offset & 15) == 0);
   1361         tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
   1362                          LVX, ret, base, offset);
   1363         break;
   1364     default:
   1365         g_assert_not_reached();
   1366     }
   1367 }
   1368 
   1369 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
   1370                               TCGReg base, intptr_t offset)
   1371 {
   1372     int shift;
   1373 
   1374     switch (type) {
   1375     case TCG_TYPE_I32:
   1376         if (arg < TCG_REG_V0) {
   1377             tcg_out_mem_long(s, STW, STWX, arg, base, offset);
   1378             break;
   1379         }
   1380         if (have_isa_2_07 && have_vsx) {
   1381             tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
   1382             break;
   1383         }
   1384         assert((offset & 3) == 0);
   1385         tcg_debug_assert((offset & 3) == 0);
   1386         shift = (offset - 4) & 0xc;
   1387         if (shift) {
   1388             tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
   1389             arg = TCG_VEC_TMP1;
   1390         }
   1391         tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
   1392         break;
   1393     case TCG_TYPE_I64:
   1394         if (arg < TCG_REG_V0) {
   1395             tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
   1396             tcg_out_mem_long(s, STD, STDX, arg, base, offset);
   1397             break;
   1398         }
   1399         /* fallthru */
   1400     case TCG_TYPE_V64:
   1401         tcg_debug_assert(arg >= TCG_REG_V0);
   1402         if (have_vsx) {
   1403             tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
   1404                              STXSDX, arg, base, offset);
   1405             break;
   1406         }
   1407         tcg_debug_assert((offset & 7) == 0);
   1408         if (offset & 8) {
   1409             tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
   1410             arg = TCG_VEC_TMP1;
   1411         }
   1412         tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
   1413         tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
   1414         break;
   1415     case TCG_TYPE_V128:
   1416         tcg_debug_assert(arg >= TCG_REG_V0);
   1417         tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
   1418                          STVX, arg, base, offset);
   1419         break;
   1420     default:
   1421         g_assert_not_reached();
   1422     }
   1423 }
   1424 
   1425 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
   1426                                TCGReg base, intptr_t ofs)
   1427 {
   1428     return false;
   1429 }
   1430 
   1431 static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
   1432                         int const_arg2, int cr, TCGType type)
   1433 {
   1434     int imm;
   1435     uint32_t op;
   1436 
   1437     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
   1438 
   1439     /* Simplify the comparisons below wrt CMPI.  */
   1440     if (type == TCG_TYPE_I32) {
   1441         arg2 = (int32_t)arg2;
   1442     }
   1443 
   1444     switch (cond) {
   1445     case TCG_COND_EQ:
   1446     case TCG_COND_NE:
   1447         if (const_arg2) {
   1448             if ((int16_t) arg2 == arg2) {
   1449                 op = CMPI;
   1450                 imm = 1;
   1451                 break;
   1452             } else if ((uint16_t) arg2 == arg2) {
   1453                 op = CMPLI;
   1454                 imm = 1;
   1455                 break;
   1456             }
   1457         }
   1458         op = CMPL;
   1459         imm = 0;
   1460         break;
   1461 
   1462     case TCG_COND_LT:
   1463     case TCG_COND_GE:
   1464     case TCG_COND_LE:
   1465     case TCG_COND_GT:
   1466         if (const_arg2) {
   1467             if ((int16_t) arg2 == arg2) {
   1468                 op = CMPI;
   1469                 imm = 1;
   1470                 break;
   1471             }
   1472         }
   1473         op = CMP;
   1474         imm = 0;
   1475         break;
   1476 
   1477     case TCG_COND_LTU:
   1478     case TCG_COND_GEU:
   1479     case TCG_COND_LEU:
   1480     case TCG_COND_GTU:
   1481         if (const_arg2) {
   1482             if ((uint16_t) arg2 == arg2) {
   1483                 op = CMPLI;
   1484                 imm = 1;
   1485                 break;
   1486             }
   1487         }
   1488         op = CMPL;
   1489         imm = 0;
   1490         break;
   1491 
   1492     default:
   1493         tcg_abort();
   1494     }
   1495     op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
   1496 
   1497     if (imm) {
   1498         tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
   1499     } else {
   1500         if (const_arg2) {
   1501             tcg_out_movi(s, type, TCG_REG_R0, arg2);
   1502             arg2 = TCG_REG_R0;
   1503         }
   1504         tcg_out32(s, op | RA(arg1) | RB(arg2));
   1505     }
   1506 }
   1507 
   1508 static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
   1509                                 TCGReg dst, TCGReg src)
   1510 {
   1511     if (type == TCG_TYPE_I32) {
   1512         tcg_out32(s, CNTLZW | RS(src) | RA(dst));
   1513         tcg_out_shri32(s, dst, dst, 5);
   1514     } else {
   1515         tcg_out32(s, CNTLZD | RS(src) | RA(dst));
   1516         tcg_out_shri64(s, dst, dst, 6);
   1517     }
   1518 }
   1519 
   1520 static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
   1521 {
   1522     /* X != 0 implies X + -1 generates a carry.  Extra addition
   1523        trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.  */
   1524     if (dst != src) {
   1525         tcg_out32(s, ADDIC | TAI(dst, src, -1));
   1526         tcg_out32(s, SUBFE | TAB(dst, dst, src));
   1527     } else {
   1528         tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
   1529         tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
   1530     }
   1531 }
   1532 
   1533 static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
   1534                                   bool const_arg2)
   1535 {
   1536     if (const_arg2) {
   1537         if ((uint32_t)arg2 == arg2) {
   1538             tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
   1539         } else {
   1540             tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
   1541             tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
   1542         }
   1543     } else {
   1544         tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
   1545     }
   1546     return TCG_REG_R0;
   1547 }
   1548 
   1549 static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
   1550                             TCGArg arg0, TCGArg arg1, TCGArg arg2,
   1551                             int const_arg2)
   1552 {
   1553     int crop, sh;
   1554 
   1555     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
   1556 
   1557     /* Ignore high bits of a potential constant arg2.  */
   1558     if (type == TCG_TYPE_I32) {
   1559         arg2 = (uint32_t)arg2;
   1560     }
   1561 
   1562     /* Handle common and trivial cases before handling anything else.  */
   1563     if (arg2 == 0) {
   1564         switch (cond) {
   1565         case TCG_COND_EQ:
   1566             tcg_out_setcond_eq0(s, type, arg0, arg1);
   1567             return;
   1568         case TCG_COND_NE:
   1569             if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
   1570                 tcg_out_ext32u(s, TCG_REG_R0, arg1);
   1571                 arg1 = TCG_REG_R0;
   1572             }
   1573             tcg_out_setcond_ne0(s, arg0, arg1);
   1574             return;
   1575         case TCG_COND_GE:
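                    /* X >= 0 iff ~X < 0: complement, then fall through to
                       the sign-bit extraction below.  */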
   1576             tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
   1577             arg1 = arg0;
   1578             /* FALLTHRU */
   1579         case TCG_COND_LT:
   1580             /* Extract the sign bit.  */
   1581             if (type == TCG_TYPE_I32) {
   1582                 tcg_out_shri32(s, arg0, arg1, 31);
   1583             } else {
   1584                 tcg_out_shri64(s, arg0, arg1, 63);
   1585             }
   1586             return;
   1587         default:
   1588             break;
   1589         }
   1590     }
   1591 
   1592     /* If we have ISEL, we can implement everything with 3 or 4 insns.
   1593        All other cases below are also at least 3 insns, so speed up the
   1594        code generator by not considering them and always using ISEL.  */
   1595     if (have_isel) {
   1596         int isel, tab;
   1597 
   1598         tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
   1599 
   1600         isel = tcg_to_isel[cond];
   1601 
   1602         tcg_out_movi(s, type, arg0, 1);
   1603         if (isel & 1) {
   1604             /* arg0 = (bc ? 0 : 1) */
   1605             tab = TAB(arg0, 0, arg0);
   1606             isel &= ~1;
   1607         } else {
   1608             /* arg0 = (bc ? 1 : 0) */
   1609             tcg_out_movi(s, type, TCG_REG_R0, 0);
   1610             tab = TAB(arg0, arg0, TCG_REG_R0);
   1611         }
   1612         tcg_out32(s, isel | tab);
   1613         return;
   1614     }
   1615 
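            /*
             * Without isel, extract the result bit directly from CR field 7,
             * which occupies CR bits 28..31 (LT=28, GT=29, EQ=30).  MFOCRF
             * copies the CR into a GPR, and rotating left by bit+1 puts the
             * wanted flag into bit 31 for rlwinm to isolate: hence sh = 29
             * for LT, 30 for GT, and 31 for EQ (after a CR-logical inversion
             * for GE/LE).
             */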
   1616     switch (cond) {
   1617     case TCG_COND_EQ:
   1618         arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
   1619         tcg_out_setcond_eq0(s, type, arg0, arg1);
   1620         return;
   1621 
   1622     case TCG_COND_NE:
   1623         arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
   1624         /* Discard the high bits only once, rather than both inputs.  */
   1625         if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
   1626             tcg_out_ext32u(s, TCG_REG_R0, arg1);
   1627             arg1 = TCG_REG_R0;
   1628         }
   1629         tcg_out_setcond_ne0(s, arg0, arg1);
   1630         return;
   1631 
   1632     case TCG_COND_GT:
   1633     case TCG_COND_GTU:
   1634         sh = 30;
   1635         crop = 0;
   1636         goto crtest;
   1637 
   1638     case TCG_COND_LT:
   1639     case TCG_COND_LTU:
   1640         sh = 29;
   1641         crop = 0;
   1642         goto crtest;
   1643 
   1644     case TCG_COND_GE:
   1645     case TCG_COND_GEU:
   1646         sh = 31;
   1647         crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
   1648         goto crtest;
   1649 
   1650     case TCG_COND_LE:
   1651     case TCG_COND_LEU:
   1652         sh = 31;
   1653         crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
   1654     crtest:
   1655         tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
   1656         if (crop) {
   1657             tcg_out32(s, crop);
   1658         }
   1659         tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
   1660         tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
   1661         break;
   1662 
   1663     default:
   1664         tcg_abort();
   1665     }
   1666 }
   1667 
   1668 static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
   1669 {
   1670     if (l->has_value) {
   1671         bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
   1672     } else {
   1673         tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
   1674     }
   1675     tcg_out32(s, bc);
   1676 }
   1677 
   1678 static void tcg_out_brcond(TCGContext *s, TCGCond cond,
   1679                            TCGArg arg1, TCGArg arg2, int const_arg2,
   1680                            TCGLabel *l, TCGType type)
   1681 {
   1682     tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
   1683     tcg_out_bc(s, tcg_to_bc[cond], l);
   1684 }
   1685 
   1686 static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
   1687                             TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
   1688                             TCGArg v2, bool const_c2)
   1689 {
   1690     /* If for some reason both inputs are zero, don't produce bad code.  */
   1691     if (v1 == 0 && v2 == 0) {
   1692         tcg_out_movi(s, type, dest, 0);
   1693         return;
   1694     }
   1695 
   1696     tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
   1697 
   1698     if (have_isel) {
   1699         int isel = tcg_to_isel[cond];
   1700 
   1701         /* Swap the V operands if the operation indicates inversion.  */
   1702         if (isel & 1) {
   1703             int t = v1;
   1704             v1 = v2;
   1705             v2 = t;
   1706             isel &= ~1;
   1707         }
   1708         /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
   1709         if (v2 == 0) {
   1710             tcg_out_movi(s, type, TCG_REG_R0, 0);
   1711         }
   1712         tcg_out32(s, isel | TAB(dest, v1, v2));
   1713     } else {
   1714         if (dest == v2) {
   1715             cond = tcg_invert_cond(cond);
   1716             v2 = v1;
   1717         } else if (dest != v1) {
   1718             if (v1 == 0) {
   1719                 tcg_out_movi(s, type, dest, 0);
   1720             } else {
   1721                 tcg_out_mov(s, type, dest, v1);
   1722             }
   1723         }
   1724         /* Branch forward over one insn */
   1725         tcg_out32(s, tcg_to_bc[cond] | 8);
   1726         if (v2 == 0) {
   1727             tcg_out_movi(s, type, dest, 0);
   1728         } else {
   1729             tcg_out_mov(s, type, dest, v2);
   1730         }
   1731     }
   1732 }
   1733 
   1734 static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
   1735                           TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
   1736 {
   1737     if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
   1738         tcg_out32(s, opc | RA(a0) | RS(a1));
   1739     } else {
   1740         tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
   1741         /* Note that the only other valid constant for a2 is 0.  */
   1742         if (have_isel) {
   1743             tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
   1744             tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
   1745         } else if (!const_a2 && a0 == a2) {
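                    /* a0 already holds a2: if a1 was zero (CR7.EQ set above),
                       branch 8 bytes forward, over the count insn.  */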
   1746             tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
   1747             tcg_out32(s, opc | RA(a0) | RS(a1));
   1748         } else {
   1749             tcg_out32(s, opc | RA(a0) | RS(a1));
   1750             tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
   1751             if (const_a2) {
   1752                 tcg_out_movi(s, type, a0, 0);
   1753             } else {
   1754                 tcg_out_mov(s, type, a0, a2);
   1755             }
   1756         }
   1757     }
   1758 }
   1759 
   1760 static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
   1761                          const int *const_args)
   1762 {
   1763     static const struct { uint8_t bit1, bit2; } bits[] = {
   1764         [TCG_COND_LT ] = { CR_LT, CR_LT },
   1765         [TCG_COND_LE ] = { CR_LT, CR_GT },
   1766         [TCG_COND_GT ] = { CR_GT, CR_GT },
   1767         [TCG_COND_GE ] = { CR_GT, CR_LT },
   1768         [TCG_COND_LTU] = { CR_LT, CR_LT },
   1769         [TCG_COND_LEU] = { CR_LT, CR_GT },
   1770         [TCG_COND_GTU] = { CR_GT, CR_GT },
   1771         [TCG_COND_GEU] = { CR_GT, CR_LT },
   1772     };
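            /*
             * A double-word compare decomposes as
             *   (ah COND bh) || (ah == bh && al CONDu bl)
             * where the low halves always compare unsigned.  bit1 is the flag
             * tested on the high-part compare, bit2 the flag on the low-part
             * compare; the CR logic below merges the two into CR7[EQ].
             */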
   1773 
   1774     TCGCond cond = args[4], cond2;
   1775     TCGArg al, ah, bl, bh;
   1776     int blconst, bhconst;
   1777     int op, bit1, bit2;
   1778 
   1779     al = args[0];
   1780     ah = args[1];
   1781     bl = args[2];
   1782     bh = args[3];
   1783     blconst = const_args[2];
   1784     bhconst = const_args[3];
   1785 
   1786     switch (cond) {
   1787     case TCG_COND_EQ:
   1788         op = CRAND;
   1789         goto do_equality;
   1790     case TCG_COND_NE:
   1791         op = CRNAND;
   1792     do_equality:
   1793         tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
   1794         tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
   1795         tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
   1796         break;
   1797 
   1798     case TCG_COND_LT:
   1799     case TCG_COND_LE:
   1800     case TCG_COND_GT:
   1801     case TCG_COND_GE:
   1802     case TCG_COND_LTU:
   1803     case TCG_COND_LEU:
   1804     case TCG_COND_GTU:
   1805     case TCG_COND_GEU:
   1806         bit1 = bits[cond].bit1;
   1807         bit2 = bits[cond].bit2;
   1808         op = (bit1 != bit2 ? CRANDC : CRAND);
   1809         cond2 = tcg_unsigned_cond(cond);
   1810 
   1811         tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
   1812         tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
   1813         tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
   1814         tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
   1815         break;
   1816 
   1817     default:
   1818         tcg_abort();
   1819     }
   1820 }
   1821 
   1822 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
   1823                              const int *const_args)
   1824 {
   1825     tcg_out_cmp2(s, args + 1, const_args + 1);
   1826     tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
   1827     tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
   1828 }
   1829 
   1830 static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
   1831                              const int *const_args)
   1832 {
   1833     tcg_out_cmp2(s, args, const_args);
   1834     tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
   1835 }
   1836 
   1837 static void tcg_out_mb(TCGContext *s, TCGArg a0)
   1838 {
   1839     uint32_t insn;
   1840 
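            /* lwsync orders load/load, load/store and store/store; only a
               store followed by a load requires the full hwsync.  */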
   1841     if (a0 & TCG_MO_ST_LD) {
   1842         insn = HWSYNC;
   1843     } else {
   1844         insn = LWSYNC;
   1845     }
   1846 
   1847     tcg_out32(s, insn);
   1848 }
   1849 
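        /*
         * Pack two insns into one 64-bit word so a single aligned store
         * patches both atomically; the first insn lands at the lower
         * address on either endianness.
         */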
   1850 static inline uint64_t make_pair(tcg_insn_unit i1, tcg_insn_unit i2)
   1851 {
   1852     if (HOST_BIG_ENDIAN) {
   1853         return (uint64_t)i1 << 32 | i2;
   1854     }
   1855     return (uint64_t)i2 << 32 | i1;
   1856 }
   1857 
   1858 static inline void ppc64_replace2(uintptr_t rx, uintptr_t rw,
   1859                                   tcg_insn_unit i0, tcg_insn_unit i1)
   1860 {
   1861 #if TCG_TARGET_REG_BITS == 64
   1862     qatomic_set((uint64_t *)rw, make_pair(i0, i1));
   1863     flush_idcache_range(rx, rw, 8);
   1864 #else
   1865     qemu_build_not_reached();
   1866 #endif
   1867 }
   1868 
   1869 static inline void ppc64_replace4(uintptr_t rx, uintptr_t rw,
   1870                                   tcg_insn_unit i0, tcg_insn_unit i1,
   1871                                   tcg_insn_unit i2, tcg_insn_unit i3)
   1872 {
   1873     uint64_t p[2];
   1874 
   1875     p[!HOST_BIG_ENDIAN] = make_pair(i0, i1);
   1876     p[HOST_BIG_ENDIAN] = make_pair(i2, i3);
   1877 
   1878     /*
   1879      * There's no convenient way to get the compiler to allocate a pair
   1880      * of registers at an even index, so copy into r6/r7 and clobber.
   1881      */
   1882     asm("mr  %%r6, %1\n\t"
   1883         "mr  %%r7, %2\n\t"
   1884         "stq %%r6, %0"
   1885         : "=Q"(*(__int128 *)rw) : "r"(p[0]), "r"(p[1]) : "r6", "r7");
   1886     flush_idcache_range(rx, rw, 16);
   1887 }
   1888 
   1889 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
   1890                               uintptr_t jmp_rw, uintptr_t addr)
   1891 {
   1892     tcg_insn_unit i0, i1, i2, i3;
   1893     intptr_t tb_diff = addr - tc_ptr;
   1894     intptr_t br_diff = addr - (jmp_rx + 4);
   1895     intptr_t lo, hi;
   1896 
   1897     if (TCG_TARGET_REG_BITS == 32) {
   1898         intptr_t diff = addr - jmp_rx;
   1899         tcg_debug_assert(in_range_b(diff));
   1900         qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fffffc));
   1901         flush_idcache_range(jmp_rx, jmp_rw, 4);
   1902         return;
   1903     }
   1904 
   1905     /*
   1906      * For 16-bit displacements, we can use a single add + branch.
   1907      * This happens quite often.
   1908      */
   1909     if (tb_diff == (int16_t)tb_diff) {
   1910         i0 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
   1911         i1 = B | (br_diff & 0x3fffffc);
   1912         ppc64_replace2(jmp_rx, jmp_rw, i0, i1);
   1913         return;
   1914     }
   1915 
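            /*
             * Split the 32-bit displacement for an addis/addi pair.  The low
             * half is sign-extended by addi, so the high half absorbs the
             * borrow: e.g. 0x12348000 splits as hi = 0x12350000, lo = -0x8000.
             */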
   1916     lo = (int16_t)tb_diff;
   1917     hi = (int32_t)(tb_diff - lo);
   1918     assert(tb_diff == hi + lo);
   1919     i0 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
   1920     i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
   1921 
   1922     /*
   1923      * Without stq from 2.07, we can only update two insns,
   1924      * and those must be the ones that load the target address.
   1925      */
   1926     if (!have_isa_2_07) {
   1927         ppc64_replace2(jmp_rx, jmp_rw, i0, i1);
   1928         return;
   1929     }
   1930 
   1931     /*
   1932      * For 26-bit displacements, we can use a direct branch.
   1933      * Otherwise we still need the indirect branch, which we
   1934      * must restore after a potential direct branch write.
   1935      */
   1936     br_diff -= 4;
   1937     if (in_range_b(br_diff)) {
   1938         i2 = B | (br_diff & 0x3fffffc);
   1939         i3 = NOP;
   1940     } else {
   1941         i2 = MTSPR | RS(TCG_REG_TB) | CTR;
   1942         i3 = BCCTR | BO_ALWAYS;
   1943     }
   1944     ppc64_replace4(jmp_rx, jmp_rw, i0, i1, i2, i3);
   1945 }
   1946 
   1947 static void tcg_out_call_int(TCGContext *s, int lk,
   1948                              const tcg_insn_unit *target)
   1949 {
   1950 #ifdef _CALL_AIX
   1951     /* Look through the function descriptor.  Branch directly when the
   1952        target is in range and the TOC value is cheap to build.  */
   1953     const void *tgt = ((const void * const *)target)[0];
   1954     uintptr_t toc = ((const uintptr_t *)target)[1];
   1955     intptr_t diff = tcg_pcrel_diff(s, tgt);
   1956 
   1957     if (in_range_b(diff) && toc == (uint32_t)toc) {
   1958         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
   1959         tcg_out_b(s, lk, tgt);
   1960     } else {
   1961         /* Fold the low bits of the constant into the addresses below.  */
   1962         intptr_t arg = (intptr_t)target;
   1963         int ofs = (int16_t)arg;
   1964 
   1965         if (ofs + 8 < 0x8000) {
   1966             arg -= ofs;
   1967         } else {
   1968             ofs = 0;
   1969         }
   1970         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
   1971         tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
   1972         tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
   1973         tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
   1974         tcg_out32(s, BCCTR | BO_ALWAYS | lk);
   1975     }
   1976 #elif defined(_CALL_ELF) && _CALL_ELF == 2
   1977     intptr_t diff;
   1978 
   1979     /* In the ELFv2 ABI, we have to set up r12 to contain the destination
   1980        address, which the callee uses to compute its TOC address.  */
   1981     /* FIXME: when the branch is in range, we could avoid r12 load if we
   1982        knew that the destination uses the same TOC, and what its local
   1983        entry point offset is.  */
   1984     tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
   1985 
   1986     diff = tcg_pcrel_diff(s, target);
   1987     if (in_range_b(diff)) {
   1988         tcg_out_b(s, lk, target);
   1989     } else {
   1990         tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
   1991         tcg_out32(s, BCCTR | BO_ALWAYS | lk);
   1992     }
   1993 #else
   1994     tcg_out_b(s, lk, target);
   1995 #endif
   1996 }
   1997 
   1998 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
   1999 {
   2000     tcg_out_call_int(s, LK, target);
   2001 }
   2002 
   2003 static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
   2004     [MO_UB] = LBZX,
   2005     [MO_UW] = LHZX,
   2006     [MO_UL] = LWZX,
   2007     [MO_UQ] = LDX,
   2008     [MO_SW] = LHAX,
   2009     [MO_SL] = LWAX,
   2010     [MO_BSWAP | MO_UB] = LBZX,
   2011     [MO_BSWAP | MO_UW] = LHBRX,
   2012     [MO_BSWAP | MO_UL] = LWBRX,
   2013     [MO_BSWAP | MO_UQ] = LDBRX,
   2014 };
   2015 
   2016 static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
   2017     [MO_UB] = STBX,
   2018     [MO_UW] = STHX,
   2019     [MO_UL] = STWX,
   2020     [MO_UQ] = STDX,
   2021     [MO_BSWAP | MO_UB] = STBX,
   2022     [MO_BSWAP | MO_UW] = STHBRX,
   2023     [MO_BSWAP | MO_UL] = STWBRX,
   2024     [MO_BSWAP | MO_UQ] = STDBRX,
   2025 };
   2026 
   2027 static const uint32_t qemu_exts_opc[4] = {
   2028     EXTSB, EXTSH, EXTSW, 0
   2029 };
   2030 
   2031 #if defined (CONFIG_SOFTMMU)
   2032 /* helper signature: helper_ld_mmu(CPUArchState *env, target_ulong addr,
   2033  *                                 MemOpIdx oi, uintptr_t ra)
   2034  */
   2035 static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
   2036     [MO_UB]   = helper_ret_ldub_mmu,
   2037     [MO_LEUW] = helper_le_lduw_mmu,
   2038     [MO_LEUL] = helper_le_ldul_mmu,
   2039     [MO_LEUQ] = helper_le_ldq_mmu,
   2040     [MO_BEUW] = helper_be_lduw_mmu,
   2041     [MO_BEUL] = helper_be_ldul_mmu,
   2042     [MO_BEUQ] = helper_be_ldq_mmu,
   2043 };
   2044 
   2045 /* helper signature: helper_st_mmu(CPUArchState *env, target_ulong addr,
   2046  *                                 uintxx_t val, MemOpIdx oi, uintptr_t ra)
   2047  */
   2048 static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
   2049     [MO_UB]   = helper_ret_stb_mmu,
   2050     [MO_LEUW] = helper_le_stw_mmu,
   2051     [MO_LEUL] = helper_le_stl_mmu,
   2052     [MO_LEUQ] = helper_le_stq_mmu,
   2053     [MO_BEUW] = helper_be_stw_mmu,
   2054     [MO_BEUL] = helper_be_stl_mmu,
   2055     [MO_BEUQ] = helper_be_stq_mmu,
   2056 };
   2057 
   2058 /* We expect to use a 16-bit negative offset from ENV.  */
   2059 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
   2060 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
   2061 
   2062 /* Perform the TLB load and compare.  Places the result of the comparison
   2063    in CR7, loads the addend of the TLB into R3, and returns the register
   2064    containing the guest address (zero-extended into R4).  Clobbers R0 and R2. */
   2065 
   2066 static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
   2067                                TCGReg addrlo, TCGReg addrhi,
   2068                                int mem_index, bool is_read)
   2069 {
   2070     int cmp_off
   2071         = (is_read
   2072            ? offsetof(CPUTLBEntry, addr_read)
   2073            : offsetof(CPUTLBEntry, addr_write));
   2074     int fast_off = TLB_MASK_TABLE_OFS(mem_index);
   2075     int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
   2076     int table_off = fast_off + offsetof(CPUTLBDescFast, table);
   2077     unsigned s_bits = opc & MO_SIZE;
   2078     unsigned a_bits = get_alignment_bits(opc);
   2079 
   2080     /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
   2081     tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
   2082     tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
   2083 
   2084     /* Extract the page index, shifted into place for tlb index.  */
   2085     if (TCG_TARGET_REG_BITS == 32) {
   2086         tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
   2087                        TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
   2088     } else {
   2089         tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
   2090                        TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
   2091     }
   2092     tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
   2093 
   2094     /* Load the TLB comparator.  */
   2095     if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
   2096         uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
   2097                         ? LWZUX : LDUX);
   2098         tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
   2099     } else {
   2100         tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
   2101         if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
   2102             tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
   2103             tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
   2104         } else {
   2105             tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
   2106         }
   2107     }
   2108 
   2109     /* Load the TLB addend for use on the fast path.  Do this as early
   2110        as possible to minimize load-use delay.  */
   2111     tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
   2112                offsetof(CPUTLBEntry, addend));
   2113 
   2114     /* Clear the non-page, non-alignment bits from the address */
   2115     if (TCG_TARGET_REG_BITS == 32) {
   2116         /* We don't support unaligned accesses on 32-bit hosts.
   2117          * Preserve the bottom bits so that an unaligned access
   2118          * forces a comparison failure.
   2119          */
   2120         if (a_bits < s_bits) {
   2121             a_bits = s_bits;
   2122         }
   2123         tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
   2124                     (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
   2125     } else {
   2126         TCGReg t = addrlo;
   2127 
   2128         /* If the access is unaligned, we need to make sure we fail if we
   2129          * cross a page boundary.  The trick is to add the access size-1
   2130          * to the address before masking the low bits.  That will make the
   2131          * address overflow to the next page if we cross a page boundary,
   2132          * which will then force a mismatch of the TLB compare.
   2133          */
   2134         if (a_bits < s_bits) {
   2135             unsigned a_mask = (1 << a_bits) - 1;
   2136             unsigned s_mask = (1 << s_bits) - 1;
   2137             tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
   2138             t = TCG_REG_R0;
   2139         }
   2140 
   2141         /* Mask the address for the requested alignment.  */
   2142         if (TARGET_LONG_BITS == 32) {
   2143             tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
   2144                         (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
   2145             /* Zero-extend the address for use in the final address.  */
   2146             tcg_out_ext32u(s, TCG_REG_R4, addrlo);
   2147             addrlo = TCG_REG_R4;
   2148         } else if (a_bits == 0) {
   2149             tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
   2150         } else {
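                    /*
                     * Rotate the page-offset field to the top, clear all of
                     * it except the low a_bits, then rotate back.  The page
                     * number is preserved and any misaligned low bits remain
                     * set, forcing the TLB compare to fail just as in the
                     * 32-bit path above.
                     */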
   2151             tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
   2152                         64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
   2153             tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
   2154         }
   2155     }
   2156 
   2157     if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
   2158         tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
   2159                     0, 7, TCG_TYPE_I32);
   2160         tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
   2161         tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
   2162     } else {
   2163         tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
   2164                     0, 7, TCG_TYPE_TL);
   2165     }
   2166 
   2167     return addrlo;
   2168 }
   2169 
   2170 /* Record the context of a call to the out-of-line helper code for the
   2171    slow path of a load or store, so that we can later generate the
   2172    correct helper call.  */
   2173 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
   2174                                 TCGReg datalo_reg, TCGReg datahi_reg,
   2175                                 TCGReg addrlo_reg, TCGReg addrhi_reg,
   2176                                 tcg_insn_unit *raddr, tcg_insn_unit *lptr)
   2177 {
   2178     TCGLabelQemuLdst *label = new_ldst_label(s);
   2179 
   2180     label->is_ld = is_ld;
   2181     label->oi = oi;
   2182     label->datalo_reg = datalo_reg;
   2183     label->datahi_reg = datahi_reg;
   2184     label->addrlo_reg = addrlo_reg;
   2185     label->addrhi_reg = addrhi_reg;
   2186     label->raddr = tcg_splitwx_to_rx(raddr);
   2187     label->label_ptr[0] = lptr;
   2188 }
   2189 
   2190 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
   2191 {
   2192     MemOpIdx oi = lb->oi;
   2193     MemOp opc = get_memop(oi);
   2194     TCGReg hi, lo, arg = TCG_REG_R3;
   2195 
   2196     if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
   2197         return false;
   2198     }
   2199 
   2200     tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
   2201 
   2202     lo = lb->addrlo_reg;
   2203     hi = lb->addrhi_reg;
   2204     if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
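                /* 32-bit SysV passes a 64-bit argument in an aligned register
                   pair starting at an odd-numbered GPR (r3:r4, r5:r6, ...),
                   hence rounding the argument register up to odd.  */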
   2205 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
   2206         arg |= 1;
   2207 #endif
   2208         tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
   2209         tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
   2210     } else {
   2211         /* If the address needed to be zero-extended, we'll have already
   2212            placed it in R4.  The only remaining case is 64-bit guest.  */
   2213         tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
   2214     }
   2215 
   2216     tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
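            /* The return-address argument is the value that the fast path's
               branch-and-link left in LR.  */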
   2217     tcg_out32(s, MFSPR | RT(arg) | LR);
   2218 
   2219     tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
   2220 
   2221     lo = lb->datalo_reg;
   2222     hi = lb->datahi_reg;
   2223     if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
   2224         tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
   2225         tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
   2226     } else if (opc & MO_SIGN) {
   2227         uint32_t insn = qemu_exts_opc[opc & MO_SIZE];
   2228         tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3));
   2229     } else {
   2230         tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3);
   2231     }
   2232 
   2233     tcg_out_b(s, 0, lb->raddr);
   2234     return true;
   2235 }
   2236 
   2237 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
   2238 {
   2239     MemOpIdx oi = lb->oi;
   2240     MemOp opc = get_memop(oi);
   2241     MemOp s_bits = opc & MO_SIZE;
   2242     TCGReg hi, lo, arg = TCG_REG_R3;
   2243 
   2244     if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
   2245         return false;
   2246     }
   2247 
   2248     tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
   2249 
   2250     lo = lb->addrlo_reg;
   2251     hi = lb->addrhi_reg;
   2252     if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
   2253 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
   2254         arg |= 1;
   2255 #endif
   2256         tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
   2257         tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
   2258     } else {
   2259         /* If the address needed to be zero-extended, we'll have already
   2260            placed it in R4.  The only remaining case is 64-bit guest.  */
   2261         tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
   2262     }
   2263 
   2264     lo = lb->datalo_reg;
   2265     hi = lb->datahi_reg;
   2266     if (TCG_TARGET_REG_BITS == 32) {
   2267         switch (s_bits) {
   2268         case MO_64:
   2269 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
   2270             arg |= 1;
   2271 #endif
   2272             tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
   2273             /* FALLTHRU */
   2274         case MO_32:
   2275             tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
   2276             break;
   2277         default:
   2278             tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31);
   2279             break;
   2280         }
   2281     } else {
   2282         if (s_bits == MO_64) {
   2283             tcg_out_mov(s, TCG_TYPE_I64, arg++, lo);
   2284         } else {
   2285             tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits));
   2286         }
   2287     }
   2288 
   2289     tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
   2290     tcg_out32(s, MFSPR | RT(arg) | LR);
   2291 
   2292     tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
   2293 
   2294     tcg_out_b(s, 0, lb->raddr);
   2295     return true;
   2296 }
   2297 #else
   2298 
   2299 static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
   2300                                    TCGReg addrhi, unsigned a_bits)
   2301 {
   2302     unsigned a_mask = (1 << a_bits) - 1;
   2303     TCGLabelQemuLdst *label = new_ldst_label(s);
   2304 
   2305     label->is_ld = is_ld;
   2306     label->addrlo_reg = addrlo;
   2307     label->addrhi_reg = addrhi;
   2308 
   2309     /* We expect a_bits to max out at 7; the mask fits ANDI's 16-bit immediate. */
   2310     tcg_debug_assert(a_bits < 16);
   2311     tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, a_mask));
   2312 
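            /* andi. set CR0: branch-and-link to the slow path when any low
               bit is set (EQ clear); LR then points back at the fast path.  */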
   2313     label->label_ptr[0] = s->code_ptr;
   2314     tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
   2315 
   2316     label->raddr = tcg_splitwx_to_rx(s->code_ptr);
   2317 }
   2318 
   2319 static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
   2320 {
   2321     if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
   2322         return false;
   2323     }
   2324 
   2325     if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
   2326         TCGReg arg = TCG_REG_R4;
   2327 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
   2328         arg |= 1;
   2329 #endif
   2330         if (l->addrlo_reg != arg) {
   2331             tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
   2332             tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
   2333         } else if (l->addrhi_reg != arg + 1) {
   2334             tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
   2335             tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
   2336         } else {
   2337             tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, arg);
   2338             tcg_out_mov(s, TCG_TYPE_I32, arg, arg + 1);
   2339             tcg_out_mov(s, TCG_TYPE_I32, arg + 1, TCG_REG_R0);
   2340         }
   2341     } else {
   2342         tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R4, l->addrlo_reg);
   2343     }
   2344     tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, TCG_AREG0);
   2345 
   2346     /* "Tail call" to the helper, with the return address back inline. */
   2347     tcg_out_call_int(s, 0, (const void *)(l->is_ld ? helper_unaligned_ld
   2348                                           : helper_unaligned_st));
   2349     return true;
   2350 }
   2351 
   2352 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
   2353 {
   2354     return tcg_out_fail_alignment(s, l);
   2355 }
   2356 
   2357 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
   2358 {
   2359     return tcg_out_fail_alignment(s, l);
   2360 }
   2361 
   2362 #endif /* SOFTMMU */
   2363 
   2364 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
   2365 {
   2366     TCGReg datalo, datahi, addrlo, rbase;
   2367     TCGReg addrhi __attribute__((unused));
   2368     MemOpIdx oi;
   2369     MemOp opc, s_bits;
   2370 #ifdef CONFIG_SOFTMMU
   2371     int mem_index;
   2372     tcg_insn_unit *label_ptr;
   2373 #else
   2374     unsigned a_bits;
   2375 #endif
   2376 
   2377     datalo = *args++;
   2378     datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
   2379     addrlo = *args++;
   2380     addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
   2381     oi = *args++;
   2382     opc = get_memop(oi);
   2383     s_bits = opc & MO_SIZE;
   2384 
   2385 #ifdef CONFIG_SOFTMMU
   2386     mem_index = get_mmuidx(oi);
   2387     addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);
   2388 
   2389     /* Conditional branch-and-link to the slow path; LR records the fast-path address. */
   2390     label_ptr = s->code_ptr;
   2391     tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
   2392 
   2393     rbase = TCG_REG_R3;
   2394 #else  /* !CONFIG_SOFTMMU */
   2395     a_bits = get_alignment_bits(opc);
   2396     if (a_bits) {
   2397         tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
   2398     }
   2399     rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
   2400     if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
   2401         tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
   2402         addrlo = TCG_REG_TMP1;
   2403     }
   2404 #endif
   2405 
   2406     if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
   2407         if (opc & MO_BSWAP) {
   2408             tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
   2409             tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
   2410             tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0));
   2411         } else if (rbase != 0) {
   2412             tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
   2413             tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo));
   2414             tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0));
   2415         } else if (addrlo == datahi) {
   2416             tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
   2417             tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
   2418         } else {
   2419             tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
   2420             tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
   2421         }
   2422     } else {
   2423         uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
   2424         if (!have_isa_2_06 && insn == LDBRX) {
   2425             tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
   2426             tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
   2427             tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
   2428             tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
   2429         } else if (insn) {
   2430             tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
   2431         } else {
   2432             insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
   2433             tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
   2434             insn = qemu_exts_opc[s_bits];
   2435             tcg_out32(s, insn | RA(datalo) | RS(datalo));
   2436         }
   2437     }
   2438 
   2439 #ifdef CONFIG_SOFTMMU
   2440     add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
   2441                         s->code_ptr, label_ptr);
   2442 #endif
   2443 }
   2444 
   2445 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
   2446 {
   2447     TCGReg datalo, datahi, addrlo, rbase;
   2448     TCGReg addrhi __attribute__((unused));
   2449     MemOpIdx oi;
   2450     MemOp opc, s_bits;
   2451 #ifdef CONFIG_SOFTMMU
   2452     int mem_index;
   2453     tcg_insn_unit *label_ptr;
   2454 #else
   2455     unsigned a_bits;
   2456 #endif
   2457 
   2458     datalo = *args++;
   2459     datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
   2460     addrlo = *args++;
   2461     addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
   2462     oi = *args++;
   2463     opc = get_memop(oi);
   2464     s_bits = opc & MO_SIZE;
   2465 
   2466 #ifdef CONFIG_SOFTMMU
   2467     mem_index = get_mmuidx(oi);
   2468     addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false);
   2469 
   2470     /* Conditional branch-and-link to the slow path; LR records the fast-path address. */
   2471     label_ptr = s->code_ptr;
   2472     tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
   2473 
   2474     rbase = TCG_REG_R3;
   2475 #else  /* !CONFIG_SOFTMMU */
   2476     a_bits = get_alignment_bits(opc);
   2477     if (a_bits) {
   2478         tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
   2479     }
   2480     rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
   2481     if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
   2482         tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
   2483         addrlo = TCG_REG_TMP1;
   2484     }
   2485 #endif
   2486 
   2487     if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
   2488         if (opc & MO_BSWAP) {
   2489             tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
   2490             tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
   2491             tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0));
   2492         } else if (rbase != 0) {
   2493             tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
   2494             tcg_out32(s, STWX | SAB(datahi, rbase, addrlo));
   2495             tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0));
   2496         } else {
   2497             tcg_out32(s, STW | TAI(datahi, addrlo, 0));
   2498             tcg_out32(s, STW | TAI(datalo, addrlo, 4));
   2499         }
   2500     } else {
   2501         uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
   2502         if (!have_isa_2_06 && insn == STDBRX) {
   2503             tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
   2504             tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
   2505             tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
   2506             tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1));
   2507         } else {
   2508             tcg_out32(s, insn | SAB(datalo, rbase, addrlo));
   2509         }
   2510     }
   2511 
   2512 #ifdef CONFIG_SOFTMMU
   2513     add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
   2514                         s->code_ptr, label_ptr);
   2515 #endif
   2516 }
   2517 
   2518 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
   2519 {
   2520     int i;
   2521     for (i = 0; i < count; ++i) {
   2522         p[i] = NOP;
   2523     }
   2524 }
   2525 
   2526 /* Parameters for function call generation, used in tcg.c.  */
   2527 #define TCG_TARGET_STACK_ALIGN       16
   2528 #define TCG_TARGET_EXTEND_ARGS       1
   2529 
   2530 #ifdef _CALL_AIX
   2531 # define LINK_AREA_SIZE                (6 * SZR)
   2532 # define LR_OFFSET                     (1 * SZR)
   2533 # define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
   2534 #elif defined(_CALL_DARWIN)
   2535 # define LINK_AREA_SIZE                (6 * SZR)
   2536 # define LR_OFFSET                     (2 * SZR)
   2537 #elif TCG_TARGET_REG_BITS == 64
   2538 # if defined(_CALL_ELF) && _CALL_ELF == 2
   2539 #  define LINK_AREA_SIZE               (4 * SZR)
   2540 #  define LR_OFFSET                    (1 * SZR)
   2541 # endif
   2542 #else /* TCG_TARGET_REG_BITS == 32 */
   2543 # if defined(_CALL_SYSV)
   2544 #  define LINK_AREA_SIZE               (2 * SZR)
   2545 #  define LR_OFFSET                    (1 * SZR)
   2546 # endif
   2547 #endif
   2548 #ifndef LR_OFFSET
   2549 # error "Unhandled abi"
   2550 #endif
   2551 #ifndef TCG_TARGET_CALL_STACK_OFFSET
   2552 # define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
   2553 #endif
   2554 
   2555 #define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
   2556 #define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
   2557 
   2558 #define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
   2559                      + TCG_STATIC_CALL_ARGS_SIZE    \
   2560                      + CPU_TEMP_BUF_SIZE            \
   2561                      + REG_SAVE_SIZE                \
   2562                      + TCG_TARGET_STACK_ALIGN - 1)  \
   2563                     & -TCG_TARGET_STACK_ALIGN)
   2564 
   2565 #define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
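        /*
         * Frame layout, from the stack pointer upward: the ABI link area
         * (plus the AIX parameter save area), TCG_STATIC_CALL_ARGS_SIZE,
         * the CPU temp buffer, then the callee-saved registers at
         * REG_SAVE_BOT, with alignment padding folded in by FRAME_SIZE.
         */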
   2566 
   2567 static void tcg_target_qemu_prologue(TCGContext *s)
   2568 {
   2569     int i;
   2570 
   2571 #ifdef _CALL_AIX
   2572     const void **desc = (const void **)s->code_ptr;
   2573     desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
   2574     desc[1] = 0;                            /* environment pointer */
   2575     s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
   2576 #endif
   2577 
   2578     tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
   2579                   CPU_TEMP_BUF_SIZE);
   2580 
   2581     /* Prologue */
   2582     tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
   2583     tcg_out32(s, (SZR == 8 ? STDU : STWU)
   2584               | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
   2585 
   2586     for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
   2587         tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
   2588                    TCG_REG_R1, REG_SAVE_BOT + i * SZR);
   2589     }
   2590     tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
   2591 
   2592 #ifndef CONFIG_SOFTMMU
   2593     if (guest_base) {
   2594         tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
   2595         tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
   2596     }
   2597 #endif
   2598 
   2599     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
   2600     tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
   2601     if (USE_REG_TB) {
   2602         tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
   2603     }
   2604     tcg_out32(s, BCCTR | BO_ALWAYS);
   2605 
   2606     /* Epilogue */
   2607     tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
   2608 
   2609     tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
   2610     for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
   2611         tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
   2612                    TCG_REG_R1, REG_SAVE_BOT + i * SZR);
   2613     }
   2614     tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
   2615     tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
   2616     tcg_out32(s, BCLR | BO_ALWAYS);
   2617 }
   2618 
   2619 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
   2620                        const TCGArg args[TCG_MAX_OP_ARGS],
   2621                        const int const_args[TCG_MAX_OP_ARGS])
   2622 {
   2623     TCGArg a0, a1, a2;
   2624 
   2625     switch (opc) {
   2626     case INDEX_op_exit_tb:
   2627         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
   2628         tcg_out_b(s, 0, tcg_code_gen_epilogue);
   2629         break;
   2630     case INDEX_op_goto_tb:
   2631         if (s->tb_jmp_insn_offset) {
   2632             /* Direct jump. */
   2633             if (TCG_TARGET_REG_BITS == 64) {
   2634                 /* Ensure the next insns are 8- or 16-byte aligned. */
   2635                 while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) {
   2636                     tcg_out32(s, NOP);
   2637                 }
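                        /* Record the patch point and emit a placeholder
                           addis/addi pair; tb_target_set_jmp_target rewrites
                           it (and, from ISA 2.07, the branch that follows)
                           at run time.  */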
   2638                 s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
   2639                 tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0));
   2640                 tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0));
   2641             } else {
   2642                 s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
   2643                 tcg_out32(s, B);
   2644                 s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
   2645                 break;
   2646             }
   2647         } else {
   2648             /* Indirect jump. */
   2649             tcg_debug_assert(s->tb_jmp_insn_offset == NULL);
   2650             tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0,
   2651                        (intptr_t)(s->tb_jmp_target_addr + args[0]));
   2652         }
   2653         tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
   2654         tcg_out32(s, BCCTR | BO_ALWAYS);
   2655         set_jmp_reset_offset(s, args[0]);
   2656         if (USE_REG_TB) {
   2657             /* For the unlinked case, need to reset TCG_REG_TB.  */
   2658             tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
   2659                              -tcg_current_code_size(s));
   2660         }
   2661         break;
   2662     case INDEX_op_goto_ptr:
   2663         tcg_out32(s, MTSPR | RS(args[0]) | CTR);
   2664         if (USE_REG_TB) {
   2665             tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
   2666         }
   2667         tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
   2668         tcg_out32(s, BCCTR | BO_ALWAYS);
   2669         break;
   2670     case INDEX_op_br:
   2671         {
   2672             TCGLabel *l = arg_label(args[0]);
   2673             uint32_t insn = B;
   2674 
   2675             if (l->has_value) {
   2676                 insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
   2677                                        l->u.value_ptr);
   2678             } else {
   2679                 tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
   2680             }
   2681             tcg_out32(s, insn);
   2682         }
   2683         break;
   2684     case INDEX_op_ld8u_i32:
   2685     case INDEX_op_ld8u_i64:
   2686         tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
   2687         break;
   2688     case INDEX_op_ld8s_i32:
   2689     case INDEX_op_ld8s_i64:
   2690         tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
   2691         tcg_out_ext8s(s, args[0], args[0]);
   2692         break;
   2693     case INDEX_op_ld16u_i32:
   2694     case INDEX_op_ld16u_i64:
   2695         tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
   2696         break;
   2697     case INDEX_op_ld16s_i32:
   2698     case INDEX_op_ld16s_i64:
   2699         tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
   2700         break;
   2701     case INDEX_op_ld_i32:
   2702     case INDEX_op_ld32u_i64:
   2703         tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
   2704         break;
   2705     case INDEX_op_ld32s_i64:
   2706         tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
   2707         break;
   2708     case INDEX_op_ld_i64:
   2709         tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
   2710         break;
   2711     case INDEX_op_st8_i32:
   2712     case INDEX_op_st8_i64:
   2713         tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
   2714         break;
   2715     case INDEX_op_st16_i32:
   2716     case INDEX_op_st16_i64:
   2717         tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
   2718         break;
   2719     case INDEX_op_st_i32:
   2720     case INDEX_op_st32_i64:
   2721         tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
   2722         break;
   2723     case INDEX_op_st_i64:
   2724         tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
   2725         break;
   2726 
   2727     case INDEX_op_add_i32:
   2728         a0 = args[0], a1 = args[1], a2 = args[2];
   2729         if (const_args[2]) {
   2730         do_addi_32:
   2731             tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
   2732         } else {
   2733             tcg_out32(s, ADD | TAB(a0, a1, a2));
   2734         }
   2735         break;
   2736     case INDEX_op_sub_i32:
   2737         a0 = args[0], a1 = args[1], a2 = args[2];
   2738         if (const_args[1]) {
   2739             if (const_args[2]) {
   2740                 tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
   2741             } else {
   2742                 tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
   2743             }
   2744         } else if (const_args[2]) {
   2745             a2 = -a2;
   2746             goto do_addi_32;
   2747         } else {
   2748             tcg_out32(s, SUBF | TAB(a0, a2, a1));
   2749         }
   2750         break;
   2751 
   2752     case INDEX_op_and_i32:
   2753         a0 = args[0], a1 = args[1], a2 = args[2];
   2754         if (const_args[2]) {
   2755             tcg_out_andi32(s, a0, a1, a2);
   2756         } else {
   2757             tcg_out32(s, AND | SAB(a1, a0, a2));
   2758         }
   2759         break;
   2760     case INDEX_op_and_i64:
   2761         a0 = args[0], a1 = args[1], a2 = args[2];
   2762         if (const_args[2]) {
   2763             tcg_out_andi64(s, a0, a1, a2);
   2764         } else {
   2765             tcg_out32(s, AND | SAB(a1, a0, a2));
   2766         }
   2767         break;
   2768     case INDEX_op_or_i64:
   2769     case INDEX_op_or_i32:
   2770         a0 = args[0], a1 = args[1], a2 = args[2];
   2771         if (const_args[2]) {
   2772             tcg_out_ori32(s, a0, a1, a2);
   2773         } else {
   2774             tcg_out32(s, OR | SAB(a1, a0, a2));
   2775         }
   2776         break;
   2777     case INDEX_op_xor_i64:
   2778     case INDEX_op_xor_i32:
   2779         a0 = args[0], a1 = args[1], a2 = args[2];
   2780         if (const_args[2]) {
   2781             tcg_out_xori32(s, a0, a1, a2);
   2782         } else {
   2783             tcg_out32(s, XOR | SAB(a1, a0, a2));
   2784         }
   2785         break;
   2786     case INDEX_op_andc_i32:
   2787         a0 = args[0], a1 = args[1], a2 = args[2];
   2788         if (const_args[2]) {
   2789             tcg_out_andi32(s, a0, a1, ~a2);
   2790         } else {
   2791             tcg_out32(s, ANDC | SAB(a1, a0, a2));
   2792         }
   2793         break;
   2794     case INDEX_op_andc_i64:
   2795         a0 = args[0], a1 = args[1], a2 = args[2];
   2796         if (const_args[2]) {
   2797             tcg_out_andi64(s, a0, a1, ~a2);
   2798         } else {
   2799             tcg_out32(s, ANDC | SAB(a1, a0, a2));
   2800         }
   2801         break;
   2802     case INDEX_op_orc_i32:
   2803         if (const_args[2]) {
   2804             tcg_out_ori32(s, args[0], args[1], ~args[2]);
   2805             break;
   2806         }
   2807         /* FALLTHRU */
   2808     case INDEX_op_orc_i64:
   2809         tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
   2810         break;
   2811     case INDEX_op_eqv_i32:
   2812         if (const_args[2]) {
   2813             tcg_out_xori32(s, args[0], args[1], ~args[2]);
   2814             break;
   2815         }
   2816         /* FALLTHRU */
   2817     case INDEX_op_eqv_i64:
   2818         tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
   2819         break;
   2820     case INDEX_op_nand_i32:
   2821     case INDEX_op_nand_i64:
   2822         tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
   2823         break;
   2824     case INDEX_op_nor_i32:
   2825     case INDEX_op_nor_i64:
   2826         tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
   2827         break;
   2828 
   2829     case INDEX_op_clz_i32:
   2830         tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
   2831                       args[2], const_args[2]);
   2832         break;
   2833     case INDEX_op_ctz_i32:
   2834         tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
   2835                       args[2], const_args[2]);
   2836         break;
   2837     case INDEX_op_ctpop_i32:
   2838         tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
   2839         break;
   2840 
   2841     case INDEX_op_clz_i64:
   2842         tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
   2843                       args[2], const_args[2]);
   2844         break;
   2845     case INDEX_op_ctz_i64:
   2846         tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
   2847                       args[2], const_args[2]);
   2848         break;
   2849     case INDEX_op_ctpop_i64:
   2850         tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
   2851         break;
   2852 
   2853     case INDEX_op_mul_i32:
   2854         a0 = args[0], a1 = args[1], a2 = args[2];
   2855         if (const_args[2]) {
   2856             tcg_out32(s, MULLI | TAI(a0, a1, a2));
   2857         } else {
   2858             tcg_out32(s, MULLW | TAB(a0, a1, a2));
   2859         }
   2860         break;
   2861 
   2862     case INDEX_op_div_i32:
   2863         tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
   2864         break;
   2865 
   2866     case INDEX_op_divu_i32:
   2867         tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
   2868         break;
   2869 
   2870     case INDEX_op_rem_i32:
   2871         tcg_out32(s, MODSW | TAB(args[0], args[1], args[2]));
   2872         break;
   2873 
   2874     case INDEX_op_remu_i32:
   2875         tcg_out32(s, MODUW | TAB(args[0], args[1], args[2]));
   2876         break;
   2877 
   2878     case INDEX_op_shl_i32:
   2879         if (const_args[2]) {
   2880             /* Limit immediate shift count lest we create an illegal insn.  */
   2881             tcg_out_shli32(s, args[0], args[1], args[2] & 31);
   2882         } else {
   2883             tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
   2884         }
   2885         break;
   2886     case INDEX_op_shr_i32:
   2887         if (const_args[2]) {
   2888             /* Limit immediate shift count lest we create an illegal insn.  */
   2889             tcg_out_shri32(s, args[0], args[1], args[2] & 31);
   2890         } else {
   2891             tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
   2892         }
   2893         break;
   2894     case INDEX_op_sar_i32:
   2895         if (const_args[2]) {
   2896             tcg_out_sari32(s, args[0], args[1], args[2]);
   2897         } else {
   2898             tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
   2899         }
   2900         break;
   2901     case INDEX_op_rotl_i32:
   2902         if (const_args[2]) {
   2903             tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
   2904         } else {
   2905             tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
   2906                          | MB(0) | ME(31));
   2907         }
   2908         break;
   2909     case INDEX_op_rotr_i32:
   2910         if (const_args[2]) {
   2911             tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
   2912         } else {
   2913             tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
   2914             tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
   2915                          | MB(0) | ME(31));
   2916         }
   2917         break;
   2918 
   2919     case INDEX_op_brcond_i32:
   2920         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
   2921                        arg_label(args[3]), TCG_TYPE_I32);
   2922         break;
   2923     case INDEX_op_brcond_i64:
   2924         tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
   2925                        arg_label(args[3]), TCG_TYPE_I64);
   2926         break;
   2927     case INDEX_op_brcond2_i32:
   2928         tcg_out_brcond2(s, args, const_args);
   2929         break;
   2930 
   2931     case INDEX_op_neg_i32:
   2932     case INDEX_op_neg_i64:
   2933         tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
   2934         break;
   2935 
   2936     case INDEX_op_not_i32:
   2937     case INDEX_op_not_i64:
   2938         tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
   2939         break;
   2940 
   2941     case INDEX_op_add_i64:
   2942         a0 = args[0], a1 = args[1], a2 = args[2];
   2943         if (const_args[2]) {
   2944         do_addi_64:
   2945             tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
   2946         } else {
   2947             tcg_out32(s, ADD | TAB(a0, a1, a2));
   2948         }
   2949         break;
   2950     case INDEX_op_sub_i64:
   2951         a0 = args[0], a1 = args[1], a2 = args[2];
   2952         if (const_args[1]) {
   2953             if (const_args[2]) {
   2954                 tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
   2955             } else {
   2956                 tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
   2957             }
   2958         } else if (const_args[2]) {
   2959             a2 = -a2;
   2960             goto do_addi_64;
   2961         } else {
   2962             tcg_out32(s, SUBF | TAB(a0, a2, a1));
   2963         }
   2964         break;
   2965 
   2966     case INDEX_op_shl_i64:
   2967         if (const_args[2]) {
   2968             /* Limit immediate shift count lest we create an illegal insn.  */
   2969             tcg_out_shli64(s, args[0], args[1], args[2] & 63);
   2970         } else {
   2971             tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
   2972         }
   2973         break;
   2974     case INDEX_op_shr_i64:
   2975         if (const_args[2]) {
   2976             /* Limit immediate shift count lest we create an illegal insn.  */
   2977             tcg_out_shri64(s, args[0], args[1], args[2] & 63);
   2978         } else {
   2979             tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
   2980         }
   2981         break;
   2982     case INDEX_op_sar_i64:
   2983         if (const_args[2]) {
   2984             tcg_out_sari64(s, args[0], args[1], args[2]);
   2985         } else {
   2986             tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
   2987         }
   2988         break;
   2989     case INDEX_op_rotl_i64:
   2990         if (const_args[2]) {
   2991             tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
   2992         } else {
   2993             tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
   2994         }
   2995         break;
   2996     case INDEX_op_rotr_i64:
   2997         if (const_args[2]) {
   2998             tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
   2999         } else {
   3000             tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
   3001             tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
   3002         }
   3003         break;
   3004 
   3005     case INDEX_op_mul_i64:
   3006         a0 = args[0], a1 = args[1], a2 = args[2];
   3007         if (const_args[2]) {
   3008             tcg_out32(s, MULLI | TAI(a0, a1, a2));
   3009         } else {
   3010             tcg_out32(s, MULLD | TAB(a0, a1, a2));
   3011         }
   3012         break;
   3013     case INDEX_op_div_i64:
   3014         tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
   3015         break;
   3016     case INDEX_op_divu_i64:
   3017         tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
   3018         break;
   3019     case INDEX_op_rem_i64:
   3020         tcg_out32(s, MODSD | TAB(args[0], args[1], args[2]));
   3021         break;
   3022     case INDEX_op_remu_i64:
   3023         tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
   3024         break;
   3025 
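             /* The boolean argument selects the 64-bit data variant. */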
   3026     case INDEX_op_qemu_ld_i32:
   3027         tcg_out_qemu_ld(s, args, false);
   3028         break;
   3029     case INDEX_op_qemu_ld_i64:
   3030         tcg_out_qemu_ld(s, args, true);
   3031         break;
   3032     case INDEX_op_qemu_st_i32:
   3033         tcg_out_qemu_st(s, args, false);
   3034         break;
   3035     case INDEX_op_qemu_st_i64:
   3036         tcg_out_qemu_st(s, args, true);
   3037         break;
   3038 
   3039     case INDEX_op_ext8s_i32:
   3040     case INDEX_op_ext8s_i64:
   3041         tcg_out_ext8s(s, args[0], args[1]);
   3042         break;
   3043     case INDEX_op_ext16s_i32:
   3044     case INDEX_op_ext16s_i64:
   3045         tcg_out_ext16s(s, args[0], args[1]);
   3046         break;
   3047     case INDEX_op_ext_i32_i64:
   3048     case INDEX_op_ext32s_i64:
   3049         tcg_out_ext32s(s, args[0], args[1]);
   3050         break;
   3051     case INDEX_op_extu_i32_i64:
   3052         tcg_out_ext32u(s, args[0], args[1]);
   3053         break;
   3054 
   3055     case INDEX_op_setcond_i32:
   3056         tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
   3057                         const_args[2]);
   3058         break;
   3059     case INDEX_op_setcond_i64:
   3060         tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
   3061                         const_args[2]);
   3062         break;
   3063     case INDEX_op_setcond2_i32:
   3064         tcg_out_setcond2(s, args, const_args);
   3065         break;
   3066 
   3067     case INDEX_op_bswap16_i32:
   3068     case INDEX_op_bswap16_i64:
   3069         tcg_out_bswap16(s, args[0], args[1], args[2]);
   3070         break;
   3071     case INDEX_op_bswap32_i32:
   3072         tcg_out_bswap32(s, args[0], args[1], 0);
   3073         break;
   3074     case INDEX_op_bswap32_i64:
   3075         tcg_out_bswap32(s, args[0], args[1], args[2]);
   3076         break;
   3077     case INDEX_op_bswap64_i64:
   3078         tcg_out_bswap64(s, args[0], args[1]);
   3079         break;
   3080 
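             /* A constant value to deposit is necessarily zero (constraint rZ),
                so clear the field instead; 2u << (len - 1) avoids the undefined
                1 << 32 for a full-width field. */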
   3081     case INDEX_op_deposit_i32:
   3082         if (const_args[2]) {
   3083             uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
   3084             tcg_out_andi32(s, args[0], args[0], ~mask);
   3085         } else {
   3086             tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
   3087                         32 - args[3] - args[4], 31 - args[3]);
   3088         }
   3089         break;
   3090     case INDEX_op_deposit_i64:
   3091         if (const_args[2]) {
   3092             uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
   3093             tcg_out_andi64(s, args[0], args[0], ~mask);
   3094         } else {
   3095             tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
   3096                         64 - args[3] - args[4]);
   3097         }
   3098         break;
   3099 
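             /* Extract as rotate-plus-mask: rotate the field down to bit 0,
                then keep the low len bits. */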
   3100     case INDEX_op_extract_i32:
   3101         tcg_out_rlw(s, RLWINM, args[0], args[1],
   3102                     32 - args[2], 32 - args[3], 31);
   3103         break;
   3104     case INDEX_op_extract_i64:
   3105         tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
   3106         break;
   3107 
   3108     case INDEX_op_movcond_i32:
   3109         tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
   3110                         args[3], args[4], const_args[2]);
   3111         break;
   3112     case INDEX_op_movcond_i64:
   3113         tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
   3114                         args[3], args[4], const_args[2]);
   3115         break;
   3116 
   3117 #if TCG_TARGET_REG_BITS == 64
   3118     case INDEX_op_add2_i64:
   3119 #else
   3120     case INDEX_op_add2_i32:
   3121 #endif
   3122         /* Note that the CA bit is defined based on the word size of the
   3123            environment.  So in 64-bit mode it's always carry-out of bit 63.
   3124            The fallback code using deposit works just as well for 32-bit.  */
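                 /* If the low-part output aliases a high-part input, build the
                    low part in R0 and move it into place afterward. */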
   3125         a0 = args[0], a1 = args[1];
   3126         if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
   3127             a0 = TCG_REG_R0;
   3128         }
   3129         if (const_args[4]) {
   3130             tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
   3131         } else {
   3132             tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
   3133         }
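                 /* A constant high-part addend is 0 or -1 (constraint rZM):
                    ADDZE adds CA alone, ADDME adds CA - 1. */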
   3134         if (const_args[5]) {
   3135             tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
   3136         } else {
   3137             tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
   3138         }
   3139         if (a0 != args[0]) {
   3140             tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
   3141         }
   3142         break;
   3143 
   3144 #if TCG_TARGET_REG_BITS == 64
   3145     case INDEX_op_sub2_i64:
   3146 #else
   3147     case INDEX_op_sub2_i32:
   3148 #endif
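                 /* Mirror of add2: keep the low result clear of the high-part
                    inputs, and use SUBFZE (~RA + CA) or SUBFME (~RA + CA - 1)
                    when the high-part minuend is the constant 0 or -1. */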
   3149         a0 = args[0], a1 = args[1];
   3150         if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
   3151             a0 = TCG_REG_R0;
   3152         }
   3153         if (const_args[2]) {
   3154             tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
   3155         } else {
   3156             tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
   3157         }
   3158         if (const_args[3]) {
   3159             tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
   3160         } else {
   3161             tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
   3162         }
   3163         if (a0 != args[0]) {
   3164             tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
   3165         }
   3166         break;
   3167 
   3168     case INDEX_op_muluh_i32:
   3169         tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
   3170         break;
   3171     case INDEX_op_mulsh_i32:
   3172         tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
   3173         break;
   3174     case INDEX_op_muluh_i64:
   3175         tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
   3176         break;
   3177     case INDEX_op_mulsh_i64:
   3178         tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
   3179         break;
   3180 
   3181     case INDEX_op_mb:
   3182         tcg_out_mb(s, args[0]);
   3183         break;
   3184 
   3185     case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
   3186     case INDEX_op_mov_i64:
   3187     case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
   3188     default:
   3189         tcg_abort();
   3190     }
   3191 }
   3192 
   3193 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
   3194 {
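             /* Return 1 if the op is supported directly, -1 if it can be
                expanded via tcg_expand_vec_op, 0 if unsupported. */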
   3195     switch (opc) {
   3196     case INDEX_op_and_vec:
   3197     case INDEX_op_or_vec:
   3198     case INDEX_op_xor_vec:
   3199     case INDEX_op_andc_vec:
   3200     case INDEX_op_not_vec:
   3201     case INDEX_op_nor_vec:
   3202     case INDEX_op_eqv_vec:
   3203     case INDEX_op_nand_vec:
   3204         return 1;
   3205     case INDEX_op_orc_vec:
   3206         return have_isa_2_07;
   3207     case INDEX_op_add_vec:
   3208     case INDEX_op_sub_vec:
   3209     case INDEX_op_smax_vec:
   3210     case INDEX_op_smin_vec:
   3211     case INDEX_op_umax_vec:
   3212     case INDEX_op_umin_vec:
   3213     case INDEX_op_shlv_vec:
   3214     case INDEX_op_shrv_vec:
   3215     case INDEX_op_sarv_vec:
   3216     case INDEX_op_rotlv_vec:
   3217         return vece <= MO_32 || have_isa_2_07;
   3218     case INDEX_op_ssadd_vec:
   3219     case INDEX_op_sssub_vec:
   3220     case INDEX_op_usadd_vec:
   3221     case INDEX_op_ussub_vec:
   3222         return vece <= MO_32;
   3223     case INDEX_op_cmp_vec:
   3224     case INDEX_op_shli_vec:
   3225     case INDEX_op_shri_vec:
   3226     case INDEX_op_sari_vec:
   3227     case INDEX_op_rotli_vec:
    3228         return (vece <= MO_32 || have_isa_2_07) ? -1 : 0;
   3229     case INDEX_op_neg_vec:
   3230         return vece >= MO_32 && have_isa_3_00;
   3231     case INDEX_op_mul_vec:
   3232         switch (vece) {
   3233         case MO_8:
   3234         case MO_16:
   3235             return -1;
   3236         case MO_32:
   3237             return have_isa_2_07 ? 1 : -1;
   3238         case MO_64:
   3239             return have_isa_3_10;
   3240         }
   3241         return 0;
   3242     case INDEX_op_bitsel_vec:
   3243         return have_vsx;
   3244     case INDEX_op_rotrv_vec:
   3245         return -1;
   3246     default:
   3247         return 0;
   3248     }
   3249 }
   3250 
   3251 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
   3252                             TCGReg dst, TCGReg src)
   3253 {
   3254     tcg_debug_assert(dst >= TCG_REG_V0);
   3255 
   3256     /* Splat from integer reg allowed via constraints for v3.00.  */
   3257     if (src < TCG_REG_V0) {
   3258         tcg_debug_assert(have_isa_3_00);
   3259         switch (vece) {
   3260         case MO_64:
   3261             tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
   3262             return true;
   3263         case MO_32:
   3264             tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
   3265             return true;
   3266         default:
   3267             /* Fail, so that we fall back on either dupm or mov+dup.  */
   3268             return false;
   3269         }
   3270     }
   3271 
   3272     /*
   3273      * Recall we use (or emulate) VSX integer loads, so the integer is
   3274      * right justified within the left (zero-index) double-word.
   3275      */
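             /* Hence splat element 7/3/1: the low byte/halfword/word of the
                zero-index doubleword, in big-endian element numbering. */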
   3276     switch (vece) {
   3277     case MO_8:
   3278         tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
   3279         break;
   3280     case MO_16:
   3281         tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
   3282         break;
   3283     case MO_32:
   3284         tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
   3285         break;
   3286     case MO_64:
   3287         if (have_vsx) {
   3288             tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
   3289             break;
   3290         }
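                 /* Two VSLDOI-by-8 steps replicate the high doubleword into
                    both halves of dst. */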
   3291         tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
   3292         tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
   3293         break;
   3294     default:
   3295         g_assert_not_reached();
   3296     }
   3297     return true;
   3298 }
   3299 
   3300 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
   3301                              TCGReg out, TCGReg base, intptr_t offset)
   3302 {
   3303     int elt;
   3304 
   3305     tcg_debug_assert(out >= TCG_REG_V0);
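             /* Load a quadword containing the element, then splat it within
                the register; ISA 3.00's LXVWSX and VSX's LXVDSX can splat
                directly from memory. */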
   3306     switch (vece) {
   3307     case MO_8:
   3308         if (have_isa_3_00) {
   3309             tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
   3310         } else {
   3311             tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
   3312         }
   3313         elt = extract32(offset, 0, 4);
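                 /* VSPLT element numbers are big-endian; flip them on
                    little-endian hosts. */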
   3314 #if !HOST_BIG_ENDIAN
   3315         elt ^= 15;
   3316 #endif
   3317         tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
   3318         break;
   3319     case MO_16:
   3320         tcg_debug_assert((offset & 1) == 0);
   3321         if (have_isa_3_00) {
   3322             tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
   3323         } else {
   3324             tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
   3325         }
   3326         elt = extract32(offset, 1, 3);
   3327 #if !HOST_BIG_ENDIAN
   3328         elt ^= 7;
   3329 #endif
   3330         tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
   3331         break;
   3332     case MO_32:
   3333         if (have_isa_3_00) {
   3334             tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
   3335             break;
   3336         }
   3337         tcg_debug_assert((offset & 3) == 0);
   3338         tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
   3339         elt = extract32(offset, 2, 2);
   3340 #if !HOST_BIG_ENDIAN
   3341         elt ^= 3;
   3342 #endif
   3343         tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
   3344         break;
   3345     case MO_64:
   3346         if (have_vsx) {
   3347             tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
   3348             break;
   3349         }
   3350         tcg_debug_assert((offset & 7) == 0);
   3351         tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
   3352         tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
   3353         elt = extract32(offset, 3, 1);
   3354 #if !HOST_BIG_ENDIAN
   3355         elt = !elt;
   3356 #endif
   3357         if (elt) {
   3358             tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
   3359         } else {
   3360             tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
   3361         }
   3362         break;
   3363     default:
   3364         g_assert_not_reached();
   3365     }
   3366     return true;
   3367 }
   3368 
   3369 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
   3370                            unsigned vecl, unsigned vece,
   3371                            const TCGArg args[TCG_MAX_OP_ARGS],
   3372                            const int const_args[TCG_MAX_OP_ARGS])
   3373 {
   3374     static const uint32_t
   3375         add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
   3376         sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
   3377         mul_op[4] = { 0, 0, VMULUWM, VMULLD },
   3378         neg_op[4] = { 0, 0, VNEGW, VNEGD },
   3379         eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
   3380         ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
   3381         gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
   3382         gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
   3383         ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
   3384         usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
   3385         sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
   3386         ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
   3387         umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
   3388         smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
   3389         umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
   3390         smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
   3391         shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
   3392         shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
   3393         sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
   3394         mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
   3395         mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
   3396         muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
   3397         mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
   3398         pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
   3399         rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
   3400 
   3401     TCGType type = vecl + TCG_TYPE_V64;
   3402     TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
   3403     uint32_t insn;
   3404 
   3405     switch (opc) {
   3406     case INDEX_op_ld_vec:
   3407         tcg_out_ld(s, type, a0, a1, a2);
   3408         return;
   3409     case INDEX_op_st_vec:
   3410         tcg_out_st(s, type, a0, a1, a2);
   3411         return;
   3412     case INDEX_op_dupm_vec:
   3413         tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
   3414         return;
   3415 
   3416     case INDEX_op_add_vec:
   3417         insn = add_op[vece];
   3418         break;
   3419     case INDEX_op_sub_vec:
   3420         insn = sub_op[vece];
   3421         break;
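             /* vnegw/vnegd take their single input in VRB; the fixed VRA
                field is already part of the VNEGW/VNEGD constants, hence
                a1 is zeroed below. */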
   3422     case INDEX_op_neg_vec:
   3423         insn = neg_op[vece];
   3424         a2 = a1;
   3425         a1 = 0;
   3426         break;
   3427     case INDEX_op_mul_vec:
   3428         insn = mul_op[vece];
   3429         break;
   3430     case INDEX_op_ssadd_vec:
   3431         insn = ssadd_op[vece];
   3432         break;
   3433     case INDEX_op_sssub_vec:
   3434         insn = sssub_op[vece];
   3435         break;
   3436     case INDEX_op_usadd_vec:
   3437         insn = usadd_op[vece];
   3438         break;
   3439     case INDEX_op_ussub_vec:
   3440         insn = ussub_op[vece];
   3441         break;
   3442     case INDEX_op_smin_vec:
   3443         insn = smin_op[vece];
   3444         break;
   3445     case INDEX_op_umin_vec:
   3446         insn = umin_op[vece];
   3447         break;
   3448     case INDEX_op_smax_vec:
   3449         insn = smax_op[vece];
   3450         break;
   3451     case INDEX_op_umax_vec:
   3452         insn = umax_op[vece];
   3453         break;
   3454     case INDEX_op_shlv_vec:
   3455         insn = shlv_op[vece];
   3456         break;
   3457     case INDEX_op_shrv_vec:
   3458         insn = shrv_op[vece];
   3459         break;
   3460     case INDEX_op_sarv_vec:
   3461         insn = sarv_op[vece];
   3462         break;
   3463     case INDEX_op_and_vec:
   3464         insn = VAND;
   3465         break;
   3466     case INDEX_op_or_vec:
   3467         insn = VOR;
   3468         break;
   3469     case INDEX_op_xor_vec:
   3470         insn = VXOR;
   3471         break;
   3472     case INDEX_op_andc_vec:
   3473         insn = VANDC;
   3474         break;
   3475     case INDEX_op_not_vec:
   3476         insn = VNOR;
   3477         a2 = a1;
   3478         break;
   3479     case INDEX_op_orc_vec:
   3480         insn = VORC;
   3481         break;
   3482     case INDEX_op_nand_vec:
   3483         insn = VNAND;
   3484         break;
   3485     case INDEX_op_nor_vec:
   3486         insn = VNOR;
   3487         break;
   3488     case INDEX_op_eqv_vec:
   3489         insn = VEQV;
   3490         break;
   3491 
   3492     case INDEX_op_cmp_vec:
   3493         switch (args[3]) {
   3494         case TCG_COND_EQ:
   3495             insn = eq_op[vece];
   3496             break;
   3497         case TCG_COND_NE:
   3498             insn = ne_op[vece];
   3499             break;
   3500         case TCG_COND_GT:
   3501             insn = gts_op[vece];
   3502             break;
   3503         case TCG_COND_GTU:
   3504             insn = gtu_op[vece];
   3505             break;
   3506         default:
   3507             g_assert_not_reached();
   3508         }
   3509         break;
   3510 
   3511     case INDEX_op_bitsel_vec:
   3512         tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
   3513         return;
   3514 
   3515     case INDEX_op_dup2_vec:
   3516         assert(TCG_TARGET_REG_BITS == 32);
   3517         /* With inputs a1 = xLxx, a2 = xHxx  */
   3518         tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
   3519         tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
   3520         tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
   3521         return;
   3522 
   3523     case INDEX_op_ppc_mrgh_vec:
   3524         insn = mrgh_op[vece];
   3525         break;
   3526     case INDEX_op_ppc_mrgl_vec:
   3527         insn = mrgl_op[vece];
   3528         break;
   3529     case INDEX_op_ppc_muleu_vec:
   3530         insn = muleu_op[vece];
   3531         break;
   3532     case INDEX_op_ppc_mulou_vec:
   3533         insn = mulou_op[vece];
   3534         break;
   3535     case INDEX_op_ppc_pkum_vec:
   3536         insn = pkum_op[vece];
   3537         break;
   3538     case INDEX_op_rotlv_vec:
   3539         insn = rotl_op[vece];
   3540         break;
   3541     case INDEX_op_ppc_msum_vec:
   3542         tcg_debug_assert(vece == MO_16);
   3543         tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
   3544         return;
   3545 
   3546     case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
   3547     case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
   3548     default:
   3549         g_assert_not_reached();
   3550     }
   3551 
   3552     tcg_debug_assert(insn != 0);
   3553     tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
   3554 }
   3555 
   3556 static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
   3557                            TCGv_vec v1, TCGArg imm, TCGOpcode opci)
   3558 {
   3559     TCGv_vec t1;
   3560 
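             /* There is no immediate-count form of the vector shifts, so
                splat the count and use the variable-count opcode; the shift
                insns use only the low log2(esize) bits of each element. */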
   3561     if (vece == MO_32) {
   3562         /*
   3563          * Only 5 bits are significant, and VSPLTISB can represent -16..15.
    3564          * So using negative numbers supplies bit 4 (counts 16..31).
   3565          */
   3566         imm = sextract32(imm, 0, 5);
   3567     } else {
   3568         imm &= (8 << vece) - 1;
   3569     }
   3570 
    3571     /* Splat with bytes, for xxspltib when ISA 2.07 allows MO_64. */
   3572     t1 = tcg_constant_vec(type, MO_8, imm);
   3573     vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
   3574               tcgv_vec_arg(v1), tcgv_vec_arg(t1));
   3575 }
   3576 
   3577 static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
   3578                            TCGv_vec v1, TCGv_vec v2, TCGCond cond)
   3579 {
   3580     bool need_swap = false, need_inv = false;
   3581 
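             /* Hardware provides only EQ, GT and GTU (plus NE on ISA 3.00);
                derive the rest by swapping operands and/or inverting. */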
   3582     tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
   3583 
   3584     switch (cond) {
   3585     case TCG_COND_EQ:
   3586     case TCG_COND_GT:
   3587     case TCG_COND_GTU:
   3588         break;
   3589     case TCG_COND_NE:
   3590         if (have_isa_3_00 && vece <= MO_32) {
   3591             break;
   3592         }
   3593         /* fall through */
   3594     case TCG_COND_LE:
   3595     case TCG_COND_LEU:
   3596         need_inv = true;
   3597         break;
   3598     case TCG_COND_LT:
   3599     case TCG_COND_LTU:
   3600         need_swap = true;
   3601         break;
   3602     case TCG_COND_GE:
   3603     case TCG_COND_GEU:
   3604         need_swap = need_inv = true;
   3605         break;
   3606     default:
   3607         g_assert_not_reached();
   3608     }
   3609 
   3610     if (need_inv) {
   3611         cond = tcg_invert_cond(cond);
   3612     }
   3613     if (need_swap) {
   3614         TCGv_vec t1;
   3615         t1 = v1, v1 = v2, v2 = t1;
   3616         cond = tcg_swap_cond(cond);
   3617     }
   3618 
   3619     vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
   3620               tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
   3621 
   3622     if (need_inv) {
   3623         tcg_gen_not_vec(vece, v0, v0);
   3624     }
   3625 }
   3626 
   3627 static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
   3628                            TCGv_vec v1, TCGv_vec v2)
   3629 {
   3630     TCGv_vec t1 = tcg_temp_new_vec(type);
   3631     TCGv_vec t2 = tcg_temp_new_vec(type);
   3632     TCGv_vec c0, c16;
   3633 
   3634     switch (vece) {
   3635     case MO_8:
   3636     case MO_16:
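                 /* vmule/vmulo give double-width products of the even/odd
                    elements; merge-high/low re-interleaves them in element
                    order, and the pack keeps the low half of each product. */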
   3637         vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
   3638                   tcgv_vec_arg(v1), tcgv_vec_arg(v2));
   3639         vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
   3640                   tcgv_vec_arg(v1), tcgv_vec_arg(v2));
   3641         vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
   3642                   tcgv_vec_arg(t1), tcgv_vec_arg(t2));
   3643         vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
   3644                   tcgv_vec_arg(t1), tcgv_vec_arg(t2));
   3645         vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
   3646                   tcgv_vec_arg(v0), tcgv_vec_arg(t1));
    3647         break;
   3648 
   3649     case MO_32:
   3650         tcg_debug_assert(!have_isa_2_07);
   3651         /*
   3652          * Only 5 bits are significant, and VSPLTISB can represent -16..15.
   3653          * So using -16 is a quick way to represent 16.
   3654          */
   3655         c16 = tcg_constant_vec(type, MO_8, -16);
   3656         c0 = tcg_constant_vec(type, MO_8, 0);
   3657 
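                 /* Form the 32-bit product from 16-bit pieces: with v2
                    rotated by 16, vmsumuhm computes hi(v1)*lo(v2) +
                    lo(v1)*hi(v2); shift left 16 and add lo(v1)*lo(v2)
                    from vmulouh. */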
   3658         vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
   3659                   tcgv_vec_arg(v2), tcgv_vec_arg(c16));
   3660         vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
   3661                   tcgv_vec_arg(v1), tcgv_vec_arg(v2));
   3662         vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
   3663                   tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
   3664         vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
   3665                   tcgv_vec_arg(t1), tcgv_vec_arg(c16));
   3666         tcg_gen_add_vec(MO_32, v0, t1, t2);
   3667         break;
   3668 
   3669     default:
   3670         g_assert_not_reached();
   3671     }
   3672     tcg_temp_free_vec(t1);
   3673     tcg_temp_free_vec(t2);
   3674 }
   3675 
   3676 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
   3677                        TCGArg a0, ...)
   3678 {
   3679     va_list va;
   3680     TCGv_vec v0, v1, v2, t0;
   3681     TCGArg a2;
   3682 
   3683     va_start(va, a0);
   3684     v0 = temp_tcgv_vec(arg_temp(a0));
   3685     v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
   3686     a2 = va_arg(va, TCGArg);
   3687 
   3688     switch (opc) {
   3689     case INDEX_op_shli_vec:
   3690         expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
   3691         break;
   3692     case INDEX_op_shri_vec:
   3693         expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
   3694         break;
   3695     case INDEX_op_sari_vec:
   3696         expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
   3697         break;
   3698     case INDEX_op_rotli_vec:
   3699         expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
   3700         break;
   3701     case INDEX_op_cmp_vec:
   3702         v2 = temp_tcgv_vec(arg_temp(a2));
   3703         expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
   3704         break;
   3705     case INDEX_op_mul_vec:
   3706         v2 = temp_tcgv_vec(arg_temp(a2));
   3707         expand_vec_mul(type, vece, v0, v1, v2);
   3708         break;
   3709     case INDEX_op_rotlv_vec:
   3710         v2 = temp_tcgv_vec(arg_temp(a2));
   3711         t0 = tcg_temp_new_vec(type);
   3712         tcg_gen_neg_vec(vece, t0, v2);
   3713         tcg_gen_rotlv_vec(vece, v0, v1, t0);
   3714         tcg_temp_free_vec(t0);
   3715         break;
   3716     default:
   3717         g_assert_not_reached();
   3718     }
   3719     va_end(va);
   3720 }
   3721 
   3722 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
   3723 {
   3724     switch (op) {
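             /* Constraint letters, per the TCG_CT_CONST_* flags above
                (roughly): r GPR, v vector, I s16, J u16, T s32, U u32,
                Z zero, M -1, W word size; L/S are the qemu_ld/st subsets. */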
   3725     case INDEX_op_goto_ptr:
   3726         return C_O0_I1(r);
   3727 
   3728     case INDEX_op_ld8u_i32:
   3729     case INDEX_op_ld8s_i32:
   3730     case INDEX_op_ld16u_i32:
   3731     case INDEX_op_ld16s_i32:
   3732     case INDEX_op_ld_i32:
   3733     case INDEX_op_ctpop_i32:
   3734     case INDEX_op_neg_i32:
   3735     case INDEX_op_not_i32:
   3736     case INDEX_op_ext8s_i32:
   3737     case INDEX_op_ext16s_i32:
   3738     case INDEX_op_bswap16_i32:
   3739     case INDEX_op_bswap32_i32:
   3740     case INDEX_op_extract_i32:
   3741     case INDEX_op_ld8u_i64:
   3742     case INDEX_op_ld8s_i64:
   3743     case INDEX_op_ld16u_i64:
   3744     case INDEX_op_ld16s_i64:
   3745     case INDEX_op_ld32u_i64:
   3746     case INDEX_op_ld32s_i64:
   3747     case INDEX_op_ld_i64:
   3748     case INDEX_op_ctpop_i64:
   3749     case INDEX_op_neg_i64:
   3750     case INDEX_op_not_i64:
   3751     case INDEX_op_ext8s_i64:
   3752     case INDEX_op_ext16s_i64:
   3753     case INDEX_op_ext32s_i64:
   3754     case INDEX_op_ext_i32_i64:
   3755     case INDEX_op_extu_i32_i64:
   3756     case INDEX_op_bswap16_i64:
   3757     case INDEX_op_bswap32_i64:
   3758     case INDEX_op_bswap64_i64:
   3759     case INDEX_op_extract_i64:
   3760         return C_O1_I1(r, r);
   3761 
   3762     case INDEX_op_st8_i32:
   3763     case INDEX_op_st16_i32:
   3764     case INDEX_op_st_i32:
   3765     case INDEX_op_st8_i64:
   3766     case INDEX_op_st16_i64:
   3767     case INDEX_op_st32_i64:
   3768     case INDEX_op_st_i64:
   3769         return C_O0_I2(r, r);
   3770 
   3771     case INDEX_op_add_i32:
   3772     case INDEX_op_and_i32:
   3773     case INDEX_op_or_i32:
   3774     case INDEX_op_xor_i32:
   3775     case INDEX_op_andc_i32:
   3776     case INDEX_op_orc_i32:
   3777     case INDEX_op_eqv_i32:
   3778     case INDEX_op_shl_i32:
   3779     case INDEX_op_shr_i32:
   3780     case INDEX_op_sar_i32:
   3781     case INDEX_op_rotl_i32:
   3782     case INDEX_op_rotr_i32:
   3783     case INDEX_op_setcond_i32:
   3784     case INDEX_op_and_i64:
   3785     case INDEX_op_andc_i64:
   3786     case INDEX_op_shl_i64:
   3787     case INDEX_op_shr_i64:
   3788     case INDEX_op_sar_i64:
   3789     case INDEX_op_rotl_i64:
   3790     case INDEX_op_rotr_i64:
   3791     case INDEX_op_setcond_i64:
   3792         return C_O1_I2(r, r, ri);
   3793 
   3794     case INDEX_op_mul_i32:
   3795     case INDEX_op_mul_i64:
   3796         return C_O1_I2(r, r, rI);
   3797 
   3798     case INDEX_op_div_i32:
   3799     case INDEX_op_divu_i32:
   3800     case INDEX_op_rem_i32:
   3801     case INDEX_op_remu_i32:
   3802     case INDEX_op_nand_i32:
   3803     case INDEX_op_nor_i32:
   3804     case INDEX_op_muluh_i32:
   3805     case INDEX_op_mulsh_i32:
   3806     case INDEX_op_orc_i64:
   3807     case INDEX_op_eqv_i64:
   3808     case INDEX_op_nand_i64:
   3809     case INDEX_op_nor_i64:
   3810     case INDEX_op_div_i64:
   3811     case INDEX_op_divu_i64:
   3812     case INDEX_op_rem_i64:
   3813     case INDEX_op_remu_i64:
   3814     case INDEX_op_mulsh_i64:
   3815     case INDEX_op_muluh_i64:
   3816         return C_O1_I2(r, r, r);
   3817 
   3818     case INDEX_op_sub_i32:
   3819         return C_O1_I2(r, rI, ri);
   3820     case INDEX_op_add_i64:
   3821         return C_O1_I2(r, r, rT);
   3822     case INDEX_op_or_i64:
   3823     case INDEX_op_xor_i64:
   3824         return C_O1_I2(r, r, rU);
   3825     case INDEX_op_sub_i64:
   3826         return C_O1_I2(r, rI, rT);
   3827     case INDEX_op_clz_i32:
   3828     case INDEX_op_ctz_i32:
   3829     case INDEX_op_clz_i64:
   3830     case INDEX_op_ctz_i64:
   3831         return C_O1_I2(r, r, rZW);
   3832 
   3833     case INDEX_op_brcond_i32:
   3834     case INDEX_op_brcond_i64:
   3835         return C_O0_I2(r, ri);
   3836 
   3837     case INDEX_op_movcond_i32:
   3838     case INDEX_op_movcond_i64:
   3839         return C_O1_I4(r, r, ri, rZ, rZ);
   3840     case INDEX_op_deposit_i32:
   3841     case INDEX_op_deposit_i64:
   3842         return C_O1_I2(r, 0, rZ);
   3843     case INDEX_op_brcond2_i32:
   3844         return C_O0_I4(r, r, ri, ri);
   3845     case INDEX_op_setcond2_i32:
   3846         return C_O1_I4(r, r, r, ri, ri);
   3847     case INDEX_op_add2_i64:
   3848     case INDEX_op_add2_i32:
   3849         return C_O2_I4(r, r, r, r, rI, rZM);
   3850     case INDEX_op_sub2_i64:
   3851     case INDEX_op_sub2_i32:
   3852         return C_O2_I4(r, r, rI, rZM, r, r);
   3853 
   3854     case INDEX_op_qemu_ld_i32:
   3855         return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
   3856                 ? C_O1_I1(r, L)
   3857                 : C_O1_I2(r, L, L));
   3858 
   3859     case INDEX_op_qemu_st_i32:
   3860         return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
   3861                 ? C_O0_I2(S, S)
   3862                 : C_O0_I3(S, S, S));
   3863 
   3864     case INDEX_op_qemu_ld_i64:
   3865         return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
   3866                 : TARGET_LONG_BITS == 32 ? C_O2_I1(L, L, L)
   3867                 : C_O2_I2(L, L, L, L));
   3868 
   3869     case INDEX_op_qemu_st_i64:
   3870         return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(S, S)
   3871                 : TARGET_LONG_BITS == 32 ? C_O0_I3(S, S, S)
   3872                 : C_O0_I4(S, S, S, S));
   3873 
   3874     case INDEX_op_add_vec:
   3875     case INDEX_op_sub_vec:
   3876     case INDEX_op_mul_vec:
   3877     case INDEX_op_and_vec:
   3878     case INDEX_op_or_vec:
   3879     case INDEX_op_xor_vec:
   3880     case INDEX_op_andc_vec:
   3881     case INDEX_op_orc_vec:
   3882     case INDEX_op_nor_vec:
   3883     case INDEX_op_eqv_vec:
   3884     case INDEX_op_nand_vec:
   3885     case INDEX_op_cmp_vec:
   3886     case INDEX_op_ssadd_vec:
   3887     case INDEX_op_sssub_vec:
   3888     case INDEX_op_usadd_vec:
   3889     case INDEX_op_ussub_vec:
   3890     case INDEX_op_smax_vec:
   3891     case INDEX_op_smin_vec:
   3892     case INDEX_op_umax_vec:
   3893     case INDEX_op_umin_vec:
   3894     case INDEX_op_shlv_vec:
   3895     case INDEX_op_shrv_vec:
   3896     case INDEX_op_sarv_vec:
   3897     case INDEX_op_rotlv_vec:
   3898     case INDEX_op_rotrv_vec:
   3899     case INDEX_op_ppc_mrgh_vec:
   3900     case INDEX_op_ppc_mrgl_vec:
   3901     case INDEX_op_ppc_muleu_vec:
   3902     case INDEX_op_ppc_mulou_vec:
   3903     case INDEX_op_ppc_pkum_vec:
   3904     case INDEX_op_dup2_vec:
   3905         return C_O1_I2(v, v, v);
   3906 
   3907     case INDEX_op_not_vec:
   3908     case INDEX_op_neg_vec:
   3909         return C_O1_I1(v, v);
   3910 
   3911     case INDEX_op_dup_vec:
   3912         return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);
   3913 
   3914     case INDEX_op_ld_vec:
   3915     case INDEX_op_dupm_vec:
   3916         return C_O1_I1(v, r);
   3917 
   3918     case INDEX_op_st_vec:
   3919         return C_O0_I2(v, r);
   3920 
   3921     case INDEX_op_bitsel_vec:
   3922     case INDEX_op_ppc_msum_vec:
   3923         return C_O1_I3(v, v, v, v);
   3924 
   3925     default:
   3926         g_assert_not_reached();
   3927     }
   3928 }
   3929 
   3930 static void tcg_target_init(TCGContext *s)
   3931 {
   3932     unsigned long hwcap = qemu_getauxval(AT_HWCAP);
   3933     unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
   3934 
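             /* ISA levels are cumulative: each later hwcap test simply
                raises have_isa. */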
   3935     have_isa = tcg_isa_base;
   3936     if (hwcap & PPC_FEATURE_ARCH_2_06) {
   3937         have_isa = tcg_isa_2_06;
   3938     }
   3939 #ifdef PPC_FEATURE2_ARCH_2_07
   3940     if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
   3941         have_isa = tcg_isa_2_07;
   3942     }
   3943 #endif
   3944 #ifdef PPC_FEATURE2_ARCH_3_00
   3945     if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
   3946         have_isa = tcg_isa_3_00;
   3947     }
   3948 #endif
   3949 #ifdef PPC_FEATURE2_ARCH_3_10
   3950     if (hwcap2 & PPC_FEATURE2_ARCH_3_10) {
   3951         have_isa = tcg_isa_3_10;
   3952     }
   3953 #endif
   3954 
   3955 #ifdef PPC_FEATURE2_HAS_ISEL
    3956     /* Prefer the explicit indication from the kernel. */
   3957     have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
   3958 #else
   3959     /* Fall back to knowing Power7 (2.06) has ISEL. */
   3960     have_isel = have_isa_2_06;
   3961 #endif
   3962 
   3963     if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
   3964         have_altivec = true;
   3965         /* We only care about the portion of VSX that overlaps Altivec. */
   3966         if (hwcap & PPC_FEATURE_HAS_VSX) {
   3967             have_vsx = true;
   3968         }
   3969     }
   3970 
   3971     tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
   3972     tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
   3973     if (have_altivec) {
   3974         tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
   3975         tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
   3976     }
   3977 
   3978     tcg_target_call_clobber_regs = 0;
   3979     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
   3980     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
   3981     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
   3982     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
   3983     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
   3984     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
   3985     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
   3986     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
   3987     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
   3988     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
   3989     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
   3990     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
   3991 
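             /* V0..V19 are volatile in the supported ABIs; V20..V31 are
                callee-saved. */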
   3992     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
   3993     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
   3994     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
   3995     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
   3996     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
   3997     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
   3998     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
   3999     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
   4000     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
   4001     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
   4002     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
   4003     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
   4004     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
   4005     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
   4006     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
   4007     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
   4008     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
   4009     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
   4010     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
   4011     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
   4012 
   4013     s->reserved_regs = 0;
   4014     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
   4015     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
   4016 #if defined(_CALL_SYSV)
   4017     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
   4018 #endif
   4019 #if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
   4020     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
   4021 #endif
   4022     tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
   4023     tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
   4024     tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
   4025     if (USE_REG_TB) {
   4026         tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
   4027     }
   4028 }
   4029 
   4030 #ifdef __ELF__
   4031 typedef struct {
   4032     DebugFrameCIE cie;
   4033     DebugFrameFDEHeader fde;
   4034     uint8_t fde_def_cfa[4];
   4035     uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
   4036 } DebugFrame;
   4037 
    4038 /* We're expecting a 2-byte uleb128 encoded value.  */
   4039 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
   4040 
   4041 #if TCG_TARGET_REG_BITS == 64
   4042 # define ELF_HOST_MACHINE EM_PPC64
   4043 #else
   4044 # define ELF_HOST_MACHINE EM_PPC
   4045 #endif
   4046 
   4047 static DebugFrame debug_frame = {
   4048     .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
   4049     .cie.id = -1,
   4050     .cie.version = 1,
   4051     .cie.code_align = 1,
   4052     .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
   4053     .cie.return_column = 65,
   4054 
   4055     /* Total FDE size does not include the "len" member.  */
   4056     .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
   4057 
   4058     .fde_def_cfa = {
   4059         12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
   4060         (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
   4061         (FRAME_SIZE >> 7)
   4062     },
   4063     .fde_reg_ofs = {
   4064         /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
   4065         0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
   4066     }
   4067 };
   4068 
   4069 void tcg_register_jit(const void *buf, size_t buf_size)
   4070 {
   4071     uint8_t *p = &debug_frame.fde_reg_ofs[3];
   4072     int i;
   4073 
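             /* Each pair is DW_CFA_offset (0x80 | regno) plus a one-byte
                uleb128 offset, factored by the CIE data alignment (-SZR). */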
   4074     for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
   4075         p[0] = 0x80 + tcg_target_callee_save_regs[i];
   4076         p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
   4077     }
   4078 
   4079     debug_frame.fde.func_start = (uintptr_t)buf;
   4080     debug_frame.fde.func_len = buf_size;
   4081 
   4082     tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
   4083 }
   4084 #endif /* __ELF__ */
   4085 #undef VMULEUB
   4086 #undef VMULEUH
   4087 #undef VMULEUW
   4088 #undef VMULOUB
   4089 #undef VMULOUH
   4090 #undef VMULOUW
   4091 #undef VMSUMUHM