qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git

optimize.c (58828B)


      1 /*
      2  * Optimizations for Tiny Code Generator for QEMU
      3  *
      4  * Copyright (c) 2010 Samsung Electronics.
      5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a copy
      8  * of this software and associated documentation files (the "Software"), to deal
      9  * in the Software without restriction, including without limitation the rights
     10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     11  * copies of the Software, and to permit persons to whom the Software is
     12  * furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included in
     15  * all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     23  * THE SOFTWARE.
     24  */
     25 
     26 #include "qemu/osdep.h"
     27 #include "qemu/int128.h"
     28 #include "tcg/tcg-op.h"
     29 #include "tcg-internal.h"
     30 
     31 #define CASE_OP_32_64(x)                        \
     32         glue(glue(case INDEX_op_, x), _i32):    \
     33         glue(glue(case INDEX_op_, x), _i64)
     34 
     35 #define CASE_OP_32_64_VEC(x)                    \
     36         glue(glue(case INDEX_op_, x), _i32):    \
     37         glue(glue(case INDEX_op_, x), _i64):    \
     38         glue(glue(case INDEX_op_, x), _vec)
     39 
     40 typedef struct TempOptInfo {
     41     bool is_const;
     42     TCGTemp *prev_copy;
     43     TCGTemp *next_copy;
     44     uint64_t val;
     45     uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
     46     uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits. */
     47 } TempOptInfo;
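
        /*
         * Editorial note: prev_copy and next_copy link every temp that
         * currently holds the same value into a circular doubly-linked
         * list; a temp that is not a copy of anything points at itself:
         *
         *     t1 <-> t2 <-> t3 <-> (back to t1)    three equivalent temps
         *     t4 <-> t4                            no copies
         *
         * ts_is_copy() below tests exactly this self-link.
         */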
     48 
     49 typedef struct OptContext {
     50     TCGContext *tcg;
     51     TCGOp *prev_mb;
     52     TCGTempSet temps_used;
     53 
     54     /* In-flight values from optimization. */
     55     uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
     56     uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
     57     uint64_t s_mask;  /* mask of clrsb(value) bits */
     58     TCGType type;
     59 } OptContext;
     60 
     61 /* Calculate the smask for a specific value. */
     62 static uint64_t smask_from_value(uint64_t value)
     63 {
     64     int rep = clrsb64(value);
     65     return ~(~0ull >> rep);
     66 }
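
        /*
         * Worked example (editorial, not part of the original source):
         *
         *     smask_from_value(0x00ff)
         *         clrsb64(0x00ff) = 55        bits 62..8 repeat bit 63
         *         ~(~0ull >> 55)  = 0xfffffffffffffe00
         *
         * i.e. bits 63..9 are known to equal the sign bit.
         */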
     67 
     68 /*
     69  * Calculate the smask for a given set of known-zeros.
     70  * If there are lots of zeros on the left, we can consider the remainder
     71  * an unsigned field, and thus the corresponding signed field is one bit
     72  * larger.
     73  */
     74 static uint64_t smask_from_zmask(uint64_t zmask)
     75 {
     76     /*
     77      * Only the 0 bits are significant for zmask, thus the msb itself
     78      * must be zero, else we have no sign information.
     79      */
     80     int rep = clz64(zmask);
     81     if (rep == 0) {
     82         return 0;
     83     }
     84     rep -= 1;
     85     return ~(~0ull >> rep);
     86 }
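
        /*
         * Worked example (editorial): zmask = 0xff says bits 63..8 are
         * known zero, so the value fits in an unsigned 8-bit field.
         * clz64(0xff) = 56, rep = 55, and the result sets the top 55
         * bits: viewed as a 9-bit signed field, bits 63..9 must equal
         * the (zero) sign bit.
         */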
     87 
     88 /*
     89  * Recreate a properly left-aligned smask after manipulation.
     90  * Some bit-shuffling, particularly shifts and rotates, may
     91  * retain sign bits on the left, but may scatter disconnected
     92  * sign bits on the right.  Retain only what remains to the left.
     93  */
     94 static uint64_t smask_from_smask(int64_t smask)
     95 {
     96     /* Only the 1 bits are significant for smask */
     97     return smask_from_zmask(~smask);
     98 }
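
        /*
         * Worked example (editorial): smask = 0xff000000000000ff has a
         * contiguous run of 8 sign bits on the left plus scattered bits
         * on the right.  ~smask then has clz64 = 8, so the result is
         * the top 7 bits, 0xfe00000000000000: the scattered low bits
         * are dropped and only the left-aligned run is kept.
         */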
     99 
    100 static inline TempOptInfo *ts_info(TCGTemp *ts)
    101 {
    102     return ts->state_ptr;
    103 }
    104 
    105 static inline TempOptInfo *arg_info(TCGArg arg)
    106 {
    107     return ts_info(arg_temp(arg));
    108 }
    109 
    110 static inline bool ts_is_const(TCGTemp *ts)
    111 {
    112     return ts_info(ts)->is_const;
    113 }
    114 
    115 static inline bool arg_is_const(TCGArg arg)
    116 {
    117     return ts_is_const(arg_temp(arg));
    118 }
    119 
    120 static inline bool ts_is_copy(TCGTemp *ts)
    121 {
    122     return ts_info(ts)->next_copy != ts;
    123 }
    124 
    125 /* Reset TEMP's state, possibly removing the temp from the list of copies.  */
    126 static void reset_ts(TCGTemp *ts)
    127 {
    128     TempOptInfo *ti = ts_info(ts);
    129     TempOptInfo *pi = ts_info(ti->prev_copy);
    130     TempOptInfo *ni = ts_info(ti->next_copy);
    131 
    132     ni->prev_copy = ti->prev_copy;
    133     pi->next_copy = ti->next_copy;
    134     ti->next_copy = ts;
    135     ti->prev_copy = ts;
    136     ti->is_const = false;
    137     ti->z_mask = -1;
    138     ti->s_mask = 0;
    139 }
    140 
    141 static void reset_temp(TCGArg arg)
    142 {
    143     reset_ts(arg_temp(arg));
    144 }
    145 
    146 /* Initialize and activate a temporary.  */
    147 static void init_ts_info(OptContext *ctx, TCGTemp *ts)
    148 {
    149     size_t idx = temp_idx(ts);
    150     TempOptInfo *ti;
    151 
    152     if (test_bit(idx, ctx->temps_used.l)) {
    153         return;
    154     }
    155     set_bit(idx, ctx->temps_used.l);
    156 
    157     ti = ts->state_ptr;
    158     if (ti == NULL) {
    159         ti = tcg_malloc(sizeof(TempOptInfo));
    160         ts->state_ptr = ti;
    161     }
    162 
    163     ti->next_copy = ts;
    164     ti->prev_copy = ts;
    165     if (ts->kind == TEMP_CONST) {
    166         ti->is_const = true;
    167         ti->val = ts->val;
    168         ti->z_mask = ts->val;
    169         ti->s_mask = smask_from_value(ts->val);
    170     } else {
    171         ti->is_const = false;
    172         ti->z_mask = -1;
    173         ti->s_mask = 0;
    174     }
    175 }
    176 
    177 static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
    178 {
    179     TCGTemp *i, *g, *l;
    180 
    181     /* If this is already readonly, we can't do better. */
    182     if (temp_readonly(ts)) {
    183         return ts;
    184     }
    185 
    186     g = l = NULL;
    187     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
    188         if (temp_readonly(i)) {
    189             return i;
    190         } else if (i->kind > ts->kind) {
    191             if (i->kind == TEMP_GLOBAL) {
    192                 g = i;
    193             } else if (i->kind == TEMP_LOCAL) {
    194                 l = i;
    195             }
    196         }
    197     }
    198 
    199     /* If we didn't find a better representation, return the same temp. */
    200     return g ? g : l ? l : ts;
    201 }
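
        /*
         * Editorial note: among equivalent copies this prefers, in
         * order, a read-only (constant) temp, then a global, then a
         * local, matching the tests above; such temps remain valid
         * across more of the translation block than a plain temp.
         */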
    202 
    203 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
    204 {
    205     TCGTemp *i;
    206 
    207     if (ts1 == ts2) {
    208         return true;
    209     }
    210 
    211     if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
    212         return false;
    213     }
    214 
    215     for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
    216         if (i == ts2) {
    217             return true;
    218         }
    219     }
    220 
    221     return false;
    222 }
    223 
    224 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
    225 {
    226     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
    227 }
    228 
    229 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
    230 {
    231     TCGTemp *dst_ts = arg_temp(dst);
    232     TCGTemp *src_ts = arg_temp(src);
    233     TempOptInfo *di;
    234     TempOptInfo *si;
    235     TCGOpcode new_op;
    236 
    237     if (ts_are_copies(dst_ts, src_ts)) {
    238         tcg_op_remove(ctx->tcg, op);
    239         return true;
    240     }
    241 
    242     reset_ts(dst_ts);
    243     di = ts_info(dst_ts);
    244     si = ts_info(src_ts);
    245 
    246     switch (ctx->type) {
    247     case TCG_TYPE_I32:
    248         new_op = INDEX_op_mov_i32;
    249         break;
    250     case TCG_TYPE_I64:
    251         new_op = INDEX_op_mov_i64;
    252         break;
    253     case TCG_TYPE_V64:
    254     case TCG_TYPE_V128:
    255     case TCG_TYPE_V256:
    256         /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    257         new_op = INDEX_op_mov_vec;
    258         break;
    259     default:
    260         g_assert_not_reached();
    261     }
    262     op->opc = new_op;
    263     op->args[0] = dst;
    264     op->args[1] = src;
    265 
    266     di->z_mask = si->z_mask;
    267     di->s_mask = si->s_mask;
    268 
    269     if (src_ts->type == dst_ts->type) {
    270         TempOptInfo *ni = ts_info(si->next_copy);
    271 
    272         di->next_copy = si->next_copy;
    273         di->prev_copy = src_ts;
    274         ni->prev_copy = dst_ts;
    275         si->next_copy = dst_ts;
    276         di->is_const = si->is_const;
    277         di->val = si->val;
    278     }
    279     return true;
    280 }
    281 
    282 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
    283                              TCGArg dst, uint64_t val)
    284 {
    285     TCGTemp *tv;
    286 
    287     if (ctx->type == TCG_TYPE_I32) {
    288         val = (int32_t)val;
    289     }
    290 
    291     /* Convert movi to mov with constant temp. */
    292     tv = tcg_constant_internal(ctx->type, val);
    293     init_ts_info(ctx, tv);
    294     return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
    295 }
    296 
    297 static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
    298 {
    299     uint64_t l64, h64;
    300 
    301     switch (op) {
    302     CASE_OP_32_64(add):
    303         return x + y;
    304 
    305     CASE_OP_32_64(sub):
    306         return x - y;
    307 
    308     CASE_OP_32_64(mul):
    309         return x * y;
    310 
    311     CASE_OP_32_64_VEC(and):
    312         return x & y;
    313 
    314     CASE_OP_32_64_VEC(or):
    315         return x | y;
    316 
    317     CASE_OP_32_64_VEC(xor):
    318         return x ^ y;
    319 
    320     case INDEX_op_shl_i32:
    321         return (uint32_t)x << (y & 31);
    322 
    323     case INDEX_op_shl_i64:
    324         return (uint64_t)x << (y & 63);
    325 
    326     case INDEX_op_shr_i32:
    327         return (uint32_t)x >> (y & 31);
    328 
    329     case INDEX_op_shr_i64:
    330         return (uint64_t)x >> (y & 63);
    331 
    332     case INDEX_op_sar_i32:
    333         return (int32_t)x >> (y & 31);
    334 
    335     case INDEX_op_sar_i64:
    336         return (int64_t)x >> (y & 63);
    337 
    338     case INDEX_op_rotr_i32:
    339         return ror32(x, y & 31);
    340 
    341     case INDEX_op_rotr_i64:
    342         return ror64(x, y & 63);
    343 
    344     case INDEX_op_rotl_i32:
    345         return rol32(x, y & 31);
    346 
    347     case INDEX_op_rotl_i64:
    348         return rol64(x, y & 63);
    349 
    350     CASE_OP_32_64_VEC(not):
    351         return ~x;
    352 
    353     CASE_OP_32_64(neg):
    354         return -x;
    355 
    356     CASE_OP_32_64_VEC(andc):
    357         return x & ~y;
    358 
    359     CASE_OP_32_64_VEC(orc):
    360         return x | ~y;
    361 
    362     CASE_OP_32_64_VEC(eqv):
    363         return ~(x ^ y);
    364 
    365     CASE_OP_32_64_VEC(nand):
    366         return ~(x & y);
    367 
    368     CASE_OP_32_64_VEC(nor):
    369         return ~(x | y);
    370 
    371     case INDEX_op_clz_i32:
    372         return (uint32_t)x ? clz32(x) : y;
    373 
    374     case INDEX_op_clz_i64:
    375         return x ? clz64(x) : y;
    376 
    377     case INDEX_op_ctz_i32:
    378         return (uint32_t)x ? ctz32(x) : y;
    379 
    380     case INDEX_op_ctz_i64:
    381         return x ? ctz64(x) : y;
    382 
    383     case INDEX_op_ctpop_i32:
    384         return ctpop32(x);
    385 
    386     case INDEX_op_ctpop_i64:
    387         return ctpop64(x);
    388 
    389     CASE_OP_32_64(ext8s):
    390         return (int8_t)x;
    391 
    392     CASE_OP_32_64(ext16s):
    393         return (int16_t)x;
    394 
    395     CASE_OP_32_64(ext8u):
    396         return (uint8_t)x;
    397 
    398     CASE_OP_32_64(ext16u):
    399         return (uint16_t)x;
    400 
    401     CASE_OP_32_64(bswap16):
    402         x = bswap16(x);
    403         return y & TCG_BSWAP_OS ? (int16_t)x : x;
    404 
    405     CASE_OP_32_64(bswap32):
    406         x = bswap32(x);
    407         return y & TCG_BSWAP_OS ? (int32_t)x : x;
    408 
    409     case INDEX_op_bswap64_i64:
    410         return bswap64(x);
    411 
    412     case INDEX_op_ext_i32_i64:
    413     case INDEX_op_ext32s_i64:
    414         return (int32_t)x;
    415 
    416     case INDEX_op_extu_i32_i64:
    417     case INDEX_op_extrl_i64_i32:
    418     case INDEX_op_ext32u_i64:
    419         return (uint32_t)x;
    420 
    421     case INDEX_op_extrh_i64_i32:
    422         return (uint64_t)x >> 32;
    423 
    424     case INDEX_op_muluh_i32:
    425         return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    426     case INDEX_op_mulsh_i32:
    427         return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
    428 
    429     case INDEX_op_muluh_i64:
    430         mulu64(&l64, &h64, x, y);
    431         return h64;
    432     case INDEX_op_mulsh_i64:
    433         muls64(&l64, &h64, x, y);
    434         return h64;
    435 
    436     case INDEX_op_div_i32:
    437         /* Avoid crashing on divide by zero, otherwise undefined.  */
    438         return (int32_t)x / ((int32_t)y ? : 1);
    439     case INDEX_op_divu_i32:
    440         return (uint32_t)x / ((uint32_t)y ? : 1);
    441     case INDEX_op_div_i64:
    442         return (int64_t)x / ((int64_t)y ? : 1);
    443     case INDEX_op_divu_i64:
    444         return (uint64_t)x / ((uint64_t)y ? : 1);
    445 
    446     case INDEX_op_rem_i32:
    447         return (int32_t)x % ((int32_t)y ? : 1);
    448     case INDEX_op_remu_i32:
    449         return (uint32_t)x % ((uint32_t)y ? : 1);
    450     case INDEX_op_rem_i64:
    451         return (int64_t)x % ((int64_t)y ? : 1);
    452     case INDEX_op_remu_i64:
    453         return (uint64_t)x % ((uint64_t)y ? : 1);
    454 
    455     default:
    456         fprintf(stderr,
    457                 "Unrecognized operation %d in do_constant_folding.\n", op);
    458         tcg_abort();
    459     }
    460 }
    461 
    462 static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
    463                                     uint64_t x, uint64_t y)
    464 {
    465     uint64_t res = do_constant_folding_2(op, x, y);
    466     if (type == TCG_TYPE_I32) {
    467         res = (int32_t)res;
    468     }
    469     return res;
    470 }
    471 
    472 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
    473 {
    474     switch (c) {
    475     case TCG_COND_EQ:
    476         return x == y;
    477     case TCG_COND_NE:
    478         return x != y;
    479     case TCG_COND_LT:
    480         return (int32_t)x < (int32_t)y;
    481     case TCG_COND_GE:
    482         return (int32_t)x >= (int32_t)y;
    483     case TCG_COND_LE:
    484         return (int32_t)x <= (int32_t)y;
    485     case TCG_COND_GT:
    486         return (int32_t)x > (int32_t)y;
    487     case TCG_COND_LTU:
    488         return x < y;
    489     case TCG_COND_GEU:
    490         return x >= y;
    491     case TCG_COND_LEU:
    492         return x <= y;
    493     case TCG_COND_GTU:
    494         return x > y;
    495     default:
    496         tcg_abort();
    497     }
    498 }
    499 
    500 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
    501 {
    502     switch (c) {
    503     case TCG_COND_EQ:
    504         return x == y;
    505     case TCG_COND_NE:
    506         return x != y;
    507     case TCG_COND_LT:
    508         return (int64_t)x < (int64_t)y;
    509     case TCG_COND_GE:
    510         return (int64_t)x >= (int64_t)y;
    511     case TCG_COND_LE:
    512         return (int64_t)x <= (int64_t)y;
    513     case TCG_COND_GT:
    514         return (int64_t)x > (int64_t)y;
    515     case TCG_COND_LTU:
    516         return x < y;
    517     case TCG_COND_GEU:
    518         return x >= y;
    519     case TCG_COND_LEU:
    520         return x <= y;
    521     case TCG_COND_GTU:
    522         return x > y;
    523     default:
    524         tcg_abort();
    525     }
    526 }
    527 
    528 static bool do_constant_folding_cond_eq(TCGCond c)
    529 {
    530     switch (c) {
    531     case TCG_COND_GT:
    532     case TCG_COND_LTU:
    533     case TCG_COND_LT:
    534     case TCG_COND_GTU:
    535     case TCG_COND_NE:
    536         return 0;
    537     case TCG_COND_GE:
    538     case TCG_COND_GEU:
    539     case TCG_COND_LE:
    540     case TCG_COND_LEU:
    541     case TCG_COND_EQ:
    542         return 1;
    543     default:
    544         tcg_abort();
    545     }
    546 }
    547 
    548 /*
    549  * Return -1 if the condition can't be simplified,
    550  * and the result of the condition (0 or 1) if it can.
    551  */
    552 static int do_constant_folding_cond(TCGType type, TCGArg x,
    553                                     TCGArg y, TCGCond c)
    554 {
    555     if (arg_is_const(x) && arg_is_const(y)) {
    556         uint64_t xv = arg_info(x)->val;
    557         uint64_t yv = arg_info(y)->val;
    558 
    559         switch (type) {
    560         case TCG_TYPE_I32:
    561             return do_constant_folding_cond_32(xv, yv, c);
    562         case TCG_TYPE_I64:
    563             return do_constant_folding_cond_64(xv, yv, c);
    564         default:
    565             /* Only scalar comparisons are optimizable */
    566             return -1;
    567         }
    568     } else if (args_are_copies(x, y)) {
    569         return do_constant_folding_cond_eq(c);
    570     } else if (arg_is_const(y) && arg_info(y)->val == 0) {
    571         switch (c) {
    572         case TCG_COND_LTU:
    573             return 0;
    574         case TCG_COND_GEU:
    575             return 1;
    576         default:
    577             return -1;
    578         }
    579     }
    580     return -1;
    581 }
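
        /*
         * Illustrative cases (editorial): x = 3, y = 5, c = TCG_COND_LT
         * folds to 1; x being a copy of y folds via
         * do_constant_folding_cond_eq; and for any x, "x LTU 0" is 0
         * and "x GEU 0" is 1, since no unsigned value is below zero.
         */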
    582 
    583 /*
    584  * Return -1 if the condition can't be simplified,
    585  * and the result of the condition (0 or 1) if it can.
    586  */
    587 static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
    588 {
    589     TCGArg al = p1[0], ah = p1[1];
    590     TCGArg bl = p2[0], bh = p2[1];
    591 
    592     if (arg_is_const(bl) && arg_is_const(bh)) {
    593         tcg_target_ulong blv = arg_info(bl)->val;
    594         tcg_target_ulong bhv = arg_info(bh)->val;
    595         uint64_t b = deposit64(blv, 32, 32, bhv);
    596 
    597         if (arg_is_const(al) && arg_is_const(ah)) {
    598             tcg_target_ulong alv = arg_info(al)->val;
    599             tcg_target_ulong ahv = arg_info(ah)->val;
    600             uint64_t a = deposit64(alv, 32, 32, ahv);
    601             return do_constant_folding_cond_64(a, b, c);
    602         }
    603         if (b == 0) {
    604             switch (c) {
    605             case TCG_COND_LTU:
    606                 return 0;
    607             case TCG_COND_GEU:
    608                 return 1;
    609             default:
    610                 break;
    611             }
    612         }
    613     }
    614     if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
    615         return do_constant_folding_cond_eq(c);
    616     }
    617     return -1;
    618 }
    619 
    620 /**
    621  * swap_commutative:
    622  * @dest: TCGArg of the destination argument, or NO_DEST.
    623  * @p1: first paired argument
    624  * @p2: second paired argument
    625  *
    626  * If *@p1 is a constant and *@p2 is not, swap.
    627  * If *@p2 matches @dest, swap.
    628  * Return true if a swap was performed.
    629  */
    630 
    631 #define NO_DEST  temp_arg(NULL)
    632 
    633 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
    634 {
    635     TCGArg a1 = *p1, a2 = *p2;
    636     int sum = 0;
    637     sum += arg_is_const(a1);
    638     sum -= arg_is_const(a2);
    639 
    640     /* Prefer the constant in the second argument, and then the form
    641        op a, a, b, which is better handled on non-RISC hosts. */
    642     if (sum > 0 || (sum == 0 && dest == a2)) {
    643         *p1 = a2;
    644         *p2 = a1;
    645         return true;
    646     }
    647     return false;
    648 }
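
        /*
         * Examples (editorial): "add_i32 d, $5, t1" is rewritten as
         * "add_i32 d, t1, $5", putting the constant second; and
         * "add_i32 d, t1, d" becomes "add_i32 d, d, t1", so that the
         * destination aliases the first source, the form two-operand
         * hosts encode directly.
         */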
    649 
    650 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
    651 {
    652     int sum = 0;
    653     sum += arg_is_const(p1[0]);
    654     sum += arg_is_const(p1[1]);
    655     sum -= arg_is_const(p2[0]);
    656     sum -= arg_is_const(p2[1]);
    657     if (sum > 0) {
    658         TCGArg t;
    659         t = p1[0], p1[0] = p2[0], p2[0] = t;
    660         t = p1[1], p1[1] = p2[1], p2[1] = t;
    661         return true;
    662     }
    663     return false;
    664 }
    665 
    666 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
    667 {
    668     for (int i = 0; i < nb_args; i++) {
    669         TCGTemp *ts = arg_temp(op->args[i]);
    670         if (ts) {
    671             init_ts_info(ctx, ts);
    672         }
    673     }
    674 }
    675 
    676 static void copy_propagate(OptContext *ctx, TCGOp *op,
    677                            int nb_oargs, int nb_iargs)
    678 {
    679     TCGContext *s = ctx->tcg;
    680 
    681     for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
    682         TCGTemp *ts = arg_temp(op->args[i]);
    683         if (ts && ts_is_copy(ts)) {
    684             op->args[i] = temp_arg(find_better_copy(s, ts));
    685         }
    686     }
    687 }
    688 
    689 static void finish_folding(OptContext *ctx, TCGOp *op)
    690 {
    691     const TCGOpDef *def = &tcg_op_defs[op->opc];
    692     int i, nb_oargs;
    693 
    694     /*
    695      * For an opcode that ends a BB, reset all temp data.
    696      * We do no cross-BB optimization.
    697      */
    698     if (def->flags & TCG_OPF_BB_END) {
    699         memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
    700         ctx->prev_mb = NULL;
    701         return;
    702     }
    703 
    704     nb_oargs = def->nb_oargs;
    705     for (i = 0; i < nb_oargs; i++) {
    706         TCGTemp *ts = arg_temp(op->args[i]);
    707         reset_ts(ts);
    708         /*
    709          * Save the corresponding known-zero/sign bits mask for the
    710          * first output argument (only one supported so far).
    711          */
    712         if (i == 0) {
    713             ts_info(ts)->z_mask = ctx->z_mask;
    714             ts_info(ts)->s_mask = ctx->s_mask;
    715         }
    716     }
    717 }
    718 
    719 /*
    720  * The fold_* functions return true when processing is complete,
    721  * usually by folding the operation to a constant or to a copy,
    722  * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
    723  * like collect information about the value produced, for use in
    724  * optimizing a subsequent operation.
    725  *
    726  * These first fold_* functions are all helpers, used by other
    727  * folders for more specific operations.
    728  */
    729 
    730 static bool fold_const1(OptContext *ctx, TCGOp *op)
    731 {
    732     if (arg_is_const(op->args[1])) {
    733         uint64_t t;
    734 
    735         t = arg_info(op->args[1])->val;
    736         t = do_constant_folding(op->opc, ctx->type, t, 0);
    737         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
    738     }
    739     return false;
    740 }
    741 
    742 static bool fold_const2(OptContext *ctx, TCGOp *op)
    743 {
    744     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
    745         uint64_t t1 = arg_info(op->args[1])->val;
    746         uint64_t t2 = arg_info(op->args[2])->val;
    747 
    748         t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
    749         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
    750     }
    751     return false;
    752 }
    753 
    754 static bool fold_commutative(OptContext *ctx, TCGOp *op)
    755 {
    756     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
    757     return false;
    758 }
    759 
    760 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
    761 {
    762     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
    763     return fold_const2(ctx, op);
    764 }
    765 
    766 static bool fold_masks(OptContext *ctx, TCGOp *op)
    767 {
    768     uint64_t a_mask = ctx->a_mask;
    769     uint64_t z_mask = ctx->z_mask;
    770     uint64_t s_mask = ctx->s_mask;
    771 
    772     /*
    773      * 32-bit ops generate 32-bit results, which for the purpose of
    774      * simplifying tcg are sign-extended.  Certainly that's how we
    775      * represent our constants elsewhere.  Note that the bits will
    776      * be reset properly for a 64-bit value when encountering the
    777      * type changing opcodes.
    778      */
    779     if (ctx->type == TCG_TYPE_I32) {
    780         a_mask = (int32_t)a_mask;
    781         z_mask = (int32_t)z_mask;
    782         s_mask |= MAKE_64BIT_MASK(32, 32);
    783         ctx->z_mask = z_mask;
    784         ctx->s_mask = s_mask;
    785     }
    786 
    787     if (z_mask == 0) {
    788         return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
    789     }
    790     if (a_mask == 0) {
    791         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    792     }
    793     return false;
    794 }
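
        /*
         * Worked example (editorial): for "and r, x, 0xff" where x is
         * already known to fit in 8 bits (z_mask = 0xff), fold_and
         * (below) computes a_mask = z1 & ~z2 = 0; no result bit can
         * differ from x, so the op is replaced by "mov r, x".  Had
         * z1 & z2 been 0, z_mask == 0 would have produced "movi r, 0".
         */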
    795 
    796 /*
    797  * Convert @op to NOT, if NOT is supported by the host.
    798  * Return true f the conversion is successful, which will still
    799  * indicate that the processing is complete.
    800  */
    801 static bool fold_not(OptContext *ctx, TCGOp *op);
    802 static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
    803 {
    804     TCGOpcode not_op;
    805     bool have_not;
    806 
    807     switch (ctx->type) {
    808     case TCG_TYPE_I32:
    809         not_op = INDEX_op_not_i32;
    810         have_not = TCG_TARGET_HAS_not_i32;
    811         break;
    812     case TCG_TYPE_I64:
    813         not_op = INDEX_op_not_i64;
    814         have_not = TCG_TARGET_HAS_not_i64;
    815         break;
    816     case TCG_TYPE_V64:
    817     case TCG_TYPE_V128:
    818     case TCG_TYPE_V256:
    819         not_op = INDEX_op_not_vec;
    820         have_not = TCG_TARGET_HAS_not_vec;
    821         break;
    822     default:
    823         g_assert_not_reached();
    824     }
    825     if (have_not) {
    826         op->opc = not_op;
    827         op->args[1] = op->args[idx];
    828         return fold_not(ctx, op);
    829     }
    830     return false;
    831 }
    832 
    833 /* If the binary operation has first argument @i, fold to @i. */
    834 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
    835 {
    836     if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
    837         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    838     }
    839     return false;
    840 }
    841 
    842 /* If the binary operation has first argument @i, fold to NOT. */
    843 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
    844 {
    845     if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
    846         return fold_to_not(ctx, op, 2);
    847     }
    848     return false;
    849 }
    850 
    851 /* If the binary operation has second argument @i, fold to @i. */
    852 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
    853 {
    854     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
    855         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    856     }
    857     return false;
    858 }
    859 
    860 /* If the binary operation has second argument @i, fold to identity. */
    861 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
    862 {
    863     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
    864         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    865     }
    866     return false;
    867 }
    868 
    869 /* If the binary operation has second argument @i, fold to NOT. */
    870 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
    871 {
    872     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
    873         return fold_to_not(ctx, op, 1);
    874     }
    875     return false;
    876 }
    877 
    878 /* If the binary operation has both arguments equal, fold to @i. */
    879 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
    880 {
    881     if (args_are_copies(op->args[1], op->args[2])) {
    882         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
    883     }
    884     return false;
    885 }
    886 
    887 /* If the binary operation has both arguments equal, fold to identity. */
    888 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
    889 {
    890     if (args_are_copies(op->args[1], op->args[2])) {
    891         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    892     }
    893     return false;
    894 }
    895 
    896 /*
    897  * These outermost fold_<op> functions are sorted alphabetically.
    898  *
    899  * The ordering of the transformations should be:
    900  *   1) those that produce a constant
    901  *   2) those that produce a copy
    902  *   3) those that produce information about the result value.
    903  */
    904 
    905 static bool fold_add(OptContext *ctx, TCGOp *op)
    906 {
    907     if (fold_const2_commutative(ctx, op) ||
    908         fold_xi_to_x(ctx, op, 0)) {
    909         return true;
    910     }
    911     return false;
    912 }
    913 
    914 /* We cannot as yet do_constant_folding with vectors. */
    915 static bool fold_add_vec(OptContext *ctx, TCGOp *op)
    916 {
    917     if (fold_commutative(ctx, op) ||
    918         fold_xi_to_x(ctx, op, 0)) {
    919         return true;
    920     }
    921     return false;
    922 }
    923 
    924 static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
    925 {
    926     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
    927         arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
    928         uint64_t al = arg_info(op->args[2])->val;
    929         uint64_t ah = arg_info(op->args[3])->val;
    930         uint64_t bl = arg_info(op->args[4])->val;
    931         uint64_t bh = arg_info(op->args[5])->val;
    932         TCGArg rl, rh;
    933         TCGOp *op2;
    934 
    935         if (ctx->type == TCG_TYPE_I32) {
    936             uint64_t a = deposit64(al, 32, 32, ah);
    937             uint64_t b = deposit64(bl, 32, 32, bh);
    938 
    939             if (add) {
    940                 a += b;
    941             } else {
    942                 a -= b;
    943             }
    944 
    945             al = sextract64(a, 0, 32);
    946             ah = sextract64(a, 32, 32);
    947         } else {
    948             Int128 a = int128_make128(al, ah);
    949             Int128 b = int128_make128(bl, bh);
    950 
    951             if (add) {
    952                 a = int128_add(a, b);
    953             } else {
    954                 a = int128_sub(a, b);
    955             }
    956 
    957             al = int128_getlo(a);
    958             ah = int128_gethi(a);
    959         }
    960 
    961         rl = op->args[0];
    962         rh = op->args[1];
    963 
    964         /* The proper opcode is supplied by tcg_opt_gen_mov. */
    965         op2 = tcg_op_insert_before(ctx->tcg, op, 0);
    966 
    967         tcg_opt_gen_movi(ctx, op, rl, al);
    968         tcg_opt_gen_movi(ctx, op2, rh, ah);
    969         return true;
    970     }
    971     return false;
    972 }
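
        /*
         * Editorial note: in the I32 case the two halves are packed
         * into one uint64_t with deposit64, so the carry between low
         * and high words falls out of ordinary 64-bit arithmetic; the
         * I64 case needs a 128-bit intermediate, hence Int128.  Either
         * way the double-word op collapses into two movi, one per half.
         */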
    973 
    974 static bool fold_add2(OptContext *ctx, TCGOp *op)
    975 {
    976     /* Note that the high and low parts may be independently swapped. */
    977     swap_commutative(op->args[0], &op->args[2], &op->args[4]);
    978     swap_commutative(op->args[1], &op->args[3], &op->args[5]);
    979 
    980     return fold_addsub2(ctx, op, true);
    981 }
    982 
    983 static bool fold_and(OptContext *ctx, TCGOp *op)
    984 {
    985     uint64_t z1, z2;
    986 
    987     if (fold_const2_commutative(ctx, op) ||
    988         fold_xi_to_i(ctx, op, 0) ||
    989         fold_xi_to_x(ctx, op, -1) ||
    990         fold_xx_to_x(ctx, op)) {
    991         return true;
    992     }
    993 
    994     z1 = arg_info(op->args[1])->z_mask;
    995     z2 = arg_info(op->args[2])->z_mask;
    996     ctx->z_mask = z1 & z2;
    997 
    998     /*
    999      * Sign repetitions are perforce all identical, whether they are 1 or 0.
   1000      * Bitwise operations preserve the relative quantity of the repetitions.
   1001      */
   1002     ctx->s_mask = arg_info(op->args[1])->s_mask
   1003                 & arg_info(op->args[2])->s_mask;
   1004 
   1005     /*
   1006      * Known-zeros does not imply known-ones.  Therefore unless
   1007      * arg2 is constant, we can't infer affected bits from it.
   1008      */
   1009     if (arg_is_const(op->args[2])) {
   1010         ctx->a_mask = z1 & ~z2;
   1011     }
   1012 
   1013     return fold_masks(ctx, op);
   1014 }
   1015 
   1016 static bool fold_andc(OptContext *ctx, TCGOp *op)
   1017 {
   1018     uint64_t z1;
   1019 
   1020     if (fold_const2(ctx, op) ||
   1021         fold_xx_to_i(ctx, op, 0) ||
   1022         fold_xi_to_x(ctx, op, 0) ||
   1023         fold_ix_to_not(ctx, op, -1)) {
   1024         return true;
   1025     }
   1026 
   1027     z1 = arg_info(op->args[1])->z_mask;
   1028 
   1029     /*
   1030      * Known-zeros does not imply known-ones.  Therefore unless
   1031      * arg2 is constant, we can't infer anything from it.
   1032      */
   1033     if (arg_is_const(op->args[2])) {
   1034         uint64_t z2 = ~arg_info(op->args[2])->z_mask;
   1035         ctx->a_mask = z1 & ~z2;
   1036         z1 &= z2;
   1037     }
   1038     ctx->z_mask = z1;
   1039 
   1040     ctx->s_mask = arg_info(op->args[1])->s_mask
   1041                 & arg_info(op->args[2])->s_mask;
   1042     return fold_masks(ctx, op);
   1043 }
   1044 
   1045 static bool fold_brcond(OptContext *ctx, TCGOp *op)
   1046 {
   1047     TCGCond cond = op->args[2];
   1048     int i;
   1049 
   1050     if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
   1051         op->args[2] = cond = tcg_swap_cond(cond);
   1052     }
   1053 
   1054     i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
   1055     if (i == 0) {
   1056         tcg_op_remove(ctx->tcg, op);
   1057         return true;
   1058     }
   1059     if (i > 0) {
   1060         op->opc = INDEX_op_br;
   1061         op->args[0] = op->args[3];
   1062     }
   1063     return false;
   1064 }
   1065 
   1066 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
   1067 {
   1068     TCGCond cond = op->args[4];
   1069     TCGArg label = op->args[5];
   1070     int i, inv = 0;
   1071 
   1072     if (swap_commutative2(&op->args[0], &op->args[2])) {
   1073         op->args[4] = cond = tcg_swap_cond(cond);
   1074     }
   1075 
   1076     i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
   1077     if (i >= 0) {
   1078         goto do_brcond_const;
   1079     }
   1080 
   1081     switch (cond) {
   1082     case TCG_COND_LT:
   1083     case TCG_COND_GE:
   1084         /*
   1085          * Simplify LT/GE comparisons vs zero to a single compare
   1086          * vs the high word of the input.
   1087          */
   1088         if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
   1089             arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
   1090             goto do_brcond_high;
   1091         }
   1092         break;
   1093 
   1094     case TCG_COND_NE:
   1095         inv = 1;
   1096         QEMU_FALLTHROUGH;
   1097     case TCG_COND_EQ:
   1098         /*
   1099          * Simplify EQ/NE comparisons where one of the pairs
   1100          * can be simplified.
   1101          */
   1102         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
   1103                                      op->args[2], cond);
   1104         switch (i ^ inv) {
   1105         case 0:
   1106             goto do_brcond_const;
   1107         case 1:
   1108             goto do_brcond_high;
   1109         }
   1110 
   1111         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
   1112                                      op->args[3], cond);
   1113         switch (i ^ inv) {
   1114         case 0:
   1115             goto do_brcond_const;
   1116         case 1:
   1117             op->opc = INDEX_op_brcond_i32;
   1118             op->args[1] = op->args[2];
   1119             op->args[2] = cond;
   1120             op->args[3] = label;
   1121             break;
   1122         }
   1123         break;
   1124 
   1125     default:
   1126         break;
   1127 
   1128     do_brcond_high:
   1129         op->opc = INDEX_op_brcond_i32;
   1130         op->args[0] = op->args[1];
   1131         op->args[1] = op->args[3];
   1132         op->args[2] = cond;
   1133         op->args[3] = label;
   1134         break;
   1135 
   1136     do_brcond_const:
   1137         if (i == 0) {
   1138             tcg_op_remove(ctx->tcg, op);
   1139             return true;
   1140         }
   1141         op->opc = INDEX_op_br;
   1142         op->args[0] = label;
   1143         break;
   1144     }
   1145     return false;
   1146 }
   1147 
   1148 static bool fold_bswap(OptContext *ctx, TCGOp *op)
   1149 {
   1150     uint64_t z_mask, s_mask, sign;
   1151 
   1152     if (arg_is_const(op->args[1])) {
   1153         uint64_t t = arg_info(op->args[1])->val;
   1154 
   1155         t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
   1156         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
   1157     }
   1158 
   1159     z_mask = arg_info(op->args[1])->z_mask;
   1160 
   1161     switch (op->opc) {
   1162     case INDEX_op_bswap16_i32:
   1163     case INDEX_op_bswap16_i64:
   1164         z_mask = bswap16(z_mask);
   1165         sign = INT16_MIN;
   1166         break;
   1167     case INDEX_op_bswap32_i32:
   1168     case INDEX_op_bswap32_i64:
   1169         z_mask = bswap32(z_mask);
   1170         sign = INT32_MIN;
   1171         break;
   1172     case INDEX_op_bswap64_i64:
   1173         z_mask = bswap64(z_mask);
   1174         sign = INT64_MIN;
   1175         break;
   1176     default:
   1177         g_assert_not_reached();
   1178     }
   1179     s_mask = smask_from_zmask(z_mask);
   1180 
   1181     switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
   1182     case TCG_BSWAP_OZ:
   1183         break;
   1184     case TCG_BSWAP_OS:
   1185         /* If the sign bit may be 1, force all the bits above to 1. */
   1186         if (z_mask & sign) {
   1187             z_mask |= sign;
   1188             s_mask = sign << 1;
   1189         }
   1190         break;
   1191     default:
   1192         /* The high bits are undefined: force all bits above the sign to 1. */
   1193         z_mask |= sign << 1;
   1194         s_mask = 0;
   1195         break;
   1196     }
   1197     ctx->z_mask = z_mask;
   1198     ctx->s_mask = s_mask;
   1199 
   1200     return fold_masks(ctx, op);
   1201 }
   1202 
   1203 static bool fold_call(OptContext *ctx, TCGOp *op)
   1204 {
   1205     TCGContext *s = ctx->tcg;
   1206     int nb_oargs = TCGOP_CALLO(op);
   1207     int nb_iargs = TCGOP_CALLI(op);
   1208     int flags, i;
   1209 
   1210     init_arguments(ctx, op, nb_oargs + nb_iargs);
   1211     copy_propagate(ctx, op, nb_oargs, nb_iargs);
   1212 
   1213     /* If the function reads or writes globals, reset temp data. */
   1214     flags = tcg_call_flags(op);
   1215     if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
   1216         int nb_globals = s->nb_globals;
   1217 
   1218         for (i = 0; i < nb_globals; i++) {
   1219             if (test_bit(i, ctx->temps_used.l)) {
   1220                 reset_ts(&ctx->tcg->temps[i]);
   1221             }
   1222         }
   1223     }
   1224 
   1225     /* Reset temp data for outputs. */
   1226     for (i = 0; i < nb_oargs; i++) {
   1227         reset_temp(op->args[i]);
   1228     }
   1229 
   1230     /* Stop optimizing MB across calls. */
   1231     ctx->prev_mb = NULL;
   1232     return true;
   1233 }
   1234 
   1235 static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
   1236 {
   1237     uint64_t z_mask;
   1238 
   1239     if (arg_is_const(op->args[1])) {
   1240         uint64_t t = arg_info(op->args[1])->val;
   1241 
   1242         if (t != 0) {
   1243             t = do_constant_folding(op->opc, ctx->type, t, 0);
   1244             return tcg_opt_gen_movi(ctx, op, op->args[0], t);
   1245         }
   1246         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
   1247     }
   1248 
   1249     switch (ctx->type) {
   1250     case TCG_TYPE_I32:
   1251         z_mask = 31;
   1252         break;
   1253     case TCG_TYPE_I64:
   1254         z_mask = 63;
   1255         break;
   1256     default:
   1257         g_assert_not_reached();
   1258     }
   1259     ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
   1260     ctx->s_mask = smask_from_zmask(ctx->z_mask);
   1261     return false;
   1262 }
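
        /*
         * Editorial note: the result is either a bit index (at most 31
         * or 63) or, when the input is zero, the fallback in args[2];
         * OR-ing the fallback's z_mask with the index range covers both
         * outcomes.
         */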
   1263 
   1264 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
   1265 {
   1266     if (fold_const1(ctx, op)) {
   1267         return true;
   1268     }
   1269 
   1270     switch (ctx->type) {
   1271     case TCG_TYPE_I32:
   1272         ctx->z_mask = 32 | 31;
   1273         break;
   1274     case TCG_TYPE_I64:
   1275         ctx->z_mask = 64 | 63;
   1276         break;
   1277     default:
   1278         g_assert_not_reached();
   1279     }
   1280     ctx->s_mask = smask_from_zmask(ctx->z_mask);
   1281     return false;
   1282 }
   1283 
   1284 static bool fold_deposit(OptContext *ctx, TCGOp *op)
   1285 {
   1286     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
   1287         uint64_t t1 = arg_info(op->args[1])->val;
   1288         uint64_t t2 = arg_info(op->args[2])->val;
   1289 
   1290         t1 = deposit64(t1, op->args[3], op->args[4], t2);
   1291         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
   1292     }
   1293 
   1294     ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
   1295                             op->args[3], op->args[4],
   1296                             arg_info(op->args[2])->z_mask);
   1297     return false;
   1298 }
   1299 
   1300 static bool fold_divide(OptContext *ctx, TCGOp *op)
   1301 {
   1302     if (fold_const2(ctx, op) ||
   1303         fold_xi_to_x(ctx, op, 1)) {
   1304         return true;
   1305     }
   1306     return false;
   1307 }
   1308 
   1309 static bool fold_dup(OptContext *ctx, TCGOp *op)
   1310 {
   1311     if (arg_is_const(op->args[1])) {
   1312         uint64_t t = arg_info(op->args[1])->val;
   1313         t = dup_const(TCGOP_VECE(op), t);
   1314         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
   1315     }
   1316     return false;
   1317 }
   1318 
   1319 static bool fold_dup2(OptContext *ctx, TCGOp *op)
   1320 {
   1321     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
   1322         uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
   1323                                arg_info(op->args[2])->val);
   1324         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
   1325     }
   1326 
   1327     if (args_are_copies(op->args[1], op->args[2])) {
   1328         op->opc = INDEX_op_dup_vec;
   1329         TCGOP_VECE(op) = MO_32;
   1330     }
   1331     return false;
   1332 }
   1333 
   1334 static bool fold_eqv(OptContext *ctx, TCGOp *op)
   1335 {
   1336     if (fold_const2_commutative(ctx, op) ||
   1337         fold_xi_to_x(ctx, op, -1) ||
   1338         fold_xi_to_not(ctx, op, 0)) {
   1339         return true;
   1340     }
   1341 
   1342     ctx->s_mask = arg_info(op->args[1])->s_mask
   1343                 & arg_info(op->args[2])->s_mask;
   1344     return false;
   1345 }
   1346 
   1347 static bool fold_extract(OptContext *ctx, TCGOp *op)
   1348 {
   1349     uint64_t z_mask_old, z_mask;
   1350     int pos = op->args[2];
   1351     int len = op->args[3];
   1352 
   1353     if (arg_is_const(op->args[1])) {
   1354         uint64_t t;
   1355 
   1356         t = arg_info(op->args[1])->val;
   1357         t = extract64(t, pos, len);
   1358         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
   1359     }
   1360 
   1361     z_mask_old = arg_info(op->args[1])->z_mask;
   1362     z_mask = extract64(z_mask_old, pos, len);
   1363     if (pos == 0) {
   1364         ctx->a_mask = z_mask_old ^ z_mask;
   1365     }
   1366     ctx->z_mask = z_mask;
   1367     ctx->s_mask = smask_from_zmask(z_mask);
   1368 
   1369     return fold_masks(ctx, op);
   1370 }
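
        /*
         * Worked example (editorial): "extract r, x, 0, 8" with x known
         * to fit in 6 bits (z_mask = 0x3f) leaves z_mask unchanged, so
         * a_mask = 0 and fold_masks turns the extract into "mov r, x".
         */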
   1371 
   1372 static bool fold_extract2(OptContext *ctx, TCGOp *op)
   1373 {
   1374     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
   1375         uint64_t v1 = arg_info(op->args[1])->val;
   1376         uint64_t v2 = arg_info(op->args[2])->val;
   1377         int shr = op->args[3];
   1378 
   1379         if (op->opc == INDEX_op_extract2_i64) {
   1380             v1 >>= shr;
   1381             v2 <<= 64 - shr;
   1382         } else {
   1383             v1 = (uint32_t)v1 >> shr;
   1384             v2 = (uint64_t)((int32_t)v2 << (32 - shr));
   1385         }
   1386         return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
   1387     }
   1388     return false;
   1389 }
   1390 
   1391 static bool fold_exts(OptContext *ctx, TCGOp *op)
   1392 {
   1393     uint64_t s_mask_old, s_mask, z_mask, sign;
   1394     bool type_change = false;
   1395 
   1396     if (fold_const1(ctx, op)) {
   1397         return true;
   1398     }
   1399 
   1400     z_mask = arg_info(op->args[1])->z_mask;
   1401     s_mask = arg_info(op->args[1])->s_mask;
   1402     s_mask_old = s_mask;
   1403 
   1404     switch (op->opc) {
   1405     CASE_OP_32_64(ext8s):
   1406         sign = INT8_MIN;
   1407         z_mask = (uint8_t)z_mask;
   1408         break;
   1409     CASE_OP_32_64(ext16s):
   1410         sign = INT16_MIN;
   1411         z_mask = (uint16_t)z_mask;
   1412         break;
   1413     case INDEX_op_ext_i32_i64:
   1414         type_change = true;
   1415         QEMU_FALLTHROUGH;
   1416     case INDEX_op_ext32s_i64:
   1417         sign = INT32_MIN;
   1418         z_mask = (uint32_t)z_mask;
   1419         break;
   1420     default:
   1421         g_assert_not_reached();
   1422     }
   1423 
   1424     if (z_mask & sign) {
   1425         z_mask |= sign;
   1426     }
   1427     s_mask |= sign << 1;
   1428 
   1429     ctx->z_mask = z_mask;
   1430     ctx->s_mask = s_mask;
   1431     if (!type_change) {
   1432         ctx->a_mask = s_mask & ~s_mask_old;
   1433     }
   1434 
   1435     return fold_masks(ctx, op);
   1436 }
   1437 
   1438 static bool fold_extu(OptContext *ctx, TCGOp *op)
   1439 {
   1440     uint64_t z_mask_old, z_mask;
   1441     bool type_change = false;
   1442 
   1443     if (fold_const1(ctx, op)) {
   1444         return true;
   1445     }
   1446 
   1447     z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
   1448 
   1449     switch (op->opc) {
   1450     CASE_OP_32_64(ext8u):
   1451         z_mask = (uint8_t)z_mask;
   1452         break;
   1453     CASE_OP_32_64(ext16u):
   1454         z_mask = (uint16_t)z_mask;
   1455         break;
   1456     case INDEX_op_extrl_i64_i32:
   1457     case INDEX_op_extu_i32_i64:
   1458         type_change = true;
   1459         QEMU_FALLTHROUGH;
   1460     case INDEX_op_ext32u_i64:
   1461         z_mask = (uint32_t)z_mask;
   1462         break;
   1463     case INDEX_op_extrh_i64_i32:
   1464         type_change = true;
   1465         z_mask >>= 32;
   1466         break;
   1467     default:
   1468         g_assert_not_reached();
   1469     }
   1470 
   1471     ctx->z_mask = z_mask;
   1472     ctx->s_mask = smask_from_zmask(z_mask);
   1473     if (!type_change) {
   1474         ctx->a_mask = z_mask_old ^ z_mask;
   1475     }
   1476     return fold_masks(ctx, op);
   1477 }
   1478 
   1479 static bool fold_mb(OptContext *ctx, TCGOp *op)
   1480 {
   1481     /* Eliminate duplicate and redundant fence instructions.  */
   1482     if (ctx->prev_mb) {
   1483         /*
   1484          * Merge two barriers of the same type into one,
   1485          * or a weaker barrier into a stronger one,
   1486          * or two weaker barriers into a stronger one.
   1487          *   mb X; mb Y => mb X|Y
   1488          *   mb; strl => mb; st
   1489          *   ldaq; mb => ld; mb
   1490          *   ldaq; strl => ld; mb; st
   1491          * Other combinations are also merged into a strong
   1492          * barrier.  This is stricter than specified but for
   1493          * the purposes of TCG is better than not optimizing.
   1494          */
   1495         ctx->prev_mb->args[0] |= op->args[0];
   1496         tcg_op_remove(ctx->tcg, op);
   1497     } else {
   1498         ctx->prev_mb = op;
   1499     }
   1500     return true;
   1501 }
   1502 
   1503 static bool fold_mov(OptContext *ctx, TCGOp *op)
   1504 {
   1505     return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
   1506 }
   1507 
   1508 static bool fold_movcond(OptContext *ctx, TCGOp *op)
   1509 {
   1510     TCGCond cond = op->args[5];
   1511     int i;
   1512 
   1513     if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
   1514         op->args[5] = cond = tcg_swap_cond(cond);
   1515     }
   1516     /*
   1517      * Canonicalize the "false" input reg to match the destination reg so
   1518      * that the tcg backend can implement a "move if true" operation.
   1519      */
   1520     if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
   1521         op->args[5] = cond = tcg_invert_cond(cond);
   1522     }
   1523 
   1524     i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
   1525     if (i >= 0) {
   1526         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
   1527     }
   1528 
   1529     ctx->z_mask = arg_info(op->args[3])->z_mask
   1530                 | arg_info(op->args[4])->z_mask;
   1531     ctx->s_mask = arg_info(op->args[3])->s_mask
   1532                 & arg_info(op->args[4])->s_mask;
   1533 
   1534     if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
   1535         uint64_t tv = arg_info(op->args[3])->val;
   1536         uint64_t fv = arg_info(op->args[4])->val;
   1537         TCGOpcode opc;
   1538 
   1539         switch (ctx->type) {
   1540         case TCG_TYPE_I32:
   1541             opc = INDEX_op_setcond_i32;
   1542             break;
   1543         case TCG_TYPE_I64:
   1544             opc = INDEX_op_setcond_i64;
   1545             break;
   1546         default:
   1547             g_assert_not_reached();
   1548         }
   1549 
   1550         if (tv == 1 && fv == 0) {
   1551             op->opc = opc;
   1552             op->args[3] = cond;
   1553         } else if (fv == 1 && tv == 0) {
   1554             op->opc = opc;
   1555             op->args[3] = tcg_invert_cond(cond);
   1556         }
   1557     }
   1558     return false;
   1559 }
   1560 
   1561 static bool fold_mul(OptContext *ctx, TCGOp *op)
   1562 {
   1563     if (fold_const2(ctx, op) ||
   1564         fold_xi_to_i(ctx, op, 0) ||
   1565         fold_xi_to_x(ctx, op, 1)) {
   1566         return true;
   1567     }
   1568     return false;
   1569 }
   1570 
   1571 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
   1572 {
   1573     if (fold_const2_commutative(ctx, op) ||
   1574         fold_xi_to_i(ctx, op, 0)) {
   1575         return true;
   1576     }
   1577     return false;
   1578 }
   1579 
   1580 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
   1581 {
   1582     swap_commutative(op->args[0], &op->args[2], &op->args[3]);
   1583 
   1584     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
   1585         uint64_t a = arg_info(op->args[2])->val;
   1586         uint64_t b = arg_info(op->args[3])->val;
   1587         uint64_t h, l;
   1588         TCGArg rl, rh;
   1589         TCGOp *op2;
   1590 
   1591         switch (op->opc) {
   1592         case INDEX_op_mulu2_i32:
   1593             l = (uint64_t)(uint32_t)a * (uint32_t)b;
   1594             h = (int32_t)(l >> 32);
   1595             l = (int32_t)l;
   1596             break;
   1597         case INDEX_op_muls2_i32:
   1598             l = (int64_t)(int32_t)a * (int32_t)b;
   1599             h = l >> 32;
   1600             l = (int32_t)l;
   1601             break;
   1602         case INDEX_op_mulu2_i64:
   1603             mulu64(&l, &h, a, b);
   1604             break;
   1605         case INDEX_op_muls2_i64:
   1606             muls64(&l, &h, a, b);
   1607             break;
   1608         default:
   1609             g_assert_not_reached();
   1610         }
   1611 
   1612         rl = op->args[0];
   1613         rh = op->args[1];
   1614 
   1615         /* The proper opcode is supplied by tcg_opt_gen_mov. */
   1616         op2 = tcg_op_insert_before(ctx->tcg, op, 0);
   1617 
   1618         tcg_opt_gen_movi(ctx, op, rl, l);
   1619         tcg_opt_gen_movi(ctx, op2, rh, h);
   1620         return true;
   1621     }
   1622     return false;
   1623 }
   1624 
   1625 static bool fold_nand(OptContext *ctx, TCGOp *op)
   1626 {
   1627     if (fold_const2_commutative(ctx, op) ||
   1628         fold_xi_to_not(ctx, op, -1)) {
   1629         return true;
   1630     }
   1631 
   1632     ctx->s_mask = arg_info(op->args[1])->s_mask
   1633                 & arg_info(op->args[2])->s_mask;
   1634     return false;
   1635 }
   1636 
   1637 static bool fold_neg(OptContext *ctx, TCGOp *op)
   1638 {
   1639     uint64_t z_mask;
   1640 
   1641     if (fold_const1(ctx, op)) {
   1642         return true;
   1643     }
   1644 
   1645     /* Set to 1 the rightmost possibly-set bit and all bits to its left.  */
   1646     z_mask = arg_info(op->args[1])->z_mask;
   1647     ctx->z_mask = -(z_mask & -z_mask);
   1648 
   1649     /*
   1650      * Because of fold_sub_to_neg, we want to always return true,
   1651      * via finish_folding.
   1652      */
   1653     finish_folding(ctx, op);
   1654     return true;
   1655 }
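
        /*
         * Worked example (editorial): z_mask & -z_mask isolates the
         * lowest possibly-set bit.  If x can have set bits only at or
         * above position k, then -x = ~x + 1 also has bits below k
         * clear, so that bit and everything left of it is a valid
         * z_mask for the negation: z_mask = 0xf0 gives
         * -(0x10) = 0xfffffffffffffff0.
         */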
   1656 
   1657 static bool fold_nor(OptContext *ctx, TCGOp *op)
   1658 {
   1659     if (fold_const2_commutative(ctx, op) ||
   1660         fold_xi_to_not(ctx, op, 0)) {
   1661         return true;
   1662     }
   1663 
   1664     ctx->s_mask = arg_info(op->args[1])->s_mask
   1665                 & arg_info(op->args[2])->s_mask;
   1666     return false;
   1667 }
   1668 
   1669 static bool fold_not(OptContext *ctx, TCGOp *op)
   1670 {
   1671     if (fold_const1(ctx, op)) {
   1672         return true;
   1673     }
   1674 
   1675     ctx->s_mask = arg_info(op->args[1])->s_mask;
   1676 
   1677     /* Because of fold_to_not, we want to always return true, via finish. */
   1678     finish_folding(ctx, op);
   1679     return true;
   1680 }
   1681 
   1682 static bool fold_or(OptContext *ctx, TCGOp *op)
   1683 {
   1684     if (fold_const2_commutative(ctx, op) ||
   1685         fold_xi_to_x(ctx, op, 0) ||
   1686         fold_xx_to_x(ctx, op)) {
   1687         return true;
   1688     }
   1689 
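            /*
             * A result bit may be set only where at least one input bit may
             * be set; the sign repetitions guaranteed for the result are
             * those guaranteed by both inputs.
             */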
   1690     ctx->z_mask = arg_info(op->args[1])->z_mask
   1691                 | arg_info(op->args[2])->z_mask;
   1692     ctx->s_mask = arg_info(op->args[1])->s_mask
   1693                 & arg_info(op->args[2])->s_mask;
   1694     return fold_masks(ctx, op);
   1695 }
   1696 
   1697 static bool fold_orc(OptContext *ctx, TCGOp *op)
   1698 {
   1699     if (fold_const2(ctx, op) ||
   1700         fold_xx_to_i(ctx, op, -1) ||
   1701         fold_xi_to_x(ctx, op, -1) ||
   1702         fold_ix_to_not(ctx, op, 0)) {
   1703         return true;
   1704     }
   1705 
   1706     ctx->s_mask = arg_info(op->args[1])->s_mask
   1707                 & arg_info(op->args[2])->s_mask;
   1708     return false;
   1709 }
   1710 
   1711 static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
   1712 {
   1713     const TCGOpDef *def = &tcg_op_defs[op->opc];
   1714     MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
   1715     MemOp mop = get_memop(oi);
   1716     int width = 8 * memop_size(mop);
   1717 
   1718     if (width < 64) {
   1719         ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
   1720         if (!(mop & MO_SIGN)) {
   1721             ctx->z_mask = MAKE_64BIT_MASK(0, width);
   1722             ctx->s_mask <<= 1;
   1723         }
   1724     }
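            /*
             * E.g. a 16-bit unsigned load: z_mask = 0xffff, and since the
             * top 48 bits are known zero there are at least 47 redundant
             * copies of the (zero) sign bit, so s_mask covers bits 63..17.
             */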
   1725 
   1726     /* Opcodes that touch guest memory stop the mb optimization.  */
   1727     ctx->prev_mb = NULL;
   1728     return false;
   1729 }
   1730 
   1731 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
   1732 {
   1733     /* Opcodes that touch guest memory stop the mb optimization.  */
   1734     ctx->prev_mb = NULL;
   1735     return false;
   1736 }
   1737 
   1738 static bool fold_remainder(OptContext *ctx, TCGOp *op)
   1739 {
   1740     if (fold_const2(ctx, op) ||
   1741         fold_xx_to_i(ctx, op, 0)) {
   1742         return true;
   1743     }
   1744     return false;
   1745 }
   1746 
   1747 static bool fold_setcond(OptContext *ctx, TCGOp *op)
   1748 {
   1749     TCGCond cond = op->args[3];
   1750     int i;
   1751 
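            /*
             * Canonicalize a constant into the second comparison operand,
             * swapping the condition to match: e.g. "C < x" becomes "x > C".
             */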
   1752     if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
   1753         op->args[3] = cond = tcg_swap_cond(cond);
   1754     }
   1755 
   1756     i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
   1757     if (i >= 0) {
   1758         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
   1759     }
   1760 
   1761     ctx->z_mask = 1;
   1762     ctx->s_mask = smask_from_zmask(1);
   1763     return false;
   1764 }
   1765 
   1766 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
   1767 {
   1768     TCGCond cond = op->args[5];
   1769     int i, inv = 0;
   1770 
   1771     if (swap_commutative2(&op->args[1], &op->args[3])) {
   1772         op->args[5] = cond = tcg_swap_cond(cond);
   1773     }
   1774 
   1775     i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
   1776     if (i >= 0) {
   1777         goto do_setcond_const;
   1778     }
   1779 
   1780     switch (cond) {
   1781     case TCG_COND_LT:
   1782     case TCG_COND_GE:
   1783         /*
   1784          * Simplify LT/GE comparisons vs zero to a single compare
   1785          * vs the high word of the input.
   1786          */
   1787         if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
   1788             arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
   1789             goto do_setcond_high;
   1790         }
   1791         break;
   1792 
   1793     case TCG_COND_NE:
   1794         inv = 1;
   1795         QEMU_FALLTHROUGH;
   1796     case TCG_COND_EQ:
   1797         /*
   1798          * Simplify EQ/NE comparisons where one of the pairs
   1799          * can be simplified.
   1800          */
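                /*
                 * do_constant_folding_cond returns -1 when the result is not
                 * known, otherwise 0 or 1.  XOR-ing with inv maps the NE case
                 * onto the EQ logic below: a pair known unequal decides the
                 * whole comparison at once, while a pair known equal reduces
                 * it to a single setcond on the other pair.
                 */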
   1801         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
   1802                                      op->args[3], cond);
   1803         switch (i ^ inv) {
   1804         case 0:
   1805             goto do_setcond_const;
   1806         case 1:
   1807             goto do_setcond_high;
   1808         }
   1809 
   1810         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
   1811                                      op->args[4], cond);
   1812         switch (i ^ inv) {
   1813         case 0:
   1814             goto do_setcond_const;
   1815         case 1:
   1816             op->args[2] = op->args[3];
   1817             op->args[3] = cond;
   1818             op->opc = INDEX_op_setcond_i32;
   1819             break;
   1820         }
   1821         break;
   1822 
   1823     default:
   1824         break;
   1825 
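            /* Reduce the double-word compare to a setcond on the high parts. */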
   1826     do_setcond_high:
   1827         op->args[1] = op->args[2];
   1828         op->args[2] = op->args[4];
   1829         op->args[3] = cond;
   1830         op->opc = INDEX_op_setcond_i32;
   1831         break;
   1832     }
   1833 
   1834     ctx->z_mask = 1;
   1835     ctx->s_mask = smask_from_zmask(1);
   1836     return false;
   1837 
   1838  do_setcond_const:
   1839     return tcg_opt_gen_movi(ctx, op, op->args[0], i);
   1840 }
   1841 
   1842 static bool fold_sextract(OptContext *ctx, TCGOp *op)
   1843 {
   1844     uint64_t z_mask, s_mask, s_mask_old;
   1845     int pos = op->args[2];
   1846     int len = op->args[3];
   1847 
   1848     if (arg_is_const(op->args[1])) {
   1849         uint64_t t;
   1850 
   1851         t = arg_info(op->args[1])->val;
   1852         t = sextract64(t, pos, len);
   1853         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
   1854     }
   1855 
   1856     z_mask = arg_info(op->args[1])->z_mask;
   1857     z_mask = sextract64(z_mask, pos, len);
   1858     ctx->z_mask = z_mask;
   1859 
   1860     s_mask_old = arg_info(op->args[1])->s_mask;
   1861     s_mask = sextract64(s_mask_old, pos, len);
   1862     s_mask |= MAKE_64BIT_MASK(len, 64 - len);
   1863     ctx->s_mask = s_mask;
   1864 
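            /*
             * When extracting from bit 0, the result differs from the input
             * only in bits that are sign copies now but were not known to be
             * before; if a_mask is 0 the value is unchanged and fold_masks
             * can reduce the op to a mov.
             */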
   1865     if (pos == 0) {
   1866         ctx->a_mask = s_mask & ~s_mask_old;
   1867     }
   1868 
   1869     return fold_masks(ctx, op);
   1870 }
   1871 
   1872 static bool fold_shift(OptContext *ctx, TCGOp *op)
   1873 {
   1874     uint64_t s_mask, z_mask, sign;
   1875 
   1876     if (fold_const2(ctx, op) ||
   1877         fold_ix_to_i(ctx, op, 0) ||
   1878         fold_xi_to_x(ctx, op, 0)) {
   1879         return true;
   1880     }
   1881 
   1882     s_mask = arg_info(op->args[1])->s_mask;
   1883     z_mask = arg_info(op->args[1])->z_mask;
   1884 
   1885     if (arg_is_const(op->args[2])) {
   1886         int sh = arg_info(op->args[2])->val;
   1887 
   1888         ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
   1889 
   1890         s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
   1891         ctx->s_mask = smask_from_smask(s_mask);
   1892 
   1893         return fold_masks(ctx, op);
   1894     }
   1895 
   1896     switch (op->opc) {
   1897     CASE_OP_32_64(sar):
   1898         /*
   1899          * Arithmetic right shift will not reduce the number of
   1900          * input sign repetitions.
   1901          */
   1902         ctx->s_mask = s_mask;
   1903         break;
   1904     CASE_OP_32_64(shr):
   1905         /*
   1906          * If the sign bit is known zero, then logical right shift
   1907          * will not reduce the number of input sign repetitions.
   1908          */
   1909         sign = (s_mask & -s_mask) >> 1;
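                /*
                 * s_mask is left-aligned, so (s_mask & -s_mask) >> 1 picks
                 * out the least significant copy of the sign bit; if z_mask
                 * proves that copy zero, every copy above it is zero too.
                 */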
   1910         if (!(z_mask & sign)) {
   1911             ctx->s_mask = s_mask;
   1912         }
   1913         break;
   1914     default:
   1915         break;
   1916     }
   1917 
   1918     return false;
   1919 }
   1920 
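        /* Reduce "sub t0, 0, t1" to "neg t0, t1" when the backend has a neg op. */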
   1921 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
   1922 {
   1923     TCGOpcode neg_op;
   1924     bool have_neg;
   1925 
   1926     if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
   1927         return false;
   1928     }
   1929 
   1930     switch (ctx->type) {
   1931     case TCG_TYPE_I32:
   1932         neg_op = INDEX_op_neg_i32;
   1933         have_neg = TCG_TARGET_HAS_neg_i32;
   1934         break;
   1935     case TCG_TYPE_I64:
   1936         neg_op = INDEX_op_neg_i64;
   1937         have_neg = TCG_TARGET_HAS_neg_i64;
   1938         break;
   1939     case TCG_TYPE_V64:
   1940     case TCG_TYPE_V128:
   1941     case TCG_TYPE_V256:
   1942         neg_op = INDEX_op_neg_vec;
   1943         have_neg = (TCG_TARGET_HAS_neg_vec &&
   1944                     tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
   1945         break;
   1946     default:
   1947         g_assert_not_reached();
   1948     }
   1949     if (have_neg) {
   1950         op->opc = neg_op;
   1951         op->args[1] = op->args[2];
   1952         return fold_neg(ctx, op);
   1953     }
   1954     return false;
   1955 }
   1956 
   1957 /* We cannot yet use do_constant_folding with vectors. */
   1958 static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
   1959 {
   1960     if (fold_xx_to_i(ctx, op, 0) ||
   1961         fold_xi_to_x(ctx, op, 0) ||
   1962         fold_sub_to_neg(ctx, op)) {
   1963         return true;
   1964     }
   1965     return false;
   1966 }
   1967 
   1968 static bool fold_sub(OptContext *ctx, TCGOp *op)
   1969 {
   1970     return fold_const2(ctx, op) || fold_sub_vec(ctx, op);
   1971 }
   1972 
   1973 static bool fold_sub2(OptContext *ctx, TCGOp *op)
   1974 {
   1975     return fold_addsub2(ctx, op, false);
   1976 }
   1977 
   1978 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
   1979 {
   1980     /* We can't do any folding with a load, but we can record bits. */
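            /*
             * The unsigned loads get one fewer guaranteed sign repetition
             * than the signed ones: e.g. ld16u yields z_mask = 0xffff and
             * s_mask covering bits 63..17, while ld16s's s_mask covers
             * bits 63..16.
             */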
   1981     switch (op->opc) {
   1982     CASE_OP_32_64(ld8s):
   1983         ctx->s_mask = MAKE_64BIT_MASK(8, 56);
   1984         break;
   1985     CASE_OP_32_64(ld8u):
   1986         ctx->z_mask = MAKE_64BIT_MASK(0, 8);
   1987         ctx->s_mask = MAKE_64BIT_MASK(9, 55);
   1988         break;
   1989     CASE_OP_32_64(ld16s):
   1990         ctx->s_mask = MAKE_64BIT_MASK(16, 48);
   1991         break;
   1992     CASE_OP_32_64(ld16u):
   1993         ctx->z_mask = MAKE_64BIT_MASK(0, 16);
   1994         ctx->s_mask = MAKE_64BIT_MASK(17, 47);
   1995         break;
   1996     case INDEX_op_ld32s_i64:
   1997         ctx->s_mask = MAKE_64BIT_MASK(32, 32);
   1998         break;
   1999     case INDEX_op_ld32u_i64:
   2000         ctx->z_mask = MAKE_64BIT_MASK(0, 32);
   2001         ctx->s_mask = MAKE_64BIT_MASK(33, 31);
   2002         break;
   2003     default:
   2004         g_assert_not_reached();
   2005     }
   2006     return false;
   2007 }
   2008 
   2009 static bool fold_xor(OptContext *ctx, TCGOp *op)
   2010 {
   2011     if (fold_const2_commutative(ctx, op) ||
   2012         fold_xx_to_i(ctx, op, 0) ||
   2013         fold_xi_to_x(ctx, op, 0) ||
   2014         fold_xi_to_not(ctx, op, -1)) {
   2015         return true;
   2016     }
   2017 
   2018     ctx->z_mask = arg_info(op->args[1])->z_mask
   2019                 | arg_info(op->args[2])->z_mask;
   2020     ctx->s_mask = arg_info(op->args[1])->s_mask
   2021                 & arg_info(op->args[2])->s_mask;
   2022     return fold_masks(ctx, op);
   2023 }
   2024 
   2025 /* Propagate constants and copies, fold constant expressions. */
   2026 void tcg_optimize(TCGContext *s)
   2027 {
   2028     int nb_temps, i;
   2029     TCGOp *op, *op_next;
   2030     OptContext ctx = { .tcg = s };
   2031 
   2032     /* Each temp's state_ptr references a TempOptInfo.  If the temp
   2033        holds a constant, its value is kept there.  If the temp is a
   2034        copy of other ones, the other copies are reachable through the
   2035        doubly linked circular list. */
   2036 
   2037     nb_temps = s->nb_temps;
   2038     for (i = 0; i < nb_temps; ++i) {
   2039         s->temps[i].state_ptr = NULL;
   2040     }
   2041 
   2042     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
   2043         TCGOpcode opc = op->opc;
   2044         const TCGOpDef *def;
   2045         bool done = false;
   2046 
   2047         /* Calls are special. */
   2048         if (opc == INDEX_op_call) {
   2049             fold_call(&ctx, op);
   2050             continue;
   2051         }
   2052 
   2053         def = &tcg_op_defs[opc];
   2054         init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
   2055         copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
   2056 
   2057         /* Pre-compute the type of the operation. */
   2058         if (def->flags & TCG_OPF_VECTOR) {
   2059             ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
   2060         } else if (def->flags & TCG_OPF_64BIT) {
   2061             ctx.type = TCG_TYPE_I64;
   2062         } else {
   2063             ctx.type = TCG_TYPE_I32;
   2064         }
   2065 
   2066         /* Assume all bits affected, no bits known zero, no sign reps. */
   2067         ctx.a_mask = -1;
   2068         ctx.z_mask = -1;
   2069         ctx.s_mask = 0;
   2070 
   2071         /*
   2072          * Process each opcode.
   2073          * Sorted alphabetically by opcode as much as possible.
   2074          */
   2075         switch (opc) {
   2076         CASE_OP_32_64(add):
   2077             done = fold_add(&ctx, op);
   2078             break;
   2079         case INDEX_op_add_vec:
   2080             done = fold_add_vec(&ctx, op);
   2081             break;
   2082         CASE_OP_32_64(add2):
   2083             done = fold_add2(&ctx, op);
   2084             break;
   2085         CASE_OP_32_64_VEC(and):
   2086             done = fold_and(&ctx, op);
   2087             break;
   2088         CASE_OP_32_64_VEC(andc):
   2089             done = fold_andc(&ctx, op);
   2090             break;
   2091         CASE_OP_32_64(brcond):
   2092             done = fold_brcond(&ctx, op);
   2093             break;
   2094         case INDEX_op_brcond2_i32:
   2095             done = fold_brcond2(&ctx, op);
   2096             break;
   2097         CASE_OP_32_64(bswap16):
   2098         CASE_OP_32_64(bswap32):
   2099         case INDEX_op_bswap64_i64:
   2100             done = fold_bswap(&ctx, op);
   2101             break;
   2102         CASE_OP_32_64(clz):
   2103         CASE_OP_32_64(ctz):
   2104             done = fold_count_zeros(&ctx, op);
   2105             break;
   2106         CASE_OP_32_64(ctpop):
   2107             done = fold_ctpop(&ctx, op);
   2108             break;
   2109         CASE_OP_32_64(deposit):
   2110             done = fold_deposit(&ctx, op);
   2111             break;
   2112         CASE_OP_32_64(div):
   2113         CASE_OP_32_64(divu):
   2114             done = fold_divide(&ctx, op);
   2115             break;
   2116         case INDEX_op_dup_vec:
   2117             done = fold_dup(&ctx, op);
   2118             break;
   2119         case INDEX_op_dup2_vec:
   2120             done = fold_dup2(&ctx, op);
   2121             break;
   2122         CASE_OP_32_64_VEC(eqv):
   2123             done = fold_eqv(&ctx, op);
   2124             break;
   2125         CASE_OP_32_64(extract):
   2126             done = fold_extract(&ctx, op);
   2127             break;
   2128         CASE_OP_32_64(extract2):
   2129             done = fold_extract2(&ctx, op);
   2130             break;
   2131         CASE_OP_32_64(ext8s):
   2132         CASE_OP_32_64(ext16s):
   2133         case INDEX_op_ext32s_i64:
   2134         case INDEX_op_ext_i32_i64:
   2135             done = fold_exts(&ctx, op);
   2136             break;
   2137         CASE_OP_32_64(ext8u):
   2138         CASE_OP_32_64(ext16u):
   2139         case INDEX_op_ext32u_i64:
   2140         case INDEX_op_extu_i32_i64:
   2141         case INDEX_op_extrl_i64_i32:
   2142         case INDEX_op_extrh_i64_i32:
   2143             done = fold_extu(&ctx, op);
   2144             break;
   2145         CASE_OP_32_64(ld8s):
   2146         CASE_OP_32_64(ld8u):
   2147         CASE_OP_32_64(ld16s):
   2148         CASE_OP_32_64(ld16u):
   2149         case INDEX_op_ld32s_i64:
   2150         case INDEX_op_ld32u_i64:
   2151             done = fold_tcg_ld(&ctx, op);
   2152             break;
   2153         case INDEX_op_mb:
   2154             done = fold_mb(&ctx, op);
   2155             break;
   2156         CASE_OP_32_64_VEC(mov):
   2157             done = fold_mov(&ctx, op);
   2158             break;
   2159         CASE_OP_32_64(movcond):
   2160             done = fold_movcond(&ctx, op);
   2161             break;
   2162         CASE_OP_32_64(mul):
   2163             done = fold_mul(&ctx, op);
   2164             break;
   2165         CASE_OP_32_64(mulsh):
   2166         CASE_OP_32_64(muluh):
   2167             done = fold_mul_highpart(&ctx, op);
   2168             break;
   2169         CASE_OP_32_64(muls2):
   2170         CASE_OP_32_64(mulu2):
   2171             done = fold_multiply2(&ctx, op);
   2172             break;
   2173         CASE_OP_32_64_VEC(nand):
   2174             done = fold_nand(&ctx, op);
   2175             break;
   2176         CASE_OP_32_64(neg):
   2177             done = fold_neg(&ctx, op);
   2178             break;
   2179         CASE_OP_32_64_VEC(nor):
   2180             done = fold_nor(&ctx, op);
   2181             break;
   2182         CASE_OP_32_64_VEC(not):
   2183             done = fold_not(&ctx, op);
   2184             break;
   2185         CASE_OP_32_64_VEC(or):
   2186             done = fold_or(&ctx, op);
   2187             break;
   2188         CASE_OP_32_64_VEC(orc):
   2189             done = fold_orc(&ctx, op);
   2190             break;
   2191         case INDEX_op_qemu_ld_i32:
   2192         case INDEX_op_qemu_ld_i64:
   2193             done = fold_qemu_ld(&ctx, op);
   2194             break;
   2195         case INDEX_op_qemu_st_i32:
   2196         case INDEX_op_qemu_st8_i32:
   2197         case INDEX_op_qemu_st_i64:
   2198             done = fold_qemu_st(&ctx, op);
   2199             break;
   2200         CASE_OP_32_64(rem):
   2201         CASE_OP_32_64(remu):
   2202             done = fold_remainder(&ctx, op);
   2203             break;
   2204         CASE_OP_32_64(rotl):
   2205         CASE_OP_32_64(rotr):
   2206         CASE_OP_32_64(sar):
   2207         CASE_OP_32_64(shl):
   2208         CASE_OP_32_64(shr):
   2209             done = fold_shift(&ctx, op);
   2210             break;
   2211         CASE_OP_32_64(setcond):
   2212             done = fold_setcond(&ctx, op);
   2213             break;
   2214         case INDEX_op_setcond2_i32:
   2215             done = fold_setcond2(&ctx, op);
   2216             break;
   2217         CASE_OP_32_64(sextract):
   2218             done = fold_sextract(&ctx, op);
   2219             break;
   2220         CASE_OP_32_64(sub):
   2221             done = fold_sub(&ctx, op);
   2222             break;
   2223         case INDEX_op_sub_vec:
   2224             done = fold_sub_vec(&ctx, op);
   2225             break;
   2226         CASE_OP_32_64(sub2):
   2227             done = fold_sub2(&ctx, op);
   2228             break;
   2229         CASE_OP_32_64_VEC(xor):
   2230             done = fold_xor(&ctx, op);
   2231             break;
   2232         default:
   2233             break;
   2234         }
   2235 
   2236         if (!done) {
   2237             finish_folding(&ctx, op);
   2238         }
   2239     }
   2240 }