qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

tcg-op-vec.c (24347B)


      1 /*
      2  * Tiny Code Generator for QEMU
      3  *
      4  * Copyright (c) 2018 Linaro, Inc.
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Lesser General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2.1 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Lesser General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Lesser General Public
     17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     18  */
     19 
     20 #include "qemu/osdep.h"
     21 #include "tcg/tcg.h"
     22 #include "tcg/tcg-op.h"
     23 #include "tcg/tcg-mo.h"
     24 
     25 /* Reduce the number of ifdefs below.  This assumes that all uses of
     26    TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
     27    the compiler can eliminate.  */
     28 #if TCG_TARGET_REG_BITS == 64
     29 extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
     30 extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
     31 #define TCGV_LOW  TCGV_LOW_link_error
     32 #define TCGV_HIGH TCGV_HIGH_link_error
     33 #endif
     34 
     35 /*
     36  * Vector optional opcode tracking.
     37  * Except for the basic logical operations (and, or, xor), and
     38  * data movement (mov, ld, st, dupi), many vector opcodes are
     39  * optional and may not be supported on the host.  Thank Intel
     40  * for the irregularity in their instruction set.
     41  *
     42  * The gvec expanders allow custom vector operations to be composed,
     43  * generally via the .fniv callback in the GVecGen* structures.  At
     44  * the same time, in deciding whether to use this hook we need to
     45  * know if the host supports the required operations.  This is
     46  * presented as an array of opcodes, terminated by 0.  Each opcode
     47  * is assumed to be expanded with the given VECE.
     48  *
     49  * For debugging, we want to validate this array.  Therefore, when
     50  * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
     51  * will validate that their opcode is present in the list.
     52  */
     53 #ifdef CONFIG_DEBUG_TCG
     54 void tcg_assert_listed_vecop(TCGOpcode op)
     55 {
     56     const TCGOpcode *p = tcg_ctx->vecop_list;
     57     if (p) {
     58         for (; *p; ++p) {
     59             if (*p == op) {
     60                 return;
     61             }
     62         }
     63         g_assert_not_reached();
     64     }
     65 }
     66 #endif
     67 
     68 bool tcg_can_emit_vecop_list(const TCGOpcode *list,
     69                              TCGType type, unsigned vece)
     70 {
     71     if (list == NULL) {
     72         return true;
     73     }
     74 
     75     for (; *list; ++list) {
     76         TCGOpcode opc = *list;
     77 
     78 #ifdef CONFIG_DEBUG_TCG
     79         switch (opc) {
     80         case INDEX_op_and_vec:
     81         case INDEX_op_or_vec:
     82         case INDEX_op_xor_vec:
     83         case INDEX_op_mov_vec:
     84         case INDEX_op_dup_vec:
     85         case INDEX_op_dup2_vec:
     86         case INDEX_op_ld_vec:
     87         case INDEX_op_st_vec:
     88         case INDEX_op_bitsel_vec:
     89             /* These opcodes are mandatory and should not be listed.  */
     90             g_assert_not_reached();
     91         case INDEX_op_not_vec:
     92             /* These opcodes have generic expansions using the above.  */
     93             g_assert_not_reached();
     94         default:
     95             break;
     96         }
     97 #endif
     98 
     99         if (tcg_can_emit_vec_op(opc, type, vece)) {
    100             continue;
    101         }
    102 
    103         /*
    104          * The opcode list is created by front ends based on what they
    105          * actually invoke.  We must mirror the logic in the routines
    106          * below for generic expansions using other opcodes.
    107          */
    108         switch (opc) {
    109         case INDEX_op_neg_vec:
    110             if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
    111                 continue;
    112             }
    113             break;
    114         case INDEX_op_abs_vec:
    115             if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
    116                 && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
    117                     || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
    118                     || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) {
    119                 continue;
    120             }
    121             break;
    122         case INDEX_op_usadd_vec:
    123             if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece) ||
    124                 tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
    125                 continue;
    126             }
    127             break;
    128         case INDEX_op_ussub_vec:
    129             if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece) ||
    130                 tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
    131                 continue;
    132             }
    133             break;
    134         case INDEX_op_cmpsel_vec:
    135         case INDEX_op_smin_vec:
    136         case INDEX_op_smax_vec:
    137         case INDEX_op_umin_vec:
    138         case INDEX_op_umax_vec:
    139             if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
    140                 continue;
    141             }
    142             break;
    143         default:
    144             break;
    145         }
    146         return false;
    147     }
    148     return true;
    149 }
    150 
    151 void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
    152 {
    153     TCGOp *op = tcg_emit_op(opc);
    154     TCGOP_VECL(op) = type - TCG_TYPE_V64;
    155     TCGOP_VECE(op) = vece;
    156     op->args[0] = r;
    157     op->args[1] = a;
    158 }
    159 
    160 void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
    161                TCGArg r, TCGArg a, TCGArg b)
    162 {
    163     TCGOp *op = tcg_emit_op(opc);
    164     TCGOP_VECL(op) = type - TCG_TYPE_V64;
    165     TCGOP_VECE(op) = vece;
    166     op->args[0] = r;
    167     op->args[1] = a;
    168     op->args[2] = b;
    169 }
    170 
    171 void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
    172                TCGArg r, TCGArg a, TCGArg b, TCGArg c)
    173 {
    174     TCGOp *op = tcg_emit_op(opc);
    175     TCGOP_VECL(op) = type - TCG_TYPE_V64;
    176     TCGOP_VECE(op) = vece;
    177     op->args[0] = r;
    178     op->args[1] = a;
    179     op->args[2] = b;
    180     op->args[3] = c;
    181 }
    182 
    183 static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
    184                       TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
    185 {
    186     TCGOp *op = tcg_emit_op(opc);
    187     TCGOP_VECL(op) = type - TCG_TYPE_V64;
    188     TCGOP_VECE(op) = vece;
    189     op->args[0] = r;
    190     op->args[1] = a;
    191     op->args[2] = b;
    192     op->args[3] = c;
    193     op->args[4] = d;
    194     op->args[5] = e;
    195 }
    196 
    197 static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
    198 {
    199     TCGTemp *rt = tcgv_vec_temp(r);
    200     TCGTemp *at = tcgv_vec_temp(a);
    201     TCGType type = rt->base_type;
    202 
    203     /* Must enough inputs for the output.  */
    204     tcg_debug_assert(at->base_type >= type);
    205     vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
    206 }
    207 
    208 static void vec_gen_op3(TCGOpcode opc, unsigned vece,
    209                         TCGv_vec r, TCGv_vec a, TCGv_vec b)
    210 {
    211     TCGTemp *rt = tcgv_vec_temp(r);
    212     TCGTemp *at = tcgv_vec_temp(a);
    213     TCGTemp *bt = tcgv_vec_temp(b);
    214     TCGType type = rt->base_type;
    215 
    216     /* Must enough inputs for the output.  */
    217     tcg_debug_assert(at->base_type >= type);
    218     tcg_debug_assert(bt->base_type >= type);
    219     vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
    220 }
    221 
    222 void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
    223 {
    224     if (r != a) {
    225         vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
    226     }
    227 }
    228 
    229 TCGv_vec tcg_const_zeros_vec(TCGType type)
    230 {
    231     TCGv_vec ret = tcg_temp_new_vec(type);
    232     tcg_gen_dupi_vec(MO_64, ret, 0);
    233     return ret;
    234 }
    235 
    236 TCGv_vec tcg_const_ones_vec(TCGType type)
    237 {
    238     TCGv_vec ret = tcg_temp_new_vec(type);
    239     tcg_gen_dupi_vec(MO_64, ret, -1);
    240     return ret;
    241 }
    242 
    243 TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m)
    244 {
    245     TCGTemp *t = tcgv_vec_temp(m);
    246     return tcg_const_zeros_vec(t->base_type);
    247 }
    248 
    249 TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
    250 {
    251     TCGTemp *t = tcgv_vec_temp(m);
    252     return tcg_const_ones_vec(t->base_type);
    253 }
    254 
    255 void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
    256 {
    257     TCGTemp *rt = tcgv_vec_temp(r);
    258     tcg_gen_mov_vec(r, tcg_constant_vec(rt->base_type, vece, a));
    259 }
    260 
    261 void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
    262 {
    263     TCGArg ri = tcgv_vec_arg(r);
    264     TCGTemp *rt = arg_temp(ri);
    265     TCGType type = rt->base_type;
    266 
    267     if (TCG_TARGET_REG_BITS == 64) {
    268         TCGArg ai = tcgv_i64_arg(a);
    269         vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    270     } else if (vece == MO_64) {
    271         TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
    272         TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
    273         vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
    274     } else {
    275         TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
    276         vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    277     }
    278 }
    279 
    280 void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
    281 {
    282     TCGArg ri = tcgv_vec_arg(r);
    283     TCGArg ai = tcgv_i32_arg(a);
    284     TCGTemp *rt = arg_temp(ri);
    285     TCGType type = rt->base_type;
    286 
    287     vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    288 }
    289 
    290 void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
    291                          tcg_target_long ofs)
    292 {
    293     TCGArg ri = tcgv_vec_arg(r);
    294     TCGArg bi = tcgv_ptr_arg(b);
    295     TCGTemp *rt = arg_temp(ri);
    296     TCGType type = rt->base_type;
    297 
    298     vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
    299 }
    300 
    301 static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
    302 {
    303     TCGArg ri = tcgv_vec_arg(r);
    304     TCGArg bi = tcgv_ptr_arg(b);
    305     TCGTemp *rt = arg_temp(ri);
    306     TCGType type = rt->base_type;
    307 
    308     vec_gen_3(opc, type, 0, ri, bi, o);
    309 }
    310 
/* Emit INDEX_op_ld_vec for vector R, base pointer B and offset O.  */
void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
}
    315 
/* Emit INDEX_op_st_vec for vector R, base pointer B and offset O.  */
void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
}
    320 
    321 void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
    322 {
    323     TCGArg ri = tcgv_vec_arg(r);
    324     TCGArg bi = tcgv_ptr_arg(b);
    325     TCGTemp *rt = arg_temp(ri);
    326     TCGType type = rt->base_type;
    327 
    328     tcg_debug_assert(low_type >= TCG_TYPE_V64);
    329     tcg_debug_assert(low_type <= type);
    330     vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
    331 }
    332 
/*
 * Emit INDEX_op_and_vec with output R and inputs A, B.
 * VECE is ignored; the op is always emitted with element size 0.
 */
void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
}
    337 
/*
 * Emit INDEX_op_or_vec with output R and inputs A, B.
 * VECE is ignored; the op is always emitted with element size 0.
 */
void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
}
    342 
/*
 * Emit INDEX_op_xor_vec with output R and inputs A, B.
 * VECE is ignored; the op is always emitted with element size 0.
 */
void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
}
    347 
    348 void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
    349 {
    350     if (TCG_TARGET_HAS_andc_vec) {
    351         vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
    352     } else {
    353         TCGv_vec t = tcg_temp_new_vec_matching(r);
    354         tcg_gen_not_vec(0, t, b);
    355         tcg_gen_and_vec(0, r, a, t);
    356         tcg_temp_free_vec(t);
    357     }
    358 }
    359 
    360 void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
    361 {
    362     if (TCG_TARGET_HAS_orc_vec) {
    363         vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
    364     } else {
    365         TCGv_vec t = tcg_temp_new_vec_matching(r);
    366         tcg_gen_not_vec(0, t, b);
    367         tcg_gen_or_vec(0, r, a, t);
    368         tcg_temp_free_vec(t);
    369     }
    370 }
    371 
    372 void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
    373 {
    374     if (TCG_TARGET_HAS_nand_vec) {
    375         vec_gen_op3(INDEX_op_nand_vec, 0, r, a, b);
    376     } else {
    377         tcg_gen_and_vec(0, r, a, b);
    378         tcg_gen_not_vec(0, r, r);
    379     }
    380 }
    381 
    382 void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
    383 {
    384     if (TCG_TARGET_HAS_nor_vec) {
    385         vec_gen_op3(INDEX_op_nor_vec, 0, r, a, b);
    386     } else {
    387         tcg_gen_or_vec(0, r, a, b);
    388         tcg_gen_not_vec(0, r, r);
    389     }
    390 }
    391 
    392 void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
    393 {
    394     if (TCG_TARGET_HAS_eqv_vec) {
    395         vec_gen_op3(INDEX_op_eqv_vec, 0, r, a, b);
    396     } else {
    397         tcg_gen_xor_vec(0, r, a, b);
    398         tcg_gen_not_vec(0, r, r);
    399     }
    400 }
    401 
    402 static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
    403 {
    404     TCGTemp *rt = tcgv_vec_temp(r);
    405     TCGTemp *at = tcgv_vec_temp(a);
    406     TCGArg ri = temp_arg(rt);
    407     TCGArg ai = temp_arg(at);
    408     TCGType type = rt->base_type;
    409     int can;
    410 
    411     tcg_debug_assert(at->base_type >= type);
    412     tcg_assert_listed_vecop(opc);
    413     can = tcg_can_emit_vec_op(opc, type, vece);
    414     if (can > 0) {
    415         vec_gen_2(opc, type, vece, ri, ai);
    416     } else if (can < 0) {
    417         const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
    418         tcg_expand_vec_op(opc, type, vece, ri, ai);
    419         tcg_swap_vecop_list(hold_list);
    420     } else {
    421         return false;
    422     }
    423     return true;
    424 }
    425 
    426 void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
    427 {
    428     const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
    429 
    430     if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) {
    431         TCGv_vec t = tcg_const_ones_vec_matching(r);
    432         tcg_gen_xor_vec(0, r, a, t);
    433         tcg_temp_free_vec(t);
    434     }
    435     tcg_swap_vecop_list(hold_list);
    436 }
    437 
    438 void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
    439 {
    440     const TCGOpcode *hold_list;
    441 
    442     tcg_assert_listed_vecop(INDEX_op_neg_vec);
    443     hold_list = tcg_swap_vecop_list(NULL);
    444 
    445     if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
    446         TCGv_vec t = tcg_const_zeros_vec_matching(r);
    447         tcg_gen_sub_vec(vece, r, t, a);
    448         tcg_temp_free_vec(t);
    449     }
    450     tcg_swap_vecop_list(hold_list);
    451 }
    452 
    453 void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
    454 {
    455     const TCGOpcode *hold_list;
    456 
    457     tcg_assert_listed_vecop(INDEX_op_abs_vec);
    458     hold_list = tcg_swap_vecop_list(NULL);
    459 
    460     if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
    461         TCGType type = tcgv_vec_temp(r)->base_type;
    462         TCGv_vec t = tcg_temp_new_vec(type);
    463 
    464         tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
    465         if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
    466             tcg_gen_neg_vec(vece, t, a);
    467             tcg_gen_smax_vec(vece, r, a, t);
    468         } else {
    469             if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
    470                 tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
    471             } else {
    472                 tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a,
    473                                 tcg_constant_vec(type, vece, 0));
    474             }
    475             tcg_gen_xor_vec(vece, r, a, t);
    476             tcg_gen_sub_vec(vece, r, r, t);
    477         }
    478 
    479         tcg_temp_free_vec(t);
    480     }
    481     tcg_swap_vecop_list(hold_list);
    482 }
    483 
    484 static void do_shifti(TCGOpcode opc, unsigned vece,
    485                       TCGv_vec r, TCGv_vec a, int64_t i)
    486 {
    487     TCGTemp *rt = tcgv_vec_temp(r);
    488     TCGTemp *at = tcgv_vec_temp(a);
    489     TCGArg ri = temp_arg(rt);
    490     TCGArg ai = temp_arg(at);
    491     TCGType type = rt->base_type;
    492     int can;
    493 
    494     tcg_debug_assert(at->base_type == type);
    495     tcg_debug_assert(i >= 0 && i < (8 << vece));
    496     tcg_assert_listed_vecop(opc);
    497 
    498     if (i == 0) {
    499         tcg_gen_mov_vec(r, a);
    500         return;
    501     }
    502 
    503     can = tcg_can_emit_vec_op(opc, type, vece);
    504     if (can > 0) {
    505         vec_gen_3(opc, type, vece, ri, ai, i);
    506     } else {
    507         /* We leave the choice of expansion via scalar or vector shift
    508            to the target.  Often, but not always, dupi can feed a vector
    509            shift easier than a scalar.  */
    510         const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
    511         tcg_debug_assert(can < 0);
    512         tcg_expand_vec_op(opc, type, vece, ri, ai, i);
    513         tcg_swap_vecop_list(hold_list);
    514     }
    515 }
    516 
/*
 * Generate INDEX_op_shli_vec on A with immediate count I, result in R.
 * do_shifti() debug-asserts 0 <= I < (8 << VECE).
 */
void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shli_vec, vece, r, a, i);
}
    521 
/*
 * Generate INDEX_op_shri_vec on A with immediate count I, result in R.
 * do_shifti() debug-asserts 0 <= I < (8 << VECE).
 */
void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shri_vec, vece, r, a, i);
}
    526 
/*
 * Generate INDEX_op_sari_vec on A with immediate count I, result in R.
 * do_shifti() debug-asserts 0 <= I < (8 << VECE).
 */
void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_sari_vec, vece, r, a, i);
}
    531 
/*
 * Generate INDEX_op_rotli_vec on A with immediate count I, result in R.
 * do_shifti() debug-asserts 0 <= I < (8 << VECE).
 */
void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
}
    536 
    537 void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
    538 {
    539     int bits = 8 << vece;
    540     tcg_debug_assert(i >= 0 && i < bits);
    541     do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
    542 }
    543 
    544 void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
    545                      TCGv_vec r, TCGv_vec a, TCGv_vec b)
    546 {
    547     TCGTemp *rt = tcgv_vec_temp(r);
    548     TCGTemp *at = tcgv_vec_temp(a);
    549     TCGTemp *bt = tcgv_vec_temp(b);
    550     TCGArg ri = temp_arg(rt);
    551     TCGArg ai = temp_arg(at);
    552     TCGArg bi = temp_arg(bt);
    553     TCGType type = rt->base_type;
    554     int can;
    555 
    556     tcg_debug_assert(at->base_type >= type);
    557     tcg_debug_assert(bt->base_type >= type);
    558     tcg_assert_listed_vecop(INDEX_op_cmp_vec);
    559     can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
    560     if (can > 0) {
    561         vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
    562     } else {
    563         const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
    564         tcg_debug_assert(can < 0);
    565         tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
    566         tcg_swap_vecop_list(hold_list);
    567     }
    568 }
    569 
    570 static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
    571                    TCGv_vec b, TCGOpcode opc)
    572 {
    573     TCGTemp *rt = tcgv_vec_temp(r);
    574     TCGTemp *at = tcgv_vec_temp(a);
    575     TCGTemp *bt = tcgv_vec_temp(b);
    576     TCGArg ri = temp_arg(rt);
    577     TCGArg ai = temp_arg(at);
    578     TCGArg bi = temp_arg(bt);
    579     TCGType type = rt->base_type;
    580     int can;
    581 
    582     tcg_debug_assert(at->base_type >= type);
    583     tcg_debug_assert(bt->base_type >= type);
    584     tcg_assert_listed_vecop(opc);
    585     can = tcg_can_emit_vec_op(opc, type, vece);
    586     if (can > 0) {
    587         vec_gen_3(opc, type, vece, ri, ai, bi);
    588     } else if (can < 0) {
    589         const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
    590         tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
    591         tcg_swap_vecop_list(hold_list);
    592     } else {
    593         return false;
    594     }
    595     return true;
    596 }
    597 
    598 static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a,
    599                           TCGv_vec b, TCGOpcode opc)
    600 {
    601     bool ok = do_op3(vece, r, a, b, opc);
    602     tcg_debug_assert(ok);
    603 }
    604 
/*
 * Generate INDEX_op_add_vec with output R and inputs A, B.
 * do_op3_nofail() debug-asserts that the op could be generated.
 */
void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_add_vec);
}
    609 
/*
 * Generate INDEX_op_sub_vec with output R and inputs A, B.
 * do_op3_nofail() debug-asserts that the op could be generated.
 */
void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec);
}
    614 
/*
 * Generate INDEX_op_mul_vec with output R and inputs A, B.
 * do_op3_nofail() debug-asserts that the op could be generated.
 */
void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec);
}
    619 
/*
 * Generate INDEX_op_ssadd_vec with output R and inputs A, B.
 * do_op3_nofail() debug-asserts that the op could be generated.
 */
void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec);
}
    624 
    625 void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
    626 {
    627     if (!do_op3(vece, r, a, b, INDEX_op_usadd_vec)) {
    628         const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
    629         TCGv_vec t = tcg_temp_new_vec_matching(r);
    630 
    631         /* usadd(a, b) = min(a, ~b) + b */
    632         tcg_gen_not_vec(vece, t, b);
    633         tcg_gen_umin_vec(vece, t, t, a);
    634         tcg_gen_add_vec(vece, r, t, b);
    635 
    636         tcg_temp_free_vec(t);
    637         tcg_swap_vecop_list(hold_list);
    638     }
    639 }
    640 
/*
 * Generate INDEX_op_sssub_vec with output R and inputs A, B.
 * do_op3_nofail() debug-asserts that the op could be generated.
 */
void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec);
}
    645 
    646 void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
    647 {
    648     if (!do_op3(vece, r, a, b, INDEX_op_ussub_vec)) {
    649         const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
    650         TCGv_vec t = tcg_temp_new_vec_matching(r);
    651 
    652         /* ussub(a, b) = max(a, b) - b */
    653         tcg_gen_umax_vec(vece, t, a, b);
    654         tcg_gen_sub_vec(vece, r, t, b);
    655 
    656         tcg_temp_free_vec(t);
    657         tcg_swap_vecop_list(hold_list);
    658     }
    659 }
    660 
    661 static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
    662                       TCGv_vec b, TCGOpcode opc, TCGCond cond)
    663 {
    664     if (!do_op3(vece, r, a, b, opc)) {
    665         const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
    666         tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b);
    667         tcg_swap_vecop_list(hold_list);
    668     }
    669 }
    670 
/*
 * Generate INDEX_op_smin_vec with output R and inputs A, B; do_minmax()
 * falls back to a cmpsel with TCG_COND_LT when the op is unavailable.
 */
void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT);
}
    675 
/*
 * Generate INDEX_op_umin_vec with output R and inputs A, B; do_minmax()
 * falls back to a cmpsel with TCG_COND_LTU when the op is unavailable.
 */
void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU);
}
    680 
/*
 * Generate INDEX_op_smax_vec with output R and inputs A, B; do_minmax()
 * falls back to a cmpsel with TCG_COND_GT when the op is unavailable.
 */
void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT);
}
    685 
/*
 * Generate INDEX_op_umax_vec with output R and inputs A, B; do_minmax()
 * falls back to a cmpsel with TCG_COND_GTU when the op is unavailable.
 */
void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU);
}
    690 
/*
 * Generate INDEX_op_shlv_vec with output R and vector inputs A, B.
 * do_op3_nofail() debug-asserts that the op could be generated.
 */
void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec);
}
    695 
/*
 * Generate INDEX_op_shrv_vec with output R and vector inputs A, B.
 * do_op3_nofail() debug-asserts that the op could be generated.
 */
void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec);
}
    700 
/*
 * Generate INDEX_op_sarv_vec with output R and vector inputs A, B.
 * do_op3_nofail() debug-asserts that the op could be generated.
 */
void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
}
    705 
/*
 * Generate INDEX_op_rotlv_vec with output R and vector inputs A, B.
 * do_op3_nofail() debug-asserts that the op could be generated.
 */
void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec);
}
    710 
/*
 * Generate INDEX_op_rotrv_vec with output R and vector inputs A, B.
 * do_op3_nofail() debug-asserts that the op could be generated.
 */
void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec);
}
    715 
    716 static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
    717                       TCGv_i32 s, TCGOpcode opc)
    718 {
    719     TCGTemp *rt = tcgv_vec_temp(r);
    720     TCGTemp *at = tcgv_vec_temp(a);
    721     TCGTemp *st = tcgv_i32_temp(s);
    722     TCGArg ri = temp_arg(rt);
    723     TCGArg ai = temp_arg(at);
    724     TCGArg si = temp_arg(st);
    725     TCGType type = rt->base_type;
    726     int can;
    727 
    728     tcg_debug_assert(at->base_type >= type);
    729     tcg_assert_listed_vecop(opc);
    730     can = tcg_can_emit_vec_op(opc, type, vece);
    731     if (can > 0) {
    732         vec_gen_3(opc, type, vece, ri, ai, si);
    733     } else if (can < 0) {
    734         const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
    735         tcg_expand_vec_op(opc, type, vece, ri, ai, si);
    736         tcg_swap_vecop_list(hold_list);
    737     } else {
    738         g_assert_not_reached();
    739     }
    740 }
    741 
/*
 * Generate INDEX_op_shls_vec on vector A with the 32-bit scalar
 * operand B, result in R, via do_shifts().
 */
void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shls_vec);
}
    746 
/*
 * Generate INDEX_op_shrs_vec on vector A with the 32-bit scalar
 * operand B, result in R, via do_shifts().
 */
void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shrs_vec);
}
    751 
/*
 * Generate INDEX_op_sars_vec on vector A with the 32-bit scalar
 * operand B, result in R, via do_shifts().
 */
void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_sars_vec);
}
    756 
/*
 * Generate INDEX_op_rotls_vec on vector A with the 32-bit scalar
 * operand S, result in R, via do_shifts().
 */
void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
{
    do_shifts(vece, r, a, s, INDEX_op_rotls_vec);
}
    761 
    762 void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
    763                         TCGv_vec b, TCGv_vec c)
    764 {
    765     TCGTemp *rt = tcgv_vec_temp(r);
    766     TCGTemp *at = tcgv_vec_temp(a);
    767     TCGTemp *bt = tcgv_vec_temp(b);
    768     TCGTemp *ct = tcgv_vec_temp(c);
    769     TCGType type = rt->base_type;
    770 
    771     tcg_debug_assert(at->base_type >= type);
    772     tcg_debug_assert(bt->base_type >= type);
    773     tcg_debug_assert(ct->base_type >= type);
    774 
    775     if (TCG_TARGET_HAS_bitsel_vec) {
    776         vec_gen_4(INDEX_op_bitsel_vec, type, MO_8,
    777                   temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct));
    778     } else {
    779         TCGv_vec t = tcg_temp_new_vec(type);
    780         tcg_gen_and_vec(MO_8, t, a, b);
    781         tcg_gen_andc_vec(MO_8, r, c, a);
    782         tcg_gen_or_vec(MO_8, r, r, t);
    783         tcg_temp_free_vec(t);
    784     }
    785 }
    786 
    787 void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
    788                         TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
    789 {
    790     TCGTemp *rt = tcgv_vec_temp(r);
    791     TCGTemp *at = tcgv_vec_temp(a);
    792     TCGTemp *bt = tcgv_vec_temp(b);
    793     TCGTemp *ct = tcgv_vec_temp(c);
    794     TCGTemp *dt = tcgv_vec_temp(d);
    795     TCGArg ri = temp_arg(rt);
    796     TCGArg ai = temp_arg(at);
    797     TCGArg bi = temp_arg(bt);
    798     TCGArg ci = temp_arg(ct);
    799     TCGArg di = temp_arg(dt);
    800     TCGType type = rt->base_type;
    801     const TCGOpcode *hold_list;
    802     int can;
    803 
    804     tcg_debug_assert(at->base_type >= type);
    805     tcg_debug_assert(bt->base_type >= type);
    806     tcg_debug_assert(ct->base_type >= type);
    807     tcg_debug_assert(dt->base_type >= type);
    808 
    809     tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);
    810     hold_list = tcg_swap_vecop_list(NULL);
    811     can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);
    812 
    813     if (can > 0) {
    814         vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
    815     } else if (can < 0) {
    816         tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
    817                           ri, ai, bi, ci, di, cond);
    818     } else {
    819         TCGv_vec t = tcg_temp_new_vec(type);
    820         tcg_gen_cmp_vec(cond, vece, t, a, b);
    821         tcg_gen_bitsel_vec(vece, r, t, c, d);
    822         tcg_temp_free_vec(t);
    823     }
    824     tcg_swap_vecop_list(hold_list);
    825 }