qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

tcg-runtime-gvec.c (40491B)


      1 /*
      2  * Generic vectorized operation runtime
      3  *
      4  * Copyright (c) 2018 Linaro
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Lesser General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2.1 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Lesser General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Lesser General Public
     17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     18  */
     19 
     20 #include "qemu/osdep.h"
     21 #include "qemu/host-utils.h"
     22 #include "cpu.h"
     23 #include "exec/helper-proto.h"
     24 #include "tcg/tcg-gvec-desc.h"
     25 
     26 
     27 static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
     28 {
     29     intptr_t maxsz = simd_maxsz(desc);
     30     intptr_t i;
     31 
     32     if (unlikely(maxsz > oprsz)) {
     33         for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) {
     34             *(uint64_t *)(d + i) = 0;
     35         }
     36     }
     37 }
     38 
     39 void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
     40 {
     41     intptr_t oprsz = simd_oprsz(desc);
     42     intptr_t i;
     43 
     44     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
     45         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
     46     }
     47     clear_high(d, oprsz, desc);
     48 }
     49 
     50 void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
     51 {
     52     intptr_t oprsz = simd_oprsz(desc);
     53     intptr_t i;
     54 
     55     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
     56         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
     57     }
     58     clear_high(d, oprsz, desc);
     59 }
     60 
     61 void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
     62 {
     63     intptr_t oprsz = simd_oprsz(desc);
     64     intptr_t i;
     65 
     66     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
     67         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
     68     }
     69     clear_high(d, oprsz, desc);
     70 }
     71 
     72 void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
     73 {
     74     intptr_t oprsz = simd_oprsz(desc);
     75     intptr_t i;
     76 
     77     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
     78         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
     79     }
     80     clear_high(d, oprsz, desc);
     81 }
     82 
     83 void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
     84 {
     85     intptr_t oprsz = simd_oprsz(desc);
     86     intptr_t i;
     87 
     88     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
     89         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b;
     90     }
     91     clear_high(d, oprsz, desc);
     92 }
     93 
     94 void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
     95 {
     96     intptr_t oprsz = simd_oprsz(desc);
     97     intptr_t i;
     98 
     99     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    100         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b;
    101     }
    102     clear_high(d, oprsz, desc);
    103 }
    104 
    105 void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
    106 {
    107     intptr_t oprsz = simd_oprsz(desc);
    108     intptr_t i;
    109 
    110     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    111         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b;
    112     }
    113     clear_high(d, oprsz, desc);
    114 }
    115 
    116 void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
    117 {
    118     intptr_t oprsz = simd_oprsz(desc);
    119     intptr_t i;
    120 
    121     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    122         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b;
    123     }
    124     clear_high(d, oprsz, desc);
    125 }
    126 
    127 void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
    128 {
    129     intptr_t oprsz = simd_oprsz(desc);
    130     intptr_t i;
    131 
    132     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    133         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
    134     }
    135     clear_high(d, oprsz, desc);
    136 }
    137 
    138 void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
    139 {
    140     intptr_t oprsz = simd_oprsz(desc);
    141     intptr_t i;
    142 
    143     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    144         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
    145     }
    146     clear_high(d, oprsz, desc);
    147 }
    148 
    149 void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
    150 {
    151     intptr_t oprsz = simd_oprsz(desc);
    152     intptr_t i;
    153 
    154     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    155         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
    156     }
    157     clear_high(d, oprsz, desc);
    158 }
    159 
    160 void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
    161 {
    162     intptr_t oprsz = simd_oprsz(desc);
    163     intptr_t i;
    164 
    165     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    166         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
    167     }
    168     clear_high(d, oprsz, desc);
    169 }
    170 
    171 void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
    172 {
    173     intptr_t oprsz = simd_oprsz(desc);
    174     intptr_t i;
    175 
    176     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    177         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b;
    178     }
    179     clear_high(d, oprsz, desc);
    180 }
    181 
    182 void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
    183 {
    184     intptr_t oprsz = simd_oprsz(desc);
    185     intptr_t i;
    186 
    187     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    188         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b;
    189     }
    190     clear_high(d, oprsz, desc);
    191 }
    192 
    193 void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
    194 {
    195     intptr_t oprsz = simd_oprsz(desc);
    196     intptr_t i;
    197 
    198     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    199         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b;
    200     }
    201     clear_high(d, oprsz, desc);
    202 }
    203 
    204 void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
    205 {
    206     intptr_t oprsz = simd_oprsz(desc);
    207     intptr_t i;
    208 
    209     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    210         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b;
    211     }
    212     clear_high(d, oprsz, desc);
    213 }
    214 
    215 void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
    216 {
    217     intptr_t oprsz = simd_oprsz(desc);
    218     intptr_t i;
    219 
    220     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    221         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
    222     }
    223     clear_high(d, oprsz, desc);
    224 }
    225 
    226 void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
    227 {
    228     intptr_t oprsz = simd_oprsz(desc);
    229     intptr_t i;
    230 
    231     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    232         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
    233     }
    234     clear_high(d, oprsz, desc);
    235 }
    236 
    237 void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
    238 {
    239     intptr_t oprsz = simd_oprsz(desc);
    240     intptr_t i;
    241 
    242     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    243         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
    244     }
    245     clear_high(d, oprsz, desc);
    246 }
    247 
    248 void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
    249 {
    250     intptr_t oprsz = simd_oprsz(desc);
    251     intptr_t i;
    252 
    253     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    254         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
    255     }
    256     clear_high(d, oprsz, desc);
    257 }
    258 
    259 void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
    260 {
    261     intptr_t oprsz = simd_oprsz(desc);
    262     intptr_t i;
    263 
    264     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    265         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b;
    266     }
    267     clear_high(d, oprsz, desc);
    268 }
    269 
    270 void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
    271 {
    272     intptr_t oprsz = simd_oprsz(desc);
    273     intptr_t i;
    274 
    275     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    276         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b;
    277     }
    278     clear_high(d, oprsz, desc);
    279 }
    280 
    281 void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
    282 {
    283     intptr_t oprsz = simd_oprsz(desc);
    284     intptr_t i;
    285 
    286     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    287         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b;
    288     }
    289     clear_high(d, oprsz, desc);
    290 }
    291 
    292 void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
    293 {
    294     intptr_t oprsz = simd_oprsz(desc);
    295     intptr_t i;
    296 
    297     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    298         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b;
    299     }
    300     clear_high(d, oprsz, desc);
    301 }
    302 
    303 void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
    304 {
    305     intptr_t oprsz = simd_oprsz(desc);
    306     intptr_t i;
    307 
    308     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    309         *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
    310     }
    311     clear_high(d, oprsz, desc);
    312 }
    313 
    314 void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
    315 {
    316     intptr_t oprsz = simd_oprsz(desc);
    317     intptr_t i;
    318 
    319     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    320         *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
    321     }
    322     clear_high(d, oprsz, desc);
    323 }
    324 
    325 void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
    326 {
    327     intptr_t oprsz = simd_oprsz(desc);
    328     intptr_t i;
    329 
    330     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    331         *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
    332     }
    333     clear_high(d, oprsz, desc);
    334 }
    335 
    336 void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
    337 {
    338     intptr_t oprsz = simd_oprsz(desc);
    339     intptr_t i;
    340 
    341     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    342         *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
    343     }
    344     clear_high(d, oprsz, desc);
    345 }
    346 
    347 void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
    348 {
    349     intptr_t oprsz = simd_oprsz(desc);
    350     intptr_t i;
    351 
    352     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
    353         int8_t aa = *(int8_t *)(a + i);
    354         *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
    355     }
    356     clear_high(d, oprsz, desc);
    357 }
    358 
    359 void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
    360 {
    361     intptr_t oprsz = simd_oprsz(desc);
    362     intptr_t i;
    363 
    364     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
    365         int16_t aa = *(int16_t *)(a + i);
    366         *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
    367     }
    368     clear_high(d, oprsz, desc);
    369 }
    370 
    371 void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
    372 {
    373     intptr_t oprsz = simd_oprsz(desc);
    374     intptr_t i;
    375 
    376     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
    377         int32_t aa = *(int32_t *)(a + i);
    378         *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
    379     }
    380     clear_high(d, oprsz, desc);
    381 }
    382 
    383 void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
    384 {
    385     intptr_t oprsz = simd_oprsz(desc);
    386     intptr_t i;
    387 
    388     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
    389         int64_t aa = *(int64_t *)(a + i);
    390         *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
    391     }
    392     clear_high(d, oprsz, desc);
    393 }
    394 
    395 void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
    396 {
    397     intptr_t oprsz = simd_oprsz(desc);
    398 
    399     memcpy(d, a, oprsz);
    400     clear_high(d, oprsz, desc);
    401 }
    402 
    403 void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
    404 {
    405     intptr_t oprsz = simd_oprsz(desc);
    406     intptr_t i;
    407 
    408     if (c == 0) {
    409         oprsz = 0;
    410     } else {
    411         for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    412             *(uint64_t *)(d + i) = c;
    413         }
    414     }
    415     clear_high(d, oprsz, desc);
    416 }
    417 
    418 void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
    419 {
    420     intptr_t oprsz = simd_oprsz(desc);
    421     intptr_t i;
    422 
    423     if (c == 0) {
    424         oprsz = 0;
    425     } else {
    426         for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    427             *(uint32_t *)(d + i) = c;
    428         }
    429     }
    430     clear_high(d, oprsz, desc);
    431 }
    432 
    433 void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
    434 {
    435     HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
    436 }
    437 
    438 void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
    439 {
    440     HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
    441 }
    442 
    443 void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
    444 {
    445     intptr_t oprsz = simd_oprsz(desc);
    446     intptr_t i;
    447 
    448     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    449         *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
    450     }
    451     clear_high(d, oprsz, desc);
    452 }
    453 
    454 void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
    455 {
    456     intptr_t oprsz = simd_oprsz(desc);
    457     intptr_t i;
    458 
    459     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    460         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
    461     }
    462     clear_high(d, oprsz, desc);
    463 }
    464 
    465 void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
    466 {
    467     intptr_t oprsz = simd_oprsz(desc);
    468     intptr_t i;
    469 
    470     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    471         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
    472     }
    473     clear_high(d, oprsz, desc);
    474 }
    475 
    476 void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
    477 {
    478     intptr_t oprsz = simd_oprsz(desc);
    479     intptr_t i;
    480 
    481     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    482         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
    483     }
    484     clear_high(d, oprsz, desc);
    485 }
    486 
    487 void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
    488 {
    489     intptr_t oprsz = simd_oprsz(desc);
    490     intptr_t i;
    491 
    492     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    493         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
    494     }
    495     clear_high(d, oprsz, desc);
    496 }
    497 
    498 void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
    499 {
    500     intptr_t oprsz = simd_oprsz(desc);
    501     intptr_t i;
    502 
    503     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    504         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
    505     }
    506     clear_high(d, oprsz, desc);
    507 }
    508 
    509 void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
    510 {
    511     intptr_t oprsz = simd_oprsz(desc);
    512     intptr_t i;
    513 
    514     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    515         *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
    516     }
    517     clear_high(d, oprsz, desc);
    518 }
    519 
    520 void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
    521 {
    522     intptr_t oprsz = simd_oprsz(desc);
    523     intptr_t i;
    524 
    525     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    526         *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
    527     }
    528     clear_high(d, oprsz, desc);
    529 }
    530 
    531 void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
    532 {
    533     intptr_t oprsz = simd_oprsz(desc);
    534     intptr_t i;
    535 
    536     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    537         *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
    538     }
    539     clear_high(d, oprsz, desc);
    540 }
    541 
    542 void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
    543 {
    544     intptr_t oprsz = simd_oprsz(desc);
    545     intptr_t i;
    546 
    547     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    548         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b;
    549     }
    550     clear_high(d, oprsz, desc);
    551 }
    552 
    553 void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
    554 {
    555     intptr_t oprsz = simd_oprsz(desc);
    556     intptr_t i;
    557 
    558     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    559         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b;
    560     }
    561     clear_high(d, oprsz, desc);
    562 }
    563 
    564 void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
    565 {
    566     intptr_t oprsz = simd_oprsz(desc);
    567     intptr_t i;
    568 
    569     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    570         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b;
    571     }
    572     clear_high(d, oprsz, desc);
    573 }
    574 
    575 void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
    576 {
    577     intptr_t oprsz = simd_oprsz(desc);
    578     int shift = simd_data(desc);
    579     intptr_t i;
    580 
    581     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    582         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
    583     }
    584     clear_high(d, oprsz, desc);
    585 }
    586 
    587 void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
    588 {
    589     intptr_t oprsz = simd_oprsz(desc);
    590     int shift = simd_data(desc);
    591     intptr_t i;
    592 
    593     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    594         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
    595     }
    596     clear_high(d, oprsz, desc);
    597 }
    598 
    599 void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
    600 {
    601     intptr_t oprsz = simd_oprsz(desc);
    602     int shift = simd_data(desc);
    603     intptr_t i;
    604 
    605     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    606         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
    607     }
    608     clear_high(d, oprsz, desc);
    609 }
    610 
    611 void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
    612 {
    613     intptr_t oprsz = simd_oprsz(desc);
    614     int shift = simd_data(desc);
    615     intptr_t i;
    616 
    617     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    618         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
    619     }
    620     clear_high(d, oprsz, desc);
    621 }
    622 
    623 void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
    624 {
    625     intptr_t oprsz = simd_oprsz(desc);
    626     int shift = simd_data(desc);
    627     intptr_t i;
    628 
    629     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    630         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
    631     }
    632     clear_high(d, oprsz, desc);
    633 }
    634 
    635 void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
    636 {
    637     intptr_t oprsz = simd_oprsz(desc);
    638     int shift = simd_data(desc);
    639     intptr_t i;
    640 
    641     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    642         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
    643     }
    644     clear_high(d, oprsz, desc);
    645 }
    646 
    647 void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
    648 {
    649     intptr_t oprsz = simd_oprsz(desc);
    650     int shift = simd_data(desc);
    651     intptr_t i;
    652 
    653     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    654         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
    655     }
    656     clear_high(d, oprsz, desc);
    657 }
    658 
    659 void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
    660 {
    661     intptr_t oprsz = simd_oprsz(desc);
    662     int shift = simd_data(desc);
    663     intptr_t i;
    664 
    665     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    666         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
    667     }
    668     clear_high(d, oprsz, desc);
    669 }
    670 
    671 void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
    672 {
    673     intptr_t oprsz = simd_oprsz(desc);
    674     int shift = simd_data(desc);
    675     intptr_t i;
    676 
    677     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    678         *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
    679     }
    680     clear_high(d, oprsz, desc);
    681 }
    682 
    683 void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
    684 {
    685     intptr_t oprsz = simd_oprsz(desc);
    686     int shift = simd_data(desc);
    687     intptr_t i;
    688 
    689     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    690         *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
    691     }
    692     clear_high(d, oprsz, desc);
    693 }
    694 
    695 void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
    696 {
    697     intptr_t oprsz = simd_oprsz(desc);
    698     int shift = simd_data(desc);
    699     intptr_t i;
    700 
    701     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    702         *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
    703     }
    704     clear_high(d, oprsz, desc);
    705 }
    706 
    707 void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
    708 {
    709     intptr_t oprsz = simd_oprsz(desc);
    710     int shift = simd_data(desc);
    711     intptr_t i;
    712 
    713     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    714         *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
    715     }
    716     clear_high(d, oprsz, desc);
    717 }
    718 
    719 void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
    720 {
    721     intptr_t oprsz = simd_oprsz(desc);
    722     int shift = simd_data(desc);
    723     intptr_t i;
    724 
    725     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    726         *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
    727     }
    728     clear_high(d, oprsz, desc);
    729 }
    730 
    731 void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
    732 {
    733     intptr_t oprsz = simd_oprsz(desc);
    734     int shift = simd_data(desc);
    735     intptr_t i;
    736 
    737     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    738         *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
    739     }
    740     clear_high(d, oprsz, desc);
    741 }
    742 
    743 void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
    744 {
    745     intptr_t oprsz = simd_oprsz(desc);
    746     int shift = simd_data(desc);
    747     intptr_t i;
    748 
    749     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    750         *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
    751     }
    752     clear_high(d, oprsz, desc);
    753 }
    754 
    755 void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
    756 {
    757     intptr_t oprsz = simd_oprsz(desc);
    758     int shift = simd_data(desc);
    759     intptr_t i;
    760 
    761     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    762         *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
    763     }
    764     clear_high(d, oprsz, desc);
    765 }
    766 
    767 void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
    768 {
    769     intptr_t oprsz = simd_oprsz(desc);
    770     intptr_t i;
    771 
    772     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    773         uint8_t sh = *(uint8_t *)(b + i) & 7;
    774         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
    775     }
    776     clear_high(d, oprsz, desc);
    777 }
    778 
    779 void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
    780 {
    781     intptr_t oprsz = simd_oprsz(desc);
    782     intptr_t i;
    783 
    784     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    785         uint8_t sh = *(uint16_t *)(b + i) & 15;
    786         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
    787     }
    788     clear_high(d, oprsz, desc);
    789 }
    790 
    791 void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
    792 {
    793     intptr_t oprsz = simd_oprsz(desc);
    794     intptr_t i;
    795 
    796     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    797         uint8_t sh = *(uint32_t *)(b + i) & 31;
    798         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
    799     }
    800     clear_high(d, oprsz, desc);
    801 }
    802 
    803 void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
    804 {
    805     intptr_t oprsz = simd_oprsz(desc);
    806     intptr_t i;
    807 
    808     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    809         uint8_t sh = *(uint64_t *)(b + i) & 63;
    810         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
    811     }
    812     clear_high(d, oprsz, desc);
    813 }
    814 
    815 void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
    816 {
    817     intptr_t oprsz = simd_oprsz(desc);
    818     intptr_t i;
    819 
    820     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    821         uint8_t sh = *(uint8_t *)(b + i) & 7;
    822         *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
    823     }
    824     clear_high(d, oprsz, desc);
    825 }
    826 
    827 void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
    828 {
    829     intptr_t oprsz = simd_oprsz(desc);
    830     intptr_t i;
    831 
    832     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    833         uint8_t sh = *(uint16_t *)(b + i) & 15;
    834         *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
    835     }
    836     clear_high(d, oprsz, desc);
    837 }
    838 
    839 void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
    840 {
    841     intptr_t oprsz = simd_oprsz(desc);
    842     intptr_t i;
    843 
    844     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    845         uint8_t sh = *(uint32_t *)(b + i) & 31;
    846         *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
    847     }
    848     clear_high(d, oprsz, desc);
    849 }
    850 
    851 void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
    852 {
    853     intptr_t oprsz = simd_oprsz(desc);
    854     intptr_t i;
    855 
    856     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    857         uint8_t sh = *(uint64_t *)(b + i) & 63;
    858         *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
    859     }
    860     clear_high(d, oprsz, desc);
    861 }
    862 
    863 void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
    864 {
    865     intptr_t oprsz = simd_oprsz(desc);
    866     intptr_t i;
    867 
    868     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
    869         uint8_t sh = *(uint8_t *)(b + i) & 7;
    870         *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
    871     }
    872     clear_high(d, oprsz, desc);
    873 }
    874 
    875 void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
    876 {
    877     intptr_t oprsz = simd_oprsz(desc);
    878     intptr_t i;
    879 
    880     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
    881         uint8_t sh = *(uint16_t *)(b + i) & 15;
    882         *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
    883     }
    884     clear_high(d, oprsz, desc);
    885 }
    886 
    887 void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
    888 {
    889     intptr_t oprsz = simd_oprsz(desc);
    890     intptr_t i;
    891 
    892     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
    893         uint8_t sh = *(uint32_t *)(b + i) & 31;
    894         *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
    895     }
    896     clear_high(d, oprsz, desc);
    897 }
    898 
    899 void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
    900 {
    901     intptr_t oprsz = simd_oprsz(desc);
    902     intptr_t i;
    903 
    904     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
    905         uint8_t sh = *(uint64_t *)(b + i) & 63;
    906         *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
    907     }
    908     clear_high(d, oprsz, desc);
    909 }
    910 
    911 void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc)
    912 {
    913     intptr_t oprsz = simd_oprsz(desc);
    914     intptr_t i;
    915 
    916     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    917         uint8_t sh = *(uint8_t *)(b + i) & 7;
    918         *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh);
    919     }
    920     clear_high(d, oprsz, desc);
    921 }
    922 
    923 void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc)
    924 {
    925     intptr_t oprsz = simd_oprsz(desc);
    926     intptr_t i;
    927 
    928     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    929         uint8_t sh = *(uint16_t *)(b + i) & 15;
    930         *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh);
    931     }
    932     clear_high(d, oprsz, desc);
    933 }
    934 
    935 void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc)
    936 {
    937     intptr_t oprsz = simd_oprsz(desc);
    938     intptr_t i;
    939 
    940     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    941         uint8_t sh = *(uint32_t *)(b + i) & 31;
    942         *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh);
    943     }
    944     clear_high(d, oprsz, desc);
    945 }
    946 
    947 void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc)
    948 {
    949     intptr_t oprsz = simd_oprsz(desc);
    950     intptr_t i;
    951 
    952     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
    953         uint8_t sh = *(uint64_t *)(b + i) & 63;
    954         *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh);
    955     }
    956     clear_high(d, oprsz, desc);
    957 }
    958 
    959 void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc)
    960 {
    961     intptr_t oprsz = simd_oprsz(desc);
    962     intptr_t i;
    963 
    964     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
    965         uint8_t sh = *(uint8_t *)(b + i) & 7;
    966         *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh);
    967     }
    968     clear_high(d, oprsz, desc);
    969 }
    970 
    971 void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc)
    972 {
    973     intptr_t oprsz = simd_oprsz(desc);
    974     intptr_t i;
    975 
    976     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
    977         uint8_t sh = *(uint16_t *)(b + i) & 15;
    978         *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh);
    979     }
    980     clear_high(d, oprsz, desc);
    981 }
    982 
    983 void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc)
    984 {
    985     intptr_t oprsz = simd_oprsz(desc);
    986     intptr_t i;
    987 
    988     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
    989         uint8_t sh = *(uint32_t *)(b + i) & 31;
    990         *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh);
    991     }
    992     clear_high(d, oprsz, desc);
    993 }
    994 
    995 void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc)
    996 {
    997     intptr_t oprsz = simd_oprsz(desc);
    998     intptr_t i;
    999 
   1000     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
   1001         uint8_t sh = *(uint64_t *)(b + i) & 63;
   1002         *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh);
   1003     }
   1004     clear_high(d, oprsz, desc);
   1005 }
   1006 
   1007 #define DO_CMP1(NAME, TYPE, OP)                                            \
   1008 void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc)                \
   1009 {                                                                          \
   1010     intptr_t oprsz = simd_oprsz(desc);                                     \
   1011     intptr_t i;                                                            \
   1012     for (i = 0; i < oprsz; i += sizeof(TYPE)) {                            \
   1013         *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i));        \
   1014     }                                                                      \
   1015     clear_high(d, oprsz, desc);                                            \
   1016 }
   1017 
   1018 #define DO_CMP2(SZ) \
   1019     DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==)    \
   1020     DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=)    \
   1021     DO_CMP1(gvec_lt##SZ, int##SZ##_t, <)      \
   1022     DO_CMP1(gvec_le##SZ, int##SZ##_t, <=)     \
   1023     DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <)    \
   1024     DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
   1025 
   1026 DO_CMP2(8)
   1027 DO_CMP2(16)
   1028 DO_CMP2(32)
   1029 DO_CMP2(64)
   1030 
   1031 #undef DO_CMP1
   1032 #undef DO_CMP2
   1033 
   1034 void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
   1035 {
   1036     intptr_t oprsz = simd_oprsz(desc);
   1037     intptr_t i;
   1038 
   1039     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
   1040         int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
   1041         if (r > INT8_MAX) {
   1042             r = INT8_MAX;
   1043         } else if (r < INT8_MIN) {
   1044             r = INT8_MIN;
   1045         }
   1046         *(int8_t *)(d + i) = r;
   1047     }
   1048     clear_high(d, oprsz, desc);
   1049 }
   1050 
   1051 void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
   1052 {
   1053     intptr_t oprsz = simd_oprsz(desc);
   1054     intptr_t i;
   1055 
   1056     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
   1057         int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
   1058         if (r > INT16_MAX) {
   1059             r = INT16_MAX;
   1060         } else if (r < INT16_MIN) {
   1061             r = INT16_MIN;
   1062         }
   1063         *(int16_t *)(d + i) = r;
   1064     }
   1065     clear_high(d, oprsz, desc);
   1066 }
   1067 
   1068 void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
   1069 {
   1070     intptr_t oprsz = simd_oprsz(desc);
   1071     intptr_t i;
   1072 
   1073     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
   1074         int32_t ai = *(int32_t *)(a + i);
   1075         int32_t bi = *(int32_t *)(b + i);
   1076         int32_t di;
   1077         if (sadd32_overflow(ai, bi, &di)) {
   1078             di = (di < 0 ? INT32_MAX : INT32_MIN);
   1079         }
   1080         *(int32_t *)(d + i) = di;
   1081     }
   1082     clear_high(d, oprsz, desc);
   1083 }
   1084 
   1085 void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
   1086 {
   1087     intptr_t oprsz = simd_oprsz(desc);
   1088     intptr_t i;
   1089 
   1090     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
   1091         int64_t ai = *(int64_t *)(a + i);
   1092         int64_t bi = *(int64_t *)(b + i);
   1093         int64_t di;
   1094         if (sadd64_overflow(ai, bi, &di)) {
   1095             di = (di < 0 ? INT64_MAX : INT64_MIN);
   1096         }
   1097         *(int64_t *)(d + i) = di;
   1098     }
   1099     clear_high(d, oprsz, desc);
   1100 }
   1101 
   1102 void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
   1103 {
   1104     intptr_t oprsz = simd_oprsz(desc);
   1105     intptr_t i;
   1106 
   1107     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
   1108         int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
   1109         if (r > INT8_MAX) {
   1110             r = INT8_MAX;
   1111         } else if (r < INT8_MIN) {
   1112             r = INT8_MIN;
   1113         }
   1114         *(uint8_t *)(d + i) = r;
   1115     }
   1116     clear_high(d, oprsz, desc);
   1117 }
   1118 
   1119 void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
   1120 {
   1121     intptr_t oprsz = simd_oprsz(desc);
   1122     intptr_t i;
   1123 
   1124     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
   1125         int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
   1126         if (r > INT16_MAX) {
   1127             r = INT16_MAX;
   1128         } else if (r < INT16_MIN) {
   1129             r = INT16_MIN;
   1130         }
   1131         *(int16_t *)(d + i) = r;
   1132     }
   1133     clear_high(d, oprsz, desc);
   1134 }
   1135 
   1136 void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
   1137 {
   1138     intptr_t oprsz = simd_oprsz(desc);
   1139     intptr_t i;
   1140 
   1141     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
   1142         int32_t ai = *(int32_t *)(a + i);
   1143         int32_t bi = *(int32_t *)(b + i);
   1144         int32_t di;
   1145         if (ssub32_overflow(ai, bi, &di)) {
   1146             di = (di < 0 ? INT32_MAX : INT32_MIN);
   1147         }
   1148         *(int32_t *)(d + i) = di;
   1149     }
   1150     clear_high(d, oprsz, desc);
   1151 }
   1152 
   1153 void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
   1154 {
   1155     intptr_t oprsz = simd_oprsz(desc);
   1156     intptr_t i;
   1157 
   1158     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
   1159         int64_t ai = *(int64_t *)(a + i);
   1160         int64_t bi = *(int64_t *)(b + i);
   1161         int64_t di;
   1162         if (ssub64_overflow(ai, bi, &di)) {
   1163             di = (di < 0 ? INT64_MAX : INT64_MIN);
   1164         }
   1165         *(int64_t *)(d + i) = di;
   1166     }
   1167     clear_high(d, oprsz, desc);
   1168 }
   1169 
   1170 void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
   1171 {
   1172     intptr_t oprsz = simd_oprsz(desc);
   1173     intptr_t i;
   1174 
   1175     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
   1176         unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
   1177         if (r > UINT8_MAX) {
   1178             r = UINT8_MAX;
   1179         }
   1180         *(uint8_t *)(d + i) = r;
   1181     }
   1182     clear_high(d, oprsz, desc);
   1183 }
   1184 
   1185 void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
   1186 {
   1187     intptr_t oprsz = simd_oprsz(desc);
   1188     intptr_t i;
   1189 
   1190     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
   1191         unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
   1192         if (r > UINT16_MAX) {
   1193             r = UINT16_MAX;
   1194         }
   1195         *(uint16_t *)(d + i) = r;
   1196     }
   1197     clear_high(d, oprsz, desc);
   1198 }
   1199 
   1200 void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
   1201 {
   1202     intptr_t oprsz = simd_oprsz(desc);
   1203     intptr_t i;
   1204 
   1205     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
   1206         uint32_t ai = *(uint32_t *)(a + i);
   1207         uint32_t bi = *(uint32_t *)(b + i);
   1208         uint32_t di;
   1209         if (uadd32_overflow(ai, bi, &di)) {
   1210             di = UINT32_MAX;
   1211         }
   1212         *(uint32_t *)(d + i) = di;
   1213     }
   1214     clear_high(d, oprsz, desc);
   1215 }
   1216 
   1217 void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
   1218 {
   1219     intptr_t oprsz = simd_oprsz(desc);
   1220     intptr_t i;
   1221 
   1222     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
   1223         uint64_t ai = *(uint64_t *)(a + i);
   1224         uint64_t bi = *(uint64_t *)(b + i);
   1225         uint64_t di;
   1226         if (uadd64_overflow(ai, bi, &di)) {
   1227             di = UINT64_MAX;
   1228         }
   1229         *(uint64_t *)(d + i) = di;
   1230     }
   1231     clear_high(d, oprsz, desc);
   1232 }
   1233 
   1234 void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
   1235 {
   1236     intptr_t oprsz = simd_oprsz(desc);
   1237     intptr_t i;
   1238 
   1239     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
   1240         int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
   1241         if (r < 0) {
   1242             r = 0;
   1243         }
   1244         *(uint8_t *)(d + i) = r;
   1245     }
   1246     clear_high(d, oprsz, desc);
   1247 }
   1248 
   1249 void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
   1250 {
   1251     intptr_t oprsz = simd_oprsz(desc);
   1252     intptr_t i;
   1253 
   1254     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
   1255         int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
   1256         if (r < 0) {
   1257             r = 0;
   1258         }
   1259         *(uint16_t *)(d + i) = r;
   1260     }
   1261     clear_high(d, oprsz, desc);
   1262 }
   1263 
   1264 void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
   1265 {
   1266     intptr_t oprsz = simd_oprsz(desc);
   1267     intptr_t i;
   1268 
   1269     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
   1270         uint32_t ai = *(uint32_t *)(a + i);
   1271         uint32_t bi = *(uint32_t *)(b + i);
   1272         uint32_t di;
   1273         if (usub32_overflow(ai, bi, &di)) {
   1274             di = 0;
   1275         }
   1276         *(uint32_t *)(d + i) = di;
   1277     }
   1278     clear_high(d, oprsz, desc);
   1279 }
   1280 
   1281 void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
   1282 {
   1283     intptr_t oprsz = simd_oprsz(desc);
   1284     intptr_t i;
   1285 
   1286     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
   1287         uint64_t ai = *(uint64_t *)(a + i);
   1288         uint64_t bi = *(uint64_t *)(b + i);
   1289         uint64_t di;
   1290         if (usub64_overflow(ai, bi, &di)) {
   1291             di = 0;
   1292         }
   1293         *(uint64_t *)(d + i) = di;
   1294     }
   1295     clear_high(d, oprsz, desc);
   1296 }
   1297 
   1298 void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
   1299 {
   1300     intptr_t oprsz = simd_oprsz(desc);
   1301     intptr_t i;
   1302 
   1303     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
   1304         int8_t aa = *(int8_t *)(a + i);
   1305         int8_t bb = *(int8_t *)(b + i);
   1306         int8_t dd = aa < bb ? aa : bb;
   1307         *(int8_t *)(d + i) = dd;
   1308     }
   1309     clear_high(d, oprsz, desc);
   1310 }
   1311 
   1312 void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
   1313 {
   1314     intptr_t oprsz = simd_oprsz(desc);
   1315     intptr_t i;
   1316 
   1317     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
   1318         int16_t aa = *(int16_t *)(a + i);
   1319         int16_t bb = *(int16_t *)(b + i);
   1320         int16_t dd = aa < bb ? aa : bb;
   1321         *(int16_t *)(d + i) = dd;
   1322     }
   1323     clear_high(d, oprsz, desc);
   1324 }
   1325 
   1326 void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
   1327 {
   1328     intptr_t oprsz = simd_oprsz(desc);
   1329     intptr_t i;
   1330 
   1331     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
   1332         int32_t aa = *(int32_t *)(a + i);
   1333         int32_t bb = *(int32_t *)(b + i);
   1334         int32_t dd = aa < bb ? aa : bb;
   1335         *(int32_t *)(d + i) = dd;
   1336     }
   1337     clear_high(d, oprsz, desc);
   1338 }
   1339 
   1340 void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
   1341 {
   1342     intptr_t oprsz = simd_oprsz(desc);
   1343     intptr_t i;
   1344 
   1345     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
   1346         int64_t aa = *(int64_t *)(a + i);
   1347         int64_t bb = *(int64_t *)(b + i);
   1348         int64_t dd = aa < bb ? aa : bb;
   1349         *(int64_t *)(d + i) = dd;
   1350     }
   1351     clear_high(d, oprsz, desc);
   1352 }
   1353 
   1354 void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
   1355 {
   1356     intptr_t oprsz = simd_oprsz(desc);
   1357     intptr_t i;
   1358 
   1359     for (i = 0; i < oprsz; i += sizeof(int8_t)) {
   1360         int8_t aa = *(int8_t *)(a + i);
   1361         int8_t bb = *(int8_t *)(b + i);
   1362         int8_t dd = aa > bb ? aa : bb;
   1363         *(int8_t *)(d + i) = dd;
   1364     }
   1365     clear_high(d, oprsz, desc);
   1366 }
   1367 
   1368 void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
   1369 {
   1370     intptr_t oprsz = simd_oprsz(desc);
   1371     intptr_t i;
   1372 
   1373     for (i = 0; i < oprsz; i += sizeof(int16_t)) {
   1374         int16_t aa = *(int16_t *)(a + i);
   1375         int16_t bb = *(int16_t *)(b + i);
   1376         int16_t dd = aa > bb ? aa : bb;
   1377         *(int16_t *)(d + i) = dd;
   1378     }
   1379     clear_high(d, oprsz, desc);
   1380 }
   1381 
   1382 void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
   1383 {
   1384     intptr_t oprsz = simd_oprsz(desc);
   1385     intptr_t i;
   1386 
   1387     for (i = 0; i < oprsz; i += sizeof(int32_t)) {
   1388         int32_t aa = *(int32_t *)(a + i);
   1389         int32_t bb = *(int32_t *)(b + i);
   1390         int32_t dd = aa > bb ? aa : bb;
   1391         *(int32_t *)(d + i) = dd;
   1392     }
   1393     clear_high(d, oprsz, desc);
   1394 }
   1395 
   1396 void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
   1397 {
   1398     intptr_t oprsz = simd_oprsz(desc);
   1399     intptr_t i;
   1400 
   1401     for (i = 0; i < oprsz; i += sizeof(int64_t)) {
   1402         int64_t aa = *(int64_t *)(a + i);
   1403         int64_t bb = *(int64_t *)(b + i);
   1404         int64_t dd = aa > bb ? aa : bb;
   1405         *(int64_t *)(d + i) = dd;
   1406     }
   1407     clear_high(d, oprsz, desc);
   1408 }
   1409 
   1410 void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
   1411 {
   1412     intptr_t oprsz = simd_oprsz(desc);
   1413     intptr_t i;
   1414 
   1415     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
   1416         uint8_t aa = *(uint8_t *)(a + i);
   1417         uint8_t bb = *(uint8_t *)(b + i);
   1418         uint8_t dd = aa < bb ? aa : bb;
   1419         *(uint8_t *)(d + i) = dd;
   1420     }
   1421     clear_high(d, oprsz, desc);
   1422 }
   1423 
   1424 void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
   1425 {
   1426     intptr_t oprsz = simd_oprsz(desc);
   1427     intptr_t i;
   1428 
   1429     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
   1430         uint16_t aa = *(uint16_t *)(a + i);
   1431         uint16_t bb = *(uint16_t *)(b + i);
   1432         uint16_t dd = aa < bb ? aa : bb;
   1433         *(uint16_t *)(d + i) = dd;
   1434     }
   1435     clear_high(d, oprsz, desc);
   1436 }
   1437 
   1438 void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
   1439 {
   1440     intptr_t oprsz = simd_oprsz(desc);
   1441     intptr_t i;
   1442 
   1443     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
   1444         uint32_t aa = *(uint32_t *)(a + i);
   1445         uint32_t bb = *(uint32_t *)(b + i);
   1446         uint32_t dd = aa < bb ? aa : bb;
   1447         *(uint32_t *)(d + i) = dd;
   1448     }
   1449     clear_high(d, oprsz, desc);
   1450 }
   1451 
   1452 void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
   1453 {
   1454     intptr_t oprsz = simd_oprsz(desc);
   1455     intptr_t i;
   1456 
   1457     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
   1458         uint64_t aa = *(uint64_t *)(a + i);
   1459         uint64_t bb = *(uint64_t *)(b + i);
   1460         uint64_t dd = aa < bb ? aa : bb;
   1461         *(uint64_t *)(d + i) = dd;
   1462     }
   1463     clear_high(d, oprsz, desc);
   1464 }
   1465 
   1466 void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
   1467 {
   1468     intptr_t oprsz = simd_oprsz(desc);
   1469     intptr_t i;
   1470 
   1471     for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
   1472         uint8_t aa = *(uint8_t *)(a + i);
   1473         uint8_t bb = *(uint8_t *)(b + i);
   1474         uint8_t dd = aa > bb ? aa : bb;
   1475         *(uint8_t *)(d + i) = dd;
   1476     }
   1477     clear_high(d, oprsz, desc);
   1478 }
   1479 
   1480 void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
   1481 {
   1482     intptr_t oprsz = simd_oprsz(desc);
   1483     intptr_t i;
   1484 
   1485     for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
   1486         uint16_t aa = *(uint16_t *)(a + i);
   1487         uint16_t bb = *(uint16_t *)(b + i);
   1488         uint16_t dd = aa > bb ? aa : bb;
   1489         *(uint16_t *)(d + i) = dd;
   1490     }
   1491     clear_high(d, oprsz, desc);
   1492 }
   1493 
   1494 void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
   1495 {
   1496     intptr_t oprsz = simd_oprsz(desc);
   1497     intptr_t i;
   1498 
   1499     for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
   1500         uint32_t aa = *(uint32_t *)(a + i);
   1501         uint32_t bb = *(uint32_t *)(b + i);
   1502         uint32_t dd = aa > bb ? aa : bb;
   1503         *(uint32_t *)(d + i) = dd;
   1504     }
   1505     clear_high(d, oprsz, desc);
   1506 }
   1507 
   1508 void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
   1509 {
   1510     intptr_t oprsz = simd_oprsz(desc);
   1511     intptr_t i;
   1512 
   1513     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
   1514         uint64_t aa = *(uint64_t *)(a + i);
   1515         uint64_t bb = *(uint64_t *)(b + i);
   1516         uint64_t dd = aa > bb ? aa : bb;
   1517         *(uint64_t *)(d + i) = dd;
   1518     }
   1519     clear_high(d, oprsz, desc);
   1520 }
   1521 
   1522 void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
   1523 {
   1524     intptr_t oprsz = simd_oprsz(desc);
   1525     intptr_t i;
   1526 
   1527     for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
   1528         uint64_t aa = *(uint64_t *)(a + i);
   1529         uint64_t bb = *(uint64_t *)(b + i);
   1530         uint64_t cc = *(uint64_t *)(c + i);
   1531         *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
   1532     }
   1533     clear_high(d, oprsz, desc);
   1534 }