qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

lmmi_helper.c (14378B)


      1 /*
      2  *  Loongson Multimedia Instruction emulation helpers for QEMU.
      3  *
      4  *  Copyright (c) 2011  Richard Henderson <rth@twiddle.net>
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Lesser General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2.1 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Lesser General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Lesser General Public
     17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     18  */
     19 
     20 #include "qemu/osdep.h"
     21 #include "cpu.h"
     22 #include "exec/helper-proto.h"
     23 
     24 /*
     25  * If the byte ordering doesn't matter, i.e. all columns are treated
     26  * identically, then this union can be used directly.  If byte ordering
     27  * does matter, we generally ignore dumping to memory.
     28  */
typedef union {
    uint8_t  ub[8];     /* eight unsigned byte lanes */
    int8_t   sb[8];     /* eight signed byte lanes */
    uint16_t uh[4];     /* four unsigned halfword lanes */
    int16_t  sh[4];     /* four signed halfword lanes */
    uint32_t uw[2];     /* two unsigned word lanes */
    int32_t  sw[2];     /* two signed word lanes */
    uint64_t d;         /* the whole 64-bit value */
} LMIValue;
     38 
     39 /* Some byte ordering issues can be mitigated by XORing in the following.  */
     40 #if HOST_BIG_ENDIAN
     41 # define BYTE_ORDER_XOR(N) N
     42 #else
     43 # define BYTE_ORDER_XOR(N) 0
     44 #endif
     45 
     46 #define SATSB(x)  (x < -0x80 ? -0x80 : x > 0x7f ? 0x7f : x)
     47 #define SATUB(x)  (x > 0xff ? 0xff : x)
     48 
     49 #define SATSH(x)  (x < -0x8000 ? -0x8000 : x > 0x7fff ? 0x7fff : x)
     50 #define SATUH(x)  (x > 0xffff ? 0xffff : x)
     51 
     52 #define SATSW(x) \
     53     (x < -0x80000000ll ? -0x80000000ll : x > 0x7fffffff ? 0x7fffffff : x)
     54 #define SATUW(x)  (x > 0xffffffffull ? 0xffffffffull : x)
     55 
     56 uint64_t helper_paddsb(uint64_t fs, uint64_t ft)
     57 {
     58     LMIValue vs, vt;
     59     unsigned int i;
     60 
     61     vs.d = fs;
     62     vt.d = ft;
     63     for (i = 0; i < 8; ++i) {
     64         int r = vs.sb[i] + vt.sb[i];
     65         vs.sb[i] = SATSB(r);
     66     }
     67     return vs.d;
     68 }
     69 
     70 uint64_t helper_paddusb(uint64_t fs, uint64_t ft)
     71 {
     72     LMIValue vs, vt;
     73     unsigned int i;
     74 
     75     vs.d = fs;
     76     vt.d = ft;
     77     for (i = 0; i < 8; ++i) {
     78         int r = vs.ub[i] + vt.ub[i];
     79         vs.ub[i] = SATUB(r);
     80     }
     81     return vs.d;
     82 }
     83 
     84 uint64_t helper_paddsh(uint64_t fs, uint64_t ft)
     85 {
     86     LMIValue vs, vt;
     87     unsigned int i;
     88 
     89     vs.d = fs;
     90     vt.d = ft;
     91     for (i = 0; i < 4; ++i) {
     92         int r = vs.sh[i] + vt.sh[i];
     93         vs.sh[i] = SATSH(r);
     94     }
     95     return vs.d;
     96 }
     97 
     98 uint64_t helper_paddush(uint64_t fs, uint64_t ft)
     99 {
    100     LMIValue vs, vt;
    101     unsigned int i;
    102 
    103     vs.d = fs;
    104     vt.d = ft;
    105     for (i = 0; i < 4; ++i) {
    106         int r = vs.uh[i] + vt.uh[i];
    107         vs.uh[i] = SATUH(r);
    108     }
    109     return vs.d;
    110 }
    111 
    112 uint64_t helper_paddb(uint64_t fs, uint64_t ft)
    113 {
    114     LMIValue vs, vt;
    115     unsigned int i;
    116 
    117     vs.d = fs;
    118     vt.d = ft;
    119     for (i = 0; i < 8; ++i) {
    120         vs.ub[i] += vt.ub[i];
    121     }
    122     return vs.d;
    123 }
    124 
    125 uint64_t helper_paddh(uint64_t fs, uint64_t ft)
    126 {
    127     LMIValue vs, vt;
    128     unsigned int i;
    129 
    130     vs.d = fs;
    131     vt.d = ft;
    132     for (i = 0; i < 4; ++i) {
    133         vs.uh[i] += vt.uh[i];
    134     }
    135     return vs.d;
    136 }
    137 
    138 uint64_t helper_paddw(uint64_t fs, uint64_t ft)
    139 {
    140     LMIValue vs, vt;
    141     unsigned int i;
    142 
    143     vs.d = fs;
    144     vt.d = ft;
    145     for (i = 0; i < 2; ++i) {
    146         vs.uw[i] += vt.uw[i];
    147     }
    148     return vs.d;
    149 }
    150 
    151 uint64_t helper_psubsb(uint64_t fs, uint64_t ft)
    152 {
    153     LMIValue vs, vt;
    154     unsigned int i;
    155 
    156     vs.d = fs;
    157     vt.d = ft;
    158     for (i = 0; i < 8; ++i) {
    159         int r = vs.sb[i] - vt.sb[i];
    160         vs.sb[i] = SATSB(r);
    161     }
    162     return vs.d;
    163 }
    164 
    165 uint64_t helper_psubusb(uint64_t fs, uint64_t ft)
    166 {
    167     LMIValue vs, vt;
    168     unsigned int i;
    169 
    170     vs.d = fs;
    171     vt.d = ft;
    172     for (i = 0; i < 8; ++i) {
    173         int r = vs.ub[i] - vt.ub[i];
    174         vs.ub[i] = SATUB(r);
    175     }
    176     return vs.d;
    177 }
    178 
    179 uint64_t helper_psubsh(uint64_t fs, uint64_t ft)
    180 {
    181     LMIValue vs, vt;
    182     unsigned int i;
    183 
    184     vs.d = fs;
    185     vt.d = ft;
    186     for (i = 0; i < 4; ++i) {
    187         int r = vs.sh[i] - vt.sh[i];
    188         vs.sh[i] = SATSH(r);
    189     }
    190     return vs.d;
    191 }
    192 
    193 uint64_t helper_psubush(uint64_t fs, uint64_t ft)
    194 {
    195     LMIValue vs, vt;
    196     unsigned int i;
    197 
    198     vs.d = fs;
    199     vt.d = ft;
    200     for (i = 0; i < 4; ++i) {
    201         int r = vs.uh[i] - vt.uh[i];
    202         vs.uh[i] = SATUH(r);
    203     }
    204     return vs.d;
    205 }
    206 
    207 uint64_t helper_psubb(uint64_t fs, uint64_t ft)
    208 {
    209     LMIValue vs, vt;
    210     unsigned int i;
    211 
    212     vs.d = fs;
    213     vt.d = ft;
    214     for (i = 0; i < 8; ++i) {
    215         vs.ub[i] -= vt.ub[i];
    216     }
    217     return vs.d;
    218 }
    219 
    220 uint64_t helper_psubh(uint64_t fs, uint64_t ft)
    221 {
    222     LMIValue vs, vt;
    223     unsigned int i;
    224 
    225     vs.d = fs;
    226     vt.d = ft;
    227     for (i = 0; i < 4; ++i) {
    228         vs.uh[i] -= vt.uh[i];
    229     }
    230     return vs.d;
    231 }
    232 
    233 uint64_t helper_psubw(uint64_t fs, uint64_t ft)
    234 {
    235     LMIValue vs, vt;
    236     unsigned int i;
    237 
    238     vs.d = fs;
    239     vt.d = ft;
    240     for (i = 0; i < 2; ++i) {
    241         vs.uw[i] -= vt.uw[i];
    242     }
    243     return vs.d;
    244 }
    245 
    246 uint64_t helper_pshufh(uint64_t fs, uint64_t ft)
    247 {
    248     unsigned host = BYTE_ORDER_XOR(3);
    249     LMIValue vd, vs;
    250     unsigned i;
    251 
    252     vs.d = fs;
    253     vd.d = 0;
    254     for (i = 0; i < 4; i++, ft >>= 2) {
    255         vd.uh[i ^ host] = vs.uh[(ft & 3) ^ host];
    256     }
    257     return vd.d;
    258 }
    259 
    260 uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
    261 {
    262     uint64_t fd = 0;
    263     int64_t tmp;
    264 
    265     tmp = (int32_t)(fs >> 0);
    266     tmp = SATSH(tmp);
    267     fd |= (tmp & 0xffff) << 0;
    268 
    269     tmp = (int32_t)(fs >> 32);
    270     tmp = SATSH(tmp);
    271     fd |= (tmp & 0xffff) << 16;
    272 
    273     tmp = (int32_t)(ft >> 0);
    274     tmp = SATSH(tmp);
    275     fd |= (tmp & 0xffff) << 32;
    276 
    277     tmp = (int32_t)(ft >> 32);
    278     tmp = SATSH(tmp);
    279     fd |= (tmp & 0xffff) << 48;
    280 
    281     return fd;
    282 }
    283 
    284 uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
    285 {
    286     uint64_t fd = 0;
    287     unsigned int i;
    288 
    289     for (i = 0; i < 4; ++i) {
    290         int16_t tmp = fs >> (i * 16);
    291         tmp = SATSB(tmp);
    292         fd |= (uint64_t)(tmp & 0xff) << (i * 8);
    293     }
    294     for (i = 0; i < 4; ++i) {
    295         int16_t tmp = ft >> (i * 16);
    296         tmp = SATSB(tmp);
    297         fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32);
    298     }
    299 
    300     return fd;
    301 }
    302 
    303 uint64_t helper_packushb(uint64_t fs, uint64_t ft)
    304 {
    305     uint64_t fd = 0;
    306     unsigned int i;
    307 
    308     for (i = 0; i < 4; ++i) {
    309         int16_t tmp = fs >> (i * 16);
    310         tmp = SATUB(tmp);
    311         fd |= (uint64_t)(tmp & 0xff) << (i * 8);
    312     }
    313     for (i = 0; i < 4; ++i) {
    314         int16_t tmp = ft >> (i * 16);
    315         tmp = SATUB(tmp);
    316         fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32);
    317     }
    318 
    319     return fd;
    320 }
    321 
    322 uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
    323 {
    324     return (fs & 0xffffffff) | (ft << 32);
    325 }
    326 
    327 uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
    328 {
    329     return (fs >> 32) | (ft & ~0xffffffffull);
    330 }
    331 
    332 uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft)
    333 {
    334     unsigned host = BYTE_ORDER_XOR(3);
    335     LMIValue vd, vs, vt;
    336 
    337     vs.d = fs;
    338     vt.d = ft;
    339     vd.uh[0 ^ host] = vs.uh[0 ^ host];
    340     vd.uh[1 ^ host] = vt.uh[0 ^ host];
    341     vd.uh[2 ^ host] = vs.uh[1 ^ host];
    342     vd.uh[3 ^ host] = vt.uh[1 ^ host];
    343 
    344     return vd.d;
    345 }
    346 
    347 uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft)
    348 {
    349     unsigned host = BYTE_ORDER_XOR(3);
    350     LMIValue vd, vs, vt;
    351 
    352     vs.d = fs;
    353     vt.d = ft;
    354     vd.uh[0 ^ host] = vs.uh[2 ^ host];
    355     vd.uh[1 ^ host] = vt.uh[2 ^ host];
    356     vd.uh[2 ^ host] = vs.uh[3 ^ host];
    357     vd.uh[3 ^ host] = vt.uh[3 ^ host];
    358 
    359     return vd.d;
    360 }
    361 
    362 uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft)
    363 {
    364     unsigned host = BYTE_ORDER_XOR(7);
    365     LMIValue vd, vs, vt;
    366 
    367     vs.d = fs;
    368     vt.d = ft;
    369     vd.ub[0 ^ host] = vs.ub[0 ^ host];
    370     vd.ub[1 ^ host] = vt.ub[0 ^ host];
    371     vd.ub[2 ^ host] = vs.ub[1 ^ host];
    372     vd.ub[3 ^ host] = vt.ub[1 ^ host];
    373     vd.ub[4 ^ host] = vs.ub[2 ^ host];
    374     vd.ub[5 ^ host] = vt.ub[2 ^ host];
    375     vd.ub[6 ^ host] = vs.ub[3 ^ host];
    376     vd.ub[7 ^ host] = vt.ub[3 ^ host];
    377 
    378     return vd.d;
    379 }
    380 
    381 uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft)
    382 {
    383     unsigned host = BYTE_ORDER_XOR(7);
    384     LMIValue vd, vs, vt;
    385 
    386     vs.d = fs;
    387     vt.d = ft;
    388     vd.ub[0 ^ host] = vs.ub[4 ^ host];
    389     vd.ub[1 ^ host] = vt.ub[4 ^ host];
    390     vd.ub[2 ^ host] = vs.ub[5 ^ host];
    391     vd.ub[3 ^ host] = vt.ub[5 ^ host];
    392     vd.ub[4 ^ host] = vs.ub[6 ^ host];
    393     vd.ub[5 ^ host] = vt.ub[6 ^ host];
    394     vd.ub[6 ^ host] = vs.ub[7 ^ host];
    395     vd.ub[7 ^ host] = vt.ub[7 ^ host];
    396 
    397     return vd.d;
    398 }
    399 
    400 uint64_t helper_pavgh(uint64_t fs, uint64_t ft)
    401 {
    402     LMIValue vs, vt;
    403     unsigned i;
    404 
    405     vs.d = fs;
    406     vt.d = ft;
    407     for (i = 0; i < 4; i++) {
    408         vs.uh[i] = (vs.uh[i] + vt.uh[i] + 1) >> 1;
    409     }
    410     return vs.d;
    411 }
    412 
    413 uint64_t helper_pavgb(uint64_t fs, uint64_t ft)
    414 {
    415     LMIValue vs, vt;
    416     unsigned i;
    417 
    418     vs.d = fs;
    419     vt.d = ft;
    420     for (i = 0; i < 8; i++) {
    421         vs.ub[i] = (vs.ub[i] + vt.ub[i] + 1) >> 1;
    422     }
    423     return vs.d;
    424 }
    425 
    426 uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft)
    427 {
    428     LMIValue vs, vt;
    429     unsigned i;
    430 
    431     vs.d = fs;
    432     vt.d = ft;
    433     for (i = 0; i < 4; i++) {
    434         vs.sh[i] = (vs.sh[i] >= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
    435     }
    436     return vs.d;
    437 }
    438 
    439 uint64_t helper_pminsh(uint64_t fs, uint64_t ft)
    440 {
    441     LMIValue vs, vt;
    442     unsigned i;
    443 
    444     vs.d = fs;
    445     vt.d = ft;
    446     for (i = 0; i < 4; i++) {
    447         vs.sh[i] = (vs.sh[i] <= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
    448     }
    449     return vs.d;
    450 }
    451 
    452 uint64_t helper_pmaxub(uint64_t fs, uint64_t ft)
    453 {
    454     LMIValue vs, vt;
    455     unsigned i;
    456 
    457     vs.d = fs;
    458     vt.d = ft;
    459     for (i = 0; i < 4; i++) {
    460         vs.ub[i] = (vs.ub[i] >= vt.ub[i] ? vs.ub[i] : vt.ub[i]);
    461     }
    462     return vs.d;
    463 }
    464 
    465 uint64_t helper_pminub(uint64_t fs, uint64_t ft)
    466 {
    467     LMIValue vs, vt;
    468     unsigned i;
    469 
    470     vs.d = fs;
    471     vt.d = ft;
    472     for (i = 0; i < 4; i++) {
    473         vs.ub[i] = (vs.ub[i] <= vt.ub[i] ? vs.ub[i] : vt.ub[i]);
    474     }
    475     return vs.d;
    476 }
    477 
    478 uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft)
    479 {
    480     LMIValue vs, vt;
    481     unsigned i;
    482 
    483     vs.d = fs;
    484     vt.d = ft;
    485     for (i = 0; i < 2; i++) {
    486         vs.uw[i] = -(vs.uw[i] == vt.uw[i]);
    487     }
    488     return vs.d;
    489 }
    490 
    491 uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft)
    492 {
    493     LMIValue vs, vt;
    494     unsigned i;
    495 
    496     vs.d = fs;
    497     vt.d = ft;
    498     for (i = 0; i < 2; i++) {
    499         vs.uw[i] = -(vs.uw[i] > vt.uw[i]);
    500     }
    501     return vs.d;
    502 }
    503 
    504 uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft)
    505 {
    506     LMIValue vs, vt;
    507     unsigned i;
    508 
    509     vs.d = fs;
    510     vt.d = ft;
    511     for (i = 0; i < 4; i++) {
    512         vs.uh[i] = -(vs.uh[i] == vt.uh[i]);
    513     }
    514     return vs.d;
    515 }
    516 
    517 uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft)
    518 {
    519     LMIValue vs, vt;
    520     unsigned i;
    521 
    522     vs.d = fs;
    523     vt.d = ft;
    524     for (i = 0; i < 4; i++) {
    525         vs.uh[i] = -(vs.uh[i] > vt.uh[i]);
    526     }
    527     return vs.d;
    528 }
    529 
    530 uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft)
    531 {
    532     LMIValue vs, vt;
    533     unsigned i;
    534 
    535     vs.d = fs;
    536     vt.d = ft;
    537     for (i = 0; i < 8; i++) {
    538         vs.ub[i] = -(vs.ub[i] == vt.ub[i]);
    539     }
    540     return vs.d;
    541 }
    542 
    543 uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft)
    544 {
    545     LMIValue vs, vt;
    546     unsigned i;
    547 
    548     vs.d = fs;
    549     vt.d = ft;
    550     for (i = 0; i < 8; i++) {
    551         vs.ub[i] = -(vs.ub[i] > vt.ub[i]);
    552     }
    553     return vs.d;
    554 }
    555 
    556 uint64_t helper_psllw(uint64_t fs, uint64_t ft)
    557 {
    558     LMIValue vs;
    559     unsigned i;
    560 
    561     ft &= 0x7f;
    562     if (ft > 31) {
    563         return 0;
    564     }
    565     vs.d = fs;
    566     for (i = 0; i < 2; ++i) {
    567         vs.uw[i] <<= ft;
    568     }
    569     return vs.d;
    570 }
    571 
    572 uint64_t helper_psrlw(uint64_t fs, uint64_t ft)
    573 {
    574     LMIValue vs;
    575     unsigned i;
    576 
    577     ft &= 0x7f;
    578     if (ft > 31) {
    579         return 0;
    580     }
    581     vs.d = fs;
    582     for (i = 0; i < 2; ++i) {
    583         vs.uw[i] >>= ft;
    584     }
    585     return vs.d;
    586 }
    587 
    588 uint64_t helper_psraw(uint64_t fs, uint64_t ft)
    589 {
    590     LMIValue vs;
    591     unsigned i;
    592 
    593     ft &= 0x7f;
    594     if (ft > 31) {
    595         ft = 31;
    596     }
    597     vs.d = fs;
    598     for (i = 0; i < 2; ++i) {
    599         vs.sw[i] >>= ft;
    600     }
    601     return vs.d;
    602 }
    603 
    604 uint64_t helper_psllh(uint64_t fs, uint64_t ft)
    605 {
    606     LMIValue vs;
    607     unsigned i;
    608 
    609     ft &= 0x7f;
    610     if (ft > 15) {
    611         return 0;
    612     }
    613     vs.d = fs;
    614     for (i = 0; i < 4; ++i) {
    615         vs.uh[i] <<= ft;
    616     }
    617     return vs.d;
    618 }
    619 
    620 uint64_t helper_psrlh(uint64_t fs, uint64_t ft)
    621 {
    622     LMIValue vs;
    623     unsigned i;
    624 
    625     ft &= 0x7f;
    626     if (ft > 15) {
    627         return 0;
    628     }
    629     vs.d = fs;
    630     for (i = 0; i < 4; ++i) {
    631         vs.uh[i] >>= ft;
    632     }
    633     return vs.d;
    634 }
    635 
    636 uint64_t helper_psrah(uint64_t fs, uint64_t ft)
    637 {
    638     LMIValue vs;
    639     unsigned i;
    640 
    641     ft &= 0x7f;
    642     if (ft > 15) {
    643         ft = 15;
    644     }
    645     vs.d = fs;
    646     for (i = 0; i < 4; ++i) {
    647         vs.sh[i] >>= ft;
    648     }
    649     return vs.d;
    650 }
    651 
    652 uint64_t helper_pmullh(uint64_t fs, uint64_t ft)
    653 {
    654     LMIValue vs, vt;
    655     unsigned i;
    656 
    657     vs.d = fs;
    658     vt.d = ft;
    659     for (i = 0; i < 4; ++i) {
    660         vs.sh[i] *= vt.sh[i];
    661     }
    662     return vs.d;
    663 }
    664 
    665 uint64_t helper_pmulhh(uint64_t fs, uint64_t ft)
    666 {
    667     LMIValue vs, vt;
    668     unsigned i;
    669 
    670     vs.d = fs;
    671     vt.d = ft;
    672     for (i = 0; i < 4; ++i) {
    673         int32_t r = vs.sh[i] * vt.sh[i];
    674         vs.sh[i] = r >> 16;
    675     }
    676     return vs.d;
    677 }
    678 
    679 uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft)
    680 {
    681     LMIValue vs, vt;
    682     unsigned i;
    683 
    684     vs.d = fs;
    685     vt.d = ft;
    686     for (i = 0; i < 4; ++i) {
    687         uint32_t r = vs.uh[i] * vt.uh[i];
    688         vs.uh[i] = r >> 16;
    689     }
    690     return vs.d;
    691 }
    692 
    693 uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft)
    694 {
    695     unsigned host = BYTE_ORDER_XOR(3);
    696     LMIValue vs, vt;
    697     uint32_t p0, p1;
    698 
    699     vs.d = fs;
    700     vt.d = ft;
    701     p0  = vs.sh[0 ^ host] * vt.sh[0 ^ host];
    702     p0 += vs.sh[1 ^ host] * vt.sh[1 ^ host];
    703     p1  = vs.sh[2 ^ host] * vt.sh[2 ^ host];
    704     p1 += vs.sh[3 ^ host] * vt.sh[3 ^ host];
    705 
    706     return ((uint64_t)p1 << 32) | p0;
    707 }
    708 
    709 uint64_t helper_pasubub(uint64_t fs, uint64_t ft)
    710 {
    711     LMIValue vs, vt;
    712     unsigned i;
    713 
    714     vs.d = fs;
    715     vt.d = ft;
    716     for (i = 0; i < 8; ++i) {
    717         int r = vs.ub[i] - vt.ub[i];
    718         vs.ub[i] = (r < 0 ? -r : r);
    719     }
    720     return vs.d;
    721 }
    722 
    723 uint64_t helper_biadd(uint64_t fs)
    724 {
    725     unsigned i, fd;
    726 
    727     for (i = fd = 0; i < 8; ++i) {
    728         fd += (fs >> (i * 8)) & 0xff;
    729     }
    730     return fd & 0xffff;
    731 }
    732 
    733 uint64_t helper_pmovmskb(uint64_t fs)
    734 {
    735     unsigned fd = 0;
    736 
    737     fd |= ((fs >>  7) & 1) << 0;
    738     fd |= ((fs >> 15) & 1) << 1;
    739     fd |= ((fs >> 23) & 1) << 2;
    740     fd |= ((fs >> 31) & 1) << 3;
    741     fd |= ((fs >> 39) & 1) << 4;
    742     fd |= ((fs >> 47) & 1) << 5;
    743     fd |= ((fs >> 55) & 1) << 6;
    744     fd |= ((fs >> 63) & 1) << 7;
    745 
    746     return fd & 0xff;
    747 }