qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

vis_helper.c (15152B)


      1 /*
      2  * VIS op helpers
      3  *
      4  *  Copyright (c) 2003-2005 Fabrice Bellard
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Lesser General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2.1 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Lesser General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Lesser General Public
     17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     18  */
     19 
     20 #include "qemu/osdep.h"
     21 #include "cpu.h"
     22 #include "exec/helper-proto.h"
     23 
     24 /* This function uses non-native bit order */
     25 #define GET_FIELD(X, FROM, TO)                                  \
     26     ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))
     27 
     28 /* This function uses the order in the manuals, i.e. bit 0 is 2^0 */
     29 #define GET_FIELD_SP(X, FROM, TO)               \
     30     GET_FIELD(X, 63 - (TO), 63 - (FROM))
     31 
     32 target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
     33 {
     34     return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
     35         (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
     36         (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
     37         (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
     38         (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
     39         (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
     40         (((pixel_addr >> 55) & 1) << 4) |
     41         (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
     42         GET_FIELD_SP(pixel_addr, 11, 12);
     43 }
     44 
     45 #if HOST_BIG_ENDIAN
     46 #define VIS_B64(n) b[7 - (n)]
     47 #define VIS_W64(n) w[3 - (n)]
     48 #define VIS_SW64(n) sw[3 - (n)]
     49 #define VIS_L64(n) l[1 - (n)]
     50 #define VIS_B32(n) b[3 - (n)]
     51 #define VIS_W32(n) w[1 - (n)]
     52 #else
     53 #define VIS_B64(n) b[n]
     54 #define VIS_W64(n) w[n]
     55 #define VIS_SW64(n) sw[n]
     56 #define VIS_L64(n) l[n]
     57 #define VIS_B32(n) b[n]
     58 #define VIS_W32(n) w[n]
     59 #endif
     60 
     61 typedef union {
     62     uint8_t b[8];
     63     uint16_t w[4];
     64     int16_t sw[4];
     65     uint32_t l[2];
     66     uint64_t ll;
     67     float64 d;
     68 } VIS64;
     69 
     70 typedef union {
     71     uint8_t b[4];
     72     uint16_t w[2];
     73     uint32_t l;
     74     float32 f;
     75 } VIS32;
     76 
     77 uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
     78 {
     79     VIS64 s, d;
     80 
     81     s.ll = src1;
     82     d.ll = src2;
     83 
     84     /* Reverse calculation order to handle overlap */
     85     d.VIS_B64(7) = s.VIS_B64(3);
     86     d.VIS_B64(6) = d.VIS_B64(3);
     87     d.VIS_B64(5) = s.VIS_B64(2);
     88     d.VIS_B64(4) = d.VIS_B64(2);
     89     d.VIS_B64(3) = s.VIS_B64(1);
     90     d.VIS_B64(2) = d.VIS_B64(1);
     91     d.VIS_B64(1) = s.VIS_B64(0);
     92     /* d.VIS_B64(0) = d.VIS_B64(0); */
     93 
     94     return d.ll;
     95 }
     96 
     97 uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
     98 {
     99     VIS64 s, d;
    100     uint32_t tmp;
    101 
    102     s.ll = src1;
    103     d.ll = src2;
    104 
    105 #define PMUL(r)                                                 \
    106     tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r);       \
    107     if ((tmp & 0xff) > 0x7f) {                                  \
    108         tmp += 0x100;                                           \
    109     }                                                           \
    110     d.VIS_W64(r) = tmp >> 8;
    111 
    112     PMUL(0);
    113     PMUL(1);
    114     PMUL(2);
    115     PMUL(3);
    116 #undef PMUL
    117 
    118     return d.ll;
    119 }
    120 
    121 uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
    122 {
    123     VIS64 s, d;
    124     uint32_t tmp;
    125 
    126     s.ll = src1;
    127     d.ll = src2;
    128 
    129 #define PMUL(r)                                                 \
    130     tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r);       \
    131     if ((tmp & 0xff) > 0x7f) {                                  \
    132         tmp += 0x100;                                           \
    133     }                                                           \
    134     d.VIS_W64(r) = tmp >> 8;
    135 
    136     PMUL(0);
    137     PMUL(1);
    138     PMUL(2);
    139     PMUL(3);
    140 #undef PMUL
    141 
    142     return d.ll;
    143 }
    144 
    145 uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
    146 {
    147     VIS64 s, d;
    148     uint32_t tmp;
    149 
    150     s.ll = src1;
    151     d.ll = src2;
    152 
    153 #define PMUL(r)                                                 \
    154     tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);       \
    155     if ((tmp & 0xff) > 0x7f) {                                  \
    156         tmp += 0x100;                                           \
    157     }                                                           \
    158     d.VIS_W64(r) = tmp >> 8;
    159 
    160     PMUL(0);
    161     PMUL(1);
    162     PMUL(2);
    163     PMUL(3);
    164 #undef PMUL
    165 
    166     return d.ll;
    167 }
    168 
    169 uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
    170 {
    171     VIS64 s, d;
    172     uint32_t tmp;
    173 
    174     s.ll = src1;
    175     d.ll = src2;
    176 
    177 #define PMUL(r)                                                         \
    178     tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
    179     if ((tmp & 0xff) > 0x7f) {                                          \
    180         tmp += 0x100;                                                   \
    181     }                                                                   \
    182     d.VIS_W64(r) = tmp >> 8;
    183 
    184     PMUL(0);
    185     PMUL(1);
    186     PMUL(2);
    187     PMUL(3);
    188 #undef PMUL
    189 
    190     return d.ll;
    191 }
    192 
    193 uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
    194 {
    195     VIS64 s, d;
    196     uint32_t tmp;
    197 
    198     s.ll = src1;
    199     d.ll = src2;
    200 
    201 #define PMUL(r)                                                         \
    202     tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
    203     if ((tmp & 0xff) > 0x7f) {                                          \
    204         tmp += 0x100;                                                   \
    205     }                                                                   \
    206     d.VIS_W64(r) = tmp >> 8;
    207 
    208     PMUL(0);
    209     PMUL(1);
    210     PMUL(2);
    211     PMUL(3);
    212 #undef PMUL
    213 
    214     return d.ll;
    215 }
    216 
    217 uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
    218 {
    219     VIS64 s, d;
    220     uint32_t tmp;
    221 
    222     s.ll = src1;
    223     d.ll = src2;
    224 
    225 #define PMUL(r)                                                         \
    226     tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
    227     if ((tmp & 0xff) > 0x7f) {                                          \
    228         tmp += 0x100;                                                   \
    229     }                                                                   \
    230     d.VIS_L64(r) = tmp;
    231 
    232     /* Reverse calculation order to handle overlap */
    233     PMUL(1);
    234     PMUL(0);
    235 #undef PMUL
    236 
    237     return d.ll;
    238 }
    239 
    240 uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
    241 {
    242     VIS64 s, d;
    243     uint32_t tmp;
    244 
    245     s.ll = src1;
    246     d.ll = src2;
    247 
    248 #define PMUL(r)                                                         \
    249     tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
    250     if ((tmp & 0xff) > 0x7f) {                                          \
    251         tmp += 0x100;                                                   \
    252     }                                                                   \
    253     d.VIS_L64(r) = tmp;
    254 
    255     /* Reverse calculation order to handle overlap */
    256     PMUL(1);
    257     PMUL(0);
    258 #undef PMUL
    259 
    260     return d.ll;
    261 }
    262 
    263 uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
    264 {
    265     VIS32 s;
    266     VIS64 d;
    267 
    268     s.l = (uint32_t)src1;
    269     d.ll = src2;
    270     d.VIS_W64(0) = s.VIS_B32(0) << 4;
    271     d.VIS_W64(1) = s.VIS_B32(1) << 4;
    272     d.VIS_W64(2) = s.VIS_B32(2) << 4;
    273     d.VIS_W64(3) = s.VIS_B32(3) << 4;
    274 
    275     return d.ll;
    276 }
    277 
    278 #define VIS_HELPER(name, F)                             \
    279     uint64_t name##16(uint64_t src1, uint64_t src2)     \
    280     {                                                   \
    281         VIS64 s, d;                                     \
    282                                                         \
    283         s.ll = src1;                                    \
    284         d.ll = src2;                                    \
    285                                                         \
    286         d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));   \
    287         d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));   \
    288         d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));   \
    289         d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));   \
    290                                                         \
    291         return d.ll;                                    \
    292     }                                                   \
    293                                                         \
    294     uint32_t name##16s(uint32_t src1, uint32_t src2)    \
    295     {                                                   \
    296         VIS32 s, d;                                     \
    297                                                         \
    298         s.l = src1;                                     \
    299         d.l = src2;                                     \
    300                                                         \
    301         d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));   \
    302         d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));   \
    303                                                         \
    304         return d.l;                                     \
    305     }                                                   \
    306                                                         \
    307     uint64_t name##32(uint64_t src1, uint64_t src2)     \
    308     {                                                   \
    309         VIS64 s, d;                                     \
    310                                                         \
    311         s.ll = src1;                                    \
    312         d.ll = src2;                                    \
    313                                                         \
    314         d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));   \
    315         d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));   \
    316                                                         \
    317         return d.ll;                                    \
    318     }                                                   \
    319                                                         \
    320     uint32_t name##32s(uint32_t src1, uint32_t src2)    \
    321     {                                                   \
    322         VIS32 s, d;                                     \
    323                                                         \
    324         s.l = src1;                                     \
    325         d.l = src2;                                     \
    326                                                         \
    327         d.l = F(d.l, s.l);                              \
    328                                                         \
    329         return d.l;                                     \
    330     }
    331 
    332 #define FADD(a, b) ((a) + (b))
    333 #define FSUB(a, b) ((a) - (b))
    334 VIS_HELPER(helper_fpadd, FADD)
    335 VIS_HELPER(helper_fpsub, FSUB)
    336 
    337 #define VIS_CMPHELPER(name, F)                                    \
    338     uint64_t name##16(uint64_t src1, uint64_t src2)               \
    339     {                                                             \
    340         VIS64 s, d;                                               \
    341                                                                   \
    342         s.ll = src1;                                              \
    343         d.ll = src2;                                              \
    344                                                                   \
    345         d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
    346         d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
    347         d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0;    \
    348         d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0;    \
    349         d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;           \
    350                                                                   \
    351         return d.ll;                                              \
    352     }                                                             \
    353                                                                   \
    354     uint64_t name##32(uint64_t src1, uint64_t src2)               \
    355     {                                                             \
    356         VIS64 s, d;                                               \
    357                                                                   \
    358         s.ll = src1;                                              \
    359         d.ll = src2;                                              \
    360                                                                   \
    361         d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
    362         d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
    363         d.VIS_L64(1) = 0;                                         \
    364                                                                   \
    365         return d.ll;                                              \
    366     }
    367 
    368 #define FCMPGT(a, b) ((a) > (b))
    369 #define FCMPEQ(a, b) ((a) == (b))
    370 #define FCMPLE(a, b) ((a) <= (b))
    371 #define FCMPNE(a, b) ((a) != (b))
    372 
    373 VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
    374 VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
    375 VIS_CMPHELPER(helper_fcmple, FCMPLE)
    376 VIS_CMPHELPER(helper_fcmpne, FCMPNE)
    377 
    378 uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
    379 {
    380     int i;
    381     for (i = 0; i < 8; i++) {
    382         int s1, s2;
    383 
    384         s1 = (src1 >> (56 - (i * 8))) & 0xff;
    385         s2 = (src2 >> (56 - (i * 8))) & 0xff;
    386 
    387         /* Absolute value of difference. */
    388         s1 -= s2;
    389         if (s1 < 0) {
    390             s1 = -s1;
    391         }
    392 
    393         sum += s1;
    394     }
    395 
    396     return sum;
    397 }
    398 
    399 uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
    400 {
    401     int scale = (gsr >> 3) & 0xf;
    402     uint32_t ret = 0;
    403     int byte;
    404 
    405     for (byte = 0; byte < 4; byte++) {
    406         uint32_t val;
    407         int16_t src = rs2 >> (byte * 16);
    408         int32_t scaled = src << scale;
    409         int32_t from_fixed = scaled >> 7;
    410 
    411         val = (from_fixed < 0 ?  0 :
    412                from_fixed > 255 ?  255 : from_fixed);
    413 
    414         ret |= val << (8 * byte);
    415     }
    416 
    417     return ret;
    418 }
    419 
    420 uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
    421 {
    422     int scale = (gsr >> 3) & 0x1f;
    423     uint64_t ret = 0;
    424     int word;
    425 
    426     ret = (rs1 << 8) & ~(0x000000ff000000ffULL);
    427     for (word = 0; word < 2; word++) {
    428         uint64_t val;
    429         int32_t src = rs2 >> (word * 32);
    430         int64_t scaled = (int64_t)src << scale;
    431         int64_t from_fixed = scaled >> 23;
    432 
    433         val = (from_fixed < 0 ? 0 :
    434                (from_fixed > 255) ? 255 : from_fixed);
    435 
    436         ret |= val << (32 * word);
    437     }
    438 
    439     return ret;
    440 }
    441 
    442 uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
    443 {
    444     int scale = (gsr >> 3) & 0x1f;
    445     uint32_t ret = 0;
    446     int word;
    447 
    448     for (word = 0; word < 2; word++) {
    449         uint32_t val;
    450         int32_t src = rs2 >> (word * 32);
    451         int64_t scaled = (int64_t)src << scale;
    452         int64_t from_fixed = scaled >> 16;
    453 
    454         val = (from_fixed < -32768 ? -32768 :
    455                from_fixed > 32767 ?  32767 : from_fixed);
    456 
    457         ret |= (val & 0xffff) << (word * 16);
    458     }
    459 
    460     return ret;
    461 }
    462 
    463 uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
    464 {
    465     union {
    466         uint64_t ll[2];
    467         uint8_t b[16];
    468     } s;
    469     VIS64 r;
    470     uint32_t i, mask, host;
    471 
    472     /* Set up S such that we can index across all of the bytes.  */
    473 #if HOST_BIG_ENDIAN
    474     s.ll[0] = src1;
    475     s.ll[1] = src2;
    476     host = 0;
    477 #else
    478     s.ll[1] = src1;
    479     s.ll[0] = src2;
    480     host = 15;
    481 #endif
    482     mask = gsr >> 32;
    483 
    484     for (i = 0; i < 8; ++i) {
    485         unsigned e = (mask >> (28 - i*4)) & 0xf;
    486         r.VIS_B64(i) = s.b[e ^ host];
    487     }
    488 
    489     return r.ll;
    490 }