qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

vec_string_helper.c (20323B)


      1 /*
      2  * QEMU TCG support -- s390x vector string instruction support
      3  *
      4  * Copyright (C) 2019 Red Hat Inc
      5  *
      6  * Authors:
      7  *   David Hildenbrand <david@redhat.com>
      8  *
      9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
     10  * See the COPYING file in the top-level directory.
     11  */
     12 #include "qemu/osdep.h"
     13 #include "cpu.h"
     14 #include "s390x-internal.h"
     15 #include "vec.h"
     16 #include "tcg/tcg.h"
     17 #include "tcg/tcg-gvec-desc.h"
     18 #include "exec/helper-proto.h"
     19 
     20 /*
     21  * Returns a bit set in the MSB of each element that is zero,
     22  * as defined by the mask.
     23  */
     24 static inline uint64_t zero_search(uint64_t a, uint64_t mask)
     25 {
     26     return ~(((a & mask) + mask) | a | mask);
     27 }
     28 
     29 /*
     30  * Returns a bit set in the MSB of each element that is not zero,
     31  * as defined by the mask.
     32  */
     33 static inline uint64_t nonzero_search(uint64_t a, uint64_t mask)
     34 {
     35     return (((a & mask) + mask) | a) & ~mask;
     36 }
     37 
     38 /*
     39  * Returns the byte offset for the first match, or 16 for no match.
     40  */
     41 static inline int match_index(uint64_t c0, uint64_t c1)
     42 {
     43     return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3;
     44 }
     45 
     46 /*
     47  * Returns the number of bits composing one element.
     48  */
     49 static uint8_t get_element_bits(uint8_t es)
     50 {
     51     return (1 << es) * BITS_PER_BYTE;
     52 }
     53 
     54 /*
     55  * Returns the bitmask for a single element.
     56  */
     57 static uint64_t get_single_element_mask(uint8_t es)
     58 {
     59     return -1ull >> (64 - get_element_bits(es));
     60 }
     61 
     62 /*
     63  * Returns the bitmask for a single element (excluding the MSB).
     64  */
     65 static uint64_t get_single_element_lsbs_mask(uint8_t es)
     66 {
     67     return -1ull >> (65 - get_element_bits(es));
     68 }
     69 
     70 /*
     71  * Returns the bitmasks for multiple elements (excluding the MSBs).
     72  */
     73 static uint64_t get_element_lsbs_mask(uint8_t es)
     74 {
     75     return dup_const(es, get_single_element_lsbs_mask(es));
     76 }
     77 
     78 static int vfae(void *v1, const void *v2, const void *v3, bool in,
     79                 bool rt, bool zs, uint8_t es)
     80 {
     81     const uint64_t mask = get_element_lsbs_mask(es);
     82     const int bits = get_element_bits(es);
     83     uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1;
     84     uint64_t first_zero = 16;
     85     uint64_t first_equal;
     86     int i;
     87 
     88     a0 = s390_vec_read_element64(v2, 0);
     89     a1 = s390_vec_read_element64(v2, 1);
     90     b0 = s390_vec_read_element64(v3, 0);
     91     b1 = s390_vec_read_element64(v3, 1);
     92     e0 = 0;
     93     e1 = 0;
     94     /* compare against equality with every other element */
     95     for (i = 0; i < 64; i += bits) {
     96         t0 = rol64(b0, i);
     97         t1 = rol64(b1, i);
     98         e0 |= zero_search(a0 ^ t0, mask);
     99         e0 |= zero_search(a0 ^ t1, mask);
    100         e1 |= zero_search(a1 ^ t0, mask);
    101         e1 |= zero_search(a1 ^ t1, mask);
    102     }
    103     /* invert the result if requested - invert only the MSBs */
    104     if (in) {
    105         e0 = ~e0 & ~mask;
    106         e1 = ~e1 & ~mask;
    107     }
    108     first_equal = match_index(e0, e1);
    109 
    110     if (zs) {
    111         z0 = zero_search(a0, mask);
    112         z1 = zero_search(a1, mask);
    113         first_zero = match_index(z0, z1);
    114     }
    115 
    116     if (rt) {
    117         e0 = (e0 >> (bits - 1)) * get_single_element_mask(es);
    118         e1 = (e1 >> (bits - 1)) * get_single_element_mask(es);
    119         s390_vec_write_element64(v1, 0, e0);
    120         s390_vec_write_element64(v1, 1, e1);
    121     } else {
    122         s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
    123         s390_vec_write_element64(v1, 1, 0);
    124     }
    125 
    126     if (first_zero == 16 && first_equal == 16) {
    127         return 3; /* no match */
    128     } else if (first_zero == 16) {
    129         return 1; /* matching elements, no match for zero */
    130     } else if (first_equal < first_zero) {
    131         return 2; /* matching elements before match for zero */
    132     }
    133     return 0; /* match for zero */
    134 }
    135 
    136 #define DEF_VFAE_HELPER(BITS)                                                  \
    137 void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3,         \
    138                              uint32_t desc)                                    \
    139 {                                                                              \
    140     const bool in = extract32(simd_data(desc), 3, 1);                          \
    141     const bool rt = extract32(simd_data(desc), 2, 1);                          \
    142     const bool zs = extract32(simd_data(desc), 1, 1);                          \
    143                                                                                \
    144     vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                                   \
    145 }
    146 DEF_VFAE_HELPER(8)
    147 DEF_VFAE_HELPER(16)
    148 DEF_VFAE_HELPER(32)
    149 
    150 #define DEF_VFAE_CC_HELPER(BITS)                                               \
    151 void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3,      \
    152                                 CPUS390XState *env, uint32_t desc)             \
    153 {                                                                              \
    154     const bool in = extract32(simd_data(desc), 3, 1);                          \
    155     const bool rt = extract32(simd_data(desc), 2, 1);                          \
    156     const bool zs = extract32(simd_data(desc), 1, 1);                          \
    157                                                                                \
    158     env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                      \
    159 }
    160 DEF_VFAE_CC_HELPER(8)
    161 DEF_VFAE_CC_HELPER(16)
    162 DEF_VFAE_CC_HELPER(32)
    163 
    164 static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
    165 {
    166     const uint64_t mask = get_element_lsbs_mask(es);
    167     uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
    168     uint64_t first_zero = 16;
    169     uint64_t first_equal;
    170 
    171     a0 = s390_vec_read_element64(v2, 0);
    172     a1 = s390_vec_read_element64(v2, 1);
    173     b0 = s390_vec_read_element64(v3, 0);
    174     b1 = s390_vec_read_element64(v3, 1);
    175     e0 = zero_search(a0 ^ b0, mask);
    176     e1 = zero_search(a1 ^ b1, mask);
    177     first_equal = match_index(e0, e1);
    178 
    179     if (zs) {
    180         z0 = zero_search(a0, mask);
    181         z1 = zero_search(a1, mask);
    182         first_zero = match_index(z0, z1);
    183     }
    184 
    185     s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
    186     s390_vec_write_element64(v1, 1, 0);
    187     if (first_zero == 16 && first_equal == 16) {
    188         return 3; /* no match */
    189     } else if (first_zero == 16) {
    190         return 1; /* matching elements, no match for zero */
    191     } else if (first_equal < first_zero) {
    192         return 2; /* matching elements before match for zero */
    193     }
    194     return 0; /* match for zero */
    195 }
    196 
    197 #define DEF_VFEE_HELPER(BITS)                                                  \
    198 void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3,         \
    199                              uint32_t desc)                                    \
    200 {                                                                              \
    201     const bool zs = extract32(simd_data(desc), 1, 1);                          \
    202                                                                                \
    203     vfee(v1, v2, v3, zs, MO_##BITS);                                           \
    204 }
    205 DEF_VFEE_HELPER(8)
    206 DEF_VFEE_HELPER(16)
    207 DEF_VFEE_HELPER(32)
    208 
    209 #define DEF_VFEE_CC_HELPER(BITS)                                               \
    210 void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3,      \
    211                                 CPUS390XState *env, uint32_t desc)             \
    212 {                                                                              \
    213     const bool zs = extract32(simd_data(desc), 1, 1);                          \
    214                                                                                \
    215     env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS);                              \
    216 }
    217 DEF_VFEE_CC_HELPER(8)
    218 DEF_VFEE_CC_HELPER(16)
    219 DEF_VFEE_CC_HELPER(32)
    220 
    221 static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
    222 {
    223     const uint64_t mask = get_element_lsbs_mask(es);
    224     uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
    225     uint64_t first_zero = 16;
    226     uint64_t first_inequal;
    227     bool smaller = false;
    228 
    229     a0 = s390_vec_read_element64(v2, 0);
    230     a1 = s390_vec_read_element64(v2, 1);
    231     b0 = s390_vec_read_element64(v3, 0);
    232     b1 = s390_vec_read_element64(v3, 1);
    233     e0 = nonzero_search(a0 ^ b0, mask);
    234     e1 = nonzero_search(a1 ^ b1, mask);
    235     first_inequal = match_index(e0, e1);
    236 
    237     /* identify the smaller element */
    238     if (first_inequal < 16) {
    239         uint8_t enr = first_inequal / (1 << es);
    240         uint32_t a = s390_vec_read_element(v2, enr, es);
    241         uint32_t b = s390_vec_read_element(v3, enr, es);
    242 
    243         smaller = a < b;
    244     }
    245 
    246     if (zs) {
    247         z0 = zero_search(a0, mask);
    248         z1 = zero_search(a1, mask);
    249         first_zero = match_index(z0, z1);
    250     }
    251 
    252     s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
    253     s390_vec_write_element64(v1, 1, 0);
    254     if (first_zero == 16 && first_inequal == 16) {
    255         return 3;
    256     } else if (first_zero < first_inequal) {
    257         return 0;
    258     }
    259     return smaller ? 1 : 2;
    260 }
    261 
    262 #define DEF_VFENE_HELPER(BITS)                                                 \
    263 void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3,        \
    264                               uint32_t desc)                                   \
    265 {                                                                              \
    266     const bool zs = extract32(simd_data(desc), 1, 1);                          \
    267                                                                                \
    268     vfene(v1, v2, v3, zs, MO_##BITS);                                          \
    269 }
    270 DEF_VFENE_HELPER(8)
    271 DEF_VFENE_HELPER(16)
    272 DEF_VFENE_HELPER(32)
    273 
    274 #define DEF_VFENE_CC_HELPER(BITS)                                              \
    275 void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3,     \
    276                                  CPUS390XState *env, uint32_t desc)            \
    277 {                                                                              \
    278     const bool zs = extract32(simd_data(desc), 1, 1);                          \
    279                                                                                \
    280     env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS);                             \
    281 }
    282 DEF_VFENE_CC_HELPER(8)
    283 DEF_VFENE_CC_HELPER(16)
    284 DEF_VFENE_CC_HELPER(32)
    285 
    286 static int vistr(void *v1, const void *v2, uint8_t es)
    287 {
    288     const uint64_t mask = get_element_lsbs_mask(es);
    289     uint64_t a0 = s390_vec_read_element64(v2, 0);
    290     uint64_t a1 = s390_vec_read_element64(v2, 1);
    291     uint64_t z;
    292     int cc = 3;
    293 
    294     z = zero_search(a0, mask);
    295     if (z) {
    296         a0 &= ~(-1ull >> clz64(z));
    297         a1 = 0;
    298         cc = 0;
    299     } else {
    300         z = zero_search(a1, mask);
    301         if (z) {
    302             a1 &= ~(-1ull >> clz64(z));
    303             cc = 0;
    304         }
    305     }
    306 
    307     s390_vec_write_element64(v1, 0, a0);
    308     s390_vec_write_element64(v1, 1, a1);
    309     return cc;
    310 }
    311 
    312 #define DEF_VISTR_HELPER(BITS)                                                 \
    313 void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc)         \
    314 {                                                                              \
    315     vistr(v1, v2, MO_##BITS);                                                  \
    316 }
    317 DEF_VISTR_HELPER(8)
    318 DEF_VISTR_HELPER(16)
    319 DEF_VISTR_HELPER(32)
    320 
    321 #define DEF_VISTR_CC_HELPER(BITS)                                              \
    322 void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \
    323                                 uint32_t desc)                                 \
    324 {                                                                              \
    325     env->cc_op = vistr(v1, v2, MO_##BITS);                                     \
    326 }
    327 DEF_VISTR_CC_HELPER(8)
    328 DEF_VISTR_CC_HELPER(16)
    329 DEF_VISTR_CC_HELPER(32)
    330 
    331 static bool element_compare(uint32_t data, uint32_t l, uint8_t c)
    332 {
    333     const bool equal = extract32(c, 7, 1);
    334     const bool lower = extract32(c, 6, 1);
    335     const bool higher = extract32(c, 5, 1);
    336 
    337     if (data < l) {
    338         return lower;
    339     } else if (data > l) {
    340         return higher;
    341     }
    342     return equal;
    343 }
    344 
    345 static int vstrc(void *v1, const void *v2, const void *v3, const void *v4,
    346                  bool in, bool rt, bool zs, uint8_t es)
    347 {
    348     const uint64_t mask = get_element_lsbs_mask(es);
    349     uint64_t a0 = s390_vec_read_element64(v2, 0);
    350     uint64_t a1 = s390_vec_read_element64(v2, 1);
    351     int first_zero = 16, first_match = 16;
    352     S390Vector rt_result = {};
    353     uint64_t z0, z1;
    354     int i, j;
    355 
    356     if (zs) {
    357         z0 = zero_search(a0, mask);
    358         z1 = zero_search(a1, mask);
    359         first_zero = match_index(z0, z1);
    360     }
    361 
    362     for (i = 0; i < 16 / (1 << es); i++) {
    363         const uint32_t data = s390_vec_read_element(v2, i, es);
    364         const int cur_byte = i * (1 << es);
    365         bool any_match = false;
    366 
    367         /* if we don't need a bit vector, we can stop early */
    368         if (cur_byte == first_zero && !rt) {
    369             break;
    370         }
    371 
    372         for (j = 0; j < 16 / (1 << es); j += 2) {
    373             const uint32_t l1 = s390_vec_read_element(v3, j, es);
    374             const uint32_t l2 = s390_vec_read_element(v3, j + 1, es);
    375             /* we are only interested in the highest byte of each element */
    376             const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es));
    377             const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es));
    378 
    379             if (element_compare(data, l1, c1) &&
    380                 element_compare(data, l2, c2)) {
    381                 any_match = true;
    382                 break;
    383             }
    384         }
    385         /* invert the result if requested */
    386         any_match = in ^ any_match;
    387 
    388         if (any_match) {
    389             /* indicate bit vector if requested */
    390             if (rt) {
    391                 const uint64_t val = -1ull;
    392 
    393                 first_match = MIN(cur_byte, first_match);
    394                 s390_vec_write_element(&rt_result, i, es, val);
    395             } else {
    396                 /* stop on the first match */
    397                 first_match = cur_byte;
    398                 break;
    399             }
    400         }
    401     }
    402 
    403     if (rt) {
    404         *(S390Vector *)v1 = rt_result;
    405     } else {
    406         s390_vec_write_element64(v1, 0, MIN(first_match, first_zero));
    407         s390_vec_write_element64(v1, 1, 0);
    408     }
    409 
    410     if (first_zero == 16 && first_match == 16) {
    411         return 3; /* no match */
    412     } else if (first_zero == 16) {
    413         return 1; /* matching elements, no match for zero */
    414     } else if (first_match < first_zero) {
    415         return 2; /* matching elements before match for zero */
    416     }
    417     return 0; /* match for zero */
    418 }
    419 
    420 #define DEF_VSTRC_HELPER(BITS)                                                 \
    421 void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3,        \
    422                               const void *v4, uint32_t desc)                   \
    423 {                                                                              \
    424     const bool in = extract32(simd_data(desc), 3, 1);                          \
    425     const bool zs = extract32(simd_data(desc), 1, 1);                          \
    426                                                                                \
    427     vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                               \
    428 }
    429 DEF_VSTRC_HELPER(8)
    430 DEF_VSTRC_HELPER(16)
    431 DEF_VSTRC_HELPER(32)
    432 
    433 #define DEF_VSTRC_RT_HELPER(BITS)                                              \
    434 void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3,     \
    435                                  const void *v4, uint32_t desc)                \
    436 {                                                                              \
    437     const bool in = extract32(simd_data(desc), 3, 1);                          \
    438     const bool zs = extract32(simd_data(desc), 1, 1);                          \
    439                                                                                \
    440     vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                               \
    441 }
    442 DEF_VSTRC_RT_HELPER(8)
    443 DEF_VSTRC_RT_HELPER(16)
    444 DEF_VSTRC_RT_HELPER(32)
    445 
    446 #define DEF_VSTRC_CC_HELPER(BITS)                                              \
    447 void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3,     \
    448                                  const void *v4, CPUS390XState *env,           \
    449                                  uint32_t desc)                                \
    450 {                                                                              \
    451     const bool in = extract32(simd_data(desc), 3, 1);                          \
    452     const bool zs = extract32(simd_data(desc), 1, 1);                          \
    453                                                                                \
    454     env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                  \
    455 }
    456 DEF_VSTRC_CC_HELPER(8)
    457 DEF_VSTRC_CC_HELPER(16)
    458 DEF_VSTRC_CC_HELPER(32)
    459 
    460 #define DEF_VSTRC_CC_RT_HELPER(BITS)                                           \
    461 void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3,  \
    462                                     const void *v4, CPUS390XState *env,        \
    463                                     uint32_t desc)                             \
    464 {                                                                              \
    465     const bool in = extract32(simd_data(desc), 3, 1);                          \
    466     const bool zs = extract32(simd_data(desc), 1, 1);                          \
    467                                                                                \
    468     env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                  \
    469 }
    470 DEF_VSTRC_CC_RT_HELPER(8)
    471 DEF_VSTRC_CC_RT_HELPER(16)
    472 DEF_VSTRC_CC_RT_HELPER(32)
    473 
    474 static int vstrs(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
    475                  const S390Vector *v4, uint8_t es, bool zs)
    476 {
    477     int substr_elen, substr_0, str_elen, i, j, k, cc;
    478     int nelem = 16 >> es;
    479     bool eos = false;
    480 
    481     substr_elen = s390_vec_read_element8(v4, 7) >> es;
    482 
    483     /* If ZS, bound substr length by min(nelem, strlen(v3)). */
    484     if (zs) {
    485         substr_elen = MIN(substr_elen, nelem);
    486         for (i = 0; i < substr_elen; i++) {
    487             if (s390_vec_read_element(v3, i, es) == 0) {
    488                 substr_elen = i;
    489                 break;
    490             }
    491         }
    492     }
    493 
    494     if (substr_elen == 0) {
    495         cc = 2; /* full match for degenerate case of empty substr */
    496         k = 0;
    497         goto done;
    498     }
    499 
    500     /* If ZS, look for eos in the searched string. */
    501     if (zs) {
    502         for (k = 0; k < nelem; k++) {
    503             if (s390_vec_read_element(v2, k, es) == 0) {
    504                 eos = true;
    505                 break;
    506             }
    507         }
    508         str_elen = k;
    509     } else {
    510         str_elen = nelem;
    511     }
    512 
    513     substr_0 = s390_vec_read_element(v3, 0, es);
    514 
    515     for (k = 0; ; k++) {
    516         for (; k < str_elen; k++) {
    517             if (s390_vec_read_element(v2, k, es) == substr_0) {
    518                 break;
    519             }
    520         }
    521 
    522         /* If we reached the end of the string, no match. */
    523         if (k == str_elen) {
    524             cc = eos; /* no match (with or without zero char) */
    525             goto done;
    526         }
    527 
    528         /* If the substring is only one char, match. */
    529         if (substr_elen == 1) {
    530             cc = 2; /* full match */
    531             goto done;
    532         }
    533 
    534         /* If the match begins at the last char, we have a partial match. */
    535         if (k == str_elen - 1) {
    536             cc = 3; /* partial match */
    537             goto done;
    538         }
    539 
    540         i = MIN(nelem, k + substr_elen);
    541         for (j = k + 1; j < i; j++) {
    542             uint32_t e2 = s390_vec_read_element(v2, j, es);
    543             uint32_t e3 = s390_vec_read_element(v3, j - k, es);
    544             if (e2 != e3) {
    545                 break;
    546             }
    547         }
    548         if (j == i) {
    549             /* Matched up until "end". */
    550             cc = i - k == substr_elen ? 2 : 3; /* full or partial match */
    551             goto done;
    552         }
    553     }
    554 
    555  done:
    556     s390_vec_write_element64(v1, 0, k << es);
    557     s390_vec_write_element64(v1, 1, 0);
    558     return cc;
    559 }
    560 
    561 #define DEF_VSTRS_HELPER(BITS)                                             \
    562 void QEMU_FLATTEN HELPER(gvec_vstrs_##BITS)(void *v1, const void *v2,      \
    563     const void *v3, const void *v4, CPUS390XState *env, uint32_t desc)     \
    564     { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, false); }              \
    565 void QEMU_FLATTEN HELPER(gvec_vstrs_zs##BITS)(void *v1, const void *v2,    \
    566     const void *v3, const void *v4, CPUS390XState *env, uint32_t desc)     \
    567     { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, true); }
    568 
    569 DEF_VSTRS_HELPER(8)
    570 DEF_VSTRS_HELPER(16)
    571 DEF_VSTRS_HELPER(32)