qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

mem_helper.c (90063B)


      1 /*
      2  *  S/390 memory access helper routines
      3  *
      4  *  Copyright (c) 2009 Ulrich Hecht
      5  *  Copyright (c) 2009 Alexander Graf
      6  *
      7  * This library is free software; you can redistribute it and/or
      8  * modify it under the terms of the GNU Lesser General Public
      9  * License as published by the Free Software Foundation; either
     10  * version 2.1 of the License, or (at your option) any later version.
     11  *
     12  * This library is distributed in the hope that it will be useful,
     13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15  * Lesser General Public License for more details.
     16  *
     17  * You should have received a copy of the GNU Lesser General Public
     18  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     19  */
     20 
     21 #include "qemu/osdep.h"
     22 #include "qemu/log.h"
     23 #include "cpu.h"
     24 #include "s390x-internal.h"
     25 #include "tcg_s390x.h"
     26 #include "exec/helper-proto.h"
     27 #include "exec/exec-all.h"
     28 #include "exec/cpu_ldst.h"
     29 #include "qemu/int128.h"
     30 #include "qemu/atomic128.h"
     31 #include "trace.h"
     32 
     33 #if !defined(CONFIG_USER_ONLY)
     34 #include "hw/s390x/storage-keys.h"
     35 #include "hw/boards.h"
     36 #endif
     37 
     38 /*****************************************************************************/
     39 /* Softmmu support */
     40 
/* #define DEBUG_HELPER */
#ifdef DEBUG_HELPER
/* Trace helper activity to the QEMU log when DEBUG_HELPER is defined. */
#define HELPER_LOG(x...) qemu_log(x)
#else
/* Compiled out by default - HELPER_LOG() calls expand to nothing. */
#define HELPER_LOG(x...)
#endif
     47 
     48 static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
     49 {
     50     uint16_t pkm = env->cregs[3] >> 16;
     51 
     52     if (env->psw.mask & PSW_MASK_PSTATE) {
     53         /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
     54         return pkm & (0x80 >> psw_key);
     55     }
     56     return true;
     57 }
     58 
     59 static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
     60                                    uint64_t src, uint32_t len)
     61 {
     62     if (!len || src == dest) {
     63         return false;
     64     }
     65     /* Take care of wrapping at the end of address space. */
     66     if (unlikely(wrap_address(env, src + len - 1) < src)) {
     67         return dest > src || dest <= wrap_address(env, src + len - 1);
     68     }
     69     return dest > src && dest <= src + len - 1;
     70 }
     71 
     72 /* Trigger a SPECIFICATION exception if an address or a length is not
     73    naturally aligned.  */
     74 static inline void check_alignment(CPUS390XState *env, uint64_t v,
     75                                    int wordsize, uintptr_t ra)
     76 {
     77     if (v % wordsize) {
     78         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
     79     }
     80 }
     81 
     82 /* Load a value from memory according to its size.  */
     83 static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
     84                                            int wordsize, uintptr_t ra)
     85 {
     86     switch (wordsize) {
     87     case 1:
     88         return cpu_ldub_data_ra(env, addr, ra);
     89     case 2:
     90         return cpu_lduw_data_ra(env, addr, ra);
     91     default:
     92         abort();
     93     }
     94 }
     95 
     96 /* Store a to memory according to its size.  */
     97 static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
     98                                       uint64_t value, int wordsize,
     99                                       uintptr_t ra)
    100 {
    101     switch (wordsize) {
    102     case 1:
    103         cpu_stb_data_ra(env, addr, value, ra);
    104         break;
    105     case 2:
    106         cpu_stw_data_ra(env, addr, value, ra);
    107         break;
    108     default:
    109         abort();
    110     }
    111 }
    112 
/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;    /* guest virtual address of the first part */
    target_ulong vaddr2;    /* guest virtual address of the second part */
    char *haddr1;           /* host address of part 1, NULL -> slow path */
    char *haddr2;           /* host address of part 2, NULL -> slow path */
    uint16_t size1;         /* bytes covered on the first page */
    uint16_t size2;         /* bytes covered on the second page (0 if none) */
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers. Maybe, one day we can get rid of ld/st access - once we can
     * handle TLB_NOTDIRTY differently. We don't expect these special accesses
     * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP
     * pages, we might trigger a new MMU translation - very unlikely that
     * the mapping changes in between and we would trigger a fault.
     */
    int mmu_idx;
} S390Access;
    133 
    134 /*
    135  * With nonfault=1, return the PGM_ exception that would have been injected
    136  * into the guest; return 0 if no exception was detected.
    137  *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
    139  * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
    140  */
static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
                             MMUAccessType access_type, int mmu_idx,
                             bool nonfault, void **phost, uintptr_t ra)
{
#if defined(CONFIG_USER_ONLY)
    /* User-only: probe_access_flags() already reports the PGM_ code. */
    return probe_access_flags(env, addr, access_type, mmu_idx,
                              nonfault, phost, ra);
#else
    int flags;

    /*
     * Clear any stale exception code; presumably the MMU fault path sets
     * env->tlb_fill_exc during the probe below - TODO confirm against
     * the target's tlb_fill implementation.
     */
    env->tlb_fill_exc = 0;
    flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
                               ra);
    if (env->tlb_fill_exc) {
        return env->tlb_fill_exc;
    }

    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
    return 0;
#endif
}
    168 
    169 static int access_prepare_nf(S390Access *access, CPUS390XState *env,
    170                              bool nonfault, vaddr vaddr1, int size,
    171                              MMUAccessType access_type,
    172                              int mmu_idx, uintptr_t ra)
    173 {
    174     void *haddr1, *haddr2 = NULL;
    175     int size1, size2, exc;
    176     vaddr vaddr2 = 0;
    177 
    178     assert(size > 0 && size <= 4096);
    179 
    180     size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
    181     size2 = size - size1;
    182 
    183     exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
    184                             &haddr1, ra);
    185     if (exc) {
    186         return exc;
    187     }
    188     if (unlikely(size2)) {
    189         /* The access crosses page boundaries. */
    190         vaddr2 = wrap_address(env, vaddr1 + size1);
    191         exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
    192                                 nonfault, &haddr2, ra);
    193         if (exc) {
    194             return exc;
    195         }
    196     }
    197 
    198     *access = (S390Access) {
    199         .vaddr1 = vaddr1,
    200         .vaddr2 = vaddr2,
    201         .haddr1 = haddr1,
    202         .haddr2 = haddr2,
    203         .size1 = size1,
    204         .size2 = size2,
    205         .mmu_idx = mmu_idx
    206     };
    207     return 0;
    208 }
    209 
    210 static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
    211                                  MMUAccessType access_type, int mmu_idx,
    212                                  uintptr_t ra)
    213 {
    214     S390Access ret;
    215     int exc = access_prepare_nf(&ret, env, false, vaddr, size,
    216                                 access_type, mmu_idx, ra);
    217     assert(!exc);
    218     return ret;
    219 }
    220 
/* Helper to handle memset on a single page. */
static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
                             uint8_t byte, uint16_t size, int mmu_idx,
                             uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    /* User-only: the host address is always directly accessible. */
    g_assert(haddr);
    memset(haddr, byte, size);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    int i;

    if (likely(haddr)) {
        /* Fast path: fill the host page directly. */
        memset(haddr, byte, size);
    } else {
        /*
         * Do a single access and test if we can then get access to the
         * page. This is especially relevant to speed up TLB_NOTDIRTY.
         */
        g_assert(size > 0);
        cpu_stb_mmu(env, vaddr, byte, oi, ra);
        haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
        if (likely(haddr)) {
            /* The first store made the page accessible - finish directly. */
            memset(haddr + 1, byte, size - 1);
        } else {
            /* Still no direct access - fall back to byte-wise stores. */
            for (i = 1; i < size; i++) {
                cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
            }
        }
    }
#endif
}
    253 
    254 static void access_memset(CPUS390XState *env, S390Access *desta,
    255                           uint8_t byte, uintptr_t ra)
    256 {
    257 
    258     do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
    259                      desta->mmu_idx, ra);
    260     if (likely(!desta->size2)) {
    261         return;
    262     }
    263     do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
    264                      desta->mmu_idx, ra);
    265 }
    266 
/* Read one byte at @vaddr + @offset on a single page. */
static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                                  int offset, int mmu_idx, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    /* User-only: the host address is always directly accessible. */
    return ldub_p(*haddr + offset);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    uint8_t byte;

    if (likely(*haddr)) {
        /* Fast path: read from the host page directly. */
        return ldub_p(*haddr + offset);
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    byte = cpu_ldb_mmu(env, vaddr + offset, oi, ra);
    /* Cache the host address (may remain NULL) for subsequent bytes. */
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
    return byte;
#endif
}
    288 
    289 static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
    290                                int offset, uintptr_t ra)
    291 {
    292     if (offset < access->size1) {
    293         return do_access_get_byte(env, access->vaddr1, &access->haddr1,
    294                                   offset, access->mmu_idx, ra);
    295     }
    296     return do_access_get_byte(env, access->vaddr2, &access->haddr2,
    297                               offset - access->size1, access->mmu_idx, ra);
    298 }
    299 
/* Write one byte at @vaddr + @offset on a single page. */
static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                               int offset, uint8_t byte, int mmu_idx,
                               uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    /* User-only: the host address is always directly accessible. */
    stb_p(*haddr + offset, byte);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);

    if (likely(*haddr)) {
        /* Fast path: write to the host page directly. */
        stb_p(*haddr + offset, byte);
        return;
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
    /* Cache the host address (may remain NULL) for subsequent bytes. */
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
#endif
}
    321 
    322 static void access_set_byte(CPUS390XState *env, S390Access *access,
    323                             int offset, uint8_t byte, uintptr_t ra)
    324 {
    325     if (offset < access->size1) {
    326         do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
    327                            access->mmu_idx, ra);
    328     } else {
    329         do_access_set_byte(env, access->vaddr2, &access->haddr2,
    330                            offset - access->size1, byte, access->mmu_idx, ra);
    331     }
    332 }
    333 
    334 /*
    335  * Move data with the same semantics as memmove() in case ranges don't overlap
    336  * or src > dest. Undefined behavior on destructive overlaps.
    337  */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int diff;

    /* Caller guarantees equal total lengths. */
    g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);

    /* Fallback to slow access in case we don't have access to all host pages */
    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
        int i;

        for (i = 0; i < desta->size1 + desta->size2; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);

            access_set_byte(env, desta, i, byte, ra);
        }
        return;
    }

    if (srca->size1 == desta->size1) {
        /* Both operands split at the same offset: copy part for part. */
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        if (unlikely(srca->size2)) {
            memmove(desta->haddr2, srca->haddr2, srca->size2);
        }
    } else if (srca->size1 < desta->size1) {
        /* Source crosses its page boundary first; the first destination
           page receives the tail of srca part 1 plus the head of part 2. */
        diff = desta->size1 - srca->size1;
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
        if (likely(desta->size2)) {
            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
        }
    } else {
        /* Destination crosses its page boundary first; srca part 1 is
           split across both destination pages. */
        diff = srca->size1 - desta->size1;
        memmove(desta->haddr1, srca->haddr1, desta->size1);
        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
        if (likely(srca->size2)) {
            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
        }
    }
}
    379 
    380 static int mmu_idx_from_as(uint8_t as)
    381 {
    382     switch (as) {
    383     case AS_PRIMARY:
    384         return MMU_PRIMARY_IDX;
    385     case AS_SECONDARY:
    386         return MMU_SECONDARY_IDX;
    387     case AS_HOME:
    388         return MMU_HOME_IDX;
    389     default:
    390         /* FIXME AS_ACCREG */
    391         g_assert_not_reached();
    392     }
    393 }
    394 
    395 /* and on array */
    396 static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
    397                              uint64_t src, uintptr_t ra)
    398 {
    399     const int mmu_idx = cpu_mmu_index(env, false);
    400     S390Access srca1, srca2, desta;
    401     uint32_t i;
    402     uint8_t c = 0;
    403 
    404     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
    405                __func__, l, dest, src);
    406 
    407     /* NC always processes one more byte than specified - maximum is 256 */
    408     l++;
    409 
    410     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    411     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    412     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    413     for (i = 0; i < l; i++) {
    414         const uint8_t x = access_get_byte(env, &srca1, i, ra) &
    415                           access_get_byte(env, &srca2, i, ra);
    416 
    417         c |= x;
    418         access_set_byte(env, &desta, i, x, ra);
    419     }
    420     return c != 0;
    421 }
    422 
uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    /* GETPC() captures the host return address used for fault unwinding. */
    return do_helper_nc(env, l, dest, src, GETPC());
}
    428 
    429 /* xor on array */
    430 static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
    431                              uint64_t src, uintptr_t ra)
    432 {
    433     const int mmu_idx = cpu_mmu_index(env, false);
    434     S390Access srca1, srca2, desta;
    435     uint32_t i;
    436     uint8_t c = 0;
    437 
    438     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
    439                __func__, l, dest, src);
    440 
    441     /* XC always processes one more byte than specified - maximum is 256 */
    442     l++;
    443 
    444     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    445     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    446     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    447 
    448     /* xor with itself is the same as memset(0) */
    449     if (src == dest) {
    450         access_memset(env, &desta, 0, ra);
    451         return 0;
    452     }
    453 
    454     for (i = 0; i < l; i++) {
    455         const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
    456                           access_get_byte(env, &srca2, i, ra);
    457 
    458         c |= x;
    459         access_set_byte(env, &desta, i, x, ra);
    460     }
    461     return c != 0;
    462 }
    463 
uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    /* GETPC() captures the host return address used for fault unwinding. */
    return do_helper_xc(env, l, dest, src, GETPC());
}
    469 
    470 /* or on array */
    471 static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
    472                              uint64_t src, uintptr_t ra)
    473 {
    474     const int mmu_idx = cpu_mmu_index(env, false);
    475     S390Access srca1, srca2, desta;
    476     uint32_t i;
    477     uint8_t c = 0;
    478 
    479     HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
    480                __func__, l, dest, src);
    481 
    482     /* OC always processes one more byte than specified - maximum is 256 */
    483     l++;
    484 
    485     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    486     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    487     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    488     for (i = 0; i < l; i++) {
    489         const uint8_t x = access_get_byte(env, &srca1, i, ra) |
    490                           access_get_byte(env, &srca2, i, ra);
    491 
    492         c |= x;
    493         access_set_byte(env, &desta, i, x, ra);
    494     }
    495     return c != 0;
    496 }
    497 
uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    /* GETPC() captures the host return address used for fault unwinding. */
    return do_helper_oc(env, l, dest, src, GETPC());
}
    503 
/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
    if (dest == src + 1) {
        /* dest == src + 1 propagates the first source byte - a memset. */
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        /* Destructive overlap: emulate strict byte-at-a-time processing. */
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
    }

    /* The CC is not modified here; the current value is passed through. */
    return env->cc_op;
}
    540 
void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    /* GETPC() captures the host return address used for fault unwinding. */
    do_helper_mvc(env, l, dest, src, GETPC());
}
    545 
    546 /* move right to left */
    547 void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
    548 {
    549     const int mmu_idx = cpu_mmu_index(env, false);
    550     const uint64_t ra = GETPC();
    551     S390Access srca, desta;
    552     int32_t i;
    553 
    554     /* MVCRL always copies one more byte than specified - maximum is 256 */
    555     l++;
    556 
    557     srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    558     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    559 
    560     for (i = l - 1; i >= 0; i--) {
    561         uint8_t byte = access_get_byte(env, &srca, i, ra);
    562         access_set_byte(env, &desta, i, byte, ra);
    563     }
    564 }
    565 
    566 /* move inverse  */
    567 void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
    568 {
    569     const int mmu_idx = cpu_mmu_index(env, false);
    570     S390Access srca, desta;
    571     uintptr_t ra = GETPC();
    572     int i;
    573 
    574     /* MVCIN always copies one more byte than specified - maximum is 256 */
    575     l++;
    576 
    577     src = wrap_address(env, src - l + 1);
    578     srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    579     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    580     for (i = 0; i < l; i++) {
    581         const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
    582 
    583         access_set_byte(env, &desta, i, x, ra);
    584     }
    585 }
    586 
    587 /* move numerics  */
    588 void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
    589 {
    590     const int mmu_idx = cpu_mmu_index(env, false);
    591     S390Access srca1, srca2, desta;
    592     uintptr_t ra = GETPC();
    593     int i;
    594 
    595     /* MVN always copies one more byte than specified - maximum is 256 */
    596     l++;
    597 
    598     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    599     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    600     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    601     for (i = 0; i < l; i++) {
    602         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
    603                           (access_get_byte(env, &srca2, i, ra) & 0xf0);
    604 
    605         access_set_byte(env, &desta, i, x, ra);
    606     }
    607 }
    608 
/* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;
    const int len_src = (l & 0xf) + 1;
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte: keep its low nibble, place the source's low
       nibble into its high nibble. */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        /* Carry over the high nibble of the previous source byte. */
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            /* Source exhausted - pad with zero nibbles. */
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
}
    642 
    643 /* move zones  */
    644 void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
    645 {
    646     const int mmu_idx = cpu_mmu_index(env, false);
    647     S390Access srca1, srca2, desta;
    648     uintptr_t ra = GETPC();
    649     int i;
    650 
    651     /* MVZ always copies one more byte than specified - maximum is 256 */
    652     l++;
    653 
    654     srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    655     srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    656     desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    657     for (i = 0; i < l; i++) {
    658         const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
    659                           (access_get_byte(env, &srca2, i, ra) & 0x0f);
    660 
    661         access_set_byte(env, &desta, i, x, ra);
    662     }
    663 }
    664 
    665 /* compare unsigned byte arrays */
    666 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
    667                               uint64_t s2, uintptr_t ra)
    668 {
    669     uint32_t i;
    670     uint32_t cc = 0;
    671 
    672     HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
    673                __func__, l, s1, s2);
    674 
    675     for (i = 0; i <= l; i++) {
    676         uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
    677         uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
    678         HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
    679         if (x < y) {
    680             cc = 1;
    681             break;
    682         } else if (x > y) {
    683             cc = 2;
    684             break;
    685         }
    686     }
    687 
    688     HELPER_LOG("\n");
    689     return cc;
    690 }
    691 
uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    /* GETPC() captures the host return address used for fault unwinding. */
    return do_helper_clc(env, l, s1, s2, GETPC());
}
    696 
    697 /* compare logical under mask */
    698 uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
    699                      uint64_t addr)
    700 {
    701     uintptr_t ra = GETPC();
    702     uint32_t cc = 0;
    703 
    704     HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
    705                mask, addr);
    706 
    707     while (mask) {
    708         if (mask & 8) {
    709             uint8_t d = cpu_ldub_data_ra(env, addr, ra);
    710             uint8_t r = extract32(r1, 24, 8);
    711             HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
    712                        addr);
    713             if (r < d) {
    714                 cc = 1;
    715                 break;
    716             } else if (r > d) {
    717                 cc = 2;
    718                 break;
    719             }
    720             addr++;
    721         }
    722         mask = (mask << 1) & 0xf;
    723         r1 <<= 8;
    724     }
    725 
    726     HELPER_LOG("\n");
    727     return cc;
    728 }
    729 
/* Read general register @reg as an address, wrapped for the current
   addressing mode. */
static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}
    734 
    735 /*
    736  * Store the address to the given register, zeroing out unused leftmost
    737  * bits in bit positions 32-63 (24-bit and 31-bit mode only).
    738  */
    739 static inline void set_address_zero(CPUS390XState *env, int reg,
    740                                     uint64_t address)
    741 {
    742     if (env->psw.mask & PSW_MASK_64) {
    743         env->regs[reg] = address;
    744     } else {
    745         if (!(env->psw.mask & PSW_MASK_32)) {
    746             address &= 0x00ffffff;
    747         } else {
    748             address &= 0x7fffffff;
    749         }
    750         env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
    751     }
    752 }
    753 
/* Store @address to @reg, honoring the current addressing mode. */
static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            /* 24-Bit mode. According to the PoO it is implementation
            dependent if bits 32-39 remain unchanged or are set to
            zeros.  Choose the former so that the function can also be
            used for TRT.  */
            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
        } else {
            /* 31-Bit mode. According to the PoO it is implementation
            dependent if bit 32 remains unchanged or is set to zero.
            Choose the latter so that the function can also be used for
            TRT.  */
            address &= 0x7fffffff;
            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        }
    }
}
    776 
    777 static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
    778 {
    779     if (!(env->psw.mask & PSW_MASK_64)) {
    780         return (uint32_t)length;
    781     }
    782     return length;
    783 }
    784 
    785 static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
    786 {
    787     if (!(env->psw.mask & PSW_MASK_64)) {
    788         /* 24-Bit and 31-Bit mode */
    789         length &= 0x7fffffff;
    790     }
    791     return length;
    792 }
    793 
/* Read general register @reg as a length, wrapped to 31 bits outside of
   64-bit mode. */
static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length31(env, env->regs[reg]);
}
    798 
    799 static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
    800 {
    801     if (env->psw.mask & PSW_MASK_64) {
    802         /* 64-Bit mode */
    803         env->regs[reg] = length;
    804     } else {
    805         /* 24-Bit and 31-Bit mode */
    806         env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
    807     }
    808 }
    809 
    810 /* search string (c is byte to search, r2 is string, r1 end of string) */
    811 void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
    812 {
    813     uintptr_t ra = GETPC();
    814     uint64_t end, str;
    815     uint32_t len;
    816     uint8_t v, c = env->regs[0];
    817 
    818     /* Bits 32-55 must contain all 0.  */
    819     if (env->regs[0] & 0xffffff00u) {
    820         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    821     }
    822 
    823     str = get_address(env, r2);
    824     end = get_address(env, r1);
    825 
    826     /* Lest we fail to service interrupts in a timely manner, limit the
    827        amount of work we're willing to do.  For now, let's cap at 8k.  */
    828     for (len = 0; len < 0x2000; ++len) {
    829         if (str + len == end) {
    830             /* Character not found.  R1 & R2 are unmodified.  */
    831             env->cc_op = 2;
    832             return;
    833         }
    834         v = cpu_ldub_data_ra(env, str + len, ra);
    835         if (v == c) {
    836             /* Character found.  Set R1 to the location; R2 is unmodified.  */
    837             env->cc_op = 1;
    838             set_address(env, r1, str + len);
    839             return;
    840         }
    841     }
    842 
    843     /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    844     env->cc_op = 3;
    845     set_address(env, r2, str + len);
    846 }
    847 
/*
 * SEARCH STRING UNICODE: like SRST but scans double-byte characters; the
 * character searched for is in bits 48-63 of R0.  Sets cc like SRST.
 */
void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
    887 
    888 /* unsigned string compare (c is string terminator) */
    889 uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
    890 {
    891     uintptr_t ra = GETPC();
    892     uint32_t len;
    893 
    894     c = c & 0xff;
    895     s1 = wrap_address(env, s1);
    896     s2 = wrap_address(env, s2);
    897 
    898     /* Lest we fail to service interrupts in a timely manner, limit the
    899        amount of work we're willing to do.  For now, let's cap at 8k.  */
    900     for (len = 0; len < 0x2000; ++len) {
    901         uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
    902         uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
    903         if (v1 == v2) {
    904             if (v1 == c) {
    905                 /* Equal.  CC=0, and don't advance the registers.  */
    906                 env->cc_op = 0;
    907                 env->retxl = s2;
    908                 return s1;
    909             }
    910         } else {
    911             /* Unequal.  CC={1,2}, and advance the registers.  Note that
    912                the terminator need not be zero, but the string that contains
    913                the terminator is by definition "low".  */
    914             env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
    915             env->retxl = s2 + len;
    916             return s1 + len;
    917         }
    918     }
    919 
    920     /* CPU-determined bytes equal; advance the registers.  */
    921     env->cc_op = 3;
    922     env->retxl = s2 + len;
    923     return s1 + len;
    924 }
    925 
    926 /* move page */
    927 uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
    928 {
    929     const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
    930     const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
    931     const int mmu_idx = cpu_mmu_index(env, false);
    932     const bool f = extract64(r0, 11, 1);
    933     const bool s = extract64(r0, 10, 1);
    934     const bool cco = extract64(r0, 8, 1);
    935     uintptr_t ra = GETPC();
    936     S390Access srca, desta;
    937     int exc;
    938 
    939     if ((f && s) || extract64(r0, 12, 4)) {
    940         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    941     }
    942 
    943     /*
    944      * We always manually handle exceptions such that we can properly store
    945      * r1/r2 to the lowcore on page-translation exceptions.
    946      *
    947      * TODO: Access key handling
    948      */
    949     exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
    950                             MMU_DATA_LOAD, mmu_idx, ra);
    951     if (exc) {
    952         if (cco) {
    953             return 2;
    954         }
    955         goto inject_exc;
    956     }
    957     exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
    958                             MMU_DATA_STORE, mmu_idx, ra);
    959     if (exc) {
    960         if (cco && exc != PGM_PROTECTION) {
    961             return 1;
    962         }
    963         goto inject_exc;
    964     }
    965     access_memmove(env, &desta, &srca, ra);
    966     return 0; /* data moved */
    967 inject_exc:
    968 #if !defined(CONFIG_USER_ONLY)
    969     if (exc != PGM_ADDRESSING) {
    970         stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
    971                  env->tlb_fill_tec);
    972     }
    973     if (exc == PGM_PAGE_TRANS) {
    974         stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
    975                  r1 << 4 | r2);
    976     }
    977 #endif
    978     tcg_s390_program_interrupt(env, exc, ra);
    979 }
    980 
    981 /* string copy */
    982 uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
    983 {
    984     const int mmu_idx = cpu_mmu_index(env, false);
    985     const uint64_t d = get_address(env, r1);
    986     const uint64_t s = get_address(env, r2);
    987     const uint8_t c = env->regs[0];
    988     const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    989     S390Access srca, desta;
    990     uintptr_t ra = GETPC();
    991     int i;
    992 
    993     if (env->regs[0] & 0xffffff00ull) {
    994         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    995     }
    996 
    997     /*
    998      * Our access should not exceed single pages, as we must not report access
    999      * exceptions exceeding the actually copied range (which we don't know at
   1000      * this point). We might over-indicate watchpoints within the pages
   1001      * (if we ever care, we have to limit processing to a single byte).
   1002      */
   1003     srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
   1004     desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
   1005     for (i = 0; i < len; i++) {
   1006         const uint8_t v = access_get_byte(env, &srca, i, ra);
   1007 
   1008         access_set_byte(env, &desta, i, v, ra);
   1009         if (v == c) {
   1010             set_address_zero(env, r1, d + i);
   1011             return 1;
   1012         }
   1013     }
   1014     set_address_zero(env, r1, d + len);
   1015     set_address_zero(env, r2, s + len);
   1016     return 3;
   1017 }
   1018 
   1019 /* load access registers r1 to r3 from memory at a2 */
   1020 void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
   1021 {
   1022     uintptr_t ra = GETPC();
   1023     int i;
   1024 
   1025     if (a2 & 0x3) {
   1026         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
   1027     }
   1028 
   1029     for (i = r1;; i = (i + 1) % 16) {
   1030         env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
   1031         a2 += 4;
   1032 
   1033         if (i == r3) {
   1034             break;
   1035         }
   1036     }
   1037 }
   1038 
   1039 /* store access registers r1 to r3 in memory at a2 */
   1040 void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
   1041 {
   1042     uintptr_t ra = GETPC();
   1043     int i;
   1044 
   1045     if (a2 & 0x3) {
   1046         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
   1047     }
   1048 
   1049     for (i = r1;; i = (i + 1) % 16) {
   1050         cpu_stl_data_ra(env, a2, env->aregs[i], ra);
   1051         a2 += 4;
   1052 
   1053         if (i == r3) {
   1054             break;
   1055         }
   1056     }
   1057 }
   1058 
   1059 /* move long helper */
   1060 static inline uint32_t do_mvcl(CPUS390XState *env,
   1061                                uint64_t *dest, uint64_t *destlen,
   1062                                uint64_t *src, uint64_t *srclen,
   1063                                uint16_t pad, int wordsize, uintptr_t ra)
   1064 {
   1065     const int mmu_idx = cpu_mmu_index(env, false);
   1066     int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
   1067     S390Access srca, desta;
   1068     int i, cc;
   1069 
   1070     if (*destlen == *srclen) {
   1071         cc = 0;
   1072     } else if (*destlen < *srclen) {
   1073         cc = 1;
   1074     } else {
   1075         cc = 2;
   1076     }
   1077 
   1078     if (!*destlen) {
   1079         return cc;
   1080     }
   1081 
   1082     /*
   1083      * Only perform one type of type of operation (move/pad) at a time.
   1084      * Stay within single pages.
   1085      */
   1086     if (*srclen) {
   1087         /* Copy the src array */
   1088         len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
   1089         *destlen -= len;
   1090         *srclen -= len;
   1091         srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
   1092         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
   1093         access_memmove(env, &desta, &srca, ra);
   1094         *src = wrap_address(env, *src + len);
   1095         *dest = wrap_address(env, *dest + len);
   1096     } else if (wordsize == 1) {
   1097         /* Pad the remaining area */
   1098         *destlen -= len;
   1099         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
   1100         access_memset(env, &desta, pad, ra);
   1101         *dest = wrap_address(env, *dest + len);
   1102     } else {
   1103         desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
   1104 
   1105         /* The remaining length selects the padding byte. */
   1106         for (i = 0; i < len; (*destlen)--, i++) {
   1107             if (*destlen & 1) {
   1108                 access_set_byte(env, &desta, i, pad, ra);
   1109             } else {
   1110                 access_set_byte(env, &desta, i, pad >> 8, ra);
   1111             }
   1112         }
   1113         *dest = wrap_address(env, *dest + len);
   1114     }
   1115 
   1116     return *destlen ? 3 : cc;
   1117 }
   1118 
   1119 /* move long */
   1120 uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
   1121 {
   1122     const int mmu_idx = cpu_mmu_index(env, false);
   1123     uintptr_t ra = GETPC();
   1124     uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
   1125     uint64_t dest = get_address(env, r1);
   1126     uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
   1127     uint64_t src = get_address(env, r2);
   1128     uint8_t pad = env->regs[r2 + 1] >> 24;
   1129     CPUState *cs = env_cpu(env);
   1130     S390Access srca, desta;
   1131     uint32_t cc, cur_len;
   1132 
   1133     if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
   1134         cc = 3;
   1135     } else if (srclen == destlen) {
   1136         cc = 0;
   1137     } else if (destlen < srclen) {
   1138         cc = 1;
   1139     } else {
   1140         cc = 2;
   1141     }
   1142 
   1143     /* We might have to zero-out some bits even if there was no action. */
   1144     if (unlikely(!destlen || cc == 3)) {
   1145         set_address_zero(env, r2, src);
   1146         set_address_zero(env, r1, dest);
   1147         return cc;
   1148     } else if (!srclen) {
   1149         set_address_zero(env, r2, src);
   1150     }
   1151 
   1152     /*
   1153      * Only perform one type of type of operation (move/pad) in one step.
   1154      * Stay within single pages.
   1155      */
   1156     while (destlen) {
   1157         cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
   1158         if (!srclen) {
   1159             desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
   1160                                    ra);
   1161             access_memset(env, &desta, pad, ra);
   1162         } else {
   1163             cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);
   1164 
   1165             srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
   1166                                   ra);
   1167             desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
   1168                                    ra);
   1169             access_memmove(env, &desta, &srca, ra);
   1170             src = wrap_address(env, src + cur_len);
   1171             srclen -= cur_len;
   1172             env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
   1173             set_address_zero(env, r2, src);
   1174         }
   1175         dest = wrap_address(env, dest + cur_len);
   1176         destlen -= cur_len;
   1177         env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
   1178         set_address_zero(env, r1, dest);
   1179 
   1180         /*
   1181          * MVCL is interruptible. Return to the main loop if requested after
   1182          * writing back all state to registers. If no interrupt will get
   1183          * injected, we'll end up back in this handler and continue processing
   1184          * the remaining parts.
   1185          */
   1186         if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
   1187             cpu_loop_exit_restore(cs, ra);
   1188         }
   1189     }
   1190     return cc;
   1191 }
   1192 
   1193 /* move long extended */
   1194 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
   1195                        uint32_t r3)
   1196 {
   1197     uintptr_t ra = GETPC();
   1198     uint64_t destlen = get_length(env, r1 + 1);
   1199     uint64_t dest = get_address(env, r1);
   1200     uint64_t srclen = get_length(env, r3 + 1);
   1201     uint64_t src = get_address(env, r3);
   1202     uint8_t pad = a2;
   1203     uint32_t cc;
   1204 
   1205     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
   1206 
   1207     set_length(env, r1 + 1, destlen);
   1208     set_length(env, r3 + 1, srclen);
   1209     set_address(env, r1, dest);
   1210     set_address(env, r3, src);
   1211 
   1212     return cc;
   1213 }
   1214 
   1215 /* move long unicode */
   1216 uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
   1217                        uint32_t r3)
   1218 {
   1219     uintptr_t ra = GETPC();
   1220     uint64_t destlen = get_length(env, r1 + 1);
   1221     uint64_t dest = get_address(env, r1);
   1222     uint64_t srclen = get_length(env, r3 + 1);
   1223     uint64_t src = get_address(env, r3);
   1224     uint16_t pad = a2;
   1225     uint32_t cc;
   1226 
   1227     cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
   1228 
   1229     set_length(env, r1 + 1, destlen);
   1230     set_length(env, r3 + 1, srclen);
   1231     set_address(env, r1, dest);
   1232     set_address(env, r3, src);
   1233 
   1234     return cc;
   1235 }
   1236 
   1237 /* compare logical long helper */
   1238 static inline uint32_t do_clcl(CPUS390XState *env,
   1239                                uint64_t *src1, uint64_t *src1len,
   1240                                uint64_t *src3, uint64_t *src3len,
   1241                                uint16_t pad, uint64_t limit,
   1242                                int wordsize, uintptr_t ra)
   1243 {
   1244     uint64_t len = MAX(*src1len, *src3len);
   1245     uint32_t cc = 0;
   1246 
   1247     check_alignment(env, *src1len | *src3len, wordsize, ra);
   1248 
   1249     if (!len) {
   1250         return cc;
   1251     }
   1252 
   1253     /* Lest we fail to service interrupts in a timely manner, limit the
   1254        amount of work we're willing to do.  */
   1255     if (len > limit) {
   1256         len = limit;
   1257         cc = 3;
   1258     }
   1259 
   1260     for (; len; len -= wordsize) {
   1261         uint16_t v1 = pad;
   1262         uint16_t v3 = pad;
   1263 
   1264         if (*src1len) {
   1265             v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
   1266         }
   1267         if (*src3len) {
   1268             v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
   1269         }
   1270 
   1271         if (v1 != v3) {
   1272             cc = (v1 < v3) ? 1 : 2;
   1273             break;
   1274         }
   1275 
   1276         if (*src1len) {
   1277             *src1 += wordsize;
   1278             *src1len -= wordsize;
   1279         }
   1280         if (*src3len) {
   1281             *src3 += wordsize;
   1282             *src3len -= wordsize;
   1283         }
   1284     }
   1285 
   1286     return cc;
   1287 }
   1288 
   1289 
   1290 /* compare logical long */
   1291 uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
   1292 {
   1293     uintptr_t ra = GETPC();
   1294     uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
   1295     uint64_t src1 = get_address(env, r1);
   1296     uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
   1297     uint64_t src3 = get_address(env, r2);
   1298     uint8_t pad = env->regs[r2 + 1] >> 24;
   1299     uint32_t cc;
   1300 
   1301     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
   1302 
   1303     env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
   1304     env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
   1305     set_address(env, r1, src1);
   1306     set_address(env, r2, src3);
   1307 
   1308     return cc;
   1309 }
   1310 
   1311 /* compare logical long extended memcompare insn with padding */
   1312 uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
   1313                        uint32_t r3)
   1314 {
   1315     uintptr_t ra = GETPC();
   1316     uint64_t src1len = get_length(env, r1 + 1);
   1317     uint64_t src1 = get_address(env, r1);
   1318     uint64_t src3len = get_length(env, r3 + 1);
   1319     uint64_t src3 = get_address(env, r3);
   1320     uint8_t pad = a2;
   1321     uint32_t cc;
   1322 
   1323     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
   1324 
   1325     set_length(env, r1 + 1, src1len);
   1326     set_length(env, r3 + 1, src3len);
   1327     set_address(env, r1, src1);
   1328     set_address(env, r3, src3);
   1329 
   1330     return cc;
   1331 }
   1332 
   1333 /* compare logical long unicode memcompare insn with padding */
   1334 uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
   1335                        uint32_t r3)
   1336 {
   1337     uintptr_t ra = GETPC();
   1338     uint64_t src1len = get_length(env, r1 + 1);
   1339     uint64_t src1 = get_address(env, r1);
   1340     uint64_t src3len = get_length(env, r3 + 1);
   1341     uint64_t src3 = get_address(env, r3);
   1342     uint16_t pad = a2;
   1343     uint32_t cc = 0;
   1344 
   1345     cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
   1346 
   1347     set_length(env, r1 + 1, src1len);
   1348     set_length(env, r3 + 1, src3len);
   1349     set_address(env, r1, src1);
   1350     set_address(env, r3, src3);
   1351 
   1352     return cc;
   1353 }
   1354 
   1355 /* checksum */
   1356 uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
   1357                       uint64_t src, uint64_t src_len)
   1358 {
   1359     uintptr_t ra = GETPC();
   1360     uint64_t max_len, len;
   1361     uint64_t cksm = (uint32_t)r1;
   1362 
   1363     /* Lest we fail to service interrupts in a timely manner, limit the
   1364        amount of work we're willing to do.  For now, let's cap at 8k.  */
   1365     max_len = (src_len > 0x2000 ? 0x2000 : src_len);
   1366 
   1367     /* Process full words as available.  */
   1368     for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
   1369         cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
   1370     }
   1371 
   1372     switch (max_len - len) {
   1373     case 1:
   1374         cksm += cpu_ldub_data_ra(env, src, ra) << 24;
   1375         len += 1;
   1376         break;
   1377     case 2:
   1378         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
   1379         len += 2;
   1380         break;
   1381     case 3:
   1382         cksm += cpu_lduw_data_ra(env, src, ra) << 16;
   1383         cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
   1384         len += 3;
   1385         break;
   1386     }
   1387 
   1388     /* Fold the carry from the checksum.  Note that we can see carry-out
   1389        during folding more than once (but probably not more than twice).  */
   1390     while (cksm > 0xffffffffull) {
   1391         cksm = (uint32_t)cksm + (cksm >> 32);
   1392     }
   1393 
   1394     /* Indicate whether or not we've processed everything.  */
   1395     env->cc_op = (len == src_len ? 0 : 3);
   1396 
   1397     /* Return both cksm and processed length.  */
   1398     env->retxl = cksm;
   1399     return len;
   1400 }
   1401 
/*
 * PACK: convert the zoned-decimal source into packed-decimal at dest.
 * len encodes both operand lengths: bits 4-7 are the destination length
 * minus one, bits 0-3 the source length minus one.  Operands are processed
 * right to left.
 */
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    /* Point at the rightmost byte of each operand. */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        /* Each packed byte takes the digit nibbles of up to two
           source bytes; missing source bytes read as zero. */
        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
   1438 
/*
 * PKA/PKU worker: pack an ASCII (ssize 1) or Unicode (ssize 2) decimal
 * string of srclen bytes into a 16-byte packed-decimal field at dest.
 * Only the low nibble of each source character is used; the result
 * always gets a positive sign code (0xc).
 */
static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        /* High nibble of this byte, if another source character remains. */
        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}
   1472 
   1473 
   1474 void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
   1475                  uint32_t srclen)
   1476 {
   1477     do_pkau(env, dest, src, srclen, 1, GETPC());
   1478 }
   1479 
   1480 void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
   1481                  uint32_t srclen)
   1482 {
   1483     do_pkau(env, dest, src, srclen, 2, GETPC());
   1484 }
   1485 
/*
 * UNPACK: convert the packed-decimal source into zoned-decimal at dest.
 * len encodes both operand lengths: bits 4-7 are the destination length
 * minus one, bits 0-3 the source length minus one.  Operands are
 * processed right to left, one source nibble per destination byte.
 */
void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    int second_nibble = 0;

    /* Point at the rightmost byte of each operand. */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */

    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        /* Exhausted source reads as zero digits. */
        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}
   1532 
/*
 * UNPKA/UNPKU worker: unpack a 16-byte packed-decimal field at src into
 * an ASCII (dsize 1) or Unicode (dsize 2) digit string at dest.
 * Returns cc: 0 = plus sign, 1 = minus sign, 3 = invalid sign code.
 */
static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            /* Fetch the next source byte on every second digit. */
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}
   1583 
   1584 uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
   1585                        uint64_t src)
   1586 {
   1587     return do_unpkau(env, dest, destlen, 1, src, GETPC());
   1588 }
   1589 
   1590 uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
   1591                        uint64_t src)
   1592 {
   1593     return do_unpkau(env, dest, destlen, 2, src, GETPC());
   1594 }
   1595 
   1596 uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
   1597 {
   1598     uintptr_t ra = GETPC();
   1599     uint32_t cc = 0;
   1600     int i;
   1601 
   1602     for (i = 0; i < destlen; i++) {
   1603         uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
   1604         /* digit */
   1605         cc |= (b & 0xf0) > 0x90 ? 2 : 0;
   1606 
   1607         if (i == (destlen - 1)) {
   1608             /* sign */
   1609             cc |= (b & 0xf) < 0xa ? 1 : 0;
   1610         } else {
   1611             /* digit */
   1612             cc |= (b & 0xf) > 0x9 ? 2 : 0;
   1613         }
   1614     }
   1615 
   1616     return cc;
   1617 }
   1618 
/*
 * TRANSLATE worker: replace each of the len+1 bytes at array with its
 * lookup in the 256-byte table at trans.  Returns env->cc_op unchanged
 * (TR does not set the condition code).
 */
static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
                             uint64_t trans, uintptr_t ra)
{
    uint32_t i;

    /* len is the operand length minus one, hence <=. */
    for (i = 0; i <= len; i++) {
        uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
        uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    return env->cc_op;
}
   1632 
   1633 void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
   1634                 uint64_t trans)
   1635 {
   1636     do_helper_tr(env, len, array, trans, GETPC());
   1637 }
   1638 
/*
 * TRANSLATE EXTENDED: translate bytes at array via the table at trans,
 * stopping at the test byte in bits 56-63 of R0.  Returns the advanced
 * array address; the residual length is passed back in env->retxl.
 * cc: 0 = length exhausted, 1 = test byte found, 3 = CPU limit hit.
 */
uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
                     uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    uint8_t end = env->regs[0] & 0xff;
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    /* In 24/31-bit mode wrap the operand address and length. */
    if (!(env->psw.mask & PSW_MASK_64)) {
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        /* Stop before translating the test byte. */
        if (byte == end) {
            cc = 1;
            break;
        }

        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    env->retxl = len - i;
    return array + i;
}
   1678 
/*
 * TRT/TRTR worker: scan len+1 bytes at array (inc = +1 forward, -1
 * backward), testing each byte's function byte in the table at trans.
 * On a nonzero function byte: GR1 gets the argument address, bits 56-63
 * of GR2 the function byte.  Returns cc: 0 = all zero, 1 = nonzero found
 * before the end, 2 = nonzero found at the last byte.
 */
static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
                                     uint64_t array, uint64_t trans,
                                     int inc, uintptr_t ra)
{
    int i;

    /* len is the operand length minus one, hence <=. */
    for (i = 0; i <= len; i++) {
        uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
        uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);

        if (sbyte != 0) {
            set_address(env, 1, array + i * inc);
            env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
            return (i == len) ? 2 : 1;
        }
    }

    return 0;
}
   1698 
/* Forward TRT wrapper, matching the dx_helper signature used by the
   EXECUTE fast path (see HELPER(ex)).  */
static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
                                  uint64_t array, uint64_t trans,
                                  uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, 1, ra);
}
   1705 
   1706 uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
   1707                      uint64_t trans)
   1708 {
   1709     return do_helper_trt(env, len, array, trans, 1, GETPC());
   1710 }
   1711 
/* Backward TRT wrapper, matching the dx_helper signature used by the
   EXECUTE fast path (see HELPER(ex)).  */
static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
                                   uint64_t array, uint64_t trans,
                                   uintptr_t ra)
{
    return do_helper_trt(env, len, array, trans, -1, ra);
}
   1718 
   1719 uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
   1720                       uint64_t trans)
   1721 {
   1722     return do_helper_trt(env, len, array, trans, -1, GETPC());
   1723 }
   1724 
/* Translate one/two to one/two */
/* Implements TROO/TROT/TRTO/TRTT.  Implicit operands: GR1 holds the
   translation-table origin, R1/R1+1 the destination address/length,
   R2 the source address.  Returns the condition code.  */
uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
                      uint32_t tst, uint32_t sizes)
{
    uintptr_t ra = GETPC();
    /* SIZES bit 0 selects 1- vs 2-byte destination elements,
       bit 1 selects 1- vs 2-byte source elements.  */
    int dsize = (sizes & 1) ? 1 : 2;
    int ssize = (sizes & 2) ? 1 : 2;
    uint64_t tbl = get_address(env, 1);
    uint64_t dst = get_address(env, r1);
    uint64_t len = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint32_t cc = 3;                /* cc 3: work remains (CPU-determined) */
    int i;

    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
        tbl &= -4096;
    } else {
        tbl &= -8;
    }

    /* The length must be a multiple of the source element size.  */
    check_alignment(env, len, ssize, ra);

    /* Lest we fail to service interrupts in a timely manner, */
    /* limit the amount of work we're willing to do.   */
    for (i = 0; i < 0x2000; i++) {
        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
        uint64_t tble = tbl + (sval * dsize);
        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
        if (dval == tst) {
            /* cc 1: the translated value matched the test character.  */
            cc = 1;
            break;
        }
        cpu_stsize_data_ra(env, dst, dval, dsize, ra);

        len -= ssize;
        src += ssize;
        dst += dsize;

        if (len == 0) {
            /* cc 0: the entire operand was processed.  */
            cc = 0;
            break;
        }
    }

    /* Write back the updated implicit operands.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, len);
    set_address(env, r2, src);

    return cc;
}
   1778 
/* COMPARE DOUBLE AND SWAP (CDSG), non-parallel variant: implemented
   with plain 64-bit accesses (cf. cdsg_parallel for the atomic one).
   R1:R1+1 is the compare value, R3:R3+1 the swap value.  */
void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
                  uint32_t r1, uint32_t r3)
{
    uintptr_t ra = GETPC();
    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
    Int128 oldv;
    uint64_t oldh, oldl;
    bool fail;

    /* The operand must be quadword aligned.  */
    check_alignment(env, addr, 16, ra);

    oldh = cpu_ldq_data_ra(env, addr + 0, ra);
    oldl = cpu_ldq_data_ra(env, addr + 8, ra);

    oldv = int128_make128(oldl, oldh);
    fail = !int128_eq(oldv, cmpv);
    if (fail) {
        /* On mismatch, store back the old value unchanged.  */
        newv = oldv;
    }

    cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
    cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);

    /* cc 0 on success, 1 on mismatch; R1:R1+1 receive the old value.  */
    env->cc_op = fail;
    env->regs[r1] = int128_gethi(oldv);
    env->regs[r1 + 1] = int128_getlo(oldv);
}
   1807 
/* COMPARE DOUBLE AND SWAP (CDSG), parallel variant: uses a true
   128-bit compare-and-swap; only reachable when HAVE_CMPXCHG128.  */
void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
                           uint32_t r1, uint32_t r3)
{
    uintptr_t ra = GETPC();
    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
    int mem_idx;
    MemOpIdx oi;
    Int128 oldv;
    bool fail;

    assert(HAVE_CMPXCHG128);

    /* MO_ALIGN enforces the architectural quadword alignment.  */
    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
    oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
    fail = !int128_eq(oldv, cmpv);

    /* cc 0 on success, 1 on mismatch; R1:R1+1 receive the old value.  */
    env->cc_op = fail;
    env->regs[r1] = int128_gethi(oldv);
    env->regs[r1 + 1] = int128_getlo(oldv);
}
   1830 
/*
 * Common code for COMPARE AND SWAP AND STORE (CSST): a compare-and-swap
 * of 4/8/16 bytes at A1 (selected by function code FC), followed on
 * success by a store of 1..16 bytes (storage characteristic SC) at A2.
 * FC and SC come from register 0; the parameter list address from
 * register 1.  Returns the condition code (0 = swapped, 1 = mismatch).
 */
static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
                        uint64_t a2, bool parallel)
{
    uint32_t mem_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint32_t fc = extract32(env->regs[0], 0, 8);
    uint32_t sc = extract32(env->regs[0], 8, 8);
    uint64_t pl = get_address(env, 1) & -16;
    uint64_t svh, svl;
    uint32_t cc;

    /* Sanity check the function code and storage characteristic.  */
    if (fc > 1 || sc > 3) {
        /* fc 2 (16-byte swap) and sc 4 (16-byte store) need CSST2.  */
        if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
            goto spec_exception;
        }
        if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
            goto spec_exception;
        }
    }

    /* Sanity check the alignments.  */
    if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
        goto spec_exception;
    }

    /* Sanity check writability of the store address.  */
    probe_write(env, a2, 1 << sc, mem_idx, ra);

    /*
     * Note that the compare-and-swap is atomic, and the store is atomic,
     * but the complete operation is not.  Therefore we do not need to
     * assert serial context in order to implement this.  That said,
     * restart early if we can't support either operation that is supposed
     * to be atomic.
     */
    if (parallel) {
        uint32_t max = 2;
#ifdef CONFIG_ATOMIC64
        max = 3;
#endif
        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
            (HAVE_ATOMIC128  ? 0 : sc > max)) {
            cpu_loop_exit_atomic(env_cpu(env), ra);
        }
    }

    /* All loads happen before all stores.  For simplicity, load the entire
       store value area from the parameter list.  */
    svh = cpu_ldq_data_ra(env, pl + 16, ra);
    svl = cpu_ldq_data_ra(env, pl + 24, ra);

    switch (fc) {
    case 0:
        /* 32-bit compare-and-swap against the low word of R3.  */
        {
            uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
            uint32_t cv = env->regs[r3];
            uint32_t ov;

            if (parallel) {
#ifdef CONFIG_USER_ONLY
                uint32_t *haddr = g2h(env_cpu(env), a1);
                ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
#else
                MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
                ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
#endif
            } else {
                ov = cpu_ldl_data_ra(env, a1, ra);
                cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
            }
            cc = (ov != cv);
            env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
        }
        break;

    case 1:
        /* 64-bit compare-and-swap against R3.  */
        {
            uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
            uint64_t cv = env->regs[r3];
            uint64_t ov;

            if (parallel) {
#ifdef CONFIG_ATOMIC64
                MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN, mem_idx);
                ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
#else
                /* Note that we asserted !parallel above.  */
                g_assert_not_reached();
#endif
            } else {
                ov = cpu_ldq_data_ra(env, a1, ra);
                cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
            }
            cc = (ov != cv);
            env->regs[r3] = ov;
        }
        break;

    case 2:
        /* 128-bit compare-and-swap against R3:R3+1 (r3 must be even).  */
        {
            uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
            uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
            Int128 nv = int128_make128(nvl, nvh);
            Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
            Int128 ov;

            if (!parallel) {
                uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
                uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);

                ov = int128_make128(ol, oh);
                cc = !int128_eq(ov, cv);
                if (cc) {
                    nv = ov;
                }

                cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
                cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
            } else if (HAVE_CMPXCHG128) {
                MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
                ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
                cc = !int128_eq(ov, cv);
            } else {
                /* Note that we asserted !parallel above.  */
                g_assert_not_reached();
            }

            env->regs[r3 + 0] = int128_gethi(ov);
            env->regs[r3 + 1] = int128_getlo(ov);
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Store only if the comparison succeeded.  Note that above we use a pair
       of 64-bit big-endian loads, so for sc < 3 we must extract the value
       from the most-significant bits of svh.  */
    if (cc == 0) {
        switch (sc) {
        case 0:
            cpu_stb_data_ra(env, a2, svh >> 56, ra);
            break;
        case 1:
            cpu_stw_data_ra(env, a2, svh >> 48, ra);
            break;
        case 2:
            cpu_stl_data_ra(env, a2, svh >> 32, ra);
            break;
        case 3:
            cpu_stq_data_ra(env, a2, svh, ra);
            break;
        case 4:
            if (!parallel) {
                cpu_stq_data_ra(env, a2 + 0, svh, ra);
                cpu_stq_data_ra(env, a2 + 8, svl, ra);
            } else if (HAVE_ATOMIC128) {
                MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
                Int128 sv = int128_make128(svl, svh);
                cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
            } else {
                /* Note that we asserted !parallel above.  */
                g_assert_not_reached();
            }
            break;
        default:
            g_assert_not_reached();
        }
    }

    return cc;

 spec_exception:
    tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
}
   2008 
/* COMPARE AND SWAP AND STORE, serial-context variant.  */
uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
{
    return do_csst(env, r3, a1, a2, false);
}
   2013 
/* COMPARE AND SWAP AND STORE, parallel-context variant.  */
uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
                               uint64_t a2)
{
    return do_csst(env, r3, a1, a2, true);
}
   2019 
   2020 #if !defined(CONFIG_USER_ONLY)
/* LOAD CONTROL (64-bit): load control registers r1..r3 (wrapping mod 16)
   from successive doublewords at a2.  */
void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    /* The operand must be doubleword aligned.  */
    if (src & 0x7) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        uint64_t val = cpu_ldq_data_ra(env, src, ra);
        /* Track changes to cregs 9..11, which drive PER watchpoints.  */
        if (env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        env->cregs[i] = val;
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
                   i, src, val);
        src += sizeof(uint64_t);

        if (i == r3) {
            break;
        }
    }

    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(env_cpu(env));
    }

    /* Control register changes can affect translation state.  */
    tlb_flush(env_cpu(env));
}
   2053 
/* LOAD CONTROL (32-bit): load the low halves of control registers
   r1..r3 (wrapping mod 16) from successive words at a2; the high
   halves are preserved.  */
void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    /* The operand must be word aligned.  */
    if (src & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        uint32_t val = cpu_ldl_data_ra(env, src, ra);
        /* Track changes to cregs 9..11, which drive PER watchpoints.  */
        if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
        src += sizeof(uint32_t);

        if (i == r3) {
            break;
        }
    }

    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(env_cpu(env));
    }

    /* Control register changes can affect translation state.  */
    tlb_flush(env_cpu(env));
}
   2085 
   2086 void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
   2087 {
   2088     uintptr_t ra = GETPC();
   2089     uint64_t dest = a2;
   2090     uint32_t i;
   2091 
   2092     if (dest & 0x7) {
   2093         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
   2094     }
   2095 
   2096     for (i = r1;; i = (i + 1) % 16) {
   2097         cpu_stq_data_ra(env, dest, env->cregs[i], ra);
   2098         dest += sizeof(uint64_t);
   2099 
   2100         if (i == r3) {
   2101             break;
   2102         }
   2103     }
   2104 }
   2105 
   2106 void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
   2107 {
   2108     uintptr_t ra = GETPC();
   2109     uint64_t dest = a2;
   2110     uint32_t i;
   2111 
   2112     if (dest & 0x3) {
   2113         tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
   2114     }
   2115 
   2116     for (i = r1;; i = (i + 1) % 16) {
   2117         cpu_stl_data_ra(env, dest, env->cregs[i], ra);
   2118         dest += sizeof(uint32_t);
   2119 
   2120         if (i == r3) {
   2121             break;
   2122         }
   2123     }
   2124 }
   2125 
   2126 uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
   2127 {
   2128     uintptr_t ra = GETPC();
   2129     int i;
   2130 
   2131     real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
   2132 
   2133     for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
   2134         cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
   2135     }
   2136 
   2137     return 0;
   2138 }
   2139 
/* TEST PROTECTION: probe write then read access for a1 and encode the
   result as cc 0 (fetch+store ok), 1 (fetch only), 2 (neither), or
   3 (translation not available).  */
uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
{
    S390CPU *cpu = env_archcpu(env);
    CPUState *cs = env_cpu(env);

    /*
     * TODO: we currently don't handle all access protection types
     * (including access-list and key-controlled) as well as AR mode.
     */
    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
        /* Fetching permitted; storing permitted */
        return 0;
    }

    if (env->int_pgm_code == PGM_PROTECTION) {
        /* retry if reading is possible */
        /* Clear the pending exception before re-probing.  */
        cs->exception_index = -1;
        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
            /* Fetching permitted; storing not permitted */
            return 1;
        }
    }

    switch (env->int_pgm_code) {
    case PGM_PROTECTION:
        /* Fetching not permitted; storing not permitted */
        cs->exception_index = -1;
        return 2;
    case PGM_ADDRESSING:
    case PGM_TRANS_SPEC:
        /* exceptions forwarded to the guest */
        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
        return 0;
    }

    /* Translation not available */
    cs->exception_index = -1;
    return 3;
}
   2179 
/* insert storage key extended */
/* ISKE: return the storage key of the absolute page addressed by r2.  */
uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
{
    /* The skeys device and its class are cached across calls.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /* NOTE(review): flush presumably drops translations cached while
           key handling was disabled — confirm enable_skeys semantics.  */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_get_skeys_nonzero(rc);
        /* Backend failure: report a zero key.  */
        return 0;
    }
    return key;
}
   2209 
/* set storage key extended */
/* SSKE: set the storage key of the absolute page addressed by r2 to the
   key in r1 (bit 0 of the key field is ignored).  */
void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
{
    /* The skeys device and its class are cached across calls.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /* NOTE(review): flush presumably drops translations cached while
           key handling was disabled — confirm enable_skeys semantics.  */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    key = r1 & 0xfe;
    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_set_skeys_nonzero(rc);
    }
   /*
    * As we can only flush by virtual address and not all the entries
    * that point to a physical address we have to flush the whole TLB.
    */
    tlb_flush_all_cpus_synced(env_cpu(env));
}
   2243 
/* reset reference bit extended */
/* RRBE: clear the reference bit in the storage key of the page at r2
   and return a cc derived from the previous R and C bits.  */
uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
{
    uint64_t addr = wrap_address(env, r2);
    /* The skeys device and its class are cached across calls.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint8_t re, key;
    int rc;

    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        /* NOTE(review): flush presumably drops translations cached while
           key handling was disabled — confirm enable_skeys semantics.  */
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_get_skeys_nonzero(rc);
        return 0;
    }

    /* Remember the old R and C bits, then clear R.  */
    re = key & (SK_R | SK_C);
    key &= ~SK_R;

    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_set_skeys_nonzero(rc);
        return 0;
    }
   /*
    * As we can only flush by virtual address and not all the entries
    * that point to a physical address we have to flush the whole TLB.
    */
    tlb_flush_all_cpus_synced(env_cpu(env));

    /*
     * cc
     *
     * 0  Reference bit zero; change bit zero
     * 1  Reference bit zero; change bit one
     * 2  Reference bit one; change bit zero
     * 3  Reference bit one; change bit one
     */

    return re >> 1;
}
   2297 
/* MOVE TO SECONDARY: copy up to 256 bytes from the primary to the
   secondary address space.  Returns cc 3 if the length was capped,
   else cc 0.  */
uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
{
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int cc = 0;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    /* Requires DAT on, secondary space enabled, and not home/AR mode.  */
    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
        psw_as == AS_HOME || psw_as == AS_ACCREG) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    l = wrap_length32(env, l);
    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    } else if (!l) {
        return cc;
    }

    /* TODO: Access key handling */
    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
    access_memmove(env, &desta, &srca, ra);
    return cc;
}
   2328 
/* MOVE TO PRIMARY: copy up to 256 bytes from the secondary to the
   primary address space.  Returns cc 3 if the length was capped,
   else cc 0.  */
uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
{
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int cc = 0;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    /* Requires DAT on, secondary space enabled, and not home/AR mode.  */
    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
        psw_as == AS_HOME || psw_as == AS_ACCREG) {
        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    l = wrap_length32(env, l);
    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    } else if (!l) {
        return cc;
    }

    /* TODO: Access key handling */
    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
    access_memmove(env, &desta, &srca, ra);
    return cc;
}
   2359 
/* IDTE: invalidate DAT table entries.  r1 holds the table origin and
   type, r2 the effective address plus option/count bits, m4 bit 0 the
   local-clearing control.  */
void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t table, entry, raddr;
    uint16_t entries, i, index = 0;

    /* Bits 44..51 of r2 must be zero.  */
    if (r2 & 0xff000) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    if (!(r2 & 0x800)) {
        /* invalidation-and-clearing operation */
        table = r1 & ASCE_ORIGIN;
        entries = (r2 & 0x7ff) + 1;

        /* Pick the index field matching the designated table level.  */
        switch (r1 & ASCE_TYPE_MASK) {
        case ASCE_TYPE_REGION1:
            index = (r2 >> 53) & 0x7ff;
            break;
        case ASCE_TYPE_REGION2:
            index = (r2 >> 42) & 0x7ff;
            break;
        case ASCE_TYPE_REGION3:
            index = (r2 >> 31) & 0x7ff;
            break;
        case ASCE_TYPE_SEGMENT:
            index = (r2 >> 20) & 0x7ff;
            break;
        }
        for (i = 0; i < entries; i++) {
            /* addresses are not wrapped in 24/31bit mode but table index is */
            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
            if (!(entry & REGION_ENTRY_I)) {
                /* we are allowed to not store if already invalid */
                entry |= REGION_ENTRY_I;
                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
            }
        }
    }

    /* We simply flush the complete tlb, therefore we can ignore r3. */
    if (m4 & 1) {
        /* Local-clearing: flush only this CPU's TLB.  */
        tlb_flush(cs);
    } else {
        tlb_flush_all_cpus_synced(cs);
    }
}
   2409 
/* invalidate pte */
/* IPTE: mark the page table entry for vaddr (within the segment table
   entry at pto) invalid, then flush the affected translations.  m4
   bit 0 selects a local-only flush.  */
void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
                  uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t page = vaddr & TARGET_PAGE_MASK;
    uint64_t pte_addr, pte;

    /* Compute the page table entry address */
    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
    pte_addr += VADDR_PAGE_TX(vaddr) * 8;

    /* Mark the page table entry as invalid */
    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
    pte |= PAGE_ENTRY_I;
    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);

    /* XXX we exploit the fact that Linux passes the exact virtual
       address here - it's not obliged to! */
    if (m4 & 1) {
        /* Local-clearing: flush this CPU only.  */
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush(cs);
        }
    } else {
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page_all_cpus_synced(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush_all_cpus_synced(cs);
        }
    }
}
   2450 
   2451 /* flush local tlb */
   2452 void HELPER(ptlb)(CPUS390XState *env)
   2453 {
   2454     tlb_flush(env_cpu(env));
   2455 }
   2456 
   2457 /* flush global tlb */
   2458 void HELPER(purge)(CPUS390XState *env)
   2459 {
   2460     tlb_flush_all_cpus_synced(env_cpu(env));
   2461 }
   2462 
/* load real address */
/* LRA: translate addr via the current ASC and return the real address
   (cc 0), or the exception code with bit 32 set (cc 3) on failure.  */
uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
{
    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
    uint64_t ret, tec;
    int flags, exc, cc;

    /* XXX incomplete - has more corner cases */
    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
    }

    exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
    if (exc) {
        cc = 3;
        /* Report the translation-exception code with bit 32 set.  */
        ret = exc | 0x80000000;
    } else {
        cc = 0;
        /* Merge the byte offset back into the translated page.  */
        ret |= addr & ~TARGET_PAGE_MASK;
    }

    env->cc_op = cc;
    return ret;
}
   2487 #endif
   2488 
   2489 /* load pair from quadword */
   2490 uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
   2491 {
   2492     uintptr_t ra = GETPC();
   2493     uint64_t hi, lo;
   2494 
   2495     check_alignment(env, addr, 16, ra);
   2496     hi = cpu_ldq_data_ra(env, addr + 0, ra);
   2497     lo = cpu_ldq_data_ra(env, addr + 8, ra);
   2498 
   2499     env->retxl = lo;
   2500     return hi;
   2501 }
   2502 
   2503 uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
   2504 {
   2505     uintptr_t ra = GETPC();
   2506     uint64_t hi, lo;
   2507     int mem_idx;
   2508     MemOpIdx oi;
   2509     Int128 v;
   2510 
   2511     assert(HAVE_ATOMIC128);
   2512 
   2513     mem_idx = cpu_mmu_index(env, false);
   2514     oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
   2515     v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
   2516     hi = int128_gethi(v);
   2517     lo = int128_getlo(v);
   2518 
   2519     env->retxl = lo;
   2520     return hi;
   2521 }
   2522 
   2523 /* store pair to quadword */
   2524 void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
   2525                   uint64_t low, uint64_t high)
   2526 {
   2527     uintptr_t ra = GETPC();
   2528 
   2529     check_alignment(env, addr, 16, ra);
   2530     cpu_stq_data_ra(env, addr + 0, high, ra);
   2531     cpu_stq_data_ra(env, addr + 8, low, ra);
   2532 }
   2533 
   2534 void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
   2535                            uint64_t low, uint64_t high)
   2536 {
   2537     uintptr_t ra = GETPC();
   2538     int mem_idx;
   2539     MemOpIdx oi;
   2540     Int128 v;
   2541 
   2542     assert(HAVE_ATOMIC128);
   2543 
   2544     mem_idx = cpu_mmu_index(env, false);
   2545     oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
   2546     v = int128_make128(low, high);
   2547     cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
   2548 }
   2549 
/* Execute instruction.  This instruction executes an insn modified with
   the contents of r1.  It does not change the executed instruction in memory;
   it does not change the program counter.

   Perform this by recording the modified instruction in env->ex_value.
   This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
*/
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    uint64_t insn = cpu_lduw_code(env, addr);
    uint8_t opc = insn >> 8;

    /* Or in the contents of R1[56:63].  */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction.  */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The very most common cases can be sped up by avoiding a new TB.  */
    if ((opc & 0xf0) == 0xd0) {
        /* SS-format storage-to-storage insns: dispatch directly to the
           memory helper instead of translating a one-shot TB.  */
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        static const dx_helper dx[16] = {
            [0x0] = do_helper_trt_bkwd,
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
            [0xd] = do_helper_trt_fwd,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
            /* Crack the SS format: 8-bit length, then base register and
               12-bit displacement for each operand.  */
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);

            env->cc_op = helper(env, l, a1, a2, 0);
            /* Step past the EXECUTE instruction itself.  */
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
        /* SVC: deliver the supervisor-call exception directly.  */
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  This will also ensure
       that ex_value is non-zero, which flags that we are in a state
       that requires such execution.  */
    env->ex_value = insn | ilen;
}
   2622 
/* MOVE WITH OPTIONAL SPECIFICATIONS: copy up to 4096 bytes, with the
   access key and address space of each operand optionally overridden by
   the OAC fields in general register 0.  Returns the condition code
   (0 = done, 3 = length truncated to 4096, more remains).  */
uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
                       uint64_t len)
{
    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    const uint64_t r0 = env->regs[0];
    const uintptr_t ra = GETPC();
    uint8_t dest_key, dest_as, dest_k, dest_a;
    uint8_t src_key, src_as, src_k, src_a;
    uint64_t val;
    int cc = 0;

    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
               __func__, dest, src, len);

    /* MVCOS with DAT off is a special-operation exception.  */
    if (!(env->psw.mask & PSW_MASK_DAT)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* OAC (operand access control) for the first operand -> dest */
    val = (r0 & 0xffff0000ULL) >> 16;
    dest_key = (val >> 12) & 0xf;   /* access key */
    dest_as = (val >> 6) & 0x3;     /* address-space control */
    dest_k = (val >> 1) & 0x1;      /* K: key field is valid */
    dest_a = val & 0x1;             /* A: AS field is valid */

    /* OAC (operand access control) for the second operand -> src */
    val = (r0 & 0x0000ffffULL);
    src_key = (val >> 12) & 0xf;
    src_as = (val >> 6) & 0x3;
    src_k = (val >> 1) & 0x1;
    src_a = val & 0x1;

    /* Fields whose valid bit is clear default to the PSW values.  */
    if (!dest_k) {
        dest_key = psw_key;
    }
    if (!src_k) {
        src_key = psw_key;
    }
    if (!dest_a) {
        dest_as = psw_as;
    }
    if (!src_a) {
        src_as = psw_as;
    }

    /* Explicitly specifying the home space is not allowed in the
       problem state.  */
    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    /* The secondary space is only available when CR0 permits it.  */
    if (!(env->cregs[0] & CR0_SECONDARY) &&
        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    /* A length above 4k is truncated; cc 3 tells the guest to loop.  */
    len = wrap_length32(env, len);
    if (len > 4096) {
        cc = 3;
        len = 4096;
    }

    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
        (env->psw.mask & PSW_MASK_PSTATE)) {
        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
                      __func__);
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
    }

    /* FIXME: Access using correct keys and AR-mode */
    if (len) {
        S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
                                         mmu_idx_from_as(src_as), ra);
        S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
                                          mmu_idx_from_as(dest_as), ra);

        access_memmove(env, &desta, &srca, ra);
    }

    return cc;
}
   2706 
   2707 /* Decode a Unicode character.  A return value < 0 indicates success, storing
   2708    the UTF-32 result into OCHAR and the input length into OLEN.  A return
   2709    value >= 0 indicates failure, and the CC value to be returned.  */
   2710 typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
   2711                                  uint64_t ilen, bool enh_check, uintptr_t ra,
   2712                                  uint32_t *ochar, uint32_t *olen);
   2713 
   2714 /* Encode a Unicode character.  A return value < 0 indicates success, storing
   2715    the bytes into ADDR and the output length into OLEN.  A return value >= 0
   2716    indicates failure, and the CC value to be returned.  */
   2717 typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
   2718                                  uint64_t ilen, uintptr_t ra, uint32_t c,
   2719                                  uint32_t *olen);
   2720 
   2721 static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
   2722                        bool enh_check, uintptr_t ra,
   2723                        uint32_t *ochar, uint32_t *olen)
   2724 {
   2725     uint8_t s0, s1, s2, s3;
   2726     uint32_t c, l;
   2727 
   2728     if (ilen < 1) {
   2729         return 0;
   2730     }
   2731     s0 = cpu_ldub_data_ra(env, addr, ra);
   2732     if (s0 <= 0x7f) {
   2733         /* one byte character */
   2734         l = 1;
   2735         c = s0;
   2736     } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
   2737         /* invalid character */
   2738         return 2;
   2739     } else if (s0 <= 0xdf) {
   2740         /* two byte character */
   2741         l = 2;
   2742         if (ilen < 2) {
   2743             return 0;
   2744         }
   2745         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
   2746         c = s0 & 0x1f;
   2747         c = (c << 6) | (s1 & 0x3f);
   2748         if (enh_check && (s1 & 0xc0) != 0x80) {
   2749             return 2;
   2750         }
   2751     } else if (s0 <= 0xef) {
   2752         /* three byte character */
   2753         l = 3;
   2754         if (ilen < 3) {
   2755             return 0;
   2756         }
   2757         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
   2758         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
   2759         c = s0 & 0x0f;
   2760         c = (c << 6) | (s1 & 0x3f);
   2761         c = (c << 6) | (s2 & 0x3f);
   2762         /* Fold the byte-by-byte range descriptions in the PoO into
   2763            tests against the complete value.  It disallows encodings
   2764            that could be smaller, and the UTF-16 surrogates.  */
   2765         if (enh_check
   2766             && ((s1 & 0xc0) != 0x80
   2767                 || (s2 & 0xc0) != 0x80
   2768                 || c < 0x1000
   2769                 || (c >= 0xd800 && c <= 0xdfff))) {
   2770             return 2;
   2771         }
   2772     } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
   2773         /* four byte character */
   2774         l = 4;
   2775         if (ilen < 4) {
   2776             return 0;
   2777         }
   2778         s1 = cpu_ldub_data_ra(env, addr + 1, ra);
   2779         s2 = cpu_ldub_data_ra(env, addr + 2, ra);
   2780         s3 = cpu_ldub_data_ra(env, addr + 3, ra);
   2781         c = s0 & 0x07;
   2782         c = (c << 6) | (s1 & 0x3f);
   2783         c = (c << 6) | (s2 & 0x3f);
   2784         c = (c << 6) | (s3 & 0x3f);
   2785         /* See above.  */
   2786         if (enh_check
   2787             && ((s1 & 0xc0) != 0x80
   2788                 || (s2 & 0xc0) != 0x80
   2789                 || (s3 & 0xc0) != 0x80
   2790                 || c < 0x010000
   2791                 || c > 0x10ffff)) {
   2792             return 2;
   2793         }
   2794     } else {
   2795         /* invalid character */
   2796         return 2;
   2797     }
   2798 
   2799     *ochar = c;
   2800     *olen = l;
   2801     return -1;
   2802 }
   2803 
/* Decode one UTF-16 character from guest memory at ADDR (at most ILEN
   bytes available).  On success returns -1, storing the code point in
   OCHAR and the consumed byte count in OLEN; otherwise returns the cc
   to deliver (0 = source exhausted, 2 = invalid character).  */
static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        bool enh_check, uintptr_t ra,
                        uint32_t *ochar, uint32_t *olen)
{
    uint16_t s0, s1;
    uint32_t c, l;

    if (ilen < 2) {
        return 0;
    }
    s0 = cpu_lduw_data_ra(env, addr, ra);
    if ((s0 & 0xfc00) != 0xd800) {
        /* one word character (anything that is not a high surrogate) */
        l = 2;
        c = s0;
    } else {
        /* two word character */
        l = 4;
        if (ilen < 4) {
            return 0;
        }
        s1 = cpu_lduw_data_ra(env, addr + 2, ra);
        /* Reassemble the code point.  The +1 on the 4 bits at position
           6 adds the 0x10000 bias: ((s0 & 0x3ff) + 0x40) << 10 yields
           the high ten bits plus 0x10000; the low surrogate supplies
           the low ten bits.  */
        c = extract32(s0, 6, 4) + 1;
        c = (c << 6) | (s0 & 0x3f);
        c = (c << 10) | (s1 & 0x3ff);
        if (enh_check && (s1 & 0xfc00) != 0xdc00) {
            /* invalid surrogate character */
            return 2;
        }
    }

    *ochar = c;
    *olen = l;
    return -1;
}
   2839 
   2840 static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
   2841                         bool enh_check, uintptr_t ra,
   2842                         uint32_t *ochar, uint32_t *olen)
   2843 {
   2844     uint32_t c;
   2845 
   2846     if (ilen < 4) {
   2847         return 0;
   2848     }
   2849     c = cpu_ldl_data_ra(env, addr, ra);
   2850     if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
   2851         /* invalid unicode character */
   2852         return 2;
   2853     }
   2854 
   2855     *ochar = c;
   2856     *olen = 4;
   2857     return -1;
   2858 }
   2859 
   2860 static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
   2861                        uintptr_t ra, uint32_t c, uint32_t *olen)
   2862 {
   2863     uint8_t d[4];
   2864     uint32_t l, i;
   2865 
   2866     if (c <= 0x7f) {
   2867         /* one byte character */
   2868         l = 1;
   2869         d[0] = c;
   2870     } else if (c <= 0x7ff) {
   2871         /* two byte character */
   2872         l = 2;
   2873         d[1] = 0x80 | extract32(c, 0, 6);
   2874         d[0] = 0xc0 | extract32(c, 6, 5);
   2875     } else if (c <= 0xffff) {
   2876         /* three byte character */
   2877         l = 3;
   2878         d[2] = 0x80 | extract32(c, 0, 6);
   2879         d[1] = 0x80 | extract32(c, 6, 6);
   2880         d[0] = 0xe0 | extract32(c, 12, 4);
   2881     } else {
   2882         /* four byte character */
   2883         l = 4;
   2884         d[3] = 0x80 | extract32(c, 0, 6);
   2885         d[2] = 0x80 | extract32(c, 6, 6);
   2886         d[1] = 0x80 | extract32(c, 12, 6);
   2887         d[0] = 0xf0 | extract32(c, 18, 3);
   2888     }
   2889 
   2890     if (ilen < l) {
   2891         return 1;
   2892     }
   2893     for (i = 0; i < l; ++i) {
   2894         cpu_stb_data_ra(env, addr + i, d[i], ra);
   2895     }
   2896 
   2897     *olen = l;
   2898     return -1;
   2899 }
   2900 
/* Encode C as UTF-16 into guest memory at ADDR (at most ILEN bytes of
   room).  On success returns -1 and stores the byte count in OLEN;
   returns 1 (the cc) when the destination is exhausted.  */
static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        uintptr_t ra, uint32_t c, uint32_t *olen)
{
    uint16_t d0, d1;

    if (c <= 0xffff) {
        /* one word character */
        if (ilen < 2) {
            return 1;
        }
        cpu_stw_data_ra(env, addr, c, ra);
        *olen = 2;
    } else {
        /* two word character: emit a surrogate pair */
        if (ilen < 4) {
            return 1;
        }
        d1 = 0xdc00 | extract32(c, 0, 10);
        /* The high surrogate carries (c - 0x10000) >> 10; depositing
           bits 16+ of c minus one implements the 0x10000 bias.  */
        d0 = 0xd800 | extract32(c, 10, 6);
        d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
        cpu_stw_data_ra(env, addr + 0, d0, ra);
        cpu_stw_data_ra(env, addr + 2, d1, ra);
        *olen = 4;
    }

    return -1;
}
   2928 
   2929 static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
   2930                         uintptr_t ra, uint32_t c, uint32_t *olen)
   2931 {
   2932     if (ilen < 4) {
   2933         return 1;
   2934     }
   2935     cpu_stl_data_ra(env, addr, c, ra);
   2936     *olen = 4;
   2937     return -1;
   2938 }
   2939 
/* Common implementation of the CUxy conversion instructions: repeatedly
   decode one character from the r2 operand and re-encode it into the r1
   operand, advancing the address and shrinking the length of each
   register pair as we go.  Returns the condition code.  */
static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
                                       uint32_t r2, uint32_t m3, uintptr_t ra,
                                       decode_unicode_fn decode,
                                       encode_unicode_fn encode)
{
    uint64_t dst = get_address(env, r1);
    uint64_t dlen = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint64_t slen = get_length(env, r2 + 1);
    bool enh_check = m3 & 1;   /* well-formedness checking requested */
    int cc, i;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 256.  */
    for (i = 0; i < 256; ++i) {
        uint32_t c, ilen, olen;

        /* A negative return means success; >= 0 is the cc to deliver
           (0 = source exhausted, 1 = destination full, 2 = invalid).  */
        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
        if (unlikely(cc >= 0)) {
            break;
        }
        cc = encode(env, dst, dlen, ra, c, &olen);
        if (unlikely(cc >= 0)) {
            break;
        }

        src += ilen;
        slen -= ilen;
        dst += olen;
        dlen -= olen;
        /* If the iteration cap is hit, cc 3 makes the guest re-execute
           the instruction to continue where we left off.  */
        cc = 3;
    }

    /* Write the updated addresses and lengths back to the registers.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, dlen);
    set_address(env, r2, src);
    set_length(env, r2 + 1, slen);

    return cc;
}
   2980 
/* CONVERT UTF-8 TO UTF-16 (CU12).  */
uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf16);
}
   2986 
/* CONVERT UTF-8 TO UTF-32 (CU14).  */
uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf32);
}
   2992 
/* CONVERT UTF-16 TO UTF-8 (CU21).  */
uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf8);
}
   2998 
/* CONVERT UTF-16 TO UTF-32 (CU24).  */
uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf32);
}
   3004 
/* CONVERT UTF-32 TO UTF-8 (CU41).  */
uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf8);
}
   3010 
/* CONVERT UTF-32 TO UTF-16 (CU42).  */
uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf16);
}
   3016 
   3017 void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
   3018                         uintptr_t ra)
   3019 {
   3020     /* test the actual access, not just any access to the page due to LAP */
   3021     while (len) {
   3022         const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
   3023         const uint64_t curlen = MIN(pagelen, len);
   3024 
   3025         probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
   3026         addr = wrap_address(env, addr + curlen);
   3027         len -= curlen;
   3028     }
   3029 }
   3030 
void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
{
    /* GETPC() must be evaluated here, in the outermost helper, so a
       fault unwinds to the TB that invoked us.  */
    probe_write_access(env, addr, len, GETPC());
}