duckstation

duckstation, archived from the revision just before upstream changed it to a proprietary software project; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

simulator-aarch64.cc (470767B)


      1 // Copyright 2015, VIXL authors
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are met:
      6 //
      7 //   * Redistributions of source code must retain the above copyright notice,
      8 //     this list of conditions and the following disclaimer.
      9 //   * Redistributions in binary form must reproduce the above copyright notice,
     10 //     this list of conditions and the following disclaimer in the documentation
     11 //     and/or other materials provided with the distribution.
     12 //   * Neither the name of ARM Limited nor the names of its contributors may be
     13 //     used to endorse or promote products derived from this software without
     14 //     specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
     17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
     20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 
     27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     28 
     29 #include "simulator-aarch64.h"
     30 
     31 #include <cmath>
     32 #include <cstring>
     33 #include <errno.h>
     34 #include <limits>
     35 #include <sys/mman.h>
     36 #include <unistd.h>
     37 
     38 namespace vixl {
     39 namespace aarch64 {
     40 
     41 using vixl::internal::SimFloat16;
     42 
     43 const Instruction* Simulator::kEndOfSimAddress = NULL;
     44 
     45 MemoryAccessResult TryMemoryAccess(uintptr_t address, uintptr_t access_size) {
     46 #ifdef VIXL_ENABLE_IMPLICIT_CHECKS
     47   for (uintptr_t i = 0; i < access_size; i++) {
     48     if (_vixl_internal_ReadMemory(address, i) == MemoryAccessResult::Failure) {
     49       // The memory access failed.
     50       return MemoryAccessResult::Failure;
     51     }
     52   }
     53 
     54   // Either the memory access did not raise a signal or the signal handler did
     55   // not correctly return MemoryAccessResult::Failure.
     56   return MemoryAccessResult::Success;
     57 #else
     58   USE(address);
     59   USE(access_size);
     60   return MemoryAccessResult::Success;
     61 #endif  // VIXL_ENABLE_IMPLICIT_CHECKS
     62 }
     63 
     64 bool MetaDataDepot::MetaDataMTE::is_active = false;
     65 
     66 void SimSystemRegister::SetBits(int msb, int lsb, uint32_t bits) {
     67   int width = msb - lsb + 1;
     68   VIXL_ASSERT(IsUintN(width, bits) || IsIntN(width, bits));
     69 
     70   bits <<= lsb;
     71   uint32_t mask = ((1 << width) - 1) << lsb;
     72   VIXL_ASSERT((mask & write_ignore_mask_) == 0);
     73 
     74   value_ = (value_ & ~mask) | (bits & mask);
     75 }
     76 
     77 
     78 SimSystemRegister SimSystemRegister::DefaultValueFor(SystemRegister id) {
     79   switch (id) {
     80     case NZCV:
     81       return SimSystemRegister(0x00000000, NZCVWriteIgnoreMask);
     82     case FPCR:
     83       return SimSystemRegister(0x00000000, FPCRWriteIgnoreMask);
     84     default:
     85       VIXL_UNREACHABLE();
     86       return SimSystemRegister();
     87   }
     88 }
     89 
     90 const Simulator::FormToVisitorFnMap* Simulator::GetFormToVisitorFnMap() {
     91   static const FormToVisitorFnMap form_to_visitor = {
     92       DEFAULT_FORM_TO_VISITOR_MAP(Simulator),
     93       SIM_AUD_VISITOR_MAP(Simulator),
     94       {"smlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
     95       {"smlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
     96       {"smull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
     97       {"sqdmlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
     98       {"sqdmlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
     99       {"sqdmull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
    100       {"umlal_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
    101       {"umlsl_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
    102       {"umull_asimdelem_l"_h, &Simulator::SimulateNEONMulByElementLong},
    103       {"fcmla_asimdelem_c_h"_h, &Simulator::SimulateNEONComplexMulByElement},
    104       {"fcmla_asimdelem_c_s"_h, &Simulator::SimulateNEONComplexMulByElement},
    105       {"fmlal2_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
    106       {"fmlal_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
    107       {"fmlsl2_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
    108       {"fmlsl_asimdelem_lh"_h, &Simulator::SimulateNEONFPMulByElementLong},
    109       {"fmla_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
    110       {"fmls_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
    111       {"fmulx_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
    112       {"fmul_asimdelem_rh_h"_h, &Simulator::SimulateNEONFPMulByElement},
    113       {"fmla_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
    114       {"fmls_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
    115       {"fmulx_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
    116       {"fmul_asimdelem_r_sd"_h, &Simulator::SimulateNEONFPMulByElement},
    117       {"sdot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
    118       {"udot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
    119       {"adclb_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
    120       {"adclt_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
    121       {"addhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
    122       {"addhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
    123       {"addp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
    124       {"bcax_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
    125       {"bdep_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    126       {"bext_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    127       {"bgrp_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    128       {"bsl1n_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
    129       {"bsl2n_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
    130       {"bsl_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
    131       {"cadd_z_zz"_h, &Simulator::Simulate_ZdnT_ZdnT_ZmT_const},
    132       {"cdot_z_zzz"_h, &Simulator::SimulateSVEComplexDotProduct},
    133       {"cdot_z_zzzi_d"_h, &Simulator::SimulateSVEComplexDotProduct},
    134       {"cdot_z_zzzi_s"_h, &Simulator::SimulateSVEComplexDotProduct},
    135       {"cmla_z_zzz"_h, &Simulator::SimulateSVEComplexIntMulAdd},
    136       {"cmla_z_zzzi_h"_h, &Simulator::SimulateSVEComplexIntMulAdd},
    137       {"cmla_z_zzzi_s"_h, &Simulator::SimulateSVEComplexIntMulAdd},
    138       {"eor3_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
    139       {"eorbt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    140       {"eortb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    141       {"ext_z_zi_con"_h, &Simulator::Simulate_ZdB_Zn1B_Zn2B_imm},
    142       {"faddp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
    143       {"fcvtlt_z_p_z_h2s"_h, &Simulator::SimulateSVEFPConvertLong},
    144       {"fcvtlt_z_p_z_s2d"_h, &Simulator::SimulateSVEFPConvertLong},
    145       {"fcvtnt_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD},
    146       {"fcvtnt_z_p_z_s2h"_h, &Simulator::Simulate_ZdH_PgM_ZnS},
    147       {"fcvtx_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD},
    148       {"fcvtxnt_z_p_z_d2s"_h, &Simulator::Simulate_ZdS_PgM_ZnD},
    149       {"flogb_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT},
    150       {"fmaxnmp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
    151       {"fmaxp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
    152       {"fminnmp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
    153       {"fminp_z_p_zz"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT},
    154       {"fmlalb_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
    155       {"fmlalb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
    156       {"fmlalt_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
    157       {"fmlalt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
    158       {"fmlslb_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
    159       {"fmlslb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
    160       {"fmlslt_z_zzz"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH},
    161       {"fmlslt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
    162       {"histcnt_z_p_zz"_h, &Simulator::Simulate_ZdT_PgZ_ZnT_ZmT},
    163       {"histseg_z_zz"_h, &Simulator::Simulate_ZdB_ZnB_ZmB},
    164       {"ldnt1b_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
    165       {"ldnt1b_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
    166       {"ldnt1d_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
    167       {"ldnt1h_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
    168       {"ldnt1h_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
    169       {"ldnt1sb_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
    170       {"ldnt1sb_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
    171       {"ldnt1sh_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
    172       {"ldnt1sh_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
    173       {"ldnt1sw_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
    174       {"ldnt1w_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_PgZ_ZnD_Xm},
    175       {"ldnt1w_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_PgZ_ZnS_Xm},
    176       {"match_p_p_zz"_h, &Simulator::Simulate_PdT_PgZ_ZnT_ZmT},
    177       {"mla_z_zzzi_d"_h, &Simulator::SimulateSVEMlaMlsIndex},
    178       {"mla_z_zzzi_h"_h, &Simulator::SimulateSVEMlaMlsIndex},
    179       {"mla_z_zzzi_s"_h, &Simulator::SimulateSVEMlaMlsIndex},
    180       {"mls_z_zzzi_d"_h, &Simulator::SimulateSVEMlaMlsIndex},
    181       {"mls_z_zzzi_h"_h, &Simulator::SimulateSVEMlaMlsIndex},
    182       {"mls_z_zzzi_s"_h, &Simulator::SimulateSVEMlaMlsIndex},
    183       {"mul_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    184       {"mul_z_zzi_d"_h, &Simulator::SimulateSVEMulIndex},
    185       {"mul_z_zzi_h"_h, &Simulator::SimulateSVEMulIndex},
    186       {"mul_z_zzi_s"_h, &Simulator::SimulateSVEMulIndex},
    187       {"nbsl_z_zzz"_h, &Simulator::SimulateSVEBitwiseTernary},
    188       {"nmatch_p_p_zz"_h, &Simulator::Simulate_PdT_PgZ_ZnT_ZmT},
    189       {"pmul_z_zz"_h, &Simulator::Simulate_ZdB_ZnB_ZmB},
    190       {"pmullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
    191       {"pmullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
    192       {"raddhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
    193       {"raddhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
    194       {"rshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
    195       {"rshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
    196       {"rsubhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
    197       {"rsubhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
    198       {"saba_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnT_ZmT},
    199       {"sabalb_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    200       {"sabalt_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    201       {"sabdlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    202       {"sabdlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    203       {"sadalp_z_p_z"_h, &Simulator::Simulate_ZdaT_PgM_ZnTb},
    204       {"saddlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    205       {"saddlbt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    206       {"saddlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    207       {"saddwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
    208       {"saddwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
    209       {"sbclb_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
    210       {"sbclt_z_zzz"_h, &Simulator::SimulateSVEAddSubCarry},
    211       {"shadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
    212       {"shrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
    213       {"shrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
    214       {"shsub_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
    215       {"shsubr_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
    216       {"sli_z_zzi"_h, &Simulator::Simulate_ZdT_ZnT_const},
    217       {"smaxp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
    218       {"sminp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
    219       {"smlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    220       {"smlalb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    221       {"smlalb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    222       {"smlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    223       {"smlalt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    224       {"smlalt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    225       {"smlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    226       {"smlslb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    227       {"smlslb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    228       {"smlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    229       {"smlslt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    230       {"smlslt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    231       {"smulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    232       {"smullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
    233       {"smullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    234       {"smullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    235       {"smullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
    236       {"smullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    237       {"smullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    238       {"splice_z_p_zz_con"_h, &Simulator::VisitSVEVectorSplice},
    239       {"sqabs_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT},
    240       {"sqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
    241       {"sqcadd_z_zz"_h, &Simulator::Simulate_ZdnT_ZdnT_ZmT_const},
    242       {"sqdmlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    243       {"sqdmlalb_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
    244       {"sqdmlalb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
    245       {"sqdmlalbt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    246       {"sqdmlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    247       {"sqdmlalt_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
    248       {"sqdmlalt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
    249       {"sqdmlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    250       {"sqdmlslb_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
    251       {"sqdmlslb_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
    252       {"sqdmlslbt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    253       {"sqdmlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    254       {"sqdmlslt_z_zzzi_d"_h, &Simulator::Simulate_ZdaD_ZnS_ZmS_imm},
    255       {"sqdmlslt_z_zzzi_s"_h, &Simulator::Simulate_ZdaS_ZnH_ZmH_imm},
    256       {"sqdmulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    257       {"sqdmulh_z_zzi_d"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
    258       {"sqdmulh_z_zzi_h"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
    259       {"sqdmulh_z_zzi_s"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
    260       {"sqdmullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
    261       {"sqdmullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    262       {"sqdmullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    263       {"sqdmullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
    264       {"sqdmullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    265       {"sqdmullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    266       {"sqneg_z_p_z"_h, &Simulator::Simulate_ZdT_PgM_ZnT},
    267       {"sqrdcmlah_z_zzz"_h, &Simulator::SimulateSVEComplexIntMulAdd},
    268       {"sqrdcmlah_z_zzzi_h"_h, &Simulator::SimulateSVEComplexIntMulAdd},
    269       {"sqrdcmlah_z_zzzi_s"_h, &Simulator::SimulateSVEComplexIntMulAdd},
    270       {"sqrdmlah_z_zzz"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
    271       {"sqrdmlah_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
    272       {"sqrdmlah_z_zzzi_h"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
    273       {"sqrdmlah_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
    274       {"sqrdmlsh_z_zzz"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
    275       {"sqrdmlsh_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
    276       {"sqrdmlsh_z_zzzi_h"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
    277       {"sqrdmlsh_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingMulAddHigh},
    278       {"sqrdmulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    279       {"sqrdmulh_z_zzi_d"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
    280       {"sqrdmulh_z_zzi_h"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
    281       {"sqrdmulh_z_zzi_s"_h, &Simulator::SimulateSVESaturatingMulHighIndex},
    282       {"sqrshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    283       {"sqrshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    284       {"sqrshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
    285       {"sqrshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
    286       {"sqrshrunb_z_zi"_h, &Simulator::SimulateSVENarrow},
    287       {"sqrshrunt_z_zi"_h, &Simulator::SimulateSVENarrow},
    288       {"sqshl_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
    289       {"sqshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    290       {"sqshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    291       {"sqshlu_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
    292       {"sqshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
    293       {"sqshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
    294       {"sqshrunb_z_zi"_h, &Simulator::SimulateSVENarrow},
    295       {"sqshrunt_z_zi"_h, &Simulator::SimulateSVENarrow},
    296       {"sqsub_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
    297       {"sqsubr_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
    298       {"sqxtnb_z_zz"_h, &Simulator::SimulateSVENarrow},
    299       {"sqxtnt_z_zz"_h, &Simulator::SimulateSVENarrow},
    300       {"sqxtunb_z_zz"_h, &Simulator::SimulateSVENarrow},
    301       {"sqxtunt_z_zz"_h, &Simulator::SimulateSVENarrow},
    302       {"srhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
    303       {"sri_z_zzi"_h, &Simulator::Simulate_ZdT_ZnT_const},
    304       {"srshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    305       {"srshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    306       {"srshr_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
    307       {"srsra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
    308       {"sshllb_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
    309       {"sshllt_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
    310       {"ssra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
    311       {"ssublb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    312       {"ssublbt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    313       {"ssublt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    314       {"ssubltb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    315       {"ssubwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
    316       {"ssubwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
    317       {"stnt1b_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
    318       {"stnt1b_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
    319       {"stnt1d_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
    320       {"stnt1h_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
    321       {"stnt1h_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
    322       {"stnt1w_z_p_ar_d_64_unscaled"_h, &Simulator::Simulate_ZtD_Pg_ZnD_Xm},
    323       {"stnt1w_z_p_ar_s_x32_unscaled"_h, &Simulator::Simulate_ZtS_Pg_ZnS_Xm},
    324       {"subhnb_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
    325       {"subhnt_z_zz"_h, &Simulator::SimulateSVEAddSubHigh},
    326       {"suqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
    327       {"tbl_z_zz_2"_h, &Simulator::VisitSVETableLookup},
    328       {"tbx_z_zz"_h, &Simulator::VisitSVETableLookup},
    329       {"uaba_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnT_ZmT},
    330       {"uabalb_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    331       {"uabalt_z_zzz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    332       {"uabdlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    333       {"uabdlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    334       {"uadalp_z_p_z"_h, &Simulator::Simulate_ZdaT_PgM_ZnTb},
    335       {"uaddlb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    336       {"uaddlt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    337       {"uaddwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
    338       {"uaddwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
    339       {"uhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
    340       {"uhsub_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
    341       {"uhsubr_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
    342       {"umaxp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
    343       {"uminp_z_p_zz"_h, &Simulator::SimulateSVEIntArithPair},
    344       {"umlalb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    345       {"umlalb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    346       {"umlalb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    347       {"umlalt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    348       {"umlalt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    349       {"umlalt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    350       {"umlslb_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    351       {"umlslb_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    352       {"umlslb_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    353       {"umlslt_z_zzz"_h, &Simulator::Simulate_ZdaT_ZnTb_ZmTb},
    354       {"umlslt_z_zzzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    355       {"umlslt_z_zzzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    356       {"umulh_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmT},
    357       {"umullb_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
    358       {"umullb_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    359       {"umullb_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    360       {"umullt_z_zz"_h, &Simulator::SimulateSVEIntMulLongVec},
    361       {"umullt_z_zzi_d"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    362       {"umullt_z_zzi_s"_h, &Simulator::SimulateSVESaturatingIntMulLongIdx},
    363       {"uqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
    364       {"uqrshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    365       {"uqrshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    366       {"uqrshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
    367       {"uqrshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
    368       {"uqshl_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
    369       {"uqshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    370       {"uqshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    371       {"uqshrnb_z_zi"_h, &Simulator::SimulateSVENarrow},
    372       {"uqshrnt_z_zi"_h, &Simulator::SimulateSVENarrow},
    373       {"uqsub_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
    374       {"uqsubr_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
    375       {"uqxtnb_z_zz"_h, &Simulator::SimulateSVENarrow},
    376       {"uqxtnt_z_zz"_h, &Simulator::SimulateSVENarrow},
    377       {"urecpe_z_p_z"_h, &Simulator::Simulate_ZdS_PgM_ZnS},
    378       {"urhadd_z_p_zz"_h, &Simulator::SimulateSVEHalvingAddSub},
    379       {"urshl_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    380       {"urshlr_z_p_zz"_h, &Simulator::VisitSVEBitwiseShiftByVector_Predicated},
    381       {"urshr_z_p_zi"_h, &Simulator::Simulate_ZdnT_PgM_ZdnT_const},
    382       {"ursqrte_z_p_z"_h, &Simulator::Simulate_ZdS_PgM_ZnS},
    383       {"ursra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
    384       {"ushllb_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
    385       {"ushllt_z_zi"_h, &Simulator::SimulateSVEShiftLeftImm},
    386       {"usqadd_z_p_zz"_h, &Simulator::SimulateSVESaturatingArithmetic},
    387       {"usra_z_zi"_h, &Simulator::Simulate_ZdaT_ZnT_const},
    388       {"usublb_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    389       {"usublt_z_zz"_h, &Simulator::SimulateSVEInterleavedArithLong},
    390       {"usubwb_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
    391       {"usubwt_z_zz"_h, &Simulator::Simulate_ZdT_ZnT_ZmTb},
    392       {"whilege_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
    393       {"whilegt_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
    394       {"whilehi_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
    395       {"whilehs_p_p_rr"_h, &Simulator::VisitSVEIntCompareScalarCountAndLimit},
    396       {"whilerw_p_rr"_h, &Simulator::Simulate_PdT_Xn_Xm},
    397       {"whilewr_p_rr"_h, &Simulator::Simulate_PdT_Xn_Xm},
    398       {"xar_z_zzi"_h, &Simulator::SimulateSVEExclusiveOrRotate},
    399       {"smmla_z_zzz"_h, &Simulator::SimulateMatrixMul},
    400       {"ummla_z_zzz"_h, &Simulator::SimulateMatrixMul},
    401       {"usmmla_z_zzz"_h, &Simulator::SimulateMatrixMul},
    402       {"smmla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul},
    403       {"ummla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul},
    404       {"usmmla_asimdsame2_g"_h, &Simulator::SimulateMatrixMul},
    405       {"fmmla_z_zzz_s"_h, &Simulator::SimulateSVEFPMatrixMul},
    406       {"fmmla_z_zzz_d"_h, &Simulator::SimulateSVEFPMatrixMul},
    407       {"ld1row_z_p_bi_u32"_h,
    408        &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
    409       {"ld1row_z_p_br_contiguous"_h,
    410        &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
    411       {"ld1rod_z_p_bi_u64"_h,
    412        &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
    413       {"ld1rod_z_p_br_contiguous"_h,
    414        &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
    415       {"ld1rob_z_p_bi_u8"_h,
    416        &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
    417       {"ld1rob_z_p_br_contiguous"_h,
    418        &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
    419       {"ld1roh_z_p_bi_u16"_h,
    420        &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm},
    421       {"ld1roh_z_p_br_contiguous"_h,
    422        &Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar},
    423       {"usdot_z_zzz_s"_h, &Simulator::VisitSVEIntMulAddUnpredicated},
    424       {"sudot_z_zzzi_s"_h, &Simulator::VisitSVEMulIndex},
    425       {"usdot_z_zzzi_s"_h, &Simulator::VisitSVEMulIndex},
    426       {"usdot_asimdsame2_d"_h, &Simulator::VisitNEON3SameExtra},
    427       {"sudot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
    428       {"usdot_asimdelem_d"_h, &Simulator::SimulateNEONDotProdByElement},
    429       {"addg_64_addsub_immtags"_h, &Simulator::SimulateMTEAddSubTag},
    430       {"gmi_64g_dp_2src"_h, &Simulator::SimulateMTETagMaskInsert},
    431       {"irg_64i_dp_2src"_h, &Simulator::Simulate_XdSP_XnSP_Xm},
    432       {"ldg_64loffset_ldsttags"_h, &Simulator::SimulateMTELoadTag},
    433       {"st2g_64soffset_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    434       {"st2g_64spost_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    435       {"st2g_64spre_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    436       {"stgp_64_ldstpair_off"_h, &Simulator::SimulateMTEStoreTagPair},
    437       {"stgp_64_ldstpair_post"_h, &Simulator::SimulateMTEStoreTagPair},
    438       {"stgp_64_ldstpair_pre"_h, &Simulator::SimulateMTEStoreTagPair},
    439       {"stg_64soffset_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    440       {"stg_64spost_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    441       {"stg_64spre_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    442       {"stz2g_64soffset_ldsttags"_h,
    443        &Simulator::Simulator::SimulateMTEStoreTag},
    444       {"stz2g_64spost_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    445       {"stz2g_64spre_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    446       {"stzg_64soffset_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    447       {"stzg_64spost_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    448       {"stzg_64spre_ldsttags"_h, &Simulator::Simulator::SimulateMTEStoreTag},
    449       {"subg_64_addsub_immtags"_h, &Simulator::SimulateMTEAddSubTag},
    450       {"subps_64s_dp_2src"_h, &Simulator::SimulateMTESubPointer},
    451       {"subp_64s_dp_2src"_h, &Simulator::SimulateMTESubPointer},
    452       {"cpyen_cpy_memcms"_h, &Simulator::SimulateCpyE},
    453       {"cpyern_cpy_memcms"_h, &Simulator::SimulateCpyE},
    454       {"cpyewn_cpy_memcms"_h, &Simulator::SimulateCpyE},
    455       {"cpye_cpy_memcms"_h, &Simulator::SimulateCpyE},
    456       {"cpyfen_cpy_memcms"_h, &Simulator::SimulateCpyE},
    457       {"cpyfern_cpy_memcms"_h, &Simulator::SimulateCpyE},
    458       {"cpyfewn_cpy_memcms"_h, &Simulator::SimulateCpyE},
    459       {"cpyfe_cpy_memcms"_h, &Simulator::SimulateCpyE},
    460       {"cpyfmn_cpy_memcms"_h, &Simulator::SimulateCpyM},
    461       {"cpyfmrn_cpy_memcms"_h, &Simulator::SimulateCpyM},
    462       {"cpyfmwn_cpy_memcms"_h, &Simulator::SimulateCpyM},
    463       {"cpyfm_cpy_memcms"_h, &Simulator::SimulateCpyM},
    464       {"cpyfpn_cpy_memcms"_h, &Simulator::SimulateCpyFP},
    465       {"cpyfprn_cpy_memcms"_h, &Simulator::SimulateCpyFP},
    466       {"cpyfpwn_cpy_memcms"_h, &Simulator::SimulateCpyFP},
    467       {"cpyfp_cpy_memcms"_h, &Simulator::SimulateCpyFP},
    468       {"cpymn_cpy_memcms"_h, &Simulator::SimulateCpyM},
    469       {"cpymrn_cpy_memcms"_h, &Simulator::SimulateCpyM},
    470       {"cpymwn_cpy_memcms"_h, &Simulator::SimulateCpyM},
    471       {"cpym_cpy_memcms"_h, &Simulator::SimulateCpyM},
    472       {"cpypn_cpy_memcms"_h, &Simulator::SimulateCpyP},
    473       {"cpyprn_cpy_memcms"_h, &Simulator::SimulateCpyP},
    474       {"cpypwn_cpy_memcms"_h, &Simulator::SimulateCpyP},
    475       {"cpyp_cpy_memcms"_h, &Simulator::SimulateCpyP},
    476       {"setp_set_memcms"_h, &Simulator::SimulateSetP},
    477       {"setpn_set_memcms"_h, &Simulator::SimulateSetP},
    478       {"setgp_set_memcms"_h, &Simulator::SimulateSetGP},
    479       {"setgpn_set_memcms"_h, &Simulator::SimulateSetGP},
    480       {"setm_set_memcms"_h, &Simulator::SimulateSetM},
    481       {"setmn_set_memcms"_h, &Simulator::SimulateSetM},
    482       {"setgm_set_memcms"_h, &Simulator::SimulateSetGM},
    483       {"setgmn_set_memcms"_h, &Simulator::SimulateSetGM},
    484       {"sete_set_memcms"_h, &Simulator::SimulateSetE},
    485       {"seten_set_memcms"_h, &Simulator::SimulateSetE},
    486       {"setge_set_memcms"_h, &Simulator::SimulateSetE},
    487       {"setgen_set_memcms"_h, &Simulator::SimulateSetE},
    488       {"abs_32_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
    489       {"abs_64_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
    490       {"cnt_32_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
    491       {"cnt_64_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
    492       {"ctz_32_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
    493       {"ctz_64_dp_1src"_h, &Simulator::VisitDataProcessing1Source},
    494       {"smax_32_dp_2src"_h, &Simulator::SimulateSignedMinMax},
    495       {"smax_64_dp_2src"_h, &Simulator::SimulateSignedMinMax},
    496       {"smin_32_dp_2src"_h, &Simulator::SimulateSignedMinMax},
    497       {"smin_64_dp_2src"_h, &Simulator::SimulateSignedMinMax},
    498       {"smax_32_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
    499       {"smax_64_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
    500       {"smin_32_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
    501       {"smin_64_minmax_imm"_h, &Simulator::SimulateSignedMinMax},
    502       {"umax_32_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
    503       {"umax_64_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
    504       {"umin_32_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
    505       {"umin_64_dp_2src"_h, &Simulator::SimulateUnsignedMinMax},
    506       {"umax_32u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
    507       {"umax_64u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
    508       {"umin_32u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
    509       {"umin_64u_minmax_imm"_h, &Simulator::SimulateUnsignedMinMax},
    510   };
    511   return &form_to_visitor;
    512 }
    513 
    514 // _vixl_internal_ReadMemory probes one byte at the address in RDI plus the
    515 // offset in RSI, using testb, then zeroes RAX and returns. If the probe raises
    516 // a signal, the signal handler should set RIP to
    517 // _vixl_internal_AccessMemory_continue and RAX to MemoryAccessResult::Failure.
    518 // Only the x86-64 variant probes memory; otherwise the symbol just returns.
    519 #ifdef VIXL_ENABLE_IMPLICIT_CHECKS
    520 #ifdef __x86_64__
    521 asm(R"(
    522   .globl _vixl_internal_ReadMemory
    523   _vixl_internal_ReadMemory:
    524     testb (%rdi, %rsi), %al
    525     xorq %rax, %rax
    526     ret
    527   .globl _vixl_internal_AccessMemory_continue
    528   _vixl_internal_AccessMemory_continue:
    529     ret
    530 )");
    531 #else
    532 asm(R"(
    533   .globl _vixl_internal_ReadMemory
    534   _vixl_internal_ReadMemory:
    535     ret
    536 )");
    537 #endif  // __x86_64__
    538 #endif  // VIXL_ENABLE_IMPLICIT_CHECKS
    539 
    540 Simulator::Simulator(Decoder* decoder, FILE* stream, SimStack::Allocated stack)
    541     : memory_(std::move(stack)),
    542       last_instr_(NULL),
    543       cpu_features_auditor_(decoder, CPUFeatures::All()) {
    544   // Ensure that shift operations act as the simulator expects.
    545   VIXL_ASSERT((static_cast<int32_t>(-1) >> 1) == -1);
    546   VIXL_ASSERT((static_cast<uint32_t>(-1) >> 1) == 0x7fffffff);
    547 
    548   // Set up a placeholder pipe for CanReadMemory.
    549   VIXL_CHECK(pipe(placeholder_pipe_fd_) == 0);
    550 
    551   // Set up the decoder.
    552   decoder_ = decoder;
    553   decoder_->AppendVisitor(this);
    554 
    555   stream_ = stream;
    556 
    557   print_disasm_ = new PrintDisassembler(stream_);
    558 
    559   memory_.AppendMetaData(&meta_data_);
    560 
    561   // The Simulator and Disassembler share the same available list, held by the
    562   // auditor. The Disassembler only annotates instructions with features that
    563   // are _not_ available, so registering the auditor should have no effect
    564   // unless the simulator is about to abort (due to missing features). In
    565   // practice, this means that with trace enabled, the simulator will crash just
    566   // after the disassembler prints the instruction, with the missing features
    567   // enumerated.
    568   print_disasm_->RegisterCPUFeaturesAuditor(&cpu_features_auditor_);
    569 
    570   SetColouredTrace(false);
    571   trace_parameters_ = LOG_NONE;
    572 
    573   // We have to configure the SVE vector register length before calling
    574   // ResetState().
    575   SetVectorLengthInBits(kZRegMinSize);
    576 
    577   ResetState();
    578 
    579   // Print a warning about exclusive-access instructions, but only the first
    580   // time they are encountered. This warning can be silenced using
    581   // SilenceExclusiveAccessWarning().
    582   print_exclusive_access_warning_ = true;
    583 
    584   guard_pages_ = false;
    585 
    586   // Initialize the common state of RNDR and RNDRRS.
    587   uint16_t seed[3] = {11, 22, 33};
    588   VIXL_STATIC_ASSERT(sizeof(seed) == sizeof(rand_state_));
    589   memcpy(rand_state_, seed, sizeof(rand_state_));
    590 
    591   // Initialize all bits of pseudo predicate register to true.
    592   LogicPRegister ones(pregister_all_true_);
    593   ones.SetAllBits();
    594 
    595   // Initialize the debugger but disable it by default.
    596   SetDebuggerEnabled(false);
    597   debugger_ = std::make_unique<Debugger>(this);
    598 }
    599 
    600 void Simulator::ResetSystemRegisters() {
    601   // Reset the system registers.
    602   nzcv_ = SimSystemRegister::DefaultValueFor(NZCV);
    603   fpcr_ = SimSystemRegister::DefaultValueFor(FPCR);
    604   ResetFFR();
    605 }
    606 
    607 void Simulator::ResetRegisters() {
    608   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    609     WriteXRegister(i, 0xbadbeef);
    610   }
    611   // Returning to address 0 exits the Simulator.
    612   WriteLr(kEndOfSimAddress);
    613 }
    614 
    615 void Simulator::ResetVRegisters() {
    616   // Set SVE/FP registers to a value that is a NaN in both 32-bit and 64-bit FP.
    617   VIXL_ASSERT((GetVectorLengthInBytes() % kDRegSizeInBytes) == 0);
    618   int lane_count = GetVectorLengthInBytes() / kDRegSizeInBytes;
    619   for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
    620     VIXL_ASSERT(vregisters_[i].GetSizeInBytes() == GetVectorLengthInBytes());
    621     vregisters_[i].NotifyAccessAsZ();
    622     for (int lane = 0; lane < lane_count; lane++) {
    623       // Encode the register number and (D-sized) lane into each NaN, to
    624       // make them easier to trace.
    625       uint64_t nan_bits = 0x7ff0f0007f80f000 | (0x0000000100000000 * i) |
    626                           (0x0000000000000001 * lane);
    627       VIXL_ASSERT(IsSignallingNaN(RawbitsToDouble(nan_bits & kDRegMask)));
    628       VIXL_ASSERT(IsSignallingNaN(RawbitsToFloat(nan_bits & kSRegMask)));
    629       vregisters_[i].Insert(lane, nan_bits);
    630     }
    631   }
    632 }
    633 
    634 void Simulator::ResetPRegisters() {
    635   VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0);
    636   int lane_count = GetPredicateLengthInBytes() / kHRegSizeInBytes;
    637   // Ensure the register configuration fits in this bit encoding.
    638   VIXL_STATIC_ASSERT(kNumberOfPRegisters <= UINT8_MAX);
    639   VIXL_ASSERT(lane_count <= UINT8_MAX);
    640   for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
    641     VIXL_ASSERT(pregisters_[i].GetSizeInBytes() == GetPredicateLengthInBytes());
    642     for (int lane = 0; lane < lane_count; lane++) {
    643       // Encode the register number and (H-sized) lane into each lane slot.
    644       uint16_t bits = (0x0100 * lane) | i;
    645       pregisters_[i].Insert(lane, bits);
    646     }
    647   }
    648 }
    649 
    650 void Simulator::ResetFFR() {
    651   VIXL_ASSERT((GetPredicateLengthInBytes() % kHRegSizeInBytes) == 0);
    652   int default_active_lanes = GetPredicateLengthInBytes() / kHRegSizeInBytes;
    653   ffr_register_.Write(static_cast<uint16_t>(GetUintMask(default_active_lanes)));
    654 }
    655 
    656 void Simulator::ResetState() {
    657   ResetSystemRegisters();
    658   ResetRegisters();
    659   ResetVRegisters();
    660   ResetPRegisters();
    661 
    662   WriteSp(memory_.GetStack().GetBase());
    663 
    664   pc_ = NULL;
    665   pc_modified_ = false;
    666 
    667   // BTI state.
    668   btype_ = DefaultBType;
    669   next_btype_ = DefaultBType;
    670 
    671   meta_data_.ResetState();
    672 }
    673 
    674 void Simulator::SetVectorLengthInBits(unsigned vector_length) {
    675   VIXL_ASSERT((vector_length >= kZRegMinSize) &&
    676               (vector_length <= kZRegMaxSize));
    677   VIXL_ASSERT((vector_length % kZRegMinSize) == 0);
    678   vector_length_ = vector_length;
    679 
    680   for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
    681     vregisters_[i].SetSizeInBytes(GetVectorLengthInBytes());
    682   }
    683   for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
    684     pregisters_[i].SetSizeInBytes(GetPredicateLengthInBytes());
    685   }
    686 
    687   ffr_register_.SetSizeInBytes(GetPredicateLengthInBytes());
    688 
    689   ResetVRegisters();
    690   ResetPRegisters();
    691   ResetFFR();
    692 }
    693 
    694 Simulator::~Simulator() {
    695   // The decoder may outlive the simulator.
    696   decoder_->RemoveVisitor(print_disasm_);
    697   delete print_disasm_;
    698   close(placeholder_pipe_fd_[0]);
    699   close(placeholder_pipe_fd_[1]);
    700 }
    701 
    702 
    703 void Simulator::Run() {
    704   // Flush any written registers before executing anything, so that
    705   // manually-set registers are logged _before_ the first instruction.
    706   LogAllWrittenRegisters();
    707 
    708   if (debugger_enabled_) {
    709     // Slow path to check for breakpoints only if the debugger is enabled.
    710     Debugger* debugger = GetDebugger();
    711     while (!IsSimulationFinished()) {
    712       if (debugger->IsAtBreakpoint()) {
    713         fprintf(stream_, "Debugger hit breakpoint, breaking...\n");
    714         debugger->Debug();
    715       } else {
    716         ExecuteInstruction();
    717       }
    718     }
    719   } else {
    720     while (!IsSimulationFinished()) {
    721       ExecuteInstruction();
    722     }
    723   }
    724 }
    725 
    726 
    727 void Simulator::RunFrom(const Instruction* first) {
    728   WritePc(first, NoBranchLog);
    729   Run();
    730 }
    731 
    732 
    733 // clang-format off
    734 const char* Simulator::xreg_names[] = {"x0",  "x1",  "x2",  "x3",  "x4",  "x5",
    735                                        "x6",  "x7",  "x8",  "x9",  "x10", "x11",
    736                                        "x12", "x13", "x14", "x15", "x16", "x17",
    737                                        "x18", "x19", "x20", "x21", "x22", "x23",
    738                                        "x24", "x25", "x26", "x27", "x28", "x29",
    739                                        "lr",  "xzr", "sp"};
    740 
    741 const char* Simulator::wreg_names[] = {"w0",  "w1",  "w2",  "w3",  "w4",  "w5",
    742                                        "w6",  "w7",  "w8",  "w9",  "w10", "w11",
    743                                        "w12", "w13", "w14", "w15", "w16", "w17",
    744                                        "w18", "w19", "w20", "w21", "w22", "w23",
    745                                        "w24", "w25", "w26", "w27", "w28", "w29",
    746                                        "w30", "wzr", "wsp"};
    747 
    748 const char* Simulator::breg_names[] = {"b0",  "b1",  "b2",  "b3",  "b4",  "b5",
    749                                        "b6",  "b7",  "b8",  "b9",  "b10", "b11",
    750                                        "b12", "b13", "b14", "b15", "b16", "b17",
    751                                        "b18", "b19", "b20", "b21", "b22", "b23",
    752                                        "b24", "b25", "b26", "b27", "b28", "b29",
    753                                        "b30", "b31"};
    754 
    755 const char* Simulator::hreg_names[] = {"h0",  "h1",  "h2",  "h3",  "h4",  "h5",
    756                                        "h6",  "h7",  "h8",  "h9",  "h10", "h11",
    757                                        "h12", "h13", "h14", "h15", "h16", "h17",
    758                                        "h18", "h19", "h20", "h21", "h22", "h23",
    759                                        "h24", "h25", "h26", "h27", "h28", "h29",
    760                                        "h30", "h31"};
    761 
    762 const char* Simulator::sreg_names[] = {"s0",  "s1",  "s2",  "s3",  "s4",  "s5",
    763                                        "s6",  "s7",  "s8",  "s9",  "s10", "s11",
    764                                        "s12", "s13", "s14", "s15", "s16", "s17",
    765                                        "s18", "s19", "s20", "s21", "s22", "s23",
    766                                        "s24", "s25", "s26", "s27", "s28", "s29",
    767                                        "s30", "s31"};
    768 
    769 const char* Simulator::dreg_names[] = {"d0",  "d1",  "d2",  "d3",  "d4",  "d5",
    770                                        "d6",  "d7",  "d8",  "d9",  "d10", "d11",
    771                                        "d12", "d13", "d14", "d15", "d16", "d17",
    772                                        "d18", "d19", "d20", "d21", "d22", "d23",
    773                                        "d24", "d25", "d26", "d27", "d28", "d29",
    774                                        "d30", "d31"};
    775 
    776 const char* Simulator::vreg_names[] = {"v0",  "v1",  "v2",  "v3",  "v4",  "v5",
    777                                        "v6",  "v7",  "v8",  "v9",  "v10", "v11",
    778                                        "v12", "v13", "v14", "v15", "v16", "v17",
    779                                        "v18", "v19", "v20", "v21", "v22", "v23",
    780                                        "v24", "v25", "v26", "v27", "v28", "v29",
    781                                        "v30", "v31"};
    782 
    783 const char* Simulator::zreg_names[] = {"z0",  "z1",  "z2",  "z3",  "z4",  "z5",
    784                                        "z6",  "z7",  "z8",  "z9",  "z10", "z11",
    785                                        "z12", "z13", "z14", "z15", "z16", "z17",
    786                                        "z18", "z19", "z20", "z21", "z22", "z23",
    787                                        "z24", "z25", "z26", "z27", "z28", "z29",
    788                                        "z30", "z31"};
    789 
    790 const char* Simulator::preg_names[] = {"p0",  "p1",  "p2",  "p3",  "p4",  "p5",
    791                                        "p6",  "p7",  "p8",  "p9",  "p10", "p11",
    792                                        "p12", "p13", "p14", "p15"};
    793 // clang-format on
    794 
    795 
    796 const char* Simulator::WRegNameForCode(unsigned code, Reg31Mode mode) {
    797   // If the code represents the stack pointer, index the name after zr.
    798   if ((code == kSPRegInternalCode) ||
    799       ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) {
    800     code = kZeroRegCode + 1;
    801   }
    802   VIXL_ASSERT(code < ArrayLength(wreg_names));
    803   return wreg_names[code];
    804 }
    805 
    806 
    807 const char* Simulator::XRegNameForCode(unsigned code, Reg31Mode mode) {
    808   // If the code represents the stack pointer, index the name after zr.
    809   if ((code == kSPRegInternalCode) ||
    810       ((code == kZeroRegCode) && (mode == Reg31IsStackPointer))) {
    811     code = kZeroRegCode + 1;
    812   }
    813   VIXL_ASSERT(code < ArrayLength(xreg_names));
    814   return xreg_names[code];
    815 }
    816 
    817 
    818 const char* Simulator::BRegNameForCode(unsigned code) {
    819   VIXL_ASSERT(code < kNumberOfVRegisters);
    820   return breg_names[code];
    821 }
    822 
    823 
    824 const char* Simulator::HRegNameForCode(unsigned code) {
    825   VIXL_ASSERT(code < kNumberOfVRegisters);
    826   return hreg_names[code];
    827 }
    828 
    829 
    830 const char* Simulator::SRegNameForCode(unsigned code) {
    831   VIXL_ASSERT(code < kNumberOfVRegisters);
    832   return sreg_names[code];
    833 }
    834 
    835 
    836 const char* Simulator::DRegNameForCode(unsigned code) {
    837   VIXL_ASSERT(code < kNumberOfVRegisters);
    838   return dreg_names[code];
    839 }
    840 
    841 
    842 const char* Simulator::VRegNameForCode(unsigned code) {
    843   VIXL_ASSERT(code < kNumberOfVRegisters);
    844   return vreg_names[code];
    845 }
    846 
    847 
    848 const char* Simulator::ZRegNameForCode(unsigned code) {
    849   VIXL_ASSERT(code < kNumberOfZRegisters);
    850   return zreg_names[code];
    851 }
    852 
    853 
    854 const char* Simulator::PRegNameForCode(unsigned code) {
    855   VIXL_ASSERT(code < kNumberOfPRegisters);
    856   return preg_names[code];
    857 }
    858 
    859 SimVRegister Simulator::ExpandToSimVRegister(const SimPRegister& pg) {
    860   SimVRegister ones, result;
    861   dup_immediate(kFormatVnB, ones, 0xff);
    862   mov_zeroing(kFormatVnB, result, pg, ones);
    863   return result;
    864 }
    865 
    866 void Simulator::ExtractFromSimVRegister(VectorFormat vform,
    867                                         SimPRegister& pd,
    868                                         SimVRegister vreg) {
    869   SimVRegister zero;
    870   dup_immediate(kFormatVnB, zero, 0);
    871   SVEIntCompareVectorsHelper(ne,
    872                              vform,
    873                              pd,
    874                              GetPTrue(),
    875                              vreg,
    876                              zero,
    877                              false,
    878                              LeaveFlags);
    879 }
    880 
    881 #define COLOUR(colour_code) "\033[0;" colour_code "m"
    882 #define COLOUR_BOLD(colour_code) "\033[1;" colour_code "m"
    883 #define COLOUR_HIGHLIGHT "\033[43m"
    884 #define NORMAL ""
    885 #define GREY "30"
    886 #define RED "31"
    887 #define GREEN "32"
    888 #define YELLOW "33"
    889 #define BLUE "34"
    890 #define MAGENTA "35"
    891 #define CYAN "36"
    892 #define WHITE "37"
    893 void Simulator::SetColouredTrace(bool value) {
    894   coloured_trace_ = value;
    895 
    896   clr_normal = value ? COLOUR(NORMAL) : "";
    897   clr_flag_name = value ? COLOUR_BOLD(WHITE) : "";
    898   clr_flag_value = value ? COLOUR(NORMAL) : "";
    899   clr_reg_name = value ? COLOUR_BOLD(CYAN) : "";
    900   clr_reg_value = value ? COLOUR(CYAN) : "";
    901   clr_vreg_name = value ? COLOUR_BOLD(MAGENTA) : "";
    902   clr_vreg_value = value ? COLOUR(MAGENTA) : "";
    903   clr_preg_name = value ? COLOUR_BOLD(GREEN) : "";
    904   clr_preg_value = value ? COLOUR(GREEN) : "";
    905   clr_memory_address = value ? COLOUR_BOLD(BLUE) : "";
    906   clr_warning = value ? COLOUR_BOLD(YELLOW) : "";
    907   clr_warning_message = value ? COLOUR(YELLOW) : "";
    908   clr_printf = value ? COLOUR(GREEN) : "";
    909   clr_branch_marker = value ? COLOUR(GREY) COLOUR_HIGHLIGHT : "";
    910 
    911   if (value) {
    912     print_disasm_->SetCPUFeaturesPrefix("// Needs: " COLOUR_BOLD(RED));
    913     print_disasm_->SetCPUFeaturesSuffix(COLOUR(NORMAL));
    914   } else {
    915     print_disasm_->SetCPUFeaturesPrefix("// Needs: ");
    916     print_disasm_->SetCPUFeaturesSuffix("");
    917   }
    918 }
    919 
    920 
    921 void Simulator::SetTraceParameters(int parameters) {
    922   bool disasm_before = trace_parameters_ & LOG_DISASM;
    923   trace_parameters_ = parameters;
    924   bool disasm_after = trace_parameters_ & LOG_DISASM;
    925 
    926   if (disasm_before != disasm_after) {
    927     if (disasm_after) {
    928       decoder_->InsertVisitorBefore(print_disasm_, this);
    929     } else {
    930       decoder_->RemoveVisitor(print_disasm_);
    931     }
    932   }
    933 }
    934 
    935 // Helpers ---------------------------------------------------------------------
    936 uint64_t Simulator::AddWithCarry(unsigned reg_size,
    937                                  bool set_flags,
    938                                  uint64_t left,
    939                                  uint64_t right,
    940                                  int carry_in) {
    941   std::pair<uint64_t, uint8_t> result_and_flags =
    942       AddWithCarry(reg_size, left, right, carry_in);
    943   if (set_flags) {
    944     uint8_t flags = result_and_flags.second;
    945     ReadNzcv().SetN((flags >> 3) & 1);
    946     ReadNzcv().SetZ((flags >> 2) & 1);
    947     ReadNzcv().SetC((flags >> 1) & 1);
    948     ReadNzcv().SetV((flags >> 0) & 1);
    949     LogSystemRegister(NZCV);
    950   }
    951   return result_and_flags.first;
    952 }
    953 
    954 std::pair<uint64_t, uint8_t> Simulator::AddWithCarry(unsigned reg_size,
    955                                                      uint64_t left,
    956                                                      uint64_t right,
    957                                                      int carry_in) {
    958   VIXL_ASSERT((carry_in == 0) || (carry_in == 1));
    959   VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));
    960 
    961   uint64_t max_uint = (reg_size == kWRegSize) ? kWMaxUInt : kXMaxUInt;
    962   uint64_t reg_mask = (reg_size == kWRegSize) ? kWRegMask : kXRegMask;
    963   uint64_t sign_mask = (reg_size == kWRegSize) ? kWSignMask : kXSignMask;
    964 
    965   left &= reg_mask;
    966   right &= reg_mask;
    967   uint64_t result = (left + right + carry_in) & reg_mask;
    968 
    969   // NZCV bits, ordered N in bit 3 to V in bit 0.
    970   uint8_t nzcv = CalcNFlag(result, reg_size) ? 8 : 0;
    971   nzcv |= CalcZFlag(result) ? 4 : 0;
    972 
    973   // Compute the C flag by comparing the result to the max unsigned integer.
    974   uint64_t max_uint_2op = max_uint - carry_in;
    975   bool C = (left > max_uint_2op) || ((max_uint_2op - left) < right);
    976   nzcv |= C ? 2 : 0;
    977 
    978   // Overflow iff the sign bit is the same for the two inputs and different
    979   // for the result.
    980   uint64_t left_sign = left & sign_mask;
    981   uint64_t right_sign = right & sign_mask;
    982   uint64_t result_sign = result & sign_mask;
    983   bool V = (left_sign == right_sign) && (left_sign != result_sign);
    984   nzcv |= V ? 1 : 0;
    985 
    986   return std::make_pair(result, nzcv);
    987 }
    988 
    989 using vixl_uint128_t = std::pair<uint64_t, uint64_t>;
    990 
    991 vixl_uint128_t Simulator::Add128(vixl_uint128_t x, vixl_uint128_t y) {
    992   std::pair<uint64_t, uint8_t> sum_lo =
    993       AddWithCarry(kXRegSize, x.second, y.second, 0);
    994   int carry_in = (sum_lo.second & 0x2) >> 1;  // C flag in NZCV result.
    995   std::pair<uint64_t, uint8_t> sum_hi =
    996       AddWithCarry(kXRegSize, x.first, y.first, carry_in);
    997   return std::make_pair(sum_hi.first, sum_lo.first);
    998 }
    999 
   1000 vixl_uint128_t Simulator::Lsl128(vixl_uint128_t x, unsigned shift) const {
   1001   VIXL_ASSERT(shift <= 64);
   1002   if (shift == 0) return x;
   1003   if (shift == 64) return std::make_pair(x.second, 0);
   1004   uint64_t lo = x.second << shift;
   1005   uint64_t hi = (x.first << shift) | (x.second >> (64 - shift));
   1006   return std::make_pair(hi, lo);
   1007 }
   1008 
   1009 vixl_uint128_t Simulator::Eor128(vixl_uint128_t x, vixl_uint128_t y) const {
   1010   return std::make_pair(x.first ^ y.first, x.second ^ y.second);
   1011 }
   1012 
   1013 vixl_uint128_t Simulator::Neg128(vixl_uint128_t x) {
   1014   // Negate the integer value. Throw an assertion when the input is INT128_MIN.
   1015   VIXL_ASSERT((x.first != GetSignMask(64)) || (x.second != 0));
   1016   x.first = ~x.first;
   1017   x.second = ~x.second;
   1018   return Add128(x, {0, 1});
   1019 }
   1020 
   1021 vixl_uint128_t Simulator::Mul64(uint64_t x, uint64_t y) {
   1022   bool neg_result = false;
   1023   if ((x >> 63) == 1) {
   1024     x = -x;
   1025     neg_result = !neg_result;
   1026   }
   1027   if ((y >> 63) == 1) {
   1028     y = -y;
   1029     neg_result = !neg_result;
   1030   }
   1031 
   1032   uint64_t x_lo = x & 0xffffffff;
   1033   uint64_t x_hi = x >> 32;
   1034   uint64_t y_lo = y & 0xffffffff;
   1035   uint64_t y_hi = y >> 32;
   1036 
   1037   uint64_t t1 = x_lo * y_hi;
   1038   uint64_t t2 = x_hi * y_lo;
   1039   vixl_uint128_t a = std::make_pair(0, x_lo * y_lo);
   1040   vixl_uint128_t b = std::make_pair(t1 >> 32, t1 << 32);
   1041   vixl_uint128_t c = std::make_pair(t2 >> 32, t2 << 32);
   1042   vixl_uint128_t d = std::make_pair(x_hi * y_hi, 0);
   1043 
   1044   vixl_uint128_t result = Add128(a, b);
   1045   result = Add128(result, c);
   1046   result = Add128(result, d);
   1047   return neg_result ? std::make_pair(-result.first - 1, -result.second)
   1048                     : result;
   1049 }
   1050 
   1051 vixl_uint128_t Simulator::PolynomialMult128(uint64_t op1,
   1052                                             uint64_t op2,
   1053                                             int lane_size_in_bits) const {
   1054   VIXL_ASSERT(static_cast<unsigned>(lane_size_in_bits) <= kDRegSize);
   1055   vixl_uint128_t result = std::make_pair(0, 0);
   1056   vixl_uint128_t op2q = std::make_pair(0, op2);
   1057   for (int i = 0; i < lane_size_in_bits; i++) {
   1058     if ((op1 >> i) & 1) {
   1059       result = Eor128(result, Lsl128(op2q, i));
   1060     }
   1061   }
   1062   return result;
   1063 }
   1064 
   1065 int64_t Simulator::ShiftOperand(unsigned reg_size,
   1066                                 uint64_t uvalue,
   1067                                 Shift shift_type,
   1068                                 unsigned amount) const {
   1069   VIXL_ASSERT((reg_size == kBRegSize) || (reg_size == kHRegSize) ||
   1070               (reg_size == kSRegSize) || (reg_size == kDRegSize));
   1071   if (amount > 0) {
   1072     uint64_t mask = GetUintMask(reg_size);
   1073     bool is_negative = (uvalue & GetSignMask(reg_size)) != 0;
   1074     // The behavior is undefined in c++ if the shift amount greater than or
   1075     // equal to the register lane size. Work out the shifted result based on
   1076     // architectural behavior before performing the c++ type shift operations.
   1077     switch (shift_type) {
   1078       case LSL:
   1079         if (amount >= reg_size) {
   1080           return UINT64_C(0);
   1081         }
   1082         uvalue <<= amount;
   1083         break;
   1084       case LSR:
   1085         if (amount >= reg_size) {
   1086           return UINT64_C(0);
   1087         }
   1088         uvalue >>= amount;
   1089         break;
   1090       case ASR:
   1091         if (amount >= reg_size) {
   1092           return is_negative ? ~UINT64_C(0) : UINT64_C(0);
   1093         }
   1094         uvalue >>= amount;
   1095         if (is_negative) {
   1096           // Simulate sign-extension to 64 bits.
   1097           uvalue |= ~UINT64_C(0) << (reg_size - amount);
   1098         }
   1099         break;
   1100       case ROR: {
   1101         uvalue = RotateRight(uvalue, amount, reg_size);
   1102         break;
   1103       }
   1104       default:
   1105         VIXL_UNIMPLEMENTED();
   1106         return 0;
   1107     }
   1108     uvalue &= mask;
   1109   }
   1110 
   1111   int64_t result;
   1112   memcpy(&result, &uvalue, sizeof(result));
   1113   return result;
   1114 }
   1115 
   1116 
   1117 int64_t Simulator::ExtendValue(unsigned reg_size,
   1118                                int64_t value,
   1119                                Extend extend_type,
   1120                                unsigned left_shift) const {
   1121   switch (extend_type) {
   1122     case UXTB:
   1123       value &= kByteMask;
   1124       break;
   1125     case UXTH:
   1126       value &= kHalfWordMask;
   1127       break;
   1128     case UXTW:
   1129       value &= kWordMask;
   1130       break;
   1131     case SXTB:
   1132       value &= kByteMask;
   1133       if ((value & 0x80) != 0) {
   1134         value |= ~UINT64_C(0) << 8;
   1135       }
   1136       break;
   1137     case SXTH:
   1138       value &= kHalfWordMask;
   1139       if ((value & 0x8000) != 0) {
   1140         value |= ~UINT64_C(0) << 16;
   1141       }
   1142       break;
   1143     case SXTW:
   1144       value &= kWordMask;
   1145       if ((value & 0x80000000) != 0) {
   1146         value |= ~UINT64_C(0) << 32;
   1147       }
   1148       break;
   1149     case UXTX:
   1150     case SXTX:
   1151       break;
   1152     default:
   1153       VIXL_UNREACHABLE();
   1154   }
   1155   return ShiftOperand(reg_size, value, LSL, left_shift);
   1156 }
   1157 
   1158 
   1159 void Simulator::FPCompare(double val0, double val1, FPTrapFlags trap) {
   1160   AssertSupportedFPCR();
   1161 
   1162   // TODO: This assumes that the C++ implementation handles comparisons in the
   1163   // way that we expect (as per AssertSupportedFPCR()).
   1164   bool process_exception = false;
   1165   if ((IsNaN(val0) != 0) || (IsNaN(val1) != 0)) {
   1166     ReadNzcv().SetRawValue(FPUnorderedFlag);
   1167     if (IsSignallingNaN(val0) || IsSignallingNaN(val1) ||
   1168         (trap == EnableTrap)) {
   1169       process_exception = true;
   1170     }
   1171   } else if (val0 < val1) {
   1172     ReadNzcv().SetRawValue(FPLessThanFlag);
   1173   } else if (val0 > val1) {
   1174     ReadNzcv().SetRawValue(FPGreaterThanFlag);
   1175   } else if (val0 == val1) {
   1176     ReadNzcv().SetRawValue(FPEqualFlag);
   1177   } else {
   1178     VIXL_UNREACHABLE();
   1179   }
   1180   LogSystemRegister(NZCV);
   1181   if (process_exception) FPProcessException();
   1182 }
   1183 
   1184 
   1185 uint64_t Simulator::ComputeMemOperandAddress(const MemOperand& mem_op) const {
   1186   VIXL_ASSERT(mem_op.IsValid());
   1187   int64_t base = ReadRegister<int64_t>(mem_op.GetBaseRegister());
   1188   if (mem_op.IsImmediateOffset()) {
   1189     return base + mem_op.GetOffset();
   1190   } else {
   1191     VIXL_ASSERT(mem_op.GetRegisterOffset().IsValid());
   1192     int64_t offset = ReadRegister<int64_t>(mem_op.GetRegisterOffset());
   1193     unsigned shift_amount = mem_op.GetShiftAmount();
   1194     if (mem_op.GetShift() != NO_SHIFT) {
   1195       offset = ShiftOperand(kXRegSize, offset, mem_op.GetShift(), shift_amount);
   1196     }
   1197     if (mem_op.GetExtend() != NO_EXTEND) {
   1198       offset = ExtendValue(kXRegSize, offset, mem_op.GetExtend(), shift_amount);
   1199     }
   1200     return static_cast<uint64_t>(base + offset);
   1201   }
   1202 }
   1203 
   1204 
   1205 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatForSize(
   1206     unsigned reg_size, unsigned lane_size) {
   1207   VIXL_ASSERT(reg_size >= lane_size);
   1208 
   1209   uint32_t format = 0;
   1210   if (reg_size != lane_size) {
   1211     switch (reg_size) {
   1212       default:
   1213         VIXL_UNREACHABLE();
   1214         break;
   1215       case kQRegSizeInBytes:
   1216         format = kPrintRegAsQVector;
   1217         break;
   1218       case kDRegSizeInBytes:
   1219         format = kPrintRegAsDVector;
   1220         break;
   1221     }
   1222   }
   1223 
   1224   switch (lane_size) {
   1225     default:
   1226       VIXL_UNREACHABLE();
   1227       break;
   1228     case kQRegSizeInBytes:
   1229       format |= kPrintReg1Q;
   1230       break;
   1231     case kDRegSizeInBytes:
   1232       format |= kPrintReg1D;
   1233       break;
   1234     case kSRegSizeInBytes:
   1235       format |= kPrintReg1S;
   1236       break;
   1237     case kHRegSizeInBytes:
   1238       format |= kPrintReg1H;
   1239       break;
   1240     case kBRegSizeInBytes:
   1241       format |= kPrintReg1B;
   1242       break;
   1243   }
   1244   // These sizes would be duplicate case labels.
   1245   VIXL_STATIC_ASSERT(kXRegSizeInBytes == kDRegSizeInBytes);
   1246   VIXL_STATIC_ASSERT(kWRegSizeInBytes == kSRegSizeInBytes);
   1247   VIXL_STATIC_ASSERT(kPrintXReg == kPrintReg1D);
   1248   VIXL_STATIC_ASSERT(kPrintWReg == kPrintReg1S);
   1249 
   1250   return static_cast<PrintRegisterFormat>(format);
   1251 }
   1252 
   1253 
   1254 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormat(
   1255     VectorFormat vform) {
   1256   switch (vform) {
   1257     default:
   1258       VIXL_UNREACHABLE();
   1259       return kPrintReg16B;
   1260     case kFormat16B:
   1261       return kPrintReg16B;
   1262     case kFormat8B:
   1263       return kPrintReg8B;
   1264     case kFormat8H:
   1265       return kPrintReg8H;
   1266     case kFormat4H:
   1267       return kPrintReg4H;
   1268     case kFormat4S:
   1269       return kPrintReg4S;
   1270     case kFormat2S:
   1271       return kPrintReg2S;
   1272     case kFormat2D:
   1273       return kPrintReg2D;
   1274     case kFormat1D:
   1275       return kPrintReg1D;
   1276 
   1277     case kFormatB:
   1278       return kPrintReg1B;
   1279     case kFormatH:
   1280       return kPrintReg1H;
   1281     case kFormatS:
   1282       return kPrintReg1S;
   1283     case kFormatD:
   1284       return kPrintReg1D;
   1285 
   1286     case kFormatVnB:
   1287       return kPrintRegVnB;
   1288     case kFormatVnH:
   1289       return kPrintRegVnH;
   1290     case kFormatVnS:
   1291       return kPrintRegVnS;
   1292     case kFormatVnD:
   1293       return kPrintRegVnD;
   1294   }
   1295 }
   1296 
   1297 
   1298 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatFP(
   1299     VectorFormat vform) {
   1300   switch (vform) {
   1301     default:
   1302       VIXL_UNREACHABLE();
   1303       return kPrintReg16B;
   1304     case kFormat8H:
   1305       return kPrintReg8HFP;
   1306     case kFormat4H:
   1307       return kPrintReg4HFP;
   1308     case kFormat4S:
   1309       return kPrintReg4SFP;
   1310     case kFormat2S:
   1311       return kPrintReg2SFP;
   1312     case kFormat2D:
   1313       return kPrintReg2DFP;
   1314     case kFormat1D:
   1315       return kPrintReg1DFP;
   1316     case kFormatH:
   1317       return kPrintReg1HFP;
   1318     case kFormatS:
   1319       return kPrintReg1SFP;
   1320     case kFormatD:
   1321       return kPrintReg1DFP;
   1322   }
   1323 }
   1324 
   1325 void Simulator::PrintRegisters() {
   1326   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
   1327     if (i == kSpRegCode) i = kSPRegInternalCode;
   1328     PrintRegister(i);
   1329   }
   1330 }
   1331 
   1332 void Simulator::PrintVRegisters() {
   1333   for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
   1334     PrintVRegister(i);
   1335   }
   1336 }
   1337 
   1338 void Simulator::PrintZRegisters() {
   1339   for (unsigned i = 0; i < kNumberOfZRegisters; i++) {
   1340     PrintZRegister(i);
   1341   }
   1342 }
   1343 
   1344 void Simulator::PrintWrittenRegisters() {
   1345   for (unsigned i = 0; i < kNumberOfRegisters; i++) {
   1346     if (registers_[i].WrittenSinceLastLog()) {
   1347       if (i == kSpRegCode) i = kSPRegInternalCode;
   1348       PrintRegister(i);
   1349     }
   1350   }
   1351 }
   1352 
   1353 void Simulator::PrintWrittenVRegisters() {
   1354   bool has_sve = GetCPUFeatures()->Has(CPUFeatures::kSVE);
   1355   for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
   1356     if (vregisters_[i].WrittenSinceLastLog()) {
   1357       // Z registers are initialised in the constructor before the user can
   1358       // configure the CPU features, so we must also check for SVE here.
   1359       if (vregisters_[i].AccessedAsZSinceLastLog() && has_sve) {
   1360         PrintZRegister(i);
   1361       } else {
   1362         PrintVRegister(i);
   1363       }
   1364     }
   1365   }
   1366 }
   1367 
   1368 void Simulator::PrintWrittenPRegisters() {
   1369   // P registers are initialised in the constructor before the user can
   1370   // configure the CPU features, so we must check for SVE here.
   1371   if (!GetCPUFeatures()->Has(CPUFeatures::kSVE)) return;
   1372   for (unsigned i = 0; i < kNumberOfPRegisters; i++) {
   1373     if (pregisters_[i].WrittenSinceLastLog()) {
   1374       PrintPRegister(i);
   1375     }
   1376   }
   1377   if (ReadFFR().WrittenSinceLastLog()) PrintFFR();
   1378 }
   1379 
   1380 void Simulator::PrintSystemRegisters() {
   1381   PrintSystemRegister(NZCV);
   1382   PrintSystemRegister(FPCR);
   1383 }
   1384 
   1385 void Simulator::PrintRegisterValue(const uint8_t* value,
   1386                                    int value_size,
   1387                                    PrintRegisterFormat format) {
   1388   int print_width = GetPrintRegSizeInBytes(format);
   1389   VIXL_ASSERT(print_width <= value_size);
   1390   for (int i = value_size - 1; i >= print_width; i--) {
   1391     // Pad with spaces so that values align vertically.
   1392     fprintf(stream_, "  ");
   1393     // If we aren't explicitly printing a partial value, ensure that the
   1394     // unprinted bits are zero.
   1395     VIXL_ASSERT(((format & kPrintRegPartial) != 0) || (value[i] == 0));
   1396   }
   1397   fprintf(stream_, "0x");
   1398   for (int i = print_width - 1; i >= 0; i--) {
   1399     fprintf(stream_, "%02x", value[i]);
   1400   }
   1401 }
   1402 
   1403 void Simulator::PrintRegisterValueFPAnnotations(const uint8_t* value,
   1404                                                 uint16_t lane_mask,
   1405                                                 PrintRegisterFormat format) {
   1406   VIXL_ASSERT((format & kPrintRegAsFP) != 0);
   1407   int lane_size = GetPrintRegLaneSizeInBytes(format);
   1408   fprintf(stream_, " (");
   1409   bool last_inactive = false;
   1410   const char* sep = "";
   1411   for (int i = GetPrintRegLaneCount(format) - 1; i >= 0; i--, sep = ", ") {
   1412     bool access = (lane_mask & (1 << (i * lane_size))) != 0;
   1413     if (access) {
   1414       // Read the lane as a double, so we can format all FP types in the same
   1415       // way. We squash NaNs, and a double can exactly represent any other value
   1416       // that the smaller types can represent, so this is lossless.
   1417       double element;
   1418       switch (lane_size) {
   1419         case kHRegSizeInBytes: {
   1420           Float16 element_fp16;
   1421           VIXL_STATIC_ASSERT(sizeof(element_fp16) == kHRegSizeInBytes);
   1422           memcpy(&element_fp16, &value[i * lane_size], sizeof(element_fp16));
   1423           element = FPToDouble(element_fp16, kUseDefaultNaN);
   1424           break;
   1425         }
   1426         case kSRegSizeInBytes: {
   1427           float element_fp32;
   1428           memcpy(&element_fp32, &value[i * lane_size], sizeof(element_fp32));
   1429           element = static_cast<double>(element_fp32);
   1430           break;
   1431         }
   1432         case kDRegSizeInBytes: {
   1433           memcpy(&element, &value[i * lane_size], sizeof(element));
   1434           break;
   1435         }
   1436         default:
   1437           VIXL_UNREACHABLE();
   1438           fprintf(stream_, "{UnknownFPValue}");
   1439           continue;
   1440       }
   1441       if (IsNaN(element)) {
   1442         // The fprintf behaviour for NaNs is implementation-defined. Always
   1443         // print "nan", so that traces are consistent.
   1444         fprintf(stream_, "%s%snan%s", sep, clr_vreg_value, clr_normal);
   1445       } else {
   1446         fprintf(stream_,
   1447                 "%s%s%#.4g%s",
   1448                 sep,
   1449                 clr_vreg_value,
   1450                 element,
   1451                 clr_normal);
   1452       }
   1453       last_inactive = false;
   1454     } else if (!last_inactive) {
   1455       // Replace each contiguous sequence of inactive lanes with "...".
   1456       fprintf(stream_, "%s...", sep);
   1457       last_inactive = true;
   1458     }
   1459   }
   1460   fprintf(stream_, ")");
   1461 }
   1462 
   1463 void Simulator::PrintRegister(int code,
   1464                               PrintRegisterFormat format,
   1465                               const char* suffix) {
   1466   VIXL_ASSERT((static_cast<unsigned>(code) < kNumberOfRegisters) ||
   1467               (static_cast<unsigned>(code) == kSPRegInternalCode));
   1468   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsScalar);
   1469   VIXL_ASSERT((format & kPrintRegAsFP) == 0);
   1470 
   1471   SimRegister* reg;
   1472   SimRegister zero;
   1473   if (code == kZeroRegCode) {
   1474     reg = &zero;
   1475   } else {
   1476     // registers_[31] holds the SP.
   1477     VIXL_STATIC_ASSERT((kSPRegInternalCode % kNumberOfRegisters) == 31);
   1478     reg = &registers_[code % kNumberOfRegisters];
   1479   }
   1480 
   1481   // We trace register writes as whole register values, implying that any
   1482   // unprinted bits are all zero:
   1483   //   "#       x{code}: 0x{-----value----}"
   1484   //   "#       w{code}:         0x{-value}"
   1485   // Stores trace partial register values, implying nothing about the unprinted
   1486   // bits:
   1487   //   "# x{code}<63:0>: 0x{-----value----}"
   1488   //   "# x{code}<31:0>:         0x{-value}"
   1489   //   "# x{code}<15:0>:             0x{--}"
   1490   //   "#  x{code}<7:0>:               0x{}"
   1491 
   1492   bool is_partial = (format & kPrintRegPartial) != 0;
   1493   unsigned print_reg_size = GetPrintRegSizeInBits(format);
   1494   std::stringstream name;
   1495   if (is_partial) {
   1496     name << XRegNameForCode(code) << GetPartialRegSuffix(format);
   1497   } else {
   1498     // Notify the register that it has been logged, but only if we're printing
   1499     // all of it.
   1500     reg->NotifyRegisterLogged();
   1501     switch (print_reg_size) {
   1502       case kWRegSize:
   1503         name << WRegNameForCode(code);
   1504         break;
   1505       case kXRegSize:
   1506         name << XRegNameForCode(code);
   1507         break;
   1508       default:
   1509         VIXL_UNREACHABLE();
   1510         return;
   1511     }
   1512   }
   1513 
   1514   fprintf(stream_,
   1515           "# %s%*s: %s",
   1516           clr_reg_name,
   1517           kPrintRegisterNameFieldWidth,
   1518           name.str().c_str(),
   1519           clr_reg_value);
   1520   PrintRegisterValue(*reg, format);
   1521   fprintf(stream_, "%s%s", clr_normal, suffix);
   1522 }
   1523 
   1524 void Simulator::PrintVRegister(int code,
   1525                                PrintRegisterFormat format,
   1526                                const char* suffix) {
   1527   VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfVRegisters);
   1528   VIXL_ASSERT(((format & kPrintRegAsVectorMask) == kPrintRegAsScalar) ||
   1529               ((format & kPrintRegAsVectorMask) == kPrintRegAsDVector) ||
   1530               ((format & kPrintRegAsVectorMask) == kPrintRegAsQVector));
   1531 
   1532   // We trace register writes as whole register values, implying that any
   1533   // unprinted bits are all zero:
   1534   //   "#        v{code}: 0x{-------------value------------}"
   1535   //   "#        d{code}:                 0x{-----value----}"
   1536   //   "#        s{code}:                         0x{-value}"
   1537   //   "#        h{code}:                             0x{--}"
   1538   //   "#        b{code}:                               0x{}"
   1539   // Stores trace partial register values, implying nothing about the unprinted
   1540   // bits:
   1541   //   "# v{code}<127:0>: 0x{-------------value------------}"
   1542   //   "#  v{code}<63:0>:                 0x{-----value----}"
   1543   //   "#  v{code}<31:0>:                         0x{-value}"
   1544   //   "#  v{code}<15:0>:                             0x{--}"
   1545   //   "#   v{code}<7:0>:                               0x{}"
   1546 
   1547   bool is_partial = ((format & kPrintRegPartial) != 0);
   1548   std::stringstream name;
   1549   unsigned print_reg_size = GetPrintRegSizeInBits(format);
   1550   if (is_partial) {
   1551     name << VRegNameForCode(code) << GetPartialRegSuffix(format);
   1552   } else {
   1553     // Notify the register that it has been logged, but only if we're printing
   1554     // all of it.
   1555     vregisters_[code].NotifyRegisterLogged();
   1556     switch (print_reg_size) {
   1557       case kBRegSize:
   1558         name << BRegNameForCode(code);
   1559         break;
   1560       case kHRegSize:
   1561         name << HRegNameForCode(code);
   1562         break;
   1563       case kSRegSize:
   1564         name << SRegNameForCode(code);
   1565         break;
   1566       case kDRegSize:
   1567         name << DRegNameForCode(code);
   1568         break;
   1569       case kQRegSize:
   1570         name << VRegNameForCode(code);
   1571         break;
   1572       default:
   1573         VIXL_UNREACHABLE();
   1574         return;
   1575     }
   1576   }
   1577 
   1578   fprintf(stream_,
   1579           "# %s%*s: %s",
   1580           clr_vreg_name,
   1581           kPrintRegisterNameFieldWidth,
   1582           name.str().c_str(),
   1583           clr_vreg_value);
   1584   PrintRegisterValue(vregisters_[code], format);
   1585   fprintf(stream_, "%s", clr_normal);
   1586   if ((format & kPrintRegAsFP) != 0) {
   1587     PrintRegisterValueFPAnnotations(vregisters_[code], format);
   1588   }
   1589   fprintf(stream_, "%s", suffix);
   1590 }
   1591 
   1592 void Simulator::PrintVRegistersForStructuredAccess(int rt_code,
   1593                                                    int reg_count,
   1594                                                    uint16_t focus_mask,
   1595                                                    PrintRegisterFormat format) {
   1596   bool print_fp = (format & kPrintRegAsFP) != 0;
   1597   // Suppress FP formatting, so we can specify the lanes we're interested in.
   1598   PrintRegisterFormat format_no_fp =
   1599       static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP);
   1600 
   1601   for (int r = 0; r < reg_count; r++) {
   1602     int code = (rt_code + r) % kNumberOfVRegisters;
   1603     PrintVRegister(code, format_no_fp, "");
   1604     if (print_fp) {
   1605       PrintRegisterValueFPAnnotations(vregisters_[code], focus_mask, format);
   1606     }
   1607     fprintf(stream_, "\n");
   1608   }
   1609 }
   1610 
   1611 void Simulator::PrintZRegistersForStructuredAccess(int rt_code,
   1612                                                    int q_index,
   1613                                                    int reg_count,
   1614                                                    uint16_t focus_mask,
   1615                                                    PrintRegisterFormat format) {
   1616   bool print_fp = (format & kPrintRegAsFP) != 0;
   1617   // Suppress FP formatting, so we can specify the lanes we're interested in.
   1618   PrintRegisterFormat format_no_fp =
   1619       static_cast<PrintRegisterFormat>(format & ~kPrintRegAsFP);
   1620 
   1621   PrintRegisterFormat format_q = GetPrintRegAsQChunkOfSVE(format);
   1622 
   1623   const unsigned size = kQRegSizeInBytes;
   1624   unsigned byte_index = q_index * size;
   1625   const uint8_t* value = vregisters_[rt_code].GetBytes() + byte_index;
   1626   VIXL_ASSERT((byte_index + size) <= vregisters_[rt_code].GetSizeInBytes());
   1627 
   1628   for (int r = 0; r < reg_count; r++) {
   1629     int code = (rt_code + r) % kNumberOfZRegisters;
   1630     PrintPartialZRegister(code, q_index, format_no_fp, "");
   1631     if (print_fp) {
   1632       PrintRegisterValueFPAnnotations(value, focus_mask, format_q);
   1633     }
   1634     fprintf(stream_, "\n");
   1635   }
   1636 }
   1637 
   1638 void Simulator::PrintZRegister(int code, PrintRegisterFormat format) {
   1639   // We're going to print the register in parts, so force a partial format.
   1640   format = GetPrintRegPartial(format);
   1641   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
   1642   int vl = GetVectorLengthInBits();
   1643   VIXL_ASSERT((vl % kQRegSize) == 0);
   1644   for (unsigned i = 0; i < (vl / kQRegSize); i++) {
   1645     PrintPartialZRegister(code, i, format);
   1646   }
   1647   vregisters_[code].NotifyRegisterLogged();
   1648 }
   1649 
   1650 void Simulator::PrintPRegister(int code, PrintRegisterFormat format) {
   1651   // We're going to print the register in parts, so force a partial format.
   1652   format = GetPrintRegPartial(format);
   1653   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
   1654   int vl = GetVectorLengthInBits();
   1655   VIXL_ASSERT((vl % kQRegSize) == 0);
   1656   for (unsigned i = 0; i < (vl / kQRegSize); i++) {
   1657     PrintPartialPRegister(code, i, format);
   1658   }
   1659   pregisters_[code].NotifyRegisterLogged();
   1660 }
   1661 
   1662 void Simulator::PrintFFR(PrintRegisterFormat format) {
   1663   // We're going to print the register in parts, so force a partial format.
   1664   format = GetPrintRegPartial(format);
   1665   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
   1666   int vl = GetVectorLengthInBits();
   1667   VIXL_ASSERT((vl % kQRegSize) == 0);
   1668   SimPRegister& ffr = ReadFFR();
   1669   for (unsigned i = 0; i < (vl / kQRegSize); i++) {
   1670     PrintPartialPRegister("FFR", ffr, i, format);
   1671   }
   1672   ffr.NotifyRegisterLogged();
   1673 }
   1674 
   1675 void Simulator::PrintPartialZRegister(int code,
   1676                                       int q_index,
   1677                                       PrintRegisterFormat format,
   1678                                       const char* suffix) {
   1679   VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfZRegisters);
   1680   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
   1681   VIXL_ASSERT((format & kPrintRegPartial) != 0);
   1682   VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits());
   1683 
   1684   // We _only_ trace partial Z register values in Q-sized chunks, because
   1685   // they're often too large to reasonably fit on a single line. Each line
   1686   // implies nothing about the unprinted bits.
   1687   //   "# z{code}<127:0>: 0x{-------------value------------}"
   1688 
   1689   format = GetPrintRegAsQChunkOfSVE(format);
   1690 
   1691   const unsigned size = kQRegSizeInBytes;
   1692   unsigned byte_index = q_index * size;
   1693   const uint8_t* value = vregisters_[code].GetBytes() + byte_index;
   1694   VIXL_ASSERT((byte_index + size) <= vregisters_[code].GetSizeInBytes());
   1695 
   1696   int lsb = q_index * kQRegSize;
   1697   int msb = lsb + kQRegSize - 1;
   1698   std::stringstream name;
   1699   name << ZRegNameForCode(code) << '<' << msb << ':' << lsb << '>';
   1700 
   1701   fprintf(stream_,
   1702           "# %s%*s: %s",
   1703           clr_vreg_name,
   1704           kPrintRegisterNameFieldWidth,
   1705           name.str().c_str(),
   1706           clr_vreg_value);
   1707   PrintRegisterValue(value, size, format);
   1708   fprintf(stream_, "%s", clr_normal);
   1709   if ((format & kPrintRegAsFP) != 0) {
   1710     PrintRegisterValueFPAnnotations(value, GetPrintRegLaneMask(format), format);
   1711   }
   1712   fprintf(stream_, "%s", suffix);
   1713 }
   1714 
   1715 void Simulator::PrintPartialPRegister(const char* name,
   1716                                       const SimPRegister& reg,
   1717                                       int q_index,
   1718                                       PrintRegisterFormat format,
   1719                                       const char* suffix) {
   1720   VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector);
   1721   VIXL_ASSERT((format & kPrintRegPartial) != 0);
   1722   VIXL_ASSERT((q_index * kQRegSize) < GetVectorLengthInBits());
   1723 
   1724   // We don't currently use the format for anything here.
   1725   USE(format);
   1726 
   1727   // We _only_ trace partial P register values, because they're often too large
   1728   // to reasonably fit on a single line. Each line implies nothing about the
   1729   // unprinted bits.
   1730   //
   1731   // We print values in binary, with spaces between each bit, in order for the
   1732   // bits to align with the Z register bytes that they predicate.
   1733   //   "# {name}<15:0>: 0b{-------------value------------}"
   1734 
   1735   int print_size_in_bits = kQRegSize / kZRegBitsPerPRegBit;
   1736   int lsb = q_index * print_size_in_bits;
   1737   int msb = lsb + print_size_in_bits - 1;
   1738   std::stringstream prefix;
   1739   prefix << name << '<' << msb << ':' << lsb << '>';
   1740 
   1741   fprintf(stream_,
   1742           "# %s%*s: %s0b",
   1743           clr_preg_name,
   1744           kPrintRegisterNameFieldWidth,
   1745           prefix.str().c_str(),
   1746           clr_preg_value);
   1747   for (int i = msb; i >= lsb; i--) {
   1748     fprintf(stream_, " %c", reg.GetBit(i) ? '1' : '0');
   1749   }
   1750   fprintf(stream_, "%s%s", clr_normal, suffix);
   1751 }
   1752 
   1753 void Simulator::PrintPartialPRegister(int code,
   1754                                       int q_index,
   1755                                       PrintRegisterFormat format,
   1756                                       const char* suffix) {
   1757   VIXL_ASSERT(static_cast<unsigned>(code) < kNumberOfPRegisters);
   1758   PrintPartialPRegister(PRegNameForCode(code),
   1759                         pregisters_[code],
   1760                         q_index,
   1761                         format,
   1762                         suffix);
   1763 }
   1764 
   1765 void Simulator::PrintSystemRegister(SystemRegister id) {
   1766   switch (id) {
   1767     case NZCV:
   1768       fprintf(stream_,
   1769               "# %sNZCV: %sN:%d Z:%d C:%d V:%d%s\n",
   1770               clr_flag_name,
   1771               clr_flag_value,
   1772               ReadNzcv().GetN(),
   1773               ReadNzcv().GetZ(),
   1774               ReadNzcv().GetC(),
   1775               ReadNzcv().GetV(),
   1776               clr_normal);
   1777       break;
   1778     case FPCR: {
   1779       static const char* rmode[] = {"0b00 (Round to Nearest)",
   1780                                     "0b01 (Round towards Plus Infinity)",
   1781                                     "0b10 (Round towards Minus Infinity)",
   1782                                     "0b11 (Round towards Zero)"};
   1783       VIXL_ASSERT(ReadFpcr().GetRMode() < ArrayLength(rmode));
   1784       fprintf(stream_,
   1785               "# %sFPCR: %sAHP:%d DN:%d FZ:%d RMode:%s%s\n",
   1786               clr_flag_name,
   1787               clr_flag_value,
   1788               ReadFpcr().GetAHP(),
   1789               ReadFpcr().GetDN(),
   1790               ReadFpcr().GetFZ(),
   1791               rmode[ReadFpcr().GetRMode()],
   1792               clr_normal);
   1793       break;
   1794     }
   1795     default:
   1796       VIXL_UNREACHABLE();
   1797   }
   1798 }
   1799 
   1800 uint16_t Simulator::PrintPartialAccess(uint16_t access_mask,
   1801                                        uint16_t future_access_mask,
   1802                                        int struct_element_count,
   1803                                        int lane_size_in_bytes,
   1804                                        const char* op,
   1805                                        uintptr_t address,
   1806                                        int reg_size_in_bytes) {
   1807   // We want to assume that we'll access at least one lane.
   1808   VIXL_ASSERT(access_mask != 0);
   1809   VIXL_ASSERT((reg_size_in_bytes == kXRegSizeInBytes) ||
   1810               (reg_size_in_bytes == kQRegSizeInBytes));
   1811   bool started_annotation = false;
   1812   // Indent to match the register field, the fixed formatting, and the value
   1813   // prefix ("0x"): "# {name}: 0x"
   1814   fprintf(stream_, "# %*s    ", kPrintRegisterNameFieldWidth, "");
   1815   // First, annotate the lanes (byte by byte).
   1816   for (int lane = reg_size_in_bytes - 1; lane >= 0; lane--) {
   1817     bool access = (access_mask & (1 << lane)) != 0;
   1818     bool future = (future_access_mask & (1 << lane)) != 0;
   1819     if (started_annotation) {
   1820       // If we've started an annotation, draw a horizontal line in addition to
   1821       // any other symbols.
   1822       if (access) {
   1823         fprintf(stream_, "─╨");
   1824       } else if (future) {
   1825         fprintf(stream_, "─║");
   1826       } else {
   1827         fprintf(stream_, "──");
   1828       }
   1829     } else {
   1830       if (access) {
   1831         started_annotation = true;
   1832         fprintf(stream_, " â•™");
   1833       } else if (future) {
   1834         fprintf(stream_, " â•‘");
   1835       } else {
   1836         fprintf(stream_, "  ");
   1837       }
   1838     }
   1839   }
   1840   VIXL_ASSERT(started_annotation);
   1841   fprintf(stream_, "─ 0x");
   1842   int lane_size_in_nibbles = lane_size_in_bytes * 2;
   1843   // Print the most-significant struct element first.
   1844   const char* sep = "";
   1845   for (int i = struct_element_count - 1; i >= 0; i--) {
   1846     int offset = lane_size_in_bytes * i;
   1847     auto nibble = MemReadUint(lane_size_in_bytes, address + offset);
   1848     VIXL_ASSERT(nibble);
   1849     fprintf(stream_, "%s%0*" PRIx64, sep, lane_size_in_nibbles, *nibble);
   1850     sep = "'";
   1851   }
   1852   fprintf(stream_,
   1853           " %s %s0x%016" PRIxPTR "%s\n",
   1854           op,
   1855           clr_memory_address,
   1856           address,
   1857           clr_normal);
   1858   return future_access_mask & ~access_mask;
   1859 }
   1860 
   1861 void Simulator::PrintAccess(int code,
   1862                             PrintRegisterFormat format,
   1863                             const char* op,
   1864                             uintptr_t address) {
   1865   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
   1866   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
   1867   if ((format & kPrintRegPartial) == 0) {
   1868     if (code != kZeroRegCode) {
   1869       registers_[code].NotifyRegisterLogged();
   1870     }
   1871   }
   1872   // Scalar-format accesses use a simple format:
   1873   //   "# {reg}: 0x{value} -> {address}"
   1874 
   1875   // Suppress the newline, so the access annotation goes on the same line.
   1876   PrintRegister(code, format, "");
   1877   fprintf(stream_,
   1878           " %s %s0x%016" PRIxPTR "%s\n",
   1879           op,
   1880           clr_memory_address,
   1881           address,
   1882           clr_normal);
   1883 }
   1884 
   1885 void Simulator::PrintVAccess(int code,
   1886                              PrintRegisterFormat format,
   1887                              const char* op,
   1888                              uintptr_t address) {
   1889   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
   1890 
   1891   // Scalar-format accesses use a simple format:
   1892   //   "# v{code}: 0x{value} -> {address}"
   1893 
   1894   // Suppress the newline, so the access annotation goes on the same line.
   1895   PrintVRegister(code, format, "");
   1896   fprintf(stream_,
   1897           " %s %s0x%016" PRIxPTR "%s\n",
   1898           op,
   1899           clr_memory_address,
   1900           address,
   1901           clr_normal);
   1902 }
   1903 
   1904 void Simulator::PrintVStructAccess(int rt_code,
   1905                                    int reg_count,
   1906                                    PrintRegisterFormat format,
   1907                                    const char* op,
   1908                                    uintptr_t address) {
   1909   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
   1910 
   1911   // For example:
   1912   //   "# v{code}: 0x{value}"
   1913   //   "#     ...: 0x{value}"
   1914   //   "#              ║   ╙─ {struct_value} -> {lowest_address}"
   1915   //   "#              ╙───── {struct_value} -> {highest_address}"
   1916 
   1917   uint16_t lane_mask = GetPrintRegLaneMask(format);
   1918   PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
   1919 
   1920   int reg_size_in_bytes = GetPrintRegSizeInBytes(format);
   1921   int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
   1922   for (int i = 0; i < reg_size_in_bytes; i += lane_size_in_bytes) {
   1923     uint16_t access_mask = 1 << i;
   1924     VIXL_ASSERT((lane_mask & access_mask) != 0);
   1925     lane_mask = PrintPartialAccess(access_mask,
   1926                                    lane_mask,
   1927                                    reg_count,
   1928                                    lane_size_in_bytes,
   1929                                    op,
   1930                                    address + (i * reg_count));
   1931   }
   1932 }
   1933 
   1934 void Simulator::PrintVSingleStructAccess(int rt_code,
   1935                                          int reg_count,
   1936                                          int lane,
   1937                                          PrintRegisterFormat format,
   1938                                          const char* op,
   1939                                          uintptr_t address) {
   1940   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
   1941 
   1942   // For example:
   1943   //   "# v{code}: 0x{value}"
   1944   //   "#     ...: 0x{value}"
   1945   //   "#              ╙───── {struct_value} -> {address}"
   1946 
   1947   int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
   1948   uint16_t lane_mask = 1 << (lane * lane_size_in_bytes);
   1949   PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
   1950   PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address);
   1951 }
   1952 
   1953 void Simulator::PrintVReplicatingStructAccess(int rt_code,
   1954                                               int reg_count,
   1955                                               PrintRegisterFormat format,
   1956                                               const char* op,
   1957                                               uintptr_t address) {
   1958   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
   1959 
   1960   // For example:
   1961   //   "# v{code}: 0x{value}"
   1962   //   "#     ...: 0x{value}"
   1963   //   "#            ╙─╨─╨─╨─ {struct_value} -> {address}"
   1964 
   1965   int lane_size_in_bytes = GetPrintRegLaneSizeInBytes(format);
   1966   uint16_t lane_mask = GetPrintRegLaneMask(format);
   1967   PrintVRegistersForStructuredAccess(rt_code, reg_count, lane_mask, format);
   1968   PrintPartialAccess(lane_mask, 0, reg_count, lane_size_in_bytes, op, address);
   1969 }
   1970 
   1971 void Simulator::PrintZAccess(int rt_code, const char* op, uintptr_t address) {
   1972   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
   1973 
   1974   // Scalar-format accesses are split into separate chunks, each of which uses a
   1975   // simple format:
   1976   //   "#   z{code}<127:0>: 0x{value} -> {address}"
   1977   //   "# z{code}<255:128>: 0x{value} -> {address + 16}"
   1978   //   "# z{code}<383:256>: 0x{value} -> {address + 32}"
   1979   // etc
   1980 
   1981   int vl = GetVectorLengthInBits();
   1982   VIXL_ASSERT((vl % kQRegSize) == 0);
   1983   for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
   1984     // Suppress the newline, so the access annotation goes on the same line.
   1985     PrintPartialZRegister(rt_code, q_index, kPrintRegVnQPartial, "");
   1986     fprintf(stream_,
   1987             " %s %s0x%016" PRIxPTR "%s\n",
   1988             op,
   1989             clr_memory_address,
   1990             address,
   1991             clr_normal);
   1992     address += kQRegSizeInBytes;
   1993   }
   1994 }
   1995 
   1996 void Simulator::PrintZStructAccess(int rt_code,
   1997                                    int reg_count,
   1998                                    const LogicPRegister& pg,
   1999                                    PrintRegisterFormat format,
   2000                                    int msize_in_bytes,
   2001                                    const char* op,
   2002                                    const LogicSVEAddressVector& addr) {
   2003   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
   2004 
   2005   // For example:
   2006   //   "# z{code}<255:128>: 0x{value}"
   2007   //   "#     ...<255:128>: 0x{value}"
   2008   //   "#                       ║   ╙─ {struct_value} -> {first_address}"
   2009   //   "#                       ╙───── {struct_value} -> {last_address}"
   2010 
   2011   // We're going to print the register in parts, so force a partial format.
   2012   bool skip_inactive_chunks = (format & kPrintRegPartial) != 0;
   2013   format = GetPrintRegPartial(format);
   2014 
   2015   int esize_in_bytes = GetPrintRegLaneSizeInBytes(format);
   2016   int vl = GetVectorLengthInBits();
   2017   VIXL_ASSERT((vl % kQRegSize) == 0);
   2018   int lanes_per_q = kQRegSizeInBytes / esize_in_bytes;
   2019   for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
   2020     uint16_t pred =
   2021         pg.GetActiveMask<uint16_t>(q_index) & GetPrintRegLaneMask(format);
   2022     if ((pred == 0) && skip_inactive_chunks) continue;
   2023 
   2024     PrintZRegistersForStructuredAccess(rt_code,
   2025                                        q_index,
   2026                                        reg_count,
   2027                                        pred,
   2028                                        format);
   2029     if (pred == 0) {
   2030       // This register chunk has no active lanes. The loop below would print
   2031       // nothing, so leave a blank line to keep structures grouped together.
   2032       fprintf(stream_, "#\n");
   2033       continue;
   2034     }
   2035     for (int i = 0; i < lanes_per_q; i++) {
   2036       uint16_t access = 1 << (i * esize_in_bytes);
   2037       int lane = (q_index * lanes_per_q) + i;
   2038       // Skip inactive lanes.
   2039       if ((pred & access) == 0) continue;
   2040       pred = PrintPartialAccess(access,
   2041                                 pred,
   2042                                 reg_count,
   2043                                 msize_in_bytes,
   2044                                 op,
   2045                                 addr.GetStructAddress(lane));
   2046     }
   2047   }
   2048 
   2049   // We print the whole register, even for stores.
   2050   for (int i = 0; i < reg_count; i++) {
   2051     vregisters_[(rt_code + i) % kNumberOfZRegisters].NotifyRegisterLogged();
   2052   }
   2053 }
   2054 
   2055 void Simulator::PrintPAccess(int code, const char* op, uintptr_t address) {
   2056   VIXL_ASSERT((strcmp(op, "->") == 0) || (strcmp(op, "<-") == 0));
   2057 
   2058   // Scalar-format accesses are split into separate chunks, each of which uses a
   2059   // simple format:
   2060   //   "#  p{code}<15:0>: 0b{value} -> {address}"
   2061   //   "# p{code}<31:16>: 0b{value} -> {address + 2}"
   2062   //   "# p{code}<47:32>: 0b{value} -> {address + 4}"
   2063   // etc
   2064 
   2065   int vl = GetVectorLengthInBits();
   2066   VIXL_ASSERT((vl % kQRegSize) == 0);
   2067   for (unsigned q_index = 0; q_index < (vl / kQRegSize); q_index++) {
   2068     // Suppress the newline, so the access annotation goes on the same line.
   2069     PrintPartialPRegister(code, q_index, kPrintRegVnQPartial, "");
   2070     fprintf(stream_,
   2071             " %s %s0x%016" PRIxPTR "%s\n",
   2072             op,
   2073             clr_memory_address,
   2074             address,
   2075             clr_normal);
   2076     address += kQRegSizeInBytes;
   2077   }
   2078 }
   2079 
   2080 void Simulator::PrintMemTransfer(uintptr_t dst, uintptr_t src, uint8_t value) {
   2081   fprintf(stream_,
   2082           "#               %s: %s0x%016" PRIxPTR " %s<- %s0x%02x%s",
   2083           clr_reg_name,
   2084           clr_memory_address,
   2085           dst,
   2086           clr_normal,
   2087           clr_reg_value,
   2088           value,
   2089           clr_normal);
   2090 
   2091   fprintf(stream_,
   2092           " <- %s0x%016" PRIxPTR "%s\n",
   2093           clr_memory_address,
   2094           src,
   2095           clr_normal);
   2096 }
   2097 
   2098 void Simulator::PrintRead(int rt_code,
   2099                           PrintRegisterFormat format,
   2100                           uintptr_t address) {
   2101   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
   2102   if (rt_code != kZeroRegCode) {
   2103     registers_[rt_code].NotifyRegisterLogged();
   2104   }
   2105   PrintAccess(rt_code, format, "<-", address);
   2106 }
   2107 
   2108 void Simulator::PrintExtendingRead(int rt_code,
   2109                                    PrintRegisterFormat format,
   2110                                    int access_size_in_bytes,
   2111                                    uintptr_t address) {
   2112   int reg_size_in_bytes = GetPrintRegSizeInBytes(format);
   2113   if (access_size_in_bytes == reg_size_in_bytes) {
   2114     // There is no extension here, so print a simple load.
   2115     PrintRead(rt_code, format, address);
   2116     return;
   2117   }
   2118   VIXL_ASSERT(access_size_in_bytes < reg_size_in_bytes);
   2119 
   2120   // For sign- and zero-extension, make it clear that the resulting register
   2121   // value is different from what is loaded from memory.
   2122   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
   2123   if (rt_code != kZeroRegCode) {
   2124     registers_[rt_code].NotifyRegisterLogged();
   2125   }
   2126   PrintRegister(rt_code, format);
   2127   PrintPartialAccess(1,
   2128                      0,
   2129                      1,
   2130                      access_size_in_bytes,
   2131                      "<-",
   2132                      address,
   2133                      kXRegSizeInBytes);
   2134 }
   2135 
   2136 void Simulator::PrintVRead(int rt_code,
   2137                            PrintRegisterFormat format,
   2138                            uintptr_t address) {
   2139   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
   2140   vregisters_[rt_code].NotifyRegisterLogged();
   2141   PrintVAccess(rt_code, format, "<-", address);
   2142 }
   2143 
   2144 void Simulator::PrintWrite(int rt_code,
   2145                            PrintRegisterFormat format,
   2146                            uintptr_t address) {
   2147   // Because this trace doesn't represent a change to the source register's
   2148   // value, only print the relevant part of the value.
   2149   format = GetPrintRegPartial(format);
   2150   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
   2151   if (rt_code != kZeroRegCode) {
   2152     registers_[rt_code].NotifyRegisterLogged();
   2153   }
   2154   PrintAccess(rt_code, format, "->", address);
   2155 }
   2156 
   2157 void Simulator::PrintVWrite(int rt_code,
   2158                             PrintRegisterFormat format,
   2159                             uintptr_t address) {
   2160   // Because this trace doesn't represent a change to the source register's
   2161   // value, only print the relevant part of the value.
   2162   format = GetPrintRegPartial(format);
   2163   // It only makes sense to write scalar values here. Vectors are handled by
   2164   // PrintVStructAccess.
   2165   VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
   2166   PrintVAccess(rt_code, format, "->", address);
   2167 }
   2168 
   2169 void Simulator::PrintTakenBranch(const Instruction* target) {
   2170   fprintf(stream_,
   2171           "# %sBranch%s to 0x%016" PRIx64 ".\n",
   2172           clr_branch_marker,
   2173           clr_normal,
   2174           reinterpret_cast<uint64_t>(target));
   2175 }
   2176 
   2177 // Visitors---------------------------------------------------------------------
   2178 
   2179 
   2180 void Simulator::Visit(Metadata* metadata, const Instruction* instr) {
   2181   VIXL_ASSERT(metadata->count("form") > 0);
   2182   std::string form = (*metadata)["form"];
   2183   form_hash_ = Hash(form.c_str());
   2184   const FormToVisitorFnMap* fv = Simulator::GetFormToVisitorFnMap();
   2185   FormToVisitorFnMap::const_iterator it = fv->find(form_hash_);
   2186   if (it == fv->end()) {
   2187     VisitUnimplemented(instr);
   2188   } else {
   2189     (it->second)(this, instr);
   2190   }
   2191 }
   2192 
   2193 void Simulator::Simulate_PdT_PgZ_ZnT_ZmT(const Instruction* instr) {
   2194   VectorFormat vform = instr->GetSVEVectorFormat();
   2195   SimPRegister& pd = ReadPRegister(instr->GetPd());
   2196   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   2197   SimVRegister& zm = ReadVRegister(instr->GetRm());
   2198   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2199 
   2200   switch (form_hash_) {
   2201     case "match_p_p_zz"_h:
   2202       match(vform, pd, zn, zm, /* negate_match = */ false);
   2203       break;
   2204     case "nmatch_p_p_zz"_h:
   2205       match(vform, pd, zn, zm, /* negate_match = */ true);
   2206       break;
   2207     default:
   2208       VIXL_UNIMPLEMENTED();
   2209   }
   2210   mov_zeroing(pd, pg, pd);
   2211   PredTest(vform, pg, pd);
   2212 }
   2213 
   2214 void Simulator::Simulate_PdT_Xn_Xm(const Instruction* instr) {
   2215   VectorFormat vform = instr->GetSVEVectorFormat();
   2216   SimPRegister& pd = ReadPRegister(instr->GetPd());
   2217   uint64_t src1 = ReadXRegister(instr->GetRn());
   2218   uint64_t src2 = ReadXRegister(instr->GetRm());
   2219 
   2220   uint64_t absdiff = (src1 > src2) ? (src1 - src2) : (src2 - src1);
   2221   absdiff >>= LaneSizeInBytesLog2FromFormat(vform);
   2222 
   2223   bool no_conflict = false;
   2224   switch (form_hash_) {
   2225     case "whilerw_p_rr"_h:
   2226       no_conflict = (absdiff == 0);
   2227       break;
   2228     case "whilewr_p_rr"_h:
   2229       no_conflict = (absdiff == 0) || (src2 <= src1);
   2230       break;
   2231     default:
   2232       VIXL_UNIMPLEMENTED();
   2233   }
   2234 
   2235   LogicPRegister dst(pd);
   2236   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2237     dst.SetActive(vform,
   2238                   i,
   2239                   no_conflict || (static_cast<uint64_t>(i) < absdiff));
   2240   }
   2241 
   2242   PredTest(vform, GetPTrue(), pd);
   2243 }
   2244 
   2245 void Simulator::Simulate_ZdB_Zn1B_Zn2B_imm(const Instruction* instr) {
   2246   VIXL_ASSERT(form_hash_ == "ext_z_zi_con"_h);
   2247 
   2248   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2249   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2250   SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
   2251 
   2252   int index = instr->GetSVEExtractImmediate();
   2253   int vl = GetVectorLengthInBytes();
   2254   index = (index >= vl) ? 0 : index;
   2255 
   2256   ext(kFormatVnB, zd, zn, zn2, index);
   2257 }
   2258 
   2259 void Simulator::Simulate_ZdB_ZnB_ZmB(const Instruction* instr) {
   2260   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2261   SimVRegister& zm = ReadVRegister(instr->GetRm());
   2262   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2263 
   2264   switch (form_hash_) {
   2265     case "histseg_z_zz"_h:
   2266       if (instr->GetSVEVectorFormat() == kFormatVnB) {
   2267         histogram(kFormatVnB,
   2268                   zd,
   2269                   GetPTrue(),
   2270                   zn,
   2271                   zm,
   2272                   /* do_segmented = */ true);
   2273       } else {
   2274         VIXL_UNIMPLEMENTED();
   2275       }
   2276       break;
   2277     case "pmul_z_zz"_h:
   2278       pmul(kFormatVnB, zd, zn, zm);
   2279       break;
   2280     default:
   2281       VIXL_UNIMPLEMENTED();
   2282   }
   2283 }
   2284 
   2285 void Simulator::SimulateSVEMulIndex(const Instruction* instr) {
   2286   VectorFormat vform = instr->GetSVEVectorFormat();
   2287   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2288   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2289 
   2290   // The encoding for B and H-sized lanes are redefined to encode the most
   2291   // significant bit of index for H-sized lanes. B-sized lanes are not
   2292   // supported.
   2293   if (vform == kFormatVnB) vform = kFormatVnH;
   2294 
   2295   VIXL_ASSERT((form_hash_ == "mul_z_zzi_d"_h) ||
   2296               (form_hash_ == "mul_z_zzi_h"_h) ||
   2297               (form_hash_ == "mul_z_zzi_s"_h));
   2298 
   2299   SimVRegister temp;
   2300   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
   2301   mul(vform, zd, zn, temp);
   2302 }
   2303 
   2304 void Simulator::SimulateSVEMlaMlsIndex(const Instruction* instr) {
   2305   VectorFormat vform = instr->GetSVEVectorFormat();
   2306   SimVRegister& zda = ReadVRegister(instr->GetRd());
   2307   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2308 
   2309   // The encoding for B and H-sized lanes are redefined to encode the most
   2310   // significant bit of index for H-sized lanes. B-sized lanes are not
   2311   // supported.
   2312   if (vform == kFormatVnB) vform = kFormatVnH;
   2313 
   2314   VIXL_ASSERT(
   2315       (form_hash_ == "mla_z_zzzi_d"_h) || (form_hash_ == "mla_z_zzzi_h"_h) ||
   2316       (form_hash_ == "mla_z_zzzi_s"_h) || (form_hash_ == "mls_z_zzzi_d"_h) ||
   2317       (form_hash_ == "mls_z_zzzi_h"_h) || (form_hash_ == "mls_z_zzzi_s"_h));
   2318 
   2319   SimVRegister temp;
   2320   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
   2321   if (instr->ExtractBit(10) == 0) {
   2322     mla(vform, zda, zda, zn, temp);
   2323   } else {
   2324     mls(vform, zda, zda, zn, temp);
   2325   }
   2326 }
   2327 
   2328 void Simulator::SimulateSVESaturatingMulHighIndex(const Instruction* instr) {
   2329   VectorFormat vform = instr->GetSVEVectorFormat();
   2330   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2331   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2332 
   2333   // The encoding for B and H-sized lanes are redefined to encode the most
   2334   // significant bit of index for H-sized lanes. B-sized lanes are not
   2335   // supported.
   2336   if (vform == kFormatVnB) {
   2337     vform = kFormatVnH;
   2338   }
   2339 
   2340   SimVRegister temp;
   2341   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
   2342   switch (form_hash_) {
   2343     case "sqdmulh_z_zzi_h"_h:
   2344     case "sqdmulh_z_zzi_s"_h:
   2345     case "sqdmulh_z_zzi_d"_h:
   2346       sqdmulh(vform, zd, zn, temp);
   2347       break;
   2348     case "sqrdmulh_z_zzi_h"_h:
   2349     case "sqrdmulh_z_zzi_s"_h:
   2350     case "sqrdmulh_z_zzi_d"_h:
   2351       sqrdmulh(vform, zd, zn, temp);
   2352       break;
   2353     default:
   2354       VIXL_UNIMPLEMENTED();
   2355   }
   2356 }
   2357 
   2358 void Simulator::SimulateSVESaturatingIntMulLongIdx(const Instruction* instr) {
   2359   VectorFormat vform = instr->GetSVEVectorFormat();
   2360   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2361   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2362 
   2363   SimVRegister temp, zm_idx, zn_b, zn_t;
   2364   // Instead of calling the indexed form of the instruction logic, we call the
   2365   // vector form, which can reuse existing function logic without modification.
   2366   // Select the specified elements based on the index input and than pack them
   2367   // to the corresponding position.
   2368   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2369   dup_elements_to_segments(vform_half, temp, instr->GetSVEMulLongZmAndIndex());
   2370   pack_even_elements(vform_half, zm_idx, temp);
   2371 
   2372   pack_even_elements(vform_half, zn_b, zn);
   2373   pack_odd_elements(vform_half, zn_t, zn);
   2374 
   2375   switch (form_hash_) {
   2376     case "smullb_z_zzi_s"_h:
   2377     case "smullb_z_zzi_d"_h:
   2378       smull(vform, zd, zn_b, zm_idx);
   2379       break;
   2380     case "smullt_z_zzi_s"_h:
   2381     case "smullt_z_zzi_d"_h:
   2382       smull(vform, zd, zn_t, zm_idx);
   2383       break;
   2384     case "sqdmullb_z_zzi_d"_h:
   2385       sqdmull(vform, zd, zn_b, zm_idx);
   2386       break;
   2387     case "sqdmullt_z_zzi_d"_h:
   2388       sqdmull(vform, zd, zn_t, zm_idx);
   2389       break;
   2390     case "umullb_z_zzi_s"_h:
   2391     case "umullb_z_zzi_d"_h:
   2392       umull(vform, zd, zn_b, zm_idx);
   2393       break;
   2394     case "umullt_z_zzi_s"_h:
   2395     case "umullt_z_zzi_d"_h:
   2396       umull(vform, zd, zn_t, zm_idx);
   2397       break;
   2398     case "sqdmullb_z_zzi_s"_h:
   2399       sqdmull(vform, zd, zn_b, zm_idx);
   2400       break;
   2401     case "sqdmullt_z_zzi_s"_h:
   2402       sqdmull(vform, zd, zn_t, zm_idx);
   2403       break;
   2404     case "smlalb_z_zzzi_s"_h:
   2405     case "smlalb_z_zzzi_d"_h:
   2406       smlal(vform, zd, zn_b, zm_idx);
   2407       break;
   2408     case "smlalt_z_zzzi_s"_h:
   2409     case "smlalt_z_zzzi_d"_h:
   2410       smlal(vform, zd, zn_t, zm_idx);
   2411       break;
   2412     case "smlslb_z_zzzi_s"_h:
   2413     case "smlslb_z_zzzi_d"_h:
   2414       smlsl(vform, zd, zn_b, zm_idx);
   2415       break;
   2416     case "smlslt_z_zzzi_s"_h:
   2417     case "smlslt_z_zzzi_d"_h:
   2418       smlsl(vform, zd, zn_t, zm_idx);
   2419       break;
   2420     case "umlalb_z_zzzi_s"_h:
   2421     case "umlalb_z_zzzi_d"_h:
   2422       umlal(vform, zd, zn_b, zm_idx);
   2423       break;
   2424     case "umlalt_z_zzzi_s"_h:
   2425     case "umlalt_z_zzzi_d"_h:
   2426       umlal(vform, zd, zn_t, zm_idx);
   2427       break;
   2428     case "umlslb_z_zzzi_s"_h:
   2429     case "umlslb_z_zzzi_d"_h:
   2430       umlsl(vform, zd, zn_b, zm_idx);
   2431       break;
   2432     case "umlslt_z_zzzi_s"_h:
   2433     case "umlslt_z_zzzi_d"_h:
   2434       umlsl(vform, zd, zn_t, zm_idx);
   2435       break;
   2436     default:
   2437       VIXL_UNIMPLEMENTED();
   2438   }
   2439 }
   2440 
   2441 void Simulator::Simulate_ZdH_PgM_ZnS(const Instruction* instr) {
   2442   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   2443   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2444   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2445   SimVRegister result, zd_b;
   2446 
   2447   pack_even_elements(kFormatVnH, zd_b, zd);
   2448 
   2449   switch (form_hash_) {
   2450     case "fcvtnt_z_p_z_s2h"_h:
   2451       fcvt(kFormatVnH, kFormatVnS, result, pg, zn);
   2452       pack_even_elements(kFormatVnH, result, result);
   2453       zip1(kFormatVnH, result, zd_b, result);
   2454       break;
   2455     default:
   2456       VIXL_UNIMPLEMENTED();
   2457   }
   2458   mov_merging(kFormatVnS, zd, pg, result);
   2459 }
   2460 
   2461 void Simulator::Simulate_ZdS_PgM_ZnD(const Instruction* instr) {
   2462   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   2463   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2464   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2465   SimVRegister result, zero, zd_b;
   2466 
   2467   zero.Clear();
   2468   pack_even_elements(kFormatVnS, zd_b, zd);
   2469 
   2470   switch (form_hash_) {
   2471     case "fcvtnt_z_p_z_d2s"_h:
   2472       fcvt(kFormatVnS, kFormatVnD, result, pg, zn);
   2473       pack_even_elements(kFormatVnS, result, result);
   2474       zip1(kFormatVnS, result, zd_b, result);
   2475       break;
   2476     case "fcvtx_z_p_z_d2s"_h:
   2477       fcvtxn(kFormatVnS, result, zn);
   2478       zip1(kFormatVnS, result, result, zero);
   2479       break;
   2480     case "fcvtxnt_z_p_z_d2s"_h:
   2481       fcvtxn(kFormatVnS, result, zn);
   2482       zip1(kFormatVnS, result, zd_b, result);
   2483       break;
   2484     default:
   2485       VIXL_UNIMPLEMENTED();
   2486   }
   2487   mov_merging(kFormatVnD, zd, pg, result);
   2488 }
   2489 
   2490 void Simulator::SimulateSVEFPConvertLong(const Instruction* instr) {
   2491   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   2492   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2493   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2494   SimVRegister result;
   2495 
   2496   switch (form_hash_) {
   2497     case "fcvtlt_z_p_z_h2s"_h:
   2498       ext(kFormatVnB, result, zn, zn, kHRegSizeInBytes);
   2499       fcvt(kFormatVnS, kFormatVnH, zd, pg, result);
   2500       break;
   2501     case "fcvtlt_z_p_z_s2d"_h:
   2502       ext(kFormatVnB, result, zn, zn, kSRegSizeInBytes);
   2503       fcvt(kFormatVnD, kFormatVnS, zd, pg, result);
   2504       break;
   2505     default:
   2506       VIXL_UNIMPLEMENTED();
   2507   }
   2508 }
   2509 
   2510 void Simulator::Simulate_ZdS_PgM_ZnS(const Instruction* instr) {
   2511   VectorFormat vform = instr->GetSVEVectorFormat();
   2512   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   2513   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2514   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2515   SimVRegister result;
   2516 
   2517   if (vform != kFormatVnS) {
   2518     VIXL_UNIMPLEMENTED();
   2519   }
   2520 
   2521   switch (form_hash_) {
   2522     case "urecpe_z_p_z"_h:
   2523       urecpe(vform, result, zn);
   2524       break;
   2525     case "ursqrte_z_p_z"_h:
   2526       ursqrte(vform, result, zn);
   2527       break;
   2528     default:
   2529       VIXL_UNIMPLEMENTED();
   2530   }
   2531   mov_merging(vform, zd, pg, result);
   2532 }
   2533 
   2534 void Simulator::Simulate_ZdT_PgM_ZnT(const Instruction* instr) {
   2535   VectorFormat vform = instr->GetSVEVectorFormat();
   2536   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   2537   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2538   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2539   SimVRegister result;
   2540 
   2541   switch (form_hash_) {
   2542     case "flogb_z_p_z"_h:
   2543       vform = instr->GetSVEVectorFormat(17);
   2544       flogb(vform, result, zn);
   2545       break;
   2546     case "sqabs_z_p_z"_h:
   2547       abs(vform, result, zn).SignedSaturate(vform);
   2548       break;
   2549     case "sqneg_z_p_z"_h:
   2550       neg(vform, result, zn).SignedSaturate(vform);
   2551       break;
   2552     default:
   2553       VIXL_UNIMPLEMENTED();
   2554   }
   2555   mov_merging(vform, zd, pg, result);
   2556 }
   2557 
   2558 void Simulator::Simulate_ZdT_PgZ_ZnT_ZmT(const Instruction* instr) {
   2559   VectorFormat vform = instr->GetSVEVectorFormat();
   2560   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   2561   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2562   SimVRegister& zm = ReadVRegister(instr->GetRm());
   2563   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2564   SimVRegister result;
   2565 
   2566   VIXL_ASSERT(form_hash_ == "histcnt_z_p_zz"_h);
   2567   if ((vform == kFormatVnS) || (vform == kFormatVnD)) {
   2568     histogram(vform, result, pg, zn, zm);
   2569     mov_zeroing(vform, zd, pg, result);
   2570   } else {
   2571     VIXL_UNIMPLEMENTED();
   2572   }
   2573 }
   2574 
   2575 void Simulator::Simulate_ZdT_ZnT_ZmT(const Instruction* instr) {
   2576   VectorFormat vform = instr->GetSVEVectorFormat();
   2577   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2578   SimVRegister& zm = ReadVRegister(instr->GetRm());
   2579   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2580   SimVRegister result;
   2581   bool do_bext = false;
   2582 
   2583   switch (form_hash_) {
   2584     case "bdep_z_zz"_h:
   2585       bdep(vform, zd, zn, zm);
   2586       break;
   2587     case "bext_z_zz"_h:
   2588       do_bext = true;
   2589       VIXL_FALLTHROUGH();
   2590     case "bgrp_z_zz"_h:
   2591       bgrp(vform, zd, zn, zm, do_bext);
   2592       break;
   2593     case "eorbt_z_zz"_h:
   2594       rotate_elements_right(vform, result, zm, 1);
   2595       SVEBitwiseLogicalUnpredicatedHelper(EOR, kFormatVnD, result, zn, result);
   2596       mov_alternating(vform, zd, result, 0);
   2597       break;
   2598     case "eortb_z_zz"_h:
   2599       rotate_elements_right(vform, result, zm, -1);
   2600       SVEBitwiseLogicalUnpredicatedHelper(EOR, kFormatVnD, result, zn, result);
   2601       mov_alternating(vform, zd, result, 1);
   2602       break;
   2603     case "mul_z_zz"_h:
   2604       mul(vform, zd, zn, zm);
   2605       break;
   2606     case "smulh_z_zz"_h:
   2607       smulh(vform, zd, zn, zm);
   2608       break;
   2609     case "sqdmulh_z_zz"_h:
   2610       sqdmulh(vform, zd, zn, zm);
   2611       break;
   2612     case "sqrdmulh_z_zz"_h:
   2613       sqrdmulh(vform, zd, zn, zm);
   2614       break;
   2615     case "umulh_z_zz"_h:
   2616       umulh(vform, zd, zn, zm);
   2617       break;
   2618     default:
   2619       VIXL_UNIMPLEMENTED();
   2620   }
   2621 }
   2622 
   2623 void Simulator::Simulate_ZdT_ZnT_ZmTb(const Instruction* instr) {
   2624   VectorFormat vform = instr->GetSVEVectorFormat();
   2625   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2626   SimVRegister& zm = ReadVRegister(instr->GetRm());
   2627   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2628 
   2629   SimVRegister zm_b, zm_t;
   2630   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2631   pack_even_elements(vform_half, zm_b, zm);
   2632   pack_odd_elements(vform_half, zm_t, zm);
   2633 
   2634   switch (form_hash_) {
   2635     case "saddwb_z_zz"_h:
   2636       saddw(vform, zd, zn, zm_b);
   2637       break;
   2638     case "saddwt_z_zz"_h:
   2639       saddw(vform, zd, zn, zm_t);
   2640       break;
   2641     case "ssubwb_z_zz"_h:
   2642       ssubw(vform, zd, zn, zm_b);
   2643       break;
   2644     case "ssubwt_z_zz"_h:
   2645       ssubw(vform, zd, zn, zm_t);
   2646       break;
   2647     case "uaddwb_z_zz"_h:
   2648       uaddw(vform, zd, zn, zm_b);
   2649       break;
   2650     case "uaddwt_z_zz"_h:
   2651       uaddw(vform, zd, zn, zm_t);
   2652       break;
   2653     case "usubwb_z_zz"_h:
   2654       usubw(vform, zd, zn, zm_b);
   2655       break;
   2656     case "usubwt_z_zz"_h:
   2657       usubw(vform, zd, zn, zm_t);
   2658       break;
   2659     default:
   2660       VIXL_UNIMPLEMENTED();
   2661   }
   2662 }
   2663 
   2664 void Simulator::Simulate_ZdT_ZnT_const(const Instruction* instr) {
   2665   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2666   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2667 
   2668   std::pair<int, int> shift_and_lane_size =
   2669       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
   2670   int lane_size = shift_and_lane_size.second;
   2671   VIXL_ASSERT((lane_size >= 0) &&
   2672               (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
   2673   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
   2674   int shift_dist = shift_and_lane_size.first;
   2675 
   2676   switch (form_hash_) {
   2677     case "sli_z_zzi"_h:
   2678       // Shift distance is computed differently for left shifts. Convert the
   2679       // result.
   2680       shift_dist = (8 << lane_size) - shift_dist;
   2681       sli(vform, zd, zn, shift_dist);
   2682       break;
   2683     case "sri_z_zzi"_h:
   2684       sri(vform, zd, zn, shift_dist);
   2685       break;
   2686     default:
   2687       VIXL_UNIMPLEMENTED();
   2688   }
   2689 }
   2690 
   2691 void Simulator::SimulateSVENarrow(const Instruction* instr) {
   2692   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2693   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2694   SimVRegister result;
   2695 
   2696   std::pair<int, int> shift_and_lane_size =
   2697       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
   2698   int lane_size = shift_and_lane_size.second;
   2699   VIXL_ASSERT((lane_size >= static_cast<int>(kBRegSizeInBytesLog2)) &&
   2700               (lane_size <= static_cast<int>(kSRegSizeInBytesLog2)));
   2701   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
   2702   int right_shift_dist = shift_and_lane_size.first;
   2703   bool top = false;
   2704 
   2705   switch (form_hash_) {
   2706     case "sqxtnt_z_zz"_h:
   2707       top = true;
   2708       VIXL_FALLTHROUGH();
   2709     case "sqxtnb_z_zz"_h:
   2710       sqxtn(vform, result, zn);
   2711       break;
   2712     case "sqxtunt_z_zz"_h:
   2713       top = true;
   2714       VIXL_FALLTHROUGH();
   2715     case "sqxtunb_z_zz"_h:
   2716       sqxtun(vform, result, zn);
   2717       break;
   2718     case "uqxtnt_z_zz"_h:
   2719       top = true;
   2720       VIXL_FALLTHROUGH();
   2721     case "uqxtnb_z_zz"_h:
   2722       uqxtn(vform, result, zn);
   2723       break;
   2724     case "rshrnt_z_zi"_h:
   2725       top = true;
   2726       VIXL_FALLTHROUGH();
   2727     case "rshrnb_z_zi"_h:
   2728       rshrn(vform, result, zn, right_shift_dist);
   2729       break;
   2730     case "shrnt_z_zi"_h:
   2731       top = true;
   2732       VIXL_FALLTHROUGH();
   2733     case "shrnb_z_zi"_h:
   2734       shrn(vform, result, zn, right_shift_dist);
   2735       break;
   2736     case "sqrshrnt_z_zi"_h:
   2737       top = true;
   2738       VIXL_FALLTHROUGH();
   2739     case "sqrshrnb_z_zi"_h:
   2740       sqrshrn(vform, result, zn, right_shift_dist);
   2741       break;
   2742     case "sqrshrunt_z_zi"_h:
   2743       top = true;
   2744       VIXL_FALLTHROUGH();
   2745     case "sqrshrunb_z_zi"_h:
   2746       sqrshrun(vform, result, zn, right_shift_dist);
   2747       break;
   2748     case "sqshrnt_z_zi"_h:
   2749       top = true;
   2750       VIXL_FALLTHROUGH();
   2751     case "sqshrnb_z_zi"_h:
   2752       sqshrn(vform, result, zn, right_shift_dist);
   2753       break;
   2754     case "sqshrunt_z_zi"_h:
   2755       top = true;
   2756       VIXL_FALLTHROUGH();
   2757     case "sqshrunb_z_zi"_h:
   2758       sqshrun(vform, result, zn, right_shift_dist);
   2759       break;
   2760     case "uqrshrnt_z_zi"_h:
   2761       top = true;
   2762       VIXL_FALLTHROUGH();
   2763     case "uqrshrnb_z_zi"_h:
   2764       uqrshrn(vform, result, zn, right_shift_dist);
   2765       break;
   2766     case "uqshrnt_z_zi"_h:
   2767       top = true;
   2768       VIXL_FALLTHROUGH();
   2769     case "uqshrnb_z_zi"_h:
   2770       uqshrn(vform, result, zn, right_shift_dist);
   2771       break;
   2772     default:
   2773       VIXL_UNIMPLEMENTED();
   2774   }
   2775 
   2776   if (top) {
   2777     // Keep even elements, replace odd elements with the results.
   2778     xtn(vform, zd, zd);
   2779     zip1(vform, zd, zd, result);
   2780   } else {
   2781     // Zero odd elements, replace even elements with the results.
   2782     SimVRegister zero;
   2783     zero.Clear();
   2784     zip1(vform, zd, result, zero);
   2785   }
   2786 }
   2787 
   2788 void Simulator::SimulateSVEInterleavedArithLong(const Instruction* instr) {
   2789   VectorFormat vform = instr->GetSVEVectorFormat();
   2790   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2791   SimVRegister& zm = ReadVRegister(instr->GetRm());
   2792   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2793   SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
   2794 
   2795   // Construct temporary registers containing the even (bottom) and odd (top)
   2796   // elements.
   2797   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2798   pack_even_elements(vform_half, zn_b, zn);
   2799   pack_even_elements(vform_half, zm_b, zm);
   2800   pack_odd_elements(vform_half, zn_t, zn);
   2801   pack_odd_elements(vform_half, zm_t, zm);
   2802 
   2803   switch (form_hash_) {
   2804     case "sabdlb_z_zz"_h:
   2805       sabdl(vform, zd, zn_b, zm_b);
   2806       break;
   2807     case "sabdlt_z_zz"_h:
   2808       sabdl(vform, zd, zn_t, zm_t);
   2809       break;
   2810     case "saddlb_z_zz"_h:
   2811       saddl(vform, zd, zn_b, zm_b);
   2812       break;
   2813     case "saddlbt_z_zz"_h:
   2814       saddl(vform, zd, zn_b, zm_t);
   2815       break;
   2816     case "saddlt_z_zz"_h:
   2817       saddl(vform, zd, zn_t, zm_t);
   2818       break;
   2819     case "ssublb_z_zz"_h:
   2820       ssubl(vform, zd, zn_b, zm_b);
   2821       break;
   2822     case "ssublbt_z_zz"_h:
   2823       ssubl(vform, zd, zn_b, zm_t);
   2824       break;
   2825     case "ssublt_z_zz"_h:
   2826       ssubl(vform, zd, zn_t, zm_t);
   2827       break;
   2828     case "ssubltb_z_zz"_h:
   2829       ssubl(vform, zd, zn_t, zm_b);
   2830       break;
   2831     case "uabdlb_z_zz"_h:
   2832       uabdl(vform, zd, zn_b, zm_b);
   2833       break;
   2834     case "uabdlt_z_zz"_h:
   2835       uabdl(vform, zd, zn_t, zm_t);
   2836       break;
   2837     case "uaddlb_z_zz"_h:
   2838       uaddl(vform, zd, zn_b, zm_b);
   2839       break;
   2840     case "uaddlt_z_zz"_h:
   2841       uaddl(vform, zd, zn_t, zm_t);
   2842       break;
   2843     case "usublb_z_zz"_h:
   2844       usubl(vform, zd, zn_b, zm_b);
   2845       break;
   2846     case "usublt_z_zz"_h:
   2847       usubl(vform, zd, zn_t, zm_t);
   2848       break;
   2849     case "sabalb_z_zzz"_h:
   2850       sabal(vform, zd, zn_b, zm_b);
   2851       break;
   2852     case "sabalt_z_zzz"_h:
   2853       sabal(vform, zd, zn_t, zm_t);
   2854       break;
   2855     case "uabalb_z_zzz"_h:
   2856       uabal(vform, zd, zn_b, zm_b);
   2857       break;
   2858     case "uabalt_z_zzz"_h:
   2859       uabal(vform, zd, zn_t, zm_t);
   2860       break;
   2861     default:
   2862       VIXL_UNIMPLEMENTED();
   2863   }
   2864 }
   2865 
   2866 void Simulator::SimulateSVEIntMulLongVec(const Instruction* instr) {
   2867   VectorFormat vform = instr->GetSVEVectorFormat();
   2868   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2869   SimVRegister& zm = ReadVRegister(instr->GetRm());
   2870   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2871   SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
   2872   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2873   pack_even_elements(vform_half, zn_b, zn);
   2874   pack_even_elements(vform_half, zm_b, zm);
   2875   pack_odd_elements(vform_half, zn_t, zn);
   2876   pack_odd_elements(vform_half, zm_t, zm);
   2877 
   2878   switch (form_hash_) {
   2879     case "pmullb_z_zz"_h:
   2880       // '00' is reserved for Q-sized lane.
   2881       if (vform == kFormatVnB) {
   2882         VIXL_UNIMPLEMENTED();
   2883       }
   2884       pmull(vform, zd, zn_b, zm_b);
   2885       break;
   2886     case "pmullt_z_zz"_h:
   2887       // '00' is reserved for Q-sized lane.
   2888       if (vform == kFormatVnB) {
   2889         VIXL_UNIMPLEMENTED();
   2890       }
   2891       pmull(vform, zd, zn_t, zm_t);
   2892       break;
   2893     case "smullb_z_zz"_h:
   2894       smull(vform, zd, zn_b, zm_b);
   2895       break;
   2896     case "smullt_z_zz"_h:
   2897       smull(vform, zd, zn_t, zm_t);
   2898       break;
   2899     case "sqdmullb_z_zz"_h:
   2900       sqdmull(vform, zd, zn_b, zm_b);
   2901       break;
   2902     case "sqdmullt_z_zz"_h:
   2903       sqdmull(vform, zd, zn_t, zm_t);
   2904       break;
   2905     case "umullb_z_zz"_h:
   2906       umull(vform, zd, zn_b, zm_b);
   2907       break;
   2908     case "umullt_z_zz"_h:
   2909       umull(vform, zd, zn_t, zm_t);
   2910       break;
   2911     default:
   2912       VIXL_UNIMPLEMENTED();
   2913   }
   2914 }
   2915 
   2916 void Simulator::SimulateSVEAddSubHigh(const Instruction* instr) {
   2917   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2918   SimVRegister& zm = ReadVRegister(instr->GetRm());
   2919   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2920   SimVRegister result;
   2921   bool top = false;
   2922 
   2923   VectorFormat vform_src = instr->GetSVEVectorFormat();
   2924   if (vform_src == kFormatVnB) {
   2925     VIXL_UNIMPLEMENTED();
   2926   }
   2927   VectorFormat vform = VectorFormatHalfWidth(vform_src);
   2928 
   2929   switch (form_hash_) {
   2930     case "addhnt_z_zz"_h:
   2931       top = true;
   2932       VIXL_FALLTHROUGH();
   2933     case "addhnb_z_zz"_h:
   2934       addhn(vform, result, zn, zm);
   2935       break;
   2936     case "raddhnt_z_zz"_h:
   2937       top = true;
   2938       VIXL_FALLTHROUGH();
   2939     case "raddhnb_z_zz"_h:
   2940       raddhn(vform, result, zn, zm);
   2941       break;
   2942     case "rsubhnt_z_zz"_h:
   2943       top = true;
   2944       VIXL_FALLTHROUGH();
   2945     case "rsubhnb_z_zz"_h:
   2946       rsubhn(vform, result, zn, zm);
   2947       break;
   2948     case "subhnt_z_zz"_h:
   2949       top = true;
   2950       VIXL_FALLTHROUGH();
   2951     case "subhnb_z_zz"_h:
   2952       subhn(vform, result, zn, zm);
   2953       break;
   2954     default:
   2955       VIXL_UNIMPLEMENTED();
   2956   }
   2957 
   2958   if (top) {
   2959     // Keep even elements, replace odd elements with the results.
   2960     xtn(vform, zd, zd);
   2961     zip1(vform, zd, zd, result);
   2962   } else {
   2963     // Zero odd elements, replace even elements with the results.
   2964     SimVRegister zero;
   2965     zero.Clear();
   2966     zip1(vform, zd, result, zero);
   2967   }
   2968 }
   2969 
   2970 void Simulator::SimulateSVEShiftLeftImm(const Instruction* instr) {
   2971   SimVRegister& zd = ReadVRegister(instr->GetRd());
   2972   SimVRegister& zn = ReadVRegister(instr->GetRn());
   2973   SimVRegister zn_b, zn_t;
   2974 
   2975   std::pair<int, int> shift_and_lane_size =
   2976       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
   2977   int lane_size = shift_and_lane_size.second;
   2978   VIXL_ASSERT((lane_size >= 0) &&
   2979               (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
   2980   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size + 1);
   2981   int right_shift_dist = shift_and_lane_size.first;
   2982   int left_shift_dist = (8 << lane_size) - right_shift_dist;
   2983 
   2984   // Construct temporary registers containing the even (bottom) and odd (top)
   2985   // elements.
   2986   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2987   pack_even_elements(vform_half, zn_b, zn);
   2988   pack_odd_elements(vform_half, zn_t, zn);
   2989 
   2990   switch (form_hash_) {
   2991     case "sshllb_z_zi"_h:
   2992       sshll(vform, zd, zn_b, left_shift_dist);
   2993       break;
   2994     case "sshllt_z_zi"_h:
   2995       sshll(vform, zd, zn_t, left_shift_dist);
   2996       break;
   2997     case "ushllb_z_zi"_h:
   2998       ushll(vform, zd, zn_b, left_shift_dist);
   2999       break;
   3000     case "ushllt_z_zi"_h:
   3001       ushll(vform, zd, zn_t, left_shift_dist);
   3002       break;
   3003     default:
   3004       VIXL_UNIMPLEMENTED();
   3005   }
   3006 }
   3007 
   3008 void Simulator::SimulateSVESaturatingMulAddHigh(const Instruction* instr) {
   3009   VectorFormat vform = instr->GetSVEVectorFormat();
   3010   SimVRegister& zda = ReadVRegister(instr->GetRd());
   3011   SimVRegister& zn = ReadVRegister(instr->GetRn());
   3012   unsigned zm_code = instr->GetRm();
   3013   int index = -1;
   3014   bool is_mla = false;
   3015 
   3016   switch (form_hash_) {
   3017     case "sqrdmlah_z_zzz"_h:
   3018       is_mla = true;
   3019       VIXL_FALLTHROUGH();
   3020     case "sqrdmlsh_z_zzz"_h:
   3021       // Nothing to do.
   3022       break;
   3023     case "sqrdmlah_z_zzzi_h"_h:
   3024       is_mla = true;
   3025       VIXL_FALLTHROUGH();
   3026     case "sqrdmlsh_z_zzzi_h"_h:
   3027       vform = kFormatVnH;
   3028       index = (instr->ExtractBit(22) << 2) | instr->ExtractBits(20, 19);
   3029       zm_code = instr->ExtractBits(18, 16);
   3030       break;
   3031     case "sqrdmlah_z_zzzi_s"_h:
   3032       is_mla = true;
   3033       VIXL_FALLTHROUGH();
   3034     case "sqrdmlsh_z_zzzi_s"_h:
   3035       vform = kFormatVnS;
   3036       index = instr->ExtractBits(20, 19);
   3037       zm_code = instr->ExtractBits(18, 16);
   3038       break;
   3039     case "sqrdmlah_z_zzzi_d"_h:
   3040       is_mla = true;
   3041       VIXL_FALLTHROUGH();
   3042     case "sqrdmlsh_z_zzzi_d"_h:
   3043       vform = kFormatVnD;
   3044       index = instr->ExtractBit(20);
   3045       zm_code = instr->ExtractBits(19, 16);
   3046       break;
   3047     default:
   3048       VIXL_UNIMPLEMENTED();
   3049   }
   3050 
   3051   SimVRegister& zm = ReadVRegister(zm_code);
   3052   SimVRegister zm_idx;
   3053   if (index >= 0) {
   3054     dup_elements_to_segments(vform, zm_idx, zm, index);
   3055   }
   3056 
   3057   if (is_mla) {
   3058     sqrdmlah(vform, zda, zn, (index >= 0) ? zm_idx : zm);
   3059   } else {
   3060     sqrdmlsh(vform, zda, zn, (index >= 0) ? zm_idx : zm);
   3061   }
   3062 }
   3063 
   3064 void Simulator::Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr) {
   3065   SimVRegister& zda = ReadVRegister(instr->GetRd());
   3066   SimVRegister& zn = ReadVRegister(instr->GetRn());
   3067   SimVRegister& zm = ReadVRegister(instr->ExtractBits(19, 16));
   3068 
   3069   SimVRegister temp, zm_idx, zn_b, zn_t;
   3070   Instr index = (instr->ExtractBit(20) << 1) | instr->ExtractBit(11);
   3071   dup_elements_to_segments(kFormatVnS, temp, zm, index);
   3072   pack_even_elements(kFormatVnS, zm_idx, temp);
   3073   pack_even_elements(kFormatVnS, zn_b, zn);
   3074   pack_odd_elements(kFormatVnS, zn_t, zn);
   3075 
   3076   switch (form_hash_) {
   3077     case "sqdmlalb_z_zzzi_d"_h:
   3078       sqdmlal(kFormatVnD, zda, zn_b, zm_idx);
   3079       break;
   3080     case "sqdmlalt_z_zzzi_d"_h:
   3081       sqdmlal(kFormatVnD, zda, zn_t, zm_idx);
   3082       break;
   3083     case "sqdmlslb_z_zzzi_d"_h:
   3084       sqdmlsl(kFormatVnD, zda, zn_b, zm_idx);
   3085       break;
   3086     case "sqdmlslt_z_zzzi_d"_h:
   3087       sqdmlsl(kFormatVnD, zda, zn_t, zm_idx);
   3088       break;
   3089     default:
   3090       VIXL_UNIMPLEMENTED();
   3091   }
   3092 }
   3093 
   3094 void Simulator::Simulate_ZdaS_ZnH_ZmH(const Instruction* instr) {
   3095   SimVRegister& zda = ReadVRegister(instr->GetRd());
   3096   SimVRegister& zm = ReadVRegister(instr->GetRm());
   3097   SimVRegister& zn = ReadVRegister(instr->GetRn());
   3098 
   3099   SimVRegister temp, zn_b, zm_b, zn_t, zm_t;
   3100   pack_even_elements(kFormatVnH, zn_b, zn);
   3101   pack_even_elements(kFormatVnH, zm_b, zm);
   3102   pack_odd_elements(kFormatVnH, zn_t, zn);
   3103   pack_odd_elements(kFormatVnH, zm_t, zm);
   3104 
   3105   switch (form_hash_) {
   3106     case "fmlalb_z_zzz"_h:
   3107       fmlal(kFormatVnS, zda, zn_b, zm_b);
   3108       break;
   3109     case "fmlalt_z_zzz"_h:
   3110       fmlal(kFormatVnS, zda, zn_t, zm_t);
   3111       break;
   3112     case "fmlslb_z_zzz"_h:
   3113       fmlsl(kFormatVnS, zda, zn_b, zm_b);
   3114       break;
   3115     case "fmlslt_z_zzz"_h:
   3116       fmlsl(kFormatVnS, zda, zn_t, zm_t);
   3117       break;
   3118     default:
   3119       VIXL_UNIMPLEMENTED();
   3120   }
   3121 }
   3122 
   3123 void Simulator::Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr) {
   3124   SimVRegister& zda = ReadVRegister(instr->GetRd());
   3125   SimVRegister& zn = ReadVRegister(instr->GetRn());
   3126   SimVRegister& zm = ReadVRegister(instr->ExtractBits(18, 16));
   3127 
   3128   SimVRegister temp, zm_idx, zn_b, zn_t;
   3129   Instr index = (instr->ExtractBits(20, 19) << 1) | instr->ExtractBit(11);
   3130   dup_elements_to_segments(kFormatVnH, temp, zm, index);
   3131   pack_even_elements(kFormatVnH, zm_idx, temp);
   3132   pack_even_elements(kFormatVnH, zn_b, zn);
   3133   pack_odd_elements(kFormatVnH, zn_t, zn);
   3134 
   3135   switch (form_hash_) {
   3136     case "fmlalb_z_zzzi_s"_h:
   3137       fmlal(kFormatVnS, zda, zn_b, zm_idx);
   3138       break;
   3139     case "fmlalt_z_zzzi_s"_h:
   3140       fmlal(kFormatVnS, zda, zn_t, zm_idx);
   3141       break;
   3142     case "fmlslb_z_zzzi_s"_h:
   3143       fmlsl(kFormatVnS, zda, zn_b, zm_idx);
   3144       break;
   3145     case "fmlslt_z_zzzi_s"_h:
   3146       fmlsl(kFormatVnS, zda, zn_t, zm_idx);
   3147       break;
   3148     case "sqdmlalb_z_zzzi_s"_h:
   3149       sqdmlal(kFormatVnS, zda, zn_b, zm_idx);
   3150       break;
   3151     case "sqdmlalt_z_zzzi_s"_h:
   3152       sqdmlal(kFormatVnS, zda, zn_t, zm_idx);
   3153       break;
   3154     case "sqdmlslb_z_zzzi_s"_h:
   3155       sqdmlsl(kFormatVnS, zda, zn_b, zm_idx);
   3156       break;
   3157     case "sqdmlslt_z_zzzi_s"_h:
   3158       sqdmlsl(kFormatVnS, zda, zn_t, zm_idx);
   3159       break;
   3160     default:
   3161       VIXL_UNIMPLEMENTED();
   3162   }
   3163 }
   3164 
   3165 void Simulator::Simulate_ZdaT_PgM_ZnTb(const Instruction* instr) {
   3166   VectorFormat vform = instr->GetSVEVectorFormat();
   3167   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   3168   SimVRegister& zda = ReadVRegister(instr->GetRd());
   3169   SimVRegister& zn = ReadVRegister(instr->GetRn());
   3170   SimVRegister result;
   3171 
   3172   switch (form_hash_) {
   3173     case "sadalp_z_p_z"_h:
   3174       sadalp(vform, result, zn);
   3175       break;
   3176     case "uadalp_z_p_z"_h:
   3177       uadalp(vform, result, zn);
   3178       break;
   3179     default:
   3180       VIXL_UNIMPLEMENTED();
   3181   }
   3182   mov_merging(vform, zda, pg, result);
   3183 }
   3184 
   3185 void Simulator::SimulateSVEAddSubCarry(const Instruction* instr) {
   3186   VectorFormat vform = (instr->ExtractBit(22) == 0) ? kFormatVnS : kFormatVnD;
   3187   SimVRegister& zda = ReadVRegister(instr->GetRd());
   3188   SimVRegister& zm = ReadVRegister(instr->GetRm());
   3189   SimVRegister& zn = ReadVRegister(instr->GetRn());
   3190 
   3191   SimVRegister not_zn;
   3192   not_(vform, not_zn, zn);
   3193 
   3194   switch (form_hash_) {
   3195     case "adclb_z_zzz"_h:
   3196       adcl(vform, zda, zn, zm, /* top = */ false);
   3197       break;
   3198     case "adclt_z_zzz"_h:
   3199       adcl(vform, zda, zn, zm, /* top = */ true);
   3200       break;
   3201     case "sbclb_z_zzz"_h:
   3202       adcl(vform, zda, not_zn, zm, /* top = */ false);
   3203       break;
   3204     case "sbclt_z_zzz"_h:
   3205       adcl(vform, zda, not_zn, zm, /* top = */ true);
   3206       break;
   3207     default:
   3208       VIXL_UNIMPLEMENTED();
   3209   }
   3210 }
   3211 
   3212 void Simulator::Simulate_ZdaT_ZnT_ZmT(const Instruction* instr) {
   3213   VectorFormat vform = instr->GetSVEVectorFormat();
   3214   SimVRegister& zda = ReadVRegister(instr->GetRd());
   3215   SimVRegister& zm = ReadVRegister(instr->GetRm());
   3216   SimVRegister& zn = ReadVRegister(instr->GetRn());
   3217 
   3218   switch (form_hash_) {
   3219     case "saba_z_zzz"_h:
   3220       saba(vform, zda, zn, zm);
   3221       break;
   3222     case "uaba_z_zzz"_h:
   3223       uaba(vform, zda, zn, zm);
   3224       break;
   3225     default:
   3226       VIXL_UNIMPLEMENTED();
   3227   }
   3228 }
   3229 
   3230 void Simulator::SimulateSVEComplexIntMulAdd(const Instruction* instr) {
   3231   SimVRegister& zda = ReadVRegister(instr->GetRd());
   3232   SimVRegister& zn = ReadVRegister(instr->GetRn());
   3233   int rot = instr->ExtractBits(11, 10) * 90;
   3234   // vform and zm are only valid for the vector form of instruction.
   3235   VectorFormat vform = instr->GetSVEVectorFormat();
   3236   SimVRegister& zm = ReadVRegister(instr->GetRm());
   3237 
   3238   // Inputs for indexed form of instruction.
   3239   SimVRegister& zm_h = ReadVRegister(instr->ExtractBits(18, 16));
   3240   SimVRegister& zm_s = ReadVRegister(instr->ExtractBits(19, 16));
   3241   int idx_h = instr->ExtractBits(20, 19);
   3242   int idx_s = instr->ExtractBit(20);
   3243 
   3244   switch (form_hash_) {
   3245     case "cmla_z_zzz"_h:
   3246       cmla(vform, zda, zda, zn, zm, rot);
   3247       break;
   3248     case "cmla_z_zzzi_h"_h:
   3249       cmla(kFormatVnH, zda, zda, zn, zm_h, idx_h, rot);
   3250       break;
   3251     case "cmla_z_zzzi_s"_h:
   3252       cmla(kFormatVnS, zda, zda, zn, zm_s, idx_s, rot);
   3253       break;
   3254     case "sqrdcmlah_z_zzz"_h:
   3255       sqrdcmlah(vform, zda, zda, zn, zm, rot);
   3256       break;
   3257     case "sqrdcmlah_z_zzzi_h"_h:
   3258       sqrdcmlah(kFormatVnH, zda, zda, zn, zm_h, idx_h, rot);
   3259       break;
   3260     case "sqrdcmlah_z_zzzi_s"_h:
   3261       sqrdcmlah(kFormatVnS, zda, zda, zn, zm_s, idx_s, rot);
   3262       break;
   3263     default:
   3264       VIXL_UNIMPLEMENTED();
   3265   }
   3266 }
   3267 
   3268 void Simulator::Simulate_ZdaT_ZnT_const(const Instruction* instr) {
   3269   SimVRegister& zd = ReadVRegister(instr->GetRd());
   3270   SimVRegister& zn = ReadVRegister(instr->GetRn());
   3271 
   3272   std::pair<int, int> shift_and_lane_size =
   3273       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
   3274   int lane_size = shift_and_lane_size.second;
   3275   VIXL_ASSERT((lane_size >= 0) &&
   3276               (static_cast<unsigned>(lane_size) <= kDRegSizeInBytesLog2));
   3277   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
   3278   int shift_dist = shift_and_lane_size.first;
   3279 
   3280   switch (form_hash_) {
   3281     case "srsra_z_zi"_h:
   3282       srsra(vform, zd, zn, shift_dist);
   3283       break;
   3284     case "ssra_z_zi"_h:
   3285       ssra(vform, zd, zn, shift_dist);
   3286       break;
   3287     case "ursra_z_zi"_h:
   3288       ursra(vform, zd, zn, shift_dist);
   3289       break;
   3290     case "usra_z_zi"_h:
   3291       usra(vform, zd, zn, shift_dist);
   3292       break;
   3293     default:
   3294       VIXL_UNIMPLEMENTED();
   3295   }
   3296 }
   3297 
   3298 void Simulator::Simulate_ZdaT_ZnTb_ZmTb(const Instruction* instr) {
   3299   VectorFormat vform = instr->GetSVEVectorFormat();
   3300   SimVRegister& zda = ReadVRegister(instr->GetRd());
   3301   SimVRegister& zm = ReadVRegister(instr->GetRm());
   3302   SimVRegister& zn = ReadVRegister(instr->GetRn());
   3303 
   3304   SimVRegister zero, zn_b, zm_b, zn_t, zm_t;
   3305   zero.Clear();
   3306 
   3307   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   3308   uzp1(vform_half, zn_b, zn, zero);
   3309   uzp1(vform_half, zm_b, zm, zero);
   3310   uzp2(vform_half, zn_t, zn, zero);
   3311   uzp2(vform_half, zm_t, zm, zero);
   3312 
   3313   switch (form_hash_) {
   3314     case "smlalb_z_zzz"_h:
   3315       smlal(vform, zda, zn_b, zm_b);
   3316       break;
   3317     case "smlalt_z_zzz"_h:
   3318       smlal(vform, zda, zn_t, zm_t);
   3319       break;
   3320     case "smlslb_z_zzz"_h:
   3321       smlsl(vform, zda, zn_b, zm_b);
   3322       break;
   3323     case "smlslt_z_zzz"_h:
   3324       smlsl(vform, zda, zn_t, zm_t);
   3325       break;
   3326     case "sqdmlalb_z_zzz"_h:
   3327       sqdmlal(vform, zda, zn_b, zm_b);
   3328       break;
   3329     case "sqdmlalbt_z_zzz"_h:
   3330       sqdmlal(vform, zda, zn_b, zm_t);
   3331       break;
   3332     case "sqdmlalt_z_zzz"_h:
   3333       sqdmlal(vform, zda, zn_t, zm_t);
   3334       break;
   3335     case "sqdmlslb_z_zzz"_h:
   3336       sqdmlsl(vform, zda, zn_b, zm_b);
   3337       break;
   3338     case "sqdmlslbt_z_zzz"_h:
   3339       sqdmlsl(vform, zda, zn_b, zm_t);
   3340       break;
   3341     case "sqdmlslt_z_zzz"_h:
   3342       sqdmlsl(vform, zda, zn_t, zm_t);
   3343       break;
   3344     case "umlalb_z_zzz"_h:
   3345       umlal(vform, zda, zn_b, zm_b);
   3346       break;
   3347     case "umlalt_z_zzz"_h:
   3348       umlal(vform, zda, zn_t, zm_t);
   3349       break;
   3350     case "umlslb_z_zzz"_h:
   3351       umlsl(vform, zda, zn_b, zm_b);
   3352       break;
   3353     case "umlslt_z_zzz"_h:
   3354       umlsl(vform, zda, zn_t, zm_t);
   3355       break;
   3356     default:
   3357       VIXL_UNIMPLEMENTED();
   3358   }
   3359 }
   3360 
   3361 void Simulator::SimulateSVEComplexDotProduct(const Instruction* instr) {
   3362   VectorFormat vform = instr->GetSVEVectorFormat();
   3363   SimVRegister& zda = ReadVRegister(instr->GetRd());
   3364   SimVRegister& zn = ReadVRegister(instr->GetRn());
   3365   int rot = instr->ExtractBits(11, 10) * 90;
   3366   unsigned zm_code = instr->GetRm();
   3367   int index = -1;
   3368 
   3369   switch (form_hash_) {
   3370     case "cdot_z_zzz"_h:
   3371       // Nothing to do.
   3372       break;
   3373     case "cdot_z_zzzi_s"_h:
   3374       index = zm_code >> 3;
   3375       zm_code &= 0x7;
   3376       break;
   3377     case "cdot_z_zzzi_d"_h:
   3378       index = zm_code >> 4;
   3379       zm_code &= 0xf;
   3380       break;
   3381     default:
   3382       VIXL_UNIMPLEMENTED();
   3383   }
   3384 
   3385   SimVRegister temp;
   3386   SimVRegister& zm = ReadVRegister(zm_code);
   3387   if (index >= 0) dup_elements_to_segments(vform, temp, zm, index);
   3388   cdot(vform, zda, zda, zn, (index >= 0) ? temp : zm, rot);
   3389 }
   3390 
   3391 void Simulator::SimulateSVEBitwiseTernary(const Instruction* instr) {
   3392   VectorFormat vform = kFormatVnD;
   3393   SimVRegister& zdn = ReadVRegister(instr->GetRd());
   3394   SimVRegister& zm = ReadVRegister(instr->GetRm());
   3395   SimVRegister& zk = ReadVRegister(instr->GetRn());
   3396   SimVRegister temp;
   3397 
   3398   switch (form_hash_) {
   3399     case "bcax_z_zzz"_h:
   3400       bic(vform, temp, zm, zk);
   3401       eor(vform, zdn, temp, zdn);
   3402       break;
   3403     case "bsl1n_z_zzz"_h:
   3404       not_(vform, temp, zdn);
   3405       bsl(vform, zdn, zk, temp, zm);
   3406       break;
   3407     case "bsl2n_z_zzz"_h:
   3408       not_(vform, temp, zm);
   3409       bsl(vform, zdn, zk, zdn, temp);
   3410       break;
   3411     case "bsl_z_zzz"_h:
   3412       bsl(vform, zdn, zk, zdn, zm);
   3413       break;
   3414     case "eor3_z_zzz"_h:
   3415       eor(vform, temp, zdn, zm);
   3416       eor(vform, zdn, temp, zk);
   3417       break;
   3418     case "nbsl_z_zzz"_h:
   3419       bsl(vform, zdn, zk, zdn, zm);
   3420       not_(vform, zdn, zdn);
   3421       break;
   3422     default:
   3423       VIXL_UNIMPLEMENTED();
   3424   }
   3425 }
   3426 
   3427 void Simulator::SimulateSVEHalvingAddSub(const Instruction* instr) {
   3428   VectorFormat vform = instr->GetSVEVectorFormat();
   3429   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   3430   SimVRegister& zdn = ReadVRegister(instr->GetRd());
   3431   SimVRegister& zm = ReadVRegister(instr->GetRn());
   3432   SimVRegister result;
   3433 
   3434   switch (form_hash_) {
   3435     case "shadd_z_p_zz"_h:
   3436       add(vform, result, zdn, zm).Halve(vform);
   3437       break;
   3438     case "shsub_z_p_zz"_h:
   3439       sub(vform, result, zdn, zm).Halve(vform);
   3440       break;
   3441     case "shsubr_z_p_zz"_h:
   3442       sub(vform, result, zm, zdn).Halve(vform);
   3443       break;
   3444     case "srhadd_z_p_zz"_h:
   3445       add(vform, result, zdn, zm).Halve(vform).Round(vform);
   3446       break;
   3447     case "uhadd_z_p_zz"_h:
   3448       add(vform, result, zdn, zm).Uhalve(vform);
   3449       break;
   3450     case "uhsub_z_p_zz"_h:
   3451       sub(vform, result, zdn, zm).Uhalve(vform);
   3452       break;
   3453     case "uhsubr_z_p_zz"_h:
   3454       sub(vform, result, zm, zdn).Uhalve(vform);
   3455       break;
   3456     case "urhadd_z_p_zz"_h:
   3457       add(vform, result, zdn, zm).Uhalve(vform).Round(vform);
   3458       break;
   3459     default:
   3460       VIXL_UNIMPLEMENTED();
   3461       break;
   3462   }
   3463   mov_merging(vform, zdn, pg, result);
   3464 }
   3465 
   3466 void Simulator::SimulateSVESaturatingArithmetic(const Instruction* instr) {
   3467   VectorFormat vform = instr->GetSVEVectorFormat();
   3468   SimVRegister& zdn = ReadVRegister(instr->GetRd());
   3469   SimVRegister& zm = ReadVRegister(instr->GetRn());
   3470   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   3471   SimVRegister result;
   3472 
   3473   switch (form_hash_) {
   3474     case "sqadd_z_p_zz"_h:
   3475       add(vform, result, zdn, zm).SignedSaturate(vform);
   3476       break;
   3477     case "sqsub_z_p_zz"_h:
   3478       sub(vform, result, zdn, zm).SignedSaturate(vform);
   3479       break;
   3480     case "sqsubr_z_p_zz"_h:
   3481       sub(vform, result, zm, zdn).SignedSaturate(vform);
   3482       break;
   3483     case "suqadd_z_p_zz"_h:
   3484       suqadd(vform, result, zdn, zm);
   3485       break;
   3486     case "uqadd_z_p_zz"_h:
   3487       add(vform, result, zdn, zm).UnsignedSaturate(vform);
   3488       break;
   3489     case "uqsub_z_p_zz"_h:
   3490       sub(vform, result, zdn, zm).UnsignedSaturate(vform);
   3491       break;
   3492     case "uqsubr_z_p_zz"_h:
   3493       sub(vform, result, zm, zdn).UnsignedSaturate(vform);
   3494       break;
   3495     case "usqadd_z_p_zz"_h:
   3496       usqadd(vform, result, zdn, zm);
   3497       break;
   3498     default:
   3499       VIXL_UNIMPLEMENTED();
   3500       break;
   3501   }
   3502   mov_merging(vform, zdn, pg, result);
   3503 }
   3504 
   3505 void Simulator::SimulateSVEIntArithPair(const Instruction* instr) {
   3506   VectorFormat vform = instr->GetSVEVectorFormat();
   3507   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   3508   SimVRegister& zdn = ReadVRegister(instr->GetRd());
   3509   SimVRegister& zm = ReadVRegister(instr->GetRn());
   3510   SimVRegister result;
   3511 
   3512   switch (form_hash_) {
   3513     case "addp_z_p_zz"_h:
   3514       addp(vform, result, zdn, zm);
   3515       break;
   3516     case "smaxp_z_p_zz"_h:
   3517       smaxp(vform, result, zdn, zm);
   3518       break;
   3519     case "sminp_z_p_zz"_h:
   3520       sminp(vform, result, zdn, zm);
   3521       break;
   3522     case "umaxp_z_p_zz"_h:
   3523       umaxp(vform, result, zdn, zm);
   3524       break;
   3525     case "uminp_z_p_zz"_h:
   3526       uminp(vform, result, zdn, zm);
   3527       break;
   3528     default:
   3529       VIXL_UNIMPLEMENTED();
   3530       break;
   3531   }
   3532   mov_merging(vform, zdn, pg, result);
   3533 }
   3534 
   3535 void Simulator::Simulate_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr) {
   3536   VectorFormat vform = instr->GetSVEVectorFormat();
   3537   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   3538   SimVRegister& zdn = ReadVRegister(instr->GetRd());
   3539   SimVRegister& zm = ReadVRegister(instr->GetRn());
   3540   SimVRegister result;
   3541 
   3542   switch (form_hash_) {
   3543     case "faddp_z_p_zz"_h:
   3544       faddp(vform, result, zdn, zm);
   3545       break;
   3546     case "fmaxnmp_z_p_zz"_h:
   3547       fmaxnmp(vform, result, zdn, zm);
   3548       break;
   3549     case "fmaxp_z_p_zz"_h:
   3550       fmaxp(vform, result, zdn, zm);
   3551       break;
   3552     case "fminnmp_z_p_zz"_h:
   3553       fminnmp(vform, result, zdn, zm);
   3554       break;
   3555     case "fminp_z_p_zz"_h:
   3556       fminp(vform, result, zdn, zm);
   3557       break;
   3558     default:
   3559       VIXL_UNIMPLEMENTED();
   3560   }
   3561   mov_merging(vform, zdn, pg, result);
   3562 }
   3563 
   3564 void Simulator::Simulate_ZdnT_PgM_ZdnT_const(const Instruction* instr) {
   3565   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   3566   SimVRegister& zdn = ReadVRegister(instr->GetRd());
   3567 
   3568   std::pair<int, int> shift_and_lane_size =
   3569       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
   3570   unsigned lane_size = shift_and_lane_size.second;
   3571   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
   3572   int right_shift_dist = shift_and_lane_size.first;
   3573   int left_shift_dist = (8 << lane_size) - right_shift_dist;
   3574   SimVRegister result;
   3575 
   3576   switch (form_hash_) {
   3577     case "sqshl_z_p_zi"_h:
   3578       sqshl(vform, result, zdn, left_shift_dist);
   3579       break;
   3580     case "sqshlu_z_p_zi"_h:
   3581       sqshlu(vform, result, zdn, left_shift_dist);
   3582       break;
   3583     case "srshr_z_p_zi"_h:
   3584       sshr(vform, result, zdn, right_shift_dist).Round(vform);
   3585       break;
   3586     case "uqshl_z_p_zi"_h:
   3587       uqshl(vform, result, zdn, left_shift_dist);
   3588       break;
   3589     case "urshr_z_p_zi"_h:
   3590       ushr(vform, result, zdn, right_shift_dist).Round(vform);
   3591       break;
   3592     default:
   3593       VIXL_UNIMPLEMENTED();
   3594   }
   3595   mov_merging(vform, zdn, pg, result);
   3596 }
   3597 
   3598 void Simulator::SimulateSVEExclusiveOrRotate(const Instruction* instr) {
   3599   VIXL_ASSERT(form_hash_ == "xar_z_zzi"_h);
   3600 
   3601   SimVRegister& zdn = ReadVRegister(instr->GetRd());
   3602   SimVRegister& zm = ReadVRegister(instr->GetRn());
   3603 
   3604   std::pair<int, int> shift_and_lane_size =
   3605       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
   3606   unsigned lane_size = shift_and_lane_size.second;
   3607   VIXL_ASSERT(lane_size <= kDRegSizeInBytesLog2);
   3608   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
   3609   int shift_dist = shift_and_lane_size.first;
   3610   eor(vform, zdn, zdn, zm);
   3611   ror(vform, zdn, zdn, shift_dist);
   3612 }
   3613 
   3614 void Simulator::Simulate_ZdnT_ZdnT_ZmT_const(const Instruction* instr) {
   3615   VectorFormat vform = instr->GetSVEVectorFormat();
   3616   SimVRegister& zdn = ReadVRegister(instr->GetRd());
   3617   SimVRegister& zm = ReadVRegister(instr->GetRn());
   3618   int rot = (instr->ExtractBit(10) == 0) ? 90 : 270;
   3619 
   3620   switch (form_hash_) {
   3621     case "cadd_z_zz"_h:
   3622       cadd(vform, zdn, zdn, zm, rot);
   3623       break;
   3624     case "sqcadd_z_zz"_h:
   3625       cadd(vform, zdn, zdn, zm, rot, /* saturate = */ true);
   3626       break;
   3627     default:
   3628       VIXL_UNIMPLEMENTED();
   3629   }
   3630 }
   3631 
   3632 void Simulator::Simulate_ZtD_PgZ_ZnD_Xm(const Instruction* instr) {
   3633   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   3634   SimVRegister& zn = ReadVRegister(instr->GetRn());
   3635   uint64_t xm = ReadXRegister(instr->GetRm());
   3636 
   3637   LogicSVEAddressVector addr(xm, &zn, kFormatVnD);
   3638   int msize = -1;
   3639   bool is_signed = false;
   3640 
   3641   switch (form_hash_) {
   3642     case "ldnt1b_z_p_ar_d_64_unscaled"_h:
   3643       msize = 0;
   3644       break;
   3645     case "ldnt1d_z_p_ar_d_64_unscaled"_h:
   3646       msize = 3;
   3647       break;
   3648     case "ldnt1h_z_p_ar_d_64_unscaled"_h:
   3649       msize = 1;
   3650       break;
   3651     case "ldnt1sb_z_p_ar_d_64_unscaled"_h:
   3652       msize = 0;
   3653       is_signed = true;
   3654       break;
   3655     case "ldnt1sh_z_p_ar_d_64_unscaled"_h:
   3656       msize = 1;
   3657       is_signed = true;
   3658       break;
   3659     case "ldnt1sw_z_p_ar_d_64_unscaled"_h:
   3660       msize = 2;
   3661       is_signed = true;
   3662       break;
   3663     case "ldnt1w_z_p_ar_d_64_unscaled"_h:
   3664       msize = 2;
   3665       break;
   3666     default:
   3667       VIXL_UNIMPLEMENTED();
   3668   }
   3669   addr.SetMsizeInBytesLog2(msize);
   3670   SVEStructuredLoadHelper(kFormatVnD, pg, instr->GetRt(), addr, is_signed);
   3671 }
   3672 
   3673 void Simulator::Simulate_ZtD_Pg_ZnD_Xm(const Instruction* instr) {
   3674   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   3675   SimVRegister& zn = ReadVRegister(instr->GetRn());
   3676   uint64_t xm = ReadXRegister(instr->GetRm());
   3677 
   3678   LogicSVEAddressVector addr(xm, &zn, kFormatVnD);
   3679   VIXL_ASSERT((form_hash_ == "stnt1b_z_p_ar_d_64_unscaled"_h) ||
   3680               (form_hash_ == "stnt1d_z_p_ar_d_64_unscaled"_h) ||
   3681               (form_hash_ == "stnt1h_z_p_ar_d_64_unscaled"_h) ||
   3682               (form_hash_ == "stnt1w_z_p_ar_d_64_unscaled"_h));
   3683 
   3684   addr.SetMsizeInBytesLog2(
   3685       instr->GetSVEMsizeFromDtype(/* is_signed = */ false));
   3686   SVEStructuredStoreHelper(kFormatVnD, pg, instr->GetRt(), addr);
   3687 }
   3688 
   3689 void Simulator::Simulate_ZtS_PgZ_ZnS_Xm(const Instruction* instr) {
   3690   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   3691   SimVRegister& zn = ReadVRegister(instr->GetRn());
   3692   uint64_t xm = ReadXRegister(instr->GetRm());
   3693 
   3694   LogicSVEAddressVector addr(xm, &zn, kFormatVnS);
   3695   int msize = -1;
   3696   bool is_signed = false;
   3697 
   3698   switch (form_hash_) {
   3699     case "ldnt1b_z_p_ar_s_x32_unscaled"_h:
   3700       msize = 0;
   3701       break;
   3702     case "ldnt1h_z_p_ar_s_x32_unscaled"_h:
   3703       msize = 1;
   3704       break;
   3705     case "ldnt1sb_z_p_ar_s_x32_unscaled"_h:
   3706       msize = 0;
   3707       is_signed = true;
   3708       break;
   3709     case "ldnt1sh_z_p_ar_s_x32_unscaled"_h:
   3710       msize = 1;
   3711       is_signed = true;
   3712       break;
   3713     case "ldnt1w_z_p_ar_s_x32_unscaled"_h:
   3714       msize = 2;
   3715       break;
   3716     default:
   3717       VIXL_UNIMPLEMENTED();
   3718   }
   3719   addr.SetMsizeInBytesLog2(msize);
   3720   SVEStructuredLoadHelper(kFormatVnS, pg, instr->GetRt(), addr, is_signed);
   3721 }
   3722 
   3723 void Simulator::Simulate_ZtS_Pg_ZnS_Xm(const Instruction* instr) {
   3724   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   3725   SimVRegister& zn = ReadVRegister(instr->GetRn());
   3726   uint64_t xm = ReadXRegister(instr->GetRm());
   3727 
   3728   LogicSVEAddressVector addr(xm, &zn, kFormatVnS);
   3729   VIXL_ASSERT((form_hash_ == "stnt1b_z_p_ar_s_x32_unscaled"_h) ||
   3730               (form_hash_ == "stnt1h_z_p_ar_s_x32_unscaled"_h) ||
   3731               (form_hash_ == "stnt1w_z_p_ar_s_x32_unscaled"_h));
   3732 
   3733   addr.SetMsizeInBytesLog2(
   3734       instr->GetSVEMsizeFromDtype(/* is_signed = */ false));
   3735   SVEStructuredStoreHelper(kFormatVnS, pg, instr->GetRt(), addr);
   3736 }
   3737 
   3738 void Simulator::VisitReserved(const Instruction* instr) {
   3739   // UDF is the only instruction in this group, and the Decoder is precise here.
   3740   VIXL_ASSERT(instr->Mask(ReservedMask) == UDF);
   3741 
   3742   printf("UDF (permanently undefined) instruction at %p: 0x%08" PRIx32 "\n",
   3743          reinterpret_cast<const void*>(instr),
   3744          instr->GetInstructionBits());
   3745   VIXL_ABORT_WITH_MSG("UNDEFINED (UDF)\n");
   3746 }
   3747 
   3748 
   3749 void Simulator::VisitUnimplemented(const Instruction* instr) {
   3750   printf("Unimplemented instruction at %p: 0x%08" PRIx32 "\n",
   3751          reinterpret_cast<const void*>(instr),
   3752          instr->GetInstructionBits());
   3753   VIXL_UNIMPLEMENTED();
   3754 }
   3755 
   3756 
   3757 void Simulator::VisitUnallocated(const Instruction* instr) {
   3758   printf("Unallocated instruction at %p: 0x%08" PRIx32 "\n",
   3759          reinterpret_cast<const void*>(instr),
   3760          instr->GetInstructionBits());
   3761   VIXL_UNIMPLEMENTED();
   3762 }
   3763 
   3764 
   3765 void Simulator::VisitPCRelAddressing(const Instruction* instr) {
   3766   VIXL_ASSERT((instr->Mask(PCRelAddressingMask) == ADR) ||
   3767               (instr->Mask(PCRelAddressingMask) == ADRP));
   3768 
   3769   WriteRegister(instr->GetRd(), instr->GetImmPCOffsetTarget());
   3770 }
   3771 
   3772 
   3773 void Simulator::VisitUnconditionalBranch(const Instruction* instr) {
   3774   switch (instr->Mask(UnconditionalBranchMask)) {
   3775     case BL:
   3776       WriteLr(instr->GetNextInstruction());
   3777       VIXL_FALLTHROUGH();
   3778     case B:
   3779       WritePc(instr->GetImmPCOffsetTarget());
   3780       break;
   3781     default:
   3782       VIXL_UNREACHABLE();
   3783   }
   3784 }
   3785 
   3786 
   3787 void Simulator::VisitConditionalBranch(const Instruction* instr) {
   3788   VIXL_ASSERT(instr->Mask(ConditionalBranchMask) == B_cond);
   3789   if (ConditionPassed(instr->GetConditionBranch())) {
   3790     WritePc(instr->GetImmPCOffsetTarget());
   3791   }
   3792 }
   3793 
   3794 BType Simulator::GetBTypeFromInstruction(const Instruction* instr) const {
   3795   switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
   3796     case BLR:
   3797     case BLRAA:
   3798     case BLRAB:
   3799     case BLRAAZ:
   3800     case BLRABZ:
   3801       return BranchAndLink;
   3802     case BR:
   3803     case BRAA:
   3804     case BRAB:
   3805     case BRAAZ:
   3806     case BRABZ:
   3807       if ((instr->GetRn() == 16) || (instr->GetRn() == 17) ||
   3808           !PcIsInGuardedPage()) {
   3809         return BranchFromUnguardedOrToIP;
   3810       }
   3811       return BranchFromGuardedNotToIP;
   3812   }
   3813   return DefaultBType;
   3814 }
   3815 
   3816 void Simulator::VisitUnconditionalBranchToRegister(const Instruction* instr) {
   3817   bool authenticate = false;
   3818   bool link = false;
   3819   bool ret = false;
   3820   uint64_t addr = ReadXRegister(instr->GetRn());
   3821   uint64_t context = 0;
   3822 
   3823   switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
   3824     case BLR:
   3825       link = true;
   3826       VIXL_FALLTHROUGH();
   3827     case BR:
   3828       break;
   3829 
   3830     case BLRAAZ:
   3831     case BLRABZ:
   3832       link = true;
   3833       VIXL_FALLTHROUGH();
   3834     case BRAAZ:
   3835     case BRABZ:
   3836       authenticate = true;
   3837       break;
   3838 
   3839     case BLRAA:
   3840     case BLRAB:
   3841       link = true;
   3842       VIXL_FALLTHROUGH();
   3843     case BRAA:
   3844     case BRAB:
   3845       authenticate = true;
   3846       context = ReadXRegister(instr->GetRd());
   3847       break;
   3848 
   3849     case RETAA:
   3850     case RETAB:
   3851       authenticate = true;
   3852       addr = ReadXRegister(kLinkRegCode);
   3853       context = ReadXRegister(31, Reg31IsStackPointer);
   3854       VIXL_FALLTHROUGH();
   3855     case RET:
   3856       ret = true;
   3857       break;
   3858     default:
   3859       VIXL_UNREACHABLE();
   3860   }
   3861 
   3862   if (link) {
   3863     WriteLr(instr->GetNextInstruction());
   3864   }
   3865 
   3866   if (authenticate) {
   3867     PACKey key = (instr->ExtractBit(10) == 0) ? kPACKeyIA : kPACKeyIB;
   3868     addr = AuthPAC(addr, context, key, kInstructionPointer);
   3869 
   3870     int error_lsb = GetTopPACBit(addr, kInstructionPointer) - 2;
   3871     if (((addr >> error_lsb) & 0x3) != 0x0) {
   3872       VIXL_ABORT_WITH_MSG("Failed to authenticate pointer.");
   3873     }
   3874   }
   3875 
   3876   if (!ret) {
   3877     // Check for interceptions to the target address, if one is found, call it.
   3878     MetaDataDepot::BranchInterceptionAbstract* interception =
   3879         meta_data_.FindBranchInterception(addr);
   3880 
   3881     if (interception != nullptr) {
   3882       // Instead of writing the address of the function to the PC, call the
   3883       // function's interception directly. We change the address that will be
   3884       // branched to so that afterwards we continue execution from
   3885       // the address in the LR. Note: the interception may modify the LR so
   3886       // store it before calling the interception.
   3887       addr = ReadRegister<uint64_t>(kLinkRegCode);
   3888       (*interception)(this);
   3889     }
   3890   }
   3891 
   3892   WriteNextBType(GetBTypeFromInstruction(instr));
   3893   WritePc(Instruction::Cast(addr));
   3894 }
   3895 
   3896 
   3897 void Simulator::VisitTestBranch(const Instruction* instr) {
   3898   unsigned bit_pos =
   3899       (instr->GetImmTestBranchBit5() << 5) | instr->GetImmTestBranchBit40();
   3900   bool bit_zero = ((ReadXRegister(instr->GetRt()) >> bit_pos) & 1) == 0;
   3901   bool take_branch = false;
   3902   switch (instr->Mask(TestBranchMask)) {
   3903     case TBZ:
   3904       take_branch = bit_zero;
   3905       break;
   3906     case TBNZ:
   3907       take_branch = !bit_zero;
   3908       break;
   3909     default:
   3910       VIXL_UNIMPLEMENTED();
   3911   }
   3912   if (take_branch) {
   3913     WritePc(instr->GetImmPCOffsetTarget());
   3914   }
   3915 }
   3916 
   3917 
   3918 void Simulator::VisitCompareBranch(const Instruction* instr) {
   3919   unsigned rt = instr->GetRt();
   3920   bool take_branch = false;
   3921   switch (instr->Mask(CompareBranchMask)) {
   3922     case CBZ_w:
   3923       take_branch = (ReadWRegister(rt) == 0);
   3924       break;
   3925     case CBZ_x:
   3926       take_branch = (ReadXRegister(rt) == 0);
   3927       break;
   3928     case CBNZ_w:
   3929       take_branch = (ReadWRegister(rt) != 0);
   3930       break;
   3931     case CBNZ_x:
   3932       take_branch = (ReadXRegister(rt) != 0);
   3933       break;
   3934     default:
   3935       VIXL_UNIMPLEMENTED();
   3936   }
   3937   if (take_branch) {
   3938     WritePc(instr->GetImmPCOffsetTarget());
   3939   }
   3940 }
   3941 
   3942 
   3943 void Simulator::AddSubHelper(const Instruction* instr, int64_t op2) {
   3944   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
   3945   bool set_flags = instr->GetFlagsUpdate();
   3946   int64_t new_val = 0;
   3947   Instr operation = instr->Mask(AddSubOpMask);
   3948 
   3949   switch (operation) {
   3950     case ADD:
   3951     case ADDS: {
   3952       new_val = AddWithCarry(reg_size,
   3953                              set_flags,
   3954                              ReadRegister(reg_size,
   3955                                           instr->GetRn(),
   3956                                           instr->GetRnMode()),
   3957                              op2);
   3958       break;
   3959     }
   3960     case SUB:
   3961     case SUBS: {
   3962       new_val = AddWithCarry(reg_size,
   3963                              set_flags,
   3964                              ReadRegister(reg_size,
   3965                                           instr->GetRn(),
   3966                                           instr->GetRnMode()),
   3967                              ~op2,
   3968                              1);
   3969       break;
   3970     }
   3971     default:
   3972       VIXL_UNREACHABLE();
   3973   }
   3974 
   3975   WriteRegister(reg_size,
   3976                 instr->GetRd(),
   3977                 new_val,
   3978                 LogRegWrites,
   3979                 instr->GetRdMode());
   3980 }
   3981 
   3982 
   3983 void Simulator::VisitAddSubShifted(const Instruction* instr) {
   3984   // Add/sub/adds/subs don't allow ROR as a shift mode.
   3985   VIXL_ASSERT(instr->GetShiftDP() != ROR);
   3986 
   3987   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
   3988   int64_t op2 = ShiftOperand(reg_size,
   3989                              ReadRegister(reg_size, instr->GetRm()),
   3990                              static_cast<Shift>(instr->GetShiftDP()),
   3991                              instr->GetImmDPShift());
   3992   AddSubHelper(instr, op2);
   3993 }
   3994 
   3995 
   3996 void Simulator::VisitAddSubImmediate(const Instruction* instr) {
   3997   int64_t op2 = instr->GetImmAddSub()
   3998                 << ((instr->GetImmAddSubShift() == 1) ? 12 : 0);
   3999   AddSubHelper(instr, op2);
   4000 }
   4001 
   4002 
   4003 void Simulator::VisitAddSubExtended(const Instruction* instr) {
   4004   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
   4005   int64_t op2 = ExtendValue(reg_size,
   4006                             ReadRegister(reg_size, instr->GetRm()),
   4007                             static_cast<Extend>(instr->GetExtendMode()),
   4008                             instr->GetImmExtendShift());
   4009   AddSubHelper(instr, op2);
   4010 }
   4011 
   4012 
   4013 void Simulator::VisitAddSubWithCarry(const Instruction* instr) {
   4014   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
   4015   int64_t op2 = ReadRegister(reg_size, instr->GetRm());
   4016   int64_t new_val;
   4017 
   4018   if ((instr->Mask(AddSubOpMask) == SUB) ||
   4019       (instr->Mask(AddSubOpMask) == SUBS)) {
   4020     op2 = ~op2;
   4021   }
   4022 
   4023   new_val = AddWithCarry(reg_size,
   4024                          instr->GetFlagsUpdate(),
   4025                          ReadRegister(reg_size, instr->GetRn()),
   4026                          op2,
   4027                          ReadC());
   4028 
   4029   WriteRegister(reg_size, instr->GetRd(), new_val);
   4030 }
   4031 
   4032 
   4033 void Simulator::VisitRotateRightIntoFlags(const Instruction* instr) {
   4034   switch (instr->Mask(RotateRightIntoFlagsMask)) {
   4035     case RMIF: {
   4036       uint64_t value = ReadRegister<uint64_t>(instr->GetRn());
   4037       unsigned shift = instr->GetImmRMIFRotation();
   4038       unsigned mask = instr->GetNzcv();
   4039       uint64_t rotated = RotateRight(value, shift, kXRegSize);
   4040 
   4041       ReadNzcv().SetFlags((rotated & mask) | (ReadNzcv().GetFlags() & ~mask));
   4042       break;
   4043     }
   4044   }
   4045 }
   4046 
   4047 
   4048 void Simulator::VisitEvaluateIntoFlags(const Instruction* instr) {
   4049   uint32_t value = ReadRegister<uint32_t>(instr->GetRn());
   4050   unsigned msb = (instr->Mask(EvaluateIntoFlagsMask) == SETF16) ? 15 : 7;
   4051 
   4052   unsigned sign_bit = (value >> msb) & 1;
   4053   unsigned overflow_bit = (value >> (msb + 1)) & 1;
   4054   ReadNzcv().SetN(sign_bit);
   4055   ReadNzcv().SetZ((value << (31 - msb)) == 0);
   4056   ReadNzcv().SetV(sign_bit ^ overflow_bit);
   4057 }
   4058 
   4059 
   4060 void Simulator::VisitLogicalShifted(const Instruction* instr) {
   4061   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
   4062   Shift shift_type = static_cast<Shift>(instr->GetShiftDP());
   4063   unsigned shift_amount = instr->GetImmDPShift();
   4064   int64_t op2 = ShiftOperand(reg_size,
   4065                              ReadRegister(reg_size, instr->GetRm()),
   4066                              shift_type,
   4067                              shift_amount);
   4068   if (instr->Mask(NOT) == NOT) {
   4069     op2 = ~op2;
   4070   }
   4071   LogicalHelper(instr, op2);
   4072 }
   4073 
   4074 
   4075 void Simulator::VisitLogicalImmediate(const Instruction* instr) {
   4076   if (instr->GetImmLogical() == 0) {
   4077     VIXL_UNIMPLEMENTED();
   4078   } else {
   4079     LogicalHelper(instr, instr->GetImmLogical());
   4080   }
   4081 }
   4082 
   4083 
   4084 void Simulator::LogicalHelper(const Instruction* instr, int64_t op2) {
   4085   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
   4086   int64_t op1 = ReadRegister(reg_size, instr->GetRn());
   4087   int64_t result = 0;
   4088   bool update_flags = false;
   4089 
   4090   // Switch on the logical operation, stripping out the NOT bit, as it has a
   4091   // different meaning for logical immediate instructions.
   4092   switch (instr->Mask(LogicalOpMask & ~NOT)) {
   4093     case ANDS:
   4094       update_flags = true;
   4095       VIXL_FALLTHROUGH();
   4096     case AND:
   4097       result = op1 & op2;
   4098       break;
   4099     case ORR:
   4100       result = op1 | op2;
   4101       break;
   4102     case EOR:
   4103       result = op1 ^ op2;
   4104       break;
   4105     default:
   4106       VIXL_UNIMPLEMENTED();
   4107   }
   4108 
   4109   if (update_flags) {
   4110     ReadNzcv().SetN(CalcNFlag(result, reg_size));
   4111     ReadNzcv().SetZ(CalcZFlag(result));
   4112     ReadNzcv().SetC(0);
   4113     ReadNzcv().SetV(0);
   4114     LogSystemRegister(NZCV);
   4115   }
   4116 
   4117   WriteRegister(reg_size,
   4118                 instr->GetRd(),
   4119                 result,
   4120                 LogRegWrites,
   4121                 instr->GetRdMode());
   4122 }
   4123 
   4124 
   4125 void Simulator::VisitConditionalCompareRegister(const Instruction* instr) {
   4126   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
   4127   ConditionalCompareHelper(instr, ReadRegister(reg_size, instr->GetRm()));
   4128 }
   4129 
   4130 
   4131 void Simulator::VisitConditionalCompareImmediate(const Instruction* instr) {
   4132   ConditionalCompareHelper(instr, instr->GetImmCondCmp());
   4133 }
   4134 
   4135 
   4136 void Simulator::ConditionalCompareHelper(const Instruction* instr,
   4137                                          int64_t op2) {
   4138   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
   4139   int64_t op1 = ReadRegister(reg_size, instr->GetRn());
   4140 
   4141   if (ConditionPassed(instr->GetCondition())) {
   4142     // If the condition passes, set the status flags to the result of comparing
   4143     // the operands.
   4144     if (instr->Mask(ConditionalCompareMask) == CCMP) {
   4145       AddWithCarry(reg_size, true, op1, ~op2, 1);
   4146     } else {
   4147       VIXL_ASSERT(instr->Mask(ConditionalCompareMask) == CCMN);
   4148       AddWithCarry(reg_size, true, op1, op2, 0);
   4149     }
   4150   } else {
   4151     // If the condition fails, set the status flags to the nzcv immediate.
   4152     ReadNzcv().SetFlags(instr->GetNzcv());
   4153     LogSystemRegister(NZCV);
   4154   }
   4155 }
   4156 
   4157 
   4158 void Simulator::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
   4159   int offset = instr->GetImmLSUnsigned() << instr->GetSizeLS();
   4160   LoadStoreHelper(instr, offset, Offset);
   4161 }
   4162 
   4163 
   4164 void Simulator::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
   4165   LoadStoreHelper(instr, instr->GetImmLS(), Offset);
   4166 }
   4167 
   4168 
   4169 void Simulator::VisitLoadStorePreIndex(const Instruction* instr) {
   4170   LoadStoreHelper(instr, instr->GetImmLS(), PreIndex);
   4171 }
   4172 
   4173 
   4174 void Simulator::VisitLoadStorePostIndex(const Instruction* instr) {
   4175   LoadStoreHelper(instr, instr->GetImmLS(), PostIndex);
   4176 }
   4177 
   4178 
   4179 template <typename T1, typename T2>
   4180 void Simulator::LoadAcquireRCpcUnscaledOffsetHelper(const Instruction* instr) {
   4181   unsigned rt = instr->GetRt();
   4182   unsigned rn = instr->GetRn();
   4183 
   4184   unsigned element_size = sizeof(T2);
   4185   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
   4186   int offset = instr->GetImmLS();
   4187   address += offset;
   4188 
   4189   // Verify that the address is available to the host.
   4190   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
   4191 
   4192   // Check the alignment of `address`.
   4193   if (AlignDown(address, 16) != AlignDown(address + element_size - 1, 16)) {
   4194     VIXL_ALIGNMENT_EXCEPTION();
   4195   }
   4196 
   4197   VIXL_DEFINE_OR_RETURN(value, MemRead<T2>(address));
   4198 
   4199   WriteRegister<T1>(rt, static_cast<T1>(value));
   4200 
   4201   // Approximate load-acquire by issuing a full barrier after the load.
   4202   __sync_synchronize();
   4203 
   4204   LogRead(rt, GetPrintRegisterFormat(element_size), address);
   4205 }
   4206 
   4207 
   4208 template <typename T>
   4209 void Simulator::StoreReleaseUnscaledOffsetHelper(const Instruction* instr) {
   4210   unsigned rt = instr->GetRt();
   4211   unsigned rn = instr->GetRn();
   4212 
   4213   unsigned element_size = sizeof(T);
   4214   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
   4215   int offset = instr->GetImmLS();
   4216   address += offset;
   4217 
   4218   // Verify that the address is available to the host.
   4219   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
   4220 
   4221   // Check the alignment of `address`.
   4222   if (AlignDown(address, 16) != AlignDown(address + element_size - 1, 16)) {
   4223     VIXL_ALIGNMENT_EXCEPTION();
   4224   }
   4225 
   4226   // Approximate store-release by issuing a full barrier after the load.
   4227   __sync_synchronize();
   4228 
   4229   if (!MemWrite<T>(address, ReadRegister<T>(rt))) return;
   4230 
   4231   LogWrite(rt, GetPrintRegisterFormat(element_size), address);
   4232 }
   4233 
   4234 
   4235 void Simulator::VisitLoadStoreRCpcUnscaledOffset(const Instruction* instr) {
   4236   switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) {
   4237     case LDAPURB:
   4238       LoadAcquireRCpcUnscaledOffsetHelper<uint8_t, uint8_t>(instr);
   4239       break;
   4240     case LDAPURH:
   4241       LoadAcquireRCpcUnscaledOffsetHelper<uint16_t, uint16_t>(instr);
   4242       break;
   4243     case LDAPUR_w:
   4244       LoadAcquireRCpcUnscaledOffsetHelper<uint32_t, uint32_t>(instr);
   4245       break;
   4246     case LDAPUR_x:
   4247       LoadAcquireRCpcUnscaledOffsetHelper<uint64_t, uint64_t>(instr);
   4248       break;
   4249     case LDAPURSB_w:
   4250       LoadAcquireRCpcUnscaledOffsetHelper<int32_t, int8_t>(instr);
   4251       break;
   4252     case LDAPURSB_x:
   4253       LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int8_t>(instr);
   4254       break;
   4255     case LDAPURSH_w:
   4256       LoadAcquireRCpcUnscaledOffsetHelper<int32_t, int16_t>(instr);
   4257       break;
   4258     case LDAPURSH_x:
   4259       LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int16_t>(instr);
   4260       break;
   4261     case LDAPURSW:
   4262       LoadAcquireRCpcUnscaledOffsetHelper<int64_t, int32_t>(instr);
   4263       break;
   4264     case STLURB:
   4265       StoreReleaseUnscaledOffsetHelper<uint8_t>(instr);
   4266       break;
   4267     case STLURH:
   4268       StoreReleaseUnscaledOffsetHelper<uint16_t>(instr);
   4269       break;
   4270     case STLUR_w:
   4271       StoreReleaseUnscaledOffsetHelper<uint32_t>(instr);
   4272       break;
   4273     case STLUR_x:
   4274       StoreReleaseUnscaledOffsetHelper<uint64_t>(instr);
   4275       break;
   4276   }
   4277 }
   4278 
   4279 
   4280 void Simulator::VisitLoadStorePAC(const Instruction* instr) {
   4281   unsigned dst = instr->GetRt();
   4282   unsigned addr_reg = instr->GetRn();
   4283 
   4284   uint64_t address = ReadXRegister(addr_reg, Reg31IsStackPointer);
   4285 
   4286   PACKey key = (instr->ExtractBit(23) == 0) ? kPACKeyDA : kPACKeyDB;
   4287   address = AuthPAC(address, 0, key, kDataPointer);
   4288 
   4289   int error_lsb = GetTopPACBit(address, kInstructionPointer) - 2;
   4290   if (((address >> error_lsb) & 0x3) != 0x0) {
   4291     VIXL_ABORT_WITH_MSG("Failed to authenticate pointer.");
   4292   }
   4293 
   4294 
   4295   if ((addr_reg == 31) && ((address % 16) != 0)) {
   4296     // When the base register is SP the stack pointer is required to be
   4297     // quadword aligned prior to the address calculation and write-backs.
   4298     // Misalignment will cause a stack alignment fault.
   4299     VIXL_ALIGNMENT_EXCEPTION();
   4300   }
   4301 
   4302   int64_t offset = instr->GetImmLSPAC();
   4303   address += offset;
   4304 
   4305   if (instr->Mask(LoadStorePACPreBit) == LoadStorePACPreBit) {
   4306     // Pre-index mode.
   4307     VIXL_ASSERT(offset != 0);
   4308     WriteXRegister(addr_reg, address, LogRegWrites, Reg31IsStackPointer);
   4309   }
   4310 
   4311   uintptr_t addr_ptr = static_cast<uintptr_t>(address);
   4312 
   4313   // Verify that the calculated address is available to the host.
   4314   VIXL_ASSERT(address == addr_ptr);
   4315 
   4316   VIXL_DEFINE_OR_RETURN(value, MemRead<uint64_t>(addr_ptr));
   4317 
   4318   WriteXRegister(dst, value, NoRegLog);
   4319   unsigned access_size = 1 << 3;
   4320   LogRead(dst, GetPrintRegisterFormatForSize(access_size), addr_ptr);
   4321 }
   4322 
   4323 
   4324 void Simulator::VisitLoadStoreRegisterOffset(const Instruction* instr) {
   4325   Extend ext = static_cast<Extend>(instr->GetExtendMode());
   4326   VIXL_ASSERT((ext == UXTW) || (ext == UXTX) || (ext == SXTW) || (ext == SXTX));
   4327   unsigned shift_amount = instr->GetImmShiftLS() * instr->GetSizeLS();
   4328 
   4329   int64_t offset =
   4330       ExtendValue(kXRegSize, ReadXRegister(instr->GetRm()), ext, shift_amount);
   4331   LoadStoreHelper(instr, offset, Offset);
   4332 }
   4333 
   4334 
   4335 void Simulator::LoadStoreHelper(const Instruction* instr,
   4336                                 int64_t offset,
   4337                                 AddrMode addrmode) {
   4338   unsigned srcdst = instr->GetRt();
   4339   uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addrmode);
   4340 
   4341   bool rt_is_vreg = false;
   4342   int extend_to_size = 0;
   4343   LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask));
   4344   switch (op) {
   4345     case LDRB_w: {
   4346       VIXL_DEFINE_OR_RETURN(value, MemRead<uint8_t>(address));
   4347       WriteWRegister(srcdst, value, NoRegLog);
   4348       extend_to_size = kWRegSizeInBytes;
   4349       break;
   4350     }
   4351     case LDRH_w: {
   4352       VIXL_DEFINE_OR_RETURN(value, MemRead<uint16_t>(address));
   4353       WriteWRegister(srcdst, value, NoRegLog);
   4354       extend_to_size = kWRegSizeInBytes;
   4355       break;
   4356     }
   4357     case LDR_w: {
   4358       VIXL_DEFINE_OR_RETURN(value, MemRead<uint32_t>(address));
   4359       WriteWRegister(srcdst, value, NoRegLog);
   4360       extend_to_size = kWRegSizeInBytes;
   4361       break;
   4362     }
   4363     case LDR_x: {
   4364       VIXL_DEFINE_OR_RETURN(value, MemRead<uint64_t>(address));
   4365       WriteXRegister(srcdst, value, NoRegLog);
   4366       extend_to_size = kXRegSizeInBytes;
   4367       break;
   4368     }
   4369     case LDRSB_w: {
   4370       VIXL_DEFINE_OR_RETURN(value, MemRead<int8_t>(address));
   4371       WriteWRegister(srcdst, value, NoRegLog);
   4372       extend_to_size = kWRegSizeInBytes;
   4373       break;
   4374     }
   4375     case LDRSH_w: {
   4376       VIXL_DEFINE_OR_RETURN(value, MemRead<int16_t>(address));
   4377       WriteWRegister(srcdst, value, NoRegLog);
   4378       extend_to_size = kWRegSizeInBytes;
   4379       break;
   4380     }
   4381     case LDRSB_x: {
   4382       VIXL_DEFINE_OR_RETURN(value, MemRead<int8_t>(address));
   4383       WriteXRegister(srcdst, value, NoRegLog);
   4384       extend_to_size = kXRegSizeInBytes;
   4385       break;
   4386     }
   4387     case LDRSH_x: {
   4388       VIXL_DEFINE_OR_RETURN(value, MemRead<int16_t>(address));
   4389       WriteXRegister(srcdst, value, NoRegLog);
   4390       extend_to_size = kXRegSizeInBytes;
   4391       break;
   4392     }
   4393     case LDRSW_x: {
   4394       VIXL_DEFINE_OR_RETURN(value, MemRead<int32_t>(address));
   4395       WriteXRegister(srcdst, value, NoRegLog);
   4396       extend_to_size = kXRegSizeInBytes;
   4397       break;
   4398     }
   4399     case LDR_b: {
   4400       VIXL_DEFINE_OR_RETURN(value, MemRead<uint8_t>(address));
   4401       WriteBRegister(srcdst, value, NoRegLog);
   4402       rt_is_vreg = true;
   4403       break;
   4404     }
   4405     case LDR_h: {
   4406       VIXL_DEFINE_OR_RETURN(value, MemRead<uint16_t>(address));
   4407       WriteHRegister(srcdst, value, NoRegLog);
   4408       rt_is_vreg = true;
   4409       break;
   4410     }
   4411     case LDR_s: {
   4412       VIXL_DEFINE_OR_RETURN(value, MemRead<float>(address));
   4413       WriteSRegister(srcdst, value, NoRegLog);
   4414       rt_is_vreg = true;
   4415       break;
   4416     }
   4417     case LDR_d: {
   4418       VIXL_DEFINE_OR_RETURN(value, MemRead<double>(address));
   4419       WriteDRegister(srcdst, value, NoRegLog);
   4420       rt_is_vreg = true;
   4421       break;
   4422     }
   4423     case LDR_q: {
   4424       VIXL_DEFINE_OR_RETURN(value, MemRead<qreg_t>(address));
   4425       WriteQRegister(srcdst, value, NoRegLog);
   4426       rt_is_vreg = true;
   4427       break;
   4428     }
   4429 
   4430     case STRB_w:
   4431       if (!MemWrite<uint8_t>(address, ReadWRegister(srcdst))) return;
   4432       break;
   4433     case STRH_w:
   4434       if (!MemWrite<uint16_t>(address, ReadWRegister(srcdst))) return;
   4435       break;
   4436     case STR_w:
   4437       if (!MemWrite<uint32_t>(address, ReadWRegister(srcdst))) return;
   4438       break;
   4439     case STR_x:
   4440       if (!MemWrite<uint64_t>(address, ReadXRegister(srcdst))) return;
   4441       break;
   4442     case STR_b:
   4443       if (!MemWrite<uint8_t>(address, ReadBRegister(srcdst))) return;
   4444       rt_is_vreg = true;
   4445       break;
   4446     case STR_h:
   4447       if (!MemWrite<uint16_t>(address, ReadHRegisterBits(srcdst))) return;
   4448       rt_is_vreg = true;
   4449       break;
   4450     case STR_s:
   4451       if (!MemWrite<float>(address, ReadSRegister(srcdst))) return;
   4452       rt_is_vreg = true;
   4453       break;
   4454     case STR_d:
   4455       if (!MemWrite<double>(address, ReadDRegister(srcdst))) return;
   4456       rt_is_vreg = true;
   4457       break;
   4458     case STR_q:
   4459       if (!MemWrite<qreg_t>(address, ReadQRegister(srcdst))) return;
   4460       rt_is_vreg = true;
   4461       break;
   4462 
   4463     // Ignore prfm hint instructions.
   4464     case PRFM:
   4465       break;
   4466 
   4467     default:
   4468       VIXL_UNIMPLEMENTED();
   4469   }
   4470 
   4471   // Print a detailed trace (including the memory address).
   4472   bool extend = (extend_to_size != 0);
   4473   unsigned access_size = 1 << instr->GetSizeLS();
   4474   unsigned result_size = extend ? extend_to_size : access_size;
   4475   PrintRegisterFormat print_format =
   4476       rt_is_vreg ? GetPrintRegisterFormatForSizeTryFP(result_size)
   4477                  : GetPrintRegisterFormatForSize(result_size);
   4478 
   4479   if (instr->IsLoad()) {
   4480     if (rt_is_vreg) {
   4481       LogVRead(srcdst, print_format, address);
   4482     } else {
   4483       LogExtendingRead(srcdst, print_format, access_size, address);
   4484     }
   4485   } else if (instr->IsStore()) {
   4486     if (rt_is_vreg) {
   4487       LogVWrite(srcdst, print_format, address);
   4488     } else {
   4489       LogWrite(srcdst, GetPrintRegisterFormatForSize(result_size), address);
   4490     }
   4491   } else {
   4492     VIXL_ASSERT(op == PRFM);
   4493   }
   4494 
   4495   local_monitor_.MaybeClear();
   4496 }
   4497 
   4498 
   4499 void Simulator::VisitLoadStorePairOffset(const Instruction* instr) {
   4500   LoadStorePairHelper(instr, Offset);
   4501 }
   4502 
   4503 
   4504 void Simulator::VisitLoadStorePairPreIndex(const Instruction* instr) {
   4505   LoadStorePairHelper(instr, PreIndex);
   4506 }
   4507 
   4508 
   4509 void Simulator::VisitLoadStorePairPostIndex(const Instruction* instr) {
   4510   LoadStorePairHelper(instr, PostIndex);
   4511 }
   4512 
   4513 
   4514 void Simulator::VisitLoadStorePairNonTemporal(const Instruction* instr) {
   4515   LoadStorePairHelper(instr, Offset);
   4516 }
   4517 
   4518 
// Simulates a load/store pair instruction: two adjacent memory elements are
// transferred to or from the register pair (rt, rt2), and a detailed trace of
// both accesses is printed afterwards.
//
// `addrmode` is passed to AddressModeHelper together with the base register
// (Rn) and the scaled immediate to obtain the address of the first element.
// The second element lives `element_size` bytes after the first one.
//
// The function returns early, before any trace is printed, when a MemWrite
// call returns false. Judging by its name, VIXL_DEFINE_OR_RETURN does the same
// for a failed MemRead (the macro is defined elsewhere -- confirm there).
void Simulator::LoadStorePairHelper(const Instruction* instr,
                                    AddrMode addrmode) {
  unsigned rt = instr->GetRt();
  unsigned rt2 = instr->GetRt2();
  // The size field is a log2: each element is (1 << size) bytes wide.
  int element_size = 1 << instr->GetSizeLSPair();
  // The immediate is expressed in elements, so scale it to bytes.
  int64_t offset = instr->GetImmLSPair() * element_size;
  uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addrmode);
  uintptr_t address2 = address + element_size;

  LoadStorePairOp op =
      static_cast<LoadStorePairOp>(instr->Mask(LoadStorePairMask));

  // 'rt' and 'rt2' can only be aliased for stores.
  VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || (rt != rt2));

  // Set when the pair lives in the vector/FP register file (S, D or Q).
  bool rt_is_vreg = false;
  // Set for LDPSW, where 32-bit elements are sign-extended into X registers.
  bool sign_extend = false;
  switch (op) {
    // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We
    // will print a more detailed log.
    case LDP_w: {
      // Both elements are read before either register is written.
      VIXL_DEFINE_OR_RETURN(value, MemRead<uint32_t>(address));
      VIXL_DEFINE_OR_RETURN(value2, MemRead<uint32_t>(address2));
      WriteWRegister(rt, value, NoRegLog);
      WriteWRegister(rt2, value2, NoRegLog);
      break;
    }
    case LDP_s: {
      VIXL_DEFINE_OR_RETURN(value, MemRead<float>(address));
      VIXL_DEFINE_OR_RETURN(value2, MemRead<float>(address2));
      WriteSRegister(rt, value, NoRegLog);
      WriteSRegister(rt2, value2, NoRegLog);
      rt_is_vreg = true;
      break;
    }
    case LDP_x: {
      VIXL_DEFINE_OR_RETURN(value, MemRead<uint64_t>(address));
      VIXL_DEFINE_OR_RETURN(value2, MemRead<uint64_t>(address2));
      WriteXRegister(rt, value, NoRegLog);
      WriteXRegister(rt2, value2, NoRegLog);
      break;
    }
    case LDP_d: {
      VIXL_DEFINE_OR_RETURN(value, MemRead<double>(address));
      VIXL_DEFINE_OR_RETURN(value2, MemRead<double>(address2));
      WriteDRegister(rt, value, NoRegLog);
      WriteDRegister(rt2, value2, NoRegLog);
      rt_is_vreg = true;
      break;
    }
    case LDP_q: {
      VIXL_DEFINE_OR_RETURN(value, MemRead<qreg_t>(address));
      VIXL_DEFINE_OR_RETURN(value2, MemRead<qreg_t>(address2));
      WriteQRegister(rt, value, NoRegLog);
      WriteQRegister(rt2, value2, NoRegLog);
      rt_is_vreg = true;
      break;
    }
    case LDPSW_x: {
      // Reading as int32_t and writing an X register performs the sign
      // extension.
      VIXL_DEFINE_OR_RETURN(value, MemRead<int32_t>(address));
      VIXL_DEFINE_OR_RETURN(value2, MemRead<int32_t>(address2));
      WriteXRegister(rt, value, NoRegLog);
      WriteXRegister(rt2, value2, NoRegLog);
      sign_extend = true;
      break;
    }
    case STP_w: {
      // The first element is stored before the second; if the second store
      // fails, the first one has already been performed.
      if (!MemWrite<uint32_t>(address, ReadWRegister(rt))) return;
      if (!MemWrite<uint32_t>(address2, ReadWRegister(rt2))) return;
      break;
    }
    case STP_s: {
      if (!MemWrite<float>(address, ReadSRegister(rt))) return;
      if (!MemWrite<float>(address2, ReadSRegister(rt2))) return;
      rt_is_vreg = true;
      break;
    }
    case STP_x: {
      if (!MemWrite<uint64_t>(address, ReadXRegister(rt))) return;
      if (!MemWrite<uint64_t>(address2, ReadXRegister(rt2))) return;
      break;
    }
    case STP_d: {
      if (!MemWrite<double>(address, ReadDRegister(rt))) return;
      if (!MemWrite<double>(address2, ReadDRegister(rt2))) return;
      rt_is_vreg = true;
      break;
    }
    case STP_q: {
      if (!MemWrite<qreg_t>(address, ReadQRegister(rt))) return;
      if (!MemWrite<qreg_t>(address2, ReadQRegister(rt2))) return;
      rt_is_vreg = true;
      break;
    }
    default:
      VIXL_UNREACHABLE();
  }

  // Print a detailed trace (including the memory address).
  // For LDPSW the register printed is a full X register even though only
  // `element_size` bytes were read from memory.
  unsigned result_size = sign_extend ? kXRegSizeInBytes : element_size;
  PrintRegisterFormat print_format =
      rt_is_vreg ? GetPrintRegisterFormatForSizeTryFP(result_size)
                 : GetPrintRegisterFormatForSize(result_size);

  if (instr->IsLoad()) {
    if (rt_is_vreg) {
      LogVRead(rt, print_format, address);
      LogVRead(rt2, print_format, address2);
    } else if (sign_extend) {
      LogExtendingRead(rt, print_format, element_size, address);
      LogExtendingRead(rt2, print_format, element_size, address2);
    } else {
      LogRead(rt, print_format, address);
      LogRead(rt2, print_format, address2);
    }
  } else {
    if (rt_is_vreg) {
      LogVWrite(rt, print_format, address);
      LogVWrite(rt2, print_format, address2);
    } else {
      LogWrite(rt, print_format, address);
      LogWrite(rt2, print_format, address2);
    }
  }

  // As in the single-register load/store path, an ordinary access may clear
  // the local exclusive monitor.
  local_monitor_.MaybeClear();
}
   4646 
   4647 
   4648 template <typename T>
   4649 void Simulator::CompareAndSwapHelper(const Instruction* instr) {
   4650   unsigned rs = instr->GetRs();
   4651   unsigned rt = instr->GetRt();
   4652   unsigned rn = instr->GetRn();
   4653 
   4654   unsigned element_size = sizeof(T);
   4655   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
   4656 
   4657   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
   4658 
   4659   bool is_acquire = instr->ExtractBit(22) == 1;
   4660   bool is_release = instr->ExtractBit(15) == 1;
   4661 
   4662   T comparevalue = ReadRegister<T>(rs);
   4663   T newvalue = ReadRegister<T>(rt);
   4664 
   4665   // The architecture permits that the data read clears any exclusive monitors
   4666   // associated with that location, even if the compare subsequently fails.
   4667   local_monitor_.Clear();
   4668 
   4669   VIXL_DEFINE_OR_RETURN(data, MemRead<T>(address));
   4670 
   4671   if (is_acquire) {
   4672     // Approximate load-acquire by issuing a full barrier after the load.
   4673     __sync_synchronize();
   4674   }
   4675 
   4676   if (data == comparevalue) {
   4677     if (is_release) {
   4678       // Approximate store-release by issuing a full barrier before the store.
   4679       __sync_synchronize();
   4680     }
   4681     if (!MemWrite<T>(address, newvalue)) return;
   4682     LogWrite(rt, GetPrintRegisterFormatForSize(element_size), address);
   4683   }
   4684   WriteRegister<T>(rs, data, NoRegLog);
   4685   LogRead(rs, GetPrintRegisterFormatForSize(element_size), address);
   4686 }
   4687 
   4688 
   4689 template <typename T>
   4690 void Simulator::CompareAndSwapPairHelper(const Instruction* instr) {
   4691   VIXL_ASSERT((sizeof(T) == 4) || (sizeof(T) == 8));
   4692   unsigned rs = instr->GetRs();
   4693   unsigned rt = instr->GetRt();
   4694   unsigned rn = instr->GetRn();
   4695 
   4696   VIXL_ASSERT((rs % 2 == 0) && (rt % 2 == 0));
   4697 
   4698   unsigned element_size = sizeof(T);
   4699   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
   4700 
   4701   CheckIsValidUnalignedAtomicAccess(rn, address, element_size * 2);
   4702 
   4703   uint64_t address2 = address + element_size;
   4704 
   4705   bool is_acquire = instr->ExtractBit(22) == 1;
   4706   bool is_release = instr->ExtractBit(15) == 1;
   4707 
   4708   T comparevalue_high = ReadRegister<T>(rs + 1);
   4709   T comparevalue_low = ReadRegister<T>(rs);
   4710   T newvalue_high = ReadRegister<T>(rt + 1);
   4711   T newvalue_low = ReadRegister<T>(rt);
   4712 
   4713   // The architecture permits that the data read clears any exclusive monitors
   4714   // associated with that location, even if the compare subsequently fails.
   4715   local_monitor_.Clear();
   4716 
   4717   VIXL_DEFINE_OR_RETURN(data_low, MemRead<T>(address));
   4718   VIXL_DEFINE_OR_RETURN(data_high, MemRead<T>(address2));
   4719 
   4720   if (is_acquire) {
   4721     // Approximate load-acquire by issuing a full barrier after the load.
   4722     __sync_synchronize();
   4723   }
   4724 
   4725   bool same =
   4726       (data_high == comparevalue_high) && (data_low == comparevalue_low);
   4727   if (same) {
   4728     if (is_release) {
   4729       // Approximate store-release by issuing a full barrier before the store.
   4730       __sync_synchronize();
   4731     }
   4732 
   4733     if (!MemWrite<T>(address, newvalue_low)) return;
   4734     if (!MemWrite<T>(address2, newvalue_high)) return;
   4735   }
   4736 
   4737   WriteRegister<T>(rs + 1, data_high, NoRegLog);
   4738   WriteRegister<T>(rs, data_low, NoRegLog);
   4739 
   4740   PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size);
   4741   LogRead(rs, format, address);
   4742   LogRead(rs + 1, format, address2);
   4743 
   4744   if (same) {
   4745     LogWrite(rt, format, address);
   4746     LogWrite(rt + 1, format, address2);
   4747   }
   4748 }
   4749 
   4750 bool Simulator::CanReadMemory(uintptr_t address, size_t size) {
   4751   // To simulate fault-tolerant loads, we need to know what host addresses we
   4752   // can access without generating a real fault. One way to do that is to
   4753   // attempt to `write()` the memory to a placeholder pipe[1]. This is more
   4754   // portable and less intrusive than using (global) signal handlers.
   4755   //
   4756   // [1]: https://stackoverflow.com/questions/7134590
   4757 
   4758   size_t written = 0;
   4759   bool can_read = true;
   4760   // `write` will normally return after one invocation, but it is allowed to
   4761   // handle only part of the operation, so wrap it in a loop.
   4762   while (can_read && (written < size)) {
   4763     ssize_t result = write(placeholder_pipe_fd_[1],
   4764                            reinterpret_cast<void*>(address + written),
   4765                            size - written);
   4766     if (result > 0) {
   4767       written += result;
   4768     } else {
   4769       switch (result) {
   4770         case -EPERM:
   4771         case -EFAULT:
   4772           // The address range is not accessible.
   4773           // `write` is supposed to return -EFAULT in this case, but in practice
   4774           // it seems to return -EPERM, so we accept that too.
   4775           can_read = false;
   4776           break;
   4777         case -EINTR:
   4778           // The call was interrupted by a signal. Just try again.
   4779           break;
   4780         default:
   4781           // Any other error is fatal.
   4782           VIXL_ABORT();
   4783       }
   4784     }
   4785   }
   4786   // Drain the read side of the pipe. If we don't do this, we'll leak memory as
   4787   // the placeholder data is buffered. As before, we expect to drain the whole
   4788   // write in one invocation, but cannot guarantee that, so we wrap it in a
   4789   // loop. This function is primarily intended to implement SVE fault-tolerant
   4790   // loads, so the maximum Z register size is a good default buffer size.
   4791   char buffer[kZRegMaxSizeInBytes];
   4792   while (written > 0) {
   4793     ssize_t result = read(placeholder_pipe_fd_[0],
   4794                           reinterpret_cast<void*>(buffer),
   4795                           sizeof(buffer));
   4796     // `read` blocks, and returns 0 only at EOF. We should not hit EOF until
   4797     // we've read everything that was written, so treat 0 as an error.
   4798     if (result > 0) {
   4799       VIXL_ASSERT(static_cast<size_t>(result) <= written);
   4800       written -= result;
   4801     } else {
   4802       // For -EINTR, just try again. We can't handle any other error.
   4803       VIXL_CHECK(result == -EINTR);
   4804     }
   4805   }
   4806 
   4807   return can_read;
   4808 }
   4809 
   4810 void Simulator::PrintExclusiveAccessWarning() {
   4811   if (print_exclusive_access_warning_) {
   4812     fprintf(stderr,
   4813             "%sWARNING:%s VIXL simulator support for "
   4814             "load-/store-/clear-exclusive "
   4815             "instructions is limited. Refer to the README for details.%s\n",
   4816             clr_warning,
   4817             clr_warning_message,
   4818             clr_normal);
   4819     print_exclusive_access_warning_ = false;
   4820   }
   4821 }
   4822 
// Simulates the instructions decoded through LoadStoreExclusiveMask.
//
// Compare-and-swap encodings (CAS*, CASP*) are forwarded to
// CompareAndSwapHelper / CompareAndSwapPairHelper, instantiated with the
// access type. Every other encoding is handled inline in the `default` case:
// exclusive and non-exclusive, acquire/release, single and pair loads and
// stores.
//
// Exclusive stores write a status to Rs: 0 if the store was performed and 1
// if it was not.
void Simulator::VisitLoadStoreExclusive(const Instruction* instr) {
  LoadStoreExclusive op =
      static_cast<LoadStoreExclusive>(instr->Mask(LoadStoreExclusiveMask));

  switch (op) {
    case CAS_w:
    case CASA_w:
    case CASL_w:
    case CASAL_w:
      CompareAndSwapHelper<uint32_t>(instr);
      break;
    case CAS_x:
    case CASA_x:
    case CASL_x:
    case CASAL_x:
      CompareAndSwapHelper<uint64_t>(instr);
      break;
    case CASB:
    case CASAB:
    case CASLB:
    case CASALB:
      CompareAndSwapHelper<uint8_t>(instr);
      break;
    case CASH:
    case CASAH:
    case CASLH:
    case CASALH:
      CompareAndSwapHelper<uint16_t>(instr);
      break;
    case CASP_w:
    case CASPA_w:
    case CASPL_w:
    case CASPAL_w:
      CompareAndSwapPairHelper<uint32_t>(instr);
      break;
    case CASP_x:
    case CASPA_x:
    case CASPL_x:
    case CASPAL_x:
      CompareAndSwapPairHelper<uint64_t>(instr);
      break;
    default:
      // Everything that is not a compare-and-swap. The one-time warning is
      // printed for all of these, including the non-exclusive forms.
      PrintExclusiveAccessWarning();

      unsigned rs = instr->GetRs();
      unsigned rt = instr->GetRt();
      unsigned rt2 = instr->GetRt2();
      unsigned rn = instr->GetRn();

      bool is_exclusive = !instr->GetLdStXNotExclusive();
      // Non-exclusive instructions handled here are always treated as
      // acquire/release accesses.
      bool is_acquire_release =
          !is_exclusive || instr->GetLdStXAcquireRelease();
      bool is_load = instr->GetLdStXLoad();
      bool is_pair = instr->GetLdStXPair();

      // `element_size` is the size of one register's worth of data, in bytes;
      // `access_size` covers both elements for pair instructions.
      unsigned element_size = 1 << instr->GetLdStXSizeLog2();
      unsigned access_size = is_pair ? element_size * 2 : element_size;
      uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);

      CheckIsValidUnalignedAtomicAccess(rn, address, access_size);

      if (is_load) {
        if (is_exclusive) {
          local_monitor_.MarkExclusive(address, access_size);
        } else {
          // Any non-exclusive load can clear the local monitor as a side
          // effect. We don't need to do this, but it is useful to stress the
          // simulated code.
          local_monitor_.Clear();
        }

        // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS).
        // We will print a more detailed log.
        // `reg_size` records the width of the destination register, which is
        // wider than `element_size` for the byte and halfword loads.
        unsigned reg_size = 0;
        switch (op) {
          case LDXRB_w:
          case LDAXRB_w:
          case LDARB_w:
          case LDLARB: {
            VIXL_DEFINE_OR_RETURN(value, MemRead<uint8_t>(address));
            WriteWRegister(rt, value, NoRegLog);
            reg_size = kWRegSizeInBytes;
            break;
          }
          case LDXRH_w:
          case LDAXRH_w:
          case LDARH_w:
          case LDLARH: {
            VIXL_DEFINE_OR_RETURN(value, MemRead<uint16_t>(address));
            WriteWRegister(rt, value, NoRegLog);
            reg_size = kWRegSizeInBytes;
            break;
          }
          case LDXR_w:
          case LDAXR_w:
          case LDAR_w:
          case LDLAR_w: {
            VIXL_DEFINE_OR_RETURN(value, MemRead<uint32_t>(address));
            WriteWRegister(rt, value, NoRegLog);
            reg_size = kWRegSizeInBytes;
            break;
          }
          case LDXR_x:
          case LDAXR_x:
          case LDAR_x:
          case LDLAR_x: {
            VIXL_DEFINE_OR_RETURN(value, MemRead<uint64_t>(address));
            WriteXRegister(rt, value, NoRegLog);
            reg_size = kXRegSizeInBytes;
            break;
          }
          case LDXP_w:
          case LDAXP_w: {
            // Pair loads read both elements before writing either register.
            VIXL_DEFINE_OR_RETURN(value, MemRead<uint32_t>(address));
            VIXL_DEFINE_OR_RETURN(value2,
                                  MemRead<uint32_t>(address + element_size));
            WriteWRegister(rt, value, NoRegLog);
            WriteWRegister(rt2, value2, NoRegLog);
            reg_size = kWRegSizeInBytes;
            break;
          }
          case LDXP_x:
          case LDAXP_x: {
            VIXL_DEFINE_OR_RETURN(value, MemRead<uint64_t>(address));
            VIXL_DEFINE_OR_RETURN(value2,
                                  MemRead<uint64_t>(address + element_size));
            WriteXRegister(rt, value, NoRegLog);
            WriteXRegister(rt2, value2, NoRegLog);
            reg_size = kXRegSizeInBytes;
            break;
          }
          default:
            VIXL_UNREACHABLE();
        }

        if (is_acquire_release) {
          // Approximate load-acquire by issuing a full barrier after the load.
          __sync_synchronize();
        }

        PrintRegisterFormat format = GetPrintRegisterFormatForSize(reg_size);
        LogExtendingRead(rt, format, element_size, address);
        if (is_pair) {
          LogExtendingRead(rt2, format, element_size, address + element_size);
        }
      } else {
        if (is_acquire_release) {
          // Approximate store-release by issuing a full barrier before the
          // store.
          __sync_synchronize();
        }

        bool do_store = true;
        if (is_exclusive) {
          // An exclusive store is performed only if both monitors still
          // cover the accessed range.
          do_store = local_monitor_.IsExclusive(address, access_size) &&
                     global_monitor_.IsExclusive(address, access_size);
          // Status result: 0 means the store is performed, 1 means it is not.
          // It is written before the store itself is attempted.
          WriteWRegister(rs, do_store ? 0 : 1);

          //  - All exclusive stores explicitly clear the local monitor.
          local_monitor_.Clear();
        } else {
          //  - Any other store can clear the local monitor as a side effect.
          local_monitor_.MaybeClear();
        }

        if (do_store) {
          switch (op) {
            case STXRB_w:
            case STLXRB_w:
            case STLRB_w:
            case STLLRB:
              if (!MemWrite<uint8_t>(address, ReadWRegister(rt))) return;
              break;
            case STXRH_w:
            case STLXRH_w:
            case STLRH_w:
            case STLLRH:
              if (!MemWrite<uint16_t>(address, ReadWRegister(rt))) return;
              break;
            case STXR_w:
            case STLXR_w:
            case STLR_w:
            case STLLR_w:
              if (!MemWrite<uint32_t>(address, ReadWRegister(rt))) return;
              break;
            case STXR_x:
            case STLXR_x:
            case STLR_x:
            case STLLR_x:
              if (!MemWrite<uint64_t>(address, ReadXRegister(rt))) return;
              break;
            case STXP_w:
            case STLXP_w:
              // The first element is stored before the second; if the second
              // store fails, the first one has already been performed.
              if (!MemWrite<uint32_t>(address, ReadWRegister(rt))) return;
              if (!MemWrite<uint32_t>(address + element_size,
                                      ReadWRegister(rt2))) {
                return;
              }
              break;
            case STXP_x:
            case STLXP_x:
              if (!MemWrite<uint64_t>(address, ReadXRegister(rt))) return;
              if (!MemWrite<uint64_t>(address + element_size,
                                      ReadXRegister(rt2))) {
                return;
              }
              break;
            default:
              VIXL_UNREACHABLE();
          }

          // Stores are traced at the element size, not the register size.
          PrintRegisterFormat format =
              GetPrintRegisterFormatForSize(element_size);
          LogWrite(rt, format, address);
          if (is_pair) {
            LogWrite(rt2, format, address + element_size);
          }
        }
      }
  }
}
   5044 
   5045 template <typename T>
   5046 void Simulator::AtomicMemorySimpleHelper(const Instruction* instr) {
   5047   unsigned rs = instr->GetRs();
   5048   unsigned rt = instr->GetRt();
   5049   unsigned rn = instr->GetRn();
   5050 
   5051   bool is_acquire = (instr->ExtractBit(23) == 1) && (rt != kZeroRegCode);
   5052   bool is_release = instr->ExtractBit(22) == 1;
   5053 
   5054   unsigned element_size = sizeof(T);
   5055   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
   5056 
   5057   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
   5058 
   5059   T value = ReadRegister<T>(rs);
   5060 
   5061   VIXL_DEFINE_OR_RETURN(data, MemRead<T>(address));
   5062 
   5063   if (is_acquire) {
   5064     // Approximate load-acquire by issuing a full barrier after the load.
   5065     __sync_synchronize();
   5066   }
   5067 
   5068   T result = 0;
   5069   switch (instr->Mask(AtomicMemorySimpleOpMask)) {
   5070     case LDADDOp:
   5071       result = data + value;
   5072       break;
   5073     case LDCLROp:
   5074       VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
   5075       result = data & ~value;
   5076       break;
   5077     case LDEOROp:
   5078       VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
   5079       result = data ^ value;
   5080       break;
   5081     case LDSETOp:
   5082       VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
   5083       result = data | value;
   5084       break;
   5085 
   5086     // Signed/Unsigned difference is done via the templated type T.
   5087     case LDSMAXOp:
   5088     case LDUMAXOp:
   5089       result = (data > value) ? data : value;
   5090       break;
   5091     case LDSMINOp:
   5092     case LDUMINOp:
   5093       result = (data > value) ? value : data;
   5094       break;
   5095   }
   5096 
   5097   if (is_release) {
   5098     // Approximate store-release by issuing a full barrier before the store.
   5099     __sync_synchronize();
   5100   }
   5101 
   5102   WriteRegister<T>(rt, data, NoRegLog);
   5103 
   5104   unsigned register_size = element_size;
   5105   if (element_size < kXRegSizeInBytes) {
   5106     register_size = kWRegSizeInBytes;
   5107   }
   5108   PrintRegisterFormat format = GetPrintRegisterFormatForSize(register_size);
   5109   LogExtendingRead(rt, format, element_size, address);
   5110 
   5111   if (!MemWrite<T>(address, result)) return;
   5112   format = GetPrintRegisterFormatForSize(element_size);
   5113   LogWrite(rs, format, address);
   5114 }
   5115 
   5116 template <typename T>
   5117 void Simulator::AtomicMemorySwapHelper(const Instruction* instr) {
   5118   unsigned rs = instr->GetRs();
   5119   unsigned rt = instr->GetRt();
   5120   unsigned rn = instr->GetRn();
   5121 
   5122   bool is_acquire = (instr->ExtractBit(23) == 1) && (rt != kZeroRegCode);
   5123   bool is_release = instr->ExtractBit(22) == 1;
   5124 
   5125   unsigned element_size = sizeof(T);
   5126   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
   5127 
   5128   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
   5129 
   5130   VIXL_DEFINE_OR_RETURN(data, MemRead<T>(address));
   5131 
   5132   if (is_acquire) {
   5133     // Approximate load-acquire by issuing a full barrier after the load.
   5134     __sync_synchronize();
   5135   }
   5136 
   5137   if (is_release) {
   5138     // Approximate store-release by issuing a full barrier before the store.
   5139     __sync_synchronize();
   5140   }
   5141   if (!MemWrite<T>(address, ReadRegister<T>(rs))) return;
   5142 
   5143   WriteRegister<T>(rt, data);
   5144 
   5145   PrintRegisterFormat format = GetPrintRegisterFormatForSize(element_size);
   5146   LogRead(rt, format, address);
   5147   LogWrite(rs, format, address);
   5148 }
   5149 
   5150 template <typename T>
   5151 void Simulator::LoadAcquireRCpcHelper(const Instruction* instr) {
   5152   unsigned rt = instr->GetRt();
   5153   unsigned rn = instr->GetRn();
   5154 
   5155   unsigned element_size = sizeof(T);
   5156   uint64_t address = ReadRegister<uint64_t>(rn, Reg31IsStackPointer);
   5157 
   5158   CheckIsValidUnalignedAtomicAccess(rn, address, element_size);
   5159 
   5160   VIXL_DEFINE_OR_RETURN(value, MemRead<T>(address));
   5161 
   5162   WriteRegister<T>(rt, value);
   5163 
   5164   // Approximate load-acquire by issuing a full barrier after the load.
   5165   __sync_synchronize();
   5166 
   5167   LogRead(rt, GetPrintRegisterFormatForSize(element_size), address);
   5168 }
   5169 
// Mnemonic stems of the simple atomic memory operations that
// Simulator::VisitAtomicMemory simulates with an unsigned element type. `V` is
// a macro that is applied to each stem in turn.
#define ATOMIC_MEMORY_SIMPLE_UINT_LIST(V) \
  V(LDADD)                                \
  V(LDCLR)                                \
  V(LDEOR)                                \
  V(LDSET)                                \
  V(LDUMAX)                               \
  V(LDUMIN)

// Mnemonic stems of the simple atomic memory operations that
// Simulator::VisitAtomicMemory simulates with a signed element type (the
// signed max/min forms). `V` is a macro that is applied to each stem in turn.
#define ATOMIC_MEMORY_SIMPLE_INT_LIST(V) \
  V(LDSMAX)                              \
  V(LDSMIN)
   5181 
// Dispatches the instructions decoded through AtomicMemoryMask to the matching
// helper, instantiated with the element type of the access:
//  - simple operations (LDADD, LDCLR, ...) go to AtomicMemorySimpleHelper,
//    with an unsigned type for the ATOMIC_MEMORY_SIMPLE_UINT_LIST stems and a
//    signed type for the ATOMIC_MEMORY_SIMPLE_INT_LIST stems;
//  - swaps (SWP*) go to AtomicMemorySwapHelper;
//  - LDAPR* loads go to LoadAcquireRCpcHelper.
//
// The switch has no default case, so any other encoding falls through without
// being simulated.
void Simulator::VisitAtomicMemory(const Instruction* instr) {
  switch (instr->Mask(AtomicMemoryMask)) {
// clang-format off
// Each SIM_FUNC_<size>(A) expands to the four case labels of stem A for that
// access size: the plain form plus the A, L and AL suffixed variants. Applied
// through one of the list macros, this yields a run of case labels that all
// share the statement following the list.
// NOTE(review): these helper macros are not #undef'd within this function.
#define SIM_FUNC_B(A) \
    case A##B:        \
    case A##AB:       \
    case A##LB:       \
    case A##ALB:
#define SIM_FUNC_H(A) \
    case A##H:        \
    case A##AH:       \
    case A##LH:       \
    case A##ALH:
#define SIM_FUNC_w(A) \
    case A##_w:       \
    case A##A_w:      \
    case A##L_w:      \
    case A##AL_w:
#define SIM_FUNC_x(A) \
    case A##_x:       \
    case A##A_x:      \
    case A##L_x:      \
    case A##AL_x:

    // Byte-sized simple operations.
    ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_B)
      AtomicMemorySimpleHelper<uint8_t>(instr);
      break;
    ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_B)
      AtomicMemorySimpleHelper<int8_t>(instr);
      break;
    // Halfword-sized simple operations.
    ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_H)
      AtomicMemorySimpleHelper<uint16_t>(instr);
      break;
    ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_H)
      AtomicMemorySimpleHelper<int16_t>(instr);
      break;
    // Word-sized (W register) simple operations.
    ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_w)
      AtomicMemorySimpleHelper<uint32_t>(instr);
      break;
    ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_w)
      AtomicMemorySimpleHelper<int32_t>(instr);
      break;
    // Doubleword-sized (X register) simple operations.
    ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_x)
      AtomicMemorySimpleHelper<uint64_t>(instr);
      break;
    ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_x)
      AtomicMemorySimpleHelper<int64_t>(instr);
      break;
      // clang-format on

    // Swaps are always simulated with an unsigned element type.
    case SWPB:
    case SWPAB:
    case SWPLB:
    case SWPALB:
      AtomicMemorySwapHelper<uint8_t>(instr);
      break;
    case SWPH:
    case SWPAH:
    case SWPLH:
    case SWPALH:
      AtomicMemorySwapHelper<uint16_t>(instr);
      break;
    case SWP_w:
    case SWPA_w:
    case SWPL_w:
    case SWPAL_w:
      AtomicMemorySwapHelper<uint32_t>(instr);
      break;
    case SWP_x:
    case SWPA_x:
    case SWPL_x:
    case SWPAL_x:
      AtomicMemorySwapHelper<uint64_t>(instr);
      break;
    // Load-acquire (LDAPR) forms.
    case LDAPRB:
      LoadAcquireRCpcHelper<uint8_t>(instr);
      break;
    case LDAPRH:
      LoadAcquireRCpcHelper<uint16_t>(instr);
      break;
    case LDAPR_w:
      LoadAcquireRCpcHelper<uint32_t>(instr);
      break;
    case LDAPR_x:
      LoadAcquireRCpcHelper<uint64_t>(instr);
      break;
  }
}
   5270 
   5271 
   5272 void Simulator::VisitLoadLiteral(const Instruction* instr) {
   5273   unsigned rt = instr->GetRt();
   5274   uint64_t address = instr->GetLiteralAddress<uint64_t>();
   5275 
   5276   // Verify that the calculated address is available to the host.
   5277   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
   5278 
   5279   switch (instr->Mask(LoadLiteralMask)) {
   5280     // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_VREGS), then
   5281     // print a more detailed log.
   5282     case LDR_w_lit: {
   5283       VIXL_DEFINE_OR_RETURN(value, MemRead<uint32_t>(address));
   5284       WriteWRegister(rt, value, NoRegLog);
   5285       LogRead(rt, kPrintWReg, address);
   5286       break;
   5287     }
   5288     case LDR_x_lit: {
   5289       VIXL_DEFINE_OR_RETURN(value, MemRead<uint64_t>(address));
   5290       WriteXRegister(rt, value, NoRegLog);
   5291       LogRead(rt, kPrintXReg, address);
   5292       break;
   5293     }
   5294     case LDR_s_lit: {
   5295       VIXL_DEFINE_OR_RETURN(value, MemRead<float>(address));
   5296       WriteSRegister(rt, value, NoRegLog);
   5297       LogVRead(rt, kPrintSRegFP, address);
   5298       break;
   5299     }
   5300     case LDR_d_lit: {
   5301       VIXL_DEFINE_OR_RETURN(value, MemRead<double>(address));
   5302       WriteDRegister(rt, value, NoRegLog);
   5303       LogVRead(rt, kPrintDRegFP, address);
   5304       break;
   5305     }
   5306     case LDR_q_lit: {
   5307       VIXL_DEFINE_OR_RETURN(value, MemRead<qreg_t>(address));
   5308       WriteQRegister(rt, value, NoRegLog);
   5309       LogVRead(rt, kPrintReg1Q, address);
   5310       break;
   5311     }
   5312     case LDRSW_x_lit: {
   5313       VIXL_DEFINE_OR_RETURN(value, MemRead<int32_t>(address));
   5314       WriteXRegister(rt, value, NoRegLog);
   5315       LogExtendingRead(rt, kPrintXReg, kWRegSizeInBytes, address);
   5316       break;
   5317     }
   5318 
   5319     // Ignore prfm hint instructions.
   5320     case PRFM_lit:
   5321       break;
   5322 
   5323     default:
   5324       VIXL_UNREACHABLE();
   5325   }
   5326 
   5327   local_monitor_.MaybeClear();
   5328 }
   5329 
   5330 
   5331 uintptr_t Simulator::AddressModeHelper(unsigned addr_reg,
   5332                                        int64_t offset,
   5333                                        AddrMode addrmode) {
   5334   uint64_t address = ReadXRegister(addr_reg, Reg31IsStackPointer);
   5335 
   5336   if ((addr_reg == 31) && ((address % 16) != 0)) {
   5337     // When the base register is SP the stack pointer is required to be
   5338     // quadword aligned prior to the address calculation and write-backs.
   5339     // Misalignment will cause a stack alignment fault.
   5340     VIXL_ALIGNMENT_EXCEPTION();
   5341   }
   5342 
   5343   if ((addrmode == PreIndex) || (addrmode == PostIndex)) {
   5344     VIXL_ASSERT(offset != 0);
   5345     // Only preindex should log the register update here. For Postindex, the
   5346     // update will be printed automatically by LogWrittenRegisters _after_ the
   5347     // memory access itself is logged.
   5348     RegLogMode log_mode = (addrmode == PreIndex) ? LogRegWrites : NoRegLog;
   5349     WriteXRegister(addr_reg, address + offset, log_mode, Reg31IsStackPointer);
   5350   }
   5351 
   5352   if ((addrmode == Offset) || (addrmode == PreIndex)) {
   5353     address += offset;
   5354   }
   5355 
   5356   // Verify that the calculated address is available to the host.
   5357   VIXL_ASSERT(address == static_cast<uintptr_t>(address));
   5358 
   5359   return static_cast<uintptr_t>(address);
   5360 }
   5361 
   5362 
   5363 void Simulator::VisitMoveWideImmediate(const Instruction* instr) {
   5364   MoveWideImmediateOp mov_op =
   5365       static_cast<MoveWideImmediateOp>(instr->Mask(MoveWideImmediateMask));
   5366   int64_t new_xn_val = 0;
   5367 
   5368   bool is_64_bits = instr->GetSixtyFourBits() == 1;
   5369   // Shift is limited for W operations.
   5370   VIXL_ASSERT(is_64_bits || (instr->GetShiftMoveWide() < 2));
   5371 
   5372   // Get the shifted immediate.
   5373   int64_t shift = instr->GetShiftMoveWide() * 16;
   5374   int64_t shifted_imm16 = static_cast<int64_t>(instr->GetImmMoveWide())
   5375                           << shift;
   5376 
   5377   // Compute the new value.
   5378   switch (mov_op) {
   5379     case MOVN_w:
   5380     case MOVN_x: {
   5381       new_xn_val = ~shifted_imm16;
   5382       if (!is_64_bits) new_xn_val &= kWRegMask;
   5383       break;
   5384     }
   5385     case MOVK_w:
   5386     case MOVK_x: {
   5387       unsigned reg_code = instr->GetRd();
   5388       int64_t prev_xn_val =
   5389           is_64_bits ? ReadXRegister(reg_code) : ReadWRegister(reg_code);
   5390       new_xn_val = (prev_xn_val & ~(INT64_C(0xffff) << shift)) | shifted_imm16;
   5391       break;
   5392     }
   5393     case MOVZ_w:
   5394     case MOVZ_x: {
   5395       new_xn_val = shifted_imm16;
   5396       break;
   5397     }
   5398     default:
   5399       VIXL_UNREACHABLE();
   5400   }
   5401 
   5402   // Update the destination register.
   5403   WriteXRegister(instr->GetRd(), new_xn_val);
   5404 }
   5405 
   5406 
   5407 void Simulator::VisitConditionalSelect(const Instruction* instr) {
   5408   uint64_t new_val = ReadXRegister(instr->GetRn());
   5409 
   5410   if (ConditionFailed(static_cast<Condition>(instr->GetCondition()))) {
   5411     new_val = ReadXRegister(instr->GetRm());
   5412     switch (instr->Mask(ConditionalSelectMask)) {
   5413       case CSEL_w:
   5414       case CSEL_x:
   5415         break;
   5416       case CSINC_w:
   5417       case CSINC_x:
   5418         new_val++;
   5419         break;
   5420       case CSINV_w:
   5421       case CSINV_x:
   5422         new_val = ~new_val;
   5423         break;
   5424       case CSNEG_w:
   5425       case CSNEG_x:
   5426         new_val = -new_val;
   5427         break;
   5428       default:
   5429         VIXL_UNIMPLEMENTED();
   5430     }
   5431   }
   5432   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
   5433   WriteRegister(reg_size, instr->GetRd(), new_val);
   5434 }
   5435 
   5436 
   5437 #define PAUTH_MODES_REGISTER_CONTEXT(V)   \
   5438   V(i, a, kPACKeyIA, kInstructionPointer) \
   5439   V(i, b, kPACKeyIB, kInstructionPointer) \
   5440   V(d, a, kPACKeyDA, kDataPointer)        \
   5441   V(d, b, kPACKeyDB, kDataPointer)
   5442 
   5443 void Simulator::VisitDataProcessing1Source(const Instruction* instr) {
   5444   unsigned dst = instr->GetRd();
   5445   unsigned src = instr->GetRn();
   5446   Reg31Mode r31_pac = Reg31IsStackPointer;
   5447 
   5448   switch (form_hash_) {
   5449 #define DEFINE_PAUTH_FUNCS(SUF0, SUF1, KEY, D)      \
   5450   case "pac" #SUF0 "z" #SUF1 "_64z_dp_1src"_h:      \
   5451     VIXL_ASSERT(src == kZeroRegCode);               \
   5452     r31_pac = Reg31IsZeroRegister;                  \
   5453     VIXL_FALLTHROUGH();                             \
   5454   case "pac" #SUF0 #SUF1 "_64p_dp_1src"_h: {        \
   5455     uint64_t mod = ReadXRegister(src, r31_pac);     \
   5456     uint64_t ptr = ReadXRegister(dst);              \
   5457     WriteXRegister(dst, AddPAC(ptr, mod, KEY, D));  \
   5458     break;                                          \
   5459   }                                                 \
   5460   case "aut" #SUF0 "z" #SUF1 "_64z_dp_1src"_h:      \
   5461     VIXL_ASSERT(src == kZeroRegCode);               \
   5462     r31_pac = Reg31IsZeroRegister;                  \
   5463     VIXL_FALLTHROUGH();                             \
   5464   case "aut" #SUF0 #SUF1 "_64p_dp_1src"_h: {        \
   5465     uint64_t mod = ReadXRegister(src, r31_pac);     \
   5466     uint64_t ptr = ReadXRegister(dst);              \
   5467     WriteXRegister(dst, AuthPAC(ptr, mod, KEY, D)); \
   5468     break;                                          \
   5469   }
   5470     PAUTH_MODES_REGISTER_CONTEXT(DEFINE_PAUTH_FUNCS)
   5471 #undef DEFINE_PAUTH_FUNCS
   5472 
   5473     case "xpaci_64z_dp_1src"_h:
   5474       WriteXRegister(dst, StripPAC(ReadXRegister(dst), kInstructionPointer));
   5475       break;
   5476     case "xpacd_64z_dp_1src"_h:
   5477       WriteXRegister(dst, StripPAC(ReadXRegister(dst), kDataPointer));
   5478       break;
   5479     case "rbit_32_dp_1src"_h:
   5480       WriteWRegister(dst, ReverseBits(ReadWRegister(src)));
   5481       break;
   5482     case "rbit_64_dp_1src"_h:
   5483       WriteXRegister(dst, ReverseBits(ReadXRegister(src)));
   5484       break;
   5485     case "rev16_32_dp_1src"_h:
   5486       WriteWRegister(dst, ReverseBytes(ReadWRegister(src), 1));
   5487       break;
   5488     case "rev16_64_dp_1src"_h:
   5489       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 1));
   5490       break;
   5491     case "rev_32_dp_1src"_h:
   5492       WriteWRegister(dst, ReverseBytes(ReadWRegister(src), 2));
   5493       break;
   5494     case "rev32_64_dp_1src"_h:
   5495       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 2));
   5496       break;
   5497     case "rev_64_dp_1src"_h:
   5498       WriteXRegister(dst, ReverseBytes(ReadXRegister(src), 3));
   5499       break;
   5500     case "clz_32_dp_1src"_h:
   5501       WriteWRegister(dst, CountLeadingZeros(ReadWRegister(src)));
   5502       break;
   5503     case "clz_64_dp_1src"_h:
   5504       WriteXRegister(dst, CountLeadingZeros(ReadXRegister(src)));
   5505       break;
   5506     case "cls_32_dp_1src"_h:
   5507       WriteWRegister(dst, CountLeadingSignBits(ReadWRegister(src)));
   5508       break;
   5509     case "cls_64_dp_1src"_h:
   5510       WriteXRegister(dst, CountLeadingSignBits(ReadXRegister(src)));
   5511       break;
   5512     case "abs_32_dp_1src"_h:
   5513       WriteWRegister(dst, Abs(ReadWRegister(src)));
   5514       break;
   5515     case "abs_64_dp_1src"_h:
   5516       WriteXRegister(dst, Abs(ReadXRegister(src)));
   5517       break;
   5518     case "cnt_32_dp_1src"_h:
   5519       WriteWRegister(dst, CountSetBits(ReadWRegister(src)));
   5520       break;
   5521     case "cnt_64_dp_1src"_h:
   5522       WriteXRegister(dst, CountSetBits(ReadXRegister(src)));
   5523       break;
   5524     case "ctz_32_dp_1src"_h:
   5525       WriteWRegister(dst, CountTrailingZeros(ReadWRegister(src)));
   5526       break;
   5527     case "ctz_64_dp_1src"_h:
   5528       WriteXRegister(dst, CountTrailingZeros(ReadXRegister(src)));
   5529       break;
   5530   }
   5531 }
   5532 
   5533 uint32_t Simulator::Poly32Mod2(unsigned n, uint64_t data, uint32_t poly) {
   5534   VIXL_ASSERT((n > 32) && (n <= 64));
   5535   for (unsigned i = (n - 1); i >= 32; i--) {
   5536     if (((data >> i) & 1) != 0) {
   5537       uint64_t polysh32 = (uint64_t)poly << (i - 32);
   5538       uint64_t mask = (UINT64_C(1) << i) - 1;
   5539       data = ((data & mask) ^ polysh32);
   5540     }
   5541   }
   5542   return data & 0xffffffff;
   5543 }
   5544 
   5545 
   5546 template <typename T>
   5547 uint32_t Simulator::Crc32Checksum(uint32_t acc, T val, uint32_t poly) {
   5548   unsigned size = sizeof(val) * 8;  // Number of bits in type T.
   5549   VIXL_ASSERT((size == 8) || (size == 16) || (size == 32));
   5550   uint64_t tempacc = static_cast<uint64_t>(ReverseBits(acc)) << size;
   5551   uint64_t tempval = static_cast<uint64_t>(ReverseBits(val)) << 32;
   5552   return ReverseBits(Poly32Mod2(32 + size, tempacc ^ tempval, poly));
   5553 }
   5554 
   5555 
   5556 uint32_t Simulator::Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly) {
   5557   // Poly32Mod2 cannot handle inputs with more than 32 bits, so compute
   5558   // the CRC of each 32-bit word sequentially.
   5559   acc = Crc32Checksum(acc, (uint32_t)(val & 0xffffffff), poly);
   5560   return Crc32Checksum(acc, (uint32_t)(val >> 32), poly);
   5561 }
   5562 
   5563 
   5564 void Simulator::VisitDataProcessing2Source(const Instruction* instr) {
   5565   Shift shift_op = NO_SHIFT;
   5566   int64_t result = 0;
   5567   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
   5568 
   5569   switch (instr->Mask(DataProcessing2SourceMask)) {
   5570     case SDIV_w: {
   5571       int32_t rn = ReadWRegister(instr->GetRn());
   5572       int32_t rm = ReadWRegister(instr->GetRm());
   5573       if ((rn == kWMinInt) && (rm == -1)) {
   5574         result = kWMinInt;
   5575       } else if (rm == 0) {
   5576         // Division by zero can be trapped, but not on A-class processors.
   5577         result = 0;
   5578       } else {
   5579         result = rn / rm;
   5580       }
   5581       break;
   5582     }
   5583     case SDIV_x: {
   5584       int64_t rn = ReadXRegister(instr->GetRn());
   5585       int64_t rm = ReadXRegister(instr->GetRm());
   5586       if ((rn == kXMinInt) && (rm == -1)) {
   5587         result = kXMinInt;
   5588       } else if (rm == 0) {
   5589         // Division by zero can be trapped, but not on A-class processors.
   5590         result = 0;
   5591       } else {
   5592         result = rn / rm;
   5593       }
   5594       break;
   5595     }
   5596     case UDIV_w: {
   5597       uint32_t rn = static_cast<uint32_t>(ReadWRegister(instr->GetRn()));
   5598       uint32_t rm = static_cast<uint32_t>(ReadWRegister(instr->GetRm()));
   5599       if (rm == 0) {
   5600         // Division by zero can be trapped, but not on A-class processors.
   5601         result = 0;
   5602       } else {
   5603         result = rn / rm;
   5604       }
   5605       break;
   5606     }
   5607     case UDIV_x: {
   5608       uint64_t rn = static_cast<uint64_t>(ReadXRegister(instr->GetRn()));
   5609       uint64_t rm = static_cast<uint64_t>(ReadXRegister(instr->GetRm()));
   5610       if (rm == 0) {
   5611         // Division by zero can be trapped, but not on A-class processors.
   5612         result = 0;
   5613       } else {
   5614         result = rn / rm;
   5615       }
   5616       break;
   5617     }
   5618     case LSLV_w:
   5619     case LSLV_x:
   5620       shift_op = LSL;
   5621       break;
   5622     case LSRV_w:
   5623     case LSRV_x:
   5624       shift_op = LSR;
   5625       break;
   5626     case ASRV_w:
   5627     case ASRV_x:
   5628       shift_op = ASR;
   5629       break;
   5630     case RORV_w:
   5631     case RORV_x:
   5632       shift_op = ROR;
   5633       break;
   5634     case PACGA: {
   5635       uint64_t dst = static_cast<uint64_t>(ReadXRegister(instr->GetRn()));
   5636       uint64_t src = static_cast<uint64_t>(
   5637           ReadXRegister(instr->GetRm(), Reg31IsStackPointer));
   5638       uint64_t code = ComputePAC(dst, src, kPACKeyGA);
   5639       result = code & 0xffffffff00000000;
   5640       break;
   5641     }
   5642     case CRC32B: {
   5643       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
   5644       uint8_t val = ReadRegister<uint8_t>(instr->GetRm());
   5645       result = Crc32Checksum(acc, val, CRC32_POLY);
   5646       break;
   5647     }
   5648     case CRC32H: {
   5649       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
   5650       uint16_t val = ReadRegister<uint16_t>(instr->GetRm());
   5651       result = Crc32Checksum(acc, val, CRC32_POLY);
   5652       break;
   5653     }
   5654     case CRC32W: {
   5655       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
   5656       uint32_t val = ReadRegister<uint32_t>(instr->GetRm());
   5657       result = Crc32Checksum(acc, val, CRC32_POLY);
   5658       break;
   5659     }
   5660     case CRC32X: {
   5661       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
   5662       uint64_t val = ReadRegister<uint64_t>(instr->GetRm());
   5663       result = Crc32Checksum(acc, val, CRC32_POLY);
   5664       reg_size = kWRegSize;
   5665       break;
   5666     }
   5667     case CRC32CB: {
   5668       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
   5669       uint8_t val = ReadRegister<uint8_t>(instr->GetRm());
   5670       result = Crc32Checksum(acc, val, CRC32C_POLY);
   5671       break;
   5672     }
   5673     case CRC32CH: {
   5674       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
   5675       uint16_t val = ReadRegister<uint16_t>(instr->GetRm());
   5676       result = Crc32Checksum(acc, val, CRC32C_POLY);
   5677       break;
   5678     }
   5679     case CRC32CW: {
   5680       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
   5681       uint32_t val = ReadRegister<uint32_t>(instr->GetRm());
   5682       result = Crc32Checksum(acc, val, CRC32C_POLY);
   5683       break;
   5684     }
   5685     case CRC32CX: {
   5686       uint32_t acc = ReadRegister<uint32_t>(instr->GetRn());
   5687       uint64_t val = ReadRegister<uint64_t>(instr->GetRm());
   5688       result = Crc32Checksum(acc, val, CRC32C_POLY);
   5689       reg_size = kWRegSize;
   5690       break;
   5691     }
   5692     default:
   5693       VIXL_UNIMPLEMENTED();
   5694   }
   5695 
   5696   if (shift_op != NO_SHIFT) {
   5697     // Shift distance encoded in the least-significant five/six bits of the
   5698     // register.
   5699     int mask = (instr->GetSixtyFourBits() == 1) ? 0x3f : 0x1f;
   5700     unsigned shift = ReadWRegister(instr->GetRm()) & mask;
   5701     result = ShiftOperand(reg_size,
   5702                           ReadRegister(reg_size, instr->GetRn()),
   5703                           shift_op,
   5704                           shift);
   5705   }
   5706   WriteRegister(reg_size, instr->GetRd(), result);
   5707 }
   5708 
   5709 void Simulator::SimulateSignedMinMax(const Instruction* instr) {
   5710   int32_t wn = ReadWRegister(instr->GetRn());
   5711   int32_t wm = ReadWRegister(instr->GetRm());
   5712   int64_t xn = ReadXRegister(instr->GetRn());
   5713   int64_t xm = ReadXRegister(instr->GetRm());
   5714   int32_t imm = instr->ExtractSignedBits(17, 10);
   5715   int dst = instr->GetRd();
   5716 
   5717   switch (form_hash_) {
   5718     case "smax_64_minmax_imm"_h:
   5719     case "smin_64_minmax_imm"_h:
   5720       xm = imm;
   5721       break;
   5722     case "smax_32_minmax_imm"_h:
   5723     case "smin_32_minmax_imm"_h:
   5724       wm = imm;
   5725       break;
   5726   }
   5727 
   5728   switch (form_hash_) {
   5729     case "smax_32_minmax_imm"_h:
   5730     case "smax_32_dp_2src"_h:
   5731       WriteWRegister(dst, std::max(wn, wm));
   5732       break;
   5733     case "smax_64_minmax_imm"_h:
   5734     case "smax_64_dp_2src"_h:
   5735       WriteXRegister(dst, std::max(xn, xm));
   5736       break;
   5737     case "smin_32_minmax_imm"_h:
   5738     case "smin_32_dp_2src"_h:
   5739       WriteWRegister(dst, std::min(wn, wm));
   5740       break;
   5741     case "smin_64_minmax_imm"_h:
   5742     case "smin_64_dp_2src"_h:
   5743       WriteXRegister(dst, std::min(xn, xm));
   5744       break;
   5745   }
   5746 }
   5747 
   5748 void Simulator::SimulateUnsignedMinMax(const Instruction* instr) {
   5749   uint64_t xn = ReadXRegister(instr->GetRn());
   5750   uint64_t xm = ReadXRegister(instr->GetRm());
   5751   uint32_t imm = instr->ExtractBits(17, 10);
   5752   int dst = instr->GetRd();
   5753 
   5754   switch (form_hash_) {
   5755     case "umax_64u_minmax_imm"_h:
   5756     case "umax_32u_minmax_imm"_h:
   5757     case "umin_64u_minmax_imm"_h:
   5758     case "umin_32u_minmax_imm"_h:
   5759       xm = imm;
   5760       break;
   5761   }
   5762 
   5763   switch (form_hash_) {
   5764     case "umax_32u_minmax_imm"_h:
   5765     case "umax_32_dp_2src"_h:
   5766       xn &= 0xffff'ffff;
   5767       xm &= 0xffff'ffff;
   5768       VIXL_FALLTHROUGH();
   5769     case "umax_64u_minmax_imm"_h:
   5770     case "umax_64_dp_2src"_h:
   5771       WriteXRegister(dst, std::max(xn, xm));
   5772       break;
   5773     case "umin_32u_minmax_imm"_h:
   5774     case "umin_32_dp_2src"_h:
   5775       xn &= 0xffff'ffff;
   5776       xm &= 0xffff'ffff;
   5777       VIXL_FALLTHROUGH();
   5778     case "umin_64u_minmax_imm"_h:
   5779     case "umin_64_dp_2src"_h:
   5780       WriteXRegister(dst, std::min(xn, xm));
   5781       break;
   5782   }
   5783 }
   5784 
   5785 void Simulator::VisitDataProcessing3Source(const Instruction* instr) {
   5786   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
   5787 
   5788   uint64_t result = 0;
   5789   // Extract and sign- or zero-extend 32-bit arguments for widening operations.
   5790   uint64_t rn_u32 = ReadRegister<uint32_t>(instr->GetRn());
   5791   uint64_t rm_u32 = ReadRegister<uint32_t>(instr->GetRm());
   5792   int64_t rn_s32 = ReadRegister<int32_t>(instr->GetRn());
   5793   int64_t rm_s32 = ReadRegister<int32_t>(instr->GetRm());
   5794   uint64_t rn_u64 = ReadXRegister(instr->GetRn());
   5795   uint64_t rm_u64 = ReadXRegister(instr->GetRm());
   5796   switch (instr->Mask(DataProcessing3SourceMask)) {
   5797     case MADD_w:
   5798     case MADD_x:
   5799       result = ReadXRegister(instr->GetRa()) + (rn_u64 * rm_u64);
   5800       break;
   5801     case MSUB_w:
   5802     case MSUB_x:
   5803       result = ReadXRegister(instr->GetRa()) - (rn_u64 * rm_u64);
   5804       break;
   5805     case SMADDL_x:
   5806       result = ReadXRegister(instr->GetRa()) +
   5807                static_cast<uint64_t>(rn_s32 * rm_s32);
   5808       break;
   5809     case SMSUBL_x:
   5810       result = ReadXRegister(instr->GetRa()) -
   5811                static_cast<uint64_t>(rn_s32 * rm_s32);
   5812       break;
   5813     case UMADDL_x:
   5814       result = ReadXRegister(instr->GetRa()) + (rn_u32 * rm_u32);
   5815       break;
   5816     case UMSUBL_x:
   5817       result = ReadXRegister(instr->GetRa()) - (rn_u32 * rm_u32);
   5818       break;
   5819     case UMULH_x:
   5820       result =
   5821           internal::MultiplyHigh<64>(ReadRegister<uint64_t>(instr->GetRn()),
   5822                                      ReadRegister<uint64_t>(instr->GetRm()));
   5823       break;
   5824     case SMULH_x:
   5825       result = internal::MultiplyHigh<64>(ReadXRegister(instr->GetRn()),
   5826                                           ReadXRegister(instr->GetRm()));
   5827       break;
   5828     default:
   5829       VIXL_UNIMPLEMENTED();
   5830   }
   5831   WriteRegister(reg_size, instr->GetRd(), result);
   5832 }
   5833 
   5834 
   5835 void Simulator::VisitBitfield(const Instruction* instr) {
   5836   unsigned reg_size = instr->GetSixtyFourBits() ? kXRegSize : kWRegSize;
   5837   int64_t reg_mask = instr->GetSixtyFourBits() ? kXRegMask : kWRegMask;
   5838   int R = instr->GetImmR();
   5839   int S = instr->GetImmS();
   5840 
   5841   if (instr->GetSixtyFourBits() != instr->GetBitN()) {
   5842     VisitUnallocated(instr);
   5843   }
   5844 
   5845   if ((instr->GetSixtyFourBits() == 0) && ((S > 31) || (R > 31))) {
   5846     VisitUnallocated(instr);
   5847   }
   5848 
   5849   int diff = S - R;
   5850   uint64_t mask;
   5851   if (diff >= 0) {
   5852     mask = ~UINT64_C(0) >> (64 - (diff + 1));
   5853     mask = (static_cast<unsigned>(diff) < (reg_size - 1)) ? mask : reg_mask;
   5854   } else {
   5855     mask = ~UINT64_C(0) >> (64 - (S + 1));
   5856     mask = RotateRight(mask, R, reg_size);
   5857     diff += reg_size;
   5858   }
   5859 
   5860   // inzero indicates if the extracted bitfield is inserted into the
   5861   // destination register value or in zero.
   5862   // If extend is true, extend the sign of the extracted bitfield.
   5863   bool inzero = false;
   5864   bool extend = false;
   5865   switch (instr->Mask(BitfieldMask)) {
   5866     case BFM_x:
   5867     case BFM_w:
   5868       break;
   5869     case SBFM_x:
   5870     case SBFM_w:
   5871       inzero = true;
   5872       extend = true;
   5873       break;
   5874     case UBFM_x:
   5875     case UBFM_w:
   5876       inzero = true;
   5877       break;
   5878     default:
   5879       VIXL_UNIMPLEMENTED();
   5880   }
   5881 
   5882   uint64_t dst = inzero ? 0 : ReadRegister(reg_size, instr->GetRd());
   5883   uint64_t src = ReadRegister(reg_size, instr->GetRn());
   5884   // Rotate source bitfield into place.
   5885   uint64_t result = RotateRight(src, R, reg_size);
   5886   // Determine the sign extension.
   5887   uint64_t topbits = (diff == 63) ? 0 : (~UINT64_C(0) << (diff + 1));
   5888   uint64_t signbits = extend && ((src >> S) & 1) ? topbits : 0;
   5889 
   5890   // Merge sign extension, dest/zero and bitfield.
   5891   result = signbits | (result & mask) | (dst & ~mask);
   5892 
   5893   WriteRegister(reg_size, instr->GetRd(), result);
   5894 }
   5895 
   5896 
   5897 void Simulator::VisitExtract(const Instruction* instr) {
   5898   unsigned lsb = instr->GetImmS();
   5899   unsigned reg_size = (instr->GetSixtyFourBits() == 1) ? kXRegSize : kWRegSize;
   5900   uint64_t low_res =
   5901       static_cast<uint64_t>(ReadRegister(reg_size, instr->GetRm())) >> lsb;
   5902   uint64_t high_res = (lsb == 0)
   5903                           ? 0
   5904                           : ReadRegister<uint64_t>(reg_size, instr->GetRn())
   5905                                 << (reg_size - lsb);
   5906   WriteRegister(reg_size, instr->GetRd(), low_res | high_res);
   5907 }
   5908 
   5909 
   5910 void Simulator::VisitFPImmediate(const Instruction* instr) {
   5911   AssertSupportedFPCR();
   5912   unsigned dest = instr->GetRd();
   5913   switch (instr->Mask(FPImmediateMask)) {
   5914     case FMOV_h_imm:
   5915       WriteHRegister(dest, Float16ToRawbits(instr->GetImmFP16()));
   5916       break;
   5917     case FMOV_s_imm:
   5918       WriteSRegister(dest, instr->GetImmFP32());
   5919       break;
   5920     case FMOV_d_imm:
   5921       WriteDRegister(dest, instr->GetImmFP64());
   5922       break;
   5923     default:
   5924       VIXL_UNREACHABLE();
   5925   }
   5926 }
   5927 
   5928 
   5929 void Simulator::VisitFPIntegerConvert(const Instruction* instr) {
   5930   AssertSupportedFPCR();
   5931 
   5932   unsigned dst = instr->GetRd();
   5933   unsigned src = instr->GetRn();
   5934 
   5935   FPRounding round = ReadRMode();
   5936 
   5937   switch (instr->Mask(FPIntegerConvertMask)) {
   5938     case FCVTAS_wh:
   5939       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPTieAway));
   5940       break;
   5941     case FCVTAS_xh:
   5942       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPTieAway));
   5943       break;
   5944     case FCVTAS_ws:
   5945       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPTieAway));
   5946       break;
   5947     case FCVTAS_xs:
   5948       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPTieAway));
   5949       break;
   5950     case FCVTAS_wd:
   5951       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPTieAway));
   5952       break;
   5953     case FCVTAS_xd:
   5954       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPTieAway));
   5955       break;
   5956     case FCVTAU_wh:
   5957       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPTieAway));
   5958       break;
   5959     case FCVTAU_xh:
   5960       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPTieAway));
   5961       break;
   5962     case FCVTAU_ws:
   5963       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPTieAway));
   5964       break;
   5965     case FCVTAU_xs:
   5966       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPTieAway));
   5967       break;
   5968     case FCVTAU_wd:
   5969       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPTieAway));
   5970       break;
   5971     case FCVTAU_xd:
   5972       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPTieAway));
   5973       break;
   5974     case FCVTMS_wh:
   5975       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPNegativeInfinity));
   5976       break;
   5977     case FCVTMS_xh:
   5978       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPNegativeInfinity));
   5979       break;
   5980     case FCVTMS_ws:
   5981       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPNegativeInfinity));
   5982       break;
   5983     case FCVTMS_xs:
   5984       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPNegativeInfinity));
   5985       break;
   5986     case FCVTMS_wd:
   5987       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPNegativeInfinity));
   5988       break;
   5989     case FCVTMS_xd:
   5990       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPNegativeInfinity));
   5991       break;
   5992     case FCVTMU_wh:
   5993       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPNegativeInfinity));
   5994       break;
   5995     case FCVTMU_xh:
   5996       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPNegativeInfinity));
   5997       break;
   5998     case FCVTMU_ws:
   5999       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPNegativeInfinity));
   6000       break;
   6001     case FCVTMU_xs:
   6002       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPNegativeInfinity));
   6003       break;
   6004     case FCVTMU_wd:
   6005       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPNegativeInfinity));
   6006       break;
   6007     case FCVTMU_xd:
   6008       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPNegativeInfinity));
   6009       break;
   6010     case FCVTPS_wh:
   6011       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPPositiveInfinity));
   6012       break;
   6013     case FCVTPS_xh:
   6014       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPPositiveInfinity));
   6015       break;
   6016     case FCVTPS_ws:
   6017       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPPositiveInfinity));
   6018       break;
   6019     case FCVTPS_xs:
   6020       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPPositiveInfinity));
   6021       break;
   6022     case FCVTPS_wd:
   6023       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPPositiveInfinity));
   6024       break;
   6025     case FCVTPS_xd:
   6026       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPPositiveInfinity));
   6027       break;
   6028     case FCVTPU_wh:
   6029       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPPositiveInfinity));
   6030       break;
   6031     case FCVTPU_xh:
   6032       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPPositiveInfinity));
   6033       break;
   6034     case FCVTPU_ws:
   6035       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPPositiveInfinity));
   6036       break;
   6037     case FCVTPU_xs:
   6038       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPPositiveInfinity));
   6039       break;
   6040     case FCVTPU_wd:
   6041       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPPositiveInfinity));
   6042       break;
   6043     case FCVTPU_xd:
   6044       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPPositiveInfinity));
   6045       break;
   6046     case FCVTNS_wh:
   6047       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPTieEven));
   6048       break;
   6049     case FCVTNS_xh:
   6050       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPTieEven));
   6051       break;
   6052     case FCVTNS_ws:
   6053       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPTieEven));
   6054       break;
   6055     case FCVTNS_xs:
   6056       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPTieEven));
   6057       break;
   6058     case FCVTNS_wd:
   6059       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPTieEven));
   6060       break;
   6061     case FCVTNS_xd:
   6062       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPTieEven));
   6063       break;
   6064     case FCVTNU_wh:
   6065       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPTieEven));
   6066       break;
   6067     case FCVTNU_xh:
   6068       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPTieEven));
   6069       break;
   6070     case FCVTNU_ws:
   6071       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPTieEven));
   6072       break;
   6073     case FCVTNU_xs:
   6074       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPTieEven));
   6075       break;
   6076     case FCVTNU_wd:
   6077       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPTieEven));
   6078       break;
   6079     case FCVTNU_xd:
   6080       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPTieEven));
   6081       break;
   6082     case FCVTZS_wh:
   6083       WriteWRegister(dst, FPToInt32(ReadHRegister(src), FPZero));
   6084       break;
   6085     case FCVTZS_xh:
   6086       WriteXRegister(dst, FPToInt64(ReadHRegister(src), FPZero));
   6087       break;
   6088     case FCVTZS_ws:
   6089       WriteWRegister(dst, FPToInt32(ReadSRegister(src), FPZero));
   6090       break;
   6091     case FCVTZS_xs:
   6092       WriteXRegister(dst, FPToInt64(ReadSRegister(src), FPZero));
   6093       break;
   6094     case FCVTZS_wd:
   6095       WriteWRegister(dst, FPToInt32(ReadDRegister(src), FPZero));
   6096       break;
   6097     case FCVTZS_xd:
   6098       WriteXRegister(dst, FPToInt64(ReadDRegister(src), FPZero));
   6099       break;
   6100     case FCVTZU_wh:
   6101       WriteWRegister(dst, FPToUInt32(ReadHRegister(src), FPZero));
   6102       break;
   6103     case FCVTZU_xh:
   6104       WriteXRegister(dst, FPToUInt64(ReadHRegister(src), FPZero));
   6105       break;
   6106     case FCVTZU_ws:
   6107       WriteWRegister(dst, FPToUInt32(ReadSRegister(src), FPZero));
   6108       break;
   6109     case FCVTZU_xs:
   6110       WriteXRegister(dst, FPToUInt64(ReadSRegister(src), FPZero));
   6111       break;
   6112     case FCVTZU_wd:
   6113       WriteWRegister(dst, FPToUInt32(ReadDRegister(src), FPZero));
   6114       break;
   6115     case FCVTZU_xd:
   6116       WriteXRegister(dst, FPToUInt64(ReadDRegister(src), FPZero));
   6117       break;
   6118     case FJCVTZS:
   6119       WriteWRegister(dst, FPToFixedJS(ReadDRegister(src)));
   6120       break;
   6121     case FMOV_hw:
   6122       WriteHRegister(dst, ReadWRegister(src) & kHRegMask);
   6123       break;
   6124     case FMOV_wh:
   6125       WriteWRegister(dst, ReadHRegisterBits(src));
   6126       break;
   6127     case FMOV_xh:
   6128       WriteXRegister(dst, ReadHRegisterBits(src));
   6129       break;
   6130     case FMOV_hx:
   6131       WriteHRegister(dst, ReadXRegister(src) & kHRegMask);
   6132       break;
   6133     case FMOV_ws:
   6134       WriteWRegister(dst, ReadSRegisterBits(src));
   6135       break;
   6136     case FMOV_xd:
   6137       WriteXRegister(dst, ReadDRegisterBits(src));
   6138       break;
   6139     case FMOV_sw:
   6140       WriteSRegisterBits(dst, ReadWRegister(src));
   6141       break;
   6142     case FMOV_dx:
   6143       WriteDRegisterBits(dst, ReadXRegister(src));
   6144       break;
   6145     case FMOV_d1_x:
   6146       // Zero bits beyond the MSB of a Q register.
   6147       mov(kFormat16B, ReadVRegister(dst), ReadVRegister(dst));
   6148       LogicVRegister(ReadVRegister(dst))
   6149           .SetUint(kFormatD, 1, ReadXRegister(src));
   6150       break;
   6151     case FMOV_x_d1:
   6152       WriteXRegister(dst, LogicVRegister(ReadVRegister(src)).Uint(kFormatD, 1));
   6153       break;
   6154 
   6155     // A 32-bit input can be handled in the same way as a 64-bit input, since
   6156     // the sign- or zero-extension will not affect the conversion.
   6157     case SCVTF_dx:
   6158       WriteDRegister(dst, FixedToDouble(ReadXRegister(src), 0, round));
   6159       break;
   6160     case SCVTF_dw:
   6161       WriteDRegister(dst, FixedToDouble(ReadWRegister(src), 0, round));
   6162       break;
   6163     case UCVTF_dx:
   6164       WriteDRegister(dst, UFixedToDouble(ReadXRegister(src), 0, round));
   6165       break;
   6166     case UCVTF_dw: {
   6167       WriteDRegister(dst,
   6168                      UFixedToDouble(ReadRegister<uint32_t>(src), 0, round));
   6169       break;
   6170     }
   6171     case SCVTF_sx:
   6172       WriteSRegister(dst, FixedToFloat(ReadXRegister(src), 0, round));
   6173       break;
   6174     case SCVTF_sw:
   6175       WriteSRegister(dst, FixedToFloat(ReadWRegister(src), 0, round));
   6176       break;
   6177     case UCVTF_sx:
   6178       WriteSRegister(dst, UFixedToFloat(ReadXRegister(src), 0, round));
   6179       break;
   6180     case UCVTF_sw: {
   6181       WriteSRegister(dst, UFixedToFloat(ReadRegister<uint32_t>(src), 0, round));
   6182       break;
   6183     }
   6184     case SCVTF_hx:
   6185       WriteHRegister(dst, FixedToFloat16(ReadXRegister(src), 0, round));
   6186       break;
   6187     case SCVTF_hw:
   6188       WriteHRegister(dst, FixedToFloat16(ReadWRegister(src), 0, round));
   6189       break;
   6190     case UCVTF_hx:
   6191       WriteHRegister(dst, UFixedToFloat16(ReadXRegister(src), 0, round));
   6192       break;
   6193     case UCVTF_hw: {
   6194       WriteHRegister(dst,
   6195                      UFixedToFloat16(ReadRegister<uint32_t>(src), 0, round));
   6196       break;
   6197     }
   6198 
   6199     default:
   6200       VIXL_UNREACHABLE();
   6201   }
   6202 }
   6203 
   6204 
   6205 void Simulator::VisitFPFixedPointConvert(const Instruction* instr) {
   6206   AssertSupportedFPCR();
   6207 
   6208   unsigned dst = instr->GetRd();
   6209   unsigned src = instr->GetRn();
   6210   int fbits = 64 - instr->GetFPScale();
   6211 
   6212   FPRounding round = ReadRMode();
   6213 
   6214   switch (instr->Mask(FPFixedPointConvertMask)) {
   6215     // A 32-bit input can be handled in the same way as a 64-bit input, since
   6216     // the sign- or zero-extension will not affect the conversion.
   6217     case SCVTF_dx_fixed:
   6218       WriteDRegister(dst, FixedToDouble(ReadXRegister(src), fbits, round));
   6219       break;
   6220     case SCVTF_dw_fixed:
   6221       WriteDRegister(dst, FixedToDouble(ReadWRegister(src), fbits, round));
   6222       break;
   6223     case UCVTF_dx_fixed:
   6224       WriteDRegister(dst, UFixedToDouble(ReadXRegister(src), fbits, round));
   6225       break;
   6226     case UCVTF_dw_fixed: {
   6227       WriteDRegister(dst,
   6228                      UFixedToDouble(ReadRegister<uint32_t>(src), fbits, round));
   6229       break;
   6230     }
   6231     case SCVTF_sx_fixed:
   6232       WriteSRegister(dst, FixedToFloat(ReadXRegister(src), fbits, round));
   6233       break;
   6234     case SCVTF_sw_fixed:
   6235       WriteSRegister(dst, FixedToFloat(ReadWRegister(src), fbits, round));
   6236       break;
   6237     case UCVTF_sx_fixed:
   6238       WriteSRegister(dst, UFixedToFloat(ReadXRegister(src), fbits, round));
   6239       break;
   6240     case UCVTF_sw_fixed: {
   6241       WriteSRegister(dst,
   6242                      UFixedToFloat(ReadRegister<uint32_t>(src), fbits, round));
   6243       break;
   6244     }
   6245     case SCVTF_hx_fixed:
   6246       WriteHRegister(dst, FixedToFloat16(ReadXRegister(src), fbits, round));
   6247       break;
   6248     case SCVTF_hw_fixed:
   6249       WriteHRegister(dst, FixedToFloat16(ReadWRegister(src), fbits, round));
   6250       break;
   6251     case UCVTF_hx_fixed:
   6252       WriteHRegister(dst, UFixedToFloat16(ReadXRegister(src), fbits, round));
   6253       break;
   6254     case UCVTF_hw_fixed: {
   6255       WriteHRegister(dst,
   6256                      UFixedToFloat16(ReadRegister<uint32_t>(src),
   6257                                      fbits,
   6258                                      round));
   6259       break;
   6260     }
   6261     case FCVTZS_xd_fixed:
   6262       WriteXRegister(dst,
   6263                      FPToInt64(ReadDRegister(src) * std::pow(2.0, fbits),
   6264                                FPZero));
   6265       break;
   6266     case FCVTZS_wd_fixed:
   6267       WriteWRegister(dst,
   6268                      FPToInt32(ReadDRegister(src) * std::pow(2.0, fbits),
   6269                                FPZero));
   6270       break;
   6271     case FCVTZU_xd_fixed:
   6272       WriteXRegister(dst,
   6273                      FPToUInt64(ReadDRegister(src) * std::pow(2.0, fbits),
   6274                                 FPZero));
   6275       break;
   6276     case FCVTZU_wd_fixed:
   6277       WriteWRegister(dst,
   6278                      FPToUInt32(ReadDRegister(src) * std::pow(2.0, fbits),
   6279                                 FPZero));
   6280       break;
   6281     case FCVTZS_xs_fixed:
   6282       WriteXRegister(dst,
   6283                      FPToInt64(ReadSRegister(src) * std::pow(2.0f, fbits),
   6284                                FPZero));
   6285       break;
   6286     case FCVTZS_ws_fixed:
   6287       WriteWRegister(dst,
   6288                      FPToInt32(ReadSRegister(src) * std::pow(2.0f, fbits),
   6289                                FPZero));
   6290       break;
   6291     case FCVTZU_xs_fixed:
   6292       WriteXRegister(dst,
   6293                      FPToUInt64(ReadSRegister(src) * std::pow(2.0f, fbits),
   6294                                 FPZero));
   6295       break;
   6296     case FCVTZU_ws_fixed:
   6297       WriteWRegister(dst,
   6298                      FPToUInt32(ReadSRegister(src) * std::pow(2.0f, fbits),
   6299                                 FPZero));
   6300       break;
   6301     case FCVTZS_xh_fixed: {
   6302       double output =
   6303           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
   6304       WriteXRegister(dst, FPToInt64(output, FPZero));
   6305       break;
   6306     }
   6307     case FCVTZS_wh_fixed: {
   6308       double output =
   6309           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
   6310       WriteWRegister(dst, FPToInt32(output, FPZero));
   6311       break;
   6312     }
   6313     case FCVTZU_xh_fixed: {
   6314       double output =
   6315           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
   6316       WriteXRegister(dst, FPToUInt64(output, FPZero));
   6317       break;
   6318     }
   6319     case FCVTZU_wh_fixed: {
   6320       double output =
   6321           static_cast<double>(ReadHRegister(src)) * std::pow(2.0, fbits);
   6322       WriteWRegister(dst, FPToUInt32(output, FPZero));
   6323       break;
   6324     }
   6325     default:
   6326       VIXL_UNREACHABLE();
   6327   }
   6328 }
   6329 
   6330 
   6331 void Simulator::VisitFPCompare(const Instruction* instr) {
   6332   AssertSupportedFPCR();
   6333 
   6334   FPTrapFlags trap = DisableTrap;
   6335   switch (instr->Mask(FPCompareMask)) {
   6336     case FCMPE_h:
   6337       trap = EnableTrap;
   6338       VIXL_FALLTHROUGH();
   6339     case FCMP_h:
   6340       FPCompare(ReadHRegister(instr->GetRn()),
   6341                 ReadHRegister(instr->GetRm()),
   6342                 trap);
   6343       break;
   6344     case FCMPE_s:
   6345       trap = EnableTrap;
   6346       VIXL_FALLTHROUGH();
   6347     case FCMP_s:
   6348       FPCompare(ReadSRegister(instr->GetRn()),
   6349                 ReadSRegister(instr->GetRm()),
   6350                 trap);
   6351       break;
   6352     case FCMPE_d:
   6353       trap = EnableTrap;
   6354       VIXL_FALLTHROUGH();
   6355     case FCMP_d:
   6356       FPCompare(ReadDRegister(instr->GetRn()),
   6357                 ReadDRegister(instr->GetRm()),
   6358                 trap);
   6359       break;
   6360     case FCMPE_h_zero:
   6361       trap = EnableTrap;
   6362       VIXL_FALLTHROUGH();
   6363     case FCMP_h_zero:
   6364       FPCompare(ReadHRegister(instr->GetRn()), SimFloat16(0.0), trap);
   6365       break;
   6366     case FCMPE_s_zero:
   6367       trap = EnableTrap;
   6368       VIXL_FALLTHROUGH();
   6369     case FCMP_s_zero:
   6370       FPCompare(ReadSRegister(instr->GetRn()), 0.0f, trap);
   6371       break;
   6372     case FCMPE_d_zero:
   6373       trap = EnableTrap;
   6374       VIXL_FALLTHROUGH();
   6375     case FCMP_d_zero:
   6376       FPCompare(ReadDRegister(instr->GetRn()), 0.0, trap);
   6377       break;
   6378     default:
   6379       VIXL_UNIMPLEMENTED();
   6380   }
   6381 }
   6382 
   6383 
   6384 void Simulator::VisitFPConditionalCompare(const Instruction* instr) {
   6385   AssertSupportedFPCR();
   6386 
   6387   FPTrapFlags trap = DisableTrap;
   6388   switch (instr->Mask(FPConditionalCompareMask)) {
   6389     case FCCMPE_h:
   6390       trap = EnableTrap;
   6391       VIXL_FALLTHROUGH();
   6392     case FCCMP_h:
   6393       if (ConditionPassed(instr->GetCondition())) {
   6394         FPCompare(ReadHRegister(instr->GetRn()),
   6395                   ReadHRegister(instr->GetRm()),
   6396                   trap);
   6397       } else {
   6398         ReadNzcv().SetFlags(instr->GetNzcv());
   6399         LogSystemRegister(NZCV);
   6400       }
   6401       break;
   6402     case FCCMPE_s:
   6403       trap = EnableTrap;
   6404       VIXL_FALLTHROUGH();
   6405     case FCCMP_s:
   6406       if (ConditionPassed(instr->GetCondition())) {
   6407         FPCompare(ReadSRegister(instr->GetRn()),
   6408                   ReadSRegister(instr->GetRm()),
   6409                   trap);
   6410       } else {
   6411         ReadNzcv().SetFlags(instr->GetNzcv());
   6412         LogSystemRegister(NZCV);
   6413       }
   6414       break;
   6415     case FCCMPE_d:
   6416       trap = EnableTrap;
   6417       VIXL_FALLTHROUGH();
   6418     case FCCMP_d:
   6419       if (ConditionPassed(instr->GetCondition())) {
   6420         FPCompare(ReadDRegister(instr->GetRn()),
   6421                   ReadDRegister(instr->GetRm()),
   6422                   trap);
   6423       } else {
   6424         ReadNzcv().SetFlags(instr->GetNzcv());
   6425         LogSystemRegister(NZCV);
   6426       }
   6427       break;
   6428     default:
   6429       VIXL_UNIMPLEMENTED();
   6430   }
   6431 }
   6432 
   6433 
   6434 void Simulator::VisitFPConditionalSelect(const Instruction* instr) {
   6435   AssertSupportedFPCR();
   6436 
   6437   Instr selected;
   6438   if (ConditionPassed(instr->GetCondition())) {
   6439     selected = instr->GetRn();
   6440   } else {
   6441     selected = instr->GetRm();
   6442   }
   6443 
   6444   switch (instr->Mask(FPConditionalSelectMask)) {
   6445     case FCSEL_h:
   6446       WriteHRegister(instr->GetRd(), ReadHRegister(selected));
   6447       break;
   6448     case FCSEL_s:
   6449       WriteSRegister(instr->GetRd(), ReadSRegister(selected));
   6450       break;
   6451     case FCSEL_d:
   6452       WriteDRegister(instr->GetRd(), ReadDRegister(selected));
   6453       break;
   6454     default:
   6455       VIXL_UNIMPLEMENTED();
   6456   }
   6457 }
   6458 
   6459 
   6460 void Simulator::VisitFPDataProcessing1Source(const Instruction* instr) {
   6461   AssertSupportedFPCR();
   6462 
   6463   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
   6464   VectorFormat vform;
   6465   switch (instr->Mask(FPTypeMask)) {
   6466     default:
   6467       VIXL_UNREACHABLE_OR_FALLTHROUGH();
   6468     case FP64:
   6469       vform = kFormatD;
   6470       break;
   6471     case FP32:
   6472       vform = kFormatS;
   6473       break;
   6474     case FP16:
   6475       vform = kFormatH;
   6476       break;
   6477   }
   6478 
   6479   SimVRegister& rd = ReadVRegister(instr->GetRd());
   6480   SimVRegister& rn = ReadVRegister(instr->GetRn());
   6481   bool inexact_exception = false;
   6482   FrintMode frint_mode = kFrintToInteger;
   6483 
   6484   unsigned fd = instr->GetRd();
   6485   unsigned fn = instr->GetRn();
   6486 
   6487   switch (instr->Mask(FPDataProcessing1SourceMask)) {
   6488     case FMOV_h:
   6489       WriteHRegister(fd, ReadHRegister(fn));
   6490       return;
   6491     case FMOV_s:
   6492       WriteSRegister(fd, ReadSRegister(fn));
   6493       return;
   6494     case FMOV_d:
   6495       WriteDRegister(fd, ReadDRegister(fn));
   6496       return;
   6497     case FABS_h:
   6498     case FABS_s:
   6499     case FABS_d:
   6500       fabs_(vform, ReadVRegister(fd), ReadVRegister(fn));
   6501       // Explicitly log the register update whilst we have type information.
   6502       LogVRegister(fd, GetPrintRegisterFormatFP(vform));
   6503       return;
   6504     case FNEG_h:
   6505     case FNEG_s:
   6506     case FNEG_d:
   6507       fneg(vform, ReadVRegister(fd), ReadVRegister(fn));
   6508       // Explicitly log the register update whilst we have type information.
   6509       LogVRegister(fd, GetPrintRegisterFormatFP(vform));
   6510       return;
   6511     case FCVT_ds:
   6512       WriteDRegister(fd, FPToDouble(ReadSRegister(fn), ReadDN()));
   6513       return;
   6514     case FCVT_sd:
   6515       WriteSRegister(fd, FPToFloat(ReadDRegister(fn), FPTieEven, ReadDN()));
   6516       return;
   6517     case FCVT_hs:
   6518       WriteHRegister(fd,
   6519                      Float16ToRawbits(
   6520                          FPToFloat16(ReadSRegister(fn), FPTieEven, ReadDN())));
   6521       return;
   6522     case FCVT_sh:
   6523       WriteSRegister(fd, FPToFloat(ReadHRegister(fn), ReadDN()));
   6524       return;
   6525     case FCVT_dh:
   6526       WriteDRegister(fd, FPToDouble(ReadHRegister(fn), ReadDN()));
   6527       return;
   6528     case FCVT_hd:
   6529       WriteHRegister(fd,
   6530                      Float16ToRawbits(
   6531                          FPToFloat16(ReadDRegister(fn), FPTieEven, ReadDN())));
   6532       return;
   6533     case FSQRT_h:
   6534     case FSQRT_s:
   6535     case FSQRT_d:
   6536       fsqrt(vform, rd, rn);
   6537       // Explicitly log the register update whilst we have type information.
   6538       LogVRegister(fd, GetPrintRegisterFormatFP(vform));
   6539       return;
   6540     case FRINT32X_s:
   6541     case FRINT32X_d:
   6542       inexact_exception = true;
   6543       frint_mode = kFrintToInt32;
   6544       break;  // Use FPCR rounding mode.
   6545     case FRINT64X_s:
   6546     case FRINT64X_d:
   6547       inexact_exception = true;
   6548       frint_mode = kFrintToInt64;
   6549       break;  // Use FPCR rounding mode.
   6550     case FRINT32Z_s:
   6551     case FRINT32Z_d:
   6552       inexact_exception = true;
   6553       frint_mode = kFrintToInt32;
   6554       fpcr_rounding = FPZero;
   6555       break;
   6556     case FRINT64Z_s:
   6557     case FRINT64Z_d:
   6558       inexact_exception = true;
   6559       frint_mode = kFrintToInt64;
   6560       fpcr_rounding = FPZero;
   6561       break;
   6562     case FRINTI_h:
   6563     case FRINTI_s:
   6564     case FRINTI_d:
   6565       break;  // Use FPCR rounding mode.
   6566     case FRINTX_h:
   6567     case FRINTX_s:
   6568     case FRINTX_d:
   6569       inexact_exception = true;
   6570       break;
   6571     case FRINTA_h:
   6572     case FRINTA_s:
   6573     case FRINTA_d:
   6574       fpcr_rounding = FPTieAway;
   6575       break;
   6576     case FRINTM_h:
   6577     case FRINTM_s:
   6578     case FRINTM_d:
   6579       fpcr_rounding = FPNegativeInfinity;
   6580       break;
   6581     case FRINTN_h:
   6582     case FRINTN_s:
   6583     case FRINTN_d:
   6584       fpcr_rounding = FPTieEven;
   6585       break;
   6586     case FRINTP_h:
   6587     case FRINTP_s:
   6588     case FRINTP_d:
   6589       fpcr_rounding = FPPositiveInfinity;
   6590       break;
   6591     case FRINTZ_h:
   6592     case FRINTZ_s:
   6593     case FRINTZ_d:
   6594       fpcr_rounding = FPZero;
   6595       break;
   6596     default:
   6597       VIXL_UNIMPLEMENTED();
   6598   }
   6599 
   6600   // Only FRINT* instructions fall through the switch above.
   6601   frint(vform, rd, rn, fpcr_rounding, inexact_exception, frint_mode);
   6602   // Explicitly log the register update whilst we have type information.
   6603   LogVRegister(fd, GetPrintRegisterFormatFP(vform));
   6604 }
   6605 
   6606 
   6607 void Simulator::VisitFPDataProcessing2Source(const Instruction* instr) {
   6608   AssertSupportedFPCR();
   6609 
   6610   VectorFormat vform;
   6611   switch (instr->Mask(FPTypeMask)) {
   6612     default:
   6613       VIXL_UNREACHABLE_OR_FALLTHROUGH();
   6614     case FP64:
   6615       vform = kFormatD;
   6616       break;
   6617     case FP32:
   6618       vform = kFormatS;
   6619       break;
   6620     case FP16:
   6621       vform = kFormatH;
   6622       break;
   6623   }
   6624   SimVRegister& rd = ReadVRegister(instr->GetRd());
   6625   SimVRegister& rn = ReadVRegister(instr->GetRn());
   6626   SimVRegister& rm = ReadVRegister(instr->GetRm());
   6627 
   6628   switch (instr->Mask(FPDataProcessing2SourceMask)) {
   6629     case FADD_h:
   6630     case FADD_s:
   6631     case FADD_d:
   6632       fadd(vform, rd, rn, rm);
   6633       break;
   6634     case FSUB_h:
   6635     case FSUB_s:
   6636     case FSUB_d:
   6637       fsub(vform, rd, rn, rm);
   6638       break;
   6639     case FMUL_h:
   6640     case FMUL_s:
   6641     case FMUL_d:
   6642       fmul(vform, rd, rn, rm);
   6643       break;
   6644     case FNMUL_h:
   6645     case FNMUL_s:
   6646     case FNMUL_d:
   6647       fnmul(vform, rd, rn, rm);
   6648       break;
   6649     case FDIV_h:
   6650     case FDIV_s:
   6651     case FDIV_d:
   6652       fdiv(vform, rd, rn, rm);
   6653       break;
   6654     case FMAX_h:
   6655     case FMAX_s:
   6656     case FMAX_d:
   6657       fmax(vform, rd, rn, rm);
   6658       break;
   6659     case FMIN_h:
   6660     case FMIN_s:
   6661     case FMIN_d:
   6662       fmin(vform, rd, rn, rm);
   6663       break;
   6664     case FMAXNM_h:
   6665     case FMAXNM_s:
   6666     case FMAXNM_d:
   6667       fmaxnm(vform, rd, rn, rm);
   6668       break;
   6669     case FMINNM_h:
   6670     case FMINNM_s:
   6671     case FMINNM_d:
   6672       fminnm(vform, rd, rn, rm);
   6673       break;
   6674     default:
   6675       VIXL_UNREACHABLE();
   6676   }
   6677   // Explicitly log the register update whilst we have type information.
   6678   LogVRegister(instr->GetRd(), GetPrintRegisterFormatFP(vform));
   6679 }
   6680 
   6681 
   6682 void Simulator::VisitFPDataProcessing3Source(const Instruction* instr) {
   6683   AssertSupportedFPCR();
   6684 
   6685   unsigned fd = instr->GetRd();
   6686   unsigned fn = instr->GetRn();
   6687   unsigned fm = instr->GetRm();
   6688   unsigned fa = instr->GetRa();
   6689 
   6690   switch (instr->Mask(FPDataProcessing3SourceMask)) {
   6691     // fd = fa +/- (fn * fm)
   6692     case FMADD_h:
   6693       WriteHRegister(fd,
   6694                      FPMulAdd(ReadHRegister(fa),
   6695                               ReadHRegister(fn),
   6696                               ReadHRegister(fm)));
   6697       break;
   6698     case FMSUB_h:
   6699       WriteHRegister(fd,
   6700                      FPMulAdd(ReadHRegister(fa),
   6701                               -ReadHRegister(fn),
   6702                               ReadHRegister(fm)));
   6703       break;
   6704     case FMADD_s:
   6705       WriteSRegister(fd,
   6706                      FPMulAdd(ReadSRegister(fa),
   6707                               ReadSRegister(fn),
   6708                               ReadSRegister(fm)));
   6709       break;
   6710     case FMSUB_s:
   6711       WriteSRegister(fd,
   6712                      FPMulAdd(ReadSRegister(fa),
   6713                               -ReadSRegister(fn),
   6714                               ReadSRegister(fm)));
   6715       break;
   6716     case FMADD_d:
   6717       WriteDRegister(fd,
   6718                      FPMulAdd(ReadDRegister(fa),
   6719                               ReadDRegister(fn),
   6720                               ReadDRegister(fm)));
   6721       break;
   6722     case FMSUB_d:
   6723       WriteDRegister(fd,
   6724                      FPMulAdd(ReadDRegister(fa),
   6725                               -ReadDRegister(fn),
   6726                               ReadDRegister(fm)));
   6727       break;
   6728     // Negated variants of the above.
   6729     case FNMADD_h:
   6730       WriteHRegister(fd,
   6731                      FPMulAdd(-ReadHRegister(fa),
   6732                               -ReadHRegister(fn),
   6733                               ReadHRegister(fm)));
   6734       break;
   6735     case FNMSUB_h:
   6736       WriteHRegister(fd,
   6737                      FPMulAdd(-ReadHRegister(fa),
   6738                               ReadHRegister(fn),
   6739                               ReadHRegister(fm)));
   6740       break;
   6741     case FNMADD_s:
   6742       WriteSRegister(fd,
   6743                      FPMulAdd(-ReadSRegister(fa),
   6744                               -ReadSRegister(fn),
   6745                               ReadSRegister(fm)));
   6746       break;
   6747     case FNMSUB_s:
   6748       WriteSRegister(fd,
   6749                      FPMulAdd(-ReadSRegister(fa),
   6750                               ReadSRegister(fn),
   6751                               ReadSRegister(fm)));
   6752       break;
   6753     case FNMADD_d:
   6754       WriteDRegister(fd,
   6755                      FPMulAdd(-ReadDRegister(fa),
   6756                               -ReadDRegister(fn),
   6757                               ReadDRegister(fm)));
   6758       break;
   6759     case FNMSUB_d:
   6760       WriteDRegister(fd,
   6761                      FPMulAdd(-ReadDRegister(fa),
   6762                               ReadDRegister(fn),
   6763                               ReadDRegister(fm)));
   6764       break;
   6765     default:
   6766       VIXL_UNIMPLEMENTED();
   6767   }
   6768 }
   6769 
   6770 
   6771 bool Simulator::FPProcessNaNs(const Instruction* instr) {
   6772   unsigned fd = instr->GetRd();
   6773   unsigned fn = instr->GetRn();
   6774   unsigned fm = instr->GetRm();
   6775   bool done = false;
   6776 
   6777   if (instr->Mask(FP64) == FP64) {
   6778     double result = FPProcessNaNs(ReadDRegister(fn), ReadDRegister(fm));
   6779     if (IsNaN(result)) {
   6780       WriteDRegister(fd, result);
   6781       done = true;
   6782     }
   6783   } else if (instr->Mask(FP32) == FP32) {
   6784     float result = FPProcessNaNs(ReadSRegister(fn), ReadSRegister(fm));
   6785     if (IsNaN(result)) {
   6786       WriteSRegister(fd, result);
   6787       done = true;
   6788     }
   6789   } else {
   6790     VIXL_ASSERT(instr->Mask(FP16) == FP16);
   6791     VIXL_UNIMPLEMENTED();
   6792   }
   6793 
   6794   return done;
   6795 }
   6796 
   6797 
   6798 void Simulator::SysOp_W(int op, int64_t val) {
   6799   switch (op) {
   6800     case IVAU:
   6801     case CVAC:
   6802     case CVAU:
   6803     case CVAP:
   6804     case CVADP:
   6805     case CIVAC:
   6806     case CGVAC:
   6807     case CGDVAC:
   6808     case CGVAP:
   6809     case CGDVAP:
   6810     case CIGVAC:
   6811     case CIGDVAC: {
   6812       // Perform a placeholder memory access to ensure that we have read access
   6813       // to the specified address. The read access does not require a tag match,
   6814       // so temporarily disable MTE.
   6815       bool mte_enabled = MetaDataDepot::MetaDataMTE::IsActive();
   6816       MetaDataDepot::MetaDataMTE::SetActive(false);
   6817       volatile uint8_t y = *MemRead<uint8_t>(val);
   6818       MetaDataDepot::MetaDataMTE::SetActive(mte_enabled);
   6819       USE(y);
   6820       // TODO: Implement ZVA, GVA, GZVA.
   6821       break;
   6822     }
   6823     default:
   6824       VIXL_UNIMPLEMENTED();
   6825   }
   6826 }
   6827 
   6828 void Simulator::PACHelper(int dst,
   6829                           int src,
   6830                           PACKey key,
   6831                           decltype(&Simulator::AddPAC) pac_fn) {
   6832   VIXL_ASSERT((dst == 17) || (dst == 30));
   6833   VIXL_ASSERT((src == -1) || (src == 16) || (src == 31));
   6834 
   6835   uint64_t modifier = (src == -1) ? 0 : ReadXRegister(src, Reg31IsStackPointer);
   6836   uint64_t result =
   6837       (this->*pac_fn)(ReadXRegister(dst), modifier, key, kInstructionPointer);
   6838   WriteXRegister(dst, result);
   6839 }
   6840 
   6841 void Simulator::VisitSystem(const Instruction* instr) {
   6842   PACKey pac_key = kPACKeyIA;  // Default key for PAC/AUTH handling.
   6843 
   6844   switch (form_hash_) {
   6845     case "cfinv_m_pstate"_h:
   6846       ReadNzcv().SetC(!ReadC());
   6847       break;
   6848     case "axflag_m_pstate"_h:
   6849       ReadNzcv().SetN(0);
   6850       ReadNzcv().SetZ(ReadNzcv().GetZ() | ReadNzcv().GetV());
   6851       ReadNzcv().SetC(ReadNzcv().GetC() & ~ReadNzcv().GetV());
   6852       ReadNzcv().SetV(0);
   6853       break;
   6854     case "xaflag_m_pstate"_h: {
   6855       // Can't set the flags in place due to the logical dependencies.
   6856       uint32_t n = (~ReadNzcv().GetC() & ~ReadNzcv().GetZ()) & 1;
   6857       uint32_t z = ReadNzcv().GetZ() & ReadNzcv().GetC();
   6858       uint32_t c = ReadNzcv().GetC() | ReadNzcv().GetZ();
   6859       uint32_t v = ~ReadNzcv().GetC() & ReadNzcv().GetZ();
   6860       ReadNzcv().SetN(n);
   6861       ReadNzcv().SetZ(z);
   6862       ReadNzcv().SetC(c);
   6863       ReadNzcv().SetV(v);
   6864       break;
   6865     }
   6866     case "xpaclri_hi_hints"_h:
   6867       WriteXRegister(30, StripPAC(ReadXRegister(30), kInstructionPointer));
   6868       break;
   6869     case "clrex_bn_barriers"_h:
   6870       PrintExclusiveAccessWarning();
   6871       ClearLocalMonitor();
   6872       break;
   6873     case "msr_sr_systemmove"_h:
   6874       switch (instr->GetImmSystemRegister()) {
   6875         case NZCV:
   6876           ReadNzcv().SetRawValue(ReadWRegister(instr->GetRt()));
   6877           LogSystemRegister(NZCV);
   6878           break;
   6879         case FPCR:
   6880           ReadFpcr().SetRawValue(ReadWRegister(instr->GetRt()));
   6881           LogSystemRegister(FPCR);
   6882           break;
   6883         default:
   6884           VIXL_UNIMPLEMENTED();
   6885       }
   6886       break;
   6887     case "mrs_rs_systemmove"_h:
   6888       switch (instr->GetImmSystemRegister()) {
   6889         case NZCV:
   6890           WriteXRegister(instr->GetRt(), ReadNzcv().GetRawValue());
   6891           break;
   6892         case FPCR:
   6893           WriteXRegister(instr->GetRt(), ReadFpcr().GetRawValue());
   6894           break;
   6895         case RNDR:
   6896         case RNDRRS: {
   6897           uint64_t high = jrand48(rand_state_);
   6898           uint64_t low = jrand48(rand_state_);
   6899           uint64_t rand_num = (high << 32) | (low & 0xffffffff);
   6900           WriteXRegister(instr->GetRt(), rand_num);
   6901           // Simulate successful random number generation.
   6902           // TODO: Return failure occasionally as a random number cannot be
   6903           // returned in a period of time.
   6904           ReadNzcv().SetRawValue(NoFlag);
   6905           LogSystemRegister(NZCV);
   6906           break;
   6907         }
   6908         default:
   6909           VIXL_UNIMPLEMENTED();
   6910       }
   6911       break;
   6912     case "nop_hi_hints"_h:
   6913     case "esb_hi_hints"_h:
   6914     case "csdb_hi_hints"_h:
   6915       break;
   6916     case "bti_hb_hints"_h:
   6917       switch (instr->GetImmHint()) {
   6918         case BTI_jc:
   6919           break;
   6920         case BTI:
   6921           if (PcIsInGuardedPage() && (ReadBType() != DefaultBType)) {
   6922             VIXL_ABORT_WITH_MSG("Executing BTI with wrong BType.");
   6923           }
   6924           break;
   6925         case BTI_c:
   6926           if (PcIsInGuardedPage() &&
   6927               (ReadBType() == BranchFromGuardedNotToIP)) {
   6928             VIXL_ABORT_WITH_MSG("Executing BTI c with wrong BType.");
   6929           }
   6930           break;
   6931         case BTI_j:
   6932           if (PcIsInGuardedPage() && (ReadBType() == BranchAndLink)) {
   6933             VIXL_ABORT_WITH_MSG("Executing BTI j with wrong BType.");
   6934           }
   6935           break;
   6936         default:
   6937           VIXL_UNREACHABLE();
   6938       }
   6939       return;
   6940     case "pacib1716_hi_hints"_h:
   6941       pac_key = kPACKeyIB;
   6942       VIXL_FALLTHROUGH();
   6943     case "pacia1716_hi_hints"_h:
   6944       PACHelper(17, 16, pac_key, &Simulator::AddPAC);
   6945       break;
   6946     case "pacibsp_hi_hints"_h:
   6947       pac_key = kPACKeyIB;
   6948       VIXL_FALLTHROUGH();
   6949     case "paciasp_hi_hints"_h:
   6950       PACHelper(30, 31, pac_key, &Simulator::AddPAC);
   6951 
   6952       // Check BType allows PACI[AB]SP instructions.
   6953       if (PcIsInGuardedPage()) {
   6954         switch (ReadBType()) {
   6955           case BranchFromGuardedNotToIP:
   6956           // TODO: This case depends on the value of SCTLR_EL1.BT0, which we
   6957           // assume here to be zero. This allows execution of PACI[AB]SP when
   6958           // BTYPE is BranchFromGuardedNotToIP (0b11).
   6959           case DefaultBType:
   6960           case BranchFromUnguardedOrToIP:
   6961           case BranchAndLink:
   6962             break;
   6963         }
   6964       }
   6965       break;
   6966     case "pacibz_hi_hints"_h:
   6967       pac_key = kPACKeyIB;
   6968       VIXL_FALLTHROUGH();
   6969     case "paciaz_hi_hints"_h:
   6970       PACHelper(30, -1, pac_key, &Simulator::AddPAC);
   6971       break;
   6972     case "autib1716_hi_hints"_h:
   6973       pac_key = kPACKeyIB;
   6974       VIXL_FALLTHROUGH();
   6975     case "autia1716_hi_hints"_h:
   6976       PACHelper(17, 16, pac_key, &Simulator::AuthPAC);
   6977       break;
   6978     case "autibsp_hi_hints"_h:
   6979       pac_key = kPACKeyIB;
   6980       VIXL_FALLTHROUGH();
   6981     case "autiasp_hi_hints"_h:
   6982       PACHelper(30, 31, pac_key, &Simulator::AuthPAC);
   6983       break;
   6984     case "autibz_hi_hints"_h:
   6985       pac_key = kPACKeyIB;
   6986       VIXL_FALLTHROUGH();
   6987     case "autiaz_hi_hints"_h:
   6988       PACHelper(30, -1, pac_key, &Simulator::AuthPAC);
   6989       break;
   6990     case "dsb_bo_barriers"_h:
   6991     case "dmb_bo_barriers"_h:
   6992     case "isb_bi_barriers"_h:
   6993       __sync_synchronize();
   6994       break;
   6995     case "sys_cr_systeminstrs"_h:
   6996       SysOp_W(instr->GetSysOp(), ReadXRegister(instr->GetRt()));
   6997       break;
   6998     default:
   6999       VIXL_UNIMPLEMENTED();
   7000   }
   7001 }
   7002 
   7003 
   7004 void Simulator::VisitException(const Instruction* instr) {
   7005   switch (instr->Mask(ExceptionMask)) {
   7006     case HLT:
   7007       switch (instr->GetImmException()) {
   7008         case kUnreachableOpcode:
   7009           DoUnreachable(instr);
   7010           return;
   7011         case kTraceOpcode:
   7012           DoTrace(instr);
   7013           return;
   7014         case kLogOpcode:
   7015           DoLog(instr);
   7016           return;
   7017         case kPrintfOpcode:
   7018           DoPrintf(instr);
   7019           return;
   7020         case kRuntimeCallOpcode:
   7021           DoRuntimeCall(instr);
   7022           return;
   7023         case kSetCPUFeaturesOpcode:
   7024         case kEnableCPUFeaturesOpcode:
   7025         case kDisableCPUFeaturesOpcode:
   7026           DoConfigureCPUFeatures(instr);
   7027           return;
   7028         case kSaveCPUFeaturesOpcode:
   7029           DoSaveCPUFeatures(instr);
   7030           return;
   7031         case kRestoreCPUFeaturesOpcode:
   7032           DoRestoreCPUFeatures(instr);
   7033           return;
   7034         case kMTEActive:
   7035           MetaDataDepot::MetaDataMTE::SetActive(true);
   7036           return;
   7037         case kMTEInactive:
   7038           MetaDataDepot::MetaDataMTE::SetActive(false);
   7039           return;
   7040         default:
   7041           HostBreakpoint();
   7042           return;
   7043       }
   7044     case BRK:
   7045       if (debugger_enabled_) {
   7046         uint64_t next_instr =
   7047             reinterpret_cast<uint64_t>(pc_->GetNextInstruction());
   7048         if (!debugger_->IsBreakpoint(next_instr)) {
   7049           debugger_->RegisterBreakpoint(next_instr);
   7050         }
   7051       } else {
   7052         HostBreakpoint();
   7053       }
   7054       return;
   7055     default:
   7056       VIXL_UNIMPLEMENTED();
   7057   }
   7058 }
   7059 
   7060 
   7061 void Simulator::VisitCrypto2RegSHA(const Instruction* instr) {
   7062   VisitUnimplemented(instr);
   7063 }
   7064 
   7065 
   7066 void Simulator::VisitCrypto3RegSHA(const Instruction* instr) {
   7067   VisitUnimplemented(instr);
   7068 }
   7069 
   7070 
   7071 void Simulator::VisitCryptoAES(const Instruction* instr) {
   7072   VisitUnimplemented(instr);
   7073 }
   7074 
   7075 
   7076 void Simulator::VisitNEON2RegMisc(const Instruction* instr) {
   7077   NEONFormatDecoder nfd(instr);
   7078   VectorFormat vf = nfd.GetVectorFormat();
   7079 
   7080   static const NEONFormatMap map_lp =
   7081       {{23, 22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}};
   7082   VectorFormat vf_lp = nfd.GetVectorFormat(&map_lp);
   7083 
   7084   static const NEONFormatMap map_fcvtl = {{22}, {NF_4S, NF_2D}};
   7085   VectorFormat vf_fcvtl = nfd.GetVectorFormat(&map_fcvtl);
   7086 
   7087   static const NEONFormatMap map_fcvtn = {{22, 30},
   7088                                           {NF_4H, NF_8H, NF_2S, NF_4S}};
   7089   VectorFormat vf_fcvtn = nfd.GetVectorFormat(&map_fcvtn);
   7090 
   7091   SimVRegister& rd = ReadVRegister(instr->GetRd());
   7092   SimVRegister& rn = ReadVRegister(instr->GetRn());
   7093 
   7094   if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_opcode) {
   7095     // These instructions all use a two bit size field, except NOT and RBIT,
   7096     // which use the field to encode the operation.
   7097     switch (instr->Mask(NEON2RegMiscMask)) {
   7098       case NEON_REV64:
   7099         rev64(vf, rd, rn);
   7100         break;
   7101       case NEON_REV32:
   7102         rev32(vf, rd, rn);
   7103         break;
   7104       case NEON_REV16:
   7105         rev16(vf, rd, rn);
   7106         break;
   7107       case NEON_SUQADD:
   7108         suqadd(vf, rd, rd, rn);
   7109         break;
   7110       case NEON_USQADD:
   7111         usqadd(vf, rd, rd, rn);
   7112         break;
   7113       case NEON_CLS:
   7114         cls(vf, rd, rn);
   7115         break;
   7116       case NEON_CLZ:
   7117         clz(vf, rd, rn);
   7118         break;
   7119       case NEON_CNT:
   7120         cnt(vf, rd, rn);
   7121         break;
   7122       case NEON_SQABS:
   7123         abs(vf, rd, rn).SignedSaturate(vf);
   7124         break;
   7125       case NEON_SQNEG:
   7126         neg(vf, rd, rn).SignedSaturate(vf);
   7127         break;
   7128       case NEON_CMGT_zero:
   7129         cmp(vf, rd, rn, 0, gt);
   7130         break;
   7131       case NEON_CMGE_zero:
   7132         cmp(vf, rd, rn, 0, ge);
   7133         break;
   7134       case NEON_CMEQ_zero:
   7135         cmp(vf, rd, rn, 0, eq);
   7136         break;
   7137       case NEON_CMLE_zero:
   7138         cmp(vf, rd, rn, 0, le);
   7139         break;
   7140       case NEON_CMLT_zero:
   7141         cmp(vf, rd, rn, 0, lt);
   7142         break;
   7143       case NEON_ABS:
   7144         abs(vf, rd, rn);
   7145         break;
   7146       case NEON_NEG:
   7147         neg(vf, rd, rn);
   7148         break;
   7149       case NEON_SADDLP:
   7150         saddlp(vf_lp, rd, rn);
   7151         break;
   7152       case NEON_UADDLP:
   7153         uaddlp(vf_lp, rd, rn);
   7154         break;
   7155       case NEON_SADALP:
   7156         sadalp(vf_lp, rd, rn);
   7157         break;
   7158       case NEON_UADALP:
   7159         uadalp(vf_lp, rd, rn);
   7160         break;
   7161       case NEON_RBIT_NOT:
   7162         vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
   7163         switch (instr->GetFPType()) {
   7164           case 0:
   7165             not_(vf, rd, rn);
   7166             break;
   7167           case 1:
   7168             rbit(vf, rd, rn);
   7169             break;
   7170           default:
   7171             VIXL_UNIMPLEMENTED();
   7172         }
   7173         break;
   7174     }
   7175   } else {
   7176     VectorFormat fpf = nfd.GetVectorFormat(nfd.FPFormatMap());
   7177     FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
   7178     bool inexact_exception = false;
   7179     FrintMode frint_mode = kFrintToInteger;
   7180 
   7181     // These instructions all use a one bit size field, except XTN, SQXTUN,
   7182     // SHLL, SQXTN and UQXTN, which use a two bit size field.
   7183     switch (instr->Mask(NEON2RegMiscFPMask)) {
   7184       case NEON_FABS:
   7185         fabs_(fpf, rd, rn);
   7186         return;
   7187       case NEON_FNEG:
   7188         fneg(fpf, rd, rn);
   7189         return;
   7190       case NEON_FSQRT:
   7191         fsqrt(fpf, rd, rn);
   7192         return;
   7193       case NEON_FCVTL:
   7194         if (instr->Mask(NEON_Q)) {
   7195           fcvtl2(vf_fcvtl, rd, rn);
   7196         } else {
   7197           fcvtl(vf_fcvtl, rd, rn);
   7198         }
   7199         return;
   7200       case NEON_FCVTN:
   7201         if (instr->Mask(NEON_Q)) {
   7202           fcvtn2(vf_fcvtn, rd, rn);
   7203         } else {
   7204           fcvtn(vf_fcvtn, rd, rn);
   7205         }
   7206         return;
   7207       case NEON_FCVTXN:
   7208         if (instr->Mask(NEON_Q)) {
   7209           fcvtxn2(vf_fcvtn, rd, rn);
   7210         } else {
   7211           fcvtxn(vf_fcvtn, rd, rn);
   7212         }
   7213         return;
   7214 
   7215       // The following instructions break from the switch statement, rather
   7216       // than return.
   7217       case NEON_FRINT32X:
   7218         inexact_exception = true;
   7219         frint_mode = kFrintToInt32;
   7220         break;  // Use FPCR rounding mode.
   7221       case NEON_FRINT32Z:
   7222         inexact_exception = true;
   7223         frint_mode = kFrintToInt32;
   7224         fpcr_rounding = FPZero;
   7225         break;
   7226       case NEON_FRINT64X:
   7227         inexact_exception = true;
   7228         frint_mode = kFrintToInt64;
   7229         break;  // Use FPCR rounding mode.
   7230       case NEON_FRINT64Z:
   7231         inexact_exception = true;
   7232         frint_mode = kFrintToInt64;
   7233         fpcr_rounding = FPZero;
   7234         break;
   7235       case NEON_FRINTI:
   7236         break;  // Use FPCR rounding mode.
   7237       case NEON_FRINTX:
   7238         inexact_exception = true;
   7239         break;
   7240       case NEON_FRINTA:
   7241         fpcr_rounding = FPTieAway;
   7242         break;
   7243       case NEON_FRINTM:
   7244         fpcr_rounding = FPNegativeInfinity;
   7245         break;
   7246       case NEON_FRINTN:
   7247         fpcr_rounding = FPTieEven;
   7248         break;
   7249       case NEON_FRINTP:
   7250         fpcr_rounding = FPPositiveInfinity;
   7251         break;
   7252       case NEON_FRINTZ:
   7253         fpcr_rounding = FPZero;
   7254         break;
   7255 
   7256       case NEON_FCVTNS:
   7257         fcvts(fpf, rd, rn, FPTieEven);
   7258         return;
   7259       case NEON_FCVTNU:
   7260         fcvtu(fpf, rd, rn, FPTieEven);
   7261         return;
   7262       case NEON_FCVTPS:
   7263         fcvts(fpf, rd, rn, FPPositiveInfinity);
   7264         return;
   7265       case NEON_FCVTPU:
   7266         fcvtu(fpf, rd, rn, FPPositiveInfinity);
   7267         return;
   7268       case NEON_FCVTMS:
   7269         fcvts(fpf, rd, rn, FPNegativeInfinity);
   7270         return;
   7271       case NEON_FCVTMU:
   7272         fcvtu(fpf, rd, rn, FPNegativeInfinity);
   7273         return;
   7274       case NEON_FCVTZS:
   7275         fcvts(fpf, rd, rn, FPZero);
   7276         return;
   7277       case NEON_FCVTZU:
   7278         fcvtu(fpf, rd, rn, FPZero);
   7279         return;
   7280       case NEON_FCVTAS:
   7281         fcvts(fpf, rd, rn, FPTieAway);
   7282         return;
   7283       case NEON_FCVTAU:
   7284         fcvtu(fpf, rd, rn, FPTieAway);
   7285         return;
   7286       case NEON_SCVTF:
   7287         scvtf(fpf, rd, rn, 0, fpcr_rounding);
   7288         return;
   7289       case NEON_UCVTF:
   7290         ucvtf(fpf, rd, rn, 0, fpcr_rounding);
   7291         return;
   7292       case NEON_URSQRTE:
   7293         ursqrte(fpf, rd, rn);
   7294         return;
   7295       case NEON_URECPE:
   7296         urecpe(fpf, rd, rn);
   7297         return;
   7298       case NEON_FRSQRTE:
   7299         frsqrte(fpf, rd, rn);
   7300         return;
   7301       case NEON_FRECPE:
   7302         frecpe(fpf, rd, rn, fpcr_rounding);
   7303         return;
   7304       case NEON_FCMGT_zero:
   7305         fcmp_zero(fpf, rd, rn, gt);
   7306         return;
   7307       case NEON_FCMGE_zero:
   7308         fcmp_zero(fpf, rd, rn, ge);
   7309         return;
   7310       case NEON_FCMEQ_zero:
   7311         fcmp_zero(fpf, rd, rn, eq);
   7312         return;
   7313       case NEON_FCMLE_zero:
   7314         fcmp_zero(fpf, rd, rn, le);
   7315         return;
   7316       case NEON_FCMLT_zero:
   7317         fcmp_zero(fpf, rd, rn, lt);
   7318         return;
   7319       default:
   7320         if ((NEON_XTN_opcode <= instr->Mask(NEON2RegMiscOpcode)) &&
   7321             (instr->Mask(NEON2RegMiscOpcode) <= NEON_UQXTN_opcode)) {
   7322           switch (instr->Mask(NEON2RegMiscMask)) {
   7323             case NEON_XTN:
   7324               xtn(vf, rd, rn);
   7325               return;
   7326             case NEON_SQXTN:
   7327               sqxtn(vf, rd, rn);
   7328               return;
   7329             case NEON_UQXTN:
   7330               uqxtn(vf, rd, rn);
   7331               return;
   7332             case NEON_SQXTUN:
   7333               sqxtun(vf, rd, rn);
   7334               return;
   7335             case NEON_SHLL:
   7336               vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
   7337               if (instr->Mask(NEON_Q)) {
   7338                 shll2(vf, rd, rn);
   7339               } else {
   7340                 shll(vf, rd, rn);
   7341               }
   7342               return;
   7343             default:
   7344               VIXL_UNIMPLEMENTED();
   7345           }
   7346         } else {
   7347           VIXL_UNIMPLEMENTED();
   7348         }
   7349     }
   7350 
   7351     // Only FRINT* instructions fall through the switch above.
   7352     frint(fpf, rd, rn, fpcr_rounding, inexact_exception, frint_mode);
   7353   }
   7354 }
   7355 
   7356 
   7357 void Simulator::VisitNEON2RegMiscFP16(const Instruction* instr) {
   7358   static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
   7359   NEONFormatDecoder nfd(instr);
   7360   VectorFormat fpf = nfd.GetVectorFormat(&map_half);
   7361 
   7362   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
   7363 
   7364   SimVRegister& rd = ReadVRegister(instr->GetRd());
   7365   SimVRegister& rn = ReadVRegister(instr->GetRn());
   7366 
   7367   switch (instr->Mask(NEON2RegMiscFP16Mask)) {
   7368     case NEON_SCVTF_H:
   7369       scvtf(fpf, rd, rn, 0, fpcr_rounding);
   7370       return;
   7371     case NEON_UCVTF_H:
   7372       ucvtf(fpf, rd, rn, 0, fpcr_rounding);
   7373       return;
   7374     case NEON_FCVTNS_H:
   7375       fcvts(fpf, rd, rn, FPTieEven);
   7376       return;
   7377     case NEON_FCVTNU_H:
   7378       fcvtu(fpf, rd, rn, FPTieEven);
   7379       return;
   7380     case NEON_FCVTPS_H:
   7381       fcvts(fpf, rd, rn, FPPositiveInfinity);
   7382       return;
   7383     case NEON_FCVTPU_H:
   7384       fcvtu(fpf, rd, rn, FPPositiveInfinity);
   7385       return;
   7386     case NEON_FCVTMS_H:
   7387       fcvts(fpf, rd, rn, FPNegativeInfinity);
   7388       return;
   7389     case NEON_FCVTMU_H:
   7390       fcvtu(fpf, rd, rn, FPNegativeInfinity);
   7391       return;
   7392     case NEON_FCVTZS_H:
   7393       fcvts(fpf, rd, rn, FPZero);
   7394       return;
   7395     case NEON_FCVTZU_H:
   7396       fcvtu(fpf, rd, rn, FPZero);
   7397       return;
   7398     case NEON_FCVTAS_H:
   7399       fcvts(fpf, rd, rn, FPTieAway);
   7400       return;
   7401     case NEON_FCVTAU_H:
   7402       fcvtu(fpf, rd, rn, FPTieAway);
   7403       return;
   7404     case NEON_FRINTI_H:
   7405       frint(fpf, rd, rn, fpcr_rounding, false);
   7406       return;
   7407     case NEON_FRINTX_H:
   7408       frint(fpf, rd, rn, fpcr_rounding, true);
   7409       return;
   7410     case NEON_FRINTA_H:
   7411       frint(fpf, rd, rn, FPTieAway, false);
   7412       return;
   7413     case NEON_FRINTM_H:
   7414       frint(fpf, rd, rn, FPNegativeInfinity, false);
   7415       return;
   7416     case NEON_FRINTN_H:
   7417       frint(fpf, rd, rn, FPTieEven, false);
   7418       return;
   7419     case NEON_FRINTP_H:
   7420       frint(fpf, rd, rn, FPPositiveInfinity, false);
   7421       return;
   7422     case NEON_FRINTZ_H:
   7423       frint(fpf, rd, rn, FPZero, false);
   7424       return;
   7425     case NEON_FABS_H:
   7426       fabs_(fpf, rd, rn);
   7427       return;
   7428     case NEON_FNEG_H:
   7429       fneg(fpf, rd, rn);
   7430       return;
   7431     case NEON_FSQRT_H:
   7432       fsqrt(fpf, rd, rn);
   7433       return;
   7434     case NEON_FRSQRTE_H:
   7435       frsqrte(fpf, rd, rn);
   7436       return;
   7437     case NEON_FRECPE_H:
   7438       frecpe(fpf, rd, rn, fpcr_rounding);
   7439       return;
   7440     case NEON_FCMGT_H_zero:
   7441       fcmp_zero(fpf, rd, rn, gt);
   7442       return;
   7443     case NEON_FCMGE_H_zero:
   7444       fcmp_zero(fpf, rd, rn, ge);
   7445       return;
   7446     case NEON_FCMEQ_H_zero:
   7447       fcmp_zero(fpf, rd, rn, eq);
   7448       return;
   7449     case NEON_FCMLE_H_zero:
   7450       fcmp_zero(fpf, rd, rn, le);
   7451       return;
   7452     case NEON_FCMLT_H_zero:
   7453       fcmp_zero(fpf, rd, rn, lt);
   7454       return;
   7455     default:
   7456       VIXL_UNIMPLEMENTED();
   7457       return;
   7458   }
   7459 }
   7460 
   7461 
   7462 void Simulator::VisitNEON3Same(const Instruction* instr) {
   7463   NEONFormatDecoder nfd(instr);
   7464   SimVRegister& rd = ReadVRegister(instr->GetRd());
   7465   SimVRegister& rn = ReadVRegister(instr->GetRn());
   7466   SimVRegister& rm = ReadVRegister(instr->GetRm());
   7467 
   7468   if (instr->Mask(NEON3SameLogicalFMask) == NEON3SameLogicalFixed) {
   7469     VectorFormat vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
   7470     switch (instr->Mask(NEON3SameLogicalMask)) {
   7471       case NEON_AND:
   7472         and_(vf, rd, rn, rm);
   7473         break;
   7474       case NEON_ORR:
   7475         orr(vf, rd, rn, rm);
   7476         break;
   7477       case NEON_ORN:
   7478         orn(vf, rd, rn, rm);
   7479         break;
   7480       case NEON_EOR:
   7481         eor(vf, rd, rn, rm);
   7482         break;
   7483       case NEON_BIC:
   7484         bic(vf, rd, rn, rm);
   7485         break;
   7486       case NEON_BIF:
   7487         bif(vf, rd, rn, rm);
   7488         break;
   7489       case NEON_BIT:
   7490         bit(vf, rd, rn, rm);
   7491         break;
   7492       case NEON_BSL:
   7493         bsl(vf, rd, rd, rn, rm);
   7494         break;
   7495       default:
   7496         VIXL_UNIMPLEMENTED();
   7497     }
   7498   } else if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) {
   7499     VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
   7500     switch (instr->Mask(NEON3SameFPMask)) {
   7501       case NEON_FADD:
   7502         fadd(vf, rd, rn, rm);
   7503         break;
   7504       case NEON_FSUB:
   7505         fsub(vf, rd, rn, rm);
   7506         break;
   7507       case NEON_FMUL:
   7508         fmul(vf, rd, rn, rm);
   7509         break;
   7510       case NEON_FDIV:
   7511         fdiv(vf, rd, rn, rm);
   7512         break;
   7513       case NEON_FMAX:
   7514         fmax(vf, rd, rn, rm);
   7515         break;
   7516       case NEON_FMIN:
   7517         fmin(vf, rd, rn, rm);
   7518         break;
   7519       case NEON_FMAXNM:
   7520         fmaxnm(vf, rd, rn, rm);
   7521         break;
   7522       case NEON_FMINNM:
   7523         fminnm(vf, rd, rn, rm);
   7524         break;
   7525       case NEON_FMLA:
   7526         fmla(vf, rd, rd, rn, rm);
   7527         break;
   7528       case NEON_FMLS:
   7529         fmls(vf, rd, rd, rn, rm);
   7530         break;
   7531       case NEON_FMULX:
   7532         fmulx(vf, rd, rn, rm);
   7533         break;
   7534       case NEON_FACGE:
   7535         fabscmp(vf, rd, rn, rm, ge);
   7536         break;
   7537       case NEON_FACGT:
   7538         fabscmp(vf, rd, rn, rm, gt);
   7539         break;
   7540       case NEON_FCMEQ:
   7541         fcmp(vf, rd, rn, rm, eq);
   7542         break;
   7543       case NEON_FCMGE:
   7544         fcmp(vf, rd, rn, rm, ge);
   7545         break;
   7546       case NEON_FCMGT:
   7547         fcmp(vf, rd, rn, rm, gt);
   7548         break;
   7549       case NEON_FRECPS:
   7550         frecps(vf, rd, rn, rm);
   7551         break;
   7552       case NEON_FRSQRTS:
   7553         frsqrts(vf, rd, rn, rm);
   7554         break;
   7555       case NEON_FABD:
   7556         fabd(vf, rd, rn, rm);
   7557         break;
   7558       case NEON_FADDP:
   7559         faddp(vf, rd, rn, rm);
   7560         break;
   7561       case NEON_FMAXP:
   7562         fmaxp(vf, rd, rn, rm);
   7563         break;
   7564       case NEON_FMAXNMP:
   7565         fmaxnmp(vf, rd, rn, rm);
   7566         break;
   7567       case NEON_FMINP:
   7568         fminp(vf, rd, rn, rm);
   7569         break;
   7570       case NEON_FMINNMP:
   7571         fminnmp(vf, rd, rn, rm);
   7572         break;
   7573       default:
   7574         // FMLAL{2} and FMLSL{2} have special-case encodings.
   7575         switch (instr->Mask(NEON3SameFHMMask)) {
   7576           case NEON_FMLAL:
   7577             fmlal(vf, rd, rn, rm);
   7578             break;
   7579           case NEON_FMLAL2:
   7580             fmlal2(vf, rd, rn, rm);
   7581             break;
   7582           case NEON_FMLSL:
   7583             fmlsl(vf, rd, rn, rm);
   7584             break;
   7585           case NEON_FMLSL2:
   7586             fmlsl2(vf, rd, rn, rm);
   7587             break;
   7588           default:
   7589             VIXL_UNIMPLEMENTED();
   7590         }
   7591     }
   7592   } else {
   7593     VectorFormat vf = nfd.GetVectorFormat();
   7594     switch (instr->Mask(NEON3SameMask)) {
   7595       case NEON_ADD:
   7596         add(vf, rd, rn, rm);
   7597         break;
   7598       case NEON_ADDP:
   7599         addp(vf, rd, rn, rm);
   7600         break;
   7601       case NEON_CMEQ:
   7602         cmp(vf, rd, rn, rm, eq);
   7603         break;
   7604       case NEON_CMGE:
   7605         cmp(vf, rd, rn, rm, ge);
   7606         break;
   7607       case NEON_CMGT:
   7608         cmp(vf, rd, rn, rm, gt);
   7609         break;
   7610       case NEON_CMHI:
   7611         cmp(vf, rd, rn, rm, hi);
   7612         break;
   7613       case NEON_CMHS:
   7614         cmp(vf, rd, rn, rm, hs);
   7615         break;
   7616       case NEON_CMTST:
   7617         cmptst(vf, rd, rn, rm);
   7618         break;
   7619       case NEON_MLS:
   7620         mls(vf, rd, rd, rn, rm);
   7621         break;
   7622       case NEON_MLA:
   7623         mla(vf, rd, rd, rn, rm);
   7624         break;
   7625       case NEON_MUL:
   7626         mul(vf, rd, rn, rm);
   7627         break;
   7628       case NEON_PMUL:
   7629         pmul(vf, rd, rn, rm);
   7630         break;
   7631       case NEON_SMAX:
   7632         smax(vf, rd, rn, rm);
   7633         break;
   7634       case NEON_SMAXP:
   7635         smaxp(vf, rd, rn, rm);
   7636         break;
   7637       case NEON_SMIN:
   7638         smin(vf, rd, rn, rm);
   7639         break;
   7640       case NEON_SMINP:
   7641         sminp(vf, rd, rn, rm);
   7642         break;
   7643       case NEON_SUB:
   7644         sub(vf, rd, rn, rm);
   7645         break;
   7646       case NEON_UMAX:
   7647         umax(vf, rd, rn, rm);
   7648         break;
   7649       case NEON_UMAXP:
   7650         umaxp(vf, rd, rn, rm);
   7651         break;
   7652       case NEON_UMIN:
   7653         umin(vf, rd, rn, rm);
   7654         break;
   7655       case NEON_UMINP:
   7656         uminp(vf, rd, rn, rm);
   7657         break;
   7658       case NEON_SSHL:
   7659         sshl(vf, rd, rn, rm);
   7660         break;
   7661       case NEON_USHL:
   7662         ushl(vf, rd, rn, rm);
   7663         break;
   7664       case NEON_SABD:
   7665         absdiff(vf, rd, rn, rm, true);
   7666         break;
   7667       case NEON_UABD:
   7668         absdiff(vf, rd, rn, rm, false);
   7669         break;
   7670       case NEON_SABA:
   7671         saba(vf, rd, rn, rm);
   7672         break;
   7673       case NEON_UABA:
   7674         uaba(vf, rd, rn, rm);
   7675         break;
   7676       case NEON_UQADD:
   7677         add(vf, rd, rn, rm).UnsignedSaturate(vf);
   7678         break;
   7679       case NEON_SQADD:
   7680         add(vf, rd, rn, rm).SignedSaturate(vf);
   7681         break;
   7682       case NEON_UQSUB:
   7683         sub(vf, rd, rn, rm).UnsignedSaturate(vf);
   7684         break;
   7685       case NEON_SQSUB:
   7686         sub(vf, rd, rn, rm).SignedSaturate(vf);
   7687         break;
   7688       case NEON_SQDMULH:
   7689         sqdmulh(vf, rd, rn, rm);
   7690         break;
   7691       case NEON_SQRDMULH:
   7692         sqrdmulh(vf, rd, rn, rm);
   7693         break;
   7694       case NEON_UQSHL:
   7695         ushl(vf, rd, rn, rm).UnsignedSaturate(vf);
   7696         break;
   7697       case NEON_SQSHL:
   7698         sshl(vf, rd, rn, rm).SignedSaturate(vf);
   7699         break;
   7700       case NEON_URSHL:
   7701         ushl(vf, rd, rn, rm).Round(vf);
   7702         break;
   7703       case NEON_SRSHL:
   7704         sshl(vf, rd, rn, rm).Round(vf);
   7705         break;
   7706       case NEON_UQRSHL:
   7707         ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
   7708         break;
   7709       case NEON_SQRSHL:
   7710         sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
   7711         break;
   7712       case NEON_UHADD:
   7713         add(vf, rd, rn, rm).Uhalve(vf);
   7714         break;
   7715       case NEON_URHADD:
   7716         add(vf, rd, rn, rm).Uhalve(vf).Round(vf);
   7717         break;
   7718       case NEON_SHADD:
   7719         add(vf, rd, rn, rm).Halve(vf);
   7720         break;
   7721       case NEON_SRHADD:
   7722         add(vf, rd, rn, rm).Halve(vf).Round(vf);
   7723         break;
   7724       case NEON_UHSUB:
   7725         sub(vf, rd, rn, rm).Uhalve(vf);
   7726         break;
   7727       case NEON_SHSUB:
   7728         sub(vf, rd, rn, rm).Halve(vf);
   7729         break;
   7730       default:
   7731         VIXL_UNIMPLEMENTED();
   7732     }
   7733   }
   7734 }
   7735 
   7736 
   7737 void Simulator::VisitNEON3SameFP16(const Instruction* instr) {
   7738   NEONFormatDecoder nfd(instr);
   7739   SimVRegister& rd = ReadVRegister(instr->GetRd());
   7740   SimVRegister& rn = ReadVRegister(instr->GetRn());
   7741   SimVRegister& rm = ReadVRegister(instr->GetRm());
   7742 
   7743   VectorFormat vf = nfd.GetVectorFormat(nfd.FP16FormatMap());
   7744   switch (instr->Mask(NEON3SameFP16Mask)) {
   7745 #define SIM_FUNC(A, B) \
   7746   case NEON_##A##_H:   \
   7747     B(vf, rd, rn, rm); \
   7748     break;
   7749     SIM_FUNC(FMAXNM, fmaxnm);
   7750     SIM_FUNC(FADD, fadd);
   7751     SIM_FUNC(FMULX, fmulx);
   7752     SIM_FUNC(FMAX, fmax);
   7753     SIM_FUNC(FRECPS, frecps);
   7754     SIM_FUNC(FMINNM, fminnm);
   7755     SIM_FUNC(FSUB, fsub);
   7756     SIM_FUNC(FMIN, fmin);
   7757     SIM_FUNC(FRSQRTS, frsqrts);
   7758     SIM_FUNC(FMAXNMP, fmaxnmp);
   7759     SIM_FUNC(FADDP, faddp);
   7760     SIM_FUNC(FMUL, fmul);
   7761     SIM_FUNC(FMAXP, fmaxp);
   7762     SIM_FUNC(FDIV, fdiv);
   7763     SIM_FUNC(FMINNMP, fminnmp);
   7764     SIM_FUNC(FABD, fabd);
   7765     SIM_FUNC(FMINP, fminp);
   7766 #undef SIM_FUNC
   7767     case NEON_FMLA_H:
   7768       fmla(vf, rd, rd, rn, rm);
   7769       break;
   7770     case NEON_FMLS_H:
   7771       fmls(vf, rd, rd, rn, rm);
   7772       break;
   7773     case NEON_FCMEQ_H:
   7774       fcmp(vf, rd, rn, rm, eq);
   7775       break;
   7776     case NEON_FCMGE_H:
   7777       fcmp(vf, rd, rn, rm, ge);
   7778       break;
   7779     case NEON_FACGE_H:
   7780       fabscmp(vf, rd, rn, rm, ge);
   7781       break;
   7782     case NEON_FCMGT_H:
   7783       fcmp(vf, rd, rn, rm, gt);
   7784       break;
   7785     case NEON_FACGT_H:
   7786       fabscmp(vf, rd, rn, rm, gt);
   7787       break;
   7788     default:
   7789       VIXL_UNIMPLEMENTED();
   7790       break;
   7791   }
   7792 }
   7793 
   7794 void Simulator::VisitNEON3SameExtra(const Instruction* instr) {
   7795   NEONFormatDecoder nfd(instr);
   7796   SimVRegister& rd = ReadVRegister(instr->GetRd());
   7797   SimVRegister& rn = ReadVRegister(instr->GetRn());
   7798   SimVRegister& rm = ReadVRegister(instr->GetRm());
   7799   int rot = 0;
   7800   VectorFormat vf = nfd.GetVectorFormat();
   7801 
   7802   switch (form_hash_) {
   7803     case "fcmla_asimdsame2_c"_h:
   7804       rot = instr->GetImmRotFcmlaVec();
   7805       fcmla(vf, rd, rn, rm, rd, rot);
   7806       break;
   7807     case "fcadd_asimdsame2_c"_h:
   7808       rot = instr->GetImmRotFcadd();
   7809       fcadd(vf, rd, rn, rm, rot);
   7810       break;
   7811     case "sdot_asimdsame2_d"_h:
   7812       sdot(vf, rd, rn, rm);
   7813       break;
   7814     case "udot_asimdsame2_d"_h:
   7815       udot(vf, rd, rn, rm);
   7816       break;
   7817     case "usdot_asimdsame2_d"_h:
   7818       usdot(vf, rd, rn, rm);
   7819       break;
   7820     case "sqrdmlah_asimdsame2_only"_h:
   7821       sqrdmlah(vf, rd, rn, rm);
   7822       break;
   7823     case "sqrdmlsh_asimdsame2_only"_h:
   7824       sqrdmlsh(vf, rd, rn, rm);
   7825       break;
   7826   }
   7827 }
   7828 
   7829 
   7830 void Simulator::VisitNEON3Different(const Instruction* instr) {
   7831   NEONFormatDecoder nfd(instr);
   7832   VectorFormat vf = nfd.GetVectorFormat();
   7833   VectorFormat vf_l = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
   7834 
   7835   SimVRegister& rd = ReadVRegister(instr->GetRd());
   7836   SimVRegister& rn = ReadVRegister(instr->GetRn());
   7837   SimVRegister& rm = ReadVRegister(instr->GetRm());
   7838   int size = instr->GetNEONSize();
   7839 
   7840   switch (instr->Mask(NEON3DifferentMask)) {
   7841     case NEON_PMULL:
   7842       if ((size == 1) || (size == 2)) {  // S/D reserved.
   7843         VisitUnallocated(instr);
   7844       } else {
   7845         if (size == 3) vf_l = kFormat1Q;
   7846         pmull(vf_l, rd, rn, rm);
   7847       }
   7848       break;
   7849     case NEON_PMULL2:
   7850       if ((size == 1) || (size == 2)) {  // S/D reserved.
   7851         VisitUnallocated(instr);
   7852       } else {
   7853         if (size == 3) vf_l = kFormat1Q;
   7854         pmull2(vf_l, rd, rn, rm);
   7855       }
   7856       break;
   7857     case NEON_UADDL:
   7858       uaddl(vf_l, rd, rn, rm);
   7859       break;
   7860     case NEON_UADDL2:
   7861       uaddl2(vf_l, rd, rn, rm);
   7862       break;
   7863     case NEON_SADDL:
   7864       saddl(vf_l, rd, rn, rm);
   7865       break;
   7866     case NEON_SADDL2:
   7867       saddl2(vf_l, rd, rn, rm);
   7868       break;
   7869     case NEON_USUBL:
   7870       usubl(vf_l, rd, rn, rm);
   7871       break;
   7872     case NEON_USUBL2:
   7873       usubl2(vf_l, rd, rn, rm);
   7874       break;
   7875     case NEON_SSUBL:
   7876       ssubl(vf_l, rd, rn, rm);
   7877       break;
   7878     case NEON_SSUBL2:
   7879       ssubl2(vf_l, rd, rn, rm);
   7880       break;
   7881     case NEON_SABAL:
   7882       sabal(vf_l, rd, rn, rm);
   7883       break;
   7884     case NEON_SABAL2:
   7885       sabal2(vf_l, rd, rn, rm);
   7886       break;
   7887     case NEON_UABAL:
   7888       uabal(vf_l, rd, rn, rm);
   7889       break;
   7890     case NEON_UABAL2:
   7891       uabal2(vf_l, rd, rn, rm);
   7892       break;
   7893     case NEON_SABDL:
   7894       sabdl(vf_l, rd, rn, rm);
   7895       break;
   7896     case NEON_SABDL2:
   7897       sabdl2(vf_l, rd, rn, rm);
   7898       break;
   7899     case NEON_UABDL:
   7900       uabdl(vf_l, rd, rn, rm);
   7901       break;
   7902     case NEON_UABDL2:
   7903       uabdl2(vf_l, rd, rn, rm);
   7904       break;
   7905     case NEON_SMLAL:
   7906       smlal(vf_l, rd, rn, rm);
   7907       break;
   7908     case NEON_SMLAL2:
   7909       smlal2(vf_l, rd, rn, rm);
   7910       break;
   7911     case NEON_UMLAL:
   7912       umlal(vf_l, rd, rn, rm);
   7913       break;
   7914     case NEON_UMLAL2:
   7915       umlal2(vf_l, rd, rn, rm);
   7916       break;
   7917     case NEON_SMLSL:
   7918       smlsl(vf_l, rd, rn, rm);
   7919       break;
   7920     case NEON_SMLSL2:
   7921       smlsl2(vf_l, rd, rn, rm);
   7922       break;
   7923     case NEON_UMLSL:
   7924       umlsl(vf_l, rd, rn, rm);
   7925       break;
   7926     case NEON_UMLSL2:
   7927       umlsl2(vf_l, rd, rn, rm);
   7928       break;
   7929     case NEON_SMULL:
   7930       smull(vf_l, rd, rn, rm);
   7931       break;
   7932     case NEON_SMULL2:
   7933       smull2(vf_l, rd, rn, rm);
   7934       break;
   7935     case NEON_UMULL:
   7936       umull(vf_l, rd, rn, rm);
   7937       break;
   7938     case NEON_UMULL2:
   7939       umull2(vf_l, rd, rn, rm);
   7940       break;
   7941     case NEON_SQDMLAL:
   7942       sqdmlal(vf_l, rd, rn, rm);
   7943       break;
   7944     case NEON_SQDMLAL2:
   7945       sqdmlal2(vf_l, rd, rn, rm);
   7946       break;
   7947     case NEON_SQDMLSL:
   7948       sqdmlsl(vf_l, rd, rn, rm);
   7949       break;
   7950     case NEON_SQDMLSL2:
   7951       sqdmlsl2(vf_l, rd, rn, rm);
   7952       break;
   7953     case NEON_SQDMULL:
   7954       sqdmull(vf_l, rd, rn, rm);
   7955       break;
   7956     case NEON_SQDMULL2:
   7957       sqdmull2(vf_l, rd, rn, rm);
   7958       break;
   7959     case NEON_UADDW:
   7960       uaddw(vf_l, rd, rn, rm);
   7961       break;
   7962     case NEON_UADDW2:
   7963       uaddw2(vf_l, rd, rn, rm);
   7964       break;
   7965     case NEON_SADDW:
   7966       saddw(vf_l, rd, rn, rm);
   7967       break;
   7968     case NEON_SADDW2:
   7969       saddw2(vf_l, rd, rn, rm);
   7970       break;
   7971     case NEON_USUBW:
   7972       usubw(vf_l, rd, rn, rm);
   7973       break;
   7974     case NEON_USUBW2:
   7975       usubw2(vf_l, rd, rn, rm);
   7976       break;
   7977     case NEON_SSUBW:
   7978       ssubw(vf_l, rd, rn, rm);
   7979       break;
   7980     case NEON_SSUBW2:
   7981       ssubw2(vf_l, rd, rn, rm);
   7982       break;
   7983     case NEON_ADDHN:
   7984       addhn(vf, rd, rn, rm);
   7985       break;
   7986     case NEON_ADDHN2:
   7987       addhn2(vf, rd, rn, rm);
   7988       break;
   7989     case NEON_RADDHN:
   7990       raddhn(vf, rd, rn, rm);
   7991       break;
   7992     case NEON_RADDHN2:
   7993       raddhn2(vf, rd, rn, rm);
   7994       break;
   7995     case NEON_SUBHN:
   7996       subhn(vf, rd, rn, rm);
   7997       break;
   7998     case NEON_SUBHN2:
   7999       subhn2(vf, rd, rn, rm);
   8000       break;
   8001     case NEON_RSUBHN:
   8002       rsubhn(vf, rd, rn, rm);
   8003       break;
   8004     case NEON_RSUBHN2:
   8005       rsubhn2(vf, rd, rn, rm);
   8006       break;
   8007     default:
   8008       VIXL_UNIMPLEMENTED();
   8009   }
   8010 }
   8011 
   8012 
   8013 void Simulator::VisitNEONAcrossLanes(const Instruction* instr) {
   8014   NEONFormatDecoder nfd(instr);
   8015 
   8016   static const NEONFormatMap map_half = {{30}, {NF_4H, NF_8H}};
   8017 
   8018   SimVRegister& rd = ReadVRegister(instr->GetRd());
   8019   SimVRegister& rn = ReadVRegister(instr->GetRn());
   8020 
   8021   if (instr->Mask(NEONAcrossLanesFP16FMask) == NEONAcrossLanesFP16Fixed) {
   8022     VectorFormat vf = nfd.GetVectorFormat(&map_half);
   8023     switch (instr->Mask(NEONAcrossLanesFP16Mask)) {
   8024       case NEON_FMAXV_H:
   8025         fmaxv(vf, rd, rn);
   8026         break;
   8027       case NEON_FMINV_H:
   8028         fminv(vf, rd, rn);
   8029         break;
   8030       case NEON_FMAXNMV_H:
   8031         fmaxnmv(vf, rd, rn);
   8032         break;
   8033       case NEON_FMINNMV_H:
   8034         fminnmv(vf, rd, rn);
   8035         break;
   8036       default:
   8037         VIXL_UNIMPLEMENTED();
   8038     }
   8039   } else if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
   8040     // The input operand's VectorFormat is passed for these instructions.
   8041     VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
   8042 
   8043     switch (instr->Mask(NEONAcrossLanesFPMask)) {
   8044       case NEON_FMAXV:
   8045         fmaxv(vf, rd, rn);
   8046         break;
   8047       case NEON_FMINV:
   8048         fminv(vf, rd, rn);
   8049         break;
   8050       case NEON_FMAXNMV:
   8051         fmaxnmv(vf, rd, rn);
   8052         break;
   8053       case NEON_FMINNMV:
   8054         fminnmv(vf, rd, rn);
   8055         break;
   8056       default:
   8057         VIXL_UNIMPLEMENTED();
   8058     }
   8059   } else {
   8060     VectorFormat vf = nfd.GetVectorFormat();
   8061 
   8062     switch (instr->Mask(NEONAcrossLanesMask)) {
   8063       case NEON_ADDV:
   8064         addv(vf, rd, rn);
   8065         break;
   8066       case NEON_SMAXV:
   8067         smaxv(vf, rd, rn);
   8068         break;
   8069       case NEON_SMINV:
   8070         sminv(vf, rd, rn);
   8071         break;
   8072       case NEON_UMAXV:
   8073         umaxv(vf, rd, rn);
   8074         break;
   8075       case NEON_UMINV:
   8076         uminv(vf, rd, rn);
   8077         break;
   8078       case NEON_SADDLV:
   8079         saddlv(vf, rd, rn);
   8080         break;
   8081       case NEON_UADDLV:
   8082         uaddlv(vf, rd, rn);
   8083         break;
   8084       default:
   8085         VIXL_UNIMPLEMENTED();
   8086     }
   8087   }
   8088 }
   8089 
   8090 void Simulator::SimulateNEONMulByElementLong(const Instruction* instr) {
   8091   NEONFormatDecoder nfd(instr);
   8092   VectorFormat vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
   8093 
   8094   SimVRegister& rd = ReadVRegister(instr->GetRd());
   8095   SimVRegister& rn = ReadVRegister(instr->GetRn());
   8096 
   8097   int rm_reg = instr->GetRm();
   8098   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
   8099   if (instr->GetNEONSize() == 1) {
   8100     rm_reg = instr->GetRmLow16();
   8101     index = (index << 1) | instr->GetNEONM();
   8102   }
   8103   SimVRegister& rm = ReadVRegister(rm_reg);
   8104 
   8105   SimVRegister temp;
   8106   VectorFormat indexform =
   8107       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vf));
   8108   dup_element(indexform, temp, rm, index);
   8109 
   8110   bool is_2 = instr->Mask(NEON_Q) ? true : false;
   8111 
   8112   switch (form_hash_) {
   8113     case "smull_asimdelem_l"_h:
   8114       smull(vf, rd, rn, temp, is_2);
   8115       break;
   8116     case "umull_asimdelem_l"_h:
   8117       umull(vf, rd, rn, temp, is_2);
   8118       break;
   8119     case "smlal_asimdelem_l"_h:
   8120       smlal(vf, rd, rn, temp, is_2);
   8121       break;
   8122     case "umlal_asimdelem_l"_h:
   8123       umlal(vf, rd, rn, temp, is_2);
   8124       break;
   8125     case "smlsl_asimdelem_l"_h:
   8126       smlsl(vf, rd, rn, temp, is_2);
   8127       break;
   8128     case "umlsl_asimdelem_l"_h:
   8129       umlsl(vf, rd, rn, temp, is_2);
   8130       break;
   8131     case "sqdmull_asimdelem_l"_h:
   8132       sqdmull(vf, rd, rn, temp, is_2);
   8133       break;
   8134     case "sqdmlal_asimdelem_l"_h:
   8135       sqdmlal(vf, rd, rn, temp, is_2);
   8136       break;
   8137     case "sqdmlsl_asimdelem_l"_h:
   8138       sqdmlsl(vf, rd, rn, temp, is_2);
   8139       break;
   8140     default:
   8141       VIXL_UNREACHABLE();
   8142   }
   8143 }
   8144 
   8145 void Simulator::SimulateNEONFPMulByElementLong(const Instruction* instr) {
   8146   VectorFormat vform = instr->GetNEONQ() ? kFormat4S : kFormat2S;
   8147   SimVRegister& rd = ReadVRegister(instr->GetRd());
   8148   SimVRegister& rn = ReadVRegister(instr->GetRn());
   8149   SimVRegister& rm = ReadVRegister(instr->GetRmLow16());
   8150 
   8151   int index =
   8152       (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
   8153 
   8154   switch (form_hash_) {
   8155     case "fmlal_asimdelem_lh"_h:
   8156       fmlal(vform, rd, rn, rm, index);
   8157       break;
   8158     case "fmlal2_asimdelem_lh"_h:
   8159       fmlal2(vform, rd, rn, rm, index);
   8160       break;
   8161     case "fmlsl_asimdelem_lh"_h:
   8162       fmlsl(vform, rd, rn, rm, index);
   8163       break;
   8164     case "fmlsl2_asimdelem_lh"_h:
   8165       fmlsl2(vform, rd, rn, rm, index);
   8166       break;
   8167     default:
   8168       VIXL_UNREACHABLE();
   8169   }
   8170 }
   8171 
   8172 void Simulator::SimulateNEONFPMulByElement(const Instruction* instr) {
   8173   NEONFormatDecoder nfd(instr);
   8174   static const NEONFormatMap map =
   8175       {{23, 22, 30},
   8176        {NF_4H, NF_8H, NF_UNDEF, NF_UNDEF, NF_2S, NF_4S, NF_UNDEF, NF_2D}};
   8177   VectorFormat vform = nfd.GetVectorFormat(&map);
   8178 
   8179   SimVRegister& rd = ReadVRegister(instr->GetRd());
   8180   SimVRegister& rn = ReadVRegister(instr->GetRn());
   8181 
   8182   int rm_reg = instr->GetRm();
   8183   int index =
   8184       (instr->GetNEONH() << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
   8185 
   8186   if ((vform == kFormat4H) || (vform == kFormat8H)) {
   8187     rm_reg &= 0xf;
   8188   } else if ((vform == kFormat2S) || (vform == kFormat4S)) {
   8189     index >>= 1;
   8190   } else {
   8191     VIXL_ASSERT(vform == kFormat2D);
   8192     VIXL_ASSERT(instr->GetNEONL() == 0);
   8193     index >>= 2;
   8194   }
   8195 
   8196   SimVRegister& rm = ReadVRegister(rm_reg);
   8197 
   8198   switch (form_hash_) {
   8199     case "fmul_asimdelem_rh_h"_h:
   8200     case "fmul_asimdelem_r_sd"_h:
   8201       fmul(vform, rd, rn, rm, index);
   8202       break;
   8203     case "fmla_asimdelem_rh_h"_h:
   8204     case "fmla_asimdelem_r_sd"_h:
   8205       fmla(vform, rd, rn, rm, index);
   8206       break;
   8207     case "fmls_asimdelem_rh_h"_h:
   8208     case "fmls_asimdelem_r_sd"_h:
   8209       fmls(vform, rd, rn, rm, index);
   8210       break;
   8211     case "fmulx_asimdelem_rh_h"_h:
   8212     case "fmulx_asimdelem_r_sd"_h:
   8213       fmulx(vform, rd, rn, rm, index);
   8214       break;
   8215     default:
   8216       VIXL_UNREACHABLE();
   8217   }
   8218 }
   8219 
   8220 void Simulator::SimulateNEONComplexMulByElement(const Instruction* instr) {
   8221   VectorFormat vform = instr->GetNEONQ() ? kFormat8H : kFormat4H;
   8222   SimVRegister& rd = ReadVRegister(instr->GetRd());
   8223   SimVRegister& rn = ReadVRegister(instr->GetRn());
   8224   SimVRegister& rm = ReadVRegister(instr->GetRm());
   8225   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
   8226 
   8227   switch (form_hash_) {
   8228     case "fcmla_asimdelem_c_s"_h:
   8229       vform = kFormat4S;
   8230       index >>= 1;
   8231       VIXL_FALLTHROUGH();
   8232     case "fcmla_asimdelem_c_h"_h:
   8233       fcmla(vform, rd, rn, rm, index, instr->GetImmRotFcmlaSca());
   8234       break;
   8235     default:
   8236       VIXL_UNREACHABLE();
   8237   }
   8238 }
   8239 
   8240 void Simulator::SimulateNEONDotProdByElement(const Instruction* instr) {
   8241   VectorFormat vform = instr->GetNEONQ() ? kFormat4S : kFormat2S;
   8242 
   8243   SimVRegister& rd = ReadVRegister(instr->GetRd());
   8244   SimVRegister& rn = ReadVRegister(instr->GetRn());
   8245   SimVRegister& rm = ReadVRegister(instr->GetRm());
   8246   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
   8247 
   8248   SimVRegister temp;
   8249   // NEON indexed `dot` allows the index value exceed the register size.
   8250   // Promote the format to Q-sized vector format before the duplication.
   8251   dup_elements_to_segments(VectorFormatFillQ(vform), temp, rm, index);
   8252 
   8253   switch (form_hash_) {
   8254     case "sdot_asimdelem_d"_h:
   8255       sdot(vform, rd, rn, temp);
   8256       break;
   8257     case "udot_asimdelem_d"_h:
   8258       udot(vform, rd, rn, temp);
   8259       break;
   8260     case "sudot_asimdelem_d"_h:
   8261       usdot(vform, rd, temp, rn);
   8262       break;
   8263     case "usdot_asimdelem_d"_h:
   8264       usdot(vform, rd, rn, temp);
   8265       break;
   8266   }
   8267 }
   8268 
   8269 void Simulator::VisitNEONByIndexedElement(const Instruction* instr) {
   8270   NEONFormatDecoder nfd(instr);
   8271   VectorFormat vform = nfd.GetVectorFormat();
   8272 
   8273   SimVRegister& rd = ReadVRegister(instr->GetRd());
   8274   SimVRegister& rn = ReadVRegister(instr->GetRn());
   8275 
   8276   int rm_reg = instr->GetRm();
   8277   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
   8278 
   8279   if ((vform == kFormat4H) || (vform == kFormat8H)) {
   8280     rm_reg &= 0xf;
   8281     index = (index << 1) | instr->GetNEONM();
   8282   }
   8283 
   8284   SimVRegister& rm = ReadVRegister(rm_reg);
   8285 
   8286   switch (form_hash_) {
   8287     case "mul_asimdelem_r"_h:
   8288       mul(vform, rd, rn, rm, index);
   8289       break;
   8290     case "mla_asimdelem_r"_h:
   8291       mla(vform, rd, rn, rm, index);
   8292       break;
   8293     case "mls_asimdelem_r"_h:
   8294       mls(vform, rd, rn, rm, index);
   8295       break;
   8296     case "sqdmulh_asimdelem_r"_h:
   8297       sqdmulh(vform, rd, rn, rm, index);
   8298       break;
   8299     case "sqrdmulh_asimdelem_r"_h:
   8300       sqrdmulh(vform, rd, rn, rm, index);
   8301       break;
   8302     case "sqrdmlah_asimdelem_r"_h:
   8303       sqrdmlah(vform, rd, rn, rm, index);
   8304       break;
   8305     case "sqrdmlsh_asimdelem_r"_h:
   8306       sqrdmlsh(vform, rd, rn, rm, index);
   8307       break;
   8308   }
   8309 }
   8310 
   8311 
   8312 void Simulator::VisitNEONCopy(const Instruction* instr) {
   8313   NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularFormatMap());
   8314   VectorFormat vf = nfd.GetVectorFormat();
   8315 
   8316   SimVRegister& rd = ReadVRegister(instr->GetRd());
   8317   SimVRegister& rn = ReadVRegister(instr->GetRn());
   8318   int imm5 = instr->GetImmNEON5();
   8319   int tz = CountTrailingZeros(imm5, 32);
   8320   int reg_index = ExtractSignedBitfield32(31, tz + 1, imm5);
   8321 
   8322   if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) {
   8323     int imm4 = instr->GetImmNEON4();
   8324     int rn_index = ExtractSignedBitfield32(31, tz, imm4);
   8325     mov(kFormat16B, rd, rd);  // Zero bits beyond the MSB of a Q register.
   8326     ins_element(vf, rd, reg_index, rn, rn_index);
   8327   } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) {
   8328     mov(kFormat16B, rd, rd);  // Zero bits beyond the MSB of a Q register.
   8329     ins_immediate(vf, rd, reg_index, ReadXRegister(instr->GetRn()));
   8330   } else if (instr->Mask(NEONCopyUmovMask) == NEON_UMOV) {
   8331     uint64_t value = LogicVRegister(rn).Uint(vf, reg_index);
   8332     value &= MaxUintFromFormat(vf);
   8333     WriteXRegister(instr->GetRd(), value);
   8334   } else if (instr->Mask(NEONCopyUmovMask) == NEON_SMOV) {
   8335     int64_t value = LogicVRegister(rn).Int(vf, reg_index);
   8336     if (instr->GetNEONQ()) {
   8337       WriteXRegister(instr->GetRd(), value);
   8338     } else {
   8339       WriteWRegister(instr->GetRd(), (int32_t)value);
   8340     }
   8341   } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) {
   8342     dup_element(vf, rd, rn, reg_index);
   8343   } else if (instr->Mask(NEONCopyDupGeneralMask) == NEON_DUP_GENERAL) {
   8344     dup_immediate(vf, rd, ReadXRegister(instr->GetRn()));
   8345   } else {
   8346     VIXL_UNIMPLEMENTED();
   8347   }
   8348 }
   8349 
   8350 
   8351 void Simulator::VisitNEONExtract(const Instruction* instr) {
   8352   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
   8353   VectorFormat vf = nfd.GetVectorFormat();
   8354   SimVRegister& rd = ReadVRegister(instr->GetRd());
   8355   SimVRegister& rn = ReadVRegister(instr->GetRn());
   8356   SimVRegister& rm = ReadVRegister(instr->GetRm());
   8357   if (instr->Mask(NEONExtractMask) == NEON_EXT) {
   8358     int index = instr->GetImmNEONExt();
   8359     ext(vf, rd, rn, rm, index);
   8360   } else {
   8361     VIXL_UNIMPLEMENTED();
   8362   }
   8363 }
   8364 
   8365 
   8366 void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
   8367                                                AddrMode addr_mode) {
   8368   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
   8369   VectorFormat vf = nfd.GetVectorFormat();
   8370 
   8371   uint64_t addr_base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
   8372   int reg_size = RegisterSizeInBytesFromFormat(vf);
   8373 
   8374   int reg[4];
   8375   uint64_t addr[4];
   8376   for (int i = 0; i < 4; i++) {
   8377     reg[i] = (instr->GetRt() + i) % kNumberOfVRegisters;
   8378     addr[i] = addr_base + (i * reg_size);
   8379   }
   8380   int struct_parts = 1;
   8381   int reg_count = 1;
   8382   bool log_read = true;
   8383 
   8384   // Bit 23 determines whether this is an offset or post-index addressing mode.
   8385   // In offset mode, bits 20 to 16 should be zero; these bits encode the
   8386   // register or immediate in post-index mode.
   8387   if ((instr->ExtractBit(23) == 0) && (instr->ExtractBits(20, 16) != 0)) {
   8388     VIXL_UNREACHABLE();
   8389   }
   8390 
   8391   // We use the PostIndex mask here, as it works in this case for both Offset
   8392   // and PostIndex addressing.
   8393   switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) {
   8394     case NEON_LD1_4v:
   8395     case NEON_LD1_4v_post:
   8396       if (!ld1(vf, ReadVRegister(reg[3]), addr[3])) {
   8397         return;
   8398       }
   8399       reg_count++;
   8400       VIXL_FALLTHROUGH();
   8401     case NEON_LD1_3v:
   8402     case NEON_LD1_3v_post:
   8403       if (!ld1(vf, ReadVRegister(reg[2]), addr[2])) {
   8404         return;
   8405       }
   8406       reg_count++;
   8407       VIXL_FALLTHROUGH();
   8408     case NEON_LD1_2v:
   8409     case NEON_LD1_2v_post:
   8410       if (!ld1(vf, ReadVRegister(reg[1]), addr[1])) {
   8411         return;
   8412       }
   8413       reg_count++;
   8414       VIXL_FALLTHROUGH();
   8415     case NEON_LD1_1v:
   8416     case NEON_LD1_1v_post:
   8417       if (!ld1(vf, ReadVRegister(reg[0]), addr[0])) {
   8418         return;
   8419       }
   8420       break;
   8421     case NEON_ST1_4v:
   8422     case NEON_ST1_4v_post:
   8423       if (!st1(vf, ReadVRegister(reg[3]), addr[3])) return;
   8424       reg_count++;
   8425       VIXL_FALLTHROUGH();
   8426     case NEON_ST1_3v:
   8427     case NEON_ST1_3v_post:
   8428       if (!st1(vf, ReadVRegister(reg[2]), addr[2])) return;
   8429       reg_count++;
   8430       VIXL_FALLTHROUGH();
   8431     case NEON_ST1_2v:
   8432     case NEON_ST1_2v_post:
   8433       if (!st1(vf, ReadVRegister(reg[1]), addr[1])) return;
   8434       reg_count++;
   8435       VIXL_FALLTHROUGH();
   8436     case NEON_ST1_1v:
   8437     case NEON_ST1_1v_post:
   8438       if (!st1(vf, ReadVRegister(reg[0]), addr[0])) return;
   8439       log_read = false;
   8440       break;
   8441     case NEON_LD2_post:
   8442     case NEON_LD2:
   8443       if (!ld2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0])) {
   8444         return;
   8445       }
   8446       struct_parts = 2;
   8447       reg_count = 2;
   8448       break;
   8449     case NEON_ST2:
   8450     case NEON_ST2_post:
   8451       if (!st2(vf, ReadVRegister(reg[0]), ReadVRegister(reg[1]), addr[0])) {
   8452         return;
   8453       }
   8454       struct_parts = 2;
   8455       reg_count = 2;
   8456       log_read = false;
   8457       break;
   8458     case NEON_LD3_post:
   8459     case NEON_LD3:
   8460       if (!ld3(vf,
   8461                ReadVRegister(reg[0]),
   8462                ReadVRegister(reg[1]),
   8463                ReadVRegister(reg[2]),
   8464                addr[0])) {
   8465         return;
   8466       }
   8467       struct_parts = 3;
   8468       reg_count = 3;
   8469       break;
   8470     case NEON_ST3:
   8471     case NEON_ST3_post:
   8472       if (!st3(vf,
   8473                ReadVRegister(reg[0]),
   8474                ReadVRegister(reg[1]),
   8475                ReadVRegister(reg[2]),
   8476                addr[0])) {
   8477         return;
   8478       }
   8479       struct_parts = 3;
   8480       reg_count = 3;
   8481       log_read = false;
   8482       break;
   8483     case NEON_ST4:
   8484     case NEON_ST4_post:
   8485       if (!st4(vf,
   8486                ReadVRegister(reg[0]),
   8487                ReadVRegister(reg[1]),
   8488                ReadVRegister(reg[2]),
   8489                ReadVRegister(reg[3]),
   8490                addr[0])) {
   8491         return;
   8492       }
   8493       struct_parts = 4;
   8494       reg_count = 4;
   8495       log_read = false;
   8496       break;
   8497     case NEON_LD4_post:
   8498     case NEON_LD4:
   8499       if (!ld4(vf,
   8500                ReadVRegister(reg[0]),
   8501                ReadVRegister(reg[1]),
   8502                ReadVRegister(reg[2]),
   8503                ReadVRegister(reg[3]),
   8504                addr[0])) {
   8505         return;
   8506       }
   8507       struct_parts = 4;
   8508       reg_count = 4;
   8509       break;
   8510     default:
   8511       VIXL_UNIMPLEMENTED();
   8512   }
   8513 
   8514   bool do_trace = log_read ? ShouldTraceVRegs() : ShouldTraceWrites();
   8515   if (do_trace) {
   8516     PrintRegisterFormat print_format =
   8517         GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf));
   8518     const char* op;
   8519     if (log_read) {
   8520       op = "<-";
   8521     } else {
   8522       op = "->";
   8523       // Stores don't represent a change to the source register's value, so only
   8524       // print the relevant part of the value.
   8525       print_format = GetPrintRegPartial(print_format);
   8526     }
   8527 
   8528     VIXL_ASSERT((struct_parts == reg_count) || (struct_parts == 1));
   8529     for (int s = reg_count - struct_parts; s >= 0; s -= struct_parts) {
   8530       uintptr_t address = addr_base + (s * RegisterSizeInBytesFromFormat(vf));
   8531       PrintVStructAccess(reg[s], struct_parts, print_format, op, address);
   8532     }
   8533   }
   8534 
   8535   if (addr_mode == PostIndex) {
   8536     int rm = instr->GetRm();
   8537     // The immediate post index addressing mode is indicated by rm = 31.
   8538     // The immediate is implied by the number of vector registers used.
   8539     addr_base += (rm == 31) ? (RegisterSizeInBytesFromFormat(vf) * reg_count)
   8540                             : ReadXRegister(rm);
   8541     WriteXRegister(instr->GetRn(),
   8542                    addr_base,
   8543                    LogRegWrites,
   8544                    Reg31IsStackPointer);
   8545   } else {
   8546     VIXL_ASSERT(addr_mode == Offset);
   8547   }
   8548 }
   8549 
   8550 
   8551 void Simulator::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
   8552   NEONLoadStoreMultiStructHelper(instr, Offset);
   8553 }
   8554 
   8555 
   8556 void Simulator::VisitNEONLoadStoreMultiStructPostIndex(
   8557     const Instruction* instr) {
   8558   NEONLoadStoreMultiStructHelper(instr, PostIndex);
   8559 }
   8560 
   8561 
   8562 void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
   8563                                                 AddrMode addr_mode) {
   8564   uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
   8565   int rt = instr->GetRt();
   8566 
   8567   // Bit 23 determines whether this is an offset or post-index addressing mode.
   8568   // In offset mode, bits 20 to 16 should be zero; these bits encode the
   8569   // register or immediate in post-index mode.
   8570   if ((instr->ExtractBit(23) == 0) && (instr->ExtractBits(20, 16) != 0)) {
   8571     VIXL_UNREACHABLE();
   8572   }
   8573 
   8574   // We use the PostIndex mask here, as it works in this case for both Offset
   8575   // and PostIndex addressing.
   8576   bool do_load = false;
   8577 
   8578   bool replicating = false;
   8579 
   8580   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
   8581   VectorFormat vf_t = nfd.GetVectorFormat();
   8582 
   8583   VectorFormat vf = kFormat16B;
   8584   switch (instr->Mask(NEONLoadStoreSingleStructPostIndexMask)) {
   8585     case NEON_LD1_b:
   8586     case NEON_LD1_b_post:
   8587     case NEON_LD2_b:
   8588     case NEON_LD2_b_post:
   8589     case NEON_LD3_b:
   8590     case NEON_LD3_b_post:
   8591     case NEON_LD4_b:
   8592     case NEON_LD4_b_post:
   8593       do_load = true;
   8594       VIXL_FALLTHROUGH();
   8595     case NEON_ST1_b:
   8596     case NEON_ST1_b_post:
   8597     case NEON_ST2_b:
   8598     case NEON_ST2_b_post:
   8599     case NEON_ST3_b:
   8600     case NEON_ST3_b_post:
   8601     case NEON_ST4_b:
   8602     case NEON_ST4_b_post:
   8603       break;
   8604 
   8605     case NEON_LD1_h:
   8606     case NEON_LD1_h_post:
   8607     case NEON_LD2_h:
   8608     case NEON_LD2_h_post:
   8609     case NEON_LD3_h:
   8610     case NEON_LD3_h_post:
   8611     case NEON_LD4_h:
   8612     case NEON_LD4_h_post:
   8613       do_load = true;
   8614       VIXL_FALLTHROUGH();
   8615     case NEON_ST1_h:
   8616     case NEON_ST1_h_post:
   8617     case NEON_ST2_h:
   8618     case NEON_ST2_h_post:
   8619     case NEON_ST3_h:
   8620     case NEON_ST3_h_post:
   8621     case NEON_ST4_h:
   8622     case NEON_ST4_h_post:
   8623       vf = kFormat8H;
   8624       break;
   8625     case NEON_LD1_s:
   8626     case NEON_LD1_s_post:
   8627     case NEON_LD2_s:
   8628     case NEON_LD2_s_post:
   8629     case NEON_LD3_s:
   8630     case NEON_LD3_s_post:
   8631     case NEON_LD4_s:
   8632     case NEON_LD4_s_post:
   8633       do_load = true;
   8634       VIXL_FALLTHROUGH();
   8635     case NEON_ST1_s:
   8636     case NEON_ST1_s_post:
   8637     case NEON_ST2_s:
   8638     case NEON_ST2_s_post:
   8639     case NEON_ST3_s:
   8640     case NEON_ST3_s_post:
   8641     case NEON_ST4_s:
   8642     case NEON_ST4_s_post: {
   8643       VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d);
   8644       VIXL_STATIC_ASSERT((NEON_LD1_s_post | (1 << NEONLSSize_offset)) ==
   8645                          NEON_LD1_d_post);
   8646       VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d);
   8647       VIXL_STATIC_ASSERT((NEON_ST1_s_post | (1 << NEONLSSize_offset)) ==
   8648                          NEON_ST1_d_post);
   8649       vf = ((instr->GetNEONLSSize() & 1) == 0) ? kFormat4S : kFormat2D;
   8650       break;
   8651     }
   8652 
   8653     case NEON_LD1R:
   8654     case NEON_LD1R_post:
   8655     case NEON_LD2R:
   8656     case NEON_LD2R_post:
   8657     case NEON_LD3R:
   8658     case NEON_LD3R_post:
   8659     case NEON_LD4R:
   8660     case NEON_LD4R_post:
   8661       vf = vf_t;
   8662       do_load = true;
   8663       replicating = true;
   8664       break;
   8665 
   8666     default:
   8667       VIXL_UNIMPLEMENTED();
   8668   }
   8669 
   8670   int index_shift = LaneSizeInBytesLog2FromFormat(vf);
   8671   int lane = instr->GetNEONLSIndex(index_shift);
   8672   int reg_count = 0;
   8673   int rt2 = (rt + 1) % kNumberOfVRegisters;
   8674   int rt3 = (rt2 + 1) % kNumberOfVRegisters;
   8675   int rt4 = (rt3 + 1) % kNumberOfVRegisters;
   8676   switch (instr->Mask(NEONLoadStoreSingleLenMask)) {
   8677     case NEONLoadStoreSingle1:
   8678       reg_count = 1;
   8679       if (replicating) {
   8680         VIXL_ASSERT(do_load);
   8681         if (!ld1r(vf, ReadVRegister(rt), addr)) {
   8682           return;
   8683         }
   8684       } else if (do_load) {
   8685         if (!ld1(vf, ReadVRegister(rt), lane, addr)) {
   8686           return;
   8687         }
   8688       } else {
   8689         if (!st1(vf, ReadVRegister(rt), lane, addr)) return;
   8690       }
   8691       break;
   8692     case NEONLoadStoreSingle2:
   8693       reg_count = 2;
   8694       if (replicating) {
   8695         VIXL_ASSERT(do_load);
   8696         if (!ld2r(vf, ReadVRegister(rt), ReadVRegister(rt2), addr)) {
   8697           return;
   8698         }
   8699       } else if (do_load) {
   8700         if (!ld2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr)) {
   8701           return;
   8702         }
   8703       } else {
   8704         if (!st2(vf, ReadVRegister(rt), ReadVRegister(rt2), lane, addr)) return;
   8705       }
   8706       break;
   8707     case NEONLoadStoreSingle3:
   8708       reg_count = 3;
   8709       if (replicating) {
   8710         VIXL_ASSERT(do_load);
   8711         if (!ld3r(vf,
   8712                   ReadVRegister(rt),
   8713                   ReadVRegister(rt2),
   8714                   ReadVRegister(rt3),
   8715                   addr)) {
   8716           return;
   8717         }
   8718       } else if (do_load) {
   8719         if (!ld3(vf,
   8720                  ReadVRegister(rt),
   8721                  ReadVRegister(rt2),
   8722                  ReadVRegister(rt3),
   8723                  lane,
   8724                  addr)) {
   8725           return;
   8726         }
   8727       } else {
   8728         if (!st3(vf,
   8729                  ReadVRegister(rt),
   8730                  ReadVRegister(rt2),
   8731                  ReadVRegister(rt3),
   8732                  lane,
   8733                  addr)) {
   8734           return;
   8735         }
   8736       }
   8737       break;
   8738     case NEONLoadStoreSingle4:
   8739       reg_count = 4;
   8740       if (replicating) {
   8741         VIXL_ASSERT(do_load);
   8742         if (!ld4r(vf,
   8743                   ReadVRegister(rt),
   8744                   ReadVRegister(rt2),
   8745                   ReadVRegister(rt3),
   8746                   ReadVRegister(rt4),
   8747                   addr)) {
   8748           return;
   8749         }
   8750       } else if (do_load) {
   8751         if (!ld4(vf,
   8752                  ReadVRegister(rt),
   8753                  ReadVRegister(rt2),
   8754                  ReadVRegister(rt3),
   8755                  ReadVRegister(rt4),
   8756                  lane,
   8757                  addr)) {
   8758           return;
   8759         }
   8760       } else {
   8761         if (!st4(vf,
   8762                  ReadVRegister(rt),
   8763                  ReadVRegister(rt2),
   8764                  ReadVRegister(rt3),
   8765                  ReadVRegister(rt4),
   8766                  lane,
   8767                  addr)) {
   8768           return;
   8769         }
   8770       }
   8771       break;
   8772     default:
   8773       VIXL_UNIMPLEMENTED();
   8774   }
   8775 
   8776   // Trace registers and/or memory writes.
   8777   PrintRegisterFormat print_format =
   8778       GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf));
   8779   if (do_load) {
   8780     if (ShouldTraceVRegs()) {
   8781       if (replicating) {
   8782         PrintVReplicatingStructAccess(rt, reg_count, print_format, "<-", addr);
   8783       } else {
   8784         PrintVSingleStructAccess(rt, reg_count, lane, print_format, "<-", addr);
   8785       }
   8786     }
   8787   } else {
   8788     if (ShouldTraceWrites()) {
   8789       // Stores don't represent a change to the source register's value, so only
   8790       // print the relevant part of the value.
   8791       print_format = GetPrintRegPartial(print_format);
   8792       PrintVSingleStructAccess(rt, reg_count, lane, print_format, "->", addr);
   8793     }
   8794   }
   8795 
   8796   if (addr_mode == PostIndex) {
   8797     int rm = instr->GetRm();
   8798     int lane_size = LaneSizeInBytesFromFormat(vf);
   8799     WriteXRegister(instr->GetRn(),
   8800                    addr + ((rm == 31) ? (reg_count * lane_size)
   8801                                       : ReadXRegister(rm)),
   8802                    LogRegWrites,
   8803                    Reg31IsStackPointer);
   8804   }
   8805 }
   8806 
   8807 
   8808 void Simulator::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
   8809   NEONLoadStoreSingleStructHelper(instr, Offset);
   8810 }
   8811 
   8812 
   8813 void Simulator::VisitNEONLoadStoreSingleStructPostIndex(
   8814     const Instruction* instr) {
   8815   NEONLoadStoreSingleStructHelper(instr, PostIndex);
   8816 }
   8817 
   8818 
   8819 void Simulator::VisitNEONModifiedImmediate(const Instruction* instr) {
   8820   SimVRegister& rd = ReadVRegister(instr->GetRd());
   8821   int cmode = instr->GetNEONCmode();
   8822   int cmode_3_1 = (cmode >> 1) & 7;
   8823   int cmode_3 = (cmode >> 3) & 1;
   8824   int cmode_2 = (cmode >> 2) & 1;
   8825   int cmode_1 = (cmode >> 1) & 1;
   8826   int cmode_0 = cmode & 1;
   8827   int half_enc = instr->ExtractBit(11);
   8828   int q = instr->GetNEONQ();
   8829   int op_bit = instr->GetNEONModImmOp();
   8830   uint64_t imm8 = instr->GetImmNEONabcdefgh();
   8831   // Find the format and immediate value
   8832   uint64_t imm = 0;
   8833   VectorFormat vform = kFormatUndefined;
   8834   switch (cmode_3_1) {
   8835     case 0x0:
   8836     case 0x1:
   8837     case 0x2:
   8838     case 0x3:
   8839       vform = (q == 1) ? kFormat4S : kFormat2S;
   8840       imm = imm8 << (8 * cmode_3_1);
   8841       break;
   8842     case 0x4:
   8843     case 0x5:
   8844       vform = (q == 1) ? kFormat8H : kFormat4H;
   8845       imm = imm8 << (8 * cmode_1);
   8846       break;
   8847     case 0x6:
   8848       vform = (q == 1) ? kFormat4S : kFormat2S;
   8849       if (cmode_0 == 0) {
   8850         imm = imm8 << 8 | 0x000000ff;
   8851       } else {
   8852         imm = imm8 << 16 | 0x0000ffff;
   8853       }
   8854       break;
   8855     case 0x7:
   8856       if (cmode_0 == 0 && op_bit == 0) {
   8857         vform = q ? kFormat16B : kFormat8B;
   8858         imm = imm8;
   8859       } else if (cmode_0 == 0 && op_bit == 1) {
   8860         vform = q ? kFormat2D : kFormat1D;
   8861         imm = 0;
   8862         for (int i = 0; i < 8; ++i) {
   8863           if (imm8 & (1 << i)) {
   8864             imm |= (UINT64_C(0xff) << (8 * i));
   8865           }
   8866         }
   8867       } else {  // cmode_0 == 1, cmode == 0xf.
   8868         if (half_enc == 1) {
   8869           vform = q ? kFormat8H : kFormat4H;
   8870           imm = Float16ToRawbits(instr->GetImmNEONFP16());
   8871         } else if (op_bit == 0) {
   8872           vform = q ? kFormat4S : kFormat2S;
   8873           imm = FloatToRawbits(instr->GetImmNEONFP32());
   8874         } else if (q == 1) {
   8875           vform = kFormat2D;
   8876           imm = DoubleToRawbits(instr->GetImmNEONFP64());
   8877         } else {
   8878           VIXL_ASSERT((q == 0) && (op_bit == 1) && (cmode == 0xf));
   8879           VisitUnallocated(instr);
   8880         }
   8881       }
   8882       break;
   8883     default:
   8884       VIXL_UNREACHABLE();
   8885       break;
   8886   }
   8887 
   8888   // Find the operation
   8889   NEONModifiedImmediateOp op;
   8890   if (cmode_3 == 0) {
   8891     if (cmode_0 == 0) {
   8892       op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
   8893     } else {  // cmode<0> == '1'
   8894       op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
   8895     }
   8896   } else {  // cmode<3> == '1'
   8897     if (cmode_2 == 0) {
   8898       if (cmode_0 == 0) {
   8899         op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
   8900       } else {  // cmode<0> == '1'
   8901         op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
   8902       }
   8903     } else {  // cmode<2> == '1'
   8904       if (cmode_1 == 0) {
   8905         op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
   8906       } else {  // cmode<1> == '1'
   8907         if (cmode_0 == 0) {
   8908           op = NEONModifiedImmediate_MOVI;
   8909         } else {  // cmode<0> == '1'
   8910           op = NEONModifiedImmediate_MOVI;
   8911         }
   8912       }
   8913     }
   8914   }
   8915 
   8916   // Call the logic function
   8917   if (op == NEONModifiedImmediate_ORR) {
   8918     orr(vform, rd, rd, imm);
   8919   } else if (op == NEONModifiedImmediate_BIC) {
   8920     bic(vform, rd, rd, imm);
   8921   } else if (op == NEONModifiedImmediate_MOVI) {
   8922     movi(vform, rd, imm);
   8923   } else if (op == NEONModifiedImmediate_MVNI) {
   8924     mvni(vform, rd, imm);
   8925   } else {
   8926     VisitUnimplemented(instr);
   8927   }
   8928 }
   8929 
   8930 
   8931 void Simulator::VisitNEONScalar2RegMisc(const Instruction* instr) {
   8932   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
   8933   VectorFormat vf = nfd.GetVectorFormat();
   8934 
   8935   SimVRegister& rd = ReadVRegister(instr->GetRd());
   8936   SimVRegister& rn = ReadVRegister(instr->GetRn());
   8937 
   8938   if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_scalar_opcode) {
   8939     // These instructions all use a two bit size field, except NOT and RBIT,
   8940     // which use the field to encode the operation.
   8941     switch (instr->Mask(NEONScalar2RegMiscMask)) {
   8942       case NEON_CMEQ_zero_scalar:
   8943         cmp(vf, rd, rn, 0, eq);
   8944         break;
   8945       case NEON_CMGE_zero_scalar:
   8946         cmp(vf, rd, rn, 0, ge);
   8947         break;
   8948       case NEON_CMGT_zero_scalar:
   8949         cmp(vf, rd, rn, 0, gt);
   8950         break;
   8951       case NEON_CMLT_zero_scalar:
   8952         cmp(vf, rd, rn, 0, lt);
   8953         break;
   8954       case NEON_CMLE_zero_scalar:
   8955         cmp(vf, rd, rn, 0, le);
   8956         break;
   8957       case NEON_ABS_scalar:
   8958         abs(vf, rd, rn);
   8959         break;
   8960       case NEON_SQABS_scalar:
   8961         abs(vf, rd, rn).SignedSaturate(vf);
   8962         break;
   8963       case NEON_NEG_scalar:
   8964         neg(vf, rd, rn);
   8965         break;
   8966       case NEON_SQNEG_scalar:
   8967         neg(vf, rd, rn).SignedSaturate(vf);
   8968         break;
   8969       case NEON_SUQADD_scalar:
   8970         suqadd(vf, rd, rd, rn);
   8971         break;
   8972       case NEON_USQADD_scalar:
   8973         usqadd(vf, rd, rd, rn);
   8974         break;
   8975       default:
   8976         VIXL_UNIMPLEMENTED();
   8977         break;
   8978     }
   8979   } else {
   8980     VectorFormat fpf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
   8981     FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
   8982 
   8983     // These instructions all use a one bit size field, except SQXTUN, SQXTN
   8984     // and UQXTN, which use a two bit size field.
   8985     switch (instr->Mask(NEONScalar2RegMiscFPMask)) {
   8986       case NEON_FRECPE_scalar:
   8987         frecpe(fpf, rd, rn, fpcr_rounding);
   8988         break;
   8989       case NEON_FRECPX_scalar:
   8990         frecpx(fpf, rd, rn);
   8991         break;
   8992       case NEON_FRSQRTE_scalar:
   8993         frsqrte(fpf, rd, rn);
   8994         break;
   8995       case NEON_FCMGT_zero_scalar:
   8996         fcmp_zero(fpf, rd, rn, gt);
   8997         break;
   8998       case NEON_FCMGE_zero_scalar:
   8999         fcmp_zero(fpf, rd, rn, ge);
   9000         break;
   9001       case NEON_FCMEQ_zero_scalar:
   9002         fcmp_zero(fpf, rd, rn, eq);
   9003         break;
   9004       case NEON_FCMLE_zero_scalar:
   9005         fcmp_zero(fpf, rd, rn, le);
   9006         break;
   9007       case NEON_FCMLT_zero_scalar:
   9008         fcmp_zero(fpf, rd, rn, lt);
   9009         break;
   9010       case NEON_SCVTF_scalar:
   9011         scvtf(fpf, rd, rn, 0, fpcr_rounding);
   9012         break;
   9013       case NEON_UCVTF_scalar:
   9014         ucvtf(fpf, rd, rn, 0, fpcr_rounding);
   9015         break;
   9016       case NEON_FCVTNS_scalar:
   9017         fcvts(fpf, rd, rn, FPTieEven);
   9018         break;
   9019       case NEON_FCVTNU_scalar:
   9020         fcvtu(fpf, rd, rn, FPTieEven);
   9021         break;
   9022       case NEON_FCVTPS_scalar:
   9023         fcvts(fpf, rd, rn, FPPositiveInfinity);
   9024         break;
   9025       case NEON_FCVTPU_scalar:
   9026         fcvtu(fpf, rd, rn, FPPositiveInfinity);
   9027         break;
   9028       case NEON_FCVTMS_scalar:
   9029         fcvts(fpf, rd, rn, FPNegativeInfinity);
   9030         break;
   9031       case NEON_FCVTMU_scalar:
   9032         fcvtu(fpf, rd, rn, FPNegativeInfinity);
   9033         break;
   9034       case NEON_FCVTZS_scalar:
   9035         fcvts(fpf, rd, rn, FPZero);
   9036         break;
   9037       case NEON_FCVTZU_scalar:
   9038         fcvtu(fpf, rd, rn, FPZero);
   9039         break;
   9040       case NEON_FCVTAS_scalar:
   9041         fcvts(fpf, rd, rn, FPTieAway);
   9042         break;
   9043       case NEON_FCVTAU_scalar:
   9044         fcvtu(fpf, rd, rn, FPTieAway);
   9045         break;
   9046       case NEON_FCVTXN_scalar:
   9047         // Unlike all of the other FP instructions above, fcvtxn encodes dest
   9048         // size S as size<0>=1. There's only one case, so we ignore the form.
   9049         VIXL_ASSERT(instr->ExtractBit(22) == 1);
   9050         fcvtxn(kFormatS, rd, rn);
   9051         break;
   9052       default:
   9053         switch (instr->Mask(NEONScalar2RegMiscMask)) {
   9054           case NEON_SQXTN_scalar:
   9055             sqxtn(vf, rd, rn);
   9056             break;
   9057           case NEON_UQXTN_scalar:
   9058             uqxtn(vf, rd, rn);
   9059             break;
   9060           case NEON_SQXTUN_scalar:
   9061             sqxtun(vf, rd, rn);
   9062             break;
   9063           default:
   9064             VIXL_UNIMPLEMENTED();
   9065         }
   9066     }
   9067   }
   9068 }
   9069 
   9070 
   9071 void Simulator::VisitNEONScalar2RegMiscFP16(const Instruction* instr) {
   9072   VectorFormat fpf = kFormatH;
   9073   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
   9074 
   9075   SimVRegister& rd = ReadVRegister(instr->GetRd());
   9076   SimVRegister& rn = ReadVRegister(instr->GetRn());
   9077 
   9078   switch (instr->Mask(NEONScalar2RegMiscFP16Mask)) {
   9079     case NEON_FRECPE_H_scalar:
   9080       frecpe(fpf, rd, rn, fpcr_rounding);
   9081       break;
   9082     case NEON_FRECPX_H_scalar:
   9083       frecpx(fpf, rd, rn);
   9084       break;
   9085     case NEON_FRSQRTE_H_scalar:
   9086       frsqrte(fpf, rd, rn);
   9087       break;
   9088     case NEON_FCMGT_H_zero_scalar:
   9089       fcmp_zero(fpf, rd, rn, gt);
   9090       break;
   9091     case NEON_FCMGE_H_zero_scalar:
   9092       fcmp_zero(fpf, rd, rn, ge);
   9093       break;
   9094     case NEON_FCMEQ_H_zero_scalar:
   9095       fcmp_zero(fpf, rd, rn, eq);
   9096       break;
   9097     case NEON_FCMLE_H_zero_scalar:
   9098       fcmp_zero(fpf, rd, rn, le);
   9099       break;
   9100     case NEON_FCMLT_H_zero_scalar:
   9101       fcmp_zero(fpf, rd, rn, lt);
   9102       break;
   9103     case NEON_SCVTF_H_scalar:
   9104       scvtf(fpf, rd, rn, 0, fpcr_rounding);
   9105       break;
   9106     case NEON_UCVTF_H_scalar:
   9107       ucvtf(fpf, rd, rn, 0, fpcr_rounding);
   9108       break;
   9109     case NEON_FCVTNS_H_scalar:
   9110       fcvts(fpf, rd, rn, FPTieEven);
   9111       break;
   9112     case NEON_FCVTNU_H_scalar:
   9113       fcvtu(fpf, rd, rn, FPTieEven);
   9114       break;
   9115     case NEON_FCVTPS_H_scalar:
   9116       fcvts(fpf, rd, rn, FPPositiveInfinity);
   9117       break;
   9118     case NEON_FCVTPU_H_scalar:
   9119       fcvtu(fpf, rd, rn, FPPositiveInfinity);
   9120       break;
   9121     case NEON_FCVTMS_H_scalar:
   9122       fcvts(fpf, rd, rn, FPNegativeInfinity);
   9123       break;
   9124     case NEON_FCVTMU_H_scalar:
   9125       fcvtu(fpf, rd, rn, FPNegativeInfinity);
   9126       break;
   9127     case NEON_FCVTZS_H_scalar:
   9128       fcvts(fpf, rd, rn, FPZero);
   9129       break;
   9130     case NEON_FCVTZU_H_scalar:
   9131       fcvtu(fpf, rd, rn, FPZero);
   9132       break;
   9133     case NEON_FCVTAS_H_scalar:
   9134       fcvts(fpf, rd, rn, FPTieAway);
   9135       break;
   9136     case NEON_FCVTAU_H_scalar:
   9137       fcvtu(fpf, rd, rn, FPTieAway);
   9138       break;
   9139   }
   9140 }
   9141 
   9142 
   9143 void Simulator::VisitNEONScalar3Diff(const Instruction* instr) {
   9144   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
   9145   VectorFormat vf = nfd.GetVectorFormat();
   9146 
   9147   SimVRegister& rd = ReadVRegister(instr->GetRd());
   9148   SimVRegister& rn = ReadVRegister(instr->GetRn());
   9149   SimVRegister& rm = ReadVRegister(instr->GetRm());
   9150   switch (instr->Mask(NEONScalar3DiffMask)) {
   9151     case NEON_SQDMLAL_scalar:
   9152       sqdmlal(vf, rd, rn, rm);
   9153       break;
   9154     case NEON_SQDMLSL_scalar:
   9155       sqdmlsl(vf, rd, rn, rm);
   9156       break;
   9157     case NEON_SQDMULL_scalar:
   9158       sqdmull(vf, rd, rn, rm);
   9159       break;
   9160     default:
   9161       VIXL_UNIMPLEMENTED();
   9162   }
   9163 }
   9164 
   9165 
   9166 void Simulator::VisitNEONScalar3Same(const Instruction* instr) {
   9167   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
   9168   VectorFormat vf = nfd.GetVectorFormat();
   9169 
   9170   SimVRegister& rd = ReadVRegister(instr->GetRd());
   9171   SimVRegister& rn = ReadVRegister(instr->GetRn());
   9172   SimVRegister& rm = ReadVRegister(instr->GetRm());
   9173 
   9174   if (instr->Mask(NEONScalar3SameFPFMask) == NEONScalar3SameFPFixed) {
   9175     vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
   9176     switch (instr->Mask(NEONScalar3SameFPMask)) {
   9177       case NEON_FMULX_scalar:
   9178         fmulx(vf, rd, rn, rm);
   9179         break;
   9180       case NEON_FACGE_scalar:
   9181         fabscmp(vf, rd, rn, rm, ge);
   9182         break;
   9183       case NEON_FACGT_scalar:
   9184         fabscmp(vf, rd, rn, rm, gt);
   9185         break;
   9186       case NEON_FCMEQ_scalar:
   9187         fcmp(vf, rd, rn, rm, eq);
   9188         break;
   9189       case NEON_FCMGE_scalar:
   9190         fcmp(vf, rd, rn, rm, ge);
   9191         break;
   9192       case NEON_FCMGT_scalar:
   9193         fcmp(vf, rd, rn, rm, gt);
   9194         break;
   9195       case NEON_FRECPS_scalar:
   9196         frecps(vf, rd, rn, rm);
   9197         break;
   9198       case NEON_FRSQRTS_scalar:
   9199         frsqrts(vf, rd, rn, rm);
   9200         break;
   9201       case NEON_FABD_scalar:
   9202         fabd(vf, rd, rn, rm);
   9203         break;
   9204       default:
   9205         VIXL_UNIMPLEMENTED();
   9206     }
   9207   } else {
   9208     switch (instr->Mask(NEONScalar3SameMask)) {
   9209       case NEON_ADD_scalar:
   9210         add(vf, rd, rn, rm);
   9211         break;
   9212       case NEON_SUB_scalar:
   9213         sub(vf, rd, rn, rm);
   9214         break;
   9215       case NEON_CMEQ_scalar:
   9216         cmp(vf, rd, rn, rm, eq);
   9217         break;
   9218       case NEON_CMGE_scalar:
   9219         cmp(vf, rd, rn, rm, ge);
   9220         break;
   9221       case NEON_CMGT_scalar:
   9222         cmp(vf, rd, rn, rm, gt);
   9223         break;
   9224       case NEON_CMHI_scalar:
   9225         cmp(vf, rd, rn, rm, hi);
   9226         break;
   9227       case NEON_CMHS_scalar:
   9228         cmp(vf, rd, rn, rm, hs);
   9229         break;
   9230       case NEON_CMTST_scalar:
   9231         cmptst(vf, rd, rn, rm);
   9232         break;
   9233       case NEON_USHL_scalar:
   9234         ushl(vf, rd, rn, rm);
   9235         break;
   9236       case NEON_SSHL_scalar:
   9237         sshl(vf, rd, rn, rm);
   9238         break;
   9239       case NEON_SQDMULH_scalar:
   9240         sqdmulh(vf, rd, rn, rm);
   9241         break;
   9242       case NEON_SQRDMULH_scalar:
   9243         sqrdmulh(vf, rd, rn, rm);
   9244         break;
   9245       case NEON_UQADD_scalar:
   9246         add(vf, rd, rn, rm).UnsignedSaturate(vf);
   9247         break;
   9248       case NEON_SQADD_scalar:
   9249         add(vf, rd, rn, rm).SignedSaturate(vf);
   9250         break;
   9251       case NEON_UQSUB_scalar:
   9252         sub(vf, rd, rn, rm).UnsignedSaturate(vf);
   9253         break;
   9254       case NEON_SQSUB_scalar:
   9255         sub(vf, rd, rn, rm).SignedSaturate(vf);
   9256         break;
   9257       case NEON_UQSHL_scalar:
   9258         ushl(vf, rd, rn, rm).UnsignedSaturate(vf);
   9259         break;
   9260       case NEON_SQSHL_scalar:
   9261         sshl(vf, rd, rn, rm).SignedSaturate(vf);
   9262         break;
   9263       case NEON_URSHL_scalar:
   9264         ushl(vf, rd, rn, rm).Round(vf);
   9265         break;
   9266       case NEON_SRSHL_scalar:
   9267         sshl(vf, rd, rn, rm).Round(vf);
   9268         break;
   9269       case NEON_UQRSHL_scalar:
   9270         ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
   9271         break;
   9272       case NEON_SQRSHL_scalar:
   9273         sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
   9274         break;
   9275       default:
   9276         VIXL_UNIMPLEMENTED();
   9277     }
   9278   }
   9279 }
   9280 
   9281 void Simulator::VisitNEONScalar3SameFP16(const Instruction* instr) {
   9282   SimVRegister& rd = ReadVRegister(instr->GetRd());
   9283   SimVRegister& rn = ReadVRegister(instr->GetRn());
   9284   SimVRegister& rm = ReadVRegister(instr->GetRm());
   9285 
   9286   switch (instr->Mask(NEONScalar3SameFP16Mask)) {
   9287     case NEON_FABD_H_scalar:
   9288       fabd(kFormatH, rd, rn, rm);
   9289       break;
   9290     case NEON_FMULX_H_scalar:
   9291       fmulx(kFormatH, rd, rn, rm);
   9292       break;
   9293     case NEON_FCMEQ_H_scalar:
   9294       fcmp(kFormatH, rd, rn, rm, eq);
   9295       break;
   9296     case NEON_FCMGE_H_scalar:
   9297       fcmp(kFormatH, rd, rn, rm, ge);
   9298       break;
   9299     case NEON_FCMGT_H_scalar:
   9300       fcmp(kFormatH, rd, rn, rm, gt);
   9301       break;
   9302     case NEON_FACGE_H_scalar:
   9303       fabscmp(kFormatH, rd, rn, rm, ge);
   9304       break;
   9305     case NEON_FACGT_H_scalar:
   9306       fabscmp(kFormatH, rd, rn, rm, gt);
   9307       break;
   9308     case NEON_FRECPS_H_scalar:
   9309       frecps(kFormatH, rd, rn, rm);
   9310       break;
   9311     case NEON_FRSQRTS_H_scalar:
   9312       frsqrts(kFormatH, rd, rn, rm);
   9313       break;
   9314     default:
   9315       VIXL_UNREACHABLE();
   9316   }
   9317 }
   9318 
   9319 
   9320 void Simulator::VisitNEONScalar3SameExtra(const Instruction* instr) {
   9321   NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
   9322   VectorFormat vf = nfd.GetVectorFormat();
   9323 
   9324   SimVRegister& rd = ReadVRegister(instr->GetRd());
   9325   SimVRegister& rn = ReadVRegister(instr->GetRn());
   9326   SimVRegister& rm = ReadVRegister(instr->GetRm());
   9327 
   9328   switch (instr->Mask(NEONScalar3SameExtraMask)) {
   9329     case NEON_SQRDMLAH_scalar:
   9330       sqrdmlah(vf, rd, rn, rm);
   9331       break;
   9332     case NEON_SQRDMLSH_scalar:
   9333       sqrdmlsh(vf, rd, rn, rm);
   9334       break;
   9335     default:
   9336       VIXL_UNIMPLEMENTED();
   9337   }
   9338 }
   9339 
   9340 void Simulator::VisitNEONScalarByIndexedElement(const Instruction* instr) {
   9341   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
   9342   VectorFormat vf = nfd.GetVectorFormat();
   9343   VectorFormat vf_r = nfd.GetVectorFormat(nfd.ScalarFormatMap());
   9344 
   9345   SimVRegister& rd = ReadVRegister(instr->GetRd());
   9346   SimVRegister& rn = ReadVRegister(instr->GetRn());
   9347   ByElementOp Op = NULL;
   9348 
   9349   int rm_reg = instr->GetRm();
   9350   int index = (instr->GetNEONH() << 1) | instr->GetNEONL();
   9351   if (instr->GetNEONSize() == 1) {
   9352     rm_reg &= 0xf;
   9353     index = (index << 1) | instr->GetNEONM();
   9354   }
   9355 
   9356   switch (instr->Mask(NEONScalarByIndexedElementMask)) {
   9357     case NEON_SQDMULL_byelement_scalar:
   9358       Op = &Simulator::sqdmull;
   9359       break;
   9360     case NEON_SQDMLAL_byelement_scalar:
   9361       Op = &Simulator::sqdmlal;
   9362       break;
   9363     case NEON_SQDMLSL_byelement_scalar:
   9364       Op = &Simulator::sqdmlsl;
   9365       break;
   9366     case NEON_SQDMULH_byelement_scalar:
   9367       Op = &Simulator::sqdmulh;
   9368       vf = vf_r;
   9369       break;
   9370     case NEON_SQRDMULH_byelement_scalar:
   9371       Op = &Simulator::sqrdmulh;
   9372       vf = vf_r;
   9373       break;
   9374     case NEON_SQRDMLAH_byelement_scalar:
   9375       Op = &Simulator::sqrdmlah;
   9376       vf = vf_r;
   9377       break;
   9378     case NEON_SQRDMLSH_byelement_scalar:
   9379       Op = &Simulator::sqrdmlsh;
   9380       vf = vf_r;
   9381       break;
   9382     default:
   9383       vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
   9384       index = instr->GetNEONH();
   9385       if (instr->GetFPType() == 0) {
   9386         index = (index << 2) | (instr->GetNEONL() << 1) | instr->GetNEONM();
   9387         rm_reg &= 0xf;
   9388         vf = kFormatH;
   9389       } else if ((instr->GetFPType() & 1) == 0) {
   9390         index = (index << 1) | instr->GetNEONL();
   9391       }
   9392       switch (instr->Mask(NEONScalarByIndexedElementFPMask)) {
   9393         case NEON_FMUL_H_byelement_scalar:
   9394         case NEON_FMUL_byelement_scalar:
   9395           Op = &Simulator::fmul;
   9396           break;
   9397         case NEON_FMLA_H_byelement_scalar:
   9398         case NEON_FMLA_byelement_scalar:
   9399           Op = &Simulator::fmla;
   9400           break;
   9401         case NEON_FMLS_H_byelement_scalar:
   9402         case NEON_FMLS_byelement_scalar:
   9403           Op = &Simulator::fmls;
   9404           break;
   9405         case NEON_FMULX_H_byelement_scalar:
   9406         case NEON_FMULX_byelement_scalar:
   9407           Op = &Simulator::fmulx;
   9408           break;
   9409         default:
   9410           VIXL_UNIMPLEMENTED();
   9411       }
   9412   }
   9413 
   9414   (this->*Op)(vf, rd, rn, ReadVRegister(rm_reg), index);
   9415 }
   9416 
   9417 
   9418 void Simulator::VisitNEONScalarCopy(const Instruction* instr) {
   9419   NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularScalarFormatMap());
   9420   VectorFormat vf = nfd.GetVectorFormat();
   9421 
   9422   SimVRegister& rd = ReadVRegister(instr->GetRd());
   9423   SimVRegister& rn = ReadVRegister(instr->GetRn());
   9424 
   9425   if (instr->Mask(NEONScalarCopyMask) == NEON_DUP_ELEMENT_scalar) {
   9426     int imm5 = instr->GetImmNEON5();
   9427     int tz = CountTrailingZeros(imm5, 32);
   9428     int rn_index = ExtractSignedBitfield32(31, tz + 1, imm5);
   9429     dup_element(vf, rd, rn, rn_index);
   9430   } else {
   9431     VIXL_UNIMPLEMENTED();
   9432   }
   9433 }
   9434 
   9435 
   9436 void Simulator::VisitNEONScalarPairwise(const Instruction* instr) {
   9437   NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarPairwiseFormatMap());
   9438   VectorFormat vf = nfd.GetVectorFormat();
   9439 
   9440   SimVRegister& rd = ReadVRegister(instr->GetRd());
   9441   SimVRegister& rn = ReadVRegister(instr->GetRn());
   9442   switch (instr->Mask(NEONScalarPairwiseMask)) {
   9443     case NEON_ADDP_scalar: {
   9444       // All pairwise operations except ADDP use bit U to differentiate FP16
   9445       // from FP32/FP64 variations.
   9446       NEONFormatDecoder nfd_addp(instr, NEONFormatDecoder::FPScalarFormatMap());
   9447       addp(nfd_addp.GetVectorFormat(), rd, rn);
   9448       break;
   9449     }
   9450     case NEON_FADDP_h_scalar:
   9451     case NEON_FADDP_scalar:
   9452       faddp(vf, rd, rn);
   9453       break;
   9454     case NEON_FMAXP_h_scalar:
   9455     case NEON_FMAXP_scalar:
   9456       fmaxp(vf, rd, rn);
   9457       break;
   9458     case NEON_FMAXNMP_h_scalar:
   9459     case NEON_FMAXNMP_scalar:
   9460       fmaxnmp(vf, rd, rn);
   9461       break;
   9462     case NEON_FMINP_h_scalar:
   9463     case NEON_FMINP_scalar:
   9464       fminp(vf, rd, rn);
   9465       break;
   9466     case NEON_FMINNMP_h_scalar:
   9467     case NEON_FMINNMP_scalar:
   9468       fminnmp(vf, rd, rn);
   9469       break;
   9470     default:
   9471       VIXL_UNIMPLEMENTED();
   9472   }
   9473 }
   9474 
   9475 
   9476 void Simulator::VisitNEONScalarShiftImmediate(const Instruction* instr) {
   9477   SimVRegister& rd = ReadVRegister(instr->GetRd());
   9478   SimVRegister& rn = ReadVRegister(instr->GetRn());
   9479   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
   9480 
   9481   static const NEONFormatMap map = {{22, 21, 20, 19},
   9482                                     {NF_UNDEF,
   9483                                      NF_B,
   9484                                      NF_H,
   9485                                      NF_H,
   9486                                      NF_S,
   9487                                      NF_S,
   9488                                      NF_S,
   9489                                      NF_S,
   9490                                      NF_D,
   9491                                      NF_D,
   9492                                      NF_D,
   9493                                      NF_D,
   9494                                      NF_D,
   9495                                      NF_D,
   9496                                      NF_D,
   9497                                      NF_D}};
   9498   NEONFormatDecoder nfd(instr, &map);
   9499   VectorFormat vf = nfd.GetVectorFormat();
   9500 
   9501   int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh());
   9502   int immh_immb = instr->GetImmNEONImmhImmb();
   9503   int right_shift = (16 << highest_set_bit) - immh_immb;
   9504   int left_shift = immh_immb - (8 << highest_set_bit);
   9505   switch (instr->Mask(NEONScalarShiftImmediateMask)) {
   9506     case NEON_SHL_scalar:
   9507       shl(vf, rd, rn, left_shift);
   9508       break;
   9509     case NEON_SLI_scalar:
   9510       sli(vf, rd, rn, left_shift);
   9511       break;
   9512     case NEON_SQSHL_imm_scalar:
   9513       sqshl(vf, rd, rn, left_shift);
   9514       break;
   9515     case NEON_UQSHL_imm_scalar:
   9516       uqshl(vf, rd, rn, left_shift);
   9517       break;
   9518     case NEON_SQSHLU_scalar:
   9519       sqshlu(vf, rd, rn, left_shift);
   9520       break;
   9521     case NEON_SRI_scalar:
   9522       sri(vf, rd, rn, right_shift);
   9523       break;
   9524     case NEON_SSHR_scalar:
   9525       sshr(vf, rd, rn, right_shift);
   9526       break;
   9527     case NEON_USHR_scalar:
   9528       ushr(vf, rd, rn, right_shift);
   9529       break;
   9530     case NEON_SRSHR_scalar:
   9531       sshr(vf, rd, rn, right_shift).Round(vf);
   9532       break;
   9533     case NEON_URSHR_scalar:
   9534       ushr(vf, rd, rn, right_shift).Round(vf);
   9535       break;
   9536     case NEON_SSRA_scalar:
   9537       ssra(vf, rd, rn, right_shift);
   9538       break;
   9539     case NEON_USRA_scalar:
   9540       usra(vf, rd, rn, right_shift);
   9541       break;
   9542     case NEON_SRSRA_scalar:
   9543       srsra(vf, rd, rn, right_shift);
   9544       break;
   9545     case NEON_URSRA_scalar:
   9546       ursra(vf, rd, rn, right_shift);
   9547       break;
   9548     case NEON_UQSHRN_scalar:
   9549       uqshrn(vf, rd, rn, right_shift);
   9550       break;
   9551     case NEON_UQRSHRN_scalar:
   9552       uqrshrn(vf, rd, rn, right_shift);
   9553       break;
   9554     case NEON_SQSHRN_scalar:
   9555       sqshrn(vf, rd, rn, right_shift);
   9556       break;
   9557     case NEON_SQRSHRN_scalar:
   9558       sqrshrn(vf, rd, rn, right_shift);
   9559       break;
   9560     case NEON_SQSHRUN_scalar:
   9561       sqshrun(vf, rd, rn, right_shift);
   9562       break;
   9563     case NEON_SQRSHRUN_scalar:
   9564       sqrshrun(vf, rd, rn, right_shift);
   9565       break;
   9566     case NEON_FCVTZS_imm_scalar:
   9567       fcvts(vf, rd, rn, FPZero, right_shift);
   9568       break;
   9569     case NEON_FCVTZU_imm_scalar:
   9570       fcvtu(vf, rd, rn, FPZero, right_shift);
   9571       break;
   9572     case NEON_SCVTF_imm_scalar:
   9573       scvtf(vf, rd, rn, right_shift, fpcr_rounding);
   9574       break;
   9575     case NEON_UCVTF_imm_scalar:
   9576       ucvtf(vf, rd, rn, right_shift, fpcr_rounding);
   9577       break;
   9578     default:
   9579       VIXL_UNIMPLEMENTED();
   9580   }
   9581 }
   9582 
   9583 
   9584 void Simulator::VisitNEONShiftImmediate(const Instruction* instr) {
   9585   SimVRegister& rd = ReadVRegister(instr->GetRd());
   9586   SimVRegister& rn = ReadVRegister(instr->GetRn());
   9587   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
   9588 
   9589   // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H,
   9590   // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined.
   9591   static const NEONFormatMap map = {{22, 21, 20, 19, 30},
   9592                                     {NF_UNDEF, NF_UNDEF, NF_8B,    NF_16B,
   9593                                      NF_4H,    NF_8H,    NF_4H,    NF_8H,
   9594                                      NF_2S,    NF_4S,    NF_2S,    NF_4S,
   9595                                      NF_2S,    NF_4S,    NF_2S,    NF_4S,
   9596                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
   9597                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
   9598                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,
   9599                                      NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D}};
   9600   NEONFormatDecoder nfd(instr, &map);
   9601   VectorFormat vf = nfd.GetVectorFormat();
   9602 
   9603   // 0001->8H, 001x->4S, 01xx->2D, all others undefined.
   9604   static const NEONFormatMap map_l =
   9605       {{22, 21, 20, 19},
   9606        {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}};
   9607   VectorFormat vf_l = nfd.GetVectorFormat(&map_l);
   9608 
   9609   int highest_set_bit = HighestSetBitPosition(instr->GetImmNEONImmh());
   9610   int immh_immb = instr->GetImmNEONImmhImmb();
   9611   int right_shift = (16 << highest_set_bit) - immh_immb;
   9612   int left_shift = immh_immb - (8 << highest_set_bit);
   9613 
   9614   switch (instr->Mask(NEONShiftImmediateMask)) {
   9615     case NEON_SHL:
   9616       shl(vf, rd, rn, left_shift);
   9617       break;
   9618     case NEON_SLI:
   9619       sli(vf, rd, rn, left_shift);
   9620       break;
   9621     case NEON_SQSHLU:
   9622       sqshlu(vf, rd, rn, left_shift);
   9623       break;
   9624     case NEON_SRI:
   9625       sri(vf, rd, rn, right_shift);
   9626       break;
   9627     case NEON_SSHR:
   9628       sshr(vf, rd, rn, right_shift);
   9629       break;
   9630     case NEON_USHR:
   9631       ushr(vf, rd, rn, right_shift);
   9632       break;
   9633     case NEON_SRSHR:
   9634       sshr(vf, rd, rn, right_shift).Round(vf);
   9635       break;
   9636     case NEON_URSHR:
   9637       ushr(vf, rd, rn, right_shift).Round(vf);
   9638       break;
   9639     case NEON_SSRA:
   9640       ssra(vf, rd, rn, right_shift);
   9641       break;
   9642     case NEON_USRA:
   9643       usra(vf, rd, rn, right_shift);
   9644       break;
   9645     case NEON_SRSRA:
   9646       srsra(vf, rd, rn, right_shift);
   9647       break;
   9648     case NEON_URSRA:
   9649       ursra(vf, rd, rn, right_shift);
   9650       break;
   9651     case NEON_SQSHL_imm:
   9652       sqshl(vf, rd, rn, left_shift);
   9653       break;
   9654     case NEON_UQSHL_imm:
   9655       uqshl(vf, rd, rn, left_shift);
   9656       break;
   9657     case NEON_SCVTF_imm:
   9658       scvtf(vf, rd, rn, right_shift, fpcr_rounding);
   9659       break;
   9660     case NEON_UCVTF_imm:
   9661       ucvtf(vf, rd, rn, right_shift, fpcr_rounding);
   9662       break;
   9663     case NEON_FCVTZS_imm:
   9664       fcvts(vf, rd, rn, FPZero, right_shift);
   9665       break;
   9666     case NEON_FCVTZU_imm:
   9667       fcvtu(vf, rd, rn, FPZero, right_shift);
   9668       break;
   9669     case NEON_SSHLL:
   9670       vf = vf_l;
   9671       if (instr->Mask(NEON_Q)) {
   9672         sshll2(vf, rd, rn, left_shift);
   9673       } else {
   9674         sshll(vf, rd, rn, left_shift);
   9675       }
   9676       break;
   9677     case NEON_USHLL:
   9678       vf = vf_l;
   9679       if (instr->Mask(NEON_Q)) {
   9680         ushll2(vf, rd, rn, left_shift);
   9681       } else {
   9682         ushll(vf, rd, rn, left_shift);
   9683       }
   9684       break;
   9685     case NEON_SHRN:
   9686       if (instr->Mask(NEON_Q)) {
   9687         shrn2(vf, rd, rn, right_shift);
   9688       } else {
   9689         shrn(vf, rd, rn, right_shift);
   9690       }
   9691       break;
   9692     case NEON_RSHRN:
   9693       if (instr->Mask(NEON_Q)) {
   9694         rshrn2(vf, rd, rn, right_shift);
   9695       } else {
   9696         rshrn(vf, rd, rn, right_shift);
   9697       }
   9698       break;
   9699     case NEON_UQSHRN:
   9700       if (instr->Mask(NEON_Q)) {
   9701         uqshrn2(vf, rd, rn, right_shift);
   9702       } else {
   9703         uqshrn(vf, rd, rn, right_shift);
   9704       }
   9705       break;
   9706     case NEON_UQRSHRN:
   9707       if (instr->Mask(NEON_Q)) {
   9708         uqrshrn2(vf, rd, rn, right_shift);
   9709       } else {
   9710         uqrshrn(vf, rd, rn, right_shift);
   9711       }
   9712       break;
   9713     case NEON_SQSHRN:
   9714       if (instr->Mask(NEON_Q)) {
   9715         sqshrn2(vf, rd, rn, right_shift);
   9716       } else {
   9717         sqshrn(vf, rd, rn, right_shift);
   9718       }
   9719       break;
   9720     case NEON_SQRSHRN:
   9721       if (instr->Mask(NEON_Q)) {
   9722         sqrshrn2(vf, rd, rn, right_shift);
   9723       } else {
   9724         sqrshrn(vf, rd, rn, right_shift);
   9725       }
   9726       break;
   9727     case NEON_SQSHRUN:
   9728       if (instr->Mask(NEON_Q)) {
   9729         sqshrun2(vf, rd, rn, right_shift);
   9730       } else {
   9731         sqshrun(vf, rd, rn, right_shift);
   9732       }
   9733       break;
   9734     case NEON_SQRSHRUN:
   9735       if (instr->Mask(NEON_Q)) {
   9736         sqrshrun2(vf, rd, rn, right_shift);
   9737       } else {
   9738         sqrshrun(vf, rd, rn, right_shift);
   9739       }
   9740       break;
   9741     default:
   9742       VIXL_UNIMPLEMENTED();
   9743   }
   9744 }
   9745 
   9746 
   9747 void Simulator::VisitNEONTable(const Instruction* instr) {
   9748   NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
   9749   VectorFormat vf = nfd.GetVectorFormat();
   9750 
   9751   SimVRegister& rd = ReadVRegister(instr->GetRd());
   9752   SimVRegister& rn = ReadVRegister(instr->GetRn());
   9753   SimVRegister& rn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfVRegisters);
   9754   SimVRegister& rn3 = ReadVRegister((instr->GetRn() + 2) % kNumberOfVRegisters);
   9755   SimVRegister& rn4 = ReadVRegister((instr->GetRn() + 3) % kNumberOfVRegisters);
   9756   SimVRegister& rm = ReadVRegister(instr->GetRm());
   9757 
   9758   switch (instr->Mask(NEONTableMask)) {
   9759     case NEON_TBL_1v:
   9760       tbl(vf, rd, rn, rm);
   9761       break;
   9762     case NEON_TBL_2v:
   9763       tbl(vf, rd, rn, rn2, rm);
   9764       break;
   9765     case NEON_TBL_3v:
   9766       tbl(vf, rd, rn, rn2, rn3, rm);
   9767       break;
   9768     case NEON_TBL_4v:
   9769       tbl(vf, rd, rn, rn2, rn3, rn4, rm);
   9770       break;
   9771     case NEON_TBX_1v:
   9772       tbx(vf, rd, rn, rm);
   9773       break;
   9774     case NEON_TBX_2v:
   9775       tbx(vf, rd, rn, rn2, rm);
   9776       break;
   9777     case NEON_TBX_3v:
   9778       tbx(vf, rd, rn, rn2, rn3, rm);
   9779       break;
   9780     case NEON_TBX_4v:
   9781       tbx(vf, rd, rn, rn2, rn3, rn4, rm);
   9782       break;
   9783     default:
   9784       VIXL_UNIMPLEMENTED();
   9785   }
   9786 }
   9787 
   9788 
   9789 void Simulator::VisitNEONPerm(const Instruction* instr) {
   9790   NEONFormatDecoder nfd(instr);
   9791   VectorFormat vf = nfd.GetVectorFormat();
   9792 
   9793   SimVRegister& rd = ReadVRegister(instr->GetRd());
   9794   SimVRegister& rn = ReadVRegister(instr->GetRn());
   9795   SimVRegister& rm = ReadVRegister(instr->GetRm());
   9796 
   9797   switch (instr->Mask(NEONPermMask)) {
   9798     case NEON_TRN1:
   9799       trn1(vf, rd, rn, rm);
   9800       break;
   9801     case NEON_TRN2:
   9802       trn2(vf, rd, rn, rm);
   9803       break;
   9804     case NEON_UZP1:
   9805       uzp1(vf, rd, rn, rm);
   9806       break;
   9807     case NEON_UZP2:
   9808       uzp2(vf, rd, rn, rm);
   9809       break;
   9810     case NEON_ZIP1:
   9811       zip1(vf, rd, rn, rm);
   9812       break;
   9813     case NEON_ZIP2:
   9814       zip2(vf, rd, rn, rm);
   9815       break;
   9816     default:
   9817       VIXL_UNIMPLEMENTED();
   9818   }
   9819 }
   9820 
   9821 void Simulator::VisitSVEAddressGeneration(const Instruction* instr) {
   9822   SimVRegister& zd = ReadVRegister(instr->GetRd());
   9823   SimVRegister& zn = ReadVRegister(instr->GetRn());
   9824   SimVRegister& zm = ReadVRegister(instr->GetRm());
   9825   SimVRegister temp;
   9826 
   9827   VectorFormat vform = kFormatVnD;
   9828   mov(vform, temp, zm);
   9829 
   9830   switch (instr->Mask(SVEAddressGenerationMask)) {
   9831     case ADR_z_az_d_s32_scaled:
   9832       sxt(vform, temp, temp, kSRegSize);
   9833       break;
   9834     case ADR_z_az_d_u32_scaled:
   9835       uxt(vform, temp, temp, kSRegSize);
   9836       break;
   9837     case ADR_z_az_s_same_scaled:
   9838       vform = kFormatVnS;
   9839       break;
   9840     case ADR_z_az_d_same_scaled:
   9841       // Nothing to do.
   9842       break;
   9843     default:
   9844       VIXL_UNIMPLEMENTED();
   9845       break;
   9846   }
   9847 
   9848   int shift_amount = instr->ExtractBits(11, 10);
   9849   shl(vform, temp, temp, shift_amount);
   9850   add(vform, zd, zn, temp);
   9851 }
   9852 
   9853 void Simulator::VisitSVEBitwiseLogicalWithImm_Unpredicated(
   9854     const Instruction* instr) {
   9855   Instr op = instr->Mask(SVEBitwiseLogicalWithImm_UnpredicatedMask);
   9856   switch (op) {
   9857     case AND_z_zi:
   9858     case EOR_z_zi:
   9859     case ORR_z_zi: {
   9860       int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
   9861       uint64_t imm = instr->GetSVEImmLogical();
   9862       // Valid immediate is a non-zero bits
   9863       VIXL_ASSERT(imm != 0);
   9864       SVEBitwiseImmHelper(static_cast<SVEBitwiseLogicalWithImm_UnpredicatedOp>(
   9865                               op),
   9866                           SVEFormatFromLaneSizeInBytesLog2(lane_size),
   9867                           ReadVRegister(instr->GetRd()),
   9868                           imm);
   9869       break;
   9870     }
   9871     default:
   9872       VIXL_UNIMPLEMENTED();
   9873       break;
   9874   }
   9875 }
   9876 
   9877 void Simulator::VisitSVEBroadcastBitmaskImm(const Instruction* instr) {
   9878   switch (instr->Mask(SVEBroadcastBitmaskImmMask)) {
   9879     case DUPM_z_i: {
   9880       /* DUPM uses the same lane size and immediate encoding as bitwise logical
   9881        * immediate instructions. */
   9882       int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2();
   9883       uint64_t imm = instr->GetSVEImmLogical();
   9884       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
   9885       dup_immediate(vform, ReadVRegister(instr->GetRd()), imm);
   9886       break;
   9887     }
   9888     default:
   9889       VIXL_UNIMPLEMENTED();
   9890       break;
   9891   }
   9892 }
   9893 
   9894 void Simulator::VisitSVEBitwiseLogicalUnpredicated(const Instruction* instr) {
   9895   SimVRegister& zd = ReadVRegister(instr->GetRd());
   9896   SimVRegister& zn = ReadVRegister(instr->GetRn());
   9897   SimVRegister& zm = ReadVRegister(instr->GetRm());
   9898   Instr op = instr->Mask(SVEBitwiseLogicalUnpredicatedMask);
   9899 
   9900   LogicalOp logical_op = LogicalOpMask;
   9901   switch (op) {
   9902     case AND_z_zz:
   9903       logical_op = AND;
   9904       break;
   9905     case BIC_z_zz:
   9906       logical_op = BIC;
   9907       break;
   9908     case EOR_z_zz:
   9909       logical_op = EOR;
   9910       break;
   9911     case ORR_z_zz:
   9912       logical_op = ORR;
   9913       break;
   9914     default:
   9915       VIXL_UNIMPLEMENTED();
   9916       break;
   9917   }
   9918   // Lane size of registers is irrelevant to the bitwise operations, so perform
   9919   // the operation on D-sized lanes.
   9920   SVEBitwiseLogicalUnpredicatedHelper(logical_op, kFormatVnD, zd, zn, zm);
   9921 }
   9922 
   9923 void Simulator::VisitSVEBitwiseShiftByImm_Predicated(const Instruction* instr) {
   9924   SimVRegister& zdn = ReadVRegister(instr->GetRd());
   9925   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   9926 
   9927   SimVRegister scratch;
   9928   SimVRegister result;
   9929 
   9930   bool for_division = false;
   9931   Shift shift_op = NO_SHIFT;
   9932   switch (instr->Mask(SVEBitwiseShiftByImm_PredicatedMask)) {
   9933     case ASRD_z_p_zi:
   9934       shift_op = ASR;
   9935       for_division = true;
   9936       break;
   9937     case ASR_z_p_zi:
   9938       shift_op = ASR;
   9939       break;
   9940     case LSL_z_p_zi:
   9941       shift_op = LSL;
   9942       break;
   9943     case LSR_z_p_zi:
   9944       shift_op = LSR;
   9945       break;
   9946     default:
   9947       VIXL_UNIMPLEMENTED();
   9948       break;
   9949   }
   9950 
   9951   std::pair<int, int> shift_and_lane_size =
   9952       instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ true);
   9953   unsigned lane_size = shift_and_lane_size.second;
   9954   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
   9955   int shift_dist = shift_and_lane_size.first;
   9956 
   9957   if ((shift_op == ASR) && for_division) {
   9958     asrd(vform, result, zdn, shift_dist);
   9959   } else {
   9960     if (shift_op == LSL) {
   9961       // Shift distance is computed differently for LSL. Convert the result.
   9962       shift_dist = (8 << lane_size) - shift_dist;
   9963     }
   9964     dup_immediate(vform, scratch, shift_dist);
   9965     SVEBitwiseShiftHelper(shift_op, vform, result, zdn, scratch, false);
   9966   }
   9967   mov_merging(vform, zdn, pg, result);
   9968 }
   9969 
   9970 void Simulator::VisitSVEBitwiseShiftByVector_Predicated(
   9971     const Instruction* instr) {
   9972   VectorFormat vform = instr->GetSVEVectorFormat();
   9973   SimVRegister& zdn = ReadVRegister(instr->GetRd());
   9974   SimVRegister& zm = ReadVRegister(instr->GetRn());
   9975   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
   9976   SimVRegister result;
   9977 
   9978   // SVE uses the whole (saturated) lane for the shift amount.
   9979   bool shift_in_ls_byte = false;
   9980 
   9981   switch (form_hash_) {
   9982     case "asrr_z_p_zz"_h:
   9983       sshr(vform, result, zm, zdn);
   9984       break;
   9985     case "asr_z_p_zz"_h:
   9986       sshr(vform, result, zdn, zm);
   9987       break;
   9988     case "lslr_z_p_zz"_h:
   9989       sshl(vform, result, zm, zdn, shift_in_ls_byte);
   9990       break;
   9991     case "lsl_z_p_zz"_h:
   9992       sshl(vform, result, zdn, zm, shift_in_ls_byte);
   9993       break;
   9994     case "lsrr_z_p_zz"_h:
   9995       ushr(vform, result, zm, zdn);
   9996       break;
   9997     case "lsr_z_p_zz"_h:
   9998       ushr(vform, result, zdn, zm);
   9999       break;
  10000     case "sqrshl_z_p_zz"_h:
  10001       sshl(vform, result, zdn, zm, shift_in_ls_byte)
  10002           .Round(vform)
  10003           .SignedSaturate(vform);
  10004       break;
  10005     case "sqrshlr_z_p_zz"_h:
  10006       sshl(vform, result, zm, zdn, shift_in_ls_byte)
  10007           .Round(vform)
  10008           .SignedSaturate(vform);
  10009       break;
  10010     case "sqshl_z_p_zz"_h:
  10011       sshl(vform, result, zdn, zm, shift_in_ls_byte).SignedSaturate(vform);
  10012       break;
  10013     case "sqshlr_z_p_zz"_h:
  10014       sshl(vform, result, zm, zdn, shift_in_ls_byte).SignedSaturate(vform);
  10015       break;
  10016     case "srshl_z_p_zz"_h:
  10017       sshl(vform, result, zdn, zm, shift_in_ls_byte).Round(vform);
  10018       break;
  10019     case "srshlr_z_p_zz"_h:
  10020       sshl(vform, result, zm, zdn, shift_in_ls_byte).Round(vform);
  10021       break;
  10022     case "uqrshl_z_p_zz"_h:
  10023       ushl(vform, result, zdn, zm, shift_in_ls_byte)
  10024           .Round(vform)
  10025           .UnsignedSaturate(vform);
  10026       break;
  10027     case "uqrshlr_z_p_zz"_h:
  10028       ushl(vform, result, zm, zdn, shift_in_ls_byte)
  10029           .Round(vform)
  10030           .UnsignedSaturate(vform);
  10031       break;
  10032     case "uqshl_z_p_zz"_h:
  10033       ushl(vform, result, zdn, zm, shift_in_ls_byte).UnsignedSaturate(vform);
  10034       break;
  10035     case "uqshlr_z_p_zz"_h:
  10036       ushl(vform, result, zm, zdn, shift_in_ls_byte).UnsignedSaturate(vform);
  10037       break;
  10038     case "urshl_z_p_zz"_h:
  10039       ushl(vform, result, zdn, zm, shift_in_ls_byte).Round(vform);
  10040       break;
  10041     case "urshlr_z_p_zz"_h:
  10042       ushl(vform, result, zm, zdn, shift_in_ls_byte).Round(vform);
  10043       break;
  10044     default:
  10045       VIXL_UNIMPLEMENTED();
  10046       break;
  10047   }
  10048   mov_merging(vform, zdn, pg, result);
  10049 }
  10050 
  10051 void Simulator::VisitSVEBitwiseShiftByWideElements_Predicated(
  10052     const Instruction* instr) {
  10053   VectorFormat vform = instr->GetSVEVectorFormat();
  10054   SimVRegister& zdn = ReadVRegister(instr->GetRd());
  10055   SimVRegister& zm = ReadVRegister(instr->GetRn());
  10056   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  10057 
  10058   SimVRegister result;
  10059   Shift shift_op = ASR;
  10060 
  10061   switch (instr->Mask(SVEBitwiseShiftByWideElements_PredicatedMask)) {
  10062     case ASR_z_p_zw:
  10063       break;
  10064     case LSL_z_p_zw:
  10065       shift_op = LSL;
  10066       break;
  10067     case LSR_z_p_zw:
  10068       shift_op = LSR;
  10069       break;
  10070     default:
  10071       VIXL_UNIMPLEMENTED();
  10072       break;
  10073   }
  10074   SVEBitwiseShiftHelper(shift_op,
  10075                         vform,
  10076                         result,
  10077                         zdn,
  10078                         zm,
  10079                         /* is_wide_elements = */ true);
  10080   mov_merging(vform, zdn, pg, result);
  10081 }
  10082 
  10083 void Simulator::VisitSVEBitwiseShiftUnpredicated(const Instruction* instr) {
  10084   SimVRegister& zd = ReadVRegister(instr->GetRd());
  10085   SimVRegister& zn = ReadVRegister(instr->GetRn());
  10086 
  10087   Shift shift_op = NO_SHIFT;
  10088   switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) {
  10089     case ASR_z_zi:
  10090     case ASR_z_zw:
  10091       shift_op = ASR;
  10092       break;
  10093     case LSL_z_zi:
  10094     case LSL_z_zw:
  10095       shift_op = LSL;
  10096       break;
  10097     case LSR_z_zi:
  10098     case LSR_z_zw:
  10099       shift_op = LSR;
  10100       break;
  10101     default:
  10102       VIXL_UNIMPLEMENTED();
  10103       break;
  10104   }
  10105 
  10106   switch (instr->Mask(SVEBitwiseShiftUnpredicatedMask)) {
  10107     case ASR_z_zi:
  10108     case LSL_z_zi:
  10109     case LSR_z_zi: {
  10110       SimVRegister scratch;
  10111       std::pair<int, int> shift_and_lane_size =
  10112           instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false);
  10113       unsigned lane_size = shift_and_lane_size.second;
  10114       VIXL_ASSERT(lane_size <= kDRegSizeInBytesLog2);
  10115       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(lane_size);
  10116       int shift_dist = shift_and_lane_size.first;
  10117       if (shift_op == LSL) {
  10118         // Shift distance is computed differently for LSL. Convert the result.
  10119         shift_dist = (8 << lane_size) - shift_dist;
  10120       }
  10121       dup_immediate(vform, scratch, shift_dist);
  10122       SVEBitwiseShiftHelper(shift_op, vform, zd, zn, scratch, false);
  10123       break;
  10124     }
  10125     case ASR_z_zw:
  10126     case LSL_z_zw:
  10127     case LSR_z_zw:
  10128       SVEBitwiseShiftHelper(shift_op,
  10129                             instr->GetSVEVectorFormat(),
  10130                             zd,
  10131                             zn,
  10132                             ReadVRegister(instr->GetRm()),
  10133                             true);
  10134       break;
  10135     default:
  10136       VIXL_UNIMPLEMENTED();
  10137       break;
  10138   }
  10139 }
  10140 
  10141 void Simulator::VisitSVEIncDecRegisterByElementCount(const Instruction* instr) {
  10142   // Although the instructions have a separate encoding class, the lane size is
  10143   // encoded in the same way as most other SVE instructions.
  10144   VectorFormat vform = instr->GetSVEVectorFormat();
  10145 
  10146   int pattern = instr->GetImmSVEPredicateConstraint();
  10147   int count = GetPredicateConstraintLaneCount(vform, pattern);
  10148   int multiplier = instr->ExtractBits(19, 16) + 1;
  10149 
  10150   switch (instr->Mask(SVEIncDecRegisterByElementCountMask)) {
  10151     case DECB_r_rs:
  10152     case DECD_r_rs:
  10153     case DECH_r_rs:
  10154     case DECW_r_rs:
  10155       count = -count;
  10156       break;
  10157     case INCB_r_rs:
  10158     case INCD_r_rs:
  10159     case INCH_r_rs:
  10160     case INCW_r_rs:
  10161       // Nothing to do.
  10162       break;
  10163     default:
  10164       VIXL_UNIMPLEMENTED();
  10165       return;
  10166   }
  10167 
  10168   WriteXRegister(instr->GetRd(),
  10169                  IncDecN(ReadXRegister(instr->GetRd()),
  10170                          count * multiplier,
  10171                          kXRegSize));
  10172 }
  10173 
  10174 void Simulator::VisitSVEIncDecVectorByElementCount(const Instruction* instr) {
  10175   VectorFormat vform = instr->GetSVEVectorFormat();
  10176   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
  10177     VIXL_UNIMPLEMENTED();
  10178   }
  10179 
  10180   int pattern = instr->GetImmSVEPredicateConstraint();
  10181   int count = GetPredicateConstraintLaneCount(vform, pattern);
  10182   int multiplier = instr->ExtractBits(19, 16) + 1;
  10183 
  10184   switch (instr->Mask(SVEIncDecVectorByElementCountMask)) {
  10185     case DECD_z_zs:
  10186     case DECH_z_zs:
  10187     case DECW_z_zs:
  10188       count = -count;
  10189       break;
  10190     case INCD_z_zs:
  10191     case INCH_z_zs:
  10192     case INCW_z_zs:
  10193       // Nothing to do.
  10194       break;
  10195     default:
  10196       VIXL_UNIMPLEMENTED();
  10197       break;
  10198   }
  10199 
  10200   SimVRegister& zd = ReadVRegister(instr->GetRd());
  10201   SimVRegister scratch;
  10202   dup_immediate(vform,
  10203                 scratch,
  10204                 IncDecN(0,
  10205                         count * multiplier,
  10206                         LaneSizeInBitsFromFormat(vform)));
  10207   add(vform, zd, zd, scratch);
  10208 }
  10209 
  10210 void Simulator::VisitSVESaturatingIncDecRegisterByElementCount(
  10211     const Instruction* instr) {
  10212   // Although the instructions have a separate encoding class, the lane size is
  10213   // encoded in the same way as most other SVE instructions.
  10214   VectorFormat vform = instr->GetSVEVectorFormat();
  10215 
  10216   int pattern = instr->GetImmSVEPredicateConstraint();
  10217   int count = GetPredicateConstraintLaneCount(vform, pattern);
  10218   int multiplier = instr->ExtractBits(19, 16) + 1;
  10219 
  10220   unsigned width = kXRegSize;
  10221   bool is_signed = false;
  10222 
  10223   switch (instr->Mask(SVESaturatingIncDecRegisterByElementCountMask)) {
  10224     case SQDECB_r_rs_sx:
  10225     case SQDECD_r_rs_sx:
  10226     case SQDECH_r_rs_sx:
  10227     case SQDECW_r_rs_sx:
  10228       width = kWRegSize;
  10229       VIXL_FALLTHROUGH();
  10230     case SQDECB_r_rs_x:
  10231     case SQDECD_r_rs_x:
  10232     case SQDECH_r_rs_x:
  10233     case SQDECW_r_rs_x:
  10234       is_signed = true;
  10235       count = -count;
  10236       break;
  10237     case SQINCB_r_rs_sx:
  10238     case SQINCD_r_rs_sx:
  10239     case SQINCH_r_rs_sx:
  10240     case SQINCW_r_rs_sx:
  10241       width = kWRegSize;
  10242       VIXL_FALLTHROUGH();
  10243     case SQINCB_r_rs_x:
  10244     case SQINCD_r_rs_x:
  10245     case SQINCH_r_rs_x:
  10246     case SQINCW_r_rs_x:
  10247       is_signed = true;
  10248       break;
  10249     case UQDECB_r_rs_uw:
  10250     case UQDECD_r_rs_uw:
  10251     case UQDECH_r_rs_uw:
  10252     case UQDECW_r_rs_uw:
  10253       width = kWRegSize;
  10254       VIXL_FALLTHROUGH();
  10255     case UQDECB_r_rs_x:
  10256     case UQDECD_r_rs_x:
  10257     case UQDECH_r_rs_x:
  10258     case UQDECW_r_rs_x:
  10259       count = -count;
  10260       break;
  10261     case UQINCB_r_rs_uw:
  10262     case UQINCD_r_rs_uw:
  10263     case UQINCH_r_rs_uw:
  10264     case UQINCW_r_rs_uw:
  10265       width = kWRegSize;
  10266       VIXL_FALLTHROUGH();
  10267     case UQINCB_r_rs_x:
  10268     case UQINCD_r_rs_x:
  10269     case UQINCH_r_rs_x:
  10270     case UQINCW_r_rs_x:
  10271       // Nothing to do.
  10272       break;
  10273     default:
  10274       VIXL_UNIMPLEMENTED();
  10275       break;
  10276   }
  10277 
  10278   WriteXRegister(instr->GetRd(),
  10279                  IncDecN(ReadXRegister(instr->GetRd()),
  10280                          count * multiplier,
  10281                          width,
  10282                          true,
  10283                          is_signed));
  10284 }
  10285 
  10286 void Simulator::VisitSVESaturatingIncDecVectorByElementCount(
  10287     const Instruction* instr) {
  10288   VectorFormat vform = instr->GetSVEVectorFormat();
  10289   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
  10290     VIXL_UNIMPLEMENTED();
  10291   }
  10292 
  10293   int pattern = instr->GetImmSVEPredicateConstraint();
  10294   int count = GetPredicateConstraintLaneCount(vform, pattern);
  10295   int multiplier = instr->ExtractBits(19, 16) + 1;
  10296 
  10297   SimVRegister& zd = ReadVRegister(instr->GetRd());
  10298   SimVRegister scratch;
  10299   dup_immediate(vform,
  10300                 scratch,
  10301                 IncDecN(0,
  10302                         count * multiplier,
  10303                         LaneSizeInBitsFromFormat(vform)));
  10304 
  10305   switch (instr->Mask(SVESaturatingIncDecVectorByElementCountMask)) {
  10306     case SQDECD_z_zs:
  10307     case SQDECH_z_zs:
  10308     case SQDECW_z_zs:
  10309       sub(vform, zd, zd, scratch).SignedSaturate(vform);
  10310       break;
  10311     case SQINCD_z_zs:
  10312     case SQINCH_z_zs:
  10313     case SQINCW_z_zs:
  10314       add(vform, zd, zd, scratch).SignedSaturate(vform);
  10315       break;
  10316     case UQDECD_z_zs:
  10317     case UQDECH_z_zs:
  10318     case UQDECW_z_zs:
  10319       sub(vform, zd, zd, scratch).UnsignedSaturate(vform);
  10320       break;
  10321     case UQINCD_z_zs:
  10322     case UQINCH_z_zs:
  10323     case UQINCW_z_zs:
  10324       add(vform, zd, zd, scratch).UnsignedSaturate(vform);
  10325       break;
  10326     default:
  10327       VIXL_UNIMPLEMENTED();
  10328       break;
  10329   }
  10330 }
  10331 
  10332 void Simulator::VisitSVEElementCount(const Instruction* instr) {
  10333   switch (instr->Mask(SVEElementCountMask)) {
  10334     case CNTB_r_s:
  10335     case CNTD_r_s:
  10336     case CNTH_r_s:
  10337     case CNTW_r_s:
  10338       // All handled below.
  10339       break;
  10340     default:
  10341       VIXL_UNIMPLEMENTED();
  10342       break;
  10343   }
  10344 
  10345   // Although the instructions are separated, the lane size is encoded in the
  10346   // same way as most other SVE instructions.
  10347   VectorFormat vform = instr->GetSVEVectorFormat();
  10348 
  10349   int pattern = instr->GetImmSVEPredicateConstraint();
  10350   int count = GetPredicateConstraintLaneCount(vform, pattern);
  10351   int multiplier = instr->ExtractBits(19, 16) + 1;
  10352   WriteXRegister(instr->GetRd(), count * multiplier);
  10353 }
  10354 
  10355 void Simulator::VisitSVEFPAccumulatingReduction(const Instruction* instr) {
  10356   VectorFormat vform = instr->GetSVEVectorFormat();
  10357   SimVRegister& vdn = ReadVRegister(instr->GetRd());
  10358   SimVRegister& zm = ReadVRegister(instr->GetRn());
  10359   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  10360 
  10361   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
  10362 
  10363   switch (instr->Mask(SVEFPAccumulatingReductionMask)) {
  10364     case FADDA_v_p_z:
  10365       fadda(vform, vdn, pg, zm);
  10366       break;
  10367     default:
  10368       VIXL_UNIMPLEMENTED();
  10369       break;
  10370   }
  10371 }
  10372 
  10373 void Simulator::VisitSVEFPArithmetic_Predicated(const Instruction* instr) {
  10374   VectorFormat vform = instr->GetSVEVectorFormat();
  10375   SimVRegister& zdn = ReadVRegister(instr->GetRd());
  10376   SimVRegister& zm = ReadVRegister(instr->GetRn());
  10377   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  10378 
  10379   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
  10380 
  10381   SimVRegister result;
  10382   switch (instr->Mask(SVEFPArithmetic_PredicatedMask)) {
  10383     case FABD_z_p_zz:
  10384       fabd(vform, result, zdn, zm);
  10385       break;
  10386     case FADD_z_p_zz:
  10387       fadd(vform, result, zdn, zm);
  10388       break;
  10389     case FDIVR_z_p_zz:
  10390       fdiv(vform, result, zm, zdn);
  10391       break;
  10392     case FDIV_z_p_zz:
  10393       fdiv(vform, result, zdn, zm);
  10394       break;
  10395     case FMAXNM_z_p_zz:
  10396       fmaxnm(vform, result, zdn, zm);
  10397       break;
  10398     case FMAX_z_p_zz:
  10399       fmax(vform, result, zdn, zm);
  10400       break;
  10401     case FMINNM_z_p_zz:
  10402       fminnm(vform, result, zdn, zm);
  10403       break;
  10404     case FMIN_z_p_zz:
  10405       fmin(vform, result, zdn, zm);
  10406       break;
  10407     case FMULX_z_p_zz:
  10408       fmulx(vform, result, zdn, zm);
  10409       break;
  10410     case FMUL_z_p_zz:
  10411       fmul(vform, result, zdn, zm);
  10412       break;
  10413     case FSCALE_z_p_zz:
  10414       fscale(vform, result, zdn, zm);
  10415       break;
  10416     case FSUBR_z_p_zz:
  10417       fsub(vform, result, zm, zdn);
  10418       break;
  10419     case FSUB_z_p_zz:
  10420       fsub(vform, result, zdn, zm);
  10421       break;
  10422     default:
  10423       VIXL_UNIMPLEMENTED();
  10424       break;
  10425   }
  10426   mov_merging(vform, zdn, pg, result);
  10427 }
  10428 
  10429 void Simulator::VisitSVEFPArithmeticWithImm_Predicated(
  10430     const Instruction* instr) {
  10431   VectorFormat vform = instr->GetSVEVectorFormat();
  10432   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
  10433     VIXL_UNIMPLEMENTED();
  10434   }
  10435 
  10436   SimVRegister& zdn = ReadVRegister(instr->GetRd());
  10437   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  10438   SimVRegister result;
  10439 
  10440   int i1 = instr->ExtractBit(5);
  10441   SimVRegister add_sub_imm, min_max_imm, mul_imm;
  10442   uint64_t half = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 0.5);
  10443   uint64_t one = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 1.0);
  10444   uint64_t two = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform), 2.0);
  10445   dup_immediate(vform, add_sub_imm, i1 ? one : half);
  10446   dup_immediate(vform, min_max_imm, i1 ? one : 0);
  10447   dup_immediate(vform, mul_imm, i1 ? two : half);
  10448 
  10449   switch (instr->Mask(SVEFPArithmeticWithImm_PredicatedMask)) {
  10450     case FADD_z_p_zs:
  10451       fadd(vform, result, zdn, add_sub_imm);
  10452       break;
  10453     case FMAXNM_z_p_zs:
  10454       fmaxnm(vform, result, zdn, min_max_imm);
  10455       break;
  10456     case FMAX_z_p_zs:
  10457       fmax(vform, result, zdn, min_max_imm);
  10458       break;
  10459     case FMINNM_z_p_zs:
  10460       fminnm(vform, result, zdn, min_max_imm);
  10461       break;
  10462     case FMIN_z_p_zs:
  10463       fmin(vform, result, zdn, min_max_imm);
  10464       break;
  10465     case FMUL_z_p_zs:
  10466       fmul(vform, result, zdn, mul_imm);
  10467       break;
  10468     case FSUBR_z_p_zs:
  10469       fsub(vform, result, add_sub_imm, zdn);
  10470       break;
  10471     case FSUB_z_p_zs:
  10472       fsub(vform, result, zdn, add_sub_imm);
  10473       break;
  10474     default:
  10475       VIXL_UNIMPLEMENTED();
  10476       break;
  10477   }
  10478   mov_merging(vform, zdn, pg, result);
  10479 }
  10480 
  10481 void Simulator::VisitSVEFPTrigMulAddCoefficient(const Instruction* instr) {
  10482   VectorFormat vform = instr->GetSVEVectorFormat();
  10483   SimVRegister& zd = ReadVRegister(instr->GetRd());
  10484   SimVRegister& zm = ReadVRegister(instr->GetRn());
  10485 
  10486   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
  10487 
  10488   switch (instr->Mask(SVEFPTrigMulAddCoefficientMask)) {
  10489     case FTMAD_z_zzi:
  10490       ftmad(vform, zd, zd, zm, instr->ExtractBits(18, 16));
  10491       break;
  10492     default:
  10493       VIXL_UNIMPLEMENTED();
  10494       break;
  10495   }
  10496 }
  10497 
  10498 void Simulator::VisitSVEFPArithmeticUnpredicated(const Instruction* instr) {
  10499   VectorFormat vform = instr->GetSVEVectorFormat();
  10500   SimVRegister& zd = ReadVRegister(instr->GetRd());
  10501   SimVRegister& zn = ReadVRegister(instr->GetRn());
  10502   SimVRegister& zm = ReadVRegister(instr->GetRm());
  10503 
  10504   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
  10505 
  10506   switch (instr->Mask(SVEFPArithmeticUnpredicatedMask)) {
  10507     case FADD_z_zz:
  10508       fadd(vform, zd, zn, zm);
  10509       break;
  10510     case FMUL_z_zz:
  10511       fmul(vform, zd, zn, zm);
  10512       break;
  10513     case FRECPS_z_zz:
  10514       frecps(vform, zd, zn, zm);
  10515       break;
  10516     case FRSQRTS_z_zz:
  10517       frsqrts(vform, zd, zn, zm);
  10518       break;
  10519     case FSUB_z_zz:
  10520       fsub(vform, zd, zn, zm);
  10521       break;
  10522     case FTSMUL_z_zz:
  10523       ftsmul(vform, zd, zn, zm);
  10524       break;
  10525     default:
  10526       VIXL_UNIMPLEMENTED();
  10527       break;
  10528   }
  10529 }
  10530 
  10531 void Simulator::VisitSVEFPCompareVectors(const Instruction* instr) {
  10532   SimPRegister& pd = ReadPRegister(instr->GetPd());
  10533   SimVRegister& zn = ReadVRegister(instr->GetRn());
  10534   SimVRegister& zm = ReadVRegister(instr->GetRm());
  10535   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  10536   VectorFormat vform = instr->GetSVEVectorFormat();
  10537   SimVRegister result;
  10538 
  10539   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
  10540 
  10541   switch (instr->Mask(SVEFPCompareVectorsMask)) {
  10542     case FACGE_p_p_zz:
  10543       fabscmp(vform, result, zn, zm, ge);
  10544       break;
  10545     case FACGT_p_p_zz:
  10546       fabscmp(vform, result, zn, zm, gt);
  10547       break;
  10548     case FCMEQ_p_p_zz:
  10549       fcmp(vform, result, zn, zm, eq);
  10550       break;
  10551     case FCMGE_p_p_zz:
  10552       fcmp(vform, result, zn, zm, ge);
  10553       break;
  10554     case FCMGT_p_p_zz:
  10555       fcmp(vform, result, zn, zm, gt);
  10556       break;
  10557     case FCMNE_p_p_zz:
  10558       fcmp(vform, result, zn, zm, ne);
  10559       break;
  10560     case FCMUO_p_p_zz:
  10561       fcmp(vform, result, zn, zm, uo);
  10562       break;
  10563     default:
  10564       VIXL_UNIMPLEMENTED();
  10565       break;
  10566   }
  10567 
  10568   ExtractFromSimVRegister(vform, pd, result);
  10569   mov_zeroing(pd, pg, pd);
  10570 }
  10571 
  10572 void Simulator::VisitSVEFPCompareWithZero(const Instruction* instr) {
  10573   SimPRegister& pd = ReadPRegister(instr->GetPd());
  10574   SimVRegister& zn = ReadVRegister(instr->GetRn());
  10575   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  10576   VectorFormat vform = instr->GetSVEVectorFormat();
  10577 
  10578   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
  10579 
  10580   SimVRegister result;
  10581   SimVRegister zeros;
  10582   dup_immediate(kFormatVnD, zeros, 0);
  10583 
  10584   switch (instr->Mask(SVEFPCompareWithZeroMask)) {
  10585     case FCMEQ_p_p_z0:
  10586       fcmp(vform, result, zn, zeros, eq);
  10587       break;
  10588     case FCMGE_p_p_z0:
  10589       fcmp(vform, result, zn, zeros, ge);
  10590       break;
  10591     case FCMGT_p_p_z0:
  10592       fcmp(vform, result, zn, zeros, gt);
  10593       break;
  10594     case FCMLE_p_p_z0:
  10595       fcmp(vform, result, zn, zeros, le);
  10596       break;
  10597     case FCMLT_p_p_z0:
  10598       fcmp(vform, result, zn, zeros, lt);
  10599       break;
  10600     case FCMNE_p_p_z0:
  10601       fcmp(vform, result, zn, zeros, ne);
  10602       break;
  10603     default:
  10604       VIXL_UNIMPLEMENTED();
  10605       break;
  10606   }
  10607 
  10608   ExtractFromSimVRegister(vform, pd, result);
  10609   mov_zeroing(pd, pg, pd);
  10610 }
  10611 
  10612 void Simulator::VisitSVEFPComplexAddition(const Instruction* instr) {
  10613   VectorFormat vform = instr->GetSVEVectorFormat();
  10614 
  10615   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
  10616     VIXL_UNIMPLEMENTED();
  10617   }
  10618 
  10619   SimVRegister& zdn = ReadVRegister(instr->GetRd());
  10620   SimVRegister& zm = ReadVRegister(instr->GetRn());
  10621   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  10622   int rot = instr->ExtractBit(16);
  10623 
  10624   SimVRegister result;
  10625 
  10626   switch (instr->Mask(SVEFPComplexAdditionMask)) {
  10627     case FCADD_z_p_zz:
  10628       fcadd(vform, result, zdn, zm, rot);
  10629       break;
  10630     default:
  10631       VIXL_UNIMPLEMENTED();
  10632       break;
  10633   }
  10634   mov_merging(vform, zdn, pg, result);
  10635 }
  10636 
  10637 void Simulator::VisitSVEFPComplexMulAdd(const Instruction* instr) {
  10638   VectorFormat vform = instr->GetSVEVectorFormat();
  10639 
  10640   if (LaneSizeInBitsFromFormat(vform) == kBRegSize) {
  10641     VIXL_UNIMPLEMENTED();
  10642   }
  10643 
  10644   SimVRegister& zda = ReadVRegister(instr->GetRd());
  10645   SimVRegister& zn = ReadVRegister(instr->GetRn());
  10646   SimVRegister& zm = ReadVRegister(instr->GetRm());
  10647   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  10648   int rot = instr->ExtractBits(14, 13);
  10649 
  10650   SimVRegister result;
  10651 
  10652   switch (instr->Mask(SVEFPComplexMulAddMask)) {
  10653     case FCMLA_z_p_zzz:
  10654       fcmla(vform, result, zn, zm, zda, rot);
  10655       break;
  10656     default:
  10657       VIXL_UNIMPLEMENTED();
  10658       break;
  10659   }
  10660   mov_merging(vform, zda, pg, result);
  10661 }
  10662 
  10663 void Simulator::VisitSVEFPComplexMulAddIndex(const Instruction* instr) {
  10664   SimVRegister& zda = ReadVRegister(instr->GetRd());
  10665   SimVRegister& zn = ReadVRegister(instr->GetRn());
  10666   int rot = instr->ExtractBits(11, 10);
  10667   unsigned zm_code = instr->GetRm();
  10668   int index = -1;
  10669   VectorFormat vform, vform_dup;
  10670 
  10671   switch (instr->Mask(SVEFPComplexMulAddIndexMask)) {
  10672     case FCMLA_z_zzzi_h:
  10673       vform = kFormatVnH;
  10674       vform_dup = kFormatVnS;
  10675       index = zm_code >> 3;
  10676       zm_code &= 0x7;
  10677       break;
  10678     case FCMLA_z_zzzi_s:
  10679       vform = kFormatVnS;
  10680       vform_dup = kFormatVnD;
  10681       index = zm_code >> 4;
  10682       zm_code &= 0xf;
  10683       break;
  10684     default:
  10685       VIXL_UNIMPLEMENTED();
  10686       break;
  10687   }
  10688 
  10689   if (index >= 0) {
  10690     SimVRegister temp;
  10691     dup_elements_to_segments(vform_dup, temp, ReadVRegister(zm_code), index);
  10692     fcmla(vform, zda, zn, temp, zda, rot);
  10693   }
  10694 }
  10695 
  10696 typedef LogicVRegister (Simulator::*FastReduceFn)(VectorFormat vform,
  10697                                                   LogicVRegister dst,
  10698                                                   const LogicVRegister& src);
  10699 
  10700 void Simulator::VisitSVEFPFastReduction(const Instruction* instr) {
  10701   VectorFormat vform = instr->GetSVEVectorFormat();
  10702   SimVRegister& vd = ReadVRegister(instr->GetRd());
  10703   SimVRegister& zn = ReadVRegister(instr->GetRn());
  10704   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  10705   int lane_size = LaneSizeInBitsFromFormat(vform);
  10706 
  10707   uint64_t inactive_value = 0;
  10708   FastReduceFn fn = nullptr;
  10709 
  10710   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
  10711 
  10712   switch (instr->Mask(SVEFPFastReductionMask)) {
  10713     case FADDV_v_p_z:
  10714       fn = &Simulator::faddv;
  10715       break;
  10716     case FMAXNMV_v_p_z:
  10717       inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
  10718       fn = &Simulator::fmaxnmv;
  10719       break;
  10720     case FMAXV_v_p_z:
  10721       inactive_value = FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
  10722       fn = &Simulator::fmaxv;
  10723       break;
  10724     case FMINNMV_v_p_z:
  10725       inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
  10726       fn = &Simulator::fminnmv;
  10727       break;
  10728     case FMINV_v_p_z:
  10729       inactive_value = FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
  10730       fn = &Simulator::fminv;
  10731       break;
  10732     default:
  10733       VIXL_UNIMPLEMENTED();
  10734       break;
  10735   }
  10736 
  10737   SimVRegister scratch;
  10738   dup_immediate(vform, scratch, inactive_value);
  10739   mov_merging(vform, scratch, pg, zn);
  10740   if (fn != nullptr) (this->*fn)(vform, vd, scratch);
  10741 }
  10742 
  10743 void Simulator::VisitSVEFPMulIndex(const Instruction* instr) {
  10744   VectorFormat vform = kFormatUndefined;
  10745 
  10746   switch (instr->Mask(SVEFPMulIndexMask)) {
  10747     case FMUL_z_zzi_d:
  10748       vform = kFormatVnD;
  10749       break;
  10750     case FMUL_z_zzi_h_i3h:
  10751     case FMUL_z_zzi_h:
  10752       vform = kFormatVnH;
  10753       break;
  10754     case FMUL_z_zzi_s:
  10755       vform = kFormatVnS;
  10756       break;
  10757     default:
  10758       VIXL_UNIMPLEMENTED();
  10759       break;
  10760   }
  10761 
  10762   SimVRegister& zd = ReadVRegister(instr->GetRd());
  10763   SimVRegister& zn = ReadVRegister(instr->GetRn());
  10764   SimVRegister temp;
  10765 
  10766   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
  10767   fmul(vform, zd, zn, temp);
  10768 }
  10769 
  10770 void Simulator::VisitSVEFPMulAdd(const Instruction* instr) {
  10771   VectorFormat vform = instr->GetSVEVectorFormat();
  10772   SimVRegister& zd = ReadVRegister(instr->GetRd());
  10773   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  10774   SimVRegister result;
  10775 
  10776   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
  10777 
  10778   if (instr->ExtractBit(15) == 0) {
  10779     // Floating-point multiply-accumulate writing addend.
  10780     SimVRegister& zm = ReadVRegister(instr->GetRm());
  10781     SimVRegister& zn = ReadVRegister(instr->GetRn());
  10782 
  10783     switch (instr->Mask(SVEFPMulAddMask)) {
  10784       // zda = zda + zn * zm
  10785       case FMLA_z_p_zzz:
  10786         fmla(vform, result, zd, zn, zm);
  10787         break;
  10788       // zda = -zda + -zn * zm
  10789       case FNMLA_z_p_zzz:
  10790         fneg(vform, result, zd);
  10791         fmls(vform, result, result, zn, zm);
  10792         break;
  10793       // zda = zda + -zn * zm
  10794       case FMLS_z_p_zzz:
  10795         fmls(vform, result, zd, zn, zm);
  10796         break;
  10797       // zda = -zda + zn * zm
  10798       case FNMLS_z_p_zzz:
  10799         fneg(vform, result, zd);
  10800         fmla(vform, result, result, zn, zm);
  10801         break;
  10802       default:
  10803         VIXL_UNIMPLEMENTED();
  10804         break;
  10805     }
  10806   } else {
  10807     // Floating-point multiply-accumulate writing multiplicand.
  10808     SimVRegister& za = ReadVRegister(instr->GetRm());
  10809     SimVRegister& zm = ReadVRegister(instr->GetRn());
  10810 
  10811     switch (instr->Mask(SVEFPMulAddMask)) {
  10812       // zdn = za + zdn * zm
  10813       case FMAD_z_p_zzz:
  10814         fmla(vform, result, za, zd, zm);
  10815         break;
  10816       // zdn = -za + -zdn * zm
  10817       case FNMAD_z_p_zzz:
  10818         fneg(vform, result, za);
  10819         fmls(vform, result, result, zd, zm);
  10820         break;
  10821       // zdn = za + -zdn * zm
  10822       case FMSB_z_p_zzz:
  10823         fmls(vform, result, za, zd, zm);
  10824         break;
  10825       // zdn = -za + zdn * zm
  10826       case FNMSB_z_p_zzz:
  10827         fneg(vform, result, za);
  10828         fmla(vform, result, result, zd, zm);
  10829         break;
  10830       default:
  10831         VIXL_UNIMPLEMENTED();
  10832         break;
  10833     }
  10834   }
  10835 
  10836   mov_merging(vform, zd, pg, result);
  10837 }
  10838 
  10839 void Simulator::VisitSVEFPMulAddIndex(const Instruction* instr) {
  10840   VectorFormat vform = kFormatUndefined;
  10841 
  10842   switch (instr->Mask(SVEFPMulAddIndexMask)) {
  10843     case FMLA_z_zzzi_d:
  10844     case FMLS_z_zzzi_d:
  10845       vform = kFormatVnD;
  10846       break;
  10847     case FMLA_z_zzzi_s:
  10848     case FMLS_z_zzzi_s:
  10849       vform = kFormatVnS;
  10850       break;
  10851     case FMLA_z_zzzi_h:
  10852     case FMLS_z_zzzi_h:
  10853     case FMLA_z_zzzi_h_i3h:
  10854     case FMLS_z_zzzi_h_i3h:
  10855       vform = kFormatVnH;
  10856       break;
  10857     default:
  10858       VIXL_UNIMPLEMENTED();
  10859       break;
  10860   }
  10861 
  10862   SimVRegister& zd = ReadVRegister(instr->GetRd());
  10863   SimVRegister& zn = ReadVRegister(instr->GetRn());
  10864   SimVRegister temp;
  10865 
  10866   dup_elements_to_segments(vform, temp, instr->GetSVEMulZmAndIndex());
  10867   if (instr->ExtractBit(10) == 1) {
  10868     fmls(vform, zd, zd, zn, temp);
  10869   } else {
  10870     fmla(vform, zd, zd, zn, temp);
  10871   }
  10872 }
  10873 
  10874 void Simulator::VisitSVEFPConvertToInt(const Instruction* instr) {
  10875   SimVRegister& zd = ReadVRegister(instr->GetRd());
  10876   SimVRegister& zn = ReadVRegister(instr->GetRn());
  10877   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  10878   int dst_data_size;
  10879   int src_data_size;
  10880 
  10881   switch (instr->Mask(SVEFPConvertToIntMask)) {
  10882     case FCVTZS_z_p_z_d2w:
  10883     case FCVTZU_z_p_z_d2w:
  10884       dst_data_size = kSRegSize;
  10885       src_data_size = kDRegSize;
  10886       break;
  10887     case FCVTZS_z_p_z_d2x:
  10888     case FCVTZU_z_p_z_d2x:
  10889       dst_data_size = kDRegSize;
  10890       src_data_size = kDRegSize;
  10891       break;
  10892     case FCVTZS_z_p_z_fp162h:
  10893     case FCVTZU_z_p_z_fp162h:
  10894       dst_data_size = kHRegSize;
  10895       src_data_size = kHRegSize;
  10896       break;
  10897     case FCVTZS_z_p_z_fp162w:
  10898     case FCVTZU_z_p_z_fp162w:
  10899       dst_data_size = kSRegSize;
  10900       src_data_size = kHRegSize;
  10901       break;
  10902     case FCVTZS_z_p_z_fp162x:
  10903     case FCVTZU_z_p_z_fp162x:
  10904       dst_data_size = kDRegSize;
  10905       src_data_size = kHRegSize;
  10906       break;
  10907     case FCVTZS_z_p_z_s2w:
  10908     case FCVTZU_z_p_z_s2w:
  10909       dst_data_size = kSRegSize;
  10910       src_data_size = kSRegSize;
  10911       break;
  10912     case FCVTZS_z_p_z_s2x:
  10913     case FCVTZU_z_p_z_s2x:
  10914       dst_data_size = kDRegSize;
  10915       src_data_size = kSRegSize;
  10916       break;
  10917     default:
  10918       VIXL_UNIMPLEMENTED();
  10919       dst_data_size = 0;
  10920       src_data_size = 0;
  10921       break;
  10922   }
  10923 
  10924   VectorFormat vform =
  10925       SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size));
  10926 
  10927   if (instr->ExtractBit(16) == 0) {
  10928     fcvts(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero);
  10929   } else {
  10930     fcvtu(vform, dst_data_size, src_data_size, zd, pg, zn, FPZero);
  10931   }
  10932 }
  10933 
  10934 void Simulator::VisitSVEFPConvertPrecision(const Instruction* instr) {
  10935   SimVRegister& zd = ReadVRegister(instr->GetRd());
  10936   SimVRegister& zn = ReadVRegister(instr->GetRn());
  10937   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  10938   VectorFormat dst_data_size = kFormatUndefined;
  10939   VectorFormat src_data_size = kFormatUndefined;
  10940 
  10941   switch (instr->Mask(SVEFPConvertPrecisionMask)) {
  10942     case FCVT_z_p_z_d2h:
  10943       dst_data_size = kFormatVnH;
  10944       src_data_size = kFormatVnD;
  10945       break;
  10946     case FCVT_z_p_z_d2s:
  10947       dst_data_size = kFormatVnS;
  10948       src_data_size = kFormatVnD;
  10949       break;
  10950     case FCVT_z_p_z_h2d:
  10951       dst_data_size = kFormatVnD;
  10952       src_data_size = kFormatVnH;
  10953       break;
  10954     case FCVT_z_p_z_h2s:
  10955       dst_data_size = kFormatVnS;
  10956       src_data_size = kFormatVnH;
  10957       break;
  10958     case FCVT_z_p_z_s2d:
  10959       dst_data_size = kFormatVnD;
  10960       src_data_size = kFormatVnS;
  10961       break;
  10962     case FCVT_z_p_z_s2h:
  10963       dst_data_size = kFormatVnH;
  10964       src_data_size = kFormatVnS;
  10965       break;
  10966     default:
  10967       VIXL_UNIMPLEMENTED();
  10968       break;
  10969   }
  10970 
  10971   fcvt(dst_data_size, src_data_size, zd, pg, zn);
  10972 }
  10973 
  10974 void Simulator::VisitSVEFPUnaryOp(const Instruction* instr) {
  10975   SimVRegister& zd = ReadVRegister(instr->GetRd());
  10976   SimVRegister& zn = ReadVRegister(instr->GetRn());
  10977   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  10978   VectorFormat vform = instr->GetSVEVectorFormat();
  10979   SimVRegister result;
  10980 
  10981   switch (instr->Mask(SVEFPUnaryOpMask)) {
  10982     case FRECPX_z_p_z:
  10983       frecpx(vform, result, zn);
  10984       break;
  10985     case FSQRT_z_p_z:
  10986       fsqrt(vform, result, zn);
  10987       break;
  10988     default:
  10989       VIXL_UNIMPLEMENTED();
  10990       break;
  10991   }
  10992   mov_merging(vform, zd, pg, result);
  10993 }
  10994 
  10995 void Simulator::VisitSVEFPRoundToIntegralValue(const Instruction* instr) {
  10996   SimVRegister& zd = ReadVRegister(instr->GetRd());
  10997   SimVRegister& zn = ReadVRegister(instr->GetRn());
  10998   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  10999   VectorFormat vform = instr->GetSVEVectorFormat();
  11000   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
  11001   bool exact_exception = false;
  11002 
  11003   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
  11004 
  11005   switch (instr->Mask(SVEFPRoundToIntegralValueMask)) {
  11006     case FRINTA_z_p_z:
  11007       fpcr_rounding = FPTieAway;
  11008       break;
  11009     case FRINTI_z_p_z:
  11010       break;  // Use FPCR rounding mode.
  11011     case FRINTM_z_p_z:
  11012       fpcr_rounding = FPNegativeInfinity;
  11013       break;
  11014     case FRINTN_z_p_z:
  11015       fpcr_rounding = FPTieEven;
  11016       break;
  11017     case FRINTP_z_p_z:
  11018       fpcr_rounding = FPPositiveInfinity;
  11019       break;
  11020     case FRINTX_z_p_z:
  11021       exact_exception = true;
  11022       break;
  11023     case FRINTZ_z_p_z:
  11024       fpcr_rounding = FPZero;
  11025       break;
  11026     default:
  11027       VIXL_UNIMPLEMENTED();
  11028       break;
  11029   }
  11030 
  11031   SimVRegister result;
  11032   frint(vform, result, zn, fpcr_rounding, exact_exception, kFrintToInteger);
  11033   mov_merging(vform, zd, pg, result);
  11034 }
  11035 
  11036 void Simulator::VisitSVEIntConvertToFP(const Instruction* instr) {
  11037   SimVRegister& zd = ReadVRegister(instr->GetRd());
  11038   SimVRegister& zn = ReadVRegister(instr->GetRn());
  11039   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  11040   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
  11041   int dst_data_size;
  11042   int src_data_size;
  11043 
  11044   switch (instr->Mask(SVEIntConvertToFPMask)) {
  11045     case SCVTF_z_p_z_h2fp16:
  11046     case UCVTF_z_p_z_h2fp16:
  11047       dst_data_size = kHRegSize;
  11048       src_data_size = kHRegSize;
  11049       break;
  11050     case SCVTF_z_p_z_w2d:
  11051     case UCVTF_z_p_z_w2d:
  11052       dst_data_size = kDRegSize;
  11053       src_data_size = kSRegSize;
  11054       break;
  11055     case SCVTF_z_p_z_w2fp16:
  11056     case UCVTF_z_p_z_w2fp16:
  11057       dst_data_size = kHRegSize;
  11058       src_data_size = kSRegSize;
  11059       break;
  11060     case SCVTF_z_p_z_w2s:
  11061     case UCVTF_z_p_z_w2s:
  11062       dst_data_size = kSRegSize;
  11063       src_data_size = kSRegSize;
  11064       break;
  11065     case SCVTF_z_p_z_x2d:
  11066     case UCVTF_z_p_z_x2d:
  11067       dst_data_size = kDRegSize;
  11068       src_data_size = kDRegSize;
  11069       break;
  11070     case SCVTF_z_p_z_x2fp16:
  11071     case UCVTF_z_p_z_x2fp16:
  11072       dst_data_size = kHRegSize;
  11073       src_data_size = kDRegSize;
  11074       break;
  11075     case SCVTF_z_p_z_x2s:
  11076     case UCVTF_z_p_z_x2s:
  11077       dst_data_size = kSRegSize;
  11078       src_data_size = kDRegSize;
  11079       break;
  11080     default:
  11081       VIXL_UNIMPLEMENTED();
  11082       dst_data_size = 0;
  11083       src_data_size = 0;
  11084       break;
  11085   }
  11086 
  11087   VectorFormat vform =
  11088       SVEFormatFromLaneSizeInBits(std::max(dst_data_size, src_data_size));
  11089 
  11090   if (instr->ExtractBit(16) == 0) {
  11091     scvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding);
  11092   } else {
  11093     ucvtf(vform, dst_data_size, src_data_size, zd, pg, zn, fpcr_rounding);
  11094   }
  11095 }
  11096 
  11097 void Simulator::VisitSVEFPUnaryOpUnpredicated(const Instruction* instr) {
  11098   VectorFormat vform = instr->GetSVEVectorFormat();
  11099   SimVRegister& zd = ReadVRegister(instr->GetRd());
  11100   SimVRegister& zn = ReadVRegister(instr->GetRn());
  11101   FPRounding fpcr_rounding = static_cast<FPRounding>(ReadFpcr().GetRMode());
  11102 
  11103   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
  11104 
  11105   switch (instr->Mask(SVEFPUnaryOpUnpredicatedMask)) {
  11106     case FRECPE_z_z:
  11107       frecpe(vform, zd, zn, fpcr_rounding);
  11108       break;
  11109     case FRSQRTE_z_z:
  11110       frsqrte(vform, zd, zn);
  11111       break;
  11112     default:
  11113       VIXL_UNIMPLEMENTED();
  11114       break;
  11115   }
  11116 }
  11117 
  11118 void Simulator::VisitSVEIncDecByPredicateCount(const Instruction* instr) {
  11119   VectorFormat vform = instr->GetSVEVectorFormat();
  11120   SimPRegister& pg = ReadPRegister(instr->ExtractBits(8, 5));
  11121 
  11122   int count = CountActiveLanes(vform, pg);
  11123 
  11124   if (instr->ExtractBit(11) == 0) {
  11125     SimVRegister& zdn = ReadVRegister(instr->GetRd());
  11126     switch (instr->Mask(SVEIncDecByPredicateCountMask)) {
  11127       case DECP_z_p_z:
  11128         sub_uint(vform, zdn, zdn, count);
  11129         break;
  11130       case INCP_z_p_z:
  11131         add_uint(vform, zdn, zdn, count);
  11132         break;
  11133       case SQDECP_z_p_z:
  11134         sub_uint(vform, zdn, zdn, count).SignedSaturate(vform);
  11135         break;
  11136       case SQINCP_z_p_z:
  11137         add_uint(vform, zdn, zdn, count).SignedSaturate(vform);
  11138         break;
  11139       case UQDECP_z_p_z:
  11140         sub_uint(vform, zdn, zdn, count).UnsignedSaturate(vform);
  11141         break;
  11142       case UQINCP_z_p_z:
  11143         add_uint(vform, zdn, zdn, count).UnsignedSaturate(vform);
  11144         break;
  11145       default:
  11146         VIXL_UNIMPLEMENTED();
  11147         break;
  11148     }
  11149   } else {
  11150     bool is_saturating = (instr->ExtractBit(18) == 0);
  11151     bool decrement =
  11152         is_saturating ? instr->ExtractBit(17) : instr->ExtractBit(16);
  11153     bool is_signed = (instr->ExtractBit(16) == 0);
  11154     bool sf = is_saturating ? (instr->ExtractBit(10) != 0) : true;
  11155     unsigned width = sf ? kXRegSize : kWRegSize;
  11156 
  11157     switch (instr->Mask(SVEIncDecByPredicateCountMask)) {
  11158       case DECP_r_p_r:
  11159       case INCP_r_p_r:
  11160       case SQDECP_r_p_r_sx:
  11161       case SQDECP_r_p_r_x:
  11162       case SQINCP_r_p_r_sx:
  11163       case SQINCP_r_p_r_x:
  11164       case UQDECP_r_p_r_uw:
  11165       case UQDECP_r_p_r_x:
  11166       case UQINCP_r_p_r_uw:
  11167       case UQINCP_r_p_r_x:
  11168         WriteXRegister(instr->GetRd(),
  11169                        IncDecN(ReadXRegister(instr->GetRd()),
  11170                                decrement ? -count : count,
  11171                                width,
  11172                                is_saturating,
  11173                                is_signed));
  11174         break;
  11175       default:
  11176         VIXL_UNIMPLEMENTED();
  11177         break;
  11178     }
  11179   }
  11180 }
  11181 
  11182 uint64_t Simulator::IncDecN(uint64_t acc,
  11183                             int64_t delta,
  11184                             unsigned n,
  11185                             bool is_saturating,
  11186                             bool is_signed) {
  11187   VIXL_ASSERT(n <= 64);
  11188   VIXL_ASSERT(IsIntN(n, delta));
  11189 
  11190   uint64_t sign_mask = UINT64_C(1) << (n - 1);
  11191   uint64_t mask = GetUintMask(n);
  11192 
  11193   acc &= mask;  // Ignore initial accumulator high bits.
  11194   uint64_t result = (acc + delta) & mask;
  11195 
  11196   bool result_negative = ((result & sign_mask) != 0);
  11197 
  11198   if (is_saturating) {
  11199     if (is_signed) {
  11200       bool acc_negative = ((acc & sign_mask) != 0);
  11201       bool delta_negative = delta < 0;
  11202 
  11203       // If the signs of the operands are the same, but different from the
  11204       // result, there was an overflow.
  11205       if ((acc_negative == delta_negative) &&
  11206           (acc_negative != result_negative)) {
  11207         if (result_negative) {
  11208           // Saturate to [..., INT<n>_MAX].
  11209           result_negative = false;
  11210           result = mask & ~sign_mask;  // E.g. 0x000000007fffffff
  11211         } else {
  11212           // Saturate to [INT<n>_MIN, ...].
  11213           result_negative = true;
  11214           result = ~mask | sign_mask;  // E.g. 0xffffffff80000000
  11215         }
  11216       }
  11217     } else {
  11218       if ((delta < 0) && (result > acc)) {
  11219         // Saturate to [0, ...].
  11220         result = 0;
  11221       } else if ((delta > 0) && (result < acc)) {
  11222         // Saturate to [..., UINT<n>_MAX].
  11223         result = mask;
  11224       }
  11225     }
  11226   }
  11227 
  11228   // Sign-extend if necessary.
  11229   if (result_negative && is_signed) result |= ~mask;
  11230 
  11231   return result;
  11232 }
  11233 
  11234 void Simulator::VisitSVEIndexGeneration(const Instruction* instr) {
  11235   VectorFormat vform = instr->GetSVEVectorFormat();
  11236   SimVRegister& zd = ReadVRegister(instr->GetRd());
  11237   switch (instr->Mask(SVEIndexGenerationMask)) {
  11238     case INDEX_z_ii:
  11239     case INDEX_z_ir:
  11240     case INDEX_z_ri:
  11241     case INDEX_z_rr: {
  11242       uint64_t start = instr->ExtractBit(10) ? ReadXRegister(instr->GetRn())
  11243                                              : instr->ExtractSignedBits(9, 5);
  11244       uint64_t step = instr->ExtractBit(11) ? ReadXRegister(instr->GetRm())
  11245                                             : instr->ExtractSignedBits(20, 16);
  11246       index(vform, zd, start, step);
  11247       break;
  11248     }
  11249     default:
  11250       VIXL_UNIMPLEMENTED();
  11251       break;
  11252   }
  11253 }
  11254 
  11255 void Simulator::VisitSVEIntArithmeticUnpredicated(const Instruction* instr) {
  11256   VectorFormat vform = instr->GetSVEVectorFormat();
  11257   SimVRegister& zd = ReadVRegister(instr->GetRd());
  11258   SimVRegister& zn = ReadVRegister(instr->GetRn());
  11259   SimVRegister& zm = ReadVRegister(instr->GetRm());
  11260   switch (instr->Mask(SVEIntArithmeticUnpredicatedMask)) {
  11261     case ADD_z_zz:
  11262       add(vform, zd, zn, zm);
  11263       break;
  11264     case SQADD_z_zz:
  11265       add(vform, zd, zn, zm).SignedSaturate(vform);
  11266       break;
  11267     case SQSUB_z_zz:
  11268       sub(vform, zd, zn, zm).SignedSaturate(vform);
  11269       break;
  11270     case SUB_z_zz:
  11271       sub(vform, zd, zn, zm);
  11272       break;
  11273     case UQADD_z_zz:
  11274       add(vform, zd, zn, zm).UnsignedSaturate(vform);
  11275       break;
  11276     case UQSUB_z_zz:
  11277       sub(vform, zd, zn, zm).UnsignedSaturate(vform);
  11278       break;
  11279     default:
  11280       VIXL_UNIMPLEMENTED();
  11281       break;
  11282   }
  11283 }
  11284 
  11285 void Simulator::VisitSVEIntAddSubtractVectors_Predicated(
  11286     const Instruction* instr) {
  11287   VectorFormat vform = instr->GetSVEVectorFormat();
  11288   SimVRegister& zdn = ReadVRegister(instr->GetRd());
  11289   SimVRegister& zm = ReadVRegister(instr->GetRn());
  11290   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  11291   SimVRegister result;
  11292 
  11293   switch (instr->Mask(SVEIntAddSubtractVectors_PredicatedMask)) {
  11294     case ADD_z_p_zz:
  11295       add(vform, result, zdn, zm);
  11296       break;
  11297     case SUBR_z_p_zz:
  11298       sub(vform, result, zm, zdn);
  11299       break;
  11300     case SUB_z_p_zz:
  11301       sub(vform, result, zdn, zm);
  11302       break;
  11303     default:
  11304       VIXL_UNIMPLEMENTED();
  11305       break;
  11306   }
  11307   mov_merging(vform, zdn, pg, result);
  11308 }
  11309 
  11310 void Simulator::VisitSVEBitwiseLogical_Predicated(const Instruction* instr) {
  11311   VectorFormat vform = instr->GetSVEVectorFormat();
  11312   SimVRegister& zdn = ReadVRegister(instr->GetRd());
  11313   SimVRegister& zm = ReadVRegister(instr->GetRn());
  11314   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  11315   SimVRegister result;
  11316 
  11317   switch (instr->Mask(SVEBitwiseLogical_PredicatedMask)) {
  11318     case AND_z_p_zz:
  11319       SVEBitwiseLogicalUnpredicatedHelper(AND, vform, result, zdn, zm);
  11320       break;
  11321     case BIC_z_p_zz:
  11322       SVEBitwiseLogicalUnpredicatedHelper(BIC, vform, result, zdn, zm);
  11323       break;
  11324     case EOR_z_p_zz:
  11325       SVEBitwiseLogicalUnpredicatedHelper(EOR, vform, result, zdn, zm);
  11326       break;
  11327     case ORR_z_p_zz:
  11328       SVEBitwiseLogicalUnpredicatedHelper(ORR, vform, result, zdn, zm);
  11329       break;
  11330     default:
  11331       VIXL_UNIMPLEMENTED();
  11332       break;
  11333   }
  11334   mov_merging(vform, zdn, pg, result);
  11335 }
  11336 
  11337 void Simulator::VisitSVEIntMulVectors_Predicated(const Instruction* instr) {
  11338   VectorFormat vform = instr->GetSVEVectorFormat();
  11339   SimVRegister& zdn = ReadVRegister(instr->GetRd());
  11340   SimVRegister& zm = ReadVRegister(instr->GetRn());
  11341   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  11342   SimVRegister result;
  11343 
  11344   switch (instr->Mask(SVEIntMulVectors_PredicatedMask)) {
  11345     case MUL_z_p_zz:
  11346       mul(vform, result, zdn, zm);
  11347       break;
  11348     case SMULH_z_p_zz:
  11349       smulh(vform, result, zdn, zm);
  11350       break;
  11351     case UMULH_z_p_zz:
  11352       umulh(vform, result, zdn, zm);
  11353       break;
  11354     default:
  11355       VIXL_UNIMPLEMENTED();
  11356       break;
  11357   }
  11358   mov_merging(vform, zdn, pg, result);
  11359 }
  11360 
  11361 void Simulator::VisitSVEIntMinMaxDifference_Predicated(
  11362     const Instruction* instr) {
  11363   VectorFormat vform = instr->GetSVEVectorFormat();
  11364   SimVRegister& zdn = ReadVRegister(instr->GetRd());
  11365   SimVRegister& zm = ReadVRegister(instr->GetRn());
  11366   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  11367   SimVRegister result;
  11368 
  11369   switch (instr->Mask(SVEIntMinMaxDifference_PredicatedMask)) {
  11370     case SABD_z_p_zz:
  11371       absdiff(vform, result, zdn, zm, true);
  11372       break;
  11373     case SMAX_z_p_zz:
  11374       smax(vform, result, zdn, zm);
  11375       break;
  11376     case SMIN_z_p_zz:
  11377       smin(vform, result, zdn, zm);
  11378       break;
  11379     case UABD_z_p_zz:
  11380       absdiff(vform, result, zdn, zm, false);
  11381       break;
  11382     case UMAX_z_p_zz:
  11383       umax(vform, result, zdn, zm);
  11384       break;
  11385     case UMIN_z_p_zz:
  11386       umin(vform, result, zdn, zm);
  11387       break;
  11388     default:
  11389       VIXL_UNIMPLEMENTED();
  11390       break;
  11391   }
  11392   mov_merging(vform, zdn, pg, result);
  11393 }
  11394 
  11395 void Simulator::VisitSVEIntMulImm_Unpredicated(const Instruction* instr) {
  11396   VectorFormat vform = instr->GetSVEVectorFormat();
  11397   SimVRegister& zd = ReadVRegister(instr->GetRd());
  11398   SimVRegister scratch;
  11399 
  11400   switch (instr->Mask(SVEIntMulImm_UnpredicatedMask)) {
  11401     case MUL_z_zi:
  11402       dup_immediate(vform, scratch, instr->GetImmSVEIntWideSigned());
  11403       mul(vform, zd, zd, scratch);
  11404       break;
  11405     default:
  11406       VIXL_UNIMPLEMENTED();
  11407       break;
  11408   }
  11409 }
  11410 
  11411 void Simulator::VisitSVEIntDivideVectors_Predicated(const Instruction* instr) {
  11412   VectorFormat vform = instr->GetSVEVectorFormat();
  11413   SimVRegister& zdn = ReadVRegister(instr->GetRd());
  11414   SimVRegister& zm = ReadVRegister(instr->GetRn());
  11415   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  11416   SimVRegister result;
  11417 
  11418   VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
  11419 
  11420   switch (instr->Mask(SVEIntDivideVectors_PredicatedMask)) {
  11421     case SDIVR_z_p_zz:
  11422       sdiv(vform, result, zm, zdn);
  11423       break;
  11424     case SDIV_z_p_zz:
  11425       sdiv(vform, result, zdn, zm);
  11426       break;
  11427     case UDIVR_z_p_zz:
  11428       udiv(vform, result, zm, zdn);
  11429       break;
  11430     case UDIV_z_p_zz:
  11431       udiv(vform, result, zdn, zm);
  11432       break;
  11433     default:
  11434       VIXL_UNIMPLEMENTED();
  11435       break;
  11436   }
  11437   mov_merging(vform, zdn, pg, result);
  11438 }
  11439 
  11440 void Simulator::VisitSVEIntMinMaxImm_Unpredicated(const Instruction* instr) {
  11441   VectorFormat vform = instr->GetSVEVectorFormat();
  11442   SimVRegister& zd = ReadVRegister(instr->GetRd());
  11443   SimVRegister scratch;
  11444 
  11445   uint64_t unsigned_imm = instr->GetImmSVEIntWideUnsigned();
  11446   int64_t signed_imm = instr->GetImmSVEIntWideSigned();
  11447 
  11448   switch (instr->Mask(SVEIntMinMaxImm_UnpredicatedMask)) {
  11449     case SMAX_z_zi:
  11450       dup_immediate(vform, scratch, signed_imm);
  11451       smax(vform, zd, zd, scratch);
  11452       break;
  11453     case SMIN_z_zi:
  11454       dup_immediate(vform, scratch, signed_imm);
  11455       smin(vform, zd, zd, scratch);
  11456       break;
  11457     case UMAX_z_zi:
  11458       dup_immediate(vform, scratch, unsigned_imm);
  11459       umax(vform, zd, zd, scratch);
  11460       break;
  11461     case UMIN_z_zi:
  11462       dup_immediate(vform, scratch, unsigned_imm);
  11463       umin(vform, zd, zd, scratch);
  11464       break;
  11465     default:
  11466       VIXL_UNIMPLEMENTED();
  11467       break;
  11468   }
  11469 }
  11470 
  11471 void Simulator::VisitSVEIntCompareScalarCountAndLimit(
  11472     const Instruction* instr) {
  11473   unsigned rn_code = instr->GetRn();
  11474   unsigned rm_code = instr->GetRm();
  11475   SimPRegister& pd = ReadPRegister(instr->GetPd());
  11476   VectorFormat vform = instr->GetSVEVectorFormat();
  11477 
  11478   bool is_64_bit = instr->ExtractBit(12) == 1;
  11479   int rsize = is_64_bit ? kXRegSize : kWRegSize;
  11480   uint64_t mask = is_64_bit ? kXRegMask : kWRegMask;
  11481 
  11482   uint64_t usrc1 = ReadXRegister(rn_code);
  11483   int64_t ssrc2 = is_64_bit ? ReadXRegister(rm_code) : ReadWRegister(rm_code);
  11484   uint64_t usrc2 = ssrc2 & mask;
  11485 
  11486   bool reverse = (form_hash_ == "whilege_p_p_rr"_h) ||
  11487                  (form_hash_ == "whilegt_p_p_rr"_h) ||
  11488                  (form_hash_ == "whilehi_p_p_rr"_h) ||
  11489                  (form_hash_ == "whilehs_p_p_rr"_h);
  11490 
  11491   int lane_count = LaneCountFromFormat(vform);
  11492   bool last = true;
  11493   for (int i = 0; i < lane_count; i++) {
  11494     usrc1 &= mask;
  11495     int64_t ssrc1 = ExtractSignedBitfield64(rsize - 1, 0, usrc1);
  11496 
  11497     bool cond = false;
  11498     switch (form_hash_) {
  11499       case "whilele_p_p_rr"_h:
  11500         cond = ssrc1 <= ssrc2;
  11501         break;
  11502       case "whilelo_p_p_rr"_h:
  11503         cond = usrc1 < usrc2;
  11504         break;
  11505       case "whilels_p_p_rr"_h:
  11506         cond = usrc1 <= usrc2;
  11507         break;
  11508       case "whilelt_p_p_rr"_h:
  11509         cond = ssrc1 < ssrc2;
  11510         break;
  11511       case "whilege_p_p_rr"_h:
  11512         cond = ssrc1 >= ssrc2;
  11513         break;
  11514       case "whilegt_p_p_rr"_h:
  11515         cond = ssrc1 > ssrc2;
  11516         break;
  11517       case "whilehi_p_p_rr"_h:
  11518         cond = usrc1 > usrc2;
  11519         break;
  11520       case "whilehs_p_p_rr"_h:
  11521         cond = usrc1 >= usrc2;
  11522         break;
  11523       default:
  11524         VIXL_UNIMPLEMENTED();
  11525         break;
  11526     }
  11527     last = last && cond;
  11528     LogicPRegister dst(pd);
  11529     int lane = reverse ? ((lane_count - 1) - i) : i;
  11530     dst.SetActive(vform, lane, last);
  11531     usrc1 += reverse ? -1 : 1;
  11532   }
  11533 
  11534   PredTest(vform, GetPTrue(), pd);
  11535   LogSystemRegister(NZCV);
  11536 }
  11537 
  11538 void Simulator::VisitSVEConditionallyTerminateScalars(
  11539     const Instruction* instr) {
  11540   unsigned rn_code = instr->GetRn();
  11541   unsigned rm_code = instr->GetRm();
  11542   bool is_64_bit = instr->ExtractBit(22) == 1;
  11543   uint64_t src1 = is_64_bit ? ReadXRegister(rn_code) : ReadWRegister(rn_code);
  11544   uint64_t src2 = is_64_bit ? ReadXRegister(rm_code) : ReadWRegister(rm_code);
  11545   bool term = false;
  11546   switch (instr->Mask(SVEConditionallyTerminateScalarsMask)) {
  11547     case CTERMEQ_rr:
  11548       term = src1 == src2;
  11549       break;
  11550     case CTERMNE_rr:
  11551       term = src1 != src2;
  11552       break;
  11553     default:
  11554       VIXL_UNIMPLEMENTED();
  11555       break;
  11556   }
  11557   ReadNzcv().SetN(term ? 1 : 0);
  11558   ReadNzcv().SetV(term ? 0 : !ReadC());
  11559   LogSystemRegister(NZCV);
  11560 }
  11561 
  11562 void Simulator::VisitSVEIntCompareSignedImm(const Instruction* instr) {
  11563   bool commute_inputs = false;
  11564   Condition cond = al;
  11565   switch (instr->Mask(SVEIntCompareSignedImmMask)) {
  11566     case CMPEQ_p_p_zi:
  11567       cond = eq;
  11568       break;
  11569     case CMPGE_p_p_zi:
  11570       cond = ge;
  11571       break;
  11572     case CMPGT_p_p_zi:
  11573       cond = gt;
  11574       break;
  11575     case CMPLE_p_p_zi:
  11576       cond = ge;
  11577       commute_inputs = true;
  11578       break;
  11579     case CMPLT_p_p_zi:
  11580       cond = gt;
  11581       commute_inputs = true;
  11582       break;
  11583     case CMPNE_p_p_zi:
  11584       cond = ne;
  11585       break;
  11586     default:
  11587       VIXL_UNIMPLEMENTED();
  11588       break;
  11589   }
  11590 
  11591   VectorFormat vform = instr->GetSVEVectorFormat();
  11592   SimVRegister src2;
  11593   dup_immediate(vform,
  11594                 src2,
  11595                 ExtractSignedBitfield64(4, 0, instr->ExtractBits(20, 16)));
  11596   SVEIntCompareVectorsHelper(cond,
  11597                              vform,
  11598                              ReadPRegister(instr->GetPd()),
  11599                              ReadPRegister(instr->GetPgLow8()),
  11600                              commute_inputs ? src2
  11601                                             : ReadVRegister(instr->GetRn()),
  11602                              commute_inputs ? ReadVRegister(instr->GetRn())
  11603                                             : src2);
  11604 }
  11605 
  11606 void Simulator::VisitSVEIntCompareUnsignedImm(const Instruction* instr) {
  11607   bool commute_inputs = false;
  11608   Condition cond = al;
  11609   switch (instr->Mask(SVEIntCompareUnsignedImmMask)) {
  11610     case CMPHI_p_p_zi:
  11611       cond = hi;
  11612       break;
  11613     case CMPHS_p_p_zi:
  11614       cond = hs;
  11615       break;
  11616     case CMPLO_p_p_zi:
  11617       cond = hi;
  11618       commute_inputs = true;
  11619       break;
  11620     case CMPLS_p_p_zi:
  11621       cond = hs;
  11622       commute_inputs = true;
  11623       break;
  11624     default:
  11625       VIXL_UNIMPLEMENTED();
  11626       break;
  11627   }
  11628 
  11629   VectorFormat vform = instr->GetSVEVectorFormat();
  11630   SimVRegister src2;
  11631   dup_immediate(vform, src2, instr->ExtractBits(20, 14));
  11632   SVEIntCompareVectorsHelper(cond,
  11633                              vform,
  11634                              ReadPRegister(instr->GetPd()),
  11635                              ReadPRegister(instr->GetPgLow8()),
  11636                              commute_inputs ? src2
  11637                                             : ReadVRegister(instr->GetRn()),
  11638                              commute_inputs ? ReadVRegister(instr->GetRn())
  11639                                             : src2);
  11640 }
  11641 
  11642 void Simulator::VisitSVEIntCompareVectors(const Instruction* instr) {
  11643   Instr op = instr->Mask(SVEIntCompareVectorsMask);
  11644   bool is_wide_elements = false;
  11645   switch (op) {
  11646     case CMPEQ_p_p_zw:
  11647     case CMPGE_p_p_zw:
  11648     case CMPGT_p_p_zw:
  11649     case CMPHI_p_p_zw:
  11650     case CMPHS_p_p_zw:
  11651     case CMPLE_p_p_zw:
  11652     case CMPLO_p_p_zw:
  11653     case CMPLS_p_p_zw:
  11654     case CMPLT_p_p_zw:
  11655     case CMPNE_p_p_zw:
  11656       is_wide_elements = true;
  11657       break;
  11658   }
  11659 
  11660   Condition cond;
  11661   switch (op) {
  11662     case CMPEQ_p_p_zw:
  11663     case CMPEQ_p_p_zz:
  11664       cond = eq;
  11665       break;
  11666     case CMPGE_p_p_zw:
  11667     case CMPGE_p_p_zz:
  11668       cond = ge;
  11669       break;
  11670     case CMPGT_p_p_zw:
  11671     case CMPGT_p_p_zz:
  11672       cond = gt;
  11673       break;
  11674     case CMPHI_p_p_zw:
  11675     case CMPHI_p_p_zz:
  11676       cond = hi;
  11677       break;
  11678     case CMPHS_p_p_zw:
  11679     case CMPHS_p_p_zz:
  11680       cond = hs;
  11681       break;
  11682     case CMPNE_p_p_zw:
  11683     case CMPNE_p_p_zz:
  11684       cond = ne;
  11685       break;
  11686     case CMPLE_p_p_zw:
  11687       cond = le;
  11688       break;
  11689     case CMPLO_p_p_zw:
  11690       cond = lo;
  11691       break;
  11692     case CMPLS_p_p_zw:
  11693       cond = ls;
  11694       break;
  11695     case CMPLT_p_p_zw:
  11696       cond = lt;
  11697       break;
  11698     default:
  11699       VIXL_UNIMPLEMENTED();
  11700       cond = al;
  11701       break;
  11702   }
  11703 
  11704   SVEIntCompareVectorsHelper(cond,
  11705                              instr->GetSVEVectorFormat(),
  11706                              ReadPRegister(instr->GetPd()),
  11707                              ReadPRegister(instr->GetPgLow8()),
  11708                              ReadVRegister(instr->GetRn()),
  11709                              ReadVRegister(instr->GetRm()),
  11710                              is_wide_elements);
  11711 }
  11712 
  11713 void Simulator::VisitSVEFPExponentialAccelerator(const Instruction* instr) {
  11714   VectorFormat vform = instr->GetSVEVectorFormat();
  11715   SimVRegister& zd = ReadVRegister(instr->GetRd());
  11716   SimVRegister& zn = ReadVRegister(instr->GetRn());
  11717 
  11718   VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) ||
  11719               (vform == kFormatVnD));
  11720 
  11721   switch (instr->Mask(SVEFPExponentialAcceleratorMask)) {
  11722     case FEXPA_z_z:
  11723       fexpa(vform, zd, zn);
  11724       break;
  11725     default:
  11726       VIXL_UNIMPLEMENTED();
  11727       break;
  11728   }
  11729 }
  11730 
  11731 void Simulator::VisitSVEFPTrigSelectCoefficient(const Instruction* instr) {
  11732   VectorFormat vform = instr->GetSVEVectorFormat();
  11733   SimVRegister& zd = ReadVRegister(instr->GetRd());
  11734   SimVRegister& zn = ReadVRegister(instr->GetRn());
  11735   SimVRegister& zm = ReadVRegister(instr->GetRm());
  11736 
  11737   VIXL_ASSERT((vform == kFormatVnH) || (vform == kFormatVnS) ||
  11738               (vform == kFormatVnD));
  11739 
  11740   switch (instr->Mask(SVEFPTrigSelectCoefficientMask)) {
  11741     case FTSSEL_z_zz:
  11742       ftssel(vform, zd, zn, zm);
  11743       break;
  11744     default:
  11745       VIXL_UNIMPLEMENTED();
  11746       break;
  11747   }
  11748 }
  11749 
  11750 void Simulator::VisitSVEConstructivePrefix_Unpredicated(
  11751     const Instruction* instr) {
  11752   SimVRegister& zd = ReadVRegister(instr->GetRd());
  11753   SimVRegister& zn = ReadVRegister(instr->GetRn());
  11754 
  11755   switch (instr->Mask(SVEConstructivePrefix_UnpredicatedMask)) {
  11756     case MOVPRFX_z_z:
  11757       mov(kFormatVnD, zd, zn);  // The lane size is arbitrary.
  11758       break;
  11759     default:
  11760       VIXL_UNIMPLEMENTED();
  11761       break;
  11762   }
  11763 }
  11764 
  11765 void Simulator::VisitSVEIntMulAddPredicated(const Instruction* instr) {
  11766   VectorFormat vform = instr->GetSVEVectorFormat();
  11767 
  11768   SimVRegister& zd = ReadVRegister(instr->GetRd());
  11769   SimVRegister& zm = ReadVRegister(instr->GetRm());
  11770 
  11771   SimVRegister result;
  11772   switch (instr->Mask(SVEIntMulAddPredicatedMask)) {
  11773     case MLA_z_p_zzz:
  11774       mla(vform, result, zd, ReadVRegister(instr->GetRn()), zm);
  11775       break;
  11776     case MLS_z_p_zzz:
  11777       mls(vform, result, zd, ReadVRegister(instr->GetRn()), zm);
  11778       break;
  11779     case MAD_z_p_zzz:
  11780       // 'za' is encoded in 'Rn'.
  11781       mla(vform, result, ReadVRegister(instr->GetRn()), zd, zm);
  11782       break;
  11783     case MSB_z_p_zzz: {
  11784       // 'za' is encoded in 'Rn'.
  11785       mls(vform, result, ReadVRegister(instr->GetRn()), zd, zm);
  11786       break;
  11787     }
  11788     default:
  11789       VIXL_UNIMPLEMENTED();
  11790       break;
  11791   }
  11792   mov_merging(vform, zd, ReadPRegister(instr->GetPgLow8()), result);
  11793 }
  11794 
  11795 void Simulator::VisitSVEIntMulAddUnpredicated(const Instruction* instr) {
  11796   VectorFormat vform = instr->GetSVEVectorFormat();
  11797   SimVRegister& zda = ReadVRegister(instr->GetRd());
  11798   SimVRegister& zn = ReadVRegister(instr->GetRn());
  11799   SimVRegister& zm = ReadVRegister(instr->GetRm());
  11800 
  11801   switch (form_hash_) {
  11802     case "sdot_z_zzz"_h:
  11803       sdot(vform, zda, zn, zm);
  11804       break;
  11805     case "udot_z_zzz"_h:
  11806       udot(vform, zda, zn, zm);
  11807       break;
  11808     case "usdot_z_zzz_s"_h:
  11809       usdot(vform, zda, zn, zm);
  11810       break;
  11811     default:
  11812       VIXL_UNIMPLEMENTED();
  11813       break;
  11814   }
  11815 }
  11816 
  11817 void Simulator::VisitSVEMovprfx(const Instruction* instr) {
  11818   VectorFormat vform = instr->GetSVEVectorFormat();
  11819   SimVRegister& zn = ReadVRegister(instr->GetRn());
  11820   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  11821   SimVRegister& zd = ReadVRegister(instr->GetRd());
  11822 
  11823   switch (instr->Mask(SVEMovprfxMask)) {
  11824     case MOVPRFX_z_p_z:
  11825       if (instr->ExtractBit(16)) {
  11826         mov_merging(vform, zd, pg, zn);
  11827       } else {
  11828         mov_zeroing(vform, zd, pg, zn);
  11829       }
  11830       break;
  11831     default:
  11832       VIXL_UNIMPLEMENTED();
  11833       break;
  11834   }
  11835 }
  11836 
  11837 void Simulator::VisitSVEIntReduction(const Instruction* instr) {
  11838   VectorFormat vform = instr->GetSVEVectorFormat();
  11839   SimVRegister& vd = ReadVRegister(instr->GetRd());
  11840   SimVRegister& zn = ReadVRegister(instr->GetRn());
  11841   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  11842 
  11843   if (instr->Mask(SVEIntReductionLogicalFMask) == SVEIntReductionLogicalFixed) {
  11844     switch (instr->Mask(SVEIntReductionLogicalMask)) {
  11845       case ANDV_r_p_z:
  11846         andv(vform, vd, pg, zn);
  11847         break;
  11848       case EORV_r_p_z:
  11849         eorv(vform, vd, pg, zn);
  11850         break;
  11851       case ORV_r_p_z:
  11852         orv(vform, vd, pg, zn);
  11853         break;
  11854       default:
  11855         VIXL_UNIMPLEMENTED();
  11856         break;
  11857     }
  11858   } else {
  11859     switch (instr->Mask(SVEIntReductionMask)) {
  11860       case SADDV_r_p_z:
  11861         saddv(vform, vd, pg, zn);
  11862         break;
  11863       case SMAXV_r_p_z:
  11864         smaxv(vform, vd, pg, zn);
  11865         break;
  11866       case SMINV_r_p_z:
  11867         sminv(vform, vd, pg, zn);
  11868         break;
  11869       case UADDV_r_p_z:
  11870         uaddv(vform, vd, pg, zn);
  11871         break;
  11872       case UMAXV_r_p_z:
  11873         umaxv(vform, vd, pg, zn);
  11874         break;
  11875       case UMINV_r_p_z:
  11876         uminv(vform, vd, pg, zn);
  11877         break;
  11878       default:
  11879         VIXL_UNIMPLEMENTED();
  11880         break;
  11881     }
  11882   }
  11883 }
  11884 
  11885 void Simulator::VisitSVEIntUnaryArithmeticPredicated(const Instruction* instr) {
  11886   VectorFormat vform = instr->GetSVEVectorFormat();
  11887   SimVRegister& zn = ReadVRegister(instr->GetRn());
  11888 
  11889   SimVRegister result;
  11890   switch (instr->Mask(SVEIntUnaryArithmeticPredicatedMask)) {
  11891     case ABS_z_p_z:
  11892       abs(vform, result, zn);
  11893       break;
  11894     case CLS_z_p_z:
  11895       cls(vform, result, zn);
  11896       break;
  11897     case CLZ_z_p_z:
  11898       clz(vform, result, zn);
  11899       break;
  11900     case CNOT_z_p_z:
  11901       cnot(vform, result, zn);
  11902       break;
  11903     case CNT_z_p_z:
  11904       cnt(vform, result, zn);
  11905       break;
  11906     case FABS_z_p_z:
  11907       fabs_(vform, result, zn);
  11908       break;
  11909     case FNEG_z_p_z:
  11910       fneg(vform, result, zn);
  11911       break;
  11912     case NEG_z_p_z:
  11913       neg(vform, result, zn);
  11914       break;
  11915     case NOT_z_p_z:
  11916       not_(vform, result, zn);
  11917       break;
  11918     case SXTB_z_p_z:
  11919     case SXTH_z_p_z:
  11920     case SXTW_z_p_z:
  11921       sxt(vform, result, zn, (kBitsPerByte << instr->ExtractBits(18, 17)));
  11922       break;
  11923     case UXTB_z_p_z:
  11924     case UXTH_z_p_z:
  11925     case UXTW_z_p_z:
  11926       uxt(vform, result, zn, (kBitsPerByte << instr->ExtractBits(18, 17)));
  11927       break;
  11928     default:
  11929       VIXL_UNIMPLEMENTED();
  11930       break;
  11931   }
  11932 
  11933   SimVRegister& zd = ReadVRegister(instr->GetRd());
  11934   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  11935   mov_merging(vform, zd, pg, result);
  11936 }
  11937 
  11938 void Simulator::VisitSVECopyFPImm_Predicated(const Instruction* instr) {
  11939   // There is only one instruction in this group.
  11940   VIXL_ASSERT(instr->Mask(SVECopyFPImm_PredicatedMask) == FCPY_z_p_i);
  11941 
  11942   VectorFormat vform = instr->GetSVEVectorFormat();
  11943   SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16));
  11944   SimVRegister& zd = ReadVRegister(instr->GetRd());
  11945 
  11946   if (vform == kFormatVnB) VIXL_UNIMPLEMENTED();
  11947 
  11948   SimVRegister result;
  11949   switch (instr->Mask(SVECopyFPImm_PredicatedMask)) {
  11950     case FCPY_z_p_i: {
  11951       int imm8 = instr->ExtractBits(12, 5);
  11952       uint64_t value = FPToRawbitsWithSize(LaneSizeInBitsFromFormat(vform),
  11953                                            Instruction::Imm8ToFP64(imm8));
  11954       dup_immediate(vform, result, value);
  11955       break;
  11956     }
  11957     default:
  11958       VIXL_UNIMPLEMENTED();
  11959       break;
  11960   }
  11961   mov_merging(vform, zd, pg, result);
  11962 }
  11963 
  11964 void Simulator::VisitSVEIntAddSubtractImm_Unpredicated(
  11965     const Instruction* instr) {
  11966   VectorFormat vform = instr->GetSVEVectorFormat();
  11967   SimVRegister& zd = ReadVRegister(instr->GetRd());
  11968   SimVRegister scratch;
  11969 
  11970   uint64_t imm = instr->GetImmSVEIntWideUnsigned();
  11971   imm <<= instr->ExtractBit(13) * 8;
  11972 
  11973   switch (instr->Mask(SVEIntAddSubtractImm_UnpredicatedMask)) {
  11974     case ADD_z_zi:
  11975       add_uint(vform, zd, zd, imm);
  11976       break;
  11977     case SQADD_z_zi:
  11978       add_uint(vform, zd, zd, imm).SignedSaturate(vform);
  11979       break;
  11980     case SQSUB_z_zi:
  11981       sub_uint(vform, zd, zd, imm).SignedSaturate(vform);
  11982       break;
  11983     case SUBR_z_zi:
  11984       dup_immediate(vform, scratch, imm);
  11985       sub(vform, zd, scratch, zd);
  11986       break;
  11987     case SUB_z_zi:
  11988       sub_uint(vform, zd, zd, imm);
  11989       break;
  11990     case UQADD_z_zi:
  11991       add_uint(vform, zd, zd, imm).UnsignedSaturate(vform);
  11992       break;
  11993     case UQSUB_z_zi:
  11994       sub_uint(vform, zd, zd, imm).UnsignedSaturate(vform);
  11995       break;
  11996     default:
  11997       break;
  11998   }
  11999 }
  12000 
  12001 void Simulator::VisitSVEBroadcastIntImm_Unpredicated(const Instruction* instr) {
  12002   SimVRegister& zd = ReadVRegister(instr->GetRd());
  12003 
  12004   VectorFormat format = instr->GetSVEVectorFormat();
  12005   int64_t imm = instr->GetImmSVEIntWideSigned();
  12006   int shift = instr->ExtractBit(13) * 8;
  12007   imm *= 1 << shift;
  12008 
  12009   switch (instr->Mask(SVEBroadcastIntImm_UnpredicatedMask)) {
  12010     case DUP_z_i:
  12011       // The encoding of byte-sized lanes with lsl #8 is undefined.
  12012       if ((format == kFormatVnB) && (shift == 8)) {
  12013         VIXL_UNIMPLEMENTED();
  12014       } else {
  12015         dup_immediate(format, zd, imm);
  12016       }
  12017       break;
  12018     default:
  12019       VIXL_UNIMPLEMENTED();
  12020       break;
  12021   }
  12022 }
  12023 
  12024 void Simulator::VisitSVEBroadcastFPImm_Unpredicated(const Instruction* instr) {
  12025   VectorFormat vform = instr->GetSVEVectorFormat();
  12026   SimVRegister& zd = ReadVRegister(instr->GetRd());
  12027 
  12028   switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) {
  12029     case FDUP_z_i:
  12030       switch (vform) {
  12031         case kFormatVnH:
  12032           dup_immediate(vform, zd, Float16ToRawbits(instr->GetSVEImmFP16()));
  12033           break;
  12034         case kFormatVnS:
  12035           dup_immediate(vform, zd, FloatToRawbits(instr->GetSVEImmFP32()));
  12036           break;
  12037         case kFormatVnD:
  12038           dup_immediate(vform, zd, DoubleToRawbits(instr->GetSVEImmFP64()));
  12039           break;
  12040         default:
  12041           VIXL_UNIMPLEMENTED();
  12042       }
  12043       break;
  12044     default:
  12045       VIXL_UNIMPLEMENTED();
  12046       break;
  12047   }
  12048 }
  12049 
  12050 void Simulator::VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets(
  12051     const Instruction* instr) {
  12052   switch (instr->Mask(
  12053       SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask)) {
  12054     case LD1H_z_p_bz_s_x32_scaled:
  12055     case LD1SH_z_p_bz_s_x32_scaled:
  12056     case LDFF1H_z_p_bz_s_x32_scaled:
  12057     case LDFF1SH_z_p_bz_s_x32_scaled:
  12058       break;
  12059     default:
  12060       VIXL_UNIMPLEMENTED();
  12061       break;
  12062   }
  12063 
  12064   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
  12065   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
  12066 }
  12067 
  12068 void Simulator::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets(
  12069     const Instruction* instr) {
  12070   switch (instr->Mask(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask)) {
  12071     case LD1B_z_p_bz_s_x32_unscaled:
  12072     case LD1H_z_p_bz_s_x32_unscaled:
  12073     case LD1SB_z_p_bz_s_x32_unscaled:
  12074     case LD1SH_z_p_bz_s_x32_unscaled:
  12075     case LD1W_z_p_bz_s_x32_unscaled:
  12076     case LDFF1B_z_p_bz_s_x32_unscaled:
  12077     case LDFF1H_z_p_bz_s_x32_unscaled:
  12078     case LDFF1SB_z_p_bz_s_x32_unscaled:
  12079     case LDFF1SH_z_p_bz_s_x32_unscaled:
  12080     case LDFF1W_z_p_bz_s_x32_unscaled:
  12081       break;
  12082     default:
  12083       VIXL_UNIMPLEMENTED();
  12084       break;
  12085   }
  12086 
  12087   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
  12088   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
  12089 }
  12090 
  12091 void Simulator::VisitSVE32BitGatherLoad_VectorPlusImm(
  12092     const Instruction* instr) {
  12093   switch (instr->Mask(SVE32BitGatherLoad_VectorPlusImmMask)) {
  12094     case LD1B_z_p_ai_s:
  12095       VIXL_UNIMPLEMENTED();
  12096       break;
  12097     case LD1H_z_p_ai_s:
  12098       VIXL_UNIMPLEMENTED();
  12099       break;
  12100     case LD1SB_z_p_ai_s:
  12101       VIXL_UNIMPLEMENTED();
  12102       break;
  12103     case LD1SH_z_p_ai_s:
  12104       VIXL_UNIMPLEMENTED();
  12105       break;
  12106     case LD1W_z_p_ai_s:
  12107       VIXL_UNIMPLEMENTED();
  12108       break;
  12109     case LDFF1B_z_p_ai_s:
  12110       VIXL_UNIMPLEMENTED();
  12111       break;
  12112     case LDFF1H_z_p_ai_s:
  12113       VIXL_UNIMPLEMENTED();
  12114       break;
  12115     case LDFF1SB_z_p_ai_s:
  12116       VIXL_UNIMPLEMENTED();
  12117       break;
  12118     case LDFF1SH_z_p_ai_s:
  12119       VIXL_UNIMPLEMENTED();
  12120       break;
  12121     case LDFF1W_z_p_ai_s:
  12122       VIXL_UNIMPLEMENTED();
  12123       break;
  12124     default:
  12125       VIXL_UNIMPLEMENTED();
  12126       break;
  12127   }
  12128 }
  12129 
  12130 void Simulator::VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets(
  12131     const Instruction* instr) {
  12132   switch (
  12133       instr->Mask(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask)) {
  12134     case LD1W_z_p_bz_s_x32_scaled:
  12135     case LDFF1W_z_p_bz_s_x32_scaled:
  12136       break;
  12137     default:
  12138       VIXL_UNIMPLEMENTED();
  12139       break;
  12140   }
  12141 
  12142   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
  12143   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnS, mod);
  12144 }
  12145 
  12146 void Simulator::VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets(
  12147     const Instruction* instr) {
  12148   switch (
  12149       instr->Mask(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask)) {
  12150     // Ignore prefetch hint instructions.
  12151     case PRFB_i_p_bz_s_x32_scaled:
  12152     case PRFD_i_p_bz_s_x32_scaled:
  12153     case PRFH_i_p_bz_s_x32_scaled:
  12154     case PRFW_i_p_bz_s_x32_scaled:
  12155       break;
  12156     default:
  12157       VIXL_UNIMPLEMENTED();
  12158       break;
  12159   }
  12160 }
  12161 
  12162 void Simulator::VisitSVE32BitGatherPrefetch_VectorPlusImm(
  12163     const Instruction* instr) {
  12164   switch (instr->Mask(SVE32BitGatherPrefetch_VectorPlusImmMask)) {
  12165     // Ignore prefetch hint instructions.
  12166     case PRFB_i_p_ai_s:
  12167     case PRFD_i_p_ai_s:
  12168     case PRFH_i_p_ai_s:
  12169     case PRFW_i_p_ai_s:
  12170       break;
  12171     default:
  12172       VIXL_UNIMPLEMENTED();
  12173       break;
  12174   }
  12175 }
  12176 
  12177 void Simulator::VisitSVEContiguousPrefetch_ScalarPlusImm(
  12178     const Instruction* instr) {
  12179   switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusImmMask)) {
  12180     // Ignore prefetch hint instructions.
  12181     case PRFB_i_p_bi_s:
  12182     case PRFD_i_p_bi_s:
  12183     case PRFH_i_p_bi_s:
  12184     case PRFW_i_p_bi_s:
  12185       break;
  12186     default:
  12187       VIXL_UNIMPLEMENTED();
  12188       break;
  12189   }
  12190 }
  12191 
  12192 void Simulator::VisitSVEContiguousPrefetch_ScalarPlusScalar(
  12193     const Instruction* instr) {
  12194   switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusScalarMask)) {
  12195     // Ignore prefetch hint instructions.
  12196     case PRFB_i_p_br_s:
  12197     case PRFD_i_p_br_s:
  12198     case PRFH_i_p_br_s:
  12199     case PRFW_i_p_br_s:
  12200       if (instr->GetRm() == kZeroRegCode) {
  12201         VIXL_UNIMPLEMENTED();
  12202       }
  12203       break;
  12204     default:
  12205       VIXL_UNIMPLEMENTED();
  12206       break;
  12207   }
  12208 }
  12209 
  12210 void Simulator::VisitSVELoadAndBroadcastElement(const Instruction* instr) {
  12211   bool is_signed;
  12212   switch (instr->Mask(SVELoadAndBroadcastElementMask)) {
  12213     case LD1RB_z_p_bi_u8:
  12214     case LD1RB_z_p_bi_u16:
  12215     case LD1RB_z_p_bi_u32:
  12216     case LD1RB_z_p_bi_u64:
  12217     case LD1RH_z_p_bi_u16:
  12218     case LD1RH_z_p_bi_u32:
  12219     case LD1RH_z_p_bi_u64:
  12220     case LD1RW_z_p_bi_u32:
  12221     case LD1RW_z_p_bi_u64:
  12222     case LD1RD_z_p_bi_u64:
  12223       is_signed = false;
  12224       break;
  12225     case LD1RSB_z_p_bi_s16:
  12226     case LD1RSB_z_p_bi_s32:
  12227     case LD1RSB_z_p_bi_s64:
  12228     case LD1RSH_z_p_bi_s32:
  12229     case LD1RSH_z_p_bi_s64:
  12230     case LD1RSW_z_p_bi_s64:
  12231       is_signed = true;
  12232       break;
  12233     default:
  12234       // This encoding group is complete, so no other values should be possible.
  12235       VIXL_UNREACHABLE();
  12236       is_signed = false;
  12237       break;
  12238   }
  12239 
  12240   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
  12241   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed, 13);
  12242   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
  12243   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
  12244   uint64_t offset = instr->ExtractBits(21, 16) << msize_in_bytes_log2;
  12245   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset;
  12246   VectorFormat unpack_vform =
  12247       SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
  12248   SimVRegister temp;
  12249   if (!ld1r(vform, unpack_vform, temp, base, is_signed)) return;
  12250   mov_zeroing(vform,
  12251               ReadVRegister(instr->GetRt()),
  12252               ReadPRegister(instr->GetPgLow8()),
  12253               temp);
  12254 }
  12255 
  12256 void Simulator::VisitSVELoadPredicateRegister(const Instruction* instr) {
  12257   switch (instr->Mask(SVELoadPredicateRegisterMask)) {
  12258     case LDR_p_bi: {
  12259       SimPRegister& pt = ReadPRegister(instr->GetPt());
  12260       int pl = GetPredicateLengthInBytes();
  12261       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
  12262       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
  12263       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  12264       uint64_t address = base + multiplier * pl;
  12265       for (int i = 0; i < pl; i++) {
  12266         VIXL_DEFINE_OR_RETURN(value, MemRead<uint8_t>(address + i));
  12267         pt.Insert(i, value);
  12268       }
  12269       LogPRead(instr->GetPt(), address);
  12270       break;
  12271     }
  12272     default:
  12273       VIXL_UNIMPLEMENTED();
  12274       break;
  12275   }
  12276 }
  12277 
  12278 void Simulator::VisitSVELoadVectorRegister(const Instruction* instr) {
  12279   switch (instr->Mask(SVELoadVectorRegisterMask)) {
  12280     case LDR_z_bi: {
  12281       SimVRegister& zt = ReadVRegister(instr->GetRt());
  12282       int vl = GetVectorLengthInBytes();
  12283       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
  12284       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
  12285       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  12286       uint64_t address = base + multiplier * vl;
  12287       for (int i = 0; i < vl; i++) {
  12288         VIXL_DEFINE_OR_RETURN(value, MemRead<uint8_t>(address + i));
  12289         zt.Insert(i, value);
  12290       }
  12291       LogZRead(instr->GetRt(), address);
  12292       break;
  12293     }
  12294     default:
  12295       VIXL_UNIMPLEMENTED();
  12296       break;
  12297   }
  12298 }
  12299 
  12300 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets(
  12301     const Instruction* instr) {
  12302   switch (instr->Mask(
  12303       SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask)) {
  12304     case LD1D_z_p_bz_d_x32_scaled:
  12305     case LD1H_z_p_bz_d_x32_scaled:
  12306     case LD1SH_z_p_bz_d_x32_scaled:
  12307     case LD1SW_z_p_bz_d_x32_scaled:
  12308     case LD1W_z_p_bz_d_x32_scaled:
  12309     case LDFF1H_z_p_bz_d_x32_scaled:
  12310     case LDFF1W_z_p_bz_d_x32_scaled:
  12311     case LDFF1D_z_p_bz_d_x32_scaled:
  12312     case LDFF1SH_z_p_bz_d_x32_scaled:
  12313     case LDFF1SW_z_p_bz_d_x32_scaled:
  12314       break;
  12315     default:
  12316       VIXL_UNIMPLEMENTED();
  12317       break;
  12318   }
  12319 
  12320   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
  12321   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod);
  12322 }
  12323 
  12324 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets(
  12325     const Instruction* instr) {
  12326   switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask)) {
  12327     case LD1D_z_p_bz_d_64_scaled:
  12328     case LD1H_z_p_bz_d_64_scaled:
  12329     case LD1SH_z_p_bz_d_64_scaled:
  12330     case LD1SW_z_p_bz_d_64_scaled:
  12331     case LD1W_z_p_bz_d_64_scaled:
  12332     case LDFF1H_z_p_bz_d_64_scaled:
  12333     case LDFF1W_z_p_bz_d_64_scaled:
  12334     case LDFF1D_z_p_bz_d_64_scaled:
  12335     case LDFF1SH_z_p_bz_d_64_scaled:
  12336     case LDFF1SW_z_p_bz_d_64_scaled:
  12337       break;
  12338     default:
  12339       VIXL_UNIMPLEMENTED();
  12340       break;
  12341   }
  12342 
  12343   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, SVE_LSL);
  12344 }
  12345 
  12346 void Simulator::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets(
  12347     const Instruction* instr) {
  12348   switch (instr->Mask(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask)) {
  12349     case LD1B_z_p_bz_d_64_unscaled:
  12350     case LD1D_z_p_bz_d_64_unscaled:
  12351     case LD1H_z_p_bz_d_64_unscaled:
  12352     case LD1SB_z_p_bz_d_64_unscaled:
  12353     case LD1SH_z_p_bz_d_64_unscaled:
  12354     case LD1SW_z_p_bz_d_64_unscaled:
  12355     case LD1W_z_p_bz_d_64_unscaled:
  12356     case LDFF1B_z_p_bz_d_64_unscaled:
  12357     case LDFF1D_z_p_bz_d_64_unscaled:
  12358     case LDFF1H_z_p_bz_d_64_unscaled:
  12359     case LDFF1SB_z_p_bz_d_64_unscaled:
  12360     case LDFF1SH_z_p_bz_d_64_unscaled:
  12361     case LDFF1SW_z_p_bz_d_64_unscaled:
  12362     case LDFF1W_z_p_bz_d_64_unscaled:
  12363       break;
  12364     default:
  12365       VIXL_UNIMPLEMENTED();
  12366       break;
  12367   }
  12368 
  12369   SVEGatherLoadScalarPlusVectorHelper(instr,
  12370                                       kFormatVnD,
  12371                                       NO_SVE_OFFSET_MODIFIER);
  12372 }
  12373 
  12374 void Simulator::VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets(
  12375     const Instruction* instr) {
  12376   switch (instr->Mask(
  12377       SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
  12378     case LD1B_z_p_bz_d_x32_unscaled:
  12379     case LD1D_z_p_bz_d_x32_unscaled:
  12380     case LD1H_z_p_bz_d_x32_unscaled:
  12381     case LD1SB_z_p_bz_d_x32_unscaled:
  12382     case LD1SH_z_p_bz_d_x32_unscaled:
  12383     case LD1SW_z_p_bz_d_x32_unscaled:
  12384     case LD1W_z_p_bz_d_x32_unscaled:
  12385     case LDFF1B_z_p_bz_d_x32_unscaled:
  12386     case LDFF1H_z_p_bz_d_x32_unscaled:
  12387     case LDFF1W_z_p_bz_d_x32_unscaled:
  12388     case LDFF1D_z_p_bz_d_x32_unscaled:
  12389     case LDFF1SB_z_p_bz_d_x32_unscaled:
  12390     case LDFF1SH_z_p_bz_d_x32_unscaled:
  12391     case LDFF1SW_z_p_bz_d_x32_unscaled:
  12392       break;
  12393     default:
  12394       VIXL_UNIMPLEMENTED();
  12395       break;
  12396   }
  12397 
  12398   SVEOffsetModifier mod = (instr->ExtractBit(22) == 1) ? SVE_SXTW : SVE_UXTW;
  12399   SVEGatherLoadScalarPlusVectorHelper(instr, kFormatVnD, mod);
  12400 }
  12401 
  12402 void Simulator::VisitSVE64BitGatherLoad_VectorPlusImm(
  12403     const Instruction* instr) {
  12404   switch (instr->Mask(SVE64BitGatherLoad_VectorPlusImmMask)) {
  12405     case LD1B_z_p_ai_d:
  12406     case LD1D_z_p_ai_d:
  12407     case LD1H_z_p_ai_d:
  12408     case LD1SB_z_p_ai_d:
  12409     case LD1SH_z_p_ai_d:
  12410     case LD1SW_z_p_ai_d:
  12411     case LD1W_z_p_ai_d:
  12412     case LDFF1B_z_p_ai_d:
  12413     case LDFF1D_z_p_ai_d:
  12414     case LDFF1H_z_p_ai_d:
  12415     case LDFF1SB_z_p_ai_d:
  12416     case LDFF1SH_z_p_ai_d:
  12417     case LDFF1SW_z_p_ai_d:
  12418     case LDFF1W_z_p_ai_d:
  12419       break;
  12420     default:
  12421       VIXL_UNIMPLEMENTED();
  12422       break;
  12423   }
  12424   bool is_signed = instr->ExtractBit(14) == 0;
  12425   bool is_ff = instr->ExtractBit(13) == 1;
  12426   // Note that these instructions don't use the Dtype encoding.
  12427   int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
  12428   uint64_t imm = instr->ExtractBits(20, 16) << msize_in_bytes_log2;
  12429   LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD);
  12430   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  12431   if (is_ff) {
  12432     VIXL_UNIMPLEMENTED();
  12433   } else {
  12434     SVEStructuredLoadHelper(kFormatVnD,
  12435                             ReadPRegister(instr->GetPgLow8()),
  12436                             instr->GetRt(),
  12437                             addr,
  12438                             is_signed);
  12439   }
  12440 }
  12441 
  12442 void Simulator::VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets(
  12443     const Instruction* instr) {
  12444   switch (
  12445       instr->Mask(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask)) {
  12446     // Ignore prefetch hint instructions.
  12447     case PRFB_i_p_bz_d_64_scaled:
  12448     case PRFD_i_p_bz_d_64_scaled:
  12449     case PRFH_i_p_bz_d_64_scaled:
  12450     case PRFW_i_p_bz_d_64_scaled:
  12451       break;
  12452     default:
  12453       VIXL_UNIMPLEMENTED();
  12454       break;
  12455   }
  12456 }
  12457 
  12458 void Simulator::
  12459     VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets(
  12460         const Instruction* instr) {
  12461   switch (instr->Mask(
  12462       SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
  12463     // Ignore prefetch hint instructions.
  12464     case PRFB_i_p_bz_d_x32_scaled:
  12465     case PRFD_i_p_bz_d_x32_scaled:
  12466     case PRFH_i_p_bz_d_x32_scaled:
  12467     case PRFW_i_p_bz_d_x32_scaled:
  12468       break;
  12469     default:
  12470       VIXL_UNIMPLEMENTED();
  12471       break;
  12472   }
  12473 }
  12474 
  12475 void Simulator::VisitSVE64BitGatherPrefetch_VectorPlusImm(
  12476     const Instruction* instr) {
  12477   switch (instr->Mask(SVE64BitGatherPrefetch_VectorPlusImmMask)) {
  12478     // Ignore prefetch hint instructions.
  12479     case PRFB_i_p_ai_d:
  12480     case PRFD_i_p_ai_d:
  12481     case PRFH_i_p_ai_d:
  12482     case PRFW_i_p_ai_d:
  12483       break;
  12484     default:
  12485       VIXL_UNIMPLEMENTED();
  12486       break;
  12487   }
  12488 }
  12489 
  12490 void Simulator::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar(
  12491     const Instruction* instr) {
  12492   bool is_signed;
  12493   switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) {
  12494     case LDFF1B_z_p_br_u8:
  12495     case LDFF1B_z_p_br_u16:
  12496     case LDFF1B_z_p_br_u32:
  12497     case LDFF1B_z_p_br_u64:
  12498     case LDFF1H_z_p_br_u16:
  12499     case LDFF1H_z_p_br_u32:
  12500     case LDFF1H_z_p_br_u64:
  12501     case LDFF1W_z_p_br_u32:
  12502     case LDFF1W_z_p_br_u64:
  12503     case LDFF1D_z_p_br_u64:
  12504       is_signed = false;
  12505       break;
  12506     case LDFF1SB_z_p_br_s16:
  12507     case LDFF1SB_z_p_br_s32:
  12508     case LDFF1SB_z_p_br_s64:
  12509     case LDFF1SH_z_p_br_s32:
  12510     case LDFF1SH_z_p_br_s64:
  12511     case LDFF1SW_z_p_br_s64:
  12512       is_signed = true;
  12513       break;
  12514     default:
  12515       // This encoding group is complete, so no other values should be possible.
  12516       VIXL_UNREACHABLE();
  12517       is_signed = false;
  12518       break;
  12519   }
  12520 
  12521   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
  12522   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
  12523   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
  12524   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
  12525   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  12526   uint64_t offset = ReadXRegister(instr->GetRm());
  12527   offset <<= msize_in_bytes_log2;
  12528   LogicSVEAddressVector addr(base + offset);
  12529   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  12530   SVEFaultTolerantLoadHelper(vform,
  12531                              ReadPRegister(instr->GetPgLow8()),
  12532                              instr->GetRt(),
  12533                              addr,
  12534                              kSVEFirstFaultLoad,
  12535                              is_signed);
  12536 }
  12537 
  12538 void Simulator::VisitSVEContiguousNonFaultLoad_ScalarPlusImm(
  12539     const Instruction* instr) {
  12540   bool is_signed = false;
  12541   switch (instr->Mask(SVEContiguousNonFaultLoad_ScalarPlusImmMask)) {
  12542     case LDNF1B_z_p_bi_u16:
  12543     case LDNF1B_z_p_bi_u32:
  12544     case LDNF1B_z_p_bi_u64:
  12545     case LDNF1B_z_p_bi_u8:
  12546     case LDNF1D_z_p_bi_u64:
  12547     case LDNF1H_z_p_bi_u16:
  12548     case LDNF1H_z_p_bi_u32:
  12549     case LDNF1H_z_p_bi_u64:
  12550     case LDNF1W_z_p_bi_u32:
  12551     case LDNF1W_z_p_bi_u64:
  12552       break;
  12553     case LDNF1SB_z_p_bi_s16:
  12554     case LDNF1SB_z_p_bi_s32:
  12555     case LDNF1SB_z_p_bi_s64:
  12556     case LDNF1SH_z_p_bi_s32:
  12557     case LDNF1SH_z_p_bi_s64:
  12558     case LDNF1SW_z_p_bi_s64:
  12559       is_signed = true;
  12560       break;
  12561     default:
  12562       VIXL_UNIMPLEMENTED();
  12563       break;
  12564   }
  12565   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
  12566   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
  12567   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
  12568   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
  12569   int vl = GetVectorLengthInBytes();
  12570   int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
  12571   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  12572   uint64_t offset =
  12573       (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
  12574   LogicSVEAddressVector addr(base + offset);
  12575   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  12576   SVEFaultTolerantLoadHelper(vform,
  12577                              ReadPRegister(instr->GetPgLow8()),
  12578                              instr->GetRt(),
  12579                              addr,
  12580                              kSVENonFaultLoad,
  12581                              is_signed);
  12582 }
  12583 
  12584 void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm(
  12585     const Instruction* instr) {
  12586   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  12587   VectorFormat vform = kFormatUndefined;
  12588 
  12589   switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusImmMask)) {
  12590     case LDNT1B_z_p_bi_contiguous:
  12591       vform = kFormatVnB;
  12592       break;
  12593     case LDNT1D_z_p_bi_contiguous:
  12594       vform = kFormatVnD;
  12595       break;
  12596     case LDNT1H_z_p_bi_contiguous:
  12597       vform = kFormatVnH;
  12598       break;
  12599     case LDNT1W_z_p_bi_contiguous:
  12600       vform = kFormatVnS;
  12601       break;
  12602     default:
  12603       VIXL_UNIMPLEMENTED();
  12604       break;
  12605   }
  12606   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
  12607   int vl = GetVectorLengthInBytes();
  12608   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  12609   uint64_t offset = instr->ExtractSignedBits(19, 16) * vl;
  12610   LogicSVEAddressVector addr(base + offset);
  12611   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  12612   SVEStructuredLoadHelper(vform,
  12613                           pg,
  12614                           instr->GetRt(),
  12615                           addr,
  12616                           /* is_signed = */ false);
  12617 }
  12618 
  12619 void Simulator::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar(
  12620     const Instruction* instr) {
  12621   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  12622   VectorFormat vform = kFormatUndefined;
  12623 
  12624   switch (instr->Mask(SVEContiguousNonTemporalLoad_ScalarPlusScalarMask)) {
  12625     case LDNT1B_z_p_br_contiguous:
  12626       vform = kFormatVnB;
  12627       break;
  12628     case LDNT1D_z_p_br_contiguous:
  12629       vform = kFormatVnD;
  12630       break;
  12631     case LDNT1H_z_p_br_contiguous:
  12632       vform = kFormatVnH;
  12633       break;
  12634     case LDNT1W_z_p_br_contiguous:
  12635       vform = kFormatVnS;
  12636       break;
  12637     default:
  12638       VIXL_UNIMPLEMENTED();
  12639       break;
  12640   }
  12641   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
  12642   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  12643   uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2;
  12644   LogicSVEAddressVector addr(base + offset);
  12645   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  12646   SVEStructuredLoadHelper(vform,
  12647                           pg,
  12648                           instr->GetRt(),
  12649                           addr,
  12650                           /* is_signed = */ false);
  12651 }
  12652 
  12653 void Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm(
  12654     const Instruction* instr) {
  12655   SimVRegister& zt = ReadVRegister(instr->GetRt());
  12656   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  12657 
  12658   uint64_t dwords = 2;
  12659   VectorFormat vform_dst = kFormatVnQ;
  12660   if ((form_hash_ == "ld1rob_z_p_bi_u8"_h) ||
  12661       (form_hash_ == "ld1roh_z_p_bi_u16"_h) ||
  12662       (form_hash_ == "ld1row_z_p_bi_u32"_h) ||
  12663       (form_hash_ == "ld1rod_z_p_bi_u64"_h)) {
  12664     dwords = 4;
  12665     vform_dst = kFormatVnO;
  12666   }
  12667 
  12668   uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  12669   uint64_t offset =
  12670       instr->ExtractSignedBits(19, 16) * dwords * kDRegSizeInBytes;
  12671   int msz = instr->ExtractBits(24, 23);
  12672   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
  12673 
  12674   for (unsigned i = 0; i < dwords; i++) {
  12675     if (!ld1(kFormatVnD, zt, i, addr + offset + (i * kDRegSizeInBytes))) return;
  12676   }
  12677   mov_zeroing(vform, zt, pg, zt);
  12678   dup_element(vform_dst, zt, zt, 0);
  12679 }
  12680 
  12681 void Simulator::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar(
  12682     const Instruction* instr) {
  12683   SimVRegister& zt = ReadVRegister(instr->GetRt());
  12684   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  12685 
  12686   uint64_t bytes = 16;
  12687   VectorFormat vform_dst = kFormatVnQ;
  12688   if ((form_hash_ == "ld1rob_z_p_br_contiguous"_h) ||
  12689       (form_hash_ == "ld1roh_z_p_br_contiguous"_h) ||
  12690       (form_hash_ == "ld1row_z_p_br_contiguous"_h) ||
  12691       (form_hash_ == "ld1rod_z_p_br_contiguous"_h)) {
  12692     bytes = 32;
  12693     vform_dst = kFormatVnO;
  12694   }
  12695 
  12696   uint64_t addr = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  12697   uint64_t offset = ReadXRegister(instr->GetRm());
  12698   int msz = instr->ExtractBits(24, 23);
  12699   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
  12700   offset <<= msz;
  12701   for (unsigned i = 0; i < bytes; i++) {
  12702     if (!ld1(kFormatVnB, zt, i, addr + offset + i)) return;
  12703   }
  12704   mov_zeroing(vform, zt, pg, zt);
  12705   dup_element(vform_dst, zt, zt, 0);
  12706 }
  12707 
  12708 void Simulator::VisitSVELoadMultipleStructures_ScalarPlusImm(
  12709     const Instruction* instr) {
  12710   switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusImmMask)) {
  12711     case LD2B_z_p_bi_contiguous:
  12712     case LD2D_z_p_bi_contiguous:
  12713     case LD2H_z_p_bi_contiguous:
  12714     case LD2W_z_p_bi_contiguous:
  12715     case LD3B_z_p_bi_contiguous:
  12716     case LD3D_z_p_bi_contiguous:
  12717     case LD3H_z_p_bi_contiguous:
  12718     case LD3W_z_p_bi_contiguous:
  12719     case LD4B_z_p_bi_contiguous:
  12720     case LD4D_z_p_bi_contiguous:
  12721     case LD4H_z_p_bi_contiguous:
  12722     case LD4W_z_p_bi_contiguous: {
  12723       int vl = GetVectorLengthInBytes();
  12724       int msz = instr->ExtractBits(24, 23);
  12725       int reg_count = instr->ExtractBits(22, 21) + 1;
  12726       uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count;
  12727       LogicSVEAddressVector addr(
  12728           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
  12729       addr.SetMsizeInBytesLog2(msz);
  12730       addr.SetRegCount(reg_count);
  12731       SVEStructuredLoadHelper(SVEFormatFromLaneSizeInBytesLog2(msz),
  12732                               ReadPRegister(instr->GetPgLow8()),
  12733                               instr->GetRt(),
  12734                               addr);
  12735       break;
  12736     }
  12737     default:
  12738       VIXL_UNIMPLEMENTED();
  12739       break;
  12740   }
  12741 }
  12742 
  12743 void Simulator::VisitSVELoadMultipleStructures_ScalarPlusScalar(
  12744     const Instruction* instr) {
  12745   switch (instr->Mask(SVELoadMultipleStructures_ScalarPlusScalarMask)) {
  12746     case LD2B_z_p_br_contiguous:
  12747     case LD2D_z_p_br_contiguous:
  12748     case LD2H_z_p_br_contiguous:
  12749     case LD2W_z_p_br_contiguous:
  12750     case LD3B_z_p_br_contiguous:
  12751     case LD3D_z_p_br_contiguous:
  12752     case LD3H_z_p_br_contiguous:
  12753     case LD3W_z_p_br_contiguous:
  12754     case LD4B_z_p_br_contiguous:
  12755     case LD4D_z_p_br_contiguous:
  12756     case LD4H_z_p_br_contiguous:
  12757     case LD4W_z_p_br_contiguous: {
  12758       int msz = instr->ExtractBits(24, 23);
  12759       uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz);
  12760       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
  12761       LogicSVEAddressVector addr(
  12762           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
  12763       addr.SetMsizeInBytesLog2(msz);
  12764       addr.SetRegCount(instr->ExtractBits(22, 21) + 1);
  12765       SVEStructuredLoadHelper(vform,
  12766                               ReadPRegister(instr->GetPgLow8()),
  12767                               instr->GetRt(),
  12768                               addr,
  12769                               false);
  12770       break;
  12771     }
  12772     default:
  12773       VIXL_UNIMPLEMENTED();
  12774       break;
  12775   }
  12776 }
  12777 
  12778 void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets(
  12779     const Instruction* instr) {
  12780   switch (instr->Mask(SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask)) {
  12781     case ST1H_z_p_bz_s_x32_scaled:
  12782     case ST1W_z_p_bz_s_x32_scaled: {
  12783       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
  12784       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
  12785       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
  12786       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  12787       SVEOffsetModifier mod =
  12788           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
  12789       LogicSVEAddressVector addr(base,
  12790                                  &ReadVRegister(instr->GetRm()),
  12791                                  kFormatVnS,
  12792                                  mod,
  12793                                  scale);
  12794       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  12795       SVEStructuredStoreHelper(kFormatVnS,
  12796                                ReadPRegister(instr->GetPgLow8()),
  12797                                instr->GetRt(),
  12798                                addr);
  12799       break;
  12800     }
  12801     default:
  12802       VIXL_UNIMPLEMENTED();
  12803       break;
  12804   }
  12805 }
  12806 
  12807 void Simulator::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets(
  12808     const Instruction* instr) {
  12809   switch (
  12810       instr->Mask(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask)) {
  12811     case ST1B_z_p_bz_s_x32_unscaled:
  12812     case ST1H_z_p_bz_s_x32_unscaled:
  12813     case ST1W_z_p_bz_s_x32_unscaled: {
  12814       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
  12815       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
  12816       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  12817       SVEOffsetModifier mod =
  12818           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
  12819       LogicSVEAddressVector addr(base,
  12820                                  &ReadVRegister(instr->GetRm()),
  12821                                  kFormatVnS,
  12822                                  mod);
  12823       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  12824       SVEStructuredStoreHelper(kFormatVnS,
  12825                                ReadPRegister(instr->GetPgLow8()),
  12826                                instr->GetRt(),
  12827                                addr);
  12828       break;
  12829     }
  12830     default:
  12831       VIXL_UNIMPLEMENTED();
  12832       break;
  12833   }
  12834 }
  12835 
  12836 void Simulator::VisitSVE32BitScatterStore_VectorPlusImm(
  12837     const Instruction* instr) {
  12838   int msz = 0;
  12839   switch (instr->Mask(SVE32BitScatterStore_VectorPlusImmMask)) {
  12840     case ST1B_z_p_ai_s:
  12841       msz = 0;
  12842       break;
  12843     case ST1H_z_p_ai_s:
  12844       msz = 1;
  12845       break;
  12846     case ST1W_z_p_ai_s:
  12847       msz = 2;
  12848       break;
  12849     default:
  12850       VIXL_UNIMPLEMENTED();
  12851       break;
  12852   }
  12853   uint64_t imm = instr->ExtractBits(20, 16) << msz;
  12854   LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnS);
  12855   addr.SetMsizeInBytesLog2(msz);
  12856   SVEStructuredStoreHelper(kFormatVnS,
  12857                            ReadPRegister(instr->GetPgLow8()),
  12858                            instr->GetRt(),
  12859                            addr);
  12860 }
  12861 
  12862 void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets(
  12863     const Instruction* instr) {
  12864   switch (instr->Mask(SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask)) {
  12865     case ST1D_z_p_bz_d_64_scaled:
  12866     case ST1H_z_p_bz_d_64_scaled:
  12867     case ST1W_z_p_bz_d_64_scaled: {
  12868       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
  12869       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
  12870       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
  12871       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  12872       LogicSVEAddressVector addr(base,
  12873                                  &ReadVRegister(instr->GetRm()),
  12874                                  kFormatVnD,
  12875                                  SVE_LSL,
  12876                                  scale);
  12877       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  12878       SVEStructuredStoreHelper(kFormatVnD,
  12879                                ReadPRegister(instr->GetPgLow8()),
  12880                                instr->GetRt(),
  12881                                addr);
  12882       break;
  12883     }
  12884     default:
  12885       VIXL_UNIMPLEMENTED();
  12886       break;
  12887   }
  12888 }
  12889 
  12890 void Simulator::VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets(
  12891     const Instruction* instr) {
  12892   switch (
  12893       instr->Mask(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask)) {
  12894     case ST1B_z_p_bz_d_64_unscaled:
  12895     case ST1D_z_p_bz_d_64_unscaled:
  12896     case ST1H_z_p_bz_d_64_unscaled:
  12897     case ST1W_z_p_bz_d_64_unscaled: {
  12898       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
  12899       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
  12900       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  12901       LogicSVEAddressVector addr(base,
  12902                                  &ReadVRegister(instr->GetRm()),
  12903                                  kFormatVnD,
  12904                                  NO_SVE_OFFSET_MODIFIER);
  12905       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  12906       SVEStructuredStoreHelper(kFormatVnD,
  12907                                ReadPRegister(instr->GetPgLow8()),
  12908                                instr->GetRt(),
  12909                                addr);
  12910       break;
  12911     }
  12912     default:
  12913       VIXL_UNIMPLEMENTED();
  12914       break;
  12915   }
  12916 }
  12917 
  12918 void Simulator::VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets(
  12919     const Instruction* instr) {
  12920   switch (instr->Mask(
  12921       SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask)) {
  12922     case ST1D_z_p_bz_d_x32_scaled:
  12923     case ST1H_z_p_bz_d_x32_scaled:
  12924     case ST1W_z_p_bz_d_x32_scaled: {
  12925       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
  12926       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
  12927       int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
  12928       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  12929       SVEOffsetModifier mod =
  12930           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
  12931       LogicSVEAddressVector addr(base,
  12932                                  &ReadVRegister(instr->GetRm()),
  12933                                  kFormatVnD,
  12934                                  mod,
  12935                                  scale);
  12936       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  12937       SVEStructuredStoreHelper(kFormatVnD,
  12938                                ReadPRegister(instr->GetPgLow8()),
  12939                                instr->GetRt(),
  12940                                addr);
  12941       break;
  12942     }
  12943     default:
  12944       VIXL_UNIMPLEMENTED();
  12945       break;
  12946   }
  12947 }
  12948 
  12949 void Simulator::
  12950     VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets(
  12951         const Instruction* instr) {
  12952   switch (instr->Mask(
  12953       SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask)) {
  12954     case ST1B_z_p_bz_d_x32_unscaled:
  12955     case ST1D_z_p_bz_d_x32_unscaled:
  12956     case ST1H_z_p_bz_d_x32_unscaled:
  12957     case ST1W_z_p_bz_d_x32_unscaled: {
  12958       unsigned msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
  12959       VIXL_ASSERT(kDRegSizeInBytesLog2 >= msize_in_bytes_log2);
  12960       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  12961       SVEOffsetModifier mod =
  12962           (instr->ExtractBit(14) == 1) ? SVE_SXTW : SVE_UXTW;
  12963       LogicSVEAddressVector addr(base,
  12964                                  &ReadVRegister(instr->GetRm()),
  12965                                  kFormatVnD,
  12966                                  mod);
  12967       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  12968       SVEStructuredStoreHelper(kFormatVnD,
  12969                                ReadPRegister(instr->GetPgLow8()),
  12970                                instr->GetRt(),
  12971                                addr);
  12972       break;
  12973     }
  12974     default:
  12975       VIXL_UNIMPLEMENTED();
  12976       break;
  12977   }
  12978 }
  12979 
  12980 void Simulator::VisitSVE64BitScatterStore_VectorPlusImm(
  12981     const Instruction* instr) {
  12982   int msz = 0;
  12983   switch (instr->Mask(SVE64BitScatterStore_VectorPlusImmMask)) {
  12984     case ST1B_z_p_ai_d:
  12985       msz = 0;
  12986       break;
  12987     case ST1D_z_p_ai_d:
  12988       msz = 3;
  12989       break;
  12990     case ST1H_z_p_ai_d:
  12991       msz = 1;
  12992       break;
  12993     case ST1W_z_p_ai_d:
  12994       msz = 2;
  12995       break;
  12996     default:
  12997       VIXL_UNIMPLEMENTED();
  12998       break;
  12999   }
  13000   uint64_t imm = instr->ExtractBits(20, 16) << msz;
  13001   LogicSVEAddressVector addr(imm, &ReadVRegister(instr->GetRn()), kFormatVnD);
  13002   addr.SetMsizeInBytesLog2(msz);
  13003   SVEStructuredStoreHelper(kFormatVnD,
  13004                            ReadPRegister(instr->GetPgLow8()),
  13005                            instr->GetRt(),
  13006                            addr);
  13007 }
  13008 
  13009 void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusImm(
  13010     const Instruction* instr) {
  13011   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  13012   VectorFormat vform = kFormatUndefined;
  13013 
  13014   switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusImmMask)) {
  13015     case STNT1B_z_p_bi_contiguous:
  13016       vform = kFormatVnB;
  13017       break;
  13018     case STNT1D_z_p_bi_contiguous:
  13019       vform = kFormatVnD;
  13020       break;
  13021     case STNT1H_z_p_bi_contiguous:
  13022       vform = kFormatVnH;
  13023       break;
  13024     case STNT1W_z_p_bi_contiguous:
  13025       vform = kFormatVnS;
  13026       break;
  13027     default:
  13028       VIXL_UNIMPLEMENTED();
  13029       break;
  13030   }
  13031   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
  13032   int vl = GetVectorLengthInBytes();
  13033   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  13034   uint64_t offset = instr->ExtractSignedBits(19, 16) * vl;
  13035   LogicSVEAddressVector addr(base + offset);
  13036   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  13037   SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr);
  13038 }
  13039 
  13040 void Simulator::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar(
  13041     const Instruction* instr) {
  13042   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  13043   VectorFormat vform = kFormatUndefined;
  13044 
  13045   switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusScalarMask)) {
  13046     case STNT1B_z_p_br_contiguous:
  13047       vform = kFormatVnB;
  13048       break;
  13049     case STNT1D_z_p_br_contiguous:
  13050       vform = kFormatVnD;
  13051       break;
  13052     case STNT1H_z_p_br_contiguous:
  13053       vform = kFormatVnH;
  13054       break;
  13055     case STNT1W_z_p_br_contiguous:
  13056       vform = kFormatVnS;
  13057       break;
  13058     default:
  13059       VIXL_UNIMPLEMENTED();
  13060       break;
  13061   }
  13062   int msize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
  13063   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  13064   uint64_t offset = ReadXRegister(instr->GetRm()) << msize_in_bytes_log2;
  13065   LogicSVEAddressVector addr(base + offset);
  13066   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  13067   SVEStructuredStoreHelper(vform, pg, instr->GetRt(), addr);
  13068 }
  13069 
  13070 void Simulator::VisitSVEContiguousStore_ScalarPlusImm(
  13071     const Instruction* instr) {
  13072   switch (instr->Mask(SVEContiguousStore_ScalarPlusImmMask)) {
  13073     case ST1B_z_p_bi:
  13074     case ST1D_z_p_bi:
  13075     case ST1H_z_p_bi:
  13076     case ST1W_z_p_bi: {
  13077       int vl = GetVectorLengthInBytes();
  13078       int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(false);
  13079       int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(false);
  13080       VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
  13081       int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
  13082       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  13083       uint64_t offset =
  13084           (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
  13085       VectorFormat vform =
  13086           SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
  13087       LogicSVEAddressVector addr(base + offset);
  13088       addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  13089       SVEStructuredStoreHelper(vform,
  13090                                ReadPRegister(instr->GetPgLow8()),
  13091                                instr->GetRt(),
  13092                                addr);
  13093       break;
  13094     }
  13095     default:
  13096       VIXL_UNIMPLEMENTED();
  13097       break;
  13098   }
  13099 }
  13100 
  13101 void Simulator::VisitSVEContiguousStore_ScalarPlusScalar(
  13102     const Instruction* instr) {
  13103   switch (instr->Mask(SVEContiguousStore_ScalarPlusScalarMask)) {
  13104     case ST1B_z_p_br:
  13105     case ST1D_z_p_br:
  13106     case ST1H_z_p_br:
  13107     case ST1W_z_p_br: {
  13108       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  13109       uint64_t offset = ReadXRegister(instr->GetRm());
  13110       offset <<= instr->ExtractBits(24, 23);
  13111       VectorFormat vform =
  13112           SVEFormatFromLaneSizeInBytesLog2(instr->ExtractBits(22, 21));
  13113       LogicSVEAddressVector addr(base + offset);
  13114       addr.SetMsizeInBytesLog2(instr->ExtractBits(24, 23));
  13115       SVEStructuredStoreHelper(vform,
  13116                                ReadPRegister(instr->GetPgLow8()),
  13117                                instr->GetRt(),
  13118                                addr);
  13119       break;
  13120     }
  13121     default:
  13122       VIXL_UNIMPLEMENTED();
  13123       break;
  13124   }
  13125 }
  13126 
  13127 void Simulator::VisitSVECopySIMDFPScalarRegisterToVector_Predicated(
  13128     const Instruction* instr) {
  13129   VectorFormat vform = instr->GetSVEVectorFormat();
  13130   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  13131   SimVRegister z_result;
  13132 
  13133   switch (instr->Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) {
  13134     case CPY_z_p_v:
  13135       dup_element(vform, z_result, ReadVRegister(instr->GetRn()), 0);
  13136       mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result);
  13137       break;
  13138     default:
  13139       VIXL_UNIMPLEMENTED();
  13140       break;
  13141   }
  13142 }
  13143 
  13144 void Simulator::VisitSVEStoreMultipleStructures_ScalarPlusImm(
  13145     const Instruction* instr) {
  13146   switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusImmMask)) {
  13147     case ST2B_z_p_bi_contiguous:
  13148     case ST2D_z_p_bi_contiguous:
  13149     case ST2H_z_p_bi_contiguous:
  13150     case ST2W_z_p_bi_contiguous:
  13151     case ST3B_z_p_bi_contiguous:
  13152     case ST3D_z_p_bi_contiguous:
  13153     case ST3H_z_p_bi_contiguous:
  13154     case ST3W_z_p_bi_contiguous:
  13155     case ST4B_z_p_bi_contiguous:
  13156     case ST4D_z_p_bi_contiguous:
  13157     case ST4H_z_p_bi_contiguous:
  13158     case ST4W_z_p_bi_contiguous: {
  13159       int vl = GetVectorLengthInBytes();
  13160       int msz = instr->ExtractBits(24, 23);
  13161       int reg_count = instr->ExtractBits(22, 21) + 1;
  13162       uint64_t offset = instr->ExtractSignedBits(19, 16) * vl * reg_count;
  13163       LogicSVEAddressVector addr(
  13164           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
  13165       addr.SetMsizeInBytesLog2(msz);
  13166       addr.SetRegCount(reg_count);
  13167       SVEStructuredStoreHelper(SVEFormatFromLaneSizeInBytesLog2(msz),
  13168                                ReadPRegister(instr->GetPgLow8()),
  13169                                instr->GetRt(),
  13170                                addr);
  13171       break;
  13172     }
  13173     default:
  13174       VIXL_UNIMPLEMENTED();
  13175       break;
  13176   }
  13177 }
  13178 
  13179 void Simulator::VisitSVEStoreMultipleStructures_ScalarPlusScalar(
  13180     const Instruction* instr) {
  13181   switch (instr->Mask(SVEStoreMultipleStructures_ScalarPlusScalarMask)) {
  13182     case ST2B_z_p_br_contiguous:
  13183     case ST2D_z_p_br_contiguous:
  13184     case ST2H_z_p_br_contiguous:
  13185     case ST2W_z_p_br_contiguous:
  13186     case ST3B_z_p_br_contiguous:
  13187     case ST3D_z_p_br_contiguous:
  13188     case ST3H_z_p_br_contiguous:
  13189     case ST3W_z_p_br_contiguous:
  13190     case ST4B_z_p_br_contiguous:
  13191     case ST4D_z_p_br_contiguous:
  13192     case ST4H_z_p_br_contiguous:
  13193     case ST4W_z_p_br_contiguous: {
  13194       int msz = instr->ExtractBits(24, 23);
  13195       uint64_t offset = ReadXRegister(instr->GetRm()) * (1 << msz);
  13196       VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(msz);
  13197       LogicSVEAddressVector addr(
  13198           ReadXRegister(instr->GetRn(), Reg31IsStackPointer) + offset);
  13199       addr.SetMsizeInBytesLog2(msz);
  13200       addr.SetRegCount(instr->ExtractBits(22, 21) + 1);
  13201       SVEStructuredStoreHelper(vform,
  13202                                ReadPRegister(instr->GetPgLow8()),
  13203                                instr->GetRt(),
  13204                                addr);
  13205       break;
  13206     }
  13207     default:
  13208       VIXL_UNIMPLEMENTED();
  13209       break;
  13210   }
  13211 }
  13212 
  13213 void Simulator::VisitSVEStorePredicateRegister(const Instruction* instr) {
  13214   switch (instr->Mask(SVEStorePredicateRegisterMask)) {
  13215     case STR_p_bi: {
  13216       SimPRegister& pt = ReadPRegister(instr->GetPt());
  13217       int pl = GetPredicateLengthInBytes();
  13218       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
  13219       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
  13220       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  13221       uint64_t address = base + multiplier * pl;
  13222       for (int i = 0; i < pl; i++) {
  13223         if (!MemWrite(address + i, pt.GetLane<uint8_t>(i))) return;
  13224       }
  13225       LogPWrite(instr->GetPt(), address);
  13226       break;
  13227     }
  13228     default:
  13229       VIXL_UNIMPLEMENTED();
  13230       break;
  13231   }
  13232 }
  13233 
  13234 void Simulator::VisitSVEStoreVectorRegister(const Instruction* instr) {
  13235   switch (instr->Mask(SVEStoreVectorRegisterMask)) {
  13236     case STR_z_bi: {
  13237       SimVRegister& zt = ReadVRegister(instr->GetRt());
  13238       int vl = GetVectorLengthInBytes();
  13239       int imm9 = (instr->ExtractBits(21, 16) << 3) | instr->ExtractBits(12, 10);
  13240       uint64_t multiplier = ExtractSignedBitfield64(8, 0, imm9);
  13241       uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  13242       uint64_t address = base + multiplier * vl;
  13243       for (int i = 0; i < vl; i++) {
  13244         if (!MemWrite(address + i, zt.GetLane<uint8_t>(i))) return;
  13245       }
  13246       LogZWrite(instr->GetRt(), address);
  13247       break;
  13248     }
  13249     default:
  13250       VIXL_UNIMPLEMENTED();
  13251       break;
  13252   }
  13253 }
  13254 
  13255 void Simulator::VisitSVEMulIndex(const Instruction* instr) {
  13256   VectorFormat vform = instr->GetSVEVectorFormat();
  13257   SimVRegister& zda = ReadVRegister(instr->GetRd());
  13258   SimVRegister& zn = ReadVRegister(instr->GetRn());
  13259   std::pair<int, int> zm_and_index = instr->GetSVEMulZmAndIndex();
  13260   SimVRegister zm = ReadVRegister(zm_and_index.first);
  13261   int index = zm_and_index.second;
  13262 
  13263   SimVRegister temp;
  13264   dup_elements_to_segments(vform, temp, zm, index);
  13265 
  13266   switch (form_hash_) {
  13267     case "sdot_z_zzzi_d"_h:
  13268     case "sdot_z_zzzi_s"_h:
  13269       sdot(vform, zda, zn, temp);
  13270       break;
  13271     case "udot_z_zzzi_d"_h:
  13272     case "udot_z_zzzi_s"_h:
  13273       udot(vform, zda, zn, temp);
  13274       break;
  13275     case "sudot_z_zzzi_s"_h:
  13276       usdot(vform, zda, temp, zn);
  13277       break;
  13278     case "usdot_z_zzzi_s"_h:
  13279       usdot(vform, zda, zn, temp);
  13280       break;
  13281     default:
  13282       VIXL_UNIMPLEMENTED();
  13283       break;
  13284   }
  13285 }
  13286 
  13287 void Simulator::SimulateMatrixMul(const Instruction* instr) {
  13288   VectorFormat vform = kFormatVnS;
  13289   SimVRegister& dn = ReadVRegister(instr->GetRd());
  13290   SimVRegister& n = ReadVRegister(instr->GetRn());
  13291   SimVRegister& m = ReadVRegister(instr->GetRm());
  13292 
  13293   bool n_signed = false;
  13294   bool m_signed = false;
  13295   switch (form_hash_) {
  13296     case "smmla_asimdsame2_g"_h:
  13297       vform = kFormat4S;
  13298       VIXL_FALLTHROUGH();
  13299     case "smmla_z_zzz"_h:
  13300       n_signed = m_signed = true;
  13301       break;
  13302     case "ummla_asimdsame2_g"_h:
  13303       vform = kFormat4S;
  13304       VIXL_FALLTHROUGH();
  13305     case "ummla_z_zzz"_h:
  13306       // Nothing to do.
  13307       break;
  13308     case "usmmla_asimdsame2_g"_h:
  13309       vform = kFormat4S;
  13310       VIXL_FALLTHROUGH();
  13311     case "usmmla_z_zzz"_h:
  13312       m_signed = true;
  13313       break;
  13314     default:
  13315       VIXL_UNIMPLEMENTED();
  13316       break;
  13317   }
  13318   matmul(vform, dn, n, m, n_signed, m_signed);
  13319 }
  13320 
  13321 void Simulator::SimulateSVEFPMatrixMul(const Instruction* instr) {
  13322   VectorFormat vform = instr->GetSVEVectorFormat();
  13323   SimVRegister& zdn = ReadVRegister(instr->GetRd());
  13324   SimVRegister& zn = ReadVRegister(instr->GetRn());
  13325   SimVRegister& zm = ReadVRegister(instr->GetRm());
  13326 
  13327   switch (form_hash_) {
  13328     case "fmmla_z_zzz_s"_h:
  13329     case "fmmla_z_zzz_d"_h:
  13330       fmatmul(vform, zdn, zn, zm);
  13331       break;
  13332     default:
  13333       VIXL_UNIMPLEMENTED();
  13334       break;
  13335   }
  13336 }
  13337 
  13338 void Simulator::VisitSVEPartitionBreakCondition(const Instruction* instr) {
  13339   SimPRegister& pd = ReadPRegister(instr->GetPd());
  13340   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
  13341   SimPRegister& pn = ReadPRegister(instr->GetPn());
  13342   SimPRegister result;
  13343 
  13344   switch (instr->Mask(SVEPartitionBreakConditionMask)) {
  13345     case BRKAS_p_p_p_z:
  13346     case BRKA_p_p_p:
  13347       brka(result, pg, pn);
  13348       break;
  13349     case BRKBS_p_p_p_z:
  13350     case BRKB_p_p_p:
  13351       brkb(result, pg, pn);
  13352       break;
  13353     default:
  13354       VIXL_UNIMPLEMENTED();
  13355       break;
  13356   }
  13357 
  13358   if (instr->ExtractBit(4) == 1) {
  13359     mov_merging(pd, pg, result);
  13360   } else {
  13361     mov_zeroing(pd, pg, result);
  13362   }
  13363 
  13364   // Set flag if needed.
  13365   if (instr->ExtractBit(22) == 1) {
  13366     PredTest(kFormatVnB, pg, pd);
  13367   }
  13368 }
  13369 
  13370 void Simulator::VisitSVEPropagateBreakToNextPartition(
  13371     const Instruction* instr) {
  13372   SimPRegister& pdm = ReadPRegister(instr->GetPd());
  13373   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
  13374   SimPRegister& pn = ReadPRegister(instr->GetPn());
  13375 
  13376   switch (instr->Mask(SVEPropagateBreakToNextPartitionMask)) {
  13377     case BRKNS_p_p_pp:
  13378     case BRKN_p_p_pp:
  13379       brkn(pdm, pg, pn);
  13380       break;
  13381     default:
  13382       VIXL_UNIMPLEMENTED();
  13383       break;
  13384   }
  13385 
  13386   // Set flag if needed.
  13387   if (instr->ExtractBit(22) == 1) {
  13388     // Note that this ignores `pg`.
  13389     PredTest(kFormatVnB, GetPTrue(), pdm);
  13390   }
  13391 }
  13392 
  13393 void Simulator::VisitSVEUnpackPredicateElements(const Instruction* instr) {
  13394   SimPRegister& pd = ReadPRegister(instr->GetPd());
  13395   SimPRegister& pn = ReadPRegister(instr->GetPn());
  13396 
  13397   SimVRegister temp = Simulator::ExpandToSimVRegister(pn);
  13398   SimVRegister zero;
  13399   dup_immediate(kFormatVnB, zero, 0);
  13400 
  13401   switch (instr->Mask(SVEUnpackPredicateElementsMask)) {
  13402     case PUNPKHI_p_p:
  13403       zip2(kFormatVnB, temp, temp, zero);
  13404       break;
  13405     case PUNPKLO_p_p:
  13406       zip1(kFormatVnB, temp, temp, zero);
  13407       break;
  13408     default:
  13409       VIXL_UNIMPLEMENTED();
  13410       break;
  13411   }
  13412   Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp);
  13413 }
  13414 
  13415 void Simulator::VisitSVEPermutePredicateElements(const Instruction* instr) {
  13416   VectorFormat vform = instr->GetSVEVectorFormat();
  13417   SimPRegister& pd = ReadPRegister(instr->GetPd());
  13418   SimPRegister& pn = ReadPRegister(instr->GetPn());
  13419   SimPRegister& pm = ReadPRegister(instr->GetPm());
  13420 
  13421   SimVRegister temp0 = Simulator::ExpandToSimVRegister(pn);
  13422   SimVRegister temp1 = Simulator::ExpandToSimVRegister(pm);
  13423 
  13424   switch (instr->Mask(SVEPermutePredicateElementsMask)) {
  13425     case TRN1_p_pp:
  13426       trn1(vform, temp0, temp0, temp1);
  13427       break;
  13428     case TRN2_p_pp:
  13429       trn2(vform, temp0, temp0, temp1);
  13430       break;
  13431     case UZP1_p_pp:
  13432       uzp1(vform, temp0, temp0, temp1);
  13433       break;
  13434     case UZP2_p_pp:
  13435       uzp2(vform, temp0, temp0, temp1);
  13436       break;
  13437     case ZIP1_p_pp:
  13438       zip1(vform, temp0, temp0, temp1);
  13439       break;
  13440     case ZIP2_p_pp:
  13441       zip2(vform, temp0, temp0, temp1);
  13442       break;
  13443     default:
  13444       VIXL_UNIMPLEMENTED();
  13445       break;
  13446   }
  13447   Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp0);
  13448 }
  13449 
  13450 void Simulator::VisitSVEReversePredicateElements(const Instruction* instr) {
  13451   switch (instr->Mask(SVEReversePredicateElementsMask)) {
  13452     case REV_p_p: {
  13453       VectorFormat vform = instr->GetSVEVectorFormat();
  13454       SimPRegister& pn = ReadPRegister(instr->GetPn());
  13455       SimPRegister& pd = ReadPRegister(instr->GetPd());
  13456       SimVRegister temp = Simulator::ExpandToSimVRegister(pn);
  13457       rev(vform, temp, temp);
  13458       Simulator::ExtractFromSimVRegister(kFormatVnB, pd, temp);
  13459       break;
  13460     }
  13461     default:
  13462       VIXL_UNIMPLEMENTED();
  13463       break;
  13464   }
  13465 }
  13466 
  13467 void Simulator::VisitSVEPermuteVectorExtract(const Instruction* instr) {
  13468   SimVRegister& zdn = ReadVRegister(instr->GetRd());
  13469   // Second source register "Zm" is encoded where "Zn" would usually be.
  13470   SimVRegister& zm = ReadVRegister(instr->GetRn());
  13471 
  13472   int index = instr->GetSVEExtractImmediate();
  13473   int vl = GetVectorLengthInBytes();
  13474   index = (index >= vl) ? 0 : index;
  13475 
  13476   switch (instr->Mask(SVEPermuteVectorExtractMask)) {
  13477     case EXT_z_zi_des:
  13478       ext(kFormatVnB, zdn, zdn, zm, index);
  13479       break;
  13480     default:
  13481       VIXL_UNIMPLEMENTED();
  13482       break;
  13483   }
  13484 }
  13485 
  13486 void Simulator::VisitSVEPermuteVectorInterleaving(const Instruction* instr) {
  13487   VectorFormat vform = instr->GetSVEVectorFormat();
  13488   SimVRegister& zd = ReadVRegister(instr->GetRd());
  13489   SimVRegister& zn = ReadVRegister(instr->GetRn());
  13490   SimVRegister& zm = ReadVRegister(instr->GetRm());
  13491 
  13492   switch (instr->Mask(SVEPermuteVectorInterleavingMask)) {
  13493     case TRN1_z_zz:
  13494       trn1(vform, zd, zn, zm);
  13495       break;
  13496     case TRN2_z_zz:
  13497       trn2(vform, zd, zn, zm);
  13498       break;
  13499     case UZP1_z_zz:
  13500       uzp1(vform, zd, zn, zm);
  13501       break;
  13502     case UZP2_z_zz:
  13503       uzp2(vform, zd, zn, zm);
  13504       break;
  13505     case ZIP1_z_zz:
  13506       zip1(vform, zd, zn, zm);
  13507       break;
  13508     case ZIP2_z_zz:
  13509       zip2(vform, zd, zn, zm);
  13510       break;
  13511     default:
  13512       VIXL_UNIMPLEMENTED();
  13513       break;
  13514   }
  13515 }
  13516 
  13517 void Simulator::VisitSVEConditionallyBroadcastElementToVector(
  13518     const Instruction* instr) {
  13519   VectorFormat vform = instr->GetSVEVectorFormat();
  13520   SimVRegister& zdn = ReadVRegister(instr->GetRd());
  13521   SimVRegister& zm = ReadVRegister(instr->GetRn());
  13522   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  13523 
  13524   int active_offset = -1;
  13525   switch (instr->Mask(SVEConditionallyBroadcastElementToVectorMask)) {
  13526     case CLASTA_z_p_zz:
  13527       active_offset = 1;
  13528       break;
  13529     case CLASTB_z_p_zz:
  13530       active_offset = 0;
  13531       break;
  13532     default:
  13533       VIXL_UNIMPLEMENTED();
  13534       break;
  13535   }
  13536 
  13537   if (active_offset >= 0) {
  13538     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
  13539     if (value.first) {
  13540       dup_immediate(vform, zdn, value.second);
  13541     } else {
  13542       // Trigger a line of trace for the operation, even though it doesn't
  13543       // change the register value.
  13544       mov(vform, zdn, zdn);
  13545     }
  13546   }
  13547 }
  13548 
  13549 void Simulator::VisitSVEConditionallyExtractElementToSIMDFPScalar(
  13550     const Instruction* instr) {
  13551   VectorFormat vform = instr->GetSVEVectorFormat();
  13552   SimVRegister& vdn = ReadVRegister(instr->GetRd());
  13553   SimVRegister& zm = ReadVRegister(instr->GetRn());
  13554   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  13555 
  13556   int active_offset = -1;
  13557   switch (instr->Mask(SVEConditionallyExtractElementToSIMDFPScalarMask)) {
  13558     case CLASTA_v_p_z:
  13559       active_offset = 1;
  13560       break;
  13561     case CLASTB_v_p_z:
  13562       active_offset = 0;
  13563       break;
  13564     default:
  13565       VIXL_UNIMPLEMENTED();
  13566       break;
  13567   }
  13568 
  13569   if (active_offset >= 0) {
  13570     LogicVRegister dst(vdn);
  13571     uint64_t src1_value = dst.Uint(vform, 0);
  13572     std::pair<bool, uint64_t> src2_value = clast(vform, pg, zm, active_offset);
  13573     dup_immediate(vform, vdn, 0);
  13574     dst.SetUint(vform, 0, src2_value.first ? src2_value.second : src1_value);
  13575   }
  13576 }
  13577 
  13578 void Simulator::VisitSVEConditionallyExtractElementToGeneralRegister(
  13579     const Instruction* instr) {
  13580   VectorFormat vform = instr->GetSVEVectorFormat();
  13581   SimVRegister& zm = ReadVRegister(instr->GetRn());
  13582   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  13583 
  13584   int active_offset = -1;
  13585   switch (instr->Mask(SVEConditionallyExtractElementToGeneralRegisterMask)) {
  13586     case CLASTA_r_p_z:
  13587       active_offset = 1;
  13588       break;
  13589     case CLASTB_r_p_z:
  13590       active_offset = 0;
  13591       break;
  13592     default:
  13593       VIXL_UNIMPLEMENTED();
  13594       break;
  13595   }
  13596 
  13597   if (active_offset >= 0) {
  13598     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
  13599     uint64_t masked_src = ReadXRegister(instr->GetRd()) &
  13600                           GetUintMask(LaneSizeInBitsFromFormat(vform));
  13601     WriteXRegister(instr->GetRd(), value.first ? value.second : masked_src);
  13602   }
  13603 }
  13604 
  13605 void Simulator::VisitSVEExtractElementToSIMDFPScalarRegister(
  13606     const Instruction* instr) {
  13607   VectorFormat vform = instr->GetSVEVectorFormat();
  13608   SimVRegister& vdn = ReadVRegister(instr->GetRd());
  13609   SimVRegister& zm = ReadVRegister(instr->GetRn());
  13610   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  13611 
  13612   int active_offset = -1;
  13613   switch (instr->Mask(SVEExtractElementToSIMDFPScalarRegisterMask)) {
  13614     case LASTA_v_p_z:
  13615       active_offset = 1;
  13616       break;
  13617     case LASTB_v_p_z:
  13618       active_offset = 0;
  13619       break;
  13620     default:
  13621       VIXL_UNIMPLEMENTED();
  13622       break;
  13623   }
  13624 
  13625   if (active_offset >= 0) {
  13626     LogicVRegister dst(vdn);
  13627     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
  13628     dup_immediate(vform, vdn, 0);
  13629     dst.SetUint(vform, 0, value.second);
  13630   }
  13631 }
  13632 
  13633 void Simulator::VisitSVEExtractElementToGeneralRegister(
  13634     const Instruction* instr) {
  13635   VectorFormat vform = instr->GetSVEVectorFormat();
  13636   SimVRegister& zm = ReadVRegister(instr->GetRn());
  13637   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  13638 
  13639   int active_offset = -1;
  13640   switch (instr->Mask(SVEExtractElementToGeneralRegisterMask)) {
  13641     case LASTA_r_p_z:
  13642       active_offset = 1;
  13643       break;
  13644     case LASTB_r_p_z:
  13645       active_offset = 0;
  13646       break;
  13647     default:
  13648       VIXL_UNIMPLEMENTED();
  13649       break;
  13650   }
  13651 
  13652   if (active_offset >= 0) {
  13653     std::pair<bool, uint64_t> value = clast(vform, pg, zm, active_offset);
  13654     WriteXRegister(instr->GetRd(), value.second);
  13655   }
  13656 }
  13657 
  13658 void Simulator::VisitSVECompressActiveElements(const Instruction* instr) {
  13659   VectorFormat vform = instr->GetSVEVectorFormat();
  13660   SimVRegister& zd = ReadVRegister(instr->GetRd());
  13661   SimVRegister& zn = ReadVRegister(instr->GetRn());
  13662   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  13663 
  13664   switch (instr->Mask(SVECompressActiveElementsMask)) {
  13665     case COMPACT_z_p_z:
  13666       compact(vform, zd, pg, zn);
  13667       break;
  13668     default:
  13669       VIXL_UNIMPLEMENTED();
  13670       break;
  13671   }
  13672 }
  13673 
  13674 void Simulator::VisitSVECopyGeneralRegisterToVector_Predicated(
  13675     const Instruction* instr) {
  13676   VectorFormat vform = instr->GetSVEVectorFormat();
  13677   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  13678   SimVRegister z_result;
  13679 
  13680   switch (instr->Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) {
  13681     case CPY_z_p_r:
  13682       dup_immediate(vform,
  13683                     z_result,
  13684                     ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
  13685       mov_merging(vform, ReadVRegister(instr->GetRd()), pg, z_result);
  13686       break;
  13687     default:
  13688       VIXL_UNIMPLEMENTED();
  13689       break;
  13690   }
  13691 }
  13692 
  13693 void Simulator::VisitSVECopyIntImm_Predicated(const Instruction* instr) {
  13694   VectorFormat vform = instr->GetSVEVectorFormat();
  13695   SimPRegister& pg = ReadPRegister(instr->ExtractBits(19, 16));
  13696   SimVRegister& zd = ReadVRegister(instr->GetRd());
  13697 
  13698   SimVRegister result;
  13699   switch (instr->Mask(SVECopyIntImm_PredicatedMask)) {
  13700     case CPY_z_p_i: {
  13701       // Use unsigned arithmetic to avoid undefined behaviour during the shift.
  13702       uint64_t imm8 = instr->GetImmSVEIntWideSigned();
  13703       dup_immediate(vform, result, imm8 << (instr->ExtractBit(13) * 8));
  13704       break;
  13705     }
  13706     default:
  13707       VIXL_UNIMPLEMENTED();
  13708       break;
  13709   }
  13710 
  13711   if (instr->ExtractBit(14) != 0) {
  13712     mov_merging(vform, zd, pg, result);
  13713   } else {
  13714     mov_zeroing(vform, zd, pg, result);
  13715   }
  13716 }
  13717 
  13718 void Simulator::VisitSVEReverseWithinElements(const Instruction* instr) {
  13719   SimVRegister& zd = ReadVRegister(instr->GetRd());
  13720   SimVRegister& zn = ReadVRegister(instr->GetRn());
  13721   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  13722   SimVRegister result;
  13723 
  13724   // In NEON, the chunk size in which elements are REVersed is in the
  13725   // instruction mnemonic, and the element size attached to the register.
  13726   // SVE reverses the semantics; the mapping to logic functions below is to
  13727   // account for this.
  13728   VectorFormat chunk_form = instr->GetSVEVectorFormat();
  13729   VectorFormat element_form = kFormatUndefined;
  13730 
  13731   switch (instr->Mask(SVEReverseWithinElementsMask)) {
  13732     case RBIT_z_p_z:
  13733       rbit(chunk_form, result, zn);
  13734       break;
  13735     case REVB_z_z:
  13736       VIXL_ASSERT((chunk_form == kFormatVnH) || (chunk_form == kFormatVnS) ||
  13737                   (chunk_form == kFormatVnD));
  13738       element_form = kFormatVnB;
  13739       break;
  13740     case REVH_z_z:
  13741       VIXL_ASSERT((chunk_form == kFormatVnS) || (chunk_form == kFormatVnD));
  13742       element_form = kFormatVnH;
  13743       break;
  13744     case REVW_z_z:
  13745       VIXL_ASSERT(chunk_form == kFormatVnD);
  13746       element_form = kFormatVnS;
  13747       break;
  13748     default:
  13749       VIXL_UNIMPLEMENTED();
  13750       break;
  13751   }
  13752 
  13753   if (instr->Mask(SVEReverseWithinElementsMask) != RBIT_z_p_z) {
  13754     VIXL_ASSERT(element_form != kFormatUndefined);
  13755     switch (chunk_form) {
  13756       case kFormatVnH:
  13757         rev16(element_form, result, zn);
  13758         break;
  13759       case kFormatVnS:
  13760         rev32(element_form, result, zn);
  13761         break;
  13762       case kFormatVnD:
  13763         rev64(element_form, result, zn);
  13764         break;
  13765       default:
  13766         VIXL_UNIMPLEMENTED();
  13767     }
  13768   }
  13769 
  13770   mov_merging(chunk_form, zd, pg, result);
  13771 }
  13772 
  13773 void Simulator::VisitSVEVectorSplice(const Instruction* instr) {
  13774   VectorFormat vform = instr->GetSVEVectorFormat();
  13775   SimVRegister& zd = ReadVRegister(instr->GetRd());
  13776   SimVRegister& zn = ReadVRegister(instr->GetRn());
  13777   SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
  13778   SimPRegister& pg = ReadPRegister(instr->GetPgLow8());
  13779 
  13780   switch (form_hash_) {
  13781     case "splice_z_p_zz_des"_h:
  13782       splice(vform, zd, pg, zd, zn);
  13783       break;
  13784     case "splice_z_p_zz_con"_h:
  13785       splice(vform, zd, pg, zn, zn2);
  13786       break;
  13787     default:
  13788       VIXL_UNIMPLEMENTED();
  13789       break;
  13790   }
  13791 }
  13792 
  13793 void Simulator::VisitSVEBroadcastGeneralRegister(const Instruction* instr) {
  13794   SimVRegister& zd = ReadVRegister(instr->GetRd());
  13795   switch (instr->Mask(SVEBroadcastGeneralRegisterMask)) {
  13796     case DUP_z_r:
  13797       dup_immediate(instr->GetSVEVectorFormat(),
  13798                     zd,
  13799                     ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
  13800       break;
  13801     default:
  13802       VIXL_UNIMPLEMENTED();
  13803       break;
  13804   }
  13805 }
  13806 
  13807 void Simulator::VisitSVEInsertSIMDFPScalarRegister(const Instruction* instr) {
  13808   SimVRegister& zd = ReadVRegister(instr->GetRd());
  13809   VectorFormat vform = instr->GetSVEVectorFormat();
  13810   switch (instr->Mask(SVEInsertSIMDFPScalarRegisterMask)) {
  13811     case INSR_z_v:
  13812       insr(vform, zd, ReadDRegisterBits(instr->GetRn()));
  13813       break;
  13814     default:
  13815       VIXL_UNIMPLEMENTED();
  13816       break;
  13817   }
  13818 }
  13819 
  13820 void Simulator::VisitSVEInsertGeneralRegister(const Instruction* instr) {
  13821   SimVRegister& zd = ReadVRegister(instr->GetRd());
  13822   VectorFormat vform = instr->GetSVEVectorFormat();
  13823   switch (instr->Mask(SVEInsertGeneralRegisterMask)) {
  13824     case INSR_z_r:
  13825       insr(vform, zd, ReadXRegister(instr->GetRn()));
  13826       break;
  13827     default:
  13828       VIXL_UNIMPLEMENTED();
  13829       break;
  13830   }
  13831 }
  13832 
  13833 void Simulator::VisitSVEBroadcastIndexElement(const Instruction* instr) {
  13834   SimVRegister& zd = ReadVRegister(instr->GetRd());
  13835   switch (instr->Mask(SVEBroadcastIndexElementMask)) {
  13836     case DUP_z_zi: {
  13837       std::pair<int, int> index_and_lane_size =
  13838           instr->GetSVEPermuteIndexAndLaneSizeLog2();
  13839       int index = index_and_lane_size.first;
  13840       int lane_size_in_bytes_log_2 = index_and_lane_size.second;
  13841       VectorFormat vform =
  13842           SVEFormatFromLaneSizeInBytesLog2(lane_size_in_bytes_log_2);
  13843       if ((index < 0) || (index >= LaneCountFromFormat(vform))) {
  13844         // Out of bounds, set the destination register to zero.
  13845         dup_immediate(kFormatVnD, zd, 0);
  13846       } else {
  13847         dup_element(vform, zd, ReadVRegister(instr->GetRn()), index);
  13848       }
  13849       return;
  13850     }
  13851     default:
  13852       VIXL_UNIMPLEMENTED();
  13853       break;
  13854   }
  13855 }
  13856 
  13857 void Simulator::VisitSVEReverseVectorElements(const Instruction* instr) {
  13858   SimVRegister& zd = ReadVRegister(instr->GetRd());
  13859   VectorFormat vform = instr->GetSVEVectorFormat();
  13860   switch (instr->Mask(SVEReverseVectorElementsMask)) {
  13861     case REV_z_z:
  13862       rev(vform, zd, ReadVRegister(instr->GetRn()));
  13863       break;
  13864     default:
  13865       VIXL_UNIMPLEMENTED();
  13866       break;
  13867   }
  13868 }
  13869 
  13870 void Simulator::VisitSVEUnpackVectorElements(const Instruction* instr) {
  13871   SimVRegister& zd = ReadVRegister(instr->GetRd());
  13872   VectorFormat vform = instr->GetSVEVectorFormat();
  13873   switch (instr->Mask(SVEUnpackVectorElementsMask)) {
  13874     case SUNPKHI_z_z:
  13875       unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kSignedExtend);
  13876       break;
  13877     case SUNPKLO_z_z:
  13878       unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kSignedExtend);
  13879       break;
  13880     case UUNPKHI_z_z:
  13881       unpk(vform, zd, ReadVRegister(instr->GetRn()), kHiHalf, kUnsignedExtend);
  13882       break;
  13883     case UUNPKLO_z_z:
  13884       unpk(vform, zd, ReadVRegister(instr->GetRn()), kLoHalf, kUnsignedExtend);
  13885       break;
  13886     default:
  13887       VIXL_UNIMPLEMENTED();
  13888       break;
  13889   }
  13890 }
  13891 
  13892 void Simulator::VisitSVETableLookup(const Instruction* instr) {
  13893   VectorFormat vform = instr->GetSVEVectorFormat();
  13894   SimVRegister& zd = ReadVRegister(instr->GetRd());
  13895   SimVRegister& zn = ReadVRegister(instr->GetRn());
  13896   SimVRegister& zn2 = ReadVRegister((instr->GetRn() + 1) % kNumberOfZRegisters);
  13897   SimVRegister& zm = ReadVRegister(instr->GetRm());
  13898 
  13899   switch (form_hash_) {
  13900     case "tbl_z_zz_1"_h:
  13901       tbl(vform, zd, zn, zm);
  13902       break;
  13903     case "tbl_z_zz_2"_h:
  13904       tbl(vform, zd, zn, zn2, zm);
  13905       break;
  13906     case "tbx_z_zz"_h:
  13907       tbx(vform, zd, zn, zm);
  13908       break;
  13909     default:
  13910       VIXL_UNIMPLEMENTED();
  13911       break;
  13912   }
  13913 }
  13914 
  13915 void Simulator::VisitSVEPredicateCount(const Instruction* instr) {
  13916   VectorFormat vform = instr->GetSVEVectorFormat();
  13917   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
  13918   SimPRegister& pn = ReadPRegister(instr->GetPn());
  13919 
  13920   switch (instr->Mask(SVEPredicateCountMask)) {
  13921     case CNTP_r_p_p: {
  13922       WriteXRegister(instr->GetRd(), CountActiveAndTrueLanes(vform, pg, pn));
  13923       break;
  13924     }
  13925     default:
  13926       VIXL_UNIMPLEMENTED();
  13927       break;
  13928   }
  13929 }
  13930 
  13931 void Simulator::VisitSVEPredicateLogical(const Instruction* instr) {
  13932   Instr op = instr->Mask(SVEPredicateLogicalMask);
  13933   SimPRegister& pd = ReadPRegister(instr->GetPd());
  13934   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
  13935   SimPRegister& pn = ReadPRegister(instr->GetPn());
  13936   SimPRegister& pm = ReadPRegister(instr->GetPm());
  13937   SimPRegister result;
  13938   switch (op) {
  13939     case ANDS_p_p_pp_z:
  13940     case AND_p_p_pp_z:
  13941     case BICS_p_p_pp_z:
  13942     case BIC_p_p_pp_z:
  13943     case EORS_p_p_pp_z:
  13944     case EOR_p_p_pp_z:
  13945     case NANDS_p_p_pp_z:
  13946     case NAND_p_p_pp_z:
  13947     case NORS_p_p_pp_z:
  13948     case NOR_p_p_pp_z:
  13949     case ORNS_p_p_pp_z:
  13950     case ORN_p_p_pp_z:
  13951     case ORRS_p_p_pp_z:
  13952     case ORR_p_p_pp_z:
  13953       SVEPredicateLogicalHelper(static_cast<SVEPredicateLogicalOp>(op),
  13954                                 result,
  13955                                 pn,
  13956                                 pm);
  13957       break;
  13958     case SEL_p_p_pp:
  13959       sel(pd, pg, pn, pm);
  13960       return;
  13961     default:
  13962       VIXL_UNIMPLEMENTED();
  13963       break;
  13964   }
  13965 
  13966   mov_zeroing(pd, pg, result);
  13967   if (instr->Mask(SVEPredicateLogicalSetFlagsBit) != 0) {
  13968     PredTest(kFormatVnB, pg, pd);
  13969   }
  13970 }
  13971 
  13972 void Simulator::VisitSVEPredicateFirstActive(const Instruction* instr) {
  13973   LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5));
  13974   LogicPRegister pdn = ReadPRegister(instr->GetPd());
  13975   switch (instr->Mask(SVEPredicateFirstActiveMask)) {
  13976     case PFIRST_p_p_p:
  13977       pfirst(pdn, pg, pdn);
  13978       // TODO: Is this broken when pg == pdn?
  13979       PredTest(kFormatVnB, pg, pdn);
  13980       break;
  13981     default:
  13982       VIXL_UNIMPLEMENTED();
  13983       break;
  13984   }
  13985 }
  13986 
  13987 void Simulator::VisitSVEPredicateInitialize(const Instruction* instr) {
  13988   // This group only contains PTRUE{S}, and there are no unallocated encodings.
  13989   VIXL_STATIC_ASSERT(
  13990       SVEPredicateInitializeMask ==
  13991       (SVEPredicateInitializeFMask | SVEPredicateInitializeSetFlagsBit));
  13992   VIXL_ASSERT((instr->Mask(SVEPredicateInitializeMask) == PTRUE_p_s) ||
  13993               (instr->Mask(SVEPredicateInitializeMask) == PTRUES_p_s));
  13994 
  13995   LogicPRegister pdn = ReadPRegister(instr->GetPd());
  13996   VectorFormat vform = instr->GetSVEVectorFormat();
  13997 
  13998   ptrue(vform, pdn, instr->GetImmSVEPredicateConstraint());
  13999   if (instr->ExtractBit(16)) PredTest(vform, pdn, pdn);
  14000 }
  14001 
  14002 void Simulator::VisitSVEPredicateNextActive(const Instruction* instr) {
  14003   // This group only contains PNEXT, and there are no unallocated encodings.
  14004   VIXL_STATIC_ASSERT(SVEPredicateNextActiveFMask == SVEPredicateNextActiveMask);
  14005   VIXL_ASSERT(instr->Mask(SVEPredicateNextActiveMask) == PNEXT_p_p_p);
  14006 
  14007   LogicPRegister pg = ReadPRegister(instr->ExtractBits(8, 5));
  14008   LogicPRegister pdn = ReadPRegister(instr->GetPd());
  14009   VectorFormat vform = instr->GetSVEVectorFormat();
  14010 
  14011   pnext(vform, pdn, pg, pdn);
  14012   // TODO: Is this broken when pg == pdn?
  14013   PredTest(vform, pg, pdn);
  14014 }
  14015 
  14016 void Simulator::VisitSVEPredicateReadFromFFR_Predicated(
  14017     const Instruction* instr) {
  14018   LogicPRegister pd(ReadPRegister(instr->GetPd()));
  14019   LogicPRegister pg(ReadPRegister(instr->GetPn()));
  14020   FlagsUpdate flags = LeaveFlags;
  14021   switch (instr->Mask(SVEPredicateReadFromFFR_PredicatedMask)) {
  14022     case RDFFR_p_p_f:
  14023       // Do nothing.
  14024       break;
  14025     case RDFFRS_p_p_f:
  14026       flags = SetFlags;
  14027       break;
  14028     default:
  14029       VIXL_UNIMPLEMENTED();
  14030       break;
  14031   }
  14032 
  14033   LogicPRegister ffr(ReadFFR());
  14034   mov_zeroing(pd, pg, ffr);
  14035 
  14036   if (flags == SetFlags) {
  14037     PredTest(kFormatVnB, pg, pd);
  14038   }
  14039 }
  14040 
  14041 void Simulator::VisitSVEPredicateReadFromFFR_Unpredicated(
  14042     const Instruction* instr) {
  14043   LogicPRegister pd(ReadPRegister(instr->GetPd()));
  14044   LogicPRegister ffr(ReadFFR());
  14045   switch (instr->Mask(SVEPredicateReadFromFFR_UnpredicatedMask)) {
  14046     case RDFFR_p_f:
  14047       mov(pd, ffr);
  14048       break;
  14049     default:
  14050       VIXL_UNIMPLEMENTED();
  14051       break;
  14052   }
  14053 }
  14054 
  14055 void Simulator::VisitSVEPredicateTest(const Instruction* instr) {
  14056   switch (instr->Mask(SVEPredicateTestMask)) {
  14057     case PTEST_p_p:
  14058       PredTest(kFormatVnB,
  14059                ReadPRegister(instr->ExtractBits(13, 10)),
  14060                ReadPRegister(instr->GetPn()));
  14061       break;
  14062     default:
  14063       VIXL_UNIMPLEMENTED();
  14064       break;
  14065   }
  14066 }
  14067 
  14068 void Simulator::VisitSVEPredicateZero(const Instruction* instr) {
  14069   switch (instr->Mask(SVEPredicateZeroMask)) {
  14070     case PFALSE_p:
  14071       pfalse(ReadPRegister(instr->GetPd()));
  14072       break;
  14073     default:
  14074       VIXL_UNIMPLEMENTED();
  14075       break;
  14076   }
  14077 }
  14078 
  14079 void Simulator::VisitSVEPropagateBreak(const Instruction* instr) {
  14080   SimPRegister& pd = ReadPRegister(instr->GetPd());
  14081   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
  14082   SimPRegister& pn = ReadPRegister(instr->GetPn());
  14083   SimPRegister& pm = ReadPRegister(instr->GetPm());
  14084 
  14085   bool set_flags = false;
  14086   switch (instr->Mask(SVEPropagateBreakMask)) {
  14087     case BRKPAS_p_p_pp:
  14088       set_flags = true;
  14089       VIXL_FALLTHROUGH();
  14090     case BRKPA_p_p_pp:
  14091       brkpa(pd, pg, pn, pm);
  14092       break;
  14093     case BRKPBS_p_p_pp:
  14094       set_flags = true;
  14095       VIXL_FALLTHROUGH();
  14096     case BRKPB_p_p_pp:
  14097       brkpb(pd, pg, pn, pm);
  14098       break;
  14099     default:
  14100       VIXL_UNIMPLEMENTED();
  14101       break;
  14102   }
  14103 
  14104   if (set_flags) {
  14105     PredTest(kFormatVnB, pg, pd);
  14106   }
  14107 }
  14108 
  14109 void Simulator::VisitSVEStackFrameAdjustment(const Instruction* instr) {
  14110   uint64_t length = 0;
  14111   switch (instr->Mask(SVEStackFrameAdjustmentMask)) {
  14112     case ADDPL_r_ri:
  14113       length = GetPredicateLengthInBytes();
  14114       break;
  14115     case ADDVL_r_ri:
  14116       length = GetVectorLengthInBytes();
  14117       break;
  14118     default:
  14119       VIXL_UNIMPLEMENTED();
  14120   }
  14121   uint64_t base = ReadXRegister(instr->GetRm(), Reg31IsStackPointer);
  14122   WriteXRegister(instr->GetRd(),
  14123                  base + (length * instr->GetImmSVEVLScale()),
  14124                  LogRegWrites,
  14125                  Reg31IsStackPointer);
  14126 }
  14127 
  14128 void Simulator::VisitSVEStackFrameSize(const Instruction* instr) {
  14129   int64_t scale = instr->GetImmSVEVLScale();
  14130 
  14131   switch (instr->Mask(SVEStackFrameSizeMask)) {
  14132     case RDVL_r_i:
  14133       WriteXRegister(instr->GetRd(), GetVectorLengthInBytes() * scale);
  14134       break;
  14135     default:
  14136       VIXL_UNIMPLEMENTED();
  14137   }
  14138 }
  14139 
  14140 void Simulator::VisitSVEVectorSelect(const Instruction* instr) {
  14141   // The only instruction in this group is `sel`, and there are no unused
  14142   // encodings.
  14143   VIXL_ASSERT(instr->Mask(SVEVectorSelectMask) == SEL_z_p_zz);
  14144 
  14145   VectorFormat vform = instr->GetSVEVectorFormat();
  14146   SimVRegister& zd = ReadVRegister(instr->GetRd());
  14147   SimPRegister& pg = ReadPRegister(instr->ExtractBits(13, 10));
  14148   SimVRegister& zn = ReadVRegister(instr->GetRn());
  14149   SimVRegister& zm = ReadVRegister(instr->GetRm());
  14150 
  14151   sel(vform, zd, pg, zn, zm);
  14152 }
  14153 
  14154 void Simulator::VisitSVEFFRInitialise(const Instruction* instr) {
  14155   switch (instr->Mask(SVEFFRInitialiseMask)) {
  14156     case SETFFR_f: {
  14157       LogicPRegister ffr(ReadFFR());
  14158       ffr.SetAllBits();
  14159       break;
  14160     }
  14161     default:
  14162       VIXL_UNIMPLEMENTED();
  14163       break;
  14164   }
  14165 }
  14166 
  14167 void Simulator::VisitSVEFFRWriteFromPredicate(const Instruction* instr) {
  14168   switch (instr->Mask(SVEFFRWriteFromPredicateMask)) {
  14169     case WRFFR_f_p: {
  14170       SimPRegister pn(ReadPRegister(instr->GetPn()));
  14171       bool last_active = true;
  14172       for (unsigned i = 0; i < pn.GetSizeInBits(); i++) {
  14173         bool active = pn.GetBit(i);
  14174         if (active && !last_active) {
  14175           // `pn` is non-monotonic. This is UNPREDICTABLE.
  14176           VIXL_ABORT();
  14177         }
  14178         last_active = active;
  14179       }
  14180       mov(ReadFFR(), pn);
  14181       break;
  14182     }
  14183     default:
  14184       VIXL_UNIMPLEMENTED();
  14185       break;
  14186   }
  14187 }
  14188 
  14189 void Simulator::VisitSVEContiguousLoad_ScalarPlusImm(const Instruction* instr) {
  14190   bool is_signed;
  14191   switch (instr->Mask(SVEContiguousLoad_ScalarPlusImmMask)) {
  14192     case LD1B_z_p_bi_u8:
  14193     case LD1B_z_p_bi_u16:
  14194     case LD1B_z_p_bi_u32:
  14195     case LD1B_z_p_bi_u64:
  14196     case LD1H_z_p_bi_u16:
  14197     case LD1H_z_p_bi_u32:
  14198     case LD1H_z_p_bi_u64:
  14199     case LD1W_z_p_bi_u32:
  14200     case LD1W_z_p_bi_u64:
  14201     case LD1D_z_p_bi_u64:
  14202       is_signed = false;
  14203       break;
  14204     case LD1SB_z_p_bi_s16:
  14205     case LD1SB_z_p_bi_s32:
  14206     case LD1SB_z_p_bi_s64:
  14207     case LD1SH_z_p_bi_s32:
  14208     case LD1SH_z_p_bi_s64:
  14209     case LD1SW_z_p_bi_s64:
  14210       is_signed = true;
  14211       break;
  14212     default:
  14213       // This encoding group is complete, so no other values should be possible.
  14214       VIXL_UNREACHABLE();
  14215       is_signed = false;
  14216       break;
  14217   }
  14218 
  14219   int vl = GetVectorLengthInBytes();
  14220   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
  14221   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
  14222   VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
  14223   int vl_divisor_log2 = esize_in_bytes_log2 - msize_in_bytes_log2;
  14224   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  14225   uint64_t offset =
  14226       (instr->ExtractSignedBits(19, 16) * vl) / (1 << vl_divisor_log2);
  14227   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
  14228   LogicSVEAddressVector addr(base + offset);
  14229   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  14230   SVEStructuredLoadHelper(vform,
  14231                           ReadPRegister(instr->GetPgLow8()),
  14232                           instr->GetRt(),
  14233                           addr,
  14234                           is_signed);
  14235 }
  14236 
  14237 void Simulator::VisitSVEContiguousLoad_ScalarPlusScalar(
  14238     const Instruction* instr) {
  14239   bool is_signed;
  14240   switch (instr->Mask(SVEContiguousLoad_ScalarPlusScalarMask)) {
  14241     case LD1B_z_p_br_u8:
  14242     case LD1B_z_p_br_u16:
  14243     case LD1B_z_p_br_u32:
  14244     case LD1B_z_p_br_u64:
  14245     case LD1H_z_p_br_u16:
  14246     case LD1H_z_p_br_u32:
  14247     case LD1H_z_p_br_u64:
  14248     case LD1W_z_p_br_u32:
  14249     case LD1W_z_p_br_u64:
  14250     case LD1D_z_p_br_u64:
  14251       is_signed = false;
  14252       break;
  14253     case LD1SB_z_p_br_s16:
  14254     case LD1SB_z_p_br_s32:
  14255     case LD1SB_z_p_br_s64:
  14256     case LD1SH_z_p_br_s32:
  14257     case LD1SH_z_p_br_s64:
  14258     case LD1SW_z_p_br_s64:
  14259       is_signed = true;
  14260       break;
  14261     default:
  14262       // This encoding group is complete, so no other values should be possible.
  14263       VIXL_UNREACHABLE();
  14264       is_signed = false;
  14265       break;
  14266   }
  14267 
  14268   int msize_in_bytes_log2 = instr->GetSVEMsizeFromDtype(is_signed);
  14269   int esize_in_bytes_log2 = instr->GetSVEEsizeFromDtype(is_signed);
  14270   VIXL_ASSERT(msize_in_bytes_log2 <= esize_in_bytes_log2);
  14271   VectorFormat vform = SVEFormatFromLaneSizeInBytesLog2(esize_in_bytes_log2);
  14272   uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  14273   uint64_t offset = ReadXRegister(instr->GetRm());
  14274   offset <<= msize_in_bytes_log2;
  14275   LogicSVEAddressVector addr(base + offset);
  14276   addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
  14277   SVEStructuredLoadHelper(vform,
  14278                           ReadPRegister(instr->GetPgLow8()),
  14279                           instr->GetRt(),
  14280                           addr,
  14281                           is_signed);
  14282 }
  14283 
  14284 void Simulator::DoUnreachable(const Instruction* instr) {
  14285   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
  14286               (instr->GetImmException() == kUnreachableOpcode));
  14287 
  14288   fprintf(stream_,
  14289           "Hit UNREACHABLE marker at pc=%p.\n",
  14290           reinterpret_cast<const void*>(instr));
  14291   abort();
  14292 }
  14293 
  14294 void Simulator::Simulate_XdSP_XnSP_Xm(const Instruction* instr) {
  14295   VIXL_ASSERT(form_hash_ == Hash("irg_64i_dp_2src"));
  14296   uint64_t rn = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  14297   uint64_t rm = ReadXRegister(instr->GetRm());
  14298   uint64_t tag = GenerateRandomTag(rm & 0xffff);
  14299   uint64_t new_val = GetAddressWithAllocationTag(rn, tag);
  14300   WriteXRegister(instr->GetRd(), new_val, LogRegWrites, Reg31IsStackPointer);
  14301 }
  14302 
  14303 void Simulator::SimulateMTEAddSubTag(const Instruction* instr) {
  14304   uint64_t rn = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  14305   uint64_t rn_tag = GetAllocationTagFromAddress(rn);
  14306   uint64_t tag_offset = instr->ExtractBits(13, 10);
  14307   // TODO: implement GCR_EL1.Exclude to provide a tag exclusion list.
  14308   uint64_t new_tag = ChooseNonExcludedTag(rn_tag, tag_offset);
  14309 
  14310   uint64_t offset = instr->ExtractBits(21, 16) * kMTETagGranuleInBytes;
  14311   int carry = 0;
  14312   if (form_hash_ == Hash("subg_64_addsub_immtags")) {
  14313     offset = ~offset;
  14314     carry = 1;
  14315   } else {
  14316     VIXL_ASSERT(form_hash_ == Hash("addg_64_addsub_immtags"));
  14317   }
  14318   uint64_t new_val =
  14319       AddWithCarry(kXRegSize, /* set_flags = */ false, rn, offset, carry);
  14320   new_val = GetAddressWithAllocationTag(new_val, new_tag);
  14321   WriteXRegister(instr->GetRd(), new_val, LogRegWrites, Reg31IsStackPointer);
  14322 }
  14323 
  14324 void Simulator::SimulateMTETagMaskInsert(const Instruction* instr) {
  14325   VIXL_ASSERT(form_hash_ == Hash("gmi_64g_dp_2src"));
  14326   uint64_t mask = ReadXRegister(instr->GetRm());
  14327   uint64_t tag = GetAllocationTagFromAddress(
  14328       ReadXRegister(instr->GetRn(), Reg31IsStackPointer));
  14329   uint64_t mask_bit = 1 << tag;
  14330   WriteXRegister(instr->GetRd(), mask | mask_bit);
  14331 }
  14332 
  14333 void Simulator::SimulateMTESubPointer(const Instruction* instr) {
  14334   uint64_t rn = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  14335   uint64_t rm = ReadXRegister(instr->GetRm(), Reg31IsStackPointer);
  14336 
  14337   VIXL_ASSERT((form_hash_ == Hash("subps_64s_dp_2src")) ||
  14338               (form_hash_ == Hash("subp_64s_dp_2src")));
  14339   bool set_flags = (form_hash_ == Hash("subps_64s_dp_2src"));
  14340 
  14341   rn = ExtractSignedBitfield64(55, 0, rn);
  14342   rm = ExtractSignedBitfield64(55, 0, rm);
  14343   uint64_t new_val = AddWithCarry(kXRegSize, set_flags, rn, ~rm, 1);
  14344   WriteXRegister(instr->GetRd(), new_val);
  14345 }
  14346 
  14347 void Simulator::SimulateMTEStoreTagPair(const Instruction* instr) {
  14348   uint64_t rn = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);
  14349   uint64_t rt = ReadXRegister(instr->GetRt());
  14350   uint64_t rt2 = ReadXRegister(instr->GetRt2());
  14351   int offset = instr->GetImmLSPair() * static_cast<int>(kMTETagGranuleInBytes);
  14352 
  14353   AddrMode addr_mode = Offset;
  14354   switch (form_hash_) {
  14355     case Hash("stgp_64_ldstpair_off"):
  14356       // Default is the offset mode.
  14357       break;
  14358     case Hash("stgp_64_ldstpair_post"):
  14359       addr_mode = PostIndex;
  14360       break;
  14361     case Hash("stgp_64_ldstpair_pre"):
  14362       addr_mode = PreIndex;
  14363       break;
  14364     default:
  14365       VIXL_UNIMPLEMENTED();
  14366   }
  14367 
  14368   uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addr_mode);
  14369   if (!IsAligned(address, kMTETagGranuleInBytes)) {
  14370     VIXL_ALIGNMENT_EXCEPTION();
  14371   }
  14372 
  14373   int tag = GetAllocationTagFromAddress(rn);
  14374   meta_data_.SetMTETag(address, tag);
  14375 
  14376   if (!MemWrite<uint64_t>(address, rt)) return;
  14377   if (!MemWrite<uint64_t>(address + kXRegSizeInBytes, rt2)) return;
  14378 }
  14379 
  14380 void Simulator::SimulateMTEStoreTag(const Instruction* instr) {
  14381   uint64_t rt = ReadXRegister(instr->GetRt(), Reg31IsStackPointer);
  14382   int offset = instr->GetImmLS() * static_cast<int>(kMTETagGranuleInBytes);
  14383 
  14384   AddrMode addr_mode = Offset;
  14385   switch (form_hash_) {
  14386     case Hash("st2g_64soffset_ldsttags"):
  14387     case Hash("stg_64soffset_ldsttags"):
  14388     case Hash("stz2g_64soffset_ldsttags"):
  14389     case Hash("stzg_64soffset_ldsttags"):
  14390       // Default is the offset mode.
  14391       break;
  14392     case Hash("st2g_64spost_ldsttags"):
  14393     case Hash("stg_64spost_ldsttags"):
  14394     case Hash("stz2g_64spost_ldsttags"):
  14395     case Hash("stzg_64spost_ldsttags"):
  14396       addr_mode = PostIndex;
  14397       break;
  14398     case Hash("st2g_64spre_ldsttags"):
  14399     case Hash("stg_64spre_ldsttags"):
  14400     case Hash("stz2g_64spre_ldsttags"):
  14401     case Hash("stzg_64spre_ldsttags"):
  14402       addr_mode = PreIndex;
  14403       break;
  14404     default:
  14405       VIXL_UNIMPLEMENTED();
  14406   }
  14407 
  14408   bool is_pair = false;
  14409   switch (form_hash_) {
  14410     case Hash("st2g_64soffset_ldsttags"):
  14411     case Hash("st2g_64spost_ldsttags"):
  14412     case Hash("st2g_64spre_ldsttags"):
  14413     case Hash("stz2g_64soffset_ldsttags"):
  14414     case Hash("stz2g_64spost_ldsttags"):
  14415     case Hash("stz2g_64spre_ldsttags"):
  14416       is_pair = true;
  14417       break;
  14418     default:
  14419       break;
  14420   }
  14421 
  14422   bool is_zeroing = false;
  14423   switch (form_hash_) {
  14424     case Hash("stz2g_64soffset_ldsttags"):
  14425     case Hash("stz2g_64spost_ldsttags"):
  14426     case Hash("stz2g_64spre_ldsttags"):
  14427     case Hash("stzg_64soffset_ldsttags"):
  14428     case Hash("stzg_64spost_ldsttags"):
  14429     case Hash("stzg_64spre_ldsttags"):
  14430       is_zeroing = true;
  14431       break;
  14432     default:
  14433       break;
  14434   }
  14435 
  14436   uintptr_t address = AddressModeHelper(instr->GetRn(), offset, addr_mode);
  14437 
  14438   if (is_zeroing) {
  14439     if (!IsAligned(reinterpret_cast<uintptr_t>(address),
  14440                    kMTETagGranuleInBytes)) {
  14441       VIXL_ALIGNMENT_EXCEPTION();
  14442     }
  14443     VIXL_STATIC_ASSERT(kMTETagGranuleInBytes >= sizeof(uint64_t));
  14444     VIXL_STATIC_ASSERT(kMTETagGranuleInBytes % sizeof(uint64_t) == 0);
  14445 
  14446     size_t fill_size = kMTETagGranuleInBytes;
  14447     if (is_pair) {
  14448       fill_size += kMTETagGranuleInBytes;
  14449     }
  14450 
  14451     size_t fill_offset = 0;
  14452     while (fill_offset < fill_size) {
  14453       if (!MemWrite<uint64_t>(address + fill_offset, 0)) return;
  14454       fill_offset += sizeof(uint64_t);
  14455     }
  14456   }
  14457 
  14458   int tag = GetAllocationTagFromAddress(rt);
  14459   meta_data_.SetMTETag(address, tag, instr);
  14460   if (is_pair) {
  14461     meta_data_.SetMTETag(address + kMTETagGranuleInBytes, tag, instr);
  14462   }
  14463 }
  14464 
  14465 void Simulator::SimulateMTELoadTag(const Instruction* instr) {
  14466   uint64_t rt = ReadXRegister(instr->GetRt());
  14467   int offset = instr->GetImmLS() * static_cast<int>(kMTETagGranuleInBytes);
  14468 
  14469   switch (form_hash_) {
  14470     case Hash("ldg_64loffset_ldsttags"):
  14471       break;
  14472     default:
  14473       VIXL_UNIMPLEMENTED();
  14474   }
  14475 
  14476   uintptr_t address = AddressModeHelper(instr->GetRn(), offset, Offset);
  14477   address = AlignDown(address, kMTETagGranuleInBytes);
  14478   uint64_t tag = meta_data_.GetMTETag(address, instr);
  14479   WriteXRegister(instr->GetRt(), GetAddressWithAllocationTag(rt, tag));
  14480 }
  14481 
  14482 void Simulator::SimulateCpyFP(const Instruction* instr) {
  14483   MOPSPHelper<"cpy"_h>(instr);
  14484   LogSystemRegister(NZCV);
  14485 }
  14486 
  14487 void Simulator::SimulateCpyP(const Instruction* instr) {
  14488   MOPSPHelper<"cpy"_h>(instr);
  14489 
  14490   int d = instr->GetRd();
  14491   int n = instr->GetRn();
  14492   int s = instr->GetRs();
  14493 
  14494   // Determine copy direction. For cases in which direction is implementation
  14495   // defined, use forward.
  14496   bool is_backwards = false;
  14497   uint64_t xs = ReadXRegister(s);
  14498   uint64_t xd = ReadXRegister(d);
  14499   uint64_t xn = ReadXRegister(n);
  14500 
  14501   // Ignore the top byte of addresses for comparisons. We can use xn as is,
  14502   // as it should have zero in bits 63:55.
  14503   uint64_t xs_tbi = ExtractUnsignedBitfield64(55, 0, xs);
  14504   uint64_t xd_tbi = ExtractUnsignedBitfield64(55, 0, xd);
  14505   VIXL_ASSERT(ExtractUnsignedBitfield64(63, 55, xn) == 0);
  14506   if ((xs_tbi < xd_tbi) && ((xs_tbi + xn) > xd_tbi)) {
  14507     is_backwards = true;
  14508     WriteXRegister(s, xs + xn);
  14509     WriteXRegister(d, xd + xn);
  14510   }
  14511 
  14512   ReadNzcv().SetN(is_backwards ? 1 : 0);
  14513   LogSystemRegister(NZCV);
  14514 }
  14515 
  14516 void Simulator::SimulateCpyM(const Instruction* instr) {
  14517   VIXL_ASSERT(instr->IsConsistentMOPSTriplet<"cpy"_h>());
  14518   VIXL_ASSERT(instr->IsMOPSMainOf(GetLastExecutedInstruction(), "cpy"_h));
  14519 
  14520   int d = instr->GetRd();
  14521   int n = instr->GetRn();
  14522   int s = instr->GetRs();
  14523 
  14524   uint64_t xd = ReadXRegister(d);
  14525   uint64_t xn = ReadXRegister(n);
  14526   uint64_t xs = ReadXRegister(s);
  14527   bool is_backwards = ReadN();
  14528 
  14529   int step = 1;
  14530   if (is_backwards) {
  14531     step = -1;
  14532     xs--;
  14533     xd--;
  14534   }
  14535 
  14536   while (xn--) {
  14537     VIXL_DEFINE_OR_RETURN(temp, MemRead<uint8_t>(xs));
  14538     if (!MemWrite<uint8_t>(xd, temp)) return;
  14539     LogMemTransfer(xd, xs, temp);
  14540     xs += step;
  14541     xd += step;
  14542   }
  14543 
  14544   if (is_backwards) {
  14545     xs++;
  14546     xd++;
  14547   }
  14548 
  14549   WriteXRegister(d, xd);
  14550   WriteXRegister(n, 0);
  14551   WriteXRegister(s, xs);
  14552 }
  14553 
  14554 void Simulator::SimulateCpyE(const Instruction* instr) {
  14555   USE(instr);
  14556   VIXL_ASSERT(instr->IsConsistentMOPSTriplet<"cpy"_h>());
  14557   VIXL_ASSERT(instr->IsMOPSEpilogueOf(GetLastExecutedInstruction(), "cpy"_h));
  14558   // This implementation does nothing in the epilogue; all copying is completed
  14559   // in the "main" part.
  14560 }
  14561 
  14562 void Simulator::SimulateSetP(const Instruction* instr) {
  14563   MOPSPHelper<"set"_h>(instr);
  14564   LogSystemRegister(NZCV);
  14565 }
  14566 
  14567 void Simulator::SimulateSetM(const Instruction* instr) {
  14568   VIXL_ASSERT(instr->IsConsistentMOPSTriplet<"set"_h>());
  14569   VIXL_ASSERT(instr->IsMOPSMainOf(GetLastExecutedInstruction(), "set"_h));
  14570 
  14571   uint64_t xd = ReadXRegister(instr->GetRd());
  14572   uint64_t xn = ReadXRegister(instr->GetRn());
  14573   uint64_t xs = ReadXRegister(instr->GetRs());
  14574 
  14575   while (xn--) {
  14576     LogWrite(instr->GetRs(), GetPrintRegPartial(kPrintRegLaneSizeB), xd);
  14577     if (!MemWrite<uint8_t>(xd++, xs)) return;
  14578   }
  14579   WriteXRegister(instr->GetRd(), xd);
  14580   WriteXRegister(instr->GetRn(), 0);
  14581 }
  14582 
  14583 void Simulator::SimulateSetE(const Instruction* instr) {
  14584   USE(instr);
  14585   VIXL_ASSERT(instr->IsConsistentMOPSTriplet<"set"_h>());
  14586   VIXL_ASSERT(instr->IsMOPSEpilogueOf(GetLastExecutedInstruction(), "set"_h));
  14587   // This implementation does nothing in the epilogue; all setting is completed
  14588   // in the "main" part.
  14589 }
  14590 
  14591 void Simulator::SimulateSetGP(const Instruction* instr) {
  14592   MOPSPHelper<"setg"_h>(instr);
  14593 
  14594   uint64_t xd = ReadXRegister(instr->GetRd());
  14595   uint64_t xn = ReadXRegister(instr->GetRn());
  14596 
  14597   if ((xn > 0) && !IsAligned(xd, kMTETagGranuleInBytes)) {
  14598     VIXL_ALIGNMENT_EXCEPTION();
  14599   }
  14600 
  14601   if (!IsAligned(xn, kMTETagGranuleInBytes)) {
  14602     VIXL_ALIGNMENT_EXCEPTION();
  14603   }
  14604 
  14605   LogSystemRegister(NZCV);
  14606 }
  14607 
  14608 void Simulator::SimulateSetGM(const Instruction* instr) {
  14609   uint64_t xd = ReadXRegister(instr->GetRd());
  14610   uint64_t xn = ReadXRegister(instr->GetRn());
  14611 
  14612   int tag = GetAllocationTagFromAddress(xd);
  14613   while (xn) {
  14614     meta_data_.SetMTETag(xd, tag);
  14615     xd += 16;
  14616     xn -= 16;
  14617   }
  14618   SimulateSetM(instr);
  14619 }
  14620 
  14621 void Simulator::DoTrace(const Instruction* instr) {
  14622   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
  14623               (instr->GetImmException() == kTraceOpcode));
  14624 
  14625   // Read the arguments encoded inline in the instruction stream.
  14626   uint32_t parameters;
  14627   uint32_t command;
  14628 
  14629   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
  14630   memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
  14631   memcpy(&command, instr + kTraceCommandOffset, sizeof(command));
  14632 
  14633   switch (command) {
  14634     case TRACE_ENABLE:
  14635       SetTraceParameters(GetTraceParameters() | parameters);
  14636       break;
  14637     case TRACE_DISABLE:
  14638       SetTraceParameters(GetTraceParameters() & ~parameters);
  14639       break;
  14640     default:
  14641       VIXL_UNREACHABLE();
  14642   }
  14643 
  14644   WritePc(instr->GetInstructionAtOffset(kTraceLength));
  14645 }
  14646 
  14647 
  14648 void Simulator::DoLog(const Instruction* instr) {
  14649   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
  14650               (instr->GetImmException() == kLogOpcode));
  14651 
  14652   // Read the arguments encoded inline in the instruction stream.
  14653   uint32_t parameters;
  14654 
  14655   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
  14656   memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
  14657 
  14658   // We don't support a one-shot LOG_DISASM.
  14659   VIXL_ASSERT((parameters & LOG_DISASM) == 0);
  14660   // Print the requested information.
  14661   if (parameters & LOG_SYSREGS) PrintSystemRegisters();
  14662   if (parameters & LOG_REGS) PrintRegisters();
  14663   if (parameters & LOG_VREGS) PrintVRegisters();
  14664 
  14665   WritePc(instr->GetInstructionAtOffset(kLogLength));
  14666 }
  14667 
  14668 
  14669 void Simulator::DoPrintf(const Instruction* instr) {
  14670   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
  14671               (instr->GetImmException() == kPrintfOpcode));
  14672 
  14673   // Read the arguments encoded inline in the instruction stream.
  14674   uint32_t arg_count;
  14675   uint32_t arg_pattern_list;
  14676   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
  14677   memcpy(&arg_count, instr + kPrintfArgCountOffset, sizeof(arg_count));
  14678   memcpy(&arg_pattern_list,
  14679          instr + kPrintfArgPatternListOffset,
  14680          sizeof(arg_pattern_list));
  14681 
  14682   VIXL_ASSERT(arg_count <= kPrintfMaxArgCount);
  14683   VIXL_ASSERT((arg_pattern_list >> (kPrintfArgPatternBits * arg_count)) == 0);
  14684 
  14685   // We need to call the host printf function with a set of arguments defined by
  14686   // arg_pattern_list. Because we don't know the types and sizes of the
  14687   // arguments, this is very difficult to do in a robust and portable way. To
  14688   // work around the problem, we pick apart the format string, and print one
  14689   // format placeholder at a time.
  14690 
  14691   // Allocate space for the format string. We take a copy, so we can modify it.
  14692   // Leave enough space for one extra character per expected argument (plus the
  14693   // '\0' termination).
  14694   const char* format_base = ReadRegister<const char*>(0);
  14695   VIXL_ASSERT(format_base != NULL);
  14696   size_t length = strlen(format_base) + 1;
  14697   char* const format = new char[length + arg_count];
  14698 
  14699   // A list of chunks, each with exactly one format placeholder.
  14700   const char* chunks[kPrintfMaxArgCount];
  14701 
  14702   // Copy the format string and search for format placeholders.
  14703   uint32_t placeholder_count = 0;
  14704   char* format_scratch = format;
  14705   for (size_t i = 0; i < length; i++) {
  14706     if (format_base[i] != '%') {
  14707       *format_scratch++ = format_base[i];
  14708     } else {
  14709       if (format_base[i + 1] == '%') {
  14710         // Ignore explicit "%%" sequences.
  14711         *format_scratch++ = format_base[i];
  14712         i++;
  14713         // Chunks after the first are passed as format strings to printf, so we
  14714         // need to escape '%' characters in those chunks.
  14715         if (placeholder_count > 0) *format_scratch++ = format_base[i];
  14716       } else {
  14717         VIXL_CHECK(placeholder_count < arg_count);
  14718         // Insert '\0' before placeholders, and store their locations.
  14719         *format_scratch++ = '\0';
  14720         chunks[placeholder_count++] = format_scratch;
  14721         *format_scratch++ = format_base[i];
  14722       }
  14723     }
  14724   }
  14725   VIXL_CHECK(placeholder_count == arg_count);
  14726 
  14727   // Finally, call printf with each chunk, passing the appropriate register
  14728   // argument. Normally, printf returns the number of bytes transmitted, so we
  14729   // can emulate a single printf call by adding the result from each chunk. If
  14730   // any call returns a negative (error) value, though, just return that value.
  14731 
  14732   printf("%s", clr_printf);
  14733 
  14734   // Because '\0' is inserted before each placeholder, the first string in
  14735   // 'format' contains no format placeholders and should be printed literally.
  14736   int result = printf("%s", format);
  14737   int pcs_r = 1;  // Start at x1. x0 holds the format string.
  14738   int pcs_f = 0;  // Start at d0.
  14739   if (result >= 0) {
  14740     for (uint32_t i = 0; i < placeholder_count; i++) {
  14741       int part_result = -1;
  14742 
  14743       uint32_t arg_pattern = arg_pattern_list >> (i * kPrintfArgPatternBits);
  14744       arg_pattern &= (1 << kPrintfArgPatternBits) - 1;
  14745       switch (arg_pattern) {
  14746         case kPrintfArgW:
  14747           part_result = printf(chunks[i], ReadWRegister(pcs_r++));
  14748           break;
  14749         case kPrintfArgX:
  14750           part_result = printf(chunks[i], ReadXRegister(pcs_r++));
  14751           break;
  14752         case kPrintfArgD:
  14753           part_result = printf(chunks[i], ReadDRegister(pcs_f++));
  14754           break;
  14755         default:
  14756           VIXL_UNREACHABLE();
  14757       }
  14758 
  14759       if (part_result < 0) {
  14760         // Handle error values.
  14761         result = part_result;
  14762         break;
  14763       }
  14764 
  14765       result += part_result;
  14766     }
  14767   }
  14768 
  14769   printf("%s", clr_normal);
  14770 
  14771   // Printf returns its result in x0 (just like the C library's printf).
  14772   WriteXRegister(0, result);
  14773 
  14774   // The printf parameters are inlined in the code, so skip them.
  14775   WritePc(instr->GetInstructionAtOffset(kPrintfLength));
  14776 
  14777   // Set LR as if we'd just called a native printf function.
  14778   WriteLr(ReadPc());
  14779 
  14780   delete[] format;
  14781 }
  14782 
  14783 
  14784 #ifdef VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT
  14785 void Simulator::DoRuntimeCall(const Instruction* instr) {
  14786   VIXL_STATIC_ASSERT(kRuntimeCallAddressSize == sizeof(uintptr_t));
  14787   // The appropriate `Simulator::SimulateRuntimeCall()` wrapper and the function
  14788   // to call are passed inlined in the assembly.
  14789   VIXL_DEFINE_OR_RETURN(call_wrapper_address,
  14790                         MemRead<uintptr_t>(instr + kRuntimeCallWrapperOffset));
  14791   VIXL_DEFINE_OR_RETURN(function_address,
  14792                         MemRead<uintptr_t>(instr + kRuntimeCallFunctionOffset));
  14793   VIXL_DEFINE_OR_RETURN(call_type,
  14794                         MemRead<uint32_t>(instr + kRuntimeCallTypeOffset));
  14795   auto runtime_call_wrapper =
  14796       reinterpret_cast<void (*)(Simulator*, uintptr_t)>(call_wrapper_address);
  14797 
  14798   if (static_cast<RuntimeCallType>(call_type) == kCallRuntime) {
  14799     WriteRegister(kLinkRegCode,
  14800                   instr->GetInstructionAtOffset(kRuntimeCallLength));
  14801   }
  14802   runtime_call_wrapper(this, function_address);
  14803   // Read the return address from `lr` and write it into `pc`.
  14804   WritePc(ReadRegister<Instruction*>(kLinkRegCode));
  14805 }
  14806 #else
  14807 void Simulator::DoRuntimeCall(const Instruction* instr) {
  14808   USE(instr);
  14809   VIXL_UNREACHABLE();
  14810 }
  14811 #endif
  14812 
  14813 
  14814 void Simulator::DoConfigureCPUFeatures(const Instruction* instr) {
  14815   VIXL_ASSERT(instr->Mask(ExceptionMask) == HLT);
  14816 
  14817   typedef ConfigureCPUFeaturesElementType ElementType;
  14818   VIXL_ASSERT(CPUFeatures::kNumberOfFeatures <
  14819               std::numeric_limits<ElementType>::max());
  14820 
  14821   // k{Set,Enable,Disable}CPUFeatures have the same parameter encoding.
  14822 
  14823   size_t element_size = sizeof(ElementType);
  14824   size_t offset = kConfigureCPUFeaturesListOffset;
  14825 
  14826   // Read the kNone-terminated list of features.
  14827   CPUFeatures parameters;
  14828   while (true) {
  14829     VIXL_DEFINE_OR_RETURN(feature, MemRead<ElementType>(instr + offset));
  14830     offset += element_size;
  14831     if (feature == static_cast<ElementType>(CPUFeatures::kNone)) break;
  14832     parameters.Combine(static_cast<CPUFeatures::Feature>(feature));
  14833   }
  14834 
  14835   switch (instr->GetImmException()) {
  14836     case kSetCPUFeaturesOpcode:
  14837       SetCPUFeatures(parameters);
  14838       break;
  14839     case kEnableCPUFeaturesOpcode:
  14840       GetCPUFeatures()->Combine(parameters);
  14841       break;
  14842     case kDisableCPUFeaturesOpcode:
  14843       GetCPUFeatures()->Remove(parameters);
  14844       break;
  14845     default:
  14846       VIXL_UNREACHABLE();
  14847       break;
  14848   }
  14849 
  14850   WritePc(instr->GetInstructionAtOffset(AlignUp(offset, kInstructionSize)));
  14851 }
  14852 
  14853 
  14854 void Simulator::DoSaveCPUFeatures(const Instruction* instr) {
  14855   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
  14856               (instr->GetImmException() == kSaveCPUFeaturesOpcode));
  14857   USE(instr);
  14858 
  14859   saved_cpu_features_.push_back(*GetCPUFeatures());
  14860 }
  14861 
  14862 
  14863 void Simulator::DoRestoreCPUFeatures(const Instruction* instr) {
  14864   VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
  14865               (instr->GetImmException() == kRestoreCPUFeaturesOpcode));
  14866   USE(instr);
  14867 
  14868   SetCPUFeatures(saved_cpu_features_.back());
  14869   saved_cpu_features_.pop_back();
  14870 }
  14871 
  14872 void* Simulator::Mmap(
  14873     void* address, size_t length, int prot, int flags, int fd, off_t offset) {
  14874   // The underlying system `mmap` in the simulated environment doesn't recognize
  14875   // PROT_BTI and PROT_MTE. Although the kernel probably just ignores the bits
  14876   // it doesn't know, mask those protections out before calling is safer.
  14877   int intenal_prot = prot;
  14878   prot &= ~(PROT_BTI | PROT_MTE);
  14879 
  14880   uint64_t address2 = reinterpret_cast<uint64_t>(
  14881       mmap(address, length, prot, flags, fd, offset));
  14882 
  14883   if (intenal_prot & PROT_MTE) {
  14884     // The returning address of `mmap` isn't tagged.
  14885     int tag = static_cast<int>(GenerateRandomTag());
  14886     SetGranuleTag(address2, tag, length);
  14887     address2 = GetAddressWithAllocationTag(address2, tag);
  14888   }
  14889 
  14890   return reinterpret_cast<void*>(address2);
  14891 }
  14892 
  14893 
  14894 int Simulator::Munmap(void* address, size_t length, int prot) {
  14895   if (prot & PROT_MTE) {
  14896     // Untag the address since `munmap` doesn't recognize the memory tagging
  14897     // managed by the Simulator.
  14898     address = AddressUntag(address);
  14899     CleanGranuleTag(reinterpret_cast<char*>(address), length);
  14900   }
  14901 
  14902   return munmap(address, length);
  14903 }
  14904 
  14905 
  14906 }  // namespace aarch64
  14907 }  // namespace vixl
  14908 
  14909 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64