duckstation

duckstation, archived from the last revision before upstream relicensed the project as proprietary software; this version is the libre one
git clone https://git.neptards.moe/u3shit/duckstation.git

cpu-aarch64.cc (20911B)


// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
#include <sys/auxv.h>
#define VIXL_USE_LINUX_HWCAP 1
#endif

#include "../utils-vixl.h"

#include "cpu-aarch64.h"

namespace vixl {
namespace aarch64 {


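// Each Field below names one ID-register bitfield by its least significant
// bit. Fields are 4 bits wide unless a width is given explicitly (e.g. the
// 1-bit SME fields), and follow the Arm ID scheme: for unsigned fields a
// larger value implies a superset of the features of a smaller value, while
// signed fields use 0b1111 (-1) to mean "not implemented".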
const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
const IDRegister::Field AA64PFR0::kRAS(28);
const IDRegister::Field AA64PFR0::kSVE(32);
const IDRegister::Field AA64PFR0::kDIT(48);
const IDRegister::Field AA64PFR0::kCSV2(56);
const IDRegister::Field AA64PFR0::kCSV3(60);

const IDRegister::Field AA64PFR1::kBT(0);
const IDRegister::Field AA64PFR1::kSSBS(4);
const IDRegister::Field AA64PFR1::kMTE(8);
const IDRegister::Field AA64PFR1::kSME(24);

const IDRegister::Field AA64ISAR0::kAES(4);
const IDRegister::Field AA64ISAR0::kSHA1(8);
const IDRegister::Field AA64ISAR0::kSHA2(12);
const IDRegister::Field AA64ISAR0::kCRC32(16);
const IDRegister::Field AA64ISAR0::kAtomic(20);
const IDRegister::Field AA64ISAR0::kRDM(28);
const IDRegister::Field AA64ISAR0::kSHA3(32);
const IDRegister::Field AA64ISAR0::kSM3(36);
const IDRegister::Field AA64ISAR0::kSM4(40);
const IDRegister::Field AA64ISAR0::kDP(44);
const IDRegister::Field AA64ISAR0::kFHM(48);
const IDRegister::Field AA64ISAR0::kTS(52);
const IDRegister::Field AA64ISAR0::kRNDR(60);

const IDRegister::Field AA64ISAR1::kDPB(0);
const IDRegister::Field AA64ISAR1::kAPA(4);
const IDRegister::Field AA64ISAR1::kAPI(8);
const IDRegister::Field AA64ISAR1::kJSCVT(12);
const IDRegister::Field AA64ISAR1::kFCMA(16);
const IDRegister::Field AA64ISAR1::kLRCPC(20);
const IDRegister::Field AA64ISAR1::kGPA(24);
const IDRegister::Field AA64ISAR1::kGPI(28);
const IDRegister::Field AA64ISAR1::kFRINTTS(32);
const IDRegister::Field AA64ISAR1::kSB(36);
const IDRegister::Field AA64ISAR1::kSPECRES(40);
const IDRegister::Field AA64ISAR1::kBF16(44);
const IDRegister::Field AA64ISAR1::kDGH(48);
const IDRegister::Field AA64ISAR1::kI8MM(52);

const IDRegister::Field AA64ISAR2::kWFXT(0);
const IDRegister::Field AA64ISAR2::kRPRES(4);
const IDRegister::Field AA64ISAR2::kMOPS(16);
const IDRegister::Field AA64ISAR2::kCSSC(52);

const IDRegister::Field AA64MMFR0::kECV(60);

const IDRegister::Field AA64MMFR1::kLO(16);
const IDRegister::Field AA64MMFR1::kAFP(44);

const IDRegister::Field AA64MMFR2::kAT(32);

const IDRegister::Field AA64ZFR0::kSVEver(0);
const IDRegister::Field AA64ZFR0::kAES(4);
const IDRegister::Field AA64ZFR0::kBitPerm(16);
const IDRegister::Field AA64ZFR0::kBF16(20);
const IDRegister::Field AA64ZFR0::kSHA3(32);
const IDRegister::Field AA64ZFR0::kSM4(40);
const IDRegister::Field AA64ZFR0::kI8MM(44);
const IDRegister::Field AA64ZFR0::kF32MM(52);
const IDRegister::Field AA64ZFR0::kF64MM(56);

const IDRegister::Field AA64SMFR0::kSMEf32f32(32, 1);
const IDRegister::Field AA64SMFR0::kSMEb16f32(34, 1);
const IDRegister::Field AA64SMFR0::kSMEf16f32(35, 1);
const IDRegister::Field AA64SMFR0::kSMEi8i32(36);
const IDRegister::Field AA64SMFR0::kSMEf64f64(48, 1);
const IDRegister::Field AA64SMFR0::kSMEi16i64(52);
const IDRegister::Field AA64SMFR0::kSMEfa64(63, 1);

CPUFeatures AA64PFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
  if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
  if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
  if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
  if (Get(kRAS) >= 1) f.Combine(CPUFeatures::kRAS);
  if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
  if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
  if (Get(kCSV2) >= 1) f.Combine(CPUFeatures::kCSV2);
  if (Get(kCSV2) >= 2) f.Combine(CPUFeatures::kSCXTNUM);
  if (Get(kCSV3) >= 1) f.Combine(CPUFeatures::kCSV3);
  return f;
}

CPUFeatures AA64PFR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
  if (Get(kSSBS) >= 1) f.Combine(CPUFeatures::kSSBS);
  if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl);
  if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions);
  if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE);
  if (Get(kMTE) >= 3) f.Combine(CPUFeatures::kMTE3);
  if (Get(kSME) >= 1) f.Combine(CPUFeatures::kSME);
  return f;
}

CPUFeatures AA64ISAR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kAES);
  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kPmull1Q);
  if (Get(kSHA1) >= 1) f.Combine(CPUFeatures::kSHA1);
  if (Get(kSHA2) >= 1) f.Combine(CPUFeatures::kSHA2);
  if (Get(kSHA2) >= 2) f.Combine(CPUFeatures::kSHA512);
  if (Get(kCRC32) >= 1) f.Combine(CPUFeatures::kCRC32);
  if (Get(kAtomic) >= 1) f.Combine(CPUFeatures::kAtomics);
  if (Get(kRDM) >= 1) f.Combine(CPUFeatures::kRDM);
  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSHA3);
  if (Get(kSM3) >= 1) f.Combine(CPUFeatures::kSM3);
  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSM4);
  if (Get(kDP) >= 1) f.Combine(CPUFeatures::kDotProduct);
  if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM);
  if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM);
  if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag);
  if (Get(kRNDR) >= 1) f.Combine(CPUFeatures::kRNG);
  return f;
}

CPUFeatures AA64ISAR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP);
  if (Get(kDPB) >= 2) f.Combine(CPUFeatures::kDCCVADP);
  if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT);
  if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma);
  if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc);
  if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm);
  if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt);
  if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB);
  if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES);
  if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16);
  if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kEBF16);
  if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH);
  if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM);

  // Only one of these fields should be non-zero, but they have the same
  // encodings, so merge the logic.
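  // Per the Arm ARM, the shared APA/API encodings are: 0b0001 PAuth,
  // 0b0010 EPAC, 0b0011 PAuth2, 0b0100 FPAC, 0b0101 FPACCOMBINE.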
  int apx = std::max(Get(kAPI), Get(kAPA));
  if (apx >= 1) {
    f.Combine(CPUFeatures::kPAuth);
    // APA (rather than API) indicates QARMA.
    if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuthQARMA);
    if (apx == 0b0010) f.Combine(CPUFeatures::kPAuthEnhancedPAC);
    if (apx >= 0b0011) f.Combine(CPUFeatures::kPAuthEnhancedPAC2);
    if (apx >= 0b0100) f.Combine(CPUFeatures::kPAuthFPAC);
    if (apx >= 0b0101) f.Combine(CPUFeatures::kPAuthFPACCombined);
  }

  if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
  if (Get(kGPA) >= 1) {
    f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
  }
  return f;
}

CPUFeatures AA64ISAR2::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kWFXT) >= 2) f.Combine(CPUFeatures::kWFXT);
  if (Get(kRPRES) >= 1) f.Combine(CPUFeatures::kRPRES);
  if (Get(kMOPS) >= 1) f.Combine(CPUFeatures::kMOPS);
  if (Get(kCSSC) >= 1) f.Combine(CPUFeatures::kCSSC);
  return f;
}

CPUFeatures AA64MMFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kECV) >= 1) f.Combine(CPUFeatures::kECV);
  return f;
}

CPUFeatures AA64MMFR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions);
  if (Get(kAFP) >= 1) f.Combine(CPUFeatures::kAFP);
  return f;
}

CPUFeatures AA64MMFR2::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kAT) >= 1) f.Combine(CPUFeatures::kUSCAT);
  return f;
}

CPUFeatures AA64ZFR0::GetCPUFeatures() const {
  // This register is only available with SVE, but reads-as-zero in its absence,
  // so it's always safe to read it.
  CPUFeatures f;
  if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM);
  if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM);
  if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM);
  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSVESM4);
  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSVESHA3);
  if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16);
  if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kSVE_EBF16);
  if (Get(kBitPerm) >= 1) f.Combine(CPUFeatures::kSVEBitPerm);
  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kSVEAES);
  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kSVEPmull128);
  if (Get(kSVEver) >= 1) f.Combine(CPUFeatures::kSVE2);
  return f;
}

CPUFeatures AA64SMFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kSMEf32f32) >= 1) f.Combine(CPUFeatures::kSMEf32f32);
  if (Get(kSMEb16f32) >= 1) f.Combine(CPUFeatures::kSMEb16f32);
  if (Get(kSMEf16f32) >= 1) f.Combine(CPUFeatures::kSMEf16f32);
  if (Get(kSMEi8i32) >= 15) f.Combine(CPUFeatures::kSMEi8i32);
  if (Get(kSMEf64f64) >= 1) f.Combine(CPUFeatures::kSMEf64f64);
  if (Get(kSMEi16i64) >= 15) f.Combine(CPUFeatures::kSMEi16i64);
  if (Get(kSMEfa64) >= 1) f.Combine(CPUFeatures::kSMEfa64);
  return f;
}

int IDRegister::Get(IDRegister::Field field) const {
  int msb = field.GetMsb();
  int lsb = field.GetLsb();
  VIXL_STATIC_ASSERT(static_cast<size_t>(Field::kMaxWidthInBits) <
                     (sizeof(int) * kBitsPerByte));
  switch (field.GetType()) {
    case Field::kSigned:
      return static_cast<int>(ExtractSignedBitfield64(msb, lsb, value_));
    case Field::kUnsigned:
      return static_cast<int>(ExtractUnsignedBitfield64(msb, lsb, value_));
  }
  VIXL_UNREACHABLE();
  return 0;
}
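
// Worked example: with ID_AA64PFR0_EL1 == 0x0000000000110000, Get(kFP) and
// Get(kAdvSIMD) both return 1, so AA64PFR0::GetCPUFeatures() reports kFP,
// kFPHalf, kNEON and kNEONHalf. A signed field reading 0b1111 is returned as
// -1, which the `>= 0` checks treat as "not implemented".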

CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
  CPUFeatures f;
#define VIXL_COMBINE_ID_REG(NAME, MRS_ARG) \
  f.Combine(Read##NAME().GetCPUFeatures());
  VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
#undef VIXL_COMBINE_ID_REG
  return f;
}
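
// VIXL_AARCH64_ID_REG_LIST is an X-macro (see cpu-aarch64.h) that invokes its
// argument once per ID register, so the block above expands to one
// f.Combine(Read<REG>().GetCPUFeatures()) call for each register.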

CPUFeatures CPU::InferCPUFeaturesFromOS(
    CPUFeatures::QueryIDRegistersOption option) {
  CPUFeatures features;

#ifdef VIXL_USE_LINUX_HWCAP
  // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
  // than explicit bits, but explicit bits allow us to identify features that
  // the toolchain doesn't know about.
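  // For reference, the low AT_HWCAP bits correspond to the kernel's HWCAP_FP,
  // HWCAP_ASIMD, HWCAP_EVTSTRM, HWCAP_AES, ... constants in <asm/hwcap.h>;
  // the array index below is the bit position.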
  static const CPUFeatures::Feature kFeatureBitsLow[] =
      {// Bits 0-7
       CPUFeatures::kFP,
       CPUFeatures::kNEON,
       CPUFeatures::kNone,  // "EVTSTRM", which VIXL doesn't track.
       CPUFeatures::kAES,
       CPUFeatures::kPmull1Q,
       CPUFeatures::kSHA1,
       CPUFeatures::kSHA2,
       CPUFeatures::kCRC32,
       // Bits 8-15
       CPUFeatures::kAtomics,
       CPUFeatures::kFPHalf,
       CPUFeatures::kNEONHalf,
       CPUFeatures::kIDRegisterEmulation,
       CPUFeatures::kRDM,
       CPUFeatures::kJSCVT,
       CPUFeatures::kFcma,
       CPUFeatures::kRCpc,
       // Bits 16-23
       CPUFeatures::kDCPoP,
       CPUFeatures::kSHA3,
       CPUFeatures::kSM3,
       CPUFeatures::kSM4,
       CPUFeatures::kDotProduct,
       CPUFeatures::kSHA512,
       CPUFeatures::kSVE,
       CPUFeatures::kFHM,
       // Bits 24-31
       CPUFeatures::kDIT,
       CPUFeatures::kUSCAT,
       CPUFeatures::kRCpcImm,
       CPUFeatures::kFlagM,
       CPUFeatures::kSSBSControl,
       CPUFeatures::kSB,
       CPUFeatures::kPAuth,
       CPUFeatures::kPAuthGeneric};
  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsLow) < 64);

  static const CPUFeatures::Feature kFeatureBitsHigh[] =
      {// Bits 0-7
       CPUFeatures::kDCCVADP,
       CPUFeatures::kSVE2,
       CPUFeatures::kSVEAES,
       CPUFeatures::kSVEPmull128,
       CPUFeatures::kSVEBitPerm,
       CPUFeatures::kSVESHA3,
       CPUFeatures::kSVESM4,
       CPUFeatures::kAXFlag,
       // Bits 8-15
       CPUFeatures::kFrintToFixedSizedInt,
       CPUFeatures::kSVEI8MM,
       CPUFeatures::kSVEF32MM,
       CPUFeatures::kSVEF64MM,
       CPUFeatures::kSVEBF16,
       CPUFeatures::kI8MM,
       CPUFeatures::kBF16,
       CPUFeatures::kDGH,
       // Bits 16-23
       CPUFeatures::kRNG,
       CPUFeatures::kBTI,
       CPUFeatures::kMTE,
       CPUFeatures::kECV,
       CPUFeatures::kAFP,
       CPUFeatures::kRPRES,
       CPUFeatures::kMTE3,
       CPUFeatures::kSME,
       // Bits 24-31
       CPUFeatures::kSMEi16i64,
       CPUFeatures::kSMEf64f64,
       CPUFeatures::kSMEi8i32,
       CPUFeatures::kSMEf16f32,
       CPUFeatures::kSMEb16f32,
       CPUFeatures::kSMEf32f32,
       CPUFeatures::kSMEfa64,
       CPUFeatures::kWFXT,
       // Bits 32-39
       CPUFeatures::kEBF16,
       CPUFeatures::kSVE_EBF16};
  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsHigh) < 64);

  auto combine_features = [&features](uint64_t hwcap,
                                      const CPUFeatures::Feature* feature_array,
                                      size_t features_size) {
    for (size_t i = 0; i < features_size; i++) {
      if (hwcap & (UINT64_C(1) << i)) features.Combine(feature_array[i]);
    }
  };

  uint64_t hwcap_low = getauxval(AT_HWCAP);
  uint64_t hwcap_high = getauxval(AT_HWCAP2);

  combine_features(hwcap_low, kFeatureBitsLow, ArrayLength(kFeatureBitsLow));
  combine_features(hwcap_high, kFeatureBitsHigh, ArrayLength(kFeatureBitsHigh));

  // MTE support from HWCAP2 signifies FEAT_MTE1 and FEAT_MTE2 support.
  if (features.Has(CPUFeatures::kMTE)) {
    features.Combine(CPUFeatures::kMTEInstructions);
  }
#endif  // VIXL_USE_LINUX_HWCAP

  if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) &&
      (features.Has(CPUFeatures::kIDRegisterEmulation))) {
    features.Combine(InferCPUFeaturesFromIDRegisters());
  }
  return features;
}
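
// A minimal usage sketch (hypothetical caller, not part of this file):
//
//   CPUFeatures features =
//       CPU::InferCPUFeaturesFromOS(CPUFeatures::kQueryIDRegistersIfAvailable);
//   if (features.Has(CPUFeatures::kSVE)) {
//     int vl = CPU::ReadSVEVectorLengthInBits();
//   }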


#ifdef __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG)        \
  NAME CPU::Read##NAME() {                     \
    uint64_t value = 0;                        \
    __asm__("mrs %0, " MRS_ARG : "=r"(value)); \
    return NAME(value);                        \
  }
#else  // __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG) \
  NAME CPU::Read##NAME() {              \
    VIXL_UNREACHABLE();                 \
    return NAME(0);                     \
  }
#endif  // __aarch64__

VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)

#undef VIXL_READ_ID_REG


// Initialise to smallest possible cache size.
unsigned CPU::dcache_line_size_ = 1;
unsigned CPU::icache_line_size_ = 1;


// Currently computes the I and D cache line sizes.
void CPU::SetUp() {
  uint32_t cache_type_register = GetCacheType();

  // The cache type register holds information about the caches, including the
  // I and D cache line sizes.
  static const int kDCacheLineSizeShift = 16;
  static const int kICacheLineSizeShift = 0;
  static const uint32_t kDCacheLineSizeMask = 0xf << kDCacheLineSizeShift;
  static const uint32_t kICacheLineSizeMask = 0xf << kICacheLineSizeShift;

  // The cache type register holds the I and D cache line sizes, in words, as
  // a power of two.
  uint32_t dcache_line_size_power_of_two =
      (cache_type_register & kDCacheLineSizeMask) >> kDCacheLineSizeShift;
  uint32_t icache_line_size_power_of_two =
      (cache_type_register & kICacheLineSizeMask) >> kICacheLineSizeShift;

  dcache_line_size_ = 4 << dcache_line_size_power_of_two;
  icache_line_size_ = 4 << icache_line_size_power_of_two;
}
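
// Example: a line-size field value of 4 encodes (1 << 4) = 16 four-byte
// words, so SetUp() records 4 << 4 = 64 bytes, the typical line size on
// current AArch64 cores.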


uint32_t CPU::GetCacheType() {
#ifdef __aarch64__
  uint64_t cache_type_register;
  // Copy the content of the cache type register to a core register.
  __asm__ __volatile__("mrs %[ctr], ctr_el0"  // NOLINT(runtime/references)
                       : [ctr] "=r"(cache_type_register));
  VIXL_ASSERT(IsUint32(cache_type_register));
  return static_cast<uint32_t>(cache_type_register);
#else
  // This will lead to a cache with 1 byte long lines, which is fine since
  // neither EnsureIAndDCacheCoherency nor the simulator will need this
  // information.
  return 0;
#endif
}


// Query the SVE vector length. This requires CPUFeatures::kSVE.
int CPU::ReadSVEVectorLengthInBits() {
#ifdef __aarch64__
  uint64_t vl;
  // To support compilers that don't understand `rdvl`, encode the value
  // directly and move it manually.
  __asm__(
      "   .word 0x04bf5100\n"  // rdvl x0, #8
      "   mov %[vl], x0\n"
      : [vl] "=r"(vl)
      :
      : "x0");
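  // `rdvl x0, #8` returns 8 * (the vector length in bytes), i.e. the vector
  // length in bits, so the result needs no further scaling.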
  VIXL_ASSERT(vl <= INT_MAX);
  return static_cast<int>(vl);
#else
  VIXL_UNREACHABLE();
  return 0;
#endif
}


void CPU::EnsureIAndDCacheCoherency(void* address, size_t length) {
#ifdef __aarch64__
  // Implement the cache synchronisation for all targets where AArch64 is the
  // host, even if we're building the simulator for an AArch64 host. This
  // allows for cases where the user wants to simulate code as well as run it
  // natively.

  if (length == 0) {
    return;
  }

  // The code below assumes user space cache operations are allowed.

  // Work out the line sizes for each cache, and use them to determine the
  // start addresses.
  uintptr_t start = reinterpret_cast<uintptr_t>(address);
  uintptr_t dsize = static_cast<uintptr_t>(dcache_line_size_);
  uintptr_t isize = static_cast<uintptr_t>(icache_line_size_);
  uintptr_t dline = start & ~(dsize - 1);
  uintptr_t iline = start & ~(isize - 1);

  // Cache line sizes are always a power of 2.
  VIXL_ASSERT(IsPowerOf2(dsize));
  VIXL_ASSERT(IsPowerOf2(isize));
  uintptr_t end = start + length;

  do {
    __asm__ __volatile__(
        // Clean each line of the D cache containing the target data.
        //
        // dc       : Data Cache maintenance
        //     c    : Clean
        //      va  : by (Virtual) Address
        //        u : to the point of Unification
        // The point of unification for a processor is the point by which the
        // instruction and data caches are guaranteed to see the same copy of a
        // memory location. See ARM DDI 0406B page B2-12 for more information.
        "   dc    cvau, %[dline]\n"
        :
        : [dline] "r"(dline)
        // This code does not write to memory, but the "memory" dependency
        // prevents GCC from reordering the code.
        : "memory");
    dline += dsize;
  } while (dline < end);

  __asm__ __volatile__(
      // Make sure that the data cache operations (above) complete before the
      // instruction cache operations (below).
      //
      // dsb      : Data Synchronisation Barrier
      //      ish : Inner SHareable domain
      //
      // The point of unification for an Inner Shareable shareability domain is
      // the point by which the instruction and data caches of all the
      // processors in that Inner Shareable shareability domain are guaranteed
      // to see the same copy of a memory location. See ARM DDI 0406B page
      // B2-12 for more information.
      "   dsb   ish\n"
      :
      :
      : "memory");

  do {
    __asm__ __volatile__(
        // Invalidate each line of the I cache containing the target data.
        //
        // ic      : Instruction Cache maintenance
        //    i    : Invalidate
        //     va  : by Address
        //       u : to the point of Unification
        "   ic   ivau, %[iline]\n"
        :
        : [iline] "r"(iline)
        : "memory");
    iline += isize;
  } while (iline < end);

  __asm__ __volatile__(
      // Make sure that the instruction cache operations (above) take effect
      // before the isb (below).
      "   dsb  ish\n"

      // Ensure that any instructions already in the pipeline are discarded and
      // reloaded from the new data.
      // isb : Instruction Synchronisation Barrier
      "   isb\n"
      :
      :
      : "memory");
#else
  // If the host isn't AArch64, we must be using the simulator, so this function
  // doesn't have to do anything.
  USE(address, length);
#endif
}
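
// Typical (hypothetical) JIT flow, sketched for illustration only: copy
// freshly emitted instructions into an executable buffer, then flush before
// the first call into it:
//
//   memcpy(code, emitted, size);
//   CPU::EnsureIAndDCacheCoherency(code, size);
//   reinterpret_cast<void (*)()>(code)();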


}  // namespace aarch64
}  // namespace vixl