cpu-aarch64.cc
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
#include <sys/auxv.h>
#define VIXL_USE_LINUX_HWCAP 1
#endif

#include "../utils-vixl.h"

#include "cpu-aarch64.h"

namespace vixl {
namespace aarch64 {


const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
const IDRegister::Field AA64PFR0::kRAS(28);
const IDRegister::Field AA64PFR0::kSVE(32);
const IDRegister::Field AA64PFR0::kDIT(48);
const IDRegister::Field AA64PFR0::kCSV2(56);
const IDRegister::Field AA64PFR0::kCSV3(60);

const IDRegister::Field AA64PFR1::kBT(0);
const IDRegister::Field AA64PFR1::kSSBS(4);
const IDRegister::Field AA64PFR1::kMTE(8);
const IDRegister::Field AA64PFR1::kSME(24);

const IDRegister::Field AA64ISAR0::kAES(4);
const IDRegister::Field AA64ISAR0::kSHA1(8);
const IDRegister::Field AA64ISAR0::kSHA2(12);
const IDRegister::Field AA64ISAR0::kCRC32(16);
const IDRegister::Field AA64ISAR0::kAtomic(20);
const IDRegister::Field AA64ISAR0::kRDM(28);
const IDRegister::Field AA64ISAR0::kSHA3(32);
const IDRegister::Field AA64ISAR0::kSM3(36);
const IDRegister::Field AA64ISAR0::kSM4(40);
const IDRegister::Field AA64ISAR0::kDP(44);
const IDRegister::Field AA64ISAR0::kFHM(48);
const IDRegister::Field AA64ISAR0::kTS(52);
const IDRegister::Field AA64ISAR0::kRNDR(60);

const IDRegister::Field AA64ISAR1::kDPB(0);
const IDRegister::Field AA64ISAR1::kAPA(4);
const IDRegister::Field AA64ISAR1::kAPI(8);
const IDRegister::Field AA64ISAR1::kJSCVT(12);
const IDRegister::Field AA64ISAR1::kFCMA(16);
const IDRegister::Field AA64ISAR1::kLRCPC(20);
const IDRegister::Field AA64ISAR1::kGPA(24);
const IDRegister::Field AA64ISAR1::kGPI(28);
const IDRegister::Field AA64ISAR1::kFRINTTS(32);
const IDRegister::Field AA64ISAR1::kSB(36);
const IDRegister::Field AA64ISAR1::kSPECRES(40);
const IDRegister::Field AA64ISAR1::kBF16(44);
const IDRegister::Field AA64ISAR1::kDGH(48);
const IDRegister::Field AA64ISAR1::kI8MM(52);

const IDRegister::Field AA64ISAR2::kWFXT(0);
const IDRegister::Field AA64ISAR2::kRPRES(4);
const IDRegister::Field AA64ISAR2::kMOPS(16);
const IDRegister::Field AA64ISAR2::kCSSC(52);

const IDRegister::Field AA64MMFR0::kECV(60);

const IDRegister::Field AA64MMFR1::kLO(16);
const IDRegister::Field AA64MMFR1::kAFP(44);

const IDRegister::Field AA64MMFR2::kAT(32);

const IDRegister::Field AA64ZFR0::kSVEver(0);
const IDRegister::Field AA64ZFR0::kAES(4);
const IDRegister::Field AA64ZFR0::kBitPerm(16);
const IDRegister::Field AA64ZFR0::kBF16(20);
const IDRegister::Field AA64ZFR0::kSHA3(32);
const IDRegister::Field AA64ZFR0::kSM4(40);
const IDRegister::Field AA64ZFR0::kI8MM(44);
const IDRegister::Field AA64ZFR0::kF32MM(52);
const IDRegister::Field AA64ZFR0::kF64MM(56);

const IDRegister::Field AA64SMFR0::kSMEf32f32(32, 1);
const IDRegister::Field AA64SMFR0::kSMEb16f32(34, 1);
const IDRegister::Field AA64SMFR0::kSMEf16f32(35, 1);
const IDRegister::Field AA64SMFR0::kSMEi8i32(36);
const IDRegister::Field AA64SMFR0::kSMEf64f64(48, 1);
const IDRegister::Field AA64SMFR0::kSMEi16i64(52);
const IDRegister::Field AA64SMFR0::kSMEfa64(63, 1);

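// Note on the field definitions above: each Field takes the least-significant
// bit of an ID register field. Fields default to the architectural four-bit
// width; a second integer argument (e.g. AA64SMFR0::kSMEf32f32(32, 1)) narrows
// the width, and Field::kSigned marks fields, such as AA64PFR0::kFP, whose
// all-ones encoding (-1) means "not implemented". (See the Field class in
// cpu-aarch64.h for the exact constructor; this summary is inferred from the
// uses above.) As a worked example: if ID_AA64ISAR0_EL1[7:4] (kAES) reads as
// 2, AA64ISAR0::GetCPUFeatures() below reports both kAES and kPmull1Q, while
// a signed kFP value of -1 reports neither kFP nor kFPHalf; that is why the
// signed fields are tested with ">= 0" rather than ">= 1".
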
CPUFeatures AA64PFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
  if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
  if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
  if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
  if (Get(kRAS) >= 1) f.Combine(CPUFeatures::kRAS);
  if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
  if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
  if (Get(kCSV2) >= 1) f.Combine(CPUFeatures::kCSV2);
  if (Get(kCSV2) >= 2) f.Combine(CPUFeatures::kSCXTNUM);
  if (Get(kCSV3) >= 1) f.Combine(CPUFeatures::kCSV3);
  return f;
}

CPUFeatures AA64PFR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
  if (Get(kSSBS) >= 1) f.Combine(CPUFeatures::kSSBS);
  if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl);
  if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions);
  if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE);
  if (Get(kMTE) >= 3) f.Combine(CPUFeatures::kMTE3);
  if (Get(kSME) >= 1) f.Combine(CPUFeatures::kSME);
  return f;
}

CPUFeatures AA64ISAR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kAES);
  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kPmull1Q);
  if (Get(kSHA1) >= 1) f.Combine(CPUFeatures::kSHA1);
  if (Get(kSHA2) >= 1) f.Combine(CPUFeatures::kSHA2);
  if (Get(kSHA2) >= 2) f.Combine(CPUFeatures::kSHA512);
  if (Get(kCRC32) >= 1) f.Combine(CPUFeatures::kCRC32);
  if (Get(kAtomic) >= 1) f.Combine(CPUFeatures::kAtomics);
  if (Get(kRDM) >= 1) f.Combine(CPUFeatures::kRDM);
  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSHA3);
  if (Get(kSM3) >= 1) f.Combine(CPUFeatures::kSM3);
  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSM4);
  if (Get(kDP) >= 1) f.Combine(CPUFeatures::kDotProduct);
  if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM);
  if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM);
  if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag);
  if (Get(kRNDR) >= 1) f.Combine(CPUFeatures::kRNG);
  return f;
}

CPUFeatures AA64ISAR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP);
  if (Get(kDPB) >= 2) f.Combine(CPUFeatures::kDCCVADP);
  if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT);
  if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma);
  if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc);
  if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm);
  if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt);
  if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB);
  if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES);
  if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16);
  if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kEBF16);
  if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH);
  if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM);

  // Only one of these fields should be non-zero, but they have the same
  // encodings, so merge the logic.
  int apx = std::max(Get(kAPI), Get(kAPA));
  if (apx >= 1) {
    f.Combine(CPUFeatures::kPAuth);
    // APA (rather than API) indicates QARMA.
    if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuthQARMA);
    if (apx == 0b0010) f.Combine(CPUFeatures::kPAuthEnhancedPAC);
    if (apx >= 0b0011) f.Combine(CPUFeatures::kPAuthEnhancedPAC2);
    if (apx >= 0b0100) f.Combine(CPUFeatures::kPAuthFPAC);
    if (apx >= 0b0101) f.Combine(CPUFeatures::kPAuthFPACCombined);
  }

  if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
  if (Get(kGPA) >= 1) {
    f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
  }
  return f;
}

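// Worked example for the address authentication (PAuth) decoding above: a CPU
// reporting APA = 0b0011 and API = 0 gives apx = 3, so AA64ISAR1's
// GetCPUFeatures() combines kPAuth, kPAuthQARMA (because APA rather than API
// is non-zero) and kPAuthEnhancedPAC2. kPAuthEnhancedPAC is only reported for
// the exact field value 0b0010, which is why that test uses "==" rather than
// ">=".
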
CPUFeatures AA64ISAR2::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kWFXT) >= 2) f.Combine(CPUFeatures::kWFXT);
  if (Get(kRPRES) >= 1) f.Combine(CPUFeatures::kRPRES);
  if (Get(kMOPS) >= 1) f.Combine(CPUFeatures::kMOPS);
  if (Get(kCSSC) >= 1) f.Combine(CPUFeatures::kCSSC);
  return f;
}

CPUFeatures AA64MMFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kECV) >= 1) f.Combine(CPUFeatures::kECV);
  return f;
}

CPUFeatures AA64MMFR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions);
  if (Get(kAFP) >= 1) f.Combine(CPUFeatures::kAFP);
  return f;
}

CPUFeatures AA64MMFR2::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kAT) >= 1) f.Combine(CPUFeatures::kUSCAT);
  return f;
}

CPUFeatures AA64ZFR0::GetCPUFeatures() const {
  // This register is only available with SVE, but reads-as-zero in its
  // absence, so it's always safe to read it.
  CPUFeatures f;
  if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM);
  if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM);
  if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM);
  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSVESM4);
  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSVESHA3);
  if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16);
  if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kSVE_EBF16);
  if (Get(kBitPerm) >= 1) f.Combine(CPUFeatures::kSVEBitPerm);
  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kSVEAES);
  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kSVEPmull128);
  if (Get(kSVEver) >= 1) f.Combine(CPUFeatures::kSVE2);
  return f;
}

CPUFeatures AA64SMFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kSMEf32f32) >= 1) f.Combine(CPUFeatures::kSMEf32f32);
  if (Get(kSMEb16f32) >= 1) f.Combine(CPUFeatures::kSMEb16f32);
  if (Get(kSMEf16f32) >= 1) f.Combine(CPUFeatures::kSMEf16f32);
  if (Get(kSMEi8i32) >= 15) f.Combine(CPUFeatures::kSMEi8i32);
  if (Get(kSMEf64f64) >= 1) f.Combine(CPUFeatures::kSMEf64f64);
  if (Get(kSMEi16i64) >= 15) f.Combine(CPUFeatures::kSMEi16i64);
  if (Get(kSMEfa64) >= 1) f.Combine(CPUFeatures::kSMEfa64);
  return f;
}

int IDRegister::Get(IDRegister::Field field) const {
  int msb = field.GetMsb();
  int lsb = field.GetLsb();
  VIXL_STATIC_ASSERT(static_cast<size_t>(Field::kMaxWidthInBits) <
                     (sizeof(int) * kBitsPerByte));
  switch (field.GetType()) {
    case Field::kSigned:
      return static_cast<int>(ExtractSignedBitfield64(msb, lsb, value_));
    case Field::kUnsigned:
      return static_cast<int>(ExtractUnsignedBitfield64(msb, lsb, value_));
  }
  VIXL_UNREACHABLE();
  return 0;
}

CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
  CPUFeatures f;
#define VIXL_COMBINE_ID_REG(NAME, MRS_ARG) \
  f.Combine(Read##NAME().GetCPUFeatures());
  VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
#undef VIXL_COMBINE_ID_REG
  return f;
}

CPUFeatures CPU::InferCPUFeaturesFromOS(
    CPUFeatures::QueryIDRegistersOption option) {
  CPUFeatures features;

#ifdef VIXL_USE_LINUX_HWCAP
  // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
  // than explicit bits, but explicit bits allow us to identify features that
  // the toolchain doesn't know about.
  static const CPUFeatures::Feature kFeatureBitsLow[] =
      {// Bits 0-7
       CPUFeatures::kFP,
       CPUFeatures::kNEON,
       CPUFeatures::kNone,  // "EVTSTRM", which VIXL doesn't track.
       CPUFeatures::kAES,
       CPUFeatures::kPmull1Q,
       CPUFeatures::kSHA1,
       CPUFeatures::kSHA2,
       CPUFeatures::kCRC32,
       // Bits 8-15
       CPUFeatures::kAtomics,
       CPUFeatures::kFPHalf,
       CPUFeatures::kNEONHalf,
       CPUFeatures::kIDRegisterEmulation,
       CPUFeatures::kRDM,
       CPUFeatures::kJSCVT,
       CPUFeatures::kFcma,
       CPUFeatures::kRCpc,
       // Bits 16-23
       CPUFeatures::kDCPoP,
       CPUFeatures::kSHA3,
       CPUFeatures::kSM3,
       CPUFeatures::kSM4,
       CPUFeatures::kDotProduct,
       CPUFeatures::kSHA512,
       CPUFeatures::kSVE,
       CPUFeatures::kFHM,
       // Bits 24-31
       CPUFeatures::kDIT,
       CPUFeatures::kUSCAT,
       CPUFeatures::kRCpcImm,
       CPUFeatures::kFlagM,
       CPUFeatures::kSSBSControl,
       CPUFeatures::kSB,
       CPUFeatures::kPAuth,
       CPUFeatures::kPAuthGeneric};
  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsLow) < 64);

  static const CPUFeatures::Feature kFeatureBitsHigh[] =
      {// Bits 0-7
       CPUFeatures::kDCCVADP,
       CPUFeatures::kSVE2,
       CPUFeatures::kSVEAES,
       CPUFeatures::kSVEPmull128,
       CPUFeatures::kSVEBitPerm,
       CPUFeatures::kSVESHA3,
       CPUFeatures::kSVESM4,
       CPUFeatures::kAXFlag,
       // Bits 8-15
       CPUFeatures::kFrintToFixedSizedInt,
       CPUFeatures::kSVEI8MM,
       CPUFeatures::kSVEF32MM,
       CPUFeatures::kSVEF64MM,
       CPUFeatures::kSVEBF16,
       CPUFeatures::kI8MM,
       CPUFeatures::kBF16,
       CPUFeatures::kDGH,
       // Bits 16-23
       CPUFeatures::kRNG,
       CPUFeatures::kBTI,
       CPUFeatures::kMTE,
       CPUFeatures::kECV,
       CPUFeatures::kAFP,
       CPUFeatures::kRPRES,
       CPUFeatures::kMTE3,
       CPUFeatures::kSME,
       // Bits 24-31
       CPUFeatures::kSMEi16i64,
       CPUFeatures::kSMEf64f64,
       CPUFeatures::kSMEi8i32,
       CPUFeatures::kSMEf16f32,
       CPUFeatures::kSMEb16f32,
       CPUFeatures::kSMEf32f32,
       CPUFeatures::kSMEfa64,
       CPUFeatures::kWFXT,
       // Bits 32-39
       CPUFeatures::kEBF16,
       CPUFeatures::kSVE_EBF16};
  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsHigh) < 64);

  auto combine_features = [&features](uint64_t hwcap,
                                      const CPUFeatures::Feature* feature_array,
                                      size_t features_size) {
    for (size_t i = 0; i < features_size; i++) {
      if (hwcap & (UINT64_C(1) << i)) features.Combine(feature_array[i]);
    }
  };

  uint64_t hwcap_low = getauxval(AT_HWCAP);
  uint64_t hwcap_high = getauxval(AT_HWCAP2);

  combine_features(hwcap_low, kFeatureBitsLow, ArrayLength(kFeatureBitsLow));
  combine_features(hwcap_high, kFeatureBitsHigh, ArrayLength(kFeatureBitsHigh));

  // MTE support from HWCAP2 signifies FEAT_MTE1 and FEAT_MTE2 support.
  if (features.Has(CPUFeatures::kMTE)) {
    features.Combine(CPUFeatures::kMTEInstructions);
  }
#endif  // VIXL_USE_LINUX_HWCAP

  if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) &&
      (features.Has(CPUFeatures::kIDRegisterEmulation))) {
    features.Combine(InferCPUFeaturesFromIDRegisters());
  }
  return features;
}


#ifdef __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG)        \
  NAME CPU::Read##NAME() {                     \
    uint64_t value = 0;                        \
    __asm__("mrs %0, " MRS_ARG : "=r"(value)); \
    return NAME(value);                        \
  }
#else  // __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG) \
  NAME CPU::Read##NAME() {              \
    VIXL_UNREACHABLE();                 \
    return NAME(0);                     \
  }
#endif  // __aarch64__

VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)

#undef VIXL_READ_ID_REG

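// For reference, VIXL_AARCH64_ID_REG_LIST (declared in cpu-aarch64.h) pairs
// each register name with the string handed to the mrs instruction, so on an
// AArch64 host each entry of the list above expands roughly as follows
// (sketch only; the exact MRS_ARG string comes from that list):
//
//   AA64PFR0 CPU::ReadAA64PFR0() {
//     uint64_t value = 0;
//     __asm__("mrs %0, " /* MRS_ARG for AA64PFR0 */ : "=r"(value));
//     return AA64PFR0(value);
//   }
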
// Initialise to smallest possible cache size.
unsigned CPU::dcache_line_size_ = 1;
unsigned CPU::icache_line_size_ = 1;


// Currently computes I and D cache line size.
void CPU::SetUp() {
  uint32_t cache_type_register = GetCacheType();

  // The cache type register holds information about the caches, including the
  // I and D cache line sizes.
  static const int kDCacheLineSizeShift = 16;
  static const int kICacheLineSizeShift = 0;
  static const uint32_t kDCacheLineSizeMask = 0xf << kDCacheLineSizeShift;
  static const uint32_t kICacheLineSizeMask = 0xf << kICacheLineSizeShift;

  // The cache type register holds the I and D cache line sizes as log2 of the
  // number of words in a line, so a field value of 4 means 16-word (64-byte)
  // lines.
  uint32_t dcache_line_size_power_of_two =
      (cache_type_register & kDCacheLineSizeMask) >> kDCacheLineSizeShift;
  uint32_t icache_line_size_power_of_two =
      (cache_type_register & kICacheLineSizeMask) >> kICacheLineSizeShift;

  dcache_line_size_ = 4 << dcache_line_size_power_of_two;
  icache_line_size_ = 4 << icache_line_size_power_of_two;
}


uint32_t CPU::GetCacheType() {
#ifdef __aarch64__
  uint64_t cache_type_register;
  // Copy the content of the cache type register to a core register.
  __asm__ __volatile__("mrs %[ctr], ctr_el0"  // NOLINT(runtime/references)
                       : [ctr] "=r"(cache_type_register));
  VIXL_ASSERT(IsUint32(cache_type_register));
  return static_cast<uint32_t>(cache_type_register);
#else
  // This will lead to a cache with 1 byte long lines, which is fine since
  // neither EnsureIAndDCacheCoherency nor the simulator will need this
  // information.
  return 0;
#endif
}


// Query the SVE vector length. This requires CPUFeatures::kSVE.
int CPU::ReadSVEVectorLengthInBits() {
#ifdef __aarch64__
  uint64_t vl;
  // To support compilers that don't understand `rdvl`, encode the value
  // directly and move it manually.
  __asm__(
      " .word 0x04bf5100\n"  // rdvl x0, #8
      " mov %[vl], x0\n"
      : [vl] "=r"(vl)
      :
      : "x0");
  VIXL_ASSERT(vl <= INT_MAX);
  return static_cast<int>(vl);
#else
  VIXL_UNREACHABLE();
  return 0;
#endif
}


void CPU::EnsureIAndDCacheCoherency(void* address, size_t length) {
#ifdef __aarch64__
  // Implement the cache synchronisation for all targets where AArch64 is the
  // host, even if we're building the simulator for an AArch64 host. This
  // allows for cases where the user wants to simulate code as well as run it
  // natively.

  if (length == 0) {
    return;
  }

  // The code below assumes user space cache operations are allowed.

  // Work out the line sizes for each cache, and use them to determine the
  // start addresses.
  uintptr_t start = reinterpret_cast<uintptr_t>(address);
  uintptr_t dsize = static_cast<uintptr_t>(dcache_line_size_);
  uintptr_t isize = static_cast<uintptr_t>(icache_line_size_);
  uintptr_t dline = start & ~(dsize - 1);
  uintptr_t iline = start & ~(isize - 1);

  // Cache line sizes are always a power of 2.
  VIXL_ASSERT(IsPowerOf2(dsize));
  VIXL_ASSERT(IsPowerOf2(isize));
  uintptr_t end = start + length;

  do {
    __asm__ __volatile__(
        // Clean each line of the D cache containing the target data.
        //
        // dc       : Data Cache maintenance
        //    c     : Clean
        //     va   : by (Virtual) Address
        //       u  : to the point of Unification
        // The point of unification for a processor is the point by which the
        // instruction and data caches are guaranteed to see the same copy of a
        // memory location. See ARM DDI 0406B page B2-12 for more information.
        " dc cvau, %[dline]\n"
        :
        : [dline] "r"(dline)
        // This code does not write to memory, but the "memory" dependency
        // prevents GCC from reordering the code.
        : "memory");
    dline += dsize;
  } while (dline < end);

  __asm__ __volatile__(
      // Make sure that the data cache operations (above) complete before the
      // instruction cache operations (below).
      //
      // dsb      : Data Synchronisation Barrier
      //     ish  : Inner SHareable domain
      //
      // The point of unification for an Inner Shareable shareability domain is
      // the point by which the instruction and data caches of all the
      // processors in that Inner Shareable shareability domain are guaranteed
      // to see the same copy of a memory location. See ARM DDI 0406B page
      // B2-12 for more information.
      " dsb ish\n"
      :
      :
      : "memory");

  do {
    __asm__ __volatile__(
        // Invalidate each line of the I cache containing the target data.
        //
        // ic      : Instruction Cache maintenance
        //    i    : Invalidate
        //     va  : by Address
        //       u : to the point of Unification
        " ic ivau, %[iline]\n"
        :
        : [iline] "r"(iline)
        : "memory");
    iline += isize;
  } while (iline < end);

  __asm__ __volatile__(
      // Make sure that the instruction cache operations (above) take effect
      // before the isb (below).
      " dsb ish\n"

      // Ensure that any instructions already in the pipeline are discarded and
      // reloaded from the new data.
      // isb : Instruction Synchronisation Barrier
      " isb\n"
      :
      :
      : "memory");
#else
  // If the host isn't AArch64, we must be using the simulator, so this
  // function doesn't have to do anything.
  USE(address, length);
#endif
}


}  // namespace aarch64
}  // namespace vixl
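
// Usage sketch (illustrative only; not part of VIXL's documented interface):
// a JIT that has just written code into a buffer would typically do something
// along these lines before branching to it:
//
//   CPUFeatures features = CPU::InferCPUFeaturesFromOS(
//       CPUFeatures::kQueryIDRegistersIfAvailable);
//   CPU::SetUp();  // Determine the real cache line sizes.
//   EmitCodeInto(buffer, buffer_size, features);  // Hypothetical generator.
//   CPU::EnsureIAndDCacheCoherency(buffer, buffer_size);
//   // Only now is it safe to execute the code in `buffer` on this thread.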