cpu_code_cache.cpp (59534B)
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#include "bus.h"
#include "cpu_code_cache_private.h"
#include "cpu_core.h"
#include "cpu_core_private.h"
#include "cpu_disasm.h"
#include "cpu_recompiler_types.h"
#include "host.h"
#include "settings.h"
#include "system.h"
#include "timing_event.h"

#include "util/page_fault_handler.h"

#include "common/align.h"
#include "common/assert.h"
#include "common/error.h"
#include "common/intrin.h"
#include "common/log.h"
#include "common/memmap.h"

Log_SetChannel(CPU::CodeCache);

// Enable dumping of recompiled block code size statistics.
// #define DUMP_CODE_SIZE_STATS 1

// Enable profiling of JIT blocks.
// #define ENABLE_RECOMPILER_PROFILING 1

#ifdef ENABLE_RECOMPILER
#include "cpu_recompiler_code_generator.h"
#endif

#ifdef ENABLE_NEWREC
#include "cpu_newrec_compiler.h"
#endif

#include <map>
#include <unordered_set>
#include <zlib.h>

namespace CPU::CodeCache {

// Type aliases used throughout the code cache.
// LUTRangeList: the 9 guest address ranges (RAM/EXP1/BIOS across KUSEG/KSEG0/KSEG1 mirrors) covered by the LUTs.
using LUTRangeList = std::array<std::pair<VirtualMemoryAddress, VirtualMemoryAddress>, 9>;
using PageProtectionArray = std::array<PageProtectionInfo, Bus::RAM_8MB_CODE_PAGE_COUNT>;
using BlockInstructionInfoPair = std::pair<Instruction, InstructionInfo>;
using BlockInstructionList = std::vector<BlockInstructionInfoPair>;

// Switch to manual protection if we invalidate more than 4 times within 60 frames.
// Fall blocks back to interpreter if we recompile more than 3 times within 15 frames.
// The interpreter fallback is set before the manual protection switch, so that if it's just a single block
// which is constantly getting mutated, we won't hurt the performance of the rest in the page.
// Thresholds implementing the heuristics described above.
static constexpr u32 RECOMPILE_COUNT_FOR_INTERPRETER_FALLBACK = 3;
static constexpr u32 RECOMPILE_FRAMES_FOR_INTERPRETER_FALLBACK = 15;
static constexpr u32 INVALIDATE_COUNT_FOR_MANUAL_PROTECTION = 4;
static constexpr u32 INVALIDATE_FRAMES_FOR_MANUAL_PROTECTION = 60;

// LUT pointer encoding helpers (see definitions below for the offset trick).
static CodeLUT DecodeCodeLUTPointer(u32 slot, CodeLUT ptr);
static CodeLUT EncodeCodeLUTPointer(u32 slot, CodeLUT ptr);
static CodeLUT OffsetCodeLUTPointer(CodeLUT fake_ptr, u32 pc);

static void AllocateLUTs();
static void DeallocateLUTs();
static void ResetCodeLUT();
static void SetCodeLUT(u32 pc, const void* function);
static void InvalidateBlock(Block* block, BlockState new_state);
static void ClearBlocks();

// Block management.
static Block* LookupBlock(u32 pc);
static Block* CreateBlock(u32 pc, const BlockInstructionList& instructions, const BlockMetadata& metadata);
static bool IsBlockCodeCurrent(const Block* block);
static bool RevalidateBlock(Block* block);
PageProtectionMode GetProtectionModeForPC(u32 pc);
PageProtectionMode GetProtectionModeForBlock(const Block* block);
static bool ReadBlockInstructions(u32 start_pc, BlockInstructionList* instructions, BlockMetadata* metadata);
static void FillBlockRegInfo(Block* block);
static void CopyRegInfo(InstructionInfo* dst, const InstructionInfo* src);
static void SetRegAccess(InstructionInfo* inst, Reg reg, bool write);
static void AddBlockToPageList(Block* block);
static void RemoveBlockFromPageList(Block* block);

// Cached interpreter entry points.
static Block* CreateCachedInterpreterBlock(u32 pc);
[[noreturn]] static void ExecuteCachedInterpreter();
template<PGXPMode pgxp_mode>
[[noreturn]] static void ExecuteCachedInterpreterImpl();

// Fast map provides lookup from PC to function
// Function pointers are offset so that you don't need to subtract
CodeLUTArray g_code_lut;
static BlockLUTArray s_block_lut;
static std::unique_ptr<const void*[]> s_lut_code_pointers;
static std::unique_ptr<Block*[]> s_lut_block_pointers;
static PageProtectionArray s_page_protection = {};
static std::vector<Block*> s_blocks;

// for compiling - reuse to avoid allocations
static BlockInstructionList s_block_instructions;

// Recompiler-only helpers (defined elsewhere in this file / the recompiler backends).
static void BacklinkBlocks(u32 pc, const void* dst);
static void UnlinkBlockExits(Block* block);
static void ResetCodeBuffer();

static void ClearASMFunctions();
static void CompileASMFunctions();
static bool CompileBlock(Block* block);
static PageFaultHandler::HandlerResult HandleFastmemException(void* exception_pc, void* fault_address, bool is_write);
static void BackpatchLoadStore(void* host_pc, const LoadstoreBackpatchInfo& info);
static void RemoveBackpatchInfoForRange(const void* host_code, u32 size);

// Fastmem backpatching state: host-code address -> patch info, plus guest PCs known to fault.
static BlockLinkMap s_block_links;
static std::map<const void*, LoadstoreBackpatchInfo> s_fastmem_backpatch_info;
static std::unordered_set<u32> s_fastmem_faulting_pcs;

// Dispatcher/trampoline entry points emitted by CompileASMFunctions().
NORETURN_FUNCTION_POINTER void (*g_enter_recompiler)();
const void* g_compile_or_revalidate_block;
const void* g_check_events_and_dispatch;
const void* g_run_events_and_dispatch;
const void* g_dispatcher;
const void* g_interpret_block;
const void* g_discard_and_recompile_block;

#ifdef ENABLE_RECOMPILER_PROFILING

PerfScope MIPSPerfScope("MIPS");

#endif

#if defined(CPU_ARCH_ARM32)
// Use a smaller code buffer size on AArch32 to have a better chance of being in range.
static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 16 * 1024 * 1024;
static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 4 * 1024 * 1024;
#else
static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 48 * 1024 * 1024;
static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 16 * 1024 * 1024;
#endif

// On Linux ARM32/ARM64, we use a dedicated section in the ELF for storing code.
// This is because without ASLR, or on certain ASLR offsets, the sbrk() heap ends up immediately following the text/data
// sections, which means there isn't a large enough gap to fit within range on ARM32.
#if defined(__linux__) && (defined(CPU_ARCH_ARM32) || defined(CPU_ARCH_ARM64))
#define USE_CODE_BUFFER_SECTION 1
#ifdef __clang__
#pragma clang section bss = ".jitstorage"
__attribute__((aligned(HOST_PAGE_SIZE))) static u8 s_code_buffer_ptr[RECOMPILER_CODE_CACHE_SIZE];
#pragma clang section bss = ""
#endif
#else
static u8* s_code_buffer_ptr = nullptr;
#endif

// Near code buffer state (hot generated code).
static u8* s_code_ptr = nullptr;
static u8* s_free_code_ptr = nullptr;
static u32 s_code_size = 0;
static u32 s_code_used = 0;

// Far code buffer state (cold paths, e.g. fastmem fault handlers).
static u8* s_far_code_ptr = nullptr;
static u8* s_free_far_code_ptr = nullptr;
static u32 s_far_code_size = 0;
static u32 s_far_code_used = 0;

#ifdef _DEBUG
static u32 s_total_instructions_compiled = 0;
static u32 s_total_host_instructions_emitted = 0;
#endif
} // namespace CPU::CodeCache

// Returns true when either recompiler backend (old rec or newrec) is the active execution mode.
bool CPU::CodeCache::IsUsingAnyRecompiler()
{
  return (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler ||
          g_settings.cpu_execution_mode == CPUExecutionMode::NewRec);
}

// Fastmem is only meaningful when a recompiler is active and fastmem isn't disabled in settings.
bool CPU::CodeCache::IsUsingFastmem()
{
  return IsUsingAnyRecompiler() && g_settings.cpu_fastmem_mode != CPUFastmemMode::Disabled;
}

// One-time process startup: reserves/maps the JIT code buffer, allocates the LUTs,
// and installs the page fault handler. Returns false (with error set) on failure.
bool CPU::CodeCache::ProcessStartup(Error* error)
{
#ifdef USE_CODE_BUFFER_SECTION
  // Code buffer lives in the .jitstorage ELF section; just flip it to RWX.
  const u8* module_base = static_cast<const u8*>(MemMap::GetBaseAddress());
  INFO_LOG("Using JIT buffer section of size {} at {} (0x{:X} bytes / {} MB away)", sizeof(s_code_buffer_ptr),
           static_cast<void*>(s_code_buffer_ptr), std::abs(static_cast<ptrdiff_t>(s_code_buffer_ptr - module_base)),
           (std::abs(static_cast<ptrdiff_t>(s_code_buffer_ptr - module_base)) + (1024 * 1024 - 1)) / (1024 * 1024));
  const bool code_buffer_allocated =
    MemMap::MemProtect(s_code_buffer_ptr, RECOMPILER_CODE_CACHE_SIZE, PageProtect::ReadWriteExecute);
#else
  s_code_buffer_ptr = static_cast<u8*>(MemMap::AllocateJITMemory(RECOMPILER_CODE_CACHE_SIZE));
  const bool code_buffer_allocated = (s_code_buffer_ptr != nullptr);
#endif
  if (!code_buffer_allocated) [[unlikely]]
  {
    Error::SetStringView(error, "Failed to allocate code storage. The log may contain more information, you will need "
                                "to run DuckStation with -earlyconsole in the command line.");
    return false;
  }

  AllocateLUTs();

  if (!PageFaultHandler::Install(error))
    return false;

  return true;
}

// Process-exit counterpart of ProcessStartup().
void CPU::CodeCache::ProcessShutdown()
{
  DeallocateLUTs();

#ifndef USE_CODE_BUFFER_SECTION
  MemMap::ReleaseJITMemory(s_code_buffer_ptr, RECOMPILER_CODE_CACHE_SIZE);
#endif
}

// Per-VM initialization: primes the code buffer/ASM dispatchers (recompiler only)
// and sets up fastmem views to match the current execution mode.
void CPU::CodeCache::Initialize()
{
  Assert(s_blocks.empty());

  if (IsUsingAnyRecompiler())
  {
    ResetCodeBuffer();
    CompileASMFunctions();
    ResetCodeLUT();
  }

  Bus::UpdateFastmemViews(IsUsingAnyRecompiler() ? g_settings.cpu_fastmem_mode : CPUFastmemMode::Disabled);
  CPU::UpdateMemoryPointers();
}

// Per-VM teardown: drops all blocks and generated code, disables fastmem views.
void CPU::CodeCache::Shutdown()
{
  ClearBlocks();
  ClearASMFunctions();

  Bus::UpdateFastmemViews(CPUFastmemMode::Disabled);
  CPU::UpdateMemoryPointers();
}

// Full cache reset (e.g. on system reset): throws away every block and, when
// recompiling, regenerates the ASM dispatchers into a fresh code buffer.
void CPU::CodeCache::Reset()
{
  ClearBlocks();

  if (IsUsingAnyRecompiler())
  {
    ClearASMFunctions();
    ResetCodeBuffer();
    CompileASMFunctions();
    ResetCodeLUT();
  }
}

// Main execution entry point; never returns normally (dispatch loops forever).
void CPU::CodeCache::Execute()
{
  if (IsUsingAnyRecompiler())
  {
    g_enter_recompiler();
    UnreachableCode();
  }
  else
  {
    ExecuteCachedInterpreter();
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// MARK: - Block Management
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

namespace CPU::CodeCache {
// Number of LUT tables needed to cover the guest address range [start, end].
static constexpr u32 GetLUTTableCount(u32 start, u32 end)
{
  return ((end >> LUT_TABLE_SHIFT) - (start >> LUT_TABLE_SHIFT)) + 1;
}

// The executable guest ranges: RAM/EXP1/BIOS, each mirrored in KUSEG, KSEG0 and KSEG1.
static constexpr LUTRangeList GetLUTRanges()
{
  const LUTRangeList ranges = {{
    {0x00000000, 0x00800000}, // RAM
    {0x1F000000, 0x1F800000}, // EXP1
    {0x1FC00000, 0x1FC80000}, // BIOS

    {0x80000000, 0x80800000}, // RAM
    {0x9F000000, 0x9F800000}, // EXP1
    {0x9FC00000, 0x9FC80000}, // BIOS

    {0xA0000000, 0xA0800000}, // RAM
    {0xBF000000, 0xBF800000}, // EXP1
    {0xBFC00000, 0xBFC80000}  // BIOS
  }};
  return ranges;
}

// Total number of LUT slots across all ranges, optionally including the shared
// "unreachable" table used for addresses outside any range.
static constexpr u32 GetLUTSlotCount(bool include_unreachable)
{
  u32 tables = include_unreachable ? 1 : 0; // unreachable table
  for (const auto& [start, end] : GetLUTRanges())
    tables += GetLUTTableCount(start, end);

  return tables * LUT_TABLE_SIZE;
}
} // namespace CPU::CodeCache

// Recovers the real table pointer from an encoded LUT entry. The encoding bakes the
// slot index into the pointer so lookups can index with the full PC without masking;
// on 64-bit the PC is additionally scaled by 2 (<< 17 here, << 1 in Offset below).
CPU::CodeCache::CodeLUT CPU::CodeCache::DecodeCodeLUTPointer(u32 slot, CodeLUT ptr)
{
  if constexpr (sizeof(void*) == 8)
    return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) + (static_cast<u64>(slot) << 17));
  else
    return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) + (slot << 16));
}

// Inverse of DecodeCodeLUTPointer: biases the table pointer by the slot index.
CPU::CodeCache::CodeLUT CPU::CodeCache::EncodeCodeLUTPointer(u32 slot, CodeLUT ptr)
{
  if constexpr (sizeof(void*) == 8)
    return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) - (static_cast<u64>(slot) << 17));
  else
    return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) - (slot << 16));
}

// Applies the PC to an encoded (biased) LUT pointer, yielding the entry address.
// 64-bit: pc << 1 == (pc >> 2) * 8 bytes per entry; 32-bit: pc == (pc >> 2) * 4.
CPU::CodeCache::CodeLUT CPU::CodeCache::OffsetCodeLUTPointer(CodeLUT fake_ptr, u32 pc)
{
  u8* fake_byte_ptr = reinterpret_cast<u8*>(fake_ptr);
  if constexpr (sizeof(void*) == 8)
    return reinterpret_cast<const void**>(fake_byte_ptr + (static_cast<u64>(pc) << 1));
  else
    return reinterpret_cast<const void**>(fake_byte_ptr + pc);
}

// Allocates the code/block lookup tables and wires every LUT slot to either its
// backing table (for mapped ranges) or the shared unreachable table.
void CPU::CodeCache::AllocateLUTs()
{
  constexpr u32 num_code_slots = GetLUTSlotCount(true);
  constexpr u32 num_block_slots = GetLUTSlotCount(false);

  Assert(!s_lut_code_pointers && !s_lut_block_pointers);
  s_lut_code_pointers = std::make_unique<const void*[]>(num_code_slots);
  s_lut_block_pointers = std::make_unique<Block*[]>(num_block_slots);
  std::memset(s_lut_block_pointers.get(), 0, sizeof(Block*) * num_block_slots);

  CodeLUT code_table_ptr = s_lut_code_pointers.get();
  Block** block_table_ptr = s_lut_block_pointers.get();
  CodeLUT const code_table_ptr_end = code_table_ptr + num_code_slots;
  Block** const block_table_ptr_end = block_table_ptr + num_block_slots;

  // Make the unreachable table jump to the invalid code callback.
  // NOTE(review): entries are nulled here; ResetCodeLUT() later fills this table with
  // g_interpret_block once the dispatchers exist.
  MemsetPtrs(code_table_ptr, static_cast<const void*>(nullptr), LUT_TABLE_COUNT);

  // Mark everything as unreachable to begin with.
  for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
  {
    g_code_lut[i] = EncodeCodeLUTPointer(i, code_table_ptr);
    s_block_lut[i] = nullptr;
  }
  code_table_ptr += LUT_TABLE_SIZE;

  // Allocate ranges.
  for (const auto& [start, end] : GetLUTRanges())
  {
    const u32 start_slot = start >> LUT_TABLE_SHIFT;
    const u32 count = GetLUTTableCount(start, end);
    for (u32 i = 0; i < count; i++)
    {
      const u32 slot = start_slot + i;

      g_code_lut[slot] = EncodeCodeLUTPointer(slot, code_table_ptr);
      code_table_ptr += LUT_TABLE_SIZE;

      s_block_lut[slot] = block_table_ptr;
      block_table_ptr += LUT_TABLE_SIZE;
    }
  }

  Assert(code_table_ptr == code_table_ptr_end);
  Assert(block_table_ptr == block_table_ptr_end);
}

// Frees the LUT backing storage allocated by AllocateLUTs().
void CPU::CodeCache::DeallocateLUTs()
{
  s_lut_block_pointers.reset();
  s_lut_code_pointers.reset();
}

// Re-points every code LUT entry at the compile trampoline (and the unreachable
// table at the interpreter fallback), discarding all cached code pointers.
void CPU::CodeCache::ResetCodeLUT()
{
  if (!s_lut_code_pointers)
    return;

  // Make the unreachable table jump to the invalid code callback.
  MemsetPtrs(s_lut_code_pointers.get(), g_interpret_block, LUT_TABLE_COUNT);

  for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
  {
    CodeLUT ptr = DecodeCodeLUTPointer(i, g_code_lut[i]);
    // Skip slots backed by the shared unreachable table (first table in the array).
    if (ptr == s_lut_code_pointers.get())
      continue;

    MemsetPtrs(ptr, g_compile_or_revalidate_block, LUT_TABLE_SIZE);
  }
}

// Stores the host code pointer for a guest PC in the fast dispatch LUT.
void CPU::CodeCache::SetCodeLUT(u32 pc, const void* function)
{
  if (!s_lut_code_pointers)
    return;

  const u32 table = pc >> LUT_TABLE_SHIFT;
  CodeLUT encoded_ptr = g_code_lut[table];

#ifdef _DEBUG
  const CodeLUT table_ptr = DecodeCodeLUTPointer(table, encoded_ptr);
  DebugAssert(table_ptr != nullptr && table_ptr != s_lut_code_pointers.get());
#endif

  *OffsetCodeLUTPointer(encoded_ptr, pc) = function;
}

// Looks up the Block for a guest PC, or nullptr if none (or the PC is unmapped).
CPU::CodeCache::Block* CPU::CodeCache::LookupBlock(u32 pc)
{
  const u32 table = pc >> LUT_TABLE_SHIFT;
  if (!s_block_lut[table])
    return nullptr;

  // Low 16 bits of PC select the word-sized entry within the table.
  const u32 idx = (pc & 0xFFFF) >> 2;
  return s_block_lut[table][idx];
}

// Creates (or reuses) the Block for a PC from decoded instructions + metadata,
// carrying over recompile statistics so the interpreter-fallback heuristic works.
CPU::CodeCache::Block* CPU::CodeCache::CreateBlock(u32 pc, const BlockInstructionList& instructions,
                                                   const BlockMetadata& metadata)
{
  const u32 size = static_cast<u32>(instructions.size());
  const u32 table = pc >> LUT_TABLE_SHIFT;
  Assert(s_block_lut[table]);

  // retain from old block
  const u32 frame_number = System::GetFrameNumber();
  u32 recompile_frame = System::GetFrameNumber();
  u8 recompile_count = 0;

  const u32 idx = (pc & 0xFFFF) >> 2;
  Block* block = s_block_lut[table][idx];
  if (block)
  {
    // shouldn't be in the page list.. since we should come here after invalidating
    Assert(!block->next_block_in_page);

    // keep recompile stats before resetting, that way we actually count recompiles
    recompile_frame = block->compile_frame;
    recompile_count = block->compile_count;

    // if it has the same number of instructions, we can reuse it
    if (block->size != size)
    {
      // this sucks.. hopefully won't happen very often
      // TODO: allocate max size, allow shrink but not grow
      auto it = std::find(s_blocks.begin(), s_blocks.end(), block);
      Assert(it != s_blocks.end());
      s_blocks.erase(it);

      block->~Block();
      Common::AlignedFree(block);
      block = nullptr;
    }
  }

  if (!block)
  {
    // Block header and its instruction/info arrays live in one aligned allocation.
    block = static_cast<Block*>(Common::AlignedMalloc(
      sizeof(Block) + (sizeof(Instruction) * size) + (sizeof(InstructionInfo) * size), alignof(Block)));
    Assert(block);
    new (block) Block();
    s_blocks.push_back(block);
  }

  block->pc = pc;
  block->size = size;
  block->host_code = nullptr;
  block->next_block_in_page = nullptr;
  block->num_exit_links = 0;
  block->state = BlockState::Valid;
  block->flags = metadata.flags;
  block->protection = GetProtectionModeForBlock(block);
  block->uncached_fetch_ticks = metadata.uncached_fetch_ticks;
  block->icache_line_count = metadata.icache_line_count;
  block->host_code_size = 0;
  block->compile_frame = recompile_frame;
  block->compile_count = recompile_count + 1;

  // copy instructions/info
  {
    const std::pair<Instruction, InstructionInfo>* ip = instructions.data();
    Instruction* dsti = block->Instructions();
    InstructionInfo* dstii = block->InstructionsInfo();

    for (u32 i = 0; i < size; i++, ip++, dsti++, dstii++)
    {
      dsti->bits = ip->first.bits;
      *dstii = ip->second;
    }
  }

  s_block_lut[table][idx] = block;

  // if the block is being recompiled too often, leave it in the list, but don't compile it.
  const u32 frame_delta = frame_number - recompile_frame;
  if (frame_delta >= RECOMPILE_FRAMES_FOR_INTERPRETER_FALLBACK)
  {
    // Long enough since the last recompile; restart the counting window.
    block->compile_frame = frame_number;
    block->compile_count = 1;
  }
  else if (block->compile_count >= RECOMPILE_COUNT_FOR_INTERPRETER_FALLBACK)
  {
    DEV_LOG("{} recompiles in {} frames to block 0x{:08X}, not caching.", block->compile_count, frame_delta, block->pc);
    block->size = 0;
  }

  // cached interpreter creates empty blocks when falling back
  if (block->size == 0)
  {
    block->state = BlockState::FallbackToInterpreter;
    block->protection = PageProtectionMode::Unprotected;
    return block;
  }

  // Old rec doesn't use backprop info, don't waste time filling it.
  if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
    FillBlockRegInfo(block);

  // add it to the tracking list for its page
  AddBlockToPageList(block);

  return block;
}

// Returns true if the guest RAM backing this block still matches the instructions
// that were captured when the block was compiled.
bool CPU::CodeCache::IsBlockCodeCurrent(const Block* block)
{
  // blocks shouldn't be wrapping..
  const PhysicalMemoryAddress phys_addr = VirtualAddressToPhysical(block->pc);
  DebugAssert((phys_addr + (sizeof(Instruction) * block->size)) <= Bus::g_ram_size);

  // can just do a straight memcmp..
  return (std::memcmp(Bus::g_ram + phys_addr, block->Instructions(), sizeof(Instruction) * block->size) == 0);
}

// Attempts to bring an invalidated block back to Valid without recompiling.
// Returns false if the block must be recompiled instead.
bool CPU::CodeCache::RevalidateBlock(Block* block)
{
  DebugAssert(block->state != BlockState::Valid);
  DebugAssert(AddressInRAM(block->pc) || block->state == BlockState::NeedsRecompile);

  if (block->state >= BlockState::NeedsRecompile)
    return false;

  // Protection may have changed if we didn't execute before it got invalidated again. e.g. THPS2.
  if (block->protection != GetProtectionModeForBlock(block))
    return false;

  if (!IsBlockCodeCurrent(block))
  {
    // changed, needs recompiling
    DEBUG_LOG("Block at PC {:08X} has changed and needs recompiling", block->pc);
    return false;
  }

  block->state = BlockState::Valid;
  AddBlockToPageList(block);
  return true;
}

// Appends a write-protected RAM block to its page's singly-linked tracking list
// and (re-)arms write protection on the page.
void CPU::CodeCache::AddBlockToPageList(Block* block)
{
  DebugAssert(block->size > 0);
  if (!AddressInRAM(block->pc) || block->protection != PageProtectionMode::WriteProtected)
    return;

  const u32 page_idx = block->StartPageIndex();
  PageProtectionInfo& entry = s_page_protection[page_idx];
  Bus::SetRAMCodePage(page_idx);

  if (entry.last_block_in_page)
  {
    entry.last_block_in_page->next_block_in_page = block;
    entry.last_block_in_page = block;
  }
  else
  {
    entry.first_block_in_page = block;
    entry.last_block_in_page = block;
  }
}

// Removes a block from its page's tracking list (no-op for unprotected blocks).
void CPU::CodeCache::RemoveBlockFromPageList(Block* block)
{
  DebugAssert(block->size > 0);
  if (!AddressInRAM(block->pc) || block->protection != PageProtectionMode::WriteProtected)
    return;

  const u32 page_idx = block->StartPageIndex();
  PageProtectionInfo& entry = s_page_protection[page_idx];

  // unlink from list
  Block* prev_block = nullptr;
  Block* cur_block = entry.first_block_in_page;
  while (cur_block)
  {
    if (cur_block != block)
    {
      prev_block = cur_block;
      cur_block = cur_block->next_block_in_page;
      continue;
    }

    if (prev_block)
      prev_block->next_block_in_page = cur_block->next_block_in_page;
    else
      entry.first_block_in_page = cur_block->next_block_in_page;
    if (!cur_block->next_block_in_page)
      entry.last_block_in_page = prev_block;

    cur_block->next_block_in_page = nullptr;
    break;
  }
}

// Invalidates every block on a RAM code page (called from the page fault handler
// when protected RAM is written). Pages invalidated too frequently are demoted to
// manual-check protection, forcing their blocks to be recompiled.
void CPU::CodeCache::InvalidateBlocksWithPageIndex(u32 index)
{
  DebugAssert(index < Bus::RAM_8MB_CODE_PAGE_COUNT);
  Bus::ClearRAMCodePage(index);

  BlockState new_block_state = BlockState::Invalidated;
  PageProtectionInfo& ppi = s_page_protection[index];

  const u32 frame_number = System::GetFrameNumber();
  const u32 frame_delta = frame_number - ppi.invalidate_frame;
  ppi.invalidate_count++;

  if (frame_delta >= INVALIDATE_FRAMES_FOR_MANUAL_PROTECTION)
  {
    // Window expired; restart counting from this frame.
    ppi.invalidate_count = 1;
    ppi.invalidate_frame = frame_number;
  }
  else if (ppi.invalidate_count > INVALIDATE_COUNT_FOR_MANUAL_PROTECTION)
  {
    DEV_LOG("{} invalidations in {} frames to page {} [0x{:08X} -> 0x{:08X}], switching to manual protection",
            ppi.invalidate_count, frame_delta, index, (index * HOST_PAGE_SIZE), ((index + 1) * HOST_PAGE_SIZE));
    ppi.mode = PageProtectionMode::ManualCheck;
    new_block_state = BlockState::NeedsRecompile;
  }

  if (!ppi.first_block_in_page)
    return;

  MemMap::BeginCodeWrite();

  Block* block = ppi.first_block_in_page;
  while (block)
  {
    InvalidateBlock(block, new_block_state);
    block = std::exchange(block->next_block_in_page, nullptr);
  }

  ppi.first_block_in_page = nullptr;
  ppi.last_block_in_page = nullptr;

  MemMap::EndCodeWrite();
}

// Protection mode for a single PC: only RAM pages can be (write-)protected.
CPU::CodeCache::PageProtectionMode CPU::CodeCache::GetProtectionModeForPC(u32 pc)
{
  if (!AddressInRAM(pc))
    return PageProtectionMode::Unprotected;

  const u32 page_idx = Bus::GetRAMCodePageIndex(pc);
  return s_page_protection[page_idx].mode;
}

CPU::CodeCache::PageProtectionMode CPU::CodeCache::GetProtectionModeForBlock(const Block* block)
{
  // if the block has a branch delay slot crossing a page, we must use manual protection.
  // no other way about it.
  if (block->HasFlag(BlockFlags::BranchDelaySpansPages))
    return PageProtectionMode::ManualCheck;

  return GetProtectionModeForPC(block->pc);
}

// Marks a block as invalid and reroutes its LUT entry and any incoming links to
// the compile/revalidate trampoline.
void CPU::CodeCache::InvalidateBlock(Block* block, BlockState new_state)
{
  if (block->state == BlockState::Valid)
  {
    SetCodeLUT(block->pc, g_compile_or_revalidate_block);
    BacklinkBlocks(block->pc, g_compile_or_revalidate_block);
  }

  block->state = new_state;
}

// Invalidates every RAM-resident block at once (e.g. when RAM is rewritten wholesale).
void CPU::CodeCache::InvalidateAllRAMBlocks()
{
  // TODO: maybe combine the backlink into one big instruction flush cache?
  MemMap::BeginCodeWrite();

  for (Block* block : s_blocks)
  {
    if (AddressInRAM(block->pc))
    {
      InvalidateBlock(block, BlockState::Invalidated);
      block->next_block_in_page = nullptr;
    }
  }

  for (PageProtectionInfo& ppi : s_page_protection)
  {
    ppi.first_block_in_page = nullptr;
    ppi.last_block_in_page = nullptr;
  }

  MemMap::EndCodeWrite();
  Bus::ClearRAMCodePageFlags();
}

// Destroys every block and resets all per-page protection / backpatch / link state.
void CPU::CodeCache::ClearBlocks()
{
  for (u32 i = 0; i < Bus::RAM_8MB_CODE_PAGE_COUNT; i++)
  {
    PageProtectionInfo& ppi = s_page_protection[i];
    if (ppi.mode == PageProtectionMode::WriteProtected && ppi.first_block_in_page)
      Bus::ClearRAMCodePage(i);

    ppi = {};
  }

  s_fastmem_backpatch_info.clear();
  s_fastmem_faulting_pcs.clear();
  s_block_links.clear();

  for (Block* block : s_blocks)
  {
    block->~Block();
    Common::AlignedFree(block);
  }
  s_blocks.clear();

  std::memset(s_lut_block_pointers.get(), 0, sizeof(Block*) * GetLUTSlotCount(false));
}

// Global page fault entry point: faults inside guest RAM mean a write hit a
// write-protected code page (invalidate it); anything else is deferred to the
// fastmem exception handler.
PageFaultHandler::HandlerResult PageFaultHandler::HandlePageFault(void* exception_pc, void* fault_address,
                                                                  bool is_write)
{
  if (static_cast<const u8*>(fault_address) >= Bus::g_ram &&
      static_cast<const u8*>(fault_address) < (Bus::g_ram + Bus::RAM_8MB_SIZE))
  {
    // Writing to protected RAM.
    DebugAssert(is_write);
    const u32 guest_address = static_cast<u32>(static_cast<const u8*>(fault_address) - Bus::g_ram);
    const u32 page_index = Bus::GetRAMCodePageIndex(guest_address);
    DEV_LOG("Page fault on protected RAM @ 0x{:08X} (page #{}), invalidating code cache.", guest_address, page_index);
    CPU::CodeCache::InvalidateBlocksWithPageIndex(page_index);
    return PageFaultHandler::HandlerResult::ContinueExecution;
  }

  return CPU::CodeCache::HandleFastmemException(exception_pc, fault_address, is_write);
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// MARK: - Cached Interpreter
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// Decodes the instructions at pc into the reusable scratch list and creates a block.
CPU::CodeCache::Block* CPU::CodeCache::CreateCachedInterpreterBlock(u32 pc)
{
  BlockMetadata metadata = {};
  ReadBlockInstructions(pc, &s_block_instructions, &metadata);
  return CreateBlock(pc, s_block_instructions, metadata);
}

// Cached interpreter main loop: looks up/creates blocks per PC and interprets
// them, accounting fetch ticks per the block's fetch mode. Never returns.
template<PGXPMode pgxp_mode>
[[noreturn]] void CPU::CodeCache::ExecuteCachedInterpreterImpl()
{
#define CHECK_DOWNCOUNT() \
  if (g_state.pending_ticks >= g_state.downcount) \
    break;

  for (;;)
  {
    TimingEvents::RunEvents();

    while (g_state.pending_ticks < g_state.downcount)
    {
#if 0
      LogCurrentState();
#endif
#if 0
      if ((g_state.pending_ticks + TimingEvents::GetGlobalTickCounter()) == 3301006214)
        __debugbreak();
#endif
      // Manually done because we don't want to compile blocks without a LUT.
      const u32 pc = g_state.pc;
      const u32 table = pc >> LUT_TABLE_SHIFT;
      Block* block;
      if (s_block_lut[table])
      {
        const u32 idx = (pc & 0xFFFF) >> 2;
        block = s_block_lut[table][idx];
      }
      else
      {
        // Likely invalid code...
        goto interpret_block;
      }

    reexecute_block:
      if (!block)
      {
        // No block yet; build one. Empty blocks mean "interpret uncached".
        if ((block = CreateCachedInterpreterBlock(pc))->size == 0) [[unlikely]]
          goto interpret_block;
      }
      else
      {
        if (block->state == BlockState::FallbackToInterpreter) [[unlikely]]
          goto interpret_block;

        // Invalid blocks get one revalidation attempt; manually-checked blocks are
        // compared against RAM every execution. Either failure forces a rebuild.
        if ((block->state != BlockState::Valid && !RevalidateBlock(block)) ||
            (block->protection == PageProtectionMode::ManualCheck && !IsBlockCodeCurrent(block)))
        {
          if ((block = CreateCachedInterpreterBlock(pc))->size == 0) [[unlikely]]
            goto interpret_block;
        }
      }

      DebugAssert(!(HasPendingInterrupt()));
      // Account instruction fetch cost according to how the block is fetched.
      if (block->HasFlag(BlockFlags::IsUsingICache))
      {
        CheckAndUpdateICacheTags(block->icache_line_count);
      }
      else if (block->HasFlag(BlockFlags::NeedsDynamicFetchTicks))
      {
        AddPendingTicks(
          static_cast<TickCount>(block->size * static_cast<u32>(*Bus::GetMemoryAccessTimePtr(
                                                 block->pc & PHYSICAL_MEMORY_ADDRESS_MASK, MemoryAccessSize::Word))));
      }
      else
      {
        AddPendingTicks(block->uncached_fetch_ticks);
      }

      InterpretCachedBlock<pgxp_mode>(block);

      CHECK_DOWNCOUNT();

      // Handle self-looping blocks
      if (g_state.pc == block->pc)
        goto reexecute_block;
      else
        continue;

    interpret_block:
      InterpretUncachedBlock<pgxp_mode>();
      CHECK_DOWNCOUNT();
      continue;
    }
  }
}

// Dispatches to the PGXP-mode-specialized interpreter loop. Never returns.
[[noreturn]] void CPU::CodeCache::ExecuteCachedInterpreter()
{
  if (g_settings.gpu_pgxp_enable)
  {
    if (g_settings.gpu_pgxp_cpu)
      ExecuteCachedInterpreterImpl<PGXPMode::CPU>();
    else
      ExecuteCachedInterpreterImpl<PGXPMode::Memory>();
  }
  else
  {
    ExecuteCachedInterpreterImpl<PGXPMode::Disabled>();
  }
}

// Debug aid: appends the full CPU register state (plus a CRC of the GTE registers)
// to the execution log for trace comparison.
void CPU::CodeCache::LogCurrentState()
{
#if 0
  if (System::GetGlobalTickCounter() == 2546728915)
    __debugbreak();
#endif
#if 0
  if (System::GetGlobalTickCounter() < 2546729174)
    return;
#endif

  const auto& regs = g_state.regs;
  WriteToExecutionLog(
    "tick=%" PRIu64
    " dc=%u/%u pc=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X t1=%08X t2=%08X t3=%08X t4=%08X "
    "t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X "
    "k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X hi=%08X lo=%08X ldr=%s ldv=%08X cause=%08X sr=%08X gte=%08X\n",
    System::GetGlobalTickCounter(), g_state.pending_ticks, g_state.downcount, g_state.pc, regs.at, regs.v0, regs.v1,
    regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5, regs.t6, regs.t7, regs.s0,
    regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, regs.t9, regs.k0, regs.k1, regs.gp, regs.sp,
    regs.fp, regs.ra, regs.hi, regs.lo,
    (g_state.next_load_delay_reg == Reg::count) ? "NONE" : GetRegName(g_state.next_load_delay_reg),
    (g_state.next_load_delay_reg == Reg::count) ? 0 : g_state.next_load_delay_value, g_state.cop0_regs.cause.bits,
    g_state.cop0_regs.sr.bits, static_cast<u32>(crc32(0, (const Bytef*)&g_state.gte_regs, sizeof(g_state.gte_regs))));
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// MARK: - Block Compilation: Shared Code
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// Decodes a straight-line run of guest instructions starting at start_pc into
// *instructions, filling *metadata with fetch-cost/flag information.
// (Definition continues past the end of this view.)
bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* instructions, BlockMetadata* metadata)
{
  // TODO: Jump to other block if it exists at this pc?
910 911 const PageProtectionMode protection = GetProtectionModeForPC(start_pc); 912 const bool use_icache = CPU::IsCachedAddress(start_pc); 913 const bool dynamic_fetch_ticks = (!use_icache && Bus::GetMemoryAccessTimePtr(start_pc & PHYSICAL_MEMORY_ADDRESS_MASK, 914 MemoryAccessSize::Word) != nullptr); 915 u32 pc = start_pc; 916 bool is_branch_delay_slot = false; 917 bool is_load_delay_slot = false; 918 919 #if 0 920 if (pc == 0x0005aa90) 921 __debugbreak(); 922 #endif 923 924 instructions->clear(); 925 metadata->icache_line_count = 0; 926 metadata->uncached_fetch_ticks = 0; 927 metadata->flags = use_icache ? BlockFlags::IsUsingICache : 928 (dynamic_fetch_ticks ? BlockFlags::NeedsDynamicFetchTicks : BlockFlags::None); 929 930 u32 last_cache_line = ICACHE_LINES; 931 u32 last_page = (protection == PageProtectionMode::WriteProtected) ? Bus::GetRAMCodePageIndex(start_pc) : 0; 932 933 for (;;) 934 { 935 if (protection == PageProtectionMode::WriteProtected) 936 { 937 const u32 this_page = Bus::GetRAMCodePageIndex(pc); 938 if (this_page != last_page) 939 { 940 // if we're just crossing the page and not in a branch delay slot, jump directly to the next block 941 if (!is_branch_delay_slot) 942 { 943 DEV_LOG("Breaking block 0x{:08X} at 0x{:08X} due to page crossing", start_pc, pc); 944 metadata->flags |= BlockFlags::SpansPages; 945 break; 946 } 947 else 948 { 949 // otherwise, we need to use manual protection in case the delay slot changes. 950 // may as well keep going then, since we're doing manual check anyways. 951 DEV_LOG("Block 0x{:08X} has branch delay slot crossing page at 0x{:08X}, forcing manual protection", start_pc, 952 pc); 953 metadata->flags |= BlockFlags::BranchDelaySpansPages; 954 } 955 } 956 } 957 958 Instruction instruction; 959 if (!SafeReadInstruction(pc, &instruction.bits) || !IsValidInstruction(instruction)) 960 { 961 // Away to the int you go! 
962 ERROR_LOG("Instruction read failed at PC=0x{:08X}, truncating block.", pc); 963 break; 964 } 965 966 InstructionInfo info; 967 std::memset(&info, 0, sizeof(info)); 968 969 info.pc = pc; 970 info.is_branch_delay_slot = is_branch_delay_slot; 971 info.is_load_delay_slot = is_load_delay_slot; 972 info.is_branch_instruction = IsBranchInstruction(instruction); 973 info.is_direct_branch_instruction = IsDirectBranchInstruction(instruction); 974 info.is_unconditional_branch_instruction = IsUnconditionalBranchInstruction(instruction); 975 info.is_load_instruction = IsMemoryLoadInstruction(instruction); 976 info.is_store_instruction = IsMemoryStoreInstruction(instruction); 977 info.has_load_delay = InstructionHasLoadDelay(instruction); 978 979 if (use_icache) 980 { 981 if (g_settings.cpu_recompiler_icache) 982 { 983 const u32 icache_line = GetICacheLine(pc); 984 if (icache_line != last_cache_line) 985 { 986 metadata->icache_line_count++; 987 last_cache_line = icache_line; 988 } 989 } 990 } 991 else if (!dynamic_fetch_ticks) 992 { 993 metadata->uncached_fetch_ticks += GetInstructionReadTicks(pc); 994 } 995 996 if (info.is_load_instruction || info.is_store_instruction) 997 metadata->flags |= BlockFlags::ContainsLoadStoreInstructions; 998 999 pc += sizeof(Instruction); 1000 1001 if (is_branch_delay_slot && info.is_branch_instruction) 1002 { 1003 const BlockInstructionInfoPair& prev = instructions->back(); 1004 if (!prev.second.is_unconditional_branch_instruction || !prev.second.is_direct_branch_instruction) 1005 { 1006 WARNING_LOG("Conditional or indirect branch delay slot at {:08X}, skipping block", info.pc); 1007 return false; 1008 } 1009 if (!IsDirectBranchInstruction(instruction)) 1010 { 1011 WARNING_LOG("Indirect branch in delay slot at {:08X}, skipping block", info.pc); 1012 return false; 1013 } 1014 1015 // we _could_ fetch the delay slot from the first branch's target, but it's probably in a different 1016 // page, and that's an invalidation nightmare. 
so just fallback to the int, this is very rare anyway. 1017 WARNING_LOG("Direct branch in delay slot at {:08X}, skipping block", info.pc); 1018 return false; 1019 } 1020 1021 // instruction is decoded now 1022 instructions->emplace_back(instruction, info); 1023 1024 // if we're in a branch delay slot, the block is now done 1025 // except if this is a branch in a branch delay slot, then we grab the one after that, and so on... 1026 if (is_branch_delay_slot && !info.is_branch_instruction) 1027 break; 1028 1029 // if this is a branch, we grab the next instruction (delay slot), and then exit 1030 is_branch_delay_slot = info.is_branch_instruction; 1031 1032 // same for load delay 1033 is_load_delay_slot = info.has_load_delay; 1034 1035 // is this a non-branchy exit? (e.g. syscall) 1036 if (IsExitBlockInstruction(instruction)) 1037 break; 1038 } 1039 1040 if (instructions->empty()) 1041 { 1042 WARNING_LOG("Empty block compiled at 0x{:08X}", start_pc); 1043 return false; 1044 } 1045 1046 instructions->back().second.is_last_instruction = true; 1047 1048 #ifdef _DEBUG 1049 SmallString disasm; 1050 DEBUG_LOG("Block at 0x{:08X}", start_pc); 1051 DEBUG_LOG(" Uncached fetch ticks: {}", metadata->uncached_fetch_ticks); 1052 DEBUG_LOG(" ICache line count: {}", metadata->icache_line_count); 1053 for (const auto& cbi : *instructions) 1054 { 1055 CPU::DisassembleInstruction(&disasm, cbi.second.pc, cbi.first.bits); 1056 DEBUG_LOG("[{} {} 0x{:08X}] {:08X} {}", cbi.second.is_branch_delay_slot ? "BD" : " ", 1057 cbi.second.is_load_delay_slot ? 
"LD" : " ", cbi.second.pc, cbi.first.bits, disasm); 1058 } 1059 #endif 1060 1061 return true; 1062 } 1063 1064 void CPU::CodeCache::CopyRegInfo(InstructionInfo* dst, const InstructionInfo* src) 1065 { 1066 std::memcpy(dst->reg_flags, src->reg_flags, sizeof(dst->reg_flags)); 1067 std::memcpy(dst->read_reg, src->read_reg, sizeof(dst->read_reg)); 1068 } 1069 1070 void CPU::CodeCache::SetRegAccess(InstructionInfo* inst, Reg reg, bool write) 1071 { 1072 if (reg == Reg::zero) 1073 return; 1074 1075 if (!write) 1076 { 1077 for (u32 i = 0; i < std::size(inst->read_reg); i++) 1078 { 1079 if (inst->read_reg[i] == Reg::zero) 1080 { 1081 inst->read_reg[i] = reg; 1082 break; 1083 } 1084 } 1085 } 1086 else 1087 { 1088 #if 0 1089 for (u32 i = 0; i < std::size(inst->write_reg); i++) 1090 { 1091 if (inst->write_reg[i] == Reg::zero) 1092 { 1093 inst->write_reg[i] = reg; 1094 break; 1095 } 1096 } 1097 #endif 1098 } 1099 } 1100 1101 #define BackpropSetReads(reg) \ 1102 do \ 1103 { \ 1104 if (!(inst->reg_flags[static_cast<u8>(reg)] & RI_USED)) \ 1105 inst->reg_flags[static_cast<u8>(reg)] |= RI_LASTUSE; \ 1106 prev->reg_flags[static_cast<u8>(reg)] |= RI_LIVE | RI_USED; \ 1107 inst->reg_flags[static_cast<u8>(reg)] |= RI_USED; \ 1108 SetRegAccess(inst, reg, false); \ 1109 } while (0) 1110 1111 #define BackpropSetWrites(reg) \ 1112 do \ 1113 { \ 1114 prev->reg_flags[static_cast<u8>(reg)] &= ~(RI_LIVE | RI_USED); \ 1115 if (!(inst->reg_flags[static_cast<u8>(reg)] & RI_USED)) \ 1116 inst->reg_flags[static_cast<u8>(reg)] |= RI_LASTUSE; \ 1117 inst->reg_flags[static_cast<u8>(reg)] |= RI_USED; \ 1118 SetRegAccess(inst, reg, true); \ 1119 } while (0) 1120 1121 // TODO: memory loads should be delayed one instruction because of stupid load delays. 
// Delayed-write variant; currently identical to BackpropSetWrites (see the
// TODO above regarding load delay modelling).
#define BackpropSetWritesDelayed(reg) BackpropSetWrites(reg)

// Walks the block's instructions backwards, propagating register liveness
// (RI_LIVE/RI_USED/RI_LASTUSE) and read sets into each InstructionInfo via
// the Backprop* macros, for consumption by the recompilers.
void CPU::CodeCache::FillBlockRegInfo(Block* block)
{
  const Instruction* iinst = block->Instructions() + (block->size - 1);
  InstructionInfo* const start = block->InstructionsInfo();
  InstructionInfo* inst = start + (block->size - 1);
  // Every register is considered live at the end of the block.
  std::memset(inst->reg_flags, RI_LIVE, sizeof(inst->reg_flags));
  std::memset(inst->read_reg, 0, sizeof(inst->read_reg));
  // std::memset(inst->write_reg, 0, sizeof(inst->write_reg));

  while (inst != start)
  {
    InstructionInfo* prev = inst - 1;
    CopyRegInfo(prev, inst);

    const Reg rs = iinst->r.rs;
    const Reg rt = iinst->r.rt;

    switch (iinst->op)
    {
      case InstructionOp::funct:
      {
        const Reg rd = iinst->r.rd;

        switch (iinst->r.funct)
        {
          case InstructionFunct::sll:
          case InstructionFunct::srl:
          case InstructionFunct::sra:
            BackpropSetWrites(rd);
            BackpropSetReads(rt);
            break;

          case InstructionFunct::sllv:
          case InstructionFunct::srlv:
          case InstructionFunct::srav:
          case InstructionFunct::add:
          case InstructionFunct::addu:
          case InstructionFunct::sub:
          case InstructionFunct::subu:
          case InstructionFunct::and_:
          case InstructionFunct::or_:
          case InstructionFunct::xor_:
          case InstructionFunct::nor:
          case InstructionFunct::slt:
          case InstructionFunct::sltu:
            BackpropSetWrites(rd);
            BackpropSetReads(rt);
            BackpropSetReads(rs);
            break;

          case InstructionFunct::jr:
            BackpropSetReads(rs);
            break;

          case InstructionFunct::jalr:
            BackpropSetReads(rs);
            BackpropSetWrites(rd);
            break;

          case InstructionFunct::mfhi:
            BackpropSetWrites(rd);
            BackpropSetReads(Reg::hi);
            break;

          case InstructionFunct::mflo:
            BackpropSetWrites(rd);
            BackpropSetReads(Reg::lo);
            break;

          case InstructionFunct::mthi:
            BackpropSetWrites(Reg::hi);
            BackpropSetReads(rs);
            break;

          case InstructionFunct::mtlo:
            BackpropSetWrites(Reg::lo);
            BackpropSetReads(rs);
            break;

          case InstructionFunct::mult:
          case InstructionFunct::multu:
          case InstructionFunct::div:
          case InstructionFunct::divu:
            BackpropSetWrites(Reg::hi);
            BackpropSetWrites(Reg::lo);
            BackpropSetReads(rs);
            BackpropSetReads(rt);
            break;

          case InstructionFunct::syscall:
          case InstructionFunct::break_:
            break;

          default:
            ERROR_LOG("Unknown funct {}", static_cast<u32>(iinst->r.funct.GetValue()));
            break;
        }
      }
      break;

      case InstructionOp::b:
      {
        // rt field 0b1000x selects the linking variants (bltzal/bgezal),
        // which write the return address to ra.
        if ((static_cast<u8>(iinst->i.rt.GetValue()) & u8(0x1E)) == u8(0x10))
          BackpropSetWrites(Reg::ra);
        BackpropSetReads(rs);
      }
      break;

      case InstructionOp::j:
        break;

      case InstructionOp::jal:
        BackpropSetWrites(Reg::ra);
        break;

      case InstructionOp::beq:
      case InstructionOp::bne:
        BackpropSetReads(rs);
        BackpropSetReads(rt);
        break;

      case InstructionOp::blez:
      case InstructionOp::bgtz:
        BackpropSetReads(rs);
        break;

      case InstructionOp::addi:
      case InstructionOp::addiu:
      case InstructionOp::slti:
      case InstructionOp::sltiu:
      case InstructionOp::andi:
      case InstructionOp::ori:
      case InstructionOp::xori:
        BackpropSetWrites(rt);
        BackpropSetReads(rs);
        break;

      case InstructionOp::lui:
        BackpropSetWrites(rt);
        break;

      case InstructionOp::lb:
      case InstructionOp::lh:
      case InstructionOp::lw:
      case InstructionOp::lbu:
      case InstructionOp::lhu:
        BackpropSetWritesDelayed(rt);
        BackpropSetReads(rs);
        break;

      case InstructionOp::lwl:
      case InstructionOp::lwr:
        // Unaligned loads merge with the existing rt value, so rt is both
        // read and written.
        BackpropSetWritesDelayed(rt);
        BackpropSetReads(rs);
        BackpropSetReads(rt);
        break;

      case InstructionOp::sb:
      case InstructionOp::sh:
      case InstructionOp::swl:
      case InstructionOp::sw:
      case InstructionOp::swr:
        BackpropSetReads(rt);
        BackpropSetReads(rs);
        break;

      case InstructionOp::cop0:
      case InstructionOp::cop2:
      {
        if (iinst->cop.IsCommonInstruction())
        {
          switch (iinst->cop.CommonOp())
          {
            case CopCommonInstruction::mfcn:
            case CopCommonInstruction::cfcn:
              BackpropSetWritesDelayed(rt);
              break;

            case CopCommonInstruction::mtcn:
            case CopCommonInstruction::ctcn:
              BackpropSetReads(rt);
              break;
          }
        }
        break;

        // NOTE(review): the compound statement opened at the cop0/cop2 case
        // above does not close until after 'default' below, so the following
        // labels are nested inside it. This is legal C++ and behaves exactly
        // like conventionally-placed case labels; preserved as-is.
        case InstructionOp::lwc2:
        case InstructionOp::swc2:
          BackpropSetReads(rs);
          BackpropSetReads(rt);
          break;

        default:
          ERROR_LOG("Unknown op {}", static_cast<u32>(iinst->op.GetValue()));
          break;
      }
    } // end switch

    inst--;
    iinst--;
  } // end while
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// MARK: - Recompiler Glue
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// Entry point from the dispatcher when no valid host code exists for
// start_pc. Revalidates the existing block if its guest code is unchanged;
// otherwise decodes and recompiles, updating the code LUT and backlinks.
// On unrecoverable failure the LUT is pointed at the uncached interpreter.
void CPU::CodeCache::CompileOrRevalidateBlock(u32 start_pc)
{
  // TODO: this doesn't currently handle when the cache overflows...
  DebugAssert(IsUsingAnyRecompiler());
  MemMap::BeginCodeWrite();

  Block* block = LookupBlock(start_pc);
  if (block)
  {
    // we should only be here if the block got invalidated
    DebugAssert(block->state != BlockState::Valid);
    if (RevalidateBlock(block))
    {
      DebugAssert(block->host_code);
      SetCodeLUT(start_pc, block->host_code);
      BacklinkBlocks(start_pc, block->host_code);
      MemMap::EndCodeWrite();
      return;
    }

    // remove outward links from this block, since we're recompiling it
    UnlinkBlockExits(block);

    // clean up backpatch info so it doesn't keep growing indefinitely
    if (block->HasFlag(BlockFlags::ContainsLoadStoreInstructions))
      RemoveBackpatchInfoForRange(block->host_code, block->host_code_size);
  }

  BlockMetadata metadata = {};
  if (!ReadBlockInstructions(start_pc, &s_block_instructions, &metadata))
  {
    ERROR_LOG("Failed to read block at 0x{:08X}, falling back to uncached interpreter", start_pc);
    SetCodeLUT(start_pc, g_interpret_block);
    BacklinkBlocks(start_pc, g_interpret_block);
    MemMap::EndCodeWrite();
    return;
  }

  // Ensure we're not going to run out of space while compiling this block.
  // We could definitely do better here... TODO: far code is no longer needed for newrec
  const u32 block_size = static_cast<u32>(s_block_instructions.size());
  if (GetFreeCodeSpace() < (block_size * Recompiler::MAX_NEAR_HOST_BYTES_PER_INSTRUCTION) ||
      GetFreeFarCodeSpace() < (block_size * Recompiler::MAX_FAR_HOST_BYTES_PER_INSTRUCTION))
  {
    ERROR_LOG("Out of code space while compiling {:08X}. Resetting code cache.", start_pc);
    CodeCache::Reset();
  }

  if ((block = CreateBlock(start_pc, s_block_instructions, metadata)) == nullptr || block->size == 0 ||
      !CompileBlock(block))
  {
    ERROR_LOG("Failed to compile block at 0x{:08X}, falling back to uncached interpreter", start_pc);
    SetCodeLUT(start_pc, g_interpret_block);
    BacklinkBlocks(start_pc, g_interpret_block);
    MemMap::EndCodeWrite();
    return;
  }

  SetCodeLUT(start_pc, block->host_code);
  BacklinkBlocks(start_pc, block->host_code);
  MemMap::EndCodeWrite();
}

// Used for manually-protected blocks whose guest code changed: marks the
// block as needing recompilation, then immediately recompiles it.
void CPU::CodeCache::DiscardAndRecompileBlock(u32 start_pc)
{
  MemMap::BeginCodeWrite();

  DEV_LOG("Discard block {:08X} with manual protection", start_pc);
  Block* block = LookupBlock(start_pc);
  DebugAssert(block && block->state == BlockState::Valid);
  InvalidateBlock(block, BlockState::NeedsRecompile);
  CompileOrRevalidateBlock(start_pc);

  MemMap::EndCodeWrite();
}

// Resolves the jump target for an exit of 'block' at guest address newpc,
// and (when block linking is enabled) registers the link site in
// s_block_links so it can be backpatched later. Returns the host address the
// emitted jump should target right now.
const void* CPU::CodeCache::CreateBlockLink(Block* block, void* code, u32 newpc)
{
  // self-linking should be handled by the caller
  DebugAssert(newpc != block->pc);

  const void* dst = g_dispatcher;
  if (g_settings.cpu_recompiler_block_linking)
  {
    // Jump straight to the target if it's already compiled and valid;
    // otherwise route through the interpreter or compile stub.
    const Block* next_block = LookupBlock(newpc);
    if (next_block)
    {
      dst = (next_block->state == BlockState::Valid) ?
              next_block->host_code :
              ((next_block->state == BlockState::FallbackToInterpreter) ? g_interpret_block :
                                                                          g_compile_or_revalidate_block);
      DebugAssert(dst);
    }
    else
    {
      dst = g_compile_or_revalidate_block;
    }

    // Remember this exit so the jump can be repointed when newpc compiles,
    // and so the owning block can unlink it on invalidation.
    BlockLinkMap::iterator iter = s_block_links.emplace(newpc, code);
    DebugAssert(block->num_exit_links < MAX_BLOCK_EXIT_LINKS);
    block->exit_links[block->num_exit_links++] = iter;
  }

  DEBUG_LOG("Linking {} with dst pc {:08X} to {}{}", code, newpc, dst,
            (dst == g_compile_or_revalidate_block) ? "[compiler]" : "");
  return dst;
}

// Repoints every recorded link site targeting guest pc at 'dst' by rewriting
// the host jump instructions in place.
void CPU::CodeCache::BacklinkBlocks(u32 pc, const void* dst)
{
  if (!g_settings.cpu_recompiler_block_linking)
    return;

  const auto link_range = s_block_links.equal_range(pc);
  for (auto it = link_range.first; it != link_range.second; ++it)
  {
    DEBUG_LOG("Backlinking {} with dst pc {:08X} to {}{}", it->second, pc, dst,
              (dst == g_compile_or_revalidate_block) ? "[compiler]" : "");
    EmitJump(it->second, dst, true);
  }
}

// Removes all of this block's outgoing link-site records (the jumps
// themselves are rewritten elsewhere when the block is recompiled).
void CPU::CodeCache::UnlinkBlockExits(Block* block)
{
  const u32 num_exit_links = block->num_exit_links;
  for (u32 i = 0; i < num_exit_links; i++)
    s_block_links.erase(block->exit_links[i]);
  block->num_exit_links = 0;
}

// Resets the near/far code buffers to empty and zero-fills the whole cache.
// The far region sits at the end of the shared buffer, directly after the
// near region.
void CPU::CodeCache::ResetCodeBuffer()
{
  s_code_ptr = static_cast<u8*>(s_code_buffer_ptr);
  s_free_code_ptr = s_code_ptr;
  s_code_size = RECOMPILER_CODE_CACHE_SIZE - RECOMPILER_FAR_CODE_CACHE_SIZE;
  s_code_used = 0;

  // Use half the far code size when using newrec and memory exceptions aren't enabled. It's only used for backpatching.
  const u32 far_code_size =
    (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec && !g_settings.cpu_recompiler_memory_exceptions) ?
      (RECOMPILER_FAR_CODE_CACHE_SIZE / 2) :
      RECOMPILER_FAR_CODE_CACHE_SIZE;
  s_far_code_size = far_code_size;
  s_far_code_ptr = (far_code_size > 0) ? (static_cast<u8*>(s_code_ptr) + s_code_size) : nullptr;
  s_free_far_code_ptr = s_far_code_ptr;
  s_far_code_used = 0;

  MemMap::BeginCodeWrite();

  std::memset(s_code_ptr, 0, RECOMPILER_CODE_CACHE_SIZE);
  MemMap::FlushInstructionCache(s_code_ptr, RECOMPILER_CODE_CACHE_SIZE);

  MemMap::EndCodeWrite();
}

// Returns the next free position in the near code buffer.
u8* CPU::CodeCache::GetFreeCodePointer()
{
  return s_free_code_ptr;
}

// Returns the number of unused bytes remaining in the near code buffer.
u32 CPU::CodeCache::GetFreeCodeSpace()
{
  return s_code_size - s_code_used;
}

// Commits 'length' bytes just emitted at the free pointer: flushes the host
// instruction cache for that range, then advances the free pointer.
void CPU::CodeCache::CommitCode(u32 length)
{
  if (length == 0) [[unlikely]]
    return;

  MemMap::FlushInstructionCache(s_free_code_ptr, length);

  Assert(length <= (s_code_size - s_code_used));
  s_free_code_ptr += length;
  s_code_used += length;
}

// Returns the next free position in the far (slow-path) code buffer.
u8* CPU::CodeCache::GetFreeFarCodePointer()
{
  return s_free_far_code_ptr;
}

// Returns the number of unused bytes remaining in the far code buffer.
u32 CPU::CodeCache::GetFreeFarCodeSpace()
{
  return s_far_code_size - s_far_code_used;
}

// Far-buffer counterpart of CommitCode().
void CPU::CodeCache::CommitFarCode(u32 length)
{
  if (length == 0) [[unlikely]]
    return;

  MemMap::FlushInstructionCache(s_free_far_code_ptr, length);

  Assert(length <= (s_far_code_size - s_far_code_used));
  s_free_far_code_ptr += length;
  s_far_code_used += length;
}

// Pads the near buffer so the free pointer is aligned to 'alignment' (a
// power of two). x64 pads with int3 so stray execution traps; the padding is
// clamped to the remaining space, so alignment may not be achieved when the
// buffer is nearly full.
void CPU::CodeCache::AlignCode(u32 alignment)
{
#if defined(CPU_ARCH_X64)
  constexpr u8 padding_value = 0xcc; // int3
#else
  constexpr u8 padding_value = 0x00;
#endif

  DebugAssert(Common::IsPow2(alignment));
  const u32 num_padding_bytes =
    std::min(static_cast<u32>(Common::AlignUpPow2(reinterpret_cast<uintptr_t>(s_free_code_ptr), alignment) -
                              reinterpret_cast<uintptr_t>(s_free_code_ptr)),
             GetFreeCodeSpace());
  std::memset(s_free_code_ptr, padding_value, num_padding_bytes);
  s_free_code_ptr += num_padding_bytes;
  s_code_used += num_padding_bytes;
}

// Returns the uncached-interpreter entry point matching the PGXP settings,
// as a type-erased pointer for use by emitted code.
const void* CPU::CodeCache::GetInterpretUncachedBlockFunction()
{
  if (g_settings.gpu_pgxp_enable)
  {
    if (g_settings.gpu_pgxp_cpu)
      return reinterpret_cast<const void*>(InterpretUncachedBlock<PGXPMode::CPU>);
    else
      return reinterpret_cast<const void*>(InterpretUncachedBlock<PGXPMode::Memory>);
  }
  else
  {
    return reinterpret_cast<const void*>(InterpretUncachedBlock<PGXPMode::Disabled>);
  }
}

// Clears all cached entry points into emitted ASM helpers (they dangle after
// the code buffer is reset), and resets debug-build compile statistics.
void CPU::CodeCache::ClearASMFunctions()
{
  g_enter_recompiler = nullptr;
  g_compile_or_revalidate_block = nullptr;
  g_check_events_and_dispatch = nullptr;
  g_run_events_and_dispatch = nullptr;
  g_dispatcher = nullptr;
  g_interpret_block = nullptr;
  g_discard_and_recompile_block = nullptr;

#ifdef _DEBUG
  s_total_instructions_compiled = 0;
  s_total_host_instructions_emitted = 0;
#endif
}

// Emits the shared ASM trampolines (dispatcher etc.) at the start of the
// code buffer and commits them.
void CPU::CodeCache::CompileASMFunctions()
{
  MemMap::BeginCodeWrite();

  const u32 asm_size = EmitASMFunctions(GetFreeCodePointer(), GetFreeCodeSpace());

#ifdef ENABLE_RECOMPILER_PROFILING
  MIPSPerfScope.Register(GetFreeCodePointer(), asm_size, "ASMFunctions");
#endif

  CommitCode(asm_size);
  MemMap::EndCodeWrite();
}

// Compiles 'block' with whichever recompiler backend is active, storing the
// resulting host code pointer/size on the block. Returns false (and marks
// the block FallbackToInterpreter) if code generation failed.
bool CPU::CodeCache::CompileBlock(Block* block)
{
  const void* host_code = nullptr;
  u32 host_code_size = 0;
  u32 host_far_code_size = 0;

#ifdef ENABLE_RECOMPILER
  if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler)
  {
    Recompiler::CodeGenerator codegen;
    host_code = codegen.CompileBlock(block, &host_code_size, &host_far_code_size);
  }
#endif
#ifdef ENABLE_NEWREC
  if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
    host_code = NewRec::g_compiler->CompileBlock(block, &host_code_size, &host_far_code_size);
#endif

  block->host_code = host_code;
  block->host_code_size = host_code_size;

  if (!host_code)
  {
    ERROR_LOG("Failed to compile host code for block at 0x{:08X}", block->pc);
    block->state = BlockState::FallbackToInterpreter;
    return false;
  }

#ifdef DUMP_CODE_SIZE_STATS
  // Emitted-code blowup / instructions-per-instruction statistics.
  const u32 host_instructions = GetHostInstructionCount(host_code, host_code_size);
  s_total_instructions_compiled += block->size;
  s_total_host_instructions_emitted += host_instructions;

  DEV_LOG("0x{:08X}: {}/{}b for {}b ({}i), blowup: {:.2f}x, cache: {:.2f}%/{:.2f}%, ipi: {:.2f}/{:.2f}", block->pc,
          host_code_size, host_far_code_size, block->size * 4, block->size,
          static_cast<float>(host_code_size) / static_cast<float>(block->size * 4),
          (static_cast<float>(s_code_used) / static_cast<float>(s_code_size)) * 100.0f,
          (static_cast<float>(s_far_code_used) / static_cast<float>(s_far_code_size)) * 100.0f,
          static_cast<float>(host_instructions) / static_cast<float>(block->size),
          static_cast<float>(s_total_host_instructions_emitted) / static_cast<float>(s_total_instructions_compiled));
#endif

#if 0
  Log_DebugPrint("***HOST CODE**");
  DisassembleAndLogHostCode(host_code, host_code_size);
#endif

#ifdef ENABLE_RECOMPILER_PROFILING
  MIPSPerfScope.RegisterPC(host_code, host_code_size, block->pc);
#endif

  return true;
}

// Registers backpatch info for a fastmem load/store emitted at code_address,
// thunk variant: on fault, the access is redirected to 'thunk_address'.
// Replaces any existing entry for the same host address.
void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, const void* thunk_address)
{
  DebugAssert(code_size < std::numeric_limits<u8>::max());

  auto iter = s_fastmem_backpatch_info.find(code_address);
  if (iter != s_fastmem_backpatch_info.end())
    s_fastmem_backpatch_info.erase(iter);

  LoadstoreBackpatchInfo info;
  info.thunk_address = thunk_address;
  info.guest_pc = guest_pc;
  info.guest_block = 0;
  info.code_size = static_cast<u8>(code_size);
  s_fastmem_backpatch_info.emplace(code_address, info);
}

// Registers backpatch info for a fastmem load/store, full variant: records
// everything needed to re-emit the access as a slow-path call (registers,
// access size/signedness, cycle count, owning block). Replaces any existing
// entry for the same host address.
void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, u32 guest_block,
                                      TickCount cycles, u32 gpr_bitmask, u8 address_register, u8 data_register,
                                      MemoryAccessSize size, bool is_signed, bool is_load)
{
  DebugAssert(code_size < std::numeric_limits<u8>::max());
  DebugAssert(cycles >= 0 && cycles < std::numeric_limits<u16>::max());

  auto iter = s_fastmem_backpatch_info.find(code_address);
  if (iter != s_fastmem_backpatch_info.end())
    s_fastmem_backpatch_info.erase(iter);

  LoadstoreBackpatchInfo info;
  info.thunk_address = nullptr;
  info.guest_pc = guest_pc;
  info.guest_block = guest_block;
  info.gpr_bitmask = gpr_bitmask;
  info.cycles = static_cast<u16>(cycles);
  info.address_register = address_register;
  info.data_register = data_register;
  info.size = static_cast<u16>(size);
  info.is_signed = is_signed;
  info.is_load = is_load;
  info.code_size = static_cast<u8>(code_size);
  s_fastmem_backpatch_info.emplace(code_address, info);
}

// Handles a host page fault raised by a fastmem access in generated code:
// either the fault is a RAM write into protected code pages (invalidate and
// retry), or the faulting load/store is backpatched into its slow path using
// the info recorded by AddLoadStoreInfo().
PageFaultHandler::HandlerResult CPU::CodeCache::HandleFastmemException(void* exception_pc, void* fault_address,
                                                                      bool is_write)
{
  PhysicalMemoryAddress guest_address;

#ifdef ENABLE_MMAP_FASTMEM
  if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
  {
    // Ignore faults outside the fastmem arena; they belong to someone else.
    if (static_cast<u8*>(fault_address) < static_cast<u8*>(g_state.fastmem_base) ||
        (static_cast<u8*>(fault_address) - static_cast<u8*>(g_state.fastmem_base)) >=
          static_cast<ptrdiff_t>(Bus::FASTMEM_ARENA_SIZE))
    {
      return PageFaultHandler::HandlerResult::ExecuteNextHandler;
    }

    guest_address = static_cast<PhysicalMemoryAddress>(
      static_cast<ptrdiff_t>(static_cast<u8*>(fault_address) - static_cast<u8*>(g_state.fastmem_base)));

    // if we're writing to ram, let it go through a few times, and use manual block protection to sort it out
    // TODO: path for manual protection to return back to read-only pages
    if (is_write && !g_state.cop0_regs.sr.Isc && AddressInRAM(guest_address))
    {
      DEV_LOG("Ignoring fault due to RAM write @ 0x{:08X}", guest_address);
      InvalidateBlocksWithPageIndex(Bus::GetRAMCodePageIndex(guest_address));
      return PageFaultHandler::HandlerResult::ContinueExecution;
    }
  }
  else
#endif
  {
    // LUT fastmem - we can't compute the address.
    guest_address = std::numeric_limits<PhysicalMemoryAddress>::max();
  }

  DEV_LOG("Page fault handler invoked at PC={} Address={} {}, fastmem offset {:08X}", exception_pc, fault_address,
          is_write ? "(write)" : "(read)", guest_address);

  auto iter = s_fastmem_backpatch_info.find(exception_pc);
  if (iter == s_fastmem_backpatch_info.end())
  {
    ERROR_LOG("No backpatch info found for {}", exception_pc);
    return PageFaultHandler::HandlerResult::ExecuteNextHandler;
  }

  LoadstoreBackpatchInfo& info = iter->second;
  DEV_LOG("Backpatching {} at {}[{}] (pc {:08X} addr {:08X}): Bitmask {:08X} Addr {} Data {} Size {} Signed {:02X}",
          info.is_load ? "load" : "store", exception_pc, info.code_size, info.guest_pc, guest_address, info.gpr_bitmask,
          static_cast<unsigned>(info.address_register), static_cast<unsigned>(info.data_register),
          info.AccessSizeInBytes(), static_cast<unsigned>(info.is_signed));

  MemMap::BeginCodeWrite();

  // Rewrite the faulting access in place to use the slow path.
  BackpatchLoadStore(exception_pc, info);

  // queue block for recompilation later
  if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
  {
    Block* block = LookupBlock(info.guest_block);
    if (block)
    {
      // This is a bit annoying, we have to remove it from the page list if it's a RAM block.
      DEV_LOG("Queuing block {:08X} for recompilation due to backpatch", block->pc);
      RemoveBlockFromPageList(block);
      InvalidateBlock(block, BlockState::NeedsRecompile);

      // Need to reset the recompile count, otherwise it'll get trolled into an interpreter fallback.
      block->compile_frame = System::GetFrameNumber();
      block->compile_count = 1;
    }
  }

  MemMap::EndCodeWrite();

  // and store the pc in the faulting list, so that we don't emit another fastmem loadstore
  s_fastmem_faulting_pcs.insert(info.guest_pc);
  s_fastmem_backpatch_info.erase(iter);
  return PageFaultHandler::HandlerResult::ContinueExecution;
}

// True if a fastmem access at this guest PC previously faulted; the
// compilers use this to emit the slow path directly next time.
bool CPU::CodeCache::HasPreviouslyFaultedOnPC(u32 guest_pc)
{
  return (s_fastmem_faulting_pcs.find(guest_pc) != s_fastmem_faulting_pcs.end());
}

// Dispatches a backpatch request to the active recompiler backend.
void CPU::CodeCache::BackpatchLoadStore(void* host_pc, const LoadstoreBackpatchInfo& info)
{
#ifdef ENABLE_RECOMPILER
  if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler)
    Recompiler::CodeGenerator::BackpatchLoadStore(host_pc, info);
#endif
#ifdef ENABLE_NEWREC
  if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
    NewRec::BackpatchLoadStore(host_pc, info);
#endif
}

// Erases all backpatch records for host code in [host_code, host_code+size).
// Relies on s_fastmem_backpatch_info being ordered by host address
// (lower_bound is used to find the range start).
void CPU::CodeCache::RemoveBackpatchInfoForRange(const void* host_code, u32 size)
{
  const u8* start = static_cast<const u8*>(host_code);
  const u8* end = start + size;

  auto start_iter = s_fastmem_backpatch_info.lower_bound(start);
  if (start_iter == s_fastmem_backpatch_info.end())
    return;

  // this might point to another block, so bail out in that case
  if (start_iter->first >= end)
    return;

  // find the end point, or last instruction in the range
  auto end_iter = start_iter;
  do
  {
    ++end_iter;
  } while (end_iter != s_fastmem_backpatch_info.end() && end_iter->first < end);

  // erase the whole range at once
  s_fastmem_backpatch_info.erase(start_iter, end_iter);
}