duckstation

duckstation, archived from the revision just before upstream changed it to a proprietary software project; this version is the libre one
git clone https://git.neptards.moe/u3shit/duckstation.git

cpu_code_cache.cpp (59534B)


      1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
      3 
      4 #include "bus.h"
      5 #include "cpu_code_cache_private.h"
      6 #include "cpu_core.h"
      7 #include "cpu_core_private.h"
      8 #include "cpu_disasm.h"
      9 #include "cpu_recompiler_types.h"
     10 #include "host.h"
     11 #include "settings.h"
     12 #include "system.h"
     13 #include "timing_event.h"
     14 
     15 #include "util/page_fault_handler.h"
     16 
     17 #include "common/align.h"
     18 #include "common/assert.h"
     19 #include "common/error.h"
     20 #include "common/intrin.h"
     21 #include "common/log.h"
     22 #include "common/memmap.h"
     23 
     24 Log_SetChannel(CPU::CodeCache);
     25 
     26 // Enable dumping of recompiled block code size statistics.
     27 // #define DUMP_CODE_SIZE_STATS 1
     28 
     29 // Enable profiling of JIT blocks.
     30 // #define ENABLE_RECOMPILER_PROFILING 1
     31 
     32 #ifdef ENABLE_RECOMPILER
     33 #include "cpu_recompiler_code_generator.h"
     34 #endif
     35 
     36 #ifdef ENABLE_NEWREC
     37 #include "cpu_newrec_compiler.h"
     38 #endif
     39 
     40 #include <map>
     41 #include <unordered_set>
     42 #include <zlib.h>
     43 
     44 namespace CPU::CodeCache {
     45 
     46 using LUTRangeList = std::array<std::pair<VirtualMemoryAddress, VirtualMemoryAddress>, 9>;
     47 using PageProtectionArray = std::array<PageProtectionInfo, Bus::RAM_8MB_CODE_PAGE_COUNT>;
     48 using BlockInstructionInfoPair = std::pair<Instruction, InstructionInfo>;
     49 using BlockInstructionList = std::vector<BlockInstructionInfoPair>;
     50 
     51 // Switch to manual protection if we invalidate more than 4 times within 60 frames.
     52 // Fall back to the interpreter for blocks that get recompiled more than 3 times within 15 frames.
     53 // The interpreter fallback triggers before the manual protection switch, so that if it's just a single block
     54 // that's constantly getting mutated, we won't hurt the performance of the other blocks in the page.
     55 static constexpr u32 RECOMPILE_COUNT_FOR_INTERPRETER_FALLBACK = 3;
     56 static constexpr u32 RECOMPILE_FRAMES_FOR_INTERPRETER_FALLBACK = 15;
     57 static constexpr u32 INVALIDATE_COUNT_FOR_MANUAL_PROTECTION = 4;
     58 static constexpr u32 INVALIDATE_FRAMES_FOR_MANUAL_PROTECTION = 60;
     59 
     60 static CodeLUT DecodeCodeLUTPointer(u32 slot, CodeLUT ptr);
     61 static CodeLUT EncodeCodeLUTPointer(u32 slot, CodeLUT ptr);
     62 static CodeLUT OffsetCodeLUTPointer(CodeLUT fake_ptr, u32 pc);
     63 
     64 static void AllocateLUTs();
     65 static void DeallocateLUTs();
     66 static void ResetCodeLUT();
     67 static void SetCodeLUT(u32 pc, const void* function);
     68 static void InvalidateBlock(Block* block, BlockState new_state);
     69 static void ClearBlocks();
     70 
     71 static Block* LookupBlock(u32 pc);
     72 static Block* CreateBlock(u32 pc, const BlockInstructionList& instructions, const BlockMetadata& metadata);
     73 static bool IsBlockCodeCurrent(const Block* block);
     74 static bool RevalidateBlock(Block* block);
     75 PageProtectionMode GetProtectionModeForPC(u32 pc);
     76 PageProtectionMode GetProtectionModeForBlock(const Block* block);
     77 static bool ReadBlockInstructions(u32 start_pc, BlockInstructionList* instructions, BlockMetadata* metadata);
     78 static void FillBlockRegInfo(Block* block);
     79 static void CopyRegInfo(InstructionInfo* dst, const InstructionInfo* src);
     80 static void SetRegAccess(InstructionInfo* inst, Reg reg, bool write);
     81 static void AddBlockToPageList(Block* block);
     82 static void RemoveBlockFromPageList(Block* block);
     83 
     84 static Block* CreateCachedInterpreterBlock(u32 pc);
     85 [[noreturn]] static void ExecuteCachedInterpreter();
     86 template<PGXPMode pgxp_mode>
     87 [[noreturn]] static void ExecuteCachedInterpreterImpl();
     88 
     89 // Fast map provides lookup from PC to function
     90 // Function pointers are offset so that you don't need to subtract
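        // i.e. the pointer stored in g_code_lut[table] is pre-biased downwards by the table's base PC
        // (doubled on 64-bit hosts), so OffsetCodeLUTPointer() can index it with the raw PC instead of
        // masking or subtracting the table bits first; see Encode/Decode/OffsetCodeLUTPointer() below.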
     91 CodeLUTArray g_code_lut;
     92 static BlockLUTArray s_block_lut;
     93 static std::unique_ptr<const void*[]> s_lut_code_pointers;
     94 static std::unique_ptr<Block*[]> s_lut_block_pointers;
     95 static PageProtectionArray s_page_protection = {};
     96 static std::vector<Block*> s_blocks;
     97 
     98 // for compiling - reuse to avoid allocations
     99 static BlockInstructionList s_block_instructions;
    100 
    101 static void BacklinkBlocks(u32 pc, const void* dst);
    102 static void UnlinkBlockExits(Block* block);
    103 static void ResetCodeBuffer();
    104 
    105 static void ClearASMFunctions();
    106 static void CompileASMFunctions();
    107 static bool CompileBlock(Block* block);
    108 static PageFaultHandler::HandlerResult HandleFastmemException(void* exception_pc, void* fault_address, bool is_write);
    109 static void BackpatchLoadStore(void* host_pc, const LoadstoreBackpatchInfo& info);
    110 static void RemoveBackpatchInfoForRange(const void* host_code, u32 size);
    111 
    112 static BlockLinkMap s_block_links;
    113 static std::map<const void*, LoadstoreBackpatchInfo> s_fastmem_backpatch_info;
    114 static std::unordered_set<u32> s_fastmem_faulting_pcs;
    115 
    116 NORETURN_FUNCTION_POINTER void (*g_enter_recompiler)();
    117 const void* g_compile_or_revalidate_block;
    118 const void* g_check_events_and_dispatch;
    119 const void* g_run_events_and_dispatch;
    120 const void* g_dispatcher;
    121 const void* g_interpret_block;
    122 const void* g_discard_and_recompile_block;
    123 
    124 #ifdef ENABLE_RECOMPILER_PROFILING
    125 
    126 PerfScope MIPSPerfScope("MIPS");
    127 
    128 #endif
    129 
    130 #if defined(CPU_ARCH_ARM32)
    131 // Use a smaller code buffer size on AArch32 to have a better chance of being in range.
    132 static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 16 * 1024 * 1024;
    133 static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 4 * 1024 * 1024;
    134 #else
    135 static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 48 * 1024 * 1024;
    136 static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 16 * 1024 * 1024;
    137 #endif
    138 
    139 // On Linux ARM32/ARM64, we use a dedicated section in the ELF for storing code.
    140 // This is because without ASLR, or on certain ASLR offsets, the sbrk() heap ends up immediately following the text/data
    141 // sections, which means there isn't a large enough gap to place the code buffer within branch range on ARM32.
    142 #if defined(__linux__) && (defined(CPU_ARCH_ARM32) || defined(CPU_ARCH_ARM64))
    143 #define USE_CODE_BUFFER_SECTION 1
    144 #ifdef __clang__
    145 #pragma clang section bss = ".jitstorage"
    146 __attribute__((aligned(HOST_PAGE_SIZE))) static u8 s_code_buffer_ptr[RECOMPILER_CODE_CACHE_SIZE];
    147 #pragma clang section bss = ""
    148 #endif
    149 #else
    150 static u8* s_code_buffer_ptr = nullptr;
    151 #endif
    152 
    153 static u8* s_code_ptr = nullptr;
    154 static u8* s_free_code_ptr = nullptr;
    155 static u32 s_code_size = 0;
    156 static u32 s_code_used = 0;
    157 
    158 static u8* s_far_code_ptr = nullptr;
    159 static u8* s_free_far_code_ptr = nullptr;
    160 static u32 s_far_code_size = 0;
    161 static u32 s_far_code_used = 0;
    162 
    163 #ifdef _DEBUG
    164 static u32 s_total_instructions_compiled = 0;
    165 static u32 s_total_host_instructions_emitted = 0;
    166 #endif
    167 } // namespace CPU::CodeCache
    168 
    169 bool CPU::CodeCache::IsUsingAnyRecompiler()
    170 {
    171   return (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler ||
    172           g_settings.cpu_execution_mode == CPUExecutionMode::NewRec);
    173 }
    174 
    175 bool CPU::CodeCache::IsUsingFastmem()
    176 {
    177   return IsUsingAnyRecompiler() && g_settings.cpu_fastmem_mode != CPUFastmemMode::Disabled;
    178 }
    179 
    180 bool CPU::CodeCache::ProcessStartup(Error* error)
    181 {
    182 #ifdef USE_CODE_BUFFER_SECTION
    183   const u8* module_base = static_cast<const u8*>(MemMap::GetBaseAddress());
    184   INFO_LOG("Using JIT buffer section of size {} at {} (0x{:X} bytes / {} MB away)", sizeof(s_code_buffer_ptr),
    185            static_cast<void*>(s_code_buffer_ptr), std::abs(static_cast<ptrdiff_t>(s_code_buffer_ptr - module_base)),
    186            (std::abs(static_cast<ptrdiff_t>(s_code_buffer_ptr - module_base)) + (1024 * 1024 - 1)) / (1024 * 1024));
    187   const bool code_buffer_allocated =
    188     MemMap::MemProtect(s_code_buffer_ptr, RECOMPILER_CODE_CACHE_SIZE, PageProtect::ReadWriteExecute);
    189 #else
    190   s_code_buffer_ptr = static_cast<u8*>(MemMap::AllocateJITMemory(RECOMPILER_CODE_CACHE_SIZE));
    191   const bool code_buffer_allocated = (s_code_buffer_ptr != nullptr);
    192 #endif
    193   if (!code_buffer_allocated) [[unlikely]]
    194   {
    195     Error::SetStringView(error, "Failed to allocate code storage. The log may contain more information, you will need "
    196                                 "to run DuckStation with -earlyconsole in the command line.");
    197     return false;
    198   }
    199 
    200   AllocateLUTs();
    201 
    202   if (!PageFaultHandler::Install(error))
    203     return false;
    204 
    205   return true;
    206 }
    207 
    208 void CPU::CodeCache::ProcessShutdown()
    209 {
    210   DeallocateLUTs();
    211 
    212 #ifndef USE_CODE_BUFFER_SECTION
    213   MemMap::ReleaseJITMemory(s_code_buffer_ptr, RECOMPILER_CODE_CACHE_SIZE);
    214 #endif
    215 }
    216 
    217 void CPU::CodeCache::Initialize()
    218 {
    219   Assert(s_blocks.empty());
    220 
    221   if (IsUsingAnyRecompiler())
    222   {
    223     ResetCodeBuffer();
    224     CompileASMFunctions();
    225     ResetCodeLUT();
    226   }
    227 
    228   Bus::UpdateFastmemViews(IsUsingAnyRecompiler() ? g_settings.cpu_fastmem_mode : CPUFastmemMode::Disabled);
    229   CPU::UpdateMemoryPointers();
    230 }
    231 
    232 void CPU::CodeCache::Shutdown()
    233 {
    234   ClearBlocks();
    235   ClearASMFunctions();
    236 
    237   Bus::UpdateFastmemViews(CPUFastmemMode::Disabled);
    238   CPU::UpdateMemoryPointers();
    239 }
    240 
    241 void CPU::CodeCache::Reset()
    242 {
    243   ClearBlocks();
    244 
    245   if (IsUsingAnyRecompiler())
    246   {
    247     ClearASMFunctions();
    248     ResetCodeBuffer();
    249     CompileASMFunctions();
    250     ResetCodeLUT();
    251   }
    252 }
    253 
    254 void CPU::CodeCache::Execute()
    255 {
    256   if (IsUsingAnyRecompiler())
    257   {
    258     g_enter_recompiler();
    259     UnreachableCode();
    260   }
    261   else
    262   {
    263     ExecuteCachedInterpreter();
    264   }
    265 }
    266 
    267 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    268 // MARK: - Block Management
    269 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    270 
    271 namespace CPU::CodeCache {
    272 static constexpr u32 GetLUTTableCount(u32 start, u32 end)
    273 {
    274   return ((end >> LUT_TABLE_SHIFT) - (start >> LUT_TABLE_SHIFT)) + 1;
    275 }
    276 
    277 static constexpr LUTRangeList GetLUTRanges()
    278 {
    279   const LUTRangeList ranges = {{
    280     {0x00000000, 0x00800000}, // RAM
    281     {0x1F000000, 0x1F800000}, // EXP1
    282     {0x1FC00000, 0x1FC80000}, // BIOS
    283 
    284     {0x80000000, 0x80800000}, // RAM
    285     {0x9F000000, 0x9F800000}, // EXP1
    286     {0x9FC00000, 0x9FC80000}, // BIOS
    287 
    288     {0xA0000000, 0xA0800000}, // RAM
    289     {0xBF000000, 0xBF800000}, // EXP1
    290     {0xBFC00000, 0xBFC80000}  // BIOS
    291   }};
    292   return ranges;
    293 }
    294 
    295 static constexpr u32 GetLUTSlotCount(bool include_unreachable)
    296 {
    297   u32 tables = include_unreachable ? 1 : 0; // unreachable table
    298   for (const auto& [start, end] : GetLUTRanges())
    299     tables += GetLUTTableCount(start, end);
    300 
    301   return tables * LUT_TABLE_SIZE;
    302 }
    303 } // namespace CPU::CodeCache
    304 
    305 CPU::CodeCache::CodeLUT CPU::CodeCache::DecodeCodeLUTPointer(u32 slot, CodeLUT ptr)
    306 {
    307   if constexpr (sizeof(void*) == 8)
    308     return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) + (static_cast<u64>(slot) << 17));
    309   else
    310     return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) + (slot << 16));
    311 }
    312 
    313 CPU::CodeCache::CodeLUT CPU::CodeCache::EncodeCodeLUTPointer(u32 slot, CodeLUT ptr)
    314 {
    315   if constexpr (sizeof(void*) == 8)
    316     return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) - (static_cast<u64>(slot) << 17));
    317   else
    318     return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) - (slot << 16));
    319 }
    320 
    321 CPU::CodeCache::CodeLUT CPU::CodeCache::OffsetCodeLUTPointer(CodeLUT fake_ptr, u32 pc)
    322 {
    323   u8* fake_byte_ptr = reinterpret_cast<u8*>(fake_ptr);
    324   if constexpr (sizeof(void*) == 8)
    325     return reinterpret_cast<const void**>(fake_byte_ptr + (static_cast<u64>(pc) << 1));
    326   else
    327     return reinterpret_cast<const void**>(fake_byte_ptr + pc);
    328 }
    329 
    330 void CPU::CodeCache::AllocateLUTs()
    331 {
    332   constexpr u32 num_code_slots = GetLUTSlotCount(true);
    333   constexpr u32 num_block_slots = GetLUTSlotCount(false);
    334 
    335   Assert(!s_lut_code_pointers && !s_lut_block_pointers);
    336   s_lut_code_pointers = std::make_unique<const void*[]>(num_code_slots);
    337   s_lut_block_pointers = std::make_unique<Block*[]>(num_block_slots);
    338   std::memset(s_lut_block_pointers.get(), 0, sizeof(Block*) * num_block_slots);
    339 
    340   CodeLUT code_table_ptr = s_lut_code_pointers.get();
    341   Block** block_table_ptr = s_lut_block_pointers.get();
    342   CodeLUT const code_table_ptr_end = code_table_ptr + num_code_slots;
    343   Block** const block_table_ptr_end = block_table_ptr + num_block_slots;
    344 
    345   // Make the unreachable table jump to the invalid code callback.
    346   MemsetPtrs(code_table_ptr, static_cast<const void*>(nullptr), LUT_TABLE_COUNT);
    347 
    348   // Mark everything as unreachable to begin with.
    349   for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
    350   {
    351     g_code_lut[i] = EncodeCodeLUTPointer(i, code_table_ptr);
    352     s_block_lut[i] = nullptr;
    353   }
    354   code_table_ptr += LUT_TABLE_SIZE;
    355 
    356   // Allocate ranges.
    357   for (const auto& [start, end] : GetLUTRanges())
    358   {
    359     const u32 start_slot = start >> LUT_TABLE_SHIFT;
    360     const u32 count = GetLUTTableCount(start, end);
    361     for (u32 i = 0; i < count; i++)
    362     {
    363       const u32 slot = start_slot + i;
    364 
    365       g_code_lut[slot] = EncodeCodeLUTPointer(slot, code_table_ptr);
    366       code_table_ptr += LUT_TABLE_SIZE;
    367 
    368       s_block_lut[slot] = block_table_ptr;
    369       block_table_ptr += LUT_TABLE_SIZE;
    370     }
    371   }
    372 
    373   Assert(code_table_ptr == code_table_ptr_end);
    374   Assert(block_table_ptr == block_table_ptr_end);
    375 }
    376 
    377 void CPU::CodeCache::DeallocateLUTs()
    378 {
    379   s_lut_block_pointers.reset();
    380   s_lut_code_pointers.reset();
    381 }
    382 
    383 void CPU::CodeCache::ResetCodeLUT()
    384 {
    385   if (!s_lut_code_pointers)
    386     return;
    387 
    388   // Make the unreachable table jump to the invalid code callback.
    389   MemsetPtrs(s_lut_code_pointers.get(), g_interpret_block, LUT_TABLE_COUNT);
    390 
    391   for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
    392   {
    393     CodeLUT ptr = DecodeCodeLUTPointer(i, g_code_lut[i]);
    394     if (ptr == s_lut_code_pointers.get())
    395       continue;
    396 
    397     MemsetPtrs(ptr, g_compile_or_revalidate_block, LUT_TABLE_SIZE);
    398   }
    399 }
    400 
    401 void CPU::CodeCache::SetCodeLUT(u32 pc, const void* function)
    402 {
    403   if (!s_lut_code_pointers)
    404     return;
    405 
    406   const u32 table = pc >> LUT_TABLE_SHIFT;
    407   CodeLUT encoded_ptr = g_code_lut[table];
    408 
    409 #ifdef _DEBUG
    410   const CodeLUT table_ptr = DecodeCodeLUTPointer(table, encoded_ptr);
    411   DebugAssert(table_ptr != nullptr && table_ptr != s_lut_code_pointers.get());
    412 #endif
    413 
    414   *OffsetCodeLUTPointer(encoded_ptr, pc) = function;
    415 }
    416 
    417 CPU::CodeCache::Block* CPU::CodeCache::LookupBlock(u32 pc)
    418 {
    419   const u32 table = pc >> LUT_TABLE_SHIFT;
    420   if (!s_block_lut[table])
    421     return nullptr;
    422 
    423   const u32 idx = (pc & 0xFFFF) >> 2;
    424   return s_block_lut[table][idx];
    425 }
    426 
    427 CPU::CodeCache::Block* CPU::CodeCache::CreateBlock(u32 pc, const BlockInstructionList& instructions,
    428                                                    const BlockMetadata& metadata)
    429 {
    430   const u32 size = static_cast<u32>(instructions.size());
    431   const u32 table = pc >> LUT_TABLE_SHIFT;
    432   Assert(s_block_lut[table]);
    433 
    434   // retain from old block
    435   const u32 frame_number = System::GetFrameNumber();
    436   u32 recompile_frame = System::GetFrameNumber();
    437   u8 recompile_count = 0;
    438 
    439   const u32 idx = (pc & 0xFFFF) >> 2;
    440   Block* block = s_block_lut[table][idx];
    441   if (block)
    442   {
    443     // shouldn't be in the page list.. since we should come here after invalidating
    444     Assert(!block->next_block_in_page);
    445 
    446     // keep recompile stats before resetting, that way we actually count recompiles
    447     recompile_frame = block->compile_frame;
    448     recompile_count = block->compile_count;
    449 
    450     // if it has the same number of instructions, we can reuse it
    451     if (block->size != size)
    452     {
    453       // this sucks.. hopefully won't happen very often
    454       // TODO: allocate max size, allow shrink but not grow
    455       auto it = std::find(s_blocks.begin(), s_blocks.end(), block);
    456       Assert(it != s_blocks.end());
    457       s_blocks.erase(it);
    458 
    459       block->~Block();
    460       Common::AlignedFree(block);
    461       block = nullptr;
    462     }
    463   }
    464 
    465   if (!block)
    466   {
    467     block = static_cast<Block*>(Common::AlignedMalloc(
    468       sizeof(Block) + (sizeof(Instruction) * size) + (sizeof(InstructionInfo) * size), alignof(Block)));
    469     Assert(block);
    470     new (block) Block();
    471     s_blocks.push_back(block);
    472   }
    473 
    474   block->pc = pc;
    475   block->size = size;
    476   block->host_code = nullptr;
    477   block->next_block_in_page = nullptr;
    478   block->num_exit_links = 0;
    479   block->state = BlockState::Valid;
    480   block->flags = metadata.flags;
    481   block->protection = GetProtectionModeForBlock(block);
    482   block->uncached_fetch_ticks = metadata.uncached_fetch_ticks;
    483   block->icache_line_count = metadata.icache_line_count;
    484   block->host_code_size = 0;
    485   block->compile_frame = recompile_frame;
    486   block->compile_count = recompile_count + 1;
    487 
    488   // copy instructions/info
    489   {
    490     const std::pair<Instruction, InstructionInfo>* ip = instructions.data();
    491     Instruction* dsti = block->Instructions();
    492     InstructionInfo* dstii = block->InstructionsInfo();
    493 
    494     for (u32 i = 0; i < size; i++, ip++, dsti++, dstii++)
    495     {
    496       dsti->bits = ip->first.bits;
    497       *dstii = ip->second;
    498     }
    499   }
    500 
    501   s_block_lut[table][idx] = block;
    502 
    503   // if the block is being recompiled too often, leave it in the list, but don't compile it.
    504   const u32 frame_delta = frame_number - recompile_frame;
    505   if (frame_delta >= RECOMPILE_FRAMES_FOR_INTERPRETER_FALLBACK)
    506   {
    507     block->compile_frame = frame_number;
    508     block->compile_count = 1;
    509   }
    510   else if (block->compile_count >= RECOMPILE_COUNT_FOR_INTERPRETER_FALLBACK)
    511   {
    512     DEV_LOG("{} recompiles in {} frames to block 0x{:08X}, not caching.", block->compile_count, frame_delta, block->pc);
    513     block->size = 0;
    514   }
    515 
    516   // cached interpreter creates empty blocks when falling back
    517   if (block->size == 0)
    518   {
    519     block->state = BlockState::FallbackToInterpreter;
    520     block->protection = PageProtectionMode::Unprotected;
    521     return block;
    522   }
    523 
    524   // Old rec doesn't use backprop info, don't waste time filling it.
    525   if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
    526     FillBlockRegInfo(block);
    527 
    528   // add it to the tracking list for its page
    529   AddBlockToPageList(block);
    530 
    531   return block;
    532 }
    533 
    534 bool CPU::CodeCache::IsBlockCodeCurrent(const Block* block)
    535 {
    536   // blocks shouldn't be wrapping..
    537   const PhysicalMemoryAddress phys_addr = VirtualAddressToPhysical(block->pc);
    538   DebugAssert((phys_addr + (sizeof(Instruction) * block->size)) <= Bus::g_ram_size);
    539 
    540   // can just do a straight memcmp..
    541   return (std::memcmp(Bus::g_ram + phys_addr, block->Instructions(), sizeof(Instruction) * block->size) == 0);
    542 }
    543 
    544 bool CPU::CodeCache::RevalidateBlock(Block* block)
    545 {
    546   DebugAssert(block->state != BlockState::Valid);
    547   DebugAssert(AddressInRAM(block->pc) || block->state == BlockState::NeedsRecompile);
    548 
    549   if (block->state >= BlockState::NeedsRecompile)
    550     return false;
    551 
    552   // Protection may have changed if we didn't execute before it got invalidated again. e.g. THPS2.
    553   if (block->protection != GetProtectionModeForBlock(block))
    554     return false;
    555 
    556   if (!IsBlockCodeCurrent(block))
    557   {
    558     // changed, needs recompiling
    559     DEBUG_LOG("Block at PC {:08X} has changed and needs recompiling", block->pc);
    560     return false;
    561   }
    562 
    563   block->state = BlockState::Valid;
    564   AddBlockToPageList(block);
    565   return true;
    566 }
    567 
    568 void CPU::CodeCache::AddBlockToPageList(Block* block)
    569 {
    570   DebugAssert(block->size > 0);
    571   if (!AddressInRAM(block->pc) || block->protection != PageProtectionMode::WriteProtected)
    572     return;
    573 
    574   const u32 page_idx = block->StartPageIndex();
    575   PageProtectionInfo& entry = s_page_protection[page_idx];
    576   Bus::SetRAMCodePage(page_idx);
    577 
    578   if (entry.last_block_in_page)
    579   {
    580     entry.last_block_in_page->next_block_in_page = block;
    581     entry.last_block_in_page = block;
    582   }
    583   else
    584   {
    585     entry.first_block_in_page = block;
    586     entry.last_block_in_page = block;
    587   }
    588 }
    589 
    590 void CPU::CodeCache::RemoveBlockFromPageList(Block* block)
    591 {
    592   DebugAssert(block->size > 0);
    593   if (!AddressInRAM(block->pc) || block->protection != PageProtectionMode::WriteProtected)
    594     return;
    595 
    596   const u32 page_idx = block->StartPageIndex();
    597   PageProtectionInfo& entry = s_page_protection[page_idx];
    598 
    599   // unlink from list
    600   Block* prev_block = nullptr;
    601   Block* cur_block = entry.first_block_in_page;
    602   while (cur_block)
    603   {
    604     if (cur_block != block)
    605     {
    606       prev_block = cur_block;
    607       cur_block = cur_block->next_block_in_page;
    608       continue;
    609     }
    610 
    611     if (prev_block)
    612       prev_block->next_block_in_page = cur_block->next_block_in_page;
    613     else
    614       entry.first_block_in_page = cur_block->next_block_in_page;
    615     if (!cur_block->next_block_in_page)
    616       entry.last_block_in_page = prev_block;
    617 
    618     cur_block->next_block_in_page = nullptr;
    619     break;
    620   }
    621 }
    622 
    623 void CPU::CodeCache::InvalidateBlocksWithPageIndex(u32 index)
    624 {
    625   DebugAssert(index < Bus::RAM_8MB_CODE_PAGE_COUNT);
    626   Bus::ClearRAMCodePage(index);
    627 
    628   BlockState new_block_state = BlockState::Invalidated;
    629   PageProtectionInfo& ppi = s_page_protection[index];
    630 
    631   const u32 frame_number = System::GetFrameNumber();
    632   const u32 frame_delta = frame_number - ppi.invalidate_frame;
    633   ppi.invalidate_count++;
    634 
    635   if (frame_delta >= INVALIDATE_FRAMES_FOR_MANUAL_PROTECTION)
    636   {
    637     ppi.invalidate_count = 1;
    638     ppi.invalidate_frame = frame_number;
    639   }
    640   else if (ppi.invalidate_count > INVALIDATE_COUNT_FOR_MANUAL_PROTECTION)
    641   {
    642     DEV_LOG("{} invalidations in {} frames to page {} [0x{:08X} -> 0x{:08X}], switching to manual protection",
    643             ppi.invalidate_count, frame_delta, index, (index * HOST_PAGE_SIZE), ((index + 1) * HOST_PAGE_SIZE));
    644     ppi.mode = PageProtectionMode::ManualCheck;
    645     new_block_state = BlockState::NeedsRecompile;
    646   }
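        // Invalidated blocks can be revalidated cheaply via the memcmp in RevalidateBlock(); NeedsRecompile
        // forces a full recompile, so the rebuilt block picks up the page's new ManualCheck protection mode.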
    647 
    648   if (!ppi.first_block_in_page)
    649     return;
    650 
    651   MemMap::BeginCodeWrite();
    652 
    653   Block* block = ppi.first_block_in_page;
    654   while (block)
    655   {
    656     InvalidateBlock(block, new_block_state);
    657     block = std::exchange(block->next_block_in_page, nullptr);
    658   }
    659 
    660   ppi.first_block_in_page = nullptr;
    661   ppi.last_block_in_page = nullptr;
    662 
    663   MemMap::EndCodeWrite();
    664 }
    665 
    666 CPU::CodeCache::PageProtectionMode CPU::CodeCache::GetProtectionModeForPC(u32 pc)
    667 {
    668   if (!AddressInRAM(pc))
    669     return PageProtectionMode::Unprotected;
    670 
    671   const u32 page_idx = Bus::GetRAMCodePageIndex(pc);
    672   return s_page_protection[page_idx].mode;
    673 }
    674 
    675 CPU::CodeCache::PageProtectionMode CPU::CodeCache::GetProtectionModeForBlock(const Block* block)
    676 {
    677   // if the block has a branch delay slot crossing a page, we must use manual protection.
    678   // no other way about it.
    679   if (block->HasFlag(BlockFlags::BranchDelaySpansPages))
    680     return PageProtectionMode::ManualCheck;
    681 
    682   return GetProtectionModeForPC(block->pc);
    683 }
    684 
    685 void CPU::CodeCache::InvalidateBlock(Block* block, BlockState new_state)
    686 {
    687   if (block->state == BlockState::Valid)
    688   {
    689     SetCodeLUT(block->pc, g_compile_or_revalidate_block);
    690     BacklinkBlocks(block->pc, g_compile_or_revalidate_block);
    691   }
    692 
    693   block->state = new_state;
    694 }
    695 
    696 void CPU::CodeCache::InvalidateAllRAMBlocks()
    697 {
    698   // TODO: maybe combine the backlink into one big instruction flush cache?
    699   MemMap::BeginCodeWrite();
    700 
    701   for (Block* block : s_blocks)
    702   {
    703     if (AddressInRAM(block->pc))
    704     {
    705       InvalidateBlock(block, BlockState::Invalidated);
    706       block->next_block_in_page = nullptr;
    707     }
    708   }
    709 
    710   for (PageProtectionInfo& ppi : s_page_protection)
    711   {
    712     ppi.first_block_in_page = nullptr;
    713     ppi.last_block_in_page = nullptr;
    714   }
    715 
    716   MemMap::EndCodeWrite();
    717   Bus::ClearRAMCodePageFlags();
    718 }
    719 
    720 void CPU::CodeCache::ClearBlocks()
    721 {
    722   for (u32 i = 0; i < Bus::RAM_8MB_CODE_PAGE_COUNT; i++)
    723   {
    724     PageProtectionInfo& ppi = s_page_protection[i];
    725     if (ppi.mode == PageProtectionMode::WriteProtected && ppi.first_block_in_page)
    726       Bus::ClearRAMCodePage(i);
    727 
    728     ppi = {};
    729   }
    730 
    731   s_fastmem_backpatch_info.clear();
    732   s_fastmem_faulting_pcs.clear();
    733   s_block_links.clear();
    734 
    735   for (Block* block : s_blocks)
    736   {
    737     block->~Block();
    738     Common::AlignedFree(block);
    739   }
    740   s_blocks.clear();
    741 
    742   std::memset(s_lut_block_pointers.get(), 0, sizeof(Block*) * GetLUTSlotCount(false));
    743 }
    744 
    745 PageFaultHandler::HandlerResult PageFaultHandler::HandlePageFault(void* exception_pc, void* fault_address,
    746                                                                   bool is_write)
    747 {
    748   if (static_cast<const u8*>(fault_address) >= Bus::g_ram &&
    749       static_cast<const u8*>(fault_address) < (Bus::g_ram + Bus::RAM_8MB_SIZE))
    750   {
    751     // Writing to protected RAM.
    752     DebugAssert(is_write);
    753     const u32 guest_address = static_cast<u32>(static_cast<const u8*>(fault_address) - Bus::g_ram);
    754     const u32 page_index = Bus::GetRAMCodePageIndex(guest_address);
    755     DEV_LOG("Page fault on protected RAM @ 0x{:08X} (page #{}), invalidating code cache.", guest_address, page_index);
    756     CPU::CodeCache::InvalidateBlocksWithPageIndex(page_index);
    757     return PageFaultHandler::HandlerResult::ContinueExecution;
    758   }
    759 
    760   return CPU::CodeCache::HandleFastmemException(exception_pc, fault_address, is_write);
    761 }
    762 
    763 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    764 // MARK: - Cached Interpreter
    765 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    766 
    767 CPU::CodeCache::Block* CPU::CodeCache::CreateCachedInterpreterBlock(u32 pc)
    768 {
    769   BlockMetadata metadata = {};
    770   ReadBlockInstructions(pc, &s_block_instructions, &metadata);
    771   return CreateBlock(pc, s_block_instructions, metadata);
    772 }
    773 
    774 template<PGXPMode pgxp_mode>
    775 [[noreturn]] void CPU::CodeCache::ExecuteCachedInterpreterImpl()
    776 {
    777 #define CHECK_DOWNCOUNT()                                                                                              \
    778   if (g_state.pending_ticks >= g_state.downcount)                                                                      \
    779     break;
    780 
    781   for (;;)
    782   {
    783     TimingEvents::RunEvents();
    784 
    785     while (g_state.pending_ticks < g_state.downcount)
    786     {
    787 #if 0
    788       LogCurrentState();
    789 #endif
    790 #if 0
    791       if ((g_state.pending_ticks + TimingEvents::GetGlobalTickCounter()) == 3301006214)
    792         __debugbreak();
    793 #endif
    794       // Manually done because we don't want to compile blocks without a LUT.
    795       const u32 pc = g_state.pc;
    796       const u32 table = pc >> LUT_TABLE_SHIFT;
    797       Block* block;
    798       if (s_block_lut[table])
    799       {
    800         const u32 idx = (pc & 0xFFFF) >> 2;
    801         block = s_block_lut[table][idx];
    802       }
    803       else
    804       {
    805         // Likely invalid code...
    806         goto interpret_block;
    807       }
    808 
    809     reexecute_block:
    810       if (!block)
    811       {
    812         if ((block = CreateCachedInterpreterBlock(pc))->size == 0) [[unlikely]]
    813           goto interpret_block;
    814       }
    815       else
    816       {
    817         if (block->state == BlockState::FallbackToInterpreter) [[unlikely]]
    818           goto interpret_block;
    819 
    820         if ((block->state != BlockState::Valid && !RevalidateBlock(block)) ||
    821             (block->protection == PageProtectionMode::ManualCheck && !IsBlockCodeCurrent(block)))
    822         {
    823           if ((block = CreateCachedInterpreterBlock(pc))->size == 0) [[unlikely]]
    824             goto interpret_block;
    825         }
    826       }
    827 
    828       DebugAssert(!(HasPendingInterrupt()));
    829       if (block->HasFlag(BlockFlags::IsUsingICache))
    830       {
    831         CheckAndUpdateICacheTags(block->icache_line_count);
    832       }
    833       else if (block->HasFlag(BlockFlags::NeedsDynamicFetchTicks))
    834       {
    835         AddPendingTicks(
    836           static_cast<TickCount>(block->size * static_cast<u32>(*Bus::GetMemoryAccessTimePtr(
    837                                                  block->pc & PHYSICAL_MEMORY_ADDRESS_MASK, MemoryAccessSize::Word))));
    838       }
    839       else
    840       {
    841         AddPendingTicks(block->uncached_fetch_ticks);
    842       }
    843 
    844       InterpretCachedBlock<pgxp_mode>(block);
    845 
    846       CHECK_DOWNCOUNT();
    847 
    848       // Handle self-looping blocks
    849       if (g_state.pc == block->pc)
    850         goto reexecute_block;
    851       else
    852         continue;
    853 
    854     interpret_block:
    855       InterpretUncachedBlock<pgxp_mode>();
    856       CHECK_DOWNCOUNT();
    857       continue;
    858     }
    859   }
    860 }
    861 
    862 [[noreturn]] void CPU::CodeCache::ExecuteCachedInterpreter()
    863 {
    864   if (g_settings.gpu_pgxp_enable)
    865   {
    866     if (g_settings.gpu_pgxp_cpu)
    867       ExecuteCachedInterpreterImpl<PGXPMode::CPU>();
    868     else
    869       ExecuteCachedInterpreterImpl<PGXPMode::Memory>();
    870   }
    871   else
    872   {
    873     ExecuteCachedInterpreterImpl<PGXPMode::Disabled>();
    874   }
    875 }
    876 
    877 void CPU::CodeCache::LogCurrentState()
    878 {
    879 #if 0
    880   if (System::GetGlobalTickCounter() == 2546728915)
    881     __debugbreak();
    882 #endif
    883 #if 0
    884   if (System::GetGlobalTickCounter() < 2546729174)
    885     return;
    886 #endif
    887 
    888   const auto& regs = g_state.regs;
    889   WriteToExecutionLog(
    890     "tick=%" PRIu64
    891     " dc=%u/%u pc=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X t1=%08X t2=%08X t3=%08X t4=%08X "
    892     "t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X "
    893     "k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X hi=%08X lo=%08X ldr=%s ldv=%08X cause=%08X sr=%08X gte=%08X\n",
    894     System::GetGlobalTickCounter(), g_state.pending_ticks, g_state.downcount, g_state.pc, regs.at, regs.v0, regs.v1,
    895     regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5, regs.t6, regs.t7, regs.s0,
    896     regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, regs.t9, regs.k0, regs.k1, regs.gp, regs.sp,
    897     regs.fp, regs.ra, regs.hi, regs.lo,
    898     (g_state.next_load_delay_reg == Reg::count) ? "NONE" : GetRegName(g_state.next_load_delay_reg),
    899     (g_state.next_load_delay_reg == Reg::count) ? 0 : g_state.next_load_delay_value, g_state.cop0_regs.cause.bits,
    900     g_state.cop0_regs.sr.bits, static_cast<u32>(crc32(0, (const Bytef*)&g_state.gte_regs, sizeof(g_state.gte_regs))));
    901 }
    902 
    903 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    904 // MARK: - Block Compilation: Shared Code
    905 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    906 
    907 bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* instructions, BlockMetadata* metadata)
    908 {
    909   // TODO: Jump to other block if it exists at this pc?
    910 
    911   const PageProtectionMode protection = GetProtectionModeForPC(start_pc);
    912   const bool use_icache = CPU::IsCachedAddress(start_pc);
    913   const bool dynamic_fetch_ticks = (!use_icache && Bus::GetMemoryAccessTimePtr(start_pc & PHYSICAL_MEMORY_ADDRESS_MASK,
    914                                                                                MemoryAccessSize::Word) != nullptr);
    915   u32 pc = start_pc;
    916   bool is_branch_delay_slot = false;
    917   bool is_load_delay_slot = false;
    918 
    919 #if 0
    920   if (pc == 0x0005aa90)
    921     __debugbreak();
    922 #endif
    923 
    924   instructions->clear();
    925   metadata->icache_line_count = 0;
    926   metadata->uncached_fetch_ticks = 0;
    927   metadata->flags = use_icache ? BlockFlags::IsUsingICache :
    928                                  (dynamic_fetch_ticks ? BlockFlags::NeedsDynamicFetchTicks : BlockFlags::None);
    929 
    930   u32 last_cache_line = ICACHE_LINES;
    931   u32 last_page = (protection == PageProtectionMode::WriteProtected) ? Bus::GetRAMCodePageIndex(start_pc) : 0;
    932 
    933   for (;;)
    934   {
    935     if (protection == PageProtectionMode::WriteProtected)
    936     {
    937       const u32 this_page = Bus::GetRAMCodePageIndex(pc);
    938       if (this_page != last_page)
    939       {
    940         // if we're just crossing the page and not in a branch delay slot, jump directly to the next block
    941         if (!is_branch_delay_slot)
    942         {
    943           DEV_LOG("Breaking block 0x{:08X} at 0x{:08X} due to page crossing", start_pc, pc);
    944           metadata->flags |= BlockFlags::SpansPages;
    945           break;
    946         }
    947         else
    948         {
    949           // otherwise, we need to use manual protection in case the delay slot changes.
    950           // may as well keep going then, since we're doing manual check anyways.
    951           DEV_LOG("Block 0x{:08X} has branch delay slot crossing page at 0x{:08X}, forcing manual protection", start_pc,
    952                   pc);
    953           metadata->flags |= BlockFlags::BranchDelaySpansPages;
    954         }
    955       }
    956     }
    957 
    958     Instruction instruction;
    959     if (!SafeReadInstruction(pc, &instruction.bits) || !IsValidInstruction(instruction))
    960     {
    961       // Away to the int you go!
    962       ERROR_LOG("Instruction read failed at PC=0x{:08X}, truncating block.", pc);
    963       break;
    964     }
    965 
    966     InstructionInfo info;
    967     std::memset(&info, 0, sizeof(info));
    968 
    969     info.pc = pc;
    970     info.is_branch_delay_slot = is_branch_delay_slot;
    971     info.is_load_delay_slot = is_load_delay_slot;
    972     info.is_branch_instruction = IsBranchInstruction(instruction);
    973     info.is_direct_branch_instruction = IsDirectBranchInstruction(instruction);
    974     info.is_unconditional_branch_instruction = IsUnconditionalBranchInstruction(instruction);
    975     info.is_load_instruction = IsMemoryLoadInstruction(instruction);
    976     info.is_store_instruction = IsMemoryStoreInstruction(instruction);
    977     info.has_load_delay = InstructionHasLoadDelay(instruction);
    978 
    979     if (use_icache)
    980     {
    981       if (g_settings.cpu_recompiler_icache)
    982       {
    983         const u32 icache_line = GetICacheLine(pc);
    984         if (icache_line != last_cache_line)
    985         {
    986           metadata->icache_line_count++;
    987           last_cache_line = icache_line;
    988         }
    989       }
    990     }
    991     else if (!dynamic_fetch_ticks)
    992     {
    993       metadata->uncached_fetch_ticks += GetInstructionReadTicks(pc);
    994     }
    995 
    996     if (info.is_load_instruction || info.is_store_instruction)
    997       metadata->flags |= BlockFlags::ContainsLoadStoreInstructions;
    998 
    999     pc += sizeof(Instruction);
   1000 
   1001     if (is_branch_delay_slot && info.is_branch_instruction)
   1002     {
   1003       const BlockInstructionInfoPair& prev = instructions->back();
   1004       if (!prev.second.is_unconditional_branch_instruction || !prev.second.is_direct_branch_instruction)
   1005       {
   1006         WARNING_LOG("Conditional or indirect branch delay slot at {:08X}, skipping block", info.pc);
   1007         return false;
   1008       }
   1009       if (!IsDirectBranchInstruction(instruction))
   1010       {
   1011         WARNING_LOG("Indirect branch in delay slot at {:08X}, skipping block", info.pc);
   1012         return false;
   1013       }
   1014 
   1015       // we _could_ fetch the delay slot from the first branch's target, but it's probably in a different
   1016       // page, and that's an invalidation nightmare. so just fallback to the int, this is very rare anyway.
   1017       WARNING_LOG("Direct branch in delay slot at {:08X}, skipping block", info.pc);
   1018       return false;
   1019     }
   1020 
   1021     // instruction is decoded now
   1022     instructions->emplace_back(instruction, info);
   1023 
   1024     // if we're in a branch delay slot, the block is now done
   1025     // except if this is a branch in a branch delay slot, then we grab the one after that, and so on...
   1026     if (is_branch_delay_slot && !info.is_branch_instruction)
   1027       break;
   1028 
   1029     // if this is a branch, we grab the next instruction (delay slot), and then exit
   1030     is_branch_delay_slot = info.is_branch_instruction;
   1031 
   1032     // same for load delay
   1033     is_load_delay_slot = info.has_load_delay;
   1034 
   1035     // is this a non-branchy exit? (e.g. syscall)
   1036     if (IsExitBlockInstruction(instruction))
   1037       break;
   1038   }
   1039 
   1040   if (instructions->empty())
   1041   {
   1042     WARNING_LOG("Empty block compiled at 0x{:08X}", start_pc);
   1043     return false;
   1044   }
   1045 
   1046   instructions->back().second.is_last_instruction = true;
   1047 
   1048 #ifdef _DEBUG
   1049   SmallString disasm;
   1050   DEBUG_LOG("Block at 0x{:08X}", start_pc);
   1051   DEBUG_LOG(" Uncached fetch ticks: {}", metadata->uncached_fetch_ticks);
   1052   DEBUG_LOG(" ICache line count: {}", metadata->icache_line_count);
   1053   for (const auto& cbi : *instructions)
   1054   {
   1055     CPU::DisassembleInstruction(&disasm, cbi.second.pc, cbi.first.bits);
   1056     DEBUG_LOG("[{} {} 0x{:08X}] {:08X} {}", cbi.second.is_branch_delay_slot ? "BD" : "  ",
   1057               cbi.second.is_load_delay_slot ? "LD" : "  ", cbi.second.pc, cbi.first.bits, disasm);
   1058   }
   1059 #endif
   1060 
   1061   return true;
   1062 }
   1063 
   1064 void CPU::CodeCache::CopyRegInfo(InstructionInfo* dst, const InstructionInfo* src)
   1065 {
   1066   std::memcpy(dst->reg_flags, src->reg_flags, sizeof(dst->reg_flags));
   1067   std::memcpy(dst->read_reg, src->read_reg, sizeof(dst->read_reg));
   1068 }
   1069 
   1070 void CPU::CodeCache::SetRegAccess(InstructionInfo* inst, Reg reg, bool write)
   1071 {
   1072   if (reg == Reg::zero)
   1073     return;
   1074 
   1075   if (!write)
   1076   {
   1077     for (u32 i = 0; i < std::size(inst->read_reg); i++)
   1078     {
   1079       if (inst->read_reg[i] == Reg::zero)
   1080       {
   1081         inst->read_reg[i] = reg;
   1082         break;
   1083       }
   1084     }
   1085   }
   1086   else
   1087   {
   1088 #if 0
   1089     for (u32 i = 0; i < std::size(inst->write_reg); i++)
   1090     {
   1091       if (inst->write_reg[i] == Reg::zero)
   1092       {
   1093         inst->write_reg[i] = reg;
   1094         break;
   1095       }
   1096     }
   1097 #endif
   1098   }
   1099 }
   1100 
   1101 #define BackpropSetReads(reg)                                                                                          \
   1102   do                                                                                                                   \
   1103   {                                                                                                                    \
   1104     if (!(inst->reg_flags[static_cast<u8>(reg)] & RI_USED))                                                            \
   1105       inst->reg_flags[static_cast<u8>(reg)] |= RI_LASTUSE;                                                             \
   1106     prev->reg_flags[static_cast<u8>(reg)] |= RI_LIVE | RI_USED;                                                        \
   1107     inst->reg_flags[static_cast<u8>(reg)] |= RI_USED;                                                                  \
   1108     SetRegAccess(inst, reg, false);                                                                                    \
   1109   } while (0)
   1110 
   1111 #define BackpropSetWrites(reg)                                                                                         \
   1112   do                                                                                                                   \
   1113   {                                                                                                                    \
   1114     prev->reg_flags[static_cast<u8>(reg)] &= ~(RI_LIVE | RI_USED);                                                     \
   1115     if (!(inst->reg_flags[static_cast<u8>(reg)] & RI_USED))                                                            \
   1116       inst->reg_flags[static_cast<u8>(reg)] |= RI_LASTUSE;                                                             \
   1117     inst->reg_flags[static_cast<u8>(reg)] |= RI_USED;                                                                  \
   1118     SetRegAccess(inst, reg, true);                                                                                     \
   1119   } while (0)
   1120 
   1121 // TODO: memory loads should be delayed one instruction because of stupid load delays.
   1122 #define BackpropSetWritesDelayed(reg) BackpropSetWrites(reg)
   1123 
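        // FillBlockRegInfo() walks the block backwards with the macros above: a read keeps the register
        // marked live/used in the preceding instruction's info, a write clears it, and RI_LASTUSE tags the
        // final reference. Only the new recompiler consumes this backprop info (see CreateBlock()).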
   1124 void CPU::CodeCache::FillBlockRegInfo(Block* block)
   1125 {
   1126   const Instruction* iinst = block->Instructions() + (block->size - 1);
   1127   InstructionInfo* const start = block->InstructionsInfo();
   1128   InstructionInfo* inst = start + (block->size - 1);
   1129   std::memset(inst->reg_flags, RI_LIVE, sizeof(inst->reg_flags));
   1130   std::memset(inst->read_reg, 0, sizeof(inst->read_reg));
   1131   // std::memset(inst->write_reg, 0, sizeof(inst->write_reg));
   1132 
   1133   while (inst != start)
   1134   {
   1135     InstructionInfo* prev = inst - 1;
   1136     CopyRegInfo(prev, inst);
   1137 
   1138     const Reg rs = iinst->r.rs;
   1139     const Reg rt = iinst->r.rt;
   1140 
   1141     switch (iinst->op)
   1142     {
   1143       case InstructionOp::funct:
   1144       {
   1145         const Reg rd = iinst->r.rd;
   1146 
   1147         switch (iinst->r.funct)
   1148         {
   1149           case InstructionFunct::sll:
   1150           case InstructionFunct::srl:
   1151           case InstructionFunct::sra:
   1152             BackpropSetWrites(rd);
   1153             BackpropSetReads(rt);
   1154             break;
   1155 
   1156           case InstructionFunct::sllv:
   1157           case InstructionFunct::srlv:
   1158           case InstructionFunct::srav:
   1159           case InstructionFunct::add:
   1160           case InstructionFunct::addu:
   1161           case InstructionFunct::sub:
   1162           case InstructionFunct::subu:
   1163           case InstructionFunct::and_:
   1164           case InstructionFunct::or_:
   1165           case InstructionFunct::xor_:
   1166           case InstructionFunct::nor:
   1167           case InstructionFunct::slt:
   1168           case InstructionFunct::sltu:
   1169             BackpropSetWrites(rd);
   1170             BackpropSetReads(rt);
   1171             BackpropSetReads(rs);
   1172             break;
   1173 
   1174           case InstructionFunct::jr:
   1175             BackpropSetReads(rs);
   1176             break;
   1177 
   1178           case InstructionFunct::jalr:
   1179             BackpropSetReads(rs);
   1180             BackpropSetWrites(rd);
   1181             break;
   1182 
   1183           case InstructionFunct::mfhi:
   1184             BackpropSetWrites(rd);
   1185             BackpropSetReads(Reg::hi);
   1186             break;
   1187 
   1188           case InstructionFunct::mflo:
   1189             BackpropSetWrites(rd);
   1190             BackpropSetReads(Reg::lo);
   1191             break;
   1192 
   1193           case InstructionFunct::mthi:
   1194             BackpropSetWrites(Reg::hi);
   1195             BackpropSetReads(rs);
   1196             break;
   1197 
   1198           case InstructionFunct::mtlo:
   1199             BackpropSetWrites(Reg::lo);
   1200             BackpropSetReads(rs);
   1201             break;
   1202 
   1203           case InstructionFunct::mult:
   1204           case InstructionFunct::multu:
   1205           case InstructionFunct::div:
   1206           case InstructionFunct::divu:
   1207             BackpropSetWrites(Reg::hi);
   1208             BackpropSetWrites(Reg::lo);
   1209             BackpropSetReads(rs);
   1210             BackpropSetReads(rt);
   1211             break;
   1212 
   1213           case InstructionFunct::syscall:
   1214           case InstructionFunct::break_:
   1215             break;
   1216 
   1217           default:
   1218             ERROR_LOG("Unknown funct {}", static_cast<u32>(iinst->r.funct.GetValue()));
   1219             break;
   1220         }
   1221       }
   1222       break;
   1223 
   1224       case InstructionOp::b:
   1225       {
   1226         if ((static_cast<u8>(iinst->i.rt.GetValue()) & u8(0x1E)) == u8(0x10))
   1227           BackpropSetWrites(Reg::ra);
   1228         BackpropSetReads(rs);
   1229       }
   1230       break;
   1231 
   1232       case InstructionOp::j:
   1233         break;
   1234 
   1235       case InstructionOp::jal:
   1236         BackpropSetWrites(Reg::ra);
   1237         break;
   1238 
   1239       case InstructionOp::beq:
   1240       case InstructionOp::bne:
   1241         BackpropSetReads(rs);
   1242         BackpropSetReads(rt);
   1243         break;
   1244 
   1245       case InstructionOp::blez:
   1246       case InstructionOp::bgtz:
   1247         BackpropSetReads(rs);
   1248         break;
   1249 
   1250       case InstructionOp::addi:
   1251       case InstructionOp::addiu:
   1252       case InstructionOp::slti:
   1253       case InstructionOp::sltiu:
   1254       case InstructionOp::andi:
   1255       case InstructionOp::ori:
   1256       case InstructionOp::xori:
   1257         BackpropSetWrites(rt);
   1258         BackpropSetReads(rs);
   1259         break;
   1260 
   1261       case InstructionOp::lui:
   1262         BackpropSetWrites(rt);
   1263         break;
   1264 
   1265       case InstructionOp::lb:
   1266       case InstructionOp::lh:
   1267       case InstructionOp::lw:
   1268       case InstructionOp::lbu:
   1269       case InstructionOp::lhu:
   1270         BackpropSetWritesDelayed(rt);
   1271         BackpropSetReads(rs);
   1272         break;
   1273 
   1274       case InstructionOp::lwl:
   1275       case InstructionOp::lwr:
   1276         BackpropSetWritesDelayed(rt);
   1277         BackpropSetReads(rs);
   1278         BackpropSetReads(rt);
   1279         break;
   1280 
   1281       case InstructionOp::sb:
   1282       case InstructionOp::sh:
   1283       case InstructionOp::swl:
   1284       case InstructionOp::sw:
   1285       case InstructionOp::swr:
   1286         BackpropSetReads(rt);
   1287         BackpropSetReads(rs);
   1288         break;
   1289 
   1290       case InstructionOp::cop0:
   1291       case InstructionOp::cop2:
   1292       {
   1293         if (iinst->cop.IsCommonInstruction())
   1294         {
   1295           switch (iinst->cop.CommonOp())
   1296           {
   1297             case CopCommonInstruction::mfcn:
   1298             case CopCommonInstruction::cfcn:
   1299               BackpropSetWritesDelayed(rt);
   1300               break;
   1301 
   1302             case CopCommonInstruction::mtcn:
   1303             case CopCommonInstruction::ctcn:
   1304               BackpropSetReads(rt);
   1305               break;
   1306           }
   1307         }
   1308         break;
   1309 
   1310         case InstructionOp::lwc2:
   1311         case InstructionOp::swc2:
   1312           BackpropSetReads(rs);
   1313           BackpropSetReads(rt);
   1314           break;
   1315 
   1316         default:
   1317           ERROR_LOG("Unknown op {}", static_cast<u32>(iinst->op.GetValue()));
   1318           break;
   1319       }
   1320     } // end switch
   1321 
   1322     inst--;
   1323     iinst--;
   1324   } // end while
   1325 }
   1326 
   1327 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
   1328 // MARK: - Recompiler Glue
   1329 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
   1330 
   1331 void CPU::CodeCache::CompileOrRevalidateBlock(u32 start_pc)
   1332 {
   1333   // TODO: this doesn't currently handle when the cache overflows...
   1334   DebugAssert(IsUsingAnyRecompiler());
   1335   MemMap::BeginCodeWrite();
   1336 
   1337   Block* block = LookupBlock(start_pc);
   1338   if (block)
   1339   {
   1340     // we should only be here if the block got invalidated
   1341     DebugAssert(block->state != BlockState::Valid);
   1342     if (RevalidateBlock(block))
   1343     {
   1344       DebugAssert(block->host_code);
   1345       SetCodeLUT(start_pc, block->host_code);
   1346       BacklinkBlocks(start_pc, block->host_code);
   1347       MemMap::EndCodeWrite();
   1348       return;
   1349     }
   1350 
   1351     // remove outward links from this block, since we're recompiling it
   1352     UnlinkBlockExits(block);
   1353 
   1354     // clean up backpatch info so it doesn't keep growing indefinitely
   1355     if (block->HasFlag(BlockFlags::ContainsLoadStoreInstructions))
   1356       RemoveBackpatchInfoForRange(block->host_code, block->host_code_size);
   1357   }
   1358 
   1359   BlockMetadata metadata = {};
   1360   if (!ReadBlockInstructions(start_pc, &s_block_instructions, &metadata))
   1361   {
   1362     ERROR_LOG("Failed to read block at 0x{:08X}, falling back to uncached interpreter", start_pc);
   1363     SetCodeLUT(start_pc, g_interpret_block);
   1364     BacklinkBlocks(start_pc, g_interpret_block);
   1365     MemMap::EndCodeWrite();
   1366     return;
   1367   }
   1368 
   1369   // Ensure we're not going to run out of space while compiling this block.
   1370   // We could definitely do better here... TODO: far code is no longer needed for newrec
   1371   const u32 block_size = static_cast<u32>(s_block_instructions.size());
   1372   if (GetFreeCodeSpace() < (block_size * Recompiler::MAX_NEAR_HOST_BYTES_PER_INSTRUCTION) ||
   1373       GetFreeFarCodeSpace() < (block_size * Recompiler::MAX_FAR_HOST_BYTES_PER_INSTRUCTION))
   1374   {
   1375     ERROR_LOG("Out of code space while compiling {:08X}. Resetting code cache.", start_pc);
   1376     CodeCache::Reset();
   1377   }
   1378 
   1379   if ((block = CreateBlock(start_pc, s_block_instructions, metadata)) == nullptr || block->size == 0 ||
   1380       !CompileBlock(block))
   1381   {
   1382     ERROR_LOG("Failed to compile block at 0x{:08X}, falling back to uncached interpreter", start_pc);
   1383     SetCodeLUT(start_pc, g_interpret_block);
   1384     BacklinkBlocks(start_pc, g_interpret_block);
   1385     MemMap::EndCodeWrite();
   1386     return;
   1387   }
   1388 
   1389   SetCodeLUT(start_pc, block->host_code);
   1390   BacklinkBlocks(start_pc, block->host_code);
   1391   MemMap::EndCodeWrite();
   1392 }
   1393 
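         // DiscardAndRecompileBlock() is the slow path for manually-protected blocks: it is reached from generated
         // code when such a block finds that its source in RAM has been modified, so the stale block is invalidated
         // and recompiled immediately.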
   1394 void CPU::CodeCache::DiscardAndRecompileBlock(u32 start_pc)
   1395 {
   1396   MemMap::BeginCodeWrite();
   1397 
   1398   DEV_LOG("Discard block {:08X} with manual protection", start_pc);
   1399   Block* block = LookupBlock(start_pc);
   1400   DebugAssert(block && block->state == BlockState::Valid);
   1401   InvalidateBlock(block, BlockState::NeedsRecompile);
   1402   CompileOrRevalidateBlock(start_pc);
   1403 
   1404   MemMap::EndCodeWrite();
   1405 }
   1406 
   1407 const void* CPU::CodeCache::CreateBlockLink(Block* block, void* code, u32 newpc)
   1408 {
   1409   // self-linking should be handled by the caller
   1410   DebugAssert(newpc != block->pc);
   1411 
   1412   const void* dst = g_dispatcher;
   1413   if (g_settings.cpu_recompiler_block_linking)
   1414   {
   1415     const Block* next_block = LookupBlock(newpc);
   1416     if (next_block)
   1417     {
   1418       dst = (next_block->state == BlockState::Valid) ?
   1419               next_block->host_code :
   1420               ((next_block->state == BlockState::FallbackToInterpreter) ? g_interpret_block :
   1421                                                                           g_compile_or_revalidate_block);
   1422       DebugAssert(dst);
   1423     }
   1424     else
   1425     {
   1426       dst = g_compile_or_revalidate_block;
   1427     }
   1428 
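             // Record the outgoing link so BacklinkBlocks() can re-point this jump if the target block is later
             // compiled, invalidated, or demoted to the interpreter.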
   1429     BlockLinkMap::iterator iter = s_block_links.emplace(newpc, code);
   1430     DebugAssert(block->num_exit_links < MAX_BLOCK_EXIT_LINKS);
   1431     block->exit_links[block->num_exit_links++] = iter;
   1432   }
   1433 
   1434   DEBUG_LOG("Linking {} with dst pc {:08X} to {}{}", code, newpc, dst,
   1435             (dst == g_compile_or_revalidate_block) ? "[compiler]" : "");
   1436   return dst;
   1437 }
   1438 
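         // Re-points every jump previously recorded against `pc` at `dst`; called whenever the block at pc changes
         // state (newly compiled, revalidated, invalidated, or replaced with an interpreter/compile stub).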
   1439 void CPU::CodeCache::BacklinkBlocks(u32 pc, const void* dst)
   1440 {
   1441   if (!g_settings.cpu_recompiler_block_linking)
   1442     return;
   1443 
   1444   const auto link_range = s_block_links.equal_range(pc);
   1445   for (auto it = link_range.first; it != link_range.second; ++it)
   1446   {
   1447     DEBUG_LOG("Backlinking {} with dst pc {:08X} to {}{}", it->second, pc, dst,
   1448               (dst == g_compile_or_revalidate_block) ? "[compiler]" : "");
   1449     EmitJump(it->second, dst, true);
   1450   }
   1451 }
   1452 
   1453 void CPU::CodeCache::UnlinkBlockExits(Block* block)
   1454 {
   1455   const u32 num_exit_links = block->num_exit_links;
   1456   for (u32 i = 0; i < num_exit_links; i++)
   1457     s_block_links.erase(block->exit_links[i]);
   1458   block->num_exit_links = 0;
   1459 }
   1460 
   1461 void CPU::CodeCache::ResetCodeBuffer()
   1462 {
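           // The near-code region occupies the front of the shared buffer; the far (slow-path/backpatch) region is
           // carved out of its tail below.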
   1463   s_code_ptr = static_cast<u8*>(s_code_buffer_ptr);
   1464   s_free_code_ptr = s_code_ptr;
   1465   s_code_size = RECOMPILER_CODE_CACHE_SIZE - RECOMPILER_FAR_CODE_CACHE_SIZE;
   1466   s_code_used = 0;
   1467 
   1468   // Use half the far code size when using newrec and memory exceptions aren't enabled. It's only used for backpatching.
   1469   const u32 far_code_size =
   1470     (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec && !g_settings.cpu_recompiler_memory_exceptions) ?
   1471       (RECOMPILER_FAR_CODE_CACHE_SIZE / 2) :
   1472       RECOMPILER_FAR_CODE_CACHE_SIZE;
   1473   s_far_code_size = far_code_size;
   1474   s_far_code_ptr = (far_code_size > 0) ? (static_cast<u8*>(s_code_ptr) + s_code_size) : nullptr;
   1475   s_free_far_code_ptr = s_far_code_ptr;
   1476   s_far_code_used = 0;
   1477 
   1478   MemMap::BeginCodeWrite();
   1479 
   1480   std::memset(s_code_ptr, 0, RECOMPILER_CODE_CACHE_SIZE);
   1481   MemMap::FlushInstructionCache(s_code_ptr, RECOMPILER_CODE_CACHE_SIZE);
   1482 
   1483   MemMap::EndCodeWrite();
   1484 }
   1485 
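         // Typical emission pattern against these accessors (see CompileASMFunctions() below): write into the buffer
         // at GetFreeCodePointer(), bounded by GetFreeCodeSpace(), then commit the number of bytes actually produced:
         //   u8* ptr = GetFreeCodePointer();
         //   const u32 size = EmitSomething(ptr, GetFreeCodeSpace()); // EmitSomething is a placeholder emitter
         //   CommitCode(size);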
   1486 u8* CPU::CodeCache::GetFreeCodePointer()
   1487 {
   1488   return s_free_code_ptr;
   1489 }
   1490 
   1491 u32 CPU::CodeCache::GetFreeCodeSpace()
   1492 {
   1493   return s_code_size - s_code_used;
   1494 }
   1495 
   1496 void CPU::CodeCache::CommitCode(u32 length)
   1497 {
   1498   if (length == 0) [[unlikely]]
   1499     return;
   1500 
   1501   MemMap::FlushInstructionCache(s_free_code_ptr, length);
   1502 
   1503   Assert(length <= (s_code_size - s_code_used));
   1504   s_free_code_ptr += length;
   1505   s_code_used += length;
   1506 }
   1507 
   1508 u8* CPU::CodeCache::GetFreeFarCodePointer()
   1509 {
   1510   return s_free_far_code_ptr;
   1511 }
   1512 
   1513 u32 CPU::CodeCache::GetFreeFarCodeSpace()
   1514 {
   1515   return s_far_code_size - s_far_code_used;
   1516 }
   1517 
   1518 void CPU::CodeCache::CommitFarCode(u32 length)
   1519 {
   1520   if (length == 0) [[unlikely]]
   1521     return;
   1522 
   1523   MemMap::FlushInstructionCache(s_free_far_code_ptr, length);
   1524 
   1525   Assert(length <= (s_far_code_size - s_far_code_used));
   1526   s_free_far_code_ptr += length;
   1527   s_far_code_used += length;
   1528 }
   1529 
   1530 void CPU::CodeCache::AlignCode(u32 alignment)
   1531 {
   1532 #if defined(CPU_ARCH_X64)
   1533   constexpr u8 padding_value = 0xcc; // int3
   1534 #else
   1535   constexpr u8 padding_value = 0x00;
   1536 #endif
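           // On x86, 0xcc (int3) padding makes a stray jump into the alignment gap trap immediately instead of
           // executing junk bytes.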
   1537 
   1538   DebugAssert(Common::IsPow2(alignment));
   1539   const u32 num_padding_bytes =
   1540     std::min(static_cast<u32>(Common::AlignUpPow2(reinterpret_cast<uintptr_t>(s_free_code_ptr), alignment) -
   1541                               reinterpret_cast<uintptr_t>(s_free_code_ptr)),
   1542              GetFreeCodeSpace());
   1543   std::memset(s_free_code_ptr, padding_value, num_padding_bytes);
   1544   s_free_code_ptr += num_padding_bytes;
   1545   s_code_used += num_padding_bytes;
   1546 }
   1547 
   1548 const void* CPU::CodeCache::GetInterpretUncachedBlockFunction()
   1549 {
   1550   if (g_settings.gpu_pgxp_enable)
   1551   {
   1552     if (g_settings.gpu_pgxp_cpu)
   1553       return reinterpret_cast<const void*>(InterpretUncachedBlock<PGXPMode::CPU>);
   1554     else
   1555       return reinterpret_cast<const void*>(InterpretUncachedBlock<PGXPMode::Memory>);
   1556   }
   1557   else
   1558   {
   1559     return reinterpret_cast<const void*>(InterpretUncachedBlock<PGXPMode::Disabled>);
   1560   }
   1561 }
   1562 
   1563 void CPU::CodeCache::ClearASMFunctions()
   1564 {
   1565   g_enter_recompiler = nullptr;
   1566   g_compile_or_revalidate_block = nullptr;
   1567   g_check_events_and_dispatch = nullptr;
   1568   g_run_events_and_dispatch = nullptr;
   1569   g_dispatcher = nullptr;
   1570   g_interpret_block = nullptr;
   1571   g_discard_and_recompile_block = nullptr;
   1572 
   1573 #ifdef _DEBUG
   1574   s_total_instructions_compiled = 0;
   1575   s_total_host_instructions_emitted = 0;
   1576 #endif
   1577 }
   1578 
   1579 void CPU::CodeCache::CompileASMFunctions()
   1580 {
   1581   MemMap::BeginCodeWrite();
   1582 
   1583   const u32 asm_size = EmitASMFunctions(GetFreeCodePointer(), GetFreeCodeSpace());
   1584 
   1585 #ifdef ENABLE_RECOMPILER_PROFILING
   1586   MIPSPerfScope.Register(GetFreeCodePointer(), asm_size, "ASMFunctions");
   1587 #endif
   1588 
   1589   CommitCode(asm_size);
   1590   MemMap::EndCodeWrite();
   1591 }
   1592 
   1593 bool CPU::CodeCache::CompileBlock(Block* block)
   1594 {
   1595   const void* host_code = nullptr;
   1596   u32 host_code_size = 0;
   1597   u32 host_far_code_size = 0;
   1598 
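           // Select the backend matching the configured execution mode; both compilers emit into the shared near/far
           // code buffers and return the near-code entry point.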
   1599 #ifdef ENABLE_RECOMPILER
   1600   if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler)
   1601   {
   1602     Recompiler::CodeGenerator codegen;
   1603     host_code = codegen.CompileBlock(block, &host_code_size, &host_far_code_size);
   1604   }
   1605 #endif
   1606 #ifdef ENABLE_NEWREC
   1607   if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
   1608     host_code = NewRec::g_compiler->CompileBlock(block, &host_code_size, &host_far_code_size);
   1609 #endif
   1610 
   1611   block->host_code = host_code;
   1612   block->host_code_size = host_code_size;
   1613 
   1614   if (!host_code)
   1615   {
   1616     ERROR_LOG("Failed to compile host code for block at 0x{:08X}", block->pc);
   1617     block->state = BlockState::FallbackToInterpreter;
   1618     return false;
   1619   }
   1620 
   1621 #ifdef DUMP_CODE_SIZE_STATS
   1622   const u32 host_instructions = GetHostInstructionCount(host_code, host_code_size);
   1623   s_total_instructions_compiled += block->size;
   1624   s_total_host_instructions_emitted += host_instructions;
   1625 
   1626   DEV_LOG("0x{:08X}: {}/{}b for {}b ({}i), blowup: {:.2f}x, cache: {:.2f}%/{:.2f}%, ipi: {:.2f}/{:.2f}", block->pc,
   1627           host_code_size, host_far_code_size, block->size * 4, block->size,
   1628           static_cast<float>(host_code_size) / static_cast<float>(block->size * 4),
   1629           (static_cast<float>(s_code_used) / static_cast<float>(s_code_size)) * 100.0f,
   1630           (static_cast<float>(s_far_code_used) / static_cast<float>(s_far_code_size)) * 100.0f,
   1631           static_cast<float>(host_instructions) / static_cast<float>(block->size),
   1632           static_cast<float>(s_total_host_instructions_emitted) / static_cast<float>(s_total_instructions_compiled));
   1633 #endif
   1634 
   1635 #if 0
   1636   Log_DebugPrint("***HOST CODE**");
   1637   DisassembleAndLogHostCode(host_code, host_code_size);
   1638 #endif
   1639 
   1640 #ifdef ENABLE_RECOMPILER_PROFILING
   1641   MIPSPerfScope.RegisterPC(host_code, host_code_size, block->pc);
   1642 #endif
   1643 
   1644   return true;
   1645 }
   1646 
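         // This overload records an already-generated slow-path thunk (thunk_address) for the access, in contrast to
         // the overload below, which stores the full register/size/cycle description needed to synthesise one.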
   1647 void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, const void* thunk_address)
   1648 {
   1649   DebugAssert(code_size < std::numeric_limits<u8>::max());
   1650 
   1651   auto iter = s_fastmem_backpatch_info.find(code_address);
   1652   if (iter != s_fastmem_backpatch_info.end())
   1653     s_fastmem_backpatch_info.erase(iter);
   1654 
   1655   LoadstoreBackpatchInfo info;
   1656   info.thunk_address = thunk_address;
   1657   info.guest_pc = guest_pc;
   1658   info.guest_block = 0;
   1659   info.code_size = static_cast<u8>(code_size);
   1660   s_fastmem_backpatch_info.emplace(code_address, info);
   1661 }
   1662 
   1663 void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, u32 guest_block,
   1664                                       TickCount cycles, u32 gpr_bitmask, u8 address_register, u8 data_register,
   1665                                       MemoryAccessSize size, bool is_signed, bool is_load)
   1666 {
   1667   DebugAssert(code_size < std::numeric_limits<u8>::max());
   1668   DebugAssert(cycles >= 0 && cycles < std::numeric_limits<u16>::max());
   1669 
   1670   auto iter = s_fastmem_backpatch_info.find(code_address);
   1671   if (iter != s_fastmem_backpatch_info.end())
   1672     s_fastmem_backpatch_info.erase(iter);
   1673 
   1674   LoadstoreBackpatchInfo info;
   1675   info.thunk_address = nullptr;
   1676   info.guest_pc = guest_pc;
   1677   info.guest_block = guest_block;
   1678   info.gpr_bitmask = gpr_bitmask;
   1679   info.cycles = static_cast<u16>(cycles);
   1680   info.address_register = address_register;
   1681   info.data_register = data_register;
   1682   info.size = static_cast<u16>(size);
   1683   info.is_signed = is_signed;
   1684   info.is_load = is_load;
   1685   info.code_size = static_cast<u8>(code_size);
   1686   s_fastmem_backpatch_info.emplace(code_address, info);
   1687 }
   1688 
   1689 PageFaultHandler::HandlerResult CPU::CodeCache::HandleFastmemException(void* exception_pc, void* fault_address,
   1690                                                                        bool is_write)
   1691 {
   1692   PhysicalMemoryAddress guest_address;
   1693 
   1694 #ifdef ENABLE_MMAP_FASTMEM
   1695   if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
   1696   {
   1697     if (static_cast<u8*>(fault_address) < static_cast<u8*>(g_state.fastmem_base) ||
   1698         (static_cast<u8*>(fault_address) - static_cast<u8*>(g_state.fastmem_base)) >=
   1699           static_cast<ptrdiff_t>(Bus::FASTMEM_ARENA_SIZE))
   1700     {
   1701       return PageFaultHandler::HandlerResult::ExecuteNextHandler;
   1702     }
   1703 
   1704     guest_address = static_cast<PhysicalMemoryAddress>(
   1705       static_cast<ptrdiff_t>(static_cast<u8*>(fault_address) - static_cast<u8*>(g_state.fastmem_base)));
   1706 
    1707     // If we're writing to RAM, allow the write and invalidate the page's blocks; after enough invalidations the page switches to manual protection.
   1708     // TODO: path for manual protection to return back to read-only pages
   1709     if (is_write && !g_state.cop0_regs.sr.Isc && AddressInRAM(guest_address))
   1710     {
   1711       DEV_LOG("Ignoring fault due to RAM write @ 0x{:08X}", guest_address);
   1712       InvalidateBlocksWithPageIndex(Bus::GetRAMCodePageIndex(guest_address));
   1713       return PageFaultHandler::HandlerResult::ContinueExecution;
   1714     }
   1715   }
   1716   else
   1717 #endif
   1718   {
   1719     // LUT fastmem - we can't compute the address.
   1720     guest_address = std::numeric_limits<PhysicalMemoryAddress>::max();
   1721   }
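           // With LUT fastmem, guest_address stays at the sentinel value; the backpatch info looked up below is all
           // that is needed to rewrite the faulting access.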
   1722 
   1723   DEV_LOG("Page fault handler invoked at PC={} Address={} {}, fastmem offset {:08X}", exception_pc, fault_address,
   1724           is_write ? "(write)" : "(read)", guest_address);
   1725 
   1726   auto iter = s_fastmem_backpatch_info.find(exception_pc);
   1727   if (iter == s_fastmem_backpatch_info.end())
   1728   {
   1729     ERROR_LOG("No backpatch info found for {}", exception_pc);
   1730     return PageFaultHandler::HandlerResult::ExecuteNextHandler;
   1731   }
   1732 
   1733   LoadstoreBackpatchInfo& info = iter->second;
   1734   DEV_LOG("Backpatching {} at {}[{}] (pc {:08X} addr {:08X}): Bitmask {:08X} Addr {} Data {} Size {} Signed {:02X}",
   1735           info.is_load ? "load" : "store", exception_pc, info.code_size, info.guest_pc, guest_address, info.gpr_bitmask,
   1736           static_cast<unsigned>(info.address_register), static_cast<unsigned>(info.data_register),
   1737           info.AccessSizeInBytes(), static_cast<unsigned>(info.is_signed));
   1738 
   1739   MemMap::BeginCodeWrite();
   1740 
   1741   BackpatchLoadStore(exception_pc, info);
   1742 
   1743   // queue block for recompilation later
   1744   if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
   1745   {
   1746     Block* block = LookupBlock(info.guest_block);
   1747     if (block)
   1748     {
   1749       // This is a bit annoying, we have to remove it from the page list if it's a RAM block.
   1750       DEV_LOG("Queuing block {:08X} for recompilation due to backpatch", block->pc);
   1751       RemoveBlockFromPageList(block);
   1752       InvalidateBlock(block, BlockState::NeedsRecompile);
   1753 
    1754       // Reset the recompile count, otherwise repeated backpatch-triggered recompiles would demote the block to the interpreter fallback.
   1755       block->compile_frame = System::GetFrameNumber();
   1756       block->compile_count = 1;
   1757     }
   1758   }
   1759 
   1760   MemMap::EndCodeWrite();
   1761 
   1762   // and store the pc in the faulting list, so that we don't emit another fastmem loadstore
   1763   s_fastmem_faulting_pcs.insert(info.guest_pc);
   1764   s_fastmem_backpatch_info.erase(iter);
   1765   return PageFaultHandler::HandlerResult::ContinueExecution;
   1766 }
   1767 
   1768 bool CPU::CodeCache::HasPreviouslyFaultedOnPC(u32 guest_pc)
   1769 {
   1770   return (s_fastmem_faulting_pcs.find(guest_pc) != s_fastmem_faulting_pcs.end());
   1771 }
   1772 
   1773 void CPU::CodeCache::BackpatchLoadStore(void* host_pc, const LoadstoreBackpatchInfo& info)
   1774 {
   1775 #ifdef ENABLE_RECOMPILER
   1776   if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler)
   1777     Recompiler::CodeGenerator::BackpatchLoadStore(host_pc, info);
   1778 #endif
   1779 #ifdef ENABLE_NEWREC
   1780   if (g_settings.cpu_execution_mode == CPUExecutionMode::NewRec)
   1781     NewRec::BackpatchLoadStore(host_pc, info);
   1782 #endif
   1783 }
   1784 
   1785 void CPU::CodeCache::RemoveBackpatchInfoForRange(const void* host_code, u32 size)
   1786 {
   1787   const u8* start = static_cast<const u8*>(host_code);
   1788   const u8* end = start + size;
   1789 
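           // s_fastmem_backpatch_info is ordered by host code address, so lower_bound() plus a forward scan covers
           // exactly the entries that were emitted inside this block's code range.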
   1790   auto start_iter = s_fastmem_backpatch_info.lower_bound(start);
   1791   if (start_iter == s_fastmem_backpatch_info.end())
   1792     return;
   1793 
   1794   // this might point to another block, so bail out in that case
   1795   if (start_iter->first >= end)
   1796     return;
   1797 
    1798   // find the exclusive end of the range (first entry at or past the end of this block's code)
   1799   auto end_iter = start_iter;
   1800   do
   1801   {
   1802     ++end_iter;
   1803   } while (end_iter != s_fastmem_backpatch_info.end() && end_iter->first < end);
   1804 
   1805   // erase the whole range at once
   1806   s_fastmem_backpatch_info.erase(start_iter, end_iter);
   1807 }