duckstation

DuckStation, archived at the revision just before upstream relicensed the project as proprietary software; this copy is the free/libre version.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

cpu_newrec_compiler.cpp (95159B)


      1 // SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
      3 
      4 #include "cpu_newrec_compiler.h"
      5 #include "common/assert.h"
      6 #include "common/log.h"
      7 #include "common/small_string.h"
      8 #include "cpu_code_cache.h"
      9 #include "cpu_core_private.h"
     10 #include "cpu_disasm.h"
     11 #include "cpu_pgxp.h"
     12 #include "settings.h"
     13 #include <cstdint>
     14 #include <limits>
     15 Log_SetChannel(NewRec::Compiler);
     16 
     17 // TODO: direct link skip delay slot check
     18 // TODO: speculative constants
     19 // TODO: std::bitset in msvc has bounds checks even in release...
     20 
// PGXP memory-access shims, indexed by access size (0 = byte, 1 = halfword, 2 = word).
// For loads, the second index selects a variant: the halfword row is {CPU_LHU, CPU_LH},
// so presumably [0] = zero-extended and [1] = sign-extended — byte loads use CPU_LBx in
// both slots (sign handling inside the shim), and the word row only fills slot 0.
// TODO(review): confirm the second-index semantics against the load-emission call sites.
const std::array<std::array<const void*, 2>, 3> CPU::NewRec::Compiler::s_pgxp_mem_load_functions = {
  {{{reinterpret_cast<const void*>(&PGXP::CPU_LBx), reinterpret_cast<const void*>(&PGXP::CPU_LBx)}},
   {{reinterpret_cast<const void*>(&PGXP::CPU_LHU), reinterpret_cast<const void*>(&PGXP::CPU_LH)}},
   {{reinterpret_cast<const void*>(&PGXP::CPU_LW)}}}};
// PGXP store shims, indexed the same way by access size (byte, halfword, word).
const std::array<const void*, 3> CPU::NewRec::Compiler::s_pgxp_mem_store_functions = {
  {reinterpret_cast<const void*>(&PGXP::CPU_SB), reinterpret_cast<const void*>(&PGXP::CPU_SH),
   reinterpret_cast<const void*>(&PGXP::CPU_SW)}};
     28 
// Construction/destruction have no work of their own; all per-block state is
// (re)initialized through Reset() before each compile.
CPU::NewRec::Compiler::Compiler() = default;

CPU::NewRec::Compiler::~Compiler() = default;
     32 
// Re-initializes all per-block compiler state ahead of compiling `block`.
// After this call no host registers are allocated, no constants are tracked
// (except the always-zero $zero), and no in-compiler load delay is pending.
// NOTE(review): the code-buffer arguments are not referenced in this base
// implementation; presumably consumed by the architecture-specific subclass'
// override that chains to this — confirm against the backends.
void CPU::NewRec::Compiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer,
                                  u32 far_code_space)
{
  m_block = block;
  m_compiler_pc = block->pc;
  m_cycles = 0;
  m_gte_done_cycle = 0;
  inst = nullptr;
  iinfo = nullptr;
  m_current_instruction_pc = 0;
  m_current_instruction_branch_delay_slot = false;
  m_dirty_pc = false;
  m_dirty_instruction_bits = false;
  m_dirty_gte_done_cycle = true;
  m_block_ended = false;
  m_constant_reg_values.fill(0);
  m_constant_regs_valid.reset();
  m_constant_regs_dirty.reset();

  // Release any host register bindings left over from the previous block.
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
    ClearHostReg(i);
  m_register_alloc_counter = 0;

  // $zero is permanently the constant 0.
  m_constant_reg_values[static_cast<u32>(Reg::zero)] = 0;
  m_constant_regs_valid.set(static_cast<u32>(Reg::zero));

  // When load delays are emulated, an interpreter-visible delay may still be
  // live in CPU state on block entry, so start out treating it as dirty.
  m_load_delay_dirty = EMULATE_LOAD_DELAYS;
  m_load_delay_register = Reg::count;
  m_load_delay_value_register = NUM_HOST_REGS;

  InitSpeculativeRegs();
}
     65 
// Emits the per-block prologue — optional state logging (compiled out), the
// manual page-protection check, the I-cache check/update and the BIOS TTY
// syscall hooks — then positions the compiler at the block's first instruction.
void CPU::NewRec::Compiler::BeginBlock()
{
#if 0
  GenerateCall(reinterpret_cast<const void*>(&CPU::CodeCache::LogCurrentState));
#endif

  if (m_block->protection == CodeCache::PageProtectionMode::ManualCheck)
  {
    DEBUG_LOG("Generate manual protection for PC {:08X}", m_block->pc);
    // Runtime compare of RAM against the shadow copy of the block's
    // instructions, for blocks on pages that can't be write-protected.
    const u8* ram_ptr = Bus::g_ram + VirtualAddressToPhysical(m_block->pc);
    const u8* shadow_ptr = reinterpret_cast<const u8*>(m_block->Instructions());
    GenerateBlockProtectCheck(ram_ptr, shadow_ptr, m_block->size * sizeof(Instruction));
  }

  GenerateICacheCheckAndUpdate();

  // 0xA0/0xB0 are the BIOS function dispatch entry points; hook them for TTY
  // output logging when enabled.
  if (g_settings.bios_tty_logging)
  {
    if (m_block->pc == 0xa0)
      GenerateCall(reinterpret_cast<const void*>(&CPU::HandleA0Syscall));
    else if (m_block->pc == 0xb0)
      GenerateCall(reinterpret_cast<const void*>(&CPU::HandleB0Syscall));
  }

  inst = m_block->Instructions();
  iinfo = m_block->InstructionsInfo();
  m_current_instruction_pc = m_block->pc;
  m_current_instruction_branch_delay_slot = false;
  // m_compiler_pc tracks the instruction *after* the one being compiled.
  m_compiler_pc += sizeof(Instruction);
  m_dirty_pc = true;
  m_dirty_instruction_bits = true;
}
     98 
// Top-level entry point: compiles `block` into the code cache's free near/far
// space and returns a pointer to the generated (near) code. The emitted sizes
// are returned through the out-parameters, and both regions are committed to
// the cache before returning.
const void* CPU::NewRec::Compiler::CompileBlock(CodeCache::Block* block, u32* host_code_size, u32* host_far_code_size)
{
  Reset(block, CPU::CodeCache::GetFreeCodePointer(), CPU::CodeCache::GetFreeCodeSpace(),
        CPU::CodeCache::GetFreeFarCodePointer(), CPU::CodeCache::GetFreeFarCodeSpace());

  DEBUG_LOG("Block range: {:08X} -> {:08X}", block->pc, block->pc + block->size * 4);

  BeginBlock();

  for (;;)
  {
    CompileInstruction();

    if (m_block_ended || iinfo->is_last_instruction)
    {
      if (!m_block_ended)
      {
        // Block was truncated. Link it.
        EndBlock(m_compiler_pc, false);
      }

      break;
    }

    // Advance to the next instruction; the PC and instruction bits stored in
    // CPU state are now stale and must be rewritten before anything observes
    // them (hence the dirty flags).
    inst++;
    iinfo++;
    m_current_instruction_pc += sizeof(Instruction);
    m_compiler_pc += sizeof(Instruction);
    m_dirty_pc = true;
    m_dirty_instruction_bits = true;
  }

  // Nothing should be valid anymore — ending the block must have flushed every
  // host register and constant back to CPU state.
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
    DebugAssert(!IsHostRegAllocated(i));
  for (u32 i = 1; i < static_cast<u32>(Reg::count); i++)
    DebugAssert(!m_constant_regs_dirty.test(i) && !m_constant_regs_valid.test(i));
  m_speculative_constants.memory.clear();

  u32 code_size, far_code_size;
  const void* code = EndCompile(&code_size, &far_code_size);
  *host_code_size = code_size;
  *host_far_code_size = far_code_size;
  CPU::CodeCache::CommitCode(code_size);
  CPU::CodeCache::CommitFarCode(far_code_size);

  return code;
}
    147 
    148 void CPU::NewRec::Compiler::SetConstantReg(Reg r, u32 v)
    149 {
    150   DebugAssert(r < Reg::count && r != Reg::zero);
    151 
    152   // There might still be an incoming load delay which we need to cancel.
    153   CancelLoadDelaysToReg(r);
    154 
    155   if (m_constant_regs_valid.test(static_cast<u32>(r)) && m_constant_reg_values[static_cast<u8>(r)] == v)
    156   {
    157     // Shouldn't be any host regs though.
    158     DebugAssert(!CheckHostReg(0, HR_TYPE_CPU_REG, r).has_value());
    159     return;
    160   }
    161 
    162   m_constant_reg_values[static_cast<u32>(r)] = v;
    163   m_constant_regs_valid.set(static_cast<u32>(r));
    164   m_constant_regs_dirty.set(static_cast<u32>(r));
    165 
    166   if (const std::optional<u32> hostreg = CheckHostReg(0, HR_TYPE_CPU_REG, r); hostreg.has_value())
    167   {
    168     DEBUG_LOG("Discarding guest register {} in host register {} due to constant set", GetRegName(r),
    169               GetHostRegName(hostreg.value()));
    170     FreeHostReg(hostreg.value());
    171   }
    172 }
    173 
    174 void CPU::NewRec::Compiler::CancelLoadDelaysToReg(Reg reg)
    175 {
    176   if (m_load_delay_register != reg)
    177     return;
    178 
    179   DEBUG_LOG("Cancelling load delay to {}", GetRegName(reg));
    180   m_load_delay_register = Reg::count;
    181   if (m_load_delay_value_register != NUM_HOST_REGS)
    182     ClearHostReg(m_load_delay_value_register);
    183 }
    184 
// Advances the emulated load-delay pipeline by one instruction:
//  1. If an inter-block delay may be live in CPU state (m_load_delay_dirty),
//     invalidate anything it could alias and pull it into the compiler.
//  2. Commit the currently pending delayed load into its target register.
//  3. Promote the "next" load delay (scheduled by the previous instruction)
//     into the current slot.
void CPU::NewRec::Compiler::UpdateLoadDelay()
{
  if (m_load_delay_dirty)
  {
    // we shouldn't have a static load delay.
    DebugAssert(!HasLoadDelay());

    // have to invalidate registers, we might have one of them cached
    // TODO: double check the order here, will we trash a new value? we shouldn't...
    // thankfully since this only happens on the first instruction, we can get away with just killing anything which
    // isn't in write mode, because nothing could've been written before it, and the new value overwrites any
    // load-delayed value
    DEBUG_LOG("Invalidating non-dirty registers, and flushing load delay from state");

    constexpr u32 req_flags = (HR_ALLOCATED | HR_MODE_WRITE);

    for (u32 i = 0; i < NUM_HOST_REGS; i++)
    {
      HostRegAlloc& ra = m_host_regs[i];
      if (ra.type != HR_TYPE_CPU_REG || !IsHostRegAllocated(i) || ((ra.flags & req_flags) == req_flags))
        continue;

      DEBUG_LOG("Freeing non-dirty cached register {} in {}", GetRegName(ra.reg), GetHostRegName(i));
      DebugAssert(!(ra.flags & HR_MODE_WRITE));
      ClearHostReg(i);
    }

    // remove any non-dirty constants too
    for (u32 i = 1; i < static_cast<u32>(Reg::count); i++)
    {
      if (!HasConstantReg(static_cast<Reg>(i)) || HasDirtyConstantReg(static_cast<Reg>(i)))
        continue;

      DEBUG_LOG("Clearing non-dirty constant {}", GetRegName(static_cast<Reg>(i)));
      ClearConstantReg(static_cast<Reg>(i));
    }

    Flush(FLUSH_LOAD_DELAY_FROM_STATE);
  }

  // commit the delayed register load
  FinishLoadDelay();

  // move next load delay forward
  if (m_next_load_delay_register != Reg::count)
  {
    // if it somehow got flushed, read it back in.
    if (m_next_load_delay_value_register == NUM_HOST_REGS)
    {
      AllocateHostReg(HR_MODE_READ, HR_TYPE_NEXT_LOAD_DELAY_VALUE, m_next_load_delay_register);
      DebugAssert(m_next_load_delay_value_register != NUM_HOST_REGS);
    }

    // Re-tag the host register from "next" to "current" delay value; it must
    // be written back to load_delay_value if it gets flushed from here on.
    HostRegAlloc& ra = m_host_regs[m_next_load_delay_value_register];
    ra.flags |= HR_MODE_WRITE;
    ra.type = HR_TYPE_LOAD_DELAY_VALUE;

    m_load_delay_register = m_next_load_delay_register;
    m_load_delay_value_register = m_next_load_delay_value_register;
    m_next_load_delay_register = Reg::count;
    m_next_load_delay_value_register = NUM_HOST_REGS;
  }
}
    248 
// Commits the pending delayed load: the host register holding the delayed
// value becomes the canonical dirty cache of the target guest register, and
// any stale cached copy or tracked constant for that register is discarded.
// No-op when no load delay is pending; must not be called while the delay
// state is still dirty (i.e. only after UpdateLoadDelay's flush).
void CPU::NewRec::Compiler::FinishLoadDelay()
{
  DebugAssert(!m_load_delay_dirty);
  if (!HasLoadDelay())
    return;

  // we may need to reload the value..
  if (m_load_delay_value_register == NUM_HOST_REGS)
  {
    AllocateHostReg(HR_MODE_READ, HR_TYPE_LOAD_DELAY_VALUE, m_load_delay_register);
    DebugAssert(m_load_delay_value_register != NUM_HOST_REGS);
  }

  // kill any (old) cached value for this register
  DeleteMIPSReg(m_load_delay_register, false);

  DEBUG_LOG("Finished delayed load to {} in host register {}", GetRegName(m_load_delay_register),
            GetHostRegName(m_load_delay_value_register));

  // and swap the mode over so it gets written back later
  HostRegAlloc& ra = m_host_regs[m_load_delay_value_register];
  DebugAssert(ra.reg == m_load_delay_register);
  ra.flags = (ra.flags & IMMUTABLE_HR_FLAGS) | HR_ALLOCATED | HR_MODE_READ | HR_MODE_WRITE;
  ra.counter = m_register_alloc_counter++;
  ra.type = HR_TYPE_CPU_REG;

  // constants are gone
  DEBUG_LOG("Clearing constant in {} due to load delay", GetRegName(m_load_delay_register));
  ClearConstantReg(m_load_delay_register);

  m_load_delay_register = Reg::count;
  m_load_delay_value_register = NUM_HOST_REGS;
}
    282 
    283 void CPU::NewRec::Compiler::FinishLoadDelayToReg(Reg reg)
    284 {
    285   if (m_load_delay_dirty)
    286   {
    287     // inter-block :(
    288     UpdateLoadDelay();
    289     return;
    290   }
    291 
    292   if (m_load_delay_register != reg)
    293     return;
    294 
    295   FinishLoadDelay();
    296 }
    297 
    298 u32 CPU::NewRec::Compiler::GetFlagsForNewLoadDelayedReg() const
    299 {
    300   return g_settings.gpu_pgxp_enable ? (HR_MODE_WRITE | HR_CALLEE_SAVED) : (HR_MODE_WRITE);
    301 }
    302 
    303 void CPU::NewRec::Compiler::ClearConstantReg(Reg r)
    304 {
    305   DebugAssert(r < Reg::count && r != Reg::zero);
    306   m_constant_reg_values[static_cast<u32>(r)] = 0;
    307   m_constant_regs_valid.reset(static_cast<u32>(r));
    308   m_constant_regs_dirty.reset(static_cast<u32>(r));
    309 }
    310 
    311 void CPU::NewRec::Compiler::FlushConstantRegs(bool invalidate)
    312 {
    313   for (u32 i = 1; i < static_cast<u32>(Reg::count); i++)
    314   {
    315     if (m_constant_regs_dirty.test(static_cast<u32>(i)))
    316       FlushConstantReg(static_cast<Reg>(i));
    317     if (invalidate)
    318       ClearConstantReg(static_cast<Reg>(i));
    319   }
    320 }
    321 
// Convenience accessor: the rd field of the instruction currently being compiled.
CPU::Reg CPU::NewRec::Compiler::MipsD() const
{
  return inst->r.rd;
}
    326 
    327 u32 CPU::NewRec::Compiler::GetConditionalBranchTarget(CompileFlags cf) const
    328 {
    329   // compiler pc has already been advanced when swapping branch delay slots
    330   const u32 current_pc = m_compiler_pc - (cf.delay_slot_swapped ? sizeof(Instruction) : 0);
    331   return current_pc + (inst->i.imm_sext32() << 2);
    332 }
    333 
    334 u32 CPU::NewRec::Compiler::GetBranchReturnAddress(CompileFlags cf) const
    335 {
    336   // compiler pc has already been advanced when swapping branch delay slots
    337   return m_compiler_pc + (cf.delay_slot_swapped ? 0 : sizeof(Instruction));
    338 }
    339 
// Attempts to compile the branch's delay-slot instruction *before* the branch
// itself. The swap is only performed when the delay-slot instruction provably
// does not write any register the branch reads (rs/rt), does not read/write
// the register the branch writes (rd, e.g. the link register), and has no
// ordering-sensitive side effects. Returns true when the slot was compiled
// here (the caller must then skip it), false to fall back to normal ordering.
bool CPU::NewRec::Compiler::TrySwapDelaySlot(Reg rs, Reg rt, Reg rd)
{
  if constexpr (!SWAP_BRANCH_DELAY_SLOTS)
    return false;

  const Instruction* next_instruction = inst + 1;
  DebugAssert(next_instruction < (m_block->Instructions() + m_block->size));

  // Fields of the delay-slot instruction, named from its own encoding.
  const Reg opcode_rs = next_instruction->r.rs;
  const Reg opcode_rt = next_instruction->r.rt;
  const Reg opcode_rd = next_instruction->r.rd;

#ifdef _DEBUG
  TinyString disasm;
  DisassembleInstruction(&disasm, m_current_instruction_pc + 4, next_instruction->bits);
#endif

  // Just in case we read it in the instruction.. but the block should end after this.
  const Instruction* const backup_instruction = inst;
  const u32 backup_instruction_pc = m_current_instruction_pc;
  const bool backup_instruction_delay_slot = m_current_instruction_branch_delay_slot;

  if (next_instruction->bits == 0)
  {
    // nop
    goto is_safe;
  }

  // can't swap when the branch is the first instruction because of bloody load delays
  if ((EMULATE_LOAD_DELAYS && m_block->pc == m_current_instruction_pc) || m_load_delay_dirty ||
      (HasLoadDelay() && (m_load_delay_register == rs || m_load_delay_register == rt || m_load_delay_register == rd)))
  {
    goto is_unsafe;
  }

  switch (next_instruction->op)
  {
    // ALU-immediate and load forms write opcode_rt; unsafe if the branch reads
    // that register, or if the branch's destination feeds the slot's operands.
    case InstructionOp::addi:
    case InstructionOp::addiu:
    case InstructionOp::slti:
    case InstructionOp::sltiu:
    case InstructionOp::andi:
    case InstructionOp::ori:
    case InstructionOp::xori:
    case InstructionOp::lui:
    case InstructionOp::lb:
    case InstructionOp::lh:
    case InstructionOp::lwl:
    case InstructionOp::lw:
    case InstructionOp::lbu:
    case InstructionOp::lhu:
    case InstructionOp::lwr:
    {
      if ((rs != Reg::zero && rs == opcode_rt) || (rt != Reg::zero && rt == opcode_rt) ||
          (rd != Reg::zero && (rd == opcode_rs || rd == opcode_rt)))
      {
        goto is_unsafe;
      }
    }
    break;

    // Stores and COP2 load/store write no GPRs — always safe to hoist.
    case InstructionOp::sb:
    case InstructionOp::sh:
    case InstructionOp::swl:
    case InstructionOp::sw:
    case InstructionOp::swr:
    case InstructionOp::lwc2:
    case InstructionOp::swc2:
      break;

    case InstructionOp::funct: // SPECIAL
    {
      switch (next_instruction->r.funct)
      {
        // Three-register ALU forms write opcode_rd.
        case InstructionFunct::sll:
        case InstructionFunct::srl:
        case InstructionFunct::sra:
        case InstructionFunct::sllv:
        case InstructionFunct::srlv:
        case InstructionFunct::srav:
        case InstructionFunct::add:
        case InstructionFunct::addu:
        case InstructionFunct::sub:
        case InstructionFunct::subu:
        case InstructionFunct::and_:
        case InstructionFunct::or_:
        case InstructionFunct::xor_:
        case InstructionFunct::nor:
        case InstructionFunct::slt:
        case InstructionFunct::sltu:
        {
          if ((rs != Reg::zero && rs == opcode_rd) || (rt != Reg::zero && rt == opcode_rd) ||
              (rd != Reg::zero && (rd == opcode_rs || rd == opcode_rt)))
          {
            goto is_unsafe;
          }
        }
        break;

        // Multiply/divide only write HI/LO, which the branch cannot read.
        case InstructionFunct::mult:
        case InstructionFunct::multu:
        case InstructionFunct::div:
        case InstructionFunct::divu:
          break;

        default:
          goto is_unsafe;
      }
    }
    break;

    case InstructionOp::cop0: // COP0
    case InstructionOp::cop1: // COP1
    case InstructionOp::cop2: // COP2
    case InstructionOp::cop3: // COP3
    {
      if (next_instruction->cop.IsCommonInstruction())
      {
        switch (next_instruction->cop.CommonOp())
        {
          // Moves from the coprocessor write opcode_rt.
          case CopCommonInstruction::mfcn: // MFC0
          case CopCommonInstruction::cfcn: // CFC0
          {
            if ((rs != Reg::zero && rs == opcode_rt) || (rt != Reg::zero && rt == opcode_rt) ||
                (rd != Reg::zero && rd == opcode_rt))
            {
              goto is_unsafe;
            }
          }
          break;

          // Moves to the coprocessor write no GPRs.
          case CopCommonInstruction::mtcn: // MTC0
          case CopCommonInstruction::ctcn: // CTC0
            break;
        }
      }
      else
      {
        // swap when it's GTE
        if (next_instruction->op != InstructionOp::cop2)
          goto is_unsafe;
      }
    }
    break;

    default:
      goto is_unsafe;
  }

is_safe:
#ifdef _DEBUG
  DEBUG_LOG("Swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm);
#endif

  CompileBranchDelaySlot();

  // Restore the branch as the current instruction; CompileBranchDelaySlot
  // advanced the per-instruction state while compiling the slot.
  inst = backup_instruction;
  m_current_instruction_pc = backup_instruction_pc;
  m_current_instruction_branch_delay_slot = backup_instruction_delay_slot;
  return true;

is_unsafe:
#ifdef _DEBUG
  DEBUG_LOG("NOT swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm);
#endif

  return false;
}
    508 
// Redirects compilation to newpc (e.g. after a branch) and marks the PC in
// CPU state as stale so it gets written back before anything observes it.
void CPU::NewRec::Compiler::SetCompilerPC(u32 newpc)
{
  m_compiler_pc = newpc;
  m_dirty_pc = true;
}
    514 
// Picks a host register satisfying `flags` (currently only HR_CALLEE_SAVED is
// honored from it). Preference order: an unallocated callee-saved register,
// then any unallocated usable register, then the least-recently-used evictable
// allocation (never temps, never HR_NEEDED). When evicting a guest register
// that is still used later in the block and a callee-saved reg was requested,
// it tries to migrate the victim to a caller-saved register instead of
// spilling it. Asserts if nothing can be found.
u32 CPU::NewRec::Compiler::GetFreeHostReg(u32 flags)
{
  const u32 req_flags = HR_USABLE | (flags & HR_CALLEE_SAVED);

  u32 fallback = NUM_HOST_REGS;
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    // Usable, not needed this instruction, and not currently allocated.
    if ((m_host_regs[i].flags & (req_flags | HR_NEEDED | HR_ALLOCATED)) == req_flags)
    {
      // Prefer callee-saved registers.
      if (m_host_regs[i].flags & HR_CALLEE_SAVED)
        return i;
      else if (fallback == NUM_HOST_REGS)
        fallback = i;
    }
  }
  if (fallback != NUM_HOST_REGS)
    return fallback;

  // find register with lowest counter
  u32 lowest = NUM_HOST_REGS;
  u32 lowest_count = std::numeric_limits<u32>::max();
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    const HostRegAlloc& ra = m_host_regs[i];
    if ((ra.flags & (req_flags | HR_NEEDED)) != req_flags)
      continue;

    DebugAssert(ra.flags & HR_ALLOCATED);
    if (ra.type == HR_TYPE_TEMP)
    {
      // can't punt temps
      continue;
    }

    if (ra.counter < lowest_count)
    {
      lowest = i;
      lowest_count = ra.counter;
    }
  }

  // Evict the LRU candidate found above.

  AssertMsg(lowest != NUM_HOST_REGS, "Register allocation failed.");

  const HostRegAlloc& ra = m_host_regs[lowest];
  switch (ra.type)
  {
    case HR_TYPE_CPU_REG:
    {
      // If the register is needed later, and we're allocating a callee-saved register, try moving it to a caller-saved
      // register.
      if (iinfo->UsedTest(ra.reg) && flags & HR_CALLEE_SAVED)
      {
        // Find the best caller-saved destination: an unallocated one wins
        // outright, otherwise the LRU evictable caller-saved allocation.
        u32 caller_saved_lowest = NUM_HOST_REGS;
        u32 caller_saved_lowest_count = std::numeric_limits<u32>::max();
        for (u32 i = 0; i < NUM_HOST_REGS; i++)
        {
          constexpr u32 caller_req_flags = HR_USABLE;
          constexpr u32 caller_req_mask = HR_USABLE | HR_NEEDED | HR_CALLEE_SAVED;
          const HostRegAlloc& caller_ra = m_host_regs[i];
          if ((caller_ra.flags & caller_req_mask) != caller_req_flags)
            continue;

          if (!(caller_ra.flags & HR_ALLOCATED))
          {
            caller_saved_lowest = i;
            caller_saved_lowest_count = 0;
            break;
          }

          if (caller_ra.type == HR_TYPE_TEMP)
            continue;

          if (caller_ra.counter < caller_saved_lowest_count)
          {
            caller_saved_lowest = i;
            caller_saved_lowest_count = caller_ra.counter;
          }
        }

        // Only migrate if the caller-saved victim is colder than ours.
        if (caller_saved_lowest_count < lowest_count)
        {
          DEBUG_LOG("Moving caller-saved host register {} with MIPS register {} to {} for allocation",
                    GetHostRegName(lowest), GetRegName(ra.reg), GetHostRegName(caller_saved_lowest));
          if (IsHostRegAllocated(caller_saved_lowest))
            FreeHostReg(caller_saved_lowest);
          CopyHostReg(caller_saved_lowest, lowest);
          SwapHostRegAlloc(caller_saved_lowest, lowest);
          DebugAssert(!IsHostRegAllocated(lowest));
          return lowest;
        }
      }

      DEBUG_LOG("Freeing register {} in host register {} for allocation", GetRegName(ra.reg), GetHostRegName(lowest));
    }
    break;
    case HR_TYPE_LOAD_DELAY_VALUE:
    {
      DEBUG_LOG("Freeing load delay register {} in host register {} for allocation", GetHostRegName(lowest),
                GetRegName(ra.reg));
    }
    break;
    case HR_TYPE_NEXT_LOAD_DELAY_VALUE:
    {
      DEBUG_LOG("Freeing next load delay register {} in host register {} due for allocation", GetRegName(ra.reg),
                GetHostRegName(lowest));
    }
    break;
    default:
    {
      Panic("Unknown type freed");
    }
    break;
  }

  FreeHostReg(lowest);
  return lowest;
}
    635 
    636 const char* CPU::NewRec::Compiler::GetReadWriteModeString(u32 flags)
    637 {
    638   if ((flags & (HR_MODE_READ | HR_MODE_WRITE)) == (HR_MODE_READ | HR_MODE_WRITE))
    639     return "read-write";
    640   else if (flags & HR_MODE_READ)
    641     return "read-only";
    642   else if (flags & HR_MODE_WRITE)
    643     return "write-only";
    644   else
    645     return "UNKNOWN";
    646 }
    647 
// Allocates a host register of the given type and mode flags, re-using an
// existing matching allocation when possible (non-temp types only). For
// HR_TYPE_CPU_REG with HR_MODE_READ the guest value is materialized into the
// register (from the tracked constant or from CPU state); load-delay types
// bind the register to the compiler's delay slots. Returns the host register
// index.
u32 CPU::NewRec::Compiler::AllocateHostReg(u32 flags, HostRegAllocType type /* = HR_TYPE_TEMP */,
                                           Reg reg /* = Reg::count */)
{
  // Cancel any load delays before booting anything out
  if (flags & HR_MODE_WRITE && (type == HR_TYPE_CPU_REG || type == HR_TYPE_NEXT_LOAD_DELAY_VALUE))
    CancelLoadDelaysToReg(reg);

  // Already have a matching type?
  if (type != HR_TYPE_TEMP)
  {
    const std::optional<u32> check_reg = CheckHostReg(flags, type, reg);

    // shouldn't be allocating >1 load delay in a single instruction..
    // TODO: prefer callee saved registers for load delay
    DebugAssert((type != HR_TYPE_LOAD_DELAY_VALUE && type != HR_TYPE_NEXT_LOAD_DELAY_VALUE) || !check_reg.has_value());
    if (check_reg.has_value())
      return check_reg.value();
  }

  const u32 hreg = GetFreeHostReg(flags);
  HostRegAlloc& ra = m_host_regs[hreg];
  // Keep the physical-register description bits, take the caller's mode bits.
  ra.flags = (ra.flags & IMMUTABLE_HR_FLAGS) | (flags & ALLOWED_HR_FLAGS) | HR_ALLOCATED | HR_NEEDED;
  ra.type = type;
  ra.reg = reg;
  ra.counter = m_register_alloc_counter++;

  switch (type)
  {
    case HR_TYPE_CPU_REG:
    {
      DebugAssert(reg != Reg::zero);

      DEBUG_LOG("Allocate host reg {} to guest reg {} in {} mode", GetHostRegName(hreg), GetRegName(reg),
                GetReadWriteModeString(flags));

      if (flags & HR_MODE_READ)
      {
        DebugAssert(ra.reg > Reg::zero && ra.reg < Reg::count);

        if (HasConstantReg(reg))
        {
          // may as well flush it now
          DEBUG_LOG("Flush constant register in guest reg {} to host reg {}", GetRegName(reg), GetHostRegName(hreg));
          LoadHostRegWithConstant(hreg, GetConstantRegU32(reg));
          m_constant_regs_dirty.reset(static_cast<u8>(reg));
          ra.flags |= HR_MODE_WRITE;
        }
        else
        {
          LoadHostRegFromCPUPointer(hreg, &g_state.regs.r[static_cast<u8>(reg)]);
        }
      }

      // A write-mode allocation supersedes any tracked constant.
      if (flags & HR_MODE_WRITE && HasConstantReg(reg))
      {
        DebugAssert(reg != Reg::zero);
        DEBUG_LOG("Clearing constant register in guest reg {} due to write mode in {}", GetRegName(reg),
                  GetHostRegName(hreg));

        ClearConstantReg(reg);
      }
    }
    break;

    case HR_TYPE_LOAD_DELAY_VALUE:
    {
      DebugAssert(!m_load_delay_dirty && (!HasLoadDelay() || !(flags & HR_MODE_WRITE)));
      DEBUG_LOG("Allocating load delayed guest register {} in host reg {} in {} mode", GetRegName(reg),
                GetHostRegName(hreg), GetReadWriteModeString(flags));
      m_load_delay_register = reg;
      m_load_delay_value_register = hreg;
      if (flags & HR_MODE_READ)
        LoadHostRegFromCPUPointer(hreg, &g_state.load_delay_value);
    }
    break;

    case HR_TYPE_NEXT_LOAD_DELAY_VALUE:
    {
      DEBUG_LOG("Allocating next load delayed guest register {} in host reg {} in {} mode", GetRegName(reg),
                GetHostRegName(hreg), GetReadWriteModeString(flags));
      m_next_load_delay_register = reg;
      m_next_load_delay_value_register = hreg;
      if (flags & HR_MODE_READ)
        LoadHostRegFromCPUPointer(hreg, &g_state.next_load_delay_value);
    }
    break;

    case HR_TYPE_TEMP:
    {
      // Temps carry no guest value, so read/write modes are meaningless here.
      DebugAssert(!(flags & (HR_MODE_READ | HR_MODE_WRITE)));
      DEBUG_LOG("Allocate host reg {} as temporary", GetHostRegName(hreg));
    }
    break;

    default:
      Panic("Unknown type");
      break;
  }

  return hreg;
}
    749 
// Looks for an existing host register bound to (type, reg). On a hit, the
// allocation is refreshed (flags merged, marked needed, LRU counter bumped);
// a write-mode request clears any tracked constant, and a callee-saved
// request migrates the value into a callee-saved register if necessary.
// Returns the (possibly new) host register index, or nullopt on a miss.
std::optional<u32> CPU::NewRec::Compiler::CheckHostReg(u32 flags, HostRegAllocType type /* = HR_TYPE_TEMP */,
                                                       Reg reg /* = Reg::count */)
{
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    HostRegAlloc& ra = m_host_regs[i];
    if (!(ra.flags & HR_ALLOCATED) || ra.type != type || ra.reg != reg)
      continue;

    DebugAssert(ra.flags & HR_MODE_READ);
    if (flags & HR_MODE_WRITE)
    {
      DebugAssert(type == HR_TYPE_CPU_REG);
      if (!(ra.flags & HR_MODE_WRITE))
        DEBUG_LOG("Switch guest reg {} from read to read-write in host reg {}", GetRegName(reg), GetHostRegName(i));

      if (HasConstantReg(reg))
      {
        DebugAssert(reg != Reg::zero);
        DEBUG_LOG("Clearing constant register in guest reg {} due to write mode in {}", GetRegName(reg),
                  GetHostRegName(i));

        ClearConstantReg(reg);
      }
    }

    ra.flags |= (flags & ALLOWED_HR_FLAGS) | HR_NEEDED;
    ra.counter = m_register_alloc_counter++;

    // Need a callee saved reg?
    if (flags & HR_CALLEE_SAVED && !(ra.flags & HR_CALLEE_SAVED))
    {
      // Need to move it to one which is
      const u32 new_reg = GetFreeHostReg(HR_CALLEE_SAVED);
      DEBUG_LOG("Rename host reg {} to {} for callee saved", GetHostRegName(i), GetHostRegName(new_reg));

      CopyHostReg(new_reg, i);
      SwapHostRegAlloc(i, new_reg);
      DebugAssert(!IsHostRegAllocated(i));
      return new_reg;
    }

    return i;
  }

  return std::nullopt;
}
    797 
// Shorthand for allocating a scratch host register with no guest binding.
u32 CPU::NewRec::Compiler::AllocateTempHostReg(u32 flags)
{
  return AllocateHostReg(flags, HR_TYPE_TEMP);
}
    802 
    803 void CPU::NewRec::Compiler::SwapHostRegAlloc(u32 lhs, u32 rhs)
    804 {
    805   HostRegAlloc& lra = m_host_regs[lhs];
    806   HostRegAlloc& rra = m_host_regs[rhs];
    807 
    808   const u8 lra_flags = lra.flags;
    809   lra.flags = (lra.flags & IMMUTABLE_HR_FLAGS) | (rra.flags & ~IMMUTABLE_HR_FLAGS);
    810   rra.flags = (rra.flags & IMMUTABLE_HR_FLAGS) | (lra_flags & ~IMMUTABLE_HR_FLAGS);
    811   std::swap(lra.type, rra.type);
    812   std::swap(lra.reg, rra.reg);
    813   std::swap(lra.counter, rra.counter);
    814 }
    815 
    816 void CPU::NewRec::Compiler::FlushHostReg(u32 reg)
    817 {
    818   HostRegAlloc& ra = m_host_regs[reg];
    819   if (ra.flags & HR_MODE_WRITE)
    820   {
    821     switch (ra.type)
    822     {
    823       case HR_TYPE_CPU_REG:
    824       {
    825         DebugAssert(ra.reg > Reg::zero && ra.reg < Reg::count);
    826         DEBUG_LOG("Flushing register {} in host register {} to state", GetRegName(ra.reg), GetHostRegName(reg));
    827         StoreHostRegToCPUPointer(reg, &g_state.regs.r[static_cast<u8>(ra.reg)]);
    828       }
    829       break;
    830 
    831       case HR_TYPE_LOAD_DELAY_VALUE:
    832       {
    833         DebugAssert(m_load_delay_value_register == reg);
    834         DEBUG_LOG("Flushing load delayed register {} in host register {} to state", GetRegName(ra.reg),
    835                   GetHostRegName(reg));
    836 
    837         StoreHostRegToCPUPointer(reg, &g_state.load_delay_value);
    838         m_load_delay_value_register = NUM_HOST_REGS;
    839       }
    840       break;
    841 
    842       case HR_TYPE_NEXT_LOAD_DELAY_VALUE:
    843       {
    844         DebugAssert(m_next_load_delay_value_register == reg);
    845         WARNING_LOG("Flushing NEXT load delayed register {} in host register {} to state", GetRegName(ra.reg),
    846                     GetHostRegName(reg));
    847 
    848         StoreHostRegToCPUPointer(reg, &g_state.next_load_delay_value);
    849         m_next_load_delay_value_register = NUM_HOST_REGS;
    850       }
    851       break;
    852 
    853       default:
    854         break;
    855     }
    856 
    857     ra.flags = (ra.flags & ~HR_MODE_WRITE) | HR_MODE_READ;
    858   }
    859 }
    860 
void CPU::NewRec::Compiler::FreeHostReg(u32 reg)
{
  // Releases an allocated host register: first writes any dirty value back
  // to the CPU state (FlushHostReg), then clears the allocation record so
  // the register returns to the free pool.
  DebugAssert(IsHostRegAllocated(reg));
  DEBUG_LOG("Freeing host register {}", GetHostRegName(reg));
  FlushHostReg(reg);
  ClearHostReg(reg);
}
    868 
    869 void CPU::NewRec::Compiler::ClearHostReg(u32 reg)
    870 {
    871   HostRegAlloc& ra = m_host_regs[reg];
    872   ra.flags &= IMMUTABLE_HR_FLAGS;
    873   ra.type = HR_TYPE_TEMP;
    874   ra.counter = 0;
    875   ra.reg = Reg::count;
    876 }
    877 
    878 void CPU::NewRec::Compiler::MarkRegsNeeded(HostRegAllocType type, Reg reg)
    879 {
    880   for (u32 i = 0; i < NUM_HOST_REGS; i++)
    881   {
    882     HostRegAlloc& ra = m_host_regs[i];
    883     if (ra.flags & HR_ALLOCATED && ra.type == type && ra.reg == reg)
    884       ra.flags |= HR_NEEDED;
    885   }
    886 }
    887 
    888 void CPU::NewRec::Compiler::RenameHostReg(u32 reg, u32 new_flags, HostRegAllocType new_type, Reg new_reg)
    889 {
    890   // only supported for cpu regs for now
    891   DebugAssert(new_type == HR_TYPE_TEMP || new_type == HR_TYPE_CPU_REG || new_type == HR_TYPE_NEXT_LOAD_DELAY_VALUE);
    892 
    893   const std::optional<u32> old_reg = CheckHostReg(0, new_type, new_reg);
    894   if (old_reg.has_value())
    895   {
    896     // don't writeback
    897     ClearHostReg(old_reg.value());
    898   }
    899 
    900   // kill any load delay to this reg
    901   if (new_type == HR_TYPE_CPU_REG || new_type == HR_TYPE_NEXT_LOAD_DELAY_VALUE)
    902     CancelLoadDelaysToReg(new_reg);
    903 
    904   if (new_type == HR_TYPE_CPU_REG)
    905   {
    906     DEBUG_LOG("Renaming host reg {} to guest reg {}", GetHostRegName(reg), GetRegName(new_reg));
    907   }
    908   else if (new_type == HR_TYPE_NEXT_LOAD_DELAY_VALUE)
    909   {
    910     DEBUG_LOG("Renaming host reg {} to load delayed guest reg {}", GetHostRegName(reg), GetRegName(new_reg));
    911     DebugAssert(m_next_load_delay_register == Reg::count && m_next_load_delay_value_register == NUM_HOST_REGS);
    912     m_next_load_delay_register = new_reg;
    913     m_next_load_delay_value_register = reg;
    914   }
    915   else
    916   {
    917     DEBUG_LOG("Renaming host reg {} to temp", GetHostRegName(reg));
    918   }
    919 
    920   HostRegAlloc& ra = m_host_regs[reg];
    921   ra.flags = (ra.flags & IMMUTABLE_HR_FLAGS) | HR_NEEDED | HR_ALLOCATED | (new_flags & ALLOWED_HR_FLAGS);
    922   ra.counter = m_register_alloc_counter++;
    923   ra.type = new_type;
    924   ra.reg = new_reg;
    925 }
    926 
    927 void CPU::NewRec::Compiler::ClearHostRegNeeded(u32 reg)
    928 {
    929   DebugAssert(reg < NUM_HOST_REGS && IsHostRegAllocated(reg));
    930   HostRegAlloc& ra = m_host_regs[reg];
    931   if (ra.flags & HR_MODE_WRITE)
    932     ra.flags |= HR_MODE_READ;
    933 
    934   ra.flags &= ~HR_NEEDED;
    935 }
    936 
    937 void CPU::NewRec::Compiler::ClearHostRegsNeeded()
    938 {
    939   for (u32 i = 0; i < NUM_HOST_REGS; i++)
    940   {
    941     HostRegAlloc& ra = m_host_regs[i];
    942     if (!(ra.flags & HR_ALLOCATED))
    943       continue;
    944 
    945     // shouldn't have any temps left
    946     DebugAssert(ra.type != HR_TYPE_TEMP);
    947 
    948     if (ra.flags & HR_MODE_WRITE)
    949       ra.flags |= HR_MODE_READ;
    950 
    951     ra.flags &= ~HR_NEEDED;
    952   }
    953 }
    954 
    955 void CPU::NewRec::Compiler::DeleteMIPSReg(Reg reg, bool flush)
    956 {
    957   DebugAssert(reg != Reg::zero);
    958 
    959   for (u32 i = 0; i < NUM_HOST_REGS; i++)
    960   {
    961     HostRegAlloc& ra = m_host_regs[i];
    962     if (ra.flags & HR_ALLOCATED && ra.type == HR_TYPE_CPU_REG && ra.reg == reg)
    963     {
    964       if (flush)
    965         FlushHostReg(i);
    966       ClearHostReg(i);
    967       ClearConstantReg(reg);
    968       return;
    969     }
    970   }
    971 
    972   if (flush)
    973     FlushConstantReg(reg);
    974   ClearConstantReg(reg);
    975 }
    976 
    977 bool CPU::NewRec::Compiler::TryRenameMIPSReg(Reg to, Reg from, u32 fromhost, Reg other)
    978 {
    979   // can't rename when in form Rd = Rs op Rt and Rd == Rs or Rd == Rt
    980   if (to == from || to == other || !iinfo->RenameTest(from))
    981     return false;
    982 
    983   DEBUG_LOG("Renaming MIPS register {} to {}", GetRegName(from), GetRegName(to));
    984 
    985   if (iinfo->LiveTest(from))
    986     FlushHostReg(fromhost);
    987 
    988   // remove all references to renamed-to register
    989   DeleteMIPSReg(to, false);
    990   CancelLoadDelaysToReg(to);
    991 
    992   // and do the actual rename, new register has been modified.
    993   m_host_regs[fromhost].reg = to;
    994   m_host_regs[fromhost].flags |= HR_MODE_READ | HR_MODE_WRITE;
    995   return true;
    996 }
    997 
void CPU::NewRec::Compiler::UpdateHostRegCounters()
{
  // Recomputes the eviction priority (counter) of every allocated host
  // register that is not pinned by the current instruction. A higher
  // counter means the register is more valuable (used sooner), so the
  // allocator should evict low-counter registers first.
  const CodeCache::InstructionInfo* const info_end = m_block->InstructionsInfo() + m_block->size;

  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    HostRegAlloc& ra = m_host_regs[i];
    // Skip registers that are free, or currently needed (pinned).
    if ((ra.flags & (HR_ALLOCATED | HR_NEEDED)) != HR_ALLOCATED)
      continue;

    // Try not to punt out load delays.
    if (ra.type != HR_TYPE_CPU_REG)
    {
      ra.counter = std::numeric_limits<u16>::max();
      continue;
    }

    DebugAssert(IsHostRegAllocated(i));
    const CodeCache::InstructionInfo* cur = iinfo;
    const Reg reg = ra.reg;
    // RI_USED clear -> presumably the register is never read from here on
    // in this block, so make it the cheapest possible eviction.
    if (!(cur->reg_flags[static_cast<u8>(reg)] & CodeCache::RI_USED))
    {
      ra.counter = 0;
      continue;
    }

    // order based on the number of instructions until this register is used
    // (counter drops by one for every instruction before the next read)
    u16 counter_val = std::numeric_limits<u16>::max();
    for (; cur != info_end; cur++, counter_val--)
    {
      if (cur->ReadsReg(reg))
        break;
    }

    ra.counter = counter_val;
  }
}
   1035 
   1036 void CPU::NewRec::Compiler::Flush(u32 flags)
   1037 {
   1038   // TODO: Flush unneeded caller-saved regs (backup/replace calle-saved needed with caller-saved)
   1039   if (flags &
   1040       (FLUSH_FREE_UNNEEDED_CALLER_SAVED_REGISTERS | FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_FREE_ALL_REGISTERS))
   1041   {
   1042     const u32 req_mask = (flags & FLUSH_FREE_ALL_REGISTERS) ?
   1043                            HR_ALLOCATED :
   1044                            ((flags & FLUSH_FREE_CALLER_SAVED_REGISTERS) ? (HR_ALLOCATED | HR_CALLEE_SAVED) :
   1045                                                                           (HR_ALLOCATED | HR_CALLEE_SAVED | HR_NEEDED));
   1046     constexpr u32 req_flags = HR_ALLOCATED;
   1047 
   1048     for (u32 i = 0; i < NUM_HOST_REGS; i++)
   1049     {
   1050       HostRegAlloc& ra = m_host_regs[i];
   1051       if ((ra.flags & req_mask) == req_flags)
   1052         FreeHostReg(i);
   1053     }
   1054   }
   1055 
   1056   if (flags & FLUSH_INVALIDATE_MIPS_REGISTERS)
   1057   {
   1058     for (u32 i = 0; i < NUM_HOST_REGS; i++)
   1059     {
   1060       HostRegAlloc& ra = m_host_regs[i];
   1061       if (ra.flags & HR_ALLOCATED && ra.type == HR_TYPE_CPU_REG)
   1062         FreeHostReg(i);
   1063     }
   1064 
   1065     FlushConstantRegs(true);
   1066   }
   1067   else
   1068   {
   1069     if (flags & FLUSH_FLUSH_MIPS_REGISTERS)
   1070     {
   1071       for (u32 i = 0; i < NUM_HOST_REGS; i++)
   1072       {
   1073         HostRegAlloc& ra = m_host_regs[i];
   1074         if ((ra.flags & (HR_ALLOCATED | HR_MODE_WRITE)) == (HR_ALLOCATED | HR_MODE_WRITE) && ra.type == HR_TYPE_CPU_REG)
   1075           FlushHostReg(i);
   1076       }
   1077 
   1078       // flush any constant registers which are dirty too
   1079       FlushConstantRegs(false);
   1080     }
   1081   }
   1082 
   1083   if (flags & FLUSH_INVALIDATE_SPECULATIVE_CONSTANTS)
   1084     InvalidateSpeculativeValues();
   1085 }
   1086 
   1087 void CPU::NewRec::Compiler::FlushConstantReg(Reg r)
   1088 {
   1089   DebugAssert(m_constant_regs_valid.test(static_cast<u32>(r)));
   1090   DEBUG_LOG("Writing back register {} with constant value 0x{:08X}", GetRegName(r),
   1091             m_constant_reg_values[static_cast<u32>(r)]);
   1092   StoreConstantToCPUPointer(m_constant_reg_values[static_cast<u32>(r)], &g_state.regs.r[static_cast<u32>(r)]);
   1093   m_constant_regs_dirty.reset(static_cast<u32>(r));
   1094 }
   1095 
void CPU::NewRec::Compiler::BackupHostState()
{
  // Pushes a full snapshot of the compiler's state (cycle counters, PC
  // tracking, constant/host register allocation, load delay tracking) onto
  // the backup stack, so an alternate code path (e.g. the other side of a
  // branch) can be compiled and then rewound with RestoreHostState().
  DebugAssert(m_host_state_backup_count < m_host_state_backup.size());

  // need to back up everything...
  HostStateBackup& bu = m_host_state_backup[m_host_state_backup_count];
  bu.cycles = m_cycles;
  bu.gte_done_cycle = m_gte_done_cycle;
  bu.compiler_pc = m_compiler_pc;
  bu.dirty_pc = m_dirty_pc;
  bu.dirty_instruction_bits = m_dirty_instruction_bits;
  bu.dirty_gte_done_cycle = m_dirty_gte_done_cycle;
  bu.block_ended = m_block_ended;
  bu.inst = inst;
  bu.iinfo = iinfo;
  bu.current_instruction_pc = m_current_instruction_pc;
  bu.current_instruction_delay_slot = m_current_instruction_branch_delay_slot;
  bu.const_regs_valid = m_constant_regs_valid;
  bu.const_regs_dirty = m_constant_regs_dirty;
  bu.const_regs_values = m_constant_reg_values;
  bu.host_regs = m_host_regs;
  bu.register_alloc_counter = m_register_alloc_counter;
  bu.load_delay_dirty = m_load_delay_dirty;
  bu.load_delay_register = m_load_delay_register;
  bu.load_delay_value_register = m_load_delay_value_register;
  bu.next_load_delay_register = m_next_load_delay_register;
  bu.next_load_delay_value_register = m_next_load_delay_value_register;
  m_host_state_backup_count++;
}
   1125 
void CPU::NewRec::Compiler::RestoreHostState()
{
  // Pops the most recent snapshot pushed by BackupHostState(), restoring
  // every piece of compiler state captured there. Must be balanced with a
  // preceding BackupHostState() call.
  DebugAssert(m_host_state_backup_count > 0);
  m_host_state_backup_count--;

  HostStateBackup& bu = m_host_state_backup[m_host_state_backup_count];
  m_host_regs = std::move(bu.host_regs);
  m_constant_reg_values = std::move(bu.const_regs_values);
  m_constant_regs_dirty = std::move(bu.const_regs_dirty);
  m_constant_regs_valid = std::move(bu.const_regs_valid);
  m_current_instruction_branch_delay_slot = bu.current_instruction_delay_slot;
  m_current_instruction_pc = bu.current_instruction_pc;
  inst = bu.inst;
  iinfo = bu.iinfo;
  m_block_ended = bu.block_ended;
  m_dirty_gte_done_cycle = bu.dirty_gte_done_cycle;
  m_dirty_instruction_bits = bu.dirty_instruction_bits;
  m_dirty_pc = bu.dirty_pc;
  m_compiler_pc = bu.compiler_pc;
  m_register_alloc_counter = bu.register_alloc_counter;
  m_load_delay_dirty = bu.load_delay_dirty;
  m_load_delay_register = bu.load_delay_register;
  m_load_delay_value_register = bu.load_delay_value_register;
  m_next_load_delay_register = bu.next_load_delay_register;
  m_next_load_delay_value_register = bu.next_load_delay_value_register;
  m_gte_done_cycle = bu.gte_done_cycle;
  m_cycles = bu.cycles;
}
   1154 
   1155 void CPU::NewRec::Compiler::AddLoadStoreInfo(void* code_address, u32 code_size, u32 address_register, u32 data_register,
   1156                                              MemoryAccessSize size, bool is_signed, bool is_load)
   1157 {
   1158   DebugAssert(CodeCache::IsUsingFastmem());
   1159   DebugAssert(address_register < NUM_HOST_REGS);
   1160   DebugAssert(data_register < NUM_HOST_REGS);
   1161 
   1162   u32 gpr_bitmask = 0;
   1163   for (u32 i = 0; i < NUM_HOST_REGS; i++)
   1164   {
   1165     if (IsHostRegAllocated(i))
   1166       gpr_bitmask |= (1u << i);
   1167   }
   1168 
   1169   CPU::CodeCache::AddLoadStoreInfo(code_address, code_size, m_current_instruction_pc, m_block->pc, m_cycles,
   1170                                    gpr_bitmask, static_cast<u8>(address_register), static_cast<u8>(data_register), size,
   1171                                    is_signed, is_load);
   1172 }
   1173 
void CPU::NewRec::Compiler::CompileInstruction()
{
  // Compiles the instruction currently referenced by inst/iinfo: dispatches
  // on opcode/funct to the per-instruction Compile_* handlers, paired with
  // the SpecExec_* speculative-value trackers and (where enabled) PGXP
  // callbacks. Unknown encodings fall back to the interpreter and truncate
  // the block.
#ifdef _DEBUG
  TinyString str;
  DisassembleInstruction(&str, m_current_instruction_pc, inst->bits);
  DEBUG_LOG("Compiling{} {:08X}: {}", m_current_instruction_branch_delay_slot ? " branch delay slot" : "",
            m_current_instruction_pc, str);
#endif

  // Every instruction accounts for at least one cycle.
  m_cycles++;

  if (IsNopInstruction(*inst))
  {
    // Even a NOP advances the load delay pipeline.
    UpdateLoadDelay();
    return;
  }

  switch (inst->op)
  {
#define PGXPFN(x) reinterpret_cast<const void*>(&PGXP::x)

      // clang-format off
      // TODO: PGXP for jalr

    case InstructionOp::funct:
    {
      switch (inst->r.funct)
      {
        case InstructionFunct::sll: CompileTemplate(&Compiler::Compile_sll_const, &Compiler::Compile_sll, PGXPFN(CPU_SLL), TF_WRITES_D | TF_READS_T); SpecExec_sll(); break;
        case InstructionFunct::srl: CompileTemplate(&Compiler::Compile_srl_const, &Compiler::Compile_srl, PGXPFN(CPU_SRL), TF_WRITES_D | TF_READS_T); SpecExec_srl(); break;
        case InstructionFunct::sra: CompileTemplate(&Compiler::Compile_sra_const, &Compiler::Compile_sra, PGXPFN(CPU_SRA), TF_WRITES_D | TF_READS_T); SpecExec_sra(); break;
        case InstructionFunct::sllv: CompileTemplate(&Compiler::Compile_sllv_const, &Compiler::Compile_sllv, PGXPFN(CPU_SLLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_sllv(); break;
        case InstructionFunct::srlv: CompileTemplate(&Compiler::Compile_srlv_const, &Compiler::Compile_srlv, PGXPFN(CPU_SRLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srlv(); break;
        case InstructionFunct::srav: CompileTemplate(&Compiler::Compile_srav_const, &Compiler::Compile_srav, PGXPFN(CPU_SRAV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srav(); break;
        case InstructionFunct::jr: CompileTemplate(&Compiler::Compile_jr_const, &Compiler::Compile_jr, nullptr, TF_READS_S); break;
        case InstructionFunct::jalr: CompileTemplate(&Compiler::Compile_jalr_const, &Compiler::Compile_jalr, nullptr, /*TF_WRITES_D |*/ TF_READS_S | TF_NO_NOP); SpecExec_jalr(); break;
        case InstructionFunct::syscall: Compile_syscall(); break;
        case InstructionFunct::break_: Compile_break(); break;
        case InstructionFunct::mfhi: SpecCopyReg(inst->r.rd, Reg::hi); CompileMoveRegTemplate(inst->r.rd, Reg::hi, g_settings.gpu_pgxp_cpu); break;
        case InstructionFunct::mthi: SpecCopyReg(Reg::hi, inst->r.rs); CompileMoveRegTemplate(Reg::hi, inst->r.rs, g_settings.gpu_pgxp_cpu); break;
        case InstructionFunct::mflo: SpecCopyReg(inst->r.rd, Reg::lo); CompileMoveRegTemplate(inst->r.rd, Reg::lo, g_settings.gpu_pgxp_cpu); break;
        case InstructionFunct::mtlo: SpecCopyReg(Reg::lo, inst->r.rs); CompileMoveRegTemplate(Reg::lo, inst->r.rs, g_settings.gpu_pgxp_cpu); break;
        case InstructionFunct::mult: CompileTemplate(&Compiler::Compile_mult_const, &Compiler::Compile_mult, PGXPFN(CPU_MULT), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_mult(); break;
        case InstructionFunct::multu: CompileTemplate(&Compiler::Compile_multu_const, &Compiler::Compile_multu, PGXPFN(CPU_MULTU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_multu(); break;
        case InstructionFunct::div: CompileTemplate(&Compiler::Compile_div_const, &Compiler::Compile_div, PGXPFN(CPU_DIV), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_div(); break;
        case InstructionFunct::divu: CompileTemplate(&Compiler::Compile_divu_const, &Compiler::Compile_divu, PGXPFN(CPU_DIVU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_divu(); break;
        case InstructionFunct::add: CompileTemplate(&Compiler::Compile_add_const, &Compiler::Compile_add, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_add(); break;
        case InstructionFunct::addu: CompileTemplate(&Compiler::Compile_addu_const, &Compiler::Compile_addu, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_addu(); break;
        case InstructionFunct::sub: CompileTemplate(&Compiler::Compile_sub_const, &Compiler::Compile_sub, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_sub(); break;
        case InstructionFunct::subu: CompileTemplate(&Compiler::Compile_subu_const, &Compiler::Compile_subu, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_RENAME_WITH_ZERO_T); SpecExec_subu(); break;
        case InstructionFunct::and_: CompileTemplate(&Compiler::Compile_and_const, &Compiler::Compile_and, PGXPFN(CPU_AND_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_and(); break;
        case InstructionFunct::or_: CompileTemplate(&Compiler::Compile_or_const, &Compiler::Compile_or, PGXPFN(CPU_OR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_or(); break;
        case InstructionFunct::xor_: CompileTemplate(&Compiler::Compile_xor_const, &Compiler::Compile_xor, PGXPFN(CPU_XOR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_xor(); break;
        case InstructionFunct::nor: CompileTemplate(&Compiler::Compile_nor_const, &Compiler::Compile_nor, PGXPFN(CPU_NOR), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_nor(); break;
        case InstructionFunct::slt: CompileTemplate(&Compiler::Compile_slt_const, &Compiler::Compile_slt, PGXPFN(CPU_SLT), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_slt(); break;
        case InstructionFunct::sltu: CompileTemplate(&Compiler::Compile_sltu_const, &Compiler::Compile_sltu, PGXPFN(CPU_SLTU), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_sltu(); break;
        default: Compile_Fallback(); InvalidateSpeculativeValues(); TruncateBlock(); break;
      }
    }
    break;

    case InstructionOp::j: Compile_j(); break;
    case InstructionOp::jal: Compile_jal(); SpecExec_jal(); break;

    case InstructionOp::b: CompileTemplate(&Compiler::Compile_b_const, &Compiler::Compile_b, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); SpecExec_b(); break;
    case InstructionOp::blez: CompileTemplate(&Compiler::Compile_blez_const, &Compiler::Compile_blez, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break;
    case InstructionOp::bgtz: CompileTemplate(&Compiler::Compile_bgtz_const, &Compiler::Compile_bgtz, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break;
    case InstructionOp::beq: CompileTemplate(&Compiler::Compile_beq_const, &Compiler::Compile_beq, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break;
    case InstructionOp::bne: CompileTemplate(&Compiler::Compile_bne_const, &Compiler::Compile_bne, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break;

    case InstructionOp::addi: CompileTemplate(&Compiler::Compile_addi_const, &Compiler::Compile_addi, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_IMM); SpecExec_addi(); break;
    case InstructionOp::addiu: CompileTemplate(&Compiler::Compile_addiu_const, &Compiler::Compile_addiu, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_addiu(); break;
    case InstructionOp::slti: CompileTemplate(&Compiler::Compile_slti_const, &Compiler::Compile_slti, PGXPFN(CPU_SLTI), TF_WRITES_T | TF_READS_S); SpecExec_slti(); break;
    case InstructionOp::sltiu: CompileTemplate(&Compiler::Compile_sltiu_const, &Compiler::Compile_sltiu, PGXPFN(CPU_SLTIU), TF_WRITES_T | TF_READS_S); SpecExec_sltiu(); break;
    case InstructionOp::andi: CompileTemplate(&Compiler::Compile_andi_const, &Compiler::Compile_andi, PGXPFN(CPU_ANDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE); SpecExec_andi(); break;
    case InstructionOp::ori: CompileTemplate(&Compiler::Compile_ori_const, &Compiler::Compile_ori, PGXPFN(CPU_ORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_ori(); break;
    case InstructionOp::xori: CompileTemplate(&Compiler::Compile_xori_const, &Compiler::Compile_xori, PGXPFN(CPU_XORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_xori(); break;
    case InstructionOp::lui: Compile_lui(); SpecExec_lui(); break;

    case InstructionOp::lb: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, true); break;
    case InstructionOp::lbu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, false); break;
    case InstructionOp::lh: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::HalfWord, true); break;
    case InstructionOp::lhu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::HalfWord, false); break;
    case InstructionOp::lw: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Word, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Word, false); break;
    case InstructionOp::lwl: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(false); break;
    case InstructionOp::lwr: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(true); break;
    case InstructionOp::sb: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Byte, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Byte); break;
    case InstructionOp::sh: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::HalfWord, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::HalfWord); break;
    case InstructionOp::sw: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Word, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Word); break;
    case InstructionOp::swl: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S /*| TF_READS_T*/); SpecExec_swx(false); break;
    case InstructionOp::swr: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S /*| TF_READS_T*/); SpecExec_swx(true); break;

    case InstructionOp::cop0:
      {
        if (inst->cop.IsCommonInstruction())
        {
          switch (inst->cop.CommonOp())
          {
            case CopCommonInstruction::mfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Compiler::Compile_mfc0, PGXPFN(CPU_MFC0), TF_WRITES_T | TF_LOAD_DELAY); } SpecExec_mfc0(); break;
            case CopCommonInstruction::mtcn: CompileTemplate(nullptr, &Compiler::Compile_mtc0, PGXPFN(CPU_MTC0), TF_READS_T); SpecExec_mtc0(); break;
            default: Compile_Fallback(); break;
          }
        }
        else
        {
          switch (inst->cop.Cop0Op())
          {
            case Cop0Instruction::rfe: CompileTemplate(nullptr, &Compiler::Compile_rfe, nullptr, 0); SpecExec_rfe(); break;
            default: Compile_Fallback(); break;
          }
        }
      }
      break;

    case InstructionOp::cop2:
      {
        if (inst->cop.IsCommonInstruction())
        {
          // mfc2/cfc2 share a handler; same for mtc2/ctc2.
          switch (inst->cop.CommonOp())
          {
            case CopCommonInstruction::mfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Compiler::Compile_mfc2, nullptr, TF_GTE_STALL); } break;
            case CopCommonInstruction::cfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Compiler::Compile_mfc2, nullptr, TF_GTE_STALL); } break;
            case CopCommonInstruction::mtcn: CompileTemplate(nullptr, &Compiler::Compile_mtc2, PGXPFN(CPU_MTC2), TF_GTE_STALL | TF_READS_T | TF_PGXP_WITHOUT_CPU); break;
            case CopCommonInstruction::ctcn: CompileTemplate(nullptr, &Compiler::Compile_mtc2, PGXPFN(CPU_MTC2), TF_GTE_STALL | TF_READS_T | TF_PGXP_WITHOUT_CPU); break;
            default: Compile_Fallback(); break;
          }
        }
        else
        {
          // GTE ops
          CompileTemplate(nullptr, &Compiler::Compile_cop2, nullptr, TF_GTE_STALL);
        }
      }
      break;

    case InstructionOp::lwc2: CompileLoadStoreTemplate(&Compiler::Compile_lwc2, MemoryAccessSize::Word, false, false, TF_GTE_STALL | TF_READS_S | TF_LOAD_DELAY); break;
    case InstructionOp::swc2: CompileLoadStoreTemplate(&Compiler::Compile_swc2, MemoryAccessSize::Word, true, false, TF_GTE_STALL | TF_READS_S); SpecExec_swc2(); break;

      // swc0/lwc0/cop1/cop3 are essentially no-ops
    case InstructionOp::cop1:
    case InstructionOp::cop3:
    case InstructionOp::lwc0:
    case InstructionOp::lwc1:
    case InstructionOp::lwc3:
    case InstructionOp::swc0:
    case InstructionOp::swc1:
    case InstructionOp::swc3:
      break;

    default: Compile_Fallback(); InvalidateSpeculativeValues(); TruncateBlock(); break;
      // clang-format on

#undef PGXPFN
  }

  // Inputs of this instruction are no longer pinned; advance the load
  // delay pipeline before the next instruction.
  ClearHostRegsNeeded();
  UpdateLoadDelay();

  // Disabled host-code disassembly dump ('start' would need to be captured
  // at the top of the function if re-enabled).
#if 0
  const void* end = GetCurrentCodePointer();
  if (start != end && !m_current_instruction_branch_delay_slot)
  {
    CodeCache::DisassembleAndLogHostCode(start,
                                         static_cast<u32>(static_cast<const u8*>(end) - static_cast<const u8*>(start)));
  }
#endif
}
   1341 
   1342 void CPU::NewRec::Compiler::CompileBranchDelaySlot(bool dirty_pc /* = true */)
   1343 {
   1344   // Update load delay at the end of the previous instruction.
   1345   UpdateLoadDelay();
   1346 
   1347   // Don't need the branch instruction's inputs.
   1348   ClearHostRegsNeeded();
   1349 
   1350   // TODO: Move cycle add before this.
   1351   inst++;
   1352   iinfo++;
   1353   m_current_instruction_pc += sizeof(Instruction);
   1354   m_current_instruction_branch_delay_slot = true;
   1355   m_compiler_pc += sizeof(Instruction);
   1356   m_dirty_pc = dirty_pc;
   1357   m_dirty_instruction_bits = true;
   1358 
   1359   CompileInstruction();
   1360 
   1361   m_current_instruction_branch_delay_slot = false;
   1362 }
   1363 
   1364 void CPU::NewRec::Compiler::CompileTemplate(void (Compiler::*const_func)(CompileFlags),
   1365                                             void (Compiler::*func)(CompileFlags), const void* pgxp_cpu_func, u32 tflags)
   1366 {
   1367   // TODO: This is where we will do memory operand optimization. Remember to kill constants!
   1368   // TODO: Swap S and T if commutative
   1369   // TODO: For and, treat as zeroing if imm is zero
   1370   // TODO: Optimize slt + bne to cmp + jump
   1371   // TODO: Prefer memory operands when load delay is dirty, since we're going to invalidate immediately after the first
   1372   // instruction..
   1373   // TODO: andi with zero -> zero const
   1374   // TODO: load constant so it can be flushed if it's not overwritten later
   1375   // TODO: inline PGXP ops.
   1376   // TODO: don't rename on sltu.
   1377 
   1378   bool allow_constant = static_cast<bool>(const_func);
   1379   Reg rs = inst->r.rs.GetValue();
   1380   Reg rt = inst->r.rt.GetValue();
   1381   Reg rd = inst->r.rd.GetValue();
   1382 
   1383   if (tflags & TF_GTE_STALL)
   1384     StallUntilGTEComplete();
   1385 
   1386   // throw away instructions writing to $zero
   1387   if (!(tflags & TF_NO_NOP) && (!g_settings.cpu_recompiler_memory_exceptions || !(tflags & TF_CAN_OVERFLOW)) &&
   1388       ((tflags & TF_WRITES_T && rt == Reg::zero) || (tflags & TF_WRITES_D && rd == Reg::zero)))
   1389   {
   1390     DEBUG_LOG("Skipping instruction because it writes to zero");
   1391     return;
   1392   }
   1393 
   1394   // handle rename operations
   1395   if ((tflags & TF_RENAME_WITH_ZERO_T && HasConstantRegValue(rt, 0)))
   1396   {
   1397     DebugAssert((tflags & (TF_WRITES_D | TF_READS_S | TF_READS_T)) == (TF_WRITES_D | TF_READS_S | TF_READS_T));
   1398     CompileMoveRegTemplate(rd, rs, true);
   1399     return;
   1400   }
   1401   else if ((tflags & (TF_RENAME_WITH_ZERO_T | TF_COMMUTATIVE)) == (TF_RENAME_WITH_ZERO_T | TF_COMMUTATIVE) &&
   1402            HasConstantRegValue(rs, 0))
   1403   {
   1404     DebugAssert((tflags & (TF_WRITES_D | TF_READS_S | TF_READS_T)) == (TF_WRITES_D | TF_READS_S | TF_READS_T));
   1405     CompileMoveRegTemplate(rd, rt, true);
   1406     return;
   1407   }
   1408   else if (tflags & TF_RENAME_WITH_ZERO_IMM && inst->i.imm == 0)
   1409   {
   1410     CompileMoveRegTemplate(rt, rs, true);
   1411     return;
   1412   }
   1413 
   1414   if (pgxp_cpu_func && g_settings.gpu_pgxp_enable && ((tflags & TF_PGXP_WITHOUT_CPU) || g_settings.UsingPGXPCPUMode()))
   1415   {
   1416     std::array<Reg, 2> reg_args = {{Reg::count, Reg::count}};
   1417     u32 num_reg_args = 0;
   1418     if (tflags & TF_READS_S)
   1419       reg_args[num_reg_args++] = rs;
   1420     if (tflags & TF_READS_T)
   1421       reg_args[num_reg_args++] = rt;
   1422     if (tflags & TF_READS_LO)
   1423       reg_args[num_reg_args++] = Reg::lo;
   1424     if (tflags & TF_READS_HI)
   1425       reg_args[num_reg_args++] = Reg::hi;
   1426 
   1427     DebugAssert(num_reg_args <= 2);
   1428     GeneratePGXPCallWithMIPSRegs(pgxp_cpu_func, inst->bits, reg_args[0], reg_args[1]);
   1429   }
   1430 
   1431   // if it's a commutative op, and we have one constant reg but not the other, swap them
   1432   // TODO: make it swap when writing to T as well
   1433   // TODO: drop the hack for rd == rt
   1434   if (tflags & TF_COMMUTATIVE && !(tflags & TF_WRITES_T) &&
   1435       ((HasConstantReg(rs) && !HasConstantReg(rt)) || (tflags & TF_WRITES_D && rd == rt)))
   1436   {
   1437     DEBUG_LOG("Swapping S:{} and T:{} due to commutative op and constants", GetRegName(rs), GetRegName(rt));
   1438     std::swap(rs, rt);
   1439   }
   1440 
   1441   CompileFlags cf = {};
   1442 
   1443   if (tflags & TF_READS_S)
   1444   {
   1445     MarkRegsNeeded(HR_TYPE_CPU_REG, rs);
   1446     if (HasConstantReg(rs))
   1447       cf.const_s = true;
   1448     else
   1449       allow_constant = false;
   1450   }
   1451   if (tflags & TF_READS_T)
   1452   {
   1453     MarkRegsNeeded(HR_TYPE_CPU_REG, rt);
   1454     if (HasConstantReg(rt))
   1455       cf.const_t = true;
   1456     else
   1457       allow_constant = false;
   1458   }
   1459   if (tflags & TF_READS_LO)
   1460   {
   1461     MarkRegsNeeded(HR_TYPE_CPU_REG, Reg::lo);
   1462     if (HasConstantReg(Reg::lo))
   1463       cf.const_lo = true;
   1464     else
   1465       allow_constant = false;
   1466   }
   1467   if (tflags & TF_READS_HI)
   1468   {
   1469     MarkRegsNeeded(HR_TYPE_CPU_REG, Reg::hi);
   1470     if (HasConstantReg(Reg::hi))
   1471       cf.const_hi = true;
   1472     else
   1473       allow_constant = false;
   1474   }
   1475 
   1476   // Needed because of potential swapping
   1477   if (tflags & TF_READS_S)
   1478     cf.mips_s = static_cast<u8>(rs);
   1479   if (tflags & (TF_READS_T | TF_WRITES_T))
   1480     cf.mips_t = static_cast<u8>(rt);
   1481 
   1482   if (allow_constant)
   1483   {
   1484     // woot, constant path
   1485     (this->*const_func)(cf);
   1486     return;
   1487   }
   1488 
   1489   UpdateHostRegCounters();
   1490 
   1491   if (tflags & TF_CAN_SWAP_DELAY_SLOT && TrySwapDelaySlot(cf.MipsS(), cf.MipsT()))
   1492   {
   1493     // CompileBranchDelaySlot() clears needed, so need to reset.
   1494     cf.delay_slot_swapped = true;
   1495     if (tflags & TF_READS_S)
   1496       MarkRegsNeeded(HR_TYPE_CPU_REG, rs);
   1497     if (tflags & TF_READS_T)
   1498       MarkRegsNeeded(HR_TYPE_CPU_REG, rt);
   1499     if (tflags & TF_READS_LO)
   1500       MarkRegsNeeded(HR_TYPE_CPU_REG, Reg::lo);
   1501     if (tflags & TF_READS_HI)
   1502       MarkRegsNeeded(HR_TYPE_CPU_REG, Reg::hi);
   1503   }
   1504 
   1505   if (tflags & TF_READS_S &&
   1506       (tflags & TF_NEEDS_REG_S || !cf.const_s || (tflags & TF_WRITES_D && rd != Reg::zero && rd == rs)))
   1507   {
   1508     cf.host_s = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rs);
   1509     cf.const_s = false;
   1510     cf.valid_host_s = true;
   1511   }
   1512 
   1513   if (tflags & TF_READS_T &&
   1514       (tflags & (TF_NEEDS_REG_T | TF_WRITES_T) || !cf.const_t || (tflags & TF_WRITES_D && rd != Reg::zero && rd == rt)))
   1515   {
   1516     cf.host_t = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt);
   1517     cf.const_t = false;
   1518     cf.valid_host_t = true;
   1519   }
   1520 
   1521   if (tflags & (TF_READS_LO | TF_WRITES_LO))
   1522   {
   1523     cf.host_lo =
   1524       AllocateHostReg(((tflags & TF_READS_LO) ? HR_MODE_READ : 0u) | ((tflags & TF_WRITES_LO) ? HR_MODE_WRITE : 0u),
   1525                       HR_TYPE_CPU_REG, Reg::lo);
   1526     cf.const_lo = false;
   1527     cf.valid_host_lo = true;
   1528   }
   1529 
   1530   if (tflags & (TF_READS_HI | TF_WRITES_HI))
   1531   {
   1532     cf.host_hi =
   1533       AllocateHostReg(((tflags & TF_READS_HI) ? HR_MODE_READ : 0u) | ((tflags & TF_WRITES_HI) ? HR_MODE_WRITE : 0u),
   1534                       HR_TYPE_CPU_REG, Reg::hi);
   1535     cf.const_hi = false;
   1536     cf.valid_host_hi = true;
   1537   }
   1538 
   1539   const HostRegAllocType write_type =
   1540     (tflags & TF_LOAD_DELAY && EMULATE_LOAD_DELAYS) ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG;
   1541 
   1542   if (tflags & TF_CAN_OVERFLOW && g_settings.cpu_recompiler_memory_exceptions)
   1543   {
   1544     // allocate a temp register for the result, then swap it back
   1545     const u32 tempreg = AllocateHostReg(0, HR_TYPE_TEMP);
   1546     ;
   1547     if (tflags & TF_WRITES_D)
   1548     {
   1549       cf.host_d = tempreg;
   1550       cf.valid_host_d = true;
   1551     }
   1552     else if (tflags & TF_WRITES_T)
   1553     {
   1554       cf.host_t = tempreg;
   1555       cf.valid_host_t = true;
   1556     }
   1557 
   1558     (this->*func)(cf);
   1559 
   1560     if (tflags & TF_WRITES_D && rd != Reg::zero)
   1561     {
   1562       DeleteMIPSReg(rd, false);
   1563       RenameHostReg(tempreg, HR_MODE_WRITE, write_type, rd);
   1564     }
   1565     else if (tflags & TF_WRITES_T && rt != Reg::zero)
   1566     {
   1567       DeleteMIPSReg(rt, false);
   1568       RenameHostReg(tempreg, HR_MODE_WRITE, write_type, rt);
   1569     }
   1570     else
   1571     {
   1572       FreeHostReg(tempreg);
   1573     }
   1574   }
   1575   else
   1576   {
   1577     if (tflags & TF_WRITES_D && rd != Reg::zero)
   1578     {
   1579       if (tflags & TF_READS_S && cf.valid_host_s && TryRenameMIPSReg(rd, rs, cf.host_s, Reg::count))
   1580         cf.host_d = cf.host_s;
   1581       else
   1582         cf.host_d = AllocateHostReg(HR_MODE_WRITE, write_type, rd);
   1583       cf.valid_host_d = true;
   1584     }
   1585 
   1586     if (tflags & TF_WRITES_T && rt != Reg::zero)
   1587     {
   1588       if (tflags & TF_READS_S && cf.valid_host_s && TryRenameMIPSReg(rt, rs, cf.host_s, Reg::count))
   1589         cf.host_t = cf.host_s;
   1590       else
   1591         cf.host_t = AllocateHostReg(HR_MODE_WRITE, write_type, rt);
   1592       cf.valid_host_t = true;
   1593     }
   1594 
   1595     (this->*func)(cf);
   1596   }
   1597 }
   1598 
// Common driver for compiling loads and stores. func performs the actual memory
// access codegen; size/store/sign describe the access, and tflags declares the
// instruction's register usage (TF_READS_S = base address, TF_READS_T = store
// data, TF_WRITES_T = load result).
void CPU::NewRec::Compiler::CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool, bool,
                                                                            const std::optional<VirtualMemoryAddress>&),
                                                     MemoryAccessSize size, bool store, bool sign, u32 tflags)
{
  const Reg rs = inst->i.rs;
  const Reg rt = inst->i.rt;

  if (tflags & TF_GTE_STALL)
    StallUntilGTEComplete();

  CompileFlags cf = {};

  if (tflags & TF_READS_S)
  {
    MarkRegsNeeded(HR_TYPE_CPU_REG, rs);
    cf.mips_s = static_cast<u8>(rs);
  }
  if (tflags & (TF_READS_T | TF_WRITES_T))
  {
    if (tflags & TF_READS_T)
      MarkRegsNeeded(HR_TYPE_CPU_REG, rt);
    cf.mips_t = static_cast<u8>(rt);
  }

  UpdateHostRegCounters();

  // constant address?
  // addr is exact (base register constant); spec_addr may come from speculation.
  std::optional<VirtualMemoryAddress> addr;
  std::optional<VirtualMemoryAddress> spec_addr;
  bool use_fastmem = CodeCache::IsUsingFastmem() && !g_settings.cpu_recompiler_memory_exceptions &&
                     !SpecIsCacheIsolated() && !CodeCache::HasPreviouslyFaultedOnPC(m_current_instruction_pc);
  if (HasConstantReg(rs))
  {
    addr = GetConstantRegU32(rs) + inst->i.imm_sext32();
    spec_addr = addr;
    cf.const_s = true;

    if (!Bus::CanUseFastmemForAddress(addr.value()))
    {
      DEBUG_LOG("Not using fastmem for {:08X}", addr.value());
      use_fastmem = false;
    }
  }
  else
  {
    // Base register unknown at compile time; consult the speculative value.
    spec_addr = SpecExec_LoadStoreAddr();
    if (use_fastmem && spec_addr.has_value() && !Bus::CanUseFastmemForAddress(spec_addr.value()))
    {
      DEBUG_LOG("Not using fastmem for speculative {:08X}", spec_addr.value());
      use_fastmem = false;
    }

    if constexpr (HAS_MEMORY_OPERANDS)
    {
      // don't bother caching it since we're going to flush anyway
      // TODO: make less rubbish, if it's caller saved we don't need to flush...
      const std::optional<u32> hreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rs);
      if (hreg.has_value())
      {
        cf.valid_host_s = true;
        cf.host_s = hreg.value();
      }
    }
    else
    {
      // need rs in a register
      cf.host_s = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rs);
      cf.valid_host_s = true;
    }
  }

  // reads T -> store, writes T -> load
  // for now, we defer the allocation to afterwards, because C call
  if (tflags & TF_READS_T)
  {
    if (HasConstantReg(rt))
    {
      cf.const_t = true;
    }
    else
    {
      if constexpr (HAS_MEMORY_OPERANDS)
      {
        const std::optional<u32> hreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt);
        if (hreg.has_value())
        {
          cf.valid_host_t = true;
          cf.host_t = hreg.value();
        }
      }
      else
      {
        cf.host_t = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt);
        cf.valid_host_t = true;
      }
    }
  }

  (this->*func)(cf, size, sign, use_fastmem, addr);

  // If a store's (speculative) target lands inside this very block, stop
  // compiling further instructions so the modified code is re-fetched.
  if (store && !m_block_ended && !m_current_instruction_branch_delay_slot && spec_addr.has_value() &&
      GetSegmentForAddress(spec_addr.value()) != Segment::KSEG2)
  {
    // Get rid of physical aliases.
    const u32 phys_spec_addr = VirtualAddressToPhysical(spec_addr.value());
    if (phys_spec_addr >= VirtualAddressToPhysical(m_block->pc) &&
        phys_spec_addr < VirtualAddressToPhysical(m_block->pc + (m_block->size * sizeof(Instruction))))
    {
      WARNING_LOG("Instruction {:08X} speculatively writes to {:08X} inside block {:08X}-{:08X}. Truncating block.",
                  m_current_instruction_pc, phys_spec_addr, m_block->pc,
                  m_block->pc + (m_block->size * sizeof(Instruction)));
      TruncateBlock();
    }
  }
}
   1714 
   1715 void CPU::NewRec::Compiler::TruncateBlock()
   1716 {
   1717   m_block->size = ((m_current_instruction_pc - m_block->pc) / sizeof(Instruction)) + 1;
   1718   iinfo->is_last_instruction = true;
   1719 }
   1720 
   1721 const TickCount* CPU::NewRec::Compiler::GetFetchMemoryAccessTimePtr() const
   1722 {
   1723   const TickCount* ptr =
   1724     Bus::GetMemoryAccessTimePtr(m_block->pc & PHYSICAL_MEMORY_ADDRESS_MASK, MemoryAccessSize::Word);
   1725   AssertMsg(ptr, "Address has dynamic fetch ticks");
   1726   return ptr;
   1727 }
   1728 
   1729 void CPU::NewRec::Compiler::FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store,
   1730                                               bool use_fastmem)
   1731 {
   1732   if (use_fastmem)
   1733     return;
   1734 
   1735   // TODO: Stores don't need to flush GTE cycles...
   1736   Flush(FLUSH_FOR_C_CALL | FLUSH_FOR_LOADSTORE);
   1737 }
   1738 
// Compiles a register-to-register move (dst <- src), propagating constants and
// renaming host registers where possible. When pgxp_move is set and PGXP is
// enabled, PGXP is notified so precision data follows the value.
void CPU::NewRec::Compiler::CompileMoveRegTemplate(Reg dst, Reg src, bool pgxp_move)
{
  // Self-moves and writes to $zero are no-ops.
  if (dst == src || dst == Reg::zero)
    return;

  if (HasConstantReg(src))
  {
    // Constant source: destination simply becomes the same constant.
    DeleteMIPSReg(dst, false);
    SetConstantReg(dst, GetConstantRegU32(src));
  }
  else
  {
    // Try to rename src's host register to dst; otherwise emit a real copy.
    const u32 srcreg = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, src);
    if (!TryRenameMIPSReg(dst, src, srcreg, Reg::count))
    {
      const u32 dstreg = AllocateHostReg(HR_MODE_WRITE, HR_TYPE_CPU_REG, dst);
      CopyHostReg(dstreg, srcreg);
      ClearHostRegNeeded(dstreg);
    }
  }

  // TODO: This could be made better if we only did it for registers where there was a previous MFC2.
  if (g_settings.gpu_pgxp_enable && pgxp_move)
  {
    // might've been renamed, so use dst here
    GeneratePGXPCallWithMIPSRegs(reinterpret_cast<const void*>(&PGXP::CPU_MOVE_Packed), PGXP::PackMoveArgs(dst, src),
                                 dst);
  }
}
   1768 
   1769 void CPU::NewRec::Compiler::Compile_j()
   1770 {
   1771   const u32 newpc = (m_compiler_pc & UINT32_C(0xF0000000)) | (inst->j.target << 2);
   1772 
   1773   // TODO: Delay slot swap.
   1774   // We could also move the cycle commit back.
   1775   CompileBranchDelaySlot();
   1776   EndBlock(newpc, true);
   1777 }
   1778 
   1779 void CPU::NewRec::Compiler::Compile_jr_const(CompileFlags cf)
   1780 {
   1781   DebugAssert(HasConstantReg(cf.MipsS()));
   1782   const u32 newpc = GetConstantRegU32(cf.MipsS());
   1783   if (newpc & 3 && g_settings.cpu_recompiler_memory_exceptions)
   1784   {
   1785     EndBlockWithException(Exception::AdEL);
   1786     return;
   1787   }
   1788 
   1789   CompileBranchDelaySlot();
   1790   EndBlock(newpc, true);
   1791 }
   1792 
   1793 void CPU::NewRec::Compiler::Compile_jal()
   1794 {
   1795   const u32 newpc = (m_compiler_pc & UINT32_C(0xF0000000)) | (inst->j.target << 2);
   1796   SetConstantReg(Reg::ra, GetBranchReturnAddress({}));
   1797   CompileBranchDelaySlot();
   1798   EndBlock(newpc, true);
   1799 }
   1800 
   1801 void CPU::NewRec::Compiler::Compile_jalr_const(CompileFlags cf)
   1802 {
   1803   DebugAssert(HasConstantReg(cf.MipsS()));
   1804   const u32 newpc = GetConstantRegU32(cf.MipsS());
   1805   if (MipsD() != Reg::zero)
   1806     SetConstantReg(MipsD(), GetBranchReturnAddress({}));
   1807 
   1808   CompileBranchDelaySlot();
   1809   EndBlock(newpc, true);
   1810 }
   1811 
// syscall unconditionally raises the Syscall exception, ending the block.
void CPU::NewRec::Compiler::Compile_syscall()
{
  EndBlockWithException(Exception::Syscall);
}
   1816 
// break unconditionally raises the BP (breakpoint) exception, ending the block.
void CPU::NewRec::Compiler::Compile_break()
{
  EndBlockWithException(Exception::BP);
}
   1821 
   1822 void CPU::NewRec::Compiler::Compile_b_const(CompileFlags cf)
   1823 {
   1824   DebugAssert(HasConstantReg(cf.MipsS()));
   1825 
   1826   const u8 irt = static_cast<u8>(inst->i.rt.GetValue());
   1827   const bool bgez = ConvertToBoolUnchecked(irt & u8(1));
   1828   const bool link = (irt & u8(0x1E)) == u8(0x10);
   1829 
   1830   const s32 rs = GetConstantRegS32(cf.MipsS());
   1831   const bool taken = bgez ? (rs >= 0) : (rs < 0);
   1832   const u32 taken_pc = GetConditionalBranchTarget(cf);
   1833 
   1834   if (link)
   1835     SetConstantReg(Reg::ra, GetBranchReturnAddress(cf));
   1836 
   1837   CompileBranchDelaySlot();
   1838   EndBlock(taken ? taken_pc : m_compiler_pc, true);
   1839 }
   1840 
   1841 void CPU::NewRec::Compiler::Compile_b(CompileFlags cf)
   1842 {
   1843   const u8 irt = static_cast<u8>(inst->i.rt.GetValue());
   1844   const bool bgez = ConvertToBoolUnchecked(irt & u8(1));
   1845   const bool link = (irt & u8(0x1E)) == u8(0x10);
   1846 
   1847   if (link)
   1848     SetConstantReg(Reg::ra, GetBranchReturnAddress(cf));
   1849 
   1850   Compile_bxx(cf, bgez ? BranchCondition::GreaterEqualZero : BranchCondition::LessThanZero);
   1851 }
   1852 
// blez: branch when rs <= 0 (signed), non-constant operand path.
void CPU::NewRec::Compiler::Compile_blez(CompileFlags cf)
{
  Compile_bxx(cf, BranchCondition::LessEqualZero);
}
   1857 
// blez with a compile-time-constant rs: the branch resolves statically.
void CPU::NewRec::Compiler::Compile_blez_const(CompileFlags cf)
{
  Compile_bxx_const(cf, BranchCondition::LessEqualZero);
}
   1862 
// bgtz: branch when rs > 0 (signed), non-constant operand path.
void CPU::NewRec::Compiler::Compile_bgtz(CompileFlags cf)
{
  Compile_bxx(cf, BranchCondition::GreaterThanZero);
}
   1867 
// bgtz with a compile-time-constant rs: the branch resolves statically.
void CPU::NewRec::Compiler::Compile_bgtz_const(CompileFlags cf)
{
  Compile_bxx_const(cf, BranchCondition::GreaterThanZero);
}
   1872 
// beq: branch when rs == rt, non-constant operand path.
void CPU::NewRec::Compiler::Compile_beq(CompileFlags cf)
{
  Compile_bxx(cf, BranchCondition::Equal);
}
   1877 
// beq with both operands constant: the branch resolves statically.
void CPU::NewRec::Compiler::Compile_beq_const(CompileFlags cf)
{
  Compile_bxx_const(cf, BranchCondition::Equal);
}
   1882 
// bne: branch when rs != rt, non-constant operand path.
void CPU::NewRec::Compiler::Compile_bne(CompileFlags cf)
{
  Compile_bxx(cf, BranchCondition::NotEqual);
}
   1887 
// bne with both operands constant: the branch resolves statically.
void CPU::NewRec::Compiler::Compile_bne_const(CompileFlags cf)
{
  Compile_bxx_const(cf, BranchCondition::NotEqual);
}
   1892 
   1893 void CPU::NewRec::Compiler::Compile_bxx_const(CompileFlags cf, BranchCondition cond)
   1894 {
   1895   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   1896 
   1897   bool taken;
   1898   switch (cond)
   1899   {
   1900     case BranchCondition::Equal:
   1901       taken = GetConstantRegU32(cf.MipsS()) == GetConstantRegU32(cf.MipsT());
   1902       break;
   1903 
   1904     case BranchCondition::NotEqual:
   1905       taken = GetConstantRegU32(cf.MipsS()) != GetConstantRegU32(cf.MipsT());
   1906       break;
   1907 
   1908     case BranchCondition::GreaterThanZero:
   1909       taken = GetConstantRegS32(cf.MipsS()) > 0;
   1910       break;
   1911 
   1912     case BranchCondition::GreaterEqualZero:
   1913       taken = GetConstantRegS32(cf.MipsS()) >= 0;
   1914       break;
   1915 
   1916     case BranchCondition::LessThanZero:
   1917       taken = GetConstantRegS32(cf.MipsS()) < 0;
   1918       break;
   1919 
   1920     case BranchCondition::LessEqualZero:
   1921       taken = GetConstantRegS32(cf.MipsS()) <= 0;
   1922       break;
   1923 
   1924     default:
   1925       Panic("Unhandled condition");
   1926       return;
   1927   }
   1928 
   1929   const u32 taken_pc = GetConditionalBranchTarget(cf);
   1930   CompileBranchDelaySlot();
   1931   EndBlock(taken ? taken_pc : m_compiler_pc, true);
   1932 }
   1933 
   1934 void CPU::NewRec::Compiler::Compile_sll_const(CompileFlags cf)
   1935 {
   1936   DebugAssert(HasConstantReg(cf.MipsT()));
   1937   SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) << inst->r.shamt);
   1938 }
   1939 
   1940 void CPU::NewRec::Compiler::Compile_srl_const(CompileFlags cf)
   1941 {
   1942   DebugAssert(HasConstantReg(cf.MipsT()));
   1943   SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) >> inst->r.shamt);
   1944 }
   1945 
   1946 void CPU::NewRec::Compiler::Compile_sra_const(CompileFlags cf)
   1947 {
   1948   DebugAssert(HasConstantReg(cf.MipsT()));
   1949   SetConstantReg(MipsD(), static_cast<u32>(GetConstantRegS32(cf.MipsT()) >> inst->r.shamt));
   1950 }
   1951 
   1952 void CPU::NewRec::Compiler::Compile_sllv_const(CompileFlags cf)
   1953 {
   1954   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   1955   SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) << (GetConstantRegU32(cf.MipsS()) & 0x1Fu));
   1956 }
   1957 
   1958 void CPU::NewRec::Compiler::Compile_srlv_const(CompileFlags cf)
   1959 {
   1960   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   1961   SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) >> (GetConstantRegU32(cf.MipsS()) & 0x1Fu));
   1962 }
   1963 
   1964 void CPU::NewRec::Compiler::Compile_srav_const(CompileFlags cf)
   1965 {
   1966   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   1967   SetConstantReg(MipsD(), static_cast<u32>(GetConstantRegS32(cf.MipsT()) >> (GetConstantRegU32(cf.MipsS()) & 0x1Fu)));
   1968 }
   1969 
   1970 void CPU::NewRec::Compiler::Compile_and_const(CompileFlags cf)
   1971 {
   1972   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   1973   SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) & GetConstantRegU32(cf.MipsT()));
   1974 }
   1975 
   1976 void CPU::NewRec::Compiler::Compile_or_const(CompileFlags cf)
   1977 {
   1978   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   1979   SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) | GetConstantRegU32(cf.MipsT()));
   1980 }
   1981 
   1982 void CPU::NewRec::Compiler::Compile_xor_const(CompileFlags cf)
   1983 {
   1984   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   1985   SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) ^ GetConstantRegU32(cf.MipsT()));
   1986 }
   1987 
   1988 void CPU::NewRec::Compiler::Compile_nor_const(CompileFlags cf)
   1989 {
   1990   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   1991   SetConstantReg(MipsD(), ~(GetConstantRegU32(cf.MipsS()) | GetConstantRegU32(cf.MipsT())));
   1992 }
   1993 
   1994 void CPU::NewRec::Compiler::Compile_slt_const(CompileFlags cf)
   1995 {
   1996   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   1997   SetConstantReg(MipsD(), BoolToUInt32(GetConstantRegS32(cf.MipsS()) < GetConstantRegS32(cf.MipsT())));
   1998 }
   1999 
   2000 void CPU::NewRec::Compiler::Compile_sltu_const(CompileFlags cf)
   2001 {
   2002   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   2003   SetConstantReg(MipsD(), BoolToUInt32(GetConstantRegU32(cf.MipsS()) < GetConstantRegU32(cf.MipsT())));
   2004 }
   2005 
   2006 void CPU::NewRec::Compiler::Compile_mult_const(CompileFlags cf)
   2007 {
   2008   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   2009 
   2010   const u64 res =
   2011     static_cast<u64>(static_cast<s64>(GetConstantRegS32(cf.MipsS())) * static_cast<s64>(GetConstantRegS32(cf.MipsT())));
   2012   SetConstantReg(Reg::hi, static_cast<u32>(res >> 32));
   2013   SetConstantReg(Reg::lo, static_cast<u32>(res));
   2014 }
   2015 
   2016 void CPU::NewRec::Compiler::Compile_multu_const(CompileFlags cf)
   2017 {
   2018   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   2019 
   2020   const u64 res = static_cast<u64>(GetConstantRegU32(cf.MipsS())) * static_cast<u64>(GetConstantRegU32(cf.MipsT()));
   2021   SetConstantReg(Reg::hi, static_cast<u32>(res >> 32));
   2022   SetConstantReg(Reg::lo, static_cast<u32>(res));
   2023 }
   2024 
   2025 void CPU::NewRec::Compiler::MIPSSignedDivide(s32 num, s32 denom, u32* lo, u32* hi)
   2026 {
   2027   if (denom == 0)
   2028   {
   2029     // divide by zero
   2030     *lo = (num >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
   2031     *hi = static_cast<u32>(num);
   2032   }
   2033   else if (static_cast<u32>(num) == UINT32_C(0x80000000) && denom == -1)
   2034   {
   2035     // unrepresentable
   2036     *lo = UINT32_C(0x80000000);
   2037     *hi = 0;
   2038   }
   2039   else
   2040   {
   2041     *lo = static_cast<u32>(num / denom);
   2042     *hi = static_cast<u32>(num % denom);
   2043   }
   2044 }
   2045 
   2046 void CPU::NewRec::Compiler::MIPSUnsignedDivide(u32 num, u32 denom, u32* lo, u32* hi)
   2047 {
   2048   if (denom == 0)
   2049   {
   2050     // divide by zero
   2051     *lo = UINT32_C(0xFFFFFFFF);
   2052     *hi = static_cast<u32>(num);
   2053   }
   2054   else
   2055   {
   2056     *lo = num / denom;
   2057     *hi = num % denom;
   2058   }
   2059 }
   2060 
   2061 void CPU::NewRec::Compiler::Compile_div_const(CompileFlags cf)
   2062 {
   2063   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   2064 
   2065   const s32 num = GetConstantRegS32(cf.MipsS());
   2066   const s32 denom = GetConstantRegS32(cf.MipsT());
   2067 
   2068   u32 lo, hi;
   2069   MIPSSignedDivide(num, denom, &lo, &hi);
   2070 
   2071   SetConstantReg(Reg::hi, hi);
   2072   SetConstantReg(Reg::lo, lo);
   2073 }
   2074 
   2075 void CPU::NewRec::Compiler::Compile_divu_const(CompileFlags cf)
   2076 {
   2077   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   2078 
   2079   const u32 num = GetConstantRegU32(cf.MipsS());
   2080   const u32 denom = GetConstantRegU32(cf.MipsT());
   2081 
   2082   u32 lo, hi;
   2083   MIPSUnsignedDivide(num, denom, &lo, &hi);
   2084 
   2085   SetConstantReg(Reg::hi, hi);
   2086   SetConstantReg(Reg::lo, lo);
   2087 }
   2088 
   2089 void CPU::NewRec::Compiler::Compile_add_const(CompileFlags cf)
   2090 {
   2091   // TODO: Overflow
   2092   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   2093   if (MipsD() != Reg::zero)
   2094     SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) + GetConstantRegU32(cf.MipsT()));
   2095 }
   2096 
   2097 void CPU::NewRec::Compiler::Compile_addu_const(CompileFlags cf)
   2098 {
   2099   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   2100   SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) + GetConstantRegU32(cf.MipsT()));
   2101 }
   2102 
   2103 void CPU::NewRec::Compiler::Compile_sub_const(CompileFlags cf)
   2104 {
   2105   // TODO: Overflow
   2106   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   2107   if (MipsD() != Reg::zero)
   2108     SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) - GetConstantRegU32(cf.MipsT()));
   2109 }
   2110 
   2111 void CPU::NewRec::Compiler::Compile_subu_const(CompileFlags cf)
   2112 {
   2113   DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
   2114   SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) - GetConstantRegU32(cf.MipsT()));
   2115 }
   2116 
   2117 void CPU::NewRec::Compiler::Compile_addi_const(CompileFlags cf)
   2118 {
   2119   // TODO: Overflow
   2120   DebugAssert(HasConstantReg(cf.MipsS()));
   2121   if (cf.MipsT() != Reg::zero)
   2122     SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) + inst->i.imm_sext32());
   2123 }
   2124 
   2125 void CPU::NewRec::Compiler::Compile_addiu_const(CompileFlags cf)
   2126 {
   2127   DebugAssert(HasConstantReg(cf.MipsS()));
   2128   SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) + inst->i.imm_sext32());
   2129 }
   2130 
   2131 void CPU::NewRec::Compiler::Compile_slti_const(CompileFlags cf)
   2132 {
   2133   DebugAssert(HasConstantReg(cf.MipsS()));
   2134   SetConstantReg(cf.MipsT(), BoolToUInt32(GetConstantRegS32(cf.MipsS()) < static_cast<s32>(inst->i.imm_sext32())));
   2135 }
   2136 
   2137 void CPU::NewRec::Compiler::Compile_sltiu_const(CompileFlags cf)
   2138 {
   2139   DebugAssert(HasConstantReg(cf.MipsS()));
   2140   SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) < inst->i.imm_sext32());
   2141 }
   2142 
   2143 void CPU::NewRec::Compiler::Compile_andi_const(CompileFlags cf)
   2144 {
   2145   DebugAssert(HasConstantReg(cf.MipsS()));
   2146   SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) & inst->i.imm_zext32());
   2147 }
   2148 
   2149 void CPU::NewRec::Compiler::Compile_ori_const(CompileFlags cf)
   2150 {
   2151   DebugAssert(HasConstantReg(cf.MipsS()));
   2152   SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) | inst->i.imm_zext32());
   2153 }
   2154 
   2155 void CPU::NewRec::Compiler::Compile_xori_const(CompileFlags cf)
   2156 {
   2157   DebugAssert(HasConstantReg(cf.MipsS()));
   2158   SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) ^ inst->i.imm_zext32());
   2159 }
   2160 
   2161 void CPU::NewRec::Compiler::Compile_lui()
   2162 {
   2163   if (inst->i.rt == Reg::zero)
   2164     return;
   2165 
   2166   SetConstantReg(inst->i.rt, inst->i.imm_zext32() << 16);
   2167 
   2168   if (g_settings.UsingPGXPCPUMode())
   2169     GeneratePGXPCallWithMIPSRegs(reinterpret_cast<const void*>(&PGXP::CPU_LUI), inst->bits);
   2170 }
   2171 
// Per-register COP0 access table, indexed by cop0 register number 0-15:
// {pointer to backing storage, mask of writable bits}. A null pointer marks a
// register this recompiler doesn't handle; a zero mask makes it read-only.
static constexpr const std::array<std::pair<u32*, u32>, 16> s_cop0_table = {
  {{nullptr, 0x00000000u},                                                 // 0
   {nullptr, 0x00000000u},                                                 // 1
   {nullptr, 0x00000000u},                                                 // 2
   {&CPU::g_state.cop0_regs.BPC, 0xffffffffu},                             // 3: BPC
   {nullptr, 0},                                                           // 4
   {&CPU::g_state.cop0_regs.BDA, 0xffffffffu},                             // 5: BDA
   {&CPU::g_state.cop0_regs.TAR, 0x00000000u},                             // 6: TAR (read-only)
   {&CPU::g_state.cop0_regs.dcic.bits, CPU::Cop0Registers::DCIC::WRITE_MASK}, // 7: DCIC
   {&CPU::g_state.cop0_regs.BadVaddr, 0x00000000u},                        // 8: BadVaddr (read-only)
   {&CPU::g_state.cop0_regs.BDAM, 0xffffffffu},                            // 9: BDAM
   {nullptr, 0x00000000u},                                                 // 10
   {&CPU::g_state.cop0_regs.BPCM, 0xffffffffu},                            // 11: BPCM
   {&CPU::g_state.cop0_regs.sr.bits, CPU::Cop0Registers::SR::WRITE_MASK},  // 12: SR
   {&CPU::g_state.cop0_regs.cause.bits, CPU::Cop0Registers::CAUSE::WRITE_MASK}, // 13: CAUSE
   {&CPU::g_state.cop0_regs.EPC, 0x00000000u},                             // 14: EPC (read-only)
   {&CPU::g_state.cop0_regs.PRID, 0x00000000u}}};                          // 15: PRID (read-only)
   2189 
   2190 u32* CPU::NewRec::Compiler::GetCop0RegPtr(Cop0Reg reg)
   2191 {
   2192   return (static_cast<u8>(reg) < s_cop0_table.size()) ? s_cop0_table[static_cast<u8>(reg)].first : nullptr;
   2193 }
   2194 
   2195 u32 CPU::NewRec::Compiler::GetCop0RegWriteMask(Cop0Reg reg)
   2196 {
   2197   return (static_cast<u8>(reg) < s_cop0_table.size()) ? s_cop0_table[static_cast<u8>(reg)].second : 0;
   2198 }
   2199 
   2200 void CPU::NewRec::Compiler::Compile_mfc0(CompileFlags cf)
   2201 {
   2202   const Cop0Reg r = static_cast<Cop0Reg>(MipsD());
   2203   const u32* ptr = GetCop0RegPtr(r);
   2204   if (!ptr)
   2205   {
   2206     ERROR_LOG("Read from unknown cop0 reg {}", static_cast<u32>(r));
   2207     Compile_Fallback();
   2208     return;
   2209   }
   2210 
   2211   DebugAssert(cf.valid_host_t);
   2212   LoadHostRegFromCPUPointer(cf.host_t, ptr);
   2213 }
   2214 
// Returns the host pointer to GTE register 'index' plus the action the code
// generator must take when reading (writing == false) or writing it.
// The case lists below must stay exactly in sync with the GTE register map.
std::pair<u32*, CPU::NewRec::Compiler::GTERegisterAccessAction>
CPU::NewRec::Compiler::GetGTERegisterPointer(u32 index, bool writing)
{
  if (!writing)
  {
    // Most GTE registers can be read directly. Handle the special cases here.
    if (index == 15) // SXY3
    {
      // mirror of SXY2
      index = 14;
    }

    switch (index)
    {
      case 28: // IRGB
      case 29: // ORGB
      {
        // conversion registers - value is computed on read, so call the handler
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::CallHandler);
      }
      break;

      default:
      {
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::Direct);
      }
      break;
    }
  }
  else
  {
    switch (index)
    {
      case 1:  // V0[z]
      case 3:  // V1[z]
      case 5:  // V2[z]
      case 8:  // IR0
      case 9:  // IR1
      case 10: // IR2
      case 11: // IR3
      case 36: // RT33
      case 44: // L33
      case 52: // LR33
      case 58: // H       - sign-extended on read but zext on use
      case 59: // DQA
      case 61: // ZSF3
      case 62: // ZSF4
      {
        // sign-extend z component of vector registers
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::SignExtend16);
      }
      break;

      case 7:  // OTZ
      case 16: // SZ0
      case 17: // SZ1
      case 18: // SZ2
      case 19: // SZ3
      {
        // zero-extend unsigned values
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::ZeroExtend16);
      }
      break;

      case 15: // SXY3
      {
        // writing to SXYP pushes to the FIFO
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::PushFIFO);
      }
      break;

      case 28: // IRGB
      case 30: // LZCS
      case 63: // FLAG
      {
        // writes with side effects - must go through the handler
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::CallHandler);
      }

      case 29: // ORGB
      case 31: // LZCR
      {
        // read-only registers
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::Ignore);
      }

      default:
      {
        // written as-is, 2x16 or 1x32 bits
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::Direct);
      }
    }
  }
}
   2307 
   2308 void CPU::NewRec::Compiler::AddGTETicks(TickCount ticks)
   2309 {
   2310   // TODO: check, int has +1 here
   2311   m_gte_done_cycle = m_cycles + ticks;
   2312   DEBUG_LOG("Adding {} GTE ticks", ticks);
   2313 }
   2314 
// Accounts for a stall so the pending GTE operation is complete before the
// current instruction uses its result.
void CPU::NewRec::Compiler::StallUntilGTEComplete()
{
  // TODO: hack to match old rec.. this may or may not be correct behavior
  // it's the difference between stalling before and after the current instruction's cycle
  DebugAssert(m_cycles > 0);
  // temporarily exclude the current instruction's cycle from the comparison;
  // restored at the bottom of the function
  m_cycles--;

  if (!m_dirty_gte_done_cycle)
  {
    // simple case - in block scheduling
    if (m_gte_done_cycle > m_cycles)
    {
      DEBUG_LOG("Stalling for {} ticks from GTE", m_gte_done_cycle - m_cycles);
      m_cycles += (m_gte_done_cycle - m_cycles);
    }
  }
  else
  {
    // switch to in block scheduling
    DEBUG_LOG("Flushing GTE stall from state");
    Flush(FLUSH_GTE_STALL_FROM_STATE);
  }

  m_cycles++;
}
   2340 
// Rewrites a fastmem load/store that faulted at 'exception_pc' into a jump to
// a far-code thunk that performs the access through the slowmem handlers.
void CPU::NewRec::Compiler::BackpatchLoadStore(void* exception_pc, const CodeCache::LoadstoreBackpatchInfo& info)
{
  // remove the cycles we added for the memory read, then take them off again after the backpatch
  // the normal rec path will add the ram read ticks later, so we need to take them off at the end
  DebugAssert(!info.is_load || info.cycles >= Bus::RAM_READ_TICKS);
  const TickCount cycles_to_add =
    static_cast<TickCount>(static_cast<u32>(info.cycles)) - (info.is_load ? Bus::RAM_READ_TICKS : 0);
  const TickCount cycles_to_remove = static_cast<TickCount>(static_cast<u32>(info.cycles));

  // emit the slowmem thunk into the far code buffer
  void* thunk_address = CPU::CodeCache::GetFreeFarCodePointer();
  const u32 thunk_size = CompileLoadStoreThunk(
    thunk_address, CPU::CodeCache::GetFreeFarCodeSpace(), exception_pc, info.code_size, cycles_to_add, cycles_to_remove,
    info.gpr_bitmask, info.address_register, info.data_register, info.AccessSize(), info.is_signed, info.is_load);

#if 0
  Log_DebugPrint("**Backpatch Thunk**");
  CPU::CodeCache::DisassembleAndLogHostCode(thunk_address, thunk_size);
#endif

  // backpatch to a jump to the slowmem handler
  CPU::CodeCache::EmitJump(exception_pc, thunk_address, true);

  CPU::CodeCache::CommitFarCode(thunk_size);
}
   2365 
   2366 void CPU::NewRec::Compiler::InitSpeculativeRegs()
   2367 {
   2368   for (u8 i = 0; i < static_cast<u8>(Reg::count); i++)
   2369     m_speculative_constants.regs[i] = g_state.regs.r[i];
   2370 
   2371   m_speculative_constants.cop0_sr = g_state.cop0_regs.sr.bits;
   2372   m_speculative_constants.memory.clear();
   2373 }
   2374 
   2375 void CPU::NewRec::Compiler::InvalidateSpeculativeValues()
   2376 {
   2377   m_speculative_constants.regs.fill(std::nullopt);
   2378   m_speculative_constants.memory.clear();
   2379   m_speculative_constants.cop0_sr.reset();
   2380 }
   2381 
// Known compile-time value of a GPR, or nullopt if it cannot be predicted.
CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecReadReg(Reg reg)
{
  return m_speculative_constants.regs[static_cast<u8>(reg)];
}
   2386 
   2387 void CPU::NewRec::Compiler::SpecWriteReg(Reg reg, SpecValue value)
   2388 {
   2389   if (reg == Reg::zero)
   2390     return;
   2391 
   2392   m_speculative_constants.regs[static_cast<u8>(reg)] = value;
   2393 }
   2394 
   2395 void CPU::NewRec::Compiler::SpecInvalidateReg(Reg reg)
   2396 {
   2397   if (reg == Reg::zero)
   2398     return;
   2399 
   2400   m_speculative_constants.regs[static_cast<u8>(reg)].reset();
   2401 }
   2402 
   2403 void CPU::NewRec::Compiler::SpecCopyReg(Reg dst, Reg src)
   2404 {
   2405   if (dst == Reg::zero)
   2406     return;
   2407 
   2408   m_speculative_constants.regs[static_cast<u8>(dst)] = m_speculative_constants.regs[static_cast<u8>(src)];
   2409 }
   2410 
   2411 CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecReadMem(VirtualMemoryAddress address)
   2412 {
   2413   auto it = m_speculative_constants.memory.find(address);
   2414   if (it != m_speculative_constants.memory.end())
   2415     return it->second;
   2416 
   2417   u32 value;
   2418   if ((address & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR)
   2419   {
   2420     u32 scratchpad_offset = address & SCRATCHPAD_OFFSET_MASK;
   2421     std::memcpy(&value, &CPU::g_state.scratchpad[scratchpad_offset], sizeof(value));
   2422     return value;
   2423   }
   2424 
   2425   const PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK;
   2426   if (Bus::IsRAMAddress(phys_addr))
   2427   {
   2428     u32 ram_offset = phys_addr & Bus::g_ram_mask;
   2429     std::memcpy(&value, &Bus::g_ram[ram_offset], sizeof(value));
   2430     return value;
   2431   }
   2432 
   2433   return std::nullopt;
   2434 }
   2435 
   2436 void CPU::NewRec::Compiler::SpecWriteMem(u32 address, SpecValue value)
   2437 {
   2438   auto it = m_speculative_constants.memory.find(address);
   2439   if (it != m_speculative_constants.memory.end())
   2440   {
   2441     it->second = value;
   2442     return;
   2443   }
   2444 
   2445   const PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK;
   2446   if ((address & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR || Bus::IsRAMAddress(phys_addr))
   2447     m_speculative_constants.memory.emplace(address, value);
   2448 }
   2449 
// Marks the word at 'address' as having an unknown value from here on.
void CPU::NewRec::Compiler::SpecInvalidateMem(VirtualMemoryAddress address)
{
  SpecWriteMem(address, std::nullopt);
}
   2454 
   2455 bool CPU::NewRec::Compiler::SpecIsCacheIsolated()
   2456 {
   2457   if (!m_speculative_constants.cop0_sr.has_value())
   2458     return false;
   2459 
   2460   const Cop0Registers::SR sr{m_speculative_constants.cop0_sr.value()};
   2461   return sr.Isc;
   2462 }
   2463 
   2464 void CPU::NewRec::Compiler::SpecExec_b()
   2465 {
   2466   const bool link = (static_cast<u8>(inst->i.rt.GetValue()) & u8(0x1E)) == u8(0x10);
   2467   if (link)
   2468     SpecWriteReg(Reg::ra, m_compiler_pc);
   2469 }
   2470 
// JAL links: $ra receives the speculative return address (m_compiler_pc).
void CPU::NewRec::Compiler::SpecExec_jal()
{
  SpecWriteReg(Reg::ra, m_compiler_pc);
}
   2475 
// JALR links into rd instead of the fixed $ra.
void CPU::NewRec::Compiler::SpecExec_jalr()
{
  SpecWriteReg(inst->r.rd, m_compiler_pc);
}
   2480 
   2481 void CPU::NewRec::Compiler::SpecExec_sll()
   2482 {
   2483   const SpecValue rt = SpecReadReg(inst->r.rt);
   2484   if (rt.has_value())
   2485     SpecWriteReg(inst->r.rd, rt.value() << inst->r.shamt);
   2486   else
   2487     SpecInvalidateReg(inst->r.rd);
   2488 }
   2489 
   2490 void CPU::NewRec::Compiler::SpecExec_srl()
   2491 {
   2492   const SpecValue rt = SpecReadReg(inst->r.rt);
   2493   if (rt.has_value())
   2494     SpecWriteReg(inst->r.rd, rt.value() >> inst->r.shamt);
   2495   else
   2496     SpecInvalidateReg(inst->r.rd);
   2497 }
   2498 
   2499 void CPU::NewRec::Compiler::SpecExec_sra()
   2500 {
   2501   const SpecValue rt = SpecReadReg(inst->r.rt);
   2502   if (rt.has_value())
   2503     SpecWriteReg(inst->r.rd, static_cast<u32>(static_cast<s32>(rt.value()) >> inst->r.shamt));
   2504   else
   2505     SpecInvalidateReg(inst->r.rd);
   2506 }
   2507 
   2508 void CPU::NewRec::Compiler::SpecExec_sllv()
   2509 {
   2510   const SpecValue rs = SpecReadReg(inst->r.rs);
   2511   const SpecValue rt = SpecReadReg(inst->r.rt);
   2512   if (rs.has_value() && rt.has_value())
   2513     SpecWriteReg(inst->r.rd, rt.value() << (rs.value() & 0x1F));
   2514   else
   2515     SpecInvalidateReg(inst->r.rd);
   2516 }
   2517 
   2518 void CPU::NewRec::Compiler::SpecExec_srlv()
   2519 {
   2520   const SpecValue rs = SpecReadReg(inst->r.rs);
   2521   const SpecValue rt = SpecReadReg(inst->r.rt);
   2522   if (rs.has_value() && rt.has_value())
   2523     SpecWriteReg(inst->r.rd, rt.value() >> (rs.value() & 0x1F));
   2524   else
   2525     SpecInvalidateReg(inst->r.rd);
   2526 }
   2527 
   2528 void CPU::NewRec::Compiler::SpecExec_srav()
   2529 {
   2530   const SpecValue rs = SpecReadReg(inst->r.rs);
   2531   const SpecValue rt = SpecReadReg(inst->r.rt);
   2532   if (rs.has_value() && rt.has_value())
   2533     SpecWriteReg(inst->r.rd, static_cast<u32>(static_cast<s32>(rt.value()) >> (rs.value() & 0x1F)));
   2534   else
   2535     SpecInvalidateReg(inst->r.rd);
   2536 }
   2537 
   2538 void CPU::NewRec::Compiler::SpecExec_mult()
   2539 {
   2540   const SpecValue rs = SpecReadReg(inst->r.rs);
   2541   const SpecValue rt = SpecReadReg(inst->r.rt);
   2542   if (rs.has_value() && rt.has_value())
   2543   {
   2544     const u64 result =
   2545       static_cast<u64>(static_cast<s64>(SignExtend64(rs.value())) * static_cast<s64>(SignExtend64(rt.value())));
   2546     SpecWriteReg(Reg::hi, Truncate32(result >> 32));
   2547     SpecWriteReg(Reg::lo, Truncate32(result));
   2548   }
   2549   else
   2550   {
   2551     SpecInvalidateReg(Reg::hi);
   2552     SpecInvalidateReg(Reg::lo);
   2553   }
   2554 }
   2555 
   2556 void CPU::NewRec::Compiler::SpecExec_multu()
   2557 {
   2558   const SpecValue rs = SpecReadReg(inst->r.rs);
   2559   const SpecValue rt = SpecReadReg(inst->r.rt);
   2560   if (rs.has_value() && rt.has_value())
   2561   {
   2562     const u64 result = ZeroExtend64(rs.value()) * SignExtend64(rt.value());
   2563     SpecWriteReg(Reg::hi, Truncate32(result >> 32));
   2564     SpecWriteReg(Reg::lo, Truncate32(result));
   2565   }
   2566   else
   2567   {
   2568     SpecInvalidateReg(Reg::hi);
   2569     SpecInvalidateReg(Reg::lo);
   2570   }
   2571 }
   2572 
   2573 void CPU::NewRec::Compiler::SpecExec_div()
   2574 {
   2575   const SpecValue rs = SpecReadReg(inst->r.rs);
   2576   const SpecValue rt = SpecReadReg(inst->r.rt);
   2577   if (rs.has_value() && rt.has_value())
   2578   {
   2579     u32 lo, hi;
   2580     MIPSSignedDivide(static_cast<s32>(rs.value()), static_cast<s32>(rt.value()), &lo, &hi);
   2581     SpecWriteReg(Reg::hi, hi);
   2582     SpecWriteReg(Reg::lo, lo);
   2583   }
   2584   else
   2585   {
   2586     SpecInvalidateReg(Reg::hi);
   2587     SpecInvalidateReg(Reg::lo);
   2588   }
   2589 }
   2590 
   2591 void CPU::NewRec::Compiler::SpecExec_divu()
   2592 {
   2593   const SpecValue rs = SpecReadReg(inst->r.rs);
   2594   const SpecValue rt = SpecReadReg(inst->r.rt);
   2595   if (rs.has_value() && rt.has_value())
   2596   {
   2597     u32 lo, hi;
   2598     MIPSUnsignedDivide(rs.value(), rt.value(), &lo, &hi);
   2599     SpecWriteReg(Reg::hi, hi);
   2600     SpecWriteReg(Reg::lo, lo);
   2601   }
   2602   else
   2603   {
   2604     SpecInvalidateReg(Reg::hi);
   2605     SpecInvalidateReg(Reg::lo);
   2606   }
   2607 }
   2608 
// ADD differs from ADDU only by the signed-overflow exception, which
// speculation does not model, so delegate to the ADDU path.
void CPU::NewRec::Compiler::SpecExec_add()
{
  SpecExec_addu();
}
   2613 
   2614 void CPU::NewRec::Compiler::SpecExec_addu()
   2615 {
   2616   const SpecValue rs = SpecReadReg(inst->r.rs);
   2617   const SpecValue rt = SpecReadReg(inst->r.rt);
   2618   if (rs.has_value() && rt.has_value())
   2619     SpecWriteReg(inst->r.rd, rs.value() + rt.value());
   2620   else
   2621     SpecInvalidateReg(inst->r.rd);
   2622 }
   2623 
// SUB differs from SUBU only by the signed-overflow exception, which
// speculation does not model, so delegate to the SUBU path.
void CPU::NewRec::Compiler::SpecExec_sub()
{
  SpecExec_subu();
}
   2628 
   2629 void CPU::NewRec::Compiler::SpecExec_subu()
   2630 {
   2631   const SpecValue rs = SpecReadReg(inst->r.rs);
   2632   const SpecValue rt = SpecReadReg(inst->r.rt);
   2633   if (rs.has_value() && rt.has_value())
   2634     SpecWriteReg(inst->r.rd, rs.value() - rt.value());
   2635   else
   2636     SpecInvalidateReg(inst->r.rd);
   2637 }
   2638 
   2639 void CPU::NewRec::Compiler::SpecExec_and()
   2640 {
   2641   const SpecValue rs = SpecReadReg(inst->r.rs);
   2642   const SpecValue rt = SpecReadReg(inst->r.rt);
   2643   if (rs.has_value() && rt.has_value())
   2644     SpecWriteReg(inst->r.rd, rs.value() & rt.value());
   2645   else
   2646     SpecInvalidateReg(inst->r.rd);
   2647 }
   2648 
   2649 void CPU::NewRec::Compiler::SpecExec_or()
   2650 {
   2651   const SpecValue rs = SpecReadReg(inst->r.rs);
   2652   const SpecValue rt = SpecReadReg(inst->r.rt);
   2653   if (rs.has_value() && rt.has_value())
   2654     SpecWriteReg(inst->r.rd, rs.value() | rt.value());
   2655   else
   2656     SpecInvalidateReg(inst->r.rd);
   2657 }
   2658 
   2659 void CPU::NewRec::Compiler::SpecExec_xor()
   2660 {
   2661   const SpecValue rs = SpecReadReg(inst->r.rs);
   2662   const SpecValue rt = SpecReadReg(inst->r.rt);
   2663   if (rs.has_value() && rt.has_value())
   2664     SpecWriteReg(inst->r.rd, rs.value() ^ rt.value());
   2665   else
   2666     SpecInvalidateReg(inst->r.rd);
   2667 }
   2668 
   2669 void CPU::NewRec::Compiler::SpecExec_nor()
   2670 {
   2671   const SpecValue rs = SpecReadReg(inst->r.rs);
   2672   const SpecValue rt = SpecReadReg(inst->r.rt);
   2673   if (rs.has_value() && rt.has_value())
   2674     SpecWriteReg(inst->r.rd, ~(rs.value() | rt.value()));
   2675   else
   2676     SpecInvalidateReg(inst->r.rd);
   2677 }
   2678 
   2679 void CPU::NewRec::Compiler::SpecExec_slt()
   2680 {
   2681   const SpecValue rs = SpecReadReg(inst->r.rs);
   2682   const SpecValue rt = SpecReadReg(inst->r.rt);
   2683   if (rs.has_value() && rt.has_value())
   2684     SpecWriteReg(inst->r.rd, BoolToUInt32(static_cast<s32>(rs.value()) < static_cast<s32>(rt.value())));
   2685   else
   2686     SpecInvalidateReg(inst->r.rd);
   2687 }
   2688 
   2689 void CPU::NewRec::Compiler::SpecExec_sltu()
   2690 {
   2691   const SpecValue rs = SpecReadReg(inst->r.rs);
   2692   const SpecValue rt = SpecReadReg(inst->r.rt);
   2693   if (rs.has_value() && rt.has_value())
   2694     SpecWriteReg(inst->r.rd, BoolToUInt32(rs.value() < rt.value()));
   2695   else
   2696     SpecInvalidateReg(inst->r.rd);
   2697 }
   2698 
// ADDI differs from ADDIU only by the signed-overflow exception, which
// speculation does not model, so delegate to the ADDIU path.
void CPU::NewRec::Compiler::SpecExec_addi()
{
  SpecExec_addiu();
}
   2703 
   2704 void CPU::NewRec::Compiler::SpecExec_addiu()
   2705 {
   2706   const SpecValue rs = SpecReadReg(inst->i.rs);
   2707   if (rs.has_value())
   2708     SpecWriteReg(inst->i.rt, rs.value() + inst->i.imm_sext32());
   2709   else
   2710     SpecInvalidateReg(inst->i.rt);
   2711 }
   2712 
   2713 void CPU::NewRec::Compiler::SpecExec_slti()
   2714 {
   2715   const SpecValue rs = SpecReadReg(inst->i.rs);
   2716   if (rs.has_value())
   2717     SpecWriteReg(inst->i.rt, BoolToUInt32(static_cast<s32>(rs.value()) < static_cast<s32>(inst->i.imm_sext32())));
   2718   else
   2719     SpecInvalidateReg(inst->i.rt);
   2720 }
   2721 
   2722 void CPU::NewRec::Compiler::SpecExec_sltiu()
   2723 {
   2724   const SpecValue rs = SpecReadReg(inst->i.rs);
   2725   if (rs.has_value())
   2726     SpecWriteReg(inst->i.rt, BoolToUInt32(rs.value() < inst->i.imm_sext32()));
   2727   else
   2728     SpecInvalidateReg(inst->i.rt);
   2729 }
   2730 
   2731 void CPU::NewRec::Compiler::SpecExec_andi()
   2732 {
   2733   const SpecValue rs = SpecReadReg(inst->i.rs);
   2734   if (rs.has_value())
   2735     SpecWriteReg(inst->i.rt, rs.value() & inst->i.imm_zext32());
   2736   else
   2737     SpecInvalidateReg(inst->i.rt);
   2738 }
   2739 
   2740 void CPU::NewRec::Compiler::SpecExec_ori()
   2741 {
   2742   const SpecValue rs = SpecReadReg(inst->i.rs);
   2743   if (rs.has_value())
   2744     SpecWriteReg(inst->i.rt, rs.value() | inst->i.imm_zext32());
   2745   else
   2746     SpecInvalidateReg(inst->i.rt);
   2747 }
   2748 
   2749 void CPU::NewRec::Compiler::SpecExec_xori()
   2750 {
   2751   const SpecValue rs = SpecReadReg(inst->i.rs);
   2752   if (rs.has_value())
   2753     SpecWriteReg(inst->i.rt, rs.value() ^ inst->i.imm_zext32());
   2754   else
   2755     SpecInvalidateReg(inst->i.rt);
   2756 }
   2757 
// LUI loads the immediate into the upper halfword; rt is always known.
void CPU::NewRec::Compiler::SpecExec_lui()
{
  SpecWriteReg(inst->i.rt, inst->i.imm_zext32() << 16);
}
   2762 
   2763 CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecExec_LoadStoreAddr()
   2764 {
   2765   const SpecValue rs = SpecReadReg(inst->i.rs);
   2766   return rs.has_value() ? (rs.value() + inst->i.imm_sext32()) : rs;
   2767 }
   2768 
   2769 void CPU::NewRec::Compiler::SpecExec_lxx(MemoryAccessSize size, bool sign)
   2770 {
   2771   const SpecValue addr = SpecExec_LoadStoreAddr();
   2772   SpecValue val;
   2773   if (!addr.has_value() || !(val = SpecReadMem(addr.value())).has_value())
   2774   {
   2775     SpecInvalidateReg(inst->i.rt);
   2776     return;
   2777   }
   2778 
   2779   switch (size)
   2780   {
   2781     case MemoryAccessSize::Byte:
   2782       val = sign ? SignExtend32(static_cast<u8>(val.value())) : ZeroExtend32(static_cast<u8>(val.value()));
   2783       break;
   2784 
   2785     case MemoryAccessSize::HalfWord:
   2786       val = sign ? SignExtend32(static_cast<u16>(val.value())) : ZeroExtend32(static_cast<u16>(val.value()));
   2787       break;
   2788 
   2789     case MemoryAccessSize::Word:
   2790       break;
   2791 
   2792     default:
   2793       UnreachableCode();
   2794   }
   2795 
   2796   SpecWriteReg(inst->r.rt, val);
   2797 }
   2798 
// LWL/LWR merge partial words into rt; that merge is not modelled yet, so
// the destination simply becomes unknown.
void CPU::NewRec::Compiler::SpecExec_lwx(bool lwr)
{
  // TODO
  SpecInvalidateReg(inst->i.rt);
}
   2804 
   2805 void CPU::NewRec::Compiler::SpecExec_sxx(MemoryAccessSize size)
   2806 {
   2807   const SpecValue addr = SpecExec_LoadStoreAddr();
   2808   if (!addr.has_value())
   2809     return;
   2810 
   2811   SpecValue rt = SpecReadReg(inst->i.rt);
   2812   if (rt.has_value())
   2813   {
   2814     switch (size)
   2815     {
   2816       case MemoryAccessSize::Byte:
   2817         rt = ZeroExtend32(static_cast<u8>(rt.value()));
   2818         break;
   2819 
   2820       case MemoryAccessSize::HalfWord:
   2821         rt = ZeroExtend32(static_cast<u16>(rt.value()));
   2822         break;
   2823 
   2824       case MemoryAccessSize::Word:
   2825         break;
   2826 
   2827       default:
   2828         UnreachableCode();
   2829     }
   2830   }
   2831 
   2832   SpecWriteMem(addr.value(), rt);
   2833 }
   2834 
   2835 void CPU::NewRec::Compiler::SpecExec_swx(bool swr)
   2836 {
   2837   const SpecValue addr = SpecExec_LoadStoreAddr();
   2838   if (addr.has_value())
   2839     SpecInvalidateMem(addr.value() & ~3u);
   2840 }
   2841 
   2842 void CPU::NewRec::Compiler::SpecExec_swc2()
   2843 {
   2844   const SpecValue addr = SpecExec_LoadStoreAddr();
   2845   if (addr.has_value())
   2846     SpecInvalidateMem(addr.value());
   2847 }
   2848 
   2849 void CPU::NewRec::Compiler::SpecExec_mfc0()
   2850 {
   2851   const Cop0Reg rd = static_cast<Cop0Reg>(inst->r.rd.GetValue());
   2852   if (rd != Cop0Reg::SR)
   2853   {
   2854     SpecInvalidateReg(inst->r.rt);
   2855     return;
   2856   }
   2857 
   2858   SpecWriteReg(inst->r.rt, m_speculative_constants.cop0_sr);
   2859 }
   2860 
   2861 void CPU::NewRec::Compiler::SpecExec_mtc0()
   2862 {
   2863   const Cop0Reg rd = static_cast<Cop0Reg>(inst->r.rd.GetValue());
   2864   if (rd != Cop0Reg::SR || !m_speculative_constants.cop0_sr.has_value())
   2865     return;
   2866 
   2867   SpecValue val = SpecReadReg(inst->r.rt);
   2868   if (val.has_value())
   2869   {
   2870     constexpr u32 mask = Cop0Registers::SR::WRITE_MASK;
   2871     val = (m_speculative_constants.cop0_sr.value() & mask) | (val.value() & mask);
   2872   }
   2873 
   2874   m_speculative_constants.cop0_sr = val;
   2875 }
   2876 
   2877 void CPU::NewRec::Compiler::SpecExec_rfe()
   2878 {
   2879   if (!m_speculative_constants.cop0_sr.has_value())
   2880     return;
   2881 
   2882   const u32 val = m_speculative_constants.cop0_sr.value();
   2883   m_speculative_constants.cop0_sr = (val & UINT32_C(0b110000)) | ((val & UINT32_C(0b111111)) >> 2);
   2884 }