duckstation

duckstation, archived from the last revision before upstream relicensed the project as proprietary software; this version is the libre one
git clone https://git.neptards.moe/u3shit/duckstation.git

cpu_recompiler_code_generator.cpp (102356B)


// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#include "cpu_recompiler_code_generator.h"
#include "common/log.h"
#include "cpu_core.h"
#include "cpu_core_private.h"
#include "cpu_disasm.h"
#include "cpu_pgxp.h"
#include "gte.h"
#include "settings.h"
Log_SetChannel(CPU::Recompiler);

// TODO: Turn load+sext/zext into a single signed/unsigned load
// TODO: mulx/shlx/etc
// TODO: when writing to the same register, don't allocate a temporary and copy it (mainly for shifts)

namespace CPU::Recompiler {

const void* CodeGenerator::CompileBlock(CodeCache::Block* block, u32* out_host_code_size, u32* out_host_far_code_size)
{
  // TODO: Align code buffer.

  m_block = block;
  m_block_start = {block->Instructions(), block->InstructionsInfo()};
  m_block_end = {block->Instructions() + block->size, block->InstructionsInfo() + block->size};

  m_pc = block->pc;
  m_pc_valid = true;

  EmitBeginBlock(true);
  BlockPrologue();

  m_current_instruction = m_block_start;
  while (m_current_instruction.instruction != m_block_end.instruction)
  {
    if (!CompileInstruction(*m_current_instruction.instruction, *m_current_instruction.info))
    {
      m_current_instruction = {};
      m_block_end = {};
      m_block_start = {};
      m_block = nullptr;
      return nullptr;
    }

    m_current_instruction.instruction++;
    m_current_instruction.info++;
  }

  if (!m_block_linked)
  {
    BlockEpilogue();

    if (block->HasFlag(CodeCache::BlockFlags::SpansPages))
    {
      // jump directly to the next block
      const Value pc = CalculatePC();
      WriteNewPC(pc, true);
      const void* host_target =
        CPU::CodeCache::CreateBlockLink(m_block, GetCurrentCodePointer(), static_cast<u32>(pc.constant_value));
      EmitBranch(host_target);
      EmitEndBlock(true, nullptr);
    }
    else
    {
      EmitEndBlock(true, CodeCache::g_check_events_and_dispatch);
    }
  }

  const void* code = FinalizeBlock(out_host_code_size, out_host_far_code_size);
  DebugAssert(m_register_cache.GetUsedHostRegisters() == 0);

  m_current_instruction = {};
  m_block_end = {};
  m_block_start = {};
  m_block = nullptr;
  return code;
}

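// Top-level opcode dispatch: primary opcodes are handled inline below, the
// SPECIAL (funct) group switches again on the funct field, and anything not
// implemented here drops back to the interpreter through Compile_Fallback.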
bool CodeGenerator::CompileInstruction(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  if (IsNopInstruction(instruction))
  {
    InstructionPrologue(instruction, info, 1);
    InstructionEpilogue(instruction, info);
    return true;
  }

  bool result;
  switch (instruction.op)
  {
#if 1
    case InstructionOp::ori:
    case InstructionOp::andi:
    case InstructionOp::xori:
      result = Compile_Bitwise(instruction, info);
      break;

    case InstructionOp::lb:
    case InstructionOp::lbu:
    case InstructionOp::lh:
    case InstructionOp::lhu:
    case InstructionOp::lw:
      result = Compile_Load(instruction, info);
      break;

    case InstructionOp::lwl:
    case InstructionOp::lwr:
      result = Compile_LoadLeftRight(instruction, info);
      break;

    case InstructionOp::swl:
    case InstructionOp::swr:
      result = Compile_StoreLeftRight(instruction, info);
      break;

    case InstructionOp::sb:
    case InstructionOp::sh:
    case InstructionOp::sw:
      result = Compile_Store(instruction, info);
      break;

    case InstructionOp::j:
    case InstructionOp::jal:
    case InstructionOp::b:
    case InstructionOp::beq:
    case InstructionOp::bne:
    case InstructionOp::bgtz:
    case InstructionOp::blez:
      result = Compile_Branch(instruction, info);
      break;

    case InstructionOp::addi:
    case InstructionOp::addiu:
      result = Compile_Add(instruction, info);
      break;

    case InstructionOp::slti:
    case InstructionOp::sltiu:
      result = Compile_SetLess(instruction, info);
      break;

    case InstructionOp::lui:
      result = Compile_lui(instruction, info);
      break;

    case InstructionOp::cop0:
      result = Compile_cop0(instruction, info);
      break;

    case InstructionOp::cop2:
    case InstructionOp::lwc2:
    case InstructionOp::swc2:
      result = Compile_cop2(instruction, info);
      break;

    case InstructionOp::funct:
    {
      switch (instruction.r.funct)
      {
        case InstructionFunct::and_:
        case InstructionFunct::or_:
        case InstructionFunct::xor_:
        case InstructionFunct::nor:
          result = Compile_Bitwise(instruction, info);
          break;

        case InstructionFunct::sll:
        case InstructionFunct::srl:
        case InstructionFunct::sra:
        case InstructionFunct::sllv:
        case InstructionFunct::srlv:
        case InstructionFunct::srav:
          result = Compile_Shift(instruction, info);
          break;

        case InstructionFunct::mfhi:
        case InstructionFunct::mflo:
        case InstructionFunct::mthi:
        case InstructionFunct::mtlo:
          result = Compile_MoveHiLo(instruction, info);
          break;

        case InstructionFunct::add:
        case InstructionFunct::addu:
          result = Compile_Add(instruction, info);
          break;

        case InstructionFunct::sub:
        case InstructionFunct::subu:
          result = Compile_Subtract(instruction, info);
          break;

        case InstructionFunct::mult:
        case InstructionFunct::multu:
          result = Compile_Multiply(instruction, info);
          break;

        case InstructionFunct::div:
          result = Compile_SignedDivide(instruction, info);
          break;

        case InstructionFunct::divu:
          result = Compile_Divide(instruction, info);
          break;

        case InstructionFunct::slt:
        case InstructionFunct::sltu:
          result = Compile_SetLess(instruction, info);
          break;

        case InstructionFunct::jr:
        case InstructionFunct::jalr:
        case InstructionFunct::syscall:
        case InstructionFunct::break_:
          result = Compile_Branch(instruction, info);
          break;

        default:
          result = Compile_Fallback(instruction, info);
          break;
      }
    }
    break;
#endif

    default:
      result = Compile_Fallback(instruction, info);
      break;
  }

  return result;
}

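// Size-conversion helpers. Constant values are converted at compile time;
// register values are widened with an explicit sign/zero extension into a
// scratch register, or simply viewed/copied at the narrower size.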
Value CodeGenerator::ConvertValueSize(const Value& value, RegSize size, bool sign_extend)
{
  DebugAssert(value.size != size);

  if (value.IsConstant())
  {
    // compile-time conversion, woo!
    switch (size)
    {
      case RegSize_8:
        return Value::FromConstantU8(value.constant_value & 0xFF);

      case RegSize_16:
      {
        switch (value.size)
        {
          case RegSize_8:
            return Value::FromConstantU16(sign_extend ? SignExtend16(Truncate8(value.constant_value)) :
                                                        ZeroExtend16(Truncate8(value.constant_value)));

          default:
            return Value::FromConstantU16(value.constant_value & 0xFFFF);
        }
      }
      break;

      case RegSize_32:
      {
        switch (value.size)
        {
          case RegSize_8:
            return Value::FromConstantU32(sign_extend ? SignExtend32(Truncate8(value.constant_value)) :
                                                        ZeroExtend32(Truncate8(value.constant_value)));
          case RegSize_16:
            return Value::FromConstantU32(sign_extend ? SignExtend32(Truncate16(value.constant_value)) :
                                                        ZeroExtend32(Truncate16(value.constant_value)));

          case RegSize_32:
            return value;

          default:
            break;
        }
      }
      break;

      default:
        break;
    }

    UnreachableCode();
  }

  Value new_value = m_register_cache.AllocateScratch(size);
  if (size < value.size)
  {
    EmitCopyValue(new_value.host_reg, value);
  }
  else
  {
    if (sign_extend)
      EmitSignExtend(new_value.host_reg, size, value.host_reg, value.size);
    else
      EmitZeroExtend(new_value.host_reg, size, value.host_reg, value.size);
  }

  return new_value;
}

void CodeGenerator::ConvertValueSizeInPlace(Value* value, RegSize size, bool sign_extend)
{
  DebugAssert(value->size != size);

  // We don't want to mess up the register cache value, so generate a new value if it's not scratch.
  if (value->IsConstant() || !value->IsScratch())
  {
    *value = ConvertValueSize(*value, size, sign_extend);
    return;
  }

  DebugAssert(value->IsInHostRegister() && value->IsScratch());

  // If the size is smaller and the value is in a register, we can just "view" the lower part.
  if (size < value->size)
  {
    value->size = size;
  }
  else
  {
    if (sign_extend)
      EmitSignExtend(value->host_reg, size, value->host_reg, value->size);
    else
      EmitZeroExtend(value->host_reg, size, value->host_reg, value->size);
  }

  value->size = size;
}

void* CodeGenerator::GetCurrentCodePointer() const
{
  if (m_emit == &m_near_emitter)
    return GetCurrentNearCodePointer();
  else if (m_emit == &m_far_emitter)
    return GetCurrentFarCodePointer();

  Panic("unknown emitter");
}

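// The arithmetic/bitwise helpers below share one shape: fold when both
// operands are compile-time constants, shortcut identities such as adding
// or or-ing with zero, and otherwise emit into a fresh scratch register.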
Value CodeGenerator::AddValues(const Value& lhs, const Value& rhs, bool set_flags)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant() && !set_flags)
  {
    // compile-time
    u64 new_cv = lhs.constant_value + rhs.constant_value;
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(new_cv));

      case RegSize_16:
        return Value::FromConstantU16(Truncate16(new_cv));

      case RegSize_32:
        return Value::FromConstantU32(Truncate32(new_cv));

      case RegSize_64:
        return Value::FromConstantU64(new_cv);

      default:
        return Value();
    }
  }

  Value res = m_register_cache.AllocateScratch(lhs.size);
  if (lhs.HasConstantValue(0) && !set_flags)
  {
    EmitCopyValue(res.host_reg, rhs);
    return res;
  }
  else if (rhs.HasConstantValue(0) && !set_flags)
  {
    EmitCopyValue(res.host_reg, lhs);
    return res;
  }
  else
  {
    if (lhs.IsInHostRegister())
    {
      EmitAdd(res.host_reg, lhs.host_reg, rhs, set_flags);
    }
    else
    {
      EmitCopyValue(res.host_reg, lhs);
      EmitAdd(res.host_reg, res.host_reg, rhs, set_flags);
    }
    return res;
  }
}

Value CodeGenerator::SubValues(const Value& lhs, const Value& rhs, bool set_flags)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant() && !set_flags)
  {
    // compile-time
    u64 new_cv = lhs.constant_value - rhs.constant_value;
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(new_cv));

      case RegSize_16:
        return Value::FromConstantU16(Truncate16(new_cv));

      case RegSize_32:
        return Value::FromConstantU32(Truncate32(new_cv));

      case RegSize_64:
        return Value::FromConstantU64(new_cv);

      default:
        return Value();
    }
  }

  Value res = m_register_cache.AllocateScratch(lhs.size);
  if (rhs.HasConstantValue(0) && !set_flags)
  {
    EmitCopyValue(res.host_reg, lhs);
    return res;
  }
  else
  {
    if (lhs.IsInHostRegister())
    {
      EmitSub(res.host_reg, lhs.host_reg, rhs, set_flags);
    }
    else
    {
      EmitCopyValue(res.host_reg, lhs);
      EmitSub(res.host_reg, res.host_reg, rhs, set_flags);
    }

    return res;
  }
}

std::pair<Value, Value> CodeGenerator::MulValues(const Value& lhs, const Value& rhs, bool signed_multiply)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    switch (lhs.size)
    {
      case RegSize_8:
      {
        u16 res;
        if (signed_multiply)
          res = u16(s16(s8(lhs.constant_value)) * s16(s8(rhs.constant_value)));
        else
          res = u16(u8(lhs.constant_value)) * u16(u8(rhs.constant_value));

        return std::make_pair(Value::FromConstantU8(Truncate8(res >> 8)), Value::FromConstantU8(Truncate8(res)));
      }

      case RegSize_16:
      {
        u32 res;
        if (signed_multiply)
          res = u32(s32(s16(lhs.constant_value)) * s32(s16(rhs.constant_value)));
        else
          res = u32(u16(lhs.constant_value)) * u32(u16(rhs.constant_value));

        return std::make_pair(Value::FromConstantU16(Truncate16(res >> 16)), Value::FromConstantU16(Truncate16(res)));
      }

      case RegSize_32:
      {
        u64 res;
        if (signed_multiply)
          res = u64(s64(s32(lhs.constant_value)) * s64(s32(rhs.constant_value)));
        else
          res = u64(u32(lhs.constant_value)) * u64(u32(rhs.constant_value));

        return std::make_pair(Value::FromConstantU32(Truncate32(res >> 32)), Value::FromConstantU32(Truncate32(res)));
      }
      break;

      case RegSize_64:
      {
        u64 res;
        if (signed_multiply)
          res = u64(s64(lhs.constant_value) * s64(rhs.constant_value));
        else
          res = lhs.constant_value * rhs.constant_value;

        // TODO: 128-bit multiply...
        Panic("128-bit multiply");
        return std::make_pair(Value::FromConstantU64(0), Value::FromConstantU64(res));
      }

      default:
        return std::make_pair(Value::FromConstantU64(0), Value::FromConstantU64(0));
    }
  }

  // We need two registers for both components.
  Value hi = m_register_cache.AllocateScratch(lhs.size);
  Value lo = m_register_cache.AllocateScratch(lhs.size);
  EmitMul(hi.host_reg, lo.host_reg, lhs, rhs, signed_multiply);
  return std::make_pair(std::move(hi), std::move(lo));
}

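// Shift helpers. MIPS masks the shift amount to five bits, which is why the
// constant folds use (amount & 0x1F); assume_amount_masked tells the backend
// whether a variable amount can be trusted to already be masked, or still
// needs masking at runtime.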
Value CodeGenerator::ShlValues(const Value& lhs, const Value& rhs, bool assume_amount_masked /* = true */)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    u64 new_cv = lhs.constant_value << (rhs.constant_value & 0x1F);
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(new_cv));

      case RegSize_16:
        return Value::FromConstantU16(Truncate16(new_cv));

      case RegSize_32:
        return Value::FromConstantU32(Truncate32(new_cv));

      case RegSize_64:
        return Value::FromConstantU64(new_cv);

      default:
        return Value();
    }
  }

  Value res = m_register_cache.AllocateScratch(lhs.size);
  if (rhs.HasConstantValue(0))
  {
    EmitCopyValue(res.host_reg, lhs);
  }
  else
  {
    if (lhs.IsInHostRegister())
    {
      EmitShl(res.host_reg, lhs.host_reg, res.size, rhs, assume_amount_masked);
    }
    else
    {
      EmitCopyValue(res.host_reg, lhs);
      EmitShl(res.host_reg, res.host_reg, res.size, rhs, assume_amount_masked);
    }
  }
  return res;
}

Value CodeGenerator::ShrValues(const Value& lhs, const Value& rhs, bool assume_amount_masked /* = true */)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    u64 new_cv = lhs.constant_value >> (rhs.constant_value & 0x1F);
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(new_cv));

      case RegSize_16:
        return Value::FromConstantU16(Truncate16(new_cv));

      case RegSize_32:
        return Value::FromConstantU32(Truncate32(new_cv));

      case RegSize_64:
        return Value::FromConstantU64(new_cv);

      default:
        return Value();
    }
  }

  Value res = m_register_cache.AllocateScratch(lhs.size);
  if (rhs.HasConstantValue(0))
  {
    EmitCopyValue(res.host_reg, lhs);
  }
  else
  {
    if (lhs.IsInHostRegister())
    {
      EmitShr(res.host_reg, lhs.host_reg, res.size, rhs, assume_amount_masked);
    }
    else
    {
      EmitCopyValue(res.host_reg, lhs);
      EmitShr(res.host_reg, res.host_reg, res.size, rhs, assume_amount_masked);
    }
  }
  return res;
}

Value CodeGenerator::SarValues(const Value& lhs, const Value& rhs, bool assume_amount_masked /* = true */)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(
          static_cast<u8>(static_cast<s8>(Truncate8(lhs.constant_value)) >> (rhs.constant_value & 0x1F)));

      case RegSize_16:
        return Value::FromConstantU16(
          static_cast<u16>(static_cast<s16>(Truncate16(lhs.constant_value)) >> (rhs.constant_value & 0x1F)));

      case RegSize_32:
        return Value::FromConstantU32(
          static_cast<u32>(static_cast<s32>(Truncate32(lhs.constant_value)) >> (rhs.constant_value & 0x1F)));

      case RegSize_64:
        return Value::FromConstantU64(
          static_cast<u64>(static_cast<s64>(lhs.constant_value) >> (rhs.constant_value & 0x3F)));

      default:
        return Value();
    }
  }

  Value res = m_register_cache.AllocateScratch(lhs.size);
  if (rhs.HasConstantValue(0))
  {
    EmitCopyValue(res.host_reg, lhs);
  }
  else
  {
    if (lhs.IsInHostRegister())
    {
      EmitSar(res.host_reg, lhs.host_reg, res.size, rhs, assume_amount_masked);
    }
    else
    {
      EmitCopyValue(res.host_reg, lhs);
      EmitSar(res.host_reg, res.host_reg, res.size, rhs, assume_amount_masked);
    }
  }
  return res;
}

Value CodeGenerator::OrValues(const Value& lhs, const Value& rhs)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    u64 new_cv = lhs.constant_value | rhs.constant_value;
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(new_cv));

      case RegSize_16:
        return Value::FromConstantU16(Truncate16(new_cv));

      case RegSize_32:
        return Value::FromConstantU32(Truncate32(new_cv));

      case RegSize_64:
        return Value::FromConstantU64(new_cv);

      default:
        return Value();
    }
  }

  Value res = m_register_cache.AllocateScratch(lhs.size);
  if (lhs.HasConstantValue(0))
  {
    EmitCopyValue(res.host_reg, rhs);
    return res;
  }
  else if (rhs.HasConstantValue(0))
  {
    EmitCopyValue(res.host_reg, lhs);
    return res;
  }

  if (lhs.IsInHostRegister())
  {
    EmitOr(res.host_reg, lhs.host_reg, rhs);
  }
  else
  {
    EmitCopyValue(res.host_reg, lhs);
    EmitOr(res.host_reg, res.host_reg, rhs);
  }
  return res;
}

void CodeGenerator::OrValueInPlace(Value& lhs, const Value& rhs)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    u64 new_cv = lhs.constant_value | rhs.constant_value;
    switch (lhs.size)
    {
      case RegSize_8:
        lhs = Value::FromConstantU8(Truncate8(new_cv));
        break;

      case RegSize_16:
        lhs = Value::FromConstantU16(Truncate16(new_cv));
        break;

      case RegSize_32:
        lhs = Value::FromConstantU32(Truncate32(new_cv));
        break;

      case RegSize_64:
        lhs = Value::FromConstantU64(new_cv);
        break;

      default:
        lhs = Value();
        break;
    }

    // fully folded to a constant; nothing left to emit
    return;
  }

  // unlikely
  if (rhs.HasConstantValue(0))
    return;

  if (lhs.IsInHostRegister())
  {
    EmitOr(lhs.host_reg, lhs.host_reg, rhs);
  }
  else
  {
    Value new_lhs = m_register_cache.AllocateScratch(lhs.size);
    EmitCopyValue(new_lhs.host_reg, lhs);
    EmitOr(new_lhs.host_reg, new_lhs.host_reg, rhs);
    lhs = std::move(new_lhs);
  }
}

Value CodeGenerator::AndValues(const Value& lhs, const Value& rhs)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    u64 new_cv = lhs.constant_value & rhs.constant_value;
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(new_cv));

      case RegSize_16:
        return Value::FromConstantU16(Truncate16(new_cv));

      case RegSize_32:
        return Value::FromConstantU32(Truncate32(new_cv));

      case RegSize_64:
        return Value::FromConstantU64(new_cv);

      default:
        return Value();
    }
  }

  // TODO: and with -1 -> noop
  Value res = m_register_cache.AllocateScratch(lhs.size);
  if (lhs.HasConstantValue(0) || rhs.HasConstantValue(0))
  {
    EmitXor(res.host_reg, res.host_reg, res);
    return res;
  }

  if (lhs.IsInHostRegister())
  {
    EmitAnd(res.host_reg, lhs.host_reg, rhs);
  }
  else
  {
    EmitCopyValue(res.host_reg, lhs);
    EmitAnd(res.host_reg, res.host_reg, rhs);
  }
  return res;
}

void CodeGenerator::AndValueInPlace(Value& lhs, const Value& rhs)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    u64 new_cv = lhs.constant_value & rhs.constant_value;
    switch (lhs.size)
    {
      case RegSize_8:
        lhs = Value::FromConstantU8(Truncate8(new_cv));
        break;

      case RegSize_16:
        lhs = Value::FromConstantU16(Truncate16(new_cv));
        break;

      case RegSize_32:
        lhs = Value::FromConstantU32(Truncate32(new_cv));
        break;

      case RegSize_64:
        lhs = Value::FromConstantU64(new_cv);
        break;

      default:
        lhs = Value();
        break;
    }

    // fully folded to a constant; without this return the code below would
    // touch lhs.host_reg, which is not valid for a constant value
    return;
  }

  // TODO: and with -1 -> noop
  if (lhs.HasConstantValue(0) || rhs.HasConstantValue(0))
  {
    EmitXor(lhs.host_reg, lhs.host_reg, lhs);
    return;
  }

  if (lhs.IsInHostRegister())
  {
    EmitAnd(lhs.host_reg, lhs.host_reg, rhs);
  }
  else
  {
    Value new_lhs = m_register_cache.AllocateScratch(lhs.size);
    EmitCopyValue(new_lhs.host_reg, lhs);
    EmitAnd(new_lhs.host_reg, new_lhs.host_reg, rhs);
    lhs = std::move(new_lhs);
  }
}

Value CodeGenerator::XorValues(const Value& lhs, const Value& rhs)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    u64 new_cv = lhs.constant_value ^ rhs.constant_value;
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(new_cv));

      case RegSize_16:
        return Value::FromConstantU16(Truncate16(new_cv));

      case RegSize_32:
        return Value::FromConstantU32(Truncate32(new_cv));

      case RegSize_64:
        return Value::FromConstantU64(new_cv);

      default:
        return Value();
    }
  }

  Value res = m_register_cache.AllocateScratch(lhs.size);
  if (lhs.HasConstantValue(0))
  {
    EmitCopyValue(res.host_reg, rhs);
    return res;
  }
  else if (rhs.HasConstantValue(0))
  {
    EmitCopyValue(res.host_reg, lhs);
    return res;
  }

  if (lhs.IsInHostRegister())
  {
    EmitXor(res.host_reg, lhs.host_reg, rhs);
  }
  else
  {
    EmitCopyValue(res.host_reg, lhs);
    EmitXor(res.host_reg, res.host_reg, rhs);
  }

  return res;
}

Value CodeGenerator::NotValue(const Value& val)
{
  if (val.IsConstant())
  {
    u64 new_cv = ~val.constant_value;
    switch (val.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(new_cv));

      case RegSize_16:
        return Value::FromConstantU16(Truncate16(new_cv));

      case RegSize_32:
        return Value::FromConstantU32(Truncate32(new_cv));

      case RegSize_64:
        return Value::FromConstantU64(new_cv);

      default:
        return Value();
    }
  }

  // TODO: Don't allocate scratch if the lhs is a scratch?
  Value res = m_register_cache.AllocateScratch(RegSize_32);
  EmitCopyValue(res.host_reg, val);
  EmitNot(res.host_reg, val.size);
  return res;
}

const TickCount* CodeGenerator::GetFetchMemoryAccessTimePtr() const
{
  const TickCount* ptr =
    Bus::GetMemoryAccessTimePtr(m_block->pc & PHYSICAL_MEMORY_ADDRESS_MASK, MemoryAccessSize::Word);
  AssertMsg(ptr, "Address has dynamic fetch ticks");
  return ptr;
}

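// Exception raises: the unconditional path is emitted inline, while the
// conditional path branches out to the far code buffer so that the common
// non-faulting path stays short in the near buffer.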
void CodeGenerator::GenerateExceptionExit(Instruction instruction, const CodeCache::InstructionInfo& info,
                                          Exception excode, Condition condition /* = Condition::Always */)
{
  const Value CAUSE_bits = Value::FromConstantU32(
    Cop0Registers::CAUSE::MakeValueForException(excode, info.is_branch_delay_slot, false, instruction.cop.cop_n));

  if (condition == Condition::Always)
  {
    // no need to use far code if we're always raising the exception
    m_register_cache.FlushAllGuestRegisters(true, true);
    m_register_cache.FlushLoadDelay(true);

    if (excode == Exception::BP)
    {
      EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32, u32)>(&CPU::RaiseBreakException), CAUSE_bits,
                       GetCurrentInstructionPC(), Value::FromConstantU32(instruction.bits));
    }
    else
    {
      EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), CAUSE_bits,
                       GetCurrentInstructionPC());
    }

    return;
  }

  LabelType skip_exception;
  EmitConditionalBranch(condition, true, &skip_exception);

  m_register_cache.PushState();

  EmitBranch(GetCurrentFarCodePointer());

  SwitchToFarCode();
  EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), CAUSE_bits,
                   GetCurrentInstructionPC());
  EmitExceptionExit();
  SwitchToNearCode();

  m_register_cache.PopState();

  EmitBindLabel(&skip_exception);
}

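// Block entry/exit. The *_dirty flags below track interpreter-visible state
// (branch delay slots, load delays, GTE busy time) whose value is unknown at
// block entry, so it is re-synchronized lazily by the first instructions.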
void CodeGenerator::BlockPrologue()
{
#if 0
  EmitFunctionCall(nullptr, &CodeCache::LogCurrentState);
#endif

  InitSpeculativeRegs();

  if (m_block->protection == CodeCache::PageProtectionMode::ManualCheck)
  {
    DEBUG_LOG("Generate manual protection for PC {:08X}", m_block->pc);
    const u8* ram_ptr = Bus::g_ram + VirtualAddressToPhysical(m_block->pc);
    const u8* shadow_ptr = reinterpret_cast<const u8*>(m_block->Instructions());
    EmitBlockProtectCheck(ram_ptr, shadow_ptr, m_block->size * sizeof(Instruction));
  }

  EmitStoreCPUStructField(OFFSETOF(State, exception_raised), Value::FromConstantU8(0));

  if (g_settings.bios_tty_logging)
  {
    if (m_pc == 0xa0)
      EmitFunctionCall(nullptr, &CPU::HandleA0Syscall);
    else if (m_pc == 0xb0)
      EmitFunctionCall(nullptr, &CPU::HandleB0Syscall);
  }

  EmitICacheCheckAndUpdate();

  // we don't know the state of the last block, so assume load delays might be in progress
  // TODO: Pull load delay into register cache
  m_current_instruction_in_branch_delay_slot_dirty = g_settings.cpu_recompiler_memory_exceptions;
  m_branch_was_taken_dirty = g_settings.cpu_recompiler_memory_exceptions;
  m_current_instruction_was_branch_taken_dirty = false;
  m_load_delay_dirty = true;
  m_gte_busy_cycles_dirty = true;
}

void CodeGenerator::BlockEpilogue()
{
#if defined(_DEBUG) && defined(CPU_ARCH_X64)
  m_emit->nop();
#endif

  m_register_cache.FlushAllGuestRegisters(true, true);
  if (m_register_cache.HasLoadDelay())
    m_register_cache.WriteLoadDelayToCPU(true);

  AddPendingCycles(true);
}

void CodeGenerator::InstructionPrologue(Instruction instruction, const CodeCache::InstructionInfo& info,
                                        TickCount cycles, bool force_sync /* = false */)
{
#if defined(_DEBUG) && defined(CPU_ARCH_X64)
  m_emit->nop();
#endif

  // move instruction offsets forward
  if (m_pc_valid)
    m_pc += 4;

  // reset dirty flags
  if (m_branch_was_taken_dirty)
  {
    Value temp = m_register_cache.AllocateScratch(RegSize_8);
    EmitLoadCPUStructField(temp.host_reg, RegSize_8, OFFSETOF(State, branch_was_taken));
    EmitStoreCPUStructField(OFFSETOF(State, current_instruction_was_branch_taken), temp);
    EmitStoreCPUStructField(OFFSETOF(State, branch_was_taken), Value::FromConstantU8(0));
    m_current_instruction_was_branch_taken_dirty = true;
    m_branch_was_taken_dirty = false;
  }
  else if (m_current_instruction_was_branch_taken_dirty)
  {
    EmitStoreCPUStructField(OFFSETOF(State, current_instruction_was_branch_taken), Value::FromConstantU8(0));
    m_current_instruction_was_branch_taken_dirty = false;
  }

  if (m_current_instruction_in_branch_delay_slot_dirty && !info.is_branch_delay_slot)
  {
    EmitStoreCPUStructField(OFFSETOF(State, current_instruction_in_branch_delay_slot), Value::FromConstantU8(0));
    m_current_instruction_in_branch_delay_slot_dirty = false;
  }

  if (!force_sync)
  {
    // Defer updates for non-faulting instructions.
    m_delayed_cycles_add += cycles;
    return;
  }

  if (info.is_branch_delay_slot && g_settings.cpu_recompiler_memory_exceptions)
  {
    // m_current_instruction_in_branch_delay_slot = true
    EmitStoreCPUStructField(OFFSETOF(State, current_instruction_in_branch_delay_slot), Value::FromConstantU8(1));
    m_current_instruction_in_branch_delay_slot_dirty = true;
  }

  m_delayed_cycles_add += cycles;
  AddPendingCycles(true);
}

void CodeGenerator::InstructionEpilogue(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  m_register_cache.UpdateLoadDelay();

  if (m_load_delay_dirty)
  {
    // we have to invalidate the register cache, since the load delayed register might've been cached
    DEBUG_LOG("Emitting delay slot flush");
    EmitFlushInterpreterLoadDelay();
    m_register_cache.InvalidateAllNonDirtyGuestRegisters();
    m_load_delay_dirty = false;
  }

  // copy if the previous instruction was a load, reset the current value on the next instruction
  if (m_next_load_delay_dirty)
  {
    DEBUG_LOG("Emitting delay slot flush (with move next)");
    EmitMoveNextInterpreterLoadDelay();
    m_next_load_delay_dirty = false;
    m_load_delay_dirty = true;
  }
}

void CodeGenerator::TruncateBlockAtCurrentInstruction()
{
  DEV_LOG("Truncating block {:08X} at {:08X}", m_block->pc, m_current_instruction.info->pc);
  m_block_end.instruction = m_current_instruction.instruction + 1;
  m_block_end.info = m_current_instruction.info + 1;
  WriteNewPC(CalculatePC(), true);
}

void CodeGenerator::AddPendingCycles(bool commit)
{
  if (m_delayed_cycles_add == 0 && m_gte_done_cycle <= m_delayed_cycles_add)
    return;

  if (m_gte_done_cycle > m_delayed_cycles_add)
  {
    Value temp = m_register_cache.AllocateScratch(RegSize_32);
    EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, OFFSETOF(State, pending_ticks));
    if (m_delayed_cycles_add > 0)
    {
      EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(), Value::FromConstantU32(m_delayed_cycles_add), false);
      EmitStoreCPUStructField(OFFSETOF(State, pending_ticks), temp);
      EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(),
              Value::FromConstantU32(m_gte_done_cycle - m_delayed_cycles_add), false);
      EmitStoreCPUStructField(OFFSETOF(State, gte_completion_tick), temp);
    }
    else
    {
      EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(), Value::FromConstantU32(m_gte_done_cycle), false);
      EmitStoreCPUStructField(OFFSETOF(State, gte_completion_tick), temp);
    }
  }
  else
  {
    EmitAddCPUStructField(OFFSETOF(State, pending_ticks), Value::FromConstantU32(m_delayed_cycles_add));
  }

  if (commit)
  {
    m_gte_done_cycle = std::max<TickCount>(m_gte_done_cycle - m_delayed_cycles_add, 0);
    m_delayed_cycles_add = 0;
  }
}

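// GTE cycle tracking: m_gte_done_cycle is kept relative to the block's
// pending cycles, so inside a block a stall can usually be resolved at
// compile time instead of emitting an EmitStallUntilGTEComplete check.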
void CodeGenerator::AddGTETicks(TickCount ticks)
{
  m_gte_done_cycle = m_delayed_cycles_add + ticks;
  DEBUG_LOG("Adding {} GTE ticks", ticks);
}

void CodeGenerator::StallUntilGTEComplete()
{
  if (!m_gte_busy_cycles_dirty)
  {
    // simple case - in block scheduling
    if (m_gte_done_cycle > m_delayed_cycles_add)
    {
      DEBUG_LOG("Stalling for {} ticks from GTE", m_gte_done_cycle - m_delayed_cycles_add);
      m_delayed_cycles_add += (m_gte_done_cycle - m_delayed_cycles_add);
    }

    return;
  }

  // switch to in block scheduling
  EmitStallUntilGTEComplete();
  m_gte_done_cycle = 0;
  m_gte_busy_cycles_dirty = false;
}

Value CodeGenerator::CalculatePC(u32 offset /* = 0 */)
{
  if (!m_pc_valid)
    Panic("Attempt to get an indeterminate PC");

  return Value::FromConstantU32(m_pc + offset);
}

Value CodeGenerator::GetCurrentInstructionPC(u32 offset /* = 0 */)
{
  return Value::FromConstantU32(m_current_instruction.info->pc);
}

void CodeGenerator::WriteNewPC(const Value& value, bool commit)
{
  // TODO: This _could_ be moved into the register cache, but would it gain anything?
  EmitStoreCPUStructField(OFFSETOF(CPU::State, pc), value);
  if (commit)
  {
    m_pc_valid = value.IsConstant();
    if (m_pc_valid)
      m_pc = static_cast<u32>(value.constant_value);
  }
}

bool CodeGenerator::Compile_Fallback(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", info.pc, instruction.bits);

  InstructionPrologue(instruction, info, 1, true);

  // flush and invalidate all guest registers, since the fallback could change any of them
  m_register_cache.FlushAllGuestRegisters(true, true);
  if (m_register_cache.HasLoadDelay())
  {
    m_load_delay_dirty = true;
    m_register_cache.WriteLoadDelayToCPU(true);
  }

  EmitStoreCPUStructField(OFFSETOF(State, current_instruction_pc), Value::FromConstantU32(info.pc));
  EmitStoreCPUStructField(OFFSETOF(State, current_instruction.bits), Value::FromConstantU32(instruction.bits));

  // TODO: Use carry flag or something here too
  Value return_value = m_register_cache.AllocateScratch(RegSize_8);
  EmitFunctionCall(&return_value,
                   g_settings.gpu_pgxp_enable ? &Thunks::InterpretInstructionPGXP : &Thunks::InterpretInstruction);
  EmitExceptionExitOnBool(return_value);

  m_current_instruction_in_branch_delay_slot_dirty = info.is_branch_instruction;
  m_branch_was_taken_dirty = info.is_branch_instruction;
  m_next_load_delay_dirty = info.has_load_delay;
  InvalidateSpeculativeValues();
  InstructionEpilogue(instruction, info);
  return true;
}

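// Per-instruction compilers. Each one reads operands through the register
// cache, emits the host code, mirrors the result into the speculative
// register state, and ends with InstructionEpilogue to settle load delays.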
bool CodeGenerator::Compile_Bitwise(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

  Value lhs;
  Value rhs;
  Reg dest;

  SpeculativeValue spec_lhs, spec_rhs;
  SpeculativeValue spec_value;

  if (instruction.op != InstructionOp::funct)
  {
    // rt <- rs op zext(imm)
    lhs = m_register_cache.ReadGuestRegister(instruction.i.rs);
    rhs = Value::FromConstantU32(instruction.i.imm_zext32());
    dest = instruction.i.rt;

    spec_lhs = SpeculativeReadReg(instruction.i.rs);
    spec_rhs = instruction.i.imm_zext32();
  }
  else
  {
    lhs = m_register_cache.ReadGuestRegister(instruction.r.rs);
    rhs = m_register_cache.ReadGuestRegister(instruction.r.rt);
    dest = instruction.r.rd;

    spec_lhs = SpeculativeReadReg(instruction.r.rs);
    spec_rhs = SpeculativeReadReg(instruction.r.rt);
  }

  Value result;
  switch (instruction.op)
  {
    case InstructionOp::ori:
    {
      if (g_settings.UsingPGXPCPUMode())
        EmitFunctionCall(nullptr, &PGXP::CPU_ORI, Value::FromConstantU32(instruction.bits), lhs);

      result = OrValues(lhs, rhs);
      if (spec_lhs && spec_rhs)
        spec_value = *spec_lhs | *spec_rhs;

      if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && dest != Reg::zero &&
          instruction.i.rs != Reg::zero && dest != instruction.i.rs && rhs.HasConstantValue(0))
      {
        EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed,
                         Value::FromConstantU32(PGXP::PackMoveArgs(dest, instruction.i.rs)), lhs);
      }
    }
    break;

    case InstructionOp::andi:
    {
      if (g_settings.UsingPGXPCPUMode())
        EmitFunctionCall(nullptr, &PGXP::CPU_ANDI, Value::FromConstantU32(instruction.bits), lhs);

      result = AndValues(lhs, rhs);
      if (spec_lhs && spec_rhs)
        spec_value = *spec_lhs & *spec_rhs;
    }
    break;

    case InstructionOp::xori:
    {
      if (g_settings.UsingPGXPCPUMode())
        EmitFunctionCall(nullptr, &PGXP::CPU_XORI, Value::FromConstantU32(instruction.bits), lhs);

      result = XorValues(lhs, rhs);
      if (spec_lhs && spec_rhs)
        spec_value = *spec_lhs ^ *spec_rhs;

      if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && dest != Reg::zero &&
          instruction.i.rs != Reg::zero && dest != instruction.i.rs && rhs.HasConstantValue(0))
      {
        EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed,
                         Value::FromConstantU32(PGXP::PackMoveArgs(dest, instruction.i.rs)), lhs);
      }
    }
    break;

    case InstructionOp::funct:
    {
      switch (instruction.r.funct)
      {
        case InstructionFunct::or_:
        {
          if (g_settings.UsingPGXPCPUMode())
            EmitFunctionCall(nullptr, &PGXP::CPU_OR_, Value::FromConstantU32(instruction.bits), lhs, rhs);

          result = OrValues(lhs, rhs);
          if (spec_lhs && spec_rhs)
            spec_value = *spec_lhs | *spec_rhs;

          if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && dest != Reg::zero &&
              ((lhs.HasConstantValue(0) && instruction.r.rt != Reg::zero && dest != instruction.r.rs) ||
               (rhs.HasConstantValue(0) && instruction.r.rs != Reg::zero && dest != instruction.r.rt)))
          {
            const auto rs = lhs.HasConstantValue(0) ? static_cast<CPU::Reg>(instruction.r.rt) :
                                                      static_cast<CPU::Reg>(instruction.r.rs);

            EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, Value::FromConstantU32(PGXP::PackMoveArgs(dest, rs)),
                             lhs.HasConstantValue(0) ? rhs : lhs);
          }
        }
        break;

        case InstructionFunct::and_:
        {
          if (g_settings.UsingPGXPCPUMode())
            EmitFunctionCall(nullptr, &PGXP::CPU_AND_, Value::FromConstantU32(instruction.bits), lhs, rhs);

          result = AndValues(lhs, rhs);
          if (spec_lhs && spec_rhs)
            spec_value = *spec_lhs & *spec_rhs;
        }
        break;

        case InstructionFunct::xor_:
        {
          if (g_settings.UsingPGXPCPUMode())
            EmitFunctionCall(nullptr, &PGXP::CPU_XOR_, Value::FromConstantU32(instruction.bits), lhs, rhs);

          result = XorValues(lhs, rhs);
          if (spec_lhs && spec_rhs)
            spec_value = *spec_lhs ^ *spec_rhs;

          if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && dest != Reg::zero &&
              ((lhs.HasConstantValue(0) && instruction.r.rt != Reg::zero && dest != instruction.r.rs) ||
               (rhs.HasConstantValue(0) && instruction.r.rs != Reg::zero && dest != instruction.r.rt)))
          {
            const auto rs = lhs.HasConstantValue(0) ? static_cast<CPU::Reg>(instruction.r.rt) :
                                                      static_cast<CPU::Reg>(instruction.r.rs);

            EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, Value::FromConstantU32(PGXP::PackMoveArgs(dest, rs)),
                             lhs.HasConstantValue(0) ? rhs : lhs);
          }
        }
        break;

        case InstructionFunct::nor:
        {
          if (g_settings.UsingPGXPCPUMode())
            EmitFunctionCall(nullptr, &PGXP::CPU_NOR, Value::FromConstantU32(instruction.bits), lhs, rhs);

          result = NotValue(OrValues(lhs, rhs));
          if (spec_lhs && spec_rhs)
            spec_value = ~(*spec_lhs | *spec_rhs);
        }
        break;

        default:
          UnreachableCode();
          break;
      }
    }
    break;

    default:
      UnreachableCode();
      break;
  }

  m_register_cache.WriteGuestRegister(dest, std::move(result));
  SpeculativeWriteReg(dest, spec_value);

  InstructionEpilogue(instruction, info);
  return true;
}

bool CodeGenerator::Compile_Shift(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

  const InstructionFunct funct = instruction.r.funct;
  Value rt = m_register_cache.ReadGuestRegister(instruction.r.rt);
  SpeculativeValue rt_spec = SpeculativeReadReg(instruction.r.rt);
  Value shamt;
  SpeculativeValue shamt_spec;
  if (funct == InstructionFunct::sll || funct == InstructionFunct::srl || funct == InstructionFunct::sra)
  {
    // rd <- rt op shamt
    shamt = Value::FromConstantU32(instruction.r.shamt);
    shamt_spec = instruction.r.shamt;
  }
  else
  {
    // rd <- rt op (rs & 0x1F)
    shamt = m_register_cache.ReadGuestRegister(instruction.r.rs);
    shamt_spec = SpeculativeReadReg(instruction.r.rs);
  }

  Value result;
  SpeculativeValue result_spec;
  switch (instruction.r.funct)
  {
    case InstructionFunct::sll:
    case InstructionFunct::sllv:
    {
      if (g_settings.UsingPGXPCPUMode())
      {
        if (instruction.r.funct == InstructionFunct::sll)
          EmitFunctionCall(nullptr, &PGXP::CPU_SLL, Value::FromConstantU32(instruction.bits), rt);
        else // if (instruction.r.funct == InstructionFunct::sllv)
          EmitFunctionCall(nullptr, &PGXP::CPU_SLLV, Value::FromConstantU32(instruction.bits), rt, shamt);
      }

      result = ShlValues(rt, shamt, false);
      if (rt_spec && shamt_spec)
        result_spec = *rt_spec << *shamt_spec;
    }
    break;

    case InstructionFunct::srl:
    case InstructionFunct::srlv:
    {
      if (g_settings.UsingPGXPCPUMode())
      {
        if (instruction.r.funct == InstructionFunct::srl)
          EmitFunctionCall(nullptr, &PGXP::CPU_SRL, Value::FromConstantU32(instruction.bits), rt);
        else // if (instruction.r.funct == InstructionFunct::srlv)
          EmitFunctionCall(nullptr, &PGXP::CPU_SRLV, Value::FromConstantU32(instruction.bits), rt, shamt);
      }

      result = ShrValues(rt, shamt, false);
      if (rt_spec && shamt_spec)
        result_spec = *rt_spec >> *shamt_spec;
    }
    break;

    case InstructionFunct::sra:
    case InstructionFunct::srav:
    {
      if (g_settings.UsingPGXPCPUMode())
      {
        if (instruction.r.funct == InstructionFunct::sra)
          EmitFunctionCall(nullptr, &PGXP::CPU_SRA, Value::FromConstantU32(instruction.bits), rt);
        else // if (instruction.r.funct == InstructionFunct::srav)
          EmitFunctionCall(nullptr, &PGXP::CPU_SRAV, Value::FromConstantU32(instruction.bits), rt, shamt);
      }

      result = SarValues(rt, shamt, false);
      if (rt_spec && shamt_spec)
        result_spec = static_cast<u32>(static_cast<s32>(*rt_spec) >> *shamt_spec); // arithmetic right shift for sra/srav
    }
    break;

    default:
      UnreachableCode();
      break;
  }

  m_register_cache.WriteGuestRegister(instruction.r.rd, std::move(result));
  SpeculativeWriteReg(instruction.r.rd, result_spec);

  InstructionEpilogue(instruction, info);
  return true;
}

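// Memory access compilers. When the address is statically known, the
// speculative memory map tracks the aligned word, so the value a later load
// would observe can be propagated into the speculative register state.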
   1489 bool CodeGenerator::Compile_Load(Instruction instruction, const CodeCache::InstructionInfo& info)
   1490 {
   1491   InstructionPrologue(instruction, info, 1);
   1492 
   1493   // rt <- mem[rs + sext(imm)]
   1494   Value base = m_register_cache.ReadGuestRegister(instruction.i.rs);
   1495   Value offset = Value::FromConstantU32(instruction.i.imm_sext32());
   1496   Value address = AddValues(base, offset, false);
   1497 
   1498   SpeculativeValue address_spec = SpeculativeReadReg(instruction.i.rs);
   1499   SpeculativeValue value_spec;
   1500   if (address_spec)
   1501     address_spec = *address_spec + instruction.i.imm_sext32();
   1502 
   1503   Value result;
   1504   switch (instruction.op)
   1505   {
   1506     case InstructionOp::lb:
   1507     case InstructionOp::lbu:
   1508     {
   1509       result = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_8);
   1510       ConvertValueSizeInPlace(&result, RegSize_32, (instruction.op == InstructionOp::lb));
   1511       if (g_settings.gpu_pgxp_enable)
   1512         EmitFunctionCall(nullptr, PGXP::CPU_LBx, Value::FromConstantU32(instruction.bits), address, result);
   1513 
   1514       if (address_spec)
   1515       {
   1516         value_spec = SpeculativeReadMemory(*address_spec & ~3u);
   1517         if (value_spec)
   1518           value_spec = (*value_spec >> ((*address_spec & 3u) * 8u)) & 0xFFu;
   1519       }
   1520     }
   1521     break;
   1522 
   1523     case InstructionOp::lh:
   1524     case InstructionOp::lhu:
   1525     {
   1526       result = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_16);
   1527       ConvertValueSizeInPlace(&result, RegSize_32, (instruction.op == InstructionOp::lh));
   1528 
   1529       if (g_settings.gpu_pgxp_enable)
   1530       {
   1531         EmitFunctionCall(nullptr, (instruction.op == InstructionOp::lhu) ? &PGXP::CPU_LHU : PGXP::CPU_LH,
   1532                          Value::FromConstantU32(instruction.bits), address, result);
   1533       }
   1534 
   1535       if (address_spec)
   1536       {
   1537         value_spec = SpeculativeReadMemory(*address_spec & ~3u);
   1538         if (value_spec)
   1539           value_spec = (*value_spec >> ((*address_spec & 3u) * 8u)) & 0xFFFFu;
   1540       }
   1541     }
   1542     break;
   1543 
   1544     case InstructionOp::lw:
   1545     {
   1546       result = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_32);
   1547       if (g_settings.gpu_pgxp_enable)
   1548         EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(instruction.bits), address, result);
   1549 
   1550       if (address_spec)
   1551         value_spec = SpeculativeReadMemory(*address_spec);
   1552     }
   1553     break;
   1554 
   1555     default:
   1556       UnreachableCode();
   1557       break;
   1558   }
   1559 
   1560   m_register_cache.WriteGuestRegisterDelayed(instruction.i.rt, std::move(result));
   1561   SpeculativeWriteReg(instruction.i.rt, value_spec);
   1562 
   1563   InstructionEpilogue(instruction, info);
   1564   return true;
   1565 }
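
        // The speculative-execution paths above model memory as aligned 32-bit words,
        // so a narrow load is reconstructed by shifting the cached word right by eight
        // bits per low address bit, then masking. A minimal sketch of that extraction,
        // assuming the PSX's little-endian layout (illustrative helper, not part of
        // the emitter itself):
        [[maybe_unused]] static u32 SpeculativeExtractSubWord(u32 aligned_word, u32 address, u32 size_in_bytes)
        {
          const u32 shift = (address & 3u) * 8u;                   // byte offset within the aligned word
          const u32 mask = (size_in_bytes == 1) ? 0xFFu : 0xFFFFu; // lb/lbu vs. lh/lhu
          return (aligned_word >> shift) & mask;                   // e.g. word=0xAABBCCDD, address&3==1 -> 0xCC
        }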
   1566 
   1567 bool CodeGenerator::Compile_Store(Instruction instruction, const CodeCache::InstructionInfo& info)
   1568 {
   1569   InstructionPrologue(instruction, info, 1);
   1570 
   1571   // mem[rs + sext(imm)] <- rt
   1572   Value base = m_register_cache.ReadGuestRegister(instruction.i.rs);
   1573   Value offset = Value::FromConstantU32(instruction.i.imm_sext32());
   1574   Value address = AddValues(base, offset, false);
   1575   Value value = m_register_cache.ReadGuestRegister(instruction.i.rt);
   1576 
   1577   SpeculativeValue address_spec = SpeculativeReadReg(instruction.i.rs);
   1578   SpeculativeValue value_spec = SpeculativeReadReg(instruction.i.rt);
   1579   if (address_spec)
   1580     address_spec = *address_spec + instruction.i.imm_sext32();
   1581 
   1582   switch (instruction.op)
   1583   {
   1584     case InstructionOp::sb:
   1585     {
   1586       if (g_settings.gpu_pgxp_enable)
   1587         EmitFunctionCall(nullptr, PGXP::CPU_SB, Value::FromConstantU32(instruction.bits), address, value);
   1588 
   1589       EmitStoreGuestMemory(instruction, info, address, address_spec, RegSize_8, value);
   1590 
   1591       if (address_spec)
   1592       {
   1593         const VirtualMemoryAddress aligned_addr = (*address_spec & ~3u);
   1594         const SpeculativeValue aligned_existing_value = SpeculativeReadMemory(aligned_addr);
   1595         if (aligned_existing_value)
   1596         {
   1597           if (value_spec)
   1598           {
   1599             const u32 shift = (*address_spec & 3u) * 8u;
   1600             SpeculativeWriteMemory(aligned_addr,
   1601                                    (*aligned_existing_value & ~(0xFFu << shift)) | ((*value_spec & 0xFFu) << shift));
   1602           }
   1603           else
   1604           {
   1605             SpeculativeWriteMemory(aligned_addr, std::nullopt);
   1606           }
   1607         }
   1608       }
   1609     }
   1610     break;
   1611 
   1612     case InstructionOp::sh:
   1613     {
   1614       if (g_settings.gpu_pgxp_enable)
   1615         EmitFunctionCall(nullptr, PGXP::CPU_SH, Value::FromConstantU32(instruction.bits), address, value);
   1616 
   1617       EmitStoreGuestMemory(instruction, info, address, address_spec, RegSize_16, value);
   1618 
   1619       if (address_spec)
   1620       {
   1621         const VirtualMemoryAddress aligned_addr = (*address_spec & ~3u);
   1622         const SpeculativeValue aligned_existing_value = SpeculativeReadMemory(aligned_addr);
   1623         if (aligned_existing_value)
   1624         {
   1625           if (value_spec)
   1626           {
   1627             const u32 shift = (*address_spec & 2u) * 8u;
   1628             SpeculativeWriteMemory(aligned_addr, (*aligned_existing_value & ~(0xFFFFu << shift)) |
   1629                                                    ((*value_spec & 0xFFFFu) << shift));
   1630           }
   1631           else
   1632           {
   1633             SpeculativeWriteMemory(aligned_addr, std::nullopt);
   1634           }
   1635         }
   1636       }
   1637     }
   1638     break;
   1639 
   1640     case InstructionOp::sw:
   1641     {
   1642       if (g_settings.gpu_pgxp_enable)
   1643         EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(instruction.bits), address, value);
   1644 
   1645       EmitStoreGuestMemory(instruction, info, address, address_spec, RegSize_32, value);
   1646 
   1647       if (address_spec)
   1648         SpeculativeWriteMemory(*address_spec, value_spec);
   1649     }
   1650     break;
   1651 
   1652     default:
   1653       UnreachableCode();
   1654       break;
   1655   }
   1656 
   1657   InstructionEpilogue(instruction, info);
   1658 
   1659   if (address_spec)
   1660   {
   1661     const CPU::Segment seg = GetSegmentForAddress(*address_spec);
   1662     if (seg == Segment::KUSEG || seg == Segment::KSEG0 || seg == Segment::KSEG1)
   1663     {
   1664       const PhysicalMemoryAddress phys_addr = VirtualAddressToPhysical(*address_spec);
   1665       const PhysicalMemoryAddress block_start = VirtualAddressToPhysical(m_block->pc);
   1666       const PhysicalMemoryAddress block_end =
   1667         VirtualAddressToPhysical(m_block->pc + (m_block->size * sizeof(Instruction)));
   1668       if (phys_addr >= block_start && phys_addr < block_end)
   1669       {
   1670         WARNING_LOG("Instruction {:08X} speculatively writes to {:08X} inside block {:08X}-{:08X}. Truncating block.",
   1671                     info.pc, phys_addr, block_start, block_end);
   1672         TruncateBlockAtCurrentInstruction();
   1673       }
   1674     }
   1675   }
   1676 
   1677   return true;
   1678 }
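
        // The speculative store path performs the inverse read-modify-write merge into
        // the cached aligned word, and (as above) discards the cached word entirely when
        // the stored value is unknown. A minimal sketch of the merge (illustrative helper):
        [[maybe_unused]] static u32 SpeculativeMergeSubWord(u32 aligned_word, u32 address, u32 value, u32 size_in_bytes)
        {
          // sb keys the shift off the byte offset, sh off the halfword offset, mirroring the code above
          const u32 shift = (size_in_bytes == 1) ? ((address & 3u) * 8u) : ((address & 2u) * 8u);
          const u32 mask = (size_in_bytes == 1) ? 0xFFu : 0xFFFFu;
          return (aligned_word & ~(mask << shift)) | ((value & mask) << shift);
        }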
   1679 
   1680 bool CodeGenerator::Compile_LoadLeftRight(Instruction instruction, const CodeCache::InstructionInfo& info)
   1681 {
   1682   InstructionPrologue(instruction, info, 1);
   1683 
   1684   Value base = m_register_cache.ReadGuestRegister(instruction.i.rs);
   1685   Value offset = Value::FromConstantU32(instruction.i.imm_sext32());
   1686   Value address = AddValues(base, offset, false);
   1687   base.ReleaseAndClear();
   1688 
   1689   SpeculativeValue address_spec = SpeculativeReadReg(instruction.i.rs);
   1690   if (address_spec)
   1691     address_spec = *address_spec + instruction.i.imm_sext32();
   1692 
   1693   Value shift = ShlValues(AndValues(address, Value::FromConstantU32(3)), Value::FromConstantU32(3)); // * 8
   1694   address = AndValues(address, Value::FromConstantU32(~u32(3)));
   1695 
   1696   // hack to bypass load delays
   1697   Value value;
   1698   if (instruction.i.rt == m_register_cache.GetLoadDelayRegister())
   1699   {
   1700     const Value& ld_value = m_register_cache.GetLoadDelayValue();
   1701     if (ld_value.IsInHostRegister())
   1702       value.SetHostReg(&m_register_cache, ld_value.GetHostRegister(), ld_value.size);
   1703     else
   1704       value = ld_value;
   1705   }
   1706   else
   1707   {
   1708     // if this is the first instruction in the block, we need to stall until the load finishes.
   1709     // we don't care whether it targets our register; if it doesn't, flushing it has no effect.
   1710     if (m_load_delay_dirty)
   1711     {
   1712       DEV_LOG("Flushing interpreter load delay for lwl/lwr instruction at 0x{:08X}", info.pc);
   1713       EmitFlushInterpreterLoadDelay();
   1714       m_register_cache.InvalidateGuestRegister(instruction.r.rt);
   1715       m_load_delay_dirty = false;
   1716     }
   1717 
   1718     value = m_register_cache.ReadGuestRegister(instruction.i.rt, true, true);
   1719   }
   1720 
   1721   Value mem;
   1722   if (instruction.op == InstructionOp::lwl)
   1723   {
   1724     Value lhs = ShrValues(Value::FromConstantU32(0x00FFFFFF), shift);
   1725     AndValueInPlace(lhs, value);
   1726     shift = SubValues(Value::FromConstantU32(24), shift, false);
   1727     value.ReleaseAndClear();
   1728 
   1729     mem = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_32);
   1730     EmitShl(mem.GetHostRegister(), mem.GetHostRegister(), RegSize_32, shift);
   1731     EmitOr(mem.GetHostRegister(), mem.GetHostRegister(), lhs);
   1732   }
   1733   else
   1734   {
   1735     Value lhs = ShlValues(Value::FromConstantU32(0xFFFFFF00), SubValues(Value::FromConstantU32(24), shift, false));
   1736     AndValueInPlace(lhs, value);
   1737     value.ReleaseAndClear();
   1738 
   1739     mem = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_32);
   1740     EmitShr(mem.GetHostRegister(), mem.GetHostRegister(), RegSize_32, shift);
   1741     EmitOr(mem.GetHostRegister(), mem.GetHostRegister(), lhs);
   1742   }
   1743 
   1744   shift.ReleaseAndClear();
   1745 
   1746   if (g_settings.gpu_pgxp_enable)
   1747     EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(instruction.bits), address, mem);
   1748 
   1749   m_register_cache.WriteGuestRegisterDelayed(instruction.i.rt, std::move(mem));
   1750 
   1751   // TODO: Speculative values
   1752   SpeculativeWriteReg(instruction.r.rt, std::nullopt);
   1753 
   1754   InstructionEpilogue(instruction, info);
   1755   return true;
   1756 }
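
        // The mask/shift sequence above implements the MIPS unaligned-load semantics:
        // lwl fills the high-order bytes of rt from memory, lwr the low-order bytes.
        // A compact reference model of what the generated code computes (illustrative):
        [[maybe_unused]] static u32 ReferenceLWx(bool lwl, u32 reg_value, u32 aligned_mem_word, u32 address)
        {
          const u32 shift = (address & 3u) * 8u;
          if (lwl)
            return (reg_value & (UINT32_C(0x00FFFFFF) >> shift)) | (aligned_mem_word << (24 - shift));
          else
            return (reg_value & (UINT32_C(0xFFFFFF00) << (24 - shift))) | (aligned_mem_word >> shift);
        }
        // Note that lwl with (address & 3) == 3, or lwr with (address & 3) == 0, replaces
        // the whole register, which is why an lwl/lwr pair can load any unaligned word.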
   1757 
   1758 bool CodeGenerator::Compile_StoreLeftRight(Instruction instruction, const CodeCache::InstructionInfo& info)
   1759 {
   1760   InstructionPrologue(instruction, info, 1);
   1761 
   1762   Value base = m_register_cache.ReadGuestRegister(instruction.i.rs);
   1763   Value offset = Value::FromConstantU32(instruction.i.imm_sext32());
   1764   Value address = AddValues(base, offset, false);
   1765   base.ReleaseAndClear();
   1766 
   1767   // TODO: Speculative values
   1768   SpeculativeValue address_spec = SpeculativeReadReg(instruction.i.rs);
   1769   if (address_spec)
   1770   {
   1771     address_spec = *address_spec + instruction.i.imm_sext32();
   1772     SpeculativeWriteMemory(*address_spec & ~3u, std::nullopt);
   1773   }
   1774 
   1775   Value shift = ShlValues(AndValues(address, Value::FromConstantU32(3)), Value::FromConstantU32(3)); // * 8
   1776   address = AndValues(address, Value::FromConstantU32(~u32(3)));
   1777 
   1778   Value mem;
   1779   if (instruction.op == InstructionOp::swl)
   1780   {
   1781     Value mask = ShlValues(Value::FromConstantU32(0xFFFFFF00), shift);
   1782     mem = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_32);
   1783     EmitAnd(mem.GetHostRegister(), mem.GetHostRegister(), mask);
   1784     mask.ReleaseAndClear();
   1785 
   1786     Value reg = m_register_cache.ReadGuestRegister(instruction.r.rt);
   1787     Value lhs = ShrValues(reg, SubValues(Value::FromConstantU32(24), shift, false));
   1788     reg.ReleaseAndClear();
   1789 
   1790     EmitOr(mem.GetHostRegister(), mem.GetHostRegister(), lhs);
   1791   }
   1792   else
   1793   {
   1794     Value mask = ShrValues(Value::FromConstantU32(0x00FFFFFF), SubValues(Value::FromConstantU32(24), shift, false));
   1795     mem = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_32);
   1796     AndValueInPlace(mem, mask);
   1797     mask.ReleaseAndClear();
   1798 
   1799     Value reg = m_register_cache.ReadGuestRegister(instruction.r.rt);
   1800     Value lhs = ShlValues(reg, shift);
   1801     reg.ReleaseAndClear();
   1802 
   1803     EmitOr(mem.GetHostRegister(), mem.GetHostRegister(), lhs);
   1804   }
   1805 
   1806   shift.ReleaseAndClear();
   1807 
   1808   EmitStoreGuestMemory(instruction, info, address, address_spec, RegSize_32, mem);
   1809   if (g_settings.gpu_pgxp_enable)
   1810     EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(instruction.bits), address, mem);
   1811 
   1812   InstructionEpilogue(instruction, info);
   1813   return true;
   1814 }
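
        // swl/swr are the store-side mirror: the emitted code loads the aligned word,
        // masks out the bytes being replaced, and merges in the shifted register before
        // writing it back. A reference model of the merged word (illustrative):
        [[maybe_unused]] static u32 ReferenceSWx(bool swl, u32 reg_value, u32 aligned_mem_word, u32 address)
        {
          const u32 shift = (address & 3u) * 8u;
          if (swl)
            return (aligned_mem_word & (UINT32_C(0xFFFFFF00) << shift)) | (reg_value >> (24 - shift));
          else
            return (aligned_mem_word & (UINT32_C(0x00FFFFFF) >> (24 - shift))) | (reg_value << shift);
        }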
   1815 
   1816 bool CodeGenerator::Compile_MoveHiLo(Instruction instruction, const CodeCache::InstructionInfo& info)
   1817 {
   1818   InstructionPrologue(instruction, info, 1);
   1819 
   1820   switch (instruction.r.funct)
   1821   {
   1822     case InstructionFunct::mfhi:
   1823     {
   1824       Value hi = m_register_cache.ReadGuestRegister(Reg::hi);
   1825       if (g_settings.UsingPGXPCPUMode())
   1826       {
   1827         EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed,
   1828                          Value::FromConstantU32(PGXP::PackMoveArgs(instruction.r.rd, Reg::hi)), hi);
   1829       }
   1830 
   1831       m_register_cache.WriteGuestRegister(instruction.r.rd, std::move(hi));
   1832       SpeculativeWriteReg(instruction.r.rd, std::nullopt);
   1833     }
   1834     break;
   1835 
   1836     case InstructionFunct::mthi:
   1837     {
   1838       Value rs = m_register_cache.ReadGuestRegister(instruction.r.rs);
   1839       if (g_settings.UsingPGXPCPUMode())
   1840       {
   1841         EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed,
   1842                          Value::FromConstantU32(PGXP::PackMoveArgs(Reg::hi, instruction.r.rs)), rs);
   1843       }
   1844 
   1845       m_register_cache.WriteGuestRegister(Reg::hi, std::move(rs));
   1846     }
   1847     break;
   1848 
   1849     case InstructionFunct::mflo:
   1850     {
   1851       Value lo = m_register_cache.ReadGuestRegister(Reg::lo);
   1852       if (g_settings.UsingPGXPCPUMode())
   1853       {
   1854         EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed,
   1855                          Value::FromConstantU32(PGXP::PackMoveArgs(instruction.r.rd, Reg::lo)), lo);
   1856       }
   1857 
   1858       m_register_cache.WriteGuestRegister(instruction.r.rd, std::move(lo));
   1859       SpeculativeWriteReg(instruction.r.rd, std::nullopt);
   1860     }
   1861     break;
   1862 
   1863     case InstructionFunct::mtlo:
   1864     {
   1865       Value rs = m_register_cache.ReadGuestRegister(instruction.r.rs);
   1866       if (g_settings.UsingPGXPCPUMode())
   1867       {
   1868         EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed,
   1869                          Value::FromConstantU32(PGXP::PackMoveArgs(Reg::lo, instruction.r.rs)), rs);
   1870       }
   1871 
   1872       m_register_cache.WriteGuestRegister(Reg::lo, std::move(rs));
   1873     }
   1874     break;
   1875 
   1876     default:
   1877       UnreachableCode();
   1878       break;
   1879   }
   1880 
   1881   InstructionEpilogue(instruction, info);
   1882   return true;
   1883 }
   1884 
   1885 bool CodeGenerator::Compile_Add(Instruction instruction, const CodeCache::InstructionInfo& info)
   1886 {
   1887   InstructionPrologue(instruction, info, 1);
   1888 
   1889   const bool check_overflow = (instruction.op == InstructionOp::addi || (instruction.op == InstructionOp::funct &&
   1890                                                                          instruction.r.funct == InstructionFunct::add));
   1891 
   1892   Value lhs, rhs;
   1893   SpeculativeValue lhs_spec, rhs_spec;
   1894   Reg dest;
   1895 
   1896   switch (instruction.op)
   1897   {
   1898     case InstructionOp::addi:
   1899     case InstructionOp::addiu:
   1900     {
   1901       // rt <- rs + sext(imm)
   1902       dest = instruction.i.rt;
   1903       lhs = m_register_cache.ReadGuestRegister(instruction.i.rs);
   1904       rhs = Value::FromConstantU32(instruction.i.imm_sext32());
   1905 
   1906       lhs_spec = SpeculativeReadReg(instruction.i.rs);
   1907       rhs_spec = instruction.i.imm_sext32();
   1908     }
   1909     break;
   1910 
   1911     case InstructionOp::funct:
   1912     {
   1913       Assert(instruction.r.funct == InstructionFunct::add || instruction.r.funct == InstructionFunct::addu);
   1914       dest = instruction.r.rd;
   1915       lhs = m_register_cache.ReadGuestRegister(instruction.r.rs);
   1916       rhs = m_register_cache.ReadGuestRegister(instruction.r.rt);
   1917       lhs_spec = SpeculativeReadReg(instruction.r.rs);
   1918       rhs_spec = SpeculativeReadReg(instruction.r.rt);
   1919     }
   1920     break;
   1921 
   1922     default:
   1923       UnreachableCode();
   1924       return false;
   1925   }
   1926 
   1927   // detect register moves and handle them for pgxp
   1928   if (dest != Reg::zero && g_settings.gpu_pgxp_enable)
   1929   {
   1930     bool handled = false;
   1931     if (instruction.op != InstructionOp::funct)
   1932     {
   1933       if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && instruction.i.rs != Reg::zero &&
   1934           dest != instruction.i.rs && rhs.HasConstantValue(0))
   1935       {
   1936         handled = true;
   1937         EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed,
   1938                          Value::FromConstantU32(PGXP::PackMoveArgs(dest, instruction.i.rs)), lhs);
   1939       }
   1940     }
   1941     else
   1942     {
   1943       if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu &&
   1944           ((lhs.HasConstantValue(0) && instruction.r.rt != Reg::zero && dest != instruction.r.rs) ||
   1945            (rhs.HasConstantValue(0) && instruction.r.rs != Reg::zero && dest != instruction.r.rt)))
   1946       {
   1947         handled = true;
   1948         EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed,
   1949                          Value::FromConstantU32(PGXP::PackMoveArgs(dest, instruction.i.rs)), lhs);
   1950       }
   1951     }
   1952 
   1953     if (g_settings.gpu_pgxp_cpu && !handled)
   1954     {
   1955       if (instruction.op != InstructionOp::funct)
   1956         EmitFunctionCall(nullptr, &PGXP::CPU_ADDI, Value::FromConstantU32(instruction.bits), lhs);
   1957       else
   1958         EmitFunctionCall(nullptr, &PGXP::CPU_ADD, Value::FromConstantU32(instruction.bits), lhs, rhs);
   1959     }
   1960   }
   1961 
   1962   Value result = AddValues(lhs, rhs, check_overflow);
   1963   if (check_overflow)
   1964     GenerateExceptionExit(instruction, info, Exception::Ov, Condition::Overflow);
   1965 
   1966   m_register_cache.WriteGuestRegister(dest, std::move(result));
   1967 
   1968   SpeculativeValue value_spec;
   1969   if (lhs_spec && rhs_spec)
   1970     value_spec = *lhs_spec + *rhs_spec;
   1971   SpeculativeWriteReg(dest, value_spec);
   1972 
   1973   InstructionEpilogue(instruction, info);
   1974   return true;
   1975 }
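
        // add/addi raise Exception::Ov on signed overflow, while addu/addiu simply wrap;
        // the emitted code tests the host's overflow flag via Condition::Overflow. A
        // portable model of the same test (illustrative):
        [[maybe_unused]] static bool ReferenceAddOverflowed(u32 lhs, u32 rhs)
        {
          const u32 result = lhs + rhs;
          // overflow iff both operands share a sign and the result's sign differs
          return (((result ^ lhs) & (result ^ rhs)) & 0x80000000u) != 0;
        }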
   1976 
   1977 bool CodeGenerator::Compile_Subtract(Instruction instruction, const CodeCache::InstructionInfo& info)
   1978 {
   1979   InstructionPrologue(instruction, info, 1);
   1980 
   1981   Assert(instruction.op == InstructionOp::funct);
   1982   const bool check_overflow = (instruction.r.funct == InstructionFunct::sub);
   1983 
   1984   Value lhs = m_register_cache.ReadGuestRegister(instruction.r.rs);
   1985   Value rhs = m_register_cache.ReadGuestRegister(instruction.r.rt);
   1986 
   1987   SpeculativeValue lhs_spec = SpeculativeReadReg(instruction.r.rs);
   1988   SpeculativeValue rhs_spec = SpeculativeReadReg(instruction.r.rt);
   1989 
   1990   if (g_settings.UsingPGXPCPUMode())
   1991     EmitFunctionCall(nullptr, &PGXP::CPU_SUB, Value::FromConstantU32(instruction.bits), lhs, rhs);
   1992 
   1993   Value result = SubValues(lhs, rhs, check_overflow);
   1994   if (check_overflow)
   1995     GenerateExceptionExit(instruction, info, Exception::Ov, Condition::Overflow);
   1996 
   1997   m_register_cache.WriteGuestRegister(instruction.r.rd, std::move(result));
   1998 
   1999   SpeculativeValue value_spec;
   2000   if (lhs_spec && rhs_spec)
   2001     value_spec = *lhs_spec - *rhs_spec;
   2002   SpeculativeWriteReg(instruction.r.rd, value_spec);
   2003 
   2004   InstructionEpilogue(instruction, info);
   2005   return true;
   2006 }
   2007 
   2008 bool CodeGenerator::Compile_Multiply(Instruction instruction, const CodeCache::InstructionInfo& info)
   2009 {
   2010   InstructionPrologue(instruction, info, 1);
   2011 
   2012   const bool signed_multiply = (instruction.r.funct == InstructionFunct::mult);
   2013   Value rs = m_register_cache.ReadGuestRegister(instruction.r.rs);
   2014   Value rt = m_register_cache.ReadGuestRegister(instruction.r.rt);
   2015   if (g_settings.UsingPGXPCPUMode())
   2016   {
   2017     EmitFunctionCall(nullptr, signed_multiply ? &PGXP::CPU_MULT : &PGXP::CPU_MULTU,
   2018                      Value::FromConstantU32(instruction.bits), rs, rt);
   2019   }
   2020 
   2021   std::pair<Value, Value> result = MulValues(rs, rt, signed_multiply);
   2022   rs.ReleaseAndClear();
   2023   rt.ReleaseAndClear();
   2024   m_register_cache.WriteGuestRegister(Reg::hi, std::move(result.first));
   2025   m_register_cache.WriteGuestRegister(Reg::lo, std::move(result.second));
   2026 
   2027   InstructionEpilogue(instruction, info);
   2028   return true;
   2029 }
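
        // mult/multu compute a full 64-bit product and split it across hi/lo, which is
        // what MulValues returns as a register pair. A plain sketch of the arithmetic
        // (illustrative):
        [[maybe_unused]] static std::pair<u32, u32> ReferenceMult(u32 rs, u32 rt, bool signed_multiply)
        {
          const u64 product = signed_multiply ?
                                static_cast<u64>(static_cast<s64>(static_cast<s32>(rs)) * static_cast<s32>(rt)) :
                                (static_cast<u64>(rs) * static_cast<u64>(rt));
          return {static_cast<u32>(product >> 32), static_cast<u32>(product)}; // {hi, lo}
        }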
   2030 
   2031 static std::tuple<u32, u32> MIPSDivide(u32 num, u32 denom)
   2032 {
   2033   u32 lo, hi;
   2034 
   2035   if (denom == 0)
   2036   {
   2037     // divide by zero
   2038     lo = UINT32_C(0xFFFFFFFF);
   2039     hi = static_cast<u32>(num);
   2040   }
   2041   else
   2042   {
   2043     lo = num / denom;
   2044     hi = num % denom;
   2045   }
   2046 
   2047   return std::tie(lo, hi);
   2048 }
   2049 
   2050 static std::tuple<s32, s32> MIPSDivide(s32 num, s32 denom)
   2051 {
   2052   s32 lo, hi;
   2053   if (denom == 0)
   2054   {
   2055     // divide by zero
   2056     lo = (num >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
   2057     hi = static_cast<u32>(num);
   2058   }
   2059   else if (static_cast<u32>(num) == UINT32_C(0x80000000) && denom == -1)
   2060   {
   2061     // unrepresentable
   2062     lo = UINT32_C(0x80000000);
   2063     hi = 0;
   2064   }
   2065   else
   2066   {
   2067     lo = num / denom;
   2068     hi = num % denom;
   2069   }
   2070 
   2071   return std::tie(lo, hi);
   2072 }
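
        // Expected outputs of the two helpers above, covering the hardware's special
        // cases (illustrative):
        //   MIPSDivide(u32(10), u32(3))         -> lo = 3,          hi = 1
        //   MIPSDivide(u32(10), u32(0))         -> lo = 0xFFFFFFFF, hi = 10  (divide by zero)
        //   MIPSDivide(s32(-10), s32(0))        -> lo = 1,          hi = -10 (divide by zero, negative numerator)
        //   MIPSDivide(s32(INT32_MIN), s32(-1)) -> lo = 0x80000000, hi = 0   (unrepresentable quotient)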
   2073 
   2074 bool CodeGenerator::Compile_Divide(Instruction instruction, const CodeCache::InstructionInfo& info)
   2075 {
   2076   InstructionPrologue(instruction, info, 1);
   2077 
   2078   Value num = m_register_cache.ReadGuestRegister(instruction.r.rs);
   2079   Value denom = m_register_cache.ReadGuestRegister(instruction.r.rt);
   2080 
   2081   if (g_settings.UsingPGXPCPUMode())
   2082     EmitFunctionCall(nullptr, &PGXP::CPU_DIV, Value::FromConstantU32(instruction.bits), num, denom);
   2083 
   2084   if (num.IsConstant() && denom.IsConstant())
   2085   {
   2086     const auto [lo, hi] = MIPSDivide(static_cast<u32>(num.constant_value), static_cast<u32>(denom.constant_value));
   2087     m_register_cache.WriteGuestRegister(Reg::lo, Value::FromConstantU32(lo));
   2088     m_register_cache.WriteGuestRegister(Reg::hi, Value::FromConstantU32(hi));
   2089   }
   2090   else
   2091   {
   2092     Value num_reg = GetValueInHostRegister(num, false);
   2093     Value denom_reg = GetValueInHostRegister(denom, false);
   2094 
   2095     m_register_cache.InvalidateGuestRegister(Reg::lo);
   2096     m_register_cache.InvalidateGuestRegister(Reg::hi);
   2097 
   2098     Value lo = m_register_cache.AllocateScratch(RegSize_32);
   2099     Value hi = m_register_cache.AllocateScratch(RegSize_32);
   2100     m_register_cache.InhibitAllocation();
   2101 
   2102     LabelType do_divide, done;
   2103 
   2104     if (!denom.IsConstant() || denom.HasConstantValue(0))
   2105     {
   2106       // if (denom == 0)
   2107       EmitConditionalBranch(Condition::NotEqual, false, denom_reg.GetHostRegister(), Value::FromConstantU32(0),
   2108                             &do_divide);
   2109       {
   2110         // divide by zero
   2111         EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0xFFFFFFFF));
   2112         EmitCopyValue(hi.GetHostRegister(), num_reg);
   2113         EmitBranch(&done);
   2114       }
   2115     }
   2116 
   2117     // else
   2118     {
   2119       EmitBindLabel(&do_divide);
   2120       EmitDiv(lo.GetHostRegister(), hi.GetHostRegister(), num_reg.GetHostRegister(), denom_reg.GetHostRegister(),
   2121               RegSize_32, false);
   2122     }
   2123 
   2124     EmitBindLabel(&done);
   2125 
   2126     m_register_cache.UninhibitAllocation();
   2127     m_register_cache.WriteGuestRegister(Reg::lo, std::move(lo));
   2128     m_register_cache.WriteGuestRegister(Reg::hi, std::move(hi));
   2129   }
   2130 
   2131   InstructionEpilogue(instruction, info);
   2132   return true;
   2133 }
   2134 
   2135 bool CodeGenerator::Compile_SignedDivide(Instruction instruction, const CodeCache::InstructionInfo& info)
   2136 {
   2137   InstructionPrologue(instruction, info, 1);
   2138 
   2139   Value num = m_register_cache.ReadGuestRegister(instruction.r.rs);
   2140   Value denom = m_register_cache.ReadGuestRegister(instruction.r.rt);
   2141 
   2142   if (g_settings.UsingPGXPCPUMode())
   2143     EmitFunctionCall(nullptr, &PGXP::CPU_DIV, Value::FromConstantU32(instruction.bits), num, denom);
   2144 
   2145   if (num.IsConstant() && denom.IsConstant())
   2146   {
   2147     const auto [lo, hi] = MIPSDivide(num.GetS32ConstantValue(), denom.GetS32ConstantValue());
   2148     m_register_cache.WriteGuestRegister(Reg::lo, Value::FromConstantU32(static_cast<u32>(lo)));
   2149     m_register_cache.WriteGuestRegister(Reg::hi, Value::FromConstantU32(static_cast<u32>(hi)));
   2150   }
   2151   else
   2152   {
   2153     Value num_reg = GetValueInHostRegister(num, false);
   2154     Value denom_reg = GetValueInHostRegister(denom, false);
   2155 
   2156     m_register_cache.InvalidateGuestRegister(Reg::lo);
   2157     m_register_cache.InvalidateGuestRegister(Reg::hi);
   2158 
   2159     Value lo = m_register_cache.AllocateScratch(RegSize_32);
   2160     Value hi = m_register_cache.AllocateScratch(RegSize_32);
   2161     m_register_cache.InhibitAllocation();
   2162 
   2163     // we need this in a register on ARM because it won't fit in an immediate
   2164     EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0x80000000u));
   2165 
   2166     LabelType do_divide, done;
   2167 
   2168     LabelType not_zero;
   2169     if (!denom.IsConstant() || denom.HasConstantValue(0))
   2170     {
   2171       // if (denom == 0)
   2172       EmitConditionalBranch(Condition::NotEqual, false, denom_reg.GetHostRegister(), Value::FromConstantU32(0),
   2173                             &not_zero);
   2174       {
   2175         // hi = static_cast<u32>(num);
   2176         EmitCopyValue(hi.GetHostRegister(), num_reg);
   2177 
   2178         // lo = (num >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
   2179         LabelType greater_equal_zero;
   2180         EmitConditionalBranch(Condition::GreaterEqual, false, num_reg.GetHostRegister(), Value::FromConstantU32(0),
   2181                               &greater_equal_zero);
   2182         EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(1));
   2183         EmitBranch(&done);
   2184         EmitBindLabel(&greater_equal_zero);
   2185         EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0xFFFFFFFFu));
   2186         EmitBranch(&done);
   2187       }
   2188     }
   2189 
   2190     // else if (static_cast<u32>(num) == UINT32_C(0x80000000) && denom == -1)
   2191     {
   2192       EmitBindLabel(&not_zero);
   2193       EmitConditionalBranch(Condition::NotEqual, false, denom_reg.GetHostRegister(), Value::FromConstantS32(-1),
   2194                             &do_divide);
   2195       EmitConditionalBranch(Condition::NotEqual, false, num_reg.GetHostRegister(), lo, &do_divide);
   2196 
   2197       // unrepresentable
   2198       // EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0x80000000u)); // done above
   2199       EmitCopyValue(hi.GetHostRegister(), Value::FromConstantU32(0));
   2200       EmitBranch(&done);
   2201     }
   2202 
   2203     // else
   2204     {
   2205       EmitBindLabel(&do_divide);
   2206       EmitDiv(lo.GetHostRegister(), hi.GetHostRegister(), num_reg.GetHostRegister(), denom_reg.GetHostRegister(),
   2207               RegSize_32, true);
   2208     }
   2209 
   2210     EmitBindLabel(&done);
   2211 
   2212     m_register_cache.UninhibitAllocation();
   2213     m_register_cache.WriteGuestRegister(Reg::lo, std::move(lo));
   2214     m_register_cache.WriteGuestRegister(Reg::hi, std::move(hi));
   2215   }
   2216 
   2217   InstructionEpilogue(instruction, info);
   2218   return true;
   2219 }
   2220 
   2221 bool CodeGenerator::Compile_SetLess(Instruction instruction, const CodeCache::InstructionInfo& info)
   2222 {
   2223   InstructionPrologue(instruction, info, 1);
   2224 
   2225   const bool signed_comparison =
   2226     (instruction.op == InstructionOp::slti ||
   2227      (instruction.op == InstructionOp::funct && instruction.r.funct == InstructionFunct::slt));
   2228 
   2229   Reg dest;
   2230   Value lhs, rhs;
   2231   SpeculativeValue lhs_spec, rhs_spec;
   2232   if (instruction.op == InstructionOp::slti || instruction.op == InstructionOp::sltiu)
   2233   {
   2234     // rt <- rs < {z,s}ext(imm)
   2235     dest = instruction.i.rt;
   2236     lhs = m_register_cache.ReadGuestRegister(instruction.i.rs, true, true);
   2237     rhs = Value::FromConstantU32(instruction.i.imm_sext32());
   2238     lhs_spec = SpeculativeReadReg(instruction.i.rs);
   2239     rhs_spec = instruction.i.imm_sext32();
   2240 
   2241     // flush the old value which might free up a register
   2242     if (dest != instruction.r.rs)
   2243       m_register_cache.InvalidateGuestRegister(dest);
   2244   }
   2245   else
   2246   {
   2247     // rd <- rs < rt
   2248     dest = instruction.r.rd;
   2249     lhs = m_register_cache.ReadGuestRegister(instruction.r.rs, true, true);
   2250     rhs = m_register_cache.ReadGuestRegister(instruction.r.rt);
   2251     lhs_spec = SpeculativeReadReg(instruction.r.rs);
   2252     rhs_spec = SpeculativeReadReg(instruction.r.rt);
   2253 
   2254     // flush the old value which might free up a register
   2255     if (dest != instruction.i.rs && dest != instruction.r.rt)
   2256       m_register_cache.InvalidateGuestRegister(dest);
   2257   }
   2258 
   2259   if (g_settings.UsingPGXPCPUMode())
   2260   {
   2261     if (instruction.op == InstructionOp::slti)
   2262       EmitFunctionCall(nullptr, &PGXP::CPU_SLTI, Value::FromConstantU32(instruction.bits), lhs);
   2263     else if (instruction.op == InstructionOp::sltiu)
   2264       EmitFunctionCall(nullptr, &PGXP::CPU_SLTIU, Value::FromConstantU32(instruction.bits), lhs);
   2265     else if (instruction.r.funct == InstructionFunct::slt)
   2266       EmitFunctionCall(nullptr, &PGXP::CPU_SLT, Value::FromConstantU32(instruction.bits), lhs, rhs);
   2267     else // if (instruction.r.funct == InstructionFunct::sltu)
   2268       EmitFunctionCall(nullptr, &PGXP::CPU_SLTU, Value::FromConstantU32(instruction.bits), lhs, rhs);
   2269   }
   2270 
   2271   Value result = m_register_cache.AllocateScratch(RegSize_32);
   2272   EmitCmp(lhs.host_reg, rhs);
   2273   EmitSetConditionResult(result.host_reg, result.size, signed_comparison ? Condition::Less : Condition::Below);
   2274 
   2275   m_register_cache.WriteGuestRegister(dest, std::move(result));
   2276 
   2277   SpeculativeValue value_spec;
   2278   if (lhs_spec && rhs_spec)
   2279   {
   2280     value_spec = BoolToUInt32(signed_comparison ? (static_cast<s32>(*lhs_spec) < static_cast<s32>(*rhs_spec)) :
   2281                                                   (*lhs_spec < *rhs_spec));
   2282   }
   2283   SpeculativeWriteReg(dest, value_spec);
   2284 
   2285   InstructionEpilogue(instruction, info);
   2286   return true;
   2287 }
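
        // slt/slti compare as signed, sltu/sltiu as unsigned; either way the destination
        // receives 0 or 1, which EmitSetConditionResult materializes from the comparison
        // flags. A one-line reference model (illustrative):
        [[maybe_unused]] static u32 ReferenceSetLess(u32 lhs, u32 rhs, bool signed_comparison)
        {
          return signed_comparison ? static_cast<u32>(static_cast<s32>(lhs) < static_cast<s32>(rhs)) :
                                     static_cast<u32>(lhs < rhs);
        }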
   2288 
   2289 bool CodeGenerator::Compile_Branch(Instruction instruction, const CodeCache::InstructionInfo& info)
   2290 {
   2291   InstructionPrologue(instruction, info, 1);
   2292 
   2293   auto DoBranch = [this, &instruction, &info](Condition condition, const Value& lhs, const Value& rhs, Reg lr_reg,
   2294                                               Value&& branch_target) {
   2295     const bool can_link_block = info.is_direct_branch_instruction && g_settings.cpu_recompiler_block_linking;
   2296 
   2297     // ensure the lr register is flushed, since we want its correct value after the branch.
   2298     // we don't invalidate it yet because of "jalr r0, r0": branch_target could be the lr_reg.
   2299     if (lr_reg != Reg::count && lr_reg != Reg::zero)
   2300       m_register_cache.FlushGuestRegister(lr_reg, false, true);
   2301 
   2302     // compute return address, which is also set as the new pc when the branch isn't taken
   2303     Value constant_next_pc = CalculatePC(4);
   2304     Value next_pc = constant_next_pc;
   2305     DebugAssert(constant_next_pc.IsConstant());
   2306     if (condition != Condition::Always)
   2307     {
   2308       next_pc = m_register_cache.AllocateScratch(RegSize_32);
   2309       EmitCopyValue(next_pc.GetHostRegister(), constant_next_pc);
   2310     }
   2311 
   2312     Value take_branch;
   2313     LabelType branch_taken, branch_not_taken;
   2314     if (condition != Condition::Always)
   2315     {
   2316       if (!can_link_block)
   2317       {
   2318         // the condition is inverted because we branch to skip the taken path
   2319         if (lhs.IsValid() && rhs.IsValid())
   2320           EmitConditionalBranch(condition, true, lhs.host_reg, rhs, &branch_not_taken);
   2321         else if (lhs.IsValid())
   2322           EmitConditionalBranch(condition, true, lhs.host_reg, lhs.size, &branch_not_taken);
   2323         else
   2324           EmitConditionalBranch(condition, true, &branch_not_taken);
   2325       }
   2326       else
   2327       {
   2328         take_branch = m_register_cache.AllocateScratch(RegSize_32);
   2329         switch (condition)
   2330         {
   2331           case Condition::NotEqual:
   2332           case Condition::Equal:
   2333           case Condition::Overflow:
   2334           case Condition::Greater:
   2335           case Condition::GreaterEqual:
   2336           case Condition::LessEqual:
   2337           case Condition::Less:
   2338           case Condition::Above:
   2339           case Condition::AboveEqual:
   2340           case Condition::Below:
   2341           case Condition::BelowEqual:
   2342           {
   2343             EmitCmp(lhs.GetHostRegister(), rhs);
   2344             EmitSetConditionResult(take_branch.GetHostRegister(), take_branch.size, condition);
   2345           }
   2346           break;
   2347 
   2348           case Condition::Negative:
   2349           case Condition::PositiveOrZero:
   2350           case Condition::NotZero:
   2351           case Condition::Zero:
   2352           {
   2353             Assert(!rhs.IsValid() || (rhs.IsConstant() && rhs.GetS64ConstantValue() == 0));
   2354             EmitTest(lhs.GetHostRegister(), lhs);
   2355             EmitSetConditionResult(take_branch.GetHostRegister(), take_branch.size, condition);
   2356           }
   2357           break;
   2358 
   2359           default:
   2360             UnreachableCode();
   2361             break;
   2362         }
   2363       }
   2364     }
   2365 
   2366     // save the old PC if we want to
   2367     if (lr_reg != Reg::count && lr_reg != Reg::zero)
   2368     {
   2369       // Can't cache because we have two branches. The load delay is cancelled due to the immediate flush;
   2370       // if we didn't cancel it, the value we write here could be overridden at the end of the instruction.
   2371       EmitCancelInterpreterLoadDelayForReg(lr_reg);
   2372       EmitStoreGuestRegister(lr_reg, next_pc);
   2373 
   2374       // now invalidate lr because it was possibly written in the branch
   2375       m_register_cache.InvalidateGuestRegister(lr_reg);
   2376       if (m_register_cache.GetLoadDelayRegister() == lr_reg)
   2377         m_register_cache.CancelLoadDelay();
   2378     }
   2379 
   2380     // we don't need to test the address of constant branches unless they're definitely misaligned, which would be
   2381     // strange.
   2382     if (g_settings.cpu_recompiler_memory_exceptions &&
   2383         (!branch_target.IsConstant() || (branch_target.constant_value & 0x3) != 0))
   2384     {
   2385       LabelType branch_okay;
   2386 
   2387       if (branch_target.IsConstant())
   2388       {
   2389         WARNING_LOG("Misaligned constant target branch 0x{:08X}, this is strange",
   2390                     Truncate32(branch_target.constant_value));
   2391       }
   2392       else
   2393       {
   2394         // check the alignment of the target
   2395         EmitTest(branch_target.host_reg, Value::FromConstantU32(0x3));
   2396         EmitConditionalBranch(Condition::Zero, false, &branch_okay);
   2397       }
   2398 
   2399       // exception exit for misaligned target
   2400       m_register_cache.PushState();
   2401       EmitBranch(GetCurrentFarCodePointer());
   2402       EmitBindLabel(&branch_okay);
   2403 
   2404       SwitchToFarCode();
   2405       EmitStoreCPUStructField(OFFSETOF(State, cop0_regs.BadVaddr), branch_target);
   2406       EmitFunctionCall(
   2407         nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException),
   2408         Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException(Exception::AdEL, false, false, 0)),
   2409         branch_target);
   2410       EmitExceptionExit();
   2411       SwitchToNearCode();
   2412 
   2413       m_register_cache.PopState();
   2414     }
   2415 
   2416     if (can_link_block)
   2417     {
   2418       // if it's an in-block branch, compile the delay slot now
   2419       // TODO: Make this more optimal by moving the condition down if it's a nop
   2420       Assert((m_current_instruction.instruction + 1) != m_block_end.instruction);
   2421       InstructionEpilogue(instruction, info);
   2422       m_current_instruction.instruction++;
   2423       m_current_instruction.info++;
   2424       if (!CompileInstruction(*m_current_instruction.instruction, *m_current_instruction.info))
   2425         return false;
   2426 
   2427       // flush all regs since we're at the end of the block now
   2428       BlockEpilogue();
   2429       m_block_linked = true;
   2430 
   2431       // check downcount
   2432       Value pending_ticks = m_register_cache.AllocateScratch(RegSize_32);
   2433       Value downcount = m_register_cache.AllocateScratch(RegSize_32);
   2434       EmitLoadCPUStructField(pending_ticks.GetHostRegister(), RegSize_32, OFFSETOF(State, pending_ticks));
   2435       EmitLoadCPUStructField(downcount.GetHostRegister(), RegSize_32, OFFSETOF(State, downcount));
   2436 
   2437       // fall through while pending_ticks < downcount; otherwise return to the dispatcher
   2438       LabelType return_to_dispatcher;
   2439 
   2440       if (condition != Condition::Always)
   2441       {
   2442         EmitBranchIfBitClear(take_branch.GetHostRegister(), take_branch.size, 0, &branch_not_taken);
   2443         m_register_cache.PushState();
   2444         {
   2445           WriteNewPC(branch_target, false);
   2446           EmitConditionalBranch(Condition::GreaterEqual, false, pending_ticks.GetHostRegister(), downcount,
   2447                                 &return_to_dispatcher);
   2448 
   2449           // we're committed at this point :D
   2450           EmitEndBlock(true, nullptr);
   2451 
   2452           DebugAssert(branch_target.IsConstant());
   2453           if (static_cast<u32>(branch_target.constant_value) == m_block->pc)
   2454           {
   2455             // self-link
   2456             EmitBranch(GetStartNearCodePointer());
   2457           }
   2458           else
   2459           {
   2460             const void* host_target = CPU::CodeCache::CreateBlockLink(m_block, GetCurrentCodePointer(),
   2461                                                                       static_cast<u32>(branch_target.constant_value));
   2462             EmitBranch(host_target);
   2463           }
   2464         }
   2465         m_register_cache.PopState();
   2466 
   2467         SwitchToNearCode();
   2468         EmitBindLabel(&branch_not_taken);
   2469       }
   2470 
   2471       m_register_cache.PushState();
   2472 
   2473       if (condition != Condition::Always)
   2474       {
   2475         WriteNewPC(next_pc, true);
   2476       }
   2477       else
   2478       {
   2479         WriteNewPC(branch_target, true);
   2480       }
   2481 
   2482       EmitConditionalBranch(Condition::GreaterEqual, false, pending_ticks.GetHostRegister(), downcount,
   2483                             &return_to_dispatcher);
   2484 
   2485       EmitEndBlock(true, nullptr);
   2486 
   2487       const Value& jump_target = (condition != Condition::Always) ? constant_next_pc : branch_target;
   2488       DebugAssert(jump_target.IsConstant());
   2489       if (static_cast<u32>(jump_target.constant_value) == m_block->pc)
   2490       {
   2491         // self-link
   2492         EmitBranch(GetStartNearCodePointer());
   2493       }
   2494       else
   2495       {
   2496         const void* host_target = CPU::CodeCache::CreateBlockLink(m_block, GetCurrentCodePointer(),
   2497                                                                   static_cast<u32>(jump_target.constant_value));
   2498         EmitBranch(host_target);
   2499       }
   2500 
   2501       m_register_cache.PopState();
   2502 
   2503       EmitBindLabel(&return_to_dispatcher);
   2504       EmitEndBlock(true, CodeCache::g_run_events_and_dispatch);
   2505     }
   2506     else
   2507     {
   2508       if (condition != Condition::Always)
   2509       {
   2510         // branch taken path - modify the next pc
   2511         EmitBindLabel(&branch_taken);
   2512         EmitCopyValue(next_pc.GetHostRegister(), branch_target);
   2513 
   2514         // converge point
   2515         EmitBindLabel(&branch_not_taken);
   2516         WriteNewPC(next_pc, true);
   2517       }
   2518       else
   2519       {
   2520         // next_pc is not used for unconditional branches
   2521         WriteNewPC(branch_target, true);
   2522       }
   2523 
   2524       InstructionEpilogue(instruction, info);
   2525     }
   2526 
   2527     return true;
   2528   };
   2529 
   2530   // Compute the branch target.
   2531   // This depends on the form of the instruction.
   2532   switch (instruction.op)
   2533   {
   2534     case InstructionOp::j:
   2535     case InstructionOp::jal:
   2536     {
   2537       // npc = (pc & 0xF0000000) | (target << 2)
   2538       Value branch_target = OrValues(AndValues(CalculatePC(), Value::FromConstantU32(0xF0000000)),
   2539                                      Value::FromConstantU32(instruction.j.target << 2));
   2540 
   2541       return DoBranch(Condition::Always, Value(), Value(),
   2542                       (instruction.op == InstructionOp::jal) ? Reg::ra : Reg::count, std::move(branch_target));
   2543     }
   2544 
   2545     case InstructionOp::funct:
   2546     {
   2547       if (instruction.r.funct == InstructionFunct::jr || instruction.r.funct == InstructionFunct::jalr)
   2548       {
   2549         // npc = rs, link to rd
   2550         Value branch_target = m_register_cache.ReadGuestRegister(instruction.r.rs);
   2551         return DoBranch(Condition::Always, Value(), Value(),
   2552                         (instruction.r.funct == InstructionFunct::jalr) ? instruction.r.rd : Reg::count,
   2553                         std::move(branch_target));
   2554       }
   2555       else if (instruction.r.funct == InstructionFunct::syscall || instruction.r.funct == InstructionFunct::break_)
   2556       {
   2557         const Exception excode =
   2558           (instruction.r.funct == InstructionFunct::syscall) ? Exception::Syscall : Exception::BP;
   2559         GenerateExceptionExit(instruction, info, excode);
   2560         InstructionEpilogue(instruction, info);
   2561         return true;
   2562       }
   2563       else
   2564       {
   2565         UnreachableCode();
   2566       }
   2567     }
   2568 
   2569     case InstructionOp::beq:
   2570     case InstructionOp::bne:
   2571     {
   2572       // npc = pc + (sext(imm) << 2)
   2573       Value branch_target = CalculatePC(instruction.i.imm_sext32() << 2);
   2574 
   2575       // beq zero, zero, addr -> unconditional branch
   2576       if (instruction.op == InstructionOp::beq && instruction.i.rs == Reg::zero && instruction.i.rt == Reg::zero)
   2577       {
   2578         return DoBranch(Condition::Always, Value(), Value(), Reg::count, std::move(branch_target));
   2579       }
   2580       else
   2581       {
   2582         // branch <- rs op rt
   2583         Value lhs = m_register_cache.ReadGuestRegister(instruction.i.rs, true, true);
   2584         Value rhs = m_register_cache.ReadGuestRegister(instruction.i.rt);
   2585         const Condition condition = (instruction.op == InstructionOp::beq) ? Condition::Equal : Condition::NotEqual;
   2586         return DoBranch(condition, lhs, rhs, Reg::count, std::move(branch_target));
   2587       }
   2588     }
   2589 
   2590     case InstructionOp::bgtz:
   2591     case InstructionOp::blez:
   2592     {
   2593       // npc = pc + (sext(imm) << 2)
   2594       Value branch_target = CalculatePC(instruction.i.imm_sext32() << 2);
   2595 
   2596       // branch <- rs op 0
   2597       Value lhs = m_register_cache.ReadGuestRegister(instruction.i.rs, true, true);
   2598 
   2599       const Condition condition = (instruction.op == InstructionOp::bgtz) ? Condition::Greater : Condition::LessEqual;
   2600       return DoBranch(condition, lhs, Value::FromConstantU32(0), Reg::count, std::move(branch_target));
   2601     }
   2602 
   2603     case InstructionOp::b:
   2604     {
   2605       // npc = pc + (sext(imm) << 2)
   2606       Value branch_target = CalculatePC(instruction.i.imm_sext32() << 2);
   2607 
   2608       const u8 rt = static_cast<u8>(instruction.i.rt.GetValue());
   2609       const bool bgez = ConvertToBoolUnchecked(rt & u8(1));
   2610       const Condition condition = (bgez && instruction.r.rs == Reg::zero) ?
   2611                                     Condition::Always :
   2612                                     (bgez ? Condition::PositiveOrZero : Condition::Negative);
   2613       const bool link = (rt & u8(0x1E)) == u8(0x10);
   2614 
   2615       // Read has to happen before the link as the compare can use ra.
   2616       Value lhs;
   2617       if (condition != Condition::Always)
   2618         lhs = m_register_cache.ReadGuestRegisterToScratch(instruction.i.rs);
   2619 
   2620       // The return address is always written if link is set, regardless of whether the branch is taken.
   2621       if (link)
   2622       {
   2623         EmitCancelInterpreterLoadDelayForReg(Reg::ra);
   2624         m_register_cache.WriteGuestRegister(Reg::ra, CalculatePC(4));
   2625       }
   2626 
   2627       return DoBranch(condition, lhs, Value(), Reg::count, std::move(branch_target));
   2628     }
   2629 
   2630     default:
   2631       UnreachableCode();
   2632   }
   2633 }
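
        // A compact reference for the target forms dispatched above (illustrative; "pc"
        // is the branch instruction's own address, matching the comments in each case,
        // and jr/jalr take the target directly from rs):
        [[maybe_unused]] static u32 ReferenceBranchTarget(InstructionOp op, u32 pc, u32 target26, u32 imm_sext32)
        {
          if (op == InstructionOp::j || op == InstructionOp::jal)
            return (pc & 0xF0000000u) | (target26 << 2); // region bits from pc, word index from the instruction
          return pc + 4 + (imm_sext32 << 2);             // conditional forms are relative to the delay slot
        }
        // For the bcond group (InstructionOp::b), bit 0 of the rt field selects bgez over
        // bltz, and (rt & 0x1E) == 0x10 selects the linking variants, which write the
        // return address to ra whether or not the branch is taken.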
   2634 
   2635 bool CodeGenerator::Compile_lui(Instruction instruction, const CodeCache::InstructionInfo& info)
   2636 {
   2637   InstructionPrologue(instruction, info, 1);
   2638 
   2639   if (g_settings.UsingPGXPCPUMode())
   2640     EmitFunctionCall(nullptr, &PGXP::CPU_LUI, Value::FromConstantU32(instruction.bits));
   2641 
   2642   // rt <- (imm << 16)
   2643   const u32 value = instruction.i.imm_zext32() << 16;
   2644   m_register_cache.WriteGuestRegister(instruction.i.rt, Value::FromConstantU32(value));
   2645   SpeculativeWriteReg(instruction.i.rt, value);
   2646 
   2647   InstructionEpilogue(instruction, info);
   2648   return true;
   2649 }
   2650 
   2651 bool CodeGenerator::Compile_cop0(Instruction instruction, const CodeCache::InstructionInfo& info)
   2652 {
   2653   if (instruction.cop.IsCommonInstruction())
   2654   {
   2655     switch (instruction.cop.CommonOp())
   2656     {
   2657       case CopCommonInstruction::mfcn:
   2658       case CopCommonInstruction::mtcn:
   2659       {
   2660         u32 offset;
   2661         u32 write_mask = UINT32_C(0xFFFFFFFF);
   2662 
   2663         const Cop0Reg reg = static_cast<Cop0Reg>(instruction.r.rd.GetValue());
   2664         switch (reg)
   2665         {
   2666           case Cop0Reg::BPC:
   2667             offset = OFFSETOF(State, cop0_regs.BPC);
   2668             break;
   2669 
   2670           case Cop0Reg::BPCM:
   2671             offset = OFFSETOF(State, cop0_regs.BPCM);
   2672             break;
   2673 
   2674           case Cop0Reg::BDA:
   2675             offset = OFFSETOF(State, cop0_regs.BDA);
   2676             break;
   2677 
   2678           case Cop0Reg::BDAM:
   2679             offset = OFFSETOF(State, cop0_regs.BDAM);
   2680             break;
   2681 
   2682           case Cop0Reg::DCIC:
   2683             offset = OFFSETOF(State, cop0_regs.dcic.bits);
   2684             write_mask = Cop0Registers::DCIC::WRITE_MASK;
   2685             break;
   2686 
   2687           case Cop0Reg::JUMPDEST:
   2688             offset = OFFSETOF(State, cop0_regs.TAR);
   2689             write_mask = 0;
   2690             break;
   2691 
   2692           case Cop0Reg::BadVaddr:
   2693             offset = OFFSETOF(State, cop0_regs.BadVaddr);
   2694             write_mask = 0;
   2695             break;
   2696 
   2697           case Cop0Reg::SR:
   2698             offset = OFFSETOF(State, cop0_regs.sr.bits);
   2699             write_mask = Cop0Registers::SR::WRITE_MASK;
   2700             break;
   2701 
   2702           case Cop0Reg::CAUSE:
   2703             offset = OFFSETOF(State, cop0_regs.cause.bits);
   2704             write_mask = Cop0Registers::CAUSE::WRITE_MASK;
   2705             break;
   2706 
   2707           case Cop0Reg::EPC:
   2708             offset = OFFSETOF(State, cop0_regs.EPC);
   2709             write_mask = 0;
   2710             break;
   2711 
   2712           case Cop0Reg::PRID:
   2713             offset = OFFSETOF(State, cop0_regs.PRID);
   2714             write_mask = 0;
   2715             break;
   2716 
   2717           default:
   2718             return Compile_Fallback(instruction, info);
   2719         }
   2720 
   2721         InstructionPrologue(instruction, info, 1);
   2722 
   2723         if (instruction.cop.CommonOp() == CopCommonInstruction::mfcn)
   2724         {
   2725           // coprocessor loads are load-delayed
   2726           Value value = m_register_cache.AllocateScratch(RegSize_32);
   2727           EmitLoadCPUStructField(value.host_reg, value.size, offset);
   2728 
   2729           if (g_settings.UsingPGXPCPUMode())
   2730             EmitFunctionCall(nullptr, &PGXP::CPU_MFC0, Value::FromConstantU32(instruction.bits), value);
   2731 
   2732           m_register_cache.WriteGuestRegisterDelayed(instruction.r.rt, std::move(value));
   2733 
   2734           if (reg == Cop0Reg::SR)
   2735             SpeculativeWriteReg(instruction.r.rt, m_speculative_constants.cop0_sr);
   2736           else
   2737             SpeculativeWriteReg(instruction.r.rt, std::nullopt);
   2738         }
   2739         else
   2740         {
   2741           // some registers are not writable, so ignore those
   2742           if (write_mask != 0)
   2743           {
   2744             Value value = m_register_cache.ReadGuestRegister(instruction.r.rt);
   2745             if (write_mask != UINT32_C(0xFFFFFFFF))
   2746             {
   2747               // need to adjust the mask
   2748               Value masked_value = AndValues(value, Value::FromConstantU32(write_mask));
   2749               {
   2750                 Value old_value = m_register_cache.AllocateScratch(RegSize_32);
   2751                 EmitLoadCPUStructField(old_value.GetHostRegister(), RegSize_32, offset);
   2752                 EmitAnd(old_value.GetHostRegister(), old_value.GetHostRegister(), Value::FromConstantU32(~write_mask));
   2753                 OrValueInPlace(masked_value, old_value);
   2754               }
   2755 
   2756               if (g_settings.UsingPGXPCPUMode())
   2757               {
   2758                 EmitFunctionCall(nullptr, &PGXP::CPU_MTC0, Value::FromConstantU32(instruction.bits), masked_value,
   2759                                  value);
   2760               }
   2761               value = std::move(masked_value);
   2762             }
   2763             else
   2764             {
   2765               if (g_settings.UsingPGXPCPUMode())
   2766                 EmitFunctionCall(nullptr, &PGXP::CPU_MTC0, Value::FromConstantU32(instruction.bits), value, value);
   2767             }
   2768 
   2769             if (reg == Cop0Reg::SR)
   2770               m_speculative_constants.cop0_sr = SpeculativeReadReg(instruction.r.rt);
   2771 
   2772             // changing SR[Isc] needs to update fastmem views
   2773             if (reg == Cop0Reg::SR)
   2774             {
   2775               LabelType skip_mem_update;
   2776               Value old_value = m_register_cache.AllocateScratch(RegSize_32);
   2777               EmitLoadCPUStructField(old_value.host_reg, RegSize_32, offset);
   2778               EmitStoreCPUStructField(offset, value);
   2779               EmitXor(old_value.host_reg, old_value.host_reg, value);
   2780               EmitBranchIfBitClear(old_value.host_reg, RegSize_32, 16, &skip_mem_update);
   2781               m_register_cache.InhibitAllocation();
   2782               EmitFunctionCall(nullptr, &UpdateMemoryPointers, m_register_cache.GetCPUPtr());
   2783               EmitUpdateFastmemBase();
   2784               EmitBindLabel(&skip_mem_update);
   2785               m_register_cache.UninhibitAllocation();
   2786             }
   2787             else
   2788             {
   2789               EmitStoreCPUStructField(offset, value);
   2790             }
   2791           }
   2792         }
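
                // In effect, a masked mtc0 computes (illustrative):
                //   new_reg = (old_reg & ~write_mask) | (rt_value & write_mask);
                // so read-only registers (write_mask == 0) ignore the write entirely, and
                // partially-writable ones (DCIC/SR/CAUSE) keep their hardwired bits.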
   2793 
   2794         if (instruction.cop.CommonOp() == CopCommonInstruction::mtcn)
   2795         {
   2796           if (reg == Cop0Reg::CAUSE || reg == Cop0Reg::SR)
   2797           {
   2798             // Emit an interrupt check on writes to CAUSE/SR.
   2799             Value sr_value = m_register_cache.AllocateScratch(RegSize_32);
   2800             Value cause_value = m_register_cache.AllocateScratch(RegSize_32);
   2801             m_register_cache.InhibitAllocation();
   2802 
   2803             // m_cop0_regs.sr.IEc && ((m_cop0_regs.cause.Ip & m_cop0_regs.sr.Im) != 0)
   2804             LabelType no_interrupt;
   2805             EmitLoadCPUStructField(sr_value.host_reg, sr_value.size, OFFSETOF(State, cop0_regs.sr.bits));
   2806             EmitLoadCPUStructField(cause_value.host_reg, cause_value.size, OFFSETOF(State, cop0_regs.cause.bits));
   2807             EmitBranchIfBitClear(sr_value.host_reg, sr_value.size, 0, &no_interrupt);
   2808             EmitAnd(sr_value.host_reg, sr_value.host_reg, cause_value);
   2809             EmitTest(sr_value.host_reg, Value::FromConstantU32(0xFF00));
   2810             EmitConditionalBranch(Condition::Zero, false, &no_interrupt);
   2811             m_register_cache.UninhibitAllocation();
   2812 
   2813             EmitBranch(GetCurrentFarCodePointer());
   2814             SwitchToFarCode();
   2815             m_register_cache.PushState();
   2816             if (!info.is_last_instruction)
   2817               WriteNewPC(CalculatePC(), false);
   2818             EmitStoreCPUStructField(OFFSETOF(State, downcount), Value::FromConstantU32(0));
   2819             EmitExceptionExit();
   2820             m_register_cache.PopState();
   2821             SwitchToNearCode();
   2822 
   2823             EmitBindLabel(&no_interrupt);
   2824           }
   2825           else if (reg == Cop0Reg::DCIC && g_settings.cpu_recompiler_memory_exceptions)
   2826           {
   2827             Value dcic_value = m_register_cache.AllocateScratch(RegSize_32);
   2828             m_register_cache.InhibitAllocation();
   2829 
   2830             // if ((dcic & master_enable_bits) != master_enable_bits) goto not_enabled;
   2831             LabelType not_enabled;
   2832             EmitLoadCPUStructField(dcic_value.GetHostRegister(), dcic_value.size, OFFSETOF(State, cop0_regs.dcic.bits));
   2833             EmitAnd(dcic_value.GetHostRegister(), dcic_value.GetHostRegister(),
   2834                     Value::FromConstantU32(Cop0Registers::DCIC::MASTER_ENABLE_BITS));
   2835             EmitConditionalBranch(Condition::NotEqual, false, dcic_value.host_reg,
   2836                                   Value::FromConstantU32(Cop0Registers::DCIC::MASTER_ENABLE_BITS), &not_enabled);
   2837 
   2838             // if ((dcic & breakpoint_bits) == 0) goto not_enabled;
   2839             EmitLoadCPUStructField(dcic_value.GetHostRegister(), dcic_value.size, OFFSETOF(State, cop0_regs.dcic.bits));
   2840             EmitTest(dcic_value.GetHostRegister(),
   2841                      Value::FromConstantU32(Cop0Registers::DCIC::ANY_BREAKPOINTS_ENABLED_BITS));
   2842             EmitConditionalBranch(Condition::Zero, false, &not_enabled);
   2843 
    2844             // update the debug dispatcher flag; if it is now set, exit the block
   2845             EmitFunctionCall(nullptr, &UpdateDebugDispatcherFlag);
   2846             EmitLoadCPUStructField(dcic_value.GetHostRegister(), RegSize_8, OFFSETOF(State, using_debug_dispatcher));
   2847             EmitBranchIfBitClear(dcic_value.GetHostRegister(), RegSize_8, 0, &not_enabled);
   2848 
   2849             m_register_cache.UninhibitAllocation();
   2850 
   2851             // exit block early if enabled
   2852             EmitBranch(GetCurrentFarCodePointer());
   2853             SwitchToFarCode();
   2854             m_register_cache.PushState();
   2855             WriteNewPC(CalculatePC(), false);
   2856             EmitExceptionExit();
   2857             m_register_cache.PopState();
   2858             SwitchToNearCode();
   2859 
   2860             EmitBindLabel(&not_enabled);
   2861           }
   2862         }
   2863 
   2864         InstructionEpilogue(instruction, info);
   2865         return true;
   2866       }
   2867 
    2868       // of the common ops, only mfc0/mtc0 are implemented for cop0; the rest fall back
   2869       default:
   2870         return Compile_Fallback(instruction, info);
   2871     }
   2872   }
   2873   else
   2874   {
   2875     switch (instruction.cop.Cop0Op())
   2876     {
   2877       case Cop0Instruction::rfe:
   2878       {
   2879         InstructionPrologue(instruction, info, 1);
   2880 
   2881         // shift mode bits right two, preserving upper bits
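                // rfe pops the 3-entry KU/IE stack: (KUp,IEp) -> (KUc,IEc), (KUo,IEo) ->
                // (KUp,IEp), with (KUo,IEo) itself preserved; e.g. 0b10'11'01 -> 0b10'10'11.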
   2882         static constexpr u32 mode_bits_mask = UINT32_C(0b1111);
   2883         Value sr = m_register_cache.AllocateScratch(RegSize_32);
   2884         EmitLoadCPUStructField(sr.host_reg, RegSize_32, OFFSETOF(State, cop0_regs.sr.bits));
   2885         {
   2886           Value new_mode_bits = m_register_cache.AllocateScratch(RegSize_32);
   2887           EmitShr(new_mode_bits.host_reg, sr.host_reg, new_mode_bits.size, Value::FromConstantU32(2));
   2888           EmitAnd(new_mode_bits.host_reg, new_mode_bits.host_reg, Value::FromConstantU32(mode_bits_mask));
   2889           EmitAnd(sr.host_reg, sr.host_reg, Value::FromConstantU32(~mode_bits_mask));
   2890           EmitOr(sr.host_reg, sr.host_reg, new_mode_bits);
   2891         }
   2892 
   2893         EmitStoreCPUStructField(OFFSETOF(State, cop0_regs.sr.bits), sr);
   2894 
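                // Restoring IEc/Im can unmask an interrupt already pending in CAUSE.Ip, so
                // repeat the interrupt test; zeroing downcount forces a prompt dispatch.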
   2895         Value cause_value = m_register_cache.AllocateScratch(RegSize_32);
   2896         EmitLoadCPUStructField(cause_value.host_reg, cause_value.size, OFFSETOF(State, cop0_regs.cause.bits));
   2897 
   2898         LabelType no_interrupt;
   2899         EmitAnd(sr.host_reg, sr.host_reg, cause_value);
   2900         EmitTest(sr.host_reg, Value::FromConstantU32(0xFF00));
   2901         EmitConditionalBranch(Condition::Zero, false, &no_interrupt);
   2902         m_register_cache.InhibitAllocation();
   2903         EmitStoreCPUStructField(OFFSETOF(State, downcount), Value::FromConstantU32(0));
   2904         EmitBindLabel(&no_interrupt);
   2905         m_register_cache.UninhibitAllocation();
   2906 
   2907         InstructionEpilogue(instruction, info);
   2908         return true;
   2909       }
   2910 
   2911       default:
   2912         return Compile_Fallback(instruction, info);
   2913     }
   2914   }
   2915 }
   2916 
   2917 Value CodeGenerator::DoGTERegisterRead(u32 index)
   2918 {
   2919   Value value = m_register_cache.AllocateScratch(RegSize_32);
   2920 
   2921   // Most GTE registers can be read directly. Handle the special cases here.
   2922   if (index == 15) // SXY3
   2923   {
   2924     // mirror of SXY2
   2925     index = 14;
   2926   }
   2927 
   2928   switch (index)
   2929   {
   2930     case 28: // IRGB
   2931     case 29: // ORGB
   2932     {
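              // IRGB/ORGB convert between IR1-IR3 and packed 5:5:5 color, so defer to the
              // interpreter's GTE::ReadRegister instead of open-coding the conversion.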
   2933       EmitFunctionCall(&value, &GTE::ReadRegister, Value::FromConstantU32(index));
   2934     }
   2935     break;
   2936 
   2937     default:
   2938     {
   2939       EmitLoadCPUStructField(value.host_reg, RegSize_32, State::GTERegisterOffset(index));
   2940     }
   2941     break;
   2942   }
   2943 
   2944   return value;
   2945 }
   2946 
   2947 void CodeGenerator::DoGTERegisterWrite(u32 index, const Value& value)
   2948 {
   2949   switch (index)
   2950   {
   2951     case 1:  // V0[z]
   2952     case 3:  // V1[z]
   2953     case 5:  // V2[z]
   2954     case 8:  // IR0
   2955     case 9:  // IR1
   2956     case 10: // IR2
   2957     case 11: // IR3
   2958     case 36: // RT33
   2959     case 44: // L33
   2960     case 52: // LR33
   2961     case 58: // H       - sign-extended on read but zext on use
   2962     case 59: // DQA
   2963     case 61: // ZSF3
   2964     case 62: // ZSF4
   2965     {
    2966       // sign-extend 16-bit values (vector z components, IR0-IR3, last matrix elements, H, DQA, ZSF3/4)
   2967       Value temp = ConvertValueSize(value.ViewAsSize(RegSize_16), RegSize_32, true);
   2968       EmitStoreCPUStructField(State::GTERegisterOffset(index), temp);
   2969       return;
   2970     }
   2971     break;
   2972 
   2973     case 7:  // OTZ
   2974     case 16: // SZ0
   2975     case 17: // SZ1
   2976     case 18: // SZ2
   2977     case 19: // SZ3
   2978     {
   2979       // zero-extend unsigned values
   2980       Value temp = ConvertValueSize(value.ViewAsSize(RegSize_16), RegSize_32, false);
   2981       EmitStoreCPUStructField(State::GTERegisterOffset(index), temp);
   2982       return;
   2983     }
   2984     break;
   2985 
   2986     case 15: // SXY3
   2987     {
   2988       // writing to SXYP pushes to the FIFO
   2989       Value temp = m_register_cache.AllocateScratch(RegSize_32);
   2990 
   2991       // SXY0 <- SXY1
   2992       EmitLoadCPUStructField(temp.host_reg, RegSize_32, State::GTERegisterOffset(13));
   2993       EmitStoreCPUStructField(State::GTERegisterOffset(12), temp);
   2994 
   2995       // SXY1 <- SXY2
   2996       EmitLoadCPUStructField(temp.host_reg, RegSize_32, State::GTERegisterOffset(14));
   2997       EmitStoreCPUStructField(State::GTERegisterOffset(13), temp);
   2998 
   2999       // SXY2 <- SXYP
   3000       EmitStoreCPUStructField(State::GTERegisterOffset(14), value);
   3001       return;
   3002     }
   3003     break;
   3004 
   3005     case 28: // IRGB
   3006     case 30: // LZCS
   3007     case 63: // FLAG
   3008     {
   3009       EmitFunctionCall(nullptr, &GTE::WriteRegister, Value::FromConstantU32(index), value);
   3010       return;
   3011     }
   3012 
   3013     case 29: // ORGB
   3014     case 31: // LZCR
   3015     {
   3016       // read-only registers
   3017       return;
   3018     }
   3019 
   3020     default:
   3021     {
   3022       // written as-is, 2x16 or 1x32 bits
   3023       EmitStoreCPUStructField(State::GTERegisterOffset(index), value);
   3024       return;
   3025     }
   3026   }
   3027 }
   3028 
   3029 bool CodeGenerator::Compile_cop2(Instruction instruction, const CodeCache::InstructionInfo& info)
   3030 {
   3031   if (instruction.op == InstructionOp::lwc2 || instruction.op == InstructionOp::swc2)
   3032   {
   3033     StallUntilGTEComplete();
   3034     InstructionPrologue(instruction, info, 1);
   3035 
   3036     const u32 reg = static_cast<u32>(instruction.i.rt.GetValue());
   3037     Value address = AddValues(m_register_cache.ReadGuestRegister(instruction.i.rs),
   3038                               Value::FromConstantU32(instruction.i.imm_sext32()), false);
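            // Mirror the address computation at compile time: a constant rs gives the later
            // emitters and the store-invalidation below the exact target address.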
   3039     SpeculativeValue spec_address = SpeculativeReadReg(instruction.i.rs);
   3040     if (spec_address)
   3041       spec_address = *spec_address + instruction.i.imm_sext32();
   3042 
   3043     if (instruction.op == InstructionOp::lwc2)
   3044     {
   3045       Value value = EmitLoadGuestMemory(instruction, info, address, spec_address, RegSize_32);
   3046       DoGTERegisterWrite(reg, value);
   3047 
   3048       if (g_settings.gpu_pgxp_enable)
   3049         EmitFunctionCall(nullptr, PGXP::CPU_LWC2, Value::FromConstantU32(instruction.bits), address, value);
   3050     }
   3051     else
   3052     {
   3053       Value value = DoGTERegisterRead(reg);
   3054       EmitStoreGuestMemory(instruction, info, address, spec_address, RegSize_32, value);
   3055 
   3056       if (g_settings.gpu_pgxp_enable)
   3057         EmitFunctionCall(nullptr, PGXP::CPU_SWC2, Value::FromConstantU32(instruction.bits), address, value);
   3058 
   3059       if (spec_address)
   3060         SpeculativeWriteMemory(*spec_address, std::nullopt);
   3061     }
   3062 
   3063     InstructionEpilogue(instruction, info);
   3064     return true;
   3065   }
   3066 
   3067   Assert(instruction.op == InstructionOp::cop2);
   3068 
   3069   if (instruction.cop.IsCommonInstruction())
   3070   {
   3071     switch (instruction.cop.CommonOp())
   3072     {
   3073       case CopCommonInstruction::mfcn:
   3074       case CopCommonInstruction::cfcn:
   3075       {
   3076         const u32 reg = static_cast<u32>(instruction.r.rd.GetValue()) +
   3077                         ((instruction.cop.CommonOp() == CopCommonInstruction::cfcn) ? 32 : 0);
   3078 
   3079         StallUntilGTEComplete();
   3080         InstructionPrologue(instruction, info, 1);
   3081 
   3082         Value value = DoGTERegisterRead(reg);
   3083 
    3084         // PGXP is called first, before ownership of the value is transferred to the register cache.
   3085         if (g_settings.gpu_pgxp_enable)
   3086           EmitFunctionCall(nullptr, PGXP::CPU_MFC2, Value::FromConstantU32(instruction.bits), value);
   3087 
   3088         m_register_cache.WriteGuestRegisterDelayed(instruction.r.rt, std::move(value));
   3089         SpeculativeWriteReg(instruction.r.rt, std::nullopt);
   3090 
   3091         InstructionEpilogue(instruction, info);
   3092         return true;
   3093       }
   3094 
   3095       case CopCommonInstruction::mtcn:
   3096       case CopCommonInstruction::ctcn:
   3097       {
   3098         const u32 reg = static_cast<u32>(instruction.r.rd.GetValue()) +
   3099                         ((instruction.cop.CommonOp() == CopCommonInstruction::ctcn) ? 32 : 0);
   3100 
   3101         StallUntilGTEComplete();
   3102         InstructionPrologue(instruction, info, 1);
   3103 
   3104         Value value = m_register_cache.ReadGuestRegister(instruction.r.rt);
   3105         DoGTERegisterWrite(reg, value);
   3106 
   3107         if (g_settings.gpu_pgxp_enable)
   3108           EmitFunctionCall(nullptr, PGXP::CPU_MTC2, Value::FromConstantU32(instruction.bits), value);
   3109 
   3110         InstructionEpilogue(instruction, info);
   3111         return true;
   3112       }
   3113 
   3114       default:
   3115         return Compile_Fallback(instruction, info);
   3116     }
   3117   }
   3118   else
   3119   {
   3120     TickCount func_ticks;
   3121     GTE::InstructionImpl func = GTE::GetInstructionImpl(instruction.bits, &func_ticks);
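            // Both the implementation and its cycle count are resolved at compile time, so
            // AddGTETicks() below folds the cost in without any runtime lookup.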
   3122 
   3123     // forward everything to the GTE.
   3124     StallUntilGTEComplete();
   3125     InstructionPrologue(instruction, info, 1);
   3126 
   3127     Value instruction_bits = Value::FromConstantU32(instruction.bits & GTE::Instruction::REQUIRED_BITS_MASK);
   3128     EmitFunctionCall(nullptr, func, instruction_bits);
   3129     AddGTETicks(func_ticks);
   3130 
   3131     InstructionEpilogue(instruction, info);
   3132     return true;
   3133   }
   3134 }
   3135 
   3136 void CodeGenerator::InitSpeculativeRegs()
   3137 {
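          // Seeded from the live register file, so these describe one possible execution
          // only; they are speculative hints, not guarantees.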
   3138   for (u8 i = 0; i < static_cast<u8>(Reg::count); i++)
   3139     m_speculative_constants.regs[i] = g_state.regs.r[i];
   3140 
   3141   m_speculative_constants.cop0_sr = g_state.cop0_regs.sr.bits;
   3142 }
   3143 
   3144 void CodeGenerator::InvalidateSpeculativeValues()
   3145 {
   3146   m_speculative_constants.regs.fill(std::nullopt);
   3147   m_speculative_constants.memory.clear();
   3148   m_speculative_constants.cop0_sr.reset();
   3149 }
   3150 
   3151 CodeGenerator::SpeculativeValue CodeGenerator::SpeculativeReadReg(Reg reg)
   3152 {
   3153   return m_speculative_constants.regs[static_cast<u8>(reg)];
   3154 }
   3155 
   3156 void CodeGenerator::SpeculativeWriteReg(Reg reg, SpeculativeValue value)
   3157 {
   3158   m_speculative_constants.regs[static_cast<u8>(reg)] = value;
   3159 }
   3160 
   3161 CodeGenerator::SpeculativeValue CodeGenerator::SpeculativeReadMemory(VirtualMemoryAddress address)
   3162 {
   3163   PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK;
   3164 
   3165   auto it = m_speculative_constants.memory.find(address);
   3166   if (it != m_speculative_constants.memory.end())
   3167     return it->second;
   3168 
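          // With no tracked store, only scratchpad and main RAM contents are predictable
          // at compile time; any other region (e.g. MMIO) stays unknown.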
   3169   u32 value;
   3170   if ((phys_addr & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR)
   3171   {
   3172     u32 scratchpad_offset = phys_addr & SCRATCHPAD_OFFSET_MASK;
   3173     std::memcpy(&value, &CPU::g_state.scratchpad[scratchpad_offset], sizeof(value));
   3174     return value;
   3175   }
   3176 
   3177   if (Bus::IsRAMAddress(phys_addr))
   3178   {
   3179     u32 ram_offset = phys_addr & Bus::g_ram_mask;
   3180     std::memcpy(&value, &Bus::g_ram[ram_offset], sizeof(value));
   3181     return value;
   3182   }
   3183 
   3184   return std::nullopt;
   3185 }
   3186 
   3187 void CodeGenerator::SpeculativeWriteMemory(u32 address, SpeculativeValue value)
   3188 {
   3189   PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK;
   3190 
   3191   auto it = m_speculative_constants.memory.find(address);
   3192   if (it != m_speculative_constants.memory.end())
   3193   {
   3194     it->second = value;
   3195     return;
   3196   }
   3197 
   3198   if ((phys_addr & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR || Bus::IsRAMAddress(phys_addr))
   3199     m_speculative_constants.memory.emplace(address, value);
   3200 }
   3201 
   3202 bool CodeGenerator::SpeculativeIsCacheIsolated()
   3203 {
   3204   if (!m_speculative_constants.cop0_sr.has_value())
   3205     return false;
   3206 
   3207   const Cop0Registers::SR sr{m_speculative_constants.cop0_sr.value()};
   3208   return sr.Isc;
   3209 }
   3210 
   3211 } // namespace CPU::Recompiler