duckstation

duckstation, archived from the revision just before upstream changed it to a proprietary software project; this is the libre version.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

cpu_recompiler_code_generator_x64.cpp (93892B)


      1 // SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
      3 
      4 #include "cpu_code_cache_private.h"
      5 #include "cpu_core.h"
      6 #include "cpu_core_private.h"
      7 #include "cpu_recompiler_code_generator.h"
      8 #include "cpu_recompiler_thunks.h"
      9 #include "settings.h"
     10 #include "timing_event.h"
     11 
     12 #include "common/align.h"
     13 #include "common/assert.h"
     14 #include "common/log.h"
     15 #include "common/memmap.h"
     16 
     17 #ifdef CPU_ARCH_X64
     18 
     19 Log_SetChannel(Recompiler::CodeGenerator);
     20 
     21 #ifdef ENABLE_HOST_DISASSEMBLY
     22 #include "Zycore/Format.h"
     23 #include "Zycore/Status.h"
     24 #include "Zydis/Zydis.h"
     25 #endif
     26 
     27 bool CPU::Recompiler::IsCallerSavedRegister(u32 id)
     28 {
     29 #ifdef _WIN32
     30   // The x64 ABI considers the registers RAX, RCX, RDX, R8, R9, R10, R11, and XMM0-XMM5 volatile.
     31   return (id <= 2 || (id >= 8 && id <= 11));
     32 #else
     33   // rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers.
     34   return (id <= 2 || id == 6 || id == 7 || (id >= 8 && id <= 11));
     35 #endif
     36 }
     37 
     38 u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
     39 {
     40   using namespace Xbyak;
     41 
     42 #define PTR(x) (cg->rbp + (((u8*)(x)) - ((u8*)&g_state)))
     43 
     44 #ifdef _WIN32
     45   // Shadow space for Win32
     46   constexpr u32 stack_size = 32 + 8;
     47 #else
     48   // Stack still needs to be aligned
     49   constexpr u32 stack_size = 8;
     50 #endif
     51 
     52   DebugAssert(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler ||
     53               g_settings.cpu_execution_mode == CPUExecutionMode::NewRec);
     54 
     55   CodeGenerator acg(code_size, static_cast<u8*>(code));
     56   CodeGenerator* cg = &acg;
     57 
     58   Label dispatch;
     59   Label exit_recompiler;
     60 
     61   g_enter_recompiler = reinterpret_cast<decltype(g_enter_recompiler)>(const_cast<u8*>(cg->getCurr()));
     62   {
     63     // Don't need to save registers, because we fastjmp out when execution is interrupted.
     64     cg->sub(cg->rsp, stack_size);
     65 
     66     // CPU state pointer
     67     cg->lea(cg->rbp, cg->qword[cg->rip + &g_state]);
     68 
     69     // newrec preloads fastmem base
     70     if (g_settings.cpu_execution_mode != CPUExecutionMode::Recompiler && CodeCache::IsUsingFastmem())
     71       cg->mov(cg->rbx, cg->qword[PTR(&g_state.fastmem_base)]);
     72 
     73     // Fall through to event dispatcher
     74   }
     75 
     76   // check events then for frame done
     77   g_check_events_and_dispatch = cg->getCurr();
     78   {
     79     Label skip_event_check;
     80     cg->mov(RWARG1, cg->dword[PTR(&g_state.pending_ticks)]);
     81     cg->cmp(RWARG1, cg->dword[PTR(&g_state.downcount)]);
     82     cg->jl(skip_event_check);
     83 
     84     g_run_events_and_dispatch = cg->getCurr();
     85     cg->call(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
     86 
     87     cg->L(skip_event_check);
     88   }
     89 
     90   // TODO: align?
     91   g_dispatcher = cg->getCurr();
     92   {
     93     cg->L(dispatch);
     94 
     95     // rcx <- s_fast_map[pc >> 16]
     96     cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]);
     97     cg->lea(RXARG2, cg->dword[PTR(g_code_lut.data())]);
     98     cg->mov(RWARG3, RWARG1);
     99     cg->shr(RWARG3, 16);
    100     cg->mov(RXARG2, cg->qword[RXARG2 + RXARG3 * 8]);
    101 
    102     // call(rcx[pc * 2]) (fast_map[pc >> 2])
    103     cg->jmp(cg->qword[RXARG2 + RXARG1 * 2]);
    104   }
    105 
    106   g_compile_or_revalidate_block = cg->getCurr();
    107   {
    108     cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]);
    109     cg->call(&CompileOrRevalidateBlock);
    110     cg->jmp(dispatch);
    111   }
    112 
    113   g_discard_and_recompile_block = cg->getCurr();
    114   {
    115     cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]);
    116     cg->call(&DiscardAndRecompileBlock);
    117     cg->jmp(dispatch);
    118   }
    119 
    120   g_interpret_block = cg->getCurr();
    121   {
    122     cg->call(CodeCache::GetInterpretUncachedBlockFunction());
    123     cg->jmp(dispatch);
    124   }
    125 
    126 #undef PTR
    127 
    128   return static_cast<u32>(cg->getSize());
    129 }
    130 
    131 u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
    132 {
    133   u8* ptr = static_cast<u8*>(code);
    134   *(ptr++) = 0xE9; // jmp
    135 
    136   const ptrdiff_t disp = (reinterpret_cast<uintptr_t>(dst) - reinterpret_cast<uintptr_t>(code)) - 5;
    137   DebugAssert(disp >= static_cast<ptrdiff_t>(std::numeric_limits<s32>::min()) &&
    138               disp <= static_cast<ptrdiff_t>(std::numeric_limits<s32>::max()));
    139 
    140   const s32 disp32 = static_cast<s32>(disp);
    141   std::memcpy(ptr, &disp32, sizeof(disp32));
    142   return 5;
    143 }
    144 
    145 #ifdef ENABLE_HOST_DISASSEMBLY
    146 
    147 static ZydisFormatterFunc s_old_print_address;
    148 
    149 static ZyanStatus ZydisFormatterPrintAddressAbsolute(const ZydisFormatter* formatter, ZydisFormatterBuffer* buffer,
    150                                                      ZydisFormatterContext* context)
    151 {
    152   using namespace CPU;
    153 
    154   ZyanU64 address;
    155   ZYAN_CHECK(ZydisCalcAbsoluteAddress(context->instruction, context->operand, context->runtime_address, &address));
    156 
    157   char buf[128];
    158   u32 len = 0;
    159 
    160 #define A(x) static_cast<ZyanU64>(reinterpret_cast<uintptr_t>(x))
    161 
    162   if (address >= A(Bus::g_ram) && address < A(Bus::g_ram + Bus::g_ram_size))
    163   {
    164     len = snprintf(buf, sizeof(buf), "g_ram+0x%08X", static_cast<u32>(address - A(Bus::g_ram)));
    165   }
    166   else if (address >= A(&g_state.regs) &&
    167            address < A(reinterpret_cast<const u8*>(&g_state.regs) + sizeof(CPU::Registers)))
    168   {
    169     len = snprintf(buf, sizeof(buf), "g_state.regs.%s",
    170                    GetRegName(static_cast<CPU::Reg>(((address - A(&g_state.regs.r[0])) / 4u))));
    171   }
    172   else if (address >= A(&g_state.cop0_regs) &&
    173            address < A(reinterpret_cast<const u8*>(&g_state.cop0_regs) + sizeof(CPU::Cop0Registers)))
    174   {
    175     for (const DebuggerRegisterListEntry& rle : g_debugger_register_list)
    176     {
    177       if (address == static_cast<ZyanU64>(reinterpret_cast<uintptr_t>(rle.value_ptr)))
    178       {
    179         len = snprintf(buf, sizeof(buf), "g_state.cop0_regs.%s", rle.name);
    180         break;
    181       }
    182     }
    183   }
    184   else if (address >= A(&g_state.gte_regs) &&
    185            address < A(reinterpret_cast<const u8*>(&g_state.gte_regs) + sizeof(GTE::Regs)))
    186   {
    187     for (const DebuggerRegisterListEntry& rle : g_debugger_register_list)
    188     {
    189       if (address == static_cast<ZyanU64>(reinterpret_cast<uintptr_t>(rle.value_ptr)))
    190       {
    191         len = snprintf(buf, sizeof(buf), "g_state.gte_regs.%s", rle.name);
    192         break;
    193       }
    194     }
    195   }
    196   else if (address == A(&g_state.load_delay_reg))
    197   {
    198     len = snprintf(buf, sizeof(buf), "g_state.load_delay_reg");
    199   }
    200   else if (address == A(&g_state.next_load_delay_reg))
    201   {
    202     len = snprintf(buf, sizeof(buf), "g_state.next_load_delay_reg");
    203   }
    204   else if (address == A(&g_state.load_delay_value))
    205   {
    206     len = snprintf(buf, sizeof(buf), "g_state.load_delay_value");
    207   }
    208   else if (address == A(&g_state.next_load_delay_value))
    209   {
    210     len = snprintf(buf, sizeof(buf), "g_state.next_load_delay_value");
    211   }
    212   else if (address == A(&g_state.pending_ticks))
    213   {
    214     len = snprintf(buf, sizeof(buf), "g_state.pending_ticks");
    215   }
    216   else if (address == A(&g_state.downcount))
    217   {
    218     len = snprintf(buf, sizeof(buf), "g_state.downcount");
    219   }
    220 
    221 #undef A
    222 
    223   if (len > 0)
    224   {
    225     ZYAN_CHECK(ZydisFormatterBufferAppend(buffer, ZYDIS_TOKEN_SYMBOL));
    226     ZyanString* string;
    227     ZYAN_CHECK(ZydisFormatterBufferGetString(buffer, &string));
    228     return ZyanStringAppendFormat(string, "&%s", buf);
    229   }
    230 
    231   return s_old_print_address(formatter, buffer, context);
    232 }
    233 
    234 void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
    235 {
    236   ZydisDecoder disas_decoder;
    237   ZydisFormatter disas_formatter;
    238   ZydisDecodedInstruction disas_instruction;
    239   ZydisDecodedOperand disas_operands[ZYDIS_MAX_OPERAND_COUNT];
    240   ZydisDecoderInit(&disas_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64);
    241   ZydisFormatterInit(&disas_formatter, ZYDIS_FORMATTER_STYLE_INTEL);
    242   s_old_print_address = (ZydisFormatterFunc)&ZydisFormatterPrintAddressAbsolute;
    243   ZydisFormatterSetHook(&disas_formatter, ZYDIS_FORMATTER_FUNC_PRINT_ADDRESS_ABS, (const void**)&s_old_print_address);
    244 
    245   const u8* ptr = static_cast<const u8*>(start);
    246   TinyString hex;
    247   ZyanUSize remaining = size;
    248   while (ZYAN_SUCCESS(ZydisDecoderDecodeFull(&disas_decoder, ptr, remaining, &disas_instruction, disas_operands)))
    249   {
    250     char buffer[256];
    251     if (ZYAN_SUCCESS(ZydisFormatterFormatInstruction(&disas_formatter, &disas_instruction, disas_operands,
    252                                                      ZYDIS_MAX_OPERAND_COUNT, buffer, sizeof(buffer),
    253                                                      static_cast<ZyanU64>(reinterpret_cast<uintptr_t>(ptr)), nullptr)))
    254     {
    255       hex.clear();
    256       for (u32 i = 0; i < 10; i++)
    257       {
    258         if (i < disas_instruction.length)
    259           hex.append_format(" {:02X}", ptr[i]);
    260         else
    261           hex.append("   ");
    262       }
    263       Log::FastWrite("HostCode", "", LOGLEVEL_DEBUG, "  {:016X} {} {}",
    264                      static_cast<u64>(reinterpret_cast<uintptr_t>(ptr)), hex, buffer);
    265     }
    266 
    267     ptr += disas_instruction.length;
    268     remaining -= disas_instruction.length;
    269   }
    270 }
    271 
    272 u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size)
    273 {
    274   ZydisDecoder disas_decoder;
    275   ZydisDecodedInstruction disas_instruction;
    276   ZydisDecoderContext disas_context;
    277   ZydisDecoderInit(&disas_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64);
    278 
    279   const u8* ptr = static_cast<const u8*>(start);
    280   ZyanUSize remaining = size;
    281   u32 inst_count = 0;
    282   while (
    283     ZYAN_SUCCESS(ZydisDecoderDecodeInstruction(&disas_decoder, &disas_context, ptr, remaining, &disas_instruction)))
    284   {
    285     ptr += disas_instruction.length;
    286     remaining -= disas_instruction.length;
    287     inst_count++;
    288   }
    289 
    290   return inst_count;
    291 }
    292 
    293 #else
    294 
    // Variant used when ENABLE_HOST_DISASSEMBLY is not defined: ignores its arguments and only
    // logs an error saying disassembly support was not compiled in.
    295 void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
    296 {
    297   ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");
    298 }
    299 
    // Variant used when ENABLE_HOST_DISASSEMBLY is not defined: logs an error and reports zero
    // instructions regardless of the input.
    300 u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size)
    301 {
    302   ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");
    303   return 0;
    304 }
    305 
    306 #endif // ENABLE_HOST_DISASSEMBLY
    307 
    308 namespace CPU::Recompiler {
    309 
    310 static constexpr HostReg RCPUPTR = Xbyak::Operand::RBP;
    311 static constexpr HostReg RMEMBASEPTR = Xbyak::Operand::RBX;
    312 static constexpr HostReg RRETURN = RXRET.getIdx();
    313 static constexpr HostReg RARG1 = RXARG1.getIdx();
    314 static constexpr HostReg RARG2 = RXARG2.getIdx();
    315 static constexpr HostReg RARG3 = RXARG3.getIdx();
    316 static constexpr HostReg RARG4 = RXARG4.getIdx();
    317 
    318 static const Xbyak::Reg8 GetHostReg8(HostReg reg)
    319 {
    320   return Xbyak::Reg8(reg, reg >= Xbyak::Operand::SPL);
    321 }
    322 
    323 static const Xbyak::Reg8 GetHostReg8(const Value& value)
    324 {
    325   DebugAssert(value.size == RegSize_8 && value.IsInHostRegister());
    326   return Xbyak::Reg8(value.host_reg, value.host_reg >= Xbyak::Operand::SPL);
    327 }
    328 
    329 static const Xbyak::Reg16 GetHostReg16(HostReg reg)
    330 {
    331   return Xbyak::Reg16(reg);
    332 }
    333 
    334 static const Xbyak::Reg16 GetHostReg16(const Value& value)
    335 {
    336   DebugAssert(value.size == RegSize_16 && value.IsInHostRegister());
    337   return Xbyak::Reg16(value.host_reg);
    338 }
    339 
    340 static const Xbyak::Reg32 GetHostReg32(HostReg reg)
    341 {
    342   return Xbyak::Reg32(reg);
    343 }
    344 
    345 static const Xbyak::Reg32 GetHostReg32(const Value& value)
    346 {
    347   DebugAssert(value.size == RegSize_32 && value.IsInHostRegister());
    348   return Xbyak::Reg32(value.host_reg);
    349 }
    350 
    351 static const Xbyak::Reg64 GetHostReg64(HostReg reg)
    352 {
    353   return Xbyak::Reg64(reg);
    354 }
    355 
    356 static const Xbyak::Reg64 GetHostReg64(const Value& value)
    357 {
    358   DebugAssert(value.size == RegSize_64 && value.IsInHostRegister());
    359   return Xbyak::Reg64(value.host_reg);
    360 }
    361 
    362 static const Xbyak::Reg64 GetCPUPtrReg()
    363 {
    364   return Xbyak::Reg64(RCPUPTR);
    365 }
    366 
    367 static const Xbyak::Reg64 GetFastmemBasePtrReg()
    368 {
    369   return GetHostReg64(RMEMBASEPTR);
    370 }
    371 
    // Binds the near and far emitters to the code cache's current free near/far space, makes the
    // near emitter the active one, and configures the host register sets via InitHostRegs().
    372 CodeGenerator::CodeGenerator()
    373   : m_register_cache(*this), m_near_emitter(CPU::CodeCache::GetFreeCodeSpace(), CPU::CodeCache::GetFreeCodePointer()),
    374     m_far_emitter(CPU::CodeCache::GetFreeFarCodeSpace(), CPU::CodeCache::GetFreeFarCodePointer()),
    375     m_emit(&m_near_emitter)
    376 {
    377   InitHostRegs();
    378 }
    379 
    // Defaulted destructor, defined out of line in this translation unit.
    380 CodeGenerator::~CodeGenerator() = default;
    381 
    382 const char* CodeGenerator::GetHostRegName(HostReg reg, RegSize size /*= HostPointerSize*/)
    383 {
    384   static constexpr std::array<const char*, HostReg_Count> reg8_names = {
    385     {"al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b"}};
    386   static constexpr std::array<const char*, HostReg_Count> reg16_names = {
    387     {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w"}};
    388   static constexpr std::array<const char*, HostReg_Count> reg32_names = {{"eax", "ecx", "edx", "ebx", "esp", "ebp",
    389                                                                           "esi", "edi", "r8d", "r9d", "r10d", "r11d",
    390                                                                           "r12d", "r13d", "r14d", "r15d"}};
    391   static constexpr std::array<const char*, HostReg_Count> reg64_names = {
    392     {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"}};
    393   if (reg >= static_cast<HostReg>(HostReg_Count))
    394     return "";
    395 
    396   switch (size)
    397   {
    398     case RegSize_8:
    399       return reg8_names[reg];
    400     case RegSize_16:
    401       return reg16_names[reg];
    402     case RegSize_32:
    403       return reg32_names[reg];
    404     case RegSize_64:
    405       return reg64_names[reg];
    406     default:
    407       return "";
    408   }
    409 }
    410 
    411 void CodeGenerator::InitHostRegs()
    412 {
    413 #if defined(ABI_WIN64)
    414   // TODO: function calls mess up the parameter registers if we use them.. fix it
    415   // allocate nonvolatile before volatile
    416   m_register_cache.SetHostRegAllocationOrder(
    417     {Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::RDI, Xbyak::Operand::RSI, /*Xbyak::Operand::RSP, */
    418      Xbyak::Operand::R12, Xbyak::Operand::R13, Xbyak::Operand::R14, Xbyak::Operand::R15, /*Xbyak::Operand::RCX,
    419      Xbyak::Operand::RDX, Xbyak::Operand::R8, Xbyak::Operand::R9, */
    420      Xbyak::Operand::R10, Xbyak::Operand::R11,
    421      /*Xbyak::Operand::RAX*/});
    422   m_register_cache.SetCallerSavedHostRegs({Xbyak::Operand::RAX, Xbyak::Operand::RCX, Xbyak::Operand::RDX,
    423                                            Xbyak::Operand::R8, Xbyak::Operand::R9, Xbyak::Operand::R10,
    424                                            Xbyak::Operand::R11});
    425   m_register_cache.SetCalleeSavedHostRegs({Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::RDI,
    426                                            Xbyak::Operand::RSI, Xbyak::Operand::RSP, Xbyak::Operand::R12,
    427                                            Xbyak::Operand::R13, Xbyak::Operand::R14, Xbyak::Operand::R15});
    428 #elif defined(ABI_SYSV)
    429   m_register_cache.SetHostRegAllocationOrder(
    430     {Xbyak::Operand::RBX, /*Xbyak::Operand::RSP, */ Xbyak::Operand::RBP, Xbyak::Operand::R12, Xbyak::Operand::R13,
    431      Xbyak::Operand::R14, Xbyak::Operand::R15,
    432      /*Xbyak::Operand::RAX, */ /*Xbyak::Operand::RDI, */ /*Xbyak::Operand::RSI, */
    433      /*Xbyak::Operand::RDX, */ /*Xbyak::Operand::RCX, */ Xbyak::Operand::R8, Xbyak::Operand::R9, Xbyak::Operand::R10,
    434      Xbyak::Operand::R11});
    435   m_register_cache.SetCallerSavedHostRegs({Xbyak::Operand::RAX, Xbyak::Operand::RDI, Xbyak::Operand::RSI,
    436                                            Xbyak::Operand::RDX, Xbyak::Operand::RCX, Xbyak::Operand::R8,
    437                                            Xbyak::Operand::R9, Xbyak::Operand::R10, Xbyak::Operand::R11});
    438   m_register_cache.SetCalleeSavedHostRegs({Xbyak::Operand::RBX, Xbyak::Operand::RSP, Xbyak::Operand::RBP,
    439                                            Xbyak::Operand::R12, Xbyak::Operand::R13, Xbyak::Operand::R14,
    440                                            Xbyak::Operand::R15});
    441 #endif
    442 
    443   m_register_cache.SetCPUPtrHostReg(RCPUPTR);
    444 }
    445 
    // Makes the far-code emitter the target of subsequent m_emit output.
    446 void CodeGenerator::SwitchToFarCode()
    447 {
    448   m_emit = &m_far_emitter;
    449 }
    450 
    // Makes the near-code emitter the target of subsequent m_emit output.
    451 void CodeGenerator::SwitchToNearCode()
    452 {
    453   m_emit = &m_near_emitter;
    454 }
    455 
    // Returns the start of the near emitter's code buffer (Xbyak getCode()).
    456 void* CodeGenerator::GetStartNearCodePointer() const
    457 {
    458   return m_near_emitter.getCode<u8*>();
    459 }
    460 
    // Returns the near emitter's current write position (Xbyak getCurr()).
    461 void* CodeGenerator::GetCurrentNearCodePointer() const
    462 {
    463   return m_near_emitter.getCurr<void*>();
    464 }
    465 
    // Returns the far emitter's current write position (Xbyak getCurr()).
    466 void* CodeGenerator::GetCurrentFarCodePointer() const
    467 {
    468   return m_far_emitter.getCurr<void*>();
    469 }
    470 
    471 Value CodeGenerator::GetValueInHostRegister(const Value& value, bool allow_zero_register /* = true */)
    472 {
    473   if (value.IsInHostRegister())
    474     return Value(value.regcache, value.host_reg, value.size, ValueFlags::Valid | ValueFlags::InHostRegister);
    475 
    476   Value new_value = m_register_cache.AllocateScratch(value.size);
    477   EmitCopyValue(new_value.host_reg, value);
    478   return new_value;
    479 }
    480 
    481 Value CodeGenerator::GetValueInHostOrScratchRegister(const Value& value, bool allow_zero_register /* = true */)
    482 {
    483   if (value.IsInHostRegister())
    484     return Value(value.regcache, value.host_reg, value.size, ValueFlags::Valid | ValueFlags::InHostRegister);
    485 
    486   Value new_value = m_register_cache.AllocateScratch(value.size);
    487   EmitCopyValue(new_value.host_reg, value);
    488   return new_value;
    489 }
    490 
    491 void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
    492 {
    493   if (allocate_registers)
    494   {
    495     m_register_cache.AssumeCalleeSavedRegistersAreSaved();
    496 
    497     // Store the CPU struct pointer.
    498     const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
    499     DebugAssert(cpu_reg_allocated);
    500     UNREFERENCED_VARIABLE(cpu_reg_allocated);
    501 
    502     // If there's loadstore instructions, preload the fastmem base.
    503     if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
    504     {
    505       const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
    506       DebugAssert(fastmem_reg_allocated);
    507       UNREFERENCED_VARIABLE(fastmem_reg_allocated);
    508       m_emit->mov(GetFastmemBasePtrReg(), m_emit->qword[GetCPUPtrReg() + OFFSETOF(CPU::State, fastmem_base)]);
    509     }
    510   }
    511 }
    512 
    513 void CodeGenerator::EmitEndBlock(bool free_registers, const void* jump_to)
    514 {
    515   if (free_registers)
    516   {
    517     m_register_cache.FreeHostReg(RCPUPTR);
    518     if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
    519       m_register_cache.FreeHostReg(RMEMBASEPTR);
    520 
    521     m_register_cache.PopCalleeSavedRegisters(true);
    522   }
    523 
    524   if (jump_to)
    525     m_emit->jmp(jump_to);
    526 }
    527 
    // Emits the exit path taken after an exception: add pending cycles, write back guest
    // registers and the load delay, pop callee-saved registers, then jump to the
    // check-events-and-dispatch stub.
    528 void CodeGenerator::EmitExceptionExit()
    529 {
    530   AddPendingCycles(false);
    531 
    532   // ensure all unflushed registers are written back
    533   m_register_cache.FlushAllGuestRegisters(false, false);
    534 
    535   // the interpreter load delay might have its own value, but we'll overwrite it here anyway
    536   // technically RaiseException() and FlushPipeline() have already been called, but that should be okay
    537   m_register_cache.FlushLoadDelay(false);
    538 
    539   m_register_cache.PopCalleeSavedRegisters(false);
    540   m_emit->jmp(CodeCache::g_check_events_and_dispatch);
    541 }
    542 
    543 void CodeGenerator::EmitExceptionExitOnBool(const Value& value)
    544 {
    545   Assert(!value.IsConstant() && value.IsInHostRegister());
    546 
    547   m_emit->test(GetHostReg8(value), GetHostReg8(value));
    548   m_emit->jnz(GetCurrentFarCodePointer());
    549 
    550   m_register_cache.PushState();
    551 
    552   SwitchToFarCode();
    553   EmitExceptionExit();
    554   SwitchToNearCode();
    555 
    556   m_register_cache.PopState();
    557 }
    558 
    559 const void* CodeGenerator::FinalizeBlock(u32* out_host_code_size, u32* out_host_far_code_size)
    560 {
    561   m_near_emitter.ready();
    562   m_far_emitter.ready();
    563 
    564   const u32 near_size = static_cast<u32>(m_near_emitter.getSize());
    565   const u32 far_size = static_cast<u32>(m_far_emitter.getSize());
    566   const void* code = m_near_emitter.getCode<const void*>();
    567   *out_host_code_size = near_size;
    568   *out_host_far_code_size = far_size;
    569   CPU::CodeCache::CommitCode(near_size);
    570   CPU::CodeCache::CommitFarCode(far_size);
    571 
    572   m_near_emitter.reset();
    573   m_far_emitter.reset();
    574 
    575   return code;
    576 }
    577 
    578 void CodeGenerator::EmitSignExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size)
    579 {
    580   switch (to_size)
    581   {
    582     case RegSize_16:
    583     {
    584       switch (from_size)
    585       {
    586         case RegSize_8:
    587           m_emit->movsx(GetHostReg16(to_reg), GetHostReg8(from_reg));
    588           return;
    589         default:
    590           break;
    591       }
    592     }
    593     break;
    594 
    595     case RegSize_32:
    596     {
    597       switch (from_size)
    598       {
    599         case RegSize_8:
    600           m_emit->movsx(GetHostReg32(to_reg), GetHostReg8(from_reg));
    601           return;
    602         case RegSize_16:
    603           m_emit->movsx(GetHostReg32(to_reg), GetHostReg16(from_reg));
    604           return;
    605         default:
    606           break;
    607       }
    608     }
    609     break;
    610 
    611     default:
    612       break;
    613   }
    614 
    615   Panic("Unknown sign-extend combination");
    616 }
    617 
    618 void CodeGenerator::EmitZeroExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size)
    619 {
    620   switch (to_size)
    621   {
    622     case RegSize_16:
    623     {
    624       switch (from_size)
    625       {
    626         case RegSize_8:
    627           m_emit->movzx(GetHostReg16(to_reg), GetHostReg8(from_reg));
    628           return;
    629         default:
    630           break;
    631       }
    632     }
    633     break;
    634 
    635     case RegSize_32:
    636     {
    637       switch (from_size)
    638       {
    639         case RegSize_8:
    640           m_emit->movzx(GetHostReg32(to_reg), GetHostReg8(from_reg));
    641           return;
    642         case RegSize_16:
    643           m_emit->movzx(GetHostReg32(to_reg), GetHostReg16(from_reg));
    644           return;
    645         default:
    646           break;
    647       }
    648     }
    649     break;
    650 
    651     default:
    652       break;
    653   }
    654 
    655   Panic("Unknown sign-extend combination");
    656 }
    657 
    658 void CodeGenerator::EmitCopyValue(HostReg to_reg, const Value& value)
    659 {
    660   // TODO: mov x, 0 -> xor x, x
    661   DebugAssert(value.IsConstant() || value.IsInHostRegister());
    662 
    663   switch (value.size)
    664   {
    665     case RegSize_8:
    666     {
    667       if (value.HasConstantValue(0))
    668         m_emit->xor_(GetHostReg8(to_reg), GetHostReg8(to_reg));
    669       else if (value.IsConstant())
    670         m_emit->mov(GetHostReg8(to_reg), value.constant_value);
    671       else
    672         m_emit->mov(GetHostReg8(to_reg), GetHostReg8(value.host_reg));
    673     }
    674     break;
    675 
    676     case RegSize_16:
    677     {
    678       if (value.HasConstantValue(0))
    679         m_emit->xor_(GetHostReg16(to_reg), GetHostReg16(to_reg));
    680       else if (value.IsConstant())
    681         m_emit->mov(GetHostReg16(to_reg), value.constant_value);
    682       else
    683         m_emit->mov(GetHostReg16(to_reg), GetHostReg16(value.host_reg));
    684     }
    685     break;
    686 
    687     case RegSize_32:
    688     {
    689       if (value.HasConstantValue(0))
    690         m_emit->xor_(GetHostReg32(to_reg), GetHostReg32(to_reg));
    691       else if (value.IsConstant())
    692         m_emit->mov(GetHostReg32(to_reg), value.constant_value);
    693       else
    694         m_emit->mov(GetHostReg32(to_reg), GetHostReg32(value.host_reg));
    695     }
    696     break;
    697 
    698     case RegSize_64:
    699     {
    700       if (value.HasConstantValue(0))
    701         m_emit->xor_(GetHostReg64(to_reg), GetHostReg64(to_reg));
    702       else if (value.IsConstant())
    703         m_emit->mov(GetHostReg64(to_reg), value.constant_value);
    704       else
    705         m_emit->mov(GetHostReg64(to_reg), GetHostReg64(value.host_reg));
    706     }
    707     break;
    708 
    709     default:
    710       UnreachableCode();
    711       break;
    712   }
    713 }
    714 
    715 void CodeGenerator::EmitAdd(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags)
    716 {
    717   DebugAssert(value.IsConstant() || value.IsInHostRegister());
    718 
    719   switch (value.size)
    720   {
    721     case RegSize_8:
    722     {
    723       if (to_reg != from_reg)
    724         m_emit->mov(GetHostReg8(to_reg), GetHostReg8(from_reg));
    725 
    726       if (value.IsConstant())
    727         m_emit->add(GetHostReg8(to_reg), SignExtend32(Truncate8(value.constant_value)));
    728       else
    729         m_emit->add(GetHostReg8(to_reg), GetHostReg8(value.host_reg));
    730     }
    731     break;
    732 
    733     case RegSize_16:
    734     {
    735       if (to_reg != from_reg)
    736         m_emit->mov(GetHostReg16(to_reg), GetHostReg16(from_reg));
    737 
    738       if (value.IsConstant())
    739         m_emit->add(GetHostReg16(to_reg), SignExtend32(Truncate16(value.constant_value)));
    740       else
    741         m_emit->add(GetHostReg16(to_reg), GetHostReg16(value.host_reg));
    742     }
    743     break;
    744 
    745     case RegSize_32:
    746     {
    747       if (to_reg != from_reg)
    748         m_emit->mov(GetHostReg32(to_reg), GetHostReg32(from_reg));
    749 
    750       if (value.IsConstant())
    751         m_emit->add(GetHostReg32(to_reg), Truncate32(value.constant_value));
    752       else
    753         m_emit->add(GetHostReg32(to_reg), GetHostReg32(value.host_reg));
    754     }
    755     break;
    756 
    757     case RegSize_64:
    758     {
    759       if (to_reg != from_reg)
    760         m_emit->mov(GetHostReg64(to_reg), GetHostReg64(from_reg));
    761 
    762       if (value.IsConstant())
    763       {
    764         if (!Xbyak::inner::IsInInt32(value.constant_value))
    765         {
    766           Value temp = m_register_cache.AllocateScratch(RegSize_64);
    767           m_emit->mov(GetHostReg64(temp.host_reg), value.constant_value);
    768           m_emit->add(GetHostReg64(to_reg), GetHostReg64(temp.host_reg));
    769         }
    770         else
    771         {
    772           m_emit->add(GetHostReg64(to_reg), Truncate32(value.constant_value));
    773         }
    774       }
    775       else
    776       {
    777         m_emit->add(GetHostReg64(to_reg), GetHostReg64(value.host_reg));
    778       }
    779     }
    780     break;
    781 
    782     default:
    783       UnreachableCode();
    784       break;
    785   }
    786 }
    787 
    788 void CodeGenerator::EmitSub(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags)
    789 {
    790   DebugAssert(value.IsConstant() || value.IsInHostRegister());
    791 
    792   switch (value.size)
    793   {
    794     case RegSize_8:
    795     {
    796       if (to_reg != from_reg)
    797         m_emit->mov(GetHostReg8(to_reg), GetHostReg8(from_reg));
    798 
    799       if (value.IsConstant())
    800         m_emit->sub(GetHostReg8(to_reg), SignExtend32(Truncate8(value.constant_value)));
    801       else
    802         m_emit->sub(GetHostReg8(to_reg), GetHostReg8(value.host_reg));
    803     }
    804     break;
    805 
    806     case RegSize_16:
    807     {
    808       if (to_reg != from_reg)
    809         m_emit->mov(GetHostReg16(to_reg), GetHostReg16(from_reg));
    810 
    811       if (value.IsConstant())
    812         m_emit->sub(GetHostReg16(to_reg), SignExtend32(Truncate16(value.constant_value)));
    813       else
    814         m_emit->sub(GetHostReg16(to_reg), GetHostReg16(value.host_reg));
    815     }
    816     break;
    817 
    818     case RegSize_32:
    819     {
    820       if (to_reg != from_reg)
    821         m_emit->mov(GetHostReg32(to_reg), GetHostReg32(from_reg));
    822 
    823       if (value.IsConstant())
    824         m_emit->sub(GetHostReg32(to_reg), Truncate32(value.constant_value));
    825       else
    826         m_emit->sub(GetHostReg32(to_reg), GetHostReg32(value.host_reg));
    827     }
    828     break;
    829 
    830     case RegSize_64:
    831     {
    832       if (to_reg != from_reg)
    833         m_emit->mov(GetHostReg64(to_reg), GetHostReg64(from_reg));
    834 
    835       if (value.IsConstant())
    836       {
    837         if (!Xbyak::inner::IsInInt32(value.constant_value))
    838         {
    839           Value temp = m_register_cache.AllocateScratch(RegSize_64);
    840           m_emit->mov(GetHostReg64(temp.host_reg), value.constant_value);
    841           m_emit->sub(GetHostReg64(to_reg), GetHostReg64(temp.host_reg));
    842         }
    843         else
    844         {
    845           m_emit->sub(GetHostReg64(to_reg), Truncate32(value.constant_value));
    846         }
    847       }
    848       else
    849       {
    850         m_emit->sub(GetHostReg64(to_reg), GetHostReg64(value.host_reg));
    851       }
    852     }
    853     break;
    854 
    855     default:
    856       UnreachableCode();
    857       break;
    858   }
    859 }
    860 
    861 void CodeGenerator::EmitCmp(HostReg to_reg, const Value& value)
    862 {
    863   DebugAssert(value.IsConstant() || value.IsInHostRegister());
    864 
    865   switch (value.size)
    866   {
    867     case RegSize_8:
    868     {
    869       if (value.IsConstant())
    870         m_emit->cmp(GetHostReg8(to_reg), SignExtend32(Truncate8(value.constant_value)));
    871       else
    872         m_emit->cmp(GetHostReg8(to_reg), GetHostReg8(value.host_reg));
    873     }
    874     break;
    875 
    876     case RegSize_16:
    877     {
    878       if (value.IsConstant())
    879         m_emit->cmp(GetHostReg16(to_reg), SignExtend32(Truncate16(value.constant_value)));
    880       else
    881         m_emit->cmp(GetHostReg16(to_reg), GetHostReg16(value.host_reg));
    882     }
    883     break;
    884 
    885     case RegSize_32:
    886     {
    887       if (value.IsConstant())
    888         m_emit->cmp(GetHostReg32(to_reg), Truncate32(value.constant_value));
    889       else
    890         m_emit->cmp(GetHostReg32(to_reg), GetHostReg32(value.host_reg));
    891     }
    892     break;
    893 
    894     case RegSize_64:
    895     {
    896       if (value.IsConstant())
    897       {
    898         if (!Xbyak::inner::IsInInt32(value.constant_value))
    899         {
    900           Value temp = m_register_cache.AllocateScratch(RegSize_64);
    901           m_emit->mov(GetHostReg64(temp.host_reg), value.constant_value);
    902           m_emit->cmp(GetHostReg64(to_reg), GetHostReg64(temp.host_reg));
    903         }
    904         else
    905         {
    906           m_emit->cmp(GetHostReg64(to_reg), Truncate32(value.constant_value));
    907         }
    908       }
    909       else
    910       {
    911         m_emit->cmp(GetHostReg64(to_reg), GetHostReg64(value.host_reg));
    912       }
    913     }
    914     break;
    915 
    916     default:
    917       UnreachableCode();
    918       break;
    919   }
    920 }
    921 
    922 void CodeGenerator::EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs,
    923                             bool signed_multiply)
    924 {
    925   const bool save_eax = (to_reg_hi != Xbyak::Operand::RAX && to_reg_lo != Xbyak::Operand::RAX);
    926   const bool save_edx = (to_reg_hi != Xbyak::Operand::RDX && to_reg_lo != Xbyak::Operand::RDX);
    927 
    928   if (save_eax)
    929     m_emit->push(m_emit->rax);
    930 
    931   if (save_edx)
    932     m_emit->push(m_emit->rdx);
    933 
    934 #define DO_MUL(src)                                                                                                    \
    935   if (lhs.size == RegSize_8)                                                                                           \
    936     signed_multiply ? m_emit->imul(src.changeBit(8)) : m_emit->mul(src.changeBit(8));                                  \
    937   else if (lhs.size == RegSize_16)                                                                                     \
    938     signed_multiply ? m_emit->imul(src.changeBit(16)) : m_emit->mul(src.changeBit(16));                                \
    939   else if (lhs.size == RegSize_32)                                                                                     \
    940     signed_multiply ? m_emit->imul(src.changeBit(32)) : m_emit->mul(src.changeBit(32));                                \
    941   else                                                                                                                 \
    942     signed_multiply ? m_emit->imul(src.changeBit(64)) : m_emit->mul(src.changeBit(64));
    943 
    944   // x*x
    945   if (lhs.IsInHostRegister() && rhs.IsInHostRegister() && lhs.GetHostRegister() == rhs.GetHostRegister())
    946   {
    947     if (lhs.GetHostRegister() != Xbyak::Operand::RAX)
    948       EmitCopyValue(Xbyak::Operand::RAX, lhs);
    949 
    950     DO_MUL(m_emit->rax);
    951   }
    952   else if (lhs.IsInHostRegister() && lhs.GetHostRegister() == Xbyak::Operand::RAX)
    953   {
    954     if (!rhs.IsInHostRegister())
    955     {
    956       EmitCopyValue(Xbyak::Operand::RDX, rhs);
    957       DO_MUL(m_emit->rdx);
    958     }
    959     else
    960     {
    961       DO_MUL(GetHostReg64(rhs));
    962     }
    963   }
    964   else if (rhs.IsInHostRegister() && rhs.GetHostRegister() == Xbyak::Operand::RAX)
    965   {
    966     if (!lhs.IsInHostRegister())
    967     {
    968       EmitCopyValue(Xbyak::Operand::RDX, lhs);
    969       DO_MUL(m_emit->rdx);
    970     }
    971     else
    972     {
    973       DO_MUL(GetHostReg64(lhs));
    974     }
    975   }
    976   else
    977   {
    978     if (lhs.IsInHostRegister())
    979     {
    980       EmitCopyValue(Xbyak::Operand::RAX, rhs);
    981       if (lhs.size == RegSize_8)
    982         signed_multiply ? m_emit->imul(GetHostReg8(lhs)) : m_emit->mul(GetHostReg8(lhs));
    983       else if (lhs.size == RegSize_16)
    984         signed_multiply ? m_emit->imul(GetHostReg16(lhs)) : m_emit->mul(GetHostReg16(lhs));
    985       else if (lhs.size == RegSize_32)
    986         signed_multiply ? m_emit->imul(GetHostReg32(lhs)) : m_emit->mul(GetHostReg32(lhs));
    987       else
    988         signed_multiply ? m_emit->imul(GetHostReg64(lhs)) : m_emit->mul(GetHostReg64(lhs));
    989     }
    990     else if (rhs.IsInHostRegister())
    991     {
    992       EmitCopyValue(Xbyak::Operand::RAX, lhs);
    993       if (lhs.size == RegSize_8)
    994         signed_multiply ? m_emit->imul(GetHostReg8(rhs)) : m_emit->mul(GetHostReg8(rhs));
    995       else if (lhs.size == RegSize_16)
    996         signed_multiply ? m_emit->imul(GetHostReg16(rhs)) : m_emit->mul(GetHostReg16(rhs));
    997       else if (lhs.size == RegSize_32)
    998         signed_multiply ? m_emit->imul(GetHostReg32(rhs)) : m_emit->mul(GetHostReg32(rhs));
    999       else
   1000         signed_multiply ? m_emit->imul(GetHostReg64(rhs)) : m_emit->mul(GetHostReg64(rhs));
   1001     }
   1002     else
   1003     {
   1004       EmitCopyValue(Xbyak::Operand::RAX, lhs);
   1005       EmitCopyValue(Xbyak::Operand::RDX, rhs);
   1006       DO_MUL(m_emit->rdx);
   1007     }
   1008   }
   1009 
   1010 #undef DO_MUL
   1011 
   1012   if (to_reg_hi == Xbyak::Operand::RDX && to_reg_lo == Xbyak::Operand::RAX)
   1013   {
   1014     // ideal case: registers are the ones we want: don't have to do anything
   1015   }
   1016   else if (to_reg_hi == Xbyak::Operand::RAX && to_reg_lo == Xbyak::Operand::RDX)
   1017   {
   1018     // what we want, but swapped, so exchange them
   1019     m_emit->xchg(m_emit->rax, m_emit->rdx);
   1020   }
   1021   else
   1022   {
   1023     // store to the registers we want.. this could be optimized better
   1024     m_emit->push(m_emit->rdx);
   1025     m_emit->push(m_emit->rax);
   1026     m_emit->pop(GetHostReg64(to_reg_lo));
   1027     m_emit->pop(GetHostReg64(to_reg_hi));
   1028   }
   1029 
   1030   // restore original contents
   1031   if (save_edx)
   1032     m_emit->pop(m_emit->rdx);
   1033 
   1034   if (save_eax)
   1035     m_emit->pop(m_emit->rax);
   1036 }
   1037 
   1038 void CodeGenerator::EmitDiv(HostReg to_reg_quotient, HostReg to_reg_remainder, HostReg num, HostReg denom, RegSize size,
   1039                             bool signed_divide)
   1040 {
   1041   const bool save_eax = (to_reg_quotient != Xbyak::Operand::RAX && to_reg_remainder != Xbyak::Operand::RAX);
   1042   const bool save_edx = (to_reg_quotient != Xbyak::Operand::RDX && to_reg_remainder != Xbyak::Operand::RDX);
   1043 
   1044   if (save_eax)
   1045     m_emit->push(m_emit->rax);
   1046 
   1047   if (save_edx)
   1048     m_emit->push(m_emit->rdx);
   1049 
   1050   // unsupported cases.. for now
   1051   Assert(num != Xbyak::Operand::RDX && num != Xbyak::Operand::RAX);
   1052   if (num != Xbyak::Operand::RAX)
   1053     EmitCopyValue(Xbyak::Operand::RAX, Value::FromHostReg(&m_register_cache, num, size));
   1054 
   1055   if (size == RegSize_8)
   1056   {
   1057     if (signed_divide)
   1058     {
   1059       m_emit->cbw();
   1060       m_emit->idiv(GetHostReg8(denom));
   1061     }
   1062     else
   1063     {
   1064       m_emit->xor_(m_emit->dx, m_emit->dx);
   1065       m_emit->div(GetHostReg8(denom));
   1066     }
   1067   }
   1068   else if (size == RegSize_16)
   1069   {
   1070     if (signed_divide)
   1071     {
   1072       m_emit->cwd();
   1073       m_emit->idiv(GetHostReg16(denom));
   1074     }
   1075     else
   1076     {
   1077       m_emit->xor_(m_emit->edx, m_emit->edx);
   1078       m_emit->div(GetHostReg16(denom));
   1079     }
   1080   }
   1081   else if (size == RegSize_32)
   1082   {
   1083     if (signed_divide)
   1084     {
   1085       m_emit->cdq();
   1086       m_emit->idiv(GetHostReg32(denom));
   1087     }
   1088     else
   1089     {
   1090       m_emit->xor_(m_emit->rdx, m_emit->edx);
   1091       m_emit->div(GetHostReg32(denom));
   1092     }
   1093   }
   1094   else
   1095   {
   1096     if (signed_divide)
   1097       m_emit->idiv(GetHostReg64(denom));
   1098     else
   1099       m_emit->div(GetHostReg64(denom));
   1100   }
   1101 
   1102   if (to_reg_quotient == Xbyak::Operand::RAX && to_reg_remainder == Xbyak::Operand::RDX)
   1103   {
   1104     // ideal case: registers are the ones we want: don't have to do anything
   1105   }
   1106   else if (to_reg_quotient == Xbyak::Operand::RDX && to_reg_remainder == Xbyak::Operand::RAX)
   1107   {
   1108     // what we want, but swapped, so exchange them
   1109     m_emit->xchg(m_emit->rax, m_emit->rdx);
   1110   }
   1111   else if (to_reg_quotient != Xbyak::Operand::RAX && to_reg_quotient != Xbyak::Operand::RDX &&
   1112            to_reg_remainder != Xbyak::Operand::RAX && to_reg_remainder != Xbyak::Operand::RDX)
   1113   {
   1114     // store to the registers we want.. this could be optimized better
   1115     if (static_cast<u32>(to_reg_quotient) != HostReg_Count)
   1116       m_emit->mov(GetHostReg64(to_reg_quotient), m_emit->rax);
   1117     if (static_cast<u32>(to_reg_remainder) != HostReg_Count)
   1118       m_emit->mov(GetHostReg64(to_reg_remainder), m_emit->rdx);
   1119   }
   1120   else
   1121   {
   1122     // store to the registers we want.. this could be optimized better
   1123     if (static_cast<u32>(to_reg_quotient) != HostReg_Count)
   1124     {
   1125       m_emit->push(m_emit->rax);
   1126       m_emit->pop(GetHostReg64(to_reg_quotient));
   1127     }
   1128     if (static_cast<u32>(to_reg_remainder) != HostReg_Count)
   1129     {
   1130       m_emit->push(m_emit->rdx);
   1131       m_emit->pop(GetHostReg64(to_reg_remainder));
   1132     }
   1133   }
   1134 
   1135   // restore original contents
   1136   if (save_edx)
   1137     m_emit->pop(m_emit->rdx);
   1138 
   1139   if (save_eax)
   1140     m_emit->pop(m_emit->rax);
   1141 }
   1142 
   1143 void CodeGenerator::EmitInc(HostReg to_reg, RegSize size)
   1144 {
   1145   switch (size)
   1146   {
   1147     case RegSize_8:
   1148       m_emit->inc(GetHostReg8(to_reg));
   1149       break;
   1150     case RegSize_16:
   1151       m_emit->inc(GetHostReg16(to_reg));
   1152       break;
   1153     case RegSize_32:
   1154       m_emit->inc(GetHostReg32(to_reg));
   1155       break;
   1156     default:
   1157       UnreachableCode();
   1158       break;
   1159   }
   1160 }
   1161 
   1162 void CodeGenerator::EmitDec(HostReg to_reg, RegSize size)
   1163 {
   1164   switch (size)
   1165   {
   1166     case RegSize_8:
   1167       m_emit->dec(GetHostReg8(to_reg));
   1168       break;
   1169     case RegSize_16:
   1170       m_emit->dec(GetHostReg16(to_reg));
   1171       break;
   1172     case RegSize_32:
   1173       m_emit->dec(GetHostReg32(to_reg));
   1174       break;
   1175     default:
   1176       UnreachableCode();
   1177       break;
   1178   }
   1179 }
   1180 
   1181 void CodeGenerator::EmitShl(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value,
   1182                             bool assume_amount_masked /* = true */)
   1183 {
   1184   DebugAssert(amount_value.IsConstant() || amount_value.IsInHostRegister());
   1185 
   1186   // We have to use CL for the shift amount :(
   1187   const bool save_cl = (!amount_value.IsConstant() && m_register_cache.IsHostRegInUse(Xbyak::Operand::RCX) &&
   1188                         (!amount_value.IsInHostRegister() || amount_value.host_reg != Xbyak::Operand::RCX));
   1189   if (save_cl)
   1190     m_emit->push(m_emit->rcx);
   1191 
   1192   if (!amount_value.IsConstant())
   1193     m_emit->mov(m_emit->cl, GetHostReg8(amount_value.host_reg));
   1194 
   1195   switch (size)
   1196   {
   1197     case RegSize_8:
   1198     {
   1199       if (to_reg != from_reg)
   1200         m_emit->mov(GetHostReg8(to_reg), GetHostReg8(from_reg));
   1201 
   1202       if (amount_value.IsConstant())
   1203         m_emit->shl(GetHostReg8(to_reg), Truncate8(amount_value.constant_value));
   1204       else
   1205         m_emit->shl(GetHostReg8(to_reg), m_emit->cl);
   1206     }
   1207     break;
   1208 
   1209     case RegSize_16:
   1210     {
   1211       if (to_reg != from_reg)
   1212         m_emit->mov(GetHostReg16(to_reg), GetHostReg16(from_reg));
   1213 
   1214       if (amount_value.IsConstant())
   1215         m_emit->shl(GetHostReg16(to_reg), Truncate8(amount_value.constant_value));
   1216       else
   1217         m_emit->shl(GetHostReg16(to_reg), m_emit->cl);
   1218     }
   1219     break;
   1220 
   1221     case RegSize_32:
   1222     {
   1223       if (to_reg != from_reg)
   1224         m_emit->mov(GetHostReg32(to_reg), GetHostReg32(from_reg));
   1225 
   1226       if (amount_value.IsConstant())
   1227         m_emit->shl(GetHostReg32(to_reg), Truncate32(amount_value.constant_value));
   1228       else
   1229         m_emit->shl(GetHostReg32(to_reg), m_emit->cl);
   1230     }
   1231     break;
   1232 
   1233     case RegSize_64:
   1234     {
   1235       if (to_reg != from_reg)
   1236         m_emit->mov(GetHostReg64(to_reg), GetHostReg64(from_reg));
   1237 
   1238       if (amount_value.IsConstant())
   1239         m_emit->shl(GetHostReg64(to_reg), Truncate32(amount_value.constant_value));
   1240       else
   1241         m_emit->shl(GetHostReg64(to_reg), m_emit->cl);
   1242     }
   1243     break;
   1244   }
   1245 
   1246   if (save_cl)
   1247     m_emit->pop(m_emit->rcx);
   1248 }
   1249 
   1250 void CodeGenerator::EmitShr(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value,
   1251                             bool assume_amount_masked /* = true */)
   1252 {
   1253   DebugAssert(amount_value.IsConstant() || amount_value.IsInHostRegister());
   1254 
   1255   // We have to use CL for the shift amount :(
   1256   const bool save_cl = (!amount_value.IsConstant() && m_register_cache.IsHostRegInUse(Xbyak::Operand::RCX) &&
   1257                         (!amount_value.IsInHostRegister() || amount_value.host_reg != Xbyak::Operand::RCX));
   1258   if (save_cl)
   1259     m_emit->push(m_emit->rcx);
   1260 
   1261   if (!amount_value.IsConstant())
   1262     m_emit->mov(m_emit->cl, GetHostReg8(amount_value.host_reg));
   1263 
   1264   switch (size)
   1265   {
   1266     case RegSize_8:
   1267     {
   1268       if (to_reg != from_reg)
   1269         m_emit->mov(GetHostReg8(to_reg), GetHostReg8(from_reg));
   1270 
   1271       if (amount_value.IsConstant())
   1272         m_emit->shr(GetHostReg8(to_reg), Truncate8(amount_value.constant_value));
   1273       else
   1274         m_emit->shr(GetHostReg8(to_reg), m_emit->cl);
   1275     }
   1276     break;
   1277 
   1278     case RegSize_16:
   1279     {
   1280       if (to_reg != from_reg)
   1281         m_emit->mov(GetHostReg16(to_reg), GetHostReg16(from_reg));
   1282 
   1283       if (amount_value.IsConstant())
   1284         m_emit->shr(GetHostReg16(to_reg), Truncate8(amount_value.constant_value));
   1285       else
   1286         m_emit->shr(GetHostReg16(to_reg), m_emit->cl);
   1287     }
   1288     break;
   1289 
   1290     case RegSize_32:
   1291     {
   1292       if (to_reg != from_reg)
   1293         m_emit->mov(GetHostReg32(to_reg), GetHostReg32(from_reg));
   1294 
   1295       if (amount_value.IsConstant())
   1296         m_emit->shr(GetHostReg32(to_reg), Truncate32(amount_value.constant_value));
   1297       else
   1298         m_emit->shr(GetHostReg32(to_reg), m_emit->cl);
   1299     }
   1300     break;
   1301 
   1302     case RegSize_64:
   1303     {
   1304       if (to_reg != from_reg)
   1305         m_emit->mov(GetHostReg64(to_reg), GetHostReg64(from_reg));
   1306 
   1307       if (amount_value.IsConstant())
   1308         m_emit->shr(GetHostReg64(to_reg), Truncate32(amount_value.constant_value));
   1309       else
   1310         m_emit->shr(GetHostReg64(to_reg), m_emit->cl);
   1311     }
   1312     break;
   1313   }
   1314 
   1315   if (save_cl)
   1316     m_emit->pop(m_emit->rcx);
   1317 }
   1318 
   1319 void CodeGenerator::EmitSar(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value,
   1320                             bool assume_amount_masked /* = true */)
   1321 {
   1322   DebugAssert(amount_value.IsConstant() || amount_value.IsInHostRegister());
   1323 
   1324   // We have to use CL for the shift amount :(
   1325   const bool save_cl = (!amount_value.IsConstant() && m_register_cache.IsHostRegInUse(Xbyak::Operand::RCX) &&
   1326                         (!amount_value.IsInHostRegister() || amount_value.host_reg != Xbyak::Operand::RCX));
   1327   if (save_cl)
   1328     m_emit->push(m_emit->rcx);
   1329 
   1330   if (!amount_value.IsConstant())
   1331     m_emit->mov(m_emit->cl, GetHostReg8(amount_value.host_reg));
   1332 
   1333   switch (size)
   1334   {
   1335     case RegSize_8:
   1336     {
   1337       if (to_reg != from_reg)
   1338         m_emit->mov(GetHostReg8(to_reg), GetHostReg8(from_reg));
   1339 
   1340       if (amount_value.IsConstant())
   1341         m_emit->sar(GetHostReg8(to_reg), Truncate8(amount_value.constant_value));
   1342       else
   1343         m_emit->sar(GetHostReg8(to_reg), m_emit->cl);
   1344     }
   1345     break;
   1346 
   1347     case RegSize_16:
   1348     {
   1349       if (to_reg != from_reg)
   1350         m_emit->mov(GetHostReg16(to_reg), GetHostReg16(from_reg));
   1351 
   1352       if (amount_value.IsConstant())
   1353         m_emit->sar(GetHostReg16(to_reg), Truncate8(amount_value.constant_value));
   1354       else
   1355         m_emit->sar(GetHostReg16(to_reg), m_emit->cl);
   1356     }
   1357     break;
   1358 
   1359     case RegSize_32:
   1360     {
   1361       if (to_reg != from_reg)
   1362         m_emit->mov(GetHostReg32(to_reg), GetHostReg32(from_reg));
   1363 
   1364       if (amount_value.IsConstant())
   1365         m_emit->sar(GetHostReg32(to_reg), Truncate32(amount_value.constant_value));
   1366       else
   1367         m_emit->sar(GetHostReg32(to_reg), m_emit->cl);
   1368     }
   1369     break;
   1370 
   1371     case RegSize_64:
   1372     {
   1373       if (to_reg != from_reg)
   1374         m_emit->mov(GetHostReg64(to_reg), GetHostReg64(from_reg));
   1375 
   1376       if (amount_value.IsConstant())
   1377         m_emit->sar(GetHostReg64(to_reg), Truncate32(amount_value.constant_value));
   1378       else
   1379         m_emit->sar(GetHostReg64(to_reg), m_emit->cl);
   1380     }
   1381     break;
   1382   }
   1383 
   1384   if (save_cl)
   1385     m_emit->pop(m_emit->rcx);
   1386 }
   1387 
   1388 void CodeGenerator::EmitAnd(HostReg to_reg, HostReg from_reg, const Value& value)
   1389 {
   1390   DebugAssert(value.IsConstant() || value.IsInHostRegister());
   1391   switch (value.size)
   1392   {
   1393     case RegSize_8:
   1394     {
   1395       if (to_reg != from_reg)
   1396         m_emit->mov(GetHostReg8(to_reg), GetHostReg8(from_reg));
   1397 
   1398       if (value.IsConstant())
   1399         m_emit->and_(GetHostReg8(to_reg), Truncate32(value.constant_value & UINT32_C(0xFF)));
   1400       else
   1401         m_emit->and_(GetHostReg8(to_reg), GetHostReg8(value));
   1402     }
   1403     break;
   1404 
   1405     case RegSize_16:
   1406     {
   1407       if (to_reg != from_reg)
   1408         m_emit->mov(GetHostReg16(to_reg), GetHostReg16(from_reg));
   1409 
   1410       if (value.IsConstant())
   1411         m_emit->and_(GetHostReg16(to_reg), Truncate32(value.constant_value & UINT32_C(0xFFFF)));
   1412       else
   1413         m_emit->and_(GetHostReg16(to_reg), GetHostReg16(value));
   1414     }
   1415     break;
   1416 
   1417     case RegSize_32:
   1418     {
   1419       if (to_reg != from_reg)
   1420         m_emit->mov(GetHostReg32(to_reg), GetHostReg32(from_reg));
   1421 
   1422       if (value.IsConstant())
   1423         m_emit->and_(GetHostReg32(to_reg), Truncate32(value.constant_value));
   1424       else
   1425         m_emit->and_(GetHostReg32(to_reg), GetHostReg32(value));
   1426     }
   1427     break;
   1428 
   1429     case RegSize_64:
   1430     {
   1431       if (to_reg != from_reg)
   1432         m_emit->mov(GetHostReg64(to_reg), GetHostReg64(from_reg));
   1433 
   1434       if (value.IsConstant())
   1435       {
   1436         if (!Xbyak::inner::IsInInt32(value.constant_value))
   1437         {
   1438           Value temp = m_register_cache.AllocateScratch(RegSize_64);
   1439           m_emit->mov(GetHostReg64(temp), value.constant_value);
   1440           m_emit->and_(GetHostReg64(to_reg), GetHostReg64(temp));
   1441         }
   1442         else
   1443         {
   1444           m_emit->and_(GetHostReg64(to_reg), Truncate32(value.constant_value));
   1445         }
   1446       }
   1447       else
   1448       {
   1449         m_emit->and_(GetHostReg64(to_reg), GetHostReg64(value));
   1450       }
   1451     }
   1452     break;
   1453   }
   1454 }
   1455 
   1456 void CodeGenerator::EmitOr(HostReg to_reg, HostReg from_reg, const Value& value)
   1457 {
   1458   DebugAssert(value.IsConstant() || value.IsInHostRegister());
   1459   switch (value.size)
   1460   {
   1461     case RegSize_8:
   1462     {
   1463       if (to_reg != from_reg)
   1464         m_emit->mov(GetHostReg8(to_reg), GetHostReg8(from_reg));
   1465 
   1466       if (value.IsConstant())
   1467         m_emit->or_(GetHostReg8(to_reg), Truncate32(value.constant_value & UINT32_C(0xFF)));
   1468       else
   1469         m_emit->or_(GetHostReg8(to_reg), GetHostReg8(value));
   1470     }
   1471     break;
   1472 
   1473     case RegSize_16:
   1474     {
   1475       if (to_reg != from_reg)
   1476         m_emit->mov(GetHostReg16(to_reg), GetHostReg16(from_reg));
   1477 
   1478       if (value.IsConstant())
   1479         m_emit->or_(GetHostReg16(to_reg), Truncate32(value.constant_value & UINT32_C(0xFFFF)));
   1480       else
   1481         m_emit->or_(GetHostReg16(to_reg), GetHostReg16(value));
   1482     }
   1483     break;
   1484 
   1485     case RegSize_32:
   1486     {
   1487       if (to_reg != from_reg)
   1488         m_emit->mov(GetHostReg32(to_reg), GetHostReg32(from_reg));
   1489 
   1490       if (value.IsConstant())
   1491         m_emit->or_(GetHostReg32(to_reg), Truncate32(value.constant_value));
   1492       else
   1493         m_emit->or_(GetHostReg32(to_reg), GetHostReg32(value));
   1494     }
   1495     break;
   1496 
   1497     case RegSize_64:
   1498     {
   1499       if (to_reg != from_reg)
   1500         m_emit->mov(GetHostReg64(to_reg), GetHostReg64(from_reg));
   1501 
   1502       if (value.IsConstant())
   1503       {
   1504         if (!Xbyak::inner::IsInInt32(value.constant_value))
   1505         {
   1506           Value temp = m_register_cache.AllocateScratch(RegSize_64);
   1507           m_emit->mov(GetHostReg64(temp), value.constant_value);
   1508           m_emit->or_(GetHostReg64(to_reg), GetHostReg64(temp));
   1509         }
   1510         else
   1511         {
   1512           m_emit->or_(GetHostReg64(to_reg), Truncate32(value.constant_value));
   1513         }
   1514       }
   1515       else
   1516       {
   1517         m_emit->or_(GetHostReg64(to_reg), GetHostReg64(value));
   1518       }
   1519     }
   1520     break;
   1521   }
   1522 }
   1523 
   1524 void CodeGenerator::EmitXor(HostReg to_reg, HostReg from_reg, const Value& value)
   1525 {
   1526   DebugAssert(value.IsConstant() || value.IsInHostRegister());
   1527   switch (value.size)
   1528   {
   1529     case RegSize_8:
   1530     {
   1531       if (to_reg != from_reg)
   1532         m_emit->mov(GetHostReg8(to_reg), GetHostReg8(from_reg));
   1533 
   1534       if (value.IsConstant())
   1535         m_emit->xor_(GetHostReg8(to_reg), Truncate32(value.constant_value & UINT32_C(0xFF)));
   1536       else
   1537         m_emit->xor_(GetHostReg8(to_reg), GetHostReg8(value));
   1538     }
   1539     break;
   1540 
   1541     case RegSize_16:
   1542     {
   1543       if (to_reg != from_reg)
   1544         m_emit->mov(GetHostReg16(to_reg), GetHostReg16(from_reg));
   1545 
   1546       if (value.IsConstant())
   1547         m_emit->xor_(GetHostReg16(to_reg), Truncate32(value.constant_value & UINT32_C(0xFFFF)));
   1548       else
   1549         m_emit->xor_(GetHostReg16(to_reg), GetHostReg16(value));
   1550     }
   1551     break;
   1552 
   1553     case RegSize_32:
   1554     {
   1555       if (to_reg != from_reg)
   1556         m_emit->mov(GetHostReg32(to_reg), GetHostReg32(from_reg));
   1557 
   1558       if (value.IsConstant())
   1559         m_emit->xor_(GetHostReg32(to_reg), Truncate32(value.constant_value));
   1560       else
   1561         m_emit->xor_(GetHostReg32(to_reg), GetHostReg32(value));
   1562     }
   1563     break;
   1564 
   1565     case RegSize_64:
   1566     {
   1567       if (to_reg != from_reg)
   1568         m_emit->mov(GetHostReg64(to_reg), GetHostReg64(from_reg));
   1569 
   1570       if (value.IsConstant())
   1571       {
   1572         if (!Xbyak::inner::IsInInt32(value.constant_value))
   1573         {
   1574           Value temp = m_register_cache.AllocateScratch(RegSize_64);
   1575           m_emit->mov(GetHostReg64(temp), value.constant_value);
   1576           m_emit->xor_(GetHostReg64(to_reg), GetHostReg64(temp));
   1577         }
   1578         else
   1579         {
   1580           m_emit->xor_(GetHostReg64(to_reg), Truncate32(value.constant_value));
   1581         }
   1582       }
   1583       else
   1584       {
   1585         m_emit->xor_(GetHostReg64(to_reg), GetHostReg64(value));
   1586       }
   1587     }
   1588     break;
   1589   }
   1590 }
   1591 
   1592 void CodeGenerator::EmitTest(HostReg to_reg, const Value& value)
   1593 {
   1594   DebugAssert(value.IsConstant() || value.IsInHostRegister());
   1595   switch (value.size)
   1596   {
   1597     case RegSize_8:
   1598     {
   1599       if (value.IsConstant())
   1600         m_emit->test(GetHostReg8(to_reg), Truncate32(value.constant_value & UINT32_C(0xFF)));
   1601       else
   1602         m_emit->test(GetHostReg8(to_reg), GetHostReg8(value));
   1603     }
   1604     break;
   1605 
   1606     case RegSize_16:
   1607     {
   1608       if (value.IsConstant())
   1609         m_emit->test(GetHostReg16(to_reg), Truncate32(value.constant_value & UINT32_C(0xFFFF)));
   1610       else
   1611         m_emit->test(GetHostReg16(to_reg), GetHostReg16(value));
   1612     }
   1613     break;
   1614 
   1615     case RegSize_32:
   1616     {
   1617       if (value.IsConstant())
   1618         m_emit->test(GetHostReg32(to_reg), Truncate32(value.constant_value));
   1619       else
   1620         m_emit->test(GetHostReg32(to_reg), GetHostReg32(value));
   1621     }
   1622     break;
   1623 
   1624     case RegSize_64:
   1625     {
   1626       if (value.IsConstant())
   1627       {
   1628         if (!Xbyak::inner::IsInInt32(value.constant_value))
   1629         {
   1630           Value temp = m_register_cache.AllocateScratch(RegSize_64);
   1631           m_emit->mov(GetHostReg64(temp), value.constant_value);
   1632           m_emit->test(GetHostReg64(to_reg), GetHostReg64(temp));
   1633         }
   1634         else
   1635         {
   1636           m_emit->test(GetHostReg64(to_reg), Truncate32(value.constant_value));
   1637         }
   1638       }
   1639       else
   1640       {
   1641         m_emit->test(GetHostReg64(to_reg), GetHostReg64(value));
   1642       }
   1643     }
   1644     break;
   1645   }
   1646 }
   1647 
   1648 void CodeGenerator::EmitNot(HostReg to_reg, RegSize size)
   1649 {
   1650   switch (size)
   1651   {
   1652     case RegSize_8:
   1653       m_emit->not_(GetHostReg8(to_reg));
   1654       break;
   1655 
   1656     case RegSize_16:
   1657       m_emit->not_(GetHostReg16(to_reg));
   1658       break;
   1659 
   1660     case RegSize_32:
   1661       m_emit->not_(GetHostReg32(to_reg));
   1662       break;
   1663 
   1664     case RegSize_64:
   1665       m_emit->not_(GetHostReg64(to_reg));
   1666       break;
   1667 
   1668     default:
   1669       break;
   1670   }
   1671 }
   1672 
   1673 void CodeGenerator::EmitSetConditionResult(HostReg to_reg, RegSize to_size, Condition condition)
   1674 {
   1675   switch (condition)
   1676   {
   1677     case Condition::Always:
   1678       m_emit->mov(GetHostReg8(to_reg), 1);
   1679       break;
   1680 
   1681     case Condition::NotEqual:
   1682       m_emit->setne(GetHostReg8(to_reg));
   1683       break;
   1684 
   1685     case Condition::Equal:
   1686       m_emit->sete(GetHostReg8(to_reg));
   1687       break;
   1688 
   1689     case Condition::Overflow:
   1690       m_emit->seto(GetHostReg8(to_reg));
   1691       break;
   1692 
   1693     case Condition::Greater:
   1694       m_emit->setg(GetHostReg8(to_reg));
   1695       break;
   1696 
   1697     case Condition::GreaterEqual:
   1698       m_emit->setge(GetHostReg8(to_reg));
   1699       break;
   1700 
   1701     case Condition::Less:
   1702       m_emit->setl(GetHostReg8(to_reg));
   1703       break;
   1704 
   1705     case Condition::LessEqual:
   1706       m_emit->setle(GetHostReg8(to_reg));
   1707       break;
   1708 
   1709     case Condition::Negative:
   1710       m_emit->sets(GetHostReg8(to_reg));
   1711       break;
   1712 
   1713     case Condition::PositiveOrZero:
   1714       m_emit->setns(GetHostReg8(to_reg));
   1715       break;
   1716 
   1717     case Condition::Above:
   1718       m_emit->seta(GetHostReg8(to_reg));
   1719       break;
   1720 
   1721     case Condition::AboveEqual:
   1722       m_emit->setae(GetHostReg8(to_reg));
   1723       break;
   1724 
   1725     case Condition::Below:
   1726       m_emit->setb(GetHostReg8(to_reg));
   1727       break;
   1728 
   1729     case Condition::BelowEqual:
   1730       m_emit->setbe(GetHostReg8(to_reg));
   1731       break;
   1732 
   1733     default:
   1734       UnreachableCode();
   1735       break;
   1736   }
   1737 
   1738   if (to_size != RegSize_8)
   1739     EmitZeroExtend(to_reg, to_size, to_reg, RegSize_8);
   1740 }
   1741 
   1742 u32 CodeGenerator::PrepareStackForCall()
   1743 {
   1744   // we assume that the stack is unaligned at this point
   1745   const u32 num_callee_saved = m_register_cache.GetActiveCalleeSavedRegisterCount();
   1746   const u32 num_caller_saved = m_register_cache.PushCallerSavedRegisters();
   1747   const u32 current_offset = (num_callee_saved + num_caller_saved) * 8;
   1748   const u32 aligned_offset =
   1749     (current_offset == 0) ? 0 : Common::AlignUp(current_offset + FUNCTION_CALL_SHADOW_SPACE, 16);
   1750   const u32 adjust_size = aligned_offset - current_offset;
   1751   if (adjust_size > 0)
   1752     m_emit->sub(m_emit->rsp, adjust_size);
   1753 
   1754   return adjust_size;
   1755 }
   1756 
   1757 void CodeGenerator::RestoreStackAfterCall(u32 adjust_size)
   1758 {
   1759   if (adjust_size > 0)
   1760     m_emit->add(m_emit->rsp, adjust_size);
   1761 
   1762   m_register_cache.PopCallerSavedRegisters();
   1763 }
   1764 
   1765 void CodeGenerator::EmitCall(const void* ptr)
   1766 {
   1767   DebugAssert(Xbyak::inner::IsInInt32(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit->getCurr())));
   1768   m_emit->call(ptr);
   1769 }
   1770 
   1771 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)
   1772 {
   1773   if (return_value)
   1774     return_value->Discard();
   1775 
   1776   // shadow space allocate
   1777   const u32 adjust_size = PrepareStackForCall();
   1778 
   1779   // actually call the function
   1780   EmitCall(ptr);
   1781 
   1782   // shadow space release
   1783   RestoreStackAfterCall(adjust_size);
   1784 
   1785   // copy out return value if requested
   1786   if (return_value)
   1787   {
   1788     return_value->Undiscard();
   1789     EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size));
   1790   }
   1791 }
   1792 
   1793 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1)
   1794 {
   1795   if (return_value)
   1796     return_value->Discard();
   1797 
   1798   // shadow space allocate
   1799   const u32 adjust_size = PrepareStackForCall();
   1800 
   1801   // push arguments
   1802   EmitCopyValue(RARG1, arg1);
   1803 
   1804   // actually call the function
   1805   EmitCall(ptr);
   1806 
   1807   // shadow space release
   1808   RestoreStackAfterCall(adjust_size);
   1809 
   1810   // copy out return value if requested
   1811   if (return_value)
   1812   {
   1813     return_value->Undiscard();
   1814     EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size));
   1815   }
   1816 }
   1817 
   1818 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2)
   1819 {
   1820   if (return_value)
   1821     return_value->Discard();
   1822 
   1823   // shadow space allocate
   1824   const u32 adjust_size = PrepareStackForCall();
   1825 
   1826   // push arguments
   1827   EmitCopyValue(RARG1, arg1);
   1828   EmitCopyValue(RARG2, arg2);
   1829 
   1830   // actually call the function
   1831   EmitCall(ptr);
   1832 
   1833   // shadow space release
   1834   RestoreStackAfterCall(adjust_size);
   1835 
   1836   // copy out return value if requested
   1837   if (return_value)
   1838   {
   1839     return_value->Undiscard();
   1840     EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size));
   1841   }
   1842 }
   1843 
   1844 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2,
   1845                                         const Value& arg3)
   1846 {
   1847   if (return_value)
   1848     m_register_cache.DiscardHostReg(return_value->GetHostRegister());
   1849 
   1850   // shadow space allocate
   1851   const u32 adjust_size = PrepareStackForCall();
   1852 
   1853   // push arguments
   1854   EmitCopyValue(RARG1, arg1);
   1855   EmitCopyValue(RARG2, arg2);
   1856   EmitCopyValue(RARG3, arg3);
   1857 
   1858   // actually call the function
   1859   EmitCall(ptr);
   1860 
   1861   // shadow space release
   1862   RestoreStackAfterCall(adjust_size);
   1863 
   1864   // copy out return value if requested
   1865   if (return_value)
   1866   {
   1867     return_value->Undiscard();
   1868     EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size));
   1869   }
   1870 }
   1871 
   1872 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2,
   1873                                         const Value& arg3, const Value& arg4)
   1874 {
   1875   if (return_value)
   1876     return_value->Discard();
   1877 
   1878   // shadow space allocate
   1879   const u32 adjust_size = PrepareStackForCall();
   1880 
   1881   // push arguments
   1882   EmitCopyValue(RARG1, arg1);
   1883   EmitCopyValue(RARG2, arg2);
   1884   EmitCopyValue(RARG3, arg3);
   1885   EmitCopyValue(RARG4, arg4);
   1886 
   1887   // actually call the function
   1888   EmitCall(ptr);
   1889 
   1890   // shadow space release
   1891   RestoreStackAfterCall(adjust_size);
   1892 
   1893   // copy out return value if requested
   1894   if (return_value)
   1895   {
   1896     return_value->Undiscard();
   1897     EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size));
   1898   }
   1899 }
   1900 
   1901 void CodeGenerator::EmitPushHostReg(HostReg reg, u32 position)
   1902 {
   1903   m_emit->push(GetHostReg64(reg));
   1904 }
   1905 
   1906 void CodeGenerator::EmitPushHostRegPair(HostReg reg, HostReg reg2, u32 position)
   1907 {
   1908   m_emit->push(GetHostReg64(reg));
   1909   m_emit->push(GetHostReg64(reg2));
   1910 }
   1911 
   1912 void CodeGenerator::EmitPopHostReg(HostReg reg, u32 position)
   1913 {
   1914   m_emit->pop(GetHostReg64(reg));
   1915 }
   1916 
   1917 void CodeGenerator::EmitPopHostRegPair(HostReg reg, HostReg reg2, u32 position)
   1918 {
   1919   m_emit->pop(GetHostReg64(reg2));
   1920   m_emit->pop(GetHostReg64(reg));
   1921 }
   1922 
   1923 void CodeGenerator::EmitLoadCPUStructField(HostReg host_reg, RegSize guest_size, u32 offset)
   1924 {
   1925   switch (guest_size)
   1926   {
   1927     case RegSize_8:
   1928       m_emit->mov(GetHostReg8(host_reg), m_emit->byte[GetCPUPtrReg() + offset]);
   1929       break;
   1930 
   1931     case RegSize_16:
   1932       m_emit->mov(GetHostReg16(host_reg), m_emit->word[GetCPUPtrReg() + offset]);
   1933       break;
   1934 
   1935     case RegSize_32:
   1936       m_emit->mov(GetHostReg32(host_reg), m_emit->dword[GetCPUPtrReg() + offset]);
   1937       break;
   1938 
   1939     case RegSize_64:
   1940       m_emit->mov(GetHostReg64(host_reg), m_emit->qword[GetCPUPtrReg() + offset]);
   1941       break;
   1942 
   1943     default:
   1944     {
   1945       UnreachableCode();
   1946     }
   1947     break;
   1948   }
   1949 }
   1950 
   1951 void CodeGenerator::EmitStoreCPUStructField(u32 offset, const Value& value)
   1952 {
   1953   DebugAssert(value.IsInHostRegister() || value.IsConstant());
   1954   switch (value.size)
   1955   {
   1956     case RegSize_8:
   1957     {
   1958       if (value.IsConstant())
   1959         m_emit->mov(m_emit->byte[GetCPUPtrReg() + offset], value.constant_value);
   1960       else
   1961         m_emit->mov(m_emit->byte[GetCPUPtrReg() + offset], GetHostReg8(value.host_reg));
   1962     }
   1963     break;
   1964 
   1965     case RegSize_16:
   1966     {
   1967       if (value.IsConstant())
   1968         m_emit->mov(m_emit->word[GetCPUPtrReg() + offset], value.constant_value);
   1969       else
   1970         m_emit->mov(m_emit->word[GetCPUPtrReg() + offset], GetHostReg16(value.host_reg));
   1971     }
   1972     break;
   1973 
   1974     case RegSize_32:
   1975     {
   1976       if (value.IsConstant())
   1977         m_emit->mov(m_emit->dword[GetCPUPtrReg() + offset], value.constant_value);
   1978       else
   1979         m_emit->mov(m_emit->dword[GetCPUPtrReg() + offset], GetHostReg32(value.host_reg));
   1980     }
   1981     break;
   1982 
   1983     case RegSize_64:
   1984     {
   1985       if (value.IsConstant())
   1986       {
   1987         // we need a temporary to load the value if it doesn't fit in 32-bits
   1988         if (!Xbyak::inner::IsInInt32(value.constant_value))
   1989         {
   1990           Value temp = m_register_cache.AllocateScratch(RegSize_64);
   1991           EmitCopyValue(temp.host_reg, value);
   1992           m_emit->mov(m_emit->qword[GetCPUPtrReg() + offset], GetHostReg64(temp.host_reg));
   1993         }
   1994         else
   1995         {
   1996           m_emit->mov(m_emit->qword[GetCPUPtrReg() + offset], value.constant_value);
   1997         }
   1998       }
   1999       else
   2000       {
   2001         m_emit->mov(m_emit->qword[GetCPUPtrReg() + offset], GetHostReg64(value.host_reg));
   2002       }
   2003     }
   2004     break;
   2005 
   2006     default:
   2007     {
   2008       UnreachableCode();
   2009     }
   2010     break;
   2011   }
   2012 }
   2013 
   2014 void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
   2015 {
   2016   DebugAssert(value.IsInHostRegister() || value.IsConstant());
   2017   switch (value.size)
   2018   {
   2019     case RegSize_8:
   2020     {
   2021       if (value.IsConstant() && value.constant_value == 1)
   2022         m_emit->inc(m_emit->byte[GetCPUPtrReg() + offset]);
   2023       else if (value.IsConstant())
   2024         m_emit->add(m_emit->byte[GetCPUPtrReg() + offset], Truncate32(value.constant_value));
   2025       else
   2026         m_emit->add(m_emit->byte[GetCPUPtrReg() + offset], GetHostReg8(value.host_reg));
   2027     }
   2028     break;
   2029 
   2030     case RegSize_16:
   2031     {
   2032       if (value.IsConstant() && value.constant_value == 1)
   2033         m_emit->inc(m_emit->word[GetCPUPtrReg() + offset]);
   2034       else if (value.IsConstant())
   2035         m_emit->add(m_emit->word[GetCPUPtrReg() + offset], Truncate32(value.constant_value));
   2036       else
   2037         m_emit->add(m_emit->word[GetCPUPtrReg() + offset], GetHostReg16(value.host_reg));
   2038     }
   2039     break;
   2040 
   2041     case RegSize_32:
   2042     {
   2043       if (value.IsConstant() && value.constant_value == 1)
   2044         m_emit->inc(m_emit->dword[GetCPUPtrReg() + offset]);
   2045       else if (value.IsConstant())
   2046         m_emit->add(m_emit->dword[GetCPUPtrReg() + offset], Truncate32(value.constant_value));
   2047       else
   2048         m_emit->add(m_emit->dword[GetCPUPtrReg() + offset], GetHostReg32(value.host_reg));
   2049     }
   2050     break;
   2051 
   2052     case RegSize_64:
   2053     {
   2054       if (value.IsConstant() && value.constant_value == 1)
   2055       {
   2056         m_emit->inc(m_emit->qword[GetCPUPtrReg() + offset]);
   2057       }
   2058       else if (value.IsConstant())
   2059       {
   2060         // we need a temporary to load the value if it doesn't fit in 32-bits
   2061         if (!Xbyak::inner::IsInInt32(value.constant_value))
   2062         {
   2063           Value temp = m_register_cache.AllocateScratch(RegSize_64);
   2064           EmitCopyValue(temp.host_reg, value);
   2065           m_emit->add(m_emit->qword[GetCPUPtrReg() + offset], GetHostReg64(temp.host_reg));
   2066         }
   2067         else
   2068         {
   2069           m_emit->add(m_emit->qword[GetCPUPtrReg() + offset], Truncate32(value.constant_value));
   2070         }
   2071       }
   2072       else
   2073       {
   2074         m_emit->add(m_emit->qword[GetCPUPtrReg() + offset], GetHostReg64(value.host_reg));
   2075       }
   2076     }
   2077     break;
   2078 
   2079     default:
   2080     {
   2081       UnreachableCode();
   2082     }
   2083     break;
   2084   }
   2085 }
   2086 
   2087 void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result)
   2088 {
   2089   if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
   2090   {
   2091     EmitCopyValue(RARG1, address);
   2092     m_emit->shr(GetHostReg64(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT);
   2093     m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
   2094   }
   2095 
   2096   const Xbyak::Reg64 membase =
   2097     (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
   2098 
   2099   // can't store displacements > 0x80000000 in-line
   2100   const Value* actual_address = &address;
   2101   if (address.IsConstant() && address.constant_value >= 0x80000000)
   2102   {
   2103     actual_address = &result;
   2104     m_emit->mov(GetHostReg32(result.host_reg), address.constant_value);
   2105   }
   2106 
   2107   // TODO: movsx/zx inline here
   2108   switch (size)
   2109   {
   2110     case RegSize_8:
   2111     {
   2112       if (actual_address->IsConstant())
   2113         m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + actual_address->constant_value]);
   2114       else
   2115         m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + GetHostReg64(actual_address->host_reg)]);
   2116     }
   2117     break;
   2118 
   2119     case RegSize_16:
   2120     {
   2121       if (actual_address->IsConstant())
   2122         m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + actual_address->constant_value]);
   2123       else
   2124         m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + GetHostReg64(actual_address->host_reg)]);
   2125     }
   2126     break;
   2127 
   2128     case RegSize_32:
   2129     {
   2130       if (actual_address->IsConstant())
   2131         m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + actual_address->constant_value]);
   2132       else
   2133         m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + GetHostReg64(actual_address->host_reg)]);
   2134     }
   2135     break;
   2136 
   2137     default:
   2138       UnreachableCode();
   2139       break;
   2140   }
   2141 }
   2142 
   2143 void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
   2144                                                const Value& address, RegSize size, Value& result)
   2145 {
   2146   if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
   2147   {
   2148     EmitCopyValue(RARG1, address);
   2149     m_emit->shr(GetHostReg64(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT);
   2150     m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
   2151   }
   2152 
   2153   const Xbyak::Reg64 membase =
   2154     (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
   2155 
   2156   // can't store displacements > 0x80000000 in-line
   2157   const Value* actual_address = &address;
   2158   if (address.IsConstant() && address.constant_value >= 0x80000000)
   2159   {
   2160     actual_address = &result;
   2161     m_emit->mov(GetHostReg32(result.host_reg), address.constant_value);
   2162   }
   2163 
   2164   void* host_pc = GetCurrentNearCodePointer();
   2165 
   2166   m_register_cache.InhibitAllocation();
   2167 
   2168   switch (size)
   2169   {
   2170     case RegSize_8:
   2171     {
   2172       if (actual_address->IsConstant())
   2173         m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + actual_address->constant_value]);
   2174       else
   2175         m_emit->mov(GetHostReg8(result.host_reg), m_emit->byte[membase + GetHostReg64(actual_address->host_reg)]);
   2176     }
   2177     break;
   2178 
   2179     case RegSize_16:
   2180     {
   2181       if (actual_address->IsConstant())
   2182         m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + actual_address->constant_value]);
   2183       else
   2184         m_emit->mov(GetHostReg16(result.host_reg), m_emit->word[membase + GetHostReg64(actual_address->host_reg)]);
   2185     }
   2186     break;
   2187 
   2188     case RegSize_32:
   2189     {
   2190       if (actual_address->IsConstant())
   2191         m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + actual_address->constant_value]);
   2192       else
   2193         m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[membase + GetHostReg64(actual_address->host_reg)]);
   2194     }
   2195     break;
   2196 
   2197     default:
   2198       UnreachableCode();
   2199       break;
   2200   }
   2201 
   2202   // insert nops, we need at least 5 bytes for a relative jump
   2203   const u32 fastmem_size = static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc));
   2204   const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
   2205   for (u32 i = 0; i < nops; i++)
   2206     m_emit->nop();
   2207 
   2208   const u32 host_code_size =
   2209     static_cast<u32>(static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc)));
   2210 
   2211   // generate slowmem fallback
   2212   m_far_emitter.align(16);
   2213   void* thunk_host_pc = GetCurrentFarCodePointer();
   2214   SwitchToFarCode();
   2215 
   2216   // we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
   2217   DebugAssert(m_delayed_cycles_add > 0);
   2218   EmitAddCPUStructField(OFFSETOF(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
   2219   m_delayed_cycles_add += Bus::RAM_READ_TICKS;
   2220 
   2221   EmitLoadGuestMemorySlowmem(instruction, info, address, size, result, true);
   2222 
   2223   EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
   2224                         Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
   2225 
   2226   // return to the block code
   2227   m_emit->jmp(GetCurrentNearCodePointer());
   2228 
   2229   SwitchToNearCode();
   2230   m_register_cache.UninhibitAllocation();
   2231 
   2232   CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, thunk_host_pc);
   2233 }
   2234 
   2235 void CodeGenerator::EmitLoadGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
   2236                                                const Value& address, RegSize size, Value& result, bool in_far_code)
   2237 {
   2238   if (g_settings.cpu_recompiler_memory_exceptions)
   2239   {
   2240     // NOTE: This can leave junk in the upper bits
   2241     switch (size)
   2242     {
   2243       case RegSize_8:
   2244         EmitFunctionCall(&result, &Thunks::ReadMemoryByte, address);
   2245         break;
   2246 
   2247       case RegSize_16:
   2248         EmitFunctionCall(&result, &Thunks::ReadMemoryHalfWord, address);
   2249         break;
   2250 
   2251       case RegSize_32:
   2252         EmitFunctionCall(&result, &Thunks::ReadMemoryWord, address);
   2253         break;
   2254 
   2255       default:
   2256         UnreachableCode();
   2257         break;
   2258     }
   2259 
   2260     m_emit->test(GetHostReg64(result.host_reg), GetHostReg64(result.host_reg));
   2261     m_emit->js(GetCurrentFarCodePointer());
   2262 
   2263     m_register_cache.PushState();
   2264 
   2265     // load exception path
   2266     if (!in_far_code)
   2267       SwitchToFarCode();
   2268 
   2269     // cause_bits = (-result << 2) | BD | cop_n
   2270     m_emit->neg(GetHostReg32(result.host_reg));
   2271     m_emit->shl(GetHostReg32(result.host_reg), 2);
   2272     m_emit->or_(GetHostReg32(result.host_reg),
   2273                 Cop0Registers::CAUSE::MakeValueForException(static_cast<Exception>(0), info.is_branch_delay_slot, false,
   2274                                                             instruction.cop.cop_n));
   2275     EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
   2276 
   2277     EmitExceptionExit();
   2278 
   2279     if (!in_far_code)
   2280       SwitchToNearCode();
   2281 
   2282     m_register_cache.PopState();
   2283   }
   2284   else
   2285   {
   2286     switch (size)
   2287     {
   2288       case RegSize_8:
   2289         EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryByte, address);
   2290         break;
   2291 
   2292       case RegSize_16:
   2293         EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryHalfWord, address);
   2294         break;
   2295 
   2296       case RegSize_32:
   2297         EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryWord, address);
   2298         break;
   2299 
   2300       default:
   2301         UnreachableCode();
   2302         break;
   2303     }
   2304   }
   2305 }
   2306 
   2307 void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
   2308                                                 const Value& address, RegSize size, const Value& value)
   2309 {
   2310   if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
   2311   {
   2312     EmitCopyValue(RARG1, address);
   2313     m_emit->shr(GetHostReg64(RARG1), Bus::FASTMEM_LUT_PAGE_SHIFT);
   2314     m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
   2315   }
   2316 
   2317   // can't store displacements > 0x80000000 in-line
   2318   const Value* actual_address = &address;
   2319   Value temp_address;
   2320   if (address.IsConstant() && address.constant_value >= 0x80000000)
   2321   {
   2322     temp_address.SetHostReg(&m_register_cache, RRETURN, RegSize_32);
   2323     actual_address = &temp_address;
   2324     m_emit->mov(GetHostReg32(temp_address), address.constant_value);
   2325   }
   2326 
   2327   const Xbyak::Reg64 membase =
   2328     (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
   2329 
   2330   // fastmem
   2331   void* host_pc = GetCurrentNearCodePointer();
   2332 
   2333   m_register_cache.InhibitAllocation();
   2334 
   2335   switch (size)
   2336   {
   2337     case RegSize_8:
   2338     {
   2339       if (actual_address->IsConstant())
   2340       {
   2341         if (value.IsConstant())
   2342           m_emit->mov(m_emit->byte[membase + actual_address->constant_value], value.constant_value & 0xFFu);
   2343         else
   2344           m_emit->mov(m_emit->byte[membase + actual_address->constant_value], GetHostReg8(value.host_reg));
   2345       }
   2346       else
   2347       {
   2348         if (value.IsConstant())
   2349           m_emit->mov(m_emit->byte[membase + GetHostReg64(actual_address->host_reg)], value.constant_value & 0xFFu);
   2350         else
   2351           m_emit->mov(m_emit->byte[membase + GetHostReg64(actual_address->host_reg)], GetHostReg8(value.host_reg));
   2352       }
   2353     }
   2354     break;
   2355 
   2356     case RegSize_16:
   2357     {
   2358       if (actual_address->IsConstant())
   2359       {
   2360         if (value.IsConstant())
   2361           m_emit->mov(m_emit->word[membase + actual_address->constant_value], value.constant_value & 0xFFFFu);
   2362         else
   2363           m_emit->mov(m_emit->word[membase + actual_address->constant_value], GetHostReg16(value.host_reg));
   2364       }
   2365       else
   2366       {
   2367         if (value.IsConstant())
   2368           m_emit->mov(m_emit->word[membase + GetHostReg64(actual_address->host_reg)], value.constant_value & 0xFFFFu);
   2369         else
   2370           m_emit->mov(m_emit->word[membase + GetHostReg64(actual_address->host_reg)], GetHostReg16(value.host_reg));
   2371       }
   2372     }
   2373     break;
   2374 
   2375     case RegSize_32:
   2376     {
   2377       if (actual_address->IsConstant())
   2378       {
   2379         if (value.IsConstant())
   2380           m_emit->mov(m_emit->dword[membase + actual_address->constant_value], value.constant_value);
   2381         else
   2382           m_emit->mov(m_emit->dword[membase + actual_address->constant_value], GetHostReg32(value.host_reg));
   2383       }
   2384       else
   2385       {
   2386         if (value.IsConstant())
   2387           m_emit->mov(m_emit->dword[membase + GetHostReg64(actual_address->host_reg)], value.constant_value);
   2388         else
   2389           m_emit->mov(m_emit->dword[membase + GetHostReg64(actual_address->host_reg)], GetHostReg32(value.host_reg));
   2390       }
   2391     }
   2392     break;
   2393 
   2394     default:
   2395       UnreachableCode();
   2396       break;
   2397   }
   2398 
   2399   // insert nops, we need at least 5 bytes for a relative jump
   2400   const u32 fastmem_size = static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc));
   2401   const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
   2402   for (u32 i = 0; i < nops; i++)
   2403     m_emit->nop();
   2404 
   2405   const u32 host_code_size =
   2406     static_cast<u32>(static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc)));
   2407 
   2408   // generate slowmem fallback
   2409   m_far_emitter.align();
   2410   const void* host_thunk_pc = GetCurrentFarCodePointer();
   2411   SwitchToFarCode();
   2412 
   2413   DebugAssert(m_delayed_cycles_add > 0);
   2414   EmitAddCPUStructField(OFFSETOF(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
   2415 
   2416   EmitStoreGuestMemorySlowmem(instruction, info, address, size, value, true);
   2417 
   2418   EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
   2419                         Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
   2420 
   2421   // return to the block code
   2422   m_emit->jmp(GetCurrentNearCodePointer());
   2423 
   2424   SwitchToNearCode();
   2425   m_register_cache.UninhibitAllocation();
   2426 
   2427   CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, host_thunk_pc);
   2428 }
   2429 
   2430 void CodeGenerator::EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
   2431                                                 const Value& address, RegSize size, const Value& value,
   2432                                                 bool in_far_code)
   2433 {
   2434   if (g_settings.cpu_recompiler_memory_exceptions)
   2435   {
   2436     Assert(!in_far_code);
   2437 
   2438     Value result = m_register_cache.AllocateScratch(RegSize_32);
   2439     switch (size)
   2440     {
   2441       case RegSize_8:
   2442         EmitFunctionCall(&result, &Thunks::WriteMemoryByte, address, value);
   2443         break;
   2444 
   2445       case RegSize_16:
   2446         EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, address, value);
   2447         break;
   2448 
   2449       case RegSize_32:
   2450         EmitFunctionCall(&result, &Thunks::WriteMemoryWord, address, value);
   2451         break;
   2452 
   2453       default:
   2454         UnreachableCode();
   2455         break;
   2456     }
   2457 
   2458     m_register_cache.PushState();
   2459 
   2460     m_emit->test(GetHostReg32(result), GetHostReg32(result));
   2461     m_emit->jnz(GetCurrentFarCodePointer());
   2462 
   2463     // store exception path
   2464     if (!in_far_code)
   2465       SwitchToFarCode();
   2466 
   2467     // cause_bits = (result << 2) | BD | cop_n
   2468     m_emit->shl(GetHostReg32(result), 2);
   2469     m_emit->or_(GetHostReg32(result),
   2470                 Cop0Registers::CAUSE::MakeValueForException(static_cast<Exception>(0), info.is_branch_delay_slot, false,
   2471                                                             instruction.cop.cop_n));
   2472     EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
   2473 
   2474     EmitExceptionExit();
   2475     if (!in_far_code)
   2476       SwitchToNearCode();
   2477 
   2478     m_register_cache.PopState();
   2479   }
   2480   else
   2481   {
   2482     switch (size)
   2483     {
   2484       case RegSize_8:
   2485         EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryByte, address, value);
   2486         break;
   2487 
   2488       case RegSize_16:
   2489         EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryHalfWord, address, value);
   2490         break;
   2491 
   2492       case RegSize_32:
   2493         EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryWord, address, value);
   2494         break;
   2495 
   2496       default:
   2497         UnreachableCode();
   2498         break;
   2499     }
   2500   }
   2501 }
   2502 
   2503 void CodeGenerator::EmitUpdateFastmemBase()
   2504 {
   2505   m_emit->mov(GetFastmemBasePtrReg(), m_emit->qword[GetCPUPtrReg() + OFFSETOF(CPU::State, fastmem_base)]);
   2506 }
   2507 
   2508 void CodeGenerator::BackpatchLoadStore(void* host_pc, const CodeCache::LoadstoreBackpatchInfo& lbi)
   2509 {
   2510   DEV_LOG("Backpatching {} (guest PC 0x{:08X}) to slowmem", host_pc, lbi.guest_pc);
   2511 
   2512   // turn it into a jump to the slowmem handler
   2513   Xbyak::CodeGenerator cg(lbi.code_size, host_pc);
   2514   cg.jmp(lbi.thunk_address);
   2515 
   2516   const s32 nops = static_cast<s32>(lbi.code_size) -
   2517                    static_cast<s32>(static_cast<ptrdiff_t>(cg.getCurr() - static_cast<u8*>(host_pc)));
   2518   Assert(nops >= 0);
   2519   for (s32 i = 0; i < nops; i++)
   2520     cg.nop();
   2521 
   2522   MemMap::FlushInstructionCache(host_pc, lbi.code_size);
   2523 }
   2524 
   2525 void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
   2526 {
   2527   const s64 displacement =
   2528     static_cast<s64>(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit->getCurr())) + 2;
   2529   if (Xbyak::inner::IsInInt32(static_cast<u64>(displacement)))
   2530   {
   2531     switch (size)
   2532     {
   2533       case RegSize_8:
   2534         m_emit->mov(GetHostReg8(host_reg), m_emit->byte[m_emit->rip + ptr]);
   2535         break;
   2536 
   2537       case RegSize_16:
   2538         m_emit->mov(GetHostReg16(host_reg), m_emit->word[m_emit->rip + ptr]);
   2539         break;
   2540 
   2541       case RegSize_32:
   2542         m_emit->mov(GetHostReg32(host_reg), m_emit->dword[m_emit->rip + ptr]);
   2543         break;
   2544 
   2545       case RegSize_64:
   2546         m_emit->mov(GetHostReg64(host_reg), m_emit->qword[m_emit->rip + ptr]);
   2547         break;
   2548 
   2549       default:
   2550       {
   2551         UnreachableCode();
   2552       }
   2553       break;
   2554     }
   2555   }
   2556   else
   2557   {
   2558     Value temp = m_register_cache.AllocateScratch(RegSize_64);
   2559     m_emit->mov(GetHostReg64(temp), reinterpret_cast<size_t>(ptr));
   2560     switch (size)
   2561     {
   2562       case RegSize_8:
   2563         m_emit->mov(GetHostReg8(host_reg), m_emit->byte[GetHostReg64(temp)]);
   2564         break;
   2565 
   2566       case RegSize_16:
   2567         m_emit->mov(GetHostReg16(host_reg), m_emit->word[GetHostReg64(temp)]);
   2568         break;
   2569 
   2570       case RegSize_32:
   2571         m_emit->mov(GetHostReg32(host_reg), m_emit->dword[GetHostReg64(temp)]);
   2572         break;
   2573 
   2574       case RegSize_64:
   2575         m_emit->mov(GetHostReg64(host_reg), m_emit->qword[GetHostReg64(temp)]);
   2576         break;
   2577 
   2578       default:
   2579       {
   2580         UnreachableCode();
   2581       }
   2582       break;
   2583     }
   2584   }
   2585 }
   2586 
   2587 void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value)
   2588 {
   2589   DebugAssert(value.IsInHostRegister() || value.IsConstant());
   2590 
   2591   const s64 displacement =
   2592     static_cast<s64>(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit->getCurr()));
   2593   if (Xbyak::inner::IsInInt32(static_cast<u64>(displacement)))
   2594   {
   2595     switch (value.size)
   2596     {
   2597       case RegSize_8:
   2598       {
   2599         if (value.IsConstant())
   2600           m_emit->mov(m_emit->byte[m_emit->rip + ptr], value.constant_value);
   2601         else
   2602           m_emit->mov(m_emit->byte[m_emit->rip + ptr], GetHostReg8(value.host_reg));
   2603       }
   2604       break;
   2605 
   2606       case RegSize_16:
   2607       {
   2608         if (value.IsConstant())
   2609           m_emit->mov(m_emit->word[m_emit->rip + ptr], value.constant_value);
   2610         else
   2611           m_emit->mov(m_emit->word[m_emit->rip + ptr], GetHostReg16(value.host_reg));
   2612       }
   2613       break;
   2614 
   2615       case RegSize_32:
   2616       {
   2617         if (value.IsConstant())
   2618           m_emit->mov(m_emit->dword[m_emit->rip + ptr], value.constant_value);
   2619         else
   2620           m_emit->mov(m_emit->dword[m_emit->rip + ptr], GetHostReg32(value.host_reg));
   2621       }
   2622       break;
   2623 
   2624       case RegSize_64:
   2625       {
   2626         if (value.IsConstant())
   2627         {
   2628           // we need a temporary to load the value if it doesn't fit in 32-bits
   2629           if (!Xbyak::inner::IsInInt32(value.constant_value))
   2630           {
   2631             Value temp = m_register_cache.AllocateScratch(RegSize_64);
   2632             EmitCopyValue(temp.host_reg, value);
   2633             m_emit->mov(m_emit->qword[m_emit->rip + ptr], GetHostReg64(temp.host_reg));
   2634           }
   2635           else
   2636           {
   2637             m_emit->mov(m_emit->qword[m_emit->rip + ptr], value.constant_value);
   2638           }
   2639         }
   2640         else
   2641         {
   2642           m_emit->mov(m_emit->qword[m_emit->rip + ptr], GetHostReg64(value.host_reg));
   2643         }
   2644       }
   2645       break;
   2646 
   2647       default:
   2648       {
   2649         UnreachableCode();
   2650       }
   2651       break;
   2652     }
   2653   }
   2654   else
   2655   {
   2656     Value address_temp = m_register_cache.AllocateScratch(RegSize_64);
   2657     m_emit->mov(GetHostReg64(address_temp), reinterpret_cast<size_t>(ptr));
   2658     switch (value.size)
   2659     {
   2660       case RegSize_8:
   2661       {
   2662         if (value.IsConstant())
   2663           m_emit->mov(m_emit->byte[GetHostReg64(address_temp)], value.constant_value);
   2664         else
   2665           m_emit->mov(m_emit->byte[GetHostReg64(address_temp)], GetHostReg8(value.host_reg));
   2666       }
   2667       break;
   2668 
   2669       case RegSize_16:
   2670       {
   2671         if (value.IsConstant())
   2672           m_emit->mov(m_emit->word[GetHostReg64(address_temp)], value.constant_value);
   2673         else
   2674           m_emit->mov(m_emit->word[GetHostReg64(address_temp)], GetHostReg16(value.host_reg));
   2675       }
   2676       break;
   2677 
   2678       case RegSize_32:
   2679       {
   2680         if (value.IsConstant())
   2681           m_emit->mov(m_emit->dword[GetHostReg64(address_temp)], value.constant_value);
   2682         else
   2683           m_emit->mov(m_emit->dword[GetHostReg64(address_temp)], GetHostReg32(value.host_reg));
   2684       }
   2685       break;
   2686 
   2687       case RegSize_64:
   2688       {
   2689         if (value.IsConstant())
   2690         {
   2691           // we need a temporary to load the value if it doesn't fit in 32-bits
   2692           if (!Xbyak::inner::IsInInt32(value.constant_value))
   2693           {
   2694             Value temp = m_register_cache.AllocateScratch(RegSize_64);
   2695             EmitCopyValue(temp.host_reg, value);
   2696             m_emit->mov(m_emit->qword[GetHostReg64(address_temp)], GetHostReg64(temp.host_reg));
   2697           }
   2698           else
   2699           {
   2700             m_emit->mov(m_emit->qword[GetHostReg64(address_temp)], value.constant_value);
   2701           }
   2702         }
   2703         else
   2704         {
   2705           m_emit->mov(m_emit->qword[GetHostReg64(address_temp)], GetHostReg64(value.host_reg));
   2706         }
   2707       }
   2708       break;
   2709 
   2710       default:
   2711       {
   2712         UnreachableCode();
   2713       }
   2714       break;
   2715     }
   2716   }
   2717 }
   2718 
   2719 void CodeGenerator::EmitFlushInterpreterLoadDelay()
   2720 {
   2721   Value reg = m_register_cache.AllocateScratch(RegSize_8);
   2722   Value value = m_register_cache.AllocateScratch(RegSize_32);
   2723 
   2724   auto load_delay_reg = m_emit->byte[GetCPUPtrReg() + OFFSETOF(State, load_delay_reg)];
   2725   auto load_delay_value = m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, load_delay_value)];
   2726   auto reg_ptr = m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, regs.r[0]) + GetHostReg64(reg.host_reg) * 4];
   2727 
   2728   Xbyak::Label skip_flush;
   2729 
   2730   // reg = load_delay_reg
   2731   m_emit->movzx(GetHostReg32(reg.host_reg), load_delay_reg);
   2732 
   2733   // if load_delay_reg == Reg::count goto skip_flush
   2734   m_emit->cmp(GetHostReg32(reg.host_reg), static_cast<u8>(Reg::count));
   2735   m_emit->je(skip_flush);
   2736 
   2737   // r[reg] = load_delay_value
   2738   m_emit->mov(GetHostReg32(value), load_delay_value);
   2739   m_emit->mov(reg_ptr, GetHostReg32(value));
   2740 
   2741   // load_delay_reg = Reg::count
   2742   m_emit->mov(load_delay_reg, static_cast<u8>(Reg::count));
   2743 
   2744   m_emit->L(skip_flush);
   2745 }
   2746 
   2747 void CodeGenerator::EmitMoveNextInterpreterLoadDelay()
   2748 {
   2749   Value reg = m_register_cache.AllocateScratch(RegSize_8);
   2750   Value value = m_register_cache.AllocateScratch(RegSize_32);
   2751 
   2752   auto load_delay_reg = m_emit->byte[GetCPUPtrReg() + OFFSETOF(State, load_delay_reg)];
   2753   auto load_delay_value = m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, load_delay_value)];
   2754   auto next_load_delay_reg = m_emit->byte[GetCPUPtrReg() + OFFSETOF(State, next_load_delay_reg)];
   2755   auto next_load_delay_value = m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, next_load_delay_value)];
   2756 
   2757   m_emit->mov(GetHostReg32(value), next_load_delay_value);
   2758   m_emit->mov(GetHostReg8(reg), next_load_delay_reg);
   2759   m_emit->mov(load_delay_value, GetHostReg32(value));
   2760   m_emit->mov(load_delay_reg, GetHostReg8(reg));
   2761   m_emit->mov(next_load_delay_reg, static_cast<u8>(Reg::count));
   2762 }
   2763 
   2764 void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
   2765 {
   2766   if (!m_load_delay_dirty)
   2767     return;
   2768 
   2769   auto load_delay_reg = m_emit->byte[GetCPUPtrReg() + OFFSETOF(State, load_delay_reg)];
   2770 
   2771   Xbyak::Label skip_cancel;
   2772 
   2773   // if load_delay_reg != reg goto skip_cancel
   2774   m_emit->cmp(load_delay_reg, static_cast<u8>(reg));
   2775   m_emit->jne(skip_cancel);
   2776 
   2777   // load_delay_reg = Reg::count
   2778   m_emit->mov(load_delay_reg, static_cast<u8>(Reg::count));
   2779 
   2780   m_emit->L(skip_cancel);
   2781 }
   2782 
   2783 void CodeGenerator::EmitICacheCheckAndUpdate()
   2784 {
   2785   if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
   2786   {
   2787     if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
   2788     {
   2789       m_emit->mov(m_emit->eax, m_block->size);
   2790       m_emit->mul(m_emit->dword[m_emit->rip + GetFetchMemoryAccessTimePtr()]);
   2791       m_emit->add(m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)], m_emit->eax);
   2792     }
   2793     else
   2794     {
   2795       m_emit->add(m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)],
   2796                   static_cast<u32>(m_block->uncached_fetch_ticks));
   2797     }
   2798   }
   2799   else if (m_block->icache_line_count > 0)
   2800   {
   2801     VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK;
   2802     for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
   2803     {
   2804       const VirtualMemoryAddress tag = GetICacheTagForAddress(current_pc);
   2805       const TickCount fill_ticks = GetICacheFillTicks(current_pc);
   2806       if (fill_ticks <= 0)
   2807         continue;
   2808 
   2809       const u32 line = GetICacheLine(current_pc);
   2810       const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32));
   2811       Xbyak::Label cache_hit;
   2812 
   2813       m_emit->cmp(m_emit->dword[GetCPUPtrReg() + offset], tag);
   2814       m_emit->je(cache_hit);
   2815       m_emit->mov(m_emit->dword[GetCPUPtrReg() + offset], tag);
   2816       m_emit->add(m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)], static_cast<u32>(fill_ticks));
   2817       m_emit->L(cache_hit);
   2818     }
   2819   }
   2820 }
   2821 
   2822 void CodeGenerator::EmitBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
   2823 {
   2824   const auto ram_ptr_reg = GetHostReg64(RARG1);
   2825   const auto shadow_ptr_reg = GetHostReg64(RARG2);
   2826   const auto temp_reg = GetHostReg64(RARG3);
   2827   const auto temp_reg32 = GetHostReg32(RARG3);
   2828 
   2829   // store it first to reduce code size, because we can offset
   2830   m_emit->mov(ram_ptr_reg, static_cast<size_t>(reinterpret_cast<uintptr_t>(ram_ptr)));
   2831   m_emit->mov(shadow_ptr_reg, static_cast<size_t>(reinterpret_cast<uintptr_t>(shadow_ptr)));
   2832 
   2833   bool first = true;
   2834   u32 offset = 0;
   2835   while (size >= 16)
   2836   {
   2837     const Xbyak::Xmm& dst = first ? m_emit->xmm0 : m_emit->xmm1;
   2838     m_emit->movups(dst, m_emit->xword[ram_ptr_reg + offset]);
   2839     m_emit->pcmpeqd(dst, m_emit->xword[shadow_ptr_reg + offset]);
   2840     if (!first)
   2841       m_emit->pand(m_emit->xmm0, dst);
   2842     else
   2843       first = false;
   2844 
   2845     offset += 16;
   2846     size -= 16;
   2847   }
   2848 
   2849   // TODO: better codegen for 16 byte aligned blocks
   2850   if (!first)
   2851   {
   2852     m_emit->movmskps(temp_reg32, m_emit->xmm0);
   2853     m_emit->cmp(temp_reg32, 0xf);
   2854     m_emit->jne(CodeCache::g_discard_and_recompile_block);
   2855   }
   2856 
   2857   while (size >= 8)
   2858   {
   2859     m_emit->mov(temp_reg, m_emit->qword[ram_ptr_reg + offset]);
   2860     m_emit->cmp(temp_reg, m_emit->qword[shadow_ptr_reg + offset]);
   2861     m_emit->jne(CodeCache::g_discard_and_recompile_block);
   2862     offset += 8;
   2863     size -= 8;
   2864   }
   2865 
   2866   while (size >= 4)
   2867   {
   2868     m_emit->mov(temp_reg32, m_emit->dword[ram_ptr_reg + offset]);
   2869     m_emit->cmp(temp_reg32, m_emit->dword[shadow_ptr_reg + offset]);
   2870     m_emit->jne(CodeCache::g_discard_and_recompile_block);
   2871     offset += 4;
   2872     size -= 4;
   2873   }
   2874 
   2875   DebugAssert(size == 0);
   2876 }
   2877 
   2878 void CodeGenerator::EmitStallUntilGTEComplete()
   2879 {
   2880   m_emit->mov(GetHostReg32(RRETURN), m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)]);
   2881   m_emit->mov(GetHostReg32(RARG1), m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, gte_completion_tick)]);
   2882 
   2883   if (m_delayed_cycles_add > 0)
   2884   {
   2885     m_emit->add(GetHostReg32(RRETURN), static_cast<u32>(m_delayed_cycles_add));
   2886     m_delayed_cycles_add = 0;
   2887   }
   2888 
   2889   m_emit->cmp(GetHostReg32(RARG1), GetHostReg32(RRETURN));
   2890   m_emit->cmova(GetHostReg32(RRETURN), GetHostReg32(RARG1));
   2891   m_emit->mov(m_emit->dword[GetCPUPtrReg() + OFFSETOF(State, pending_ticks)], GetHostReg32(RRETURN));
   2892 }
   2893 
   2894 void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
   2895 {
   2896   const s64 jump_distance =
   2897     static_cast<s64>(reinterpret_cast<intptr_t>(address) - reinterpret_cast<intptr_t>(GetCurrentCodePointer()));
   2898   if (Xbyak::inner::IsInInt32(static_cast<u64>(jump_distance)))
   2899   {
   2900     m_emit->jmp(address, Xbyak::CodeGenerator::T_NEAR);
   2901     return;
   2902   }
   2903 
   2904   Assert(allow_scratch);
   2905 
   2906   Value temp = m_register_cache.AllocateScratch(RegSize_64);
   2907   m_emit->mov(GetHostReg64(temp), reinterpret_cast<uintptr_t>(address));
   2908   m_emit->jmp(GetHostReg64(temp));
   2909 }
   2910 
   2911 void CodeGenerator::EmitBranch(LabelType* label)
   2912 {
   2913   m_emit->jmp(*label);
   2914 }
   2915 
   2916 void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, HostReg value, RegSize size,
   2917                                           LabelType* label)
   2918 {
   2919   switch (condition)
   2920   {
   2921     case Condition::NotEqual:
   2922     case Condition::Equal:
   2923     case Condition::Overflow:
   2924     case Condition::Greater:
   2925     case Condition::GreaterEqual:
   2926     case Condition::LessEqual:
   2927     case Condition::Less:
   2928     case Condition::Above:
   2929     case Condition::AboveEqual:
   2930     case Condition::Below:
   2931     case Condition::BelowEqual:
   2932       Panic("Needs a comparison value");
   2933       return;
   2934 
   2935     case Condition::Negative:
   2936     case Condition::PositiveOrZero:
   2937     case Condition::NotZero:
   2938     case Condition::Zero:
   2939     {
   2940       switch (size)
   2941       {
   2942         case RegSize_8:
   2943           m_emit->test(GetHostReg8(value), GetHostReg8(value));
   2944           break;
   2945         case RegSize_16:
   2946           m_emit->test(GetHostReg16(value), GetHostReg16(value));
   2947           break;
   2948         case RegSize_32:
   2949           m_emit->test(GetHostReg32(value), GetHostReg32(value));
   2950           break;
   2951         case RegSize_64:
   2952           m_emit->test(GetHostReg64(value), GetHostReg64(value));
   2953           break;
   2954         default:
   2955           UnreachableCode();
   2956           break;
   2957       }
   2958 
   2959       EmitConditionalBranch(condition, invert, label);
   2960       return;
   2961     }
   2962 
   2963     case Condition::Always:
   2964       m_emit->jmp(*label);
   2965       return;
   2966 
   2967     default:
   2968       UnreachableCode();
   2969       return;
   2970   }
   2971 }
   2972 
   2973 void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, HostReg lhs, const Value& rhs,
   2974                                           LabelType* label)
   2975 {
   2976   switch (condition)
   2977   {
   2978     case Condition::NotEqual:
   2979     case Condition::Equal:
   2980     case Condition::Overflow:
   2981     case Condition::Greater:
   2982     case Condition::GreaterEqual:
   2983     case Condition::LessEqual:
   2984     case Condition::Less:
   2985     case Condition::Above:
   2986     case Condition::AboveEqual:
   2987     case Condition::Below:
   2988     case Condition::BelowEqual:
   2989     {
   2990       EmitCmp(lhs, rhs);
   2991       EmitConditionalBranch(condition, invert, label);
   2992       return;
   2993     }
   2994 
   2995     case Condition::Negative:
   2996     case Condition::PositiveOrZero:
   2997     case Condition::NotZero:
   2998     case Condition::Zero:
   2999     {
   3000       Assert(!rhs.IsValid() || (rhs.IsConstant() && rhs.GetS64ConstantValue() == 0));
   3001       EmitConditionalBranch(condition, invert, lhs, rhs.size, label);
   3002       return;
   3003     }
   3004 
   3005     case Condition::Always:
   3006       m_emit->jmp(*label);
   3007       return;
   3008 
   3009     default:
   3010       UnreachableCode();
   3011       return;
   3012   }
   3013 }
   3014 
   3015 void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, LabelType* label)
   3016 {
   3017   switch (condition)
   3018   {
   3019     case Condition::Always:
   3020       m_emit->jmp(*label);
   3021       break;
   3022 
   3023     case Condition::NotEqual:
   3024       invert ? m_emit->je(*label) : m_emit->jne(*label);
   3025       break;
   3026 
   3027     case Condition::Equal:
   3028       invert ? m_emit->jne(*label) : m_emit->je(*label);
   3029       break;
   3030 
   3031     case Condition::Overflow:
   3032       invert ? m_emit->jno(*label) : m_emit->jo(*label);
   3033       break;
   3034 
   3035     case Condition::Greater:
   3036       invert ? m_emit->jng(*label) : m_emit->jg(*label);
   3037       break;
   3038 
   3039     case Condition::GreaterEqual:
   3040       invert ? m_emit->jnge(*label) : m_emit->jge(*label);
   3041       break;
   3042 
   3043     case Condition::Less:
   3044       invert ? m_emit->jnl(*label) : m_emit->jl(*label);
   3045       break;
   3046 
   3047     case Condition::LessEqual:
   3048       invert ? m_emit->jnle(*label) : m_emit->jle(*label);
   3049       break;
   3050 
   3051     case Condition::Negative:
   3052       invert ? m_emit->jns(*label) : m_emit->js(*label);
   3053       break;
   3054 
   3055     case Condition::PositiveOrZero:
   3056       invert ? m_emit->js(*label) : m_emit->jns(*label);
   3057       break;
   3058 
   3059     case Condition::Above:
   3060       invert ? m_emit->jna(*label) : m_emit->ja(*label);
   3061       break;
   3062 
   3063     case Condition::AboveEqual:
   3064       invert ? m_emit->jnae(*label) : m_emit->jae(*label);
   3065       break;
   3066 
   3067     case Condition::Below:
   3068       invert ? m_emit->jnb(*label) : m_emit->jb(*label);
   3069       break;
   3070 
   3071     case Condition::BelowEqual:
   3072       invert ? m_emit->jnbe(*label) : m_emit->jbe(*label);
   3073       break;
   3074 
   3075     case Condition::NotZero:
   3076       invert ? m_emit->jz(*label) : m_emit->jnz(*label);
   3077       break;
   3078 
   3079     case Condition::Zero:
   3080       invert ? m_emit->jnz(*label) : m_emit->jz(*label);
   3081       break;
   3082 
   3083     default:
   3084       UnreachableCode();
   3085       break;
   3086   }
   3087 }
   3088 
   3089 void CodeGenerator::EmitBranchIfBitSet(HostReg reg, RegSize size, u8 bit, LabelType* label)
   3090 {
   3091   if (bit < 8)
   3092   {
   3093     // same size, probably faster
   3094     switch (size)
   3095     {
   3096       case RegSize_8:
   3097         m_emit->test(GetHostReg8(reg), (1u << bit));
   3098         m_emit->jnz(*label);
   3099         break;
   3100 
   3101       case RegSize_16:
   3102         m_emit->test(GetHostReg16(reg), (1u << bit));
   3103         m_emit->jnz(*label);
   3104         break;
   3105 
   3106       case RegSize_32:
   3107         m_emit->test(GetHostReg32(reg), (1u << bit));
   3108         m_emit->jnz(*label);
   3109         break;
   3110 
   3111       default:
   3112         UnreachableCode();
   3113         break;
   3114     }
   3115   }
   3116   else
   3117   {
   3118     switch (size)
   3119     {
   3120       case RegSize_8:
   3121         m_emit->bt(GetHostReg8(reg), bit);
   3122         m_emit->jc(*label);
   3123         break;
   3124 
   3125       case RegSize_16:
   3126         m_emit->bt(GetHostReg16(reg), bit);
   3127         m_emit->jc(*label);
   3128         break;
   3129 
   3130       case RegSize_32:
   3131         m_emit->bt(GetHostReg32(reg), bit);
   3132         m_emit->jc(*label);
   3133         break;
   3134 
   3135       default:
   3136         UnreachableCode();
   3137         break;
   3138     }
   3139   }
   3140 }
   3141 
   3142 void CodeGenerator::EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, LabelType* label)
   3143 {
   3144   if (bit < 8)
   3145   {
   3146     // same size, probably faster
   3147     switch (size)
   3148     {
   3149       case RegSize_8:
   3150         m_emit->test(GetHostReg8(reg), (1u << bit));
   3151         m_emit->jz(*label);
   3152         break;
   3153 
   3154       case RegSize_16:
   3155         m_emit->test(GetHostReg16(reg), (1u << bit));
   3156         m_emit->jz(*label);
   3157         break;
   3158 
   3159       case RegSize_32:
   3160         m_emit->test(GetHostReg32(reg), (1u << bit));
   3161         m_emit->jz(*label);
   3162         break;
   3163 
   3164       default:
   3165         UnreachableCode();
   3166         break;
   3167     }
   3168   }
   3169   else
   3170   {
   3171     switch (size)
   3172     {
   3173       case RegSize_8:
   3174         m_emit->bt(GetHostReg8(reg), bit);
   3175         m_emit->jnc(*label);
   3176         break;
   3177 
   3178       case RegSize_16:
   3179         m_emit->bt(GetHostReg16(reg), bit);
   3180         m_emit->jnc(*label);
   3181         break;
   3182 
   3183       case RegSize_32:
   3184         m_emit->bt(GetHostReg32(reg), bit);
   3185         m_emit->jnc(*label);
   3186         break;
   3187 
   3188       default:
   3189         UnreachableCode();
   3190         break;
   3191     }
   3192   }
   3193 }
   3194 
   3195 void CodeGenerator::EmitBindLabel(LabelType* label)
   3196 {
   3197   m_emit->L(*label);
   3198 }
   3199 
   3200 void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr)
   3201 {
   3202   const s64 displacement =
   3203     static_cast<s64>(reinterpret_cast<size_t>(ptr) - reinterpret_cast<size_t>(m_emit->getCurr())) + 2;
   3204   if (Xbyak::inner::IsInInt32(static_cast<u64>(displacement)))
   3205     m_emit->lea(GetHostReg64(host_reg), m_emit->dword[m_emit->rip + ptr]);
   3206   else
   3207     m_emit->mov(GetHostReg64(host_reg), reinterpret_cast<size_t>(ptr));
   3208 }
   3209 } // namespace CPU::Recompiler
   3210 
   3211 #endif // CPU_ARCH_X64