duckstation

duckstation, archived from the revision just before upstream relicensed the project as proprietary software; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git

cpu_recompiler_code_generator_aarch64.cpp (81114B)


      1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
      3 
      4 #include "common/align.h"
      5 #include "common/assert.h"
      6 #include "common/log.h"
      7 #include "common/memmap.h"
      8 
      9 #include "cpu_code_cache_private.h"
     10 #include "cpu_core.h"
     11 #include "cpu_core_private.h"
     12 #include "cpu_recompiler_code_generator.h"
     13 #include "cpu_recompiler_thunks.h"
     14 #include "settings.h"
     15 #include "timing_event.h"
     16 
     17 #ifdef CPU_ARCH_ARM64
     18 
     19 Log_SetChannel(CPU::Recompiler);
     20 
     21 #ifdef ENABLE_HOST_DISASSEMBLY
     22 #include "vixl/aarch64/disasm-aarch64.h"
     23 #endif
     24 
     25 namespace a64 = vixl::aarch64;
     26 
     27 namespace CPU::Recompiler {
     28 constexpr u64 FUNCTION_CALLEE_SAVED_SPACE_RESERVE = 80;  // 8 registers
     29 constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 144 bytes
     30 constexpr u64 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE;
     31 
     32 static constexpr u32 TRAMPOLINE_AREA_SIZE = 4 * 1024;
     33 static std::unordered_map<const void*, u32> s_trampoline_targets;
     34 static u8* s_trampoline_start_ptr = nullptr;
     35 static u32 s_trampoline_used = 0;
     36 } // namespace CPU::Recompiler
     37 
     38 bool CPU::Recompiler::armIsCallerSavedRegister(u32 id)
     39 {
     40   // same on both linux and windows
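        // x0-x17 are caller-saved under AAPCS64; x18, the platform register, is also counted here.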
     41   return (id <= 18);
     42 }
     43 
     44 void CPU::Recompiler::armEmitMov(a64::Assembler* armAsm, const a64::Register& rd, u64 imm)
     45 {
     46   DebugAssert(vixl::IsUint32(imm) || vixl::IsInt32(imm) || rd.Is64Bits());
     47   DebugAssert(rd.GetCode() != a64::sp.GetCode());
     48 
     49   if (imm == 0)
     50   {
     51     armAsm->mov(rd, a64::Assembler::AppropriateZeroRegFor(rd));
     52     return;
     53   }
     54 
     55   // The worst case for size is mov 64-bit immediate to sp:
     56   //  * up to 4 instructions to materialise the constant
     57   //  * 1 instruction to move to sp
     58 
     59   // Immediates on Aarch64 can be produced using an initial value, and zero to
     60   // three move keep operations.
     61   //
     62   // Initial values can be generated with:
     63   //  1. 64-bit move zero (movz).
     64   //  2. 32-bit move inverted (movn).
     65   //  3. 64-bit move inverted.
     66   //  4. 32-bit orr immediate.
     67   //  5. 64-bit orr immediate.
     68   // Move-keep may then be used to modify each of the 16-bit half words.
     69   //
     70   // The code below supports all five initial value generators, and
     71   // applying move-keep operations to move-zero and move-inverted initial
     72   // values.
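        // Informal example: 0x0000'CAFE'0000'BEEF has two non-zero halfwords and can be built as
        //   movz x0, #0xBEEF            (halfword 0)
        //   movk x0, #0xCAFE, lsl #32   (halfword 2)
        // while 0xFFFF'FFFF'1234'FFFF is cheaper in the inverted form:
        //   movn x0, #0xEDCB, lsl #16   (~0x1234 = 0xEDCB; the remaining halfwords become 0xffff)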
     73 
     74   // Try to move the immediate in one instruction, and if that fails, switch to
     75   // using multiple instructions.
     76   const unsigned reg_size = rd.GetSizeInBits();
     77 
     78   if (a64::Assembler::IsImmMovz(imm, reg_size) && !rd.IsSP())
     79   {
     80     // Immediate can be represented in a move zero instruction. Movz can't write
     81     // to the stack pointer.
     82     armAsm->movz(rd, imm);
     83     return;
     84   }
     85   else if (a64::Assembler::IsImmMovn(imm, reg_size) && !rd.IsSP())
     86   {
     87     // Immediate can be represented in a move negative instruction. Movn can't
     88     // write to the stack pointer.
     89     armAsm->movn(rd, rd.Is64Bits() ? ~imm : (~imm & a64::kWRegMask));
     90     return;
     91   }
     92   else if (a64::Assembler::IsImmLogical(imm, reg_size))
     93   {
     94     // Immediate can be represented in a logical orr instruction.
     95     DebugAssert(!rd.IsZero());
     96     armAsm->orr(rd, a64::Assembler::AppropriateZeroRegFor(rd), imm);
     97     return;
     98   }
     99 
    100   // Generic immediate case. Imm will be represented by
    101   //   [imm3, imm2, imm1, imm0], where each imm is 16 bits.
    102   // A move-zero or move-inverted is generated for the first non-zero or
    103   // non-0xffff immX, and a move-keep for subsequent non-zero immX.
    104 
    105   uint64_t ignored_halfword = 0;
    106   bool invert_move = false;
    107   // If the number of 0xffff halfwords is greater than the number of 0x0000
    108   // halfwords, it's more efficient to use move-inverted.
    109   if (vixl::CountClearHalfWords(~imm, reg_size) > vixl::CountClearHalfWords(imm, reg_size))
    110   {
    111     ignored_halfword = 0xffff;
    112     invert_move = true;
    113   }
    114 
    115   // Iterate through the halfwords. Use movn/movz for the first non-ignored
    116   // halfword, and movk for subsequent halfwords.
    117   DebugAssert((reg_size % 16) == 0);
    118   bool first_mov_done = false;
    119   for (unsigned i = 0; i < (reg_size / 16); i++)
    120   {
    121     uint64_t imm16 = (imm >> (16 * i)) & 0xffff;
    122     if (imm16 != ignored_halfword)
    123     {
    124       if (!first_mov_done)
    125       {
    126         if (invert_move)
    127           armAsm->movn(rd, ~imm16 & 0xffff, 16 * i);
    128         else
    129           armAsm->movz(rd, imm16, 16 * i);
    130         first_mov_done = true;
    131       }
    132       else
    133       {
    134         // Construct a wider constant.
    135         armAsm->movk(rd, imm16, 16 * i);
    136       }
    137     }
    138   }
    139 
    140   DebugAssert(first_mov_done);
    141 }
    142 
    143 s64 CPU::Recompiler::armGetPCDisplacement(const void* current, const void* target)
    144 {
    145   // pxAssert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
    146   // pxAssert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4));
    147   return static_cast<s64>((reinterpret_cast<ptrdiff_t>(target) - reinterpret_cast<ptrdiff_t>(current)) >> 2);
    148 }
    149 
    150 bool CPU::Recompiler::armIsInAdrpRange(vixl::aarch64::Assembler* armAsm, const void* addr)
    151 {
    152   const void* cur = armAsm->GetCursorAddress<const void*>();
    153   const void* current_code_ptr_page =
    154     reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF));
    155   const void* ptr_page =
    156     reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF));
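        // armGetPCDisplacement already divides by 4, so the extra >>10 below produces a 4 KiB page
        // delta (>>12 in total); adrp's signed 21-bit page immediate spans roughly +/-4 GiB.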
    157   const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
    158   const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu);
    159 
    160   return (vixl::IsInt21(page_displacement) &&
    161           (a64::Assembler::IsImmAddSub(page_offset) || a64::Assembler::IsImmLogical(page_offset, 64)));
    162 }
    163 
    164 void CPU::Recompiler::armMoveAddressToReg(a64::Assembler* armAsm, const a64::Register& reg, const void* addr)
    165 {
    166   DebugAssert(reg.IsX());
    167 
    168   const void* cur = armAsm->GetCursorAddress<const void*>();
    169   const void* current_code_ptr_page =
    170     reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF));
    171   const void* ptr_page =
    172     reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF));
    173   const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
    174   const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu);
    175   if (vixl::IsInt21(page_displacement) && a64::Assembler::IsImmAddSub(page_offset))
    176   {
    177     armAsm->adrp(reg, page_displacement);
    178     armAsm->add(reg, reg, page_offset);
    179   }
    180   else if (vixl::IsInt21(page_displacement) && a64::Assembler::IsImmLogical(page_offset, 64))
    181   {
    182     armAsm->adrp(reg, page_displacement);
    183     armAsm->orr(reg, reg, page_offset);
    184   }
    185   else
    186   {
    187     armEmitMov(armAsm, reg, reinterpret_cast<uintptr_t>(addr));
    188   }
    189 }
    190 void CPU::Recompiler::armEmitJmp(a64::Assembler* armAsm, const void* ptr, bool force_inline)
    191 {
    192   const void* cur = armAsm->GetCursorAddress<const void*>();
    193   s64 displacement = armGetPCDisplacement(cur, ptr);
    194   bool use_blr = !vixl::IsInt26(displacement);
    195   bool use_trampoline = use_blr && !armIsInAdrpRange(armAsm, ptr);
    196   if (use_blr && use_trampoline && !force_inline)
    197   {
    198     if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline)
    199     {
    200       displacement = armGetPCDisplacement(cur, trampoline);
    201       use_blr = !vixl::IsInt26(displacement);
    202     }
    203   }
    204 
    205   if (use_blr)
    206   {
    207     armMoveAddressToReg(armAsm, RXSCRATCH, ptr);
    208     armAsm->br(RXSCRATCH);
    209   }
    210   else
    211   {
    212     armAsm->b(displacement);
    213   }
    214 }
    215 
    216 void CPU::Recompiler::armEmitCall(a64::Assembler* armAsm, const void* ptr, bool force_inline)
    217 {
    218   const void* cur = armAsm->GetCursorAddress<const void*>();
    219   s64 displacement = armGetPCDisplacement(cur, ptr);
    220   bool use_blr = !vixl::IsInt26(displacement);
    221   bool use_trampoline = use_blr && !armIsInAdrpRange(armAsm, ptr);
    222   if (use_blr && use_trampoline && !force_inline)
    223   {
    224     if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline)
    225     {
    226       displacement = armGetPCDisplacement(cur, trampoline);
    227       use_blr = !vixl::IsInt26(displacement);
    228     }
    229   }
    230 
    231   if (use_blr)
    232   {
    233     armMoveAddressToReg(armAsm, RXSCRATCH, ptr);
    234     armAsm->blr(RXSCRATCH);
    235   }
    236   else
    237   {
    238     armAsm->bl(displacement);
    239   }
    240 }
    241 
    242 void CPU::Recompiler::armEmitCondBranch(a64::Assembler* armAsm, a64::Condition cond, const void* ptr)
    243 {
    244   const s64 jump_distance = static_cast<s64>(reinterpret_cast<intptr_t>(ptr) -
    245                                              reinterpret_cast<intptr_t>(armAsm->GetCursorAddress<const void*>()));
    246   // pxAssert(Common::IsAligned(jump_distance, 4));
    247 
    248   if (a64::Instruction::IsValidImmPCOffset(a64::CondBranchType, jump_distance >> 2))
    249   {
    250     armAsm->b(jump_distance >> 2, cond);
    251   }
    252   else
    253   {
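          // The target is outside B.cond's +/-1 MiB range: invert the condition and branch over an
          // unconditional B, which reaches +/-128 MiB.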
    254     a64::Label branch_not_taken;
    255     armAsm->b(&branch_not_taken, InvertCondition(cond));
    256 
    257     const s64 new_jump_distance = static_cast<s64>(reinterpret_cast<intptr_t>(ptr) -
    258                                                    reinterpret_cast<intptr_t>(armAsm->GetCursorAddress<const void*>()));
    259     armAsm->b(new_jump_distance >> 2);
    260     armAsm->bind(&branch_not_taken);
    261   }
    262 }
    263 
    264 void CPU::Recompiler::armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg,
    265                                      const void* addr, bool sign_extend_word)
    266 {
    267   const void* cur = armAsm->GetCursorAddress<const void*>();
    268   const void* current_code_ptr_page =
    269     reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF));
    270   const void* ptr_page =
    271     reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF));
    272   const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
    273   const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu);
    274   a64::MemOperand memop;
    275 
    276   const vixl::aarch64::Register xreg = reg.X();
    277   if (vixl::IsInt21(page_displacement))
    278   {
    279     armAsm->adrp(xreg, page_displacement);
    280     memop = vixl::aarch64::MemOperand(xreg, static_cast<int64_t>(page_offset));
    281   }
    282   else
    283   {
    284     armMoveAddressToReg(armAsm, xreg, addr);
    285     memop = vixl::aarch64::MemOperand(xreg);
    286   }
    287 
    288   if (sign_extend_word)
    289     armAsm->ldrsw(reg, memop);
    290   else
    291     armAsm->ldr(reg, memop);
    292 }
    293 
    294 void CPU::Recompiler::armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg,
    295                                       const void* addr, const vixl::aarch64::Register& tempreg)
    296 {
    297   DebugAssert(tempreg.IsX());
    298 
    299   const void* cur = armAsm->GetCursorAddress<const void*>();
    300   const void* current_code_ptr_page =
    301     reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF));
    302   const void* ptr_page =
    303     reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF));
    304   const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
    305   const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu);
    306 
    307   if (vixl::IsInt21(page_displacement))
    308   {
    309     armAsm->adrp(tempreg, page_displacement);
    310     armAsm->str(reg, vixl::aarch64::MemOperand(tempreg, static_cast<int64_t>(page_offset)));
    311   }
    312   else
    313   {
    314     armMoveAddressToReg(armAsm, tempreg, addr);
    315     armAsm->str(reg, vixl::aarch64::MemOperand(tempreg));
    316   }
    317 }
    318 
    319 u8* CPU::Recompiler::armGetJumpTrampoline(const void* target)
    320 {
    321   auto it = s_trampoline_targets.find(target);
    322   if (it != s_trampoline_targets.end())
    323     return s_trampoline_start_ptr + it->second;
    324 
    325   // align to 16 bytes?
    326   const u32 offset = s_trampoline_used; // Common::AlignUpPow2(s_trampoline_used, 16);
    327 
    328   // 4 movs plus a jump
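        // worst case is 5 instructions (4 movs + br) = 20 bytes, hence the check below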
    329   if (TRAMPOLINE_AREA_SIZE - offset < 20)
    330   {
    331     Panic("Ran out of space in constant pool");
    332     return nullptr;
    333   }
    334 
    335   u8* start = s_trampoline_start_ptr + offset;
    336   a64::Assembler armAsm(start, TRAMPOLINE_AREA_SIZE - offset);
    337 #ifdef VIXL_DEBUG
    338   vixl::CodeBufferCheckScope armAsmCheck(&armAsm, TRAMPOLINE_AREA_SIZE - offset,
    339                                          vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
    340 #endif
    341   armMoveAddressToReg(&armAsm, RXSCRATCH, target);
    342   armAsm.br(RXSCRATCH);
    343   armAsm.FinalizeCode();
    344 
    345   const u32 size = static_cast<u32>(armAsm.GetSizeOfCodeGenerated());
    346   DebugAssert(size < 20);
    347   s_trampoline_targets.emplace(target, offset);
    348   s_trampoline_used = offset + static_cast<u32>(size);
    349 
    350   MemMap::FlushInstructionCache(start, size);
    351   return start;
    352 }
    353 
    354 void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
    355 {
    356 #ifdef ENABLE_HOST_DISASSEMBLY
    357   class MyDisassembler : public a64::Disassembler
    358   {
    359   protected:
    360     void ProcessOutput(const a64::Instruction* instr) override
    361     {
    362       DEBUG_LOG("0x{:016X}  {:08X}\t\t{}", reinterpret_cast<uint64_t>(instr), instr->GetInstructionBits(), GetOutput());
    363     }
    364   };
    365 
    366   a64::Decoder decoder;
    367   MyDisassembler disas;
    368   decoder.AppendVisitor(&disas);
    369   decoder.Decode(static_cast<const a64::Instruction*>(start),
    370                  reinterpret_cast<const a64::Instruction*>(static_cast<const u8*>(start) + size));
    371 #else
    372   ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");
    373 #endif
    374 }
    375 
    376 u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size)
    377 {
    378   return size / a64::kInstructionSize;
    379 }
    380 
    381 u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
    382 {
    383   using namespace a64;
    384   using namespace CPU::Recompiler;
    385 
    386   const s64 disp = armGetPCDisplacement(code, dst);
    387   DebugAssert(vixl::IsInt26(disp));
    388 
    389   const u32 new_code = B | Assembler::ImmUncondBranch(disp);
    390   std::memcpy(code, &new_code, sizeof(new_code));
    391   if (flush_icache)
    392     MemMap::FlushInstructionCache(code, kInstructionSize);
    393 
    394   return kInstructionSize;
    395 }
    396 
    397 u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
    398 {
    399   using namespace vixl::aarch64;
    400   using namespace CPU::Recompiler;
    401 
    402 #define PTR(x) a64::MemOperand(RSTATE, (s64)(((u8*)(x)) - ((u8*)&g_state)))
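        // PTR(x) forms an RSTATE-relative MemOperand for a field of g_state, so the stubs below can
        // address CPU state without materialising absolute addresses.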
    403 
    404   Assembler actual_asm(static_cast<u8*>(code), code_size);
    405   Assembler* armAsm = &actual_asm;
    406 
    407 #ifdef VIXL_DEBUG
    408   vixl::CodeBufferCheckScope asm_check(armAsm, code_size, vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
    409 #endif
    410 
    411   Label dispatch;
    412 
    413   g_enter_recompiler = armAsm->GetCursorAddress<decltype(g_enter_recompiler)>();
    414   {
    415     // reserve some space for saving caller-saved registers
    416     armAsm->sub(sp, sp, CPU::Recompiler::FUNCTION_STACK_SIZE);
    417 
    418     // Need the CPU state for basically everything :-)
    419     armMoveAddressToReg(armAsm, RSTATE, &g_state);
    420 
    421     // Fastmem setup, oldrec doesn't need it
    422     if (IsUsingFastmem() && g_settings.cpu_execution_mode != CPUExecutionMode::Recompiler)
    423       armAsm->ldr(RMEMBASE, PTR(&g_state.fastmem_base));
    424 
    425     // Fall through to event dispatcher
    426   }
    427 
    428   // check for pending events, then fall through to the dispatcher
    429   g_check_events_and_dispatch = armAsm->GetCursorAddress<const void*>();
    430   {
    431     Label skip_event_check;
    432     armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks));
    433     armAsm->ldr(RWARG2, PTR(&g_state.downcount));
    434     armAsm->cmp(RWARG1, RWARG2);
    435     armAsm->b(&skip_event_check, lt);
    436 
    437     g_run_events_and_dispatch = armAsm->GetCursorAddress<const void*>();
    438     armEmitCall(armAsm, reinterpret_cast<const void*>(&TimingEvents::RunEvents), true);
    439 
    440     armAsm->bind(&skip_event_check);
    441   }
    442 
    443   // TODO: align?
    444   g_dispatcher = armAsm->GetCursorAddress<const void*>();
    445   {
    446     armAsm->bind(&dispatch);
    447 
    448     // RXARG2 <- g_code_lut[pc >> 16]
    449     armAsm->ldr(RWARG1, PTR(&g_state.pc));
    450     armMoveAddressToReg(armAsm, RXARG3, g_code_lut.data());
    451     armAsm->lsr(RWARG2, RWARG1, 16);
    452     armAsm->lsr(RWARG1, RWARG1, 2);
    453     armAsm->ldr(RXARG2, MemOperand(RXARG3, RXARG2, LSL, 3));
    454 
    455     // blr(RXARG2[pc >> 2])
    456     armAsm->ldr(RXARG1, MemOperand(RXARG2, RXARG1, LSL, 3));
    457     armAsm->blr(RXARG1);
    458   }
    459 
    460   g_compile_or_revalidate_block = armAsm->GetCursorAddress<const void*>();
    461   {
    462     armAsm->ldr(RWARG1, PTR(&g_state.pc));
    463     armEmitCall(armAsm, reinterpret_cast<const void*>(&CompileOrRevalidateBlock), true);
    464     armAsm->b(&dispatch);
    465   }
    466 
    467   g_discard_and_recompile_block = armAsm->GetCursorAddress<const void*>();
    468   {
    469     armAsm->ldr(RWARG1, PTR(&g_state.pc));
    470     armEmitCall(armAsm, reinterpret_cast<const void*>(&DiscardAndRecompileBlock), true);
    471     armAsm->b(&dispatch);
    472   }
    473 
    474   g_interpret_block = armAsm->GetCursorAddress<const void*>();
    475   {
    476     armEmitCall(armAsm, reinterpret_cast<const void*>(GetInterpretUncachedBlockFunction()), true);
    477     armAsm->b(&dispatch);
    478   }
    479 
    480   armAsm->FinalizeCode();
    481 
    482   // TODO: align?
    483   s_trampoline_targets.clear();
    484   s_trampoline_start_ptr = static_cast<u8*>(code) + armAsm->GetCursorOffset();
    485   s_trampoline_used = 0;
    486 
    487 #undef PTR
    488   return static_cast<u32>(armAsm->GetCursorOffset()) + TRAMPOLINE_AREA_SIZE;
    489 }
    490 
    491 namespace CPU::Recompiler {
    492 
    493 constexpr HostReg RCPUPTR = 19;
    494 constexpr HostReg RMEMBASEPTR = 20;
    495 constexpr HostReg RRETURN = 0;
    496 constexpr HostReg RARG1 = 0;
    497 constexpr HostReg RARG2 = 1;
    498 constexpr HostReg RARG3 = 2;
    499 constexpr HostReg RARG4 = 3;
    500 constexpr HostReg RSCRATCH = 8;
    501 
    502 static const a64::WRegister GetHostReg8(HostReg reg)
    503 {
    504   return a64::WRegister(reg);
    505 }
    506 
    507 static const a64::WRegister GetHostReg8(const Value& value)
    508 {
    509   DebugAssert(value.size == RegSize_8 && value.IsInHostRegister());
    510   return a64::WRegister(value.host_reg);
    511 }
    512 
    513 static const a64::WRegister GetHostReg16(HostReg reg)
    514 {
    515   return a64::WRegister(reg);
    516 }
    517 
    518 static const a64::WRegister GetHostReg16(const Value& value)
    519 {
    520   DebugAssert(value.size == RegSize_16 && value.IsInHostRegister());
    521   return a64::WRegister(value.host_reg);
    522 }
    523 
    524 static const a64::WRegister GetHostReg32(HostReg reg)
    525 {
    526   return a64::WRegister(reg);
    527 }
    528 
    529 static const a64::WRegister GetHostReg32(const Value& value)
    530 {
    531   DebugAssert(value.size == RegSize_32 && value.IsInHostRegister());
    532   return a64::WRegister(value.host_reg);
    533 }
    534 
    535 static const a64::XRegister GetHostReg64(HostReg reg)
    536 {
    537   return a64::XRegister(reg);
    538 }
    539 
    540 static const a64::XRegister GetHostReg64(const Value& value)
    541 {
    542   DebugAssert(value.size == RegSize_64 && value.IsInHostRegister());
    543   return a64::XRegister(value.host_reg);
    544 }
    545 
    546 static const a64::XRegister GetCPUPtrReg()
    547 {
    548   return GetHostReg64(RCPUPTR);
    549 }
    550 
    551 static const a64::XRegister GetFastmemBasePtrReg()
    552 {
    553   return GetHostReg64(RMEMBASEPTR);
    554 }
    555 
    556 CodeGenerator::CodeGenerator()
    557   : m_register_cache(*this), m_near_emitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeCodePointer()),
    558                                             CPU::CodeCache::GetFreeCodeSpace(), a64::PositionDependentCode),
    559     m_far_emitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeFarCodePointer()),
    560                   CPU::CodeCache::GetFreeFarCodeSpace(), a64::PositionDependentCode),
    561     m_emit(&m_near_emitter)
    562 {
    563   // remove the temporaries from vixl's list to prevent it from using them.
    564   // eventually we won't use the macro assembler and this won't be a problem...
    565   m_near_emitter.GetScratchRegisterList()->Remove(16);
    566   m_near_emitter.GetScratchRegisterList()->Remove(17);
    567   m_far_emitter.GetScratchRegisterList()->Remove(16);
    568   m_far_emitter.GetScratchRegisterList()->Remove(17);
    569   InitHostRegs();
    570 }
    571 
    572 CodeGenerator::~CodeGenerator() = default;
    573 
    574 const char* CodeGenerator::GetHostRegName(HostReg reg, RegSize size /*= HostPointerSize*/)
    575 {
    576   static constexpr std::array<const char*, HostReg_Count> reg32_names = {
    577     {"w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",  "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
    578      "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23", "w24", "w25", "w26", "w27", "w28", "w29", "w30", "w31"}};
    579   static constexpr std::array<const char*, HostReg_Count> reg64_names = {
    580     {"x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",  "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
    581      "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31"}};
    582   if (reg >= static_cast<HostReg>(HostReg_Count))
    583     return "";
    584 
    585   switch (size)
    586   {
    587     case RegSize_32:
    588       return reg32_names[reg];
    589     case RegSize_64:
    590       return reg64_names[reg];
    591     default:
    592       return "";
    593   }
    594 }
    595 
    596 void CodeGenerator::InitHostRegs()
    597 {
    598   // TODO: function calls mess up the parameter registers if we use them.. fix it
    599   // allocate nonvolatile before volatile
    600   m_register_cache.SetHostRegAllocationOrder(
    601     {19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17});
    602   m_register_cache.SetCallerSavedHostRegs({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17});
    603   m_register_cache.SetCalleeSavedHostRegs({19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30});
    604   m_register_cache.SetCPUPtrHostReg(RCPUPTR);
    605 }
    606 
    607 void CodeGenerator::SwitchToFarCode()
    608 {
    609   m_emit = &m_far_emitter;
    610 }
    611 
    612 void CodeGenerator::SwitchToNearCode()
    613 {
    614   m_emit = &m_near_emitter;
    615 }
    616 
    617 void* CodeGenerator::GetStartNearCodePointer() const
    618 {
    619   return static_cast<u8*>(CPU::CodeCache::GetFreeCodePointer());
    620 }
    621 
    622 void* CodeGenerator::GetCurrentNearCodePointer() const
    623 {
    624   return static_cast<u8*>(CPU::CodeCache::GetFreeCodePointer()) + m_near_emitter.GetCursorOffset();
    625 }
    626 
    627 void* CodeGenerator::GetCurrentFarCodePointer() const
    628 {
    629   return static_cast<u8*>(CPU::CodeCache::GetFreeFarCodePointer()) + m_far_emitter.GetCursorOffset();
    630 }
    631 
    632 Value CodeGenerator::GetValueInHostRegister(const Value& value, bool allow_zero_register /* = true */)
    633 {
    634   if (value.IsInHostRegister())
    635     return Value::FromHostReg(&m_register_cache, value.host_reg, value.size);
    636 
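        // Register code 31 is the zero register (wzr/xzr) for the operands produced by these helpers,
        // so a constant zero does not need to be materialised.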
    637   if (value.HasConstantValue(0) && allow_zero_register)
    638     return Value::FromHostReg(&m_register_cache, static_cast<HostReg>(31), value.size);
    639 
    640   Value new_value = m_register_cache.AllocateScratch(value.size);
    641   EmitCopyValue(new_value.host_reg, value);
    642   return new_value;
    643 }
    644 
    645 Value CodeGenerator::GetValueInHostOrScratchRegister(const Value& value, bool allow_zero_register /* = true */)
    646 {
    647   if (value.IsInHostRegister())
    648     return Value::FromHostReg(&m_register_cache, value.host_reg, value.size);
    649 
    650   if (value.HasConstantValue(0) && allow_zero_register)
    651     return Value::FromHostReg(&m_register_cache, static_cast<HostReg>(31), value.size);
    652 
    653   Value new_value = Value::FromHostReg(&m_register_cache, RSCRATCH, value.size);
    654   EmitCopyValue(new_value.host_reg, value);
    655   return new_value;
    656 }
    657 
    658 void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */)
    659 {
    660   if (allocate_registers)
    661   {
    662     // Save the link register, since we'll be calling functions.
    663     const bool link_reg_allocated = m_register_cache.AllocateHostReg(30);
    664     DebugAssert(link_reg_allocated);
    665     UNREFERENCED_VARIABLE(link_reg_allocated);
    666 
    667     m_register_cache.AssumeCalleeSavedRegistersAreSaved();
    668 
    669     // Store the CPU struct pointer. TODO: make this better.
    670     const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR);
    671     DebugAssert(cpu_reg_allocated);
    672     UNREFERENCED_VARIABLE(cpu_reg_allocated);
    673 
    674     // If there's loadstore instructions, preload the fastmem base.
    675     if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
    676     {
    677       const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR);
    678       Assert(fastmem_reg_allocated);
    679       m_emit->Ldr(GetFastmemBasePtrReg(), a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, fastmem_base)));
    680     }
    681   }
    682 }
    683 
    684 void CodeGenerator::EmitEndBlock(bool free_registers, const void* jump_to)
    685 {
    686   if (free_registers)
    687   {
    688     if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions))
    689       m_register_cache.FreeHostReg(RMEMBASEPTR);
    690 
    691     m_register_cache.FreeHostReg(RCPUPTR);
    692     m_register_cache.FreeHostReg(30); // lr
    693 
    694     m_register_cache.PopCalleeSavedRegisters(true);
    695   }
    696 
    697   if (jump_to)
    698     armEmitJmp(m_emit, jump_to, true);
    699 }
    700 
    701 void CodeGenerator::EmitExceptionExit()
    702 {
    703   // ensure all unflushed registers are written back
    704   m_register_cache.FlushAllGuestRegisters(false, false);
    705 
    706   // the interpreter load delay might have its own value, but we'll overwrite it here anyway
    707   // technically RaiseException() and FlushPipeline() have already been called, but that should be okay
    708   m_register_cache.FlushLoadDelay(false);
    709 
    710   m_register_cache.PopCalleeSavedRegisters(false);
    711 
    712   armEmitJmp(m_emit, CodeCache::g_check_events_and_dispatch, true);
    713 }
    714 
    715 void CodeGenerator::EmitExceptionExitOnBool(const Value& value)
    716 {
    717   Assert(!value.IsConstant() && value.IsInHostRegister());
    718 
    719   m_register_cache.PushState();
    720 
    721   // TODO: This is... not great.
    722   a64::Label skip_branch;
    723   m_emit->Cbz(GetHostReg64(value.host_reg), &skip_branch);
    724   EmitBranch(GetCurrentFarCodePointer());
    725   m_emit->Bind(&skip_branch);
    726 
    727   SwitchToFarCode();
    728   EmitExceptionExit();
    729   SwitchToNearCode();
    730 
    731   m_register_cache.PopState();
    732 }
    733 
    734 const void* CodeGenerator::FinalizeBlock(u32* out_host_code_size, u32* out_host_far_code_size)
    735 {
    736   m_near_emitter.FinalizeCode();
    737   m_far_emitter.FinalizeCode();
    738 
    739   const void* code = CPU::CodeCache::GetFreeCodePointer();
    740   *out_host_code_size = static_cast<u32>(m_near_emitter.GetSizeOfCodeGenerated());
    741   *out_host_far_code_size = static_cast<u32>(m_far_emitter.GetSizeOfCodeGenerated());
    742 
    743   CPU::CodeCache::CommitCode(static_cast<u32>(m_near_emitter.GetSizeOfCodeGenerated()));
    744   CPU::CodeCache::CommitFarCode(static_cast<u32>(m_far_emitter.GetSizeOfCodeGenerated()));
    745 
    746   m_near_emitter.Reset();
    747   m_far_emitter.Reset();
    748 
    749   return code;
    750 }
    751 
    752 void CodeGenerator::EmitSignExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size)
    753 {
    754   switch (to_size)
    755   {
    756     case RegSize_16:
    757     {
    758       switch (from_size)
    759       {
    760         case RegSize_8:
    761           m_emit->sxtb(GetHostReg16(to_reg), GetHostReg8(from_reg));
    762           m_emit->and_(GetHostReg16(to_reg), GetHostReg16(to_reg), 0xFFFF);
    763           return;
    764 
    765         default:
    766           break;
    767       }
    768     }
    769     break;
    770 
    771     case RegSize_32:
    772     {
    773       switch (from_size)
    774       {
    775         case RegSize_8:
    776           m_emit->sxtb(GetHostReg32(to_reg), GetHostReg8(from_reg));
    777           return;
    778         case RegSize_16:
    779           m_emit->sxth(GetHostReg32(to_reg), GetHostReg16(from_reg));
    780           return;
    781 
    782         default:
    783           break;
    784       }
    785     }
    786     break;
    787 
    788     default:
    789       break;
    790   }
    791 
    792   Panic("Unknown sign-extend combination");
    793 }
    794 
    795 void CodeGenerator::EmitZeroExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size)
    796 {
    797   switch (to_size)
    798   {
    799     case RegSize_16:
    800     {
    801       switch (from_size)
    802       {
    803         case RegSize_8:
    804           m_emit->and_(GetHostReg16(to_reg), GetHostReg8(from_reg), 0xFF);
    805           return;
    806 
    807         default:
    808           break;
    809       }
    810     }
    811     break;
    812 
    813     case RegSize_32:
    814     {
    815       switch (from_size)
    816       {
    817         case RegSize_8:
    818           m_emit->and_(GetHostReg32(to_reg), GetHostReg8(from_reg), 0xFF);
    819           return;
    820         case RegSize_16:
    821           m_emit->and_(GetHostReg32(to_reg), GetHostReg16(from_reg), 0xFFFF);
    822           return;
    823 
    824         default:
    825           break;
    826       }
    827     }
    828     break;
    829 
    830     default:
    831       break;
    832   }
    833 
    834   Panic("Unknown sign-extend combination");
    835 }
    836 
    837 void CodeGenerator::EmitCopyValue(HostReg to_reg, const Value& value)
    838 {
    839   // TODO: mov x, 0 -> xor x, x
    840   DebugAssert(value.IsConstant() || value.IsInHostRegister());
    841 
    842   switch (value.size)
    843   {
    844     case RegSize_8:
    845     case RegSize_16:
    846     case RegSize_32:
    847     {
    848       if (value.IsConstant())
    849         m_emit->Mov(GetHostReg32(to_reg), value.constant_value);
    850       else
    851         m_emit->Mov(GetHostReg32(to_reg), GetHostReg32(value.host_reg));
    852     }
    853     break;
    854 
    855     case RegSize_64:
    856     {
    857       if (value.IsConstant())
    858         m_emit->Mov(GetHostReg64(to_reg), value.constant_value);
    859       else
    860         m_emit->Mov(GetHostReg64(to_reg), GetHostReg64(value.host_reg));
    861     }
    862     break;
    863 
    864     default:
    865       UnreachableCode();
    866       break;
    867   }
    868 }
    869 
    870 void CodeGenerator::EmitAdd(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags)
    871 {
    872   Assert(value.IsConstant() || value.IsInHostRegister());
    873 
    874   // if it's in a host register already, this is easy
    875   if (value.IsInHostRegister())
    876   {
    877     if (value.size < RegSize_64)
    878     {
    879       if (set_flags)
    880         m_emit->adds(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg));
    881       else
    882         m_emit->add(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg));
    883     }
    884     else
    885     {
    886       if (set_flags)
    887         m_emit->adds(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg));
    888       else
    889         m_emit->add(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg));
    890     }
    891 
    892     return;
    893   }
    894 
    895   // do we need temporary storage for the constant, if it won't fit in an immediate?
    896   const s64 constant_value = value.GetS64ConstantValue();
    897   if (a64::Assembler::IsImmAddSub(constant_value))
    898   {
    899     if (value.size < RegSize_64)
    900     {
    901       if (set_flags)
    902         m_emit->adds(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value);
    903       else
    904         m_emit->add(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value);
    905     }
    906     else
    907     {
    908       if (set_flags)
    909         m_emit->adds(GetHostReg64(to_reg), GetHostReg64(from_reg), constant_value);
    910       else
    911         m_emit->add(GetHostReg64(to_reg), GetHostReg64(from_reg), constant_value);
    912     }
    913 
    914     return;
    915   }
    916 
    917   // need a temporary
    918   Assert(from_reg != RSCRATCH);
    919   Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size));
    920   if (value.size < RegSize_64)
    921     m_emit->Mov(GetHostReg32(temp_value.host_reg), constant_value);
    922   else
    923     m_emit->Mov(GetHostReg64(temp_value.host_reg), constant_value);
    924   EmitAdd(to_reg, from_reg, temp_value, set_flags);
    925 }
    926 
    927 void CodeGenerator::EmitSub(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags)
    928 {
    929   Assert(value.IsConstant() || value.IsInHostRegister());
    930 
    931   // if it's in a host register already, this is easy
    932   if (value.IsInHostRegister())
    933   {
    934     if (value.size < RegSize_64)
    935     {
    936       if (set_flags)
    937         m_emit->subs(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg));
    938       else
    939         m_emit->sub(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg));
    940     }
    941     else
    942     {
    943       if (set_flags)
    944         m_emit->subs(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg));
    945       else
    946         m_emit->sub(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg));
    947     }
    948 
    949     return;
    950   }
    951 
    952   // do we need temporary storage for the constant, if it won't fit in an immediate?
    953   const s64 constant_value = value.GetS64ConstantValue();
    954   if (a64::Assembler::IsImmAddSub(value.constant_value))
    955   {
    956     if (value.size < RegSize_64)
    957     {
    958       if (set_flags)
    959         m_emit->subs(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value);
    960       else
    961         m_emit->sub(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value);
    962     }
    963     else
    964     {
    965       if (set_flags)
    966         m_emit->subs(GetHostReg64(to_reg), GetHostReg64(from_reg), constant_value);
    967       else
    968         m_emit->sub(GetHostReg64(to_reg), GetHostReg64(from_reg), constant_value);
    969     }
    970 
    971     return;
    972   }
    973 
    974   // need a temporary
    975   Assert(from_reg != RSCRATCH);
    976   Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size));
    977   if (value.size < RegSize_64)
    978     m_emit->Mov(GetHostReg32(temp_value.host_reg), constant_value);
    979   else
    980     m_emit->Mov(GetHostReg64(temp_value.host_reg), constant_value);
    981   EmitSub(to_reg, from_reg, temp_value, set_flags);
    982 }
    983 
    984 void CodeGenerator::EmitCmp(HostReg to_reg, const Value& value)
    985 {
    986   Assert(value.IsConstant() || value.IsInHostRegister());
    987 
    988   // if it's in a host register already, this is easy
    989   if (value.IsInHostRegister())
    990   {
    991     if (value.size < RegSize_64)
    992       m_emit->cmp(GetHostReg32(to_reg), GetHostReg32(value.host_reg));
    993     else
    994       m_emit->cmp(GetHostReg64(to_reg), GetHostReg64(value.host_reg));
    995 
    996     return;
    997   }
    998 
    999   // do we need temporary storage for the constant, if it won't fit in an immediate?
   1000   const s64 constant_value = value.GetS64ConstantValue();
   1001   if (constant_value >= 0)
   1002   {
   1003     if (a64::Assembler::IsImmAddSub(constant_value))
   1004     {
   1005       if (value.size < RegSize_64)
   1006         m_emit->cmp(GetHostReg32(to_reg), constant_value);
   1007       else
   1008         m_emit->cmp(GetHostReg64(to_reg), constant_value);
   1009 
   1010       return;
   1011     }
   1012   }
   1013   else
   1014   {
   1015     if (a64::Assembler::IsImmAddSub(-constant_value))
   1016     {
   1017       if (value.size < RegSize_64)
   1018         m_emit->cmn(GetHostReg32(to_reg), -constant_value);
   1019       else
   1020         m_emit->cmn(GetHostReg64(to_reg), -constant_value);
   1021 
   1022       return;
   1023     }
   1024   }
   1025 
   1026   // need a temporary
   1027   Assert(to_reg != RSCRATCH);
   1028   Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size));
   1029   if (value.size < RegSize_64)
   1030     m_emit->Mov(GetHostReg32(temp_value.host_reg), constant_value);
   1031   else
   1032     m_emit->Mov(GetHostReg64(temp_value.host_reg), constant_value);
   1033   EmitCmp(to_reg, temp_value);
   1034 }
   1035 
   1036 void CodeGenerator::EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs,
   1037                             bool signed_multiply)
   1038 {
   1039   Value lhs_in_reg = GetValueInHostRegister(lhs);
   1040   Value rhs_in_reg = GetValueInHostRegister(rhs);
   1041 
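        // 32x32 -> 64 multiply: smull/umull put the full product in the low result register, and the
        // high 32 bits are then extracted with a 32-bit shift.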
   1042   if (lhs.size < RegSize_64)
   1043   {
   1044     if (signed_multiply)
   1045     {
   1046       m_emit->smull(GetHostReg64(to_reg_lo), GetHostReg32(lhs_in_reg.host_reg), GetHostReg32(rhs_in_reg.host_reg));
   1047       m_emit->asr(GetHostReg64(to_reg_hi), GetHostReg64(to_reg_lo), 32);
   1048     }
   1049     else
   1050     {
   1051       m_emit->umull(GetHostReg64(to_reg_lo), GetHostReg32(lhs_in_reg.host_reg), GetHostReg32(rhs_in_reg.host_reg));
   1052       m_emit->lsr(GetHostReg64(to_reg_hi), GetHostReg64(to_reg_lo), 32);
   1053     }
   1054   }
   1055   else
   1056   {
   1057     // TODO: Use mul + smulh
   1058     Panic("Not implemented");
   1059   }
   1060 }
   1061 
   1062 void CodeGenerator::EmitDiv(HostReg to_reg_quotient, HostReg to_reg_remainder, HostReg num, HostReg denom, RegSize size,
   1063                             bool signed_divide)
   1064 {
   1065   // only 32-bit supported for now..
   1066   Assert(size == RegSize_32);
   1067 
   1068   Value quotient_value;
   1069   if (to_reg_quotient == HostReg_Count)
   1070   {
   1071     Assert(to_reg_quotient != RSCRATCH);
   1072     quotient_value = Value::FromHostReg(&m_register_cache, RSCRATCH, size);
   1073   }
   1074   else
   1075   {
   1076     quotient_value.SetHostReg(&m_register_cache, to_reg_quotient, size);
   1077   }
   1078 
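        // msub reconstructs the remainder as num - (quotient * denom).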
   1079   if (signed_divide)
   1080   {
   1081     m_emit->sdiv(GetHostReg32(quotient_value), GetHostReg32(num), GetHostReg32(denom));
   1082     if (to_reg_remainder != HostReg_Count)
   1083     {
   1084       m_emit->msub(GetHostReg32(to_reg_remainder), GetHostReg32(quotient_value), GetHostReg32(denom),
   1085                    GetHostReg32(num));
   1086     }
   1087   }
   1088   else
   1089   {
   1090     m_emit->udiv(GetHostReg32(quotient_value), GetHostReg32(num), GetHostReg32(denom));
   1091     if (to_reg_remainder != HostReg_Count)
   1092     {
   1093       m_emit->msub(GetHostReg32(to_reg_remainder), GetHostReg32(quotient_value), GetHostReg32(denom),
   1094                    GetHostReg32(num));
   1095     }
   1096   }
   1097 }
   1098 
   1099 void CodeGenerator::EmitInc(HostReg to_reg, RegSize size)
   1100 {
   1101   Panic("Not implemented");
   1102 #if 0
   1103   switch (size)
   1104   {
   1105     case RegSize_8:
   1106       m_emit->inc(GetHostReg8(to_reg));
   1107       break;
   1108     case RegSize_16:
   1109       m_emit->inc(GetHostReg16(to_reg));
   1110       break;
   1111     case RegSize_32:
   1112       m_emit->inc(GetHostReg32(to_reg));
   1113       break;
   1114     default:
   1115       UnreachableCode();
   1116       break;
   1117   }
   1118 #endif
   1119 }
   1120 
   1121 void CodeGenerator::EmitDec(HostReg to_reg, RegSize size)
   1122 {
   1123   Panic("Not implemented");
   1124 #if 0
   1125   switch (size)
   1126   {
   1127     case RegSize_8:
   1128       m_emit->dec(GetHostReg8(to_reg));
   1129       break;
   1130     case RegSize_16:
   1131       m_emit->dec(GetHostReg16(to_reg));
   1132       break;
   1133     case RegSize_32:
   1134       m_emit->dec(GetHostReg32(to_reg));
   1135       break;
   1136     default:
   1137       UnreachableCode();
   1138       break;
   1139   }
   1140 #endif
   1141 }
   1142 
   1143 void CodeGenerator::EmitShl(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value,
   1144                             bool assume_amount_masked /* = true */)
   1145 {
   1146   switch (size)
   1147   {
   1148     case RegSize_8:
   1149     case RegSize_16:
   1150     case RegSize_32:
   1151     {
   1152       if (amount_value.IsConstant())
   1153         m_emit->lsl(GetHostReg32(to_reg), GetHostReg32(from_reg), amount_value.constant_value & 0x1F);
   1154       else
   1155         m_emit->lslv(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(amount_value));
   1156 
   1157       if (size == RegSize_8)
   1158         m_emit->and_(GetHostReg32(to_reg), GetHostReg32(to_reg), 0xFF);
   1159       else if (size == RegSize_16)
   1160         m_emit->and_(GetHostReg32(to_reg), GetHostReg32(to_reg), 0xFFFF);
   1161     }
   1162     break;
   1163 
   1164     case RegSize_64:
   1165     {
   1166       if (amount_value.IsConstant())
   1167         m_emit->lsl(GetHostReg64(to_reg), GetHostReg64(from_reg), amount_value.constant_value & 0x3F);
   1168       else
   1169         m_emit->lslv(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(amount_value));
   1170     }
   1171     break;
   1172   }
   1173 }
   1174 
   1175 void CodeGenerator::EmitShr(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value,
   1176                             bool assume_amount_masked /* = true */)
   1177 {
   1178   switch (size)
   1179   {
   1180     case RegSize_8:
   1181     case RegSize_16:
   1182     case RegSize_32:
   1183     {
   1184       if (amount_value.IsConstant())
   1185         m_emit->lsr(GetHostReg32(to_reg), GetHostReg32(from_reg), amount_value.constant_value & 0x1F);
   1186       else
   1187         m_emit->lsrv(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(amount_value));
   1188 
   1189       if (size == RegSize_8)
   1190         m_emit->and_(GetHostReg32(to_reg), GetHostReg32(to_reg), 0xFF);
   1191       else if (size == RegSize_16)
   1192         m_emit->and_(GetHostReg32(to_reg), GetHostReg32(to_reg), 0xFFFF);
   1193     }
   1194     break;
   1195 
   1196     case RegSize_64:
   1197     {
   1198       if (amount_value.IsConstant())
   1199         m_emit->lsr(GetHostReg64(to_reg), GetHostReg64(from_reg), amount_value.constant_value & 0x3F);
   1200       else
   1201         m_emit->lsrv(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(amount_value));
   1202     }
   1203     break;
   1204   }
   1205 }
   1206 
   1207 void CodeGenerator::EmitSar(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value,
   1208                             bool assume_amount_masked /* = true */)
   1209 {
   1210   switch (size)
   1211   {
   1212     case RegSize_8:
   1213     case RegSize_16:
   1214     case RegSize_32:
   1215     {
   1216       if (amount_value.IsConstant())
   1217         m_emit->asr(GetHostReg32(to_reg), GetHostReg32(from_reg), amount_value.constant_value & 0x1F);
   1218       else
   1219         m_emit->asrv(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(amount_value));
   1220 
   1221       if (size == RegSize_8)
   1222         m_emit->and_(GetHostReg32(to_reg), GetHostReg32(to_reg), 0xFF);
   1223       else if (size == RegSize_16)
   1224         m_emit->and_(GetHostReg32(to_reg), GetHostReg32(to_reg), 0xFFFF);
   1225     }
   1226     break;
   1227 
   1228     case RegSize_64:
   1229     {
   1230       if (amount_value.IsConstant())
   1231         m_emit->asr(GetHostReg64(to_reg), GetHostReg64(from_reg), amount_value.constant_value & 0x3F);
   1232       else
   1233         m_emit->asrv(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(amount_value));
   1234     }
   1235     break;
   1236   }
   1237 }
   1238 
   1239 static bool CanFitInBitwiseImmediate(const Value& value)
   1240 {
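        // A64 logical immediates only encode repeating patterns of rotated runs of ones; anything
        // else has to be moved into a scratch register by the caller.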
   1241   const unsigned reg_size = (value.size < RegSize_64) ? 32 : 64;
   1242   unsigned n, imm_s, imm_r;
   1243   return a64::Assembler::IsImmLogical(s64(value.constant_value), reg_size, &n, &imm_s, &imm_r);
   1244 }
   1245 
   1246 void CodeGenerator::EmitAnd(HostReg to_reg, HostReg from_reg, const Value& value)
   1247 {
   1248   Assert(value.IsConstant() || value.IsInHostRegister());
   1249 
   1250   // if it's in a host register already, this is easy
   1251   if (value.IsInHostRegister())
   1252   {
   1253     if (value.size < RegSize_64)
   1254       m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg));
   1255     else
   1256       m_emit->and_(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg));
   1257 
   1258     return;
   1259   }
   1260 
   1261   // do we need temporary storage for the constant, if it won't fit in an immediate?
   1262   if (CanFitInBitwiseImmediate(value))
   1263   {
   1264     if (value.size < RegSize_64)
   1265       m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), s64(value.constant_value));
   1266     else
   1267       m_emit->and_(GetHostReg64(to_reg), GetHostReg64(from_reg), s64(value.constant_value));
   1268 
   1269     return;
   1270   }
   1271 
   1272   // need a temporary
   1273   Assert(from_reg != RSCRATCH);
   1274   Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size));
   1275   if (value.size < RegSize_64)
   1276     m_emit->Mov(GetHostReg32(temp_value.host_reg), s64(value.constant_value));
   1277   else
   1278     m_emit->Mov(GetHostReg64(temp_value.host_reg), s64(value.constant_value));
   1279   EmitAnd(to_reg, from_reg, temp_value);
   1280 }
   1281 
   1282 void CodeGenerator::EmitOr(HostReg to_reg, HostReg from_reg, const Value& value)
   1283 {
   1284   Assert(value.IsConstant() || value.IsInHostRegister());
   1285 
   1286   // if it's in a host register already, this is easy
   1287   if (value.IsInHostRegister())
   1288   {
   1289     if (value.size < RegSize_64)
   1290       m_emit->orr(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg));
   1291     else
   1292       m_emit->orr(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg));
   1293 
   1294     return;
   1295   }
   1296 
   1297   // do we need temporary storage for the constant, if it won't fit in an immediate?
   1298   if (CanFitInBitwiseImmediate(value))
   1299   {
   1300     if (value.size < RegSize_64)
   1301       m_emit->orr(GetHostReg32(to_reg), GetHostReg32(from_reg), s64(value.constant_value));
   1302     else
   1303       m_emit->orr(GetHostReg64(to_reg), GetHostReg64(from_reg), s64(value.constant_value));
   1304 
   1305     return;
   1306   }
   1307 
   1308   // need a temporary
   1309   Assert(from_reg != RSCRATCH);
   1310   Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size));
   1311   if (value.size < RegSize_64)
   1312     m_emit->Mov(GetHostReg32(temp_value.host_reg), s64(value.constant_value));
   1313   else
   1314     m_emit->Mov(GetHostReg64(temp_value.host_reg), s64(value.constant_value));
   1315   EmitOr(to_reg, from_reg, temp_value);
   1316 }
   1317 
   1318 void CodeGenerator::EmitXor(HostReg to_reg, HostReg from_reg, const Value& value)
   1319 {
   1320   Assert(value.IsConstant() || value.IsInHostRegister());
   1321 
   1322   // if it's in a host register already, this is easy
   1323   if (value.IsInHostRegister())
   1324   {
   1325     if (value.size < RegSize_64)
   1326       m_emit->eor(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg));
   1327     else
   1328       m_emit->eor(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg));
   1329 
   1330     return;
   1331   }
   1332 
   1333   // do we need temporary storage for the constant, if it won't fit in an immediate?
   1334   if (CanFitInBitwiseImmediate(value))
   1335   {
   1336     if (value.size < RegSize_64)
   1337       m_emit->eor(GetHostReg32(to_reg), GetHostReg32(from_reg), s64(value.constant_value));
   1338     else
   1339       m_emit->eor(GetHostReg64(to_reg), GetHostReg64(from_reg), s64(value.constant_value));
   1340 
   1341     return;
   1342   }
   1343 
   1344   // need a temporary
   1345   Assert(from_reg != RSCRATCH);
   1346   Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size));
   1347   if (value.size < RegSize_64)
   1348     m_emit->Mov(GetHostReg32(temp_value.host_reg), s64(value.constant_value));
   1349   else
   1350     m_emit->Mov(GetHostReg64(temp_value.host_reg), s64(value.constant_value));
   1351   EmitXor(to_reg, from_reg, temp_value);
   1352 }
   1353 
   1354 void CodeGenerator::EmitTest(HostReg to_reg, const Value& value)
   1355 {
   1356   Assert(value.IsConstant() || value.IsInHostRegister());
   1357 
   1358   // if it's in a host register already, this is easy
   1359   if (value.IsInHostRegister())
   1360   {
   1361     if (value.size < RegSize_64)
   1362       m_emit->tst(GetHostReg32(to_reg), GetHostReg32(value.host_reg));
   1363     else
   1364       m_emit->tst(GetHostReg64(to_reg), GetHostReg64(value.host_reg));
   1365 
   1366     return;
   1367   }
   1368 
   1369   // do we need temporary storage for the constant, if it won't fit in an immediate?
   1370   if (CanFitInBitwiseImmediate(value))
   1371   {
   1372     if (value.size < RegSize_64)
   1373       m_emit->tst(GetHostReg32(to_reg), s64(value.constant_value));
   1374     else
   1375       m_emit->tst(GetHostReg64(to_reg), s64(value.constant_value));
   1376 
   1377     return;
   1378   }
   1379 
   1380   // need a temporary
   1381   Assert(to_reg != RSCRATCH);
   1382   Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size));
   1383   if (value.size < RegSize_64)
   1384     m_emit->Mov(GetHostReg32(temp_value.host_reg), s64(value.constant_value));
   1385   else
   1386     m_emit->Mov(GetHostReg64(temp_value.host_reg), s64(value.constant_value));
   1387   EmitTest(to_reg, temp_value);
   1388 }
   1389 
   1390 void CodeGenerator::EmitNot(HostReg to_reg, RegSize size)
   1391 {
   1392   switch (size)
   1393   {
   1394     case RegSize_8:
   1395       m_emit->mvn(GetHostReg8(to_reg), GetHostReg8(to_reg));
   1396       m_emit->and_(GetHostReg8(to_reg), GetHostReg8(to_reg), 0xFF);
   1397       break;
   1398 
   1399     case RegSize_16:
   1400       m_emit->mvn(GetHostReg16(to_reg), GetHostReg16(to_reg));
   1401       m_emit->and_(GetHostReg16(to_reg), GetHostReg16(to_reg), 0xFFFF);
   1402       break;
   1403 
   1404     case RegSize_32:
   1405       m_emit->mvn(GetHostReg32(to_reg), GetHostReg32(to_reg));
   1406       break;
   1407 
   1408     case RegSize_64:
   1409       m_emit->mvn(GetHostReg64(to_reg), GetHostReg64(to_reg));
   1410       break;
   1411 
   1412     default:
   1413       break;
   1414   }
   1415 }
   1416 
   1417 void CodeGenerator::EmitSetConditionResult(HostReg to_reg, RegSize to_size, Condition condition)
   1418 {
   1419   if (condition == Condition::Always)
   1420   {
   1421     if (to_size < RegSize_64)
   1422       m_emit->Mov(GetHostReg32(to_reg), 1);
   1423     else
   1424       m_emit->Mov(GetHostReg64(to_reg), 1);
   1425 
   1426     return;
   1427   }
   1428 
   1429   a64::Condition acond;
   1430   switch (condition)
   1431   {
   1432     case Condition::NotEqual:
   1433       acond = a64::ne;
   1434       break;
   1435 
   1436     case Condition::Equal:
   1437       acond = a64::eq;
   1438       break;
   1439 
   1440     case Condition::Overflow:
   1441       acond = a64::vs;
   1442       break;
   1443 
   1444     case Condition::Greater:
   1445       acond = a64::gt;
   1446       break;
   1447 
   1448     case Condition::GreaterEqual:
   1449       acond = a64::ge;
   1450       break;
   1451 
   1452     case Condition::Less:
   1453       acond = a64::lt;
   1454       break;
   1455 
   1456     case Condition::LessEqual:
   1457       acond = a64::le;
   1458       break;
   1459 
   1460     case Condition::Negative:
   1461       acond = a64::mi;
   1462       break;
   1463 
   1464     case Condition::PositiveOrZero:
   1465       acond = a64::pl;
   1466       break;
   1467 
   1468     case Condition::Above:
   1469       acond = a64::hi;
   1470       break;
   1471 
   1472     case Condition::AboveEqual:
   1473       acond = a64::cs;
   1474       break;
   1475 
   1476     case Condition::Below:
   1477       acond = a64::cc;
   1478       break;
   1479 
   1480     case Condition::BelowEqual:
   1481       acond = a64::ls;
   1482       break;
   1483 
   1484     default:
   1485       UnreachableCode();
   1486       return;
   1487   }
   1488 
   1489   if (to_size < RegSize_64)
   1490     m_emit->cset(GetHostReg32(to_reg), acond);
   1491   else
   1492     m_emit->cset(GetHostReg64(to_reg), acond);
   1493 }
   1494 
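         // There is no shadow-space requirement on AArch64: these helpers only spill/restore the caller-saved
         // registers, so the returned stack adjustment is always zero. The "shadow space allocate/release"
         // comments in the call helpers below correspond to this spill/restore.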
   1495 u32 CodeGenerator::PrepareStackForCall()
   1496 {
   1497   m_register_cache.PushCallerSavedRegisters();
   1498   return 0;
   1499 }
   1500 
   1501 void CodeGenerator::RestoreStackAfterCall(u32 adjust_size)
   1502 {
   1503   m_register_cache.PopCallerSavedRegisters();
   1504 }
   1505 
   1506 void CodeGenerator::EmitCall(const void* ptr)
   1507 {
   1508   armEmitCall(m_emit, ptr, false);
   1509 }
   1510 
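         // The EmitFunctionCallPtr() overloads all follow the same pattern: discard the return-value register,
         // spill caller-saved registers, copy the arguments into RARG1..RARG4, call the target, restore the
         // registers, and copy RRETURN into the result if one was requested.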
   1511 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr)
   1512 {
   1513   if (return_value)
   1514     return_value->Discard();
   1515 
   1516   // shadow space allocate
   1517   const u32 adjust_size = PrepareStackForCall();
   1518 
   1519   // actually call the function
   1520   EmitCall(ptr);
   1521 
   1522   // shadow space release
   1523   RestoreStackAfterCall(adjust_size);
   1524 
   1525   // copy out return value if requested
   1526   if (return_value)
   1527   {
   1528     return_value->Undiscard();
   1529     EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size));
   1530   }
   1531 }
   1532 
   1533 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1)
   1534 {
   1535   if (return_value)
   1536     return_value->Discard();
   1537 
   1538   // shadow space allocate
   1539   const u32 adjust_size = PrepareStackForCall();
   1540 
   1541   // push arguments
   1542   EmitCopyValue(RARG1, arg1);
   1543 
   1544   // actually call the function
   1545   EmitCall(ptr);
   1546 
   1547   // shadow space release
   1548   RestoreStackAfterCall(adjust_size);
   1549 
   1550   // copy out return value if requested
   1551   if (return_value)
   1552   {
   1553     return_value->Undiscard();
   1554     EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size));
   1555   }
   1556 }
   1557 
   1558 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2)
   1559 {
   1560   if (return_value)
   1561     return_value->Discard();
   1562 
   1563   // shadow space allocate
   1564   const u32 adjust_size = PrepareStackForCall();
   1565 
   1566   // push arguments
   1567   EmitCopyValue(RARG1, arg1);
   1568   EmitCopyValue(RARG2, arg2);
   1569 
   1570   // actually call the function
   1571   EmitCall(ptr);
   1572 
   1573   // shadow space release
   1574   RestoreStackAfterCall(adjust_size);
   1575 
   1576   // copy out return value if requested
   1577   if (return_value)
   1578   {
   1579     return_value->Undiscard();
   1580     EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size));
   1581   }
   1582 }
   1583 
   1584 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2,
   1585                                         const Value& arg3)
   1586 {
   1587   if (return_value)
   1588     m_register_cache.DiscardHostReg(return_value->GetHostRegister());
   1589 
   1590   // shadow space allocate
   1591   const u32 adjust_size = PrepareStackForCall();
   1592 
   1593   // push arguments
   1594   EmitCopyValue(RARG1, arg1);
   1595   EmitCopyValue(RARG2, arg2);
   1596   EmitCopyValue(RARG3, arg3);
   1597 
   1598   // actually call the function
   1599   EmitCall(ptr);
   1600 
   1601   // shadow space release
   1602   RestoreStackAfterCall(adjust_size);
   1603 
   1604   // copy out return value if requested
   1605   if (return_value)
   1606   {
   1607     return_value->Undiscard();
   1608     EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size));
   1609   }
   1610 }
   1611 
   1612 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2,
   1613                                         const Value& arg3, const Value& arg4)
   1614 {
   1615   if (return_value)
   1616     return_value->Discard();
   1617 
   1618   // shadow space allocate
   1619   const u32 adjust_size = PrepareStackForCall();
   1620 
   1621   // push arguments
   1622   EmitCopyValue(RARG1, arg1);
   1623   EmitCopyValue(RARG2, arg2);
   1624   EmitCopyValue(RARG3, arg3);
   1625   EmitCopyValue(RARG4, arg4);
   1626 
   1627   // actually call the function
   1628   EmitCall(ptr);
   1629 
   1630   // shadow space release
   1631   RestoreStackAfterCall(adjust_size);
   1632 
   1633   // copy out return value if requested
   1634   if (return_value)
   1635   {
   1636     return_value->Undiscard();
   1637     EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size));
   1638   }
   1639 }
   1640 
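         // Host registers are saved to and restored from fixed 8-byte slots near the top of the
         // FUNCTION_STACK_SIZE frame, indexed by position; the pair variants use stp/ldp to save instructions.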
   1641 void CodeGenerator::EmitPushHostReg(HostReg reg, u32 position)
   1642 {
   1643   const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - (position * 8));
   1644   m_emit->str(GetHostReg64(reg), addr);
   1645 }
   1646 
   1647 void CodeGenerator::EmitPushHostRegPair(HostReg reg, HostReg reg2, u32 position)
   1648 {
   1649   const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - ((position + 1) * 8));
   1650   m_emit->stp(GetHostReg64(reg2), GetHostReg64(reg), addr);
   1651 }
   1652 
   1653 void CodeGenerator::EmitPopHostReg(HostReg reg, u32 position)
   1654 {
   1655   const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - (position * 8));
   1656   m_emit->ldr(GetHostReg64(reg), addr);
   1657 }
   1658 
   1659 void CodeGenerator::EmitPopHostRegPair(HostReg reg, HostReg reg2, u32 position)
   1660 {
   1661   const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - (position * 8));
   1662   m_emit->ldp(GetHostReg64(reg2), GetHostReg64(reg), addr);
   1663 }
   1664 
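         // CPU::State fields are accessed through the dedicated CPU struct pointer register with an immediate
         // offset, using the load/store width that matches the guest register size.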
   1665 void CodeGenerator::EmitLoadCPUStructField(HostReg host_reg, RegSize guest_size, u32 offset)
   1666 {
   1667   const s64 s_offset = static_cast<s64>(ZeroExtend64(offset));
   1668 
   1669   switch (guest_size)
   1670   {
   1671     case RegSize_8:
   1672       m_emit->Ldrb(GetHostReg8(host_reg), a64::MemOperand(GetCPUPtrReg(), s_offset));
   1673       break;
   1674 
   1675     case RegSize_16:
   1676       m_emit->Ldrh(GetHostReg16(host_reg), a64::MemOperand(GetCPUPtrReg(), s_offset));
   1677       break;
   1678 
   1679     case RegSize_32:
   1680       m_emit->Ldr(GetHostReg32(host_reg), a64::MemOperand(GetCPUPtrReg(), s_offset));
   1681       break;
   1682 
   1683     case RegSize_64:
   1684       m_emit->Ldr(GetHostReg64(host_reg), a64::MemOperand(GetCPUPtrReg(), s_offset));
   1685       break;
   1686 
   1687     default:
   1688     {
   1689       UnreachableCode();
   1690     }
   1691     break;
   1692   }
   1693 }
   1694 
   1695 void CodeGenerator::EmitStoreCPUStructField(u32 offset, const Value& value)
   1696 {
   1697   const Value hr_value = GetValueInHostRegister(value);
   1698   const s64 s_offset = static_cast<s64>(ZeroExtend64(offset));
   1699 
   1700   switch (value.size)
   1701   {
   1702     case RegSize_8:
   1703       m_emit->Strb(GetHostReg8(hr_value), a64::MemOperand(GetCPUPtrReg(), s_offset));
   1704       break;
   1705 
   1706     case RegSize_16:
   1707       m_emit->Strh(GetHostReg16(hr_value), a64::MemOperand(GetCPUPtrReg(), s_offset));
   1708       break;
   1709 
   1710     case RegSize_32:
   1711       m_emit->Str(GetHostReg32(hr_value), a64::MemOperand(GetCPUPtrReg(), s_offset));
   1712       break;
   1713 
   1714     case RegSize_64:
   1715       m_emit->Str(GetHostReg64(hr_value), a64::MemOperand(GetCPUPtrReg(), s_offset));
   1716       break;
   1717 
   1718     default:
   1719     {
   1720       UnreachableCode();
   1721     }
   1722     break;
   1723   }
   1724 }
   1725 
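         // Read-modify-write add on a State field via RSCRATCH; constant operands that do not fit an add/sub
         // immediate are first materialized in RARG4.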
   1726 void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value)
   1727 {
   1728   const s64 s_offset = static_cast<s64>(ZeroExtend64(offset));
   1729   const a64::MemOperand o_offset(GetCPUPtrReg(), s_offset);
   1730 
   1731   Value real_value;
   1732   if (value.IsInHostRegister())
   1733   {
   1734     real_value.SetHostReg(&m_register_cache, value.host_reg, value.size);
   1735   }
   1736   else
   1737   {
   1738     // do we need temporary storage for the constant, if it won't fit in an immediate?
   1739     Assert(value.IsConstant());
   1740     const s64 constant_value = value.GetS64ConstantValue();
   1741     if (!a64::Assembler::IsImmAddSub(constant_value))
   1742     {
   1743       real_value.SetHostReg(&m_register_cache, RARG4, value.size);
   1744       EmitCopyValue(real_value.host_reg, value);
   1745     }
   1746     else
   1747     {
   1748       real_value = value;
   1749     }
   1750   }
   1751 
   1752   // Don't need to mask here because we're storing back to memory.
   1753   switch (value.size)
   1754   {
   1755     case RegSize_8:
   1756     {
   1757       m_emit->Ldrb(GetHostReg8(RSCRATCH), o_offset);
   1758       if (real_value.IsConstant())
   1759         m_emit->Add(GetHostReg8(RSCRATCH), GetHostReg8(RSCRATCH), real_value.GetS64ConstantValue());
   1760       else
   1761         m_emit->Add(GetHostReg8(RSCRATCH), GetHostReg8(RSCRATCH), GetHostReg8(real_value));
   1762       m_emit->Strb(GetHostReg8(RSCRATCH), o_offset);
   1763     }
   1764     break;
   1765 
   1766     case RegSize_16:
   1767     {
   1768       m_emit->Ldrh(GetHostReg16(RSCRATCH), o_offset);
   1769       if (real_value.IsConstant())
   1770         m_emit->Add(GetHostReg16(RSCRATCH), GetHostReg16(RSCRATCH), real_value.GetS64ConstantValue());
   1771       else
   1772         m_emit->Add(GetHostReg16(RSCRATCH), GetHostReg16(RSCRATCH), GetHostReg16(real_value));
   1773       m_emit->Strh(GetHostReg16(RSCRATCH), o_offset);
   1774     }
   1775     break;
   1776 
   1777     case RegSize_32:
   1778     {
   1779       m_emit->Ldr(GetHostReg32(RSCRATCH), o_offset);
   1780       if (real_value.IsConstant())
   1781         m_emit->Add(GetHostReg32(RSCRATCH), GetHostReg32(RSCRATCH), real_value.GetS64ConstantValue());
   1782       else
   1783         m_emit->Add(GetHostReg32(RSCRATCH), GetHostReg32(RSCRATCH), GetHostReg32(real_value));
   1784       m_emit->Str(GetHostReg32(RSCRATCH), o_offset);
   1785     }
   1786     break;
   1787 
   1788     case RegSize_64:
   1789     {
   1790       m_emit->Ldr(GetHostReg64(RSCRATCH), o_offset);
   1791       if (real_value.IsConstant())
   1792         m_emit->Add(GetHostReg64(RSCRATCH), GetHostReg64(RSCRATCH), s64(real_value.constant_value));
   1793       else
   1794         m_emit->Add(GetHostReg64(RSCRATCH), GetHostReg64(RSCRATCH), GetHostReg64(real_value));
   1795       m_emit->Str(GetHostReg64(RSCRATCH), o_offset);
   1796     }
   1797     break;
   1798 
   1799     default:
   1800     {
   1801       UnreachableCode();
   1802     }
   1803     break;
   1804   }
   1805 }
   1806 
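         // Direct fastmem load with no slowmem fallback: the guest address indexes either the fastmem base
         // register directly, or (in LUT mode) a per-page host pointer fetched from the lookup table first.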
   1807 void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result)
   1808 {
   1809   HostReg address_reg;
   1810   a64::MemOperand actual_address;
   1811   if (address.IsConstant())
   1812   {
   1813     m_emit->Mov(GetHostReg32(result.host_reg), address.constant_value);
   1814     address_reg = result.host_reg;
   1815   }
   1816   else
   1817   {
   1818     address_reg = address.host_reg;
   1819   }
   1820 
   1821   if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
   1822   {
   1823     m_emit->lsr(GetHostReg64(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT);
   1824     m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3));
   1825   }
   1826 
   1827   const a64::XRegister membase =
   1828     (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
   1829 
   1830   switch (size)
   1831   {
   1832     case RegSize_8:
   1833       m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
   1834       break;
   1835 
   1836     case RegSize_16:
   1837       m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
   1838       break;
   1839 
   1840     case RegSize_32:
   1841       m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
   1842       break;
   1843 
   1844     default:
   1845       UnreachableCode();
   1846       break;
   1847   }
   1848 }
   1849 
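         // Emits the fastmem load in near code and a slowmem fallback (with cycle fix-up and exception handling)
         // in far code, then records the near-code range via AddLoadStoreInfo() so a faulting access can later be
         // backpatched into a branch to the fallback (see BackpatchLoadStore() below).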
   1850 void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
   1851                                                const Value& address, RegSize size, Value& result)
   1852 {
   1853   HostReg address_reg;
   1854   if (address.IsConstant())
   1855   {
   1856     m_emit->Mov(GetHostReg32(result.host_reg), address.constant_value);
   1857     address_reg = result.host_reg;
   1858   }
   1859   else
   1860   {
   1861     address_reg = address.host_reg;
   1862   }
   1863 
   1864   if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
   1865   {
   1866     m_emit->lsr(GetHostReg64(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT);
   1867     m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3));
   1868   }
   1869 
   1870   const a64::XRegister membase =
   1871     (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
   1872 
   1873   m_register_cache.InhibitAllocation();
   1874 
   1875   void* host_pc = GetCurrentNearCodePointer();
   1876 
   1877   switch (size)
   1878   {
   1879     case RegSize_8:
   1880       m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
   1881       break;
   1882 
   1883     case RegSize_16:
   1884       m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
   1885       break;
   1886 
   1887     case RegSize_32:
   1888       m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
   1889       break;
   1890 
   1891     default:
   1892       UnreachableCode();
   1893       break;
   1894   }
   1895 
   1896   const u32 host_code_size =
   1897     static_cast<u32>(static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc)));
   1898 
   1899   // generate slowmem fallback
   1900   const void* host_slowmem_pc = GetCurrentFarCodePointer();
   1901   SwitchToFarCode();
   1902 
    1903   // pending_ticks must be up to date before calling the slowmem handler; RAM_READ_TICKS (what the fastmem
           // path would have cost) is added to m_delayed_cycles_add afterwards, and the whole amount is subtracted
           // again below so it is not counted twice when the near code later flushes the delayed cycles
   1904   DebugAssert(m_delayed_cycles_add > 0);
   1905   EmitAddCPUStructField(OFFSETOF(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
   1906   m_delayed_cycles_add += Bus::RAM_READ_TICKS;
   1907 
   1908   EmitLoadGuestMemorySlowmem(instruction, info, address, size, result, true);
   1909 
   1910   EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
   1911                         Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
   1912 
   1913   // return to the block code
   1914   EmitBranch(GetCurrentNearCodePointer(), false);
   1915 
   1916   SwitchToNearCode();
   1917   m_register_cache.UninhibitAllocation();
   1918 
   1919   CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, host_slowmem_pc);
   1920 }
   1921 
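         // Slowmem loads go through the memory thunks. When recompiler memory exceptions are enabled, bit 63 of
         // the 64-bit thunk result signals failure; the failure path rebuilds the CAUSE bits from the negated
         // result and raises the exception out of line.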
   1922 void CodeGenerator::EmitLoadGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
   1923                                                const Value& address, RegSize size, Value& result, bool in_far_code)
   1924 {
   1925   if (g_settings.cpu_recompiler_memory_exceptions)
   1926   {
   1927     // NOTE: This can leave junk in the upper bits
   1928     switch (size)
   1929     {
   1930       case RegSize_8:
   1931         EmitFunctionCall(&result, &Thunks::ReadMemoryByte, address);
   1932         break;
   1933 
   1934       case RegSize_16:
   1935         EmitFunctionCall(&result, &Thunks::ReadMemoryHalfWord, address);
   1936         break;
   1937 
   1938       case RegSize_32:
   1939         EmitFunctionCall(&result, &Thunks::ReadMemoryWord, address);
   1940         break;
   1941 
   1942       default:
   1943         UnreachableCode();
   1944         break;
   1945     }
   1946 
   1947     m_register_cache.PushState();
   1948 
   1949     a64::Label load_okay;
   1950     m_emit->Tbz(GetHostReg64(result.host_reg), 63, &load_okay);
   1951     EmitBranch(GetCurrentFarCodePointer());
   1952     m_emit->Bind(&load_okay);
   1953 
   1954     // load exception path
   1955     if (!in_far_code)
   1956       SwitchToFarCode();
   1957 
   1958     // cause_bits = (-result << 2) | BD | cop_n
   1959     m_emit->neg(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg));
   1960     m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2);
   1961     EmitOr(result.host_reg, result.host_reg,
   1962            Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException(
   1963              static_cast<Exception>(0), info.is_branch_delay_slot, false, instruction.cop.cop_n)));
   1964     EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
   1965 
   1966     EmitExceptionExit();
   1967 
   1968     if (!in_far_code)
   1969       SwitchToNearCode();
   1970 
   1971     m_register_cache.PopState();
   1972   }
   1973   else
   1974   {
   1975     switch (size)
   1976     {
   1977       case RegSize_8:
   1978         EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryByte, address);
   1979         break;
   1980 
   1981       case RegSize_16:
   1982         EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryHalfWord, address);
   1983         break;
   1984 
   1985       case RegSize_32:
   1986         EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryWord, address);
   1987         break;
   1988 
   1989       default:
   1990         UnreachableCode();
   1991         break;
   1992     }
   1993   }
   1994 }
   1995 
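         // Store counterpart of EmitLoadGuestMemoryFastmem(): fastmem store in near code, slowmem fallback in
         // far code, and backpatch information recorded for the near-code access.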
   1996 void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
   1997                                                 const Value& address, RegSize size, const Value& value)
   1998 {
   1999   Value value_in_hr = GetValueInHostRegister(value);
   2000 
   2001   HostReg address_reg;
   2002   if (address.IsConstant())
   2003   {
   2004     m_emit->Mov(GetHostReg32(RSCRATCH), address.constant_value);
   2005     address_reg = RSCRATCH;
   2006   }
   2007   else
   2008   {
   2009     address_reg = address.host_reg;
   2010   }
   2011 
   2012   if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
   2013   {
   2014     m_emit->lsr(GetHostReg64(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT);
   2015     m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3));
   2016   }
   2017 
   2018   const a64::XRegister membase =
   2019     (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();
   2020 
   2021   // fastmem
   2022   void* host_pc = GetCurrentNearCodePointer();
   2023 
   2024   m_register_cache.InhibitAllocation();
   2025 
   2026   switch (size)
   2027   {
   2028     case RegSize_8:
   2029       m_emit->strb(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg)));
   2030       break;
   2031 
   2032     case RegSize_16:
   2033       m_emit->strh(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg)));
   2034       break;
   2035 
   2036     case RegSize_32:
   2037       m_emit->str(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg)));
   2038       break;
   2039 
   2040     default:
   2041       UnreachableCode();
   2042       break;
   2043   }
   2044 
   2045   const u32 host_code_size =
   2046     static_cast<u32>(static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc)));
   2047 
   2048   // generate slowmem fallback
   2049   void* host_slowmem_pc = GetCurrentFarCodePointer();
   2050   SwitchToFarCode();
   2051 
   2052   DebugAssert(m_delayed_cycles_add > 0);
   2053   EmitAddCPUStructField(OFFSETOF(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
   2054 
   2055   EmitStoreGuestMemorySlowmem(instruction, info, address, size, value_in_hr, true);
   2056 
   2057   EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
   2058                         Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
   2059 
   2060   // return to the block code
   2061   EmitBranch(GetCurrentNearCodePointer(), false);
   2062 
   2063   SwitchToNearCode();
   2064   m_register_cache.UninhibitAllocation();
   2065 
   2066   CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, host_slowmem_pc);
   2067 }
   2068 
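         // Slowmem stores go through the write thunks; with memory exceptions enabled, a non-zero result signals
         // failure and the exception is raised out of line.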
   2069 void CodeGenerator::EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
   2070                                                 const Value& address, RegSize size, const Value& value,
   2071                                                 bool in_far_code)
   2072 {
   2073   Value value_in_hr = GetValueInHostRegister(value);
   2074 
   2075   if (g_settings.cpu_recompiler_memory_exceptions)
   2076   {
   2077     Assert(!in_far_code);
   2078 
   2079     Value result = m_register_cache.AllocateScratch(RegSize_32);
   2080     switch (size)
   2081     {
   2082       case RegSize_8:
   2083         EmitFunctionCall(&result, &Thunks::WriteMemoryByte, address, value_in_hr);
   2084         break;
   2085 
   2086       case RegSize_16:
   2087         EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, address, value_in_hr);
   2088         break;
   2089 
   2090       case RegSize_32:
   2091         EmitFunctionCall(&result, &Thunks::WriteMemoryWord, address, value_in_hr);
   2092         break;
   2093 
   2094       default:
   2095         UnreachableCode();
   2096         break;
   2097     }
   2098 
   2099     m_register_cache.PushState();
   2100 
   2101     a64::Label store_okay;
   2102     m_emit->Cbz(GetHostReg64(result.host_reg), &store_okay);
   2103     EmitBranch(GetCurrentFarCodePointer());
   2104     m_emit->Bind(&store_okay);
   2105 
   2106     // store exception path
   2107     if (!in_far_code)
   2108       SwitchToFarCode();
   2109 
   2110     // cause_bits = (result << 2) | BD | cop_n
   2111     m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2);
   2112     EmitOr(result.host_reg, result.host_reg,
   2113            Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException(
   2114              static_cast<Exception>(0), info.is_branch_delay_slot, false, instruction.cop.cop_n)));
   2115     EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());
   2116 
   2117     if (!in_far_code)
   2118       EmitExceptionExit();
   2119     SwitchToNearCode();
   2120 
   2121     m_register_cache.PopState();
   2122   }
   2123   else
   2124   {
   2125     switch (size)
   2126     {
   2127       case RegSize_8:
   2128         EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryByte, address, value_in_hr);
   2129         break;
   2130 
   2131       case RegSize_16:
   2132         EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryHalfWord, address, value_in_hr);
   2133         break;
   2134 
   2135       case RegSize_32:
   2136         EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryWord, address, value_in_hr);
   2137         break;
   2138 
   2139       default:
   2140         UnreachableCode();
   2141         break;
   2142     }
   2143   }
   2144 }
   2145 
   2146 void CodeGenerator::EmitUpdateFastmemBase()
   2147 {
   2148   m_emit->Ldr(GetFastmemBasePtrReg(), a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, fastmem_base)));
   2149 }
   2150 
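         // Rewrites a previously recorded fastmem access at host_pc into an unconditional branch to its slowmem
         // thunk, pads the remainder of the patched region with nops, and flushes the instruction cache.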
   2151 void CodeGenerator::BackpatchLoadStore(void* host_pc, const CodeCache::LoadstoreBackpatchInfo& lbi)
   2152 {
   2153   DEV_LOG("Backpatching {} (guest PC 0x{:08X}) to slowmem at {}", host_pc, lbi.guest_pc, lbi.thunk_address);
   2154 
   2155   // check jump distance
   2156   const s64 jump_distance =
   2157     static_cast<s64>(reinterpret_cast<intptr_t>(lbi.thunk_address) - reinterpret_cast<intptr_t>(host_pc));
   2158   Assert(Common::IsAligned(jump_distance, 4));
   2159   Assert(a64::Instruction::IsValidImmPCOffset(a64::UncondBranchType, jump_distance >> 2));
   2160 
   2161   // turn it into a jump to the slowmem handler
   2162   vixl::aarch64::MacroAssembler emit(static_cast<vixl::byte*>(host_pc), lbi.code_size, a64::PositionDependentCode);
   2163   emit.b(jump_distance >> 2);
   2164 
   2165   const s32 nops = (static_cast<s32>(lbi.code_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
   2166   Assert(nops >= 0);
   2167   for (s32 i = 0; i < nops; i++)
   2168     emit.nop();
   2169 
   2170   MemMap::FlushInstructionCache(host_pc, lbi.code_size);
   2171 }
   2172 
   2173 void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
   2174 {
   2175   EmitLoadGlobalAddress(RSCRATCH, ptr);
   2176   switch (size)
   2177   {
   2178     case RegSize_8:
   2179       m_emit->Ldrb(GetHostReg8(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
   2180       break;
   2181 
   2182     case RegSize_16:
   2183       m_emit->Ldrh(GetHostReg16(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
   2184       break;
   2185 
   2186     case RegSize_32:
   2187       m_emit->Ldr(GetHostReg32(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
   2188       break;
   2189 
   2190     default:
   2191       UnreachableCode();
   2192       break;
   2193   }
   2194 }
   2195 
   2196 void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value)
   2197 {
   2198   Value value_in_hr = GetValueInHostRegister(value);
   2199 
   2200   EmitLoadGlobalAddress(RSCRATCH, ptr);
   2201   switch (value.size)
   2202   {
   2203     case RegSize_8:
   2204       m_emit->Strb(GetHostReg8(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
   2205       break;
   2206 
   2207     case RegSize_16:
   2208       m_emit->Strh(GetHostReg16(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
   2209       break;
   2210 
   2211     case RegSize_32:
   2212       m_emit->Str(GetHostReg32(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
   2213       break;
   2214 
   2215     default:
   2216       UnreachableCode();
   2217       break;
   2218   }
   2219 }
   2220 
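         // If a load delay is pending (load_delay_reg != Reg::count), writes load_delay_value into the target
         // guest register and clears the slot, mirroring the interpreter's load-delay handling.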
   2221 void CodeGenerator::EmitFlushInterpreterLoadDelay()
   2222 {
   2223   Value reg = m_register_cache.AllocateScratch(RegSize_32);
   2224   Value value = m_register_cache.AllocateScratch(RegSize_32);
   2225 
   2226   const a64::MemOperand load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, load_delay_reg));
   2227   const a64::MemOperand load_delay_value(GetCPUPtrReg(), OFFSETOF(State, load_delay_value));
   2228   const a64::MemOperand regs_base(GetCPUPtrReg(), OFFSETOF(State, regs.r[0]));
   2229 
   2230   a64::Label skip_flush;
   2231 
   2232   // reg = load_delay_reg
   2233   m_emit->Ldrb(GetHostReg32(reg), load_delay_reg);
   2234 
   2235   // if load_delay_reg == Reg::count goto skip_flush
   2236   m_emit->Cmp(GetHostReg32(reg), static_cast<u8>(Reg::count));
   2237   m_emit->B(a64::eq, &skip_flush);
   2238 
   2239   // value = load_delay_value
   2240   m_emit->Ldr(GetHostReg32(value), load_delay_value);
   2241 
    2242   // reg = OFFSETOF(State, regs.r[0]) + (reg << 2), i.e. the byte offset of regs.r[reg] within State
   2243   m_emit->Lsl(GetHostReg32(reg), GetHostReg32(reg), 2);
   2244   m_emit->Add(GetHostReg32(reg), GetHostReg32(reg), OFFSETOF(State, regs.r[0]));
   2245 
   2246   // r[reg] = value
   2247   m_emit->Str(GetHostReg32(value), a64::MemOperand(GetCPUPtrReg(), GetHostReg32(reg)));
   2248 
   2249   // load_delay_reg = Reg::count
   2250   m_emit->Mov(GetHostReg32(reg), static_cast<u8>(Reg::count));
   2251   m_emit->Strb(GetHostReg32(reg), load_delay_reg);
   2252 
   2253   m_emit->Bind(&skip_flush);
   2254 }
   2255 
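         // Moves next_load_delay_reg/value into the current load-delay slot and clears the "next" slot.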
   2256 void CodeGenerator::EmitMoveNextInterpreterLoadDelay()
   2257 {
   2258   Value reg = m_register_cache.AllocateScratch(RegSize_32);
   2259   Value value = m_register_cache.AllocateScratch(RegSize_32);
   2260 
   2261   const a64::MemOperand load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, load_delay_reg));
   2262   const a64::MemOperand load_delay_value(GetCPUPtrReg(), OFFSETOF(State, load_delay_value));
   2263   const a64::MemOperand next_load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, next_load_delay_reg));
   2264   const a64::MemOperand next_load_delay_value(GetCPUPtrReg(), OFFSETOF(State, next_load_delay_value));
   2265 
   2266   m_emit->Ldrb(GetHostReg32(reg), next_load_delay_reg);
   2267   m_emit->Ldr(GetHostReg32(value), next_load_delay_value);
   2268   m_emit->Strb(GetHostReg32(reg), load_delay_reg);
   2269   m_emit->Str(GetHostReg32(value), load_delay_value);
   2270   m_emit->Mov(GetHostReg32(reg), static_cast<u8>(Reg::count));
   2271   m_emit->Strb(GetHostReg32(reg), next_load_delay_reg);
   2272 }
   2273 
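         // Cancels a pending load delay that targets reg, since a newer write to the same register overrides the
         // delayed load.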
   2274 void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
   2275 {
   2276   if (!m_load_delay_dirty)
   2277     return;
   2278 
   2279   const a64::MemOperand load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, load_delay_reg));
   2280   Value temp = m_register_cache.AllocateScratch(RegSize_8);
   2281 
   2282   a64::Label skip_cancel;
   2283 
   2284   // if load_delay_reg != reg goto skip_cancel
   2285   m_emit->Ldrb(GetHostReg8(temp), load_delay_reg);
   2286   m_emit->Cmp(GetHostReg8(temp), static_cast<u8>(reg));
   2287   m_emit->B(a64::ne, &skip_cancel);
   2288 
   2289   // load_delay_reg = Reg::count
   2290   m_emit->Mov(GetHostReg8(temp), static_cast<u8>(Reg::count));
   2291   m_emit->Strb(GetHostReg8(temp), load_delay_reg);
   2292 
   2293   m_emit->Bind(&skip_cancel);
   2294 }
   2295 
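         // For blocks not using the icache, simply charges the (constant or dynamically computed) fetch ticks.
         // Otherwise, each icache line touched by the block is tag-checked; on a miss the tag is updated and the
         // line fill penalty is added to pending_ticks.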
   2296 void CodeGenerator::EmitICacheCheckAndUpdate()
   2297 {
   2298   if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
   2299   {
   2300     if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
   2301     {
   2302       armEmitFarLoad(m_emit, RWARG2, GetFetchMemoryAccessTimePtr());
   2303       m_emit->Ldr(RWARG1, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
   2304       m_emit->Mov(RWARG3, m_block->size);
   2305       m_emit->Mul(RWARG2, RWARG2, RWARG3);
   2306       m_emit->Add(RWARG1, RWARG1, RWARG2);
   2307       m_emit->Str(RWARG1, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
   2308     }
   2309     else
   2310     {
   2311       EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
   2312                             Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
   2313     }
   2314   }
   2315   else if (m_block->icache_line_count > 0)
   2316   {
   2317     const auto& ticks_reg = a64::w0;
   2318     const auto& current_tag_reg = a64::w1;
   2319     const auto& existing_tag_reg = a64::w2;
   2320 
   2321     VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK;
   2322     m_emit->Ldr(ticks_reg, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
   2323     m_emit->Mov(current_tag_reg, current_pc);
   2324 
   2325     for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
   2326     {
   2327       const TickCount fill_ticks = GetICacheFillTicks(current_pc);
   2328       if (fill_ticks <= 0)
   2329         continue;
   2330 
   2331       const u32 line = GetICacheLine(current_pc);
   2332       const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32));
   2333 
   2334       a64::Label cache_hit;
   2335       m_emit->Ldr(existing_tag_reg, a64::MemOperand(GetCPUPtrReg(), offset));
   2336       m_emit->Cmp(existing_tag_reg, current_tag_reg);
   2337       m_emit->B(&cache_hit, a64::eq);
   2338 
   2339       m_emit->Str(current_tag_reg, a64::MemOperand(GetCPUPtrReg(), offset));
   2340       EmitAdd(0, 0, Value::FromConstantU32(static_cast<u32>(fill_ticks)), false);
   2341       m_emit->Bind(&cache_hit);
   2342 
   2343       if (i != (m_block->icache_line_count - 1))
   2344         m_emit->Add(current_tag_reg, current_tag_reg, ICACHE_LINE_SIZE);
   2345     }
   2346 
   2347     m_emit->Str(ticks_reg, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
   2348   }
   2349 }
   2350 
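         // Compares the block's RAM contents against its shadow copy, 16 bytes at a time with NEON and then in
         // 8/4-byte tails, and jumps to g_discard_and_recompile_block if anything changed.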
   2351 void CodeGenerator::EmitBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
   2352 {
    2353   // load both base pointers into registers first to reduce code size; the loads below can then use immediate offsets
   2354   armMoveAddressToReg(m_emit, RXARG1, ram_ptr);
   2355   armMoveAddressToReg(m_emit, RXARG2, shadow_ptr);
   2356 
   2357   bool first = true;
   2358   u32 offset = 0;
   2359   a64::Label block_changed;
   2360 
   2361   while (size >= 16)
   2362   {
   2363     const a64::VRegister vtmp = a64::v2.V4S();
   2364     const a64::VRegister dst = first ? a64::v0.V4S() : a64::v1.V4S();
   2365     m_emit->ldr(dst, a64::MemOperand(RXARG1, offset));
   2366     m_emit->ldr(vtmp, a64::MemOperand(RXARG2, offset));
   2367     m_emit->cmeq(dst, dst, vtmp);
   2368     if (!first)
   2369       m_emit->and_(a64::v0.V16B(), a64::v0.V16B(), dst.V16B());
   2370     else
   2371       first = false;
   2372 
   2373     offset += 16;
   2374     size -= 16;
   2375   }
   2376 
   2377   if (!first)
   2378   {
   2379     // TODO: make sure this doesn't choke on ffffffff
   2380     m_emit->uminv(a64::s0, a64::v0.V4S());
   2381     m_emit->fcmp(a64::s0, 0.0);
   2382     m_emit->b(&block_changed, a64::eq);
   2383   }
   2384 
   2385   while (size >= 8)
   2386   {
   2387     m_emit->ldr(RXARG3, a64::MemOperand(RXARG1, offset));
   2388     m_emit->ldr(RXSCRATCH, a64::MemOperand(RXARG2, offset));
   2389     m_emit->cmp(RXARG3, RXSCRATCH);
   2390     m_emit->b(&block_changed, a64::ne);
   2391     offset += 8;
   2392     size -= 8;
   2393   }
   2394 
   2395   while (size >= 4)
   2396   {
   2397     m_emit->ldr(RWARG3, a64::MemOperand(RXARG1, offset));
   2398     m_emit->ldr(RWSCRATCH, a64::MemOperand(RXARG2, offset));
   2399     m_emit->cmp(RWARG3, RWSCRATCH);
   2400     m_emit->b(&block_changed, a64::ne);
   2401     offset += 4;
   2402     size -= 4;
   2403   }
   2404 
   2405   DebugAssert(size == 0);
   2406 
   2407   a64::Label block_unchanged;
   2408   m_emit->b(&block_unchanged);
   2409   m_emit->bind(&block_changed);
   2410   armEmitJmp(m_emit, CodeCache::g_discard_and_recompile_block, false);
   2411   m_emit->bind(&block_unchanged);
   2412 }
   2413 
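         // Raises pending_ticks to at least gte_completion_tick; the static_assert guarantees the two fields are
         // adjacent so a single ldp can load both.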
   2414 void CodeGenerator::EmitStallUntilGTEComplete()
   2415 {
   2416   static_assert(OFFSETOF(State, pending_ticks) + sizeof(u32) == OFFSETOF(State, gte_completion_tick));
   2417   m_emit->ldp(GetHostReg32(RARG1), GetHostReg32(RARG2),
   2418               a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
   2419 
   2420   if (m_delayed_cycles_add > 0)
   2421   {
   2422     m_emit->Add(GetHostReg32(RARG1), GetHostReg32(RARG1), static_cast<u32>(m_delayed_cycles_add));
   2423     m_delayed_cycles_add = 0;
   2424   }
   2425 
   2426   m_emit->cmp(GetHostReg32(RARG2), GetHostReg32(RARG1));
   2427   m_emit->csel(GetHostReg32(RARG1), GetHostReg32(RARG2), GetHostReg32(RARG1), a64::Condition::hi);
   2428   m_emit->str(GetHostReg32(RARG1), a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
   2429 }
   2430 
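         // Uses a direct b when the target is within the +/-128MiB immediate branch range, otherwise materializes
         // the address in RSCRATCH and branches through it.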
   2431 void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
   2432 {
   2433   const s64 jump_distance =
   2434     static_cast<s64>(reinterpret_cast<intptr_t>(address) - reinterpret_cast<intptr_t>(GetCurrentCodePointer()));
   2435   Assert(Common::IsAligned(jump_distance, 4));
   2436   if (a64::Instruction::IsValidImmPCOffset(a64::UncondBranchType, jump_distance >> 2))
   2437   {
   2438     m_emit->b(jump_distance >> 2);
   2439     return;
   2440   }
   2441 
   2442   Assert(allow_scratch);
   2443 
   2444   m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(address));
   2445   m_emit->br(GetHostReg64(RSCRATCH));
   2446 }
   2447 
   2448 void CodeGenerator::EmitBranch(LabelType* label)
   2449 {
   2450   m_emit->B(label);
   2451 }
   2452 
   2453 static a64::Condition TranslateCondition(Condition condition, bool invert)
   2454 {
   2455   switch (condition)
   2456   {
   2457     case Condition::Always:
   2458       return a64::nv;
   2459 
   2460     case Condition::NotEqual:
   2461     case Condition::NotZero:
   2462       return invert ? a64::eq : a64::ne;
   2463 
   2464     case Condition::Equal:
   2465     case Condition::Zero:
   2466       return invert ? a64::ne : a64::eq;
   2467 
   2468     case Condition::Overflow:
   2469       return invert ? a64::vc : a64::vs;
   2470 
   2471     case Condition::Greater:
   2472       return invert ? a64::le : a64::gt;
   2473 
   2474     case Condition::GreaterEqual:
   2475       return invert ? a64::lt : a64::ge;
   2476 
   2477     case Condition::Less:
   2478       return invert ? a64::ge : a64::lt;
   2479 
   2480     case Condition::LessEqual:
   2481       return invert ? a64::gt : a64::le;
   2482 
   2483     case Condition::Negative:
   2484       return invert ? a64::pl : a64::mi;
   2485 
   2486     case Condition::PositiveOrZero:
   2487       return invert ? a64::mi : a64::pl;
   2488 
   2489     case Condition::Above:
   2490       return invert ? a64::ls : a64::hi;
   2491 
   2492     case Condition::AboveEqual:
   2493       return invert ? a64::cc : a64::cs;
   2494 
   2495     case Condition::Below:
   2496       return invert ? a64::cs : a64::cc;
   2497 
   2498     case Condition::BelowEqual:
   2499       return invert ? a64::hi : a64::ls;
   2500 
   2501     default:
   2502       UnreachableCode();
   2503       return a64::nv;
   2504   }
   2505 }
   2506 
   2507 void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, HostReg value, RegSize size,
   2508                                           LabelType* label)
   2509 {
   2510   switch (condition)
   2511   {
   2512     case Condition::NotEqual:
   2513     case Condition::Equal:
   2514     case Condition::Overflow:
   2515     case Condition::Greater:
   2516     case Condition::GreaterEqual:
   2517     case Condition::LessEqual:
   2518     case Condition::Less:
   2519     case Condition::Above:
   2520     case Condition::AboveEqual:
   2521     case Condition::Below:
   2522     case Condition::BelowEqual:
   2523       Panic("Needs a comparison value");
   2524       return;
   2525 
   2526     case Condition::Negative:
   2527     case Condition::PositiveOrZero:
   2528     {
   2529       switch (size)
   2530       {
   2531         case RegSize_8:
   2532           m_emit->tst(GetHostReg8(value), GetHostReg8(value));
   2533           break;
   2534         case RegSize_16:
   2535           m_emit->tst(GetHostReg16(value), GetHostReg16(value));
   2536           break;
   2537         case RegSize_32:
   2538           m_emit->tst(GetHostReg32(value), GetHostReg32(value));
   2539           break;
   2540         case RegSize_64:
   2541           m_emit->tst(GetHostReg64(value), GetHostReg64(value));
   2542           break;
   2543         default:
   2544           UnreachableCode();
   2545           break;
   2546       }
   2547 
   2548       EmitConditionalBranch(condition, invert, label);
   2549       return;
   2550     }
   2551 
   2552     case Condition::NotZero:
   2553     {
   2554       switch (size)
   2555       {
   2556         case RegSize_8:
   2557           m_emit->cbnz(GetHostReg8(value), label);
   2558           break;
   2559         case RegSize_16:
    2560           m_emit->cbnz(GetHostReg16(value), label);
   2561           break;
   2562         case RegSize_32:
   2563           m_emit->cbnz(GetHostReg32(value), label);
   2564           break;
   2565         case RegSize_64:
   2566           m_emit->cbnz(GetHostReg64(value), label);
   2567           break;
   2568         default:
   2569           UnreachableCode();
   2570           break;
   2571       }
   2572 
   2573       return;
   2574     }
   2575 
   2576     case Condition::Zero:
   2577     {
   2578       switch (size)
   2579       {
   2580         case RegSize_8:
   2581           m_emit->cbz(GetHostReg8(value), label);
   2582           break;
   2583         case RegSize_16:
   2584           m_emit->cbz(GetHostReg16(value), label);
   2585           break;
   2586         case RegSize_32:
   2587           m_emit->cbz(GetHostReg32(value), label);
   2588           break;
   2589         case RegSize_64:
   2590           m_emit->cbz(GetHostReg64(value), label);
   2591           break;
   2592         default:
   2593           UnreachableCode();
   2594           break;
   2595       }
   2596 
   2597       return;
   2598     }
   2599 
   2600     case Condition::Always:
   2601       m_emit->b(label);
   2602       return;
   2603 
   2604     default:
   2605       UnreachableCode();
   2606       return;
   2607   }
   2608 }
   2609 
   2610 void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, HostReg lhs, const Value& rhs,
   2611                                           LabelType* label)
   2612 {
   2613   switch (condition)
   2614   {
   2615     case Condition::NotEqual:
   2616     case Condition::Equal:
   2617     case Condition::Overflow:
   2618     case Condition::Greater:
   2619     case Condition::GreaterEqual:
   2620     case Condition::LessEqual:
   2621     case Condition::Less:
   2622     case Condition::Above:
   2623     case Condition::AboveEqual:
   2624     case Condition::Below:
   2625     case Condition::BelowEqual:
   2626     {
   2627       EmitCmp(lhs, rhs);
   2628       EmitConditionalBranch(condition, invert, label);
   2629       return;
   2630     }
   2631 
   2632     case Condition::Negative:
   2633     case Condition::PositiveOrZero:
   2634     case Condition::NotZero:
   2635     case Condition::Zero:
   2636     {
   2637       Assert(!rhs.IsValid() || (rhs.IsConstant() && rhs.GetS64ConstantValue() == 0));
   2638       EmitConditionalBranch(condition, invert, lhs, rhs.size, label);
   2639       return;
   2640     }
   2641 
   2642     case Condition::Always:
   2643       m_emit->b(label);
   2644       return;
   2645 
   2646     default:
   2647       UnreachableCode();
   2648       return;
   2649   }
   2650 }
   2651 
   2652 void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, LabelType* label)
   2653 {
   2654   if (condition == Condition::Always)
   2655     m_emit->b(label);
   2656   else
   2657     m_emit->b(label, TranslateCondition(condition, invert));
   2658 }
   2659 
   2660 void CodeGenerator::EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, LabelType* label)
   2661 {
   2662   switch (size)
   2663   {
   2664     case RegSize_8:
   2665     case RegSize_16:
   2666     case RegSize_32:
   2667       m_emit->tbz(GetHostReg32(reg), bit, label);
   2668       break;
   2669 
   2670     default:
   2671       UnreachableCode();
   2672       break;
   2673   }
   2674 }
   2675 
   2676 void CodeGenerator::EmitBindLabel(LabelType* label)
   2677 {
   2678   m_emit->Bind(label);
   2679 }
   2680 
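         // Forms a global address with adrp+orr when the target page is reachable and the low 12 bits encode as
         // a logical immediate; otherwise falls back to a full 64-bit mov of the pointer.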
   2681 void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr)
   2682 {
   2683   const void* current_code_ptr_page = reinterpret_cast<const void*>(
   2684     reinterpret_cast<uintptr_t>(GetCurrentCodePointer()) & ~static_cast<uintptr_t>(0xFFF));
   2685   const void* ptr_page =
   2686     reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(ptr) & ~static_cast<uintptr_t>(0xFFF));
   2687   const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
   2688   const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(ptr) & 0xFFFu);
   2689   if (vixl::IsInt21(page_displacement) && a64::Assembler::IsImmLogical(page_offset, 64))
   2690   {
   2691     m_emit->adrp(GetHostReg64(host_reg), page_displacement);
   2692     m_emit->orr(GetHostReg64(host_reg), GetHostReg64(host_reg), page_offset);
   2693   }
   2694   else
   2695   {
   2696     m_emit->Mov(GetHostReg64(host_reg), reinterpret_cast<uintptr_t>(ptr));
   2697   }
   2698 }
   2699 
   2700 } // namespace CPU::Recompiler
   2701 
   2702 #endif // CPU_ARCH_ARM64