cpu_recompiler_code_generator_aarch64.cpp
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#include "common/align.h"
#include "common/assert.h"
#include "common/log.h"
#include "common/memmap.h"

#include "cpu_code_cache_private.h"
#include "cpu_core.h"
#include "cpu_core_private.h"
#include "cpu_recompiler_code_generator.h"
#include "cpu_recompiler_thunks.h"
#include "settings.h"
#include "timing_event.h"

#ifdef CPU_ARCH_ARM64

Log_SetChannel(CPU::Recompiler);

#ifdef ENABLE_HOST_DISASSEMBLY
#include "vixl/aarch64/disasm-aarch64.h"
#endif

namespace a64 = vixl::aarch64;

namespace CPU::Recompiler {
constexpr u64 FUNCTION_CALLEE_SAVED_SPACE_RESERVE = 80;  // 8 registers
constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 144 bytes
constexpr u64 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE;

static constexpr u32 TRAMPOLINE_AREA_SIZE = 4 * 1024;
static std::unordered_map<const void*, u32> s_trampoline_targets;
static u8* s_trampoline_start_ptr = nullptr;
static u32 s_trampoline_used = 0;
} // namespace CPU::Recompiler

bool CPU::Recompiler::armIsCallerSavedRegister(u32 id)
{
  // same on both linux and windows
  return (id <= 18);
}

void CPU::Recompiler::armEmitMov(a64::Assembler* armAsm, const a64::Register& rd, u64 imm)
{
  DebugAssert(vixl::IsUint32(imm) || vixl::IsInt32(imm) || rd.Is64Bits());
  DebugAssert(rd.GetCode() != a64::sp.GetCode());

  if (imm == 0)
  {
    armAsm->mov(rd, a64::Assembler::AppropriateZeroRegFor(rd));
    return;
  }

  // The worst case for size is mov 64-bit immediate to sp:
  // * up to 4 instructions to materialise the constant
  // * 1 instruction to move to sp

  // Immediates on Aarch64 can be produced using an initial value, and zero to
  // three move keep operations.
  //
  // Initial values can be generated with:
  //  1. 64-bit move zero (movz).
  //  2. 32-bit move inverted (movn).
  //  3. 64-bit move inverted.
  //  4. 32-bit orr immediate.
  //  5. 64-bit orr immediate.
  // Move-keep may then be used to modify each of the 16-bit half words.
  //
  // The code below supports all five initial value generators, and
  // applying move-keep operations to move-zero and move-inverted initial
  // values.

  // Try to move the immediate in one instruction, and if that fails, switch to
  // using multiple instructions.
  const unsigned reg_size = rd.GetSizeInBits();

  if (a64::Assembler::IsImmMovz(imm, reg_size) && !rd.IsSP())
  {
    // Immediate can be represented in a move zero instruction. Movz can't write
    // to the stack pointer.
    armAsm->movz(rd, imm);
    return;
  }
  else if (a64::Assembler::IsImmMovn(imm, reg_size) && !rd.IsSP())
  {
    // Immediate can be represented in a move negative instruction. Movn can't
    // write to the stack pointer.
    armAsm->movn(rd, rd.Is64Bits() ? ~imm : (~imm & a64::kWRegMask));
    return;
  }
  else if (a64::Assembler::IsImmLogical(imm, reg_size))
  {
    // Immediate can be represented in a logical orr instruction.
    DebugAssert(!rd.IsZero());
    armAsm->orr(rd, a64::Assembler::AppropriateZeroRegFor(rd), imm);
    return;
  }

  // Generic immediate case. Imm will be represented by
  // [imm3, imm2, imm1, imm0], where each imm is 16 bits.
102 // A move-zero or move-inverted is generated for the first non-zero or 103 // non-0xffff immX, and a move-keep for subsequent non-zero immX. 104 105 uint64_t ignored_halfword = 0; 106 bool invert_move = false; 107 // If the number of 0xffff halfwords is greater than the number of 0x0000 108 // halfwords, it's more efficient to use move-inverted. 109 if (vixl::CountClearHalfWords(~imm, reg_size) > vixl::CountClearHalfWords(imm, reg_size)) 110 { 111 ignored_halfword = 0xffff; 112 invert_move = true; 113 } 114 115 // Iterate through the halfwords. Use movn/movz for the first non-ignored 116 // halfword, and movk for subsequent halfwords. 117 DebugAssert((reg_size % 16) == 0); 118 bool first_mov_done = false; 119 for (unsigned i = 0; i < (reg_size / 16); i++) 120 { 121 uint64_t imm16 = (imm >> (16 * i)) & 0xffff; 122 if (imm16 != ignored_halfword) 123 { 124 if (!first_mov_done) 125 { 126 if (invert_move) 127 armAsm->movn(rd, ~imm16 & 0xffff, 16 * i); 128 else 129 armAsm->movz(rd, imm16, 16 * i); 130 first_mov_done = true; 131 } 132 else 133 { 134 // Construct a wider constant. 135 armAsm->movk(rd, imm16, 16 * i); 136 } 137 } 138 } 139 140 DebugAssert(first_mov_done); 141 } 142 143 s64 CPU::Recompiler::armGetPCDisplacement(const void* current, const void* target) 144 { 145 // pxAssert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4)); 146 // pxAssert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4)); 147 return static_cast<s64>((reinterpret_cast<ptrdiff_t>(target) - reinterpret_cast<ptrdiff_t>(current)) >> 2); 148 } 149 150 bool CPU::Recompiler::armIsInAdrpRange(vixl::aarch64::Assembler* armAsm, const void* addr) 151 { 152 const void* cur = armAsm->GetCursorAddress<const void*>(); 153 const void* current_code_ptr_page = 154 reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF)); 155 const void* ptr_page = 156 reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF)); 157 const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10; 158 const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu); 159 160 return (vixl::IsInt21(page_displacement) && 161 (a64::Assembler::IsImmAddSub(page_offset) || a64::Assembler::IsImmLogical(page_offset, 64))); 162 } 163 164 void CPU::Recompiler::armMoveAddressToReg(a64::Assembler* armAsm, const a64::Register& reg, const void* addr) 165 { 166 DebugAssert(reg.IsX()); 167 168 const void* cur = armAsm->GetCursorAddress<const void*>(); 169 const void* current_code_ptr_page = 170 reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF)); 171 const void* ptr_page = 172 reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF)); 173 const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10; 174 const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu); 175 if (vixl::IsInt21(page_displacement) && a64::Assembler::IsImmAddSub(page_offset)) 176 { 177 armAsm->adrp(reg, page_displacement); 178 armAsm->add(reg, reg, page_offset); 179 } 180 else if (vixl::IsInt21(page_displacement) && a64::Assembler::IsImmLogical(page_offset, 64)) 181 { 182 armAsm->adrp(reg, page_displacement); 183 armAsm->orr(reg, reg, page_offset); 184 } 185 else 186 { 187 armEmitMov(armAsm, reg, reinterpret_cast<uintptr_t>(addr)); 188 } 189 } 190 void CPU::Recompiler::armEmitJmp(a64::Assembler* armAsm, const void* 
ptr, bool force_inline) 191 { 192 const void* cur = armAsm->GetCursorAddress<const void*>(); 193 s64 displacement = armGetPCDisplacement(cur, ptr); 194 bool use_blr = !vixl::IsInt26(displacement); 195 bool use_trampoline = use_blr && !armIsInAdrpRange(armAsm, ptr); 196 if (use_blr && use_trampoline && !force_inline) 197 { 198 if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline) 199 { 200 displacement = armGetPCDisplacement(cur, trampoline); 201 use_blr = !vixl::IsInt26(displacement); 202 } 203 } 204 205 if (use_blr) 206 { 207 armMoveAddressToReg(armAsm, RXSCRATCH, ptr); 208 armAsm->br(RXSCRATCH); 209 } 210 else 211 { 212 armAsm->b(displacement); 213 } 214 } 215 216 void CPU::Recompiler::armEmitCall(a64::Assembler* armAsm, const void* ptr, bool force_inline) 217 { 218 const void* cur = armAsm->GetCursorAddress<const void*>(); 219 s64 displacement = armGetPCDisplacement(cur, ptr); 220 bool use_blr = !vixl::IsInt26(displacement); 221 bool use_trampoline = use_blr && !armIsInAdrpRange(armAsm, ptr); 222 if (use_blr && use_trampoline && !force_inline) 223 { 224 if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline) 225 { 226 displacement = armGetPCDisplacement(cur, trampoline); 227 use_blr = !vixl::IsInt26(displacement); 228 } 229 } 230 231 if (use_blr) 232 { 233 armMoveAddressToReg(armAsm, RXSCRATCH, ptr); 234 armAsm->blr(RXSCRATCH); 235 } 236 else 237 { 238 armAsm->bl(displacement); 239 } 240 } 241 242 void CPU::Recompiler::armEmitCondBranch(a64::Assembler* armAsm, a64::Condition cond, const void* ptr) 243 { 244 const s64 jump_distance = static_cast<s64>(reinterpret_cast<intptr_t>(ptr) - 245 reinterpret_cast<intptr_t>(armAsm->GetCursorAddress<const void*>())); 246 // pxAssert(Common::IsAligned(jump_distance, 4)); 247 248 if (a64::Instruction::IsValidImmPCOffset(a64::CondBranchType, jump_distance >> 2)) 249 { 250 armAsm->b(jump_distance >> 2, cond); 251 } 252 else 253 { 254 a64::Label branch_not_taken; 255 armAsm->b(&branch_not_taken, InvertCondition(cond)); 256 257 const s64 new_jump_distance = static_cast<s64>(reinterpret_cast<intptr_t>(ptr) - 258 reinterpret_cast<intptr_t>(armAsm->GetCursorAddress<const void*>())); 259 armAsm->b(new_jump_distance >> 2); 260 armAsm->bind(&branch_not_taken); 261 } 262 } 263 264 void CPU::Recompiler::armEmitFarLoad(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, 265 const void* addr, bool sign_extend_word) 266 { 267 const void* cur = armAsm->GetCursorAddress<const void*>(); 268 const void* current_code_ptr_page = 269 reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF)); 270 const void* ptr_page = 271 reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF)); 272 const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10; 273 const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu); 274 a64::MemOperand memop; 275 276 const vixl::aarch64::Register xreg = reg.X(); 277 if (vixl::IsInt21(page_displacement)) 278 { 279 armAsm->adrp(xreg, page_displacement); 280 memop = vixl::aarch64::MemOperand(xreg, static_cast<int64_t>(page_offset)); 281 } 282 else 283 { 284 armMoveAddressToReg(armAsm, xreg, addr); 285 memop = vixl::aarch64::MemOperand(xreg); 286 } 287 288 if (sign_extend_word) 289 armAsm->ldrsw(reg, memop); 290 else 291 armAsm->ldr(reg, memop); 292 } 293 294 void CPU::Recompiler::armEmitFarStore(vixl::aarch64::Assembler* armAsm, const vixl::aarch64::Register& reg, 295 const void* addr, const 
vixl::aarch64::Register& tempreg) 296 { 297 DebugAssert(tempreg.IsX()); 298 299 const void* cur = armAsm->GetCursorAddress<const void*>(); 300 const void* current_code_ptr_page = 301 reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(cur) & ~static_cast<uintptr_t>(0xFFF)); 302 const void* ptr_page = 303 reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(addr) & ~static_cast<uintptr_t>(0xFFF)); 304 const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10; 305 const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(addr) & 0xFFFu); 306 307 if (vixl::IsInt21(page_displacement)) 308 { 309 armAsm->adrp(tempreg, page_displacement); 310 armAsm->str(reg, vixl::aarch64::MemOperand(tempreg, static_cast<int64_t>(page_offset))); 311 } 312 else 313 { 314 armMoveAddressToReg(armAsm, tempreg, addr); 315 armAsm->str(reg, vixl::aarch64::MemOperand(tempreg)); 316 } 317 } 318 319 u8* CPU::Recompiler::armGetJumpTrampoline(const void* target) 320 { 321 auto it = s_trampoline_targets.find(target); 322 if (it != s_trampoline_targets.end()) 323 return s_trampoline_start_ptr + it->second; 324 325 // align to 16 bytes? 326 const u32 offset = s_trampoline_used; // Common::AlignUpPow2(s_trampoline_used, 16); 327 328 // 4 movs plus a jump 329 if (TRAMPOLINE_AREA_SIZE - offset < 20) 330 { 331 Panic("Ran out of space in constant pool"); 332 return nullptr; 333 } 334 335 u8* start = s_trampoline_start_ptr + offset; 336 a64::Assembler armAsm(start, TRAMPOLINE_AREA_SIZE - offset); 337 #ifdef VIXL_DEBUG 338 vixl::CodeBufferCheckScope armAsmCheck(&armAsm, TRAMPOLINE_AREA_SIZE - offset, 339 vixl::CodeBufferCheckScope::kDontReserveBufferSpace); 340 #endif 341 armMoveAddressToReg(&armAsm, RXSCRATCH, target); 342 armAsm.br(RXSCRATCH); 343 armAsm.FinalizeCode(); 344 345 const u32 size = static_cast<u32>(armAsm.GetSizeOfCodeGenerated()); 346 DebugAssert(size < 20); 347 s_trampoline_targets.emplace(target, offset); 348 s_trampoline_used = offset + static_cast<u32>(size); 349 350 MemMap::FlushInstructionCache(start, size); 351 return start; 352 } 353 354 void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size) 355 { 356 #ifdef ENABLE_HOST_DISASSEMBLY 357 class MyDisassembler : public a64::Disassembler 358 { 359 protected: 360 void ProcessOutput(const a64::Instruction* instr) override 361 { 362 DEBUG_LOG("0x{:016X} {:08X}\t\t{}", reinterpret_cast<uint64_t>(instr), instr->GetInstructionBits(), GetOutput()); 363 } 364 }; 365 366 a64::Decoder decoder; 367 MyDisassembler disas; 368 decoder.AppendVisitor(&disas); 369 decoder.Decode(static_cast<const a64::Instruction*>(start), 370 reinterpret_cast<const a64::Instruction*>(static_cast<const u8*>(start) + size)); 371 #else 372 ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY."); 373 #endif 374 } 375 376 u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size) 377 { 378 return size / a64::kInstructionSize; 379 } 380 381 u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache) 382 { 383 using namespace a64; 384 using namespace CPU::Recompiler; 385 386 const s64 disp = armGetPCDisplacement(code, dst); 387 DebugAssert(vixl::IsInt26(disp)); 388 389 const u32 new_code = B | Assembler::ImmUncondBranch(disp); 390 std::memcpy(code, &new_code, sizeof(new_code)); 391 if (flush_icache) 392 MemMap::FlushInstructionCache(code, kInstructionSize); 393 394 return kInstructionSize; 395 } 396 397 u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size) 398 { 399 using namespace 
vixl::aarch64; 400 using namespace CPU::Recompiler; 401 402 #define PTR(x) a64::MemOperand(RSTATE, (s64)(((u8*)(x)) - ((u8*)&g_state))) 403 404 Assembler actual_asm(static_cast<u8*>(code), code_size); 405 Assembler* armAsm = &actual_asm; 406 407 #ifdef VIXL_DEBUG 408 vixl::CodeBufferCheckScope asm_check(armAsm, code_size, vixl::CodeBufferCheckScope::kDontReserveBufferSpace); 409 #endif 410 411 Label dispatch; 412 413 g_enter_recompiler = armAsm->GetCursorAddress<decltype(g_enter_recompiler)>(); 414 { 415 // reserve some space for saving caller-saved registers 416 armAsm->sub(sp, sp, CPU::Recompiler::FUNCTION_STACK_SIZE); 417 418 // Need the CPU state for basically everything :-) 419 armMoveAddressToReg(armAsm, RSTATE, &g_state); 420 421 // Fastmem setup, oldrec doesn't need it 422 if (IsUsingFastmem() && g_settings.cpu_execution_mode != CPUExecutionMode::Recompiler) 423 armAsm->ldr(RMEMBASE, PTR(&g_state.fastmem_base)); 424 425 // Fall through to event dispatcher 426 } 427 428 // check events then for frame done 429 g_check_events_and_dispatch = armAsm->GetCursorAddress<const void*>(); 430 { 431 Label skip_event_check; 432 armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks)); 433 armAsm->ldr(RWARG2, PTR(&g_state.downcount)); 434 armAsm->cmp(RWARG1, RWARG2); 435 armAsm->b(&skip_event_check, lt); 436 437 g_run_events_and_dispatch = armAsm->GetCursorAddress<const void*>(); 438 armEmitCall(armAsm, reinterpret_cast<const void*>(&TimingEvents::RunEvents), true); 439 440 armAsm->bind(&skip_event_check); 441 } 442 443 // TODO: align? 444 g_dispatcher = armAsm->GetCursorAddress<const void*>(); 445 { 446 armAsm->bind(&dispatch); 447 448 // x9 <- s_fast_map[pc >> 16] 449 armAsm->ldr(RWARG1, PTR(&g_state.pc)); 450 armMoveAddressToReg(armAsm, RXARG3, g_code_lut.data()); 451 armAsm->lsr(RWARG2, RWARG1, 16); 452 armAsm->lsr(RWARG1, RWARG1, 2); 453 armAsm->ldr(RXARG2, MemOperand(RXARG3, RXARG2, LSL, 3)); 454 455 // blr(x9[pc * 2]) (fast_map[pc >> 2]) 456 armAsm->ldr(RXARG1, MemOperand(RXARG2, RXARG1, LSL, 3)); 457 armAsm->blr(RXARG1); 458 } 459 460 g_compile_or_revalidate_block = armAsm->GetCursorAddress<const void*>(); 461 { 462 armAsm->ldr(RWARG1, PTR(&g_state.pc)); 463 armEmitCall(armAsm, reinterpret_cast<const void*>(&CompileOrRevalidateBlock), true); 464 armAsm->b(&dispatch); 465 } 466 467 g_discard_and_recompile_block = armAsm->GetCursorAddress<const void*>(); 468 { 469 armAsm->ldr(RWARG1, PTR(&g_state.pc)); 470 armEmitCall(armAsm, reinterpret_cast<const void*>(&DiscardAndRecompileBlock), true); 471 armAsm->b(&dispatch); 472 } 473 474 g_interpret_block = armAsm->GetCursorAddress<const void*>(); 475 { 476 armEmitCall(armAsm, reinterpret_cast<const void*>(GetInterpretUncachedBlockFunction()), true); 477 armAsm->b(&dispatch); 478 } 479 480 armAsm->FinalizeCode(); 481 482 // TODO: align? 
483 s_trampoline_targets.clear(); 484 s_trampoline_start_ptr = static_cast<u8*>(code) + armAsm->GetCursorOffset(); 485 s_trampoline_used = 0; 486 487 #undef PTR 488 return static_cast<u32>(armAsm->GetCursorOffset()) + TRAMPOLINE_AREA_SIZE; 489 } 490 491 namespace CPU::Recompiler { 492 493 constexpr HostReg RCPUPTR = 19; 494 constexpr HostReg RMEMBASEPTR = 20; 495 constexpr HostReg RRETURN = 0; 496 constexpr HostReg RARG1 = 0; 497 constexpr HostReg RARG2 = 1; 498 constexpr HostReg RARG3 = 2; 499 constexpr HostReg RARG4 = 3; 500 constexpr HostReg RSCRATCH = 8; 501 502 static const a64::WRegister GetHostReg8(HostReg reg) 503 { 504 return a64::WRegister(reg); 505 } 506 507 static const a64::WRegister GetHostReg8(const Value& value) 508 { 509 DebugAssert(value.size == RegSize_8 && value.IsInHostRegister()); 510 return a64::WRegister(value.host_reg); 511 } 512 513 static const a64::WRegister GetHostReg16(HostReg reg) 514 { 515 return a64::WRegister(reg); 516 } 517 518 static const a64::WRegister GetHostReg16(const Value& value) 519 { 520 DebugAssert(value.size == RegSize_16 && value.IsInHostRegister()); 521 return a64::WRegister(value.host_reg); 522 } 523 524 static const a64::WRegister GetHostReg32(HostReg reg) 525 { 526 return a64::WRegister(reg); 527 } 528 529 static const a64::WRegister GetHostReg32(const Value& value) 530 { 531 DebugAssert(value.size == RegSize_32 && value.IsInHostRegister()); 532 return a64::WRegister(value.host_reg); 533 } 534 535 static const a64::XRegister GetHostReg64(HostReg reg) 536 { 537 return a64::XRegister(reg); 538 } 539 540 static const a64::XRegister GetHostReg64(const Value& value) 541 { 542 DebugAssert(value.size == RegSize_64 && value.IsInHostRegister()); 543 return a64::XRegister(value.host_reg); 544 } 545 546 static const a64::XRegister GetCPUPtrReg() 547 { 548 return GetHostReg64(RCPUPTR); 549 } 550 551 static const a64::XRegister GetFastmemBasePtrReg() 552 { 553 return GetHostReg64(RMEMBASEPTR); 554 } 555 556 CodeGenerator::CodeGenerator() 557 : m_register_cache(*this), m_near_emitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeCodePointer()), 558 CPU::CodeCache::GetFreeCodeSpace(), a64::PositionDependentCode), 559 m_far_emitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeFarCodePointer()), 560 CPU::CodeCache::GetFreeFarCodeSpace(), a64::PositionDependentCode), 561 m_emit(&m_near_emitter) 562 { 563 // remove the temporaries from vixl's list to prevent it from using them. 564 // eventually we won't use the macro assembler and this won't be a problem... 
565 m_near_emitter.GetScratchRegisterList()->Remove(16); 566 m_near_emitter.GetScratchRegisterList()->Remove(17); 567 m_far_emitter.GetScratchRegisterList()->Remove(16); 568 m_far_emitter.GetScratchRegisterList()->Remove(17); 569 InitHostRegs(); 570 } 571 572 CodeGenerator::~CodeGenerator() = default; 573 574 const char* CodeGenerator::GetHostRegName(HostReg reg, RegSize size /*= HostPointerSize*/) 575 { 576 static constexpr std::array<const char*, HostReg_Count> reg32_names = { 577 {"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", 578 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23", "w24", "w25", "w26", "w27", "w28", "w29", "w30", "w31"}}; 579 static constexpr std::array<const char*, HostReg_Count> reg64_names = { 580 {"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 581 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31"}}; 582 if (reg >= static_cast<HostReg>(HostReg_Count)) 583 return ""; 584 585 switch (size) 586 { 587 case RegSize_32: 588 return reg32_names[reg]; 589 case RegSize_64: 590 return reg64_names[reg]; 591 default: 592 return ""; 593 } 594 } 595 596 void CodeGenerator::InitHostRegs() 597 { 598 // TODO: function calls mess up the parameter registers if we use them.. fix it 599 // allocate nonvolatile before volatile 600 m_register_cache.SetHostRegAllocationOrder( 601 {19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17}); 602 m_register_cache.SetCallerSavedHostRegs({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}); 603 m_register_cache.SetCalleeSavedHostRegs({19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30}); 604 m_register_cache.SetCPUPtrHostReg(RCPUPTR); 605 } 606 607 void CodeGenerator::SwitchToFarCode() 608 { 609 m_emit = &m_far_emitter; 610 } 611 612 void CodeGenerator::SwitchToNearCode() 613 { 614 m_emit = &m_near_emitter; 615 } 616 617 void* CodeGenerator::GetStartNearCodePointer() const 618 { 619 return static_cast<u8*>(CPU::CodeCache::GetFreeCodePointer()); 620 } 621 622 void* CodeGenerator::GetCurrentNearCodePointer() const 623 { 624 return static_cast<u8*>(CPU::CodeCache::GetFreeCodePointer()) + m_near_emitter.GetCursorOffset(); 625 } 626 627 void* CodeGenerator::GetCurrentFarCodePointer() const 628 { 629 return static_cast<u8*>(CPU::CodeCache::GetFreeFarCodePointer()) + m_far_emitter.GetCursorOffset(); 630 } 631 632 Value CodeGenerator::GetValueInHostRegister(const Value& value, bool allow_zero_register /* = true */) 633 { 634 if (value.IsInHostRegister()) 635 return Value::FromHostReg(&m_register_cache, value.host_reg, value.size); 636 637 if (value.HasConstantValue(0) && allow_zero_register) 638 return Value::FromHostReg(&m_register_cache, static_cast<HostReg>(31), value.size); 639 640 Value new_value = m_register_cache.AllocateScratch(value.size); 641 EmitCopyValue(new_value.host_reg, value); 642 return new_value; 643 } 644 645 Value CodeGenerator::GetValueInHostOrScratchRegister(const Value& value, bool allow_zero_register /* = true */) 646 { 647 if (value.IsInHostRegister()) 648 return Value::FromHostReg(&m_register_cache, value.host_reg, value.size); 649 650 if (value.HasConstantValue(0) && allow_zero_register) 651 return Value::FromHostReg(&m_register_cache, static_cast<HostReg>(31), value.size); 652 653 Value new_value = Value::FromHostReg(&m_register_cache, RSCRATCH, value.size); 654 EmitCopyValue(new_value.host_reg, value); 655 return 
new_value; 656 } 657 658 void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */) 659 { 660 if (allocate_registers) 661 { 662 // Save the link register, since we'll be calling functions. 663 const bool link_reg_allocated = m_register_cache.AllocateHostReg(30); 664 DebugAssert(link_reg_allocated); 665 UNREFERENCED_VARIABLE(link_reg_allocated); 666 667 m_register_cache.AssumeCalleeSavedRegistersAreSaved(); 668 669 // Store the CPU struct pointer. TODO: make this better. 670 const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR); 671 DebugAssert(cpu_reg_allocated); 672 UNREFERENCED_VARIABLE(cpu_reg_allocated); 673 674 // If there's loadstore instructions, preload the fastmem base. 675 if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions)) 676 { 677 const bool fastmem_reg_allocated = m_register_cache.AllocateHostReg(RMEMBASEPTR); 678 Assert(fastmem_reg_allocated); 679 m_emit->Ldr(GetFastmemBasePtrReg(), a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, fastmem_base))); 680 } 681 } 682 } 683 684 void CodeGenerator::EmitEndBlock(bool free_registers, const void* jump_to) 685 { 686 if (free_registers) 687 { 688 if (m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions)) 689 m_register_cache.FreeHostReg(RMEMBASEPTR); 690 691 m_register_cache.FreeHostReg(RCPUPTR); 692 m_register_cache.FreeHostReg(30); // lr 693 694 m_register_cache.PopCalleeSavedRegisters(true); 695 } 696 697 if (jump_to) 698 armEmitJmp(m_emit, jump_to, true); 699 } 700 701 void CodeGenerator::EmitExceptionExit() 702 { 703 // ensure all unflushed registers are written back 704 m_register_cache.FlushAllGuestRegisters(false, false); 705 706 // the interpreter load delay might have its own value, but we'll overwrite it here anyway 707 // technically RaiseException() and FlushPipeline() have already been called, but that should be okay 708 m_register_cache.FlushLoadDelay(false); 709 710 m_register_cache.PopCalleeSavedRegisters(false); 711 712 armEmitJmp(m_emit, CodeCache::g_check_events_and_dispatch, true); 713 } 714 715 void CodeGenerator::EmitExceptionExitOnBool(const Value& value) 716 { 717 Assert(!value.IsConstant() && value.IsInHostRegister()); 718 719 m_register_cache.PushState(); 720 721 // TODO: This is... not great. 
  a64::Label skip_branch;
  m_emit->Cbz(GetHostReg64(value.host_reg), &skip_branch);
  EmitBranch(GetCurrentFarCodePointer());
  m_emit->Bind(&skip_branch);

  SwitchToFarCode();
  EmitExceptionExit();
  SwitchToNearCode();

  m_register_cache.PopState();
}

const void* CodeGenerator::FinalizeBlock(u32* out_host_code_size, u32* out_host_far_code_size)
{
  m_near_emitter.FinalizeCode();
  m_far_emitter.FinalizeCode();

  const void* code = CPU::CodeCache::GetFreeCodePointer();
  *out_host_code_size = static_cast<u32>(m_near_emitter.GetSizeOfCodeGenerated());
  *out_host_far_code_size = static_cast<u32>(m_far_emitter.GetSizeOfCodeGenerated());

  CPU::CodeCache::CommitCode(static_cast<u32>(m_near_emitter.GetSizeOfCodeGenerated()));
  CPU::CodeCache::CommitFarCode(static_cast<u32>(m_far_emitter.GetSizeOfCodeGenerated()));

  m_near_emitter.Reset();
  m_far_emitter.Reset();

  return code;
}

void CodeGenerator::EmitSignExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size)
{
  switch (to_size)
  {
    case RegSize_16:
    {
      switch (from_size)
      {
        case RegSize_8:
          m_emit->sxtb(GetHostReg16(to_reg), GetHostReg8(from_reg));
          m_emit->and_(GetHostReg16(to_reg), GetHostReg16(to_reg), 0xFFFF);
          return;

        default:
          break;
      }
    }
    break;

    case RegSize_32:
    {
      switch (from_size)
      {
        case RegSize_8:
          m_emit->sxtb(GetHostReg32(to_reg), GetHostReg8(from_reg));
          return;
        case RegSize_16:
          m_emit->sxth(GetHostReg32(to_reg), GetHostReg16(from_reg));
          return;

        default:
          break;
      }
    }
    break;

    default:
      break;
  }

  Panic("Unknown sign-extend combination");
}

void CodeGenerator::EmitZeroExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size)
{
  switch (to_size)
  {
    case RegSize_16:
    {
      switch (from_size)
      {
        case RegSize_8:
          m_emit->and_(GetHostReg16(to_reg), GetHostReg8(from_reg), 0xFF);
          return;

        default:
          break;
      }
    }
    break;

    case RegSize_32:
    {
      switch (from_size)
      {
        case RegSize_8:
          m_emit->and_(GetHostReg32(to_reg), GetHostReg8(from_reg), 0xFF);
          return;
        case RegSize_16:
          m_emit->and_(GetHostReg32(to_reg), GetHostReg16(from_reg), 0xFFFF);
          return;

        default:
          break;
      }
    }
    break;

    default:
      break;
  }

  Panic("Unknown zero-extend combination");
}

void CodeGenerator::EmitCopyValue(HostReg to_reg, const Value& value)
{
  // TODO: mov x, 0 -> xor x, x
  DebugAssert(value.IsConstant() || value.IsInHostRegister());

  switch (value.size)
  {
    case RegSize_8:
    case RegSize_16:
    case RegSize_32:
    {
      if (value.IsConstant())
        m_emit->Mov(GetHostReg32(to_reg), value.constant_value);
      else
        m_emit->Mov(GetHostReg32(to_reg), GetHostReg32(value.host_reg));
    }
    break;

    case RegSize_64:
    {
      if (value.IsConstant())
        m_emit->Mov(GetHostReg64(to_reg), value.constant_value);
      else
        m_emit->Mov(GetHostReg64(to_reg), GetHostReg64(value.host_reg));
    }
    break;

    default:
      UnreachableCode();
      break;
  }
}

void CodeGenerator::EmitAdd(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags)
{
  Assert(value.IsConstant() || value.IsInHostRegister());

  // if it's in a host register already, this is easy
  if
(value.IsInHostRegister()) 876 { 877 if (value.size < RegSize_64) 878 { 879 if (set_flags) 880 m_emit->adds(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); 881 else 882 m_emit->add(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); 883 } 884 else 885 { 886 if (set_flags) 887 m_emit->adds(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg)); 888 else 889 m_emit->add(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg)); 890 } 891 892 return; 893 } 894 895 // do we need temporary storage for the constant, if it won't fit in an immediate? 896 const s64 constant_value = value.GetS64ConstantValue(); 897 if (a64::Assembler::IsImmAddSub(constant_value)) 898 { 899 if (value.size < RegSize_64) 900 { 901 if (set_flags) 902 m_emit->adds(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value); 903 else 904 m_emit->add(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value); 905 } 906 else 907 { 908 if (set_flags) 909 m_emit->adds(GetHostReg64(to_reg), GetHostReg64(from_reg), constant_value); 910 else 911 m_emit->add(GetHostReg64(to_reg), GetHostReg64(from_reg), constant_value); 912 } 913 914 return; 915 } 916 917 // need a temporary 918 Assert(from_reg != RSCRATCH); 919 Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size)); 920 if (value.size < RegSize_64) 921 m_emit->Mov(GetHostReg32(temp_value.host_reg), constant_value); 922 else 923 m_emit->Mov(GetHostReg64(temp_value.host_reg), constant_value); 924 EmitAdd(to_reg, from_reg, temp_value, set_flags); 925 } 926 927 void CodeGenerator::EmitSub(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags) 928 { 929 Assert(value.IsConstant() || value.IsInHostRegister()); 930 931 // if it's in a host register already, this is easy 932 if (value.IsInHostRegister()) 933 { 934 if (value.size < RegSize_64) 935 { 936 if (set_flags) 937 m_emit->subs(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); 938 else 939 m_emit->sub(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); 940 } 941 else 942 { 943 if (set_flags) 944 m_emit->subs(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg)); 945 else 946 m_emit->sub(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg)); 947 } 948 949 return; 950 } 951 952 // do we need temporary storage for the constant, if it won't fit in an immediate? 
953 const s64 constant_value = value.GetS64ConstantValue(); 954 if (a64::Assembler::IsImmAddSub(value.constant_value)) 955 { 956 if (value.size < RegSize_64) 957 { 958 if (set_flags) 959 m_emit->subs(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value); 960 else 961 m_emit->sub(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value); 962 } 963 else 964 { 965 if (set_flags) 966 m_emit->subs(GetHostReg64(to_reg), GetHostReg64(from_reg), constant_value); 967 else 968 m_emit->sub(GetHostReg64(to_reg), GetHostReg64(from_reg), constant_value); 969 } 970 971 return; 972 } 973 974 // need a temporary 975 Assert(from_reg != RSCRATCH); 976 Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size)); 977 if (value.size < RegSize_64) 978 m_emit->Mov(GetHostReg32(temp_value.host_reg), constant_value); 979 else 980 m_emit->Mov(GetHostReg64(temp_value.host_reg), constant_value); 981 EmitSub(to_reg, from_reg, temp_value, set_flags); 982 } 983 984 void CodeGenerator::EmitCmp(HostReg to_reg, const Value& value) 985 { 986 Assert(value.IsConstant() || value.IsInHostRegister()); 987 988 // if it's in a host register already, this is easy 989 if (value.IsInHostRegister()) 990 { 991 if (value.size < RegSize_64) 992 m_emit->cmp(GetHostReg32(to_reg), GetHostReg32(value.host_reg)); 993 else 994 m_emit->cmp(GetHostReg64(to_reg), GetHostReg64(value.host_reg)); 995 996 return; 997 } 998 999 // do we need temporary storage for the constant, if it won't fit in an immediate? 1000 const s64 constant_value = value.GetS64ConstantValue(); 1001 if (constant_value >= 0) 1002 { 1003 if (a64::Assembler::IsImmAddSub(constant_value)) 1004 { 1005 if (value.size < RegSize_64) 1006 m_emit->cmp(GetHostReg32(to_reg), constant_value); 1007 else 1008 m_emit->cmp(GetHostReg64(to_reg), constant_value); 1009 1010 return; 1011 } 1012 } 1013 else 1014 { 1015 if (a64::Assembler::IsImmAddSub(-constant_value)) 1016 { 1017 if (value.size < RegSize_64) 1018 m_emit->cmn(GetHostReg32(to_reg), -constant_value); 1019 else 1020 m_emit->cmn(GetHostReg64(to_reg), -constant_value); 1021 1022 return; 1023 } 1024 } 1025 1026 // need a temporary 1027 Assert(to_reg != RSCRATCH); 1028 Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size)); 1029 if (value.size < RegSize_64) 1030 m_emit->Mov(GetHostReg32(temp_value.host_reg), constant_value); 1031 else 1032 m_emit->Mov(GetHostReg64(temp_value.host_reg), constant_value); 1033 EmitCmp(to_reg, temp_value); 1034 } 1035 1036 void CodeGenerator::EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs, 1037 bool signed_multiply) 1038 { 1039 Value lhs_in_reg = GetValueInHostRegister(lhs); 1040 Value rhs_in_reg = GetValueInHostRegister(rhs); 1041 1042 if (lhs.size < RegSize_64) 1043 { 1044 if (signed_multiply) 1045 { 1046 m_emit->smull(GetHostReg64(to_reg_lo), GetHostReg32(lhs_in_reg.host_reg), GetHostReg32(rhs_in_reg.host_reg)); 1047 m_emit->asr(GetHostReg64(to_reg_hi), GetHostReg64(to_reg_lo), 32); 1048 } 1049 else 1050 { 1051 m_emit->umull(GetHostReg64(to_reg_lo), GetHostReg32(lhs_in_reg.host_reg), GetHostReg32(rhs_in_reg.host_reg)); 1052 m_emit->lsr(GetHostReg64(to_reg_hi), GetHostReg64(to_reg_lo), 32); 1053 } 1054 } 1055 else 1056 { 1057 // TODO: Use mul + smulh 1058 Panic("Not implemented"); 1059 } 1060 } 1061 1062 void CodeGenerator::EmitDiv(HostReg to_reg_quotient, HostReg to_reg_remainder, HostReg num, HostReg denom, RegSize size, 1063 bool signed_divide) 1064 { 1065 // only 32-bit supported for now.. 
  Assert(size == RegSize_32);

  Value quotient_value;
  if (to_reg_quotient == HostReg_Count)
  {
    Assert(to_reg_quotient != RSCRATCH);
    quotient_value = Value::FromHostReg(&m_register_cache, RSCRATCH, size);
  }
  else
  {
    quotient_value.SetHostReg(&m_register_cache, to_reg_quotient, size);
  }

  if (signed_divide)
  {
    m_emit->sdiv(GetHostReg32(quotient_value), GetHostReg32(num), GetHostReg32(denom));
    if (to_reg_remainder != HostReg_Count)
    {
      m_emit->msub(GetHostReg32(to_reg_remainder), GetHostReg32(quotient_value), GetHostReg32(denom),
                   GetHostReg32(num));
    }
  }
  else
  {
    m_emit->udiv(GetHostReg32(quotient_value), GetHostReg32(num), GetHostReg32(denom));
    if (to_reg_remainder != HostReg_Count)
    {
      m_emit->msub(GetHostReg32(to_reg_remainder), GetHostReg32(quotient_value), GetHostReg32(denom),
                   GetHostReg32(num));
    }
  }
}

void CodeGenerator::EmitInc(HostReg to_reg, RegSize size)
{
  Panic("Not implemented");
#if 0
  switch (size)
  {
    case RegSize_8:
      m_emit->inc(GetHostReg8(to_reg));
      break;
    case RegSize_16:
      m_emit->inc(GetHostReg16(to_reg));
      break;
    case RegSize_32:
      m_emit->inc(GetHostReg32(to_reg));
      break;
    default:
      UnreachableCode();
      break;
  }
#endif
}

void CodeGenerator::EmitDec(HostReg to_reg, RegSize size)
{
  Panic("Not implemented");
#if 0
  switch (size)
  {
    case RegSize_8:
      m_emit->dec(GetHostReg8(to_reg));
      break;
    case RegSize_16:
      m_emit->dec(GetHostReg16(to_reg));
      break;
    case RegSize_32:
      m_emit->dec(GetHostReg32(to_reg));
      break;
    default:
      UnreachableCode();
      break;
  }
#endif
}

void CodeGenerator::EmitShl(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value,
                            bool assume_amount_masked /* = true */)
{
  switch (size)
  {
    case RegSize_8:
    case RegSize_16:
    case RegSize_32:
    {
      if (amount_value.IsConstant())
        m_emit->lsl(GetHostReg32(to_reg), GetHostReg32(from_reg), amount_value.constant_value & 0x1F);
      else
        m_emit->lslv(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(amount_value));

      // mask the shifted result back down to the guest register width
      if (size == RegSize_8)
        m_emit->and_(GetHostReg32(to_reg), GetHostReg32(to_reg), 0xFF);
      else if (size == RegSize_16)
        m_emit->and_(GetHostReg32(to_reg), GetHostReg32(to_reg), 0xFFFF);
    }
    break;

    case RegSize_64:
    {
      if (amount_value.IsConstant())
        m_emit->lsl(GetHostReg64(to_reg), GetHostReg64(from_reg), amount_value.constant_value & 0x3F);
      else
        m_emit->lslv(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(amount_value));
    }
    break;
  }
}

void CodeGenerator::EmitShr(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value,
                            bool assume_amount_masked /* = true */)
{
  switch (size)
  {
    case RegSize_8:
    case RegSize_16:
    case RegSize_32:
    {
      if (amount_value.IsConstant())
        m_emit->lsr(GetHostReg32(to_reg), GetHostReg32(from_reg), amount_value.constant_value & 0x1F);
      else
        m_emit->lsrv(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(amount_value));

      // mask the shifted result back down to the guest register width
      if (size == RegSize_8)
        m_emit->and_(GetHostReg32(to_reg), GetHostReg32(to_reg), 0xFF);
      else if (size == RegSize_16)
        m_emit->and_(GetHostReg32(to_reg), GetHostReg32(to_reg), 0xFFFF);
    }
    break;

    case RegSize_64:
    {
      if (amount_value.IsConstant())
        m_emit->lsr(GetHostReg64(to_reg), GetHostReg64(from_reg), amount_value.constant_value & 0x3F);
      else
        m_emit->lsrv(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(amount_value));
    }
    break;
  }
}

void CodeGenerator::EmitSar(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value,
                            bool assume_amount_masked /* = true */)
{
  switch (size)
  {
    case RegSize_8:
    case RegSize_16:
    case RegSize_32:
    {
      if (amount_value.IsConstant())
        m_emit->asr(GetHostReg32(to_reg), GetHostReg32(from_reg), amount_value.constant_value & 0x1F);
      else
        m_emit->asrv(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(amount_value));

      // mask the shifted result back down to the guest register width
      if (size == RegSize_8)
        m_emit->and_(GetHostReg32(to_reg), GetHostReg32(to_reg), 0xFF);
      else if (size == RegSize_16)
        m_emit->and_(GetHostReg32(to_reg), GetHostReg32(to_reg), 0xFFFF);
    }
    break;

    case RegSize_64:
    {
      if (amount_value.IsConstant())
        m_emit->asr(GetHostReg64(to_reg), GetHostReg64(from_reg), amount_value.constant_value & 0x3F);
      else
        m_emit->asrv(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(amount_value));
    }
    break;
  }
}

static bool CanFitInBitwiseImmediate(const Value& value)
{
  const unsigned reg_size = (value.size < RegSize_64) ? 32 : 64;
  unsigned n, imm_s, imm_r;
  return a64::Assembler::IsImmLogical(s64(value.constant_value), reg_size, &n, &imm_s, &imm_r);
}

void CodeGenerator::EmitAnd(HostReg to_reg, HostReg from_reg, const Value& value)
{
  Assert(value.IsConstant() || value.IsInHostRegister());

  // if it's in a host register already, this is easy
  if (value.IsInHostRegister())
  {
    if (value.size < RegSize_64)
      m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg));
    else
      m_emit->and_(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg));

    return;
  }

  // do we need temporary storage for the constant, if it won't fit in an immediate?
1262 if (CanFitInBitwiseImmediate(value)) 1263 { 1264 if (value.size < RegSize_64) 1265 m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), s64(value.constant_value)); 1266 else 1267 m_emit->and_(GetHostReg64(to_reg), GetHostReg64(from_reg), s64(value.constant_value)); 1268 1269 return; 1270 } 1271 1272 // need a temporary 1273 Assert(from_reg != RSCRATCH); 1274 Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size)); 1275 if (value.size < RegSize_64) 1276 m_emit->Mov(GetHostReg32(temp_value.host_reg), s64(value.constant_value)); 1277 else 1278 m_emit->Mov(GetHostReg64(temp_value.host_reg), s64(value.constant_value)); 1279 EmitAnd(to_reg, from_reg, temp_value); 1280 } 1281 1282 void CodeGenerator::EmitOr(HostReg to_reg, HostReg from_reg, const Value& value) 1283 { 1284 Assert(value.IsConstant() || value.IsInHostRegister()); 1285 1286 // if it's in a host register already, this is easy 1287 if (value.IsInHostRegister()) 1288 { 1289 if (value.size < RegSize_64) 1290 m_emit->orr(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); 1291 else 1292 m_emit->orr(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg)); 1293 1294 return; 1295 } 1296 1297 // do we need temporary storage for the constant, if it won't fit in an immediate? 1298 if (CanFitInBitwiseImmediate(value)) 1299 { 1300 if (value.size < RegSize_64) 1301 m_emit->orr(GetHostReg32(to_reg), GetHostReg32(from_reg), s64(value.constant_value)); 1302 else 1303 m_emit->orr(GetHostReg64(to_reg), GetHostReg64(from_reg), s64(value.constant_value)); 1304 1305 return; 1306 } 1307 1308 // need a temporary 1309 Assert(from_reg != RSCRATCH); 1310 Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size)); 1311 if (value.size < RegSize_64) 1312 m_emit->Mov(GetHostReg32(temp_value.host_reg), s64(value.constant_value)); 1313 else 1314 m_emit->Mov(GetHostReg64(temp_value.host_reg), s64(value.constant_value)); 1315 EmitOr(to_reg, from_reg, temp_value); 1316 } 1317 1318 void CodeGenerator::EmitXor(HostReg to_reg, HostReg from_reg, const Value& value) 1319 { 1320 Assert(value.IsConstant() || value.IsInHostRegister()); 1321 1322 // if it's in a host register already, this is easy 1323 if (value.IsInHostRegister()) 1324 { 1325 if (value.size < RegSize_64) 1326 m_emit->eor(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); 1327 else 1328 m_emit->eor(GetHostReg64(to_reg), GetHostReg64(from_reg), GetHostReg64(value.host_reg)); 1329 1330 return; 1331 } 1332 1333 // do we need temporary storage for the constant, if it won't fit in an immediate? 
1334 if (CanFitInBitwiseImmediate(value)) 1335 { 1336 if (value.size < RegSize_64) 1337 m_emit->eor(GetHostReg32(to_reg), GetHostReg32(from_reg), s64(value.constant_value)); 1338 else 1339 m_emit->eor(GetHostReg64(to_reg), GetHostReg64(from_reg), s64(value.constant_value)); 1340 1341 return; 1342 } 1343 1344 // need a temporary 1345 Assert(from_reg != RSCRATCH); 1346 Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size)); 1347 if (value.size < RegSize_64) 1348 m_emit->Mov(GetHostReg32(temp_value.host_reg), s64(value.constant_value)); 1349 else 1350 m_emit->Mov(GetHostReg64(temp_value.host_reg), s64(value.constant_value)); 1351 EmitXor(to_reg, from_reg, temp_value); 1352 } 1353 1354 void CodeGenerator::EmitTest(HostReg to_reg, const Value& value) 1355 { 1356 Assert(value.IsConstant() || value.IsInHostRegister()); 1357 1358 // if it's in a host register already, this is easy 1359 if (value.IsInHostRegister()) 1360 { 1361 if (value.size < RegSize_64) 1362 m_emit->tst(GetHostReg32(to_reg), GetHostReg32(value.host_reg)); 1363 else 1364 m_emit->tst(GetHostReg64(to_reg), GetHostReg64(value.host_reg)); 1365 1366 return; 1367 } 1368 1369 // do we need temporary storage for the constant, if it won't fit in an immediate? 1370 if (CanFitInBitwiseImmediate(value)) 1371 { 1372 if (value.size < RegSize_64) 1373 m_emit->tst(GetHostReg32(to_reg), s64(value.constant_value)); 1374 else 1375 m_emit->tst(GetHostReg64(to_reg), s64(value.constant_value)); 1376 1377 return; 1378 } 1379 1380 // need a temporary 1381 Assert(to_reg != RSCRATCH); 1382 Value temp_value(Value::FromHostReg(&m_register_cache, RSCRATCH, value.size)); 1383 if (value.size < RegSize_64) 1384 m_emit->Mov(GetHostReg32(temp_value.host_reg), s64(value.constant_value)); 1385 else 1386 m_emit->Mov(GetHostReg64(temp_value.host_reg), s64(value.constant_value)); 1387 EmitTest(to_reg, temp_value); 1388 } 1389 1390 void CodeGenerator::EmitNot(HostReg to_reg, RegSize size) 1391 { 1392 switch (size) 1393 { 1394 case RegSize_8: 1395 m_emit->mvn(GetHostReg8(to_reg), GetHostReg8(to_reg)); 1396 m_emit->and_(GetHostReg8(to_reg), GetHostReg8(to_reg), 0xFF); 1397 break; 1398 1399 case RegSize_16: 1400 m_emit->mvn(GetHostReg16(to_reg), GetHostReg16(to_reg)); 1401 m_emit->and_(GetHostReg16(to_reg), GetHostReg16(to_reg), 0xFFFF); 1402 break; 1403 1404 case RegSize_32: 1405 m_emit->mvn(GetHostReg32(to_reg), GetHostReg32(to_reg)); 1406 break; 1407 1408 case RegSize_64: 1409 m_emit->mvn(GetHostReg64(to_reg), GetHostReg64(to_reg)); 1410 break; 1411 1412 default: 1413 break; 1414 } 1415 } 1416 1417 void CodeGenerator::EmitSetConditionResult(HostReg to_reg, RegSize to_size, Condition condition) 1418 { 1419 if (condition == Condition::Always) 1420 { 1421 if (to_size < RegSize_64) 1422 m_emit->Mov(GetHostReg32(to_reg), 1); 1423 else 1424 m_emit->Mov(GetHostReg64(to_reg), 1); 1425 1426 return; 1427 } 1428 1429 a64::Condition acond; 1430 switch (condition) 1431 { 1432 case Condition::NotEqual: 1433 acond = a64::ne; 1434 break; 1435 1436 case Condition::Equal: 1437 acond = a64::eq; 1438 break; 1439 1440 case Condition::Overflow: 1441 acond = a64::vs; 1442 break; 1443 1444 case Condition::Greater: 1445 acond = a64::gt; 1446 break; 1447 1448 case Condition::GreaterEqual: 1449 acond = a64::ge; 1450 break; 1451 1452 case Condition::Less: 1453 acond = a64::lt; 1454 break; 1455 1456 case Condition::LessEqual: 1457 acond = a64::le; 1458 break; 1459 1460 case Condition::Negative: 1461 acond = a64::mi; 1462 break; 1463 1464 case Condition::PositiveOrZero: 1465 
acond = a64::pl; 1466 break; 1467 1468 case Condition::Above: 1469 acond = a64::hi; 1470 break; 1471 1472 case Condition::AboveEqual: 1473 acond = a64::cs; 1474 break; 1475 1476 case Condition::Below: 1477 acond = a64::cc; 1478 break; 1479 1480 case Condition::BelowEqual: 1481 acond = a64::ls; 1482 break; 1483 1484 default: 1485 UnreachableCode(); 1486 return; 1487 } 1488 1489 if (to_size < RegSize_64) 1490 m_emit->cset(GetHostReg32(to_reg), acond); 1491 else 1492 m_emit->cset(GetHostReg64(to_reg), acond); 1493 } 1494 1495 u32 CodeGenerator::PrepareStackForCall() 1496 { 1497 m_register_cache.PushCallerSavedRegisters(); 1498 return 0; 1499 } 1500 1501 void CodeGenerator::RestoreStackAfterCall(u32 adjust_size) 1502 { 1503 m_register_cache.PopCallerSavedRegisters(); 1504 } 1505 1506 void CodeGenerator::EmitCall(const void* ptr) 1507 { 1508 armEmitCall(m_emit, ptr, false); 1509 } 1510 1511 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr) 1512 { 1513 if (return_value) 1514 return_value->Discard(); 1515 1516 // shadow space allocate 1517 const u32 adjust_size = PrepareStackForCall(); 1518 1519 // actually call the function 1520 EmitCall(ptr); 1521 1522 // shadow space release 1523 RestoreStackAfterCall(adjust_size); 1524 1525 // copy out return value if requested 1526 if (return_value) 1527 { 1528 return_value->Undiscard(); 1529 EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); 1530 } 1531 } 1532 1533 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1) 1534 { 1535 if (return_value) 1536 return_value->Discard(); 1537 1538 // shadow space allocate 1539 const u32 adjust_size = PrepareStackForCall(); 1540 1541 // push arguments 1542 EmitCopyValue(RARG1, arg1); 1543 1544 // actually call the function 1545 EmitCall(ptr); 1546 1547 // shadow space release 1548 RestoreStackAfterCall(adjust_size); 1549 1550 // copy out return value if requested 1551 if (return_value) 1552 { 1553 return_value->Undiscard(); 1554 EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); 1555 } 1556 } 1557 1558 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2) 1559 { 1560 if (return_value) 1561 return_value->Discard(); 1562 1563 // shadow space allocate 1564 const u32 adjust_size = PrepareStackForCall(); 1565 1566 // push arguments 1567 EmitCopyValue(RARG1, arg1); 1568 EmitCopyValue(RARG2, arg2); 1569 1570 // actually call the function 1571 EmitCall(ptr); 1572 1573 // shadow space release 1574 RestoreStackAfterCall(adjust_size); 1575 1576 // copy out return value if requested 1577 if (return_value) 1578 { 1579 return_value->Undiscard(); 1580 EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); 1581 } 1582 } 1583 1584 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2, 1585 const Value& arg3) 1586 { 1587 if (return_value) 1588 m_register_cache.DiscardHostReg(return_value->GetHostRegister()); 1589 1590 // shadow space allocate 1591 const u32 adjust_size = PrepareStackForCall(); 1592 1593 // push arguments 1594 EmitCopyValue(RARG1, arg1); 1595 EmitCopyValue(RARG2, arg2); 1596 EmitCopyValue(RARG3, arg3); 1597 1598 // actually call the function 1599 EmitCall(ptr); 1600 1601 // shadow space release 1602 RestoreStackAfterCall(adjust_size); 1603 
1604 // copy out return value if requested 1605 if (return_value) 1606 { 1607 return_value->Undiscard(); 1608 EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); 1609 } 1610 } 1611 1612 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2, 1613 const Value& arg3, const Value& arg4) 1614 { 1615 if (return_value) 1616 return_value->Discard(); 1617 1618 // shadow space allocate 1619 const u32 adjust_size = PrepareStackForCall(); 1620 1621 // push arguments 1622 EmitCopyValue(RARG1, arg1); 1623 EmitCopyValue(RARG2, arg2); 1624 EmitCopyValue(RARG3, arg3); 1625 EmitCopyValue(RARG4, arg4); 1626 1627 // actually call the function 1628 EmitCall(ptr); 1629 1630 // shadow space release 1631 RestoreStackAfterCall(adjust_size); 1632 1633 // copy out return value if requested 1634 if (return_value) 1635 { 1636 return_value->Undiscard(); 1637 EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); 1638 } 1639 } 1640 1641 void CodeGenerator::EmitPushHostReg(HostReg reg, u32 position) 1642 { 1643 const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - (position * 8)); 1644 m_emit->str(GetHostReg64(reg), addr); 1645 } 1646 1647 void CodeGenerator::EmitPushHostRegPair(HostReg reg, HostReg reg2, u32 position) 1648 { 1649 const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - ((position + 1) * 8)); 1650 m_emit->stp(GetHostReg64(reg2), GetHostReg64(reg), addr); 1651 } 1652 1653 void CodeGenerator::EmitPopHostReg(HostReg reg, u32 position) 1654 { 1655 const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - (position * 8)); 1656 m_emit->ldr(GetHostReg64(reg), addr); 1657 } 1658 1659 void CodeGenerator::EmitPopHostRegPair(HostReg reg, HostReg reg2, u32 position) 1660 { 1661 const a64::MemOperand addr(a64::sp, FUNCTION_STACK_SIZE - (position * 8)); 1662 m_emit->ldp(GetHostReg64(reg2), GetHostReg64(reg), addr); 1663 } 1664 1665 void CodeGenerator::EmitLoadCPUStructField(HostReg host_reg, RegSize guest_size, u32 offset) 1666 { 1667 const s64 s_offset = static_cast<s64>(ZeroExtend64(offset)); 1668 1669 switch (guest_size) 1670 { 1671 case RegSize_8: 1672 m_emit->Ldrb(GetHostReg8(host_reg), a64::MemOperand(GetCPUPtrReg(), s_offset)); 1673 break; 1674 1675 case RegSize_16: 1676 m_emit->Ldrh(GetHostReg16(host_reg), a64::MemOperand(GetCPUPtrReg(), s_offset)); 1677 break; 1678 1679 case RegSize_32: 1680 m_emit->Ldr(GetHostReg32(host_reg), a64::MemOperand(GetCPUPtrReg(), s_offset)); 1681 break; 1682 1683 case RegSize_64: 1684 m_emit->Ldr(GetHostReg64(host_reg), a64::MemOperand(GetCPUPtrReg(), s_offset)); 1685 break; 1686 1687 default: 1688 { 1689 UnreachableCode(); 1690 } 1691 break; 1692 } 1693 } 1694 1695 void CodeGenerator::EmitStoreCPUStructField(u32 offset, const Value& value) 1696 { 1697 const Value hr_value = GetValueInHostRegister(value); 1698 const s64 s_offset = static_cast<s64>(ZeroExtend64(offset)); 1699 1700 switch (value.size) 1701 { 1702 case RegSize_8: 1703 m_emit->Strb(GetHostReg8(hr_value), a64::MemOperand(GetCPUPtrReg(), s_offset)); 1704 break; 1705 1706 case RegSize_16: 1707 m_emit->Strh(GetHostReg16(hr_value), a64::MemOperand(GetCPUPtrReg(), s_offset)); 1708 break; 1709 1710 case RegSize_32: 1711 m_emit->Str(GetHostReg32(hr_value), a64::MemOperand(GetCPUPtrReg(), s_offset)); 1712 break; 1713 1714 case RegSize_64: 1715 m_emit->Str(GetHostReg64(hr_value), a64::MemOperand(GetCPUPtrReg(), s_offset)); 1716 break; 1717 
1718 default: 1719 { 1720 UnreachableCode(); 1721 } 1722 break; 1723 } 1724 } 1725 1726 void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value) 1727 { 1728 const s64 s_offset = static_cast<s64>(ZeroExtend64(offset)); 1729 const a64::MemOperand o_offset(GetCPUPtrReg(), s_offset); 1730 1731 Value real_value; 1732 if (value.IsInHostRegister()) 1733 { 1734 real_value.SetHostReg(&m_register_cache, value.host_reg, value.size); 1735 } 1736 else 1737 { 1738 // do we need temporary storage for the constant, if it won't fit in an immediate? 1739 Assert(value.IsConstant()); 1740 const s64 constant_value = value.GetS64ConstantValue(); 1741 if (!a64::Assembler::IsImmAddSub(constant_value)) 1742 { 1743 real_value.SetHostReg(&m_register_cache, RARG4, value.size); 1744 EmitCopyValue(real_value.host_reg, value); 1745 } 1746 else 1747 { 1748 real_value = value; 1749 } 1750 } 1751 1752 // Don't need to mask here because we're storing back to memory. 1753 switch (value.size) 1754 { 1755 case RegSize_8: 1756 { 1757 m_emit->Ldrb(GetHostReg8(RSCRATCH), o_offset); 1758 if (real_value.IsConstant()) 1759 m_emit->Add(GetHostReg8(RSCRATCH), GetHostReg8(RSCRATCH), real_value.GetS64ConstantValue()); 1760 else 1761 m_emit->Add(GetHostReg8(RSCRATCH), GetHostReg8(RSCRATCH), GetHostReg8(real_value)); 1762 m_emit->Strb(GetHostReg8(RSCRATCH), o_offset); 1763 } 1764 break; 1765 1766 case RegSize_16: 1767 { 1768 m_emit->Ldrh(GetHostReg16(RSCRATCH), o_offset); 1769 if (real_value.IsConstant()) 1770 m_emit->Add(GetHostReg16(RSCRATCH), GetHostReg16(RSCRATCH), real_value.GetS64ConstantValue()); 1771 else 1772 m_emit->Add(GetHostReg16(RSCRATCH), GetHostReg16(RSCRATCH), GetHostReg16(real_value)); 1773 m_emit->Strh(GetHostReg16(RSCRATCH), o_offset); 1774 } 1775 break; 1776 1777 case RegSize_32: 1778 { 1779 m_emit->Ldr(GetHostReg32(RSCRATCH), o_offset); 1780 if (real_value.IsConstant()) 1781 m_emit->Add(GetHostReg32(RSCRATCH), GetHostReg32(RSCRATCH), real_value.GetS64ConstantValue()); 1782 else 1783 m_emit->Add(GetHostReg32(RSCRATCH), GetHostReg32(RSCRATCH), GetHostReg32(real_value)); 1784 m_emit->Str(GetHostReg32(RSCRATCH), o_offset); 1785 } 1786 break; 1787 1788 case RegSize_64: 1789 { 1790 m_emit->Ldr(GetHostReg64(RSCRATCH), o_offset); 1791 if (real_value.IsConstant()) 1792 m_emit->Add(GetHostReg64(RSCRATCH), GetHostReg64(RSCRATCH), s64(real_value.constant_value)); 1793 else 1794 m_emit->Add(GetHostReg64(RSCRATCH), GetHostReg64(RSCRATCH), GetHostReg64(real_value)); 1795 m_emit->Str(GetHostReg64(RSCRATCH), o_offset); 1796 } 1797 break; 1798 1799 default: 1800 { 1801 UnreachableCode(); 1802 } 1803 break; 1804 } 1805 } 1806 1807 void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result) 1808 { 1809 HostReg address_reg; 1810 a64::MemOperand actual_address; 1811 if (address.IsConstant()) 1812 { 1813 m_emit->Mov(GetHostReg32(result.host_reg), address.constant_value); 1814 address_reg = result.host_reg; 1815 } 1816 else 1817 { 1818 address_reg = address.host_reg; 1819 } 1820 1821 if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) 1822 { 1823 m_emit->lsr(GetHostReg64(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); 1824 m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3)); 1825 } 1826 1827 const a64::XRegister membase = 1828 (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? 
void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result)
{
  HostReg address_reg;
  a64::MemOperand actual_address;
  if (address.IsConstant())
  {
    m_emit->Mov(GetHostReg32(result.host_reg), address.constant_value);
    address_reg = result.host_reg;
  }
  else
  {
    address_reg = address.host_reg;
  }

  if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
  {
    m_emit->lsr(GetHostReg64(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT);
    m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3));
  }

  const a64::XRegister membase =
    (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();

  switch (size)
  {
    case RegSize_8:
      m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
      break;

    case RegSize_16:
      m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
      break;

    case RegSize_32:
      m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
      break;

    default:
      UnreachableCode();
      break;
  }
}

void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
                                               const Value& address, RegSize size, Value& result)
{
  HostReg address_reg;
  if (address.IsConstant())
  {
    m_emit->Mov(GetHostReg32(result.host_reg), address.constant_value);
    address_reg = result.host_reg;
  }
  else
  {
    address_reg = address.host_reg;
  }

  if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
  {
    m_emit->lsr(GetHostReg64(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT);
    m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3));
  }

  const a64::XRegister membase =
    (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();

  m_register_cache.InhibitAllocation();

  void* host_pc = GetCurrentNearCodePointer();

  switch (size)
  {
    case RegSize_8:
      m_emit->ldrb(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
      break;

    case RegSize_16:
      m_emit->ldrh(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
      break;

    case RegSize_32:
      m_emit->ldr(GetHostReg32(result.host_reg), a64::MemOperand(membase, GetHostReg32(address_reg)));
      break;

    default:
      UnreachableCode();
      break;
  }

  const u32 host_code_size =
    static_cast<u32>(static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc)));

  // generate slowmem fallback
  const void* host_slowmem_pc = GetCurrentFarCodePointer();
  SwitchToFarCode();

  // we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
  DebugAssert(m_delayed_cycles_add > 0);
  EmitAddCPUStructField(OFFSETOF(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
  m_delayed_cycles_add += Bus::RAM_READ_TICKS;

  EmitLoadGuestMemorySlowmem(instruction, info, address, size, result, true);

  EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
                        Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));

  // return to the block code
  EmitBranch(GetCurrentNearCodePointer(), false);

  SwitchToNearCode();
  m_register_cache.UninhibitAllocation();

  CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, host_slowmem_pc);
}

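// Slowmem fallback for loads. The thunk-based path below is emitted in far code; the
// (host_pc, code size, guest PC, thunk address) record added via AddLoadStoreInfo() above allows
// BackpatchLoadStore() further down to rewrite the fastmem access into a branch to this handler,
// presumably when the host-side access faults.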
void CodeGenerator::EmitLoadGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
                                               const Value& address, RegSize size, Value& result, bool in_far_code)
{
  if (g_settings.cpu_recompiler_memory_exceptions)
  {
    // NOTE: This can leave junk in the upper bits
    switch (size)
    {
      case RegSize_8:
        EmitFunctionCall(&result, &Thunks::ReadMemoryByte, address);
        break;

      case RegSize_16:
        EmitFunctionCall(&result, &Thunks::ReadMemoryHalfWord, address);
        break;

      case RegSize_32:
        EmitFunctionCall(&result, &Thunks::ReadMemoryWord, address);
        break;

      default:
        UnreachableCode();
        break;
    }

    m_register_cache.PushState();

    a64::Label load_okay;
    m_emit->Tbz(GetHostReg64(result.host_reg), 63, &load_okay);
    EmitBranch(GetCurrentFarCodePointer());
    m_emit->Bind(&load_okay);

    // load exception path
    if (!in_far_code)
      SwitchToFarCode();

    // cause_bits = (-result << 2) | BD | cop_n
    m_emit->neg(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg));
    m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2);
    EmitOr(result.host_reg, result.host_reg,
           Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException(
             static_cast<Exception>(0), info.is_branch_delay_slot, false, instruction.cop.cop_n)));
    EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());

    EmitExceptionExit();

    if (!in_far_code)
      SwitchToNearCode();

    m_register_cache.PopState();
  }
  else
  {
    switch (size)
    {
      case RegSize_8:
        EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryByte, address);
        break;

      case RegSize_16:
        EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryHalfWord, address);
        break;

      case RegSize_32:
        EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryWord, address);
        break;

      default:
        UnreachableCode();
        break;
    }
  }
}

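// Fastmem stores mirror the load path above, but as there is no result register to borrow, constant
// addresses are materialised in RSCRATCH instead.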
void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info,
                                                const Value& address, RegSize size, const Value& value)
{
  Value value_in_hr = GetValueInHostRegister(value);

  HostReg address_reg;
  if (address.IsConstant())
  {
    m_emit->Mov(GetHostReg32(RSCRATCH), address.constant_value);
    address_reg = RSCRATCH;
  }
  else
  {
    address_reg = address.host_reg;
  }

  if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
  {
    m_emit->lsr(GetHostReg64(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT);
    m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg64(RARG1), a64::LSL, 3));
  }

  const a64::XRegister membase =
    (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? GetHostReg64(RARG1) : GetFastmemBasePtrReg();

  // fastmem
  void* host_pc = GetCurrentNearCodePointer();

  m_register_cache.InhibitAllocation();

  switch (size)
  {
    case RegSize_8:
      m_emit->strb(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg)));
      break;

    case RegSize_16:
      m_emit->strh(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg)));
      break;

    case RegSize_32:
      m_emit->str(GetHostReg32(value_in_hr), a64::MemOperand(membase, GetHostReg32(address_reg)));
      break;

    default:
      UnreachableCode();
      break;
  }

  const u32 host_code_size =
    static_cast<u32>(static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc)));

  // generate slowmem fallback
  void* host_slowmem_pc = GetCurrentFarCodePointer();
  SwitchToFarCode();

  DebugAssert(m_delayed_cycles_add > 0);
  EmitAddCPUStructField(OFFSETOF(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));

  EmitStoreGuestMemorySlowmem(instruction, info, address, size, value_in_hr, true);

  EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
                        Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));

  // return to the block code
  EmitBranch(GetCurrentNearCodePointer(), false);

  SwitchToNearCode();
  m_register_cache.UninhibitAllocation();

  CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, host_slowmem_pc);
}

void CodeGenerator::EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info,
                                                const Value& address, RegSize size, const Value& value,
                                                bool in_far_code)
{
  Value value_in_hr = GetValueInHostRegister(value);

  if (g_settings.cpu_recompiler_memory_exceptions)
  {
    Assert(!in_far_code);

    Value result = m_register_cache.AllocateScratch(RegSize_32);
    switch (size)
    {
      case RegSize_8:
        EmitFunctionCall(&result, &Thunks::WriteMemoryByte, address, value_in_hr);
        break;

      case RegSize_16:
        EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, address, value_in_hr);
        break;

      case RegSize_32:
        EmitFunctionCall(&result, &Thunks::WriteMemoryWord, address, value_in_hr);
        break;

      default:
        UnreachableCode();
        break;
    }

    m_register_cache.PushState();

    a64::Label store_okay;
    m_emit->Cbz(GetHostReg64(result.host_reg), &store_okay);
    EmitBranch(GetCurrentFarCodePointer());
    m_emit->Bind(&store_okay);

    // store exception path
    if (!in_far_code)
      SwitchToFarCode();

    // cause_bits = (result << 2) | BD | cop_n
    m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2);
    EmitOr(result.host_reg, result.host_reg,
           Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException(
             static_cast<Exception>(0), info.is_branch_delay_slot, false, instruction.cop.cop_n)));
    EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC());

    if (!in_far_code)
      EmitExceptionExit();
    SwitchToNearCode();

    m_register_cache.PopState();
  }
  else
  {
    switch (size)
    {
      case RegSize_8:
        EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryByte, address, value_in_hr);
        break;

      case RegSize_16:
        EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryHalfWord, address, value_in_hr);
        break;

      case RegSize_32:
        EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryWord, address, value_in_hr);
        break;

      default:
        UnreachableCode();
        break;
    }
  }
}

void CodeGenerator::EmitUpdateFastmemBase()
{
  m_emit->Ldr(GetFastmemBasePtrReg(), a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, fastmem_base)));
}

void CodeGenerator::BackpatchLoadStore(void* host_pc, const CodeCache::LoadstoreBackpatchInfo& lbi)
{
  DEV_LOG("Backpatching {} (guest PC 0x{:08X}) to slowmem at {}", host_pc, lbi.guest_pc, lbi.thunk_address);

  // check jump distance
  const s64 jump_distance =
    static_cast<s64>(reinterpret_cast<intptr_t>(lbi.thunk_address) - reinterpret_cast<intptr_t>(host_pc));
  Assert(Common::IsAligned(jump_distance, 4));
  Assert(a64::Instruction::IsValidImmPCOffset(a64::UncondBranchType, jump_distance >> 2));

  // turn it into a jump to the slowmem handler
  vixl::aarch64::MacroAssembler emit(static_cast<vixl::byte*>(host_pc), lbi.code_size, a64::PositionDependentCode);
  emit.b(jump_distance >> 2);

  const s32 nops = (static_cast<s32>(lbi.code_size) - static_cast<s32>(emit.GetCursorOffset())) / 4;
  Assert(nops >= 0);
  for (s32 i = 0; i < nops; i++)
    emit.nop();

  MemMap::FlushInstructionCache(host_pc, lbi.code_size);
}

void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr)
{
  EmitLoadGlobalAddress(RSCRATCH, ptr);
  switch (size)
  {
    case RegSize_8:
      m_emit->Ldrb(GetHostReg8(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
      break;

    case RegSize_16:
      m_emit->Ldrh(GetHostReg16(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
      break;

    case RegSize_32:
      m_emit->Ldr(GetHostReg32(host_reg), a64::MemOperand(GetHostReg64(RSCRATCH)));
      break;

    default:
      UnreachableCode();
      break;
  }
}

void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value)
{
  Value value_in_hr = GetValueInHostRegister(value);

  EmitLoadGlobalAddress(RSCRATCH, ptr);
  switch (value.size)
  {
    case RegSize_8:
      m_emit->Strb(GetHostReg8(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
      break;

    case RegSize_16:
      m_emit->Strh(GetHostReg16(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
      break;

    case RegSize_32:
      m_emit->Str(GetHostReg32(value_in_hr), a64::MemOperand(GetHostReg64(RSCRATCH)));
      break;

    default:
      UnreachableCode();
      break;
  }
}

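// Interpreter-style load delay handling: State keeps a pending (load_delay_reg, load_delay_value) pair, with
// load_delay_reg == Reg::count meaning "no load pending". Flushing commits the pending value into regs.r[]
// and then clears the pending register.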
void CodeGenerator::EmitFlushInterpreterLoadDelay()
{
  Value reg = m_register_cache.AllocateScratch(RegSize_32);
  Value value = m_register_cache.AllocateScratch(RegSize_32);

  const a64::MemOperand load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, load_delay_reg));
  const a64::MemOperand load_delay_value(GetCPUPtrReg(), OFFSETOF(State, load_delay_value));
  const a64::MemOperand regs_base(GetCPUPtrReg(), OFFSETOF(State, regs.r[0]));

  a64::Label skip_flush;

  // reg = load_delay_reg
  m_emit->Ldrb(GetHostReg32(reg), load_delay_reg);

  // if load_delay_reg == Reg::count goto skip_flush
  m_emit->Cmp(GetHostReg32(reg), static_cast<u8>(Reg::count));
  m_emit->B(a64::eq, &skip_flush);

  // value = load_delay_value
  m_emit->Ldr(GetHostReg32(value), load_delay_value);

  // reg = offset(r[0] + reg << 2)
  m_emit->Lsl(GetHostReg32(reg), GetHostReg32(reg), 2);
  m_emit->Add(GetHostReg32(reg), GetHostReg32(reg), OFFSETOF(State, regs.r[0]));

  // r[reg] = value
  m_emit->Str(GetHostReg32(value), a64::MemOperand(GetCPUPtrReg(), GetHostReg32(reg)));

  // load_delay_reg = Reg::count
  m_emit->Mov(GetHostReg32(reg), static_cast<u8>(Reg::count));
  m_emit->Strb(GetHostReg32(reg), load_delay_reg);

  m_emit->Bind(&skip_flush);
}

void CodeGenerator::EmitMoveNextInterpreterLoadDelay()
{
  Value reg = m_register_cache.AllocateScratch(RegSize_32);
  Value value = m_register_cache.AllocateScratch(RegSize_32);

  const a64::MemOperand load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, load_delay_reg));
  const a64::MemOperand load_delay_value(GetCPUPtrReg(), OFFSETOF(State, load_delay_value));
  const a64::MemOperand next_load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, next_load_delay_reg));
  const a64::MemOperand next_load_delay_value(GetCPUPtrReg(), OFFSETOF(State, next_load_delay_value));

  m_emit->Ldrb(GetHostReg32(reg), next_load_delay_reg);
  m_emit->Ldr(GetHostReg32(value), next_load_delay_value);
  m_emit->Strb(GetHostReg32(reg), load_delay_reg);
  m_emit->Str(GetHostReg32(value), load_delay_value);
  m_emit->Mov(GetHostReg32(reg), static_cast<u8>(Reg::count));
  m_emit->Strb(GetHostReg32(reg), next_load_delay_reg);
}

void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
{
  if (!m_load_delay_dirty)
    return;

  const a64::MemOperand load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, load_delay_reg));
  Value temp = m_register_cache.AllocateScratch(RegSize_8);

  a64::Label skip_cancel;

  // if load_delay_reg != reg goto skip_cancel
  m_emit->Ldrb(GetHostReg8(temp), load_delay_reg);
  m_emit->Cmp(GetHostReg8(temp), static_cast<u8>(reg));
  m_emit->B(a64::ne, &skip_cancel);

  // load_delay_reg = Reg::count
  m_emit->Mov(GetHostReg8(temp), static_cast<u8>(Reg::count));
  m_emit->Strb(GetHostReg8(temp), load_delay_reg);

  m_emit->Bind(&skip_cancel);
}

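// Instruction cache timing. Blocks that bypass the icache simply charge their uncached fetch ticks (scaled by
// the block size when the fetch time is dynamic); cached blocks compare each line's stored tag with the
// current fetch address and charge the line fill time on a miss.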
void CodeGenerator::EmitICacheCheckAndUpdate()
{
  if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
  {
    if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
    {
      armEmitFarLoad(m_emit, RWARG2, GetFetchMemoryAccessTimePtr());
      m_emit->Ldr(RWARG1, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
      m_emit->Mov(RWARG3, m_block->size);
      m_emit->Mul(RWARG2, RWARG2, RWARG3);
      m_emit->Add(RWARG1, RWARG1, RWARG2);
      m_emit->Str(RWARG1, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
    }
    else
    {
      EmitAddCPUStructField(OFFSETOF(State, pending_ticks),
                            Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
    }
  }
  else if (m_block->icache_line_count > 0)
  {
    const auto& ticks_reg = a64::w0;
    const auto& current_tag_reg = a64::w1;
    const auto& existing_tag_reg = a64::w2;

    VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK;
    m_emit->Ldr(ticks_reg, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
    m_emit->Mov(current_tag_reg, current_pc);

    for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
    {
      const TickCount fill_ticks = GetICacheFillTicks(current_pc);
      if (fill_ticks <= 0)
        continue;

      const u32 line = GetICacheLine(current_pc);
      const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32));

      a64::Label cache_hit;
      m_emit->Ldr(existing_tag_reg, a64::MemOperand(GetCPUPtrReg(), offset));
      m_emit->Cmp(existing_tag_reg, current_tag_reg);
      m_emit->B(&cache_hit, a64::eq);

      m_emit->Str(current_tag_reg, a64::MemOperand(GetCPUPtrReg(), offset));
      EmitAdd(0, 0, Value::FromConstantU32(static_cast<u32>(fill_ticks)), false);
      m_emit->Bind(&cache_hit);

      if (i != (m_block->icache_line_count - 1))
        m_emit->Add(current_tag_reg, current_tag_reg, ICACHE_LINE_SIZE);
    }

    m_emit->Str(ticks_reg, a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
  }
}

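// Block protection check: compares the block's RAM contents against a shadow copy. 16-byte chunks are compared
// with NEON (cmeq per lane, results AND-accumulated into v0, then reduced with uminv so any mismatched lane
// yields zero); the remainder uses 8/4-byte scalar compares. Any mismatch jumps to the discard-and-recompile
// handler.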
void CodeGenerator::EmitBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
{
  // store it first to reduce code size, because we can offset
  armMoveAddressToReg(m_emit, RXARG1, ram_ptr);
  armMoveAddressToReg(m_emit, RXARG2, shadow_ptr);

  bool first = true;
  u32 offset = 0;
  a64::Label block_changed;

  while (size >= 16)
  {
    const a64::VRegister vtmp = a64::v2.V4S();
    const a64::VRegister dst = first ? a64::v0.V4S() : a64::v1.V4S();
    m_emit->ldr(dst, a64::MemOperand(RXARG1, offset));
    m_emit->ldr(vtmp, a64::MemOperand(RXARG2, offset));
    m_emit->cmeq(dst, dst, vtmp);
    if (!first)
      m_emit->and_(a64::v0.V16B(), a64::v0.V16B(), dst.V16B());
    else
      first = false;

    offset += 16;
    size -= 16;
  }

  if (!first)
  {
    // TODO: make sure this doesn't choke on ffffffff
    m_emit->uminv(a64::s0, a64::v0.V4S());
    m_emit->fcmp(a64::s0, 0.0);
    m_emit->b(&block_changed, a64::eq);
  }

  while (size >= 8)
  {
    m_emit->ldr(RXARG3, a64::MemOperand(RXARG1, offset));
    m_emit->ldr(RXSCRATCH, a64::MemOperand(RXARG2, offset));
    m_emit->cmp(RXARG3, RXSCRATCH);
    m_emit->b(&block_changed, a64::ne);
    offset += 8;
    size -= 8;
  }

  while (size >= 4)
  {
    m_emit->ldr(RWARG3, a64::MemOperand(RXARG1, offset));
    m_emit->ldr(RWSCRATCH, a64::MemOperand(RXARG2, offset));
    m_emit->cmp(RWARG3, RWSCRATCH);
    m_emit->b(&block_changed, a64::ne);
    offset += 4;
    size -= 4;
  }

  DebugAssert(size == 0);

  a64::Label block_unchanged;
  m_emit->b(&block_unchanged);
  m_emit->bind(&block_changed);
  armEmitJmp(m_emit, CodeCache::g_discard_and_recompile_block, false);
  m_emit->bind(&block_unchanged);
}

void CodeGenerator::EmitStallUntilGTEComplete()
{
  static_assert(OFFSETOF(State, pending_ticks) + sizeof(u32) == OFFSETOF(State, gte_completion_tick));
  m_emit->ldp(GetHostReg32(RARG1), GetHostReg32(RARG2),
              a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));

  if (m_delayed_cycles_add > 0)
  {
    m_emit->Add(GetHostReg32(RARG1), GetHostReg32(RARG1), static_cast<u32>(m_delayed_cycles_add));
    m_delayed_cycles_add = 0;
  }

  m_emit->cmp(GetHostReg32(RARG2), GetHostReg32(RARG1));
  m_emit->csel(GetHostReg32(RARG1), GetHostReg32(RARG2), GetHostReg32(RARG1), a64::Condition::hi);
  m_emit->str(GetHostReg32(RARG1), a64::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks)));
}

void CodeGenerator::EmitBranch(const void* address, bool allow_scratch)
{
  const s64 jump_distance =
    static_cast<s64>(reinterpret_cast<intptr_t>(address) - reinterpret_cast<intptr_t>(GetCurrentCodePointer()));
  Assert(Common::IsAligned(jump_distance, 4));
  if (a64::Instruction::IsValidImmPCOffset(a64::UncondBranchType, jump_distance >> 2))
  {
    m_emit->b(jump_distance >> 2);
    return;
  }

  Assert(allow_scratch);

  m_emit->Mov(GetHostReg64(RSCRATCH), reinterpret_cast<uintptr_t>(address));
  m_emit->br(GetHostReg64(RSCRATCH));
}

void CodeGenerator::EmitBranch(LabelType* label)
{
  m_emit->B(label);
}

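// Maps the recompiler's Condition enum onto AArch64 condition codes. Condition::Always maps to a64::nv; on
// AArch64 the NV encoding executes unconditionally rather than meaning "never".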
static a64::Condition TranslateCondition(Condition condition, bool invert)
{
  switch (condition)
  {
    case Condition::Always:
      return a64::nv;

    case Condition::NotEqual:
    case Condition::NotZero:
      return invert ? a64::eq : a64::ne;

    case Condition::Equal:
    case Condition::Zero:
      return invert ? a64::ne : a64::eq;

    case Condition::Overflow:
      return invert ? a64::vc : a64::vs;

    case Condition::Greater:
      return invert ? a64::le : a64::gt;

    case Condition::GreaterEqual:
      return invert ? a64::lt : a64::ge;

    case Condition::Less:
      return invert ? a64::ge : a64::lt;

    case Condition::LessEqual:
      return invert ? a64::gt : a64::le;

    case Condition::Negative:
      return invert ? a64::pl : a64::mi;

    case Condition::PositiveOrZero:
      return invert ? a64::mi : a64::pl;

    case Condition::Above:
      return invert ? a64::ls : a64::hi;

    case Condition::AboveEqual:
      return invert ? a64::cc : a64::cs;

    case Condition::Below:
      return invert ? a64::cs : a64::cc;

    case Condition::BelowEqual:
      return invert ? a64::hi : a64::ls;

    default:
      UnreachableCode();
      return a64::nv;
  }
}

void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, HostReg value, RegSize size,
                                          LabelType* label)
{
  switch (condition)
  {
    case Condition::NotEqual:
    case Condition::Equal:
    case Condition::Overflow:
    case Condition::Greater:
    case Condition::GreaterEqual:
    case Condition::LessEqual:
    case Condition::Less:
    case Condition::Above:
    case Condition::AboveEqual:
    case Condition::Below:
    case Condition::BelowEqual:
      Panic("Needs a comparison value");
      return;

    case Condition::Negative:
    case Condition::PositiveOrZero:
    {
      switch (size)
      {
        case RegSize_8:
          m_emit->tst(GetHostReg8(value), GetHostReg8(value));
          break;
        case RegSize_16:
          m_emit->tst(GetHostReg16(value), GetHostReg16(value));
          break;
        case RegSize_32:
          m_emit->tst(GetHostReg32(value), GetHostReg32(value));
          break;
        case RegSize_64:
          m_emit->tst(GetHostReg64(value), GetHostReg64(value));
          break;
        default:
          UnreachableCode();
          break;
      }

      EmitConditionalBranch(condition, invert, label);
      return;
    }

    case Condition::NotZero:
    {
      switch (size)
      {
        case RegSize_8:
          m_emit->cbnz(GetHostReg8(value), label);
          break;
        case RegSize_16:
          m_emit->cbnz(GetHostReg16(value), label);
          break;
        case RegSize_32:
          m_emit->cbnz(GetHostReg32(value), label);
          break;
        case RegSize_64:
          m_emit->cbnz(GetHostReg64(value), label);
          break;
        default:
          UnreachableCode();
          break;
      }

      return;
    }

    case Condition::Zero:
    {
      switch (size)
      {
        case RegSize_8:
          m_emit->cbz(GetHostReg8(value), label);
          break;
        case RegSize_16:
          m_emit->cbz(GetHostReg16(value), label);
          break;
        case RegSize_32:
          m_emit->cbz(GetHostReg32(value), label);
          break;
        case RegSize_64:
          m_emit->cbz(GetHostReg64(value), label);
          break;
        default:
          UnreachableCode();
          break;
      }

      return;
    }

    case Condition::Always:
      m_emit->b(label);
      return;

    default:
      UnreachableCode();
      return;
  }
}

void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, HostReg lhs, const Value& rhs,
                                          LabelType* label)
{
  switch (condition)
  {
    case Condition::NotEqual:
    case Condition::Equal:
    case Condition::Overflow:
    case Condition::Greater:
    case Condition::GreaterEqual:
    case Condition::LessEqual:
    case Condition::Less:
    case Condition::Above:
    case Condition::AboveEqual:
    case Condition::Below:
    case Condition::BelowEqual:
    {
      EmitCmp(lhs, rhs);
      EmitConditionalBranch(condition, invert, label);
      return;
    }

    case Condition::Negative:
    case Condition::PositiveOrZero:
    case Condition::NotZero:
    case Condition::Zero:
    {
      Assert(!rhs.IsValid() || (rhs.IsConstant() && rhs.GetS64ConstantValue() == 0));
      EmitConditionalBranch(condition, invert, lhs, rhs.size, label);
      return;
    }

    case Condition::Always:
      m_emit->b(label);
      return;

    default:
      UnreachableCode();
      return;
  }
}

void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, LabelType* label)
{
  if (condition == Condition::Always)
    m_emit->b(label);
  else
    m_emit->b(label, TranslateCondition(condition, invert));
}

void CodeGenerator::EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, LabelType* label)
{
  switch (size)
  {
    case RegSize_8:
    case RegSize_16:
    case RegSize_32:
      m_emit->tbz(GetHostReg32(reg), bit, label);
      break;

    default:
      UnreachableCode();
      break;
  }
}

void CodeGenerator::EmitBindLabel(LabelType* label)
{
  m_emit->Bind(label);
}

void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr)
{
  const void* current_code_ptr_page = reinterpret_cast<const void*>(
    reinterpret_cast<uintptr_t>(GetCurrentCodePointer()) & ~static_cast<uintptr_t>(0xFFF));
  const void* ptr_page =
    reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(ptr) & ~static_cast<uintptr_t>(0xFFF));
  const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10;
  const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(ptr) & 0xFFFu);
  if (vixl::IsInt21(page_displacement) && a64::Assembler::IsImmLogical(page_offset, 64))
  {
    m_emit->adrp(GetHostReg64(host_reg), page_displacement);
    m_emit->orr(GetHostReg64(host_reg), GetHostReg64(host_reg), page_offset);
  }
  else
  {
    m_emit->Mov(GetHostReg64(host_reg), reinterpret_cast<uintptr_t>(ptr));
  }
}

} // namespace CPU::Recompiler

#endif // CPU_ARCH_ARM64
== Condition::Always) 2655 m_emit->b(label); 2656 else 2657 m_emit->b(label, TranslateCondition(condition, invert)); 2658 } 2659 2660 void CodeGenerator::EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, LabelType* label) 2661 { 2662 switch (size) 2663 { 2664 case RegSize_8: 2665 case RegSize_16: 2666 case RegSize_32: 2667 m_emit->tbz(GetHostReg32(reg), bit, label); 2668 break; 2669 2670 default: 2671 UnreachableCode(); 2672 break; 2673 } 2674 } 2675 2676 void CodeGenerator::EmitBindLabel(LabelType* label) 2677 { 2678 m_emit->Bind(label); 2679 } 2680 2681 void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr) 2682 { 2683 const void* current_code_ptr_page = reinterpret_cast<const void*>( 2684 reinterpret_cast<uintptr_t>(GetCurrentCodePointer()) & ~static_cast<uintptr_t>(0xFFF)); 2685 const void* ptr_page = 2686 reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(ptr) & ~static_cast<uintptr_t>(0xFFF)); 2687 const s64 page_displacement = armGetPCDisplacement(current_code_ptr_page, ptr_page) >> 10; 2688 const u32 page_offset = static_cast<u32>(reinterpret_cast<uintptr_t>(ptr) & 0xFFFu); 2689 if (vixl::IsInt21(page_displacement) && a64::Assembler::IsImmLogical(page_offset, 64)) 2690 { 2691 m_emit->adrp(GetHostReg64(host_reg), page_displacement); 2692 m_emit->orr(GetHostReg64(host_reg), GetHostReg64(host_reg), page_offset); 2693 } 2694 else 2695 { 2696 m_emit->Mov(GetHostReg64(host_reg), reinterpret_cast<uintptr_t>(ptr)); 2697 } 2698 } 2699 2700 } // namespace CPU::Recompiler 2701 2702 #endif // CPU_ARCH_ARM64