cpu_recompiler_code_generator_aarch32.cpp (67558B)
1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) 3 4 #include "common/align.h" 5 #include "common/assert.h" 6 #include "common/log.h" 7 #include "common/memmap.h" 8 9 #include "cpu_code_cache_private.h" 10 #include "cpu_core.h" 11 #include "cpu_core_private.h" 12 #include "cpu_recompiler_code_generator.h" 13 #include "cpu_recompiler_thunks.h" 14 #include "settings.h" 15 #include "timing_event.h" 16 17 #ifdef CPU_ARCH_ARM32 18 19 Log_SetChannel(CPU::Recompiler); 20 21 #ifdef ENABLE_HOST_DISASSEMBLY 22 #include "vixl/aarch32/disasm-aarch32.h" 23 #include <iostream> 24 #endif 25 26 namespace a32 = vixl::aarch32; 27 28 namespace CPU::Recompiler { 29 constexpr u32 FUNCTION_CALLEE_SAVED_SPACE_RESERVE = 80; // 8 registers 30 constexpr u32 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224 bytes 31 constexpr u32 FUNCTION_STACK_SIZE = FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE; 32 33 static constexpr u32 TRAMPOLINE_AREA_SIZE = 4 * 1024; 34 static std::unordered_map<const void*, u32> s_trampoline_targets; 35 static u8* s_trampoline_start_ptr = nullptr; 36 static u32 s_trampoline_used = 0; 37 } // namespace CPU::Recompiler 38 39 bool CPU::Recompiler::armIsCallerSavedRegister(u32 id) 40 { 41 return ((id >= 0 && id <= 3) || // r0-r3 42 (id == 12 || id == 14)); // sp, pc 43 } 44 45 s32 CPU::Recompiler::armGetPCDisplacement(const void* current, const void* target) 46 { 47 Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4)); 48 Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(target), 4)); 49 return static_cast<s32>((reinterpret_cast<ptrdiff_t>(target) - reinterpret_cast<ptrdiff_t>(current))); 50 } 51 52 bool CPU::Recompiler::armIsPCDisplacementInImmediateRange(s32 displacement) 53 { 54 return (displacement >= -33554432 && displacement <= 33554428); 55 } 56 57 void CPU::Recompiler::armEmitMov(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& rd, u32 imm) 58 { 59 if (vixl::IsUintN(16, imm)) 60 { 61 armAsm->mov(vixl::aarch32::al, rd, imm & 0xffff); 62 return; 63 } 64 65 armAsm->mov(vixl::aarch32::al, rd, imm & 0xffff); 66 armAsm->movt(vixl::aarch32::al, rd, imm >> 16); 67 } 68 69 void CPU::Recompiler::armMoveAddressToReg(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, 70 const void* addr) 71 { 72 armEmitMov(armAsm, reg, static_cast<u32>(reinterpret_cast<uintptr_t>(addr))); 73 } 74 75 void CPU::Recompiler::armEmitJmp(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline) 76 { 77 const void* cur = armAsm->GetCursorAddress<const void*>(); 78 s32 displacement = armGetPCDisplacement(cur, ptr); 79 bool use_bx = !armIsPCDisplacementInImmediateRange(displacement); 80 if (use_bx && !force_inline) 81 { 82 if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline) 83 { 84 displacement = armGetPCDisplacement(cur, trampoline); 85 use_bx = !armIsPCDisplacementInImmediateRange(displacement); 86 } 87 } 88 89 if (use_bx) 90 { 91 armMoveAddressToReg(armAsm, RSCRATCH, ptr); 92 armAsm->bx(RSCRATCH); 93 } 94 else 95 { 96 a32::Label label(displacement + armAsm->GetCursorOffset()); 97 armAsm->b(&label); 98 } 99 } 100 101 void CPU::Recompiler::armEmitCall(vixl::aarch32::Assembler* armAsm, const void* ptr, bool force_inline) 102 { 103 const void* cur = armAsm->GetCursorAddress<const void*>(); 104 s32 displacement = armGetPCDisplacement(cur, ptr); 105 bool use_blx = !armIsPCDisplacementInImmediateRange(displacement); 106 if (use_blx && !force_inline) 107 { 108 if (u8* trampoline = armGetJumpTrampoline(ptr); trampoline) 109 { 110 displacement = armGetPCDisplacement(cur, trampoline); 111 use_blx = !armIsPCDisplacementInImmediateRange(displacement); 112 } 113 } 114 115 if (use_blx) 116 { 117 armMoveAddressToReg(armAsm, RSCRATCH, ptr); 118 armAsm->blx(RSCRATCH); 119 } 120 else 121 { 122 a32::Label label(displacement + armAsm->GetCursorOffset()); 123 armAsm->bl(&label); 124 } 125 } 126 127 void CPU::Recompiler::armEmitCondBranch(vixl::aarch32::Assembler* armAsm, vixl::aarch32::Condition cond, 128 const void* ptr) 129 { 130 const s32 displacement = armGetPCDisplacement(armAsm->GetCursorAddress<const void*>(), ptr); 131 if (!armIsPCDisplacementInImmediateRange(displacement)) 132 { 133 armMoveAddressToReg(armAsm, RSCRATCH, ptr); 134 armAsm->blx(cond, RSCRATCH); 135 } 136 else 137 { 138 a32::Label label(displacement + armAsm->GetCursorOffset()); 139 armAsm->b(cond, &label); 140 } 141 } 142 143 void CPU::Recompiler::armEmitFarLoad(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, 144 const void* addr) 145 { 146 armMoveAddressToReg(armAsm, reg, addr); 147 armAsm->ldr(reg, vixl::aarch32::MemOperand(reg)); 148 } 149 150 void CPU::Recompiler::armEmitFarStore(vixl::aarch32::Assembler* armAsm, const vixl::aarch32::Register& reg, 151 const void* addr, const vixl::aarch32::Register& tempreg) 152 { 153 armMoveAddressToReg(armAsm, tempreg, addr); 154 armAsm->str(reg, vixl::aarch32::MemOperand(tempreg)); 155 } 156 157 void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size) 158 { 159 #ifdef ENABLE_HOST_DISASSEMBLY 160 a32::PrintDisassembler dis(std::cout, 0); 161 dis.SetCodeAddress(reinterpret_cast<uintptr_t>(start)); 162 dis.DisassembleA32Buffer(static_cast<const u32*>(start), size); 163 #else 164 ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY."); 165 #endif 166 } 167 168 u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size) 169 { 170 return size / a32::kA32InstructionSizeInBytes; 171 } 172 173 u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache) 174 { 175 using namespace vixl::aarch32; 176 using namespace CPU::Recompiler; 177 178 const s32 disp = armGetPCDisplacement(code, dst); 179 DebugAssert(armIsPCDisplacementInImmediateRange(disp)); 180 181 // A32 jumps are silly. 182 { 183 vixl::aarch32::Assembler emit(static_cast<vixl::byte*>(code), kA32InstructionSizeInBytes, a32::A32); 184 a32::Label label(disp); 185 emit.b(&label); 186 } 187 188 if (flush_icache) 189 MemMap::FlushInstructionCache(code, kA32InstructionSizeInBytes); 190 191 return kA32InstructionSizeInBytes; 192 } 193 194 u8* CPU::Recompiler::armGetJumpTrampoline(const void* target) 195 { 196 auto it = s_trampoline_targets.find(target); 197 if (it != s_trampoline_targets.end()) 198 return s_trampoline_start_ptr + it->second; 199 200 // align to 16 bytes? 201 const u32 offset = s_trampoline_used; // Common::AlignUpPow2(s_trampoline_used, 16); 202 203 // 4 movs plus a jump 204 if (TRAMPOLINE_AREA_SIZE - offset < 20) 205 { 206 Panic("Ran out of space in constant pool"); 207 return nullptr; 208 } 209 210 u8* start = s_trampoline_start_ptr + offset; 211 a32::Assembler armAsm(start, TRAMPOLINE_AREA_SIZE - offset); 212 armMoveAddressToReg(&armAsm, RSCRATCH, target); 213 armAsm.bx(RSCRATCH); 214 215 const u32 size = static_cast<u32>(armAsm.GetSizeOfCodeGenerated()); 216 DebugAssert(size < 20); 217 s_trampoline_targets.emplace(target, offset); 218 s_trampoline_used = offset + static_cast<u32>(size); 219 220 MemMap::FlushInstructionCache(start, size); 221 return start; 222 } 223 224 u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size) 225 { 226 using namespace vixl::aarch32; 227 using namespace CPU::Recompiler; 228 229 #define PTR(x) a32::MemOperand(RSTATE, (s32)(((u8*)(x)) - ((u8*)&g_state))) 230 231 Assembler actual_asm(static_cast<u8*>(code), code_size); 232 Assembler* armAsm = &actual_asm; 233 234 #ifdef VIXL_DEBUG 235 vixl::CodeBufferCheckScope asm_check(armAsm, code_size, vixl::CodeBufferCheckScope::kDontReserveBufferSpace); 236 #endif 237 238 Label dispatch; 239 240 g_enter_recompiler = armAsm->GetCursorAddress<decltype(g_enter_recompiler)>(); 241 { 242 // reserve some space for saving caller-saved registers 243 armAsm->sub(sp, sp, FUNCTION_STACK_SIZE); 244 245 // Need the CPU state for basically everything :-) 246 armMoveAddressToReg(armAsm, RSTATE, &g_state); 247 } 248 249 // check events then for frame done 250 g_check_events_and_dispatch = armAsm->GetCursorAddress<const void*>(); 251 { 252 Label skip_event_check; 253 armAsm->ldr(RARG1, PTR(&g_state.pending_ticks)); 254 armAsm->ldr(RARG2, PTR(&g_state.downcount)); 255 armAsm->cmp(RARG1, RARG2); 256 armAsm->b(lt, &skip_event_check); 257 258 g_run_events_and_dispatch = armAsm->GetCursorAddress<const void*>(); 259 armEmitCall(armAsm, reinterpret_cast<const void*>(&TimingEvents::RunEvents), true); 260 261 armAsm->bind(&skip_event_check); 262 } 263 264 // TODO: align? 265 g_dispatcher = armAsm->GetCursorAddress<const void*>(); 266 { 267 armAsm->bind(&dispatch); 268 269 // x9 <- s_fast_map[pc >> 16] 270 armAsm->ldr(RARG1, PTR(&g_state.pc)); 271 armMoveAddressToReg(armAsm, RARG3, g_code_lut.data()); 272 armAsm->lsr(RARG2, RARG1, 16); 273 armAsm->ldr(RARG2, MemOperand(RARG3, RARG2, LSL, 2)); 274 275 // blr(x9[pc * 2]) (fast_map[pc >> 2]) 276 armAsm->ldr(RARG1, MemOperand(RARG2, RARG1)); 277 armAsm->blx(RARG1); 278 } 279 280 g_compile_or_revalidate_block = armAsm->GetCursorAddress<const void*>(); 281 { 282 armAsm->ldr(RARG1, PTR(&g_state.pc)); 283 armEmitCall(armAsm, reinterpret_cast<const void*>(&CompileOrRevalidateBlock), true); 284 armAsm->b(&dispatch); 285 } 286 287 g_discard_and_recompile_block = armAsm->GetCursorAddress<const void*>(); 288 { 289 armAsm->ldr(RARG1, PTR(&g_state.pc)); 290 armEmitCall(armAsm, reinterpret_cast<const void*>(&DiscardAndRecompileBlock), true); 291 armAsm->b(&dispatch); 292 } 293 294 g_interpret_block = armAsm->GetCursorAddress<const void*>(); 295 { 296 armEmitCall(armAsm, reinterpret_cast<const void*>(GetInterpretUncachedBlockFunction()), true); 297 armAsm->b(&dispatch); 298 } 299 300 armAsm->FinalizeCode(); 301 302 #if 0 303 // TODO: align? 304 s_trampoline_targets.clear(); 305 s_trampoline_start_ptr = static_cast<u8*>(code) + armAsm->GetCursorOffset(); 306 s_trampoline_used = 0; 307 #endif 308 309 #undef PTR 310 return static_cast<u32>(armAsm->GetCursorOffset()) /* + TRAMPOLINE_AREA_SIZE*/; 311 } 312 313 // Macros aren't used with old-rec. 314 #undef RRET 315 #undef RARG1 316 #undef RARG2 317 #undef RARG3 318 #undef RSCRATCH 319 #undef RSTATE 320 321 namespace CPU::Recompiler { 322 323 constexpr HostReg RCPUPTR = 4; 324 constexpr HostReg RMEMBASEPTR = 3; 325 constexpr HostReg RRETURN = 0; 326 constexpr HostReg RARG1 = 0; 327 constexpr HostReg RARG2 = 1; 328 constexpr HostReg RARG3 = 2; 329 constexpr HostReg RARG4 = 3; 330 constexpr HostReg RSCRATCH = 12; 331 332 static const a32::Register GetHostReg8(HostReg reg) 333 { 334 return a32::Register(reg); 335 } 336 337 static const a32::Register GetHostReg8(const Value& value) 338 { 339 DebugAssert(value.size == RegSize_8 && value.IsInHostRegister()); 340 return a32::Register(value.host_reg); 341 } 342 343 static const a32::Register GetHostReg16(HostReg reg) 344 { 345 return a32::Register(reg); 346 } 347 348 static const a32::Register GetHostReg16(const Value& value) 349 { 350 DebugAssert(value.size == RegSize_16 && value.IsInHostRegister()); 351 return a32::Register(value.host_reg); 352 } 353 354 static const a32::Register GetHostReg32(HostReg reg) 355 { 356 return a32::Register(reg); 357 } 358 359 static const a32::Register GetHostReg32(const Value& value) 360 { 361 DebugAssert(value.size == RegSize_32 && value.IsInHostRegister()); 362 return a32::Register(value.host_reg); 363 } 364 365 static const a32::Register GetCPUPtrReg() 366 { 367 return GetHostReg32(RCPUPTR); 368 } 369 370 static const a32::Register GetFastmemBasePtrReg() 371 { 372 return GetHostReg32(RMEMBASEPTR); 373 } 374 375 CodeGenerator::CodeGenerator() 376 : m_register_cache(*this), m_near_emitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeCodePointer()), 377 CPU::CodeCache::GetFreeCodeSpace(), a32::A32), 378 m_far_emitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeFarCodePointer()), 379 CPU::CodeCache::GetFreeFarCodeSpace(), a32::A32), 380 m_emit(&m_near_emitter) 381 { 382 InitHostRegs(); 383 } 384 385 CodeGenerator::~CodeGenerator() = default; 386 387 const char* CodeGenerator::GetHostRegName(HostReg reg, RegSize size /*= HostPointerSize*/) 388 { 389 static constexpr std::array<const char*, HostReg_Count> reg_names = { 390 {"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"}}; 391 if (reg >= static_cast<HostReg>(HostReg_Count)) 392 return ""; 393 394 switch (size) 395 { 396 case RegSize_32: 397 return reg_names[reg]; 398 default: 399 return ""; 400 } 401 } 402 403 void CodeGenerator::InitHostRegs() 404 { 405 // allocate nonvolatile before volatile 406 // NOTE: vixl also uses r12 for the macro assembler 407 m_register_cache.SetHostRegAllocationOrder({4, 5, 6, 7, 8, 9, 10, 11}); 408 m_register_cache.SetCallerSavedHostRegs({0, 1, 2, 3, 12}); 409 m_register_cache.SetCalleeSavedHostRegs({4, 5, 6, 7, 8, 9, 10, 11, 13, 14}); 410 m_register_cache.SetCPUPtrHostReg(RCPUPTR); 411 } 412 413 void CodeGenerator::SwitchToFarCode() 414 { 415 m_emit = &m_far_emitter; 416 } 417 418 void CodeGenerator::SwitchToNearCode() 419 { 420 m_emit = &m_near_emitter; 421 } 422 423 void* CodeGenerator::GetStartNearCodePointer() const 424 { 425 return static_cast<u8*>(CPU::CodeCache::GetFreeCodePointer()); 426 } 427 428 void* CodeGenerator::GetCurrentNearCodePointer() const 429 { 430 return static_cast<u8*>(CPU::CodeCache::GetFreeCodePointer()) + m_near_emitter.GetCursorOffset(); 431 } 432 433 void* CodeGenerator::GetCurrentFarCodePointer() const 434 { 435 return static_cast<u8*>(CPU::CodeCache::GetFreeFarCodePointer()) + m_far_emitter.GetCursorOffset(); 436 } 437 438 Value CodeGenerator::GetValueInHostRegister(const Value& value, bool allow_zero_register /* = true */) 439 { 440 if (value.IsInHostRegister()) 441 return Value::FromHostReg(&m_register_cache, value.host_reg, value.size); 442 443 Value new_value = m_register_cache.AllocateScratch(value.size); 444 EmitCopyValue(new_value.host_reg, value); 445 return new_value; 446 } 447 448 Value CodeGenerator::GetValueInHostOrScratchRegister(const Value& value, bool allow_zero_register /* = true */) 449 { 450 if (value.IsInHostRegister()) 451 return Value::FromHostReg(&m_register_cache, value.host_reg, value.size); 452 453 Value new_value = Value::FromHostReg(&m_register_cache, RSCRATCH, value.size); 454 EmitCopyValue(new_value.host_reg, value); 455 return new_value; 456 } 457 458 void CodeGenerator::EmitBeginBlock(bool allocate_registers /* = true */) 459 { 460 if (allocate_registers) 461 { 462 // Save the link register, since we'll be calling functions. 463 const bool link_reg_allocated = m_register_cache.AllocateHostReg(14); 464 DebugAssert(link_reg_allocated); 465 UNREFERENCED_VARIABLE(link_reg_allocated); 466 m_register_cache.AssumeCalleeSavedRegistersAreSaved(); 467 468 // Store the CPU struct pointer. TODO: make this better. 469 const bool cpu_reg_allocated = m_register_cache.AllocateHostReg(RCPUPTR); 470 // m_emit->Mov(GetCPUPtrReg(), reinterpret_cast<uintptr_t>(&g_state)); 471 DebugAssert(cpu_reg_allocated); 472 UNREFERENCED_VARIABLE(cpu_reg_allocated); 473 } 474 } 475 476 void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, const void* jump_to) 477 { 478 if (free_registers) 479 { 480 m_register_cache.FreeHostReg(RCPUPTR); 481 m_register_cache.FreeHostReg(14); 482 m_register_cache.PopCalleeSavedRegisters(true); 483 } 484 485 if (jump_to) 486 armEmitJmp(m_emit, jump_to, true); 487 } 488 489 void CodeGenerator::EmitExceptionExit() 490 { 491 // ensure all unflushed registers are written back 492 m_register_cache.FlushAllGuestRegisters(false, false); 493 494 // the interpreter load delay might have its own value, but we'll overwrite it here anyway 495 // technically RaiseException() and FlushPipeline() have already been called, but that should be okay 496 m_register_cache.FlushLoadDelay(false); 497 498 m_register_cache.PopCalleeSavedRegisters(false); 499 500 armEmitJmp(m_emit, CodeCache::g_check_events_and_dispatch, true); 501 } 502 503 void CodeGenerator::EmitExceptionExitOnBool(const Value& value) 504 { 505 Assert(!value.IsConstant() && value.IsInHostRegister()); 506 507 m_register_cache.PushState(); 508 509 // TODO: This is... not great. 510 a32::Label skip_branch; 511 m_emit->tst(GetHostReg32(value.host_reg), 1); 512 m_emit->b(a32::eq, &skip_branch); 513 EmitBranch(GetCurrentFarCodePointer()); 514 m_emit->Bind(&skip_branch); 515 516 SwitchToFarCode(); 517 EmitExceptionExit(); 518 SwitchToNearCode(); 519 520 m_register_cache.PopState(); 521 } 522 523 const void* CodeGenerator::FinalizeBlock(u32* out_host_code_size, u32* out_host_far_code_size) 524 { 525 m_near_emitter.FinalizeCode(); 526 m_far_emitter.FinalizeCode(); 527 528 const void* code = CPU::CodeCache::GetFreeCodePointer(); 529 *out_host_code_size = static_cast<u32>(m_near_emitter.GetSizeOfCodeGenerated()); 530 *out_host_far_code_size = static_cast<u32>(m_far_emitter.GetSizeOfCodeGenerated()); 531 532 CPU::CodeCache::CommitCode(static_cast<u32>(m_near_emitter.GetSizeOfCodeGenerated())); 533 CPU::CodeCache::CommitFarCode(static_cast<u32>(m_far_emitter.GetSizeOfCodeGenerated())); 534 535 m_near_emitter = CodeEmitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeCodePointer()), 536 CPU::CodeCache::GetFreeCodeSpace(), a32::A32); 537 m_far_emitter = CodeEmitter(static_cast<vixl::byte*>(CPU::CodeCache::GetFreeFarCodePointer()), 538 CPU::CodeCache::GetFreeFarCodeSpace(), a32::A32); 539 540 return code; 541 } 542 543 void CodeGenerator::EmitSignExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size) 544 { 545 switch (to_size) 546 { 547 case RegSize_16: 548 { 549 switch (from_size) 550 { 551 case RegSize_8: 552 m_emit->sxtb(GetHostReg16(to_reg), GetHostReg8(from_reg)); 553 m_emit->and_(GetHostReg16(to_reg), GetHostReg16(to_reg), 0xFFFF); 554 return; 555 } 556 } 557 break; 558 559 case RegSize_32: 560 { 561 switch (from_size) 562 { 563 case RegSize_8: 564 m_emit->sxtb(GetHostReg32(to_reg), GetHostReg8(from_reg)); 565 return; 566 case RegSize_16: 567 m_emit->sxth(GetHostReg32(to_reg), GetHostReg16(from_reg)); 568 return; 569 } 570 } 571 break; 572 } 573 574 Panic("Unknown sign-extend combination"); 575 } 576 577 void CodeGenerator::EmitZeroExtend(HostReg to_reg, RegSize to_size, HostReg from_reg, RegSize from_size) 578 { 579 switch (to_size) 580 { 581 case RegSize_16: 582 { 583 switch (from_size) 584 { 585 case RegSize_8: 586 m_emit->and_(GetHostReg16(to_reg), GetHostReg8(from_reg), 0xFF); 587 return; 588 } 589 } 590 break; 591 592 case RegSize_32: 593 { 594 switch (from_size) 595 { 596 case RegSize_8: 597 m_emit->and_(GetHostReg32(to_reg), GetHostReg8(from_reg), 0xFF); 598 return; 599 case RegSize_16: 600 m_emit->and_(GetHostReg32(to_reg), GetHostReg16(from_reg), 0xFFFF); 601 return; 602 } 603 } 604 break; 605 } 606 607 Panic("Unknown sign-extend combination"); 608 } 609 610 void CodeGenerator::EmitCopyValue(HostReg to_reg, const Value& value) 611 { 612 // TODO: mov x, 0 -> xor x, x 613 DebugAssert(value.IsConstant() || value.IsInHostRegister()); 614 615 switch (value.size) 616 { 617 case RegSize_8: 618 case RegSize_16: 619 case RegSize_32: 620 { 621 if (value.IsConstant()) 622 m_emit->Mov(GetHostReg32(to_reg), value.GetS32ConstantValue()); 623 else 624 m_emit->Mov(GetHostReg32(to_reg), GetHostReg32(value.host_reg)); 625 } 626 break; 627 628 default: 629 UnreachableCode(); 630 break; 631 } 632 } 633 634 void CodeGenerator::EmitAdd(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags) 635 { 636 Assert(value.IsConstant() || value.IsInHostRegister()); 637 638 // if it's in a host register already, this is easy 639 if (value.IsInHostRegister()) 640 { 641 if (set_flags) 642 m_emit->adds(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); 643 else 644 m_emit->add(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); 645 646 return; 647 } 648 649 // do we need temporary storage for the constant, if it won't fit in an immediate? 650 const s32 constant_value = value.GetS32ConstantValue(); 651 if (a32::ImmediateA32::IsImmediateA32(static_cast<u32>(constant_value))) 652 { 653 if (set_flags) 654 m_emit->adds(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value); 655 else 656 m_emit->add(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value); 657 658 return; 659 } 660 661 // need a temporary 662 m_emit->Mov(GetHostReg32(RSCRATCH), constant_value); 663 if (set_flags) 664 m_emit->adds(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); 665 else 666 m_emit->add(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); 667 } 668 669 void CodeGenerator::EmitSub(HostReg to_reg, HostReg from_reg, const Value& value, bool set_flags) 670 { 671 Assert(value.IsConstant() || value.IsInHostRegister()); 672 673 // if it's in a host register already, this is easy 674 if (value.IsInHostRegister()) 675 { 676 if (set_flags) 677 m_emit->subs(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); 678 else 679 m_emit->sub(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); 680 681 return; 682 } 683 684 // do we need temporary storage for the constant, if it won't fit in an immediate? 685 const s32 constant_value = value.GetS32ConstantValue(); 686 if (a32::ImmediateA32::IsImmediateA32(static_cast<u32>(constant_value))) 687 { 688 if (set_flags) 689 m_emit->subs(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value); 690 else 691 m_emit->sub(GetHostReg32(to_reg), GetHostReg32(from_reg), constant_value); 692 693 return; 694 } 695 696 // need a temporary 697 m_emit->Mov(GetHostReg32(RSCRATCH), constant_value); 698 if (set_flags) 699 m_emit->subs(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); 700 else 701 m_emit->sub(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); 702 } 703 704 void CodeGenerator::EmitCmp(HostReg to_reg, const Value& value) 705 { 706 Assert(value.IsConstant() || value.IsInHostRegister()); 707 708 // if it's in a host register already, this is easy 709 if (value.IsInHostRegister()) 710 { 711 m_emit->cmp(GetHostReg32(to_reg), GetHostReg32(value.host_reg)); 712 return; 713 } 714 715 // do we need temporary storage for the constant, if it won't fit in an immediate? 716 const s32 constant_value = value.GetS32ConstantValue(); 717 if (constant_value >= 0) 718 { 719 if (a32::ImmediateA32::IsImmediateA32(static_cast<u32>(constant_value))) 720 { 721 m_emit->cmp(GetHostReg32(to_reg), constant_value); 722 return; 723 } 724 } 725 else 726 { 727 if (a32::ImmediateA32::IsImmediateA32(static_cast<u32>(-constant_value))) 728 { 729 m_emit->cmn(GetHostReg32(to_reg), -constant_value); 730 return; 731 } 732 } 733 734 // need a temporary 735 m_emit->Mov(GetHostReg32(RSCRATCH), constant_value); 736 m_emit->cmp(GetHostReg32(to_reg), GetHostReg32(RSCRATCH)); 737 } 738 739 void CodeGenerator::EmitMul(HostReg to_reg_hi, HostReg to_reg_lo, const Value& lhs, const Value& rhs, 740 bool signed_multiply) 741 { 742 // We could use GetValueInHostRegister() here, but we run out of registers... 743 // Value lhs_in_reg = GetValueInHostRegister(lhs); 744 // Value rhs_in_reg = GetValueInHostRegister(rhs); 745 const HostReg lhs_in_reg = lhs.IsInHostRegister() ? lhs.GetHostRegister() : (EmitCopyValue(RARG1, lhs), RARG1); 746 const HostReg rhs_in_reg = rhs.IsInHostRegister() ? rhs.GetHostRegister() : (EmitCopyValue(RARG2, rhs), RARG2); 747 748 if (lhs.size < RegSize_64) 749 { 750 if (signed_multiply) 751 { 752 m_emit->smull(GetHostReg32(to_reg_lo), GetHostReg32(to_reg_hi), GetHostReg32(lhs_in_reg), 753 GetHostReg32(rhs_in_reg)); 754 } 755 else 756 { 757 m_emit->umull(GetHostReg32(to_reg_lo), GetHostReg32(to_reg_hi), GetHostReg32(lhs_in_reg), 758 GetHostReg32(rhs_in_reg)); 759 } 760 } 761 else 762 { 763 // TODO: Use mul + smulh 764 Panic("Not implemented"); 765 } 766 } 767 768 void CodeGenerator::EmitDiv(HostReg to_reg_quotient, HostReg to_reg_remainder, HostReg num, HostReg denom, RegSize size, 769 bool signed_divide) 770 { 771 // only 32-bit supported for now.. 772 Assert(size == RegSize_32); 773 774 Value quotient_value; 775 if (to_reg_quotient == HostReg_Count) 776 quotient_value.SetHostReg(&m_register_cache, RSCRATCH, size); 777 else 778 quotient_value.SetHostReg(&m_register_cache, to_reg_quotient, size); 779 780 if (signed_divide) 781 { 782 m_emit->sdiv(GetHostReg32(quotient_value), GetHostReg32(num), GetHostReg32(denom)); 783 if (to_reg_remainder != HostReg_Count) 784 { 785 m_emit->mul(GetHostReg32(to_reg_remainder), GetHostReg32(quotient_value), GetHostReg32(denom)); 786 m_emit->sub(GetHostReg32(to_reg_remainder), GetHostReg32(num), GetHostReg32(to_reg_remainder)); 787 } 788 } 789 else 790 { 791 m_emit->udiv(GetHostReg32(quotient_value), GetHostReg32(num), GetHostReg32(denom)); 792 if (to_reg_remainder != HostReg_Count) 793 { 794 m_emit->mul(GetHostReg32(to_reg_remainder), GetHostReg32(quotient_value), GetHostReg32(denom)); 795 m_emit->sub(GetHostReg32(to_reg_remainder), GetHostReg32(num), GetHostReg32(to_reg_remainder)); 796 } 797 } 798 } 799 800 void CodeGenerator::EmitInc(HostReg to_reg, RegSize size) 801 { 802 Panic("Not implemented"); 803 #if 0 804 switch (size) 805 { 806 case RegSize_8: 807 m_emit->inc(GetHostReg8(to_reg)); 808 break; 809 case RegSize_16: 810 m_emit->inc(GetHostReg16(to_reg)); 811 break; 812 case RegSize_32: 813 m_emit->inc(GetHostReg32(to_reg)); 814 break; 815 default: 816 UnreachableCode(); 817 break; 818 } 819 #endif 820 } 821 822 void CodeGenerator::EmitDec(HostReg to_reg, RegSize size) 823 { 824 Panic("Not implemented"); 825 #if 0 826 switch (size) 827 { 828 case RegSize_8: 829 m_emit->dec(GetHostReg8(to_reg)); 830 break; 831 case RegSize_16: 832 m_emit->dec(GetHostReg16(to_reg)); 833 break; 834 case RegSize_32: 835 m_emit->dec(GetHostReg32(to_reg)); 836 break; 837 default: 838 UnreachableCode(); 839 break; 840 } 841 #endif 842 } 843 844 void CodeGenerator::EmitShl(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, 845 bool assume_amount_masked) 846 { 847 switch (size) 848 { 849 case RegSize_8: 850 case RegSize_16: 851 case RegSize_32: 852 { 853 if (amount_value.IsConstant()) 854 { 855 m_emit->lsl(GetHostReg32(to_reg), GetHostReg32(from_reg), static_cast<u32>(amount_value.constant_value & 0x1F)); 856 } 857 else if (assume_amount_masked) 858 { 859 m_emit->lsl(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(amount_value)); 860 } 861 else 862 { 863 m_emit->and_(GetHostReg32(RSCRATCH), GetHostReg32(amount_value), 0x1F); 864 m_emit->lsl(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); 865 } 866 867 if (size == RegSize_8) 868 m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFF); 869 else if (size == RegSize_16) 870 m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFFFF); 871 } 872 break; 873 } 874 } 875 876 void CodeGenerator::EmitShr(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, 877 bool assume_amount_masked) 878 { 879 switch (size) 880 { 881 case RegSize_8: 882 case RegSize_16: 883 case RegSize_32: 884 { 885 if (amount_value.IsConstant()) 886 { 887 m_emit->lsr(GetHostReg32(to_reg), GetHostReg32(from_reg), static_cast<u32>(amount_value.constant_value & 0x1F)); 888 } 889 else if (assume_amount_masked) 890 { 891 m_emit->lsr(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(amount_value)); 892 } 893 else 894 { 895 m_emit->and_(GetHostReg32(RSCRATCH), GetHostReg32(amount_value), 0x1F); 896 m_emit->lsr(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); 897 } 898 899 if (size == RegSize_8) 900 m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFF); 901 else if (size == RegSize_16) 902 m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFFFF); 903 } 904 break; 905 } 906 } 907 908 void CodeGenerator::EmitSar(HostReg to_reg, HostReg from_reg, RegSize size, const Value& amount_value, 909 bool assume_amount_masked) 910 { 911 switch (size) 912 { 913 case RegSize_8: 914 case RegSize_16: 915 case RegSize_32: 916 { 917 if (amount_value.IsConstant()) 918 { 919 m_emit->asr(GetHostReg32(to_reg), GetHostReg32(from_reg), static_cast<u32>(amount_value.constant_value & 0x1F)); 920 } 921 else if (assume_amount_masked) 922 { 923 m_emit->asr(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(amount_value)); 924 } 925 else 926 { 927 m_emit->and_(GetHostReg32(RSCRATCH), GetHostReg32(amount_value), 0x1F); 928 m_emit->asr(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); 929 } 930 931 if (size == RegSize_8) 932 m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFF); 933 else if (size == RegSize_16) 934 m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), 0xFFFF); 935 } 936 break; 937 } 938 } 939 940 static bool CanFitInBitwiseImmediate(const Value& value) 941 { 942 return a32::ImmediateA32::IsImmediateA32(static_cast<u32>(value.constant_value)); 943 } 944 945 void CodeGenerator::EmitAnd(HostReg to_reg, HostReg from_reg, const Value& value) 946 { 947 Assert(value.IsConstant() || value.IsInHostRegister()); 948 949 // if it's in a host register already, this is easy 950 if (value.IsInHostRegister()) 951 { 952 m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); 953 return; 954 } 955 956 // do we need temporary storage for the constant, if it won't fit in an immediate? 957 if (CanFitInBitwiseImmediate(value)) 958 { 959 m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), s32(value.constant_value)); 960 return; 961 } 962 963 // need a temporary 964 m_emit->Mov(GetHostReg32(RSCRATCH), s32(value.constant_value)); 965 m_emit->and_(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); 966 } 967 968 void CodeGenerator::EmitOr(HostReg to_reg, HostReg from_reg, const Value& value) 969 { 970 Assert(value.IsConstant() || value.IsInHostRegister()); 971 972 // if it's in a host register already, this is easy 973 if (value.IsInHostRegister()) 974 { 975 m_emit->orr(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); 976 return; 977 } 978 979 // do we need temporary storage for the constant, if it won't fit in an immediate? 980 if (CanFitInBitwiseImmediate(value)) 981 { 982 m_emit->orr(GetHostReg32(to_reg), GetHostReg32(from_reg), s32(value.constant_value)); 983 return; 984 } 985 986 // need a temporary 987 m_emit->Mov(GetHostReg32(RSCRATCH), s32(value.constant_value)); 988 m_emit->orr(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); 989 } 990 991 void CodeGenerator::EmitXor(HostReg to_reg, HostReg from_reg, const Value& value) 992 { 993 Assert(value.IsConstant() || value.IsInHostRegister()); 994 995 // if it's in a host register already, this is easy 996 if (value.IsInHostRegister()) 997 { 998 m_emit->eor(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(value.host_reg)); 999 return; 1000 } 1001 1002 // do we need temporary storage for the constant, if it won't fit in an immediate? 1003 if (CanFitInBitwiseImmediate(value)) 1004 { 1005 m_emit->eor(GetHostReg32(to_reg), GetHostReg32(from_reg), s32(value.constant_value)); 1006 return; 1007 } 1008 1009 // need a temporary 1010 m_emit->Mov(GetHostReg32(RSCRATCH), s32(value.constant_value)); 1011 m_emit->eor(GetHostReg32(to_reg), GetHostReg32(from_reg), GetHostReg32(RSCRATCH)); 1012 } 1013 1014 void CodeGenerator::EmitTest(HostReg to_reg, const Value& value) 1015 { 1016 Assert(value.IsConstant() || value.IsInHostRegister()); 1017 1018 // if it's in a host register already, this is easy 1019 if (value.IsInHostRegister()) 1020 { 1021 m_emit->tst(GetHostReg32(to_reg), GetHostReg32(value.host_reg)); 1022 return; 1023 } 1024 1025 // do we need temporary storage for the constant, if it won't fit in an immediate? 1026 if (CanFitInBitwiseImmediate(value)) 1027 { 1028 m_emit->tst(GetHostReg32(to_reg), s32(value.constant_value)); 1029 return; 1030 } 1031 1032 // need a temporary 1033 m_emit->Mov(GetHostReg32(RSCRATCH), s32(value.constant_value)); 1034 m_emit->tst(GetHostReg32(to_reg), GetHostReg32(RSCRATCH)); 1035 } 1036 1037 void CodeGenerator::EmitNot(HostReg to_reg, RegSize size) 1038 { 1039 switch (size) 1040 { 1041 case RegSize_8: 1042 m_emit->mvn(GetHostReg8(to_reg), GetHostReg8(to_reg)); 1043 m_emit->and_(GetHostReg8(to_reg), GetHostReg8(to_reg), 0xFF); 1044 break; 1045 1046 case RegSize_16: 1047 m_emit->mvn(GetHostReg16(to_reg), GetHostReg16(to_reg)); 1048 m_emit->and_(GetHostReg16(to_reg), GetHostReg16(to_reg), 0xFFFF); 1049 break; 1050 1051 case RegSize_32: 1052 m_emit->mvn(GetHostReg32(to_reg), GetHostReg32(to_reg)); 1053 break; 1054 1055 default: 1056 break; 1057 } 1058 } 1059 1060 void CodeGenerator::EmitSetConditionResult(HostReg to_reg, RegSize to_size, Condition condition) 1061 { 1062 if (condition == Condition::Always) 1063 { 1064 m_emit->Mov(GetHostReg32(to_reg), 1); 1065 return; 1066 } 1067 1068 a32::Condition acond(a32::Condition::Never()); 1069 switch (condition) 1070 { 1071 case Condition::NotEqual: 1072 acond = a32::ne; 1073 break; 1074 1075 case Condition::Equal: 1076 acond = a32::eq; 1077 break; 1078 1079 case Condition::Overflow: 1080 acond = a32::vs; 1081 break; 1082 1083 case Condition::Greater: 1084 acond = a32::gt; 1085 break; 1086 1087 case Condition::GreaterEqual: 1088 acond = a32::ge; 1089 break; 1090 1091 case Condition::Less: 1092 acond = a32::lt; 1093 break; 1094 1095 case Condition::LessEqual: 1096 acond = a32::le; 1097 break; 1098 1099 case Condition::Negative: 1100 acond = a32::mi; 1101 break; 1102 1103 case Condition::PositiveOrZero: 1104 acond = a32::pl; 1105 break; 1106 1107 case Condition::Above: 1108 acond = a32::hi; 1109 break; 1110 1111 case Condition::AboveEqual: 1112 acond = a32::cs; 1113 break; 1114 1115 case Condition::Below: 1116 acond = a32::cc; 1117 break; 1118 1119 case Condition::BelowEqual: 1120 acond = a32::ls; 1121 break; 1122 1123 default: 1124 UnreachableCode(); 1125 return; 1126 } 1127 1128 m_emit->mov(GetHostReg32(to_reg), 0); 1129 m_emit->mov(acond, GetHostReg32(to_reg), 1); 1130 } 1131 1132 u32 CodeGenerator::PrepareStackForCall() 1133 { 1134 m_register_cache.PushCallerSavedRegisters(); 1135 m_membase_loaded = false; 1136 return 0; 1137 } 1138 1139 void CodeGenerator::RestoreStackAfterCall(u32 adjust_size) 1140 { 1141 m_register_cache.PopCallerSavedRegisters(); 1142 } 1143 1144 void CodeGenerator::EmitCall(const void* ptr) 1145 { 1146 armEmitCall(m_emit, ptr, false); 1147 } 1148 1149 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr) 1150 { 1151 if (return_value) 1152 return_value->Discard(); 1153 1154 // shadow space allocate 1155 const u32 adjust_size = PrepareStackForCall(); 1156 1157 // actually call the function 1158 EmitCall(ptr); 1159 1160 // shadow space release 1161 RestoreStackAfterCall(adjust_size); 1162 1163 // copy out return value if requested 1164 if (return_value) 1165 { 1166 return_value->Undiscard(); 1167 EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); 1168 } 1169 } 1170 1171 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1) 1172 { 1173 if (return_value) 1174 return_value->Discard(); 1175 1176 // shadow space allocate 1177 const u32 adjust_size = PrepareStackForCall(); 1178 1179 // push arguments 1180 EmitCopyValue(RARG1, arg1); 1181 1182 // actually call the function 1183 EmitCall(ptr); 1184 1185 // shadow space release 1186 RestoreStackAfterCall(adjust_size); 1187 1188 // copy out return value if requested 1189 if (return_value) 1190 { 1191 return_value->Undiscard(); 1192 EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); 1193 } 1194 } 1195 1196 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2) 1197 { 1198 if (return_value) 1199 return_value->Discard(); 1200 1201 // shadow space allocate 1202 const u32 adjust_size = PrepareStackForCall(); 1203 1204 // push arguments 1205 EmitCopyValue(RARG1, arg1); 1206 EmitCopyValue(RARG2, arg2); 1207 1208 // actually call the function 1209 EmitCall(ptr); 1210 1211 // shadow space release 1212 RestoreStackAfterCall(adjust_size); 1213 1214 // copy out return value if requested 1215 if (return_value) 1216 { 1217 return_value->Undiscard(); 1218 EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); 1219 } 1220 } 1221 1222 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2, 1223 const Value& arg3) 1224 { 1225 if (return_value) 1226 m_register_cache.DiscardHostReg(return_value->GetHostRegister()); 1227 1228 // shadow space allocate 1229 const u32 adjust_size = PrepareStackForCall(); 1230 1231 // push arguments 1232 EmitCopyValue(RARG1, arg1); 1233 EmitCopyValue(RARG2, arg2); 1234 EmitCopyValue(RARG3, arg3); 1235 1236 // actually call the function 1237 EmitCall(ptr); 1238 1239 // shadow space release 1240 RestoreStackAfterCall(adjust_size); 1241 1242 // copy out return value if requested 1243 if (return_value) 1244 { 1245 return_value->Undiscard(); 1246 EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); 1247 } 1248 } 1249 1250 void CodeGenerator::EmitFunctionCallPtr(Value* return_value, const void* ptr, const Value& arg1, const Value& arg2, 1251 const Value& arg3, const Value& arg4) 1252 { 1253 if (return_value) 1254 return_value->Discard(); 1255 1256 // shadow space allocate 1257 const u32 adjust_size = PrepareStackForCall(); 1258 1259 // push arguments 1260 EmitCopyValue(RARG1, arg1); 1261 EmitCopyValue(RARG2, arg2); 1262 EmitCopyValue(RARG3, arg3); 1263 EmitCopyValue(RARG4, arg4); 1264 1265 // actually call the function 1266 EmitCall(ptr); 1267 1268 // shadow space release 1269 RestoreStackAfterCall(adjust_size); 1270 1271 // copy out return value if requested 1272 if (return_value) 1273 { 1274 return_value->Undiscard(); 1275 EmitCopyValue(return_value->GetHostRegister(), Value::FromHostReg(&m_register_cache, RRETURN, return_value->size)); 1276 } 1277 } 1278 1279 void CodeGenerator::EmitPushHostReg(HostReg reg, u32 position) 1280 { 1281 const a32::MemOperand addr(a32::sp, FUNCTION_STACK_SIZE - (position * 4)); 1282 m_emit->str(GetHostReg32(reg), addr); 1283 } 1284 1285 void CodeGenerator::EmitPushHostRegPair(HostReg reg, HostReg reg2, u32 position) 1286 { 1287 // TODO: Use stm? 1288 EmitPushHostReg(reg, position); 1289 EmitPushHostReg(reg2, position + 1); 1290 } 1291 1292 void CodeGenerator::EmitPopHostReg(HostReg reg, u32 position) 1293 { 1294 const a32::MemOperand addr(a32::sp, FUNCTION_STACK_SIZE - (position * 4)); 1295 m_emit->ldr(GetHostReg32(reg), addr); 1296 } 1297 1298 void CodeGenerator::EmitPopHostRegPair(HostReg reg, HostReg reg2, u32 position) 1299 { 1300 // TODO: Use ldm? 1301 Assert(position > 0); 1302 EmitPopHostReg(reg2, position); 1303 EmitPopHostReg(reg, position - 1); 1304 } 1305 1306 void CodeGenerator::EmitLoadCPUStructField(HostReg host_reg, RegSize guest_size, u32 offset) 1307 { 1308 const s32 s_offset = static_cast<s32>(offset); 1309 1310 switch (guest_size) 1311 { 1312 case RegSize_8: 1313 m_emit->ldrb(GetHostReg8(host_reg), a32::MemOperand(GetCPUPtrReg(), s_offset)); 1314 break; 1315 1316 case RegSize_16: 1317 m_emit->ldrh(GetHostReg16(host_reg), a32::MemOperand(GetCPUPtrReg(), s_offset)); 1318 break; 1319 1320 case RegSize_32: 1321 m_emit->ldr(GetHostReg32(host_reg), a32::MemOperand(GetCPUPtrReg(), s_offset)); 1322 break; 1323 1324 default: 1325 { 1326 UnreachableCode(); 1327 } 1328 break; 1329 } 1330 } 1331 1332 void CodeGenerator::EmitStoreCPUStructField(u32 offset, const Value& value) 1333 { 1334 const Value hr_value = GetValueInHostOrScratchRegister(value); 1335 const s32 s_offset = static_cast<s32>(offset); 1336 1337 switch (value.size) 1338 { 1339 case RegSize_8: 1340 m_emit->strb(GetHostReg8(hr_value), a32::MemOperand(GetCPUPtrReg(), s_offset)); 1341 break; 1342 1343 case RegSize_16: 1344 m_emit->strh(GetHostReg16(hr_value), a32::MemOperand(GetCPUPtrReg(), s_offset)); 1345 break; 1346 1347 case RegSize_32: 1348 m_emit->str(GetHostReg32(hr_value), a32::MemOperand(GetCPUPtrReg(), s_offset)); 1349 break; 1350 1351 default: 1352 { 1353 UnreachableCode(); 1354 } 1355 break; 1356 } 1357 } 1358 1359 void CodeGenerator::EmitAddCPUStructField(u32 offset, const Value& value) 1360 { 1361 const s32 s_offset = static_cast<s32>(offset); 1362 const a32::MemOperand o_offset(GetCPUPtrReg(), s_offset); 1363 1364 Value real_value; 1365 if (value.IsInHostRegister()) 1366 { 1367 real_value.SetHostReg(&m_register_cache, value.host_reg, value.size); 1368 } 1369 else 1370 { 1371 // do we need temporary storage for the constant, if it won't fit in an immediate? 1372 Assert(value.IsConstant()); 1373 const s32 constant_value = value.GetS32ConstantValue(); 1374 if (!a32::ImmediateA32::IsImmediateA32(static_cast<u32>(constant_value))) 1375 { 1376 real_value.SetHostReg(&m_register_cache, RARG2, value.size); 1377 EmitCopyValue(real_value.host_reg, value); 1378 } 1379 else 1380 { 1381 real_value = value; 1382 } 1383 } 1384 1385 // Don't need to mask here because we're storing back to memory. 1386 switch (value.size) 1387 { 1388 case RegSize_8: 1389 { 1390 m_emit->Ldrb(GetHostReg8(RARG1), o_offset); 1391 if (real_value.IsConstant()) 1392 m_emit->Add(GetHostReg8(RARG1), GetHostReg8(RARG1), real_value.GetS32ConstantValue()); 1393 else 1394 m_emit->Add(GetHostReg8(RARG1), GetHostReg8(RARG1), GetHostReg8(real_value)); 1395 m_emit->Strb(GetHostReg8(RARG1), o_offset); 1396 } 1397 break; 1398 1399 case RegSize_16: 1400 { 1401 m_emit->Ldrh(GetHostReg16(RARG1), o_offset); 1402 if (real_value.IsConstant()) 1403 m_emit->Add(GetHostReg16(RARG1), GetHostReg16(RARG1), real_value.GetS32ConstantValue()); 1404 else 1405 m_emit->Add(GetHostReg16(RARG1), GetHostReg16(RARG1), GetHostReg16(real_value)); 1406 m_emit->Strh(GetHostReg16(RARG1), o_offset); 1407 } 1408 break; 1409 1410 case RegSize_32: 1411 { 1412 m_emit->Ldr(GetHostReg32(RARG1), o_offset); 1413 if (real_value.IsConstant()) 1414 m_emit->Add(GetHostReg32(RARG1), GetHostReg32(RARG1), real_value.GetS32ConstantValue()); 1415 else 1416 m_emit->Add(GetHostReg32(RARG1), GetHostReg32(RARG1), GetHostReg32(real_value)); 1417 m_emit->Str(GetHostReg32(RARG1), o_offset); 1418 } 1419 break; 1420 1421 default: 1422 { 1423 UnreachableCode(); 1424 } 1425 break; 1426 } 1427 } 1428 1429 void CodeGenerator::EnsureMembaseLoaded() 1430 { 1431 if (m_membase_loaded) 1432 return; 1433 1434 m_emit->Ldr(GetFastmemBasePtrReg(), a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, fastmem_base))); 1435 m_membase_loaded = true; 1436 } 1437 1438 void CodeGenerator::EmitUpdateFastmemBase() 1439 { 1440 m_membase_loaded = false; 1441 } 1442 1443 void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size, Value& result) 1444 { 1445 EnsureMembaseLoaded(); 1446 1447 HostReg address_reg; 1448 if (address.IsConstant()) 1449 { 1450 m_emit->Mov(GetHostReg32(RSCRATCH), static_cast<u32>(address.constant_value)); 1451 address_reg = RSCRATCH; 1452 } 1453 else 1454 { 1455 address_reg = address.host_reg; 1456 } 1457 1458 m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); 1459 m_emit->ldr(GetHostReg32(RARG1), 1460 a32::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load 1461 1462 switch (size) 1463 { 1464 case RegSize_8: 1465 m_emit->ldrb(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); 1466 break; 1467 1468 case RegSize_16: 1469 m_emit->ldrh(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); 1470 break; 1471 1472 case RegSize_32: 1473 m_emit->ldr(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); 1474 break; 1475 1476 default: 1477 UnreachableCode(); 1478 break; 1479 } 1480 } 1481 1482 void CodeGenerator::EmitLoadGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info, 1483 const Value& address, RegSize size, Value& result) 1484 { 1485 EnsureMembaseLoaded(); 1486 1487 HostReg address_reg; 1488 if (address.IsConstant()) 1489 { 1490 m_emit->Mov(GetHostReg32(RSCRATCH), static_cast<u32>(address.constant_value)); 1491 address_reg = RSCRATCH; 1492 } 1493 else 1494 { 1495 address_reg = address.host_reg; 1496 } 1497 1498 m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); 1499 m_emit->ldr(GetHostReg32(RARG1), 1500 a32::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load 1501 1502 m_register_cache.InhibitAllocation(); 1503 1504 void* host_pc = GetCurrentNearCodePointer(); 1505 1506 switch (size) 1507 { 1508 case RegSize_8: 1509 m_emit->ldrb(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); 1510 break; 1511 1512 case RegSize_16: 1513 m_emit->ldrh(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); 1514 break; 1515 1516 case RegSize_32: 1517 m_emit->ldr(GetHostReg32(result.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); 1518 break; 1519 1520 default: 1521 UnreachableCode(); 1522 break; 1523 } 1524 1525 const u32 host_code_size = 1526 static_cast<u32>(static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc))); 1527 1528 // generate slowmem fallback 1529 const void* host_slowmem_pc = GetCurrentFarCodePointer(); 1530 SwitchToFarCode(); 1531 1532 // we add the ticks *after* the add here, since we counted incorrectly, then correct for it below 1533 DebugAssert(m_delayed_cycles_add > 0); 1534 EmitAddCPUStructField(OFFSETOF(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add))); 1535 m_delayed_cycles_add += Bus::RAM_READ_TICKS; 1536 1537 EmitLoadGuestMemorySlowmem(instruction, info, address, size, result, true); 1538 1539 EmitAddCPUStructField(OFFSETOF(State, pending_ticks), 1540 Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add))); 1541 1542 // return to the block code 1543 EmitBranch(GetCurrentNearCodePointer(), false); 1544 1545 SwitchToNearCode(); 1546 m_register_cache.UninhibitAllocation(); 1547 1548 CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, host_slowmem_pc); 1549 } 1550 1551 void CodeGenerator::EmitLoadGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info, 1552 const Value& address, RegSize size, Value& result, bool in_far_code) 1553 { 1554 if (g_settings.cpu_recompiler_memory_exceptions) 1555 { 1556 // NOTE: This can leave junk in the upper bits 1557 switch (size) 1558 { 1559 case RegSize_8: 1560 EmitFunctionCall(&result, &Thunks::ReadMemoryByte, address); 1561 break; 1562 1563 case RegSize_16: 1564 EmitFunctionCall(&result, &Thunks::ReadMemoryHalfWord, address); 1565 break; 1566 1567 case RegSize_32: 1568 EmitFunctionCall(&result, &Thunks::ReadMemoryWord, address); 1569 break; 1570 1571 default: 1572 UnreachableCode(); 1573 break; 1574 } 1575 1576 m_register_cache.PushState(); 1577 1578 a32::Label load_okay; 1579 m_emit->tst(GetHostReg32(1), 1); 1580 m_emit->b(a32::ne, &load_okay); 1581 EmitBranch(GetCurrentFarCodePointer()); 1582 m_emit->Bind(&load_okay); 1583 1584 // load exception path 1585 if (!in_far_code) 1586 SwitchToFarCode(); 1587 1588 // cause_bits = (-result << 2) | BD | cop_n 1589 m_emit->rsb(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 0); 1590 m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2); 1591 EmitOr(result.host_reg, result.host_reg, 1592 Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException( 1593 static_cast<Exception>(0), info.is_branch_delay_slot, false, instruction.cop.cop_n))); 1594 EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC()); 1595 1596 EmitExceptionExit(); 1597 1598 if (!in_far_code) 1599 SwitchToNearCode(); 1600 1601 m_register_cache.PopState(); 1602 } 1603 else 1604 { 1605 switch (size) 1606 { 1607 case RegSize_8: 1608 EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryByte, address); 1609 break; 1610 1611 case RegSize_16: 1612 EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryHalfWord, address); 1613 break; 1614 1615 case RegSize_32: 1616 EmitFunctionCall(&result, &Thunks::UncheckedReadMemoryWord, address); 1617 break; 1618 1619 default: 1620 UnreachableCode(); 1621 break; 1622 } 1623 } 1624 } 1625 1626 void CodeGenerator::EmitStoreGuestMemoryFastmem(Instruction instruction, const CodeCache::InstructionInfo& info, 1627 const Value& address, RegSize size, const Value& value) 1628 { 1629 EnsureMembaseLoaded(); 1630 1631 Value actual_value = GetValueInHostRegister(value); 1632 1633 HostReg address_reg; 1634 if (address.IsConstant()) 1635 { 1636 m_emit->Mov(GetHostReg32(RSCRATCH), static_cast<u32>(address.constant_value)); 1637 address_reg = RSCRATCH; 1638 } 1639 else 1640 { 1641 address_reg = address.host_reg; 1642 } 1643 1644 // TODO: if this gets backpatched, these instructions are wasted 1645 1646 m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), Bus::FASTMEM_LUT_PAGE_SHIFT); 1647 m_emit->ldr(GetHostReg32(RARG1), 1648 a32::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load 1649 1650 m_register_cache.InhibitAllocation(); 1651 1652 void* host_pc = GetCurrentNearCodePointer(); 1653 1654 switch (size) 1655 { 1656 case RegSize_8: 1657 m_emit->strb(GetHostReg32(actual_value.host_reg), 1658 a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); 1659 break; 1660 1661 case RegSize_16: 1662 m_emit->strh(GetHostReg32(actual_value.host_reg), 1663 a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); 1664 break; 1665 1666 case RegSize_32: 1667 m_emit->str(GetHostReg32(actual_value.host_reg), a32::MemOperand(GetHostReg32(RARG1), GetHostReg32(address_reg))); 1668 break; 1669 1670 default: 1671 UnreachableCode(); 1672 break; 1673 } 1674 1675 const u32 host_code_size = 1676 static_cast<u32>(static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(host_pc))); 1677 1678 // generate slowmem fallback 1679 void* host_slowmem_pc = GetCurrentFarCodePointer(); 1680 SwitchToFarCode(); 1681 1682 DebugAssert(m_delayed_cycles_add > 0); 1683 EmitAddCPUStructField(OFFSETOF(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add))); 1684 1685 EmitStoreGuestMemorySlowmem(instruction, info, address, size, actual_value, true); 1686 1687 EmitAddCPUStructField(OFFSETOF(State, pending_ticks), 1688 Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add))); 1689 1690 // return to the block code 1691 EmitBranch(GetCurrentNearCodePointer(), false); 1692 1693 SwitchToNearCode(); 1694 m_register_cache.UninhibitAllocation(); 1695 1696 CPU::CodeCache::AddLoadStoreInfo(host_pc, host_code_size, info.pc, host_slowmem_pc); 1697 } 1698 1699 void CodeGenerator::EmitStoreGuestMemorySlowmem(Instruction instruction, const CodeCache::InstructionInfo& info, 1700 const Value& address, RegSize size, const Value& value, 1701 bool in_far_code) 1702 { 1703 Value value_in_hr = GetValueInHostRegister(value); 1704 1705 if (g_settings.cpu_recompiler_memory_exceptions) 1706 { 1707 Assert(!in_far_code); 1708 1709 Value result = m_register_cache.AllocateScratch(RegSize_32); 1710 switch (size) 1711 { 1712 case RegSize_8: 1713 EmitFunctionCall(&result, &Thunks::WriteMemoryByte, address, value_in_hr); 1714 break; 1715 1716 case RegSize_16: 1717 EmitFunctionCall(&result, &Thunks::WriteMemoryHalfWord, address, value_in_hr); 1718 break; 1719 1720 case RegSize_32: 1721 EmitFunctionCall(&result, &Thunks::WriteMemoryWord, address, value_in_hr); 1722 break; 1723 1724 default: 1725 UnreachableCode(); 1726 break; 1727 } 1728 1729 m_register_cache.PushState(); 1730 1731 a32::Label store_okay; 1732 m_emit->tst(GetHostReg32(result.host_reg), 1); 1733 m_emit->b(a32::eq, &store_okay); 1734 EmitBranch(GetCurrentFarCodePointer()); 1735 m_emit->Bind(&store_okay); 1736 1737 // store exception path 1738 if (!in_far_code) 1739 SwitchToFarCode(); 1740 1741 // cause_bits = (result << 2) | BD | cop_n 1742 m_emit->lsl(GetHostReg32(result.host_reg), GetHostReg32(result.host_reg), 2); 1743 EmitOr(result.host_reg, result.host_reg, 1744 Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException( 1745 static_cast<Exception>(0), info.is_branch_delay_slot, false, instruction.cop.cop_n))); 1746 EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), result, GetCurrentInstructionPC()); 1747 1748 if (!in_far_code) 1749 EmitExceptionExit(); 1750 SwitchToNearCode(); 1751 1752 m_register_cache.PopState(); 1753 } 1754 else 1755 { 1756 switch (size) 1757 { 1758 case RegSize_8: 1759 EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryByte, address, value_in_hr); 1760 break; 1761 1762 case RegSize_16: 1763 EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryHalfWord, address, value_in_hr); 1764 break; 1765 1766 case RegSize_32: 1767 EmitFunctionCall(nullptr, &Thunks::UncheckedWriteMemoryWord, address, value_in_hr); 1768 break; 1769 1770 default: 1771 UnreachableCode(); 1772 break; 1773 } 1774 } 1775 } 1776 1777 void CodeGenerator::BackpatchLoadStore(void* host_pc, const CodeCache::LoadstoreBackpatchInfo& lbi) 1778 { 1779 DEV_LOG("Backpatching {} (guest PC 0x{:08X}) to slowmem at {}", host_pc, lbi.guest_pc, lbi.thunk_address); 1780 1781 // turn it into a jump to the slowmem handler 1782 vixl::aarch32::MacroAssembler emit(static_cast<vixl::byte*>(host_pc), lbi.code_size, a32::A32); 1783 1784 // check jump distance 1785 const s32 displacement = armGetPCDisplacement(host_pc, lbi.thunk_address); 1786 if (!armIsPCDisplacementInImmediateRange(displacement)) 1787 { 1788 armMoveAddressToReg(&emit, GetHostReg32(RSCRATCH), lbi.thunk_address); 1789 emit.bx(GetHostReg32(RSCRATCH)); 1790 } 1791 else 1792 { 1793 a32::Label label(displacement + emit.GetCursorOffset()); 1794 emit.b(&label); 1795 } 1796 1797 const s32 nops = (static_cast<s32>(lbi.code_size) - static_cast<s32>(emit.GetCursorOffset())) / 4; 1798 Assert(nops >= 0); 1799 for (s32 i = 0; i < nops; i++) 1800 emit.nop(); 1801 1802 MemMap::FlushInstructionCache(host_pc, lbi.code_size); 1803 } 1804 1805 void CodeGenerator::EmitLoadGlobal(HostReg host_reg, RegSize size, const void* ptr) 1806 { 1807 EmitLoadGlobalAddress(RSCRATCH, ptr); 1808 switch (size) 1809 { 1810 case RegSize_8: 1811 m_emit->Ldrb(GetHostReg8(host_reg), a32::MemOperand(GetHostReg32(RSCRATCH))); 1812 break; 1813 1814 case RegSize_16: 1815 m_emit->Ldrh(GetHostReg16(host_reg), a32::MemOperand(GetHostReg32(RSCRATCH))); 1816 break; 1817 1818 case RegSize_32: 1819 m_emit->Ldr(GetHostReg32(host_reg), a32::MemOperand(GetHostReg32(RSCRATCH))); 1820 break; 1821 1822 default: 1823 UnreachableCode(); 1824 break; 1825 } 1826 } 1827 1828 void CodeGenerator::EmitStoreGlobal(void* ptr, const Value& value) 1829 { 1830 Value value_in_hr = GetValueInHostRegister(value); 1831 1832 EmitLoadGlobalAddress(RSCRATCH, ptr); 1833 switch (value.size) 1834 { 1835 case RegSize_8: 1836 m_emit->Strb(GetHostReg8(value_in_hr), a32::MemOperand(GetHostReg32(RSCRATCH))); 1837 break; 1838 1839 case RegSize_16: 1840 m_emit->Strh(GetHostReg16(value_in_hr), a32::MemOperand(GetHostReg32(RSCRATCH))); 1841 break; 1842 1843 case RegSize_32: 1844 m_emit->Str(GetHostReg32(value_in_hr), a32::MemOperand(GetHostReg32(RSCRATCH))); 1845 break; 1846 1847 default: 1848 UnreachableCode(); 1849 break; 1850 } 1851 } 1852 1853 void CodeGenerator::EmitFlushInterpreterLoadDelay() 1854 { 1855 Value reg = Value::FromHostReg(&m_register_cache, 0, RegSize_32); 1856 Value value = Value::FromHostReg(&m_register_cache, 1, RegSize_32); 1857 1858 const a32::MemOperand load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, load_delay_reg)); 1859 const a32::MemOperand load_delay_value(GetCPUPtrReg(), OFFSETOF(State, load_delay_value)); 1860 const a32::MemOperand regs_base(GetCPUPtrReg(), OFFSETOF(State, regs.r[0])); 1861 1862 a32::Label skip_flush; 1863 1864 // reg = load_delay_reg 1865 m_emit->Ldrb(GetHostReg32(reg), load_delay_reg); 1866 1867 // if load_delay_reg == Reg::count goto skip_flush 1868 m_emit->Cmp(GetHostReg32(reg), static_cast<u8>(Reg::count)); 1869 m_emit->B(a32::eq, &skip_flush); 1870 1871 // value = load_delay_value 1872 m_emit->Ldr(GetHostReg32(value), load_delay_value); 1873 1874 // reg = offset(r[0] + reg << 2) 1875 m_emit->Lsl(GetHostReg32(reg), GetHostReg32(reg), 2); 1876 m_emit->Add(GetHostReg32(reg), GetHostReg32(reg), OFFSETOF(State, regs.r[0])); 1877 1878 // r[reg] = value 1879 m_emit->Str(GetHostReg32(value), a32::MemOperand(GetCPUPtrReg(), GetHostReg32(reg))); 1880 1881 // load_delay_reg = Reg::count 1882 m_emit->Mov(GetHostReg32(reg), static_cast<u8>(Reg::count)); 1883 m_emit->Strb(GetHostReg32(reg), load_delay_reg); 1884 1885 m_emit->Bind(&skip_flush); 1886 } 1887 1888 void CodeGenerator::EmitMoveNextInterpreterLoadDelay() 1889 { 1890 Value reg = Value::FromHostReg(&m_register_cache, 0, RegSize_32); 1891 Value value = Value::FromHostReg(&m_register_cache, 1, RegSize_32); 1892 1893 const a32::MemOperand load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, load_delay_reg)); 1894 const a32::MemOperand load_delay_value(GetCPUPtrReg(), OFFSETOF(State, load_delay_value)); 1895 const a32::MemOperand next_load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, next_load_delay_reg)); 1896 const a32::MemOperand next_load_delay_value(GetCPUPtrReg(), OFFSETOF(State, next_load_delay_value)); 1897 1898 m_emit->ldrb(GetHostReg32(reg), next_load_delay_reg); 1899 m_emit->ldr(GetHostReg32(value), next_load_delay_value); 1900 m_emit->strb(GetHostReg32(reg), load_delay_reg); 1901 m_emit->str(GetHostReg32(value), load_delay_value); 1902 m_emit->Mov(GetHostReg32(reg), static_cast<u8>(Reg::count)); 1903 m_emit->strb(GetHostReg32(reg), next_load_delay_reg); 1904 } 1905 1906 void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg) 1907 { 1908 if (!m_load_delay_dirty) 1909 return; 1910 1911 const a32::MemOperand load_delay_reg(GetCPUPtrReg(), OFFSETOF(State, load_delay_reg)); 1912 Value temp = Value::FromHostReg(&m_register_cache, RSCRATCH, RegSize_8); 1913 1914 a32::Label skip_cancel; 1915 1916 // if load_delay_reg != reg goto skip_cancel 1917 m_emit->ldrb(GetHostReg8(temp), load_delay_reg); 1918 m_emit->cmp(GetHostReg8(temp), static_cast<u8>(reg)); 1919 m_emit->B(a32::ne, &skip_cancel); 1920 1921 // load_delay_reg = Reg::count 1922 m_emit->Mov(GetHostReg8(temp), static_cast<u8>(Reg::count)); 1923 m_emit->strb(GetHostReg8(temp), load_delay_reg); 1924 1925 m_emit->Bind(&skip_cancel); 1926 } 1927 1928 void CodeGenerator::EmitICacheCheckAndUpdate() 1929 { 1930 if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache)) 1931 { 1932 if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks)) 1933 { 1934 armEmitFarLoad(m_emit, GetHostReg32(RARG2), GetFetchMemoryAccessTimePtr()); 1935 m_emit->ldr(GetHostReg32(RARG1), a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks))); 1936 m_emit->Mov(GetHostReg32(RARG3), m_block->size); 1937 m_emit->mul(GetHostReg32(RARG2), GetHostReg32(RARG2), GetHostReg32(RARG3)); 1938 m_emit->add(GetHostReg32(RARG1), GetHostReg32(RARG1), GetHostReg32(RARG2)); 1939 m_emit->str(GetHostReg32(RARG1), a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks))); 1940 } 1941 else 1942 { 1943 EmitAddCPUStructField(OFFSETOF(State, pending_ticks), 1944 Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks))); 1945 } 1946 } 1947 else if (m_block->icache_line_count > 0) 1948 { 1949 const auto& ticks_reg = a32::r0; 1950 const auto& current_tag_reg = a32::r1; 1951 const auto& existing_tag_reg = a32::r2; 1952 1953 VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK; 1954 m_emit->ldr(ticks_reg, a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks))); 1955 m_emit->Mov(current_tag_reg, current_pc); 1956 1957 for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE) 1958 { 1959 const TickCount fill_ticks = GetICacheFillTicks(current_pc); 1960 if (fill_ticks <= 0) 1961 continue; 1962 1963 const u32 line = GetICacheLine(current_pc); 1964 const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32)); 1965 1966 a32::Label cache_hit; 1967 m_emit->ldr(existing_tag_reg, a32::MemOperand(GetCPUPtrReg(), offset)); 1968 m_emit->cmp(existing_tag_reg, current_tag_reg); 1969 m_emit->B(a32::eq, &cache_hit); 1970 1971 m_emit->str(current_tag_reg, a32::MemOperand(GetCPUPtrReg(), offset)); 1972 EmitAdd(0, 0, Value::FromConstantU32(static_cast<u32>(fill_ticks)), false); 1973 m_emit->Bind(&cache_hit); 1974 1975 if (i != (m_block->icache_line_count - 1)) 1976 m_emit->add(current_tag_reg, current_tag_reg, ICACHE_LINE_SIZE); 1977 } 1978 1979 m_emit->str(ticks_reg, a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks))); 1980 } 1981 } 1982 1983 void CodeGenerator::EmitBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) 1984 { 1985 // store it first to reduce code size, because we can offset 1986 armMoveAddressToReg(m_emit, GetHostReg32(RARG1), ram_ptr); 1987 armMoveAddressToReg(m_emit, GetHostReg32(RARG2), shadow_ptr); 1988 1989 u32 offset = 0; 1990 a32::Label block_changed; 1991 1992 #if 0 1993 /* TODO: Vectorize 1994 #include <arm_neon.h> 1995 #include <stdint.h> 1996 1997 bool foo(const void* a, const void* b) 1998 { 1999 uint8x16_t v1 = vld1q_u8((const uint8_t*)a); 2000 uint8x16_t v2 = vld1q_u8((const uint8_t*)b); 2001 uint8x16_t v3 = vld1q_u8((const uint8_t*)a + 16); 2002 uint8x16_t v4 = vld1q_u8((const uint8_t*)a + 16); 2003 uint8x16_t r = vceqq_u8(v1, v2); 2004 uint8x16_t r2 = vceqq_u8(v2, v3); 2005 uint8x16_t r3 = vandq_u8(r, r2); 2006 uint32x2_t rr = vpmin_u32(vget_low_u32(vreinterpretq_u32_u8(r3)), vget_high_u32(vreinterpretq_u32_u8(r3))); 2007 if ((vget_lane_u32(rr, 0) & vget_lane_u32(rr, 1)) != 0xFFFFFFFFu) 2008 return false; 2009 else 2010 return true; 2011 } 2012 */ 2013 bool first = true; 2014 2015 while (size >= 16) 2016 { 2017 const a32::VRegister vtmp = a32::v2.V4S(); 2018 const a32::VRegister dst = first ? a32::v0.V4S() : a32::v1.V4S(); 2019 m_emit->ldr(dst, a32::MemOperand(RXARG1, offset)); 2020 m_emit->ldr(vtmp, a32::MemOperand(RXARG2, offset)); 2021 m_emit->cmeq(dst, dst, vtmp); 2022 if (!first) 2023 m_emit->and_(dst.V16B(), dst.V16B(), vtmp.V16B()); 2024 else 2025 first = false; 2026 2027 offset += 16; 2028 size -= 16; 2029 } 2030 2031 if (!first) 2032 { 2033 // TODO: make sure this doesn't choke on ffffffff 2034 m_emit->uminv(a32::s0, a32::v0.V4S()); 2035 m_emit->fcmp(a32::s0, 0.0); 2036 m_emit->b(&block_changed, a32::eq); 2037 } 2038 #endif 2039 2040 while (size >= 4) 2041 { 2042 m_emit->ldr(GetHostReg32(RARG3), a32::MemOperand(GetHostReg32(RARG1), offset)); 2043 m_emit->ldr(GetHostReg32(RARG4), a32::MemOperand(GetHostReg32(RARG2), offset)); 2044 m_emit->cmp(GetHostReg32(RARG3), GetHostReg32(RARG4)); 2045 m_emit->b(a32::ne, &block_changed); 2046 offset += 4; 2047 size -= 4; 2048 } 2049 2050 DebugAssert(size == 0); 2051 2052 a32::Label block_unchanged; 2053 m_emit->b(&block_unchanged); 2054 m_emit->bind(&block_changed); 2055 armEmitJmp(m_emit, CodeCache::g_discard_and_recompile_block, false); 2056 m_emit->bind(&block_unchanged); 2057 } 2058 2059 void CodeGenerator::EmitStallUntilGTEComplete() 2060 { 2061 static_assert(OFFSETOF(State, pending_ticks) + sizeof(u32) == OFFSETOF(State, gte_completion_tick)); 2062 2063 m_emit->ldr(GetHostReg32(RARG1), a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks))); 2064 m_emit->ldr(GetHostReg32(RARG2), a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, gte_completion_tick))); 2065 2066 if (m_delayed_cycles_add > 0) 2067 { 2068 m_emit->Add(GetHostReg32(RARG1), GetHostReg32(RARG1), static_cast<u32>(m_delayed_cycles_add)); 2069 m_delayed_cycles_add = 0; 2070 } 2071 2072 m_emit->cmp(GetHostReg32(RARG2), GetHostReg32(RARG1)); 2073 m_emit->mov(a32::hi, GetHostReg32(RARG1), GetHostReg32(RARG2)); 2074 m_emit->str(GetHostReg32(RARG1), a32::MemOperand(GetCPUPtrReg(), OFFSETOF(State, pending_ticks))); 2075 } 2076 2077 void CodeGenerator::EmitBranch(const void* address, bool allow_scratch) 2078 { 2079 const s32 displacement = armGetPCDisplacement(GetCurrentCodePointer(), address); 2080 if (armIsPCDisplacementInImmediateRange(displacement)) 2081 { 2082 a32::Label label(displacement + m_emit->GetCursorOffset()); 2083 m_emit->b(&label); 2084 return; 2085 } 2086 2087 m_emit->Mov(GetHostReg32(RSCRATCH), reinterpret_cast<uintptr_t>(address)); 2088 m_emit->bx(GetHostReg32(RSCRATCH)); 2089 } 2090 2091 void CodeGenerator::EmitBranch(LabelType* label) 2092 { 2093 m_emit->b(label); 2094 } 2095 2096 static a32::Condition TranslateCondition(Condition condition, bool invert) 2097 { 2098 switch (condition) 2099 { 2100 case Condition::Always: 2101 return a32::Condition::None(); 2102 2103 case Condition::NotEqual: 2104 case Condition::NotZero: 2105 return invert ? a32::eq : a32::ne; 2106 2107 case Condition::Equal: 2108 case Condition::Zero: 2109 return invert ? a32::ne : a32::eq; 2110 2111 case Condition::Overflow: 2112 return invert ? a32::vc : a32::vs; 2113 2114 case Condition::Greater: 2115 return invert ? a32::le : a32::gt; 2116 2117 case Condition::GreaterEqual: 2118 return invert ? a32::lt : a32::ge; 2119 2120 case Condition::Less: 2121 return invert ? a32::ge : a32::lt; 2122 2123 case Condition::LessEqual: 2124 return invert ? a32::gt : a32::le; 2125 2126 case Condition::Negative: 2127 return invert ? a32::pl : a32::mi; 2128 2129 case Condition::PositiveOrZero: 2130 return invert ? a32::mi : a32::pl; 2131 2132 case Condition::Above: 2133 return invert ? a32::ls : a32::hi; 2134 2135 case Condition::AboveEqual: 2136 return invert ? a32::cc : a32::cs; 2137 2138 case Condition::Below: 2139 return invert ? a32::cs : a32::cc; 2140 2141 case Condition::BelowEqual: 2142 return invert ? a32::hi : a32::ls; 2143 2144 default: 2145 UnreachableCode(); 2146 return a32::Condition::Never(); 2147 } 2148 } 2149 2150 void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, HostReg value, RegSize size, 2151 LabelType* label) 2152 { 2153 switch (condition) 2154 { 2155 case Condition::NotEqual: 2156 case Condition::Equal: 2157 case Condition::Overflow: 2158 case Condition::Greater: 2159 case Condition::GreaterEqual: 2160 case Condition::LessEqual: 2161 case Condition::Less: 2162 case Condition::Above: 2163 case Condition::AboveEqual: 2164 case Condition::Below: 2165 case Condition::BelowEqual: 2166 Panic("Needs a comparison value"); 2167 return; 2168 2169 case Condition::Negative: 2170 case Condition::PositiveOrZero: 2171 { 2172 switch (size) 2173 { 2174 case RegSize_8: 2175 m_emit->tst(GetHostReg8(value), GetHostReg8(value)); 2176 break; 2177 case RegSize_16: 2178 m_emit->tst(GetHostReg16(value), GetHostReg16(value)); 2179 break; 2180 case RegSize_32: 2181 m_emit->tst(GetHostReg32(value), GetHostReg32(value)); 2182 break; 2183 default: 2184 UnreachableCode(); 2185 break; 2186 } 2187 2188 EmitConditionalBranch(condition, invert, label); 2189 return; 2190 } 2191 2192 case Condition::NotZero: 2193 { 2194 switch (size) 2195 { 2196 case RegSize_8: 2197 m_emit->tst(GetHostReg8(value), GetHostReg8(value)); 2198 m_emit->b(a32::ne, label); 2199 break; 2200 case RegSize_16: 2201 m_emit->tst(GetHostReg8(value), GetHostReg8(value)); 2202 m_emit->b(a32::ne, label); 2203 break; 2204 case RegSize_32: 2205 m_emit->tst(GetHostReg8(value), GetHostReg8(value)); 2206 m_emit->b(a32::ne, label); 2207 break; 2208 default: 2209 UnreachableCode(); 2210 break; 2211 } 2212 2213 return; 2214 } 2215 2216 case Condition::Zero: 2217 { 2218 switch (size) 2219 { 2220 case RegSize_8: 2221 m_emit->tst(GetHostReg8(value), GetHostReg8(value)); 2222 m_emit->b(a32::eq, label); 2223 break; 2224 case RegSize_16: 2225 m_emit->tst(GetHostReg8(value), GetHostReg8(value)); 2226 m_emit->b(a32::eq, label); 2227 break; 2228 case RegSize_32: 2229 m_emit->tst(GetHostReg8(value), GetHostReg8(value)); 2230 m_emit->b(a32::eq, label); 2231 break; 2232 default: 2233 UnreachableCode(); 2234 break; 2235 } 2236 2237 return; 2238 } 2239 2240 case Condition::Always: 2241 m_emit->b(label); 2242 return; 2243 2244 default: 2245 UnreachableCode(); 2246 return; 2247 } 2248 } 2249 2250 void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, HostReg lhs, const Value& rhs, 2251 LabelType* label) 2252 { 2253 switch (condition) 2254 { 2255 case Condition::NotEqual: 2256 case Condition::Equal: 2257 case Condition::Overflow: 2258 case Condition::Greater: 2259 case Condition::GreaterEqual: 2260 case Condition::LessEqual: 2261 case Condition::Less: 2262 case Condition::Above: 2263 case Condition::AboveEqual: 2264 case Condition::Below: 2265 case Condition::BelowEqual: 2266 { 2267 EmitCmp(lhs, rhs); 2268 EmitConditionalBranch(condition, invert, label); 2269 return; 2270 } 2271 2272 case Condition::Negative: 2273 case Condition::PositiveOrZero: 2274 case Condition::NotZero: 2275 case Condition::Zero: 2276 { 2277 Assert(!rhs.IsValid() || (rhs.IsConstant() && rhs.GetS64ConstantValue() == 0)); 2278 EmitConditionalBranch(condition, invert, lhs, rhs.size, label); 2279 return; 2280 } 2281 2282 case Condition::Always: 2283 m_emit->b(label); 2284 return; 2285 2286 default: 2287 UnreachableCode(); 2288 return; 2289 } 2290 } 2291 2292 void CodeGenerator::EmitConditionalBranch(Condition condition, bool invert, LabelType* label) 2293 { 2294 if (condition == Condition::Always) 2295 m_emit->b(label); 2296 else 2297 m_emit->b(TranslateCondition(condition, invert), label); 2298 } 2299 2300 void CodeGenerator::EmitBranchIfBitClear(HostReg reg, RegSize size, u8 bit, LabelType* label) 2301 { 2302 switch (size) 2303 { 2304 case RegSize_8: 2305 case RegSize_16: 2306 case RegSize_32: 2307 m_emit->tst(GetHostReg32(reg), static_cast<s32>(1u << bit)); 2308 m_emit->b(a32::eq, label); 2309 break; 2310 2311 default: 2312 UnreachableCode(); 2313 break; 2314 } 2315 } 2316 2317 void CodeGenerator::EmitBindLabel(LabelType* label) 2318 { 2319 m_emit->Bind(label); 2320 } 2321 2322 void CodeGenerator::EmitLoadGlobalAddress(HostReg host_reg, const void* ptr) 2323 { 2324 m_emit->Mov(GetHostReg32(host_reg), reinterpret_cast<uintptr_t>(ptr)); 2325 } 2326 2327 } // namespace CPU::Recompiler 2328 2329 #endif // CPU_ARCH_ARM32