cpu_newrec_compiler_aarch64.cpp (70496B)
// SPDX-FileCopyrightText: 2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#include "cpu_newrec_compiler_aarch64.h"
#include "common/align.h"
#include "common/assert.h"
#include "common/log.h"
#include "common/string_util.h"
#include "cpu_core_private.h"
#include "cpu_pgxp.h"
#include "cpu_recompiler_thunks.h"
#include "cpu_recompiler_types.h"
#include "gte.h"
#include "settings.h"
#include "timing_event.h"
#include <limits>

#ifdef CPU_ARCH_ARM64

Log_SetChannel(CPU::NewRec);

#define PTR(x) vixl::aarch64::MemOperand(RSTATE, (((u8*)(x)) - ((u8*)&g_state)))

namespace CPU::NewRec {

using namespace vixl::aarch64;

using CPU::Recompiler::armEmitCall;
using CPU::Recompiler::armEmitCondBranch;
using CPU::Recompiler::armEmitFarLoad;
using CPU::Recompiler::armEmitJmp;
using CPU::Recompiler::armEmitMov;
using CPU::Recompiler::armGetJumpTrampoline;
using CPU::Recompiler::armGetPCDisplacement;
using CPU::Recompiler::armIsCallerSavedRegister;
using CPU::Recompiler::armMoveAddressToReg;

AArch64Compiler s_instance;
Compiler* g_compiler = &s_instance;

} // namespace CPU::NewRec

CPU::NewRec::AArch64Compiler::AArch64Compiler()
  : m_emitter(PositionDependentCode), m_far_emitter(PositionIndependentCode)
{
}

CPU::NewRec::AArch64Compiler::~AArch64Compiler() = default;

const void* CPU::NewRec::AArch64Compiler::GetCurrentCodePointer()
{
  return armAsm->GetCursorAddress<const void*>();
}

void CPU::NewRec::AArch64Compiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space,
                                         u8* far_code_buffer, u32 far_code_space)
{
  Compiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space);

  // TODO: don't recreate this every time..
  DebugAssert(!armAsm);
  m_emitter.GetBuffer()->Reset(code_buffer, code_buffer_space);
  m_far_emitter.GetBuffer()->Reset(far_code_buffer, far_code_space);
  armAsm = &m_emitter;

#ifdef VIXL_DEBUG
  m_emitter_check = std::make_unique<vixl::CodeBufferCheckScope>(&m_emitter, code_buffer_space,
                                                                 vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
  m_far_emitter_check = std::make_unique<vixl::CodeBufferCheckScope>(
    &m_far_emitter, far_code_space, vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
#endif

  // Need to wipe it out so it's correct when toggling fastmem.
  m_host_regs = {};

  const u32 membase_idx = CodeCache::IsUsingFastmem() ? RMEMBASE.GetCode() : NUM_HOST_REGS;
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    HostRegAlloc& ra = m_host_regs[i];

    if (i == RWARG1.GetCode() || i == RWARG2.GetCode() || i == RWARG3.GetCode() ||
        i == RWSCRATCH.GetCode() || i == RSTATE.GetCode() || i == membase_idx || i == x18.GetCode() || i >= 30)
    {
      continue;
    }

    ra.flags = HR_USABLE | (armIsCallerSavedRegister(i) ?
0 : HR_CALLEE_SAVED); 88 } 89 } 90 91 void CPU::NewRec::AArch64Compiler::SwitchToFarCode(bool emit_jump, vixl::aarch64::Condition cond) 92 { 93 DebugAssert(armAsm == &m_emitter); 94 if (emit_jump) 95 { 96 const s64 disp = armGetPCDisplacement(GetCurrentCodePointer(), m_far_emitter.GetCursorAddress<const void*>()); 97 if (cond != Condition::al) 98 { 99 if (vixl::IsInt19(disp)) 100 { 101 armAsm->b(disp, cond); 102 } 103 else 104 { 105 Label skip; 106 armAsm->b(&skip, vixl::aarch64::InvertCondition(cond)); 107 armAsm->b(armGetPCDisplacement(GetCurrentCodePointer(), m_far_emitter.GetCursorAddress<const void*>())); 108 armAsm->bind(&skip); 109 } 110 } 111 else 112 { 113 armAsm->b(disp); 114 } 115 } 116 armAsm = &m_far_emitter; 117 } 118 119 void CPU::NewRec::AArch64Compiler::SwitchToFarCodeIfBitSet(const vixl::aarch64::Register& reg, u32 bit) 120 { 121 const s64 disp = armGetPCDisplacement(GetCurrentCodePointer(), m_far_emitter.GetCursorAddress<const void*>()); 122 if (vixl::IsInt14(disp)) 123 { 124 armAsm->tbnz(reg, bit, disp); 125 } 126 else 127 { 128 Label skip; 129 armAsm->tbz(reg, bit, &skip); 130 armAsm->b(armGetPCDisplacement(GetCurrentCodePointer(), m_far_emitter.GetCursorAddress<const void*>())); 131 armAsm->bind(&skip); 132 } 133 134 armAsm = &m_far_emitter; 135 } 136 137 void CPU::NewRec::AArch64Compiler::SwitchToFarCodeIfRegZeroOrNonZero(const vixl::aarch64::Register& reg, bool nonzero) 138 { 139 const s64 disp = armGetPCDisplacement(GetCurrentCodePointer(), m_far_emitter.GetCursorAddress<const void*>()); 140 if (vixl::IsInt19(disp)) 141 { 142 nonzero ? armAsm->cbnz(reg, disp) : armAsm->cbz(reg, disp); 143 } 144 else 145 { 146 Label skip; 147 nonzero ? armAsm->cbz(reg, &skip) : armAsm->cbnz(reg, &skip); 148 armAsm->b(armGetPCDisplacement(GetCurrentCodePointer(), m_far_emitter.GetCursorAddress<const void*>())); 149 armAsm->bind(&skip); 150 } 151 152 armAsm = &m_far_emitter; 153 } 154 155 void CPU::NewRec::AArch64Compiler::SwitchToNearCode(bool emit_jump, vixl::aarch64::Condition cond) 156 { 157 DebugAssert(armAsm == &m_far_emitter); 158 if (emit_jump) 159 { 160 const s64 disp = armGetPCDisplacement(GetCurrentCodePointer(), m_emitter.GetCursorAddress<const void*>()); 161 (cond != Condition::al) ? 
armAsm->b(disp, cond) : armAsm->b(disp); 162 } 163 armAsm = &m_emitter; 164 } 165 166 void CPU::NewRec::AArch64Compiler::EmitMov(const vixl::aarch64::Register& dst, u32 val) 167 { 168 armEmitMov(armAsm, dst, val); 169 } 170 171 void CPU::NewRec::AArch64Compiler::EmitCall(const void* ptr, bool force_inline /*= false*/) 172 { 173 armEmitCall(armAsm, ptr, force_inline); 174 } 175 176 vixl::aarch64::Operand CPU::NewRec::AArch64Compiler::armCheckAddSubConstant(s32 val) 177 { 178 if (Assembler::IsImmAddSub(val)) 179 return vixl::aarch64::Operand(static_cast<int64_t>(val)); 180 181 EmitMov(RWSCRATCH, static_cast<u32>(val)); 182 return vixl::aarch64::Operand(RWSCRATCH); 183 } 184 185 vixl::aarch64::Operand CPU::NewRec::AArch64Compiler::armCheckAddSubConstant(u32 val) 186 { 187 return armCheckAddSubConstant(static_cast<s32>(val)); 188 } 189 190 vixl::aarch64::Operand CPU::NewRec::AArch64Compiler::armCheckCompareConstant(s32 val) 191 { 192 if (Assembler::IsImmConditionalCompare(val)) 193 return vixl::aarch64::Operand(static_cast<int64_t>(val)); 194 195 EmitMov(RWSCRATCH, static_cast<u32>(val)); 196 return vixl::aarch64::Operand(RWSCRATCH); 197 } 198 199 vixl::aarch64::Operand CPU::NewRec::AArch64Compiler::armCheckLogicalConstant(u32 val) 200 { 201 if (Assembler::IsImmLogical(val, 32)) 202 return vixl::aarch64::Operand(static_cast<s64>(static_cast<u64>(val))); 203 204 EmitMov(RWSCRATCH, val); 205 return vixl::aarch64::Operand(RWSCRATCH); 206 } 207 208 void CPU::NewRec::AArch64Compiler::BeginBlock() 209 { 210 Compiler::BeginBlock(); 211 } 212 213 void CPU::NewRec::AArch64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) 214 { 215 // store it first to reduce code size, because we can offset 216 armMoveAddressToReg(armAsm, RXARG1, ram_ptr); 217 armMoveAddressToReg(armAsm, RXARG2, shadow_ptr); 218 219 bool first = true; 220 u32 offset = 0; 221 Label block_changed; 222 223 while (size >= 16) 224 { 225 const VRegister vtmp = v2.V4S(); 226 const VRegister dst = first ? 
v0.V4S() : v1.V4S(); 227 armAsm->ldr(dst, MemOperand(RXARG1, offset)); 228 armAsm->ldr(vtmp, MemOperand(RXARG2, offset)); 229 armAsm->cmeq(dst, dst, vtmp); 230 if (!first) 231 armAsm->and_(v0.V16B(), v0.V16B(), dst.V16B()); 232 else 233 first = false; 234 235 offset += 16; 236 size -= 16; 237 } 238 239 if (!first) 240 { 241 // TODO: make sure this doesn't choke on ffffffff 242 armAsm->uminv(s0, v0.V4S()); 243 armAsm->fcmp(s0, 0.0); 244 armAsm->b(&block_changed, eq); 245 } 246 247 while (size >= 8) 248 { 249 armAsm->ldr(RXARG3, MemOperand(RXARG1, offset)); 250 armAsm->ldr(RXSCRATCH, MemOperand(RXARG2, offset)); 251 armAsm->cmp(RXARG3, RXSCRATCH); 252 armAsm->b(&block_changed, ne); 253 offset += 8; 254 size -= 8; 255 } 256 257 while (size >= 4) 258 { 259 armAsm->ldr(RWARG3, MemOperand(RXARG1, offset)); 260 armAsm->ldr(RWSCRATCH, MemOperand(RXARG2, offset)); 261 armAsm->cmp(RWARG3, RWSCRATCH); 262 armAsm->b(&block_changed, ne); 263 offset += 4; 264 size -= 4; 265 } 266 267 DebugAssert(size == 0); 268 269 Label block_unchanged; 270 armAsm->b(&block_unchanged); 271 armAsm->bind(&block_changed); 272 armEmitJmp(armAsm, CodeCache::g_discard_and_recompile_block, false); 273 armAsm->bind(&block_unchanged); 274 } 275 276 void CPU::NewRec::AArch64Compiler::GenerateICacheCheckAndUpdate() 277 { 278 if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache)) 279 { 280 if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks)) 281 { 282 armEmitFarLoad(armAsm, RWARG2, GetFetchMemoryAccessTimePtr()); 283 armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks)); 284 armEmitMov(armAsm, RWARG3, m_block->size); 285 armAsm->mul(RWARG2, RWARG2, RWARG3); 286 armAsm->add(RWARG1, RWARG1, RWARG2); 287 armAsm->str(RWARG1, PTR(&g_state.pending_ticks)); 288 } 289 else 290 { 291 armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks)); 292 armAsm->add(RWARG1, RWARG1, armCheckAddSubConstant(static_cast<u32>(m_block->uncached_fetch_ticks))); 293 armAsm->str(RWARG1, PTR(&g_state.pending_ticks)); 294 } 295 } 296 else if (m_block->icache_line_count > 0) 297 { 298 const auto& ticks_reg = RWARG1; 299 const auto& current_tag_reg = RWARG2; 300 const auto& existing_tag_reg = RWARG3; 301 302 VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK; 303 armAsm->ldr(ticks_reg, PTR(&g_state.pending_ticks)); 304 armEmitMov(armAsm, current_tag_reg, current_pc); 305 306 for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE) 307 { 308 const TickCount fill_ticks = GetICacheFillTicks(current_pc); 309 if (fill_ticks <= 0) 310 continue; 311 312 const u32 line = GetICacheLine(current_pc); 313 const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32)); 314 315 Label cache_hit; 316 armAsm->ldr(existing_tag_reg, MemOperand(RSTATE, offset)); 317 armAsm->cmp(existing_tag_reg, current_tag_reg); 318 armAsm->b(&cache_hit, eq); 319 320 armAsm->str(current_tag_reg, MemOperand(RSTATE, offset)); 321 armAsm->add(ticks_reg, ticks_reg, armCheckAddSubConstant(static_cast<u32>(fill_ticks))); 322 armAsm->bind(&cache_hit); 323 324 if (i != (m_block->icache_line_count - 1)) 325 armAsm->add(current_tag_reg, current_tag_reg, armCheckAddSubConstant(ICACHE_LINE_SIZE)); 326 } 327 328 armAsm->str(ticks_reg, PTR(&g_state.pending_ticks)); 329 } 330 } 331 332 void CPU::NewRec::AArch64Compiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/, 333 s32 arg3reg /*= -1*/) 334 { 335 if (arg1reg >= 0 && arg1reg != static_cast<s32>(RXARG1.GetCode())) 336 armAsm->mov(RXARG1, XRegister(arg1reg)); 337 if 
(arg2reg >= 0 && arg2reg != static_cast<s32>(RXARG2.GetCode())) 338 armAsm->mov(RXARG2, XRegister(arg2reg)); 339 if (arg3reg >= 0 && arg3reg != static_cast<s32>(RXARG3.GetCode())) 340 armAsm->mov(RXARG3, XRegister(arg3reg)); 341 EmitCall(func); 342 } 343 344 void CPU::NewRec::AArch64Compiler::EndBlock(const std::optional<u32>& newpc, bool do_event_test) 345 { 346 if (newpc.has_value()) 347 { 348 if (m_dirty_pc || m_compiler_pc != newpc) 349 { 350 EmitMov(RWSCRATCH, newpc.value()); 351 armAsm->str(RWSCRATCH, PTR(&g_state.pc)); 352 } 353 } 354 m_dirty_pc = false; 355 356 // flush regs 357 Flush(FLUSH_END_BLOCK); 358 EndAndLinkBlock(newpc, do_event_test, false); 359 } 360 361 void CPU::NewRec::AArch64Compiler::EndBlockWithException(Exception excode) 362 { 363 // flush regs, but not pc, it's going to get overwritten 364 // flush cycles because of the GTE instruction stuff... 365 Flush(FLUSH_END_BLOCK | FLUSH_FOR_EXCEPTION | FLUSH_FOR_C_CALL); 366 367 // TODO: flush load delay 368 // TODO: break for pcdrv 369 370 EmitMov(RWARG1, Cop0Registers::CAUSE::MakeValueForException(excode, m_current_instruction_branch_delay_slot, false, 371 inst->cop.cop_n)); 372 EmitMov(RWARG2, m_current_instruction_pc); 373 EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException))); 374 m_dirty_pc = false; 375 376 EndAndLinkBlock(std::nullopt, true, false); 377 } 378 379 void CPU::NewRec::AArch64Compiler::EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test, 380 bool force_run_events) 381 { 382 // event test 383 // pc should've been flushed 384 DebugAssert(!m_dirty_pc && !m_block_ended); 385 m_block_ended = true; 386 387 // TODO: try extracting this to a function 388 389 // save cycles for event test 390 const TickCount cycles = std::exchange(m_cycles, 0); 391 392 // pending_ticks += cycles 393 // if (pending_ticks >= downcount) { dispatch_event(); } 394 if (do_event_test || m_gte_done_cycle > cycles || cycles > 0) 395 armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks)); 396 if (do_event_test) 397 armAsm->ldr(RWARG2, PTR(&g_state.downcount)); 398 if (cycles > 0) 399 armAsm->add(RWARG1, RWARG1, armCheckAddSubConstant(cycles)); 400 if (m_gte_done_cycle > cycles) 401 { 402 armAsm->add(RWARG2, RWARG1, armCheckAddSubConstant(m_gte_done_cycle - cycles)); 403 armAsm->str(RWARG2, PTR(&g_state.gte_completion_tick)); 404 } 405 if (do_event_test) 406 armAsm->cmp(RWARG1, RWARG2); 407 if (cycles > 0) 408 armAsm->str(RWARG1, PTR(&g_state.pending_ticks)); 409 if (do_event_test) 410 armEmitCondBranch(armAsm, ge, CodeCache::g_run_events_and_dispatch); 411 412 // jump to dispatcher or next block 413 if (force_run_events) 414 { 415 armEmitJmp(armAsm, CodeCache::g_run_events_and_dispatch, false); 416 } 417 else if (!newpc.has_value()) 418 { 419 armEmitJmp(armAsm, CodeCache::g_dispatcher, false); 420 } 421 else 422 { 423 if (newpc.value() == m_block->pc) 424 { 425 // Special case: ourselves! No need to backlink then. 
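// (Jumping straight to the start of this block's own buffer avoids going through
// CodeCache::CreateBlockLink(); the else branch below uses it so the jump can be re-patched later.)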
426 DEBUG_LOG("Linking block at {:08X} to self", m_block->pc); 427 armEmitJmp(armAsm, armAsm->GetBuffer()->GetStartAddress<const void*>(), true); 428 } 429 else 430 { 431 const void* target = CodeCache::CreateBlockLink(m_block, armAsm->GetCursorAddress<void*>(), newpc.value()); 432 armEmitJmp(armAsm, target, true); 433 } 434 } 435 } 436 437 const void* CPU::NewRec::AArch64Compiler::EndCompile(u32* code_size, u32* far_code_size) 438 { 439 #ifdef VIXL_DEBUG 440 m_emitter_check.reset(); 441 m_far_emitter_check.reset(); 442 #endif 443 444 m_emitter.FinalizeCode(); 445 m_far_emitter.FinalizeCode(); 446 447 u8* const code = m_emitter.GetBuffer()->GetStartAddress<u8*>(); 448 *code_size = static_cast<u32>(m_emitter.GetCursorOffset()); 449 *far_code_size = static_cast<u32>(m_far_emitter.GetCursorOffset()); 450 armAsm = nullptr; 451 return code; 452 } 453 454 const char* CPU::NewRec::AArch64Compiler::GetHostRegName(u32 reg) const 455 { 456 static constexpr std::array<const char*, 32> reg64_names = { 457 {"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 458 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"}}; 459 return (reg < reg64_names.size()) ? reg64_names[reg] : "UNKNOWN"; 460 } 461 462 void CPU::NewRec::AArch64Compiler::LoadHostRegWithConstant(u32 reg, u32 val) 463 { 464 EmitMov(WRegister(reg), val); 465 } 466 467 void CPU::NewRec::AArch64Compiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr) 468 { 469 armAsm->ldr(WRegister(reg), PTR(ptr)); 470 } 471 472 void CPU::NewRec::AArch64Compiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr) 473 { 474 armAsm->str(WRegister(reg), PTR(ptr)); 475 } 476 477 void CPU::NewRec::AArch64Compiler::StoreConstantToCPUPointer(u32 val, const void* ptr) 478 { 479 if (val == 0) 480 { 481 armAsm->str(wzr, PTR(ptr)); 482 return; 483 } 484 485 EmitMov(RWSCRATCH, val); 486 armAsm->str(RWSCRATCH, PTR(ptr)); 487 } 488 489 void CPU::NewRec::AArch64Compiler::CopyHostReg(u32 dst, u32 src) 490 { 491 if (src != dst) 492 armAsm->mov(WRegister(dst), WRegister(src)); 493 } 494 495 void CPU::NewRec::AArch64Compiler::AssertRegOrConstS(CompileFlags cf) const 496 { 497 DebugAssert(cf.valid_host_s || cf.const_s); 498 } 499 500 void CPU::NewRec::AArch64Compiler::AssertRegOrConstT(CompileFlags cf) const 501 { 502 DebugAssert(cf.valid_host_t || cf.const_t); 503 } 504 505 vixl::aarch64::MemOperand CPU::NewRec::AArch64Compiler::MipsPtr(Reg r) const 506 { 507 DebugAssert(r < Reg::count); 508 return PTR(&g_state.regs.r[static_cast<u32>(r)]); 509 } 510 511 vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegD(CompileFlags cf) const 512 { 513 DebugAssert(cf.valid_host_d); 514 return WRegister(cf.host_d); 515 } 516 517 vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegS(CompileFlags cf) const 518 { 519 DebugAssert(cf.valid_host_s); 520 return WRegister(cf.host_s); 521 } 522 523 vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegT(CompileFlags cf) const 524 { 525 DebugAssert(cf.valid_host_t); 526 return WRegister(cf.host_t); 527 } 528 529 vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegLO(CompileFlags cf) const 530 { 531 DebugAssert(cf.valid_host_lo); 532 return WRegister(cf.host_lo); 533 } 534 535 vixl::aarch64::Register CPU::NewRec::AArch64Compiler::CFGetRegHI(CompileFlags cf) const 536 { 537 DebugAssert(cf.valid_host_hi); 538 return WRegister(cf.host_hi); 539 } 540 541 void CPU::NewRec::AArch64Compiler::MoveSToReg(const 
vixl::aarch64::Register& dst, CompileFlags cf) 542 { 543 DebugAssert(dst.IsW()); 544 if (cf.valid_host_s) 545 { 546 if (cf.host_s != dst.GetCode()) 547 armAsm->mov(dst, WRegister(cf.host_s)); 548 } 549 else if (cf.const_s) 550 { 551 const u32 cv = GetConstantRegU32(cf.MipsS()); 552 if (cv == 0) 553 armAsm->mov(dst, wzr); 554 else 555 EmitMov(dst, cv); 556 } 557 else 558 { 559 WARNING_LOG("Hit memory path in MoveSToReg() for {}", GetRegName(cf.MipsS())); 560 armAsm->ldr(dst, PTR(&g_state.regs.r[cf.mips_s])); 561 } 562 } 563 564 void CPU::NewRec::AArch64Compiler::MoveTToReg(const vixl::aarch64::Register& dst, CompileFlags cf) 565 { 566 DebugAssert(dst.IsW()); 567 if (cf.valid_host_t) 568 { 569 if (cf.host_t != dst.GetCode()) 570 armAsm->mov(dst, WRegister(cf.host_t)); 571 } 572 else if (cf.const_t) 573 { 574 const u32 cv = GetConstantRegU32(cf.MipsT()); 575 if (cv == 0) 576 armAsm->mov(dst, wzr); 577 else 578 EmitMov(dst, cv); 579 } 580 else 581 { 582 WARNING_LOG("Hit memory path in MoveTToReg() for {}", GetRegName(cf.MipsT())); 583 armAsm->ldr(dst, PTR(&g_state.regs.r[cf.mips_t])); 584 } 585 } 586 587 void CPU::NewRec::AArch64Compiler::MoveMIPSRegToReg(const vixl::aarch64::Register& dst, Reg reg) 588 { 589 DebugAssert(reg < Reg::count && dst.IsW()); 590 if (const std::optional<u32> hreg = CheckHostReg(0, Compiler::HR_TYPE_CPU_REG, reg)) 591 armAsm->mov(dst, WRegister(hreg.value())); 592 else if (HasConstantReg(reg)) 593 EmitMov(dst, GetConstantRegU32(reg)); 594 else 595 armAsm->ldr(dst, MipsPtr(reg)); 596 } 597 598 void CPU::NewRec::AArch64Compiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, 599 Reg arg2reg /* = Reg::count */, 600 Reg arg3reg /* = Reg::count */) 601 { 602 DebugAssert(g_settings.gpu_pgxp_enable); 603 604 Flush(FLUSH_FOR_C_CALL); 605 606 if (arg2reg != Reg::count) 607 MoveMIPSRegToReg(RWARG2, arg2reg); 608 if (arg3reg != Reg::count) 609 MoveMIPSRegToReg(RWARG3, arg3reg); 610 611 EmitMov(RWARG1, arg1val); 612 EmitCall(func); 613 } 614 615 void CPU::NewRec::AArch64Compiler::Flush(u32 flags) 616 { 617 Compiler::Flush(flags); 618 619 if (flags & FLUSH_PC && m_dirty_pc) 620 { 621 StoreConstantToCPUPointer(m_compiler_pc, &g_state.pc); 622 m_dirty_pc = false; 623 } 624 625 if (flags & FLUSH_INSTRUCTION_BITS) 626 { 627 // This sucks, but it's only used for fallbacks. 628 EmitMov(RWARG1, inst->bits); 629 EmitMov(RWARG2, m_current_instruction_pc); 630 EmitMov(RWARG3, m_current_instruction_branch_delay_slot); 631 armAsm->str(RWARG1, PTR(&g_state.current_instruction.bits)); 632 armAsm->str(RWARG2, PTR(&g_state.current_instruction_pc)); 633 armAsm->strb(RWARG3, PTR(&g_state.current_instruction_in_branch_delay_slot)); 634 } 635 636 if (flags & FLUSH_LOAD_DELAY_FROM_STATE && m_load_delay_dirty) 637 { 638 // This sucks :( 639 // TODO: make it a function? 
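// The sequence below stores load_delay_value into regs.r[load_delay_reg] via a computed offset
// (base of regs.r plus reg index * 4), then resets load_delay_reg to Reg::count to mark the slot empty.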
640 armAsm->ldrb(RWARG1, PTR(&g_state.load_delay_reg)); 641 armAsm->ldr(RWARG2, PTR(&g_state.load_delay_value)); 642 EmitMov(RWSCRATCH, OFFSETOF(CPU::State, regs.r[0])); 643 armAsm->add(RWARG1, RWSCRATCH, vixl::aarch64::Operand(RWARG1, LSL, 2)); 644 armAsm->str(RWARG2, MemOperand(RSTATE, RXARG1)); 645 EmitMov(RWSCRATCH, static_cast<u8>(Reg::count)); 646 armAsm->strb(RWSCRATCH, PTR(&g_state.load_delay_reg)); 647 m_load_delay_dirty = false; 648 } 649 650 if (flags & FLUSH_LOAD_DELAY && m_load_delay_register != Reg::count) 651 { 652 if (m_load_delay_value_register != NUM_HOST_REGS) 653 FreeHostReg(m_load_delay_value_register); 654 655 EmitMov(RWSCRATCH, static_cast<u8>(m_load_delay_register)); 656 armAsm->strb(RWSCRATCH, PTR(&g_state.load_delay_reg)); 657 m_load_delay_register = Reg::count; 658 m_load_delay_dirty = true; 659 } 660 661 if (flags & FLUSH_GTE_STALL_FROM_STATE && m_dirty_gte_done_cycle) 662 { 663 // May as well flush cycles while we're here. 664 // GTE spanning blocks is very rare, we _could_ disable this for speed. 665 armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks)); 666 armAsm->ldr(RWARG2, PTR(&g_state.gte_completion_tick)); 667 if (m_cycles > 0) 668 { 669 armAsm->add(RWARG1, RWARG1, armCheckAddSubConstant(m_cycles)); 670 m_cycles = 0; 671 } 672 armAsm->cmp(RWARG2, RWARG1); 673 armAsm->csel(RWARG1, RWARG2, RWARG1, hs); 674 armAsm->str(RWARG1, PTR(&g_state.pending_ticks)); 675 m_dirty_gte_done_cycle = false; 676 } 677 678 if (flags & FLUSH_GTE_DONE_CYCLE && m_gte_done_cycle > m_cycles) 679 { 680 armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks)); 681 682 // update cycles at the same time 683 if (flags & FLUSH_CYCLES && m_cycles > 0) 684 { 685 armAsm->add(RWARG1, RWARG1, armCheckAddSubConstant(m_cycles)); 686 armAsm->str(RWARG1, PTR(&g_state.pending_ticks)); 687 m_gte_done_cycle -= m_cycles; 688 m_cycles = 0; 689 } 690 691 armAsm->add(RWARG1, RWARG1, armCheckAddSubConstant(m_gte_done_cycle)); 692 armAsm->str(RWARG1, PTR(&g_state.gte_completion_tick)); 693 m_gte_done_cycle = 0; 694 m_dirty_gte_done_cycle = true; 695 } 696 697 if (flags & FLUSH_CYCLES && m_cycles > 0) 698 { 699 armAsm->ldr(RWARG1, PTR(&g_state.pending_ticks)); 700 armAsm->add(RWARG1, RWARG1, armCheckAddSubConstant(m_cycles)); 701 armAsm->str(RWARG1, PTR(&g_state.pending_ticks)); 702 m_gte_done_cycle = std::max<TickCount>(m_gte_done_cycle - m_cycles, 0); 703 m_cycles = 0; 704 } 705 } 706 707 void CPU::NewRec::AArch64Compiler::Compile_Fallback() 708 { 709 WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", iinfo->pc, inst->bits); 710 711 Flush(FLUSH_FOR_INTERPRETER); 712 713 EmitCall(reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::InterpretInstruction)); 714 715 // TODO: make me less garbage 716 // TODO: this is wrong, it flushes the load delay on the same cycle when we return. 717 // but nothing should be going through here.. 
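// If the interpreted instruction scheduled a delayed load (next_load_delay_reg != Reg::count),
// promote it to the active load delay slot and clear the pending one, i.e. the same
// next_load_delay -> load_delay shift the interpreter performs between instructions.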
718 Label no_load_delay; 719 armAsm->ldrb(RWARG1, PTR(&g_state.next_load_delay_reg)); 720 armAsm->cmp(RWARG1, static_cast<u8>(Reg::count)); 721 armAsm->b(&no_load_delay, eq); 722 armAsm->ldr(RWARG2, PTR(&g_state.next_load_delay_value)); 723 armAsm->strb(RWARG1, PTR(&g_state.load_delay_reg)); 724 armAsm->str(RWARG2, PTR(&g_state.load_delay_value)); 725 EmitMov(RWARG1, static_cast<u32>(Reg::count)); 726 armAsm->strb(RWARG1, PTR(&g_state.next_load_delay_reg)); 727 armAsm->bind(&no_load_delay); 728 729 m_load_delay_dirty = EMULATE_LOAD_DELAYS; 730 } 731 732 void CPU::NewRec::AArch64Compiler::CheckBranchTarget(const vixl::aarch64::Register& pcreg) 733 { 734 DebugAssert(pcreg.IsW()); 735 if (!g_settings.cpu_recompiler_memory_exceptions) 736 return; 737 738 armAsm->tst(pcreg, armCheckLogicalConstant(0x3)); 739 SwitchToFarCode(true, ne); 740 741 BackupHostState(); 742 EndBlockWithException(Exception::AdEL); 743 744 RestoreHostState(); 745 SwitchToNearCode(false); 746 } 747 748 void CPU::NewRec::AArch64Compiler::Compile_jr(CompileFlags cf) 749 { 750 const Register pcreg = CFGetRegS(cf); 751 CheckBranchTarget(pcreg); 752 753 armAsm->str(pcreg, PTR(&g_state.pc)); 754 755 CompileBranchDelaySlot(false); 756 EndBlock(std::nullopt, true); 757 } 758 759 void CPU::NewRec::AArch64Compiler::Compile_jalr(CompileFlags cf) 760 { 761 const Register pcreg = CFGetRegS(cf); 762 if (MipsD() != Reg::zero) 763 SetConstantReg(MipsD(), GetBranchReturnAddress(cf)); 764 765 CheckBranchTarget(pcreg); 766 armAsm->str(pcreg, PTR(&g_state.pc)); 767 768 CompileBranchDelaySlot(false); 769 EndBlock(std::nullopt, true); 770 } 771 772 void CPU::NewRec::AArch64Compiler::Compile_bxx(CompileFlags cf, BranchCondition cond) 773 { 774 AssertRegOrConstS(cf); 775 776 const u32 taken_pc = GetConditionalBranchTarget(cf); 777 778 Flush(FLUSH_FOR_BRANCH); 779 780 DebugAssert(cf.valid_host_s); 781 782 // MipsT() here should equal zero for zero branches. 783 DebugAssert(cond == BranchCondition::Equal || cond == BranchCondition::NotEqual || cf.MipsT() == Reg::zero); 784 785 Label taken; 786 const Register rs = CFGetRegS(cf); 787 switch (cond) 788 { 789 case BranchCondition::Equal: 790 case BranchCondition::NotEqual: 791 { 792 AssertRegOrConstT(cf); 793 if (cf.const_t && HasConstantRegValue(cf.MipsT(), 0)) 794 { 795 (cond == BranchCondition::Equal) ? armAsm->cbz(rs, &taken) : armAsm->cbnz(rs, &taken); 796 } 797 else 798 { 799 if (cf.valid_host_t) 800 armAsm->cmp(rs, CFGetRegT(cf)); 801 else if (cf.const_t) 802 armAsm->cmp(rs, armCheckCompareConstant(GetConstantRegU32(cf.MipsT()))); 803 804 armAsm->b(&taken, (cond == BranchCondition::Equal) ? 
eq : ne); 805 } 806 } 807 break; 808 809 case BranchCondition::GreaterThanZero: 810 { 811 armAsm->cmp(rs, 0); 812 armAsm->b(&taken, gt); 813 } 814 break; 815 816 case BranchCondition::GreaterEqualZero: 817 { 818 armAsm->cmp(rs, 0); 819 armAsm->b(&taken, ge); 820 } 821 break; 822 823 case BranchCondition::LessThanZero: 824 { 825 armAsm->cmp(rs, 0); 826 armAsm->b(&taken, lt); 827 } 828 break; 829 830 case BranchCondition::LessEqualZero: 831 { 832 armAsm->cmp(rs, 0); 833 armAsm->b(&taken, le); 834 } 835 break; 836 } 837 838 BackupHostState(); 839 if (!cf.delay_slot_swapped) 840 CompileBranchDelaySlot(); 841 842 EndBlock(m_compiler_pc, true); 843 844 armAsm->bind(&taken); 845 846 RestoreHostState(); 847 if (!cf.delay_slot_swapped) 848 CompileBranchDelaySlot(); 849 850 EndBlock(taken_pc, true); 851 } 852 853 void CPU::NewRec::AArch64Compiler::Compile_addi(CompileFlags cf, bool overflow) 854 { 855 const Register rs = CFGetRegS(cf); 856 const Register rt = CFGetRegT(cf); 857 if (const u32 imm = inst->i.imm_sext32(); imm != 0) 858 { 859 if (!overflow) 860 { 861 armAsm->add(rt, rs, armCheckAddSubConstant(imm)); 862 } 863 else 864 { 865 armAsm->adds(rt, rs, armCheckAddSubConstant(imm)); 866 TestOverflow(rt); 867 } 868 } 869 else if (rt.GetCode() != rs.GetCode()) 870 { 871 armAsm->mov(rt, rs); 872 } 873 } 874 875 void CPU::NewRec::AArch64Compiler::Compile_addi(CompileFlags cf) 876 { 877 Compile_addi(cf, g_settings.cpu_recompiler_memory_exceptions); 878 } 879 880 void CPU::NewRec::AArch64Compiler::Compile_addiu(CompileFlags cf) 881 { 882 Compile_addi(cf, false); 883 } 884 885 void CPU::NewRec::AArch64Compiler::Compile_slti(CompileFlags cf) 886 { 887 Compile_slti(cf, true); 888 } 889 890 void CPU::NewRec::AArch64Compiler::Compile_sltiu(CompileFlags cf) 891 { 892 Compile_slti(cf, false); 893 } 894 895 void CPU::NewRec::AArch64Compiler::Compile_slti(CompileFlags cf, bool sign) 896 { 897 armAsm->cmp(CFGetRegS(cf), armCheckCompareConstant(static_cast<s32>(inst->i.imm_sext32()))); 898 armAsm->cset(CFGetRegT(cf), sign ? 
lt : lo); 899 } 900 901 void CPU::NewRec::AArch64Compiler::Compile_andi(CompileFlags cf) 902 { 903 const Register rt = CFGetRegT(cf); 904 if (const u32 imm = inst->i.imm_zext32(); imm != 0) 905 armAsm->and_(rt, CFGetRegS(cf), armCheckLogicalConstant(imm)); 906 else 907 armAsm->mov(rt, wzr); 908 } 909 910 void CPU::NewRec::AArch64Compiler::Compile_ori(CompileFlags cf) 911 { 912 const Register rt = CFGetRegT(cf); 913 const Register rs = CFGetRegS(cf); 914 if (const u32 imm = inst->i.imm_zext32(); imm != 0) 915 armAsm->orr(rt, rs, armCheckLogicalConstant(imm)); 916 else if (rt.GetCode() != rs.GetCode()) 917 armAsm->mov(rt, rs); 918 } 919 920 void CPU::NewRec::AArch64Compiler::Compile_xori(CompileFlags cf) 921 { 922 const Register rt = CFGetRegT(cf); 923 const Register rs = CFGetRegS(cf); 924 if (const u32 imm = inst->i.imm_zext32(); imm != 0) 925 armAsm->eor(rt, rs, armCheckLogicalConstant(imm)); 926 else if (rt.GetCode() != rs.GetCode()) 927 armAsm->mov(rt, rs); 928 } 929 930 void CPU::NewRec::AArch64Compiler::Compile_shift(CompileFlags cf, 931 void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&, 932 const vixl::aarch64::Register&, 933 unsigned)) 934 { 935 const Register rd = CFGetRegD(cf); 936 const Register rt = CFGetRegT(cf); 937 if (inst->r.shamt > 0) 938 (armAsm->*op)(rd, rt, inst->r.shamt); 939 else if (rd.GetCode() != rt.GetCode()) 940 armAsm->mov(rd, rt); 941 } 942 943 void CPU::NewRec::AArch64Compiler::Compile_sll(CompileFlags cf) 944 { 945 Compile_shift(cf, &Assembler::lsl); 946 } 947 948 void CPU::NewRec::AArch64Compiler::Compile_srl(CompileFlags cf) 949 { 950 Compile_shift(cf, &Assembler::lsr); 951 } 952 953 void CPU::NewRec::AArch64Compiler::Compile_sra(CompileFlags cf) 954 { 955 Compile_shift(cf, &Assembler::asr); 956 } 957 958 void CPU::NewRec::AArch64Compiler::Compile_variable_shift( 959 CompileFlags cf, 960 void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&, const vixl::aarch64::Register&, 961 const vixl::aarch64::Register&), 962 void (vixl::aarch64::Assembler::*op_const)(const vixl::aarch64::Register&, const vixl::aarch64::Register&, unsigned)) 963 { 964 const Register rd = CFGetRegD(cf); 965 966 AssertRegOrConstS(cf); 967 AssertRegOrConstT(cf); 968 969 const Register rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2; 970 if (!cf.valid_host_t) 971 MoveTToReg(rt, cf); 972 973 if (cf.const_s) 974 { 975 if (const u32 shift = GetConstantRegU32(cf.MipsS()); shift != 0) 976 (armAsm->*op_const)(rd, rt, shift); 977 else if (rd.GetCode() != rt.GetCode()) 978 armAsm->mov(rd, rt); 979 } 980 else 981 { 982 (armAsm->*op)(rd, rt, CFGetRegS(cf)); 983 } 984 } 985 986 void CPU::NewRec::AArch64Compiler::Compile_sllv(CompileFlags cf) 987 { 988 Compile_variable_shift(cf, &Assembler::lslv, &Assembler::lsl); 989 } 990 991 void CPU::NewRec::AArch64Compiler::Compile_srlv(CompileFlags cf) 992 { 993 Compile_variable_shift(cf, &Assembler::lsrv, &Assembler::lsr); 994 } 995 996 void CPU::NewRec::AArch64Compiler::Compile_srav(CompileFlags cf) 997 { 998 Compile_variable_shift(cf, &Assembler::asrv, &Assembler::asr); 999 } 1000 1001 void CPU::NewRec::AArch64Compiler::Compile_mult(CompileFlags cf, bool sign) 1002 { 1003 const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1; 1004 if (!cf.valid_host_s) 1005 MoveSToReg(rs, cf); 1006 1007 const Register rt = cf.valid_host_t ? 
CFGetRegT(cf) : RWARG2;
  if (!cf.valid_host_t)
    MoveTToReg(rt, cf);

  // TODO: if lo/hi gets killed, we can use a 32-bit multiply
  const Register lo = CFGetRegLO(cf);
  const Register hi = CFGetRegHI(cf);

  (sign) ? armAsm->smull(lo.X(), rs, rt) : armAsm->umull(lo.X(), rs, rt);
  armAsm->lsr(hi.X(), lo.X(), 32);
}

void CPU::NewRec::AArch64Compiler::Compile_mult(CompileFlags cf)
{
  Compile_mult(cf, true);
}

void CPU::NewRec::AArch64Compiler::Compile_multu(CompileFlags cf)
{
  Compile_mult(cf, false);
}

void CPU::NewRec::AArch64Compiler::Compile_div(CompileFlags cf)
{
  const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1;
  if (!cf.valid_host_s)
    MoveSToReg(rs, cf);

  const Register rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
  if (!cf.valid_host_t)
    MoveTToReg(rt, cf);

  const Register rlo = CFGetRegLO(cf);
  const Register rhi = CFGetRegHI(cf);

  // TODO: This could be slightly more optimal
  Label done;
  Label not_divide_by_zero;
  armAsm->cbnz(rt, &not_divide_by_zero);
  armAsm->mov(rhi, rs); // hi = num
  EmitMov(rlo, 1);
  EmitMov(RWSCRATCH, static_cast<u32>(-1));
  armAsm->cmp(rs, 0);
  armAsm->csel(rlo, RWSCRATCH, rlo, ge); // lo = s >= 0 ? -1 : 1
  armAsm->b(&done);

  armAsm->bind(&not_divide_by_zero);
  Label not_unrepresentable;
  armAsm->cmp(rs, armCheckCompareConstant(static_cast<s32>(0x80000000u)));
  armAsm->b(&not_unrepresentable, ne);
  armAsm->cmp(rt, armCheckCompareConstant(-1));
  armAsm->b(&not_unrepresentable, ne);

  EmitMov(rlo, 0x80000000u);
  EmitMov(rhi, 0);
  armAsm->b(&done);

  armAsm->bind(&not_unrepresentable);

  armAsm->sdiv(rlo, rs, rt);

  // TODO: skip when hi is dead
  armAsm->msub(rhi, rlo, rt, rs);

  armAsm->bind(&done);
}

void CPU::NewRec::AArch64Compiler::Compile_divu(CompileFlags cf)
{
  const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RWARG1;
  if (!cf.valid_host_s)
    MoveSToReg(rs, cf);

  const Register rt = cf.valid_host_t ? CFGetRegT(cf) : RWARG2;
  if (!cf.valid_host_t)
    MoveTToReg(rt, cf);

  const Register rlo = CFGetRegLO(cf);
  const Register rhi = CFGetRegHI(cf);

  Label done;
  Label not_divide_by_zero;
  armAsm->cbnz(rt, &not_divide_by_zero);
  EmitMov(rlo, static_cast<u32>(-1));
  armAsm->mov(rhi, rs);
  armAsm->b(&done);

  armAsm->bind(&not_divide_by_zero);

  armAsm->udiv(rlo, rs, rt);

  // TODO: skip when hi is dead
  armAsm->msub(rhi, rlo, rt, rs);

  armAsm->bind(&done);
}

void CPU::NewRec::AArch64Compiler::TestOverflow(const vixl::aarch64::Register& result)
{
  DebugAssert(result.IsW());
  SwitchToFarCode(true, vs);

  BackupHostState();

  // toss the result
  ClearHostReg(result.GetCode());

  EndBlockWithException(Exception::Ov);

  RestoreHostState();

  SwitchToNearCode(false);
}

void CPU::NewRec::AArch64Compiler::Compile_dst_op(CompileFlags cf,
                                                  void (vixl::aarch64::Assembler::*op)(const vixl::aarch64::Register&,
                                                                                       const vixl::aarch64::Register&,
                                                                                       const vixl::aarch64::Operand&),
                                                  bool commutative, bool logical, bool overflow)
{
  AssertRegOrConstS(cf);
  AssertRegOrConstT(cf);

  const Register rd = CFGetRegD(cf);
  if (cf.valid_host_s && cf.valid_host_t)
  {
    (armAsm->*op)(rd, CFGetRegS(cf), CFGetRegT(cf));
  }
  else if (commutative && (cf.const_s || cf.const_t))
  {
    const Register src = cf.const_s ? CFGetRegT(cf) : CFGetRegS(cf);
    if (const u32 cv = GetConstantRegU32(cf.const_s ? cf.MipsS() : cf.MipsT()); cv != 0)
    {
      (armAsm->*op)(rd, src, logical ? armCheckLogicalConstant(cv) : armCheckAddSubConstant(cv));
    }
    else
    {
      if (rd.GetCode() != src.GetCode())
        armAsm->mov(rd, src);
      overflow = false;
    }
  }
  else if (cf.const_s)
  {
    // TODO: Check where we can use wzr here
    EmitMov(RWSCRATCH, GetConstantRegU32(cf.MipsS()));
    (armAsm->*op)(rd, RWSCRATCH, CFGetRegT(cf));
  }
  else if (cf.const_t)
  {
    const Register rs = CFGetRegS(cf);
    if (const u32 cv = GetConstantRegU32(cf.const_s ? cf.MipsS() : cf.MipsT()); cv != 0)
    {
      (armAsm->*op)(rd, rs, logical ?
armCheckLogicalConstant(cv) : armCheckAddSubConstant(cv)); 1161 } 1162 else 1163 { 1164 if (rd.GetCode() != rs.GetCode()) 1165 armAsm->mov(rd, rs); 1166 overflow = false; 1167 } 1168 } 1169 1170 if (overflow) 1171 TestOverflow(rd); 1172 } 1173 1174 void CPU::NewRec::AArch64Compiler::Compile_add(CompileFlags cf) 1175 { 1176 if (g_settings.cpu_recompiler_memory_exceptions) 1177 Compile_dst_op(cf, &Assembler::adds, true, false, true); 1178 else 1179 Compile_dst_op(cf, &Assembler::add, true, false, false); 1180 } 1181 1182 void CPU::NewRec::AArch64Compiler::Compile_addu(CompileFlags cf) 1183 { 1184 Compile_dst_op(cf, &Assembler::add, true, false, false); 1185 } 1186 1187 void CPU::NewRec::AArch64Compiler::Compile_sub(CompileFlags cf) 1188 { 1189 if (g_settings.cpu_recompiler_memory_exceptions) 1190 Compile_dst_op(cf, &Assembler::subs, false, false, true); 1191 else 1192 Compile_dst_op(cf, &Assembler::sub, false, false, false); 1193 } 1194 1195 void CPU::NewRec::AArch64Compiler::Compile_subu(CompileFlags cf) 1196 { 1197 Compile_dst_op(cf, &Assembler::sub, false, false, false); 1198 } 1199 1200 void CPU::NewRec::AArch64Compiler::Compile_and(CompileFlags cf) 1201 { 1202 AssertRegOrConstS(cf); 1203 AssertRegOrConstT(cf); 1204 1205 // special cases - and with self -> self, and with 0 -> 0 1206 const Register regd = CFGetRegD(cf); 1207 if (cf.MipsS() == cf.MipsT()) 1208 { 1209 armAsm->mov(regd, CFGetRegS(cf)); 1210 return; 1211 } 1212 else if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0)) 1213 { 1214 armAsm->mov(regd, wzr); 1215 return; 1216 } 1217 1218 Compile_dst_op(cf, &Assembler::and_, true, true, false); 1219 } 1220 1221 void CPU::NewRec::AArch64Compiler::Compile_or(CompileFlags cf) 1222 { 1223 AssertRegOrConstS(cf); 1224 AssertRegOrConstT(cf); 1225 1226 // or/nor with 0 -> no effect 1227 const Register regd = CFGetRegD(cf); 1228 if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0) || cf.MipsS() == cf.MipsT()) 1229 { 1230 cf.const_s ? MoveTToReg(regd, cf) : MoveSToReg(regd, cf); 1231 return; 1232 } 1233 1234 Compile_dst_op(cf, &Assembler::orr, true, true, false); 1235 } 1236 1237 void CPU::NewRec::AArch64Compiler::Compile_xor(CompileFlags cf) 1238 { 1239 AssertRegOrConstS(cf); 1240 AssertRegOrConstT(cf); 1241 1242 const Register regd = CFGetRegD(cf); 1243 if (cf.MipsS() == cf.MipsT()) 1244 { 1245 // xor with self -> zero 1246 armAsm->mov(regd, wzr); 1247 return; 1248 } 1249 else if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0)) 1250 { 1251 // xor with zero -> no effect 1252 cf.const_s ? 
MoveTToReg(regd, cf) : MoveSToReg(regd, cf); 1253 return; 1254 } 1255 1256 Compile_dst_op(cf, &Assembler::eor, true, true, false); 1257 } 1258 1259 void CPU::NewRec::AArch64Compiler::Compile_nor(CompileFlags cf) 1260 { 1261 Compile_or(cf); 1262 armAsm->mvn(CFGetRegD(cf), CFGetRegD(cf)); 1263 } 1264 1265 void CPU::NewRec::AArch64Compiler::Compile_slt(CompileFlags cf) 1266 { 1267 Compile_slt(cf, true); 1268 } 1269 1270 void CPU::NewRec::AArch64Compiler::Compile_sltu(CompileFlags cf) 1271 { 1272 Compile_slt(cf, false); 1273 } 1274 1275 void CPU::NewRec::AArch64Compiler::Compile_slt(CompileFlags cf, bool sign) 1276 { 1277 AssertRegOrConstS(cf); 1278 AssertRegOrConstT(cf); 1279 1280 // TODO: swap and reverse op for constants 1281 if (cf.const_s) 1282 { 1283 EmitMov(RWSCRATCH, GetConstantRegS32(cf.MipsS())); 1284 armAsm->cmp(RWSCRATCH, CFGetRegT(cf)); 1285 } 1286 else if (cf.const_t) 1287 { 1288 armAsm->cmp(CFGetRegS(cf), armCheckCompareConstant(GetConstantRegS32(cf.MipsT()))); 1289 } 1290 else 1291 { 1292 armAsm->cmp(CFGetRegS(cf), CFGetRegT(cf)); 1293 } 1294 1295 armAsm->cset(CFGetRegD(cf), sign ? lt : lo); 1296 } 1297 1298 vixl::aarch64::Register 1299 CPU::NewRec::AArch64Compiler::ComputeLoadStoreAddressArg(CompileFlags cf, 1300 const std::optional<VirtualMemoryAddress>& address, 1301 const std::optional<const vixl::aarch64::Register>& reg) 1302 { 1303 const u32 imm = inst->i.imm_sext32(); 1304 if (cf.valid_host_s && imm == 0 && !reg.has_value()) 1305 return CFGetRegS(cf); 1306 1307 const Register dst = reg.has_value() ? reg.value() : RWARG1; 1308 if (address.has_value()) 1309 { 1310 EmitMov(dst, address.value()); 1311 } 1312 else if (imm == 0) 1313 { 1314 if (cf.valid_host_s) 1315 { 1316 if (const Register src = CFGetRegS(cf); src.GetCode() != dst.GetCode()) 1317 armAsm->mov(dst, CFGetRegS(cf)); 1318 } 1319 else 1320 { 1321 armAsm->ldr(dst, MipsPtr(cf.MipsS())); 1322 } 1323 } 1324 else 1325 { 1326 if (cf.valid_host_s) 1327 { 1328 armAsm->add(dst, CFGetRegS(cf), armCheckAddSubConstant(static_cast<s32>(inst->i.imm_sext32()))); 1329 } 1330 else 1331 { 1332 armAsm->ldr(dst, MipsPtr(cf.MipsS())); 1333 armAsm->add(dst, dst, armCheckAddSubConstant(static_cast<s32>(inst->i.imm_sext32()))); 1334 } 1335 } 1336 1337 return dst; 1338 } 1339 1340 template<typename RegAllocFn> 1341 vixl::aarch64::Register CPU::NewRec::AArch64Compiler::GenerateLoad(const vixl::aarch64::Register& addr_reg, 1342 MemoryAccessSize size, bool sign, bool use_fastmem, 1343 const RegAllocFn& dst_reg_alloc) 1344 { 1345 DebugAssert(addr_reg.IsW()); 1346 if (use_fastmem) 1347 { 1348 m_cycles += Bus::RAM_READ_TICKS; 1349 1350 const Register dst = dst_reg_alloc(); 1351 1352 if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) 1353 { 1354 DebugAssert(addr_reg.GetCode() != RWARG3.GetCode()); 1355 armAsm->lsr(RXARG3, addr_reg, Bus::FASTMEM_LUT_PAGE_SHIFT); 1356 armAsm->ldr(RXARG3, MemOperand(RMEMBASE, RXARG3, LSL, 3)); 1357 } 1358 1359 const MemOperand mem = 1360 MemOperand((g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? RXARG3 : RMEMBASE, addr_reg.X()); 1361 u8* start = armAsm->GetCursorAddress<u8*>(); 1362 switch (size) 1363 { 1364 case MemoryAccessSize::Byte: 1365 sign ? armAsm->ldrsb(dst, mem) : armAsm->ldrb(dst, mem); 1366 break; 1367 1368 case MemoryAccessSize::HalfWord: 1369 sign ? 
armAsm->ldrsh(dst, mem) : armAsm->ldrh(dst, mem); 1370 break; 1371 1372 case MemoryAccessSize::Word: 1373 armAsm->ldr(dst, mem); 1374 break; 1375 } 1376 1377 AddLoadStoreInfo(start, kInstructionSize, addr_reg.GetCode(), dst.GetCode(), size, sign, true); 1378 return dst; 1379 } 1380 1381 if (addr_reg.GetCode() != RWARG1.GetCode()) 1382 armAsm->mov(RWARG1, addr_reg); 1383 1384 const bool checked = g_settings.cpu_recompiler_memory_exceptions; 1385 switch (size) 1386 { 1387 case MemoryAccessSize::Byte: 1388 { 1389 EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::ReadMemoryByte) : 1390 reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryByte)); 1391 } 1392 break; 1393 case MemoryAccessSize::HalfWord: 1394 { 1395 EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::ReadMemoryHalfWord) : 1396 reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryHalfWord)); 1397 } 1398 break; 1399 case MemoryAccessSize::Word: 1400 { 1401 EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::ReadMemoryWord) : 1402 reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryWord)); 1403 } 1404 break; 1405 } 1406 1407 // TODO: turn this into an asm function instead 1408 if (checked) 1409 { 1410 SwitchToFarCodeIfBitSet(RXRET, 63); 1411 BackupHostState(); 1412 1413 // Need to stash this in a temp because of the flush. 1414 const WRegister temp = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)); 1415 armAsm->neg(temp.X(), RXRET); 1416 armAsm->lsl(temp, temp, 2); 1417 1418 Flush(FLUSH_FOR_C_CALL | FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_FOR_EXCEPTION); 1419 1420 // cause_bits = (-result << 2) | BD | cop_n 1421 armAsm->orr(RWARG1, temp, 1422 armCheckLogicalConstant(Cop0Registers::CAUSE::MakeValueForException( 1423 static_cast<Exception>(0), m_current_instruction_branch_delay_slot, false, inst->cop.cop_n))); 1424 EmitMov(RWARG2, m_current_instruction_pc); 1425 EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException))); 1426 FreeHostReg(temp.GetCode()); 1427 EndBlock(std::nullopt, true); 1428 1429 RestoreHostState(); 1430 SwitchToNearCode(false); 1431 } 1432 1433 const Register dst_reg = dst_reg_alloc(); 1434 switch (size) 1435 { 1436 case MemoryAccessSize::Byte: 1437 { 1438 sign ? armAsm->sxtb(dst_reg, RWRET) : armAsm->uxtb(dst_reg, RWRET); 1439 } 1440 break; 1441 case MemoryAccessSize::HalfWord: 1442 { 1443 sign ? armAsm->sxth(dst_reg, RWRET) : armAsm->uxth(dst_reg, RWRET); 1444 } 1445 break; 1446 case MemoryAccessSize::Word: 1447 { 1448 if (dst_reg.GetCode() != RWRET.GetCode()) 1449 armAsm->mov(dst_reg, RWRET); 1450 } 1451 break; 1452 } 1453 1454 return dst_reg; 1455 } 1456 1457 void CPU::NewRec::AArch64Compiler::GenerateStore(const vixl::aarch64::Register& addr_reg, 1458 const vixl::aarch64::Register& value_reg, MemoryAccessSize size, 1459 bool use_fastmem) 1460 { 1461 DebugAssert(addr_reg.IsW() && value_reg.IsW()); 1462 if (use_fastmem) 1463 { 1464 if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) 1465 { 1466 DebugAssert(addr_reg.GetCode() != RWARG3.GetCode()); 1467 armAsm->lsr(RXARG3, addr_reg, Bus::FASTMEM_LUT_PAGE_SHIFT); 1468 armAsm->ldr(RXARG3, MemOperand(RMEMBASE, RXARG3, LSL, 3)); 1469 } 1470 1471 const MemOperand mem = 1472 MemOperand((g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT) ? 
RXARG3 : RMEMBASE, addr_reg.X()); 1473 u8* start = armAsm->GetCursorAddress<u8*>(); 1474 switch (size) 1475 { 1476 case MemoryAccessSize::Byte: 1477 armAsm->strb(value_reg, mem); 1478 break; 1479 1480 case MemoryAccessSize::HalfWord: 1481 armAsm->strh(value_reg, mem); 1482 break; 1483 1484 case MemoryAccessSize::Word: 1485 armAsm->str(value_reg, mem); 1486 break; 1487 } 1488 AddLoadStoreInfo(start, kInstructionSize, addr_reg.GetCode(), value_reg.GetCode(), size, false, false); 1489 return; 1490 } 1491 1492 if (addr_reg.GetCode() != RWARG1.GetCode()) 1493 armAsm->mov(RWARG1, addr_reg); 1494 if (value_reg.GetCode() != RWARG2.GetCode()) 1495 armAsm->mov(RWARG2, value_reg); 1496 1497 const bool checked = g_settings.cpu_recompiler_memory_exceptions; 1498 switch (size) 1499 { 1500 case MemoryAccessSize::Byte: 1501 { 1502 EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::WriteMemoryByte) : 1503 reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryByte)); 1504 } 1505 break; 1506 case MemoryAccessSize::HalfWord: 1507 { 1508 EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::WriteMemoryHalfWord) : 1509 reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryHalfWord)); 1510 } 1511 break; 1512 case MemoryAccessSize::Word: 1513 { 1514 EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::WriteMemoryWord) : 1515 reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryWord)); 1516 } 1517 break; 1518 } 1519 1520 // TODO: turn this into an asm function instead 1521 if (checked) 1522 { 1523 SwitchToFarCodeIfRegZeroOrNonZero(RXRET, true); 1524 BackupHostState(); 1525 1526 // Need to stash this in a temp because of the flush. 1527 const WRegister temp = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)); 1528 armAsm->lsl(temp, RWRET, 2); 1529 1530 Flush(FLUSH_FOR_C_CALL | FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_FOR_EXCEPTION); 1531 1532 // cause_bits = (result << 2) | BD | cop_n 1533 armAsm->orr(RWARG1, temp, 1534 armCheckLogicalConstant(Cop0Registers::CAUSE::MakeValueForException( 1535 static_cast<Exception>(0), m_current_instruction_branch_delay_slot, false, inst->cop.cop_n))); 1536 EmitMov(RWARG2, m_current_instruction_pc); 1537 EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException))); 1538 FreeHostReg(temp.GetCode()); 1539 EndBlock(std::nullopt, true); 1540 1541 RestoreHostState(); 1542 SwitchToNearCode(false); 1543 } 1544 } 1545 1546 void CPU::NewRec::AArch64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, 1547 const std::optional<VirtualMemoryAddress>& address) 1548 { 1549 const std::optional<WRegister> addr_reg = 1550 g_settings.gpu_pgxp_enable ? std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) : 1551 std::optional<WRegister>(); 1552 FlushForLoadStore(address, false, use_fastmem); 1553 const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg); 1554 const Register data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() -> Register { 1555 if (cf.MipsT() == Reg::zero) 1556 return RWRET; 1557 1558 return WRegister(AllocateHostReg(GetFlagsForNewLoadDelayedReg(), 1559 EMULATE_LOAD_DELAYS ? 
HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, 1560 cf.MipsT())); 1561 }); 1562 1563 if (g_settings.gpu_pgxp_enable) 1564 { 1565 Flush(FLUSH_FOR_C_CALL); 1566 1567 EmitMov(RWARG1, inst->bits); 1568 armAsm->mov(RWARG2, addr); 1569 armAsm->mov(RWARG3, data); 1570 EmitCall(s_pgxp_mem_load_functions[static_cast<u32>(size)][static_cast<u32>(sign)]); 1571 FreeHostReg(addr_reg.value().GetCode()); 1572 } 1573 } 1574 1575 void CPU::NewRec::AArch64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, 1576 const std::optional<VirtualMemoryAddress>& address) 1577 { 1578 DebugAssert(size == MemoryAccessSize::Word && !sign); 1579 1580 const Register addr = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)); 1581 FlushForLoadStore(address, false, use_fastmem); 1582 1583 // TODO: if address is constant, this can be simplified.. 1584 1585 // If we're coming from another block, just flush the load delay and hope for the best.. 1586 if (m_load_delay_dirty) 1587 UpdateLoadDelay(); 1588 1589 // We'd need to be careful here if we weren't overwriting it.. 1590 ComputeLoadStoreAddressArg(cf, address, addr); 1591 armAsm->and_(RWARG1, addr, armCheckLogicalConstant(~0x3u)); 1592 GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; }); 1593 1594 if (inst->r.rt == Reg::zero) 1595 { 1596 FreeHostReg(addr.GetCode()); 1597 return; 1598 } 1599 1600 // lwl/lwr from a load-delayed value takes the new value, but it itself, is load delayed, so the original value is 1601 // never written back. NOTE: can't trust T in cf because of the flush 1602 const Reg rt = inst->r.rt; 1603 Register value; 1604 if (m_load_delay_register == rt) 1605 { 1606 const u32 existing_ld_rt = (m_load_delay_value_register == NUM_HOST_REGS) ? 1607 AllocateHostReg(HR_MODE_READ, HR_TYPE_LOAD_DELAY_VALUE, rt) : 1608 m_load_delay_value_register; 1609 RenameHostReg(existing_ld_rt, HR_MODE_WRITE, HR_TYPE_NEXT_LOAD_DELAY_VALUE, rt); 1610 value = WRegister(existing_ld_rt); 1611 } 1612 else 1613 { 1614 if constexpr (EMULATE_LOAD_DELAYS) 1615 { 1616 value = WRegister(AllocateHostReg(HR_MODE_WRITE, HR_TYPE_NEXT_LOAD_DELAY_VALUE, rt)); 1617 if (const std::optional<u32> rtreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt); rtreg.has_value()) 1618 armAsm->mov(value, WRegister(rtreg.value())); 1619 else if (HasConstantReg(rt)) 1620 EmitMov(value, GetConstantRegU32(rt)); 1621 else 1622 armAsm->ldr(value, MipsPtr(rt)); 1623 } 1624 else 1625 { 1626 value = WRegister(AllocateHostReg(HR_MODE_READ | HR_MODE_WRITE, HR_TYPE_CPU_REG, rt)); 1627 } 1628 } 1629 1630 DebugAssert(value.GetCode() != RWARG2.GetCode() && value.GetCode() != RWARG3.GetCode()); 1631 armAsm->and_(RWARG2, addr, 3); 1632 armAsm->lsl(RWARG2, RWARG2, 3); // *8 1633 EmitMov(RWARG3, 24); 1634 armAsm->sub(RWARG3, RWARG3, RWARG2); 1635 1636 if (inst->op == InstructionOp::lwl) 1637 { 1638 // const u32 mask = UINT32_C(0x00FFFFFF) >> shift; 1639 // new_value = (value & mask) | (RWRET << (24 - shift)); 1640 EmitMov(RWSCRATCH, 0xFFFFFFu); 1641 armAsm->lsrv(RWSCRATCH, RWSCRATCH, RWARG2); 1642 armAsm->and_(value, value, RWSCRATCH); 1643 armAsm->lslv(RWRET, RWRET, RWARG3); 1644 armAsm->orr(value, value, RWRET); 1645 } 1646 else 1647 { 1648 // const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift); 1649 // new_value = (value & mask) | (RWRET >> shift); 1650 armAsm->lsrv(RWRET, RWRET, RWARG2); 1651 EmitMov(RWSCRATCH, 0xFFFFFF00u); 1652 armAsm->lslv(RWSCRATCH, RWSCRATCH, RWARG3); 1653 armAsm->and_(value, value, RWSCRATCH); 1654 armAsm->orr(value, value, 
RWRET);
  }

  FreeHostReg(addr.GetCode());

  if (g_settings.gpu_pgxp_enable)
  {
    Flush(FLUSH_FOR_C_CALL);
    armAsm->mov(RWARG3, value);
    armAsm->and_(RWARG2, addr, armCheckLogicalConstant(~0x3u));
    EmitMov(RWARG1, inst->bits);
    EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LW));
  }
}

void CPU::NewRec::AArch64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                                const std::optional<VirtualMemoryAddress>& address)
{
  const u32 index = static_cast<u32>(inst->r.rt.GetValue());
  const auto [ptr, action] = GetGTERegisterPointer(index, true);
  const std::optional<WRegister> addr_reg =
    g_settings.gpu_pgxp_enable ? std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) :
                                 std::optional<WRegister>();
  FlushForLoadStore(address, false, use_fastmem);
  const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
  const Register value = GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, [this, action = action]() {
    return (action == GTERegisterAccessAction::CallHandler && g_settings.gpu_pgxp_enable) ?
             WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) :
             RWRET;
  });

  switch (action)
  {
    case GTERegisterAccessAction::Ignore:
    {
      break;
    }

    case GTERegisterAccessAction::Direct:
    {
      armAsm->str(value, PTR(ptr));
      break;
    }

    case GTERegisterAccessAction::SignExtend16:
    {
      armAsm->sxth(RWARG3, value);
      armAsm->str(RWARG3, PTR(ptr));
      break;
    }

    case GTERegisterAccessAction::ZeroExtend16:
    {
      armAsm->uxth(RWARG3, value);
      armAsm->str(RWARG3, PTR(ptr));
      break;
    }

    case GTERegisterAccessAction::CallHandler:
    {
      Flush(FLUSH_FOR_C_CALL);
      armAsm->mov(RWARG2, value);
      EmitMov(RWARG1, index);
      EmitCall(reinterpret_cast<const void*>(&GTE::WriteRegister));
      break;
    }

    case GTERegisterAccessAction::PushFIFO:
    {
      // SXY0 <- SXY1
      // SXY1 <- SXY2
      // SXY2 <- SXYP
      DebugAssert(value.GetCode() != RWARG2.GetCode() && value.GetCode() != RWARG3.GetCode());
      armAsm->ldr(RWARG2, PTR(&g_state.gte_regs.SXY1[0]));
      armAsm->ldr(RWARG3, PTR(&g_state.gte_regs.SXY2[0]));
      armAsm->str(RWARG2, PTR(&g_state.gte_regs.SXY0[0]));
      armAsm->str(RWARG3, PTR(&g_state.gte_regs.SXY1[0]));
      armAsm->str(value, PTR(&g_state.gte_regs.SXY2[0]));
      break;
    }

    default:
    {
      Panic("Unknown action");
      return;
    }
  }

  if (g_settings.gpu_pgxp_enable)
  {
    Flush(FLUSH_FOR_C_CALL);
    armAsm->mov(RWARG3, value);
    if (value.GetCode() != RWRET.GetCode())
      FreeHostReg(value.GetCode());
    armAsm->mov(RWARG2, addr);
    FreeHostReg(addr_reg.value().GetCode());
    EmitMov(RWARG1, inst->bits);
    EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LWC2));
  }
}

void CPU::NewRec::AArch64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                               const std::optional<VirtualMemoryAddress>& address)
{
  AssertRegOrConstS(cf);
  AssertRegOrConstT(cf);

  const std::optional<WRegister> addr_reg =
    g_settings.gpu_pgxp_enable ?
std::optional<WRegister>(WRegister(AllocateTempHostReg(HR_CALLEE_SAVED))) : 1763 std::optional<WRegister>(); 1764 FlushForLoadStore(address, true, use_fastmem); 1765 const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg); 1766 const Register data = cf.valid_host_t ? CFGetRegT(cf) : RWARG2; 1767 if (!cf.valid_host_t) 1768 MoveTToReg(RWARG2, cf); 1769 1770 GenerateStore(addr, data, size, use_fastmem); 1771 1772 if (g_settings.gpu_pgxp_enable) 1773 { 1774 Flush(FLUSH_FOR_C_CALL); 1775 MoveMIPSRegToReg(RWARG3, cf.MipsT()); 1776 armAsm->mov(RWARG2, addr); 1777 EmitMov(RWARG1, inst->bits); 1778 EmitCall(s_pgxp_mem_store_functions[static_cast<u32>(size)]); 1779 FreeHostReg(addr_reg.value().GetCode()); 1780 } 1781 } 1782 1783 void CPU::NewRec::AArch64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, 1784 const std::optional<VirtualMemoryAddress>& address) 1785 { 1786 DebugAssert(size == MemoryAccessSize::Word && !sign); 1787 1788 // TODO: this can take over rt's value if it's no longer needed 1789 // NOTE: can't trust T in cf because of the alloc 1790 const Register addr = WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)); 1791 const Register value = g_settings.gpu_pgxp_enable ? WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2; 1792 if (g_settings.gpu_pgxp_enable) 1793 MoveMIPSRegToReg(value, inst->r.rt); 1794 1795 FlushForLoadStore(address, true, use_fastmem); 1796 1797 // TODO: if address is constant, this can be simplified.. 1798 // We'd need to be careful here if we weren't overwriting it.. 1799 ComputeLoadStoreAddressArg(cf, address, addr); 1800 armAsm->and_(RWARG1, addr, armCheckLogicalConstant(~0x3u)); 1801 GenerateLoad(RWARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RWRET; }); 1802 1803 armAsm->and_(RWSCRATCH, addr, 3); 1804 armAsm->lsl(RWSCRATCH, RWSCRATCH, 3); // *8 1805 armAsm->and_(addr, addr, armCheckLogicalConstant(~0x3u)); 1806 1807 // Need to load down here for PGXP-off, because it's in a volatile reg that can get overwritten by flush. 
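// At this point RWSCRATCH holds (addr & 3) * 8, the bit shift for the unaligned access, and RWRET holds
// the aligned word loaded above; the swl/swr branches below merge 'value' into it per the mask formulas.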

  // Need to load down here for PGXP-off, because it's in a volatile reg that can get overwritten by flush.
  if (!g_settings.gpu_pgxp_enable)
    MoveMIPSRegToReg(value, inst->r.rt);

  if (inst->op == InstructionOp::swl)
  {
    // const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift;
    // new_value = (RWRET & mem_mask) | (value >> (24 - shift));
    EmitMov(RWARG3, 0xFFFFFF00u);
    armAsm->lslv(RWARG3, RWARG3, RWSCRATCH);
    armAsm->and_(RWRET, RWRET, RWARG3);

    EmitMov(RWARG3, 24);
    armAsm->sub(RWARG3, RWARG3, RWSCRATCH);
    armAsm->lsrv(value, value, RWARG3);
    armAsm->orr(value, value, RWRET);
  }
  else
  {
    // const u32 mem_mask = UINT32_C(0x00FFFFFF) >> (24 - shift);
    // new_value = (RWRET & mem_mask) | (value << shift);
    armAsm->lslv(value, value, RWSCRATCH);

    EmitMov(RWARG3, 24);
    armAsm->sub(RWARG3, RWARG3, RWSCRATCH);
    EmitMov(RWSCRATCH, 0x00FFFFFFu);
    armAsm->lsrv(RWSCRATCH, RWSCRATCH, RWARG3);
    armAsm->and_(RWRET, RWRET, RWSCRATCH);
    armAsm->orr(value, value, RWRET);
  }

  if (!g_settings.gpu_pgxp_enable)
  {
    GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
    FreeHostReg(addr.GetCode());
  }
  else
  {
    GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);

    Flush(FLUSH_FOR_C_CALL);
    armAsm->mov(RWARG3, value);
    FreeHostReg(value.GetCode());
    armAsm->mov(RWARG2, addr);
    FreeHostReg(addr.GetCode());
    EmitMov(RWARG1, inst->bits);
    EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SW));
  }
}

void CPU::NewRec::AArch64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                                const std::optional<VirtualMemoryAddress>& address)
{
  const u32 index = static_cast<u32>(inst->r.rt.GetValue());
  const auto [ptr, action] = GetGTERegisterPointer(index, false);
  const Register addr = (g_settings.gpu_pgxp_enable || action == GTERegisterAccessAction::CallHandler) ?
                          WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) :
                          RWARG1;
  const Register data = g_settings.gpu_pgxp_enable ? WRegister(AllocateTempHostReg(HR_CALLEE_SAVED)) : RWARG2;
  FlushForLoadStore(address, true, use_fastmem);
  ComputeLoadStoreAddressArg(cf, address, addr);

  switch (action)
  {
    case GTERegisterAccessAction::Direct:
    {
      armAsm->ldr(data, PTR(ptr));
    }
    break;

    case GTERegisterAccessAction::CallHandler:
    {
      // should already be flushed.. except in fastmem case
      Flush(FLUSH_FOR_C_CALL);
      EmitMov(RWARG1, index);
      EmitCall(reinterpret_cast<const void*>(&GTE::ReadRegister));
      armAsm->mov(data, RWRET);
    }
    break;

    default:
    {
      Panic("Unknown action");
    }
    break;
  }

  GenerateStore(addr, data, size, use_fastmem);
  if (!g_settings.gpu_pgxp_enable)
  {
    if (addr.GetCode() != RWARG1.GetCode())
      FreeHostReg(addr.GetCode());
  }
  else
  {
    // TODO: This can be simplified because we don't need to validate in PGXP..
    Flush(FLUSH_FOR_C_CALL);
    armAsm->mov(RWARG3, data);
    FreeHostReg(data.GetCode());
    armAsm->mov(RWARG2, addr);
    FreeHostReg(addr.GetCode());
    EmitMov(RWARG1, inst->bits);
    EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SWC2));
  }
}

void CPU::NewRec::AArch64Compiler::Compile_mtc0(CompileFlags cf)
{
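  // COP0 writes are a masked read-modify-write: only bits covered by the register's write mask are
  // replaced. Writes to SR additionally track which bits changed, so the far-code path can refresh
  // the memory pointers when the cache-isolation bit (16) flips, before re-testing for interrupts.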
  // TODO: we need better constant setting here.. which will need backprop
  AssertRegOrConstT(cf);

  const Cop0Reg reg = static_cast<Cop0Reg>(MipsD());
  const u32* ptr = GetCop0RegPtr(reg);
  const u32 mask = GetCop0RegWriteMask(reg);
  if (!ptr)
  {
    Compile_Fallback();
    return;
  }

  if (mask == 0)
  {
    // if it's a read-only register, ignore
    DEBUG_LOG("Ignoring write to read-only cop0 reg {}", static_cast<u32>(reg));
    return;
  }

  // for some registers, we need to test certain bits
  const bool needs_bit_test = (reg == Cop0Reg::SR);
  const Register new_value = RWARG1;
  const Register old_value = RWARG2;
  const Register changed_bits = RWARG3;
  const Register mask_reg = RWSCRATCH;

  // Load old value
  armAsm->ldr(old_value, PTR(ptr));

  // No way we fit this in an immediate..
  EmitMov(mask_reg, mask);

  // update value
  if (cf.valid_host_t)
    armAsm->and_(new_value, CFGetRegT(cf), mask_reg);
  else
    EmitMov(new_value, GetConstantRegU32(cf.MipsT()) & mask);

  if (needs_bit_test)
    armAsm->eor(changed_bits, old_value, new_value);
  armAsm->bic(old_value, old_value, mask_reg);
  armAsm->orr(new_value, old_value, new_value);
  armAsm->str(new_value, PTR(ptr));

  if (reg == Cop0Reg::SR)
  {
    // TODO: replace with register backup
    // We could just inline the whole thing..
    Flush(FLUSH_FOR_C_CALL);

    SwitchToFarCodeIfBitSet(changed_bits, 16);
    armAsm->sub(sp, sp, 16);
    armAsm->str(RWARG1, MemOperand(sp));
    EmitCall(reinterpret_cast<const void*>(&CPU::UpdateMemoryPointers));
    armAsm->ldr(RWARG1, MemOperand(sp));
    armAsm->add(sp, sp, 16);
    armAsm->ldr(RMEMBASE, PTR(&g_state.fastmem_base));
    SwitchToNearCode(true);

    TestInterrupts(RWARG1);
  }
  else if (reg == Cop0Reg::CAUSE)
  {
    armAsm->ldr(RWARG1, PTR(&g_state.cop0_regs.sr.bits));
    TestInterrupts(RWARG1);
  }

  if (reg == Cop0Reg::DCIC && g_settings.cpu_recompiler_memory_exceptions)
  {
    // TODO: DCIC handling for debug breakpoints
    WARNING_LOG("TODO: DCIC handling for debug breakpoints");
  }
}

void CPU::NewRec::AArch64Compiler::Compile_rfe(CompileFlags cf)
{
  // shift mode bits right two, preserving upper bits
  armAsm->ldr(RWARG1, PTR(&g_state.cop0_regs.sr.bits));
  armAsm->bfxil(RWARG1, RWARG1, 2, 4);
  armAsm->str(RWARG1, PTR(&g_state.cop0_regs.sr.bits));

  TestInterrupts(RWARG1);
}

void CPU::NewRec::AArch64Compiler::TestInterrupts(const vixl::aarch64::Register& sr)
{
  DebugAssert(sr.IsW());

  // if Iec == 0 then goto no_interrupt
  Label no_interrupt;
  armAsm->tbz(sr, 0, &no_interrupt);

  // sr & cause
  armAsm->ldr(RWSCRATCH, PTR(&g_state.cop0_regs.cause.bits));
  armAsm->and_(sr, sr, RWSCRATCH);

  // ((sr & cause) & 0xff00) == 0 goto no_interrupt
  armAsm->tst(sr, 0xFF00);

  SwitchToFarCode(true, ne);
  BackupHostState();

  // Update load delay, this normally happens at the end of an instruction, but we're finishing it early.
  UpdateLoadDelay();

  Flush(FLUSH_END_BLOCK | FLUSH_FOR_EXCEPTION | FLUSH_FOR_C_CALL);

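  // An interrupt is pending: either raise Exception::INT immediately (when this isn't the last
  // instruction in the block), or zero the downcount (updating the PC if needed) so the pending
  // interrupt is serviced once the block exits.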
  // Can't use EndBlockWithException() here, because it'll use the wrong PC.
  // Can't use RaiseException() on the fast path if we're the last instruction, because the next PC is unknown.
  if (!iinfo->is_last_instruction)
  {
    EmitMov(RWARG1, Cop0Registers::CAUSE::MakeValueForException(Exception::INT, iinfo->is_branch_instruction, false,
                                                                (inst + 1)->cop.cop_n));
    EmitMov(RWARG2, m_compiler_pc);
    EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));
    m_dirty_pc = false;
    EndAndLinkBlock(std::nullopt, true, false);
  }
  else
  {
    if (m_dirty_pc)
      EmitMov(RWARG1, m_compiler_pc);
    armAsm->str(wzr, PTR(&g_state.downcount));
    if (m_dirty_pc)
      armAsm->str(RWARG1, PTR(&g_state.pc));
    m_dirty_pc = false;
    EndAndLinkBlock(std::nullopt, false, true);
  }

  RestoreHostState();
  SwitchToNearCode(false);

  armAsm->bind(&no_interrupt);
}

void CPU::NewRec::AArch64Compiler::Compile_mfc2(CompileFlags cf)
{
  const u32 index = inst->cop.Cop2Index();
  const Reg rt = inst->r.rt;

  const auto [ptr, action] = GetGTERegisterPointer(index, false);
  if (action == GTERegisterAccessAction::Ignore)
    return;

  u32 hreg;
  if (action == GTERegisterAccessAction::Direct)
  {
    hreg = AllocateHostReg(GetFlagsForNewLoadDelayedReg(),
                           EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, rt);
    armAsm->ldr(WRegister(hreg), PTR(ptr));
  }
  else if (action == GTERegisterAccessAction::CallHandler)
  {
    Flush(FLUSH_FOR_C_CALL);
    EmitMov(RWARG1, index);
    EmitCall(reinterpret_cast<const void*>(&GTE::ReadRegister));

    hreg = AllocateHostReg(GetFlagsForNewLoadDelayedReg(),
                           EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, rt);
    armAsm->mov(WRegister(hreg), RWRET);
  }
  else
  {
    Panic("Unknown action");
    return;
  }

  if (g_settings.gpu_pgxp_enable)
  {
    Flush(FLUSH_FOR_C_CALL);
    EmitMov(RWARG1, inst->bits);
    armAsm->mov(RWARG2, WRegister(hreg));
    EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_MFC2));
  }
}

void CPU::NewRec::AArch64Compiler::Compile_mtc2(CompileFlags cf)
{
  const u32 index = inst->cop.Cop2Index();
  const auto [ptr, action] = GetGTERegisterPointer(index, true);
  if (action == GTERegisterAccessAction::Ignore)
    return;

  if (action == GTERegisterAccessAction::Direct)
  {
    if (cf.const_t)
      StoreConstantToCPUPointer(GetConstantRegU32(cf.MipsT()), ptr);
    else
      armAsm->str(CFGetRegT(cf), PTR(ptr));
  }
  else if (action == GTERegisterAccessAction::SignExtend16 || action == GTERegisterAccessAction::ZeroExtend16)
  {
    const bool sign = (action == GTERegisterAccessAction::SignExtend16);
    if (cf.valid_host_t)
    {
      sign ? armAsm->sxth(RWARG1, CFGetRegT(cf)) : armAsm->uxth(RWARG1, CFGetRegT(cf));
      armAsm->str(RWARG1, PTR(ptr));
    }
    else if (cf.const_t)
    {
      const u16 cv = Truncate16(GetConstantRegU32(cf.MipsT()));
      StoreConstantToCPUPointer(sign ? ::SignExtend32(cv) : ::ZeroExtend32(cv), ptr);
    }
    else
    {
      Panic("Unsupported setup");
    }
  }
  else if (action == GTERegisterAccessAction::CallHandler)
  {
    Flush(FLUSH_FOR_C_CALL);
    EmitMov(RWARG1, index);
    MoveTToReg(RWARG2, cf);
    EmitCall(reinterpret_cast<const void*>(&GTE::WriteRegister));
  }
  else if (action == GTERegisterAccessAction::PushFIFO)
  {
    // SXY0 <- SXY1
    // SXY1 <- SXY2
    // SXY2 <- SXYP
    DebugAssert(RWRET.GetCode() != RWARG2.GetCode() && RWRET.GetCode() != RWARG3.GetCode());
    armAsm->ldr(RWARG2, PTR(&g_state.gte_regs.SXY1[0]));
    armAsm->ldr(RWARG3, PTR(&g_state.gte_regs.SXY2[0]));
    armAsm->str(RWARG2, PTR(&g_state.gte_regs.SXY0[0]));
    armAsm->str(RWARG3, PTR(&g_state.gte_regs.SXY1[0]));
    if (cf.valid_host_t)
      armAsm->str(CFGetRegT(cf), PTR(&g_state.gte_regs.SXY2[0]));
    else if (cf.const_t)
      StoreConstantToCPUPointer(GetConstantRegU32(cf.MipsT()), &g_state.gte_regs.SXY2[0]);
    else
      Panic("Unsupported setup");
  }
  else
  {
    Panic("Unknown action");
  }
}

void CPU::NewRec::AArch64Compiler::Compile_cop2(CompileFlags cf)
{
  TickCount func_ticks;
  GTE::InstructionImpl func = GTE::GetInstructionImpl(inst->bits, &func_ticks);

  Flush(FLUSH_FOR_C_CALL);
  EmitMov(RWARG1, inst->bits & GTE::Instruction::REQUIRED_BITS_MASK);
  EmitCall(reinterpret_cast<const void*>(func));

  AddGTETicks(func_ticks);
}

u32 CPU::NewRec::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size,
                                       TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask,
                                       u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
                                       bool is_load)
{
  Assembler arm_asm(static_cast<u8*>(thunk_code), thunk_space);
  Assembler* armAsm = &arm_asm;

#ifdef VIXL_DEBUG
  vixl::CodeBufferCheckScope asm_check(armAsm, thunk_space, vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
#endif

  static constexpr u32 GPR_SIZE = 8;

  // save regs
  u32 num_gprs = 0;

  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    if ((gpr_bitmask & (1u << i)) && armIsCallerSavedRegister(i) && (!is_load || data_register != i))
      num_gprs++;
  }

  const u32 stack_size = (((num_gprs + 1) & ~1u) * GPR_SIZE);

  // TODO: use stp+ldp, vixl helper?
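  // num_gprs is rounded up to an even count above so the spill area keeps sp 16-byte aligned
  // (GPR_SIZE is 8).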

  if (stack_size > 0)
  {
    armAsm->sub(sp, sp, stack_size);

    u32 stack_offset = 0;
    for (u32 i = 0; i < NUM_HOST_REGS; i++)
    {
      if ((gpr_bitmask & (1u << i)) && armIsCallerSavedRegister(i) && (!is_load || data_register != i))
      {
        armAsm->str(XRegister(i), MemOperand(sp, stack_offset));
        stack_offset += GPR_SIZE;
      }
    }
  }

  if (cycles_to_add != 0)
  {
    // NOTE: we have to reload here, because memory writes can run DMA, which can screw with cycles
    Assert(Assembler::IsImmAddSub(cycles_to_add));
    armAsm->ldr(RWSCRATCH, PTR(&g_state.pending_ticks));
    armAsm->add(RWSCRATCH, RWSCRATCH, cycles_to_add);
    armAsm->str(RWSCRATCH, PTR(&g_state.pending_ticks));
  }

  if (address_register != static_cast<u8>(RWARG1.GetCode()))
    armAsm->mov(RWARG1, WRegister(address_register));

  if (!is_load)
  {
    if (data_register != static_cast<u8>(RWARG2.GetCode()))
      armAsm->mov(RWARG2, WRegister(data_register));
  }

  switch (size)
  {
    case MemoryAccessSize::Byte:
    {
      armEmitCall(armAsm,
                  is_load ? reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryByte) :
                            reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryByte),
                  false);
    }
    break;
    case MemoryAccessSize::HalfWord:
    {
      armEmitCall(armAsm,
                  is_load ? reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryHalfWord) :
                            reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryHalfWord),
                  false);
    }
    break;
    case MemoryAccessSize::Word:
    {
      armEmitCall(armAsm,
                  is_load ? reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryWord) :
                            reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryWord),
                  false);
    }
    break;
  }

  if (is_load)
  {
    const WRegister dst = WRegister(data_register);
    switch (size)
    {
      case MemoryAccessSize::Byte:
      {
        is_signed ? armAsm->sxtb(dst, RWRET) : armAsm->uxtb(dst, RWRET);
      }
      break;
      case MemoryAccessSize::HalfWord:
      {
        is_signed ? armAsm->sxth(dst, RWRET) : armAsm->uxth(dst, RWRET);
      }
      break;
      case MemoryAccessSize::Word:
      {
        if (dst.GetCode() != RWRET.GetCode())
          armAsm->mov(dst, RWRET);
      }
      break;
    }
  }

  if (cycles_to_remove != 0)
  {
    Assert(Assembler::IsImmAddSub(cycles_to_remove));
    armAsm->ldr(RWSCRATCH, PTR(&g_state.pending_ticks));
    armAsm->sub(RWSCRATCH, RWSCRATCH, cycles_to_remove);
    armAsm->str(RWSCRATCH, PTR(&g_state.pending_ticks));
  }

  // restore regs
  if (stack_size > 0)
  {
    u32 stack_offset = 0;
    for (u32 i = 0; i < NUM_HOST_REGS; i++)
    {
      if ((gpr_bitmask & (1u << i)) && armIsCallerSavedRegister(i) && (!is_load || data_register != i))
      {
        armAsm->ldr(XRegister(i), MemOperand(sp, stack_offset));
        stack_offset += GPR_SIZE;
      }
    }

    armAsm->add(sp, sp, stack_size);
  }

  armEmitJmp(armAsm, static_cast<const u8*>(code_address) + code_size, true);
  armAsm->FinalizeCode();

  return static_cast<u32>(armAsm->GetCursorOffset());
}

#endif // CPU_ARCH_ARM64