cpu_newrec_compiler_aarch32.cpp (70516B)
1 // SPDX-FileCopyrightText: 2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) 3 4 #include "cpu_newrec_compiler_aarch32.h" 5 #include "common/align.h" 6 #include "common/assert.h" 7 #include "common/log.h" 8 #include "common/string_util.h" 9 #include "cpu_core_private.h" 10 #include "cpu_pgxp.h" 11 #include "cpu_recompiler_thunks.h" 12 #include "cpu_recompiler_types.h" 13 #include "gte.h" 14 #include "settings.h" 15 #include "timing_event.h" 16 #include <limits> 17 18 #ifdef CPU_ARCH_ARM32 19 20 Log_SetChannel(CPU::NewRec); 21 22 #define PTR(x) vixl::aarch32::MemOperand(RSTATE, (((u8*)(x)) - ((u8*)&g_state))) 23 #define RMEMBASE vixl::aarch32::r3 24 25 namespace CPU::NewRec { 26 27 using namespace vixl::aarch32; 28 29 using CPU::Recompiler::armEmitCall; 30 using CPU::Recompiler::armEmitCondBranch; 31 using CPU::Recompiler::armEmitFarLoad; 32 using CPU::Recompiler::armEmitJmp; 33 using CPU::Recompiler::armEmitMov; 34 using CPU::Recompiler::armGetJumpTrampoline; 35 using CPU::Recompiler::armGetPCDisplacement; 36 using CPU::Recompiler::armIsCallerSavedRegister; 37 using CPU::Recompiler::armIsPCDisplacementInImmediateRange; 38 using CPU::Recompiler::armMoveAddressToReg; 39 40 AArch32Compiler s_instance; 41 Compiler* g_compiler = &s_instance; 42 43 } // namespace CPU::NewRec 44 45 CPU::NewRec::AArch32Compiler::AArch32Compiler() : m_emitter(A32), m_far_emitter(A32) 46 { 47 } 48 49 CPU::NewRec::AArch32Compiler::~AArch32Compiler() = default; 50 51 const void* CPU::NewRec::AArch32Compiler::GetCurrentCodePointer() 52 { 53 return armAsm->GetCursorAddress<const void*>(); 54 } 55 56 void CPU::NewRec::AArch32Compiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, 57 u8* far_code_buffer, u32 far_code_space) 58 { 59 Compiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space); 60 61 // TODO: don't recreate this every time.. 62 DebugAssert(!armAsm); 63 m_emitter.GetBuffer()->Reset(code_buffer, code_buffer_space); 64 m_far_emitter.GetBuffer()->Reset(far_code_buffer, far_code_space); 65 armAsm = &m_emitter; 66 67 #ifdef VIXL_DEBUG 68 m_emitter_check = std::make_unique<vixl::CodeBufferCheckScope>(m_emitter.get(), code_buffer_space, 69 vixl::CodeBufferCheckScope::kDontReserveBufferSpace); 70 m_far_emitter_check = std::make_unique<vixl::CodeBufferCheckScope>( 71 m_far_emitter.get(), far_code_space, vixl::CodeBufferCheckScope::kDontReserveBufferSpace); 72 #endif 73 74 // Need to wipe it out so it's correct when toggling fastmem. 75 m_host_regs = {}; 76 77 const u32 membase_idx = 78 (CodeCache::IsUsingFastmem() && block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions)) ? 79 RMEMBASE.GetCode() : 80 NUM_HOST_REGS; 81 for (u32 i = 0; i < NUM_HOST_REGS; i++) 82 { 83 HostRegAlloc& ra = m_host_regs[i]; 84 85 if (i == RARG1.GetCode() || i == RARG2.GetCode() || i == RARG3.GetCode() || i == RSCRATCH.GetCode() || 86 i == RSTATE.GetCode() || i == membase_idx || i == sp.GetCode() || i == pc.GetCode()) 87 { 88 continue; 89 } 90 91 ra.flags = HR_USABLE | (armIsCallerSavedRegister(i) ? 
0 : HR_CALLEE_SAVED); 92 } 93 } 94 95 void CPU::NewRec::AArch32Compiler::SwitchToFarCode(bool emit_jump, vixl::aarch32::ConditionType cond) 96 { 97 DebugAssert(armAsm == &m_emitter); 98 if (emit_jump) 99 { 100 const s32 disp = armGetPCDisplacement(GetCurrentCodePointer(), m_far_emitter.GetCursorAddress<const void*>()); 101 if (armIsPCDisplacementInImmediateRange(disp)) 102 { 103 Label ldisp(armAsm->GetCursorOffset() + disp); 104 armAsm->b(cond, &ldisp); 105 } 106 else if (cond != vixl::aarch32::al) 107 { 108 Label skip; 109 armAsm->b(Condition(cond).Negate(), &skip); 110 armEmitJmp(armAsm, m_far_emitter.GetCursorAddress<const void*>(), true); 111 armAsm->bind(&skip); 112 } 113 else 114 { 115 armEmitJmp(armAsm, m_far_emitter.GetCursorAddress<const void*>(), true); 116 } 117 } 118 armAsm = &m_far_emitter; 119 } 120 121 void CPU::NewRec::AArch32Compiler::SwitchToFarCodeIfBitSet(const vixl::aarch32::Register& reg, u32 bit) 122 { 123 armAsm->tst(reg, 1u << bit); 124 125 const s32 disp = armGetPCDisplacement(GetCurrentCodePointer(), m_far_emitter.GetCursorAddress<const void*>()); 126 if (armIsPCDisplacementInImmediateRange(disp)) 127 { 128 Label ldisp(armAsm->GetCursorOffset() + disp); 129 armAsm->b(ne, &ldisp); 130 } 131 else 132 { 133 Label skip; 134 armAsm->b(eq, &skip); 135 armEmitJmp(armAsm, m_far_emitter.GetCursorAddress<const void*>(), true); 136 armAsm->bind(&skip); 137 } 138 139 armAsm = &m_far_emitter; 140 } 141 142 void CPU::NewRec::AArch32Compiler::SwitchToFarCodeIfRegZeroOrNonZero(const vixl::aarch32::Register& reg, bool nonzero) 143 { 144 armAsm->cmp(reg, 0); 145 146 const s32 disp = armGetPCDisplacement(GetCurrentCodePointer(), m_far_emitter.GetCursorAddress<const void*>()); 147 if (armIsPCDisplacementInImmediateRange(disp)) 148 { 149 Label ldisp(armAsm->GetCursorOffset() + disp); 150 nonzero ? armAsm->b(ne, &ldisp) : armAsm->b(eq, &ldisp); 151 } 152 else 153 { 154 Label skip; 155 nonzero ? 
armAsm->b(eq, &skip) : armAsm->b(ne, &skip); 156 armEmitJmp(armAsm, m_far_emitter.GetCursorAddress<const void*>(), true); 157 armAsm->bind(&skip); 158 } 159 160 armAsm = &m_far_emitter; 161 } 162 163 void CPU::NewRec::AArch32Compiler::SwitchToNearCode(bool emit_jump, vixl::aarch32::ConditionType cond) 164 { 165 DebugAssert(armAsm == &m_far_emitter); 166 if (emit_jump) 167 { 168 const s32 disp = armGetPCDisplacement(GetCurrentCodePointer(), m_emitter.GetCursorAddress<const void*>()); 169 if (armIsPCDisplacementInImmediateRange(disp)) 170 { 171 Label ldisp(armAsm->GetCursorOffset() + disp); 172 armAsm->b(cond, &ldisp); 173 } 174 else if (cond != vixl::aarch32::al) 175 { 176 Label skip; 177 armAsm->b(Condition(cond).Negate(), &skip); 178 armEmitJmp(armAsm, m_far_emitter.GetCursorAddress<const void*>(), true); 179 armAsm->bind(&skip); 180 } 181 else 182 { 183 armEmitJmp(armAsm, m_far_emitter.GetCursorAddress<const void*>(), true); 184 } 185 } 186 armAsm = &m_emitter; 187 } 188 189 void CPU::NewRec::AArch32Compiler::EmitMov(const vixl::aarch32::Register& dst, u32 val) 190 { 191 armEmitMov(armAsm, dst, val); 192 } 193 194 void CPU::NewRec::AArch32Compiler::EmitCall(const void* ptr, bool force_inline /*= false*/) 195 { 196 armEmitCall(armAsm, ptr, force_inline); 197 } 198 199 vixl::aarch32::Operand CPU::NewRec::AArch32Compiler::armCheckAddSubConstant(s32 val) 200 { 201 if (ImmediateA32::IsImmediateA32(static_cast<u32>(val))) 202 return vixl::aarch32::Operand(static_cast<int32_t>(val)); 203 204 EmitMov(RSCRATCH, static_cast<u32>(val)); 205 return vixl::aarch32::Operand(RSCRATCH); 206 } 207 208 vixl::aarch32::Operand CPU::NewRec::AArch32Compiler::armCheckAddSubConstant(u32 val) 209 { 210 return armCheckAddSubConstant(static_cast<s32>(val)); 211 } 212 213 vixl::aarch32::Operand CPU::NewRec::AArch32Compiler::armCheckCompareConstant(s32 val) 214 { 215 return armCheckAddSubConstant(val); 216 } 217 218 vixl::aarch32::Operand CPU::NewRec::AArch32Compiler::armCheckLogicalConstant(u32 val) 219 { 220 return armCheckAddSubConstant(val); 221 } 222 223 void CPU::NewRec::AArch32Compiler::BeginBlock() 224 { 225 Compiler::BeginBlock(); 226 } 227 228 void CPU::NewRec::AArch32Compiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size) 229 { 230 // store it first to reduce code size, because we can offset 231 armMoveAddressToReg(armAsm, RARG1, ram_ptr); 232 armMoveAddressToReg(armAsm, RARG2, shadow_ptr); 233 234 u32 offset = 0; 235 Label block_changed; 236 237 #if 0 238 /* TODO: Vectorize 239 #include <arm_neon.h> 240 #include <stdint.h> 241 242 bool foo(const void* a, const void* b) 243 { 244 uint8x16_t v1 = vld1q_u8((const uint8_t*)a); 245 uint8x16_t v2 = vld1q_u8((const uint8_t*)b); 246 uint8x16_t v3 = vld1q_u8((const uint8_t*)a + 16); 247 uint8x16_t v4 = vld1q_u8((const uint8_t*)a + 16); 248 uint8x16_t r = vceqq_u8(v1, v2); 249 uint8x16_t r2 = vceqq_u8(v2, v3); 250 uint8x16_t r3 = vandq_u8(r, r2); 251 uint32x2_t rr = vpmin_u32(vget_low_u32(vreinterpretq_u32_u8(r3)), vget_high_u32(vreinterpretq_u32_u8(r3))); 252 if ((vget_lane_u32(rr, 0) & vget_lane_u32(rr, 1)) != 0xFFFFFFFFu) 253 return false; 254 else 255 return true; 256 } 257 */ 258 bool first = true; 259 260 while (size >= 16) 261 { 262 const VRegister vtmp = a32::v2.V4S(); 263 const VRegister dst = first ? 
a32::v0.V4S() : a32::v1.V4S(); 264 m_emit->ldr(dst, a32::MemOperand(RXARG1, offset)); 265 m_emit->ldr(vtmp, a32::MemOperand(RXARG2, offset)); 266 m_emit->cmeq(dst, dst, vtmp); 267 if (!first) 268 m_emit->and_(dst.V16B(), dst.V16B(), vtmp.V16B()); 269 else 270 first = false; 271 272 offset += 16; 273 size -= 16; 274 } 275 276 if (!first) 277 { 278 // TODO: make sure this doesn't choke on ffffffff 279 armAsm->uminv(a32::s0, a32::v0.V4S()); 280 armAsm->fcmp(a32::s0, 0.0); 281 armAsm->b(&block_changed, a32::eq); 282 } 283 #endif 284 285 while (size >= 4) 286 { 287 armAsm->ldr(RARG3, MemOperand(RARG1, offset)); 288 armAsm->ldr(RSCRATCH, MemOperand(RARG2, offset)); 289 armAsm->cmp(RARG3, RSCRATCH); 290 armAsm->b(ne, &block_changed); 291 offset += 4; 292 size -= 4; 293 } 294 295 DebugAssert(size == 0); 296 297 Label block_unchanged; 298 armAsm->b(&block_unchanged); 299 armAsm->bind(&block_changed); 300 armEmitJmp(armAsm, CodeCache::g_discard_and_recompile_block, false); 301 armAsm->bind(&block_unchanged); 302 } 303 304 void CPU::NewRec::AArch32Compiler::GenerateICacheCheckAndUpdate() 305 { 306 if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache)) 307 { 308 if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks)) 309 { 310 armEmitFarLoad(armAsm, RARG2, GetFetchMemoryAccessTimePtr()); 311 armAsm->ldr(RARG1, PTR(&g_state.pending_ticks)); 312 armEmitMov(armAsm, RARG3, m_block->size); 313 armAsm->mul(RARG2, RARG2, RARG3); 314 armAsm->add(RARG1, RARG1, RARG2); 315 armAsm->str(RARG1, PTR(&g_state.pending_ticks)); 316 } 317 else 318 { 319 armAsm->ldr(RARG1, PTR(&g_state.pending_ticks)); 320 armAsm->add(RARG1, RARG1, armCheckAddSubConstant(static_cast<u32>(m_block->uncached_fetch_ticks))); 321 armAsm->str(RARG1, PTR(&g_state.pending_ticks)); 322 } 323 } 324 else if (m_block->icache_line_count > 0) 325 { 326 const auto& ticks_reg = RARG1; 327 const auto& current_tag_reg = RARG2; 328 const auto& existing_tag_reg = RARG3; 329 330 VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK; 331 armAsm->ldr(ticks_reg, PTR(&g_state.pending_ticks)); 332 armEmitMov(armAsm, current_tag_reg, current_pc); 333 334 for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE) 335 { 336 const TickCount fill_ticks = GetICacheFillTicks(current_pc); 337 if (fill_ticks <= 0) 338 continue; 339 340 const u32 line = GetICacheLine(current_pc); 341 const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32)); 342 343 Label cache_hit; 344 armAsm->ldr(existing_tag_reg, MemOperand(RSTATE, offset)); 345 armAsm->cmp(existing_tag_reg, current_tag_reg); 346 armAsm->b(eq, &cache_hit); 347 348 armAsm->str(current_tag_reg, MemOperand(RSTATE, offset)); 349 armAsm->add(ticks_reg, ticks_reg, armCheckAddSubConstant(static_cast<u32>(fill_ticks))); 350 armAsm->bind(&cache_hit); 351 352 if (i != (m_block->icache_line_count - 1)) 353 armAsm->add(current_tag_reg, current_tag_reg, armCheckAddSubConstant(ICACHE_LINE_SIZE)); 354 } 355 356 armAsm->str(ticks_reg, PTR(&g_state.pending_ticks)); 357 } 358 } 359 360 void CPU::NewRec::AArch32Compiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/, 361 s32 arg3reg /*= -1*/) 362 { 363 if (arg1reg >= 0 && arg1reg != static_cast<s32>(RARG1.GetCode())) 364 armAsm->mov(RARG1, Register(arg1reg)); 365 if (arg2reg >= 0 && arg2reg != static_cast<s32>(RARG2.GetCode())) 366 armAsm->mov(RARG2, Register(arg2reg)); 367 if (arg3reg >= 0 && arg3reg != static_cast<s32>(RARG3.GetCode())) 368 armAsm->mov(RARG3, Register(arg3reg)); 369 
EmitCall(func); 370 } 371 372 void CPU::NewRec::AArch32Compiler::EndBlock(const std::optional<u32>& newpc, bool do_event_test) 373 { 374 if (newpc.has_value()) 375 { 376 if (m_dirty_pc || m_compiler_pc != newpc) 377 { 378 EmitMov(RSCRATCH, newpc.value()); 379 armAsm->str(RSCRATCH, PTR(&g_state.pc)); 380 } 381 } 382 m_dirty_pc = false; 383 384 // flush regs 385 Flush(FLUSH_END_BLOCK); 386 EndAndLinkBlock(newpc, do_event_test, false); 387 } 388 389 void CPU::NewRec::AArch32Compiler::EndBlockWithException(Exception excode) 390 { 391 // flush regs, but not pc, it's going to get overwritten 392 // flush cycles because of the GTE instruction stuff... 393 Flush(FLUSH_END_BLOCK | FLUSH_FOR_EXCEPTION | FLUSH_FOR_C_CALL); 394 395 // TODO: flush load delay 396 // TODO: break for pcdrv 397 398 EmitMov(RARG1, Cop0Registers::CAUSE::MakeValueForException(excode, m_current_instruction_branch_delay_slot, false, 399 inst->cop.cop_n)); 400 EmitMov(RARG2, m_current_instruction_pc); 401 EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException))); 402 m_dirty_pc = false; 403 404 EndAndLinkBlock(std::nullopt, true, false); 405 } 406 407 void CPU::NewRec::AArch32Compiler::EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test, 408 bool force_run_events) 409 { 410 // event test 411 // pc should've been flushed 412 DebugAssert(!m_dirty_pc && !m_block_ended); 413 m_block_ended = true; 414 415 // TODO: try extracting this to a function 416 417 // save cycles for event test 418 const TickCount cycles = std::exchange(m_cycles, 0); 419 420 // pending_ticks += cycles 421 // if (pending_ticks >= downcount) { dispatch_event(); } 422 if (do_event_test || m_gte_done_cycle > cycles || cycles > 0) 423 armAsm->ldr(RARG1, PTR(&g_state.pending_ticks)); 424 if (do_event_test) 425 armAsm->ldr(RARG2, PTR(&g_state.downcount)); 426 if (cycles > 0) 427 armAsm->add(RARG1, RARG1, armCheckAddSubConstant(cycles)); 428 if (m_gte_done_cycle > cycles) 429 { 430 armAsm->add(RARG2, RARG1, armCheckAddSubConstant(m_gte_done_cycle - cycles)); 431 armAsm->str(RARG2, PTR(&g_state.gte_completion_tick)); 432 } 433 if (do_event_test) 434 armAsm->cmp(RARG1, RARG2); 435 if (cycles > 0) 436 armAsm->str(RARG1, PTR(&g_state.pending_ticks)); 437 if (do_event_test) 438 armEmitCondBranch(armAsm, ge, CodeCache::g_run_events_and_dispatch); 439 440 // jump to dispatcher or next block 441 if (force_run_events) 442 { 443 armEmitJmp(armAsm, CodeCache::g_run_events_and_dispatch, false); 444 } 445 else if (!newpc.has_value()) 446 { 447 armEmitJmp(armAsm, CodeCache::g_dispatcher, false); 448 } 449 else 450 { 451 if (newpc.value() == m_block->pc) 452 { 453 // Special case: ourselves! No need to backlink then. 
454 DEBUG_LOG("Linking block at {:08X} to self", m_block->pc); 455 armEmitJmp(armAsm, armAsm->GetBuffer()->GetStartAddress<const void*>(), true); 456 } 457 else 458 { 459 const void* target = CodeCache::CreateBlockLink(m_block, armAsm->GetCursorAddress<void*>(), newpc.value()); 460 armEmitJmp(armAsm, target, true); 461 } 462 } 463 } 464 465 const void* CPU::NewRec::AArch32Compiler::EndCompile(u32* code_size, u32* far_code_size) 466 { 467 #ifdef VIXL_DEBUG 468 m_emitter_check.reset(); 469 m_far_emitter_check.reset(); 470 #endif 471 472 m_emitter.FinalizeCode(); 473 m_far_emitter.FinalizeCode(); 474 475 u8* const code = m_emitter.GetBuffer()->GetStartAddress<u8*>(); 476 *code_size = static_cast<u32>(m_emitter.GetCursorOffset()); 477 *far_code_size = static_cast<u32>(m_far_emitter.GetCursorOffset()); 478 armAsm = nullptr; 479 return code; 480 } 481 482 const char* CPU::NewRec::AArch32Compiler::GetHostRegName(u32 reg) const 483 { 484 static constexpr std::array<const char*, 32> reg64_names = { 485 {"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 486 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"}}; 487 return (reg < reg64_names.size()) ? reg64_names[reg] : "UNKNOWN"; 488 } 489 490 void CPU::NewRec::AArch32Compiler::LoadHostRegWithConstant(u32 reg, u32 val) 491 { 492 EmitMov(Register(reg), val); 493 } 494 495 void CPU::NewRec::AArch32Compiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr) 496 { 497 armAsm->ldr(Register(reg), PTR(ptr)); 498 } 499 500 void CPU::NewRec::AArch32Compiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr) 501 { 502 armAsm->str(Register(reg), PTR(ptr)); 503 } 504 505 void CPU::NewRec::AArch32Compiler::StoreConstantToCPUPointer(u32 val, const void* ptr) 506 { 507 EmitMov(RSCRATCH, val); 508 armAsm->str(RSCRATCH, PTR(ptr)); 509 } 510 511 void CPU::NewRec::AArch32Compiler::CopyHostReg(u32 dst, u32 src) 512 { 513 if (src != dst) 514 armAsm->mov(Register(dst), Register(src)); 515 } 516 517 void CPU::NewRec::AArch32Compiler::AssertRegOrConstS(CompileFlags cf) const 518 { 519 DebugAssert(cf.valid_host_s || cf.const_s); 520 } 521 522 void CPU::NewRec::AArch32Compiler::AssertRegOrConstT(CompileFlags cf) const 523 { 524 DebugAssert(cf.valid_host_t || cf.const_t); 525 } 526 527 vixl::aarch32::MemOperand CPU::NewRec::AArch32Compiler::MipsPtr(Reg r) const 528 { 529 DebugAssert(r < Reg::count); 530 return PTR(&g_state.regs.r[static_cast<u32>(r)]); 531 } 532 533 vixl::aarch32::Register CPU::NewRec::AArch32Compiler::CFGetRegD(CompileFlags cf) const 534 { 535 DebugAssert(cf.valid_host_d); 536 return Register(cf.host_d); 537 } 538 539 vixl::aarch32::Register CPU::NewRec::AArch32Compiler::CFGetRegS(CompileFlags cf) const 540 { 541 DebugAssert(cf.valid_host_s); 542 return Register(cf.host_s); 543 } 544 545 vixl::aarch32::Register CPU::NewRec::AArch32Compiler::CFGetRegT(CompileFlags cf) const 546 { 547 DebugAssert(cf.valid_host_t); 548 return Register(cf.host_t); 549 } 550 551 vixl::aarch32::Register CPU::NewRec::AArch32Compiler::CFGetRegLO(CompileFlags cf) const 552 { 553 DebugAssert(cf.valid_host_lo); 554 return Register(cf.host_lo); 555 } 556 557 vixl::aarch32::Register CPU::NewRec::AArch32Compiler::CFGetRegHI(CompileFlags cf) const 558 { 559 DebugAssert(cf.valid_host_hi); 560 return Register(cf.host_hi); 561 } 562 563 vixl::aarch32::Register CPU::NewRec::AArch32Compiler::GetMembaseReg() 564 { 565 const u32 code = RMEMBASE.GetCode(); 566 if 
(!IsHostRegAllocated(code)) 567 { 568 // Leave usable unset, so we don't try to allocate it later. 569 m_host_regs[code].type = HR_TYPE_MEMBASE; 570 m_host_regs[code].flags = HR_ALLOCATED; 571 armAsm->ldr(RMEMBASE, PTR(&g_state.fastmem_base)); 572 } 573 574 return RMEMBASE; 575 } 576 577 void CPU::NewRec::AArch32Compiler::MoveSToReg(const vixl::aarch32::Register& dst, CompileFlags cf) 578 { 579 if (cf.valid_host_s) 580 { 581 if (cf.host_s != dst.GetCode()) 582 armAsm->mov(dst, Register(cf.host_s)); 583 } 584 else if (cf.const_s) 585 { 586 const u32 cv = GetConstantRegU32(cf.MipsS()); 587 EmitMov(dst, cv); 588 } 589 else 590 { 591 WARNING_LOG("Hit memory path in MoveSToReg() for {}", GetRegName(cf.MipsS())); 592 armAsm->ldr(dst, PTR(&g_state.regs.r[cf.mips_s])); 593 } 594 } 595 596 void CPU::NewRec::AArch32Compiler::MoveTToReg(const vixl::aarch32::Register& dst, CompileFlags cf) 597 { 598 if (cf.valid_host_t) 599 { 600 if (cf.host_t != dst.GetCode()) 601 armAsm->mov(dst, Register(cf.host_t)); 602 } 603 else if (cf.const_t) 604 { 605 const u32 cv = GetConstantRegU32(cf.MipsT()); 606 EmitMov(dst, cv); 607 } 608 else 609 { 610 WARNING_LOG("Hit memory path in MoveTToReg() for {}", GetRegName(cf.MipsT())); 611 armAsm->ldr(dst, PTR(&g_state.regs.r[cf.mips_t])); 612 } 613 } 614 615 void CPU::NewRec::AArch32Compiler::MoveMIPSRegToReg(const vixl::aarch32::Register& dst, Reg reg) 616 { 617 DebugAssert(reg < Reg::count); 618 if (const std::optional<u32> hreg = CheckHostReg(0, Compiler::HR_TYPE_CPU_REG, reg)) 619 armAsm->mov(dst, Register(hreg.value())); 620 else if (HasConstantReg(reg)) 621 EmitMov(dst, GetConstantRegU32(reg)); 622 else 623 armAsm->ldr(dst, MipsPtr(reg)); 624 } 625 626 void CPU::NewRec::AArch32Compiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, 627 Reg arg2reg /* = Reg::count */, 628 Reg arg3reg /* = Reg::count */) 629 { 630 DebugAssert(g_settings.gpu_pgxp_enable); 631 632 Flush(FLUSH_FOR_C_CALL); 633 634 if (arg2reg != Reg::count) 635 MoveMIPSRegToReg(RARG2, arg2reg); 636 if (arg3reg != Reg::count) 637 MoveMIPSRegToReg(RARG3, arg3reg); 638 639 EmitMov(RARG1, arg1val); 640 EmitCall(func); 641 } 642 643 void CPU::NewRec::AArch32Compiler::Flush(u32 flags) 644 { 645 Compiler::Flush(flags); 646 647 if (flags & FLUSH_PC && m_dirty_pc) 648 { 649 StoreConstantToCPUPointer(m_compiler_pc, &g_state.pc); 650 m_dirty_pc = false; 651 } 652 653 if (flags & FLUSH_INSTRUCTION_BITS) 654 { 655 // This sucks, but it's only used for fallbacks. 656 EmitMov(RARG1, inst->bits); 657 EmitMov(RARG2, m_current_instruction_pc); 658 EmitMov(RARG3, m_current_instruction_branch_delay_slot); 659 armAsm->str(RARG1, PTR(&g_state.current_instruction.bits)); 660 armAsm->str(RARG2, PTR(&g_state.current_instruction_pc)); 661 armAsm->strb(RARG3, PTR(&g_state.current_instruction_in_branch_delay_slot)); 662 } 663 664 if (flags & FLUSH_LOAD_DELAY_FROM_STATE && m_load_delay_dirty) 665 { 666 // This sucks :( 667 // TODO: make it a function? 
668 armAsm->ldrb(RARG1, PTR(&g_state.load_delay_reg)); 669 armAsm->ldr(RARG2, PTR(&g_state.load_delay_value)); 670 EmitMov(RSCRATCH, OFFSETOF(CPU::State, regs.r[0])); 671 armAsm->add(RARG1, RSCRATCH, vixl::aarch32::Operand(RARG1, LSL, 2)); 672 armAsm->str(RARG2, MemOperand(RSTATE, RARG1)); 673 EmitMov(RSCRATCH, static_cast<u8>(Reg::count)); 674 armAsm->strb(RSCRATCH, PTR(&g_state.load_delay_reg)); 675 m_load_delay_dirty = false; 676 } 677 678 if (flags & FLUSH_LOAD_DELAY && m_load_delay_register != Reg::count) 679 { 680 if (m_load_delay_value_register != NUM_HOST_REGS) 681 FreeHostReg(m_load_delay_value_register); 682 683 EmitMov(RSCRATCH, static_cast<u8>(m_load_delay_register)); 684 armAsm->strb(RSCRATCH, PTR(&g_state.load_delay_reg)); 685 m_load_delay_register = Reg::count; 686 m_load_delay_dirty = true; 687 } 688 689 if (flags & FLUSH_GTE_STALL_FROM_STATE && m_dirty_gte_done_cycle) 690 { 691 // May as well flush cycles while we're here. 692 // GTE spanning blocks is very rare, we _could_ disable this for speed. 693 armAsm->ldr(RARG1, PTR(&g_state.pending_ticks)); 694 armAsm->ldr(RARG2, PTR(&g_state.gte_completion_tick)); 695 if (m_cycles > 0) 696 { 697 armAsm->add(RARG1, RARG1, armCheckAddSubConstant(m_cycles)); 698 m_cycles = 0; 699 } 700 armAsm->cmp(RARG2, RARG1); 701 armAsm->mov(hs, RARG1, RARG2); 702 armAsm->str(RARG1, PTR(&g_state.pending_ticks)); 703 m_dirty_gte_done_cycle = false; 704 } 705 706 if (flags & FLUSH_GTE_DONE_CYCLE && m_gte_done_cycle > m_cycles) 707 { 708 armAsm->ldr(RARG1, PTR(&g_state.pending_ticks)); 709 710 // update cycles at the same time 711 if (flags & FLUSH_CYCLES && m_cycles > 0) 712 { 713 armAsm->add(RARG1, RARG1, armCheckAddSubConstant(m_cycles)); 714 armAsm->str(RARG1, PTR(&g_state.pending_ticks)); 715 m_gte_done_cycle -= m_cycles; 716 m_cycles = 0; 717 } 718 719 armAsm->add(RARG1, RARG1, armCheckAddSubConstant(m_gte_done_cycle)); 720 armAsm->str(RARG1, PTR(&g_state.gte_completion_tick)); 721 m_gte_done_cycle = 0; 722 m_dirty_gte_done_cycle = true; 723 } 724 725 if (flags & FLUSH_CYCLES && m_cycles > 0) 726 { 727 armAsm->ldr(RARG1, PTR(&g_state.pending_ticks)); 728 armAsm->add(RARG1, RARG1, armCheckAddSubConstant(m_cycles)); 729 armAsm->str(RARG1, PTR(&g_state.pending_ticks)); 730 m_gte_done_cycle = std::max<TickCount>(m_gte_done_cycle - m_cycles, 0); 731 m_cycles = 0; 732 } 733 } 734 735 void CPU::NewRec::AArch32Compiler::Compile_Fallback() 736 { 737 WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", iinfo->pc, inst->bits); 738 739 Flush(FLUSH_FOR_INTERPRETER); 740 741 EmitCall(reinterpret_cast<const void*>(&CPU::Recompiler::Thunks::InterpretInstruction)); 742 743 // TODO: make me less garbage 744 // TODO: this is wrong, it flushes the load delay on the same cycle when we return. 745 // but nothing should be going through here.. 
746 Label no_load_delay; 747 armAsm->ldrb(RARG1, PTR(&g_state.next_load_delay_reg)); 748 armAsm->cmp(RARG1, static_cast<u8>(Reg::count)); 749 armAsm->b(eq, &no_load_delay); 750 armAsm->ldr(RARG2, PTR(&g_state.next_load_delay_value)); 751 armAsm->strb(RARG1, PTR(&g_state.load_delay_reg)); 752 armAsm->str(RARG2, PTR(&g_state.load_delay_value)); 753 EmitMov(RARG1, static_cast<u32>(Reg::count)); 754 armAsm->strb(RARG1, PTR(&g_state.next_load_delay_reg)); 755 armAsm->bind(&no_load_delay); 756 757 m_load_delay_dirty = EMULATE_LOAD_DELAYS; 758 } 759 760 void CPU::NewRec::AArch32Compiler::CheckBranchTarget(const vixl::aarch32::Register& pcreg) 761 { 762 if (!g_settings.cpu_recompiler_memory_exceptions) 763 return; 764 765 armAsm->tst(pcreg, armCheckLogicalConstant(0x3)); 766 SwitchToFarCode(true, ne); 767 768 BackupHostState(); 769 EndBlockWithException(Exception::AdEL); 770 771 RestoreHostState(); 772 SwitchToNearCode(false); 773 } 774 775 void CPU::NewRec::AArch32Compiler::Compile_jr(CompileFlags cf) 776 { 777 const Register pcreg = CFGetRegS(cf); 778 CheckBranchTarget(pcreg); 779 780 armAsm->str(pcreg, PTR(&g_state.pc)); 781 782 CompileBranchDelaySlot(false); 783 EndBlock(std::nullopt, true); 784 } 785 786 void CPU::NewRec::AArch32Compiler::Compile_jalr(CompileFlags cf) 787 { 788 const Register pcreg = CFGetRegS(cf); 789 if (MipsD() != Reg::zero) 790 SetConstantReg(MipsD(), GetBranchReturnAddress(cf)); 791 792 CheckBranchTarget(pcreg); 793 armAsm->str(pcreg, PTR(&g_state.pc)); 794 795 CompileBranchDelaySlot(false); 796 EndBlock(std::nullopt, true); 797 } 798 799 void CPU::NewRec::AArch32Compiler::Compile_bxx(CompileFlags cf, BranchCondition cond) 800 { 801 AssertRegOrConstS(cf); 802 803 const u32 taken_pc = GetConditionalBranchTarget(cf); 804 805 Flush(FLUSH_FOR_BRANCH); 806 807 DebugAssert(cf.valid_host_s); 808 809 // MipsT() here should equal zero for zero branches. 810 DebugAssert(cond == BranchCondition::Equal || cond == BranchCondition::NotEqual || cf.MipsT() == Reg::zero); 811 812 Label taken; 813 const Register rs = CFGetRegS(cf); 814 switch (cond) 815 { 816 case BranchCondition::Equal: 817 case BranchCondition::NotEqual: 818 { 819 AssertRegOrConstT(cf); 820 if (cf.valid_host_t) 821 armAsm->cmp(rs, CFGetRegT(cf)); 822 else if (cf.const_t) 823 armAsm->cmp(rs, armCheckCompareConstant(GetConstantRegU32(cf.MipsT()))); 824 825 armAsm->b((cond == BranchCondition::Equal) ? 
eq : ne, &taken); 826 } 827 break; 828 829 case BranchCondition::GreaterThanZero: 830 { 831 armAsm->cmp(rs, 0); 832 armAsm->b(gt, &taken); 833 } 834 break; 835 836 case BranchCondition::GreaterEqualZero: 837 { 838 armAsm->cmp(rs, 0); 839 armAsm->b(ge, &taken); 840 } 841 break; 842 843 case BranchCondition::LessThanZero: 844 { 845 armAsm->cmp(rs, 0); 846 armAsm->b(lt, &taken); 847 } 848 break; 849 850 case BranchCondition::LessEqualZero: 851 { 852 armAsm->cmp(rs, 0); 853 armAsm->b(le, &taken); 854 } 855 break; 856 } 857 858 BackupHostState(); 859 if (!cf.delay_slot_swapped) 860 CompileBranchDelaySlot(); 861 862 EndBlock(m_compiler_pc, true); 863 864 armAsm->bind(&taken); 865 866 RestoreHostState(); 867 if (!cf.delay_slot_swapped) 868 CompileBranchDelaySlot(); 869 870 EndBlock(taken_pc, true); 871 } 872 873 void CPU::NewRec::AArch32Compiler::Compile_addi(CompileFlags cf, bool overflow) 874 { 875 const Register rs = CFGetRegS(cf); 876 const Register rt = CFGetRegT(cf); 877 if (const u32 imm = inst->i.imm_sext32(); imm != 0) 878 { 879 if (!overflow) 880 { 881 armAsm->add(rt, rs, armCheckAddSubConstant(imm)); 882 } 883 else 884 { 885 armAsm->adds(rt, rs, armCheckAddSubConstant(imm)); 886 TestOverflow(rt); 887 } 888 } 889 else if (rt.GetCode() != rs.GetCode()) 890 { 891 armAsm->mov(rt, rs); 892 } 893 } 894 895 void CPU::NewRec::AArch32Compiler::Compile_addi(CompileFlags cf) 896 { 897 Compile_addi(cf, g_settings.cpu_recompiler_memory_exceptions); 898 } 899 900 void CPU::NewRec::AArch32Compiler::Compile_addiu(CompileFlags cf) 901 { 902 Compile_addi(cf, false); 903 } 904 905 void CPU::NewRec::AArch32Compiler::Compile_slti(CompileFlags cf) 906 { 907 Compile_slti(cf, true); 908 } 909 910 void CPU::NewRec::AArch32Compiler::Compile_sltiu(CompileFlags cf) 911 { 912 Compile_slti(cf, false); 913 } 914 915 void CPU::NewRec::AArch32Compiler::Compile_slti(CompileFlags cf, bool sign) 916 { 917 const Register rs = CFGetRegS(cf); 918 const Register rt = CFGetRegT(cf); 919 armAsm->cmp(rs, armCheckCompareConstant(static_cast<s32>(inst->i.imm_sext32()))); 920 armAsm->mov(sign ? ge : hs, rt, 0); 921 armAsm->mov(sign ? 
lt : lo, rt, 1); 922 } 923 924 void CPU::NewRec::AArch32Compiler::Compile_andi(CompileFlags cf) 925 { 926 const Register rt = CFGetRegT(cf); 927 if (const u32 imm = inst->i.imm_zext32(); imm != 0) 928 armAsm->and_(rt, CFGetRegS(cf), armCheckLogicalConstant(imm)); 929 else 930 EmitMov(rt, 0); 931 } 932 933 void CPU::NewRec::AArch32Compiler::Compile_ori(CompileFlags cf) 934 { 935 const Register rt = CFGetRegT(cf); 936 const Register rs = CFGetRegS(cf); 937 if (const u32 imm = inst->i.imm_zext32(); imm != 0) 938 armAsm->orr(rt, rs, armCheckLogicalConstant(imm)); 939 else if (rt.GetCode() != rs.GetCode()) 940 armAsm->mov(rt, rs); 941 } 942 943 void CPU::NewRec::AArch32Compiler::Compile_xori(CompileFlags cf) 944 { 945 const Register rt = CFGetRegT(cf); 946 const Register rs = CFGetRegS(cf); 947 if (const u32 imm = inst->i.imm_zext32(); imm != 0) 948 armAsm->eor(rt, rs, armCheckLogicalConstant(imm)); 949 else if (rt.GetCode() != rs.GetCode()) 950 armAsm->mov(rt, rs); 951 } 952 953 void CPU::NewRec::AArch32Compiler::Compile_shift(CompileFlags cf, 954 void (vixl::aarch32::Assembler::*op)(vixl::aarch32::Register, 955 vixl::aarch32::Register, 956 const Operand&)) 957 { 958 const Register rd = CFGetRegD(cf); 959 const Register rt = CFGetRegT(cf); 960 if (inst->r.shamt > 0) 961 (armAsm->*op)(rd, rt, inst->r.shamt.GetValue()); 962 else if (rd.GetCode() != rt.GetCode()) 963 armAsm->mov(rd, rt); 964 } 965 966 void CPU::NewRec::AArch32Compiler::Compile_sll(CompileFlags cf) 967 { 968 Compile_shift(cf, &Assembler::lsl); 969 } 970 971 void CPU::NewRec::AArch32Compiler::Compile_srl(CompileFlags cf) 972 { 973 Compile_shift(cf, &Assembler::lsr); 974 } 975 976 void CPU::NewRec::AArch32Compiler::Compile_sra(CompileFlags cf) 977 { 978 Compile_shift(cf, &Assembler::asr); 979 } 980 981 void CPU::NewRec::AArch32Compiler::Compile_variable_shift(CompileFlags cf, 982 void (vixl::aarch32::Assembler::*op)(vixl::aarch32::Register, 983 vixl::aarch32::Register, 984 const Operand&)) 985 { 986 const Register rd = CFGetRegD(cf); 987 988 AssertRegOrConstS(cf); 989 AssertRegOrConstT(cf); 990 991 const Register rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2; 992 if (!cf.valid_host_t) 993 MoveTToReg(rt, cf); 994 995 if (cf.const_s) 996 { 997 if (const u32 shift = GetConstantRegU32(cf.MipsS()); shift != 0) 998 (armAsm->*op)(rd, rt, shift); 999 else if (rd.GetCode() != rt.GetCode()) 1000 armAsm->mov(rd, rt); 1001 } 1002 else 1003 { 1004 (armAsm->*op)(rd, rt, CFGetRegS(cf)); 1005 } 1006 } 1007 1008 void CPU::NewRec::AArch32Compiler::Compile_sllv(CompileFlags cf) 1009 { 1010 Compile_variable_shift(cf, &Assembler::lsl); 1011 } 1012 1013 void CPU::NewRec::AArch32Compiler::Compile_srlv(CompileFlags cf) 1014 { 1015 Compile_variable_shift(cf, &Assembler::lsr); 1016 } 1017 1018 void CPU::NewRec::AArch32Compiler::Compile_srav(CompileFlags cf) 1019 { 1020 Compile_variable_shift(cf, &Assembler::asr); 1021 } 1022 1023 void CPU::NewRec::AArch32Compiler::Compile_mult(CompileFlags cf, bool sign) 1024 { 1025 const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1; 1026 if (!cf.valid_host_s) 1027 MoveSToReg(rs, cf); 1028 1029 const Register rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2; 1030 if (!cf.valid_host_t) 1031 MoveTToReg(rt, cf); 1032 1033 // TODO: if lo/hi gets killed, we can use a 32-bit multiply 1034 const Register lo = CFGetRegLO(cf); 1035 const Register hi = CFGetRegHI(cf); 1036 1037 (sign) ? 
armAsm->smull(lo, hi, rs, rt) : armAsm->umull(lo, hi, rs, rt); 1038 } 1039 1040 void CPU::NewRec::AArch32Compiler::Compile_mult(CompileFlags cf) 1041 { 1042 Compile_mult(cf, true); 1043 } 1044 1045 void CPU::NewRec::AArch32Compiler::Compile_multu(CompileFlags cf) 1046 { 1047 Compile_mult(cf, false); 1048 } 1049 1050 void CPU::NewRec::AArch32Compiler::Compile_div(CompileFlags cf) 1051 { 1052 const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1; 1053 if (!cf.valid_host_s) 1054 MoveSToReg(rs, cf); 1055 1056 const Register rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2; 1057 if (!cf.valid_host_t) 1058 MoveTToReg(rt, cf); 1059 1060 const Register rlo = CFGetRegLO(cf); 1061 const Register rhi = CFGetRegHI(cf); 1062 1063 // TODO: This could be slightly more optimal 1064 Label done; 1065 Label not_divide_by_zero; 1066 armAsm->cmp(rt, 0); 1067 armAsm->b(ne, ¬_divide_by_zero); 1068 armAsm->mov(rhi, rs); // hi = num 1069 EmitMov(rlo, 1); 1070 EmitMov(RSCRATCH, static_cast<u32>(-1)); 1071 armAsm->cmp(rs, 0); 1072 armAsm->mov(ge, rlo, RSCRATCH); // lo = s >= 0 ? -1 : 1 1073 armAsm->b(&done); 1074 1075 armAsm->bind(¬_divide_by_zero); 1076 Label not_unrepresentable; 1077 armAsm->cmp(rs, armCheckCompareConstant(static_cast<s32>(0x80000000u))); 1078 armAsm->b(ne, ¬_unrepresentable); 1079 armAsm->cmp(rt, armCheckCompareConstant(-1)); 1080 armAsm->b(ne, ¬_unrepresentable); 1081 1082 EmitMov(rlo, 0x80000000u); 1083 EmitMov(rhi, 0); 1084 armAsm->b(&done); 1085 1086 armAsm->bind(¬_unrepresentable); 1087 1088 armAsm->sdiv(rlo, rs, rt); 1089 1090 // TODO: skip when hi is dead 1091 armAsm->mls(rhi, rlo, rt, rs); 1092 1093 armAsm->bind(&done); 1094 } 1095 1096 void CPU::NewRec::AArch32Compiler::Compile_divu(CompileFlags cf) 1097 { 1098 const Register rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1; 1099 if (!cf.valid_host_s) 1100 MoveSToReg(rs, cf); 1101 1102 const Register rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2; 1103 if (!cf.valid_host_t) 1104 MoveTToReg(rt, cf); 1105 1106 const Register rlo = CFGetRegLO(cf); 1107 const Register rhi = CFGetRegHI(cf); 1108 1109 Label done; 1110 Label not_divide_by_zero; 1111 armAsm->cmp(rt, 0); 1112 armAsm->b(ne, ¬_divide_by_zero); 1113 EmitMov(rlo, static_cast<u32>(-1)); 1114 armAsm->mov(rhi, rs); 1115 armAsm->b(&done); 1116 1117 armAsm->bind(¬_divide_by_zero); 1118 1119 armAsm->udiv(rlo, rs, rt); 1120 1121 // TODO: skip when hi is dead 1122 armAsm->mls(rhi, rlo, rt, rs); 1123 1124 armAsm->bind(&done); 1125 } 1126 1127 void CPU::NewRec::AArch32Compiler::TestOverflow(const vixl::aarch32::Register& result) 1128 { 1129 SwitchToFarCode(true, vs); 1130 1131 BackupHostState(); 1132 1133 // toss the result 1134 ClearHostReg(result.GetCode()); 1135 1136 EndBlockWithException(Exception::Ov); 1137 1138 RestoreHostState(); 1139 1140 SwitchToNearCode(false); 1141 } 1142 1143 void CPU::NewRec::AArch32Compiler::Compile_dst_op(CompileFlags cf, 1144 void (vixl::aarch32::Assembler::*op)(vixl::aarch32::Register, 1145 vixl::aarch32::Register, 1146 const Operand&), 1147 bool commutative, bool logical, bool overflow) 1148 { 1149 AssertRegOrConstS(cf); 1150 AssertRegOrConstT(cf); 1151 1152 const Register rd = CFGetRegD(cf); 1153 if (cf.valid_host_s && cf.valid_host_t) 1154 { 1155 (armAsm->*op)(rd, CFGetRegS(cf), CFGetRegT(cf)); 1156 } 1157 else if (commutative && (cf.const_s || cf.const_t)) 1158 { 1159 const Register src = cf.const_s ? CFGetRegT(cf) : CFGetRegS(cf); 1160 if (const u32 cv = GetConstantRegU32(cf.const_s ? 
cf.MipsS() : cf.MipsT()); cv != 0) 1161 { 1162 (armAsm->*op)(rd, src, logical ? armCheckLogicalConstant(cv) : armCheckAddSubConstant(cv)); 1163 } 1164 else 1165 { 1166 if (rd.GetCode() != src.GetCode()) 1167 armAsm->mov(rd, src); 1168 overflow = false; 1169 } 1170 } 1171 else if (cf.const_s) 1172 { 1173 EmitMov(RSCRATCH, GetConstantRegU32(cf.MipsS())); 1174 (armAsm->*op)(rd, RSCRATCH, CFGetRegT(cf)); 1175 } 1176 else if (cf.const_t) 1177 { 1178 const Register rs = CFGetRegS(cf); 1179 if (const u32 cv = GetConstantRegU32(cf.const_s ? cf.MipsS() : cf.MipsT()); cv != 0) 1180 { 1181 (armAsm->*op)(rd, rs, logical ? armCheckLogicalConstant(cv) : armCheckAddSubConstant(cv)); 1182 } 1183 else 1184 { 1185 if (rd.GetCode() != rs.GetCode()) 1186 armAsm->mov(rd, rs); 1187 overflow = false; 1188 } 1189 } 1190 1191 if (overflow) 1192 TestOverflow(rd); 1193 } 1194 1195 void CPU::NewRec::AArch32Compiler::Compile_add(CompileFlags cf) 1196 { 1197 if (g_settings.cpu_recompiler_memory_exceptions) 1198 Compile_dst_op(cf, &Assembler::adds, true, false, true); 1199 else 1200 Compile_dst_op(cf, &Assembler::add, true, false, false); 1201 } 1202 1203 void CPU::NewRec::AArch32Compiler::Compile_addu(CompileFlags cf) 1204 { 1205 Compile_dst_op(cf, &Assembler::add, true, false, false); 1206 } 1207 1208 void CPU::NewRec::AArch32Compiler::Compile_sub(CompileFlags cf) 1209 { 1210 if (g_settings.cpu_recompiler_memory_exceptions) 1211 Compile_dst_op(cf, &Assembler::subs, false, false, true); 1212 else 1213 Compile_dst_op(cf, &Assembler::sub, false, false, false); 1214 } 1215 1216 void CPU::NewRec::AArch32Compiler::Compile_subu(CompileFlags cf) 1217 { 1218 Compile_dst_op(cf, &Assembler::sub, false, false, false); 1219 } 1220 1221 void CPU::NewRec::AArch32Compiler::Compile_and(CompileFlags cf) 1222 { 1223 AssertRegOrConstS(cf); 1224 AssertRegOrConstT(cf); 1225 1226 // special cases - and with self -> self, and with 0 -> 0 1227 const Register regd = CFGetRegD(cf); 1228 if (cf.MipsS() == cf.MipsT()) 1229 { 1230 armAsm->mov(regd, CFGetRegS(cf)); 1231 return; 1232 } 1233 else if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0)) 1234 { 1235 EmitMov(regd, 0); 1236 return; 1237 } 1238 1239 Compile_dst_op(cf, &Assembler::and_, true, true, false); 1240 } 1241 1242 void CPU::NewRec::AArch32Compiler::Compile_or(CompileFlags cf) 1243 { 1244 AssertRegOrConstS(cf); 1245 AssertRegOrConstT(cf); 1246 1247 // or/nor with 0 -> no effect 1248 const Register regd = CFGetRegD(cf); 1249 if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0) || cf.MipsS() == cf.MipsT()) 1250 { 1251 cf.const_s ? MoveTToReg(regd, cf) : MoveSToReg(regd, cf); 1252 return; 1253 } 1254 1255 Compile_dst_op(cf, &Assembler::orr, true, true, false); 1256 } 1257 1258 void CPU::NewRec::AArch32Compiler::Compile_xor(CompileFlags cf) 1259 { 1260 AssertRegOrConstS(cf); 1261 AssertRegOrConstT(cf); 1262 1263 const Register regd = CFGetRegD(cf); 1264 if (cf.MipsS() == cf.MipsT()) 1265 { 1266 // xor with self -> zero 1267 EmitMov(regd, 0); 1268 return; 1269 } 1270 else if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0)) 1271 { 1272 // xor with zero -> no effect 1273 cf.const_s ? 
MoveTToReg(regd, cf) : MoveSToReg(regd, cf); 1274 return; 1275 } 1276 1277 Compile_dst_op(cf, &Assembler::eor, true, true, false); 1278 } 1279 1280 void CPU::NewRec::AArch32Compiler::Compile_nor(CompileFlags cf) 1281 { 1282 Compile_or(cf); 1283 armAsm->mvn(CFGetRegD(cf), CFGetRegD(cf)); 1284 } 1285 1286 void CPU::NewRec::AArch32Compiler::Compile_slt(CompileFlags cf) 1287 { 1288 Compile_slt(cf, true); 1289 } 1290 1291 void CPU::NewRec::AArch32Compiler::Compile_sltu(CompileFlags cf) 1292 { 1293 Compile_slt(cf, false); 1294 } 1295 1296 void CPU::NewRec::AArch32Compiler::Compile_slt(CompileFlags cf, bool sign) 1297 { 1298 AssertRegOrConstS(cf); 1299 AssertRegOrConstT(cf); 1300 1301 // TODO: swap and reverse op for constants 1302 if (cf.const_s) 1303 { 1304 EmitMov(RSCRATCH, GetConstantRegS32(cf.MipsS())); 1305 armAsm->cmp(RSCRATCH, CFGetRegT(cf)); 1306 } 1307 else if (cf.const_t) 1308 { 1309 armAsm->cmp(CFGetRegS(cf), armCheckCompareConstant(GetConstantRegS32(cf.MipsT()))); 1310 } 1311 else 1312 { 1313 armAsm->cmp(CFGetRegS(cf), CFGetRegT(cf)); 1314 } 1315 1316 const Register rd = CFGetRegD(cf); 1317 armAsm->mov(sign ? ge : cs, rd, 0); 1318 armAsm->mov(sign ? lt : lo, rd, 1); 1319 } 1320 1321 vixl::aarch32::Register 1322 CPU::NewRec::AArch32Compiler::ComputeLoadStoreAddressArg(CompileFlags cf, 1323 const std::optional<VirtualMemoryAddress>& address, 1324 const std::optional<const vixl::aarch32::Register>& reg) 1325 { 1326 const u32 imm = inst->i.imm_sext32(); 1327 if (cf.valid_host_s && imm == 0 && !reg.has_value()) 1328 return CFGetRegS(cf); 1329 1330 const Register dst = reg.has_value() ? reg.value() : RARG1; 1331 if (address.has_value()) 1332 { 1333 EmitMov(dst, address.value()); 1334 } 1335 else if (imm == 0) 1336 { 1337 if (cf.valid_host_s) 1338 { 1339 if (const Register src = CFGetRegS(cf); src.GetCode() != dst.GetCode()) 1340 armAsm->mov(dst, CFGetRegS(cf)); 1341 } 1342 else 1343 { 1344 armAsm->ldr(dst, MipsPtr(cf.MipsS())); 1345 } 1346 } 1347 else 1348 { 1349 if (cf.valid_host_s) 1350 { 1351 armAsm->add(dst, CFGetRegS(cf), armCheckAddSubConstant(static_cast<s32>(inst->i.imm_sext32()))); 1352 } 1353 else 1354 { 1355 armAsm->ldr(dst, MipsPtr(cf.MipsS())); 1356 armAsm->add(dst, dst, armCheckAddSubConstant(static_cast<s32>(inst->i.imm_sext32()))); 1357 } 1358 } 1359 1360 return dst; 1361 } 1362 1363 template<typename RegAllocFn> 1364 vixl::aarch32::Register CPU::NewRec::AArch32Compiler::GenerateLoad(const vixl::aarch32::Register& addr_reg, 1365 MemoryAccessSize size, bool sign, bool use_fastmem, 1366 const RegAllocFn& dst_reg_alloc) 1367 { 1368 if (use_fastmem) 1369 { 1370 DebugAssert(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT); 1371 m_cycles += Bus::RAM_READ_TICKS; 1372 1373 const Register dst = dst_reg_alloc(); 1374 const Register membase = GetMembaseReg(); 1375 DebugAssert(addr_reg.GetCode() != RARG3.GetCode()); 1376 armAsm->lsr(RARG3, addr_reg, Bus::FASTMEM_LUT_PAGE_SHIFT); 1377 armAsm->ldr(RARG3, MemOperand(membase, RARG3, LSL, 2)); 1378 1379 const MemOperand mem = MemOperand(RARG3, addr_reg); 1380 u8* start = armAsm->GetCursorAddress<u8*>(); 1381 switch (size) 1382 { 1383 case MemoryAccessSize::Byte: 1384 sign ? armAsm->ldrsb(dst, mem) : armAsm->ldrb(dst, mem); 1385 break; 1386 1387 case MemoryAccessSize::HalfWord: 1388 sign ? 
armAsm->ldrsh(dst, mem) : armAsm->ldrh(dst, mem); 1389 break; 1390 1391 case MemoryAccessSize::Word: 1392 armAsm->ldr(dst, mem); 1393 break; 1394 } 1395 1396 AddLoadStoreInfo(start, kA32InstructionSizeInBytes, addr_reg.GetCode(), dst.GetCode(), size, sign, true); 1397 return dst; 1398 } 1399 1400 if (addr_reg.GetCode() != RARG1.GetCode()) 1401 armAsm->mov(RARG1, addr_reg); 1402 1403 const bool checked = g_settings.cpu_recompiler_memory_exceptions; 1404 switch (size) 1405 { 1406 case MemoryAccessSize::Byte: 1407 { 1408 EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::ReadMemoryByte) : 1409 reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryByte)); 1410 } 1411 break; 1412 case MemoryAccessSize::HalfWord: 1413 { 1414 EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::ReadMemoryHalfWord) : 1415 reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryHalfWord)); 1416 } 1417 break; 1418 case MemoryAccessSize::Word: 1419 { 1420 EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::ReadMemoryWord) : 1421 reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryWord)); 1422 } 1423 break; 1424 } 1425 1426 // TODO: turn this into an asm function instead 1427 if (checked) 1428 { 1429 SwitchToFarCodeIfBitSet(RRETHI, 31); 1430 BackupHostState(); 1431 1432 // Need to stash this in a temp because of the flush. 1433 const Register temp = Register(AllocateTempHostReg(HR_CALLEE_SAVED)); 1434 armAsm->rsb(temp, RRETHI, 0); 1435 armAsm->lsl(temp, temp, 2); 1436 1437 Flush(FLUSH_FOR_C_CALL | FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_FOR_EXCEPTION); 1438 1439 // cause_bits = (-result << 2) | BD | cop_n 1440 armAsm->orr(RARG1, temp, 1441 armCheckLogicalConstant(Cop0Registers::CAUSE::MakeValueForException( 1442 static_cast<Exception>(0), m_current_instruction_branch_delay_slot, false, inst->cop.cop_n))); 1443 EmitMov(RARG2, m_current_instruction_pc); 1444 EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException))); 1445 FreeHostReg(temp.GetCode()); 1446 EndBlock(std::nullopt, true); 1447 1448 RestoreHostState(); 1449 SwitchToNearCode(false); 1450 } 1451 1452 const Register dst_reg = dst_reg_alloc(); 1453 switch (size) 1454 { 1455 case MemoryAccessSize::Byte: 1456 { 1457 sign ? armAsm->sxtb(dst_reg, RRET) : armAsm->uxtb(dst_reg, RRET); 1458 } 1459 break; 1460 case MemoryAccessSize::HalfWord: 1461 { 1462 sign ? 
armAsm->sxth(dst_reg, RRET) : armAsm->uxth(dst_reg, RRET); 1463 } 1464 break; 1465 case MemoryAccessSize::Word: 1466 { 1467 if (dst_reg.GetCode() != RRET.GetCode()) 1468 armAsm->mov(dst_reg, RRET); 1469 } 1470 break; 1471 } 1472 1473 return dst_reg; 1474 } 1475 1476 void CPU::NewRec::AArch32Compiler::GenerateStore(const vixl::aarch32::Register& addr_reg, 1477 const vixl::aarch32::Register& value_reg, MemoryAccessSize size, 1478 bool use_fastmem) 1479 { 1480 if (use_fastmem) 1481 { 1482 DebugAssert(g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT); 1483 DebugAssert(addr_reg.GetCode() != RARG3.GetCode()); 1484 const Register membase = GetMembaseReg(); 1485 armAsm->lsr(RARG3, addr_reg, Bus::FASTMEM_LUT_PAGE_SHIFT); 1486 armAsm->ldr(RARG3, MemOperand(membase, RARG3, LSL, 2)); 1487 1488 const MemOperand mem = MemOperand(RARG3, addr_reg); 1489 u8* start = armAsm->GetCursorAddress<u8*>(); 1490 switch (size) 1491 { 1492 case MemoryAccessSize::Byte: 1493 armAsm->strb(value_reg, mem); 1494 break; 1495 1496 case MemoryAccessSize::HalfWord: 1497 armAsm->strh(value_reg, mem); 1498 break; 1499 1500 case MemoryAccessSize::Word: 1501 armAsm->str(value_reg, mem); 1502 break; 1503 } 1504 AddLoadStoreInfo(start, kA32InstructionSizeInBytes, addr_reg.GetCode(), value_reg.GetCode(), size, false, false); 1505 return; 1506 } 1507 1508 if (addr_reg.GetCode() != RARG1.GetCode()) 1509 armAsm->mov(RARG1, addr_reg); 1510 if (value_reg.GetCode() != RARG2.GetCode()) 1511 armAsm->mov(RARG2, value_reg); 1512 1513 const bool checked = g_settings.cpu_recompiler_memory_exceptions; 1514 switch (size) 1515 { 1516 case MemoryAccessSize::Byte: 1517 { 1518 EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::WriteMemoryByte) : 1519 reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryByte)); 1520 } 1521 break; 1522 case MemoryAccessSize::HalfWord: 1523 { 1524 EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::WriteMemoryHalfWord) : 1525 reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryHalfWord)); 1526 } 1527 break; 1528 case MemoryAccessSize::Word: 1529 { 1530 EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::WriteMemoryWord) : 1531 reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryWord)); 1532 } 1533 break; 1534 } 1535 1536 // TODO: turn this into an asm function instead 1537 if (checked) 1538 { 1539 SwitchToFarCodeIfRegZeroOrNonZero(RRET, true); 1540 BackupHostState(); 1541 1542 // Need to stash this in a temp because of the flush. 
1543 const Register temp = Register(AllocateTempHostReg(HR_CALLEE_SAVED)); 1544 armAsm->lsl(temp, RRET, 2); 1545 1546 Flush(FLUSH_FOR_C_CALL | FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_FOR_EXCEPTION); 1547 1548 // cause_bits = (result << 2) | BD | cop_n 1549 armAsm->orr(RARG1, temp, 1550 armCheckLogicalConstant(Cop0Registers::CAUSE::MakeValueForException( 1551 static_cast<Exception>(0), m_current_instruction_branch_delay_slot, false, inst->cop.cop_n))); 1552 EmitMov(RARG2, m_current_instruction_pc); 1553 EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException))); 1554 FreeHostReg(temp.GetCode()); 1555 EndBlock(std::nullopt, true); 1556 1557 RestoreHostState(); 1558 SwitchToNearCode(false); 1559 } 1560 } 1561 1562 void CPU::NewRec::AArch32Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, 1563 const std::optional<VirtualMemoryAddress>& address) 1564 { 1565 const std::optional<Register> addr_reg = g_settings.gpu_pgxp_enable ? 1566 std::optional<Register>(Register(AllocateTempHostReg(HR_CALLEE_SAVED))) : 1567 std::optional<Register>(); 1568 FlushForLoadStore(address, false, use_fastmem); 1569 const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg); 1570 const Register data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() { 1571 if (cf.MipsT() == Reg::zero) 1572 return RRET; 1573 1574 return Register(AllocateHostReg(GetFlagsForNewLoadDelayedReg(), 1575 EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, cf.MipsT())); 1576 }); 1577 1578 if (g_settings.gpu_pgxp_enable) 1579 { 1580 Flush(FLUSH_FOR_C_CALL); 1581 1582 EmitMov(RARG1, inst->bits); 1583 armAsm->mov(RARG2, addr); 1584 armAsm->mov(RARG3, data); 1585 EmitCall(s_pgxp_mem_load_functions[static_cast<u32>(size)][static_cast<u32>(sign)]); 1586 FreeHostReg(addr_reg.value().GetCode()); 1587 } 1588 } 1589 1590 void CPU::NewRec::AArch32Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, 1591 const std::optional<VirtualMemoryAddress>& address) 1592 { 1593 DebugAssert(size == MemoryAccessSize::Word && !sign); 1594 1595 const Register addr = Register(AllocateTempHostReg(HR_CALLEE_SAVED)); 1596 FlushForLoadStore(address, false, use_fastmem); 1597 1598 // TODO: if address is constant, this can be simplified.. 1599 1600 // If we're coming from another block, just flush the load delay and hope for the best.. 1601 if (m_load_delay_dirty) 1602 UpdateLoadDelay(); 1603 1604 // We'd need to be careful here if we weren't overwriting it.. 1605 ComputeLoadStoreAddressArg(cf, address, addr); 1606 armAsm->bic(RARG1, addr, 3); 1607 GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; }); 1608 1609 if (inst->r.rt == Reg::zero) 1610 { 1611 FreeHostReg(addr.GetCode()); 1612 return; 1613 } 1614 1615 // lwl/lwr from a load-delayed value takes the new value, but it itself, is load delayed, so the original value is 1616 // never written back. NOTE: can't trust T in cf because of the flush 1617 const Reg rt = inst->r.rt; 1618 Register value; 1619 if (m_load_delay_register == rt) 1620 { 1621 const u32 existing_ld_rt = (m_load_delay_value_register == NUM_HOST_REGS) ? 
1622 AllocateHostReg(HR_MODE_READ, HR_TYPE_LOAD_DELAY_VALUE, rt) : 1623 m_load_delay_value_register; 1624 RenameHostReg(existing_ld_rt, HR_MODE_WRITE, HR_TYPE_NEXT_LOAD_DELAY_VALUE, rt); 1625 value = Register(existing_ld_rt); 1626 } 1627 else 1628 { 1629 if constexpr (EMULATE_LOAD_DELAYS) 1630 { 1631 value = Register(AllocateHostReg(HR_MODE_WRITE, HR_TYPE_NEXT_LOAD_DELAY_VALUE, rt)); 1632 if (const std::optional<u32> rtreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt); rtreg.has_value()) 1633 armAsm->mov(value, Register(rtreg.value())); 1634 else if (HasConstantReg(rt)) 1635 EmitMov(value, GetConstantRegU32(rt)); 1636 else 1637 armAsm->ldr(value, MipsPtr(rt)); 1638 } 1639 else 1640 { 1641 value = Register(AllocateHostReg(HR_MODE_READ | HR_MODE_WRITE, HR_TYPE_CPU_REG, rt)); 1642 } 1643 } 1644 1645 DebugAssert(value.GetCode() != RARG2.GetCode() && value.GetCode() != RARG3.GetCode()); 1646 armAsm->and_(RARG2, addr, 3); 1647 armAsm->lsl(RARG2, RARG2, 3); // *8 1648 EmitMov(RARG3, 24); 1649 armAsm->sub(RARG3, RARG3, RARG2); 1650 1651 if (inst->op == InstructionOp::lwl) 1652 { 1653 // const u32 mask = UINT32_C(0x00FFFFFF) >> shift; 1654 // new_value = (value & mask) | (RWRET << (24 - shift)); 1655 EmitMov(RSCRATCH, 0xFFFFFFu); 1656 armAsm->lsr(RSCRATCH, RSCRATCH, RARG2); 1657 armAsm->and_(value, value, RSCRATCH); 1658 armAsm->lsl(RRET, RRET, RARG3); 1659 armAsm->orr(value, value, RRET); 1660 } 1661 else 1662 { 1663 // const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift); 1664 // new_value = (value & mask) | (RWRET >> shift); 1665 armAsm->lsr(RRET, RRET, RARG2); 1666 EmitMov(RSCRATCH, 0xFFFFFF00u); 1667 armAsm->lsl(RSCRATCH, RSCRATCH, RARG3); 1668 armAsm->and_(value, value, RSCRATCH); 1669 armAsm->orr(value, value, RRET); 1670 } 1671 1672 FreeHostReg(addr.GetCode()); 1673 1674 if (g_settings.gpu_pgxp_enable) 1675 { 1676 Flush(FLUSH_FOR_C_CALL); 1677 armAsm->mov(RARG3, value); 1678 armAsm->bic(RARG2, addr, 3); 1679 EmitMov(RARG1, inst->bits); 1680 EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LW)); 1681 } 1682 } 1683 1684 void CPU::NewRec::AArch32Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, 1685 const std::optional<VirtualMemoryAddress>& address) 1686 { 1687 const u32 index = static_cast<u32>(inst->r.rt.GetValue()); 1688 const auto [ptr, action] = GetGTERegisterPointer(index, true); 1689 const std::optional<Register> addr_reg = g_settings.gpu_pgxp_enable ? 1690 std::optional<Register>(Register(AllocateTempHostReg(HR_CALLEE_SAVED))) : 1691 std::optional<Register>(); 1692 FlushForLoadStore(address, false, use_fastmem); 1693 const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg); 1694 const Register value = GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, [this, action = action]() { 1695 return (action == GTERegisterAccessAction::CallHandler && g_settings.gpu_pgxp_enable) ? 
1696 Register(AllocateTempHostReg(HR_CALLEE_SAVED)) : 1697 RRET; 1698 }); 1699 1700 switch (action) 1701 { 1702 case GTERegisterAccessAction::Ignore: 1703 { 1704 break; 1705 } 1706 1707 case GTERegisterAccessAction::Direct: 1708 { 1709 armAsm->str(value, PTR(ptr)); 1710 break; 1711 } 1712 1713 case GTERegisterAccessAction::SignExtend16: 1714 { 1715 armAsm->sxth(RARG3, value); 1716 armAsm->str(RARG3, PTR(ptr)); 1717 break; 1718 } 1719 1720 case GTERegisterAccessAction::ZeroExtend16: 1721 { 1722 armAsm->uxth(RARG3, value); 1723 armAsm->str(RARG3, PTR(ptr)); 1724 break; 1725 } 1726 1727 case GTERegisterAccessAction::CallHandler: 1728 { 1729 Flush(FLUSH_FOR_C_CALL); 1730 armAsm->mov(RARG2, value); 1731 EmitMov(RARG1, index); 1732 EmitCall(reinterpret_cast<const void*>(>E::WriteRegister)); 1733 break; 1734 } 1735 1736 case GTERegisterAccessAction::PushFIFO: 1737 { 1738 // SXY0 <- SXY1 1739 // SXY1 <- SXY2 1740 // SXY2 <- SXYP 1741 DebugAssert(value.GetCode() != RARG2.GetCode() && value.GetCode() != RARG3.GetCode()); 1742 armAsm->ldr(RARG2, PTR(&g_state.gte_regs.SXY1[0])); 1743 armAsm->ldr(RARG3, PTR(&g_state.gte_regs.SXY2[0])); 1744 armAsm->str(RARG2, PTR(&g_state.gte_regs.SXY0[0])); 1745 armAsm->str(RARG3, PTR(&g_state.gte_regs.SXY1[0])); 1746 armAsm->str(value, PTR(&g_state.gte_regs.SXY2[0])); 1747 break; 1748 } 1749 1750 default: 1751 { 1752 Panic("Unknown action"); 1753 return; 1754 } 1755 } 1756 1757 if (g_settings.gpu_pgxp_enable) 1758 { 1759 Flush(FLUSH_FOR_C_CALL); 1760 armAsm->mov(RARG3, value); 1761 if (value.GetCode() != RRET.GetCode()) 1762 FreeHostReg(value.GetCode()); 1763 armAsm->mov(RARG2, addr); 1764 FreeHostReg(addr_reg.value().GetCode()); 1765 EmitMov(RARG1, inst->bits); 1766 EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LWC2)); 1767 } 1768 } 1769 1770 void CPU::NewRec::AArch32Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, 1771 const std::optional<VirtualMemoryAddress>& address) 1772 { 1773 AssertRegOrConstS(cf); 1774 AssertRegOrConstT(cf); 1775 1776 const std::optional<Register> addr_reg = g_settings.gpu_pgxp_enable ? 1777 std::optional<Register>(Register(AllocateTempHostReg(HR_CALLEE_SAVED))) : 1778 std::optional<Register>(); 1779 FlushForLoadStore(address, true, use_fastmem); 1780 const Register addr = ComputeLoadStoreAddressArg(cf, address, addr_reg); 1781 const Register data = cf.valid_host_t ? CFGetRegT(cf) : RARG2; 1782 if (!cf.valid_host_t) 1783 MoveTToReg(RARG2, cf); 1784 1785 GenerateStore(addr, data, size, use_fastmem); 1786 1787 if (g_settings.gpu_pgxp_enable) 1788 { 1789 Flush(FLUSH_FOR_C_CALL); 1790 MoveMIPSRegToReg(RARG3, cf.MipsT()); 1791 armAsm->mov(RARG2, addr); 1792 EmitMov(RARG1, inst->bits); 1793 EmitCall(s_pgxp_mem_store_functions[static_cast<u32>(size)]); 1794 FreeHostReg(addr_reg.value().GetCode()); 1795 } 1796 } 1797 1798 void CPU::NewRec::AArch32Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, 1799 const std::optional<VirtualMemoryAddress>& address) 1800 { 1801 DebugAssert(size == MemoryAccessSize::Word && !sign); 1802 1803 // TODO: this can take over rt's value if it's no longer needed 1804 // NOTE: can't trust T in cf because of the alloc 1805 const Register addr = Register(AllocateTempHostReg(HR_CALLEE_SAVED)); 1806 const Register value = g_settings.gpu_pgxp_enable ? 
Register(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2; 1807 if (g_settings.gpu_pgxp_enable) 1808 MoveMIPSRegToReg(value, inst->r.rt); 1809 1810 FlushForLoadStore(address, true, use_fastmem); 1811 1812 // TODO: if address is constant, this can be simplified.. 1813 // We'd need to be careful here if we weren't overwriting it.. 1814 ComputeLoadStoreAddressArg(cf, address, addr); 1815 armAsm->bic(RARG1, addr, 3); 1816 GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; }); 1817 1818 armAsm->and_(RSCRATCH, addr, 3); 1819 armAsm->lsl(RSCRATCH, RSCRATCH, 3); // *8 1820 armAsm->bic(addr, addr, 3); 1821 1822 // Need to load down here for PGXP-off, because it's in a volatile reg that can get overwritten by flush. 1823 if (!g_settings.gpu_pgxp_enable) 1824 MoveMIPSRegToReg(value, inst->r.rt); 1825 1826 if (inst->op == InstructionOp::swl) 1827 { 1828 // const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift; 1829 // new_value = (RWRET & mem_mask) | (value >> (24 - shift)); 1830 EmitMov(RARG3, 0xFFFFFF00u); 1831 armAsm->lsl(RARG3, RARG3, RSCRATCH); 1832 armAsm->and_(RRET, RRET, RARG3); 1833 1834 EmitMov(RARG3, 24); 1835 armAsm->sub(RARG3, RARG3, RSCRATCH); 1836 armAsm->lsr(value, value, RARG3); 1837 armAsm->orr(value, value, RRET); 1838 } 1839 else 1840 { 1841 // const u32 mem_mask = UINT32_C(0x00FFFFFF) >> (24 - shift); 1842 // new_value = (RWRET & mem_mask) | (value << shift); 1843 armAsm->lsl(value, value, RSCRATCH); 1844 1845 EmitMov(RARG3, 24); 1846 armAsm->sub(RARG3, RARG3, RSCRATCH); 1847 EmitMov(RSCRATCH, 0x00FFFFFFu); 1848 armAsm->lsr(RSCRATCH, RSCRATCH, RARG3); 1849 armAsm->and_(RRET, RRET, RSCRATCH); 1850 armAsm->orr(value, value, RRET); 1851 } 1852 1853 if (!g_settings.gpu_pgxp_enable) 1854 { 1855 GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem); 1856 FreeHostReg(addr.GetCode()); 1857 } 1858 else 1859 { 1860 GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem); 1861 1862 Flush(FLUSH_FOR_C_CALL); 1863 armAsm->mov(RARG3, value); 1864 FreeHostReg(value.GetCode()); 1865 armAsm->mov(RARG2, addr); 1866 FreeHostReg(addr.GetCode()); 1867 EmitMov(RARG1, inst->bits); 1868 EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SW)); 1869 } 1870 } 1871 1872 void CPU::NewRec::AArch32Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem, 1873 const std::optional<VirtualMemoryAddress>& address) 1874 { 1875 const u32 index = static_cast<u32>(inst->r.rt.GetValue()); 1876 const auto [ptr, action] = GetGTERegisterPointer(index, false); 1877 const Register addr = (g_settings.gpu_pgxp_enable || action == GTERegisterAccessAction::CallHandler) ? 1878 Register(AllocateTempHostReg(HR_CALLEE_SAVED)) : 1879 RARG1; 1880 const Register data = g_settings.gpu_pgxp_enable ? Register(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2; 1881 FlushForLoadStore(address, true, use_fastmem); 1882 ComputeLoadStoreAddressArg(cf, address, addr); 1883 1884 switch (action) 1885 { 1886 case GTERegisterAccessAction::Direct: 1887 { 1888 armAsm->ldr(data, PTR(ptr)); 1889 } 1890 break; 1891 1892 case GTERegisterAccessAction::CallHandler: 1893 { 1894 // should already be flushed.. 
void CPU::NewRec::AArch32Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                                const std::optional<VirtualMemoryAddress>& address)
{
  const u32 index = static_cast<u32>(inst->r.rt.GetValue());
  const auto [ptr, action] = GetGTERegisterPointer(index, false);
  const Register addr = (g_settings.gpu_pgxp_enable || action == GTERegisterAccessAction::CallHandler) ?
                          Register(AllocateTempHostReg(HR_CALLEE_SAVED)) :
                          RARG1;
  const Register data = g_settings.gpu_pgxp_enable ? Register(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2;
  FlushForLoadStore(address, true, use_fastmem);
  ComputeLoadStoreAddressArg(cf, address, addr);

  switch (action)
  {
    case GTERegisterAccessAction::Direct:
    {
      armAsm->ldr(data, PTR(ptr));
    }
    break;

    case GTERegisterAccessAction::CallHandler:
    {
      // should already be flushed.. except in fastmem case
      Flush(FLUSH_FOR_C_CALL);
      EmitMov(RARG1, index);
      EmitCall(reinterpret_cast<const void*>(&GTE::ReadRegister));
      armAsm->mov(data, RRET);
    }
    break;

    default:
    {
      Panic("Unknown action");
    }
    break;
  }

  GenerateStore(addr, data, size, use_fastmem);
  if (!g_settings.gpu_pgxp_enable)
  {
    if (addr.GetCode() != RARG1.GetCode())
      FreeHostReg(addr.GetCode());
  }
  else
  {
    // TODO: This can be simplified because we don't need to validate in PGXP..
    Flush(FLUSH_FOR_C_CALL);
    armAsm->mov(RARG3, data);
    FreeHostReg(data.GetCode());
    armAsm->mov(RARG2, addr);
    FreeHostReg(addr.GetCode());
    EmitMov(RARG1, inst->bits);
    EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SWC2));
  }
}

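// Compile_mtc0 below emits a masked read-modify-write of the target cop0 register. In plain C++
// terms (a sketch; the helper is illustrative and unused), the generated eor/bic/orr sequence
// computes the values below; for SR, changed_bits is then tested at bit 16 (the cache-isolation
// bit) to decide whether the memory map needs to be refreshed.
namespace {
[[maybe_unused]] u32 ExampleMaskedCop0Write(u32 old_value, u32 t_value, u32 write_mask, u32* changed_bits)
{
  const u32 new_bits = t_value & write_mask; // and_(new_value, rt, mask_reg)
  *changed_bits = old_value ^ new_bits;      // eor(changed_bits, old_value, new_value), SR only
  return (old_value & ~write_mask) | new_bits; // bic(old, old, mask) then orr(new, old, new)
}
} // namespace
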
void CPU::NewRec::AArch32Compiler::Compile_mtc0(CompileFlags cf)
{
  // TODO: we need better constant setting here.. which will need backprop
  AssertRegOrConstT(cf);

  const Cop0Reg reg = static_cast<Cop0Reg>(MipsD());
  const u32* ptr = GetCop0RegPtr(reg);
  const u32 mask = GetCop0RegWriteMask(reg);
  if (!ptr)
  {
    Compile_Fallback();
    return;
  }

  if (mask == 0)
  {
    // if it's a read-only register, ignore
    DEBUG_LOG("Ignoring write to read-only cop0 reg {}", static_cast<u32>(reg));
    return;
  }

  // for some registers, we need to test certain bits
  const bool needs_bit_test = (reg == Cop0Reg::SR);
  const Register new_value = RARG1;
  const Register old_value = RARG2;
  const Register changed_bits = RARG3;
  const Register mask_reg = RSCRATCH;

  // Load old value
  armAsm->ldr(old_value, PTR(ptr));

  // No way we fit this in an immediate..
  EmitMov(mask_reg, mask);

  // update value
  if (cf.valid_host_t)
    armAsm->and_(new_value, CFGetRegT(cf), mask_reg);
  else
    EmitMov(new_value, GetConstantRegU32(cf.MipsT()) & mask);

  if (needs_bit_test)
    armAsm->eor(changed_bits, old_value, new_value);
  armAsm->bic(old_value, old_value, mask_reg);
  armAsm->orr(new_value, old_value, new_value);
  armAsm->str(new_value, PTR(ptr));

  if (reg == Cop0Reg::SR)
  {
    // TODO: replace with register backup
    // We could just inline the whole thing..
    Flush(FLUSH_FOR_C_CALL);

    SwitchToFarCodeIfBitSet(changed_bits, 16);
    armAsm->push(RegisterList(RARG1));
    EmitCall(reinterpret_cast<const void*>(&CPU::UpdateMemoryPointers));
    armAsm->pop(RegisterList(RARG1));
    if (CodeCache::IsUsingFastmem() && m_block->HasFlag(CodeCache::BlockFlags::ContainsLoadStoreInstructions) &&
        IsHostRegAllocated(RMEMBASE.GetCode()))
    {
      FreeHostReg(RMEMBASE.GetCode());
    }
    SwitchToNearCode(true);

    TestInterrupts(RARG1);
  }
  else if (reg == Cop0Reg::CAUSE)
  {
    armAsm->ldr(RARG1, PTR(&g_state.cop0_regs.sr.bits));
    TestInterrupts(RARG1);
  }

  if (reg == Cop0Reg::DCIC && g_settings.cpu_recompiler_memory_exceptions)
  {
    // TODO: DCIC handling for debug breakpoints
    WARNING_LOG("TODO: DCIC handling for debug breakpoints");
  }
}

void CPU::NewRec::AArch32Compiler::Compile_rfe(CompileFlags cf)
{
  // shift mode bits right two, preserving upper bits
  armAsm->ldr(RARG1, PTR(&g_state.cop0_regs.sr.bits));
  armAsm->bic(RARG2, RARG1, 15);
  armAsm->ubfx(RARG1, RARG1, 2, 4);
  armAsm->orr(RARG1, RARG1, RARG2);
  armAsm->str(RARG1, PTR(&g_state.cop0_regs.sr.bits));

  TestInterrupts(RARG1);
}

void CPU::NewRec::AArch32Compiler::TestInterrupts(const vixl::aarch32::Register& sr)
{
  // if Iec == 0 then goto no_interrupt
  Label no_interrupt;
  armAsm->tst(sr, 1);
  armAsm->b(eq, &no_interrupt);

  // sr & cause
  armAsm->ldr(RSCRATCH, PTR(&g_state.cop0_regs.cause.bits));
  armAsm->and_(sr, sr, RSCRATCH);

  // ((sr & cause) & 0xff00) == 0 goto no_interrupt
  armAsm->tst(sr, 0xFF00);

  SwitchToFarCode(true, ne);
  BackupHostState();

  // Update load delay, this normally happens at the end of an instruction, but we're finishing it early.
  UpdateLoadDelay();

  Flush(FLUSH_END_BLOCK | FLUSH_FOR_EXCEPTION | FLUSH_FOR_C_CALL);

  // Can't use EndBlockWithException() here, because it'll use the wrong PC.
  // Can't use RaiseException() on the fast path if we're the last instruction, because the next PC is unknown.
  if (!iinfo->is_last_instruction)
  {
    EmitMov(RARG1, Cop0Registers::CAUSE::MakeValueForException(Exception::INT, iinfo->is_branch_instruction, false,
                                                               (inst + 1)->cop.cop_n));
    EmitMov(RARG2, m_compiler_pc);
    EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));
    m_dirty_pc = false;
    EndAndLinkBlock(std::nullopt, true, false);
  }
  else
  {
    EmitMov(RARG1, 0);
    if (m_dirty_pc)
      EmitMov(RARG2, m_compiler_pc);
    armAsm->str(RARG1, PTR(&g_state.downcount));
    if (m_dirty_pc)
      armAsm->str(RARG2, PTR(&g_state.pc));
    m_dirty_pc = false;
    EndAndLinkBlock(std::nullopt, false, true);
  }

  RestoreHostState();
  SwitchToNearCode(false);

  armAsm->bind(&no_interrupt);
}

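// Plain C++ sketches of the two bit sequences above (illustrative helpers, not called anywhere):
// Compile_rfe's bic/ubfx/orr pops the SR mode stack, and TestInterrupts' branch takes the slow
// path only when the current interrupt-enable bit (bit 0) is set and some pending/mask pair in
// bits 8..15 matches between SR and CAUSE.
namespace {
[[maybe_unused]] u32 ExampleRfePopModeStack(u32 sr)
{
  // keep bits 4..31, shift the KU/IE pairs right by two
  return (sr & ~15u) | ((sr >> 2) & 15u);
}

[[maybe_unused]] bool ExampleInterruptPending(u32 sr, u32 cause)
{
  return (sr & 1u) != 0u && (sr & cause & 0xFF00u) != 0u;
}
} // namespace
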
void CPU::NewRec::AArch32Compiler::Compile_mfc2(CompileFlags cf)
{
  const u32 index = inst->cop.Cop2Index();
  const Reg rt = inst->r.rt;

  const auto [ptr, action] = GetGTERegisterPointer(index, false);
  if (action == GTERegisterAccessAction::Ignore)
    return;

  u32 hreg;
  if (action == GTERegisterAccessAction::Direct)
  {
    hreg = AllocateHostReg(GetFlagsForNewLoadDelayedReg(),
                           EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, rt);
    armAsm->ldr(Register(hreg), PTR(ptr));
  }
  else if (action == GTERegisterAccessAction::CallHandler)
  {
    Flush(FLUSH_FOR_C_CALL);
    EmitMov(RARG1, index);
    EmitCall(reinterpret_cast<const void*>(&GTE::ReadRegister));

    hreg = AllocateHostReg(GetFlagsForNewLoadDelayedReg(),
                           EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, rt);
    armAsm->mov(Register(hreg), RRET);
  }
  else
  {
    Panic("Unknown action");
    return;
  }

  if (g_settings.gpu_pgxp_enable)
  {
    Flush(FLUSH_FOR_C_CALL);
    EmitMov(RARG1, inst->bits);
    armAsm->mov(RARG2, Register(hreg));
    EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_MFC2));
  }
}

void CPU::NewRec::AArch32Compiler::Compile_mtc2(CompileFlags cf)
{
  const u32 index = inst->cop.Cop2Index();
  const auto [ptr, action] = GetGTERegisterPointer(index, true);
  if (action == GTERegisterAccessAction::Ignore)
    return;

  if (action == GTERegisterAccessAction::Direct)
  {
    if (cf.const_t)
      StoreConstantToCPUPointer(GetConstantRegU32(cf.MipsT()), ptr);
    else
      armAsm->str(CFGetRegT(cf), PTR(ptr));
  }
  else if (action == GTERegisterAccessAction::SignExtend16 || action == GTERegisterAccessAction::ZeroExtend16)
  {
    const bool sign = (action == GTERegisterAccessAction::SignExtend16);
    if (cf.valid_host_t)
    {
      sign ? armAsm->sxth(RARG1, CFGetRegT(cf)) : armAsm->uxth(RARG1, CFGetRegT(cf));
      armAsm->str(RARG1, PTR(ptr));
    }
    else if (cf.const_t)
    {
      const u16 cv = Truncate16(GetConstantRegU32(cf.MipsT()));
      StoreConstantToCPUPointer(sign ? ::SignExtend32(cv) : ::ZeroExtend32(cv), ptr);
    }
    else
    {
      Panic("Unsupported setup");
    }
  }
  else if (action == GTERegisterAccessAction::CallHandler)
  {
    Flush(FLUSH_FOR_C_CALL);
    EmitMov(RARG1, index);
    MoveTToReg(RARG2, cf);
    EmitCall(reinterpret_cast<const void*>(&GTE::WriteRegister));
  }
  else if (action == GTERegisterAccessAction::PushFIFO)
  {
    // SXY0 <- SXY1
    // SXY1 <- SXY2
    // SXY2 <- SXYP
    DebugAssert(RRET.GetCode() != RARG2.GetCode() && RRET.GetCode() != RARG3.GetCode());
    armAsm->ldr(RARG2, PTR(&g_state.gte_regs.SXY1[0]));
    armAsm->ldr(RARG3, PTR(&g_state.gte_regs.SXY2[0]));
    armAsm->str(RARG2, PTR(&g_state.gte_regs.SXY0[0]));
    armAsm->str(RARG3, PTR(&g_state.gte_regs.SXY1[0]));
    if (cf.valid_host_t)
      armAsm->str(CFGetRegT(cf), PTR(&g_state.gte_regs.SXY2[0]));
    else if (cf.const_t)
      StoreConstantToCPUPointer(GetConstantRegU32(cf.MipsT()), &g_state.gte_regs.SXY2[0]);
    else
      Panic("Unsupported setup");
  }
  else
  {
    Panic("Unknown action");
  }
}

void CPU::NewRec::AArch32Compiler::Compile_cop2(CompileFlags cf)
{
  TickCount func_ticks;
  GTE::InstructionImpl func = GTE::GetInstructionImpl(inst->bits, &func_ticks);

  Flush(FLUSH_FOR_C_CALL);
  EmitMov(RARG1, inst->bits & GTE::Instruction::REQUIRED_BITS_MASK);
  EmitCall(reinterpret_cast<const void*>(func));

  AddGTETicks(func_ticks);
}

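// The slow-path thunk emitted by CompileLoadStoreThunk below extends the value returned by the
// memory handler before placing it in the destination register (sxtb/uxtb, sxth/uxth, or a plain
// mov for words). A plain C++ sketch of that selection, assuming the usual s8/s16/s32 typedefs
// from the common headers (the helper is illustrative and unused):
namespace {
[[maybe_unused]] u32 ExampleExtendLoadResult(u32 raw, MemoryAccessSize size, bool is_signed)
{
  switch (size)
  {
    case MemoryAccessSize::Byte:
      return is_signed ? static_cast<u32>(static_cast<s32>(static_cast<s8>(raw))) : (raw & 0xFFu);
    case MemoryAccessSize::HalfWord:
      return is_signed ? static_cast<u32>(static_cast<s32>(static_cast<s16>(raw))) : (raw & 0xFFFFu);
    case MemoryAccessSize::Word:
    default:
      return raw;
  }
}
} // namespace
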
u32 CPU::NewRec::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size,
                                       TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask,
                                       u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
                                       bool is_load)
{
  Assembler arm_asm(static_cast<u8*>(thunk_code), thunk_space);
  Assembler* armAsm = &arm_asm;

#ifdef VIXL_DEBUG
  vixl::CodeBufferCheckScope asm_check(armAsm, thunk_space, vixl::CodeBufferCheckScope::kDontReserveBufferSpace);
#endif

  // save regs
  RegisterList save_regs;

  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    if ((gpr_bitmask & (1u << i)) && armIsCallerSavedRegister(i) && (!is_load || data_register != i))
      save_regs.Combine(RegisterList(Register(i)));
  }

  if (!save_regs.IsEmpty())
    armAsm->push(save_regs);

  if (address_register != static_cast<u8>(RARG1.GetCode()))
    armAsm->mov(RARG1, Register(address_register));

  if (!is_load)
  {
    if (data_register != static_cast<u8>(RARG2.GetCode()))
      armAsm->mov(RARG2, Register(data_register));
  }

  if (cycles_to_add != 0)
  {
    // NOTE: we have to reload here, because memory writes can run DMA, which can screw with cycles
    armAsm->ldr(RARG3, PTR(&g_state.pending_ticks));
    if (!ImmediateA32::IsImmediateA32(cycles_to_add))
    {
      armEmitMov(armAsm, RSCRATCH, cycles_to_add);
      armAsm->add(RARG3, RARG3, RSCRATCH);
    }
    else
    {
      armAsm->add(RARG3, RARG3, cycles_to_add);
    }

    armAsm->str(RARG3, PTR(&g_state.pending_ticks));
  }

  switch (size)
  {
    case MemoryAccessSize::Byte:
    {
      armEmitCall(armAsm,
                  is_load ? reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryByte) :
                            reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryByte),
                  false);
    }
    break;
    case MemoryAccessSize::HalfWord:
    {
      armEmitCall(armAsm,
                  is_load ? reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryHalfWord) :
                            reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryHalfWord),
                  false);
    }
    break;
    case MemoryAccessSize::Word:
    {
      armEmitCall(armAsm,
                  is_load ? reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryWord) :
                            reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryWord),
                  false);
    }
    break;
  }

  if (is_load)
  {
    const Register dst = Register(data_register);
    switch (size)
    {
      case MemoryAccessSize::Byte:
      {
        is_signed ? armAsm->sxtb(dst, RRET) : armAsm->uxtb(dst, RRET);
      }
      break;
      case MemoryAccessSize::HalfWord:
      {
        is_signed ? armAsm->sxth(dst, RRET) : armAsm->uxth(dst, RRET);
      }
      break;
      case MemoryAccessSize::Word:
      {
        if (dst.GetCode() != RRET.GetCode())
          armAsm->mov(dst, RRET);
      }
      break;
    }
  }

  if (cycles_to_remove != 0)
  {
    armAsm->ldr(RARG3, PTR(&g_state.pending_ticks));
    if (!ImmediateA32::IsImmediateA32(cycles_to_remove))
    {
      armEmitMov(armAsm, RSCRATCH, cycles_to_remove);
      armAsm->sub(RARG3, RARG3, RSCRATCH);
    }
    else
    {
      armAsm->sub(RARG3, RARG3, cycles_to_remove);
    }
    armAsm->str(RARG3, PTR(&g_state.pending_ticks));
  }

  // restore regs
  if (!save_regs.IsEmpty())
    armAsm->pop(save_regs);

  armEmitJmp(armAsm, static_cast<const u8*>(code_address) + code_size, true);
  armAsm->FinalizeCode();

  return static_cast<u32>(armAsm->GetCursorOffset());
}

#endif // CPU_ARCH_ARM32