cpu_newrec_compiler_riscv64.cpp (77586B)
// SPDX-FileCopyrightText: 2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#include "cpu_newrec_compiler_riscv64.h"
#include "cpu_code_cache_private.h"
#include "cpu_core_private.h"
#include "cpu_pgxp.h"
#include "cpu_recompiler_thunks.h"
#include "gte.h"
#include "settings.h"
#include "timing_event.h"

#include "common/align.h"
#include "common/assert.h"
#include "common/log.h"
#include "common/memmap.h"
#include "common/string_util.h"

#include <limits>

#ifdef CPU_ARCH_RISCV64

Log_SetChannel(CPU::NewRec);

#ifdef ENABLE_HOST_DISASSEMBLY
extern "C" {
#include "riscv-disas.h"
}
#endif

// For LW/SW/etc.
#define PTR(x) ((u32)(((u8*)(x)) - ((u8*)&g_state))), RSTATE

static constexpr u32 BLOCK_LINK_SIZE = 8; // auipc+jr

namespace CPU::NewRec {

using namespace biscuit;

using CPU::Recompiler::rvEmitCall;
using CPU::Recompiler::rvEmitDSExtW;
using CPU::Recompiler::rvEmitDUExtW;
using CPU::Recompiler::rvEmitFarLoad;
using CPU::Recompiler::rvEmitJmp;
using CPU::Recompiler::rvEmitMov;
using CPU::Recompiler::rvEmitMov64;
using CPU::Recompiler::rvEmitSExtB;
using CPU::Recompiler::rvEmitSExtH;
using CPU::Recompiler::rvEmitUExtB;
using CPU::Recompiler::rvEmitUExtH;
using CPU::Recompiler::rvGetAddressImmediates;
using CPU::Recompiler::rvIsCallerSavedRegister;
using CPU::Recompiler::rvIsValidSExtITypeImm;
using CPU::Recompiler::rvMoveAddressToReg;

RISCV64Compiler s_instance;
Compiler* g_compiler = &s_instance;

} // namespace CPU::NewRec

bool CPU::Recompiler::rvIsCallerSavedRegister(u32 id)
{
  return (id == 1 || (id >= 3 && id < 8) || (id >= 10 && id <= 17) || (id >= 28 && id <= 31));
}

bool CPU::Recompiler::rvIsValidSExtITypeImm(u32 imm)
{
  return (static_cast<u32>((static_cast<s32>(imm) << 20) >> 20) == imm);
}

std::pair<s32, s32> CPU::Recompiler::rvGetAddressImmediates(const void* cur, const void* target)
{
  const s64 disp = static_cast<s64>(reinterpret_cast<intptr_t>(target) - reinterpret_cast<intptr_t>(cur));
  Assert(disp >= static_cast<s64>(std::numeric_limits<s32>::min()) &&
         disp <= static_cast<s64>(std::numeric_limits<s32>::max()));

  const s64 hi = disp + 0x800;
  const s64 lo = disp - (hi & 0xFFFFF000);
  return std::make_pair(static_cast<s32>(hi >> 12), static_cast<s32>((lo << 52) >> 52));
}
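// Annotation (not in the original source): AUIPC adds (hi << 12) to the PC and the paired
// ADDI/LW/SW supplies a sign-extended 12-bit 'lo', so the displacement is rounded by +0x800
// before splitting; the possibly-negative 'lo' half then cancels the rounding. This is the
// usual RV64 PC-relative addressing pair used by the helpers below.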
void CPU::Recompiler::rvMoveAddressToReg(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr)
{
  const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);
  rvAsm->AUIPC(reg, hi);
  rvAsm->ADDI(reg, reg, lo);
}

void CPU::Recompiler::rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm)
{
  // Borrowed from biscuit, but doesn't emit an ADDI if the lower 12 bits are zero.
  const u32 lower = imm & 0xFFF;
  const u32 upper = (imm & 0xFFFFF000) >> 12;
  const s32 simm = static_cast<s32>(imm);
  if (rvIsValidSExtITypeImm(simm))
  {
    rvAsm->ADDI(rd, biscuit::zero, static_cast<s32>(lower));
  }
  else
  {
    const bool needs_increment = (lower & 0x800) != 0;
    const u32 upper_imm = needs_increment ? upper + 1 : upper;
    rvAsm->LUI(rd, upper_imm);
    rvAsm->ADDI(rd, rd, static_cast<int32_t>(lower));
  }
}

void CPU::Recompiler::rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch,
                                  u64 imm)
{
  // TODO: Make better..
  rvEmitMov(rvAsm, rd, static_cast<u32>(imm >> 32));
  rvEmitMov(rvAsm, scratch, static_cast<u32>(imm));
  rvAsm->SLLI64(rd, rd, 32);
  rvAsm->SLLI64(scratch, scratch, 32);
  rvAsm->SRLI64(scratch, scratch, 32);
  rvAsm->ADD(rd, rd, scratch);
}

u32 CPU::Recompiler::rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const biscuit::GPR& link_reg)
{
  // TODO: use J if displacement is <1MB, needs a bool because backpatch must be 8 bytes
  const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), ptr);
  rvAsm->AUIPC(RSCRATCH, hi);
  rvAsm->JALR(link_reg, lo, RSCRATCH);
  return 8;
}

u32 CPU::Recompiler::rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr)
{
  return rvEmitJmp(rvAsm, ptr, biscuit::ra);
}

void CPU::Recompiler::rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
                                    bool sign_extend_word)
{
  const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);
  rvAsm->AUIPC(reg, hi);
  if (sign_extend_word)
    rvAsm->LW(reg, lo, reg);
  else
    rvAsm->LWU(reg, lo, reg);
}

void CPU::Recompiler::rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
                                     const biscuit::GPR& tempreg)
{
  const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);
  rvAsm->AUIPC(tempreg, hi);
  rvAsm->SW(reg, lo, tempreg);
}

void CPU::Recompiler::rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvAsm->SLLI(rd, rs, 24);
  rvAsm->SRAIW(rd, rd, 24);
}

void CPU::Recompiler::rvEmitUExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvAsm->ANDI(rd, rs, 0xFF);
}

void CPU::Recompiler::rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvAsm->SLLI(rd, rs, 16);
  rvAsm->SRAIW(rd, rd, 16);
}

void CPU::Recompiler::rvEmitUExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvAsm->SLLI(rd, rs, 16);
  rvAsm->SRLI(rd, rd, 16);
}

void CPU::Recompiler::rvEmitDSExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvAsm->ADDIW(rd, rs, 0);
}

void CPU::Recompiler::rvEmitDUExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvAsm->SLLI64(rd, rs, 32);
  rvAsm->SRLI64(rd, rd, 32);
}

void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
{
#ifdef ENABLE_HOST_DISASSEMBLY
  const u8* cur = static_cast<const u8*>(start);
  const u8* end = cur + size;
  char buf[256];
  while (cur < end)
  {
    rv_inst inst;
    size_t instlen;
    inst_fetch(cur, &inst, &instlen);
    disasm_inst(buf, std::size(buf), rv64, static_cast<u64>(reinterpret_cast<uintptr_t>(cur)), inst);
    DEBUG_LOG("\t0x{:016X}\t{}", static_cast<u64>(reinterpret_cast<uintptr_t>(cur)), buf);
    cur += instlen;
  }
#else
  ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");
#endif
}
u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size)
{
#ifdef ENABLE_HOST_DISASSEMBLY
  const u8* cur = static_cast<const u8*>(start);
  const u8* end = cur + size;
  u32 icount = 0;
  while (cur < end)
  {
    rv_inst inst;
    size_t instlen;
    inst_fetch(cur, &inst, &instlen);
    cur += instlen;
    icount++;
  }
  return icount;
#else
  ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");
  return 0;
#endif
}

u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
{
  using namespace CPU::Recompiler;
  using namespace biscuit;

  Assembler actual_asm(static_cast<u8*>(code), code_size);
  Assembler* rvAsm = &actual_asm;

  Label dispatch;

  g_enter_recompiler = reinterpret_cast<decltype(g_enter_recompiler)>(rvAsm->GetCursorPointer());
  {
    // TODO: reserve some space for saving caller-saved registers

    // Need the CPU state for basically everything :-)
    rvMoveAddressToReg(rvAsm, RSTATE, &g_state);

    // Fastmem setup
    if (IsUsingFastmem())
      rvAsm->LD(RMEMBASE, PTR(&g_state.fastmem_base));

    // Downcount isn't set on entry, so we need to initialize it
    rvMoveAddressToReg(rvAsm, RARG1, TimingEvents::GetHeadEventPtr());
    rvAsm->LD(RARG1, 0, RARG1);
    rvAsm->LW(RARG1, OFFSETOF(TimingEvent, m_downcount), RARG1);
    rvAsm->SW(RARG1, PTR(&g_state.downcount));

    // Fall through to event dispatcher
  }

  // check events then for frame done
  g_check_events_and_dispatch = rvAsm->GetCursorPointer();
  {
    Label skip_event_check;
    rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
    rvAsm->LW(RARG2, PTR(&g_state.downcount));
    rvAsm->BLTU(RARG1, RARG2, &skip_event_check);

    g_run_events_and_dispatch = rvAsm->GetCursorPointer();
    rvEmitCall(rvAsm, reinterpret_cast<const void*>(&TimingEvents::RunEvents));

    rvAsm->Bind(&skip_event_check);
  }

  // TODO: align?
  g_dispatcher = rvAsm->GetCursorPointer();
  {
    rvAsm->Bind(&dispatch);

    // x9 <- s_fast_map[pc >> 16]
    rvAsm->LWU(RARG1, PTR(&g_state.pc));
    rvMoveAddressToReg(rvAsm, RARG3, g_code_lut.data());
    rvAsm->SRLI(RARG2, RARG1, 16);
    rvAsm->SLLI(RARG1, RARG1, 1);
    rvAsm->SLLI(RARG2, RARG2, 3);
    rvAsm->ADD(RARG2, RARG2, RARG3);
    rvAsm->LD(RARG2, 0, RARG2);

    // blr(x9[pc * 2]) (fast_map[pc >> 2])
    rvAsm->ADD(RARG1, RARG1, RARG2);
    rvAsm->LD(RARG1, 0, RARG1);
    rvAsm->JR(RARG1);
  }

  g_compile_or_revalidate_block = rvAsm->GetCursorPointer();
  {
    rvAsm->LW(RARG1, PTR(&g_state.pc));
    rvEmitCall(rvAsm, reinterpret_cast<const void*>(&CompileOrRevalidateBlock));
    rvAsm->J(&dispatch);
  }

  g_discard_and_recompile_block = rvAsm->GetCursorPointer();
  {
    rvAsm->LW(RARG1, PTR(&g_state.pc));
    rvEmitCall(rvAsm, reinterpret_cast<const void*>(&DiscardAndRecompileBlock));
    rvAsm->J(&dispatch);
  }

  g_interpret_block = rvAsm->GetCursorPointer();
  {
    rvEmitCall(rvAsm, CodeCache::GetInterpretUncachedBlockFunction());
    rvAsm->J(&dispatch);
  }

  // TODO: align?

  return static_cast<u32>(rvAsm->GetCodeBuffer().GetSizeInBytes());
}

u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
{
  // TODO: get rid of assembler construction here
  {
    biscuit::Assembler assembler(static_cast<u8*>(code), BLOCK_LINK_SIZE);
    CPU::Recompiler::rvEmitCall(&assembler, dst);

    DebugAssert(assembler.GetCodeBuffer().GetSizeInBytes() <= BLOCK_LINK_SIZE);
    if (assembler.GetCodeBuffer().GetRemainingBytes() > 0)
      assembler.NOP();
  }

  if (flush_icache)
    MemMap::FlushInstructionCache(code, BLOCK_LINK_SIZE);

  return BLOCK_LINK_SIZE;
}

CPU::NewRec::RISCV64Compiler::RISCV64Compiler() = default;

CPU::NewRec::RISCV64Compiler::~RISCV64Compiler() = default;

const void* CPU::NewRec::RISCV64Compiler::GetCurrentCodePointer()
{
  return rvAsm->GetCursorPointer();
}

void CPU::NewRec::RISCV64Compiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space,
                                         u8* far_code_buffer, u32 far_code_space)
{
  Compiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space);

  // TODO: don't recreate this every time..
  DebugAssert(!m_emitter && !m_far_emitter && !rvAsm);
  m_emitter = std::make_unique<Assembler>(code_buffer, code_buffer_space);
  m_far_emitter = std::make_unique<Assembler>(far_code_buffer, far_code_space);
  rvAsm = m_emitter.get();

  // Need to wipe it out so it's correct when toggling fastmem.
  m_host_regs = {};

  const u32 membase_idx = CodeCache::IsUsingFastmem() ? RMEMBASE.Index() : NUM_HOST_REGS;
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    HostRegAlloc& hra = m_host_regs[i];

    if (i == RARG1.Index() || i == RARG2.Index() || i == RARG3.Index() || i == RSCRATCH.Index() ||
        i == RSTATE.Index() || i == membase_idx || i < 5 /* zero, ra, sp, gp, tp */)
    {
      continue;
    }

    hra.flags = HR_USABLE | (rvIsCallerSavedRegister(i) ? 0 : HR_CALLEE_SAVED);
  }
}

void CPU::NewRec::RISCV64Compiler::SwitchToFarCode(
  bool emit_jump,
  void (biscuit::Assembler::*inverted_cond)(biscuit::GPR, biscuit::GPR, biscuit::Label*) /* = nullptr */,
  const biscuit::GPR& rs1 /* = biscuit::zero */, const biscuit::GPR& rs2 /* = biscuit::zero */)
{
  DebugAssert(rvAsm == m_emitter.get());
  if (emit_jump)
  {
    const void* target = m_far_emitter->GetCursorPointer();
    if (inverted_cond)
    {
      Label skip;
      (rvAsm->*inverted_cond)(rs1, rs2, &skip);
      rvEmitJmp(rvAsm, target);
      rvAsm->Bind(&skip);
    }
    else
    {
      rvEmitCall(rvAsm, target);
    }
  }
  rvAsm = m_far_emitter.get();
}

void CPU::NewRec::RISCV64Compiler::SwitchToNearCode(bool emit_jump)
{
  DebugAssert(rvAsm == m_far_emitter.get());
  if (emit_jump)
    rvEmitJmp(rvAsm, m_emitter->GetCursorPointer());
  rvAsm = m_emitter.get();
}

void CPU::NewRec::RISCV64Compiler::EmitMov(const biscuit::GPR& dst, u32 val)
{
  rvEmitMov(rvAsm, dst, val);
}

void CPU::NewRec::RISCV64Compiler::EmitCall(const void* ptr)
{
  rvEmitCall(rvAsm, ptr);
}

void CPU::NewRec::RISCV64Compiler::SafeImmSExtIType(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm,
                                                    void (biscuit::Assembler::*iop)(GPR, GPR, u32),
                                                    void (biscuit::Assembler::*rop)(GPR, GPR, GPR))
{
  DebugAssert(rd != RSCRATCH && rs != RSCRATCH);

  if (rvIsValidSExtITypeImm(imm))
  {
    (rvAsm->*iop)(rd, rs, imm);
    return;
  }

  rvEmitMov(rvAsm, RSCRATCH, imm);
  (rvAsm->*rop)(rd, rs, RSCRATCH);
}

void CPU::NewRec::RISCV64Compiler::SafeADDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{
  SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDI),
                   &Assembler::ADD);
}

void CPU::NewRec::RISCV64Compiler::SafeADDIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{
  SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDIW),
                   &Assembler::ADDW);
}

void CPU::NewRec::RISCV64Compiler::SafeSUBIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{
  const u32 nimm = static_cast<u32>(-static_cast<s32>(imm));
  SafeImmSExtIType(rd, rs, nimm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDIW),
                   &Assembler::ADDW);
}

void CPU::NewRec::RISCV64Compiler::SafeANDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{
  SafeImmSExtIType(rd, rs, imm, &Assembler::ANDI, &Assembler::AND);
}

void CPU::NewRec::RISCV64Compiler::SafeORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{
  SafeImmSExtIType(rd, rs, imm, &Assembler::ORI, &Assembler::OR);
}

void CPU::NewRec::RISCV64Compiler::SafeXORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{
  SafeImmSExtIType(rd, rs, imm, &Assembler::XORI, &Assembler::XOR);
}

void CPU::NewRec::RISCV64Compiler::SafeSLTI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{
  SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::SLTI),
                   &Assembler::SLT);
}

void CPU::NewRec::RISCV64Compiler::SafeSLTIU(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{
  SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::SLTIU),
                   &Assembler::SLTU);
}

void CPU::NewRec::RISCV64Compiler::EmitSExtB(const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvEmitSExtB(rvAsm, rd, rs);
}

void CPU::NewRec::RISCV64Compiler::EmitUExtB(const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvEmitUExtB(rvAsm, rd, rs);
}

void CPU::NewRec::RISCV64Compiler::EmitSExtH(const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvEmitSExtH(rvAsm, rd, rs);
}

void CPU::NewRec::RISCV64Compiler::EmitUExtH(const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvEmitUExtH(rvAsm, rd, rs);
}

void CPU::NewRec::RISCV64Compiler::EmitDSExtW(const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvEmitDSExtW(rvAsm, rd, rs);
}

void CPU::NewRec::RISCV64Compiler::EmitDUExtW(const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvEmitDUExtW(rvAsm, rd, rs);
}

void CPU::NewRec::RISCV64Compiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
{
  // store it first to reduce code size, because we can offset
  // TODO: 64-bit displacement is needed :/
  // rvMoveAddressToReg(rvAsm, RARG1, ram_ptr);
  // rvMoveAddressToReg(rvAsm, RARG2, shadow_ptr);
  rvEmitMov64(rvAsm, RARG1, RSCRATCH, static_cast<u64>(reinterpret_cast<uintptr_t>(ram_ptr)));
  rvEmitMov64(rvAsm, RARG2, RSCRATCH, static_cast<u64>(reinterpret_cast<uintptr_t>(shadow_ptr)));

  u32 offset = 0;
  Label block_changed;

  while (size >= 8)
  {
    rvAsm->LD(RARG3, offset, RARG1);
    rvAsm->LD(RSCRATCH, offset, RARG2);
    rvAsm->BNE(RARG3, RSCRATCH, &block_changed);
    offset += 8;
    size -= 8;
  }

  while (size >= 4)
  {
    rvAsm->LWU(RARG3, offset, RARG1);
    rvAsm->LWU(RSCRATCH, offset, RARG2);
    rvAsm->BNE(RARG3, RSCRATCH, &block_changed);
    offset += 4;
    size -= 4;
  }

  DebugAssert(size == 0);

  Label block_unchanged;
  rvAsm->J(&block_unchanged);
  rvAsm->Bind(&block_changed);
  rvEmitJmp(rvAsm, CodeCache::g_discard_and_recompile_block);
  rvAsm->Bind(&block_unchanged);
}
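// Annotation (not in the original source): the next function accounts for instruction fetch
// time. For uncached blocks it simply adds the uncached fetch ticks to pending_ticks; for
// cached blocks it compares each icache line tag against the block's address and only charges
// the fill time on a miss, updating the stored tag as it goes.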
void CPU::NewRec::RISCV64Compiler::GenerateICacheCheckAndUpdate()
{
  if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
  {
    if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
    {
      rvEmitFarLoad(rvAsm, RARG2, GetFetchMemoryAccessTimePtr());
      rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
      rvEmitMov(rvAsm, RARG3, m_block->size);
      rvAsm->MULW(RARG2, RARG2, RARG3);
      rvAsm->ADD(RARG1, RARG1, RARG2);
      rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
    }
    else
    {
      rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
      SafeADDIW(RARG1, RARG1, static_cast<u32>(m_block->uncached_fetch_ticks));
      rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
    }
  }
  else if (m_block->icache_line_count > 0)
  {
    const auto& ticks_reg = RARG1;
    const auto& current_tag_reg = RARG2;
    const auto& existing_tag_reg = RARG3;

    VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK;
    rvAsm->LW(ticks_reg, PTR(&g_state.pending_ticks));
    rvEmitMov(rvAsm, current_tag_reg, current_pc);

    for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
    {
      const TickCount fill_ticks = GetICacheFillTicks(current_pc);
      if (fill_ticks <= 0)
        continue;

      const u32 line = GetICacheLine(current_pc);
      const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32));

      // TODO: Verify sign extension here...
      Label cache_hit;
      rvAsm->LW(existing_tag_reg, offset, RSTATE);
      rvAsm->BEQ(existing_tag_reg, current_tag_reg, &cache_hit);

      rvAsm->SW(current_tag_reg, offset, RSTATE);
      SafeADDIW(ticks_reg, ticks_reg, static_cast<u32>(fill_ticks));
      rvAsm->Bind(&cache_hit);

      if (i != (m_block->icache_line_count - 1))
        SafeADDIW(current_tag_reg, current_tag_reg, ICACHE_LINE_SIZE);
    }

    rvAsm->SW(ticks_reg, PTR(&g_state.pending_ticks));
  }
}

void CPU::NewRec::RISCV64Compiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/,
                                                s32 arg3reg /*= -1*/)
{
  if (arg1reg >= 0 && arg1reg != static_cast<s32>(RARG1.Index()))
    rvAsm->MV(RARG1, GPR(arg1reg));
  if (arg2reg >= 0 && arg2reg != static_cast<s32>(RARG2.Index()))
    rvAsm->MV(RARG2, GPR(arg2reg));
  if (arg3reg >= 0 && arg3reg != static_cast<s32>(RARG3.Index()))
    rvAsm->MV(RARG3, GPR(arg3reg));
  EmitCall(func);
}

void CPU::NewRec::RISCV64Compiler::EndBlock(const std::optional<u32>& newpc, bool do_event_test)
{
  if (newpc.has_value())
  {
    if (m_dirty_pc || m_compiler_pc != newpc)
    {
      EmitMov(RSCRATCH, newpc.value());
      rvAsm->SW(RSCRATCH, PTR(&g_state.pc));
    }
  }
  m_dirty_pc = false;

  // flush regs
  Flush(FLUSH_END_BLOCK);
  EndAndLinkBlock(newpc, do_event_test, false);
}

void CPU::NewRec::RISCV64Compiler::EndBlockWithException(Exception excode)
{
  // flush regs, but not pc, it's going to get overwritten
  // flush cycles because of the GTE instruction stuff...
  Flush(FLUSH_END_BLOCK | FLUSH_FOR_EXCEPTION | FLUSH_FOR_C_CALL);

  // TODO: flush load delay
  // TODO: break for pcdrv

  EmitMov(RARG1, Cop0Registers::CAUSE::MakeValueForException(excode, m_current_instruction_branch_delay_slot, false,
                                                             inst->cop.cop_n));
  EmitMov(RARG2, m_current_instruction_pc);
  EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));
  m_dirty_pc = false;

  EndAndLinkBlock(std::nullopt, true, false);
}

void CPU::NewRec::RISCV64Compiler::EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test,
                                                   bool force_run_events)
{
  // event test
  // pc should've been flushed
  DebugAssert(!m_dirty_pc && !m_block_ended);
  m_block_ended = true;

  // TODO: try extracting this to a function
  // TODO: move the cycle flush in here..

  // save cycles for event test
  const TickCount cycles = std::exchange(m_cycles, 0);

  // pending_ticks += cycles
  // if (pending_ticks >= downcount) { dispatch_event(); }
  if (do_event_test || m_gte_done_cycle > cycles || cycles > 0)
    rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
  if (do_event_test)
    rvAsm->LW(RARG2, PTR(&g_state.downcount));
  if (cycles > 0)
  {
    SafeADDIW(RARG1, RARG1, cycles);
    rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
  }
  if (m_gte_done_cycle > cycles)
  {
    SafeADDIW(RARG2, RARG1, m_gte_done_cycle - cycles);
    rvAsm->SW(RARG2, PTR(&g_state.gte_completion_tick));
  }

  if (do_event_test)
  {
    // TODO: see if we can do a far jump somehow with this..
    Label cont;
    rvAsm->BLT(RARG1, RARG2, &cont);
    rvEmitJmp(rvAsm, CodeCache::g_run_events_and_dispatch);
    rvAsm->Bind(&cont);
  }

  // jump to dispatcher or next block
  if (force_run_events)
  {
    rvEmitJmp(rvAsm, CodeCache::g_run_events_and_dispatch);
  }
  else if (!newpc.has_value())
  {
    rvEmitJmp(rvAsm, CodeCache::g_dispatcher);
  }
  else
  {
    if (newpc.value() == m_block->pc)
    {
      // Special case: ourselves! No need to backlink then.
      DEBUG_LOG("Linking block at {:08X} to self", m_block->pc);
      rvEmitJmp(rvAsm, rvAsm->GetBufferPointer(0));
    }
    else
    {
      const void* target = CreateBlockLink(m_block, rvAsm->GetCursorPointer(), newpc.value());
      rvEmitJmp(rvAsm, target);
    }
  }
}

const void* CPU::NewRec::RISCV64Compiler::EndCompile(u32* code_size, u32* far_code_size)
{
  u8* const code = m_emitter->GetBufferPointer(0);
  *code_size = static_cast<u32>(m_emitter->GetCodeBuffer().GetSizeInBytes());
  *far_code_size = static_cast<u32>(m_far_emitter->GetCodeBuffer().GetSizeInBytes());
  rvAsm = nullptr;
  m_far_emitter.reset();
  m_emitter.reset();
  return code;
}

const char* CPU::NewRec::RISCV64Compiler::GetHostRegName(u32 reg) const
{
  static constexpr std::array<const char*, 32> reg64_names = {
    {"zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5",
     "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6"}};
  return (reg < reg64_names.size()) ? reg64_names[reg] : "UNKNOWN";
}

void CPU::NewRec::RISCV64Compiler::LoadHostRegWithConstant(u32 reg, u32 val)
{
  EmitMov(GPR(reg), val);
}

void CPU::NewRec::RISCV64Compiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr)
{
  rvAsm->LW(GPR(reg), PTR(ptr));
}

void CPU::NewRec::RISCV64Compiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr)
{
  rvAsm->SW(GPR(reg), PTR(ptr));
}

void CPU::NewRec::RISCV64Compiler::StoreConstantToCPUPointer(u32 val, const void* ptr)
{
  if (val == 0)
  {
    rvAsm->SW(zero, PTR(ptr));
    return;
  }

  EmitMov(RSCRATCH, val);
  rvAsm->SW(RSCRATCH, PTR(ptr));
}

void CPU::NewRec::RISCV64Compiler::CopyHostReg(u32 dst, u32 src)
{
  if (src != dst)
    rvAsm->MV(GPR(dst), GPR(src));
}

void CPU::NewRec::RISCV64Compiler::AssertRegOrConstS(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_s || cf.const_s);
}

void CPU::NewRec::RISCV64Compiler::AssertRegOrConstT(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_t || cf.const_t);
}

biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetSafeRegS(CompileFlags cf, const biscuit::GPR& temp_reg)
{
  if (cf.valid_host_s)
  {
    return GPR(cf.host_s);
  }
  else if (cf.const_s)
  {
    if (HasConstantRegValue(cf.MipsS(), 0))
      return zero;

    EmitMov(temp_reg, GetConstantRegU32(cf.MipsS()));
    return temp_reg;
  }
  else
  {
    WARNING_LOG("Hit memory path in CFGetSafeRegS() for {}", GetRegName(cf.MipsS()));
    rvAsm->LW(temp_reg, PTR(&g_state.regs.r[cf.mips_s]));
    return temp_reg;
  }
}

biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetSafeRegT(CompileFlags cf, const biscuit::GPR& temp_reg)
{
  if (cf.valid_host_t)
  {
    return GPR(cf.host_t);
  }
  else if (cf.const_t)
  {
    if (HasConstantRegValue(cf.MipsT(), 0))
      return zero;

    EmitMov(temp_reg, GetConstantRegU32(cf.MipsT()));
    return temp_reg;
  }
  else
  {
    WARNING_LOG("Hit memory path in CFGetSafeRegT() for {}", GetRegName(cf.MipsT()));
    rvAsm->LW(temp_reg, PTR(&g_state.regs.r[cf.mips_t]));
    return temp_reg;
  }
}

biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetRegD(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_d);
  return GPR(cf.host_d);
}

biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetRegS(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_s);
  return GPR(cf.host_s);
}

biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetRegT(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_t);
  return GPR(cf.host_t);
}

biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetRegLO(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_lo);
  return GPR(cf.host_lo);
}

biscuit::GPR CPU::NewRec::RISCV64Compiler::CFGetRegHI(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_hi);
  return GPR(cf.host_hi);
}

void CPU::NewRec::RISCV64Compiler::MoveSToReg(const biscuit::GPR& dst, CompileFlags cf)
{
  if (cf.valid_host_s)
  {
    if (cf.host_s != dst.Index())
      rvAsm->MV(dst, GPR(cf.host_s));
  }
  else if (cf.const_s)
  {
    EmitMov(dst, GetConstantRegU32(cf.MipsS()));
  }
  else
  {
    WARNING_LOG("Hit memory path in MoveSToReg() for {}", GetRegName(cf.MipsS()));
    rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_s]));
  }
}

void CPU::NewRec::RISCV64Compiler::MoveTToReg(const biscuit::GPR& dst, CompileFlags cf)
{
  if (cf.valid_host_t)
  {
    if (cf.host_t != dst.Index())
      rvAsm->MV(dst, GPR(cf.host_t));
  }
  else if (cf.const_t)
  {
    EmitMov(dst, GetConstantRegU32(cf.MipsT()));
  }
  else
  {
    WARNING_LOG("Hit memory path in MoveTToReg() for {}", GetRegName(cf.MipsT()));
    rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_t]));
  }
}

void CPU::NewRec::RISCV64Compiler::MoveMIPSRegToReg(const biscuit::GPR& dst, Reg reg)
{
  DebugAssert(reg < Reg::count);
  if (const std::optional<u32> hreg = CheckHostReg(0, Compiler::HR_TYPE_CPU_REG, reg))
    rvAsm->MV(dst, GPR(hreg.value()));
  else if (HasConstantReg(reg))
    EmitMov(dst, GetConstantRegU32(reg));
  else
    rvAsm->LW(dst, PTR(&g_state.regs.r[static_cast<u8>(reg)]));
}

void CPU::NewRec::RISCV64Compiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val,
                                                                Reg arg2reg /* = Reg::count */,
                                                                Reg arg3reg /* = Reg::count */)
{
  DebugAssert(g_settings.gpu_pgxp_enable);

  Flush(FLUSH_FOR_C_CALL);

  if (arg2reg != Reg::count)
    MoveMIPSRegToReg(RARG2, arg2reg);
  if (arg3reg != Reg::count)
    MoveMIPSRegToReg(RARG3, arg3reg);

  EmitMov(RARG1, arg1val);
  EmitCall(func);
}

void CPU::NewRec::RISCV64Compiler::Flush(u32 flags)
{
  Compiler::Flush(flags);

  if (flags & FLUSH_PC && m_dirty_pc)
  {
    StoreConstantToCPUPointer(m_compiler_pc, &g_state.pc);
    m_dirty_pc = false;
  }

  if (flags & FLUSH_INSTRUCTION_BITS)
  {
    // This sucks, but it's only used for fallbacks.
    Panic("Not implemented");
  }

  if (flags & FLUSH_LOAD_DELAY_FROM_STATE && m_load_delay_dirty)
  {
    // This sucks :(
    // TODO: make it a function?
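    // Annotation (not in the original source): this writes the pending load-delay value into
    // regs.r[load_delay_reg] via a computed offset from the state pointer, then resets
    // load_delay_reg to Reg::count so the delay slot is considered consumed.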
    rvAsm->LBU(RARG1, PTR(&g_state.load_delay_reg));
    rvAsm->LW(RARG2, PTR(&g_state.load_delay_value));
    rvAsm->SLLI(RARG1, RARG1, 2); // *4
    rvAsm->ADD(RARG1, RARG1, RSTATE);
    rvAsm->SW(RARG2, OFFSETOF(CPU::State, regs.r[0]), RARG1);
    rvAsm->LI(RSCRATCH, static_cast<u8>(Reg::count));
    rvAsm->SB(RSCRATCH, PTR(&g_state.load_delay_reg));
    m_load_delay_dirty = false;
  }

  if (flags & FLUSH_LOAD_DELAY && m_load_delay_register != Reg::count)
  {
    if (m_load_delay_value_register != NUM_HOST_REGS)
      FreeHostReg(m_load_delay_value_register);

    EmitMov(RSCRATCH, static_cast<u8>(m_load_delay_register));
    rvAsm->SB(RSCRATCH, PTR(&g_state.load_delay_reg));
    m_load_delay_register = Reg::count;
    m_load_delay_dirty = true;
  }

  if (flags & FLUSH_GTE_STALL_FROM_STATE && m_dirty_gte_done_cycle)
  {
    // May as well flush cycles while we're here.
    // GTE spanning blocks is very rare, we _could_ disable this for speed.
    rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
    rvAsm->LW(RARG2, PTR(&g_state.gte_completion_tick));
    if (m_cycles > 0)
    {
      SafeADDIW(RARG1, RARG1, m_cycles);
      m_cycles = 0;
    }
    Label no_stall;
    rvAsm->BGE(RARG1, RARG2, &no_stall);
    rvAsm->MV(RARG1, RARG2);
    rvAsm->Bind(&no_stall);
    rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
    m_dirty_gte_done_cycle = false;
  }

  if (flags & FLUSH_GTE_DONE_CYCLE && m_gte_done_cycle > m_cycles)
  {
    rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));

    // update cycles at the same time
    if (flags & FLUSH_CYCLES && m_cycles > 0)
    {
      SafeADDIW(RARG1, RARG1, m_cycles);
      rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
      m_gte_done_cycle -= m_cycles;
      m_cycles = 0;
    }

    SafeADDIW(RARG1, RARG1, m_gte_done_cycle);
    rvAsm->SW(RARG1, PTR(&g_state.gte_completion_tick));
    m_gte_done_cycle = 0;
    m_dirty_gte_done_cycle = true;
  }

  if (flags & FLUSH_CYCLES && m_cycles > 0)
  {
    rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
    SafeADDIW(RARG1, RARG1, m_cycles);
    rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
    m_gte_done_cycle = std::max<TickCount>(m_gte_done_cycle - m_cycles, 0);
    m_cycles = 0;
  }
}

void CPU::NewRec::RISCV64Compiler::Compile_Fallback()
{
  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", iinfo->pc, inst->bits);

  Flush(FLUSH_FOR_INTERPRETER);

#if 0
  cg->call(&CPU::Recompiler::Thunks::InterpretInstruction);

  // TODO: make me less garbage
  // TODO: this is wrong, it flushes the load delay on the same cycle when we return.
  // but nothing should be going through here..
  Label no_load_delay;
  cg->movzx(RWARG1, cg->byte[PTR(&g_state.next_load_delay_reg)]);
  cg->cmp(RWARG1, static_cast<u8>(Reg::count));
  cg->je(no_load_delay, CodeGenerator::T_SHORT);
  cg->mov(RWARG2, cg->dword[PTR(&g_state.next_load_delay_value)]);
  cg->mov(cg->byte[PTR(&g_state.load_delay_reg)], RWARG1);
  cg->mov(cg->dword[PTR(&g_state.load_delay_value)], RWARG2);
  cg->mov(cg->byte[PTR(&g_state.next_load_delay_reg)], static_cast<u32>(Reg::count));
  cg->L(no_load_delay);

  m_load_delay_dirty = EMULATE_LOAD_DELAYS;
#else
  Panic("Fixme");
#endif
}

void CPU::NewRec::RISCV64Compiler::CheckBranchTarget(const biscuit::GPR& pcreg)
{
  if (!g_settings.cpu_recompiler_memory_exceptions)
    return;

  DebugAssert(pcreg != RSCRATCH);
  rvAsm->ANDI(RSCRATCH, pcreg, 0x3);
  SwitchToFarCode(true, &Assembler::BEQ, RSCRATCH, zero);

  BackupHostState();
  EndBlockWithException(Exception::AdEL);

  RestoreHostState();
  SwitchToNearCode(false);
}

void CPU::NewRec::RISCV64Compiler::Compile_jr(CompileFlags cf)
{
  const GPR pcreg = CFGetRegS(cf);
  CheckBranchTarget(pcreg);

  rvAsm->SW(pcreg, PTR(&g_state.pc));

  CompileBranchDelaySlot(false);
  EndBlock(std::nullopt, true);
}

void CPU::NewRec::RISCV64Compiler::Compile_jalr(CompileFlags cf)
{
  const GPR pcreg = CFGetRegS(cf);
  if (MipsD() != Reg::zero)
    SetConstantReg(MipsD(), GetBranchReturnAddress(cf));

  CheckBranchTarget(pcreg);
  rvAsm->SW(pcreg, PTR(&g_state.pc));

  CompileBranchDelaySlot(false);
  EndBlock(std::nullopt, true);
}

void CPU::NewRec::RISCV64Compiler::Compile_bxx(CompileFlags cf, BranchCondition cond)
{
  AssertRegOrConstS(cf);

  const u32 taken_pc = GetConditionalBranchTarget(cf);

  Flush(FLUSH_FOR_BRANCH);

  DebugAssert(cf.valid_host_s);

  // MipsT() here should equal zero for zero branches.
  DebugAssert(cond == BranchCondition::Equal || cond == BranchCondition::NotEqual || cf.MipsT() == Reg::zero);

  Label taken;
  const GPR rs = CFGetRegS(cf);
  switch (cond)
  {
    case BranchCondition::Equal:
    case BranchCondition::NotEqual:
    {
      AssertRegOrConstT(cf);
      if (cf.const_t && HasConstantRegValue(cf.MipsT(), 0))
      {
        (cond == BranchCondition::Equal) ? rvAsm->BEQZ(rs, &taken) : rvAsm->BNEZ(rs, &taken);
      }
      else
      {
        const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG1;
        if (!cf.valid_host_t)
          MoveTToReg(RARG1, cf);
        if (cond == Compiler::BranchCondition::Equal)
          rvAsm->BEQ(rs, rt, &taken);
        else
          rvAsm->BNE(rs, rt, &taken);
      }
    }
    break;

    case BranchCondition::GreaterThanZero:
    {
      rvAsm->BGTZ(rs, &taken);
    }
    break;

    case BranchCondition::GreaterEqualZero:
    {
      rvAsm->BGEZ(rs, &taken);
    }
    break;

    case BranchCondition::LessThanZero:
    {
      rvAsm->BLTZ(rs, &taken);
    }
    break;

    case BranchCondition::LessEqualZero:
    {
      rvAsm->BLEZ(rs, &taken);
    }
    break;
  }

  BackupHostState();
  if (!cf.delay_slot_swapped)
    CompileBranchDelaySlot();

  EndBlock(m_compiler_pc, true);

  rvAsm->Bind(&taken);

  RestoreHostState();
  if (!cf.delay_slot_swapped)
    CompileBranchDelaySlot();

  EndBlock(taken_pc, true);
}

void CPU::NewRec::RISCV64Compiler::Compile_addi(CompileFlags cf, bool overflow)
{
  const GPR rs = CFGetRegS(cf);
  const GPR rt = CFGetRegT(cf);
  if (const u32 imm = inst->i.imm_sext32(); imm != 0)
  {
    if (!overflow)
    {
      SafeADDIW(rt, rs, imm);
    }
    else
    {
      SafeADDI(RARG1, rs, imm);
      SafeADDIW(rt, rs, imm);
      TestOverflow(RARG1, rt, rt);
    }
  }
  else if (rt.Index() != rs.Index())
  {
    rvAsm->MV(rt, rs);
  }
}

void CPU::NewRec::RISCV64Compiler::Compile_addi(CompileFlags cf)
{
  Compile_addi(cf, g_settings.cpu_recompiler_memory_exceptions);
}

void CPU::NewRec::RISCV64Compiler::Compile_addiu(CompileFlags cf)
{
  Compile_addi(cf, false);
}

void CPU::NewRec::RISCV64Compiler::Compile_slti(CompileFlags cf)
{
  Compile_slti(cf, true);
}

void CPU::NewRec::RISCV64Compiler::Compile_sltiu(CompileFlags cf)
{
  Compile_slti(cf, false);
}

void CPU::NewRec::RISCV64Compiler::Compile_slti(CompileFlags cf, bool sign)
{
  if (sign)
    SafeSLTI(CFGetRegT(cf), CFGetRegS(cf), inst->i.imm_sext32());
  else
    SafeSLTIU(CFGetRegT(cf), CFGetRegS(cf), inst->i.imm_sext32());
}

void CPU::NewRec::RISCV64Compiler::Compile_andi(CompileFlags cf)
{
  const GPR rt = CFGetRegT(cf);
  if (const u32 imm = inst->i.imm_zext32(); imm != 0)
    SafeANDI(rt, CFGetRegS(cf), imm);
  else
    EmitMov(rt, 0);
}

void CPU::NewRec::RISCV64Compiler::Compile_ori(CompileFlags cf)
{
  const GPR rt = CFGetRegT(cf);
  const GPR rs = CFGetRegS(cf);
  if (const u32 imm = inst->i.imm_zext32(); imm != 0)
    SafeORI(rt, rs, imm);
  else if (rt.Index() != rs.Index())
    rvAsm->MV(rt, rs);
}

void CPU::NewRec::RISCV64Compiler::Compile_xori(CompileFlags cf)
{
  const GPR rt = CFGetRegT(cf);
  const GPR rs = CFGetRegS(cf);
  if (const u32 imm = inst->i.imm_zext32(); imm != 0)
    SafeXORI(rt, rs, imm);
  else if (rt.Index() != rs.Index())
    rvAsm->MV(rt, rs);
}

void CPU::NewRec::RISCV64Compiler::Compile_shift(
  CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
  void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned))
{
  const GPR rd = CFGetRegD(cf);
  const GPR rt = CFGetRegT(cf);
  if (inst->r.shamt > 0)
    (rvAsm->*op_const)(rd, rt, inst->r.shamt);
  else if (rd.Index() != rt.Index())
    rvAsm->MV(rd, rt);
}

void CPU::NewRec::RISCV64Compiler::Compile_sll(CompileFlags cf)
{
  Compile_shift(cf, &Assembler::SLLW, &Assembler::SLLIW);
}

void CPU::NewRec::RISCV64Compiler::Compile_srl(CompileFlags cf)
{
  Compile_shift(cf, &Assembler::SRLW, &Assembler::SRLIW);
}

void CPU::NewRec::RISCV64Compiler::Compile_sra(CompileFlags cf)
{
  Compile_shift(cf, &Assembler::SRAW, &Assembler::SRAIW);
}

void CPU::NewRec::RISCV64Compiler::Compile_variable_shift(
  CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
  void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned))
{
  const GPR rd = CFGetRegD(cf);

  AssertRegOrConstS(cf);
  AssertRegOrConstT(cf);

  const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
  if (!cf.valid_host_t)
    MoveTToReg(rt, cf);

  if (cf.const_s)
  {
    if (const u32 shift = GetConstantRegU32(cf.MipsS()); shift != 0)
      (rvAsm->*op_const)(rd, rt, shift & 31u);
    else if (rd.Index() != rt.Index())
      rvAsm->MV(rd, rt);
  }
  else
  {
    (rvAsm->*op)(rd, rt, CFGetRegS(cf));
  }
}

void CPU::NewRec::RISCV64Compiler::Compile_sllv(CompileFlags cf)
{
  Compile_variable_shift(cf, &Assembler::SLLW, &Assembler::SLLIW);
}

void CPU::NewRec::RISCV64Compiler::Compile_srlv(CompileFlags cf)
{
  Compile_variable_shift(cf, &Assembler::SRLW, &Assembler::SRLIW);
}

void CPU::NewRec::RISCV64Compiler::Compile_srav(CompileFlags cf)
{
  Compile_variable_shift(cf, &Assembler::SRAW, &Assembler::SRAIW);
}

void CPU::NewRec::RISCV64Compiler::Compile_mult(CompileFlags cf, bool sign)
{
  const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
  if (!cf.valid_host_s)
    MoveSToReg(rs, cf);

  const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
  if (!cf.valid_host_t)
    MoveTToReg(rt, cf);

  // TODO: if lo/hi gets killed, we can use a 32-bit multiply
  const GPR lo = CFGetRegLO(cf);
  const GPR hi = CFGetRegHI(cf);

  if (sign)
  {
    rvAsm->MUL(lo, rs, rt);
    rvAsm->SRAI64(hi, lo, 32);
    EmitDSExtW(lo, lo);
  }
  else
  {
    // Need to make it unsigned.
    EmitDUExtW(RARG1, rs);
    EmitDUExtW(RARG2, rt);
    rvAsm->MUL(lo, RARG1, RARG2);
    rvAsm->SRAI64(hi, lo, 32);
    EmitDSExtW(lo, lo);
  }
}

void CPU::NewRec::RISCV64Compiler::Compile_mult(CompileFlags cf)
{
  Compile_mult(cf, true);
}

void CPU::NewRec::RISCV64Compiler::Compile_multu(CompileFlags cf)
{
  Compile_mult(cf, false);
}
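// Annotation (not in the original source): the R3000A's DIV leaves defined results in the
// exceptional cases, which the code below reproduces explicitly: dividing by zero yields
// hi = rs and lo = (rs >= 0 ? -1 : 1), and 0x80000000 / -1 yields lo = 0x80000000, hi = 0,
// so the result does not depend on the host's DIVW/REMW behaviour for those inputs.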

void CPU::NewRec::RISCV64Compiler::Compile_div(CompileFlags cf)
{
  // 36 Volume I: RISC-V User-Level ISA V2.2
  const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
  if (!cf.valid_host_s)
    MoveSToReg(rs, cf);

  const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
  if (!cf.valid_host_t)
    MoveTToReg(rt, cf);

  const GPR rlo = CFGetRegLO(cf);
  const GPR rhi = CFGetRegHI(cf);

  Label done;
  Label not_divide_by_zero;
  rvAsm->BNEZ(rt, &not_divide_by_zero);
  rvAsm->MV(rhi, rs); // hi = num
  rvAsm->SRAI64(rlo, rs, 63);
  rvAsm->ANDI(rlo, rlo, 2);
  rvAsm->ADDI(rlo, rlo, -1); // lo = s >= 0 ? -1 : 1
  rvAsm->J(&done);

  rvAsm->Bind(&not_divide_by_zero);
  Label not_unrepresentable;
  EmitMov(RSCRATCH, static_cast<u32>(-1));
  rvAsm->BNE(rt, RSCRATCH, &not_unrepresentable);
  EmitMov(rlo, 0x80000000u);
  rvAsm->BNE(rs, rlo, &not_unrepresentable);
  EmitMov(rhi, 0);
  rvAsm->J(&done);

  rvAsm->Bind(&not_unrepresentable);

  rvAsm->DIVW(rlo, rs, rt);
  rvAsm->REMW(rhi, rs, rt);

  rvAsm->Bind(&done);
}

void CPU::NewRec::RISCV64Compiler::Compile_divu(CompileFlags cf)
{
  const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
  if (!cf.valid_host_s)
    MoveSToReg(rs, cf);

  const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
  if (!cf.valid_host_t)
    MoveTToReg(rt, cf);

  const GPR rlo = CFGetRegLO(cf);
  const GPR rhi = CFGetRegHI(cf);

  // Semantics match? :-)
  rvAsm->DIVUW(rlo, rs, rt);
  rvAsm->REMUW(rhi, rs, rt);
}

void CPU::NewRec::RISCV64Compiler::TestOverflow(const biscuit::GPR& long_res, const biscuit::GPR& res,
                                                const biscuit::GPR& reg_to_discard)
{
  SwitchToFarCode(true, &Assembler::BEQ, long_res, res);

  BackupHostState();

  // toss the result
  ClearHostReg(reg_to_discard.Index());

  EndBlockWithException(Exception::Ov);

  RestoreHostState();

  SwitchToNearCode(false);
}

void CPU::NewRec::RISCV64Compiler::Compile_dst_op(
  CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
  void (RISCV64Compiler::*op_const)(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm),
  void (biscuit::Assembler::*op_long)(biscuit::GPR, biscuit::GPR, biscuit::GPR), bool commutative, bool overflow)
{
  AssertRegOrConstS(cf);
  AssertRegOrConstT(cf);

  const GPR rd = CFGetRegD(cf);

  if (overflow)
  {
    const GPR rs = CFGetSafeRegS(cf, RARG1);
    const GPR rt = CFGetSafeRegT(cf, RARG2);
    (rvAsm->*op)(RARG3, rs, rt);
    (rvAsm->*op_long)(rd, rs, rt);
    TestOverflow(RARG3, rd, rd);
    return;
  }

  if (cf.valid_host_s && cf.valid_host_t)
  {
    (rvAsm->*op)(rd, CFGetRegS(cf), CFGetRegT(cf));
  }
  else if (commutative && (cf.const_s || cf.const_t))
  {
    const GPR src = cf.const_s ? CFGetRegT(cf) : CFGetRegS(cf);
    if (const u32 cv = GetConstantRegU32(cf.const_s ? cf.MipsS() : cf.MipsT()); cv != 0)
    {
      (this->*op_const)(rd, src, cv);
    }
    else
    {
      if (rd.Index() != src.Index())
        rvAsm->MV(rd, src);
      overflow = false;
    }
  }
  else if (cf.const_s)
  {
    if (HasConstantRegValue(cf.MipsS(), 0))
    {
      (rvAsm->*op)(rd, zero, CFGetRegT(cf));
    }
    else
    {
      EmitMov(RSCRATCH, GetConstantRegU32(cf.MipsS()));
      (rvAsm->*op)(rd, RSCRATCH, CFGetRegT(cf));
    }
  }
  else if (cf.const_t)
  {
    const GPR rs = CFGetRegS(cf);
    if (const u32 cv = GetConstantRegU32(cf.const_s ? cf.MipsS() : cf.MipsT()); cv != 0)
    {
      (this->*op_const)(rd, rs, cv);
    }
    else
    {
      if (rd.Index() != rs.Index())
        rvAsm->MV(rd, rs);
      overflow = false;
    }
  }
}

void CPU::NewRec::RISCV64Compiler::Compile_add(CompileFlags cf)
{
  Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Compiler::SafeADDIW, &Assembler::ADD, true,
                 g_settings.cpu_recompiler_memory_exceptions);
}

void CPU::NewRec::RISCV64Compiler::Compile_addu(CompileFlags cf)
{
  Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Compiler::SafeADDIW, &Assembler::ADD, true, false);
}

void CPU::NewRec::RISCV64Compiler::Compile_sub(CompileFlags cf)
{
  Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Compiler::SafeSUBIW, &Assembler::SUB, false,
                 g_settings.cpu_recompiler_memory_exceptions);
}

void CPU::NewRec::RISCV64Compiler::Compile_subu(CompileFlags cf)
{
  Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Compiler::SafeSUBIW, &Assembler::SUB, false, false);
}

void CPU::NewRec::RISCV64Compiler::Compile_and(CompileFlags cf)
{
  AssertRegOrConstS(cf);
  AssertRegOrConstT(cf);

  // special cases - and with self -> self, and with 0 -> 0
  const GPR regd = CFGetRegD(cf);
  if (cf.MipsS() == cf.MipsT())
  {
    rvAsm->MV(regd, CFGetRegS(cf));
    return;
  }
  else if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0))
  {
    EmitMov(regd, 0);
    return;
  }

  Compile_dst_op(cf, &Assembler::AND, &RISCV64Compiler::SafeANDI, &Assembler::AND, true, false);
}

void CPU::NewRec::RISCV64Compiler::Compile_or(CompileFlags cf)
{
  AssertRegOrConstS(cf);
  AssertRegOrConstT(cf);

  // or/nor with 0 -> no effect
  const GPR regd = CFGetRegD(cf);
  if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0) || cf.MipsS() == cf.MipsT())
  {
    cf.const_s ? MoveTToReg(regd, cf) : MoveSToReg(regd, cf);
    return;
  }

  Compile_dst_op(cf, &Assembler::OR, &RISCV64Compiler::SafeORI, &Assembler::OR, true, false);
}

void CPU::NewRec::RISCV64Compiler::Compile_xor(CompileFlags cf)
{
  AssertRegOrConstS(cf);
  AssertRegOrConstT(cf);

  const GPR regd = CFGetRegD(cf);
  if (cf.MipsS() == cf.MipsT())
  {
    // xor with self -> zero
    EmitMov(regd, 0);
    return;
  }
  else if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0))
  {
    // xor with zero -> no effect
    cf.const_s ? MoveTToReg(regd, cf) : MoveSToReg(regd, cf);
    return;
  }

  Compile_dst_op(cf, &Assembler::XOR, &RISCV64Compiler::SafeXORI, &Assembler::XOR, true, false);
}

void CPU::NewRec::RISCV64Compiler::Compile_nor(CompileFlags cf)
{
  Compile_or(cf);
  rvAsm->NOT(CFGetRegD(cf), CFGetRegD(cf));
}

void CPU::NewRec::RISCV64Compiler::Compile_slt(CompileFlags cf)
{
  Compile_slt(cf, true);
}

void CPU::NewRec::RISCV64Compiler::Compile_sltu(CompileFlags cf)
{
  Compile_slt(cf, false);
}

void CPU::NewRec::RISCV64Compiler::Compile_slt(CompileFlags cf, bool sign)
{
  AssertRegOrConstS(cf);
  AssertRegOrConstT(cf);

  const GPR rd = CFGetRegD(cf);
  const GPR rs = CFGetSafeRegS(cf, RARG1);

  if (cf.const_t && rvIsValidSExtITypeImm(GetConstantRegU32(cf.MipsT())))
  {
    if (sign)
      rvAsm->SLTI(rd, rs, GetConstantRegS32(cf.MipsT()));
    else
      rvAsm->SLTIU(rd, rs, GetConstantRegS32(cf.MipsT()));
  }
  else
  {
    const GPR rt = CFGetSafeRegT(cf, RARG2);
    if (sign)
      rvAsm->SLT(rd, rs, rt);
    else
      rvAsm->SLTU(rd, rs, rt);
  }
}

biscuit::GPR CPU::NewRec::RISCV64Compiler::ComputeLoadStoreAddressArg(
  CompileFlags cf, const std::optional<VirtualMemoryAddress>& address, const std::optional<const biscuit::GPR>& reg)
{
  const u32 imm = inst->i.imm_sext32();
  if (cf.valid_host_s && imm == 0 && !reg.has_value())
    return CFGetRegS(cf);

  const GPR dst = reg.has_value() ? reg.value() : RARG1;
  if (address.has_value())
  {
    EmitMov(dst, address.value());
  }
  else if (imm == 0)
  {
    if (cf.valid_host_s)
    {
      if (const GPR src = CFGetRegS(cf); src.Index() != dst.Index())
        rvAsm->MV(dst, CFGetRegS(cf));
    }
    else
    {
      rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_s]));
    }
  }
  else
  {
    if (cf.valid_host_s)
    {
      SafeADDIW(dst, CFGetRegS(cf), inst->i.imm_sext32());
    }
    else
    {
      rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_s]));
      SafeADDIW(dst, dst, inst->i.imm_sext32());
    }
  }

  return dst;
}

template<typename RegAllocFn>
biscuit::GPR CPU::NewRec::RISCV64Compiler::GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign,
                                                        bool use_fastmem, const RegAllocFn& dst_reg_alloc)
{
  if (use_fastmem)
  {
    m_cycles += Bus::RAM_READ_TICKS;

    // TODO: Make this better. If we're loading the address from state, we can use LWU instead, and skip this.
    // TODO: LUT fastmem
    const GPR dst = dst_reg_alloc();
    rvAsm->SLLI64(RSCRATCH, addr_reg, 32);
    rvAsm->SRLI64(RSCRATCH, RSCRATCH, 32);

    if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
    {
      DebugAssert(addr_reg.Index() != RARG3.Index());
      rvAsm->SRLI64(RARG3, RSCRATCH, Bus::FASTMEM_LUT_PAGE_SHIFT);
      rvAsm->SLLI64(RARG3, RARG3, 8);
      rvAsm->ADD(RARG3, RARG3, RMEMBASE);
      rvAsm->LD(RARG3, 0, RARG3);
      rvAsm->ADD(RSCRATCH, RSCRATCH, RARG3);
    }
    else
    {
      rvAsm->ADD(RSCRATCH, RSCRATCH, RMEMBASE);
    }

    u8* start = m_emitter->GetCursorPointer();
    switch (size)
    {
      case MemoryAccessSize::Byte:
        sign ? rvAsm->LB(dst, 0, RSCRATCH) : rvAsm->LBU(dst, 0, RSCRATCH);
        break;

      case MemoryAccessSize::HalfWord:
        sign ? rvAsm->LH(dst, 0, RSCRATCH) : rvAsm->LHU(dst, 0, RSCRATCH);
        break;

      case MemoryAccessSize::Word:
        rvAsm->LW(dst, 0, RSCRATCH);
        break;
    }

    // We need a nop, because the slowmem jump might be more than 1MB away.
    rvAsm->NOP();

    AddLoadStoreInfo(start, 8, addr_reg.Index(), dst.Index(), size, sign, true);
    return dst;
  }

  if (addr_reg.Index() != RARG1.Index())
    rvAsm->MV(RARG1, addr_reg);

  const bool checked = g_settings.cpu_recompiler_memory_exceptions;
  switch (size)
  {
    case MemoryAccessSize::Byte:
    {
      EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::ReadMemoryByte) :
                         reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryByte));
    }
    break;
    case MemoryAccessSize::HalfWord:
    {
      EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::ReadMemoryHalfWord) :
                         reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryHalfWord));
    }
    break;
    case MemoryAccessSize::Word:
    {
      EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::ReadMemoryWord) :
                         reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryWord));
    }
    break;
  }

  // TODO: turn this into an asm function instead
  if (checked)
  {
    rvAsm->SRLI64(RSCRATCH, RRET, 63);
    SwitchToFarCode(true, &Assembler::BEQ, RSCRATCH, zero);
    BackupHostState();

    // Need to stash this in a temp because of the flush.
    const GPR temp = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));
    rvAsm->NEG(temp, RRET);
    rvAsm->SLLIW(temp, temp, 2);

    Flush(FLUSH_FOR_C_CALL | FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_FOR_EXCEPTION);

    // cause_bits = (-result << 2) | BD | cop_n
    SafeORI(RARG1, temp,
            Cop0Registers::CAUSE::MakeValueForException(
              static_cast<Exception>(0), m_current_instruction_branch_delay_slot, false, inst->cop.cop_n));
    EmitMov(RARG2, m_current_instruction_pc);
    EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));
    FreeHostReg(temp.Index());
    EndBlock(std::nullopt, true);

    RestoreHostState();
    SwitchToNearCode(false);
  }

  const GPR dst_reg = dst_reg_alloc();
  switch (size)
  {
    case MemoryAccessSize::Byte:
    {
      sign ? EmitSExtB(dst_reg, RRET) : EmitUExtB(dst_reg, RRET);
    }
    break;
    case MemoryAccessSize::HalfWord:
    {
      sign ? EmitSExtH(dst_reg, RRET) : EmitUExtH(dst_reg, RRET);
    }
    break;
    case MemoryAccessSize::Word:
    {
      // Need to undo the zero-extend.
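      // Annotation (not in the original source): the checked read thunk returns a 64-bit value
      // whose sign bit flags a failed access (tested via SRLI64 above), so on the success path
      // the low 32 bits are re-sign-extended into the destination register here.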
      if (checked)
        rvEmitDSExtW(rvAsm, dst_reg, RRET);
      else if (dst_reg.Index() != RRET.Index())
        rvAsm->MV(dst_reg, RRET);
    }
    break;
  }

  return dst_reg;
}

void CPU::NewRec::RISCV64Compiler::GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg,
                                                 MemoryAccessSize size, bool use_fastmem)
{
  if (use_fastmem)
  {
    DebugAssert(value_reg != RSCRATCH);
    rvAsm->SLLI64(RSCRATCH, addr_reg, 32);
    rvAsm->SRLI64(RSCRATCH, RSCRATCH, 32);

    if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
    {
      DebugAssert(addr_reg.Index() != RARG3.Index());
      rvAsm->SRLI64(RARG3, RSCRATCH, Bus::FASTMEM_LUT_PAGE_SHIFT);
      rvAsm->SLLI64(RARG3, RARG3, 8);
      rvAsm->ADD(RARG3, RARG3, RMEMBASE);
      rvAsm->LD(RARG3, 0, RARG3);
      rvAsm->ADD(RSCRATCH, RSCRATCH, RARG3);
    }
    else
    {
      rvAsm->ADD(RSCRATCH, RSCRATCH, RMEMBASE);
    }

    u8* start = m_emitter->GetCursorPointer();
    switch (size)
    {
      case MemoryAccessSize::Byte:
        rvAsm->SB(value_reg, 0, RSCRATCH);
        break;

      case MemoryAccessSize::HalfWord:
        rvAsm->SH(value_reg, 0, RSCRATCH);
        break;

      case MemoryAccessSize::Word:
        rvAsm->SW(value_reg, 0, RSCRATCH);
        break;
    }

    // We need a nop, because the slowmem jump might be more than 1MB away.
    rvAsm->NOP();

    AddLoadStoreInfo(start, 8, addr_reg.Index(), value_reg.Index(), size, false, false);
    return;
  }

  if (addr_reg.Index() != RARG1.Index())
    rvAsm->MV(RARG1, addr_reg);
  if (value_reg.Index() != RARG2.Index())
    rvAsm->MV(RARG2, value_reg);

  const bool checked = g_settings.cpu_recompiler_memory_exceptions;
  switch (size)
  {
    case MemoryAccessSize::Byte:
    {
      EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::WriteMemoryByte) :
                         reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryByte));
    }
    break;
    case MemoryAccessSize::HalfWord:
    {
      EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::WriteMemoryHalfWord) :
                         reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryHalfWord));
    }
    break;
    case MemoryAccessSize::Word:
    {
      EmitCall(checked ? reinterpret_cast<const void*>(&Recompiler::Thunks::WriteMemoryWord) :
                         reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryWord));
    }
    break;
  }

  // TODO: turn this into an asm function instead
  if (checked)
  {
    SwitchToFarCode(true, &Assembler::BEQ, RRET, zero);
    BackupHostState();

    // Need to stash this in a temp because of the flush.
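    // Annotation (not in the original source): a non-zero return from the checked write thunk
    // appears to be the exception code for the failed access; it is shifted into the Excode
    // position and OR'd with the BD/CE bits below to build CAUSE before raising the exception.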
    const GPR temp = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));
    rvAsm->SLLIW(temp, RRET, 2);

    Flush(FLUSH_FOR_C_CALL | FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_FOR_EXCEPTION);

    // cause_bits = (result << 2) | BD | cop_n
    SafeORI(RARG1, temp,
            Cop0Registers::CAUSE::MakeValueForException(
              static_cast<Exception>(0), m_current_instruction_branch_delay_slot, false, inst->cop.cop_n));
    EmitMov(RARG2, m_current_instruction_pc);
    EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));
    FreeHostReg(temp.Index());
    EndBlock(std::nullopt, true);

    RestoreHostState();
    SwitchToNearCode(false);
  }
}

void CPU::NewRec::RISCV64Compiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                               const std::optional<VirtualMemoryAddress>& address)
{
  const std::optional<GPR> addr_reg = (g_settings.gpu_pgxp_enable && cf.MipsT() != Reg::zero) ?
                                        std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) :
                                        std::optional<GPR>();
  FlushForLoadStore(address, false, use_fastmem);
  const GPR addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
  const GPR data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() {
    if (cf.MipsT() == Reg::zero)
      return RRET;

    return GPR(AllocateHostReg(GetFlagsForNewLoadDelayedReg(),
                               EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, cf.MipsT()));
  });

  if (g_settings.gpu_pgxp_enable && cf.MipsT() != Reg::zero)
  {
    Flush(FLUSH_FOR_C_CALL);

    EmitMov(RARG1, inst->bits);
    rvAsm->MV(RARG2, addr);
    rvAsm->MV(RARG3, data);
    EmitCall(s_pgxp_mem_load_functions[static_cast<u32>(size)][static_cast<u32>(sign)]);
    FreeHostReg(addr_reg.value().Index());
  }
}

void CPU::NewRec::RISCV64Compiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                               const std::optional<VirtualMemoryAddress>& address)
{
  DebugAssert(size == MemoryAccessSize::Word && !sign);

  const GPR addr = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));
  FlushForLoadStore(address, false, use_fastmem);

  // TODO: if address is constant, this can be simplified..

  // If we're coming from another block, just flush the load delay and hope for the best..
  if (m_load_delay_dirty)
    UpdateLoadDelay();

  // We'd need to be careful here if we weren't overwriting it..
  ComputeLoadStoreAddressArg(cf, address, addr);
  rvAsm->ANDI(RARG1, addr, ~0x3u);
  GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });

  if (inst->r.rt == Reg::zero)
  {
    FreeHostReg(addr.Index());
    return;
  }

  // lwl/lwr from a load-delayed value takes the new value, but it itself, is load delayed, so the original value is
  // never written back. NOTE: can't trust T in cf because of the flush
  const Reg rt = inst->r.rt;
  GPR value;
  if (m_load_delay_register == rt)
  {
                                 AllocateHostReg(HR_MODE_READ, HR_TYPE_LOAD_DELAY_VALUE, rt) :
                                 m_load_delay_value_register;
    RenameHostReg(existing_ld_rt, HR_MODE_WRITE, HR_TYPE_NEXT_LOAD_DELAY_VALUE, rt);
    value = GPR(existing_ld_rt);
  }
  else
  {
    if constexpr (EMULATE_LOAD_DELAYS)
    {
      value = GPR(AllocateHostReg(HR_MODE_WRITE, HR_TYPE_NEXT_LOAD_DELAY_VALUE, rt));
      if (const std::optional<u32> rtreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt); rtreg.has_value())
        rvAsm->MV(value, GPR(rtreg.value()));
      else if (HasConstantReg(rt))
        EmitMov(value, GetConstantRegU32(rt));
      else
        rvAsm->LW(value, PTR(&g_state.regs.r[static_cast<u8>(rt)]));
    }
    else
    {
      value = GPR(AllocateHostReg(HR_MODE_READ | HR_MODE_WRITE, HR_TYPE_CPU_REG, rt));
    }
  }

  DebugAssert(value.Index() != RARG2.Index() && value.Index() != RARG3.Index());
  rvAsm->ANDI(RARG2, addr, 3);
  rvAsm->SLLIW(RARG2, RARG2, 3); // *8
  EmitMov(RARG3, 24);
  rvAsm->SUBW(RARG3, RARG3, RARG2);

  if (inst->op == InstructionOp::lwl)
  {
    // const u32 mask = UINT32_C(0x00FFFFFF) >> shift;
    // new_value = (value & mask) | (RWRET << (24 - shift));
    EmitMov(RSCRATCH, 0xFFFFFFu);
    rvAsm->SRLW(RSCRATCH, RSCRATCH, RARG2);
    rvAsm->AND(value, value, RSCRATCH);
    rvAsm->SLLW(RRET, RRET, RARG3);
    rvAsm->OR(value, value, RRET);
  }
  else
  {
    // const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift);
    // new_value = (value & mask) | (RWRET >> shift);
    rvAsm->SRLW(RRET, RRET, RARG2);
    EmitMov(RSCRATCH, 0xFFFFFF00u);
    rvAsm->SLLW(RSCRATCH, RSCRATCH, RARG3);
    rvAsm->AND(value, value, RSCRATCH);
    rvAsm->OR(value, value, RRET);
  }

  FreeHostReg(addr.Index());

  if (g_settings.gpu_pgxp_enable)
  {
    Flush(FLUSH_FOR_C_CALL);
    rvAsm->MV(RARG3, value);
    rvAsm->ANDI(RARG2, addr, ~0x3u);
    EmitMov(RARG1, inst->bits);
    EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LW));
  }
}

void CPU::NewRec::RISCV64Compiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                                const std::optional<VirtualMemoryAddress>& address)
{
  const u32 index = static_cast<u32>(inst->r.rt.GetValue());
  const auto [ptr, action] = GetGTERegisterPointer(index, true);
  const std::optional<GPR> addr_reg =
    g_settings.gpu_pgxp_enable ? std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) : std::optional<GPR>();
  FlushForLoadStore(address, false, use_fastmem);
  const GPR addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
  const GPR value = GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, [this, action = action]() {
    return (action == GTERegisterAccessAction::CallHandler && g_settings.gpu_pgxp_enable) ?
             GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) :
             RRET;
  });

  switch (action)
  {
    case GTERegisterAccessAction::Ignore:
    {
      break;
    }

    case GTERegisterAccessAction::Direct:
    {
      rvAsm->SW(value, PTR(ptr));
      break;
    }

    case GTERegisterAccessAction::SignExtend16:
    {
      EmitSExtH(RARG3, value);
      rvAsm->SW(RARG3, PTR(ptr));
      break;
    }

    case GTERegisterAccessAction::ZeroExtend16:
    {
      EmitUExtH(RARG3, value);
      rvAsm->SW(RARG3, PTR(ptr));
      break;
    }

    case GTERegisterAccessAction::CallHandler:
    {
      Flush(FLUSH_FOR_C_CALL);
      rvAsm->MV(RARG2, value);
      EmitMov(RARG1, index);
      EmitCall(reinterpret_cast<const void*>(&GTE::WriteRegister));
      break;
    }

    case GTERegisterAccessAction::PushFIFO:
    {
      // SXY0 <- SXY1
      // SXY1 <- SXY2
      // SXY2 <- SXYP
      DebugAssert(value.Index() != RARG2.Index() && value.Index() != RARG3.Index());
      rvAsm->LW(RARG2, PTR(&g_state.gte_regs.SXY1[0]));
      rvAsm->LW(RARG3, PTR(&g_state.gte_regs.SXY2[0]));
      rvAsm->SW(RARG2, PTR(&g_state.gte_regs.SXY0[0]));
      rvAsm->SW(RARG3, PTR(&g_state.gte_regs.SXY1[0]));
      rvAsm->SW(value, PTR(&g_state.gte_regs.SXY2[0]));
      break;
    }

    default:
    {
      Panic("Unknown action");
      return;
    }
  }

  if (g_settings.gpu_pgxp_enable)
  {
    Flush(FLUSH_FOR_C_CALL);
    rvAsm->MV(RARG3, value);
    if (value.Index() != RRET.Index())
      FreeHostReg(value.Index());
    rvAsm->MV(RARG2, addr);
    FreeHostReg(addr_reg.value().Index());
    EmitMov(RARG1, inst->bits);
    EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LWC2));
  }
}

void CPU::NewRec::RISCV64Compiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                               const std::optional<VirtualMemoryAddress>& address)
{
  AssertRegOrConstS(cf);
  AssertRegOrConstT(cf);

  const std::optional<GPR> addr_reg =
    g_settings.gpu_pgxp_enable ? std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) : std::optional<GPR>();
  FlushForLoadStore(address, true, use_fastmem);
  const GPR addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
  const GPR data = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
  if (!cf.valid_host_t)
    MoveTToReg(RARG2, cf);

  GenerateStore(addr, data, size, use_fastmem);

  if (g_settings.gpu_pgxp_enable)
  {
    Flush(FLUSH_FOR_C_CALL);
    MoveMIPSRegToReg(RARG3, cf.MipsT());
    rvAsm->MV(RARG2, addr);
    EmitMov(RARG1, inst->bits);
    EmitCall(s_pgxp_mem_store_functions[static_cast<u32>(size)]);
    FreeHostReg(addr_reg.value().Index());
  }
}

void CPU::NewRec::RISCV64Compiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                               const std::optional<VirtualMemoryAddress>& address)
{
  DebugAssert(size == MemoryAccessSize::Word && !sign);

  // TODO: this can take over rt's value if it's no longer needed
  // NOTE: can't trust T in cf because of the alloc
  const GPR addr = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));
  const GPR value = g_settings.gpu_pgxp_enable ? GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2;
  if (g_settings.gpu_pgxp_enable)
    MoveMIPSRegToReg(value, inst->r.rt);

  FlushForLoadStore(address, true, use_fastmem);

  // TODO: if address is constant, this can be simplified..
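  // swl/swr are emitted as an aligned read-modify-write. Roughly the scalar logic being generated below
  // (a sketch, mirroring the mask comments further down):
  //   shift = (addr & 3) * 8;
  //   mem   = ReadWord(addr & ~3u);
  //   swl:  value = (mem & (0xFFFFFF00u << shift)) | (rt >> (24 - shift));
  //   swr:  value = (mem & (0x00FFFFFFu >> (24 - shift))) | (rt << shift);
  //   WriteWord(addr & ~3u, value);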
  // We'd need to be careful here if we weren't overwriting it..
  ComputeLoadStoreAddressArg(cf, address, addr);
  rvAsm->ANDI(RARG1, addr, ~0x3u);
  GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });

  rvAsm->ANDI(RSCRATCH, addr, 3);
  rvAsm->SLLIW(RSCRATCH, RSCRATCH, 3); // *8
  rvAsm->ANDI(addr, addr, ~0x3u);

  // Need to load down here for PGXP-off, because it's in a volatile reg that can get overwritten by flush.
  if (!g_settings.gpu_pgxp_enable)
    MoveMIPSRegToReg(value, inst->r.rt);

  if (inst->op == InstructionOp::swl)
  {
    // const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift;
    // new_value = (RWRET & mem_mask) | (value >> (24 - shift));
    EmitMov(RARG3, 0xFFFFFF00u);
    rvAsm->SLLW(RARG3, RARG3, RSCRATCH);
    rvAsm->AND(RRET, RRET, RARG3);

    EmitMov(RARG3, 24);
    rvAsm->SUBW(RARG3, RARG3, RSCRATCH);
    rvAsm->SRLW(value, value, RARG3);
    rvAsm->OR(value, value, RRET);
  }
  else
  {
    // const u32 mem_mask = UINT32_C(0x00FFFFFF) >> (24 - shift);
    // new_value = (RWRET & mem_mask) | (value << shift);
    rvAsm->SLLW(value, value, RSCRATCH);

    EmitMov(RARG3, 24);
    rvAsm->SUBW(RARG3, RARG3, RSCRATCH);
    EmitMov(RSCRATCH, 0x00FFFFFFu);
    rvAsm->SRLW(RSCRATCH, RSCRATCH, RARG3);
    rvAsm->AND(RRET, RRET, RSCRATCH);
    rvAsm->OR(value, value, RRET);
  }

  if (!g_settings.gpu_pgxp_enable)
  {
    GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
    FreeHostReg(addr.Index());
  }
  else
  {
    GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);

    Flush(FLUSH_FOR_C_CALL);
    rvAsm->MV(RARG3, value);
    FreeHostReg(value.Index());
    rvAsm->MV(RARG2, addr);
    FreeHostReg(addr.Index());
    EmitMov(RARG1, inst->bits);
    EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SW));
  }
}

void CPU::NewRec::RISCV64Compiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
                                                const std::optional<VirtualMemoryAddress>& address)
{
  const u32 index = static_cast<u32>(inst->r.rt.GetValue());
  const auto [ptr, action] = GetGTERegisterPointer(index, false);
  const GPR addr = (g_settings.gpu_pgxp_enable || action == GTERegisterAccessAction::CallHandler) ?
                     GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) :
                     RARG1;
  const GPR data = g_settings.gpu_pgxp_enable ? GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2;
  FlushForLoadStore(address, true, use_fastmem);
  ComputeLoadStoreAddressArg(cf, address, addr);

  switch (action)
  {
    case GTERegisterAccessAction::Direct:
    {
      rvAsm->LW(data, PTR(ptr));
    }
    break;

    case GTERegisterAccessAction::CallHandler:
    {
      // should already be flushed.. except in fastmem case
      Flush(FLUSH_FOR_C_CALL);
      EmitMov(RARG1, index);
      EmitCall(reinterpret_cast<const void*>(&GTE::ReadRegister));
      rvAsm->MV(data, RRET);
    }
    break;

    default:
    {
      Panic("Unknown action");
    }
    break;
  }

  GenerateStore(addr, data, size, use_fastmem);

  if (!g_settings.gpu_pgxp_enable)
  {
    if (addr.Index() != RARG1.Index())
      FreeHostReg(addr.Index());
  }
  else
  {
    // TODO: This can be simplified because we don't need to validate in PGXP..
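    // The PGXP memory hooks in this file take (instruction bits, address, value) in RARG1/RARG2/RARG3.
    // The MV/FreeHostReg ordering below matters: addr and data live in callee-saved temps that are
    // released as soon as they have been copied into the argument registers.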
    Flush(FLUSH_FOR_C_CALL);
    rvAsm->MV(RARG3, data);
    FreeHostReg(data.Index());
    rvAsm->MV(RARG2, addr);
    FreeHostReg(addr.Index());
    EmitMov(RARG1, inst->bits);
    EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SWC2));
  }
}

void CPU::NewRec::RISCV64Compiler::Compile_mtc0(CompileFlags cf)
{
  // TODO: we need better constant setting here.. which will need backprop
  AssertRegOrConstT(cf);

  const Cop0Reg reg = static_cast<Cop0Reg>(MipsD());
  const u32* ptr = GetCop0RegPtr(reg);
  const u32 mask = GetCop0RegWriteMask(reg);
  if (!ptr)
  {
    Compile_Fallback();
    return;
  }

  if (mask == 0)
  {
    // if it's a read-only register, ignore
    DEBUG_LOG("Ignoring write to read-only cop0 reg {}", static_cast<u32>(reg));
    return;
  }

  // for some registers, we need to test certain bits
  const bool needs_bit_test = (reg == Cop0Reg::SR);
  const GPR new_value = RARG1;
  const GPR old_value = RARG2;
  const GPR changed_bits = RARG3;
  const GPR mask_reg = RSCRATCH;

  // Load old value
  rvAsm->LW(old_value, PTR(ptr));

  // No way we fit this in an immediate..
  EmitMov(mask_reg, mask);

  // update value
  // TODO: This is creating pointless MV instructions.. why?
  if (cf.valid_host_t)
    rvAsm->AND(new_value, CFGetRegT(cf), mask_reg);
  else
    EmitMov(new_value, GetConstantRegU32(cf.MipsT()) & mask);

  if (needs_bit_test)
    rvAsm->XOR(changed_bits, old_value, new_value);
  rvAsm->NOT(mask_reg, mask_reg);
  rvAsm->AND(old_value, old_value, mask_reg);
  rvAsm->OR(new_value, old_value, new_value);
  rvAsm->SW(new_value, PTR(ptr));

  if (reg == Cop0Reg::SR)
  {
    // TODO: replace with register backup
    // We could just inline the whole thing..
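    // Far path below: if bit 16 of SR changed (cache isolation on the R3000A), the fastmem mappings are
    // assumed to be stale, so UpdateMemoryPointers() is called and RMEMBASE is reloaded. RARG1 (the new
    // SR value) is spilled around the call because TestInterrupts() still needs it afterwards.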
    Flush(FLUSH_FOR_C_CALL);

    rvAsm->SRLIW(RSCRATCH, changed_bits, 16);
    rvAsm->ANDI(RSCRATCH, RSCRATCH, 1);
    SwitchToFarCode(true, &Assembler::BEQ, RSCRATCH, zero);
    rvAsm->ADDI(sp, sp, -16);
    rvAsm->SW(RARG1, 0, sp);
    EmitCall(reinterpret_cast<const void*>(&CPU::UpdateMemoryPointers));
    rvAsm->LW(RARG1, 0, sp);
    rvAsm->ADDI(sp, sp, 16);
    rvAsm->LD(RMEMBASE, PTR(&g_state.fastmem_base));
    SwitchToNearCode(true);

    TestInterrupts(RARG1);
  }
  else if (reg == Cop0Reg::CAUSE)
  {
    rvAsm->LW(RARG1, PTR(&g_state.cop0_regs.sr.bits));
    TestInterrupts(RARG1);
  }

  if (reg == Cop0Reg::DCIC && g_settings.cpu_recompiler_memory_exceptions)
  {
    // TODO: DCIC handling for debug breakpoints
    WARNING_LOG("TODO: DCIC handling for debug breakpoints");
  }
}

void CPU::NewRec::RISCV64Compiler::Compile_rfe(CompileFlags cf)
{
  // shift mode bits right two, preserving upper bits
  rvAsm->LW(RARG1, PTR(&g_state.cop0_regs.sr.bits));
  rvAsm->SRLIW(RSCRATCH, RARG1, 2);
  rvAsm->ANDI(RSCRATCH, RSCRATCH, 0xf);
  rvAsm->ANDI(RARG1, RARG1, ~0xfu);
  rvAsm->OR(RARG1, RARG1, RSCRATCH);
  rvAsm->SW(RARG1, PTR(&g_state.cop0_regs.sr.bits));

  TestInterrupts(RARG1);
}

void CPU::NewRec::RISCV64Compiler::TestInterrupts(const biscuit::GPR& sr)
{
  DebugAssert(sr != RSCRATCH);

  // if Iec == 0 then goto no_interrupt
  Label no_interrupt;
  rvAsm->ANDI(RSCRATCH, sr, 1);
  rvAsm->BEQZ(RSCRATCH, &no_interrupt);

  // sr & cause
  rvAsm->LW(RSCRATCH, PTR(&g_state.cop0_regs.cause.bits));
  rvAsm->AND(sr, sr, RSCRATCH);

  // ((sr & cause) & 0xff00) == 0 goto no_interrupt
  rvAsm->SRLIW(sr, sr, 8);
  rvAsm->ANDI(sr, sr, 0xFF);
  SwitchToFarCode(true, &Assembler::BEQ, sr, zero);

  BackupHostState();

  // Update load delay, this normally happens at the end of an instruction, but we're finishing it early.
  UpdateLoadDelay();

  Flush(FLUSH_END_BLOCK | FLUSH_FOR_EXCEPTION | FLUSH_FOR_C_CALL);

  // Can't use EndBlockWithException() here, because it'll use the wrong PC.
  // Can't use RaiseException() on the fast path if we're the last instruction, because the next PC is unknown.
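  // The test above is roughly: if ((sr & 1) && ((sr & cause) & 0xFF00u)) then dispatch the interrupt.
  // For the last instruction in the block the next PC may not be known yet, so instead of raising the
  // exception directly we zero the downcount (and store the PC if dirty) so the dispatcher services it.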
  if (!iinfo->is_last_instruction)
  {
    EmitMov(RARG1, Cop0Registers::CAUSE::MakeValueForException(Exception::INT, iinfo->is_branch_instruction, false,
                                                               (inst + 1)->cop.cop_n));
    EmitMov(RARG2, m_compiler_pc);
    EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));
    m_dirty_pc = false;
    EndAndLinkBlock(std::nullopt, true, false);
  }
  else
  {
    if (m_dirty_pc)
      EmitMov(RARG1, m_compiler_pc);
    rvAsm->SW(biscuit::zero, PTR(&g_state.downcount));
    if (m_dirty_pc)
      rvAsm->SW(RARG1, PTR(&g_state.pc));
    m_dirty_pc = false;
    EndAndLinkBlock(std::nullopt, false, true);
  }

  RestoreHostState();
  SwitchToNearCode(false);

  rvAsm->Bind(&no_interrupt);
}

void CPU::NewRec::RISCV64Compiler::Compile_mfc2(CompileFlags cf)
{
  const u32 index = inst->cop.Cop2Index();
  const Reg rt = inst->r.rt;

  const auto [ptr, action] = GetGTERegisterPointer(index, false);
  if (action == GTERegisterAccessAction::Ignore)
    return;

  u32 hreg;
  if (action == GTERegisterAccessAction::Direct)
  {
    hreg = AllocateHostReg(GetFlagsForNewLoadDelayedReg(),
                           EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, rt);
    rvAsm->LW(GPR(hreg), PTR(ptr));
  }
  else if (action == GTERegisterAccessAction::CallHandler)
  {
    Flush(FLUSH_FOR_C_CALL);
    EmitMov(RARG1, index);
    EmitCall(reinterpret_cast<const void*>(&GTE::ReadRegister));

    hreg = AllocateHostReg(GetFlagsForNewLoadDelayedReg(),
                           EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, rt);
    rvAsm->MV(GPR(hreg), RRET);
  }
  else
  {
    Panic("Unknown action");
  }

  if (g_settings.gpu_pgxp_enable)
  {
    Flush(FLUSH_FOR_C_CALL);
    EmitMov(RARG1, inst->bits);
    rvAsm->MV(RARG2, GPR(hreg));
    EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_MFC2));
  }
}

void CPU::NewRec::RISCV64Compiler::Compile_mtc2(CompileFlags cf)
{
  const u32 index = inst->cop.Cop2Index();
  const auto [ptr, action] = GetGTERegisterPointer(index, true);
  if (action == GTERegisterAccessAction::Ignore)
    return;

  if (action == GTERegisterAccessAction::Direct)
  {
    if (cf.const_t)
      StoreConstantToCPUPointer(GetConstantRegU32(cf.MipsT()), ptr);
    else
      rvAsm->SW(CFGetRegT(cf), PTR(ptr));
  }
  else if (action == GTERegisterAccessAction::SignExtend16 || action == GTERegisterAccessAction::ZeroExtend16)
  {
    const bool sign = (action == GTERegisterAccessAction::SignExtend16);
    if (cf.valid_host_t)
    {
      sign ? EmitSExtH(RARG1, CFGetRegT(cf)) : EmitUExtH(RARG1, CFGetRegT(cf));
      rvAsm->SW(RARG1, PTR(ptr));
    }
    else if (cf.const_t)
    {
      const u16 cv = Truncate16(GetConstantRegU32(cf.MipsT()));
      StoreConstantToCPUPointer(sign ?
                                  ::SignExtend32(cv) : ::ZeroExtend32(cv), ptr);
    }
    else
    {
      Panic("Unsupported setup");
    }
  }
  else if (action == GTERegisterAccessAction::CallHandler)
  {
    Flush(FLUSH_FOR_C_CALL);
    EmitMov(RARG1, index);
    MoveTToReg(RARG2, cf);
    EmitCall(reinterpret_cast<const void*>(&GTE::WriteRegister));
  }
  else if (action == GTERegisterAccessAction::PushFIFO)
  {
    // SXY0 <- SXY1
    // SXY1 <- SXY2
    // SXY2 <- SXYP
    DebugAssert(RRET.Index() != RARG2.Index() && RRET.Index() != RARG3.Index());
    rvAsm->LW(RARG2, PTR(&g_state.gte_regs.SXY1[0]));
    rvAsm->LW(RARG3, PTR(&g_state.gte_regs.SXY2[0]));
    rvAsm->SW(RARG2, PTR(&g_state.gte_regs.SXY0[0]));
    rvAsm->SW(RARG3, PTR(&g_state.gte_regs.SXY1[0]));
    if (cf.valid_host_t)
      rvAsm->SW(CFGetRegT(cf), PTR(&g_state.gte_regs.SXY2[0]));
    else if (cf.const_t)
      StoreConstantToCPUPointer(GetConstantRegU32(cf.MipsT()), &g_state.gte_regs.SXY2[0]);
    else
      Panic("Unsupported setup");
  }
  else
  {
    Panic("Unknown action");
  }
}

void CPU::NewRec::RISCV64Compiler::Compile_cop2(CompileFlags cf)
{
  TickCount func_ticks;
  GTE::InstructionImpl func = GTE::GetInstructionImpl(inst->bits, &func_ticks);

  Flush(FLUSH_FOR_C_CALL);
  EmitMov(RARG1, inst->bits & GTE::Instruction::REQUIRED_BITS_MASK);
  EmitCall(reinterpret_cast<const void*>(func));

  AddGTETicks(func_ticks);
}

u32 CPU::NewRec::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size,
                                       TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask,
                                       u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
                                       bool is_load)
{
  Assembler arm_asm(static_cast<u8*>(thunk_code), thunk_space);
  Assembler* rvAsm = &arm_asm;

  static constexpr u32 GPR_SIZE = 8;

  // save regs
  u32 num_gprs = 0;

  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    if ((gpr_bitmask & (1u << i)) && rvIsCallerSavedRegister(i) && (!is_load || data_register != i))
      num_gprs++;
  }

  const u32 stack_size = (((num_gprs + 1) & ~1u) * GPR_SIZE);

  if (stack_size > 0)
  {
    rvAsm->ADDI(sp, sp, -static_cast<s32>(stack_size));

    u32 stack_offset = 0;
    for (u32 i = 0; i < NUM_HOST_REGS; i++)
    {
      if ((gpr_bitmask & (1u << i)) && rvIsCallerSavedRegister(i) && (!is_load || data_register != i))
      {
        rvAsm->SD(GPR(i), stack_offset, sp);
        stack_offset += GPR_SIZE;
      }
    }
  }

  if (cycles_to_add != 0)
  {
    // NOTE: we have to reload here, because memory writes can run DMA, which can screw with cycles
    Assert(rvIsValidSExtITypeImm(cycles_to_add));
    rvAsm->LW(RSCRATCH, PTR(&g_state.pending_ticks));
    rvAsm->ADDIW(RSCRATCH, RSCRATCH, cycles_to_add);
    rvAsm->SW(RSCRATCH, PTR(&g_state.pending_ticks));
  }

  if (address_register != RARG1.Index())
    rvAsm->MV(RARG1, GPR(address_register));

  if (!is_load)
  {
    if (data_register != RARG2.Index())
      rvAsm->MV(RARG2, GPR(data_register));
  }

  switch (size)
  {
    case MemoryAccessSize::Byte:
    {
      rvEmitCall(rvAsm, is_load ?
                          reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryByte) :
                          reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryByte));
    }
    break;
    case MemoryAccessSize::HalfWord:
    {
      rvEmitCall(rvAsm, is_load ? reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryHalfWord) :
                                  reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryHalfWord));
    }
    break;
    case MemoryAccessSize::Word:
    {
      rvEmitCall(rvAsm, is_load ? reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedReadMemoryWord) :
                                  reinterpret_cast<const void*>(&Recompiler::Thunks::UncheckedWriteMemoryWord));
    }
    break;
  }

  if (is_load)
  {
    const GPR dst = GPR(data_register);
    switch (size)
    {
      case MemoryAccessSize::Byte:
      {
        is_signed ? rvEmitSExtB(rvAsm, dst, RRET) : rvEmitUExtB(rvAsm, dst, RRET);
      }
      break;
      case MemoryAccessSize::HalfWord:
      {
        is_signed ? rvEmitSExtH(rvAsm, dst, RRET) : rvEmitUExtH(rvAsm, dst, RRET);
      }
      break;
      case MemoryAccessSize::Word:
      {
        if (dst.Index() != RRET.Index())
          rvAsm->MV(dst, RRET);
      }
      break;
    }
  }

  if (cycles_to_remove != 0)
  {
    Assert(rvIsValidSExtITypeImm(-cycles_to_remove));
    rvAsm->LW(RSCRATCH, PTR(&g_state.pending_ticks));
    rvAsm->ADDIW(RSCRATCH, RSCRATCH, -cycles_to_remove);
    rvAsm->SW(RSCRATCH, PTR(&g_state.pending_ticks));
  }

  // restore regs
  if (stack_size > 0)
  {
    u32 stack_offset = 0;
    for (u32 i = 0; i < NUM_HOST_REGS; i++)
    {
      if ((gpr_bitmask & (1u << i)) && rvIsCallerSavedRegister(i) && (!is_load || data_register != i))
      {
        rvAsm->LD(GPR(i), stack_offset, sp);
        stack_offset += GPR_SIZE;
      }
    }

    rvAsm->ADDI(sp, sp, stack_size);
  }

  rvEmitJmp(rvAsm, static_cast<const u8*>(code_address) + code_size);

  return static_cast<u32>(rvAsm->GetCodeBuffer().GetSizeInBytes());
}

#endif // CPU_ARCH_RISCV64
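// A sketch of the fastmem backpatch flow assumed by the code above: each fastmem access site (see
// GenerateStore) is 8 bytes (the access instruction plus a NOP) and is recorded via AddLoadStoreInfo();
// when it faults, it is presumably rewritten into an auipc+jalr to a thunk built by
// CompileLoadStoreThunk(), which saves the live caller-saved GPRs, adjusts pending_ticks, calls the
// slow-path memory handler, extends/moves the result for loads, restores the GPRs, and jumps back to
// code_address + code_size.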