cpu_newrec_compiler.cpp (95159B)
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#include "cpu_newrec_compiler.h"
#include "common/assert.h"
#include "common/log.h"
#include "common/small_string.h"
#include "cpu_code_cache.h"
#include "cpu_core_private.h"
#include "cpu_disasm.h"
#include "cpu_pgxp.h"
#include "settings.h"
#include <cstdint>
#include <limits>
Log_SetChannel(NewRec::Compiler);

// TODO: direct link skip delay slot check
// TODO: speculative constants
// TODO: std::bitset in msvc has bounds checks even in release...

// PGXP memory-load callbacks, indexed by [size][sign]: row 0 = byte (CPU_LBx for
// both signs), row 1 = halfword ({LHU, LH}), row 2 = word (LW, sign irrelevant).
const std::array<std::array<const void*, 2>, 3> CPU::NewRec::Compiler::s_pgxp_mem_load_functions = {
  {{{reinterpret_cast<const void*>(&PGXP::CPU_LBx), reinterpret_cast<const void*>(&PGXP::CPU_LBx)}},
   {{reinterpret_cast<const void*>(&PGXP::CPU_LHU), reinterpret_cast<const void*>(&PGXP::CPU_LH)}},
   {{reinterpret_cast<const void*>(&PGXP::CPU_LW)}}}};
// PGXP memory-store callbacks, indexed by size: 0 = byte, 1 = halfword, 2 = word.
const std::array<const void*, 3> CPU::NewRec::Compiler::s_pgxp_mem_store_functions = {
  {reinterpret_cast<const void*>(&PGXP::CPU_SB), reinterpret_cast<const void*>(&PGXP::CPU_SH),
   reinterpret_cast<const void*>(&PGXP::CPU_SW)}};

CPU::NewRec::Compiler::Compiler() = default;

CPU::NewRec::Compiler::~Compiler() = default;

// Resets all per-block compilation state (PC tracking, cycle counters, constant
// register tracking, host register allocations, load-delay tracking) before a
// new block is compiled. The code-buffer parameters are not referenced in this
// base implementation; presumably backend overrides consume them — TODO confirm.
void CPU::NewRec::Compiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer,
                                  u32 far_code_space)
{
  m_block = block;
  m_compiler_pc = block->pc;
  m_cycles = 0;
  m_gte_done_cycle = 0;
  inst = nullptr;
  iinfo = nullptr;
  m_current_instruction_pc = 0;
  m_current_instruction_branch_delay_slot = false;
  m_dirty_pc = false;
  m_dirty_instruction_bits = false;
  m_dirty_gte_done_cycle = true;
  m_block_ended = false;
  m_constant_reg_values.fill(0);
  m_constant_regs_valid.reset();
  m_constant_regs_dirty.reset();

  for (u32 i = 0; i < NUM_HOST_REGS; i++)
    ClearHostReg(i);
  m_register_alloc_counter = 0;

  // $zero is always the constant 0.
  m_constant_reg_values[static_cast<u32>(Reg::zero)] = 0;
  m_constant_regs_valid.set(static_cast<u32>(Reg::zero));

  // An incoming load delay may exist from the previous block when delays are emulated.
  m_load_delay_dirty = EMULATE_LOAD_DELAYS;
  m_load_delay_register = Reg::count;
  m_load_delay_value_register = NUM_HOST_REGS;

  InitSpeculativeRegs();
}

// Emits the block prologue: optional manual page-protection check, icache
// check/update, and BIOS TTY syscall hooks, then positions the instruction
// cursor at the block's first instruction.
void CPU::NewRec::Compiler::BeginBlock()
{
#if 0
  GenerateCall(reinterpret_cast<const void*>(&CPU::CodeCache::LogCurrentState));
#endif

  if (m_block->protection == CodeCache::PageProtectionMode::ManualCheck)
  {
    DEBUG_LOG("Generate manual protection for PC {:08X}", m_block->pc);
    // Compare RAM contents against the shadow copy stored with the block.
    const u8* ram_ptr = Bus::g_ram + VirtualAddressToPhysical(m_block->pc);
    const u8* shadow_ptr = reinterpret_cast<const u8*>(m_block->Instructions());
    GenerateBlockProtectCheck(ram_ptr, shadow_ptr, m_block->size * sizeof(Instruction));
  }

  GenerateICacheCheckAndUpdate();

  if (g_settings.bios_tty_logging)
  {
    // 0xA0/0xB0 are the BIOS function dispatch entry points.
    if (m_block->pc == 0xa0)
      GenerateCall(reinterpret_cast<const void*>(&CPU::HandleA0Syscall));
    else if (m_block->pc == 0xb0)
      GenerateCall(reinterpret_cast<const void*>(&CPU::HandleB0Syscall));
  }

  inst = m_block->Instructions();
  iinfo = m_block->InstructionsInfo();
  m_current_instruction_pc = m_block->pc;
  m_current_instruction_branch_delay_slot = false;
  m_compiler_pc += sizeof(Instruction);
  m_dirty_pc = true;
  m_dirty_instruction_bits = true;
}

// Compiles an entire block: resets state, emits the prologue, then compiles
// instructions until the block ends (branch) or the last instruction is reached.
// Returns the entry pointer of the generated code; out-params receive the
// near/far code sizes, which are also committed to the code cache.
const void* CPU::NewRec::Compiler::CompileBlock(CodeCache::Block* block, u32* host_code_size, u32* host_far_code_size)
{
  Reset(block, CPU::CodeCache::GetFreeCodePointer(), CPU::CodeCache::GetFreeCodeSpace(),
        CPU::CodeCache::GetFreeFarCodePointer(), CPU::CodeCache::GetFreeFarCodeSpace());

  DEBUG_LOG("Block range: {:08X} -> {:08X}", block->pc, block->pc + block->size * 4);

  BeginBlock();

  for (;;)
  {
    CompileInstruction();

    if (m_block_ended || iinfo->is_last_instruction)
    {
      if (!m_block_ended)
      {
        // Block was truncated. Link it.
        EndBlock(m_compiler_pc, false);
      }

      break;
    }

    inst++;
    iinfo++;
    m_current_instruction_pc += sizeof(Instruction);
    m_compiler_pc += sizeof(Instruction);
    m_dirty_pc = true;
    m_dirty_instruction_bits = true;
  }

  // Nothing should be valid anymore
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
    DebugAssert(!IsHostRegAllocated(i));
  for (u32 i = 1; i < static_cast<u32>(Reg::count); i++)
    DebugAssert(!m_constant_regs_dirty.test(i) && !m_constant_regs_valid.test(i));
  m_speculative_constants.memory.clear();

  u32 code_size, far_code_size;
  const void* code = EndCompile(&code_size, &far_code_size);
  *host_code_size = code_size;
  *host_far_code_size = far_code_size;
  CPU::CodeCache::CommitCode(code_size);
  CPU::CodeCache::CommitFarCode(far_code_size);

  return code;
}

// Marks guest register r as holding the known constant v, cancelling any
// pending load delay targeting it and discarding any host register that was
// caching it. The constant becomes dirty (needs writeback) unless it already
// held the same value.
void CPU::NewRec::Compiler::SetConstantReg(Reg r, u32 v)
{
  DebugAssert(r < Reg::count && r != Reg::zero);

  // There might still be an incoming load delay which we need to cancel.
  CancelLoadDelaysToReg(r);

  if (m_constant_regs_valid.test(static_cast<u32>(r)) && m_constant_reg_values[static_cast<u8>(r)] == v)
  {
    // Shouldn't be any host regs though.
    DebugAssert(!CheckHostReg(0, HR_TYPE_CPU_REG, r).has_value());
    return;
  }

  m_constant_reg_values[static_cast<u32>(r)] = v;
  m_constant_regs_valid.set(static_cast<u32>(r));
  m_constant_regs_dirty.set(static_cast<u32>(r));

  if (const std::optional<u32> hostreg = CheckHostReg(0, HR_TYPE_CPU_REG, r); hostreg.has_value())
  {
    DEBUG_LOG("Discarding guest register {} in host register {} due to constant set", GetRegName(r),
              GetHostRegName(hostreg.value()));
    FreeHostReg(hostreg.value());
  }
}

// Drops a pending load delay that targets reg (a subsequent write to the same
// register cancels the delayed load), releasing its host value register.
void CPU::NewRec::Compiler::CancelLoadDelaysToReg(Reg reg)
{
  if (m_load_delay_register != reg)
    return;

  DEBUG_LOG("Cancelling load delay to {}", GetRegName(reg));
  m_load_delay_register = Reg::count;
  if (m_load_delay_value_register != NUM_HOST_REGS)
    ClearHostReg(m_load_delay_value_register);
}

// Advances the load-delay pipeline by one instruction: resolves a dirty
// (inter-block) delay from CPU state if needed, commits the current delayed
// load, then promotes the "next" delay to "current".
void CPU::NewRec::Compiler::UpdateLoadDelay()
{
  if (m_load_delay_dirty)
  {
    // we shouldn't have a static load delay.
    DebugAssert(!HasLoadDelay());

    // have to invalidate registers, we might have one of them cached
    // TODO: double check the order here, will we trash a new value? we shouldn't...
    // thankfully since this only happens on the first instruction, we can get away with just killing anything which
    // isn't in write mode, because nothing could've been written before it, and the new value overwrites any
    // load-delayed value
    DEBUG_LOG("Invalidating non-dirty registers, and flushing load delay from state");

    constexpr u32 req_flags = (HR_ALLOCATED | HR_MODE_WRITE);

    for (u32 i = 0; i < NUM_HOST_REGS; i++)
    {
      HostRegAlloc& ra = m_host_regs[i];
      if (ra.type != HR_TYPE_CPU_REG || !IsHostRegAllocated(i) || ((ra.flags & req_flags) == req_flags))
        continue;

      DEBUG_LOG("Freeing non-dirty cached register {} in {}", GetRegName(ra.reg), GetHostRegName(i));
      DebugAssert(!(ra.flags & HR_MODE_WRITE));
      ClearHostReg(i);
    }

    // remove any non-dirty constants too
    for (u32 i = 1; i < static_cast<u32>(Reg::count); i++)
    {
      if (!HasConstantReg(static_cast<Reg>(i)) || HasDirtyConstantReg(static_cast<Reg>(i)))
        continue;

      DEBUG_LOG("Clearing non-dirty constant {}", GetRegName(static_cast<Reg>(i)));
      ClearConstantReg(static_cast<Reg>(i));
    }

    Flush(FLUSH_LOAD_DELAY_FROM_STATE);
  }

  // commit the delayed register load
  FinishLoadDelay();

  // move next load delay forward
  if (m_next_load_delay_register != Reg::count)
  {
    // if it somehow got flushed, read it back in.
    if (m_next_load_delay_value_register == NUM_HOST_REGS)
    {
      AllocateHostReg(HR_MODE_READ, HR_TYPE_NEXT_LOAD_DELAY_VALUE, m_next_load_delay_register);
      DebugAssert(m_next_load_delay_value_register != NUM_HOST_REGS);
    }

    HostRegAlloc& ra = m_host_regs[m_next_load_delay_value_register];
    ra.flags |= HR_MODE_WRITE;
    ra.type = HR_TYPE_LOAD_DELAY_VALUE;

    m_load_delay_register = m_next_load_delay_register;
    m_load_delay_value_register = m_next_load_delay_value_register;
    m_next_load_delay_register = Reg::count;
    m_next_load_delay_value_register = NUM_HOST_REGS;
  }
}

// Commits the current delayed load: the delayed value's host register becomes
// the (dirty) cached value of the target guest register, and any stale cached
// or constant value for that register is discarded.
void CPU::NewRec::Compiler::FinishLoadDelay()
{
  DebugAssert(!m_load_delay_dirty);
  if (!HasLoadDelay())
    return;

  // we may need to reload the value..
  if (m_load_delay_value_register == NUM_HOST_REGS)
  {
    AllocateHostReg(HR_MODE_READ, HR_TYPE_LOAD_DELAY_VALUE, m_load_delay_register);
    DebugAssert(m_load_delay_value_register != NUM_HOST_REGS);
  }

  // kill any (old) cached value for this register
  DeleteMIPSReg(m_load_delay_register, false);

  DEBUG_LOG("Finished delayed load to {} in host register {}", GetRegName(m_load_delay_register),
            GetHostRegName(m_load_delay_value_register));

  // and swap the mode over so it gets written back later
  HostRegAlloc& ra = m_host_regs[m_load_delay_value_register];
  DebugAssert(ra.reg == m_load_delay_register);
  ra.flags = (ra.flags & IMMUTABLE_HR_FLAGS) | HR_ALLOCATED | HR_MODE_READ | HR_MODE_WRITE;
  ra.counter = m_register_alloc_counter++;
  ra.type = HR_TYPE_CPU_REG;

  // constants are gone
  DEBUG_LOG("Clearing constant in {} due to load delay", GetRegName(m_load_delay_register));
  ClearConstantReg(m_load_delay_register);

  m_load_delay_register = Reg::count;
  m_load_delay_value_register = NUM_HOST_REGS;
}

// Commits the pending load delay only if it targets reg. A dirty (inter-block)
// delay is fully resolved via UpdateLoadDelay instead.
void CPU::NewRec::Compiler::FinishLoadDelayToReg(Reg reg)
{
  if (m_load_delay_dirty)
  {
    // inter-block :(
    UpdateLoadDelay();
    return;
  }

  if (m_load_delay_register != reg)
    return;

  FinishLoadDelay();
}

// Allocation flags for a freshly load-delayed register; callee-saved is
// requested under PGXP so the value survives the PGXP helper call.
u32 CPU::NewRec::Compiler::GetFlagsForNewLoadDelayedReg() const
{
  return g_settings.gpu_pgxp_enable ? (HR_MODE_WRITE | HR_CALLEE_SAVED) : (HR_MODE_WRITE);
}

// Forgets any tracked constant value for r (no writeback).
void CPU::NewRec::Compiler::ClearConstantReg(Reg r)
{
  DebugAssert(r < Reg::count && r != Reg::zero);
  m_constant_reg_values[static_cast<u32>(r)] = 0;
  m_constant_regs_valid.reset(static_cast<u32>(r));
  m_constant_regs_dirty.reset(static_cast<u32>(r));
}

// Writes back all dirty constant registers to CPU state; optionally also
// invalidates the constant tracking afterwards.
void CPU::NewRec::Compiler::FlushConstantRegs(bool invalidate)
{
  for (u32 i = 1; i < static_cast<u32>(Reg::count); i++)
  {
    if (m_constant_regs_dirty.test(static_cast<u32>(i)))
      FlushConstantReg(static_cast<Reg>(i));
    if (invalidate)
      ClearConstantReg(static_cast<Reg>(i));
  }
}

// rd field of the current instruction.
CPU::Reg CPU::NewRec::Compiler::MipsD() const
{
  return inst->r.rd;
}

// Branch target of the current conditional branch: PC of the delay slot plus
// the sign-extended immediate shifted left by 2.
u32 CPU::NewRec::Compiler::GetConditionalBranchTarget(CompileFlags cf) const
{
  // compiler pc has already been advanced when swapping branch delay slots
  const u32 current_pc = m_compiler_pc - (cf.delay_slot_swapped ? sizeof(Instruction) : 0);
  return current_pc + (inst->i.imm_sext32() << 2);
}

// Link-register value for a branch-and-link: the instruction after the delay slot.
u32 CPU::NewRec::Compiler::GetBranchReturnAddress(CompileFlags cf) const
{
  // compiler pc has already been advanced when swapping branch delay slots
  return m_compiler_pc + (cf.delay_slot_swapped ? 0 : sizeof(Instruction));
}

// Attempts to compile the branch delay slot before the branch itself, which
// lets the branch be emitted without saving/restoring state around the slot.
// rs/rt/rd are the registers the branch reads (and the link register); the
// swap is refused if the slot instruction writes any of them, if a load delay
// is in flight to them, or if the slot opcode has side effects we can't reorder.
// Returns true and leaves the slot already compiled when the swap happened.
bool CPU::NewRec::Compiler::TrySwapDelaySlot(Reg rs, Reg rt, Reg rd)
{
  if constexpr (!SWAP_BRANCH_DELAY_SLOTS)
    return false;

  const Instruction* next_instruction = inst + 1;
  DebugAssert(next_instruction < (m_block->Instructions() + m_block->size));

  const Reg opcode_rs = next_instruction->r.rs;
  const Reg opcode_rt = next_instruction->r.rt;
  const Reg opcode_rd = next_instruction->r.rd;

#ifdef _DEBUG
  TinyString disasm;
  DisassembleInstruction(&disasm, m_current_instruction_pc + 4, next_instruction->bits);
#endif

  // Just in case we read it in the instruction.. but the block should end after this.
  const Instruction* const backup_instruction = inst;
  const u32 backup_instruction_pc = m_current_instruction_pc;
  const bool backup_instruction_delay_slot = m_current_instruction_branch_delay_slot;

  if (next_instruction->bits == 0)
  {
    // nop
    goto is_safe;
  }

  // can't swap when the branch is the first instruction because of bloody load delays
  if ((EMULATE_LOAD_DELAYS && m_block->pc == m_current_instruction_pc) || m_load_delay_dirty ||
      (HasLoadDelay() && (m_load_delay_register == rs || m_load_delay_register == rt || m_load_delay_register == rd)))
  {
    goto is_unsafe;
  }

  switch (next_instruction->op)
  {
    // Immediate ALU ops and loads: write to rt, read rs.
    case InstructionOp::addi:
    case InstructionOp::addiu:
    case InstructionOp::slti:
    case InstructionOp::sltiu:
    case InstructionOp::andi:
    case InstructionOp::ori:
    case InstructionOp::xori:
    case InstructionOp::lui:
    case InstructionOp::lb:
    case InstructionOp::lh:
    case InstructionOp::lwl:
    case InstructionOp::lw:
    case InstructionOp::lbu:
    case InstructionOp::lhu:
    case InstructionOp::lwr:
    {
      // Unsafe if the slot overwrites a register the branch reads, or the link
      // register (rd) is read or written by the slot.
      if ((rs != Reg::zero && rs == opcode_rt) || (rt != Reg::zero && rt == opcode_rt) ||
          (rd != Reg::zero && (rd == opcode_rs || rd == opcode_rt)))
      {
        goto is_unsafe;
      }
    }
    break;

    // Stores and GTE load/store don't write GPRs; always safe to reorder.
    case InstructionOp::sb:
    case InstructionOp::sh:
    case InstructionOp::swl:
    case InstructionOp::sw:
    case InstructionOp::swr:
    case InstructionOp::lwc2:
    case InstructionOp::swc2:
      break;

    case InstructionOp::funct: // SPECIAL
    {
      switch (next_instruction->r.funct)
      {
        // Three-operand ALU ops: write rd, read rs/rt.
        case InstructionFunct::sll:
        case InstructionFunct::srl:
        case InstructionFunct::sra:
        case InstructionFunct::sllv:
        case InstructionFunct::srlv:
        case InstructionFunct::srav:
        case InstructionFunct::add:
        case InstructionFunct::addu:
        case InstructionFunct::sub:
        case InstructionFunct::subu:
        case InstructionFunct::and_:
        case InstructionFunct::or_:
        case InstructionFunct::xor_:
        case InstructionFunct::nor:
        case InstructionFunct::slt:
        case InstructionFunct::sltu:
        {
          if ((rs != Reg::zero && rs == opcode_rd) || (rt != Reg::zero && rt == opcode_rd) ||
              (rd != Reg::zero && (rd == opcode_rs || rd == opcode_rt)))
          {
            goto is_unsafe;
          }
        }
        break;

        // Multiply/divide only touch HI/LO; safe.
        case InstructionFunct::mult:
        case InstructionFunct::multu:
        case InstructionFunct::div:
        case InstructionFunct::divu:
          break;

        default:
          goto is_unsafe;
      }
    }
    break;

    case InstructionOp::cop0: // COP0
    case InstructionOp::cop1: // COP1
    case InstructionOp::cop2: // COP2
    case InstructionOp::cop3: // COP3
    {
      if (next_instruction->cop.IsCommonInstruction())
      {
        switch (next_instruction->cop.CommonOp())
        {
          case CopCommonInstruction::mfcn: // MFC0
          case CopCommonInstruction::cfcn: // CFC0
          {
            // Moves-from-cop write rt; unsafe if the branch reads/links it.
            if ((rs != Reg::zero && rs == opcode_rt) || (rt != Reg::zero && rt == opcode_rt) ||
                (rd != Reg::zero && rd == opcode_rt))
            {
              goto is_unsafe;
            }
          }
          break;

          case CopCommonInstruction::mtcn: // MTC0
          case CopCommonInstruction::ctcn: // CTC0
            break;
        }
      }
      else
      {
        // swap when it's GTE
        if (next_instruction->op != InstructionOp::cop2)
          goto is_unsafe;
      }
    }
    break;

    default:
      goto is_unsafe;
  }

is_safe:
#ifdef _DEBUG
  DEBUG_LOG("Swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm);
#endif

  CompileBranchDelaySlot();

  // Restore the branch as the current instruction after compiling the slot.
  inst = backup_instruction;
  m_current_instruction_pc = backup_instruction_pc;
  m_current_instruction_branch_delay_slot = backup_instruction_delay_slot;
  return true;

is_unsafe:
#ifdef _DEBUG
  DEBUG_LOG("NOT swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm);
#endif

  return false;
}

// Redirects the compiler PC (e.g. after a branch) and marks it dirty so the
// emitted code updates the stored PC.
void CPU::NewRec::Compiler::SetCompilerPC(u32 newpc)
{
  m_compiler_pc = newpc;
  m_dirty_pc = true;
}

// Finds (or makes) a host register available for allocation. Preference order:
// unallocated callee-saved (when requested) > any unallocated > evict the
// allocation with the lowest counter. Evicting a still-needed guest register
// while requesting callee-saved may instead migrate it to a caller-saved
// register so it stays cached. Returns the host register index, freed.
u32 CPU::NewRec::Compiler::GetFreeHostReg(u32 flags)
{
  const u32 req_flags = HR_USABLE | (flags & HR_CALLEE_SAVED);

  u32 fallback = NUM_HOST_REGS;
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    if ((m_host_regs[i].flags & (req_flags | HR_NEEDED | HR_ALLOCATED)) == req_flags)
    {
      // Prefer callee-saved registers.
      if (m_host_regs[i].flags & HR_CALLEE_SAVED)
        return i;
      else if (fallback == NUM_HOST_REGS)
        fallback = i;
    }
  }
  if (fallback != NUM_HOST_REGS)
    return fallback;

  // find register with lowest counter
  u32 lowest = NUM_HOST_REGS;
  u32 lowest_count = std::numeric_limits<u32>::max();
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    const HostRegAlloc& ra = m_host_regs[i];
    if ((ra.flags & (req_flags | HR_NEEDED)) != req_flags)
      continue;

    DebugAssert(ra.flags & HR_ALLOCATED);
    if (ra.type == HR_TYPE_TEMP)
    {
      // can't punt temps
      continue;
    }

    if (ra.counter < lowest_count)
    {
      lowest = i;
      lowest_count = ra.counter;
    }
  }

  // Evict the allocation with the lowest counter (least recently touched).
  AssertMsg(lowest != NUM_HOST_REGS, "Register allocation failed.");

  const HostRegAlloc& ra = m_host_regs[lowest];
  switch (ra.type)
  {
    case HR_TYPE_CPU_REG:
    {
      // If the register is needed later, and we're allocating a callee-saved register, try moving it to a caller-saved
      // register.
      if (iinfo->UsedTest(ra.reg) && flags & HR_CALLEE_SAVED)
      {
        u32 caller_saved_lowest = NUM_HOST_REGS;
        u32 caller_saved_lowest_count = std::numeric_limits<u32>::max();
        for (u32 i = 0; i < NUM_HOST_REGS; i++)
        {
          constexpr u32 caller_req_flags = HR_USABLE;
          constexpr u32 caller_req_mask = HR_USABLE | HR_NEEDED | HR_CALLEE_SAVED;
          const HostRegAlloc& caller_ra = m_host_regs[i];
          if ((caller_ra.flags & caller_req_mask) != caller_req_flags)
            continue;

          if (!(caller_ra.flags & HR_ALLOCATED))
          {
            // Unallocated caller-saved register: best possible destination.
            caller_saved_lowest = i;
            caller_saved_lowest_count = 0;
            break;
          }

          if (caller_ra.type == HR_TYPE_TEMP)
            continue;

          if (caller_ra.counter < caller_saved_lowest_count)
          {
            caller_saved_lowest = i;
            caller_saved_lowest_count = caller_ra.counter;
          }
        }

        if (caller_saved_lowest_count < lowest_count)
        {
          DEBUG_LOG("Moving caller-saved host register {} with MIPS register {} to {} for allocation",
                    GetHostRegName(lowest), GetRegName(ra.reg), GetHostRegName(caller_saved_lowest));
          if (IsHostRegAllocated(caller_saved_lowest))
            FreeHostReg(caller_saved_lowest);
          CopyHostReg(caller_saved_lowest, lowest);
          SwapHostRegAlloc(caller_saved_lowest, lowest);
          DebugAssert(!IsHostRegAllocated(lowest));
          return lowest;
        }
      }

      DEBUG_LOG("Freeing register {} in host register {} for allocation", GetRegName(ra.reg), GetHostRegName(lowest));
    }
    break;
    case HR_TYPE_LOAD_DELAY_VALUE:
    {
      DEBUG_LOG("Freeing load delay register {} in host register {} for allocation", GetHostRegName(lowest),
                GetRegName(ra.reg));
    }
    break;
    case HR_TYPE_NEXT_LOAD_DELAY_VALUE:
    {
      DEBUG_LOG("Freeing next load delay register {} in host register {} due for allocation", GetRegName(ra.reg),
                GetHostRegName(lowest));
    }
    break;
    default:
    {
      Panic("Unknown type freed");
    }
    break;
  }

  FreeHostReg(lowest);
  return lowest;
}
635 636 const char* CPU::NewRec::Compiler::GetReadWriteModeString(u32 flags) 637 { 638 if ((flags & (HR_MODE_READ | HR_MODE_WRITE)) == (HR_MODE_READ | HR_MODE_WRITE)) 639 return "read-write"; 640 else if (flags & HR_MODE_READ) 641 return "read-only"; 642 else if (flags & HR_MODE_WRITE) 643 return "write-only"; 644 else 645 return "UNKNOWN"; 646 } 647 648 u32 CPU::NewRec::Compiler::AllocateHostReg(u32 flags, HostRegAllocType type /* = HR_TYPE_TEMP */, 649 Reg reg /* = Reg::count */) 650 { 651 // Cancel any load delays before booting anything out 652 if (flags & HR_MODE_WRITE && (type == HR_TYPE_CPU_REG || type == HR_TYPE_NEXT_LOAD_DELAY_VALUE)) 653 CancelLoadDelaysToReg(reg); 654 655 // Already have a matching type? 656 if (type != HR_TYPE_TEMP) 657 { 658 const std::optional<u32> check_reg = CheckHostReg(flags, type, reg); 659 660 // shouldn't be allocating >1 load delay in a single instruction.. 661 // TODO: prefer callee saved registers for load delay 662 DebugAssert((type != HR_TYPE_LOAD_DELAY_VALUE && type != HR_TYPE_NEXT_LOAD_DELAY_VALUE) || !check_reg.has_value()); 663 if (check_reg.has_value()) 664 return check_reg.value(); 665 } 666 667 const u32 hreg = GetFreeHostReg(flags); 668 HostRegAlloc& ra = m_host_regs[hreg]; 669 ra.flags = (ra.flags & IMMUTABLE_HR_FLAGS) | (flags & ALLOWED_HR_FLAGS) | HR_ALLOCATED | HR_NEEDED; 670 ra.type = type; 671 ra.reg = reg; 672 ra.counter = m_register_alloc_counter++; 673 674 switch (type) 675 { 676 case HR_TYPE_CPU_REG: 677 { 678 DebugAssert(reg != Reg::zero); 679 680 DEBUG_LOG("Allocate host reg {} to guest reg {} in {} mode", GetHostRegName(hreg), GetRegName(reg), 681 GetReadWriteModeString(flags)); 682 683 if (flags & HR_MODE_READ) 684 { 685 DebugAssert(ra.reg > Reg::zero && ra.reg < Reg::count); 686 687 if (HasConstantReg(reg)) 688 { 689 // may as well flush it now 690 DEBUG_LOG("Flush constant register in guest reg {} to host reg {}", GetRegName(reg), GetHostRegName(hreg)); 691 LoadHostRegWithConstant(hreg, 
GetConstantRegU32(reg)); 692 m_constant_regs_dirty.reset(static_cast<u8>(reg)); 693 ra.flags |= HR_MODE_WRITE; 694 } 695 else 696 { 697 LoadHostRegFromCPUPointer(hreg, &g_state.regs.r[static_cast<u8>(reg)]); 698 } 699 } 700 701 if (flags & HR_MODE_WRITE && HasConstantReg(reg)) 702 { 703 DebugAssert(reg != Reg::zero); 704 DEBUG_LOG("Clearing constant register in guest reg {} due to write mode in {}", GetRegName(reg), 705 GetHostRegName(hreg)); 706 707 ClearConstantReg(reg); 708 } 709 } 710 break; 711 712 case HR_TYPE_LOAD_DELAY_VALUE: 713 { 714 DebugAssert(!m_load_delay_dirty && (!HasLoadDelay() || !(flags & HR_MODE_WRITE))); 715 DEBUG_LOG("Allocating load delayed guest register {} in host reg {} in {} mode", GetRegName(reg), 716 GetHostRegName(hreg), GetReadWriteModeString(flags)); 717 m_load_delay_register = reg; 718 m_load_delay_value_register = hreg; 719 if (flags & HR_MODE_READ) 720 LoadHostRegFromCPUPointer(hreg, &g_state.load_delay_value); 721 } 722 break; 723 724 case HR_TYPE_NEXT_LOAD_DELAY_VALUE: 725 { 726 DEBUG_LOG("Allocating next load delayed guest register {} in host reg {} in {} mode", GetRegName(reg), 727 GetHostRegName(hreg), GetReadWriteModeString(flags)); 728 m_next_load_delay_register = reg; 729 m_next_load_delay_value_register = hreg; 730 if (flags & HR_MODE_READ) 731 LoadHostRegFromCPUPointer(hreg, &g_state.next_load_delay_value); 732 } 733 break; 734 735 case HR_TYPE_TEMP: 736 { 737 DebugAssert(!(flags & (HR_MODE_READ | HR_MODE_WRITE))); 738 DEBUG_LOG("Allocate host reg {} as temporary", GetHostRegName(hreg)); 739 } 740 break; 741 742 default: 743 Panic("Unknown type"); 744 break; 745 } 746 747 return hreg; 748 } 749 750 std::optional<u32> CPU::NewRec::Compiler::CheckHostReg(u32 flags, HostRegAllocType type /* = HR_TYPE_TEMP */, 751 Reg reg /* = Reg::count */) 752 { 753 for (u32 i = 0; i < NUM_HOST_REGS; i++) 754 { 755 HostRegAlloc& ra = m_host_regs[i]; 756 if (!(ra.flags & HR_ALLOCATED) || ra.type != type || ra.reg != reg) 757 continue; 758 
759 DebugAssert(ra.flags & HR_MODE_READ); 760 if (flags & HR_MODE_WRITE) 761 { 762 DebugAssert(type == HR_TYPE_CPU_REG); 763 if (!(ra.flags & HR_MODE_WRITE)) 764 DEBUG_LOG("Switch guest reg {} from read to read-write in host reg {}", GetRegName(reg), GetHostRegName(i)); 765 766 if (HasConstantReg(reg)) 767 { 768 DebugAssert(reg != Reg::zero); 769 DEBUG_LOG("Clearing constant register in guest reg {} due to write mode in {}", GetRegName(reg), 770 GetHostRegName(i)); 771 772 ClearConstantReg(reg); 773 } 774 } 775 776 ra.flags |= (flags & ALLOWED_HR_FLAGS) | HR_NEEDED; 777 ra.counter = m_register_alloc_counter++; 778 779 // Need a callee saved reg? 780 if (flags & HR_CALLEE_SAVED && !(ra.flags & HR_CALLEE_SAVED)) 781 { 782 // Need to move it to one which is 783 const u32 new_reg = GetFreeHostReg(HR_CALLEE_SAVED); 784 DEBUG_LOG("Rename host reg {} to {} for callee saved", GetHostRegName(i), GetHostRegName(new_reg)); 785 786 CopyHostReg(new_reg, i); 787 SwapHostRegAlloc(i, new_reg); 788 DebugAssert(!IsHostRegAllocated(i)); 789 return new_reg; 790 } 791 792 return i; 793 } 794 795 return std::nullopt; 796 } 797 798 u32 CPU::NewRec::Compiler::AllocateTempHostReg(u32 flags) 799 { 800 return AllocateHostReg(flags, HR_TYPE_TEMP); 801 } 802 803 void CPU::NewRec::Compiler::SwapHostRegAlloc(u32 lhs, u32 rhs) 804 { 805 HostRegAlloc& lra = m_host_regs[lhs]; 806 HostRegAlloc& rra = m_host_regs[rhs]; 807 808 const u8 lra_flags = lra.flags; 809 lra.flags = (lra.flags & IMMUTABLE_HR_FLAGS) | (rra.flags & ~IMMUTABLE_HR_FLAGS); 810 rra.flags = (rra.flags & IMMUTABLE_HR_FLAGS) | (lra_flags & ~IMMUTABLE_HR_FLAGS); 811 std::swap(lra.type, rra.type); 812 std::swap(lra.reg, rra.reg); 813 std::swap(lra.counter, rra.counter); 814 } 815 816 void CPU::NewRec::Compiler::FlushHostReg(u32 reg) 817 { 818 HostRegAlloc& ra = m_host_regs[reg]; 819 if (ra.flags & HR_MODE_WRITE) 820 { 821 switch (ra.type) 822 { 823 case HR_TYPE_CPU_REG: 824 { 825 DebugAssert(ra.reg > Reg::zero && ra.reg < Reg::count); 
826 DEBUG_LOG("Flushing register {} in host register {} to state", GetRegName(ra.reg), GetHostRegName(reg)); 827 StoreHostRegToCPUPointer(reg, &g_state.regs.r[static_cast<u8>(ra.reg)]); 828 } 829 break; 830 831 case HR_TYPE_LOAD_DELAY_VALUE: 832 { 833 DebugAssert(m_load_delay_value_register == reg); 834 DEBUG_LOG("Flushing load delayed register {} in host register {} to state", GetRegName(ra.reg), 835 GetHostRegName(reg)); 836 837 StoreHostRegToCPUPointer(reg, &g_state.load_delay_value); 838 m_load_delay_value_register = NUM_HOST_REGS; 839 } 840 break; 841 842 case HR_TYPE_NEXT_LOAD_DELAY_VALUE: 843 { 844 DebugAssert(m_next_load_delay_value_register == reg); 845 WARNING_LOG("Flushing NEXT load delayed register {} in host register {} to state", GetRegName(ra.reg), 846 GetHostRegName(reg)); 847 848 StoreHostRegToCPUPointer(reg, &g_state.next_load_delay_value); 849 m_next_load_delay_value_register = NUM_HOST_REGS; 850 } 851 break; 852 853 default: 854 break; 855 } 856 857 ra.flags = (ra.flags & ~HR_MODE_WRITE) | HR_MODE_READ; 858 } 859 } 860 861 void CPU::NewRec::Compiler::FreeHostReg(u32 reg) 862 { 863 DebugAssert(IsHostRegAllocated(reg)); 864 DEBUG_LOG("Freeing host register {}", GetHostRegName(reg)); 865 FlushHostReg(reg); 866 ClearHostReg(reg); 867 } 868 869 void CPU::NewRec::Compiler::ClearHostReg(u32 reg) 870 { 871 HostRegAlloc& ra = m_host_regs[reg]; 872 ra.flags &= IMMUTABLE_HR_FLAGS; 873 ra.type = HR_TYPE_TEMP; 874 ra.counter = 0; 875 ra.reg = Reg::count; 876 } 877 878 void CPU::NewRec::Compiler::MarkRegsNeeded(HostRegAllocType type, Reg reg) 879 { 880 for (u32 i = 0; i < NUM_HOST_REGS; i++) 881 { 882 HostRegAlloc& ra = m_host_regs[i]; 883 if (ra.flags & HR_ALLOCATED && ra.type == type && ra.reg == reg) 884 ra.flags |= HR_NEEDED; 885 } 886 } 887 888 void CPU::NewRec::Compiler::RenameHostReg(u32 reg, u32 new_flags, HostRegAllocType new_type, Reg new_reg) 889 { 890 // only supported for cpu regs for now 891 DebugAssert(new_type == HR_TYPE_TEMP || new_type == 
HR_TYPE_CPU_REG || new_type == HR_TYPE_NEXT_LOAD_DELAY_VALUE); 892 893 const std::optional<u32> old_reg = CheckHostReg(0, new_type, new_reg); 894 if (old_reg.has_value()) 895 { 896 // don't writeback 897 ClearHostReg(old_reg.value()); 898 } 899 900 // kill any load delay to this reg 901 if (new_type == HR_TYPE_CPU_REG || new_type == HR_TYPE_NEXT_LOAD_DELAY_VALUE) 902 CancelLoadDelaysToReg(new_reg); 903 904 if (new_type == HR_TYPE_CPU_REG) 905 { 906 DEBUG_LOG("Renaming host reg {} to guest reg {}", GetHostRegName(reg), GetRegName(new_reg)); 907 } 908 else if (new_type == HR_TYPE_NEXT_LOAD_DELAY_VALUE) 909 { 910 DEBUG_LOG("Renaming host reg {} to load delayed guest reg {}", GetHostRegName(reg), GetRegName(new_reg)); 911 DebugAssert(m_next_load_delay_register == Reg::count && m_next_load_delay_value_register == NUM_HOST_REGS); 912 m_next_load_delay_register = new_reg; 913 m_next_load_delay_value_register = reg; 914 } 915 else 916 { 917 DEBUG_LOG("Renaming host reg {} to temp", GetHostRegName(reg)); 918 } 919 920 HostRegAlloc& ra = m_host_regs[reg]; 921 ra.flags = (ra.flags & IMMUTABLE_HR_FLAGS) | HR_NEEDED | HR_ALLOCATED | (new_flags & ALLOWED_HR_FLAGS); 922 ra.counter = m_register_alloc_counter++; 923 ra.type = new_type; 924 ra.reg = new_reg; 925 } 926 927 void CPU::NewRec::Compiler::ClearHostRegNeeded(u32 reg) 928 { 929 DebugAssert(reg < NUM_HOST_REGS && IsHostRegAllocated(reg)); 930 HostRegAlloc& ra = m_host_regs[reg]; 931 if (ra.flags & HR_MODE_WRITE) 932 ra.flags |= HR_MODE_READ; 933 934 ra.flags &= ~HR_NEEDED; 935 } 936 937 void CPU::NewRec::Compiler::ClearHostRegsNeeded() 938 { 939 for (u32 i = 0; i < NUM_HOST_REGS; i++) 940 { 941 HostRegAlloc& ra = m_host_regs[i]; 942 if (!(ra.flags & HR_ALLOCATED)) 943 continue; 944 945 // shouldn't have any temps left 946 DebugAssert(ra.type != HR_TYPE_TEMP); 947 948 if (ra.flags & HR_MODE_WRITE) 949 ra.flags |= HR_MODE_READ; 950 951 ra.flags &= ~HR_NEEDED; 952 } 953 } 954 955 void CPU::NewRec::Compiler::DeleteMIPSReg(Reg 
reg, bool flush) 956 { 957 DebugAssert(reg != Reg::zero); 958 959 for (u32 i = 0; i < NUM_HOST_REGS; i++) 960 { 961 HostRegAlloc& ra = m_host_regs[i]; 962 if (ra.flags & HR_ALLOCATED && ra.type == HR_TYPE_CPU_REG && ra.reg == reg) 963 { 964 if (flush) 965 FlushHostReg(i); 966 ClearHostReg(i); 967 ClearConstantReg(reg); 968 return; 969 } 970 } 971 972 if (flush) 973 FlushConstantReg(reg); 974 ClearConstantReg(reg); 975 } 976 977 bool CPU::NewRec::Compiler::TryRenameMIPSReg(Reg to, Reg from, u32 fromhost, Reg other) 978 { 979 // can't rename when in form Rd = Rs op Rt and Rd == Rs or Rd == Rt 980 if (to == from || to == other || !iinfo->RenameTest(from)) 981 return false; 982 983 DEBUG_LOG("Renaming MIPS register {} to {}", GetRegName(from), GetRegName(to)); 984 985 if (iinfo->LiveTest(from)) 986 FlushHostReg(fromhost); 987 988 // remove all references to renamed-to register 989 DeleteMIPSReg(to, false); 990 CancelLoadDelaysToReg(to); 991 992 // and do the actual rename, new register has been modified. 993 m_host_regs[fromhost].reg = to; 994 m_host_regs[fromhost].flags |= HR_MODE_READ | HR_MODE_WRITE; 995 return true; 996 } 997 998 void CPU::NewRec::Compiler::UpdateHostRegCounters() 999 { 1000 const CodeCache::InstructionInfo* const info_end = m_block->InstructionsInfo() + m_block->size; 1001 1002 for (u32 i = 0; i < NUM_HOST_REGS; i++) 1003 { 1004 HostRegAlloc& ra = m_host_regs[i]; 1005 if ((ra.flags & (HR_ALLOCATED | HR_NEEDED)) != HR_ALLOCATED) 1006 continue; 1007 1008 // Try not to punt out load delays. 
1009 if (ra.type != HR_TYPE_CPU_REG) 1010 { 1011 ra.counter = std::numeric_limits<u16>::max(); 1012 continue; 1013 } 1014 1015 DebugAssert(IsHostRegAllocated(i)); 1016 const CodeCache::InstructionInfo* cur = iinfo; 1017 const Reg reg = ra.reg; 1018 if (!(cur->reg_flags[static_cast<u8>(reg)] & CodeCache::RI_USED)) 1019 { 1020 ra.counter = 0; 1021 continue; 1022 } 1023 1024 // order based on the number of instructions until this register is used 1025 u16 counter_val = std::numeric_limits<u16>::max(); 1026 for (; cur != info_end; cur++, counter_val--) 1027 { 1028 if (cur->ReadsReg(reg)) 1029 break; 1030 } 1031 1032 ra.counter = counter_val; 1033 } 1034 } 1035 1036 void CPU::NewRec::Compiler::Flush(u32 flags) 1037 { 1038 // TODO: Flush unneeded caller-saved regs (backup/replace calle-saved needed with caller-saved) 1039 if (flags & 1040 (FLUSH_FREE_UNNEEDED_CALLER_SAVED_REGISTERS | FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_FREE_ALL_REGISTERS)) 1041 { 1042 const u32 req_mask = (flags & FLUSH_FREE_ALL_REGISTERS) ? 1043 HR_ALLOCATED : 1044 ((flags & FLUSH_FREE_CALLER_SAVED_REGISTERS) ? 
                              (HR_ALLOCATED | HR_CALLEE_SAVED) :
                              (HR_ALLOCATED | HR_CALLEE_SAVED | HR_NEEDED));
    constexpr u32 req_flags = HR_ALLOCATED;

    for (u32 i = 0; i < NUM_HOST_REGS; i++)
    {
      HostRegAlloc& ra = m_host_regs[i];
      // Masked bits must equal "allocated, nothing else set" for this reg to be freed.
      if ((ra.flags & req_mask) == req_flags)
        FreeHostReg(i);
    }
  }

  if (flags & FLUSH_INVALIDATE_MIPS_REGISTERS)
  {
    // Drop every cached guest register entirely (writes back dirty values via free).
    for (u32 i = 0; i < NUM_HOST_REGS; i++)
    {
      HostRegAlloc& ra = m_host_regs[i];
      if (ra.flags & HR_ALLOCATED && ra.type == HR_TYPE_CPU_REG)
        FreeHostReg(i);
    }

    FlushConstantRegs(true);
  }
  else
  {
    if (flags & FLUSH_FLUSH_MIPS_REGISTERS)
    {
      // Write back dirty guest registers but keep them cached in host registers.
      for (u32 i = 0; i < NUM_HOST_REGS; i++)
      {
        HostRegAlloc& ra = m_host_regs[i];
        if ((ra.flags & (HR_ALLOCATED | HR_MODE_WRITE)) == (HR_ALLOCATED | HR_MODE_WRITE) && ra.type == HR_TYPE_CPU_REG)
          FlushHostReg(i);
      }

      // flush any constant registers which are dirty too
      FlushConstantRegs(false);
    }
  }

  if (flags & FLUSH_INVALIDATE_SPECULATIVE_CONSTANTS)
    InvalidateSpeculativeValues();
}

// Writes a tracked constant register value out to the in-memory CPU state and
// clears its dirty bit (the constant itself stays valid).
void CPU::NewRec::Compiler::FlushConstantReg(Reg r)
{
  DebugAssert(m_constant_regs_valid.test(static_cast<u32>(r)));
  DEBUG_LOG("Writing back register {} with constant value 0x{:08X}", GetRegName(r),
            m_constant_reg_values[static_cast<u32>(r)]);
  StoreConstantToCPUPointer(m_constant_reg_values[static_cast<u32>(r)], &g_state.regs.r[static_cast<u32>(r)]);
  m_constant_regs_dirty.reset(static_cast<u32>(r));
}

// Pushes a full snapshot of the compiler's register-allocation/PC/load-delay state
// onto the backup stack, so a speculative compile path can be rolled back with
// RestoreHostState(). Asserts if the fixed-size stack is full.
void CPU::NewRec::Compiler::BackupHostState()
{
  DebugAssert(m_host_state_backup_count < m_host_state_backup.size());

  // need to back up everything...
  HostStateBackup& bu = m_host_state_backup[m_host_state_backup_count];
  bu.cycles = m_cycles;
  bu.gte_done_cycle = m_gte_done_cycle;
  bu.compiler_pc = m_compiler_pc;
  bu.dirty_pc = m_dirty_pc;
  bu.dirty_instruction_bits = m_dirty_instruction_bits;
  bu.dirty_gte_done_cycle = m_dirty_gte_done_cycle;
  bu.block_ended = m_block_ended;
  bu.inst = inst;
  bu.iinfo = iinfo;
  bu.current_instruction_pc = m_current_instruction_pc;
  bu.current_instruction_delay_slot = m_current_instruction_branch_delay_slot;
  bu.const_regs_valid = m_constant_regs_valid;
  bu.const_regs_dirty = m_constant_regs_dirty;
  bu.const_regs_values = m_constant_reg_values;
  bu.host_regs = m_host_regs;
  bu.register_alloc_counter = m_register_alloc_counter;
  bu.load_delay_dirty = m_load_delay_dirty;
  bu.load_delay_register = m_load_delay_register;
  bu.load_delay_value_register = m_load_delay_value_register;
  bu.next_load_delay_register = m_next_load_delay_register;
  bu.next_load_delay_value_register = m_next_load_delay_value_register;
  m_host_state_backup_count++;
}

// Pops the most recent BackupHostState() snapshot, discarding all compiler state
// changes made since it was taken.
void CPU::NewRec::Compiler::RestoreHostState()
{
  DebugAssert(m_host_state_backup_count > 0);
  m_host_state_backup_count--;

  HostStateBackup& bu = m_host_state_backup[m_host_state_backup_count];
  m_host_regs = std::move(bu.host_regs);
  m_constant_reg_values = std::move(bu.const_regs_values);
  m_constant_regs_dirty = std::move(bu.const_regs_dirty);
  m_constant_regs_valid = std::move(bu.const_regs_valid);
  m_current_instruction_branch_delay_slot = bu.current_instruction_delay_slot;
  m_current_instruction_pc = bu.current_instruction_pc;
  inst = bu.inst;
  iinfo = bu.iinfo;
  m_block_ended = bu.block_ended;
  m_dirty_gte_done_cycle = bu.dirty_gte_done_cycle;
  m_dirty_instruction_bits = bu.dirty_instruction_bits;
  m_dirty_pc = bu.dirty_pc;
  m_compiler_pc = bu.compiler_pc;
  m_register_alloc_counter = bu.register_alloc_counter;
  m_load_delay_dirty = bu.load_delay_dirty;
  m_load_delay_register = bu.load_delay_register;
  m_load_delay_value_register = bu.load_delay_value_register;
  m_next_load_delay_register = bu.next_load_delay_register;
  m_next_load_delay_value_register = bu.next_load_delay_value_register;
  m_gte_done_cycle = bu.gte_done_cycle;
  m_cycles = bu.cycles;
}

// Records metadata (PC, cycles, allocated-register bitmask, access description) for
// a fastmem memory access so the backend can patch it into a slow-path call if it
// later faults. Only valid when fastmem is active.
void CPU::NewRec::Compiler::AddLoadStoreInfo(void* code_address, u32 code_size, u32 address_register, u32 data_register,
                                             MemoryAccessSize size, bool is_signed, bool is_load)
{
  DebugAssert(CodeCache::IsUsingFastmem());
  DebugAssert(address_register < NUM_HOST_REGS);
  DebugAssert(data_register < NUM_HOST_REGS);

  // Bitmask of currently-allocated host registers; used by the fault handler to
  // know what must be preserved around the backpatched slow path.
  u32 gpr_bitmask = 0;
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    if (IsHostRegAllocated(i))
      gpr_bitmask |= (1u << i);
  }

  CPU::CodeCache::AddLoadStoreInfo(code_address, code_size, m_current_instruction_pc, m_block->pc, m_cycles,
                                   gpr_bitmask, static_cast<u8>(address_register), static_cast<u8>(data_register), size,
                                   is_signed, is_load);
}

// Top-level per-instruction dispatcher: decodes the current instruction and routes
// it to the appropriate Compile_* / CompileTemplate / CompileLoadStoreTemplate
// handler, interleaving speculative-constant execution (SpecExec_*).
void CPU::NewRec::Compiler::CompileInstruction()
{
#ifdef _DEBUG
  TinyString str;
  DisassembleInstruction(&str, m_current_instruction_pc, inst->bits);
  DEBUG_LOG("Compiling{} {:08X}: {}", m_current_instruction_branch_delay_slot ?
" branch delay slot" : "",
            m_current_instruction_pc, str);
#endif

  // Every guest instruction costs at least one cycle.
  m_cycles++;

  // NOPs still consume the pending load delay before being skipped.
  if (IsNopInstruction(*inst))
  {
    UpdateLoadDelay();
    return;
  }

  // One-line-per-opcode dispatch table. Each entry compiles the instruction (constant
  // and non-constant paths, optional PGXP callback, TF_* dataflow flags) and then runs
  // the matching speculative-constant evaluator. Unknown encodings fall back to the
  // interpreter and truncate the block.
  switch (inst->op)
  {
#define PGXPFN(x) reinterpret_cast<const void*>(&PGXP::x)

      // clang-format off
    // TODO: PGXP for jalr

    case InstructionOp::funct:
    {
      switch (inst->r.funct)
      {
        case InstructionFunct::sll: CompileTemplate(&Compiler::Compile_sll_const, &Compiler::Compile_sll, PGXPFN(CPU_SLL), TF_WRITES_D | TF_READS_T); SpecExec_sll(); break;
        case InstructionFunct::srl: CompileTemplate(&Compiler::Compile_srl_const, &Compiler::Compile_srl, PGXPFN(CPU_SRL), TF_WRITES_D | TF_READS_T); SpecExec_srl(); break;
        case InstructionFunct::sra: CompileTemplate(&Compiler::Compile_sra_const, &Compiler::Compile_sra, PGXPFN(CPU_SRA), TF_WRITES_D | TF_READS_T); SpecExec_sra(); break;
        case InstructionFunct::sllv: CompileTemplate(&Compiler::Compile_sllv_const, &Compiler::Compile_sllv, PGXPFN(CPU_SLLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_sllv(); break;
        case InstructionFunct::srlv: CompileTemplate(&Compiler::Compile_srlv_const, &Compiler::Compile_srlv, PGXPFN(CPU_SRLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srlv(); break;
        case InstructionFunct::srav: CompileTemplate(&Compiler::Compile_srav_const, &Compiler::Compile_srav, PGXPFN(CPU_SRAV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srav(); break;
        case InstructionFunct::jr: CompileTemplate(&Compiler::Compile_jr_const, &Compiler::Compile_jr, nullptr, TF_READS_S); break;
        case InstructionFunct::jalr: CompileTemplate(&Compiler::Compile_jalr_const, &Compiler::Compile_jalr, nullptr, /*TF_WRITES_D |*/ TF_READS_S | TF_NO_NOP); SpecExec_jalr(); break;
        case InstructionFunct::syscall: Compile_syscall(); break;
        case InstructionFunct::break_: Compile_break(); break;
        case InstructionFunct::mfhi: SpecCopyReg(inst->r.rd, Reg::hi); CompileMoveRegTemplate(inst->r.rd, Reg::hi, g_settings.gpu_pgxp_cpu); break;
        case InstructionFunct::mthi: SpecCopyReg(Reg::hi, inst->r.rs); CompileMoveRegTemplate(Reg::hi, inst->r.rs, g_settings.gpu_pgxp_cpu); break;
        case InstructionFunct::mflo: SpecCopyReg(inst->r.rd, Reg::lo); CompileMoveRegTemplate(inst->r.rd, Reg::lo, g_settings.gpu_pgxp_cpu); break;
        case InstructionFunct::mtlo: SpecCopyReg(Reg::lo, inst->r.rs); CompileMoveRegTemplate(Reg::lo, inst->r.rs, g_settings.gpu_pgxp_cpu); break;
        case InstructionFunct::mult: CompileTemplate(&Compiler::Compile_mult_const, &Compiler::Compile_mult, PGXPFN(CPU_MULT), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_mult(); break;
        case InstructionFunct::multu: CompileTemplate(&Compiler::Compile_multu_const, &Compiler::Compile_multu, PGXPFN(CPU_MULTU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_multu(); break;
        case InstructionFunct::div: CompileTemplate(&Compiler::Compile_div_const, &Compiler::Compile_div, PGXPFN(CPU_DIV), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_div(); break;
        case InstructionFunct::divu: CompileTemplate(&Compiler::Compile_divu_const, &Compiler::Compile_divu, PGXPFN(CPU_DIVU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_divu(); break;
        case InstructionFunct::add: CompileTemplate(&Compiler::Compile_add_const, &Compiler::Compile_add, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_add(); break;
        case InstructionFunct::addu: CompileTemplate(&Compiler::Compile_addu_const, &Compiler::Compile_addu, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_addu(); break;
        case InstructionFunct::sub: CompileTemplate(&Compiler::Compile_sub_const, &Compiler::Compile_sub, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_sub(); break;
        case InstructionFunct::subu: CompileTemplate(&Compiler::Compile_subu_const, &Compiler::Compile_subu, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_RENAME_WITH_ZERO_T); SpecExec_subu(); break;
        case InstructionFunct::and_: CompileTemplate(&Compiler::Compile_and_const, &Compiler::Compile_and, PGXPFN(CPU_AND_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_and(); break;
        case InstructionFunct::or_: CompileTemplate(&Compiler::Compile_or_const, &Compiler::Compile_or, PGXPFN(CPU_OR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_or(); break;
        case InstructionFunct::xor_: CompileTemplate(&Compiler::Compile_xor_const, &Compiler::Compile_xor, PGXPFN(CPU_XOR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_xor(); break;
        case InstructionFunct::nor: CompileTemplate(&Compiler::Compile_nor_const, &Compiler::Compile_nor, PGXPFN(CPU_NOR), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_nor(); break;
        case InstructionFunct::slt: CompileTemplate(&Compiler::Compile_slt_const, &Compiler::Compile_slt, PGXPFN(CPU_SLT), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_slt(); break;
        case InstructionFunct::sltu: CompileTemplate(&Compiler::Compile_sltu_const, &Compiler::Compile_sltu, PGXPFN(CPU_SLTU), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_sltu(); break;
        default: Compile_Fallback(); InvalidateSpeculativeValues(); TruncateBlock(); break;
      }
    }
    break;

    case InstructionOp::j: Compile_j(); break;
    case InstructionOp::jal: Compile_jal(); SpecExec_jal(); break;

    case InstructionOp::b: CompileTemplate(&Compiler::Compile_b_const, &Compiler::Compile_b, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); SpecExec_b(); break;
    case InstructionOp::blez: CompileTemplate(&Compiler::Compile_blez_const, &Compiler::Compile_blez, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break;
    case InstructionOp::bgtz: CompileTemplate(&Compiler::Compile_bgtz_const, &Compiler::Compile_bgtz, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break;
    case InstructionOp::beq: CompileTemplate(&Compiler::Compile_beq_const, &Compiler::Compile_beq, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break;
    case InstructionOp::bne: CompileTemplate(&Compiler::Compile_bne_const, &Compiler::Compile_bne, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break;

    case InstructionOp::addi: CompileTemplate(&Compiler::Compile_addi_const, &Compiler::Compile_addi, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_IMM); SpecExec_addi(); break;
    case InstructionOp::addiu: CompileTemplate(&Compiler::Compile_addiu_const, &Compiler::Compile_addiu, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_addiu(); break;
    case InstructionOp::slti: CompileTemplate(&Compiler::Compile_slti_const, &Compiler::Compile_slti, PGXPFN(CPU_SLTI), TF_WRITES_T | TF_READS_S); SpecExec_slti(); break;
    case InstructionOp::sltiu: CompileTemplate(&Compiler::Compile_sltiu_const, &Compiler::Compile_sltiu, PGXPFN(CPU_SLTIU), TF_WRITES_T | TF_READS_S); SpecExec_sltiu(); break;
    case InstructionOp::andi: CompileTemplate(&Compiler::Compile_andi_const, &Compiler::Compile_andi, PGXPFN(CPU_ANDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE); SpecExec_andi(); break;
    case InstructionOp::ori: CompileTemplate(&Compiler::Compile_ori_const, &Compiler::Compile_ori, PGXPFN(CPU_ORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_ori(); break;
    case InstructionOp::xori: CompileTemplate(&Compiler::Compile_xori_const, &Compiler::Compile_xori, PGXPFN(CPU_XORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_xori(); break;
    case InstructionOp::lui: Compile_lui(); SpecExec_lui(); break;

    case InstructionOp::lb: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, true); break;
    case InstructionOp::lbu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Byte, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, false); break;
    case InstructionOp::lh: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::HalfWord, true); break;
    case InstructionOp::lhu: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::HalfWord, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::HalfWord, false); break;
    case InstructionOp::lw: CompileLoadStoreTemplate(&Compiler::Compile_lxx, MemoryAccessSize::Word, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Word, false); break;
    case InstructionOp::lwl: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(false); break;
    case InstructionOp::lwr: CompileLoadStoreTemplate(&Compiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(true); break;
    case InstructionOp::sb: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Byte, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Byte); break;
    case InstructionOp::sh: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::HalfWord, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::HalfWord); break;
    case InstructionOp::sw: CompileLoadStoreTemplate(&Compiler::Compile_sxx, MemoryAccessSize::Word, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Word); break;
    case InstructionOp::swl: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S /*| TF_READS_T*/); SpecExec_swx(false); break;
    case InstructionOp::swr: CompileLoadStoreTemplate(&Compiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S /*| TF_READS_T*/); SpecExec_swx(true); break;

    case InstructionOp::cop0:
    {
      if (inst->cop.IsCommonInstruction())
      {
        switch (inst->cop.CommonOp())
        {
          case CopCommonInstruction::mfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Compiler::Compile_mfc0, PGXPFN(CPU_MFC0), TF_WRITES_T | TF_LOAD_DELAY); } SpecExec_mfc0(); break;
          case CopCommonInstruction::mtcn: CompileTemplate(nullptr, &Compiler::Compile_mtc0, PGXPFN(CPU_MTC0), TF_READS_T); SpecExec_mtc0(); break;
          default: Compile_Fallback(); break;
        }
      }
      else
      {
        switch (inst->cop.Cop0Op())
        {
          case Cop0Instruction::rfe: CompileTemplate(nullptr, &Compiler::Compile_rfe, nullptr, 0); SpecExec_rfe(); break;
          default: Compile_Fallback(); break;
        }
      }
    }
    break;

    case InstructionOp::cop2:
    {
      if (inst->cop.IsCommonInstruction())
      {
        // mfc2/cfc2 share one handler, as do mtc2/ctc2; all stall for in-flight GTE ops.
        switch (inst->cop.CommonOp())
        {
          case CopCommonInstruction::mfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Compiler::Compile_mfc2, nullptr, TF_GTE_STALL); } break;
          case CopCommonInstruction::cfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Compiler::Compile_mfc2, nullptr, TF_GTE_STALL); } break;
          case CopCommonInstruction::mtcn: CompileTemplate(nullptr, &Compiler::Compile_mtc2, PGXPFN(CPU_MTC2), TF_GTE_STALL | TF_READS_T | TF_PGXP_WITHOUT_CPU); break;
          case CopCommonInstruction::ctcn: CompileTemplate(nullptr, &Compiler::Compile_mtc2, PGXPFN(CPU_MTC2), TF_GTE_STALL | TF_READS_T | TF_PGXP_WITHOUT_CPU); break;
          default: Compile_Fallback(); break;
        }
      }
      else
      {
        // GTE ops
        CompileTemplate(nullptr, &Compiler::Compile_cop2, nullptr, TF_GTE_STALL);
      }
    }
    break;

    case InstructionOp::lwc2: CompileLoadStoreTemplate(&Compiler::Compile_lwc2, MemoryAccessSize::Word, false, false, TF_GTE_STALL | TF_READS_S | TF_LOAD_DELAY); break;
    case InstructionOp::swc2: CompileLoadStoreTemplate(&Compiler::Compile_swc2, MemoryAccessSize::Word, true, false, TF_GTE_STALL | TF_READS_S); SpecExec_swc2(); break;

    // swc0/lwc0/cop1/cop3 are essentially no-ops
    case InstructionOp::cop1:
    case InstructionOp::cop3:
    case InstructionOp::lwc0:
    case InstructionOp::lwc1:
    case InstructionOp::lwc3:
    case InstructionOp::swc0:
    case InstructionOp::swc1:
    case InstructionOp::swc3:
      break;

    default: Compile_Fallback(); InvalidateSpeculativeValues(); TruncateBlock(); break;
      // clang-format on

#undef PGXPFN
  }

  // The current instruction's inputs are no longer pinned, and a pending load delay
  // becomes visible to the next instruction.
  ClearHostRegsNeeded();
  UpdateLoadDelay();

#if 0
  const void* end = GetCurrentCodePointer();
  if (start != end && !m_current_instruction_branch_delay_slot)
  {
    CodeCache::DisassembleAndLogHostCode(start,
                                         static_cast<u32>(static_cast<const u8*>(end) - static_cast<const u8*>(start)));
  }
#endif
}

// Compiles the instruction following a branch as its delay slot: advances the
// instruction/PC cursors, marks delay-slot state, compiles it, then clears the flag.
void CPU::NewRec::Compiler::CompileBranchDelaySlot(bool dirty_pc /* = true */)
{
  // Update load delay at the end of the previous instruction.
  UpdateLoadDelay();

  // Don't need the branch instruction's inputs.
  ClearHostRegsNeeded();

  // TODO: Move cycle add before this.
  inst++;
  iinfo++;
  m_current_instruction_pc += sizeof(Instruction);
  m_current_instruction_branch_delay_slot = true;
  m_compiler_pc += sizeof(Instruction);
  m_dirty_pc = dirty_pc;
  m_dirty_instruction_bits = true;

  CompileInstruction();

  m_current_instruction_branch_delay_slot = false;
}

// Generic compilation driver for ALU/branch-style instructions. 'const_func' is the
// fully-constant-operand path (used when every read operand has a known constant),
// 'func' the general path, 'pgxp_cpu_func' an optional PGXP callback, and 'tflags'
// (TF_*) describes which guest registers are read/written plus special behaviors
// (overflow traps, commutativity, delay-slot swapping, rename-with-zero shortcuts).
void CPU::NewRec::Compiler::CompileTemplate(void (Compiler::*const_func)(CompileFlags),
                                            void (Compiler::*func)(CompileFlags), const void* pgxp_cpu_func, u32 tflags)
{
  // TODO: This is where we will do memory operand optimization. Remember to kill constants!
  // TODO: Swap S and T if commutative
  // TODO: For and, treat as zeroing if imm is zero
  // TODO: Optimize slt + bne to cmp + jump
  // TODO: Prefer memory operands when load delay is dirty, since we're going to invalidate immediately after the first
  // instruction..
  // TODO: andi with zero -> zero const
  // TODO: load constant so it can be flushed if it's not overwritten later
  // TODO: inline PGXP ops.
  // TODO: don't rename on sltu.

  bool allow_constant = static_cast<bool>(const_func);
  Reg rs = inst->r.rs.GetValue();
  Reg rt = inst->r.rt.GetValue();
  Reg rd = inst->r.rd.GetValue();

  if (tflags & TF_GTE_STALL)
    StallUntilGTEComplete();

  // throw away instructions writing to $zero
  // (unless overflow traps are enabled and the op can raise one — the exception
  // side effect must still be generated)
  if (!(tflags & TF_NO_NOP) && (!g_settings.cpu_recompiler_memory_exceptions || !(tflags & TF_CAN_OVERFLOW)) &&
      ((tflags & TF_WRITES_T && rt == Reg::zero) || (tflags & TF_WRITES_D && rd == Reg::zero)))
  {
    DEBUG_LOG("Skipping instruction because it writes to zero");
    return;
  }

  // handle rename operations: op with a known-zero operand degenerates to a move
  if ((tflags & TF_RENAME_WITH_ZERO_T && HasConstantRegValue(rt, 0)))
  {
    DebugAssert((tflags & (TF_WRITES_D | TF_READS_S | TF_READS_T)) == (TF_WRITES_D | TF_READS_S | TF_READS_T));
    CompileMoveRegTemplate(rd, rs, true);
    return;
  }
  else if ((tflags & (TF_RENAME_WITH_ZERO_T | TF_COMMUTATIVE)) == (TF_RENAME_WITH_ZERO_T | TF_COMMUTATIVE) &&
           HasConstantRegValue(rs, 0))
  {
    DebugAssert((tflags & (TF_WRITES_D | TF_READS_S | TF_READS_T)) == (TF_WRITES_D | TF_READS_S | TF_READS_T));
    CompileMoveRegTemplate(rd, rt, true);
    return;
  }
  else if (tflags & TF_RENAME_WITH_ZERO_IMM && inst->i.imm == 0)
  {
    CompileMoveRegTemplate(rt, rs, true);
    return;
  }

  // Emit the PGXP tracking call (before operands are clobbered) when PGXP is active.
  if (pgxp_cpu_func && g_settings.gpu_pgxp_enable && ((tflags & TF_PGXP_WITHOUT_CPU) || g_settings.UsingPGXPCPUMode()))
  {
    std::array<Reg, 2> reg_args = {{Reg::count, Reg::count}};
    u32 num_reg_args = 0;
    if (tflags & TF_READS_S)
      reg_args[num_reg_args++] = rs;
    if (tflags & TF_READS_T)
      reg_args[num_reg_args++] = rt;
    if (tflags & TF_READS_LO)
      reg_args[num_reg_args++] = Reg::lo;
    if (tflags & TF_READS_HI)
      reg_args[num_reg_args++] = Reg::hi;

    DebugAssert(num_reg_args <= 2);
    GeneratePGXPCallWithMIPSRegs(pgxp_cpu_func, inst->bits, reg_args[0], reg_args[1]);
  }

  // if it's a commutative op, and we have one constant reg but not the other, swap them
  // TODO: make it swap when writing to T as well
  // TODO: drop the hack for rd == rt
  if (tflags & TF_COMMUTATIVE && !(tflags & TF_WRITES_T) &&
      ((HasConstantReg(rs) && !HasConstantReg(rt)) || (tflags & TF_WRITES_D && rd == rt)))
  {
    DEBUG_LOG("Swapping S:{} and T:{} due to commutative op and constants", GetRegName(rs), GetRegName(rt));
    std::swap(rs, rt);
  }

  CompileFlags cf = {};

  // Mark each read register as needed, and note whether it is constant; any
  // non-constant input disables the fully-constant path.
  if (tflags & TF_READS_S)
  {
    MarkRegsNeeded(HR_TYPE_CPU_REG, rs);
    if (HasConstantReg(rs))
      cf.const_s = true;
    else
      allow_constant = false;
  }
  if (tflags & TF_READS_T)
  {
    MarkRegsNeeded(HR_TYPE_CPU_REG, rt);
    if (HasConstantReg(rt))
      cf.const_t = true;
    else
      allow_constant = false;
  }
  if (tflags & TF_READS_LO)
  {
    MarkRegsNeeded(HR_TYPE_CPU_REG, Reg::lo);
    if (HasConstantReg(Reg::lo))
      cf.const_lo = true;
    else
      allow_constant = false;
  }
  if (tflags & TF_READS_HI)
  {
    MarkRegsNeeded(HR_TYPE_CPU_REG, Reg::hi);
    if (HasConstantReg(Reg::hi))
      cf.const_hi = true;
    else
      allow_constant = false;
  }

  // Needed because of potential swapping
  if (tflags & TF_READS_S)
    cf.mips_s = static_cast<u8>(rs);
  if (tflags & (TF_READS_T | TF_WRITES_T))
    cf.mips_t = static_cast<u8>(rt);

  if (allow_constant)
  {
    // woot, constant path
    (this->*const_func)(cf);
    return;
  }

  UpdateHostRegCounters();

  if (tflags & TF_CAN_SWAP_DELAY_SLOT && TrySwapDelaySlot(cf.MipsS(), cf.MipsT()))
  {
    // CompileBranchDelaySlot() clears needed, so need to reset.
    cf.delay_slot_swapped = true;
    if (tflags & TF_READS_S)
      MarkRegsNeeded(HR_TYPE_CPU_REG, rs);
    if (tflags & TF_READS_T)
      MarkRegsNeeded(HR_TYPE_CPU_REG, rt);
    if (tflags & TF_READS_LO)
      MarkRegsNeeded(HR_TYPE_CPU_REG, Reg::lo);
    if (tflags & TF_READS_HI)
      MarkRegsNeeded(HR_TYPE_CPU_REG, Reg::hi);
  }

  // Allocate host registers for inputs that aren't constant, or that must be in a
  // register anyway (TF_NEEDS_REG_*, or the destination aliases the input).
  if (tflags & TF_READS_S &&
      (tflags & TF_NEEDS_REG_S || !cf.const_s || (tflags & TF_WRITES_D && rd != Reg::zero && rd == rs)))
  {
    cf.host_s = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rs);
    cf.const_s = false;
    cf.valid_host_s = true;
  }

  if (tflags & TF_READS_T &&
      (tflags & (TF_NEEDS_REG_T | TF_WRITES_T) || !cf.const_t || (tflags & TF_WRITES_D && rd != Reg::zero && rd == rt)))
  {
    cf.host_t = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt);
    cf.const_t = false;
    cf.valid_host_t = true;
  }

  if (tflags & (TF_READS_LO | TF_WRITES_LO))
  {
    cf.host_lo =
      AllocateHostReg(((tflags & TF_READS_LO) ? HR_MODE_READ : 0u) | ((tflags & TF_WRITES_LO) ? HR_MODE_WRITE : 0u),
                      HR_TYPE_CPU_REG, Reg::lo);
    cf.const_lo = false;
    cf.valid_host_lo = true;
  }

  if (tflags & (TF_READS_HI | TF_WRITES_HI))
  {
    cf.host_hi =
      AllocateHostReg(((tflags & TF_READS_HI) ? HR_MODE_READ : 0u) | ((tflags & TF_WRITES_HI) ? HR_MODE_WRITE : 0u),
                      HR_TYPE_CPU_REG, Reg::hi);
    cf.const_hi = false;
    cf.valid_host_hi = true;
  }

  // Loads with emulated load delays write to the delayed-value slot, not the reg.
  const HostRegAllocType write_type =
    (tflags & TF_LOAD_DELAY && EMULATE_LOAD_DELAYS) ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG;

  if (tflags & TF_CAN_OVERFLOW && g_settings.cpu_recompiler_memory_exceptions)
  {
    // allocate a temp register for the result, then swap it back
    // (the op may trap, so the destination must not be clobbered until it succeeds)
    const u32 tempreg = AllocateHostReg(0, HR_TYPE_TEMP);
    ;
    if (tflags & TF_WRITES_D)
    {
      cf.host_d = tempreg;
      cf.valid_host_d = true;
    }
    else if (tflags & TF_WRITES_T)
    {
      cf.host_t = tempreg;
      cf.valid_host_t = true;
    }

    (this->*func)(cf);

    if (tflags & TF_WRITES_D && rd != Reg::zero)
    {
      DeleteMIPSReg(rd, false);
      RenameHostReg(tempreg, HR_MODE_WRITE, write_type, rd);
    }
    else if (tflags & TF_WRITES_T && rt != Reg::zero)
    {
      DeleteMIPSReg(rt, false);
      RenameHostReg(tempreg, HR_MODE_WRITE, write_type, rt);
    }
    else
    {
      FreeHostReg(tempreg);
    }
  }
  else
  {
    // Non-trapping path: try to reuse the S input's host register for the result.
    if (tflags & TF_WRITES_D && rd != Reg::zero)
    {
      if (tflags & TF_READS_S && cf.valid_host_s && TryRenameMIPSReg(rd, rs, cf.host_s, Reg::count))
        cf.host_d = cf.host_s;
      else
        cf.host_d = AllocateHostReg(HR_MODE_WRITE, write_type, rd);
      cf.valid_host_d = true;
    }

    if (tflags & TF_WRITES_T && rt != Reg::zero)
    {
      if (tflags & TF_READS_S && cf.valid_host_s && TryRenameMIPSReg(rt, rs, cf.host_s, Reg::count))
        cf.host_t = cf.host_s;
      else
        cf.host_t = AllocateHostReg(HR_MODE_WRITE, write_type, rt);
      cf.valid_host_t = true;
    }

    (this->*func)(cf);
  }
}

// Generic compilation driver for memory accesses (loads, stores, lwc2/swc2).
// Resolves a constant or speculative address, decides fastmem eligibility, sets up
// operand registers, then invokes the backend-specific 'func'.
void CPU::NewRec::Compiler::CompileLoadStoreTemplate(void (Compiler::*func)(CompileFlags, MemoryAccessSize, bool, bool,
                                                                            const std::optional<VirtualMemoryAddress>&),
                                                     MemoryAccessSize size, bool store, bool sign, u32 tflags)
{
  const Reg rs = inst->i.rs;
  const Reg rt = inst->i.rt;

  if (tflags & TF_GTE_STALL)
    StallUntilGTEComplete();

  CompileFlags cf = {};

  if (tflags & TF_READS_S)
  {
    MarkRegsNeeded(HR_TYPE_CPU_REG, rs);
    cf.mips_s = static_cast<u8>(rs);
  }
  if (tflags & (TF_READS_T | TF_WRITES_T))
  {
    if (tflags & TF_READS_T)
      MarkRegsNeeded(HR_TYPE_CPU_REG, rt);
    cf.mips_t = static_cast<u8>(rt);
  }

  UpdateHostRegCounters();

  // constant address?
  std::optional<VirtualMemoryAddress> addr;
  std::optional<VirtualMemoryAddress> spec_addr;
  // Fastmem is off when memory exceptions are emulated, the cache is isolated, or a
  // previous access at this PC already faulted (avoids repeated backpatching).
  bool use_fastmem = CodeCache::IsUsingFastmem() && !g_settings.cpu_recompiler_memory_exceptions &&
                     !SpecIsCacheIsolated() && !CodeCache::HasPreviouslyFaultedOnPC(m_current_instruction_pc);
  if (HasConstantReg(rs))
  {
    // Base register is a known constant -> the effective address is exact.
    addr = GetConstantRegU32(rs) + inst->i.imm_sext32();
    spec_addr = addr;
    cf.const_s = true;

    if (!Bus::CanUseFastmemForAddress(addr.value()))
    {
      DEBUG_LOG("Not using fastmem for {:08X}", addr.value());
      use_fastmem = false;
    }
  }
  else
  {
    // Only a speculative address is available; it can still veto fastmem.
    spec_addr = SpecExec_LoadStoreAddr();
    if (use_fastmem && spec_addr.has_value() && !Bus::CanUseFastmemForAddress(spec_addr.value()))
    {
      DEBUG_LOG("Not using fastmem for speculative {:08X}", spec_addr.value());
      use_fastmem = false;
    }

    if constexpr (HAS_MEMORY_OPERANDS)
    {
      // don't bother caching it since we're going to flush anyway
      // TODO: make less rubbish, if it's caller saved we don't need to flush...
      const std::optional<u32> hreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rs);
      if (hreg.has_value())
      {
        cf.valid_host_s = true;
        cf.host_s = hreg.value();
      }
    }
    else
    {
      // need rs in a register
      cf.host_s = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rs);
      cf.valid_host_s = true;
    }
  }

  // reads T -> store, writes T -> load
  // for now, we defer the allocation to afterwards, because C call
  if (tflags & TF_READS_T)
  {
    if (HasConstantReg(rt))
    {
      cf.const_t = true;
    }
    else
    {
      if constexpr (HAS_MEMORY_OPERANDS)
      {
        const std::optional<u32> hreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt);
        if (hreg.has_value())
        {
          cf.valid_host_t = true;
          cf.host_t = hreg.value();
        }
      }
      else
      {
        cf.host_t = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt);
        cf.valid_host_t = true;
      }
    }
  }

  (this->*func)(cf, size, sign, use_fastmem, addr);

  // Self-modifying-code guard: if a store's (speculative) target lands inside the
  // block currently being compiled, stop compiling past this instruction.
  if (store && !m_block_ended && !m_current_instruction_branch_delay_slot && spec_addr.has_value() &&
      GetSegmentForAddress(spec_addr.value()) != Segment::KSEG2)
  {
    // Get rid of physical aliases.
    const u32 phys_spec_addr = VirtualAddressToPhysical(spec_addr.value());
    if (phys_spec_addr >= VirtualAddressToPhysical(m_block->pc) &&
        phys_spec_addr < VirtualAddressToPhysical(m_block->pc + (m_block->size * sizeof(Instruction))))
    {
      WARNING_LOG("Instruction {:08X} speculatively writes to {:08X} inside block {:08X}-{:08X}. Truncating block.",
                  m_current_instruction_pc, phys_spec_addr, m_block->pc,
                  m_block->pc + (m_block->size * sizeof(Instruction)));
      TruncateBlock();
    }
  }
}

// Shrinks the block so the current instruction becomes its last one.
void CPU::NewRec::Compiler::TruncateBlock()
{
  m_block->size = ((m_current_instruction_pc - m_block->pc) / sizeof(Instruction)) + 1;
  iinfo->is_last_instruction = true;
}

// Returns a pointer to the per-word fetch time for this block's memory region;
// asserts if the region's fetch timing is dynamic (no stable pointer exists).
const TickCount* CPU::NewRec::Compiler::GetFetchMemoryAccessTimePtr() const
{
  const TickCount* ptr =
    Bus::GetMemoryAccessTimePtr(m_block->pc & PHYSICAL_MEMORY_ADDRESS_MASK, MemoryAccessSize::Word);
  AssertMsg(ptr, "Address has dynamic fetch ticks");
  return ptr;
}

// Flushes state required before a slow-path (C call) memory access. No flush is
// needed for fastmem accesses. 'address'/'store' are currently unused here.
void CPU::NewRec::Compiler::FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store,
                                              bool use_fastmem)
{
  if (use_fastmem)
    return;

  // TODO: Stores don't need to flush GTE cycles...
  Flush(FLUSH_FOR_C_CALL | FLUSH_FOR_LOADSTORE);
}

// Compiles a register-to-register move (mfhi/mflo/mthi/mtlo and rename shortcuts),
// propagating constants and attempting a host-register rename before falling back
// to an actual copy. Optionally notifies PGXP of the move.
void CPU::NewRec::Compiler::CompileMoveRegTemplate(Reg dst, Reg src, bool pgxp_move)
{
  // Move-to-self and writes to $zero are no-ops.
  if (dst == src || dst == Reg::zero)
    return;

  if (HasConstantReg(src))
  {
    // Constant propagation: no code needs to be emitted at all.
    DeleteMIPSReg(dst, false);
    SetConstantReg(dst, GetConstantRegU32(src));
  }
  else
  {
    const u32 srcreg = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, src);
    if (!TryRenameMIPSReg(dst, src, srcreg, Reg::count))
    {
      const u32 dstreg = AllocateHostReg(HR_MODE_WRITE, HR_TYPE_CPU_REG, dst);
      CopyHostReg(dstreg, srcreg);
      ClearHostRegNeeded(dstreg);
    }
  }

  // TODO: This could be made better if we only did it for registers where there was a previous MFC2.
1761 if (g_settings.gpu_pgxp_enable && pgxp_move) 1762 { 1763 // might've been renamed, so use dst here 1764 GeneratePGXPCallWithMIPSRegs(reinterpret_cast<const void*>(&PGXP::CPU_MOVE_Packed), PGXP::PackMoveArgs(dst, src), 1765 dst); 1766 } 1767 } 1768 1769 void CPU::NewRec::Compiler::Compile_j() 1770 { 1771 const u32 newpc = (m_compiler_pc & UINT32_C(0xF0000000)) | (inst->j.target << 2); 1772 1773 // TODO: Delay slot swap. 1774 // We could also move the cycle commit back. 1775 CompileBranchDelaySlot(); 1776 EndBlock(newpc, true); 1777 } 1778 1779 void CPU::NewRec::Compiler::Compile_jr_const(CompileFlags cf) 1780 { 1781 DebugAssert(HasConstantReg(cf.MipsS())); 1782 const u32 newpc = GetConstantRegU32(cf.MipsS()); 1783 if (newpc & 3 && g_settings.cpu_recompiler_memory_exceptions) 1784 { 1785 EndBlockWithException(Exception::AdEL); 1786 return; 1787 } 1788 1789 CompileBranchDelaySlot(); 1790 EndBlock(newpc, true); 1791 } 1792 1793 void CPU::NewRec::Compiler::Compile_jal() 1794 { 1795 const u32 newpc = (m_compiler_pc & UINT32_C(0xF0000000)) | (inst->j.target << 2); 1796 SetConstantReg(Reg::ra, GetBranchReturnAddress({})); 1797 CompileBranchDelaySlot(); 1798 EndBlock(newpc, true); 1799 } 1800 1801 void CPU::NewRec::Compiler::Compile_jalr_const(CompileFlags cf) 1802 { 1803 DebugAssert(HasConstantReg(cf.MipsS())); 1804 const u32 newpc = GetConstantRegU32(cf.MipsS()); 1805 if (MipsD() != Reg::zero) 1806 SetConstantReg(MipsD(), GetBranchReturnAddress({})); 1807 1808 CompileBranchDelaySlot(); 1809 EndBlock(newpc, true); 1810 } 1811 1812 void CPU::NewRec::Compiler::Compile_syscall() 1813 { 1814 EndBlockWithException(Exception::Syscall); 1815 } 1816 1817 void CPU::NewRec::Compiler::Compile_break() 1818 { 1819 EndBlockWithException(Exception::BP); 1820 } 1821 1822 void CPU::NewRec::Compiler::Compile_b_const(CompileFlags cf) 1823 { 1824 DebugAssert(HasConstantReg(cf.MipsS())); 1825 1826 const u8 irt = static_cast<u8>(inst->i.rt.GetValue()); 1827 const bool bgez = 
ConvertToBoolUnchecked(irt & u8(1)); 1828 const bool link = (irt & u8(0x1E)) == u8(0x10); 1829 1830 const s32 rs = GetConstantRegS32(cf.MipsS()); 1831 const bool taken = bgez ? (rs >= 0) : (rs < 0); 1832 const u32 taken_pc = GetConditionalBranchTarget(cf); 1833 1834 if (link) 1835 SetConstantReg(Reg::ra, GetBranchReturnAddress(cf)); 1836 1837 CompileBranchDelaySlot(); 1838 EndBlock(taken ? taken_pc : m_compiler_pc, true); 1839 } 1840 1841 void CPU::NewRec::Compiler::Compile_b(CompileFlags cf) 1842 { 1843 const u8 irt = static_cast<u8>(inst->i.rt.GetValue()); 1844 const bool bgez = ConvertToBoolUnchecked(irt & u8(1)); 1845 const bool link = (irt & u8(0x1E)) == u8(0x10); 1846 1847 if (link) 1848 SetConstantReg(Reg::ra, GetBranchReturnAddress(cf)); 1849 1850 Compile_bxx(cf, bgez ? BranchCondition::GreaterEqualZero : BranchCondition::LessThanZero); 1851 } 1852 1853 void CPU::NewRec::Compiler::Compile_blez(CompileFlags cf) 1854 { 1855 Compile_bxx(cf, BranchCondition::LessEqualZero); 1856 } 1857 1858 void CPU::NewRec::Compiler::Compile_blez_const(CompileFlags cf) 1859 { 1860 Compile_bxx_const(cf, BranchCondition::LessEqualZero); 1861 } 1862 1863 void CPU::NewRec::Compiler::Compile_bgtz(CompileFlags cf) 1864 { 1865 Compile_bxx(cf, BranchCondition::GreaterThanZero); 1866 } 1867 1868 void CPU::NewRec::Compiler::Compile_bgtz_const(CompileFlags cf) 1869 { 1870 Compile_bxx_const(cf, BranchCondition::GreaterThanZero); 1871 } 1872 1873 void CPU::NewRec::Compiler::Compile_beq(CompileFlags cf) 1874 { 1875 Compile_bxx(cf, BranchCondition::Equal); 1876 } 1877 1878 void CPU::NewRec::Compiler::Compile_beq_const(CompileFlags cf) 1879 { 1880 Compile_bxx_const(cf, BranchCondition::Equal); 1881 } 1882 1883 void CPU::NewRec::Compiler::Compile_bne(CompileFlags cf) 1884 { 1885 Compile_bxx(cf, BranchCondition::NotEqual); 1886 } 1887 1888 void CPU::NewRec::Compiler::Compile_bne_const(CompileFlags cf) 1889 { 1890 Compile_bxx_const(cf, BranchCondition::NotEqual); 1891 } 1892 1893 void 
CPU::NewRec::Compiler::Compile_bxx_const(CompileFlags cf, BranchCondition cond) 1894 { 1895 DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 1896 1897 bool taken; 1898 switch (cond) 1899 { 1900 case BranchCondition::Equal: 1901 taken = GetConstantRegU32(cf.MipsS()) == GetConstantRegU32(cf.MipsT()); 1902 break; 1903 1904 case BranchCondition::NotEqual: 1905 taken = GetConstantRegU32(cf.MipsS()) != GetConstantRegU32(cf.MipsT()); 1906 break; 1907 1908 case BranchCondition::GreaterThanZero: 1909 taken = GetConstantRegS32(cf.MipsS()) > 0; 1910 break; 1911 1912 case BranchCondition::GreaterEqualZero: 1913 taken = GetConstantRegS32(cf.MipsS()) >= 0; 1914 break; 1915 1916 case BranchCondition::LessThanZero: 1917 taken = GetConstantRegS32(cf.MipsS()) < 0; 1918 break; 1919 1920 case BranchCondition::LessEqualZero: 1921 taken = GetConstantRegS32(cf.MipsS()) <= 0; 1922 break; 1923 1924 default: 1925 Panic("Unhandled condition"); 1926 return; 1927 } 1928 1929 const u32 taken_pc = GetConditionalBranchTarget(cf); 1930 CompileBranchDelaySlot(); 1931 EndBlock(taken ? 
taken_pc : m_compiler_pc, true); 1932 } 1933 1934 void CPU::NewRec::Compiler::Compile_sll_const(CompileFlags cf) 1935 { 1936 DebugAssert(HasConstantReg(cf.MipsT())); 1937 SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) << inst->r.shamt); 1938 } 1939 1940 void CPU::NewRec::Compiler::Compile_srl_const(CompileFlags cf) 1941 { 1942 DebugAssert(HasConstantReg(cf.MipsT())); 1943 SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) >> inst->r.shamt); 1944 } 1945 1946 void CPU::NewRec::Compiler::Compile_sra_const(CompileFlags cf) 1947 { 1948 DebugAssert(HasConstantReg(cf.MipsT())); 1949 SetConstantReg(MipsD(), static_cast<u32>(GetConstantRegS32(cf.MipsT()) >> inst->r.shamt)); 1950 } 1951 1952 void CPU::NewRec::Compiler::Compile_sllv_const(CompileFlags cf) 1953 { 1954 DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 1955 SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) << (GetConstantRegU32(cf.MipsS()) & 0x1Fu)); 1956 } 1957 1958 void CPU::NewRec::Compiler::Compile_srlv_const(CompileFlags cf) 1959 { 1960 DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 1961 SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) >> (GetConstantRegU32(cf.MipsS()) & 0x1Fu)); 1962 } 1963 1964 void CPU::NewRec::Compiler::Compile_srav_const(CompileFlags cf) 1965 { 1966 DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 1967 SetConstantReg(MipsD(), static_cast<u32>(GetConstantRegS32(cf.MipsT()) >> (GetConstantRegU32(cf.MipsS()) & 0x1Fu))); 1968 } 1969 1970 void CPU::NewRec::Compiler::Compile_and_const(CompileFlags cf) 1971 { 1972 DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 1973 SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) & GetConstantRegU32(cf.MipsT())); 1974 } 1975 1976 void CPU::NewRec::Compiler::Compile_or_const(CompileFlags cf) 1977 { 1978 DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 1979 SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) | 
GetConstantRegU32(cf.MipsT())); 1980 } 1981 1982 void CPU::NewRec::Compiler::Compile_xor_const(CompileFlags cf) 1983 { 1984 DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 1985 SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) ^ GetConstantRegU32(cf.MipsT())); 1986 } 1987 1988 void CPU::NewRec::Compiler::Compile_nor_const(CompileFlags cf) 1989 { 1990 DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 1991 SetConstantReg(MipsD(), ~(GetConstantRegU32(cf.MipsS()) | GetConstantRegU32(cf.MipsT()))); 1992 } 1993 1994 void CPU::NewRec::Compiler::Compile_slt_const(CompileFlags cf) 1995 { 1996 DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 1997 SetConstantReg(MipsD(), BoolToUInt32(GetConstantRegS32(cf.MipsS()) < GetConstantRegS32(cf.MipsT()))); 1998 } 1999 2000 void CPU::NewRec::Compiler::Compile_sltu_const(CompileFlags cf) 2001 { 2002 DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 2003 SetConstantReg(MipsD(), BoolToUInt32(GetConstantRegU32(cf.MipsS()) < GetConstantRegU32(cf.MipsT()))); 2004 } 2005 2006 void CPU::NewRec::Compiler::Compile_mult_const(CompileFlags cf) 2007 { 2008 DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 2009 2010 const u64 res = 2011 static_cast<u64>(static_cast<s64>(GetConstantRegS32(cf.MipsS())) * static_cast<s64>(GetConstantRegS32(cf.MipsT()))); 2012 SetConstantReg(Reg::hi, static_cast<u32>(res >> 32)); 2013 SetConstantReg(Reg::lo, static_cast<u32>(res)); 2014 } 2015 2016 void CPU::NewRec::Compiler::Compile_multu_const(CompileFlags cf) 2017 { 2018 DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 2019 2020 const u64 res = static_cast<u64>(GetConstantRegU32(cf.MipsS())) * static_cast<u64>(GetConstantRegU32(cf.MipsT())); 2021 SetConstantReg(Reg::hi, static_cast<u32>(res >> 32)); 2022 SetConstantReg(Reg::lo, static_cast<u32>(res)); 2023 } 2024 2025 void CPU::NewRec::Compiler::MIPSSignedDivide(s32 num, s32 denom, 
u32* lo, u32* hi) 2026 { 2027 if (denom == 0) 2028 { 2029 // divide by zero 2030 *lo = (num >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1); 2031 *hi = static_cast<u32>(num); 2032 } 2033 else if (static_cast<u32>(num) == UINT32_C(0x80000000) && denom == -1) 2034 { 2035 // unrepresentable 2036 *lo = UINT32_C(0x80000000); 2037 *hi = 0; 2038 } 2039 else 2040 { 2041 *lo = static_cast<u32>(num / denom); 2042 *hi = static_cast<u32>(num % denom); 2043 } 2044 } 2045 2046 void CPU::NewRec::Compiler::MIPSUnsignedDivide(u32 num, u32 denom, u32* lo, u32* hi) 2047 { 2048 if (denom == 0) 2049 { 2050 // divide by zero 2051 *lo = UINT32_C(0xFFFFFFFF); 2052 *hi = static_cast<u32>(num); 2053 } 2054 else 2055 { 2056 *lo = num / denom; 2057 *hi = num % denom; 2058 } 2059 } 2060 2061 void CPU::NewRec::Compiler::Compile_div_const(CompileFlags cf) 2062 { 2063 DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 2064 2065 const s32 num = GetConstantRegS32(cf.MipsS()); 2066 const s32 denom = GetConstantRegS32(cf.MipsT()); 2067 2068 u32 lo, hi; 2069 MIPSSignedDivide(num, denom, &lo, &hi); 2070 2071 SetConstantReg(Reg::hi, hi); 2072 SetConstantReg(Reg::lo, lo); 2073 } 2074 2075 void CPU::NewRec::Compiler::Compile_divu_const(CompileFlags cf) 2076 { 2077 DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 2078 2079 const u32 num = GetConstantRegU32(cf.MipsS()); 2080 const u32 denom = GetConstantRegU32(cf.MipsT()); 2081 2082 u32 lo, hi; 2083 MIPSUnsignedDivide(num, denom, &lo, &hi); 2084 2085 SetConstantReg(Reg::hi, hi); 2086 SetConstantReg(Reg::lo, lo); 2087 } 2088 2089 void CPU::NewRec::Compiler::Compile_add_const(CompileFlags cf) 2090 { 2091 // TODO: Overflow 2092 DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 2093 if (MipsD() != Reg::zero) 2094 SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) + GetConstantRegU32(cf.MipsT())); 2095 } 2096 2097 void CPU::NewRec::Compiler::Compile_addu_const(CompileFlags cf) 2098 { 2099 
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 2100 SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) + GetConstantRegU32(cf.MipsT())); 2101 } 2102 2103 void CPU::NewRec::Compiler::Compile_sub_const(CompileFlags cf) 2104 { 2105 // TODO: Overflow 2106 DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 2107 if (MipsD() != Reg::zero) 2108 SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) - GetConstantRegU32(cf.MipsT())); 2109 } 2110 2111 void CPU::NewRec::Compiler::Compile_subu_const(CompileFlags cf) 2112 { 2113 DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT())); 2114 SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) - GetConstantRegU32(cf.MipsT())); 2115 } 2116 2117 void CPU::NewRec::Compiler::Compile_addi_const(CompileFlags cf) 2118 { 2119 // TODO: Overflow 2120 DebugAssert(HasConstantReg(cf.MipsS())); 2121 if (cf.MipsT() != Reg::zero) 2122 SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) + inst->i.imm_sext32()); 2123 } 2124 2125 void CPU::NewRec::Compiler::Compile_addiu_const(CompileFlags cf) 2126 { 2127 DebugAssert(HasConstantReg(cf.MipsS())); 2128 SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) + inst->i.imm_sext32()); 2129 } 2130 2131 void CPU::NewRec::Compiler::Compile_slti_const(CompileFlags cf) 2132 { 2133 DebugAssert(HasConstantReg(cf.MipsS())); 2134 SetConstantReg(cf.MipsT(), BoolToUInt32(GetConstantRegS32(cf.MipsS()) < static_cast<s32>(inst->i.imm_sext32()))); 2135 } 2136 2137 void CPU::NewRec::Compiler::Compile_sltiu_const(CompileFlags cf) 2138 { 2139 DebugAssert(HasConstantReg(cf.MipsS())); 2140 SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) < inst->i.imm_sext32()); 2141 } 2142 2143 void CPU::NewRec::Compiler::Compile_andi_const(CompileFlags cf) 2144 { 2145 DebugAssert(HasConstantReg(cf.MipsS())); 2146 SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) & inst->i.imm_zext32()); 2147 } 2148 2149 void 
CPU::NewRec::Compiler::Compile_ori_const(CompileFlags cf) 2150 { 2151 DebugAssert(HasConstantReg(cf.MipsS())); 2152 SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) | inst->i.imm_zext32()); 2153 } 2154 2155 void CPU::NewRec::Compiler::Compile_xori_const(CompileFlags cf) 2156 { 2157 DebugAssert(HasConstantReg(cf.MipsS())); 2158 SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) ^ inst->i.imm_zext32()); 2159 } 2160 2161 void CPU::NewRec::Compiler::Compile_lui() 2162 { 2163 if (inst->i.rt == Reg::zero) 2164 return; 2165 2166 SetConstantReg(inst->i.rt, inst->i.imm_zext32() << 16); 2167 2168 if (g_settings.UsingPGXPCPUMode()) 2169 GeneratePGXPCallWithMIPSRegs(reinterpret_cast<const void*>(&PGXP::CPU_LUI), inst->bits); 2170 } 2171 2172 static constexpr const std::array<std::pair<u32*, u32>, 16> s_cop0_table = { 2173 {{nullptr, 0x00000000u}, 2174 {nullptr, 0x00000000u}, 2175 {nullptr, 0x00000000u}, 2176 {&CPU::g_state.cop0_regs.BPC, 0xffffffffu}, 2177 {nullptr, 0}, 2178 {&CPU::g_state.cop0_regs.BDA, 0xffffffffu}, 2179 {&CPU::g_state.cop0_regs.TAR, 0x00000000u}, 2180 {&CPU::g_state.cop0_regs.dcic.bits, CPU::Cop0Registers::DCIC::WRITE_MASK}, 2181 {&CPU::g_state.cop0_regs.BadVaddr, 0x00000000u}, 2182 {&CPU::g_state.cop0_regs.BDAM, 0xffffffffu}, 2183 {nullptr, 0x00000000u}, 2184 {&CPU::g_state.cop0_regs.BPCM, 0xffffffffu}, 2185 {&CPU::g_state.cop0_regs.sr.bits, CPU::Cop0Registers::SR::WRITE_MASK}, 2186 {&CPU::g_state.cop0_regs.cause.bits, CPU::Cop0Registers::CAUSE::WRITE_MASK}, 2187 {&CPU::g_state.cop0_regs.EPC, 0x00000000u}, 2188 {&CPU::g_state.cop0_regs.PRID, 0x00000000u}}}; 2189 2190 u32* CPU::NewRec::Compiler::GetCop0RegPtr(Cop0Reg reg) 2191 { 2192 return (static_cast<u8>(reg) < s_cop0_table.size()) ? s_cop0_table[static_cast<u8>(reg)].first : nullptr; 2193 } 2194 2195 u32 CPU::NewRec::Compiler::GetCop0RegWriteMask(Cop0Reg reg) 2196 { 2197 return (static_cast<u8>(reg) < s_cop0_table.size()) ? 
s_cop0_table[static_cast<u8>(reg)].second : 0; 2198 } 2199 2200 void CPU::NewRec::Compiler::Compile_mfc0(CompileFlags cf) 2201 { 2202 const Cop0Reg r = static_cast<Cop0Reg>(MipsD()); 2203 const u32* ptr = GetCop0RegPtr(r); 2204 if (!ptr) 2205 { 2206 ERROR_LOG("Read from unknown cop0 reg {}", static_cast<u32>(r)); 2207 Compile_Fallback(); 2208 return; 2209 } 2210 2211 DebugAssert(cf.valid_host_t); 2212 LoadHostRegFromCPUPointer(cf.host_t, ptr); 2213 } 2214 2215 std::pair<u32*, CPU::NewRec::Compiler::GTERegisterAccessAction> 2216 CPU::NewRec::Compiler::GetGTERegisterPointer(u32 index, bool writing) 2217 { 2218 if (!writing) 2219 { 2220 // Most GTE registers can be read directly. Handle the special cases here. 2221 if (index == 15) // SXY3 2222 { 2223 // mirror of SXY2 2224 index = 14; 2225 } 2226 2227 switch (index) 2228 { 2229 case 28: // IRGB 2230 case 29: // ORGB 2231 { 2232 return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::CallHandler); 2233 } 2234 break; 2235 2236 default: 2237 { 2238 return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::Direct); 2239 } 2240 break; 2241 } 2242 } 2243 else 2244 { 2245 switch (index) 2246 { 2247 case 1: // V0[z] 2248 case 3: // V1[z] 2249 case 5: // V2[z] 2250 case 8: // IR0 2251 case 9: // IR1 2252 case 10: // IR2 2253 case 11: // IR3 2254 case 36: // RT33 2255 case 44: // L33 2256 case 52: // LR33 2257 case 58: // H - sign-extended on read but zext on use 2258 case 59: // DQA 2259 case 61: // ZSF3 2260 case 62: // ZSF4 2261 { 2262 // sign-extend z component of vector registers 2263 return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::SignExtend16); 2264 } 2265 break; 2266 2267 case 7: // OTZ 2268 case 16: // SZ0 2269 case 17: // SZ1 2270 case 18: // SZ2 2271 case 19: // SZ3 2272 { 2273 // zero-extend unsigned values 2274 return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::ZeroExtend16); 2275 } 2276 break; 2277 2278 case 15: // SXY3 
2279 { 2280 // writing to SXYP pushes to the FIFO 2281 return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::PushFIFO); 2282 } 2283 break; 2284 2285 case 28: // IRGB 2286 case 30: // LZCS 2287 case 63: // FLAG 2288 { 2289 return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::CallHandler); 2290 } 2291 2292 case 29: // ORGB 2293 case 31: // LZCR 2294 { 2295 // read-only registers 2296 return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::Ignore); 2297 } 2298 2299 default: 2300 { 2301 // written as-is, 2x16 or 1x32 bits 2302 return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::Direct); 2303 } 2304 } 2305 } 2306 } 2307 2308 void CPU::NewRec::Compiler::AddGTETicks(TickCount ticks) 2309 { 2310 // TODO: check, int has +1 here 2311 m_gte_done_cycle = m_cycles + ticks; 2312 DEBUG_LOG("Adding {} GTE ticks", ticks); 2313 } 2314 2315 void CPU::NewRec::Compiler::StallUntilGTEComplete() 2316 { 2317 // TODO: hack to match old rec.. 
this may or may not be correct behavior 2318 // it's the difference between stalling before and after the current instruction's cycle 2319 DebugAssert(m_cycles > 0); 2320 m_cycles--; 2321 2322 if (!m_dirty_gte_done_cycle) 2323 { 2324 // simple case - in block scheduling 2325 if (m_gte_done_cycle > m_cycles) 2326 { 2327 DEBUG_LOG("Stalling for {} ticks from GTE", m_gte_done_cycle - m_cycles); 2328 m_cycles += (m_gte_done_cycle - m_cycles); 2329 } 2330 } 2331 else 2332 { 2333 // switch to in block scheduling 2334 DEBUG_LOG("Flushing GTE stall from state"); 2335 Flush(FLUSH_GTE_STALL_FROM_STATE); 2336 } 2337 2338 m_cycles++; 2339 } 2340 2341 void CPU::NewRec::BackpatchLoadStore(void* exception_pc, const CodeCache::LoadstoreBackpatchInfo& info) 2342 { 2343 // remove the cycles we added for the memory read, then take them off again after the backpatch 2344 // the normal rec path will add the ram read ticks later, so we need to take them off at the end 2345 DebugAssert(!info.is_load || info.cycles >= Bus::RAM_READ_TICKS); 2346 const TickCount cycles_to_add = 2347 static_cast<TickCount>(static_cast<u32>(info.cycles)) - (info.is_load ? 
Bus::RAM_READ_TICKS : 0); 2348 const TickCount cycles_to_remove = static_cast<TickCount>(static_cast<u32>(info.cycles)); 2349 2350 void* thunk_address = CPU::CodeCache::GetFreeFarCodePointer(); 2351 const u32 thunk_size = CompileLoadStoreThunk( 2352 thunk_address, CPU::CodeCache::GetFreeFarCodeSpace(), exception_pc, info.code_size, cycles_to_add, cycles_to_remove, 2353 info.gpr_bitmask, info.address_register, info.data_register, info.AccessSize(), info.is_signed, info.is_load); 2354 2355 #if 0 2356 Log_DebugPrint("**Backpatch Thunk**"); 2357 CPU::CodeCache::DisassembleAndLogHostCode(thunk_address, thunk_size); 2358 #endif 2359 2360 // backpatch to a jump to the slowmem handler 2361 CPU::CodeCache::EmitJump(exception_pc, thunk_address, true); 2362 2363 CPU::CodeCache::CommitFarCode(thunk_size); 2364 } 2365 2366 void CPU::NewRec::Compiler::InitSpeculativeRegs() 2367 { 2368 for (u8 i = 0; i < static_cast<u8>(Reg::count); i++) 2369 m_speculative_constants.regs[i] = g_state.regs.r[i]; 2370 2371 m_speculative_constants.cop0_sr = g_state.cop0_regs.sr.bits; 2372 m_speculative_constants.memory.clear(); 2373 } 2374 2375 void CPU::NewRec::Compiler::InvalidateSpeculativeValues() 2376 { 2377 m_speculative_constants.regs.fill(std::nullopt); 2378 m_speculative_constants.memory.clear(); 2379 m_speculative_constants.cop0_sr.reset(); 2380 } 2381 2382 CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecReadReg(Reg reg) 2383 { 2384 return m_speculative_constants.regs[static_cast<u8>(reg)]; 2385 } 2386 2387 void CPU::NewRec::Compiler::SpecWriteReg(Reg reg, SpecValue value) 2388 { 2389 if (reg == Reg::zero) 2390 return; 2391 2392 m_speculative_constants.regs[static_cast<u8>(reg)] = value; 2393 } 2394 2395 void CPU::NewRec::Compiler::SpecInvalidateReg(Reg reg) 2396 { 2397 if (reg == Reg::zero) 2398 return; 2399 2400 m_speculative_constants.regs[static_cast<u8>(reg)].reset(); 2401 } 2402 2403 void CPU::NewRec::Compiler::SpecCopyReg(Reg dst, Reg src) 2404 { 2405 if (dst == 
Reg::zero) 2406 return; 2407 2408 m_speculative_constants.regs[static_cast<u8>(dst)] = m_speculative_constants.regs[static_cast<u8>(src)]; 2409 } 2410 2411 CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecReadMem(VirtualMemoryAddress address) 2412 { 2413 auto it = m_speculative_constants.memory.find(address); 2414 if (it != m_speculative_constants.memory.end()) 2415 return it->second; 2416 2417 u32 value; 2418 if ((address & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR) 2419 { 2420 u32 scratchpad_offset = address & SCRATCHPAD_OFFSET_MASK; 2421 std::memcpy(&value, &CPU::g_state.scratchpad[scratchpad_offset], sizeof(value)); 2422 return value; 2423 } 2424 2425 const PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK; 2426 if (Bus::IsRAMAddress(phys_addr)) 2427 { 2428 u32 ram_offset = phys_addr & Bus::g_ram_mask; 2429 std::memcpy(&value, &Bus::g_ram[ram_offset], sizeof(value)); 2430 return value; 2431 } 2432 2433 return std::nullopt; 2434 } 2435 2436 void CPU::NewRec::Compiler::SpecWriteMem(u32 address, SpecValue value) 2437 { 2438 auto it = m_speculative_constants.memory.find(address); 2439 if (it != m_speculative_constants.memory.end()) 2440 { 2441 it->second = value; 2442 return; 2443 } 2444 2445 const PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK; 2446 if ((address & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR || Bus::IsRAMAddress(phys_addr)) 2447 m_speculative_constants.memory.emplace(address, value); 2448 } 2449 2450 void CPU::NewRec::Compiler::SpecInvalidateMem(VirtualMemoryAddress address) 2451 { 2452 SpecWriteMem(address, std::nullopt); 2453 } 2454 2455 bool CPU::NewRec::Compiler::SpecIsCacheIsolated() 2456 { 2457 if (!m_speculative_constants.cop0_sr.has_value()) 2458 return false; 2459 2460 const Cop0Registers::SR sr{m_speculative_constants.cop0_sr.value()}; 2461 return sr.Isc; 2462 } 2463 2464 void CPU::NewRec::Compiler::SpecExec_b() 2465 { 2466 const bool link = (static_cast<u8>(inst->i.rt.GetValue()) & 
u8(0x1E)) == u8(0x10); 2467 if (link) 2468 SpecWriteReg(Reg::ra, m_compiler_pc); 2469 } 2470 2471 void CPU::NewRec::Compiler::SpecExec_jal() 2472 { 2473 SpecWriteReg(Reg::ra, m_compiler_pc); 2474 } 2475 2476 void CPU::NewRec::Compiler::SpecExec_jalr() 2477 { 2478 SpecWriteReg(inst->r.rd, m_compiler_pc); 2479 } 2480 2481 void CPU::NewRec::Compiler::SpecExec_sll() 2482 { 2483 const SpecValue rt = SpecReadReg(inst->r.rt); 2484 if (rt.has_value()) 2485 SpecWriteReg(inst->r.rd, rt.value() << inst->r.shamt); 2486 else 2487 SpecInvalidateReg(inst->r.rd); 2488 } 2489 2490 void CPU::NewRec::Compiler::SpecExec_srl() 2491 { 2492 const SpecValue rt = SpecReadReg(inst->r.rt); 2493 if (rt.has_value()) 2494 SpecWriteReg(inst->r.rd, rt.value() >> inst->r.shamt); 2495 else 2496 SpecInvalidateReg(inst->r.rd); 2497 } 2498 2499 void CPU::NewRec::Compiler::SpecExec_sra() 2500 { 2501 const SpecValue rt = SpecReadReg(inst->r.rt); 2502 if (rt.has_value()) 2503 SpecWriteReg(inst->r.rd, static_cast<u32>(static_cast<s32>(rt.value()) >> inst->r.shamt)); 2504 else 2505 SpecInvalidateReg(inst->r.rd); 2506 } 2507 2508 void CPU::NewRec::Compiler::SpecExec_sllv() 2509 { 2510 const SpecValue rs = SpecReadReg(inst->r.rs); 2511 const SpecValue rt = SpecReadReg(inst->r.rt); 2512 if (rs.has_value() && rt.has_value()) 2513 SpecWriteReg(inst->r.rd, rt.value() << (rs.value() & 0x1F)); 2514 else 2515 SpecInvalidateReg(inst->r.rd); 2516 } 2517 2518 void CPU::NewRec::Compiler::SpecExec_srlv() 2519 { 2520 const SpecValue rs = SpecReadReg(inst->r.rs); 2521 const SpecValue rt = SpecReadReg(inst->r.rt); 2522 if (rs.has_value() && rt.has_value()) 2523 SpecWriteReg(inst->r.rd, rt.value() >> (rs.value() & 0x1F)); 2524 else 2525 SpecInvalidateReg(inst->r.rd); 2526 } 2527 2528 void CPU::NewRec::Compiler::SpecExec_srav() 2529 { 2530 const SpecValue rs = SpecReadReg(inst->r.rs); 2531 const SpecValue rt = SpecReadReg(inst->r.rt); 2532 if (rs.has_value() && rt.has_value()) 2533 SpecWriteReg(inst->r.rd, 
static_cast<u32>(static_cast<s32>(rt.value()) >> (rs.value() & 0x1F))); 2534 else 2535 SpecInvalidateReg(inst->r.rd); 2536 } 2537 2538 void CPU::NewRec::Compiler::SpecExec_mult() 2539 { 2540 const SpecValue rs = SpecReadReg(inst->r.rs); 2541 const SpecValue rt = SpecReadReg(inst->r.rt); 2542 if (rs.has_value() && rt.has_value()) 2543 { 2544 const u64 result = 2545 static_cast<u64>(static_cast<s64>(SignExtend64(rs.value())) * static_cast<s64>(SignExtend64(rt.value()))); 2546 SpecWriteReg(Reg::hi, Truncate32(result >> 32)); 2547 SpecWriteReg(Reg::lo, Truncate32(result)); 2548 } 2549 else 2550 { 2551 SpecInvalidateReg(Reg::hi); 2552 SpecInvalidateReg(Reg::lo); 2553 } 2554 } 2555 2556 void CPU::NewRec::Compiler::SpecExec_multu() 2557 { 2558 const SpecValue rs = SpecReadReg(inst->r.rs); 2559 const SpecValue rt = SpecReadReg(inst->r.rt); 2560 if (rs.has_value() && rt.has_value()) 2561 { 2562 const u64 result = ZeroExtend64(rs.value()) * SignExtend64(rt.value()); 2563 SpecWriteReg(Reg::hi, Truncate32(result >> 32)); 2564 SpecWriteReg(Reg::lo, Truncate32(result)); 2565 } 2566 else 2567 { 2568 SpecInvalidateReg(Reg::hi); 2569 SpecInvalidateReg(Reg::lo); 2570 } 2571 } 2572 2573 void CPU::NewRec::Compiler::SpecExec_div() 2574 { 2575 const SpecValue rs = SpecReadReg(inst->r.rs); 2576 const SpecValue rt = SpecReadReg(inst->r.rt); 2577 if (rs.has_value() && rt.has_value()) 2578 { 2579 u32 lo, hi; 2580 MIPSSignedDivide(static_cast<s32>(rs.value()), static_cast<s32>(rt.value()), &lo, &hi); 2581 SpecWriteReg(Reg::hi, hi); 2582 SpecWriteReg(Reg::lo, lo); 2583 } 2584 else 2585 { 2586 SpecInvalidateReg(Reg::hi); 2587 SpecInvalidateReg(Reg::lo); 2588 } 2589 } 2590 2591 void CPU::NewRec::Compiler::SpecExec_divu() 2592 { 2593 const SpecValue rs = SpecReadReg(inst->r.rs); 2594 const SpecValue rt = SpecReadReg(inst->r.rt); 2595 if (rs.has_value() && rt.has_value()) 2596 { 2597 u32 lo, hi; 2598 MIPSUnsignedDivide(rs.value(), rt.value(), &lo, &hi); 2599 SpecWriteReg(Reg::hi, hi); 2600 
SpecWriteReg(Reg::lo, lo); 2601 } 2602 else 2603 { 2604 SpecInvalidateReg(Reg::hi); 2605 SpecInvalidateReg(Reg::lo); 2606 } 2607 } 2608 2609 void CPU::NewRec::Compiler::SpecExec_add() 2610 { 2611 SpecExec_addu(); 2612 } 2613 2614 void CPU::NewRec::Compiler::SpecExec_addu() 2615 { 2616 const SpecValue rs = SpecReadReg(inst->r.rs); 2617 const SpecValue rt = SpecReadReg(inst->r.rt); 2618 if (rs.has_value() && rt.has_value()) 2619 SpecWriteReg(inst->r.rd, rs.value() + rt.value()); 2620 else 2621 SpecInvalidateReg(inst->r.rd); 2622 } 2623 2624 void CPU::NewRec::Compiler::SpecExec_sub() 2625 { 2626 SpecExec_subu(); 2627 } 2628 2629 void CPU::NewRec::Compiler::SpecExec_subu() 2630 { 2631 const SpecValue rs = SpecReadReg(inst->r.rs); 2632 const SpecValue rt = SpecReadReg(inst->r.rt); 2633 if (rs.has_value() && rt.has_value()) 2634 SpecWriteReg(inst->r.rd, rs.value() - rt.value()); 2635 else 2636 SpecInvalidateReg(inst->r.rd); 2637 } 2638 2639 void CPU::NewRec::Compiler::SpecExec_and() 2640 { 2641 const SpecValue rs = SpecReadReg(inst->r.rs); 2642 const SpecValue rt = SpecReadReg(inst->r.rt); 2643 if (rs.has_value() && rt.has_value()) 2644 SpecWriteReg(inst->r.rd, rs.value() & rt.value()); 2645 else 2646 SpecInvalidateReg(inst->r.rd); 2647 } 2648 2649 void CPU::NewRec::Compiler::SpecExec_or() 2650 { 2651 const SpecValue rs = SpecReadReg(inst->r.rs); 2652 const SpecValue rt = SpecReadReg(inst->r.rt); 2653 if (rs.has_value() && rt.has_value()) 2654 SpecWriteReg(inst->r.rd, rs.value() | rt.value()); 2655 else 2656 SpecInvalidateReg(inst->r.rd); 2657 } 2658 2659 void CPU::NewRec::Compiler::SpecExec_xor() 2660 { 2661 const SpecValue rs = SpecReadReg(inst->r.rs); 2662 const SpecValue rt = SpecReadReg(inst->r.rt); 2663 if (rs.has_value() && rt.has_value()) 2664 SpecWriteReg(inst->r.rd, rs.value() ^ rt.value()); 2665 else 2666 SpecInvalidateReg(inst->r.rd); 2667 } 2668 2669 void CPU::NewRec::Compiler::SpecExec_nor() 2670 { 2671 const SpecValue rs = SpecReadReg(inst->r.rs); 2672 
const SpecValue rt = SpecReadReg(inst->r.rt); 2673 if (rs.has_value() && rt.has_value()) 2674 SpecWriteReg(inst->r.rd, ~(rs.value() | rt.value())); 2675 else 2676 SpecInvalidateReg(inst->r.rd); 2677 } 2678 2679 void CPU::NewRec::Compiler::SpecExec_slt() 2680 { 2681 const SpecValue rs = SpecReadReg(inst->r.rs); 2682 const SpecValue rt = SpecReadReg(inst->r.rt); 2683 if (rs.has_value() && rt.has_value()) 2684 SpecWriteReg(inst->r.rd, BoolToUInt32(static_cast<s32>(rs.value()) < static_cast<s32>(rt.value()))); 2685 else 2686 SpecInvalidateReg(inst->r.rd); 2687 } 2688 2689 void CPU::NewRec::Compiler::SpecExec_sltu() 2690 { 2691 const SpecValue rs = SpecReadReg(inst->r.rs); 2692 const SpecValue rt = SpecReadReg(inst->r.rt); 2693 if (rs.has_value() && rt.has_value()) 2694 SpecWriteReg(inst->r.rd, BoolToUInt32(rs.value() < rt.value())); 2695 else 2696 SpecInvalidateReg(inst->r.rd); 2697 } 2698 2699 void CPU::NewRec::Compiler::SpecExec_addi() 2700 { 2701 SpecExec_addiu(); 2702 } 2703 2704 void CPU::NewRec::Compiler::SpecExec_addiu() 2705 { 2706 const SpecValue rs = SpecReadReg(inst->i.rs); 2707 if (rs.has_value()) 2708 SpecWriteReg(inst->i.rt, rs.value() + inst->i.imm_sext32()); 2709 else 2710 SpecInvalidateReg(inst->i.rt); 2711 } 2712 2713 void CPU::NewRec::Compiler::SpecExec_slti() 2714 { 2715 const SpecValue rs = SpecReadReg(inst->i.rs); 2716 if (rs.has_value()) 2717 SpecWriteReg(inst->i.rt, BoolToUInt32(static_cast<s32>(rs.value()) < static_cast<s32>(inst->i.imm_sext32()))); 2718 else 2719 SpecInvalidateReg(inst->i.rt); 2720 } 2721 2722 void CPU::NewRec::Compiler::SpecExec_sltiu() 2723 { 2724 const SpecValue rs = SpecReadReg(inst->i.rs); 2725 if (rs.has_value()) 2726 SpecWriteReg(inst->i.rt, BoolToUInt32(rs.value() < inst->i.imm_sext32())); 2727 else 2728 SpecInvalidateReg(inst->i.rt); 2729 } 2730 2731 void CPU::NewRec::Compiler::SpecExec_andi() 2732 { 2733 const SpecValue rs = SpecReadReg(inst->i.rs); 2734 if (rs.has_value()) 2735 SpecWriteReg(inst->i.rt, rs.value() & 
inst->i.imm_zext32()); 2736 else 2737 SpecInvalidateReg(inst->i.rt); 2738 } 2739 2740 void CPU::NewRec::Compiler::SpecExec_ori() 2741 { 2742 const SpecValue rs = SpecReadReg(inst->i.rs); 2743 if (rs.has_value()) 2744 SpecWriteReg(inst->i.rt, rs.value() | inst->i.imm_zext32()); 2745 else 2746 SpecInvalidateReg(inst->i.rt); 2747 } 2748 2749 void CPU::NewRec::Compiler::SpecExec_xori() 2750 { 2751 const SpecValue rs = SpecReadReg(inst->i.rs); 2752 if (rs.has_value()) 2753 SpecWriteReg(inst->i.rt, rs.value() ^ inst->i.imm_zext32()); 2754 else 2755 SpecInvalidateReg(inst->i.rt); 2756 } 2757 2758 void CPU::NewRec::Compiler::SpecExec_lui() 2759 { 2760 SpecWriteReg(inst->i.rt, inst->i.imm_zext32() << 16); 2761 } 2762 2763 CPU::NewRec::Compiler::SpecValue CPU::NewRec::Compiler::SpecExec_LoadStoreAddr() 2764 { 2765 const SpecValue rs = SpecReadReg(inst->i.rs); 2766 return rs.has_value() ? (rs.value() + inst->i.imm_sext32()) : rs; 2767 } 2768 2769 void CPU::NewRec::Compiler::SpecExec_lxx(MemoryAccessSize size, bool sign) 2770 { 2771 const SpecValue addr = SpecExec_LoadStoreAddr(); 2772 SpecValue val; 2773 if (!addr.has_value() || !(val = SpecReadMem(addr.value())).has_value()) 2774 { 2775 SpecInvalidateReg(inst->i.rt); 2776 return; 2777 } 2778 2779 switch (size) 2780 { 2781 case MemoryAccessSize::Byte: 2782 val = sign ? SignExtend32(static_cast<u8>(val.value())) : ZeroExtend32(static_cast<u8>(val.value())); 2783 break; 2784 2785 case MemoryAccessSize::HalfWord: 2786 val = sign ? 
SignExtend32(static_cast<u16>(val.value())) : ZeroExtend32(static_cast<u16>(val.value())); 2787 break; 2788 2789 case MemoryAccessSize::Word: 2790 break; 2791 2792 default: 2793 UnreachableCode(); 2794 } 2795 2796 SpecWriteReg(inst->r.rt, val); 2797 } 2798 2799 void CPU::NewRec::Compiler::SpecExec_lwx(bool lwr) 2800 { 2801 // TODO 2802 SpecInvalidateReg(inst->i.rt); 2803 } 2804 2805 void CPU::NewRec::Compiler::SpecExec_sxx(MemoryAccessSize size) 2806 { 2807 const SpecValue addr = SpecExec_LoadStoreAddr(); 2808 if (!addr.has_value()) 2809 return; 2810 2811 SpecValue rt = SpecReadReg(inst->i.rt); 2812 if (rt.has_value()) 2813 { 2814 switch (size) 2815 { 2816 case MemoryAccessSize::Byte: 2817 rt = ZeroExtend32(static_cast<u8>(rt.value())); 2818 break; 2819 2820 case MemoryAccessSize::HalfWord: 2821 rt = ZeroExtend32(static_cast<u16>(rt.value())); 2822 break; 2823 2824 case MemoryAccessSize::Word: 2825 break; 2826 2827 default: 2828 UnreachableCode(); 2829 } 2830 } 2831 2832 SpecWriteMem(addr.value(), rt); 2833 } 2834 2835 void CPU::NewRec::Compiler::SpecExec_swx(bool swr) 2836 { 2837 const SpecValue addr = SpecExec_LoadStoreAddr(); 2838 if (addr.has_value()) 2839 SpecInvalidateMem(addr.value() & ~3u); 2840 } 2841 2842 void CPU::NewRec::Compiler::SpecExec_swc2() 2843 { 2844 const SpecValue addr = SpecExec_LoadStoreAddr(); 2845 if (addr.has_value()) 2846 SpecInvalidateMem(addr.value()); 2847 } 2848 2849 void CPU::NewRec::Compiler::SpecExec_mfc0() 2850 { 2851 const Cop0Reg rd = static_cast<Cop0Reg>(inst->r.rd.GetValue()); 2852 if (rd != Cop0Reg::SR) 2853 { 2854 SpecInvalidateReg(inst->r.rt); 2855 return; 2856 } 2857 2858 SpecWriteReg(inst->r.rt, m_speculative_constants.cop0_sr); 2859 } 2860 2861 void CPU::NewRec::Compiler::SpecExec_mtc0() 2862 { 2863 const Cop0Reg rd = static_cast<Cop0Reg>(inst->r.rd.GetValue()); 2864 if (rd != Cop0Reg::SR || !m_speculative_constants.cop0_sr.has_value()) 2865 return; 2866 2867 SpecValue val = SpecReadReg(inst->r.rt); 2868 if 
(val.has_value()) 2869 { 2870 constexpr u32 mask = Cop0Registers::SR::WRITE_MASK; 2871 val = (m_speculative_constants.cop0_sr.value() & mask) | (val.value() & mask); 2872 } 2873 2874 m_speculative_constants.cop0_sr = val; 2875 } 2876 2877 void CPU::NewRec::Compiler::SpecExec_rfe() 2878 { 2879 if (!m_speculative_constants.cop0_sr.has_value()) 2880 return; 2881 2882 const u32 val = m_speculative_constants.cop0_sr.value(); 2883 m_speculative_constants.cop0_sr = (val & UINT32_C(0b110000)) | ((val & UINT32_C(0b111111)) >> 2); 2884 }