// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#include "cpu_recompiler_code_generator.h"
#include "common/log.h"
#include "cpu_core.h"
#include "cpu_core_private.h"
#include "cpu_disasm.h"
#include "cpu_pgxp.h"
#include "gte.h"
#include "settings.h"
Log_SetChannel(CPU::Recompiler);

// TODO: Turn load+sext/zext into a single signed/unsigned load
// TODO: mulx/shlx/etc
// TODO: when writing to the same register, don't allocate a temporary and copy it (mainly for shifts)

namespace CPU::Recompiler {
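
// Drives compilation of a single block: emit the prologue (protection/icache checks),
// one CompileInstruction() call per guest instruction, then an epilogue that either
// links directly to the following block (for blocks spanning pages) or returns to the
// event-check-and-dispatch stub. Returns nullptr if any instruction fails to compile.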
const void* CodeGenerator::CompileBlock(CodeCache::Block* block, u32* out_host_code_size, u32* out_host_far_code_size)
{
  // TODO: Align code buffer.

  m_block = block;
  m_block_start = {block->Instructions(), block->InstructionsInfo()};
  m_block_end = {block->Instructions() + block->size, block->InstructionsInfo() + block->size};

  m_pc = block->pc;
  m_pc_valid = true;

  EmitBeginBlock(true);
  BlockPrologue();

  m_current_instruction = m_block_start;
  while (m_current_instruction.instruction != m_block_end.instruction)
  {
    if (!CompileInstruction(*m_current_instruction.instruction, *m_current_instruction.info))
    {
      m_current_instruction = {};
      m_block_end = {};
      m_block_start = {};
      m_block = nullptr;
      return nullptr;
    }

    m_current_instruction.instruction++;
    m_current_instruction.info++;
  }

  if (!m_block_linked)
  {
    BlockEpilogue();

    if (block->HasFlag(CodeCache::BlockFlags::SpansPages))
    {
      // jump directly to the next block
      const Value pc = CalculatePC();
      WriteNewPC(pc, true);
      const void* host_target =
        CPU::CodeCache::CreateBlockLink(m_block, GetCurrentCodePointer(), static_cast<u32>(pc.constant_value));
      EmitBranch(host_target);
      EmitEndBlock(true, nullptr);
    }
    else
    {
      EmitEndBlock(true, CodeCache::g_check_events_and_dispatch);
    }
  }

  const void* code = FinalizeBlock(out_host_code_size, out_host_far_code_size);
  DebugAssert(m_register_cache.GetUsedHostRegisters() == 0);

  m_current_instruction = {};
  m_block_end = {};
  m_block_start = {};
  m_block = nullptr;
  return code;
}

bool CodeGenerator::CompileInstruction(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  if (IsNopInstruction(instruction))
  {
    InstructionPrologue(instruction, info, 1);
    InstructionEpilogue(instruction, info);
    return true;
  }

  bool result;
  switch (instruction.op)
  {
#if 1
    case InstructionOp::ori:
    case InstructionOp::andi:
    case InstructionOp::xori:
      result = Compile_Bitwise(instruction, info);
      break;

    case InstructionOp::lb:
    case InstructionOp::lbu:
    case InstructionOp::lh:
    case InstructionOp::lhu:
    case InstructionOp::lw:
      result = Compile_Load(instruction, info);
      break;

    case InstructionOp::lwl:
    case InstructionOp::lwr:
      result = Compile_LoadLeftRight(instruction, info);
      break;

    case InstructionOp::swl:
    case InstructionOp::swr:
      result = Compile_StoreLeftRight(instruction, info);
      break;

    case InstructionOp::sb:
    case InstructionOp::sh:
    case InstructionOp::sw:
      result = Compile_Store(instruction, info);
      break;

    case InstructionOp::j:
    case InstructionOp::jal:
    case InstructionOp::b:
    case InstructionOp::beq:
    case InstructionOp::bne:
    case InstructionOp::bgtz:
    case InstructionOp::blez:
      result = Compile_Branch(instruction, info);
      break;

    case InstructionOp::addi:
    case InstructionOp::addiu:
      result = Compile_Add(instruction, info);
      break;

    case InstructionOp::slti:
    case InstructionOp::sltiu:
      result = Compile_SetLess(instruction, info);
      break;

    case InstructionOp::lui:
      result = Compile_lui(instruction, info);
      break;

    case InstructionOp::cop0:
      result = Compile_cop0(instruction, info);
      break;

    case InstructionOp::cop2:
    case InstructionOp::lwc2:
    case InstructionOp::swc2:
      result = Compile_cop2(instruction, info);
      break;

    case InstructionOp::funct:
    {
      switch (instruction.r.funct)
      {
        case InstructionFunct::and_:
        case InstructionFunct::or_:
        case InstructionFunct::xor_:
        case InstructionFunct::nor:
          result = Compile_Bitwise(instruction, info);
          break;

        case InstructionFunct::sll:
        case InstructionFunct::srl:
        case InstructionFunct::sra:
        case InstructionFunct::sllv:
        case InstructionFunct::srlv:
        case InstructionFunct::srav:
          result = Compile_Shift(instruction, info);
          break;

        case InstructionFunct::mfhi:
        case InstructionFunct::mflo:
        case InstructionFunct::mthi:
        case InstructionFunct::mtlo:
          result = Compile_MoveHiLo(instruction, info);
          break;

        case InstructionFunct::add:
        case InstructionFunct::addu:
          result = Compile_Add(instruction, info);
          break;

        case InstructionFunct::sub:
        case InstructionFunct::subu:
          result = Compile_Subtract(instruction, info);
          break;

        case InstructionFunct::mult:
        case InstructionFunct::multu:
          result = Compile_Multiply(instruction, info);
          break;

        case InstructionFunct::div:
          result = Compile_SignedDivide(instruction, info);
          break;

        case InstructionFunct::divu:
          result = Compile_Divide(instruction, info);
          break;

        case InstructionFunct::slt:
        case InstructionFunct::sltu:
          result = Compile_SetLess(instruction, info);
          break;

        case InstructionFunct::jr:
        case InstructionFunct::jalr:
        case InstructionFunct::syscall:
        case InstructionFunct::break_:
          result = Compile_Branch(instruction, info);
          break;

        default:
          result = Compile_Fallback(instruction, info);
          break;
      }
    }
    break;
#endif

    default:
      result = Compile_Fallback(instruction, info);
      break;
  }

  return result;
}

Value CodeGenerator::ConvertValueSize(const Value& value, RegSize size, bool sign_extend)
{
  DebugAssert(value.size != size);

  if (value.IsConstant())
  {
    // compile-time conversion, woo!
    switch (size)
    {
      case RegSize_8:
        return Value::FromConstantU8(value.constant_value & 0xFF);

      case RegSize_16:
      {
        switch (value.size)
        {
          case RegSize_8:
            return Value::FromConstantU16(sign_extend ? SignExtend16(Truncate8(value.constant_value)) :
                                                        ZeroExtend16(Truncate8(value.constant_value)));

          default:
            return Value::FromConstantU16(value.constant_value & 0xFFFF);
        }
      }
      break;

      case RegSize_32:
      {
        switch (value.size)
        {
          case RegSize_8:
            return Value::FromConstantU32(sign_extend ? SignExtend32(Truncate8(value.constant_value)) :
                                                        ZeroExtend32(Truncate8(value.constant_value)));
          case RegSize_16:
            return Value::FromConstantU32(sign_extend ? SignExtend32(Truncate16(value.constant_value)) :
                                                        ZeroExtend32(Truncate16(value.constant_value)));

          case RegSize_32:
            return value;

          default:
            break;
        }
      }
      break;

      default:
        break;
    }

    UnreachableCode();
  }

  Value new_value = m_register_cache.AllocateScratch(size);
  if (size < value.size)
  {
    EmitCopyValue(new_value.host_reg, value);
  }
  else
  {
    if (sign_extend)
      EmitSignExtend(new_value.host_reg, size, value.host_reg, value.size);
    else
      EmitZeroExtend(new_value.host_reg, size, value.host_reg, value.size);
  }

  return new_value;
}

void CodeGenerator::ConvertValueSizeInPlace(Value* value, RegSize size, bool sign_extend)
{
  DebugAssert(value->size != size);

  // We don't want to mess up the register cache value, so generate a new value if it's not scratch.
  if (value->IsConstant() || !value->IsScratch())
  {
    *value = ConvertValueSize(*value, size, sign_extend);
    return;
  }

  DebugAssert(value->IsInHostRegister() && value->IsScratch());

  // If the size is smaller and the value is in a register, we can just "view" the lower part.
  if (size < value->size)
  {
    value->size = size;
  }
  else
  {
    if (sign_extend)
      EmitSignExtend(value->host_reg, size, value->host_reg, value->size);
    else
      EmitZeroExtend(value->host_reg, size, value->host_reg, value->size);
  }

  value->size = size;
}

void* CodeGenerator::GetCurrentCodePointer() const
{
  if (m_emit == &m_near_emitter)
    return GetCurrentNearCodePointer();
  else if (m_emit == &m_far_emitter)
    return GetCurrentFarCodePointer();

  Panic("unknown emitter");
}
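
// The arithmetic/logical helpers below share one pattern: if both operands are
// constants, fold the result at compile time; if one operand is a no-op constant
// (e.g. adding or or-ing zero), emit a plain register copy; otherwise allocate a
// scratch register and emit the host-side operation.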
Value CodeGenerator::AddValues(const Value& lhs, const Value& rhs, bool set_flags)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant() && !set_flags)
  {
    // compile-time
    u64 new_cv = lhs.constant_value + rhs.constant_value;
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(new_cv));

      case RegSize_16:
        return Value::FromConstantU16(Truncate16(new_cv));

      case RegSize_32:
        return Value::FromConstantU32(Truncate32(new_cv));

      case RegSize_64:
        return Value::FromConstantU64(new_cv);

      default:
        return Value();
    }
  }

  Value res = m_register_cache.AllocateScratch(lhs.size);
  if (lhs.HasConstantValue(0) && !set_flags)
  {
    EmitCopyValue(res.host_reg, rhs);
    return res;
  }
  else if (rhs.HasConstantValue(0) && !set_flags)
  {
    EmitCopyValue(res.host_reg, lhs);
    return res;
  }
  else
  {
    if (lhs.IsInHostRegister())
    {
      EmitAdd(res.host_reg, lhs.host_reg, rhs, set_flags);
    }
    else
    {
      EmitCopyValue(res.host_reg, lhs);
      EmitAdd(res.host_reg, res.host_reg, rhs, set_flags);
    }
    return res;
  }
}

Value CodeGenerator::SubValues(const Value& lhs, const Value& rhs, bool set_flags)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant() && !set_flags)
  {
    // compile-time
    u64 new_cv = lhs.constant_value - rhs.constant_value;
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(new_cv));

      case RegSize_16:
        return Value::FromConstantU16(Truncate16(new_cv));

      case RegSize_32:
        return Value::FromConstantU32(Truncate32(new_cv));

      case RegSize_64:
        return Value::FromConstantU64(new_cv);

      default:
        return Value();
    }
  }

  Value res = m_register_cache.AllocateScratch(lhs.size);
  if (rhs.HasConstantValue(0) && !set_flags)
  {
    EmitCopyValue(res.host_reg, lhs);
    return res;
  }
  else
  {
    if (lhs.IsInHostRegister())
    {
      EmitSub(res.host_reg, lhs.host_reg, rhs, set_flags);
    }
    else
    {
      EmitCopyValue(res.host_reg, lhs);
      EmitSub(res.host_reg, res.host_reg, rhs, set_flags);
    }

    return res;
  }
}

std::pair<Value, Value> CodeGenerator::MulValues(const Value& lhs, const Value& rhs, bool signed_multiply)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    switch (lhs.size)
    {
      case RegSize_8:
      {
        u16 res;
        if (signed_multiply)
          res = u16(s16(s8(lhs.constant_value)) * s16(s8(rhs.constant_value)));
        else
          res = u16(u8(lhs.constant_value)) * u16(u8(rhs.constant_value));

        return std::make_pair(Value::FromConstantU8(Truncate8(res >> 8)), Value::FromConstantU8(Truncate8(res)));
      }

      case RegSize_16:
      {
        u32 res;
        if (signed_multiply)
          res = u32(s32(s16(lhs.constant_value)) * s32(s16(rhs.constant_value)));
        else
          res = u32(u16(lhs.constant_value)) * u32(u16(rhs.constant_value));

        return std::make_pair(Value::FromConstantU16(Truncate16(res >> 16)), Value::FromConstantU16(Truncate16(res)));
      }

      case RegSize_32:
      {
        u64 res;
        if (signed_multiply)
          res = u64(s64(s32(lhs.constant_value)) * s64(s32(rhs.constant_value)));
        else
          res = u64(u32(lhs.constant_value)) * u64(u32(rhs.constant_value));

        return std::make_pair(Value::FromConstantU32(Truncate32(res >> 32)), Value::FromConstantU32(Truncate32(res)));
      }
      break;

      case RegSize_64:
      {
        u64 res;
        if (signed_multiply)
          res = u64(s64(lhs.constant_value) * s64(rhs.constant_value));
        else
          res = lhs.constant_value * rhs.constant_value;

        // TODO: 128-bit multiply...
        Panic("128-bit multiply");
        return std::make_pair(Value::FromConstantU64(0), Value::FromConstantU64(res));
      }

      default:
        return std::make_pair(Value::FromConstantU64(0), Value::FromConstantU64(0));
    }
  }

  // We need two registers for both components.
  Value hi = m_register_cache.AllocateScratch(lhs.size);
  Value lo = m_register_cache.AllocateScratch(lhs.size);
  EmitMul(hi.host_reg, lo.host_reg, lhs, rhs, signed_multiply);
  return std::make_pair(std::move(hi), std::move(lo));
}

Value CodeGenerator::ShlValues(const Value& lhs, const Value& rhs, bool assume_amount_masked /* = true */)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    u64 new_cv = lhs.constant_value << (rhs.constant_value & 0x1F);
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(new_cv));

      case RegSize_16:
        return Value::FromConstantU16(Truncate16(new_cv));

      case RegSize_32:
        return Value::FromConstantU32(Truncate32(new_cv));

      case RegSize_64:
        return Value::FromConstantU64(new_cv);

      default:
        return Value();
    }
  }

  Value res = m_register_cache.AllocateScratch(lhs.size);
  if (rhs.HasConstantValue(0))
  {
    EmitCopyValue(res.host_reg, lhs);
  }
  else
  {
    if (lhs.IsInHostRegister())
    {
      EmitShl(res.host_reg, lhs.host_reg, res.size, rhs, assume_amount_masked);
    }
    else
    {
      EmitCopyValue(res.host_reg, lhs);
      EmitShl(res.host_reg, res.host_reg, res.size, rhs, assume_amount_masked);
    }
  }
  return res;
}

Value CodeGenerator::ShrValues(const Value& lhs, const Value& rhs, bool assume_amount_masked /* = true */)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    u64 new_cv = lhs.constant_value >> (rhs.constant_value & 0x1F);
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(new_cv));

      case RegSize_16:
        return Value::FromConstantU16(Truncate16(new_cv));

      case RegSize_32:
        return Value::FromConstantU32(Truncate32(new_cv));

      case RegSize_64:
        return Value::FromConstantU64(new_cv);

      default:
        return Value();
    }
  }

  Value res = m_register_cache.AllocateScratch(lhs.size);
  if (rhs.HasConstantValue(0))
  {
    EmitCopyValue(res.host_reg, lhs);
  }
  else
  {
    if (lhs.IsInHostRegister())
    {
      EmitShr(res.host_reg, lhs.host_reg, res.size, rhs, assume_amount_masked);
    }
    else
    {
      EmitCopyValue(res.host_reg, lhs);
      EmitShr(res.host_reg, res.host_reg, res.size, rhs, assume_amount_masked);
    }
  }
  return res;
}

Value CodeGenerator::SarValues(const Value& lhs, const Value& rhs, bool assume_amount_masked /* = true */)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(
          static_cast<u8>(static_cast<s8>(Truncate8(lhs.constant_value)) >> (rhs.constant_value & 0x1F)));

      case RegSize_16:
        return Value::FromConstantU16(
          static_cast<u16>(static_cast<s16>(Truncate16(lhs.constant_value)) >> (rhs.constant_value & 0x1F)));

      case RegSize_32:
        return Value::FromConstantU32(
          static_cast<u32>(static_cast<s32>(Truncate32(lhs.constant_value)) >> (rhs.constant_value & 0x1F)));

      case RegSize_64:
        return Value::FromConstantU64(
          static_cast<u64>(static_cast<s64>(lhs.constant_value) >> (rhs.constant_value & 0x3F)));

      default:
        return Value();
    }
  }

  Value res = m_register_cache.AllocateScratch(lhs.size);
  if (rhs.HasConstantValue(0))
  {
    EmitCopyValue(res.host_reg, lhs);
  }
  else
  {
    if (lhs.IsInHostRegister())
    {
      EmitSar(res.host_reg, lhs.host_reg, res.size, rhs, assume_amount_masked);
    }
    else
    {
      EmitCopyValue(res.host_reg, lhs);
      EmitSar(res.host_reg, res.host_reg, res.size, rhs, assume_amount_masked);
    }
  }
  return res;
}

Value CodeGenerator::OrValues(const Value& lhs, const Value& rhs)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    u64 new_cv = lhs.constant_value | rhs.constant_value;
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(new_cv));

      case RegSize_16:
        return Value::FromConstantU16(Truncate16(new_cv));

      case RegSize_32:
        return Value::FromConstantU32(Truncate32(new_cv));

      case RegSize_64:
        return Value::FromConstantU64(new_cv);

      default:
        return Value();
    }
  }

  Value res = m_register_cache.AllocateScratch(lhs.size);
  if (lhs.HasConstantValue(0))
  {
    EmitCopyValue(res.host_reg, rhs);
    return res;
  }
  else if (rhs.HasConstantValue(0))
  {
    EmitCopyValue(res.host_reg, lhs);
    return res;
  }

  if (lhs.IsInHostRegister())
  {
    EmitOr(res.host_reg, lhs.host_reg, rhs);
  }
  else
  {
    EmitCopyValue(res.host_reg, lhs);
    EmitOr(res.host_reg, res.host_reg, rhs);
  }
  return res;
}

void CodeGenerator::OrValueInPlace(Value& lhs, const Value& rhs)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    u64 new_cv = lhs.constant_value | rhs.constant_value;
    switch (lhs.size)
    {
      case RegSize_8:
        lhs = Value::FromConstantU8(Truncate8(new_cv));
        break;

      case RegSize_16:
        lhs = Value::FromConstantU16(Truncate16(new_cv));
        break;

      case RegSize_32:
        lhs = Value::FromConstantU32(Truncate32(new_cv));
        break;

      case RegSize_64:
        lhs = Value::FromConstantU64(new_cv);
        break;

      default:
        lhs = Value();
        break;
    }

    // folded at compile time, nothing to emit
    return;
  }

  // unlikely
  if (rhs.HasConstantValue(0))
    return;

  if (lhs.IsInHostRegister())
  {
    EmitOr(lhs.host_reg, lhs.host_reg, rhs);
  }
  else
  {
    Value new_lhs = m_register_cache.AllocateScratch(lhs.size);
    EmitCopyValue(new_lhs.host_reg, lhs);
    EmitOr(new_lhs.host_reg, new_lhs.host_reg, rhs);
    lhs = std::move(new_lhs);
  }
}

Value CodeGenerator::AndValues(const Value& lhs, const Value& rhs)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    u64 new_cv = lhs.constant_value & rhs.constant_value;
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(new_cv));

      case RegSize_16:
        return Value::FromConstantU16(Truncate16(new_cv));

      case RegSize_32:
        return Value::FromConstantU32(Truncate32(new_cv));

      case RegSize_64:
        return Value::FromConstantU64(new_cv);

      default:
        return Value();
    }
  }

  // TODO: and with -1 -> noop
  Value res = m_register_cache.AllocateScratch(lhs.size);
  if (lhs.HasConstantValue(0) || rhs.HasConstantValue(0))
  {
    EmitXor(res.host_reg, res.host_reg, res);
    return res;
  }

  if (lhs.IsInHostRegister())
  {
    EmitAnd(res.host_reg, lhs.host_reg, rhs);
  }
  else
  {
    EmitCopyValue(res.host_reg, lhs);
    EmitAnd(res.host_reg, res.host_reg, rhs);
  }
  return res;
}

void CodeGenerator::AndValueInPlace(Value& lhs, const Value& rhs)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    u64 new_cv = lhs.constant_value & rhs.constant_value;
    switch (lhs.size)
    {
      case RegSize_8:
        lhs = Value::FromConstantU8(Truncate8(new_cv));
        break;

      case RegSize_16:
        lhs = Value::FromConstantU16(Truncate16(new_cv));
        break;

      case RegSize_32:
        lhs = Value::FromConstantU32(Truncate32(new_cv));
        break;

      case RegSize_64:
        lhs = Value::FromConstantU64(new_cv);
        break;

      default:
        lhs = Value();
        break;
    }

    // folded at compile time, nothing to emit
    return;
  }

  // TODO: and with -1 -> noop
  // anything AND zero is zero; a constant-zero lhs already holds the result
  if (lhs.HasConstantValue(0))
    return;

  if (rhs.HasConstantValue(0))
  {
    EmitXor(lhs.host_reg, lhs.host_reg, lhs);
    return;
  }

  if (lhs.IsInHostRegister())
  {
    EmitAnd(lhs.host_reg, lhs.host_reg, rhs);
  }
  else
  {
    Value new_lhs = m_register_cache.AllocateScratch(lhs.size);
    EmitCopyValue(new_lhs.host_reg, lhs);
    EmitAnd(new_lhs.host_reg, new_lhs.host_reg, rhs);
    lhs = std::move(new_lhs);
  }
}

Value CodeGenerator::XorValues(const Value& lhs, const Value& rhs)
{
  DebugAssert(lhs.size == rhs.size);
  if (lhs.IsConstant() && rhs.IsConstant())
  {
    // compile-time
    u64 new_cv = lhs.constant_value ^ rhs.constant_value;
    switch (lhs.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(new_cv));

      case RegSize_16:
        return Value::FromConstantU16(Truncate16(new_cv));

      case RegSize_32:
        return Value::FromConstantU32(Truncate32(new_cv));

      case RegSize_64:
        return Value::FromConstantU64(new_cv);

      default:
        return Value();
    }
  }

  Value res = m_register_cache.AllocateScratch(lhs.size);
  if (lhs.HasConstantValue(0))
  {
    EmitCopyValue(res.host_reg, rhs);
    return res;
  }
  else if (rhs.HasConstantValue(0))
  {
    EmitCopyValue(res.host_reg, lhs);
    return res;
  }

  if (lhs.IsInHostRegister())
  {
    EmitXor(res.host_reg, lhs.host_reg, rhs);
  }
  else
  {
    EmitCopyValue(res.host_reg, lhs);
    EmitXor(res.host_reg, res.host_reg, rhs);
  }

  return res;
}

Value CodeGenerator::NotValue(const Value& val)
{
  if (val.IsConstant())
  {
    u64 new_cv = ~val.constant_value;
    switch (val.size)
    {
      case RegSize_8:
        return Value::FromConstantU8(Truncate8(new_cv));

      case RegSize_16:
        return Value::FromConstantU16(Truncate16(new_cv));

      case RegSize_32:
        return Value::FromConstantU32(Truncate32(new_cv));

      case RegSize_64:
        return Value::FromConstantU64(new_cv);

      default:
        return Value();
    }
  }

  // TODO: Don't allocate scratch if the lhs is a scratch?
  Value res = m_register_cache.AllocateScratch(RegSize_32);
  EmitCopyValue(res.host_reg, val);
  EmitNot(res.host_reg, val.size);
  return res;
}

const TickCount* CodeGenerator::GetFetchMemoryAccessTimePtr() const
{
  const TickCount* ptr =
    Bus::GetMemoryAccessTimePtr(m_block->pc & PHYSICAL_MEMORY_ADDRESS_MASK, MemoryAccessSize::Word);
  AssertMsg(ptr, "Address has dynamic fetch ticks");
  return ptr;
}
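
// Conditional exception exits are routed through the "far" code buffer: the near
// (hot) path only pays for a test and a normally-not-taken branch, while the call
// to CPU::RaiseException and the exit sequence live out-of-line. Unconditional
// exceptions skip the far path entirely, since the block always ends there anyway.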
void CodeGenerator::GenerateExceptionExit(Instruction instruction, const CodeCache::InstructionInfo& info,
                                          Exception excode, Condition condition /* = Condition::Always */)
{
  const Value CAUSE_bits = Value::FromConstantU32(
    Cop0Registers::CAUSE::MakeValueForException(excode, info.is_branch_delay_slot, false, instruction.cop.cop_n));

  if (condition == Condition::Always)
  {
    // no need to use far code if we're always raising the exception
    m_register_cache.FlushAllGuestRegisters(true, true);
    m_register_cache.FlushLoadDelay(true);

    if (excode == Exception::BP)
    {
      EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32, u32)>(&CPU::RaiseBreakException), CAUSE_bits,
                       GetCurrentInstructionPC(), Value::FromConstantU32(instruction.bits));
    }
    else
    {
      EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), CAUSE_bits,
                       GetCurrentInstructionPC());
    }

    return;
  }

  LabelType skip_exception;
  EmitConditionalBranch(condition, true, &skip_exception);

  m_register_cache.PushState();

  EmitBranch(GetCurrentFarCodePointer());

  SwitchToFarCode();
  EmitFunctionCall(nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException), CAUSE_bits,
                   GetCurrentInstructionPC());
  EmitExceptionExit();
  SwitchToNearCode();

  m_register_cache.PopState();

  EmitBindLabel(&skip_exception);
}

void CodeGenerator::BlockPrologue()
{
#if 0
  EmitFunctionCall(nullptr, &CodeCache::LogCurrentState);
#endif

  InitSpeculativeRegs();

  if (m_block->protection == CodeCache::PageProtectionMode::ManualCheck)
  {
    DEBUG_LOG("Generate manual protection for PC {:08X}", m_block->pc);
    const u8* ram_ptr = Bus::g_ram + VirtualAddressToPhysical(m_block->pc);
    const u8* shadow_ptr = reinterpret_cast<const u8*>(m_block->Instructions());
    EmitBlockProtectCheck(ram_ptr, shadow_ptr, m_block->size * sizeof(Instruction));
  }

  EmitStoreCPUStructField(OFFSETOF(State, exception_raised), Value::FromConstantU8(0));

  if (g_settings.bios_tty_logging)
  {
    if (m_pc == 0xa0)
      EmitFunctionCall(nullptr, &CPU::HandleA0Syscall);
    else if (m_pc == 0xb0)
      EmitFunctionCall(nullptr, &CPU::HandleB0Syscall);
  }

  EmitICacheCheckAndUpdate();

  // we don't know the state of the last block, so assume load delays might be in progress
  // TODO: Pull load delay into register cache
  m_current_instruction_in_branch_delay_slot_dirty = g_settings.cpu_recompiler_memory_exceptions;
  m_branch_was_taken_dirty = g_settings.cpu_recompiler_memory_exceptions;
  m_current_instruction_was_branch_taken_dirty = false;
  m_load_delay_dirty = true;
  m_gte_busy_cycles_dirty = true;
}

void CodeGenerator::BlockEpilogue()
{
#if defined(_DEBUG) && defined(CPU_ARCH_X64)
  m_emit->nop();
#endif

  m_register_cache.FlushAllGuestRegisters(true, true);
  if (m_register_cache.HasLoadDelay())
    m_register_cache.WriteLoadDelayToCPU(true);

  AddPendingCycles(true);
}
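
// Per-instruction prologue/epilogue. Most state updates (cycle counts, the
// branch-delay and branch-taken bookkeeping) are deferred and only materialized for
// instructions that can fault (force_sync) or at block boundaries, keeping the fast
// path free of redundant stores.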
void CodeGenerator::InstructionPrologue(Instruction instruction, const CodeCache::InstructionInfo& info,
                                        TickCount cycles, bool force_sync /* = false */)
{
#if defined(_DEBUG) && defined(CPU_ARCH_X64)
  m_emit->nop();
#endif

  // move instruction offsets forward
  if (m_pc_valid)
    m_pc += 4;

  // reset dirty flags
  if (m_branch_was_taken_dirty)
  {
    Value temp = m_register_cache.AllocateScratch(RegSize_8);
    EmitLoadCPUStructField(temp.host_reg, RegSize_8, OFFSETOF(State, branch_was_taken));
    EmitStoreCPUStructField(OFFSETOF(State, current_instruction_was_branch_taken), temp);
    EmitStoreCPUStructField(OFFSETOF(State, branch_was_taken), Value::FromConstantU8(0));
    m_current_instruction_was_branch_taken_dirty = true;
    m_branch_was_taken_dirty = false;
  }
  else if (m_current_instruction_was_branch_taken_dirty)
  {
    EmitStoreCPUStructField(OFFSETOF(State, current_instruction_was_branch_taken), Value::FromConstantU8(0));
    m_current_instruction_was_branch_taken_dirty = false;
  }

  if (m_current_instruction_in_branch_delay_slot_dirty && !info.is_branch_delay_slot)
  {
    EmitStoreCPUStructField(OFFSETOF(State, current_instruction_in_branch_delay_slot), Value::FromConstantU8(0));
    m_current_instruction_in_branch_delay_slot_dirty = false;
  }

  if (!force_sync)
  {
    // Defer updates for non-faulting instructions.
    m_delayed_cycles_add += cycles;
    return;
  }

  if (info.is_branch_delay_slot && g_settings.cpu_recompiler_memory_exceptions)
  {
    // m_current_instruction_in_branch_delay_slot = true
    EmitStoreCPUStructField(OFFSETOF(State, current_instruction_in_branch_delay_slot), Value::FromConstantU8(1));
    m_current_instruction_in_branch_delay_slot_dirty = true;
  }

  m_delayed_cycles_add += cycles;
  AddPendingCycles(true);
}

void CodeGenerator::InstructionEpilogue(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  m_register_cache.UpdateLoadDelay();

  if (m_load_delay_dirty)
  {
    // we have to invalidate the register cache, since the load delayed register might've been cached
    DEBUG_LOG("Emitting delay slot flush");
    EmitFlushInterpreterLoadDelay();
    m_register_cache.InvalidateAllNonDirtyGuestRegisters();
    m_load_delay_dirty = false;
  }

  // copy if the previous instruction was a load, reset the current value on the next instruction
  if (m_next_load_delay_dirty)
  {
    DEBUG_LOG("Emitting delay slot flush (with move next)");
    EmitMoveNextInterpreterLoadDelay();
    m_next_load_delay_dirty = false;
    m_load_delay_dirty = true;
  }
}

void CodeGenerator::TruncateBlockAtCurrentInstruction()
{
  DEV_LOG("Truncating block {:08X} at {:08X}", m_block->pc, m_current_instruction.info->pc);
  m_block_end.instruction = m_current_instruction.instruction + 1;
  m_block_end.info = m_current_instruction.info + 1;
  WriteNewPC(CalculatePC(), true);
}
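
// Flushes the cycles accumulated in m_delayed_cycles_add into State::pending_ticks,
// and, if a GTE operation is still in flight, records its completion tick relative
// to the new pending_ticks value.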
void CodeGenerator::AddPendingCycles(bool commit)
{
  if (m_delayed_cycles_add == 0 && m_gte_done_cycle <= m_delayed_cycles_add)
    return;

  if (m_gte_done_cycle > m_delayed_cycles_add)
  {
    Value temp = m_register_cache.AllocateScratch(RegSize_32);
    EmitLoadCPUStructField(temp.GetHostRegister(), RegSize_32, OFFSETOF(State, pending_ticks));
    if (m_delayed_cycles_add > 0)
    {
      EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(), Value::FromConstantU32(m_delayed_cycles_add), false);
      EmitStoreCPUStructField(OFFSETOF(State, pending_ticks), temp);
      EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(),
              Value::FromConstantU32(m_gte_done_cycle - m_delayed_cycles_add), false);
      EmitStoreCPUStructField(OFFSETOF(State, gte_completion_tick), temp);
    }
    else
    {
      EmitAdd(temp.GetHostRegister(), temp.GetHostRegister(), Value::FromConstantU32(m_gte_done_cycle), false);
      EmitStoreCPUStructField(OFFSETOF(State, gte_completion_tick), temp);
    }
  }
  else
  {
    EmitAddCPUStructField(OFFSETOF(State, pending_ticks), Value::FromConstantU32(m_delayed_cycles_add));
  }

  if (commit)
  {
    m_gte_done_cycle = std::max<TickCount>(m_gte_done_cycle - m_delayed_cycles_add, 0);
    m_delayed_cycles_add = 0;
  }
}

void CodeGenerator::AddGTETicks(TickCount ticks)
{
  m_gte_done_cycle = m_delayed_cycles_add + ticks;
  DEBUG_LOG("Adding {} GTE ticks", ticks);
}

void CodeGenerator::StallUntilGTEComplete()
{
  if (!m_gte_busy_cycles_dirty)
  {
    // simple case - in block scheduling
    if (m_gte_done_cycle > m_delayed_cycles_add)
    {
      DEBUG_LOG("Stalling for {} ticks from GTE", m_gte_done_cycle - m_delayed_cycles_add);
      m_delayed_cycles_add += (m_gte_done_cycle - m_delayed_cycles_add);
    }

    return;
  }

  // switch to in block scheduling
  EmitStallUntilGTEComplete();
  m_gte_done_cycle = 0;
  m_gte_busy_cycles_dirty = false;
}

Value CodeGenerator::CalculatePC(u32 offset /* = 0 */)
{
  if (!m_pc_valid)
    Panic("Attempt to get an indeterminate PC");

  return Value::FromConstantU32(m_pc + offset);
}

Value CodeGenerator::GetCurrentInstructionPC(u32 offset /* = 0 */)
{
  return Value::FromConstantU32(m_current_instruction.info->pc);
}

void CodeGenerator::WriteNewPC(const Value& value, bool commit)
{
  // TODO: This _could_ be moved into the register cache, but would it gain anything?
  EmitStoreCPUStructField(OFFSETOF(CPU::State, pc), value);
  if (commit)
  {
    m_pc_valid = value.IsConstant();
    if (m_pc_valid)
      m_pc = static_cast<u32>(value.constant_value);
  }
}
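
// Fallback path: invokes the interpreter for a single instruction. Everything the
// interpreter might touch (guest registers, load delays) has to be flushed first and
// the cache invalidated afterwards, which makes this far slower than a native
// sequence; it is only used for instructions the recompiler does not handle.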
bool CodeGenerator::Compile_Fallback(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", info.pc, instruction.bits);

  InstructionPrologue(instruction, info, 1, true);

  // flush and invalidate all guest registers, since the fallback could change any of them
  m_register_cache.FlushAllGuestRegisters(true, true);
  if (m_register_cache.HasLoadDelay())
  {
    m_load_delay_dirty = true;
    m_register_cache.WriteLoadDelayToCPU(true);
  }

  EmitStoreCPUStructField(OFFSETOF(State, current_instruction_pc), Value::FromConstantU32(info.pc));
  EmitStoreCPUStructField(OFFSETOF(State, current_instruction.bits), Value::FromConstantU32(instruction.bits));

  // TODO: Use carry flag or something here too
  Value return_value = m_register_cache.AllocateScratch(RegSize_8);
  EmitFunctionCall(&return_value,
                   g_settings.gpu_pgxp_enable ? &Thunks::InterpretInstructionPGXP : &Thunks::InterpretInstruction);
  EmitExceptionExitOnBool(return_value);

  m_current_instruction_in_branch_delay_slot_dirty = info.is_branch_instruction;
  m_branch_was_taken_dirty = info.is_branch_instruction;
  m_next_load_delay_dirty = info.has_load_delay;
  InvalidateSpeculativeValues();
  InstructionEpilogue(instruction, info);
  return true;
}

bool CodeGenerator::Compile_Bitwise(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

  Value lhs;
  Value rhs;
  Reg dest;

  SpeculativeValue spec_lhs, spec_rhs;
  SpeculativeValue spec_value;

  if (instruction.op != InstructionOp::funct)
  {
    // rt <- rs op zext(imm)
    lhs = m_register_cache.ReadGuestRegister(instruction.i.rs);
    rhs = Value::FromConstantU32(instruction.i.imm_zext32());
    dest = instruction.i.rt;

    spec_lhs = SpeculativeReadReg(instruction.i.rs);
    spec_rhs = instruction.i.imm_zext32();
  }
  else
  {
    lhs = m_register_cache.ReadGuestRegister(instruction.r.rs);
    rhs = m_register_cache.ReadGuestRegister(instruction.r.rt);
    dest = instruction.r.rd;

    spec_lhs = SpeculativeReadReg(instruction.r.rs);
    spec_rhs = SpeculativeReadReg(instruction.r.rt);
  }

  Value result;
  switch (instruction.op)
  {
    case InstructionOp::ori:
    {
      if (g_settings.UsingPGXPCPUMode())
        EmitFunctionCall(nullptr, &PGXP::CPU_ORI, Value::FromConstantU32(instruction.bits), lhs);

      result = OrValues(lhs, rhs);
      if (spec_lhs && spec_rhs)
        spec_value = *spec_lhs | *spec_rhs;

      if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && dest != Reg::zero &&
          instruction.i.rs != Reg::zero && dest != instruction.i.rs && rhs.HasConstantValue(0))
      {
        EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed,
                         Value::FromConstantU32(PGXP::PackMoveArgs(dest, instruction.i.rs)), lhs);
      }
    }
    break;

    case InstructionOp::andi:
    {
      if (g_settings.UsingPGXPCPUMode())
        EmitFunctionCall(nullptr, &PGXP::CPU_ANDI, Value::FromConstantU32(instruction.bits), lhs);

      result = AndValues(lhs, rhs);
      if (spec_lhs && spec_rhs)
        spec_value = *spec_lhs & *spec_rhs;
    }
    break;

    case InstructionOp::xori:
    {
      if (g_settings.UsingPGXPCPUMode())
        EmitFunctionCall(nullptr, &PGXP::CPU_XORI, Value::FromConstantU32(instruction.bits), lhs);

      result = XorValues(lhs, rhs);
      if (spec_lhs && spec_rhs)
        spec_value = *spec_lhs ^ *spec_rhs;

      if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && dest != Reg::zero &&
          instruction.i.rs != Reg::zero && dest != instruction.i.rs && rhs.HasConstantValue(0))
      {
        EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed,
                         Value::FromConstantU32(PGXP::PackMoveArgs(dest, instruction.i.rs)), lhs);
      }
    }
    break;

    case InstructionOp::funct:
    {
      switch (instruction.r.funct)
      {
        case InstructionFunct::or_:
        {
          if (g_settings.UsingPGXPCPUMode())
            EmitFunctionCall(nullptr, &PGXP::CPU_OR_, Value::FromConstantU32(instruction.bits), lhs, rhs);

          result = OrValues(lhs, rhs);
          if (spec_lhs && spec_rhs)
            spec_value = *spec_lhs | *spec_rhs;

          if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && dest != Reg::zero &&
              ((lhs.HasConstantValue(0) && instruction.r.rt != Reg::zero && dest != instruction.r.rs) ||
               (rhs.HasConstantValue(0) && instruction.r.rs != Reg::zero && dest != instruction.r.rt)))
          {
            const auto rs = lhs.HasConstantValue(0) ? static_cast<CPU::Reg>(instruction.r.rt) :
                                                      static_cast<CPU::Reg>(instruction.r.rs);

            EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, Value::FromConstantU32(PGXP::PackMoveArgs(dest, rs)),
                             lhs.HasConstantValue(0) ? rhs : lhs);
          }
        }
        break;

        case InstructionFunct::and_:
        {
          if (g_settings.UsingPGXPCPUMode())
            EmitFunctionCall(nullptr, &PGXP::CPU_AND_, Value::FromConstantU32(instruction.bits), lhs, rhs);

          result = AndValues(lhs, rhs);
          if (spec_lhs && spec_rhs)
            spec_value = *spec_lhs & *spec_rhs;
        }
        break;

        case InstructionFunct::xor_:
        {
          if (g_settings.UsingPGXPCPUMode())
            EmitFunctionCall(nullptr, &PGXP::CPU_XOR_, Value::FromConstantU32(instruction.bits), lhs, rhs);

          result = XorValues(lhs, rhs);
          if (spec_lhs && spec_rhs)
            spec_value = *spec_lhs ^ *spec_rhs;

          if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && dest != Reg::zero &&
              ((lhs.HasConstantValue(0) && instruction.r.rt != Reg::zero && dest != instruction.r.rs) ||
               (rhs.HasConstantValue(0) && instruction.r.rs != Reg::zero && dest != instruction.r.rt)))
          {
            const auto rs = lhs.HasConstantValue(0) ? static_cast<CPU::Reg>(instruction.r.rt) :
                                                      static_cast<CPU::Reg>(instruction.r.rs);

            EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, Value::FromConstantU32(PGXP::PackMoveArgs(dest, rs)),
                             lhs.HasConstantValue(0) ? rhs : lhs);
          }
        }
        break;

        case InstructionFunct::nor:
        {
          if (g_settings.UsingPGXPCPUMode())
            EmitFunctionCall(nullptr, &PGXP::CPU_NOR, Value::FromConstantU32(instruction.bits), lhs, rhs);

          result = NotValue(OrValues(lhs, rhs));
          if (spec_lhs && spec_rhs)
            spec_value = ~(*spec_lhs | *spec_rhs);
        }
        break;

        default:
          UnreachableCode();
          break;
      }
    }
    break;

    default:
      UnreachableCode();
      break;
  }

  m_register_cache.WriteGuestRegister(dest, std::move(result));
  SpeculativeWriteReg(dest, spec_value);

  InstructionEpilogue(instruction, info);
  return true;
}

bool CodeGenerator::Compile_Shift(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

  const InstructionFunct funct = instruction.r.funct;
  Value rt = m_register_cache.ReadGuestRegister(instruction.r.rt);
  SpeculativeValue rt_spec = SpeculativeReadReg(instruction.r.rt);
  Value shamt;
  SpeculativeValue shamt_spec;
  if (funct == InstructionFunct::sll || funct == InstructionFunct::srl || funct == InstructionFunct::sra)
  {
    // rd <- rt op shamt
    shamt = Value::FromConstantU32(instruction.r.shamt);
    shamt_spec = instruction.r.shamt;
  }
  else
  {
    // rd <- rt op (rs & 0x1F)
    shamt = m_register_cache.ReadGuestRegister(instruction.r.rs);
    shamt_spec = SpeculativeReadReg(instruction.r.rs);
  }

  Value result;
  SpeculativeValue result_spec;
  switch (instruction.r.funct)
  {
    case InstructionFunct::sll:
    case InstructionFunct::sllv:
    {
      if (g_settings.UsingPGXPCPUMode())
      {
        if (instruction.r.funct == InstructionFunct::sll)
          EmitFunctionCall(nullptr, &PGXP::CPU_SLL, Value::FromConstantU32(instruction.bits), rt);
        else // if (instruction.r.funct == InstructionFunct::sllv)
          EmitFunctionCall(nullptr, &PGXP::CPU_SLLV, Value::FromConstantU32(instruction.bits), rt, shamt);
      }

      result = ShlValues(rt, shamt, false);
      if (rt_spec && shamt_spec)
        result_spec = *rt_spec << *shamt_spec;
    }
    break;

    case InstructionFunct::srl:
    case InstructionFunct::srlv:
    {
      if (g_settings.UsingPGXPCPUMode())
      {
        if (instruction.r.funct == InstructionFunct::srl)
          EmitFunctionCall(nullptr, &PGXP::CPU_SRL, Value::FromConstantU32(instruction.bits), rt);
        else // if (instruction.r.funct == InstructionFunct::srlv)
          EmitFunctionCall(nullptr, &PGXP::CPU_SRLV, Value::FromConstantU32(instruction.bits), rt, shamt);
      }

      result = ShrValues(rt, shamt, false);
      if (rt_spec && shamt_spec)
        result_spec = *rt_spec >> *shamt_spec;
    }
    break;

    case InstructionFunct::sra:
    case InstructionFunct::srav:
    {
      if (g_settings.UsingPGXPCPUMode())
      {
        if (instruction.r.funct == InstructionFunct::sra)
          EmitFunctionCall(nullptr, &PGXP::CPU_SRA, Value::FromConstantU32(instruction.bits), rt);
        else // if (instruction.r.funct == InstructionFunct::srav)
          EmitFunctionCall(nullptr, &PGXP::CPU_SRAV, Value::FromConstantU32(instruction.bits), rt, shamt);
      }

      result = SarValues(rt, shamt, false);
      if (rt_spec && shamt_spec)
        result_spec = static_cast<u32>(static_cast<s32>(*rt_spec) >> *shamt_spec);
    }
    break;

    default:
      UnreachableCode();
      break;
  }

  m_register_cache.WriteGuestRegister(instruction.r.rd, std::move(result));
  SpeculativeWriteReg(instruction.r.rd, result_spec);

  InstructionEpilogue(instruction, info);
  return true;
}

bool CodeGenerator::Compile_Load(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

  // rt <- mem[rs + sext(imm)]
  Value base = m_register_cache.ReadGuestRegister(instruction.i.rs);
  Value offset = Value::FromConstantU32(instruction.i.imm_sext32());
  Value address = AddValues(base, offset, false);

  SpeculativeValue address_spec = SpeculativeReadReg(instruction.i.rs);
  SpeculativeValue value_spec;
  if (address_spec)
    address_spec = *address_spec + instruction.i.imm_sext32();

  Value result;
  switch (instruction.op)
  {
    case InstructionOp::lb:
    case InstructionOp::lbu:
    {
      result = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_8);
      ConvertValueSizeInPlace(&result, RegSize_32, (instruction.op == InstructionOp::lb));
      if (g_settings.gpu_pgxp_enable)
        EmitFunctionCall(nullptr, PGXP::CPU_LBx, Value::FromConstantU32(instruction.bits), address, result);

      if (address_spec)
      {
        value_spec = SpeculativeReadMemory(*address_spec & ~3u);
        if (value_spec)
          value_spec = (*value_spec >> ((*address_spec & 3u) * 8u)) & 0xFFu;
      }
    }
    break;

    case InstructionOp::lh:
    case InstructionOp::lhu:
    {
      result = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_16);
      ConvertValueSizeInPlace(&result, RegSize_32, (instruction.op == InstructionOp::lh));

      if (g_settings.gpu_pgxp_enable)
      {
        EmitFunctionCall(nullptr, (instruction.op == InstructionOp::lhu) ? &PGXP::CPU_LHU : &PGXP::CPU_LH,
                         Value::FromConstantU32(instruction.bits), address, result);
      }

      if (address_spec)
      {
        value_spec = SpeculativeReadMemory(*address_spec & ~3u);
        if (value_spec)
          value_spec = (*value_spec >> ((*address_spec & 3u) * 8u)) & 0xFFFFu;
      }
    }
    break;

    case InstructionOp::lw:
    {
      result = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_32);
      if (g_settings.gpu_pgxp_enable)
        EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(instruction.bits), address, result);

      if (address_spec)
        value_spec = SpeculativeReadMemory(*address_spec);
    }
    break;

    default:
      UnreachableCode();
      break;
  }

  m_register_cache.WriteGuestRegisterDelayed(instruction.i.rt, std::move(result));
  SpeculativeWriteReg(instruction.i.rt, value_spec);

  InstructionEpilogue(instruction, info);
  return true;
}
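
// Stores also update the speculative memory map so later loads in the same block can
// be constant-folded. A speculative store that lands inside the current block's own
// code triggers TruncateBlockAtCurrentInstruction() (see the end of this function),
// since the remaining instructions may be overwritten (self-modifying code).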
bool CodeGenerator::Compile_Store(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

  // mem[rs + sext(imm)] <- rt
  Value base = m_register_cache.ReadGuestRegister(instruction.i.rs);
  Value offset = Value::FromConstantU32(instruction.i.imm_sext32());
  Value address = AddValues(base, offset, false);
  Value value = m_register_cache.ReadGuestRegister(instruction.i.rt);

  SpeculativeValue address_spec = SpeculativeReadReg(instruction.i.rs);
  SpeculativeValue value_spec = SpeculativeReadReg(instruction.i.rt);
  if (address_spec)
    address_spec = *address_spec + instruction.i.imm_sext32();

  switch (instruction.op)
  {
    case InstructionOp::sb:
    {
      if (g_settings.gpu_pgxp_enable)
        EmitFunctionCall(nullptr, PGXP::CPU_SB, Value::FromConstantU32(instruction.bits), address, value);

      EmitStoreGuestMemory(instruction, info, address, address_spec, RegSize_8, value);

      if (address_spec)
      {
        const VirtualMemoryAddress aligned_addr = (*address_spec & ~3u);
        const SpeculativeValue aligned_existing_value = SpeculativeReadMemory(aligned_addr);
        if (aligned_existing_value)
        {
          if (value_spec)
          {
            // the byte position comes from the unaligned address
            const u32 shift = (*address_spec & 3u) * 8u;
            SpeculativeWriteMemory(aligned_addr,
                                   (*aligned_existing_value & ~(0xFFu << shift)) | ((*value_spec & 0xFFu) << shift));
          }
          else
          {
            SpeculativeWriteMemory(aligned_addr, std::nullopt);
          }
        }
      }
    }
    break;

    case InstructionOp::sh:
    {
      if (g_settings.gpu_pgxp_enable)
        EmitFunctionCall(nullptr, PGXP::CPU_SH, Value::FromConstantU32(instruction.bits), address, value);

      EmitStoreGuestMemory(instruction, info, address, address_spec, RegSize_16, value);

      if (address_spec)
      {
        const VirtualMemoryAddress aligned_addr = (*address_spec & ~3u);
        const SpeculativeValue aligned_existing_value = SpeculativeReadMemory(aligned_addr);
        if (aligned_existing_value)
        {
          if (value_spec)
          {
            // the halfword position comes from the unaligned address
            const u32 shift = (*address_spec & 2u) * 8u;
            SpeculativeWriteMemory(aligned_addr, (*aligned_existing_value & ~(0xFFFFu << shift)) |
                                                   ((*value_spec & 0xFFFFu) << shift));
          }
          else
          {
            SpeculativeWriteMemory(aligned_addr, std::nullopt);
          }
        }
      }
    }
    break;

    case InstructionOp::sw:
    {
      if (g_settings.gpu_pgxp_enable)
        EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(instruction.bits), address, value);

      EmitStoreGuestMemory(instruction, info, address, address_spec, RegSize_32, value);

      if (address_spec)
        SpeculativeWriteMemory(*address_spec, value_spec);
    }
    break;

    default:
      UnreachableCode();
      break;
  }

  InstructionEpilogue(instruction, info);

  if (address_spec)
  {
    const CPU::Segment seg = GetSegmentForAddress(*address_spec);
    if (seg == Segment::KUSEG || seg == Segment::KSEG0 || seg == Segment::KSEG1)
    {
      const PhysicalMemoryAddress phys_addr = VirtualAddressToPhysical(*address_spec);
      const PhysicalMemoryAddress block_start = VirtualAddressToPhysical(m_block->pc);
      const PhysicalMemoryAddress block_end =
        VirtualAddressToPhysical(m_block->pc + (m_block->size * sizeof(Instruction)));
      if (phys_addr >= block_start && phys_addr < block_end)
      {
        WARNING_LOG("Instruction {:08X} speculatively writes to {:08X} inside block {:08X}-{:08X}. Truncating block.",
                    info.pc, phys_addr, block_start, block_end);
        TruncateBlockAtCurrentInstruction();
      }
    }
  }

  return true;
}
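
// lwl/lwr merge part of an unaligned word with the existing register value. For an
// address with byte offset n (= addr & 3) and shift = n * 8:
//   lwl: rt = (rt & (0x00FFFFFF >> shift)) | (mem << (24 - shift))
//   lwr: rt = (rt & (0xFFFFFF00 << (24 - shift))) | (mem >> shift)
// which is exactly the mask/shift sequence emitted below.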
bool CodeGenerator::Compile_LoadLeftRight(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

  Value base = m_register_cache.ReadGuestRegister(instruction.i.rs);
  Value offset = Value::FromConstantU32(instruction.i.imm_sext32());
  Value address = AddValues(base, offset, false);
  base.ReleaseAndClear();

  SpeculativeValue address_spec = SpeculativeReadReg(instruction.i.rs);
  if (address_spec)
    address_spec = *address_spec + instruction.i.imm_sext32();

  Value shift = ShlValues(AndValues(address, Value::FromConstantU32(3)), Value::FromConstantU32(3)); // * 8
  address = AndValues(address, Value::FromConstantU32(~u32(3)));

  // hack to bypass load delays
  Value value;
  if (instruction.i.rt == m_register_cache.GetLoadDelayRegister())
  {
    const Value& ld_value = m_register_cache.GetLoadDelayValue();
    if (ld_value.IsInHostRegister())
      value.SetHostReg(&m_register_cache, ld_value.GetHostRegister(), ld_value.size);
    else
      value = ld_value;
  }
  else
  {
    // if this is the first instruction in the block, we need to stall until the load finishes
    // we don't actually care if it's our target reg or not, if it's not, it won't affect anything
    if (m_load_delay_dirty)
    {
      DEV_LOG("Flushing interpreter load delay for lwl/lwr instruction at 0x{:08X}", info.pc);
      EmitFlushInterpreterLoadDelay();
      m_register_cache.InvalidateGuestRegister(instruction.r.rt);
      m_load_delay_dirty = false;
    }

    value = m_register_cache.ReadGuestRegister(instruction.i.rt, true, true);
  }

  Value mem;
  if (instruction.op == InstructionOp::lwl)
  {
    Value lhs = ShrValues(Value::FromConstantU32(0x00FFFFFF), shift);
    AndValueInPlace(lhs, value);
    shift = SubValues(Value::FromConstantU32(24), shift, false);
    value.ReleaseAndClear();

    mem = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_32);
    EmitShl(mem.GetHostRegister(), mem.GetHostRegister(), RegSize_32, shift);
    EmitOr(mem.GetHostRegister(), mem.GetHostRegister(), lhs);
  }
  else
  {
    Value lhs = ShlValues(Value::FromConstantU32(0xFFFFFF00), SubValues(Value::FromConstantU32(24), shift, false));
    AndValueInPlace(lhs, value);
    value.ReleaseAndClear();

    mem = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_32);
    EmitShr(mem.GetHostRegister(), mem.GetHostRegister(), RegSize_32, shift);
    EmitOr(mem.GetHostRegister(), mem.GetHostRegister(), lhs);
  }

  shift.ReleaseAndClear();

  if (g_settings.gpu_pgxp_enable)
    EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(instruction.bits), address, mem);

  m_register_cache.WriteGuestRegisterDelayed(instruction.i.rt, std::move(mem));

  // TODO: Speculative values
  SpeculativeWriteReg(instruction.r.rt, std::nullopt);

  InstructionEpilogue(instruction, info);
  return true;
}

bool CodeGenerator::Compile_StoreLeftRight(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

  Value base = m_register_cache.ReadGuestRegister(instruction.i.rs);
  Value offset = Value::FromConstantU32(instruction.i.imm_sext32());
  Value address = AddValues(base, offset, false);
  base.ReleaseAndClear();

  // TODO: Speculative values
  SpeculativeValue address_spec = SpeculativeReadReg(instruction.i.rs);
  if (address_spec)
  {
    address_spec = *address_spec + instruction.i.imm_sext32();
    SpeculativeWriteMemory(*address_spec & ~3u, std::nullopt);
  }

  Value shift = ShlValues(AndValues(address, Value::FromConstantU32(3)), Value::FromConstantU32(3)); // * 8
  address = AndValues(address, Value::FromConstantU32(~u32(3)));

  Value mem;
  if (instruction.op == InstructionOp::swl)
  {
    Value mask = ShlValues(Value::FromConstantU32(0xFFFFFF00), shift);
    mem = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_32);
    EmitAnd(mem.GetHostRegister(), mem.GetHostRegister(), mask);
    mask.ReleaseAndClear();

    Value reg = m_register_cache.ReadGuestRegister(instruction.r.rt);
    Value lhs = ShrValues(reg, SubValues(Value::FromConstantU32(24), shift, false));
    reg.ReleaseAndClear();

    EmitOr(mem.GetHostRegister(), mem.GetHostRegister(), lhs);
  }
  else
  {
    Value mask = ShrValues(Value::FromConstantU32(0x00FFFFFF), SubValues(Value::FromConstantU32(24), shift, false));
    mem = EmitLoadGuestMemory(instruction, info, address, address_spec, RegSize_32);
    AndValueInPlace(mem, mask);
    mask.ReleaseAndClear();

    Value reg = m_register_cache.ReadGuestRegister(instruction.r.rt);
    Value lhs = ShlValues(reg, shift);
    reg.ReleaseAndClear();

    EmitOr(mem.GetHostRegister(), mem.GetHostRegister(), lhs);
  }

  shift.ReleaseAndClear();

  EmitStoreGuestMemory(instruction, info, address, address_spec, RegSize_32, mem);
  if (g_settings.gpu_pgxp_enable)
    EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(instruction.bits), address, mem);

  InstructionEpilogue(instruction, info);
  return true;
}

bool CodeGenerator::Compile_MoveHiLo(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

  switch (instruction.r.funct)
  {
    case InstructionFunct::mfhi:
    {
      Value hi = m_register_cache.ReadGuestRegister(Reg::hi);
      if (g_settings.UsingPGXPCPUMode())
      {
        EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed,
                         Value::FromConstantU32(PGXP::PackMoveArgs(instruction.r.rd, Reg::hi)), hi);
      }

      m_register_cache.WriteGuestRegister(instruction.r.rd, std::move(hi));
      SpeculativeWriteReg(instruction.r.rd, std::nullopt);
    }
    break;

    case InstructionFunct::mthi:
    {
      Value rs = m_register_cache.ReadGuestRegister(instruction.r.rs);
      if (g_settings.UsingPGXPCPUMode())
      {
        EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed,
                         Value::FromConstantU32(PGXP::PackMoveArgs(Reg::hi, instruction.r.rs)), rs);
      }

      m_register_cache.WriteGuestRegister(Reg::hi, std::move(rs));
    }
    break;

    case InstructionFunct::mflo:
    {
      Value lo = m_register_cache.ReadGuestRegister(Reg::lo);
      if (g_settings.UsingPGXPCPUMode())
      {
        EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed,
                         Value::FromConstantU32(PGXP::PackMoveArgs(instruction.r.rd, Reg::lo)), lo);
      }

      m_register_cache.WriteGuestRegister(instruction.r.rd, std::move(lo));
      SpeculativeWriteReg(instruction.r.rd, std::nullopt);
    }
    break;

    case InstructionFunct::mtlo:
    {
      Value rs = m_register_cache.ReadGuestRegister(instruction.r.rs);
      if (g_settings.UsingPGXPCPUMode())
      {
        EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed,
                         Value::FromConstantU32(PGXP::PackMoveArgs(Reg::lo, instruction.r.rs)), rs);
      }

      m_register_cache.WriteGuestRegister(Reg::lo, std::move(rs));
    }
    break;

    default:
      UnreachableCode();
      break;
  }

  InstructionEpilogue(instruction, info);
  return true;
}

bool CodeGenerator::Compile_Add(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

  const bool check_overflow = (instruction.op == InstructionOp::addi || (instruction.op == InstructionOp::funct &&
                                                                         instruction.r.funct == InstructionFunct::add));

  Value lhs, rhs;
  SpeculativeValue lhs_spec, rhs_spec;
  Reg dest;

  switch (instruction.op)
  {
    case InstructionOp::addi:
    case InstructionOp::addiu:
    {
      // rt <- rs + sext(imm)
      dest = instruction.i.rt;
      lhs = m_register_cache.ReadGuestRegister(instruction.i.rs);
      rhs = Value::FromConstantU32(instruction.i.imm_sext32());

      lhs_spec = SpeculativeReadReg(instruction.i.rs);
      rhs_spec = instruction.i.imm_sext32();
    }
    break;

    case InstructionOp::funct:
    {
      Assert(instruction.r.funct == InstructionFunct::add || instruction.r.funct == InstructionFunct::addu);
      dest = instruction.r.rd;
      lhs = m_register_cache.ReadGuestRegister(instruction.r.rs);
      rhs = m_register_cache.ReadGuestRegister(instruction.r.rt);
      lhs_spec = SpeculativeReadReg(instruction.r.rs);
      rhs_spec = SpeculativeReadReg(instruction.r.rt);
    }
    break;

    default:
      UnreachableCode();
      return false;
  }

  // detect register moves and handle them for pgxp
  if (dest != Reg::zero && g_settings.gpu_pgxp_enable)
  {
    bool handled = false;
    if (instruction.op != InstructionOp::funct)
    {
      if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu && instruction.i.rs != Reg::zero &&
          dest != instruction.i.rs && rhs.HasConstantValue(0))
      {
        handled = true;
        EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed,
                         Value::FromConstantU32(PGXP::PackMoveArgs(dest, instruction.i.rs)), lhs);
      }
    }
    else
    {
      if (g_settings.gpu_pgxp_enable && !g_settings.gpu_pgxp_cpu &&
          ((lhs.HasConstantValue(0) && instruction.r.rt != Reg::zero && dest != instruction.r.rs) ||
           (rhs.HasConstantValue(0) && instruction.r.rs != Reg::zero && dest != instruction.r.rt)))
      {
        handled = true;

        // the move source is whichever operand is not the zero constant
        const auto rs = lhs.HasConstantValue(0) ? static_cast<CPU::Reg>(instruction.r.rt) :
                                                  static_cast<CPU::Reg>(instruction.r.rs);
        EmitFunctionCall(nullptr, &PGXP::CPU_MOVE_Packed, Value::FromConstantU32(PGXP::PackMoveArgs(dest, rs)),
                         lhs.HasConstantValue(0) ? rhs : lhs);
      }
    }

    if (g_settings.gpu_pgxp_cpu && !handled)
    {
      if (instruction.op != InstructionOp::funct)
        EmitFunctionCall(nullptr, &PGXP::CPU_ADDI, Value::FromConstantU32(instruction.bits), lhs);
      else
        EmitFunctionCall(nullptr, &PGXP::CPU_ADD, Value::FromConstantU32(instruction.bits), lhs, rhs);
    }
  }

  Value result = AddValues(lhs, rhs, check_overflow);
  if (check_overflow)
    GenerateExceptionExit(instruction, info, Exception::Ov, Condition::Overflow);

  m_register_cache.WriteGuestRegister(dest, std::move(result));

  SpeculativeValue value_spec;
  if (lhs_spec && rhs_spec)
    value_spec = *lhs_spec + *rhs_spec;
  SpeculativeWriteReg(dest, value_spec);

  InstructionEpilogue(instruction, info);
  return true;
}

bool CodeGenerator::Compile_Subtract(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

  Assert(instruction.op == InstructionOp::funct);
  const bool check_overflow = (instruction.r.funct == InstructionFunct::sub);

  Value lhs = m_register_cache.ReadGuestRegister(instruction.r.rs);
  Value rhs = m_register_cache.ReadGuestRegister(instruction.r.rt);

  SpeculativeValue lhs_spec = SpeculativeReadReg(instruction.r.rs);
  SpeculativeValue rhs_spec = SpeculativeReadReg(instruction.r.rt);

  if (g_settings.UsingPGXPCPUMode())
    EmitFunctionCall(nullptr, &PGXP::CPU_SUB, Value::FromConstantU32(instruction.bits), lhs, rhs);

  Value result = SubValues(lhs, rhs, check_overflow);
  if (check_overflow)
    GenerateExceptionExit(instruction, info, Exception::Ov, Condition::Overflow);

  m_register_cache.WriteGuestRegister(instruction.r.rd, std::move(result));

  SpeculativeValue value_spec;
  if (lhs_spec && rhs_spec)
    value_spec = *lhs_spec - *rhs_spec;
  SpeculativeWriteReg(instruction.r.rd, value_spec);

  InstructionEpilogue(instruction, info);
  return true;
}
  Value result = AddValues(lhs, rhs, check_overflow);
  if (check_overflow)
    GenerateExceptionExit(instruction, info, Exception::Ov, Condition::Overflow);

  m_register_cache.WriteGuestRegister(dest, std::move(result));

  SpeculativeValue value_spec;
  if (lhs_spec && rhs_spec)
    value_spec = *lhs_spec + *rhs_spec;
  SpeculativeWriteReg(dest, value_spec);

  InstructionEpilogue(instruction, info);
  return true;
}

bool CodeGenerator::Compile_Subtract(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

  Assert(instruction.op == InstructionOp::funct);
  const bool check_overflow = (instruction.r.funct == InstructionFunct::sub);

  Value lhs = m_register_cache.ReadGuestRegister(instruction.r.rs);
  Value rhs = m_register_cache.ReadGuestRegister(instruction.r.rt);

  SpeculativeValue lhs_spec = SpeculativeReadReg(instruction.r.rs);
  SpeculativeValue rhs_spec = SpeculativeReadReg(instruction.r.rt);

  if (g_settings.UsingPGXPCPUMode())
    EmitFunctionCall(nullptr, &PGXP::CPU_SUB, Value::FromConstantU32(instruction.bits), lhs, rhs);

  Value result = SubValues(lhs, rhs, check_overflow);
  if (check_overflow)
    GenerateExceptionExit(instruction, info, Exception::Ov, Condition::Overflow);

  m_register_cache.WriteGuestRegister(instruction.r.rd, std::move(result));

  SpeculativeValue value_spec;
  if (lhs_spec && rhs_spec)
    value_spec = *lhs_spec - *rhs_spec;
  SpeculativeWriteReg(instruction.r.rd, value_spec);

  InstructionEpilogue(instruction, info);
  return true;
}

bool CodeGenerator::Compile_Multiply(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

  const bool signed_multiply = (instruction.r.funct == InstructionFunct::mult);
  Value rs = m_register_cache.ReadGuestRegister(instruction.r.rs);
  Value rt = m_register_cache.ReadGuestRegister(instruction.r.rt);
  if (g_settings.UsingPGXPCPUMode())
  {
    EmitFunctionCall(nullptr, signed_multiply ? &PGXP::CPU_MULT : &PGXP::CPU_MULTU,
                     Value::FromConstantU32(instruction.bits), rs, rt);
  }

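  // mult/multu form a 64-bit product: the upper 32 bits go to HI, the lower 32 bits to LO.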
  std::pair<Value, Value> result = MulValues(rs, rt, signed_multiply);
  rs.ReleaseAndClear();
  rt.ReleaseAndClear();
  m_register_cache.WriteGuestRegister(Reg::hi, std::move(result.first));
  m_register_cache.WriteGuestRegister(Reg::lo, std::move(result.second));

  InstructionEpilogue(instruction, info);
  return true;
}

static std::tuple<u32, u32> MIPSDivide(u32 num, u32 denom)
{
  u32 lo, hi;

  if (denom == 0)
  {
    // divide by zero
    lo = UINT32_C(0xFFFFFFFF);
    hi = static_cast<u32>(num);
  }
  else
  {
    lo = num / denom;
    hi = num % denom;
  }

  return std::tie(lo, hi);
}

static std::tuple<s32, s32> MIPSDivide(s32 num, s32 denom)
{
  s32 lo, hi;
  if (denom == 0)
  {
    // divide by zero
    lo = (num >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
    hi = static_cast<u32>(num);
  }
  else if (static_cast<u32>(num) == UINT32_C(0x80000000) && denom == -1)
  {
    // unrepresentable
    lo = UINT32_C(0x80000000);
    hi = 0;
  }
  else
  {
    lo = num / denom;
    hi = num % denom;
  }

  return std::tie(lo, hi);
}

bool CodeGenerator::Compile_Divide(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

  Value num = m_register_cache.ReadGuestRegister(instruction.r.rs);
  Value denom = m_register_cache.ReadGuestRegister(instruction.r.rt);

  if (g_settings.UsingPGXPCPUMode())
    EmitFunctionCall(nullptr, &PGXP::CPU_DIV, Value::FromConstantU32(instruction.bits), num, denom);

  if (num.IsConstant() && denom.IsConstant())
  {
    const auto [lo, hi] = MIPSDivide(static_cast<u32>(num.constant_value), static_cast<u32>(denom.constant_value));
    m_register_cache.WriteGuestRegister(Reg::lo, Value::FromConstantU32(lo));
    m_register_cache.WriteGuestRegister(Reg::hi, Value::FromConstantU32(hi));
  }
  else
  {
    Value num_reg = GetValueInHostRegister(num, false);
    Value denom_reg = GetValueInHostRegister(denom, false);

    m_register_cache.InvalidateGuestRegister(Reg::lo);
    m_register_cache.InvalidateGuestRegister(Reg::hi);

    Value lo = m_register_cache.AllocateScratch(RegSize_32);
    Value hi = m_register_cache.AllocateScratch(RegSize_32);
    m_register_cache.InhibitAllocation();

    LabelType do_divide, done;

    if (!denom.IsConstant() || denom.HasConstantValue(0))
    {
      // if (denom == 0)
      EmitConditionalBranch(Condition::NotEqual, false, denom_reg.GetHostRegister(), Value::FromConstantU32(0),
                            &do_divide);
      {
        // divide by zero: lo = 0xFFFFFFFF, hi = num
        EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0xFFFFFFFF));
        EmitCopyValue(hi.GetHostRegister(), num_reg);
        EmitBranch(&done);
      }
    }

    // else
    {
      EmitBindLabel(&do_divide);
      EmitDiv(lo.GetHostRegister(), hi.GetHostRegister(), num_reg.GetHostRegister(), denom_reg.GetHostRegister(),
              RegSize_32, false);
    }

    EmitBindLabel(&done);

    m_register_cache.UninhibitAllocation();
    m_register_cache.WriteGuestRegister(Reg::lo, std::move(lo));
    m_register_cache.WriteGuestRegister(Reg::hi, std::move(hi));
  }

  InstructionEpilogue(instruction, info);
  return true;
}

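// div (signed) adds one special case over divu: 0x80000000 / -1 does not fit in 32 bits, so the R3000A
// yields LO = 0x80000000, HI = 0 instead (see MIPSDivide above). As with divu, a zero divisor never
// traps; it simply produces the fixed values handled below.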
bool CodeGenerator::Compile_SignedDivide(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

  Value num = m_register_cache.ReadGuestRegister(instruction.r.rs);
  Value denom = m_register_cache.ReadGuestRegister(instruction.r.rt);

  if (g_settings.UsingPGXPCPUMode())
    EmitFunctionCall(nullptr, &PGXP::CPU_DIV, Value::FromConstantU32(instruction.bits), num, denom);

  if (num.IsConstant() && denom.IsConstant())
  {
    const auto [lo, hi] = MIPSDivide(num.GetS32ConstantValue(), denom.GetS32ConstantValue());
    m_register_cache.WriteGuestRegister(Reg::lo, Value::FromConstantU32(static_cast<u32>(lo)));
    m_register_cache.WriteGuestRegister(Reg::hi, Value::FromConstantU32(static_cast<u32>(hi)));
  }
  else
  {
    Value num_reg = GetValueInHostRegister(num, false);
    Value denom_reg = GetValueInHostRegister(denom, false);

    m_register_cache.InvalidateGuestRegister(Reg::lo);
    m_register_cache.InvalidateGuestRegister(Reg::hi);

    Value lo = m_register_cache.AllocateScratch(RegSize_32);
    Value hi = m_register_cache.AllocateScratch(RegSize_32);
    m_register_cache.InhibitAllocation();

    // we need this in a register on ARM because it won't fit in an immediate
    EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0x80000000u));

    LabelType do_divide, done;

    LabelType not_zero;
    if (!denom.IsConstant() || denom.HasConstantValue(0))
    {
      // if (denom == 0)
      EmitConditionalBranch(Condition::NotEqual, false, denom_reg.GetHostRegister(), Value::FromConstantU32(0),
                            &not_zero);
      {
        // hi = static_cast<u32>(num);
        EmitCopyValue(hi.GetHostRegister(), num_reg);

        // lo = (num >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
        LabelType greater_equal_zero;
        EmitConditionalBranch(Condition::GreaterEqual, false, num_reg.GetHostRegister(), Value::FromConstantU32(0),
                              &greater_equal_zero);
        EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(1));
        EmitBranch(&done);
        EmitBindLabel(&greater_equal_zero);
        EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0xFFFFFFFFu));
        EmitBranch(&done);
      }
    }

    // else if (static_cast<u32>(num) == UINT32_C(0x80000000) && denom == -1)
    {
      EmitBindLabel(&not_zero);
      EmitConditionalBranch(Condition::NotEqual, false, denom_reg.GetHostRegister(), Value::FromConstantS32(-1),
                            &do_divide);
      EmitConditionalBranch(Condition::NotEqual, false, num_reg.GetHostRegister(), lo, &do_divide);

      // unrepresentable
      // EmitCopyValue(lo.GetHostRegister(), Value::FromConstantU32(0x80000000u)); // done above
      EmitCopyValue(hi.GetHostRegister(), Value::FromConstantU32(0));
      EmitBranch(&done);
    }

    // else
    {
      EmitBindLabel(&do_divide);
      EmitDiv(lo.GetHostRegister(), hi.GetHostRegister(), num_reg.GetHostRegister(), denom_reg.GetHostRegister(),
              RegSize_32, true);
    }

    EmitBindLabel(&done);

    m_register_cache.UninhibitAllocation();
    m_register_cache.WriteGuestRegister(Reg::lo, std::move(lo));
    m_register_cache.WriteGuestRegister(Reg::hi, std::move(hi));
  }

  InstructionEpilogue(instruction, info);
  return true;
}

bool CodeGenerator::Compile_SetLess(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

  const bool signed_comparison =
    (instruction.op == InstructionOp::slti ||
     (instruction.op == InstructionOp::funct && instruction.r.funct == InstructionFunct::slt));

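  // Note: sltiu sign-extends its 16-bit immediate just like slti; only the comparison itself
  // (Below rather than Less) is unsigned.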
  Reg dest;
  Value lhs, rhs;
  SpeculativeValue lhs_spec, rhs_spec;
  if (instruction.op == InstructionOp::slti || instruction.op == InstructionOp::sltiu)
  {
    // rt <- rs < {z,s}ext(imm)
    dest = instruction.i.rt;
    lhs = m_register_cache.ReadGuestRegister(instruction.i.rs, true, true);
    rhs = Value::FromConstantU32(instruction.i.imm_sext32());
    lhs_spec = SpeculativeReadReg(instruction.i.rs);
    rhs_spec = instruction.i.imm_sext32();

    // invalidate the old value, which might free up a host register
    if (dest != instruction.r.rs)
      m_register_cache.InvalidateGuestRegister(dest);
  }
  else
  {
    // rd <- rs < rt
    dest = instruction.r.rd;
    lhs = m_register_cache.ReadGuestRegister(instruction.r.rs, true, true);
    rhs = m_register_cache.ReadGuestRegister(instruction.r.rt);
    lhs_spec = SpeculativeReadReg(instruction.r.rs);
    rhs_spec = SpeculativeReadReg(instruction.r.rt);

    // invalidate the old value, which might free up a host register
    if (dest != instruction.i.rs && dest != instruction.r.rt)
      m_register_cache.InvalidateGuestRegister(dest);
  }

  if (g_settings.UsingPGXPCPUMode())
  {
    if (instruction.op == InstructionOp::slti)
      EmitFunctionCall(nullptr, &PGXP::CPU_SLTI, Value::FromConstantU32(instruction.bits), lhs);
    else if (instruction.op == InstructionOp::sltiu)
      EmitFunctionCall(nullptr, &PGXP::CPU_SLTIU, Value::FromConstantU32(instruction.bits), lhs);
    else if (instruction.r.funct == InstructionFunct::slt)
      EmitFunctionCall(nullptr, &PGXP::CPU_SLT, Value::FromConstantU32(instruction.bits), lhs, rhs);
    else // if (instruction.r.funct == InstructionFunct::sltu)
      EmitFunctionCall(nullptr, &PGXP::CPU_SLTU, Value::FromConstantU32(instruction.bits), lhs, rhs);
  }

  Value result = m_register_cache.AllocateScratch(RegSize_32);
  EmitCmp(lhs.host_reg, rhs);
  EmitSetConditionResult(result.host_reg, result.size, signed_comparison ? Condition::Less : Condition::Below);

  m_register_cache.WriteGuestRegister(dest, std::move(result));

  SpeculativeValue value_spec;
  if (lhs_spec && rhs_spec)
  {
    value_spec = BoolToUInt32(signed_comparison ? (static_cast<s32>(*lhs_spec) < static_cast<s32>(*rhs_spec)) :
                                                  (*lhs_spec < *rhs_spec));
  }
  SpeculativeWriteReg(dest, value_spec);

  InstructionEpilogue(instruction, info);
  return true;
}

bool CodeGenerator::Compile_Branch(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

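  // DoBranch emits the shared tail for every branch form: evaluate the condition, write the link
  // register if requested, verify the target alignment, then either link directly to the successor
  // block(s) or fall back to updating the PC and returning to the dispatcher.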
  auto DoBranch = [this, &instruction, &info](Condition condition, const Value& lhs, const Value& rhs, Reg lr_reg,
                                              Value&& branch_target) {
    const bool can_link_block = info.is_direct_branch_instruction && g_settings.cpu_recompiler_block_linking;

    // ensure the lr register is flushed, since we want its correct value after the branch
    // we don't want to invalidate it yet because of "jalr r0, r0"; branch_target could be the lr_reg
    if (lr_reg != Reg::count && lr_reg != Reg::zero)
      m_register_cache.FlushGuestRegister(lr_reg, false, true);

    // compute the return address, which is also set as the new pc when the branch isn't taken
    Value constant_next_pc = CalculatePC(4);
    Value next_pc = constant_next_pc;
    DebugAssert(constant_next_pc.IsConstant());
    if (condition != Condition::Always)
    {
      next_pc = m_register_cache.AllocateScratch(RegSize_32);
      EmitCopyValue(next_pc.GetHostRegister(), constant_next_pc);
    }

    Value take_branch;
    LabelType branch_taken, branch_not_taken;
    if (condition != Condition::Always)
    {
      if (!can_link_block)
      {
        // condition is inverted because we want the case for skipping it
        if (lhs.IsValid() && rhs.IsValid())
          EmitConditionalBranch(condition, true, lhs.host_reg, rhs, &branch_not_taken);
        else if (lhs.IsValid())
          EmitConditionalBranch(condition, true, lhs.host_reg, lhs.size, &branch_not_taken);
        else
          EmitConditionalBranch(condition, true, &branch_not_taken);
      }
      else
      {
        take_branch = m_register_cache.AllocateScratch(RegSize_32);
        switch (condition)
        {
          case Condition::NotEqual:
          case Condition::Equal:
          case Condition::Overflow:
          case Condition::Greater:
          case Condition::GreaterEqual:
          case Condition::LessEqual:
          case Condition::Less:
          case Condition::Above:
          case Condition::AboveEqual:
          case Condition::Below:
          case Condition::BelowEqual:
          {
            EmitCmp(lhs.GetHostRegister(), rhs);
            EmitSetConditionResult(take_branch.GetHostRegister(), take_branch.size, condition);
          }
          break;

          case Condition::Negative:
          case Condition::PositiveOrZero:
          case Condition::NotZero:
          case Condition::Zero:
          {
            Assert(!rhs.IsValid() || (rhs.IsConstant() && rhs.GetS64ConstantValue() == 0));
            EmitTest(lhs.GetHostRegister(), lhs);
            EmitSetConditionResult(take_branch.GetHostRegister(), take_branch.size, condition);
          }
          break;

          default:
            UnreachableCode();
            break;
        }
      }
    }

    // save the old PC if we want to
    if (lr_reg != Reg::count && lr_reg != Reg::zero)
    {
      // Can't cache because we have two branches. Load delay cancel is due to the immediate flush afterwards;
      // if we don't cancel it, at the end of the instruction the value we write can be overridden.
      EmitCancelInterpreterLoadDelayForReg(lr_reg);
      EmitStoreGuestRegister(lr_reg, next_pc);

      // now invalidate lr because it was possibly written in the branch
      m_register_cache.InvalidateGuestRegister(lr_reg);
      if (m_register_cache.GetLoadDelayRegister() == lr_reg)
        m_register_cache.CancelLoadDelay();
    }

    // we don't need to test the address of constant branches unless they're definitely misaligned, which would be
    // strange.
    if (g_settings.cpu_recompiler_memory_exceptions &&
        (!branch_target.IsConstant() || (branch_target.constant_value & 0x3) != 0))
    {
      LabelType branch_okay;

      if (branch_target.IsConstant())
      {
        WARNING_LOG("Misaligned constant target branch 0x{:08X}, this is strange",
                    Truncate32(branch_target.constant_value));
      }
      else
      {
        // check the alignment of the target
        EmitTest(branch_target.host_reg, Value::FromConstantU32(0x3));
        EmitConditionalBranch(Condition::Zero, false, &branch_okay);
      }

      // exception exit for misaligned target
      m_register_cache.PushState();
      EmitBranch(GetCurrentFarCodePointer());
      EmitBindLabel(&branch_okay);

      SwitchToFarCode();
      EmitStoreCPUStructField(OFFSETOF(State, cop0_regs.BadVaddr), branch_target);
      EmitFunctionCall(
        nullptr, static_cast<void (*)(u32, u32)>(&CPU::RaiseException),
        Value::FromConstantU32(Cop0Registers::CAUSE::MakeValueForException(Exception::AdEL, false, false, 0)),
        branch_target);
      EmitExceptionExit();
      SwitchToNearCode();

      m_register_cache.PopState();
    }

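    // Block linking: when the target is known at compile time, we can jump straight to the successor
    // block instead of bouncing through the dispatcher. The downcount comparison below still sends us
    // back to the dispatcher when pending events are due.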
    if (can_link_block)
    {
      // if it's an in-block branch, compile the delay slot now
      // TODO: Make this more optimal by moving the condition down if it's a nop
      Assert((m_current_instruction.instruction + 1) != m_block_end.instruction);
      InstructionEpilogue(instruction, info);
      m_current_instruction.instruction++;
      m_current_instruction.info++;
      if (!CompileInstruction(*m_current_instruction.instruction, *m_current_instruction.info))
        return false;

      // flush all regs since we're at the end of the block now
      BlockEpilogue();
      m_block_linked = true;

      // check downcount
      Value pending_ticks = m_register_cache.AllocateScratch(RegSize_32);
      Value downcount = m_register_cache.AllocateScratch(RegSize_32);
      EmitLoadCPUStructField(pending_ticks.GetHostRegister(), RegSize_32, OFFSETOF(State, pending_ticks));
      EmitLoadCPUStructField(downcount.GetHostRegister(), RegSize_32, OFFSETOF(State, downcount));

      // pending < downcount
      LabelType return_to_dispatcher;

      if (condition != Condition::Always)
      {
        EmitBranchIfBitClear(take_branch.GetHostRegister(), take_branch.size, 0, &branch_not_taken);
        m_register_cache.PushState();
        {
          WriteNewPC(branch_target, false);
          EmitConditionalBranch(Condition::GreaterEqual, false, pending_ticks.GetHostRegister(), downcount,
                                &return_to_dispatcher);

          // we're committed at this point :D
          EmitEndBlock(true, nullptr);

          DebugAssert(branch_target.IsConstant());
          if (static_cast<u32>(branch_target.constant_value) == m_block->pc)
          {
            // self-link
            EmitBranch(GetStartNearCodePointer());
          }
          else
          {
            const void* host_target = CPU::CodeCache::CreateBlockLink(m_block, GetCurrentCodePointer(),
                                                                      static_cast<u32>(branch_target.constant_value));
            EmitBranch(host_target);
          }
        }
        m_register_cache.PopState();

        SwitchToNearCode();
        EmitBindLabel(&branch_not_taken);
      }

      m_register_cache.PushState();

      if (condition != Condition::Always)
      {
        WriteNewPC(next_pc, true);
      }
      else
      {
        WriteNewPC(branch_target, true);
      }

      EmitConditionalBranch(Condition::GreaterEqual, false, pending_ticks.GetHostRegister(), downcount,
                            &return_to_dispatcher);

      EmitEndBlock(true, nullptr);

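      // Link the not-taken (or unconditional) edge as well: an unconditional branch links to its
      // target, a conditional one to the fall-through PC. A jump back to this block's own PC simply
      // branches to its start instead of creating a link.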
      const Value& jump_target = (condition != Condition::Always) ? constant_next_pc : branch_target;
      DebugAssert(jump_target.IsConstant());
      if (static_cast<u32>(jump_target.constant_value) == m_block->pc)
      {
        // self-link
        EmitBranch(GetStartNearCodePointer());
      }
      else
      {
        const void* host_target = CPU::CodeCache::CreateBlockLink(m_block, GetCurrentCodePointer(),
                                                                  static_cast<u32>(jump_target.constant_value));
        EmitBranch(host_target);
      }

      m_register_cache.PopState();

      EmitBindLabel(&return_to_dispatcher);
      EmitEndBlock(true, CodeCache::g_run_events_and_dispatch);
    }
    else
    {
      if (condition != Condition::Always)
      {
        // branch taken path - modify the next pc
        EmitBindLabel(&branch_taken);
        EmitCopyValue(next_pc.GetHostRegister(), branch_target);

        // converge point
        EmitBindLabel(&branch_not_taken);
        WriteNewPC(next_pc, true);
      }
      else
      {
        // next_pc is not used for unconditional branches
        WriteNewPC(branch_target, true);
      }

      InstructionEpilogue(instruction, info);
    }

    return true;
  };

  // Compute the branch target.
  // This depends on the form of the instruction.
  switch (instruction.op)
  {
    case InstructionOp::j:
    case InstructionOp::jal:
    {
      // npc = (pc & 0xF0000000) | (target << 2)
      Value branch_target = OrValues(AndValues(CalculatePC(), Value::FromConstantU32(0xF0000000)),
                                     Value::FromConstantU32(instruction.j.target << 2));

      return DoBranch(Condition::Always, Value(), Value(),
                      (instruction.op == InstructionOp::jal) ? Reg::ra : Reg::count, std::move(branch_target));
    }

    case InstructionOp::funct:
    {
      if (instruction.r.funct == InstructionFunct::jr || instruction.r.funct == InstructionFunct::jalr)
      {
        // npc = rs, link to rd
        Value branch_target = m_register_cache.ReadGuestRegister(instruction.r.rs);
        return DoBranch(Condition::Always, Value(), Value(),
                        (instruction.r.funct == InstructionFunct::jalr) ? instruction.r.rd : Reg::count,
                        std::move(branch_target));
      }
      else if (instruction.r.funct == InstructionFunct::syscall || instruction.r.funct == InstructionFunct::break_)
      {
        const Exception excode =
          (instruction.r.funct == InstructionFunct::syscall) ? Exception::Syscall : Exception::BP;
        GenerateExceptionExit(instruction, info, excode);
        InstructionEpilogue(instruction, info);
        return true;
      }
      else
      {
        UnreachableCode();
      }
    }

    case InstructionOp::beq:
    case InstructionOp::bne:
    {
      // npc = pc + (sext(imm) << 2)
      Value branch_target = CalculatePC(instruction.i.imm_sext32() << 2);

      // beq zero, zero, addr -> unconditional branch
      if (instruction.op == InstructionOp::beq && instruction.i.rs == Reg::zero && instruction.i.rt == Reg::zero)
      {
        return DoBranch(Condition::Always, Value(), Value(), Reg::count, std::move(branch_target));
      }
      else
      {
        // branch <- rs op rt
        Value lhs = m_register_cache.ReadGuestRegister(instruction.i.rs, true, true);
        Value rhs = m_register_cache.ReadGuestRegister(instruction.i.rt);
        const Condition condition = (instruction.op == InstructionOp::beq) ? Condition::Equal : Condition::NotEqual;
        return DoBranch(condition, lhs, rhs, Reg::count, std::move(branch_target));
      }
    }

    case InstructionOp::bgtz:
    case InstructionOp::blez:
    {
      // npc = pc + (sext(imm) << 2)
      Value branch_target = CalculatePC(instruction.i.imm_sext32() << 2);

      // branch <- rs op 0
      Value lhs = m_register_cache.ReadGuestRegister(instruction.i.rs, true, true);

      const Condition condition = (instruction.op == InstructionOp::bgtz) ? Condition::Greater : Condition::LessEqual;
      return DoBranch(condition, lhs, Value::FromConstantU32(0), Reg::count, std::move(branch_target));
    }

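    // bltz/bgez/bltzal/bgezal share the 'b' opcode; the rt field encodes the variant: bit 0 selects
    // bgez over bltz, and (rt & 0x1E) == 0x10 selects the linking forms.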
    case InstructionOp::b:
    {
      // npc = pc + (sext(imm) << 2)
      Value branch_target = CalculatePC(instruction.i.imm_sext32() << 2);

      const u8 rt = static_cast<u8>(instruction.i.rt.GetValue());
      const bool bgez = ConvertToBoolUnchecked(rt & u8(1));
      const Condition condition = (bgez && instruction.r.rs == Reg::zero) ?
                                    Condition::Always :
                                    (bgez ? Condition::PositiveOrZero : Condition::Negative);
      const bool link = (rt & u8(0x1E)) == u8(0x10);

      // Read has to happen before the link as the compare can use ra.
      Value lhs;
      if (condition != Condition::Always)
        lhs = m_register_cache.ReadGuestRegisterToScratch(instruction.i.rs);

      // The return address is always written if link is set, regardless of whether the branch is taken.
      if (link)
      {
        EmitCancelInterpreterLoadDelayForReg(Reg::ra);
        m_register_cache.WriteGuestRegister(Reg::ra, CalculatePC(4));
      }

      return DoBranch(condition, lhs, Value(), Reg::count, std::move(branch_target));
    }

    default:
      UnreachableCode();
  }
}

bool CodeGenerator::Compile_lui(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  InstructionPrologue(instruction, info, 1);

  if (g_settings.UsingPGXPCPUMode())
    EmitFunctionCall(nullptr, &PGXP::CPU_LUI, Value::FromConstantU32(instruction.bits));

  // rt <- (imm << 16)
  const u32 value = instruction.i.imm_zext32() << 16;
  m_register_cache.WriteGuestRegister(instruction.i.rt, Value::FromConstantU32(value));
  SpeculativeWriteReg(instruction.i.rt, value);

  InstructionEpilogue(instruction, info);
  return true;
}

bool CodeGenerator::Compile_cop0(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  if (instruction.cop.IsCommonInstruction())
  {
    switch (instruction.cop.CommonOp())
    {
      case CopCommonInstruction::mfcn:
      case CopCommonInstruction::mtcn:
      {
        u32 offset;
        u32 write_mask = UINT32_C(0xFFFFFFFF);

        const Cop0Reg reg = static_cast<Cop0Reg>(instruction.r.rd.GetValue());
        switch (reg)
        {
          case Cop0Reg::BPC:
            offset = OFFSETOF(State, cop0_regs.BPC);
            break;

          case Cop0Reg::BPCM:
            offset = OFFSETOF(State, cop0_regs.BPCM);
            break;

          case Cop0Reg::BDA:
            offset = OFFSETOF(State, cop0_regs.BDA);
            break;

          case Cop0Reg::BDAM:
            offset = OFFSETOF(State, cop0_regs.BDAM);
            break;

          case Cop0Reg::DCIC:
            offset = OFFSETOF(State, cop0_regs.dcic.bits);
            write_mask = Cop0Registers::DCIC::WRITE_MASK;
            break;

          case Cop0Reg::JUMPDEST:
            offset = OFFSETOF(State, cop0_regs.TAR);
            write_mask = 0;
            break;

          case Cop0Reg::BadVaddr:
            offset = OFFSETOF(State, cop0_regs.BadVaddr);
            write_mask = 0;
            break;

          case Cop0Reg::SR:
            offset = OFFSETOF(State, cop0_regs.sr.bits);
            write_mask = Cop0Registers::SR::WRITE_MASK;
            break;

          case Cop0Reg::CAUSE:
            offset = OFFSETOF(State, cop0_regs.cause.bits);
            write_mask = Cop0Registers::CAUSE::WRITE_MASK;
            break;

          case Cop0Reg::EPC:
            offset = OFFSETOF(State, cop0_regs.EPC);
            write_mask = 0;
            break;

          case Cop0Reg::PRID:
            offset = OFFSETOF(State, cop0_regs.PRID);
            write_mask = 0;
            break;

          default:
            return Compile_Fallback(instruction, info);
        }

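        // write_mask covers the architecturally writable bits of the selected register: read-only
        // registers (JUMPDEST, BadVaddr, EPC, PRID) use a mask of 0, so mtc0 stores to them are dropped.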
        InstructionPrologue(instruction, info, 1);

        if (instruction.cop.CommonOp() == CopCommonInstruction::mfcn)
        {
          // coprocessor loads are load-delayed
          Value value = m_register_cache.AllocateScratch(RegSize_32);
          EmitLoadCPUStructField(value.host_reg, value.size, offset);

          if (g_settings.UsingPGXPCPUMode())
            EmitFunctionCall(nullptr, &PGXP::CPU_MFC0, Value::FromConstantU32(instruction.bits), value);

          m_register_cache.WriteGuestRegisterDelayed(instruction.r.rt, std::move(value));

          if (reg == Cop0Reg::SR)
            SpeculativeWriteReg(instruction.r.rt, m_speculative_constants.cop0_sr);
          else
            SpeculativeWriteReg(instruction.r.rt, std::nullopt);
        }
        else
        {
          // some registers are not writable, so ignore those
          if (write_mask != 0)
          {
            Value value = m_register_cache.ReadGuestRegister(instruction.r.rt);
            if (write_mask != UINT32_C(0xFFFFFFFF))
            {
              // need to adjust the mask
              Value masked_value = AndValues(value, Value::FromConstantU32(write_mask));
              {
                Value old_value = m_register_cache.AllocateScratch(RegSize_32);
                EmitLoadCPUStructField(old_value.GetHostRegister(), RegSize_32, offset);
                EmitAnd(old_value.GetHostRegister(), old_value.GetHostRegister(), Value::FromConstantU32(~write_mask));
                OrValueInPlace(masked_value, old_value);
              }

              if (g_settings.UsingPGXPCPUMode())
              {
                EmitFunctionCall(nullptr, &PGXP::CPU_MTC0, Value::FromConstantU32(instruction.bits), masked_value,
                                 value);
              }
              value = std::move(masked_value);
            }
            else
            {
              if (g_settings.UsingPGXPCPUMode())
                EmitFunctionCall(nullptr, &PGXP::CPU_MTC0, Value::FromConstantU32(instruction.bits), value, value);
            }

            if (reg == Cop0Reg::SR)
              m_speculative_constants.cop0_sr = SpeculativeReadReg(instruction.r.rt);

            // changing SR[Isc] needs to update fastmem views
            if (reg == Cop0Reg::SR)
            {
              LabelType skip_mem_update;
              Value old_value = m_register_cache.AllocateScratch(RegSize_32);
              EmitLoadCPUStructField(old_value.host_reg, RegSize_32, offset);
              EmitStoreCPUStructField(offset, value);
              EmitXor(old_value.host_reg, old_value.host_reg, value);
              EmitBranchIfBitClear(old_value.host_reg, RegSize_32, 16, &skip_mem_update);
              m_register_cache.InhibitAllocation();
              EmitFunctionCall(nullptr, &UpdateMemoryPointers, m_register_cache.GetCPUPtr());
              EmitUpdateFastmemBase();
              EmitBindLabel(&skip_mem_update);
              m_register_cache.UninhibitAllocation();
            }
            else
            {
              EmitStoreCPUStructField(offset, value);
            }
          }
        }

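        // Writing SR can unmask a pending interrupt and writing CAUSE can set the software interrupt
        // bits, so both need an interrupt re-check after the store. DCIC writes may enable hardware
        // breakpoints, which forces a switch to the debug dispatcher.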
        if (instruction.cop.CommonOp() == CopCommonInstruction::mtcn)
        {
          if (reg == Cop0Reg::CAUSE || reg == Cop0Reg::SR)
          {
            // Emit an interrupt check on write to CAUSE/SR.
            Value sr_value = m_register_cache.AllocateScratch(RegSize_32);
            Value cause_value = m_register_cache.AllocateScratch(RegSize_32);
            m_register_cache.InhibitAllocation();

            // m_cop0_regs.sr.IEc && ((m_cop0_regs.cause.Ip & m_cop0_regs.sr.Im) != 0)
            LabelType no_interrupt;
            EmitLoadCPUStructField(sr_value.host_reg, sr_value.size, OFFSETOF(State, cop0_regs.sr.bits));
            EmitLoadCPUStructField(cause_value.host_reg, cause_value.size, OFFSETOF(State, cop0_regs.cause.bits));
            EmitBranchIfBitClear(sr_value.host_reg, sr_value.size, 0, &no_interrupt);
            EmitAnd(sr_value.host_reg, sr_value.host_reg, cause_value);
            EmitTest(sr_value.host_reg, Value::FromConstantU32(0xFF00));
            EmitConditionalBranch(Condition::Zero, false, &no_interrupt);
            m_register_cache.UninhibitAllocation();

            EmitBranch(GetCurrentFarCodePointer());
            SwitchToFarCode();
            m_register_cache.PushState();
            if (!info.is_last_instruction)
              WriteNewPC(CalculatePC(), false);
            EmitStoreCPUStructField(OFFSETOF(State, downcount), Value::FromConstantU32(0));
            EmitExceptionExit();
            m_register_cache.PopState();
            SwitchToNearCode();

            EmitBindLabel(&no_interrupt);
          }
          else if (reg == Cop0Reg::DCIC && g_settings.cpu_recompiler_memory_exceptions)
          {
            Value dcic_value = m_register_cache.AllocateScratch(RegSize_32);
            m_register_cache.InhibitAllocation();

            // if ((dcic & master_enable_bits) != master_enable_bits) goto not_enabled;
            LabelType not_enabled;
            EmitLoadCPUStructField(dcic_value.GetHostRegister(), dcic_value.size, OFFSETOF(State, cop0_regs.dcic.bits));
            EmitAnd(dcic_value.GetHostRegister(), dcic_value.GetHostRegister(),
                    Value::FromConstantU32(Cop0Registers::DCIC::MASTER_ENABLE_BITS));
            EmitConditionalBranch(Condition::NotEqual, false, dcic_value.host_reg,
                                  Value::FromConstantU32(Cop0Registers::DCIC::MASTER_ENABLE_BITS), &not_enabled);

            // if ((dcic & breakpoint_bits) == 0) goto not_enabled;
            EmitLoadCPUStructField(dcic_value.GetHostRegister(), dcic_value.size, OFFSETOF(State, cop0_regs.dcic.bits));
            EmitTest(dcic_value.GetHostRegister(),
                     Value::FromConstantU32(Cop0Registers::DCIC::ANY_BREAKPOINTS_ENABLED_BITS));
            EmitConditionalBranch(Condition::Zero, false, &not_enabled);

            // update dispatcher flag, if enabled, exit block
            EmitFunctionCall(nullptr, &UpdateDebugDispatcherFlag);
            EmitLoadCPUStructField(dcic_value.GetHostRegister(), RegSize_8, OFFSETOF(State, using_debug_dispatcher));
            EmitBranchIfBitClear(dcic_value.GetHostRegister(), RegSize_8, 0, &not_enabled);

            m_register_cache.UninhibitAllocation();

            // exit block early if enabled
            EmitBranch(GetCurrentFarCodePointer());
            SwitchToFarCode();
            m_register_cache.PushState();
            WriteNewPC(CalculatePC(), false);
            EmitExceptionExit();
            m_register_cache.PopState();
            SwitchToNearCode();

            EmitBindLabel(&not_enabled);
          }
        }

        InstructionEpilogue(instruction, info);
        return true;
      }

      // only mfc/mtc for cop0
      default:
        return Compile_Fallback(instruction, info);
    }
  }
  else
  {
    switch (instruction.cop.Cop0Op())
    {
      case Cop0Instruction::rfe:
      {
        InstructionPrologue(instruction, info, 1);

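        // rfe pops the KU/IE mode stack in SR: sr = (sr & ~0xF) | ((sr >> 2) & 0xF).
        // e.g. sr = 0b101100 becomes 0b101011 (KUp/IEp shift into KUc/IEc; KUo/IEo stay in place).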
        // shift mode bits right two, preserving upper bits
        static constexpr u32 mode_bits_mask = UINT32_C(0b1111);
        Value sr = m_register_cache.AllocateScratch(RegSize_32);
        EmitLoadCPUStructField(sr.host_reg, RegSize_32, OFFSETOF(State, cop0_regs.sr.bits));
        {
          Value new_mode_bits = m_register_cache.AllocateScratch(RegSize_32);
          EmitShr(new_mode_bits.host_reg, sr.host_reg, new_mode_bits.size, Value::FromConstantU32(2));
          EmitAnd(new_mode_bits.host_reg, new_mode_bits.host_reg, Value::FromConstantU32(mode_bits_mask));
          EmitAnd(sr.host_reg, sr.host_reg, Value::FromConstantU32(~mode_bits_mask));
          EmitOr(sr.host_reg, sr.host_reg, new_mode_bits);
        }

        EmitStoreCPUStructField(OFFSETOF(State, cop0_regs.sr.bits), sr);

        Value cause_value = m_register_cache.AllocateScratch(RegSize_32);
        EmitLoadCPUStructField(cause_value.host_reg, cause_value.size, OFFSETOF(State, cop0_regs.cause.bits));

        LabelType no_interrupt;
        EmitAnd(sr.host_reg, sr.host_reg, cause_value);
        EmitTest(sr.host_reg, Value::FromConstantU32(0xFF00));
        EmitConditionalBranch(Condition::Zero, false, &no_interrupt);
        m_register_cache.InhibitAllocation();
        EmitStoreCPUStructField(OFFSETOF(State, downcount), Value::FromConstantU32(0));
        EmitBindLabel(&no_interrupt);
        m_register_cache.UninhibitAllocation();

        InstructionEpilogue(instruction, info);
        return true;
      }

      default:
        return Compile_Fallback(instruction, info);
    }
  }
}

Value CodeGenerator::DoGTERegisterRead(u32 index)
{
  Value value = m_register_cache.AllocateScratch(RegSize_32);

  // Most GTE registers can be read directly. Handle the special cases here.
  if (index == 15) // SXY3
  {
    // mirror of SXY2
    index = 14;
  }

  switch (index)
  {
    case 28: // IRGB
    case 29: // ORGB
    {
      EmitFunctionCall(&value, &GTE::ReadRegister, Value::FromConstantU32(index));
    }
    break;

    default:
    {
      EmitLoadCPUStructField(value.host_reg, RegSize_32, State::GTERegisterOffset(index));
    }
    break;
  }

  return value;
}

void CodeGenerator::DoGTERegisterWrite(u32 index, const Value& value)
{
  switch (index)
  {
    case 1:  // V0[z]
    case 3:  // V1[z]
    case 5:  // V2[z]
    case 8:  // IR0
    case 9:  // IR1
    case 10: // IR2
    case 11: // IR3
    case 36: // RT33
    case 44: // L33
    case 52: // LR33
    case 58: // H - sign-extended on read but zext on use
    case 59: // DQA
    case 61: // ZSF3
    case 62: // ZSF4
    {
      // sign-extend z component of vector registers
      Value temp = ConvertValueSize(value.ViewAsSize(RegSize_16), RegSize_32, true);
      EmitStoreCPUStructField(State::GTERegisterOffset(index), temp);
      return;
    }
    break;

    case 7:  // OTZ
    case 16: // SZ0
    case 17: // SZ1
    case 18: // SZ2
    case 19: // SZ3
    {
      // zero-extend unsigned values
      Value temp = ConvertValueSize(value.ViewAsSize(RegSize_16), RegSize_32, false);
      EmitStoreCPUStructField(State::GTERegisterOffset(index), temp);
      return;
    }
    break;

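    // The GTE keeps a three-deep screen-coordinate FIFO (SXY0-SXY2). Writing SXYP (register 15)
    // pushes: SXY0 <- SXY1, SXY1 <- SXY2, SXY2 <- new value. Reads of 15 mirror SXY2 (see above).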
    case 15: // SXY3
    {
      // writing to SXYP pushes to the FIFO
      Value temp = m_register_cache.AllocateScratch(RegSize_32);

      // SXY0 <- SXY1
      EmitLoadCPUStructField(temp.host_reg, RegSize_32, State::GTERegisterOffset(13));
      EmitStoreCPUStructField(State::GTERegisterOffset(12), temp);

      // SXY1 <- SXY2
      EmitLoadCPUStructField(temp.host_reg, RegSize_32, State::GTERegisterOffset(14));
      EmitStoreCPUStructField(State::GTERegisterOffset(13), temp);

      // SXY2 <- SXYP
      EmitStoreCPUStructField(State::GTERegisterOffset(14), value);
      return;
    }
    break;

    case 28: // IRGB
    case 30: // LZCS
    case 63: // FLAG
    {
      EmitFunctionCall(nullptr, &GTE::WriteRegister, Value::FromConstantU32(index), value);
      return;
    }

    case 29: // ORGB
    case 31: // LZCR
    {
      // read-only registers
      return;
    }

    default:
    {
      // written as-is, 2x16 or 1x32 bits
      EmitStoreCPUStructField(State::GTERegisterOffset(index), value);
      return;
    }
  }
}

bool CodeGenerator::Compile_cop2(Instruction instruction, const CodeCache::InstructionInfo& info)
{
  if (instruction.op == InstructionOp::lwc2 || instruction.op == InstructionOp::swc2)
  {
    StallUntilGTEComplete();
    InstructionPrologue(instruction, info, 1);

    const u32 reg = static_cast<u32>(instruction.i.rt.GetValue());
    Value address = AddValues(m_register_cache.ReadGuestRegister(instruction.i.rs),
                              Value::FromConstantU32(instruction.i.imm_sext32()), false);
    SpeculativeValue spec_address = SpeculativeReadReg(instruction.i.rs);
    if (spec_address)
      spec_address = *spec_address + instruction.i.imm_sext32();

    if (instruction.op == InstructionOp::lwc2)
    {
      Value value = EmitLoadGuestMemory(instruction, info, address, spec_address, RegSize_32);
      DoGTERegisterWrite(reg, value);

      if (g_settings.gpu_pgxp_enable)
        EmitFunctionCall(nullptr, PGXP::CPU_LWC2, Value::FromConstantU32(instruction.bits), address, value);
    }
    else
    {
      Value value = DoGTERegisterRead(reg);
      EmitStoreGuestMemory(instruction, info, address, spec_address, RegSize_32, value);

      if (g_settings.gpu_pgxp_enable)
        EmitFunctionCall(nullptr, PGXP::CPU_SWC2, Value::FromConstantU32(instruction.bits), address, value);

      if (spec_address)
        SpeculativeWriteMemory(*spec_address, std::nullopt);
    }

    InstructionEpilogue(instruction, info);
    return true;
  }

  Assert(instruction.op == InstructionOp::cop2);

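  // GTE data registers (mfc2/mtc2) occupy indices 0-31 and control registers (cfc2/ctc2)
  // indices 32-63, hence the +32 below when handling the control variants.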
  if (instruction.cop.IsCommonInstruction())
  {
    switch (instruction.cop.CommonOp())
    {
      case CopCommonInstruction::mfcn:
      case CopCommonInstruction::cfcn:
      {
        const u32 reg = static_cast<u32>(instruction.r.rd.GetValue()) +
                        ((instruction.cop.CommonOp() == CopCommonInstruction::cfcn) ? 32 : 0);

        StallUntilGTEComplete();
        InstructionPrologue(instruction, info, 1);

        Value value = DoGTERegisterRead(reg);

        // PGXP done first here before ownership is transferred.
        if (g_settings.gpu_pgxp_enable)
          EmitFunctionCall(nullptr, PGXP::CPU_MFC2, Value::FromConstantU32(instruction.bits), value);

        m_register_cache.WriteGuestRegisterDelayed(instruction.r.rt, std::move(value));
        SpeculativeWriteReg(instruction.r.rt, std::nullopt);

        InstructionEpilogue(instruction, info);
        return true;
      }

      case CopCommonInstruction::mtcn:
      case CopCommonInstruction::ctcn:
      {
        const u32 reg = static_cast<u32>(instruction.r.rd.GetValue()) +
                        ((instruction.cop.CommonOp() == CopCommonInstruction::ctcn) ? 32 : 0);

        StallUntilGTEComplete();
        InstructionPrologue(instruction, info, 1);

        Value value = m_register_cache.ReadGuestRegister(instruction.r.rt);
        DoGTERegisterWrite(reg, value);

        if (g_settings.gpu_pgxp_enable)
          EmitFunctionCall(nullptr, PGXP::CPU_MTC2, Value::FromConstantU32(instruction.bits), value);

        InstructionEpilogue(instruction, info);
        return true;
      }

      default:
        return Compile_Fallback(instruction, info);
    }
  }
  else
  {
    TickCount func_ticks;
    GTE::InstructionImpl func = GTE::GetInstructionImpl(instruction.bits, &func_ticks);

    // forward everything to the GTE.
    StallUntilGTEComplete();
    InstructionPrologue(instruction, info, 1);

    Value instruction_bits = Value::FromConstantU32(instruction.bits & GTE::Instruction::REQUIRED_BITS_MASK);
    EmitFunctionCall(nullptr, func, instruction_bits);
    AddGTETicks(func_ticks);

    InstructionEpilogue(instruction, info);
    return true;
  }
}

void CodeGenerator::InitSpeculativeRegs()
{
  for (u8 i = 0; i < static_cast<u8>(Reg::count); i++)
    m_speculative_constants.regs[i] = g_state.regs.r[i];

  m_speculative_constants.cop0_sr = g_state.cop0_regs.sr.bits;
}

void CodeGenerator::InvalidateSpeculativeValues()
{
  m_speculative_constants.regs.fill(std::nullopt);
  m_speculative_constants.memory.clear();
  m_speculative_constants.cop0_sr.reset();
}

CodeGenerator::SpeculativeValue CodeGenerator::SpeculativeReadReg(Reg reg)
{
  return m_speculative_constants.regs[static_cast<u8>(reg)];
}

void CodeGenerator::SpeculativeWriteReg(Reg reg, SpeculativeValue value)
{
  m_speculative_constants.regs[static_cast<u8>(reg)] = value;
}

CodeGenerator::SpeculativeValue CodeGenerator::SpeculativeReadMemory(VirtualMemoryAddress address)
{
  PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK;

  auto it = m_speculative_constants.memory.find(address);
  if (it != m_speculative_constants.memory.end())
    return it->second;

  u32 value;
  if ((phys_addr & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR)
  {
    u32 scratchpad_offset = phys_addr & SCRATCHPAD_OFFSET_MASK;
    std::memcpy(&value, &CPU::g_state.scratchpad[scratchpad_offset], sizeof(value));
    return value;
  }

  if (Bus::IsRAMAddress(phys_addr))
  {
    u32 ram_offset = phys_addr & Bus::g_ram_mask;
    std::memcpy(&value, &Bus::g_ram[ram_offset], sizeof(value));
    return value;
  }

  return std::nullopt;
}

void CodeGenerator::SpeculativeWriteMemory(u32 address, SpeculativeValue value)
{
  PhysicalMemoryAddress phys_addr = address & PHYSICAL_MEMORY_ADDRESS_MASK;

  auto it = m_speculative_constants.memory.find(address);
  if (it != m_speculative_constants.memory.end())
  {
    it->second = value;
    return;
  }

  if ((phys_addr & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR || Bus::IsRAMAddress(phys_addr))
    m_speculative_constants.memory.emplace(address, value);
}

bool CodeGenerator::SpeculativeIsCacheIsolated()
{
  if (!m_speculative_constants.cop0_sr.has_value())
    return false;

  const Cop0Registers::SR sr{m_speculative_constants.cop0_sr.value()};
  return sr.Isc;
}

} // namespace CPU::Recompiler