cpu_pgxp.cpp (44342B)
1 // SPDX-FileCopyrightText: 2016 iCatButler, 2019-2023 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: GPL-2.0+ 3 // 4 // This file has been completely rewritten over the years compared to the original PCSXR-PGXP release. 5 // No original code remains. The original copyright notice is included above for historical purposes. 6 // 7 8 #include "cpu_pgxp.h" 9 #include "bus.h" 10 #include "cpu_core.h" 11 #include "cpu_disasm.h" 12 #include "settings.h" 13 14 #include "util/gpu_device.h" 15 16 #include "common/assert.h" 17 #include "common/log.h" 18 19 #include <climits> 20 #include <cmath> 21 22 Log_SetChannel(CPU::PGXP); 23 24 // #define LOG_VALUES 1 25 // #define LOG_LOOKUPS 1 26 27 // TODO: Don't update flags on Validate(), instead return it. 28 29 namespace CPU::PGXP { 30 31 enum : u32 32 { 33 VERTEX_CACHE_WIDTH = 2048, 34 VERTEX_CACHE_HEIGHT = 2048, 35 VERTEX_CACHE_SIZE = VERTEX_CACHE_WIDTH * VERTEX_CACHE_HEIGHT, 36 PGXP_MEM_SIZE = (static_cast<u32>(Bus::RAM_8MB_SIZE) + static_cast<u32>(CPU::SCRATCHPAD_SIZE)) / 4, 37 PGXP_MEM_SCRATCH_OFFSET = Bus::RAM_8MB_SIZE / 4, 38 }; 39 40 enum : u32 41 { 42 VALID_X = (1u << 0), 43 VALID_Y = (1u << 1), 44 VALID_Z = (1u << 2), 45 VALID_LOWZ = (1u << 16), // Valid Z from the low part of a 32-bit value. 46 VALID_HIGHZ = (1u << 17), // Valid Z from the high part of a 32-bit value. 47 VALID_TAINTED_Z = (1u << 31), // X/Y has been changed, Z may not be accurate. 
48 49 VALID_XY = (VALID_X | VALID_Y), 50 VALID_XYZ = (VALID_X | VALID_Y | VALID_Z), 51 VALID_ALL = (VALID_X | VALID_Y | VALID_Z), 52 }; 53 54 #define LOWORD_U16(val) (static_cast<u16>(val)) 55 #define HIWORD_U16(val) (static_cast<u16>(static_cast<u32>(val) >> 16)) 56 #define LOWORD_S16(val) (static_cast<s16>(static_cast<u16>(val))) 57 #define HIWORD_S16(val) (static_cast<s16>(static_cast<u16>(static_cast<u32>(val) >> 16))) 58 #define SET_LOWORD(val, loword) ((static_cast<u32>(val) & 0xFFFF0000u) | static_cast<u32>(static_cast<u16>(loword))) 59 #define SET_HIWORD(val, hiword) ((static_cast<u32>(val) & 0x0000FFFFu) | (static_cast<u32>(hiword) << 16)) 60 61 static double f16Sign(double val); 62 static double f16Unsign(double val); 63 static double f16Overflow(double val); 64 65 static void CacheVertex(u32 value, const PGXPValue& vertex); 66 static PGXPValue* GetCachedVertex(u32 value); 67 68 static float TruncateVertexPosition(float p); 69 static bool IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y); 70 71 static PGXPValue& GetRdValue(Instruction instr); 72 static PGXPValue& GetRtValue(Instruction instr); 73 static PGXPValue& ValidateAndGetRtValue(Instruction instr, u32 rtVal); 74 static PGXPValue& ValidateAndGetRsValue(Instruction instr, u32 rsVal); 75 static void SetRtValue(Instruction instr, const PGXPValue& val); 76 static void SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal); 77 static PGXPValue& GetSXY0(); 78 static PGXPValue& GetSXY1(); 79 static PGXPValue& GetSXY2(); 80 static PGXPValue& PushSXY(); 81 82 static PGXPValue* GetPtr(u32 addr); 83 static const PGXPValue& ValidateAndLoadMem(u32 addr, u32 value); 84 static void ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign); 85 86 static void CPU_MTC2(u32 reg, const PGXPValue& value, u32 val); 87 static void CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal); 88 static void CPU_SLL(Instruction instr, u32 rtVal, u32 sh); 89 static void 
CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable); 90 91 static void WriteMem(u32 addr, const PGXPValue& value); 92 static void WriteMem16(u32 addr, const PGXPValue& value); 93 94 static void CopyZIfMissing(PGXPValue& dst, const PGXPValue& src); 95 static void SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1, const PGXPValue& src2); 96 97 #ifdef LOG_VALUES 98 static void LogInstruction(u32 pc, Instruction instr); 99 static void LogValue(const char* name, u32 rval, const PGXPValue* val); 100 static void LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val); 101 102 // clang-format off 103 #define LOG_VALUES_NV() do { LogInstruction(CPU::g_state.current_instruction_pc, instr); } while (0) 104 #define LOG_VALUES_1(name, rval, val) do { LogInstruction(CPU::g_state.current_instruction_pc, instr); LogValue(name, rval, val); } while (0) 105 #define LOG_VALUES_C1(rnum, rval) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(rnum)), rval, &g_state.pgxp_gpr[static_cast<u32>(rnum)]); } while(0) 106 #define LOG_VALUES_C2(r1num, r1val, r2num, r2val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(r1num)), r1val, &g_state.pgxp_gpr[static_cast<u32>(r1num)]); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(r2num)), r2val, &g_state.pgxp_gpr[static_cast<u32>(r2num)]); } while(0) 107 #define LOG_VALUES_LOAD(addr, val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(TinyString::from_format("MEM[{:08X}]", addr).c_str(), val, GetPtr(addr)); } while(0) 108 #define LOG_VALUES_STORE(rnum, rval, addr) do { LOG_VALUES_C1(rnum, rval); std::fprintf(s_log, " addr=%08X", addr); } while(0) 109 #else 110 #define LOG_VALUES_NV() (void)0 111 #define LOG_VALUES_1(name, rval, val) (void)0 112 #define LOG_VALUES_C1(rnum, rval) (void)0 113 #define LOG_VALUES_C2(r1num, r1val, r2num, r2val) 
(void)0 114 #define LOG_VALUES_LOAD(addr, val) (void)0 115 #define LOG_VALUES_STORE(rnum, rval, addr) (void)0 116 #endif 117 // clang-format on 118 119 static constexpr const PGXPValue INVALID_VALUE = {}; 120 121 static PGXPValue* s_mem = nullptr; 122 static PGXPValue* s_vertex_cache = nullptr; 123 124 #ifdef LOG_VALUES 125 static std::FILE* s_log; 126 #endif 127 } // namespace CPU::PGXP 128 129 void CPU::PGXP::Initialize() 130 { 131 std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr)); 132 std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0)); 133 std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte)); 134 135 if (!s_mem) 136 { 137 s_mem = static_cast<PGXPValue*>(std::calloc(PGXP_MEM_SIZE, sizeof(PGXPValue))); 138 if (!s_mem) 139 Panic("Failed to allocate PGXP memory"); 140 } 141 142 if (g_settings.gpu_pgxp_vertex_cache && !s_vertex_cache) 143 { 144 s_vertex_cache = static_cast<PGXPValue*>(std::calloc(VERTEX_CACHE_SIZE, sizeof(PGXPValue))); 145 if (!s_vertex_cache) 146 { 147 ERROR_LOG("Failed to allocate memory for vertex cache, disabling."); 148 g_settings.gpu_pgxp_vertex_cache = false; 149 } 150 } 151 152 if (s_vertex_cache) 153 std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE); 154 } 155 156 void CPU::PGXP::Reset() 157 { 158 std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr)); 159 std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0)); 160 std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte)); 161 162 if (s_mem) 163 std::memset(s_mem, 0, sizeof(PGXPValue) * PGXP_MEM_SIZE); 164 165 if (g_settings.gpu_pgxp_vertex_cache && s_vertex_cache) 166 std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE); 167 } 168 169 void CPU::PGXP::Shutdown() 170 { 171 if (s_vertex_cache) 172 { 173 std::free(s_vertex_cache); 174 s_vertex_cache = nullptr; 175 } 176 if (s_mem) 177 { 178 std::free(s_mem); 179 s_mem = nullptr; 180 } 181 182 std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte)); 183 
std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr)); 184 std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0)); 185 } 186 187 ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Sign(double val) 188 { 189 const s32 s = static_cast<s32>(static_cast<s64>(val * (USHRT_MAX + 1))); 190 return static_cast<double>(s) / static_cast<double>(USHRT_MAX + 1); 191 } 192 193 ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Unsign(double val) 194 { 195 return (val >= 0) ? val : (val + (USHRT_MAX + 1)); 196 } 197 198 ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Overflow(double val) 199 { 200 return static_cast<double>(static_cast<s64>(val) >> 16); 201 } 202 203 ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRdValue(Instruction instr) 204 { 205 return g_state.pgxp_gpr[static_cast<u8>(instr.r.rd.GetValue())]; 206 } 207 208 ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRtValue(Instruction instr) 209 { 210 return g_state.pgxp_gpr[static_cast<u8>(instr.r.rt.GetValue())]; 211 } 212 213 ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRtValue(Instruction instr, u32 rtVal) 214 { 215 PGXPValue& ret = g_state.pgxp_gpr[static_cast<u8>(instr.r.rt.GetValue())]; 216 ret.Validate(rtVal); 217 return ret; 218 } 219 220 ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRsValue(Instruction instr, u32 rsVal) 221 { 222 PGXPValue& ret = g_state.pgxp_gpr[static_cast<u8>(instr.r.rs.GetValue())]; 223 ret.Validate(rsVal); 224 return ret; 225 } 226 227 ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val) 228 { 229 g_state.pgxp_gpr[static_cast<u8>(instr.r.rt.GetValue())] = val; 230 } 231 232 ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal) 233 { 234 PGXPValue& prtVal = g_state.pgxp_gpr[static_cast<u8>(instr.r.rt.GetValue())]; 235 prtVal = val; 236 prtVal.value = rtVal; 237 } 238 239 ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY0() 240 { 241 return g_state.pgxp_gte[12]; 242 } 243 244 ALWAYS_INLINE CPU::PGXPValue& 
CPU::PGXP::GetSXY1() 245 { 246 return g_state.pgxp_gte[13]; 247 } 248 249 ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY2() 250 { 251 return g_state.pgxp_gte[14]; 252 } 253 254 ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::PushSXY() 255 { 256 g_state.pgxp_gte[12] = g_state.pgxp_gte[13]; 257 g_state.pgxp_gte[13] = g_state.pgxp_gte[14]; 258 return g_state.pgxp_gte[14]; 259 } 260 261 ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetPtr(u32 addr) 262 { 263 #if 0 264 if ((addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) >= 0x0017A2B4 && 265 (addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) <= 0x0017A2B4) 266 __debugbreak(); 267 #endif 268 269 if ((addr & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR) 270 return &s_mem[PGXP_MEM_SCRATCH_OFFSET + ((addr & SCRATCHPAD_OFFSET_MASK) >> 2)]; 271 272 const u32 paddr = (addr & PHYSICAL_MEMORY_ADDRESS_MASK); 273 if (paddr < Bus::RAM_MIRROR_END) 274 return &s_mem[(paddr & Bus::g_ram_mask) >> 2]; 275 else 276 return nullptr; 277 } 278 279 ALWAYS_INLINE_RELEASE const CPU::PGXPValue& CPU::PGXP::ValidateAndLoadMem(u32 addr, u32 value) 280 { 281 PGXPValue* pMem = GetPtr(addr); 282 if (!pMem) [[unlikely]] 283 return INVALID_VALUE; 284 285 pMem->Validate(value); 286 return *pMem; 287 } 288 289 ALWAYS_INLINE_RELEASE void CPU::PGXP::ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign) 290 { 291 PGXPValue* pMem = GetPtr(addr); 292 if (!pMem) [[unlikely]] 293 { 294 dest = INVALID_VALUE; 295 return; 296 } 297 298 // determine if high or low word 299 const bool hiword = ((addr & 2) != 0); 300 301 // only validate the component we're interested in 302 pMem->flags = hiword ? 303 ((Truncate16(pMem->value >> 16) == Truncate16(value)) ? pMem->flags : (pMem->flags & ~VALID_Y)) : 304 ((Truncate16(pMem->value) == Truncate16(value)) ? 
pMem->flags : (pMem->flags & ~VALID_X)); 305 306 // copy whole value 307 dest = *pMem; 308 309 // if high word then shift 310 if (hiword) 311 { 312 dest.x = dest.y; 313 dest.flags = (dest.flags & ~VALID_X) | ((dest.flags & VALID_Y) >> 1); 314 } 315 316 // only set y as valid if x is also valid.. don't want to make fake values 317 if (dest.flags & VALID_X) 318 { 319 dest.y = (dest.x < 0) ? -1.0f * sign : 0.0f; 320 dest.flags |= VALID_Y; 321 } 322 else 323 { 324 dest.y = 0.0f; 325 dest.flags &= ~VALID_Y; 326 } 327 328 dest.value = value; 329 } 330 331 ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem(u32 addr, const PGXPValue& value) 332 { 333 PGXPValue* pMem = GetPtr(addr); 334 if (!pMem) [[unlikely]] 335 return; 336 337 *pMem = value; 338 pMem->flags |= VALID_LOWZ | VALID_HIGHZ; 339 } 340 341 ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem16(u32 addr, const PGXPValue& value) 342 { 343 PGXPValue* dest = GetPtr(addr); 344 if (!dest) [[unlikely]] 345 return; 346 347 // determine if high or low word 348 const bool hiword = ((addr & 2) != 0); 349 if (hiword) 350 { 351 dest->y = value.x; 352 dest->flags = (dest->flags & ~VALID_Y) | ((value.flags & VALID_X) << 1); 353 dest->value = (dest->value & UINT32_C(0x0000FFFF)) | (value.value << 16); 354 } 355 else 356 { 357 dest->x = value.x; 358 dest->flags = (dest->flags & ~VALID_X) | (value.flags & VALID_X); 359 dest->value = (dest->value & UINT32_C(0xFFFF0000)) | (value.value & UINT32_C(0x0000FFFF)); 360 } 361 362 // overwrite z/w if valid 363 // TODO: Check modified 364 if (value.flags & VALID_Z) 365 { 366 dest->z = value.z; 367 dest->flags |= VALID_Z | (hiword ? VALID_HIGHZ : VALID_LOWZ); 368 } 369 else 370 { 371 dest->flags &= hiword ? ~VALID_HIGHZ : ~VALID_LOWZ; 372 if (dest->flags & VALID_Z && !(dest->flags & (VALID_HIGHZ | VALID_LOWZ))) 373 dest->flags &= ~VALID_Z; 374 } 375 } 376 377 ALWAYS_INLINE_RELEASE void CPU::PGXP::CopyZIfMissing(PGXPValue& dst, const PGXPValue& src) 378 { 379 dst.z = (dst.flags & VALID_Z) ? 
dst.z : src.z; 380 dst.flags |= (src.flags & VALID_Z); 381 } 382 383 ALWAYS_INLINE_RELEASE void CPU::PGXP::SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1, 384 const PGXPValue& src2) 385 { 386 // Prefer src2 if src1 is missing Z, or is potentially an imprecise value, when src2 is precise. 387 dst_z = (!(src1.flags & VALID_Z) || 388 (src1.flags & VALID_TAINTED_Z && (src2.flags & (VALID_Z | VALID_TAINTED_Z)) == VALID_Z)) ? 389 src2.z : 390 src1.z; 391 dst_flags |= ((src1.flags | src2.flags) & VALID_Z); 392 } 393 394 #ifdef LOG_VALUES 395 void CPU::PGXP::LogInstruction(u32 pc, Instruction instr) 396 { 397 if (!s_log) [[unlikely]] 398 { 399 s_log = std::fopen("pgxp.log", "wb"); 400 } 401 else 402 { 403 std::fflush(s_log); 404 std::fputc('\n', s_log); 405 } 406 407 SmallString str; 408 DisassembleInstruction(&str, pc, instr.bits); 409 std::fprintf(s_log, "%08X %08X %-20s", pc, instr.bits, str.c_str()); 410 } 411 412 void CPU::PGXP::LogValue(const char* name, u32 rval, const PGXPValue* val) 413 { 414 if (!s_log) [[unlikely]] 415 return; 416 417 SmallString str; 418 LogValueStr(str, name, rval, val); 419 std::fprintf(s_log, " %s", str.c_str()); 420 } 421 422 void CPU::PGXP::LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val) 423 { 424 str.append_format("{}=[{:08X}", name, rval); 425 if (!val) 426 { 427 str.append(", NULL]"); 428 } 429 else 430 { 431 if (val->value != rval) 432 str.append_format(", PGXP{:08X}", val->value); 433 434 str.append_format(", {{{},{},{}}}", val->x, val->y, val->z); 435 436 if (val->flags & VALID_ALL) 437 { 438 str.append(", valid="); 439 if (val->flags & VALID_X) 440 str.append('X'); 441 if (val->flags & VALID_Y) 442 str.append('Y'); 443 if (val->flags & VALID_Z) 444 str.append('Z'); 445 } 446 447 // if (val->flags & VALID_TAINTED_Z) 448 // str.append(", tainted"); 449 450 str.append(']'); 451 } 452 } 453 454 #endif 455 456 void CPU::PGXP::GTE_RTPS(float x, float y, float z, u32 value) 457 { 458 
PGXPValue& pvalue = PushSXY(); 459 pvalue.x = x; 460 pvalue.y = y; 461 pvalue.z = z; 462 pvalue.value = value; 463 pvalue.flags = VALID_ALL; 464 465 if (g_settings.gpu_pgxp_vertex_cache) 466 CacheVertex(value, pvalue); 467 } 468 469 bool CPU::PGXP::GTE_HasPreciseVertices(u32 sxy0, u32 sxy1, u32 sxy2) 470 { 471 PGXPValue& SXY0 = GetSXY0(); 472 SXY0.Validate(sxy0); 473 PGXPValue& SXY1 = GetSXY1(); 474 SXY1.Validate(sxy1); 475 PGXPValue& SXY2 = GetSXY2(); 476 SXY2.Validate(sxy2); 477 478 // Don't use accurate clipping for game-constructed values, which don't have a valid Z. 479 return (((SXY0.flags & SXY1.flags & SXY2.flags & VALID_XYZ) == VALID_XYZ)); 480 } 481 482 float CPU::PGXP::GTE_NCLIP() 483 { 484 const PGXPValue& SXY0 = GetSXY0(); 485 const PGXPValue& SXY1 = GetSXY1(); 486 const PGXPValue& SXY2 = GetSXY2(); 487 float nclip = ((SXY0.x * SXY1.y) + (SXY1.x * SXY2.y) + (SXY2.x * SXY0.y) - (SXY0.x * SXY2.y) - (SXY1.x * SXY0.y) - 488 (SXY2.x * SXY1.y)); 489 490 // ensure fractional values are not incorrectly rounded to 0 491 const float nclip_abs = std::abs(nclip); 492 if (0.1f < nclip_abs && nclip_abs < 1.0f) 493 nclip += (nclip < 0.0f ? 
-1.0f : 1.0f); 494 495 return nclip; 496 } 497 498 ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_MTC2(u32 reg, const PGXPValue& value, u32 val) 499 { 500 switch (reg) 501 { 502 case 15: 503 { 504 // push FIFO 505 PGXPValue& SXY2 = PushSXY(); 506 SXY2 = value; 507 return; 508 } 509 510 // read-only registers 511 case 29: 512 case 31: 513 { 514 return; 515 } 516 517 default: 518 { 519 PGXPValue& gteVal = g_state.pgxp_gte[reg]; 520 gteVal = value; 521 gteVal.value = val; 522 return; 523 } 524 } 525 } 526 527 void CPU::PGXP::CPU_MFC2(Instruction instr, u32 rdVal) 528 { 529 // CPU[Rt] = GTE_D[Rd] 530 const u32 idx = instr.cop.Cop2Index(); 531 LOG_VALUES_1(CPU::GetGTERegisterName(idx), rdVal, &g_state.pgxp_gte[idx]); 532 533 PGXPValue& prdVal = g_state.pgxp_gte[idx]; 534 prdVal.Validate(rdVal); 535 SetRtValue(instr, prdVal, rdVal); 536 } 537 538 void CPU::PGXP::CPU_MTC2(Instruction instr, u32 rtVal) 539 { 540 // GTE_D[Rd] = CPU[Rt] 541 const u32 idx = instr.cop.Cop2Index(); 542 LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal); 543 544 PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); 545 CPU_MTC2(idx, prtVal, rtVal); 546 } 547 548 void CPU::PGXP::CPU_LWC2(Instruction instr, u32 addr, u32 rtVal) 549 { 550 // GTE_D[Rt] = Mem[addr] 551 LOG_VALUES_LOAD(addr, rtVal); 552 553 const PGXPValue& pMem = ValidateAndLoadMem(addr, rtVal); 554 CPU_MTC2(static_cast<u32>(instr.r.rt.GetValue()), pMem, rtVal); 555 } 556 557 void CPU::PGXP::CPU_SWC2(Instruction instr, u32 addr, u32 rtVal) 558 { 559 // Mem[addr] = GTE_D[Rt] 560 const u32 idx = static_cast<u32>(instr.r.rt.GetValue()); 561 PGXPValue& prtVal = g_state.pgxp_gte[idx]; 562 #ifdef LOG_VALUES 563 LOG_VALUES_1(CPU::GetGTERegisterName(idx), rtVal, &prtVal); 564 std::fprintf(s_log, " addr=%08X", addr); 565 #endif 566 prtVal.Validate(rtVal); 567 WriteMem(addr, prtVal); 568 } 569 570 ALWAYS_INLINE_RELEASE void CPU::PGXP::CacheVertex(u32 value, const PGXPValue& vertex) 571 { 572 const s16 sx = static_cast<s16>(value & 0xFFFFu); 573 const 
s16 sy = static_cast<s16>(value >> 16); 574 DebugAssert(sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023); 575 s_vertex_cache[(sy + 1024) * VERTEX_CACHE_WIDTH + (sx + 1024)] = vertex; 576 } 577 578 ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetCachedVertex(u32 value) 579 { 580 const s16 sx = static_cast<s16>(value & 0xFFFFu); 581 const s16 sy = static_cast<s16>(value >> 16); 582 return (sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1013) ? 583 &s_vertex_cache[(sy + 1024) * VERTEX_CACHE_WIDTH + (sx + 1024)] : 584 nullptr; 585 } 586 587 ALWAYS_INLINE_RELEASE float CPU::PGXP::TruncateVertexPosition(float p) 588 { 589 const s32 int_part = static_cast<s32>(p); 590 const float int_part_f = static_cast<float>(int_part); 591 return static_cast<float>(static_cast<s16>(int_part << 5) >> 5) + (p - int_part_f); 592 } 593 594 ALWAYS_INLINE_RELEASE bool CPU::PGXP::IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y) 595 { 596 const float tolerance = g_settings.gpu_pgxp_tolerance; 597 if (tolerance < 0.0f) 598 return true; 599 600 return (std::abs(precise_x - static_cast<float>(int_x)) <= tolerance && 601 std::abs(precise_y - static_cast<float>(int_y)) <= tolerance); 602 } 603 604 bool CPU::PGXP::GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y, 605 float* out_w) 606 { 607 const PGXPValue* vert = GetPtr(addr); 608 if (vert && ((vert->flags & VALID_XY) == VALID_XY) && (vert->value == value)) 609 { 610 // There is a value here with valid X and Y coordinates 611 *out_x = TruncateVertexPosition(vert->x) + static_cast<float>(xOffs); 612 *out_y = TruncateVertexPosition(vert->y) + static_cast<float>(yOffs); 613 *out_w = vert->z / 32768.0f; 614 615 #ifdef LOG_LOOKUPS 616 GL_INS_FMT("0x{:08X} {},{} => {},{} ({},{},{}) ({},{})", addr, x, y, *out_x, *out_y, 617 TruncateVertexPosition(vert->x), TruncateVertexPosition(vert->y), vert->z, std::abs(*out_x - x), 618 std::abs(*out_y - y)); 619 #endif 620 621 
if (IsWithinTolerance(*out_x, *out_y, x, y)) 622 { 623 // check validity of z component 624 return ((vert->flags & VALID_Z) == VALID_Z); 625 } 626 } 627 628 if (g_settings.gpu_pgxp_vertex_cache) 629 { 630 vert = GetCachedVertex(value); 631 if (vert && (vert->flags & VALID_XY) == VALID_XY) 632 { 633 *out_x = TruncateVertexPosition(vert->x) + static_cast<float>(xOffs); 634 *out_y = TruncateVertexPosition(vert->y) + static_cast<float>(yOffs); 635 *out_w = vert->z / 32768.0f; 636 637 if (IsWithinTolerance(*out_x, *out_y, x, y)) 638 return false; 639 } 640 } 641 642 // no valid value can be found anywhere, use the native PSX data 643 *out_x = static_cast<float>(x); 644 *out_y = static_cast<float>(y); 645 *out_w = 1.0f; 646 return false; 647 } 648 649 void CPU::PGXP::CPU_LW(Instruction instr, u32 addr, u32 rtVal) 650 { 651 // Rt = Mem[Rs + Im] 652 LOG_VALUES_LOAD(addr, rtVal); 653 SetRtValue(instr, ValidateAndLoadMem(addr, rtVal)); 654 } 655 656 void CPU::PGXP::CPU_LBx(Instruction instr, u32 addr, u32 rtVal) 657 { 658 LOG_VALUES_LOAD(addr, rtVal); 659 SetRtValue(instr, INVALID_VALUE); 660 } 661 662 void CPU::PGXP::CPU_LH(Instruction instr, u32 addr, u32 rtVal) 663 { 664 // Rt = Mem[Rs + Im] (sign extended) 665 LOG_VALUES_LOAD(addr, rtVal); 666 ValidateAndLoadMem16(GetRtValue(instr), addr, rtVal, true); 667 } 668 669 void CPU::PGXP::CPU_LHU(Instruction instr, u32 addr, u32 rtVal) 670 { 671 // Rt = Mem[Rs + Im] (zero extended) 672 LOG_VALUES_LOAD(addr, rtVal); 673 ValidateAndLoadMem16(GetRtValue(instr), addr, rtVal, false); 674 } 675 676 void CPU::PGXP::CPU_SB(Instruction instr, u32 addr, u32 rtVal) 677 { 678 LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr); 679 WriteMem(addr, INVALID_VALUE); 680 } 681 682 void CPU::PGXP::CPU_SH(Instruction instr, u32 addr, u32 rtVal) 683 { 684 LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr); 685 PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); 686 WriteMem16(addr, prtVal); 687 } 688 689 void CPU::PGXP::CPU_SW(Instruction 
instr, u32 addr, u32 rtVal) 690 { 691 // Mem[Rs + Im] = Rt 692 LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr); 693 PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); 694 WriteMem(addr, prtVal); 695 } 696 697 void CPU::PGXP::CPU_MOVE_Packed(u32 rd_and_rs, u32 rsVal) 698 { 699 const u32 Rs = (rd_and_rs & 0xFFu); 700 const u32 Rd = (rd_and_rs >> 8); 701 CPU_MOVE(Rd, Rs, rsVal); 702 } 703 704 void CPU::PGXP::CPU_MOVE(u32 Rd, u32 Rs, u32 rsVal) 705 { 706 #ifdef LOG_VALUES 707 const Instruction instr = {0}; 708 LOG_VALUES_C1(Rs, rsVal); 709 #endif 710 PGXPValue& prsVal = g_state.pgxp_gpr[Rs]; 711 prsVal.Validate(rsVal); 712 g_state.pgxp_gpr[Rd] = prsVal; 713 } 714 715 void CPU::PGXP::CPU_ADDI(Instruction instr, u32 rsVal) 716 { 717 LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal); 718 719 // Rt = Rs + Imm (signed) 720 PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); 721 722 const u32 immVal = instr.i.imm_sext32(); 723 724 PGXPValue& prtVal = GetRtValue(instr); 725 prtVal = prsVal; 726 727 if (immVal == 0) 728 return; 729 730 if (rsVal == 0) 731 { 732 // x is low precision value 733 prtVal.x = static_cast<float>(LOWORD_S16(immVal)); 734 prtVal.y = static_cast<float>(HIWORD_S16(immVal)); 735 prtVal.flags |= VALID_X | VALID_Y | VALID_TAINTED_Z; 736 prtVal.value = immVal; 737 return; 738 } 739 740 prtVal.x = static_cast<float>(f16Unsign(prtVal.x)); 741 prtVal.x += static_cast<float>(LOWORD_U16(immVal)); 742 743 // carry on over/underflow 744 const float of = (prtVal.x > USHRT_MAX) ? 1.0f : (prtVal.x < 0.0f) ? -1.0f : 0.0f; 745 prtVal.x = static_cast<float>(f16Sign(prtVal.x)); 746 prtVal.y += HIWORD_S16(immVal) + of; 747 748 // truncate on overflow/underflow 749 prtVal.y += (prtVal.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (prtVal.y < SHRT_MIN) ? 
(USHRT_MAX + 1) : 0.0f; 750 751 prtVal.value = rsVal + immVal; 752 753 prtVal.flags |= VALID_TAINTED_Z; 754 } 755 756 void CPU::PGXP::CPU_ANDI(Instruction instr, u32 rsVal) 757 { 758 LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal); 759 760 // Rt = Rs & Imm 761 const u32 imm = instr.i.imm_zext32(); 762 const u32 rtVal = rsVal & imm; 763 PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); 764 PGXPValue& prtVal = GetRtValue(instr); 765 766 // remove upper 16-bits 767 prtVal.y = 0.0f; 768 prtVal.z = prsVal.z; 769 prtVal.value = rtVal; 770 prtVal.flags = prsVal.flags | VALID_Y | VALID_TAINTED_Z; 771 772 switch (imm) 773 { 774 case 0: 775 { 776 // if 0 then x == 0 777 prtVal.x = 0.0f; 778 prtVal.flags |= VALID_X; 779 } 780 break; 781 782 case 0xFFFFu: 783 { 784 // if saturated then x == x 785 prtVal.x = prsVal.x; 786 } 787 break; 788 789 default: 790 { 791 // otherwise x is low precision value 792 prtVal.x = static_cast<float>(LOWORD_S16(rtVal)); 793 prtVal.flags |= VALID_X; 794 } 795 break; 796 } 797 } 798 799 void CPU::PGXP::CPU_ORI(Instruction instr, u32 rsVal) 800 { 801 LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal); 802 803 // Rt = Rs | Imm 804 const u32 imm = instr.i.imm_zext32(); 805 const u32 rtVal = rsVal | imm; 806 807 PGXPValue& pRsVal = ValidateAndGetRsValue(instr, rsVal); 808 PGXPValue& pRtVal = GetRtValue(instr); 809 pRtVal = pRsVal; 810 pRtVal.value = rtVal; 811 812 if (imm == 0) [[unlikely]] 813 { 814 // if 0 then x == x 815 } 816 else 817 { 818 // otherwise x is low precision value 819 pRtVal.x = static_cast<float>(LOWORD_S16(rtVal)); 820 pRtVal.flags |= VALID_X | VALID_TAINTED_Z; 821 } 822 } 823 824 void CPU::PGXP::CPU_XORI(Instruction instr, u32 rsVal) 825 { 826 LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal); 827 828 // Rt = Rs ^ Imm 829 const u32 imm = instr.i.imm_zext32(); 830 const u32 rtVal = rsVal ^ imm; 831 832 PGXPValue& pRsVal = ValidateAndGetRsValue(instr, rsVal); 833 PGXPValue& pRtVal = GetRtValue(instr); 834 pRtVal = pRsVal; 835 pRtVal.value = 
rtVal; 836 837 if (imm == 0) [[unlikely]] 838 { 839 // if 0 then x == x 840 } 841 else 842 { 843 // otherwise x is low precision value 844 pRtVal.x = static_cast<float>(LOWORD_S16(rtVal)); 845 pRtVal.flags |= VALID_X | VALID_TAINTED_Z; 846 } 847 } 848 849 void CPU::PGXP::CPU_SLTI(Instruction instr, u32 rsVal) 850 { 851 LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal); 852 853 // Rt = Rs < Imm (signed) 854 const s32 imm = instr.i.imm_s16(); 855 PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); 856 857 const float fimmx = static_cast<float>(imm); 858 const float fimmy = fimmx < 0.0f ? -1.0f : 0.0f; 859 860 PGXPValue& prtVal = GetRtValue(instr); 861 prtVal.x = (prsVal.GetValidY(rsVal) < fimmy || prsVal.GetValidX(rsVal) < fimmx) ? 1.0f : 0.0f; 862 prtVal.y = 0.0f; 863 prtVal.z = prsVal.z; 864 prtVal.flags = prsVal.flags | VALID_X | VALID_Y | VALID_TAINTED_Z; 865 prtVal.value = BoolToUInt32(static_cast<s32>(rsVal) < imm); 866 } 867 868 void CPU::PGXP::CPU_SLTIU(Instruction instr, u32 rsVal) 869 { 870 LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal); 871 872 // Rt = Rs < Imm (Unsigned) 873 const u32 imm = instr.i.imm_u16(); 874 PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); 875 876 const float fimmx = static_cast<float>(static_cast<s16>(imm)); // deliberately signed 877 const float fimmy = fimmx < 0.0f ? -1.0f : 0.0f; 878 879 PGXPValue& prtVal = GetRtValue(instr); 880 prtVal.x = 881 (f16Unsign(prsVal.GetValidY(rsVal)) < f16Unsign(fimmy) || f16Unsign(prsVal.GetValidX(rsVal)) < fimmx) ? 
1.0f : 0.0f; 882 prtVal.y = 0.0f; 883 prtVal.z = prsVal.z; 884 prtVal.flags = prsVal.flags | VALID_X | VALID_Y | VALID_TAINTED_Z; 885 prtVal.value = BoolToUInt32(rsVal < imm); 886 } 887 888 void CPU::PGXP::CPU_LUI(Instruction instr) 889 { 890 LOG_VALUES_NV(); 891 892 // Rt = Imm << 16 893 PGXPValue& pRtVal = GetRtValue(instr); 894 pRtVal.x = 0.0f; 895 pRtVal.y = static_cast<float>(instr.i.imm_s16()); 896 pRtVal.z = 0.0f; 897 pRtVal.value = instr.i.imm_zext32() << 16; 898 pRtVal.flags = VALID_XY; 899 } 900 901 void CPU::PGXP::CPU_ADD(Instruction instr, u32 rsVal, u32 rtVal) 902 { 903 LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal); 904 905 // Rd = Rs + Rt (signed) 906 PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); 907 PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); 908 PGXPValue& prdVal = GetRdValue(instr); 909 910 if (rtVal == 0) 911 { 912 prdVal = prsVal; 913 CopyZIfMissing(prdVal, prtVal); 914 } 915 else if (rsVal == 0) 916 { 917 prdVal = prtVal; 918 CopyZIfMissing(prdVal, prsVal); 919 } 920 else 921 { 922 const double x = f16Unsign(prsVal.GetValidX(rsVal)) + f16Unsign(prtVal.GetValidX(rtVal)); 923 924 // carry on over/underflow 925 const float of = (x > USHRT_MAX) ? 1.0f : (x < 0.0f) ? -1.0f : 0.0f; 926 prdVal.x = static_cast<float>(f16Sign(x)); 927 prdVal.y = prsVal.GetValidY(rsVal) + prtVal.GetValidY(rtVal) + of; 928 929 // truncate on overflow/underflow 930 prdVal.y += (prdVal.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (prdVal.y < SHRT_MIN) ? 
(USHRT_MAX + 1) : 0.0f; 931 932 prdVal.value = rsVal + rtVal; 933 934 // valid x/y only if one side had a valid x/y 935 prdVal.flags = prsVal.flags | (prtVal.flags & VALID_XY) | VALID_TAINTED_Z; 936 937 SelectZ(prdVal.z, prdVal.flags, prsVal, prtVal); 938 } 939 } 940 941 void CPU::PGXP::CPU_SUB(Instruction instr, u32 rsVal, u32 rtVal) 942 { 943 LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal); 944 945 // Rd = Rs - Rt (signed) 946 PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); 947 PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); 948 PGXPValue& prdVal = GetRdValue(instr); 949 950 if (rtVal == 0) 951 { 952 prdVal = prsVal; 953 CopyZIfMissing(prdVal, prtVal); 954 } 955 else 956 { 957 const double x = f16Unsign(prsVal.GetValidX(rsVal)) - f16Unsign(prtVal.GetValidX(rtVal)); 958 959 // carry on over/underflow 960 const float of = (x > USHRT_MAX) ? 1.0f : (x < 0.0f) ? -1.0f : 0.0f; 961 prdVal.x = static_cast<float>(f16Sign(x)); 962 prdVal.y = prsVal.GetValidY(rsVal) - (prtVal.GetValidY(rtVal) - of); 963 964 // truncate on overflow/underflow 965 prdVal.y += (prdVal.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (prdVal.y < SHRT_MIN) ? 
(USHRT_MAX + 1) : 0.0f;

    prdVal.value = rsVal - rtVal;

    // valid x/y only if one side had a valid x/y
    prdVal.flags = prsVal.flags | (prtVal.flags & VALID_XY) | VALID_TAINTED_Z;

    SelectZ(prdVal.z, prdVal.flags, prsVal, prtVal);
  }
}

// Shared implementation behind CPU_AND_/CPU_OR_/CPU_XOR_/CPU_NOR.
//   instr - decoded instruction, used to locate the Rs/Rt/Rd shadow entries.
//   rdVal - integer result already computed by the caller.
//   rsVal - integer value of Rs, used for validation and half-word matching.
//   rtVal - integer value of Rt, used for validation and half-word matching.
// Each 16-bit half of the result is compared against zero, then against the same half of Rs, then Rt.
// On a match the component is taken from that operand's shadow value; otherwise the integer half is used.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal)
{
  // Rd = Rs <op> Rt, where <op> is whichever bitwise operation the caller applied to produce rdVal.
  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  // Low half-word -> x.
  float x, y;
  if (LOWORD_U16(rdVal) == 0)
    x = 0.0f;
  else if (LOWORD_U16(rdVal) == LOWORD_U16(rsVal))
    x = prsVal.GetValidX(rsVal);
  else if (LOWORD_U16(rdVal) == LOWORD_U16(rtVal))
    x = prtVal.GetValidX(rtVal);
  else
    x = static_cast<float>(LOWORD_S16(rdVal));

  // High half-word -> y, same matching order as above.
  if (HIWORD_U16(rdVal) == 0)
    y = 0.0f;
  else if (HIWORD_U16(rdVal) == HIWORD_U16(rsVal))
    y = prsVal.GetValidY(rsVal);
  else if (HIWORD_U16(rdVal) == HIWORD_U16(rtVal))
    y = prtVal.GetValidY(rtVal);
  else
    y = static_cast<float>(HIWORD_S16(rdVal));

  // Why not write directly to prdVal? Because it might be the same as the source.
  // If either operand carries any X/Y validity, the result is marked fully XY-valid with a tainted Z.
  u32 flags = ((prsVal.flags | prtVal.flags) & VALID_XY) ? (VALID_XY | VALID_TAINTED_Z) : 0;
  PGXPValue& prdVal = GetRdValue(instr);
  SelectZ(prdVal.z, flags, prsVal, prtVal);
  prdVal.x = x;
  prdVal.y = y;
  prdVal.flags = flags;
  prdVal.value = rdVal;
}

// AND: computes the integer result and defers the shadow update to CPU_BITWISE.
void CPU::PGXP::CPU_AND_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs & Rt
  const u32 rdVal = rsVal & rtVal;
  CPU_BITWISE(instr, rdVal, rsVal, rtVal);
}

// OR: computes the integer result and defers the shadow update to CPU_BITWISE.
void CPU::PGXP::CPU_OR_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs | Rt
  const u32 rdVal = rsVal | rtVal;
  CPU_BITWISE(instr, rdVal, rsVal, rtVal);
}

// XOR: computes the integer result and defers the shadow update to CPU_BITWISE.
void CPU::PGXP::CPU_XOR_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs ^ Rt
  const u32 rdVal = rsVal ^ rtVal;
  CPU_BITWISE(instr, rdVal, rsVal, rtVal);
}

// NOR: computes the integer result and defers the shadow update to CPU_BITWISE.
void CPU::PGXP::CPU_NOR(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs NOR Rt
  const u32 rdVal = ~(rsVal | rtVal);
  CPU_BITWISE(instr, rdVal, rsVal, rtVal);
}

// Set-on-less-than (signed). The shadow x becomes 1.0f or 0.0f and y becomes 0.0f.
// The integer value is the exact signed comparison of rsVal and rtVal.
void CPU::PGXP::CPU_SLT(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs < Rt (signed)
  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& prdVal = GetRdValue(instr);
  // The high components are compared as-is; the low components go through f16Unsign() first.
  prdVal.x = (prsVal.GetValidY(rsVal) < prtVal.GetValidY(rtVal) ||
              f16Unsign(prsVal.GetValidX(rsVal)) < f16Unsign(prtVal.GetValidX(rtVal))) ?
               1.0f :
               0.0f;
  prdVal.y = 0.0f;
  prdVal.z = prsVal.z;
  prdVal.flags = prsVal.flags | VALID_TAINTED_Z | VALID_X | VALID_Y;
  prdVal.value = BoolToUInt32(static_cast<s32>(rsVal) < static_cast<s32>(rtVal));
}

// Set-on-less-than (unsigned). Same as CPU_SLT, except the high components also go through f16Unsign()
// and the integer value is the unsigned comparison.
void CPU::PGXP::CPU_SLTU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs < Rt (unsigned)
  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& prdVal = GetRdValue(instr);
  prdVal.x = (f16Unsign(prsVal.GetValidY(rsVal)) < f16Unsign(prtVal.GetValidY(rtVal)) ||
              f16Unsign(prsVal.GetValidX(rsVal)) < f16Unsign(prtVal.GetValidX(rtVal))) ?
               1.0f :
               0.0f;
  prdVal.y = 0.0f;
  prdVal.z = prsVal.z;
  prdVal.flags = prsVal.flags | VALID_TAINTED_Z | VALID_X | VALID_Y;
  prdVal.value = BoolToUInt32(rsVal < rtVal);
}

// Signed multiply. Writes the shadow entries for the LO and HI registers.
// Both start as copies of the Rs shadow value, then x/y are replaced by the component-wise product
// and `value` by the exact 64-bit integer product (low word in LO, high word in HI).
void CPU::PGXP::CPU_MULT(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Hi/Lo = Rs * Rt (signed)
  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& ploVal = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& phiVal = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];
  ploVal = prsVal;
  // NOTE(review): ploVal was just copied from prsVal, so this passes the copy's own source.
  // Possibly prtVal was intended; CopyZIfMissing() is not visible here, confirm before changing.
  CopyZIfMissing(ploVal, prsVal);

  // Z/valid is the same
  phiVal = ploVal;

  const float rsx = prsVal.GetValidX(rsVal);
  const float rsy = prsVal.GetValidY(rsVal);
  const float rtx = prtVal.GetValidX(rtVal);
  const float rty = prtVal.GetValidY(rtVal);

  // Multiply out components
  // Low components go through f16Unsign(); high components are used as-is for the signed variant.
  const double xx = f16Unsign(rsx) * f16Unsign(rtx);
  const double xy = f16Unsign(rsx) * (rty);
  const double yx = rsy * f16Unsign(rtx);
  const double yy = rsy * rty;

  // Split values into outputs
  // NOTE(review): f16Overflow() presumably yields the carry out of a 16-bit component - confirm.
  const double lx = xx;
  const double ly = f16Overflow(xx) + (xy + yx);
  const double hx = f16Overflow(ly) + yy;
  const double hy = f16Overflow(hx);

  // Rt's X/Y validity is merged into both outputs; Z is always marked tainted.
  ploVal.x = static_cast<float>(f16Sign(lx));
  ploVal.y = static_cast<float>(f16Sign(ly));
  ploVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);
  phiVal.x = static_cast<float>(f16Sign(hx));
  phiVal.y = static_cast<float>(f16Sign(hy));
  phiVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);

  // compute PSX value
  const u64 result = static_cast<u64>(static_cast<s64>(SignExtend64(rsVal)) * static_cast<s64>(SignExtend64(rtVal)));
  phiVal.value = Truncate32(result >> 32);
  ploVal.value = Truncate32(result);
}

// Unsigned multiply. Same structure as CPU_MULT, except every component goes through f16Unsign()
// and the integer product is computed from zero-extended operands.
void CPU::PGXP::CPU_MULTU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Hi/Lo = Rs * Rt (unsigned)
  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& ploVal = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& phiVal = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];
  ploVal = prsVal;
  // NOTE(review): same self-sourced CopyZIfMissing() call as in CPU_MULT - confirm intent.
  CopyZIfMissing(ploVal, prsVal);

  // Z/valid is the same
  phiVal = ploVal;

  const float rsx = prsVal.GetValidX(rsVal);
  const float rsy = prsVal.GetValidY(rsVal);
  const float rtx = prtVal.GetValidX(rtVal);
  const float rty = prtVal.GetValidY(rtVal);

  // Multiply out components
  const double xx = f16Unsign(rsx) * f16Unsign(rtx);
  const double xy = f16Unsign(rsx) * f16Unsign(rty);
  const double yx = f16Unsign(rsy) * f16Unsign(rtx);
  const double yy = f16Unsign(rsy) * f16Unsign(rty);

  // Split values into outputs
  const double lx = xx;
  const double ly = f16Overflow(xx) + (xy + yx);
  const double hx = f16Overflow(ly) + yy;
  const double hy = f16Overflow(hx);

  ploVal.x = static_cast<float>(f16Sign(lx));
  ploVal.y = static_cast<float>(f16Sign(ly));
  ploVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);
  phiVal.x = static_cast<float>(f16Sign(hx));
  phiVal.y = static_cast<float>(f16Sign(hy));
  phiVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);

  // compute PSX value
  const u64 result = ZeroExtend64(rsVal) * ZeroExtend64(rtVal);
  phiVal.value = Truncate32(result >> 32);
  ploVal.value = Truncate32(result);
}

// Signed divide. LO receives the quotient and HI the remainder.
// The shadow x/y come from a floating-point divide of the recombined (x + y * 65536) operands.
// `value` is the integer result, with explicit handling for divide-by-zero and 0x80000000 / -1.
void CPU::PGXP::CPU_DIV(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Lo = Rs / Rt (signed)
  // Hi = Rs % Rt (signed)
  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& ploVal = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& phiVal = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];
  ploVal = prsVal;
  // NOTE(review): same self-sourced CopyZIfMissing() call as in CPU_MULT - confirm intent.
  CopyZIfMissing(ploVal, prsVal);

  // Z/valid is the same
  phiVal = ploVal;

  // Recombine the two components into one double: low part via f16Unsign(), high part scaled by 2^16.
  const double vs = f16Unsign(prsVal.GetValidX(rsVal)) + prsVal.GetValidY(rsVal) * static_cast<double>(1 << 16);
  const double vt = f16Unsign(prtVal.GetValidX(rtVal)) + prtVal.GetValidY(rtVal) * static_cast<double>(1 << 16);

  // NOTE(review): no guard on vt here; if it is 0.0 then lo/hi below are inf/NaN before they reach
  // f16Sign()/f16Overflow(). Only the integer path further down special-cases a zero divisor.
  const double lo = vs / vt;
  ploVal.y = static_cast<float>(f16Sign(f16Overflow(lo)));
  ploVal.x = static_cast<float>(f16Sign(lo));
  ploVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);

  const double hi = std::fmod(vs, vt);
  phiVal.y = static_cast<float>(f16Sign(f16Overflow(hi)));
  phiVal.x = static_cast<float>(f16Sign(hi));
  phiVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);

  // compute PSX value
  if (static_cast<s32>(rtVal) == 0)
  {
    // divide by zero
    ploVal.value = (static_cast<s32>(rsVal) >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
    phiVal.value = static_cast<u32>(static_cast<s32>(rsVal));
  }
  else if (rsVal == UINT32_C(0x80000000) && static_cast<s32>(rtVal) == -1)
  {
    // unrepresentable
    ploVal.value = UINT32_C(0x80000000);
    phiVal.value = 0;
  }
  else
  {
    ploVal.value = static_cast<u32>(static_cast<s32>(rsVal) / static_cast<s32>(rtVal));
    phiVal.value = static_cast<u32>(static_cast<s32>(rsVal) % static_cast<s32>(rtVal));
  }
}

// Unsigned divide. Same structure as CPU_DIV, except the high components also go through f16Unsign()
// and the integer path only special-cases a zero divisor.
void CPU::PGXP::CPU_DIVU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Lo = Rs / Rt (unsigned)
  // Hi = Rs % Rt (unsigned)
  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& ploVal = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& phiVal = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];
  ploVal = prsVal;
  // NOTE(review): same self-sourced CopyZIfMissing() call as in CPU_MULT - confirm intent.
  CopyZIfMissing(ploVal, prsVal);

  // Z/valid is the same
  phiVal = ploVal;

  const double vs =
    f16Unsign(prsVal.GetValidX(rsVal)) + f16Unsign(prsVal.GetValidY(rsVal)) * static_cast<double>(1 << 16);
  const double vt =
    f16Unsign(prtVal.GetValidX(rtVal)) + f16Unsign(prtVal.GetValidY(rtVal)) * static_cast<double>(1 << 16);

  // NOTE(review): as in CPU_DIV, vt is not checked for zero before the floating-point divide/fmod.
  const double lo = vs / vt;
  ploVal.y = static_cast<float>(f16Sign(f16Overflow(lo)));
  ploVal.x = static_cast<float>(f16Sign(lo));
  ploVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);

  const double hi = std::fmod(vs, vt);
  phiVal.y = static_cast<float>(f16Sign(f16Overflow(hi)));
  phiVal.x = static_cast<float>(f16Sign(hi));
  phiVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);

  // compute PSX value
  if (rtVal == 0)
  {
    // divide by zero
    ploVal.value = UINT32_C(0xFFFFFFFF);
    phiVal.value = rsVal;
  }
  else
  {
    ploVal.value = rsVal / rtVal;
    phiVal.value = rsVal % rtVal;
  }
}

// Shared implementation behind the CPU_SLL and CPU_SLLV entry points.
//   instr - decoded instruction, used to locate the Rt/Rd shadow entries.
//   rtVal - integer value of Rt.
//   sh    - shift amount supplied by the caller.
// The shadow x/y are scaled by 2^sh with the carry moved from x into y. Shifts of 16 or more move x into y.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal, u32 sh)
{
  const u32 rdVal = rtVal << sh;
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& prdVal = GetRdValue(instr);
  prdVal.z = prtVal.z;
  prdVal.value = rdVal;

  if (sh >= 32) [[unlikely]]
  {
    // Everything is shifted out.
    prdVal.x = 0.0f;
    prdVal.y = 0.0f;
    prdVal.flags = prtVal.flags | VALID_XY | VALID_TAINTED_Z;
  }
  else if (sh == 16)
  {
    // Low component becomes the high component unchanged. y is written first so this still works
    // when prdVal and prtVal refer to the same entry.
    prdVal.y = prtVal.x;
    prdVal.x = 0.0f;

    // Only set valid X if there's also a valid Y. We could use GetValidX() to pull it from the low precision value
    // instead, need to investigate further. Spyro breaks if only X is set even if Y is not valid.
    // prdVal.flags = (prtVal.flags & ~VALID_Y) | ((prtVal.flags & VALID_X) << 1) | VALID_X | VALID_TAINTED_Z;
    // VALID_Y is bit 1 and VALID_X is bit 0, so (flags & VALID_Y) >> 1 yields VALID_X when Y was valid.
    prdVal.flags = (prtVal.flags | VALID_TAINTED_Z) | ((prtVal.flags & VALID_Y) >> 1);
  }
  else if (sh >= 16)
  {
    // sh == 16 was handled above, so this branch covers 17..31.
    prdVal.y = static_cast<float>(f16Sign(f16Unsign(prtVal.x * static_cast<double>(1 << (sh - 16)))));
    prdVal.x = 0.0f;

    // See above.
    // prdVal.flags = (prtVal.flags & ~VALID_Y) | ((prtVal.flags & VALID_X) << 1) | VALID_X | VALID_TAINTED_Z;
    prdVal.flags = (prtVal.flags | VALID_TAINTED_Z) | ((prtVal.flags & VALID_Y) >> 1);
  }
  else
  {
    // Both products are computed into locals before prdVal is written, since it may be the same entry as prtVal.
    const double x = f16Unsign(prtVal.x) * static_cast<double>(1 << sh);
    const double y = (f16Unsign(prtVal.y) * static_cast<double>(1 << sh)) + f16Overflow(x);
    prdVal.x = static_cast<float>(f16Sign(x));
    prdVal.y = static_cast<float>(f16Sign(y));
    prdVal.flags = (prtVal.flags | VALID_TAINTED_Z);
  }
}

// Shift left logical by the immediate shift amount encoded in the instruction.
void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  // Rd = Rt << Sa
  const u32 sh = instr.r.shamt;
  CPU_SLL(instr, rtVal, sh);
}

// Shift left logical by a register-supplied amount; only the low 5 bits of Rs are used.
void CPU::PGXP::CPU_SLLV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  // Rd = Rt << (Rs & 0x1F)
  const u32 sh = rsVal & 0x1F;
  CPU_SLL(instr, rtVal, sh);
}

// Shared implementation behind CPU_SRL/CPU_SRLV/CPU_SRA/CPU_SRAV.
//   instr       - decoded instruction, used to locate the Rt/Rd shadow entries.
//   rtVal       - integer value of Rt.
//   sh          - shift amount supplied by the caller.
//   sign        - true for an arithmetic shift, false for a logical shift.
//   is_variable - true when the shift amount came from a register (the *V variants).
//                 Only consulted by the low-precision fallback at the end.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable)
{
  const u32 rdVal = sign ? static_cast<u32>(static_cast<s32>(rtVal) >> sh) : (rtVal >> sh);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  double x = prtVal.x;
  double y = sign ? prtVal.y : f16Unsign(prtVal.y);

  const u32 iX = SignExtend32(LOWORD_S16(rtVal));   // remove Y
  const u32 iY = SET_LOWORD(rtVal, HIWORD_U16(iX)); // overwrite x with sign(x)

  // Shift test values
  const u32 dX = static_cast<u32>(static_cast<s32>(iX) >> sh);
  const u32 dY = sign ? static_cast<u32>(static_cast<s32>(iY) >> sh) : (iY >> sh);

  // The integer test values decide whether any bits of x survive the shift.
  if (LOWORD_S16(dX) != HIWORD_S16(iX))
    x = x / static_cast<double>(1 << sh);
  else
    x = LOWORD_S16(dX); // only sign bits left

  // If bits of the high half reached the low half, add the shifted y into x.
  if (LOWORD_S16(dY) != HIWORD_S16(iX))
  {
    if (sh == 16)
    {
      x = y;
    }
    else if (sh < 16)
    {
      x += y * static_cast<double>(1 << (16 - sh));
      if (prtVal.x < 0)
        x += static_cast<double>(1 << (16 - sh));
    }
    else
    {
      x += y / static_cast<double>(1 << (sh - 16));
    }
  }

  // If the shifted high half is all zeros or all ones, use that integer; otherwise scale y.
  if ((HIWORD_S16(dY) == 0) || (HIWORD_S16(dY) == -1))
    y = HIWORD_S16(dY);
  else
    y = y / static_cast<double>(1 << sh);

  PGXPValue& prdVal = GetRdValue(instr);

  // Use low precision/rounded values when we're not shifting an entire component,
  // and it's not originally from a 3D value. Too many false positives in P2/etc.
  // What we probably should do is not set the valid flag on non-3D values to begin
  // with, only letting them become valid when used in another expression.
  if (sign && !is_variable && !(prtVal.flags & VALID_Z) && sh < 16)
  {
    prdVal.x = static_cast<float>(LOWORD_S16(rdVal));
    prdVal.y = static_cast<float>(HIWORD_S16(rdVal));
    prdVal.z = 0.0f;
    prdVal.value = rdVal;
    prdVal.flags = VALID_XY | VALID_TAINTED_Z;
  }
  else
  {
    prdVal.x = static_cast<float>(f16Sign(x));
    prdVal.y = static_cast<float>(f16Sign(y));
    prdVal.z = prtVal.z;
    prdVal.value = rdVal;
    prdVal.flags = prtVal.flags | VALID_TAINTED_Z;
  }
}

// Shift right logical by the immediate shift amount encoded in the instruction.
void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  // Rd = Rt >> Sa
  const u32 sh = instr.r.shamt;
  CPU_SRx(instr, rtVal, sh, false, false);
}

// Shift right logical by a register-supplied amount; only the low 5 bits of Rs are used.
void CPU::PGXP::CPU_SRLV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  // Rd = Rt >> (Rs & 0x1F)
  const u32 sh = rsVal & 0x1F;
  CPU_SRx(instr, rtVal, sh, false, true);
}

// Shift right arithmetic by the immediate shift amount encoded in the instruction.
void CPU::PGXP::CPU_SRA(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  // Rd = Rt >> Sa (arithmetic)
  const u32 sh = instr.r.shamt;
  CPU_SRx(instr, rtVal, sh, true, false);
}

// Shift right arithmetic by a register-supplied amount; only the low 5 bits of Rs are used.
void CPU::PGXP::CPU_SRAV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  // Rd = Rt >> (Rs & 0x1F) (arithmetic)
  const u32 sh = rsVal & 0x1F;
  CPU_SRx(instr, rtVal, sh, true, true);
}

// Move from coprocessor 0. Validates the COP0 shadow entry against rdVal, copies it into the
// Rt shadow entry, and tags that copy with rdVal.
void CPU::PGXP::CPU_MFC0(Instruction instr, u32 rdVal)
{
  const u32 idx = static_cast<u8>(instr.r.rd.GetValue());
  LOG_VALUES_1(TinyString::from_format("cop0_{}", idx).c_str(), rdVal, &g_state.pgxp_cop0[idx]);

  // CPU[Rt] = CP0[Rd]
  PGXPValue& prdVal = g_state.pgxp_cop0[idx];
  prdVal.Validate(rdVal);

  PGXPValue& prtVal = GetRtValue(instr);
  prtVal = prdVal;
  prtVal.value = rdVal;
}

1448 void CPU::PGXP::CPU_MTC0(Instruction instr, u32 rdVal, u32 rtVal) 1449 { 1450 LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal); 1451 1452 // CP0[Rd] = CPU[Rt] 1453 PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); 1454 PGXPValue& prdVal = g_state.pgxp_cop0[static_cast<u8>(instr.r.rd.GetValue())]; 1455 prdVal = prtVal; 1456 prtVal.value = rdVal; 1457 }