gpu_sw_backend.cpp (33350B)
1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) 3 4 #include "gpu_sw_backend.h" 5 #include "gpu.h" 6 #include "system.h" 7 8 #include "util/gpu_device.h" 9 10 #include <algorithm> 11 12 GPU_SW_Backend::GPU_SW_Backend() = default; 13 14 GPU_SW_Backend::~GPU_SW_Backend() = default; 15 16 bool GPU_SW_Backend::Initialize(bool force_thread) 17 { 18 return GPUBackend::Initialize(force_thread); 19 } 20 21 void GPU_SW_Backend::Reset() 22 { 23 GPUBackend::Reset(); 24 } 25 26 void GPU_SW_Backend::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) 27 { 28 const GPURenderCommand rc{cmd->rc.bits}; 29 const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable; 30 31 const DrawTriangleFunction DrawFunction = GetDrawTriangleFunction( 32 rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable); 33 34 (this->*DrawFunction)(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]); 35 if (rc.quad_polygon) 36 (this->*DrawFunction)(cmd, &cmd->vertices[2], &cmd->vertices[1], &cmd->vertices[3]); 37 } 38 39 void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) 40 { 41 const GPURenderCommand rc{cmd->rc.bits}; 42 43 const DrawRectangleFunction DrawFunction = 44 GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable); 45 46 (this->*DrawFunction)(cmd); 47 } 48 49 void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd) 50 { 51 const DrawLineFunction DrawFunction = 52 GetDrawLineFunction(cmd->rc.shading_enable, cmd->rc.transparency_enable, cmd->IsDitheringEnabled()); 53 54 for (u16 i = 1; i < cmd->num_vertices; i++) 55 (this->*DrawFunction)(cmd, &cmd->vertices[i - 1], &cmd->vertices[i]); 56 } 57 58 constexpr GPU_SW_Backend::DitherLUT GPU_SW_Backend::ComputeDitherLUT() 59 { 60 DitherLUT lut = {}; 61 for (u32 i = 0; i < DITHER_MATRIX_SIZE; i++) 62 { 63 for (u32 j = 0; j < DITHER_MATRIX_SIZE; j++) 64 { 65 for (u32 value = 0; value < DITHER_LUT_SIZE; value++) 66 { 67 const s32 dithered_value = (static_cast<s32>(value) + DITHER_MATRIX[i][j]) >> 3; 68 lut[i][j][value] = static_cast<u8>((dithered_value < 0) ? 0 : ((dithered_value > 31) ? 31 : dithered_value)); 69 } 70 } 71 } 72 return lut; 73 } 74 75 static constexpr GPU_SW_Backend::DitherLUT s_dither_lut = GPU_SW_Backend::ComputeDitherLUT(); 76 77 template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable> 78 void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawCommand* cmd, u32 x, u32 y, u8 color_r, 79 u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y) 80 { 81 VRAMPixel color; 82 if constexpr (texture_enable) 83 { 84 // Apply texture window 85 texcoord_x = (texcoord_x & cmd->window.and_x) | cmd->window.or_x; 86 texcoord_y = (texcoord_y & cmd->window.and_y) | cmd->window.or_y; 87 88 VRAMPixel texture_color; 89 switch (cmd->draw_mode.texture_mode) 90 { 91 case GPUTextureMode::Palette4Bit: 92 { 93 const u16 palette_value = 94 GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 4)) % VRAM_WIDTH, 95 (cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); 96 const size_t palette_index = (palette_value >> ((texcoord_x % 4) * 4)) & 0x0Fu; 97 texture_color.bits = g_gpu_clut[palette_index]; 98 } 99 break; 100 101 case GPUTextureMode::Palette8Bit: 102 { 103 const u16 palette_value = 104 GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 2)) % VRAM_WIDTH, 105 (cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); 106 const size_t palette_index = (palette_value >> ((texcoord_x % 2) * 8)) & 0xFFu; 107 texture_color.bits = g_gpu_clut[palette_index]; 108 } 109 break; 110 111 default: 112 { 113 texture_color.bits = GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x)) % VRAM_WIDTH, 114 (cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); 115 } 116 break; 117 } 118 119 if (texture_color.bits == 0) 120 return; 121 122 if constexpr (raw_texture_enable) 123 { 124 color.bits = texture_color.bits; 125 } 126 else 127 { 128 const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u; 129 const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u; 130 131 color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.r) * u16(color_r)) >> 4]) << 0) | 132 (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.g) * u16(color_g)) >> 4]) << 5) | 133 (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.b) * u16(color_b)) >> 4]) << 10) | 134 (texture_color.bits & 0x8000u); 135 } 136 } 137 else 138 { 139 const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u; 140 const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u; 141 142 // Non-textured transparent polygons don't set bit 15, but are treated as transparent. 143 color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_r]) << 0) | 144 (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_g]) << 5) | 145 (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_b]) << 10) | (transparency_enable ? 0x8000u : 0); 146 } 147 148 const VRAMPixel bg_color{GetPixel(static_cast<u32>(x), static_cast<u32>(y))}; 149 if constexpr (transparency_enable) 150 { 151 if (color.bits & 0x8000u || !texture_enable) 152 { 153 // Based on blargg's efficient 15bpp pixel math. 154 u32 bg_bits = ZeroExtend32(bg_color.bits); 155 u32 fg_bits = ZeroExtend32(color.bits); 156 switch (cmd->draw_mode.transparency_mode) 157 { 158 case GPUTransparencyMode::HalfBackgroundPlusHalfForeground: 159 { 160 bg_bits |= 0x8000u; 161 color.bits = Truncate16(((fg_bits + bg_bits) - ((fg_bits ^ bg_bits) & 0x0421u)) >> 1); 162 } 163 break; 164 165 case GPUTransparencyMode::BackgroundPlusForeground: 166 { 167 bg_bits &= ~0x8000u; 168 169 const u32 sum = fg_bits + bg_bits; 170 const u32 carry = (sum - ((fg_bits ^ bg_bits) & 0x8421u)) & 0x8420u; 171 172 color.bits = Truncate16((sum - carry) | (carry - (carry >> 5))); 173 } 174 break; 175 176 case GPUTransparencyMode::BackgroundMinusForeground: 177 { 178 bg_bits |= 0x8000u; 179 fg_bits &= ~0x8000u; 180 181 const u32 diff = bg_bits - fg_bits + 0x108420u; 182 const u32 borrow = (diff - ((bg_bits ^ fg_bits) & 0x108420u)) & 0x108420u; 183 184 color.bits = Truncate16((diff - borrow) & (borrow - (borrow >> 5))); 185 } 186 break; 187 188 case GPUTransparencyMode::BackgroundPlusQuarterForeground: 189 { 190 bg_bits &= ~0x8000u; 191 fg_bits = ((fg_bits >> 2) & 0x1CE7u) | 0x8000u; 192 193 const u32 sum = fg_bits + bg_bits; 194 const u32 carry = (sum - ((fg_bits ^ bg_bits) & 0x8421u)) & 0x8420u; 195 196 color.bits = Truncate16((sum - carry) | (carry - (carry >> 5))); 197 } 198 break; 199 200 default: 201 break; 202 } 203 204 // See above. 205 if constexpr (!texture_enable) 206 color.bits &= ~0x8000u; 207 } 208 } 209 210 const u16 mask_and = cmd->params.GetMaskAND(); 211 if ((bg_color.bits & mask_and) != 0) 212 return; 213 214 DebugAssert(static_cast<u32>(x) < VRAM_WIDTH && static_cast<u32>(y) < VRAM_HEIGHT); 215 SetPixel(static_cast<u32>(x), static_cast<u32>(y), color.bits | cmd->params.GetMaskOR()); 216 } 217 218 template<bool texture_enable, bool raw_texture_enable, bool transparency_enable> 219 void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) 220 { 221 const s32 origin_x = cmd->x; 222 const s32 origin_y = cmd->y; 223 const auto [r, g, b] = UnpackColorRGB24(cmd->color); 224 const auto [origin_texcoord_x, origin_texcoord_y] = UnpackTexcoord(cmd->texcoord); 225 226 for (u32 offset_y = 0; offset_y < cmd->height; offset_y++) 227 { 228 const s32 y = origin_y + static_cast<s32>(offset_y); 229 if (y < static_cast<s32>(m_drawing_area.top) || y > static_cast<s32>(m_drawing_area.bottom) || 230 (cmd->params.interlaced_rendering && cmd->params.active_line_lsb == (Truncate8(static_cast<u32>(y)) & 1u))) 231 { 232 continue; 233 } 234 235 const u32 draw_y = static_cast<u32>(y) & VRAM_HEIGHT_MASK; 236 const u8 texcoord_y = Truncate8(ZeroExtend32(origin_texcoord_y) + offset_y); 237 238 for (u32 offset_x = 0; offset_x < cmd->width; offset_x++) 239 { 240 const s32 x = origin_x + static_cast<s32>(offset_x); 241 if (x < static_cast<s32>(m_drawing_area.left) || x > static_cast<s32>(m_drawing_area.right)) 242 continue; 243 244 const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x); 245 246 ShadePixel<texture_enable, raw_texture_enable, transparency_enable, false>(cmd, static_cast<u32>(x), draw_y, r, g, 247 b, texcoord_x, texcoord_y); 248 } 249 } 250 } 251 252 ////////////////////////////////////////////////////////////////////////// 253 // Polygon and line rasterization ported from Mednafen 254 ////////////////////////////////////////////////////////////////////////// 255 256 #define COORD_FBS 12 257 #define COORD_MF_INT(n) ((n) << COORD_FBS) 258 #define COORD_POST_PADDING 12 259 260 static ALWAYS_INLINE_RELEASE s64 MakePolyXFP(s32 x) 261 { 262 return ((u64)x << 32) + ((1ULL << 32) - (1 << 11)); 263 } 264 265 static ALWAYS_INLINE_RELEASE s64 MakePolyXFPStep(s32 dx, s32 dy) 266 { 267 s64 ret; 268 s64 dx_ex = (u64)dx << 32; 269 270 if (dx_ex < 0) 271 dx_ex -= dy - 1; 272 273 if (dx_ex > 0) 274 dx_ex += dy - 1; 275 276 ret = dx_ex / dy; 277 278 return (ret); 279 } 280 281 static ALWAYS_INLINE_RELEASE s32 GetPolyXFP_Int(s64 xfp) 282 { 283 return (xfp >> 32); 284 } 285 286 template<bool shading_enable, bool texture_enable> 287 bool ALWAYS_INLINE_RELEASE GPU_SW_Backend::CalcIDeltas(i_deltas& idl, const GPUBackendDrawPolygonCommand::Vertex* A, 288 const GPUBackendDrawPolygonCommand::Vertex* B, 289 const GPUBackendDrawPolygonCommand::Vertex* C) 290 { 291 #define CALCIS(x, y) (((B->x - A->x) * (C->y - B->y)) - ((C->x - B->x) * (B->y - A->y))) 292 293 s32 denom = CALCIS(x, y); 294 295 if (!denom) 296 return false; 297 298 if constexpr (shading_enable) 299 { 300 idl.dr_dx = (u32)(CALCIS(r, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; 301 idl.dr_dy = (u32)(CALCIS(x, r) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; 302 303 idl.dg_dx = (u32)(CALCIS(g, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; 304 idl.dg_dy = (u32)(CALCIS(x, g) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; 305 306 idl.db_dx = (u32)(CALCIS(b, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; 307 idl.db_dy = (u32)(CALCIS(x, b) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; 308 } 309 310 if constexpr (texture_enable) 311 { 312 idl.du_dx = (u32)(CALCIS(u, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; 313 idl.du_dy = (u32)(CALCIS(x, u) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; 314 315 idl.dv_dx = (u32)(CALCIS(v, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; 316 idl.dv_dy = (u32)(CALCIS(x, v) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING; 317 } 318 319 return true; 320 321 #undef CALCIS 322 } 323 324 template<bool shading_enable, bool texture_enable> 325 void ALWAYS_INLINE_RELEASE GPU_SW_Backend::AddIDeltas_DX(i_group& ig, const i_deltas& idl, u32 count /*= 1*/) 326 { 327 if constexpr (shading_enable) 328 { 329 ig.r += idl.dr_dx * count; 330 ig.g += idl.dg_dx * count; 331 ig.b += idl.db_dx * count; 332 } 333 334 if constexpr (texture_enable) 335 { 336 ig.u += idl.du_dx * count; 337 ig.v += idl.dv_dx * count; 338 } 339 } 340 341 template<bool shading_enable, bool texture_enable> 342 void ALWAYS_INLINE_RELEASE GPU_SW_Backend::AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count /*= 1*/) 343 { 344 if constexpr (shading_enable) 345 { 346 ig.r += idl.dr_dy * count; 347 ig.g += idl.dg_dy * count; 348 ig.b += idl.db_dy * count; 349 } 350 351 if constexpr (texture_enable) 352 { 353 ig.u += idl.du_dy * count; 354 ig.v += idl.dv_dy * count; 355 } 356 } 357 358 template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable, 359 bool dithering_enable> 360 void GPU_SW_Backend::DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, i_group ig, 361 const i_deltas& idl) 362 { 363 if (cmd->params.interlaced_rendering && cmd->params.active_line_lsb == (Truncate8(static_cast<u32>(y)) & 1u)) 364 return; 365 366 s32 x_ig_adjust = x_start; 367 s32 w = x_bound - x_start; 368 s32 x = TruncateGPUVertexPosition(x_start); 369 370 if (x < static_cast<s32>(m_drawing_area.left)) 371 { 372 s32 delta = static_cast<s32>(m_drawing_area.left) - x; 373 x_ig_adjust += delta; 374 x += delta; 375 w -= delta; 376 } 377 378 if ((x + w) > (static_cast<s32>(m_drawing_area.right) + 1)) 379 w = static_cast<s32>(m_drawing_area.right) + 1 - x; 380 381 if (w <= 0) 382 return; 383 384 AddIDeltas_DX<shading_enable, texture_enable>(ig, idl, x_ig_adjust); 385 AddIDeltas_DY<shading_enable, texture_enable>(ig, idl, y); 386 387 do 388 { 389 const u32 r = ig.r >> (COORD_FBS + COORD_POST_PADDING); 390 const u32 g = ig.g >> (COORD_FBS + COORD_POST_PADDING); 391 const u32 b = ig.b >> (COORD_FBS + COORD_POST_PADDING); 392 const u32 u = ig.u >> (COORD_FBS + COORD_POST_PADDING); 393 const u32 v = ig.v >> (COORD_FBS + COORD_POST_PADDING); 394 395 ShadePixel<texture_enable, raw_texture_enable, transparency_enable, dithering_enable>( 396 cmd, static_cast<u32>(x), static_cast<u32>(y), Truncate8(r), Truncate8(g), Truncate8(b), Truncate8(u), 397 Truncate8(v)); 398 399 x++; 400 AddIDeltas_DX<shading_enable, texture_enable>(ig, idl); 401 } while (--w > 0); 402 } 403 404 template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable, 405 bool dithering_enable> 406 void GPU_SW_Backend::DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, 407 const GPUBackendDrawPolygonCommand::Vertex* v0, 408 const GPUBackendDrawPolygonCommand::Vertex* v1, 409 const GPUBackendDrawPolygonCommand::Vertex* v2) 410 { 411 u32 core_vertex; 412 { 413 u32 cvtemp = 0; 414 415 if (v1->x <= v0->x) 416 { 417 if (v2->x <= v1->x) 418 cvtemp = (1 << 2); 419 else 420 cvtemp = (1 << 1); 421 } 422 else if (v2->x < v0->x) 423 cvtemp = (1 << 2); 424 else 425 cvtemp = (1 << 0); 426 427 if (v2->y < v1->y) 428 { 429 std::swap(v2, v1); 430 cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1); 431 } 432 433 if (v1->y < v0->y) 434 { 435 std::swap(v1, v0); 436 cvtemp = ((cvtemp >> 1) & 0x1) | ((cvtemp << 1) & 0x2) | (cvtemp & 0x4); 437 } 438 439 if (v2->y < v1->y) 440 { 441 std::swap(v2, v1); 442 cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1); 443 } 444 445 core_vertex = cvtemp >> 1; 446 } 447 448 if (v0->y == v2->y) 449 return; 450 451 if (static_cast<u32>(std::abs(v2->x - v0->x)) >= MAX_PRIMITIVE_WIDTH || 452 static_cast<u32>(std::abs(v2->x - v1->x)) >= MAX_PRIMITIVE_WIDTH || 453 static_cast<u32>(std::abs(v1->x - v0->x)) >= MAX_PRIMITIVE_WIDTH || 454 static_cast<u32>(v2->y - v0->y) >= MAX_PRIMITIVE_HEIGHT) 455 { 456 return; 457 } 458 459 s64 base_coord = MakePolyXFP(v0->x); 460 s64 base_step = MakePolyXFPStep((v2->x - v0->x), (v2->y - v0->y)); 461 s64 bound_coord_us; 462 s64 bound_coord_ls; 463 bool right_facing; 464 465 if (v1->y == v0->y) 466 { 467 bound_coord_us = 0; 468 right_facing = (bool)(v1->x > v0->x); 469 } 470 else 471 { 472 bound_coord_us = MakePolyXFPStep((v1->x - v0->x), (v1->y - v0->y)); 473 right_facing = (bool)(bound_coord_us > base_step); 474 } 475 476 if (v2->y == v1->y) 477 bound_coord_ls = 0; 478 else 479 bound_coord_ls = MakePolyXFPStep((v2->x - v1->x), (v2->y - v1->y)); 480 481 i_deltas idl; 482 if (!CalcIDeltas<shading_enable, texture_enable>(idl, v0, v1, v2)) 483 return; 484 485 const GPUBackendDrawPolygonCommand::Vertex* vertices[3] = {v0, v1, v2}; 486 487 i_group ig; 488 if constexpr (texture_enable) 489 { 490 ig.u = (COORD_MF_INT(vertices[core_vertex]->u) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; 491 ig.v = (COORD_MF_INT(vertices[core_vertex]->v) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; 492 } 493 494 ig.r = (COORD_MF_INT(vertices[core_vertex]->r) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; 495 ig.g = (COORD_MF_INT(vertices[core_vertex]->g) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; 496 ig.b = (COORD_MF_INT(vertices[core_vertex]->b) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING; 497 498 AddIDeltas_DX<shading_enable, texture_enable>(ig, idl, -vertices[core_vertex]->x); 499 AddIDeltas_DY<shading_enable, texture_enable>(ig, idl, -vertices[core_vertex]->y); 500 501 struct TriangleHalf 502 { 503 u64 x_coord[2]; 504 u64 x_step[2]; 505 506 s32 y_coord; 507 s32 y_bound; 508 509 bool dec_mode; 510 } tripart[2]; 511 512 u32 vo = 0; 513 u32 vp = 0; 514 if (core_vertex != 0) 515 vo = 1; 516 if (core_vertex == 2) 517 vp = 3; 518 519 { 520 TriangleHalf* tp = &tripart[vo]; 521 tp->y_coord = vertices[0 ^ vo]->y; 522 tp->y_bound = vertices[1 ^ vo]->y; 523 tp->x_coord[right_facing] = MakePolyXFP(vertices[0 ^ vo]->x); 524 tp->x_step[right_facing] = bound_coord_us; 525 tp->x_coord[!right_facing] = base_coord + ((vertices[vo]->y - vertices[0]->y) * base_step); 526 tp->x_step[!right_facing] = base_step; 527 tp->dec_mode = vo; 528 } 529 530 { 531 TriangleHalf* tp = &tripart[vo ^ 1]; 532 tp->y_coord = vertices[1 ^ vp]->y; 533 tp->y_bound = vertices[2 ^ vp]->y; 534 tp->x_coord[right_facing] = MakePolyXFP(vertices[1 ^ vp]->x); 535 tp->x_step[right_facing] = bound_coord_ls; 536 tp->x_coord[!right_facing] = 537 base_coord + ((vertices[1 ^ vp]->y - vertices[0]->y) * 538 base_step); // base_coord + ((vertices[1].y - vertices[0].y) * base_step); 539 tp->x_step[!right_facing] = base_step; 540 tp->dec_mode = vp; 541 } 542 543 for (u32 i = 0; i < 2; i++) 544 { 545 s32 yi = tripart[i].y_coord; 546 s32 yb = tripart[i].y_bound; 547 548 u64 lc = tripart[i].x_coord[0]; 549 u64 ls = tripart[i].x_step[0]; 550 551 u64 rc = tripart[i].x_coord[1]; 552 u64 rs = tripart[i].x_step[1]; 553 554 if (tripart[i].dec_mode) 555 { 556 while (yi > yb) 557 { 558 yi--; 559 lc -= ls; 560 rc -= rs; 561 562 s32 y = TruncateGPUVertexPosition(yi); 563 564 if (y < static_cast<s32>(m_drawing_area.top)) 565 break; 566 567 if (y > static_cast<s32>(m_drawing_area.bottom)) 568 continue; 569 570 DrawSpan<shading_enable, texture_enable, raw_texture_enable, transparency_enable, dithering_enable>( 571 cmd, y & VRAM_HEIGHT_MASK, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl); 572 } 573 } 574 else 575 { 576 while (yi < yb) 577 { 578 s32 y = TruncateGPUVertexPosition(yi); 579 580 if (y > static_cast<s32>(m_drawing_area.bottom)) 581 break; 582 583 if (y >= static_cast<s32>(m_drawing_area.top)) 584 { 585 DrawSpan<shading_enable, texture_enable, raw_texture_enable, transparency_enable, dithering_enable>( 586 cmd, y & VRAM_HEIGHT_MASK, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl); 587 } 588 589 yi++; 590 lc += ls; 591 rc += rs; 592 } 593 } 594 } 595 } 596 597 enum 598 { 599 Line_XY_FractBits = 32 600 }; 601 enum 602 { 603 Line_RGB_FractBits = 12 604 }; 605 606 struct line_fxp_coord 607 { 608 u64 x, y; 609 u32 r, g, b; 610 }; 611 612 struct line_fxp_step 613 { 614 s64 dx_dk, dy_dk; 615 s32 dr_dk, dg_dk, db_dk; 616 }; 617 618 static ALWAYS_INLINE_RELEASE s64 LineDivide(s64 delta, s32 dk) 619 { 620 delta = (u64)delta << Line_XY_FractBits; 621 622 if (delta < 0) 623 delta -= dk - 1; 624 if (delta > 0) 625 delta += dk - 1; 626 627 return (delta / dk); 628 } 629 630 template<bool shading_enable, bool transparency_enable, bool dithering_enable> 631 void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0, 632 const GPUBackendDrawLineCommand::Vertex* p1) 633 { 634 const s32 i_dx = std::abs(p1->x - p0->x); 635 const s32 i_dy = std::abs(p1->y - p0->y); 636 const s32 k = (i_dx > i_dy) ? i_dx : i_dy; 637 if (i_dx >= MAX_PRIMITIVE_WIDTH || i_dy >= MAX_PRIMITIVE_HEIGHT) 638 return; 639 640 if (p0->x >= p1->x && k > 0) 641 std::swap(p0, p1); 642 643 line_fxp_step step; 644 if (k == 0) 645 { 646 step.dx_dk = 0; 647 step.dy_dk = 0; 648 649 if constexpr (shading_enable) 650 { 651 step.dr_dk = 0; 652 step.dg_dk = 0; 653 step.db_dk = 0; 654 } 655 } 656 else 657 { 658 step.dx_dk = LineDivide(p1->x - p0->x, k); 659 step.dy_dk = LineDivide(p1->y - p0->y, k); 660 661 if constexpr (shading_enable) 662 { 663 step.dr_dk = (s32)((u32)(p1->r - p0->r) << Line_RGB_FractBits) / k; 664 step.dg_dk = (s32)((u32)(p1->g - p0->g) << Line_RGB_FractBits) / k; 665 step.db_dk = (s32)((u32)(p1->b - p0->b) << Line_RGB_FractBits) / k; 666 } 667 } 668 669 line_fxp_coord cur_point; 670 cur_point.x = ((u64)p0->x << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1)); 671 cur_point.y = ((u64)p0->y << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1)); 672 673 cur_point.x -= 1024; 674 675 if (step.dy_dk < 0) 676 cur_point.y -= 1024; 677 678 if constexpr (shading_enable) 679 { 680 cur_point.r = (p0->r << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1)); 681 cur_point.g = (p0->g << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1)); 682 cur_point.b = (p0->b << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1)); 683 } 684 685 for (s32 i = 0; i <= k; i++) 686 { 687 // Sign extension is not necessary here for x and y, due to the maximum values that ClipX1 and ClipY1 can contain. 688 const s32 x = (cur_point.x >> Line_XY_FractBits) & 2047; 689 const s32 y = (cur_point.y >> Line_XY_FractBits) & 2047; 690 691 if ((!cmd->params.interlaced_rendering || cmd->params.active_line_lsb != (Truncate8(static_cast<u32>(y)) & 1u)) && 692 x >= static_cast<s32>(m_drawing_area.left) && x <= static_cast<s32>(m_drawing_area.right) && 693 y >= static_cast<s32>(m_drawing_area.top) && y <= static_cast<s32>(m_drawing_area.bottom)) 694 { 695 const u8 r = shading_enable ? static_cast<u8>(cur_point.r >> Line_RGB_FractBits) : p0->r; 696 const u8 g = shading_enable ? static_cast<u8>(cur_point.g >> Line_RGB_FractBits) : p0->g; 697 const u8 b = shading_enable ? static_cast<u8>(cur_point.b >> Line_RGB_FractBits) : p0->b; 698 699 ShadePixel<false, false, transparency_enable, dithering_enable>( 700 cmd, static_cast<u32>(x), static_cast<u32>(y) & VRAM_HEIGHT_MASK, r, g, b, 0, 0); 701 } 702 703 cur_point.x += step.dx_dk; 704 cur_point.y += step.dy_dk; 705 706 if constexpr (shading_enable) 707 { 708 cur_point.r += step.dr_dk; 709 cur_point.g += step.dg_dk; 710 cur_point.b += step.db_dk; 711 } 712 } 713 } 714 715 void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) 716 { 717 const u16 color16 = VRAMRGBA8888ToRGBA5551(color); 718 const GSVector4i fill = GSVector4i(color16, color16, color16, color16, color16, color16, color16, color16); 719 constexpr u32 vector_width = 8; 720 const u32 aligned_width = Common::AlignDownPow2(width, vector_width); 721 722 if ((x + width) <= VRAM_WIDTH && !params.interlaced_rendering) 723 { 724 for (u32 yoffs = 0; yoffs < height; yoffs++) 725 { 726 const u32 row = (y + yoffs) % VRAM_HEIGHT; 727 728 u16* row_ptr = &g_vram[row * VRAM_WIDTH + x]; 729 u32 xoffs = 0; 730 for (; xoffs < aligned_width; xoffs += vector_width, row_ptr += vector_width) 731 GSVector4i::store<false>(row_ptr, fill); 732 for (; xoffs < width; xoffs++) 733 *(row_ptr++) = color16; 734 } 735 } 736 else if (params.interlaced_rendering) 737 { 738 // Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field. 739 const u32 active_field = params.active_line_lsb; 740 741 if ((x + width) <= VRAM_WIDTH) 742 { 743 for (u32 yoffs = 0; yoffs < height; yoffs++) 744 { 745 const u32 row = (y + yoffs) % VRAM_HEIGHT; 746 if ((row & u32(1)) == active_field) 747 continue; 748 749 u16* row_ptr = &g_vram[row * VRAM_WIDTH + x]; 750 u32 xoffs = 0; 751 for (; xoffs < aligned_width; xoffs += vector_width, row_ptr += vector_width) 752 GSVector4i::store<false>(row_ptr, fill); 753 for (; xoffs < width; xoffs++) 754 *(row_ptr++) = color16; 755 } 756 } 757 else 758 { 759 for (u32 yoffs = 0; yoffs < height; yoffs++) 760 { 761 const u32 row = (y + yoffs) % VRAM_HEIGHT; 762 if ((row & u32(1)) == active_field) 763 continue; 764 765 u16* row_ptr = &g_vram[row * VRAM_WIDTH]; 766 for (u32 xoffs = 0; xoffs < width; xoffs++) 767 { 768 const u32 col = (x + xoffs) % VRAM_WIDTH; 769 row_ptr[col] = color16; 770 } 771 } 772 } 773 } 774 else 775 { 776 for (u32 yoffs = 0; yoffs < height; yoffs++) 777 { 778 const u32 row = (y + yoffs) % VRAM_HEIGHT; 779 u16* row_ptr = &g_vram[row * VRAM_WIDTH]; 780 for (u32 xoffs = 0; xoffs < width; xoffs++) 781 { 782 const u32 col = (x + xoffs) % VRAM_WIDTH; 783 row_ptr[col] = color16; 784 } 785 } 786 } 787 } 788 789 void GPU_SW_Backend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, 790 GPUBackendCommandParameters params) 791 { 792 // Fast path when the copy is not oversized. 793 if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !params.IsMaskingEnabled()) 794 { 795 const u16* src_ptr = static_cast<const u16*>(data); 796 u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x]; 797 for (u32 yoffs = 0; yoffs < height; yoffs++) 798 { 799 std::copy_n(src_ptr, width, dst_ptr); 800 src_ptr += width; 801 dst_ptr += VRAM_WIDTH; 802 } 803 } 804 else 805 { 806 // Slow path when we need to handle wrap-around. 807 const u16* src_ptr = static_cast<const u16*>(data); 808 const u16 mask_and = params.GetMaskAND(); 809 const u16 mask_or = params.GetMaskOR(); 810 811 for (u32 row = 0; row < height;) 812 { 813 u16* dst_row_ptr = &g_vram[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH]; 814 for (u32 col = 0; col < width;) 815 { 816 // TODO: Handle unaligned reads... 817 u16* pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH]; 818 if (((*pixel_ptr) & mask_and) == 0) 819 *pixel_ptr = *(src_ptr++) | mask_or; 820 } 821 } 822 } 823 } 824 825 void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, 826 GPUBackendCommandParameters params) 827 { 828 // Break up oversized copies. This behavior has not been verified on console. 829 if ((src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH) 830 { 831 u32 remaining_rows = height; 832 u32 current_src_y = src_y; 833 u32 current_dst_y = dst_y; 834 while (remaining_rows > 0) 835 { 836 const u32 rows_to_copy = 837 std::min<u32>(remaining_rows, std::min<u32>(VRAM_HEIGHT - current_src_y, VRAM_HEIGHT - current_dst_y)); 838 839 u32 remaining_columns = width; 840 u32 current_src_x = src_x; 841 u32 current_dst_x = dst_x; 842 while (remaining_columns > 0) 843 { 844 const u32 columns_to_copy = 845 std::min<u32>(remaining_columns, std::min<u32>(VRAM_WIDTH - current_src_x, VRAM_WIDTH - current_dst_x)); 846 CopyVRAM(current_src_x, current_src_y, current_dst_x, current_dst_y, columns_to_copy, rows_to_copy, params); 847 current_src_x = (current_src_x + columns_to_copy) % VRAM_WIDTH; 848 current_dst_x = (current_dst_x + columns_to_copy) % VRAM_WIDTH; 849 remaining_columns -= columns_to_copy; 850 } 851 852 current_src_y = (current_src_y + rows_to_copy) % VRAM_HEIGHT; 853 current_dst_y = (current_dst_y + rows_to_copy) % VRAM_HEIGHT; 854 remaining_rows -= rows_to_copy; 855 } 856 857 return; 858 } 859 860 // This doesn't have a fast path, but do we really need one? It's not common. 861 const u16 mask_and = params.GetMaskAND(); 862 const u16 mask_or = params.GetMaskOR(); 863 864 // Copy in reverse when src_x < dst_x, this is verified on console. 865 if (src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH)) 866 { 867 for (u32 row = 0; row < height; row++) 868 { 869 const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; 870 u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; 871 872 for (s32 col = static_cast<s32>(width - 1); col >= 0; col--) 873 { 874 const u16 src_pixel = src_row_ptr[(src_x + static_cast<u32>(col)) % VRAM_WIDTH]; 875 u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + static_cast<u32>(col)) % VRAM_WIDTH]; 876 if ((*dst_pixel_ptr & mask_and) == 0) 877 *dst_pixel_ptr = src_pixel | mask_or; 878 } 879 } 880 } 881 else 882 { 883 for (u32 row = 0; row < height; row++) 884 { 885 const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; 886 u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; 887 888 for (u32 col = 0; col < width; col++) 889 { 890 const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH]; 891 u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH]; 892 if ((*dst_pixel_ptr & mask_and) == 0) 893 *dst_pixel_ptr = src_pixel | mask_or; 894 } 895 } 896 } 897 } 898 899 void GPU_SW_Backend::FlushRender() 900 { 901 } 902 903 void GPU_SW_Backend::DrawingAreaChanged() 904 { 905 } 906 907 void GPU_SW_Backend::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) 908 { 909 GPU::ReadCLUT(g_gpu_clut, reg, clut_is_8bit); 910 } 911 912 GPU_SW_Backend::DrawLineFunction GPU_SW_Backend::GetDrawLineFunction(bool shading_enable, bool transparency_enable, 913 bool dithering_enable) 914 { 915 static constexpr DrawLineFunction funcs[2][2][2] = { 916 {{&GPU_SW_Backend::DrawLine<false, false, false>, &GPU_SW_Backend::DrawLine<false, false, true>}, 917 {&GPU_SW_Backend::DrawLine<false, true, false>, &GPU_SW_Backend::DrawLine<false, true, true>}}, 918 {{&GPU_SW_Backend::DrawLine<true, false, false>, &GPU_SW_Backend::DrawLine<true, false, true>}, 919 {&GPU_SW_Backend::DrawLine<true, true, false>, &GPU_SW_Backend::DrawLine<true, true, true>}}}; 920 921 return funcs[u8(shading_enable)][u8(transparency_enable)][u8(dithering_enable)]; 922 } 923 924 GPU_SW_Backend::DrawRectangleFunction 925 GPU_SW_Backend::GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable, bool transparency_enable) 926 { 927 static constexpr DrawRectangleFunction funcs[2][2][2] = { 928 {{&GPU_SW_Backend::DrawRectangle<false, false, false>, &GPU_SW_Backend::DrawRectangle<false, false, true>}, 929 {&GPU_SW_Backend::DrawRectangle<false, false, false>, &GPU_SW_Backend::DrawRectangle<false, false, true>}}, 930 {{&GPU_SW_Backend::DrawRectangle<true, false, false>, &GPU_SW_Backend::DrawRectangle<true, false, true>}, 931 {&GPU_SW_Backend::DrawRectangle<true, true, false>, &GPU_SW_Backend::DrawRectangle<true, true, true>}}}; 932 933 return funcs[u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)]; 934 } 935 936 GPU_SW_Backend::DrawTriangleFunction GPU_SW_Backend::GetDrawTriangleFunction(bool shading_enable, bool texture_enable, 937 bool raw_texture_enable, 938 bool transparency_enable, 939 bool dithering_enable) 940 { 941 static constexpr DrawTriangleFunction funcs[2][2][2][2][2] = { 942 {{{{&GPU_SW_Backend::DrawTriangle<false, false, false, false, false>, 943 &GPU_SW_Backend::DrawTriangle<false, false, false, false, true>}, 944 {&GPU_SW_Backend::DrawTriangle<false, false, false, true, false>, 945 &GPU_SW_Backend::DrawTriangle<false, false, false, true, true>}}, 946 {{&GPU_SW_Backend::DrawTriangle<false, false, false, false, false>, 947 &GPU_SW_Backend::DrawTriangle<false, false, false, false, false>}, 948 {&GPU_SW_Backend::DrawTriangle<false, false, false, true, false>, 949 &GPU_SW_Backend::DrawTriangle<false, false, false, true, false>}}}, 950 {{{&GPU_SW_Backend::DrawTriangle<false, true, false, false, false>, 951 &GPU_SW_Backend::DrawTriangle<false, true, false, false, true>}, 952 {&GPU_SW_Backend::DrawTriangle<false, true, false, true, false>, 953 &GPU_SW_Backend::DrawTriangle<false, true, false, true, true>}}, 954 {{&GPU_SW_Backend::DrawTriangle<false, true, true, false, false>, 955 &GPU_SW_Backend::DrawTriangle<false, true, true, false, false>}, 956 {&GPU_SW_Backend::DrawTriangle<false, true, true, true, false>, 957 &GPU_SW_Backend::DrawTriangle<false, true, true, true, false>}}}}, 958 {{{{&GPU_SW_Backend::DrawTriangle<true, false, false, false, false>, 959 &GPU_SW_Backend::DrawTriangle<true, false, false, false, true>}, 960 {&GPU_SW_Backend::DrawTriangle<true, false, false, true, false>, 961 &GPU_SW_Backend::DrawTriangle<true, false, false, true, true>}}, 962 {{&GPU_SW_Backend::DrawTriangle<true, false, false, false, false>, 963 &GPU_SW_Backend::DrawTriangle<true, false, false, false, false>}, 964 {&GPU_SW_Backend::DrawTriangle<true, false, false, true, false>, 965 &GPU_SW_Backend::DrawTriangle<true, false, false, true, false>}}}, 966 {{{&GPU_SW_Backend::DrawTriangle<true, true, false, false, false>, 967 &GPU_SW_Backend::DrawTriangle<true, true, false, false, true>}, 968 {&GPU_SW_Backend::DrawTriangle<true, true, false, true, false>, 969 &GPU_SW_Backend::DrawTriangle<true, true, false, true, true>}}, 970 {{&GPU_SW_Backend::DrawTriangle<true, true, true, false, false>, 971 &GPU_SW_Backend::DrawTriangle<true, true, true, false, false>}, 972 {&GPU_SW_Backend::DrawTriangle<true, true, true, true, false>, 973 &GPU_SW_Backend::DrawTriangle<true, true, true, true, false>}}}}}; 974 975 return funcs[u8(shading_enable)][u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)] 976 [u8(dithering_enable)]; 977 }