gpu_sw.cpp (28092B)
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#include "gpu_sw.h"
#include "system.h"

#include "util/gpu_device.h"

#include "common/align.h"
#include "common/assert.h"
#include "common/gsvector.h"
#include "common/gsvector_formatter.h"
#include "common/log.h"

#include <algorithm>

Log_SetChannel(GPU_SW);

GPU_SW::GPU_SW() = default;

GPU_SW::~GPU_SW()
{
  g_gpu_device->RecycleTexture(std::move(m_upload_texture));
  m_backend.Shutdown();
}

const Threading::Thread* GPU_SW::GetSWThread() const
{
  return m_backend.GetThread();
}

bool GPU_SW::IsHardwareRenderer() const
{
  return false;
}

bool GPU_SW::Initialize()
{
  if (!GPU::Initialize() || !m_backend.Initialize(false))
    return false;

  static constexpr const std::array formats_for_16bit = {GPUTexture::Format::RGB565, GPUTexture::Format::RGBA5551,
                                                         GPUTexture::Format::RGBA8, GPUTexture::Format::BGRA8};
  static constexpr const std::array formats_for_24bit = {GPUTexture::Format::RGBA8, GPUTexture::Format::BGRA8,
                                                         GPUTexture::Format::RGB565, GPUTexture::Format::RGBA5551};
  for (const GPUTexture::Format format : formats_for_16bit)
  {
    if (g_gpu_device->SupportsTextureFormat(format))
    {
      m_16bit_display_format = format;
      break;
    }
  }
  for (const GPUTexture::Format format : formats_for_24bit)
  {
    if (g_gpu_device->SupportsTextureFormat(format))
    {
      m_24bit_display_format = format;
      break;
    }
  }

  return true;
}

bool GPU_SW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display)
{
  // need to ensure the worker thread is done
  m_backend.Sync(true);

  // ignore the host texture for software mode, since we want to save vram here
  return GPU::DoState(sw, nullptr, update_display);
}

void GPU_SW::Reset(bool clear_vram)
{
  GPU::Reset(clear_vram);

  m_backend.Reset();
}

void GPU_SW::UpdateSettings(const Settings& old_settings)
{
  GPU::UpdateSettings(old_settings);
  m_backend.UpdateSettings();
}

GPUTexture* GPU_SW::GetDisplayTexture(u32 width, u32 height, GPUTexture::Format format)
{
  if (!m_upload_texture || m_upload_texture->GetWidth() != width || m_upload_texture->GetHeight() != height ||
      m_upload_texture->GetFormat() != format)
  {
    ClearDisplayTexture();
    g_gpu_device->RecycleTexture(std::move(m_upload_texture));
    m_upload_texture =
      g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::DynamicTexture, format, nullptr, 0);
    if (!m_upload_texture) [[unlikely]]
      ERROR_LOG("Failed to create {}x{} {} texture", width, height, static_cast<u32>(format));
  }

  return m_upload_texture.get();
}

template<GPUTexture::Format out_format, typename out_type>
static void CopyOutRow16(const u16* src_ptr, out_type* dst_ptr, u32 width);

template<GPUTexture::Format out_format, typename out_type>
static out_type VRAM16ToOutput(u16 value);

template<>
ALWAYS_INLINE u16 VRAM16ToOutput<GPUTexture::Format::RGBA5551, u16>(u16 value)
{
  return (value & 0x3E0) | ((value >> 10) & 0x1F) | ((value & 0x1F) << 10);
}

template<>
ALWAYS_INLINE u16 VRAM16ToOutput<GPUTexture::Format::RGB565, u16>(u16 value)
{
  return ((value & 0x3E0) << 1) | ((value & 0x20) << 1) | ((value >> 10) & 0x1F) | ((value & 0x1F) << 11);
}
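
// The PS1 stores 15-bit VRAM pixels with red in bits 0-4, green in bits 5-9 and blue
// in bits 10-14; the host formats above want red in the high bits, so the swizzles
// swap red/blue and leave green in place (RGB565 also widens green to six bits).
// e.g. pure red 0x001F becomes 0x7C00 in RGBA5551 and 0xF800 in RGB565.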

template<>
ALWAYS_INLINE u32 VRAM16ToOutput<GPUTexture::Format::RGBA8, u32>(u16 value)
{
  const u32 value32 = ZeroExtend32(value);
  const u32 r = (value32 & 31u) << 3;
  const u32 g = ((value32 >> 5) & 31u) << 3;
  const u32 b = ((value32 >> 10) & 31u) << 3;
  const u32 a = ((value >> 15) != 0) ? 255 : 0;
  return ZeroExtend32(r) | (ZeroExtend32(g) << 8) | (ZeroExtend32(b) << 16) | (ZeroExtend32(a) << 24);
}

template<>
ALWAYS_INLINE u32 VRAM16ToOutput<GPUTexture::Format::BGRA8, u32>(u16 value)
{
  const u32 value32 = ZeroExtend32(value);
  const u32 r = (value32 & 31u) << 3;
  const u32 g = ((value32 >> 5) & 31u) << 3;
  const u32 b = ((value32 >> 10) & 31u) << 3;
  return ZeroExtend32(b) | (ZeroExtend32(g) << 8) | (ZeroExtend32(r) << 16) | (0xFF000000u);
}

template<>
ALWAYS_INLINE void CopyOutRow16<GPUTexture::Format::RGBA5551, u16>(const u16* src_ptr, u16* dst_ptr, u32 width)
{
  u32 col = 0;

  const u32 aligned_width = Common::AlignDownPow2(width, 8);
  for (; col < aligned_width; col += 8)
  {
    constexpr GSVector4i single_mask = GSVector4i::cxpr16(0x1F);
    GSVector4i value = GSVector4i::load<false>(src_ptr);
    src_ptr += 8;
    GSVector4i a = value & GSVector4i::cxpr16(0x3E0);
    GSVector4i b = value.srl16<10>() & single_mask;
    GSVector4i c = (value & single_mask).sll16<10>();
    value = (a | b) | c;
    GSVector4i::store<false>(dst_ptr, value);
    dst_ptr += 8;
  }

  for (; col < width; col++)
    *(dst_ptr++) = VRAM16ToOutput<GPUTexture::Format::RGBA5551, u16>(*(src_ptr++));
}

template<>
ALWAYS_INLINE void CopyOutRow16<GPUTexture::Format::RGB565, u16>(const u16* src_ptr, u16* dst_ptr, u32 width)
{
  u32 col = 0;

  const u32 aligned_width = Common::AlignDownPow2(width, 8);
  for (; col < aligned_width; col += 8)
  {
    constexpr GSVector4i single_mask = GSVector4i::cxpr16(0x1F);
    GSVector4i value = GSVector4i::load<false>(src_ptr);
    src_ptr += 8;
    GSVector4i a = (value & GSVector4i::cxpr16(0x3E0)).sll16<1>(); // (value & 0x3E0) << 1
    GSVector4i b = (value & GSVector4i::cxpr16(0x20)).sll16<1>();  // (value & 0x20) << 1
    GSVector4i c = (value.srl16<10>() & single_mask);              // ((value >> 10) & 0x1F)
    GSVector4i d = (value & single_mask).sll16<11>();              // ((value & 0x1F) << 11)
    value = (((a | b) | c) | d);
    GSVector4i::store<false>(dst_ptr, value);
    dst_ptr += 8;
  }

  for (; col < width; col++)
    *(dst_ptr++) = VRAM16ToOutput<GPUTexture::Format::RGB565, u16>(*(src_ptr++));
}

template<>
ALWAYS_INLINE void CopyOutRow16<GPUTexture::Format::RGBA8, u32>(const u16* src_ptr, u32* dst_ptr, u32 width)
{
  for (u32 col = 0; col < width; col++)
    *(dst_ptr++) = VRAM16ToOutput<GPUTexture::Format::RGBA8, u32>(*(src_ptr++));
}

template<>
ALWAYS_INLINE void CopyOutRow16<GPUTexture::Format::BGRA8, u32>(const u16* src_ptr, u32* dst_ptr, u32 width)
{
  for (u32 col = 0; col < width; col++)
    *(dst_ptr++) = VRAM16ToOutput<GPUTexture::Format::BGRA8, u32>(*(src_ptr++));
}

template<GPUTexture::Format display_format>
ALWAYS_INLINE_RELEASE bool GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 line_skip)
{
  using OutputPixelType =
    std::conditional_t<display_format == GPUTexture::Format::RGBA8 || display_format == GPUTexture::Format::BGRA8, u32,
                       u16>;

  GPUTexture* texture = GetDisplayTexture(width, height, display_format);
  if (!texture) [[unlikely]]
    return false;
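
  // Write straight into the texture when the device lets us map it; otherwise stage
  // rows in m_upload_buffer and push them with Update() after the copy completes.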
  u32 dst_stride = width * sizeof(OutputPixelType);
  u8* dst_ptr = m_upload_buffer.data();
  const bool mapped = texture->Map(reinterpret_cast<void**>(&dst_ptr), &dst_stride, 0, 0, width, height);

  // Fast path when not wrapping around.
  if ((src_x + width) <= VRAM_WIDTH && (src_y + height) <= VRAM_HEIGHT)
  {
    const u16* src_ptr = &g_vram[src_y * VRAM_WIDTH + src_x];
    const u32 src_step = VRAM_WIDTH << line_skip;
    for (u32 row = 0; row < height; row++)
    {
      CopyOutRow16<display_format>(src_ptr, reinterpret_cast<OutputPixelType*>(dst_ptr), width);
      src_ptr += src_step;
      dst_ptr += dst_stride;
    }
  }
  else
  {
    const u32 end_x = src_x + width;
    const u32 y_step = (1 << line_skip);
    for (u32 row = 0; row < height; row++)
    {
      const u16* src_row_ptr = &g_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
      OutputPixelType* dst_row_ptr = reinterpret_cast<OutputPixelType*>(dst_ptr);

      for (u32 col = src_x; col < end_x; col++)
        *(dst_row_ptr++) = VRAM16ToOutput<display_format, OutputPixelType>(src_row_ptr[col % VRAM_WIDTH]);

      src_y += y_step;
      dst_ptr += dst_stride;
    }
  }

  if (mapped)
    texture->Unmap();
  else
    texture->Update(0, 0, width, height, m_upload_buffer.data(), dst_stride);

  return true;
}

template<GPUTexture::Format display_format>
ALWAYS_INLINE_RELEASE bool GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 line_skip)
{
  using OutputPixelType =
    std::conditional_t<display_format == GPUTexture::Format::RGBA8 || display_format == GPUTexture::Format::BGRA8, u32,
                       u16>;

  GPUTexture* texture = GetDisplayTexture(width, height, display_format);
  if (!texture) [[unlikely]]
    return false;

  u32 dst_stride = Common::AlignUpPow2<u32>(width * sizeof(OutputPixelType), 4);
  u8* dst_ptr = m_upload_buffer.data();
  const bool mapped = texture->Map(reinterpret_cast<void**>(&dst_ptr), &dst_stride, 0, 0, width, height);
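
  // In 24-bit mode each display pixel occupies three bytes packed into the 16-bit VRAM
  // words, so src_x addresses a word while skip_x skips whole 24bpp pixels within the
  // row. The fast path below requires the source rect not to wrap around VRAM.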
  if ((src_x + width) <= VRAM_WIDTH && (src_y + (height << line_skip)) <= VRAM_HEIGHT)
  {
    const u8* src_ptr = reinterpret_cast<const u8*>(&g_vram[src_y * VRAM_WIDTH + src_x]) + (skip_x * 3);
    const u32 src_stride = (VRAM_WIDTH << line_skip) * sizeof(u16);
    for (u32 row = 0; row < height; row++)
    {
      if constexpr (display_format == GPUTexture::Format::RGBA8)
      {
        const u8* src_row_ptr = src_ptr;
        u8* dst_row_ptr = reinterpret_cast<u8*>(dst_ptr);
        for (u32 col = 0; col < width; col++)
        {
          *(dst_row_ptr++) = *(src_row_ptr++);
          *(dst_row_ptr++) = *(src_row_ptr++);
          *(dst_row_ptr++) = *(src_row_ptr++);
          *(dst_row_ptr++) = 0xFF;
        }
      }
      else if constexpr (display_format == GPUTexture::Format::BGRA8)
      {
        const u8* src_row_ptr = src_ptr;
        u8* dst_row_ptr = reinterpret_cast<u8*>(dst_ptr);
        for (u32 col = 0; col < width; col++)
        {
          *(dst_row_ptr++) = src_row_ptr[2];
          *(dst_row_ptr++) = src_row_ptr[1];
          *(dst_row_ptr++) = src_row_ptr[0];
          *(dst_row_ptr++) = 0xFF;
          src_row_ptr += 3;
        }
      }
      else if constexpr (display_format == GPUTexture::Format::RGB565)
      {
        const u8* src_row_ptr = src_ptr;
        u16* dst_row_ptr = reinterpret_cast<u16*>(dst_ptr);
        for (u32 col = 0; col < width; col++)
        {
          *(dst_row_ptr++) = ((static_cast<u16>(src_row_ptr[0]) >> 3) << 11) |
                             ((static_cast<u16>(src_row_ptr[1]) >> 2) << 5) | (static_cast<u16>(src_row_ptr[2]) >> 3);
          src_row_ptr += 3;
        }
      }
      else if constexpr (display_format == GPUTexture::Format::RGBA5551)
      {
        const u8* src_row_ptr = src_ptr;
        u16* dst_row_ptr = reinterpret_cast<u16*>(dst_ptr);
        for (u32 col = 0; col < width; col++)
        {
          *(dst_row_ptr++) = ((static_cast<u16>(src_row_ptr[0]) >> 3) << 10) |
                             ((static_cast<u16>(src_row_ptr[1]) >> 3) << 5) | (static_cast<u16>(src_row_ptr[2]) >> 3);
          src_row_ptr += 3;
        }
      }

      src_ptr += src_stride;
      dst_ptr += dst_stride;
    }
  }
  else
  {
    const u32 y_step = (1 << line_skip);

    for (u32 row = 0; row < height; row++)
    {
      const u16* src_row_ptr = &g_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
      OutputPixelType* dst_row_ptr = reinterpret_cast<OutputPixelType*>(dst_ptr);

      for (u32 col = 0; col < width; col++)
      {
        const u32 offset = (src_x + (((skip_x + col) * 3) / 2));
        const u16 s0 = src_row_ptr[offset % VRAM_WIDTH];
        const u16 s1 = src_row_ptr[(offset + 1) % VRAM_WIDTH];
        const u8 shift = static_cast<u8>(col & 1u) * 8;
        const u32 rgb = (((ZeroExtend32(s1) << 16) | ZeroExtend32(s0)) >> shift);
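
        // Each 24bpp pixel straddles one or two VRAM words: even columns take bytes 0-2
        // of the word pair, odd columns bytes 1-3 (hence the 8-bit shift). This leaves
        // R in the low byte of rgb, G in the middle byte and B in the high byte.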
        if constexpr (display_format == GPUTexture::Format::RGBA8)
        {
          *(dst_row_ptr++) = rgb | 0xFF000000u;
        }
        else if constexpr (display_format == GPUTexture::Format::BGRA8)
        {
          *(dst_row_ptr++) = (rgb & 0x00FF00) | ((rgb & 0xFF) << 16) | ((rgb >> 16) & 0xFF) | 0xFF000000u;
        }
        else if constexpr (display_format == GPUTexture::Format::RGB565)
        {
          *(dst_row_ptr++) = ((rgb >> 3) & 0x1F) | (((rgb >> 10) << 5) & 0x7E0) | (((rgb >> 19) << 11) & 0xF800);
        }
        else if constexpr (display_format == GPUTexture::Format::RGBA5551)
        {
          *(dst_row_ptr++) = ((rgb >> 3) & 0x1F) | (((rgb >> 11) << 5) & 0x3E0) | (((rgb >> 19) << 10) & 0x7C00);
        }
      }

      src_y += y_step;
      dst_ptr += dst_stride;
    }
  }

  if (mapped)
    texture->Unmap();
  else
    texture->Update(0, 0, width, height, m_upload_buffer.data(), dst_stride);

  return true;
}

bool GPU_SW::CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 line_skip, bool is_24bit)
{
  if (!is_24bit)
  {
    DebugAssert(skip_x == 0);

    switch (m_16bit_display_format)
    {
      case GPUTexture::Format::RGBA5551:
        return CopyOut15Bit<GPUTexture::Format::RGBA5551>(src_x, src_y, width, height, line_skip);

      case GPUTexture::Format::RGB565:
        return CopyOut15Bit<GPUTexture::Format::RGB565>(src_x, src_y, width, height, line_skip);

      case GPUTexture::Format::RGBA8:
        return CopyOut15Bit<GPUTexture::Format::RGBA8>(src_x, src_y, width, height, line_skip);

      case GPUTexture::Format::BGRA8:
        return CopyOut15Bit<GPUTexture::Format::BGRA8>(src_x, src_y, width, height, line_skip);

      default:
        UnreachableCode();
    }
  }
  else
  {
    switch (m_24bit_display_format)
    {
      case GPUTexture::Format::RGBA5551:
        return CopyOut24Bit<GPUTexture::Format::RGBA5551>(src_x, src_y, skip_x, width, height, line_skip);

      case GPUTexture::Format::RGB565:
        return CopyOut24Bit<GPUTexture::Format::RGB565>(src_x, src_y, skip_x, width, height, line_skip);

      case GPUTexture::Format::RGBA8:
        return CopyOut24Bit<GPUTexture::Format::RGBA8>(src_x, src_y, skip_x, width, height, line_skip);

      case GPUTexture::Format::BGRA8:
        return CopyOut24Bit<GPUTexture::Format::BGRA8>(src_x, src_y, skip_x, width, height, line_skip);

      default:
        UnreachableCode();
    }
  }
}

void GPU_SW::UpdateDisplay()
{
  // fill display texture
  m_backend.Sync(true);

  if (!g_settings.debugging.show_vram)
  {
    if (IsDisplayDisabled())
    {
      ClearDisplayTexture();
      return;
    }

    const bool is_24bit = m_GPUSTAT.display_area_color_depth_24;
    const bool interlaced = IsInterlacedDisplayEnabled();
    const u32 field = GetInterlacedDisplayField();
    const u32 vram_offset_x = is_24bit ? m_crtc_state.regs.X : m_crtc_state.display_vram_left;
    const u32 vram_offset_y =
      m_crtc_state.display_vram_top + ((interlaced && m_GPUSTAT.vertical_resolution) ? field : 0);
    const u32 skip_x = is_24bit ? (m_crtc_state.display_vram_left - m_crtc_state.regs.X) : 0;
    const u32 read_width = m_crtc_state.display_vram_width;
    const u32 read_height = interlaced ? (m_crtc_state.display_vram_height / 2) : m_crtc_state.display_vram_height;

    if (IsInterlacedDisplayEnabled())
    {
      const u32 line_skip = m_GPUSTAT.vertical_resolution;
      if (CopyOut(vram_offset_x, vram_offset_y, skip_x, read_width, read_height, line_skip, is_24bit))
      {
        SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, read_width, read_height);
        if (is_24bit && g_settings.display_24bit_chroma_smoothing)
        {
          if (ApplyChromaSmoothing())
            Deinterlace(field, 0);
        }
        else
        {
          Deinterlace(field, 0);
        }
      }
    }
    else
    {
      if (CopyOut(vram_offset_x, vram_offset_y, skip_x, read_width, read_height, 0, is_24bit))
      {
        SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, read_width, read_height);
        if (is_24bit && g_settings.display_24bit_chroma_smoothing)
          ApplyChromaSmoothing();
      }
    }
  }
  else
  {
    if (CopyOut(0, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, 0, false))
      SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, VRAM_WIDTH, VRAM_HEIGHT);
  }
}

void GPU_SW::FillBackendCommandParameters(GPUBackendCommand* cmd) const
{
  cmd->params.bits = 0;
  cmd->params.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw;
  cmd->params.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing;
  cmd->params.active_line_lsb = m_crtc_state.active_line_lsb;
  cmd->params.interlaced_rendering = IsInterlacedRenderingEnabled();
}

void GPU_SW::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const
{
  FillBackendCommandParameters(cmd);
  cmd->rc.bits = rc.bits;
  cmd->draw_mode.bits = m_draw_mode.mode_reg.bits;
  cmd->palette.bits = m_draw_mode.palette_reg.bits;
  cmd->window = m_draw_mode.texture_window;
}

void GPU_SW::DispatchRenderCommand()
{
  if (m_drawing_area_changed)
  {
    GPUBackendSetDrawingAreaCommand* cmd = m_backend.NewSetDrawingAreaCommand();
    cmd->new_area = m_drawing_area;
    m_backend.PushCommand(cmd);
    m_drawing_area_changed = false;
  }

  const GPURenderCommand rc{m_render_command.bits};

  switch (rc.primitive)
  {
    case GPUPrimitive::Polygon:
    {
      const u32 num_vertices = rc.quad_polygon ? 4 : 3;
      GPUBackendDrawPolygonCommand* cmd = m_backend.NewDrawPolygonCommand(num_vertices);
      FillDrawCommand(cmd, rc);

      std::array<GSVector2i, 4> positions;
      const u32 first_color = rc.color_for_first_vertex;
      const bool shaded = rc.shading_enable;
      const bool textured = rc.texture_enable;
      for (u32 i = 0; i < num_vertices; i++)
      {
        GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i];
        vert->color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color;
        const u64 maddr_and_pos = m_fifo.Pop();
        const GPUVertexPosition vp{Truncate32(maddr_and_pos)};
        vert->x = m_drawing_offset.x + vp.x;
        vert->y = m_drawing_offset.y + vp.y;
        vert->texcoord = textured ? Truncate16(FifoPop()) : 0;
        positions[i] = GSVector2i::load(&vert->x);
      }
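
      // A triangle is rejected if its exclusive bounding rect (min/max of its vertices,
      // +1 on the right/bottom edge) exceeds MAX_PRIMITIVE_WIDTH/HEIGHT or misses the
      // clamped drawing area entirely.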
      // Cull polygons which are too large.
      const GSVector2i min_pos_12 = positions[1].min_i32(positions[2]);
      const GSVector2i max_pos_12 = positions[1].max_i32(positions[2]);
      const GSVector4i draw_rect_012 = GSVector4i(min_pos_12.min_i32(positions[0]))
                                         .upl64(GSVector4i(max_pos_12.max_i32(positions[0])))
                                         .add32(GSVector4i::cxpr(0, 0, 1, 1));
      const bool first_tri_culled =
        (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT ||
         !m_clamped_drawing_area.rintersects(draw_rect_012));
      if (first_tri_culled)
      {
        DEBUG_LOG("Culling off-screen/too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].x, cmd->vertices[0].y,
                  cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[2].x, cmd->vertices[2].y);

        if (!rc.quad_polygon)
          return;
      }
      else
      {
        AddDrawTriangleTicks(positions[0], positions[1], positions[2], rc.shading_enable, rc.texture_enable,
                             rc.transparency_enable);
      }

      // quads
      if (rc.quad_polygon)
      {
        const GSVector4i draw_rect_123 = GSVector4i(min_pos_12.min_i32(positions[3]))
                                           .upl64(GSVector4i(max_pos_12.max_i32(positions[3])))
                                           .add32(GSVector4i::cxpr(0, 0, 1, 1));

        // Cull polygons which are too large.
        const bool second_tri_culled =
          (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT ||
           !m_clamped_drawing_area.rintersects(draw_rect_123));
        if (second_tri_culled)
        {
          DEBUG_LOG("Culling too-large polygon (quad second half): {},{} {},{} {},{}", cmd->vertices[2].x,
                    cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[0].x,
                    cmd->vertices[0].y);

          if (first_tri_culled)
            return;
        }
        else
        {
          AddDrawTriangleTicks(positions[2], positions[1], positions[3], rc.shading_enable, rc.texture_enable,
                               rc.transparency_enable);
        }
      }

      m_backend.PushCommand(cmd);
    }
    break;

    case GPUPrimitive::Rectangle:
    {
      GPUBackendDrawRectangleCommand* cmd = m_backend.NewDrawRectangleCommand();
      FillDrawCommand(cmd, rc);
      cmd->color = rc.color_for_first_vertex;

      const GPUVertexPosition vp{FifoPop()};
      cmd->x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x);
      cmd->y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y);

      if (rc.texture_enable)
      {
        const u32 texcoord_and_palette = FifoPop();
        cmd->palette.bits = Truncate16(texcoord_and_palette >> 16);
        cmd->texcoord = Truncate16(texcoord_and_palette);
      }
      else
      {
        cmd->palette.bits = 0;
        cmd->texcoord = 0;
      }

      switch (rc.rectangle_size)
      {
        case GPUDrawRectangleSize::R1x1:
          cmd->width = 1;
          cmd->height = 1;
          break;
        case GPUDrawRectangleSize::R8x8:
          cmd->width = 8;
          cmd->height = 8;
          break;
        case GPUDrawRectangleSize::R16x16:
          cmd->width = 16;
          cmd->height = 16;
          break;
        default:
        {
          const u32 width_and_height = FifoPop();
          cmd->width = static_cast<u16>(width_and_height & VRAM_WIDTH_MASK);
          cmd->height = static_cast<u16>((width_and_height >> 16) & VRAM_HEIGHT_MASK);
        }
        break;
      }
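
      // Variable-size rectangles can still land entirely outside the drawing area, so
      // clamp the bounding rect against it and drop the command if nothing remains.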
      const GSVector4i rect = GSVector4i(cmd->x, cmd->y, cmd->x + cmd->width, cmd->y + cmd->height);
      const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect);
      if (clamped_rect.rempty()) [[unlikely]]
      {
        DEBUG_LOG("Culling off-screen rectangle {}", rect);
        return;
      }

      AddDrawRectangleTicks(clamped_rect, rc.texture_enable, rc.transparency_enable);

      m_backend.PushCommand(cmd);
    }
    break;

    case GPUPrimitive::Line:
    {
      if (!rc.polyline)
      {
        GPUBackendDrawLineCommand* cmd = m_backend.NewDrawLineCommand(2);
        FillDrawCommand(cmd, rc);
        cmd->palette.bits = 0;

        if (rc.shading_enable)
        {
          cmd->vertices[0].color = rc.color_for_first_vertex;
          const GPUVertexPosition start_pos{FifoPop()};
          cmd->vertices[0].x = m_drawing_offset.x + start_pos.x;
          cmd->vertices[0].y = m_drawing_offset.y + start_pos.y;

          cmd->vertices[1].color = FifoPop() & UINT32_C(0x00FFFFFF);
          const GPUVertexPosition end_pos{FifoPop()};
          cmd->vertices[1].x = m_drawing_offset.x + end_pos.x;
          cmd->vertices[1].y = m_drawing_offset.y + end_pos.y;
        }
        else
        {
          cmd->vertices[0].color = rc.color_for_first_vertex;
          cmd->vertices[1].color = rc.color_for_first_vertex;

          const GPUVertexPosition start_pos{FifoPop()};
          cmd->vertices[0].x = m_drawing_offset.x + start_pos.x;
          cmd->vertices[0].y = m_drawing_offset.y + start_pos.y;

          const GPUVertexPosition end_pos{FifoPop()};
          cmd->vertices[1].x = m_drawing_offset.x + end_pos.x;
          cmd->vertices[1].y = m_drawing_offset.y + end_pos.y;
        }

        const GSVector4i v0 = GSVector4i::loadl(&cmd->vertices[0].x);
        const GSVector4i v1 = GSVector4i::loadl(&cmd->vertices[1].x);
        const GSVector4i rect = v0.min_i32(v1).xyxy(v0.max_i32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1));
        const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);

        if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty())
        {
          DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[0].x, cmd->vertices[0].y,
                    cmd->vertices[1].x, cmd->vertices[1].y);
          return;
        }

        AddDrawLineTicks(clamped_rect, rc.shading_enable);

        m_backend.PushCommand(cmd);
      }
      else
      {
        const u32 num_vertices = GetPolyLineVertexCount();

        GPUBackendDrawLineCommand* cmd = m_backend.NewDrawLineCommand(num_vertices);
        FillDrawCommand(cmd, m_render_command);

        u32 buffer_pos = 0;
        const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]};
        cmd->vertices[0].x = start_vp.x + m_drawing_offset.x;
        cmd->vertices[0].y = start_vp.y + m_drawing_offset.y;
        cmd->vertices[0].color = m_render_command.color_for_first_vertex;
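
        // Polyline data was accumulated into m_blit_buffer before dispatch; for shaded
        // polylines a color word precedes each position word after the first vertex.
        // Each new vertex closes a segment with the previous one, and a too-large or
        // off-screen segment abandons the rest of the polyline.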
        const bool shaded = m_render_command.shading_enable;
        for (u32 i = 1; i < num_vertices; i++)
        {
          cmd->vertices[i].color =
            shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : m_render_command.color_for_first_vertex;
          const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]};
          cmd->vertices[i].x = m_drawing_offset.x + vp.x;
          cmd->vertices[i].y = m_drawing_offset.y + vp.y;

          const GSVector4i v0 = GSVector4i::loadl(&cmd->vertices[i - 1].x);
          const GSVector4i v1 = GSVector4i::loadl(&cmd->vertices[i].x);
          const GSVector4i rect = v0.min_i32(v1).xyxy(v0.max_i32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1));
          const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);

          if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty())
          {
            DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", cmd->vertices[i - 1].x,
                      cmd->vertices[i - 1].y, cmd->vertices[i].x, cmd->vertices[i].y);
            return;
          }
          else
          {
            AddDrawLineTicks(clamped_rect, rc.shading_enable);
          }
        }

        m_backend.PushCommand(cmd);
      }
    }
    break;

    default:
      UnreachableCode();
      break;
  }
}

void GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
{
  m_backend.Sync(false);
}

void GPU_SW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
{
  GPUBackendFillVRAMCommand* cmd = m_backend.NewFillVRAMCommand();
  FillBackendCommandParameters(cmd);
  cmd->x = static_cast<u16>(x);
  cmd->y = static_cast<u16>(y);
  cmd->width = static_cast<u16>(width);
  cmd->height = static_cast<u16>(height);
  cmd->color = color;
  m_backend.PushCommand(cmd);
}

void GPU_SW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask)
{
  const u32 num_words = width * height;
  GPUBackendUpdateVRAMCommand* cmd = m_backend.NewUpdateVRAMCommand(num_words);
  FillBackendCommandParameters(cmd);
  cmd->params.set_mask_while_drawing = set_mask;
  cmd->params.check_mask_before_draw = check_mask;
  cmd->x = static_cast<u16>(x);
  cmd->y = static_cast<u16>(y);
  cmd->width = static_cast<u16>(width);
  cmd->height = static_cast<u16>(height);
  std::memcpy(cmd->data, data, sizeof(u16) * num_words);
  m_backend.PushCommand(cmd);
}

void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
{
  GPUBackendCopyVRAMCommand* cmd = m_backend.NewCopyVRAMCommand();
  FillBackendCommandParameters(cmd);
  cmd->src_x = static_cast<u16>(src_x);
  cmd->src_y = static_cast<u16>(src_y);
  cmd->dst_x = static_cast<u16>(dst_x);
  cmd->dst_y = static_cast<u16>(dst_y);
  cmd->width = static_cast<u16>(width);
  cmd->height = static_cast<u16>(height);
  m_backend.PushCommand(cmd);
}

void GPU_SW::FlushRender()
{
}

void GPU_SW::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit)
{
  GPUBackendUpdateCLUTCommand* cmd = m_backend.NewUpdateCLUTCommand();
  FillBackendCommandParameters(cmd);
  cmd->reg.bits = reg.bits;
  cmd->clut_is_8bit = clut_is_8bit;
  m_backend.PushCommand(cmd);
}

std::unique_ptr<GPU> GPU::CreateSoftwareRenderer()
{
  std::unique_ptr<GPU_SW> gpu(std::make_unique<GPU_SW>());
  if (!gpu->Initialize())
    return nullptr;

  return gpu;
}
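
// Usage sketch (hypothetical caller, not part of this file): the system core would
// select this renderer at boot along the lines of
//
//   std::unique_ptr<GPU> gpu = GPU::CreateSoftwareRenderer();
//   if (!gpu)
//     return false; // GPU or backend initialization failed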