gpu.cpp (111111B)
1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) 3 4 #include "gpu.h" 5 #include "dma.h" 6 #include "gpu_shadergen.h" 7 #include "host.h" 8 #include "interrupt_controller.h" 9 #include "settings.h" 10 #include "system.h" 11 #include "timers.h" 12 13 #include "util/gpu_device.h" 14 #include "util/image.h" 15 #include "util/imgui_manager.h" 16 #include "util/media_capture.h" 17 #include "util/postprocessing.h" 18 #include "util/shadergen.h" 19 #include "util/state_wrapper.h" 20 21 #include "common/align.h" 22 #include "common/error.h" 23 #include "common/file_system.h" 24 #include "common/gsvector_formatter.h" 25 #include "common/log.h" 26 #include "common/path.h" 27 #include "common/small_string.h" 28 #include "common/string_util.h" 29 30 #include "IconsEmoji.h" 31 #include "fmt/format.h" 32 #include "imgui.h" 33 34 #include <cmath> 35 #include <numbers> 36 #include <thread> 37 38 Log_SetChannel(GPU); 39 40 std::unique_ptr<GPU> g_gpu; 41 alignas(HOST_PAGE_SIZE) u16 g_vram[VRAM_SIZE / sizeof(u16)]; 42 u16 g_gpu_clut[GPU_CLUT_SIZE]; 43 44 const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable(); 45 46 static TimingEvent s_crtc_tick_event( 47 "GPU CRTC Tick", 1, 1, [](void* param, TickCount ticks, TickCount ticks_late) { g_gpu->CRTCTickEvent(ticks); }, 48 nullptr); 49 static TimingEvent s_command_tick_event( 50 "GPU Command Tick", 1, 1, [](void* param, TickCount ticks, TickCount ticks_late) { g_gpu->CommandTickEvent(ticks); }, 51 nullptr); 52 53 static std::deque<std::thread> s_screenshot_threads; 54 static std::mutex s_screenshot_threads_mutex; 55 56 // #define PSX_GPU_STATS 57 #ifdef PSX_GPU_STATS 58 static u64 s_active_gpu_cycles = 0; 59 static u32 s_active_gpu_cycles_frames = 0; 60 #endif 61 62 static constexpr GPUTexture::Format DISPLAY_INTERNAL_POSTFX_FORMAT = GPUTexture::Format::RGBA8; 63 64 static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, 65 u8 quality, bool clear_alpha, bool flip_y, std::vector<u32> texture_data, 66 u32 texture_data_stride, GPUTexture::Format texture_format, 67 bool display_osd_message, bool use_thread); 68 static void JoinScreenshotThreads(); 69 70 GPU::GPU() 71 { 72 ResetStatistics(); 73 } 74 75 GPU::~GPU() 76 { 77 s_command_tick_event.Deactivate(); 78 s_crtc_tick_event.Deactivate(); 79 80 JoinScreenshotThreads(); 81 DestroyDeinterlaceTextures(); 82 g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); 83 84 if (g_gpu_device) 85 g_gpu_device->SetGPUTimingEnabled(false); 86 } 87 88 bool GPU::Initialize() 89 { 90 m_force_progressive_scan = g_settings.gpu_disable_interlacing; 91 m_force_ntsc_timings = g_settings.gpu_force_ntsc_timings; 92 s_crtc_tick_event.Activate(); 93 m_fifo_size = g_settings.gpu_fifo_size; 94 m_max_run_ahead = g_settings.gpu_max_run_ahead; 95 m_console_is_pal = System::IsPALRegion(); 96 UpdateCRTCConfig(); 97 98 if (!CompileDisplayPipelines(true, true, g_settings.display_24bit_chroma_smoothing)) 99 { 100 Host::ReportErrorAsync("Error", "Failed to compile base GPU pipelines."); 101 return false; 102 } 103 104 g_gpu_device->SetGPUTimingEnabled(g_settings.display_show_gpu_usage); 105 106 #ifdef PSX_GPU_STATS 107 s_active_gpu_cycles = 0; 108 s_active_gpu_cycles_frames = 0; 109 #endif 110 111 return true; 112 } 113 114 void GPU::UpdateSettings(const Settings& old_settings) 115 { 116 FlushRender(); 117 118 m_force_progressive_scan = 
g_settings.gpu_disable_interlacing; 119 m_fifo_size = g_settings.gpu_fifo_size; 120 m_max_run_ahead = g_settings.gpu_max_run_ahead; 121 122 if (m_force_ntsc_timings != g_settings.gpu_force_ntsc_timings || m_console_is_pal != System::IsPALRegion()) 123 { 124 m_force_ntsc_timings = g_settings.gpu_force_ntsc_timings; 125 m_console_is_pal = System::IsPALRegion(); 126 UpdateCRTCConfig(); 127 } 128 129 // Crop mode calls this, so recalculate the display area 130 UpdateCRTCDisplayParameters(); 131 132 if (g_settings.display_scaling != old_settings.display_scaling || 133 g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode || 134 g_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing) 135 { 136 // Toss buffers on mode change. 137 if (g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode) 138 DestroyDeinterlaceTextures(); 139 140 if (!CompileDisplayPipelines(g_settings.display_scaling != old_settings.display_scaling, 141 g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode, 142 g_settings.display_24bit_chroma_smoothing != 143 old_settings.display_24bit_chroma_smoothing)) 144 { 145 Panic("Failed to compile display pipeline on settings change."); 146 } 147 } 148 149 g_gpu_device->SetGPUTimingEnabled(g_settings.display_show_gpu_usage); 150 } 151 152 void GPU::CPUClockChanged() 153 { 154 UpdateCRTCConfig(); 155 } 156 157 void GPU::UpdateResolutionScale() 158 { 159 } 160 161 std::tuple<u32, u32> GPU::GetEffectiveDisplayResolution(bool scaled /* = true */) 162 { 163 return std::tie(m_crtc_state.display_vram_width, m_crtc_state.display_vram_height); 164 } 165 166 std::tuple<u32, u32> GPU::GetFullDisplayResolution(bool scaled /* = true */) 167 { 168 return std::tie(m_crtc_state.display_width, m_crtc_state.display_height); 169 } 170 171 void GPU::Reset(bool clear_vram) 172 { 173 m_GPUSTAT.bits = 0x14802000; 174 m_set_texture_disable_mask = false; 175 m_GPUREAD_latch = 0; 176 m_crtc_state.fractional_ticks = 0; 177 m_crtc_state.fractional_dot_ticks = 0; 178 m_crtc_state.current_tick_in_scanline = 0; 179 m_crtc_state.current_scanline = 0; 180 m_crtc_state.in_hblank = false; 181 m_crtc_state.in_vblank = false; 182 m_crtc_state.interlaced_field = 0; 183 m_crtc_state.interlaced_display_field = 0; 184 185 if (clear_vram) 186 { 187 std::memset(g_vram, 0, sizeof(g_vram)); 188 std::memset(g_gpu_clut, 0, sizeof(g_gpu_clut)); 189 } 190 191 // Cancel VRAM writes. 192 m_blitter_state = BlitterState::Idle; 193 194 // Force event to reschedule itself. 
195 s_crtc_tick_event.Deactivate(); 196 s_command_tick_event.Deactivate(); 197 198 SoftReset(); 199 UpdateDisplay(); 200 } 201 202 void GPU::SoftReset() 203 { 204 FlushRender(); 205 if (m_blitter_state == BlitterState::WritingVRAM) 206 FinishVRAMWrite(); 207 208 m_GPUSTAT.texture_page_x_base = 0; 209 m_GPUSTAT.texture_page_y_base = 0; 210 m_GPUSTAT.semi_transparency_mode = GPUTransparencyMode::HalfBackgroundPlusHalfForeground; 211 m_GPUSTAT.texture_color_mode = GPUTextureMode::Palette4Bit; 212 m_GPUSTAT.dither_enable = false; 213 m_GPUSTAT.draw_to_displayed_field = false; 214 m_GPUSTAT.set_mask_while_drawing = false; 215 m_GPUSTAT.check_mask_before_draw = false; 216 m_GPUSTAT.reverse_flag = false; 217 m_GPUSTAT.texture_disable = false; 218 m_GPUSTAT.horizontal_resolution_2 = 0; 219 m_GPUSTAT.horizontal_resolution_1 = 0; 220 m_GPUSTAT.vertical_resolution = false; 221 m_GPUSTAT.pal_mode = System::IsPALRegion(); 222 m_GPUSTAT.display_area_color_depth_24 = false; 223 m_GPUSTAT.vertical_interlace = false; 224 m_GPUSTAT.display_disable = true; 225 m_GPUSTAT.dma_direction = DMADirection::Off; 226 m_drawing_area = {}; 227 m_drawing_area_changed = true; 228 m_drawing_offset = {}; 229 std::memset(&m_crtc_state.regs, 0, sizeof(m_crtc_state.regs)); 230 m_crtc_state.regs.horizontal_display_range = 0xC60260; 231 m_crtc_state.regs.vertical_display_range = 0x3FC10; 232 m_blitter_state = BlitterState::Idle; 233 m_pending_command_ticks = 0; 234 m_command_total_words = 0; 235 m_vram_transfer = {}; 236 m_fifo.Clear(); 237 m_blit_buffer.clear(); 238 m_blit_remaining_words = 0; 239 m_draw_mode.texture_window_value = 0xFFFFFFFFu; 240 SetDrawMode(0); 241 SetTexturePalette(0); 242 SetTextureWindow(0); 243 InvalidateCLUT(); 244 UpdateDMARequest(); 245 UpdateCRTCConfig(); 246 UpdateCommandTickEvent(); 247 UpdateGPUIdle(); 248 } 249 250 bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) 251 { 252 FlushRender(); 253 254 if (sw.IsReading()) 255 { 256 // perform a reset to discard all pending draws/fb state 257 Reset(host_texture == nullptr); 258 } 259 260 sw.Do(&m_GPUSTAT.bits); 261 262 sw.Do(&m_draw_mode.mode_reg.bits); 263 sw.Do(&m_draw_mode.palette_reg.bits); 264 sw.Do(&m_draw_mode.texture_window_value); 265 266 if (sw.GetVersion() < 62) [[unlikely]] 267 { 268 // texture_page_x, texture_page_y, texture_palette_x, texture_palette_y 269 DebugAssert(sw.IsReading()); 270 sw.SkipBytes(sizeof(u32) * 4); 271 } 272 273 sw.Do(&m_draw_mode.texture_window.and_x); 274 sw.Do(&m_draw_mode.texture_window.and_y); 275 sw.Do(&m_draw_mode.texture_window.or_x); 276 sw.Do(&m_draw_mode.texture_window.or_y); 277 sw.Do(&m_draw_mode.texture_x_flip); 278 sw.Do(&m_draw_mode.texture_y_flip); 279 280 sw.Do(&m_drawing_area.left); 281 sw.Do(&m_drawing_area.top); 282 sw.Do(&m_drawing_area.right); 283 sw.Do(&m_drawing_area.bottom); 284 sw.Do(&m_drawing_offset.x); 285 sw.Do(&m_drawing_offset.y); 286 sw.Do(&m_drawing_offset.x); 287 288 sw.Do(&m_console_is_pal); 289 sw.Do(&m_set_texture_disable_mask); 290 291 sw.Do(&m_crtc_state.regs.display_address_start); 292 sw.Do(&m_crtc_state.regs.horizontal_display_range); 293 sw.Do(&m_crtc_state.regs.vertical_display_range); 294 sw.Do(&m_crtc_state.dot_clock_divider); 295 sw.Do(&m_crtc_state.display_width); 296 sw.Do(&m_crtc_state.display_height); 297 sw.Do(&m_crtc_state.display_origin_left); 298 sw.Do(&m_crtc_state.display_origin_top); 299 sw.Do(&m_crtc_state.display_vram_left); 300 sw.Do(&m_crtc_state.display_vram_top); 301 sw.Do(&m_crtc_state.display_vram_width); 302 
sw.Do(&m_crtc_state.display_vram_height); 303 sw.Do(&m_crtc_state.horizontal_total); 304 sw.Do(&m_crtc_state.horizontal_visible_start); 305 sw.Do(&m_crtc_state.horizontal_visible_end); 306 sw.Do(&m_crtc_state.horizontal_display_start); 307 sw.Do(&m_crtc_state.horizontal_display_end); 308 sw.Do(&m_crtc_state.vertical_total); 309 sw.Do(&m_crtc_state.vertical_visible_start); 310 sw.Do(&m_crtc_state.vertical_visible_end); 311 sw.Do(&m_crtc_state.vertical_display_start); 312 sw.Do(&m_crtc_state.vertical_display_end); 313 sw.Do(&m_crtc_state.fractional_ticks); 314 sw.Do(&m_crtc_state.current_tick_in_scanline); 315 sw.Do(&m_crtc_state.current_scanline); 316 sw.DoEx(&m_crtc_state.fractional_dot_ticks, 46, 0); 317 sw.Do(&m_crtc_state.in_hblank); 318 sw.Do(&m_crtc_state.in_vblank); 319 sw.Do(&m_crtc_state.interlaced_field); 320 sw.Do(&m_crtc_state.interlaced_display_field); 321 sw.Do(&m_crtc_state.active_line_lsb); 322 323 sw.Do(&m_blitter_state); 324 sw.Do(&m_pending_command_ticks); 325 sw.Do(&m_command_total_words); 326 sw.Do(&m_GPUREAD_latch); 327 328 if (sw.GetVersion() < 64) [[unlikely]] 329 { 330 // Clear CLUT cache and let it populate later. 331 InvalidateCLUT(); 332 } 333 else 334 { 335 sw.Do(&m_current_clut_reg_bits); 336 sw.Do(&m_current_clut_is_8bit); 337 sw.DoArray(g_gpu_clut, std::size(g_gpu_clut)); 338 } 339 340 sw.Do(&m_vram_transfer.x); 341 sw.Do(&m_vram_transfer.y); 342 sw.Do(&m_vram_transfer.width); 343 sw.Do(&m_vram_transfer.height); 344 sw.Do(&m_vram_transfer.col); 345 sw.Do(&m_vram_transfer.row); 346 347 sw.Do(&m_fifo); 348 sw.Do(&m_blit_buffer); 349 sw.Do(&m_blit_remaining_words); 350 sw.Do(&m_render_command.bits); 351 352 sw.Do(&m_max_run_ahead); 353 sw.Do(&m_fifo_size); 354 355 if (sw.IsReading()) 356 { 357 m_draw_mode.texture_page_changed = true; 358 m_draw_mode.texture_window_changed = true; 359 m_drawing_area_changed = true; 360 SetClampedDrawingArea(); 361 UpdateDMARequest(); 362 } 363 364 if (!host_texture) 365 { 366 if (!sw.DoMarker("GPU-VRAM")) 367 return false; 368 369 sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); 370 } 371 372 if (sw.IsReading()) 373 { 374 UpdateCRTCConfig(); 375 if (update_display) 376 UpdateDisplay(); 377 378 UpdateCommandTickEvent(); 379 } 380 381 return !sw.HasError(); 382 } 383 384 void GPU::RestoreDeviceContext() 385 { 386 } 387 388 void GPU::UpdateDMARequest() 389 { 390 switch (m_blitter_state) 391 { 392 case BlitterState::Idle: 393 m_GPUSTAT.ready_to_send_vram = false; 394 m_GPUSTAT.ready_to_recieve_dma = (m_fifo.IsEmpty() || m_fifo.GetSize() < m_command_total_words); 395 break; 396 397 case BlitterState::WritingVRAM: 398 m_GPUSTAT.ready_to_send_vram = false; 399 m_GPUSTAT.ready_to_recieve_dma = (m_fifo.GetSize() < m_fifo_size); 400 break; 401 402 case BlitterState::ReadingVRAM: 403 m_GPUSTAT.ready_to_send_vram = true; 404 m_GPUSTAT.ready_to_recieve_dma = m_fifo.IsEmpty(); 405 break; 406 407 case BlitterState::DrawingPolyLine: 408 m_GPUSTAT.ready_to_send_vram = false; 409 m_GPUSTAT.ready_to_recieve_dma = (m_fifo.GetSize() < m_fifo_size); 410 break; 411 412 default: 413 UnreachableCode(); 414 break; 415 } 416 417 bool dma_request; 418 switch (m_GPUSTAT.dma_direction) 419 { 420 case DMADirection::Off: 421 dma_request = false; 422 break; 423 424 case DMADirection::FIFO: 425 dma_request = m_GPUSTAT.ready_to_recieve_dma; 426 break; 427 428 case DMADirection::CPUtoGP0: 429 dma_request = m_GPUSTAT.ready_to_recieve_dma; 430 break; 431 432 case DMADirection::GPUREADtoCPU: 433 dma_request = m_GPUSTAT.ready_to_send_vram; 434 break; 435 436 
default: 437 dma_request = false; 438 break; 439 } 440 m_GPUSTAT.dma_data_request = dma_request; 441 DMA::SetRequest(DMA::Channel::GPU, dma_request); 442 } 443 444 void GPU::UpdateGPUIdle() 445 { 446 m_GPUSTAT.gpu_idle = (m_blitter_state == BlitterState::Idle && m_pending_command_ticks <= 0 && m_fifo.IsEmpty()); 447 } 448 449 u32 GPU::ReadRegister(u32 offset) 450 { 451 switch (offset) 452 { 453 case 0x00: 454 return ReadGPUREAD(); 455 456 case 0x04: 457 { 458 // code can be dependent on the odd/even bit, so update the GPU state when reading. 459 // we can mitigate this slightly by only updating when the raster is actually hitting a new line 460 if (IsCRTCScanlinePending()) 461 SynchronizeCRTC(); 462 if (IsCommandCompletionPending()) 463 s_command_tick_event.InvokeEarly(); 464 465 return m_GPUSTAT.bits; 466 } 467 468 default: 469 ERROR_LOG("Unhandled register read: {:02X}", offset); 470 return UINT32_C(0xFFFFFFFF); 471 } 472 } 473 474 void GPU::WriteRegister(u32 offset, u32 value) 475 { 476 switch (offset) 477 { 478 case 0x00: 479 m_fifo.Push(value); 480 ExecuteCommands(); 481 return; 482 483 case 0x04: 484 WriteGP1(value); 485 return; 486 487 default: 488 ERROR_LOG("Unhandled register write: {:02X} <- {:08X}", offset, value); 489 return; 490 } 491 } 492 493 void GPU::DMARead(u32* words, u32 word_count) 494 { 495 if (m_GPUSTAT.dma_direction != DMADirection::GPUREADtoCPU) 496 { 497 ERROR_LOG("Invalid DMA direction from GPU DMA read"); 498 std::fill_n(words, word_count, UINT32_C(0xFFFFFFFF)); 499 return; 500 } 501 502 for (u32 i = 0; i < word_count; i++) 503 words[i] = ReadGPUREAD(); 504 } 505 506 void GPU::EndDMAWrite() 507 { 508 ExecuteCommands(); 509 } 510 511 /** 512 * NTSC GPU clock 53.693175 MHz 513 * PAL GPU clock 53.203425 MHz 514 * courtesy of @ggrtk 515 * 516 * NTSC - sysclk * 715909 / 451584 517 * PAL - sysclk * 709379 / 451584 518 */ 519 520 TickCount GPU::GetCRTCFrequency() const 521 { 522 return m_console_is_pal ? 53203425 : 53693175; 523 } 524 525 TickCount GPU::CRTCTicksToSystemTicks(TickCount gpu_ticks, TickCount fractional_ticks) const 526 { 527 // convert to master clock, rounding up as we want to overshoot not undershoot 528 if (!m_console_is_pal) 529 return static_cast<TickCount>((u64(gpu_ticks) * u64(451584) + fractional_ticks + u64(715908)) / u64(715909)); 530 else 531 return static_cast<TickCount>((u64(gpu_ticks) * u64(451584) + fractional_ticks + u64(709378)) / u64(709379)); 532 } 533 534 TickCount GPU::SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const 535 { 536 u64 mul = u64(sysclk_ticks); 537 mul *= !m_console_is_pal ? 
u64(715909) : u64(709379); 538 mul += u64(*fractional_ticks); 539 540 const TickCount ticks = static_cast<TickCount>(mul / u64(451584)); 541 *fractional_ticks = static_cast<TickCount>(mul % u64(451584)); 542 return ticks; 543 } 544 545 void GPU::AddCommandTicks(TickCount ticks) 546 { 547 m_pending_command_ticks += ticks; 548 #ifdef PSX_GPU_STATS 549 s_active_gpu_cycles += ticks; 550 #endif 551 } 552 553 void GPU::SynchronizeCRTC() 554 { 555 s_crtc_tick_event.InvokeEarly(); 556 } 557 558 float GPU::ComputeHorizontalFrequency() const 559 { 560 const CRTCState& cs = m_crtc_state; 561 TickCount fractional_ticks = 0; 562 return static_cast<float>( 563 static_cast<double>(SystemTicksToCRTCTicks(System::GetTicksPerSecond(), &fractional_ticks)) / 564 static_cast<double>(cs.horizontal_total)); 565 } 566 567 float GPU::ComputeVerticalFrequency() const 568 { 569 const CRTCState& cs = m_crtc_state; 570 const TickCount ticks_per_frame = cs.horizontal_total * cs.vertical_total; 571 TickCount fractional_ticks = 0; 572 return static_cast<float>( 573 static_cast<double>(SystemTicksToCRTCTicks(System::GetTicksPerSecond(), &fractional_ticks)) / 574 static_cast<double>(ticks_per_frame)); 575 } 576 577 float GPU::ComputeDisplayAspectRatio() const 578 { 579 if (g_settings.debugging.show_vram) 580 { 581 return static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT); 582 } 583 else if (g_settings.display_force_4_3_for_24bit && m_GPUSTAT.display_area_color_depth_24) 584 { 585 return 4.0f / 3.0f; 586 } 587 else if (g_settings.display_aspect_ratio == DisplayAspectRatio::Auto) 588 { 589 const CRTCState& cs = m_crtc_state; 590 float relative_width = static_cast<float>(cs.horizontal_visible_end - cs.horizontal_visible_start); 591 float relative_height = static_cast<float>(cs.vertical_visible_end - cs.vertical_visible_start); 592 593 if (relative_width <= 0 || relative_height <= 0) 594 return 4.0f / 3.0f; 595 596 if (m_GPUSTAT.pal_mode) 597 { 598 relative_width /= static_cast<float>(PAL_HORIZONTAL_ACTIVE_END - PAL_HORIZONTAL_ACTIVE_START); 599 relative_height /= static_cast<float>(PAL_VERTICAL_ACTIVE_END - PAL_VERTICAL_ACTIVE_START); 600 } 601 else 602 { 603 relative_width /= static_cast<float>(NTSC_HORIZONTAL_ACTIVE_END - NTSC_HORIZONTAL_ACTIVE_START); 604 relative_height /= static_cast<float>(NTSC_VERTICAL_ACTIVE_END - NTSC_VERTICAL_ACTIVE_START); 605 } 606 return (relative_width / relative_height) * (4.0f / 3.0f); 607 } 608 else if (g_settings.display_aspect_ratio == DisplayAspectRatio::PAR1_1) 609 { 610 if (m_crtc_state.display_width == 0 || m_crtc_state.display_height == 0) 611 return 4.0f / 3.0f; 612 613 return static_cast<float>(m_crtc_state.display_width) / static_cast<float>(m_crtc_state.display_height); 614 } 615 else 616 { 617 return g_settings.GetDisplayAspectRatioValue(); 618 } 619 } 620 621 void GPU::UpdateCRTCConfig() 622 { 623 static constexpr std::array<u16, 8> dot_clock_dividers = {{10, 8, 5, 4, 7, 7, 7, 7}}; 624 CRTCState& cs = m_crtc_state; 625 626 cs.vertical_total = m_GPUSTAT.pal_mode ? PAL_TOTAL_LINES : NTSC_TOTAL_LINES; 627 cs.horizontal_total = m_GPUSTAT.pal_mode ? PAL_TICKS_PER_LINE : NTSC_TICKS_PER_LINE; 628 cs.horizontal_active_start = m_GPUSTAT.pal_mode ? PAL_HORIZONTAL_ACTIVE_START : NTSC_HORIZONTAL_ACTIVE_START; 629 cs.horizontal_active_end = m_GPUSTAT.pal_mode ? 
PAL_HORIZONTAL_ACTIVE_END : NTSC_HORIZONTAL_ACTIVE_END; 630 631 const u8 horizontal_resolution_index = m_GPUSTAT.horizontal_resolution_1 | (m_GPUSTAT.horizontal_resolution_2 << 2); 632 cs.dot_clock_divider = dot_clock_dividers[horizontal_resolution_index]; 633 cs.horizontal_display_start = 634 (std::min<u16>(cs.regs.X1, cs.horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider; 635 cs.horizontal_display_end = 636 (std::min<u16>(cs.regs.X2, cs.horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider; 637 cs.vertical_display_start = std::min<u16>(cs.regs.Y1, cs.vertical_total); 638 cs.vertical_display_end = std::min<u16>(cs.regs.Y2, cs.vertical_total); 639 640 if (m_GPUSTAT.pal_mode && m_force_ntsc_timings) 641 { 642 // scale to NTSC parameters 643 cs.horizontal_display_start = 644 static_cast<u16>((static_cast<u32>(cs.horizontal_display_start) * NTSC_TICKS_PER_LINE) / PAL_TICKS_PER_LINE); 645 cs.horizontal_display_end = static_cast<u16>( 646 ((static_cast<u32>(cs.horizontal_display_end) * NTSC_TICKS_PER_LINE) + (PAL_TICKS_PER_LINE - 1)) / 647 PAL_TICKS_PER_LINE); 648 cs.vertical_display_start = 649 static_cast<u16>((static_cast<u32>(cs.vertical_display_start) * NTSC_TOTAL_LINES) / PAL_TOTAL_LINES); 650 cs.vertical_display_end = static_cast<u16>( 651 ((static_cast<u32>(cs.vertical_display_end) * NTSC_TOTAL_LINES) + (PAL_TOTAL_LINES - 1)) / PAL_TOTAL_LINES); 652 653 cs.vertical_total = NTSC_TOTAL_LINES; 654 cs.current_scanline %= NTSC_TOTAL_LINES; 655 cs.horizontal_total = NTSC_TICKS_PER_LINE; 656 cs.current_tick_in_scanline %= NTSC_TICKS_PER_LINE; 657 } 658 659 cs.horizontal_display_start = 660 static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_display_start))); 661 cs.horizontal_display_end = 662 static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_display_end))); 663 cs.horizontal_active_start = 664 static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_active_start))); 665 cs.horizontal_active_end = 666 static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_active_end))); 667 cs.horizontal_total = static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_total))); 668 669 cs.current_tick_in_scanline %= cs.horizontal_total; 670 cs.UpdateHBlankFlag(); 671 672 cs.current_scanline %= cs.vertical_total; 673 674 System::SetThrottleFrequency(ComputeVerticalFrequency()); 675 676 UpdateCRTCDisplayParameters(); 677 UpdateCRTCTickEvent(); 678 } 679 680 void GPU::UpdateCRTCDisplayParameters() 681 { 682 CRTCState& cs = m_crtc_state; 683 const DisplayCropMode crop_mode = g_settings.display_crop_mode; 684 685 const u16 horizontal_total = m_GPUSTAT.pal_mode ? PAL_TICKS_PER_LINE : NTSC_TICKS_PER_LINE; 686 const u16 vertical_total = m_GPUSTAT.pal_mode ? PAL_TOTAL_LINES : NTSC_TOTAL_LINES; 687 const u16 horizontal_display_start = 688 (std::min<u16>(cs.regs.X1, horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider; 689 const u16 horizontal_display_end = 690 (std::min<u16>(cs.regs.X2, horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider; 691 const u16 vertical_display_start = std::min<u16>(cs.regs.Y1, vertical_total); 692 const u16 vertical_display_end = std::min<u16>(cs.regs.Y2, vertical_total); 693 694 if (m_GPUSTAT.pal_mode) 695 { 696 // TODO: Verify PAL numbers. 
697 switch (crop_mode) 698 { 699 case DisplayCropMode::None: 700 cs.horizontal_visible_start = PAL_HORIZONTAL_ACTIVE_START; 701 cs.horizontal_visible_end = PAL_HORIZONTAL_ACTIVE_END; 702 cs.vertical_visible_start = PAL_VERTICAL_ACTIVE_START; 703 cs.vertical_visible_end = PAL_VERTICAL_ACTIVE_END; 704 break; 705 706 case DisplayCropMode::Overscan: 707 cs.horizontal_visible_start = static_cast<u16>(std::max<int>(0, 628 + g_settings.display_active_start_offset)); 708 cs.horizontal_visible_end = 709 static_cast<u16>(std::max<int>(cs.horizontal_visible_start, 3188 + g_settings.display_active_end_offset)); 710 cs.vertical_visible_start = static_cast<u16>(std::max<int>(0, 30 + g_settings.display_line_start_offset)); 711 cs.vertical_visible_end = 712 static_cast<u16>(std::max<int>(cs.vertical_visible_start, 298 + g_settings.display_line_end_offset)); 713 break; 714 715 case DisplayCropMode::Borders: 716 default: 717 cs.horizontal_visible_start = horizontal_display_start; 718 cs.horizontal_visible_end = horizontal_display_end; 719 cs.vertical_visible_start = vertical_display_start; 720 cs.vertical_visible_end = vertical_display_end; 721 break; 722 } 723 cs.horizontal_visible_start = 724 std::clamp<u16>(cs.horizontal_visible_start, PAL_HORIZONTAL_ACTIVE_START, PAL_HORIZONTAL_ACTIVE_END); 725 cs.horizontal_visible_end = 726 std::clamp<u16>(cs.horizontal_visible_end, cs.horizontal_visible_start, PAL_HORIZONTAL_ACTIVE_END); 727 cs.vertical_visible_start = 728 std::clamp<u16>(cs.vertical_visible_start, PAL_VERTICAL_ACTIVE_START, PAL_VERTICAL_ACTIVE_END); 729 cs.vertical_visible_end = 730 std::clamp<u16>(cs.vertical_visible_end, cs.vertical_visible_start, PAL_VERTICAL_ACTIVE_END); 731 } 732 else 733 { 734 switch (crop_mode) 735 { 736 case DisplayCropMode::None: 737 cs.horizontal_visible_start = NTSC_HORIZONTAL_ACTIVE_START; 738 cs.horizontal_visible_end = NTSC_HORIZONTAL_ACTIVE_END; 739 cs.vertical_visible_start = NTSC_VERTICAL_ACTIVE_START; 740 cs.vertical_visible_end = NTSC_VERTICAL_ACTIVE_END; 741 break; 742 743 case DisplayCropMode::Overscan: 744 cs.horizontal_visible_start = static_cast<u16>(std::max<int>(0, 608 + g_settings.display_active_start_offset)); 745 cs.horizontal_visible_end = 746 static_cast<u16>(std::max<int>(cs.horizontal_visible_start, 3168 + g_settings.display_active_end_offset)); 747 cs.vertical_visible_start = static_cast<u16>(std::max<int>(0, 24 + g_settings.display_line_start_offset)); 748 cs.vertical_visible_end = 749 static_cast<u16>(std::max<int>(cs.vertical_visible_start, 248 + g_settings.display_line_end_offset)); 750 break; 751 752 case DisplayCropMode::Borders: 753 default: 754 cs.horizontal_visible_start = horizontal_display_start; 755 cs.horizontal_visible_end = horizontal_display_end; 756 cs.vertical_visible_start = vertical_display_start; 757 cs.vertical_visible_end = vertical_display_end; 758 break; 759 } 760 cs.horizontal_visible_start = 761 std::clamp<u16>(cs.horizontal_visible_start, NTSC_HORIZONTAL_ACTIVE_START, NTSC_HORIZONTAL_ACTIVE_END); 762 cs.horizontal_visible_end = 763 std::clamp<u16>(cs.horizontal_visible_end, cs.horizontal_visible_start, NTSC_HORIZONTAL_ACTIVE_END); 764 cs.vertical_visible_start = 765 std::clamp<u16>(cs.vertical_visible_start, NTSC_VERTICAL_ACTIVE_START, NTSC_VERTICAL_ACTIVE_END); 766 cs.vertical_visible_end = 767 std::clamp<u16>(cs.vertical_visible_end, cs.vertical_visible_start, NTSC_VERTICAL_ACTIVE_END); 768 } 769 770 // If force-progressive is enabled, we only double the height in 480i mode. 
This way non-interleaved 480i framebuffers 771 // won't be broken when displayed. 772 const u8 y_shift = BoolToUInt8(m_GPUSTAT.vertical_interlace && m_GPUSTAT.vertical_resolution); 773 const u8 height_shift = m_force_progressive_scan ? y_shift : BoolToUInt8(m_GPUSTAT.vertical_interlace); 774 775 // Determine screen size. 776 cs.display_width = (cs.horizontal_visible_end - cs.horizontal_visible_start) / cs.dot_clock_divider; 777 cs.display_height = (cs.vertical_visible_end - cs.vertical_visible_start) << height_shift; 778 779 // Determine number of pixels outputted from VRAM (in general, round to 4-pixel multiple). 780 // TODO: Verify behavior if values are outside of the active video portion of scanline. 781 const u16 horizontal_display_ticks = 782 (horizontal_display_end < horizontal_display_start) ? 0 : (horizontal_display_end - horizontal_display_start); 783 784 const u16 horizontal_display_pixels = horizontal_display_ticks / cs.dot_clock_divider; 785 if (horizontal_display_pixels == 1u) 786 cs.display_vram_width = 4u; 787 else 788 cs.display_vram_width = (horizontal_display_pixels + 2u) & ~3u; 789 790 // Determine if we need to adjust the VRAM rectangle (because the display is starting outside the visible area) or add 791 // padding. 792 u16 horizontal_skip_pixels; 793 if (horizontal_display_start >= cs.horizontal_visible_start) 794 { 795 cs.display_origin_left = (horizontal_display_start - cs.horizontal_visible_start) / cs.dot_clock_divider; 796 cs.display_vram_left = cs.regs.X; 797 horizontal_skip_pixels = 0; 798 } 799 else 800 { 801 horizontal_skip_pixels = (cs.horizontal_visible_start - horizontal_display_start) / cs.dot_clock_divider; 802 cs.display_origin_left = 0; 803 cs.display_vram_left = (cs.regs.X + horizontal_skip_pixels) % VRAM_WIDTH; 804 } 805 806 // apply the crop from the start (usually overscan) 807 cs.display_vram_width -= std::min(cs.display_vram_width, horizontal_skip_pixels); 808 809 // Apply crop from the end by shrinking VRAM rectangle width if display would end outside the visible area. 
810 cs.display_vram_width = std::min<u16>(cs.display_vram_width, cs.display_width - cs.display_origin_left); 811 812 if (vertical_display_start >= cs.vertical_visible_start) 813 { 814 cs.display_origin_top = (vertical_display_start - cs.vertical_visible_start) << y_shift; 815 cs.display_vram_top = cs.regs.Y; 816 } 817 else 818 { 819 cs.display_origin_top = 0; 820 cs.display_vram_top = (cs.regs.Y + ((cs.vertical_visible_start - vertical_display_start) << y_shift)) % VRAM_HEIGHT; 821 } 822 823 if (vertical_display_end <= cs.vertical_visible_end) 824 { 825 cs.display_vram_height = 826 (vertical_display_end - 827 std::min(vertical_display_end, std::max(vertical_display_start, cs.vertical_visible_start))) 828 << height_shift; 829 } 830 else 831 { 832 cs.display_vram_height = 833 (cs.vertical_visible_end - 834 std::min(cs.vertical_visible_end, std::max(vertical_display_start, cs.vertical_visible_start))) 835 << height_shift; 836 } 837 } 838 839 TickCount GPU::GetPendingCRTCTicks() const 840 { 841 const TickCount pending_sysclk_ticks = s_crtc_tick_event.GetTicksSinceLastExecution(); 842 TickCount fractional_ticks = m_crtc_state.fractional_ticks; 843 return SystemTicksToCRTCTicks(pending_sysclk_ticks, &fractional_ticks); 844 } 845 846 TickCount GPU::GetPendingCommandTicks() const 847 { 848 if (!s_command_tick_event.IsActive()) 849 return 0; 850 851 return SystemTicksToGPUTicks(s_command_tick_event.GetTicksSinceLastExecution()); 852 } 853 854 void GPU::UpdateCRTCTickEvent() 855 { 856 // figure out how many GPU ticks until the next vblank or event 857 TickCount lines_until_event; 858 if (Timers::IsSyncEnabled(HBLANK_TIMER_INDEX)) 859 { 860 // when the timer sync is enabled we need to sync at vblank start and end 861 lines_until_event = 862 (m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end) ? 863 (m_crtc_state.vertical_total - m_crtc_state.current_scanline + m_crtc_state.vertical_display_start) : 864 (m_crtc_state.vertical_display_end - m_crtc_state.current_scanline); 865 } 866 else 867 { 868 lines_until_event = 869 (m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end ? 870 (m_crtc_state.vertical_total - m_crtc_state.current_scanline + m_crtc_state.vertical_display_end) : 871 (m_crtc_state.vertical_display_end - m_crtc_state.current_scanline)); 872 } 873 if (Timers::IsExternalIRQEnabled(HBLANK_TIMER_INDEX)) 874 lines_until_event = std::min(lines_until_event, Timers::GetTicksUntilIRQ(HBLANK_TIMER_INDEX)); 875 876 TickCount ticks_until_event = 877 lines_until_event * m_crtc_state.horizontal_total - m_crtc_state.current_tick_in_scanline; 878 if (Timers::IsExternalIRQEnabled(DOT_TIMER_INDEX)) 879 { 880 const TickCount dots_until_irq = Timers::GetTicksUntilIRQ(DOT_TIMER_INDEX); 881 const TickCount ticks_until_irq = 882 (dots_until_irq * m_crtc_state.dot_clock_divider) - m_crtc_state.fractional_dot_ticks; 883 ticks_until_event = std::min(ticks_until_event, std::max<TickCount>(ticks_until_irq, 0)); 884 } 885 886 if (Timers::IsSyncEnabled(DOT_TIMER_INDEX)) 887 { 888 // This could potentially be optimized to skip the time the gate is active, if we're resetting and free running. 889 // But realistically, I've only seen sync off (most games), or reset+pause on gate (Konami Lightgun games). 
890 TickCount ticks_until_hblank_start_or_end; 891 if (m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_active_end) 892 { 893 ticks_until_hblank_start_or_end = 894 m_crtc_state.horizontal_total - m_crtc_state.current_tick_in_scanline + m_crtc_state.horizontal_active_start; 895 } 896 else if (m_crtc_state.current_tick_in_scanline < m_crtc_state.horizontal_active_start) 897 { 898 ticks_until_hblank_start_or_end = m_crtc_state.horizontal_active_start - m_crtc_state.current_tick_in_scanline; 899 } 900 else 901 { 902 ticks_until_hblank_start_or_end = m_crtc_state.horizontal_active_end - m_crtc_state.current_tick_in_scanline; 903 } 904 905 ticks_until_event = std::min(ticks_until_event, ticks_until_hblank_start_or_end); 906 } 907 908 s_crtc_tick_event.Schedule(CRTCTicksToSystemTicks(ticks_until_event, m_crtc_state.fractional_ticks)); 909 } 910 911 bool GPU::IsCRTCScanlinePending() const 912 { 913 // TODO: Most of these should be fields, not lines. 914 const TickCount ticks = (GetPendingCRTCTicks() + m_crtc_state.current_tick_in_scanline); 915 return (ticks >= m_crtc_state.horizontal_total); 916 } 917 918 bool GPU::IsCommandCompletionPending() const 919 { 920 return (m_pending_command_ticks > 0 && GetPendingCommandTicks() >= m_pending_command_ticks); 921 } 922 923 void GPU::CRTCTickEvent(TickCount ticks) 924 { 925 // convert cpu/master clock to GPU ticks, accounting for partial cycles because of the non-integer divider 926 const TickCount prev_tick = m_crtc_state.current_tick_in_scanline; 927 const TickCount gpu_ticks = SystemTicksToCRTCTicks(ticks, &m_crtc_state.fractional_ticks); 928 m_crtc_state.current_tick_in_scanline += gpu_ticks; 929 930 if (Timers::IsUsingExternalClock(DOT_TIMER_INDEX)) 931 { 932 m_crtc_state.fractional_dot_ticks += gpu_ticks; 933 const TickCount dots = m_crtc_state.fractional_dot_ticks / m_crtc_state.dot_clock_divider; 934 m_crtc_state.fractional_dot_ticks = m_crtc_state.fractional_dot_ticks % m_crtc_state.dot_clock_divider; 935 if (dots > 0) 936 Timers::AddTicks(DOT_TIMER_INDEX, dots); 937 } 938 939 if (m_crtc_state.current_tick_in_scanline < m_crtc_state.horizontal_total) 940 { 941 // short path when we execute <1 line.. this shouldn't occur often, except when gated (konami lightgun games). 942 m_crtc_state.UpdateHBlankFlag(); 943 Timers::SetGate(DOT_TIMER_INDEX, m_crtc_state.in_hblank); 944 if (Timers::IsUsingExternalClock(HBLANK_TIMER_INDEX)) 945 { 946 const u32 hblank_timer_ticks = 947 BoolToUInt32(m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_active_end) - 948 BoolToUInt32(prev_tick >= m_crtc_state.horizontal_active_end); 949 if (hblank_timer_ticks > 0) 950 Timers::AddTicks(HBLANK_TIMER_INDEX, static_cast<TickCount>(hblank_timer_ticks)); 951 } 952 953 UpdateCRTCTickEvent(); 954 return; 955 } 956 957 u32 lines_to_draw = m_crtc_state.current_tick_in_scanline / m_crtc_state.horizontal_total; 958 m_crtc_state.current_tick_in_scanline %= m_crtc_state.horizontal_total; 959 #if 0 960 Log_WarningPrintf("Old line: %u, new line: %u, drawing %u", m_crtc_state.current_scanline, 961 m_crtc_state.current_scanline + lines_to_draw, lines_to_draw); 962 #endif 963 964 m_crtc_state.UpdateHBlankFlag(); 965 Timers::SetGate(DOT_TIMER_INDEX, m_crtc_state.in_hblank); 966 967 if (Timers::IsUsingExternalClock(HBLANK_TIMER_INDEX)) 968 { 969 // lines_to_draw => number of times ticks passed horizontal_total. 970 // Subtract one if we were previously in hblank, but only on that line. 
If it was previously less than 971 // horizontal_active_start, we still want to add one, because hblank would have gone inactive, and then active again 972 // during the line. Finally add the current line being drawn, if hblank went inactive->active during the line. 973 const u32 hblank_timer_ticks = 974 lines_to_draw - BoolToUInt32(prev_tick >= m_crtc_state.horizontal_active_end) + 975 BoolToUInt32(m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_active_end); 976 if (hblank_timer_ticks > 0) 977 Timers::AddTicks(HBLANK_TIMER_INDEX, static_cast<TickCount>(hblank_timer_ticks)); 978 } 979 980 bool frame_done = false; 981 while (lines_to_draw > 0) 982 { 983 const u32 lines_to_draw_this_loop = 984 std::min(lines_to_draw, m_crtc_state.vertical_total - m_crtc_state.current_scanline); 985 const u32 prev_scanline = m_crtc_state.current_scanline; 986 m_crtc_state.current_scanline += lines_to_draw_this_loop; 987 DebugAssert(m_crtc_state.current_scanline <= m_crtc_state.vertical_total); 988 lines_to_draw -= lines_to_draw_this_loop; 989 990 // clear the vblank flag if the beam would pass through the display area 991 if (prev_scanline < m_crtc_state.vertical_display_start && 992 m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end) 993 { 994 Timers::SetGate(HBLANK_TIMER_INDEX, false); 995 InterruptController::SetLineState(InterruptController::IRQ::VBLANK, false); 996 m_crtc_state.in_vblank = false; 997 } 998 999 const bool new_vblank = m_crtc_state.current_scanline < m_crtc_state.vertical_display_start || 1000 m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end; 1001 if (m_crtc_state.in_vblank != new_vblank) 1002 { 1003 if (new_vblank) 1004 { 1005 DEBUG_LOG("Now in v-blank"); 1006 1007 // flush any pending draws and "scan out" the image 1008 // TODO: move present in here I guess 1009 FlushRender(); 1010 UpdateDisplay(); 1011 frame_done = true; 1012 1013 // switch fields early. this is needed so we draw to the correct one. 1014 if (m_GPUSTAT.InInterleaved480iMode()) 1015 m_crtc_state.interlaced_display_field = m_crtc_state.interlaced_field ^ 1u; 1016 else 1017 m_crtc_state.interlaced_display_field = 0; 1018 1019 #ifdef PSX_GPU_STATS 1020 if ((++s_active_gpu_cycles_frames) == 60) 1021 { 1022 const double busy_frac = 1023 static_cast<double>(s_active_gpu_cycles) / 1024 static_cast<double>(SystemTicksToGPUTicks(System::ScaleTicksToOverclock(System::MASTER_CLOCK)) * 1025 (ComputeVerticalFrequency() / 60.0f)); 1026 DEV_LOG("PSX GPU Usage: {:.2f}% [{:.0f} cycles avg per frame]", busy_frac * 100, 1027 static_cast<double>(s_active_gpu_cycles) / static_cast<double>(s_active_gpu_cycles_frames)); 1028 s_active_gpu_cycles = 0; 1029 s_active_gpu_cycles_frames = 0; 1030 } 1031 #endif 1032 } 1033 1034 Timers::SetGate(HBLANK_TIMER_INDEX, new_vblank); 1035 InterruptController::SetLineState(InterruptController::IRQ::VBLANK, new_vblank); 1036 m_crtc_state.in_vblank = new_vblank; 1037 } 1038 1039 // past the end of vblank? 
1040 if (m_crtc_state.current_scanline == m_crtc_state.vertical_total) 1041 { 1042 // start the new frame 1043 m_crtc_state.current_scanline = 0; 1044 if (m_GPUSTAT.vertical_interlace) 1045 { 1046 m_crtc_state.interlaced_field ^= 1u; 1047 m_GPUSTAT.interlaced_field = !m_crtc_state.interlaced_field; 1048 } 1049 else 1050 { 1051 m_crtc_state.interlaced_field = 0; 1052 m_GPUSTAT.interlaced_field = 0u; // new GPU = 1, old GPU = 0 1053 } 1054 } 1055 } 1056 1057 // alternating even line bit in 240-line mode 1058 if (m_GPUSTAT.InInterleaved480iMode()) 1059 { 1060 m_crtc_state.active_line_lsb = 1061 Truncate8((m_crtc_state.regs.Y + BoolToUInt32(m_crtc_state.interlaced_display_field)) & u32(1)); 1062 m_GPUSTAT.display_line_lsb = ConvertToBoolUnchecked( 1063 (m_crtc_state.regs.Y + (BoolToUInt8(!m_crtc_state.in_vblank) & m_crtc_state.interlaced_display_field)) & u32(1)); 1064 } 1065 else 1066 { 1067 m_crtc_state.active_line_lsb = 0; 1068 m_GPUSTAT.display_line_lsb = ConvertToBoolUnchecked((m_crtc_state.regs.Y + m_crtc_state.current_scanline) & u32(1)); 1069 } 1070 1071 UpdateCRTCTickEvent(); 1072 1073 if (frame_done) 1074 System::FrameDone(); 1075 } 1076 1077 void GPU::CommandTickEvent(TickCount ticks) 1078 { 1079 m_pending_command_ticks -= SystemTicksToGPUTicks(ticks); 1080 1081 m_executing_commands = true; 1082 ExecuteCommands(); 1083 UpdateCommandTickEvent(); 1084 m_executing_commands = false; 1085 } 1086 1087 void GPU::UpdateCommandTickEvent() 1088 { 1089 if (m_pending_command_ticks <= 0) 1090 { 1091 m_pending_command_ticks = 0; 1092 s_command_tick_event.Deactivate(); 1093 } 1094 else 1095 { 1096 s_command_tick_event.SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks)); 1097 } 1098 } 1099 1100 void GPU::ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x, 1101 float* display_y) const 1102 { 1103 GSVector4i display_rc, draw_rc; 1104 CalculateDrawRect(g_gpu_device->GetWindowWidth(), g_gpu_device->GetWindowHeight(), true, true, &display_rc, &draw_rc); 1105 1106 // convert coordinates to active display region, then to full display region 1107 const float scaled_display_x = 1108 (window_x - static_cast<float>(display_rc.left)) / static_cast<float>(display_rc.width()); 1109 const float scaled_display_y = 1110 (window_y - static_cast<float>(display_rc.top)) / static_cast<float>(display_rc.height()); 1111 1112 // scale back to internal resolution 1113 *display_x = scaled_display_x * static_cast<float>(m_crtc_state.display_width); 1114 *display_y = scaled_display_y * static_cast<float>(m_crtc_state.display_height); 1115 1116 // TODO: apply rotation matrix 1117 1118 DEV_LOG("win {:.0f},{:.0f} -> local {:.0f},{:.0f}, disp {:.2f},{:.2f} (size {},{} frac {},{})", window_x, window_y, 1119 window_x - draw_rc.left, window_y - draw_rc.top, *display_x, *display_y, m_crtc_state.display_width, 1120 m_crtc_state.display_height, *display_x / static_cast<float>(m_crtc_state.display_width), 1121 *display_y / static_cast<float>(m_crtc_state.display_height)); 1122 } 1123 1124 bool GPU::ConvertDisplayCoordinatesToBeamTicksAndLines(float display_x, float display_y, float x_scale, u32* out_tick, 1125 u32* out_line) const 1126 { 1127 if (x_scale != 1.0f) 1128 { 1129 const float dw = static_cast<float>(m_crtc_state.display_width); 1130 float scaled_x = ((display_x / dw) * 2.0f) - 1.0f; // 0..1 -> -1..1 1131 scaled_x *= x_scale; 1132 display_x = (((scaled_x + 1.0f) * 0.5f) * dw); // -1..1 -> 0..1 1133 } 1134 1135 if (display_x < 0 || static_cast<u32>(display_x) >= 
m_crtc_state.display_width || display_y < 0 || 1136 static_cast<u32>(display_y) >= m_crtc_state.display_height) 1137 { 1138 return false; 1139 } 1140 1141 *out_line = (static_cast<u32>(std::round(display_y)) >> BoolToUInt8(IsInterlacedDisplayEnabled())) + 1142 m_crtc_state.vertical_visible_start; 1143 *out_tick = static_cast<u32>(System::ScaleTicksToOverclock( 1144 static_cast<TickCount>(std::round(display_x * static_cast<float>(m_crtc_state.dot_clock_divider))))) + 1145 m_crtc_state.horizontal_visible_start; 1146 return true; 1147 } 1148 1149 void GPU::GetBeamPosition(u32* out_ticks, u32* out_line) 1150 { 1151 const u32 current_tick = (GetPendingCRTCTicks() + m_crtc_state.current_tick_in_scanline); 1152 *out_line = 1153 (m_crtc_state.current_scanline + (current_tick / m_crtc_state.horizontal_total)) % m_crtc_state.vertical_total; 1154 *out_ticks = current_tick % m_crtc_state.horizontal_total; 1155 } 1156 1157 TickCount GPU::GetSystemTicksUntilTicksAndLine(u32 ticks, u32 line) 1158 { 1159 u32 current_tick, current_line; 1160 GetBeamPosition(¤t_tick, ¤t_line); 1161 1162 u32 ticks_to_target; 1163 if (ticks >= current_tick) 1164 { 1165 ticks_to_target = ticks - current_tick; 1166 } 1167 else 1168 { 1169 ticks_to_target = (m_crtc_state.horizontal_total - current_tick) + ticks; 1170 current_line = (current_line + 1) % m_crtc_state.vertical_total; 1171 } 1172 1173 const u32 lines_to_target = 1174 (line >= current_line) ? (line - current_line) : ((m_crtc_state.vertical_total - current_line) + line); 1175 1176 const TickCount total_ticks_to_target = 1177 static_cast<TickCount>((lines_to_target * m_crtc_state.horizontal_total) + ticks_to_target); 1178 1179 return CRTCTicksToSystemTicks(total_ticks_to_target, m_crtc_state.fractional_ticks); 1180 } 1181 1182 u32 GPU::ReadGPUREAD() 1183 { 1184 if (m_blitter_state != BlitterState::ReadingVRAM) 1185 return m_GPUREAD_latch; 1186 1187 // Read two pixels out of VRAM and combine them. Zero fill odd pixel counts. 1188 u32 value = 0; 1189 for (u32 i = 0; i < 2; i++) 1190 { 1191 // Read with correct wrap-around behavior. 
1192 const u16 read_x = (m_vram_transfer.x + m_vram_transfer.col) % VRAM_WIDTH; 1193 const u16 read_y = (m_vram_transfer.y + m_vram_transfer.row) % VRAM_HEIGHT; 1194 value |= ZeroExtend32(g_vram[read_y * VRAM_WIDTH + read_x]) << (i * 16); 1195 1196 if (++m_vram_transfer.col == m_vram_transfer.width) 1197 { 1198 m_vram_transfer.col = 0; 1199 1200 if (++m_vram_transfer.row == m_vram_transfer.height) 1201 { 1202 DEBUG_LOG("End of VRAM->CPU transfer"); 1203 m_vram_transfer = {}; 1204 m_blitter_state = BlitterState::Idle; 1205 1206 // end of transfer, catch up on any commands which were written (unlikely) 1207 ExecuteCommands(); 1208 break; 1209 } 1210 } 1211 } 1212 1213 m_GPUREAD_latch = value; 1214 return value; 1215 } 1216 1217 void GPU::WriteGP1(u32 value) 1218 { 1219 const u32 command = (value >> 24) & 0x3Fu; 1220 const u32 param = value & UINT32_C(0x00FFFFFF); 1221 switch (command) 1222 { 1223 case 0x00: // Reset GPU 1224 { 1225 DEBUG_LOG("GP1 reset GPU"); 1226 s_command_tick_event.InvokeEarly(); 1227 SynchronizeCRTC(); 1228 SoftReset(); 1229 } 1230 break; 1231 1232 case 0x01: // Clear FIFO 1233 { 1234 DEBUG_LOG("GP1 clear FIFO"); 1235 s_command_tick_event.InvokeEarly(); 1236 SynchronizeCRTC(); 1237 1238 // flush partial writes 1239 if (m_blitter_state == BlitterState::WritingVRAM) 1240 FinishVRAMWrite(); 1241 1242 m_blitter_state = BlitterState::Idle; 1243 m_command_total_words = 0; 1244 m_vram_transfer = {}; 1245 m_fifo.Clear(); 1246 m_blit_buffer.clear(); 1247 m_blit_remaining_words = 0; 1248 m_pending_command_ticks = 0; 1249 s_command_tick_event.Deactivate(); 1250 UpdateDMARequest(); 1251 UpdateGPUIdle(); 1252 } 1253 break; 1254 1255 case 0x02: // Acknowledge Interrupt 1256 { 1257 DEBUG_LOG("Acknowledge interrupt"); 1258 m_GPUSTAT.interrupt_request = false; 1259 InterruptController::SetLineState(InterruptController::IRQ::GPU, false); 1260 } 1261 break; 1262 1263 case 0x03: // Display on/off 1264 { 1265 const bool disable = ConvertToBoolUnchecked(value & 0x01); 1266 DEBUG_LOG("Display {}", disable ? 
"disabled" : "enabled"); 1267 SynchronizeCRTC(); 1268 1269 if (!m_GPUSTAT.display_disable && disable && IsInterlacedDisplayEnabled()) 1270 ClearDisplay(); 1271 1272 m_GPUSTAT.display_disable = disable; 1273 } 1274 break; 1275 1276 case 0x04: // DMA Direction 1277 { 1278 DEBUG_LOG("DMA direction <- 0x{:02X}", static_cast<u32>(param)); 1279 if (m_GPUSTAT.dma_direction != static_cast<DMADirection>(param)) 1280 { 1281 m_GPUSTAT.dma_direction = static_cast<DMADirection>(param); 1282 UpdateDMARequest(); 1283 } 1284 } 1285 break; 1286 1287 case 0x05: // Set display start address 1288 { 1289 const u32 new_value = param & CRTCState::Regs::DISPLAY_ADDRESS_START_MASK; 1290 DEBUG_LOG("Display address start <- 0x{:08X}", new_value); 1291 1292 System::IncrementInternalFrameNumber(); 1293 if (m_crtc_state.regs.display_address_start != new_value) 1294 { 1295 SynchronizeCRTC(); 1296 m_crtc_state.regs.display_address_start = new_value; 1297 UpdateCRTCDisplayParameters(); 1298 OnBufferSwapped(); 1299 } 1300 } 1301 break; 1302 1303 case 0x06: // Set horizontal display range 1304 { 1305 const u32 new_value = param & CRTCState::Regs::HORIZONTAL_DISPLAY_RANGE_MASK; 1306 DEBUG_LOG("Horizontal display range <- 0x{:08X}", new_value); 1307 1308 if (m_crtc_state.regs.horizontal_display_range != new_value) 1309 { 1310 SynchronizeCRTC(); 1311 m_crtc_state.regs.horizontal_display_range = new_value; 1312 UpdateCRTCConfig(); 1313 } 1314 } 1315 break; 1316 1317 case 0x07: // Set vertical display range 1318 { 1319 const u32 new_value = param & CRTCState::Regs::VERTICAL_DISPLAY_RANGE_MASK; 1320 DEBUG_LOG("Vertical display range <- 0x{:08X}", new_value); 1321 1322 if (m_crtc_state.regs.vertical_display_range != new_value) 1323 { 1324 SynchronizeCRTC(); 1325 m_crtc_state.regs.vertical_display_range = new_value; 1326 UpdateCRTCConfig(); 1327 } 1328 } 1329 break; 1330 1331 case 0x08: // Set display mode 1332 { 1333 union GP1_08h 1334 { 1335 u32 bits; 1336 1337 BitField<u32, u8, 0, 2> horizontal_resolution_1; 1338 BitField<u32, bool, 2, 1> vertical_resolution; 1339 BitField<u32, bool, 3, 1> pal_mode; 1340 BitField<u32, bool, 4, 1> display_area_color_depth; 1341 BitField<u32, bool, 5, 1> vertical_interlace; 1342 BitField<u32, bool, 6, 1> horizontal_resolution_2; 1343 BitField<u32, bool, 7, 1> reverse_flag; 1344 }; 1345 1346 const GP1_08h dm{param}; 1347 GPUSTAT new_GPUSTAT{m_GPUSTAT.bits}; 1348 new_GPUSTAT.horizontal_resolution_1 = dm.horizontal_resolution_1; 1349 new_GPUSTAT.vertical_resolution = dm.vertical_resolution; 1350 new_GPUSTAT.pal_mode = dm.pal_mode; 1351 new_GPUSTAT.display_area_color_depth_24 = dm.display_area_color_depth; 1352 new_GPUSTAT.vertical_interlace = dm.vertical_interlace; 1353 new_GPUSTAT.horizontal_resolution_2 = dm.horizontal_resolution_2; 1354 new_GPUSTAT.reverse_flag = dm.reverse_flag; 1355 DEBUG_LOG("Set display mode <- 0x{:08X}", dm.bits); 1356 1357 if (!m_GPUSTAT.vertical_interlace && dm.vertical_interlace && !m_force_progressive_scan) 1358 { 1359 // bit of a hack, technically we should pull the previous frame in, but this may not exist anymore 1360 ClearDisplay(); 1361 } 1362 1363 if (m_GPUSTAT.bits != new_GPUSTAT.bits) 1364 { 1365 // Have to be careful when setting this because Synchronize() can modify GPUSTAT. 
1366 static constexpr u32 SET_MASK = UINT32_C(0b00000000011111110100000000000000); 1367 s_command_tick_event.InvokeEarly(); 1368 SynchronizeCRTC(); 1369 m_GPUSTAT.bits = (m_GPUSTAT.bits & ~SET_MASK) | (new_GPUSTAT.bits & SET_MASK); 1370 UpdateCRTCConfig(); 1371 } 1372 } 1373 break; 1374 1375 case 0x09: // Allow texture disable 1376 { 1377 m_set_texture_disable_mask = ConvertToBoolUnchecked(param & 0x01); 1378 DEBUG_LOG("Set texture disable mask <- {}", m_set_texture_disable_mask ? "allowed" : "ignored"); 1379 } 1380 break; 1381 1382 case 0x10: 1383 case 0x11: 1384 case 0x12: 1385 case 0x13: 1386 case 0x14: 1387 case 0x15: 1388 case 0x16: 1389 case 0x17: 1390 case 0x18: 1391 case 0x19: 1392 case 0x1A: 1393 case 0x1B: 1394 case 0x1C: 1395 case 0x1D: 1396 case 0x1E: 1397 case 0x1F: 1398 { 1399 HandleGetGPUInfoCommand(value); 1400 } 1401 break; 1402 1403 [[unlikely]] default : ERROR_LOG("Unimplemented GP1 command 0x{:02X}", command); 1404 break; 1405 } 1406 } 1407 1408 void GPU::HandleGetGPUInfoCommand(u32 value) 1409 { 1410 const u8 subcommand = Truncate8(value & 0x07); 1411 switch (subcommand) 1412 { 1413 case 0x00: 1414 case 0x01: 1415 case 0x06: 1416 case 0x07: 1417 // leave GPUREAD intact 1418 break; 1419 1420 case 0x02: // Get Texture Window 1421 { 1422 DEBUG_LOG("Get texture window"); 1423 m_GPUREAD_latch = m_draw_mode.texture_window_value; 1424 } 1425 break; 1426 1427 case 0x03: // Get Draw Area Top Left 1428 { 1429 DEBUG_LOG("Get drawing area top left"); 1430 m_GPUREAD_latch = 1431 ((m_drawing_area.left & UINT32_C(0b1111111111)) | ((m_drawing_area.top & UINT32_C(0b1111111111)) << 10)); 1432 } 1433 break; 1434 1435 case 0x04: // Get Draw Area Bottom Right 1436 { 1437 DEBUG_LOG("Get drawing area bottom right"); 1438 m_GPUREAD_latch = 1439 ((m_drawing_area.right & UINT32_C(0b1111111111)) | ((m_drawing_area.bottom & UINT32_C(0b1111111111)) << 10)); 1440 } 1441 break; 1442 1443 case 0x05: // Get Drawing Offset 1444 { 1445 DEBUG_LOG("Get drawing offset"); 1446 m_GPUREAD_latch = 1447 ((m_drawing_offset.x & INT32_C(0b11111111111)) | ((m_drawing_offset.y & INT32_C(0b11111111111)) << 11)); 1448 } 1449 break; 1450 1451 [[unlikely]] default : WARNING_LOG("Unhandled GetGPUInfo(0x{:02X})", subcommand); 1452 break; 1453 } 1454 } 1455 1456 void GPU::UpdateCLUTIfNeeded(GPUTextureMode texmode, GPUTexturePaletteReg clut) 1457 { 1458 if (texmode >= GPUTextureMode::Direct16Bit) 1459 return; 1460 1461 const bool needs_8bit = (texmode == GPUTextureMode::Palette8Bit); 1462 if ((clut.bits != m_current_clut_reg_bits) || BoolToUInt8(needs_8bit) > BoolToUInt8(m_current_clut_is_8bit)) 1463 { 1464 DEBUG_LOG("Reloading CLUT from {},{}, {}", clut.GetXBase(), clut.GetYBase(), needs_8bit ? "8-bit" : "4-bit"); 1465 AddCommandTicks(needs_8bit ? 256 : 16); 1466 UpdateCLUT(clut, needs_8bit); 1467 m_current_clut_reg_bits = clut.bits; 1468 m_current_clut_is_8bit = needs_8bit; 1469 } 1470 } 1471 1472 void GPU::InvalidateCLUT() 1473 { 1474 m_current_clut_reg_bits = std::numeric_limits<decltype(m_current_clut_reg_bits)>::max(); // will never match 1475 m_current_clut_is_8bit = false; 1476 } 1477 1478 bool GPU::IsCLUTValid() const 1479 { 1480 return (m_current_clut_reg_bits != std::numeric_limits<decltype(m_current_clut_reg_bits)>::max()); 1481 } 1482 1483 void GPU::ClearDisplay() 1484 { 1485 ClearDisplayTexture(); 1486 1487 // Just recycle the textures, it'll get re-fetched. 
1488 DestroyDeinterlaceTextures(); 1489 } 1490 1491 void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height) 1492 { 1493 } 1494 1495 void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) 1496 { 1497 const u16 color16 = VRAMRGBA8888ToRGBA5551(color); 1498 const GSVector4i fill = GSVector4i(color16, color16, color16, color16, color16, color16, color16, color16); 1499 constexpr u32 vector_width = 8; 1500 const u32 aligned_width = Common::AlignDownPow2(width, vector_width); 1501 1502 if ((x + width) <= VRAM_WIDTH && !IsInterlacedRenderingEnabled()) 1503 { 1504 for (u32 yoffs = 0; yoffs < height; yoffs++) 1505 { 1506 const u32 row = (y + yoffs) % VRAM_HEIGHT; 1507 1508 u16* row_ptr = &g_vram[row * VRAM_WIDTH + x]; 1509 u32 xoffs = 0; 1510 for (; xoffs < aligned_width; xoffs += vector_width, row_ptr += vector_width) 1511 GSVector4i::store<false>(row_ptr, fill); 1512 for (; xoffs < width; xoffs++) 1513 *(row_ptr++) = color16; 1514 } 1515 } 1516 else if (IsInterlacedRenderingEnabled()) 1517 { 1518 // Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field. 1519 if (IsCRTCScanlinePending()) 1520 SynchronizeCRTC(); 1521 1522 const u32 active_field = GetActiveLineLSB(); 1523 if ((x + width) <= VRAM_WIDTH) 1524 { 1525 for (u32 yoffs = 0; yoffs < height; yoffs++) 1526 { 1527 const u32 row = (y + yoffs) % VRAM_HEIGHT; 1528 if ((row & u32(1)) == active_field) 1529 continue; 1530 1531 u16* row_ptr = &g_vram[row * VRAM_WIDTH + x]; 1532 u32 xoffs = 0; 1533 for (; xoffs < aligned_width; xoffs += vector_width, row_ptr += vector_width) 1534 GSVector4i::store<false>(row_ptr, fill); 1535 for (; xoffs < width; xoffs++) 1536 *(row_ptr++) = color16; 1537 } 1538 } 1539 else 1540 { 1541 for (u32 yoffs = 0; yoffs < height; yoffs++) 1542 { 1543 const u32 row = (y + yoffs) % VRAM_HEIGHT; 1544 if ((row & u32(1)) == active_field) 1545 continue; 1546 1547 u16* row_ptr = &g_vram[row * VRAM_WIDTH]; 1548 for (u32 xoffs = 0; xoffs < width; xoffs++) 1549 { 1550 const u32 col = (x + xoffs) % VRAM_WIDTH; 1551 row_ptr[col] = color16; 1552 } 1553 } 1554 } 1555 } 1556 else 1557 { 1558 for (u32 yoffs = 0; yoffs < height; yoffs++) 1559 { 1560 const u32 row = (y + yoffs) % VRAM_HEIGHT; 1561 u16* row_ptr = &g_vram[row * VRAM_WIDTH]; 1562 for (u32 xoffs = 0; xoffs < width; xoffs++) 1563 { 1564 const u32 col = (x + xoffs) % VRAM_WIDTH; 1565 row_ptr[col] = color16; 1566 } 1567 } 1568 } 1569 } 1570 1571 void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) 1572 { 1573 // Fast path when the copy is not oversized. 1574 if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !set_mask && !check_mask) 1575 { 1576 const u16* src_ptr = static_cast<const u16*>(data); 1577 u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x]; 1578 for (u32 yoffs = 0; yoffs < height; yoffs++) 1579 { 1580 std::copy_n(src_ptr, width, dst_ptr); 1581 src_ptr += width; 1582 dst_ptr += VRAM_WIDTH; 1583 } 1584 } 1585 else 1586 { 1587 // Slow path when we need to handle wrap-around. 1588 // During transfer/render operations, if ((dst_pixel & mask_and) == 0) { pixel = src_pixel | mask_or } 1589 const u16* src_ptr = static_cast<const u16*>(data); 1590 const u16 mask_and = check_mask ? 0x8000 : 0; 1591 const u16 mask_or = set_mask ? 0x8000 : 0; 1592 1593 for (u32 row = 0; row < height;) 1594 { 1595 u16* dst_row_ptr = &g_vram[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH]; 1596 for (u32 col = 0; col < width;) 1597 { 1598 // TODO: Handle unaligned reads... 
1599 u16* pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH]; 1600 if (((*pixel_ptr) & mask_and) == 0) 1601 *pixel_ptr = *(src_ptr++) | mask_or; 1602 } 1603 } 1604 } 1605 } 1606 1607 void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) 1608 { 1609 // Break up oversized copies. This behavior has not been verified on console. 1610 if ((src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH) 1611 { 1612 u32 remaining_rows = height; 1613 u32 current_src_y = src_y; 1614 u32 current_dst_y = dst_y; 1615 while (remaining_rows > 0) 1616 { 1617 const u32 rows_to_copy = 1618 std::min<u32>(remaining_rows, std::min<u32>(VRAM_HEIGHT - current_src_y, VRAM_HEIGHT - current_dst_y)); 1619 1620 u32 remaining_columns = width; 1621 u32 current_src_x = src_x; 1622 u32 current_dst_x = dst_x; 1623 while (remaining_columns > 0) 1624 { 1625 const u32 columns_to_copy = 1626 std::min<u32>(remaining_columns, std::min<u32>(VRAM_WIDTH - current_src_x, VRAM_WIDTH - current_dst_x)); 1627 CopyVRAM(current_src_x, current_src_y, current_dst_x, current_dst_y, columns_to_copy, rows_to_copy); 1628 current_src_x = (current_src_x + columns_to_copy) % VRAM_WIDTH; 1629 current_dst_x = (current_dst_x + columns_to_copy) % VRAM_WIDTH; 1630 remaining_columns -= columns_to_copy; 1631 } 1632 1633 current_src_y = (current_src_y + rows_to_copy) % VRAM_HEIGHT; 1634 current_dst_y = (current_dst_y + rows_to_copy) % VRAM_HEIGHT; 1635 remaining_rows -= rows_to_copy; 1636 } 1637 1638 return; 1639 } 1640 1641 // This doesn't have a fast path, but do we really need one? It's not common. 1642 const u16 mask_and = m_GPUSTAT.GetMaskAND(); 1643 const u16 mask_or = m_GPUSTAT.GetMaskOR(); 1644 1645 // Copy in reverse when src_x < dst_x, this is verified on console. 1646 if (src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH)) 1647 { 1648 for (u32 row = 0; row < height; row++) 1649 { 1650 const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; 1651 u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; 1652 1653 for (s32 col = static_cast<s32>(width - 1); col >= 0; col--) 1654 { 1655 const u16 src_pixel = src_row_ptr[(src_x + static_cast<u32>(col)) % VRAM_WIDTH]; 1656 u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + static_cast<u32>(col)) % VRAM_WIDTH]; 1657 if ((*dst_pixel_ptr & mask_and) == 0) 1658 *dst_pixel_ptr = src_pixel | mask_or; 1659 } 1660 } 1661 } 1662 else 1663 { 1664 for (u32 row = 0; row < height; row++) 1665 { 1666 const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; 1667 u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; 1668 1669 for (u32 col = 0; col < width; col++) 1670 { 1671 const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH]; 1672 u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH]; 1673 if ((*dst_pixel_ptr & mask_and) == 0) 1674 *dst_pixel_ptr = src_pixel | mask_or; 1675 } 1676 } 1677 } 1678 } 1679 1680 void GPU::SetClampedDrawingArea() 1681 { 1682 if (m_drawing_area.left > m_drawing_area.right || m_drawing_area.top > m_drawing_area.bottom) [[unlikely]] 1683 { 1684 m_clamped_drawing_area = GSVector4i::zero(); 1685 return; 1686 } 1687 1688 const u32 right = std::min(m_drawing_area.right + 1, static_cast<u32>(VRAM_WIDTH)); 1689 const u32 left = std::min(m_drawing_area.left, std::min(m_drawing_area.right, VRAM_WIDTH - 1)); 1690 const u32 bottom = std::min(m_drawing_area.bottom + 1, static_cast<u32>(VRAM_HEIGHT)); 1691 const u32 top = std::min(m_drawing_area.top, 
std::min(m_drawing_area.bottom, VRAM_HEIGHT - 1)); 1692 m_clamped_drawing_area = GSVector4i(left, top, right, bottom); 1693 } 1694 1695 void GPU::SetDrawMode(u16 value) 1696 { 1697 GPUDrawModeReg new_mode_reg{static_cast<u16>(value & GPUDrawModeReg::MASK)}; 1698 if (!m_set_texture_disable_mask) 1699 new_mode_reg.texture_disable = false; 1700 1701 if (new_mode_reg.bits == m_draw_mode.mode_reg.bits) 1702 return; 1703 1704 m_draw_mode.texture_page_changed |= ((new_mode_reg.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK) != 1705 (m_draw_mode.mode_reg.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK)); 1706 m_draw_mode.mode_reg.bits = new_mode_reg.bits; 1707 1708 if (m_GPUSTAT.draw_to_displayed_field != new_mode_reg.draw_to_displayed_field) 1709 FlushRender(); 1710 1711 // Bits 0..10 are returned in the GPU status register. 1712 m_GPUSTAT.bits = (m_GPUSTAT.bits & ~(GPUDrawModeReg::GPUSTAT_MASK)) | 1713 (ZeroExtend32(new_mode_reg.bits) & GPUDrawModeReg::GPUSTAT_MASK); 1714 m_GPUSTAT.texture_disable = m_draw_mode.mode_reg.texture_disable; 1715 } 1716 1717 void GPU::SetTexturePalette(u16 value) 1718 { 1719 value &= DrawMode::PALETTE_MASK; 1720 if (m_draw_mode.palette_reg.bits == value) 1721 return; 1722 1723 m_draw_mode.palette_reg.bits = value; 1724 m_draw_mode.texture_page_changed = true; 1725 } 1726 1727 void GPU::SetTextureWindow(u32 value) 1728 { 1729 value &= DrawMode::TEXTURE_WINDOW_MASK; 1730 if (m_draw_mode.texture_window_value == value) 1731 return; 1732 1733 FlushRender(); 1734 1735 const u8 mask_x = Truncate8(value & UINT32_C(0x1F)); 1736 const u8 mask_y = Truncate8((value >> 5) & UINT32_C(0x1F)); 1737 const u8 offset_x = Truncate8((value >> 10) & UINT32_C(0x1F)); 1738 const u8 offset_y = Truncate8((value >> 15) & UINT32_C(0x1F)); 1739 DEBUG_LOG("Set texture window {:02X} {:02X} {:02X} {:02X}", mask_x, mask_y, offset_x, offset_y); 1740 1741 m_draw_mode.texture_window.and_x = ~(mask_x * 8); 1742 m_draw_mode.texture_window.and_y = ~(mask_y * 8); 1743 m_draw_mode.texture_window.or_x = (offset_x & mask_x) * 8u; 1744 m_draw_mode.texture_window.or_y = (offset_y & mask_y) * 8u; 1745 m_draw_mode.texture_window_value = value; 1746 m_draw_mode.texture_window_changed = true; 1747 } 1748 1749 void GPU::ReadCLUT(u16* dest, GPUTexturePaletteReg reg, bool clut_is_8bit) 1750 { 1751 const u16* src_row = &g_vram[reg.GetYBase() * VRAM_WIDTH]; 1752 const u32 start_x = reg.GetXBase(); 1753 if (!clut_is_8bit) 1754 { 1755 // Wraparound can't happen in 4-bit mode. 
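    // (The CLUT X base is specified in 16-halfword steps, so a 16-entry 4-bit palette always ends on or before the 1024-texel row boundary.)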
1756 std::memcpy(dest, &src_row[start_x], sizeof(u16) * 16); 1757 } 1758 else 1759 { 1760 if ((start_x + 256) > VRAM_WIDTH) [[unlikely]] 1761 { 1762 const u32 end = VRAM_WIDTH - start_x; 1763 const u32 start = 256 - end; 1764 std::memcpy(dest, &src_row[start_x], sizeof(u16) * end); 1765 std::memcpy(dest + end, src_row, sizeof(u16) * start); 1766 } 1767 else 1768 { 1769 std::memcpy(dest, &src_row[start_x], sizeof(u16) * 256); 1770 } 1771 } 1772 } 1773 1774 bool GPU::CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing) 1775 { 1776 GPUShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend, 1777 g_gpu_device->GetFeatures().framebuffer_fetch); 1778 1779 GPUPipeline::GraphicsConfig plconfig; 1780 plconfig.input_layout.vertex_stride = 0; 1781 plconfig.primitive = GPUPipeline::Primitive::Triangles; 1782 plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); 1783 plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); 1784 plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); 1785 plconfig.geometry_shader = nullptr; 1786 plconfig.depth_format = GPUTexture::Format::Unknown; 1787 plconfig.samples = 1; 1788 plconfig.per_sample_shading = false; 1789 plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; 1790 1791 if (display) 1792 { 1793 plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; 1794 plconfig.SetTargetFormats(g_gpu_device->HasSurface() ? g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8); 1795 1796 std::string vs = shadergen.GenerateDisplayVertexShader(); 1797 std::string fs; 1798 switch (g_settings.display_scaling) 1799 { 1800 case DisplayScalingMode::BilinearSharp: 1801 fs = shadergen.GenerateDisplaySharpBilinearFragmentShader(); 1802 break; 1803 1804 case DisplayScalingMode::BilinearSmooth: 1805 case DisplayScalingMode::BilinearInteger: 1806 fs = shadergen.GenerateDisplayFragmentShader(true); 1807 break; 1808 1809 case DisplayScalingMode::Nearest: 1810 case DisplayScalingMode::NearestInteger: 1811 default: 1812 fs = shadergen.GenerateDisplayFragmentShader(false); 1813 break; 1814 } 1815 1816 std::unique_ptr<GPUShader> vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), vs); 1817 std::unique_ptr<GPUShader> fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), fs); 1818 if (!vso || !fso) 1819 return false; 1820 GL_OBJECT_NAME(vso, "Display Vertex Shader"); 1821 GL_OBJECT_NAME_FMT(fso, "Display Fragment Shader [{}]", 1822 Settings::GetDisplayScalingName(g_settings.display_scaling)); 1823 plconfig.vertex_shader = vso.get(); 1824 plconfig.fragment_shader = fso.get(); 1825 if (!(m_display_pipeline = g_gpu_device->CreatePipeline(plconfig))) 1826 return false; 1827 GL_OBJECT_NAME_FMT(m_display_pipeline, "Display Pipeline [{}]", 1828 Settings::GetDisplayScalingName(g_settings.display_scaling)); 1829 } 1830 1831 if (deinterlace) 1832 { 1833 plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); 1834 1835 std::unique_ptr<GPUShader> vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), 1836 shadergen.GenerateScreenQuadVertexShader()); 1837 if (!vso) 1838 return false; 1839 GL_OBJECT_NAME(vso, "Deinterlace Vertex Shader"); 1840 1841 std::unique_ptr<GPUShader> fso; 1842 if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), 1843 shadergen.GenerateInterleavedFieldExtractFragmentShader()))) 1844 { 1845 return false; 1846 } 1847 1848 
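    // The field-extract pipeline is built for every deinterlacing mode; DeinterlaceExtractField() falls back to it whenever a plain texture copy cannot be used.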
GL_OBJECT_NAME(fso, "Deinterlace Field Extract Fragment Shader"); 1849 1850 plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; 1851 plconfig.vertex_shader = vso.get(); 1852 plconfig.fragment_shader = fso.get(); 1853 if (!(m_deinterlace_extract_pipeline = g_gpu_device->CreatePipeline(plconfig))) 1854 return false; 1855 1856 GL_OBJECT_NAME(m_deinterlace_extract_pipeline, "Deinterlace Field Extract Pipeline"); 1857 1858 switch (g_settings.display_deinterlacing_mode) 1859 { 1860 case DisplayDeinterlacingMode::Disabled: 1861 break; 1862 1863 case DisplayDeinterlacingMode::Weave: 1864 { 1865 if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), 1866 shadergen.GenerateDeinterlaceWeaveFragmentShader()))) 1867 { 1868 return false; 1869 } 1870 1871 GL_OBJECT_NAME(fso, "Weave Deinterlace Fragment Shader"); 1872 1873 plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; 1874 plconfig.vertex_shader = vso.get(); 1875 plconfig.fragment_shader = fso.get(); 1876 if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig))) 1877 return false; 1878 1879 GL_OBJECT_NAME(m_deinterlace_pipeline, "Weave Deinterlace Pipeline"); 1880 } 1881 break; 1882 1883 case DisplayDeinterlacingMode::Blend: 1884 { 1885 if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), 1886 shadergen.GenerateDeinterlaceBlendFragmentShader()))) 1887 { 1888 return false; 1889 } 1890 1891 GL_OBJECT_NAME(fso, "Blend Deinterlace Fragment Shader"); 1892 1893 plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; 1894 plconfig.vertex_shader = vso.get(); 1895 plconfig.fragment_shader = fso.get(); 1896 if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig))) 1897 return false; 1898 1899 GL_OBJECT_NAME(m_deinterlace_pipeline, "Blend Deinterlace Pipeline"); 1900 } 1901 break; 1902 1903 case DisplayDeinterlacingMode::Adaptive: 1904 { 1905 fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), 1906 shadergen.GenerateFastMADReconstructFragmentShader()); 1907 if (!fso) 1908 return false; 1909 1910 GL_OBJECT_NAME(fso, "FastMAD Reconstruct Fragment Shader"); 1911 1912 plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; 1913 plconfig.fragment_shader = fso.get(); 1914 if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig))) 1915 return false; 1916 1917 GL_OBJECT_NAME(m_deinterlace_pipeline, "FastMAD Reconstruct Pipeline"); 1918 } 1919 break; 1920 1921 default: 1922 UnreachableCode(); 1923 } 1924 } 1925 1926 if (chroma_smoothing) 1927 { 1928 m_chroma_smoothing_pipeline.reset(); 1929 g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); 1930 1931 if (g_settings.display_24bit_chroma_smoothing) 1932 { 1933 plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; 1934 plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); 1935 1936 std::unique_ptr<GPUShader> vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), 1937 shadergen.GenerateScreenQuadVertexShader()); 1938 std::unique_ptr<GPUShader> fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), 1939 shadergen.GenerateChromaSmoothingFragmentShader()); 1940 if (!vso || !fso) 1941 return false; 1942 GL_OBJECT_NAME(vso, "Chroma Smoothing Vertex Shader"); 1943 GL_OBJECT_NAME(fso, "Chroma Smoothing Fragment Shader"); 1944 1945 plconfig.vertex_shader = vso.get(); 1946 plconfig.fragment_shader = fso.get(); 1947 if 
(!(m_chroma_smoothing_pipeline = g_gpu_device->CreatePipeline(plconfig))) 1948 return false; 1949 GL_OBJECT_NAME(m_chroma_smoothing_pipeline, "Chroma Smoothing Pipeline"); 1950 } 1951 } 1952 1953 return true; 1954 } 1955 1956 void GPU::ClearDisplayTexture() 1957 { 1958 m_display_texture = nullptr; 1959 m_display_texture_view_x = 0; 1960 m_display_texture_view_y = 0; 1961 m_display_texture_view_width = 0; 1962 m_display_texture_view_height = 0; 1963 } 1964 1965 void GPU::SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_buffer, s32 view_x, s32 view_y, s32 view_width, 1966 s32 view_height) 1967 { 1968 DebugAssert(texture); 1969 m_display_texture = texture; 1970 m_display_depth_buffer = depth_buffer; 1971 m_display_texture_view_x = view_x; 1972 m_display_texture_view_y = view_y; 1973 m_display_texture_view_width = view_width; 1974 m_display_texture_view_height = view_height; 1975 } 1976 1977 bool GPU::PresentDisplay() 1978 { 1979 FlushRender(); 1980 1981 GSVector4i display_rect; 1982 GSVector4i draw_rect; 1983 CalculateDrawRect(g_gpu_device->GetWindowWidth(), g_gpu_device->GetWindowHeight(), !g_settings.debugging.show_vram, 1984 true, &display_rect, &draw_rect); 1985 return RenderDisplay(nullptr, display_rect, draw_rect, !g_settings.debugging.show_vram); 1986 } 1987 1988 bool GPU::RenderDisplay(GPUTexture* target, const GSVector4i display_rect, const GSVector4i draw_rect, bool postfx) 1989 { 1990 GL_SCOPE_FMT("RenderDisplay: {}", draw_rect); 1991 1992 if (m_display_texture) 1993 m_display_texture->MakeReadyForSampling(); 1994 1995 // Internal post-processing. 1996 GPUTexture* display_texture = m_display_texture; 1997 s32 display_texture_view_x = m_display_texture_view_x; 1998 s32 display_texture_view_y = m_display_texture_view_y; 1999 s32 display_texture_view_width = m_display_texture_view_width; 2000 s32 display_texture_view_height = m_display_texture_view_height; 2001 if (postfx && display_texture && PostProcessing::InternalChain.IsActive() && 2002 PostProcessing::InternalChain.CheckTargets(DISPLAY_INTERNAL_POSTFX_FORMAT, display_texture_view_width, 2003 display_texture_view_height)) 2004 { 2005 DebugAssert(display_texture_view_x == 0 && display_texture_view_y == 0 && 2006 static_cast<s32>(display_texture->GetWidth()) == display_texture_view_width && 2007 static_cast<s32>(display_texture->GetHeight()) == display_texture_view_height); 2008 2009 // Now we can apply the post chain. 2010 GPUTexture* post_output_texture = PostProcessing::InternalChain.GetOutputTexture(); 2011 if (PostProcessing::InternalChain.Apply(display_texture, m_display_depth_buffer, post_output_texture, 2012 GSVector4i(0, 0, display_texture_view_width, display_texture_view_height), 2013 display_texture_view_width, display_texture_view_height, 2014 m_crtc_state.display_width, m_crtc_state.display_height)) 2015 { 2016 display_texture_view_x = 0; 2017 display_texture_view_y = 0; 2018 display_texture = post_output_texture; 2019 display_texture->MakeReadyForSampling(); 2020 } 2021 } 2022 2023 const GPUTexture::Format hdformat = target ? target->GetFormat() : g_gpu_device->GetWindowFormat(); 2024 const u32 target_width = target ? target->GetWidth() : g_gpu_device->GetWindowWidth(); 2025 const u32 target_height = target ? 
target->GetHeight() : g_gpu_device->GetWindowHeight(); 2026 const bool really_postfx = 2027 (postfx && PostProcessing::DisplayChain.IsActive() && !g_gpu_device->GetWindowInfo().IsSurfaceless() && 2028 hdformat != GPUTexture::Format::Unknown && target_width > 0 && target_height > 0 && 2029 PostProcessing::DisplayChain.CheckTargets(hdformat, target_width, target_height)); 2030 const GSVector4i real_draw_rect = 2031 g_gpu_device->UsesLowerLeftOrigin() ? GPUDevice::FlipToLowerLeft(draw_rect, target_height) : draw_rect; 2032 if (really_postfx) 2033 { 2034 g_gpu_device->ClearRenderTarget(PostProcessing::DisplayChain.GetInputTexture(), GPUDevice::DEFAULT_CLEAR_COLOR); 2035 g_gpu_device->SetRenderTarget(PostProcessing::DisplayChain.GetInputTexture()); 2036 } 2037 else 2038 { 2039 if (target) 2040 g_gpu_device->SetRenderTarget(target); 2041 else if (!g_gpu_device->BeginPresent(false)) 2042 return false; 2043 } 2044 2045 if (display_texture) 2046 { 2047 bool texture_filter_linear = false; 2048 2049 struct Uniforms 2050 { 2051 float src_rect[4]; 2052 float src_size[4]; 2053 float clamp_rect[4]; 2054 float params[4]; 2055 float rotation_matrix[2][2]; 2056 } uniforms; 2057 std::memset(uniforms.params, 0, sizeof(uniforms.params)); 2058 2059 switch (g_settings.display_scaling) 2060 { 2061 case DisplayScalingMode::Nearest: 2062 case DisplayScalingMode::NearestInteger: 2063 break; 2064 2065 case DisplayScalingMode::BilinearSmooth: 2066 case DisplayScalingMode::BilinearInteger: 2067 texture_filter_linear = true; 2068 break; 2069 2070 case DisplayScalingMode::BilinearSharp: 2071 { 2072 texture_filter_linear = true; 2073 uniforms.params[0] = std::max( 2074 std::floor(static_cast<float>(draw_rect.width()) / static_cast<float>(m_display_texture_view_width)), 1.0f); 2075 uniforms.params[1] = std::max( 2076 std::floor(static_cast<float>(draw_rect.height()) / static_cast<float>(m_display_texture_view_height)), 1.0f); 2077 uniforms.params[2] = 0.5f - 0.5f / uniforms.params[0]; 2078 uniforms.params[3] = 0.5f - 0.5f / uniforms.params[1]; 2079 } 2080 break; 2081 2082 default: 2083 UnreachableCode(); 2084 break; 2085 } 2086 2087 g_gpu_device->SetPipeline(m_display_pipeline.get()); 2088 g_gpu_device->SetTextureSampler( 2089 0, display_texture, texture_filter_linear ? g_gpu_device->GetLinearSampler() : g_gpu_device->GetNearestSampler()); 2090 2091 // For bilinear, clamp to 0.5/SIZE-0.5 to avoid bleeding from the adjacent texels in VRAM. This is because 2092 // 1.0 in UV space is not the bottom-right texel, but a mix of the bottom-right and wrapped/next texel. 
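    // Example: for a 1024-texel-wide source with a 320-texel view starting at x=0, the clamp range below evaluates to [0.5/1024, 319.5/1024] in UV space.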
2093 const float rcp_width = 1.0f / static_cast<float>(display_texture->GetWidth()); 2094 const float rcp_height = 1.0f / static_cast<float>(display_texture->GetHeight()); 2095 uniforms.src_rect[0] = static_cast<float>(display_texture_view_x) * rcp_width; 2096 uniforms.src_rect[1] = static_cast<float>(display_texture_view_y) * rcp_height; 2097 uniforms.src_rect[2] = static_cast<float>(display_texture_view_width) * rcp_width; 2098 uniforms.src_rect[3] = static_cast<float>(display_texture_view_height) * rcp_height; 2099 uniforms.clamp_rect[0] = (static_cast<float>(display_texture_view_x) + 0.5f) * rcp_width; 2100 uniforms.clamp_rect[1] = (static_cast<float>(display_texture_view_y) + 0.5f) * rcp_height; 2101 uniforms.clamp_rect[2] = 2102 (static_cast<float>(display_texture_view_x + display_texture_view_width) - 0.5f) * rcp_width; 2103 uniforms.clamp_rect[3] = 2104 (static_cast<float>(display_texture_view_y + display_texture_view_height) - 0.5f) * rcp_height; 2105 uniforms.src_size[0] = static_cast<float>(display_texture->GetWidth()); 2106 uniforms.src_size[1] = static_cast<float>(display_texture->GetHeight()); 2107 uniforms.src_size[2] = rcp_width; 2108 uniforms.src_size[3] = rcp_height; 2109 2110 if (g_settings.display_rotation != DisplayRotation::Normal) 2111 { 2112 static constexpr const std::array<float, static_cast<size_t>(DisplayRotation::Count) - 1> rotation_radians = {{ 2113 static_cast<float>(std::numbers::pi * 1.5f), // Rotate90 2114 static_cast<float>(std::numbers::pi), // Rotate180 2115 static_cast<float>(std::numbers::pi / 2.0), // Rotate270 2116 }}; 2117 2118 GSMatrix2x2::Rotation(rotation_radians[static_cast<size_t>(g_settings.display_rotation) - 1]) 2119 .store(uniforms.rotation_matrix); 2120 } 2121 else 2122 { 2123 GSMatrix2x2::Identity().store(uniforms.rotation_matrix); 2124 } 2125 2126 g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); 2127 2128 g_gpu_device->SetViewportAndScissor(real_draw_rect); 2129 g_gpu_device->Draw(3, 0); 2130 } 2131 2132 if (really_postfx) 2133 { 2134 DebugAssert(!g_settings.debugging.show_vram); 2135 2136 // "original size" in postfx includes padding. 2137 const float upscale_x = m_display_texture ? static_cast<float>(m_display_texture_view_width) / 2138 static_cast<float>(m_crtc_state.display_vram_width) : 2139 1.0f; 2140 const float upscale_y = m_display_texture ? 
static_cast<float>(m_display_texture_view_height) / 2141 static_cast<float>(m_crtc_state.display_vram_height) : 2142 1.0f; 2143 const s32 orig_width = static_cast<s32>(std::ceil(static_cast<float>(m_crtc_state.display_width) * upscale_x)); 2144 const s32 orig_height = static_cast<s32>(std::ceil(static_cast<float>(m_crtc_state.display_height) * upscale_y)); 2145 2146 return PostProcessing::DisplayChain.Apply(PostProcessing::DisplayChain.GetInputTexture(), nullptr, target, 2147 display_rect, orig_width, orig_height, m_crtc_state.display_width, 2148 m_crtc_state.display_height); 2149 } 2150 else 2151 return true; 2152 } 2153 2154 bool GPU::SendDisplayToMediaCapture(MediaCapture* cap) 2155 { 2156 GPUTexture* target = cap->GetRenderTexture(); 2157 if (!target) [[unlikely]] 2158 return false; 2159 2160 const bool apply_aspect_ratio = 2161 (g_settings.display_screenshot_mode != DisplayScreenshotMode::UncorrectedInternalResolution); 2162 const bool postfx = (g_settings.display_screenshot_mode != DisplayScreenshotMode::InternalResolution); 2163 GSVector4i display_rect, draw_rect; 2164 CalculateDrawRect(target->GetWidth(), target->GetHeight(), !g_settings.debugging.show_vram, apply_aspect_ratio, 2165 &display_rect, &draw_rect); 2166 2167 // Not cleared by RenderDisplay(). 2168 g_gpu_device->ClearRenderTarget(target, GPUDevice::DEFAULT_CLEAR_COLOR); 2169 2170 if (!RenderDisplay(target, display_rect, draw_rect, postfx)) [[unlikely]] 2171 return false; 2172 2173 return cap->DeliverVideoFrame(target); 2174 } 2175 2176 void GPU::DestroyDeinterlaceTextures() 2177 { 2178 for (std::unique_ptr<GPUTexture>& tex : m_deinterlace_buffers) 2179 g_gpu_device->RecycleTexture(std::move(tex)); 2180 g_gpu_device->RecycleTexture(std::move(m_deinterlace_texture)); 2181 m_current_deinterlace_buffer = 0; 2182 } 2183 2184 bool GPU::Deinterlace(u32 field, u32 line_skip) 2185 { 2186 GPUTexture* src = m_display_texture; 2187 const u32 x = m_display_texture_view_x; 2188 const u32 y = m_display_texture_view_y; 2189 const u32 width = m_display_texture_view_width; 2190 const u32 height = m_display_texture_view_height; 2191 2192 switch (g_settings.display_deinterlacing_mode) 2193 { 2194 case DisplayDeinterlacingMode::Disabled: 2195 { 2196 if (line_skip == 0) 2197 return true; 2198 2199 // Still have to extract the field. 
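        // A non-zero line_skip means the source rows are interleaved, so extract every other line into buffer 0 and display that instead.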
2200 if (!DeinterlaceExtractField(0, src, x, y, width, height, line_skip)) [[unlikely]] 2201 return false; 2202 2203 SetDisplayTexture(m_deinterlace_buffers[0].get(), m_display_depth_buffer, 0, 0, width, height); 2204 return true; 2205 } 2206 2207 case DisplayDeinterlacingMode::Weave: 2208 { 2209 GL_SCOPE_FMT("DeinterlaceWeave({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip); 2210 2211 const u32 full_height = height * 2; 2212 if (!DeinterlaceSetTargetSize(width, full_height, true)) [[unlikely]] 2213 { 2214 ClearDisplayTexture(); 2215 return false; 2216 } 2217 2218 src->MakeReadyForSampling(); 2219 2220 g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); 2221 g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); 2222 g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); 2223 const u32 uniforms[] = {x, y, field, line_skip}; 2224 g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); 2225 g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); 2226 g_gpu_device->Draw(3, 0); 2227 2228 m_deinterlace_texture->MakeReadyForSampling(); 2229 SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height); 2230 return true; 2231 } 2232 2233 case DisplayDeinterlacingMode::Blend: 2234 { 2235 constexpr u32 NUM_BLEND_BUFFERS = 2; 2236 2237 GL_SCOPE_FMT("DeinterlaceBlend({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip); 2238 2239 const u32 this_buffer = m_current_deinterlace_buffer; 2240 m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % NUM_BLEND_BUFFERS; 2241 GL_INS_FMT("Current buffer: {}", this_buffer); 2242 if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) || 2243 !DeinterlaceSetTargetSize(width, height, false)) [[unlikely]] 2244 { 2245 ClearDisplayTexture(); 2246 return false; 2247 } 2248 2249 // TODO: could be implemented with alpha blending instead.. 
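      // The blend shader reads the field just extracted and the previously extracted field (texture slots 0 and 1 below) and combines them into a single output frame.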
2250 2251 g_gpu_device->InvalidateRenderTarget(m_deinterlace_texture.get()); 2252 g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); 2253 g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); 2254 g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); 2255 g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % NUM_BLEND_BUFFERS].get(), 2256 g_gpu_device->GetNearestSampler()); 2257 g_gpu_device->SetViewportAndScissor(0, 0, width, height); 2258 g_gpu_device->Draw(3, 0); 2259 2260 m_deinterlace_texture->MakeReadyForSampling(); 2261 SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, height); 2262 return true; 2263 } 2264 2265 case DisplayDeinterlacingMode::Adaptive: 2266 { 2267 GL_SCOPE_FMT("DeinterlaceAdaptive({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, 2268 line_skip); 2269 2270 const u32 full_height = height * 2; 2271 const u32 this_buffer = m_current_deinterlace_buffer; 2272 m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % DEINTERLACE_BUFFER_COUNT; 2273 GL_INS_FMT("Current buffer: {}", this_buffer); 2274 if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) || 2275 !DeinterlaceSetTargetSize(width, full_height, false)) [[unlikely]] 2276 { 2277 ClearDisplayTexture(); 2278 return false; 2279 } 2280 2281 g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); 2282 g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); 2283 g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); 2284 g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % DEINTERLACE_BUFFER_COUNT].get(), 2285 g_gpu_device->GetNearestSampler()); 2286 g_gpu_device->SetTextureSampler(2, m_deinterlace_buffers[(this_buffer - 2) % DEINTERLACE_BUFFER_COUNT].get(), 2287 g_gpu_device->GetNearestSampler()); 2288 g_gpu_device->SetTextureSampler(3, m_deinterlace_buffers[(this_buffer - 3) % DEINTERLACE_BUFFER_COUNT].get(), 2289 g_gpu_device->GetNearestSampler()); 2290 const u32 uniforms[] = {field, full_height}; 2291 g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); 2292 g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); 2293 g_gpu_device->Draw(3, 0); 2294 2295 m_deinterlace_texture->MakeReadyForSampling(); 2296 SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height); 2297 return true; 2298 } 2299 2300 default: 2301 UnreachableCode(); 2302 } 2303 } 2304 2305 bool GPU::DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 line_skip) 2306 { 2307 if (!m_deinterlace_buffers[dst_bufidx] || m_deinterlace_buffers[dst_bufidx]->GetWidth() != width || 2308 m_deinterlace_buffers[dst_bufidx]->GetHeight() != height) 2309 { 2310 if (!g_gpu_device->ResizeTexture(&m_deinterlace_buffers[dst_bufidx], width, height, GPUTexture::Type::RenderTarget, 2311 GPUTexture::Format::RGBA8, false)) [[unlikely]] 2312 { 2313 return false; 2314 } 2315 2316 GL_OBJECT_NAME_FMT(m_deinterlace_buffers[dst_bufidx], "Blend Deinterlace Buffer {}", dst_bufidx); 2317 } 2318 2319 GPUTexture* dst = m_deinterlace_buffers[dst_bufidx].get(); 2320 g_gpu_device->InvalidateRenderTarget(dst); 2321 2322 // If we're not skipping lines, then we can simply copy the texture. 
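  // (A direct copy also requires the source and destination formats to match; otherwise fall through to the shader path below.)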
2323 if (line_skip == 0 && src->GetFormat() == dst->GetFormat()) 2324 { 2325 GL_INS_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => copy direct", x, y, width, height, line_skip); 2326 g_gpu_device->CopyTextureRegion(dst, 0, 0, 0, 0, src, x, y, 0, 0, width, height); 2327 } 2328 else 2329 { 2330 GL_SCOPE_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => shader copy", x, y, width, height, 2331 line_skip); 2332 2333 // Otherwise, we need to extract every other line from the texture. 2334 src->MakeReadyForSampling(); 2335 g_gpu_device->SetRenderTarget(dst); 2336 g_gpu_device->SetPipeline(m_deinterlace_extract_pipeline.get()); 2337 g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); 2338 const u32 uniforms[] = {x, y, line_skip}; 2339 g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); 2340 g_gpu_device->SetViewportAndScissor(0, 0, width, height); 2341 g_gpu_device->Draw(3, 0); 2342 2343 GL_POP(); 2344 } 2345 2346 dst->MakeReadyForSampling(); 2347 return true; 2348 } 2349 2350 bool GPU::DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve) 2351 { 2352 if (!m_deinterlace_texture || m_deinterlace_texture->GetWidth() != width || 2353 m_deinterlace_texture->GetHeight() != height) 2354 { 2355 if (!g_gpu_device->ResizeTexture(&m_deinterlace_texture, width, height, GPUTexture::Type::RenderTarget, 2356 GPUTexture::Format::RGBA8, preserve)) [[unlikely]] 2357 { 2358 return false; 2359 } 2360 2361 GL_OBJECT_NAME(m_deinterlace_texture, "Deinterlace target texture"); 2362 } 2363 2364 return true; 2365 } 2366 2367 bool GPU::ApplyChromaSmoothing() 2368 { 2369 const u32 x = m_display_texture_view_x; 2370 const u32 y = m_display_texture_view_y; 2371 const u32 width = m_display_texture_view_width; 2372 const u32 height = m_display_texture_view_height; 2373 if (!m_chroma_smoothing_texture || m_chroma_smoothing_texture->GetWidth() != width || 2374 m_chroma_smoothing_texture->GetHeight() != height) 2375 { 2376 if (!g_gpu_device->ResizeTexture(&m_chroma_smoothing_texture, width, height, GPUTexture::Type::RenderTarget, 2377 GPUTexture::Format::RGBA8, false)) 2378 { 2379 ClearDisplayTexture(); 2380 return false; 2381 } 2382 2383 GL_OBJECT_NAME(m_chroma_smoothing_texture, "Chroma smoothing texture"); 2384 } 2385 2386 GL_SCOPE_FMT("ApplyChromaSmoothing({{{},{}}}, {}x{})", x, y, width, height); 2387 2388 m_display_texture->MakeReadyForSampling(); 2389 g_gpu_device->InvalidateRenderTarget(m_chroma_smoothing_texture.get()); 2390 g_gpu_device->SetRenderTarget(m_chroma_smoothing_texture.get()); 2391 g_gpu_device->SetPipeline(m_chroma_smoothing_pipeline.get()); 2392 g_gpu_device->SetTextureSampler(0, m_display_texture, g_gpu_device->GetNearestSampler()); 2393 const u32 uniforms[] = {x, y, width - 1, height - 1}; 2394 g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); 2395 g_gpu_device->SetViewportAndScissor(0, 0, width, height); 2396 g_gpu_device->Draw(3, 0); 2397 2398 m_chroma_smoothing_texture->MakeReadyForSampling(); 2399 SetDisplayTexture(m_chroma_smoothing_texture.get(), m_display_depth_buffer, 0, 0, width, height); 2400 return true; 2401 } 2402 2403 void GPU::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio, 2404 GSVector4i* display_rect, GSVector4i* draw_rect) const 2405 { 2406 const bool integer_scale = (g_settings.display_scaling == DisplayScalingMode::NearestInteger || 2407 g_settings.display_scaling == DisplayScalingMode::BilinearInteger); 2408 const bool show_vram = 
g_settings.debugging.show_vram; 2409 const float display_aspect_ratio = ComputeDisplayAspectRatio(); 2410 const float window_ratio = static_cast<float>(window_width) / static_cast<float>(window_height); 2411 const float crtc_display_width = static_cast<float>(show_vram ? VRAM_WIDTH : m_crtc_state.display_width); 2412 const float crtc_display_height = static_cast<float>(show_vram ? VRAM_HEIGHT : m_crtc_state.display_height); 2413 const float x_scale = 2414 apply_aspect_ratio ? 2415 (display_aspect_ratio / (static_cast<float>(crtc_display_width) / static_cast<float>(crtc_display_height))) : 2416 1.0f; 2417 float display_width = crtc_display_width; 2418 float display_height = crtc_display_height; 2419 float active_left = static_cast<float>(show_vram ? 0 : m_crtc_state.display_origin_left); 2420 float active_top = static_cast<float>(show_vram ? 0 : m_crtc_state.display_origin_top); 2421 float active_width = static_cast<float>(show_vram ? VRAM_WIDTH : m_crtc_state.display_vram_width); 2422 float active_height = static_cast<float>(show_vram ? VRAM_HEIGHT : m_crtc_state.display_vram_height); 2423 if (!g_settings.display_stretch_vertically) 2424 { 2425 display_width *= x_scale; 2426 active_left *= x_scale; 2427 active_width *= x_scale; 2428 } 2429 else 2430 { 2431 display_height /= x_scale; 2432 active_top /= x_scale; 2433 active_height /= x_scale; 2434 } 2435 2436 // swap width/height when rotated, the flipping of padding is taken care of in the shader with the rotation matrix 2437 if (g_settings.display_rotation == DisplayRotation::Rotate90 || 2438 g_settings.display_rotation == DisplayRotation::Rotate270) 2439 { 2440 std::swap(display_width, display_height); 2441 std::swap(active_width, active_height); 2442 std::swap(active_top, active_left); 2443 } 2444 2445 // now fit it within the window 2446 float scale; 2447 float left_padding, top_padding; 2448 if ((display_width / display_height) >= window_ratio) 2449 { 2450 // align in middle vertically 2451 scale = static_cast<float>(window_width) / display_width; 2452 if (integer_scale) 2453 { 2454 scale = std::max(std::floor(scale), 1.0f); 2455 left_padding = std::max<float>((static_cast<float>(window_width) - display_width * scale) / 2.0f, 0.0f); 2456 } 2457 else 2458 { 2459 left_padding = 0.0f; 2460 } 2461 2462 switch (g_settings.display_alignment) 2463 { 2464 case DisplayAlignment::RightOrBottom: 2465 top_padding = std::max<float>(static_cast<float>(window_height) - (display_height * scale), 0.0f); 2466 break; 2467 2468 case DisplayAlignment::Center: 2469 top_padding = std::max<float>((static_cast<float>(window_height) - (display_height * scale)) / 2.0f, 0.0f); 2470 break; 2471 2472 case DisplayAlignment::LeftOrTop: 2473 default: 2474 top_padding = 0.0f; 2475 break; 2476 } 2477 } 2478 else 2479 { 2480 // align in middle horizontally 2481 scale = static_cast<float>(window_height) / display_height; 2482 if (integer_scale) 2483 { 2484 scale = std::max(std::floor(scale), 1.0f); 2485 top_padding = std::max<float>((static_cast<float>(window_height) - (display_height * scale)) / 2.0f, 0.0f); 2486 } 2487 else 2488 { 2489 top_padding = 0.0f; 2490 } 2491 2492 switch (g_settings.display_alignment) 2493 { 2494 case DisplayAlignment::RightOrBottom: 2495 left_padding = std::max<float>(static_cast<float>(window_width) - (display_width * scale), 0.0f); 2496 break; 2497 2498 case DisplayAlignment::Center: 2499 left_padding = std::max<float>((static_cast<float>(window_width) - (display_width * scale)) / 2.0f, 0.0f); 2500 break; 2501 2502 case 
DisplayAlignment::LeftOrTop: 2503 default: 2504 left_padding = 0.0f; 2505 break; 2506 } 2507 } 2508 2509 // TODO: This should be a float rectangle. But because GL is lame, it only has integer viewports... 2510 const s32 left = static_cast<s32>(active_left * scale + left_padding); 2511 const s32 top = static_cast<s32>(active_top * scale + top_padding); 2512 const s32 right = left + static_cast<s32>(active_width * scale); 2513 const s32 bottom = top + static_cast<s32>(active_height * scale); 2514 *draw_rect = GSVector4i(left, top, right, bottom); 2515 *display_rect = GSVector4i( 2516 GSVector4(left_padding, top_padding, left_padding + display_width * scale, top_padding + display_height * scale)); 2517 } 2518 2519 bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, 2520 u8 quality, bool clear_alpha, bool flip_y, std::vector<u32> texture_data, 2521 u32 texture_data_stride, GPUTexture::Format texture_format, bool display_osd_message, 2522 bool use_thread) 2523 { 2524 std::string osd_key; 2525 if (display_osd_message) 2526 { 2527 // Use a 60 second timeout to give it plenty of time to actually save. 2528 osd_key = fmt::format("ScreenshotSaver_{}", filename); 2529 Host::AddIconOSDMessage(osd_key, ICON_EMOJI_CAMERA_WITH_FLASH, 2530 fmt::format(TRANSLATE_FS("GPU", "Saving screenshot to '{}'."), Path::GetFileName(filename)), 2531 60.0f); 2532 } 2533 2534 static constexpr auto proc = [](u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp, 2535 u8 quality, bool clear_alpha, bool flip_y, std::vector<u32> texture_data, 2536 u32 texture_data_stride, GPUTexture::Format texture_format, std::string osd_key, 2537 bool use_thread) { 2538 bool result; 2539 2540 const char* extension = std::strrchr(filename.c_str(), '.'); 2541 if (extension) 2542 { 2543 if (GPUTexture::ConvertTextureDataToRGBA8(width, height, texture_data, texture_data_stride, texture_format)) 2544 { 2545 if (clear_alpha) 2546 { 2547 for (u32& pixel : texture_data) 2548 pixel |= 0xFF000000u; 2549 } 2550 2551 if (flip_y) 2552 GPUTexture::FlipTextureDataRGBA8(width, height, reinterpret_cast<u8*>(texture_data.data()), 2553 texture_data_stride); 2554 2555 Assert(texture_data_stride == sizeof(u32) * width); 2556 RGBA8Image image(width, height, std::move(texture_data)); 2557 if (image.SaveToFile(filename.c_str(), fp.get(), quality)) 2558 { 2559 result = true; 2560 } 2561 else 2562 { 2563 ERROR_LOG("Unknown extension in filename '{}' or save error: '{}'", filename, extension); 2564 result = false; 2565 } 2566 } 2567 else 2568 { 2569 result = false; 2570 } 2571 } 2572 else 2573 { 2574 ERROR_LOG("Unable to determine file extension for '{}'", filename); 2575 result = false; 2576 } 2577 2578 if (!osd_key.empty()) 2579 { 2580 Host::AddIconOSDMessage(std::move(osd_key), ICON_EMOJI_CAMERA, 2581 fmt::format(result ? TRANSLATE_FS("GPU", "Saved screenshot to '{}'.") : 2582 TRANSLATE_FS("GPU", "Failed to save screenshot to '{}'."), 2583 Path::GetFileName(filename), 2584 result ? 
Host::OSD_INFO_DURATION : Host::OSD_ERROR_DURATION)); 2585 } 2586 2587 if (use_thread) 2588 { 2589 // remove ourselves from the list, if the GS thread is waiting for us, we won't be in there 2590 const auto this_id = std::this_thread::get_id(); 2591 std::unique_lock lock(s_screenshot_threads_mutex); 2592 for (auto it = s_screenshot_threads.begin(); it != s_screenshot_threads.end(); ++it) 2593 { 2594 if (it->get_id() == this_id) 2595 { 2596 it->detach(); 2597 s_screenshot_threads.erase(it); 2598 break; 2599 } 2600 } 2601 } 2602 2603 return result; 2604 }; 2605 2606 if (!use_thread) 2607 { 2608 return proc(width, height, std::move(filename), std::move(fp), quality, clear_alpha, flip_y, 2609 std::move(texture_data), texture_data_stride, texture_format, std::move(osd_key), use_thread); 2610 } 2611 2612 std::unique_lock lock(s_screenshot_threads_mutex); 2613 std::thread thread(proc, width, height, std::move(filename), std::move(fp), quality, clear_alpha, flip_y, 2614 std::move(texture_data), texture_data_stride, texture_format, std::move(osd_key), use_thread); 2615 s_screenshot_threads.push_back(std::move(thread)); 2616 return true; 2617 } 2618 2619 void JoinScreenshotThreads() 2620 { 2621 std::unique_lock lock(s_screenshot_threads_mutex); 2622 while (!s_screenshot_threads.empty()) 2623 { 2624 std::thread save_thread(std::move(s_screenshot_threads.front())); 2625 s_screenshot_threads.pop_front(); 2626 lock.unlock(); 2627 save_thread.join(); 2628 lock.lock(); 2629 } 2630 } 2631 2632 bool GPU::WriteDisplayTextureToFile(std::string filename, bool compress_on_thread /* = false */) 2633 { 2634 if (!m_display_texture) 2635 return false; 2636 2637 const u32 read_x = static_cast<u32>(m_display_texture_view_x); 2638 const u32 read_y = static_cast<u32>(m_display_texture_view_y); 2639 const u32 read_width = static_cast<u32>(m_display_texture_view_width); 2640 const u32 read_height = static_cast<u32>(m_display_texture_view_height); 2641 2642 const u32 texture_data_stride = 2643 Common::AlignUpPow2(GPUTexture::GetPixelSize(m_display_texture->GetFormat()) * read_width, 4); 2644 std::vector<u32> texture_data((texture_data_stride * read_height) / sizeof(u32)); 2645 2646 std::unique_ptr<GPUDownloadTexture> dltex; 2647 if (g_gpu_device->GetFeatures().memory_import) 2648 { 2649 dltex = 2650 g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat(), texture_data.data(), 2651 texture_data.size() * sizeof(u32), texture_data_stride); 2652 } 2653 if (!dltex) 2654 { 2655 if (!(dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat()))) 2656 { 2657 ERROR_LOG("Failed to create {}x{} {} download texture", read_width, read_height, 2658 GPUTexture::GetFormatName(m_display_texture->GetFormat())); 2659 return false; 2660 } 2661 } 2662 2663 dltex->CopyFromTexture(0, 0, m_display_texture, read_x, read_y, read_width, read_height, 0, 0, !dltex->IsImported()); 2664 if (!dltex->ReadTexels(0, 0, read_width, read_height, texture_data.data(), texture_data_stride)) 2665 { 2666 RestoreDeviceContext(); 2667 return false; 2668 } 2669 2670 RestoreDeviceContext(); 2671 2672 Error error; 2673 auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb", &error); 2674 if (!fp) 2675 { 2676 ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(filename), error.GetDescription()); 2677 return false; 2678 } 2679 2680 constexpr bool clear_alpha = true; 2681 const bool flip_y = g_gpu_device->UsesLowerLeftOrigin(); 2682 2683 return CompressAndWriteTextureToFile( 2684 
read_width, read_height, std::move(filename), std::move(fp), g_settings.display_screenshot_quality, clear_alpha, 2685 flip_y, std::move(texture_data), texture_data_stride, m_display_texture->GetFormat(), false, compress_on_thread); 2686 } 2687 2688 bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i display_rect, const GSVector4i draw_rect, 2689 bool postfx, std::vector<u32>* out_pixels, u32* out_stride, 2690 GPUTexture::Format* out_format) 2691 { 2692 const GPUTexture::Format hdformat = 2693 g_gpu_device->HasSurface() ? g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8; 2694 2695 auto render_texture = 2696 g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, hdformat); 2697 if (!render_texture) 2698 return false; 2699 2700 g_gpu_device->ClearRenderTarget(render_texture.get(), GPUDevice::DEFAULT_CLEAR_COLOR); 2701 2702 // TODO: this should use copy shader instead. 2703 RenderDisplay(render_texture.get(), display_rect, draw_rect, postfx); 2704 2705 const u32 stride = Common::AlignUpPow2(GPUTexture::GetPixelSize(hdformat) * width, sizeof(u32)); 2706 out_pixels->resize((height * stride) / sizeof(u32)); 2707 2708 std::unique_ptr<GPUDownloadTexture> dltex; 2709 if (g_gpu_device->GetFeatures().memory_import) 2710 { 2711 dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, out_pixels->data(), 2712 out_pixels->size() * sizeof(u32), stride); 2713 } 2714 if (!dltex) 2715 { 2716 if (!(dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat))) 2717 { 2718 ERROR_LOG("Failed to create {}x{} download texture", width, height); 2719 return false; 2720 } 2721 } 2722 2723 dltex->CopyFromTexture(0, 0, render_texture.get(), 0, 0, width, height, 0, 0, false); 2724 if (!dltex->ReadTexels(0, 0, width, height, out_pixels->data(), stride)) 2725 { 2726 RestoreDeviceContext(); 2727 return false; 2728 } 2729 2730 *out_stride = stride; 2731 *out_format = hdformat; 2732 RestoreDeviceContext(); 2733 return true; 2734 } 2735 2736 void GPU::CalculateScreenshotSize(DisplayScreenshotMode mode, u32* width, u32* height, GSVector4i* display_rect, 2737 GSVector4i* draw_rect) const 2738 { 2739 *width = g_gpu_device->GetWindowWidth(); 2740 *height = g_gpu_device->GetWindowHeight(); 2741 CalculateDrawRect(*width, *height, true, !g_settings.debugging.show_vram, display_rect, draw_rect); 2742 2743 const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution || g_settings.debugging.show_vram); 2744 if (internal_resolution && m_display_texture_view_width != 0 && m_display_texture_view_height != 0) 2745 { 2746 if (mode == DisplayScreenshotMode::InternalResolution) 2747 { 2748 const u32 draw_width = static_cast<u32>(draw_rect->width()); 2749 const u32 draw_height = static_cast<u32>(draw_rect->height()); 2750 2751 // If internal res, scale the computed draw rectangle to the internal res. 2752 // We re-use the draw rect because it's already been AR corrected. 
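      // sar is the aspect ratio of the internal texture view, dar that of the AR-corrected draw rect; one axis stays at internal resolution and the other is scaled so the draw rect's ratio is preserved.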
2753 const float sar = 2754 static_cast<float>(m_display_texture_view_width) / static_cast<float>(m_display_texture_view_height); 2755 const float dar = static_cast<float>(draw_width) / static_cast<float>(draw_height); 2756 if (sar >= dar) 2757 { 2758 // stretch height, preserve width 2759 const float scale = static_cast<float>(m_display_texture_view_width) / static_cast<float>(draw_width); 2760 *width = m_display_texture_view_width; 2761 *height = static_cast<u32>(std::round(static_cast<float>(draw_height) * scale)); 2762 } 2763 else 2764 { 2765 // stretch width, preserve height 2766 const float scale = static_cast<float>(m_display_texture_view_height) / static_cast<float>(draw_height); 2767 *width = static_cast<u32>(std::round(static_cast<float>(draw_width) * scale)); 2768 *height = m_display_texture_view_height; 2769 } 2770 2771 // DX11 won't go past 16K texture size. 2772 const u32 max_texture_size = g_gpu_device->GetMaxTextureSize(); 2773 if (*width > max_texture_size) 2774 { 2775 *height = static_cast<u32>(static_cast<float>(*height) / 2776 (static_cast<float>(*width) / static_cast<float>(max_texture_size))); 2777 *width = max_texture_size; 2778 } 2779 if (*height > max_texture_size) 2780 { 2781 *height = max_texture_size; 2782 *width = static_cast<u32>(static_cast<float>(*width) / 2783 (static_cast<float>(*height) / static_cast<float>(max_texture_size))); 2784 } 2785 } 2786 else // if (mode == DisplayScreenshotMode::UncorrectedInternalResolution) 2787 { 2788 *width = m_display_texture_view_width; 2789 *height = m_display_texture_view_height; 2790 } 2791 2792 // Remove padding, it's not part of the framebuffer. 2793 *draw_rect = GSVector4i(0, 0, static_cast<s32>(*width), static_cast<s32>(*height)); 2794 *display_rect = *draw_rect; 2795 } 2796 } 2797 2798 bool GPU::RenderScreenshotToFile(std::string filename, DisplayScreenshotMode mode, u8 quality, bool compress_on_thread, 2799 bool show_osd_message) 2800 { 2801 u32 width, height; 2802 GSVector4i display_rect, draw_rect; 2803 CalculateScreenshotSize(mode, &width, &height, &display_rect, &draw_rect); 2804 2805 const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution); 2806 if (width == 0 || height == 0) 2807 return false; 2808 2809 std::vector<u32> pixels; 2810 u32 pixels_stride; 2811 GPUTexture::Format pixels_format; 2812 if (!RenderScreenshotToBuffer(width, height, display_rect, draw_rect, !internal_resolution, &pixels, &pixels_stride, 2813 &pixels_format)) 2814 { 2815 ERROR_LOG("Failed to render {}x{} screenshot", width, height); 2816 return false; 2817 } 2818 2819 Error error; 2820 auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb", &error); 2821 if (!fp) 2822 { 2823 ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(filename), error.GetDescription()); 2824 return false; 2825 } 2826 2827 return CompressAndWriteTextureToFile(width, height, std::move(filename), std::move(fp), quality, true, 2828 g_gpu_device->UsesLowerLeftOrigin(), std::move(pixels), pixels_stride, 2829 pixels_format, show_osd_message, compress_on_thread); 2830 } 2831 2832 bool GPU::DumpVRAMToFile(const char* filename) 2833 { 2834 ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); 2835 2836 const char* extension = std::strrchr(filename, '.'); 2837 if (extension && StringUtil::Strcasecmp(extension, ".png") == 0) 2838 { 2839 return DumpVRAMToFile(filename, VRAM_WIDTH, VRAM_HEIGHT, sizeof(u16) * VRAM_WIDTH, g_vram, true); 2840 } 2841 else if (extension && StringUtil::Strcasecmp(extension, ".bin") == 0) 2842 { 2843 return 
FileSystem::WriteBinaryFile(filename, g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); 2844 } 2845 else 2846 { 2847 ERROR_LOG("Unknown extension: '{}'", filename); 2848 return false; 2849 } 2850 } 2851 2852 bool GPU::DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, bool remove_alpha) 2853 { 2854 RGBA8Image image(width, height); 2855 2856 const char* ptr_in = static_cast<const char*>(buffer); 2857 for (u32 row = 0; row < height; row++) 2858 { 2859 const char* row_ptr_in = ptr_in; 2860 u32* ptr_out = image.GetRowPixels(row); 2861 2862 for (u32 col = 0; col < width; col++) 2863 { 2864 u16 src_col; 2865 std::memcpy(&src_col, row_ptr_in, sizeof(u16)); 2866 row_ptr_in += sizeof(u16); 2867 *(ptr_out++) = VRAMRGBA5551ToRGBA8888(remove_alpha ? (src_col | u16(0x8000)) : src_col); 2868 } 2869 2870 ptr_in += stride; 2871 } 2872 2873 return image.SaveToFile(filename); 2874 } 2875 2876 void GPU::DrawDebugStateWindow() 2877 { 2878 const float framebuffer_scale = ImGuiManager::GetGlobalScale(); 2879 2880 ImGui::SetNextWindowSize(ImVec2(450.0f * framebuffer_scale, 550.0f * framebuffer_scale), ImGuiCond_FirstUseEver); 2881 if (!ImGui::Begin("GPU", nullptr)) 2882 { 2883 ImGui::End(); 2884 return; 2885 } 2886 2887 DrawRendererStats(); 2888 2889 if (ImGui::CollapsingHeader("GPU", ImGuiTreeNodeFlags_DefaultOpen)) 2890 { 2891 static constexpr std::array<const char*, 5> state_strings = { 2892 {"Idle", "Reading VRAM", "Writing VRAM", "Drawing Polyline"}}; 2893 2894 ImGui::Text("State: %s", state_strings[static_cast<u8>(m_blitter_state)]); 2895 ImGui::Text("Dither: %s", m_GPUSTAT.dither_enable ? "Enabled" : "Disabled"); 2896 ImGui::Text("Draw To Displayed Field: %s", m_GPUSTAT.draw_to_displayed_field ? "Enabled" : "Disabled"); 2897 ImGui::Text("Draw Set Mask Bit: %s", m_GPUSTAT.set_mask_while_drawing ? "Yes" : "No"); 2898 ImGui::Text("Draw To Masked Pixels: %s", m_GPUSTAT.check_mask_before_draw ? "Yes" : "No"); 2899 ImGui::Text("Reverse Flag: %s", m_GPUSTAT.reverse_flag ? "Yes" : "No"); 2900 ImGui::Text("Texture Disable: %s", m_GPUSTAT.texture_disable ? "Yes" : "No"); 2901 ImGui::Text("PAL Mode: %s", m_GPUSTAT.pal_mode ? "Yes" : "No"); 2902 ImGui::Text("Interrupt Request: %s", m_GPUSTAT.interrupt_request ? "Yes" : "No"); 2903 ImGui::Text("DMA Request: %s", m_GPUSTAT.dma_data_request ? "Yes" : "No"); 2904 } 2905 2906 if (ImGui::CollapsingHeader("CRTC", ImGuiTreeNodeFlags_DefaultOpen)) 2907 { 2908 const auto& cs = m_crtc_state; 2909 ImGui::Text("Clock: %s", (m_console_is_pal ? (m_GPUSTAT.pal_mode ? "PAL-on-PAL" : "NTSC-on-PAL") : 2910 (m_GPUSTAT.pal_mode ? "PAL-on-NTSC" : "NTSC-on-NTSC"))); 2911 ImGui::Text("Horizontal Frequency: %.3f KHz", ComputeHorizontalFrequency() / 1000.0f); 2912 ImGui::Text("Vertical Frequency: %.3f Hz", ComputeVerticalFrequency()); 2913 ImGui::Text("Dot Clock Divider: %u", cs.dot_clock_divider); 2914 ImGui::Text("Vertical Interlace: %s (%s field)", m_GPUSTAT.vertical_interlace ? "Yes" : "No", 2915 cs.interlaced_field ? "odd" : "even"); 2916 ImGui::Text("Current Scanline: %u (tick %u)", cs.current_scanline, cs.current_tick_in_scanline); 2917 ImGui::Text("Display Disable: %s", m_GPUSTAT.display_disable ? "Yes" : "No"); 2918 ImGui::Text("Displaying Odd Lines: %s", cs.active_line_lsb ? "Yes" : "No"); 2919 ImGui::Text("Color Depth: %u-bit", m_GPUSTAT.display_area_color_depth_24 ? 
24 : 15); 2920 ImGui::Text("Start Offset in VRAM: (%u, %u)", cs.regs.X.GetValue(), cs.regs.Y.GetValue()); 2921 ImGui::Text("Display Total: %u (%u) horizontal, %u vertical", cs.horizontal_total, 2922 cs.horizontal_total / cs.dot_clock_divider, cs.vertical_total); 2923 ImGui::Text("Configured Display Range: %u-%u (%u-%u), %u-%u", cs.regs.X1.GetValue(), cs.regs.X2.GetValue(), 2924 cs.regs.X1.GetValue() / cs.dot_clock_divider, cs.regs.X2.GetValue() / cs.dot_clock_divider, 2925 cs.regs.Y1.GetValue(), cs.regs.Y2.GetValue()); 2926 ImGui::Text("Output Display Range: %u-%u (%u-%u), %u-%u", cs.horizontal_display_start, cs.horizontal_display_end, 2927 cs.horizontal_display_start / cs.dot_clock_divider, cs.horizontal_display_end / cs.dot_clock_divider, 2928 cs.vertical_display_start, cs.vertical_display_end); 2929 ImGui::Text("Cropping: %s", Settings::GetDisplayCropModeName(g_settings.display_crop_mode)); 2930 ImGui::Text("Visible Display Range: %u-%u (%u-%u), %u-%u", cs.horizontal_visible_start, cs.horizontal_visible_end, 2931 cs.horizontal_visible_start / cs.dot_clock_divider, cs.horizontal_visible_end / cs.dot_clock_divider, 2932 cs.vertical_visible_start, cs.vertical_visible_end); 2933 ImGui::Text("Display Resolution: %ux%u", cs.display_width, cs.display_height); 2934 ImGui::Text("Display Origin: %u, %u", cs.display_origin_left, cs.display_origin_top); 2935 ImGui::Text("Displayed/Visible VRAM Portion: %ux%u @ (%u, %u)", cs.display_vram_width, cs.display_vram_height, 2936 cs.display_vram_left, cs.display_vram_top); 2937 ImGui::Text("Padding: Left=%d, Top=%d, Right=%d, Bottom=%d", cs.display_origin_left, cs.display_origin_top, 2938 cs.display_width - cs.display_vram_width - cs.display_origin_left, 2939 cs.display_height - cs.display_vram_height - cs.display_origin_top); 2940 } 2941 2942 ImGui::End(); 2943 } 2944 2945 void GPU::DrawRendererStats() 2946 { 2947 } 2948 2949 void GPU::OnBufferSwapped() 2950 { 2951 } 2952 2953 void GPU::GetStatsString(SmallStringBase& str) 2954 { 2955 if (IsHardwareRenderer()) 2956 { 2957 str.format("{} HW | {} P | {} DC | {} B | {} RP | {} RB | {} C | {} W", 2958 GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), m_stats.num_primitives, 2959 m_stats.host_num_draws, m_stats.host_num_barriers, m_stats.host_num_render_passes, 2960 m_stats.host_num_downloads, m_stats.num_copies, m_stats.num_writes); 2961 } 2962 else 2963 { 2964 str.format("{} SW | {} P | {} R | {} C | {} W", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), 2965 m_stats.num_primitives, m_stats.num_reads, m_stats.num_copies, m_stats.num_writes); 2966 } 2967 } 2968 2969 void GPU::GetMemoryStatsString(SmallStringBase& str) 2970 { 2971 const u32 vram_usage_mb = static_cast<u32>((g_gpu_device->GetVRAMUsage() + (1048576 - 1)) / 1048576); 2972 const u32 stream_kb = static_cast<u32>((m_stats.host_buffer_streamed + (1024 - 1)) / 1024); 2973 2974 str.format("{} MB VRAM | {} KB STR | {} TC | {} TU", vram_usage_mb, stream_kb, m_stats.host_num_copies, 2975 m_stats.host_num_uploads); 2976 } 2977 2978 void GPU::ResetStatistics() 2979 { 2980 m_counters = {}; 2981 g_gpu_device->ResetStatistics(); 2982 } 2983 2984 void GPU::UpdateStatistics(u32 frame_count) 2985 { 2986 const GPUDevice::Statistics& stats = g_gpu_device->GetStatistics(); 2987 const u32 round = (frame_count - 1); 2988 2989 #define UPDATE_COUNTER(x) m_stats.x = (m_counters.x + round) / frame_count 2990 #define UPDATE_GPU_STAT(x) m_stats.host_##x = (stats.x + round) / frame_count 2991 2992 UPDATE_COUNTER(num_reads); 2993 
UPDATE_COUNTER(num_writes); 2994 UPDATE_COUNTER(num_copies); 2995 UPDATE_COUNTER(num_vertices); 2996 UPDATE_COUNTER(num_primitives); 2997 2998 // UPDATE_COUNTER(num_read_texture_updates); 2999 // UPDATE_COUNTER(num_ubo_updates); 3000 3001 UPDATE_GPU_STAT(buffer_streamed); 3002 UPDATE_GPU_STAT(num_draws); 3003 UPDATE_GPU_STAT(num_barriers); 3004 UPDATE_GPU_STAT(num_render_passes); 3005 UPDATE_GPU_STAT(num_copies); 3006 UPDATE_GPU_STAT(num_downloads); 3007 UPDATE_GPU_STAT(num_uploads); 3008 3009 #undef UPDATE_GPU_STAT 3010 #undef UPDATE_COUNTER 3011 3012 ResetStatistics(); 3013 }