duckstation

DuckStation, archived at the revision just before upstream changed it to a proprietary software project; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

gpu.cpp (111111B)


      1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
      3 
      4 #include "gpu.h"
      5 #include "dma.h"
      6 #include "gpu_shadergen.h"
      7 #include "host.h"
      8 #include "interrupt_controller.h"
      9 #include "settings.h"
     10 #include "system.h"
     11 #include "timers.h"
     12 
     13 #include "util/gpu_device.h"
     14 #include "util/image.h"
     15 #include "util/imgui_manager.h"
     16 #include "util/media_capture.h"
     17 #include "util/postprocessing.h"
     18 #include "util/shadergen.h"
     19 #include "util/state_wrapper.h"
     20 
     21 #include "common/align.h"
     22 #include "common/error.h"
     23 #include "common/file_system.h"
     24 #include "common/gsvector_formatter.h"
     25 #include "common/log.h"
     26 #include "common/path.h"
     27 #include "common/small_string.h"
     28 #include "common/string_util.h"
     29 
     30 #include "IconsEmoji.h"
     31 #include "fmt/format.h"
     32 #include "imgui.h"
     33 
     34 #include <cmath>
     35 #include <numbers>
     36 #include <thread>
     37 
Log_SetChannel(GPU);

// Global GPU instance; the timing-event callbacks below dispatch through it.
std::unique_ptr<GPU> g_gpu;

// VRAM backing store as 16-bit words, aligned to the host page size.
alignas(HOST_PAGE_SIZE) u16 g_vram[VRAM_SIZE / sizeof(u16)];

// CLUT cache storage.
u16 g_gpu_clut[GPU_CLUT_SIZE];

// GP0 command handler table, generated once during static initialization.
const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable();

// Timing events forwarding to the global GPU. Only the elapsed tick count is passed on; the user parameter and
// lateness are unused.
static TimingEvent s_crtc_tick_event(
  "GPU CRTC Tick", 1, 1, [](void* param, TickCount ticks, TickCount ticks_late) { g_gpu->CRTCTickEvent(ticks); },
  nullptr);
static TimingEvent s_command_tick_event(
  "GPU Command Tick", 1, 1, [](void* param, TickCount ticks, TickCount ticks_late) { g_gpu->CommandTickEvent(ticks); },
  nullptr);

// Screenshot worker threads and the mutex declared alongside them; the GPU destructor calls JoinScreenshotThreads().
static std::deque<std::thread> s_screenshot_threads;
static std::mutex s_screenshot_threads_mutex;

// Uncomment to accumulate active GPU cycle statistics (see AddCommandTicks()).
// #define PSX_GPU_STATS
#ifdef PSX_GPU_STATS
static u64 s_active_gpu_cycles = 0;
static u32 s_active_gpu_cycles_frames = 0;
#endif

static constexpr GPUTexture::Format DISPLAY_INTERNAL_POSTFX_FORMAT = GPUTexture::Format::RGBA8;

// Forward declarations for file-local helpers defined elsewhere in this file.
static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp,
                                          u8 quality, bool clear_alpha, bool flip_y, std::vector<u32> texture_data,
                                          u32 texture_data_stride, GPUTexture::Format texture_format,
                                          bool display_osd_message, bool use_thread);
static void JoinScreenshotThreads();
     69 
// Constructs the GPU; the only work done here is calling ResetStatistics().
GPU::GPU()
{
  ResetStatistics();
}
     74 
     75 GPU::~GPU()
     76 {
     77   s_command_tick_event.Deactivate();
     78   s_crtc_tick_event.Deactivate();
     79 
     80   JoinScreenshotThreads();
     81   DestroyDeinterlaceTextures();
     82   g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture));
     83 
     84   if (g_gpu_device)
     85     g_gpu_device->SetGPUTimingEnabled(false);
     86 }
     87 
     88 bool GPU::Initialize()
     89 {
     90   m_force_progressive_scan = g_settings.gpu_disable_interlacing;
     91   m_force_ntsc_timings = g_settings.gpu_force_ntsc_timings;
     92   s_crtc_tick_event.Activate();
     93   m_fifo_size = g_settings.gpu_fifo_size;
     94   m_max_run_ahead = g_settings.gpu_max_run_ahead;
     95   m_console_is_pal = System::IsPALRegion();
     96   UpdateCRTCConfig();
     97 
     98   if (!CompileDisplayPipelines(true, true, g_settings.display_24bit_chroma_smoothing))
     99   {
    100     Host::ReportErrorAsync("Error", "Failed to compile base GPU pipelines.");
    101     return false;
    102   }
    103 
    104   g_gpu_device->SetGPUTimingEnabled(g_settings.display_show_gpu_usage);
    105 
    106 #ifdef PSX_GPU_STATS
    107   s_active_gpu_cycles = 0;
    108   s_active_gpu_cycles_frames = 0;
    109 #endif
    110 
    111   return true;
    112 }
    113 
    114 void GPU::UpdateSettings(const Settings& old_settings)
    115 {
    116   FlushRender();
    117 
    118   m_force_progressive_scan = g_settings.gpu_disable_interlacing;
    119   m_fifo_size = g_settings.gpu_fifo_size;
    120   m_max_run_ahead = g_settings.gpu_max_run_ahead;
    121 
    122   if (m_force_ntsc_timings != g_settings.gpu_force_ntsc_timings || m_console_is_pal != System::IsPALRegion())
    123   {
    124     m_force_ntsc_timings = g_settings.gpu_force_ntsc_timings;
    125     m_console_is_pal = System::IsPALRegion();
    126     UpdateCRTCConfig();
    127   }
    128 
    129   // Crop mode calls this, so recalculate the display area
    130   UpdateCRTCDisplayParameters();
    131 
    132   if (g_settings.display_scaling != old_settings.display_scaling ||
    133       g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode ||
    134       g_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing)
    135   {
    136     // Toss buffers on mode change.
    137     if (g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode)
    138       DestroyDeinterlaceTextures();
    139 
    140     if (!CompileDisplayPipelines(g_settings.display_scaling != old_settings.display_scaling,
    141                                  g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode,
    142                                  g_settings.display_24bit_chroma_smoothing !=
    143                                    old_settings.display_24bit_chroma_smoothing))
    144     {
    145       Panic("Failed to compile display pipeline on settings change.");
    146     }
    147   }
    148 
    149   g_gpu_device->SetGPUTimingEnabled(g_settings.display_show_gpu_usage);
    150 }
    151 
// Recomputes the CRTC configuration; UpdateCRTCConfig() scales the horizontal timings through
// System::ScaleTicksToOverclock(), so they have to be refreshed here.
void GPU::CPUClockChanged()
{
  UpdateCRTCConfig();
}
    156 
// Intentionally empty in this implementation.
// NOTE(review): presumably a hook overridden by hardware renderer backends - confirm against gpu.h.
void GPU::UpdateResolutionScale()
{
}
    160 
    161 std::tuple<u32, u32> GPU::GetEffectiveDisplayResolution(bool scaled /* = true */)
    162 {
    163   return std::tie(m_crtc_state.display_vram_width, m_crtc_state.display_vram_height);
    164 }
    165 
    166 std::tuple<u32, u32> GPU::GetFullDisplayResolution(bool scaled /* = true */)
    167 {
    168   return std::tie(m_crtc_state.display_width, m_crtc_state.display_height);
    169 }
    170 
    171 void GPU::Reset(bool clear_vram)
    172 {
    173   m_GPUSTAT.bits = 0x14802000;
    174   m_set_texture_disable_mask = false;
    175   m_GPUREAD_latch = 0;
    176   m_crtc_state.fractional_ticks = 0;
    177   m_crtc_state.fractional_dot_ticks = 0;
    178   m_crtc_state.current_tick_in_scanline = 0;
    179   m_crtc_state.current_scanline = 0;
    180   m_crtc_state.in_hblank = false;
    181   m_crtc_state.in_vblank = false;
    182   m_crtc_state.interlaced_field = 0;
    183   m_crtc_state.interlaced_display_field = 0;
    184 
    185   if (clear_vram)
    186   {
    187     std::memset(g_vram, 0, sizeof(g_vram));
    188     std::memset(g_gpu_clut, 0, sizeof(g_gpu_clut));
    189   }
    190 
    191   // Cancel VRAM writes.
    192   m_blitter_state = BlitterState::Idle;
    193 
    194   // Force event to reschedule itself.
    195   s_crtc_tick_event.Deactivate();
    196   s_command_tick_event.Deactivate();
    197 
    198   SoftReset();
    199   UpdateDisplay();
    200 }
    201 
    202 void GPU::SoftReset()
    203 {
    204   FlushRender();
    205   if (m_blitter_state == BlitterState::WritingVRAM)
    206     FinishVRAMWrite();
    207 
    208   m_GPUSTAT.texture_page_x_base = 0;
    209   m_GPUSTAT.texture_page_y_base = 0;
    210   m_GPUSTAT.semi_transparency_mode = GPUTransparencyMode::HalfBackgroundPlusHalfForeground;
    211   m_GPUSTAT.texture_color_mode = GPUTextureMode::Palette4Bit;
    212   m_GPUSTAT.dither_enable = false;
    213   m_GPUSTAT.draw_to_displayed_field = false;
    214   m_GPUSTAT.set_mask_while_drawing = false;
    215   m_GPUSTAT.check_mask_before_draw = false;
    216   m_GPUSTAT.reverse_flag = false;
    217   m_GPUSTAT.texture_disable = false;
    218   m_GPUSTAT.horizontal_resolution_2 = 0;
    219   m_GPUSTAT.horizontal_resolution_1 = 0;
    220   m_GPUSTAT.vertical_resolution = false;
    221   m_GPUSTAT.pal_mode = System::IsPALRegion();
    222   m_GPUSTAT.display_area_color_depth_24 = false;
    223   m_GPUSTAT.vertical_interlace = false;
    224   m_GPUSTAT.display_disable = true;
    225   m_GPUSTAT.dma_direction = DMADirection::Off;
    226   m_drawing_area = {};
    227   m_drawing_area_changed = true;
    228   m_drawing_offset = {};
    229   std::memset(&m_crtc_state.regs, 0, sizeof(m_crtc_state.regs));
    230   m_crtc_state.regs.horizontal_display_range = 0xC60260;
    231   m_crtc_state.regs.vertical_display_range = 0x3FC10;
    232   m_blitter_state = BlitterState::Idle;
    233   m_pending_command_ticks = 0;
    234   m_command_total_words = 0;
    235   m_vram_transfer = {};
    236   m_fifo.Clear();
    237   m_blit_buffer.clear();
    238   m_blit_remaining_words = 0;
    239   m_draw_mode.texture_window_value = 0xFFFFFFFFu;
    240   SetDrawMode(0);
    241   SetTexturePalette(0);
    242   SetTextureWindow(0);
    243   InvalidateCLUT();
    244   UpdateDMARequest();
    245   UpdateCRTCConfig();
    246   UpdateCommandTickEvent();
    247   UpdateGPUIdle();
    248 }
    249 
    250 bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display)
    251 {
    252   FlushRender();
    253 
    254   if (sw.IsReading())
    255   {
    256     // perform a reset to discard all pending draws/fb state
    257     Reset(host_texture == nullptr);
    258   }
    259 
    260   sw.Do(&m_GPUSTAT.bits);
    261 
    262   sw.Do(&m_draw_mode.mode_reg.bits);
    263   sw.Do(&m_draw_mode.palette_reg.bits);
    264   sw.Do(&m_draw_mode.texture_window_value);
    265 
    266   if (sw.GetVersion() < 62) [[unlikely]]
    267   {
    268     // texture_page_x, texture_page_y, texture_palette_x, texture_palette_y
    269     DebugAssert(sw.IsReading());
    270     sw.SkipBytes(sizeof(u32) * 4);
    271   }
    272 
    273   sw.Do(&m_draw_mode.texture_window.and_x);
    274   sw.Do(&m_draw_mode.texture_window.and_y);
    275   sw.Do(&m_draw_mode.texture_window.or_x);
    276   sw.Do(&m_draw_mode.texture_window.or_y);
    277   sw.Do(&m_draw_mode.texture_x_flip);
    278   sw.Do(&m_draw_mode.texture_y_flip);
    279 
    280   sw.Do(&m_drawing_area.left);
    281   sw.Do(&m_drawing_area.top);
    282   sw.Do(&m_drawing_area.right);
    283   sw.Do(&m_drawing_area.bottom);
    284   sw.Do(&m_drawing_offset.x);
    285   sw.Do(&m_drawing_offset.y);
    286   sw.Do(&m_drawing_offset.x);
    287 
    288   sw.Do(&m_console_is_pal);
    289   sw.Do(&m_set_texture_disable_mask);
    290 
    291   sw.Do(&m_crtc_state.regs.display_address_start);
    292   sw.Do(&m_crtc_state.regs.horizontal_display_range);
    293   sw.Do(&m_crtc_state.regs.vertical_display_range);
    294   sw.Do(&m_crtc_state.dot_clock_divider);
    295   sw.Do(&m_crtc_state.display_width);
    296   sw.Do(&m_crtc_state.display_height);
    297   sw.Do(&m_crtc_state.display_origin_left);
    298   sw.Do(&m_crtc_state.display_origin_top);
    299   sw.Do(&m_crtc_state.display_vram_left);
    300   sw.Do(&m_crtc_state.display_vram_top);
    301   sw.Do(&m_crtc_state.display_vram_width);
    302   sw.Do(&m_crtc_state.display_vram_height);
    303   sw.Do(&m_crtc_state.horizontal_total);
    304   sw.Do(&m_crtc_state.horizontal_visible_start);
    305   sw.Do(&m_crtc_state.horizontal_visible_end);
    306   sw.Do(&m_crtc_state.horizontal_display_start);
    307   sw.Do(&m_crtc_state.horizontal_display_end);
    308   sw.Do(&m_crtc_state.vertical_total);
    309   sw.Do(&m_crtc_state.vertical_visible_start);
    310   sw.Do(&m_crtc_state.vertical_visible_end);
    311   sw.Do(&m_crtc_state.vertical_display_start);
    312   sw.Do(&m_crtc_state.vertical_display_end);
    313   sw.Do(&m_crtc_state.fractional_ticks);
    314   sw.Do(&m_crtc_state.current_tick_in_scanline);
    315   sw.Do(&m_crtc_state.current_scanline);
    316   sw.DoEx(&m_crtc_state.fractional_dot_ticks, 46, 0);
    317   sw.Do(&m_crtc_state.in_hblank);
    318   sw.Do(&m_crtc_state.in_vblank);
    319   sw.Do(&m_crtc_state.interlaced_field);
    320   sw.Do(&m_crtc_state.interlaced_display_field);
    321   sw.Do(&m_crtc_state.active_line_lsb);
    322 
    323   sw.Do(&m_blitter_state);
    324   sw.Do(&m_pending_command_ticks);
    325   sw.Do(&m_command_total_words);
    326   sw.Do(&m_GPUREAD_latch);
    327 
    328   if (sw.GetVersion() < 64) [[unlikely]]
    329   {
    330     // Clear CLUT cache and let it populate later.
    331     InvalidateCLUT();
    332   }
    333   else
    334   {
    335     sw.Do(&m_current_clut_reg_bits);
    336     sw.Do(&m_current_clut_is_8bit);
    337     sw.DoArray(g_gpu_clut, std::size(g_gpu_clut));
    338   }
    339 
    340   sw.Do(&m_vram_transfer.x);
    341   sw.Do(&m_vram_transfer.y);
    342   sw.Do(&m_vram_transfer.width);
    343   sw.Do(&m_vram_transfer.height);
    344   sw.Do(&m_vram_transfer.col);
    345   sw.Do(&m_vram_transfer.row);
    346 
    347   sw.Do(&m_fifo);
    348   sw.Do(&m_blit_buffer);
    349   sw.Do(&m_blit_remaining_words);
    350   sw.Do(&m_render_command.bits);
    351 
    352   sw.Do(&m_max_run_ahead);
    353   sw.Do(&m_fifo_size);
    354 
    355   if (sw.IsReading())
    356   {
    357     m_draw_mode.texture_page_changed = true;
    358     m_draw_mode.texture_window_changed = true;
    359     m_drawing_area_changed = true;
    360     SetClampedDrawingArea();
    361     UpdateDMARequest();
    362   }
    363 
    364   if (!host_texture)
    365   {
    366     if (!sw.DoMarker("GPU-VRAM"))
    367       return false;
    368 
    369     sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
    370   }
    371 
    372   if (sw.IsReading())
    373   {
    374     UpdateCRTCConfig();
    375     if (update_display)
    376       UpdateDisplay();
    377 
    378     UpdateCommandTickEvent();
    379   }
    380 
    381   return !sw.HasError();
    382 }
    383 
// Intentionally empty in this implementation.
// NOTE(review): presumably a hook for renderer backends that need to rebind device state - confirm against gpu.h.
void GPU::RestoreDeviceContext()
{
}
    387 
    388 void GPU::UpdateDMARequest()
    389 {
    390   switch (m_blitter_state)
    391   {
    392     case BlitterState::Idle:
    393       m_GPUSTAT.ready_to_send_vram = false;
    394       m_GPUSTAT.ready_to_recieve_dma = (m_fifo.IsEmpty() || m_fifo.GetSize() < m_command_total_words);
    395       break;
    396 
    397     case BlitterState::WritingVRAM:
    398       m_GPUSTAT.ready_to_send_vram = false;
    399       m_GPUSTAT.ready_to_recieve_dma = (m_fifo.GetSize() < m_fifo_size);
    400       break;
    401 
    402     case BlitterState::ReadingVRAM:
    403       m_GPUSTAT.ready_to_send_vram = true;
    404       m_GPUSTAT.ready_to_recieve_dma = m_fifo.IsEmpty();
    405       break;
    406 
    407     case BlitterState::DrawingPolyLine:
    408       m_GPUSTAT.ready_to_send_vram = false;
    409       m_GPUSTAT.ready_to_recieve_dma = (m_fifo.GetSize() < m_fifo_size);
    410       break;
    411 
    412     default:
    413       UnreachableCode();
    414       break;
    415   }
    416 
    417   bool dma_request;
    418   switch (m_GPUSTAT.dma_direction)
    419   {
    420     case DMADirection::Off:
    421       dma_request = false;
    422       break;
    423 
    424     case DMADirection::FIFO:
    425       dma_request = m_GPUSTAT.ready_to_recieve_dma;
    426       break;
    427 
    428     case DMADirection::CPUtoGP0:
    429       dma_request = m_GPUSTAT.ready_to_recieve_dma;
    430       break;
    431 
    432     case DMADirection::GPUREADtoCPU:
    433       dma_request = m_GPUSTAT.ready_to_send_vram;
    434       break;
    435 
    436     default:
    437       dma_request = false;
    438       break;
    439   }
    440   m_GPUSTAT.dma_data_request = dma_request;
    441   DMA::SetRequest(DMA::Channel::GPU, dma_request);
    442 }
    443 
    444 void GPU::UpdateGPUIdle()
    445 {
    446   m_GPUSTAT.gpu_idle = (m_blitter_state == BlitterState::Idle && m_pending_command_ticks <= 0 && m_fifo.IsEmpty());
    447 }
    448 
    449 u32 GPU::ReadRegister(u32 offset)
    450 {
    451   switch (offset)
    452   {
    453     case 0x00:
    454       return ReadGPUREAD();
    455 
    456     case 0x04:
    457     {
    458       // code can be dependent on the odd/even bit, so update the GPU state when reading.
    459       // we can mitigate this slightly by only updating when the raster is actually hitting a new line
    460       if (IsCRTCScanlinePending())
    461         SynchronizeCRTC();
    462       if (IsCommandCompletionPending())
    463         s_command_tick_event.InvokeEarly();
    464 
    465       return m_GPUSTAT.bits;
    466     }
    467 
    468     default:
    469       ERROR_LOG("Unhandled register read: {:02X}", offset);
    470       return UINT32_C(0xFFFFFFFF);
    471   }
    472 }
    473 
    474 void GPU::WriteRegister(u32 offset, u32 value)
    475 {
    476   switch (offset)
    477   {
    478     case 0x00:
    479       m_fifo.Push(value);
    480       ExecuteCommands();
    481       return;
    482 
    483     case 0x04:
    484       WriteGP1(value);
    485       return;
    486 
    487     default:
    488       ERROR_LOG("Unhandled register write: {:02X} <- {:08X}", offset, value);
    489       return;
    490   }
    491 }
    492 
    493 void GPU::DMARead(u32* words, u32 word_count)
    494 {
    495   if (m_GPUSTAT.dma_direction != DMADirection::GPUREADtoCPU)
    496   {
    497     ERROR_LOG("Invalid DMA direction from GPU DMA read");
    498     std::fill_n(words, word_count, UINT32_C(0xFFFFFFFF));
    499     return;
    500   }
    501 
    502   for (u32 i = 0; i < word_count; i++)
    503     words[i] = ReadGPUREAD();
    504 }
    505 
// Called once a DMA write has finished; runs the command processor.
// NOTE(review): presumably the DMA write path queues words into the FIFO without executing them - confirm.
void GPU::EndDMAWrite()
{
  ExecuteCommands();
}
    510 
    511 /**
    512  * NTSC GPU clock 53.693175 MHz
    513  * PAL GPU clock 53.203425 MHz
    514  * courtesy of @ggrtk
    515  *
    516  * NTSC - sysclk * 715909 / 451584
    517  * PAL - sysclk * 709379 / 451584
    518  */
    519 
    520 TickCount GPU::GetCRTCFrequency() const
    521 {
    522   return m_console_is_pal ? 53203425 : 53693175;
    523 }
    524 
    525 TickCount GPU::CRTCTicksToSystemTicks(TickCount gpu_ticks, TickCount fractional_ticks) const
    526 {
    527   // convert to master clock, rounding up as we want to overshoot not undershoot
    528   if (!m_console_is_pal)
    529     return static_cast<TickCount>((u64(gpu_ticks) * u64(451584) + fractional_ticks + u64(715908)) / u64(715909));
    530   else
    531     return static_cast<TickCount>((u64(gpu_ticks) * u64(451584) + fractional_ticks + u64(709378)) / u64(709379));
    532 }
    533 
    534 TickCount GPU::SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const
    535 {
    536   u64 mul = u64(sysclk_ticks);
    537   mul *= !m_console_is_pal ? u64(715909) : u64(709379);
    538   mul += u64(*fractional_ticks);
    539 
    540   const TickCount ticks = static_cast<TickCount>(mul / u64(451584));
    541   *fractional_ticks = static_cast<TickCount>(mul % u64(451584));
    542   return ticks;
    543 }
    544 
// Adds ticks to the pending command tick counter. With PSX_GPU_STATS defined, the same amount is also accumulated
// into the active-cycle statistics.
void GPU::AddCommandTicks(TickCount ticks)
{
  m_pending_command_ticks += ticks;
#ifdef PSX_GPU_STATS
  s_active_gpu_cycles += ticks;
#endif
}
    552 
// Runs the CRTC tick event ahead of its scheduled time so that CRTC state is up to date before it is inspected.
void GPU::SynchronizeCRTC()
{
  s_crtc_tick_event.InvokeEarly();
}
    557 
    558 float GPU::ComputeHorizontalFrequency() const
    559 {
    560   const CRTCState& cs = m_crtc_state;
    561   TickCount fractional_ticks = 0;
    562   return static_cast<float>(
    563     static_cast<double>(SystemTicksToCRTCTicks(System::GetTicksPerSecond(), &fractional_ticks)) /
    564     static_cast<double>(cs.horizontal_total));
    565 }
    566 
    567 float GPU::ComputeVerticalFrequency() const
    568 {
    569   const CRTCState& cs = m_crtc_state;
    570   const TickCount ticks_per_frame = cs.horizontal_total * cs.vertical_total;
    571   TickCount fractional_ticks = 0;
    572   return static_cast<float>(
    573     static_cast<double>(SystemTicksToCRTCTicks(System::GetTicksPerSecond(), &fractional_ticks)) /
    574     static_cast<double>(ticks_per_frame));
    575 }
    576 
    577 float GPU::ComputeDisplayAspectRatio() const
    578 {
    579   if (g_settings.debugging.show_vram)
    580   {
    581     return static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT);
    582   }
    583   else if (g_settings.display_force_4_3_for_24bit && m_GPUSTAT.display_area_color_depth_24)
    584   {
    585     return 4.0f / 3.0f;
    586   }
    587   else if (g_settings.display_aspect_ratio == DisplayAspectRatio::Auto)
    588   {
    589     const CRTCState& cs = m_crtc_state;
    590     float relative_width = static_cast<float>(cs.horizontal_visible_end - cs.horizontal_visible_start);
    591     float relative_height = static_cast<float>(cs.vertical_visible_end - cs.vertical_visible_start);
    592 
    593     if (relative_width <= 0 || relative_height <= 0)
    594       return 4.0f / 3.0f;
    595 
    596     if (m_GPUSTAT.pal_mode)
    597     {
    598       relative_width /= static_cast<float>(PAL_HORIZONTAL_ACTIVE_END - PAL_HORIZONTAL_ACTIVE_START);
    599       relative_height /= static_cast<float>(PAL_VERTICAL_ACTIVE_END - PAL_VERTICAL_ACTIVE_START);
    600     }
    601     else
    602     {
    603       relative_width /= static_cast<float>(NTSC_HORIZONTAL_ACTIVE_END - NTSC_HORIZONTAL_ACTIVE_START);
    604       relative_height /= static_cast<float>(NTSC_VERTICAL_ACTIVE_END - NTSC_VERTICAL_ACTIVE_START);
    605     }
    606     return (relative_width / relative_height) * (4.0f / 3.0f);
    607   }
    608   else if (g_settings.display_aspect_ratio == DisplayAspectRatio::PAR1_1)
    609   {
    610     if (m_crtc_state.display_width == 0 || m_crtc_state.display_height == 0)
    611       return 4.0f / 3.0f;
    612 
    613     return static_cast<float>(m_crtc_state.display_width) / static_cast<float>(m_crtc_state.display_height);
    614   }
    615   else
    616   {
    617     return g_settings.GetDisplayAspectRatioValue();
    618   }
    619 }
    620 
    621 void GPU::UpdateCRTCConfig()
    622 {
    623   static constexpr std::array<u16, 8> dot_clock_dividers = {{10, 8, 5, 4, 7, 7, 7, 7}};
    624   CRTCState& cs = m_crtc_state;
    625 
    626   cs.vertical_total = m_GPUSTAT.pal_mode ? PAL_TOTAL_LINES : NTSC_TOTAL_LINES;
    627   cs.horizontal_total = m_GPUSTAT.pal_mode ? PAL_TICKS_PER_LINE : NTSC_TICKS_PER_LINE;
    628   cs.horizontal_active_start = m_GPUSTAT.pal_mode ? PAL_HORIZONTAL_ACTIVE_START : NTSC_HORIZONTAL_ACTIVE_START;
    629   cs.horizontal_active_end = m_GPUSTAT.pal_mode ? PAL_HORIZONTAL_ACTIVE_END : NTSC_HORIZONTAL_ACTIVE_END;
    630 
    631   const u8 horizontal_resolution_index = m_GPUSTAT.horizontal_resolution_1 | (m_GPUSTAT.horizontal_resolution_2 << 2);
    632   cs.dot_clock_divider = dot_clock_dividers[horizontal_resolution_index];
    633   cs.horizontal_display_start =
    634     (std::min<u16>(cs.regs.X1, cs.horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider;
    635   cs.horizontal_display_end =
    636     (std::min<u16>(cs.regs.X2, cs.horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider;
    637   cs.vertical_display_start = std::min<u16>(cs.regs.Y1, cs.vertical_total);
    638   cs.vertical_display_end = std::min<u16>(cs.regs.Y2, cs.vertical_total);
    639 
    640   if (m_GPUSTAT.pal_mode && m_force_ntsc_timings)
    641   {
    642     // scale to NTSC parameters
    643     cs.horizontal_display_start =
    644       static_cast<u16>((static_cast<u32>(cs.horizontal_display_start) * NTSC_TICKS_PER_LINE) / PAL_TICKS_PER_LINE);
    645     cs.horizontal_display_end = static_cast<u16>(
    646       ((static_cast<u32>(cs.horizontal_display_end) * NTSC_TICKS_PER_LINE) + (PAL_TICKS_PER_LINE - 1)) /
    647       PAL_TICKS_PER_LINE);
    648     cs.vertical_display_start =
    649       static_cast<u16>((static_cast<u32>(cs.vertical_display_start) * NTSC_TOTAL_LINES) / PAL_TOTAL_LINES);
    650     cs.vertical_display_end = static_cast<u16>(
    651       ((static_cast<u32>(cs.vertical_display_end) * NTSC_TOTAL_LINES) + (PAL_TOTAL_LINES - 1)) / PAL_TOTAL_LINES);
    652 
    653     cs.vertical_total = NTSC_TOTAL_LINES;
    654     cs.current_scanline %= NTSC_TOTAL_LINES;
    655     cs.horizontal_total = NTSC_TICKS_PER_LINE;
    656     cs.current_tick_in_scanline %= NTSC_TICKS_PER_LINE;
    657   }
    658 
    659   cs.horizontal_display_start =
    660     static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_display_start)));
    661   cs.horizontal_display_end =
    662     static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_display_end)));
    663   cs.horizontal_active_start =
    664     static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_active_start)));
    665   cs.horizontal_active_end =
    666     static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_active_end)));
    667   cs.horizontal_total = static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_total)));
    668 
    669   cs.current_tick_in_scanline %= cs.horizontal_total;
    670   cs.UpdateHBlankFlag();
    671 
    672   cs.current_scanline %= cs.vertical_total;
    673 
    674   System::SetThrottleFrequency(ComputeVerticalFrequency());
    675 
    676   UpdateCRTCDisplayParameters();
    677   UpdateCRTCTickEvent();
    678 }
    679 
    680 void GPU::UpdateCRTCDisplayParameters()
    681 {
    682   CRTCState& cs = m_crtc_state;
    683   const DisplayCropMode crop_mode = g_settings.display_crop_mode;
    684 
    685   const u16 horizontal_total = m_GPUSTAT.pal_mode ? PAL_TICKS_PER_LINE : NTSC_TICKS_PER_LINE;
    686   const u16 vertical_total = m_GPUSTAT.pal_mode ? PAL_TOTAL_LINES : NTSC_TOTAL_LINES;
    687   const u16 horizontal_display_start =
    688     (std::min<u16>(cs.regs.X1, horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider;
    689   const u16 horizontal_display_end =
    690     (std::min<u16>(cs.regs.X2, horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider;
    691   const u16 vertical_display_start = std::min<u16>(cs.regs.Y1, vertical_total);
    692   const u16 vertical_display_end = std::min<u16>(cs.regs.Y2, vertical_total);
    693 
    694   if (m_GPUSTAT.pal_mode)
    695   {
    696     // TODO: Verify PAL numbers.
    697     switch (crop_mode)
    698     {
    699       case DisplayCropMode::None:
    700         cs.horizontal_visible_start = PAL_HORIZONTAL_ACTIVE_START;
    701         cs.horizontal_visible_end = PAL_HORIZONTAL_ACTIVE_END;
    702         cs.vertical_visible_start = PAL_VERTICAL_ACTIVE_START;
    703         cs.vertical_visible_end = PAL_VERTICAL_ACTIVE_END;
    704         break;
    705 
    706       case DisplayCropMode::Overscan:
    707         cs.horizontal_visible_start = static_cast<u16>(std::max<int>(0, 628 + g_settings.display_active_start_offset));
    708         cs.horizontal_visible_end =
    709           static_cast<u16>(std::max<int>(cs.horizontal_visible_start, 3188 + g_settings.display_active_end_offset));
    710         cs.vertical_visible_start = static_cast<u16>(std::max<int>(0, 30 + g_settings.display_line_start_offset));
    711         cs.vertical_visible_end =
    712           static_cast<u16>(std::max<int>(cs.vertical_visible_start, 298 + g_settings.display_line_end_offset));
    713         break;
    714 
    715       case DisplayCropMode::Borders:
    716       default:
    717         cs.horizontal_visible_start = horizontal_display_start;
    718         cs.horizontal_visible_end = horizontal_display_end;
    719         cs.vertical_visible_start = vertical_display_start;
    720         cs.vertical_visible_end = vertical_display_end;
    721         break;
    722     }
    723     cs.horizontal_visible_start =
    724       std::clamp<u16>(cs.horizontal_visible_start, PAL_HORIZONTAL_ACTIVE_START, PAL_HORIZONTAL_ACTIVE_END);
    725     cs.horizontal_visible_end =
    726       std::clamp<u16>(cs.horizontal_visible_end, cs.horizontal_visible_start, PAL_HORIZONTAL_ACTIVE_END);
    727     cs.vertical_visible_start =
    728       std::clamp<u16>(cs.vertical_visible_start, PAL_VERTICAL_ACTIVE_START, PAL_VERTICAL_ACTIVE_END);
    729     cs.vertical_visible_end =
    730       std::clamp<u16>(cs.vertical_visible_end, cs.vertical_visible_start, PAL_VERTICAL_ACTIVE_END);
    731   }
    732   else
    733   {
    734     switch (crop_mode)
    735     {
    736       case DisplayCropMode::None:
    737         cs.horizontal_visible_start = NTSC_HORIZONTAL_ACTIVE_START;
    738         cs.horizontal_visible_end = NTSC_HORIZONTAL_ACTIVE_END;
    739         cs.vertical_visible_start = NTSC_VERTICAL_ACTIVE_START;
    740         cs.vertical_visible_end = NTSC_VERTICAL_ACTIVE_END;
    741         break;
    742 
    743       case DisplayCropMode::Overscan:
    744         cs.horizontal_visible_start = static_cast<u16>(std::max<int>(0, 608 + g_settings.display_active_start_offset));
    745         cs.horizontal_visible_end =
    746           static_cast<u16>(std::max<int>(cs.horizontal_visible_start, 3168 + g_settings.display_active_end_offset));
    747         cs.vertical_visible_start = static_cast<u16>(std::max<int>(0, 24 + g_settings.display_line_start_offset));
    748         cs.vertical_visible_end =
    749           static_cast<u16>(std::max<int>(cs.vertical_visible_start, 248 + g_settings.display_line_end_offset));
    750         break;
    751 
    752       case DisplayCropMode::Borders:
    753       default:
    754         cs.horizontal_visible_start = horizontal_display_start;
    755         cs.horizontal_visible_end = horizontal_display_end;
    756         cs.vertical_visible_start = vertical_display_start;
    757         cs.vertical_visible_end = vertical_display_end;
    758         break;
    759     }
    760     cs.horizontal_visible_start =
    761       std::clamp<u16>(cs.horizontal_visible_start, NTSC_HORIZONTAL_ACTIVE_START, NTSC_HORIZONTAL_ACTIVE_END);
    762     cs.horizontal_visible_end =
    763       std::clamp<u16>(cs.horizontal_visible_end, cs.horizontal_visible_start, NTSC_HORIZONTAL_ACTIVE_END);
    764     cs.vertical_visible_start =
    765       std::clamp<u16>(cs.vertical_visible_start, NTSC_VERTICAL_ACTIVE_START, NTSC_VERTICAL_ACTIVE_END);
    766     cs.vertical_visible_end =
    767       std::clamp<u16>(cs.vertical_visible_end, cs.vertical_visible_start, NTSC_VERTICAL_ACTIVE_END);
    768   }
    769 
    770   // If force-progressive is enabled, we only double the height in 480i mode. This way non-interleaved 480i framebuffers
    771   // won't be broken when displayed.
    772   const u8 y_shift = BoolToUInt8(m_GPUSTAT.vertical_interlace && m_GPUSTAT.vertical_resolution);
    773   const u8 height_shift = m_force_progressive_scan ? y_shift : BoolToUInt8(m_GPUSTAT.vertical_interlace);
    774 
    775   // Determine screen size.
    776   cs.display_width = (cs.horizontal_visible_end - cs.horizontal_visible_start) / cs.dot_clock_divider;
    777   cs.display_height = (cs.vertical_visible_end - cs.vertical_visible_start) << height_shift;
    778 
    779   // Determine number of pixels outputted from VRAM (in general, round to 4-pixel multiple).
    780   // TODO: Verify behavior if values are outside of the active video portion of scanline.
    781   const u16 horizontal_display_ticks =
    782     (horizontal_display_end < horizontal_display_start) ? 0 : (horizontal_display_end - horizontal_display_start);
    783 
    784   const u16 horizontal_display_pixels = horizontal_display_ticks / cs.dot_clock_divider;
    785   if (horizontal_display_pixels == 1u)
    786     cs.display_vram_width = 4u;
    787   else
    788     cs.display_vram_width = (horizontal_display_pixels + 2u) & ~3u;
    789 
    790   // Determine if we need to adjust the VRAM rectangle (because the display is starting outside the visible area) or add
    791   // padding.
    792   u16 horizontal_skip_pixels;
    793   if (horizontal_display_start >= cs.horizontal_visible_start)
    794   {
    795     cs.display_origin_left = (horizontal_display_start - cs.horizontal_visible_start) / cs.dot_clock_divider;
    796     cs.display_vram_left = cs.regs.X;
    797     horizontal_skip_pixels = 0;
    798   }
    799   else
    800   {
    801     horizontal_skip_pixels = (cs.horizontal_visible_start - horizontal_display_start) / cs.dot_clock_divider;
    802     cs.display_origin_left = 0;
    803     cs.display_vram_left = (cs.regs.X + horizontal_skip_pixels) % VRAM_WIDTH;
    804   }
    805 
    806   // apply the crop from the start (usually overscan)
    807   cs.display_vram_width -= std::min(cs.display_vram_width, horizontal_skip_pixels);
    808 
    809   // Apply crop from the end by shrinking VRAM rectangle width if display would end outside the visible area.
    810   cs.display_vram_width = std::min<u16>(cs.display_vram_width, cs.display_width - cs.display_origin_left);
    811 
    812   if (vertical_display_start >= cs.vertical_visible_start)
    813   {
    814     cs.display_origin_top = (vertical_display_start - cs.vertical_visible_start) << y_shift;
    815     cs.display_vram_top = cs.regs.Y;
    816   }
    817   else
    818   {
    819     cs.display_origin_top = 0;
    820     cs.display_vram_top = (cs.regs.Y + ((cs.vertical_visible_start - vertical_display_start) << y_shift)) % VRAM_HEIGHT;
    821   }
    822 
    823   if (vertical_display_end <= cs.vertical_visible_end)
    824   {
    825     cs.display_vram_height =
    826       (vertical_display_end -
    827        std::min(vertical_display_end, std::max(vertical_display_start, cs.vertical_visible_start)))
    828       << height_shift;
    829   }
    830   else
    831   {
    832     cs.display_vram_height =
    833       (cs.vertical_visible_end -
    834        std::min(cs.vertical_visible_end, std::max(vertical_display_start, cs.vertical_visible_start)))
    835       << height_shift;
    836   }
    837 }
    838 
    839 // Returns how many CRTC ticks have elapsed since the CRTC tick event last executed. The conversion runs on a
    840 // local copy of the fractional remainder, so m_crtc_state is left untouched.
    841 TickCount GPU::GetPendingCRTCTicks() const
    842 {
    843   const TickCount elapsed_system_ticks = s_crtc_tick_event.GetTicksSinceLastExecution();
    844   TickCount fraction_scratch = m_crtc_state.fractional_ticks;
    845   return SystemTicksToCRTCTicks(elapsed_system_ticks, &fraction_scratch);
    846 }
    845 
    846 // Returns the GPU ticks elapsed since the command tick event last ran, or zero while that event is inactive.
    847 TickCount GPU::GetPendingCommandTicks() const
    848 {
    849   return s_command_tick_event.IsActive() ?
    850            SystemTicksToGPUTicks(s_command_tick_event.GetTicksSinceLastExecution()) :
    851            0;
    852 }
    853 
    854 void GPU::UpdateCRTCTickEvent()
    855 {
          // Computes the distance, in CRTC ticks, to the nearest upcoming point of interest (vertical display boundary,
          // timer IRQ, or hblank edge, depending on which timer features are enabled) and schedules s_crtc_tick_event
          // for it after converting that distance to system ticks.
    856   // figure out how many GPU ticks until the next vblank or event
    857   TickCount lines_until_event;
    858   if (Timers::IsSyncEnabled(HBLANK_TIMER_INDEX))
    859   {
    860     // when the timer sync is enabled we need to sync at vblank start and end
            // at/after vertical_display_end: target vertical_display_start, reached after wrapping through
            // vertical_total; otherwise target vertical_display_end
    861     lines_until_event =
    862       (m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end) ?
    863         (m_crtc_state.vertical_total - m_crtc_state.current_scanline + m_crtc_state.vertical_display_start) :
    864         (m_crtc_state.vertical_display_end - m_crtc_state.current_scanline);
    865   }
    866   else
    867   {
            // without sync only vertical_display_end is targeted; when already at/after it, the one a full frame
            // (vertical_total lines) later is used
    868     lines_until_event =
    869       (m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end ?
    870          (m_crtc_state.vertical_total - m_crtc_state.current_scanline + m_crtc_state.vertical_display_end) :
    871          (m_crtc_state.vertical_display_end - m_crtc_state.current_scanline));
    872   }
          // the HBLANK timer's distance to its IRQ is compared directly against the line count
    873   if (Timers::IsExternalIRQEnabled(HBLANK_TIMER_INDEX))
    874     lines_until_event = std::min(lines_until_event, Timers::GetTicksUntilIRQ(HBLANK_TIMER_INDEX));
    875 
          // whole lines -> ticks, minus the part of the current line that has already elapsed
    876   TickCount ticks_until_event =
    877     lines_until_event * m_crtc_state.horizontal_total - m_crtc_state.current_tick_in_scanline;
    878   if (Timers::IsExternalIRQEnabled(DOT_TIMER_INDEX))
    879   {
            // dots -> CRTC ticks via the dot clock divider, less the ticks already accumulated towards the next dot;
            // the result is clamped so it never goes negative
    880     const TickCount dots_until_irq = Timers::GetTicksUntilIRQ(DOT_TIMER_INDEX);
    881     const TickCount ticks_until_irq =
    882       (dots_until_irq * m_crtc_state.dot_clock_divider) - m_crtc_state.fractional_dot_ticks;
    883     ticks_until_event = std::min(ticks_until_event, std::max<TickCount>(ticks_until_irq, 0));
    884   }
    885 
    886   if (Timers::IsSyncEnabled(DOT_TIMER_INDEX))
    887   {
    888     // This could potentially be optimized to skip the time the gate is active, if we're resetting and free running.
    889     // But realistically, I've only seen sync off (most games), or reset+pause on gate (Konami Lightgun games).
    890     TickCount ticks_until_hblank_start_or_end;
    891     if (m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_active_end)
    892     {
              // at/after horizontal_active_end: the next edge is horizontal_active_start on the following line
    893       ticks_until_hblank_start_or_end =
    894         m_crtc_state.horizontal_total - m_crtc_state.current_tick_in_scanline + m_crtc_state.horizontal_active_start;
    895     }
    896     else if (m_crtc_state.current_tick_in_scanline < m_crtc_state.horizontal_active_start)
    897     {
              // before horizontal_active_start: that is the next edge
    898       ticks_until_hblank_start_or_end = m_crtc_state.horizontal_active_start - m_crtc_state.current_tick_in_scanline;
    899     }
    900     else
    901     {
              // between the two: the next edge is horizontal_active_end
    902       ticks_until_hblank_start_or_end = m_crtc_state.horizontal_active_end - m_crtc_state.current_tick_in_scanline;
    903     }
    904 
    905     ticks_until_event = std::min(ticks_until_event, ticks_until_hblank_start_or_end);
    906   }
    907 
          // the event is scheduled in system ticks; the current fractional remainder is passed to the conversion
    908   s_crtc_tick_event.Schedule(CRTCTicksToSystemTicks(ticks_until_event, m_crtc_state.fractional_ticks));
    909 }
    910 
    911 // Reports whether the CRTC ticks that have not been processed yet would move the beam past the end of the
    912 // current scanline.
    913 bool GPU::IsCRTCScanlinePending() const
    914 {
    915   // TODO: Most of these should be fields, not lines.
    916   const TickCount beam_tick = (GetPendingCRTCTicks() + m_crtc_state.current_tick_in_scanline);
    917   const bool still_on_current_line = (beam_tick < m_crtc_state.horizontal_total);
    918   return !still_on_current_line;
    919 }
    917 
    918 // True when command ticks are outstanding and at least that many GPU ticks have elapsed since the command tick
    919 // event last ran.
    920 bool GPU::IsCommandCompletionPending() const
    921 {
    922   // nothing outstanding, so nothing can be pending
    923   if (m_pending_command_ticks <= 0)
    924     return false;
    925 
    926   return (GetPendingCommandTicks() >= m_pending_command_ticks);
    927 }
    922 
    923 void GPU::CRTCTickEvent(TickCount ticks)
    924 {
          // Advances the CRTC by `ticks` system ticks. Moves the beam position, feeds the dot and hblank timers, updates
          // the hblank/vblank gates and the VBLANK interrupt line, flushes and presents the frame when vblank is entered,
          // toggles the interlace field when the frame wraps, then reschedules the tick event. System::FrameDone() is
          // called last, and only if vblank was entered during this call.
    925   // convert cpu/master clock to GPU ticks, accounting for partial cycles because of the non-integer divider
    926   const TickCount prev_tick = m_crtc_state.current_tick_in_scanline;
    927   const TickCount gpu_ticks = SystemTicksToCRTCTicks(ticks, &m_crtc_state.fractional_ticks);
    928   m_crtc_state.current_tick_in_scanline += gpu_ticks;
    929 
    930   if (Timers::IsUsingExternalClock(DOT_TIMER_INDEX))
    931   {
            // accumulate CRTC ticks, hand the whole dots to the dot timer and keep the remainder for next time
    932     m_crtc_state.fractional_dot_ticks += gpu_ticks;
    933     const TickCount dots = m_crtc_state.fractional_dot_ticks / m_crtc_state.dot_clock_divider;
    934     m_crtc_state.fractional_dot_ticks = m_crtc_state.fractional_dot_ticks % m_crtc_state.dot_clock_divider;
    935     if (dots > 0)
    936       Timers::AddTicks(DOT_TIMER_INDEX, dots);
    937   }
    938 
    939   if (m_crtc_state.current_tick_in_scanline < m_crtc_state.horizontal_total)
    940   {
    941     // short path when we execute <1 line.. this shouldn't occur often, except when gated (konami lightgun games).
    942     m_crtc_state.UpdateHBlankFlag();
    943     Timers::SetGate(DOT_TIMER_INDEX, m_crtc_state.in_hblank);
    944     if (Timers::IsUsingExternalClock(HBLANK_TIMER_INDEX))
    945     {
              // difference between "now at/after horizontal_active_end" and "previously at/after horizontal_active_end"
    946       const u32 hblank_timer_ticks =
    947         BoolToUInt32(m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_active_end) -
    948         BoolToUInt32(prev_tick >= m_crtc_state.horizontal_active_end);
    949       if (hblank_timer_ticks > 0)
    950         Timers::AddTicks(HBLANK_TIMER_INDEX, static_cast<TickCount>(hblank_timer_ticks));
    951     }
    952 
    953     UpdateCRTCTickEvent();
    954     return;
    955   }
    956 
          // number of complete lines covered by this step; the remainder becomes the position within the new line
    957   u32 lines_to_draw = m_crtc_state.current_tick_in_scanline / m_crtc_state.horizontal_total;
    958   m_crtc_state.current_tick_in_scanline %= m_crtc_state.horizontal_total;
    959 #if 0
    960   Log_WarningPrintf("Old line: %u, new line: %u, drawing %u", m_crtc_state.current_scanline,
    961     m_crtc_state.current_scanline + lines_to_draw, lines_to_draw);
    962 #endif
    963 
    964   m_crtc_state.UpdateHBlankFlag();
    965   Timers::SetGate(DOT_TIMER_INDEX, m_crtc_state.in_hblank);
    966 
    967   if (Timers::IsUsingExternalClock(HBLANK_TIMER_INDEX))
    968   {
    969     // lines_to_draw => number of times ticks passed horizontal_total.
    970     // Subtract one if we were previously in hblank, but only on that line. If it was previously less than
    971     // horizontal_active_start, we still want to add one, because hblank would have gone inactive, and then active again
    972     // during the line. Finally add the current line being drawn, if hblank went inactive->active during the line.
    973     const u32 hblank_timer_ticks =
    974       lines_to_draw - BoolToUInt32(prev_tick >= m_crtc_state.horizontal_active_end) +
    975       BoolToUInt32(m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_active_end);
    976     if (hblank_timer_ticks > 0)
    977       Timers::AddTicks(HBLANK_TIMER_INDEX, static_cast<TickCount>(hblank_timer_ticks));
    978   }
    979 
    980   bool frame_done = false;
    981   while (lines_to_draw > 0)
    982   {
            // advance no further than vertical_total per iteration, so the wrap to line 0 below is hit exactly
    983     const u32 lines_to_draw_this_loop =
    984       std::min(lines_to_draw, m_crtc_state.vertical_total - m_crtc_state.current_scanline);
    985     const u32 prev_scanline = m_crtc_state.current_scanline;
    986     m_crtc_state.current_scanline += lines_to_draw_this_loop;
    987     DebugAssert(m_crtc_state.current_scanline <= m_crtc_state.vertical_total);
    988     lines_to_draw -= lines_to_draw_this_loop;
    989 
    990     // clear the vblank flag if the beam would pass through the display area
    991     if (prev_scanline < m_crtc_state.vertical_display_start &&
    992         m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end)
    993     {
    994       Timers::SetGate(HBLANK_TIMER_INDEX, false);
    995       InterruptController::SetLineState(InterruptController::IRQ::VBLANK, false);
    996       m_crtc_state.in_vblank = false;
    997     }
    998 
            // vblank means the beam is outside [vertical_display_start, vertical_display_end)
    999     const bool new_vblank = m_crtc_state.current_scanline < m_crtc_state.vertical_display_start ||
   1000                             m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end;
   1001     if (m_crtc_state.in_vblank != new_vblank)
   1002     {
   1003       if (new_vblank)
   1004       {
   1005         DEBUG_LOG("Now in v-blank");
   1006 
   1007         // flush any pending draws and "scan out" the image
   1008         // TODO: move present in here I guess
   1009         FlushRender();
   1010         UpdateDisplay();
   1011         frame_done = true;
   1012 
   1013         // switch fields early. this is needed so we draw to the correct one.
   1014         if (m_GPUSTAT.InInterleaved480iMode())
   1015           m_crtc_state.interlaced_display_field = m_crtc_state.interlaced_field ^ 1u;
   1016         else
   1017           m_crtc_state.interlaced_display_field = 0;
   1018 
   1019 #ifdef PSX_GPU_STATS
                // every 60 vblank entries, log the accumulated active GPU cycles as a usage figure and reset the counters
   1020         if ((++s_active_gpu_cycles_frames) == 60)
   1021         {
   1022           const double busy_frac =
   1023             static_cast<double>(s_active_gpu_cycles) /
   1024             static_cast<double>(SystemTicksToGPUTicks(System::ScaleTicksToOverclock(System::MASTER_CLOCK)) *
   1025                                 (ComputeVerticalFrequency() / 60.0f));
   1026           DEV_LOG("PSX GPU Usage: {:.2f}% [{:.0f} cycles avg per frame]", busy_frac * 100,
   1027                   static_cast<double>(s_active_gpu_cycles) / static_cast<double>(s_active_gpu_cycles_frames));
   1028           s_active_gpu_cycles = 0;
   1029           s_active_gpu_cycles_frames = 0;
   1030         }
   1031 #endif
   1032       }
   1033 
              // propagate the new vblank state to the hblank timer gate and the VBLANK interrupt line
   1034       Timers::SetGate(HBLANK_TIMER_INDEX, new_vblank);
   1035       InterruptController::SetLineState(InterruptController::IRQ::VBLANK, new_vblank);
   1036       m_crtc_state.in_vblank = new_vblank;
   1037     }
   1038 
   1039     // past the end of vblank?
   1040     if (m_crtc_state.current_scanline == m_crtc_state.vertical_total)
   1041     {
   1042       // start the new frame
   1043       m_crtc_state.current_scanline = 0;
   1044       if (m_GPUSTAT.vertical_interlace)
   1045       {
                // toggle the field; the GPUSTAT bit is set to the inverse of the CRTC field
   1046         m_crtc_state.interlaced_field ^= 1u;
   1047         m_GPUSTAT.interlaced_field = !m_crtc_state.interlaced_field;
   1048       }
   1049       else
   1050       {
   1051         m_crtc_state.interlaced_field = 0;
   1052         m_GPUSTAT.interlaced_field = 0u; // new GPU = 1, old GPU = 0
   1053       }
   1054     }
   1055   }
   1056 
   1057   // alternating even line bit in 240-line mode
   1058   if (m_GPUSTAT.InInterleaved480iMode())
   1059   {
            // both bits start from the LSB of regs.Y; the GPUSTAT bit only includes the display field outside vblank
   1060     m_crtc_state.active_line_lsb =
   1061       Truncate8((m_crtc_state.regs.Y + BoolToUInt32(m_crtc_state.interlaced_display_field)) & u32(1));
   1062     m_GPUSTAT.display_line_lsb = ConvertToBoolUnchecked(
   1063       (m_crtc_state.regs.Y + (BoolToUInt8(!m_crtc_state.in_vblank) & m_crtc_state.interlaced_display_field)) & u32(1));
   1064   }
   1065   else
   1066   {
   1067     m_crtc_state.active_line_lsb = 0;
   1068     m_GPUSTAT.display_line_lsb = ConvertToBoolUnchecked((m_crtc_state.regs.Y + m_crtc_state.current_scanline) & u32(1));
   1069   }
   1070 
   1071   UpdateCRTCTickEvent();
   1072 
          // frame completion is reported only after the tick event has been rescheduled
   1073   if (frame_done)
   1074     System::FrameDone();
   1075 }
   1076 
   1077 // Charges `ticks` system ticks of elapsed time (converted to GPU ticks) against the outstanding command ticks,
   1078 // runs the command processor, and then re-arms or deactivates the command tick event.
   1079 void GPU::CommandTickEvent(TickCount ticks)
   1080 {
   1081   const auto elapsed_gpu_ticks = SystemTicksToGPUTicks(ticks);
   1082   m_pending_command_ticks -= elapsed_gpu_ticks;
   1083 
   1084   // m_executing_commands stays set across both calls below
   1085   m_executing_commands = true;
   1086   ExecuteCommands();
   1087   UpdateCommandTickEvent();
   1088   m_executing_commands = false;
   1089 }
   1086 
   1087 // Re-arms the command tick event for the outstanding command ticks. When none remain (zero or negative), the
   1088 // counter is reset to zero and the event is deactivated instead.
   1089 void GPU::UpdateCommandTickEvent()
   1090 {
   1091   if (m_pending_command_ticks > 0)
   1092   {
   1093     s_command_tick_event.SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks));
   1094     return;
   1095   }
   1096 
   1097   m_pending_command_ticks = 0;
   1098   s_command_tick_event.Deactivate();
   1099 }
   1099 
   1100 // Maps a position in window space to a position in the emulated display, at internal resolution. The window
   1101 // position is first expressed as a fraction of the display rectangle returned by CalculateDrawRect() and then
   1102 // multiplied by the CRTC display size. Results are written through display_x / display_y and are not clamped.
   1103 void GPU::ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x,
   1104                                                        float* display_y) const
   1105 {
   1106   GSVector4i display_rc, draw_rc;
   1107   CalculateDrawRect(g_gpu_device->GetWindowWidth(), g_gpu_device->GetWindowHeight(), true, true, &display_rc, &draw_rc);
   1108 
   1109   // fraction of the display rectangle covered by the window position
   1110   const float frac_x = (window_x - static_cast<float>(display_rc.left)) / static_cast<float>(display_rc.width());
   1111   const float frac_y = (window_y - static_cast<float>(display_rc.top)) / static_cast<float>(display_rc.height());
   1112 
   1113   // back to internal resolution
   1114   const float crtc_width = static_cast<float>(m_crtc_state.display_width);
   1115   const float crtc_height = static_cast<float>(m_crtc_state.display_height);
   1116   *display_x = frac_x * crtc_width;
   1117   *display_y = frac_y * crtc_height;
   1118 
   1119   // TODO: apply rotation matrix
   1120 
   1121   DEV_LOG("win {:.0f},{:.0f} -> local {:.0f},{:.0f}, disp {:.2f},{:.2f} (size {},{} frac {},{})", window_x, window_y,
   1122           window_x - draw_rc.left, window_y - draw_rc.top, *display_x, *display_y, m_crtc_state.display_width,
   1123           m_crtc_state.display_height, *display_x / crtc_width, *display_y / crtc_height);
   1124 }
   1123 
   1124 // Translates a display-space position into a beam position (tick within the line, and line). x_scale stretches
   1125 // the X coordinate about the horizontal centre of the display before the bounds check. Returns false, without
   1126 // writing the outputs, when the (scaled) position lies outside the display.
   1127 bool GPU::ConvertDisplayCoordinatesToBeamTicksAndLines(float display_x, float display_y, float x_scale, u32* out_tick,
   1128                                                        u32* out_line) const
   1129 {
   1130   if (x_scale != 1.0f)
   1131   {
   1132     const float width_f = static_cast<float>(m_crtc_state.display_width);
   1133     const float centred = ((display_x / width_f) * 2.0f) - 1.0f; // 0..1 -> -1..1
   1134     const float stretched = centred * x_scale;
   1135     display_x = (((stretched + 1.0f) * 0.5f) * width_f); // -1..1 -> 0..1
   1136   }
   1137 
   1138   // reject anything outside the display; the sign is tested before each unsigned conversion
   1139   if (display_x < 0 || static_cast<u32>(display_x) >= m_crtc_state.display_width)
   1140     return false;
   1141   if (display_y < 0 || static_cast<u32>(display_y) >= m_crtc_state.display_height)
   1142     return false;
   1143 
   1144   // line: display row, halved when interlaced display is enabled, offset by the first visible line
   1145   const u32 rounded_row = static_cast<u32>(std::round(display_y));
   1146   *out_line = (rounded_row >> BoolToUInt8(IsInterlacedDisplayEnabled())) + m_crtc_state.vertical_visible_start;
   1147 
   1148   // tick: pixels scaled by the dot clock divider, passed through ScaleTicksToOverclock, offset by the first visible tick
   1149   const TickCount unscaled_ticks =
   1150     static_cast<TickCount>(std::round(display_x * static_cast<float>(m_crtc_state.dot_clock_divider)));
   1151   *out_tick = static_cast<u32>(System::ScaleTicksToOverclock(unscaled_ticks)) + m_crtc_state.horizontal_visible_start;
   1152   return true;
   1153 }
   1148 
   1149 // Reports where the beam currently is, taking CRTC ticks that have not been processed yet into account.
   1150 // out_line receives the scanline (wrapped at vertical_total), out_ticks the tick within that line.
   1151 void GPU::GetBeamPosition(u32* out_ticks, u32* out_line)
   1152 {
   1153   const u32 tick_including_pending = (GetPendingCRTCTicks() + m_crtc_state.current_tick_in_scanline);
   1154 
   1155   // whole lines contained in the tick count move the line forward, the rest is the position inside the line
   1156   const u32 extra_lines = tick_including_pending / m_crtc_state.horizontal_total;
   1157   *out_line = (m_crtc_state.current_scanline + extra_lines) % m_crtc_state.vertical_total;
   1158   *out_ticks = tick_including_pending % m_crtc_state.horizontal_total;
   1159 }
   1156 
   1157 // Computes the number of system ticks until the beam next reaches tick `ticks` on line `line`, measured from the
   1158 // current beam position as reported by GetBeamPosition().
   1159 TickCount GPU::GetSystemTicksUntilTicksAndLine(u32 ticks, u32 line)
   1160 {
   1161   u32 beam_tick, beam_line;
   1162   GetBeamPosition(&beam_tick, &beam_line);
   1163 
   1164   // Horizontal distance. If the target tick is already behind the beam, finish this line first and measure
   1165   // from the start of the next one.
   1166   u32 tick_distance;
   1167   if (ticks < beam_tick)
   1168   {
   1169     tick_distance = (m_crtc_state.horizontal_total - beam_tick) + ticks;
   1170     beam_line = (beam_line + 1) % m_crtc_state.vertical_total;
   1171   }
   1172   else
   1173   {
   1174     tick_distance = ticks - beam_tick;
   1175   }
   1176 
   1177   // Vertical distance, wrapping through the end of the frame when the target line is behind the beam.
   1178   u32 line_distance;
   1179   if (line < beam_line)
   1180     line_distance = (m_crtc_state.vertical_total - beam_line) + line;
   1181   else
   1182     line_distance = line - beam_line;
   1183 
   1184   const TickCount crtc_ticks_to_target =
   1185     static_cast<TickCount>((line_distance * m_crtc_state.horizontal_total) + tick_distance);
   1186 
   1187   return CRTCTicksToSystemTicks(crtc_ticks_to_target, m_crtc_state.fractional_ticks);
   1188 }
   1181 
   1182 // Returns the next 32-bit word for a GPUREAD access. Outside of a VRAM->CPU transfer this is the latched
   1183 // value; during a transfer, up to two 16-bit pixels are fetched from VRAM and packed low half first, and the
   1184 // result also becomes the new latch value.
   1185 u32 GPU::ReadGPUREAD()
   1186 {
   1187   if (m_blitter_state != BlitterState::ReadingVRAM)
   1188     return m_GPUREAD_latch;
   1189 
   1190   // Two pixels per word. If the transfer ends on the first pixel, the upper half stays zero.
   1191   u32 packed = 0;
   1192   for (u32 shift = 0; shift < 32; shift += 16)
   1193   {
   1194     // Source coordinates wrap around at the VRAM edges.
   1195     const u16 src_x = (m_vram_transfer.x + m_vram_transfer.col) % VRAM_WIDTH;
   1196     const u16 src_y = (m_vram_transfer.y + m_vram_transfer.row) % VRAM_HEIGHT;
   1197     packed |= ZeroExtend32(g_vram[src_y * VRAM_WIDTH + src_x]) << shift;
   1198 
   1199     // advance the cursor; nothing more to do unless a row was just completed...
   1200     if (++m_vram_transfer.col != m_vram_transfer.width)
   1201       continue;
   1202     m_vram_transfer.col = 0;
   1203 
   1204     // ...and that row was the last one
   1205     if (++m_vram_transfer.row != m_vram_transfer.height)
   1206       continue;
   1207 
   1208     DEBUG_LOG("End of VRAM->CPU transfer");
   1209     m_vram_transfer = {};
   1210     m_blitter_state = BlitterState::Idle;
   1211 
   1212     // end of transfer, catch up on any commands which were written (unlikely)
   1213     ExecuteCommands();
   1214     break;
   1215   }
   1216 
   1217   m_GPUREAD_latch = packed;
   1218   return packed;
   1219 }
   1216 
   1217 void GPU::WriteGP1(u32 value)
   1218 {
          // Handles a write to GP1. Bits 24-29 of `value` select the command and the low 24 bits form its parameter.
          // Commands that change state visible to the CRTC or the command processor first bring those up to date
          // (SynchronizeCRTC / s_command_tick_event.InvokeEarly) before modifying anything.
   1219   const u32 command = (value >> 24) & 0x3Fu;
   1220   const u32 param = value & UINT32_C(0x00FFFFFF);
   1221   switch (command)
   1222   {
   1223     case 0x00: // Reset GPU
   1224     {
   1225       DEBUG_LOG("GP1 reset GPU");
   1226       s_command_tick_event.InvokeEarly();
   1227       SynchronizeCRTC();
   1228       SoftReset();
   1229     }
   1230     break;
   1231 
   1232     case 0x01: // Clear FIFO
   1233     {
   1234       DEBUG_LOG("GP1 clear FIFO");
   1235       s_command_tick_event.InvokeEarly();
   1236       SynchronizeCRTC();
   1237 
   1238       // flush partial writes
   1239       if (m_blitter_state == BlitterState::WritingVRAM)
   1240         FinishVRAMWrite();
   1241 
              // drop all in-flight command/transfer state and cancel the pending command event
   1242       m_blitter_state = BlitterState::Idle;
   1243       m_command_total_words = 0;
   1244       m_vram_transfer = {};
   1245       m_fifo.Clear();
   1246       m_blit_buffer.clear();
   1247       m_blit_remaining_words = 0;
   1248       m_pending_command_ticks = 0;
   1249       s_command_tick_event.Deactivate();
   1250       UpdateDMARequest();
   1251       UpdateGPUIdle();
   1252     }
   1253     break;
   1254 
   1255     case 0x02: // Acknowledge Interrupt
   1256     {
   1257       DEBUG_LOG("Acknowledge interrupt");
   1258       m_GPUSTAT.interrupt_request = false;
   1259       InterruptController::SetLineState(InterruptController::IRQ::GPU, false);
   1260     }
   1261     break;
   1262 
   1263     case 0x03: // Display on/off
   1264     {
              // bit 0 set means the display is disabled
   1265       const bool disable = ConvertToBoolUnchecked(value & 0x01);
   1266       DEBUG_LOG("Display {}", disable ? "disabled" : "enabled");
   1267       SynchronizeCRTC();
   1268 
              // only on the enabled -> disabled transition, and only while interlaced display is enabled
   1269       if (!m_GPUSTAT.display_disable && disable && IsInterlacedDisplayEnabled())
   1270         ClearDisplay();
   1271 
   1272       m_GPUSTAT.display_disable = disable;
   1273     }
   1274     break;
   1275 
   1276     case 0x04: // DMA Direction
   1277     {
   1278       DEBUG_LOG("DMA direction <- 0x{:02X}", static_cast<u32>(param));
   1279       if (m_GPUSTAT.dma_direction != static_cast<DMADirection>(param))
   1280       {
   1281         m_GPUSTAT.dma_direction = static_cast<DMADirection>(param);
   1282         UpdateDMARequest();
   1283       }
   1284     }
   1285     break;
   1286 
   1287     case 0x05: // Set display start address
   1288     {
   1289       const u32 new_value = param & CRTCState::Regs::DISPLAY_ADDRESS_START_MASK;
   1290       DEBUG_LOG("Display address start <- 0x{:08X}", new_value);
   1291 
              // the internal frame number is bumped on every write, even when the address does not change
   1292       System::IncrementInternalFrameNumber();
   1293       if (m_crtc_state.regs.display_address_start != new_value)
   1294       {
   1295         SynchronizeCRTC();
   1296         m_crtc_state.regs.display_address_start = new_value;
   1297         UpdateCRTCDisplayParameters();
   1298         OnBufferSwapped();
   1299       }
   1300     }
   1301     break;
   1302 
   1303     case 0x06: // Set horizontal display range
   1304     {
   1305       const u32 new_value = param & CRTCState::Regs::HORIZONTAL_DISPLAY_RANGE_MASK;
   1306       DEBUG_LOG("Horizontal display range <- 0x{:08X}", new_value);
   1307 
   1308       if (m_crtc_state.regs.horizontal_display_range != new_value)
   1309       {
   1310         SynchronizeCRTC();
   1311         m_crtc_state.regs.horizontal_display_range = new_value;
   1312         UpdateCRTCConfig();
   1313       }
   1314     }
   1315     break;
   1316 
   1317     case 0x07: // Set vertical display range
   1318     {
   1319       const u32 new_value = param & CRTCState::Regs::VERTICAL_DISPLAY_RANGE_MASK;
   1320       DEBUG_LOG("Vertical display range <- 0x{:08X}", new_value);
   1321 
   1322       if (m_crtc_state.regs.vertical_display_range != new_value)
   1323       {
   1324         SynchronizeCRTC();
   1325         m_crtc_state.regs.vertical_display_range = new_value;
   1326         UpdateCRTCConfig();
   1327       }
   1328     }
   1329     break;
   1330 
   1331     case 0x08: // Set display mode
   1332     {
              // bit layout of the GP1(08h) parameter
   1333       union GP1_08h
   1334       {
   1335         u32 bits;
   1336 
   1337         BitField<u32, u8, 0, 2> horizontal_resolution_1;
   1338         BitField<u32, bool, 2, 1> vertical_resolution;
   1339         BitField<u32, bool, 3, 1> pal_mode;
   1340         BitField<u32, bool, 4, 1> display_area_color_depth;
   1341         BitField<u32, bool, 5, 1> vertical_interlace;
   1342         BitField<u32, bool, 6, 1> horizontal_resolution_2;
   1343         BitField<u32, bool, 7, 1> reverse_flag;
   1344       };
   1345 
              // build the would-be GPUSTAT in a copy so it can be compared against the current value below
   1346       const GP1_08h dm{param};
   1347       GPUSTAT new_GPUSTAT{m_GPUSTAT.bits};
   1348       new_GPUSTAT.horizontal_resolution_1 = dm.horizontal_resolution_1;
   1349       new_GPUSTAT.vertical_resolution = dm.vertical_resolution;
   1350       new_GPUSTAT.pal_mode = dm.pal_mode;
   1351       new_GPUSTAT.display_area_color_depth_24 = dm.display_area_color_depth;
   1352       new_GPUSTAT.vertical_interlace = dm.vertical_interlace;
   1353       new_GPUSTAT.horizontal_resolution_2 = dm.horizontal_resolution_2;
   1354       new_GPUSTAT.reverse_flag = dm.reverse_flag;
   1355       DEBUG_LOG("Set display mode <- 0x{:08X}", dm.bits);
   1356 
   1357       if (!m_GPUSTAT.vertical_interlace && dm.vertical_interlace && !m_force_progressive_scan)
   1358       {
   1359         // bit of a hack, technically we should pull the previous frame in, but this may not exist anymore
   1360         ClearDisplay();
   1361       }
   1362 
   1363       if (m_GPUSTAT.bits != new_GPUSTAT.bits)
   1364       {
   1365         // Have to be careful when setting this because Synchronize() can modify GPUSTAT.
                // SET_MASK covers GPUSTAT bits 14 and 16-22; every other bit keeps its post-synchronisation value.
   1366         static constexpr u32 SET_MASK = UINT32_C(0b00000000011111110100000000000000);
   1367         s_command_tick_event.InvokeEarly();
   1368         SynchronizeCRTC();
   1369         m_GPUSTAT.bits = (m_GPUSTAT.bits & ~SET_MASK) | (new_GPUSTAT.bits & SET_MASK);
   1370         UpdateCRTCConfig();
   1371       }
   1372     }
   1373     break;
   1374 
   1375     case 0x09: // Allow texture disable
   1376     {
   1377       m_set_texture_disable_mask = ConvertToBoolUnchecked(param & 0x01);
   1378       DEBUG_LOG("Set texture disable mask <- {}", m_set_texture_disable_mask ? "allowed" : "ignored");
   1379     }
   1380     break;
   1381 
            // every command in 0x10..0x1F goes to the GPU info handler, which receives the full written value
   1382     case 0x10:
   1383     case 0x11:
   1384     case 0x12:
   1385     case 0x13:
   1386     case 0x14:
   1387     case 0x15:
   1388     case 0x16:
   1389     case 0x17:
   1390     case 0x18:
   1391     case 0x19:
   1392     case 0x1A:
   1393     case 0x1B:
   1394     case 0x1C:
   1395     case 0x1D:
   1396     case 0x1E:
   1397     case 0x1F:
   1398     {
   1399       HandleGetGPUInfoCommand(value);
   1400     }
   1401     break;
   1402 
            // remaining command numbers (0x0A-0x0F and 0x20-0x3F) are only logged
   1403       [[unlikely]] default : ERROR_LOG("Unimplemented GP1 command 0x{:02X}", command);
   1404       break;
   1405   }
   1406 }
   1407 
   1408 // Services the GP1 "get GPU info" commands. The low three bits of `value` choose which internal register is
   1409 // copied into the GPUREAD latch; sub-commands 0, 1, 6 and 7 leave the latch as it was.
   1410 void GPU::HandleGetGPUInfoCommand(u32 value)
   1411 {
   1412   switch (const u8 subcommand = Truncate8(value & 0x07); subcommand)
   1413   {
   1414     case 0x02: // Get Texture Window
   1415     {
   1416       DEBUG_LOG("Get texture window");
   1417       m_GPUREAD_latch = m_draw_mode.texture_window_value;
   1418     }
   1419     break;
   1420 
   1421     case 0x03: // Get Draw Area Top Left
   1422     {
   1423       DEBUG_LOG("Get drawing area top left");
   1424 
   1425       // 10 bits each: left in bits 0-9, top in bits 10-19
   1426       const auto left_bits = m_drawing_area.left & UINT32_C(0b1111111111);
   1427       const auto top_bits = m_drawing_area.top & UINT32_C(0b1111111111);
   1428       m_GPUREAD_latch = (left_bits | (top_bits << 10));
   1429     }
   1430     break;
   1431 
   1432     case 0x04: // Get Draw Area Bottom Right
   1433     {
   1434       DEBUG_LOG("Get drawing area bottom right");
   1435 
   1436       // 10 bits each: right in bits 0-9, bottom in bits 10-19
   1437       const auto right_bits = m_drawing_area.right & UINT32_C(0b1111111111);
   1438       const auto bottom_bits = m_drawing_area.bottom & UINT32_C(0b1111111111);
   1439       m_GPUREAD_latch = (right_bits | (bottom_bits << 10));
   1440     }
   1441     break;
   1442 
   1443     case 0x05: // Get Drawing Offset
   1444     {
   1445       DEBUG_LOG("Get drawing offset");
   1446 
   1447       // 11 bits each: x in bits 0-10, y in bits 11-21
   1448       const auto x_bits = m_drawing_offset.x & INT32_C(0b11111111111);
   1449       const auto y_bits = m_drawing_offset.y & INT32_C(0b11111111111);
   1450       m_GPUREAD_latch = (x_bits | (y_bits << 11));
   1451     }
   1452     break;
   1453 
   1454     case 0x00:
   1455     case 0x01:
   1456     case 0x06:
   1457     case 0x07:
   1458       // leave GPUREAD intact
   1459       break;
   1460 
   1461     [[unlikely]] default:
   1462       WARNING_LOG("Unhandled GetGPUInfo(0x{:02X})", subcommand);
   1463       break;
   1464   }
   1465 }
   1455 
   1456 // Reloads the cached CLUT when the palette register differs from the cached one, or when an 8-bit palette is
   1457 // needed but the cached CLUT was loaded as 4-bit. Texture modes at or above Direct16Bit never cause a reload.
   1458 // A reload is charged 256 command ticks for an 8-bit CLUT and 16 for a 4-bit one.
   1459 void GPU::UpdateCLUTIfNeeded(GPUTextureMode texmode, GPUTexturePaletteReg clut)
   1460 {
   1461   if (texmode >= GPUTextureMode::Direct16Bit)
   1462     return;
   1463 
   1464   const bool needs_8bit = (texmode == GPUTextureMode::Palette8Bit);
   1465   const bool same_register = (clut.bits == m_current_clut_reg_bits);
   1466   const bool cached_is_wide_enough = (BoolToUInt8(needs_8bit) <= BoolToUInt8(m_current_clut_is_8bit));
   1467   if (same_register && cached_is_wide_enough)
   1468     return;
   1469 
   1470   DEBUG_LOG("Reloading CLUT from {},{}, {}", clut.GetXBase(), clut.GetYBase(), needs_8bit ? "8-bit" : "4-bit");
   1471   AddCommandTicks(needs_8bit ? 256 : 16);
   1472   UpdateCLUT(clut, needs_8bit);
   1473 
   1474   // remember what is cached now
   1475   m_current_clut_reg_bits = clut.bits;
   1476   m_current_clut_is_8bit = needs_8bit;
   1477 }
   1471 
   1472 // Marks the cached CLUT as stale by storing the maximum representable value in the cached register bits and
   1473 // clearing the 8-bit flag.
   1474 void GPU::InvalidateCLUT()
   1475 {
   1476   using RegBits = decltype(m_current_clut_reg_bits);
   1477 
   1478   m_current_clut_is_8bit = false;
   1479   m_current_clut_reg_bits = std::numeric_limits<RegBits>::max(); // will never match
   1480 }
   1477 
   1478 // The cached CLUT counts as valid unless the cached register bits hold the maximum representable value, which
   1479 // is what InvalidateCLUT() stores.
   1480 bool GPU::IsCLUTValid() const
   1481 {
   1482   using RegBits = decltype(m_current_clut_reg_bits);
   1483   constexpr RegBits invalid_marker = std::numeric_limits<RegBits>::max();
   1484 
   1485   return (m_current_clut_reg_bits != invalid_marker);
   1486 }
   1482 
   1483 void GPU::ClearDisplay()
   1484 {
          // Clears the display texture and drops the deinterlace textures. WriteGP1 calls this when the display is
          // disabled while interlaced display is enabled, and when vertical interlace is switched on without forced
          // progressive scan.
   1485   ClearDisplayTexture();
   1486 
   1487   // Just recycle the textures, it'll get re-fetched.
   1488   DestroyDeinterlaceTextures();
   1489 }
   1490 
   1491 void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
   1492 {
          // Empty in this implementation: nothing is done with the requested rectangle here.
          // NOTE(review): presumably a hook that GPU backends override to refresh g_vram - confirm against gpu.h.
   1493 }
   1494 
   1495 // Fills a width x height rectangle of g_vram at (x, y) with `color` converted to the 16-bit VRAM format. Rows
   1496 // always wrap at VRAM_HEIGHT; columns wrap at VRAM_WIDTH when the rectangle runs past the right edge. With
   1497 // interlaced rendering enabled, rows whose LSB equals the active line LSB are left untouched.
   1498 void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
   1499 {
   1500   const u16 color16 = VRAMRGBA8888ToRGBA5551(color);
   1501   const GSVector4i fill = GSVector4i(color16, color16, color16, color16, color16, color16, color16, color16);
   1502   constexpr u32 vector_width = 8;
   1503   const u32 aligned_width = Common::AlignDownPow2(width, vector_width);
   1504   const bool fits_in_row = ((x + width) <= VRAM_WIDTH);
   1505 
   1506   // Row writer for rectangles that stay inside the row: vector stores first, then the scalar tail.
   1507   const auto fill_row_linear = [&](u32 row) {
   1508     u16* dst = &g_vram[row * VRAM_WIDTH + x];
   1509     u32 done = 0;
   1510     while (done < aligned_width)
   1511     {
   1512       GSVector4i::store<false>(dst, fill);
   1513       dst += vector_width;
   1514       done += vector_width;
   1515     }
   1516     while (done < width)
   1517     {
   1518       *(dst++) = color16;
   1519       done++;
   1520     }
   1521   };
   1522 
   1523   // Row writer for rectangles that cross the right edge: every column is wrapped individually.
   1524   const auto fill_row_wrapped = [&](u32 row) {
   1525     u16* const row_base = &g_vram[row * VRAM_WIDTH];
   1526     for (u32 i = 0; i < width; i++)
   1527       row_base[(x + i) % VRAM_WIDTH] = color16;
   1528   };
   1529 
   1530   const auto fill_row = [&](u32 row) {
   1531     if (fits_in_row)
   1532       fill_row_linear(row);
   1533     else
   1534       fill_row_wrapped(row);
   1535   };
   1536 
   1537   if (!IsInterlacedRenderingEnabled())
   1538   {
   1539     for (u32 yoffs = 0; yoffs < height; yoffs++)
   1540       fill_row((y + yoffs) % VRAM_HEIGHT);
   1541 
   1542     return;
   1543   }
   1544 
   1545   // Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field.
   1546   if (IsCRTCScanlinePending())
   1547     SynchronizeCRTC();
   1548 
   1549   const u32 active_field = GetActiveLineLSB();
   1550   for (u32 yoffs = 0; yoffs < height; yoffs++)
   1551   {
   1552     const u32 row = (y + yoffs) % VRAM_HEIGHT;
   1553 
   1554     // rows belonging to the active field are skipped
   1555     if ((row & u32(1)) != active_field)
   1556       fill_row(row);
   1557   }
   1558 }
   1570 
   1571 void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask)
   1572 {
   1573   // Fast path when the copy is not oversized.
   1574   if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !set_mask && !check_mask)
   1575   {
   1576     const u16* src_ptr = static_cast<const u16*>(data);
   1577     u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x];
   1578     for (u32 yoffs = 0; yoffs < height; yoffs++)
   1579     {
   1580       std::copy_n(src_ptr, width, dst_ptr);
   1581       src_ptr += width;
   1582       dst_ptr += VRAM_WIDTH;
   1583     }
   1584   }
   1585   else
   1586   {
   1587     // Slow path when we need to handle wrap-around.
   1588     // During transfer/render operations, if ((dst_pixel & mask_and) == 0) { pixel = src_pixel | mask_or }
   1589     const u16* src_ptr = static_cast<const u16*>(data);
   1590     const u16 mask_and = check_mask ? 0x8000 : 0;
   1591     const u16 mask_or = set_mask ? 0x8000 : 0;
   1592 
   1593     for (u32 row = 0; row < height;)
   1594     {
   1595       u16* dst_row_ptr = &g_vram[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
   1596       for (u32 col = 0; col < width;)
   1597       {
   1598         // TODO: Handle unaligned reads...
   1599         u16* pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH];
   1600         if (((*pixel_ptr) & mask_and) == 0)
   1601           *pixel_ptr = *(src_ptr++) | mask_or;
   1602       }
   1603     }
   1604   }
   1605 }
   1606 
   1607 void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
   1608 {
   1609   // Break up oversized copies. This behavior has not been verified on console.
   1610   if ((src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH)
   1611   {
   1612     u32 remaining_rows = height;
   1613     u32 current_src_y = src_y;
   1614     u32 current_dst_y = dst_y;
   1615     while (remaining_rows > 0)
   1616     {
   1617       const u32 rows_to_copy =
   1618         std::min<u32>(remaining_rows, std::min<u32>(VRAM_HEIGHT - current_src_y, VRAM_HEIGHT - current_dst_y));
   1619 
   1620       u32 remaining_columns = width;
   1621       u32 current_src_x = src_x;
   1622       u32 current_dst_x = dst_x;
   1623       while (remaining_columns > 0)
   1624       {
   1625         const u32 columns_to_copy =
   1626           std::min<u32>(remaining_columns, std::min<u32>(VRAM_WIDTH - current_src_x, VRAM_WIDTH - current_dst_x));
   1627         CopyVRAM(current_src_x, current_src_y, current_dst_x, current_dst_y, columns_to_copy, rows_to_copy);
   1628         current_src_x = (current_src_x + columns_to_copy) % VRAM_WIDTH;
   1629         current_dst_x = (current_dst_x + columns_to_copy) % VRAM_WIDTH;
   1630         remaining_columns -= columns_to_copy;
   1631       }
   1632 
   1633       current_src_y = (current_src_y + rows_to_copy) % VRAM_HEIGHT;
   1634       current_dst_y = (current_dst_y + rows_to_copy) % VRAM_HEIGHT;
   1635       remaining_rows -= rows_to_copy;
   1636     }
   1637 
   1638     return;
   1639   }
   1640 
   1641   // This doesn't have a fast path, but do we really need one? It's not common.
   1642   const u16 mask_and = m_GPUSTAT.GetMaskAND();
   1643   const u16 mask_or = m_GPUSTAT.GetMaskOR();
   1644 
   1645   // Copy in reverse when src_x < dst_x, this is verified on console.
   1646   if (src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH))
   1647   {
   1648     for (u32 row = 0; row < height; row++)
   1649     {
   1650       const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
   1651       u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
   1652 
   1653       for (s32 col = static_cast<s32>(width - 1); col >= 0; col--)
   1654       {
   1655         const u16 src_pixel = src_row_ptr[(src_x + static_cast<u32>(col)) % VRAM_WIDTH];
   1656         u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + static_cast<u32>(col)) % VRAM_WIDTH];
   1657         if ((*dst_pixel_ptr & mask_and) == 0)
   1658           *dst_pixel_ptr = src_pixel | mask_or;
   1659       }
   1660     }
   1661   }
   1662   else
   1663   {
   1664     for (u32 row = 0; row < height; row++)
   1665     {
   1666       const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
   1667       u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
   1668 
   1669       for (u32 col = 0; col < width; col++)
   1670       {
   1671         const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH];
   1672         u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH];
   1673         if ((*dst_pixel_ptr & mask_and) == 0)
   1674           *dst_pixel_ptr = src_pixel | mask_or;
   1675       }
   1676     }
   1677   }
   1678 }
   1679 
   1680 void GPU::SetClampedDrawingArea()
   1681 {
   1682   if (m_drawing_area.left > m_drawing_area.right || m_drawing_area.top > m_drawing_area.bottom) [[unlikely]]
   1683   {
   1684     m_clamped_drawing_area = GSVector4i::zero();
   1685     return;
   1686   }
   1687 
   1688   const u32 right = std::min(m_drawing_area.right + 1, static_cast<u32>(VRAM_WIDTH));
   1689   const u32 left = std::min(m_drawing_area.left, std::min(m_drawing_area.right, VRAM_WIDTH - 1));
   1690   const u32 bottom = std::min(m_drawing_area.bottom + 1, static_cast<u32>(VRAM_HEIGHT));
   1691   const u32 top = std::min(m_drawing_area.top, std::min(m_drawing_area.bottom, VRAM_HEIGHT - 1));
   1692   m_clamped_drawing_area = GSVector4i(left, top, right, bottom);
   1693 }
   1694 
   1695 void GPU::SetDrawMode(u16 value)
   1696 {
   1697   GPUDrawModeReg new_mode_reg{static_cast<u16>(value & GPUDrawModeReg::MASK)};
   1698   if (!m_set_texture_disable_mask)
   1699     new_mode_reg.texture_disable = false;
   1700 
   1701   if (new_mode_reg.bits == m_draw_mode.mode_reg.bits)
   1702     return;
   1703 
   1704   m_draw_mode.texture_page_changed |= ((new_mode_reg.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK) !=
   1705                                        (m_draw_mode.mode_reg.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK));
   1706   m_draw_mode.mode_reg.bits = new_mode_reg.bits;
   1707 
   1708   if (m_GPUSTAT.draw_to_displayed_field != new_mode_reg.draw_to_displayed_field)
   1709     FlushRender();
   1710 
   1711   // Bits 0..10 are returned in the GPU status register.
   1712   m_GPUSTAT.bits = (m_GPUSTAT.bits & ~(GPUDrawModeReg::GPUSTAT_MASK)) |
   1713                    (ZeroExtend32(new_mode_reg.bits) & GPUDrawModeReg::GPUSTAT_MASK);
   1714   m_GPUSTAT.texture_disable = m_draw_mode.mode_reg.texture_disable;
   1715 }
   1716 
   1717 void GPU::SetTexturePalette(u16 value)
   1718 {
   1719   value &= DrawMode::PALETTE_MASK;
   1720   if (m_draw_mode.palette_reg.bits == value)
   1721     return;
   1722 
   1723   m_draw_mode.palette_reg.bits = value;
   1724   m_draw_mode.texture_page_changed = true;
   1725 }
   1726 
   1727 void GPU::SetTextureWindow(u32 value)
   1728 {
   1729   value &= DrawMode::TEXTURE_WINDOW_MASK;
   1730   if (m_draw_mode.texture_window_value == value)
   1731     return;
   1732 
   1733   FlushRender();
   1734 
   1735   const u8 mask_x = Truncate8(value & UINT32_C(0x1F));
   1736   const u8 mask_y = Truncate8((value >> 5) & UINT32_C(0x1F));
   1737   const u8 offset_x = Truncate8((value >> 10) & UINT32_C(0x1F));
   1738   const u8 offset_y = Truncate8((value >> 15) & UINT32_C(0x1F));
   1739   DEBUG_LOG("Set texture window {:02X} {:02X} {:02X} {:02X}", mask_x, mask_y, offset_x, offset_y);
   1740 
   1741   m_draw_mode.texture_window.and_x = ~(mask_x * 8);
   1742   m_draw_mode.texture_window.and_y = ~(mask_y * 8);
   1743   m_draw_mode.texture_window.or_x = (offset_x & mask_x) * 8u;
   1744   m_draw_mode.texture_window.or_y = (offset_y & mask_y) * 8u;
   1745   m_draw_mode.texture_window_value = value;
   1746   m_draw_mode.texture_window_changed = true;
   1747 }
   1748 
   1749 void GPU::ReadCLUT(u16* dest, GPUTexturePaletteReg reg, bool clut_is_8bit)
   1750 {
   1751   const u16* src_row = &g_vram[reg.GetYBase() * VRAM_WIDTH];
   1752   const u32 start_x = reg.GetXBase();
   1753   if (!clut_is_8bit)
   1754   {
   1755     // Wraparound can't happen in 4-bit mode.
   1756     std::memcpy(dest, &src_row[start_x], sizeof(u16) * 16);
   1757   }
   1758   else
   1759   {
   1760     if ((start_x + 256) > VRAM_WIDTH) [[unlikely]]
   1761     {
   1762       const u32 end = VRAM_WIDTH - start_x;
   1763       const u32 start = 256 - end;
   1764       std::memcpy(dest, &src_row[start_x], sizeof(u16) * end);
   1765       std::memcpy(dest + end, src_row, sizeof(u16) * start);
   1766     }
   1767     else
   1768     {
   1769       std::memcpy(dest, &src_row[start_x], sizeof(u16) * 256);
   1770     }
   1771   }
   1772 }
   1773 
   1774 bool GPU::CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing)
   1775 {
   1776   GPUShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend,
   1777                          g_gpu_device->GetFeatures().framebuffer_fetch);
   1778 
   1779   GPUPipeline::GraphicsConfig plconfig;
   1780   plconfig.input_layout.vertex_stride = 0;
   1781   plconfig.primitive = GPUPipeline::Primitive::Triangles;
   1782   plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState();
   1783   plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
   1784   plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
   1785   plconfig.geometry_shader = nullptr;
   1786   plconfig.depth_format = GPUTexture::Format::Unknown;
   1787   plconfig.samples = 1;
   1788   plconfig.per_sample_shading = false;
   1789   plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags;
   1790 
   1791   if (display)
   1792   {
   1793     plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
   1794     plconfig.SetTargetFormats(g_gpu_device->HasSurface() ? g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8);
   1795 
   1796     std::string vs = shadergen.GenerateDisplayVertexShader();
   1797     std::string fs;
   1798     switch (g_settings.display_scaling)
   1799     {
   1800       case DisplayScalingMode::BilinearSharp:
   1801         fs = shadergen.GenerateDisplaySharpBilinearFragmentShader();
   1802         break;
   1803 
   1804       case DisplayScalingMode::BilinearSmooth:
   1805       case DisplayScalingMode::BilinearInteger:
   1806         fs = shadergen.GenerateDisplayFragmentShader(true);
   1807         break;
   1808 
   1809       case DisplayScalingMode::Nearest:
   1810       case DisplayScalingMode::NearestInteger:
   1811       default:
   1812         fs = shadergen.GenerateDisplayFragmentShader(false);
   1813         break;
   1814     }
   1815 
   1816     std::unique_ptr<GPUShader> vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), vs);
   1817     std::unique_ptr<GPUShader> fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), fs);
   1818     if (!vso || !fso)
   1819       return false;
   1820     GL_OBJECT_NAME(vso, "Display Vertex Shader");
   1821     GL_OBJECT_NAME_FMT(fso, "Display Fragment Shader [{}]",
   1822                        Settings::GetDisplayScalingName(g_settings.display_scaling));
   1823     plconfig.vertex_shader = vso.get();
   1824     plconfig.fragment_shader = fso.get();
   1825     if (!(m_display_pipeline = g_gpu_device->CreatePipeline(plconfig)))
   1826       return false;
   1827     GL_OBJECT_NAME_FMT(m_display_pipeline, "Display Pipeline [{}]",
   1828                        Settings::GetDisplayScalingName(g_settings.display_scaling));
   1829   }
   1830 
   1831   if (deinterlace)
   1832   {
   1833     plconfig.SetTargetFormats(GPUTexture::Format::RGBA8);
   1834 
   1835     std::unique_ptr<GPUShader> vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(),
   1836                                                                 shadergen.GenerateScreenQuadVertexShader());
   1837     if (!vso)
   1838       return false;
   1839     GL_OBJECT_NAME(vso, "Deinterlace Vertex Shader");
   1840 
   1841     std::unique_ptr<GPUShader> fso;
   1842     if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
   1843                                            shadergen.GenerateInterleavedFieldExtractFragmentShader())))
   1844     {
   1845       return false;
   1846     }
   1847 
   1848     GL_OBJECT_NAME(fso, "Deinterlace Field Extract Fragment Shader");
   1849 
   1850     plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
   1851     plconfig.vertex_shader = vso.get();
   1852     plconfig.fragment_shader = fso.get();
   1853     if (!(m_deinterlace_extract_pipeline = g_gpu_device->CreatePipeline(plconfig)))
   1854       return false;
   1855 
   1856     GL_OBJECT_NAME(m_deinterlace_extract_pipeline, "Deinterlace Field Extract Pipeline");
   1857 
   1858     switch (g_settings.display_deinterlacing_mode)
   1859     {
   1860       case DisplayDeinterlacingMode::Disabled:
   1861         break;
   1862 
   1863       case DisplayDeinterlacingMode::Weave:
   1864       {
   1865         if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
   1866                                                shadergen.GenerateDeinterlaceWeaveFragmentShader())))
   1867         {
   1868           return false;
   1869         }
   1870 
   1871         GL_OBJECT_NAME(fso, "Weave Deinterlace Fragment Shader");
   1872 
   1873         plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
   1874         plconfig.vertex_shader = vso.get();
   1875         plconfig.fragment_shader = fso.get();
   1876         if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig)))
   1877           return false;
   1878 
   1879         GL_OBJECT_NAME(m_deinterlace_pipeline, "Weave Deinterlace Pipeline");
   1880       }
   1881       break;
   1882 
   1883       case DisplayDeinterlacingMode::Blend:
   1884       {
   1885         if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
   1886                                                shadergen.GenerateDeinterlaceBlendFragmentShader())))
   1887         {
   1888           return false;
   1889         }
   1890 
   1891         GL_OBJECT_NAME(fso, "Blend Deinterlace Fragment Shader");
   1892 
   1893         plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants;
   1894         plconfig.vertex_shader = vso.get();
   1895         plconfig.fragment_shader = fso.get();
   1896         if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig)))
   1897           return false;
   1898 
   1899         GL_OBJECT_NAME(m_deinterlace_pipeline, "Blend Deinterlace Pipeline");
   1900       }
   1901       break;
   1902 
   1903       case DisplayDeinterlacingMode::Adaptive:
   1904       {
   1905         fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
   1906                                          shadergen.GenerateFastMADReconstructFragmentShader());
   1907         if (!fso)
   1908           return false;
   1909 
   1910         GL_OBJECT_NAME(fso, "FastMAD Reconstruct Fragment Shader");
   1911 
   1912         plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants;
   1913         plconfig.fragment_shader = fso.get();
   1914         if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig)))
   1915           return false;
   1916 
   1917         GL_OBJECT_NAME(m_deinterlace_pipeline, "FastMAD Reconstruct Pipeline");
   1918       }
   1919       break;
   1920 
   1921       default:
   1922         UnreachableCode();
   1923     }
   1924   }
   1925 
   1926   if (chroma_smoothing)
   1927   {
   1928     m_chroma_smoothing_pipeline.reset();
   1929     g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture));
   1930 
   1931     if (g_settings.display_24bit_chroma_smoothing)
   1932     {
   1933       plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
   1934       plconfig.SetTargetFormats(GPUTexture::Format::RGBA8);
   1935 
   1936       std::unique_ptr<GPUShader> vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(),
   1937                                                                   shadergen.GenerateScreenQuadVertexShader());
   1938       std::unique_ptr<GPUShader> fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
   1939                                                                   shadergen.GenerateChromaSmoothingFragmentShader());
   1940       if (!vso || !fso)
   1941         return false;
   1942       GL_OBJECT_NAME(vso, "Chroma Smoothing Vertex Shader");
   1943       GL_OBJECT_NAME(fso, "Chroma Smoothing Fragment Shader");
   1944 
   1945       plconfig.vertex_shader = vso.get();
   1946       plconfig.fragment_shader = fso.get();
   1947       if (!(m_chroma_smoothing_pipeline = g_gpu_device->CreatePipeline(plconfig)))
   1948         return false;
   1949       GL_OBJECT_NAME(m_chroma_smoothing_pipeline, "Chroma Smoothing Pipeline");
   1950     }
   1951   }
   1952 
   1953   return true;
   1954 }
   1955 
   1956 void GPU::ClearDisplayTexture()
   1957 {
   1958   m_display_texture = nullptr;
   1959   m_display_texture_view_x = 0;
   1960   m_display_texture_view_y = 0;
   1961   m_display_texture_view_width = 0;
   1962   m_display_texture_view_height = 0;
   1963 }
   1964 
   1965 void GPU::SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_buffer, s32 view_x, s32 view_y, s32 view_width,
   1966                             s32 view_height)
   1967 {
   1968   DebugAssert(texture);
   1969   m_display_texture = texture;
   1970   m_display_depth_buffer = depth_buffer;
   1971   m_display_texture_view_x = view_x;
   1972   m_display_texture_view_y = view_y;
   1973   m_display_texture_view_width = view_width;
   1974   m_display_texture_view_height = view_height;
   1975 }
   1976 
   1977 bool GPU::PresentDisplay()
   1978 {
   1979   FlushRender();
   1980 
   1981   GSVector4i display_rect;
   1982   GSVector4i draw_rect;
   1983   CalculateDrawRect(g_gpu_device->GetWindowWidth(), g_gpu_device->GetWindowHeight(), !g_settings.debugging.show_vram,
   1984                     true, &display_rect, &draw_rect);
   1985   return RenderDisplay(nullptr, display_rect, draw_rect, !g_settings.debugging.show_vram);
   1986 }
   1987 
   1988 bool GPU::RenderDisplay(GPUTexture* target, const GSVector4i display_rect, const GSVector4i draw_rect, bool postfx)
   1989 {
   1990   GL_SCOPE_FMT("RenderDisplay: {}", draw_rect);
   1991 
   1992   if (m_display_texture)
   1993     m_display_texture->MakeReadyForSampling();
   1994 
   1995   // Internal post-processing.
   1996   GPUTexture* display_texture = m_display_texture;
   1997   s32 display_texture_view_x = m_display_texture_view_x;
   1998   s32 display_texture_view_y = m_display_texture_view_y;
   1999   s32 display_texture_view_width = m_display_texture_view_width;
   2000   s32 display_texture_view_height = m_display_texture_view_height;
   2001   if (postfx && display_texture && PostProcessing::InternalChain.IsActive() &&
   2002       PostProcessing::InternalChain.CheckTargets(DISPLAY_INTERNAL_POSTFX_FORMAT, display_texture_view_width,
   2003                                                  display_texture_view_height))
   2004   {
   2005     DebugAssert(display_texture_view_x == 0 && display_texture_view_y == 0 &&
   2006                 static_cast<s32>(display_texture->GetWidth()) == display_texture_view_width &&
   2007                 static_cast<s32>(display_texture->GetHeight()) == display_texture_view_height);
   2008 
   2009     // Now we can apply the post chain.
   2010     GPUTexture* post_output_texture = PostProcessing::InternalChain.GetOutputTexture();
   2011     if (PostProcessing::InternalChain.Apply(display_texture, m_display_depth_buffer, post_output_texture,
   2012                                             GSVector4i(0, 0, display_texture_view_width, display_texture_view_height),
   2013                                             display_texture_view_width, display_texture_view_height,
   2014                                             m_crtc_state.display_width, m_crtc_state.display_height))
   2015     {
   2016       display_texture_view_x = 0;
   2017       display_texture_view_y = 0;
   2018       display_texture = post_output_texture;
   2019       display_texture->MakeReadyForSampling();
   2020     }
   2021   }
   2022 
   2023   const GPUTexture::Format hdformat = target ? target->GetFormat() : g_gpu_device->GetWindowFormat();
   2024   const u32 target_width = target ? target->GetWidth() : g_gpu_device->GetWindowWidth();
   2025   const u32 target_height = target ? target->GetHeight() : g_gpu_device->GetWindowHeight();
   2026   const bool really_postfx =
   2027     (postfx && PostProcessing::DisplayChain.IsActive() && !g_gpu_device->GetWindowInfo().IsSurfaceless() &&
   2028      hdformat != GPUTexture::Format::Unknown && target_width > 0 && target_height > 0 &&
   2029      PostProcessing::DisplayChain.CheckTargets(hdformat, target_width, target_height));
   2030   const GSVector4i real_draw_rect =
   2031     g_gpu_device->UsesLowerLeftOrigin() ? GPUDevice::FlipToLowerLeft(draw_rect, target_height) : draw_rect;
   2032   if (really_postfx)
   2033   {
   2034     g_gpu_device->ClearRenderTarget(PostProcessing::DisplayChain.GetInputTexture(), GPUDevice::DEFAULT_CLEAR_COLOR);
   2035     g_gpu_device->SetRenderTarget(PostProcessing::DisplayChain.GetInputTexture());
   2036   }
   2037   else
   2038   {
   2039     if (target)
   2040       g_gpu_device->SetRenderTarget(target);
   2041     else if (!g_gpu_device->BeginPresent(false))
   2042       return false;
   2043   }
   2044 
   2045   if (display_texture)
   2046   {
   2047     bool texture_filter_linear = false;
   2048 
   2049     struct Uniforms
   2050     {
   2051       float src_rect[4];
   2052       float src_size[4];
   2053       float clamp_rect[4];
   2054       float params[4];
   2055       float rotation_matrix[2][2];
   2056     } uniforms;
   2057     std::memset(uniforms.params, 0, sizeof(uniforms.params));
   2058 
   2059     switch (g_settings.display_scaling)
   2060     {
   2061       case DisplayScalingMode::Nearest:
   2062       case DisplayScalingMode::NearestInteger:
   2063         break;
   2064 
   2065       case DisplayScalingMode::BilinearSmooth:
   2066       case DisplayScalingMode::BilinearInteger:
   2067         texture_filter_linear = true;
   2068         break;
   2069 
   2070       case DisplayScalingMode::BilinearSharp:
   2071       {
   2072         texture_filter_linear = true;
   2073         uniforms.params[0] = std::max(
   2074           std::floor(static_cast<float>(draw_rect.width()) / static_cast<float>(m_display_texture_view_width)), 1.0f);
   2075         uniforms.params[1] = std::max(
   2076           std::floor(static_cast<float>(draw_rect.height()) / static_cast<float>(m_display_texture_view_height)), 1.0f);
   2077         uniforms.params[2] = 0.5f - 0.5f / uniforms.params[0];
   2078         uniforms.params[3] = 0.5f - 0.5f / uniforms.params[1];
   2079       }
   2080       break;
   2081 
   2082       default:
   2083         UnreachableCode();
   2084         break;
   2085     }
   2086 
   2087     g_gpu_device->SetPipeline(m_display_pipeline.get());
   2088     g_gpu_device->SetTextureSampler(
   2089       0, display_texture, texture_filter_linear ? g_gpu_device->GetLinearSampler() : g_gpu_device->GetNearestSampler());
   2090 
   2091     // For bilinear, clamp to 0.5/SIZE-0.5 to avoid bleeding from the adjacent texels in VRAM. This is because
   2092     // 1.0 in UV space is not the bottom-right texel, but a mix of the bottom-right and wrapped/next texel.
   2093     const float rcp_width = 1.0f / static_cast<float>(display_texture->GetWidth());
   2094     const float rcp_height = 1.0f / static_cast<float>(display_texture->GetHeight());
   2095     uniforms.src_rect[0] = static_cast<float>(display_texture_view_x) * rcp_width;
   2096     uniforms.src_rect[1] = static_cast<float>(display_texture_view_y) * rcp_height;
   2097     uniforms.src_rect[2] = static_cast<float>(display_texture_view_width) * rcp_width;
   2098     uniforms.src_rect[3] = static_cast<float>(display_texture_view_height) * rcp_height;
   2099     uniforms.clamp_rect[0] = (static_cast<float>(display_texture_view_x) + 0.5f) * rcp_width;
   2100     uniforms.clamp_rect[1] = (static_cast<float>(display_texture_view_y) + 0.5f) * rcp_height;
   2101     uniforms.clamp_rect[2] =
   2102       (static_cast<float>(display_texture_view_x + display_texture_view_width) - 0.5f) * rcp_width;
   2103     uniforms.clamp_rect[3] =
   2104       (static_cast<float>(display_texture_view_y + display_texture_view_height) - 0.5f) * rcp_height;
   2105     uniforms.src_size[0] = static_cast<float>(display_texture->GetWidth());
   2106     uniforms.src_size[1] = static_cast<float>(display_texture->GetHeight());
   2107     uniforms.src_size[2] = rcp_width;
   2108     uniforms.src_size[3] = rcp_height;
   2109 
   2110     if (g_settings.display_rotation != DisplayRotation::Normal)
   2111     {
   2112       static constexpr const std::array<float, static_cast<size_t>(DisplayRotation::Count) - 1> rotation_radians = {{
   2113         static_cast<float>(std::numbers::pi * 1.5f), // Rotate90
   2114         static_cast<float>(std::numbers::pi),        // Rotate180
   2115         static_cast<float>(std::numbers::pi / 2.0),  // Rotate270
   2116       }};
   2117 
   2118       GSMatrix2x2::Rotation(rotation_radians[static_cast<size_t>(g_settings.display_rotation) - 1])
   2119         .store(uniforms.rotation_matrix);
   2120     }
   2121     else
   2122     {
   2123       GSMatrix2x2::Identity().store(uniforms.rotation_matrix);
   2124     }
   2125 
   2126     g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
   2127 
   2128     g_gpu_device->SetViewportAndScissor(real_draw_rect);
   2129     g_gpu_device->Draw(3, 0);
   2130   }
   2131 
   2132   if (really_postfx)
   2133   {
   2134     DebugAssert(!g_settings.debugging.show_vram);
   2135 
   2136     // "original size" in postfx includes padding.
   2137     const float upscale_x = m_display_texture ? static_cast<float>(m_display_texture_view_width) /
   2138                                                   static_cast<float>(m_crtc_state.display_vram_width) :
   2139                                                 1.0f;
   2140     const float upscale_y = m_display_texture ? static_cast<float>(m_display_texture_view_height) /
   2141                                                   static_cast<float>(m_crtc_state.display_vram_height) :
   2142                                                 1.0f;
   2143     const s32 orig_width = static_cast<s32>(std::ceil(static_cast<float>(m_crtc_state.display_width) * upscale_x));
   2144     const s32 orig_height = static_cast<s32>(std::ceil(static_cast<float>(m_crtc_state.display_height) * upscale_y));
   2145 
   2146     return PostProcessing::DisplayChain.Apply(PostProcessing::DisplayChain.GetInputTexture(), nullptr, target,
   2147                                               display_rect, orig_width, orig_height, m_crtc_state.display_width,
   2148                                               m_crtc_state.display_height);
   2149   }
   2150   else
   2151     return true;
   2152 }
   2153 
   2154 bool GPU::SendDisplayToMediaCapture(MediaCapture* cap)
   2155 {
   2156   GPUTexture* target = cap->GetRenderTexture();
   2157   if (!target) [[unlikely]]
   2158     return false;
   2159 
   2160   const bool apply_aspect_ratio =
   2161     (g_settings.display_screenshot_mode != DisplayScreenshotMode::UncorrectedInternalResolution);
   2162   const bool postfx = (g_settings.display_screenshot_mode != DisplayScreenshotMode::InternalResolution);
   2163   GSVector4i display_rect, draw_rect;
   2164   CalculateDrawRect(target->GetWidth(), target->GetHeight(), !g_settings.debugging.show_vram, apply_aspect_ratio,
   2165                     &display_rect, &draw_rect);
   2166 
   2167   // Not cleared by RenderDisplay().
   2168   g_gpu_device->ClearRenderTarget(target, GPUDevice::DEFAULT_CLEAR_COLOR);
   2169 
   2170   if (!RenderDisplay(target, display_rect, draw_rect, postfx)) [[unlikely]]
   2171     return false;
   2172 
   2173   return cap->DeliverVideoFrame(target);
   2174 }
   2175 
   2176 void GPU::DestroyDeinterlaceTextures()
   2177 {
   2178   for (std::unique_ptr<GPUTexture>& tex : m_deinterlace_buffers)
   2179     g_gpu_device->RecycleTexture(std::move(tex));
   2180   g_gpu_device->RecycleTexture(std::move(m_deinterlace_texture));
   2181   m_current_deinterlace_buffer = 0;
   2182 }
   2183 
   2184 bool GPU::Deinterlace(u32 field, u32 line_skip)
   2185 {
   2186   GPUTexture* src = m_display_texture;
   2187   const u32 x = m_display_texture_view_x;
   2188   const u32 y = m_display_texture_view_y;
   2189   const u32 width = m_display_texture_view_width;
   2190   const u32 height = m_display_texture_view_height;
   2191 
   2192   switch (g_settings.display_deinterlacing_mode)
   2193   {
   2194     case DisplayDeinterlacingMode::Disabled:
   2195     {
   2196       if (line_skip == 0)
   2197         return true;
   2198 
   2199       // Still have to extract the field.
   2200       if (!DeinterlaceExtractField(0, src, x, y, width, height, line_skip)) [[unlikely]]
   2201         return false;
   2202 
   2203       SetDisplayTexture(m_deinterlace_buffers[0].get(), m_display_depth_buffer, 0, 0, width, height);
   2204       return true;
   2205     }
   2206 
   2207     case DisplayDeinterlacingMode::Weave:
   2208     {
   2209       GL_SCOPE_FMT("DeinterlaceWeave({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip);
   2210 
   2211       const u32 full_height = height * 2;
   2212       if (!DeinterlaceSetTargetSize(width, full_height, true)) [[unlikely]]
   2213       {
   2214         ClearDisplayTexture();
   2215         return false;
   2216       }
   2217 
   2218       src->MakeReadyForSampling();
   2219 
   2220       g_gpu_device->SetRenderTarget(m_deinterlace_texture.get());
   2221       g_gpu_device->SetPipeline(m_deinterlace_pipeline.get());
   2222       g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler());
   2223       const u32 uniforms[] = {x, y, field, line_skip};
   2224       g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
   2225       g_gpu_device->SetViewportAndScissor(0, 0, width, full_height);
   2226       g_gpu_device->Draw(3, 0);
   2227 
   2228       m_deinterlace_texture->MakeReadyForSampling();
   2229       SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height);
   2230       return true;
   2231     }
   2232 
   2233     case DisplayDeinterlacingMode::Blend:
   2234     {
   2235       constexpr u32 NUM_BLEND_BUFFERS = 2;
   2236 
   2237       GL_SCOPE_FMT("DeinterlaceBlend({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip);
   2238 
   2239       const u32 this_buffer = m_current_deinterlace_buffer;
   2240       m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % NUM_BLEND_BUFFERS;
   2241       GL_INS_FMT("Current buffer: {}", this_buffer);
   2242       if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) ||
   2243           !DeinterlaceSetTargetSize(width, height, false)) [[unlikely]]
   2244       {
   2245         ClearDisplayTexture();
   2246         return false;
   2247       }
   2248 
   2249       // TODO: could be implemented with alpha blending instead..
   2250 
   2251       g_gpu_device->InvalidateRenderTarget(m_deinterlace_texture.get());
   2252       g_gpu_device->SetRenderTarget(m_deinterlace_texture.get());
   2253       g_gpu_device->SetPipeline(m_deinterlace_pipeline.get());
   2254       g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler());
   2255       g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % NUM_BLEND_BUFFERS].get(),
   2256                                       g_gpu_device->GetNearestSampler());
   2257       g_gpu_device->SetViewportAndScissor(0, 0, width, height);
   2258       g_gpu_device->Draw(3, 0);
   2259 
   2260       m_deinterlace_texture->MakeReadyForSampling();
   2261       SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, height);
   2262       return true;
   2263     }
   2264 
   2265     case DisplayDeinterlacingMode::Adaptive:
   2266     {
   2267       GL_SCOPE_FMT("DeinterlaceAdaptive({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field,
   2268                    line_skip);
   2269 
   2270       const u32 full_height = height * 2;
   2271       const u32 this_buffer = m_current_deinterlace_buffer;
   2272       m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % DEINTERLACE_BUFFER_COUNT;
   2273       GL_INS_FMT("Current buffer: {}", this_buffer);
   2274       if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) ||
   2275           !DeinterlaceSetTargetSize(width, full_height, false)) [[unlikely]]
   2276       {
   2277         ClearDisplayTexture();
   2278         return false;
   2279       }
   2280 
   2281       g_gpu_device->SetRenderTarget(m_deinterlace_texture.get());
   2282       g_gpu_device->SetPipeline(m_deinterlace_pipeline.get());
   2283       g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler());
   2284       g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % DEINTERLACE_BUFFER_COUNT].get(),
   2285                                       g_gpu_device->GetNearestSampler());
   2286       g_gpu_device->SetTextureSampler(2, m_deinterlace_buffers[(this_buffer - 2) % DEINTERLACE_BUFFER_COUNT].get(),
   2287                                       g_gpu_device->GetNearestSampler());
   2288       g_gpu_device->SetTextureSampler(3, m_deinterlace_buffers[(this_buffer - 3) % DEINTERLACE_BUFFER_COUNT].get(),
   2289                                       g_gpu_device->GetNearestSampler());
   2290       const u32 uniforms[] = {field, full_height};
   2291       g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
   2292       g_gpu_device->SetViewportAndScissor(0, 0, width, full_height);
   2293       g_gpu_device->Draw(3, 0);
   2294 
   2295       m_deinterlace_texture->MakeReadyForSampling();
   2296       SetDisplayTexture(m_deinterlace_texture.get(), m_display_depth_buffer, 0, 0, width, full_height);
   2297       return true;
   2298     }
   2299 
   2300     default:
   2301       UnreachableCode();
   2302   }
   2303 }
   2304 
   2305 bool GPU::DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 line_skip)
   2306 {
   2307   if (!m_deinterlace_buffers[dst_bufidx] || m_deinterlace_buffers[dst_bufidx]->GetWidth() != width ||
   2308       m_deinterlace_buffers[dst_bufidx]->GetHeight() != height)
   2309   {
   2310     if (!g_gpu_device->ResizeTexture(&m_deinterlace_buffers[dst_bufidx], width, height, GPUTexture::Type::RenderTarget,
   2311                                      GPUTexture::Format::RGBA8, false)) [[unlikely]]
   2312     {
   2313       return false;
   2314     }
   2315 
   2316     GL_OBJECT_NAME_FMT(m_deinterlace_buffers[dst_bufidx], "Blend Deinterlace Buffer {}", dst_bufidx);
   2317   }
   2318 
   2319   GPUTexture* dst = m_deinterlace_buffers[dst_bufidx].get();
   2320   g_gpu_device->InvalidateRenderTarget(dst);
   2321 
   2322   // If we're not skipping lines, then we can simply copy the texture.
   2323   if (line_skip == 0 && src->GetFormat() == dst->GetFormat())
   2324   {
   2325     GL_INS_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => copy direct", x, y, width, height, line_skip);
   2326     g_gpu_device->CopyTextureRegion(dst, 0, 0, 0, 0, src, x, y, 0, 0, width, height);
   2327   }
   2328   else
   2329   {
   2330     GL_SCOPE_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => shader copy", x, y, width, height,
   2331                  line_skip);
   2332 
   2333     // Otherwise, we need to extract every other line from the texture.
   2334     src->MakeReadyForSampling();
   2335     g_gpu_device->SetRenderTarget(dst);
   2336     g_gpu_device->SetPipeline(m_deinterlace_extract_pipeline.get());
   2337     g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler());
   2338     const u32 uniforms[] = {x, y, line_skip};
   2339     g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
   2340     g_gpu_device->SetViewportAndScissor(0, 0, width, height);
   2341     g_gpu_device->Draw(3, 0);
   2342 
   2343     GL_POP();
   2344   }
   2345 
   2346   dst->MakeReadyForSampling();
   2347   return true;
   2348 }
   2349 
   2350 bool GPU::DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve)
   2351 {
   2352   if (!m_deinterlace_texture || m_deinterlace_texture->GetWidth() != width ||
   2353       m_deinterlace_texture->GetHeight() != height)
   2354   {
   2355     if (!g_gpu_device->ResizeTexture(&m_deinterlace_texture, width, height, GPUTexture::Type::RenderTarget,
   2356                                      GPUTexture::Format::RGBA8, preserve)) [[unlikely]]
   2357     {
   2358       return false;
   2359     }
   2360 
   2361     GL_OBJECT_NAME(m_deinterlace_texture, "Deinterlace target texture");
   2362   }
   2363 
   2364   return true;
   2365 }
   2366 
   2367 bool GPU::ApplyChromaSmoothing()
   2368 {
   2369   const u32 x = m_display_texture_view_x;
   2370   const u32 y = m_display_texture_view_y;
   2371   const u32 width = m_display_texture_view_width;
   2372   const u32 height = m_display_texture_view_height;
   2373   if (!m_chroma_smoothing_texture || m_chroma_smoothing_texture->GetWidth() != width ||
   2374       m_chroma_smoothing_texture->GetHeight() != height)
   2375   {
   2376     if (!g_gpu_device->ResizeTexture(&m_chroma_smoothing_texture, width, height, GPUTexture::Type::RenderTarget,
   2377                                      GPUTexture::Format::RGBA8, false))
   2378     {
   2379       ClearDisplayTexture();
   2380       return false;
   2381     }
   2382 
   2383     GL_OBJECT_NAME(m_chroma_smoothing_texture, "Chroma smoothing texture");
   2384   }
   2385 
   2386   GL_SCOPE_FMT("ApplyChromaSmoothing({{{},{}}}, {}x{})", x, y, width, height);
   2387 
   2388   m_display_texture->MakeReadyForSampling();
   2389   g_gpu_device->InvalidateRenderTarget(m_chroma_smoothing_texture.get());
   2390   g_gpu_device->SetRenderTarget(m_chroma_smoothing_texture.get());
   2391   g_gpu_device->SetPipeline(m_chroma_smoothing_pipeline.get());
   2392   g_gpu_device->SetTextureSampler(0, m_display_texture, g_gpu_device->GetNearestSampler());
   2393   const u32 uniforms[] = {x, y, width - 1, height - 1};
   2394   g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
   2395   g_gpu_device->SetViewportAndScissor(0, 0, width, height);
   2396   g_gpu_device->Draw(3, 0);
   2397 
   2398   m_chroma_smoothing_texture->MakeReadyForSampling();
   2399   SetDisplayTexture(m_chroma_smoothing_texture.get(), m_display_depth_buffer, 0, 0, width, height);
   2400   return true;
   2401 }
   2402 
   2403 void GPU::CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio,
   2404                             GSVector4i* display_rect, GSVector4i* draw_rect) const
   2405 {
   2406   const bool integer_scale = (g_settings.display_scaling == DisplayScalingMode::NearestInteger ||
   2407                               g_settings.display_scaling == DisplayScalingMode::BilinearInteger);
   2408   const bool show_vram = g_settings.debugging.show_vram;
   2409   const float display_aspect_ratio = ComputeDisplayAspectRatio();
   2410   const float window_ratio = static_cast<float>(window_width) / static_cast<float>(window_height);
   2411   const float crtc_display_width = static_cast<float>(show_vram ? VRAM_WIDTH : m_crtc_state.display_width);
   2412   const float crtc_display_height = static_cast<float>(show_vram ? VRAM_HEIGHT : m_crtc_state.display_height);
   2413   const float x_scale =
   2414     apply_aspect_ratio ?
   2415       (display_aspect_ratio / (static_cast<float>(crtc_display_width) / static_cast<float>(crtc_display_height))) :
   2416       1.0f;
   2417   float display_width = crtc_display_width;
   2418   float display_height = crtc_display_height;
   2419   float active_left = static_cast<float>(show_vram ? 0 : m_crtc_state.display_origin_left);
   2420   float active_top = static_cast<float>(show_vram ? 0 : m_crtc_state.display_origin_top);
   2421   float active_width = static_cast<float>(show_vram ? VRAM_WIDTH : m_crtc_state.display_vram_width);
   2422   float active_height = static_cast<float>(show_vram ? VRAM_HEIGHT : m_crtc_state.display_vram_height);
   2423   if (!g_settings.display_stretch_vertically)
   2424   {
   2425     display_width *= x_scale;
   2426     active_left *= x_scale;
   2427     active_width *= x_scale;
   2428   }
   2429   else
   2430   {
   2431     display_height /= x_scale;
   2432     active_top /= x_scale;
   2433     active_height /= x_scale;
   2434   }
   2435 
   2436   // swap width/height when rotated, the flipping of padding is taken care of in the shader with the rotation matrix
   2437   if (g_settings.display_rotation == DisplayRotation::Rotate90 ||
   2438       g_settings.display_rotation == DisplayRotation::Rotate270)
   2439   {
   2440     std::swap(display_width, display_height);
   2441     std::swap(active_width, active_height);
   2442     std::swap(active_top, active_left);
   2443   }
   2444 
   2445   // now fit it within the window
   2446   float scale;
   2447   float left_padding, top_padding;
   2448   if ((display_width / display_height) >= window_ratio)
   2449   {
   2450     // align in middle vertically
   2451     scale = static_cast<float>(window_width) / display_width;
   2452     if (integer_scale)
   2453     {
   2454       scale = std::max(std::floor(scale), 1.0f);
   2455       left_padding = std::max<float>((static_cast<float>(window_width) - display_width * scale) / 2.0f, 0.0f);
   2456     }
   2457     else
   2458     {
   2459       left_padding = 0.0f;
   2460     }
   2461 
   2462     switch (g_settings.display_alignment)
   2463     {
   2464       case DisplayAlignment::RightOrBottom:
   2465         top_padding = std::max<float>(static_cast<float>(window_height) - (display_height * scale), 0.0f);
   2466         break;
   2467 
   2468       case DisplayAlignment::Center:
   2469         top_padding = std::max<float>((static_cast<float>(window_height) - (display_height * scale)) / 2.0f, 0.0f);
   2470         break;
   2471 
   2472       case DisplayAlignment::LeftOrTop:
   2473       default:
   2474         top_padding = 0.0f;
   2475         break;
   2476     }
   2477   }
   2478   else
   2479   {
   2480     // align in middle horizontally
   2481     scale = static_cast<float>(window_height) / display_height;
   2482     if (integer_scale)
   2483     {
   2484       scale = std::max(std::floor(scale), 1.0f);
   2485       top_padding = std::max<float>((static_cast<float>(window_height) - (display_height * scale)) / 2.0f, 0.0f);
   2486     }
   2487     else
   2488     {
   2489       top_padding = 0.0f;
   2490     }
   2491 
   2492     switch (g_settings.display_alignment)
   2493     {
   2494       case DisplayAlignment::RightOrBottom:
   2495         left_padding = std::max<float>(static_cast<float>(window_width) - (display_width * scale), 0.0f);
   2496         break;
   2497 
   2498       case DisplayAlignment::Center:
   2499         left_padding = std::max<float>((static_cast<float>(window_width) - (display_width * scale)) / 2.0f, 0.0f);
   2500         break;
   2501 
   2502       case DisplayAlignment::LeftOrTop:
   2503       default:
   2504         left_padding = 0.0f;
   2505         break;
   2506     }
   2507   }
   2508 
   2509   // TODO: This should be a float rectangle. But because GL is lame, it only has integer viewports...
   2510   const s32 left = static_cast<s32>(active_left * scale + left_padding);
   2511   const s32 top = static_cast<s32>(active_top * scale + top_padding);
   2512   const s32 right = left + static_cast<s32>(active_width * scale);
   2513   const s32 bottom = top + static_cast<s32>(active_height * scale);
   2514   *draw_rect = GSVector4i(left, top, right, bottom);
   2515   *display_rect = GSVector4i(
   2516     GSVector4(left_padding, top_padding, left_padding + display_width * scale, top_padding + display_height * scale));
   2517 }
   2518 
   2519 bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp,
   2520                                    u8 quality, bool clear_alpha, bool flip_y, std::vector<u32> texture_data,
   2521                                    u32 texture_data_stride, GPUTexture::Format texture_format, bool display_osd_message,
   2522                                    bool use_thread)
   2523 {
   2524   std::string osd_key;
   2525   if (display_osd_message)
   2526   {
   2527     // Use a 60 second timeout to give it plenty of time to actually save.
   2528     osd_key = fmt::format("ScreenshotSaver_{}", filename);
   2529     Host::AddIconOSDMessage(osd_key, ICON_EMOJI_CAMERA_WITH_FLASH,
   2530                             fmt::format(TRANSLATE_FS("GPU", "Saving screenshot to '{}'."), Path::GetFileName(filename)),
   2531                             60.0f);
   2532   }
   2533 
   2534   static constexpr auto proc = [](u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp,
   2535                                   u8 quality, bool clear_alpha, bool flip_y, std::vector<u32> texture_data,
   2536                                   u32 texture_data_stride, GPUTexture::Format texture_format, std::string osd_key,
   2537                                   bool use_thread) {
   2538     bool result;
   2539 
   2540     const char* extension = std::strrchr(filename.c_str(), '.');
   2541     if (extension)
   2542     {
   2543       if (GPUTexture::ConvertTextureDataToRGBA8(width, height, texture_data, texture_data_stride, texture_format))
   2544       {
   2545         if (clear_alpha)
   2546         {
   2547           for (u32& pixel : texture_data)
   2548             pixel |= 0xFF000000u;
   2549         }
   2550 
   2551         if (flip_y)
   2552           GPUTexture::FlipTextureDataRGBA8(width, height, reinterpret_cast<u8*>(texture_data.data()),
   2553                                            texture_data_stride);
   2554 
   2555         Assert(texture_data_stride == sizeof(u32) * width);
   2556         RGBA8Image image(width, height, std::move(texture_data));
   2557         if (image.SaveToFile(filename.c_str(), fp.get(), quality))
   2558         {
   2559           result = true;
   2560         }
   2561         else
   2562         {
   2563           ERROR_LOG("Unknown extension in filename '{}' or save error: '{}'", filename, extension);
   2564           result = false;
   2565         }
   2566       }
   2567       else
   2568       {
   2569         result = false;
   2570       }
   2571     }
   2572     else
   2573     {
   2574       ERROR_LOG("Unable to determine file extension for '{}'", filename);
   2575       result = false;
   2576     }
   2577 
   2578     if (!osd_key.empty())
   2579     {
   2580       Host::AddIconOSDMessage(std::move(osd_key), ICON_EMOJI_CAMERA,
   2581                               fmt::format(result ? TRANSLATE_FS("GPU", "Saved screenshot to '{}'.") :
   2582                                                    TRANSLATE_FS("GPU", "Failed to save screenshot to '{}'."),
   2583                                           Path::GetFileName(filename),
   2584                                           result ? Host::OSD_INFO_DURATION : Host::OSD_ERROR_DURATION));
   2585     }
   2586 
   2587     if (use_thread)
   2588     {
   2589       // remove ourselves from the list, if the GS thread is waiting for us, we won't be in there
   2590       const auto this_id = std::this_thread::get_id();
   2591       std::unique_lock lock(s_screenshot_threads_mutex);
   2592       for (auto it = s_screenshot_threads.begin(); it != s_screenshot_threads.end(); ++it)
   2593       {
   2594         if (it->get_id() == this_id)
   2595         {
   2596           it->detach();
   2597           s_screenshot_threads.erase(it);
   2598           break;
   2599         }
   2600       }
   2601     }
   2602 
   2603     return result;
   2604   };
   2605 
   2606   if (!use_thread)
   2607   {
   2608     return proc(width, height, std::move(filename), std::move(fp), quality, clear_alpha, flip_y,
   2609                 std::move(texture_data), texture_data_stride, texture_format, std::move(osd_key), use_thread);
   2610   }
   2611 
   2612   std::unique_lock lock(s_screenshot_threads_mutex);
   2613   std::thread thread(proc, width, height, std::move(filename), std::move(fp), quality, clear_alpha, flip_y,
   2614                      std::move(texture_data), texture_data_stride, texture_format, std::move(osd_key), use_thread);
   2615   s_screenshot_threads.push_back(std::move(thread));
   2616   return true;
   2617 }
   2618 
   2619 void JoinScreenshotThreads()
   2620 {
   2621   std::unique_lock lock(s_screenshot_threads_mutex);
   2622   while (!s_screenshot_threads.empty())
   2623   {
   2624     std::thread save_thread(std::move(s_screenshot_threads.front()));
   2625     s_screenshot_threads.pop_front();
   2626     lock.unlock();
   2627     save_thread.join();
   2628     lock.lock();
   2629   }
   2630 }
   2631 
   2632 bool GPU::WriteDisplayTextureToFile(std::string filename, bool compress_on_thread /* = false */)
   2633 {
   2634   if (!m_display_texture)
   2635     return false;
   2636 
   2637   const u32 read_x = static_cast<u32>(m_display_texture_view_x);
   2638   const u32 read_y = static_cast<u32>(m_display_texture_view_y);
   2639   const u32 read_width = static_cast<u32>(m_display_texture_view_width);
   2640   const u32 read_height = static_cast<u32>(m_display_texture_view_height);
   2641 
   2642   const u32 texture_data_stride =
   2643     Common::AlignUpPow2(GPUTexture::GetPixelSize(m_display_texture->GetFormat()) * read_width, 4);
   2644   std::vector<u32> texture_data((texture_data_stride * read_height) / sizeof(u32));
   2645 
   2646   std::unique_ptr<GPUDownloadTexture> dltex;
   2647   if (g_gpu_device->GetFeatures().memory_import)
   2648   {
   2649     dltex =
   2650       g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat(), texture_data.data(),
   2651                                           texture_data.size() * sizeof(u32), texture_data_stride);
   2652   }
   2653   if (!dltex)
   2654   {
   2655     if (!(dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat())))
   2656     {
   2657       ERROR_LOG("Failed to create {}x{} {} download texture", read_width, read_height,
   2658                 GPUTexture::GetFormatName(m_display_texture->GetFormat()));
   2659       return false;
   2660     }
   2661   }
   2662 
   2663   dltex->CopyFromTexture(0, 0, m_display_texture, read_x, read_y, read_width, read_height, 0, 0, !dltex->IsImported());
   2664   if (!dltex->ReadTexels(0, 0, read_width, read_height, texture_data.data(), texture_data_stride))
   2665   {
   2666     RestoreDeviceContext();
   2667     return false;
   2668   }
   2669 
   2670   RestoreDeviceContext();
   2671 
   2672   Error error;
   2673   auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb", &error);
   2674   if (!fp)
   2675   {
   2676     ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(filename), error.GetDescription());
   2677     return false;
   2678   }
   2679 
   2680   constexpr bool clear_alpha = true;
   2681   const bool flip_y = g_gpu_device->UsesLowerLeftOrigin();
   2682 
   2683   return CompressAndWriteTextureToFile(
   2684     read_width, read_height, std::move(filename), std::move(fp), g_settings.display_screenshot_quality, clear_alpha,
   2685     flip_y, std::move(texture_data), texture_data_stride, m_display_texture->GetFormat(), false, compress_on_thread);
   2686 }
   2687 
   2688 bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i display_rect, const GSVector4i draw_rect,
   2689                                    bool postfx, std::vector<u32>* out_pixels, u32* out_stride,
   2690                                    GPUTexture::Format* out_format)
   2691 {
   2692   const GPUTexture::Format hdformat =
   2693     g_gpu_device->HasSurface() ? g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8;
   2694 
   2695   auto render_texture =
   2696     g_gpu_device->FetchAutoRecycleTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, hdformat);
   2697   if (!render_texture)
   2698     return false;
   2699 
   2700   g_gpu_device->ClearRenderTarget(render_texture.get(), GPUDevice::DEFAULT_CLEAR_COLOR);
   2701 
   2702   // TODO: this should use copy shader instead.
   2703   RenderDisplay(render_texture.get(), display_rect, draw_rect, postfx);
   2704 
   2705   const u32 stride = Common::AlignUpPow2(GPUTexture::GetPixelSize(hdformat) * width, sizeof(u32));
   2706   out_pixels->resize((height * stride) / sizeof(u32));
   2707 
   2708   std::unique_ptr<GPUDownloadTexture> dltex;
   2709   if (g_gpu_device->GetFeatures().memory_import)
   2710   {
   2711     dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat, out_pixels->data(),
   2712                                                 out_pixels->size() * sizeof(u32), stride);
   2713   }
   2714   if (!dltex)
   2715   {
   2716     if (!(dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat)))
   2717     {
   2718       ERROR_LOG("Failed to create {}x{} download texture", width, height);
   2719       return false;
   2720     }
   2721   }
   2722 
   2723   dltex->CopyFromTexture(0, 0, render_texture.get(), 0, 0, width, height, 0, 0, false);
   2724   if (!dltex->ReadTexels(0, 0, width, height, out_pixels->data(), stride))
   2725   {
   2726     RestoreDeviceContext();
   2727     return false;
   2728   }
   2729 
   2730   *out_stride = stride;
   2731   *out_format = hdformat;
   2732   RestoreDeviceContext();
   2733   return true;
   2734 }
   2735 
   2736 void GPU::CalculateScreenshotSize(DisplayScreenshotMode mode, u32* width, u32* height, GSVector4i* display_rect,
   2737                                   GSVector4i* draw_rect) const
   2738 {
   2739   *width = g_gpu_device->GetWindowWidth();
   2740   *height = g_gpu_device->GetWindowHeight();
   2741   CalculateDrawRect(*width, *height, true, !g_settings.debugging.show_vram, display_rect, draw_rect);
   2742 
   2743   const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution || g_settings.debugging.show_vram);
   2744   if (internal_resolution && m_display_texture_view_width != 0 && m_display_texture_view_height != 0)
   2745   {
   2746     if (mode == DisplayScreenshotMode::InternalResolution)
   2747     {
   2748       const u32 draw_width = static_cast<u32>(draw_rect->width());
   2749       const u32 draw_height = static_cast<u32>(draw_rect->height());
   2750 
   2751       // If internal res, scale the computed draw rectangle to the internal res.
   2752       // We re-use the draw rect because it's already been AR corrected.
   2753       const float sar =
   2754         static_cast<float>(m_display_texture_view_width) / static_cast<float>(m_display_texture_view_height);
   2755       const float dar = static_cast<float>(draw_width) / static_cast<float>(draw_height);
   2756       if (sar >= dar)
   2757       {
   2758         // stretch height, preserve width
   2759         const float scale = static_cast<float>(m_display_texture_view_width) / static_cast<float>(draw_width);
   2760         *width = m_display_texture_view_width;
   2761         *height = static_cast<u32>(std::round(static_cast<float>(draw_height) * scale));
   2762       }
   2763       else
   2764       {
   2765         // stretch width, preserve height
   2766         const float scale = static_cast<float>(m_display_texture_view_height) / static_cast<float>(draw_height);
   2767         *width = static_cast<u32>(std::round(static_cast<float>(draw_width) * scale));
   2768         *height = m_display_texture_view_height;
   2769       }
   2770 
   2771       // DX11 won't go past 16K texture size.
   2772       const u32 max_texture_size = g_gpu_device->GetMaxTextureSize();
   2773       if (*width > max_texture_size)
   2774       {
   2775         *height = static_cast<u32>(static_cast<float>(*height) /
   2776                                    (static_cast<float>(*width) / static_cast<float>(max_texture_size)));
   2777         *width = max_texture_size;
   2778       }
   2779       if (*height > max_texture_size)
   2780       {
   2781         *height = max_texture_size;
   2782         *width = static_cast<u32>(static_cast<float>(*width) /
   2783                                   (static_cast<float>(*height) / static_cast<float>(max_texture_size)));
   2784       }
   2785     }
   2786     else // if (mode == DisplayScreenshotMode::UncorrectedInternalResolution)
   2787     {
   2788       *width = m_display_texture_view_width;
   2789       *height = m_display_texture_view_height;
   2790     }
   2791 
   2792     // Remove padding, it's not part of the framebuffer.
   2793     *draw_rect = GSVector4i(0, 0, static_cast<s32>(*width), static_cast<s32>(*height));
   2794     *display_rect = *draw_rect;
   2795   }
   2796 }
   2797 
   2798 bool GPU::RenderScreenshotToFile(std::string filename, DisplayScreenshotMode mode, u8 quality, bool compress_on_thread,
   2799                                  bool show_osd_message)
   2800 {
   2801   u32 width, height;
   2802   GSVector4i display_rect, draw_rect;
   2803   CalculateScreenshotSize(mode, &width, &height, &display_rect, &draw_rect);
   2804 
   2805   const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution);
   2806   if (width == 0 || height == 0)
   2807     return false;
   2808 
   2809   std::vector<u32> pixels;
   2810   u32 pixels_stride;
   2811   GPUTexture::Format pixels_format;
   2812   if (!RenderScreenshotToBuffer(width, height, display_rect, draw_rect, !internal_resolution, &pixels, &pixels_stride,
   2813                                 &pixels_format))
   2814   {
   2815     ERROR_LOG("Failed to render {}x{} screenshot", width, height);
   2816     return false;
   2817   }
   2818 
   2819   Error error;
   2820   auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb", &error);
   2821   if (!fp)
   2822   {
   2823     ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(filename), error.GetDescription());
   2824     return false;
   2825   }
   2826 
   2827   return CompressAndWriteTextureToFile(width, height, std::move(filename), std::move(fp), quality, true,
   2828                                        g_gpu_device->UsesLowerLeftOrigin(), std::move(pixels), pixels_stride,
   2829                                        pixels_format, show_osd_message, compress_on_thread);
   2830 }
   2831 
   2832 bool GPU::DumpVRAMToFile(const char* filename)
   2833 {
   2834   ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
   2835 
   2836   const char* extension = std::strrchr(filename, '.');
   2837   if (extension && StringUtil::Strcasecmp(extension, ".png") == 0)
   2838   {
   2839     return DumpVRAMToFile(filename, VRAM_WIDTH, VRAM_HEIGHT, sizeof(u16) * VRAM_WIDTH, g_vram, true);
   2840   }
   2841   else if (extension && StringUtil::Strcasecmp(extension, ".bin") == 0)
   2842   {
   2843     return FileSystem::WriteBinaryFile(filename, g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
   2844   }
   2845   else
   2846   {
   2847     ERROR_LOG("Unknown extension: '{}'", filename);
   2848     return false;
   2849   }
   2850 }
   2851 
   2852 bool GPU::DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, bool remove_alpha)
   2853 {
   2854   RGBA8Image image(width, height);
   2855 
   2856   const char* ptr_in = static_cast<const char*>(buffer);
   2857   for (u32 row = 0; row < height; row++)
   2858   {
   2859     const char* row_ptr_in = ptr_in;
   2860     u32* ptr_out = image.GetRowPixels(row);
   2861 
   2862     for (u32 col = 0; col < width; col++)
   2863     {
   2864       u16 src_col;
   2865       std::memcpy(&src_col, row_ptr_in, sizeof(u16));
   2866       row_ptr_in += sizeof(u16);
   2867       *(ptr_out++) = VRAMRGBA5551ToRGBA8888(remove_alpha ? (src_col | u16(0x8000)) : src_col);
   2868     }
   2869 
   2870     ptr_in += stride;
   2871   }
   2872 
   2873   return image.SaveToFile(filename);
   2874 }
   2875 
   2876 void GPU::DrawDebugStateWindow()
   2877 {
   2878   const float framebuffer_scale = ImGuiManager::GetGlobalScale();
   2879 
   2880   ImGui::SetNextWindowSize(ImVec2(450.0f * framebuffer_scale, 550.0f * framebuffer_scale), ImGuiCond_FirstUseEver);
   2881   if (!ImGui::Begin("GPU", nullptr))
   2882   {
   2883     ImGui::End();
   2884     return;
   2885   }
   2886 
   2887   DrawRendererStats();
   2888 
   2889   if (ImGui::CollapsingHeader("GPU", ImGuiTreeNodeFlags_DefaultOpen))
   2890   {
   2891     static constexpr std::array<const char*, 5> state_strings = {
   2892       {"Idle", "Reading VRAM", "Writing VRAM", "Drawing Polyline"}};
   2893 
   2894     ImGui::Text("State: %s", state_strings[static_cast<u8>(m_blitter_state)]);
   2895     ImGui::Text("Dither: %s", m_GPUSTAT.dither_enable ? "Enabled" : "Disabled");
   2896     ImGui::Text("Draw To Displayed Field: %s", m_GPUSTAT.draw_to_displayed_field ? "Enabled" : "Disabled");
   2897     ImGui::Text("Draw Set Mask Bit: %s", m_GPUSTAT.set_mask_while_drawing ? "Yes" : "No");
   2898     ImGui::Text("Draw To Masked Pixels: %s", m_GPUSTAT.check_mask_before_draw ? "Yes" : "No");
   2899     ImGui::Text("Reverse Flag: %s", m_GPUSTAT.reverse_flag ? "Yes" : "No");
   2900     ImGui::Text("Texture Disable: %s", m_GPUSTAT.texture_disable ? "Yes" : "No");
   2901     ImGui::Text("PAL Mode: %s", m_GPUSTAT.pal_mode ? "Yes" : "No");
   2902     ImGui::Text("Interrupt Request: %s", m_GPUSTAT.interrupt_request ? "Yes" : "No");
   2903     ImGui::Text("DMA Request: %s", m_GPUSTAT.dma_data_request ? "Yes" : "No");
   2904   }
   2905 
   2906   if (ImGui::CollapsingHeader("CRTC", ImGuiTreeNodeFlags_DefaultOpen))
   2907   {
   2908     const auto& cs = m_crtc_state;
   2909     ImGui::Text("Clock: %s", (m_console_is_pal ? (m_GPUSTAT.pal_mode ? "PAL-on-PAL" : "NTSC-on-PAL") :
   2910                                                  (m_GPUSTAT.pal_mode ? "PAL-on-NTSC" : "NTSC-on-NTSC")));
   2911     ImGui::Text("Horizontal Frequency: %.3f KHz", ComputeHorizontalFrequency() / 1000.0f);
   2912     ImGui::Text("Vertical Frequency: %.3f Hz", ComputeVerticalFrequency());
   2913     ImGui::Text("Dot Clock Divider: %u", cs.dot_clock_divider);
   2914     ImGui::Text("Vertical Interlace: %s (%s field)", m_GPUSTAT.vertical_interlace ? "Yes" : "No",
   2915                 cs.interlaced_field ? "odd" : "even");
   2916     ImGui::Text("Current Scanline: %u (tick %u)", cs.current_scanline, cs.current_tick_in_scanline);
   2917     ImGui::Text("Display Disable: %s", m_GPUSTAT.display_disable ? "Yes" : "No");
   2918     ImGui::Text("Displaying Odd Lines: %s", cs.active_line_lsb ? "Yes" : "No");
   2919     ImGui::Text("Color Depth: %u-bit", m_GPUSTAT.display_area_color_depth_24 ? 24 : 15);
   2920     ImGui::Text("Start Offset in VRAM: (%u, %u)", cs.regs.X.GetValue(), cs.regs.Y.GetValue());
   2921     ImGui::Text("Display Total: %u (%u) horizontal, %u vertical", cs.horizontal_total,
   2922                 cs.horizontal_total / cs.dot_clock_divider, cs.vertical_total);
   2923     ImGui::Text("Configured Display Range: %u-%u (%u-%u), %u-%u", cs.regs.X1.GetValue(), cs.regs.X2.GetValue(),
   2924                 cs.regs.X1.GetValue() / cs.dot_clock_divider, cs.regs.X2.GetValue() / cs.dot_clock_divider,
   2925                 cs.regs.Y1.GetValue(), cs.regs.Y2.GetValue());
   2926     ImGui::Text("Output Display Range: %u-%u (%u-%u), %u-%u", cs.horizontal_display_start, cs.horizontal_display_end,
   2927                 cs.horizontal_display_start / cs.dot_clock_divider, cs.horizontal_display_end / cs.dot_clock_divider,
   2928                 cs.vertical_display_start, cs.vertical_display_end);
   2929     ImGui::Text("Cropping: %s", Settings::GetDisplayCropModeName(g_settings.display_crop_mode));
   2930     ImGui::Text("Visible Display Range: %u-%u (%u-%u), %u-%u", cs.horizontal_visible_start, cs.horizontal_visible_end,
   2931                 cs.horizontal_visible_start / cs.dot_clock_divider, cs.horizontal_visible_end / cs.dot_clock_divider,
   2932                 cs.vertical_visible_start, cs.vertical_visible_end);
   2933     ImGui::Text("Display Resolution: %ux%u", cs.display_width, cs.display_height);
   2934     ImGui::Text("Display Origin: %u, %u", cs.display_origin_left, cs.display_origin_top);
   2935     ImGui::Text("Displayed/Visible VRAM Portion: %ux%u @ (%u, %u)", cs.display_vram_width, cs.display_vram_height,
   2936                 cs.display_vram_left, cs.display_vram_top);
   2937     ImGui::Text("Padding: Left=%d, Top=%d, Right=%d, Bottom=%d", cs.display_origin_left, cs.display_origin_top,
   2938                 cs.display_width - cs.display_vram_width - cs.display_origin_left,
   2939                 cs.display_height - cs.display_vram_height - cs.display_origin_top);
   2940   }
   2941 
   2942   ImGui::End();
   2943 }
   2944 
// Called by DrawDebugStateWindow() right after the debug window is opened. This implementation draws
// nothing. NOTE(review): presumably a hook that renderer backends override - confirm against gpu.h.
void GPU::DrawRendererStats()
{
}
   2948 
// Intentionally does nothing in this implementation.
// NOTE(review): presumably a notification hook that renderer backends override - confirm against gpu.h.
void GPU::OnBufferSwapped()
{
}
   2952 
   2953 void GPU::GetStatsString(SmallStringBase& str)
   2954 {
   2955   if (IsHardwareRenderer())
   2956   {
   2957     str.format("{} HW | {} P | {} DC | {} B | {} RP | {} RB | {} C | {} W",
   2958                GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), m_stats.num_primitives,
   2959                m_stats.host_num_draws, m_stats.host_num_barriers, m_stats.host_num_render_passes,
   2960                m_stats.host_num_downloads, m_stats.num_copies, m_stats.num_writes);
   2961   }
   2962   else
   2963   {
   2964     str.format("{} SW | {} P | {} R | {} C | {} W", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()),
   2965                m_stats.num_primitives, m_stats.num_reads, m_stats.num_copies, m_stats.num_writes);
   2966   }
   2967 }
   2968 
   2969 void GPU::GetMemoryStatsString(SmallStringBase& str)
   2970 {
   2971   const u32 vram_usage_mb = static_cast<u32>((g_gpu_device->GetVRAMUsage() + (1048576 - 1)) / 1048576);
   2972   const u32 stream_kb = static_cast<u32>((m_stats.host_buffer_streamed + (1024 - 1)) / 1024);
   2973 
   2974   str.format("{} MB VRAM | {} KB STR | {} TC | {} TU", vram_usage_mb, stream_kb, m_stats.host_num_copies,
   2975              m_stats.host_num_uploads);
   2976 }
   2977 
   2978 void GPU::ResetStatistics()
   2979 {
   2980   m_counters = {};
   2981   g_gpu_device->ResetStatistics();
   2982 }
   2983 
   2984 void GPU::UpdateStatistics(u32 frame_count)
   2985 {
   2986   const GPUDevice::Statistics& stats = g_gpu_device->GetStatistics();
   2987   const u32 round = (frame_count - 1);
   2988 
   2989 #define UPDATE_COUNTER(x) m_stats.x = (m_counters.x + round) / frame_count
   2990 #define UPDATE_GPU_STAT(x) m_stats.host_##x = (stats.x + round) / frame_count
   2991 
   2992   UPDATE_COUNTER(num_reads);
   2993   UPDATE_COUNTER(num_writes);
   2994   UPDATE_COUNTER(num_copies);
   2995   UPDATE_COUNTER(num_vertices);
   2996   UPDATE_COUNTER(num_primitives);
   2997 
   2998   // UPDATE_COUNTER(num_read_texture_updates);
   2999   // UPDATE_COUNTER(num_ubo_updates);
   3000 
   3001   UPDATE_GPU_STAT(buffer_streamed);
   3002   UPDATE_GPU_STAT(num_draws);
   3003   UPDATE_GPU_STAT(num_barriers);
   3004   UPDATE_GPU_STAT(num_render_passes);
   3005   UPDATE_GPU_STAT(num_copies);
   3006   UPDATE_GPU_STAT(num_downloads);
   3007   UPDATE_GPU_STAT(num_uploads);
   3008 
   3009 #undef UPDATE_GPU_STAT
   3010 #undef UPDATE_COUNTER
   3011 
   3012   ResetStatistics();
   3013 }