duckstation

DuckStation, archived at the revision just before upstream changed it to a proprietary software project; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

gpu_commands.cpp (21065B)


      1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
      3 
      4 #include "gpu.h"
      5 #include "interrupt_controller.h"
      6 #include "system.h"
      7 #include "texture_replacements.h"
      8 
      9 #include "common/assert.h"
     10 #include "common/log.h"
     11 #include "common/string_util.h"
     12 
     13 Log_SetChannel(GPU);
     14 
     15 #define CHECK_COMMAND_SIZE(num_words)                                                                                  \
     16   if (m_fifo.GetSize() < num_words)                                                                                    \
     17   {                                                                                                                    \
     18     m_command_total_words = num_words;                                                                                 \
     19     return false;                                                                                                      \
     20   }
     21 
// Running counters used to number the debug dump images written by this file; each is post-incremented
// when the corresponding "cpu_to_vram_copy_{}.png" / "vram_to_cpu_copy_{}.png" dump is produced.
static u32 s_cpu_to_vram_dump_id = 1;
static u32 s_vram_to_cpu_dump_id = 1;
     24 
     25 static constexpr u32 ReplaceZero(u32 value, u32 value_for_zero)
     26 {
     27   return value == 0 ? value_for_zero : value;
     28 }
     29 
     30 void GPU::TryExecuteCommands()
     31 {
     32   while (m_pending_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty())
     33   {
     34     switch (m_blitter_state)
     35     {
     36       case BlitterState::Idle:
     37       {
     38         const u32 command = FifoPeek(0) >> 24;
     39         if ((this->*s_GP0_command_handler_table[command])())
     40           continue;
     41         else
     42           return;
     43       }
     44 
     45       case BlitterState::WritingVRAM:
     46       {
     47         DebugAssert(m_blit_remaining_words > 0);
     48         const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize());
     49         m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
     50         for (u32 i = 0; i < words_to_copy; i++)
     51           m_blit_buffer.push_back(FifoPop());
     52         m_blit_remaining_words -= words_to_copy;
     53 
     54         DEBUG_LOG("VRAM write burst of {} words, {} words remaining", words_to_copy, m_blit_remaining_words);
     55         if (m_blit_remaining_words == 0)
     56           FinishVRAMWrite();
     57 
     58         continue;
     59       }
     60 
     61       case BlitterState::ReadingVRAM:
     62       {
     63         return;
     64       }
     65       break;
     66 
     67       case BlitterState::DrawingPolyLine:
     68       {
     69         const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1;
     70         u32 terminator_index =
     71           m_render_command.shading_enable ? ((static_cast<u32>(m_blit_buffer.size()) & 1u) ^ 1u) : 0u;
     72         for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex)
     73         {
     74           // polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000.
     75           // terminator is on the first word for the vertex
     76           if ((FifoPeek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000))
     77             break;
     78         }
     79 
     80         const bool found_terminator = (terminator_index < m_fifo.GetSize());
     81         const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize());
     82         if (words_to_copy > 0)
     83         {
     84           m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy);
     85           for (u32 i = 0; i < words_to_copy; i++)
     86             m_blit_buffer.push_back(FifoPop());
     87         }
     88 
     89         DEBUG_LOG("Added {} words to polyline", words_to_copy);
     90         if (found_terminator)
     91         {
     92           // drop terminator
     93           m_fifo.RemoveOne();
     94           DEBUG_LOG("Drawing poly-line with {} vertices", GetPolyLineVertexCount());
     95           DispatchRenderCommand();
     96           m_blit_buffer.clear();
     97           EndCommand();
     98           continue;
     99         }
    100       }
    101       break;
    102     }
    103   }
    104 }
    105 
    106 void GPU::ExecuteCommands()
    107 {
    108   const bool was_executing_from_event = std::exchange(m_executing_commands, true);
    109 
    110   TryExecuteCommands();
    111   UpdateDMARequest();
    112   UpdateGPUIdle();
    113 
    114   m_executing_commands = was_executing_from_event;
    115   if (!was_executing_from_event)
    116     UpdateCommandTickEvent();
    117 }
    118 
    119 void GPU::EndCommand()
    120 {
    121   m_blitter_state = BlitterState::Idle;
    122   m_command_total_words = 0;
    123 }
    124 
    125 GPU::GP0CommandHandlerTable GPU::GenerateGP0CommandHandlerTable()
    126 {
    127   GP0CommandHandlerTable table = {};
    128   for (u32 i = 0; i < static_cast<u32>(table.size()); i++)
    129     table[i] = &GPU::HandleUnknownGP0Command;
    130   table[0x00] = &GPU::HandleNOPCommand;
    131   table[0x01] = &GPU::HandleClearCacheCommand;
    132   table[0x02] = &GPU::HandleFillRectangleCommand;
    133   table[0x03] = &GPU::HandleNOPCommand;
    134   for (u32 i = 0x04; i <= 0x1E; i++)
    135     table[i] = &GPU::HandleNOPCommand;
    136   table[0x1F] = &GPU::HandleInterruptRequestCommand;
    137   for (u32 i = 0x20; i <= 0x7F; i++)
    138   {
    139     const GPURenderCommand rc{i << 24};
    140     switch (rc.primitive)
    141     {
    142       case GPUPrimitive::Polygon:
    143         table[i] = &GPU::HandleRenderPolygonCommand;
    144         break;
    145       case GPUPrimitive::Line:
    146         table[i] = rc.polyline ? &GPU::HandleRenderPolyLineCommand : &GPU::HandleRenderLineCommand;
    147         break;
    148       case GPUPrimitive::Rectangle:
    149         table[i] = &GPU::HandleRenderRectangleCommand;
    150         break;
    151       default:
    152         table[i] = &GPU::HandleUnknownGP0Command;
    153         break;
    154     }
    155   }
    156   table[0xE0] = &GPU::HandleNOPCommand;
    157   table[0xE1] = &GPU::HandleSetDrawModeCommand;
    158   table[0xE2] = &GPU::HandleSetTextureWindowCommand;
    159   table[0xE3] = &GPU::HandleSetDrawingAreaTopLeftCommand;
    160   table[0xE4] = &GPU::HandleSetDrawingAreaBottomRightCommand;
    161   table[0xE5] = &GPU::HandleSetDrawingOffsetCommand;
    162   table[0xE6] = &GPU::HandleSetMaskBitCommand;
    163   for (u32 i = 0xE7; i <= 0xEF; i++)
    164     table[i] = &GPU::HandleNOPCommand;
    165   for (u32 i = 0x80; i <= 0x9F; i++)
    166     table[i] = &GPU::HandleCopyRectangleVRAMToVRAMCommand;
    167   for (u32 i = 0xA0; i <= 0xBF; i++)
    168     table[i] = &GPU::HandleCopyRectangleCPUToVRAMCommand;
    169   for (u32 i = 0xC0; i <= 0xDF; i++)
    170     table[i] = &GPU::HandleCopyRectangleVRAMToCPUCommand;
    171 
    172   table[0xFF] = &GPU::HandleNOPCommand;
    173 
    174   return table;
    175 }
    176 
    177 bool GPU::HandleUnknownGP0Command()
    178 {
    179   const u32 command = FifoPeek() >> 24;
    180   ERROR_LOG("Unimplemented GP0 command 0x{:02X}", command);
    181 
    182   SmallString dump;
    183   for (u32 i = 0; i < m_fifo.GetSize(); i++)
    184     dump.append_format("{}{:08X}", (i > 0) ? " " : "", FifoPeek(i));
    185   ERROR_LOG("FIFO: {}", dump);
    186 
    187   m_fifo.RemoveOne();
    188   EndCommand();
    189   return true;
    190 }
    191 
// Handler for opcodes that are treated as no-ops: drops the command word from the FIFO, finishes the
// command and reports it as executed. No command ticks are added.
bool GPU::HandleNOPCommand()
{
  m_fifo.RemoveOne();
  EndCommand();
  return true;
}
    198 
// GP0(0x01) handler: flags the texture page as changed and invalidates the CLUT, then consumes the
// command word, charges one command tick and finishes the command. Always returns true.
bool GPU::HandleClearCacheCommand()
{
  DEBUG_LOG("GP0 clear cache");
  m_draw_mode.SetTexturePageChanged();
  InvalidateCLUT();
  m_fifo.RemoveOne();
  AddCommandTicks(1);
  EndCommand();
  return true;
}
    209 
// GP0(0x1F) handler: sets the interrupt request flag in GPUSTAT and raises the GPU line on the interrupt
// controller, then consumes the command word, charges one command tick and finishes the command.
// Always returns true.
bool GPU::HandleInterruptRequestCommand()
{
  DEBUG_LOG("GP0 interrupt request");

  m_GPUSTAT.interrupt_request = true;
  InterruptController::SetLineState(InterruptController::IRQ::GPU, true);

  m_fifo.RemoveOne();
  AddCommandTicks(1);
  EndCommand();
  return true;
}
    222 
    223 bool GPU::HandleSetDrawModeCommand()
    224 {
    225   const u32 param = FifoPop() & 0x00FFFFFFu;
    226   DEBUG_LOG("Set draw mode {:08X}", param);
    227   SetDrawMode(Truncate16(param));
    228   AddCommandTicks(1);
    229   EndCommand();
    230   return true;
    231 }
    232 
    233 bool GPU::HandleSetTextureWindowCommand()
    234 {
    235   const u32 param = FifoPop() & 0x00FFFFFFu;
    236   SetTextureWindow(param);
    237   AddCommandTicks(1);
    238   EndCommand();
    239   return true;
    240 }
    241 
    242 bool GPU::HandleSetDrawingAreaTopLeftCommand()
    243 {
    244   const u32 param = FifoPop() & 0x00FFFFFFu;
    245   const u32 left = param & DRAWING_AREA_COORD_MASK;
    246   const u32 top = (param >> 10) & DRAWING_AREA_COORD_MASK;
    247   DEBUG_LOG("Set drawing area top-left: ({}, {})", left, top);
    248   if (m_drawing_area.left != left || m_drawing_area.top != top)
    249   {
    250     FlushRender();
    251 
    252     m_drawing_area.left = left;
    253     m_drawing_area.top = top;
    254     m_drawing_area_changed = true;
    255     SetClampedDrawingArea();
    256   }
    257 
    258   AddCommandTicks(1);
    259   EndCommand();
    260   return true;
    261 }
    262 
    263 bool GPU::HandleSetDrawingAreaBottomRightCommand()
    264 {
    265   const u32 param = FifoPop() & 0x00FFFFFFu;
    266 
    267   const u32 right = param & DRAWING_AREA_COORD_MASK;
    268   const u32 bottom = (param >> 10) & DRAWING_AREA_COORD_MASK;
    269   DEBUG_LOG("Set drawing area bottom-right: ({}, {})", m_drawing_area.right, m_drawing_area.bottom);
    270   if (m_drawing_area.right != right || m_drawing_area.bottom != bottom)
    271   {
    272     FlushRender();
    273 
    274     m_drawing_area.right = right;
    275     m_drawing_area.bottom = bottom;
    276     m_drawing_area_changed = true;
    277     SetClampedDrawingArea();
    278   }
    279 
    280   AddCommandTicks(1);
    281   EndCommand();
    282   return true;
    283 }
    284 
    285 bool GPU::HandleSetDrawingOffsetCommand()
    286 {
    287   const u32 param = FifoPop() & 0x00FFFFFFu;
    288   const s32 x = SignExtendN<11, s32>(param & 0x7FFu);
    289   const s32 y = SignExtendN<11, s32>((param >> 11) & 0x7FFu);
    290   DEBUG_LOG("Set drawing offset ({}, {})", m_drawing_offset.x, m_drawing_offset.y);
    291   if (m_drawing_offset.x != x || m_drawing_offset.y != y)
    292   {
    293     FlushRender();
    294 
    295     m_drawing_offset.x = x;
    296     m_drawing_offset.y = y;
    297   }
    298 
    299   AddCommandTicks(1);
    300   EndCommand();
    301   return true;
    302 }
    303 
    304 bool GPU::HandleSetMaskBitCommand()
    305 {
    306   const u32 param = FifoPop() & 0x00FFFFFFu;
    307 
    308   constexpr u32 gpustat_mask = (1 << 11) | (1 << 12);
    309   const u32 gpustat_bits = (param & 0x03) << 11;
    310   if ((m_GPUSTAT.bits & gpustat_mask) != gpustat_bits)
    311   {
    312     FlushRender();
    313     m_GPUSTAT.bits = (m_GPUSTAT.bits & ~gpustat_mask) | gpustat_bits;
    314   }
    315   DEBUG_LOG("Set mask bit {} {}", BoolToUInt32(m_GPUSTAT.set_mask_while_drawing),
    316             BoolToUInt32(m_GPUSTAT.check_mask_before_draw));
    317 
    318   AddCommandTicks(1);
    319   EndCommand();
    320   return true;
    321 }
    322 
    323 bool GPU::HandleRenderPolygonCommand()
    324 {
    325   const GPURenderCommand rc{FifoPeek(0)};
    326 
    327   // shaded vertices use the colour from the first word for the first vertex
    328   const u32 words_per_vertex = 1 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.shading_enable);
    329   const u32 num_vertices = rc.quad_polygon ? 4 : 3;
    330   const u32 total_words = words_per_vertex * num_vertices + BoolToUInt32(!rc.shading_enable);
    331   CHECK_COMMAND_SIZE(total_words);
    332 
    333   if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending())
    334     SynchronizeCRTC();
    335 
    336   // setup time
    337   static constexpr u16 s_setup_time[2][2][2] = {{{46, 226}, {334, 496}}, {{82, 262}, {370, 532}}};
    338   const TickCount setup_ticks = static_cast<TickCount>(ZeroExtend32(
    339     s_setup_time[BoolToUInt8(rc.quad_polygon)][BoolToUInt8(rc.shading_enable)][BoolToUInt8(rc.texture_enable)]));
    340   AddCommandTicks(setup_ticks);
    341 
    342   TRACE_LOG("Render {} {} {} {} polygon ({} verts, {} words per vert), {} setup ticks",
    343             rc.quad_polygon ? "four-point" : "three-point", rc.transparency_enable ? "semi-transparent" : "opaque",
    344             rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? "shaded" : "monochrome", num_vertices,
    345             words_per_vertex, setup_ticks);
    346 
    347   // set draw state up
    348   if (rc.texture_enable)
    349   {
    350     const u16 texpage_attribute = Truncate16((rc.shading_enable ? FifoPeek(5) : FifoPeek(4)) >> 16);
    351     SetDrawMode((texpage_attribute & GPUDrawModeReg::POLYGON_TEXPAGE_MASK) |
    352                 (m_draw_mode.mode_reg.bits & ~GPUDrawModeReg::POLYGON_TEXPAGE_MASK));
    353     SetTexturePalette(Truncate16(FifoPeek(2) >> 16));
    354     UpdateCLUTIfNeeded(m_draw_mode.mode_reg.texture_mode, m_draw_mode.palette_reg);
    355   }
    356 
    357   m_counters.num_vertices += num_vertices;
    358   m_counters.num_primitives++;
    359   m_render_command.bits = rc.bits;
    360   m_fifo.RemoveOne();
    361 
    362   DispatchRenderCommand();
    363   EndCommand();
    364   return true;
    365 }
    366 
    367 bool GPU::HandleRenderRectangleCommand()
    368 {
    369   const GPURenderCommand rc{FifoPeek(0)};
    370   const u32 total_words =
    371     2 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.rectangle_size == GPUDrawRectangleSize::Variable);
    372 
    373   CHECK_COMMAND_SIZE(total_words);
    374 
    375   if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending())
    376     SynchronizeCRTC();
    377 
    378   if (rc.texture_enable)
    379   {
    380     SetTexturePalette(Truncate16(FifoPeek(2) >> 16));
    381     UpdateCLUTIfNeeded(m_draw_mode.mode_reg.texture_mode, m_draw_mode.palette_reg);
    382   }
    383 
    384   const TickCount setup_ticks = 16;
    385   AddCommandTicks(setup_ticks);
    386 
    387   TRACE_LOG("Render {} {} {} rectangle ({} words), {} setup ticks",
    388             rc.transparency_enable ? "semi-transparent" : "opaque", rc.texture_enable ? "textured" : "non-textured",
    389             rc.shading_enable ? "shaded" : "monochrome", total_words, setup_ticks);
    390 
    391   m_counters.num_vertices++;
    392   m_counters.num_primitives++;
    393   m_render_command.bits = rc.bits;
    394   m_fifo.RemoveOne();
    395 
    396   DispatchRenderCommand();
    397   EndCommand();
    398   return true;
    399 }
    400 
    401 bool GPU::HandleRenderLineCommand()
    402 {
    403   const GPURenderCommand rc{FifoPeek(0)};
    404   const u32 total_words = rc.shading_enable ? 4 : 3;
    405   CHECK_COMMAND_SIZE(total_words);
    406 
    407   if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending())
    408     SynchronizeCRTC();
    409 
    410   TRACE_LOG("Render {} {} line ({} total words)", rc.transparency_enable ? "semi-transparent" : "opaque",
    411             rc.shading_enable ? "shaded" : "monochrome", total_words);
    412 
    413   m_counters.num_vertices += 2;
    414   m_counters.num_primitives++;
    415   m_render_command.bits = rc.bits;
    416   m_fifo.RemoveOne();
    417 
    418   DispatchRenderCommand();
    419   EndCommand();
    420   return true;
    421 }
    422 
    423 bool GPU::HandleRenderPolyLineCommand()
    424 {
    425   // always read the first two vertices, we test for the terminator after that
    426   const GPURenderCommand rc{FifoPeek(0)};
    427   const u32 min_words = rc.shading_enable ? 3 : 4;
    428   CHECK_COMMAND_SIZE(min_words);
    429 
    430   if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending())
    431     SynchronizeCRTC();
    432 
    433   const TickCount setup_ticks = 16;
    434   AddCommandTicks(setup_ticks);
    435 
    436   TRACE_LOG("Render {} {} poly-line, {} setup ticks", rc.transparency_enable ? "semi-transparent" : "opaque",
    437             rc.shading_enable ? "shaded" : "monochrome", setup_ticks);
    438 
    439   m_render_command.bits = rc.bits;
    440   m_fifo.RemoveOne();
    441 
    442   const u32 words_to_pop = min_words - 1;
    443   // m_blit_buffer.resize(words_to_pop);
    444   // FifoPopRange(m_blit_buffer.data(), words_to_pop);
    445   m_blit_buffer.reserve(words_to_pop);
    446   for (u32 i = 0; i < words_to_pop; i++)
    447     m_blit_buffer.push_back(Truncate32(FifoPop()));
    448 
    449   // polyline goes via a different path through the blit buffer
    450   m_blitter_state = BlitterState::DrawingPolyLine;
    451   m_command_total_words = 0;
    452   return true;
    453 }
    454 
    455 bool GPU::HandleFillRectangleCommand()
    456 {
    457   CHECK_COMMAND_SIZE(3);
    458 
    459   if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending())
    460     SynchronizeCRTC();
    461 
    462   FlushRender();
    463 
    464   const u32 color = FifoPop() & 0x00FFFFFF;
    465   const u32 dst_x = FifoPeek() & 0x3F0;
    466   const u32 dst_y = (FifoPop() >> 16) & VRAM_HEIGHT_MASK;
    467   const u32 width = ((FifoPeek() & VRAM_WIDTH_MASK) + 0xF) & ~0xF;
    468   const u32 height = (FifoPop() >> 16) & VRAM_HEIGHT_MASK;
    469 
    470   DEBUG_LOG("Fill VRAM rectangle offset=({},{}), size=({},{})", dst_x, dst_y, width, height);
    471 
    472   if (width > 0 && height > 0)
    473     FillVRAM(dst_x, dst_y, width, height, color);
    474 
    475   m_counters.num_writes++;
    476   AddCommandTicks(46 + ((width / 8) + 9) * height);
    477   EndCommand();
    478   return true;
    479 }
    480 
    481 bool GPU::HandleCopyRectangleCPUToVRAMCommand()
    482 {
    483   CHECK_COMMAND_SIZE(3);
    484   m_fifo.RemoveOne();
    485 
    486   const u32 coords = FifoPop();
    487   const u32 size = FifoPop();
    488 
    489   // Tenga Seiha does a bunch of completely-invalid VRAM writes on boot, then expects GPU idle to be set.
    490   // It's unclear what actually happens, I need to write another test, but for now, just skip these uploads.
    491   // Not setting GPU idle during the write command breaks Doom, so that's not an option.
    492   if (size == 0xFFFFFFFFu) [[unlikely]]
    493   {
    494     ERROR_LOG("Ignoring likely-invalid VRAM write to ({},{})", (coords & VRAM_WIDTH_MASK),
    495               ((coords >> 16) & VRAM_HEIGHT_MASK));
    496     return true;
    497   }
    498 
    499   const u32 dst_x = coords & VRAM_WIDTH_MASK;
    500   const u32 dst_y = (coords >> 16) & VRAM_HEIGHT_MASK;
    501   const u32 copy_width = ReplaceZero(size & VRAM_WIDTH_MASK, 0x400);
    502   const u32 copy_height = ReplaceZero((size >> 16) & VRAM_HEIGHT_MASK, 0x200);
    503   const u32 num_pixels = copy_width * copy_height;
    504   const u32 num_words = ((num_pixels + 1) / 2);
    505 
    506   DEBUG_LOG("Copy rectangle from CPU to VRAM offset=({},{}), size=({},{})", dst_x, dst_y, copy_width, copy_height);
    507 
    508   EndCommand();
    509 
    510   m_blitter_state = BlitterState::WritingVRAM;
    511   m_blit_buffer.reserve(num_words);
    512   m_blit_remaining_words = num_words;
    513   m_vram_transfer.x = Truncate16(dst_x);
    514   m_vram_transfer.y = Truncate16(dst_y);
    515   m_vram_transfer.width = Truncate16(copy_width);
    516   m_vram_transfer.height = Truncate16(copy_height);
    517   return true;
    518 }
    519 
    520 void GPU::FinishVRAMWrite()
    521 {
    522   if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending())
    523     SynchronizeCRTC();
    524 
    525   FlushRender();
    526 
    527   if (m_blit_remaining_words == 0)
    528   {
    529     if (g_settings.debugging.dump_cpu_to_vram_copies)
    530     {
    531       DumpVRAMToFile(TinyString::from_format("cpu_to_vram_copy_{}.png", s_cpu_to_vram_dump_id++), m_vram_transfer.width,
    532                      m_vram_transfer.height, sizeof(u16) * m_vram_transfer.width, m_blit_buffer.data(), true);
    533     }
    534 
    535     if (g_settings.texture_replacements.ShouldDumpVRAMWrite(m_vram_transfer.width, m_vram_transfer.height))
    536     {
    537       TextureReplacements::DumpVRAMWrite(m_vram_transfer.width, m_vram_transfer.height,
    538                                          reinterpret_cast<const u16*>(m_blit_buffer.data()));
    539     }
    540 
    541     UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height,
    542                m_blit_buffer.data(), m_GPUSTAT.set_mask_while_drawing, m_GPUSTAT.check_mask_before_draw);
    543   }
    544   else
    545   {
    546     const u32 num_pixels = ZeroExtend32(m_vram_transfer.width) * ZeroExtend32(m_vram_transfer.height);
    547     const u32 num_words = (num_pixels + 1) / 2;
    548     const u32 transferred_words = num_words - m_blit_remaining_words;
    549     const u32 transferred_pixels = transferred_words * 2;
    550     const u32 transferred_full_rows = transferred_pixels / m_vram_transfer.width;
    551     const u32 transferred_width_last_row = transferred_pixels % m_vram_transfer.width;
    552 
    553     WARNING_LOG("Partial VRAM write - transfer finished with {} of {} words remaining ({} full rows, {} last row)",
    554                 m_blit_remaining_words, num_words, transferred_full_rows, transferred_width_last_row);
    555 
    556     const u8* blit_ptr = reinterpret_cast<const u8*>(m_blit_buffer.data());
    557     if (transferred_full_rows > 0)
    558     {
    559       UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, transferred_full_rows, blit_ptr,
    560                  m_GPUSTAT.set_mask_while_drawing, m_GPUSTAT.check_mask_before_draw);
    561       blit_ptr += (ZeroExtend32(m_vram_transfer.width) * transferred_full_rows) * sizeof(u16);
    562     }
    563     if (transferred_width_last_row > 0)
    564     {
    565       UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y + transferred_full_rows, transferred_width_last_row, 1, blit_ptr,
    566                  m_GPUSTAT.set_mask_while_drawing, m_GPUSTAT.check_mask_before_draw);
    567     }
    568   }
    569 
    570   m_counters.num_writes++;
    571   m_blit_buffer.clear();
    572   m_vram_transfer = {};
    573   m_blitter_state = BlitterState::Idle;
    574 }
    575 
    576 bool GPU::HandleCopyRectangleVRAMToCPUCommand()
    577 {
    578   CHECK_COMMAND_SIZE(3);
    579   m_fifo.RemoveOne();
    580 
    581   m_vram_transfer.x = Truncate16(FifoPeek() & VRAM_WIDTH_MASK);
    582   m_vram_transfer.y = Truncate16((FifoPop() >> 16) & VRAM_HEIGHT_MASK);
    583   m_vram_transfer.width = ((Truncate16(FifoPeek()) - 1) & VRAM_WIDTH_MASK) + 1;
    584   m_vram_transfer.height = ((Truncate16(FifoPop() >> 16) - 1) & VRAM_HEIGHT_MASK) + 1;
    585 
    586   DEBUG_LOG("Copy rectangle from VRAM to CPU offset=({},{}), size=({},{})", m_vram_transfer.x, m_vram_transfer.y,
    587             m_vram_transfer.width, m_vram_transfer.height);
    588   DebugAssert(m_vram_transfer.col == 0 && m_vram_transfer.row == 0);
    589 
    590   // all rendering should be done first...
    591   FlushRender();
    592 
    593   // ensure VRAM shadow is up to date
    594   ReadVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height);
    595 
    596   if (g_settings.debugging.dump_vram_to_cpu_copies)
    597   {
    598     DumpVRAMToFile(TinyString::from_format("vram_to_cpu_copy_{}.png", s_vram_to_cpu_dump_id++), m_vram_transfer.width,
    599                    m_vram_transfer.height, sizeof(u16) * VRAM_WIDTH,
    600                    &g_vram[m_vram_transfer.y * VRAM_WIDTH + m_vram_transfer.x], true);
    601   }
    602 
    603   // switch to pixel-by-pixel read state
    604   m_counters.num_reads++;
    605   m_blitter_state = BlitterState::ReadingVRAM;
    606   m_command_total_words = 0;
    607   return true;
    608 }
    609 
    610 bool GPU::HandleCopyRectangleVRAMToVRAMCommand()
    611 {
    612   CHECK_COMMAND_SIZE(4);
    613   m_fifo.RemoveOne();
    614 
    615   const u32 src_x = FifoPeek() & VRAM_WIDTH_MASK;
    616   const u32 src_y = (FifoPop() >> 16) & VRAM_HEIGHT_MASK;
    617   const u32 dst_x = FifoPeek() & VRAM_WIDTH_MASK;
    618   const u32 dst_y = (FifoPop() >> 16) & VRAM_HEIGHT_MASK;
    619   const u32 width = ReplaceZero(FifoPeek() & VRAM_WIDTH_MASK, 0x400);
    620   const u32 height = ReplaceZero((FifoPop() >> 16) & VRAM_HEIGHT_MASK, 0x200);
    621 
    622   DEBUG_LOG("Copy rectangle from VRAM to VRAM src=({},{}), dst=({},{}), size=({},{})", src_x, src_y, dst_x, dst_y,
    623             width, height);
    624 
    625   // Some VRAM copies aren't going to do anything. Most games seem to send a 2x2 VRAM copy at the end of a frame.
    626   const bool skip_copy =
    627     width == 0 || height == 0 || (src_x == dst_x && src_y == dst_y && !m_GPUSTAT.set_mask_while_drawing);
    628   if (!skip_copy)
    629   {
    630     m_counters.num_copies++;
    631 
    632     FlushRender();
    633     CopyVRAM(src_x, src_y, dst_x, dst_y, width, height);
    634   }
    635 
    636   AddCommandTicks(width * height * 2);
    637   EndCommand();
    638   return true;
    639 }