duckstation

DuckStation, archived at the revision just before upstream changed it to a proprietary software project; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

gpu.h (24598B)


      1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
      3 
      4 #pragma once
      5 #include "gpu_types.h"
      6 #include "timers.h"
      7 #include "timing_event.h"
      8 #include "types.h"
      9 
     10 #include "util/gpu_texture.h"
     11 
     12 #include "common/bitfield.h"
     13 #include "common/fifo_queue.h"
     14 #include "common/types.h"
     15 
     16 #include <algorithm>
     17 #include <array>
     18 #include <deque>
     19 #include <memory>
     20 #include <string>
     21 #include <tuple>
     22 #include <vector>
     23 
     24 class SmallStringBase;
     25 
     26 class StateWrapper;
     27 
     28 class GPUDevice;
     29 class GPUTexture;
     30 class GPUPipeline;
     31 class MediaCapture;
     32 
     33 struct Settings;
     34 
     35 namespace Threading {
     36 class Thread;
     37 }
     38 
     39 class GPU
     40 {
     41 public:
     42   enum class BlitterState : u8
     43   {
     44     Idle,
     45     ReadingVRAM,
     46     WritingVRAM,
     47     DrawingPolyLine
     48   };
     49 
     50   enum class DMADirection : u32
     51   {
     52     Off = 0,
     53     FIFO = 1,
     54     CPUtoGP0 = 2,
     55     GPUREADtoCPU = 3
     56   };
     57 
     58   enum : u32
     59   {
     60     MAX_FIFO_SIZE = 4096,
     61     DOT_TIMER_INDEX = 0,
     62     HBLANK_TIMER_INDEX = 1,
     63     MAX_RESOLUTION_SCALE = 32,
     64     DEINTERLACE_BUFFER_COUNT = 4,
     65     DRAWING_AREA_COORD_MASK = 1023,
     66   };
     67 
     68   enum : u16
     69   {
     70     NTSC_TICKS_PER_LINE = 3413,
     71     NTSC_TOTAL_LINES = 263,
     72     PAL_TICKS_PER_LINE = 3406,
     73     PAL_TOTAL_LINES = 314,
     74   };
     75 
     76   enum : u16
     77   {
     78     NTSC_HORIZONTAL_ACTIVE_START = 488,
     79     NTSC_HORIZONTAL_ACTIVE_END = 3288,
     80     NTSC_VERTICAL_ACTIVE_START = 16,
     81     NTSC_VERTICAL_ACTIVE_END = 256,
     82     PAL_HORIZONTAL_ACTIVE_START = 487,
     83     PAL_HORIZONTAL_ACTIVE_END = 3282,
     84     PAL_VERTICAL_ACTIVE_START = 20,
     85     PAL_VERTICAL_ACTIVE_END = 308,
     86   };
     87 
     88   // Base class constructor.
     89   GPU();
     90   virtual ~GPU();
     91 
     92   virtual const Threading::Thread* GetSWThread() const = 0;
     93   virtual bool IsHardwareRenderer() const = 0;
     94 
     95   virtual bool Initialize();
     96   virtual void Reset(bool clear_vram);
     97   virtual bool DoState(StateWrapper& sw, GPUTexture** save_to_texture, bool update_display);
     98 
     99   // Graphics API state reset/restore - call when drawing the UI etc.
    100   // TODO: replace with "invalidate cached state"
    101   virtual void RestoreDeviceContext();
    102 
    103   // Render statistics debug window.
    104   void DrawDebugStateWindow();
    105   void GetStatsString(SmallStringBase& str);
    106   void GetMemoryStatsString(SmallStringBase& str);
    107   void ResetStatistics();
    108   void UpdateStatistics(u32 frame_count);
    109 
    110   void CPUClockChanged();
    111 
    112   // MMIO access
    113   u32 ReadRegister(u32 offset);
    114   void WriteRegister(u32 offset, u32 value);
    115 
    116   // DMA access
    117   void DMARead(u32* words, u32 word_count);
    118 
    119   ALWAYS_INLINE bool BeginDMAWrite() const
    120   {
            // Reports whether GPUSTAT's DMA direction currently allows a DMA write into the GPU:
            // true for CPUtoGP0 or FIFO, false for the other directions (Off, GPUREADtoCPU).
    121     return (m_GPUSTAT.dma_direction == DMADirection::CPUtoGP0 || m_GPUSTAT.dma_direction == DMADirection::FIFO);
    122   }
    123   ALWAYS_INLINE void DMAWrite(u32 address, u32 value)
    124   {
            // Pushes one 64-bit entry onto the command FIFO: `address` occupies the upper 32 bits and `value`
            // (the data word) the lower 32 bits.
    125     m_fifo.Push((ZeroExtend64(address) << 32) | ZeroExtend64(value));
    126   }
    127   void EndDMAWrite();
    128 
    129   /// Returns true if no data is being sent from VRAM to the DAC, or if no portion of VRAM would be visible on screen.
    130   ALWAYS_INLINE bool IsDisplayDisabled() const
    131   {
    132     return m_GPUSTAT.display_disable || m_crtc_state.display_vram_width == 0 || m_crtc_state.display_vram_height == 0;
    133   }
    134 
    135   /// Returns true if scanout should be interlaced.
    136   ALWAYS_INLINE bool IsInterlacedDisplayEnabled() const
    137   {
    138     return (!m_force_progressive_scan && m_GPUSTAT.vertical_interlace);
    139   }
    140 
    141   /// Returns true if interlaced rendering is enabled and force progressive scan is disabled.
    142   ALWAYS_INLINE bool IsInterlacedRenderingEnabled() const
    143   {
    144     return (!m_force_progressive_scan && m_GPUSTAT.SkipDrawingToActiveField());
    145   }
    146 
    147   /// Returns true if we're in PAL mode, otherwise false if NTSC.
    148   ALWAYS_INLINE bool IsInPALMode() const { return m_GPUSTAT.pal_mode; }
    149 
    150   /// Returns the number of pending GPU ticks.
    151   TickCount GetPendingCRTCTicks() const;
    152   TickCount GetPendingCommandTicks() const;
    153 
    154   /// Returns true if enough ticks have passed for the raster to be on the next line.
    155   bool IsCRTCScanlinePending() const;
    156 
    157   /// Returns true if a raster scanline or command execution is pending.
    158   bool IsCommandCompletionPending() const;
    159 
    160   /// Synchronizes the CRTC, updating the hblank timer.
    161   void SynchronizeCRTC();
    162 
    163   /// Recompile shaders/recreate framebuffers when needed.
    164   virtual void UpdateSettings(const Settings& old_settings);
    165 
    166   /// Updates the resolution scale when it's set to automatic.
    167   virtual void UpdateResolutionScale();
    168 
    169   /// Returns the effective display resolution of the GPU.
    170   virtual std::tuple<u32, u32> GetEffectiveDisplayResolution(bool scaled = true);
    171 
    172   /// Returns the full display resolution of the GPU, including padding.
    173   virtual std::tuple<u32, u32> GetFullDisplayResolution(bool scaled = true);
    174 
    175   float ComputeHorizontalFrequency() const;
    176   float ComputeVerticalFrequency() const;
    177   float ComputeDisplayAspectRatio() const;
    178 
    179   static std::unique_ptr<GPU> CreateHardwareRenderer();
    180   static std::unique_ptr<GPU> CreateSoftwareRenderer();
    181 
    182   // Converts window coordinates into horizontal ticks and scanlines. Returns false if out of range. Used for lightguns.
    183   void ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x,
    184                                                     float* display_y) const;
    185   bool ConvertDisplayCoordinatesToBeamTicksAndLines(float display_x, float display_y, float x_scale, u32* out_tick,
    186                                                     u32* out_line) const;
    187 
    188   // Returns the current beam position.
    189   void GetBeamPosition(u32* out_ticks, u32* out_line);
    190 
    191   // Returns the number of system clock ticks until the specified tick/line.
    192   TickCount GetSystemTicksUntilTicksAndLine(u32 ticks, u32 line);
    193 
    194   // Returns the first and last lines of the CRTC vertical display range.
    195   ALWAYS_INLINE u16 GetCRTCActiveStartLine() const { return m_crtc_state.vertical_display_start; }
    196   ALWAYS_INLINE u16 GetCRTCActiveEndLine() const { return m_crtc_state.vertical_display_end; }
    197 
    198   // Returns the video clock frequency.
    199   TickCount GetCRTCFrequency() const;
    200   ALWAYS_INLINE u16 GetCRTCDotClockDivider() const { return m_crtc_state.dot_clock_divider; }
    201   ALWAYS_INLINE s32 GetCRTCDisplayWidth() const { return m_crtc_state.display_width; }
    202   ALWAYS_INLINE s32 GetCRTCDisplayHeight() const { return m_crtc_state.display_height; }
    203 
    204   // Ticks for hblank/vblank.
    205   void CRTCTickEvent(TickCount ticks);
    206   void CommandTickEvent(TickCount ticks);
    207 
    208   // Dumps raw VRAM to a file.
    209   bool DumpVRAMToFile(const char* filename);
    210 
    211   // Ensures all buffered vertices are drawn.
    212   virtual void FlushRender() = 0;
    213 
    214   /// Helper function for computing the draw rectangle in a larger window.
    215   void CalculateDrawRect(s32 window_width, s32 window_height, bool apply_rotation, bool apply_aspect_ratio,
    216                          GSVector4i* display_rect, GSVector4i* draw_rect) const;
    217 
    218   /// Helper function for computing screenshot bounds.
    219   void CalculateScreenshotSize(DisplayScreenshotMode mode, u32* width, u32* height, GSVector4i* display_rect,
    220                                GSVector4i* draw_rect) const;
    221 
    222   /// Helper function to save current display texture to PNG.
    223   bool WriteDisplayTextureToFile(std::string filename, bool compress_on_thread = false);
    224 
    225   /// Renders the display, optionally with postprocessing to the specified image.
    226   bool RenderScreenshotToBuffer(u32 width, u32 height, const GSVector4i display_rect, const GSVector4i draw_rect,
    227                                 bool postfx, std::vector<u32>* out_pixels, u32* out_stride,
    228                                 GPUTexture::Format* out_format);
    229 
    230   /// Helper function to save screenshot to PNG.
    231   bool RenderScreenshotToFile(std::string filename, DisplayScreenshotMode mode, u8 quality, bool compress_on_thread,
    232                               bool show_osd_message);
    233 
    234   /// Draws the current display texture, with any post-processing.
    235   bool PresentDisplay();
    236 
    237   /// Sends the current frame to media capture.
    238   bool SendDisplayToMediaCapture(MediaCapture* cap);
    239 
    240   /// Reads the CLUT from the specified coordinates, accounting for wrap-around.
    241   static void ReadCLUT(u16* dest, GPUTexturePaletteReg reg, bool clut_is_8bit);
    242 
    243 protected:
    244   TickCount CRTCTicksToSystemTicks(TickCount crtc_ticks, TickCount fractional_ticks) const;
    245   TickCount SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const;
    246 
    247   // The GPU internally appears to run at 2x the system clock.
    248   ALWAYS_INLINE static constexpr TickCount GPUTicksToSystemTicks(TickCount gpu_ticks)
    249   {
            // Halves the GPU tick count, rounding up for non-negative inputs, and never returns less than
            // one system tick (the std::max clamps the result to a minimum of 1).
    250     return std::max<TickCount>((gpu_ticks + 1) >> 1, 1);
    251   }
          // Inverse direction: one system tick corresponds to two GPU ticks (plain left shift, no clamping).
    252   ALWAYS_INLINE static constexpr TickCount SystemTicksToGPUTicks(TickCount sysclk_ticks) { return sysclk_ticks << 1; }
    253 
    254   static constexpr std::tuple<u8, u8> UnpackTexcoord(u16 texcoord)
    255   {
            // Splits the packed 16-bit value into (low byte, high byte).
            // NOTE(review): presumably these are the (u, v) texture coordinates — confirm against callers.
    256     return std::make_tuple(static_cast<u8>(texcoord), static_cast<u8>(texcoord >> 8));
    257   }
    258 
    259   static constexpr std::tuple<u8, u8, u8> UnpackColorRGB24(u32 rgb24)
    260   {
            // Returns the three low bytes of `rgb24` in ascending order: bits 0-7, bits 8-15, bits 16-23.
            // Bits 24-31 are ignored. Going by the function name these are the R, G and B channels.
    261     return std::make_tuple(static_cast<u8>(rgb24), static_cast<u8>(rgb24 >> 8), static_cast<u8>(rgb24 >> 16));
    262   }
    263 
    264   static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer,
    265                              bool remove_alpha);
    266 
    267   void SoftReset();
    268   void ClearDisplay();
    269 
    270   // Sets dots per scanline
    271   void UpdateCRTCConfig();
    272   void UpdateCRTCDisplayParameters();
    273 
    274   // Update ticks for this execution slice
    275   void UpdateCRTCTickEvent();
    276   void UpdateCommandTickEvent();
    277 
    278   // Updates dynamic bits in GPUSTAT (ready to send VRAM/ready to receive DMA)
    279   void UpdateDMARequest();
    280   void UpdateGPUIdle();
    281 
    282   /// Returns 0 if the currently-displayed field is on odd lines (1,3,5,...) or 1 if even (2,4,6,...).
    283   ALWAYS_INLINE u32 GetInterlacedDisplayField() const { return ZeroExtend32(m_crtc_state.interlaced_field); }
    284 
    285   /// Returns 0 if the currently-displayed field is on an even line in VRAM, otherwise 1.
    286   ALWAYS_INLINE u32 GetActiveLineLSB() const { return ZeroExtend32(m_crtc_state.active_line_lsb); }
    287 
    288   /// Updates the drawing area that's suitable for clamping.
    289   void SetClampedDrawingArea();
    290 
    291   /// Sets/decodes GP0(E1h) (set draw mode).
    292   void SetDrawMode(u16 bits);
    293 
    294   /// Sets/decodes polygon/rectangle texture palette value.
    295   void SetTexturePalette(u16 bits);
    296 
    297   /// Sets/decodes texture window bits.
    298   void SetTextureWindow(u32 value);
    299 
    300   u32 ReadGPUREAD();
    301   void FinishVRAMWrite();
    302 
    303   /// Returns the number of vertices in the buffered poly-line.
    304   ALWAYS_INLINE u32 GetPolyLineVertexCount() const
    305   {
            // Branch-free form of: shading_enable ? (size + 1) / 2 : size, where size is the number of
            // words in m_blit_buffer (assuming BoolToUInt32/BoolToUInt8 map false/true to 0/1).
            // NOTE(review): presumably shaded poly-lines buffer a color word per vertex in addition to the
            // vertex word, except for the first vertex — confirm against HandleRenderPolyLineCommand().
    306     return (static_cast<u32>(m_blit_buffer.size()) + BoolToUInt32(m_render_command.shading_enable)) >>
    307            BoolToUInt8(m_render_command.shading_enable);
    308   }
    309 
    310   void AddCommandTicks(TickCount ticks);
    311 
    312   void WriteGP1(u32 value);
    313   void EndCommand();
    314   void ExecuteCommands();
    315   void TryExecuteCommands();
    316   void HandleGetGPUInfoCommand(u32 value);
    317   void UpdateCLUTIfNeeded(GPUTextureMode texmode, GPUTexturePaletteReg clut);
    318   void InvalidateCLUT();
    319   bool IsCLUTValid() const;
    320 
    321   // Rendering in the backend
    322   virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height);
    323   virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color);
    324   virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask);
    325   virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height);
    326   virtual void DispatchRenderCommand() = 0;
    327   virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0;
    328   virtual void UpdateDisplay() = 0;
    329   virtual void DrawRendererStats();
    330   virtual void OnBufferSwapped();
    331 
    332   ALWAYS_INLINE_RELEASE void AddDrawTriangleTicks(GSVector2i v1, GSVector2i v2, GSVector2i v3, bool shaded,
    333                                                   bool textured, bool semitransparent)
    334   {
            // Estimates the cost of drawing a triangle from its area and charges it via AddCommandTicks().
            // NOTE: the `shaded` parameter is accepted but does not influence the estimate.
    335     // This will not produce the correct results for triangles which are partially outside the clip area.
    336     // However, usually it'll undershoot not overshoot. If we wanted to make this more accurate, we'd need to intersect
    337     // the edges with the clip rectangle.
    338     // TODO: Coordinates are exclusive, so off by one here...
            // The bounds are read from the first (x) and third (z) components onward of m_clamped_drawing_area;
            // presumably these are the (left, top) and (right, bottom) corners — confirm against SetClampedDrawingArea().
    339     const GSVector2i clamp_min = GSVector2i::load(&m_clamped_drawing_area.x);
    340     const GSVector2i clamp_max = GSVector2i::load(&m_clamped_drawing_area.z);
    341     v1 = v1.sat_i32(clamp_min, clamp_max);
    342     v2 = v2.sat_i32(clamp_min, clamp_max);
    343     v3 = v3.sat_i32(clamp_min, clamp_max);
    344 
            // Shoelace formula: half the absolute value of the signed doubled area of the (clamped) triangle.
    345     TickCount pixels =
    346       std::abs((v1.x * v2.y + v2.x * v3.y + v3.x * v1.y - v1.x * v3.y - v2.x * v1.y - v3.x * v2.y) / 2);
            // Texturing doubles the cost.
    347     if (textured)
    348       pixels += pixels;
            // Semi-transparency or mask checking adds a further half of the running cost, rounded up.
    349     if (semitransparent || m_GPUSTAT.check_mask_before_draw)
    350       pixels += (pixels + 1) / 2;
            // When drawing to the active field is skipped, the cost is halved.
    351     if (m_GPUSTAT.SkipDrawingToActiveField())
    352       pixels /= 2;
    353 
    354     AddCommandTicks(pixels);
    355   }
    356   ALWAYS_INLINE_RELEASE void AddDrawRectangleTicks(const GSVector4i clamped_rect, bool textured, bool semitransparent)
    357   {
            // Estimates the cost of drawing a rectangle as (ticks per row) * (rows drawn) and charges it via
            // AddCommandTicks(). `clamped_rect` is expected to already be clipped by the caller, as its name implies.
    358     u32 drawn_width = clamped_rect.width();
    359     u32 drawn_height = clamped_rect.height();
    360 
            // Base cost: one tick per pixel in the row. Texturing adds a mode-dependent surcharge below.
    361     u32 ticks_per_row = drawn_width;
    362     if (textured)
    363     {
    364       switch (m_draw_mode.mode_reg.texture_mode)
    365       {
    366         case GPUTextureMode::Palette4Bit:
    367           ticks_per_row += drawn_width;
    368           break;
    369 
    370         case GPUTextureMode::Palette8Bit:
    371         {
    372           // Texture cache reload every 2 pixels, reads in 8 bytes (assuming 4x2). Cache only reloads if the
    373           // draw width is greater than 32, otherwise the cache hits between rows.
                  // NOTE(review): the test below is `>= 32`, whereas the comment above says "greater than 32" —
                  // confirm which threshold is intended.
    374           if (drawn_width >= 32)
    375             ticks_per_row += (drawn_width / 4) * 8;
    376           else
    377             ticks_per_row += drawn_width;
    378         }
    379         break;
    380 
    381         case GPUTextureMode::Direct16Bit:
    382         case GPUTextureMode::Reserved_Direct16Bit:
    383         {
    384           // Same as above, except with 2x2 blocks instead of 4x2.
    385           if (drawn_width >= 32)
    386             ticks_per_row += (drawn_width / 2) * 8;
    387           else
    388             ticks_per_row += drawn_width;
    389         }
    390         break;
    391 
    392           DefaultCaseIsUnreachable()
    393       }
    394     }
    395 
            // Semi-transparency or mask checking adds half a tick per pixel of row width, rounded up.
    396     if (semitransparent || m_GPUSTAT.check_mask_before_draw)
    397       ticks_per_row += (drawn_width + 1u) / 2u;
            // When drawing to the active field is skipped, only half the rows are charged, but never fewer than one.
    398     if (m_GPUSTAT.SkipDrawingToActiveField())
    399       drawn_height = std::max<u32>(drawn_height / 2, 1u);
    400 
    401     AddCommandTicks(ticks_per_row * drawn_height);
    402   }
    403   ALWAYS_INLINE_RELEASE void AddDrawLineTicks(const GSVector4i clamped_rect, bool shaded)
    404   {
            // Estimates the cost of drawing a line as the longer side of its (clamped) bounding rectangle and
            // charges it via AddCommandTicks(). NOTE: the `shaded` parameter is accepted but not used here.
    405     u32 drawn_width = clamped_rect.width();
    406     u32 drawn_height = clamped_rect.height();
    407 
            // When drawing to the active field is skipped, the height is halved (minimum 1) before the comparison.
    408     if (m_GPUSTAT.SkipDrawingToActiveField())
    409       drawn_height = std::max<u32>(drawn_height / 2, 1u);
    410 
    411     AddCommandTicks(std::max(drawn_width, drawn_height));
    412   }
    413 
    414   union GPUSTAT
    415   {
    416     u32 bits;
    417     BitField<u32, u8, 0, 4> texture_page_x_base;
    418     BitField<u32, u8, 4, 1> texture_page_y_base;
    419     BitField<u32, GPUTransparencyMode, 5, 2> semi_transparency_mode;
    420     BitField<u32, GPUTextureMode, 7, 2> texture_color_mode;
    421     BitField<u32, bool, 9, 1> dither_enable;
    422     BitField<u32, bool, 10, 1> draw_to_displayed_field;
    423     BitField<u32, bool, 11, 1> set_mask_while_drawing;
    424     BitField<u32, bool, 12, 1> check_mask_before_draw;
    425     BitField<u32, u8, 13, 1> interlaced_field;
    426     BitField<u32, bool, 14, 1> reverse_flag;
    427     BitField<u32, bool, 15, 1> texture_disable;
    428     BitField<u32, u8, 16, 1> horizontal_resolution_2;
    429     BitField<u32, u8, 17, 2> horizontal_resolution_1;
    430     BitField<u32, bool, 19, 1> vertical_resolution;
    431     BitField<u32, bool, 20, 1> pal_mode;
    432     BitField<u32, bool, 21, 1> display_area_color_depth_24;
    433     BitField<u32, bool, 22, 1> vertical_interlace;
    434     BitField<u32, bool, 23, 1> display_disable;
    435     BitField<u32, bool, 24, 1> interrupt_request;
    436     BitField<u32, bool, 25, 1> dma_data_request;
    437     BitField<u32, bool, 26, 1> gpu_idle;
    438     BitField<u32, bool, 27, 1> ready_to_send_vram;
    439     BitField<u32, bool, 28, 1> ready_to_recieve_dma;
    440     BitField<u32, DMADirection, 29, 2> dma_direction;
    441     BitField<u32, bool, 31, 1> display_line_lsb;
    442 
    443     ALWAYS_INLINE bool IsMaskingEnabled() const
    444     {
              // True if either bit 11 (set_mask_while_drawing) or bit 12 (check_mask_before_draw) is set.
    445       static constexpr u32 MASK = ((1 << 11) | (1 << 12));
    446       return ((bits & MASK) != 0);
    447     }
    448     ALWAYS_INLINE bool SkipDrawingToActiveField() const
    449     {
              // True only when bit 19 (vertical_resolution) and bit 22 (vertical_interlace) are both set
              // and bit 10 (draw_to_displayed_field) is clear.
    450       static constexpr u32 MASK = (1 << 19) | (1 << 22) | (1 << 10);
    451       static constexpr u32 ACTIVE = (1 << 19) | (1 << 22);
    452       return ((bits & MASK) == ACTIVE);
    453     }
    454     ALWAYS_INLINE bool InInterleaved480iMode() const
    455     {
              // True when bit 19 (vertical_resolution) and bit 22 (vertical_interlace) are both set,
              // regardless of draw_to_displayed_field.
    456       static constexpr u32 ACTIVE = (1 << 19) | (1 << 22);
    457       return ((bits & ACTIVE) == ACTIVE);
    458     }
    459 
    460     // During transfer/render operations, if ((dst_pixel & mask_and) == 0) { pixel = src_pixel | mask_or }
    461     ALWAYS_INLINE u16 GetMaskAND() const
    462     {
              // Shifting left by 3 moves bit 12 (check_mask_before_draw) to bit 15, so the result is 0x8000
              // when that flag is set and 0 otherwise — the branch-free equivalent of the expression below.
    463       // return check_mask_before_draw ? 0x8000 : 0x0000;
    464       return Truncate16((bits << 3) & 0x8000);
    465     }
    466     ALWAYS_INLINE u16 GetMaskOR() const
    467     {
              // Shifting left by 4 moves bit 11 (set_mask_while_drawing) to bit 15, so the result is 0x8000
              // when that flag is set and 0 otherwise — the branch-free equivalent of the expression below.
    468       // return set_mask_while_drawing ? 0x8000 : 0x0000;
    469       return Truncate16((bits << 4) & 0x8000);
    470     }
    471   } m_GPUSTAT = {};
    472 
    473   struct DrawMode
    474   {
    475     static constexpr u16 PALETTE_MASK = UINT16_C(0b0111111111111111);
    476     static constexpr u32 TEXTURE_WINDOW_MASK = UINT32_C(0b11111111111111111111);
    477 
    478     // original values
    479     GPUDrawModeReg mode_reg;
    480     GPUTexturePaletteReg palette_reg; // from vertex
    481     u32 texture_window_value;
    482 
    483     // decoded values
    484     GPUTextureWindow texture_window;
    485     bool texture_x_flip;
    486     bool texture_y_flip;
    487     bool texture_page_changed;
    488     bool texture_window_changed;
    489 
    490     ALWAYS_INLINE bool IsTexturePageChanged() const { return texture_page_changed; }
    491     ALWAYS_INLINE void SetTexturePageChanged() { texture_page_changed = true; }
    492     ALWAYS_INLINE void ClearTexturePageChangedFlag() { texture_page_changed = false; }
    493 
    494     ALWAYS_INLINE bool IsTextureWindowChanged() const { return texture_window_changed; }
    495     ALWAYS_INLINE void SetTextureWindowChanged() { texture_window_changed = true; }
    496     ALWAYS_INLINE void ClearTextureWindowChangedFlag() { texture_window_changed = false; }
    497   } m_draw_mode = {};
    498 
    499   GPUDrawingArea m_drawing_area = {};
    500   GPUDrawingOffset m_drawing_offset = {};
    501   GSVector4i m_clamped_drawing_area = {};
    502 
    503   bool m_console_is_pal = false;
    504   bool m_set_texture_disable_mask = false;
    505   bool m_drawing_area_changed = false;
    506   bool m_force_progressive_scan = false;
    507   bool m_force_ntsc_timings = false;
    508 
    509   struct CRTCState
    510   {
    511     struct Regs
    512     {
    513       static constexpr u32 DISPLAY_ADDRESS_START_MASK = 0b111'11111111'11111110;
    514       static constexpr u32 HORIZONTAL_DISPLAY_RANGE_MASK = 0b11111111'11111111'11111111;
    515       static constexpr u32 VERTICAL_DISPLAY_RANGE_MASK = 0b1111'11111111'11111111;
    516 
    517       union
    518       {
    519         u32 display_address_start;
    520         BitField<u32, u16, 0, 10> X;
    521         BitField<u32, u16, 10, 9> Y;
    522       };
    523       union
    524       {
    525         u32 horizontal_display_range;
    526         BitField<u32, u16, 0, 12> X1;
    527         BitField<u32, u16, 12, 12> X2;
    528       };
    529 
    530       union
    531       {
    532         u32 vertical_display_range;
    533         BitField<u32, u16, 0, 10> Y1;
    534         BitField<u32, u16, 10, 10> Y2;
    535       };
    536     } regs;
    537 
    538     u16 dot_clock_divider;
    539 
    540     // Size of the simulated screen in pixels. Depending on crop mode, this may include overscan area.
    541     u16 display_width;
    542     u16 display_height;
    543 
    544     // Top-left corner in screen coordinates where the outputted portion of VRAM is first visible.
    545     u16 display_origin_left;
    546     u16 display_origin_top;
    547 
    548     // Rectangle in VRAM coordinates describing the area of VRAM that is visible on screen.
    549     u16 display_vram_left;
    550     u16 display_vram_top;
    551     u16 display_vram_width;
    552     u16 display_vram_height;
    553 
    554     // Visible range of the screen, in GPU ticks/lines. Clamped to lie within the active video region.
    555     u16 horizontal_visible_start;
    556     u16 horizontal_visible_end;
    557     u16 vertical_visible_start;
    558     u16 vertical_visible_end;
    559 
    560     u16 horizontal_display_start;
    561     u16 horizontal_display_end;
    562     u16 vertical_display_start;
    563     u16 vertical_display_end;
    564 
    565     u16 horizontal_active_start;
    566     u16 horizontal_active_end;
    567 
    568     u16 horizontal_total;
    569     u16 vertical_total;
    570 
    571     TickCount fractional_ticks;
    572     TickCount current_tick_in_scanline;
    573     u32 current_scanline;
    574 
    575     TickCount fractional_dot_ticks; // only used when timer0 is enabled
    576 
    577     bool in_hblank;
    578     bool in_vblank;
    579 
    580     u8 interlaced_field; // 0 = odd, 1 = even
    581     u8 interlaced_display_field;
    582     u8 active_line_lsb;
    583 
    584     ALWAYS_INLINE void UpdateHBlankFlag()
    585     {
              // Recomputes in_hblank: it is set whenever the current tick lies outside the half-open range
              // [horizontal_active_start, horizontal_active_end).
    586       in_hblank =
    587         (current_tick_in_scanline < horizontal_active_start || current_tick_in_scanline >= horizontal_active_end);
    588     }
    589   } m_crtc_state = {};
    590 
    591   BlitterState m_blitter_state = BlitterState::Idle;
    592   u32 m_command_total_words = 0;
    593   TickCount m_pending_command_ticks = 0;
    594 
    595   /// GPUREAD value for non-VRAM-reads.
    596   u32 m_GPUREAD_latch = 0;
    597 
    598   // These are the bits from the palette register, but zero extended to 32-bit, so we can have an "invalid" value.
    599   // If an extra byte is ever not needed here for padding, the 8-bit flag could be packed into the MSB of this value.
    600   u32 m_current_clut_reg_bits = {};
    601   bool m_current_clut_is_8bit = false;
    602 
    603   /// True if currently executing/syncing.
    604   bool m_executing_commands = false;
    605 
    606   struct VRAMTransfer
    607   {
    608     u16 x;
    609     u16 y;
    610     u16 width;
    611     u16 height;
    612     u16 col;
    613     u16 row;
    614   } m_vram_transfer = {};
    615 
    616   HeapFIFOQueue<u64, MAX_FIFO_SIZE> m_fifo;
          // Each FIFO entry is 64 bits wide; DMAWrite() stores the address in the upper half and the data word
          // in the lower half.
    617   std::vector<u32> m_blit_buffer;
          // NOTE(review): unlike the neighbouring members, this one has no default member initializer —
          // confirm it is always assigned (e.g. in the constructor or Reset()) before it is first read.
    618   u32 m_blit_remaining_words;
    619   GPURenderCommand m_render_command{};
    620 
    621   ALWAYS_INLINE u32 FifoPop() { return Truncate32(m_fifo.Pop()); }
          // FifoPop() above and both FifoPeek() overloads below return only the low 32 bits of the 64-bit FIFO
          // entry (the data word); the upper 32 bits, where DMAWrite() stores the address, are discarded.
    622   ALWAYS_INLINE u32 FifoPeek() { return Truncate32(m_fifo.Peek()); }
    623   ALWAYS_INLINE u32 FifoPeek(u32 i) { return Truncate32(m_fifo.Peek(i)); }
    624 
    625   TickCount m_max_run_ahead = 128;
    626   u32 m_fifo_size = 128;
    627 
    628   void ClearDisplayTexture();
    629   void SetDisplayTexture(GPUTexture* texture, GPUTexture* depth_texture, s32 view_x, s32 view_y, s32 view_width,
    630                          s32 view_height);
    631 
    632   bool RenderDisplay(GPUTexture* target, const GSVector4i display_rect, const GSVector4i draw_rect, bool postfx);
    633 
    634   bool Deinterlace(u32 field, u32 line_skip);
    635   bool DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 line_skip);
    636   bool DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve);
    637   void DestroyDeinterlaceTextures();
    638   bool ApplyChromaSmoothing();
    639 
    640   u32 m_current_deinterlace_buffer = 0;
    641   std::unique_ptr<GPUPipeline> m_deinterlace_pipeline;
    642   std::unique_ptr<GPUPipeline> m_deinterlace_extract_pipeline;
    643   std::array<std::unique_ptr<GPUTexture>, DEINTERLACE_BUFFER_COUNT> m_deinterlace_buffers;
    644   std::unique_ptr<GPUTexture> m_deinterlace_texture;
    645 
    646   std::unique_ptr<GPUPipeline> m_chroma_smoothing_pipeline;
    647   std::unique_ptr<GPUTexture> m_chroma_smoothing_texture;
    648 
    649   std::unique_ptr<GPUPipeline> m_display_pipeline;
    650   GPUTexture* m_display_texture = nullptr;
    651   GPUTexture* m_display_depth_buffer = nullptr;
    652   s32 m_display_texture_view_x = 0;
    653   s32 m_display_texture_view_y = 0;
    654   s32 m_display_texture_view_width = 0;
    655   s32 m_display_texture_view_height = 0;
    656 
    657   struct Counters
    658   {
    659     u32 num_reads;
    660     u32 num_writes;
    661     u32 num_copies;
    662     u32 num_vertices;
    663     u32 num_primitives;
    664 
    665     // u32 num_read_texture_updates;
    666     // u32 num_ubo_updates;
    667   };
    668 
    669   struct Stats : Counters
    670   {
    671     size_t host_buffer_streamed;
    672     u32 host_num_draws;
    673     u32 host_num_barriers;
    674     u32 host_num_render_passes;
    675     u32 host_num_copies;
    676     u32 host_num_downloads;
    677     u32 host_num_uploads;
    678   };
    679 
    680   Counters m_counters = {};
    681   Stats m_stats = {};
    682 
    683 private:
    684   bool CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing);
    685 
    686   using GP0CommandHandler = bool (GPU::*)();
    687   using GP0CommandHandlerTable = std::array<GP0CommandHandler, 256>;
    688   static GP0CommandHandlerTable GenerateGP0CommandHandlerTable();
    689 
    690   // Rendering commands, returns false if not enough data is provided
    691   bool HandleUnknownGP0Command();
    692   bool HandleNOPCommand();
    693   bool HandleClearCacheCommand();
    694   bool HandleInterruptRequestCommand();
    695   bool HandleSetDrawModeCommand();
    696   bool HandleSetTextureWindowCommand();
    697   bool HandleSetDrawingAreaTopLeftCommand();
    698   bool HandleSetDrawingAreaBottomRightCommand();
    699   bool HandleSetDrawingOffsetCommand();
    700   bool HandleSetMaskBitCommand();
    701   bool HandleRenderPolygonCommand();
    702   bool HandleRenderRectangleCommand();
    703   bool HandleRenderLineCommand();
    704   bool HandleRenderPolyLineCommand();
    705   bool HandleFillRectangleCommand();
    706   bool HandleCopyRectangleCPUToVRAMCommand();
    707   bool HandleCopyRectangleVRAMToCPUCommand();
    708   bool HandleCopyRectangleVRAMToVRAMCommand();
    709 
    710   static const GP0CommandHandlerTable s_GP0_command_handler_table;
    711 };
    712 
    713 extern std::unique_ptr<GPU> g_gpu;
    714 extern u16 g_vram[VRAM_SIZE / sizeof(u16)];
    715 extern u16 g_gpu_clut[GPU_CLUT_SIZE];