duckstation

duckstation, archived at the revision just before upstream changed it to a proprietary software project; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

gpu_hw.h (11940B)


      1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
      3 
      4 #pragma once
      5 
      6 #include "gpu.h"
      7 #include "texture_replacements.h"
      8 
      9 #include "util/gpu_device.h"
     10 
     11 #include "common/dimensional_array.h"
     12 #include "common/gsvector.h"
     13 
     14 #include <limits>
     15 #include <tuple>
     16 #include <utility>
     17 
     18 class Error;
     19 
     20 class GPU_SW_Backend;
     21 struct GPUBackendCommand;
     22 struct GPUBackendDrawCommand;
     23 
     24 class GPU_HW final : public GPU
        // GPU subclass declaring the state needed for batched vertex submission, the VRAM-related textures,
        // and the GPUPipeline objects listed at the bottom of the class. It may also own a GPU_SW_Backend
        // (see m_sw_renderer). The class is final, so none of the overrides below can be overridden further.
     25 {
     26 public:
          /// How a batch is rendered with respect to transparency. Produced by BatchConfig::GetRenderMode()
          /// and passed to DrawBatchVertices().
     27   enum class BatchRenderMode : u8
     28   {
     29     TransparencyDisabled,
     30     TransparentAndOpaque,
     31     OnlyOpaque,
     32     OnlyTransparent,
     33     ShaderBlend
     34   };
     35 
          /// Texture sampling mode of a batch. The first three values mirror GPUTextureMode (enforced by the
          /// static_assert below). The Sprite* values start at SpriteStart, and MaxCount is the number of
          /// distinct modes (it sizes NUM_TEXTURE_MODES).
     36   enum class BatchTextureMode : u8
     37   {
     38     Palette4Bit,
     39     Palette8Bit,
     40     Direct16Bit,
     41     Disabled,
     42 
     43     SpritePalette4Bit,
     44     SpritePalette8Bit,
     45     SpriteDirect16Bit,
     46 
     47     MaxCount,
     48 
     49     SpriteStart = SpritePalette4Bit,
     50   };
          // Palette4Bit/Palette8Bit/Direct16Bit must stay numerically equal to their GPUTextureMode counterparts.
     51   static_assert(static_cast<u8>(BatchTextureMode::Palette4Bit) == static_cast<u8>(GPUTextureMode::Palette4Bit) &&
     52                 static_cast<u8>(BatchTextureMode::Palette8Bit) == static_cast<u8>(GPUTextureMode::Palette8Bit) &&
     53                 static_cast<u8>(BatchTextureMode::Direct16Bit) == static_cast<u8>(GPUTextureMode::Direct16Bit));
     54 
     55   GPU_HW();
     56   ~GPU_HW() override;
     57 
          // Overrides of the GPU base class interface.
     58   const Threading::Thread* GetSWThread() const override;
     59   bool IsHardwareRenderer() const override;
     60 
     61   bool Initialize() override;
     62   void Reset(bool clear_vram) override;
     63   bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override;
     64 
     65   void RestoreDeviceContext() override;
     66 
     67   void UpdateSettings(const Settings& old_settings) override;
          // NOTE(review): `final` here is redundant because the class itself is declared final.
     68   void UpdateResolutionScale() override final;
     69   std::tuple<u32, u32> GetEffectiveDisplayResolution(bool scaled = true) override;
     70   std::tuple<u32, u32> GetFullDisplayResolution(bool scaled = true) override;
     71 
     72   void UpdateDisplay() override;
     73 
     74 private:
          // Compile-time limits.
          //  - MAX_VERTICES_FOR_RECTANGLE is 6 * (ceil(MAX_PRIMITIVE_WIDTH / TEXTURE_PAGE_WIDTH) + 1) *
          //    (ceil(MAX_PRIMITIVE_HEIGHT / TEXTURE_PAGE_HEIGHT) + 1). Presumably 6 vertices (two triangles)
          //    per texture-page tile a rectangle can touch; confirm against the rectangle-splitting code.
          //  - NUM_TEXTURE_MODES is the BatchTextureMode count and sizes one dimension of m_batch_pipelines.
     75   enum : u32
     76   {
     77     MAX_BATCH_VERTEX_COUNTER_IDS = 65536 - 2,
     78     MAX_VERTICES_FOR_RECTANGLE = 6 * (((MAX_PRIMITIVE_WIDTH + (TEXTURE_PAGE_WIDTH - 1)) / TEXTURE_PAGE_WIDTH) + 1u) *
     79                                  (((MAX_PRIMITIVE_HEIGHT + (TEXTURE_PAGE_HEIGHT - 1)) / TEXTURE_PAGE_HEIGHT) + 1u),
     80     NUM_TEXTURE_MODES = static_cast<u32>(BatchTextureMode::MaxCount),
     81   };
          // Single-bit flags. Presumably combined in m_texpage_dirty (also u8); confirm in the implementation.
     82   enum : u8
     83   {
     84     TEXPAGE_DIRTY_DRAWN_RECT = (1 << 0),
     85     TEXPAGE_DIRTY_WRITTEN_RECT = (1 << 1),
     86   };
     87 
          // The smallest texel buffer a GPUDevice guarantees must hold one element per VRAM pixel.
     88   static_assert(GPUDevice::MIN_TEXEL_BUFFER_ELEMENTS >= (VRAM_WIDTH * VRAM_HEIGHT));
     89 
          /// Vertex record written through m_batch_vertex_ptr.
     90   struct BatchVertex
     91   {
     92     float x;
     93     float y;
     94     float z;
     95     float w;
     96     u32 color;
     97     u32 texpage;
     98     u16 u; // 16-bit texcoords are needed for 256 extent rectangles
     99     u16 v;
    100     u32 uv_limits;
    101 
            // Two ways to fill a vertex: with a single packed 16-bit texcoord, or with separate u/v values.
            // NOTE(review): how packed_texcoord is split into u and v is defined in the implementation.
    102     void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 packed_texcoord, u32 uv_limits_);
    103     void Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 u_, u16 v_, u32 uv_limits_);
            // Packs the four U/V bounds into one u32, the same type as the uv_limits field.
    104     static u32 PackUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v);
    105     void SetUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v);
    106   };
    107 
          /// State describing a batch; the current instance is stored in m_batch. Every field has a default
          /// (texturing and transparency disabled, all flags false).
    108   struct BatchConfig
    109   {
    110     BatchTextureMode texture_mode = BatchTextureMode::Disabled;
    111     GPUTransparencyMode transparency_mode = GPUTransparencyMode::Disabled;
    112     bool dithering = false;
    113     bool interlacing = false;
    114     bool set_mask_while_drawing = false;
    115     bool check_mask_before_draw = false;
    116     bool use_depth_buffer = false;
    117     bool sprite_mode = false;
    118 
    119     // Returns the render mode for this batch.
    120     BatchRenderMode GetRenderMode() const;
    121   };
    122 
          /// Per-batch shader constants, stored in m_batch_ubo_data alongside the m_batch_ubo_dirty flag.
          /// NOTE(review): presumably uploaded as a uniform buffer, so the field order and sizes would have
          /// to match the shader-side declaration; confirm before reordering.
    123   struct BatchUBOData
    124   {
    125     u32 u_texture_window[4]; // and_x, and_y, or_x, or_y
    126     float u_src_alpha_factor;
    127     float u_dst_alpha_factor;
    128     u32 u_interlaced_displayed_field;
    129     u32 u_set_mask_while_drawing;
    130   };
    131 
          /// Counters for renderer statistics (see DrawRendererStats()).
    132   struct RendererStats
    133   {
    134     u32 num_batches;
    135     u32 num_vram_read_texture_updates;
    136     u32 num_uniform_buffer_updates;
    137   };
    138 
          // Rectangle constants. VRAM_SIZE_RECT is built from (0, 0, VRAM_WIDTH, VRAM_HEIGHT). INVALID_RECT
          // uses s32 max for its first two components and s32 min for its last two, and is the initial value
          // of the dirty/UV rectangles below.
          // NOTE(review): presumably the components are left/top/right/bottom, making INVALID_RECT an
          // "empty" rectangle that any union replaces; confirm against GSVector4i usage.
    139   static constexpr GSVector4i VRAM_SIZE_RECT = GSVector4i::cxpr(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
    140   static constexpr GSVector4i INVALID_RECT =
    141     GSVector4i::cxpr(std::numeric_limits<s32>::max(), std::numeric_limits<s32>::max(), std::numeric_limits<s32>::min(),
    142                      std::numeric_limits<s32>::min());
    143 
    144   /// Returns the GPUTexture::Format to use for the depth buffer.
          /// NOTE(review): this comment used to read "Returns true if a depth buffer should be created", which
          /// does not match the return type. Which format value (if any) means "no depth buffer" is not
          /// visible in this header; confirm in the implementation.
    145   GPUTexture::Format GetDepthBufferFormat() const;
    146 
    147   bool CreateBuffers();
    148   void ClearFramebuffer();
    149   void DestroyBuffers();
    150 
    151   bool CompilePipelines(Error* error);
    152   void DestroyPipelines();
    153 
    154   void LoadVertices();
    155 
    156   void PrintSettingsToLog();
    157   void CheckSettings();
    158 
    159   void UpdateVRAMReadTexture(bool drawn, bool written);
    160   void UpdateDepthBufferFromMaskBit();
    161   void CopyAndClearDepthBuffer();
    162   void ClearDepthBuffer();
    163   void SetScissor();
    164   void SetVRAMRenderTarget();
    165   void DeactivateROV();
    166   void MapGPUBuffer(u32 required_vertices, u32 required_indices);
    167   void UnmapGPUBuffer(u32 used_vertices, u32 used_indices);
    168   void DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, u32 base_vertex);
    169 
    170   u32 CalculateResolutionScale() const;
    171   GPUDownsampleMode GetDownsampleMode(u32 resolution_scale) const;
    172 
    173   bool IsUsingMultisampling() const;
    174   bool IsUsingDownsampling() const;
    175 
    176   void SetFullVRAMDirtyRectangle();
    177   void ClearVRAMDirtyRectangle();
    178 
    179   void AddWrittenRectangle(const GSVector4i rect);
    180   void AddDrawnRectangle(const GSVector4i rect);
    181   void AddUnclampedDrawnRectangle(const GSVector4i rect);
    182   void SetTexPageChangedOnOverlap(const GSVector4i update_rect);
    183 
    184   void CheckForTexPageOverlap(GSVector4i uv_rect);
    185 
    186   bool IsFlushed() const;
    187   void EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices);
    188   void EnsureVertexBufferSpaceForCurrentCommand();
    189   void ResetBatchVertexDepth();
    190 
    191   /// Returns the value to be written to the depth buffer for the current operation for mask bit emulation.
    192   float GetCurrentNormalizedVertexDepth() const;
    193 
    194   /// Returns if the draw needs to be broken into opaque/transparent passes.
    195   bool NeedsTwoPassRendering() const;
    196 
    197   /// Returns true if the draw is going to use shader blending/framebuffer fetch.
    198   bool NeedsShaderBlending(GPUTransparencyMode transparency, BatchTextureMode texture, bool check_mask) const;
    199 
    200   void FillBackendCommandParameters(GPUBackendCommand* cmd) const;
    201   void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const;
    202   void UpdateSoftwareRenderer(bool copy_vram_from_hw);
    203 
          // Private overrides of the GPU base class's VRAM and rendering hooks.
    204   void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
    205   void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override;
    206   void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override;
    207   void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
    208   void DispatchRenderCommand() override;
    209   void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override;
    210   void FlushRender() override;
    211   void DrawRendererStats() override;
    212   void OnBufferSwapped() override;
    213 
    214   void UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_pitch, bool set_mask,
    215                        bool check_mask, const GSVector4i bounds);
    216   bool BlitVRAMReplacementTexture(const TextureReplacements::ReplacementImage* tex, u32 dst_x, u32 dst_y, u32 width,
    217                                   u32 height);
    218 
    219   /// Expands a line into two triangles.
    220   void DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth);
    221 
    222   /// Handles quads with flipped texture coordinate directions.
    223   void HandleFlippedQuadTextureCoordinates(BatchVertex* vertices);
    224   bool IsPossibleSpritePolygon(const BatchVertex* vertices) const;
    225   bool ExpandLineTriangles(BatchVertex* vertices);
    226 
    227   /// Computes polygon U/V boundaries, and checks for overlap with the current texture page.
    228   void ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices);
    229 
    230   /// Sets the depth test flag for PGXP depth buffering.
    231   void SetBatchDepthBuffer(bool enabled);
    232   void CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices);
    233   void SetBatchSpriteMode(bool enabled);
    234 
    235   void UpdateDownsamplingLevels();
    236 
    237   void DownsampleFramebuffer();
    238   void DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top, u32 width, u32 height);
    239   void DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height);
    240 
          // Owned GPU resources for VRAM emulation. Each is a unique_ptr and so starts out null.
    241   std::unique_ptr<GPUTexture> m_vram_texture;
    242   std::unique_ptr<GPUTexture> m_vram_depth_texture;
    243   std::unique_ptr<GPUTexture> m_vram_depth_copy_texture;
    244   std::unique_ptr<GPUTexture> m_vram_read_texture;
    245   std::unique_ptr<GPUTexture> m_vram_readback_texture;
    246   std::unique_ptr<GPUDownloadTexture> m_vram_readback_download_texture;
    247   std::unique_ptr<GPUTexture> m_vram_replacement_texture;
    248 
    249   std::unique_ptr<GPUTextureBuffer> m_vram_upload_buffer;
    250   std::unique_ptr<GPUTexture> m_vram_write_texture;
    251 
          // Owned software backend; GPU_SW_Backend is only forward-declared in this header.
    252   std::unique_ptr<GPU_SW_Backend> m_sw_renderer;
    253 
          // Write cursors, base offsets and counters for the vertex/index data of the current batch.
          // NOTE(review): the pointers are presumably valid only between MapGPUBuffer() and UnmapGPUBuffer();
          // confirm. The counts and spaces are u16 while the Map/Unmap/Ensure functions take u32.
    254   BatchVertex* m_batch_vertex_ptr = nullptr;
    255   u16* m_batch_index_ptr = nullptr;
    256   u32 m_batch_base_vertex = 0;
    257   u32 m_batch_base_index = 0;
    258   u16 m_batch_vertex_count = 0;
    259   u16 m_batch_index_count = 0;
    260   u16 m_batch_vertex_space = 0;
    261   u16 m_batch_index_space = 0;
    262   s32 m_current_depth = 0;
    263   float m_last_depth_z = 1.0f;
    264 
          // NOTE(review): stored as u8 although CalculateResolutionScale() returns u32; confirm the value
          // is always below 256.
    265   u8 m_resolution_scale = 1;
    266   u8 m_multisamples = 1;
    267 
          // Rendering option values kept by this renderer.
          // NOTE(review): presumably refreshed from Settings in UpdateSettings()/CheckSettings(); confirm.
    268   GPUTextureFilter m_texture_filtering = GPUTextureFilter::Nearest;
    269   GPUTextureFilter m_sprite_texture_filtering = GPUTextureFilter::Nearest;
    270   GPULineDetectMode m_line_detect_mode = GPULineDetectMode::Disabled;
    271   GPUDownsampleMode m_downsample_mode = GPUDownsampleMode::Disabled;
    272   GPUWireframeMode m_wireframe_mode = GPUWireframeMode::Disabled;
    273 
          // One-bit flags. Default member initializers on bit-fields require C++20. All start false except
          // m_true_color.
    274   bool m_supports_dual_source_blend : 1 = false;
    275   bool m_supports_framebuffer_fetch : 1 = false;
    276   bool m_true_color : 1 = true;
    277   bool m_pgxp_depth_buffer : 1 = false;
    278   bool m_clamp_uvs : 1 = false;
    279   bool m_compute_uv_range : 1 = false;
    280   bool m_allow_sprite_mode : 1 = false;
    281   bool m_allow_shader_blend : 1 = false;
    282   bool m_prefer_shader_blend : 1 = false;
    283   bool m_use_rov_for_shader_blend : 1 = false;
    284   bool m_write_mask_as_depth : 1 = false;
    285   bool m_depth_was_copied : 1 = false;
    286   bool m_texture_window_active : 1 = false;
    287   bool m_rov_active : 1 = false;
    288 
          // Presumably a combination of the TEXPAGE_DIRTY_* bits declared above; confirm in the implementation.
    289   u8 m_texpage_dirty = 0;
    290 
          // Configuration of the current batch.
    291   BatchConfig m_batch;
    292 
    293   // Changed state
          // The dirty flag starts true while the data itself is zero-initialised.
    294   bool m_batch_ubo_dirty = true;
    295   BatchUBOData m_batch_ubo_data = {};
    296 
    297   // Bounding box of VRAM area that the GPU has drawn into.
          // All three rectangles start as INVALID_RECT.
    298   GSVector4i m_vram_dirty_draw_rect = INVALID_RECT;
    299   GSVector4i m_vram_dirty_write_rect = INVALID_RECT;
    300   GSVector4i m_current_uv_rect = INVALID_RECT;
    301   s32 m_current_texture_page_offset[2] = {};
    302 
    303   std::unique_ptr<GPUPipeline> m_wireframe_pipeline;
    304 
    305   // [wrapped][interlaced]
    306   DimensionalArray<std::unique_ptr<GPUPipeline>, 2, 2> m_vram_fill_pipelines{};
    307 
    308   // [depth_test]
    309   std::array<std::unique_ptr<GPUPipeline>, 2> m_vram_write_pipelines{};
    310   std::array<std::unique_ptr<GPUPipeline>, 2> m_vram_copy_pipelines{};
    311 
    312   std::unique_ptr<GPUPipeline> m_vram_readback_pipeline;
    313   std::unique_ptr<GPUPipeline> m_vram_update_depth_pipeline;
    314   std::unique_ptr<GPUPipeline> m_vram_write_replacement_pipeline;
    315 
          // NOTE(review): three entries. Presumably indices 0 and 1 select 24-bit mode off/on and index 2 is
          // the depth variant; confirm where the pipelines are compiled.
    316   std::array<std::unique_ptr<GPUPipeline>, 3> m_vram_extract_pipeline; // [24bit, 2=depth]
    317   std::unique_ptr<GPUTexture> m_vram_extract_texture;
    318   std::unique_ptr<GPUTexture> m_vram_extract_depth_texture;
    319   std::unique_ptr<GPUPipeline> m_copy_depth_pipeline;
    320 
          // Resources for the downsampling passes (see DownsampleFramebufferAdaptive()/BoxFilter()).
          // NOTE(review): going by its name, m_downsample_scale_or_levels holds either a scale or a level
          // count depending on the mode; confirm in UpdateDownsamplingLevels().
    321   std::unique_ptr<GPUTexture> m_downsample_texture;
    322   std::unique_ptr<GPUPipeline> m_downsample_first_pass_pipeline;
    323   std::unique_ptr<GPUPipeline> m_downsample_mid_pass_pipeline;
    324   std::unique_ptr<GPUPipeline> m_downsample_blur_pass_pipeline;
    325   std::unique_ptr<GPUPipeline> m_downsample_composite_pass_pipeline;
    326   std::unique_ptr<GPUSampler> m_downsample_lod_sampler;
    327   std::unique_ptr<GPUSampler> m_downsample_composite_sampler;
    328   u32 m_downsample_scale_or_levels = 0;
    329 
    330   // [depth_test][transparency_mode][render_mode][texture_mode][dithering][interlacing][check_mask]
          // NOTE(review): the template sizes (2, 2, 2, NUM_TEXTURE_MODES, 5, 5, 2) read as the reverse of the
          // index order listed above. Presumably DimensionalArray takes its sizes innermost-first; confirm in
          // common/dimensional_array.h before changing either the sizes or the comment.
    331   DimensionalArray<std::unique_ptr<GPUPipeline>, 2, 2, 2, NUM_TEXTURE_MODES, 5, 5, 2> m_batch_pipelines{};
    332 };