duckstation

DuckStation, archived at the revision just before upstream changed it to a proprietary software project; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

metal_device.h (15380B)


      1 // SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
      3 
      4 #pragma once
      5 
      6 // Macro hell. These have to come first.
      7 #include <AppKit/AppKit.h>
      8 #include <Metal/Metal.h>
      9 #include <QuartzCore/QuartzCore.h>
     10 
     11 #ifndef __OBJC__
     12 #error This file needs to be compiled with Objective C++.
     13 #endif
     14 
     15 #if __has_feature(objc_arc)
     16 #error ARC should not be enabled.
     17 #endif
     18 
     19 #include "gpu_device.h"
     20 #include "metal_stream_buffer.h"
     21 #include "window_info.h"
     22 
     23 #include "common/timer.h"
     24 
     25 #include <atomic>
     26 #include <memory>
     27 #include <mutex>
     28 #include <string>
     29 #include <string_view>
     30 #include <unordered_map>
     31 #include <utility>
     32 #include <vector>
     33 
     34 class MetalDevice;
     35 class MetalPipeline;
     36 class MetalTexture;
     37 
     38 class MetalSampler final : public GPUSampler
     39 {
          // GPUSampler subclass that carries an id<MTLSamplerState> handle.
          // MetalDevice is a friend so that it can reach the private constructor below.
     40   friend MetalDevice;
     41 
     42 public:
     43   ~MetalSampler() override;
     44 
          // Returns the stored sampler state handle. This header refuses to build with ARC enabled,
          // so the accessor is a plain pointer copy with no implicit retain.
     45   ALWAYS_INLINE id<MTLSamplerState> GetSamplerState() const { return m_ss; }
     46 
     47   void SetDebugName(std::string_view name) override;
     48 
     49 private:
          // NOTE(review): whether the constructor retains `ss` (and the destructor releases it) is
          // decided in the out-of-line definitions, which are not visible here - confirm.
     50   MetalSampler(id<MTLSamplerState> ss);
     51 
     52   id<MTLSamplerState> m_ss;
     53 };
     54 
     55 class MetalShader final : public GPUShader
     56 {
          // GPUShader subclass that pairs an id<MTLLibrary> with one id<MTLFunction>.
          // MetalDevice is a friend so that it can reach the private constructor below.
     57   friend MetalDevice;
     58 
     59 public:
     60   ~MetalShader() override;
     61 
          // Plain accessors for the two stored handles; neither performs a retain (ARC is disabled
          // for this header).
     62   ALWAYS_INLINE id<MTLLibrary> GetLibrary() const { return m_library; }
     63   ALWAYS_INLINE id<MTLFunction> GetFunction() const { return m_function; }
     64 
     65   void SetDebugName(std::string_view name) override;
     66 
     67 private:
          // `stage` is not stored in a member declared here; presumably it is forwarded to the
          // GPUShader base - confirm in the definition.
     68   MetalShader(GPUShaderStage stage, id<MTLLibrary> library, id<MTLFunction> function);
     69 
     70   id<MTLLibrary> m_library;
     71   id<MTLFunction> m_function;
     72 };
     73 
     74 class MetalPipeline final : public GPUPipeline
     75 {
          // GPUPipeline subclass bundling a render pipeline state, a depth/stencil state, a cull
          // mode and a primitive type. MetalDevice is a friend so that it can reach the private
          // constructor below.
     76   friend MetalDevice;
     77 
     78 public:
     79   ~MetalPipeline() override;
     80 
          // Read-only accessors for the four values captured at construction.
     81   ALWAYS_INLINE id<MTLRenderPipelineState> GetPipelineState() const { return m_pipeline; }
     82   ALWAYS_INLINE id<MTLDepthStencilState> GetDepthState() const { return m_depth; }
     83   ALWAYS_INLINE MTLCullMode GetCullMode() const { return m_cull_mode; }
     84   ALWAYS_INLINE MTLPrimitiveType GetPrimitive() const { return m_primitive; }
     85 
     86   void SetDebugName(std::string_view name) override;
     87 
     88 private:
          // NOTE(review): ownership of `pipeline` and `depth` (retain/release) is decided in the
          // out-of-line definitions, which are not visible here - confirm.
     89   MetalPipeline(id<MTLRenderPipelineState> pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode,
     90                 MTLPrimitiveType primitive);
     91 
     92   id<MTLRenderPipelineState> m_pipeline;
     93   id<MTLDepthStencilState> m_depth;
     94   MTLCullMode m_cull_mode;
     95   MTLPrimitiveType m_primitive;
     96 };
     97 
     98 class MetalTexture final : public GPUTexture
     99 {
          // GPUTexture subclass that carries an id<MTLTexture> handle plus the bookkeeping needed
          // for Map()/Unmap() and for tracking the last command buffer that used the texture.
          // MetalDevice is a friend so that it can reach the private constructor and members.
    100   friend MetalDevice;
    101 
    102 public:
          // Marked override like the destructors of the sibling Metal* resource classes, so the
          // compiler verifies that the base class destructor is virtual.
    103   ~MetalTexture() override;
    104 
          // Returns the stored texture handle; plain pointer copy, no retain (ARC is disabled).
    105   ALWAYS_INLINE id<MTLTexture> GetMTLTexture() const { return m_texture; }
    106 
          // NOTE(review): Create() is a non-static member even though the constructor is private;
          // how it relates to MetalDevice::CreateTexture() is not visible here - confirm.
    107   bool Create(id<MTLDevice> device, u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type,
    108               Format format, const void* initial_data = nullptr, u32 initial_data_stride = 0);
    109 
    110   bool Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer = 0, u32 level = 0) override;
    111   bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) override;
    112   void Unmap() override;
    113 
    114   void MakeReadyForSampling() override;
    115 
    116   void SetDebugName(std::string_view name) override;
    117 
    118   // Call when the texture is bound to the pipeline, or read from in a copy.
    119   ALWAYS_INLINE void SetUseFenceCounter(u64 counter) { m_use_fence_counter = counter; }
    120 
    121 private:
          // Dimensions arrive here as u16/u8, whereas Create() and the public GPUDevice API take
          // u32; any narrowing happens in the caller.
    122   MetalTexture(id<MTLTexture> texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type,
    123                Format format);
    124 
    125   id<MTLTexture> m_texture;
    126 
    127   // Contains the fence counter when the texture was last used.
    128   // When this matches the current fence counter, the texture was used this command buffer.
    129   u64 m_use_fence_counter = 0;
    130 
          // Region recorded for a pending mapping. NOTE(review): Map() takes u32 coordinates while
          // these are u16/u8 - presumably Map() stores them here for Unmap(); confirm in the definition.
    131   u16 m_map_x = 0;
    132   u16 m_map_y = 0;
    133   u16 m_map_width = 0;
    134   u16 m_map_height = 0;
    135   u8 m_map_layer = 0;
    136   u8 m_map_level = 0;
    137 };
    138 
    139 class MetalDownloadTexture final : public GPUDownloadTexture
    140 {
          // GPUDownloadTexture subclass backed by an id<MTLBuffer>. Instances are obtained through
          // the static Create() factory because the constructor is private.
    141 public:
    142   ~MetalDownloadTexture() override;
    143 
          // Factory. NOTE(review): `memory`/`memory_size`/`memory_stride` presumably describe
          // caller-provided storage to import (the constructor has an `import_buffer` parameter),
          // with a null `memory` meaning "allocate internally" - confirm in the definition.
    144   static std::unique_ptr<MetalDownloadTexture> Create(u32 width, u32 height, GPUTexture::Format format, void* memory,
    145                                                       size_t memory_size, u32 memory_stride);
    146 
    147   void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height,
    148                        u32 src_layer, u32 src_level, bool use_transfer_pitch) override;
    149 
    150   bool Map(u32 x, u32 y, u32 width, u32 height) override;
    151   void Unmap() override;
    152 
    153   void Flush() override;
    154 
    155   void SetDebugName(std::string_view name) override;
    156 
    157 private:
    158   MetalDownloadTexture(u32 width, u32 height, GPUTexture::Format format, u8* import_buffer, size_t buffer_offset,
    159                        id<MTLBuffer> buffer, const u8* map_ptr, u32 map_pitch);
    160 
          // Offset supplied at construction alongside m_buffer; presumably the byte offset of the
          // texel data within that buffer - confirm.
    161   size_t m_buffer_offset = 0;
    162   id<MTLBuffer> m_buffer = nil;
    163 
          // NOTE(review): presumably the device fence counter recorded by CopyFromTexture() so that
          // Flush() knows what to wait for - confirm in the definition.
    164   u64 m_copy_fence_counter = 0;
    165 };
    166 
    167 class MetalTextureBuffer final : public GPUTextureBuffer
    168 {
          // GPUTextureBuffer subclass whose storage is a MetalStreamBuffer member.
    169 public:
          // Construction only records the format and element count; the Metal buffer itself is
          // created by the separate CreateBuffer() call, which reports failure via its return value.
    170   MetalTextureBuffer(Format format, u32 size_in_elements);
    171   ~MetalTextureBuffer() override;
    172 
          // Forwards to the underlying stream buffer's GetBuffer().
    173   ALWAYS_INLINE id<MTLBuffer> GetMTLBuffer() const { return m_buffer.GetBuffer(); }
    174 
    175   bool CreateBuffer(id<MTLDevice> device);
    176 
    177   // Inherited via GPUTextureBuffer
    178   void* Map(u32 required_elements) override;
    179   void Unmap(u32 used_elements) override;
    180 
    181   void SetDebugName(std::string_view name) override;
    182 
    183 private:
    184   MetalStreamBuffer m_buffer;
    185 };
    186 
    187 class MetalDevice final : public GPUDevice
    188 {
          // GPUDevice implementation declared against the Metal API (id<MTLDevice>,
          // id<MTLCommandQueue>, CAMetalLayer). MetalTexture and MetalDownloadTexture are friends
          // and may therefore touch the private state below.
    189   friend MetalTexture;
    190   friend MetalDownloadTexture;
    191 
    192 public:
          // Downcasts the global g_gpu_device with static_cast, i.e. without a runtime type check;
          // only meaningful while the active device really is a MetalDevice.
    193   ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast<MetalDevice*>(g_gpu_device.get()); }
          // Read-only accessors, const-qualified so they are usable through a const MetalDevice.
          // m_completed_fence_counter is a std::atomic, so the last one is an atomic load.
    194   ALWAYS_INLINE id<MTLDevice> GetMTLDevice() const { return m_device; }
    195   ALWAYS_INLINE u64 GetCurrentFenceCounter() const { return m_current_fence_counter; }
    196   ALWAYS_INLINE u64 GetCompletedFenceCounter() const { return m_completed_fence_counter; }
    197 
    198   MetalDevice();
          // Marked override like the destructors of the Metal* resource classes in this header, so
          // the compiler verifies that the base class destructor is virtual.
    199   ~MetalDevice() override;
    200 
    201   RenderAPI GetRenderAPI() const override;
    202 
    203   bool HasSurface() const override;
    204 
    205   bool UpdateWindow() override;
    206   void ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) override;
    207   void DestroySurface() override;
    208 
    209   std::string GetDriverInfo() const override;
    210 
    211   void ExecuteAndWaitForGPUIdle() override;
    212 
          // Resource factories inherited from GPUDevice.
    213   std::unique_ptr<GPUTexture> CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples,
    214                                             GPUTexture::Type type, GPUTexture::Format format,
    215                                             const void* data = nullptr, u32 data_stride = 0) override;
    216   std::unique_ptr<GPUSampler> CreateSampler(const GPUSampler::Config& config) override;
    217   std::unique_ptr<GPUTextureBuffer> CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override;
    218 
    219   std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) override;
    220   std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format,
    221                                                             void* memory, size_t memory_size,
    222                                                             u32 memory_stride) override;
    223 
    224   bool SupportsTextureFormat(GPUTexture::Format format) const override;
    225   void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src,
    226                          u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override;
    227   void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src,
    228                             u32 src_x, u32 src_y, u32 width, u32 height) override;
    229   void ClearRenderTarget(GPUTexture* t, u32 c) override;
    230   void ClearDepth(GPUTexture* t, float d) override;
    231   void InvalidateRenderTarget(GPUTexture* t) override;
    232 
    233   std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data,
    234                                                     Error* error) override;
    235   std::unique_ptr<GPUShader> CreateShaderFromSource(GPUShaderStage stage, GPUShaderLanguage language,
    236                                                     std::string_view source, const char* entry_point,
    237                                                     DynamicHeapArray<u8>* out_binary, Error* error) override;
    238   std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
    239 
    240   void PushDebugGroup(const char* name) override;
    241   void PopDebugGroup() override;
    242   void InsertDebugMessage(const char* msg) override;
    243 
          // Streaming geometry/uniform upload and draw-state entry points inherited from GPUDevice.
    244   void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
    245                        u32* map_base_vertex) override;
    246   void UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) override;
    247   void MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) override;
    248   void UnmapIndexBuffer(u32 used_index_count) override;
    249   void PushUniformBuffer(const void* data, u32 data_size) override;
    250   void* MapUniformBuffer(u32 size) override;
    251   void UnmapUniformBuffer(u32 size) override;
    252   void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
    253                         GPUPipeline::RenderPassFlag feedback_loop) override;
    254   void SetPipeline(GPUPipeline* pipeline) override;
    255   void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
    256   void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override;
    257   void SetViewport(const GSVector4i rc) override;
    258   void SetScissor(const GSVector4i rc) override;
    259   void Draw(u32 vertex_count, u32 base_vertex) override;
    260   void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
    261   void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
    262 
    263   bool SetGPUTimingEnabled(bool enabled) override;
    264   float GetAndResetAccumulatedGPUTime() override;
    265 
    266   void SetVSyncMode(GPUVSyncMode mode, bool allow_present_throttle) override;
    267 
    268   bool BeginPresent(bool skip_present, u32 clear_color) override;
    269   void EndPresent(bool explicit_submit) override;
    270   void SubmitPresent() override;
    271 
          // Metal-specific public helpers (not part of the GPUDevice interface).
    272   void WaitForFenceCounter(u64 counter);
    273 
    274   ALWAYS_INLINE MetalStreamBuffer& GetTextureStreamBuffer() { return m_texture_upload_buffer; }
    275   id<MTLBlitCommandEncoder> GetBlitEncoder(bool is_inline);
    276 
    277   void SubmitCommandBuffer(bool wait_for_completion = false);
    278   void SubmitCommandBufferAndRestartRenderPass(const char* reason);
    279 
    280   void CommitClear(MetalTexture* tex);
    281 
    282   void UnbindPipeline(MetalPipeline* pl);
    283   void UnbindTexture(MetalTexture* tex);
    284   void UnbindTextureBuffer(MetalTextureBuffer* buf);
    285 
          // NOTE(review): presumably these queue `obj` in m_cleanup_objects until the given (or
          // current) fence counter has completed - confirm in the definition.
    286   static void DeferRelease(id obj);
    287   static void DeferRelease(u64 fence_counter, id obj);
    288 
    289 protected:
    290   bool CreateDevice(std::string_view adapter, bool threaded_presentation,
    291                     std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features,
    292                     Error* error) override;
    293   void DestroyDevice() override;
    294 
    295 private:
    296   static constexpr u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024;
    297   static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024;
    298   static constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
    299   static constexpr u32 UNIFORM_BUFFER_ALIGNMENT = 256;
    300   static constexpr u32 TEXTURE_STREAM_BUFFER_SIZE = 32 /*16*/ * 1024 * 1024; // TODO reduce after separate allocations
    301   static constexpr u8 NUM_TIMESTAMP_QUERIES = 3;
    302 
    303   using DepthStateMap = std::unordered_map<u8, id<MTLDepthStencilState>>;
    304 
          // Key used to look up entries in m_clear_pipelines. All three comparison operators
          // memcmp the whole struct, so the `pad` bytes take part in the comparison and must hold
          // the same values in both operands for two keys to compare equal.
    305   struct ClearPipelineConfig
    306   {
    307     GPUTexture::Format color_formats[MAX_RENDER_TARGETS];
    308     GPUTexture::Format depth_format;
    309     u8 samples;
    310     u8 pad[2];
    311 
    312     bool operator==(const ClearPipelineConfig& c) const { return (std::memcmp(this, &c, sizeof(*this)) == 0); }
    313     bool operator!=(const ClearPipelineConfig& c) const { return (std::memcmp(this, &c, sizeof(*this)) != 0); }
    314     bool operator<(const ClearPipelineConfig& c) const { return (std::memcmp(this, &c, sizeof(*this)) < 0); }
    315   };
          // Pins the struct at exactly 8 bytes.
    316   static_assert(sizeof(ClearPipelineConfig) == 8);
    317 
          // Reinterprets the window handle stored in m_window_info as an NSView pointer.
    318   ALWAYS_INLINE NSView* GetWindowView() const { return (__bridge NSView*)m_window_info.window_handle; }
    319 
    320   void SetFeatures(FeatureMask disabled_features);
    321   bool LoadShaders();
    322 
    323   id<MTLFunction> GetFunctionFromLibrary(id<MTLLibrary> library, NSString* name);
    324   id<MTLComputePipelineState> CreateComputePipeline(id<MTLFunction> function, NSString* name);
    325   ClearPipelineConfig GetCurrentClearPipelineConfig() const;
    326   id<MTLRenderPipelineState> GetClearDepthPipeline(const ClearPipelineConfig& config);
    327 
    328   std::unique_ptr<GPUShader> CreateShaderFromMSL(GPUShaderStage stage, std::string_view source,
    329                                                  std::string_view entry_point, Error* error);
    330 
    331   id<MTLDepthStencilState> GetDepthState(const GPUPipeline::DepthState& ds);
    332 
    333   void CreateCommandBuffer();
    334   void CommandBufferCompletedOffThread(id<MTLCommandBuffer> buffer, u64 fence_counter);
    335   void WaitForPreviousCommandBuffers();
    336   void CleanupObjects();
    337 
          // Encoder state predicates: true while the corresponding encoder handle is non-nil.
    338   ALWAYS_INLINE bool InRenderPass() const { return (m_render_encoder != nil); }
    339   ALWAYS_INLINE bool IsInlineUploading() const { return (m_inline_upload_encoder != nil); }
    340   void BeginRenderPass();
    341   void EndRenderPass();
    342   void EndInlineUploading();
    343   void EndAnyEncoding();
    344 
    345   GSVector4i ClampToFramebufferSize(const GSVector4i rc) const;
    346   void PreDrawCheck();
    347   void SetInitialEncoderState();
    348   void SetViewportInRenderEncoder();
    349   void SetScissorInRenderEncoder();
    350 
    351   bool CreateLayer();
    352   void DestroyLayer();
    353   void RenderBlankFrame();
    354 
    355   bool CreateBuffers();
    356   void DestroyBuffers();
    357 
    358   bool IsRenderTargetBound(const GPUTexture* tex) const;
    359 
    360   id<MTLDevice> m_device;
    361   id<MTLCommandQueue> m_queue;
    362 
    363   CAMetalLayer* m_layer = nil;
    364   id<MTLDrawable> m_layer_drawable = nil;
    365   MTLRenderPassDescriptor* m_layer_pass_desc = nil;
    366 
          // NOTE(review): m_completed_fence_counter is atomic while m_current_fence_counter is not;
          // presumably the former is written by CommandBufferCompletedOffThread() on another
          // thread and m_fence_mutex guards the related state - confirm in the definition.
          // NOTE(review): std::deque, std::array, std::optional, std::span and std::memcmp are used
          // in this class, but <deque>, <array>, <optional>, <span> and <cstring> are not included
          // by this header directly; presumably they arrive via gpu_device.h - confirm.
    367   std::mutex m_fence_mutex;
    368   u64 m_current_fence_counter = 0;
    369   std::atomic<u64> m_completed_fence_counter{0};
    370   std::deque<std::pair<u64, id>> m_cleanup_objects; // [fence_counter, object]
    371 
    372   DepthStateMap m_depth_states;
    373 
    374   MetalStreamBuffer m_vertex_buffer;
    375   MetalStreamBuffer m_index_buffer;
    376   MetalStreamBuffer m_uniform_buffer;
    377   MetalStreamBuffer m_texture_upload_buffer;
    378 
    379   id<MTLLibrary> m_shaders = nil;
    380   std::vector<std::pair<std::pair<GPUTexture::Format, GPUTexture::Format>, id<MTLComputePipelineState>>>
    381     m_resolve_pipelines;
    382   std::vector<std::pair<ClearPipelineConfig, id<MTLRenderPipelineState>>> m_clear_pipelines;
    383 
    384   id<MTLCommandBuffer> m_upload_cmdbuf = nil;
    385   id<MTLBlitCommandEncoder> m_upload_encoder = nil;
    386   id<MTLBlitCommandEncoder> m_inline_upload_encoder = nil;
    387 
    388   id<MTLCommandBuffer> m_render_cmdbuf = nil;
    389   id<MTLRenderCommandEncoder> m_render_encoder = nil;
    390 
          // Currently bound render targets and pipeline/draw state.
    391   u8 m_num_current_render_targets = 0;
    392   GPUPipeline::RenderPassFlag m_current_feedback_loop = GPUPipeline::NoRenderPassFlags;
    393   std::array<MetalTexture*, MAX_RENDER_TARGETS> m_current_render_targets = {};
    394   MetalTexture* m_current_depth_target = nullptr;
    395 
    396   MetalPipeline* m_current_pipeline = nullptr;
    397   id<MTLDepthStencilState> m_current_depth_state = nil;
    398   MTLCullMode m_current_cull_mode = MTLCullModeNone;
    399   u32 m_current_uniform_buffer_position = 0;
    400 
    401   std::array<id<MTLTexture>, MAX_TEXTURE_SAMPLERS> m_current_textures = {};
    402   std::array<id<MTLSamplerState>, MAX_TEXTURE_SAMPLERS> m_current_samplers = {};
    403   id<MTLBuffer> m_current_ssbo = nil;
    404   GSVector4i m_current_viewport = {};
    405   GSVector4i m_current_scissor = {};
    406 
    407   bool m_vsync_enabled = false;
    408 
    409   double m_accumulated_gpu_time = 0;
    410   double m_last_gpu_time_end = 0;
    411 };