metal_device.h (15380B)
1 // SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) 3 4 #pragma once 5 6 // Macro hell. These have to come first. 7 #include <AppKit/AppKit.h> 8 #include <Metal/Metal.h> 9 #include <QuartzCore/QuartzCore.h> 10 11 #ifndef __OBJC__ 12 #error This file needs to be compiled with Objective C++. 13 #endif 14 15 #if __has_feature(objc_arc) 16 #error ARC should not be enabled. 17 #endif 18 19 #include "gpu_device.h" 20 #include "metal_stream_buffer.h" 21 #include "window_info.h" 22 23 #include "common/timer.h" 24 25 #include <atomic> 26 #include <memory> 27 #include <mutex> 28 #include <string> 29 #include <string_view> 30 #include <unordered_map> 31 #include <utility> 32 #include <vector> 33 34 class MetalDevice; 35 class MetalPipeline; 36 class MetalTexture; 37 38 class MetalSampler final : public GPUSampler 39 { 40 friend MetalDevice; 41 42 public: 43 ~MetalSampler() override; 44 45 ALWAYS_INLINE id<MTLSamplerState> GetSamplerState() const { return m_ss; } 46 47 void SetDebugName(std::string_view name) override; 48 49 private: 50 MetalSampler(id<MTLSamplerState> ss); 51 52 id<MTLSamplerState> m_ss; 53 }; 54 55 class MetalShader final : public GPUShader 56 { 57 friend MetalDevice; 58 59 public: 60 ~MetalShader() override; 61 62 ALWAYS_INLINE id<MTLLibrary> GetLibrary() const { return m_library; } 63 ALWAYS_INLINE id<MTLFunction> GetFunction() const { return m_function; } 64 65 void SetDebugName(std::string_view name) override; 66 67 private: 68 MetalShader(GPUShaderStage stage, id<MTLLibrary> library, id<MTLFunction> function); 69 70 id<MTLLibrary> m_library; 71 id<MTLFunction> m_function; 72 }; 73 74 class MetalPipeline final : public GPUPipeline 75 { 76 friend MetalDevice; 77 78 public: 79 ~MetalPipeline() override; 80 81 ALWAYS_INLINE id<MTLRenderPipelineState> GetPipelineState() const { return m_pipeline; } 82 ALWAYS_INLINE id<MTLDepthStencilState> GetDepthState() const { return m_depth; } 83 ALWAYS_INLINE MTLCullMode GetCullMode() const { return m_cull_mode; } 84 ALWAYS_INLINE MTLPrimitiveType GetPrimitive() const { return m_primitive; } 85 86 void SetDebugName(std::string_view name) override; 87 88 private: 89 MetalPipeline(id<MTLRenderPipelineState> pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode, 90 MTLPrimitiveType primitive); 91 92 id<MTLRenderPipelineState> m_pipeline; 93 id<MTLDepthStencilState> m_depth; 94 MTLCullMode m_cull_mode; 95 MTLPrimitiveType m_primitive; 96 }; 97 98 class MetalTexture final : public GPUTexture 99 { 100 friend MetalDevice; 101 102 public: 103 ~MetalTexture(); 104 105 ALWAYS_INLINE id<MTLTexture> GetMTLTexture() const { return m_texture; } 106 107 bool Create(id<MTLDevice> device, u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, 108 Format format, const void* initial_data = nullptr, u32 initial_data_stride = 0); 109 110 bool Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer = 0, u32 level = 0) override; 111 bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) override; 112 void Unmap() override; 113 114 void MakeReadyForSampling() override; 115 116 void SetDebugName(std::string_view name) override; 117 118 // Call when the texture is bound to the pipeline, or read from in a copy. 119 ALWAYS_INLINE void SetUseFenceCounter(u64 counter) { m_use_fence_counter = counter; } 120 121 private: 122 MetalTexture(id<MTLTexture> texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, 123 Format format); 124 125 id<MTLTexture> m_texture; 126 127 // Contains the fence counter when the texture was last used. 128 // When this matches the current fence counter, the texture was used this command buffer. 129 u64 m_use_fence_counter = 0; 130 131 u16 m_map_x = 0; 132 u16 m_map_y = 0; 133 u16 m_map_width = 0; 134 u16 m_map_height = 0; 135 u8 m_map_layer = 0; 136 u8 m_map_level = 0; 137 }; 138 139 class MetalDownloadTexture final : public GPUDownloadTexture 140 { 141 public: 142 ~MetalDownloadTexture() override; 143 144 static std::unique_ptr<MetalDownloadTexture> Create(u32 width, u32 height, GPUTexture::Format format, void* memory, 145 size_t memory_size, u32 memory_stride); 146 147 void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height, 148 u32 src_layer, u32 src_level, bool use_transfer_pitch) override; 149 150 bool Map(u32 x, u32 y, u32 width, u32 height) override; 151 void Unmap() override; 152 153 void Flush() override; 154 155 void SetDebugName(std::string_view name) override; 156 157 private: 158 MetalDownloadTexture(u32 width, u32 height, GPUTexture::Format format, u8* import_buffer, size_t buffer_offset, 159 id<MTLBuffer> buffer, const u8* map_ptr, u32 map_pitch); 160 161 size_t m_buffer_offset = 0; 162 id<MTLBuffer> m_buffer = nil; 163 164 u64 m_copy_fence_counter = 0; 165 }; 166 167 class MetalTextureBuffer final : public GPUTextureBuffer 168 { 169 public: 170 MetalTextureBuffer(Format format, u32 size_in_elements); 171 ~MetalTextureBuffer() override; 172 173 ALWAYS_INLINE id<MTLBuffer> GetMTLBuffer() const { return m_buffer.GetBuffer(); } 174 175 bool CreateBuffer(id<MTLDevice> device); 176 177 // Inherited via GPUTextureBuffer 178 void* Map(u32 required_elements) override; 179 void Unmap(u32 used_elements) override; 180 181 void SetDebugName(std::string_view name) override; 182 183 private: 184 MetalStreamBuffer m_buffer; 185 }; 186 187 class MetalDevice final : public GPUDevice 188 { 189 friend MetalTexture; 190 friend MetalDownloadTexture; 191 192 public: 193 ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast<MetalDevice*>(g_gpu_device.get()); } 194 ALWAYS_INLINE id<MTLDevice> GetMTLDevice() { return m_device; } 195 ALWAYS_INLINE u64 GetCurrentFenceCounter() { return m_current_fence_counter; } 196 ALWAYS_INLINE u64 GetCompletedFenceCounter() { return m_completed_fence_counter; } 197 198 MetalDevice(); 199 ~MetalDevice(); 200 201 RenderAPI GetRenderAPI() const override; 202 203 bool HasSurface() const override; 204 205 bool UpdateWindow() override; 206 void ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) override; 207 void DestroySurface() override; 208 209 std::string GetDriverInfo() const override; 210 211 void ExecuteAndWaitForGPUIdle() override; 212 213 std::unique_ptr<GPUTexture> CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, 214 GPUTexture::Type type, GPUTexture::Format format, 215 const void* data = nullptr, u32 data_stride = 0) override; 216 std::unique_ptr<GPUSampler> CreateSampler(const GPUSampler::Config& config) override; 217 std::unique_ptr<GPUTextureBuffer> CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override; 218 219 std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) override; 220 std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, 221 void* memory, size_t memory_size, 222 u32 memory_stride) override; 223 224 bool SupportsTextureFormat(GPUTexture::Format format) const override; 225 void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, 226 u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override; 227 void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, 228 u32 src_x, u32 src_y, u32 width, u32 height) override; 229 void ClearRenderTarget(GPUTexture* t, u32 c) override; 230 void ClearDepth(GPUTexture* t, float d) override; 231 void InvalidateRenderTarget(GPUTexture* t) override; 232 233 std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data, 234 Error* error) override; 235 std::unique_ptr<GPUShader> CreateShaderFromSource(GPUShaderStage stage, GPUShaderLanguage language, 236 std::string_view source, const char* entry_point, 237 DynamicHeapArray<u8>* out_binary, Error* error) override; 238 std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override; 239 240 void PushDebugGroup(const char* name) override; 241 void PopDebugGroup() override; 242 void InsertDebugMessage(const char* msg) override; 243 244 void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, 245 u32* map_base_vertex) override; 246 void UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) override; 247 void MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) override; 248 void UnmapIndexBuffer(u32 used_index_count) override; 249 void PushUniformBuffer(const void* data, u32 data_size) override; 250 void* MapUniformBuffer(u32 size) override; 251 void UnmapUniformBuffer(u32 size) override; 252 void SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, 253 GPUPipeline::RenderPassFlag feedback_loop) override; 254 void SetPipeline(GPUPipeline* pipeline) override; 255 void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; 256 void SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) override; 257 void SetViewport(const GSVector4i rc) override; 258 void SetScissor(const GSVector4i rc) override; 259 void Draw(u32 vertex_count, u32 base_vertex) override; 260 void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; 261 void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override; 262 263 bool SetGPUTimingEnabled(bool enabled) override; 264 float GetAndResetAccumulatedGPUTime() override; 265 266 void SetVSyncMode(GPUVSyncMode mode, bool allow_present_throttle) override; 267 268 bool BeginPresent(bool skip_present, u32 clear_color) override; 269 void EndPresent(bool explicit_submit) override; 270 void SubmitPresent() override; 271 272 void WaitForFenceCounter(u64 counter); 273 274 ALWAYS_INLINE MetalStreamBuffer& GetTextureStreamBuffer() { return m_texture_upload_buffer; } 275 id<MTLBlitCommandEncoder> GetBlitEncoder(bool is_inline); 276 277 void SubmitCommandBuffer(bool wait_for_completion = false); 278 void SubmitCommandBufferAndRestartRenderPass(const char* reason); 279 280 void CommitClear(MetalTexture* tex); 281 282 void UnbindPipeline(MetalPipeline* pl); 283 void UnbindTexture(MetalTexture* tex); 284 void UnbindTextureBuffer(MetalTextureBuffer* buf); 285 286 static void DeferRelease(id obj); 287 static void DeferRelease(u64 fence_counter, id obj); 288 289 protected: 290 bool CreateDevice(std::string_view adapter, bool threaded_presentation, 291 std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features, 292 Error* error) override; 293 void DestroyDevice() override; 294 295 private: 296 static constexpr u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024; 297 static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024; 298 static constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; 299 static constexpr u32 UNIFORM_BUFFER_ALIGNMENT = 256; 300 static constexpr u32 TEXTURE_STREAM_BUFFER_SIZE = 32 /*16*/ * 1024 * 1024; // TODO reduce after separate allocations 301 static constexpr u8 NUM_TIMESTAMP_QUERIES = 3; 302 303 using DepthStateMap = std::unordered_map<u8, id<MTLDepthStencilState>>; 304 305 struct ClearPipelineConfig 306 { 307 GPUTexture::Format color_formats[MAX_RENDER_TARGETS]; 308 GPUTexture::Format depth_format; 309 u8 samples; 310 u8 pad[2]; 311 312 bool operator==(const ClearPipelineConfig& c) const { return (std::memcmp(this, &c, sizeof(*this)) == 0); } 313 bool operator!=(const ClearPipelineConfig& c) const { return (std::memcmp(this, &c, sizeof(*this)) != 0); } 314 bool operator<(const ClearPipelineConfig& c) const { return (std::memcmp(this, &c, sizeof(*this)) < 0); } 315 }; 316 static_assert(sizeof(ClearPipelineConfig) == 8); 317 318 ALWAYS_INLINE NSView* GetWindowView() const { return (__bridge NSView*)m_window_info.window_handle; } 319 320 void SetFeatures(FeatureMask disabled_features); 321 bool LoadShaders(); 322 323 id<MTLFunction> GetFunctionFromLibrary(id<MTLLibrary> library, NSString* name); 324 id<MTLComputePipelineState> CreateComputePipeline(id<MTLFunction> function, NSString* name); 325 ClearPipelineConfig GetCurrentClearPipelineConfig() const; 326 id<MTLRenderPipelineState> GetClearDepthPipeline(const ClearPipelineConfig& config); 327 328 std::unique_ptr<GPUShader> CreateShaderFromMSL(GPUShaderStage stage, std::string_view source, 329 std::string_view entry_point, Error* error); 330 331 id<MTLDepthStencilState> GetDepthState(const GPUPipeline::DepthState& ds); 332 333 void CreateCommandBuffer(); 334 void CommandBufferCompletedOffThread(id<MTLCommandBuffer> buffer, u64 fence_counter); 335 void WaitForPreviousCommandBuffers(); 336 void CleanupObjects(); 337 338 ALWAYS_INLINE bool InRenderPass() const { return (m_render_encoder != nil); } 339 ALWAYS_INLINE bool IsInlineUploading() const { return (m_inline_upload_encoder != nil); } 340 void BeginRenderPass(); 341 void EndRenderPass(); 342 void EndInlineUploading(); 343 void EndAnyEncoding(); 344 345 GSVector4i ClampToFramebufferSize(const GSVector4i rc) const; 346 void PreDrawCheck(); 347 void SetInitialEncoderState(); 348 void SetViewportInRenderEncoder(); 349 void SetScissorInRenderEncoder(); 350 351 bool CreateLayer(); 352 void DestroyLayer(); 353 void RenderBlankFrame(); 354 355 bool CreateBuffers(); 356 void DestroyBuffers(); 357 358 bool IsRenderTargetBound(const GPUTexture* tex) const; 359 360 id<MTLDevice> m_device; 361 id<MTLCommandQueue> m_queue; 362 363 CAMetalLayer* m_layer = nil; 364 id<MTLDrawable> m_layer_drawable = nil; 365 MTLRenderPassDescriptor* m_layer_pass_desc = nil; 366 367 std::mutex m_fence_mutex; 368 u64 m_current_fence_counter = 0; 369 std::atomic<u64> m_completed_fence_counter{0}; 370 std::deque<std::pair<u64, id>> m_cleanup_objects; // [fence_counter, object] 371 372 DepthStateMap m_depth_states; 373 374 MetalStreamBuffer m_vertex_buffer; 375 MetalStreamBuffer m_index_buffer; 376 MetalStreamBuffer m_uniform_buffer; 377 MetalStreamBuffer m_texture_upload_buffer; 378 379 id<MTLLibrary> m_shaders = nil; 380 std::vector<std::pair<std::pair<GPUTexture::Format, GPUTexture::Format>, id<MTLComputePipelineState>>> 381 m_resolve_pipelines; 382 std::vector<std::pair<ClearPipelineConfig, id<MTLRenderPipelineState>>> m_clear_pipelines; 383 384 id<MTLCommandBuffer> m_upload_cmdbuf = nil; 385 id<MTLBlitCommandEncoder> m_upload_encoder = nil; 386 id<MTLBlitCommandEncoder> m_inline_upload_encoder = nil; 387 388 id<MTLCommandBuffer> m_render_cmdbuf = nil; 389 id<MTLRenderCommandEncoder> m_render_encoder = nil; 390 391 u8 m_num_current_render_targets = 0; 392 GPUPipeline::RenderPassFlag m_current_feedback_loop = GPUPipeline::NoRenderPassFlags; 393 std::array<MetalTexture*, MAX_RENDER_TARGETS> m_current_render_targets = {}; 394 MetalTexture* m_current_depth_target = nullptr; 395 396 MetalPipeline* m_current_pipeline = nullptr; 397 id<MTLDepthStencilState> m_current_depth_state = nil; 398 MTLCullMode m_current_cull_mode = MTLCullModeNone; 399 u32 m_current_uniform_buffer_position = 0; 400 401 std::array<id<MTLTexture>, MAX_TEXTURE_SAMPLERS> m_current_textures = {}; 402 std::array<id<MTLSamplerState>, MAX_TEXTURE_SAMPLERS> m_current_samplers = {}; 403 id<MTLBuffer> m_current_ssbo = nil; 404 GSVector4i m_current_viewport = {}; 405 GSVector4i m_current_scissor = {}; 406 407 bool m_vsync_enabled = false; 408 409 double m_accumulated_gpu_time = 0; 410 double m_last_gpu_time_end = 0; 411 };