duckstation

duckstation, but archived from the revision just before upstream changed it to a proprietary software project; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

metal_device.mm (85886B)


      1 // SPDX-FileCopyrightText: 2024 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
      3 
      4 #include "metal_device.h"
      5 
      6 #include "common/align.h"
      7 #include "common/assert.h"
      8 #include "common/error.h"
      9 #include "common/file_system.h"
     10 #include "common/log.h"
     11 #include "common/path.h"
     12 #include "common/scoped_guard.h"
     13 #include "common/string_util.h"
     14 
     15 // TODO FIXME...
     16 #define FMT_EXCEPTIONS 0
     17 #include "fmt/format.h"
     18 
     19 #include <array>
     20 #include <pthread.h>
     21 
     22 Log_SetChannel(MetalDevice);
     23 
     24 // TODO: Disable hazard tracking and issue barriers explicitly.
     25 
     26 // Looking across a range of GPUs, the optimal copy alignment for Vulkan drivers seems
     27 // to be between 1 (AMD/NV) and 64 (Intel). So, we'll go with 64 here.
     28 static constexpr u32 TEXTURE_UPLOAD_ALIGNMENT = 64;
     29 
     30 // The pitch alignment must be less or equal to the upload alignment.
     31 // We need 32 here for AVX2, so 64 is also fine.
     32 static constexpr u32 TEXTURE_UPLOAD_PITCH_ALIGNMENT = 64;
     33 
// Maps each GPUTexture::Format (indexed by its enum value) to the equivalent
// Metal pixel format. Entries must stay in GPUTexture::Format declaration
// order, since lookups cast the enum straight to an index (see
// GetTextureFormatForMTLFormat() and CreatePipeline()). MTLPixelFormatInvalid
// marks formats with no Metal counterpart.
static constexpr std::array<MTLPixelFormat, static_cast<u32>(GPUTexture::Format::MaxCount)> s_pixel_format_mapping = {
  MTLPixelFormatInvalid,      // Unknown
  MTLPixelFormatRGBA8Unorm,   // RGBA8
  MTLPixelFormatBGRA8Unorm,   // BGRA8
  MTLPixelFormatB5G6R5Unorm,  // RGB565
  MTLPixelFormatA1BGR5Unorm,  // RGBA5551
  MTLPixelFormatR8Unorm,      // R8
  MTLPixelFormatDepth16Unorm, // D16
  MTLPixelFormatDepth24Unorm_Stencil8, // D24S8
  MTLPixelFormatDepth32Float, // D32F
  MTLPixelFormatDepth32Float_Stencil8, // D32FS8
  MTLPixelFormatR16Unorm,     // R16
  MTLPixelFormatR16Sint,      // R16I
  MTLPixelFormatR16Uint,      // R16U
  MTLPixelFormatR16Float,     // R16F
  MTLPixelFormatR32Sint,      // R32I
  MTLPixelFormatR32Uint,      // R32U
  MTLPixelFormatR32Float,     // R32F
  MTLPixelFormatRG8Unorm,     // RG8
  MTLPixelFormatRG16Unorm,    // RG16
  MTLPixelFormatRG16Float,    // RG16F
  MTLPixelFormatRG32Float,    // RG32F
  MTLPixelFormatRGBA16Unorm,  // RGBA16
  MTLPixelFormatRGBA16Float,  // RGBA16F
  MTLPixelFormatRGBA32Float,  // RGBA32F
  MTLPixelFormatBGR10A2Unorm, // RGB10A2
};
     61 
     62 static NSString* StringViewToNSString(std::string_view str)
     63 {
     64   if (str.empty())
     65     return nil;
     66 
     67   return [[[NSString alloc] autorelease] initWithBytes:str.data()
     68                                                 length:static_cast<NSUInteger>(str.length())
     69                                               encoding:NSUTF8StringEncoding];
     70 }
     71 
     72 static void LogNSError(NSError* error, std::string_view message)
     73 {
     74   Log::FastWrite("MetalDevice", LOGLEVEL_ERROR, message);
     75   Log::FastWrite("MetalDevice", LOGLEVEL_ERROR, "  NSError Code: {}", static_cast<u32>(error.code));
     76   Log::FastWrite("MetalDevice", LOGLEVEL_ERROR, "  NSError Description: {}", [error.description UTF8String]);
     77 }
     78 
     79 static void NSErrorToErrorObject(Error* errptr, std::string_view message, NSError* error)
     80 {
     81   Error::SetStringFmt(errptr, "{}NSError Code {}: {}", message, static_cast<u32>(error.code), [error.description UTF8String]);
     82 }
     83 
     84 static GPUTexture::Format GetTextureFormatForMTLFormat(MTLPixelFormat fmt)
     85 {
     86   for (u32 i = 0; i < static_cast<u32>(GPUTexture::Format::MaxCount); i++)
     87   {
     88     if (s_pixel_format_mapping[i] == fmt)
     89       return static_cast<GPUTexture::Format>(i);
     90   }
     91 
     92   return GPUTexture::Format::Unknown;
     93 }
     94 
     95 static u32 GetMetalMaxTextureSize(id<MTLDevice> device)
     96 {
     97   // https://gist.github.com/kylehowells/63d0723abc9588eb734cade4b7df660d
     98   if ([device supportsFamily:MTLGPUFamilyMacCatalyst1] || [device supportsFamily:MTLGPUFamilyMac1] ||
     99       [device supportsFamily:MTLGPUFamilyApple3])
    100   {
    101     return 16384;
    102   }
    103   else
    104   {
    105     return 8192;
    106   }
    107 }
    108 
    109 static u32 GetMetalMaxMultisamples(id<MTLDevice> device)
    110 {
    111   u32 max_multisamples = 0;
    112   for (u32 multisamples = 1; multisamples < 16; multisamples *= 2)
    113   {
    114     if (![device supportsTextureSampleCount:multisamples])
    115       break;
    116     max_multisamples = multisamples;
    117   }
    118   return max_multisamples;
    119 }
    120 
    121 template<typename F>
    122 static void RunOnMainThread(F&& f)
    123 {
    124   if ([NSThread isMainThread])
    125     f();
    126   else
    127     dispatch_sync(dispatch_get_main_queue(), f);
    128 }
    129 
// Viewport/scissor start as 1x1 placeholders; real dimensions are set once a
// surface exists.
MetalDevice::MetalDevice() : m_current_viewport(0, 0, 1, 1), m_current_scissor(0, 0, 1, 1)
{
}
    133 
MetalDevice::~MetalDevice()
{
  // DestroyDevice()/DestroyLayer() must have run before destruction; the
  // destructor itself releases nothing.
  Assert(m_layer == nil);
  Assert(m_device == nil);
}
    139 
// Identifies this backend as Metal to generic GPUDevice consumers.
RenderAPI MetalDevice::GetRenderAPI() const
{
  return RenderAPI::Metal;
}
    144 
// A surface exists if and only if a CAMetalLayer has been created.
bool MetalDevice::HasSurface() const
{
  return (m_layer != nil);
}
    149 
    150 void MetalDevice::SetVSyncMode(GPUVSyncMode mode, bool allow_present_throttle)
    151 {
    152   // Metal does not support mailbox mode.
    153   mode = (mode == GPUVSyncMode::Mailbox) ? GPUVSyncMode::FIFO : mode;
    154   m_allow_present_throttle = allow_present_throttle;
    155 
    156   if (m_vsync_mode == mode)
    157     return;
    158 
    159   m_vsync_mode = mode;
    160   if (m_layer != nil)
    161     [m_layer setDisplaySyncEnabled:m_vsync_mode == GPUVSyncMode::FIFO];
    162 }
    163 
// Creates the Metal device and command queue (preferring an adapter matched
// by name when one was requested), then sets up the layer, first command
// buffer, shader library and streaming buffers. Returns false with *error set
// on failure; partially-created state is left for DestroyDevice() to clean up.
// threaded_presentation and exclusive_fullscreen_control are unused by this
// backend.
bool MetalDevice::CreateDevice(std::string_view adapter, bool threaded_presentation,
                               std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features,
                               Error* error)
{
  @autoreleasepool
  {
    id<MTLDevice> device = nil;
    if (!adapter.empty())
    {
      // Look for a device whose name matches the requested adapter string.
      NSArray<id<MTLDevice>>* devices = [MTLCopyAllDevices() autorelease];
      const u32 count = static_cast<u32>([devices count]);
      for (u32 i = 0; i < count; i++)
      {
        if (adapter == [[devices[i] name] UTF8String])
        {
          device = devices[i];
          break;
        }
      }

      if (device == nil)
        ERROR_LOG("Failed to find device named '{}'. Trying default.", adapter);
    }

    if (device == nil)
    {
      // Fall back to the system default device.
      device = [MTLCreateSystemDefaultDevice() autorelease];
      if (device == nil)
      {
        Error::SetStringView(error, "Failed to create default Metal device.");
        return false;
      }
    }

    id<MTLCommandQueue> queue = [[device newCommandQueue] autorelease];
    if (queue == nil)
    {
      Error::SetStringView(error, "Failed to create command queue.");
      return false;
    }

    // Take ownership so the objects survive the autorelease pool (MRC).
    m_device = [device retain];
    m_queue = [queue retain];
    INFO_LOG("Metal Device: {}", [[m_device name] UTF8String]);

    SetFeatures(disabled_features);

    // Surfaceless (headless) contexts skip layer creation entirely.
    if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateLayer())
    {
      Error::SetStringView(error, "Failed to create layer.");
      return false;
    }

    CreateCommandBuffer();
    RenderBlankFrame();

    if (!LoadShaders())
    {
      Error::SetStringView(error, "Failed to load shaders.");
      return false;
    }

    if (!CreateBuffers())
    {
      Error::SetStringView(error, "Failed to create buffers.");
      return false;
    }

    return true;
  }
}
    235 
// Queries device capabilities and fills in m_features, honouring any features
// the caller explicitly disabled via disabled_features.
void MetalDevice::SetFeatures(FeatureMask disabled_features)
{
  m_max_texture_size = GetMetalMaxTextureSize(m_device);
  m_max_multisamples = GetMetalMaxMultisamples(m_device);

  // Framebuffer fetch requires MSL 2.3 and an Apple GPU family.
  const bool supports_fbfetch = [m_device supportsFamily:MTLGPUFamilyApple1];

  // If fbfetch is disabled, barriers aren't supported on Apple GPUs.
  const bool supports_barriers =
    ([m_device supportsFamily:MTLGPUFamilyMac1] && ![m_device supportsFamily:MTLGPUFamilyApple3]);

  m_features.dual_source_blend = !(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND);
  m_features.framebuffer_fetch = !(disabled_features & FEATURE_MASK_FRAMEBUFFER_FETCH) && supports_fbfetch;
  m_features.per_sample_shading = true;
  m_features.noperspective_interpolation = true;
  m_features.texture_copy_to_self = !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF);
  m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS);
  // Texture buffers are backed by SSBOs in this backend (per the flag name).
  m_features.texture_buffers_emulated_with_ssbo = true;
  // Feedback loops need either framebuffer fetch or explicit barrier support.
  m_features.feedback_loops = (m_features.framebuffer_fetch || supports_barriers);
  m_features.geometry_shaders = false;
  m_features.partial_msaa_resolve = false;
  m_features.memory_import = true;
  m_features.explicit_present = false;
  m_features.shader_cache = true;
  m_features.pipeline_cache = false;
  m_features.prefer_unused_textures = true;
}
    264 
    265 bool MetalDevice::LoadShaders()
    266 {
    267   @autoreleasepool
    268   {
    269     auto try_lib = [this](NSString* name) -> id<MTLLibrary> {
    270       NSBundle* bundle = [NSBundle mainBundle];
    271       NSString* path = [bundle pathForResource:name ofType:@"metallib"];
    272       if (path == nil)
    273       {
    274         // Xcode places it alongside the binary.
    275         path = [NSString stringWithFormat:@"%@/%@.metallib", [bundle bundlePath], name];
    276         if (![[NSFileManager defaultManager] fileExistsAtPath:path])
    277           return nil;
    278       }
    279 
    280       id<MTLLibrary> lib = [m_device newLibraryWithFile:path error:nil];
    281       if (lib == nil)
    282         return nil;
    283 
    284       return [lib retain];
    285     };
    286 
    287     if (!(m_shaders = try_lib(@"Metal23")) && !(m_shaders = try_lib(@"Metal22")) &&
    288         !(m_shaders = try_lib(@"Metal21")) && !(m_shaders = try_lib(@"default")))
    289     {
    290       return false;
    291     }
    292 
    293     return true;
    294   }
    295 }
    296 
    297 id<MTLFunction> MetalDevice::GetFunctionFromLibrary(id<MTLLibrary> library, NSString* name)
    298 {
    299   id<MTLFunction> function = [library newFunctionWithName:name];
    300   return function;
    301 }
    302 
// Builds a compute pipeline state for the given function, labelled with name
// when provided. Returns an owned (+1) reference, or nil (after logging the
// NSError) on failure.
id<MTLComputePipelineState> MetalDevice::CreateComputePipeline(id<MTLFunction> function, NSString* name)
{
  MTLComputePipelineDescriptor* desc = [MTLComputePipelineDescriptor new];
  if (name != nil)
    [desc setLabel:name];
  [desc setComputeFunction:function];

  NSError* err = nil;
  id<MTLComputePipelineState> pipeline = [m_device newComputePipelineStateWithDescriptor:desc
                                                                                 options:MTLPipelineOptionNone
                                                                              reflection:nil
                                                                                   error:&err];
  // The descriptor is no longer needed once the pipeline has been created.
  [desc release];
  if (pipeline == nil)
  {
    LogNSError(err, "Create compute pipeline failed:");
    return nil;
  }

  return pipeline;
}
    324 
// Tears down all GPU objects. Waits for in-flight command buffers first so
// nothing is released while the GPU may still be using it; the queue and
// device themselves are released last.
void MetalDevice::DestroyDevice()
{
  WaitForPreviousCommandBuffers();

  if (InRenderPass())
    EndRenderPass();

  // Finish and drop the pending upload/render command buffers.
  if (m_upload_cmdbuf != nil)
  {
    [m_upload_encoder endEncoding];
    [m_upload_encoder release];
    m_upload_encoder = nil;
    [m_upload_cmdbuf release];
    m_upload_cmdbuf = nil;
  }
  if (m_render_cmdbuf != nil)
  {
    [m_render_cmdbuf release];
    m_render_cmdbuf = nil;
  }

  DestroyBuffers();

  // Release everything queued through DeferRelease().
  for (auto& it : m_cleanup_objects)
    [it.second release];
  m_cleanup_objects.clear();

  // Release cached state objects. Map entries can be nil when creation failed
  // and the failure was cached (see GetDepthState()), hence the nil checks.
  for (auto& it : m_depth_states)
  {
    if (it.second != nil)
      [it.second release];
  }
  m_depth_states.clear();
  for (auto& it : m_resolve_pipelines)
  {
    if (it.second != nil)
      [it.second release];
  }
  m_resolve_pipelines.clear();
  for (auto& it : m_clear_pipelines)
  {
    if (it.second != nil)
      [it.second release];
  }
  m_clear_pipelines.clear();
  if (m_shaders != nil)
  {
    [m_shaders release];
    m_shaders = nil;
  }
  if (m_queue != nil)
  {
    [m_queue release];
    m_queue = nil;
  }
  if (m_device != nil)
  {
    [m_device release];
    m_device = nil;
  }
}
    386 
// Creates the CAMetalLayer for the current window and attaches it to the view
// on the main thread (AppKit view/layer changes happen there), then applies
// the vsync mode and builds the reusable presentation render-pass descriptor.
bool MetalDevice::CreateLayer()
{
  @autoreleasepool
  {
    RunOnMainThread([this]() {
      @autoreleasepool
      {
        INFO_LOG("Creating a {}x{} Metal layer.", m_window_info.surface_width, m_window_info.surface_height);
        const auto size =
          CGSizeMake(static_cast<float>(m_window_info.surface_width), static_cast<float>(m_window_info.surface_height));
        m_layer = [CAMetalLayer layer];
        [m_layer setDevice:m_device];
        [m_layer setDrawableSize:size];

        // Default should be BGRA8.
        const MTLPixelFormat layer_fmt = [m_layer pixelFormat];
        m_window_info.surface_format = GetTextureFormatForMTLFormat(layer_fmt);
        if (m_window_info.surface_format == GPUTexture::Format::Unknown)
        {
          // Unrecognized layer format: force BGRA8 so rendering can proceed.
          ERROR_LOG("Invalid pixel format {} in layer, using BGRA8.", static_cast<u32>(layer_fmt));
          [m_layer setPixelFormat:MTLPixelFormatBGRA8Unorm];
          m_window_info.surface_format = GPUTexture::Format::BGRA8;
        }

        VERBOSE_LOG("Metal layer pixel format is {}.", GPUTexture::GetFormatName(m_window_info.surface_format));

        NSView* view = GetWindowView();
        [view setWantsLayer:TRUE];
        [view setLayer:m_layer];
      }
    });

    // Metal does not support mailbox mode.
    m_vsync_mode = (m_vsync_mode == GPUVSyncMode::Mailbox) ? GPUVSyncMode::FIFO : m_vsync_mode;
    [m_layer setDisplaySyncEnabled:m_vsync_mode == GPUVSyncMode::FIFO];

    // The pass descriptor is reused every presented frame; only the colour
    // attachment's texture changes per frame (see RenderBlankFrame()).
    DebugAssert(m_layer_pass_desc == nil);
    m_layer_pass_desc = [[MTLRenderPassDescriptor renderPassDescriptor] retain];
    m_layer_pass_desc.renderTargetWidth = m_window_info.surface_width;
    m_layer_pass_desc.renderTargetHeight = m_window_info.surface_height;
    m_layer_pass_desc.colorAttachments[0].loadAction = MTLLoadActionClear;
    m_layer_pass_desc.colorAttachments[0].storeAction = MTLStoreActionStore;
    m_layer_pass_desc.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.0, 0.0, 1.0);
    return true;
  }
}
    433 
// Detaches and releases the CAMetalLayer and its pass descriptor. No-op when
// no layer exists.
void MetalDevice::DestroyLayer()
{
  if (m_layer == nil)
    return;

  // Should wait for previous command buffers to finish, which might be rendering to drawables.
  WaitForPreviousCommandBuffers();

  [m_layer_pass_desc release];
  m_layer_pass_desc = nil;
  m_window_info.surface_format = GPUTexture::Format::Unknown;

  // View/layer manipulation happens on the main thread, matching CreateLayer().
  RunOnMainThread([this]() {
    NSView* view = GetWindowView();
    [view setLayer:nil];
    [view setWantsLayer:FALSE];
    [m_layer release];
    m_layer = nullptr;
  });
}
    454 
// Presents a single cleared (black) frame to the layer. Called right after
// device creation, before any real rendering. No-op without a layer.
void MetalDevice::RenderBlankFrame()
{
  DebugAssert(!InRenderPass());
  if (m_layer == nil)
    return;

  @autoreleasepool
  {
    // Retain the drawable so it outlives this autorelease pool; DeferRelease()
    // drops the reference once it is safe to do so.
    id<MTLDrawable> drawable = [[m_layer nextDrawable] retain];
    m_layer_pass_desc.colorAttachments[0].texture = [drawable texture];
    // An empty encode suffices: the pass descriptor's loadAction=Clear (set in
    // CreateLayer()) does the clearing.
    id<MTLRenderCommandEncoder> encoder = [m_render_cmdbuf renderCommandEncoderWithDescriptor:m_layer_pass_desc];
    [encoder endEncoding];
    [m_render_cmdbuf presentDrawable:drawable];
    DeferRelease(drawable);
    SubmitCommandBuffer();
  }
}
    472 
    473 bool MetalDevice::UpdateWindow()
    474 {
    475   if (InRenderPass())
    476     EndRenderPass();
    477   DestroyLayer();
    478 
    479   if (!AcquireWindow(false))
    480     return false;
    481 
    482   if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateLayer())
    483   {
    484     ERROR_LOG("Failed to create layer on updated window");
    485     return false;
    486   }
    487 
    488   return true;
    489 }
    490 
// The surface for this backend is the CAMetalLayer, so destroying the layer
// is all that is required.
void MetalDevice::DestroySurface()
{
  DestroyLayer();
}
    495 
    496 void MetalDevice::ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale)
    497 {
    498   @autoreleasepool
    499   {
    500     m_window_info.surface_scale = new_window_scale;
    501     if (static_cast<u32>(new_window_width) == m_window_info.surface_width &&
    502         static_cast<u32>(new_window_height) == m_window_info.surface_height)
    503     {
    504       return;
    505     }
    506 
    507     m_window_info.surface_width = new_window_width;
    508     m_window_info.surface_height = new_window_height;
    509 
    510     [m_layer setDrawableSize:CGSizeMake(new_window_width, new_window_height)];
    511     m_layer_pass_desc.renderTargetWidth = m_window_info.surface_width;
    512     m_layer_pass_desc.renderTargetHeight = m_window_info.surface_height;
    513   }
    514 }
    515 
    516 std::string MetalDevice::GetDriverInfo() const
    517 {
    518   @autoreleasepool
    519   {
    520     return ([[m_device description] UTF8String]);
    521   }
    522 }
    523 
    524 bool MetalDevice::CreateBuffers()
    525 {
    526   if (!m_vertex_buffer.Create(m_device, VERTEX_BUFFER_SIZE) || !m_index_buffer.Create(m_device, INDEX_BUFFER_SIZE) ||
    527       !m_uniform_buffer.Create(m_device, UNIFORM_BUFFER_SIZE) ||
    528       !m_texture_upload_buffer.Create(m_device, TEXTURE_STREAM_BUFFER_SIZE))
    529   {
    530     ERROR_LOG("Failed to create vertex/index/uniform buffers.");
    531     return false;
    532   }
    533 
    534   return true;
    535 }
    536 
// Tears down the streaming buffers created by CreateBuffers().
void MetalDevice::DestroyBuffers()
{
  m_texture_upload_buffer.Destroy();
  m_uniform_buffer.Destroy();
  m_vertex_buffer.Destroy();
  m_index_buffer.Destroy();
}
    544 
    545 bool MetalDevice::IsRenderTargetBound(const GPUTexture* tex) const
    546 {
    547   for (u32 i = 0; i < m_num_current_render_targets; i++)
    548   {
    549     if (m_current_render_targets[i] == tex)
    550       return true;
    551   }
    552 
    553   return false;
    554 }
    555 
    556 bool MetalDevice::SetGPUTimingEnabled(bool enabled)
    557 {
    558   if (m_gpu_timing_enabled == enabled)
    559     return true;
    560 
    561   std::unique_lock lock(m_fence_mutex);
    562   m_gpu_timing_enabled = enabled;
    563   m_accumulated_gpu_time = 0.0;
    564   m_last_gpu_time_end = 0.0;
    565   return true;
    566 }
    567 
// Returns the GPU time accumulated since the last call, scaled by 1000
// (seconds -> milliseconds), and resets the accumulator under the fence lock.
float MetalDevice::GetAndResetAccumulatedGPUTime()
{
  std::unique_lock lock(m_fence_mutex);
  return std::exchange(m_accumulated_gpu_time, 0.0) * 1000.0;
}
    573 
// Takes ownership of the given library/function references; both are released
// (via deferred release) in the destructor.
MetalShader::MetalShader(GPUShaderStage stage, id<MTLLibrary> library, id<MTLFunction> function)
  : GPUShader(stage), m_library(library), m_function(function)
{
}
    578 
MetalShader::~MetalShader()
{
  // Released through the device's deferred-release queue rather than
  // immediately — NOTE(review): presumably so in-flight GPU work keeps valid
  // references; confirm against MetalDevice::DeferRelease.
  MetalDevice::DeferRelease(m_function);
  MetalDevice::DeferRelease(m_library);
}
    584 
// Applies the debug name as the MTLFunction's label (shown by GPU debugging
// and capture tools).
void MetalShader::SetDebugName(std::string_view name)
{
  @autoreleasepool
  {
    [m_function setLabel:StringViewToNSString(name)];
  }
}
    592 
// TODO: Clean this up, somehow..
// Forward declaration of the host's data-root path so DumpShader() below can
// write into it — declared here (see TODO) instead of including the header.
namespace EmuFolders {
extern std::string DataRoot;
}
    597 static void DumpShader(u32 n, std::string_view suffix, std::string_view data)
    598 {
    599   if (data.empty())
    600     return;
    601 
    602   auto fp = FileSystem::OpenManagedCFile(
    603     Path::Combine(EmuFolders::DataRoot, fmt::format("shader{}_{}.txt", suffix, n)).c_str(), "wb");
    604   if (!fp)
    605     return;
    606 
    607   std::fwrite(data.data(), data.length(), 1, fp.get());
    608 }
    609 
    610 std::unique_ptr<GPUShader> MetalDevice::CreateShaderFromMSL(GPUShaderStage stage, std::string_view source,
    611                                                             std::string_view entry_point, Error* error)
    612 {
    613   @autoreleasepool
    614   {
    615     NSString* const ns_source = StringViewToNSString(source);
    616     NSError* nserror = nullptr;
    617     id<MTLLibrary> library = [m_device newLibraryWithSource:ns_source options:nil error:&nserror];
    618     if (!library)
    619     {
    620       LogNSError(nserror, TinyString::from_format("Failed to compile {} shader", GPUShader::GetStageName(stage)));
    621 
    622       const char* utf_error = [nserror.description UTF8String];
    623       DumpBadShader(source, fmt::format("Error {}: {}", static_cast<u32>(nserror.code), utf_error ? utf_error : ""));
    624       Error::SetStringFmt(error, "Failed to compile {} shader: Error {}: {}", GPUShader::GetStageName(stage),
    625                           static_cast<u32>(nserror.code), utf_error ? utf_error : "");
    626       return {};
    627     }
    628 
    629     id<MTLFunction> function = [library newFunctionWithName:StringViewToNSString(entry_point)];
    630     if (!function)
    631     {
    632       ERROR_LOG("Failed to get main function in compiled library");
    633       Error::SetStringView(error, "Failed to get main function in compiled library");
    634       return {};
    635     }
    636 
    637     return std::unique_ptr<MetalShader>(new MetalShader(stage, [library retain], [function retain]));
    638   }
    639 }
    640 
    641 std::unique_ptr<GPUShader> MetalDevice::CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data,
    642                                                                Error* error)
    643 {
    644   const std::string_view str_data(reinterpret_cast<const char*>(data.data()), data.size());
    645   return CreateShaderFromMSL(stage, str_data, "main0", error);
    646 }
    647 
// Compiles a shader by translation: source -> Vulkan SPIR-V -> MSL
// (target version 230 = MSL 2.3), then compiles the MSL. When out_binary is
// non-null it receives the MSL text, which is the format
// CreateShaderFromBinary() expects back.
std::unique_ptr<GPUShader> MetalDevice::CreateShaderFromSource(GPUShaderStage stage, GPUShaderLanguage language,
                                                               std::string_view source, const char* entry_point,
                                                               DynamicHeapArray<u8>* out_binary, Error* error)
{
  // Flip to true to dump the input and translated MSL to disk for debugging.
  static constexpr bool dump_shaders = false;

  DynamicHeapArray<u8> spv;
  if (!CompileGLSLShaderToVulkanSpv(stage, language, source, entry_point, !m_debug_device, false, &spv, error))
    return {};

  std::string msl;
  if (!TranslateVulkanSpvToLanguage(spv.cspan(), stage, GPUShaderLanguage::MSL, 230, &msl, error))
    return {};

  if constexpr (dump_shaders)
  {
    static unsigned s_next_id = 0;
    ++s_next_id;
    DumpShader(s_next_id, "_input", source);
    DumpShader(s_next_id, "_msl", msl);
  }

  if (out_binary)
  {
    out_binary->resize(msl.size());
    std::memcpy(out_binary->data(), msl.data(), msl.size());
  }

  return CreateShaderFromMSL(stage, msl, "main0", error);
}
    678 
// Stores the compiled pipeline state together with the depth-stencil state,
// cull mode and primitive type used when binding this pipeline.
MetalPipeline::MetalPipeline(id<MTLRenderPipelineState> pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode,
                             MTLPrimitiveType primitive)
  : m_pipeline(pipeline), m_depth(depth), m_cull_mode(cull_mode), m_primitive(primitive)
{
}
    684 
MetalPipeline::~MetalPipeline()
{
  // m_depth is intentionally not released here: depth-stencil states are
  // cached and owned by MetalDevice (see GetDepthState()/DestroyDevice()).
  MetalDevice::DeferRelease(m_pipeline);
}
    689 
void MetalPipeline::SetDebugName(std::string_view name)
{
  // MTLRenderPipelineState's label is a readonly property — it can only be
  // set on the descriptor at creation time, so there is nothing to do here.
}
    694 
// Returns a depth-stencil state for the given depth configuration, creating
// and caching it on first use. Results are cached by ds.key — including nil
// on creation failure, so a failing configuration is not retried every call.
id<MTLDepthStencilState> MetalDevice::GetDepthState(const GPUPipeline::DepthState& ds)
{
  const auto it = m_depth_states.find(ds.key);
  if (it != m_depth_states.end())
    return it->second;

  @autoreleasepool
  {
    // Indexed by GPUPipeline::DepthFunc; must stay in declaration order.
    static constexpr std::array<MTLCompareFunction, static_cast<u32>(GPUPipeline::DepthFunc::MaxCount)> func_mapping = {
      {
        MTLCompareFunctionNever,        // Never
        MTLCompareFunctionAlways,       // Always
        MTLCompareFunctionLess,         // Less
        MTLCompareFunctionLessEqual,    // LessEqual
        MTLCompareFunctionGreater,      // Greater
        MTLCompareFunctionGreaterEqual, // GreaterEqual
        MTLCompareFunctionEqual,        // Equal
      }};

    MTLDepthStencilDescriptor* desc = [[MTLDepthStencilDescriptor new] autorelease];
    desc.depthCompareFunction = func_mapping[static_cast<u8>(ds.depth_test.GetValue())];
    desc.depthWriteEnabled = ds.depth_write ? TRUE : FALSE;

    id<MTLDepthStencilState> state = [m_device newDepthStencilStateWithDescriptor:desc];
    m_depth_states.emplace(ds.key, state);
    if (state == nil) [[unlikely]]
      ERROR_LOG("Failed to create depth-stencil state.");

    return state;
  }
}
    726 
    727 std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error)
    728 {
    729   @autoreleasepool
    730   {
    731     static constexpr std::array<MTLPrimitiveTopologyClass, static_cast<u32>(GPUPipeline::Primitive::MaxCount)>
    732       primitive_classes = {{
    733         MTLPrimitiveTopologyClassPoint,    // Points
    734         MTLPrimitiveTopologyClassLine,     // Lines
    735         MTLPrimitiveTopologyClassTriangle, // Triangles
    736         MTLPrimitiveTopologyClassTriangle, // TriangleStrips
    737       }};
    738     static constexpr std::array<MTLPrimitiveType, static_cast<u32>(GPUPipeline::Primitive::MaxCount)> primitives = {{
    739       MTLPrimitiveTypePoint,         // Points
    740       MTLPrimitiveTypeLine,          // Lines
    741       MTLPrimitiveTypeTriangle,      // Triangles
    742       MTLPrimitiveTypeTriangleStrip, // TriangleStrips
    743     }};
    744 
    745     static constexpr u32 MAX_COMPONENTS = 4;
    746     static constexpr const MTLVertexFormat
    747       format_mapping[static_cast<u8>(GPUPipeline::VertexAttribute::Type::MaxCount)][MAX_COMPONENTS] = {
    748         {MTLVertexFormatFloat, MTLVertexFormatFloat2, MTLVertexFormatFloat3, MTLVertexFormatFloat4}, // Float
    749         {MTLVertexFormatUChar, MTLVertexFormatUChar2, MTLVertexFormatUChar3, MTLVertexFormatUChar4}, // UInt8
    750         {MTLVertexFormatChar, MTLVertexFormatChar2, MTLVertexFormatChar3, MTLVertexFormatChar4},     // SInt8
    751         {MTLVertexFormatUCharNormalized, MTLVertexFormatUChar2Normalized, MTLVertexFormatUChar3Normalized,
    752          MTLVertexFormatUChar4Normalized},                                                               // UNorm8
    753         {MTLVertexFormatUShort, MTLVertexFormatUShort2, MTLVertexFormatUShort3, MTLVertexFormatUShort4}, // UInt16
    754         {MTLVertexFormatShort, MTLVertexFormatShort2, MTLVertexFormatShort3, MTLVertexFormatShort4},     // SInt16
    755         {MTLVertexFormatUShortNormalized, MTLVertexFormatUShort2Normalized, MTLVertexFormatUShort3Normalized,
    756          MTLVertexFormatUShort4Normalized},                                                      // UNorm16
    757         {MTLVertexFormatUInt, MTLVertexFormatUInt2, MTLVertexFormatUInt3, MTLVertexFormatUInt4}, // UInt32
    758         {MTLVertexFormatInt, MTLVertexFormatInt2, MTLVertexFormatInt3, MTLVertexFormatInt4},     // SInt32
    759       };
    760 
    761     static constexpr std::array<MTLCullMode, static_cast<u32>(GPUPipeline::CullMode::MaxCount)> cull_mapping = {{
    762       MTLCullModeNone,  // None
    763       MTLCullModeFront, // Front
    764       MTLCullModeBack,  // Back
    765     }};
    766 
    767     static constexpr std::array<MTLBlendFactor, static_cast<u32>(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{
    768       MTLBlendFactorZero,                     // Zero
    769       MTLBlendFactorOne,                      // One
    770       MTLBlendFactorSourceColor,              // SrcColor
    771       MTLBlendFactorOneMinusSourceColor,      // InvSrcColor
    772       MTLBlendFactorDestinationColor,         // DstColor
    773       MTLBlendFactorOneMinusDestinationColor, // InvDstColor
    774       MTLBlendFactorSourceAlpha,              // SrcAlpha
    775       MTLBlendFactorOneMinusSourceAlpha,      // InvSrcAlpha
    776       MTLBlendFactorSource1Alpha,             // SrcAlpha1
    777       MTLBlendFactorOneMinusSource1Alpha,     // InvSrcAlpha1
    778       MTLBlendFactorDestinationAlpha,         // DstAlpha
    779       MTLBlendFactorOneMinusDestinationAlpha, // InvDstAlpha
    780       MTLBlendFactorBlendColor,               // ConstantAlpha
    781       MTLBlendFactorOneMinusBlendColor,       // InvConstantAlpha
    782     }};
    783 
    784     static constexpr std::array<MTLBlendOperation, static_cast<u32>(GPUPipeline::BlendOp::MaxCount)> op_mapping = {{
    785       MTLBlendOperationAdd,             // Add
    786       MTLBlendOperationSubtract,        // Subtract
    787       MTLBlendOperationReverseSubtract, // ReverseSubtract
    788       MTLBlendOperationMin,             // Min
    789       MTLBlendOperationMax,             // Max
    790     }};
    791 
    792     MTLRenderPipelineDescriptor* desc = [[MTLRenderPipelineDescriptor new] autorelease];
    793     desc.vertexFunction = static_cast<const MetalShader*>(config.vertex_shader)->GetFunction();
    794     desc.fragmentFunction = static_cast<const MetalShader*>(config.fragment_shader)->GetFunction();
    795 
    796     for (u32 i = 0; i < MAX_RENDER_TARGETS; i++)
    797     {
    798       if (config.color_formats[i] == GPUTexture::Format::Unknown)
    799         break;
    800 
    801       MTLRenderPipelineColorAttachmentDescriptor* ca = desc.colorAttachments[0];
    802       ca.pixelFormat = s_pixel_format_mapping[static_cast<u8>(config.color_formats[i])];
    803       ca.writeMask = (config.blend.write_r ? MTLColorWriteMaskRed : MTLColorWriteMaskNone) |
    804                      (config.blend.write_g ? MTLColorWriteMaskGreen : MTLColorWriteMaskNone) |
    805                      (config.blend.write_b ? MTLColorWriteMaskBlue : MTLColorWriteMaskNone) |
    806                      (config.blend.write_a ? MTLColorWriteMaskAlpha : MTLColorWriteMaskNone);
    807       ca.blendingEnabled = config.blend.enable;
    808       if (config.blend.enable)
    809       {
    810         ca.sourceRGBBlendFactor = blend_mapping[static_cast<u8>(config.blend.src_blend.GetValue())];
    811         ca.destinationRGBBlendFactor = blend_mapping[static_cast<u8>(config.blend.dst_blend.GetValue())];
    812         ca.rgbBlendOperation = op_mapping[static_cast<u8>(config.blend.blend_op.GetValue())];
    813         ca.sourceAlphaBlendFactor = blend_mapping[static_cast<u8>(config.blend.src_alpha_blend.GetValue())];
    814         ca.destinationAlphaBlendFactor = blend_mapping[static_cast<u8>(config.blend.dst_alpha_blend.GetValue())];
    815         ca.alphaBlendOperation = op_mapping[static_cast<u8>(config.blend.alpha_blend_op.GetValue())];
    816       }
    817     }
    818     desc.depthAttachmentPixelFormat = s_pixel_format_mapping[static_cast<u8>(config.depth_format)];
    819 
    820     // Input assembly.
    821     MTLVertexDescriptor* vdesc = nil;
    822     if (!config.input_layout.vertex_attributes.empty())
    823     {
    824       vdesc = [MTLVertexDescriptor vertexDescriptor];
    825       for (u32 i = 0; i < static_cast<u32>(config.input_layout.vertex_attributes.size()); i++)
    826       {
    827         const GPUPipeline::VertexAttribute& va = config.input_layout.vertex_attributes[i];
    828         DebugAssert(va.components > 0 && va.components <= MAX_COMPONENTS);
    829 
    830         MTLVertexAttributeDescriptor* vd = vdesc.attributes[i];
    831         vd.format = format_mapping[static_cast<u8>(va.type.GetValue())][va.components - 1];
    832         vd.offset = static_cast<NSUInteger>(va.offset.GetValue());
    833         vd.bufferIndex = 1;
    834       }
    835 
    836       vdesc.layouts[1].stepFunction = MTLVertexStepFunctionPerVertex;
    837       vdesc.layouts[1].stepRate = 1;
    838       vdesc.layouts[1].stride = config.input_layout.vertex_stride;
    839 
    840       desc.vertexDescriptor = vdesc;
    841     }
    842 
    843     // Rasterization state.
    844     const MTLCullMode cull_mode = cull_mapping[static_cast<u8>(config.rasterization.cull_mode.GetValue())];
    845     desc.rasterizationEnabled = TRUE;
    846     desc.inputPrimitiveTopology = primitive_classes[static_cast<u8>(config.primitive)];
    847 
    848     // Depth state
    849     id<MTLDepthStencilState> depth = GetDepthState(config.depth);
    850     if (depth == nil)
    851       return {};
    852 
    853     // General
    854     const MTLPrimitiveType primitive = primitives[static_cast<u8>(config.primitive)];
    855     desc.rasterSampleCount = config.samples;
    856 
    857     // Metal-specific stuff
    858     desc.vertexBuffers[0].mutability = MTLMutabilityImmutable;
    859     desc.fragmentBuffers[0].mutability = MTLMutabilityImmutable;
    860     if (!config.input_layout.vertex_attributes.empty())
    861       desc.vertexBuffers[1].mutability = MTLMutabilityImmutable;
    862     if (config.layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
    863       desc.fragmentBuffers[1].mutability = MTLMutabilityImmutable;
    864 
    865     NSError* nserror = nullptr;
    866     id<MTLRenderPipelineState> pipeline = [m_device newRenderPipelineStateWithDescriptor:desc error:&nserror];
    867     if (pipeline == nil)
    868     {
    869       LogNSError(nserror, "Failed to create render pipeline state");
    870       NSErrorToErrorObject(error, "newRenderPipelineStateWithDescriptor failed: ", nserror);
    871       return {};
    872     }
    873 
    874     return std::unique_ptr<GPUPipeline>(new MetalPipeline(pipeline, depth, cull_mode, primitive));
    875   }
    876 }
    877 
// Wraps an existing MTLTexture. The reference is owned by this object and is
// handed to MetalDevice::DeferRelease() in the destructor.
MetalTexture::MetalTexture(id<MTLTexture> texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type,
                           Format format)
  : GPUTexture(width, height, layers, levels, samples, type, format), m_texture(texture)
{
}
    883 
MetalTexture::~MetalTexture()
{
  if (m_texture != nil)
  {
    // Unbind first so the device drops any cached reference to this texture,
    // then defer the release rather than releasing immediately (the GPU may
    // still be using it in an in-flight command buffer).
    MetalDevice::GetInstance().UnbindTexture(this);
    MetalDevice::DeferRelease(m_texture);
  }
}
    892 
// Uploads a rectangle of pixel data to (layer, level) of the texture. The data
// is staged through the device's texture stream buffer, or through a one-off
// shared MTLBuffer when the upload is too large for the stream buffer.
bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/,
                          u32 level /*= 0*/)
{
  // Rows staged in the stream buffer are padded out to the pitch alignment
  // (see TEXTURE_UPLOAD_PITCH_ALIGNMENT at the top of the file).
  const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
  const u32 req_size = height * aligned_pitch;

  GPUDevice::GetStatistics().buffer_streamed += req_size;
  GPUDevice::GetStatistics().num_uploads++;

  MetalDevice& dev = MetalDevice::GetInstance();
  MetalStreamBuffer& sb = dev.GetTextureStreamBuffer();
  id<MTLBuffer> actual_buffer;
  u32 actual_offset;
  u32 actual_pitch;
  if (req_size >= (sb.GetCurrentSize() / 2u))
  {
    // Too large for the stream buffer: copy the caller's data into a temporary
    // shared buffer, released once the current command buffer completes.
    // The source pitch is used directly here, since no staging copy happens.
    const u32 upload_size = height * pitch;
    const MTLResourceOptions options = MTLResourceStorageModeShared;
    actual_buffer = [dev.GetMTLDevice() newBufferWithBytes:data length:upload_size options:options];
    actual_offset = 0;
    actual_pitch = pitch;
    if (actual_buffer == nil)
    {
      Panic("Failed to allocate temporary buffer.");
      return false;
    }

    dev.DeferRelease(actual_buffer);
  }
  else
  {
    // Reserve stream buffer space; submit the current command buffer to
    // recycle space if the first attempt fails.
    if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT))
    {
      dev.SubmitCommandBuffer();
      if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT))
      {
        Panic("Failed to reserve texture upload space.");
        return false;
      }
    }

    // Stride-copy row by row so each staged row starts aligned.
    actual_offset = sb.GetCurrentOffset();
    StringUtil::StrideMemCpy(sb.GetCurrentHostPointer(), aligned_pitch, data, pitch, width * GetPixelSize(), height);
    sb.CommitMemory(req_size);
    actual_buffer = sb.GetBuffer();
    actual_pitch = aligned_pitch;
  }

  // A partial update of a cleared texture must realize the pending clear
  // first, otherwise the untouched region would lose it.
  if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height))
    dev.CommitClear(this);

  // If this texture was already used in the current command buffer, the copy
  // must be encoded inline after that work; otherwise GetBlitEncoder can
  // presumably place it on an earlier upload encoder (NOTE(review): based on
  // the is_inline flag's usage — confirm against GetBlitEncoder).
  const bool is_inline = (m_use_fence_counter == dev.GetCurrentFenceCounter());

  id<MTLBlitCommandEncoder> encoder = dev.GetBlitEncoder(is_inline);
  [encoder copyFromBuffer:actual_buffer
             sourceOffset:actual_offset
        sourceBytesPerRow:actual_pitch
      sourceBytesPerImage:0
               sourceSize:MTLSizeMake(width, height, 1)
                toTexture:m_texture
         destinationSlice:layer
         destinationLevel:level
        destinationOrigin:MTLOriginMake(x, y, 0)];
  m_state = GPUTexture::State::Dirty;
  return true;
}
    959 
    960 bool MetalTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer /*= 0*/,
    961                        u32 level /*= 0*/)
    962 {
    963   if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels)
    964     return false;
    965 
    966   const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
    967   const u32 req_size = height * aligned_pitch;
    968 
    969   MetalDevice& dev = MetalDevice::GetInstance();
    970   if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height))
    971     dev.CommitClear(this);
    972 
    973   MetalStreamBuffer& sb = dev.GetTextureStreamBuffer();
    974   if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT))
    975   {
    976     dev.SubmitCommandBuffer();
    977     if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT))
    978     {
    979       Panic("Failed to allocate space in texture upload buffer");
    980       return false;
    981     }
    982   }
    983 
    984   *map = sb.GetCurrentHostPointer();
    985   *map_stride = aligned_pitch;
    986   m_map_x = x;
    987   m_map_y = y;
    988   m_map_width = width;
    989   m_map_height = height;
    990   m_map_layer = layer;
    991   m_map_level = level;
    992   m_state = GPUTexture::State::Dirty;
    993   return true;
    994 }
    995 
// Finalizes a Map(): commits the reserved stream-buffer space and encodes the
// buffer->texture copy for the mapped region, then resets the map state.
void MetalTexture::Unmap()
{
  // Must match the pitch/size computation in Map(), since that is exactly how
  // much stream-buffer space was reserved.
  const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
  const u32 req_size = m_map_height * aligned_pitch;

  GPUDevice::GetStatistics().buffer_streamed += req_size;
  GPUDevice::GetStatistics().num_uploads++;

  MetalDevice& dev = MetalDevice::GetInstance();
  MetalStreamBuffer& sb = dev.GetTextureStreamBuffer();
  const u32 offset = sb.GetCurrentOffset();
  sb.CommitMemory(req_size);

  // TODO: track this (Update() picks inline vs. non-inline encoding based on
  // the texture's fence counter; here the copy is always encoded inline).
  const bool is_inline = true;
  id<MTLBlitCommandEncoder> encoder = dev.GetBlitEncoder(is_inline);
  [encoder copyFromBuffer:sb.GetBuffer()
             sourceOffset:offset
        sourceBytesPerRow:aligned_pitch
      sourceBytesPerImage:0
               sourceSize:MTLSizeMake(m_map_width, m_map_height, 1)
                toTexture:m_texture
         destinationSlice:m_map_layer
         destinationLevel:m_map_level
        destinationOrigin:MTLOriginMake(m_map_x, m_map_y, 0)];

  // Clear the map state recorded by Map().
  m_map_x = 0;
  m_map_y = 0;
  m_map_width = 0;
  m_map_height = 0;
  m_map_layer = 0;
  m_map_level = 0;
}
   1029 
   1030 void MetalTexture::MakeReadyForSampling()
   1031 {
   1032   MetalDevice& dev = MetalDevice::GetInstance();
   1033   if (!dev.InRenderPass())
   1034     return;
   1035 
   1036   if (IsRenderTarget() ? dev.IsRenderTargetBound(this) : (dev.m_current_depth_target == this))
   1037     dev.EndRenderPass();
   1038 }
   1039 
   1040 void MetalTexture::SetDebugName(std::string_view name)
   1041 {
   1042   @autoreleasepool
   1043   {
   1044     [m_texture setLabel:StringViewToNSString(name)];
   1045   }
   1046 }
   1047 
   1048 std::unique_ptr<GPUTexture> MetalDevice::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples,
   1049                                                        GPUTexture::Type type, GPUTexture::Format format,
   1050                                                        const void* data, u32 data_stride)
   1051 {
   1052   if (!GPUTexture::ValidateConfig(width, height, layers, layers, samples, type, format))
   1053     return {};
   1054 
   1055   const MTLPixelFormat pixel_format = s_pixel_format_mapping[static_cast<u8>(format)];
   1056   if (pixel_format == MTLPixelFormatInvalid)
   1057     return {};
   1058 
   1059   @autoreleasepool
   1060   {
   1061     MTLTextureDescriptor* desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:pixel_format
   1062                                                                                     width:width
   1063                                                                                    height:height
   1064                                                                                 mipmapped:(levels > 1)];
   1065 
   1066     desc.mipmapLevelCount = levels;
   1067     desc.storageMode = MTLStorageModePrivate;
   1068     if (samples > 1)
   1069     {
   1070       desc.textureType = (layers > 1) ? MTLTextureType2DMultisampleArray : MTLTextureType2DMultisample;
   1071       desc.sampleCount = samples;
   1072     }
   1073     else if (layers > 1)
   1074     {
   1075       desc.textureType = MTLTextureType2DArray;
   1076       desc.arrayLength = layers;
   1077     }
   1078 
   1079     switch (type)
   1080     {
   1081       case GPUTexture::Type::Texture:
   1082       case GPUTexture::Type::DynamicTexture:
   1083         desc.usage = MTLTextureUsageShaderRead;
   1084         break;
   1085 
   1086       case GPUTexture::Type::RenderTarget:
   1087       case GPUTexture::Type::DepthStencil:
   1088         desc.usage = MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget;
   1089         break;
   1090 
   1091       case GPUTexture::Type::RWTexture:
   1092         desc.usage = MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite;
   1093         break;
   1094 
   1095       default:
   1096         UnreachableCode();
   1097         break;
   1098     }
   1099 
   1100     id<MTLTexture> tex = [m_device newTextureWithDescriptor:desc];
   1101     if (tex == nil)
   1102     {
   1103       ERROR_LOG("Failed to create {}x{} texture.", width, height);
   1104       return {};
   1105     }
   1106 
   1107     // This one can *definitely* go on the upload buffer.
   1108     std::unique_ptr<GPUTexture> gtex(
   1109       new MetalTexture([tex retain], width, height, layers, levels, samples, type, format));
   1110     if (data)
   1111     {
   1112       // TODO: handle multi-level uploads...
   1113       gtex->Update(0, 0, width, height, data, data_stride, 0, 0);
   1114     }
   1115 
   1116     return gtex;
   1117   }
   1118 }
   1119 
// Constructs the download texture. `import_buffer` is non-null when wrapping
// caller-provided memory (see Create()); `buffer_offset` is the byte offset of
// the usable region inside `buffer`.
MetalDownloadTexture::MetalDownloadTexture(u32 width, u32 height, GPUTexture::Format format, u8* import_buffer,
                                           size_t buffer_offset, id<MTLBuffer> buffer, const u8* map_ptr, u32 map_pitch)
  : GPUDownloadTexture(width, height, format, (import_buffer != nullptr)), m_buffer_offset(buffer_offset),
    m_buffer(buffer)
{
  // Download textures are persistently mapped, so the base-class mapping
  // fields are filled in here rather than in Map()/Unmap().
  m_map_pointer = map_ptr;
  m_current_pitch = map_pitch;
}
   1128 
MetalDownloadTexture::~MetalDownloadTexture()
{
  // Releases the download buffer immediately (not via DeferRelease).
  // NOTE(review): presumably callers Flush() or wait on the fence before
  // destroying, so no copy is still in flight — confirm.
  [m_buffer release];
}
   1133 
   1134 std::unique_ptr<MetalDownloadTexture> MetalDownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format,
   1135                                                                    void* memory, size_t memory_size, u32 memory_stride)
   1136 {
   1137   @autoreleasepool
   1138   {
   1139     MetalDevice& dev = MetalDevice::GetInstance();
   1140     id<MTLBuffer> buffer = nil;
   1141     size_t memory_offset = 0;
   1142     const u8* map_ptr = nullptr;
   1143     u32 map_pitch = 0;
   1144     u32 buffer_size = 0;
   1145 
   1146     constexpr MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache;
   1147 
   1148     // not importing memory?
   1149     if (!memory)
   1150     {
   1151       map_pitch = Common::AlignUpPow2(GPUTexture::CalcUploadPitch(format, width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
   1152       buffer_size = height * map_pitch;
   1153       buffer = [[dev.m_device newBufferWithLength:buffer_size options:options] retain];
   1154       if (buffer == nil)
   1155       {
   1156         ERROR_LOG("Failed to create {} byte buffer", buffer_size);
   1157         return {};
   1158       }
   1159 
   1160       map_ptr = static_cast<u8*>([buffer contents]);
   1161     }
   1162     else
   1163     {
   1164       map_pitch = memory_stride;
   1165       buffer_size = height * map_pitch;
   1166       Assert(buffer_size <= memory_size);
   1167 
   1168       // Importing memory, we need to page align the buffer.
   1169       void* page_aligned_memory =
   1170         reinterpret_cast<void*>(Common::AlignDownPow2(reinterpret_cast<uintptr_t>(memory), HOST_PAGE_SIZE));
   1171       const size_t page_offset = static_cast<size_t>(static_cast<u8*>(memory) - static_cast<u8*>(page_aligned_memory));
   1172       const size_t page_aligned_size = Common::AlignUpPow2(page_offset + memory_size, HOST_PAGE_SIZE);
   1173       DEV_LOG("Trying to import {} bytes of memory at {} for download texture", page_aligned_memory, page_aligned_size);
   1174 
   1175       buffer = [[dev.m_device newBufferWithBytesNoCopy:page_aligned_memory
   1176                                                 length:page_aligned_size
   1177                                                options:options
   1178                                            deallocator:nil] retain];
   1179       if (buffer == nil)
   1180       {
   1181         ERROR_LOG("Failed to import {} byte buffer", page_aligned_size);
   1182         return {};
   1183       }
   1184 
   1185       map_ptr = static_cast<u8*>(memory);
   1186     }
   1187 
   1188     return std::unique_ptr<MetalDownloadTexture>(new MetalDownloadTexture(
   1189       width, height, format, static_cast<u8*>(memory), memory_offset, buffer, map_ptr, map_pitch));
   1190   }
   1191 }
   1192 
// Encodes a GPU texture -> download buffer copy for the given region, and
// records the fence counter so Flush() knows when the data is CPU-readable.
void MetalDownloadTexture::CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width,
                                           u32 height, u32 src_layer, u32 src_level, bool use_transfer_pitch)
{
  MetalTexture* const mtlTex = static_cast<MetalTexture*>(src);
  MetalDevice& dev = MetalDevice::GetInstance();

  DebugAssert(mtlTex->GetFormat() == m_format);
  DebugAssert(src_level < mtlTex->GetLevels());
  DebugAssert((src_x + width) <= mtlTex->GetMipWidth(src_level) && (src_y + height) <= mtlTex->GetMipHeight(src_level));
  DebugAssert((dst_x + width) <= m_width && (dst_y + height) <= m_height);
  DebugAssert((dst_x == 0 && dst_y == 0) || !use_transfer_pitch);
  DebugAssert(!m_is_imported || !use_transfer_pitch);

  // Imported buffers must keep the caller's stride; otherwise recompute the
  // pitch for either the transfer width or the full texture width.
  u32 copy_offset, copy_size, copy_rows;
  if (!m_is_imported)
    m_current_pitch = GetTransferPitch(use_transfer_pitch ? width : m_width, TEXTURE_UPLOAD_PITCH_ALIGNMENT);
  GetTransferSize(dst_x, dst_y, width, height, m_current_pitch, &copy_offset, &copy_size, &copy_rows);

  dev.GetStatistics().num_downloads++;

  // A pending clear must be realized before reading the texture back.
  dev.CommitClear(mtlTex);

  id<MTLBlitCommandEncoder> encoder = dev.GetBlitEncoder(true);
  [encoder copyFromTexture:mtlTex->GetMTLTexture()
                 sourceSlice:src_layer
                 sourceLevel:src_level
                sourceOrigin:MTLOriginMake(src_x, src_y, 0)
                  sourceSize:MTLSizeMake(width, height, 1)
                    toBuffer:m_buffer
           destinationOffset:m_buffer_offset + copy_offset
      destinationBytesPerRow:m_current_pitch
    destinationBytesPerImage:0];

  // Flush() uses these to decide whether to submit/wait on the copy.
  m_copy_fence_counter = dev.m_current_fence_counter;
  m_needs_flush = true;
}
   1229 
// The backing buffer uses shared storage (see Create()), so its contents are
// permanently CPU-visible and mapping is a no-op.
bool MetalDownloadTexture::Map(u32 x, u32 y, u32 width, u32 height)
{
  // Always mapped.
  return true;
}
   1235 
// Counterpart of Map(); nothing to do since the buffer is always CPU-visible.
void MetalDownloadTexture::Unmap()
{
  // Always mapped.
}
   1240 
// Ensures the most recent CopyFromTexture() has completed on the GPU before
// the CPU reads the buffer contents.
void MetalDownloadTexture::Flush()
{
  if (!m_needs_flush)
    return;

  m_needs_flush = false;

  MetalDevice& dev = MetalDevice::GetInstance();
  // Copy has already completed on the GPU; nothing to wait for.
  if (dev.m_completed_fence_counter >= m_copy_fence_counter)
    return;

  // Copy is in the still-open command buffer: submit it (and wait). Otherwise
  // it was submitted earlier, so just wait for its fence to signal.
  if (dev.GetCurrentFenceCounter() == m_copy_fence_counter)
    dev.SubmitCommandBuffer(true);
  else
    dev.WaitForFenceCounter(m_copy_fence_counter);
}
   1258 
   1259 void MetalDownloadTexture::SetDebugName(std::string_view name)
   1260 {
   1261   @autoreleasepool
   1262   {
   1263     [m_buffer setLabel:StringViewToNSString(name)];
   1264   }
   1265 }
   1266 
// Creates a download texture backed by device-allocated shared memory.
std::unique_ptr<GPUDownloadTexture> MetalDevice::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format)
{
  return MetalDownloadTexture::Create(width, height, format, nullptr, 0, 0);
}
   1271 
// Creates a download texture that wraps caller-provided memory (imported via
// newBufferWithBytesNoCopy in MetalDownloadTexture::Create).
std::unique_ptr<GPUDownloadTexture> MetalDevice::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format,
                                                                       void* memory, size_t memory_size,
                                                                       u32 memory_stride)
{
  return MetalDownloadTexture::Create(width, height, format, memory, memory_size, memory_stride);
}
   1278 
// Wraps a retained MTLSamplerState (see MetalDevice::CreateSampler).
MetalSampler::MetalSampler(id<MTLSamplerState> ss) : m_ss(ss)
{
}
   1282 
// NOTE(review): m_ss is retained in CreateSampler but the defaulted destructor
// never releases it — presumably sampler states are few and live for the
// device's lifetime; confirm intended ownership.
MetalSampler::~MetalSampler() = default;
   1284 
// No-op: an MTLSamplerState cannot be relabelled after creation.
void MetalSampler::SetDebugName(std::string_view name)
{
  // lame.. have to put it on the descriptor :/
}
   1289 
// Builds an MTLSamplerState from the abstract sampler config by table-mapping
// address modes, filters, and border colors. Returns null if the border color
// is unsupported or sampler creation fails.
std::unique_ptr<GPUSampler> MetalDevice::CreateSampler(const GPUSampler::Config& config)
{
  @autoreleasepool
  {
    static constexpr std::array<MTLSamplerAddressMode, static_cast<u8>(GPUSampler::AddressMode::MaxCount)> ta = {{
      MTLSamplerAddressModeRepeat,             // Repeat
      MTLSamplerAddressModeClampToEdge,        // ClampToEdge
      MTLSamplerAddressModeClampToBorderColor, // ClampToBorder
      MTLSamplerAddressModeMirrorRepeat,       // MirrorRepeat
    }};
    static constexpr std::array<MTLSamplerMinMagFilter, static_cast<u8>(GPUSampler::Filter::MaxCount)> min_mag_filters =
      {{
        MTLSamplerMinMagFilterNearest, // Nearest
        MTLSamplerMinMagFilterLinear,  // Linear
      }};
    static constexpr std::array<MTLSamplerMipFilter, static_cast<u8>(GPUSampler::Filter::MaxCount)> mip_filters = {{
      MTLSamplerMipFilterNearest, // Nearest
      MTLSamplerMipFilterLinear,  // Linear
    }};

    // Metal only exposes three fixed border colors, so only these exact packed
    // RGBA values can be honoured.
    struct BorderColorMapping
    {
      u32 color;
      MTLSamplerBorderColor mtl_color;
    };
    static constexpr BorderColorMapping border_color_mapping[] = {
      {0x00000000u, MTLSamplerBorderColorTransparentBlack},
      {0xFF000000u, MTLSamplerBorderColorOpaqueBlack},
      {0xFFFFFFFFu, MTLSamplerBorderColorOpaqueWhite},
    };

    MTLSamplerDescriptor* desc = [[MTLSamplerDescriptor new] autorelease];
    desc.normalizedCoordinates = true;
    desc.sAddressMode = ta[static_cast<u8>(config.address_u.GetValue())];
    desc.tAddressMode = ta[static_cast<u8>(config.address_v.GetValue())];
    desc.rAddressMode = ta[static_cast<u8>(config.address_w.GetValue())];
    desc.minFilter = min_mag_filters[static_cast<u8>(config.min_filter.GetValue())];
    desc.magFilter = min_mag_filters[static_cast<u8>(config.mag_filter.GetValue())];
    // Disable mip filtering entirely when only a single LOD is usable.
    desc.mipFilter = (config.min_lod != config.max_lod) ? mip_filters[static_cast<u8>(config.mip_filter.GetValue())] :
                                                          MTLSamplerMipFilterNotMipmapped;
    desc.lodMinClamp = static_cast<float>(config.min_lod);
    desc.lodMaxClamp = static_cast<float>(config.max_lod);
    desc.maxAnisotropy = std::max<u8>(config.anisotropy, 1);

    // Border color only matters if any axis actually clamps to border; reject
    // configs requesting a color Metal cannot represent.
    if (config.address_u == GPUSampler::AddressMode::ClampToBorder ||
        config.address_v == GPUSampler::AddressMode::ClampToBorder ||
        config.address_w == GPUSampler::AddressMode::ClampToBorder)
    {
      u32 i;
      for (i = 0; i < static_cast<u32>(std::size(border_color_mapping)); i++)
      {
        if (border_color_mapping[i].color == config.border_color)
          break;
      }
      if (i == std::size(border_color_mapping))
      {
        ERROR_LOG("Unsupported border color: {:08X}", config.border_color.GetValue());
        return {};
      }

      desc.borderColor = border_color_mapping[i].mtl_color;
    }

    // TODO: Pool?
    id<MTLSamplerState> ss = [m_device newSamplerStateWithDescriptor:desc];
    if (ss == nil)
    {
      ERROR_LOG("Failed to create sampler state.");
      return {};
    }

    return std::unique_ptr<GPUSampler>(new MetalSampler([ss retain]));
  }
}
   1364 
   1365 bool MetalDevice::SupportsTextureFormat(GPUTexture::Format format) const
   1366 {
   1367   if (format == GPUTexture::Format::RGB565 || format == GPUTexture::Format::RGBA5551)
   1368   {
   1369     // These formats require an Apple Silicon GPU.
   1370     // See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
   1371     if (![m_device supportsFamily:MTLGPUFamilyApple2])
   1372       return false;
   1373   }
   1374 
   1375   return (s_pixel_format_mapping[static_cast<u8>(format)] != MTLPixelFormatInvalid);
   1376 }
   1377 
   1378 void MetalDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
   1379                                     GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width,
   1380                                     u32 height)
   1381 {
   1382   DebugAssert(src_level < src->GetLevels() && src_layer < src->GetLayers());
   1383   DebugAssert((src_x + width) <= src->GetMipWidth(src_level));
   1384   DebugAssert((src_y + height) <= src->GetMipHeight(src_level));
   1385   DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers());
   1386   DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level));
   1387   DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level));
   1388 
   1389   MetalTexture* D = static_cast<MetalTexture*>(dst);
   1390   MetalTexture* S = static_cast<MetalTexture*>(src);
   1391 
   1392   if (D->IsRenderTargetOrDepthStencil())
   1393   {
   1394     if (S->GetState() == GPUTexture::State::Cleared)
   1395     {
   1396       if (S->GetWidth() == D->GetWidth() && S->GetHeight() == D->GetHeight())
   1397       {
   1398         // pass clear through
   1399         D->m_state = S->m_state;
   1400         D->m_clear_value = S->m_clear_value;
   1401         return;
   1402       }
   1403     }
   1404     else if (S->GetState() == GPUTexture::State::Invalidated)
   1405     {
   1406       // Contents are undefined ;)
   1407       return;
   1408     }
   1409     else if (dst_x == 0 && dst_y == 0 && width == D->GetMipWidth(dst_level) && height == D->GetMipHeight(dst_level))
   1410     {
   1411       D->SetState(GPUTexture::State::Dirty);
   1412     }
   1413 
   1414     CommitClear(D);
   1415   }
   1416 
   1417   CommitClear(S);
   1418 
   1419   S->SetUseFenceCounter(m_current_fence_counter);
   1420   D->SetUseFenceCounter(m_current_fence_counter);
   1421 
   1422   s_stats.num_copies++;
   1423 
   1424   @autoreleasepool
   1425   {
   1426     id<MTLBlitCommandEncoder> encoder = GetBlitEncoder(true);
   1427     [encoder copyFromTexture:S->GetMTLTexture()
   1428                  sourceSlice:src_level
   1429                  sourceLevel:src_level
   1430                 sourceOrigin:MTLOriginMake(src_x, src_y, 0)
   1431                   sourceSize:MTLSizeMake(width, height, 1)
   1432                    toTexture:D->GetMTLTexture()
   1433             destinationSlice:dst_layer
   1434             destinationLevel:dst_level
   1435            destinationOrigin:MTLOriginMake(dst_x, dst_y, 0)];
   1436   }
   1437 }
   1438 
   1439 void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
   1440                                        GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height)
   1441 {
   1442   DebugAssert((src_x + width) <= src->GetWidth());
   1443   DebugAssert((src_y + height) <= src->GetHeight());
   1444   DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers());
   1445   DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level));
   1446   DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level));
   1447   DebugAssert(!dst->IsMultisampled() && src->IsMultisampled());
   1448 
   1449   // Only does first level for now..
   1450   DebugAssert(dst_level == 0 && dst_layer == 0);
   1451 
   1452   const GPUTexture::Format src_format = dst->GetFormat();
   1453   const GPUTexture::Format dst_format = dst->GetFormat();
   1454   id<MTLComputePipelineState> resolve_pipeline = nil;
   1455   if (auto iter = std::find_if(m_resolve_pipelines.begin(), m_resolve_pipelines.end(),
   1456                                [src_format, dst_format](const auto& it) {
   1457                                  return it.first.first == src_format && it.first.second == dst_format;
   1458                                });
   1459       iter != m_resolve_pipelines.end())
   1460   {
   1461     resolve_pipeline = iter->second;
   1462   }
   1463   else
   1464   {
   1465     // Need to compile it.
   1466     @autoreleasepool
   1467     {
   1468       const bool is_depth = GPUTexture::IsDepthFormat(src_format);
   1469       id<MTLFunction> function =
   1470         [GetFunctionFromLibrary(m_shaders, is_depth ? @"depthResolveKernel" : @"colorResolveKernel") autorelease];
   1471       if (function == nil)
   1472         Panic("Failed to get resolve kernel");
   1473 
   1474       resolve_pipeline = [CreateComputePipeline(function, is_depth ? @"Depth Resolve" : @"Color Resolve") autorelease];
   1475       if (resolve_pipeline != nil)
   1476         [resolve_pipeline retain];
   1477       m_resolve_pipelines.emplace_back(std::make_pair(src_format, dst_format), resolve_pipeline);
   1478     }
   1479   }
   1480   if (resolve_pipeline == nil)
   1481     Panic("Failed to get resolve pipeline");
   1482 
   1483   if (InRenderPass())
   1484     EndRenderPass();
   1485 
   1486   s_stats.num_copies++;
   1487 
   1488   const u32 threadgroupHeight = resolve_pipeline.maxTotalThreadsPerThreadgroup / resolve_pipeline.threadExecutionWidth;
   1489   const MTLSize intrinsicThreadgroupSize = MTLSizeMake(resolve_pipeline.threadExecutionWidth, threadgroupHeight, 1);
   1490   const MTLSize threadgroupsInGrid =
   1491     MTLSizeMake((src->GetWidth() + intrinsicThreadgroupSize.width - 1) / intrinsicThreadgroupSize.width,
   1492                 (src->GetHeight() + intrinsicThreadgroupSize.height - 1) / intrinsicThreadgroupSize.height, 1);
   1493 
   1494   id<MTLComputeCommandEncoder> computeEncoder = [m_render_cmdbuf computeCommandEncoder];
   1495   [computeEncoder setComputePipelineState:resolve_pipeline];
   1496   [computeEncoder setTexture:static_cast<MetalTexture*>(src)->GetMTLTexture() atIndex:0];
   1497   [computeEncoder setTexture:static_cast<MetalTexture*>(dst)->GetMTLTexture() atIndex:1];
   1498   [computeEncoder dispatchThreadgroups:threadgroupsInGrid threadsPerThreadgroup:intrinsicThreadgroupSize];
   1499   [computeEncoder endEncoding];
   1500 }
   1501 
   1502 void MetalDevice::ClearRenderTarget(GPUTexture* t, u32 c)
   1503 {
   1504   GPUDevice::ClearRenderTarget(t, c);
   1505   if (InRenderPass() && IsRenderTargetBound(t))
   1506     EndRenderPass();
   1507 }
   1508 
// Records the depth clear value and, when the target is the depth attachment
// of an active render pass, performs the clear immediately with a depth-only
// fullscreen draw (the pass's load action can no longer be changed at this
// point). All encoder state disturbed by the draw is restored afterwards.
void MetalDevice::ClearDepth(GPUTexture* t, float d)
{
  GPUDevice::ClearDepth(t, d);
  if (InRenderPass() && m_current_depth_target == t)
  {
    // Pipeline with colour writes masked off, always-pass/always-write depth.
    const ClearPipelineConfig config = GetCurrentClearPipelineConfig();
    id<MTLRenderPipelineState> pipeline = GetClearDepthPipeline(config);
    id<MTLDepthStencilState> depth = GetDepthState(GPUPipeline::DepthState::GetAlwaysWriteState());

    // The clear must cover the whole texture; temporarily widen the viewport
    // and scissor if they currently differ from the full rect.
    const GSVector4i rect = t->GetRect();
    const bool set_vp = !m_current_viewport.eq(rect);
    const bool set_scissor = !m_current_scissor.eq(rect);
    if (set_vp)
    {
      [m_render_encoder setViewport:(MTLViewport){0.0, 0.0, static_cast<double>(t->GetWidth()),
                                                  static_cast<double>(t->GetHeight()), 0.0, 1.0}];
    }
    if (set_scissor)
      [m_render_encoder setScissorRect:(MTLScissorRect){0u, 0u, t->GetWidth(), t->GetHeight()}];

    [m_render_encoder setRenderPipelineState:pipeline];
    if (m_current_cull_mode != MTLCullModeNone)
      [m_render_encoder setCullMode:MTLCullModeNone];
    if (depth != m_current_depth_state)
      [m_render_encoder setDepthStencilState:depth];
    // Clear depth is passed as vertex bytes; one fullscreen triangle.
    // NOTE(review): GetPrimitive() dereferences m_current_pipeline here, but
    // the restore below null-checks it — confirm a pipeline is always bound
    // when a pass is active.
    [m_render_encoder setVertexBytes:&d length:sizeof(d) atIndex:0];
    [m_render_encoder drawPrimitives:m_current_pipeline->GetPrimitive() vertexStart:0 vertexCount:3];
    s_stats.num_draws++;

    // Restore the previous encoder state: uniform binding (setVertexBytes
    // clobbered vertex buffer index 0), pipeline, cull mode, depth state,
    // viewport and scissor.
    [m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0];
    if (m_current_pipeline)
      [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()];
    if (m_current_cull_mode != MTLCullModeNone)
      [m_render_encoder setCullMode:m_current_cull_mode];
    if (depth != m_current_depth_state)
      [m_render_encoder setDepthStencilState:m_current_depth_state];
    if (set_vp)
      SetViewportInRenderEncoder();
    if (set_scissor)
      SetScissorInRenderEncoder();
  }
}
   1551 
   1552 void MetalDevice::InvalidateRenderTarget(GPUTexture* t)
   1553 {
   1554   GPUDevice::InvalidateRenderTarget(t);
   1555   if (InRenderPass() && (t->IsRenderTarget() ? IsRenderTargetBound(t) : (m_current_depth_target == t)))
   1556     EndRenderPass();
   1557 }
   1558 
// Applies a pending (deferred) clear to a texture by encoding an empty render
// pass whose load action performs the clear. Only the Cleared state is handled
// here; Invalidated textures are resolved by load actions at pass start.
void MetalDevice::CommitClear(MetalTexture* tex)
{
  if (tex->GetState() == GPUTexture::State::Cleared)
  {
    DebugAssert(tex->IsRenderTargetOrDepthStencil());
    tex->SetState(GPUTexture::State::Dirty);

    // TODO: We could combine it with the current render pass.
    if (InRenderPass())
      EndRenderPass();

    @autoreleasepool
    {
      // Allocating here seems a bit sad.
      MTLRenderPassDescriptor* desc = [MTLRenderPassDescriptor renderPassDescriptor];
      desc.renderTargetWidth = tex->GetWidth();
      desc.renderTargetHeight = tex->GetHeight();
      if (tex->IsRenderTarget())
      {
        // Colour clear via load action; store so the result persists.
        const auto cc = tex->GetUNormClearColor();
        desc.colorAttachments[0].texture = tex->GetMTLTexture();
        desc.colorAttachments[0].loadAction = MTLLoadActionClear;
        desc.colorAttachments[0].storeAction = MTLStoreActionStore;
        desc.colorAttachments[0].clearColor = MTLClearColorMake(cc[0], cc[1], cc[2], cc[3]);
      }
      else
      {
        // Depth clear, same pattern.
        desc.depthAttachment.texture = tex->GetMTLTexture();
        desc.depthAttachment.loadAction = MTLLoadActionClear;
        desc.depthAttachment.storeAction = MTLStoreActionStore;
        desc.depthAttachment.clearDepth = tex->GetClearDepth();
      }

      // An encoder with no draws still executes its load/store actions.
      id<MTLRenderCommandEncoder> encoder = [m_render_cmdbuf renderCommandEncoderWithDescriptor:desc];
      [encoder endEncoding];
    }
  }
}
   1597 
   1598 MetalDevice::ClearPipelineConfig MetalDevice::GetCurrentClearPipelineConfig() const
   1599 {
   1600   ClearPipelineConfig config = {};
   1601   for (u32 i = 0; i < m_num_current_render_targets; i++)
   1602     config.color_formats[i] = m_current_render_targets[i]->GetFormat();
   1603 
   1604   config.depth_format = m_current_depth_target ? m_current_depth_target->GetFormat() : GPUTexture::Format::Unknown;
   1605   config.samples =
   1606     m_current_depth_target ? m_current_depth_target->GetSamples() : m_current_render_targets[0]->GetSamples();
   1607   return config;
   1608 }
   1609 
   1610 id<MTLRenderPipelineState> MetalDevice::GetClearDepthPipeline(const ClearPipelineConfig& config)
   1611 {
   1612   const auto iter = std::find_if(m_clear_pipelines.begin(), m_clear_pipelines.end(),
   1613                                  [&config](const auto& it) { return (it.first == config); });
   1614   if (iter != m_clear_pipelines.end())
   1615     return iter->second;
   1616 
   1617   MTLRenderPipelineDescriptor* desc = [[MTLRenderPipelineDescriptor new] autorelease];
   1618   desc.vertexFunction = [GetFunctionFromLibrary(m_shaders, @"depthClearVertex") autorelease];
   1619   desc.fragmentFunction = [GetFunctionFromLibrary(m_shaders, @"depthClearFragment") autorelease];
   1620 
   1621   for (u32 i = 0; i < MAX_RENDER_TARGETS; i++)
   1622   {
   1623     if (config.color_formats[i] == GPUTexture::Format::Unknown)
   1624       break;
   1625     desc.colorAttachments[i].pixelFormat = s_pixel_format_mapping[static_cast<u8>(config.color_formats[i])];
   1626     desc.colorAttachments[i].writeMask = MTLColorWriteMaskNone;
   1627   }
   1628   desc.depthAttachmentPixelFormat = s_pixel_format_mapping[static_cast<u8>(config.depth_format)];
   1629   desc.rasterizationEnabled = TRUE;
   1630   desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassTriangle;
   1631   desc.rasterSampleCount = config.samples;
   1632   desc.vertexBuffers[0].mutability = MTLMutabilityImmutable;
   1633 
   1634   NSError* error = nullptr;
   1635   id<MTLRenderPipelineState> pipeline = [m_device newRenderPipelineStateWithDescriptor:desc error:&error];
   1636   if (pipeline == nil)
   1637     LogNSError(error, "Failed to create clear render pipeline state");
   1638 
   1639   m_clear_pipelines.emplace_back(config, pipeline);
   1640   return pipeline;
   1641 }
   1642 
// Construction only records the element format/count; the backing MTLBuffer
// is allocated separately via CreateBuffer().
MetalTextureBuffer::MetalTextureBuffer(Format format, u32 size_in_elements) : GPUTextureBuffer(format, size_in_elements)
{
}
   1646 
MetalTextureBuffer::~MetalTextureBuffer()
{
  // Unbind from the device first so no encoder keeps referencing the buffer
  // after it is destroyed.
  if (m_buffer.IsValid())
    MetalDevice::GetInstance().UnbindTextureBuffer(this);
  m_buffer.Destroy();
}
   1653 
// Allocates the backing streaming buffer on the given device.
// Returns false on allocation failure.
bool MetalTextureBuffer::CreateBuffer(id<MTLDevice> device)
{
  return m_buffer.Create(device, GetSizeInBytes());
}
   1658 
   1659 void* MetalTextureBuffer::Map(u32 required_elements)
   1660 {
   1661   const u32 esize = GetElementSize(m_format);
   1662   const u32 req_size = esize * required_elements;
   1663   if (!m_buffer.ReserveMemory(req_size, esize))
   1664   {
   1665     MetalDevice::GetInstance().SubmitCommandBufferAndRestartRenderPass("out of space in texture buffer");
   1666     if (!m_buffer.ReserveMemory(req_size, esize))
   1667       Panic("Failed to allocate texture buffer space.");
   1668   }
   1669 
   1670   m_current_position = m_buffer.GetCurrentOffset() / esize;
   1671   return m_buffer.GetCurrentHostPointer();
   1672 }
   1673 
   1674 void MetalTextureBuffer::Unmap(u32 used_elements)
   1675 {
   1676   const u32 size = GetElementSize(m_format) * used_elements;
   1677   GPUDevice::GetStatistics().buffer_streamed += size;
   1678   GPUDevice::GetStatistics().num_uploads++;
   1679   m_buffer.CommitMemory(size);
   1680 }
   1681 
// Labels the underlying MTLBuffer so it is identifiable in GPU captures.
void MetalTextureBuffer::SetDebugName(std::string_view name)
{
  @autoreleasepool
  {
    [m_buffer.GetBuffer() setLabel:StringViewToNSString(name)];
  }
}
   1689 
   1690 std::unique_ptr<GPUTextureBuffer> MetalDevice::CreateTextureBuffer(GPUTextureBuffer::Format format,
   1691                                                                    u32 size_in_elements)
   1692 {
   1693   std::unique_ptr<MetalTextureBuffer> tb = std::make_unique<MetalTextureBuffer>(format, size_in_elements);
   1694   if (!tb->CreateBuffer(m_device))
   1695     tb.reset();
   1696 
   1697   return tb;
   1698 }
   1699 
// Debug groups are not implemented in the Metal backend; intentional no-op.
void MetalDevice::PushDebugGroup(const char* name)
{
}
   1703 
// Debug groups are not implemented in the Metal backend; intentional no-op.
void MetalDevice::PopDebugGroup()
{
}
   1707 
// Debug messages are not implemented in the Metal backend; intentional no-op.
void MetalDevice::InsertDebugMessage(const char* msg)
{
}
   1711 
   1712 void MetalDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
   1713                                   u32* map_base_vertex)
   1714 {
   1715   const u32 req_size = vertex_size * vertex_count;
   1716   if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size))
   1717   {
   1718     SubmitCommandBufferAndRestartRenderPass("out of vertex space");
   1719     if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size))
   1720       Panic("Failed to allocate vertex space");
   1721   }
   1722 
   1723   *map_ptr = m_vertex_buffer.GetCurrentHostPointer();
   1724   *map_space = m_vertex_buffer.GetCurrentSpace() / vertex_size;
   1725   *map_base_vertex = m_vertex_buffer.GetCurrentOffset() / vertex_size;
   1726 }
   1727 
   1728 void MetalDevice::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count)
   1729 {
   1730   const u32 size = vertex_size * vertex_count;
   1731   s_stats.buffer_streamed += size;
   1732   m_vertex_buffer.CommitMemory(size);
   1733 }
   1734 
   1735 void MetalDevice::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index)
   1736 {
   1737   const u32 req_size = sizeof(DrawIndex) * index_count;
   1738   if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex)))
   1739   {
   1740     SubmitCommandBufferAndRestartRenderPass("out of index space");
   1741     if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex)))
   1742       Panic("Failed to allocate index space");
   1743   }
   1744 
   1745   *map_ptr = reinterpret_cast<DrawIndex*>(m_index_buffer.GetCurrentHostPointer());
   1746   *map_space = m_index_buffer.GetCurrentSpace() / sizeof(DrawIndex);
   1747   *map_base_index = m_index_buffer.GetCurrentOffset() / sizeof(DrawIndex);
   1748 }
   1749 
   1750 void MetalDevice::UnmapIndexBuffer(u32 used_index_count)
   1751 {
   1752   const u32 size = sizeof(DrawIndex) * used_index_count;
   1753   s_stats.buffer_streamed += size;
   1754   m_index_buffer.CommitMemory(size);
   1755 }
   1756 
   1757 void MetalDevice::PushUniformBuffer(const void* data, u32 data_size)
   1758 {
   1759   s_stats.buffer_streamed += data_size;
   1760   void* map = MapUniformBuffer(data_size);
   1761   std::memcpy(map, data, data_size);
   1762   UnmapUniformBuffer(data_size);
   1763 }
   1764 
   1765 void* MetalDevice::MapUniformBuffer(u32 size)
   1766 {
   1767   const u32 used_space = Common::AlignUpPow2(size, UNIFORM_BUFFER_ALIGNMENT);
   1768   if (!m_uniform_buffer.ReserveMemory(used_space, UNIFORM_BUFFER_ALIGNMENT))
   1769   {
   1770     SubmitCommandBufferAndRestartRenderPass("out of uniform space");
   1771     if (!m_uniform_buffer.ReserveMemory(used_space, UNIFORM_BUFFER_ALIGNMENT))
   1772       Panic("Failed to allocate uniform space.");
   1773   }
   1774 
   1775   return m_uniform_buffer.GetCurrentHostPointer();
   1776 }
   1777 
// Commits `size` bytes of uniform data and rebinds the new offset on the
// active encoder. Uniforms live at vertex/fragment buffer index 0; only the
// offset needs updating since the buffer object itself is already bound.
void MetalDevice::UnmapUniformBuffer(u32 size)
{
  s_stats.buffer_streamed += size;
  m_current_uniform_buffer_position = m_uniform_buffer.GetCurrentOffset();
  m_uniform_buffer.CommitMemory(size);
  if (InRenderPass())
  {
    [m_render_encoder setVertexBufferOffset:m_current_uniform_buffer_position atIndex:0];
    [m_render_encoder setFragmentBufferOffset:m_current_uniform_buffer_position atIndex:0];
  }
}
   1789 
   1790 void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
   1791                                    GPUPipeline::RenderPassFlag feedback_loop)
   1792 {
   1793   bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds ||
   1794                   (!m_features.framebuffer_fetch && ((feedback_loop & GPUPipeline::ColorFeedbackLoop) !=
   1795                                                      (m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop))));
   1796   bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
   1797   bool needs_rt_clear = false;
   1798 
   1799   m_current_depth_target = static_cast<MetalTexture*>(ds);
   1800   for (u32 i = 0; i < num_rts; i++)
   1801   {
   1802     MetalTexture* const RT = static_cast<MetalTexture*>(rts[i]);
   1803     changed |= m_current_render_targets[i] != RT;
   1804     m_current_render_targets[i] = RT;
   1805     needs_rt_clear |= RT->IsClearedOrInvalidated();
   1806   }
   1807   for (u32 i = num_rts; i < m_num_current_render_targets; i++)
   1808     m_current_render_targets[i] = nullptr;
   1809   m_num_current_render_targets = static_cast<u8>(num_rts);
   1810   m_current_feedback_loop = feedback_loop;
   1811 
   1812   if (changed || needs_rt_clear || needs_ds_clear)
   1813   {
   1814     if (InRenderPass())
   1815       EndRenderPass();
   1816   }
   1817 }
   1818 
   1819 void MetalDevice::SetPipeline(GPUPipeline* pipeline)
   1820 {
   1821   DebugAssert(pipeline);
   1822   if (m_current_pipeline == pipeline)
   1823     return;
   1824 
   1825   m_current_pipeline = static_cast<MetalPipeline*>(pipeline);
   1826   if (InRenderPass())
   1827   {
   1828     [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()];
   1829 
   1830     if (m_current_depth_state != m_current_pipeline->GetDepthState())
   1831     {
   1832       m_current_depth_state = m_current_pipeline->GetDepthState();
   1833       [m_render_encoder setDepthStencilState:m_current_depth_state];
   1834     }
   1835     if (m_current_cull_mode != m_current_pipeline->GetCullMode())
   1836     {
   1837       m_current_cull_mode = m_current_pipeline->GetCullMode();
   1838       [m_render_encoder setCullMode:m_current_cull_mode];
   1839     }
   1840   }
   1841   else
   1842   {
   1843     // Still need to set depth state before the draw begins.
   1844     m_current_depth_state = m_current_pipeline->GetDepthState();
   1845     m_current_cull_mode = m_current_pipeline->GetCullMode();
   1846   }
   1847 }
   1848 
   1849 void MetalDevice::UnbindPipeline(MetalPipeline* pl)
   1850 {
   1851   if (m_current_pipeline != pl)
   1852     return;
   1853 
   1854   m_current_pipeline = nullptr;
   1855   m_current_depth_state = nil;
   1856 }
   1857 
   1858 void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler)
   1859 {
   1860   DebugAssert(slot < MAX_TEXTURE_SAMPLERS);
   1861 
   1862   id<MTLTexture> T = texture ? static_cast<MetalTexture*>(texture)->GetMTLTexture() : nil;
   1863   if (texture)
   1864   {
   1865     CommitClear(static_cast<MetalTexture*>(texture));
   1866     static_cast<MetalTexture*>(texture)->SetUseFenceCounter(m_current_fence_counter);
   1867   }
   1868 
   1869   if (m_current_textures[slot] != T)
   1870   {
   1871     m_current_textures[slot] = T;
   1872     if (InRenderPass())
   1873       [m_render_encoder setFragmentTexture:T atIndex:slot];
   1874   }
   1875 
   1876   id<MTLSamplerState> S = sampler ? static_cast<MetalSampler*>(sampler)->GetSamplerState() : nil;
   1877   if (m_current_samplers[slot] != S)
   1878   {
   1879     m_current_samplers[slot] = S;
   1880     if (InRenderPass())
   1881       [m_render_encoder setFragmentSamplerState:S atIndex:slot];
   1882   }
   1883 }
   1884 
   1885 void MetalDevice::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer)
   1886 {
   1887   id<MTLBuffer> B = buffer ? static_cast<MetalTextureBuffer*>(buffer)->GetMTLBuffer() : nil;
   1888   if (m_current_ssbo == B)
   1889     return;
   1890 
   1891   m_current_ssbo = B;
   1892   if (InRenderPass())
   1893     [m_render_encoder setFragmentBuffer:B offset:0 atIndex:1];
   1894 }
   1895 
   1896 void MetalDevice::UnbindTexture(MetalTexture* tex)
   1897 {
   1898   const id<MTLTexture> T = tex->GetMTLTexture();
   1899   for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
   1900   {
   1901     if (m_current_textures[i] == T)
   1902     {
   1903       m_current_textures[i] = nil;
   1904       if (InRenderPass())
   1905         [m_render_encoder setFragmentTexture:nil atIndex:i];
   1906     }
   1907   }
   1908 
   1909   if (tex->IsRenderTarget())
   1910   {
   1911     for (u32 i = 0; i < m_num_current_render_targets; i++)
   1912     {
   1913       if (m_current_render_targets[i] == tex)
   1914       {
   1915         WARNING_LOG("Unbinding current RT");
   1916         SetRenderTargets(nullptr, 0, m_current_depth_target, GPUPipeline::NoRenderPassFlags); // TODO: Wrong
   1917         break;
   1918       }
   1919     }
   1920   }
   1921   else if (tex->IsDepthStencil())
   1922   {
   1923     if (m_current_depth_target == tex)
   1924     {
   1925       WARNING_LOG("Unbinding current DS");
   1926       SetRenderTargets(nullptr, 0, nullptr, GPUPipeline::NoRenderPassFlags);
   1927     }
   1928   }
   1929 }
   1930 
   1931 void MetalDevice::UnbindTextureBuffer(MetalTextureBuffer* buf)
   1932 {
   1933   if (m_current_ssbo != buf->GetMTLBuffer())
   1934     return;
   1935 
   1936   m_current_ssbo = nil;
   1937   if (InRenderPass())
   1938     [m_render_encoder setFragmentBuffer:nil offset:0 atIndex:1];
   1939 }
   1940 
   1941 void MetalDevice::SetViewport(const GSVector4i rc)
   1942 {
   1943   if (m_current_viewport.eq(rc))
   1944     return;
   1945 
   1946   m_current_viewport = rc;
   1947 
   1948   if (InRenderPass())
   1949     SetViewportInRenderEncoder();
   1950 }
   1951 
   1952 void MetalDevice::SetScissor(const GSVector4i rc)
   1953 {
   1954   if (m_current_scissor.eq(rc))
   1955     return;
   1956 
   1957   m_current_scissor = rc;
   1958 
   1959   if (InRenderPass())
   1960     SetScissorInRenderEncoder();
   1961 }
   1962 
// Opens a new render encoder for the currently-bound render targets,
// translating any pending texture clears/invalidations into load actions.
// Only one encoder may record into the command buffer at a time, so an open
// inline-upload blit encoder is closed first.
void MetalDevice::BeginRenderPass()
{
  DebugAssert(m_render_encoder == nil);

  // Inline writes :(
  if (m_inline_upload_encoder != nil)
  {
    [m_inline_upload_encoder endEncoding];
    [m_inline_upload_encoder release];
    m_inline_upload_encoder = nil;
  }

  s_stats.num_render_passes++;

  @autoreleasepool
  {
    MTLRenderPassDescriptor* desc = [MTLRenderPassDescriptor renderPassDescriptor];
    if (m_num_current_render_targets == 0 && !m_current_depth_target)
    {
      // Rendering to view, but we got interrupted...
      desc.colorAttachments[0].texture = [m_layer_drawable texture];
      desc.colorAttachments[0].loadAction = MTLLoadActionLoad;
    }
    else
    {
      for (u32 i = 0; i < m_num_current_render_targets; i++)
      {
        MetalTexture* const RT = m_current_render_targets[i];
        desc.colorAttachments[i].texture = RT->GetMTLTexture();
        desc.colorAttachments[i].storeAction = MTLStoreActionStore;
        // Keep the texture alive until this command buffer completes.
        RT->SetUseFenceCounter(m_current_fence_counter);

        // Map the deferred texture state onto the pass's load action, then
        // mark the texture dirty since the pass resolves the pending state.
        switch (RT->GetState())
        {
          case GPUTexture::State::Cleared:
          {
            const auto clear_color = RT->GetUNormClearColor();
            desc.colorAttachments[i].loadAction = MTLLoadActionClear;
            desc.colorAttachments[i].clearColor =
              MTLClearColorMake(clear_color[0], clear_color[1], clear_color[2], clear_color[3]);
            RT->SetState(GPUTexture::State::Dirty);
          }
          break;

          case GPUTexture::State::Invalidated:
          {
            // Contents are undefined; skip the load entirely.
            desc.colorAttachments[i].loadAction = MTLLoadActionDontCare;
            RT->SetState(GPUTexture::State::Dirty);
          }
          break;

          case GPUTexture::State::Dirty:
          {
            desc.colorAttachments[i].loadAction = MTLLoadActionLoad;
          }
          break;

          default:
            UnreachableCode();
            break;
        }
      }

      // Same treatment for the depth attachment, if one is bound.
      if (MetalTexture* DS = m_current_depth_target)
      {
        desc.depthAttachment.texture = m_current_depth_target->GetMTLTexture();
        desc.depthAttachment.storeAction = MTLStoreActionStore;
        DS->SetUseFenceCounter(m_current_fence_counter);

        switch (DS->GetState())
        {
          case GPUTexture::State::Cleared:
          {
            desc.depthAttachment.loadAction = MTLLoadActionClear;
            desc.depthAttachment.clearDepth = DS->GetClearDepth();
            DS->SetState(GPUTexture::State::Dirty);
          }
          break;

          case GPUTexture::State::Invalidated:
          {
            desc.depthAttachment.loadAction = MTLLoadActionDontCare;
            DS->SetState(GPUTexture::State::Dirty);
          }
          break;

          case GPUTexture::State::Dirty:
          {
            desc.depthAttachment.loadAction = MTLLoadActionLoad;
          }
          break;

          default:
            UnreachableCode();
            break;
        }
      }
    }

    // Manually retained (non-ARC); released again in EndRenderPass().
    m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:desc] retain];
    SetInitialEncoderState();
  }
}
   2066 
// Finishes and releases the active render encoder. Must not be called while
// an inline-upload blit encoder is open.
void MetalDevice::EndRenderPass()
{
  DebugAssert(InRenderPass() && !IsInlineUploading());
  [m_render_encoder endEncoding];
  [m_render_encoder release];
  m_render_encoder = nil;
}
   2074 
// Finishes and releases the inline-upload blit encoder. Must not be called
// while a render pass is active.
void MetalDevice::EndInlineUploading()
{
  DebugAssert(IsInlineUploading() && !InRenderPass());
  [m_inline_upload_encoder endEncoding];
  [m_inline_upload_encoder release];
  m_inline_upload_encoder = nil;
}
   2082 
   2083 void MetalDevice::EndAnyEncoding()
   2084 {
   2085   if (InRenderPass())
   2086     EndRenderPass();
   2087   else if (IsInlineUploading())
   2088     EndInlineUploading();
   2089 }
   2090 
// Replays all cached state (buffers, pipeline, depth/stencil, cull mode,
// textures, samplers, viewport, scissor) onto a freshly-created render
// encoder, since encoder state does not persist across render passes.
void MetalDevice::SetInitialEncoderState()
{
  // Set initial state.
  // TODO: avoid uniform set here? it's probably going to get changed...
  // Might be better off just deferring all the init until the first draw...
  [m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0];
  [m_render_encoder setFragmentBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0];
  [m_render_encoder setVertexBuffer:m_vertex_buffer.GetBuffer() offset:0 atIndex:1];
  [m_render_encoder setCullMode:m_current_cull_mode];
  if (m_current_depth_state != nil)
    [m_render_encoder setDepthStencilState:m_current_depth_state];
  if (m_current_pipeline != nil)
    [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()];
  [m_render_encoder setFragmentTextures:m_current_textures.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
  [m_render_encoder setFragmentSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)];
  if (m_current_ssbo)
    [m_render_encoder setFragmentBuffer:m_current_ssbo offset:0 atIndex:1];

  // Without native framebuffer fetch, the colour feedback loop is emulated by
  // also binding the first render target as a fragment texture.
  if (!m_features.framebuffer_fetch && (m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop))
  {
    DebugAssert(m_current_render_targets[0]);
    [m_render_encoder setFragmentTexture:m_current_render_targets[0]->GetMTLTexture() atIndex:MAX_TEXTURE_SAMPLERS];
  }

  SetViewportInRenderEncoder();
  SetScissorInRenderEncoder();
}
   2118 
   2119 void MetalDevice::SetViewportInRenderEncoder()
   2120 {
   2121   const GSVector4i rc = ClampToFramebufferSize(m_current_viewport);
   2122   [m_render_encoder
   2123     setViewport:(MTLViewport){static_cast<double>(rc.left), static_cast<double>(rc.top),
   2124                               static_cast<double>(rc.width()), static_cast<double>(rc.height()), 0.0, 1.0}];
   2125 }
   2126 
   2127 void MetalDevice::SetScissorInRenderEncoder()
   2128 {
   2129   const GSVector4i rc = ClampToFramebufferSize(m_current_scissor);
   2130   [m_render_encoder
   2131     setScissorRect:(MTLScissorRect){static_cast<NSUInteger>(rc.left), static_cast<NSUInteger>(rc.top),
   2132                                     static_cast<NSUInteger>(rc.width()), static_cast<NSUInteger>(rc.height())}];
   2133 }
   2134 
   2135 GSVector4i MetalDevice::ClampToFramebufferSize(const GSVector4i rc) const
   2136 {
   2137   const MetalTexture* rt_or_ds =
   2138     (m_num_current_render_targets > 0) ? m_current_render_targets[0] : m_current_depth_target;
   2139   const s32 clamp_width = rt_or_ds ? rt_or_ds->GetWidth() : m_window_info.surface_width;
   2140   const s32 clamp_height = rt_or_ds ? rt_or_ds->GetHeight() : m_window_info.surface_height;
   2141   return rc.rintersect(GSVector4i(0, 0, clamp_width, clamp_height));
   2142 }
   2143 
// Ensures a render encoder is open before a draw is recorded.
void MetalDevice::PreDrawCheck()
{
  if (!InRenderPass())
    BeginRenderPass();
}
   2149 
// Encodes a non-indexed draw using the bound pipeline's primitive type.
void MetalDevice::Draw(u32 vertex_count, u32 base_vertex)
{
  PreDrawCheck();
  s_stats.num_draws++;
  [m_render_encoder drawPrimitives:m_current_pipeline->GetPrimitive() vertexStart:base_vertex vertexCount:vertex_count];
}
   2156 
   2157 void MetalDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
   2158 {
   2159   PreDrawCheck();
   2160 
   2161   s_stats.num_draws++;
   2162 
   2163   const u32 index_offset = base_index * sizeof(u16);
   2164   [m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive()
   2165                                indexCount:index_count
   2166                                 indexType:MTLIndexTypeUInt16
   2167                               indexBuffer:m_index_buffer.GetBuffer()
   2168                         indexBufferOffset:index_offset
   2169                             instanceCount:1
   2170                                baseVertex:base_vertex
   2171                              baseInstance:0];
   2172 }
   2173 
// Encodes an indexed draw with render-target memory barriers, used to emulate
// a colour feedback loop on hardware without framebuffer fetch. The barrier
// mode controls granularity: None = plain draw, One = single barrier before
// the whole draw, Full = a barrier before every individual primitive.
void MetalDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type)
{
  // Shouldn't be using this with framebuffer fetch.
  DebugAssert(!m_features.framebuffer_fetch);

  // A freshly-started pass implies no prior fragment work to order against.
  const bool skip_first_barrier = !InRenderPass();
  PreDrawCheck();

  // TODO: The first barrier is unnecessary if we're starting the render pass.

  u32 index_offset = base_index * sizeof(u16);

  switch (type)
  {
    case GPUDevice::DrawBarrier::None:
    {
      s_stats.num_draws++;

      [m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive()
                                   indexCount:index_count
                                    indexType:MTLIndexTypeUInt16
                                  indexBuffer:m_index_buffer.GetBuffer()
                            indexBufferOffset:index_offset
                                instanceCount:1
                                   baseVertex:base_vertex
                                 baseInstance:0];
    }
    break;

    case GPUDevice::DrawBarrier::One:
    {
      DebugAssert(m_num_current_render_targets == 1);
      s_stats.num_draws++;

      // One fragment->fragment RT barrier before the whole draw.
      if (!skip_first_barrier)
      {
        s_stats.num_barriers++;
        [m_render_encoder memoryBarrierWithScope:MTLBarrierScopeRenderTargets
                                     afterStages:MTLRenderStageFragment
                                    beforeStages:MTLRenderStageFragment];
      }

      [m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive()
                                   indexCount:index_count
                                    indexType:MTLIndexTypeUInt16
                                  indexBuffer:m_index_buffer.GetBuffer()
                            indexBufferOffset:index_offset
                                instanceCount:1
                                   baseVertex:base_vertex
                                 baseInstance:0];
    }
    break;

    case GPUDevice::DrawBarrier::Full:
    {
      DebugAssert(m_num_current_render_targets == 1);

      // {indices consumed by the first primitive, indices advanced per
      // subsequent primitive} for each MTLPrimitiveType value.
      static constexpr const u8 vertices_per_primitive[][2] = {
        {1, 1}, // MTLPrimitiveTypePoint
        {2, 2}, // MTLPrimitiveTypeLine
        {2, 1}, // MTLPrimitiveTypeLineStrip
        {3, 3}, // MTLPrimitiveTypeTriangle
        {3, 1}, // MTLPrimitiveTypeTriangleStrip
      };

      const u32 first_step =
        vertices_per_primitive[static_cast<size_t>(m_current_pipeline->GetPrimitive())][0] * sizeof(u16);
      const u32 index_step =
        vertices_per_primitive[static_cast<size_t>(m_current_pipeline->GetPrimitive())][1] * sizeof(u16);
      const u32 end_offset = (base_index + index_count) * sizeof(u16);

      // first primitive
      if (!skip_first_barrier)
      {
        s_stats.num_barriers++;
        [m_render_encoder memoryBarrierWithScope:MTLBarrierScopeRenderTargets
                                     afterStages:MTLRenderStageFragment
                                    beforeStages:MTLRenderStageFragment];
      }
      s_stats.num_draws++;
      // NOTE(review): this draw, and each per-primitive draw below, passes the
      // full index_count rather than the per-primitive count, so successive
      // iterations appear to draw past the committed [base, base+count) range.
      // Verify intent against the other backends' DrawIndexedWithBarrier.
      [m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive()
                                   indexCount:index_count
                                    indexType:MTLIndexTypeUInt16
                                  indexBuffer:m_index_buffer.GetBuffer()
                            indexBufferOffset:index_offset
                                instanceCount:1
                                   baseVertex:base_vertex
                                 baseInstance:0];

      index_offset += first_step;

      // remaining primitices
      for (; index_offset < end_offset; index_offset += index_step)
      {
        s_stats.num_barriers++;
        s_stats.num_draws++;

        // Barrier between every primitive so each sees the previous one's
        // render-target writes.
        [m_render_encoder memoryBarrierWithScope:MTLBarrierScopeRenderTargets
                                     afterStages:MTLRenderStageFragment
                                    beforeStages:MTLRenderStageFragment];
        [m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive()
                                     indexCount:index_count
                                      indexType:MTLIndexTypeUInt16
                                    indexBuffer:m_index_buffer.GetBuffer()
                              indexBufferOffset:index_offset
                                  instanceCount:1
                                     baseVertex:base_vertex
                                   baseInstance:0];
      }
    }
    break;

      DefaultCaseIsUnreachable();
  }
}
   2289 
   2290 id<MTLBlitCommandEncoder> MetalDevice::GetBlitEncoder(bool is_inline)
   2291 {
   2292   @autoreleasepool
   2293   {
   2294     if (!is_inline)
   2295     {
   2296       if (!m_upload_cmdbuf)
   2297       {
   2298         m_upload_cmdbuf = [[m_queue commandBufferWithUnretainedReferences] retain];
   2299         m_upload_encoder = [[m_upload_cmdbuf blitCommandEncoder] retain];
   2300         [m_upload_encoder setLabel:@"Upload Encoder"];
   2301       }
   2302       return m_upload_encoder;
   2303     }
   2304 
   2305     // Interleaved with draws.
   2306     if (m_inline_upload_encoder != nil)
   2307       return m_inline_upload_encoder;
   2308 
   2309     if (InRenderPass())
   2310       EndRenderPass();
   2311     m_inline_upload_encoder = [[m_render_cmdbuf blitCommandEncoder] retain];
   2312     return m_inline_upload_encoder;
   2313   }
   2314 }
   2315 
   2316 bool MetalDevice::BeginPresent(bool skip_present, u32 clear_color)
   2317 {
   2318   @autoreleasepool
   2319   {
   2320     if (skip_present)
   2321       return false;
   2322 
   2323     if (m_layer == nil)
   2324     {
   2325       TrimTexturePool();
   2326       return false;
   2327     }
   2328 
   2329     EndAnyEncoding();
   2330 
   2331     m_layer_drawable = [[m_layer nextDrawable] retain];
   2332     if (m_layer_drawable == nil)
   2333     {
   2334       TrimTexturePool();
   2335       return false;
   2336     }
   2337 
   2338     SetViewportAndScissor(0, 0, m_window_info.surface_width, m_window_info.surface_height);
   2339 
   2340     // Set up rendering to layer.
   2341     const GSVector4 clear_color_v = GSVector4::rgba32(clear_color);
   2342     id<MTLTexture> layer_texture = [m_layer_drawable texture];
   2343     m_layer_pass_desc.colorAttachments[0].texture = layer_texture;
   2344     m_layer_pass_desc.colorAttachments[0].loadAction = MTLLoadActionClear;
   2345     m_layer_pass_desc.colorAttachments[0].clearColor = MTLClearColorMake(clear_color_v.r, clear_color_v.g, clear_color_v.g, clear_color_v.a);
   2346     m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:m_layer_pass_desc] retain];
   2347     s_stats.num_render_passes++;
   2348     std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
   2349     m_num_current_render_targets = 0;
   2350     m_current_feedback_loop = GPUPipeline::NoRenderPassFlags;
   2351     m_current_depth_target = nullptr;
   2352     m_current_pipeline = nullptr;
   2353     m_current_depth_state = nil;
   2354     SetInitialEncoderState();
   2355     return true;
   2356   }
   2357 }
   2358 
void MetalDevice::EndPresent(bool explicit_present)
{
  // Metal backend presents inline; explicit (deferred) presentation is unimplemented.
  DebugAssert(!explicit_present);

  // TODO: Explicit present
  DebugAssert(m_num_current_render_targets == 0 && !m_current_depth_target);
  EndAnyEncoding();

  // Queue presentation of the drawable acquired in BeginPresent(), and defer the
  // release of our reference until the command buffer's fence counter signals.
  [m_render_cmdbuf presentDrawable:m_layer_drawable];
  DeferRelease(m_layer_drawable);
  m_layer_drawable = nil;
  SubmitCommandBuffer();
  TrimTexturePool();
}
   2373 
void MetalDevice::SubmitPresent()
{
  // Presentation is submitted inline in EndPresent(); a separate submit step
  // does not exist for the Metal backend.
  Panic("Not supported by this API.");
}
   2378 
void MetalDevice::CreateCommandBuffer()
{
  @autoreleasepool
  {
    DebugAssert(m_render_cmdbuf == nil);
    // Each command buffer is tagged with a unique, monotonically-increasing fence
    // value. The completion handler publishes it off-thread so deferred releases
    // know when the GPU has finished with their resources.
    const u64 fence_counter = ++m_current_fence_counter;
    m_render_cmdbuf = [[m_queue commandBufferWithUnretainedReferences] retain];
    [m_render_cmdbuf addCompletedHandler:[this, fence_counter](id<MTLCommandBuffer> buffer) {
      CommandBufferCompletedOffThread(buffer, fence_counter);
    }];
  }

  // Opportunistically free anything whose fence has already signalled.
  CleanupObjects();
}
   2393 
   2394 void MetalDevice::CommandBufferCompletedOffThread(id<MTLCommandBuffer> buffer, u64 fence_counter)
   2395 {
   2396   std::unique_lock lock(m_fence_mutex);
   2397   m_completed_fence_counter.store(std::max(m_completed_fence_counter.load(std::memory_order_acquire), fence_counter),
   2398                                   std::memory_order_release);
   2399 
   2400   if (m_gpu_timing_enabled)
   2401   {
   2402     const double begin = std::max(m_last_gpu_time_end, [buffer GPUStartTime]);
   2403     const double end = [buffer GPUEndTime];
   2404     if (end > begin)
   2405     {
   2406       m_accumulated_gpu_time += end - begin;
   2407       m_last_gpu_time_end = end;
   2408     }
   2409   }
   2410 }
   2411 
void MetalDevice::SubmitCommandBuffer(bool wait_for_completion)
{
  // Commit the upload command buffer first, so texture/buffer uploads are
  // visible to the render command buffer that consumes them.
  if (m_upload_cmdbuf != nil)
  {
    [m_upload_encoder endEncoding];
    [m_upload_encoder release];
    m_upload_encoder = nil;
    [m_upload_cmdbuf commit];
    [m_upload_cmdbuf release];
    m_upload_cmdbuf = nil;
  }

  if (m_render_cmdbuf != nil)
  {
    // Any open encoder must be ended before the buffer can be committed.
    if (InRenderPass())
      EndRenderPass();
    else if (IsInlineUploading())
      EndInlineUploading();

    [m_render_cmdbuf commit];

    if (wait_for_completion)
      [m_render_cmdbuf waitUntilCompleted];

    [m_render_cmdbuf release];
    m_render_cmdbuf = nil;
  }

  // Immediately start a fresh command buffer so subsequent work has somewhere to go.
  CreateCommandBuffer();
}
   2442 
   2443 void MetalDevice::SubmitCommandBufferAndRestartRenderPass(const char* reason)
   2444 {
   2445   DEV_LOG("Submitting command buffer and restarting render pass due to {}", reason);
   2446 
   2447   const bool in_render_pass = InRenderPass();
   2448   SubmitCommandBuffer();
   2449   if (in_render_pass)
   2450     BeginRenderPass();
   2451 }
   2452 
   2453 void MetalDevice::WaitForFenceCounter(u64 counter)
   2454 {
   2455   if (m_completed_fence_counter.load(std::memory_order_relaxed) >= counter)
   2456     return;
   2457 
   2458   // TODO: There has to be a better way to do this..
   2459   std::unique_lock lock(m_fence_mutex);
   2460   while (m_completed_fence_counter.load(std::memory_order_acquire) < counter)
   2461   {
   2462     lock.unlock();
   2463     pthread_yield_np();
   2464     lock.lock();
   2465   }
   2466 
   2467   CleanupObjects();
   2468 }
   2469 
   2470 void MetalDevice::WaitForPreviousCommandBuffers()
   2471 {
   2472   // Early init?
   2473   if (m_current_fence_counter == 0)
   2474     return;
   2475 
   2476   WaitForFenceCounter(m_current_fence_counter - 1);
   2477 }
   2478 
void MetalDevice::ExecuteAndWaitForGPUIdle()
{
  // Submit all pending work and block until the GPU has drained it, then
  // release everything whose fence has now signalled.
  SubmitCommandBuffer(true);
  CleanupObjects();
}
   2484 
   2485 void MetalDevice::CleanupObjects()
   2486 {
   2487   const u64 counter = m_completed_fence_counter.load(std::memory_order_acquire);
   2488   while (m_cleanup_objects.size() > 0 && m_cleanup_objects.front().first <= counter)
   2489   {
   2490     [m_cleanup_objects.front().second release];
   2491     m_cleanup_objects.pop_front();
   2492   }
   2493 }
   2494 
   2495 void MetalDevice::DeferRelease(id obj)
   2496 {
   2497   MetalDevice& dev = GetInstance();
   2498   dev.m_cleanup_objects.emplace_back(dev.m_current_fence_counter, obj);
   2499 }
   2500 
   2501 void MetalDevice::DeferRelease(u64 fence_counter, id obj)
   2502 {
   2503   MetalDevice& dev = GetInstance();
   2504   dev.m_cleanup_objects.emplace_back(fence_counter, obj);
   2505 }
   2506 
   2507 std::unique_ptr<GPUDevice> GPUDevice::WrapNewMetalDevice()
   2508 {
   2509   return std::unique_ptr<GPUDevice>(new MetalDevice());
   2510 }
   2511 
   2512 GPUDevice::AdapterInfoList GPUDevice::WrapGetMetalAdapterList()
   2513 {
   2514   AdapterInfoList ret;
   2515   @autoreleasepool
   2516   {
   2517     NSArray<id<MTLDevice>>* devices = [MTLCopyAllDevices() autorelease];
   2518     const u32 count = static_cast<u32>([devices count]);
   2519     ret.reserve(count);
   2520     for (u32 i = 0; i < count; i++)
   2521     {
   2522       AdapterInfo ai;
   2523       ai.name = [devices[i].name UTF8String];
   2524       ai.max_texture_size = GetMetalMaxTextureSize(devices[i]);
   2525       ai.max_multisamples = GetMetalMaxMultisamples(devices[i]);
   2526       ai.supports_sample_shading = true;
   2527       ret.push_back(std::move(ai));
   2528     }
   2529   }
   2530 
   2531   return ret;
   2532 }