metal_device.mm (85886B)
1 // SPDX-FileCopyrightText: 2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) 3 4 #include "metal_device.h" 5 6 #include "common/align.h" 7 #include "common/assert.h" 8 #include "common/error.h" 9 #include "common/file_system.h" 10 #include "common/log.h" 11 #include "common/path.h" 12 #include "common/scoped_guard.h" 13 #include "common/string_util.h" 14 15 // TODO FIXME... 16 #define FMT_EXCEPTIONS 0 17 #include "fmt/format.h" 18 19 #include <array> 20 #include <pthread.h> 21 22 Log_SetChannel(MetalDevice); 23 24 // TODO: Disable hazard tracking and issue barriers explicitly. 25 26 // Looking across a range of GPUs, the optimal copy alignment for Vulkan drivers seems 27 // to be between 1 (AMD/NV) and 64 (Intel). So, we'll go with 64 here. 28 static constexpr u32 TEXTURE_UPLOAD_ALIGNMENT = 64; 29 30 // The pitch alignment must be less or equal to the upload alignment. 31 // We need 32 here for AVX2, so 64 is also fine. 32 static constexpr u32 TEXTURE_UPLOAD_PITCH_ALIGNMENT = 64; 33 34 static constexpr std::array<MTLPixelFormat, static_cast<u32>(GPUTexture::Format::MaxCount)> s_pixel_format_mapping = { 35 MTLPixelFormatInvalid, // Unknown 36 MTLPixelFormatRGBA8Unorm, // RGBA8 37 MTLPixelFormatBGRA8Unorm, // BGRA8 38 MTLPixelFormatB5G6R5Unorm, // RGB565 39 MTLPixelFormatA1BGR5Unorm, // RGBA5551 40 MTLPixelFormatR8Unorm, // R8 41 MTLPixelFormatDepth16Unorm, // D16 42 MTLPixelFormatDepth24Unorm_Stencil8, // D24S8 43 MTLPixelFormatDepth32Float, // D32F 44 MTLPixelFormatDepth32Float_Stencil8, // D32FS8 45 MTLPixelFormatR16Unorm, // R16 46 MTLPixelFormatR16Sint, // R16I 47 MTLPixelFormatR16Uint, // R16U 48 MTLPixelFormatR16Float, // R16F 49 MTLPixelFormatR32Sint, // R32I 50 MTLPixelFormatR32Uint, // R32U 51 MTLPixelFormatR32Float, // R32F 52 MTLPixelFormatRG8Unorm, // RG8 53 MTLPixelFormatRG16Unorm, // RG16 54 MTLPixelFormatRG16Float, // RG16F 55 MTLPixelFormatRG32Float, // RG32F 56 MTLPixelFormatRGBA16Unorm, // RGBA16 57 MTLPixelFormatRGBA16Float, // RGBA16F 58 MTLPixelFormatRGBA32Float, // RGBA32F 59 MTLPixelFormatBGR10A2Unorm, // RGB10A2 60 }; 61 62 static NSString* StringViewToNSString(std::string_view str) 63 { 64 if (str.empty()) 65 return nil; 66 67 return [[[NSString alloc] autorelease] initWithBytes:str.data() 68 length:static_cast<NSUInteger>(str.length()) 69 encoding:NSUTF8StringEncoding]; 70 } 71 72 static void LogNSError(NSError* error, std::string_view message) 73 { 74 Log::FastWrite("MetalDevice", LOGLEVEL_ERROR, message); 75 Log::FastWrite("MetalDevice", LOGLEVEL_ERROR, " NSError Code: {}", static_cast<u32>(error.code)); 76 Log::FastWrite("MetalDevice", LOGLEVEL_ERROR, " NSError Description: {}", [error.description UTF8String]); 77 } 78 79 static void NSErrorToErrorObject(Error* errptr, std::string_view message, NSError* error) 80 { 81 Error::SetStringFmt(errptr, "{}NSError Code {}: {}", message, static_cast<u32>(error.code), [error.description UTF8String]); 82 } 83 84 static GPUTexture::Format GetTextureFormatForMTLFormat(MTLPixelFormat fmt) 85 { 86 for (u32 i = 0; i < static_cast<u32>(GPUTexture::Format::MaxCount); i++) 87 { 88 if (s_pixel_format_mapping[i] == fmt) 89 return static_cast<GPUTexture::Format>(i); 90 } 91 92 return GPUTexture::Format::Unknown; 93 } 94 95 static u32 GetMetalMaxTextureSize(id<MTLDevice> device) 96 { 97 // https://gist.github.com/kylehowells/63d0723abc9588eb734cade4b7df660d 98 if ([device supportsFamily:MTLGPUFamilyMacCatalyst1] || [device supportsFamily:MTLGPUFamilyMac1] || 99 [device supportsFamily:MTLGPUFamilyApple3]) 100 { 101 return 16384; 102 } 103 else 104 { 105 return 8192; 106 } 107 } 108 109 static u32 GetMetalMaxMultisamples(id<MTLDevice> device) 110 { 111 u32 max_multisamples = 0; 112 for (u32 multisamples = 1; multisamples < 16; multisamples *= 2) 113 { 114 if (![device supportsTextureSampleCount:multisamples]) 115 break; 116 max_multisamples = multisamples; 117 } 118 return max_multisamples; 119 } 120 121 template<typename F> 122 static void RunOnMainThread(F&& f) 123 { 124 if ([NSThread isMainThread]) 125 f(); 126 else 127 dispatch_sync(dispatch_get_main_queue(), f); 128 } 129 130 MetalDevice::MetalDevice() : m_current_viewport(0, 0, 1, 1), m_current_scissor(0, 0, 1, 1) 131 { 132 } 133 134 MetalDevice::~MetalDevice() 135 { 136 Assert(m_layer == nil); 137 Assert(m_device == nil); 138 } 139 140 RenderAPI MetalDevice::GetRenderAPI() const 141 { 142 return RenderAPI::Metal; 143 } 144 145 bool MetalDevice::HasSurface() const 146 { 147 return (m_layer != nil); 148 } 149 150 void MetalDevice::SetVSyncMode(GPUVSyncMode mode, bool allow_present_throttle) 151 { 152 // Metal does not support mailbox mode. 153 mode = (mode == GPUVSyncMode::Mailbox) ? GPUVSyncMode::FIFO : mode; 154 m_allow_present_throttle = allow_present_throttle; 155 156 if (m_vsync_mode == mode) 157 return; 158 159 m_vsync_mode = mode; 160 if (m_layer != nil) 161 [m_layer setDisplaySyncEnabled:m_vsync_mode == GPUVSyncMode::FIFO]; 162 } 163 164 bool MetalDevice::CreateDevice(std::string_view adapter, bool threaded_presentation, 165 std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features, 166 Error* error) 167 { 168 @autoreleasepool 169 { 170 id<MTLDevice> device = nil; 171 if (!adapter.empty()) 172 { 173 NSArray<id<MTLDevice>>* devices = [MTLCopyAllDevices() autorelease]; 174 const u32 count = static_cast<u32>([devices count]); 175 for (u32 i = 0; i < count; i++) 176 { 177 if (adapter == [[devices[i] name] UTF8String]) 178 { 179 device = devices[i]; 180 break; 181 } 182 } 183 184 if (device == nil) 185 ERROR_LOG("Failed to find device named '{}'. Trying default.", adapter); 186 } 187 188 if (device == nil) 189 { 190 device = [MTLCreateSystemDefaultDevice() autorelease]; 191 if (device == nil) 192 { 193 Error::SetStringView(error, "Failed to create default Metal device."); 194 return false; 195 } 196 } 197 198 id<MTLCommandQueue> queue = [[device newCommandQueue] autorelease]; 199 if (queue == nil) 200 { 201 Error::SetStringView(error, "Failed to create command queue."); 202 return false; 203 } 204 205 m_device = [device retain]; 206 m_queue = [queue retain]; 207 INFO_LOG("Metal Device: {}", [[m_device name] UTF8String]); 208 209 SetFeatures(disabled_features); 210 211 if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateLayer()) 212 { 213 Error::SetStringView(error, "Failed to create layer."); 214 return false; 215 } 216 217 CreateCommandBuffer(); 218 RenderBlankFrame(); 219 220 if (!LoadShaders()) 221 { 222 Error::SetStringView(error, "Failed to load shaders."); 223 return false; 224 } 225 226 if (!CreateBuffers()) 227 { 228 Error::SetStringView(error, "Failed to create buffers."); 229 return false; 230 } 231 232 return true; 233 } 234 } 235 236 void MetalDevice::SetFeatures(FeatureMask disabled_features) 237 { 238 m_max_texture_size = GetMetalMaxTextureSize(m_device); 239 m_max_multisamples = GetMetalMaxMultisamples(m_device); 240 241 // Framebuffer fetch requires MSL 2.3 and an Apple GPU family. 242 const bool supports_fbfetch = [m_device supportsFamily:MTLGPUFamilyApple1]; 243 244 // If fbfetch is disabled, barriers aren't supported on Apple GPUs. 245 const bool supports_barriers = 246 ([m_device supportsFamily:MTLGPUFamilyMac1] && ![m_device supportsFamily:MTLGPUFamilyApple3]); 247 248 m_features.dual_source_blend = !(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND); 249 m_features.framebuffer_fetch = !(disabled_features & FEATURE_MASK_FRAMEBUFFER_FETCH) && supports_fbfetch; 250 m_features.per_sample_shading = true; 251 m_features.noperspective_interpolation = true; 252 m_features.texture_copy_to_self = !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF); 253 m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS); 254 m_features.texture_buffers_emulated_with_ssbo = true; 255 m_features.feedback_loops = (m_features.framebuffer_fetch || supports_barriers); 256 m_features.geometry_shaders = false; 257 m_features.partial_msaa_resolve = false; 258 m_features.memory_import = true; 259 m_features.explicit_present = false; 260 m_features.shader_cache = true; 261 m_features.pipeline_cache = false; 262 m_features.prefer_unused_textures = true; 263 } 264 265 bool MetalDevice::LoadShaders() 266 { 267 @autoreleasepool 268 { 269 auto try_lib = [this](NSString* name) -> id<MTLLibrary> { 270 NSBundle* bundle = [NSBundle mainBundle]; 271 NSString* path = [bundle pathForResource:name ofType:@"metallib"]; 272 if (path == nil) 273 { 274 // Xcode places it alongside the binary. 275 path = [NSString stringWithFormat:@"%@/%@.metallib", [bundle bundlePath], name]; 276 if (![[NSFileManager defaultManager] fileExistsAtPath:path]) 277 return nil; 278 } 279 280 id<MTLLibrary> lib = [m_device newLibraryWithFile:path error:nil]; 281 if (lib == nil) 282 return nil; 283 284 return [lib retain]; 285 }; 286 287 if (!(m_shaders = try_lib(@"Metal23")) && !(m_shaders = try_lib(@"Metal22")) && 288 !(m_shaders = try_lib(@"Metal21")) && !(m_shaders = try_lib(@"default"))) 289 { 290 return false; 291 } 292 293 return true; 294 } 295 } 296 297 id<MTLFunction> MetalDevice::GetFunctionFromLibrary(id<MTLLibrary> library, NSString* name) 298 { 299 id<MTLFunction> function = [library newFunctionWithName:name]; 300 return function; 301 } 302 303 id<MTLComputePipelineState> MetalDevice::CreateComputePipeline(id<MTLFunction> function, NSString* name) 304 { 305 MTLComputePipelineDescriptor* desc = [MTLComputePipelineDescriptor new]; 306 if (name != nil) 307 [desc setLabel:name]; 308 [desc setComputeFunction:function]; 309 310 NSError* err = nil; 311 id<MTLComputePipelineState> pipeline = [m_device newComputePipelineStateWithDescriptor:desc 312 options:MTLPipelineOptionNone 313 reflection:nil 314 error:&err]; 315 [desc release]; 316 if (pipeline == nil) 317 { 318 LogNSError(err, "Create compute pipeline failed:"); 319 return nil; 320 } 321 322 return pipeline; 323 } 324 325 void MetalDevice::DestroyDevice() 326 { 327 WaitForPreviousCommandBuffers(); 328 329 if (InRenderPass()) 330 EndRenderPass(); 331 332 if (m_upload_cmdbuf != nil) 333 { 334 [m_upload_encoder endEncoding]; 335 [m_upload_encoder release]; 336 m_upload_encoder = nil; 337 [m_upload_cmdbuf release]; 338 m_upload_cmdbuf = nil; 339 } 340 if (m_render_cmdbuf != nil) 341 { 342 [m_render_cmdbuf release]; 343 m_render_cmdbuf = nil; 344 } 345 346 DestroyBuffers(); 347 348 for (auto& it : m_cleanup_objects) 349 [it.second release]; 350 m_cleanup_objects.clear(); 351 352 for (auto& it : m_depth_states) 353 { 354 if (it.second != nil) 355 [it.second release]; 356 } 357 m_depth_states.clear(); 358 for (auto& it : m_resolve_pipelines) 359 { 360 if (it.second != nil) 361 [it.second release]; 362 } 363 m_resolve_pipelines.clear(); 364 for (auto& it : m_clear_pipelines) 365 { 366 if (it.second != nil) 367 [it.second release]; 368 } 369 m_clear_pipelines.clear(); 370 if (m_shaders != nil) 371 { 372 [m_shaders release]; 373 m_shaders = nil; 374 } 375 if (m_queue != nil) 376 { 377 [m_queue release]; 378 m_queue = nil; 379 } 380 if (m_device != nil) 381 { 382 [m_device release]; 383 m_device = nil; 384 } 385 } 386 387 bool MetalDevice::CreateLayer() 388 { 389 @autoreleasepool 390 { 391 RunOnMainThread([this]() { 392 @autoreleasepool 393 { 394 INFO_LOG("Creating a {}x{} Metal layer.", m_window_info.surface_width, m_window_info.surface_height); 395 const auto size = 396 CGSizeMake(static_cast<float>(m_window_info.surface_width), static_cast<float>(m_window_info.surface_height)); 397 m_layer = [CAMetalLayer layer]; 398 [m_layer setDevice:m_device]; 399 [m_layer setDrawableSize:size]; 400 401 // Default should be BGRA8. 402 const MTLPixelFormat layer_fmt = [m_layer pixelFormat]; 403 m_window_info.surface_format = GetTextureFormatForMTLFormat(layer_fmt); 404 if (m_window_info.surface_format == GPUTexture::Format::Unknown) 405 { 406 ERROR_LOG("Invalid pixel format {} in layer, using BGRA8.", static_cast<u32>(layer_fmt)); 407 [m_layer setPixelFormat:MTLPixelFormatBGRA8Unorm]; 408 m_window_info.surface_format = GPUTexture::Format::BGRA8; 409 } 410 411 VERBOSE_LOG("Metal layer pixel format is {}.", GPUTexture::GetFormatName(m_window_info.surface_format)); 412 413 NSView* view = GetWindowView(); 414 [view setWantsLayer:TRUE]; 415 [view setLayer:m_layer]; 416 } 417 }); 418 419 // Metal does not support mailbox mode. 420 m_vsync_mode = (m_vsync_mode == GPUVSyncMode::Mailbox) ? GPUVSyncMode::FIFO : m_vsync_mode; 421 [m_layer setDisplaySyncEnabled:m_vsync_mode == GPUVSyncMode::FIFO]; 422 423 DebugAssert(m_layer_pass_desc == nil); 424 m_layer_pass_desc = [[MTLRenderPassDescriptor renderPassDescriptor] retain]; 425 m_layer_pass_desc.renderTargetWidth = m_window_info.surface_width; 426 m_layer_pass_desc.renderTargetHeight = m_window_info.surface_height; 427 m_layer_pass_desc.colorAttachments[0].loadAction = MTLLoadActionClear; 428 m_layer_pass_desc.colorAttachments[0].storeAction = MTLStoreActionStore; 429 m_layer_pass_desc.colorAttachments[0].clearColor = MTLClearColorMake(0.0, 0.0, 0.0, 1.0); 430 return true; 431 } 432 } 433 434 void MetalDevice::DestroyLayer() 435 { 436 if (m_layer == nil) 437 return; 438 439 // Should wait for previous command buffers to finish, which might be rendering to drawables. 440 WaitForPreviousCommandBuffers(); 441 442 [m_layer_pass_desc release]; 443 m_layer_pass_desc = nil; 444 m_window_info.surface_format = GPUTexture::Format::Unknown; 445 446 RunOnMainThread([this]() { 447 NSView* view = GetWindowView(); 448 [view setLayer:nil]; 449 [view setWantsLayer:FALSE]; 450 [m_layer release]; 451 m_layer = nullptr; 452 }); 453 } 454 455 void MetalDevice::RenderBlankFrame() 456 { 457 DebugAssert(!InRenderPass()); 458 if (m_layer == nil) 459 return; 460 461 @autoreleasepool 462 { 463 id<MTLDrawable> drawable = [[m_layer nextDrawable] retain]; 464 m_layer_pass_desc.colorAttachments[0].texture = [drawable texture]; 465 id<MTLRenderCommandEncoder> encoder = [m_render_cmdbuf renderCommandEncoderWithDescriptor:m_layer_pass_desc]; 466 [encoder endEncoding]; 467 [m_render_cmdbuf presentDrawable:drawable]; 468 DeferRelease(drawable); 469 SubmitCommandBuffer(); 470 } 471 } 472 473 bool MetalDevice::UpdateWindow() 474 { 475 if (InRenderPass()) 476 EndRenderPass(); 477 DestroyLayer(); 478 479 if (!AcquireWindow(false)) 480 return false; 481 482 if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateLayer()) 483 { 484 ERROR_LOG("Failed to create layer on updated window"); 485 return false; 486 } 487 488 return true; 489 } 490 491 void MetalDevice::DestroySurface() 492 { 493 DestroyLayer(); 494 } 495 496 void MetalDevice::ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) 497 { 498 @autoreleasepool 499 { 500 m_window_info.surface_scale = new_window_scale; 501 if (static_cast<u32>(new_window_width) == m_window_info.surface_width && 502 static_cast<u32>(new_window_height) == m_window_info.surface_height) 503 { 504 return; 505 } 506 507 m_window_info.surface_width = new_window_width; 508 m_window_info.surface_height = new_window_height; 509 510 [m_layer setDrawableSize:CGSizeMake(new_window_width, new_window_height)]; 511 m_layer_pass_desc.renderTargetWidth = m_window_info.surface_width; 512 m_layer_pass_desc.renderTargetHeight = m_window_info.surface_height; 513 } 514 } 515 516 std::string MetalDevice::GetDriverInfo() const 517 { 518 @autoreleasepool 519 { 520 return ([[m_device description] UTF8String]); 521 } 522 } 523 524 bool MetalDevice::CreateBuffers() 525 { 526 if (!m_vertex_buffer.Create(m_device, VERTEX_BUFFER_SIZE) || !m_index_buffer.Create(m_device, INDEX_BUFFER_SIZE) || 527 !m_uniform_buffer.Create(m_device, UNIFORM_BUFFER_SIZE) || 528 !m_texture_upload_buffer.Create(m_device, TEXTURE_STREAM_BUFFER_SIZE)) 529 { 530 ERROR_LOG("Failed to create vertex/index/uniform buffers."); 531 return false; 532 } 533 534 return true; 535 } 536 537 void MetalDevice::DestroyBuffers() 538 { 539 m_texture_upload_buffer.Destroy(); 540 m_uniform_buffer.Destroy(); 541 m_vertex_buffer.Destroy(); 542 m_index_buffer.Destroy(); 543 } 544 545 bool MetalDevice::IsRenderTargetBound(const GPUTexture* tex) const 546 { 547 for (u32 i = 0; i < m_num_current_render_targets; i++) 548 { 549 if (m_current_render_targets[i] == tex) 550 return true; 551 } 552 553 return false; 554 } 555 556 bool MetalDevice::SetGPUTimingEnabled(bool enabled) 557 { 558 if (m_gpu_timing_enabled == enabled) 559 return true; 560 561 std::unique_lock lock(m_fence_mutex); 562 m_gpu_timing_enabled = enabled; 563 m_accumulated_gpu_time = 0.0; 564 m_last_gpu_time_end = 0.0; 565 return true; 566 } 567 568 float MetalDevice::GetAndResetAccumulatedGPUTime() 569 { 570 std::unique_lock lock(m_fence_mutex); 571 return std::exchange(m_accumulated_gpu_time, 0.0) * 1000.0; 572 } 573 574 MetalShader::MetalShader(GPUShaderStage stage, id<MTLLibrary> library, id<MTLFunction> function) 575 : GPUShader(stage), m_library(library), m_function(function) 576 { 577 } 578 579 MetalShader::~MetalShader() 580 { 581 MetalDevice::DeferRelease(m_function); 582 MetalDevice::DeferRelease(m_library); 583 } 584 585 void MetalShader::SetDebugName(std::string_view name) 586 { 587 @autoreleasepool 588 { 589 [m_function setLabel:StringViewToNSString(name)]; 590 } 591 } 592 593 // TODO: Clean this up, somehow.. 594 namespace EmuFolders { 595 extern std::string DataRoot; 596 } 597 static void DumpShader(u32 n, std::string_view suffix, std::string_view data) 598 { 599 if (data.empty()) 600 return; 601 602 auto fp = FileSystem::OpenManagedCFile( 603 Path::Combine(EmuFolders::DataRoot, fmt::format("shader{}_{}.txt", suffix, n)).c_str(), "wb"); 604 if (!fp) 605 return; 606 607 std::fwrite(data.data(), data.length(), 1, fp.get()); 608 } 609 610 std::unique_ptr<GPUShader> MetalDevice::CreateShaderFromMSL(GPUShaderStage stage, std::string_view source, 611 std::string_view entry_point, Error* error) 612 { 613 @autoreleasepool 614 { 615 NSString* const ns_source = StringViewToNSString(source); 616 NSError* nserror = nullptr; 617 id<MTLLibrary> library = [m_device newLibraryWithSource:ns_source options:nil error:&nserror]; 618 if (!library) 619 { 620 LogNSError(nserror, TinyString::from_format("Failed to compile {} shader", GPUShader::GetStageName(stage))); 621 622 const char* utf_error = [nserror.description UTF8String]; 623 DumpBadShader(source, fmt::format("Error {}: {}", static_cast<u32>(nserror.code), utf_error ? utf_error : "")); 624 Error::SetStringFmt(error, "Failed to compile {} shader: Error {}: {}", GPUShader::GetStageName(stage), 625 static_cast<u32>(nserror.code), utf_error ? utf_error : ""); 626 return {}; 627 } 628 629 id<MTLFunction> function = [library newFunctionWithName:StringViewToNSString(entry_point)]; 630 if (!function) 631 { 632 ERROR_LOG("Failed to get main function in compiled library"); 633 Error::SetStringView(error, "Failed to get main function in compiled library"); 634 return {}; 635 } 636 637 return std::unique_ptr<MetalShader>(new MetalShader(stage, [library retain], [function retain])); 638 } 639 } 640 641 std::unique_ptr<GPUShader> MetalDevice::CreateShaderFromBinary(GPUShaderStage stage, std::span<const u8> data, 642 Error* error) 643 { 644 const std::string_view str_data(reinterpret_cast<const char*>(data.data()), data.size()); 645 return CreateShaderFromMSL(stage, str_data, "main0", error); 646 } 647 648 std::unique_ptr<GPUShader> MetalDevice::CreateShaderFromSource(GPUShaderStage stage, GPUShaderLanguage language, 649 std::string_view source, const char* entry_point, 650 DynamicHeapArray<u8>* out_binary, Error* error) 651 { 652 static constexpr bool dump_shaders = false; 653 654 DynamicHeapArray<u8> spv; 655 if (!CompileGLSLShaderToVulkanSpv(stage, language, source, entry_point, !m_debug_device, false, &spv, error)) 656 return {}; 657 658 std::string msl; 659 if (!TranslateVulkanSpvToLanguage(spv.cspan(), stage, GPUShaderLanguage::MSL, 230, &msl, error)) 660 return {}; 661 662 if constexpr (dump_shaders) 663 { 664 static unsigned s_next_id = 0; 665 ++s_next_id; 666 DumpShader(s_next_id, "_input", source); 667 DumpShader(s_next_id, "_msl", msl); 668 } 669 670 if (out_binary) 671 { 672 out_binary->resize(msl.size()); 673 std::memcpy(out_binary->data(), msl.data(), msl.size()); 674 } 675 676 return CreateShaderFromMSL(stage, msl, "main0", error); 677 } 678 679 MetalPipeline::MetalPipeline(id<MTLRenderPipelineState> pipeline, id<MTLDepthStencilState> depth, MTLCullMode cull_mode, 680 MTLPrimitiveType primitive) 681 : m_pipeline(pipeline), m_depth(depth), m_cull_mode(cull_mode), m_primitive(primitive) 682 { 683 } 684 685 MetalPipeline::~MetalPipeline() 686 { 687 MetalDevice::DeferRelease(m_pipeline); 688 } 689 690 void MetalPipeline::SetDebugName(std::string_view name) 691 { 692 // readonly property :/ 693 } 694 695 id<MTLDepthStencilState> MetalDevice::GetDepthState(const GPUPipeline::DepthState& ds) 696 { 697 const auto it = m_depth_states.find(ds.key); 698 if (it != m_depth_states.end()) 699 return it->second; 700 701 @autoreleasepool 702 { 703 static constexpr std::array<MTLCompareFunction, static_cast<u32>(GPUPipeline::DepthFunc::MaxCount)> func_mapping = { 704 { 705 MTLCompareFunctionNever, // Never 706 MTLCompareFunctionAlways, // Always 707 MTLCompareFunctionLess, // Less 708 MTLCompareFunctionLessEqual, // LessEqual 709 MTLCompareFunctionGreater, // Greater 710 MTLCompareFunctionGreaterEqual, // GreaterEqual 711 MTLCompareFunctionEqual, // Equal 712 }}; 713 714 MTLDepthStencilDescriptor* desc = [[MTLDepthStencilDescriptor new] autorelease]; 715 desc.depthCompareFunction = func_mapping[static_cast<u8>(ds.depth_test.GetValue())]; 716 desc.depthWriteEnabled = ds.depth_write ? TRUE : FALSE; 717 718 id<MTLDepthStencilState> state = [m_device newDepthStencilStateWithDescriptor:desc]; 719 m_depth_states.emplace(ds.key, state); 720 if (state == nil) [[unlikely]] 721 ERROR_LOG("Failed to create depth-stencil state."); 722 723 return state; 724 } 725 } 726 727 std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) 728 { 729 @autoreleasepool 730 { 731 static constexpr std::array<MTLPrimitiveTopologyClass, static_cast<u32>(GPUPipeline::Primitive::MaxCount)> 732 primitive_classes = {{ 733 MTLPrimitiveTopologyClassPoint, // Points 734 MTLPrimitiveTopologyClassLine, // Lines 735 MTLPrimitiveTopologyClassTriangle, // Triangles 736 MTLPrimitiveTopologyClassTriangle, // TriangleStrips 737 }}; 738 static constexpr std::array<MTLPrimitiveType, static_cast<u32>(GPUPipeline::Primitive::MaxCount)> primitives = {{ 739 MTLPrimitiveTypePoint, // Points 740 MTLPrimitiveTypeLine, // Lines 741 MTLPrimitiveTypeTriangle, // Triangles 742 MTLPrimitiveTypeTriangleStrip, // TriangleStrips 743 }}; 744 745 static constexpr u32 MAX_COMPONENTS = 4; 746 static constexpr const MTLVertexFormat 747 format_mapping[static_cast<u8>(GPUPipeline::VertexAttribute::Type::MaxCount)][MAX_COMPONENTS] = { 748 {MTLVertexFormatFloat, MTLVertexFormatFloat2, MTLVertexFormatFloat3, MTLVertexFormatFloat4}, // Float 749 {MTLVertexFormatUChar, MTLVertexFormatUChar2, MTLVertexFormatUChar3, MTLVertexFormatUChar4}, // UInt8 750 {MTLVertexFormatChar, MTLVertexFormatChar2, MTLVertexFormatChar3, MTLVertexFormatChar4}, // SInt8 751 {MTLVertexFormatUCharNormalized, MTLVertexFormatUChar2Normalized, MTLVertexFormatUChar3Normalized, 752 MTLVertexFormatUChar4Normalized}, // UNorm8 753 {MTLVertexFormatUShort, MTLVertexFormatUShort2, MTLVertexFormatUShort3, MTLVertexFormatUShort4}, // UInt16 754 {MTLVertexFormatShort, MTLVertexFormatShort2, MTLVertexFormatShort3, MTLVertexFormatShort4}, // SInt16 755 {MTLVertexFormatUShortNormalized, MTLVertexFormatUShort2Normalized, MTLVertexFormatUShort3Normalized, 756 MTLVertexFormatUShort4Normalized}, // UNorm16 757 {MTLVertexFormatUInt, MTLVertexFormatUInt2, MTLVertexFormatUInt3, MTLVertexFormatUInt4}, // UInt32 758 {MTLVertexFormatInt, MTLVertexFormatInt2, MTLVertexFormatInt3, MTLVertexFormatInt4}, // SInt32 759 }; 760 761 static constexpr std::array<MTLCullMode, static_cast<u32>(GPUPipeline::CullMode::MaxCount)> cull_mapping = {{ 762 MTLCullModeNone, // None 763 MTLCullModeFront, // Front 764 MTLCullModeBack, // Back 765 }}; 766 767 static constexpr std::array<MTLBlendFactor, static_cast<u32>(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{ 768 MTLBlendFactorZero, // Zero 769 MTLBlendFactorOne, // One 770 MTLBlendFactorSourceColor, // SrcColor 771 MTLBlendFactorOneMinusSourceColor, // InvSrcColor 772 MTLBlendFactorDestinationColor, // DstColor 773 MTLBlendFactorOneMinusDestinationColor, // InvDstColor 774 MTLBlendFactorSourceAlpha, // SrcAlpha 775 MTLBlendFactorOneMinusSourceAlpha, // InvSrcAlpha 776 MTLBlendFactorSource1Alpha, // SrcAlpha1 777 MTLBlendFactorOneMinusSource1Alpha, // InvSrcAlpha1 778 MTLBlendFactorDestinationAlpha, // DstAlpha 779 MTLBlendFactorOneMinusDestinationAlpha, // InvDstAlpha 780 MTLBlendFactorBlendColor, // ConstantAlpha 781 MTLBlendFactorOneMinusBlendColor, // InvConstantAlpha 782 }}; 783 784 static constexpr std::array<MTLBlendOperation, static_cast<u32>(GPUPipeline::BlendOp::MaxCount)> op_mapping = {{ 785 MTLBlendOperationAdd, // Add 786 MTLBlendOperationSubtract, // Subtract 787 MTLBlendOperationReverseSubtract, // ReverseSubtract 788 MTLBlendOperationMin, // Min 789 MTLBlendOperationMax, // Max 790 }}; 791 792 MTLRenderPipelineDescriptor* desc = [[MTLRenderPipelineDescriptor new] autorelease]; 793 desc.vertexFunction = static_cast<const MetalShader*>(config.vertex_shader)->GetFunction(); 794 desc.fragmentFunction = static_cast<const MetalShader*>(config.fragment_shader)->GetFunction(); 795 796 for (u32 i = 0; i < MAX_RENDER_TARGETS; i++) 797 { 798 if (config.color_formats[i] == GPUTexture::Format::Unknown) 799 break; 800 801 MTLRenderPipelineColorAttachmentDescriptor* ca = desc.colorAttachments[0]; 802 ca.pixelFormat = s_pixel_format_mapping[static_cast<u8>(config.color_formats[i])]; 803 ca.writeMask = (config.blend.write_r ? MTLColorWriteMaskRed : MTLColorWriteMaskNone) | 804 (config.blend.write_g ? MTLColorWriteMaskGreen : MTLColorWriteMaskNone) | 805 (config.blend.write_b ? MTLColorWriteMaskBlue : MTLColorWriteMaskNone) | 806 (config.blend.write_a ? MTLColorWriteMaskAlpha : MTLColorWriteMaskNone); 807 ca.blendingEnabled = config.blend.enable; 808 if (config.blend.enable) 809 { 810 ca.sourceRGBBlendFactor = blend_mapping[static_cast<u8>(config.blend.src_blend.GetValue())]; 811 ca.destinationRGBBlendFactor = blend_mapping[static_cast<u8>(config.blend.dst_blend.GetValue())]; 812 ca.rgbBlendOperation = op_mapping[static_cast<u8>(config.blend.blend_op.GetValue())]; 813 ca.sourceAlphaBlendFactor = blend_mapping[static_cast<u8>(config.blend.src_alpha_blend.GetValue())]; 814 ca.destinationAlphaBlendFactor = blend_mapping[static_cast<u8>(config.blend.dst_alpha_blend.GetValue())]; 815 ca.alphaBlendOperation = op_mapping[static_cast<u8>(config.blend.alpha_blend_op.GetValue())]; 816 } 817 } 818 desc.depthAttachmentPixelFormat = s_pixel_format_mapping[static_cast<u8>(config.depth_format)]; 819 820 // Input assembly. 821 MTLVertexDescriptor* vdesc = nil; 822 if (!config.input_layout.vertex_attributes.empty()) 823 { 824 vdesc = [MTLVertexDescriptor vertexDescriptor]; 825 for (u32 i = 0; i < static_cast<u32>(config.input_layout.vertex_attributes.size()); i++) 826 { 827 const GPUPipeline::VertexAttribute& va = config.input_layout.vertex_attributes[i]; 828 DebugAssert(va.components > 0 && va.components <= MAX_COMPONENTS); 829 830 MTLVertexAttributeDescriptor* vd = vdesc.attributes[i]; 831 vd.format = format_mapping[static_cast<u8>(va.type.GetValue())][va.components - 1]; 832 vd.offset = static_cast<NSUInteger>(va.offset.GetValue()); 833 vd.bufferIndex = 1; 834 } 835 836 vdesc.layouts[1].stepFunction = MTLVertexStepFunctionPerVertex; 837 vdesc.layouts[1].stepRate = 1; 838 vdesc.layouts[1].stride = config.input_layout.vertex_stride; 839 840 desc.vertexDescriptor = vdesc; 841 } 842 843 // Rasterization state. 844 const MTLCullMode cull_mode = cull_mapping[static_cast<u8>(config.rasterization.cull_mode.GetValue())]; 845 desc.rasterizationEnabled = TRUE; 846 desc.inputPrimitiveTopology = primitive_classes[static_cast<u8>(config.primitive)]; 847 848 // Depth state 849 id<MTLDepthStencilState> depth = GetDepthState(config.depth); 850 if (depth == nil) 851 return {}; 852 853 // General 854 const MTLPrimitiveType primitive = primitives[static_cast<u8>(config.primitive)]; 855 desc.rasterSampleCount = config.samples; 856 857 // Metal-specific stuff 858 desc.vertexBuffers[0].mutability = MTLMutabilityImmutable; 859 desc.fragmentBuffers[0].mutability = MTLMutabilityImmutable; 860 if (!config.input_layout.vertex_attributes.empty()) 861 desc.vertexBuffers[1].mutability = MTLMutabilityImmutable; 862 if (config.layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) 863 desc.fragmentBuffers[1].mutability = MTLMutabilityImmutable; 864 865 NSError* nserror = nullptr; 866 id<MTLRenderPipelineState> pipeline = [m_device newRenderPipelineStateWithDescriptor:desc error:&nserror]; 867 if (pipeline == nil) 868 { 869 LogNSError(nserror, "Failed to create render pipeline state"); 870 NSErrorToErrorObject(error, "newRenderPipelineStateWithDescriptor failed: ", nserror); 871 return {}; 872 } 873 874 return std::unique_ptr<GPUPipeline>(new MetalPipeline(pipeline, depth, cull_mode, primitive)); 875 } 876 } 877 878 MetalTexture::MetalTexture(id<MTLTexture> texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type, 879 Format format) 880 : GPUTexture(width, height, layers, levels, samples, type, format), m_texture(texture) 881 { 882 } 883 884 MetalTexture::~MetalTexture() 885 { 886 if (m_texture != nil) 887 { 888 MetalDevice::GetInstance().UnbindTexture(this); 889 MetalDevice::DeferRelease(m_texture); 890 } 891 } 892 893 bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/, 894 u32 level /*= 0*/) 895 { 896 const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); 897 const u32 req_size = height * aligned_pitch; 898 899 GPUDevice::GetStatistics().buffer_streamed += req_size; 900 GPUDevice::GetStatistics().num_uploads++; 901 902 MetalDevice& dev = MetalDevice::GetInstance(); 903 MetalStreamBuffer& sb = dev.GetTextureStreamBuffer(); 904 id<MTLBuffer> actual_buffer; 905 u32 actual_offset; 906 u32 actual_pitch; 907 if (req_size >= (sb.GetCurrentSize() / 2u)) 908 { 909 const u32 upload_size = height * pitch; 910 const MTLResourceOptions options = MTLResourceStorageModeShared; 911 actual_buffer = [dev.GetMTLDevice() newBufferWithBytes:data length:upload_size options:options]; 912 actual_offset = 0; 913 actual_pitch = pitch; 914 if (actual_buffer == nil) 915 { 916 Panic("Failed to allocate temporary buffer."); 917 return false; 918 } 919 920 dev.DeferRelease(actual_buffer); 921 } 922 else 923 { 924 if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) 925 { 926 dev.SubmitCommandBuffer(); 927 if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) 928 { 929 Panic("Failed to reserve texture upload space."); 930 return false; 931 } 932 } 933 934 actual_offset = sb.GetCurrentOffset(); 935 StringUtil::StrideMemCpy(sb.GetCurrentHostPointer(), aligned_pitch, data, pitch, width * GetPixelSize(), height); 936 sb.CommitMemory(req_size); 937 actual_buffer = sb.GetBuffer(); 938 actual_pitch = aligned_pitch; 939 } 940 941 if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height)) 942 dev.CommitClear(this); 943 944 const bool is_inline = (m_use_fence_counter == dev.GetCurrentFenceCounter()); 945 946 id<MTLBlitCommandEncoder> encoder = dev.GetBlitEncoder(is_inline); 947 [encoder copyFromBuffer:actual_buffer 948 sourceOffset:actual_offset 949 sourceBytesPerRow:actual_pitch 950 sourceBytesPerImage:0 951 sourceSize:MTLSizeMake(width, height, 1) 952 toTexture:m_texture 953 destinationSlice:layer 954 destinationLevel:level 955 destinationOrigin:MTLOriginMake(x, y, 0)]; 956 m_state = GPUTexture::State::Dirty; 957 return true; 958 } 959 960 bool MetalTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer /*= 0*/, 961 u32 level /*= 0*/) 962 { 963 if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels) 964 return false; 965 966 const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); 967 const u32 req_size = height * aligned_pitch; 968 969 MetalDevice& dev = MetalDevice::GetInstance(); 970 if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height)) 971 dev.CommitClear(this); 972 973 MetalStreamBuffer& sb = dev.GetTextureStreamBuffer(); 974 if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) 975 { 976 dev.SubmitCommandBuffer(); 977 if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) 978 { 979 Panic("Failed to allocate space in texture upload buffer"); 980 return false; 981 } 982 } 983 984 *map = sb.GetCurrentHostPointer(); 985 *map_stride = aligned_pitch; 986 m_map_x = x; 987 m_map_y = y; 988 m_map_width = width; 989 m_map_height = height; 990 m_map_layer = layer; 991 m_map_level = level; 992 m_state = GPUTexture::State::Dirty; 993 return true; 994 } 995 996 void MetalTexture::Unmap() 997 { 998 const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); 999 const u32 req_size = m_map_height * aligned_pitch; 1000 1001 GPUDevice::GetStatistics().buffer_streamed += req_size; 1002 GPUDevice::GetStatistics().num_uploads++; 1003 1004 MetalDevice& dev = MetalDevice::GetInstance(); 1005 MetalStreamBuffer& sb = dev.GetTextureStreamBuffer(); 1006 const u32 offset = sb.GetCurrentOffset(); 1007 sb.CommitMemory(req_size); 1008 1009 // TODO: track this 1010 const bool is_inline = true; 1011 id<MTLBlitCommandEncoder> encoder = dev.GetBlitEncoder(is_inline); 1012 [encoder copyFromBuffer:sb.GetBuffer() 1013 sourceOffset:offset 1014 sourceBytesPerRow:aligned_pitch 1015 sourceBytesPerImage:0 1016 sourceSize:MTLSizeMake(m_map_width, m_map_height, 1) 1017 toTexture:m_texture 1018 destinationSlice:m_map_layer 1019 destinationLevel:m_map_level 1020 destinationOrigin:MTLOriginMake(m_map_x, m_map_y, 0)]; 1021 1022 m_map_x = 0; 1023 m_map_y = 0; 1024 m_map_width = 0; 1025 m_map_height = 0; 1026 m_map_layer = 0; 1027 m_map_level = 0; 1028 } 1029 1030 void MetalTexture::MakeReadyForSampling() 1031 { 1032 MetalDevice& dev = MetalDevice::GetInstance(); 1033 if (!dev.InRenderPass()) 1034 return; 1035 1036 if (IsRenderTarget() ? dev.IsRenderTargetBound(this) : (dev.m_current_depth_target == this)) 1037 dev.EndRenderPass(); 1038 } 1039 1040 void MetalTexture::SetDebugName(std::string_view name) 1041 { 1042 @autoreleasepool 1043 { 1044 [m_texture setLabel:StringViewToNSString(name)]; 1045 } 1046 } 1047 1048 std::unique_ptr<GPUTexture> MetalDevice::CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, 1049 GPUTexture::Type type, GPUTexture::Format format, 1050 const void* data, u32 data_stride) 1051 { 1052 if (!GPUTexture::ValidateConfig(width, height, layers, layers, samples, type, format)) 1053 return {}; 1054 1055 const MTLPixelFormat pixel_format = s_pixel_format_mapping[static_cast<u8>(format)]; 1056 if (pixel_format == MTLPixelFormatInvalid) 1057 return {}; 1058 1059 @autoreleasepool 1060 { 1061 MTLTextureDescriptor* desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:pixel_format 1062 width:width 1063 height:height 1064 mipmapped:(levels > 1)]; 1065 1066 desc.mipmapLevelCount = levels; 1067 desc.storageMode = MTLStorageModePrivate; 1068 if (samples > 1) 1069 { 1070 desc.textureType = (layers > 1) ? MTLTextureType2DMultisampleArray : MTLTextureType2DMultisample; 1071 desc.sampleCount = samples; 1072 } 1073 else if (layers > 1) 1074 { 1075 desc.textureType = MTLTextureType2DArray; 1076 desc.arrayLength = layers; 1077 } 1078 1079 switch (type) 1080 { 1081 case GPUTexture::Type::Texture: 1082 case GPUTexture::Type::DynamicTexture: 1083 desc.usage = MTLTextureUsageShaderRead; 1084 break; 1085 1086 case GPUTexture::Type::RenderTarget: 1087 case GPUTexture::Type::DepthStencil: 1088 desc.usage = MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget; 1089 break; 1090 1091 case GPUTexture::Type::RWTexture: 1092 desc.usage = MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite; 1093 break; 1094 1095 default: 1096 UnreachableCode(); 1097 break; 1098 } 1099 1100 id<MTLTexture> tex = [m_device newTextureWithDescriptor:desc]; 1101 if (tex == nil) 1102 { 1103 ERROR_LOG("Failed to create {}x{} texture.", width, height); 1104 return {}; 1105 } 1106 1107 // This one can *definitely* go on the upload buffer. 1108 std::unique_ptr<GPUTexture> gtex( 1109 new MetalTexture([tex retain], width, height, layers, levels, samples, type, format)); 1110 if (data) 1111 { 1112 // TODO: handle multi-level uploads... 1113 gtex->Update(0, 0, width, height, data, data_stride, 0, 0); 1114 } 1115 1116 return gtex; 1117 } 1118 } 1119 1120 MetalDownloadTexture::MetalDownloadTexture(u32 width, u32 height, GPUTexture::Format format, u8* import_buffer, 1121 size_t buffer_offset, id<MTLBuffer> buffer, const u8* map_ptr, u32 map_pitch) 1122 : GPUDownloadTexture(width, height, format, (import_buffer != nullptr)), m_buffer_offset(buffer_offset), 1123 m_buffer(buffer) 1124 { 1125 m_map_pointer = map_ptr; 1126 m_current_pitch = map_pitch; 1127 } 1128 1129 MetalDownloadTexture::~MetalDownloadTexture() 1130 { 1131 [m_buffer release]; 1132 } 1133 1134 std::unique_ptr<MetalDownloadTexture> MetalDownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format, 1135 void* memory, size_t memory_size, u32 memory_stride) 1136 { 1137 @autoreleasepool 1138 { 1139 MetalDevice& dev = MetalDevice::GetInstance(); 1140 id<MTLBuffer> buffer = nil; 1141 size_t memory_offset = 0; 1142 const u8* map_ptr = nullptr; 1143 u32 map_pitch = 0; 1144 u32 buffer_size = 0; 1145 1146 constexpr MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache; 1147 1148 // not importing memory? 1149 if (!memory) 1150 { 1151 map_pitch = Common::AlignUpPow2(GPUTexture::CalcUploadPitch(format, width), TEXTURE_UPLOAD_PITCH_ALIGNMENT); 1152 buffer_size = height * map_pitch; 1153 buffer = [[dev.m_device newBufferWithLength:buffer_size options:options] retain]; 1154 if (buffer == nil) 1155 { 1156 ERROR_LOG("Failed to create {} byte buffer", buffer_size); 1157 return {}; 1158 } 1159 1160 map_ptr = static_cast<u8*>([buffer contents]); 1161 } 1162 else 1163 { 1164 map_pitch = memory_stride; 1165 buffer_size = height * map_pitch; 1166 Assert(buffer_size <= memory_size); 1167 1168 // Importing memory, we need to page align the buffer. 1169 void* page_aligned_memory = 1170 reinterpret_cast<void*>(Common::AlignDownPow2(reinterpret_cast<uintptr_t>(memory), HOST_PAGE_SIZE)); 1171 const size_t page_offset = static_cast<size_t>(static_cast<u8*>(memory) - static_cast<u8*>(page_aligned_memory)); 1172 const size_t page_aligned_size = Common::AlignUpPow2(page_offset + memory_size, HOST_PAGE_SIZE); 1173 DEV_LOG("Trying to import {} bytes of memory at {} for download texture", page_aligned_memory, page_aligned_size); 1174 1175 buffer = [[dev.m_device newBufferWithBytesNoCopy:page_aligned_memory 1176 length:page_aligned_size 1177 options:options 1178 deallocator:nil] retain]; 1179 if (buffer == nil) 1180 { 1181 ERROR_LOG("Failed to import {} byte buffer", page_aligned_size); 1182 return {}; 1183 } 1184 1185 map_ptr = static_cast<u8*>(memory); 1186 } 1187 1188 return std::unique_ptr<MetalDownloadTexture>(new MetalDownloadTexture( 1189 width, height, format, static_cast<u8*>(memory), memory_offset, buffer, map_ptr, map_pitch)); 1190 } 1191 } 1192 1193 void MetalDownloadTexture::CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, 1194 u32 height, u32 src_layer, u32 src_level, bool use_transfer_pitch) 1195 { 1196 MetalTexture* const mtlTex = static_cast<MetalTexture*>(src); 1197 MetalDevice& dev = MetalDevice::GetInstance(); 1198 1199 DebugAssert(mtlTex->GetFormat() == m_format); 1200 DebugAssert(src_level < mtlTex->GetLevels()); 1201 DebugAssert((src_x + width) <= mtlTex->GetMipWidth(src_level) && (src_y + height) <= mtlTex->GetMipHeight(src_level)); 1202 DebugAssert((dst_x + width) <= m_width && (dst_y + height) <= m_height); 1203 DebugAssert((dst_x == 0 && dst_y == 0) || !use_transfer_pitch); 1204 DebugAssert(!m_is_imported || !use_transfer_pitch); 1205 1206 u32 copy_offset, copy_size, copy_rows; 1207 if (!m_is_imported) 1208 m_current_pitch = GetTransferPitch(use_transfer_pitch ? width : m_width, TEXTURE_UPLOAD_PITCH_ALIGNMENT); 1209 GetTransferSize(dst_x, dst_y, width, height, m_current_pitch, ©_offset, ©_size, ©_rows); 1210 1211 dev.GetStatistics().num_downloads++; 1212 1213 dev.CommitClear(mtlTex); 1214 1215 id<MTLBlitCommandEncoder> encoder = dev.GetBlitEncoder(true); 1216 [encoder copyFromTexture:mtlTex->GetMTLTexture() 1217 sourceSlice:src_layer 1218 sourceLevel:src_level 1219 sourceOrigin:MTLOriginMake(src_x, src_y, 0) 1220 sourceSize:MTLSizeMake(width, height, 1) 1221 toBuffer:m_buffer 1222 destinationOffset:m_buffer_offset + copy_offset 1223 destinationBytesPerRow:m_current_pitch 1224 destinationBytesPerImage:0]; 1225 1226 m_copy_fence_counter = dev.m_current_fence_counter; 1227 m_needs_flush = true; 1228 } 1229 1230 bool MetalDownloadTexture::Map(u32 x, u32 y, u32 width, u32 height) 1231 { 1232 // Always mapped. 1233 return true; 1234 } 1235 1236 void MetalDownloadTexture::Unmap() 1237 { 1238 // Always mapped. 1239 } 1240 1241 void MetalDownloadTexture::Flush() 1242 { 1243 if (!m_needs_flush) 1244 return; 1245 1246 m_needs_flush = false; 1247 1248 MetalDevice& dev = MetalDevice::GetInstance(); 1249 if (dev.m_completed_fence_counter >= m_copy_fence_counter) 1250 return; 1251 1252 // Need to execute command buffer. 1253 if (dev.GetCurrentFenceCounter() == m_copy_fence_counter) 1254 dev.SubmitCommandBuffer(true); 1255 else 1256 dev.WaitForFenceCounter(m_copy_fence_counter); 1257 } 1258 1259 void MetalDownloadTexture::SetDebugName(std::string_view name) 1260 { 1261 @autoreleasepool 1262 { 1263 [m_buffer setLabel:StringViewToNSString(name)]; 1264 } 1265 } 1266 1267 std::unique_ptr<GPUDownloadTexture> MetalDevice::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) 1268 { 1269 return MetalDownloadTexture::Create(width, height, format, nullptr, 0, 0); 1270 } 1271 1272 std::unique_ptr<GPUDownloadTexture> MetalDevice::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format, 1273 void* memory, size_t memory_size, 1274 u32 memory_stride) 1275 { 1276 return MetalDownloadTexture::Create(width, height, format, memory, memory_size, memory_stride); 1277 } 1278 1279 MetalSampler::MetalSampler(id<MTLSamplerState> ss) : m_ss(ss) 1280 { 1281 } 1282 1283 MetalSampler::~MetalSampler() = default; 1284 1285 void MetalSampler::SetDebugName(std::string_view name) 1286 { 1287 // lame.. have to put it on the descriptor :/ 1288 } 1289 1290 std::unique_ptr<GPUSampler> MetalDevice::CreateSampler(const GPUSampler::Config& config) 1291 { 1292 @autoreleasepool 1293 { 1294 static constexpr std::array<MTLSamplerAddressMode, static_cast<u8>(GPUSampler::AddressMode::MaxCount)> ta = {{ 1295 MTLSamplerAddressModeRepeat, // Repeat 1296 MTLSamplerAddressModeClampToEdge, // ClampToEdge 1297 MTLSamplerAddressModeClampToBorderColor, // ClampToBorder 1298 MTLSamplerAddressModeMirrorRepeat, // MirrorRepeat 1299 }}; 1300 static constexpr std::array<MTLSamplerMinMagFilter, static_cast<u8>(GPUSampler::Filter::MaxCount)> min_mag_filters = 1301 {{ 1302 MTLSamplerMinMagFilterNearest, // Nearest 1303 MTLSamplerMinMagFilterLinear, // Linear 1304 }}; 1305 static constexpr std::array<MTLSamplerMipFilter, static_cast<u8>(GPUSampler::Filter::MaxCount)> mip_filters = {{ 1306 MTLSamplerMipFilterNearest, // Nearest 1307 MTLSamplerMipFilterLinear, // Linear 1308 }}; 1309 1310 struct BorderColorMapping 1311 { 1312 u32 color; 1313 MTLSamplerBorderColor mtl_color; 1314 }; 1315 static constexpr BorderColorMapping border_color_mapping[] = { 1316 {0x00000000u, MTLSamplerBorderColorTransparentBlack}, 1317 {0xFF000000u, MTLSamplerBorderColorOpaqueBlack}, 1318 {0xFFFFFFFFu, MTLSamplerBorderColorOpaqueWhite}, 1319 }; 1320 1321 MTLSamplerDescriptor* desc = [[MTLSamplerDescriptor new] autorelease]; 1322 desc.normalizedCoordinates = true; 1323 desc.sAddressMode = ta[static_cast<u8>(config.address_u.GetValue())]; 1324 desc.tAddressMode = ta[static_cast<u8>(config.address_v.GetValue())]; 1325 desc.rAddressMode = ta[static_cast<u8>(config.address_w.GetValue())]; 1326 desc.minFilter = min_mag_filters[static_cast<u8>(config.min_filter.GetValue())]; 1327 desc.magFilter = min_mag_filters[static_cast<u8>(config.mag_filter.GetValue())]; 1328 desc.mipFilter = (config.min_lod != config.max_lod) ? mip_filters[static_cast<u8>(config.mip_filter.GetValue())] : 1329 MTLSamplerMipFilterNotMipmapped; 1330 desc.lodMinClamp = static_cast<float>(config.min_lod); 1331 desc.lodMaxClamp = static_cast<float>(config.max_lod); 1332 desc.maxAnisotropy = std::max<u8>(config.anisotropy, 1); 1333 1334 if (config.address_u == GPUSampler::AddressMode::ClampToBorder || 1335 config.address_v == GPUSampler::AddressMode::ClampToBorder || 1336 config.address_w == GPUSampler::AddressMode::ClampToBorder) 1337 { 1338 u32 i; 1339 for (i = 0; i < static_cast<u32>(std::size(border_color_mapping)); i++) 1340 { 1341 if (border_color_mapping[i].color == config.border_color) 1342 break; 1343 } 1344 if (i == std::size(border_color_mapping)) 1345 { 1346 ERROR_LOG("Unsupported border color: {:08X}", config.border_color.GetValue()); 1347 return {}; 1348 } 1349 1350 desc.borderColor = border_color_mapping[i].mtl_color; 1351 } 1352 1353 // TODO: Pool? 1354 id<MTLSamplerState> ss = [m_device newSamplerStateWithDescriptor:desc]; 1355 if (ss == nil) 1356 { 1357 ERROR_LOG("Failed to create sampler state."); 1358 return {}; 1359 } 1360 1361 return std::unique_ptr<GPUSampler>(new MetalSampler([ss retain])); 1362 } 1363 } 1364 1365 bool MetalDevice::SupportsTextureFormat(GPUTexture::Format format) const 1366 { 1367 if (format == GPUTexture::Format::RGB565 || format == GPUTexture::Format::RGBA5551) 1368 { 1369 // These formats require an Apple Silicon GPU. 1370 // See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf 1371 if (![m_device supportsFamily:MTLGPUFamilyApple2]) 1372 return false; 1373 } 1374 1375 return (s_pixel_format_mapping[static_cast<u8>(format)] != MTLPixelFormatInvalid); 1376 } 1377 1378 void MetalDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, 1379 GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, 1380 u32 height) 1381 { 1382 DebugAssert(src_level < src->GetLevels() && src_layer < src->GetLayers()); 1383 DebugAssert((src_x + width) <= src->GetMipWidth(src_level)); 1384 DebugAssert((src_y + height) <= src->GetMipHeight(src_level)); 1385 DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers()); 1386 DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level)); 1387 DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level)); 1388 1389 MetalTexture* D = static_cast<MetalTexture*>(dst); 1390 MetalTexture* S = static_cast<MetalTexture*>(src); 1391 1392 if (D->IsRenderTargetOrDepthStencil()) 1393 { 1394 if (S->GetState() == GPUTexture::State::Cleared) 1395 { 1396 if (S->GetWidth() == D->GetWidth() && S->GetHeight() == D->GetHeight()) 1397 { 1398 // pass clear through 1399 D->m_state = S->m_state; 1400 D->m_clear_value = S->m_clear_value; 1401 return; 1402 } 1403 } 1404 else if (S->GetState() == GPUTexture::State::Invalidated) 1405 { 1406 // Contents are undefined ;) 1407 return; 1408 } 1409 else if (dst_x == 0 && dst_y == 0 && width == D->GetMipWidth(dst_level) && height == D->GetMipHeight(dst_level)) 1410 { 1411 D->SetState(GPUTexture::State::Dirty); 1412 } 1413 1414 CommitClear(D); 1415 } 1416 1417 CommitClear(S); 1418 1419 S->SetUseFenceCounter(m_current_fence_counter); 1420 D->SetUseFenceCounter(m_current_fence_counter); 1421 1422 s_stats.num_copies++; 1423 1424 @autoreleasepool 1425 { 1426 id<MTLBlitCommandEncoder> encoder = GetBlitEncoder(true); 1427 [encoder copyFromTexture:S->GetMTLTexture() 1428 sourceSlice:src_level 1429 sourceLevel:src_level 1430 sourceOrigin:MTLOriginMake(src_x, src_y, 0) 1431 sourceSize:MTLSizeMake(width, height, 1) 1432 toTexture:D->GetMTLTexture() 1433 destinationSlice:dst_layer 1434 destinationLevel:dst_level 1435 destinationOrigin:MTLOriginMake(dst_x, dst_y, 0)]; 1436 } 1437 } 1438 1439 void MetalDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, 1440 GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height) 1441 { 1442 DebugAssert((src_x + width) <= src->GetWidth()); 1443 DebugAssert((src_y + height) <= src->GetHeight()); 1444 DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers()); 1445 DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level)); 1446 DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level)); 1447 DebugAssert(!dst->IsMultisampled() && src->IsMultisampled()); 1448 1449 // Only does first level for now.. 1450 DebugAssert(dst_level == 0 && dst_layer == 0); 1451 1452 const GPUTexture::Format src_format = dst->GetFormat(); 1453 const GPUTexture::Format dst_format = dst->GetFormat(); 1454 id<MTLComputePipelineState> resolve_pipeline = nil; 1455 if (auto iter = std::find_if(m_resolve_pipelines.begin(), m_resolve_pipelines.end(), 1456 [src_format, dst_format](const auto& it) { 1457 return it.first.first == src_format && it.first.second == dst_format; 1458 }); 1459 iter != m_resolve_pipelines.end()) 1460 { 1461 resolve_pipeline = iter->second; 1462 } 1463 else 1464 { 1465 // Need to compile it. 1466 @autoreleasepool 1467 { 1468 const bool is_depth = GPUTexture::IsDepthFormat(src_format); 1469 id<MTLFunction> function = 1470 [GetFunctionFromLibrary(m_shaders, is_depth ? @"depthResolveKernel" : @"colorResolveKernel") autorelease]; 1471 if (function == nil) 1472 Panic("Failed to get resolve kernel"); 1473 1474 resolve_pipeline = [CreateComputePipeline(function, is_depth ? @"Depth Resolve" : @"Color Resolve") autorelease]; 1475 if (resolve_pipeline != nil) 1476 [resolve_pipeline retain]; 1477 m_resolve_pipelines.emplace_back(std::make_pair(src_format, dst_format), resolve_pipeline); 1478 } 1479 } 1480 if (resolve_pipeline == nil) 1481 Panic("Failed to get resolve pipeline"); 1482 1483 if (InRenderPass()) 1484 EndRenderPass(); 1485 1486 s_stats.num_copies++; 1487 1488 const u32 threadgroupHeight = resolve_pipeline.maxTotalThreadsPerThreadgroup / resolve_pipeline.threadExecutionWidth; 1489 const MTLSize intrinsicThreadgroupSize = MTLSizeMake(resolve_pipeline.threadExecutionWidth, threadgroupHeight, 1); 1490 const MTLSize threadgroupsInGrid = 1491 MTLSizeMake((src->GetWidth() + intrinsicThreadgroupSize.width - 1) / intrinsicThreadgroupSize.width, 1492 (src->GetHeight() + intrinsicThreadgroupSize.height - 1) / intrinsicThreadgroupSize.height, 1); 1493 1494 id<MTLComputeCommandEncoder> computeEncoder = [m_render_cmdbuf computeCommandEncoder]; 1495 [computeEncoder setComputePipelineState:resolve_pipeline]; 1496 [computeEncoder setTexture:static_cast<MetalTexture*>(src)->GetMTLTexture() atIndex:0]; 1497 [computeEncoder setTexture:static_cast<MetalTexture*>(dst)->GetMTLTexture() atIndex:1]; 1498 [computeEncoder dispatchThreadgroups:threadgroupsInGrid threadsPerThreadgroup:intrinsicThreadgroupSize]; 1499 [computeEncoder endEncoding]; 1500 } 1501 1502 void MetalDevice::ClearRenderTarget(GPUTexture* t, u32 c) 1503 { 1504 GPUDevice::ClearRenderTarget(t, c); 1505 if (InRenderPass() && IsRenderTargetBound(t)) 1506 EndRenderPass(); 1507 } 1508 1509 void MetalDevice::ClearDepth(GPUTexture* t, float d) 1510 { 1511 GPUDevice::ClearDepth(t, d); 1512 if (InRenderPass() && m_current_depth_target == t) 1513 { 1514 const ClearPipelineConfig config = GetCurrentClearPipelineConfig(); 1515 id<MTLRenderPipelineState> pipeline = GetClearDepthPipeline(config); 1516 id<MTLDepthStencilState> depth = GetDepthState(GPUPipeline::DepthState::GetAlwaysWriteState()); 1517 1518 const GSVector4i rect = t->GetRect(); 1519 const bool set_vp = !m_current_viewport.eq(rect); 1520 const bool set_scissor = !m_current_scissor.eq(rect); 1521 if (set_vp) 1522 { 1523 [m_render_encoder setViewport:(MTLViewport){0.0, 0.0, static_cast<double>(t->GetWidth()), 1524 static_cast<double>(t->GetHeight()), 0.0, 1.0}]; 1525 } 1526 if (set_scissor) 1527 [m_render_encoder setScissorRect:(MTLScissorRect){0u, 0u, t->GetWidth(), t->GetHeight()}]; 1528 1529 [m_render_encoder setRenderPipelineState:pipeline]; 1530 if (m_current_cull_mode != MTLCullModeNone) 1531 [m_render_encoder setCullMode:MTLCullModeNone]; 1532 if (depth != m_current_depth_state) 1533 [m_render_encoder setDepthStencilState:depth]; 1534 [m_render_encoder setVertexBytes:&d length:sizeof(d) atIndex:0]; 1535 [m_render_encoder drawPrimitives:m_current_pipeline->GetPrimitive() vertexStart:0 vertexCount:3]; 1536 s_stats.num_draws++; 1537 1538 [m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0]; 1539 if (m_current_pipeline) 1540 [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()]; 1541 if (m_current_cull_mode != MTLCullModeNone) 1542 [m_render_encoder setCullMode:m_current_cull_mode]; 1543 if (depth != m_current_depth_state) 1544 [m_render_encoder setDepthStencilState:m_current_depth_state]; 1545 if (set_vp) 1546 SetViewportInRenderEncoder(); 1547 if (set_scissor) 1548 SetScissorInRenderEncoder(); 1549 } 1550 } 1551 1552 void MetalDevice::InvalidateRenderTarget(GPUTexture* t) 1553 { 1554 GPUDevice::InvalidateRenderTarget(t); 1555 if (InRenderPass() && (t->IsRenderTarget() ? IsRenderTargetBound(t) : (m_current_depth_target == t))) 1556 EndRenderPass(); 1557 } 1558 1559 void MetalDevice::CommitClear(MetalTexture* tex) 1560 { 1561 if (tex->GetState() == GPUTexture::State::Cleared) 1562 { 1563 DebugAssert(tex->IsRenderTargetOrDepthStencil()); 1564 tex->SetState(GPUTexture::State::Dirty); 1565 1566 // TODO: We could combine it with the current render pass. 1567 if (InRenderPass()) 1568 EndRenderPass(); 1569 1570 @autoreleasepool 1571 { 1572 // Allocating here seems a bit sad. 1573 MTLRenderPassDescriptor* desc = [MTLRenderPassDescriptor renderPassDescriptor]; 1574 desc.renderTargetWidth = tex->GetWidth(); 1575 desc.renderTargetHeight = tex->GetHeight(); 1576 if (tex->IsRenderTarget()) 1577 { 1578 const auto cc = tex->GetUNormClearColor(); 1579 desc.colorAttachments[0].texture = tex->GetMTLTexture(); 1580 desc.colorAttachments[0].loadAction = MTLLoadActionClear; 1581 desc.colorAttachments[0].storeAction = MTLStoreActionStore; 1582 desc.colorAttachments[0].clearColor = MTLClearColorMake(cc[0], cc[1], cc[2], cc[3]); 1583 } 1584 else 1585 { 1586 desc.depthAttachment.texture = tex->GetMTLTexture(); 1587 desc.depthAttachment.loadAction = MTLLoadActionClear; 1588 desc.depthAttachment.storeAction = MTLStoreActionStore; 1589 desc.depthAttachment.clearDepth = tex->GetClearDepth(); 1590 } 1591 1592 id<MTLRenderCommandEncoder> encoder = [m_render_cmdbuf renderCommandEncoderWithDescriptor:desc]; 1593 [encoder endEncoding]; 1594 } 1595 } 1596 } 1597 1598 MetalDevice::ClearPipelineConfig MetalDevice::GetCurrentClearPipelineConfig() const 1599 { 1600 ClearPipelineConfig config = {}; 1601 for (u32 i = 0; i < m_num_current_render_targets; i++) 1602 config.color_formats[i] = m_current_render_targets[i]->GetFormat(); 1603 1604 config.depth_format = m_current_depth_target ? m_current_depth_target->GetFormat() : GPUTexture::Format::Unknown; 1605 config.samples = 1606 m_current_depth_target ? m_current_depth_target->GetSamples() : m_current_render_targets[0]->GetSamples(); 1607 return config; 1608 } 1609 1610 id<MTLRenderPipelineState> MetalDevice::GetClearDepthPipeline(const ClearPipelineConfig& config) 1611 { 1612 const auto iter = std::find_if(m_clear_pipelines.begin(), m_clear_pipelines.end(), 1613 [&config](const auto& it) { return (it.first == config); }); 1614 if (iter != m_clear_pipelines.end()) 1615 return iter->second; 1616 1617 MTLRenderPipelineDescriptor* desc = [[MTLRenderPipelineDescriptor new] autorelease]; 1618 desc.vertexFunction = [GetFunctionFromLibrary(m_shaders, @"depthClearVertex") autorelease]; 1619 desc.fragmentFunction = [GetFunctionFromLibrary(m_shaders, @"depthClearFragment") autorelease]; 1620 1621 for (u32 i = 0; i < MAX_RENDER_TARGETS; i++) 1622 { 1623 if (config.color_formats[i] == GPUTexture::Format::Unknown) 1624 break; 1625 desc.colorAttachments[i].pixelFormat = s_pixel_format_mapping[static_cast<u8>(config.color_formats[i])]; 1626 desc.colorAttachments[i].writeMask = MTLColorWriteMaskNone; 1627 } 1628 desc.depthAttachmentPixelFormat = s_pixel_format_mapping[static_cast<u8>(config.depth_format)]; 1629 desc.rasterizationEnabled = TRUE; 1630 desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassTriangle; 1631 desc.rasterSampleCount = config.samples; 1632 desc.vertexBuffers[0].mutability = MTLMutabilityImmutable; 1633 1634 NSError* error = nullptr; 1635 id<MTLRenderPipelineState> pipeline = [m_device newRenderPipelineStateWithDescriptor:desc error:&error]; 1636 if (pipeline == nil) 1637 LogNSError(error, "Failed to create clear render pipeline state"); 1638 1639 m_clear_pipelines.emplace_back(config, pipeline); 1640 return pipeline; 1641 } 1642 1643 MetalTextureBuffer::MetalTextureBuffer(Format format, u32 size_in_elements) : GPUTextureBuffer(format, size_in_elements) 1644 { 1645 } 1646 1647 MetalTextureBuffer::~MetalTextureBuffer() 1648 { 1649 if (m_buffer.IsValid()) 1650 MetalDevice::GetInstance().UnbindTextureBuffer(this); 1651 m_buffer.Destroy(); 1652 } 1653 1654 bool MetalTextureBuffer::CreateBuffer(id<MTLDevice> device) 1655 { 1656 return m_buffer.Create(device, GetSizeInBytes()); 1657 } 1658 1659 void* MetalTextureBuffer::Map(u32 required_elements) 1660 { 1661 const u32 esize = GetElementSize(m_format); 1662 const u32 req_size = esize * required_elements; 1663 if (!m_buffer.ReserveMemory(req_size, esize)) 1664 { 1665 MetalDevice::GetInstance().SubmitCommandBufferAndRestartRenderPass("out of space in texture buffer"); 1666 if (!m_buffer.ReserveMemory(req_size, esize)) 1667 Panic("Failed to allocate texture buffer space."); 1668 } 1669 1670 m_current_position = m_buffer.GetCurrentOffset() / esize; 1671 return m_buffer.GetCurrentHostPointer(); 1672 } 1673 1674 void MetalTextureBuffer::Unmap(u32 used_elements) 1675 { 1676 const u32 size = GetElementSize(m_format) * used_elements; 1677 GPUDevice::GetStatistics().buffer_streamed += size; 1678 GPUDevice::GetStatistics().num_uploads++; 1679 m_buffer.CommitMemory(size); 1680 } 1681 1682 void MetalTextureBuffer::SetDebugName(std::string_view name) 1683 { 1684 @autoreleasepool 1685 { 1686 [m_buffer.GetBuffer() setLabel:StringViewToNSString(name)]; 1687 } 1688 } 1689 1690 std::unique_ptr<GPUTextureBuffer> MetalDevice::CreateTextureBuffer(GPUTextureBuffer::Format format, 1691 u32 size_in_elements) 1692 { 1693 std::unique_ptr<MetalTextureBuffer> tb = std::make_unique<MetalTextureBuffer>(format, size_in_elements); 1694 if (!tb->CreateBuffer(m_device)) 1695 tb.reset(); 1696 1697 return tb; 1698 } 1699 1700 void MetalDevice::PushDebugGroup(const char* name) 1701 { 1702 } 1703 1704 void MetalDevice::PopDebugGroup() 1705 { 1706 } 1707 1708 void MetalDevice::InsertDebugMessage(const char* msg) 1709 { 1710 } 1711 1712 void MetalDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, 1713 u32* map_base_vertex) 1714 { 1715 const u32 req_size = vertex_size * vertex_count; 1716 if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size)) 1717 { 1718 SubmitCommandBufferAndRestartRenderPass("out of vertex space"); 1719 if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size)) 1720 Panic("Failed to allocate vertex space"); 1721 } 1722 1723 *map_ptr = m_vertex_buffer.GetCurrentHostPointer(); 1724 *map_space = m_vertex_buffer.GetCurrentSpace() / vertex_size; 1725 *map_base_vertex = m_vertex_buffer.GetCurrentOffset() / vertex_size; 1726 } 1727 1728 void MetalDevice::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) 1729 { 1730 const u32 size = vertex_size * vertex_count; 1731 s_stats.buffer_streamed += size; 1732 m_vertex_buffer.CommitMemory(size); 1733 } 1734 1735 void MetalDevice::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) 1736 { 1737 const u32 req_size = sizeof(DrawIndex) * index_count; 1738 if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex))) 1739 { 1740 SubmitCommandBufferAndRestartRenderPass("out of index space"); 1741 if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex))) 1742 Panic("Failed to allocate index space"); 1743 } 1744 1745 *map_ptr = reinterpret_cast<DrawIndex*>(m_index_buffer.GetCurrentHostPointer()); 1746 *map_space = m_index_buffer.GetCurrentSpace() / sizeof(DrawIndex); 1747 *map_base_index = m_index_buffer.GetCurrentOffset() / sizeof(DrawIndex); 1748 } 1749 1750 void MetalDevice::UnmapIndexBuffer(u32 used_index_count) 1751 { 1752 const u32 size = sizeof(DrawIndex) * used_index_count; 1753 s_stats.buffer_streamed += size; 1754 m_index_buffer.CommitMemory(size); 1755 } 1756 1757 void MetalDevice::PushUniformBuffer(const void* data, u32 data_size) 1758 { 1759 s_stats.buffer_streamed += data_size; 1760 void* map = MapUniformBuffer(data_size); 1761 std::memcpy(map, data, data_size); 1762 UnmapUniformBuffer(data_size); 1763 } 1764 1765 void* MetalDevice::MapUniformBuffer(u32 size) 1766 { 1767 const u32 used_space = Common::AlignUpPow2(size, UNIFORM_BUFFER_ALIGNMENT); 1768 if (!m_uniform_buffer.ReserveMemory(used_space, UNIFORM_BUFFER_ALIGNMENT)) 1769 { 1770 SubmitCommandBufferAndRestartRenderPass("out of uniform space"); 1771 if (!m_uniform_buffer.ReserveMemory(used_space, UNIFORM_BUFFER_ALIGNMENT)) 1772 Panic("Failed to allocate uniform space."); 1773 } 1774 1775 return m_uniform_buffer.GetCurrentHostPointer(); 1776 } 1777 1778 void MetalDevice::UnmapUniformBuffer(u32 size) 1779 { 1780 s_stats.buffer_streamed += size; 1781 m_current_uniform_buffer_position = m_uniform_buffer.GetCurrentOffset(); 1782 m_uniform_buffer.CommitMemory(size); 1783 if (InRenderPass()) 1784 { 1785 [m_render_encoder setVertexBufferOffset:m_current_uniform_buffer_position atIndex:0]; 1786 [m_render_encoder setFragmentBufferOffset:m_current_uniform_buffer_position atIndex:0]; 1787 } 1788 } 1789 1790 void MetalDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, 1791 GPUPipeline::RenderPassFlag feedback_loop) 1792 { 1793 bool changed = (m_num_current_render_targets != num_rts || m_current_depth_target != ds || 1794 (!m_features.framebuffer_fetch && ((feedback_loop & GPUPipeline::ColorFeedbackLoop) != 1795 (m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop)))); 1796 bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated()); 1797 bool needs_rt_clear = false; 1798 1799 m_current_depth_target = static_cast<MetalTexture*>(ds); 1800 for (u32 i = 0; i < num_rts; i++) 1801 { 1802 MetalTexture* const RT = static_cast<MetalTexture*>(rts[i]); 1803 changed |= m_current_render_targets[i] != RT; 1804 m_current_render_targets[i] = RT; 1805 needs_rt_clear |= RT->IsClearedOrInvalidated(); 1806 } 1807 for (u32 i = num_rts; i < m_num_current_render_targets; i++) 1808 m_current_render_targets[i] = nullptr; 1809 m_num_current_render_targets = static_cast<u8>(num_rts); 1810 m_current_feedback_loop = feedback_loop; 1811 1812 if (changed || needs_rt_clear || needs_ds_clear) 1813 { 1814 if (InRenderPass()) 1815 EndRenderPass(); 1816 } 1817 } 1818 1819 void MetalDevice::SetPipeline(GPUPipeline* pipeline) 1820 { 1821 DebugAssert(pipeline); 1822 if (m_current_pipeline == pipeline) 1823 return; 1824 1825 m_current_pipeline = static_cast<MetalPipeline*>(pipeline); 1826 if (InRenderPass()) 1827 { 1828 [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()]; 1829 1830 if (m_current_depth_state != m_current_pipeline->GetDepthState()) 1831 { 1832 m_current_depth_state = m_current_pipeline->GetDepthState(); 1833 [m_render_encoder setDepthStencilState:m_current_depth_state]; 1834 } 1835 if (m_current_cull_mode != m_current_pipeline->GetCullMode()) 1836 { 1837 m_current_cull_mode = m_current_pipeline->GetCullMode(); 1838 [m_render_encoder setCullMode:m_current_cull_mode]; 1839 } 1840 } 1841 else 1842 { 1843 // Still need to set depth state before the draw begins. 1844 m_current_depth_state = m_current_pipeline->GetDepthState(); 1845 m_current_cull_mode = m_current_pipeline->GetCullMode(); 1846 } 1847 } 1848 1849 void MetalDevice::UnbindPipeline(MetalPipeline* pl) 1850 { 1851 if (m_current_pipeline != pl) 1852 return; 1853 1854 m_current_pipeline = nullptr; 1855 m_current_depth_state = nil; 1856 } 1857 1858 void MetalDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) 1859 { 1860 DebugAssert(slot < MAX_TEXTURE_SAMPLERS); 1861 1862 id<MTLTexture> T = texture ? static_cast<MetalTexture*>(texture)->GetMTLTexture() : nil; 1863 if (texture) 1864 { 1865 CommitClear(static_cast<MetalTexture*>(texture)); 1866 static_cast<MetalTexture*>(texture)->SetUseFenceCounter(m_current_fence_counter); 1867 } 1868 1869 if (m_current_textures[slot] != T) 1870 { 1871 m_current_textures[slot] = T; 1872 if (InRenderPass()) 1873 [m_render_encoder setFragmentTexture:T atIndex:slot]; 1874 } 1875 1876 id<MTLSamplerState> S = sampler ? static_cast<MetalSampler*>(sampler)->GetSamplerState() : nil; 1877 if (m_current_samplers[slot] != S) 1878 { 1879 m_current_samplers[slot] = S; 1880 if (InRenderPass()) 1881 [m_render_encoder setFragmentSamplerState:S atIndex:slot]; 1882 } 1883 } 1884 1885 void MetalDevice::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) 1886 { 1887 id<MTLBuffer> B = buffer ? static_cast<MetalTextureBuffer*>(buffer)->GetMTLBuffer() : nil; 1888 if (m_current_ssbo == B) 1889 return; 1890 1891 m_current_ssbo = B; 1892 if (InRenderPass()) 1893 [m_render_encoder setFragmentBuffer:B offset:0 atIndex:1]; 1894 } 1895 1896 void MetalDevice::UnbindTexture(MetalTexture* tex) 1897 { 1898 const id<MTLTexture> T = tex->GetMTLTexture(); 1899 for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) 1900 { 1901 if (m_current_textures[i] == T) 1902 { 1903 m_current_textures[i] = nil; 1904 if (InRenderPass()) 1905 [m_render_encoder setFragmentTexture:nil atIndex:i]; 1906 } 1907 } 1908 1909 if (tex->IsRenderTarget()) 1910 { 1911 for (u32 i = 0; i < m_num_current_render_targets; i++) 1912 { 1913 if (m_current_render_targets[i] == tex) 1914 { 1915 WARNING_LOG("Unbinding current RT"); 1916 SetRenderTargets(nullptr, 0, m_current_depth_target, GPUPipeline::NoRenderPassFlags); // TODO: Wrong 1917 break; 1918 } 1919 } 1920 } 1921 else if (tex->IsDepthStencil()) 1922 { 1923 if (m_current_depth_target == tex) 1924 { 1925 WARNING_LOG("Unbinding current DS"); 1926 SetRenderTargets(nullptr, 0, nullptr, GPUPipeline::NoRenderPassFlags); 1927 } 1928 } 1929 } 1930 1931 void MetalDevice::UnbindTextureBuffer(MetalTextureBuffer* buf) 1932 { 1933 if (m_current_ssbo != buf->GetMTLBuffer()) 1934 return; 1935 1936 m_current_ssbo = nil; 1937 if (InRenderPass()) 1938 [m_render_encoder setFragmentBuffer:nil offset:0 atIndex:1]; 1939 } 1940 1941 void MetalDevice::SetViewport(const GSVector4i rc) 1942 { 1943 if (m_current_viewport.eq(rc)) 1944 return; 1945 1946 m_current_viewport = rc; 1947 1948 if (InRenderPass()) 1949 SetViewportInRenderEncoder(); 1950 } 1951 1952 void MetalDevice::SetScissor(const GSVector4i rc) 1953 { 1954 if (m_current_scissor.eq(rc)) 1955 return; 1956 1957 m_current_scissor = rc; 1958 1959 if (InRenderPass()) 1960 SetScissorInRenderEncoder(); 1961 } 1962 1963 void MetalDevice::BeginRenderPass() 1964 { 1965 DebugAssert(m_render_encoder == nil); 1966 1967 // Inline writes :( 1968 if (m_inline_upload_encoder != nil) 1969 { 1970 [m_inline_upload_encoder endEncoding]; 1971 [m_inline_upload_encoder release]; 1972 m_inline_upload_encoder = nil; 1973 } 1974 1975 s_stats.num_render_passes++; 1976 1977 @autoreleasepool 1978 { 1979 MTLRenderPassDescriptor* desc = [MTLRenderPassDescriptor renderPassDescriptor]; 1980 if (m_num_current_render_targets == 0 && !m_current_depth_target) 1981 { 1982 // Rendering to view, but we got interrupted... 1983 desc.colorAttachments[0].texture = [m_layer_drawable texture]; 1984 desc.colorAttachments[0].loadAction = MTLLoadActionLoad; 1985 } 1986 else 1987 { 1988 for (u32 i = 0; i < m_num_current_render_targets; i++) 1989 { 1990 MetalTexture* const RT = m_current_render_targets[i]; 1991 desc.colorAttachments[i].texture = RT->GetMTLTexture(); 1992 desc.colorAttachments[i].storeAction = MTLStoreActionStore; 1993 RT->SetUseFenceCounter(m_current_fence_counter); 1994 1995 switch (RT->GetState()) 1996 { 1997 case GPUTexture::State::Cleared: 1998 { 1999 const auto clear_color = RT->GetUNormClearColor(); 2000 desc.colorAttachments[i].loadAction = MTLLoadActionClear; 2001 desc.colorAttachments[i].clearColor = 2002 MTLClearColorMake(clear_color[0], clear_color[1], clear_color[2], clear_color[3]); 2003 RT->SetState(GPUTexture::State::Dirty); 2004 } 2005 break; 2006 2007 case GPUTexture::State::Invalidated: 2008 { 2009 desc.colorAttachments[i].loadAction = MTLLoadActionDontCare; 2010 RT->SetState(GPUTexture::State::Dirty); 2011 } 2012 break; 2013 2014 case GPUTexture::State::Dirty: 2015 { 2016 desc.colorAttachments[i].loadAction = MTLLoadActionLoad; 2017 } 2018 break; 2019 2020 default: 2021 UnreachableCode(); 2022 break; 2023 } 2024 } 2025 2026 if (MetalTexture* DS = m_current_depth_target) 2027 { 2028 desc.depthAttachment.texture = m_current_depth_target->GetMTLTexture(); 2029 desc.depthAttachment.storeAction = MTLStoreActionStore; 2030 DS->SetUseFenceCounter(m_current_fence_counter); 2031 2032 switch (DS->GetState()) 2033 { 2034 case GPUTexture::State::Cleared: 2035 { 2036 desc.depthAttachment.loadAction = MTLLoadActionClear; 2037 desc.depthAttachment.clearDepth = DS->GetClearDepth(); 2038 DS->SetState(GPUTexture::State::Dirty); 2039 } 2040 break; 2041 2042 case GPUTexture::State::Invalidated: 2043 { 2044 desc.depthAttachment.loadAction = MTLLoadActionDontCare; 2045 DS->SetState(GPUTexture::State::Dirty); 2046 } 2047 break; 2048 2049 case GPUTexture::State::Dirty: 2050 { 2051 desc.depthAttachment.loadAction = MTLLoadActionLoad; 2052 } 2053 break; 2054 2055 default: 2056 UnreachableCode(); 2057 break; 2058 } 2059 } 2060 } 2061 2062 m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:desc] retain]; 2063 SetInitialEncoderState(); 2064 } 2065 } 2066 2067 void MetalDevice::EndRenderPass() 2068 { 2069 DebugAssert(InRenderPass() && !IsInlineUploading()); 2070 [m_render_encoder endEncoding]; 2071 [m_render_encoder release]; 2072 m_render_encoder = nil; 2073 } 2074 2075 void MetalDevice::EndInlineUploading() 2076 { 2077 DebugAssert(IsInlineUploading() && !InRenderPass()); 2078 [m_inline_upload_encoder endEncoding]; 2079 [m_inline_upload_encoder release]; 2080 m_inline_upload_encoder = nil; 2081 } 2082 2083 void MetalDevice::EndAnyEncoding() 2084 { 2085 if (InRenderPass()) 2086 EndRenderPass(); 2087 else if (IsInlineUploading()) 2088 EndInlineUploading(); 2089 } 2090 2091 void MetalDevice::SetInitialEncoderState() 2092 { 2093 // Set initial state. 2094 // TODO: avoid uniform set here? it's probably going to get changed... 2095 // Might be better off just deferring all the init until the first draw... 2096 [m_render_encoder setVertexBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0]; 2097 [m_render_encoder setFragmentBuffer:m_uniform_buffer.GetBuffer() offset:m_current_uniform_buffer_position atIndex:0]; 2098 [m_render_encoder setVertexBuffer:m_vertex_buffer.GetBuffer() offset:0 atIndex:1]; 2099 [m_render_encoder setCullMode:m_current_cull_mode]; 2100 if (m_current_depth_state != nil) 2101 [m_render_encoder setDepthStencilState:m_current_depth_state]; 2102 if (m_current_pipeline != nil) 2103 [m_render_encoder setRenderPipelineState:m_current_pipeline->GetPipelineState()]; 2104 [m_render_encoder setFragmentTextures:m_current_textures.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)]; 2105 [m_render_encoder setFragmentSamplerStates:m_current_samplers.data() withRange:NSMakeRange(0, MAX_TEXTURE_SAMPLERS)]; 2106 if (m_current_ssbo) 2107 [m_render_encoder setFragmentBuffer:m_current_ssbo offset:0 atIndex:1]; 2108 2109 if (!m_features.framebuffer_fetch && (m_current_feedback_loop & GPUPipeline::ColorFeedbackLoop)) 2110 { 2111 DebugAssert(m_current_render_targets[0]); 2112 [m_render_encoder setFragmentTexture:m_current_render_targets[0]->GetMTLTexture() atIndex:MAX_TEXTURE_SAMPLERS]; 2113 } 2114 2115 SetViewportInRenderEncoder(); 2116 SetScissorInRenderEncoder(); 2117 } 2118 2119 void MetalDevice::SetViewportInRenderEncoder() 2120 { 2121 const GSVector4i rc = ClampToFramebufferSize(m_current_viewport); 2122 [m_render_encoder 2123 setViewport:(MTLViewport){static_cast<double>(rc.left), static_cast<double>(rc.top), 2124 static_cast<double>(rc.width()), static_cast<double>(rc.height()), 0.0, 1.0}]; 2125 } 2126 2127 void MetalDevice::SetScissorInRenderEncoder() 2128 { 2129 const GSVector4i rc = ClampToFramebufferSize(m_current_scissor); 2130 [m_render_encoder 2131 setScissorRect:(MTLScissorRect){static_cast<NSUInteger>(rc.left), static_cast<NSUInteger>(rc.top), 2132 static_cast<NSUInteger>(rc.width()), static_cast<NSUInteger>(rc.height())}]; 2133 } 2134 2135 GSVector4i MetalDevice::ClampToFramebufferSize(const GSVector4i rc) const 2136 { 2137 const MetalTexture* rt_or_ds = 2138 (m_num_current_render_targets > 0) ? m_current_render_targets[0] : m_current_depth_target; 2139 const s32 clamp_width = rt_or_ds ? rt_or_ds->GetWidth() : m_window_info.surface_width; 2140 const s32 clamp_height = rt_or_ds ? rt_or_ds->GetHeight() : m_window_info.surface_height; 2141 return rc.rintersect(GSVector4i(0, 0, clamp_width, clamp_height)); 2142 } 2143 2144 void MetalDevice::PreDrawCheck() 2145 { 2146 if (!InRenderPass()) 2147 BeginRenderPass(); 2148 } 2149 2150 void MetalDevice::Draw(u32 vertex_count, u32 base_vertex) 2151 { 2152 PreDrawCheck(); 2153 s_stats.num_draws++; 2154 [m_render_encoder drawPrimitives:m_current_pipeline->GetPrimitive() vertexStart:base_vertex vertexCount:vertex_count]; 2155 } 2156 2157 void MetalDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) 2158 { 2159 PreDrawCheck(); 2160 2161 s_stats.num_draws++; 2162 2163 const u32 index_offset = base_index * sizeof(u16); 2164 [m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive() 2165 indexCount:index_count 2166 indexType:MTLIndexTypeUInt16 2167 indexBuffer:m_index_buffer.GetBuffer() 2168 indexBufferOffset:index_offset 2169 instanceCount:1 2170 baseVertex:base_vertex 2171 baseInstance:0]; 2172 } 2173 2174 void MetalDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) 2175 { 2176 // Shouldn't be using this with framebuffer fetch. 2177 DebugAssert(!m_features.framebuffer_fetch); 2178 2179 const bool skip_first_barrier = !InRenderPass(); 2180 PreDrawCheck(); 2181 2182 // TODO: The first barrier is unnecessary if we're starting the render pass. 2183 2184 u32 index_offset = base_index * sizeof(u16); 2185 2186 switch (type) 2187 { 2188 case GPUDevice::DrawBarrier::None: 2189 { 2190 s_stats.num_draws++; 2191 2192 [m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive() 2193 indexCount:index_count 2194 indexType:MTLIndexTypeUInt16 2195 indexBuffer:m_index_buffer.GetBuffer() 2196 indexBufferOffset:index_offset 2197 instanceCount:1 2198 baseVertex:base_vertex 2199 baseInstance:0]; 2200 } 2201 break; 2202 2203 case GPUDevice::DrawBarrier::One: 2204 { 2205 DebugAssert(m_num_current_render_targets == 1); 2206 s_stats.num_draws++; 2207 2208 if (!skip_first_barrier) 2209 { 2210 s_stats.num_barriers++; 2211 [m_render_encoder memoryBarrierWithScope:MTLBarrierScopeRenderTargets 2212 afterStages:MTLRenderStageFragment 2213 beforeStages:MTLRenderStageFragment]; 2214 } 2215 2216 [m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive() 2217 indexCount:index_count 2218 indexType:MTLIndexTypeUInt16 2219 indexBuffer:m_index_buffer.GetBuffer() 2220 indexBufferOffset:index_offset 2221 instanceCount:1 2222 baseVertex:base_vertex 2223 baseInstance:0]; 2224 } 2225 break; 2226 2227 case GPUDevice::DrawBarrier::Full: 2228 { 2229 DebugAssert(m_num_current_render_targets == 1); 2230 2231 static constexpr const u8 vertices_per_primitive[][2] = { 2232 {1, 1}, // MTLPrimitiveTypePoint 2233 {2, 2}, // MTLPrimitiveTypeLine 2234 {2, 1}, // MTLPrimitiveTypeLineStrip 2235 {3, 3}, // MTLPrimitiveTypeTriangle 2236 {3, 1}, // MTLPrimitiveTypeTriangleStrip 2237 }; 2238 2239 const u32 first_step = 2240 vertices_per_primitive[static_cast<size_t>(m_current_pipeline->GetPrimitive())][0] * sizeof(u16); 2241 const u32 index_step = 2242 vertices_per_primitive[static_cast<size_t>(m_current_pipeline->GetPrimitive())][1] * sizeof(u16); 2243 const u32 end_offset = (base_index + index_count) * sizeof(u16); 2244 2245 // first primitive 2246 if (!skip_first_barrier) 2247 { 2248 s_stats.num_barriers++; 2249 [m_render_encoder memoryBarrierWithScope:MTLBarrierScopeRenderTargets 2250 afterStages:MTLRenderStageFragment 2251 beforeStages:MTLRenderStageFragment]; 2252 } 2253 s_stats.num_draws++; 2254 [m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive() 2255 indexCount:index_count 2256 indexType:MTLIndexTypeUInt16 2257 indexBuffer:m_index_buffer.GetBuffer() 2258 indexBufferOffset:index_offset 2259 instanceCount:1 2260 baseVertex:base_vertex 2261 baseInstance:0]; 2262 2263 index_offset += first_step; 2264 2265 // remaining primitices 2266 for (; index_offset < end_offset; index_offset += index_step) 2267 { 2268 s_stats.num_barriers++; 2269 s_stats.num_draws++; 2270 2271 [m_render_encoder memoryBarrierWithScope:MTLBarrierScopeRenderTargets 2272 afterStages:MTLRenderStageFragment 2273 beforeStages:MTLRenderStageFragment]; 2274 [m_render_encoder drawIndexedPrimitives:m_current_pipeline->GetPrimitive() 2275 indexCount:index_count 2276 indexType:MTLIndexTypeUInt16 2277 indexBuffer:m_index_buffer.GetBuffer() 2278 indexBufferOffset:index_offset 2279 instanceCount:1 2280 baseVertex:base_vertex 2281 baseInstance:0]; 2282 } 2283 } 2284 break; 2285 2286 DefaultCaseIsUnreachable(); 2287 } 2288 } 2289 2290 id<MTLBlitCommandEncoder> MetalDevice::GetBlitEncoder(bool is_inline) 2291 { 2292 @autoreleasepool 2293 { 2294 if (!is_inline) 2295 { 2296 if (!m_upload_cmdbuf) 2297 { 2298 m_upload_cmdbuf = [[m_queue commandBufferWithUnretainedReferences] retain]; 2299 m_upload_encoder = [[m_upload_cmdbuf blitCommandEncoder] retain]; 2300 [m_upload_encoder setLabel:@"Upload Encoder"]; 2301 } 2302 return m_upload_encoder; 2303 } 2304 2305 // Interleaved with draws. 2306 if (m_inline_upload_encoder != nil) 2307 return m_inline_upload_encoder; 2308 2309 if (InRenderPass()) 2310 EndRenderPass(); 2311 m_inline_upload_encoder = [[m_render_cmdbuf blitCommandEncoder] retain]; 2312 return m_inline_upload_encoder; 2313 } 2314 } 2315 2316 bool MetalDevice::BeginPresent(bool skip_present, u32 clear_color) 2317 { 2318 @autoreleasepool 2319 { 2320 if (skip_present) 2321 return false; 2322 2323 if (m_layer == nil) 2324 { 2325 TrimTexturePool(); 2326 return false; 2327 } 2328 2329 EndAnyEncoding(); 2330 2331 m_layer_drawable = [[m_layer nextDrawable] retain]; 2332 if (m_layer_drawable == nil) 2333 { 2334 TrimTexturePool(); 2335 return false; 2336 } 2337 2338 SetViewportAndScissor(0, 0, m_window_info.surface_width, m_window_info.surface_height); 2339 2340 // Set up rendering to layer. 2341 const GSVector4 clear_color_v = GSVector4::rgba32(clear_color); 2342 id<MTLTexture> layer_texture = [m_layer_drawable texture]; 2343 m_layer_pass_desc.colorAttachments[0].texture = layer_texture; 2344 m_layer_pass_desc.colorAttachments[0].loadAction = MTLLoadActionClear; 2345 m_layer_pass_desc.colorAttachments[0].clearColor = MTLClearColorMake(clear_color_v.r, clear_color_v.g, clear_color_v.g, clear_color_v.a); 2346 m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:m_layer_pass_desc] retain]; 2347 s_stats.num_render_passes++; 2348 std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); 2349 m_num_current_render_targets = 0; 2350 m_current_feedback_loop = GPUPipeline::NoRenderPassFlags; 2351 m_current_depth_target = nullptr; 2352 m_current_pipeline = nullptr; 2353 m_current_depth_state = nil; 2354 SetInitialEncoderState(); 2355 return true; 2356 } 2357 } 2358 2359 void MetalDevice::EndPresent(bool explicit_present) 2360 { 2361 DebugAssert(!explicit_present); 2362 2363 // TODO: Explicit present 2364 DebugAssert(m_num_current_render_targets == 0 && !m_current_depth_target); 2365 EndAnyEncoding(); 2366 2367 [m_render_cmdbuf presentDrawable:m_layer_drawable]; 2368 DeferRelease(m_layer_drawable); 2369 m_layer_drawable = nil; 2370 SubmitCommandBuffer(); 2371 TrimTexturePool(); 2372 } 2373 2374 void MetalDevice::SubmitPresent() 2375 { 2376 Panic("Not supported by this API."); 2377 } 2378 2379 void MetalDevice::CreateCommandBuffer() 2380 { 2381 @autoreleasepool 2382 { 2383 DebugAssert(m_render_cmdbuf == nil); 2384 const u64 fence_counter = ++m_current_fence_counter; 2385 m_render_cmdbuf = [[m_queue commandBufferWithUnretainedReferences] retain]; 2386 [m_render_cmdbuf addCompletedHandler:[this, fence_counter](id<MTLCommandBuffer> buffer) { 2387 CommandBufferCompletedOffThread(buffer, fence_counter); 2388 }]; 2389 } 2390 2391 CleanupObjects(); 2392 } 2393 2394 void MetalDevice::CommandBufferCompletedOffThread(id<MTLCommandBuffer> buffer, u64 fence_counter) 2395 { 2396 std::unique_lock lock(m_fence_mutex); 2397 m_completed_fence_counter.store(std::max(m_completed_fence_counter.load(std::memory_order_acquire), fence_counter), 2398 std::memory_order_release); 2399 2400 if (m_gpu_timing_enabled) 2401 { 2402 const double begin = std::max(m_last_gpu_time_end, [buffer GPUStartTime]); 2403 const double end = [buffer GPUEndTime]; 2404 if (end > begin) 2405 { 2406 m_accumulated_gpu_time += end - begin; 2407 m_last_gpu_time_end = end; 2408 } 2409 } 2410 } 2411 2412 void MetalDevice::SubmitCommandBuffer(bool wait_for_completion) 2413 { 2414 if (m_upload_cmdbuf != nil) 2415 { 2416 [m_upload_encoder endEncoding]; 2417 [m_upload_encoder release]; 2418 m_upload_encoder = nil; 2419 [m_upload_cmdbuf commit]; 2420 [m_upload_cmdbuf release]; 2421 m_upload_cmdbuf = nil; 2422 } 2423 2424 if (m_render_cmdbuf != nil) 2425 { 2426 if (InRenderPass()) 2427 EndRenderPass(); 2428 else if (IsInlineUploading()) 2429 EndInlineUploading(); 2430 2431 [m_render_cmdbuf commit]; 2432 2433 if (wait_for_completion) 2434 [m_render_cmdbuf waitUntilCompleted]; 2435 2436 [m_render_cmdbuf release]; 2437 m_render_cmdbuf = nil; 2438 } 2439 2440 CreateCommandBuffer(); 2441 } 2442 2443 void MetalDevice::SubmitCommandBufferAndRestartRenderPass(const char* reason) 2444 { 2445 DEV_LOG("Submitting command buffer and restarting render pass due to {}", reason); 2446 2447 const bool in_render_pass = InRenderPass(); 2448 SubmitCommandBuffer(); 2449 if (in_render_pass) 2450 BeginRenderPass(); 2451 } 2452 2453 void MetalDevice::WaitForFenceCounter(u64 counter) 2454 { 2455 if (m_completed_fence_counter.load(std::memory_order_relaxed) >= counter) 2456 return; 2457 2458 // TODO: There has to be a better way to do this.. 2459 std::unique_lock lock(m_fence_mutex); 2460 while (m_completed_fence_counter.load(std::memory_order_acquire) < counter) 2461 { 2462 lock.unlock(); 2463 pthread_yield_np(); 2464 lock.lock(); 2465 } 2466 2467 CleanupObjects(); 2468 } 2469 2470 void MetalDevice::WaitForPreviousCommandBuffers() 2471 { 2472 // Early init? 2473 if (m_current_fence_counter == 0) 2474 return; 2475 2476 WaitForFenceCounter(m_current_fence_counter - 1); 2477 } 2478 2479 void MetalDevice::ExecuteAndWaitForGPUIdle() 2480 { 2481 SubmitCommandBuffer(true); 2482 CleanupObjects(); 2483 } 2484 2485 void MetalDevice::CleanupObjects() 2486 { 2487 const u64 counter = m_completed_fence_counter.load(std::memory_order_acquire); 2488 while (m_cleanup_objects.size() > 0 && m_cleanup_objects.front().first <= counter) 2489 { 2490 [m_cleanup_objects.front().second release]; 2491 m_cleanup_objects.pop_front(); 2492 } 2493 } 2494 2495 void MetalDevice::DeferRelease(id obj) 2496 { 2497 MetalDevice& dev = GetInstance(); 2498 dev.m_cleanup_objects.emplace_back(dev.m_current_fence_counter, obj); 2499 } 2500 2501 void MetalDevice::DeferRelease(u64 fence_counter, id obj) 2502 { 2503 MetalDevice& dev = GetInstance(); 2504 dev.m_cleanup_objects.emplace_back(fence_counter, obj); 2505 } 2506 2507 std::unique_ptr<GPUDevice> GPUDevice::WrapNewMetalDevice() 2508 { 2509 return std::unique_ptr<GPUDevice>(new MetalDevice()); 2510 } 2511 2512 GPUDevice::AdapterInfoList GPUDevice::WrapGetMetalAdapterList() 2513 { 2514 AdapterInfoList ret; 2515 @autoreleasepool 2516 { 2517 NSArray<id<MTLDevice>>* devices = [MTLCopyAllDevices() autorelease]; 2518 const u32 count = static_cast<u32>([devices count]); 2519 ret.reserve(count); 2520 for (u32 i = 0; i < count; i++) 2521 { 2522 AdapterInfo ai; 2523 ai.name = [devices[i].name UTF8String]; 2524 ai.max_texture_size = GetMetalMaxTextureSize(devices[i]); 2525 ai.max_multisamples = GetMetalMaxMultisamples(devices[i]); 2526 ai.supports_sample_shading = true; 2527 ret.push_back(std::move(ai)); 2528 } 2529 } 2530 2531 return ret; 2532 }