duckstation

duckstation, archived from the revision just before upstream changed it to a proprietary software project; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git

vulkan_device.cpp (154017B)


      1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
      3 
      4 #include "vulkan_device.h"
      5 #include "vulkan_builders.h"
      6 #include "vulkan_pipeline.h"
      7 #include "vulkan_stream_buffer.h"
      8 #include "vulkan_swap_chain.h"
      9 #include "vulkan_texture.h"
     10 
     11 #include "core/host.h"
     12 
     13 #include "common/align.h"
     14 #include "common/assert.h"
     15 #include "common/bitutils.h"
     16 #include "common/error.h"
     17 #include "common/file_system.h"
     18 #include "common/log.h"
     19 #include "common/path.h"
     20 #include "common/scoped_guard.h"
     21 #include "common/small_string.h"
     22 
     23 #include "fmt/format.h"
     24 #include "xxhash.h"
     25 
     26 #include <cstdlib>
     27 #include <limits>
     28 #include <mutex>
     29 
     30 Log_SetChannel(VulkanDevice);
     31 
     32 // TODO: VK_KHR_display.
     33 
     34 #pragma pack(push, 4)
     35 struct VK_PIPELINE_CACHE_HEADER
     36 {
     37   u32 header_length;
     38   u32 header_version;
     39   u32 vendor_id;
     40   u32 device_id;
     41   u8 uuid[VK_UUID_SIZE];
     42 };
     43 #pragma pack(pop)
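        // This mirrors the header layout the Vulkan spec mandates for the first bytes returned by
        // vkGetPipelineCacheData(); packing to 4 keeps the struct free of padding so it can be
        // memcpy'd straight out of a cache blob. A minimal validation sketch (helper name
        // hypothetical) for deciding whether an on-disk cache can be trusted:
        //
        //   static bool IsPipelineCacheUsable(const VK_PIPELINE_CACHE_HEADER& h,
        //                                     const VkPhysicalDeviceProperties& props)
        //   {
        //     return h.header_version == VK_PIPELINE_CACHE_HEADER_VERSION_ONE && h.vendor_id == props.vendorID &&
        //            h.device_id == props.deviceID && std::memcmp(h.uuid, props.pipelineCacheUUID, VK_UUID_SIZE) == 0;
        //   }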
     44 
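        // Maps a texture's state to the render-pass load op: dirty contents must be loaded,
        // cleared targets get a hardware clear, and invalidated contents can be discarded
        // (assuming GPUTexture::State is ordered Dirty, Cleared, Invalidated).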
     45 static VkAttachmentLoadOp GetLoadOpForTexture(const GPUTexture* tex)
     46 {
     47   static constexpr VkAttachmentLoadOp ops[3] = {VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_LOAD_OP_CLEAR,
     48                                                 VK_ATTACHMENT_LOAD_OP_DONT_CARE};
     49   return ops[static_cast<u8>(tex->GetState())];
     50 }
     51 
     52 // Tweakables
     53 enum : u32
     54 {
     55   MAX_DRAW_CALLS_PER_FRAME = 2048,
     56   MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME = GPUDevice::MAX_TEXTURE_SAMPLERS * MAX_DRAW_CALLS_PER_FRAME,
     57   MAX_INPUT_ATTACHMENT_DESCRIPTORS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME,
     58   MAX_DESCRIPTOR_SETS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME,
     59   MAX_SAMPLER_DESCRIPTORS = 8192,
     60 
     61   VERTEX_BUFFER_SIZE = 32 * 1024 * 1024,
     62   INDEX_BUFFER_SIZE = 16 * 1024 * 1024,
     63   VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024,
     64   FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024,
     65   TEXTURE_BUFFER_SIZE = 64 * 1024 * 1024,
     66 
     67   UNIFORM_PUSH_CONSTANTS_STAGES = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
     68   UNIFORM_PUSH_CONSTANTS_SIZE = 128,
     69 
     70   MAX_UNIFORM_BUFFER_SIZE = 1024,
     71 };
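        // Note: UNIFORM_PUSH_CONSTANTS_SIZE of 128 bytes matches the minimum maxPushConstantsSize
        // the Vulkan spec guarantees, so push constants of this size work on any conformant driver.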
     72 
     73 const std::array<VkFormat, static_cast<u32>(GPUTexture::Format::MaxCount)> VulkanDevice::TEXTURE_FORMAT_MAPPING = {
     74   VK_FORMAT_UNDEFINED,                // Unknown
     75   VK_FORMAT_R8G8B8A8_UNORM,           // RGBA8
     76   VK_FORMAT_B8G8R8A8_UNORM,           // BGRA8
     77   VK_FORMAT_R5G6B5_UNORM_PACK16,      // RGB565
     78   VK_FORMAT_R5G5B5A1_UNORM_PACK16,    // RGBA5551
     79   VK_FORMAT_R8_UNORM,                 // R8
     80   VK_FORMAT_D16_UNORM,                // D16
     81   VK_FORMAT_D24_UNORM_S8_UINT,        // D24S8
     82   VK_FORMAT_D32_SFLOAT,               // D32F
     83   VK_FORMAT_D32_SFLOAT_S8_UINT,       // D32FS8
     84   VK_FORMAT_R16_UNORM,                // R16
     85   VK_FORMAT_R16_SINT,                 // R16I
     86   VK_FORMAT_R16_UINT,                 // R16U
     87   VK_FORMAT_R16_SFLOAT,               // R16F
     88   VK_FORMAT_R32_SINT,                 // R32I
     89   VK_FORMAT_R32_UINT,                 // R32U
     90   VK_FORMAT_R32_SFLOAT,               // R32F
     91   VK_FORMAT_R8G8_UNORM,               // RG8
     92   VK_FORMAT_R16G16_UNORM,             // RG16
     93   VK_FORMAT_R16G16_SFLOAT,            // RG16F
     94   VK_FORMAT_R32G32_SFLOAT,            // RG32F
     95   VK_FORMAT_R16G16B16A16_UNORM,       // RGBA16
     96   VK_FORMAT_R16G16B16A16_SFLOAT,      // RGBA16F
     97   VK_FORMAT_R32G32B32A32_SFLOAT,      // RGBA32F
     98   VK_FORMAT_A2R10G10B10_UNORM_PACK32, // RGB10A2
     99 };
    100 
    101 // Handles are always 64-bit, even on 32-bit platforms.
    102 static const VkRenderPass DYNAMIC_RENDERING_RENDER_PASS = ((VkRenderPass) static_cast<s64>(-1LL));
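        // Sentinel stored where a real VkRenderPass handle would otherwise go; an all-ones value is
        // effectively guaranteed not to collide with a driver-allocated handle, marking state that
        // uses VK_KHR_dynamic_rendering in place of a cached render pass object.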
    103 
    104 #ifdef _DEBUG
    105 static u32 s_debug_scope_depth = 0;
    106 #endif
    107 
    108 // We need to synchronize instance creation because of adapter enumeration from the UI thread.
    109 static std::mutex s_instance_mutex;
    110 
    111 VulkanDevice::VulkanDevice()
    112 {
    113 #ifdef _DEBUG
    114   s_debug_scope_depth = 0;
    115 #endif
    116 }
    117 
    118 VulkanDevice::~VulkanDevice()
    119 {
    120   Assert(m_device == VK_NULL_HANDLE);
    121 }
    122 
    123 GPUTexture::Format VulkanDevice::GetFormatForVkFormat(VkFormat format)
    124 {
    125   for (u32 i = 0; i < static_cast<u32>(std::size(TEXTURE_FORMAT_MAPPING)); i++)
    126   {
    127     if (TEXTURE_FORMAT_MAPPING[i] == format)
    128       return static_cast<GPUTexture::Format>(i);
    129   }
    130 
    131   return GPUTexture::Format::Unknown;
    132 }
    133 
    134 VkInstance VulkanDevice::CreateVulkanInstance(const WindowInfo& wi, OptionalExtensions* oe, bool enable_debug_utils,
    135                                               bool enable_validation_layer)
    136 {
    137   ExtensionList enabled_extensions;
    138   if (!SelectInstanceExtensions(&enabled_extensions, wi, oe, enable_debug_utils))
    139     return VK_NULL_HANDLE;
    140 
    141   u32 maxApiVersion = VK_API_VERSION_1_0;
    142   if (vkEnumerateInstanceVersion)
    143   {
    144     VkResult res = vkEnumerateInstanceVersion(&maxApiVersion);
    145     if (res != VK_SUCCESS)
    146     {
    147       LOG_VULKAN_ERROR(res, "vkEnumerateInstanceVersion() failed: ");
    148       maxApiVersion = VK_API_VERSION_1_0;
    149     }
    150   }
    151   else
    152   {
    153     WARNING_LOG("Driver does not provide vkEnumerateInstanceVersion().");
    154   }
    155 
    156   // Cap out at 1.1 for consistency.
    157   const u32 apiVersion = std::min(maxApiVersion, VK_API_VERSION_1_1);
    158   INFO_LOG("Supported instance version: {}.{}.{}, requesting version {}.{}.{}", VK_API_VERSION_MAJOR(maxApiVersion),
    159            VK_API_VERSION_MINOR(maxApiVersion), VK_API_VERSION_PATCH(maxApiVersion), VK_API_VERSION_MAJOR(apiVersion),
    160            VK_API_VERSION_MINOR(apiVersion), VK_API_VERSION_PATCH(apiVersion));
    161 
    162   // Remember to manually update this every release. We don't pull in svnrev.h here, because
    163   // it's only the major/minor version, and rebuilding the file every time something else changes
    164   // is unnecessary.
    165   VkApplicationInfo app_info = {};
    166   app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
    167   app_info.pNext = nullptr;
    168   app_info.pApplicationName = "DuckStation";
    169   app_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0);
    170   app_info.pEngineName = "DuckStation";
    171   app_info.engineVersion = VK_MAKE_VERSION(0, 1, 0);
    172   app_info.apiVersion = apiVersion;
    173 
    174   VkInstanceCreateInfo instance_create_info = {};
    175   instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
    176   instance_create_info.pNext = nullptr;
    177   instance_create_info.flags = 0;
    178   instance_create_info.pApplicationInfo = &app_info;
    179   instance_create_info.enabledExtensionCount = static_cast<uint32_t>(enabled_extensions.size());
    180   instance_create_info.ppEnabledExtensionNames = enabled_extensions.data();
    181   instance_create_info.enabledLayerCount = 0;
    182   instance_create_info.ppEnabledLayerNames = nullptr;
    183 
    184   // Enable debug layer on debug builds
    185   if (enable_validation_layer)
    186   {
    187     static const char* layer_names[] = {"VK_LAYER_KHRONOS_validation"};
    188     instance_create_info.enabledLayerCount = 1;
    189     instance_create_info.ppEnabledLayerNames = layer_names;
    190   }
    191 
    192   VkInstance instance;
    193   VkResult res = vkCreateInstance(&instance_create_info, nullptr, &instance);
    194   if (res != VK_SUCCESS)
    195   {
    196     LOG_VULKAN_ERROR(res, "vkCreateInstance failed: ");
    197     return nullptr;
    198   }
    199 
    200   return instance;
    201 }
    202 
    203 bool VulkanDevice::SelectInstanceExtensions(ExtensionList* extension_list, const WindowInfo& wi, OptionalExtensions* oe,
    204                                             bool enable_debug_utils)
    205 {
    206   u32 extension_count = 0;
    207   VkResult res = vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, nullptr);
    208   if (res != VK_SUCCESS)
    209   {
    210     LOG_VULKAN_ERROR(res, "vkEnumerateInstanceExtensionProperties failed: ");
    211     return false;
    212   }
    213 
    214   if (extension_count == 0)
    215   {
    216     ERROR_LOG("Vulkan: No extensions supported by instance.");
    217     return false;
    218   }
    219 
    220   std::vector<VkExtensionProperties> available_extension_list(extension_count);
    221   res = vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, available_extension_list.data());
    222   DebugAssert(res == VK_SUCCESS);
    223 
    224   auto SupportsExtension = [&](const char* name, bool required) {
    225     if (std::find_if(available_extension_list.begin(), available_extension_list.end(),
    226                      [&](const VkExtensionProperties& properties) {
    227                        return !strcmp(name, properties.extensionName);
    228                      }) != available_extension_list.end())
    229     {
    230       DEV_LOG("Enabling extension: {}", name);
    231       extension_list->push_back(name);
    232       return true;
    233     }
    234 
    235     if (required)
    236       ERROR_LOG("Vulkan: Missing required extension {}.", name);
    237 
    238     return false;
    239   };
    240 
    241   // Common extensions
    242   if (wi.type != WindowInfo::Type::Surfaceless && !SupportsExtension(VK_KHR_SURFACE_EXTENSION_NAME, true))
    243     return false;
    244 
    245 #if defined(VK_USE_PLATFORM_WIN32_KHR)
    246   if (wi.type == WindowInfo::Type::Win32 && !SupportsExtension(VK_KHR_WIN32_SURFACE_EXTENSION_NAME, true))
    247     return false;
    248 #endif
    249 #if defined(VK_USE_PLATFORM_XLIB_KHR)
    250   if (wi.type == WindowInfo::Type::X11 && !SupportsExtension(VK_KHR_XLIB_SURFACE_EXTENSION_NAME, true))
    251     return false;
    252 #endif
    253 #if defined(VK_USE_PLATFORM_WAYLAND_KHR)
    254   if (wi.type == WindowInfo::Type::Wayland && !SupportsExtension(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, true))
    255     return false;
    256 #endif
    257 #if defined(VK_USE_PLATFORM_METAL_EXT)
    258   if (wi.type == WindowInfo::Type::MacOS && !SupportsExtension(VK_EXT_METAL_SURFACE_EXTENSION_NAME, true))
    259     return false;
    260 #endif
    261 #if defined(VK_USE_PLATFORM_ANDROID_KHR)
    262   if (wi.type == WindowInfo::Type::Android && !SupportsExtension(VK_KHR_ANDROID_SURFACE_EXTENSION_NAME, true))
    263     return false;
    264 #endif
    265 
    266   // VK_EXT_debug_utils
    267   if (enable_debug_utils && !SupportsExtension(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, false))
    268     WARNING_LOG("Vulkan: Debug report requested, but extension is not available.");
    269 
    270   // Needed for exclusive fullscreen control.
    271   SupportsExtension(VK_KHR_GET_SURFACE_CAPABILITIES_2_EXTENSION_NAME, false);
    272 
    273   oe->vk_ext_swapchain_maintenance1 =
    274     (wi.type != WindowInfo::Type::Surfaceless && SupportsExtension(VK_EXT_SURFACE_MAINTENANCE_1_EXTENSION_NAME, false));
    275   oe->vk_khr_get_physical_device_properties2 =
    276     SupportsExtension(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, false);
    277 
    278   return true;
    279 }
    280 
    281 VulkanDevice::GPUList VulkanDevice::EnumerateGPUs(VkInstance instance)
    282 {
    283   GPUList gpus;
    284 
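          // Standard two-call Vulkan enumeration idiom: query the count, size the buffer, then
          // fetch. The count can legitimately change between the calls (devices can appear or
          // vanish), which is why VK_INCOMPLETE and a shrinking count are both handled below.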
    285   u32 gpu_count = 0;
    286   VkResult res = vkEnumeratePhysicalDevices(instance, &gpu_count, nullptr);
    287   if ((res != VK_SUCCESS && res != VK_INCOMPLETE) || gpu_count == 0)
    288   {
    289     LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices (1) failed: ");
    290     return gpus;
    291   }
    292 
    293   std::vector<VkPhysicalDevice> physical_devices(gpu_count);
    294   res = vkEnumeratePhysicalDevices(instance, &gpu_count, physical_devices.data());
    295   if (res == VK_INCOMPLETE)
    296   {
    297     WARNING_LOG("First vkEnumeratePhysicalDevices() call returned {} devices, but second returned {}",
    298                 physical_devices.size(), gpu_count);
    299   }
    300   else if (res != VK_SUCCESS)
    301   {
    302     LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices (2) failed: ");
    303     return gpus;
    304   }
    305 
    306   // Maybe we lost a GPU?
    307   if (gpu_count < physical_devices.size())
    308     physical_devices.resize(gpu_count);
    309 
    310   gpus.reserve(physical_devices.size());
    311   for (VkPhysicalDevice device : physical_devices)
    312   {
    313     VkPhysicalDeviceProperties props = {};
    314     vkGetPhysicalDeviceProperties(device, &props);
    315 
    316     VkPhysicalDeviceFeatures available_features = {};
    317     vkGetPhysicalDeviceFeatures(device, &available_features);
    318 
    319     AdapterInfo ai;
    320     ai.name = props.deviceName;
    321     ai.max_texture_size = std::min(props.limits.maxFramebufferWidth, props.limits.maxImageDimension2D);
    322     ai.max_multisamples = GetMaxMultisamples(device, props);
    323     ai.supports_sample_shading = available_features.sampleRateShading;
    324 
    325     // handle duplicate adapter names
    326     if (std::any_of(gpus.begin(), gpus.end(), [&ai](const auto& other) { return (ai.name == other.second.name); }))
    327     {
    328       std::string original_adapter_name = std::move(ai.name);
    329 
    330       u32 current_extra = 2;
    331       do
    332       {
    333         ai.name = fmt::format("{} ({})", original_adapter_name, current_extra);
    334         current_extra++;
    335       } while (
    336         std::any_of(gpus.begin(), gpus.end(), [&ai](const auto& other) { return (ai.name == other.second.name); }));
    337     }
    338 
    339     gpus.emplace_back(device, std::move(ai));
    340   }
    341 
    342   return gpus;
    343 }
    344 
    345 VulkanDevice::GPUList VulkanDevice::EnumerateGPUs()
    346 {
    347   GPUList ret;
    348   std::unique_lock lock(s_instance_mutex);
    349 
    350   // Device shouldn't be torn down since we have the lock.
    351   if (g_gpu_device && g_gpu_device->GetRenderAPI() == RenderAPI::Vulkan && Vulkan::IsVulkanLibraryLoaded())
    352   {
    353     ret = EnumerateGPUs(VulkanDevice::GetInstance().m_instance);
    354   }
    355   else
    356   {
    357     if (Vulkan::LoadVulkanLibrary(nullptr))
    358     {
    359       OptionalExtensions oe = {};
    360       const VkInstance instance = CreateVulkanInstance(WindowInfo(), &oe, false, false);
    361       if (instance != VK_NULL_HANDLE)
    362       {
    363         if (Vulkan::LoadVulkanInstanceFunctions(instance))
    364           ret = EnumerateGPUs(instance);
    365 
    366         vkDestroyInstance(instance, nullptr);
    367       }
    368 
    369       Vulkan::UnloadVulkanLibrary();
    370     }
    371   }
    372 
    373   return ret;
    374 }
    375 
    376 GPUDevice::AdapterInfoList VulkanDevice::GetAdapterList()
    377 {
    378   AdapterInfoList ret;
    379   GPUList gpus = EnumerateGPUs();
    380   ret.reserve(gpus.size());
    381   for (auto& [physical_device, adapter_info] : gpus)
    382     ret.push_back(std::move(adapter_info));
    383   return ret;
    384 }
    385 
    386 bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface, Error* error)
    387 {
    388   u32 extension_count = 0;
    389   VkResult res = vkEnumerateDeviceExtensionProperties(m_physical_device, nullptr, &extension_count, nullptr);
    390   if (res != VK_SUCCESS)
    391   {
    392     LOG_VULKAN_ERROR(res, "vkEnumerateDeviceExtensionProperties failed: ");
    393     Vulkan::SetErrorObject(error, "vkEnumerateDeviceExtensionProperties failed: ", res);
    394     return false;
    395   }
    396 
    397   if (extension_count == 0)
    398   {
    399     ERROR_LOG("No extensions supported by device.");
    400     Error::SetStringView(error, "No extensions supported by device.");
    401     return false;
    402   }
    403 
    404   std::vector<VkExtensionProperties> available_extension_list(extension_count);
    405   res =
    406     vkEnumerateDeviceExtensionProperties(m_physical_device, nullptr, &extension_count, available_extension_list.data());
    407   DebugAssert(res == VK_SUCCESS);
    408 
    409   auto SupportsExtension = [&](const char* name, bool required) {
    410     if (std::find_if(available_extension_list.begin(), available_extension_list.end(),
    411                      [&](const VkExtensionProperties& properties) {
    412                        return !strcmp(name, properties.extensionName);
    413                      }) != available_extension_list.end())
    414     {
    415       if (std::none_of(extension_list->begin(), extension_list->end(),
    416                        [&](const char* existing_name) { return (std::strcmp(existing_name, name) == 0); }))
    417       {
    418         DEV_LOG("Enabling extension: {}", name);
    419         extension_list->push_back(name);
    420       }
    421 
    422       return true;
    423     }
    424 
    425     if (required)
    426     {
    427       ERROR_LOG("Vulkan: Missing required extension {}.", name);
    428       Error::SetStringFmt(error, "Missing required extension {}.", name);
    429     }
    430 
    431     return false;
    432   };
    433 
    434   if (enable_surface && !SupportsExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true))
    435     return false;
    436 
    437   m_optional_extensions.vk_ext_memory_budget = SupportsExtension(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, false);
    438   m_optional_extensions.vk_ext_rasterization_order_attachment_access =
    439     SupportsExtension(VK_EXT_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME, false) ||
    440     SupportsExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME, false);
    441   m_optional_extensions.vk_khr_get_memory_requirements2 =
    442     SupportsExtension(VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME, false);
    443   m_optional_extensions.vk_khr_bind_memory2 = SupportsExtension(VK_KHR_BIND_MEMORY_2_EXTENSION_NAME, false);
    444   m_optional_extensions.vk_khr_dedicated_allocation =
    445     SupportsExtension(VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME, false);
    446   m_optional_extensions.vk_khr_driver_properties = SupportsExtension(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, false);
    447   m_optional_extensions.vk_khr_dynamic_rendering =
    448     SupportsExtension(VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME, false) &&
    449     SupportsExtension(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME, false) &&
    450     SupportsExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, false);
    451   m_optional_extensions.vk_khr_dynamic_rendering_local_read =
    452     m_optional_extensions.vk_khr_dynamic_rendering &&
    453     SupportsExtension(VK_KHR_DYNAMIC_RENDERING_LOCAL_READ_EXTENSION_NAME, false);
    454   m_optional_extensions.vk_khr_push_descriptor = SupportsExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, false);
    455 
    456 // glslang generates debug info instructions before phi nodes at the beginning of blocks when non-semantic debug info
    457 // is enabled, triggering errors from spirv-val. Gate it behind an environment variable if you want source debugging
    458 // until this is fixed.
    459   if (const char* val = std::getenv("USE_NON_SEMANTIC_DEBUG_INFO");
    460       val && StringUtil::FromChars<bool>(val).value_or(false))
    461   {
    462     m_optional_extensions.vk_khr_shader_non_semantic_info =
    463       SupportsExtension(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, false);
    464   }
    465 
    466   m_optional_extensions.vk_ext_external_memory_host =
    467     SupportsExtension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, false);
    468   m_optional_extensions.vk_ext_swapchain_maintenance1 =
    469     m_optional_extensions.vk_ext_swapchain_maintenance1 &&
    470     SupportsExtension(VK_EXT_SWAPCHAIN_MAINTENANCE_1_EXTENSION_NAME, false);
    471 
    472   // Dynamic rendering isn't strictly needed for FSI, but we want it with framebufferless rendering.
    473   m_optional_extensions.vk_ext_fragment_shader_interlock =
    474     m_optional_extensions.vk_khr_dynamic_rendering &&
    475     SupportsExtension(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME, false);
    476 
    477 #ifdef _WIN32
    478   m_optional_extensions.vk_ext_full_screen_exclusive =
    479     enable_surface && SupportsExtension(VK_EXT_FULL_SCREEN_EXCLUSIVE_EXTENSION_NAME, false);
    480   INFO_LOG("VK_EXT_full_screen_exclusive is {}",
    481            m_optional_extensions.vk_ext_full_screen_exclusive ? "supported" : "NOT supported");
    482 #endif
    483 
    484   if (IsBrokenMobileDriver())
    485   {
    486     // Push descriptor is broken on Adreno v502.. don't want to think about dynamic rendering.
    487     if (m_optional_extensions.vk_khr_dynamic_rendering)
    488     {
    489       m_optional_extensions.vk_khr_dynamic_rendering = false;
    490       m_optional_extensions.vk_khr_dynamic_rendering_local_read = false;
    491       m_optional_extensions.vk_ext_fragment_shader_interlock = false;
    492       WARNING_LOG("Disabling VK_KHR_dynamic_rendering on broken mobile driver.");
    493     }
    494     if (m_optional_extensions.vk_khr_push_descriptor)
    495     {
    496       m_optional_extensions.vk_khr_push_descriptor = false;
    497       WARNING_LOG("Disabling VK_KHR_push_descriptor on broken mobile driver.");
    498     }
    499   }
    500   else if (IsDeviceAMD())
    501   {
    502     // VK_KHR_dynamic_rendering_local_read appears to be broken on RDNA3, like everything else...
    503     // Just causes GPU resets when you actually use a feedback loop. Assume Mesa is fine.
    504 #if defined(_WIN32) || defined(__ANDROID__)
    505     m_optional_extensions.vk_khr_dynamic_rendering_local_read = false;
    506     WARNING_LOG("Disabling VK_KHR_dynamic_rendering_local_read on broken AMD driver.");
    507 #endif
    508   }
    509 
    510   // Don't bother checking for maintenance 4/5 if we don't have 1-3, i.e. Vulkan 1.1.
    511   if (m_device_properties.apiVersion >= VK_API_VERSION_1_1)
    512   {
    513     m_optional_extensions.vk_khr_maintenance4 = SupportsExtension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME, false);
    514     m_optional_extensions.vk_khr_maintenance5 =
    515       m_optional_extensions.vk_khr_maintenance4 && SupportsExtension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME, false);
    516   }
    517 
    518   return true;
    519 }
    520 
    521 bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer, FeatureMask disabled_features,
    522                                 Error* error)
    523 {
    524   u32 queue_family_count;
    525   vkGetPhysicalDeviceQueueFamilyProperties(m_physical_device, &queue_family_count, nullptr);
    526   if (queue_family_count == 0)
    527   {
    528     ERROR_LOG("No queue families found on specified vulkan physical device.");
    529     Error::SetStringView(error, "No queue families found on specified vulkan physical device.");
    530     return false;
    531   }
    532 
    533   std::vector<VkQueueFamilyProperties> queue_family_properties(queue_family_count);
    534   vkGetPhysicalDeviceQueueFamilyProperties(m_physical_device, &queue_family_count, queue_family_properties.data());
    535   DEV_LOG("{} vulkan queue families", queue_family_count);
    536 
    537   // Find graphics and present queues.
    538   m_graphics_queue_family_index = queue_family_count;
    539   m_present_queue_family_index = queue_family_count;
    540   for (uint32_t i = 0; i < queue_family_count; i++)
    541   {
    542     VkBool32 graphics_supported = queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT;
    543     if (graphics_supported)
    544     {
    545       m_graphics_queue_family_index = i;
    546       // Quit now, no need for a present queue.
    547       if (!surface)
    548       {
    549         break;
    550       }
    551     }
    552 
    553     if (surface)
    554     {
    555       VkBool32 present_supported;
    556       VkResult res = vkGetPhysicalDeviceSurfaceSupportKHR(m_physical_device, i, surface, &present_supported);
    557       if (res != VK_SUCCESS)
    558       {
    559         LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceSupportKHR failed: ");
    560         Vulkan::SetErrorObject(error, "vkGetPhysicalDeviceSurfaceSupportKHR failed: ", res);
    561         return false;
    562       }
    563 
    564       if (present_supported)
    565       {
    566         m_present_queue_family_index = i;
    567       }
    568 
    569       // Prefer one queue family index that does both graphics and present.
    570       if (graphics_supported && present_supported)
    571       {
    572         break;
    573       }
    574     }
    575   }
    576   if (m_graphics_queue_family_index == queue_family_count)
    577   {
    578     ERROR_LOG("Vulkan: Failed to find an acceptable graphics queue.");
    579     Error::SetStringView(error, "Vulkan: Failed to find an acceptable graphics queue.");
    580     return false;
    581   }
    582   if (surface != VK_NULL_HANDLE && m_present_queue_family_index == queue_family_count)
    583   {
    584     ERROR_LOG("Vulkan: Failed to find an acceptable present queue.");
    585     Error::SetStringView(error, "Vulkan: Failed to find an acceptable present queue.");
    586     return false;
    587   }
    588 
    589   VkDeviceCreateInfo device_info = {};
    590   device_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
    591   device_info.pNext = nullptr;
    592   device_info.flags = 0;
    593   device_info.queueCreateInfoCount = 0;
    594 
    595   static constexpr float queue_priorities[] = {1.0f};
    596   std::array<VkDeviceQueueCreateInfo, 2> queue_infos;
    597   VkDeviceQueueCreateInfo& graphics_queue_info = queue_infos[device_info.queueCreateInfoCount++];
    598   graphics_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
    599   graphics_queue_info.pNext = nullptr;
    600   graphics_queue_info.flags = 0;
    601   graphics_queue_info.queueFamilyIndex = m_graphics_queue_family_index;
    602   graphics_queue_info.queueCount = 1;
    603   graphics_queue_info.pQueuePriorities = queue_priorities;
    604 
    605   if (surface != VK_NULL_HANDLE && m_graphics_queue_family_index != m_present_queue_family_index)
    606   {
    607     VkDeviceQueueCreateInfo& present_queue_info = queue_infos[device_info.queueCreateInfoCount++];
    608     present_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
    609     present_queue_info.pNext = nullptr;
    610     present_queue_info.flags = 0;
    611     present_queue_info.queueFamilyIndex = m_present_queue_family_index;
    612     present_queue_info.queueCount = 1;
    613     present_queue_info.pQueuePriorities = queue_priorities;
    614   }
    615 
    616   device_info.pQueueCreateInfos = queue_infos.data();
    617 
    618   ExtensionList enabled_extensions;
    619   if (!SelectDeviceExtensions(&enabled_extensions, surface != VK_NULL_HANDLE, error))
    620     return false;
    621 
    622   device_info.enabledExtensionCount = static_cast<uint32_t>(enabled_extensions.size());
    623   device_info.ppEnabledExtensionNames = enabled_extensions.data();
    624 
    625   // Check for required features before creating.
    626   VkPhysicalDeviceFeatures available_features;
    627   vkGetPhysicalDeviceFeatures(m_physical_device, &available_features);
    628 
    629   // Enable the features we use.
    630   VkPhysicalDeviceFeatures enabled_features = {};
    631   enabled_features.dualSrcBlend = available_features.dualSrcBlend;
    632   enabled_features.largePoints = available_features.largePoints;
    633   enabled_features.wideLines = available_features.wideLines;
    634   enabled_features.samplerAnisotropy = available_features.samplerAnisotropy;
    635   enabled_features.sampleRateShading = available_features.sampleRateShading;
    636   enabled_features.geometryShader = available_features.geometryShader;
    637   enabled_features.fragmentStoresAndAtomics = available_features.fragmentStoresAndAtomics;
    638   device_info.pEnabledFeatures = &enabled_features;
    639 
    640   // Enable debug layer on debug builds
    641   if (enable_validation_layer)
    642   {
    643     static const char* layer_names[] = {"VK_LAYER_LUNARG_standard_validation"};
    644     device_info.enabledLayerCount = 1;
    645     device_info.ppEnabledLayerNames = layer_names;
    646   }
    647 
    648   VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT rasterization_order_access_feature = {
    649     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_EXT, nullptr, VK_TRUE, VK_FALSE,
    650     VK_FALSE};
    651   VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_feature = {
    652     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES, nullptr, VK_TRUE};
    653   VkPhysicalDeviceDynamicRenderingLocalReadFeaturesKHR dynamic_rendering_local_read_feature = {
    654     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_LOCAL_READ_FEATURES_KHR, nullptr, VK_TRUE};
    655   VkPhysicalDeviceSwapchainMaintenance1FeaturesEXT swapchain_maintenance1_feature = {
    656     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SWAPCHAIN_MAINTENANCE_1_FEATURES_EXT, nullptr, VK_TRUE};
    657   VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragment_shader_interlock_feature = {
    658     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT, nullptr, VK_FALSE, VK_TRUE, VK_FALSE};
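          // Unlike the query path in ProcessDeviceExtensions(), these structs set the feature bits
          // we want to VK_TRUE up front: chained into VkDeviceCreateInfo they request enablement
          // rather than report support.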
    659 
    660   if (m_optional_extensions.vk_ext_rasterization_order_attachment_access)
    661     Vulkan::AddPointerToChain(&device_info, &rasterization_order_access_feature);
    662   if (m_optional_extensions.vk_ext_swapchain_maintenance1)
    663     Vulkan::AddPointerToChain(&device_info, &swapchain_maintenance1_feature);
    664   if (m_optional_extensions.vk_khr_dynamic_rendering)
    665   {
    666     Vulkan::AddPointerToChain(&device_info, &dynamic_rendering_feature);
    667     if (m_optional_extensions.vk_khr_dynamic_rendering_local_read)
    668       Vulkan::AddPointerToChain(&device_info, &dynamic_rendering_local_read_feature);
    669     if (m_optional_extensions.vk_ext_fragment_shader_interlock)
    670       Vulkan::AddPointerToChain(&device_info, &fragment_shader_interlock_feature);
    671   }
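          // AddPointerToChain() appends each struct to device_info's pNext chain; the interlock and
          // local-read structs are only chained under dynamic rendering, since both were gated on
          // it during extension selection above.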
    672 
    673   VkResult res = vkCreateDevice(m_physical_device, &device_info, nullptr, &m_device);
    674   if (res != VK_SUCCESS)
    675   {
    676     LOG_VULKAN_ERROR(res, "vkCreateDevice failed: ");
    677     Vulkan::SetErrorObject(error, "vkCreateDevice failed: ", res);
    678     return false;
    679   }
    680 
    681   // With the device created, we can fill the remaining entry points.
    682   if (!Vulkan::LoadVulkanDeviceFunctions(m_device))
    683     return false;
    684 
    685   // Grab the graphics and present queues.
    686   vkGetDeviceQueue(m_device, m_graphics_queue_family_index, 0, &m_graphics_queue);
    687   if (surface)
    688     vkGetDeviceQueue(m_device, m_present_queue_family_index, 0, &m_present_queue);
    689 
    690   m_features.gpu_timing = (m_device_properties.limits.timestampComputeAndGraphics != 0 &&
    691                            queue_family_properties[m_graphics_queue_family_index].timestampValidBits > 0 &&
    692                            m_device_properties.limits.timestampPeriod > 0);
    693   DEV_LOG("GPU timing is {} (TS={} TS valid bits={}, TS period={})",
    694           m_features.gpu_timing ? "supported" : "not supported",
    695           static_cast<u32>(m_device_properties.limits.timestampComputeAndGraphics),
    696           queue_family_properties[m_graphics_queue_family_index].timestampValidBits,
    697           m_device_properties.limits.timestampPeriod);
    698 
    699   ProcessDeviceExtensions();
    700   SetFeatures(disabled_features, enabled_features);
    701   return true;
    702 }
    703 
    704 void VulkanDevice::ProcessDeviceExtensions()
    705 {
    706   // advanced feature checks
    707   VkPhysicalDeviceFeatures2 features2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, nullptr, {}};
    708   VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT rasterization_order_access_feature = {
    709     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_EXT, nullptr, VK_FALSE, VK_FALSE,
    710     VK_FALSE};
    711   VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_feature = {
    712     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES, nullptr, VK_FALSE};
    713   VkPhysicalDeviceDynamicRenderingLocalReadFeaturesKHR dynamic_rendering_local_read_feature = {
    714     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_LOCAL_READ_FEATURES_KHR, nullptr, VK_FALSE};
    715   VkPhysicalDeviceSwapchainMaintenance1FeaturesEXT swapchain_maintenance1_feature = {
    716     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SWAPCHAIN_MAINTENANCE_1_FEATURES_EXT, nullptr, VK_FALSE};
    717   VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragment_shader_interlock_feature = {
    718     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT, nullptr, VK_FALSE, VK_FALSE, VK_FALSE};
    719   VkPhysicalDeviceMaintenance4Features maintenance4_features = {
    720     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES, nullptr, VK_FALSE};
    721 
    722   // add in optional feature structs
    723   if (m_optional_extensions.vk_ext_rasterization_order_attachment_access)
    724     Vulkan::AddPointerToChain(&features2, &rasterization_order_access_feature);
    725   if (m_optional_extensions.vk_ext_swapchain_maintenance1)
    726     Vulkan::AddPointerToChain(&features2, &swapchain_maintenance1_feature);
    727   if (m_optional_extensions.vk_khr_dynamic_rendering)
    728   {
    729     Vulkan::AddPointerToChain(&features2, &dynamic_rendering_feature);
    730     if (m_optional_extensions.vk_khr_dynamic_rendering_local_read)
    731       Vulkan::AddPointerToChain(&features2, &dynamic_rendering_local_read_feature);
    732     if (m_optional_extensions.vk_ext_fragment_shader_interlock)
    733       Vulkan::AddPointerToChain(&features2, &fragment_shader_interlock_feature);
    734   }
    735   if (m_optional_extensions.vk_khr_maintenance4)
    736     Vulkan::AddPointerToChain(&features2, &maintenance4_features);
    737 
    738   // we might not have VK_KHR_get_physical_device_properties2...
    739   if (!vkGetPhysicalDeviceFeatures2 || !vkGetPhysicalDeviceProperties2 || !vkGetPhysicalDeviceMemoryProperties2)
    740   {
    741     if (!vkGetPhysicalDeviceFeatures2KHR || !vkGetPhysicalDeviceProperties2KHR ||
    742         !vkGetPhysicalDeviceMemoryProperties2KHR)
    743     {
    744       ERROR_LOG("One or more functions from VK_KHR_get_physical_device_properties2 is missing, disabling extension.");
    745       m_optional_extensions.vk_khr_get_physical_device_properties2 = false;
    746       vkGetPhysicalDeviceFeatures2 = nullptr;
    747       vkGetPhysicalDeviceProperties2 = nullptr;
    748       vkGetPhysicalDeviceMemoryProperties2 = nullptr;
    749     }
    750     else
    751     {
    752       vkGetPhysicalDeviceFeatures2 = vkGetPhysicalDeviceFeatures2KHR;
    753       vkGetPhysicalDeviceProperties2 = vkGetPhysicalDeviceProperties2KHR;
    754       vkGetPhysicalDeviceMemoryProperties2 = vkGetPhysicalDeviceMemoryProperties2KHR;
    755     }
    756   }
    757 
    758   // don't bother querying if we're not actually looking at any features
    759   if (vkGetPhysicalDeviceFeatures2 && features2.pNext)
    760     vkGetPhysicalDeviceFeatures2(m_physical_device, &features2);
    761 
    762   // confirm we actually support it
    763   m_optional_extensions.vk_ext_rasterization_order_attachment_access &=
    764     (rasterization_order_access_feature.rasterizationOrderColorAttachmentAccess == VK_TRUE);
    765   m_optional_extensions.vk_ext_swapchain_maintenance1 &=
    766     (swapchain_maintenance1_feature.swapchainMaintenance1 == VK_TRUE);
    767   m_optional_extensions.vk_khr_dynamic_rendering &= (dynamic_rendering_feature.dynamicRendering == VK_TRUE);
    768   m_optional_extensions.vk_khr_dynamic_rendering_local_read &=
    769     (dynamic_rendering_local_read_feature.dynamicRenderingLocalRead == VK_TRUE);
    770   m_optional_extensions.vk_ext_fragment_shader_interlock &=
    771     (m_optional_extensions.vk_khr_dynamic_rendering &&
    772      fragment_shader_interlock_feature.fragmentShaderPixelInterlock == VK_TRUE);
    773   m_optional_extensions.vk_khr_maintenance4 &= (maintenance4_features.maintenance4 == VK_TRUE);
    774   m_optional_extensions.vk_khr_maintenance5 &= m_optional_extensions.vk_khr_maintenance4;
    775 
    776   VkPhysicalDeviceProperties2 properties2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, nullptr, {}};
    777   VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor_properties = {
    778     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR, nullptr, 0u};
    779   VkPhysicalDeviceExternalMemoryHostPropertiesEXT external_memory_host_properties = {
    780     VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT, nullptr, 0};
    781 
    782   if (m_optional_extensions.vk_khr_driver_properties)
    783   {
    784     m_device_driver_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;
    785     Vulkan::AddPointerToChain(&properties2, &m_device_driver_properties);
    786   }
    787   if (m_optional_extensions.vk_khr_push_descriptor)
    788     Vulkan::AddPointerToChain(&properties2, &push_descriptor_properties);
    789 
    790   if (m_optional_extensions.vk_ext_external_memory_host)
    791     Vulkan::AddPointerToChain(&properties2, &external_memory_host_properties);
    792 
    793   // don't bother querying if we're not actually looking at any features
    794   if (vkGetPhysicalDeviceProperties2 && properties2.pNext)
    795     vkGetPhysicalDeviceProperties2(m_physical_device, &properties2);
    796 
    797   m_optional_extensions.vk_khr_push_descriptor &= (push_descriptor_properties.maxPushDescriptors >= 1);
    798 
    799   // vk_ext_external_memory_host is only used if the import alignment is the same as the system's page size
    800   m_optional_extensions.vk_ext_external_memory_host &=
    801     (external_memory_host_properties.minImportedHostPointerAlignment == HOST_PAGE_SIZE);
    802 
    803 #define LOG_EXT(name, field) INFO_LOG(name " is {}", m_optional_extensions.field ? "supported" : "NOT supported")
    804 
    805   LOG_EXT("VK_EXT_external_memory_host", vk_ext_external_memory_host);
    806   LOG_EXT("VK_EXT_memory_budget", vk_ext_memory_budget);
    807   LOG_EXT("VK_EXT_fragment_shader_interlock", vk_ext_fragment_shader_interlock);
    808   LOG_EXT("VK_EXT_rasterization_order_attachment_access", vk_ext_rasterization_order_attachment_access);
    809   LOG_EXT("VK_EXT_swapchain_maintenance1", vk_ext_swapchain_maintenance1);
    810   LOG_EXT("VK_KHR_get_memory_requirements2", vk_khr_get_memory_requirements2);
    811   LOG_EXT("VK_KHR_bind_memory2", vk_khr_bind_memory2);
    812   LOG_EXT("VK_KHR_get_physical_device_properties2", vk_khr_get_physical_device_properties2);
    813   LOG_EXT("VK_KHR_dedicated_allocation", vk_khr_dedicated_allocation);
    814   LOG_EXT("VK_KHR_dynamic_rendering", vk_khr_dynamic_rendering);
    815   LOG_EXT("VK_KHR_dynamic_rendering_local_read", vk_khr_dynamic_rendering_local_read);
    816   LOG_EXT("VK_KHR_maintenance4", vk_khr_maintenance4);
    817   LOG_EXT("VK_KHR_maintenance5", vk_khr_maintenance5);
    818   LOG_EXT("VK_KHR_push_descriptor", vk_khr_push_descriptor);
    819 
    820 #undef LOG_EXT
    821 }
    822 
    823 bool VulkanDevice::CreateAllocator()
    824 {
    825   const u32 apiVersion = std::min(m_device_properties.apiVersion, VK_API_VERSION_1_1);
    826   INFO_LOG("Supported device API version: {}.{}.{}, using version {}.{}.{} for allocator.",
    827            VK_API_VERSION_MAJOR(m_device_properties.apiVersion), VK_API_VERSION_MINOR(m_device_properties.apiVersion),
    828            VK_API_VERSION_PATCH(m_device_properties.apiVersion), VK_API_VERSION_MAJOR(apiVersion),
    829            VK_API_VERSION_MINOR(apiVersion), VK_API_VERSION_PATCH(apiVersion));
    830 
    831   VmaAllocatorCreateInfo ci = {};
    832   ci.vulkanApiVersion = apiVersion;
    833   ci.flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
    834   ci.physicalDevice = m_physical_device;
    835   ci.device = m_device;
    836   ci.instance = m_instance;
    837 
    838   if (apiVersion < VK_API_VERSION_1_1)
    839   {
    840     if (m_optional_extensions.vk_khr_get_memory_requirements2 && m_optional_extensions.vk_khr_dedicated_allocation)
    841     {
    842       DEV_LOG("Enabling VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT on < Vulkan 1.1.");
    843       ci.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT;
    844     }
    845     if (m_optional_extensions.vk_khr_bind_memory2)
    846     {
    847       DEV_LOG("Enabling VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT on < Vulkan 1.1.");
    848       ci.flags |= VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT;
    849     }
    850   }
    851 
    852   if (m_optional_extensions.vk_ext_memory_budget)
    853   {
    854     DEV_LOG("Enabling VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT.");
    855     ci.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT;
    856   }
    857 
    858   if (m_optional_extensions.vk_khr_maintenance4)
    859   {
    860     DEV_LOG("Enabling VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT");
    861     ci.flags |= VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT;
    862   }
    863 
    864   if (m_optional_extensions.vk_khr_maintenance5)
    865   {
    866     DEV_LOG("Enabling VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT");
    867     ci.flags |= VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT;
    868   }
    869 
    870   // Limit usage of the DEVICE_LOCAL upload heap when we're using a debug device.
    871   // On NVIDIA drivers, it results in frequently running out of device memory when trying to
    872   // play back captures in RenderDoc, making life very painful. Re-BAR GPUs should be fine.
    873   constexpr VkDeviceSize UPLOAD_HEAP_SIZE_THRESHOLD = 512 * 1024 * 1024;
    874   constexpr VkMemoryPropertyFlags UPLOAD_HEAP_PROPERTIES =
    875     VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
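          // DEVICE_LOCAL | HOST_VISIBLE describes a BAR-style upload heap; without Resizable BAR
          // such heaps are typically only 256 MB, so the 512 MB threshold leaves Re-BAR sized
          // heaps alone.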
    876   std::array<VkDeviceSize, VK_MAX_MEMORY_HEAPS> heap_size_limits;
    877   if (m_debug_device)
    878   {
    879     VkPhysicalDeviceMemoryProperties memory_properties;
    880     vkGetPhysicalDeviceMemoryProperties(m_physical_device, &memory_properties);
    881 
    882     bool has_upload_heap = false;
    883     heap_size_limits.fill(VK_WHOLE_SIZE);
    884     for (u32 i = 0; i < memory_properties.memoryTypeCount; i++)
    885     {
    886       // Look for any memory types which are upload-like.
    887       const VkMemoryType& type = memory_properties.memoryTypes[i];
    888       if ((type.propertyFlags & UPLOAD_HEAP_PROPERTIES) != UPLOAD_HEAP_PROPERTIES)
    889         continue;
    890 
    891       const VkMemoryHeap& heap = memory_properties.memoryHeaps[type.heapIndex];
    892       if (heap.size >= UPLOAD_HEAP_SIZE_THRESHOLD)
    893         continue;
    894 
    895       if (heap_size_limits[type.heapIndex] == VK_WHOLE_SIZE)
    896       {
    897         WARNING_LOG("Disabling allocation from upload heap #{} ({:.2f} MB) due to debug device.", type.heapIndex,
    898                     static_cast<float>(heap.size) / 1048576.0f);
    899         heap_size_limits[type.heapIndex] = 0;
    900         has_upload_heap = true;
    901       }
    902     }
    903 
    904     if (has_upload_heap)
    905       ci.pHeapSizeLimit = heap_size_limits.data();
    906   }
    907 
    908   VkResult res = vmaCreateAllocator(&ci, &m_allocator);
    909   if (res != VK_SUCCESS)
    910   {
    911     LOG_VULKAN_ERROR(res, "vmaCreateAllocator failed: ");
    912     return false;
    913   }
    914 
    915   return true;
    916 }
    917 
    918 void VulkanDevice::DestroyAllocator()
    919 {
    920   if (m_allocator == VK_NULL_HANDLE)
    921     return;
    922 
    923   vmaDestroyAllocator(m_allocator);
    924   m_allocator = VK_NULL_HANDLE;
    925 }
    926 
    927 bool VulkanDevice::CreateCommandBuffers()
    928 {
    929   VkResult res;
    930 
    931   uint32_t frame_index = 0;
    932   for (CommandBuffer& resources : m_frame_resources)
    933   {
    934     resources.needs_fence_wait = false;
    935 
    936     VkCommandPoolCreateInfo pool_info = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr, 0,
    937                                          m_graphics_queue_family_index};
    938     res = vkCreateCommandPool(m_device, &pool_info, nullptr, &resources.command_pool);
    939     if (res != VK_SUCCESS)
    940     {
    941       LOG_VULKAN_ERROR(res, "vkCreateCommandPool failed: ");
    942       return false;
    943     }
    944     Vulkan::SetObjectName(m_device, resources.command_pool,
    945                           TinyString::from_format("Frame Command Pool {}", frame_index));
    946 
    947     VkCommandBufferAllocateInfo buffer_info = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, nullptr,
    948                                                resources.command_pool, VK_COMMAND_BUFFER_LEVEL_PRIMARY,
    949                                                static_cast<u32>(resources.command_buffers.size())};
    950 
    951     res = vkAllocateCommandBuffers(m_device, &buffer_info, resources.command_buffers.data());
    952     if (res != VK_SUCCESS)
    953     {
    954       LOG_VULKAN_ERROR(res, "vkAllocateCommandBuffers failed: ");
    955       return false;
    956     }
    957     for (u32 i = 0; i < resources.command_buffers.size(); i++)
    958     {
    959       Vulkan::SetObjectName(m_device, resources.command_buffers[i],
    960                             TinyString::from_format("Frame {} {}Command Buffer", frame_index, (i == 0) ? "Init" : ""));
    961     }
    962 
    963     VkFenceCreateInfo fence_info = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr, VK_FENCE_CREATE_SIGNALED_BIT};
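            // Fences are created pre-signaled so the first wait on each frame's fence returns
            // immediately instead of deadlocking before any work has been submitted.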
    964 
    965     res = vkCreateFence(m_device, &fence_info, nullptr, &resources.fence);
    966     if (res != VK_SUCCESS)
    967     {
    968       LOG_VULKAN_ERROR(res, "vkCreateFence failed: ");
    969       return false;
    970     }
    971     Vulkan::SetObjectName(m_device, resources.fence, TinyString::from_format("Frame Fence {}", frame_index));
    972 
    973     u32 num_pools = 0;
    974     VkDescriptorPoolSize pool_sizes[2];
    975     if (!m_optional_extensions.vk_khr_push_descriptor)
    976     {
    977       pool_sizes[num_pools++] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
    978                                  MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME};
    979     }
    980     pool_sizes[num_pools++] = {VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, MAX_INPUT_ATTACHMENT_DESCRIPTORS_PER_FRAME};
    981 
    982     VkDescriptorPoolCreateInfo pool_create_info = {
    983       VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr, 0, MAX_DESCRIPTOR_SETS_PER_FRAME, num_pools, pool_sizes};
    984 
    985     res = vkCreateDescriptorPool(m_device, &pool_create_info, nullptr, &resources.descriptor_pool);
    986     if (res != VK_SUCCESS)
    987     {
    988       LOG_VULKAN_ERROR(res, "vkCreateDescriptorPool failed: ");
    989       return false;
    990     }
    991     Vulkan::SetObjectName(m_device, resources.descriptor_pool,
    992                           TinyString::from_format("Frame Descriptor Pool {}", frame_index));
    993 
    994     ++frame_index;
    995   }
    996 
    997   BeginCommandBuffer(0);
    998   return true;
    999 }
   1000 
   1001 void VulkanDevice::DestroyCommandBuffers()
   1002 {
   1003   for (CommandBuffer& resources : m_frame_resources)
   1004   {
   1005     if (resources.fence != VK_NULL_HANDLE)
   1006       vkDestroyFence(m_device, resources.fence, nullptr);
   1007     if (resources.descriptor_pool != VK_NULL_HANDLE)
   1008       vkDestroyDescriptorPool(m_device, resources.descriptor_pool, nullptr);
   1009     if (resources.command_buffers[0] != VK_NULL_HANDLE)
   1010     {
   1011       vkFreeCommandBuffers(m_device, resources.command_pool, static_cast<u32>(resources.command_buffers.size()),
   1012                            resources.command_buffers.data());
   1013     }
   1014     if (resources.command_pool != VK_NULL_HANDLE)
   1015       vkDestroyCommandPool(m_device, resources.command_pool, nullptr);
   1016   }
   1017 }
   1018 
   1019 bool VulkanDevice::CreatePersistentDescriptorPool()
   1020 {
   1021   static constexpr const VkDescriptorPoolSize pool_sizes[] = {
   1022     {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1},
   1023     {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, MAX_SAMPLER_DESCRIPTORS},
   1024     {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 16},
   1025     {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 16},
   1026   };
   1027 
   1028   const VkDescriptorPoolCreateInfo pool_create_info = {
   1029     VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,     nullptr,
   1030     VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, MAX_SAMPLER_DESCRIPTORS,
   1031     static_cast<u32>(std::size(pool_sizes)),           pool_sizes};
   1032 
   1033   VkResult res = vkCreateDescriptorPool(m_device, &pool_create_info, nullptr, &m_global_descriptor_pool);
   1034   if (res != VK_SUCCESS)
   1035   {
   1036     LOG_VULKAN_ERROR(res, "vkCreateDescriptorPool failed: ");
   1037     return false;
   1038   }
   1039   Vulkan::SetObjectName(m_device, m_global_descriptor_pool, "Global Descriptor Pool");
   1040 
   1041   if (m_features.gpu_timing)
   1042   {
   1043     const VkQueryPoolCreateInfo query_create_info = {
   1044       VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, nullptr, 0, VK_QUERY_TYPE_TIMESTAMP, NUM_COMMAND_BUFFERS * 4, 0};
   1045     res = vkCreateQueryPool(m_device, &query_create_info, nullptr, &m_timestamp_query_pool);
   1046     if (res != VK_SUCCESS)
   1047     {
   1048       LOG_VULKAN_ERROR(res, "vkCreateQueryPool failed: ");
   1049       m_features.gpu_timing = false;
   1050       return false;
   1051     }
   1052   }
   1053 
   1054   return true;
   1055 }
   1056 
   1057 void VulkanDevice::DestroyPersistentDescriptorPool()
   1058 {
   1059   if (m_timestamp_query_pool != VK_NULL_HANDLE)
   1060     vkDestroyQueryPool(m_device, m_timestamp_query_pool, nullptr);
   1061 
   1062   if (m_global_descriptor_pool != VK_NULL_HANDLE)
   1063     vkDestroyDescriptorPool(m_device, m_global_descriptor_pool, nullptr);
   1064 }
   1065 
   1066 bool VulkanDevice::RenderPassCacheKey::operator==(const RenderPassCacheKey& rhs) const
   1067 {
   1068   return (std::memcmp(this, &rhs, sizeof(*this)) == 0);
   1069 }
   1070 
   1071 bool VulkanDevice::RenderPassCacheKey::operator!=(const RenderPassCacheKey& rhs) const
   1072 {
   1073   return (std::memcmp(this, &rhs, sizeof(*this)) != 0);
   1074 }
   1075 
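        // Hashing the raw bytes is sound only because every key is zeroed (std::memset) before its
        // fields are filled in, so padding bytes never contribute garbage to the hash or to the
        // memcmp-based equality above.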
   1076 size_t VulkanDevice::RenderPassCacheKeyHash::operator()(const RenderPassCacheKey& rhs) const
   1077 {
   1078   if constexpr (sizeof(void*) == 8)
   1079     return XXH3_64bits(&rhs, sizeof(rhs));
   1080   else
   1081     return XXH32(&rhs, sizeof(rhs), 0x1337);
   1082 }
   1083 
   1084 VkRenderPass VulkanDevice::GetRenderPass(const GPUPipeline::GraphicsConfig& config)
   1085 {
   1086   RenderPassCacheKey key;
   1087   std::memset(&key, 0, sizeof(key));
   1088 
   1089   for (u32 i = 0; i < MAX_RENDER_TARGETS; i++)
   1090   {
   1091     if (config.color_formats[i] == GPUTexture::Format::Unknown)
   1092       break;
   1093 
   1094     key.color[i].format = static_cast<u8>(config.color_formats[i]);
   1095     key.color[i].load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
   1096     key.color[i].store_op = VK_ATTACHMENT_STORE_OP_STORE;
   1097   }
   1098 
   1099   if (config.depth_format != GPUTexture::Format::Unknown)
   1100   {
   1101     key.depth_format = static_cast<u8>(config.depth_format);
   1102     key.depth_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
   1103     key.depth_store_op = VK_ATTACHMENT_STORE_OP_STORE;
   1104 
   1105     const bool stencil = GPUTexture::IsDepthStencilFormat(config.depth_format);
   1106     key.stencil_load_op = stencil ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
   1107     key.stencil_store_op = stencil ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE;
   1108   }
   1109 
   1110   key.samples = static_cast<u8>(config.samples);
   1111   key.feedback_loop = config.render_pass_flags;
   1112 
   1113   const auto it = m_render_pass_cache.find(key);
   1114   return (it != m_render_pass_cache.end()) ? it->second : CreateCachedRenderPass(key);
   1115 }
   1116 
   1117 VkRenderPass VulkanDevice::GetRenderPass(VulkanTexture* const* rts, u32 num_rts, VulkanTexture* ds,
   1118                                          GPUPipeline::RenderPassFlag feedback_loop)
   1119 {
   1120   RenderPassCacheKey key;
   1121   std::memset(&key, 0, sizeof(key));
   1122 
   1123   static_assert(static_cast<u8>(GPUTexture::Format::Unknown) == 0);
   1124 
   1125   for (u32 i = 0; i < num_rts; i++)
   1126   {
   1127     key.color[i].format = static_cast<u8>(rts[i]->GetFormat());
   1128     key.color[i].load_op = GetLoadOpForTexture(rts[i]);
   1129     key.color[i].store_op = VK_ATTACHMENT_STORE_OP_STORE;
   1130     key.samples = static_cast<u8>(rts[i]->GetSamples());
   1131   }
   1132 
   1133   if (ds)
   1134   {
   1135     const VkAttachmentLoadOp load_op = GetLoadOpForTexture(ds);
   1136     key.depth_format = static_cast<u8>(ds->GetFormat());
   1137     key.depth_load_op = load_op;
   1138     key.depth_store_op = VK_ATTACHMENT_STORE_OP_STORE;
   1139 
   1140     const bool stencil = GPUTexture::IsDepthStencilFormat(ds->GetFormat());
   1141     key.stencil_load_op = stencil ? load_op : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
   1142     key.stencil_store_op = stencil ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE;
   1143 
   1144     key.samples = static_cast<u8>(ds->GetSamples());
   1145   }
   1146 
   1147   key.feedback_loop = feedback_loop;
   1148 
   1149   const auto it = m_render_pass_cache.find(key);
   1150   return (it != m_render_pass_cache.end()) ? it->second : CreateCachedRenderPass(key);
   1151 }
   1152 
   1153 VkRenderPass VulkanDevice::GetSwapChainRenderPass(GPUTexture::Format format, VkAttachmentLoadOp load_op)
   1154 {
   1155   DebugAssert(format != GPUTexture::Format::Unknown);
   1156 
   1157   RenderPassCacheKey key;
   1158   std::memset(&key, 0, sizeof(key));
   1159 
   1160   key.color[0].format = static_cast<u8>(format);
   1161   key.color[0].load_op = load_op;
   1162   key.color[0].store_op = VK_ATTACHMENT_STORE_OP_STORE;
   1163   key.samples = 1;
   1164 
   1165   const auto it = m_render_pass_cache.find(key);
   1166   return (it != m_render_pass_cache.end()) ? it->second : CreateCachedRenderPass(key);
   1167 }
   1168 
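        // Restarting an in-progress render pass must not re-run CLEAR load ops, or previously
        // rendered contents would be wiped; look up (or build) an equivalent pass with every
        // CLEAR demoted to LOAD.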
   1169 VkRenderPass VulkanDevice::GetRenderPassForRestarting(VkRenderPass pass)
   1170 {
   1171   for (const auto& it : m_render_pass_cache)
   1172   {
   1173     if (it.second != pass)
   1174       continue;
   1175 
   1176     RenderPassCacheKey modified_key = it.first;
   1177     for (u32 i = 0; i < MAX_RENDER_TARGETS; i++)
   1178     {
   1179       if (modified_key.color[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
   1180         modified_key.color[i].load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
   1181     }
   1182 
   1183     if (modified_key.depth_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
   1184       modified_key.depth_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
   1185     if (modified_key.stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
   1186       modified_key.stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
   1187 
   1188     if (modified_key == it.first)
   1189       return pass;
   1190 
   1191     auto fit = m_render_pass_cache.find(modified_key);
   1192     if (fit != m_render_pass_cache.end())
   1193       return fit->second;
   1194 
   1195     return CreateCachedRenderPass(modified_key);
   1196   }
   1197 
   1198   return pass;
   1199 }
   1200 
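         // The "init" command buffer (slot 0) is begun lazily on first use, and carries uploads/transitions that must
         // run before this frame's draw commands; DoSubmitCommandBuffer() submits it ahead of the draw buffer (slot 1).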
   1201 VkCommandBuffer VulkanDevice::GetCurrentInitCommandBuffer()
   1202 {
   1203   CommandBuffer& res = m_frame_resources[m_current_frame];
   1204   VkCommandBuffer buf = res.command_buffers[0];
   1205   if (res.init_buffer_used)
   1206     return buf;
   1207 
   1208   VkCommandBufferBeginInfo bi{VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr,
   1209                               VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, nullptr};
   1210   vkBeginCommandBuffer(buf, &bi);
   1211   res.init_buffer_used = true;
   1212   return buf;
   1213 }
   1214 
   1215 VkDescriptorSet VulkanDevice::AllocateDescriptorSet(VkDescriptorSetLayout set_layout)
   1216 {
   1217   VkDescriptorSetAllocateInfo allocate_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr,
   1218                                                m_frame_resources[m_current_frame].descriptor_pool, 1, &set_layout};
   1219 
   1220   VkDescriptorSet descriptor_set;
   1221   VkResult res = vkAllocateDescriptorSets(m_device, &allocate_info, &descriptor_set);
   1222   if (res != VK_SUCCESS)
   1223   {
   1224     // Failing to allocate a descriptor set is not a fatal error, we can
   1225     // recover by moving to the next command buffer.
   1226     return VK_NULL_HANDLE;
   1227   }
   1228 
   1229   return descriptor_set;
   1230 }
   1231 
   1232 VkDescriptorSet VulkanDevice::AllocatePersistentDescriptorSet(VkDescriptorSetLayout set_layout)
   1233 {
   1234   VkDescriptorSetAllocateInfo allocate_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr,
   1235                                                m_global_descriptor_pool, 1, &set_layout};
   1236 
   1237   VkDescriptorSet descriptor_set;
   1238   VkResult res = vkAllocateDescriptorSets(m_device, &allocate_info, &descriptor_set);
   1239   if (res != VK_SUCCESS)
   1240     return VK_NULL_HANDLE;
   1241 
   1242   return descriptor_set;
   1243 }
   1244 
   1245 void VulkanDevice::FreePersistentDescriptorSet(VkDescriptorSet set)
   1246 {
   1247   vkFreeDescriptorSets(m_device, m_global_descriptor_pool, 1, &set);
   1248 }
   1249 
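         // Fence counters increase monotonically, and each command buffer records the counter it was submitted under,
         // so waiting on a counter reduces to waiting on the first in-flight buffer whose counter reaches that value.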
   1250 void VulkanDevice::WaitForFenceCounter(u64 fence_counter)
   1251 {
   1252   if (m_completed_fence_counter >= fence_counter)
   1253     return;
   1254 
   1255   // Find the first command buffer which covers this counter value.
   1256   u32 index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
   1257   while (index != m_current_frame)
   1258   {
   1259     if (m_frame_resources[index].fence_counter >= fence_counter)
   1260       break;
   1261 
   1262     index = (index + 1) % NUM_COMMAND_BUFFERS;
   1263   }
   1264 
   1265   DebugAssert(index != m_current_frame);
   1266   WaitForCommandBufferCompletion(index);
   1267 }
   1268 
   1269 void VulkanDevice::WaitForGPUIdle()
   1270 {
   1271   WaitForPresentComplete();
   1272   vkDeviceWaitIdle(m_device);
   1273 }
   1274 
   1275 float VulkanDevice::GetAndResetAccumulatedGPUTime()
   1276 {
   1277   const float time = m_accumulated_gpu_time;
   1278   m_accumulated_gpu_time = 0.0f;
   1279   return time;
   1280 }
   1281 
   1282 bool VulkanDevice::SetGPUTimingEnabled(bool enabled)
   1283 {
   1284   m_gpu_timing_enabled = enabled && m_features.gpu_timing;
   1285   return (enabled == m_gpu_timing_enabled);
   1286 }
   1287 
   1288 void VulkanDevice::WaitForCommandBufferCompletion(u32 index)
   1289 {
   1290   // We might be waiting for the buffer we just submitted to the worker thread.
   1291   if (m_queued_present.command_buffer_index == index && !m_present_done.load(std::memory_order_acquire))
   1292   {
   1293     WARNING_LOG("Waiting for threaded submission of cmdbuffer {}", index);
   1294     WaitForPresentComplete();
   1295   }
   1296 
   1297   // Wait for this command buffer to be completed.
   1298   static constexpr u32 MAX_TIMEOUTS = 10;
   1299   u32 timeouts = 0;
   1300   for (;;)
   1301   {
   1302     VkResult res = vkWaitForFences(m_device, 1, &m_frame_resources[index].fence, VK_TRUE, UINT64_MAX);
   1303     if (res == VK_SUCCESS)
   1304       break;
   1305 
   1306     if (res == VK_TIMEOUT && (++timeouts) <= MAX_TIMEOUTS)
   1307     {
   1308       ERROR_LOG("vkWaitForFences() for cmdbuffer {} failed with VK_TIMEOUT, trying again.", index);
   1309       continue;
   1310     }
    1311     else // res is neither VK_SUCCESS nor a retryable timeout at this point.
   1312     {
   1313       LOG_VULKAN_ERROR(res, TinyString::from_format("vkWaitForFences() for cmdbuffer {} failed: ", index));
   1314       m_last_submit_failed.store(true, std::memory_order_release);
   1315       return;
   1316     }
   1317   }
   1318 
   1319   // Clean up any resources for command buffers between the last known completed buffer and this
   1320   // now-completed command buffer. If we use >2 buffers, this may be more than one buffer.
   1321   const u64 now_completed_counter = m_frame_resources[index].fence_counter;
   1322   u32 cleanup_index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS;
   1323   while (cleanup_index != m_current_frame)
   1324   {
   1325     CommandBuffer& resources = m_frame_resources[cleanup_index];
   1326     if (resources.fence_counter > now_completed_counter)
   1327       break;
   1328 
   1329     if (m_gpu_timing_enabled && resources.timestamp_written)
   1330     {
    1331       std::array<u64, 2> timestamps;
             // Each command buffer's timestamps occupy query slots (buffer * 2) and (buffer * 2 + 1), so read back the
             // pair for the buffer being cleaned up, not the buffer that was waited on.
    1332       VkResult res =
    1333         vkGetQueryPoolResults(m_device, m_timestamp_query_pool, cleanup_index * 2, static_cast<u32>(timestamps.size()),
    1334                               sizeof(u64) * timestamps.size(), timestamps.data(), sizeof(u64), VK_QUERY_RESULT_64_BIT);
    1335       if (res == VK_SUCCESS)
    1336       {
    1337         // if we didn't write the timestamp at the start of the cmdbuffer (just enabled timing), the first TS will be
    1338         // zero; the enclosing condition already checked m_gpu_timing_enabled
    1339         if (timestamps[0] > 0)
   1340         {
   1341           const double ns_diff =
   1342             (timestamps[1] - timestamps[0]) * static_cast<double>(m_device_properties.limits.timestampPeriod);
   1343           m_accumulated_gpu_time += static_cast<float>(ns_diff / 1000000.0);
   1344         }
   1345       }
   1346       else
   1347       {
   1348         LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: ");
   1349       }
   1350     }
   1351 
   1352     cleanup_index = (cleanup_index + 1) % NUM_COMMAND_BUFFERS;
   1353   }
   1354 
   1355   m_completed_fence_counter = now_completed_counter;
   1356   while (!m_cleanup_objects.empty())
   1357   {
   1358     auto& it = m_cleanup_objects.front();
   1359     if (it.first > now_completed_counter)
   1360       break;
   1361     it.second();
   1362     m_cleanup_objects.pop_front();
   1363   }
   1364 }
   1365 
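         // Ends the init/draw command buffers and submits them, either inline on this thread, or handed off to the
         // present worker thread when threaded presentation is enabled and no explicit present was requested.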
   1366 void VulkanDevice::EndAndSubmitCommandBuffer(VulkanSwapChain* present_swap_chain, bool explicit_present,
   1367                                              bool submit_on_thread)
   1368 {
   1369   if (m_last_submit_failed.load(std::memory_order_acquire))
   1370     return;
   1371 
   1372   CommandBuffer& resources = m_frame_resources[m_current_frame];
   1373 
   1374   // End the current command buffer.
   1375   VkResult res;
   1376   if (resources.init_buffer_used)
   1377   {
   1378     res = vkEndCommandBuffer(resources.command_buffers[0]);
   1379     if (res != VK_SUCCESS)
   1380     {
   1381       LOG_VULKAN_ERROR(res, "vkEndCommandBuffer failed: ");
   1382       Panic("Failed to end command buffer");
   1383     }
   1384   }
   1385 
   1386   if (m_gpu_timing_enabled && resources.timestamp_written)
   1387   {
   1388     vkCmdWriteTimestamp(m_current_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool,
   1389                         m_current_frame * 2 + 1);
   1390   }
   1391 
   1392   res = vkEndCommandBuffer(resources.command_buffers[1]);
   1393   if (res != VK_SUCCESS)
   1394   {
   1395     LOG_VULKAN_ERROR(res, "vkEndCommandBuffer failed: ");
   1396     Panic("Failed to end command buffer");
   1397   }
   1398 
   1399   // This command buffer now has commands, so can't be re-used without waiting.
   1400   resources.needs_fence_wait = true;
   1401 
   1402   std::unique_lock<std::mutex> lock(m_present_mutex);
   1403   WaitForPresentComplete(lock);
   1404 
   1405   if (!submit_on_thread || explicit_present || !m_present_thread.joinable())
   1406   {
   1407     DoSubmitCommandBuffer(m_current_frame, present_swap_chain);
   1408     if (present_swap_chain && !explicit_present)
   1409       DoPresent(present_swap_chain);
   1410     return;
   1411   }
   1412 
   1413   m_queued_present.command_buffer_index = m_current_frame;
   1414   m_queued_present.swap_chain = present_swap_chain;
   1415   m_present_done.store(false, std::memory_order_release);
   1416   m_present_queued_cv.notify_one();
   1417 }
   1418 
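         // When a swap chain is presented, the submit waits on its image-available semaphore at the color-output
         // stage, and signals its rendering-finished semaphore, which DoPresent() passes to vkQueuePresentKHR().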
   1419 void VulkanDevice::DoSubmitCommandBuffer(u32 index, VulkanSwapChain* present_swap_chain)
   1420 {
   1421   CommandBuffer& resources = m_frame_resources[index];
   1422 
    1423   const VkPipelineStageFlags wait_bits = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
   1424   VkSubmitInfo submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO,
   1425                               nullptr,
   1426                               0u,
   1427                               nullptr,
   1428                               nullptr,
   1429                               resources.init_buffer_used ? 2u : 1u,
   1430                               resources.init_buffer_used ? resources.command_buffers.data() :
   1431                                                            &resources.command_buffers[1],
   1432                               0u,
   1433                               nullptr};
   1434 
   1435   if (present_swap_chain)
   1436   {
   1437     submit_info.pWaitSemaphores = present_swap_chain->GetImageAvailableSemaphorePtr();
   1438     submit_info.waitSemaphoreCount = 1;
   1439     submit_info.pWaitDstStageMask = &wait_bits;
   1440 
   1441     submit_info.pSignalSemaphores = present_swap_chain->GetRenderingFinishedSemaphorePtr();
   1442     submit_info.signalSemaphoreCount = 1;
   1443   }
   1444 
   1445   const VkResult res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence);
   1446   if (res != VK_SUCCESS)
   1447   {
   1448     LOG_VULKAN_ERROR(res, "vkQueueSubmit failed: ");
   1449     m_last_submit_failed.store(true, std::memory_order_release);
   1450     return;
   1451   }
   1452 }
   1453 
   1454 void VulkanDevice::DoPresent(VulkanSwapChain* present_swap_chain)
   1455 {
   1456   const VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
   1457                                          nullptr,
   1458                                          1,
   1459                                          present_swap_chain->GetRenderingFinishedSemaphorePtr(),
   1460                                          1,
   1461                                          present_swap_chain->GetSwapChainPtr(),
   1462                                          present_swap_chain->GetCurrentImageIndexPtr(),
   1463                                          nullptr};
   1464 
   1465   present_swap_chain->ResetImageAcquireResult();
   1466 
   1467   const VkResult res = vkQueuePresentKHR(m_present_queue, &present_info);
   1468   if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR)
   1469   {
   1470     // VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain.
   1471     if (res == VK_ERROR_OUT_OF_DATE_KHR)
   1472       ResizeWindow(0, 0, m_window_info.surface_scale);
   1473     else
   1474       LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: ");
   1475 
   1476     return;
   1477   }
   1478 
   1479   // Grab the next image as soon as possible, that way we spend less time blocked on the next
   1480   // submission. Don't care if it fails, we'll deal with that at the presentation call site.
   1481   // Credit to dxvk for the idea.
   1482   present_swap_chain->AcquireNextImage();
   1483 }
   1484 
   1485 void VulkanDevice::WaitForPresentComplete()
   1486 {
   1487   if (m_present_done.load(std::memory_order_acquire))
   1488     return;
   1489 
   1490   std::unique_lock<std::mutex> lock(m_present_mutex);
   1491   WaitForPresentComplete(lock);
   1492 }
   1493 
   1494 void VulkanDevice::WaitForPresentComplete(std::unique_lock<std::mutex>& lock)
   1495 {
   1496   if (m_present_done.load(std::memory_order_acquire))
   1497     return;
   1498 
   1499   m_present_done_cv.wait(lock, [this]() { return m_present_done.load(std::memory_order_acquire); });
   1500 }
   1501 
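         // Worker loop for threaded presentation: sleeps until EndAndSubmitCommandBuffer() queues a submit, performs
         // the submit and present, then signals completion via m_present_done/m_present_done_cv.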
   1502 void VulkanDevice::PresentThread()
   1503 {
   1504   std::unique_lock<std::mutex> lock(m_present_mutex);
   1505   while (!m_present_thread_done.load(std::memory_order_acquire))
   1506   {
   1507     m_present_queued_cv.wait(lock, [this]() {
   1508       return !m_present_done.load(std::memory_order_acquire) || m_present_thread_done.load(std::memory_order_acquire);
   1509     });
   1510 
   1511     if (m_present_done.load(std::memory_order_acquire))
   1512       continue;
   1513 
   1514     DoSubmitCommandBuffer(m_queued_present.command_buffer_index, m_queued_present.swap_chain);
   1515     if (m_queued_present.swap_chain)
   1516       DoPresent(m_queued_present.swap_chain);
   1517     m_present_done.store(true, std::memory_order_release);
   1518     m_present_done_cv.notify_one();
   1519   }
   1520 }
   1521 
   1522 void VulkanDevice::StartPresentThread()
   1523 {
   1524   DebugAssert(!m_present_thread.joinable());
   1525   m_present_thread_done.store(false, std::memory_order_release);
   1526   m_present_thread = std::thread(&VulkanDevice::PresentThread, this);
   1527 }
   1528 
   1529 void VulkanDevice::StopPresentThread()
   1530 {
   1531   if (!m_present_thread.joinable())
   1532     return;
   1533 
   1534   {
   1535     std::unique_lock<std::mutex> lock(m_present_mutex);
   1536     WaitForPresentComplete(lock);
   1537     m_present_thread_done.store(true, std::memory_order_release);
   1538     m_present_queued_cv.notify_one();
   1539   }
   1540 
   1541   m_present_thread.join();
   1542 }
   1543 
   1544 void VulkanDevice::MoveToNextCommandBuffer()
   1545 {
   1546   BeginCommandBuffer((m_current_frame + 1) % NUM_COMMAND_BUFFERS);
   1547 }
   1548 
   1549 void VulkanDevice::BeginCommandBuffer(u32 index)
   1550 {
   1551   CommandBuffer& resources = m_frame_resources[index];
   1552 
   1553   // Wait for the GPU to finish with all resources for this command buffer.
   1554   if (resources.fence_counter > m_completed_fence_counter)
   1555     WaitForCommandBufferCompletion(index);
   1556 
   1557   // Reset fence to unsignaled before starting.
   1558   VkResult res = vkResetFences(m_device, 1, &resources.fence);
   1559   if (res != VK_SUCCESS)
   1560     LOG_VULKAN_ERROR(res, "vkResetFences failed: ");
   1561 
   1562   // Reset command pools to beginning since we can re-use the memory now
   1563   res = vkResetCommandPool(m_device, resources.command_pool, 0);
   1564   if (res != VK_SUCCESS)
   1565     LOG_VULKAN_ERROR(res, "vkResetCommandPool failed: ");
   1566 
   1567   // Enable commands to be recorded to the two buffers again.
   1568   VkCommandBufferBeginInfo begin_info = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr,
   1569                                          VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, nullptr};
   1570   res = vkBeginCommandBuffer(resources.command_buffers[1], &begin_info);
   1571   if (res != VK_SUCCESS)
   1572     LOG_VULKAN_ERROR(res, "vkBeginCommandBuffer failed: ");
   1573 
    1574   // Likewise, reset the descriptor pool; its sets are no longer referenced by the GPU.
   1575   if (resources.descriptor_pool != VK_NULL_HANDLE)
   1576   {
   1577     res = vkResetDescriptorPool(m_device, resources.descriptor_pool, 0);
   1578     if (res != VK_SUCCESS)
   1579       LOG_VULKAN_ERROR(res, "vkResetDescriptorPool failed: ");
   1580   }
   1581 
   1582   if (m_gpu_timing_enabled)
   1583   {
   1584     vkCmdResetQueryPool(resources.command_buffers[1], m_timestamp_query_pool, index * 2, 2);
   1585     vkCmdWriteTimestamp(resources.command_buffers[1], VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool,
   1586                         index * 2);
   1587   }
   1588 
   1589   resources.fence_counter = m_next_fence_counter++;
   1590   resources.init_buffer_used = false;
   1591   resources.timestamp_written = m_gpu_timing_enabled;
   1592 
   1593   m_current_frame = index;
   1594   m_current_command_buffer = resources.command_buffers[1];
   1595 
   1596   // using the lower 32 bits of the fence index should be sufficient here, I hope...
   1597   vmaSetCurrentFrameIndex(m_allocator, static_cast<u32>(m_next_fence_counter));
   1598 }
   1599 
   1600 void VulkanDevice::SubmitCommandBuffer(bool wait_for_completion)
   1601 {
   1602   DebugAssert(!InRenderPass());
   1603 
   1604   const u32 current_frame = m_current_frame;
   1605   EndAndSubmitCommandBuffer(nullptr, false, false);
   1606   MoveToNextCommandBuffer();
   1607 
   1608   if (wait_for_completion)
   1609     WaitForCommandBufferCompletion(current_frame);
   1610 
   1611   InvalidateCachedState();
   1612 }
   1613 
   1614 void VulkanDevice::SubmitCommandBuffer(bool wait_for_completion, const std::string_view reason)
   1615 {
   1616   WARNING_LOG("Executing command buffer due to '{}'", reason);
   1617   SubmitCommandBuffer(wait_for_completion);
   1618 }
   1619 
   1620 void VulkanDevice::SubmitCommandBufferAndRestartRenderPass(const std::string_view reason)
   1621 {
   1622   if (InRenderPass())
   1623     EndRenderPass();
   1624 
   1625   VulkanPipeline* pl = m_current_pipeline;
   1626   SubmitCommandBuffer(false, reason);
   1627 
   1628   SetPipeline(pl);
   1629   BeginRenderPass();
   1630 }
   1631 
   1632 bool VulkanDevice::CheckLastSubmitFail()
   1633 {
   1634   return m_last_submit_failed.load(std::memory_order_acquire);
   1635 }
   1636 
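         // Deferred destruction: the objects below may still be referenced by in-flight command buffers, so they are
         // queued with the current fence counter and destroyed in WaitForCommandBufferCompletion() once the GPU has
         // passed that point. Illustrative usage (hypothetical caller):
         //   VulkanDevice::GetInstance().DeferBufferDestruction(buffer, allocation); // freed once the fence signals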
   1637 void VulkanDevice::DeferBufferDestruction(VkBuffer object, VmaAllocation allocation)
   1638 {
   1639   m_cleanup_objects.emplace_back(GetCurrentFenceCounter(),
   1640                                  [this, object, allocation]() { vmaDestroyBuffer(m_allocator, object, allocation); });
   1641 }
   1642 
   1643 void VulkanDevice::DeferBufferDestruction(VkBuffer object, VkDeviceMemory memory)
   1644 {
   1645   m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), [this, object, memory]() {
   1646     vkDestroyBuffer(m_device, object, nullptr);
   1647     vkFreeMemory(m_device, memory, nullptr);
   1648   });
   1649 }
   1650 
   1651 void VulkanDevice::DeferFramebufferDestruction(VkFramebuffer object)
   1652 {
   1653   m_cleanup_objects.emplace_back(GetCurrentFenceCounter(),
   1654                                  [this, object]() { vkDestroyFramebuffer(m_device, object, nullptr); });
   1655 }
   1656 
   1657 void VulkanDevice::DeferImageDestruction(VkImage object, VmaAllocation allocation)
   1658 {
   1659   m_cleanup_objects.emplace_back(GetCurrentFenceCounter(),
   1660                                  [this, object, allocation]() { vmaDestroyImage(m_allocator, object, allocation); });
   1661 }
   1662 
   1663 void VulkanDevice::DeferImageViewDestruction(VkImageView object)
   1664 {
   1665   m_cleanup_objects.emplace_back(GetCurrentFenceCounter(),
   1666                                  [this, object]() { vkDestroyImageView(m_device, object, nullptr); });
   1667 }
   1668 
   1669 void VulkanDevice::DeferPipelineDestruction(VkPipeline object)
   1670 {
   1671   m_cleanup_objects.emplace_back(GetCurrentFenceCounter(),
   1672                                  [this, object]() { vkDestroyPipeline(m_device, object, nullptr); });
   1673 }
   1674 
   1675 void VulkanDevice::DeferBufferViewDestruction(VkBufferView object)
   1676 {
   1677   m_cleanup_objects.emplace_back(GetCurrentFenceCounter(),
   1678                                  [this, object]() { vkDestroyBufferView(m_device, object, nullptr); });
   1679 }
   1680 
   1681 void VulkanDevice::DeferPersistentDescriptorSetDestruction(VkDescriptorSet object)
   1682 {
   1683   m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), [this, object]() { FreePersistentDescriptorSet(object); });
   1684 }
   1685 
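         // Routes Vulkan debug-utils messages to the matching log level (error/warning/info, anything else to dev).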
   1686 VKAPI_ATTR VkBool32 VKAPI_CALL DebugMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
   1687                                                       VkDebugUtilsMessageTypeFlagsEXT messageType,
   1688                                                       const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData,
   1689                                                       void* pUserData)
   1690 {
   1691   if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
   1692   {
   1693     ERROR_LOG("Vulkan debug report: ({}) {}", pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "",
   1694               pCallbackData->pMessage);
   1695   }
   1696   else if (severity & (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT))
   1697   {
   1698     WARNING_LOG("Vulkan debug report: ({}) {}", pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "",
   1699                 pCallbackData->pMessage);
   1700   }
   1701   else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT)
   1702   {
   1703     INFO_LOG("Vulkan debug report: ({}) {}", pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "",
   1704              pCallbackData->pMessage);
   1705   }
   1706   else
   1707   {
   1708     DEV_LOG("Vulkan debug report: ({}) {}", pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "",
   1709             pCallbackData->pMessage);
   1710   }
   1711 
   1712   return VK_FALSE;
   1713 }
   1714 
   1715 bool VulkanDevice::EnableDebugUtils()
   1716 {
   1717   // Already enabled?
   1718   if (m_debug_messenger_callback != VK_NULL_HANDLE)
   1719     return true;
   1720 
   1721   // Check for presence of the functions before calling
   1722   if (!vkCreateDebugUtilsMessengerEXT || !vkDestroyDebugUtilsMessengerEXT || !vkSubmitDebugUtilsMessageEXT)
   1723   {
   1724     return false;
   1725   }
   1726 
   1727   VkDebugUtilsMessengerCreateInfoEXT messenger_info = {
   1728     VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
   1729     nullptr,
   1730     0,
   1731     VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
   1732       VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT,
   1733     VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT |
   1734       VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT,
   1735     DebugMessengerCallback,
   1736     nullptr};
   1737 
   1738   const VkResult res =
   1739     vkCreateDebugUtilsMessengerEXT(m_instance, &messenger_info, nullptr, &m_debug_messenger_callback);
   1740   if (res != VK_SUCCESS)
   1741   {
   1742     LOG_VULKAN_ERROR(res, "vkCreateDebugUtilsMessengerEXT failed: ");
   1743     return false;
   1744   }
   1745 
   1746   return true;
   1747 }
   1748 
   1749 void VulkanDevice::DisableDebugUtils()
   1750 {
   1751   if (m_debug_messenger_callback != VK_NULL_HANDLE)
   1752   {
   1753     vkDestroyDebugUtilsMessengerEXT(m_instance, m_debug_messenger_callback, nullptr);
   1754     m_debug_messenger_callback = VK_NULL_HANDLE;
   1755   }
   1756 }
   1757 
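         // Vendor checks below use PCI vendor IDs (0x10DE NVIDIA, 0x1002 AMD, 0x5143 Qualcomm, 0x13B5 ARM,
         // 0x1010 Imagination), with driver IDs as a fallback for the mobile parts.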
   1758 bool VulkanDevice::IsDeviceNVIDIA() const
   1759 {
   1760   return (m_device_properties.vendorID == 0x10DE);
   1761 }
   1762 
   1763 bool VulkanDevice::IsDeviceAMD() const
   1764 {
   1765   return (m_device_properties.vendorID == 0x1002);
   1766 }
   1767 
   1768 bool VulkanDevice::IsDeviceAdreno() const
   1769 {
   1770   // Assume turnip is fine...
   1771   return ((m_device_properties.vendorID == 0x5143 ||
   1772            m_device_driver_properties.driverID == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) &&
   1773           m_device_driver_properties.driverID != VK_DRIVER_ID_MESA_TURNIP);
   1774 }
   1775 
   1776 bool VulkanDevice::IsDeviceMali() const
   1777 {
   1778   return (m_device_properties.vendorID == 0x13B5 ||
   1779           m_device_driver_properties.driverID == VK_DRIVER_ID_ARM_PROPRIETARY);
   1780 }
   1781 
   1782 bool VulkanDevice::IsDeviceImgTec() const
   1783 {
   1784   return (m_device_properties.vendorID == 0x1010 ||
   1785           m_device_driver_properties.driverID == VK_DRIVER_ID_IMAGINATION_PROPRIETARY);
   1786 }
   1787 
   1788 bool VulkanDevice::IsBrokenMobileDriver() const
   1789 {
   1790   return (IsDeviceAdreno() || IsDeviceMali() || IsDeviceImgTec());
   1791 }
   1792 
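         // Builds (and caches) a VkRenderPass from a RenderPassCacheKey. For color feedback loops, RT 0 is also bound
         // as an input attachment, and a framebuffer-local self-dependency is added unless the driver supports
         // rasterization-order attachment access, which makes the read-after-write ordering implicit.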
   1793 VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key)
   1794 {
   1795   std::array<VkAttachmentReference, MAX_RENDER_TARGETS> color_references;
   1796   VkAttachmentReference* color_reference_ptr = nullptr;
   1797   VkAttachmentReference depth_reference;
   1798   VkAttachmentReference* depth_reference_ptr = nullptr;
   1799   VkAttachmentReference input_reference;
   1800   VkAttachmentReference* input_reference_ptr = nullptr;
   1801   VkSubpassDependency subpass_dependency;
   1802   VkSubpassDependency* subpass_dependency_ptr = nullptr;
   1803   std::array<VkAttachmentDescription, MAX_RENDER_TARGETS + 1> attachments;
   1804   u32 num_attachments = 0;
   1805 
   1806   for (u32 i = 0; i < MAX_RENDER_TARGETS; i++)
   1807   {
   1808     if (key.color[i].format == static_cast<u8>(GPUTexture::Format::Unknown))
   1809       break;
   1810 
   1811     const VkImageLayout layout =
   1812       (key.feedback_loop & GPUPipeline::ColorFeedbackLoop) ?
   1813         (m_optional_extensions.vk_khr_dynamic_rendering_local_read ? VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR :
   1814                                                                      VK_IMAGE_LAYOUT_GENERAL) :
   1815         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
   1816 
   1817     const RenderPassCacheKey::RenderTarget key_rt = key.color[i];
    1818     attachments[num_attachments] = {0u, // flags
   1819                                     TEXTURE_FORMAT_MAPPING[key_rt.format],
   1820                                     static_cast<VkSampleCountFlagBits>(key.samples),
   1821                                     static_cast<VkAttachmentLoadOp>(key_rt.load_op),
   1822                                     static_cast<VkAttachmentStoreOp>(key_rt.store_op),
   1823                                     VK_ATTACHMENT_LOAD_OP_DONT_CARE,
   1824                                     VK_ATTACHMENT_STORE_OP_DONT_CARE,
   1825                                     layout,
   1826                                     layout};
   1827     color_references[num_attachments].attachment = num_attachments;
   1828     color_references[num_attachments].layout = layout;
   1829     color_reference_ptr = color_references.data();
   1830 
   1831     if (key.feedback_loop & GPUPipeline::ColorFeedbackLoop)
   1832     {
   1833       DebugAssert(i == 0);
   1834       input_reference.attachment = num_attachments;
   1835       input_reference.layout = layout;
   1836       input_reference_ptr = &input_reference;
   1837 
   1838       if (!m_optional_extensions.vk_ext_rasterization_order_attachment_access)
   1839       {
   1840         // don't need the framebuffer-local dependency when we have rasterization order attachment access
   1841         subpass_dependency.srcSubpass = 0;
   1842         subpass_dependency.dstSubpass = 0;
   1843         subpass_dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
   1844         subpass_dependency.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
   1845         subpass_dependency.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
   1846         subpass_dependency.dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
   1847         subpass_dependency.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
   1848         subpass_dependency_ptr = &subpass_dependency;
   1849       }
   1850     }
   1851 
   1852     num_attachments++;
   1853   }
   1854 
   1855   const u32 num_rts = num_attachments;
   1856 
   1857   if (key.depth_format != static_cast<u8>(GPUTexture::Format::Unknown))
   1858   {
   1859     const VkImageLayout layout = (key.feedback_loop & GPUPipeline::SampleDepthBuffer) ?
   1860                                    VK_IMAGE_LAYOUT_GENERAL :
   1861                                    VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
   1862     attachments[num_attachments] = {0,
   1863                                     static_cast<VkFormat>(TEXTURE_FORMAT_MAPPING[key.depth_format]),
   1864                                     static_cast<VkSampleCountFlagBits>(key.samples),
   1865                                     static_cast<VkAttachmentLoadOp>(key.depth_load_op),
   1866                                     static_cast<VkAttachmentStoreOp>(key.depth_store_op),
   1867                                     static_cast<VkAttachmentLoadOp>(key.stencil_load_op),
   1868                                     static_cast<VkAttachmentStoreOp>(key.stencil_store_op),
   1869                                     layout,
   1870                                     layout};
   1871     depth_reference.attachment = num_attachments;
   1872     depth_reference.layout = layout;
   1873     depth_reference_ptr = &depth_reference;
   1874     num_attachments++;
   1875   }
   1876 
   1877   const VkSubpassDescriptionFlags subpass_flags =
   1878     ((key.feedback_loop & GPUPipeline::ColorFeedbackLoop) &&
   1879      m_optional_extensions.vk_ext_rasterization_order_attachment_access) ?
   1880       VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_EXT :
   1881       0;
   1882   const VkSubpassDescription subpass = {subpass_flags,
   1883                                         VK_PIPELINE_BIND_POINT_GRAPHICS,
   1884                                         input_reference_ptr ? num_rts : 0u,
   1885                                         input_reference_ptr,
   1886                                         num_rts,
   1887                                         color_reference_ptr,
   1888                                         nullptr,
   1889                                         depth_reference_ptr,
   1890                                         0,
   1891                                         nullptr};
   1892   const VkRenderPassCreateInfo pass_info = {VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
   1893                                             nullptr,
   1894                                             0u,
   1895                                             num_attachments,
   1896                                             attachments.data(),
   1897                                             1u,
   1898                                             &subpass,
   1899                                             subpass_dependency_ptr ? 1u : 0u,
   1900                                             subpass_dependency_ptr};
   1901 
   1902   VkRenderPass pass;
   1903   const VkResult res = vkCreateRenderPass(m_device, &pass_info, nullptr, &pass);
   1904   if (res != VK_SUCCESS)
   1905   {
   1906     LOG_VULKAN_ERROR(res, "vkCreateRenderPass failed: ");
   1907     return VK_NULL_HANDLE;
   1908   }
   1909 
   1910   m_render_pass_cache.emplace(key, pass);
   1911   return pass;
   1912 }
   1913 
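         // Framebuffers are created against the matching cached render pass; attachments are the RT views followed by
         // the depth view, with dimensions taken from the first attachment.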
   1914 VkFramebuffer VulkanDevice::CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags)
   1915 {
   1916   VulkanDevice& dev = VulkanDevice::GetInstance();
   1917   VkRenderPass render_pass =
   1918     dev.GetRenderPass(reinterpret_cast<VulkanTexture* const*>(rts), num_rts, static_cast<VulkanTexture*>(ds),
   1919                       static_cast<GPUPipeline::RenderPassFlag>(flags));
   1920 
   1921   const GPUTexture* rt_or_ds = (num_rts > 0) ? rts[0] : ds;
   1922   DebugAssert(rt_or_ds);
   1923 
   1924   Vulkan::FramebufferBuilder fbb;
   1925   fbb.SetRenderPass(render_pass);
   1926   fbb.SetSize(rt_or_ds->GetWidth(), rt_or_ds->GetHeight(), 1);
   1927   for (u32 i = 0; i < num_rts; i++)
   1928     fbb.AddAttachment(static_cast<VulkanTexture*>(rts[i])->GetView());
   1929   if (ds)
   1930     fbb.AddAttachment(static_cast<VulkanTexture*>(ds)->GetView());
   1931 
   1932   return fbb.Create(dev.m_device, false);
   1933 }
   1934 
   1935 void VulkanDevice::DestroyFramebuffer(VkFramebuffer fbo)
   1936 {
   1937   if (fbo == VK_NULL_HANDLE)
   1938     return;
   1939 
   1940   VulkanDevice::GetInstance().DeferFramebufferDestruction(fbo);
   1941 }
   1942 
   1943 bool VulkanDevice::IsSuitableDefaultRenderer()
   1944 {
   1945 #ifdef __ANDROID__
   1946   // No way in hell.
   1947   return false;
   1948 #else
   1949   GPUList gpus = EnumerateGPUs();
   1950   if (gpus.empty())
   1951   {
   1952     // No adapters, not gonna be able to use VK.
   1953     return false;
   1954   }
   1955 
   1956   // Check the first GPU, should be enough.
   1957   const std::string& name = gpus.front().second.name;
   1958   INFO_LOG("Using Vulkan GPU '{}' for automatic renderer check.", name);
   1959 
   1960   // Any software rendering (LLVMpipe, SwiftShader).
   1961   if (StringUtil::StartsWithNoCase(name, "llvmpipe") || StringUtil::StartsWithNoCase(name, "SwiftShader"))
   1962   {
   1963     INFO_LOG("Not using Vulkan for software renderer.");
   1964     return false;
   1965   }
   1966 
   1967   // For Intel, OpenGL usually ends up faster on Linux, because of fbfetch.
   1968   // Plus, the Ivy Bridge and Haswell drivers are incomplete.
   1969   if (StringUtil::StartsWithNoCase(name, "Intel"))
   1970   {
   1971     INFO_LOG("Not using Vulkan for Intel GPU.");
   1972     return false;
   1973   }
   1974 
   1975   INFO_LOG("Allowing Vulkan as default renderer.");
   1976   return true;
   1977 #endif
   1978 }
   1979 
   1980 RenderAPI VulkanDevice::GetRenderAPI() const
   1981 {
   1982   return RenderAPI::Vulkan;
   1983 }
   1984 
   1985 bool VulkanDevice::HasSurface() const
   1986 {
   1987   return static_cast<bool>(m_swap_chain);
   1988 }
   1989 
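         // Device bring-up order: load the Vulkan library, create the instance (retrying without debug/validation
         // support if unavailable), pick a physical device, create the surface and logical device, then the allocator,
         // descriptor pools, command buffers, pipeline layouts, swap chain, null texture, and stream buffers.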
   1990 bool VulkanDevice::CreateDevice(std::string_view adapter, bool threaded_presentation,
   1991                                 std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features,
   1992                                 Error* error)
   1993 {
   1994   std::unique_lock lock(s_instance_mutex);
   1995   bool enable_debug_utils = m_debug_device;
   1996   bool enable_validation_layer = m_debug_device;
   1997 
   1998   if (!Vulkan::LoadVulkanLibrary(error))
   1999   {
   2000     Error::AddPrefix(error,
   2001                      "Failed to load Vulkan library. Does your GPU and/or driver support Vulkan?\nThe error was:");
   2002     return false;
   2003   }
   2004 
   2005   m_instance = CreateVulkanInstance(m_window_info, &m_optional_extensions, enable_debug_utils, enable_validation_layer);
   2006   if (m_instance == VK_NULL_HANDLE)
   2007   {
   2008     if (enable_debug_utils || enable_validation_layer)
   2009     {
   2010       // Try again without the validation layer.
   2011       enable_debug_utils = false;
   2012       enable_validation_layer = false;
   2013       m_instance =
   2014         CreateVulkanInstance(m_window_info, &m_optional_extensions, enable_debug_utils, enable_validation_layer);
   2015       if (m_instance == VK_NULL_HANDLE)
   2016       {
   2017         Error::SetStringView(error, "Failed to create Vulkan instance. Does your GPU and/or driver support Vulkan?");
   2018         return false;
   2019       }
   2020 
   2021       ERROR_LOG("Vulkan validation/debug layers requested but are unavailable. Creating non-debug device.");
   2022     }
   2023   }
   2024 
   2025   if (!Vulkan::LoadVulkanInstanceFunctions(m_instance))
   2026   {
   2027     ERROR_LOG("Failed to load Vulkan instance functions");
   2028     Error::SetStringView(error, "Failed to load Vulkan instance functions");
   2029     return false;
   2030   }
   2031 
   2032   GPUList gpus = EnumerateGPUs(m_instance);
   2033   if (gpus.empty())
   2034   {
   2035     Error::SetStringView(error, "No physical devices found. Does your GPU and/or driver support Vulkan?");
   2036     return false;
   2037   }
   2038 
   2039   if (!adapter.empty())
   2040   {
   2041     u32 gpu_index = 0;
   2042     for (; gpu_index < static_cast<u32>(gpus.size()); gpu_index++)
   2043     {
   2044       INFO_LOG("GPU {}: {}", gpu_index, gpus[gpu_index].second.name);
   2045       if (gpus[gpu_index].second.name == adapter)
   2046       {
   2047         m_physical_device = gpus[gpu_index].first;
   2048         break;
   2049       }
   2050     }
   2051 
   2052     if (gpu_index == static_cast<u32>(gpus.size()))
   2053     {
   2054       WARNING_LOG("Requested GPU '{}' not found, using first ({})", adapter, gpus[0].second.name);
   2055       m_physical_device = gpus[0].first;
   2056     }
   2057   }
   2058   else
   2059   {
   2060     INFO_LOG("No GPU requested, using first ({})", gpus[0].second.name);
   2061     m_physical_device = gpus[0].first;
   2062   }
   2063 
    2064   // Read the physical device properties, and clamp the alignment/granularity limits to at least 1 so they
         // can be used directly in buffer allocation and copy calculations.
   2065   vkGetPhysicalDeviceProperties(m_physical_device, &m_device_properties);
   2066   m_device_properties.limits.minUniformBufferOffsetAlignment =
   2067     std::max(m_device_properties.limits.minUniformBufferOffsetAlignment, static_cast<VkDeviceSize>(1));
   2068   m_device_properties.limits.minTexelBufferOffsetAlignment =
   2069     std::max(m_device_properties.limits.minTexelBufferOffsetAlignment, static_cast<VkDeviceSize>(1));
   2070   m_device_properties.limits.optimalBufferCopyOffsetAlignment =
   2071     std::max(m_device_properties.limits.optimalBufferCopyOffsetAlignment, static_cast<VkDeviceSize>(1));
   2072   m_device_properties.limits.optimalBufferCopyRowPitchAlignment =
   2073     std::max(m_device_properties.limits.optimalBufferCopyRowPitchAlignment, static_cast<VkDeviceSize>(1));
   2074   m_device_properties.limits.bufferImageGranularity =
   2075     std::max(m_device_properties.limits.bufferImageGranularity, static_cast<VkDeviceSize>(1));
   2076 
   2077   if (enable_debug_utils)
   2078     EnableDebugUtils();
   2079 
   2080   VkSurfaceKHR surface = VK_NULL_HANDLE;
   2081   ScopedGuard surface_cleanup = [this, &surface]() {
   2082     if (surface != VK_NULL_HANDLE)
   2083       vkDestroySurfaceKHR(m_instance, surface, nullptr);
   2084   };
   2085   if (m_window_info.type != WindowInfo::Type::Surfaceless)
   2086   {
   2087     surface = VulkanSwapChain::CreateVulkanSurface(m_instance, m_physical_device, &m_window_info);
   2088     if (surface == VK_NULL_HANDLE)
   2089       return false;
   2090   }
   2091 
   2092   // Attempt to create the device.
   2093   if (!CreateDevice(surface, enable_validation_layer, disabled_features, error))
   2094     return false;
   2095 
   2096   // And critical resources.
   2097   if (!CreateAllocator() || !CreatePersistentDescriptorPool() || !CreateCommandBuffers() || !CreatePipelineLayouts())
   2098     return false;
   2099 
   2100   if (threaded_presentation)
   2101     StartPresentThread();
   2102 
   2103   m_exclusive_fullscreen_control = exclusive_fullscreen_control;
   2104 
   2105   if (surface != VK_NULL_HANDLE)
   2106   {
   2107     VkPresentModeKHR present_mode;
   2108     if (!VulkanSwapChain::SelectPresentMode(surface, &m_vsync_mode, &present_mode) ||
   2109         !(m_swap_chain = VulkanSwapChain::Create(m_window_info, surface, present_mode, m_exclusive_fullscreen_control)))
   2110     {
   2111       Error::SetStringView(error, "Failed to create swap chain");
   2112       return false;
   2113     }
   2114 
   2115     // NOTE: This is assigned afterwards, because some platforms can modify the window info (e.g. Metal).
   2116     m_window_info = m_swap_chain->GetWindowInfo();
   2117   }
   2118 
   2119   surface_cleanup.Cancel();
   2120 
   2121   // Render a frame as soon as possible to clear out whatever was previously being displayed.
   2122   if (m_window_info.type != WindowInfo::Type::Surfaceless)
   2123     RenderBlankFrame();
   2124 
   2125   if (!CreateNullTexture())
   2126   {
   2127     Error::SetStringView(error, "Failed to create dummy texture");
   2128     return false;
   2129   }
   2130 
   2131   if (!CreateBuffers() || !CreatePersistentDescriptorSets())
   2132   {
   2133     Error::SetStringView(error, "Failed to create buffers/descriptor sets");
   2134     return false;
   2135   }
   2136 
   2137   return true;
   2138 }
   2139 
   2140 void VulkanDevice::DestroyDevice()
   2141 {
   2142   std::unique_lock lock(s_instance_mutex);
   2143 
   2144   if (InRenderPass())
   2145     EndRenderPass();
   2146 
    2147   // Don't bother submitting the current command buffer, just toss it.
   2148   if (m_device != VK_NULL_HANDLE)
   2149     WaitForGPUIdle();
   2150 
   2151   StopPresentThread();
   2152   m_swap_chain.reset();
   2153 
   2154   if (m_null_texture)
   2155   {
   2156     m_null_texture->Destroy(false);
   2157     m_null_texture.reset();
   2158   }
   2159   for (auto& it : m_cleanup_objects)
   2160     it.second();
   2161   m_cleanup_objects.clear();
   2162   DestroyPersistentDescriptorSets();
   2163   DestroyBuffers();
   2164   DestroySamplers();
   2165 
   2166   DestroyPersistentDescriptorPool();
   2167   DestroyPipelineLayouts();
   2168   DestroyCommandBuffers();
   2169   DestroyAllocator();
   2170 
   2171   for (auto& it : m_render_pass_cache)
   2172     vkDestroyRenderPass(m_device, it.second, nullptr);
   2173   m_render_pass_cache.clear();
   2174 
   2175   if (m_pipeline_cache != VK_NULL_HANDLE)
   2176   {
   2177     vkDestroyPipelineCache(m_device, m_pipeline_cache, nullptr);
   2178     m_pipeline_cache = VK_NULL_HANDLE;
   2179   }
   2180 
   2181   if (m_device != VK_NULL_HANDLE)
   2182   {
   2183     vkDestroyDevice(m_device, nullptr);
   2184     m_device = VK_NULL_HANDLE;
   2185   }
   2186 
   2187   if (m_debug_messenger_callback != VK_NULL_HANDLE)
   2188     DisableDebugUtils();
   2189 
   2190   if (m_instance != VK_NULL_HANDLE)
   2191   {
   2192     vkDestroyInstance(m_instance, nullptr);
   2193     m_instance = VK_NULL_HANDLE;
   2194   }
   2195 
   2196   Vulkan::UnloadVulkanLibrary();
   2197 }
   2198 
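         // The on-disk blob starts with the header layout the Vulkan spec mandates for vkGetPipelineCacheData(); it
         // must match the current device's vendor/device IDs and pipeline cache UUID, otherwise the blob is stale.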
   2199 bool VulkanDevice::ValidatePipelineCacheHeader(const VK_PIPELINE_CACHE_HEADER& header)
   2200 {
   2201   if (header.header_length < sizeof(VK_PIPELINE_CACHE_HEADER))
   2202   {
   2203     ERROR_LOG("Pipeline cache failed validation: Invalid header length");
   2204     return false;
   2205   }
   2206 
   2207   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
   2208   {
   2209     ERROR_LOG("Pipeline cache failed validation: Invalid header version");
   2210     return false;
   2211   }
   2212 
   2213   if (header.vendor_id != m_device_properties.vendorID)
   2214   {
   2215     ERROR_LOG("Pipeline cache failed validation: Incorrect vendor ID (file: 0x{:X}, device: 0x{:X})", header.vendor_id,
   2216               m_device_properties.vendorID);
   2217     return false;
   2218   }
   2219 
   2220   if (header.device_id != m_device_properties.deviceID)
   2221   {
   2222     ERROR_LOG("Pipeline cache failed validation: Incorrect device ID (file: 0x{:X}, device: 0x{:X})", header.device_id,
   2223               m_device_properties.deviceID);
   2224     return false;
   2225   }
   2226 
   2227   if (std::memcmp(header.uuid, m_device_properties.pipelineCacheUUID, VK_UUID_SIZE) != 0)
   2228   {
   2229     ERROR_LOG("Pipeline cache failed validation: Incorrect UUID");
   2230     return false;
   2231   }
   2232 
   2233   return true;
   2234 }
   2235 
   2236 void VulkanDevice::FillPipelineCacheHeader(VK_PIPELINE_CACHE_HEADER* header)
   2237 {
   2238   header->header_length = sizeof(VK_PIPELINE_CACHE_HEADER);
   2239   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   2240   header->vendor_id = m_device_properties.vendorID;
   2241   header->device_id = m_device_properties.deviceID;
   2242   std::memcpy(header->uuid, m_device_properties.pipelineCacheUUID, VK_UUID_SIZE);
   2243 }
   2244 
   2245 bool VulkanDevice::ReadPipelineCache(std::optional<DynamicHeapArray<u8>> data)
   2246 {
   2247   if (data.has_value())
   2248   {
   2249     if (data->size() < sizeof(VK_PIPELINE_CACHE_HEADER))
   2250     {
   2251       ERROR_LOG("Pipeline cache is too small, ignoring.");
   2252       data.reset();
    2253     }
             else
             {
               // Only read the header if the data is still present; dereferencing the optional after reset() is UB.
    2255       VK_PIPELINE_CACHE_HEADER header;
    2256       std::memcpy(&header, data->data(), sizeof(header));
    2257       if (!ValidatePipelineCacheHeader(header))
    2258         data.reset();
             }
   2259   }
   2260 
   2261   const VkPipelineCacheCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, nullptr, 0,
   2262                                      data.has_value() ? data->size() : 0, data.has_value() ? data->data() : nullptr};
   2263   VkResult res = vkCreatePipelineCache(m_device, &ci, nullptr, &m_pipeline_cache);
   2264   if (res != VK_SUCCESS)
   2265   {
   2266     LOG_VULKAN_ERROR(res, "vkCreatePipelineCache() failed: ");
   2267     return false;
   2268   }
   2269 
   2270   return true;
   2271 }
   2272 
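         // Standard two-call pattern: query the data size first, then fetch the data. The driver may shrink the size
         // on the second call, so the buffer is resized again afterwards to the amount actually written.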
   2273 bool VulkanDevice::GetPipelineCacheData(DynamicHeapArray<u8>* data)
   2274 {
   2275   if (m_pipeline_cache == VK_NULL_HANDLE)
   2276     return false;
   2277 
   2278   size_t data_size;
   2279   VkResult res = vkGetPipelineCacheData(m_device, m_pipeline_cache, &data_size, nullptr);
   2280   if (res != VK_SUCCESS)
   2281   {
   2282     LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData() failed: ");
   2283     return false;
   2284   }
   2285 
   2286   data->resize(data_size);
   2287   res = vkGetPipelineCacheData(m_device, m_pipeline_cache, &data_size, data->data());
   2288   if (res != VK_SUCCESS)
   2289   {
   2290     LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData() (2) failed: ");
   2291     return false;
   2292   }
   2293 
   2294   data->resize(data_size);
   2295   return true;
   2296 }
   2297 
   2298 bool VulkanDevice::UpdateWindow()
   2299 {
   2300   DestroySurface();
   2301 
   2302   if (!AcquireWindow(false))
   2303     return false;
   2304 
   2305   if (m_window_info.IsSurfaceless())
   2306     return true;
   2307 
   2308   // make sure previous frames are presented
   2309   if (InRenderPass())
   2310     EndRenderPass();
   2311   SubmitCommandBuffer(false);
   2312   WaitForGPUIdle();
   2313 
   2314   VkSurfaceKHR surface = VulkanSwapChain::CreateVulkanSurface(m_instance, m_physical_device, &m_window_info);
   2315   if (surface == VK_NULL_HANDLE)
   2316   {
   2317     ERROR_LOG("Failed to create new surface for swap chain");
   2318     return false;
   2319   }
   2320 
   2321   VkPresentModeKHR present_mode;
   2322   if (!VulkanSwapChain::SelectPresentMode(surface, &m_vsync_mode, &present_mode) ||
   2323       !(m_swap_chain = VulkanSwapChain::Create(m_window_info, surface, present_mode, m_exclusive_fullscreen_control)))
   2324   {
   2325     ERROR_LOG("Failed to create swap chain");
   2326     VulkanSwapChain::DestroyVulkanSurface(m_instance, &m_window_info, surface);
   2327     return false;
   2328   }
   2329 
   2330   m_window_info = m_swap_chain->GetWindowInfo();
   2331   RenderBlankFrame();
   2332   return true;
   2333 }
   2334 
   2335 void VulkanDevice::ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale)
   2336 {
   2337   if (!m_swap_chain)
   2338     return;
   2339 
   2340   if (m_swap_chain->GetWidth() == static_cast<u32>(new_window_width) &&
   2341       m_swap_chain->GetHeight() == static_cast<u32>(new_window_height))
   2342   {
   2343     // skip unnecessary resizes
   2344     m_window_info.surface_scale = new_window_scale;
   2345     return;
   2346   }
   2347 
   2348   // make sure previous frames are presented
   2349   WaitForGPUIdle();
   2350 
   2351   if (!m_swap_chain->ResizeSwapChain(new_window_width, new_window_height, new_window_scale))
   2352   {
   2353     // AcquireNextImage() will fail, and we'll recreate the surface.
   2354     ERROR_LOG("Failed to resize swap chain. Next present will fail.");
   2355     return;
   2356   }
   2357 
   2358   m_window_info = m_swap_chain->GetWindowInfo();
   2359 }
   2360 
   2361 void VulkanDevice::DestroySurface()
   2362 {
   2363   WaitForGPUIdle();
   2364   m_swap_chain.reset();
   2365 }
   2366 
   2367 bool VulkanDevice::SupportsTextureFormat(GPUTexture::Format format) const
   2368 {
   2369   return (TEXTURE_FORMAT_MAPPING[static_cast<u8>(format)] != VK_FORMAT_UNDEFINED);
   2370 }
   2371 
   2372 std::string VulkanDevice::GetDriverInfo() const
   2373 {
   2374   std::string ret;
   2375   const u32 api_version = m_device_properties.apiVersion;
   2376   const u32 driver_version = m_device_properties.driverVersion;
   2377   if (m_optional_extensions.vk_khr_driver_properties)
   2378   {
   2379     const VkPhysicalDeviceDriverProperties& props = m_device_driver_properties;
   2380     ret = fmt::format(
   2381       "Driver {}.{}.{}\nVulkan {}.{}.{}\nConformance Version {}.{}.{}.{}\n{}\n{}\n{}", VK_VERSION_MAJOR(driver_version),
   2382       VK_VERSION_MINOR(driver_version), VK_VERSION_PATCH(driver_version), VK_API_VERSION_MAJOR(api_version),
   2383       VK_API_VERSION_MINOR(api_version), VK_API_VERSION_PATCH(api_version), props.conformanceVersion.major,
   2384       props.conformanceVersion.minor, props.conformanceVersion.subminor, props.conformanceVersion.patch,
   2385       props.driverInfo, props.driverName, m_device_properties.deviceName);
   2386   }
   2387   else
   2388   {
   2389     ret =
   2390       fmt::format("Driver {}.{}.{}\nVulkan {}.{}.{}\n{}", VK_VERSION_MAJOR(driver_version),
   2391                   VK_VERSION_MINOR(driver_version), VK_VERSION_PATCH(driver_version), VK_API_VERSION_MAJOR(api_version),
   2392                   VK_API_VERSION_MINOR(api_version), VK_API_VERSION_PATCH(api_version), m_device_properties.deviceName);
   2393   }
   2394 
   2395   return ret;
   2396 }
   2397 
   2398 void VulkanDevice::ExecuteAndWaitForGPUIdle()
   2399 {
   2400   if (InRenderPass())
   2401     EndRenderPass();
   2402 
   2403   SubmitCommandBuffer(true);
   2404 }
   2405 
   2406 void VulkanDevice::SetVSyncMode(GPUVSyncMode mode, bool allow_present_throttle)
   2407 {
   2408   m_allow_present_throttle = allow_present_throttle;
   2409   if (!m_swap_chain)
   2410   {
   2411     // For when it is re-created.
   2412     m_vsync_mode = mode;
   2413     return;
   2414   }
   2415 
   2416   VkPresentModeKHR present_mode;
   2417   if (!VulkanSwapChain::SelectPresentMode(m_swap_chain->GetSurface(), &mode, &present_mode))
   2418   {
   2419     ERROR_LOG("Ignoring vsync mode change.");
   2420     return;
   2421   }
   2422 
   2423   // Actually changed? If using a fallback, it might not have.
   2424   if (m_vsync_mode == mode)
   2425     return;
   2426 
   2427   m_vsync_mode = mode;
   2428 
    2429   // After waiting for the GPU, no in-flight command buffer references the swap chain, so it is safe to modify.
   2430   WaitForGPUIdle();
   2431   if (!m_swap_chain->SetPresentMode(present_mode))
   2432   {
   2433     Panic("Failed to update swap chain present mode.");
   2434     m_swap_chain.reset();
   2435   }
   2436 }
   2437 
   2438 bool VulkanDevice::BeginPresent(bool frame_skip, u32 clear_color)
   2439 {
   2440   if (InRenderPass())
   2441     EndRenderPass();
   2442 
   2443   if (frame_skip)
   2444     return false;
   2445 
   2446   // If we're running surfaceless, kick the command buffer so we don't run out of descriptors.
   2447   if (!m_swap_chain)
   2448   {
   2449     SubmitCommandBuffer(false);
   2450     TrimTexturePool();
   2451     return false;
   2452   }
   2453 
   2454   // Previous frame needs to be presented before we can acquire the swap chain.
   2455   WaitForPresentComplete();
   2456 
   2457   // Check if the device was lost.
   2458   if (CheckLastSubmitFail())
   2459   {
   2460     Panic("Fixme"); // TODO
   2461     TrimTexturePool();
   2462     return false;
   2463   }
   2464 
   2465   VkResult res = m_swap_chain->AcquireNextImage();
   2466   if (res != VK_SUCCESS)
   2467   {
   2468     LOG_VULKAN_ERROR(res, "vkAcquireNextImageKHR() failed: ");
   2469     m_swap_chain->ReleaseCurrentImage();
   2470 
   2471     if (res == VK_SUBOPTIMAL_KHR || res == VK_ERROR_OUT_OF_DATE_KHR)
   2472     {
   2473       ResizeWindow(0, 0, m_window_info.surface_scale);
   2474       res = m_swap_chain->AcquireNextImage();
   2475     }
   2476     else if (res == VK_ERROR_SURFACE_LOST_KHR)
   2477     {
   2478       WARNING_LOG("Surface lost, attempting to recreate");
   2479       if (!m_swap_chain->RecreateSurface(m_window_info))
   2480       {
   2481         ERROR_LOG("Failed to recreate surface after loss");
   2482         SubmitCommandBuffer(false);
   2483         TrimTexturePool();
   2484         return false;
   2485       }
   2486 
   2487       res = m_swap_chain->AcquireNextImage();
   2488     }
   2489 
   2490     // This can happen when multiple resize events happen in quick succession.
   2491     // In this case, just wait until the next frame to try again.
   2492     if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR)
   2493     {
   2494       // Still submit the command buffer, otherwise we'll end up with several frames waiting.
   2495       SubmitCommandBuffer(false);
   2496       TrimTexturePool();
   2497       return false;
   2498     }
   2499   }
   2500 
   2501   BeginSwapChainRenderPass(clear_color);
   2502   return true;
   2503 }
   2504 
   2505 void VulkanDevice::EndPresent(bool explicit_present)
   2506 {
   2507   DebugAssert(InRenderPass() && m_num_current_render_targets == 0 && !m_current_depth_target);
   2508   EndRenderPass();
   2509 
   2510   VkCommandBuffer cmdbuf = GetCurrentCommandBuffer();
   2511   VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, m_swap_chain->GetCurrentImage(), GPUTexture::Type::RenderTarget,
   2512                                                 0, 1, 0, 1, VulkanTexture::Layout::ColorAttachment,
   2513                                                 VulkanTexture::Layout::PresentSrc);
   2514   EndAndSubmitCommandBuffer(m_swap_chain.get(), explicit_present, !m_swap_chain->IsPresentModeSynchronizing());
   2515   MoveToNextCommandBuffer();
   2516   InvalidateCachedState();
   2517   TrimTexturePool();
   2518 }
   2519 
   2520 void VulkanDevice::SubmitPresent()
   2521 {
   2522   DebugAssert(m_swap_chain);
   2523   DoPresent(m_swap_chain.get());
   2524 }
   2525 
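         // Cosine color palette, a + b * cos(2*pi * (c * phase + d)), in the style of Inigo Quilez's gradients; used
         // to give nested debug groups visually distinct label colors.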
   2526 #ifdef _DEBUG
   2527 static std::array<float, 3> Palette(float phase, const std::array<float, 3>& a, const std::array<float, 3>& b,
   2528                                     const std::array<float, 3>& c, const std::array<float, 3>& d)
   2529 {
   2530   std::array<float, 3> result;
   2531   result[0] = a[0] + b[0] * std::cos(6.28318f * (c[0] * phase + d[0]));
   2532   result[1] = a[1] + b[1] * std::cos(6.28318f * (c[1] * phase + d[1]));
   2533   result[2] = a[2] + b[2] * std::cos(6.28318f * (c[2] * phase + d[2]));
   2534   return result;
   2535 }
   2536 #endif
   2537 
   2538 void VulkanDevice::PushDebugGroup(const char* name)
   2539 {
   2540 #ifdef _DEBUG
   2541   if (!vkCmdBeginDebugUtilsLabelEXT || !m_debug_device)
   2542     return;
   2543 
   2544   const std::array<float, 3> color = Palette(static_cast<float>(++s_debug_scope_depth), {0.5f, 0.5f, 0.5f},
   2545                                              {0.5f, 0.5f, 0.5f}, {1.0f, 1.0f, 0.5f}, {0.8f, 0.90f, 0.30f});
   2546 
   2547   const VkDebugUtilsLabelEXT label = {
   2548     VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
   2549     nullptr,
   2550     name,
   2551     {color[0], color[1], color[2], 1.0f},
   2552   };
   2553   vkCmdBeginDebugUtilsLabelEXT(GetCurrentCommandBuffer(), &label);
   2554 #endif
   2555 }
   2556 
   2557 void VulkanDevice::PopDebugGroup()
   2558 {
   2559 #ifdef _DEBUG
   2560   if (!vkCmdEndDebugUtilsLabelEXT || !m_debug_device)
   2561     return;
   2562 
   2563   s_debug_scope_depth = (s_debug_scope_depth == 0) ? 0 : (s_debug_scope_depth - 1u);
   2564 
   2565   vkCmdEndDebugUtilsLabelEXT(GetCurrentCommandBuffer());
   2566 #endif
   2567 }
   2568 
   2569 void VulkanDevice::InsertDebugMessage(const char* msg)
   2570 {
   2571 #ifdef _DEBUG
   2572   if (!vkCmdInsertDebugUtilsLabelEXT || !m_debug_device)
   2573     return;
   2574 
   2575   const VkDebugUtilsLabelEXT label = {VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, nullptr, msg, {0.0f, 0.0f, 0.0f, 1.0f}};
   2576   vkCmdInsertDebugUtilsLabelEXT(GetCurrentCommandBuffer(), &label);
   2577 #endif
   2578 }
   2579 
   2580 u32 VulkanDevice::GetMaxMultisamples(VkPhysicalDevice physical_device, const VkPhysicalDeviceProperties& properties)
   2581 {
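          // Query supported sample counts for a representative colour (RGBA8) and depth (D32F) target.
          // If either query fails, the zero-initialized properties contribute no sample bits and we fall back to 1x.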
   2582   VkImageFormatProperties color_properties = {};
   2583   vkGetPhysicalDeviceImageFormatProperties(physical_device, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_TYPE_2D,
   2584                                            VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 0,
   2585                                            &color_properties);
   2586   VkImageFormatProperties depth_properties = {};
   2587   vkGetPhysicalDeviceImageFormatProperties(physical_device, VK_FORMAT_D32_SFLOAT, VK_IMAGE_TYPE_2D,
   2588                                            VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, 0,
   2589                                            &depth_properties);
   2590   const VkSampleCountFlags combined_properties = properties.limits.framebufferColorSampleCounts &
   2591                                                  properties.limits.framebufferDepthSampleCounts &
   2592                                                  color_properties.sampleCounts & depth_properties.sampleCounts;
   2593   if (combined_properties & VK_SAMPLE_COUNT_64_BIT)
   2594     return 64;
   2595   else if (combined_properties & VK_SAMPLE_COUNT_32_BIT)
   2596     return 32;
   2597   else if (combined_properties & VK_SAMPLE_COUNT_16_BIT)
   2598     return 16;
   2599   else if (combined_properties & VK_SAMPLE_COUNT_8_BIT)
   2600     return 8;
   2601   else if (combined_properties & VK_SAMPLE_COUNT_4_BIT)
   2602     return 4;
   2603   else if (combined_properties & VK_SAMPLE_COUNT_2_BIT)
   2604     return 2;
   2605   else
   2606     return 1;
   2607 }
   2608 
   2609 void VulkanDevice::SetFeatures(FeatureMask disabled_features, const VkPhysicalDeviceFeatures& vk_features)
   2610 {
   2611   m_max_texture_size =
   2612     std::min(m_device_properties.limits.maxImageDimension2D, m_device_properties.limits.maxFramebufferWidth);
   2613   m_max_multisamples = GetMaxMultisamples(m_physical_device, m_device_properties);
   2614 
   2615   m_features.dual_source_blend = !(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND) && vk_features.dualSrcBlend;
   2616   m_features.framebuffer_fetch =
   2617     !(disabled_features & (FEATURE_MASK_FEEDBACK_LOOPS | FEATURE_MASK_FRAMEBUFFER_FETCH)) &&
   2618     m_optional_extensions.vk_ext_rasterization_order_attachment_access;
   2619 
   2620   if (!m_features.dual_source_blend)
   2621     WARNING_LOG("Vulkan driver is missing dual-source blending. This will have an impact on performance.");
   2622 
   2623   m_features.noperspective_interpolation = true;
   2624   m_features.texture_copy_to_self = !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF);
   2625   m_features.per_sample_shading = vk_features.sampleRateShading;
   2626   m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS);
   2627   m_features.feedback_loops = !(disabled_features & FEATURE_MASK_FEEDBACK_LOOPS);
   2628 
   2629 #ifdef __APPLE__
   2630   // Partial texture buffer uploads appear to be broken in macOS/MoltenVK.
   2631   m_features.texture_buffers_emulated_with_ssbo = true;
   2632 #else
   2633   const u32 max_texel_buffer_elements = m_device_properties.limits.maxTexelBufferElements;
   2634   INFO_LOG("Max texel buffer elements: {}", max_texel_buffer_elements);
   2635   if (max_texel_buffer_elements < MIN_TEXEL_BUFFER_ELEMENTS)
   2636   {
   2637     m_features.texture_buffers_emulated_with_ssbo = true;
   2638   }
   2639 #endif
   2640 
   2641   if (m_features.texture_buffers_emulated_with_ssbo)
   2642     WARNING_LOG("Emulating texture buffers with SSBOs.");
   2643 
   2644   m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && vk_features.geometryShader;
   2645 
   2646   m_features.partial_msaa_resolve = true;
   2647   m_features.memory_import = m_optional_extensions.vk_ext_external_memory_host;
   2648   m_features.explicit_present = true;
   2649   m_features.shader_cache = true;
   2650   m_features.pipeline_cache = true;
   2651   m_features.prefer_unused_textures = true;
   2652   m_features.raster_order_views =
   2653     (!(disabled_features & FEATURE_MASK_RASTER_ORDER_VIEWS) && vk_features.fragmentStoresAndAtomics &&
   2654      m_optional_extensions.vk_ext_fragment_shader_interlock);
   2655 }
   2656 
   2657 void VulkanDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
   2658                                      GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width,
   2659                                      u32 height)
   2660 {
   2661   VulkanTexture* const S = static_cast<VulkanTexture*>(src);
   2662   VulkanTexture* const D = static_cast<VulkanTexture*>(dst);
   2663 
   2664   if (S->GetState() == GPUTexture::State::Cleared)
   2665   {
   2666     // source is cleared. if destination is a render target, we can carry the clear forward
   2667     if (D->IsRenderTargetOrDepthStencil())
   2668     {
   2669       if (dst_level == 0 && dst_x == 0 && dst_y == 0 && width == D->GetWidth() && height == D->GetHeight())
   2670       {
   2671         // pass it forward if we're clearing the whole thing
   2672         if (S->IsDepthStencil())
   2673           D->SetClearDepth(S->GetClearDepth());
   2674         else
   2675           D->SetClearColor(S->GetClearColor());
   2676 
   2677         return;
   2678       }
   2679 
   2680       if (D->GetState() == GPUTexture::State::Cleared)
   2681       {
   2682         // destination is also cleared; if the clear values match, the copy is a no-op and can be skipped entirely
   2683         if (D->IsDepthStencil())
   2684         {
   2685           if (D->GetClearDepth() == S->GetClearDepth())
   2686             return;
   2687         }
   2688         else
   2689         {
   2690           if (D->GetClearColor() == S->GetClearColor())
   2691             return;
   2692         }
   2693       }
   2694 
   2695       // TODO: Could use attachment clear here..
   2696     }
   2697 
   2698     // commit the clear to the source first, then do normal copy
   2699     S->CommitClear();
   2700   }
   2701 
   2702   // if the destination has been cleared, and we're not overwriting the whole thing, commit the clear first
   2703   // (the area outside of where we're copying to)
   2704   if (D->GetState() == GPUTexture::State::Cleared &&
   2705       (dst_level != 0 || dst_x != 0 || dst_y != 0 || width != D->GetWidth() || height != D->GetHeight()))
   2706   {
   2707     D->CommitClear();
   2708   }
   2709 
   2710   // *now* we can do a normal image copy.
   2711   const VkImageAspectFlags src_aspect = (S->IsDepthStencil()) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT;
   2712   const VkImageAspectFlags dst_aspect = (D->IsDepthStencil()) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT;
   2713   const VkImageCopy ic = {{src_aspect, src_level, src_layer, 1u},
   2714                           {static_cast<s32>(src_x), static_cast<s32>(src_y), 0},
   2715                           {dst_aspect, dst_level, dst_layer, 1u},
   2716                           {static_cast<s32>(dst_x), static_cast<s32>(dst_y), 0},
   2717                           {width, height, 1u}};
   2718 
   2719   if (InRenderPass())
   2720     EndRenderPass();
   2721 
   2722   s_stats.num_copies++;
   2723 
   2724   S->SetUseFenceCounter(GetCurrentFenceCounter());
   2725   D->SetUseFenceCounter(GetCurrentFenceCounter());
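          // A copy within the same image needs both subresources in one common layout (TransferSelf, presumably
          // GENERAL); otherwise the usual TRANSFER_SRC/TRANSFER_DST pair is used.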
   2726   S->TransitionToLayout((D == S) ? VulkanTexture::Layout::TransferSelf : VulkanTexture::Layout::TransferSrc);
   2727   D->TransitionToLayout((D == S) ? VulkanTexture::Layout::TransferSelf : VulkanTexture::Layout::TransferDst);
   2728 
   2729   vkCmdCopyImage(GetCurrentCommandBuffer(), S->GetImage(), S->GetVkLayout(), D->GetImage(), D->GetVkLayout(), 1, &ic);
   2730 
   2731   D->SetState(GPUTexture::State::Dirty);
   2732 }
   2733 
   2734 void VulkanDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
   2735                                         GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height)
   2736 {
   2737   DebugAssert((src_x + width) <= src->GetWidth());
   2738   DebugAssert((src_y + height) <= src->GetHeight());
   2739   DebugAssert(src->IsMultisampled());
   2740   DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers());
   2741   DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level));
   2742   DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level));
   2743   DebugAssert(!dst->IsMultisampled() && src->IsMultisampled());
   2744 
   2745   if (InRenderPass())
   2746     EndRenderPass();
   2747 
   2748   s_stats.num_copies++;
   2749 
   2750   VulkanTexture* D = static_cast<VulkanTexture*>(dst);
   2751   VulkanTexture* S = static_cast<VulkanTexture*>(src);
   2752   const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer();
   2753 
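          // Commit any pending clears first: the source must actually contain the clear colour before resolving,
          // and the destination only needs its clear committed when the resolve doesn't cover it entirely.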
   2754   if (S->GetState() == GPUTexture::State::Cleared)
   2755     S->CommitClear(cmdbuf);
   2756   if (D->IsRenderTargetOrDepthStencil() && D->GetState() == GPUTexture::State::Cleared)
   2757   {
   2758     if (width < dst->GetWidth() || height < dst->GetHeight())
   2759       D->CommitClear(cmdbuf);
   2760     else
   2761       D->SetState(GPUTexture::State::Dirty);
   2762   }
   2763 
   2764   S->TransitionSubresourcesToLayout(cmdbuf, 0, 1, 0, 1, S->GetLayout(), VulkanTexture::Layout::TransferSrc);
   2765   D->TransitionSubresourcesToLayout(cmdbuf, dst_layer, 1, dst_level, 1, D->GetLayout(),
   2766                                     VulkanTexture::Layout::TransferDst);
   2767 
   2768   const VkImageResolve resolve = {{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u},
   2769                                   {static_cast<s32>(src_x), static_cast<s32>(src_y), 0},
   2770                                   {VK_IMAGE_ASPECT_COLOR_BIT, dst_level, dst_layer, 1u},
   2771                                   {static_cast<s32>(dst_x), static_cast<s32>(dst_y), 0},
   2772                                   {width, height, 1}};
   2773   vkCmdResolveImage(cmdbuf, S->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, D->GetImage(),
   2774                     VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &resolve);
   2775 
   2776   S->TransitionSubresourcesToLayout(cmdbuf, 0, 1, 0, 1, VulkanTexture::Layout::TransferSrc, S->GetLayout());
   2777   D->TransitionSubresourcesToLayout(cmdbuf, dst_layer, 1, dst_level, 1, VulkanTexture::Layout::TransferDst,
   2778                                     D->GetLayout());
   2779 }
   2780 
   2781 void VulkanDevice::ClearRenderTarget(GPUTexture* t, u32 c)
   2782 {
   2783   GPUDevice::ClearRenderTarget(t, c);
   2784   if (InRenderPass())
   2785   {
   2786     const s32 idx = IsRenderTargetBoundIndex(t);
   2787     if (idx >= 0)
   2788     {
   2789       VulkanTexture* T = static_cast<VulkanTexture*>(t);
   2790 
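              // vkCmdClearAttachments() is unreliable on NVIDIA (see the explanation in ClearDepth() below), so
              // break the render pass; the pending clear is then applied via the load op when the next pass begins.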
   2791       if (IsDeviceNVIDIA())
   2792       {
   2793         EndRenderPass();
   2794       }
   2795       else
   2796       {
   2797         // Use an attachment clear so the render pass isn't restarted.
   2798         const VkClearAttachment ca = {VK_IMAGE_ASPECT_COLOR_BIT,
   2799                                       static_cast<u32>(idx),
   2800                                       {.color = T->GetClearColorValue()}};
   2801         const VkClearRect rc = {{{0, 0}, {T->GetWidth(), T->GetHeight()}}, 0u, 1u};
   2802         vkCmdClearAttachments(m_current_command_buffer, 1, &ca, 1, &rc);
   2803         T->SetState(GPUTexture::State::Dirty);
   2804       }
   2805     }
   2806   }
   2807 }
   2808 
   2809 void VulkanDevice::ClearDepth(GPUTexture* t, float d)
   2810 {
   2811   GPUDevice::ClearDepth(t, d);
   2812   if (InRenderPass() && m_current_depth_target == t)
   2813   {
   2814     // Using vkCmdClearAttachments() within a render pass on NVIDIA seems to cause dependency issues
   2815     // between draws that are testing depth which precede it. The result is flickering where Z tests
   2816     // should be failing. Breaking/restarting the render pass isn't enough to work around the bug,
   2817     // it needs an explicit pipeline barrier.
   2818     VulkanTexture* T = static_cast<VulkanTexture*>(t);
   2819     if (IsDeviceNVIDIA())
   2820     {
   2821       EndRenderPass();
   2822       T->TransitionSubresourcesToLayout(GetCurrentCommandBuffer(), 0, 1, 0, 1, T->GetLayout(), T->GetLayout());
   2823     }
   2824     else
   2825     {
   2826       // Use an attachment clear so the render pass isn't restarted.
   2827       const VkClearAttachment ca = {VK_IMAGE_ASPECT_DEPTH_BIT, 0, {.depthStencil = T->GetClearDepthValue()}};
   2828       const VkClearRect rc = {{{0, 0}, {T->GetWidth(), T->GetHeight()}}, 0u, 1u};
   2829       vkCmdClearAttachments(m_current_command_buffer, 1, &ca, 1, &rc);
   2830       T->SetState(GPUTexture::State::Dirty);
   2831     }
   2832   }
   2833 }
   2834 
   2835 void VulkanDevice::InvalidateRenderTarget(GPUTexture* t)
   2836 {
   2837   GPUDevice::InvalidateRenderTarget(t);
   2838   if (InRenderPass() && (t->IsDepthStencil() ? (m_current_depth_target == t) : (IsRenderTargetBoundIndex(t) >= 0)))
   2839   {
   2840     // Invalidation permits keeping whatever's already in the buffer, so just mark the contents dirty.
   2841     GL_INS_FMT("Invalidating current {}", t->IsDepthStencil() ? "DS" : "RT");
   2842     t->SetState(GPUTexture::State::Dirty);
   2843   }
   2844 }
   2845 
   2846 bool VulkanDevice::CreateBuffers()
   2847 {
   2848   if (!m_vertex_buffer.Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_BUFFER_SIZE))
   2849   {
   2850     ERROR_LOG("Failed to allocate vertex buffer");
   2851     return false;
   2852   }
   2853 
   2854   if (!m_index_buffer.Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, INDEX_BUFFER_SIZE))
   2855   {
   2856     ERROR_LOG("Failed to allocate index buffer");
   2857     return false;
   2858   }
   2859 
   2860   if (!m_uniform_buffer.Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VERTEX_UNIFORM_BUFFER_SIZE))
   2861   {
   2862     ERROR_LOG("Failed to allocate uniform buffer");
   2863     return false;
   2864   }
   2865 
   2866   if (!m_texture_upload_buffer.Create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, TEXTURE_BUFFER_SIZE))
   2867   {
   2868     ERROR_LOG("Failed to allocate texture upload buffer");
   2869     return false;
   2870   }
   2871 
   2872   return true;
   2873 }
   2874 
   2875 void VulkanDevice::DestroyBuffers()
   2876 {
   2877   m_texture_upload_buffer.Destroy(false);
   2878   m_uniform_buffer.Destroy(false);
   2879   m_index_buffer.Destroy(false);
   2880   m_vertex_buffer.Destroy(false);
   2881 }
   2882 
   2883 void VulkanDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
   2884                                    u32* map_base_vertex)
   2885 {
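          // Reserve space in the streaming buffer; if it's exhausted, submit the current command buffer so
          // in-flight regions can be reclaimed, then retry once before giving up.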
   2886   const u32 req_size = vertex_size * vertex_count;
   2887   if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size))
   2888   {
   2889     SubmitCommandBufferAndRestartRenderPass("out of vertex space");
   2890     if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size))
   2891       Panic("Failed to allocate vertex space");
   2892   }
   2893 
   2894   *map_ptr = m_vertex_buffer.GetCurrentHostPointer();
   2895   *map_space = m_vertex_buffer.GetCurrentSpace() / vertex_size;
   2896   *map_base_vertex = m_vertex_buffer.GetCurrentOffset() / vertex_size;
   2897 }
   2898 
   2899 void VulkanDevice::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count)
   2900 {
   2901   const u32 size = vertex_size * vertex_count;
   2902   s_stats.buffer_streamed += size;
   2903   m_vertex_buffer.CommitMemory(size);
   2904 }
   2905 
   2906 void VulkanDevice::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index)
   2907 {
   2908   const u32 req_size = sizeof(DrawIndex) * index_count;
   2909   if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex)))
   2910   {
   2911     SubmitCommandBufferAndRestartRenderPass("out of index space");
   2912     if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex)))
   2913       Panic("Failed to allocate index space");
   2914   }
   2915 
   2916   *map_ptr = reinterpret_cast<DrawIndex*>(m_index_buffer.GetCurrentHostPointer());
   2917   *map_space = m_index_buffer.GetCurrentSpace() / sizeof(DrawIndex);
   2918   *map_base_index = m_index_buffer.GetCurrentOffset() / sizeof(DrawIndex);
   2919 }
   2920 
   2921 void VulkanDevice::UnmapIndexBuffer(u32 used_index_count)
   2922 {
   2923   const u32 size = sizeof(DrawIndex) * used_index_count;
   2924   s_stats.buffer_streamed += size;
   2925   m_index_buffer.CommitMemory(size);
   2926 }
   2927 
   2928 void VulkanDevice::PushUniformBuffer(const void* data, u32 data_size)
   2929 {
   2930   DebugAssert(data_size <= UNIFORM_PUSH_CONSTANTS_SIZE);
   2931   s_stats.buffer_streamed += data_size;
   2932   vkCmdPushConstants(GetCurrentCommandBuffer(), GetCurrentVkPipelineLayout(), UNIFORM_PUSH_CONSTANTS_STAGES, 0,
   2933                      data_size, data);
   2934 }
   2935 
   2936 void* VulkanDevice::MapUniformBuffer(u32 size)
   2937 {
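          // Reserve MAX_UNIFORM_BUFFER_SIZE of headroom beyond the request: the persistent UBO descriptor is
          // bound with that range (see CreatePersistentDescriptorSets()), so the buffer must remain valid for
          // that many bytes past any dynamic offset.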
   2938   const u32 align = static_cast<u32>(m_device_properties.limits.minUniformBufferOffsetAlignment);
   2939   const u32 used_space = Common::AlignUpPow2(size, align);
   2940   if (!m_uniform_buffer.ReserveMemory(used_space + MAX_UNIFORM_BUFFER_SIZE, align))
   2941   {
   2942     SubmitCommandBufferAndRestartRenderPass("out of uniform space");
   2943     if (!m_uniform_buffer.ReserveMemory(used_space + MAX_UNIFORM_BUFFER_SIZE, align))
   2944       Panic("Failed to allocate uniform space.");
   2945   }
   2946 
   2947   return m_uniform_buffer.GetCurrentHostPointer();
   2948 }
   2949 
   2950 void VulkanDevice::UnmapUniformBuffer(u32 size)
   2951 {
   2952   s_stats.buffer_streamed += size;
   2953   m_uniform_buffer_position = m_uniform_buffer.GetCurrentOffset();
   2954   m_uniform_buffer.CommitMemory(size);
   2955   m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS;
   2956 }
   2957 
   2958 bool VulkanDevice::CreateNullTexture()
   2959 {
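          // 1x1 dummy texture, presumably used to keep otherwise-unused texture slots pointing at a valid
          // image/view so descriptor sets never reference a destroyed resource.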
   2960   m_null_texture = VulkanTexture::Create(1, 1, 1, 1, 1, GPUTexture::Type::RWTexture, GPUTexture::Format::RGBA8,
   2961                                          VK_FORMAT_R8G8B8A8_UNORM);
   2962   if (!m_null_texture)
   2963     return false;
   2964 
   2965   const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer();
   2966   const VkImageSubresourceRange srr{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u};
   2967   const VkClearColorValue ccv{};
   2968   m_null_texture->TransitionToLayout(cmdbuf, VulkanTexture::Layout::ClearDst);
   2969   vkCmdClearColorImage(cmdbuf, m_null_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &ccv, 1, &srr);
   2970   m_null_texture->TransitionToLayout(cmdbuf, VulkanTexture::Layout::General);
   2971   Vulkan::SetObjectName(m_device, m_null_texture->GetImage(), "Null texture");
   2972   Vulkan::SetObjectName(m_device, m_null_texture->GetView(), "Null texture view");
   2973 
   2974   // Bind null texture and point sampler state to all.
   2975   const VkSampler point_sampler = GetSampler(GPUSampler::GetNearestConfig());
   2976   if (point_sampler == VK_NULL_HANDLE)
   2977     return false;
   2978 
   2979   for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
   2980     m_current_samplers[i] = point_sampler;
   2981 
   2982   return true;
   2983 }
   2984 
   2985 bool VulkanDevice::CreatePipelineLayouts()
   2986 {
   2987   Vulkan::DescriptorSetLayoutBuilder dslb;
   2988   Vulkan::PipelineLayoutBuilder plb;
   2989 
   2990   {
   2991     dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1,
   2992                     VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
   2993     if ((m_ubo_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
   2994       return false;
   2995     Vulkan::SetObjectName(m_device, m_ubo_ds_layout, "UBO Descriptor Set Layout");
   2996   }
   2997 
   2998   {
   2999     dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
   3000     if ((m_single_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
   3001       return false;
   3002     Vulkan::SetObjectName(m_device, m_single_texture_ds_layout, "Single Texture Descriptor Set Layout");
   3003   }
   3004 
   3005   {
   3006     dslb.AddBinding(0,
   3007                     m_features.texture_buffers_emulated_with_ssbo ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER :
   3008                                                                     VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
   3009                     1, VK_SHADER_STAGE_FRAGMENT_BIT);
   3010     if ((m_single_texture_buffer_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
   3011       return false;
   3012     Vulkan::SetObjectName(m_device, m_single_texture_buffer_ds_layout, "Texture Buffer Descriptor Set Layout");
   3013   }
   3014 
   3015   {
   3016     if (m_optional_extensions.vk_khr_push_descriptor)
   3017       dslb.SetPushFlag();
   3018     for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
   3019       dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
   3020     if ((m_multi_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
   3021       return false;
   3022     Vulkan::SetObjectName(m_device, m_multi_texture_ds_layout, "Multi Texture Descriptor Set Layout");
   3023   }
   3024 
   3025   if (m_features.feedback_loops)
   3026   {
   3027     // TODO: This isn't ideal, since we can't push the RT descriptors.
   3028     dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
   3029     if ((m_feedback_loop_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
   3030       return false;
   3031     Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "Feedback Loop Descriptor Set Layout");
   3032   }
   3033 
   3034   if (m_features.raster_order_views)
   3035   {
   3036     for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
   3037       dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
   3038     if ((m_rov_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
   3039       return false;
   3040     Vulkan::SetObjectName(m_device, m_rov_ds_layout, "ROV Descriptor Set Layout");
   3041   }
   3042 
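          // Layout variants: type 0 = normal, type 1 = colour feedback loop, type 2 = ROV (render targets
          // bound as images).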
   3043   for (u32 type = 0; type < 3; type++)
   3044   {
   3045     const bool feedback_loop = (type == 1);
   3046     const bool rov = (type == 2);
   3047     if ((feedback_loop && !m_features.feedback_loops) || (rov && !m_features.raster_order_views))
   3048       continue;
   3049 
   3050     {
   3051       VkPipelineLayout& pl = m_pipeline_layouts[type][static_cast<u8>(GPUPipeline::Layout::SingleTextureAndUBO)];
   3052       plb.AddDescriptorSet(m_ubo_ds_layout);
   3053       plb.AddDescriptorSet(m_single_texture_ds_layout);
   3054       if (feedback_loop)
   3055         plb.AddDescriptorSet(m_feedback_loop_ds_layout);
   3056       else if (rov)
   3057         plb.AddDescriptorSet(m_rov_ds_layout);
   3058       if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
   3059         return false;
   3060       Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout");
   3061     }
   3062 
   3063     {
   3064       VkPipelineLayout& pl =
   3065         m_pipeline_layouts[type][static_cast<u8>(GPUPipeline::Layout::SingleTextureAndPushConstants)];
   3066       plb.AddDescriptorSet(m_single_texture_ds_layout);
   3067       if (feedback_loop)
   3068         plb.AddDescriptorSet(m_feedback_loop_ds_layout);
   3069       else if (rov)
   3070         plb.AddDescriptorSet(m_rov_ds_layout);
   3071       plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
   3072       if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
   3073         return false;
   3074       Vulkan::SetObjectName(m_device, pl, "Single Texture Pipeline Layout");
   3075     }
   3076 
   3077     {
   3078       VkPipelineLayout& pl =
   3079         m_pipeline_layouts[type][static_cast<u8>(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)];
   3080       plb.AddDescriptorSet(m_single_texture_buffer_ds_layout);
   3081       if (feedback_loop)
   3082         plb.AddDescriptorSet(m_feedback_loop_ds_layout);
   3083       else if (rov)
   3084         plb.AddDescriptorSet(m_rov_ds_layout);
   3085       plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
   3086       if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
   3087         return false;
   3088       Vulkan::SetObjectName(m_device, pl, "Single Texture Buffer + UBO Pipeline Layout");
   3089     }
   3090 
   3091     {
   3092       VkPipelineLayout& pl = m_pipeline_layouts[type][static_cast<u8>(GPUPipeline::Layout::MultiTextureAndUBO)];
   3093       plb.AddDescriptorSet(m_ubo_ds_layout);
   3094       plb.AddDescriptorSet(m_multi_texture_ds_layout);
   3095       if (feedback_loop)
   3096         plb.AddDescriptorSet(m_feedback_loop_ds_layout);
   3097       else if (rov)
   3098         plb.AddDescriptorSet(m_rov_ds_layout);
   3099       if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
   3100         return false;
   3101       Vulkan::SetObjectName(m_device, pl, "Multi Texture + UBO Pipeline Layout");
   3102     }
   3103 
   3104     {
   3105       VkPipelineLayout& pl =
   3106         m_pipeline_layouts[type][static_cast<u8>(GPUPipeline::Layout::MultiTextureAndPushConstants)];
   3107       plb.AddDescriptorSet(m_multi_texture_ds_layout);
   3108       if (feedback_loop)
   3109         plb.AddDescriptorSet(m_feedback_loop_ds_layout);
   3110       else if (rov)
   3111         plb.AddDescriptorSet(m_rov_ds_layout);
   3112       plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
   3113       if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
   3114         return false;
   3115       Vulkan::SetObjectName(m_device, pl, "Multi Texture Pipeline Layout");
   3116     }
   3117   }
   3118 
   3119   return true;
   3120 }
   3121 
   3122 void VulkanDevice::DestroyPipelineLayouts()
   3123 {
   3124   m_pipeline_layouts.enumerate([this](auto& pl) {
   3125     if (pl != VK_NULL_HANDLE)
   3126     {
   3127       vkDestroyPipelineLayout(m_device, pl, nullptr);
   3128       pl = VK_NULL_HANDLE;
   3129     }
   3130   });
   3131 
   3132   auto destroy_dsl = [this](VkDescriptorSetLayout& l) {
   3133     if (l != VK_NULL_HANDLE)
   3134     {
   3135       vkDestroyDescriptorSetLayout(m_device, l, nullptr);
   3136       l = VK_NULL_HANDLE;
   3137     }
   3138   };
   3139   destroy_dsl(m_rov_ds_layout);
   3140   destroy_dsl(m_feedback_loop_ds_layout);
   3141   destroy_dsl(m_multi_texture_ds_layout);
   3142   destroy_dsl(m_single_texture_buffer_ds_layout);
   3143   destroy_dsl(m_single_texture_ds_layout);
   3144   destroy_dsl(m_ubo_ds_layout);
   3145 }
   3146 
   3147 bool VulkanDevice::CreatePersistentDescriptorSets()
   3148 {
   3149   Vulkan::DescriptorSetUpdateBuilder dsub;
   3150 
   3151   // TODO: Verify this is sound. The UBO descriptor is bound with an upper-bound range (MAX_UNIFORM_BUFFER_SIZE); that should be fine so long as shaders never read the full range past the data actually written.
   3152   m_ubo_descriptor_set = AllocatePersistentDescriptorSet(m_ubo_ds_layout);
   3153   if (m_ubo_descriptor_set == VK_NULL_HANDLE)
   3154     return false;
   3155   dsub.AddBufferDescriptorWrite(m_ubo_descriptor_set, 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC,
   3156                                 m_uniform_buffer.GetBuffer(), 0, MAX_UNIFORM_BUFFER_SIZE);
   3157   dsub.Update(m_device, false);
   3158 
   3159   return true;
   3160 }
   3161 
   3162 void VulkanDevice::DestroyPersistentDescriptorSets()
   3163 {
   3164   if (m_ubo_descriptor_set != VK_NULL_HANDLE)
   3165     FreePersistentDescriptorSet(m_ubo_descriptor_set);
   3166 }
   3167 
   3168 void VulkanDevice::RenderBlankFrame()
   3169 {
   3170   VkResult res = m_swap_chain->AcquireNextImage();
   3171   if (res != VK_SUCCESS)
   3172   {
   3173     ERROR_LOG("Failed to acquire image for blank frame present");
   3174     return;
   3175   }
   3176 
   3177   VkCommandBuffer cmdbuf = GetCurrentCommandBuffer();
   3178 
   3179   const VkImage image = m_swap_chain->GetCurrentImage();
   3180   static constexpr VkImageSubresourceRange srr = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
   3181   static constexpr VkClearColorValue clear_color = {{0.0f, 0.0f, 0.0f, 1.0f}};
   3182   VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1,
   3183                                                 VulkanTexture::Layout::Undefined, VulkanTexture::Layout::TransferDst);
   3184   vkCmdClearColorImage(cmdbuf, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &srr);
   3185   VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1,
   3186                                                 VulkanTexture::Layout::TransferDst, VulkanTexture::Layout::PresentSrc);
   3187 
   3188   EndAndSubmitCommandBuffer(m_swap_chain.get(), false, !m_swap_chain->IsPresentModeSynchronizing());
   3189   MoveToNextCommandBuffer();
   3190 
   3191   InvalidateCachedState();
   3192 }
   3193 
   3194 bool VulkanDevice::TryImportHostMemory(void* data, size_t data_size, VkBufferUsageFlags buffer_usage,
   3195                                        VkDeviceMemory* out_memory, VkBuffer* out_buffer, VkDeviceSize* out_offset)
   3196 {
   3197   if (!m_optional_extensions.vk_ext_external_memory_host)
   3198     return false;
   3199 
   3200   // Align to the nearest page
   3201   void* data_aligned =
   3202     reinterpret_cast<void*>(Common::AlignDownPow2(reinterpret_cast<uintptr_t>(data), HOST_PAGE_SIZE));
   3203 
   3204   // Offset to the start of the data within the page
   3205   const size_t data_offset = reinterpret_cast<uintptr_t>(data) & static_cast<uintptr_t>(HOST_PAGE_MASK);
   3206 
   3207   // Full amount of data that must be imported, including the pages
   3208   const size_t data_size_aligned = Common::AlignUpPow2(data_offset + data_size, HOST_PAGE_SIZE);
   3209 
   3210   VkMemoryHostPointerPropertiesEXT pointer_properties = {VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, nullptr,
   3211                                                          0};
   3212   VkResult res = vkGetMemoryHostPointerPropertiesEXT(m_device, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
   3213                                                      data_aligned, &pointer_properties);
   3214   if (res != VK_SUCCESS || pointer_properties.memoryTypeBits == 0)
   3215   {
   3216     LOG_VULKAN_ERROR(res, "vkGetMemoryHostPointerPropertiesEXT() failed: ");
   3217     return false;
   3218   }
   3219 
   3220   VmaAllocationCreateInfo vma_alloc_info = {};
   3221   vma_alloc_info.preferredFlags =
   3222     VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
   3223   vma_alloc_info.memoryTypeBits = pointer_properties.memoryTypeBits;
   3224 
   3225   u32 memory_index = 0;
   3226   res = vmaFindMemoryTypeIndex(m_allocator, pointer_properties.memoryTypeBits, &vma_alloc_info, &memory_index);
   3227   if (res != VK_SUCCESS)
   3228   {
   3229     LOG_VULKAN_ERROR(res, "vmaFindMemoryTypeIndex() failed: ");
   3230     return false;
   3231   }
   3232 
   3233   const VkImportMemoryHostPointerInfoEXT import_info = {VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, nullptr,
   3234                                                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
   3235                                                         data_aligned};
   3236 
   3237   const VkMemoryAllocateInfo alloc_info = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, &import_info, data_size_aligned,
   3238                                            memory_index};
   3239 
   3240   VkDeviceMemory imported_memory = VK_NULL_HANDLE;
   3241 
   3242   res = vkAllocateMemory(m_device, &alloc_info, nullptr, &imported_memory);
   3243   if (res != VK_SUCCESS)
   3244   {
   3245     LOG_VULKAN_ERROR(res, "vkAllocateMemory() failed: ");
   3246     return false;
   3247   }
   3248 
   3249   const VkExternalMemoryBufferCreateInfo external_info = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, nullptr,
   3250                                                           VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT};
   3251 
   3252   const VkBufferCreateInfo buffer_info = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
   3253                                           &external_info,
   3254                                           0,
   3255                                           data_size_aligned,
   3256                                           buffer_usage,
   3257                                           VK_SHARING_MODE_EXCLUSIVE,
   3258                                           0,
   3259                                           nullptr};
   3260 
   3261   VkBuffer imported_buffer = VK_NULL_HANDLE;
   3262   res = vkCreateBuffer(m_device, &buffer_info, nullptr, &imported_buffer);
   3263   if (res != VK_SUCCESS)
   3264   {
   3265     LOG_VULKAN_ERROR(res, "vkCreateBuffer() failed: ");
   3266     if (imported_memory != VK_NULL_HANDLE)
   3267       vkFreeMemory(m_device, imported_memory, nullptr);
   3268 
   3269     return false;
   3270   }
   3271 
   3272   res = vkBindBufferMemory(m_device, imported_buffer, imported_memory, 0);
          if (res != VK_SUCCESS)
          {
            LOG_VULKAN_ERROR(res, "vkBindBufferMemory() failed: ");
            vkDestroyBuffer(m_device, imported_buffer, nullptr);
            vkFreeMemory(m_device, imported_memory, nullptr);
            return false;
          }
   3273 
   3274   *out_memory = imported_memory;
   3275   *out_buffer = imported_buffer;
   3276   *out_offset = data_offset;
   3277   DEV_LOG("Imported {} byte buffer covering {} bytes at {}", data_size, data_size_aligned, data);
   3278   return true;
   3279 }
   3280 
   3281 void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
   3282                                     GPUPipeline::RenderPassFlag flags)
   3283 {
   3284   const bool changed_layout =
   3285     (m_current_render_pass_flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages)) !=
   3286     (flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages));
   3287   bool changed =
   3288     (m_num_current_render_targets != num_rts || m_current_depth_target != ds || m_current_render_pass_flags != flags);
   3289   bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
   3290   bool needs_rt_clear = false;
   3291 
   3292   m_current_depth_target = static_cast<VulkanTexture*>(ds);
   3293   for (u32 i = 0; i < num_rts; i++)
   3294   {
   3295     VulkanTexture* const RT = static_cast<VulkanTexture*>(rts[i]);
   3296     changed |= m_current_render_targets[i] != RT;
   3297     m_current_render_targets[i] = RT;
   3298     needs_rt_clear |= RT->IsClearedOrInvalidated();
   3299   }
   3300   for (u32 i = num_rts; i < m_num_current_render_targets; i++)
   3301     m_current_render_targets[i] = nullptr;
   3302   m_num_current_render_targets = Truncate8(num_rts);
   3303   m_current_render_pass_flags = flags;
   3304 
   3305   if (changed)
   3306   {
   3307     if (InRenderPass())
   3308       EndRenderPass();
   3309 
   3310     if (m_num_current_render_targets == 0 && !m_current_depth_target)
   3311     {
   3312       m_current_framebuffer = VK_NULL_HANDLE;
   3313       return;
   3314     }
   3315 
   3316     if (!m_optional_extensions.vk_khr_dynamic_rendering ||
   3317         ((flags & GPUPipeline::ColorFeedbackLoop) && !m_optional_extensions.vk_khr_dynamic_rendering_local_read))
   3318     {
   3319       m_current_framebuffer = m_framebuffer_manager.Lookup(
   3320         (m_num_current_render_targets > 0) ? reinterpret_cast<GPUTexture**>(m_current_render_targets.data()) : nullptr,
   3321         m_num_current_render_targets, m_current_depth_target, flags);
   3322       if (m_current_framebuffer == VK_NULL_HANDLE)
   3323       {
   3324         ERROR_LOG("Failed to create framebuffer");
   3325         return;
   3326       }
   3327     }
   3328 
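            // Switching to or from feedback-loop/image-bound layouts changes the Vulkan pipeline layout, so
            // descriptors must be rebound; passes reading the RTs also need the input attachment descriptor refreshed.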
   3329     m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_INPUT_ATTACHMENT) | (changed_layout ? DIRTY_FLAG_PIPELINE_LAYOUT : 0) |
   3330                     ((flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages)) ?
   3331                        DIRTY_FLAG_INPUT_ATTACHMENT :
   3332                        0);
   3333   }
   3334   else if (needs_rt_clear || needs_ds_clear)
   3335   {
   3336     // TODO: This could use vkCmdClearAttachments() instead.
   3337     if (InRenderPass())
   3338       EndRenderPass();
   3339   }
   3340 }
   3341 
   3342 void VulkanDevice::BeginRenderPass()
   3343 {
   3344   DebugAssert(!InRenderPass());
   3345 
   3346   // All textures should be in shader read only optimal already, but just in case..
   3347   const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
   3348   for (u32 i = 0; i < num_textures; i++)
   3349   {
   3350     if (m_current_textures[i])
   3351       m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly);
   3352   }
   3353 
   3354   // NVIDIA drivers appear to return random garbage when sampling the RT via a feedback loop, if the load op for
   3355   // the render pass is CLEAR. Using vkCmdClearAttachments() doesn't work, so we have to clear the image instead.
   3356   if (m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop && IsDeviceNVIDIA())
   3357   {
   3358     for (u32 i = 0; i < m_num_current_render_targets; i++)
   3359     {
   3360       if (m_current_render_targets[i]->GetState() == GPUTexture::State::Cleared)
   3361         m_current_render_targets[i]->CommitClear(m_current_command_buffer);
   3362     }
   3363   }
   3364 
   3365   if (m_optional_extensions.vk_khr_dynamic_rendering &&
   3366       (m_optional_extensions.vk_khr_dynamic_rendering_local_read ||
   3367        !(m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop)))
   3368   {
   3369     VkRenderingInfoKHR ri = {
   3370       VK_STRUCTURE_TYPE_RENDERING_INFO_KHR, nullptr, 0u, {}, 1u, 0u, 0u, nullptr, nullptr, nullptr};
   3371 
   3372     std::array<VkRenderingAttachmentInfoKHR, MAX_RENDER_TARGETS> attachments;
   3373     VkRenderingAttachmentInfoKHR depth_attachment;
   3374 
   3375     if (m_num_current_render_targets > 0 || m_current_depth_target)
   3376     {
   3377       if (!(m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages))
   3378       {
   3379         ri.colorAttachmentCount = m_num_current_render_targets;
   3380         ri.pColorAttachments = (m_num_current_render_targets > 0) ? attachments.data() : nullptr;
   3381 
   3382         // set up clear values and transition targets
   3383         for (u32 i = 0; i < m_num_current_render_targets; i++)
   3384         {
   3385           VulkanTexture* const rt = static_cast<VulkanTexture*>(m_current_render_targets[i]);
   3386           rt->TransitionToLayout((m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop) ?
   3387                                    VulkanTexture::Layout::FeedbackLoop :
   3388                                    VulkanTexture::Layout::ColorAttachment);
   3389           rt->SetUseFenceCounter(GetCurrentFenceCounter());
   3390 
   3391           VkRenderingAttachmentInfo& ai = attachments[i];
   3392           ai.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR;
   3393           ai.pNext = nullptr;
   3394           ai.imageView = rt->GetView();
   3395           ai.imageLayout = rt->GetVkLayout();
   3396           ai.resolveMode = VK_RESOLVE_MODE_NONE_KHR;
   3397           ai.resolveImageView = VK_NULL_HANDLE;
   3398           ai.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
   3399           ai.loadOp = GetLoadOpForTexture(rt);
   3400           ai.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
   3401 
   3402           if (rt->GetState() == GPUTexture::State::Cleared)
   3403           {
   3404             std::memcpy(ai.clearValue.color.float32, rt->GetUNormClearColor().data(),
   3405                         sizeof(ai.clearValue.color.float32));
   3406           }
   3407           rt->SetState(GPUTexture::State::Dirty);
   3408         }
   3409       }
   3410       else
   3411       {
   3412         // Binding as image, but we still need to clear it.
   3413         for (u32 i = 0; i < m_num_current_render_targets; i++)
   3414         {
   3415           VulkanTexture* rt = m_current_render_targets[i];
   3416           if (rt->GetState() == GPUTexture::State::Cleared)
   3417             rt->CommitClear(m_current_command_buffer);
   3418           rt->SetState(GPUTexture::State::Dirty);
   3419           rt->TransitionToLayout(VulkanTexture::Layout::ReadWriteImage);
   3420           rt->SetUseFenceCounter(GetCurrentFenceCounter());
   3421         }
   3422       }
   3423 
   3424       if (VulkanTexture* const ds = m_current_depth_target)
   3425       {
   3426         ds->TransitionToLayout(VulkanTexture::Layout::DepthStencilAttachment);
   3427         ds->SetUseFenceCounter(GetCurrentFenceCounter());
   3428 
   3429         depth_attachment.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR;
   3430         depth_attachment.pNext = nullptr;
   3431         depth_attachment.imageView = ds->GetView();
   3432         depth_attachment.imageLayout = ds->GetVkLayout();
   3433         depth_attachment.resolveMode = VK_RESOLVE_MODE_NONE_KHR;
   3434         depth_attachment.resolveImageView = VK_NULL_HANDLE;
   3435         depth_attachment.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
   3436         depth_attachment.loadOp = GetLoadOpForTexture(ds);
   3437         depth_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
   3438         ri.pDepthAttachment = &depth_attachment;
   3439 
   3440         if (ds->GetState() == GPUTexture::State::Cleared)
   3441           depth_attachment.clearValue.depthStencil = {ds->GetClearDepth(), 0u};
   3442 
   3443         ds->SetState(GPUTexture::State::Dirty);
   3444       }
   3445 
   3446       const VulkanTexture* const rt_or_ds =
   3447         (m_num_current_render_targets > 0) ? m_current_render_targets[0] : m_current_depth_target;
   3448       ri.renderArea = {{}, {rt_or_ds->GetWidth(), rt_or_ds->GetHeight()}};
   3449     }
   3450     else
   3451     {
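              // No colour or depth targets bound; this pass draws straight to the swap chain image, loading its
              // existing contents.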
   3452       VkRenderingAttachmentInfo& ai = attachments[0];
   3453       ai.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR;
   3454       ai.pNext = nullptr;
   3455       ai.imageView = m_swap_chain->GetCurrentImageView();
   3456       ai.imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
   3457       ai.resolveMode = VK_RESOLVE_MODE_NONE_KHR;
   3458       ai.resolveImageView = VK_NULL_HANDLE;
   3459       ai.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
   3460       ai.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
   3461       ai.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
   3462 
   3463       ri.colorAttachmentCount = 1;
   3464       ri.pColorAttachments = attachments.data();
   3465       ri.renderArea = {{}, {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}};
   3466     }
   3467 
   3468     m_current_render_pass = DYNAMIC_RENDERING_RENDER_PASS;
   3469     vkCmdBeginRenderingKHR(GetCurrentCommandBuffer(), &ri);
   3470   }
   3471   else
   3472   {
   3473     VkRenderPassBeginInfo bi = {
   3474       VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr, VK_NULL_HANDLE, VK_NULL_HANDLE, {}, 0u, nullptr};
   3475     std::array<VkClearValue, MAX_RENDER_TARGETS + 1> clear_values;
   3476 
   3477     if (m_current_framebuffer != VK_NULL_HANDLE)
   3478     {
   3479       bi.framebuffer = m_current_framebuffer;
   3480       bi.renderPass = m_current_render_pass =
   3481         GetRenderPass(m_current_render_targets.data(), m_num_current_render_targets, m_current_depth_target,
   3482                       m_current_render_pass_flags);
   3483       if (bi.renderPass == VK_NULL_HANDLE)
   3484       {
   3485         ERROR_LOG("Failed to create render pass");
   3486         return;
   3487       }
   3488 
   3489       // set up clear values and transition targets
   3490       for (u32 i = 0; i < m_num_current_render_targets; i++)
   3491       {
   3492         VulkanTexture* const rt = static_cast<VulkanTexture*>(m_current_render_targets[i]);
   3493         if (rt->GetState() == GPUTexture::State::Cleared)
   3494         {
   3495           std::memcpy(clear_values[i].color.float32, rt->GetUNormClearColor().data(),
   3496                       sizeof(clear_values[i].color.float32));
   3497           bi.pClearValues = clear_values.data();
   3498           bi.clearValueCount = i + 1;
   3499         }
   3500         rt->SetState(GPUTexture::State::Dirty);
   3501         rt->TransitionToLayout((m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop) ?
   3502                                  VulkanTexture::Layout::FeedbackLoop :
   3503                                  VulkanTexture::Layout::ColorAttachment);
   3504         rt->SetUseFenceCounter(GetCurrentFenceCounter());
   3505       }
   3506       if (VulkanTexture* const ds = static_cast<VulkanTexture*>(m_current_depth_target))
   3507       {
   3508         if (ds->GetState() == GPUTexture::State::Cleared)
   3509         {
   3510           clear_values[m_num_current_render_targets].depthStencil = {ds->GetClearDepth(), 0u};
   3511           bi.pClearValues = clear_values.data();
   3512           bi.clearValueCount = m_num_current_render_targets + 1;
   3513         }
   3514         ds->SetState(GPUTexture::State::Dirty);
   3515         ds->TransitionToLayout(VulkanTexture::Layout::DepthStencilAttachment);
   3516         ds->SetUseFenceCounter(GetCurrentFenceCounter());
   3517       }
   3518 
   3519       const VulkanTexture* const rt_or_ds = static_cast<const VulkanTexture*>(
   3520         (m_num_current_render_targets > 0) ? m_current_render_targets[0] : m_current_depth_target);
   3521       bi.renderArea.extent = {rt_or_ds->GetWidth(), rt_or_ds->GetHeight()};
   3522     }
   3523     else
   3524     {
   3525       // No framebuffer bound; rendering to the swap chain again, preserving its contents.
   3526       bi.framebuffer = m_swap_chain->GetCurrentFramebuffer();
   3527       bi.renderPass = m_current_render_pass =
   3528         GetSwapChainRenderPass(m_swap_chain->GetWindowInfo().surface_format, VK_ATTACHMENT_LOAD_OP_LOAD);
   3529       bi.renderArea.extent = {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()};
   3530     }
   3531 
   3532     DebugAssert(m_current_render_pass);
   3533     vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &bi, VK_SUBPASS_CONTENTS_INLINE);
   3534   }
   3535 
   3536   s_stats.num_render_passes++;
   3537 
   3538   // If this is a new command buffer, bind the pipeline and such.
   3539   if (m_dirty_flags & DIRTY_FLAG_INITIAL)
   3540     SetInitialPipelineState();
   3541 }
   3542 
   3543 void VulkanDevice::BeginSwapChainRenderPass(u32 clear_color)
   3544 {
   3545   DebugAssert(!InRenderPass());
   3546 
   3547   const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer();
   3548   const VkImage swap_chain_image = m_swap_chain->GetCurrentImage();
   3549 
   3550   // Swap chain images start in undefined
   3551   VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, swap_chain_image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1,
   3552                                                 VulkanTexture::Layout::Undefined,
   3553                                                 VulkanTexture::Layout::ColorAttachment);
   3554 
   3555   // All textures should be in shader read only optimal already, but just in case..
   3556   const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
   3557   for (u32 i = 0; i < num_textures; i++)
   3558   {
   3559     if (m_current_textures[i])
   3560       m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly);
   3561   }
   3562 
   3563   VkClearValue clear_value;
   3564   GSVector4::store<false>(&clear_value.color.float32, GSVector4::rgba32(clear_color));
   3565   if (m_optional_extensions.vk_khr_dynamic_rendering)
   3566   {
   3567     VkRenderingAttachmentInfo ai = {VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR,
   3568                                     nullptr,
   3569                                     m_swap_chain->GetCurrentImageView(),
   3570                                     VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
   3571                                     VK_RESOLVE_MODE_NONE_KHR,
   3572                                     VK_NULL_HANDLE,
   3573                                     VK_IMAGE_LAYOUT_UNDEFINED,
   3574                                     VK_ATTACHMENT_LOAD_OP_CLEAR,
   3575                                     VK_ATTACHMENT_STORE_OP_STORE,
   3576                                     clear_value};
   3577 
   3578     const VkRenderingInfoKHR ri = {VK_STRUCTURE_TYPE_RENDERING_INFO_KHR,
   3579                                    nullptr,
   3580                                    0u,
   3581                                    {{}, {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}},
   3582                                    1u,
   3583                                    0u,
   3584                                    1u,
   3585                                    &ai,
   3586                                    nullptr,
   3587                                    nullptr};
   3588 
   3589     m_current_render_pass = DYNAMIC_RENDERING_RENDER_PASS;
   3590     vkCmdBeginRenderingKHR(GetCurrentCommandBuffer(), &ri);
   3591   }
   3592   else
   3593   {
   3594     m_current_render_pass =
   3595       GetSwapChainRenderPass(m_swap_chain->GetWindowInfo().surface_format, VK_ATTACHMENT_LOAD_OP_CLEAR);
   3596     DebugAssert(m_current_render_pass);
   3597 
   3598     const VkRenderPassBeginInfo rp = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
   3599                                       nullptr,
   3600                                       m_current_render_pass,
   3601                                       m_swap_chain->GetCurrentFramebuffer(),
   3602                                       {{0, 0}, {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}},
   3603                                       1u,
   3604                                       &clear_value};
   3605     vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &rp, VK_SUBPASS_CONTENTS_INLINE);
   3606   }
   3607 
   3608   m_dirty_flags |=
   3609     (m_current_render_pass_flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages)) ?
   3610       DIRTY_FLAG_PIPELINE_LAYOUT :
   3611       0;
   3612   s_stats.num_render_passes++;
   3613   m_num_current_render_targets = 0;
   3614   m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
   3615   std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
   3616   m_current_depth_target = nullptr;
   3617   m_current_framebuffer = VK_NULL_HANDLE;
   3618 }
   3619 
   3620 bool VulkanDevice::InRenderPass()
   3621 {
   3622   return m_current_render_pass != VK_NULL_HANDLE;
   3623 }
   3624 
   3625 void VulkanDevice::EndRenderPass()
   3626 {
   3627   DebugAssert(m_current_render_pass != VK_NULL_HANDLE);
   3628 
   3629   // TODO: stats
   3630   VkCommandBuffer cmdbuf = GetCurrentCommandBuffer();
   3631   if (std::exchange(m_current_render_pass, VK_NULL_HANDLE) == DYNAMIC_RENDERING_RENDER_PASS)
   3632     vkCmdEndRenderingKHR(cmdbuf);
   3633   else
   3634     vkCmdEndRenderPass(cmdbuf);
   3635 }
   3636 
   3637 void VulkanDevice::SetPipeline(GPUPipeline* pipeline)
   3638 {
   3639   // First draw? Bind everything.
   3640   if (m_dirty_flags & DIRTY_FLAG_INITIAL)
   3641   {
   3642     m_current_pipeline = static_cast<VulkanPipeline*>(pipeline);
   3643     if (!m_current_pipeline)
   3644       return;
   3645 
   3646     SetInitialPipelineState();
   3647     return;
   3648   }
   3649   else if (m_current_pipeline == pipeline)
   3650   {
   3651     return;
   3652   }
   3653 
   3654   m_current_pipeline = static_cast<VulkanPipeline*>(pipeline);
   3655 
   3656   vkCmdBindPipeline(m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_current_pipeline->GetPipeline());
   3657 
   3658   if (m_current_pipeline_layout != m_current_pipeline->GetLayout())
   3659   {
   3660     m_current_pipeline_layout = m_current_pipeline->GetLayout();
   3661     m_dirty_flags |= DIRTY_FLAG_PIPELINE_LAYOUT;
   3662   }
   3663 }
   3664 
   3665 void VulkanDevice::UnbindPipeline(VulkanPipeline* pl)
   3666 {
   3667   if (m_current_pipeline != pl)
   3668     return;
   3669 
   3670   m_current_pipeline = nullptr;
   3671 }
   3672 
   3673 void VulkanDevice::InvalidateCachedState()
   3674 {
   3675   m_dirty_flags = ALL_DIRTY_STATE |
   3676                   ((m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop) ? DIRTY_FLAG_INPUT_ATTACHMENT : 0);
   3677   m_current_render_pass = VK_NULL_HANDLE;
   3678   m_current_pipeline = nullptr;
   3679 }
   3680 
   3681 s32 VulkanDevice::IsRenderTargetBoundIndex(const GPUTexture* tex) const
   3682 {
   3683   for (u32 i = 0; i < m_num_current_render_targets; i++)
   3684   {
   3685     if (m_current_render_targets[i] == tex)
   3686       return static_cast<s32>(i);
   3687   }
   3688 
   3689   return -1;
   3690 }
   3691 
   3692 VulkanDevice::PipelineLayoutType VulkanDevice::GetPipelineLayoutType(GPUPipeline::RenderPassFlag flags)
   3693 {
   3694   return (flags & GPUPipeline::BindRenderTargetsAsImages) ?
   3695            PipelineLayoutType::BindRenderTargetsAsImages :
   3696            ((flags & GPUPipeline::ColorFeedbackLoop) ? PipelineLayoutType::ColorFeedbackLoop :
   3697                                                        PipelineLayoutType::Normal);
   3698 }
   3699 
   3700 VkPipelineLayout VulkanDevice::GetCurrentVkPipelineLayout() const
   3701 {
   3702   return m_pipeline_layouts[static_cast<size_t>(GetPipelineLayoutType(m_current_render_pass_flags))]
   3703                            [static_cast<size_t>(m_current_pipeline_layout)];
   3704 }

void VulkanDevice::SetInitialPipelineState()
{
  DebugAssert(m_current_pipeline);
  m_dirty_flags &= ~DIRTY_FLAG_INITIAL;

  const VkDeviceSize offset = 0;
  const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer();
  vkCmdBindVertexBuffers(cmdbuf, 0, 1, m_vertex_buffer.GetBufferPtr(), &offset);
  vkCmdBindIndexBuffer(cmdbuf, m_index_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT16);

  m_current_pipeline_layout = m_current_pipeline->GetLayout();
  vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_current_pipeline->GetPipeline());

  const VkViewport vp = {static_cast<float>(m_current_viewport.left),
                         static_cast<float>(m_current_viewport.top),
                         static_cast<float>(m_current_viewport.width()),
                         static_cast<float>(m_current_viewport.height()),
                         0.0f,
                         1.0f};
  vkCmdSetViewport(cmdbuf, 0, 1, &vp);

  const VkRect2D vrc = {{m_current_scissor.left, m_current_scissor.top},
                        {static_cast<u32>(m_current_scissor.width()), static_cast<u32>(m_current_scissor.height())}};
  vkCmdSetScissor(cmdbuf, 0, 1, &vrc);
}
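
// Viewport and scissor are re-issued here because they are presumably declared
// as dynamic state (VK_DYNAMIC_STATE_VIEWPORT/VK_DYNAMIC_STATE_SCISSOR) at
// pipeline creation, and no state is preserved across a command-buffer reset.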

void VulkanDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler)
{
  VulkanTexture* vtex = static_cast<VulkanTexture*>(texture);
  const VkSampler vsampler = static_cast<VulkanSampler*>(sampler ? sampler : m_nearest_sampler.get())->GetSampler();
  if (m_current_textures[slot] != vtex || m_current_samplers[slot] != vsampler)
  {
    m_current_textures[slot] = vtex;
    m_current_samplers[slot] = vsampler;
    m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS;
  }

  if (vtex)
  {
    vtex->CommitClear();
    vtex->SetUseFenceCounter(GetCurrentFenceCounter());
    if (vtex->GetLayout() != VulkanTexture::Layout::ShaderReadOnly)
    {
      if (InRenderPass())
        EndRenderPass();
      vtex->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly);
    }
  }
}
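
// A layout transition cannot be recorded while a render pass instance is open,
// so binding a texture that is not yet ShaderReadOnly ends the current pass;
// the next draw reopens it through PreDrawCheck(). Sketch of the resulting
// command stream (hypothetical tex previously written as a render target):
//   SetTextureSampler(0, tex, nullptr); // -> end pass + image memory barrier
//   Draw(...);                          // -> BeginRenderPass() + vkCmdDraw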

void VulkanDevice::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer)
{
  DebugAssert(slot == 0);
  if (m_current_texture_buffer == buffer)
    return;

  m_current_texture_buffer = static_cast<VulkanTextureBuffer*>(buffer);
  if (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
    m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS;
}

void VulkanDevice::UnbindTexture(VulkanTexture* tex)
{
  for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
  {
    if (m_current_textures[i] == tex)
    {
      m_current_textures[i] = nullptr;
      m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS;
    }
  }

  if (tex->IsRenderTarget() || tex->IsRWTexture())
  {
    for (u32 i = 0; i < m_num_current_render_targets; i++)
    {
      if (m_current_render_targets[i] == tex)
      {
        WARNING_LOG("Unbinding current RT");
        SetRenderTargets(nullptr, 0, m_current_depth_target);
        break;
      }
    }

    m_framebuffer_manager.RemoveRTReferences(tex);
  }
  else if (tex->IsDepthStencil())
  {
    if (m_current_depth_target == tex)
    {
      WARNING_LOG("Unbinding current DS");
      SetRenderTargets(nullptr, 0, nullptr);
    }

    m_framebuffer_manager.RemoveDSReferences(tex);
  }
}

void VulkanDevice::UnbindTextureBuffer(VulkanTextureBuffer* buf)
{
  if (m_current_texture_buffer != buf)
    return;

  m_current_texture_buffer = nullptr;

  if (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
    m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS;
}

void VulkanDevice::SetViewport(const GSVector4i rc)
{
  if (m_current_viewport.eq(rc))
    return;

  m_current_viewport = rc;

  if (m_dirty_flags & DIRTY_FLAG_INITIAL)
    return;

  const VkViewport vp = {static_cast<float>(rc.x),
                         static_cast<float>(rc.y),
                         static_cast<float>(rc.width()),
                         static_cast<float>(rc.height()),
                         0.0f,
                         1.0f};
  vkCmdSetViewport(GetCurrentCommandBuffer(), 0, 1, &vp);
}

void VulkanDevice::SetScissor(const GSVector4i rc)
{
  if (m_current_scissor.eq(rc))
    return;

  m_current_scissor = rc;

  if (m_dirty_flags & DIRTY_FLAG_INITIAL)
    return;

  const VkRect2D vrc = {{rc.x, rc.y}, {static_cast<u32>(rc.width()), static_cast<u32>(rc.height())}};
  vkCmdSetScissor(GetCurrentCommandBuffer(), 0, 1, &vrc);
}
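
// While DIRTY_FLAG_INITIAL is set, nothing has been bound on the command
// buffer yet, so both setters only cache the rectangle;
// SetInitialPipelineState() emits the deferred vkCmdSetViewport/vkCmdSetScissor
// once the first pipeline is bound.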

void VulkanDevice::PreDrawCheck()
{
  if (!InRenderPass())
    BeginRenderPass();

  DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
  const u32 update_mask = (m_current_render_pass_flags ? ~0u : ~DIRTY_FLAG_INPUT_ATTACHMENT);
  const u32 dirty = m_dirty_flags & update_mask;
  m_dirty_flags = m_dirty_flags & ~update_mask;

  if (dirty != 0)
  {
    if (!UpdateDescriptorSets(dirty))
    {
      SubmitCommandBufferAndRestartRenderPass("out of descriptor sets");
      PreDrawCheck();
      return;
    }
  }
}
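
// Descriptor exhaustion recovery: submitting the command buffer advances to
// the next frame context, so the recursive PreDrawCheck() retry should not
// fail a second time (assuming, as the submit path suggests, that each frame
// context resets its descriptor pool when it is reused).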

template<GPUPipeline::Layout layout>
bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
{
  [[maybe_unused]] bool new_dynamic_offsets = false;

  const VkPipelineLayout vk_pipeline_layout = GetCurrentVkPipelineLayout();
  std::array<VkDescriptorSet, 3> ds;
  u32 first_ds = 0;
  u32 num_ds = 0;

  if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO)
  {
    new_dynamic_offsets = ((dirty & DIRTY_FLAG_DYNAMIC_OFFSETS) != 0);

    if (dirty & (DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_DYNAMIC_OFFSETS))
    {
      ds[num_ds++] = m_ubo_descriptor_set;
      new_dynamic_offsets = true;
    }
    else
    {
      first_ds++;
    }
  }

  if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO ||
                layout == GPUPipeline::Layout::SingleTextureAndPushConstants)
  {
    VulkanTexture* const tex = m_current_textures[0] ? m_current_textures[0] : m_null_texture.get();
    DebugAssert(tex && m_current_samplers[0] != VK_NULL_HANDLE);
    ds[num_ds++] = tex->GetDescriptorSetWithSampler(m_current_samplers[0]);
  }
  else if constexpr (layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
  {
    DebugAssert(m_current_texture_buffer);
    ds[num_ds++] = m_current_texture_buffer->GetDescriptorSet();
  }
  else if constexpr (layout == GPUPipeline::Layout::MultiTextureAndUBO ||
                     layout == GPUPipeline::Layout::MultiTextureAndPushConstants)
  {
    Vulkan::DescriptorSetUpdateBuilder dsub;

    if (m_optional_extensions.vk_khr_push_descriptor)
    {
      for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
      {
        VulkanTexture* const tex = m_current_textures[i] ? m_current_textures[i] : m_null_texture.get();
        DebugAssert(tex && m_current_samplers[i] != VK_NULL_HANDLE);
        dsub.AddCombinedImageSamplerDescriptorWrite(VK_NULL_HANDLE, i, tex->GetView(), m_current_samplers[i],
                                                    tex->GetVkLayout());
      }

      const u32 set = (layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 1 : 0;
      dsub.PushUpdate(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, set);
      if (num_ds == 0)
        return true;
    }
    else
    {
      VkDescriptorSet tds = AllocateDescriptorSet(m_multi_texture_ds_layout);
      if (tds == VK_NULL_HANDLE)
        return false;

      ds[num_ds++] = tds;

      for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
      {
        VulkanTexture* const tex = m_current_textures[i] ? m_current_textures[i] : m_null_texture.get();
        DebugAssert(tex && m_current_samplers[i] != VK_NULL_HANDLE);
        dsub.AddCombinedImageSamplerDescriptorWrite(tds, i, tex->GetView(), m_current_samplers[i], tex->GetVkLayout());
      }

      dsub.Update(m_device, false);
    }
  }

  if (m_num_current_render_targets > 0 &&
      ((dirty & DIRTY_FLAG_INPUT_ATTACHMENT) ||
       ((dirty & DIRTY_FLAG_PIPELINE_LAYOUT) &&
        (m_current_render_pass_flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages)))))
  {
    if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
    {
      VkDescriptorSet ids = AllocateDescriptorSet(m_rov_ds_layout);
      if (ids == VK_NULL_HANDLE)
        return false;

      ds[num_ds++] = ids;

      Vulkan::DescriptorSetUpdateBuilder dsub;
      for (u32 i = 0; i < m_num_current_render_targets; i++)
      {
        dsub.AddStorageImageDescriptorWrite(ids, i, m_current_render_targets[i]->GetView(),
                                            m_current_render_targets[i]->GetVkLayout());
      }

      // Annoyingly, have to update all slots...
      for (u32 i = m_num_current_render_targets; i < MAX_IMAGE_RENDER_TARGETS; i++)
        dsub.AddStorageImageDescriptorWrite(ids, i, m_null_texture->GetView(), m_null_texture->GetVkLayout());

      dsub.Update(m_device, false);
    }
    else
    {
      VkDescriptorSet ids = AllocateDescriptorSet(m_feedback_loop_ds_layout);
      if (ids == VK_NULL_HANDLE)
        return false;

      ds[num_ds++] = ids;

      Vulkan::DescriptorSetUpdateBuilder dsub;
      dsub.AddInputAttachmentDescriptorWrite(ids, 0, m_current_render_targets[0]->GetView(),
                                             m_current_render_targets[0]->GetVkLayout());
      dsub.Update(m_device, false);
    }
  }

  DebugAssert(num_ds > 0);
  vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, first_ds,
                          num_ds, ds.data(), static_cast<u32>(new_dynamic_offsets),
                          new_dynamic_offsets ? &m_uniform_buffer_position : nullptr);

  return true;
}
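
// The UBO layouts use a dynamic uniform-buffer descriptor: the set never needs
// re-writing when the uniform stream advances, because the new offset
// (m_uniform_buffer_position) is simply passed at bind time through the last
// two arguments of vkCmdBindDescriptorSets above. With VK_KHR_push_descriptor,
// the texture writes are recorded directly into the command buffer instead of
// allocating a set from the per-frame pool.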

bool VulkanDevice::UpdateDescriptorSets(u32 dirty)
{
  switch (m_current_pipeline_layout)
  {
    case GPUPipeline::Layout::SingleTextureAndUBO:
      return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::SingleTextureAndUBO>(dirty);

    case GPUPipeline::Layout::SingleTextureAndPushConstants:
      return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::SingleTextureAndPushConstants>(dirty);

    case GPUPipeline::Layout::SingleTextureBufferAndPushConstants:
      return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::SingleTextureBufferAndPushConstants>(dirty);

    case GPUPipeline::Layout::MultiTextureAndUBO:
      return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::MultiTextureAndUBO>(dirty);

    case GPUPipeline::Layout::MultiTextureAndPushConstants:
      return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);

    default:
      UnreachableCode();
  }
}
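
// Dispatching to one template instantiation per layout lets the if-constexpr
// branches in UpdateDescriptorSetsForLayout() compile out, so each layout pays
// only for the descriptor work it actually needs.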

void VulkanDevice::Draw(u32 vertex_count, u32 base_vertex)
{
  PreDrawCheck();
  s_stats.num_draws++;
  vkCmdDraw(GetCurrentCommandBuffer(), vertex_count, 1, base_vertex, 0);
}

void VulkanDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
{
  PreDrawCheck();
  s_stats.num_draws++;
  vkCmdDrawIndexed(GetCurrentCommandBuffer(), index_count, 1, base_index, base_vertex, 0);
}

VkImageMemoryBarrier VulkanDevice::GetColorBufferBarrier(const VulkanTexture* rt) const
{
  const VkImageLayout vk_layout = m_optional_extensions.vk_khr_dynamic_rendering_local_read ?
                                    VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR :
                                    VK_IMAGE_LAYOUT_GENERAL;
  DebugAssert(rt->GetLayout() == VulkanTexture::Layout::FeedbackLoop);

  return {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
          nullptr,
          VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
          VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
          vk_layout,
          vk_layout,
          VK_QUEUE_FAMILY_IGNORED,
          VK_QUEUE_FAMILY_IGNORED,
          rt->GetImage(),
          {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}};
}
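
// The barrier is layout-preserving (oldLayout == newLayout): it only makes
// prior color-attachment writes visible to subsequent input-attachment reads.
// Combined with VK_DEPENDENCY_BY_REGION_BIT at the call sites below, this is
// the minimal self-dependency a render-pass feedback loop requires.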

void VulkanDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type)
{
  PreDrawCheck();

  // TODO: The first barrier is unnecessary if we're starting the render pass.

  switch (type)
  {
    case GPUDevice::DrawBarrier::None:
    {
      s_stats.num_draws++;
      vkCmdDrawIndexed(GetCurrentCommandBuffer(), index_count, 1, base_index, base_vertex, 0);
    }
    break;

    case GPUDevice::DrawBarrier::One:
    {
      DebugAssert(m_num_current_render_targets == 1);
      s_stats.num_barriers++;
      s_stats.num_draws++;

      const VkImageMemoryBarrier barrier =
        GetColorBufferBarrier(static_cast<VulkanTexture*>(m_current_render_targets[0]));
      vkCmdPipelineBarrier(m_current_command_buffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                           VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr,
                           1, &barrier);
      vkCmdDrawIndexed(GetCurrentCommandBuffer(), index_count, 1, base_index, base_vertex, 0);
    }
    break;

    case GPUDevice::DrawBarrier::Full:
    {
      DebugAssert(m_num_current_render_targets == 1);

      const VkImageMemoryBarrier barrier =
        GetColorBufferBarrier(static_cast<VulkanTexture*>(m_current_render_targets[0]));
      const u32 indices_per_primitive = m_current_pipeline->GetVerticesPerPrimitive();
      const u32 end_batch = base_index + index_count;

      for (; base_index < end_batch; base_index += indices_per_primitive)
      {
        s_stats.num_barriers++;
        s_stats.num_draws++;

        vkCmdPipelineBarrier(m_current_command_buffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                             VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr,
                             1, &barrier);
        vkCmdDrawIndexed(GetCurrentCommandBuffer(), indices_per_primitive, 1, base_index, base_vertex, 0);
      }
    }
    break;

    DefaultCaseIsUnreachable();
  }
}
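
// Cost note: DrawBarrier::Full issues one barrier per primitive, e.g. a
// 3000-index triangle batch (indices_per_primitive == 3) becomes 1000
// barrier+draw pairs. DrawBarrier::One is the cheap variant when a single
// synchronization point before the batch suffices, and None skips the barrier
// entirely.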