vulkan_device.cpp (154017B)
1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) 3 4 #include "vulkan_device.h" 5 #include "vulkan_builders.h" 6 #include "vulkan_pipeline.h" 7 #include "vulkan_stream_buffer.h" 8 #include "vulkan_swap_chain.h" 9 #include "vulkan_texture.h" 10 11 #include "core/host.h" 12 13 #include "common/align.h" 14 #include "common/assert.h" 15 #include "common/bitutils.h" 16 #include "common/error.h" 17 #include "common/file_system.h" 18 #include "common/log.h" 19 #include "common/path.h" 20 #include "common/scoped_guard.h" 21 #include "common/small_string.h" 22 23 #include "fmt/format.h" 24 #include "xxhash.h" 25 26 #include <cstdlib> 27 #include <limits> 28 #include <mutex> 29 30 Log_SetChannel(VulkanDevice); 31 32 // TODO: VK_KHR_display. 33 34 #pragma pack(push, 4) 35 struct VK_PIPELINE_CACHE_HEADER 36 { 37 u32 header_length; 38 u32 header_version; 39 u32 vendor_id; 40 u32 device_id; 41 u8 uuid[VK_UUID_SIZE]; 42 }; 43 #pragma pack(pop) 44 45 static VkAttachmentLoadOp GetLoadOpForTexture(const GPUTexture* tex) 46 { 47 static constexpr VkAttachmentLoadOp ops[3] = {VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_LOAD_OP_CLEAR, 48 VK_ATTACHMENT_LOAD_OP_DONT_CARE}; 49 return ops[static_cast<u8>(tex->GetState())]; 50 } 51 52 // Tweakables 53 enum : u32 54 { 55 MAX_DRAW_CALLS_PER_FRAME = 2048, 56 MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME = GPUDevice::MAX_TEXTURE_SAMPLERS * MAX_DRAW_CALLS_PER_FRAME, 57 MAX_INPUT_ATTACHMENT_DESCRIPTORS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME, 58 MAX_DESCRIPTOR_SETS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME, 59 MAX_SAMPLER_DESCRIPTORS = 8192, 60 61 VERTEX_BUFFER_SIZE = 32 * 1024 * 1024, 62 INDEX_BUFFER_SIZE = 16 * 1024 * 1024, 63 VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024, 64 FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024, 65 TEXTURE_BUFFER_SIZE = 64 * 1024 * 1024, 66 67 UNIFORM_PUSH_CONSTANTS_STAGES = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 68 UNIFORM_PUSH_CONSTANTS_SIZE = 128, 69 70 MAX_UNIFORM_BUFFER_SIZE = 1024, 71 }; 72 73 const std::array<VkFormat, static_cast<u32>(GPUTexture::Format::MaxCount)> VulkanDevice::TEXTURE_FORMAT_MAPPING = { 74 VK_FORMAT_UNDEFINED, // Unknown 75 VK_FORMAT_R8G8B8A8_UNORM, // RGBA8 76 VK_FORMAT_B8G8R8A8_UNORM, // BGRA8 77 VK_FORMAT_R5G6B5_UNORM_PACK16, // RGB565 78 VK_FORMAT_R5G5B5A1_UNORM_PACK16, // RGBA5551 79 VK_FORMAT_R8_UNORM, // R8 80 VK_FORMAT_D16_UNORM, // D16 81 VK_FORMAT_D24_UNORM_S8_UINT, // D24S8 82 VK_FORMAT_D32_SFLOAT, // D32F 83 VK_FORMAT_D32_SFLOAT_S8_UINT, // D32FS8 84 VK_FORMAT_R16_UNORM, // R16 85 VK_FORMAT_R16_SINT, // R16I 86 VK_FORMAT_R16_UINT, // R16U 87 VK_FORMAT_R16_SFLOAT, // R16F 88 VK_FORMAT_R32_SINT, // R32I 89 VK_FORMAT_R32_UINT, // R32U 90 VK_FORMAT_R32_SFLOAT, // R32F 91 VK_FORMAT_R8G8_UNORM, // RG8 92 VK_FORMAT_R16G16_UNORM, // RG16 93 VK_FORMAT_R16G16_SFLOAT, // RG16F 94 VK_FORMAT_R32G32_SFLOAT, // RG32F 95 VK_FORMAT_R16G16B16A16_UNORM, // RGBA16 96 VK_FORMAT_R16G16B16A16_SFLOAT, // RGBA16F 97 VK_FORMAT_R32G32B32A32_SFLOAT, // RGBA32F 98 VK_FORMAT_A2R10G10B10_UNORM_PACK32, // RGB10A2 99 }; 100 101 // Handles are always 64-bit, even on 32-bit platforms. 102 static const VkRenderPass DYNAMIC_RENDERING_RENDER_PASS = ((VkRenderPass) static_cast<s64>(-1LL)); 103 104 #ifdef _DEBUG 105 static u32 s_debug_scope_depth = 0; 106 #endif 107 108 // We need to synchronize instance creation because of adapter enumeration from the UI thread. 109 static std::mutex s_instance_mutex; 110 111 VulkanDevice::VulkanDevice() 112 { 113 #ifdef _DEBUG 114 s_debug_scope_depth = 0; 115 #endif 116 } 117 118 VulkanDevice::~VulkanDevice() 119 { 120 Assert(m_device == VK_NULL_HANDLE); 121 } 122 123 GPUTexture::Format VulkanDevice::GetFormatForVkFormat(VkFormat format) 124 { 125 for (u32 i = 0; i < static_cast<u32>(std::size(TEXTURE_FORMAT_MAPPING)); i++) 126 { 127 if (TEXTURE_FORMAT_MAPPING[i] == format) 128 return static_cast<GPUTexture::Format>(i); 129 } 130 131 return GPUTexture::Format::Unknown; 132 } 133 134 VkInstance VulkanDevice::CreateVulkanInstance(const WindowInfo& wi, OptionalExtensions* oe, bool enable_debug_utils, 135 bool enable_validation_layer) 136 { 137 ExtensionList enabled_extensions; 138 if (!SelectInstanceExtensions(&enabled_extensions, wi, oe, enable_debug_utils)) 139 return VK_NULL_HANDLE; 140 141 u32 maxApiVersion = VK_API_VERSION_1_0; 142 if (vkEnumerateInstanceVersion) 143 { 144 VkResult res = vkEnumerateInstanceVersion(&maxApiVersion); 145 if (res != VK_SUCCESS) 146 { 147 LOG_VULKAN_ERROR(res, "vkEnumerateInstanceVersion() failed: "); 148 maxApiVersion = VK_API_VERSION_1_0; 149 } 150 } 151 else 152 { 153 WARNING_LOG("Driver does not provide vkEnumerateInstanceVersion()."); 154 } 155 156 // Cap out at 1.1 for consistency. 157 const u32 apiVersion = std::min(maxApiVersion, VK_API_VERSION_1_1); 158 INFO_LOG("Supported instance version: {}.{}.{}, requesting version {}.{}.{}", VK_API_VERSION_MAJOR(maxApiVersion), 159 VK_API_VERSION_MINOR(maxApiVersion), VK_API_VERSION_PATCH(maxApiVersion), VK_API_VERSION_MAJOR(apiVersion), 160 VK_API_VERSION_MINOR(apiVersion), VK_API_VERSION_PATCH(apiVersion)); 161 162 // Remember to manually update this every release. We don't pull in svnrev.h here, because 163 // it's only the major/minor version, and rebuilding the file every time something else changes 164 // is unnecessary. 165 VkApplicationInfo app_info = {}; 166 app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; 167 app_info.pNext = nullptr; 168 app_info.pApplicationName = "DuckStation"; 169 app_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0); 170 app_info.pEngineName = "DuckStation"; 171 app_info.engineVersion = VK_MAKE_VERSION(0, 1, 0); 172 app_info.apiVersion = apiVersion; 173 174 VkInstanceCreateInfo instance_create_info = {}; 175 instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; 176 instance_create_info.pNext = nullptr; 177 instance_create_info.flags = 0; 178 instance_create_info.pApplicationInfo = &app_info; 179 instance_create_info.enabledExtensionCount = static_cast<uint32_t>(enabled_extensions.size()); 180 instance_create_info.ppEnabledExtensionNames = enabled_extensions.data(); 181 instance_create_info.enabledLayerCount = 0; 182 instance_create_info.ppEnabledLayerNames = nullptr; 183 184 // Enable debug layer on debug builds 185 if (enable_validation_layer) 186 { 187 static const char* layer_names[] = {"VK_LAYER_KHRONOS_validation"}; 188 instance_create_info.enabledLayerCount = 1; 189 instance_create_info.ppEnabledLayerNames = layer_names; 190 } 191 192 VkInstance instance; 193 VkResult res = vkCreateInstance(&instance_create_info, nullptr, &instance); 194 if (res != VK_SUCCESS) 195 { 196 LOG_VULKAN_ERROR(res, "vkCreateInstance failed: "); 197 return nullptr; 198 } 199 200 return instance; 201 } 202 203 bool VulkanDevice::SelectInstanceExtensions(ExtensionList* extension_list, const WindowInfo& wi, OptionalExtensions* oe, 204 bool enable_debug_utils) 205 { 206 u32 extension_count = 0; 207 VkResult res = vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, nullptr); 208 if (res != VK_SUCCESS) 209 { 210 LOG_VULKAN_ERROR(res, "vkEnumerateInstanceExtensionProperties failed: "); 211 return false; 212 } 213 214 if (extension_count == 0) 215 { 216 ERROR_LOG("Vulkan: No extensions supported by instance."); 217 return false; 218 } 219 220 std::vector<VkExtensionProperties> available_extension_list(extension_count); 221 res = vkEnumerateInstanceExtensionProperties(nullptr, &extension_count, available_extension_list.data()); 222 DebugAssert(res == VK_SUCCESS); 223 224 auto SupportsExtension = [&](const char* name, bool required) { 225 if (std::find_if(available_extension_list.begin(), available_extension_list.end(), 226 [&](const VkExtensionProperties& properties) { 227 return !strcmp(name, properties.extensionName); 228 }) != available_extension_list.end()) 229 { 230 DEV_LOG("Enabling extension: {}", name); 231 extension_list->push_back(name); 232 return true; 233 } 234 235 if (required) 236 ERROR_LOG("Vulkan: Missing required extension {}.", name); 237 238 return false; 239 }; 240 241 // Common extensions 242 if (wi.type != WindowInfo::Type::Surfaceless && !SupportsExtension(VK_KHR_SURFACE_EXTENSION_NAME, true)) 243 return false; 244 245 #if defined(VK_USE_PLATFORM_WIN32_KHR) 246 if (wi.type == WindowInfo::Type::Win32 && !SupportsExtension(VK_KHR_WIN32_SURFACE_EXTENSION_NAME, true)) 247 return false; 248 #endif 249 #if defined(VK_USE_PLATFORM_XLIB_KHR) 250 if (wi.type == WindowInfo::Type::X11 && !SupportsExtension(VK_KHR_XLIB_SURFACE_EXTENSION_NAME, true)) 251 return false; 252 #endif 253 #if defined(VK_USE_PLATFORM_WAYLAND_KHR) 254 if (wi.type == WindowInfo::Type::Wayland && !SupportsExtension(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, true)) 255 return false; 256 #endif 257 #if defined(VK_USE_PLATFORM_METAL_EXT) 258 if (wi.type == WindowInfo::Type::MacOS && !SupportsExtension(VK_EXT_METAL_SURFACE_EXTENSION_NAME, true)) 259 return false; 260 #endif 261 #if defined(VK_USE_PLATFORM_ANDROID_KHR) 262 if (wi.type == WindowInfo::Type::Android && !SupportsExtension(VK_KHR_ANDROID_SURFACE_EXTENSION_NAME, true)) 263 return false; 264 #endif 265 266 // VK_EXT_debug_utils 267 if (enable_debug_utils && !SupportsExtension(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, false)) 268 WARNING_LOG("Vulkan: Debug report requested, but extension is not available."); 269 270 // Needed for exclusive fullscreen control. 271 SupportsExtension(VK_KHR_GET_SURFACE_CAPABILITIES_2_EXTENSION_NAME, false); 272 273 oe->vk_ext_swapchain_maintenance1 = 274 (wi.type != WindowInfo::Type::Surfaceless && SupportsExtension(VK_EXT_SURFACE_MAINTENANCE_1_EXTENSION_NAME, false)); 275 oe->vk_khr_get_physical_device_properties2 = 276 SupportsExtension(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, false); 277 278 return true; 279 } 280 281 VulkanDevice::GPUList VulkanDevice::EnumerateGPUs(VkInstance instance) 282 { 283 GPUList gpus; 284 285 u32 gpu_count = 0; 286 VkResult res = vkEnumeratePhysicalDevices(instance, &gpu_count, nullptr); 287 if ((res != VK_SUCCESS && res != VK_INCOMPLETE) || gpu_count == 0) 288 { 289 LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices (1) failed: "); 290 return gpus; 291 } 292 293 std::vector<VkPhysicalDevice> physical_devices(gpu_count); 294 res = vkEnumeratePhysicalDevices(instance, &gpu_count, physical_devices.data()); 295 if (res == VK_INCOMPLETE) 296 { 297 WARNING_LOG("First vkEnumeratePhysicalDevices() call returned {} devices, but second returned {}", 298 physical_devices.size(), gpu_count); 299 } 300 else if (res != VK_SUCCESS) 301 { 302 LOG_VULKAN_ERROR(res, "vkEnumeratePhysicalDevices (2) failed: "); 303 return gpus; 304 } 305 306 // Maybe we lost a GPU? 307 if (gpu_count < physical_devices.size()) 308 physical_devices.resize(gpu_count); 309 310 gpus.reserve(physical_devices.size()); 311 for (VkPhysicalDevice device : physical_devices) 312 { 313 VkPhysicalDeviceProperties props = {}; 314 vkGetPhysicalDeviceProperties(device, &props); 315 316 VkPhysicalDeviceFeatures available_features = {}; 317 vkGetPhysicalDeviceFeatures(device, &available_features); 318 319 AdapterInfo ai; 320 ai.name = props.deviceName; 321 ai.max_texture_size = std::min(props.limits.maxFramebufferWidth, props.limits.maxImageDimension2D); 322 ai.max_multisamples = GetMaxMultisamples(device, props); 323 ai.supports_sample_shading = available_features.sampleRateShading; 324 325 // handle duplicate adapter names 326 if (std::any_of(gpus.begin(), gpus.end(), [&ai](const auto& other) { return (ai.name == other.second.name); })) 327 { 328 std::string original_adapter_name = std::move(ai.name); 329 330 u32 current_extra = 2; 331 do 332 { 333 ai.name = fmt::format("{} ({})", original_adapter_name, current_extra); 334 current_extra++; 335 } while ( 336 std::any_of(gpus.begin(), gpus.end(), [&ai](const auto& other) { return (ai.name == other.second.name); })); 337 } 338 339 gpus.emplace_back(device, std::move(ai)); 340 } 341 342 return gpus; 343 } 344 345 VulkanDevice::GPUList VulkanDevice::EnumerateGPUs() 346 { 347 GPUList ret; 348 std::unique_lock lock(s_instance_mutex); 349 350 // Device shouldn't be torn down since we have the lock. 351 if (g_gpu_device && g_gpu_device->GetRenderAPI() == RenderAPI::Vulkan && Vulkan::IsVulkanLibraryLoaded()) 352 { 353 ret = EnumerateGPUs(VulkanDevice::GetInstance().m_instance); 354 } 355 else 356 { 357 if (Vulkan::LoadVulkanLibrary(nullptr)) 358 { 359 OptionalExtensions oe = {}; 360 const VkInstance instance = CreateVulkanInstance(WindowInfo(), &oe, false, false); 361 if (instance != VK_NULL_HANDLE) 362 { 363 if (Vulkan::LoadVulkanInstanceFunctions(instance)) 364 ret = EnumerateGPUs(instance); 365 366 vkDestroyInstance(instance, nullptr); 367 } 368 369 Vulkan::UnloadVulkanLibrary(); 370 } 371 } 372 373 return ret; 374 } 375 376 GPUDevice::AdapterInfoList VulkanDevice::GetAdapterList() 377 { 378 AdapterInfoList ret; 379 GPUList gpus = EnumerateGPUs(); 380 ret.reserve(gpus.size()); 381 for (auto& [physical_device, adapter_info] : gpus) 382 ret.push_back(std::move(adapter_info)); 383 return ret; 384 } 385 386 bool VulkanDevice::SelectDeviceExtensions(ExtensionList* extension_list, bool enable_surface, Error* error) 387 { 388 u32 extension_count = 0; 389 VkResult res = vkEnumerateDeviceExtensionProperties(m_physical_device, nullptr, &extension_count, nullptr); 390 if (res != VK_SUCCESS) 391 { 392 LOG_VULKAN_ERROR(res, "vkEnumerateDeviceExtensionProperties failed: "); 393 Vulkan::SetErrorObject(error, "vkEnumerateDeviceExtensionProperties failed: ", res); 394 return false; 395 } 396 397 if (extension_count == 0) 398 { 399 ERROR_LOG("No extensions supported by device."); 400 Error::SetStringView(error, "No extensions supported by device."); 401 return false; 402 } 403 404 std::vector<VkExtensionProperties> available_extension_list(extension_count); 405 res = 406 vkEnumerateDeviceExtensionProperties(m_physical_device, nullptr, &extension_count, available_extension_list.data()); 407 DebugAssert(res == VK_SUCCESS); 408 409 auto SupportsExtension = [&](const char* name, bool required) { 410 if (std::find_if(available_extension_list.begin(), available_extension_list.end(), 411 [&](const VkExtensionProperties& properties) { 412 return !strcmp(name, properties.extensionName); 413 }) != available_extension_list.end()) 414 { 415 if (std::none_of(extension_list->begin(), extension_list->end(), 416 [&](const char* existing_name) { return (std::strcmp(existing_name, name) == 0); })) 417 { 418 DEV_LOG("Enabling extension: {}", name); 419 extension_list->push_back(name); 420 } 421 422 return true; 423 } 424 425 if (required) 426 { 427 ERROR_LOG("Vulkan: Missing required extension {}.", name); 428 Error::SetStringFmt(error, "Missing required extension {}.", name); 429 } 430 431 return false; 432 }; 433 434 if (enable_surface && !SupportsExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true)) 435 return false; 436 437 m_optional_extensions.vk_ext_memory_budget = SupportsExtension(VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, false); 438 m_optional_extensions.vk_ext_rasterization_order_attachment_access = 439 SupportsExtension(VK_EXT_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME, false) || 440 SupportsExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME, false); 441 m_optional_extensions.vk_khr_get_memory_requirements2 = 442 SupportsExtension(VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME, false); 443 m_optional_extensions.vk_khr_bind_memory2 = SupportsExtension(VK_KHR_BIND_MEMORY_2_EXTENSION_NAME, false); 444 m_optional_extensions.vk_khr_dedicated_allocation = 445 SupportsExtension(VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME, false); 446 m_optional_extensions.vk_khr_driver_properties = SupportsExtension(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, false); 447 m_optional_extensions.vk_khr_dynamic_rendering = 448 SupportsExtension(VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME, false) && 449 SupportsExtension(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME, false) && 450 SupportsExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, false); 451 m_optional_extensions.vk_khr_dynamic_rendering_local_read = 452 m_optional_extensions.vk_khr_dynamic_rendering && 453 SupportsExtension(VK_KHR_DYNAMIC_RENDERING_LOCAL_READ_EXTENSION_NAME, false); 454 m_optional_extensions.vk_khr_push_descriptor = SupportsExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, false); 455 456 // glslang generates debug info instructions before phi nodes at the beginning of blocks when non-semantic debug info 457 // is enabled, triggering errors by spirv-val. Gate it by an environment variable if you want source debugging until 458 // this is fixed. 459 if (const char* val = std::getenv("USE_NON_SEMANTIC_DEBUG_INFO"); 460 val && StringUtil::FromChars<bool>(val).value_or(false)) 461 { 462 m_optional_extensions.vk_khr_shader_non_semantic_info = 463 SupportsExtension(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, false); 464 } 465 466 m_optional_extensions.vk_ext_external_memory_host = 467 SupportsExtension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, false); 468 m_optional_extensions.vk_ext_swapchain_maintenance1 = 469 m_optional_extensions.vk_ext_swapchain_maintenance1 && 470 SupportsExtension(VK_EXT_SWAPCHAIN_MAINTENANCE_1_EXTENSION_NAME, false); 471 472 // Dynamic rendering isn't strictly needed for FSI, but we want it with framebufferless rendering. 473 m_optional_extensions.vk_ext_fragment_shader_interlock = 474 m_optional_extensions.vk_khr_dynamic_rendering && 475 SupportsExtension(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME, false); 476 477 #ifdef _WIN32 478 m_optional_extensions.vk_ext_full_screen_exclusive = 479 enable_surface && SupportsExtension(VK_EXT_FULL_SCREEN_EXCLUSIVE_EXTENSION_NAME, false); 480 INFO_LOG("VK_EXT_full_screen_exclusive is {}", 481 m_optional_extensions.vk_ext_full_screen_exclusive ? "supported" : "NOT supported"); 482 #endif 483 484 if (IsBrokenMobileDriver()) 485 { 486 // Push descriptor is broken on Adreno v502.. don't want to think about dynamic rendending. 487 if (m_optional_extensions.vk_khr_dynamic_rendering) 488 { 489 m_optional_extensions.vk_khr_dynamic_rendering = false; 490 m_optional_extensions.vk_khr_dynamic_rendering_local_read = false; 491 m_optional_extensions.vk_ext_fragment_shader_interlock = false; 492 WARNING_LOG("Disabling VK_KHR_dynamic_rendering on broken mobile driver."); 493 } 494 if (m_optional_extensions.vk_khr_push_descriptor) 495 { 496 m_optional_extensions.vk_khr_push_descriptor = false; 497 WARNING_LOG("Disabling VK_KHR_push_descriptor on broken mobile driver."); 498 } 499 } 500 else if (IsDeviceAMD()) 501 { 502 // VK_KHR_dynamic_rendering_local_read appears to be broken on RDNA3, like everything else... 503 // Just causes GPU resets when you actually use a feedback loop. Assume Mesa is fine. 504 #if defined(_WIN32) || defined(__ANDROID__) 505 m_optional_extensions.vk_khr_dynamic_rendering_local_read = false; 506 WARNING_LOG("Disabling VK_KHR_dynamic_rendering_local_read on broken AMD driver."); 507 #endif 508 } 509 510 // Don't bother checking for maintenance 4/5 if we don't have 1-3, i.e. Vulkan 1.1. 511 if (m_device_properties.apiVersion >= VK_API_VERSION_1_1) 512 { 513 m_optional_extensions.vk_khr_maintenance4 = SupportsExtension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME, false); 514 m_optional_extensions.vk_khr_maintenance5 = 515 m_optional_extensions.vk_khr_maintenance4 && SupportsExtension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME, false); 516 } 517 518 return true; 519 } 520 521 bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer, FeatureMask disabled_features, 522 Error* error) 523 { 524 u32 queue_family_count; 525 vkGetPhysicalDeviceQueueFamilyProperties(m_physical_device, &queue_family_count, nullptr); 526 if (queue_family_count == 0) 527 { 528 ERROR_LOG("No queue families found on specified vulkan physical device."); 529 Error::SetStringView(error, "No queue families found on specified vulkan physical device."); 530 return false; 531 } 532 533 std::vector<VkQueueFamilyProperties> queue_family_properties(queue_family_count); 534 vkGetPhysicalDeviceQueueFamilyProperties(m_physical_device, &queue_family_count, queue_family_properties.data()); 535 DEV_LOG("{} vulkan queue families", queue_family_count); 536 537 // Find graphics and present queues. 538 m_graphics_queue_family_index = queue_family_count; 539 m_present_queue_family_index = queue_family_count; 540 for (uint32_t i = 0; i < queue_family_count; i++) 541 { 542 VkBool32 graphics_supported = queue_family_properties[i].queueFlags & VK_QUEUE_GRAPHICS_BIT; 543 if (graphics_supported) 544 { 545 m_graphics_queue_family_index = i; 546 // Quit now, no need for a present queue. 547 if (!surface) 548 { 549 break; 550 } 551 } 552 553 if (surface) 554 { 555 VkBool32 present_supported; 556 VkResult res = vkGetPhysicalDeviceSurfaceSupportKHR(m_physical_device, i, surface, &present_supported); 557 if (res != VK_SUCCESS) 558 { 559 LOG_VULKAN_ERROR(res, "vkGetPhysicalDeviceSurfaceSupportKHR failed: "); 560 Vulkan::SetErrorObject(error, "vkGetPhysicalDeviceSurfaceSupportKHR failed: ", res); 561 return false; 562 } 563 564 if (present_supported) 565 { 566 m_present_queue_family_index = i; 567 } 568 569 // Prefer one queue family index that does both graphics and present. 570 if (graphics_supported && present_supported) 571 { 572 break; 573 } 574 } 575 } 576 if (m_graphics_queue_family_index == queue_family_count) 577 { 578 ERROR_LOG("Vulkan: Failed to find an acceptable graphics queue."); 579 Error::SetStringView(error, "Vulkan: Failed to find an acceptable graphics queue."); 580 return false; 581 } 582 if (surface != VK_NULL_HANDLE && m_present_queue_family_index == queue_family_count) 583 { 584 ERROR_LOG("Vulkan: Failed to find an acceptable present queue."); 585 Error::SetStringView(error, "Vulkan: Failed to find an acceptable present queue."); 586 return false; 587 } 588 589 VkDeviceCreateInfo device_info = {}; 590 device_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; 591 device_info.pNext = nullptr; 592 device_info.flags = 0; 593 device_info.queueCreateInfoCount = 0; 594 595 static constexpr float queue_priorities[] = {1.0f}; 596 std::array<VkDeviceQueueCreateInfo, 2> queue_infos; 597 VkDeviceQueueCreateInfo& graphics_queue_info = queue_infos[device_info.queueCreateInfoCount++]; 598 graphics_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; 599 graphics_queue_info.pNext = nullptr; 600 graphics_queue_info.flags = 0; 601 graphics_queue_info.queueFamilyIndex = m_graphics_queue_family_index; 602 graphics_queue_info.queueCount = 1; 603 graphics_queue_info.pQueuePriorities = queue_priorities; 604 605 if (surface != VK_NULL_HANDLE && m_graphics_queue_family_index != m_present_queue_family_index) 606 { 607 VkDeviceQueueCreateInfo& present_queue_info = queue_infos[device_info.queueCreateInfoCount++]; 608 present_queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; 609 present_queue_info.pNext = nullptr; 610 present_queue_info.flags = 0; 611 present_queue_info.queueFamilyIndex = m_present_queue_family_index; 612 present_queue_info.queueCount = 1; 613 present_queue_info.pQueuePriorities = queue_priorities; 614 } 615 616 device_info.pQueueCreateInfos = queue_infos.data(); 617 618 ExtensionList enabled_extensions; 619 if (!SelectDeviceExtensions(&enabled_extensions, surface != VK_NULL_HANDLE, error)) 620 return false; 621 622 device_info.enabledExtensionCount = static_cast<uint32_t>(enabled_extensions.size()); 623 device_info.ppEnabledExtensionNames = enabled_extensions.data(); 624 625 // Check for required features before creating. 626 VkPhysicalDeviceFeatures available_features; 627 vkGetPhysicalDeviceFeatures(m_physical_device, &available_features); 628 629 // Enable the features we use. 630 VkPhysicalDeviceFeatures enabled_features = {}; 631 enabled_features.dualSrcBlend = available_features.dualSrcBlend; 632 enabled_features.largePoints = available_features.largePoints; 633 enabled_features.wideLines = available_features.wideLines; 634 enabled_features.samplerAnisotropy = available_features.samplerAnisotropy; 635 enabled_features.sampleRateShading = available_features.sampleRateShading; 636 enabled_features.geometryShader = available_features.geometryShader; 637 enabled_features.fragmentStoresAndAtomics = available_features.fragmentStoresAndAtomics; 638 device_info.pEnabledFeatures = &enabled_features; 639 640 // Enable debug layer on debug builds 641 if (enable_validation_layer) 642 { 643 static const char* layer_names[] = {"VK_LAYER_LUNARG_standard_validation"}; 644 device_info.enabledLayerCount = 1; 645 device_info.ppEnabledLayerNames = layer_names; 646 } 647 648 VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT rasterization_order_access_feature = { 649 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_EXT, nullptr, VK_TRUE, VK_FALSE, 650 VK_FALSE}; 651 VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_feature = { 652 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES, nullptr, VK_TRUE}; 653 VkPhysicalDeviceDynamicRenderingLocalReadFeaturesKHR dynamic_rendering_local_read_feature = { 654 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_LOCAL_READ_FEATURES_KHR, nullptr, VK_TRUE}; 655 VkPhysicalDeviceSwapchainMaintenance1FeaturesEXT swapchain_maintenance1_feature = { 656 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SWAPCHAIN_MAINTENANCE_1_FEATURES_EXT, nullptr, VK_TRUE}; 657 VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragment_shader_interlock_feature = { 658 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT, nullptr, VK_FALSE, VK_TRUE, VK_FALSE}; 659 660 if (m_optional_extensions.vk_ext_rasterization_order_attachment_access) 661 Vulkan::AddPointerToChain(&device_info, &rasterization_order_access_feature); 662 if (m_optional_extensions.vk_ext_swapchain_maintenance1) 663 Vulkan::AddPointerToChain(&device_info, &swapchain_maintenance1_feature); 664 if (m_optional_extensions.vk_khr_dynamic_rendering) 665 { 666 Vulkan::AddPointerToChain(&device_info, &dynamic_rendering_feature); 667 if (m_optional_extensions.vk_khr_dynamic_rendering_local_read) 668 Vulkan::AddPointerToChain(&device_info, &dynamic_rendering_local_read_feature); 669 if (m_optional_extensions.vk_ext_fragment_shader_interlock) 670 Vulkan::AddPointerToChain(&device_info, &fragment_shader_interlock_feature); 671 } 672 673 VkResult res = vkCreateDevice(m_physical_device, &device_info, nullptr, &m_device); 674 if (res != VK_SUCCESS) 675 { 676 LOG_VULKAN_ERROR(res, "vkCreateDevice failed: "); 677 Vulkan::SetErrorObject(error, "vkCreateDevice failed: ", res); 678 return false; 679 } 680 681 // With the device created, we can fill the remaining entry points. 682 if (!Vulkan::LoadVulkanDeviceFunctions(m_device)) 683 return false; 684 685 // Grab the graphics and present queues. 686 vkGetDeviceQueue(m_device, m_graphics_queue_family_index, 0, &m_graphics_queue); 687 if (surface) 688 vkGetDeviceQueue(m_device, m_present_queue_family_index, 0, &m_present_queue); 689 690 m_features.gpu_timing = (m_device_properties.limits.timestampComputeAndGraphics != 0 && 691 queue_family_properties[m_graphics_queue_family_index].timestampValidBits > 0 && 692 m_device_properties.limits.timestampPeriod > 0); 693 DEV_LOG("GPU timing is {} (TS={} TS valid bits={}, TS period={})", 694 m_features.gpu_timing ? "supported" : "not supported", 695 static_cast<u32>(m_device_properties.limits.timestampComputeAndGraphics), 696 queue_family_properties[m_graphics_queue_family_index].timestampValidBits, 697 m_device_properties.limits.timestampPeriod); 698 699 ProcessDeviceExtensions(); 700 SetFeatures(disabled_features, enabled_features); 701 return true; 702 } 703 704 void VulkanDevice::ProcessDeviceExtensions() 705 { 706 // advanced feature checks 707 VkPhysicalDeviceFeatures2 features2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, nullptr, {}}; 708 VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT rasterization_order_access_feature = { 709 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_EXT, nullptr, VK_FALSE, VK_FALSE, 710 VK_FALSE}; 711 VkPhysicalDeviceDynamicRenderingFeatures dynamic_rendering_feature = { 712 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES, nullptr, VK_FALSE}; 713 VkPhysicalDeviceDynamicRenderingLocalReadFeaturesKHR dynamic_rendering_local_read_feature = { 714 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_LOCAL_READ_FEATURES_KHR, nullptr, VK_FALSE}; 715 VkPhysicalDeviceSwapchainMaintenance1FeaturesEXT swapchain_maintenance1_feature = { 716 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SWAPCHAIN_MAINTENANCE_1_FEATURES_EXT, nullptr, VK_FALSE}; 717 VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragment_shader_interlock_feature = { 718 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT, nullptr, VK_FALSE, VK_FALSE, VK_FALSE}; 719 VkPhysicalDeviceMaintenance4Features maintenance4_features = { 720 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES, nullptr, VK_FALSE}; 721 722 // add in optional feature structs 723 if (m_optional_extensions.vk_ext_rasterization_order_attachment_access) 724 Vulkan::AddPointerToChain(&features2, &rasterization_order_access_feature); 725 if (m_optional_extensions.vk_ext_swapchain_maintenance1) 726 Vulkan::AddPointerToChain(&features2, &swapchain_maintenance1_feature); 727 if (m_optional_extensions.vk_khr_dynamic_rendering) 728 { 729 Vulkan::AddPointerToChain(&features2, &dynamic_rendering_feature); 730 if (m_optional_extensions.vk_khr_dynamic_rendering_local_read) 731 Vulkan::AddPointerToChain(&features2, &dynamic_rendering_local_read_feature); 732 if (m_optional_extensions.vk_ext_fragment_shader_interlock) 733 Vulkan::AddPointerToChain(&features2, &fragment_shader_interlock_feature); 734 } 735 if (m_optional_extensions.vk_khr_maintenance5) 736 Vulkan::AddPointerToChain(&features2, &maintenance4_features); 737 738 // we might not have VK_KHR_get_physical_device_properties2... 739 if (!vkGetPhysicalDeviceFeatures2 || !vkGetPhysicalDeviceProperties2 || !vkGetPhysicalDeviceMemoryProperties2) 740 { 741 if (!vkGetPhysicalDeviceFeatures2KHR || !vkGetPhysicalDeviceProperties2KHR || 742 !vkGetPhysicalDeviceMemoryProperties2KHR) 743 { 744 ERROR_LOG("One or more functions from VK_KHR_get_physical_device_properties2 is missing, disabling extension."); 745 m_optional_extensions.vk_khr_get_physical_device_properties2 = false; 746 vkGetPhysicalDeviceFeatures2 = nullptr; 747 vkGetPhysicalDeviceProperties2 = nullptr; 748 vkGetPhysicalDeviceMemoryProperties2 = nullptr; 749 } 750 else 751 { 752 vkGetPhysicalDeviceFeatures2 = vkGetPhysicalDeviceFeatures2KHR; 753 vkGetPhysicalDeviceProperties2 = vkGetPhysicalDeviceProperties2KHR; 754 vkGetPhysicalDeviceMemoryProperties2 = vkGetPhysicalDeviceMemoryProperties2KHR; 755 } 756 } 757 758 // don't bother querying if we're not actually looking at any features 759 if (vkGetPhysicalDeviceFeatures2 && features2.pNext) 760 vkGetPhysicalDeviceFeatures2(m_physical_device, &features2); 761 762 // confirm we actually support it 763 m_optional_extensions.vk_ext_rasterization_order_attachment_access &= 764 (rasterization_order_access_feature.rasterizationOrderColorAttachmentAccess == VK_TRUE); 765 m_optional_extensions.vk_ext_swapchain_maintenance1 &= 766 (swapchain_maintenance1_feature.swapchainMaintenance1 == VK_TRUE); 767 m_optional_extensions.vk_khr_dynamic_rendering &= (dynamic_rendering_feature.dynamicRendering == VK_TRUE); 768 m_optional_extensions.vk_khr_dynamic_rendering_local_read &= 769 (dynamic_rendering_local_read_feature.dynamicRenderingLocalRead == VK_TRUE); 770 m_optional_extensions.vk_ext_fragment_shader_interlock &= 771 (m_optional_extensions.vk_khr_dynamic_rendering && 772 fragment_shader_interlock_feature.fragmentShaderPixelInterlock == VK_TRUE); 773 m_optional_extensions.vk_khr_maintenance4 &= (maintenance4_features.maintenance4 == VK_TRUE); 774 m_optional_extensions.vk_khr_maintenance5 &= m_optional_extensions.vk_khr_maintenance4; 775 776 VkPhysicalDeviceProperties2 properties2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, nullptr, {}}; 777 VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor_properties = { 778 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR, nullptr, 0u}; 779 VkPhysicalDeviceExternalMemoryHostPropertiesEXT external_memory_host_properties = { 780 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT, nullptr, 0}; 781 782 if (m_optional_extensions.vk_khr_driver_properties) 783 { 784 m_device_driver_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; 785 Vulkan::AddPointerToChain(&properties2, &m_device_driver_properties); 786 } 787 if (m_optional_extensions.vk_khr_push_descriptor) 788 Vulkan::AddPointerToChain(&properties2, &push_descriptor_properties); 789 790 if (m_optional_extensions.vk_ext_external_memory_host) 791 Vulkan::AddPointerToChain(&properties2, &external_memory_host_properties); 792 793 // don't bother querying if we're not actually looking at any features 794 if (vkGetPhysicalDeviceProperties2 && properties2.pNext) 795 vkGetPhysicalDeviceProperties2(m_physical_device, &properties2); 796 797 m_optional_extensions.vk_khr_push_descriptor &= (push_descriptor_properties.maxPushDescriptors >= 1); 798 799 // vk_ext_external_memory_host is only used if the import alignment is the same as the system's page size 800 m_optional_extensions.vk_ext_external_memory_host &= 801 (external_memory_host_properties.minImportedHostPointerAlignment == HOST_PAGE_SIZE); 802 803 #define LOG_EXT(name, field) INFO_LOG(name " is {}", m_optional_extensions.field ? "supported" : "NOT supported") 804 805 LOG_EXT("VK_EXT_external_memory_host", vk_ext_external_memory_host); 806 LOG_EXT("VK_EXT_memory_budget", vk_ext_memory_budget); 807 LOG_EXT("VK_EXT_fragment_shader_interlock", vk_ext_fragment_shader_interlock); 808 LOG_EXT("VK_EXT_rasterization_order_attachment_access", vk_ext_rasterization_order_attachment_access); 809 LOG_EXT("VK_EXT_swapchain_maintenance1", vk_ext_swapchain_maintenance1); 810 LOG_EXT("VK_KHR_get_memory_requirements2", vk_khr_get_memory_requirements2); 811 LOG_EXT("VK_KHR_bind_memory2", vk_khr_bind_memory2); 812 LOG_EXT("VK_KHR_get_physical_device_properties2", vk_khr_get_physical_device_properties2); 813 LOG_EXT("VK_KHR_dedicated_allocation", vk_khr_dedicated_allocation); 814 LOG_EXT("VK_KHR_dynamic_rendering", vk_khr_dynamic_rendering); 815 LOG_EXT("VK_KHR_dynamic_rendering_local_read", vk_khr_dynamic_rendering_local_read); 816 LOG_EXT("VK_KHR_maintenance4", vk_khr_maintenance4); 817 LOG_EXT("VK_KHR_maintenance5", vk_khr_maintenance5); 818 LOG_EXT("VK_KHR_push_descriptor", vk_khr_push_descriptor); 819 820 #undef LOG_EXT 821 } 822 823 bool VulkanDevice::CreateAllocator() 824 { 825 const u32 apiVersion = std::min(m_device_properties.apiVersion, VK_API_VERSION_1_1); 826 INFO_LOG("Supported device API version: {}.{}.{}, using version {}.{}.{} for allocator.", 827 VK_API_VERSION_MAJOR(m_device_properties.apiVersion), VK_API_VERSION_MINOR(m_device_properties.apiVersion), 828 VK_API_VERSION_PATCH(m_device_properties.apiVersion), VK_API_VERSION_MAJOR(apiVersion), 829 VK_API_VERSION_MINOR(apiVersion), VK_API_VERSION_PATCH(apiVersion)); 830 831 VmaAllocatorCreateInfo ci = {}; 832 ci.vulkanApiVersion = apiVersion; 833 ci.flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT; 834 ci.physicalDevice = m_physical_device; 835 ci.device = m_device; 836 ci.instance = m_instance; 837 838 if (apiVersion < VK_API_VERSION_1_1) 839 { 840 if (m_optional_extensions.vk_khr_get_memory_requirements2 && m_optional_extensions.vk_khr_dedicated_allocation) 841 { 842 DEV_LOG("Enabling VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT on < Vulkan 1.1."); 843 ci.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT; 844 } 845 if (m_optional_extensions.vk_khr_bind_memory2) 846 { 847 DEV_LOG("Enabling VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT on < Vulkan 1.1."); 848 ci.flags |= VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT; 849 } 850 } 851 852 if (m_optional_extensions.vk_ext_memory_budget) 853 { 854 DEV_LOG("Enabling VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT."); 855 ci.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT; 856 } 857 858 if (m_optional_extensions.vk_khr_maintenance4) 859 { 860 DEV_LOG("Enabling VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT"); 861 ci.flags |= VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE4_BIT; 862 } 863 864 if (m_optional_extensions.vk_khr_maintenance5) 865 { 866 DEV_LOG("Enabling VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT"); 867 ci.flags |= VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT; 868 } 869 870 // Limit usage of the DEVICE_LOCAL upload heap when we're using a debug device. 871 // On NVIDIA drivers, it results in frequently running out of device memory when trying to 872 // play back captures in RenderDoc, making life very painful. Re-BAR GPUs should be fine. 873 constexpr VkDeviceSize UPLOAD_HEAP_SIZE_THRESHOLD = 512 * 1024 * 1024; 874 constexpr VkMemoryPropertyFlags UPLOAD_HEAP_PROPERTIES = 875 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; 876 std::array<VkDeviceSize, VK_MAX_MEMORY_HEAPS> heap_size_limits; 877 if (m_debug_device) 878 { 879 VkPhysicalDeviceMemoryProperties memory_properties; 880 vkGetPhysicalDeviceMemoryProperties(m_physical_device, &memory_properties); 881 882 bool has_upload_heap = false; 883 heap_size_limits.fill(VK_WHOLE_SIZE); 884 for (u32 i = 0; i < memory_properties.memoryTypeCount; i++) 885 { 886 // Look for any memory types which are upload-like. 887 const VkMemoryType& type = memory_properties.memoryTypes[i]; 888 if ((type.propertyFlags & UPLOAD_HEAP_PROPERTIES) != UPLOAD_HEAP_PROPERTIES) 889 continue; 890 891 const VkMemoryHeap& heap = memory_properties.memoryHeaps[type.heapIndex]; 892 if (heap.size >= UPLOAD_HEAP_SIZE_THRESHOLD) 893 continue; 894 895 if (heap_size_limits[type.heapIndex] == VK_WHOLE_SIZE) 896 { 897 WARNING_LOG("Disabling allocation from upload heap #{} ({:.2f} MB) due to debug device.", type.heapIndex, 898 static_cast<float>(heap.size) / 1048576.0f); 899 heap_size_limits[type.heapIndex] = 0; 900 has_upload_heap = true; 901 } 902 } 903 904 if (has_upload_heap) 905 ci.pHeapSizeLimit = heap_size_limits.data(); 906 } 907 908 VkResult res = vmaCreateAllocator(&ci, &m_allocator); 909 if (res != VK_SUCCESS) 910 { 911 LOG_VULKAN_ERROR(res, "vmaCreateAllocator failed: "); 912 return false; 913 } 914 915 return true; 916 } 917 918 void VulkanDevice::DestroyAllocator() 919 { 920 if (m_allocator == VK_NULL_HANDLE) 921 return; 922 923 vmaDestroyAllocator(m_allocator); 924 m_allocator = VK_NULL_HANDLE; 925 } 926 927 bool VulkanDevice::CreateCommandBuffers() 928 { 929 VkResult res; 930 931 uint32_t frame_index = 0; 932 for (CommandBuffer& resources : m_frame_resources) 933 { 934 resources.needs_fence_wait = false; 935 936 VkCommandPoolCreateInfo pool_info = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr, 0, 937 m_graphics_queue_family_index}; 938 res = vkCreateCommandPool(m_device, &pool_info, nullptr, &resources.command_pool); 939 if (res != VK_SUCCESS) 940 { 941 LOG_VULKAN_ERROR(res, "vkCreateCommandPool failed: "); 942 return false; 943 } 944 Vulkan::SetObjectName(m_device, resources.command_pool, 945 TinyString::from_format("Frame Command Pool {}", frame_index)); 946 947 VkCommandBufferAllocateInfo buffer_info = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, nullptr, 948 resources.command_pool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, 949 static_cast<u32>(resources.command_buffers.size())}; 950 951 res = vkAllocateCommandBuffers(m_device, &buffer_info, resources.command_buffers.data()); 952 if (res != VK_SUCCESS) 953 { 954 LOG_VULKAN_ERROR(res, "vkAllocateCommandBuffers failed: "); 955 return false; 956 } 957 for (u32 i = 0; i < resources.command_buffers.size(); i++) 958 { 959 Vulkan::SetObjectName(m_device, resources.command_buffers[i], 960 TinyString::from_format("Frame {} {}Command Buffer", frame_index, (i == 0) ? "Init" : "")); 961 } 962 963 VkFenceCreateInfo fence_info = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr, VK_FENCE_CREATE_SIGNALED_BIT}; 964 965 res = vkCreateFence(m_device, &fence_info, nullptr, &resources.fence); 966 if (res != VK_SUCCESS) 967 { 968 LOG_VULKAN_ERROR(res, "vkCreateFence failed: "); 969 return false; 970 } 971 Vulkan::SetObjectName(m_device, resources.fence, TinyString::from_format("Frame Fence {}", frame_index)); 972 973 u32 num_pools = 0; 974 VkDescriptorPoolSize pool_sizes[2]; 975 if (!m_optional_extensions.vk_khr_push_descriptor) 976 { 977 pool_sizes[num_pools++] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 978 MAX_COMBINED_IMAGE_SAMPLER_DESCRIPTORS_PER_FRAME}; 979 } 980 pool_sizes[num_pools++] = {VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, MAX_INPUT_ATTACHMENT_DESCRIPTORS_PER_FRAME}; 981 982 VkDescriptorPoolCreateInfo pool_create_info = { 983 VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr, 0, MAX_DESCRIPTOR_SETS_PER_FRAME, num_pools, pool_sizes}; 984 985 res = vkCreateDescriptorPool(m_device, &pool_create_info, nullptr, &resources.descriptor_pool); 986 if (res != VK_SUCCESS) 987 { 988 LOG_VULKAN_ERROR(res, "vkCreateDescriptorPool failed: "); 989 return false; 990 } 991 Vulkan::SetObjectName(m_device, resources.descriptor_pool, 992 TinyString::from_format("Frame Descriptor Pool {}", frame_index)); 993 994 ++frame_index; 995 } 996 997 BeginCommandBuffer(0); 998 return true; 999 } 1000 1001 void VulkanDevice::DestroyCommandBuffers() 1002 { 1003 for (CommandBuffer& resources : m_frame_resources) 1004 { 1005 if (resources.fence != VK_NULL_HANDLE) 1006 vkDestroyFence(m_device, resources.fence, nullptr); 1007 if (resources.descriptor_pool != VK_NULL_HANDLE) 1008 vkDestroyDescriptorPool(m_device, resources.descriptor_pool, nullptr); 1009 if (resources.command_buffers[0] != VK_NULL_HANDLE) 1010 { 1011 vkFreeCommandBuffers(m_device, resources.command_pool, static_cast<u32>(resources.command_buffers.size()), 1012 resources.command_buffers.data()); 1013 } 1014 if (resources.command_pool != VK_NULL_HANDLE) 1015 vkDestroyCommandPool(m_device, resources.command_pool, nullptr); 1016 } 1017 } 1018 1019 bool VulkanDevice::CreatePersistentDescriptorPool() 1020 { 1021 static constexpr const VkDescriptorPoolSize pool_sizes[] = { 1022 {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1}, 1023 {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, MAX_SAMPLER_DESCRIPTORS}, 1024 {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 16}, 1025 {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 16}, 1026 }; 1027 1028 const VkDescriptorPoolCreateInfo pool_create_info = { 1029 VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, nullptr, 1030 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, MAX_SAMPLER_DESCRIPTORS, 1031 static_cast<u32>(std::size(pool_sizes)), pool_sizes}; 1032 1033 VkResult res = vkCreateDescriptorPool(m_device, &pool_create_info, nullptr, &m_global_descriptor_pool); 1034 if (res != VK_SUCCESS) 1035 { 1036 LOG_VULKAN_ERROR(res, "vkCreateDescriptorPool failed: "); 1037 return false; 1038 } 1039 Vulkan::SetObjectName(m_device, m_global_descriptor_pool, "Global Descriptor Pool"); 1040 1041 if (m_features.gpu_timing) 1042 { 1043 const VkQueryPoolCreateInfo query_create_info = { 1044 VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, nullptr, 0, VK_QUERY_TYPE_TIMESTAMP, NUM_COMMAND_BUFFERS * 4, 0}; 1045 res = vkCreateQueryPool(m_device, &query_create_info, nullptr, &m_timestamp_query_pool); 1046 if (res != VK_SUCCESS) 1047 { 1048 LOG_VULKAN_ERROR(res, "vkCreateQueryPool failed: "); 1049 m_features.gpu_timing = false; 1050 return false; 1051 } 1052 } 1053 1054 return true; 1055 } 1056 1057 void VulkanDevice::DestroyPersistentDescriptorPool() 1058 { 1059 if (m_timestamp_query_pool != VK_NULL_HANDLE) 1060 vkDestroyQueryPool(m_device, m_timestamp_query_pool, nullptr); 1061 1062 if (m_global_descriptor_pool != VK_NULL_HANDLE) 1063 vkDestroyDescriptorPool(m_device, m_global_descriptor_pool, nullptr); 1064 } 1065 1066 bool VulkanDevice::RenderPassCacheKey::operator==(const RenderPassCacheKey& rhs) const 1067 { 1068 return (std::memcmp(this, &rhs, sizeof(*this)) == 0); 1069 } 1070 1071 bool VulkanDevice::RenderPassCacheKey::operator!=(const RenderPassCacheKey& rhs) const 1072 { 1073 return (std::memcmp(this, &rhs, sizeof(*this)) != 0); 1074 } 1075 1076 size_t VulkanDevice::RenderPassCacheKeyHash::operator()(const RenderPassCacheKey& rhs) const 1077 { 1078 if constexpr (sizeof(void*) == 8) 1079 return XXH3_64bits(&rhs, sizeof(rhs)); 1080 else 1081 return XXH32(&rhs, sizeof(rhs), 0x1337); 1082 } 1083 1084 VkRenderPass VulkanDevice::GetRenderPass(const GPUPipeline::GraphicsConfig& config) 1085 { 1086 RenderPassCacheKey key; 1087 std::memset(&key, 0, sizeof(key)); 1088 1089 for (u32 i = 0; i < MAX_RENDER_TARGETS; i++) 1090 { 1091 if (config.color_formats[i] == GPUTexture::Format::Unknown) 1092 break; 1093 1094 key.color[i].format = static_cast<u8>(config.color_formats[i]); 1095 key.color[i].load_op = VK_ATTACHMENT_LOAD_OP_LOAD; 1096 key.color[i].store_op = VK_ATTACHMENT_STORE_OP_STORE; 1097 } 1098 1099 if (config.depth_format != GPUTexture::Format::Unknown) 1100 { 1101 key.depth_format = static_cast<u8>(config.depth_format); 1102 key.depth_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; 1103 key.depth_store_op = VK_ATTACHMENT_STORE_OP_STORE; 1104 1105 const bool stencil = GPUTexture::IsDepthStencilFormat(config.depth_format); 1106 key.stencil_load_op = stencil ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_DONT_CARE; 1107 key.stencil_store_op = stencil ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE; 1108 } 1109 1110 key.samples = static_cast<u8>(config.samples); 1111 key.feedback_loop = config.render_pass_flags; 1112 1113 const auto it = m_render_pass_cache.find(key); 1114 return (it != m_render_pass_cache.end()) ? it->second : CreateCachedRenderPass(key); 1115 } 1116 1117 VkRenderPass VulkanDevice::GetRenderPass(VulkanTexture* const* rts, u32 num_rts, VulkanTexture* ds, 1118 GPUPipeline::RenderPassFlag feedback_loop) 1119 { 1120 RenderPassCacheKey key; 1121 std::memset(&key, 0, sizeof(key)); 1122 1123 static_assert(static_cast<u8>(GPUTexture::Format::Unknown) == 0); 1124 1125 for (u32 i = 0; i < num_rts; i++) 1126 { 1127 key.color[i].format = static_cast<u8>(rts[i]->GetFormat()); 1128 key.color[i].load_op = GetLoadOpForTexture(rts[i]); 1129 key.color[i].store_op = VK_ATTACHMENT_STORE_OP_STORE; 1130 key.samples = static_cast<u8>(rts[i]->GetSamples()); 1131 } 1132 1133 if (ds) 1134 { 1135 const VkAttachmentLoadOp load_op = GetLoadOpForTexture(ds); 1136 key.depth_format = static_cast<u8>(ds->GetFormat()); 1137 key.depth_load_op = load_op; 1138 key.depth_store_op = VK_ATTACHMENT_STORE_OP_STORE; 1139 1140 const bool stencil = GPUTexture::IsDepthStencilFormat(ds->GetFormat()); 1141 key.stencil_load_op = stencil ? load_op : VK_ATTACHMENT_LOAD_OP_DONT_CARE; 1142 key.stencil_store_op = stencil ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE; 1143 1144 key.samples = static_cast<u8>(ds->GetSamples()); 1145 } 1146 1147 key.feedback_loop = feedback_loop; 1148 1149 const auto it = m_render_pass_cache.find(key); 1150 return (it != m_render_pass_cache.end()) ? it->second : CreateCachedRenderPass(key); 1151 } 1152 1153 VkRenderPass VulkanDevice::GetSwapChainRenderPass(GPUTexture::Format format, VkAttachmentLoadOp load_op) 1154 { 1155 DebugAssert(format != GPUTexture::Format::Unknown); 1156 1157 RenderPassCacheKey key; 1158 std::memset(&key, 0, sizeof(key)); 1159 1160 key.color[0].format = static_cast<u8>(format); 1161 key.color[0].load_op = load_op; 1162 key.color[0].store_op = VK_ATTACHMENT_STORE_OP_STORE; 1163 key.samples = 1; 1164 1165 const auto it = m_render_pass_cache.find(key); 1166 return (it != m_render_pass_cache.end()) ? it->second : CreateCachedRenderPass(key); 1167 } 1168 1169 VkRenderPass VulkanDevice::GetRenderPassForRestarting(VkRenderPass pass) 1170 { 1171 for (const auto& it : m_render_pass_cache) 1172 { 1173 if (it.second != pass) 1174 continue; 1175 1176 RenderPassCacheKey modified_key = it.first; 1177 for (u32 i = 0; i < MAX_RENDER_TARGETS; i++) 1178 { 1179 if (modified_key.color[i].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) 1180 modified_key.color[i].load_op = VK_ATTACHMENT_LOAD_OP_LOAD; 1181 } 1182 1183 if (modified_key.depth_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) 1184 modified_key.depth_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; 1185 if (modified_key.stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) 1186 modified_key.stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD; 1187 1188 if (modified_key == it.first) 1189 return pass; 1190 1191 auto fit = m_render_pass_cache.find(modified_key); 1192 if (fit != m_render_pass_cache.end()) 1193 return fit->second; 1194 1195 return CreateCachedRenderPass(modified_key); 1196 } 1197 1198 return pass; 1199 } 1200 1201 VkCommandBuffer VulkanDevice::GetCurrentInitCommandBuffer() 1202 { 1203 CommandBuffer& res = m_frame_resources[m_current_frame]; 1204 VkCommandBuffer buf = res.command_buffers[0]; 1205 if (res.init_buffer_used) 1206 return buf; 1207 1208 VkCommandBufferBeginInfo bi{VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, 1209 VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, nullptr}; 1210 vkBeginCommandBuffer(buf, &bi); 1211 res.init_buffer_used = true; 1212 return buf; 1213 } 1214 1215 VkDescriptorSet VulkanDevice::AllocateDescriptorSet(VkDescriptorSetLayout set_layout) 1216 { 1217 VkDescriptorSetAllocateInfo allocate_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr, 1218 m_frame_resources[m_current_frame].descriptor_pool, 1, &set_layout}; 1219 1220 VkDescriptorSet descriptor_set; 1221 VkResult res = vkAllocateDescriptorSets(m_device, &allocate_info, &descriptor_set); 1222 if (res != VK_SUCCESS) 1223 { 1224 // Failing to allocate a descriptor set is not a fatal error, we can 1225 // recover by moving to the next command buffer. 1226 return VK_NULL_HANDLE; 1227 } 1228 1229 return descriptor_set; 1230 } 1231 1232 VkDescriptorSet VulkanDevice::AllocatePersistentDescriptorSet(VkDescriptorSetLayout set_layout) 1233 { 1234 VkDescriptorSetAllocateInfo allocate_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr, 1235 m_global_descriptor_pool, 1, &set_layout}; 1236 1237 VkDescriptorSet descriptor_set; 1238 VkResult res = vkAllocateDescriptorSets(m_device, &allocate_info, &descriptor_set); 1239 if (res != VK_SUCCESS) 1240 return VK_NULL_HANDLE; 1241 1242 return descriptor_set; 1243 } 1244 1245 void VulkanDevice::FreePersistentDescriptorSet(VkDescriptorSet set) 1246 { 1247 vkFreeDescriptorSets(m_device, m_global_descriptor_pool, 1, &set); 1248 } 1249 1250 void VulkanDevice::WaitForFenceCounter(u64 fence_counter) 1251 { 1252 if (m_completed_fence_counter >= fence_counter) 1253 return; 1254 1255 // Find the first command buffer which covers this counter value. 1256 u32 index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS; 1257 while (index != m_current_frame) 1258 { 1259 if (m_frame_resources[index].fence_counter >= fence_counter) 1260 break; 1261 1262 index = (index + 1) % NUM_COMMAND_BUFFERS; 1263 } 1264 1265 DebugAssert(index != m_current_frame); 1266 WaitForCommandBufferCompletion(index); 1267 } 1268 1269 void VulkanDevice::WaitForGPUIdle() 1270 { 1271 WaitForPresentComplete(); 1272 vkDeviceWaitIdle(m_device); 1273 } 1274 1275 float VulkanDevice::GetAndResetAccumulatedGPUTime() 1276 { 1277 const float time = m_accumulated_gpu_time; 1278 m_accumulated_gpu_time = 0.0f; 1279 return time; 1280 } 1281 1282 bool VulkanDevice::SetGPUTimingEnabled(bool enabled) 1283 { 1284 m_gpu_timing_enabled = enabled && m_features.gpu_timing; 1285 return (enabled == m_gpu_timing_enabled); 1286 } 1287 1288 void VulkanDevice::WaitForCommandBufferCompletion(u32 index) 1289 { 1290 // We might be waiting for the buffer we just submitted to the worker thread. 1291 if (m_queued_present.command_buffer_index == index && !m_present_done.load(std::memory_order_acquire)) 1292 { 1293 WARNING_LOG("Waiting for threaded submission of cmdbuffer {}", index); 1294 WaitForPresentComplete(); 1295 } 1296 1297 // Wait for this command buffer to be completed. 1298 static constexpr u32 MAX_TIMEOUTS = 10; 1299 u32 timeouts = 0; 1300 for (;;) 1301 { 1302 VkResult res = vkWaitForFences(m_device, 1, &m_frame_resources[index].fence, VK_TRUE, UINT64_MAX); 1303 if (res == VK_SUCCESS) 1304 break; 1305 1306 if (res == VK_TIMEOUT && (++timeouts) <= MAX_TIMEOUTS) 1307 { 1308 ERROR_LOG("vkWaitForFences() for cmdbuffer {} failed with VK_TIMEOUT, trying again.", index); 1309 continue; 1310 } 1311 else if (res != VK_SUCCESS) 1312 { 1313 LOG_VULKAN_ERROR(res, TinyString::from_format("vkWaitForFences() for cmdbuffer {} failed: ", index)); 1314 m_last_submit_failed.store(true, std::memory_order_release); 1315 return; 1316 } 1317 } 1318 1319 // Clean up any resources for command buffers between the last known completed buffer and this 1320 // now-completed command buffer. If we use >2 buffers, this may be more than one buffer. 1321 const u64 now_completed_counter = m_frame_resources[index].fence_counter; 1322 u32 cleanup_index = (m_current_frame + 1) % NUM_COMMAND_BUFFERS; 1323 while (cleanup_index != m_current_frame) 1324 { 1325 CommandBuffer& resources = m_frame_resources[cleanup_index]; 1326 if (resources.fence_counter > now_completed_counter) 1327 break; 1328 1329 if (m_gpu_timing_enabled && resources.timestamp_written) 1330 { 1331 std::array<u64, 2> timestamps; 1332 VkResult res = 1333 vkGetQueryPoolResults(m_device, m_timestamp_query_pool, index * 2, static_cast<u32>(timestamps.size()), 1334 sizeof(u64) * timestamps.size(), timestamps.data(), sizeof(u64), VK_QUERY_RESULT_64_BIT); 1335 if (res == VK_SUCCESS) 1336 { 1337 // if we didn't write the timestamp at the start of the cmdbuffer (just enabled timing), the first TS will be 1338 // zero 1339 if (timestamps[0] > 0 && m_gpu_timing_enabled) 1340 { 1341 const double ns_diff = 1342 (timestamps[1] - timestamps[0]) * static_cast<double>(m_device_properties.limits.timestampPeriod); 1343 m_accumulated_gpu_time += static_cast<float>(ns_diff / 1000000.0); 1344 } 1345 } 1346 else 1347 { 1348 LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: "); 1349 } 1350 } 1351 1352 cleanup_index = (cleanup_index + 1) % NUM_COMMAND_BUFFERS; 1353 } 1354 1355 m_completed_fence_counter = now_completed_counter; 1356 while (!m_cleanup_objects.empty()) 1357 { 1358 auto& it = m_cleanup_objects.front(); 1359 if (it.first > now_completed_counter) 1360 break; 1361 it.second(); 1362 m_cleanup_objects.pop_front(); 1363 } 1364 } 1365 1366 void VulkanDevice::EndAndSubmitCommandBuffer(VulkanSwapChain* present_swap_chain, bool explicit_present, 1367 bool submit_on_thread) 1368 { 1369 if (m_last_submit_failed.load(std::memory_order_acquire)) 1370 return; 1371 1372 CommandBuffer& resources = m_frame_resources[m_current_frame]; 1373 1374 // End the current command buffer. 1375 VkResult res; 1376 if (resources.init_buffer_used) 1377 { 1378 res = vkEndCommandBuffer(resources.command_buffers[0]); 1379 if (res != VK_SUCCESS) 1380 { 1381 LOG_VULKAN_ERROR(res, "vkEndCommandBuffer failed: "); 1382 Panic("Failed to end command buffer"); 1383 } 1384 } 1385 1386 if (m_gpu_timing_enabled && resources.timestamp_written) 1387 { 1388 vkCmdWriteTimestamp(m_current_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool, 1389 m_current_frame * 2 + 1); 1390 } 1391 1392 res = vkEndCommandBuffer(resources.command_buffers[1]); 1393 if (res != VK_SUCCESS) 1394 { 1395 LOG_VULKAN_ERROR(res, "vkEndCommandBuffer failed: "); 1396 Panic("Failed to end command buffer"); 1397 } 1398 1399 // This command buffer now has commands, so can't be re-used without waiting. 1400 resources.needs_fence_wait = true; 1401 1402 std::unique_lock<std::mutex> lock(m_present_mutex); 1403 WaitForPresentComplete(lock); 1404 1405 if (!submit_on_thread || explicit_present || !m_present_thread.joinable()) 1406 { 1407 DoSubmitCommandBuffer(m_current_frame, present_swap_chain); 1408 if (present_swap_chain && !explicit_present) 1409 DoPresent(present_swap_chain); 1410 return; 1411 } 1412 1413 m_queued_present.command_buffer_index = m_current_frame; 1414 m_queued_present.swap_chain = present_swap_chain; 1415 m_present_done.store(false, std::memory_order_release); 1416 m_present_queued_cv.notify_one(); 1417 } 1418 1419 void VulkanDevice::DoSubmitCommandBuffer(u32 index, VulkanSwapChain* present_swap_chain) 1420 { 1421 CommandBuffer& resources = m_frame_resources[index]; 1422 1423 uint32_t wait_bits = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; 1424 VkSubmitInfo submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO, 1425 nullptr, 1426 0u, 1427 nullptr, 1428 nullptr, 1429 resources.init_buffer_used ? 2u : 1u, 1430 resources.init_buffer_used ? resources.command_buffers.data() : 1431 &resources.command_buffers[1], 1432 0u, 1433 nullptr}; 1434 1435 if (present_swap_chain) 1436 { 1437 submit_info.pWaitSemaphores = present_swap_chain->GetImageAvailableSemaphorePtr(); 1438 submit_info.waitSemaphoreCount = 1; 1439 submit_info.pWaitDstStageMask = &wait_bits; 1440 1441 submit_info.pSignalSemaphores = present_swap_chain->GetRenderingFinishedSemaphorePtr(); 1442 submit_info.signalSemaphoreCount = 1; 1443 } 1444 1445 const VkResult res = vkQueueSubmit(m_graphics_queue, 1, &submit_info, resources.fence); 1446 if (res != VK_SUCCESS) 1447 { 1448 LOG_VULKAN_ERROR(res, "vkQueueSubmit failed: "); 1449 m_last_submit_failed.store(true, std::memory_order_release); 1450 return; 1451 } 1452 } 1453 1454 void VulkanDevice::DoPresent(VulkanSwapChain* present_swap_chain) 1455 { 1456 const VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, 1457 nullptr, 1458 1, 1459 present_swap_chain->GetRenderingFinishedSemaphorePtr(), 1460 1, 1461 present_swap_chain->GetSwapChainPtr(), 1462 present_swap_chain->GetCurrentImageIndexPtr(), 1463 nullptr}; 1464 1465 present_swap_chain->ResetImageAcquireResult(); 1466 1467 const VkResult res = vkQueuePresentKHR(m_present_queue, &present_info); 1468 if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR) 1469 { 1470 // VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain. 1471 if (res == VK_ERROR_OUT_OF_DATE_KHR) 1472 ResizeWindow(0, 0, m_window_info.surface_scale); 1473 else 1474 LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: "); 1475 1476 return; 1477 } 1478 1479 // Grab the next image as soon as possible, that way we spend less time blocked on the next 1480 // submission. Don't care if it fails, we'll deal with that at the presentation call site. 1481 // Credit to dxvk for the idea. 1482 present_swap_chain->AcquireNextImage(); 1483 } 1484 1485 void VulkanDevice::WaitForPresentComplete() 1486 { 1487 if (m_present_done.load(std::memory_order_acquire)) 1488 return; 1489 1490 std::unique_lock<std::mutex> lock(m_present_mutex); 1491 WaitForPresentComplete(lock); 1492 } 1493 1494 void VulkanDevice::WaitForPresentComplete(std::unique_lock<std::mutex>& lock) 1495 { 1496 if (m_present_done.load(std::memory_order_acquire)) 1497 return; 1498 1499 m_present_done_cv.wait(lock, [this]() { return m_present_done.load(std::memory_order_acquire); }); 1500 } 1501 1502 void VulkanDevice::PresentThread() 1503 { 1504 std::unique_lock<std::mutex> lock(m_present_mutex); 1505 while (!m_present_thread_done.load(std::memory_order_acquire)) 1506 { 1507 m_present_queued_cv.wait(lock, [this]() { 1508 return !m_present_done.load(std::memory_order_acquire) || m_present_thread_done.load(std::memory_order_acquire); 1509 }); 1510 1511 if (m_present_done.load(std::memory_order_acquire)) 1512 continue; 1513 1514 DoSubmitCommandBuffer(m_queued_present.command_buffer_index, m_queued_present.swap_chain); 1515 if (m_queued_present.swap_chain) 1516 DoPresent(m_queued_present.swap_chain); 1517 m_present_done.store(true, std::memory_order_release); 1518 m_present_done_cv.notify_one(); 1519 } 1520 } 1521 1522 void VulkanDevice::StartPresentThread() 1523 { 1524 DebugAssert(!m_present_thread.joinable()); 1525 m_present_thread_done.store(false, std::memory_order_release); 1526 m_present_thread = std::thread(&VulkanDevice::PresentThread, this); 1527 } 1528 1529 void VulkanDevice::StopPresentThread() 1530 { 1531 if (!m_present_thread.joinable()) 1532 return; 1533 1534 { 1535 std::unique_lock<std::mutex> lock(m_present_mutex); 1536 WaitForPresentComplete(lock); 1537 m_present_thread_done.store(true, std::memory_order_release); 1538 m_present_queued_cv.notify_one(); 1539 } 1540 1541 m_present_thread.join(); 1542 } 1543 1544 void VulkanDevice::MoveToNextCommandBuffer() 1545 { 1546 BeginCommandBuffer((m_current_frame + 1) % NUM_COMMAND_BUFFERS); 1547 } 1548 1549 void VulkanDevice::BeginCommandBuffer(u32 index) 1550 { 1551 CommandBuffer& resources = m_frame_resources[index]; 1552 1553 // Wait for the GPU to finish with all resources for this command buffer. 1554 if (resources.fence_counter > m_completed_fence_counter) 1555 WaitForCommandBufferCompletion(index); 1556 1557 // Reset fence to unsignaled before starting. 1558 VkResult res = vkResetFences(m_device, 1, &resources.fence); 1559 if (res != VK_SUCCESS) 1560 LOG_VULKAN_ERROR(res, "vkResetFences failed: "); 1561 1562 // Reset command pools to beginning since we can re-use the memory now 1563 res = vkResetCommandPool(m_device, resources.command_pool, 0); 1564 if (res != VK_SUCCESS) 1565 LOG_VULKAN_ERROR(res, "vkResetCommandPool failed: "); 1566 1567 // Enable commands to be recorded to the two buffers again. 1568 VkCommandBufferBeginInfo begin_info = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, 1569 VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, nullptr}; 1570 res = vkBeginCommandBuffer(resources.command_buffers[1], &begin_info); 1571 if (res != VK_SUCCESS) 1572 LOG_VULKAN_ERROR(res, "vkBeginCommandBuffer failed: "); 1573 1574 // Also can do the same for the descriptor pools 1575 if (resources.descriptor_pool != VK_NULL_HANDLE) 1576 { 1577 res = vkResetDescriptorPool(m_device, resources.descriptor_pool, 0); 1578 if (res != VK_SUCCESS) 1579 LOG_VULKAN_ERROR(res, "vkResetDescriptorPool failed: "); 1580 } 1581 1582 if (m_gpu_timing_enabled) 1583 { 1584 vkCmdResetQueryPool(resources.command_buffers[1], m_timestamp_query_pool, index * 2, 2); 1585 vkCmdWriteTimestamp(resources.command_buffers[1], VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, m_timestamp_query_pool, 1586 index * 2); 1587 } 1588 1589 resources.fence_counter = m_next_fence_counter++; 1590 resources.init_buffer_used = false; 1591 resources.timestamp_written = m_gpu_timing_enabled; 1592 1593 m_current_frame = index; 1594 m_current_command_buffer = resources.command_buffers[1]; 1595 1596 // using the lower 32 bits of the fence index should be sufficient here, I hope... 1597 vmaSetCurrentFrameIndex(m_allocator, static_cast<u32>(m_next_fence_counter)); 1598 } 1599 1600 void VulkanDevice::SubmitCommandBuffer(bool wait_for_completion) 1601 { 1602 DebugAssert(!InRenderPass()); 1603 1604 const u32 current_frame = m_current_frame; 1605 EndAndSubmitCommandBuffer(nullptr, false, false); 1606 MoveToNextCommandBuffer(); 1607 1608 if (wait_for_completion) 1609 WaitForCommandBufferCompletion(current_frame); 1610 1611 InvalidateCachedState(); 1612 } 1613 1614 void VulkanDevice::SubmitCommandBuffer(bool wait_for_completion, const std::string_view reason) 1615 { 1616 WARNING_LOG("Executing command buffer due to '{}'", reason); 1617 SubmitCommandBuffer(wait_for_completion); 1618 } 1619 1620 void VulkanDevice::SubmitCommandBufferAndRestartRenderPass(const std::string_view reason) 1621 { 1622 if (InRenderPass()) 1623 EndRenderPass(); 1624 1625 VulkanPipeline* pl = m_current_pipeline; 1626 SubmitCommandBuffer(false, reason); 1627 1628 SetPipeline(pl); 1629 BeginRenderPass(); 1630 } 1631 1632 bool VulkanDevice::CheckLastSubmitFail() 1633 { 1634 return m_last_submit_failed.load(std::memory_order_acquire); 1635 } 1636 1637 void VulkanDevice::DeferBufferDestruction(VkBuffer object, VmaAllocation allocation) 1638 { 1639 m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), 1640 [this, object, allocation]() { vmaDestroyBuffer(m_allocator, object, allocation); }); 1641 } 1642 1643 void VulkanDevice::DeferBufferDestruction(VkBuffer object, VkDeviceMemory memory) 1644 { 1645 m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), [this, object, memory]() { 1646 vkDestroyBuffer(m_device, object, nullptr); 1647 vkFreeMemory(m_device, memory, nullptr); 1648 }); 1649 } 1650 1651 void VulkanDevice::DeferFramebufferDestruction(VkFramebuffer object) 1652 { 1653 m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), 1654 [this, object]() { vkDestroyFramebuffer(m_device, object, nullptr); }); 1655 } 1656 1657 void VulkanDevice::DeferImageDestruction(VkImage object, VmaAllocation allocation) 1658 { 1659 m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), 1660 [this, object, allocation]() { vmaDestroyImage(m_allocator, object, allocation); }); 1661 } 1662 1663 void VulkanDevice::DeferImageViewDestruction(VkImageView object) 1664 { 1665 m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), 1666 [this, object]() { vkDestroyImageView(m_device, object, nullptr); }); 1667 } 1668 1669 void VulkanDevice::DeferPipelineDestruction(VkPipeline object) 1670 { 1671 m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), 1672 [this, object]() { vkDestroyPipeline(m_device, object, nullptr); }); 1673 } 1674 1675 void VulkanDevice::DeferBufferViewDestruction(VkBufferView object) 1676 { 1677 m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), 1678 [this, object]() { vkDestroyBufferView(m_device, object, nullptr); }); 1679 } 1680 1681 void VulkanDevice::DeferPersistentDescriptorSetDestruction(VkDescriptorSet object) 1682 { 1683 m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), [this, object]() { FreePersistentDescriptorSet(object); }); 1684 } 1685 1686 VKAPI_ATTR VkBool32 VKAPI_CALL DebugMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, 1687 VkDebugUtilsMessageTypeFlagsEXT messageType, 1688 const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, 1689 void* pUserData) 1690 { 1691 if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) 1692 { 1693 ERROR_LOG("Vulkan debug report: ({}) {}", pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "", 1694 pCallbackData->pMessage); 1695 } 1696 else if (severity & (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)) 1697 { 1698 WARNING_LOG("Vulkan debug report: ({}) {}", pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "", 1699 pCallbackData->pMessage); 1700 } 1701 else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) 1702 { 1703 INFO_LOG("Vulkan debug report: ({}) {}", pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "", 1704 pCallbackData->pMessage); 1705 } 1706 else 1707 { 1708 DEV_LOG("Vulkan debug report: ({}) {}", pCallbackData->pMessageIdName ? pCallbackData->pMessageIdName : "", 1709 pCallbackData->pMessage); 1710 } 1711 1712 return VK_FALSE; 1713 } 1714 1715 bool VulkanDevice::EnableDebugUtils() 1716 { 1717 // Already enabled? 1718 if (m_debug_messenger_callback != VK_NULL_HANDLE) 1719 return true; 1720 1721 // Check for presence of the functions before calling 1722 if (!vkCreateDebugUtilsMessengerEXT || !vkDestroyDebugUtilsMessengerEXT || !vkSubmitDebugUtilsMessageEXT) 1723 { 1724 return false; 1725 } 1726 1727 VkDebugUtilsMessengerCreateInfoEXT messenger_info = { 1728 VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, 1729 nullptr, 1730 0, 1731 VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | 1732 VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT, 1733 VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT | 1734 VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT, 1735 DebugMessengerCallback, 1736 nullptr}; 1737 1738 const VkResult res = 1739 vkCreateDebugUtilsMessengerEXT(m_instance, &messenger_info, nullptr, &m_debug_messenger_callback); 1740 if (res != VK_SUCCESS) 1741 { 1742 LOG_VULKAN_ERROR(res, "vkCreateDebugUtilsMessengerEXT failed: "); 1743 return false; 1744 } 1745 1746 return true; 1747 } 1748 1749 void VulkanDevice::DisableDebugUtils() 1750 { 1751 if (m_debug_messenger_callback != VK_NULL_HANDLE) 1752 { 1753 vkDestroyDebugUtilsMessengerEXT(m_instance, m_debug_messenger_callback, nullptr); 1754 m_debug_messenger_callback = VK_NULL_HANDLE; 1755 } 1756 } 1757 1758 bool VulkanDevice::IsDeviceNVIDIA() const 1759 { 1760 return (m_device_properties.vendorID == 0x10DE); 1761 } 1762 1763 bool VulkanDevice::IsDeviceAMD() const 1764 { 1765 return (m_device_properties.vendorID == 0x1002); 1766 } 1767 1768 bool VulkanDevice::IsDeviceAdreno() const 1769 { 1770 // Assume turnip is fine... 1771 return ((m_device_properties.vendorID == 0x5143 || 1772 m_device_driver_properties.driverID == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) && 1773 m_device_driver_properties.driverID != VK_DRIVER_ID_MESA_TURNIP); 1774 } 1775 1776 bool VulkanDevice::IsDeviceMali() const 1777 { 1778 return (m_device_properties.vendorID == 0x13B5 || 1779 m_device_driver_properties.driverID == VK_DRIVER_ID_ARM_PROPRIETARY); 1780 } 1781 1782 bool VulkanDevice::IsDeviceImgTec() const 1783 { 1784 return (m_device_properties.vendorID == 0x1010 || 1785 m_device_driver_properties.driverID == VK_DRIVER_ID_IMAGINATION_PROPRIETARY); 1786 } 1787 1788 bool VulkanDevice::IsBrokenMobileDriver() const 1789 { 1790 return (IsDeviceAdreno() || IsDeviceMali() || IsDeviceImgTec()); 1791 } 1792 1793 VkRenderPass VulkanDevice::CreateCachedRenderPass(RenderPassCacheKey key) 1794 { 1795 std::array<VkAttachmentReference, MAX_RENDER_TARGETS> color_references; 1796 VkAttachmentReference* color_reference_ptr = nullptr; 1797 VkAttachmentReference depth_reference; 1798 VkAttachmentReference* depth_reference_ptr = nullptr; 1799 VkAttachmentReference input_reference; 1800 VkAttachmentReference* input_reference_ptr = nullptr; 1801 VkSubpassDependency subpass_dependency; 1802 VkSubpassDependency* subpass_dependency_ptr = nullptr; 1803 std::array<VkAttachmentDescription, MAX_RENDER_TARGETS + 1> attachments; 1804 u32 num_attachments = 0; 1805 1806 for (u32 i = 0; i < MAX_RENDER_TARGETS; i++) 1807 { 1808 if (key.color[i].format == static_cast<u8>(GPUTexture::Format::Unknown)) 1809 break; 1810 1811 const VkImageLayout layout = 1812 (key.feedback_loop & GPUPipeline::ColorFeedbackLoop) ? 1813 (m_optional_extensions.vk_khr_dynamic_rendering_local_read ? VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR : 1814 VK_IMAGE_LAYOUT_GENERAL) : 1815 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; 1816 1817 const RenderPassCacheKey::RenderTarget key_rt = key.color[i]; 1818 attachments[num_attachments] = {i, 1819 TEXTURE_FORMAT_MAPPING[key_rt.format], 1820 static_cast<VkSampleCountFlagBits>(key.samples), 1821 static_cast<VkAttachmentLoadOp>(key_rt.load_op), 1822 static_cast<VkAttachmentStoreOp>(key_rt.store_op), 1823 VK_ATTACHMENT_LOAD_OP_DONT_CARE, 1824 VK_ATTACHMENT_STORE_OP_DONT_CARE, 1825 layout, 1826 layout}; 1827 color_references[num_attachments].attachment = num_attachments; 1828 color_references[num_attachments].layout = layout; 1829 color_reference_ptr = color_references.data(); 1830 1831 if (key.feedback_loop & GPUPipeline::ColorFeedbackLoop) 1832 { 1833 DebugAssert(i == 0); 1834 input_reference.attachment = num_attachments; 1835 input_reference.layout = layout; 1836 input_reference_ptr = &input_reference; 1837 1838 if (!m_optional_extensions.vk_ext_rasterization_order_attachment_access) 1839 { 1840 // don't need the framebuffer-local dependency when we have rasterization order attachment access 1841 subpass_dependency.srcSubpass = 0; 1842 subpass_dependency.dstSubpass = 0; 1843 subpass_dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; 1844 subpass_dependency.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; 1845 subpass_dependency.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; 1846 subpass_dependency.dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; 1847 subpass_dependency.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; 1848 subpass_dependency_ptr = &subpass_dependency; 1849 } 1850 } 1851 1852 num_attachments++; 1853 } 1854 1855 const u32 num_rts = num_attachments; 1856 1857 if (key.depth_format != static_cast<u8>(GPUTexture::Format::Unknown)) 1858 { 1859 const VkImageLayout layout = (key.feedback_loop & GPUPipeline::SampleDepthBuffer) ? 1860 VK_IMAGE_LAYOUT_GENERAL : 1861 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; 1862 attachments[num_attachments] = {0, 1863 static_cast<VkFormat>(TEXTURE_FORMAT_MAPPING[key.depth_format]), 1864 static_cast<VkSampleCountFlagBits>(key.samples), 1865 static_cast<VkAttachmentLoadOp>(key.depth_load_op), 1866 static_cast<VkAttachmentStoreOp>(key.depth_store_op), 1867 static_cast<VkAttachmentLoadOp>(key.stencil_load_op), 1868 static_cast<VkAttachmentStoreOp>(key.stencil_store_op), 1869 layout, 1870 layout}; 1871 depth_reference.attachment = num_attachments; 1872 depth_reference.layout = layout; 1873 depth_reference_ptr = &depth_reference; 1874 num_attachments++; 1875 } 1876 1877 const VkSubpassDescriptionFlags subpass_flags = 1878 ((key.feedback_loop & GPUPipeline::ColorFeedbackLoop) && 1879 m_optional_extensions.vk_ext_rasterization_order_attachment_access) ? 1880 VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_EXT : 1881 0; 1882 const VkSubpassDescription subpass = {subpass_flags, 1883 VK_PIPELINE_BIND_POINT_GRAPHICS, 1884 input_reference_ptr ? num_rts : 0u, 1885 input_reference_ptr, 1886 num_rts, 1887 color_reference_ptr, 1888 nullptr, 1889 depth_reference_ptr, 1890 0, 1891 nullptr}; 1892 const VkRenderPassCreateInfo pass_info = {VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, 1893 nullptr, 1894 0u, 1895 num_attachments, 1896 attachments.data(), 1897 1u, 1898 &subpass, 1899 subpass_dependency_ptr ? 1u : 0u, 1900 subpass_dependency_ptr}; 1901 1902 VkRenderPass pass; 1903 const VkResult res = vkCreateRenderPass(m_device, &pass_info, nullptr, &pass); 1904 if (res != VK_SUCCESS) 1905 { 1906 LOG_VULKAN_ERROR(res, "vkCreateRenderPass failed: "); 1907 return VK_NULL_HANDLE; 1908 } 1909 1910 m_render_pass_cache.emplace(key, pass); 1911 return pass; 1912 } 1913 1914 VkFramebuffer VulkanDevice::CreateFramebuffer(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, u32 flags) 1915 { 1916 VulkanDevice& dev = VulkanDevice::GetInstance(); 1917 VkRenderPass render_pass = 1918 dev.GetRenderPass(reinterpret_cast<VulkanTexture* const*>(rts), num_rts, static_cast<VulkanTexture*>(ds), 1919 static_cast<GPUPipeline::RenderPassFlag>(flags)); 1920 1921 const GPUTexture* rt_or_ds = (num_rts > 0) ? rts[0] : ds; 1922 DebugAssert(rt_or_ds); 1923 1924 Vulkan::FramebufferBuilder fbb; 1925 fbb.SetRenderPass(render_pass); 1926 fbb.SetSize(rt_or_ds->GetWidth(), rt_or_ds->GetHeight(), 1); 1927 for (u32 i = 0; i < num_rts; i++) 1928 fbb.AddAttachment(static_cast<VulkanTexture*>(rts[i])->GetView()); 1929 if (ds) 1930 fbb.AddAttachment(static_cast<VulkanTexture*>(ds)->GetView()); 1931 1932 return fbb.Create(dev.m_device, false); 1933 } 1934 1935 void VulkanDevice::DestroyFramebuffer(VkFramebuffer fbo) 1936 { 1937 if (fbo == VK_NULL_HANDLE) 1938 return; 1939 1940 VulkanDevice::GetInstance().DeferFramebufferDestruction(fbo); 1941 } 1942 1943 bool VulkanDevice::IsSuitableDefaultRenderer() 1944 { 1945 #ifdef __ANDROID__ 1946 // No way in hell. 1947 return false; 1948 #else 1949 GPUList gpus = EnumerateGPUs(); 1950 if (gpus.empty()) 1951 { 1952 // No adapters, not gonna be able to use VK. 1953 return false; 1954 } 1955 1956 // Check the first GPU, should be enough. 1957 const std::string& name = gpus.front().second.name; 1958 INFO_LOG("Using Vulkan GPU '{}' for automatic renderer check.", name); 1959 1960 // Any software rendering (LLVMpipe, SwiftShader). 1961 if (StringUtil::StartsWithNoCase(name, "llvmpipe") || StringUtil::StartsWithNoCase(name, "SwiftShader")) 1962 { 1963 INFO_LOG("Not using Vulkan for software renderer."); 1964 return false; 1965 } 1966 1967 // For Intel, OpenGL usually ends up faster on Linux, because of fbfetch. 1968 // Plus, the Ivy Bridge and Haswell drivers are incomplete. 1969 if (StringUtil::StartsWithNoCase(name, "Intel")) 1970 { 1971 INFO_LOG("Not using Vulkan for Intel GPU."); 1972 return false; 1973 } 1974 1975 INFO_LOG("Allowing Vulkan as default renderer."); 1976 return true; 1977 #endif 1978 } 1979 1980 RenderAPI VulkanDevice::GetRenderAPI() const 1981 { 1982 return RenderAPI::Vulkan; 1983 } 1984 1985 bool VulkanDevice::HasSurface() const 1986 { 1987 return static_cast<bool>(m_swap_chain); 1988 } 1989 1990 bool VulkanDevice::CreateDevice(std::string_view adapter, bool threaded_presentation, 1991 std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features, 1992 Error* error) 1993 { 1994 std::unique_lock lock(s_instance_mutex); 1995 bool enable_debug_utils = m_debug_device; 1996 bool enable_validation_layer = m_debug_device; 1997 1998 if (!Vulkan::LoadVulkanLibrary(error)) 1999 { 2000 Error::AddPrefix(error, 2001 "Failed to load Vulkan library. Does your GPU and/or driver support Vulkan?\nThe error was:"); 2002 return false; 2003 } 2004 2005 m_instance = CreateVulkanInstance(m_window_info, &m_optional_extensions, enable_debug_utils, enable_validation_layer); 2006 if (m_instance == VK_NULL_HANDLE) 2007 { 2008 if (enable_debug_utils || enable_validation_layer) 2009 { 2010 // Try again without the validation layer. 2011 enable_debug_utils = false; 2012 enable_validation_layer = false; 2013 m_instance = 2014 CreateVulkanInstance(m_window_info, &m_optional_extensions, enable_debug_utils, enable_validation_layer); 2015 if (m_instance == VK_NULL_HANDLE) 2016 { 2017 Error::SetStringView(error, "Failed to create Vulkan instance. Does your GPU and/or driver support Vulkan?"); 2018 return false; 2019 } 2020 2021 ERROR_LOG("Vulkan validation/debug layers requested but are unavailable. Creating non-debug device."); 2022 } 2023 } 2024 2025 if (!Vulkan::LoadVulkanInstanceFunctions(m_instance)) 2026 { 2027 ERROR_LOG("Failed to load Vulkan instance functions"); 2028 Error::SetStringView(error, "Failed to load Vulkan instance functions"); 2029 return false; 2030 } 2031 2032 GPUList gpus = EnumerateGPUs(m_instance); 2033 if (gpus.empty()) 2034 { 2035 Error::SetStringView(error, "No physical devices found. Does your GPU and/or driver support Vulkan?"); 2036 return false; 2037 } 2038 2039 if (!adapter.empty()) 2040 { 2041 u32 gpu_index = 0; 2042 for (; gpu_index < static_cast<u32>(gpus.size()); gpu_index++) 2043 { 2044 INFO_LOG("GPU {}: {}", gpu_index, gpus[gpu_index].second.name); 2045 if (gpus[gpu_index].second.name == adapter) 2046 { 2047 m_physical_device = gpus[gpu_index].first; 2048 break; 2049 } 2050 } 2051 2052 if (gpu_index == static_cast<u32>(gpus.size())) 2053 { 2054 WARNING_LOG("Requested GPU '{}' not found, using first ({})", adapter, gpus[0].second.name); 2055 m_physical_device = gpus[0].first; 2056 } 2057 } 2058 else 2059 { 2060 INFO_LOG("No GPU requested, using first ({})", gpus[0].second.name); 2061 m_physical_device = gpus[0].first; 2062 } 2063 2064 // Read device physical memory properties, we need it for allocating buffers 2065 vkGetPhysicalDeviceProperties(m_physical_device, &m_device_properties); 2066 m_device_properties.limits.minUniformBufferOffsetAlignment = 2067 std::max(m_device_properties.limits.minUniformBufferOffsetAlignment, static_cast<VkDeviceSize>(1)); 2068 m_device_properties.limits.minTexelBufferOffsetAlignment = 2069 std::max(m_device_properties.limits.minTexelBufferOffsetAlignment, static_cast<VkDeviceSize>(1)); 2070 m_device_properties.limits.optimalBufferCopyOffsetAlignment = 2071 std::max(m_device_properties.limits.optimalBufferCopyOffsetAlignment, static_cast<VkDeviceSize>(1)); 2072 m_device_properties.limits.optimalBufferCopyRowPitchAlignment = 2073 std::max(m_device_properties.limits.optimalBufferCopyRowPitchAlignment, static_cast<VkDeviceSize>(1)); 2074 m_device_properties.limits.bufferImageGranularity = 2075 std::max(m_device_properties.limits.bufferImageGranularity, static_cast<VkDeviceSize>(1)); 2076 2077 if (enable_debug_utils) 2078 EnableDebugUtils(); 2079 2080 VkSurfaceKHR surface = VK_NULL_HANDLE; 2081 ScopedGuard surface_cleanup = [this, &surface]() { 2082 if (surface != VK_NULL_HANDLE) 2083 vkDestroySurfaceKHR(m_instance, surface, nullptr); 2084 }; 2085 if (m_window_info.type != WindowInfo::Type::Surfaceless) 2086 { 2087 surface = VulkanSwapChain::CreateVulkanSurface(m_instance, m_physical_device, &m_window_info); 2088 if (surface == VK_NULL_HANDLE) 2089 return false; 2090 } 2091 2092 // Attempt to create the device. 2093 if (!CreateDevice(surface, enable_validation_layer, disabled_features, error)) 2094 return false; 2095 2096 // And critical resources. 2097 if (!CreateAllocator() || !CreatePersistentDescriptorPool() || !CreateCommandBuffers() || !CreatePipelineLayouts()) 2098 return false; 2099 2100 if (threaded_presentation) 2101 StartPresentThread(); 2102 2103 m_exclusive_fullscreen_control = exclusive_fullscreen_control; 2104 2105 if (surface != VK_NULL_HANDLE) 2106 { 2107 VkPresentModeKHR present_mode; 2108 if (!VulkanSwapChain::SelectPresentMode(surface, &m_vsync_mode, &present_mode) || 2109 !(m_swap_chain = VulkanSwapChain::Create(m_window_info, surface, present_mode, m_exclusive_fullscreen_control))) 2110 { 2111 Error::SetStringView(error, "Failed to create swap chain"); 2112 return false; 2113 } 2114 2115 // NOTE: This is assigned afterwards, because some platforms can modify the window info (e.g. Metal). 2116 m_window_info = m_swap_chain->GetWindowInfo(); 2117 } 2118 2119 surface_cleanup.Cancel(); 2120 2121 // Render a frame as soon as possible to clear out whatever was previously being displayed. 2122 if (m_window_info.type != WindowInfo::Type::Surfaceless) 2123 RenderBlankFrame(); 2124 2125 if (!CreateNullTexture()) 2126 { 2127 Error::SetStringView(error, "Failed to create dummy texture"); 2128 return false; 2129 } 2130 2131 if (!CreateBuffers() || !CreatePersistentDescriptorSets()) 2132 { 2133 Error::SetStringView(error, "Failed to create buffers/descriptor sets"); 2134 return false; 2135 } 2136 2137 return true; 2138 } 2139 2140 void VulkanDevice::DestroyDevice() 2141 { 2142 std::unique_lock lock(s_instance_mutex); 2143 2144 if (InRenderPass()) 2145 EndRenderPass(); 2146 2147 // Don't both submitting the current command buffer, just toss it. 2148 if (m_device != VK_NULL_HANDLE) 2149 WaitForGPUIdle(); 2150 2151 StopPresentThread(); 2152 m_swap_chain.reset(); 2153 2154 if (m_null_texture) 2155 { 2156 m_null_texture->Destroy(false); 2157 m_null_texture.reset(); 2158 } 2159 for (auto& it : m_cleanup_objects) 2160 it.second(); 2161 m_cleanup_objects.clear(); 2162 DestroyPersistentDescriptorSets(); 2163 DestroyBuffers(); 2164 DestroySamplers(); 2165 2166 DestroyPersistentDescriptorPool(); 2167 DestroyPipelineLayouts(); 2168 DestroyCommandBuffers(); 2169 DestroyAllocator(); 2170 2171 for (auto& it : m_render_pass_cache) 2172 vkDestroyRenderPass(m_device, it.second, nullptr); 2173 m_render_pass_cache.clear(); 2174 2175 if (m_pipeline_cache != VK_NULL_HANDLE) 2176 { 2177 vkDestroyPipelineCache(m_device, m_pipeline_cache, nullptr); 2178 m_pipeline_cache = VK_NULL_HANDLE; 2179 } 2180 2181 if (m_device != VK_NULL_HANDLE) 2182 { 2183 vkDestroyDevice(m_device, nullptr); 2184 m_device = VK_NULL_HANDLE; 2185 } 2186 2187 if (m_debug_messenger_callback != VK_NULL_HANDLE) 2188 DisableDebugUtils(); 2189 2190 if (m_instance != VK_NULL_HANDLE) 2191 { 2192 vkDestroyInstance(m_instance, nullptr); 2193 m_instance = VK_NULL_HANDLE; 2194 } 2195 2196 Vulkan::UnloadVulkanLibrary(); 2197 } 2198 2199 bool VulkanDevice::ValidatePipelineCacheHeader(const VK_PIPELINE_CACHE_HEADER& header) 2200 { 2201 if (header.header_length < sizeof(VK_PIPELINE_CACHE_HEADER)) 2202 { 2203 ERROR_LOG("Pipeline cache failed validation: Invalid header length"); 2204 return false; 2205 } 2206 2207 if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) 2208 { 2209 ERROR_LOG("Pipeline cache failed validation: Invalid header version"); 2210 return false; 2211 } 2212 2213 if (header.vendor_id != m_device_properties.vendorID) 2214 { 2215 ERROR_LOG("Pipeline cache failed validation: Incorrect vendor ID (file: 0x{:X}, device: 0x{:X})", header.vendor_id, 2216 m_device_properties.vendorID); 2217 return false; 2218 } 2219 2220 if (header.device_id != m_device_properties.deviceID) 2221 { 2222 ERROR_LOG("Pipeline cache failed validation: Incorrect device ID (file: 0x{:X}, device: 0x{:X})", header.device_id, 2223 m_device_properties.deviceID); 2224 return false; 2225 } 2226 2227 if (std::memcmp(header.uuid, m_device_properties.pipelineCacheUUID, VK_UUID_SIZE) != 0) 2228 { 2229 ERROR_LOG("Pipeline cache failed validation: Incorrect UUID"); 2230 return false; 2231 } 2232 2233 return true; 2234 } 2235 2236 void VulkanDevice::FillPipelineCacheHeader(VK_PIPELINE_CACHE_HEADER* header) 2237 { 2238 header->header_length = sizeof(VK_PIPELINE_CACHE_HEADER); 2239 header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE; 2240 header->vendor_id = m_device_properties.vendorID; 2241 header->device_id = m_device_properties.deviceID; 2242 std::memcpy(header->uuid, m_device_properties.pipelineCacheUUID, VK_UUID_SIZE); 2243 } 2244 2245 bool VulkanDevice::ReadPipelineCache(std::optional<DynamicHeapArray<u8>> data) 2246 { 2247 if (data.has_value()) 2248 { 2249 if (data->size() < sizeof(VK_PIPELINE_CACHE_HEADER)) 2250 { 2251 ERROR_LOG("Pipeline cache is too small, ignoring."); 2252 data.reset(); 2253 } 2254 2255 VK_PIPELINE_CACHE_HEADER header; 2256 std::memcpy(&header, data->data(), sizeof(header)); 2257 if (!ValidatePipelineCacheHeader(header)) 2258 data.reset(); 2259 } 2260 2261 const VkPipelineCacheCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, nullptr, 0, 2262 data.has_value() ? data->size() : 0, data.has_value() ? data->data() : nullptr}; 2263 VkResult res = vkCreatePipelineCache(m_device, &ci, nullptr, &m_pipeline_cache); 2264 if (res != VK_SUCCESS) 2265 { 2266 LOG_VULKAN_ERROR(res, "vkCreatePipelineCache() failed: "); 2267 return false; 2268 } 2269 2270 return true; 2271 } 2272 2273 bool VulkanDevice::GetPipelineCacheData(DynamicHeapArray<u8>* data) 2274 { 2275 if (m_pipeline_cache == VK_NULL_HANDLE) 2276 return false; 2277 2278 size_t data_size; 2279 VkResult res = vkGetPipelineCacheData(m_device, m_pipeline_cache, &data_size, nullptr); 2280 if (res != VK_SUCCESS) 2281 { 2282 LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData() failed: "); 2283 return false; 2284 } 2285 2286 data->resize(data_size); 2287 res = vkGetPipelineCacheData(m_device, m_pipeline_cache, &data_size, data->data()); 2288 if (res != VK_SUCCESS) 2289 { 2290 LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData() (2) failed: "); 2291 return false; 2292 } 2293 2294 data->resize(data_size); 2295 return true; 2296 } 2297 2298 bool VulkanDevice::UpdateWindow() 2299 { 2300 DestroySurface(); 2301 2302 if (!AcquireWindow(false)) 2303 return false; 2304 2305 if (m_window_info.IsSurfaceless()) 2306 return true; 2307 2308 // make sure previous frames are presented 2309 if (InRenderPass()) 2310 EndRenderPass(); 2311 SubmitCommandBuffer(false); 2312 WaitForGPUIdle(); 2313 2314 VkSurfaceKHR surface = VulkanSwapChain::CreateVulkanSurface(m_instance, m_physical_device, &m_window_info); 2315 if (surface == VK_NULL_HANDLE) 2316 { 2317 ERROR_LOG("Failed to create new surface for swap chain"); 2318 return false; 2319 } 2320 2321 VkPresentModeKHR present_mode; 2322 if (!VulkanSwapChain::SelectPresentMode(surface, &m_vsync_mode, &present_mode) || 2323 !(m_swap_chain = VulkanSwapChain::Create(m_window_info, surface, present_mode, m_exclusive_fullscreen_control))) 2324 { 2325 ERROR_LOG("Failed to create swap chain"); 2326 VulkanSwapChain::DestroyVulkanSurface(m_instance, &m_window_info, surface); 2327 return false; 2328 } 2329 2330 m_window_info = m_swap_chain->GetWindowInfo(); 2331 RenderBlankFrame(); 2332 return true; 2333 } 2334 2335 void VulkanDevice::ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale) 2336 { 2337 if (!m_swap_chain) 2338 return; 2339 2340 if (m_swap_chain->GetWidth() == static_cast<u32>(new_window_width) && 2341 m_swap_chain->GetHeight() == static_cast<u32>(new_window_height)) 2342 { 2343 // skip unnecessary resizes 2344 m_window_info.surface_scale = new_window_scale; 2345 return; 2346 } 2347 2348 // make sure previous frames are presented 2349 WaitForGPUIdle(); 2350 2351 if (!m_swap_chain->ResizeSwapChain(new_window_width, new_window_height, new_window_scale)) 2352 { 2353 // AcquireNextImage() will fail, and we'll recreate the surface. 2354 ERROR_LOG("Failed to resize swap chain. Next present will fail."); 2355 return; 2356 } 2357 2358 m_window_info = m_swap_chain->GetWindowInfo(); 2359 } 2360 2361 void VulkanDevice::DestroySurface() 2362 { 2363 WaitForGPUIdle(); 2364 m_swap_chain.reset(); 2365 } 2366 2367 bool VulkanDevice::SupportsTextureFormat(GPUTexture::Format format) const 2368 { 2369 return (TEXTURE_FORMAT_MAPPING[static_cast<u8>(format)] != VK_FORMAT_UNDEFINED); 2370 } 2371 2372 std::string VulkanDevice::GetDriverInfo() const 2373 { 2374 std::string ret; 2375 const u32 api_version = m_device_properties.apiVersion; 2376 const u32 driver_version = m_device_properties.driverVersion; 2377 if (m_optional_extensions.vk_khr_driver_properties) 2378 { 2379 const VkPhysicalDeviceDriverProperties& props = m_device_driver_properties; 2380 ret = fmt::format( 2381 "Driver {}.{}.{}\nVulkan {}.{}.{}\nConformance Version {}.{}.{}.{}\n{}\n{}\n{}", VK_VERSION_MAJOR(driver_version), 2382 VK_VERSION_MINOR(driver_version), VK_VERSION_PATCH(driver_version), VK_API_VERSION_MAJOR(api_version), 2383 VK_API_VERSION_MINOR(api_version), VK_API_VERSION_PATCH(api_version), props.conformanceVersion.major, 2384 props.conformanceVersion.minor, props.conformanceVersion.subminor, props.conformanceVersion.patch, 2385 props.driverInfo, props.driverName, m_device_properties.deviceName); 2386 } 2387 else 2388 { 2389 ret = 2390 fmt::format("Driver {}.{}.{}\nVulkan {}.{}.{}\n{}", VK_VERSION_MAJOR(driver_version), 2391 VK_VERSION_MINOR(driver_version), VK_VERSION_PATCH(driver_version), VK_API_VERSION_MAJOR(api_version), 2392 VK_API_VERSION_MINOR(api_version), VK_API_VERSION_PATCH(api_version), m_device_properties.deviceName); 2393 } 2394 2395 return ret; 2396 } 2397 2398 void VulkanDevice::ExecuteAndWaitForGPUIdle() 2399 { 2400 if (InRenderPass()) 2401 EndRenderPass(); 2402 2403 SubmitCommandBuffer(true); 2404 } 2405 2406 void VulkanDevice::SetVSyncMode(GPUVSyncMode mode, bool allow_present_throttle) 2407 { 2408 m_allow_present_throttle = allow_present_throttle; 2409 if (!m_swap_chain) 2410 { 2411 // For when it is re-created. 2412 m_vsync_mode = mode; 2413 return; 2414 } 2415 2416 VkPresentModeKHR present_mode; 2417 if (!VulkanSwapChain::SelectPresentMode(m_swap_chain->GetSurface(), &mode, &present_mode)) 2418 { 2419 ERROR_LOG("Ignoring vsync mode change."); 2420 return; 2421 } 2422 2423 // Actually changed? If using a fallback, it might not have. 2424 if (m_vsync_mode == mode) 2425 return; 2426 2427 m_vsync_mode = mode; 2428 2429 // This swap chain should not be used by the current buffer, thus safe to destroy. 2430 WaitForGPUIdle(); 2431 if (!m_swap_chain->SetPresentMode(present_mode)) 2432 { 2433 Panic("Failed to update swap chain present mode."); 2434 m_swap_chain.reset(); 2435 } 2436 } 2437 2438 bool VulkanDevice::BeginPresent(bool frame_skip, u32 clear_color) 2439 { 2440 if (InRenderPass()) 2441 EndRenderPass(); 2442 2443 if (frame_skip) 2444 return false; 2445 2446 // If we're running surfaceless, kick the command buffer so we don't run out of descriptors. 2447 if (!m_swap_chain) 2448 { 2449 SubmitCommandBuffer(false); 2450 TrimTexturePool(); 2451 return false; 2452 } 2453 2454 // Previous frame needs to be presented before we can acquire the swap chain. 2455 WaitForPresentComplete(); 2456 2457 // Check if the device was lost. 2458 if (CheckLastSubmitFail()) 2459 { 2460 Panic("Fixme"); // TODO 2461 TrimTexturePool(); 2462 return false; 2463 } 2464 2465 VkResult res = m_swap_chain->AcquireNextImage(); 2466 if (res != VK_SUCCESS) 2467 { 2468 LOG_VULKAN_ERROR(res, "vkAcquireNextImageKHR() failed: "); 2469 m_swap_chain->ReleaseCurrentImage(); 2470 2471 if (res == VK_SUBOPTIMAL_KHR || res == VK_ERROR_OUT_OF_DATE_KHR) 2472 { 2473 ResizeWindow(0, 0, m_window_info.surface_scale); 2474 res = m_swap_chain->AcquireNextImage(); 2475 } 2476 else if (res == VK_ERROR_SURFACE_LOST_KHR) 2477 { 2478 WARNING_LOG("Surface lost, attempting to recreate"); 2479 if (!m_swap_chain->RecreateSurface(m_window_info)) 2480 { 2481 ERROR_LOG("Failed to recreate surface after loss"); 2482 SubmitCommandBuffer(false); 2483 TrimTexturePool(); 2484 return false; 2485 } 2486 2487 res = m_swap_chain->AcquireNextImage(); 2488 } 2489 2490 // This can happen when multiple resize events happen in quick succession. 2491 // In this case, just wait until the next frame to try again. 2492 if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR) 2493 { 2494 // Still submit the command buffer, otherwise we'll end up with several frames waiting. 2495 SubmitCommandBuffer(false); 2496 TrimTexturePool(); 2497 return false; 2498 } 2499 } 2500 2501 BeginSwapChainRenderPass(clear_color); 2502 return true; 2503 } 2504 2505 void VulkanDevice::EndPresent(bool explicit_present) 2506 { 2507 DebugAssert(InRenderPass() && m_num_current_render_targets == 0 && !m_current_depth_target); 2508 EndRenderPass(); 2509 2510 VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); 2511 VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, m_swap_chain->GetCurrentImage(), GPUTexture::Type::RenderTarget, 2512 0, 1, 0, 1, VulkanTexture::Layout::ColorAttachment, 2513 VulkanTexture::Layout::PresentSrc); 2514 EndAndSubmitCommandBuffer(m_swap_chain.get(), explicit_present, !m_swap_chain->IsPresentModeSynchronizing()); 2515 MoveToNextCommandBuffer(); 2516 InvalidateCachedState(); 2517 TrimTexturePool(); 2518 } 2519 2520 void VulkanDevice::SubmitPresent() 2521 { 2522 DebugAssert(m_swap_chain); 2523 DoPresent(m_swap_chain.get()); 2524 } 2525 2526 #ifdef _DEBUG 2527 static std::array<float, 3> Palette(float phase, const std::array<float, 3>& a, const std::array<float, 3>& b, 2528 const std::array<float, 3>& c, const std::array<float, 3>& d) 2529 { 2530 std::array<float, 3> result; 2531 result[0] = a[0] + b[0] * std::cos(6.28318f * (c[0] * phase + d[0])); 2532 result[1] = a[1] + b[1] * std::cos(6.28318f * (c[1] * phase + d[1])); 2533 result[2] = a[2] + b[2] * std::cos(6.28318f * (c[2] * phase + d[2])); 2534 return result; 2535 } 2536 #endif 2537 2538 void VulkanDevice::PushDebugGroup(const char* name) 2539 { 2540 #ifdef _DEBUG 2541 if (!vkCmdBeginDebugUtilsLabelEXT || !m_debug_device) 2542 return; 2543 2544 const std::array<float, 3> color = Palette(static_cast<float>(++s_debug_scope_depth), {0.5f, 0.5f, 0.5f}, 2545 {0.5f, 0.5f, 0.5f}, {1.0f, 1.0f, 0.5f}, {0.8f, 0.90f, 0.30f}); 2546 2547 const VkDebugUtilsLabelEXT label = { 2548 VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, 2549 nullptr, 2550 name, 2551 {color[0], color[1], color[2], 1.0f}, 2552 }; 2553 vkCmdBeginDebugUtilsLabelEXT(GetCurrentCommandBuffer(), &label); 2554 #endif 2555 } 2556 2557 void VulkanDevice::PopDebugGroup() 2558 { 2559 #ifdef _DEBUG 2560 if (!vkCmdEndDebugUtilsLabelEXT || !m_debug_device) 2561 return; 2562 2563 s_debug_scope_depth = (s_debug_scope_depth == 0) ? 0 : (s_debug_scope_depth - 1u); 2564 2565 vkCmdEndDebugUtilsLabelEXT(GetCurrentCommandBuffer()); 2566 #endif 2567 } 2568 2569 void VulkanDevice::InsertDebugMessage(const char* msg) 2570 { 2571 #ifdef _DEBUG 2572 if (!vkCmdInsertDebugUtilsLabelEXT || !m_debug_device) 2573 return; 2574 2575 const VkDebugUtilsLabelEXT label = {VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, nullptr, msg, {0.0f, 0.0f, 0.0f, 1.0f}}; 2576 vkCmdInsertDebugUtilsLabelEXT(GetCurrentCommandBuffer(), &label); 2577 #endif 2578 } 2579 2580 u32 VulkanDevice::GetMaxMultisamples(VkPhysicalDevice physical_device, const VkPhysicalDeviceProperties& properties) 2581 { 2582 VkImageFormatProperties color_properties = {}; 2583 vkGetPhysicalDeviceImageFormatProperties(physical_device, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_TYPE_2D, 2584 VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 0, 2585 &color_properties); 2586 VkImageFormatProperties depth_properties = {}; 2587 vkGetPhysicalDeviceImageFormatProperties(physical_device, VK_FORMAT_D32_SFLOAT, VK_IMAGE_TYPE_2D, 2588 VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, 0, 2589 &depth_properties); 2590 const VkSampleCountFlags combined_properties = properties.limits.framebufferColorSampleCounts & 2591 properties.limits.framebufferDepthSampleCounts & 2592 color_properties.sampleCounts & depth_properties.sampleCounts; 2593 if (combined_properties & VK_SAMPLE_COUNT_64_BIT) 2594 return 64; 2595 else if (combined_properties & VK_SAMPLE_COUNT_32_BIT) 2596 return 32; 2597 else if (combined_properties & VK_SAMPLE_COUNT_16_BIT) 2598 return 16; 2599 else if (combined_properties & VK_SAMPLE_COUNT_8_BIT) 2600 return 8; 2601 else if (combined_properties & VK_SAMPLE_COUNT_4_BIT) 2602 return 4; 2603 else if (combined_properties & VK_SAMPLE_COUNT_2_BIT) 2604 return 2; 2605 else 2606 return 1; 2607 } 2608 2609 void VulkanDevice::SetFeatures(FeatureMask disabled_features, const VkPhysicalDeviceFeatures& vk_features) 2610 { 2611 m_max_texture_size = 2612 std::min(m_device_properties.limits.maxImageDimension2D, m_device_properties.limits.maxFramebufferWidth); 2613 m_max_multisamples = GetMaxMultisamples(m_physical_device, m_device_properties); 2614 2615 m_features.dual_source_blend = !(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND) && vk_features.dualSrcBlend; 2616 m_features.framebuffer_fetch = 2617 !(disabled_features & (FEATURE_MASK_FEEDBACK_LOOPS | FEATURE_MASK_FRAMEBUFFER_FETCH)) && 2618 m_optional_extensions.vk_ext_rasterization_order_attachment_access; 2619 2620 if (!m_features.dual_source_blend) 2621 WARNING_LOG("Vulkan driver is missing dual-source blending. This will have an impact on performance."); 2622 2623 m_features.noperspective_interpolation = true; 2624 m_features.texture_copy_to_self = !(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF); 2625 m_features.per_sample_shading = vk_features.sampleRateShading; 2626 m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS); 2627 m_features.feedback_loops = !(disabled_features & FEATURE_MASK_FEEDBACK_LOOPS); 2628 2629 #ifdef __APPLE__ 2630 // Partial texture buffer uploads appear to be broken in macOS/MoltenVK. 2631 m_features.texture_buffers_emulated_with_ssbo = true; 2632 #else 2633 const u32 max_texel_buffer_elements = m_device_properties.limits.maxTexelBufferElements; 2634 INFO_LOG("Max texel buffer elements: {}", max_texel_buffer_elements); 2635 if (max_texel_buffer_elements < MIN_TEXEL_BUFFER_ELEMENTS) 2636 { 2637 m_features.texture_buffers_emulated_with_ssbo = true; 2638 } 2639 #endif 2640 2641 if (m_features.texture_buffers_emulated_with_ssbo) 2642 WARNING_LOG("Emulating texture buffers with SSBOs."); 2643 2644 m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && vk_features.geometryShader; 2645 2646 m_features.partial_msaa_resolve = true; 2647 m_features.memory_import = m_optional_extensions.vk_ext_external_memory_host; 2648 m_features.explicit_present = true; 2649 m_features.shader_cache = true; 2650 m_features.pipeline_cache = true; 2651 m_features.prefer_unused_textures = true; 2652 m_features.raster_order_views = 2653 (!(disabled_features & FEATURE_MASK_RASTER_ORDER_VIEWS) && vk_features.fragmentStoresAndAtomics && 2654 m_optional_extensions.vk_ext_fragment_shader_interlock); 2655 } 2656 2657 void VulkanDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, 2658 GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, 2659 u32 height) 2660 { 2661 VulkanTexture* const S = static_cast<VulkanTexture*>(src); 2662 VulkanTexture* const D = static_cast<VulkanTexture*>(dst); 2663 2664 if (S->GetState() == GPUTexture::State::Cleared) 2665 { 2666 // source is cleared. if destination is a render target, we can carry the clear forward 2667 if (D->IsRenderTargetOrDepthStencil()) 2668 { 2669 if (dst_level == 0 && dst_x == 0 && dst_y == 0 && width == D->GetWidth() && height == D->GetHeight()) 2670 { 2671 // pass it forward if we're clearing the whole thing 2672 if (S->IsDepthStencil()) 2673 D->SetClearDepth(S->GetClearDepth()); 2674 else 2675 D->SetClearColor(S->GetClearColor()); 2676 2677 return; 2678 } 2679 2680 if (D->GetState() == GPUTexture::State::Cleared) 2681 { 2682 // destination is cleared, if it's the same colour and rect, we can just avoid this entirely 2683 if (D->IsDepthStencil()) 2684 { 2685 if (D->GetClearDepth() == S->GetClearDepth()) 2686 return; 2687 } 2688 else 2689 { 2690 if (D->GetClearColor() == S->GetClearColor()) 2691 return; 2692 } 2693 } 2694 2695 // TODO: Could use attachment clear here.. 2696 } 2697 2698 // commit the clear to the source first, then do normal copy 2699 S->CommitClear(); 2700 } 2701 2702 // if the destination has been cleared, and we're not overwriting the whole thing, commit the clear first 2703 // (the area outside of where we're copying to) 2704 if (D->GetState() == GPUTexture::State::Cleared && 2705 (dst_level != 0 || dst_x != 0 || dst_y != 0 || width != D->GetWidth() || height != D->GetHeight())) 2706 { 2707 D->CommitClear(); 2708 } 2709 2710 // *now* we can do a normal image copy. 2711 const VkImageAspectFlags src_aspect = (S->IsDepthStencil()) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; 2712 const VkImageAspectFlags dst_aspect = (D->IsDepthStencil()) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; 2713 const VkImageCopy ic = {{src_aspect, src_level, src_layer, 1u}, 2714 {static_cast<s32>(src_x), static_cast<s32>(src_y), 0}, 2715 {dst_aspect, dst_level, dst_layer, 1u}, 2716 {static_cast<s32>(dst_x), static_cast<s32>(dst_y), 0}, 2717 {static_cast<u32>(width), static_cast<u32>(height), 1u}}; 2718 2719 if (InRenderPass()) 2720 EndRenderPass(); 2721 2722 s_stats.num_copies++; 2723 2724 S->SetUseFenceCounter(GetCurrentFenceCounter()); 2725 D->SetUseFenceCounter(GetCurrentFenceCounter()); 2726 S->TransitionToLayout((D == S) ? VulkanTexture::Layout::TransferSelf : VulkanTexture::Layout::TransferSrc); 2727 D->TransitionToLayout((D == S) ? VulkanTexture::Layout::TransferSelf : VulkanTexture::Layout::TransferDst); 2728 2729 vkCmdCopyImage(GetCurrentCommandBuffer(), S->GetImage(), S->GetVkLayout(), D->GetImage(), D->GetVkLayout(), 1, &ic); 2730 2731 D->SetState(GPUTexture::State::Dirty); 2732 } 2733 2734 void VulkanDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, 2735 GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height) 2736 { 2737 DebugAssert((src_x + width) <= src->GetWidth()); 2738 DebugAssert((src_y + height) <= src->GetHeight()); 2739 DebugAssert(src->IsMultisampled()); 2740 DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers()); 2741 DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level)); 2742 DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level)); 2743 DebugAssert(!dst->IsMultisampled() && src->IsMultisampled()); 2744 2745 if (InRenderPass()) 2746 EndRenderPass(); 2747 2748 s_stats.num_copies++; 2749 2750 VulkanTexture* D = static_cast<VulkanTexture*>(dst); 2751 VulkanTexture* S = static_cast<VulkanTexture*>(src); 2752 const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); 2753 2754 if (S->GetState() == GPUTexture::State::Cleared) 2755 S->CommitClear(cmdbuf); 2756 if (D->IsRenderTargetOrDepthStencil() && D->GetState() == GPUTexture::State::Cleared) 2757 { 2758 if (width < dst->GetWidth() || height < dst->GetHeight()) 2759 D->CommitClear(cmdbuf); 2760 else 2761 D->SetState(GPUTexture::State::Dirty); 2762 } 2763 2764 S->TransitionSubresourcesToLayout(cmdbuf, 0, 1, 0, 1, S->GetLayout(), VulkanTexture::Layout::TransferSrc); 2765 D->TransitionSubresourcesToLayout(cmdbuf, dst_layer, 1, dst_level, 1, D->GetLayout(), 2766 VulkanTexture::Layout::TransferDst); 2767 2768 const VkImageResolve resolve = {{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, 2769 {static_cast<s32>(src_x), static_cast<s32>(src_y), 0}, 2770 {VK_IMAGE_ASPECT_COLOR_BIT, dst_level, dst_layer, 1u}, 2771 {static_cast<s32>(dst_x), static_cast<s32>(dst_y), 0}, 2772 {width, height, 1}}; 2773 vkCmdResolveImage(cmdbuf, S->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, D->GetImage(), 2774 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &resolve); 2775 2776 S->TransitionSubresourcesToLayout(cmdbuf, 0, 1, 0, 1, VulkanTexture::Layout::TransferSrc, S->GetLayout()); 2777 D->TransitionSubresourcesToLayout(cmdbuf, dst_layer, 1, dst_level, 1, VulkanTexture::Layout::TransferDst, 2778 D->GetLayout()); 2779 } 2780 2781 void VulkanDevice::ClearRenderTarget(GPUTexture* t, u32 c) 2782 { 2783 GPUDevice::ClearRenderTarget(t, c); 2784 if (InRenderPass()) 2785 { 2786 const s32 idx = IsRenderTargetBoundIndex(t); 2787 if (idx >= 0) 2788 { 2789 VulkanTexture* T = static_cast<VulkanTexture*>(t); 2790 2791 if (IsDeviceNVIDIA()) 2792 { 2793 EndRenderPass(); 2794 } 2795 else 2796 { 2797 // Use an attachment clear so the render pass isn't restarted. 2798 const VkClearAttachment ca = {VK_IMAGE_ASPECT_COLOR_BIT, 2799 static_cast<u32>(idx), 2800 {.color = static_cast<VulkanTexture*>(T)->GetClearColorValue()}}; 2801 const VkClearRect rc = {{{0, 0}, {T->GetWidth(), T->GetHeight()}}, 0u, 1u}; 2802 vkCmdClearAttachments(m_current_command_buffer, 1, &ca, 1, &rc); 2803 T->SetState(GPUTexture::State::Dirty); 2804 } 2805 } 2806 } 2807 } 2808 2809 void VulkanDevice::ClearDepth(GPUTexture* t, float d) 2810 { 2811 GPUDevice::ClearDepth(t, d); 2812 if (InRenderPass() && m_current_depth_target == t) 2813 { 2814 // Using vkCmdClearAttachments() within a render pass on NVIDIA seems to cause dependency issues 2815 // between draws that are testing depth which precede it. The result is flickering where Z tests 2816 // should be failing. Breaking/restarting the render pass isn't enough to work around the bug, 2817 // it needs an explicit pipeline barrier. 2818 VulkanTexture* T = static_cast<VulkanTexture*>(t); 2819 if (IsDeviceNVIDIA()) 2820 { 2821 EndRenderPass(); 2822 T->TransitionSubresourcesToLayout(GetCurrentCommandBuffer(), 0, 1, 0, 1, T->GetLayout(), T->GetLayout()); 2823 } 2824 else 2825 { 2826 // Use an attachment clear so the render pass isn't restarted. 2827 const VkClearAttachment ca = {VK_IMAGE_ASPECT_DEPTH_BIT, 0, {.depthStencil = T->GetClearDepthValue()}}; 2828 const VkClearRect rc = {{{0, 0}, {T->GetWidth(), T->GetHeight()}}, 0u, 1u}; 2829 vkCmdClearAttachments(m_current_command_buffer, 1, &ca, 1, &rc); 2830 T->SetState(GPUTexture::State::Dirty); 2831 } 2832 } 2833 } 2834 2835 void VulkanDevice::InvalidateRenderTarget(GPUTexture* t) 2836 { 2837 GPUDevice::InvalidateRenderTarget(t); 2838 if (InRenderPass() && (t->IsDepthStencil() ? (m_current_depth_target == t) : (IsRenderTargetBoundIndex(t) >= 0))) 2839 { 2840 // Invalidate includes leaving whatever's in the current buffer. 2841 GL_INS_FMT("Invalidating current {}", t->IsDepthStencil() ? "DS" : "RT"); 2842 t->SetState(GPUTexture::State::Dirty); 2843 } 2844 } 2845 2846 bool VulkanDevice::CreateBuffers() 2847 { 2848 if (!m_vertex_buffer.Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_BUFFER_SIZE)) 2849 { 2850 ERROR_LOG("Failed to allocate vertex buffer"); 2851 return false; 2852 } 2853 2854 if (!m_index_buffer.Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, INDEX_BUFFER_SIZE)) 2855 { 2856 ERROR_LOG("Failed to allocate index buffer"); 2857 return false; 2858 } 2859 2860 if (!m_uniform_buffer.Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VERTEX_UNIFORM_BUFFER_SIZE)) 2861 { 2862 ERROR_LOG("Failed to allocate uniform buffer"); 2863 return false; 2864 } 2865 2866 if (!m_texture_upload_buffer.Create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, TEXTURE_BUFFER_SIZE)) 2867 { 2868 ERROR_LOG("Failed to allocate texture upload buffer"); 2869 return false; 2870 } 2871 2872 return true; 2873 } 2874 2875 void VulkanDevice::DestroyBuffers() 2876 { 2877 m_texture_upload_buffer.Destroy(false); 2878 m_uniform_buffer.Destroy(false); 2879 m_index_buffer.Destroy(false); 2880 m_vertex_buffer.Destroy(false); 2881 } 2882 2883 void VulkanDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, 2884 u32* map_base_vertex) 2885 { 2886 const u32 req_size = vertex_size * vertex_count; 2887 if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size)) 2888 { 2889 SubmitCommandBufferAndRestartRenderPass("out of vertex space"); 2890 if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size)) 2891 Panic("Failed to allocate vertex space"); 2892 } 2893 2894 *map_ptr = m_vertex_buffer.GetCurrentHostPointer(); 2895 *map_space = m_vertex_buffer.GetCurrentSpace() / vertex_size; 2896 *map_base_vertex = m_vertex_buffer.GetCurrentOffset() / vertex_size; 2897 } 2898 2899 void VulkanDevice::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) 2900 { 2901 const u32 size = vertex_size * vertex_count; 2902 s_stats.buffer_streamed += size; 2903 m_vertex_buffer.CommitMemory(size); 2904 } 2905 2906 void VulkanDevice::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) 2907 { 2908 const u32 req_size = sizeof(DrawIndex) * index_count; 2909 if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex))) 2910 { 2911 SubmitCommandBufferAndRestartRenderPass("out of index space"); 2912 if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex))) 2913 Panic("Failed to allocate index space"); 2914 } 2915 2916 *map_ptr = reinterpret_cast<DrawIndex*>(m_index_buffer.GetCurrentHostPointer()); 2917 *map_space = m_index_buffer.GetCurrentSpace() / sizeof(DrawIndex); 2918 *map_base_index = m_index_buffer.GetCurrentOffset() / sizeof(DrawIndex); 2919 } 2920 2921 void VulkanDevice::UnmapIndexBuffer(u32 used_index_count) 2922 { 2923 const u32 size = sizeof(DrawIndex) * used_index_count; 2924 s_stats.buffer_streamed += size; 2925 m_index_buffer.CommitMemory(size); 2926 } 2927 2928 void VulkanDevice::PushUniformBuffer(const void* data, u32 data_size) 2929 { 2930 DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE); 2931 s_stats.buffer_streamed += data_size; 2932 vkCmdPushConstants(GetCurrentCommandBuffer(), GetCurrentVkPipelineLayout(), UNIFORM_PUSH_CONSTANTS_STAGES, 0, 2933 data_size, data); 2934 } 2935 2936 void* VulkanDevice::MapUniformBuffer(u32 size) 2937 { 2938 const u32 align = static_cast<u32>(m_device_properties.limits.minUniformBufferOffsetAlignment); 2939 const u32 used_space = Common::AlignUpPow2(size, align); 2940 if (!m_uniform_buffer.ReserveMemory(used_space + MAX_UNIFORM_BUFFER_SIZE, align)) 2941 { 2942 SubmitCommandBufferAndRestartRenderPass("out of uniform space"); 2943 if (!m_uniform_buffer.ReserveMemory(used_space + MAX_UNIFORM_BUFFER_SIZE, align)) 2944 Panic("Failed to allocate uniform space."); 2945 } 2946 2947 return m_uniform_buffer.GetCurrentHostPointer(); 2948 } 2949 2950 void VulkanDevice::UnmapUniformBuffer(u32 size) 2951 { 2952 s_stats.buffer_streamed += size; 2953 m_uniform_buffer_position = m_uniform_buffer.GetCurrentOffset(); 2954 m_uniform_buffer.CommitMemory(size); 2955 m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS; 2956 } 2957 2958 bool VulkanDevice::CreateNullTexture() 2959 { 2960 m_null_texture = VulkanTexture::Create(1, 1, 1, 1, 1, GPUTexture::Type::RWTexture, GPUTexture::Format::RGBA8, 2961 VK_FORMAT_R8G8B8A8_UNORM); 2962 if (!m_null_texture) 2963 return false; 2964 2965 const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); 2966 const VkImageSubresourceRange srr{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}; 2967 const VkClearColorValue ccv{}; 2968 m_null_texture->TransitionToLayout(cmdbuf, VulkanTexture::Layout::ClearDst); 2969 vkCmdClearColorImage(cmdbuf, m_null_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &ccv, 1, &srr); 2970 m_null_texture->TransitionToLayout(cmdbuf, VulkanTexture::Layout::General); 2971 Vulkan::SetObjectName(m_device, m_null_texture->GetImage(), "Null texture"); 2972 Vulkan::SetObjectName(m_device, m_null_texture->GetView(), "Null texture view"); 2973 2974 // Bind null texture and point sampler state to all. 2975 const VkSampler point_sampler = GetSampler(GPUSampler::GetNearestConfig()); 2976 if (point_sampler == VK_NULL_HANDLE) 2977 return false; 2978 2979 for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) 2980 m_current_samplers[i] = point_sampler; 2981 2982 return true; 2983 } 2984 2985 bool VulkanDevice::CreatePipelineLayouts() 2986 { 2987 Vulkan::DescriptorSetLayoutBuilder dslb; 2988 Vulkan::PipelineLayoutBuilder plb; 2989 2990 { 2991 dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, 2992 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT); 2993 if ((m_ubo_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) 2994 return false; 2995 Vulkan::SetObjectName(m_device, m_ubo_ds_layout, "UBO Descriptor Set Layout"); 2996 } 2997 2998 { 2999 dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); 3000 if ((m_single_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) 3001 return false; 3002 Vulkan::SetObjectName(m_device, m_single_texture_ds_layout, "Single Texture Descriptor Set Layout"); 3003 } 3004 3005 { 3006 dslb.AddBinding(0, 3007 m_features.texture_buffers_emulated_with_ssbo ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER : 3008 VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 3009 1, VK_SHADER_STAGE_FRAGMENT_BIT); 3010 if ((m_single_texture_buffer_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) 3011 return false; 3012 Vulkan::SetObjectName(m_device, m_single_texture_buffer_ds_layout, "Texture Buffer Descriptor Set Layout"); 3013 } 3014 3015 { 3016 if (m_optional_extensions.vk_khr_push_descriptor) 3017 dslb.SetPushFlag(); 3018 for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) 3019 dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); 3020 if ((m_multi_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) 3021 return false; 3022 Vulkan::SetObjectName(m_device, m_multi_texture_ds_layout, "Multi Texture Descriptor Set Layout"); 3023 } 3024 3025 if (m_features.feedback_loops) 3026 { 3027 // TODO: This isn't ideal, since we can't push the RT descriptors. 3028 dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, 1, VK_SHADER_STAGE_FRAGMENT_BIT); 3029 if ((m_feedback_loop_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) 3030 return false; 3031 Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "Feedback Loop Descriptor Set Layout"); 3032 } 3033 3034 if (m_features.raster_order_views) 3035 { 3036 for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++) 3037 dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT); 3038 if ((m_rov_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE) 3039 return false; 3040 Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "ROV Descriptor Set Layout"); 3041 } 3042 3043 for (u32 type = 0; type < 3; type++) 3044 { 3045 const bool feedback_loop = (type == 1); 3046 const bool rov = (type == 2); 3047 if ((feedback_loop && !m_features.feedback_loops) || (rov && !m_features.raster_order_views)) 3048 continue; 3049 3050 { 3051 VkPipelineLayout& pl = m_pipeline_layouts[type][static_cast<u8>(GPUPipeline::Layout::SingleTextureAndUBO)]; 3052 plb.AddDescriptorSet(m_ubo_ds_layout); 3053 plb.AddDescriptorSet(m_single_texture_ds_layout); 3054 if (feedback_loop) 3055 plb.AddDescriptorSet(m_feedback_loop_ds_layout); 3056 else if (rov) 3057 plb.AddDescriptorSet(m_rov_ds_layout); 3058 if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) 3059 return false; 3060 Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout"); 3061 } 3062 3063 { 3064 VkPipelineLayout& pl = 3065 m_pipeline_layouts[type][static_cast<u8>(GPUPipeline::Layout::SingleTextureAndPushConstants)]; 3066 plb.AddDescriptorSet(m_single_texture_ds_layout); 3067 if (feedback_loop) 3068 plb.AddDescriptorSet(m_feedback_loop_ds_layout); 3069 else if (rov) 3070 plb.AddDescriptorSet(m_rov_ds_layout); 3071 plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); 3072 if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) 3073 return false; 3074 Vulkan::SetObjectName(m_device, pl, "Single Texture Pipeline Layout"); 3075 } 3076 3077 { 3078 VkPipelineLayout& pl = 3079 m_pipeline_layouts[type][static_cast<u8>(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)]; 3080 plb.AddDescriptorSet(m_single_texture_buffer_ds_layout); 3081 if (feedback_loop) 3082 plb.AddDescriptorSet(m_feedback_loop_ds_layout); 3083 else if (rov) 3084 plb.AddDescriptorSet(m_rov_ds_layout); 3085 plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); 3086 if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) 3087 return false; 3088 Vulkan::SetObjectName(m_device, pl, "Single Texture Buffer + UBO Pipeline Layout"); 3089 } 3090 3091 { 3092 VkPipelineLayout& pl = m_pipeline_layouts[type][static_cast<u8>(GPUPipeline::Layout::MultiTextureAndUBO)]; 3093 plb.AddDescriptorSet(m_ubo_ds_layout); 3094 plb.AddDescriptorSet(m_multi_texture_ds_layout); 3095 if (feedback_loop) 3096 plb.AddDescriptorSet(m_feedback_loop_ds_layout); 3097 else if (rov) 3098 plb.AddDescriptorSet(m_rov_ds_layout); 3099 if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) 3100 return false; 3101 Vulkan::SetObjectName(m_device, pl, "Multi Texture + UBO Pipeline Layout"); 3102 } 3103 3104 { 3105 VkPipelineLayout& pl = 3106 m_pipeline_layouts[type][static_cast<u8>(GPUPipeline::Layout::MultiTextureAndPushConstants)]; 3107 plb.AddDescriptorSet(m_multi_texture_ds_layout); 3108 plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE); 3109 if (feedback_loop) 3110 plb.AddDescriptorSet(m_feedback_loop_ds_layout); 3111 else if (rov) 3112 plb.AddDescriptorSet(m_rov_ds_layout); 3113 if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE) 3114 return false; 3115 Vulkan::SetObjectName(m_device, pl, "Multi Texture Pipeline Layout"); 3116 } 3117 } 3118 3119 return true; 3120 } 3121 3122 void VulkanDevice::DestroyPipelineLayouts() 3123 { 3124 m_pipeline_layouts.enumerate([this](auto& pl) { 3125 if (pl != VK_NULL_HANDLE) 3126 { 3127 vkDestroyPipelineLayout(m_device, pl, nullptr); 3128 pl = VK_NULL_HANDLE; 3129 } 3130 }); 3131 3132 auto destroy_dsl = [this](VkDescriptorSetLayout& l) { 3133 if (l != VK_NULL_HANDLE) 3134 { 3135 vkDestroyDescriptorSetLayout(m_device, l, nullptr); 3136 l = VK_NULL_HANDLE; 3137 } 3138 }; 3139 destroy_dsl(m_rov_ds_layout); 3140 destroy_dsl(m_feedback_loop_ds_layout); 3141 destroy_dsl(m_multi_texture_ds_layout); 3142 destroy_dsl(m_single_texture_buffer_ds_layout); 3143 destroy_dsl(m_single_texture_ds_layout); 3144 destroy_dsl(m_ubo_ds_layout); 3145 } 3146 3147 bool VulkanDevice::CreatePersistentDescriptorSets() 3148 { 3149 Vulkan::DescriptorSetUpdateBuilder dsub; 3150 3151 // TODO: is this a bad thing? choosing an upper bound.. so long as it's not going to fetch all of it :/ 3152 m_ubo_descriptor_set = AllocatePersistentDescriptorSet(m_ubo_ds_layout); 3153 if (m_ubo_descriptor_set == VK_NULL_HANDLE) 3154 return false; 3155 dsub.AddBufferDescriptorWrite(m_ubo_descriptor_set, 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 3156 m_uniform_buffer.GetBuffer(), 0, MAX_UNIFORM_BUFFER_SIZE); 3157 dsub.Update(m_device, false); 3158 3159 return true; 3160 } 3161 3162 void VulkanDevice::DestroyPersistentDescriptorSets() 3163 { 3164 if (m_ubo_descriptor_set != VK_NULL_HANDLE) 3165 FreePersistentDescriptorSet(m_ubo_descriptor_set); 3166 } 3167 3168 void VulkanDevice::RenderBlankFrame() 3169 { 3170 VkResult res = m_swap_chain->AcquireNextImage(); 3171 if (res != VK_SUCCESS) 3172 { 3173 ERROR_LOG("Failed to acquire image for blank frame present"); 3174 return; 3175 } 3176 3177 VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); 3178 3179 const VkImage image = m_swap_chain->GetCurrentImage(); 3180 static constexpr VkImageSubresourceRange srr = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; 3181 static constexpr VkClearColorValue clear_color = {{0.0f, 0.0f, 0.0f, 1.0f}}; 3182 VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1, 3183 VulkanTexture::Layout::Undefined, VulkanTexture::Layout::TransferDst); 3184 vkCmdClearColorImage(cmdbuf, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &srr); 3185 VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1, 3186 VulkanTexture::Layout::TransferDst, VulkanTexture::Layout::PresentSrc); 3187 3188 EndAndSubmitCommandBuffer(m_swap_chain.get(), false, !m_swap_chain->IsPresentModeSynchronizing()); 3189 MoveToNextCommandBuffer(); 3190 3191 InvalidateCachedState(); 3192 } 3193 3194 bool VulkanDevice::TryImportHostMemory(void* data, size_t data_size, VkBufferUsageFlags buffer_usage, 3195 VkDeviceMemory* out_memory, VkBuffer* out_buffer, VkDeviceSize* out_offset) 3196 { 3197 if (!m_optional_extensions.vk_ext_external_memory_host) 3198 return false; 3199 3200 // Align to the nearest page 3201 void* data_aligned = 3202 reinterpret_cast<void*>(Common::AlignDownPow2(reinterpret_cast<uintptr_t>(data), HOST_PAGE_SIZE)); 3203 3204 // Offset to the start of the data within the page 3205 const size_t data_offset = reinterpret_cast<uintptr_t>(data) & static_cast<uintptr_t>(HOST_PAGE_MASK); 3206 3207 // Full amount of data that must be imported, including the pages 3208 const size_t data_size_aligned = Common::AlignUpPow2(data_offset + data_size, HOST_PAGE_SIZE); 3209 3210 VkMemoryHostPointerPropertiesEXT pointer_properties = {VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, nullptr, 3211 0}; 3212 VkResult res = vkGetMemoryHostPointerPropertiesEXT(m_device, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, 3213 data_aligned, &pointer_properties); 3214 if (res != VK_SUCCESS || pointer_properties.memoryTypeBits == 0) 3215 { 3216 LOG_VULKAN_ERROR(res, "vkGetMemoryHostPointerPropertiesEXT() failed: "); 3217 return false; 3218 } 3219 3220 VmaAllocationCreateInfo vma_alloc_info = {}; 3221 vma_alloc_info.preferredFlags = 3222 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; 3223 vma_alloc_info.memoryTypeBits = pointer_properties.memoryTypeBits; 3224 3225 u32 memory_index = 0; 3226 res = vmaFindMemoryTypeIndex(m_allocator, pointer_properties.memoryTypeBits, &vma_alloc_info, &memory_index); 3227 if (res != VK_SUCCESS) 3228 { 3229 LOG_VULKAN_ERROR(res, "vmaFindMemoryTypeIndex() failed: "); 3230 return false; 3231 } 3232 3233 const VkImportMemoryHostPointerInfoEXT import_info = {VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, nullptr, 3234 VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, 3235 const_cast<void*>(data_aligned)}; 3236 3237 const VkMemoryAllocateInfo alloc_info = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, &import_info, data_size_aligned, 3238 memory_index}; 3239 3240 VkDeviceMemory imported_memory = VK_NULL_HANDLE; 3241 3242 res = vkAllocateMemory(m_device, &alloc_info, nullptr, &imported_memory); 3243 if (res != VK_SUCCESS) 3244 { 3245 LOG_VULKAN_ERROR(res, "vkAllocateMemory() failed: "); 3246 return false; 3247 } 3248 3249 const VkExternalMemoryBufferCreateInfo external_info = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, nullptr, 3250 VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT}; 3251 3252 const VkBufferCreateInfo buffer_info = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 3253 &external_info, 3254 0, 3255 data_size_aligned, 3256 buffer_usage, 3257 VK_SHARING_MODE_EXCLUSIVE, 3258 0, 3259 nullptr}; 3260 3261 VkBuffer imported_buffer = VK_NULL_HANDLE; 3262 res = vkCreateBuffer(m_device, &buffer_info, nullptr, &imported_buffer); 3263 if (res != VK_SUCCESS) 3264 { 3265 LOG_VULKAN_ERROR(res, "vkCreateBuffer() failed: "); 3266 if (imported_memory != VK_NULL_HANDLE) 3267 vkFreeMemory(m_device, imported_memory, nullptr); 3268 3269 return false; 3270 } 3271 3272 vkBindBufferMemory(m_device, imported_buffer, imported_memory, 0); 3273 3274 *out_memory = imported_memory; 3275 *out_buffer = imported_buffer; 3276 *out_offset = data_offset; 3277 DEV_LOG("Imported {} byte buffer covering {} bytes at {}", data_size, data_size_aligned, data); 3278 return true; 3279 } 3280 3281 void VulkanDevice::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds, 3282 GPUPipeline::RenderPassFlag flags) 3283 { 3284 const bool changed_layout = 3285 (m_current_render_pass_flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages)) != 3286 (flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages)); 3287 bool changed = 3288 (m_num_current_render_targets != num_rts || m_current_depth_target != ds || m_current_render_pass_flags != flags); 3289 bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated()); 3290 bool needs_rt_clear = false; 3291 3292 m_current_depth_target = static_cast<VulkanTexture*>(ds); 3293 for (u32 i = 0; i < num_rts; i++) 3294 { 3295 VulkanTexture* const RT = static_cast<VulkanTexture*>(rts[i]); 3296 changed |= m_current_render_targets[i] != RT; 3297 m_current_render_targets[i] = RT; 3298 needs_rt_clear |= RT->IsClearedOrInvalidated(); 3299 } 3300 for (u32 i = num_rts; i < m_num_current_render_targets; i++) 3301 m_current_render_targets[i] = nullptr; 3302 m_num_current_render_targets = Truncate8(num_rts); 3303 m_current_render_pass_flags = flags; 3304 3305 if (changed) 3306 { 3307 if (InRenderPass()) 3308 EndRenderPass(); 3309 3310 if (m_num_current_render_targets == 0 && !m_current_depth_target) 3311 { 3312 m_current_framebuffer = VK_NULL_HANDLE; 3313 return; 3314 } 3315 3316 if (!m_optional_extensions.vk_khr_dynamic_rendering || 3317 ((flags & GPUPipeline::ColorFeedbackLoop) && !m_optional_extensions.vk_khr_dynamic_rendering_local_read)) 3318 { 3319 m_current_framebuffer = m_framebuffer_manager.Lookup( 3320 (m_num_current_render_targets > 0) ? reinterpret_cast<GPUTexture**>(m_current_render_targets.data()) : nullptr, 3321 m_num_current_render_targets, m_current_depth_target, flags); 3322 if (m_current_framebuffer == VK_NULL_HANDLE) 3323 { 3324 ERROR_LOG("Failed to create framebuffer"); 3325 return; 3326 } 3327 } 3328 3329 m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_INPUT_ATTACHMENT) | (changed_layout ? DIRTY_FLAG_PIPELINE_LAYOUT : 0) | 3330 ((flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages)) ? 3331 DIRTY_FLAG_INPUT_ATTACHMENT : 3332 0); 3333 } 3334 else if (needs_rt_clear || needs_ds_clear) 3335 { 3336 // TODO: This could use vkCmdClearAttachments() instead. 3337 if (InRenderPass()) 3338 EndRenderPass(); 3339 } 3340 } 3341 3342 void VulkanDevice::BeginRenderPass() 3343 { 3344 DebugAssert(!InRenderPass()); 3345 3346 // All textures should be in shader read only optimal already, but just in case.. 3347 const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout); 3348 for (u32 i = 0; i < num_textures; i++) 3349 { 3350 if (m_current_textures[i]) 3351 m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly); 3352 } 3353 3354 // NVIDIA drivers appear to return random garbage when sampling the RT via a feedback loop, if the load op for 3355 // the render pass is CLEAR. Using vkCmdClearAttachments() doesn't work, so we have to clear the image instead. 3356 if (m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop && IsDeviceNVIDIA()) 3357 { 3358 for (u32 i = 0; i < m_num_current_render_targets; i++) 3359 { 3360 if (m_current_render_targets[i]->GetState() == GPUTexture::State::Cleared) 3361 m_current_render_targets[i]->CommitClear(m_current_command_buffer); 3362 } 3363 } 3364 3365 if (m_optional_extensions.vk_khr_dynamic_rendering && 3366 (m_optional_extensions.vk_khr_dynamic_rendering_local_read || 3367 !(m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop))) 3368 { 3369 VkRenderingInfoKHR ri = { 3370 VK_STRUCTURE_TYPE_RENDERING_INFO_KHR, nullptr, 0u, {}, 1u, 0u, 0u, nullptr, nullptr, nullptr}; 3371 3372 std::array<VkRenderingAttachmentInfoKHR, MAX_RENDER_TARGETS> attachments; 3373 VkRenderingAttachmentInfoKHR depth_attachment; 3374 3375 if (m_num_current_render_targets > 0 || m_current_depth_target) 3376 { 3377 if (!(m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)) 3378 { 3379 ri.colorAttachmentCount = m_num_current_render_targets; 3380 ri.pColorAttachments = (m_num_current_render_targets > 0) ? attachments.data() : nullptr; 3381 3382 // set up clear values and transition targets 3383 for (u32 i = 0; i < m_num_current_render_targets; i++) 3384 { 3385 VulkanTexture* const rt = static_cast<VulkanTexture*>(m_current_render_targets[i]); 3386 rt->TransitionToLayout((m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop) ? 3387 VulkanTexture::Layout::FeedbackLoop : 3388 VulkanTexture::Layout::ColorAttachment); 3389 rt->SetUseFenceCounter(GetCurrentFenceCounter()); 3390 3391 VkRenderingAttachmentInfo& ai = attachments[i]; 3392 ai.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR; 3393 ai.pNext = nullptr; 3394 ai.imageView = rt->GetView(); 3395 ai.imageLayout = rt->GetVkLayout(); 3396 ai.resolveMode = VK_RESOLVE_MODE_NONE_KHR; 3397 ai.resolveImageView = VK_NULL_HANDLE; 3398 ai.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; 3399 ai.loadOp = GetLoadOpForTexture(rt); 3400 ai.storeOp = VK_ATTACHMENT_STORE_OP_STORE; 3401 3402 if (rt->GetState() == GPUTexture::State::Cleared) 3403 { 3404 std::memcpy(ai.clearValue.color.float32, rt->GetUNormClearColor().data(), 3405 sizeof(ai.clearValue.color.float32)); 3406 } 3407 rt->SetState(GPUTexture::State::Dirty); 3408 } 3409 } 3410 else 3411 { 3412 // Binding as image, but we still need to clear it. 3413 for (u32 i = 0; i < m_num_current_render_targets; i++) 3414 { 3415 VulkanTexture* rt = m_current_render_targets[i]; 3416 if (rt->GetState() == GPUTexture::State::Cleared) 3417 rt->CommitClear(m_current_command_buffer); 3418 rt->SetState(GPUTexture::State::Dirty); 3419 rt->TransitionToLayout(VulkanTexture::Layout::ReadWriteImage); 3420 rt->SetUseFenceCounter(GetCurrentFenceCounter()); 3421 } 3422 } 3423 3424 if (VulkanTexture* const ds = m_current_depth_target) 3425 { 3426 ds->TransitionToLayout(VulkanTexture::Layout::DepthStencilAttachment); 3427 ds->SetUseFenceCounter(GetCurrentFenceCounter()); 3428 3429 depth_attachment.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR; 3430 depth_attachment.pNext = nullptr; 3431 depth_attachment.imageView = ds->GetView(); 3432 depth_attachment.imageLayout = ds->GetVkLayout(); 3433 depth_attachment.resolveMode = VK_RESOLVE_MODE_NONE_KHR; 3434 depth_attachment.resolveImageView = VK_NULL_HANDLE; 3435 depth_attachment.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; 3436 depth_attachment.loadOp = GetLoadOpForTexture(ds); 3437 depth_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; 3438 ri.pDepthAttachment = &depth_attachment; 3439 3440 if (ds->GetState() == GPUTexture::State::Cleared) 3441 depth_attachment.clearValue.depthStencil = {ds->GetClearDepth(), 0u}; 3442 3443 ds->SetState(GPUTexture::State::Dirty); 3444 } 3445 3446 const VulkanTexture* const rt_or_ds = 3447 (m_num_current_render_targets > 0) ? m_current_render_targets[0] : m_current_depth_target; 3448 ri.renderArea = {{}, {rt_or_ds->GetWidth(), rt_or_ds->GetHeight()}}; 3449 } 3450 else 3451 { 3452 VkRenderingAttachmentInfo& ai = attachments[0]; 3453 ai.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR; 3454 ai.pNext = nullptr; 3455 ai.imageView = m_swap_chain->GetCurrentImageView(); 3456 ai.imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; 3457 ai.resolveMode = VK_RESOLVE_MODE_NONE_KHR; 3458 ai.resolveImageView = VK_NULL_HANDLE; 3459 ai.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED; 3460 ai.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 3461 ai.storeOp = VK_ATTACHMENT_STORE_OP_STORE; 3462 3463 ri.colorAttachmentCount = 1; 3464 ri.pColorAttachments = attachments.data(); 3465 ri.renderArea = {{}, {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}}; 3466 } 3467 3468 m_current_render_pass = DYNAMIC_RENDERING_RENDER_PASS; 3469 vkCmdBeginRenderingKHR(GetCurrentCommandBuffer(), &ri); 3470 } 3471 else 3472 { 3473 VkRenderPassBeginInfo bi = { 3474 VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr, VK_NULL_HANDLE, VK_NULL_HANDLE, {}, 0u, nullptr}; 3475 std::array<VkClearValue, MAX_RENDER_TARGETS + 1> clear_values; 3476 3477 if (m_current_framebuffer != VK_NULL_HANDLE) 3478 { 3479 bi.framebuffer = m_current_framebuffer; 3480 bi.renderPass = m_current_render_pass = 3481 GetRenderPass(m_current_render_targets.data(), m_num_current_render_targets, m_current_depth_target, 3482 m_current_render_pass_flags); 3483 if (bi.renderPass == VK_NULL_HANDLE) 3484 { 3485 ERROR_LOG("Failed to create render pass"); 3486 return; 3487 } 3488 3489 // set up clear values and transition targets 3490 for (u32 i = 0; i < m_num_current_render_targets; i++) 3491 { 3492 VulkanTexture* const rt = static_cast<VulkanTexture*>(m_current_render_targets[i]); 3493 if (rt->GetState() == GPUTexture::State::Cleared) 3494 { 3495 std::memcpy(clear_values[i].color.float32, rt->GetUNormClearColor().data(), 3496 sizeof(clear_values[i].color.float32)); 3497 bi.pClearValues = clear_values.data(); 3498 bi.clearValueCount = i + 1; 3499 } 3500 rt->SetState(GPUTexture::State::Dirty); 3501 rt->TransitionToLayout((m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop) ? 3502 VulkanTexture::Layout::FeedbackLoop : 3503 VulkanTexture::Layout::ColorAttachment); 3504 rt->SetUseFenceCounter(GetCurrentFenceCounter()); 3505 } 3506 if (VulkanTexture* const ds = static_cast<VulkanTexture*>(m_current_depth_target)) 3507 { 3508 if (ds->GetState() == GPUTexture::State::Cleared) 3509 { 3510 clear_values[m_num_current_render_targets].depthStencil = {ds->GetClearDepth(), 0u}; 3511 bi.pClearValues = clear_values.data(); 3512 bi.clearValueCount = m_num_current_render_targets + 1; 3513 } 3514 ds->SetState(GPUTexture::State::Dirty); 3515 ds->TransitionToLayout(VulkanTexture::Layout::DepthStencilAttachment); 3516 ds->SetUseFenceCounter(GetCurrentFenceCounter()); 3517 } 3518 3519 const VulkanTexture* const rt_or_ds = static_cast<const VulkanTexture*>( 3520 (m_num_current_render_targets > 0) ? m_current_render_targets[0] : m_current_depth_target); 3521 bi.renderArea.extent = {rt_or_ds->GetWidth(), rt_or_ds->GetHeight()}; 3522 } 3523 else 3524 { 3525 // Re-rendering to swap chain. 3526 bi.framebuffer = m_swap_chain->GetCurrentFramebuffer(); 3527 bi.renderPass = m_current_render_pass = 3528 GetSwapChainRenderPass(m_swap_chain->GetWindowInfo().surface_format, VK_ATTACHMENT_LOAD_OP_LOAD); 3529 bi.renderArea.extent = {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}; 3530 } 3531 3532 DebugAssert(m_current_render_pass); 3533 vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &bi, VK_SUBPASS_CONTENTS_INLINE); 3534 } 3535 3536 s_stats.num_render_passes++; 3537 3538 // If this is a new command buffer, bind the pipeline and such. 3539 if (m_dirty_flags & DIRTY_FLAG_INITIAL) 3540 SetInitialPipelineState(); 3541 } 3542 3543 void VulkanDevice::BeginSwapChainRenderPass(u32 clear_color) 3544 { 3545 DebugAssert(!InRenderPass()); 3546 3547 const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); 3548 const VkImage swap_chain_image = m_swap_chain->GetCurrentImage(); 3549 3550 // Swap chain images start in undefined 3551 VulkanTexture::TransitionSubresourcesToLayout(cmdbuf, swap_chain_image, GPUTexture::Type::RenderTarget, 0, 1, 0, 1, 3552 VulkanTexture::Layout::Undefined, 3553 VulkanTexture::Layout::ColorAttachment); 3554 3555 // All textures should be in shader read only optimal already, but just in case.. 3556 const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout); 3557 for (u32 i = 0; i < num_textures; i++) 3558 { 3559 if (m_current_textures[i]) 3560 m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly); 3561 } 3562 3563 VkClearValue clear_value; 3564 GSVector4::store<false>(&clear_value.color.float32, GSVector4::rgba32(clear_color)); 3565 if (m_optional_extensions.vk_khr_dynamic_rendering) 3566 { 3567 VkRenderingAttachmentInfo ai = {VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR, 3568 nullptr, 3569 m_swap_chain->GetCurrentImageView(), 3570 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, 3571 VK_RESOLVE_MODE_NONE_KHR, 3572 VK_NULL_HANDLE, 3573 VK_IMAGE_LAYOUT_UNDEFINED, 3574 VK_ATTACHMENT_LOAD_OP_CLEAR, 3575 VK_ATTACHMENT_STORE_OP_STORE, 3576 clear_value}; 3577 3578 const VkRenderingInfoKHR ri = {VK_STRUCTURE_TYPE_RENDERING_INFO_KHR, 3579 nullptr, 3580 0u, 3581 {{}, {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}}, 3582 1u, 3583 0u, 3584 1u, 3585 &ai, 3586 nullptr, 3587 nullptr}; 3588 3589 m_current_render_pass = DYNAMIC_RENDERING_RENDER_PASS; 3590 vkCmdBeginRenderingKHR(GetCurrentCommandBuffer(), &ri); 3591 } 3592 else 3593 { 3594 m_current_render_pass = 3595 GetSwapChainRenderPass(m_swap_chain->GetWindowInfo().surface_format, VK_ATTACHMENT_LOAD_OP_CLEAR); 3596 DebugAssert(m_current_render_pass); 3597 3598 const VkRenderPassBeginInfo rp = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, 3599 nullptr, 3600 m_current_render_pass, 3601 m_swap_chain->GetCurrentFramebuffer(), 3602 {{0, 0}, {m_swap_chain->GetWidth(), m_swap_chain->GetHeight()}}, 3603 1u, 3604 &clear_value}; 3605 vkCmdBeginRenderPass(GetCurrentCommandBuffer(), &rp, VK_SUBPASS_CONTENTS_INLINE); 3606 } 3607 3608 m_dirty_flags |= 3609 (m_current_render_pass_flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages)) ? 3610 DIRTY_FLAG_PIPELINE_LAYOUT : 3611 0; 3612 s_stats.num_render_passes++; 3613 m_num_current_render_targets = 0; 3614 m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags; 3615 std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets)); 3616 m_current_depth_target = nullptr; 3617 m_current_framebuffer = VK_NULL_HANDLE; 3618 } 3619 3620 bool VulkanDevice::InRenderPass() 3621 { 3622 return m_current_render_pass != VK_NULL_HANDLE; 3623 } 3624 3625 void VulkanDevice::EndRenderPass() 3626 { 3627 DebugAssert(m_current_render_pass != VK_NULL_HANDLE); 3628 3629 // TODO: stats 3630 VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); 3631 if (std::exchange(m_current_render_pass, VK_NULL_HANDLE) == DYNAMIC_RENDERING_RENDER_PASS) 3632 vkCmdEndRenderingKHR(cmdbuf); 3633 else 3634 vkCmdEndRenderPass(GetCurrentCommandBuffer()); 3635 } 3636 3637 void VulkanDevice::SetPipeline(GPUPipeline* pipeline) 3638 { 3639 // First draw? Bind everything. 3640 if (m_dirty_flags & DIRTY_FLAG_INITIAL) 3641 { 3642 m_current_pipeline = static_cast<VulkanPipeline*>(pipeline); 3643 if (!m_current_pipeline) 3644 return; 3645 3646 SetInitialPipelineState(); 3647 return; 3648 } 3649 else if (m_current_pipeline == pipeline) 3650 { 3651 return; 3652 } 3653 3654 m_current_pipeline = static_cast<VulkanPipeline*>(pipeline); 3655 3656 vkCmdBindPipeline(m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_current_pipeline->GetPipeline()); 3657 3658 if (m_current_pipeline_layout != m_current_pipeline->GetLayout()) 3659 { 3660 m_current_pipeline_layout = m_current_pipeline->GetLayout(); 3661 m_dirty_flags |= DIRTY_FLAG_PIPELINE_LAYOUT; 3662 } 3663 } 3664 3665 void VulkanDevice::UnbindPipeline(VulkanPipeline* pl) 3666 { 3667 if (m_current_pipeline != pl) 3668 return; 3669 3670 m_current_pipeline = nullptr; 3671 } 3672 3673 void VulkanDevice::InvalidateCachedState() 3674 { 3675 m_dirty_flags = ALL_DIRTY_STATE | 3676 ((m_current_render_pass_flags & GPUPipeline::ColorFeedbackLoop) ? DIRTY_FLAG_INPUT_ATTACHMENT : 0); 3677 m_current_render_pass = VK_NULL_HANDLE; 3678 m_current_pipeline = nullptr; 3679 } 3680 3681 s32 VulkanDevice::IsRenderTargetBoundIndex(const GPUTexture* tex) const 3682 { 3683 for (u32 i = 0; i < m_num_current_render_targets; i++) 3684 { 3685 if (m_current_render_targets[i] == tex) 3686 return static_cast<s32>(i); 3687 } 3688 3689 return -1; 3690 } 3691 3692 VulkanDevice::PipelineLayoutType VulkanDevice::GetPipelineLayoutType(GPUPipeline::RenderPassFlag flags) 3693 { 3694 return (flags & GPUPipeline::BindRenderTargetsAsImages) ? 3695 PipelineLayoutType::BindRenderTargetsAsImages : 3696 ((flags & GPUPipeline::ColorFeedbackLoop) ? PipelineLayoutType::ColorFeedbackLoop : 3697 PipelineLayoutType::Normal); 3698 } 3699 3700 VkPipelineLayout VulkanDevice::GetCurrentVkPipelineLayout() const 3701 { 3702 return m_pipeline_layouts[static_cast<size_t>(GetPipelineLayoutType(m_current_render_pass_flags))] 3703 [static_cast<size_t>(m_current_pipeline_layout)]; 3704 } 3705 3706 void VulkanDevice::SetInitialPipelineState() 3707 { 3708 DebugAssert(m_current_pipeline); 3709 m_dirty_flags &= ~DIRTY_FLAG_INITIAL; 3710 3711 const VkDeviceSize offset = 0; 3712 const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer(); 3713 vkCmdBindVertexBuffers(cmdbuf, 0, 1, m_vertex_buffer.GetBufferPtr(), &offset); 3714 vkCmdBindIndexBuffer(cmdbuf, m_index_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT16); 3715 3716 m_current_pipeline_layout = m_current_pipeline->GetLayout(); 3717 vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_current_pipeline->GetPipeline()); 3718 3719 const VkViewport vp = {static_cast<float>(m_current_viewport.left), 3720 static_cast<float>(m_current_viewport.top), 3721 static_cast<float>(m_current_viewport.width()), 3722 static_cast<float>(m_current_viewport.height()), 3723 0.0f, 3724 1.0f}; 3725 vkCmdSetViewport(GetCurrentCommandBuffer(), 0, 1, &vp); 3726 3727 const VkRect2D vrc = {{m_current_scissor.left, m_current_scissor.top}, 3728 {static_cast<u32>(m_current_scissor.width()), static_cast<u32>(m_current_scissor.height())}}; 3729 vkCmdSetScissor(GetCurrentCommandBuffer(), 0, 1, &vrc); 3730 } 3731 3732 void VulkanDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) 3733 { 3734 VulkanTexture* T = static_cast<VulkanTexture*>(texture); 3735 const VkSampler vsampler = static_cast<VulkanSampler*>(sampler ? sampler : m_nearest_sampler.get())->GetSampler(); 3736 if (m_current_textures[slot] != T || m_current_samplers[slot] != vsampler) 3737 { 3738 m_current_textures[slot] = T; 3739 m_current_samplers[slot] = vsampler; 3740 m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS; 3741 } 3742 3743 if (T) 3744 { 3745 T->CommitClear(); 3746 T->SetUseFenceCounter(GetCurrentFenceCounter()); 3747 if (T->GetLayout() != VulkanTexture::Layout::ShaderReadOnly) 3748 { 3749 if (InRenderPass()) 3750 EndRenderPass(); 3751 T->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly); 3752 } 3753 } 3754 } 3755 3756 void VulkanDevice::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer) 3757 { 3758 DebugAssert(slot == 0); 3759 if (m_current_texture_buffer == buffer) 3760 return; 3761 3762 m_current_texture_buffer = static_cast<VulkanTextureBuffer*>(buffer); 3763 if (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) 3764 m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS; 3765 } 3766 3767 void VulkanDevice::UnbindTexture(VulkanTexture* tex) 3768 { 3769 for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) 3770 { 3771 if (m_current_textures[i] == tex) 3772 { 3773 m_current_textures[i] = nullptr; 3774 m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS; 3775 } 3776 } 3777 3778 if (tex->IsRenderTarget() || tex->IsRWTexture()) 3779 { 3780 for (u32 i = 0; i < m_num_current_render_targets; i++) 3781 { 3782 if (m_current_render_targets[i] == tex) 3783 { 3784 WARNING_LOG("Unbinding current RT"); 3785 SetRenderTargets(nullptr, 0, m_current_depth_target); 3786 break; 3787 } 3788 } 3789 3790 m_framebuffer_manager.RemoveRTReferences(tex); 3791 } 3792 else if (tex->IsDepthStencil()) 3793 { 3794 if (m_current_depth_target == tex) 3795 { 3796 WARNING_LOG("Unbinding current DS"); 3797 SetRenderTargets(nullptr, 0, nullptr); 3798 } 3799 3800 m_framebuffer_manager.RemoveDSReferences(tex); 3801 } 3802 } 3803 3804 void VulkanDevice::UnbindTextureBuffer(VulkanTextureBuffer* buf) 3805 { 3806 if (m_current_texture_buffer != buf) 3807 return; 3808 3809 m_current_texture_buffer = nullptr; 3810 3811 if (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) 3812 m_dirty_flags |= DIRTY_FLAG_TEXTURES_OR_SAMPLERS; 3813 } 3814 3815 void VulkanDevice::SetViewport(const GSVector4i rc) 3816 { 3817 if (m_current_viewport.eq(rc)) 3818 return; 3819 3820 m_current_viewport = rc; 3821 3822 if (m_dirty_flags & DIRTY_FLAG_INITIAL) 3823 return; 3824 3825 const VkViewport vp = {static_cast<float>(rc.x), 3826 static_cast<float>(rc.y), 3827 static_cast<float>(rc.width()), 3828 static_cast<float>(rc.height()), 3829 0.0f, 3830 1.0f}; 3831 vkCmdSetViewport(GetCurrentCommandBuffer(), 0, 1, &vp); 3832 } 3833 3834 void VulkanDevice::SetScissor(const GSVector4i rc) 3835 { 3836 if (m_current_scissor.eq(rc)) 3837 return; 3838 3839 m_current_scissor = rc; 3840 3841 if (m_dirty_flags & DIRTY_FLAG_INITIAL) 3842 return; 3843 3844 const VkRect2D vrc = {{rc.x, rc.y}, {static_cast<u32>(rc.width()), static_cast<u32>(rc.height())}}; 3845 vkCmdSetScissor(GetCurrentCommandBuffer(), 0, 1, &vrc); 3846 } 3847 3848 void VulkanDevice::PreDrawCheck() 3849 { 3850 if (!InRenderPass()) 3851 BeginRenderPass(); 3852 3853 DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL)); 3854 const u32 update_mask = (m_current_render_pass_flags ? ~0u : ~DIRTY_FLAG_INPUT_ATTACHMENT); 3855 const u32 dirty = m_dirty_flags & update_mask; 3856 m_dirty_flags = m_dirty_flags & ~update_mask; 3857 3858 if (dirty != 0) 3859 { 3860 if (!UpdateDescriptorSets(dirty)) 3861 { 3862 SubmitCommandBufferAndRestartRenderPass("out of descriptor sets"); 3863 PreDrawCheck(); 3864 return; 3865 } 3866 } 3867 } 3868 3869 template<GPUPipeline::Layout layout> 3870 bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty) 3871 { 3872 [[maybe_unused]] bool new_dynamic_offsets = false; 3873 3874 VkPipelineLayout const vk_pipeline_layout = GetCurrentVkPipelineLayout(); 3875 std::array<VkDescriptorSet, 3> ds; 3876 u32 first_ds = 0; 3877 u32 num_ds = 0; 3878 3879 if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) 3880 { 3881 new_dynamic_offsets = ((dirty & DIRTY_FLAG_DYNAMIC_OFFSETS) != 0); 3882 3883 if (dirty & (DIRTY_FLAG_PIPELINE_LAYOUT | DIRTY_FLAG_DYNAMIC_OFFSETS)) 3884 { 3885 ds[num_ds++] = m_ubo_descriptor_set; 3886 new_dynamic_offsets = true; 3887 } 3888 else 3889 { 3890 first_ds++; 3891 } 3892 } 3893 3894 if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || 3895 layout == GPUPipeline::Layout::SingleTextureAndPushConstants) 3896 { 3897 VulkanTexture* const tex = m_current_textures[0] ? m_current_textures[0] : m_null_texture.get(); 3898 DebugAssert(tex && m_current_samplers[0] != VK_NULL_HANDLE); 3899 ds[num_ds++] = tex->GetDescriptorSetWithSampler(m_current_samplers[0]); 3900 } 3901 else if constexpr (layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) 3902 { 3903 DebugAssert(m_current_texture_buffer); 3904 ds[num_ds++] = m_current_texture_buffer->GetDescriptorSet(); 3905 } 3906 else if constexpr (layout == GPUPipeline::Layout::MultiTextureAndUBO || 3907 layout == GPUPipeline::Layout::MultiTextureAndPushConstants) 3908 { 3909 Vulkan::DescriptorSetUpdateBuilder dsub; 3910 3911 if (m_optional_extensions.vk_khr_push_descriptor) 3912 { 3913 for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) 3914 { 3915 VulkanTexture* const tex = m_current_textures[i] ? m_current_textures[i] : m_null_texture.get(); 3916 DebugAssert(tex && m_current_samplers[i] != VK_NULL_HANDLE); 3917 dsub.AddCombinedImageSamplerDescriptorWrite(VK_NULL_HANDLE, i, tex->GetView(), m_current_samplers[i], 3918 tex->GetVkLayout()); 3919 } 3920 3921 const u32 set = (layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 1 : 0; 3922 dsub.PushUpdate(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, set); 3923 if (num_ds == 0) 3924 return true; 3925 } 3926 else 3927 { 3928 VkDescriptorSet tds = AllocateDescriptorSet(m_multi_texture_ds_layout); 3929 if (tds == VK_NULL_HANDLE) 3930 return false; 3931 3932 ds[num_ds++] = tds; 3933 3934 for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) 3935 { 3936 VulkanTexture* const tex = m_current_textures[i] ? m_current_textures[i] : m_null_texture.get(); 3937 DebugAssert(tex && m_current_samplers[i] != VK_NULL_HANDLE); 3938 dsub.AddCombinedImageSamplerDescriptorWrite(tds, i, tex->GetView(), m_current_samplers[i], tex->GetVkLayout()); 3939 } 3940 3941 dsub.Update(m_device, false); 3942 } 3943 } 3944 3945 if (m_num_current_render_targets > 0 && 3946 ((dirty & DIRTY_FLAG_INPUT_ATTACHMENT) || 3947 (dirty & DIRTY_FLAG_PIPELINE_LAYOUT && 3948 (m_current_render_pass_flags & (GPUPipeline::ColorFeedbackLoop | GPUPipeline::BindRenderTargetsAsImages))))) 3949 { 3950 if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) 3951 { 3952 VkDescriptorSet ids = AllocateDescriptorSet(m_rov_ds_layout); 3953 if (ids == VK_NULL_HANDLE) 3954 return false; 3955 3956 ds[num_ds++] = ids; 3957 3958 Vulkan::DescriptorSetUpdateBuilder dsub; 3959 for (u32 i = 0; i < m_num_current_render_targets; i++) 3960 { 3961 dsub.AddStorageImageDescriptorWrite(ids, i, m_current_render_targets[i]->GetView(), 3962 m_current_render_targets[i]->GetVkLayout()); 3963 } 3964 3965 // Annoyingly, have to update all slots... 3966 for (u32 i = m_num_current_render_targets; i < MAX_IMAGE_RENDER_TARGETS; i++) 3967 dsub.AddStorageImageDescriptorWrite(ids, i, m_null_texture->GetView(), m_null_texture->GetVkLayout()); 3968 3969 dsub.Update(m_device, false); 3970 } 3971 else 3972 { 3973 VkDescriptorSet ids = AllocateDescriptorSet(m_feedback_loop_ds_layout); 3974 if (ids == VK_NULL_HANDLE) 3975 return false; 3976 3977 ds[num_ds++] = ids; 3978 3979 Vulkan::DescriptorSetUpdateBuilder dsub; 3980 dsub.AddInputAttachmentDescriptorWrite(ids, 0, m_current_render_targets[0]->GetView(), 3981 m_current_render_targets[0]->GetVkLayout()); 3982 dsub.Update(m_device, false); 3983 } 3984 } 3985 3986 DebugAssert(num_ds > 0); 3987 vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, first_ds, 3988 num_ds, ds.data(), static_cast<u32>(new_dynamic_offsets), 3989 new_dynamic_offsets ? &m_uniform_buffer_position : nullptr); 3990 3991 return true; 3992 } 3993 3994 bool VulkanDevice::UpdateDescriptorSets(u32 dirty) 3995 { 3996 switch (m_current_pipeline_layout) 3997 { 3998 case GPUPipeline::Layout::SingleTextureAndUBO: 3999 return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::SingleTextureAndUBO>(dirty); 4000 4001 case GPUPipeline::Layout::SingleTextureAndPushConstants: 4002 return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::SingleTextureAndPushConstants>(dirty); 4003 4004 case GPUPipeline::Layout::SingleTextureBufferAndPushConstants: 4005 return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::SingleTextureBufferAndPushConstants>(dirty); 4006 4007 case GPUPipeline::Layout::MultiTextureAndUBO: 4008 return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::MultiTextureAndUBO>(dirty); 4009 4010 case GPUPipeline::Layout::MultiTextureAndPushConstants: 4011 return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty); 4012 4013 default: 4014 UnreachableCode(); 4015 } 4016 } 4017 4018 void VulkanDevice::Draw(u32 vertex_count, u32 base_vertex) 4019 { 4020 PreDrawCheck(); 4021 s_stats.num_draws++; 4022 vkCmdDraw(GetCurrentCommandBuffer(), vertex_count, 1, base_vertex, 0); 4023 } 4024 4025 void VulkanDevice::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) 4026 { 4027 PreDrawCheck(); 4028 s_stats.num_draws++; 4029 vkCmdDrawIndexed(GetCurrentCommandBuffer(), index_count, 1, base_index, base_vertex, 0); 4030 } 4031 4032 VkImageMemoryBarrier VulkanDevice::GetColorBufferBarrier(const VulkanTexture* rt) const 4033 { 4034 const VkImageLayout vk_layout = m_optional_extensions.vk_khr_dynamic_rendering_local_read ? 4035 VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR : 4036 VK_IMAGE_LAYOUT_GENERAL; 4037 DebugAssert(rt->GetLayout() == VulkanTexture::Layout::FeedbackLoop); 4038 4039 return {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 4040 nullptr, 4041 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, 4042 VK_ACCESS_INPUT_ATTACHMENT_READ_BIT, 4043 vk_layout, 4044 vk_layout, 4045 VK_QUEUE_FAMILY_IGNORED, 4046 VK_QUEUE_FAMILY_IGNORED, 4047 rt->GetImage(), 4048 {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}}; 4049 } 4050 4051 void VulkanDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) 4052 { 4053 PreDrawCheck(); 4054 4055 // TODO: The first barrier is unnecessary if we're starting the render pass. 4056 4057 switch (type) 4058 { 4059 case GPUDevice::DrawBarrier::None: 4060 { 4061 s_stats.num_draws++; 4062 vkCmdDrawIndexed(GetCurrentCommandBuffer(), index_count, 1, base_index, base_vertex, 0); 4063 } 4064 break; 4065 4066 case GPUDevice::DrawBarrier::One: 4067 { 4068 DebugAssert(m_num_current_render_targets == 1); 4069 s_stats.num_barriers++; 4070 s_stats.num_draws++; 4071 4072 const VkImageMemoryBarrier barrier = 4073 GetColorBufferBarrier(static_cast<VulkanTexture*>(m_current_render_targets[0])); 4074 vkCmdPipelineBarrier(m_current_command_buffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 4075 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 4076 1, &barrier); 4077 vkCmdDrawIndexed(GetCurrentCommandBuffer(), index_count, 1, base_index, base_vertex, 0); 4078 } 4079 break; 4080 4081 case GPUDevice::DrawBarrier::Full: 4082 { 4083 DebugAssert(m_num_current_render_targets == 1); 4084 4085 const VkImageMemoryBarrier barrier = 4086 GetColorBufferBarrier(static_cast<VulkanTexture*>(m_current_render_targets[0])); 4087 const u32 indices_per_primitive = m_current_pipeline->GetVerticesPerPrimitive(); 4088 const u32 end_batch = base_index + index_count; 4089 4090 for (; base_index < end_batch; base_index += indices_per_primitive) 4091 { 4092 s_stats.num_barriers++; 4093 s_stats.num_draws++; 4094 4095 vkCmdPipelineBarrier(m_current_command_buffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 4096 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 4097 1, &barrier); 4098 vkCmdDrawIndexed(GetCurrentCommandBuffer(), indices_per_primitive, 1, base_index, base_vertex, 0); 4099 } 4100 } 4101 break; 4102 4103 DefaultCaseIsUnreachable(); 4104 } 4105 }