gpu_device.cpp (60950B)
1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) 3 4 #include "gpu_device.h" 5 #include "compress_helpers.h" 6 #include "core/host.h" // TODO: Remove, needed for getting fullscreen mode. 7 #include "core/settings.h" // TODO: Remove, needed for dump directory. 8 #include "gpu_framebuffer_manager.h" 9 #include "shadergen.h" 10 11 #include "common/assert.h" 12 #include "common/dynamic_library.h" 13 #include "common/error.h" 14 #include "common/file_system.h" 15 #include "common/log.h" 16 #include "common/path.h" 17 #include "common/scoped_guard.h" 18 #include "common/sha1_digest.h" 19 #include "common/string_util.h" 20 #include "common/timer.h" 21 22 #include "fmt/format.h" 23 #include "imgui.h" 24 #include "shaderc/shaderc.h" 25 #include "spirv_cross/spirv_cross_c.h" 26 #include "xxhash.h" 27 28 Log_SetChannel(GPUDevice); 29 30 #ifdef _WIN32 31 #include "common/windows_headers.h" 32 #include "d3d11_device.h" 33 #include "d3d12_device.h" 34 #include "d3d_common.h" 35 #endif 36 37 #ifdef ENABLE_OPENGL 38 #include "opengl_device.h" 39 #endif 40 41 #ifdef ENABLE_VULKAN 42 #include "vulkan_device.h" 43 #endif 44 45 std::unique_ptr<GPUDevice> g_gpu_device; 46 47 static std::string s_pipeline_cache_path; 48 static size_t s_pipeline_cache_size; 49 static std::array<u8, SHA1Digest::DIGEST_SIZE> s_pipeline_cache_hash; 50 size_t GPUDevice::s_total_vram_usage = 0; 51 GPUDevice::Statistics GPUDevice::s_stats = {}; 52 53 GPUSampler::GPUSampler() = default; 54 55 GPUSampler::~GPUSampler() = default; 56 57 GPUSampler::Config GPUSampler::GetNearestConfig() 58 { 59 Config config = {}; 60 config.address_u = GPUSampler::AddressMode::ClampToEdge; 61 config.address_v = GPUSampler::AddressMode::ClampToEdge; 62 config.address_w = GPUSampler::AddressMode::ClampToEdge; 63 config.min_filter = GPUSampler::Filter::Nearest; 64 config.mag_filter = GPUSampler::Filter::Nearest; 65 return config; 66 } 67 68 GPUSampler::Config GPUSampler::GetLinearConfig() 69 { 70 Config config = {}; 71 config.address_u = GPUSampler::AddressMode::ClampToEdge; 72 config.address_v = GPUSampler::AddressMode::ClampToEdge; 73 config.address_w = GPUSampler::AddressMode::ClampToEdge; 74 config.min_filter = GPUSampler::Filter::Linear; 75 config.mag_filter = GPUSampler::Filter::Linear; 76 return config; 77 } 78 79 GPUShader::GPUShader(GPUShaderStage stage) : m_stage(stage) 80 { 81 } 82 83 GPUShader::~GPUShader() = default; 84 85 const char* GPUShader::GetStageName(GPUShaderStage stage) 86 { 87 static constexpr std::array<const char*, static_cast<u32>(GPUShaderStage::MaxCount)> names = {"Vertex", "Fragment", 88 "Geometry", "Compute"}; 89 90 return names[static_cast<u32>(stage)]; 91 } 92 93 GPUPipeline::GPUPipeline() = default; 94 95 GPUPipeline::~GPUPipeline() = default; 96 97 size_t GPUPipeline::InputLayoutHash::operator()(const InputLayout& il) const 98 { 99 std::size_t h = 0; 100 hash_combine(h, il.vertex_attributes.size(), il.vertex_stride); 101 102 for (const VertexAttribute& va : il.vertex_attributes) 103 hash_combine(h, va.key); 104 105 return h; 106 } 107 108 bool GPUPipeline::InputLayout::operator==(const InputLayout& rhs) const 109 { 110 return (vertex_stride == rhs.vertex_stride && vertex_attributes.size() == rhs.vertex_attributes.size() && 111 std::memcmp(vertex_attributes.data(), rhs.vertex_attributes.data(), 112 sizeof(VertexAttribute) * rhs.vertex_attributes.size()) == 0); 113 } 114 115 bool GPUPipeline::InputLayout::operator!=(const InputLayout& rhs) const 116 { 117 return (vertex_stride != rhs.vertex_stride || vertex_attributes.size() != rhs.vertex_attributes.size() || 118 std::memcmp(vertex_attributes.data(), rhs.vertex_attributes.data(), 119 sizeof(VertexAttribute) * rhs.vertex_attributes.size()) != 0); 120 } 121 122 GPUPipeline::RasterizationState GPUPipeline::RasterizationState::GetNoCullState() 123 { 124 RasterizationState ret = {}; 125 ret.cull_mode = CullMode::None; 126 return ret; 127 } 128 129 GPUPipeline::DepthState GPUPipeline::DepthState::GetNoTestsState() 130 { 131 DepthState ret = {}; 132 ret.depth_test = DepthFunc::Always; 133 return ret; 134 } 135 136 GPUPipeline::DepthState GPUPipeline::DepthState::GetAlwaysWriteState() 137 { 138 DepthState ret = {}; 139 ret.depth_test = DepthFunc::Always; 140 ret.depth_write = true; 141 return ret; 142 } 143 144 GPUPipeline::BlendState GPUPipeline::BlendState::GetNoBlendingState() 145 { 146 BlendState ret = {}; 147 ret.write_mask = 0xf; 148 return ret; 149 } 150 151 GPUPipeline::BlendState GPUPipeline::BlendState::GetAlphaBlendingState() 152 { 153 BlendState ret = {}; 154 ret.enable = true; 155 ret.src_blend = BlendFunc::SrcAlpha; 156 ret.dst_blend = BlendFunc::InvSrcAlpha; 157 ret.blend_op = BlendOp::Add; 158 ret.src_alpha_blend = BlendFunc::One; 159 ret.dst_alpha_blend = BlendFunc::Zero; 160 ret.alpha_blend_op = BlendOp::Add; 161 ret.write_mask = 0xf; 162 return ret; 163 } 164 165 void GPUPipeline::GraphicsConfig::SetTargetFormats(GPUTexture::Format color_format, 166 GPUTexture::Format depth_format_ /* = GPUTexture::Format::Unknown */) 167 { 168 color_formats[0] = color_format; 169 for (size_t i = 1; i < std::size(color_formats); i++) 170 color_formats[i] = GPUTexture::Format::Unknown; 171 depth_format = depth_format_; 172 } 173 174 u32 GPUPipeline::GraphicsConfig::GetRenderTargetCount() const 175 { 176 u32 num_rts = 0; 177 for (; num_rts < static_cast<u32>(std::size(color_formats)); num_rts++) 178 { 179 if (color_formats[num_rts] == GPUTexture::Format::Unknown) 180 break; 181 } 182 return num_rts; 183 } 184 185 GPUTextureBuffer::GPUTextureBuffer(Format format, u32 size) : m_format(format), m_size_in_elements(size) 186 { 187 } 188 189 GPUTextureBuffer::~GPUTextureBuffer() = default; 190 191 u32 GPUTextureBuffer::GetElementSize(Format format) 192 { 193 static constexpr std::array<u32, static_cast<u32>(Format::MaxCount)> element_size = {{ 194 sizeof(u16), 195 }}; 196 197 return element_size[static_cast<u32>(format)]; 198 } 199 200 bool GPUFramebufferManagerBase::Key::operator==(const Key& rhs) const 201 { 202 return (std::memcmp(this, &rhs, sizeof(*this)) == 0); 203 } 204 205 bool GPUFramebufferManagerBase::Key::operator!=(const Key& rhs) const 206 { 207 return (std::memcmp(this, &rhs, sizeof(*this)) != 0); 208 } 209 210 bool GPUFramebufferManagerBase::Key::ContainsRT(const GPUTexture* tex) const 211 { 212 // num_rts is worse for predictability. 213 for (u32 i = 0; i < GPUDevice::MAX_RENDER_TARGETS; i++) 214 { 215 if (rts[i] == tex) 216 return true; 217 } 218 return false; 219 } 220 221 size_t GPUFramebufferManagerBase::KeyHash::operator()(const Key& key) const 222 { 223 if constexpr (sizeof(void*) == 8) 224 return XXH3_64bits(&key, sizeof(key)); 225 else 226 return XXH32(&key, sizeof(key), 0x1337); 227 } 228 229 GPUDevice::GPUDevice() 230 { 231 ResetStatistics(); 232 } 233 234 GPUDevice::~GPUDevice() = default; 235 236 RenderAPI GPUDevice::GetPreferredAPI() 237 { 238 static RenderAPI preferred_renderer = RenderAPI::None; 239 if (preferred_renderer == RenderAPI::None) [[unlikely]] 240 { 241 #if defined(_WIN32) && !defined(_M_ARM64) 242 // Perfer DX11 on Windows, except ARM64, where QCom has slow DX11 drivers. 243 preferred_renderer = RenderAPI::D3D11; 244 #elif defined(_WIN32) && defined(_M_ARM64) 245 preferred_renderer = RenderAPI::D3D12; 246 #elif defined(__APPLE__) 247 // Prefer Metal on MacOS. 248 preferred_renderer = RenderAPI::Metal; 249 #elif defined(ENABLE_OPENGL) && defined(ENABLE_VULKAN) 250 // On Linux, if we have both GL and Vulkan, prefer VK if the driver isn't software. 251 preferred_renderer = VulkanDevice::IsSuitableDefaultRenderer() ? RenderAPI::Vulkan : RenderAPI::OpenGL; 252 #elif defined(ENABLE_OPENGL) 253 preferred_renderer = RenderAPI::OpenGL; 254 #elif defined(ENABLE_VULKAN) 255 preferred_renderer = RenderAPI::Vulkan; 256 #else 257 // Uhhh, what? 258 ERROR_LOG("Somehow don't have any renderers available..."); 259 preferred_renderer = RenderAPI::None; 260 #endif 261 } 262 263 return preferred_renderer; 264 } 265 266 const char* GPUDevice::RenderAPIToString(RenderAPI api) 267 { 268 switch (api) 269 { 270 // clang-format off 271 #define CASE(x) case RenderAPI::x: return #x 272 CASE(None); 273 CASE(D3D11); 274 CASE(D3D12); 275 CASE(Metal); 276 CASE(Vulkan); 277 CASE(OpenGL); 278 CASE(OpenGLES); 279 #undef CASE 280 // clang-format on 281 default: 282 return "Unknown"; 283 } 284 } 285 286 const char* GPUDevice::ShaderLanguageToString(GPUShaderLanguage language) 287 { 288 switch (language) 289 { 290 // clang-format off 291 #define CASE(x) case GPUShaderLanguage::x: return #x 292 CASE(HLSL); 293 CASE(GLSL); 294 CASE(GLSLES); 295 CASE(MSL); 296 CASE(SPV); 297 #undef CASE 298 // clang-format on 299 default: 300 return "Unknown"; 301 } 302 } 303 304 bool GPUDevice::IsSameRenderAPI(RenderAPI lhs, RenderAPI rhs) 305 { 306 return (lhs == rhs || ((lhs == RenderAPI::OpenGL || lhs == RenderAPI::OpenGLES) && 307 (rhs == RenderAPI::OpenGL || rhs == RenderAPI::OpenGLES))); 308 } 309 310 GPUDevice::AdapterInfoList GPUDevice::GetAdapterListForAPI(RenderAPI api) 311 { 312 AdapterInfoList ret; 313 314 switch (api) 315 { 316 #ifdef ENABLE_VULKAN 317 case RenderAPI::Vulkan: 318 ret = VulkanDevice::GetAdapterList(); 319 break; 320 #endif 321 322 #ifdef ENABLE_OPENGL 323 case RenderAPI::OpenGL: 324 case RenderAPI::OpenGLES: 325 // No way of querying. 326 break; 327 #endif 328 329 #ifdef _WIN32 330 case RenderAPI::D3D11: 331 case RenderAPI::D3D12: 332 ret = D3DCommon::GetAdapterInfoList(); 333 break; 334 #endif 335 336 #ifdef __APPLE__ 337 case RenderAPI::Metal: 338 ret = WrapGetMetalAdapterList(); 339 break; 340 #endif 341 342 default: 343 break; 344 } 345 346 return ret; 347 } 348 349 bool GPUDevice::Create(std::string_view adapter, std::string_view shader_cache_path, u32 shader_cache_version, 350 bool debug_device, GPUVSyncMode vsync, bool allow_present_throttle, bool threaded_presentation, 351 std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features, Error* error) 352 { 353 m_vsync_mode = vsync; 354 m_allow_present_throttle = allow_present_throttle; 355 m_debug_device = debug_device; 356 357 if (!AcquireWindow(true)) 358 { 359 Error::SetStringView(error, "Failed to acquire window from host."); 360 return false; 361 } 362 363 if (!CreateDevice(adapter, threaded_presentation, exclusive_fullscreen_control, disabled_features, error)) 364 { 365 if (error && !error->IsValid()) 366 error->SetStringView("Failed to create device."); 367 return false; 368 } 369 370 INFO_LOG("Graphics Driver Info:\n{}", GetDriverInfo()); 371 372 OpenShaderCache(shader_cache_path, shader_cache_version); 373 374 if (!CreateResources(error)) 375 { 376 Error::AddPrefix(error, "Failed to create base resources."); 377 return false; 378 } 379 380 return true; 381 } 382 383 void GPUDevice::Destroy() 384 { 385 PurgeTexturePool(); 386 if (HasSurface()) 387 DestroySurface(); 388 DestroyResources(); 389 CloseShaderCache(); 390 DestroyDevice(); 391 } 392 393 bool GPUDevice::SupportsExclusiveFullscreen() const 394 { 395 return false; 396 } 397 398 void GPUDevice::OpenShaderCache(std::string_view base_path, u32 version) 399 { 400 if (m_features.shader_cache && !base_path.empty()) 401 { 402 const std::string basename = GetShaderCacheBaseName("shaders"); 403 const std::string filename = Path::Combine(base_path, basename); 404 if (!m_shader_cache.Open(filename.c_str(), version)) 405 { 406 WARNING_LOG("Failed to open shader cache. Creating new cache."); 407 if (!m_shader_cache.Create()) 408 ERROR_LOG("Failed to create new shader cache."); 409 410 // Squish the pipeline cache too, it's going to be stale. 411 if (m_features.pipeline_cache) 412 { 413 const std::string pc_filename = 414 Path::Combine(base_path, TinyString::from_format("{}.bin", GetShaderCacheBaseName("pipelines"))); 415 if (FileSystem::FileExists(pc_filename.c_str())) 416 { 417 INFO_LOG("Removing old pipeline cache '{}'", Path::GetFileName(pc_filename)); 418 FileSystem::DeleteFile(pc_filename.c_str()); 419 } 420 } 421 } 422 } 423 else 424 { 425 // Still need to set the version - GL needs it. 426 m_shader_cache.Open(std::string_view(), version); 427 } 428 429 s_pipeline_cache_path = {}; 430 if (m_features.pipeline_cache && !base_path.empty()) 431 { 432 const std::string basename = GetShaderCacheBaseName("pipelines"); 433 std::string filename = Path::Combine(base_path, TinyString::from_format("{}.bin", basename)); 434 if (OpenPipelineCache(filename)) 435 s_pipeline_cache_path = std::move(filename); 436 else 437 WARNING_LOG("Failed to read pipeline cache."); 438 } 439 } 440 441 void GPUDevice::CloseShaderCache() 442 { 443 m_shader_cache.Close(); 444 445 if (!s_pipeline_cache_path.empty()) 446 { 447 DynamicHeapArray<u8> data; 448 if (GetPipelineCacheData(&data)) 449 { 450 // Save disk writes if it hasn't changed, think of the poor SSDs. 451 if (s_pipeline_cache_size != data.size() || s_pipeline_cache_hash != SHA1Digest::GetDigest(data.cspan())) 452 { 453 Error error; 454 INFO_LOG("Compressing and writing {} bytes to '{}'", data.size(), Path::GetFileName(s_pipeline_cache_path)); 455 if (!CompressHelpers::CompressToFile(CompressHelpers::CompressType::Zstandard, s_pipeline_cache_path.c_str(), 456 data.cspan(), -1, true, &error)) 457 { 458 ERROR_LOG("Failed to write pipeline cache to '{}': {}", Path::GetFileName(s_pipeline_cache_path), 459 error.GetDescription()); 460 } 461 } 462 else 463 { 464 INFO_LOG("Skipping updating pipeline cache '{}' due to no changes.", Path::GetFileName(s_pipeline_cache_path)); 465 } 466 } 467 468 s_pipeline_cache_path = {}; 469 } 470 } 471 472 std::string GPUDevice::GetShaderCacheBaseName(std::string_view type) const 473 { 474 const std::string_view debug_suffix = m_debug_device ? "_debug" : ""; 475 476 std::string ret; 477 switch (GetRenderAPI()) 478 { 479 #ifdef _WIN32 480 case RenderAPI::D3D11: 481 ret = fmt::format( 482 "d3d11_{}_{}{}", type, 483 D3DCommon::GetFeatureLevelShaderModelString(D3D11Device::GetInstance().GetD3DDevice()->GetFeatureLevel()), 484 debug_suffix); 485 break; 486 case RenderAPI::D3D12: 487 ret = fmt::format("d3d12_{}{}", type, debug_suffix); 488 break; 489 #endif 490 #ifdef ENABLE_VULKAN 491 case RenderAPI::Vulkan: 492 ret = fmt::format("vulkan_{}{}", type, debug_suffix); 493 break; 494 #endif 495 #ifdef ENABLE_OPENGL 496 case RenderAPI::OpenGL: 497 ret = fmt::format("opengl_{}{}", type, debug_suffix); 498 break; 499 case RenderAPI::OpenGLES: 500 ret = fmt::format("opengles_{}{}", type, debug_suffix); 501 break; 502 #endif 503 #ifdef __APPLE__ 504 case RenderAPI::Metal: 505 ret = fmt::format("metal_{}{}", type, debug_suffix); 506 break; 507 #endif 508 default: 509 UnreachableCode(); 510 break; 511 } 512 513 return ret; 514 } 515 516 bool GPUDevice::OpenPipelineCache(const std::string& filename) 517 { 518 if (FileSystem::GetPathFileSize(filename.c_str()) <= 0) 519 return false; 520 521 Error error; 522 CompressHelpers::OptionalByteBuffer data = 523 CompressHelpers::DecompressFile(CompressHelpers::CompressType::Zstandard, filename.c_str(), std::nullopt, &error); 524 if (!data.has_value()) 525 { 526 ERROR_LOG("Failed to load pipeline cache from '{}': {}", Path::GetFileName(filename), error.GetDescription()); 527 data.reset(); 528 } 529 530 if (data.has_value()) 531 { 532 s_pipeline_cache_size = data->size(); 533 s_pipeline_cache_hash = SHA1Digest::GetDigest(data->cspan()); 534 } 535 else 536 { 537 s_pipeline_cache_size = 0; 538 s_pipeline_cache_hash = {}; 539 } 540 541 if (!ReadPipelineCache(std::move(data))) 542 { 543 s_pipeline_cache_size = 0; 544 s_pipeline_cache_hash = {}; 545 return false; 546 } 547 548 INFO_LOG("Pipeline cache hash: {}", SHA1Digest::DigestToString(s_pipeline_cache_hash)); 549 return true; 550 } 551 552 bool GPUDevice::ReadPipelineCache(std::optional<DynamicHeapArray<u8>> data) 553 { 554 return false; 555 } 556 557 bool GPUDevice::GetPipelineCacheData(DynamicHeapArray<u8>* data) 558 { 559 return false; 560 } 561 562 bool GPUDevice::AcquireWindow(bool recreate_window) 563 { 564 std::optional<WindowInfo> wi = Host::AcquireRenderWindow(recreate_window); 565 if (!wi.has_value()) 566 return false; 567 568 INFO_LOG("Render window is {}x{}.", wi->surface_width, wi->surface_height); 569 m_window_info = wi.value(); 570 return true; 571 } 572 573 bool GPUDevice::CreateResources(Error* error) 574 { 575 if (!(m_nearest_sampler = CreateSampler(GPUSampler::GetNearestConfig())) || 576 !(m_linear_sampler = CreateSampler(GPUSampler::GetLinearConfig()))) 577 { 578 Error::SetStringView(error, "Failed to create samplers"); 579 return false; 580 } 581 582 const RenderAPI render_api = GetRenderAPI(); 583 ShaderGen shadergen(render_api, ShaderGen::GetShaderLanguageForAPI(render_api), m_features.dual_source_blend, 584 m_features.framebuffer_fetch); 585 586 std::unique_ptr<GPUShader> imgui_vs = 587 CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), shadergen.GenerateImGuiVertexShader(), error); 588 std::unique_ptr<GPUShader> imgui_fs = 589 CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateImGuiFragmentShader(), error); 590 if (!imgui_vs || !imgui_fs) 591 { 592 Error::AddPrefix(error, "Failed to compile ImGui shaders: "); 593 return false; 594 } 595 GL_OBJECT_NAME(imgui_vs, "ImGui Vertex Shader"); 596 GL_OBJECT_NAME(imgui_fs, "ImGui Fragment Shader"); 597 598 static constexpr GPUPipeline::VertexAttribute imgui_attributes[] = { 599 GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Semantic::Position, 0, 600 GPUPipeline::VertexAttribute::Type::Float, 2, OFFSETOF(ImDrawVert, pos)), 601 GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Semantic::TexCoord, 0, 602 GPUPipeline::VertexAttribute::Type::Float, 2, OFFSETOF(ImDrawVert, uv)), 603 GPUPipeline::VertexAttribute::Make(2, GPUPipeline::VertexAttribute::Semantic::Color, 0, 604 GPUPipeline::VertexAttribute::Type::UNorm8, 4, OFFSETOF(ImDrawVert, col)), 605 }; 606 607 GPUPipeline::GraphicsConfig plconfig; 608 plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; 609 plconfig.input_layout.vertex_attributes = imgui_attributes; 610 plconfig.input_layout.vertex_stride = sizeof(ImDrawVert); 611 plconfig.primitive = GPUPipeline::Primitive::Triangles; 612 plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); 613 plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); 614 plconfig.blend = GPUPipeline::BlendState::GetAlphaBlendingState(); 615 plconfig.blend.write_mask = 0x7; 616 plconfig.SetTargetFormats(HasSurface() ? m_window_info.surface_format : GPUTexture::Format::RGBA8); 617 plconfig.samples = 1; 618 plconfig.per_sample_shading = false; 619 plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; 620 plconfig.vertex_shader = imgui_vs.get(); 621 plconfig.geometry_shader = nullptr; 622 plconfig.fragment_shader = imgui_fs.get(); 623 624 m_imgui_pipeline = CreatePipeline(plconfig, error); 625 if (!m_imgui_pipeline) 626 { 627 Error::AddPrefix(error, "Failed to compile ImGui pipeline: "); 628 return false; 629 } 630 GL_OBJECT_NAME(m_imgui_pipeline, "ImGui Pipeline"); 631 632 return true; 633 } 634 635 void GPUDevice::DestroyResources() 636 { 637 m_imgui_font_texture.reset(); 638 m_imgui_pipeline.reset(); 639 640 m_imgui_pipeline.reset(); 641 642 m_linear_sampler.reset(); 643 m_nearest_sampler.reset(); 644 645 m_shader_cache.Close(); 646 } 647 648 void GPUDevice::RenderImGui() 649 { 650 GL_SCOPE("RenderImGui"); 651 652 ImGui::Render(); 653 654 const ImDrawData* draw_data = ImGui::GetDrawData(); 655 if (draw_data->CmdListsCount == 0) 656 return; 657 658 SetPipeline(m_imgui_pipeline.get()); 659 SetViewportAndScissor(0, 0, m_window_info.surface_width, m_window_info.surface_height); 660 661 const float L = 0.0f; 662 const float R = static_cast<float>(m_window_info.surface_width); 663 const float T = 0.0f; 664 const float B = static_cast<float>(m_window_info.surface_height); 665 const float ortho_projection[4][4] = { 666 {2.0f / (R - L), 0.0f, 0.0f, 0.0f}, 667 {0.0f, 2.0f / (T - B), 0.0f, 0.0f}, 668 {0.0f, 0.0f, 0.5f, 0.0f}, 669 {(R + L) / (L - R), (T + B) / (B - T), 0.5f, 1.0f}, 670 }; 671 PushUniformBuffer(ortho_projection, sizeof(ortho_projection)); 672 673 // Render command lists 674 const bool flip = UsesLowerLeftOrigin(); 675 for (int n = 0; n < draw_data->CmdListsCount; n++) 676 { 677 const ImDrawList* cmd_list = draw_data->CmdLists[n]; 678 static_assert(sizeof(ImDrawIdx) == sizeof(DrawIndex)); 679 680 u32 base_vertex, base_index; 681 UploadVertexBuffer(cmd_list->VtxBuffer.Data, sizeof(ImDrawVert), cmd_list->VtxBuffer.Size, &base_vertex); 682 UploadIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size, &base_index); 683 684 for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++) 685 { 686 const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; 687 DebugAssert(!pcmd->UserCallback); 688 689 if (pcmd->ElemCount == 0 || pcmd->ClipRect.z <= pcmd->ClipRect.x || pcmd->ClipRect.w <= pcmd->ClipRect.y) 690 continue; 691 692 if (flip) 693 { 694 const s32 height = static_cast<s32>(pcmd->ClipRect.w - pcmd->ClipRect.y); 695 const s32 flipped_y = 696 static_cast<s32>(m_window_info.surface_height) - static_cast<s32>(pcmd->ClipRect.y) - height; 697 SetScissor(static_cast<s32>(pcmd->ClipRect.x), flipped_y, static_cast<s32>(pcmd->ClipRect.z - pcmd->ClipRect.x), 698 height); 699 } 700 else 701 { 702 SetScissor(static_cast<s32>(pcmd->ClipRect.x), static_cast<s32>(pcmd->ClipRect.y), 703 static_cast<s32>(pcmd->ClipRect.z - pcmd->ClipRect.x), 704 static_cast<s32>(pcmd->ClipRect.w - pcmd->ClipRect.y)); 705 } 706 707 SetTextureSampler(0, reinterpret_cast<GPUTexture*>(pcmd->TextureId), m_linear_sampler.get()); 708 DrawIndexed(pcmd->ElemCount, base_index + pcmd->IdxOffset, base_vertex + pcmd->VtxOffset); 709 } 710 } 711 } 712 713 void GPUDevice::UploadVertexBuffer(const void* vertices, u32 vertex_size, u32 vertex_count, u32* base_vertex) 714 { 715 void* map; 716 u32 space; 717 MapVertexBuffer(vertex_size, vertex_count, &map, &space, base_vertex); 718 std::memcpy(map, vertices, vertex_size * vertex_count); 719 UnmapVertexBuffer(vertex_size, vertex_count); 720 } 721 722 void GPUDevice::UploadIndexBuffer(const u16* indices, u32 index_count, u32* base_index) 723 { 724 u16* map; 725 u32 space; 726 MapIndexBuffer(index_count, &map, &space, base_index); 727 std::memcpy(map, indices, sizeof(u16) * index_count); 728 UnmapIndexBuffer(index_count); 729 } 730 731 void GPUDevice::UploadUniformBuffer(const void* data, u32 data_size) 732 { 733 void* map = MapUniformBuffer(data_size); 734 std::memcpy(map, data, data_size); 735 UnmapUniformBuffer(data_size); 736 } 737 738 void GPUDevice::SetRenderTarget(GPUTexture* rt, GPUTexture* ds, GPUPipeline::RenderPassFlag render_pass_flags) 739 { 740 SetRenderTargets(rt ? &rt : nullptr, rt ? 1 : 0, ds, render_pass_flags); 741 } 742 743 void GPUDevice::SetViewport(s32 x, s32 y, s32 width, s32 height) 744 { 745 SetViewport(GSVector4i(x, y, x + width, y + height)); 746 } 747 748 void GPUDevice::SetScissor(s32 x, s32 y, s32 width, s32 height) 749 { 750 SetScissor(GSVector4i(x, y, x + width, y + height)); 751 } 752 753 void GPUDevice::SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height) 754 { 755 SetViewportAndScissor(GSVector4i(x, y, x + width, y + height)); 756 } 757 758 void GPUDevice::SetViewportAndScissor(const GSVector4i rc) 759 { 760 SetViewport(rc); 761 SetScissor(rc); 762 } 763 764 void GPUDevice::ClearRenderTarget(GPUTexture* t, u32 c) 765 { 766 t->SetClearColor(c); 767 } 768 769 void GPUDevice::ClearDepth(GPUTexture* t, float d) 770 { 771 t->SetClearDepth(d); 772 } 773 774 void GPUDevice::InvalidateRenderTarget(GPUTexture* t) 775 { 776 t->SetState(GPUTexture::State::Invalidated); 777 } 778 779 std::unique_ptr<GPUShader> GPUDevice::CreateShader(GPUShaderStage stage, GPUShaderLanguage language, 780 std::string_view source, Error* error /* = nullptr */, 781 const char* entry_point /* = "main" */) 782 { 783 std::unique_ptr<GPUShader> shader; 784 if (!m_shader_cache.IsOpen()) 785 { 786 shader = CreateShaderFromSource(stage, language, source, entry_point, nullptr, error); 787 return shader; 788 } 789 790 const GPUShaderCache::CacheIndexKey key = m_shader_cache.GetCacheKey(stage, language, source, entry_point); 791 std::optional<GPUShaderCache::ShaderBinary> binary = m_shader_cache.Lookup(key); 792 if (binary.has_value()) 793 { 794 shader = CreateShaderFromBinary(stage, binary->cspan(), error); 795 if (shader) 796 return shader; 797 798 ERROR_LOG("Failed to create shader from binary (driver changed?). Clearing cache."); 799 m_shader_cache.Clear(); 800 binary.reset(); 801 } 802 803 GPUShaderCache::ShaderBinary new_binary; 804 shader = CreateShaderFromSource(stage, language, source, entry_point, &new_binary, error); 805 if (!shader) 806 return shader; 807 808 // Don't insert empty shaders into the cache... 809 if (!new_binary.empty()) 810 { 811 if (!m_shader_cache.Insert(key, new_binary.data(), static_cast<u32>(new_binary.size()))) 812 m_shader_cache.Close(); 813 } 814 815 return shader; 816 } 817 818 bool GPUDevice::GetRequestedExclusiveFullscreenMode(u32* width, u32* height, float* refresh_rate) 819 { 820 const std::string mode = Host::GetBaseStringSettingValue("GPU", "FullscreenMode", ""); 821 if (!mode.empty()) 822 { 823 const std::string_view mode_view = mode; 824 std::string_view::size_type sep1 = mode.find('x'); 825 if (sep1 != std::string_view::npos) 826 { 827 std::optional<u32> owidth = StringUtil::FromChars<u32>(mode_view.substr(0, sep1)); 828 sep1++; 829 830 while (sep1 < mode.length() && std::isspace(mode[sep1])) 831 sep1++; 832 833 if (owidth.has_value() && sep1 < mode.length()) 834 { 835 std::string_view::size_type sep2 = mode.find('@', sep1); 836 if (sep2 != std::string_view::npos) 837 { 838 std::optional<u32> oheight = StringUtil::FromChars<u32>(mode_view.substr(sep1, sep2 - sep1)); 839 sep2++; 840 841 while (sep2 < mode.length() && std::isspace(mode[sep2])) 842 sep2++; 843 844 if (oheight.has_value() && sep2 < mode.length()) 845 { 846 std::optional<float> orefresh_rate = StringUtil::FromChars<float>(mode_view.substr(sep2)); 847 if (orefresh_rate.has_value()) 848 { 849 *width = owidth.value(); 850 *height = oheight.value(); 851 *refresh_rate = orefresh_rate.value(); 852 return true; 853 } 854 } 855 } 856 } 857 } 858 } 859 860 *width = 0; 861 *height = 0; 862 *refresh_rate = 0; 863 return false; 864 } 865 866 std::string GPUDevice::GetFullscreenModeString(u32 width, u32 height, float refresh_rate) 867 { 868 return fmt::format("{} x {} @ {} hz", width, height, refresh_rate); 869 } 870 871 std::string GPUDevice::GetShaderDumpPath(std::string_view name) 872 { 873 return Path::Combine(EmuFolders::Dumps, name); 874 } 875 876 void GPUDevice::DumpBadShader(std::string_view code, std::string_view errors) 877 { 878 static u32 next_bad_shader_id = 0; 879 880 const std::string filename = GetShaderDumpPath(fmt::format("bad_shader_{}.txt", ++next_bad_shader_id)); 881 auto fp = FileSystem::OpenManagedCFile(filename.c_str(), "wb"); 882 if (fp) 883 { 884 if (!code.empty()) 885 std::fwrite(code.data(), code.size(), 1, fp.get()); 886 std::fputs("\n\n**** ERRORS ****\n", fp.get()); 887 if (!errors.empty()) 888 std::fwrite(errors.data(), errors.size(), 1, fp.get()); 889 } 890 } 891 892 std::array<float, 4> GPUDevice::RGBA8ToFloat(u32 rgba) 893 { 894 return std::array<float, 4>{static_cast<float>(rgba & UINT32_C(0xFF)) * (1.0f / 255.0f), 895 static_cast<float>((rgba >> 8) & UINT32_C(0xFF)) * (1.0f / 255.0f), 896 static_cast<float>((rgba >> 16) & UINT32_C(0xFF)) * (1.0f / 255.0f), 897 static_cast<float>(rgba >> 24) * (1.0f / 255.0f)}; 898 } 899 900 bool GPUDevice::UpdateImGuiFontTexture() 901 { 902 ImGuiIO& io = ImGui::GetIO(); 903 904 unsigned char* pixels; 905 int width, height; 906 io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); 907 908 const u32 pitch = sizeof(u32) * width; 909 910 if (m_imgui_font_texture && m_imgui_font_texture->GetWidth() == static_cast<u32>(width) && 911 m_imgui_font_texture->GetHeight() == static_cast<u32>(height) && 912 m_imgui_font_texture->Update(0, 0, static_cast<u32>(width), static_cast<u32>(height), pixels, pitch)) 913 { 914 io.Fonts->SetTexID(m_imgui_font_texture.get()); 915 return true; 916 } 917 918 std::unique_ptr<GPUTexture> new_font = 919 FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, GPUTexture::Format::RGBA8, pixels, pitch); 920 if (!new_font) 921 return false; 922 923 RecycleTexture(std::move(m_imgui_font_texture)); 924 m_imgui_font_texture = std::move(new_font); 925 io.Fonts->SetTexID(m_imgui_font_texture.get()); 926 return true; 927 } 928 929 bool GPUDevice::UsesLowerLeftOrigin() const 930 { 931 const RenderAPI api = GetRenderAPI(); 932 return (api == RenderAPI::OpenGL || api == RenderAPI::OpenGLES); 933 } 934 935 GSVector4i GPUDevice::FlipToLowerLeft(GSVector4i rc, s32 target_height) 936 { 937 const s32 height = rc.height(); 938 const s32 flipped_y = target_height - rc.top - height; 939 rc.top = flipped_y; 940 rc.bottom = flipped_y + height; 941 return rc; 942 } 943 944 bool GPUDevice::IsTexturePoolType(GPUTexture::Type type) 945 { 946 return (type == GPUTexture::Type::Texture || type == GPUTexture::Type::DynamicTexture); 947 } 948 949 std::unique_ptr<GPUTexture> GPUDevice::FetchTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, 950 GPUTexture::Type type, GPUTexture::Format format, 951 const void* data /*= nullptr*/, u32 data_stride /*= 0*/) 952 { 953 std::unique_ptr<GPUTexture> ret; 954 955 const TexturePoolKey key = {static_cast<u16>(width), 956 static_cast<u16>(height), 957 static_cast<u8>(layers), 958 static_cast<u8>(levels), 959 static_cast<u8>(samples), 960 type, 961 format, 962 0u}; 963 964 const bool is_texture = IsTexturePoolType(type); 965 TexturePool& pool = is_texture ? m_texture_pool : m_target_pool; 966 const u32 pool_size = (is_texture ? MAX_TEXTURE_POOL_SIZE : MAX_TARGET_POOL_SIZE); 967 968 TexturePool::iterator it; 969 970 if (is_texture && m_features.prefer_unused_textures) 971 { 972 // Try to find a texture that wasn't used this frame first. 973 for (it = m_texture_pool.begin(); it != m_texture_pool.end(); ++it) 974 { 975 if (it->use_counter == m_texture_pool_counter) 976 { 977 // We're into textures recycled this frame, not going to find anything newer. 978 // But prefer reuse over creating a new texture. 979 if (m_texture_pool.size() < pool_size) 980 { 981 it = m_texture_pool.end(); 982 break; 983 } 984 } 985 986 if (it->key == key) 987 break; 988 } 989 } 990 else 991 { 992 for (it = pool.begin(); it != pool.end(); ++it) 993 { 994 if (it->key == key) 995 break; 996 } 997 } 998 999 if (it != pool.end()) 1000 { 1001 if (!data || it->texture->Update(0, 0, width, height, data, data_stride, 0, 0)) 1002 { 1003 ret = std::move(it->texture); 1004 pool.erase(it); 1005 return ret; 1006 } 1007 else 1008 { 1009 // This shouldn't happen... 1010 ERROR_LOG("Failed to upload {}x{} to pooled texture", width, height); 1011 } 1012 } 1013 1014 ret = CreateTexture(width, height, layers, levels, samples, type, format, data, data_stride); 1015 return ret; 1016 } 1017 1018 std::unique_ptr<GPUTexture, GPUDevice::PooledTextureDeleter> 1019 GPUDevice::FetchAutoRecycleTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, GPUTexture::Type type, 1020 GPUTexture::Format format, const void* data /*= nullptr*/, u32 data_stride /*= 0*/, 1021 bool dynamic /*= false*/) 1022 { 1023 std::unique_ptr<GPUTexture> ret = 1024 FetchTexture(width, height, layers, levels, samples, type, format, data, data_stride); 1025 return std::unique_ptr<GPUTexture, PooledTextureDeleter>(ret.release()); 1026 } 1027 1028 void GPUDevice::RecycleTexture(std::unique_ptr<GPUTexture> texture) 1029 { 1030 if (!texture) 1031 return; 1032 1033 const TexturePoolKey key = {static_cast<u16>(texture->GetWidth()), 1034 static_cast<u16>(texture->GetHeight()), 1035 static_cast<u8>(texture->GetLayers()), 1036 static_cast<u8>(texture->GetLevels()), 1037 static_cast<u8>(texture->GetSamples()), 1038 texture->GetType(), 1039 texture->GetFormat(), 1040 0u}; 1041 1042 const bool is_texture = IsTexturePoolType(texture->GetType()); 1043 TexturePool& pool = is_texture ? m_texture_pool : m_target_pool; 1044 pool.push_back({std::move(texture), m_texture_pool_counter, key}); 1045 1046 const u32 max_size = is_texture ? MAX_TEXTURE_POOL_SIZE : MAX_TARGET_POOL_SIZE; 1047 while (pool.size() > max_size) 1048 { 1049 DEBUG_LOG("Trim {}x{} texture from pool", pool.front().texture->GetWidth(), pool.front().texture->GetHeight()); 1050 pool.pop_front(); 1051 } 1052 } 1053 1054 void GPUDevice::PurgeTexturePool() 1055 { 1056 m_texture_pool_counter = 0; 1057 m_texture_pool.clear(); 1058 m_target_pool.clear(); 1059 } 1060 1061 void GPUDevice::TrimTexturePool() 1062 { 1063 GL_INS_FMT("Texture Pool Size: {}", m_texture_pool.size()); 1064 GL_INS_FMT("Target Pool Size: {}", m_target_pool.size()); 1065 GL_INS_FMT("VRAM Usage: {:.2f} MB", s_total_vram_usage / 1048576.0); 1066 1067 DEBUG_LOG("Texture Pool Size: {} Target Pool Size: {} VRAM: {:.2f} MB", m_texture_pool.size(), m_target_pool.size(), 1068 s_total_vram_usage / 1048756.0); 1069 1070 if (m_texture_pool.empty() && m_target_pool.empty()) 1071 return; 1072 1073 const u32 prev_counter = m_texture_pool_counter++; 1074 for (u32 pool_idx = 0; pool_idx < 2; pool_idx++) 1075 { 1076 TexturePool& pool = pool_idx ? m_target_pool : m_texture_pool; 1077 for (auto it = pool.begin(); it != pool.end();) 1078 { 1079 const u32 delta = (prev_counter - it->use_counter); 1080 if (delta < POOL_PURGE_DELAY) 1081 break; 1082 1083 DEBUG_LOG("Trim {}x{} texture from pool", it->texture->GetWidth(), it->texture->GetHeight()); 1084 it = pool.erase(it); 1085 } 1086 } 1087 1088 if (m_texture_pool_counter < prev_counter) [[unlikely]] 1089 { 1090 // wrapped around, handle it 1091 if (m_texture_pool.empty() && m_target_pool.empty()) 1092 { 1093 m_texture_pool_counter = 0; 1094 } 1095 else 1096 { 1097 const u32 texture_min = 1098 m_texture_pool.empty() ? std::numeric_limits<u32>::max() : m_texture_pool.front().use_counter; 1099 const u32 target_min = 1100 m_target_pool.empty() ? std::numeric_limits<u32>::max() : m_target_pool.front().use_counter; 1101 const u32 reduce = std::min(texture_min, target_min); 1102 m_texture_pool_counter -= reduce; 1103 for (u32 pool_idx = 0; pool_idx < 2; pool_idx++) 1104 { 1105 TexturePool& pool = pool_idx ? m_target_pool : m_texture_pool; 1106 for (TexturePoolEntry& entry : pool) 1107 entry.use_counter -= reduce; 1108 } 1109 } 1110 } 1111 } 1112 1113 bool GPUDevice::ResizeTexture(std::unique_ptr<GPUTexture>* tex, u32 new_width, u32 new_height, GPUTexture::Type type, 1114 GPUTexture::Format format, bool preserve /* = true */) 1115 { 1116 GPUTexture* old_tex = tex->get(); 1117 DebugAssert(!old_tex || (old_tex->GetLayers() == 1 && old_tex->GetLevels() == 1 && old_tex->GetSamples() == 1)); 1118 std::unique_ptr<GPUTexture> new_tex = FetchTexture(new_width, new_height, 1, 1, 1, type, format); 1119 if (!new_tex) [[unlikely]] 1120 { 1121 ERROR_LOG("Failed to create new {}x{} texture", new_width, new_height); 1122 return false; 1123 } 1124 1125 if (old_tex) 1126 { 1127 if (old_tex->GetState() == GPUTexture::State::Cleared) 1128 { 1129 if (type == GPUTexture::Type::RenderTarget) 1130 ClearRenderTarget(new_tex.get(), old_tex->GetClearColor()); 1131 } 1132 else if (old_tex->GetState() == GPUTexture::State::Dirty) 1133 { 1134 const u32 copy_width = std::min(new_width, old_tex->GetWidth()); 1135 const u32 copy_height = std::min(new_height, old_tex->GetHeight()); 1136 if (type == GPUTexture::Type::RenderTarget) 1137 ClearRenderTarget(new_tex.get(), 0); 1138 CopyTextureRegion(new_tex.get(), 0, 0, 0, 0, old_tex, 0, 0, 0, 0, copy_width, copy_height); 1139 } 1140 } 1141 else if (preserve) 1142 { 1143 // If we're expecting data to be there, make sure to clear it. 1144 if (type == GPUTexture::Type::RenderTarget) 1145 ClearRenderTarget(new_tex.get(), 0); 1146 } 1147 1148 RecycleTexture(std::move(*tex)); 1149 *tex = std::move(new_tex); 1150 return true; 1151 } 1152 1153 bool GPUDevice::ShouldSkipPresentingFrame() 1154 { 1155 // Only needed with FIFO. But since we're so fast, we allow it always. 1156 if (!m_allow_present_throttle) 1157 return false; 1158 1159 const float throttle_rate = (m_window_info.surface_refresh_rate > 0.0f) ? m_window_info.surface_refresh_rate : 60.0f; 1160 const float throttle_period = 1.0f / throttle_rate; 1161 1162 const u64 now = Common::Timer::GetCurrentValue(); 1163 const double diff = Common::Timer::ConvertValueToSeconds(now - m_last_frame_displayed_time); 1164 if (diff < throttle_period) 1165 return true; 1166 1167 m_last_frame_displayed_time = now; 1168 return false; 1169 } 1170 1171 void GPUDevice::ThrottlePresentation() 1172 { 1173 const float throttle_rate = (m_window_info.surface_refresh_rate > 0.0f) ? m_window_info.surface_refresh_rate : 60.0f; 1174 1175 const u64 sleep_period = Common::Timer::ConvertNanosecondsToValue(1e+9f / static_cast<double>(throttle_rate)); 1176 const u64 current_ts = Common::Timer::GetCurrentValue(); 1177 1178 // Allow it to fall behind/run ahead up to 2*period. Sleep isn't that precise, plus we need to 1179 // allow time for the actual rendering. 1180 const u64 max_variance = sleep_period * 2; 1181 if (static_cast<u64>(std::abs(static_cast<s64>(current_ts - m_last_frame_displayed_time))) > max_variance) 1182 m_last_frame_displayed_time = current_ts + sleep_period; 1183 else 1184 m_last_frame_displayed_time += sleep_period; 1185 1186 Common::Timer::SleepUntil(m_last_frame_displayed_time, false); 1187 } 1188 1189 bool GPUDevice::SetGPUTimingEnabled(bool enabled) 1190 { 1191 return false; 1192 } 1193 1194 float GPUDevice::GetAndResetAccumulatedGPUTime() 1195 { 1196 return 0.0f; 1197 } 1198 1199 void GPUDevice::ResetStatistics() 1200 { 1201 s_stats = {}; 1202 } 1203 1204 std::unique_ptr<GPUDevice> GPUDevice::CreateDeviceForAPI(RenderAPI api) 1205 { 1206 switch (api) 1207 { 1208 #ifdef ENABLE_VULKAN 1209 case RenderAPI::Vulkan: 1210 return std::make_unique<VulkanDevice>(); 1211 #endif 1212 1213 #ifdef ENABLE_OPENGL 1214 case RenderAPI::OpenGL: 1215 case RenderAPI::OpenGLES: 1216 return std::make_unique<OpenGLDevice>(); 1217 #endif 1218 1219 #ifdef _WIN32 1220 case RenderAPI::D3D12: 1221 return std::make_unique<D3D12Device>(); 1222 1223 case RenderAPI::D3D11: 1224 return std::make_unique<D3D11Device>(); 1225 #endif 1226 1227 #ifdef __APPLE__ 1228 case RenderAPI::Metal: 1229 return WrapNewMetalDevice(); 1230 #endif 1231 1232 default: 1233 return {}; 1234 } 1235 } 1236 1237 #define SHADERC_FUNCTIONS(X) \ 1238 X(shaderc_compiler_initialize) \ 1239 X(shaderc_compiler_release) \ 1240 X(shaderc_compile_options_initialize) \ 1241 X(shaderc_compile_options_release) \ 1242 X(shaderc_compile_options_set_source_language) \ 1243 X(shaderc_compile_options_set_generate_debug_info) \ 1244 X(shaderc_compile_options_set_optimization_level) \ 1245 X(shaderc_compile_options_set_target_env) \ 1246 X(shaderc_compilation_status_to_string) \ 1247 X(shaderc_compile_into_spv) \ 1248 X(shaderc_result_release) \ 1249 X(shaderc_result_get_length) \ 1250 X(shaderc_result_get_num_warnings) \ 1251 X(shaderc_result_get_bytes) \ 1252 X(shaderc_result_get_compilation_status) \ 1253 X(shaderc_result_get_error_message) 1254 1255 #define SPIRV_CROSS_FUNCTIONS(X) \ 1256 X(spvc_context_create) \ 1257 X(spvc_context_destroy) \ 1258 X(spvc_context_set_error_callback) \ 1259 X(spvc_context_parse_spirv) \ 1260 X(spvc_context_create_compiler) \ 1261 X(spvc_compiler_create_compiler_options) \ 1262 X(spvc_compiler_create_shader_resources) \ 1263 X(spvc_compiler_get_execution_model) \ 1264 X(spvc_compiler_options_set_bool) \ 1265 X(spvc_compiler_options_set_uint) \ 1266 X(spvc_compiler_install_compiler_options) \ 1267 X(spvc_compiler_require_extension) \ 1268 X(spvc_compiler_compile) \ 1269 X(spvc_resources_get_resource_list_for_type) 1270 1271 #ifdef _WIN32 1272 #define SPIRV_CROSS_HLSL_FUNCTIONS(X) X(spvc_compiler_hlsl_add_resource_binding) 1273 #else 1274 #define SPIRV_CROSS_HLSL_FUNCTIONS(X) 1275 #endif 1276 #ifdef __APPLE__ 1277 #define SPIRV_CROSS_MSL_FUNCTIONS(X) X(spvc_compiler_msl_add_resource_binding) 1278 #else 1279 #define SPIRV_CROSS_MSL_FUNCTIONS(X) 1280 #endif 1281 1282 // TODO: NOT thread safe, yet. 1283 namespace dyn_libs { 1284 static bool OpenShaderc(Error* error); 1285 static void CloseShaderc(); 1286 static bool OpenSpirvCross(Error* error); 1287 static void CloseSpirvCross(); 1288 static void CloseAll(); 1289 1290 static DynamicLibrary s_shaderc_library; 1291 static DynamicLibrary s_spirv_cross_library; 1292 1293 static shaderc_compiler_t s_shaderc_compiler = nullptr; 1294 1295 static bool s_close_registered = false; 1296 1297 #define ADD_FUNC(F) static decltype(&::F) F; 1298 SHADERC_FUNCTIONS(ADD_FUNC) 1299 SPIRV_CROSS_FUNCTIONS(ADD_FUNC) 1300 SPIRV_CROSS_HLSL_FUNCTIONS(ADD_FUNC) 1301 SPIRV_CROSS_MSL_FUNCTIONS(ADD_FUNC) 1302 #undef ADD_FUNC 1303 1304 } // namespace dyn_libs 1305 1306 bool dyn_libs::OpenShaderc(Error* error) 1307 { 1308 if (s_shaderc_library.IsOpen()) 1309 return true; 1310 1311 const std::string libname = DynamicLibrary::GetVersionedFilename("shaderc_shared"); 1312 if (!s_shaderc_library.Open(libname.c_str(), error)) 1313 { 1314 Error::AddPrefix(error, "Failed to load shaderc: "); 1315 return false; 1316 } 1317 1318 #define LOAD_FUNC(F) \ 1319 if (!s_shaderc_library.GetSymbol(#F, &F)) \ 1320 { \ 1321 Error::SetStringFmt(error, "Failed to find function {}", #F); \ 1322 CloseShaderc(); \ 1323 return false; \ 1324 } 1325 1326 SHADERC_FUNCTIONS(LOAD_FUNC) 1327 #undef LOAD_FUNC 1328 1329 s_shaderc_compiler = shaderc_compiler_initialize(); 1330 if (!s_shaderc_compiler) 1331 { 1332 Error::SetStringView(error, "shaderc_compiler_initialize() failed"); 1333 CloseShaderc(); 1334 return false; 1335 } 1336 1337 if (!s_close_registered) 1338 { 1339 s_close_registered = true; 1340 std::atexit(&dyn_libs::CloseAll); 1341 } 1342 1343 return true; 1344 } 1345 1346 void dyn_libs::CloseShaderc() 1347 { 1348 if (s_shaderc_compiler) 1349 { 1350 shaderc_compiler_release(s_shaderc_compiler); 1351 s_shaderc_compiler = nullptr; 1352 } 1353 1354 #define UNLOAD_FUNC(F) F = nullptr; 1355 SHADERC_FUNCTIONS(UNLOAD_FUNC) 1356 #undef UNLOAD_FUNC 1357 1358 s_shaderc_library.Close(); 1359 } 1360 1361 bool dyn_libs::OpenSpirvCross(Error* error) 1362 { 1363 if (s_spirv_cross_library.IsOpen()) 1364 return true; 1365 1366 #ifdef _WIN32 1367 // SPVC's build on Windows doesn't spit out a versioned DLL. 1368 const std::string libname = DynamicLibrary::GetVersionedFilename("spirv-cross-c-shared"); 1369 #else 1370 const std::string libname = DynamicLibrary::GetVersionedFilename("spirv-cross-c-shared", SPVC_C_API_VERSION_MAJOR); 1371 #endif 1372 if (!s_spirv_cross_library.Open(libname.c_str(), error)) 1373 { 1374 Error::AddPrefix(error, "Failed to load spirv-cross: "); 1375 return false; 1376 } 1377 1378 #define LOAD_FUNC(F) \ 1379 if (!s_spirv_cross_library.GetSymbol(#F, &F)) \ 1380 { \ 1381 Error::SetStringFmt(error, "Failed to find function {}", #F); \ 1382 CloseShaderc(); \ 1383 return false; \ 1384 } 1385 1386 SPIRV_CROSS_FUNCTIONS(LOAD_FUNC) 1387 SPIRV_CROSS_HLSL_FUNCTIONS(LOAD_FUNC) 1388 SPIRV_CROSS_MSL_FUNCTIONS(LOAD_FUNC) 1389 #undef LOAD_FUNC 1390 1391 if (!s_close_registered) 1392 { 1393 s_close_registered = true; 1394 std::atexit(&dyn_libs::CloseAll); 1395 } 1396 1397 return true; 1398 } 1399 1400 void dyn_libs::CloseSpirvCross() 1401 { 1402 #define UNLOAD_FUNC(F) F = nullptr; 1403 SPIRV_CROSS_FUNCTIONS(UNLOAD_FUNC) 1404 SPIRV_CROSS_HLSL_FUNCTIONS(UNLOAD_FUNC) 1405 SPIRV_CROSS_MSL_FUNCTIONS(UNLOAD_FUNC) 1406 #undef UNLOAD_FUNC 1407 1408 s_spirv_cross_library.Close(); 1409 } 1410 1411 void dyn_libs::CloseAll() 1412 { 1413 CloseShaderc(); 1414 CloseSpirvCross(); 1415 } 1416 1417 #undef SPIRV_CROSS_HLSL_FUNCTIONS 1418 #undef SPIRV_CROSS_MSL_FUNCTIONS 1419 #undef SPIRV_CROSS_FUNCTIONS 1420 #undef SHADERC_FUNCTIONS 1421 1422 bool GPUDevice::CompileGLSLShaderToVulkanSpv(GPUShaderStage stage, GPUShaderLanguage source_language, 1423 std::string_view source, const char* entry_point, bool optimization, 1424 bool nonsemantic_debug_info, DynamicHeapArray<u8>* out_binary, 1425 Error* error) 1426 { 1427 static constexpr const std::array<shaderc_shader_kind, static_cast<size_t>(GPUShaderStage::MaxCount)> stage_kinds = {{ 1428 shaderc_glsl_vertex_shader, 1429 shaderc_glsl_fragment_shader, 1430 shaderc_glsl_geometry_shader, 1431 shaderc_glsl_compute_shader, 1432 }}; 1433 1434 if (source_language != GPUShaderLanguage::GLSLVK) 1435 { 1436 Error::SetStringFmt(error, "Unsupported source language for transpile: {}", 1437 ShaderLanguageToString(source_language)); 1438 return false; 1439 } 1440 1441 if (!dyn_libs::OpenShaderc(error)) 1442 return false; 1443 1444 const shaderc_compile_options_t options = dyn_libs::shaderc_compile_options_initialize(); 1445 AssertMsg(options, "shaderc_compile_options_initialize() failed"); 1446 1447 dyn_libs::shaderc_compile_options_set_source_language(options, shaderc_source_language_glsl); 1448 dyn_libs::shaderc_compile_options_set_target_env(options, shaderc_target_env_vulkan, 0); 1449 dyn_libs::shaderc_compile_options_set_generate_debug_info(options, m_debug_device, 1450 m_debug_device && nonsemantic_debug_info); 1451 dyn_libs::shaderc_compile_options_set_optimization_level( 1452 options, optimization ? shaderc_optimization_level_performance : shaderc_optimization_level_zero); 1453 1454 const shaderc_compilation_result_t result = 1455 dyn_libs::shaderc_compile_into_spv(dyn_libs::s_shaderc_compiler, source.data(), source.length(), 1456 stage_kinds[static_cast<size_t>(stage)], "source", entry_point, options); 1457 const shaderc_compilation_status status = 1458 result ? dyn_libs::shaderc_result_get_compilation_status(result) : shaderc_compilation_status_internal_error; 1459 if (status != shaderc_compilation_status_success) 1460 { 1461 const std::string_view errors(result ? dyn_libs::shaderc_result_get_error_message(result) : "null result object"); 1462 Error::SetStringFmt(error, "Failed to compile shader to SPIR-V: {}\n{}", 1463 dyn_libs::shaderc_compilation_status_to_string(status), errors); 1464 ERROR_LOG("Failed to compile shader to SPIR-V: {}\n{}", dyn_libs::shaderc_compilation_status_to_string(status), 1465 errors); 1466 DumpBadShader(source, errors); 1467 } 1468 else 1469 { 1470 const size_t num_warnings = dyn_libs::shaderc_result_get_num_warnings(result); 1471 if (num_warnings > 0) 1472 WARNING_LOG("Shader compiled with warnings:\n{}", dyn_libs::shaderc_result_get_error_message(result)); 1473 1474 const size_t spirv_size = dyn_libs::shaderc_result_get_length(result); 1475 DebugAssert(spirv_size > 0); 1476 out_binary->resize(spirv_size); 1477 std::memcpy(out_binary->data(), dyn_libs::shaderc_result_get_bytes(result), spirv_size); 1478 } 1479 1480 dyn_libs::shaderc_result_release(result); 1481 dyn_libs::shaderc_compile_options_release(options); 1482 return (status == shaderc_compilation_status_success); 1483 } 1484 1485 bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GPUShaderStage stage, 1486 GPUShaderLanguage target_language, u32 target_version, std::string* output, 1487 Error* error) 1488 { 1489 if (!dyn_libs::OpenSpirvCross(error)) 1490 return false; 1491 1492 spvc_context sctx; 1493 spvc_result sres; 1494 if ((sres = dyn_libs::spvc_context_create(&sctx)) != SPVC_SUCCESS) 1495 { 1496 Error::SetStringFmt(error, "spvc_context_create() failed: {}", static_cast<int>(sres)); 1497 return false; 1498 } 1499 1500 const ScopedGuard sctx_guard = [&sctx]() { dyn_libs::spvc_context_destroy(sctx); }; 1501 1502 dyn_libs::spvc_context_set_error_callback( 1503 sctx, 1504 [](void* error, const char* errormsg) { 1505 ERROR_LOG("SPIRV-Cross reported an error: {}", errormsg); 1506 Error::SetStringView(static_cast<Error*>(error), errormsg); 1507 }, 1508 error); 1509 1510 spvc_parsed_ir sir; 1511 if ((sres = dyn_libs::spvc_context_parse_spirv(sctx, reinterpret_cast<const u32*>(spirv.data()), spirv.size() / 4, 1512 &sir)) != SPVC_SUCCESS) 1513 { 1514 Error::SetStringFmt(error, "spvc_context_parse_spirv() failed: {}", static_cast<int>(sres)); 1515 return {}; 1516 } 1517 1518 static constexpr std::array<spvc_backend, static_cast<size_t>(GPUShaderLanguage::Count)> backends = { 1519 {SPVC_BACKEND_NONE, SPVC_BACKEND_HLSL, SPVC_BACKEND_GLSL, SPVC_BACKEND_GLSL, SPVC_BACKEND_GLSL, SPVC_BACKEND_MSL, 1520 SPVC_BACKEND_NONE}}; 1521 1522 spvc_compiler scompiler; 1523 if ((sres = dyn_libs::spvc_context_create_compiler(sctx, backends[static_cast<size_t>(target_language)], sir, 1524 SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &scompiler)) != SPVC_SUCCESS) 1525 { 1526 Error::SetStringFmt(error, "spvc_context_create_compiler() failed: {}", static_cast<int>(sres)); 1527 return {}; 1528 } 1529 1530 spvc_compiler_options soptions; 1531 if ((sres = dyn_libs::spvc_compiler_create_compiler_options(scompiler, &soptions)) != SPVC_SUCCESS) 1532 { 1533 Error::SetStringFmt(error, "spvc_compiler_create_compiler_options() failed: {}", static_cast<int>(sres)); 1534 return {}; 1535 } 1536 1537 spvc_resources resources; 1538 if ((sres = dyn_libs::spvc_compiler_create_shader_resources(scompiler, &resources)) != SPVC_SUCCESS) 1539 { 1540 Error::SetStringFmt(error, "spvc_compiler_create_shader_resources() failed: {}", static_cast<int>(sres)); 1541 return {}; 1542 } 1543 1544 // Need to know if there's UBOs for mapping. 1545 const spvc_reflected_resource *ubos, *textures; 1546 size_t ubos_count, textures_count; 1547 if ((sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, &ubos, 1548 &ubos_count)) != SPVC_SUCCESS || 1549 (sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_SAMPLED_IMAGE, 1550 &textures, &textures_count)) != SPVC_SUCCESS) 1551 { 1552 Error::SetStringFmt(error, "spvc_resources_get_resource_list_for_type() failed: {}", static_cast<int>(sres)); 1553 return {}; 1554 } 1555 1556 [[maybe_unused]] const SpvExecutionModel execmodel = dyn_libs::spvc_compiler_get_execution_model(scompiler); 1557 1558 switch (target_language) 1559 { 1560 #ifdef _WIN32 1561 case GPUShaderLanguage::HLSL: 1562 { 1563 if ((sres = dyn_libs::spvc_compiler_options_set_uint(soptions, SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL, 1564 target_version)) != SPVC_SUCCESS) 1565 { 1566 Error::SetStringFmt(error, "spvc_compiler_options_set_uint(SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL) failed: {}", 1567 static_cast<int>(sres)); 1568 return {}; 1569 } 1570 1571 if ((sres = dyn_libs::spvc_compiler_options_set_bool( 1572 soptions, SPVC_COMPILER_OPTION_HLSL_SUPPORT_NONZERO_BASE_VERTEX_BASE_INSTANCE, false)) != SPVC_SUCCESS) 1573 { 1574 Error::SetStringFmt(error, 1575 "spvc_compiler_options_set_bool(SPVC_COMPILER_OPTION_HLSL_SUPPORT_NONZERO_BASE_VERTEX_" 1576 "BASE_INSTANCE) failed: {}", 1577 static_cast<int>(sres)); 1578 return {}; 1579 } 1580 1581 u32 start_set = 0; 1582 if (ubos_count > 0) 1583 { 1584 const spvc_hlsl_resource_binding rb = {.stage = execmodel, 1585 .desc_set = start_set++, 1586 .binding = 0, 1587 .cbv = {.register_space = 0, .register_binding = 0}}; 1588 if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS) 1589 { 1590 Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast<int>(sres)); 1591 return {}; 1592 } 1593 } 1594 1595 if (textures_count > 0) 1596 { 1597 for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) 1598 { 1599 const spvc_hlsl_resource_binding rb = {.stage = execmodel, 1600 .desc_set = start_set++, 1601 .binding = i, 1602 .srv = {.register_space = 0, .register_binding = i}, 1603 .sampler = {.register_space = 0, .register_binding = i}}; 1604 if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS) 1605 { 1606 Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast<int>(sres)); 1607 return {}; 1608 } 1609 } 1610 } 1611 } 1612 break; 1613 #endif 1614 1615 #ifdef ENABLE_OPENGL 1616 case GPUShaderLanguage::GLSL: 1617 case GPUShaderLanguage::GLSLES: 1618 { 1619 if ((sres = dyn_libs::spvc_compiler_options_set_uint(soptions, SPVC_COMPILER_OPTION_GLSL_VERSION, 1620 target_version)) != SPVC_SUCCESS) 1621 { 1622 Error::SetStringFmt(error, "spvc_compiler_options_set_uint(SPVC_COMPILER_OPTION_GLSL_VERSION) failed: {}", 1623 static_cast<int>(sres)); 1624 return {}; 1625 } 1626 1627 const bool is_gles = (target_language == GPUShaderLanguage::GLSLES); 1628 if ((sres = dyn_libs::spvc_compiler_options_set_bool(soptions, SPVC_COMPILER_OPTION_GLSL_ES, is_gles)) != 1629 SPVC_SUCCESS) 1630 { 1631 Error::SetStringFmt(error, "spvc_compiler_options_set_bool(SPVC_COMPILER_OPTION_GLSL_ES) failed: {}", 1632 static_cast<int>(sres)); 1633 return {}; 1634 } 1635 1636 const bool enable_420pack = (is_gles ? (target_version >= 310) : (target_version >= 420)); 1637 if ((sres = dyn_libs::spvc_compiler_options_set_bool(soptions, SPVC_COMPILER_OPTION_GLSL_ENABLE_420PACK_EXTENSION, 1638 enable_420pack)) != SPVC_SUCCESS) 1639 { 1640 Error::SetStringFmt( 1641 error, "spvc_compiler_options_set_bool(SPVC_COMPILER_OPTION_GLSL_ENABLE_420PACK_EXTENSION) failed: {}", 1642 static_cast<int>(sres)); 1643 return {}; 1644 } 1645 } 1646 break; 1647 #endif 1648 1649 #ifdef __APPLE__ 1650 case GPUShaderLanguage::MSL: 1651 { 1652 if ((sres = dyn_libs::spvc_compiler_options_set_bool( 1653 soptions, SPVC_COMPILER_OPTION_MSL_PAD_FRAGMENT_OUTPUT_COMPONENTS, true)) != SPVC_SUCCESS) 1654 { 1655 Error::SetStringFmt( 1656 error, "spvc_compiler_options_set_bool(SPVC_COMPILER_OPTION_MSL_PAD_FRAGMENT_OUTPUT_COMPONENTS) failed: {}", 1657 static_cast<int>(sres)); 1658 return {}; 1659 } 1660 1661 if ((sres = dyn_libs::spvc_compiler_options_set_bool(soptions, SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS, 1662 m_features.framebuffer_fetch)) != SPVC_SUCCESS) 1663 { 1664 Error::SetStringFmt( 1665 error, "spvc_compiler_options_set_bool(SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS) failed: {}", 1666 static_cast<int>(sres)); 1667 return {}; 1668 } 1669 1670 if (m_features.framebuffer_fetch && 1671 ((sres = dyn_libs::spvc_compiler_options_set_uint(soptions, SPVC_COMPILER_OPTION_MSL_VERSION, 1672 SPVC_MAKE_MSL_VERSION(2, 3, 0))) != SPVC_SUCCESS)) 1673 { 1674 Error::SetStringFmt(error, "spvc_compiler_options_set_uint(SPVC_COMPILER_OPTION_MSL_VERSION) failed: {}", 1675 static_cast<int>(sres)); 1676 return {}; 1677 } 1678 1679 if (stage == GPUShaderStage::Fragment) 1680 { 1681 for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) 1682 { 1683 const spvc_msl_resource_binding rb = {.stage = SpvExecutionModelFragment, 1684 .desc_set = 1, 1685 .binding = i, 1686 .msl_buffer = i, 1687 .msl_texture = i, 1688 .msl_sampler = i}; 1689 1690 if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS) 1691 { 1692 Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() failed: {}", static_cast<int>(sres)); 1693 return {}; 1694 } 1695 } 1696 1697 if (!m_features.framebuffer_fetch) 1698 { 1699 const spvc_msl_resource_binding rb = { 1700 .stage = SpvExecutionModelFragment, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS}; 1701 1702 if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS) 1703 { 1704 Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}", 1705 static_cast<int>(sres)); 1706 return {}; 1707 } 1708 } 1709 } 1710 } 1711 break; 1712 #endif 1713 1714 default: 1715 Error::SetStringFmt(error, "Unsupported target language {}.", ShaderLanguageToString(target_language)); 1716 break; 1717 } 1718 1719 if ((sres = dyn_libs::spvc_compiler_install_compiler_options(scompiler, soptions)) != SPVC_SUCCESS) 1720 { 1721 Error::SetStringFmt(error, "spvc_compiler_install_compiler_options() failed: {}", static_cast<int>(sres)); 1722 return false; 1723 } 1724 1725 const char* out_src; 1726 if ((sres = dyn_libs::spvc_compiler_compile(scompiler, &out_src)) != SPVC_SUCCESS) 1727 { 1728 Error::SetStringFmt(error, "spvc_compiler_compile() failed: {}", static_cast<int>(sres)); 1729 return false; 1730 } 1731 1732 const size_t out_src_length = out_src ? std::strlen(out_src) : 0; 1733 if (out_src_length == 0) 1734 { 1735 Error::SetStringView(error, "Failed to compile SPIR-V to target language."); 1736 return false; 1737 } 1738 1739 output->assign(out_src, out_src_length); 1740 return true; 1741 } 1742 1743 std::unique_ptr<GPUShader> GPUDevice::TranspileAndCreateShaderFromSource( 1744 GPUShaderStage stage, GPUShaderLanguage source_language, std::string_view source, const char* entry_point, 1745 GPUShaderLanguage target_language, u32 target_version, DynamicHeapArray<u8>* out_binary, Error* error) 1746 { 1747 // Disable optimization when targeting OpenGL GLSL, otherwise, the name-based linking will fail. 1748 const bool optimization = 1749 (target_language != GPUShaderLanguage::GLSL && target_language != GPUShaderLanguage::GLSLES); 1750 DynamicHeapArray<u8> spv; 1751 if (!CompileGLSLShaderToVulkanSpv(stage, source_language, source, entry_point, optimization, false, &spv, error)) 1752 return {}; 1753 1754 std::string dest_source; 1755 if (!TranslateVulkanSpvToLanguage(spv.cspan(), stage, target_language, target_version, &dest_source, error)) 1756 return {}; 1757 1758 // TODO: MSL needs entry point suffixed. 1759 1760 return CreateShaderFromSource(stage, target_language, dest_source, entry_point, out_binary, error); 1761 }