// d3d12_device.cpp (77993B)
1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR PolyForm-Strict-1.0.0) 3 4 #include "d3d12_device.h" 5 #include "d3d12_builders.h" 6 #include "d3d12_pipeline.h" 7 #include "d3d12_stream_buffer.h" 8 #include "d3d12_texture.h" 9 #include "d3d_common.h" 10 11 #include "core/host.h" 12 13 #include "common/align.h" 14 #include "common/assert.h" 15 #include "common/bitutils.h" 16 #include "common/error.h" 17 #include "common/file_system.h" 18 #include "common/log.h" 19 #include "common/path.h" 20 #include "common/scoped_guard.h" 21 #include "common/small_string.h" 22 #include "common/string_util.h" 23 24 #include "D3D12MemAlloc.h" 25 #include "fmt/format.h" 26 27 #include <limits> 28 #include <mutex> 29 30 Log_SetChannel(D3D12Device); 31 32 // Tweakables 33 enum : u32 34 { 35 MAX_DRAW_CALLS_PER_FRAME = 2048, 36 MAX_DESCRIPTORS_PER_FRAME = 32768, 37 MAX_SAMPLERS_PER_FRAME = D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE, 38 MAX_DESCRIPTOR_SETS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME, 39 40 MAX_PERSISTENT_DESCRIPTORS = 2048, 41 MAX_PERSISTENT_RTVS = 512, 42 MAX_PERSISTENT_DSVS = 128, 43 MAX_PERSISTENT_SAMPLERS = 512, 44 45 VERTEX_BUFFER_SIZE = 32 * 1024 * 1024, 46 INDEX_BUFFER_SIZE = 16 * 1024 * 1024, 47 VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024, 48 FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024, 49 TEXTURE_BUFFER_SIZE = 64 * 1024 * 1024, 50 51 // UNIFORM_PUSH_CONSTANTS_STAGES = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 52 UNIFORM_PUSH_CONSTANTS_SIZE = 128, 53 54 MAX_UNIFORM_BUFFER_SIZE = 1024, 55 }; 56 57 // We need to synchronize instance creation because of adapter enumeration from the UI thread. 58 static std::mutex s_instance_mutex; 59 60 static constexpr GPUTexture::Format s_swap_chain_format = GPUTexture::Format::RGBA8; 61 62 // We just need to keep this alive, never reference it. 
63 static DynamicHeapArray<u8> s_pipeline_cache_data; 64 65 #ifdef _DEBUG 66 #include "WinPixEventRuntime/pix3.h" 67 static u32 s_debug_scope_depth = 0; 68 #endif 69 70 D3D12Device::D3D12Device() 71 { 72 #ifdef _DEBUG 73 s_debug_scope_depth = 0; 74 #endif 75 } 76 77 D3D12Device::~D3D12Device() 78 { 79 Assert(!m_device); 80 Assert(s_pipeline_cache_data.empty()); 81 } 82 83 D3D12Device::ComPtr<ID3DBlob> D3D12Device::SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc, Error* error) 84 { 85 ComPtr<ID3DBlob> blob; 86 ComPtr<ID3DBlob> error_blob; 87 const HRESULT hr = 88 D3D12SerializeRootSignature(desc, D3D_ROOT_SIGNATURE_VERSION_1, blob.GetAddressOf(), error_blob.GetAddressOf()); 89 if (FAILED(hr)) [[unlikely]] 90 { 91 Error::SetHResult(error, "D3D12SerializeRootSignature() failed: ", hr); 92 if (error_blob) 93 ERROR_LOG(static_cast<const char*>(error_blob->GetBufferPointer())); 94 95 return {}; 96 } 97 98 return blob; 99 } 100 101 D3D12Device::ComPtr<ID3D12RootSignature> D3D12Device::CreateRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc, 102 Error* error) 103 { 104 ComPtr<ID3DBlob> blob = SerializeRootSignature(desc, error); 105 if (!blob) 106 return {}; 107 108 ComPtr<ID3D12RootSignature> rs; 109 const HRESULT hr = 110 m_device->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(), IID_PPV_ARGS(rs.GetAddressOf())); 111 if (FAILED(hr)) [[unlikely]] 112 { 113 Error::SetHResult(error, "CreateRootSignature() failed: ", hr); 114 return {}; 115 } 116 117 return rs; 118 } 119 120 bool D3D12Device::CreateDevice(std::string_view adapter, bool threaded_presentation, 121 std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features, 122 Error* error) 123 { 124 std::unique_lock lock(s_instance_mutex); 125 126 m_dxgi_factory = D3DCommon::CreateFactory(m_debug_device, error); 127 if (!m_dxgi_factory) 128 return false; 129 130 m_adapter = D3DCommon::GetAdapterByName(m_dxgi_factory.Get(), adapter); 131 132 HRESULT hr = S_OK; 133 134 
// Enabling the debug layer will fail if the Graphics Tools feature is not installed. 135 if (m_debug_device) 136 { 137 ComPtr<ID3D12Debug> debug12; 138 hr = D3D12GetDebugInterface(IID_PPV_ARGS(debug12.GetAddressOf())); 139 if (SUCCEEDED(hr)) 140 { 141 debug12->EnableDebugLayer(); 142 } 143 else 144 { 145 ERROR_LOG("Debug layer requested but not available."); 146 m_debug_device = false; 147 } 148 } 149 150 // Create the actual device. 151 for (D3D_FEATURE_LEVEL try_feature_level : {D3D_FEATURE_LEVEL_11_0}) 152 { 153 hr = D3D12CreateDevice(m_adapter.Get(), try_feature_level, IID_PPV_ARGS(&m_device)); 154 if (SUCCEEDED(hr)) 155 { 156 m_feature_level = try_feature_level; 157 break; 158 } 159 } 160 if (FAILED(hr)) 161 { 162 Error::SetHResult(error, "Failed to create D3D12 device: ", hr); 163 return false; 164 } 165 166 if (!m_adapter) 167 { 168 const LUID luid(m_device->GetAdapterLuid()); 169 if (FAILED(m_dxgi_factory->EnumAdapterByLuid(luid, IID_PPV_ARGS(m_adapter.GetAddressOf())))) 170 ERROR_LOG("Failed to get lookup adapter by device LUID"); 171 } 172 173 if (m_debug_device) 174 { 175 ComPtr<ID3D12InfoQueue> info_queue; 176 if (SUCCEEDED(m_device.As(&info_queue))) 177 { 178 if (IsDebuggerPresent()) 179 { 180 info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE); 181 info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE); 182 } 183 184 D3D12_INFO_QUEUE_FILTER filter = {}; 185 std::array<D3D12_MESSAGE_ID, 6> id_list{ 186 D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, 187 D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE, 188 D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET, 189 D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH, 190 D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE, 191 D3D12_MESSAGE_ID_LOADPIPELINE_NAMENOTFOUND, 192 }; 193 filter.DenyList.NumIDs = static_cast<UINT>(id_list.size()); 194 filter.DenyList.pIDList = id_list.data(); 195 info_queue->PushStorageFilter(&filter); 196 } 197 } 
198 199 const D3D12_COMMAND_QUEUE_DESC queue_desc = {D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, 200 D3D12_COMMAND_QUEUE_FLAG_NONE, 0u}; 201 hr = m_device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(&m_command_queue)); 202 if (FAILED(hr)) 203 { 204 Error::SetHResult(error, "Failed to create command queue: ", hr); 205 return false; 206 } 207 208 D3D12MA::ALLOCATOR_DESC allocatorDesc = {}; 209 allocatorDesc.pDevice = m_device.Get(); 210 allocatorDesc.pAdapter = m_adapter.Get(); 211 allocatorDesc.Flags = 212 D3D12MA::ALLOCATOR_FLAG_SINGLETHREADED | 213 D3D12MA::ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED /* | D3D12MA::ALLOCATOR_FLAG_ALWAYS_COMMITTED*/; 214 215 hr = D3D12MA::CreateAllocator(&allocatorDesc, m_allocator.GetAddressOf()); 216 if (FAILED(hr)) 217 { 218 Error::SetHResult(error, "D3D12MA::CreateAllocator() failed: ", hr); 219 return false; 220 } 221 222 hr = m_device->CreateFence(m_completed_fence_value, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_fence)); 223 if (FAILED(hr)) 224 { 225 Error::SetHResult(error, "Failed to create fence: ", hr); 226 return false; 227 } 228 229 m_fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr); 230 if (m_fence_event == NULL) 231 { 232 Error::SetWin32(error, "Failed to create fence event: ", GetLastError()); 233 return false; 234 } 235 236 SetFeatures(disabled_features); 237 238 if (!CreateCommandLists(error) || !CreateDescriptorHeaps(error)) 239 return false; 240 241 if (!m_window_info.IsSurfaceless() && !CreateSwapChain(error)) 242 return false; 243 244 if (!CreateRootSignatures(error) || !CreateBuffers(error)) 245 return false; 246 247 CreateTimestampQuery(); 248 return true; 249 } 250 251 void D3D12Device::DestroyDevice() 252 { 253 std::unique_lock lock(s_instance_mutex); 254 255 // Toss command list if we're recording... 
256 if (InRenderPass()) 257 EndRenderPass(); 258 259 WaitForGPUIdle(); 260 261 DestroyDeferredObjects(m_current_fence_value); 262 DestroySamplers(); 263 DestroyTimestampQuery(); 264 DestroyBuffers(); 265 DestroyDescriptorHeaps(); 266 DestroyRootSignatures(); 267 DestroySwapChain(); 268 DestroyCommandLists(); 269 270 m_pipeline_library.Reset(); 271 s_pipeline_cache_data.deallocate(); 272 m_fence.Reset(); 273 if (m_fence_event != NULL) 274 { 275 CloseHandle(m_fence_event); 276 m_fence_event = NULL; 277 } 278 279 m_allocator.Reset(); 280 m_command_queue.Reset(); 281 m_device.Reset(); 282 m_adapter.Reset(); 283 m_dxgi_factory.Reset(); 284 } 285 286 bool D3D12Device::ReadPipelineCache(std::optional<DynamicHeapArray<u8>> data) 287 { 288 HRESULT hr = 289 m_device->CreatePipelineLibrary(data.has_value() ? data->data() : nullptr, data.has_value() ? data->size() : 0, 290 IID_PPV_ARGS(m_pipeline_library.ReleaseAndGetAddressOf())); 291 if (SUCCEEDED(hr)) 292 { 293 if (data.has_value()) 294 s_pipeline_cache_data = std::move(data.value()); 295 296 return true; 297 } 298 299 // Try without the cache data. 300 if (data.has_value()) 301 { 302 WARNING_LOG("CreatePipelineLibrary() failed, trying without cache data. Error: {}", 303 Error::CreateHResult(hr).GetDescription()); 304 305 hr = m_device->CreatePipelineLibrary(nullptr, 0, IID_PPV_ARGS(m_pipeline_library.ReleaseAndGetAddressOf())); 306 if (SUCCEEDED(hr)) 307 return true; 308 } 309 310 if (FAILED(hr)) 311 { 312 WARNING_LOG("CreatePipelineLibrary() failed, pipeline caching will not be available. 
Error: {}", 313 Error::CreateHResult(hr).GetDescription()); 314 return false; 315 } 316 317 return true; 318 } 319 320 bool D3D12Device::GetPipelineCacheData(DynamicHeapArray<u8>* data) 321 { 322 if (!m_pipeline_library) 323 return false; 324 325 const size_t size = m_pipeline_library->GetSerializedSize(); 326 if (size == 0) 327 { 328 WARNING_LOG("Empty serialized pipeline state returned."); 329 return true; 330 } 331 332 data->resize(size); 333 const HRESULT hr = m_pipeline_library->Serialize(data->data(), data->size()); 334 if (FAILED(hr)) 335 { 336 ERROR_LOG("Serialize() failed with HRESULT {:08X}", static_cast<unsigned>(hr)); 337 data->deallocate(); 338 return false; 339 } 340 341 return true; 342 } 343 344 bool D3D12Device::CreateCommandLists(Error* error) 345 { 346 for (u32 i = 0; i < NUM_COMMAND_LISTS; i++) 347 { 348 CommandList& res = m_command_lists[i]; 349 HRESULT hr; 350 351 for (u32 j = 0; j < 2; j++) 352 { 353 hr = m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, 354 IID_PPV_ARGS(res.command_allocators[j].GetAddressOf())); 355 if (FAILED(hr)) 356 { 357 Error::SetHResult(error, "CreateCommandAllocator() failed: ", hr); 358 return false; 359 } 360 361 hr = m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, res.command_allocators[j].Get(), nullptr, 362 IID_PPV_ARGS(res.command_lists[j].GetAddressOf())); 363 if (FAILED(hr)) 364 { 365 Error::SetHResult(error, "CreateCommandList() failed: ", hr); 366 return false; 367 } 368 369 // Close the command lists, since the first thing we do is reset them. 
370 hr = res.command_lists[j]->Close(); 371 if (FAILED(hr)) 372 { 373 Error::SetHResult(error, "Close() for new command list failed: ", hr); 374 return false; 375 } 376 } 377 378 if (!res.descriptor_allocator.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 379 MAX_DESCRIPTORS_PER_FRAME, error)) 380 { 381 Error::AddPrefix(error, "Failed to create per frame descriptor allocator: "); 382 return false; 383 } 384 385 if (!res.sampler_allocator.Create(m_device.Get(), MAX_SAMPLERS_PER_FRAME, error)) 386 { 387 Error::AddPrefix(error, "Failed to create per frame sampler allocator: "); 388 return false; 389 } 390 } 391 392 MoveToNextCommandList(); 393 return true; 394 } 395 396 void D3D12Device::MoveToNextCommandList() 397 { 398 m_current_command_list = (m_current_command_list + 1) % NUM_COMMAND_LISTS; 399 m_current_fence_value++; 400 401 // We may have to wait if this command list hasn't finished on the GPU. 402 CommandList& res = m_command_lists[m_current_command_list]; 403 WaitForFence(res.fence_counter); 404 res.fence_counter = m_current_fence_value; 405 res.init_list_used = false; 406 407 // Begin command list. 408 res.command_allocators[1]->Reset(); 409 res.command_lists[1]->Reset(res.command_allocators[1].Get(), nullptr); 410 res.descriptor_allocator.Reset(); 411 if (res.sampler_allocator.ShouldReset()) 412 res.sampler_allocator.Reset(); 413 414 if (res.has_timestamp_query) 415 { 416 // readback timestamp from the last time this cmdlist was used. 417 // we don't need to worry about disjoint in dx12, the frequency is reliable within a single cmdlist. 
418 const u32 offset = (m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)); 419 const D3D12_RANGE read_range = {offset, offset + (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)}; 420 void* map; 421 HRESULT hr = m_timestamp_query_buffer->Map(0, &read_range, &map); 422 if (SUCCEEDED(hr)) 423 { 424 u64 timestamps[2]; 425 std::memcpy(timestamps, static_cast<const u8*>(map) + offset, sizeof(timestamps)); 426 m_accumulated_gpu_time += 427 static_cast<float>(static_cast<double>(timestamps[1] - timestamps[0]) / m_timestamp_frequency); 428 429 const D3D12_RANGE write_range = {}; 430 m_timestamp_query_buffer->Unmap(0, &write_range); 431 } 432 else 433 { 434 WARNING_LOG("Map() for timestamp query failed: {:08X}", static_cast<unsigned>(hr)); 435 } 436 } 437 438 res.has_timestamp_query = m_gpu_timing_enabled; 439 if (m_gpu_timing_enabled) 440 { 441 res.command_lists[1]->EndQuery(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, 442 m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST); 443 } 444 445 ID3D12DescriptorHeap* heaps[2] = {res.descriptor_allocator.GetDescriptorHeap(), 446 res.sampler_allocator.GetDescriptorHeap()}; 447 res.command_lists[1]->SetDescriptorHeaps(static_cast<UINT>(std::size(heaps)), heaps); 448 449 m_allocator->SetCurrentFrameIndex(static_cast<UINT>(m_current_fence_value)); 450 InvalidateCachedState(); 451 } 452 453 void D3D12Device::DestroyCommandLists() 454 { 455 for (CommandList& resources : m_command_lists) 456 { 457 resources.descriptor_allocator.Destroy(); 458 resources.sampler_allocator.Destroy(); 459 for (u32 i = 0; i < 2; i++) 460 { 461 resources.command_lists[i].Reset(); 462 resources.command_allocators[i].Reset(); 463 } 464 } 465 } 466 467 bool D3D12Device::CreateDescriptorHeaps(Error* error) 468 { 469 if (!m_descriptor_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 470 MAX_PERSISTENT_DESCRIPTORS, false, error) || 471 !m_rtv_heap_manager.Create(m_device.Get(), 
D3D12_DESCRIPTOR_HEAP_TYPE_RTV, MAX_PERSISTENT_RTVS, false, error) || 472 !m_dsv_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_DSV, MAX_PERSISTENT_DSVS, false, error) || 473 !m_sampler_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, MAX_PERSISTENT_SAMPLERS, false, 474 error)) 475 { 476 return false; 477 } 478 479 // Allocate null SRV descriptor for unbound textures. 480 static constexpr D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = { 481 DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_SRV_DIMENSION_TEXTURE2D, D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, {}}; 482 if (!m_descriptor_heap_manager.Allocate(&m_null_srv_descriptor)) 483 { 484 Error::SetStringView(error, "Failed to allocate null SRV descriptor"); 485 return false; 486 } 487 m_device->CreateShaderResourceView(nullptr, &null_srv_desc, m_null_srv_descriptor.cpu_handle); 488 489 // Same for UAVs. 490 static constexpr D3D12_UNORDERED_ACCESS_VIEW_DESC null_uav_desc = { 491 DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_UAV_DIMENSION_TEXTURE2D, {}}; 492 if (!m_descriptor_heap_manager.Allocate(&m_null_uav_descriptor)) 493 { 494 Error::SetStringView(error, "Failed to allocate null UAV descriptor"); 495 return false; 496 } 497 m_device->CreateUnorderedAccessView(nullptr, nullptr, &null_uav_desc, m_null_uav_descriptor.cpu_handle); 498 499 // Same for samplers. 
500 m_point_sampler = GetSampler(GPUSampler::GetNearestConfig()); 501 for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++) 502 m_current_samplers[i] = m_point_sampler; 503 return true; 504 } 505 506 void D3D12Device::DestroyDescriptorHeaps() 507 { 508 if (m_null_uav_descriptor) 509 m_descriptor_heap_manager.Free(&m_null_uav_descriptor); 510 if (m_null_srv_descriptor) 511 m_descriptor_heap_manager.Free(&m_null_srv_descriptor); 512 m_sampler_heap_manager.Destroy(); 513 m_dsv_heap_manager.Destroy(); 514 m_rtv_heap_manager.Destroy(); 515 m_descriptor_heap_manager.Destroy(); 516 } 517 518 ID3D12GraphicsCommandList4* D3D12Device::GetInitCommandList() 519 { 520 CommandList& res = m_command_lists[m_current_command_list]; 521 if (!res.init_list_used) 522 { 523 HRESULT hr = res.command_allocators[0]->Reset(); 524 AssertMsg(SUCCEEDED(hr), "Reset init command allocator failed"); 525 526 hr = res.command_lists[0]->Reset(res.command_allocators[0].Get(), nullptr); 527 AssertMsg(SUCCEEDED(hr), "Reset init command list failed"); 528 res.init_list_used = true; 529 } 530 531 return res.command_lists[0].Get(); 532 } 533 534 void D3D12Device::SubmitCommandList(bool wait_for_completion) 535 { 536 CommandList& res = m_command_lists[m_current_command_list]; 537 HRESULT hr; 538 539 if (res.has_timestamp_query) 540 { 541 // write the timestamp back at the end of the cmdlist 542 res.command_lists[1]->EndQuery(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, 543 (m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST) + 1); 544 res.command_lists[1]->ResolveQueryData(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, 545 m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST, 546 NUM_TIMESTAMP_QUERIES_PER_CMDLIST, m_timestamp_query_buffer.Get(), 547 m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)); 548 } 549 550 // TODO: error handling 551 if (res.init_list_used) 552 { 553 hr = res.command_lists[0]->Close(); 554 if (FAILED(hr)) [[unlikely]] 555 { 
556 ERROR_LOG("Closing init command list failed with HRESULT {:08X}", static_cast<unsigned>(hr)); 557 Panic("TODO cannot continue"); 558 } 559 } 560 561 // Close and queue command list. 562 hr = res.command_lists[1]->Close(); 563 if (FAILED(hr)) [[unlikely]] 564 { 565 ERROR_LOG("Closing main command list failed with HRESULT {:08X}", static_cast<unsigned>(hr)); 566 Panic("TODO cannot continue"); 567 } 568 569 if (res.init_list_used) 570 { 571 const std::array<ID3D12CommandList*, 2> execute_lists{res.command_lists[0].Get(), res.command_lists[1].Get()}; 572 m_command_queue->ExecuteCommandLists(static_cast<UINT>(execute_lists.size()), execute_lists.data()); 573 } 574 else 575 { 576 const std::array<ID3D12CommandList*, 1> execute_lists{res.command_lists[1].Get()}; 577 m_command_queue->ExecuteCommandLists(static_cast<UINT>(execute_lists.size()), execute_lists.data()); 578 } 579 580 // Update fence when GPU has completed. 581 hr = m_command_queue->Signal(m_fence.Get(), res.fence_counter); 582 DebugAssertMsg(SUCCEEDED(hr), "Signal fence"); 583 584 MoveToNextCommandList(); 585 586 if (wait_for_completion) 587 WaitForFence(res.fence_counter); 588 } 589 590 void D3D12Device::SubmitCommandList(bool wait_for_completion, const std::string_view reason) 591 { 592 WARNING_LOG("Executing command buffer due to '{}'", reason); 593 SubmitCommandList(wait_for_completion); 594 } 595 596 void D3D12Device::SubmitCommandListAndRestartRenderPass(const std::string_view reason) 597 { 598 if (InRenderPass()) 599 EndRenderPass(); 600 601 D3D12Pipeline* pl = m_current_pipeline; 602 SubmitCommandList(false, reason); 603 604 SetPipeline(pl); 605 BeginRenderPass(); 606 } 607 608 void D3D12Device::WaitForFence(u64 fence) 609 { 610 if (m_completed_fence_value >= fence) 611 return; 612 613 // Try non-blocking check. 614 m_completed_fence_value = m_fence->GetCompletedValue(); 615 if (m_completed_fence_value < fence) 616 { 617 // Fall back to event. 
618 HRESULT hr = m_fence->SetEventOnCompletion(fence, m_fence_event); 619 AssertMsg(SUCCEEDED(hr), "Set fence event on completion"); 620 WaitForSingleObject(m_fence_event, INFINITE); 621 m_completed_fence_value = m_fence->GetCompletedValue(); 622 } 623 624 // Release resources for as many command lists which have completed. 625 DestroyDeferredObjects(m_completed_fence_value); 626 } 627 628 void D3D12Device::WaitForGPUIdle() 629 { 630 u32 index = (m_current_command_list + 1) % NUM_COMMAND_LISTS; 631 for (u32 i = 0; i < (NUM_COMMAND_LISTS - 1); i++) 632 { 633 WaitForFence(m_command_lists[index].fence_counter); 634 index = (index + 1) % NUM_COMMAND_LISTS; 635 } 636 } 637 638 void D3D12Device::ExecuteAndWaitForGPUIdle() 639 { 640 if (InRenderPass()) 641 EndRenderPass(); 642 643 SubmitCommandList(true); 644 } 645 646 bool D3D12Device::CreateTimestampQuery() 647 { 648 constexpr u32 QUERY_COUNT = NUM_TIMESTAMP_QUERIES_PER_CMDLIST * NUM_COMMAND_LISTS; 649 constexpr u32 BUFFER_SIZE = sizeof(u64) * QUERY_COUNT; 650 651 const D3D12_QUERY_HEAP_DESC desc = {D3D12_QUERY_HEAP_TYPE_TIMESTAMP, QUERY_COUNT, 0u}; 652 HRESULT hr = m_device->CreateQueryHeap(&desc, IID_PPV_ARGS(m_timestamp_query_heap.GetAddressOf())); 653 if (FAILED(hr)) 654 { 655 ERROR_LOG("CreateQueryHeap() for timestamp failed with {:08X}", static_cast<unsigned>(hr)); 656 m_features.gpu_timing = false; 657 return false; 658 } 659 660 const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_READBACK, 661 D3D12_HEAP_FLAG_NONE, nullptr, nullptr}; 662 const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 663 0, 664 BUFFER_SIZE, 665 1, 666 1, 667 1, 668 DXGI_FORMAT_UNKNOWN, 669 {1, 0}, 670 D3D12_TEXTURE_LAYOUT_ROW_MAJOR, 671 D3D12_RESOURCE_FLAG_NONE}; 672 hr = m_allocator->CreateResource(&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, 673 m_timestamp_query_allocation.GetAddressOf(), 674 
IID_PPV_ARGS(m_timestamp_query_buffer.GetAddressOf())); 675 if (FAILED(hr)) 676 { 677 ERROR_LOG("CreateResource() for timestamp failed with {:08X}", static_cast<unsigned>(hr)); 678 m_features.gpu_timing = false; 679 return false; 680 } 681 682 u64 frequency; 683 hr = m_command_queue->GetTimestampFrequency(&frequency); 684 if (FAILED(hr)) 685 { 686 ERROR_LOG("GetTimestampFrequency() failed: {:08X}", static_cast<unsigned>(hr)); 687 m_features.gpu_timing = false; 688 return false; 689 } 690 691 m_timestamp_frequency = static_cast<double>(frequency) / 1000.0; 692 return true; 693 } 694 695 void D3D12Device::DestroyTimestampQuery() 696 { 697 m_timestamp_query_buffer.Reset(); 698 m_timestamp_query_allocation.Reset(); 699 m_timestamp_query_heap.Reset(); 700 } 701 702 float D3D12Device::GetAndResetAccumulatedGPUTime() 703 { 704 const float time = m_accumulated_gpu_time; 705 m_accumulated_gpu_time = 0.0f; 706 return time; 707 } 708 709 bool D3D12Device::SetGPUTimingEnabled(bool enabled) 710 { 711 m_gpu_timing_enabled = enabled && m_features.gpu_timing; 712 return (enabled == m_gpu_timing_enabled); 713 } 714 715 void D3D12Device::DeferObjectDestruction(ComPtr<ID3D12Object> resource) 716 { 717 DebugAssert(resource); 718 m_cleanup_resources.emplace_back(GetCurrentFenceValue(), 719 std::pair<D3D12MA::Allocation*, ID3D12Object*>(nullptr, resource.Detach())); 720 } 721 722 void D3D12Device::DeferResourceDestruction(ComPtr<D3D12MA::Allocation> allocation, ComPtr<ID3D12Resource> resource) 723 { 724 DebugAssert(allocation && resource); 725 m_cleanup_resources.emplace_back( 726 GetCurrentFenceValue(), std::pair<D3D12MA::Allocation*, ID3D12Object*>(allocation.Detach(), resource.Detach())); 727 } 728 729 void D3D12Device::DeferDescriptorDestruction(D3D12DescriptorHeapManager& heap, D3D12DescriptorHandle* descriptor) 730 { 731 DebugAssert(descriptor->index != D3D12DescriptorHandle::INVALID_INDEX); 732 m_cleanup_descriptors.emplace_back(GetCurrentFenceValue(), 733 
std::pair<D3D12DescriptorHeapManager*, D3D12DescriptorHandle>(&heap, *descriptor)); 734 descriptor->Clear(); 735 } 736 737 void D3D12Device::DestroyDeferredObjects(u64 fence_value) 738 { 739 while (!m_cleanup_descriptors.empty()) 740 { 741 auto& it = m_cleanup_descriptors.front(); 742 if (it.first > fence_value) 743 break; 744 745 it.second.first->Free(it.second.second.index); 746 m_cleanup_descriptors.pop_front(); 747 } 748 749 while (!m_cleanup_resources.empty()) 750 { 751 auto& it = m_cleanup_resources.front(); 752 if (it.first > fence_value) 753 break; 754 755 it.second.second->Release(); 756 if (it.second.first) 757 it.second.first->Release(); 758 m_cleanup_resources.pop_front(); 759 } 760 } 761 762 RenderAPI D3D12Device::GetRenderAPI() const 763 { 764 return RenderAPI::D3D12; 765 } 766 767 bool D3D12Device::HasSurface() const 768 { 769 return static_cast<bool>(m_swap_chain); 770 } 771 772 u32 D3D12Device::GetSwapChainBufferCount() const 773 { 774 // With vsync off, we only need two buffers. Same for blocking vsync. 775 // With triple buffering, we need three. 776 return (m_vsync_mode == GPUVSyncMode::Mailbox) ? 
3 : 2; 777 } 778 779 bool D3D12Device::CreateSwapChain(Error* error) 780 { 781 if (m_window_info.type != WindowInfo::Type::Win32) 782 { 783 Error::SetStringView(error, "D3D12 expects a Win32 window."); 784 return false; 785 } 786 787 const D3DCommon::DXGIFormatMapping& fm = D3DCommon::GetFormatMapping(s_swap_chain_format); 788 789 const HWND window_hwnd = reinterpret_cast<HWND>(m_window_info.window_handle); 790 RECT client_rc{}; 791 GetClientRect(window_hwnd, &client_rc); 792 793 DXGI_MODE_DESC fullscreen_mode = {}; 794 ComPtr<IDXGIOutput> fullscreen_output; 795 if (Host::IsFullscreen()) 796 { 797 u32 fullscreen_width, fullscreen_height; 798 float fullscreen_refresh_rate; 799 m_is_exclusive_fullscreen = 800 GetRequestedExclusiveFullscreenMode(&fullscreen_width, &fullscreen_height, &fullscreen_refresh_rate) && 801 D3DCommon::GetRequestedExclusiveFullscreenModeDesc(m_dxgi_factory.Get(), client_rc, fullscreen_width, 802 fullscreen_height, fullscreen_refresh_rate, fm.resource_format, 803 &fullscreen_mode, fullscreen_output.GetAddressOf()); 804 805 // Using mailbox-style no-allow-tearing causes tearing in exclusive fullscreen. 
806 if (m_vsync_mode == GPUVSyncMode::Mailbox && m_is_exclusive_fullscreen) 807 { 808 WARNING_LOG("Using FIFO instead of Mailbox vsync due to exclusive fullscreen."); 809 m_vsync_mode = GPUVSyncMode::FIFO; 810 } 811 } 812 else 813 { 814 m_is_exclusive_fullscreen = false; 815 } 816 817 DXGI_SWAP_CHAIN_DESC1 swap_chain_desc = {}; 818 swap_chain_desc.Width = static_cast<u32>(client_rc.right - client_rc.left); 819 swap_chain_desc.Height = static_cast<u32>(client_rc.bottom - client_rc.top); 820 swap_chain_desc.Format = fm.resource_format; 821 swap_chain_desc.SampleDesc.Count = 1; 822 swap_chain_desc.BufferCount = GetSwapChainBufferCount(); 823 swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; 824 swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; 825 826 m_using_allow_tearing = (m_allow_tearing_supported && !m_is_exclusive_fullscreen); 827 if (m_using_allow_tearing) 828 swap_chain_desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; 829 830 HRESULT hr = S_OK; 831 832 if (m_is_exclusive_fullscreen) 833 { 834 DXGI_SWAP_CHAIN_DESC1 fs_sd_desc = swap_chain_desc; 835 DXGI_SWAP_CHAIN_FULLSCREEN_DESC fs_desc = {}; 836 837 fs_sd_desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; 838 fs_sd_desc.Width = fullscreen_mode.Width; 839 fs_sd_desc.Height = fullscreen_mode.Height; 840 fs_desc.RefreshRate = fullscreen_mode.RefreshRate; 841 fs_desc.ScanlineOrdering = fullscreen_mode.ScanlineOrdering; 842 fs_desc.Scaling = fullscreen_mode.Scaling; 843 fs_desc.Windowed = FALSE; 844 845 VERBOSE_LOG("Creating a {}x{} exclusive fullscreen swap chain", fs_sd_desc.Width, fs_sd_desc.Height); 846 hr = m_dxgi_factory->CreateSwapChainForHwnd(m_command_queue.Get(), window_hwnd, &fs_sd_desc, &fs_desc, 847 fullscreen_output.Get(), m_swap_chain.ReleaseAndGetAddressOf()); 848 if (FAILED(hr)) 849 { 850 WARNING_LOG("Failed to create fullscreen swap chain, trying windowed."); 851 m_is_exclusive_fullscreen = false; 852 m_using_allow_tearing = m_allow_tearing_supported; 853 } 854 } 855 856 
if (!m_is_exclusive_fullscreen) 857 { 858 VERBOSE_LOG("Creating a {}x{} windowed swap chain", swap_chain_desc.Width, swap_chain_desc.Height); 859 hr = m_dxgi_factory->CreateSwapChainForHwnd(m_command_queue.Get(), window_hwnd, &swap_chain_desc, nullptr, nullptr, 860 m_swap_chain.ReleaseAndGetAddressOf()); 861 if (FAILED(hr)) 862 { 863 Error::SetHResult(error, "CreateSwapChainForHwnd() failed: ", hr); 864 return false; 865 } 866 } 867 868 hr = m_dxgi_factory->MakeWindowAssociation(window_hwnd, DXGI_MWA_NO_WINDOW_CHANGES); 869 if (FAILED(hr)) 870 WARNING_LOG("MakeWindowAssociation() to disable ALT+ENTER failed"); 871 872 if (!CreateSwapChainRTV(error)) 873 { 874 DestroySwapChain(); 875 return false; 876 } 877 878 // Render a frame as soon as possible to clear out whatever was previously being displayed. 879 RenderBlankFrame(); 880 return true; 881 } 882 883 bool D3D12Device::CreateSwapChainRTV(Error* error) 884 { 885 DXGI_SWAP_CHAIN_DESC swap_chain_desc; 886 HRESULT hr = m_swap_chain->GetDesc(&swap_chain_desc); 887 if (FAILED(hr)) 888 { 889 Error::SetHResult(error, "GetDesc() for swap chain failed: ", hr); 890 return false; 891 } 892 893 const D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = {swap_chain_desc.BufferDesc.Format, D3D12_RTV_DIMENSION_TEXTURE2D, {}}; 894 895 for (u32 i = 0; i < swap_chain_desc.BufferCount; i++) 896 { 897 ComPtr<ID3D12Resource> backbuffer; 898 hr = m_swap_chain->GetBuffer(i, IID_PPV_ARGS(backbuffer.GetAddressOf())); 899 if (FAILED(hr)) 900 { 901 Error::SetHResult(error, "GetBuffer for RTV failed: ", hr); 902 DestroySwapChainRTVs(); 903 return false; 904 } 905 906 D3D12::SetObjectName(backbuffer.Get(), TinyString::from_format("Swap Chain Buffer #{}", i)); 907 908 D3D12DescriptorHandle rtv; 909 if (!m_rtv_heap_manager.Allocate(&rtv)) 910 { 911 Error::SetStringView(error, "Failed to allocate RTV handle."); 912 DestroySwapChainRTVs(); 913 return false; 914 } 915 916 m_device->CreateRenderTargetView(backbuffer.Get(), &rtv_desc, rtv); 917 
m_swap_chain_buffers.emplace_back(std::move(backbuffer), rtv); 918 } 919 920 m_window_info.surface_width = swap_chain_desc.BufferDesc.Width; 921 m_window_info.surface_height = swap_chain_desc.BufferDesc.Height; 922 m_window_info.surface_format = s_swap_chain_format; 923 VERBOSE_LOG("Swap chain buffer size: {}x{}", m_window_info.surface_width, m_window_info.surface_height); 924 925 if (m_window_info.type == WindowInfo::Type::Win32) 926 { 927 BOOL fullscreen = FALSE; 928 DXGI_SWAP_CHAIN_DESC desc; 929 if (SUCCEEDED(m_swap_chain->GetFullscreenState(&fullscreen, nullptr)) && fullscreen && 930 SUCCEEDED(m_swap_chain->GetDesc(&desc))) 931 { 932 m_window_info.surface_refresh_rate = static_cast<float>(desc.BufferDesc.RefreshRate.Numerator) / 933 static_cast<float>(desc.BufferDesc.RefreshRate.Denominator); 934 } 935 } 936 937 m_current_swap_chain_buffer = 0; 938 return true; 939 } 940 941 void D3D12Device::DestroySwapChainRTVs() 942 { 943 // Runtime gets cranky if we don't submit the current buffer... 
944 if (InRenderPass()) 945 EndRenderPass(); 946 SubmitCommandList(true); 947 948 for (auto it = m_swap_chain_buffers.rbegin(); it != m_swap_chain_buffers.rend(); ++it) 949 { 950 m_rtv_heap_manager.Free(it->second.index); 951 it->first.Reset(); 952 } 953 m_swap_chain_buffers.clear(); 954 m_current_swap_chain_buffer = 0; 955 } 956 957 void D3D12Device::DestroySwapChain() 958 { 959 if (!m_swap_chain) 960 return; 961 962 DestroySwapChainRTVs(); 963 964 // switch out of fullscreen before destroying 965 BOOL is_fullscreen; 966 if (SUCCEEDED(m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr)) && is_fullscreen) 967 m_swap_chain->SetFullscreenState(FALSE, nullptr); 968 969 m_swap_chain.Reset(); 970 m_is_exclusive_fullscreen = false; 971 } 972 973 void D3D12Device::RenderBlankFrame() 974 { 975 if (InRenderPass()) 976 EndRenderPass(); 977 978 auto& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer]; 979 ID3D12GraphicsCommandList4* cmdlist = GetCommandList(); 980 m_current_swap_chain_buffer = ((m_current_swap_chain_buffer + 1) % static_cast<u32>(m_swap_chain_buffers.size())); 981 D3D12Texture::TransitionSubresourceToState(cmdlist, swap_chain_buf.first.Get(), 0, D3D12_RESOURCE_STATE_COMMON, 982 D3D12_RESOURCE_STATE_RENDER_TARGET); 983 cmdlist->ClearRenderTargetView(swap_chain_buf.second, GSVector4::cxpr(0.0f, 0.0f, 0.0f, 1.0f).F32, 0, nullptr); 984 D3D12Texture::TransitionSubresourceToState(cmdlist, swap_chain_buf.first.Get(), 0, D3D12_RESOURCE_STATE_RENDER_TARGET, 985 D3D12_RESOURCE_STATE_PRESENT); 986 SubmitCommandList(false); 987 m_swap_chain->Present(0, m_using_allow_tearing ? 
DXGI_PRESENT_ALLOW_TEARING : 0);
}

// Recreates the swap chain after the render window changes. Returns true when
// rendering can continue (including the surfaceless case).
bool D3D12Device::UpdateWindow()
{
  WaitForGPUIdle();
  DestroySwapChain();

  if (!AcquireWindow(false))
    return false;

  if (m_window_info.IsSurfaceless())
    return true;

  Error error;
  if (!CreateSwapChain(&error))
  {
    ERROR_LOG("Failed to create swap chain on updated window: {}", error.GetDescription());
    return false;
  }

  // Present a black frame so the new swap chain doesn't display garbage.
  RenderBlankFrame();
  return true;
}

// Resizes the swap chain buffers to match the new window dimensions.
void D3D12Device::ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale)
{
  if (!m_swap_chain)
    return;

  m_window_info.surface_scale = new_window_scale;

  if (m_window_info.surface_width == static_cast<u32>(new_window_width) &&
      m_window_info.surface_height == static_cast<u32>(new_window_height))
  {
    return;
  }

  // Buffer references (RTVs) must be released before ResizeBuffers() can succeed.
  DestroySwapChainRTVs();

  HRESULT hr = m_swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN,
                                           m_using_allow_tearing ?
DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0); 1029 if (FAILED(hr)) 1030 ERROR_LOG("ResizeBuffers() failed: 0x{:08X}", static_cast<unsigned>(hr)); 1031 1032 Error error; 1033 if (!CreateSwapChainRTV(&error)) 1034 { 1035 ERROR_LOG("Failed to recreate swap chain RTV after resize", error.GetDescription()); 1036 Panic("Failed to recreate swap chain RTV after resize"); 1037 } 1038 } 1039 1040 void D3D12Device::DestroySurface() 1041 { 1042 DestroySwapChainRTVs(); 1043 DestroySwapChain(); 1044 } 1045 1046 bool D3D12Device::SupportsTextureFormat(GPUTexture::Format format) const 1047 { 1048 constexpr u32 required = D3D12_FORMAT_SUPPORT1_TEXTURE2D | D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE; 1049 1050 const DXGI_FORMAT dfmt = D3DCommon::GetFormatMapping(format).resource_format; 1051 if (dfmt == DXGI_FORMAT_UNKNOWN) 1052 return false; 1053 1054 D3D12_FEATURE_DATA_FORMAT_SUPPORT support = {dfmt, {}, {}}; 1055 return SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &support, sizeof(support))) && 1056 (support.Support1 & required) == required; 1057 } 1058 1059 std::string D3D12Device::GetDriverInfo() const 1060 { 1061 std::string ret = fmt::format("{} ({})\n", D3DCommon::GetFeatureLevelString(m_feature_level), 1062 D3DCommon::GetFeatureLevelShaderModelString(m_feature_level)); 1063 1064 DXGI_ADAPTER_DESC desc; 1065 if (m_adapter && SUCCEEDED(m_adapter->GetDesc(&desc))) 1066 { 1067 fmt::format_to(std::back_inserter(ret), "VID: 0x{:04X} PID: 0x{:04X}\n", desc.VendorId, desc.DeviceId); 1068 ret += StringUtil::WideStringToUTF8String(desc.Description); 1069 ret += "\n"; 1070 1071 const std::string driver_version(D3DCommon::GetDriverVersionFromLUID(desc.AdapterLuid)); 1072 if (!driver_version.empty()) 1073 { 1074 ret += "Driver Version: "; 1075 ret += driver_version; 1076 } 1077 } 1078 1079 return ret; 1080 } 1081 1082 void D3D12Device::SetVSyncMode(GPUVSyncMode mode, bool allow_present_throttle) 1083 { 1084 m_allow_present_throttle = allow_present_throttle; 1085 1086 // 
Using mailbox-style no-allow-tearing causes tearing in exclusive fullscreen.
  if (mode == GPUVSyncMode::Mailbox && m_is_exclusive_fullscreen)
  {
    WARNING_LOG("Using FIFO instead of Mailbox vsync due to exclusive fullscreen.");
    mode = GPUVSyncMode::FIFO;
  }

  if (m_vsync_mode == mode)
    return;

  // The swap chain buffer count can depend on the vsync mode; if it changed,
  // the swap chain has to be rebuilt.
  const u32 old_buffer_count = GetSwapChainBufferCount();
  m_vsync_mode = mode;
  if (!m_swap_chain)
    return;

  if (GetSwapChainBufferCount() != old_buffer_count)
  {
    DestroySwapChain();

    Error error;
    if (!CreateSwapChain(&error))
    {
      ERROR_LOG("Failed to recreate swap chain after vsync change: {}", error.GetDescription());
      Panic("Failed to recreate swap chain after vsync change.");
    }
  }
}

// Starts rendering to the backbuffer for presentation. Returns false when the
// frame should be skipped (explicit skip, surfaceless, or exclusive-fullscreen
// loss), in which case no render pass is started.
bool D3D12Device::BeginPresent(bool frame_skip, u32 clear_color)
{
  if (InRenderPass())
    EndRenderPass();

  if (frame_skip)
    return false;

  // If we're running surfaceless, kick the command buffer so we don't run out of descriptors.
  if (!m_swap_chain)
  {
    SubmitCommandList(false);
    TrimTexturePool();
    return false;
  }

  // TODO: Check if the device was lost.

  // Check if we lost exclusive fullscreen. If so, notify the host, so it can switch to windowed mode.
  // This might get called repeatedly if it takes a while to switch back, that's the host's problem.
BOOL is_fullscreen;
  if (m_is_exclusive_fullscreen &&
      (FAILED(m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr)) || !is_fullscreen))
  {
    Host::RunOnCPUThread([]() { Host::SetFullscreen(false); });
    TrimTexturePool();
    return false;
  }

  BeginSwapChainRenderPass(clear_color);
  return true;
}

// Finishes the presentation render pass, transitions the backbuffer to PRESENT,
// and submits the command list. The Present() itself is deferred to
// SubmitPresent() when explicit_present is set.
void D3D12Device::EndPresent(bool explicit_present)
{
  DebugAssert(InRenderPass() && m_num_current_render_targets == 0 && !m_current_depth_target);
  EndRenderPass();

  const auto& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer];
  m_current_swap_chain_buffer = ((m_current_swap_chain_buffer + 1) % static_cast<u32>(m_swap_chain_buffers.size()));

  ID3D12GraphicsCommandList* cmdlist = GetCommandList();
  D3D12Texture::TransitionSubresourceToState(cmdlist, swap_chain_buf.first.Get(), 0, D3D12_RESOURCE_STATE_RENDER_TARGET,
                                             D3D12_RESOURCE_STATE_PRESENT);

  SubmitCommandList(false);
  TrimTexturePool();

  if (!explicit_present)
    SubmitPresent();
}

void D3D12Device::SubmitPresent()
{
  DebugAssert(m_swap_chain);

  // Sync interval 1 only for FIFO (classic vsync); the tearing flag is only legal
  // when vsync is fully disabled and the DXGI tearing feature is in use.
  const UINT sync_interval = static_cast<UINT>(m_vsync_mode == GPUVSyncMode::FIFO);
  const UINT flags = (m_vsync_mode == GPUVSyncMode::Disabled && m_using_allow_tearing) ?
DXGI_PRESENT_ALLOW_TEARING : 0;
  m_swap_chain->Present(sync_interval, flags);
}

#ifdef _DEBUG
// Maps a scope-depth "phase" to a PIX colour via a cosine colour palette
// (a + b*cos(2*pi*(c*phase + d)) per channel), so nested debug groups get
// visually distinct colours.
static UINT64 Palette(float phase, const std::array<float, 3>& a, const std::array<float, 3>& b,
                      const std::array<float, 3>& c, const std::array<float, 3>& d)
{
  std::array<float, 3> result;
  result[0] = a[0] + b[0] * std::cos(6.28318f * (c[0] * phase + d[0]));
  result[1] = a[1] + b[1] * std::cos(6.28318f * (c[1] * phase + d[1]));
  result[2] = a[2] + b[2] * std::cos(6.28318f * (c[2] * phase + d[2]));

  return PIX_COLOR(static_cast<BYTE>(std::clamp(result[0] * 255.0f, 0.0f, 255.0f)),
                   static_cast<BYTE>(std::clamp(result[1] * 255.0f, 0.0f, 255.0f)),
                   static_cast<BYTE>(std::clamp(result[2] * 255.0f, 0.0f, 255.0f)));
}
#endif

// Opens a named PIX event scope on the current command list (debug builds only).
void D3D12Device::PushDebugGroup(const char* name)
{
#ifdef _DEBUG
  if (!m_debug_device)
    return;

  const UINT64 color = Palette(static_cast<float>(++s_debug_scope_depth), {0.5f, 0.5f, 0.5f}, {0.5f, 0.5f, 0.5f},
                               {1.0f, 1.0f, 0.5f}, {0.8f, 0.90f, 0.30f});
  PIXBeginEvent(GetCommandList(), color, "%s", name);
#endif
}

void D3D12Device::PopDebugGroup()
{
#ifdef _DEBUG
  if (!m_debug_device)
    return;

  // Clamp at zero so an unbalanced pop doesn't underflow the depth counter.
  s_debug_scope_depth = (s_debug_scope_depth == 0) ?
0 : (s_debug_scope_depth - 1u);
  PIXEndEvent(GetCommandList());
#endif
}

// Emits a single PIX marker on the current command list (debug builds only).
void D3D12Device::InsertDebugMessage(const char* msg)
{
#ifdef _DEBUG
  if (!m_debug_device)
    return;

  PIXSetMarker(GetCommandList(), PIX_COLOR(0, 0, 0), "%s", msg);
#endif
}

// Queries device capabilities and fills in m_features, honouring disabled_features.
void D3D12Device::SetFeatures(FeatureMask disabled_features)
{
  m_max_texture_size = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION;
  m_max_multisamples = 1;
  // NOTE(review): '<' means D3D12_MAX_MULTISAMPLE_SAMPLE_COUNT itself is never
  // probed — confirm whether the maximum sample count should also be tested here.
  for (u32 multisamples = 2; multisamples < D3D12_MAX_MULTISAMPLE_SAMPLE_COUNT; multisamples++)
  {
    D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS fd = {DXGI_FORMAT_R8G8B8A8_UNORM, static_cast<UINT>(multisamples),
                                                        D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE, 0u};

    if (SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &fd, sizeof(fd))) &&
        fd.NumQualityLevels > 0)
    {
      m_max_multisamples = multisamples;
    }
  }

  m_features.dual_source_blend = !(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND);
  m_features.framebuffer_fetch = false;
  m_features.per_sample_shading = true;
  m_features.noperspective_interpolation = true;
  m_features.texture_copy_to_self =
    /*!(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF)*/ false; // TODO: Support with Enhanced Barriers
  m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS);
  m_features.texture_buffers_emulated_with_ssbo = false;
  m_features.feedback_loops = false;
  m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
  m_features.partial_msaa_resolve = true;
  m_features.memory_import = false;
  m_features.explicit_present = true;
  m_features.gpu_timing = true;
  m_features.shader_cache = true;
  m_features.pipeline_cache = true;
  m_features.prefer_unused_textures = true;

  BOOL allow_tearing_supported = false;
  HRESULT hr =
m_dxgi_factory->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported,
                                        sizeof(allow_tearing_supported));
  m_allow_tearing_supported = (SUCCEEDED(hr) && allow_tearing_supported == TRUE);

  m_features.raster_order_views = false;
  if (!(disabled_features & FEATURE_MASK_RASTER_ORDER_VIEWS))
  {
    D3D12_FEATURE_DATA_D3D12_OPTIONS options = {};
    m_features.raster_order_views =
      SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options))) &&
      options.ROVsSupported;
  }
}

// Copies a region between textures. Pending clears are forwarded or committed
// first so the GPU copy sees (and produces) the correct contents, and can be
// elided entirely in some clear-to-clear cases.
void D3D12Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
                                    GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width,
                                    u32 height)
{
  D3D12Texture* const S = static_cast<D3D12Texture*>(src);
  D3D12Texture* const D = static_cast<D3D12Texture*>(dst);

  if (S->GetState() == GPUTexture::State::Cleared)
  {
    // source is cleared. if destination is a render target, we can carry the clear forward
    if (D->IsRenderTargetOrDepthStencil())
    {
      if (dst_level == 0 && dst_x == 0 && dst_y == 0 && width == D->GetWidth() && height == D->GetHeight())
      {
        // pass it forward if we're clearing the whole thing
        if (S->IsDepthStencil())
          D->SetClearDepth(S->GetClearDepth());
        else
          D->SetClearColor(S->GetClearColor());

        return;
      }

      if (D->GetState() == GPUTexture::State::Cleared)
      {
        // destination is cleared, if it's the same colour and rect, we can just avoid this entirely
        if (D->IsDepthStencil())
        {
          if (D->GetClearDepth() == S->GetClearDepth())
            return;
        }
        else
        {
          if (D->GetClearColor() == S->GetClearColor())
            return;
        }
      }
    }

    // commit the clear to the source first, then do normal copy
    S->CommitClear();
  }

  // if the destination has been cleared, and we're not overwriting the whole thing, commit the clear first
  // (the area outside of where we're copying to)
  if (D->GetState() == GPUTexture::State::Cleared &&
      (dst_level != 0 || dst_x != 0 || dst_y != 0 || width != D->GetWidth() || height != D->GetHeight()))
  {
    D->CommitClear();
  }

  s_stats.num_copies++;

  // *now* we can do a normal image copy.
if (InRenderPass())
    EndRenderPass();

  // Both resources go into copy states and are tagged with the current fence so
  // they can't be recycled before this command list finishes on the GPU.
  S->TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE);
  S->SetUseFenceValue(GetCurrentFenceValue());

  D->TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST);
  D->SetUseFenceValue(GetCurrentFenceValue());

  D3D12_TEXTURE_COPY_LOCATION srcloc;
  srcloc.pResource = S->GetResource();
  srcloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
  srcloc.SubresourceIndex = S->CalculateSubresource(src_layer, src_level);

  D3D12_TEXTURE_COPY_LOCATION dstloc;
  dstloc.pResource = D->GetResource();
  dstloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
  dstloc.SubresourceIndex = D->CalculateSubresource(dst_layer, dst_level);

  const D3D12_BOX srcbox{static_cast<UINT>(src_x), static_cast<UINT>(src_y), 0u,
                         static_cast<UINT>(src_x + width), static_cast<UINT>(src_y + height), 1u};
  GetCommandList()->CopyTextureRegion(&dstloc, dst_x, dst_y, 0, &srcloc, &srcbox);

  D->SetState(GPUTexture::State::Dirty);
}

// Resolves (downsamples) a multisampled texture region into a non-multisampled texture.
void D3D12Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
                                       GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height)
{
  DebugAssert((src_x + width) <= src->GetWidth());
  DebugAssert((src_y + height) <= src->GetHeight());
  DebugAssert(src->IsMultisampled());
  DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers());
  DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level));
  DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level));
  DebugAssert(!dst->IsMultisampled() && src->IsMultisampled());

  if (InRenderPass())
    EndRenderPass();

  s_stats.num_copies++;

  D3D12Texture* D = static_cast<D3D12Texture*>(dst);
  D3D12Texture* S = static_cast<D3D12Texture*>(src);
  ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
  // Destination subresource index: (layer, level) ordering per CalculateSubresource.
  const u32 DSR = D->CalculateSubresource(dst_layer, dst_level);
1372 1373 S->CommitClear(cmdlist); 1374 D->CommitClear(cmdlist); 1375 1376 S->TransitionSubresourceToState(cmdlist, 0, S->GetResourceState(), D3D12_RESOURCE_STATE_RESOLVE_SOURCE); 1377 D->TransitionSubresourceToState(cmdlist, DSR, D->GetResourceState(), D3D12_RESOURCE_STATE_RESOLVE_DEST); 1378 1379 if (src_x == 0 && src_y == 0 && width == src->GetWidth() && height == src->GetHeight() && dst_x == 0 && dst_y == 0 && 1380 width == dst->GetMipWidth(dst_level) && height == dst->GetMipHeight(dst_level)) 1381 { 1382 cmdlist->ResolveSubresource(D->GetResource(), DSR, S->GetResource(), 0, S->GetDXGIFormat()); 1383 } 1384 else 1385 { 1386 D3D12_RECT src_rc{static_cast<LONG>(src_x), static_cast<LONG>(src_y), static_cast<LONG>(src_x + width), 1387 static_cast<LONG>(src_y + height)}; 1388 cmdlist->ResolveSubresourceRegion(D->GetResource(), D->CalculateSubresource(dst_level, dst_layer), dst_x, dst_y, 1389 S->GetResource(), 0, &src_rc, D->GetDXGIFormat(), D3D12_RESOLVE_MODE_AVERAGE); 1390 } 1391 1392 S->TransitionSubresourceToState(cmdlist, 0, D3D12_RESOURCE_STATE_RESOLVE_SOURCE, S->GetResourceState()); 1393 D->TransitionSubresourceToState(cmdlist, DSR, D3D12_RESOURCE_STATE_RESOLVE_DEST, D->GetResourceState()); 1394 } 1395 1396 void D3D12Device::ClearRenderTarget(GPUTexture* t, u32 c) 1397 { 1398 GPUDevice::ClearRenderTarget(t, c); 1399 if (InRenderPass() && IsRenderTargetBound(t)) 1400 EndRenderPass(); 1401 } 1402 1403 void D3D12Device::ClearDepth(GPUTexture* t, float d) 1404 { 1405 GPUDevice::ClearDepth(t, d); 1406 if (InRenderPass() && m_current_depth_target == t) 1407 EndRenderPass(); 1408 } 1409 1410 void D3D12Device::InvalidateRenderTarget(GPUTexture* t) 1411 { 1412 GPUDevice::InvalidateRenderTarget(t); 1413 if (InRenderPass() && (t->IsDepthStencil() ? 
(m_current_depth_target == t) : IsRenderTargetBound(t)))
    EndRenderPass();
}

// Allocates the streaming buffers used for vertices, indices, uniforms, and texture uploads.
bool D3D12Device::CreateBuffers(Error* error)
{
  if (!m_vertex_buffer.Create(VERTEX_BUFFER_SIZE, error))
  {
    ERROR_LOG("Failed to allocate vertex buffer");
    return false;
  }

  if (!m_index_buffer.Create(INDEX_BUFFER_SIZE, error))
  {
    ERROR_LOG("Failed to allocate index buffer");
    return false;
  }

  if (!m_uniform_buffer.Create(VERTEX_UNIFORM_BUFFER_SIZE, error))
  {
    ERROR_LOG("Failed to allocate uniform buffer");
    return false;
  }

  if (!m_texture_upload_buffer.Create(TEXTURE_BUFFER_SIZE, error))
  {
    ERROR_LOG("Failed to allocate texture upload buffer");
    return false;
  }

  return true;
}

void D3D12Device::DestroyBuffers()
{
  // Destroy in reverse order of creation.
  m_texture_upload_buffer.Destroy(false);
  m_uniform_buffer.Destroy(false);
  m_index_buffer.Destroy(false);
  m_vertex_buffer.Destroy(false);
}

// Reserves space in the streaming vertex buffer, submitting the command list to
// reclaim space if the buffer is exhausted.
void D3D12Device::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
                                  u32* map_base_vertex)
{
  const u32 req_size = vertex_size * vertex_count;
  if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size))
  {
    SubmitCommandListAndRestartRenderPass("out of vertex space");
    if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size))
      Panic("Failed to allocate vertex space");
  }

  *map_ptr = m_vertex_buffer.GetCurrentHostPointer();
  *map_space = m_vertex_buffer.GetCurrentSpace() / vertex_size;
  *map_base_vertex = m_vertex_buffer.GetCurrentOffset() / vertex_size;
}

void D3D12Device::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count)
{
  const u32 upload_size = vertex_size * vertex_count;
  s_stats.buffer_streamed += upload_size;
  m_vertex_buffer.CommitMemory(upload_size);
}

void D3D12Device::MapIndexBuffer(u32 index_count, DrawIndex**
map_ptr, u32* map_space, u32* map_base_index)
{
  const u32 req_size = sizeof(DrawIndex) * index_count;
  if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex)))
  {
    SubmitCommandListAndRestartRenderPass("out of index space");
    if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex)))
      Panic("Failed to allocate index space");
  }

  *map_ptr = reinterpret_cast<DrawIndex*>(m_index_buffer.GetCurrentHostPointer());
  *map_space = m_index_buffer.GetCurrentSpace() / sizeof(DrawIndex);
  *map_base_index = m_index_buffer.GetCurrentOffset() / sizeof(DrawIndex);
}

void D3D12Device::UnmapIndexBuffer(u32 used_index_count)
{
  const u32 upload_size = sizeof(DrawIndex) * used_index_count;
  s_stats.buffer_streamed += upload_size;
  m_index_buffer.CommitMemory(upload_size);
}

// Writes push-constant-style uniforms as root 32-bit constants.
void D3D12Device::PushUniformBuffer(const void* data, u32 data_size)
{
  // Root parameter index of the 32-bit constants for each pipeline layout.
  static constexpr std::array<u8, static_cast<u8>(GPUPipeline::Layout::MaxCount)> push_parameters = {
    0, // SingleTextureAndUBO
    2, // SingleTextureAndPushConstants
    1, // SingleTextureBufferAndPushConstants
    0, // MultiTextureAndUBO
    2, // MultiTextureAndPushConstants
  };

  DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE);
  if (m_dirty_flags & DIRTY_FLAG_PIPELINE_LAYOUT)
  {
    m_dirty_flags &= ~DIRTY_FLAG_PIPELINE_LAYOUT;
    UpdateRootSignature();
  }

  s_stats.buffer_streamed += data_size;

  // ROV root signatures insert an extra UAV descriptor table, which shifts the
  // constants parameter index up by one.
  const u32 push_param =
    push_parameters[static_cast<u8>(m_current_pipeline_layout)] + BoolToUInt8(IsUsingROVRootSignature());
  GetCommandList()->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0);
}

// Reserves CBV-aligned space in the streaming uniform buffer and returns the write pointer.
void* D3D12Device::MapUniformBuffer(u32 size)
{
  const u32 used_space = Common::AlignUpPow2(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
  if (!m_uniform_buffer.ReserveMemory(used_space + MAX_UNIFORM_BUFFER_SIZE,
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT))
  {
    SubmitCommandListAndRestartRenderPass("out of uniform space");
    if (!m_uniform_buffer.ReserveMemory(used_space + MAX_UNIFORM_BUFFER_SIZE,
                                        D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT))
      Panic("Failed to allocate uniform space.");
  }

  return m_uniform_buffer.GetCurrentHostPointer();
}

void D3D12Device::UnmapUniformBuffer(u32 size)
{
  s_stats.buffer_streamed += size;
  m_uniform_buffer_position = m_uniform_buffer.GetCurrentOffset();
  m_uniform_buffer.CommitMemory(size);
  m_dirty_flags |= DIRTY_FLAG_CONSTANT_BUFFER;
}

// Builds one root signature per pipeline layout, plus a second variant of each
// (with an extra UAV descriptor table) when raster order views are supported.
bool D3D12Device::CreateRootSignatures(Error* error)
{
  D3D12::RootSignatureBuilder rsb;

  for (u32 rov = 0; rov < 2; rov++)
  {
    // Skip the ROV variants entirely if the device doesn't support them.
    if (rov && !m_features.raster_order_views)
      break;

    {
      auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::SingleTextureAndUBO)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
      if (rov)
      {
        rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                               D3D12_SHADER_VISIBILITY_PIXEL);
      }
      if (!(rs = rsb.Create(error, true)))
        return false;
      D3D12::SetObjectName(rs.Get(), "Single Texture + UBO Pipeline Layout");
    }

    {
      auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::SingleTextureAndPushConstants)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      if (rov)
      {
        rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                               D3D12_SHADER_VISIBILITY_PIXEL);
      }
      rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
      if (!(rs = rsb.Create(error, true)))
        return false;
      D3D12::SetObjectName(rs.Get(), "Single Texture Pipeline Layout");
    }

    {
      auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
      if (rov)
      {
        rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                               D3D12_SHADER_VISIBILITY_PIXEL);
      }
      rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
      if (!(rs = rsb.Create(error, true)))
        return false;
      D3D12::SetObjectName(rs.Get(), "Single Texture Buffer + UBO Pipeline Layout");
    }

    {
      auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::MultiTextureAndUBO)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS,
                             D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
      if (rov)
      {
        rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                               D3D12_SHADER_VISIBILITY_PIXEL);
      }
      if (!(rs = rsb.Create(error, true)))
        return false;
      D3D12::SetObjectName(rs.Get(), "Multi Texture + UBO Pipeline Layout");
    }

    {
      auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::MultiTextureAndPushConstants)];

      rsb.SetInputAssemblerFlag();
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
      rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS,
                             D3D12_SHADER_VISIBILITY_PIXEL);
      if (rov)
      {
        rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
                               D3D12_SHADER_VISIBILITY_PIXEL);
      }
      rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
      if (!(rs = rsb.Create(error, true)))
        return false;
      D3D12::SetObjectName(rs.Get(), "Multi Texture Pipeline Layout");
    }
  }

  return true;
}

void D3D12Device::DestroyRootSignatures()
{
  m_root_signatures.enumerate([](auto& it) { it.Reset(); });
}

// Binds the given render targets/depth target and updates dirty state; ends any
// current render pass when bindings changed or a pending clear must be applied.
void D3D12Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
                                   GPUPipeline::RenderPassFlag flags)
{
  DebugAssert(
    !(flags & (GPUPipeline::RenderPassFlag::ColorFeedbackLoop | GPUPipeline::RenderPassFlag::SampleDepthBuffer)));

  const bool image_bind_changed = ((m_current_render_pass_flags ^ flags) & GPUPipeline::BindRenderTargetsAsImages);
  bool changed =
    (m_num_current_render_targets != num_rts || m_current_depth_target != ds || m_current_render_pass_flags != flags);
  bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
  bool needs_rt_clear = false;

  if (InRenderPass())
    EndRenderPass();

  m_current_depth_target = static_cast<D3D12Texture*>(ds);
  for (u32 i = 0; i < num_rts; i++)
  {
    D3D12Texture* const RT = static_cast<D3D12Texture*>(rts[i]);
    changed |= m_current_render_targets[i] != RT;
    m_current_render_targets[i] = RT;
    needs_rt_clear |= RT->IsClearedOrInvalidated();
  }
  for (u32 i = num_rts; i < m_num_current_render_targets; i++)
    m_current_render_targets[i] = nullptr;
  m_num_current_render_targets = Truncate8(num_rts);
m_current_render_pass_flags = flags;

  // Don't end render pass unless it's necessary.
  if (changed)
  {
    if (InRenderPass())
      EndRenderPass();

    // Need a root signature change if switching to UAVs.
    m_dirty_flags |= image_bind_changed ? LAYOUT_DEPENDENT_DIRTY_STATE : 0;
    m_dirty_flags = (flags & GPUPipeline::BindRenderTargetsAsImages) ? (m_dirty_flags | DIRTY_FLAG_RT_UAVS) :
                                                                      (m_dirty_flags & ~DIRTY_FLAG_RT_UAVS);
  }
  else if (needs_rt_clear || needs_ds_clear)
  {
    if (InRenderPass())
      EndRenderPass();
  }
}

// Starts a D3D12 render pass over the currently-bound targets (or the swap chain
// backbuffer when none are bound), translating pending clear/invalidate state
// into render pass beginning-access ops.
void D3D12Device::BeginRenderPass()
{
  DebugAssert(!InRenderPass());

  std::array<D3D12_RENDER_PASS_RENDER_TARGET_DESC, MAX_RENDER_TARGETS> rt_desc;
  D3D12_RENDER_PASS_DEPTH_STENCIL_DESC ds_desc;

  D3D12_RENDER_PASS_RENDER_TARGET_DESC* rt_desc_p = nullptr;
  D3D12_RENDER_PASS_DEPTH_STENCIL_DESC* ds_desc_p = nullptr;
  u32 num_rt_descs = 0;

  ID3D12GraphicsCommandList4* cmdlist = GetCommandList();

  if (m_num_current_render_targets > 0 || m_current_depth_target) [[likely]]
  {
    if (!IsUsingROVRootSignature()) [[likely]]
    {
      for (u32 i = 0; i < m_num_current_render_targets; i++)
      {
        D3D12Texture* const rt = m_current_render_targets[i];
        rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET);
        rt->SetUseFenceValue(GetCurrentFenceValue());

        D3D12_RENDER_PASS_RENDER_TARGET_DESC& desc = rt_desc[i];
        desc.cpuDescriptor = rt->GetWriteDescriptor();
        desc.EndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE;

        // Map pending texture state onto the render pass load op.
        switch (rt->GetState())
        {
          case GPUTexture::State::Cleared:
          {
            desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR;
            std::memcpy(desc.BeginningAccess.Clear.ClearValue.Color, rt->GetUNormClearColor().data(),
                        sizeof(desc.BeginningAccess.Clear.ClearValue.Color));
            rt->SetState(GPUTexture::State::Dirty);
          }
          break;

          case GPUTexture::State::Invalidated:
          {
            desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD;
            rt->SetState(GPUTexture::State::Dirty);
          }
          break;

          case GPUTexture::State::Dirty:
          {
            desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE;
          }
          break;

          default:
            UnreachableCode();
            break;
        }
      }

      rt_desc_p = (m_num_current_render_targets > 0) ? rt_desc.data() : nullptr;
      num_rt_descs = m_num_current_render_targets;
    }
    else
    {
      // Still need to clear the RTs.
      for (u32 i = 0; i < m_num_current_render_targets; i++)
      {
        D3D12Texture* const rt = m_current_render_targets[i];
        rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
        rt->SetUseFenceValue(GetCurrentFenceValue());
        rt->CommitClear(cmdlist);
      }
    }
    if (m_current_depth_target)
    {
      D3D12Texture* const ds = m_current_depth_target;
      ds->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_DEPTH_WRITE);
      ds->SetUseFenceValue(GetCurrentFenceValue());
      ds_desc_p = &ds_desc;
      ds_desc.cpuDescriptor = ds->GetWriteDescriptor();
      ds_desc.DepthEndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE;
      ds_desc.StencilBeginningAccess = {};
      ds_desc.StencilEndingAccess = {};

      // Same load-op mapping as above, for the depth plane only.
      switch (ds->GetState())
      {
        case GPUTexture::State::Cleared:
        {
          ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR;
          ds_desc.DepthBeginningAccess.Clear.ClearValue.DepthStencil.Depth = ds->GetClearDepth();
          ds->SetState(GPUTexture::State::Dirty);
        }
        break;

        case GPUTexture::State::Invalidated:
        {
          ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD;
          ds->SetState(GPUTexture::State::Dirty);
        }
        break;

        case GPUTexture::State::Dirty:
        {
          ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE;
        }
        break;

        default:
          UnreachableCode();
          break;
      }

      ds_desc_p = &ds_desc;
    }
  }
  else
  {
    // Re-rendering to swap chain.
    const auto& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer];
    rt_desc[0] = {swap_chain_buf.second,
                  {D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE, {}},
                  {D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, {}}};
    rt_desc_p = &rt_desc[0];
    num_rt_descs = 1;
  }

  // All textures should be in shader read only optimal already, but just in case..
  const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
  for (u32 i = 0; i < num_textures; i++)
  {
    if (m_current_textures[i])
      m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
  }

  DebugAssert(rt_desc_p || ds_desc_p || IsUsingROVRootSignature());
  cmdlist->BeginRenderPass(num_rt_descs, rt_desc_p, ds_desc_p, D3D12_RENDER_PASS_FLAG_NONE);

  // TODO: Stats
  m_in_render_pass = true;
  s_stats.num_render_passes++;

  // If this is a new command buffer, bind the pipeline and such.
  if (m_dirty_flags & DIRTY_FLAG_INITIAL)
    SetInitialPipelineState();
}

// Begins a render pass targeting the swap chain backbuffer, clearing it to clear_color.
void D3D12Device::BeginSwapChainRenderPass(u32 clear_color)
{
  DebugAssert(!InRenderPass());

  ID3D12GraphicsCommandList4* const cmdlist = GetCommandList();
  const auto& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer];

  D3D12Texture::TransitionSubresourceToState(cmdlist, swap_chain_buf.first.Get(), 0, D3D12_RESOURCE_STATE_COMMON,
                                             D3D12_RESOURCE_STATE_RENDER_TARGET);

  // All textures should be in shader read only optimal already, but just in case..
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
  for (u32 i = 0; i < num_textures; i++)
  {
    if (m_current_textures[i])
      m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
  }

  D3D12_RENDER_PASS_RENDER_TARGET_DESC rt_desc = {swap_chain_buf.second,
                                                  {D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR, {}},
                                                  {D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, {}}};
  GSVector4::store<false>(rt_desc.BeginningAccess.Clear.ClearValue.Color, GSVector4::rgba32(clear_color));
  cmdlist->BeginRenderPass(1, &rt_desc, nullptr, D3D12_RENDER_PASS_FLAG_NONE);

  // Rendering to the backbuffer drops any previously-bound render targets.
  std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
  m_num_current_render_targets = 0;
  m_dirty_flags =
    (m_dirty_flags & ~DIRTY_FLAG_RT_UAVS) | ((IsUsingROVRootSignature()) ? DIRTY_FLAG_PIPELINE_LAYOUT : 0);
  m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
  m_current_depth_target = nullptr;
  m_in_render_pass = true;
  s_stats.num_render_passes++;

  // Clear pipeline, it's likely incompatible.
  m_current_pipeline = nullptr;
}

bool D3D12Device::InRenderPass()
{
  return m_in_render_pass;
}

void D3D12Device::EndRenderPass()
{
  DebugAssert(m_in_render_pass);

  // TODO: stats
  m_in_render_pass = false;

  GetCommandList()->EndRenderPass();
}

// Binds a pipeline, re-setting only the command list state that actually changed.
void D3D12Device::SetPipeline(GPUPipeline* pipeline)
{
  // First draw? Bind everything.
  if (m_dirty_flags & DIRTY_FLAG_INITIAL)
  {
    m_current_pipeline = static_cast<D3D12Pipeline*>(pipeline);
    if (!m_current_pipeline)
      return;

    // Fresh command buffer: bind pipeline, buffers, topology, etc. all at once.
    SetInitialPipelineState();
    return;
  }
  else if (m_current_pipeline == pipeline)
  {
    // Redundant bind; nothing to do.
    return;
  }

  m_current_pipeline = static_cast<D3D12Pipeline*>(pipeline);

  ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
  cmdlist->SetPipelineState(m_current_pipeline->GetPipeline());

  // Each piece of IA/OM state below is only re-issued when it actually changed.
  if (D3D12_PRIMITIVE_TOPOLOGY topology = m_current_pipeline->GetTopology(); topology != m_current_topology)
  {
    m_current_topology = topology;
    cmdlist->IASetPrimitiveTopology(topology);
  }

  // Vertex stride is baked into the buffer view, so a stride change forces a rebind.
  if (u32 vertex_stride = m_current_pipeline->GetVertexStride();
      vertex_stride > 0 && m_current_vertex_stride != vertex_stride)
  {
    m_current_vertex_stride = vertex_stride;
    SetVertexBuffer(cmdlist);
  }

  // TODO: we don't need to change the blend constant if blending isn't on.
  if (u32 blend_constants = m_current_pipeline->GetBlendConstants(); m_current_blend_constant != blend_constants)
  {
    m_current_blend_constant = blend_constants;
    cmdlist->OMSetBlendFactor(m_current_pipeline->GetBlendConstantsF().data());
  }

  // Layout change dirties all layout-dependent state; the RT-UAV bit is only kept when the
  // ROV root signature is active, since only that signature has a UAV table.
  if (GPUPipeline::Layout layout = m_current_pipeline->GetLayout(); m_current_pipeline_layout != layout)
  {
    m_current_pipeline_layout = layout;
    m_dirty_flags |= LAYOUT_DEPENDENT_DIRTY_STATE & (IsUsingROVRootSignature() ? ~0u : ~DIRTY_FLAG_RT_UAVS);
  }
}

// Drops the cached pipeline pointer if pl is current (called when a pipeline is destroyed).
void D3D12Device::UnbindPipeline(D3D12Pipeline* pl)
{
  if (m_current_pipeline != pl)
    return;

  m_current_pipeline = nullptr;
}

// Returns true if tex is one of the currently-bound render targets.
bool D3D12Device::IsRenderTargetBound(const GPUTexture* tex) const
{
  for (u32 i = 0; i < m_num_current_render_targets; i++)
  {
    if (m_current_render_targets[i] == tex)
      return true;
  }

  return false;
}

// Marks all cached command-list state as stale so it is re-issued on the next draw.
void D3D12Device::InvalidateCachedState()
{
  // Keep the RT-UAV dirty bit only when render targets are bound as images (ROV path).
  m_dirty_flags = ALL_DIRTY_STATE &
                  ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) ? ~0u : ~DIRTY_FLAG_RT_UAVS);
  m_in_render_pass = false;
  m_current_pipeline = nullptr;
  m_current_vertex_stride = 0;
  m_current_blend_constant = 0;
  m_current_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
}

// Issues the full set of initial bindings (vertex/index buffers, pipeline, topology, blend
// factor, viewport, scissor) on a fresh command list. Clears DIRTY_FLAG_INITIAL.
void D3D12Device::SetInitialPipelineState()
{
  DebugAssert(m_current_pipeline);
  m_dirty_flags &= ~DIRTY_FLAG_INITIAL;

  ID3D12GraphicsCommandList4* cmdlist = GetCommandList();

  m_current_vertex_stride = m_current_pipeline->GetVertexStride();
  SetVertexBuffer(cmdlist);
  // All indices are 16-bit; the whole stream buffer is bound as one view.
  const D3D12_INDEX_BUFFER_VIEW ib_view = {m_index_buffer.GetGPUPointer(), m_index_buffer.GetSize(),
                                           DXGI_FORMAT_R16_UINT};
  cmdlist->IASetIndexBuffer(&ib_view);

  cmdlist->SetPipelineState(m_current_pipeline->GetPipeline());
  m_current_pipeline_layout = m_current_pipeline->GetLayout();

  m_current_topology = m_current_pipeline->GetTopology();
  cmdlist->IASetPrimitiveTopology(m_current_topology);

  m_current_blend_constant = m_current_pipeline->GetBlendConstants();
  cmdlist->OMSetBlendFactor(m_current_pipeline->GetBlendConstantsF().data());

  SetViewport(cmdlist);
  SetScissor(cmdlist);
}

// Binds the vertex stream buffer with the current pipeline's vertex stride.
void D3D12Device::SetVertexBuffer(ID3D12GraphicsCommandList4* cmdlist)
{
  const D3D12_VERTEX_BUFFER_VIEW vb_view =
    {m_vertex_buffer.GetGPUPointer(), m_vertex_buffer.GetSize(), m_current_vertex_stride};
  cmdlist->IASetVertexBuffers(0, 1, &vb_view);
}

// Converts the cached integer viewport rectangle into a D3D12_VIEWPORT and issues it.
void D3D12Device::SetViewport(ID3D12GraphicsCommandList4* cmdlist)
{
  const D3D12_VIEWPORT vp = {static_cast<float>(m_current_viewport.left),
                             static_cast<float>(m_current_viewport.top),
                             static_cast<float>(m_current_viewport.width()),
                             static_cast<float>(m_current_viewport.height()),
                             0.0f,
                             1.0f};
  cmdlist->RSSetViewports(1, &vp);
}

// Issues the cached scissor rectangle; GSVector4i is layout-compatible with D3D12_RECT.
void D3D12Device::SetScissor(ID3D12GraphicsCommandList4* cmdlist)
{
  static_assert(sizeof(GSVector4i) == sizeof(D3D12_RECT));
  cmdlist->RSSetScissorRects(1, reinterpret_cast<const D3D12_RECT*>(&m_current_scissor));
}

// Binds texture/sampler for the given slot. Only sets dirty bits; the descriptor tables are
// actually rebuilt later in UpdateRootParameters(). May end the current render pass if the
// texture needs a resource-state transition.
void D3D12Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler)
{
  D3D12Texture* T = static_cast<D3D12Texture*>(texture);
  if (m_current_textures[slot] != T)
  {
    m_current_textures[slot] = T;
    m_dirty_flags |= DIRTY_FLAG_TEXTURES;

    if (T)
    {
      // Flush any pending clear and mark the texture in-use by this submission's fence.
      T->CommitClear();
      T->SetUseFenceValue(GetCurrentFenceValue());
      if (T->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)
      {
        // Barriers aren't allowed inside a render pass, so close it first.
        if (InRenderPass())
          EndRenderPass();
        T->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
      }
    }
  }

  // Null sampler falls back to the device's point sampler.
  const D3D12DescriptorHandle& handle =
    sampler ? static_cast<D3D12Sampler*>(sampler)->GetDescriptor() : m_point_sampler;
  if (m_current_samplers[slot] != handle)
  {
    m_current_samplers[slot] = handle;
    m_dirty_flags |= DIRTY_FLAG_SAMPLERS;
  }
}

// Binds the texture buffer (slot 0 only). Dirties textures only for the layout that uses it.
void D3D12Device::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer)
{
  DebugAssert(slot == 0);
  if (m_current_texture_buffer == buffer)
    return;

  m_current_texture_buffer = static_cast<D3D12TextureBuffer*>(buffer);
  if (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
    m_dirty_flags |= DIRTY_FLAG_TEXTURES;
}

// Removes tex from all binding points (samplers, render targets, depth target), ending the
// current render pass if tex is an active attachment. Called when a texture is destroyed.
void D3D12Device::UnbindTexture(D3D12Texture* tex)
{
  for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
  {
    if (m_current_textures[i] == tex)
    {
      m_current_textures[i] = nullptr;
      m_dirty_flags |= DIRTY_FLAG_TEXTURES;
    }
  }

  if (tex->IsRenderTarget() || tex->IsRWTexture())
  {
    for (u32 i = 0; i < m_num_current_render_targets; i++)
    {
      if (m_current_render_targets[i] == tex)
      {
        if (InRenderPass())
          EndRenderPass();
        m_current_render_targets[i] = nullptr;
      }
    }
  }
  else if (tex->IsDepthStencil())
  {
    if (m_current_depth_target == tex)
    {
      if (InRenderPass())
        EndRenderPass();
      m_current_depth_target = nullptr;
    }
  }
}

// Drops buf from the cached texture-buffer binding if it is current.
void D3D12Device::UnbindTextureBuffer(D3D12TextureBuffer* buf)
{
  if (m_current_texture_buffer != buf)
    return;

  m_current_texture_buffer = nullptr;

  if (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
    m_dirty_flags |= DIRTY_FLAG_TEXTURES;
}

// Caches the viewport rect; the actual RSSetViewports call is deferred to
// SetInitialPipelineState() while the command list is still in its initial state.
void D3D12Device::SetViewport(const GSVector4i rc)
{
  if (m_current_viewport.eq(rc))
    return;

  m_current_viewport = rc;

  if (m_dirty_flags & DIRTY_FLAG_INITIAL)
    return;

  SetViewport(GetCommandList());
}

void
D3D12Device::SetScissor(const GSVector4i rc)
{
  // Caches the scissor rect; like SetViewport(rc), the RSSetScissorRects call is deferred
  // until the initial pipeline state has been issued.
  if (m_current_scissor.eq(rc))
    return;

  m_current_scissor = rc;

  if (m_dirty_flags & DIRTY_FLAG_INITIAL)
    return;

  SetScissor(GetCommandList());
}

// Flushes dirty state before a draw: rebinding the root signature/parameters as needed and
// (re)starting the render pass. If descriptor allocation fails, the command list is submitted
// and the whole check is retried on a fresh list.
void D3D12Device::PreDrawCheck()
{
  // TODO: Flushing cmdbuffer because of descriptor OOM will lose push constants.

  DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
  const u32 dirty = std::exchange(m_dirty_flags, 0);
  if (dirty != 0)
  {
    if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT)
    {
      UpdateRootSignature();
      if (!UpdateRootParameters(dirty))
      {
        // Out of descriptors: submit, then retry everything on the new command list.
        SubmitCommandListAndRestartRenderPass("out of descriptors");
        PreDrawCheck();
        return;
      }
    }
    else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS))
    {
      if (!UpdateRootParameters(dirty))
      {
        SubmitCommandListAndRestartRenderPass("out of descriptors");
        PreDrawCheck();
        return;
      }
    }
  }

  if (!InRenderPass())
    BeginRenderPass();
}

// True when the current pass binds render targets as ROV/UAV images, which selects the
// alternate root signature set.
bool D3D12Device::IsUsingROVRootSignature() const
{
  return ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != 0);
}

// Binds the root signature matching the current pipeline layout and ROV usage.
void D3D12Device::UpdateRootSignature()
{
  GetCommandList()->SetGraphicsRootSignature(
    m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast<u8>(m_current_pipeline_layout)].Get());
}

// Writes the root parameters (CBV, texture/sampler descriptor tables, optional RT-UAV table)
// that the 'dirty' mask requires, for one compile-time pipeline layout. Returns false when a
// per-command-list descriptor allocator runs out of space; the caller submits and retries.
template<GPUPipeline::Layout layout>
bool D3D12Device::UpdateParametersForLayout(u32 dirty)
{
  ID3D12GraphicsCommandList4* cmdlist = GetCommandList();

  // Root parameter 2 is the fragment uniform CBV for the UBO layouts.
  if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO)
  {
    if (dirty & DIRTY_FLAG_CONSTANT_BUFFER)
      cmdlist->SetGraphicsRootConstantBufferView(2, m_uniform_buffer.GetGPUPointer() + m_uniform_buffer_position);
  }

  constexpr u32 num_textures = GetActiveTexturesForLayout(layout);
  if (dirty & DIRTY_FLAG_TEXTURES && num_textures > 0)
  {
    // Copy the current SRVs into a freshly-allocated GPU-visible range (table at parameter 0).
    D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator;
    D3D12DescriptorHandle gpu_handle;
    if (!allocator.Allocate(num_textures, &gpu_handle))
      return false;

    if constexpr (num_textures == 1)
    {
      // Unbound slots fall back to the null SRV descriptor.
      m_device->CopyDescriptorsSimple(
        1, gpu_handle, m_current_textures[0] ? m_current_textures[0]->GetSRVDescriptor() : m_null_srv_descriptor,
        D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
    }
    else
    {
      D3D12_CPU_DESCRIPTOR_HANDLE src_handles[MAX_TEXTURE_SAMPLERS];
      UINT src_sizes[MAX_TEXTURE_SAMPLERS];
      for (u32 i = 0; i < num_textures; i++)
      {
        src_handles[i] = m_current_textures[i] ? m_current_textures[i]->GetSRVDescriptor() : m_null_srv_descriptor;
        src_sizes[i] = 1;
      }
      m_device->CopyDescriptors(1, &gpu_handle.cpu_handle, &num_textures, num_textures, src_handles, src_sizes,
                                D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
    }

    cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
  }

  if (dirty & DIRTY_FLAG_SAMPLERS && num_textures > 0)
  {
    // Samplers are deduplicated through a lookup cache rather than re-copied every draw
    // (table at parameter 1).
    auto& allocator = m_command_lists[m_current_command_list].sampler_allocator;
    D3D12DescriptorHandle gpu_handle;
    if constexpr (num_textures == 1)
    {
      if (!allocator.LookupSingle(m_device.Get(), &gpu_handle, m_current_samplers[0]))
        return false;
    }
    else
    {
      if (!allocator.LookupGroup(m_device.Get(), &gpu_handle, m_current_samplers.data()))
        return false;
    }

    cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle);
  }

  // The texture-buffer layout exposes its SRV through table 0 instead of a texture array.
  if (dirty & DIRTY_FLAG_TEXTURES && layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
  {
    D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator;
    D3D12DescriptorHandle gpu_handle;
    if (!allocator.Allocate(1, &gpu_handle))
      return false;

    m_device->CopyDescriptorsSimple(
      1, gpu_handle, m_current_texture_buffer ? m_current_texture_buffer->GetDescriptor() : m_null_srv_descriptor,
      D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
    cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
  }

  if (dirty & DIRTY_FLAG_RT_UAVS)
  {
    // ROV path: render targets are bound as a UAV table of fixed size MAX_IMAGE_RENDER_TARGETS.
    DebugAssert(m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages);

    D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator;
    D3D12DescriptorHandle gpu_handle;
    if (!allocator.Allocate(MAX_IMAGE_RENDER_TARGETS, &gpu_handle))
      return false;

    D3D12_CPU_DESCRIPTOR_HANDLE src_handles[MAX_IMAGE_RENDER_TARGETS];
    UINT src_sizes[MAX_IMAGE_RENDER_TARGETS];
    const UINT dst_size = MAX_IMAGE_RENDER_TARGETS;
    for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
    {
      src_handles[i] =
        m_current_render_targets[i] ? m_current_render_targets[i]->GetSRVDescriptor() : m_null_srv_descriptor;
      src_sizes[i] = 1;
    }
    m_device->CopyDescriptors(1, &gpu_handle.cpu_handle, &dst_size, MAX_IMAGE_RENDER_TARGETS, src_handles, src_sizes,
                              D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);

    // The UAV table's root parameter index depends on how many parameters precede it in
    // this layout's root signature.
    constexpr u32 rov_param =
      (layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) ?
        1 :
        ((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ?
           3 :
           2);
    cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
  }

  return true;
}

// Dispatches to the layout-specialized template for the current pipeline layout.
// Returns false on descriptor-allocator exhaustion (caller submits and retries).
bool D3D12Device::UpdateRootParameters(u32 dirty)
{
  switch (m_current_pipeline_layout)
  {
    case GPUPipeline::Layout::SingleTextureAndUBO:
      return UpdateParametersForLayout<GPUPipeline::Layout::SingleTextureAndUBO>(dirty);

    case GPUPipeline::Layout::SingleTextureAndPushConstants:
      return UpdateParametersForLayout<GPUPipeline::Layout::SingleTextureAndPushConstants>(dirty);

    case GPUPipeline::Layout::SingleTextureBufferAndPushConstants:
      return UpdateParametersForLayout<GPUPipeline::Layout::SingleTextureBufferAndPushConstants>(dirty);

    case GPUPipeline::Layout::MultiTextureAndUBO:
      return UpdateParametersForLayout<GPUPipeline::Layout::MultiTextureAndUBO>(dirty);

    case GPUPipeline::Layout::MultiTextureAndPushConstants:
      return UpdateParametersForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);

    default:
      UnreachableCode();
  }
}

// Non-indexed draw; flushes dirty state first via PreDrawCheck().
void D3D12Device::Draw(u32 vertex_count, u32 base_vertex)
{
  PreDrawCheck();
  s_stats.num_draws++;
  GetCommandList()->DrawInstanced(vertex_count, 1, base_vertex, 0);
}

// Indexed draw; flushes dirty state first via PreDrawCheck().
void D3D12Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
{
  PreDrawCheck();
  s_stats.num_draws++;
  GetCommandList()->DrawIndexedInstanced(index_count, 1, base_index, base_vertex, 0);
}

// Draw-with-barrier is not supported by this backend; callers must not select it.
void D3D12Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type)
{
  Panic("Barriers are not supported");
}