duckstation

DuckStation, archived at the revision just before upstream changed it to a proprietary software project; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

d3d12_device.cpp (77993B)


      1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR PolyForm-Strict-1.0.0)
      3 
      4 #include "d3d12_device.h"
      5 #include "d3d12_builders.h"
      6 #include "d3d12_pipeline.h"
      7 #include "d3d12_stream_buffer.h"
      8 #include "d3d12_texture.h"
      9 #include "d3d_common.h"
     10 
     11 #include "core/host.h"
     12 
     13 #include "common/align.h"
     14 #include "common/assert.h"
     15 #include "common/bitutils.h"
     16 #include "common/error.h"
     17 #include "common/file_system.h"
     18 #include "common/log.h"
     19 #include "common/path.h"
     20 #include "common/scoped_guard.h"
     21 #include "common/small_string.h"
     22 #include "common/string_util.h"
     23 
     24 #include "D3D12MemAlloc.h"
     25 #include "fmt/format.h"
     26 
     27 #include <limits>
     28 #include <mutex>
     29 
     30 Log_SetChannel(D3D12Device);
     31 
// Tweakables: compile-time sizing limits used when creating the D3D12 device's heaps and buffers.
enum : u32
{
  // Per-frame budgets. MAX_DESCRIPTORS_PER_FRAME and MAX_SAMPLERS_PER_FRAME size the per-command-list
  // descriptor/sampler allocators created in CreateCommandLists().
  MAX_DRAW_CALLS_PER_FRAME = 2048,
  MAX_DESCRIPTORS_PER_FRAME = 32768,
  MAX_SAMPLERS_PER_FRAME = D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE,
  MAX_DESCRIPTOR_SETS_PER_FRAME = MAX_DRAW_CALLS_PER_FRAME,

  // Capacities of the persistent descriptor heaps created in CreateDescriptorHeaps().
  MAX_PERSISTENT_DESCRIPTORS = 2048,
  MAX_PERSISTENT_RTVS = 512,
  MAX_PERSISTENT_DSVS = 128,
  MAX_PERSISTENT_SAMPLERS = 512,

  // Buffer sizes; presumably in bytes and consumed by CreateBuffers() (not visible here) -- TODO confirm.
  VERTEX_BUFFER_SIZE = 32 * 1024 * 1024,
  INDEX_BUFFER_SIZE = 16 * 1024 * 1024,
  VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024,
  FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024,
  TEXTURE_BUFFER_SIZE = 64 * 1024 * 1024,

  // UNIFORM_PUSH_CONSTANTS_STAGES = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
  UNIFORM_PUSH_CONSTANTS_SIZE = 128,

  MAX_UNIFORM_BUFFER_SIZE = 1024,
};
     56 
// We need to synchronize instance creation because of adapter enumeration from the UI thread.
// Held for the whole of CreateDevice() and DestroyDevice().
static std::mutex s_instance_mutex;

// Texture format used to look up the DXGI format of the swap chain in CreateSwapChain().
static constexpr GPUTexture::Format s_swap_chain_format = GPUTexture::Format::RGBA8;

// We just need to keep this alive, never reference it.
// ReadPipelineCache() moves the blob passed to CreatePipelineLibrary() in here; DestroyDevice() frees it.
static DynamicHeapArray<u8> s_pipeline_cache_data;

#ifdef _DEBUG
#include "WinPixEventRuntime/pix3.h"
// Debug-only counter, reset to zero whenever a D3D12Device is constructed.
static u32 s_debug_scope_depth = 0;
#endif
     69 
// Constructs the device object without creating any D3D12 objects; that happens in CreateDevice().
D3D12Device::D3D12Device()
{
#ifdef _DEBUG
  // Debug builds only: start the file-level debug scope depth counter from zero.
  s_debug_scope_depth = 0;
#endif
}
     76 
// Destructor. Asserts that the device and the pipeline cache blob have already been released,
// which is what DestroyDevice() does.
D3D12Device::~D3D12Device()
{
  Assert(!m_device);
  Assert(s_pipeline_cache_data.empty());
}
     82 
     83 D3D12Device::ComPtr<ID3DBlob> D3D12Device::SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc, Error* error)
     84 {
     85   ComPtr<ID3DBlob> blob;
     86   ComPtr<ID3DBlob> error_blob;
     87   const HRESULT hr =
     88     D3D12SerializeRootSignature(desc, D3D_ROOT_SIGNATURE_VERSION_1, blob.GetAddressOf(), error_blob.GetAddressOf());
     89   if (FAILED(hr)) [[unlikely]]
     90   {
     91     Error::SetHResult(error, "D3D12SerializeRootSignature() failed: ", hr);
     92     if (error_blob)
     93       ERROR_LOG(static_cast<const char*>(error_blob->GetBufferPointer()));
     94 
     95     return {};
     96   }
     97 
     98   return blob;
     99 }
    100 
    101 D3D12Device::ComPtr<ID3D12RootSignature> D3D12Device::CreateRootSignature(const D3D12_ROOT_SIGNATURE_DESC* desc,
    102                                                                           Error* error)
    103 {
    104   ComPtr<ID3DBlob> blob = SerializeRootSignature(desc, error);
    105   if (!blob)
    106     return {};
    107 
    108   ComPtr<ID3D12RootSignature> rs;
    109   const HRESULT hr =
    110     m_device->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(), IID_PPV_ARGS(rs.GetAddressOf()));
    111   if (FAILED(hr)) [[unlikely]]
    112   {
    113     Error::SetHResult(error, "CreateRootSignature() failed: ", hr);
    114     return {};
    115   }
    116 
    117   return rs;
    118 }
    119 
    120 bool D3D12Device::CreateDevice(std::string_view adapter, bool threaded_presentation,
    121                                std::optional<bool> exclusive_fullscreen_control, FeatureMask disabled_features,
    122                                Error* error)
    123 {
    124   std::unique_lock lock(s_instance_mutex);
    125 
    126   m_dxgi_factory = D3DCommon::CreateFactory(m_debug_device, error);
    127   if (!m_dxgi_factory)
    128     return false;
    129 
    130   m_adapter = D3DCommon::GetAdapterByName(m_dxgi_factory.Get(), adapter);
    131 
    132   HRESULT hr = S_OK;
    133 
    134   // Enabling the debug layer will fail if the Graphics Tools feature is not installed.
    135   if (m_debug_device)
    136   {
    137     ComPtr<ID3D12Debug> debug12;
    138     hr = D3D12GetDebugInterface(IID_PPV_ARGS(debug12.GetAddressOf()));
    139     if (SUCCEEDED(hr))
    140     {
    141       debug12->EnableDebugLayer();
    142     }
    143     else
    144     {
    145       ERROR_LOG("Debug layer requested but not available.");
    146       m_debug_device = false;
    147     }
    148   }
    149 
    150   // Create the actual device.
    151   for (D3D_FEATURE_LEVEL try_feature_level : {D3D_FEATURE_LEVEL_11_0})
    152   {
    153     hr = D3D12CreateDevice(m_adapter.Get(), try_feature_level, IID_PPV_ARGS(&m_device));
    154     if (SUCCEEDED(hr))
    155     {
    156       m_feature_level = try_feature_level;
    157       break;
    158     }
    159   }
    160   if (FAILED(hr))
    161   {
    162     Error::SetHResult(error, "Failed to create D3D12 device: ", hr);
    163     return false;
    164   }
    165 
    166   if (!m_adapter)
    167   {
    168     const LUID luid(m_device->GetAdapterLuid());
    169     if (FAILED(m_dxgi_factory->EnumAdapterByLuid(luid, IID_PPV_ARGS(m_adapter.GetAddressOf()))))
    170       ERROR_LOG("Failed to get lookup adapter by device LUID");
    171   }
    172 
    173   if (m_debug_device)
    174   {
    175     ComPtr<ID3D12InfoQueue> info_queue;
    176     if (SUCCEEDED(m_device.As(&info_queue)))
    177     {
    178       if (IsDebuggerPresent())
    179       {
    180         info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE);
    181         info_queue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, TRUE);
    182       }
    183 
    184       D3D12_INFO_QUEUE_FILTER filter = {};
    185       std::array<D3D12_MESSAGE_ID, 6> id_list{
    186         D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE,
    187         D3D12_MESSAGE_ID_CLEARDEPTHSTENCILVIEW_MISMATCHINGCLEARVALUE,
    188         D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_RENDERTARGETVIEW_NOT_SET,
    189         D3D12_MESSAGE_ID_CREATEINPUTLAYOUT_TYPE_MISMATCH,
    190         D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE,
    191         D3D12_MESSAGE_ID_LOADPIPELINE_NAMENOTFOUND,
    192       };
    193       filter.DenyList.NumIDs = static_cast<UINT>(id_list.size());
    194       filter.DenyList.pIDList = id_list.data();
    195       info_queue->PushStorageFilter(&filter);
    196     }
    197   }
    198 
    199   const D3D12_COMMAND_QUEUE_DESC queue_desc = {D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL,
    200                                                D3D12_COMMAND_QUEUE_FLAG_NONE, 0u};
    201   hr = m_device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(&m_command_queue));
    202   if (FAILED(hr))
    203   {
    204     Error::SetHResult(error, "Failed to create command queue: ", hr);
    205     return false;
    206   }
    207 
    208   D3D12MA::ALLOCATOR_DESC allocatorDesc = {};
    209   allocatorDesc.pDevice = m_device.Get();
    210   allocatorDesc.pAdapter = m_adapter.Get();
    211   allocatorDesc.Flags =
    212     D3D12MA::ALLOCATOR_FLAG_SINGLETHREADED |
    213     D3D12MA::ALLOCATOR_FLAG_DEFAULT_POOLS_NOT_ZEROED /* | D3D12MA::ALLOCATOR_FLAG_ALWAYS_COMMITTED*/;
    214 
    215   hr = D3D12MA::CreateAllocator(&allocatorDesc, m_allocator.GetAddressOf());
    216   if (FAILED(hr))
    217   {
    218     Error::SetHResult(error, "D3D12MA::CreateAllocator() failed: ", hr);
    219     return false;
    220   }
    221 
    222   hr = m_device->CreateFence(m_completed_fence_value, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_fence));
    223   if (FAILED(hr))
    224   {
    225     Error::SetHResult(error, "Failed to create fence: ", hr);
    226     return false;
    227   }
    228 
    229   m_fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr);
    230   if (m_fence_event == NULL)
    231   {
    232     Error::SetWin32(error, "Failed to create fence event: ", GetLastError());
    233     return false;
    234   }
    235 
    236   SetFeatures(disabled_features);
    237 
    238   if (!CreateCommandLists(error) || !CreateDescriptorHeaps(error))
    239     return false;
    240 
    241   if (!m_window_info.IsSurfaceless() && !CreateSwapChain(error))
    242     return false;
    243 
    244   if (!CreateRootSignatures(error) || !CreateBuffers(error))
    245     return false;
    246 
    247   CreateTimestampQuery();
    248   return true;
    249 }
    250 
    251 void D3D12Device::DestroyDevice()
    252 {
    253   std::unique_lock lock(s_instance_mutex);
    254 
    255   // Toss command list if we're recording...
    256   if (InRenderPass())
    257     EndRenderPass();
    258 
    259   WaitForGPUIdle();
    260 
    261   DestroyDeferredObjects(m_current_fence_value);
    262   DestroySamplers();
    263   DestroyTimestampQuery();
    264   DestroyBuffers();
    265   DestroyDescriptorHeaps();
    266   DestroyRootSignatures();
    267   DestroySwapChain();
    268   DestroyCommandLists();
    269 
    270   m_pipeline_library.Reset();
    271   s_pipeline_cache_data.deallocate();
    272   m_fence.Reset();
    273   if (m_fence_event != NULL)
    274   {
    275     CloseHandle(m_fence_event);
    276     m_fence_event = NULL;
    277   }
    278 
    279   m_allocator.Reset();
    280   m_command_queue.Reset();
    281   m_device.Reset();
    282   m_adapter.Reset();
    283   m_dxgi_factory.Reset();
    284 }
    285 
    286 bool D3D12Device::ReadPipelineCache(std::optional<DynamicHeapArray<u8>> data)
    287 {
    288   HRESULT hr =
    289     m_device->CreatePipelineLibrary(data.has_value() ? data->data() : nullptr, data.has_value() ? data->size() : 0,
    290                                     IID_PPV_ARGS(m_pipeline_library.ReleaseAndGetAddressOf()));
    291   if (SUCCEEDED(hr))
    292   {
    293     if (data.has_value())
    294       s_pipeline_cache_data = std::move(data.value());
    295 
    296     return true;
    297   }
    298 
    299   // Try without the cache data.
    300   if (data.has_value())
    301   {
    302     WARNING_LOG("CreatePipelineLibrary() failed, trying without cache data. Error: {}",
    303                 Error::CreateHResult(hr).GetDescription());
    304 
    305     hr = m_device->CreatePipelineLibrary(nullptr, 0, IID_PPV_ARGS(m_pipeline_library.ReleaseAndGetAddressOf()));
    306     if (SUCCEEDED(hr))
    307       return true;
    308   }
    309 
    310   if (FAILED(hr))
    311   {
    312     WARNING_LOG("CreatePipelineLibrary() failed, pipeline caching will not be available. Error: {}",
    313                 Error::CreateHResult(hr).GetDescription());
    314     return false;
    315   }
    316 
    317   return true;
    318 }
    319 
    320 bool D3D12Device::GetPipelineCacheData(DynamicHeapArray<u8>* data)
    321 {
    322   if (!m_pipeline_library)
    323     return false;
    324 
    325   const size_t size = m_pipeline_library->GetSerializedSize();
    326   if (size == 0)
    327   {
    328     WARNING_LOG("Empty serialized pipeline state returned.");
    329     return true;
    330   }
    331 
    332   data->resize(size);
    333   const HRESULT hr = m_pipeline_library->Serialize(data->data(), data->size());
    334   if (FAILED(hr))
    335   {
    336     ERROR_LOG("Serialize() failed with HRESULT {:08X}", static_cast<unsigned>(hr));
    337     data->deallocate();
    338     return false;
    339   }
    340 
    341   return true;
    342 }
    343 
    344 bool D3D12Device::CreateCommandLists(Error* error)
    345 {
    346   for (u32 i = 0; i < NUM_COMMAND_LISTS; i++)
    347   {
    348     CommandList& res = m_command_lists[i];
    349     HRESULT hr;
    350 
    351     for (u32 j = 0; j < 2; j++)
    352     {
    353       hr = m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT,
    354                                             IID_PPV_ARGS(res.command_allocators[j].GetAddressOf()));
    355       if (FAILED(hr))
    356       {
    357         Error::SetHResult(error, "CreateCommandAllocator() failed: ", hr);
    358         return false;
    359       }
    360 
    361       hr = m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, res.command_allocators[j].Get(), nullptr,
    362                                        IID_PPV_ARGS(res.command_lists[j].GetAddressOf()));
    363       if (FAILED(hr))
    364       {
    365         Error::SetHResult(error, "CreateCommandList() failed: ", hr);
    366         return false;
    367       }
    368 
    369       // Close the command lists, since the first thing we do is reset them.
    370       hr = res.command_lists[j]->Close();
    371       if (FAILED(hr))
    372       {
    373         Error::SetHResult(error, "Close() for new command list failed: ", hr);
    374         return false;
    375       }
    376     }
    377 
    378     if (!res.descriptor_allocator.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
    379                                          MAX_DESCRIPTORS_PER_FRAME, error))
    380     {
    381       Error::AddPrefix(error, "Failed to create per frame descriptor allocator: ");
    382       return false;
    383     }
    384 
    385     if (!res.sampler_allocator.Create(m_device.Get(), MAX_SAMPLERS_PER_FRAME, error))
    386     {
    387       Error::AddPrefix(error, "Failed to create per frame sampler allocator: ");
    388       return false;
    389     }
    390   }
    391 
    392   MoveToNextCommandList();
    393   return true;
    394 }
    395 
    396 void D3D12Device::MoveToNextCommandList()
    397 {
    398   m_current_command_list = (m_current_command_list + 1) % NUM_COMMAND_LISTS;
    399   m_current_fence_value++;
    400 
    401   // We may have to wait if this command list hasn't finished on the GPU.
    402   CommandList& res = m_command_lists[m_current_command_list];
    403   WaitForFence(res.fence_counter);
    404   res.fence_counter = m_current_fence_value;
    405   res.init_list_used = false;
    406 
    407   // Begin command list.
    408   res.command_allocators[1]->Reset();
    409   res.command_lists[1]->Reset(res.command_allocators[1].Get(), nullptr);
    410   res.descriptor_allocator.Reset();
    411   if (res.sampler_allocator.ShouldReset())
    412     res.sampler_allocator.Reset();
    413 
    414   if (res.has_timestamp_query)
    415   {
    416     // readback timestamp from the last time this cmdlist was used.
    417     // we don't need to worry about disjoint in dx12, the frequency is reliable within a single cmdlist.
    418     const u32 offset = (m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST));
    419     const D3D12_RANGE read_range = {offset, offset + (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST)};
    420     void* map;
    421     HRESULT hr = m_timestamp_query_buffer->Map(0, &read_range, &map);
    422     if (SUCCEEDED(hr))
    423     {
    424       u64 timestamps[2];
    425       std::memcpy(timestamps, static_cast<const u8*>(map) + offset, sizeof(timestamps));
    426       m_accumulated_gpu_time +=
    427         static_cast<float>(static_cast<double>(timestamps[1] - timestamps[0]) / m_timestamp_frequency);
    428 
    429       const D3D12_RANGE write_range = {};
    430       m_timestamp_query_buffer->Unmap(0, &write_range);
    431     }
    432     else
    433     {
    434       WARNING_LOG("Map() for timestamp query failed: {:08X}", static_cast<unsigned>(hr));
    435     }
    436   }
    437 
    438   res.has_timestamp_query = m_gpu_timing_enabled;
    439   if (m_gpu_timing_enabled)
    440   {
    441     res.command_lists[1]->EndQuery(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP,
    442                                    m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST);
    443   }
    444 
    445   ID3D12DescriptorHeap* heaps[2] = {res.descriptor_allocator.GetDescriptorHeap(),
    446                                     res.sampler_allocator.GetDescriptorHeap()};
    447   res.command_lists[1]->SetDescriptorHeaps(static_cast<UINT>(std::size(heaps)), heaps);
    448 
    449   m_allocator->SetCurrentFrameIndex(static_cast<UINT>(m_current_fence_value));
    450   InvalidateCachedState();
    451 }
    452 
    453 void D3D12Device::DestroyCommandLists()
    454 {
    455   for (CommandList& resources : m_command_lists)
    456   {
    457     resources.descriptor_allocator.Destroy();
    458     resources.sampler_allocator.Destroy();
    459     for (u32 i = 0; i < 2; i++)
    460     {
    461       resources.command_lists[i].Reset();
    462       resources.command_allocators[i].Reset();
    463     }
    464   }
    465 }
    466 
    467 bool D3D12Device::CreateDescriptorHeaps(Error* error)
    468 {
    469   if (!m_descriptor_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
    470                                         MAX_PERSISTENT_DESCRIPTORS, false, error) ||
    471       !m_rtv_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_RTV, MAX_PERSISTENT_RTVS, false, error) ||
    472       !m_dsv_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_DSV, MAX_PERSISTENT_DSVS, false, error) ||
    473       !m_sampler_heap_manager.Create(m_device.Get(), D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, MAX_PERSISTENT_SAMPLERS, false,
    474                                      error))
    475   {
    476     return false;
    477   }
    478 
    479   // Allocate null SRV descriptor for unbound textures.
    480   static constexpr D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc = {
    481     DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_SRV_DIMENSION_TEXTURE2D, D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, {}};
    482   if (!m_descriptor_heap_manager.Allocate(&m_null_srv_descriptor))
    483   {
    484     Error::SetStringView(error, "Failed to allocate null SRV descriptor");
    485     return false;
    486   }
    487   m_device->CreateShaderResourceView(nullptr, &null_srv_desc, m_null_srv_descriptor.cpu_handle);
    488 
    489   // Same for UAVs.
    490   static constexpr D3D12_UNORDERED_ACCESS_VIEW_DESC null_uav_desc = {
    491     DXGI_FORMAT_R8G8B8A8_UNORM, D3D12_UAV_DIMENSION_TEXTURE2D, {}};
    492   if (!m_descriptor_heap_manager.Allocate(&m_null_uav_descriptor))
    493   {
    494     Error::SetStringView(error, "Failed to allocate null UAV descriptor");
    495     return false;
    496   }
    497   m_device->CreateUnorderedAccessView(nullptr, nullptr, &null_uav_desc, m_null_uav_descriptor.cpu_handle);
    498 
    499   // Same for samplers.
    500   m_point_sampler = GetSampler(GPUSampler::GetNearestConfig());
    501   for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
    502     m_current_samplers[i] = m_point_sampler;
    503   return true;
    504 }
    505 
// Frees the null UAV/SRV descriptors (if they were allocated) and destroys the persistent heap
// managers, in the reverse of the order CreateDescriptorHeaps() created them.
void D3D12Device::DestroyDescriptorHeaps()
{
  // The null descriptors live in m_descriptor_heap_manager, so return them before it is destroyed.
  if (m_null_uav_descriptor)
    m_descriptor_heap_manager.Free(&m_null_uav_descriptor);
  if (m_null_srv_descriptor)
    m_descriptor_heap_manager.Free(&m_null_srv_descriptor);
  m_sampler_heap_manager.Destroy();
  m_dsv_heap_manager.Destroy();
  m_rtv_heap_manager.Destroy();
  m_descriptor_heap_manager.Destroy();
}
    517 
    518 ID3D12GraphicsCommandList4* D3D12Device::GetInitCommandList()
    519 {
    520   CommandList& res = m_command_lists[m_current_command_list];
    521   if (!res.init_list_used)
    522   {
    523     HRESULT hr = res.command_allocators[0]->Reset();
    524     AssertMsg(SUCCEEDED(hr), "Reset init command allocator failed");
    525 
    526     hr = res.command_lists[0]->Reset(res.command_allocators[0].Get(), nullptr);
    527     AssertMsg(SUCCEEDED(hr), "Reset init command list failed");
    528     res.init_list_used = true;
    529   }
    530 
    531   return res.command_lists[0].Get();
    532 }
    533 
    534 void D3D12Device::SubmitCommandList(bool wait_for_completion)
    535 {
    536   CommandList& res = m_command_lists[m_current_command_list];
    537   HRESULT hr;
    538 
    539   if (res.has_timestamp_query)
    540   {
    541     // write the timestamp back at the end of the cmdlist
    542     res.command_lists[1]->EndQuery(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP,
    543                                    (m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST) + 1);
    544     res.command_lists[1]->ResolveQueryData(m_timestamp_query_heap.Get(), D3D12_QUERY_TYPE_TIMESTAMP,
    545                                            m_current_command_list * NUM_TIMESTAMP_QUERIES_PER_CMDLIST,
    546                                            NUM_TIMESTAMP_QUERIES_PER_CMDLIST, m_timestamp_query_buffer.Get(),
    547                                            m_current_command_list * (sizeof(u64) * NUM_TIMESTAMP_QUERIES_PER_CMDLIST));
    548   }
    549 
    550   // TODO: error handling
    551   if (res.init_list_used)
    552   {
    553     hr = res.command_lists[0]->Close();
    554     if (FAILED(hr)) [[unlikely]]
    555     {
    556       ERROR_LOG("Closing init command list failed with HRESULT {:08X}", static_cast<unsigned>(hr));
    557       Panic("TODO cannot continue");
    558     }
    559   }
    560 
    561   // Close and queue command list.
    562   hr = res.command_lists[1]->Close();
    563   if (FAILED(hr)) [[unlikely]]
    564   {
    565     ERROR_LOG("Closing main command list failed with HRESULT {:08X}", static_cast<unsigned>(hr));
    566     Panic("TODO cannot continue");
    567   }
    568 
    569   if (res.init_list_used)
    570   {
    571     const std::array<ID3D12CommandList*, 2> execute_lists{res.command_lists[0].Get(), res.command_lists[1].Get()};
    572     m_command_queue->ExecuteCommandLists(static_cast<UINT>(execute_lists.size()), execute_lists.data());
    573   }
    574   else
    575   {
    576     const std::array<ID3D12CommandList*, 1> execute_lists{res.command_lists[1].Get()};
    577     m_command_queue->ExecuteCommandLists(static_cast<UINT>(execute_lists.size()), execute_lists.data());
    578   }
    579 
    580   // Update fence when GPU has completed.
    581   hr = m_command_queue->Signal(m_fence.Get(), res.fence_counter);
    582   DebugAssertMsg(SUCCEEDED(hr), "Signal fence");
    583 
    584   MoveToNextCommandList();
    585 
    586   if (wait_for_completion)
    587     WaitForFence(res.fence_counter);
    588 }
    589 
// Overload of SubmitCommandList() that first logs, at warning level, why the submission is happening.
//
// wait_for_completion: forwarded unchanged to SubmitCommandList(bool).
// reason:              text inserted into the log message.
void D3D12Device::SubmitCommandList(bool wait_for_completion, const std::string_view reason)
{
  WARNING_LOG("Executing command buffer due to '{}'", reason);
  SubmitCommandList(wait_for_completion);
}
    595 
    596 void D3D12Device::SubmitCommandListAndRestartRenderPass(const std::string_view reason)
    597 {
    598   if (InRenderPass())
    599     EndRenderPass();
    600 
    601   D3D12Pipeline* pl = m_current_pipeline;
    602   SubmitCommandList(false, reason);
    603 
    604   SetPipeline(pl);
    605   BeginRenderPass();
    606 }
    607 
    608 void D3D12Device::WaitForFence(u64 fence)
    609 {
    610   if (m_completed_fence_value >= fence)
    611     return;
    612 
    613   // Try non-blocking check.
    614   m_completed_fence_value = m_fence->GetCompletedValue();
    615   if (m_completed_fence_value < fence)
    616   {
    617     // Fall back to event.
    618     HRESULT hr = m_fence->SetEventOnCompletion(fence, m_fence_event);
    619     AssertMsg(SUCCEEDED(hr), "Set fence event on completion");
    620     WaitForSingleObject(m_fence_event, INFINITE);
    621     m_completed_fence_value = m_fence->GetCompletedValue();
    622   }
    623 
    624   // Release resources for as many command lists which have completed.
    625   DestroyDeferredObjects(m_completed_fence_value);
    626 }
    627 
    628 void D3D12Device::WaitForGPUIdle()
    629 {
    630   u32 index = (m_current_command_list + 1) % NUM_COMMAND_LISTS;
    631   for (u32 i = 0; i < (NUM_COMMAND_LISTS - 1); i++)
    632   {
    633     WaitForFence(m_command_lists[index].fence_counter);
    634     index = (index + 1) % NUM_COMMAND_LISTS;
    635   }
    636 }
    637 
// Ends any open render pass, submits the current command list, and blocks until that submission
// has completed on the GPU.
void D3D12Device::ExecuteAndWaitForGPUIdle()
{
  if (InRenderPass())
    EndRenderPass();

  // true => wait for the submitted work to complete before returning.
  SubmitCommandList(true);
}
    645 
    646 bool D3D12Device::CreateTimestampQuery()
    647 {
    648   constexpr u32 QUERY_COUNT = NUM_TIMESTAMP_QUERIES_PER_CMDLIST * NUM_COMMAND_LISTS;
    649   constexpr u32 BUFFER_SIZE = sizeof(u64) * QUERY_COUNT;
    650 
    651   const D3D12_QUERY_HEAP_DESC desc = {D3D12_QUERY_HEAP_TYPE_TIMESTAMP, QUERY_COUNT, 0u};
    652   HRESULT hr = m_device->CreateQueryHeap(&desc, IID_PPV_ARGS(m_timestamp_query_heap.GetAddressOf()));
    653   if (FAILED(hr))
    654   {
    655     ERROR_LOG("CreateQueryHeap() for timestamp failed with {:08X}", static_cast<unsigned>(hr));
    656     m_features.gpu_timing = false;
    657     return false;
    658   }
    659 
    660   const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_READBACK,
    661                                                     D3D12_HEAP_FLAG_NONE, nullptr, nullptr};
    662   const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER,
    663                                              0,
    664                                              BUFFER_SIZE,
    665                                              1,
    666                                              1,
    667                                              1,
    668                                              DXGI_FORMAT_UNKNOWN,
    669                                              {1, 0},
    670                                              D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
    671                                              D3D12_RESOURCE_FLAG_NONE};
    672   hr = m_allocator->CreateResource(&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
    673                                    m_timestamp_query_allocation.GetAddressOf(),
    674                                    IID_PPV_ARGS(m_timestamp_query_buffer.GetAddressOf()));
    675   if (FAILED(hr))
    676   {
    677     ERROR_LOG("CreateResource() for timestamp failed with {:08X}", static_cast<unsigned>(hr));
    678     m_features.gpu_timing = false;
    679     return false;
    680   }
    681 
    682   u64 frequency;
    683   hr = m_command_queue->GetTimestampFrequency(&frequency);
    684   if (FAILED(hr))
    685   {
    686     ERROR_LOG("GetTimestampFrequency() failed: {:08X}", static_cast<unsigned>(hr));
    687     m_features.gpu_timing = false;
    688     return false;
    689   }
    690 
    691   m_timestamp_frequency = static_cast<double>(frequency) / 1000.0;
    692   return true;
    693 }
    694 
// Releases the timestamp readback buffer, its allocation, and the query heap created by
// CreateTimestampQuery().
void D3D12Device::DestroyTimestampQuery()
{
  m_timestamp_query_buffer.Reset();
  m_timestamp_query_allocation.Reset();
  m_timestamp_query_heap.Reset();
}
    701 
    702 float D3D12Device::GetAndResetAccumulatedGPUTime()
    703 {
    704   const float time = m_accumulated_gpu_time;
    705   m_accumulated_gpu_time = 0.0f;
    706   return time;
    707 }
    708 
    709 bool D3D12Device::SetGPUTimingEnabled(bool enabled)
    710 {
    711   m_gpu_timing_enabled = enabled && m_features.gpu_timing;
    712   return (enabled == m_gpu_timing_enabled);
    713 }
    714 
    715 void D3D12Device::DeferObjectDestruction(ComPtr<ID3D12Object> resource)
    716 {
    717   DebugAssert(resource);
    718   m_cleanup_resources.emplace_back(GetCurrentFenceValue(),
    719                                    std::pair<D3D12MA::Allocation*, ID3D12Object*>(nullptr, resource.Detach()));
    720 }
    721 
    722 void D3D12Device::DeferResourceDestruction(ComPtr<D3D12MA::Allocation> allocation, ComPtr<ID3D12Resource> resource)
    723 {
    724   DebugAssert(allocation && resource);
    725   m_cleanup_resources.emplace_back(
    726     GetCurrentFenceValue(), std::pair<D3D12MA::Allocation*, ID3D12Object*>(allocation.Detach(), resource.Detach()));
    727 }
    728 
    729 void D3D12Device::DeferDescriptorDestruction(D3D12DescriptorHeapManager& heap, D3D12DescriptorHandle* descriptor)
    730 {
    731   DebugAssert(descriptor->index != D3D12DescriptorHandle::INVALID_INDEX);
    732   m_cleanup_descriptors.emplace_back(GetCurrentFenceValue(),
    733                                      std::pair<D3D12DescriptorHeapManager*, D3D12DescriptorHandle>(&heap, *descriptor));
    734   descriptor->Clear();
    735 }
    736 
    737 void D3D12Device::DestroyDeferredObjects(u64 fence_value)
    738 {
    739   while (!m_cleanup_descriptors.empty())
    740   {
    741     auto& it = m_cleanup_descriptors.front();
    742     if (it.first > fence_value)
    743       break;
    744 
    745     it.second.first->Free(it.second.second.index);
    746     m_cleanup_descriptors.pop_front();
    747   }
    748 
    749   while (!m_cleanup_resources.empty())
    750   {
    751     auto& it = m_cleanup_resources.front();
    752     if (it.first > fence_value)
    753       break;
    754 
    755     it.second.second->Release();
    756     if (it.second.first)
    757       it.second.first->Release();
    758     m_cleanup_resources.pop_front();
    759   }
    760 }
    761 
// Identifies this device as the D3D12 backend.
RenderAPI D3D12Device::GetRenderAPI() const
{
  return RenderAPI::D3D12;
}
    766 
// Returns true when a swap chain currently exists.
bool D3D12Device::HasSurface() const
{
  return static_cast<bool>(m_swap_chain);
}
    771 
    772 u32 D3D12Device::GetSwapChainBufferCount() const
    773 {
    774   // With vsync off, we only need two buffers. Same for blocking vsync.
    775   // With triple buffering, we need three.
    776   return (m_vsync_mode == GPUVSyncMode::Mailbox) ? 3 : 2;
    777 }
    778 
    779 bool D3D12Device::CreateSwapChain(Error* error)
    780 {
    781   if (m_window_info.type != WindowInfo::Type::Win32)
    782   {
    783     Error::SetStringView(error, "D3D12 expects a Win32 window.");
    784     return false;
    785   }
    786 
    787   const D3DCommon::DXGIFormatMapping& fm = D3DCommon::GetFormatMapping(s_swap_chain_format);
    788 
    789   const HWND window_hwnd = reinterpret_cast<HWND>(m_window_info.window_handle);
    790   RECT client_rc{};
    791   GetClientRect(window_hwnd, &client_rc);
    792 
    793   DXGI_MODE_DESC fullscreen_mode = {};
    794   ComPtr<IDXGIOutput> fullscreen_output;
    795   if (Host::IsFullscreen())
    796   {
    797     u32 fullscreen_width, fullscreen_height;
    798     float fullscreen_refresh_rate;
    799     m_is_exclusive_fullscreen =
    800       GetRequestedExclusiveFullscreenMode(&fullscreen_width, &fullscreen_height, &fullscreen_refresh_rate) &&
    801       D3DCommon::GetRequestedExclusiveFullscreenModeDesc(m_dxgi_factory.Get(), client_rc, fullscreen_width,
    802                                                          fullscreen_height, fullscreen_refresh_rate, fm.resource_format,
    803                                                          &fullscreen_mode, fullscreen_output.GetAddressOf());
    804 
    805     // Using mailbox-style no-allow-tearing causes tearing in exclusive fullscreen.
    806     if (m_vsync_mode == GPUVSyncMode::Mailbox && m_is_exclusive_fullscreen)
    807     {
    808       WARNING_LOG("Using FIFO instead of Mailbox vsync due to exclusive fullscreen.");
    809       m_vsync_mode = GPUVSyncMode::FIFO;
    810     }
    811   }
    812   else
    813   {
    814     m_is_exclusive_fullscreen = false;
    815   }
    816 
    817   DXGI_SWAP_CHAIN_DESC1 swap_chain_desc = {};
    818   swap_chain_desc.Width = static_cast<u32>(client_rc.right - client_rc.left);
    819   swap_chain_desc.Height = static_cast<u32>(client_rc.bottom - client_rc.top);
    820   swap_chain_desc.Format = fm.resource_format;
    821   swap_chain_desc.SampleDesc.Count = 1;
    822   swap_chain_desc.BufferCount = GetSwapChainBufferCount();
    823   swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
    824   swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
    825 
    826   m_using_allow_tearing = (m_allow_tearing_supported && !m_is_exclusive_fullscreen);
    827   if (m_using_allow_tearing)
    828     swap_chain_desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;
    829 
    830   HRESULT hr = S_OK;
    831 
    832   if (m_is_exclusive_fullscreen)
    833   {
    834     DXGI_SWAP_CHAIN_DESC1 fs_sd_desc = swap_chain_desc;
    835     DXGI_SWAP_CHAIN_FULLSCREEN_DESC fs_desc = {};
    836 
    837     fs_sd_desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH;
    838     fs_sd_desc.Width = fullscreen_mode.Width;
    839     fs_sd_desc.Height = fullscreen_mode.Height;
    840     fs_desc.RefreshRate = fullscreen_mode.RefreshRate;
    841     fs_desc.ScanlineOrdering = fullscreen_mode.ScanlineOrdering;
    842     fs_desc.Scaling = fullscreen_mode.Scaling;
    843     fs_desc.Windowed = FALSE;
    844 
    845     VERBOSE_LOG("Creating a {}x{} exclusive fullscreen swap chain", fs_sd_desc.Width, fs_sd_desc.Height);
    846     hr = m_dxgi_factory->CreateSwapChainForHwnd(m_command_queue.Get(), window_hwnd, &fs_sd_desc, &fs_desc,
    847                                                 fullscreen_output.Get(), m_swap_chain.ReleaseAndGetAddressOf());
    848     if (FAILED(hr))
    849     {
    850       WARNING_LOG("Failed to create fullscreen swap chain, trying windowed.");
    851       m_is_exclusive_fullscreen = false;
    852       m_using_allow_tearing = m_allow_tearing_supported;
    853     }
    854   }
    855 
    856   if (!m_is_exclusive_fullscreen)
    857   {
    858     VERBOSE_LOG("Creating a {}x{} windowed swap chain", swap_chain_desc.Width, swap_chain_desc.Height);
    859     hr = m_dxgi_factory->CreateSwapChainForHwnd(m_command_queue.Get(), window_hwnd, &swap_chain_desc, nullptr, nullptr,
    860                                                 m_swap_chain.ReleaseAndGetAddressOf());
    861     if (FAILED(hr))
    862     {
    863       Error::SetHResult(error, "CreateSwapChainForHwnd() failed: ", hr);
    864       return false;
    865     }
    866   }
    867 
    868   hr = m_dxgi_factory->MakeWindowAssociation(window_hwnd, DXGI_MWA_NO_WINDOW_CHANGES);
    869   if (FAILED(hr))
    870     WARNING_LOG("MakeWindowAssociation() to disable ALT+ENTER failed");
    871 
    872   if (!CreateSwapChainRTV(error))
    873   {
    874     DestroySwapChain();
    875     return false;
    876   }
    877 
    878   // Render a frame as soon as possible to clear out whatever was previously being displayed.
    879   RenderBlankFrame();
    880   return true;
    881 }
    882 
    883 bool D3D12Device::CreateSwapChainRTV(Error* error)
    884 {
    885   DXGI_SWAP_CHAIN_DESC swap_chain_desc;
    886   HRESULT hr = m_swap_chain->GetDesc(&swap_chain_desc);
    887   if (FAILED(hr))
    888   {
    889     Error::SetHResult(error, "GetDesc() for swap chain failed: ", hr);
    890     return false;
    891   }
    892 
    893   const D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = {swap_chain_desc.BufferDesc.Format, D3D12_RTV_DIMENSION_TEXTURE2D, {}};
    894 
    895   for (u32 i = 0; i < swap_chain_desc.BufferCount; i++)
    896   {
    897     ComPtr<ID3D12Resource> backbuffer;
    898     hr = m_swap_chain->GetBuffer(i, IID_PPV_ARGS(backbuffer.GetAddressOf()));
    899     if (FAILED(hr))
    900     {
    901       Error::SetHResult(error, "GetBuffer for RTV failed: ", hr);
    902       DestroySwapChainRTVs();
    903       return false;
    904     }
    905 
    906     D3D12::SetObjectName(backbuffer.Get(), TinyString::from_format("Swap Chain Buffer #{}", i));
    907 
    908     D3D12DescriptorHandle rtv;
    909     if (!m_rtv_heap_manager.Allocate(&rtv))
    910     {
    911       Error::SetStringView(error, "Failed to allocate RTV handle.");
    912       DestroySwapChainRTVs();
    913       return false;
    914     }
    915 
    916     m_device->CreateRenderTargetView(backbuffer.Get(), &rtv_desc, rtv);
    917     m_swap_chain_buffers.emplace_back(std::move(backbuffer), rtv);
    918   }
    919 
    920   m_window_info.surface_width = swap_chain_desc.BufferDesc.Width;
    921   m_window_info.surface_height = swap_chain_desc.BufferDesc.Height;
    922   m_window_info.surface_format = s_swap_chain_format;
    923   VERBOSE_LOG("Swap chain buffer size: {}x{}", m_window_info.surface_width, m_window_info.surface_height);
    924 
    925   if (m_window_info.type == WindowInfo::Type::Win32)
    926   {
    927     BOOL fullscreen = FALSE;
    928     DXGI_SWAP_CHAIN_DESC desc;
    929     if (SUCCEEDED(m_swap_chain->GetFullscreenState(&fullscreen, nullptr)) && fullscreen &&
    930         SUCCEEDED(m_swap_chain->GetDesc(&desc)))
    931     {
    932       m_window_info.surface_refresh_rate = static_cast<float>(desc.BufferDesc.RefreshRate.Numerator) /
    933                                            static_cast<float>(desc.BufferDesc.RefreshRate.Denominator);
    934     }
    935   }
    936 
    937   m_current_swap_chain_buffer = 0;
    938   return true;
    939 }
    940 
    941 void D3D12Device::DestroySwapChainRTVs()
    942 {
    943   // Runtime gets cranky if we don't submit the current buffer...
    944   if (InRenderPass())
    945     EndRenderPass();
    946   SubmitCommandList(true);
    947 
    948   for (auto it = m_swap_chain_buffers.rbegin(); it != m_swap_chain_buffers.rend(); ++it)
    949   {
    950     m_rtv_heap_manager.Free(it->second.index);
    951     it->first.Reset();
    952   }
    953   m_swap_chain_buffers.clear();
    954   m_current_swap_chain_buffer = 0;
    955 }
    956 
    957 void D3D12Device::DestroySwapChain()
    958 {
    959   if (!m_swap_chain)
    960     return;
    961 
    962   DestroySwapChainRTVs();
    963 
    964   // switch out of fullscreen before destroying
    965   BOOL is_fullscreen;
    966   if (SUCCEEDED(m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr)) && is_fullscreen)
    967     m_swap_chain->SetFullscreenState(FALSE, nullptr);
    968 
    969   m_swap_chain.Reset();
    970   m_is_exclusive_fullscreen = false;
    971 }
    972 
    973 void D3D12Device::RenderBlankFrame()
    974 {
    975   if (InRenderPass())
    976     EndRenderPass();
    977 
    978   auto& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer];
    979   ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
    980   m_current_swap_chain_buffer = ((m_current_swap_chain_buffer + 1) % static_cast<u32>(m_swap_chain_buffers.size()));
    981   D3D12Texture::TransitionSubresourceToState(cmdlist, swap_chain_buf.first.Get(), 0, D3D12_RESOURCE_STATE_COMMON,
    982                                              D3D12_RESOURCE_STATE_RENDER_TARGET);
    983   cmdlist->ClearRenderTargetView(swap_chain_buf.second, GSVector4::cxpr(0.0f, 0.0f, 0.0f, 1.0f).F32, 0, nullptr);
    984   D3D12Texture::TransitionSubresourceToState(cmdlist, swap_chain_buf.first.Get(), 0, D3D12_RESOURCE_STATE_RENDER_TARGET,
    985                                              D3D12_RESOURCE_STATE_PRESENT);
    986   SubmitCommandList(false);
    987   m_swap_chain->Present(0, m_using_allow_tearing ? DXGI_PRESENT_ALLOW_TEARING : 0);
    988 }
    989 
    990 bool D3D12Device::UpdateWindow()
    991 {
    992   WaitForGPUIdle();
    993   DestroySwapChain();
    994 
    995   if (!AcquireWindow(false))
    996     return false;
    997 
    998   if (m_window_info.IsSurfaceless())
    999     return true;
   1000 
   1001   Error error;
   1002   if (!CreateSwapChain(&error))
   1003   {
   1004     ERROR_LOG("Failed to create swap chain on updated window: {}", error.GetDescription());
   1005     return false;
   1006   }
   1007 
   1008   RenderBlankFrame();
   1009   return true;
   1010 }
   1011 
   1012 void D3D12Device::ResizeWindow(s32 new_window_width, s32 new_window_height, float new_window_scale)
   1013 {
   1014   if (!m_swap_chain)
   1015     return;
   1016 
   1017   m_window_info.surface_scale = new_window_scale;
   1018 
   1019   if (m_window_info.surface_width == static_cast<u32>(new_window_width) &&
   1020       m_window_info.surface_height == static_cast<u32>(new_window_height))
   1021   {
   1022     return;
   1023   }
   1024 
   1025   DestroySwapChainRTVs();
   1026 
   1027   HRESULT hr = m_swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN,
   1028                                            m_using_allow_tearing ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0);
   1029   if (FAILED(hr))
   1030     ERROR_LOG("ResizeBuffers() failed: 0x{:08X}", static_cast<unsigned>(hr));
   1031 
   1032   Error error;
   1033   if (!CreateSwapChainRTV(&error))
   1034   {
   1035     ERROR_LOG("Failed to recreate swap chain RTV after resize", error.GetDescription());
   1036     Panic("Failed to recreate swap chain RTV after resize");
   1037   }
   1038 }
   1039 
   1040 void D3D12Device::DestroySurface()
   1041 {
   1042   DestroySwapChainRTVs();
   1043   DestroySwapChain();
   1044 }
   1045 
   1046 bool D3D12Device::SupportsTextureFormat(GPUTexture::Format format) const
   1047 {
   1048   constexpr u32 required = D3D12_FORMAT_SUPPORT1_TEXTURE2D | D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE;
   1049 
   1050   const DXGI_FORMAT dfmt = D3DCommon::GetFormatMapping(format).resource_format;
   1051   if (dfmt == DXGI_FORMAT_UNKNOWN)
   1052     return false;
   1053 
   1054   D3D12_FEATURE_DATA_FORMAT_SUPPORT support = {dfmt, {}, {}};
   1055   return SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &support, sizeof(support))) &&
   1056          (support.Support1 & required) == required;
   1057 }
   1058 
   1059 std::string D3D12Device::GetDriverInfo() const
   1060 {
   1061   std::string ret = fmt::format("{} ({})\n", D3DCommon::GetFeatureLevelString(m_feature_level),
   1062                                 D3DCommon::GetFeatureLevelShaderModelString(m_feature_level));
   1063 
   1064   DXGI_ADAPTER_DESC desc;
   1065   if (m_adapter && SUCCEEDED(m_adapter->GetDesc(&desc)))
   1066   {
   1067     fmt::format_to(std::back_inserter(ret), "VID: 0x{:04X} PID: 0x{:04X}\n", desc.VendorId, desc.DeviceId);
   1068     ret += StringUtil::WideStringToUTF8String(desc.Description);
   1069     ret += "\n";
   1070 
   1071     const std::string driver_version(D3DCommon::GetDriverVersionFromLUID(desc.AdapterLuid));
   1072     if (!driver_version.empty())
   1073     {
   1074       ret += "Driver Version: ";
   1075       ret += driver_version;
   1076     }
   1077   }
   1078 
   1079   return ret;
   1080 }
   1081 
   1082 void D3D12Device::SetVSyncMode(GPUVSyncMode mode, bool allow_present_throttle)
   1083 {
   1084   m_allow_present_throttle = allow_present_throttle;
   1085 
   1086   // Using mailbox-style no-allow-tearing causes tearing in exclusive fullscreen.
   1087   if (mode == GPUVSyncMode::Mailbox && m_is_exclusive_fullscreen)
   1088   {
   1089     WARNING_LOG("Using FIFO instead of Mailbox vsync due to exclusive fullscreen.");
   1090     mode = GPUVSyncMode::FIFO;
   1091   }
   1092 
   1093   if (m_vsync_mode == mode)
   1094     return;
   1095 
   1096   const u32 old_buffer_count = GetSwapChainBufferCount();
   1097   m_vsync_mode = mode;
   1098   if (!m_swap_chain)
   1099     return;
   1100 
   1101   if (GetSwapChainBufferCount() != old_buffer_count)
   1102   {
   1103     DestroySwapChain();
   1104 
   1105     Error error;
   1106     if (!CreateSwapChain(&error))
   1107     {
   1108       ERROR_LOG("Failed to recreate swap chain after vsync change: {}", error.GetDescription());
   1109       Panic("Failed to recreate swap chain after vsync change.");
   1110     }
   1111   }
   1112 }
   1113 
   1114 bool D3D12Device::BeginPresent(bool frame_skip, u32 clear_color)
   1115 {
   1116   if (InRenderPass())
   1117     EndRenderPass();
   1118 
   1119   if (frame_skip)
   1120     return false;
   1121 
   1122   // If we're running surfaceless, kick the command buffer so we don't run out of descriptors.
   1123   if (!m_swap_chain)
   1124   {
   1125     SubmitCommandList(false);
   1126     TrimTexturePool();
   1127     return false;
   1128   }
   1129 
   1130   // TODO: Check if the device was lost.
   1131 
   1132   // Check if we lost exclusive fullscreen. If so, notify the host, so it can switch to windowed mode.
   1133   // This might get called repeatedly if it takes a while to switch back, that's the host's problem.
   1134   BOOL is_fullscreen;
   1135   if (m_is_exclusive_fullscreen &&
   1136       (FAILED(m_swap_chain->GetFullscreenState(&is_fullscreen, nullptr)) || !is_fullscreen))
   1137   {
   1138     Host::RunOnCPUThread([]() { Host::SetFullscreen(false); });
   1139     TrimTexturePool();
   1140     return false;
   1141   }
   1142 
   1143   BeginSwapChainRenderPass(clear_color);
   1144   return true;
   1145 }
   1146 
   1147 void D3D12Device::EndPresent(bool explicit_present)
   1148 {
   1149   DebugAssert(InRenderPass() && m_num_current_render_targets == 0 && !m_current_depth_target);
   1150   EndRenderPass();
   1151 
   1152   const auto& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer];
   1153   m_current_swap_chain_buffer = ((m_current_swap_chain_buffer + 1) % static_cast<u32>(m_swap_chain_buffers.size()));
   1154 
   1155   ID3D12GraphicsCommandList* cmdlist = GetCommandList();
   1156   D3D12Texture::TransitionSubresourceToState(cmdlist, swap_chain_buf.first.Get(), 0, D3D12_RESOURCE_STATE_RENDER_TARGET,
   1157                                              D3D12_RESOURCE_STATE_PRESENT);
   1158 
   1159   SubmitCommandList(false);
   1160   TrimTexturePool();
   1161 
   1162   if (!explicit_present)
   1163     SubmitPresent();
   1164 }
   1165 
   1166 void D3D12Device::SubmitPresent()
   1167 {
   1168   DebugAssert(m_swap_chain);
   1169 
   1170   const UINT sync_interval = static_cast<UINT>(m_vsync_mode == GPUVSyncMode::FIFO);
   1171   const UINT flags = (m_vsync_mode == GPUVSyncMode::Disabled && m_using_allow_tearing) ? DXGI_PRESENT_ALLOW_TEARING : 0;
   1172   m_swap_chain->Present(sync_interval, flags);
   1173 }
   1174 
   1175 #ifdef _DEBUG
   1176 static UINT64 Palette(float phase, const std::array<float, 3>& a, const std::array<float, 3>& b,
   1177                       const std::array<float, 3>& c, const std::array<float, 3>& d)
   1178 {
   1179   std::array<float, 3> result;
   1180   result[0] = a[0] + b[0] * std::cos(6.28318f * (c[0] * phase + d[0]));
   1181   result[1] = a[1] + b[1] * std::cos(6.28318f * (c[1] * phase + d[1]));
   1182   result[2] = a[2] + b[2] * std::cos(6.28318f * (c[2] * phase + d[2]));
   1183 
   1184   return PIX_COLOR(static_cast<BYTE>(std::clamp(result[0] * 255.0f, 0.0f, 255.0f)),
   1185                    static_cast<BYTE>(std::clamp(result[1] * 255.0f, 0.0f, 255.0f)),
   1186                    static_cast<BYTE>(std::clamp(result[2] * 255.0f, 0.0f, 255.0f)));
   1187 }
   1188 #endif
   1189 
   1190 void D3D12Device::PushDebugGroup(const char* name)
   1191 {
   1192 #ifdef _DEBUG
   1193   if (!m_debug_device)
   1194     return;
   1195 
   1196   const UINT64 color = Palette(static_cast<float>(++s_debug_scope_depth), {0.5f, 0.5f, 0.5f}, {0.5f, 0.5f, 0.5f},
   1197                                {1.0f, 1.0f, 0.5f}, {0.8f, 0.90f, 0.30f});
   1198   PIXBeginEvent(GetCommandList(), color, "%s", name);
   1199 #endif
   1200 }
   1201 
   1202 void D3D12Device::PopDebugGroup()
   1203 {
   1204 #ifdef _DEBUG
   1205   if (!m_debug_device)
   1206     return;
   1207 
   1208   s_debug_scope_depth = (s_debug_scope_depth == 0) ? 0 : (s_debug_scope_depth - 1u);
   1209   PIXEndEvent(GetCommandList());
   1210 #endif
   1211 }
   1212 
   1213 void D3D12Device::InsertDebugMessage(const char* msg)
   1214 {
   1215 #ifdef _DEBUG
   1216   if (!m_debug_device)
   1217     return;
   1218 
   1219   PIXSetMarker(GetCommandList(), PIX_COLOR(0, 0, 0), "%s", msg);
   1220 #endif
   1221 }
   1222 
   1223 void D3D12Device::SetFeatures(FeatureMask disabled_features)
   1224 {
   1225   m_max_texture_size = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION;
   1226   m_max_multisamples = 1;
   1227   for (u32 multisamples = 2; multisamples < D3D12_MAX_MULTISAMPLE_SAMPLE_COUNT; multisamples++)
   1228   {
   1229     D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS fd = {DXGI_FORMAT_R8G8B8A8_UNORM, static_cast<UINT>(multisamples),
   1230                                                         D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE, 0u};
   1231 
   1232     if (SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &fd, sizeof(fd))) &&
   1233         fd.NumQualityLevels > 0)
   1234     {
   1235       m_max_multisamples = multisamples;
   1236     }
   1237   }
   1238 
   1239   m_features.dual_source_blend = !(disabled_features & FEATURE_MASK_DUAL_SOURCE_BLEND);
   1240   m_features.framebuffer_fetch = false;
   1241   m_features.per_sample_shading = true;
   1242   m_features.noperspective_interpolation = true;
   1243   m_features.texture_copy_to_self =
   1244     /*!(disabled_features & FEATURE_MASK_TEXTURE_COPY_TO_SELF)*/ false; // TODO: Support with Enhanced Barriers
   1245   m_features.supports_texture_buffers = !(disabled_features & FEATURE_MASK_TEXTURE_BUFFERS);
   1246   m_features.texture_buffers_emulated_with_ssbo = false;
   1247   m_features.feedback_loops = false;
   1248   m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
   1249   m_features.partial_msaa_resolve = true;
   1250   m_features.memory_import = false;
   1251   m_features.explicit_present = true;
   1252   m_features.gpu_timing = true;
   1253   m_features.shader_cache = true;
   1254   m_features.pipeline_cache = true;
   1255   m_features.prefer_unused_textures = true;
   1256 
   1257   BOOL allow_tearing_supported = false;
   1258   HRESULT hr = m_dxgi_factory->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allow_tearing_supported,
   1259                                                    sizeof(allow_tearing_supported));
   1260   m_allow_tearing_supported = (SUCCEEDED(hr) && allow_tearing_supported == TRUE);
   1261 
   1262   m_features.raster_order_views = false;
   1263   if (!(disabled_features & FEATURE_MASK_RASTER_ORDER_VIEWS))
   1264   {
   1265     D3D12_FEATURE_DATA_D3D12_OPTIONS options = {};
   1266     m_features.raster_order_views =
   1267       SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options))) &&
   1268       options.ROVsSupported;
   1269   }
   1270 }
   1271 
   1272 void D3D12Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
   1273                                     GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width,
   1274                                     u32 height)
   1275 {
   1276   D3D12Texture* const S = static_cast<D3D12Texture*>(src);
   1277   D3D12Texture* const D = static_cast<D3D12Texture*>(dst);
   1278 
   1279   if (S->GetState() == GPUTexture::State::Cleared)
   1280   {
   1281     // source is cleared. if destination is a render target, we can carry the clear forward
   1282     if (D->IsRenderTargetOrDepthStencil())
   1283     {
   1284       if (dst_level == 0 && dst_x == 0 && dst_y == 0 && width == D->GetWidth() && height == D->GetHeight())
   1285       {
   1286         // pass it forward if we're clearing the whole thing
   1287         if (S->IsDepthStencil())
   1288           D->SetClearDepth(S->GetClearDepth());
   1289         else
   1290           D->SetClearColor(S->GetClearColor());
   1291 
   1292         return;
   1293       }
   1294 
   1295       if (D->GetState() == GPUTexture::State::Cleared)
   1296       {
   1297         // destination is cleared, if it's the same colour and rect, we can just avoid this entirely
   1298         if (D->IsDepthStencil())
   1299         {
   1300           if (D->GetClearDepth() == S->GetClearDepth())
   1301             return;
   1302         }
   1303         else
   1304         {
   1305           if (D->GetClearColor() == S->GetClearColor())
   1306             return;
   1307         }
   1308       }
   1309     }
   1310 
   1311     // commit the clear to the source first, then do normal copy
   1312     S->CommitClear();
   1313   }
   1314 
   1315   // if the destination has been cleared, and we're not overwriting the whole thing, commit the clear first
   1316   // (the area outside of where we're copying to)
   1317   if (D->GetState() == GPUTexture::State::Cleared &&
   1318       (dst_level != 0 || dst_x != 0 || dst_y != 0 || width != D->GetWidth() || height != D->GetHeight()))
   1319   {
   1320     D->CommitClear();
   1321   }
   1322 
   1323   s_stats.num_copies++;
   1324 
   1325   // *now* we can do a normal image copy.
   1326   if (InRenderPass())
   1327     EndRenderPass();
   1328 
   1329   S->TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE);
   1330   S->SetUseFenceValue(GetCurrentFenceValue());
   1331 
   1332   D->TransitionToState(D3D12_RESOURCE_STATE_COPY_DEST);
   1333   D->SetUseFenceValue(GetCurrentFenceValue());
   1334 
   1335   D3D12_TEXTURE_COPY_LOCATION srcloc;
   1336   srcloc.pResource = S->GetResource();
   1337   srcloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
   1338   srcloc.SubresourceIndex = S->CalculateSubresource(src_layer, src_level);
   1339 
   1340   D3D12_TEXTURE_COPY_LOCATION dstloc;
   1341   dstloc.pResource = D->GetResource();
   1342   dstloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
   1343   dstloc.SubresourceIndex = D->CalculateSubresource(dst_layer, dst_level);
   1344 
   1345   const D3D12_BOX srcbox{static_cast<UINT>(src_x),         static_cast<UINT>(src_y),          0u,
   1346                          static_cast<UINT>(src_x + width), static_cast<UINT>(src_y + height), 1u};
   1347   GetCommandList()->CopyTextureRegion(&dstloc, dst_x, dst_y, 0, &srcloc, &srcbox);
   1348 
   1349   D->SetState(GPUTexture::State::Dirty);
   1350 }
   1351 
   1352 void D3D12Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
   1353                                        GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height)
   1354 {
   1355   DebugAssert((src_x + width) <= src->GetWidth());
   1356   DebugAssert((src_y + height) <= src->GetHeight());
   1357   DebugAssert(src->IsMultisampled());
   1358   DebugAssert(dst_level < dst->GetLevels() && dst_layer < dst->GetLayers());
   1359   DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level));
   1360   DebugAssert((dst_y + height) <= dst->GetMipHeight(dst_level));
   1361   DebugAssert(!dst->IsMultisampled() && src->IsMultisampled());
   1362 
   1363   if (InRenderPass())
   1364     EndRenderPass();
   1365 
   1366   s_stats.num_copies++;
   1367 
   1368   D3D12Texture* D = static_cast<D3D12Texture*>(dst);
   1369   D3D12Texture* S = static_cast<D3D12Texture*>(src);
   1370   ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
   1371   const u32 DSR = D->CalculateSubresource(dst_layer, dst_level);
   1372 
   1373   S->CommitClear(cmdlist);
   1374   D->CommitClear(cmdlist);
   1375 
   1376   S->TransitionSubresourceToState(cmdlist, 0, S->GetResourceState(), D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
   1377   D->TransitionSubresourceToState(cmdlist, DSR, D->GetResourceState(), D3D12_RESOURCE_STATE_RESOLVE_DEST);
   1378 
   1379   if (src_x == 0 && src_y == 0 && width == src->GetWidth() && height == src->GetHeight() && dst_x == 0 && dst_y == 0 &&
   1380       width == dst->GetMipWidth(dst_level) && height == dst->GetMipHeight(dst_level))
   1381   {
   1382     cmdlist->ResolveSubresource(D->GetResource(), DSR, S->GetResource(), 0, S->GetDXGIFormat());
   1383   }
   1384   else
   1385   {
   1386     D3D12_RECT src_rc{static_cast<LONG>(src_x), static_cast<LONG>(src_y), static_cast<LONG>(src_x + width),
   1387                       static_cast<LONG>(src_y + height)};
   1388     cmdlist->ResolveSubresourceRegion(D->GetResource(), D->CalculateSubresource(dst_level, dst_layer), dst_x, dst_y,
   1389                                       S->GetResource(), 0, &src_rc, D->GetDXGIFormat(), D3D12_RESOLVE_MODE_AVERAGE);
   1390   }
   1391 
   1392   S->TransitionSubresourceToState(cmdlist, 0, D3D12_RESOURCE_STATE_RESOLVE_SOURCE, S->GetResourceState());
   1393   D->TransitionSubresourceToState(cmdlist, DSR, D3D12_RESOURCE_STATE_RESOLVE_DEST, D->GetResourceState());
   1394 }
   1395 
   1396 void D3D12Device::ClearRenderTarget(GPUTexture* t, u32 c)
   1397 {
   1398   GPUDevice::ClearRenderTarget(t, c);
   1399   if (InRenderPass() && IsRenderTargetBound(t))
   1400     EndRenderPass();
   1401 }
   1402 
   1403 void D3D12Device::ClearDepth(GPUTexture* t, float d)
   1404 {
   1405   GPUDevice::ClearDepth(t, d);
   1406   if (InRenderPass() && m_current_depth_target == t)
   1407     EndRenderPass();
   1408 }
   1409 
   1410 void D3D12Device::InvalidateRenderTarget(GPUTexture* t)
   1411 {
   1412   GPUDevice::InvalidateRenderTarget(t);
   1413   if (InRenderPass() && (t->IsDepthStencil() ? (m_current_depth_target == t) : IsRenderTargetBound(t)))
   1414     EndRenderPass();
   1415 }
   1416 
   1417 bool D3D12Device::CreateBuffers(Error* error)
   1418 {
   1419   if (!m_vertex_buffer.Create(VERTEX_BUFFER_SIZE, error))
   1420   {
   1421     ERROR_LOG("Failed to allocate vertex buffer");
   1422     return false;
   1423   }
   1424 
   1425   if (!m_index_buffer.Create(INDEX_BUFFER_SIZE, error))
   1426   {
   1427     ERROR_LOG("Failed to allocate index buffer");
   1428     return false;
   1429   }
   1430 
   1431   if (!m_uniform_buffer.Create(VERTEX_UNIFORM_BUFFER_SIZE, error))
   1432   {
   1433     ERROR_LOG("Failed to allocate uniform buffer");
   1434     return false;
   1435   }
   1436 
   1437   if (!m_texture_upload_buffer.Create(TEXTURE_BUFFER_SIZE, error))
   1438   {
   1439     ERROR_LOG("Failed to allocate texture upload buffer");
   1440     return false;
   1441   }
   1442 
   1443   return true;
   1444 }
   1445 
   1446 void D3D12Device::DestroyBuffers()
   1447 {
   1448   m_texture_upload_buffer.Destroy(false);
   1449   m_uniform_buffer.Destroy(false);
   1450   m_index_buffer.Destroy(false);
   1451   m_vertex_buffer.Destroy(false);
   1452 }
   1453 
   1454 void D3D12Device::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
   1455                                   u32* map_base_vertex)
   1456 {
   1457   const u32 req_size = vertex_size * vertex_count;
   1458   if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size))
   1459   {
   1460     SubmitCommandListAndRestartRenderPass("out of vertex space");
   1461     if (!m_vertex_buffer.ReserveMemory(req_size, vertex_size))
   1462       Panic("Failed to allocate vertex space");
   1463   }
   1464 
   1465   *map_ptr = m_vertex_buffer.GetCurrentHostPointer();
   1466   *map_space = m_vertex_buffer.GetCurrentSpace() / vertex_size;
   1467   *map_base_vertex = m_vertex_buffer.GetCurrentOffset() / vertex_size;
   1468 }
   1469 
   1470 void D3D12Device::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count)
   1471 {
   1472   const u32 upload_size = vertex_size * vertex_count;
   1473   s_stats.buffer_streamed += upload_size;
   1474   m_vertex_buffer.CommitMemory(upload_size);
   1475 }
   1476 
   1477 void D3D12Device::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index)
   1478 {
   1479   const u32 req_size = sizeof(DrawIndex) * index_count;
   1480   if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex)))
   1481   {
   1482     SubmitCommandListAndRestartRenderPass("out of index space");
   1483     if (!m_index_buffer.ReserveMemory(req_size, sizeof(DrawIndex)))
   1484       Panic("Failed to allocate index space");
   1485   }
   1486 
   1487   *map_ptr = reinterpret_cast<DrawIndex*>(m_index_buffer.GetCurrentHostPointer());
   1488   *map_space = m_index_buffer.GetCurrentSpace() / sizeof(DrawIndex);
   1489   *map_base_index = m_index_buffer.GetCurrentOffset() / sizeof(DrawIndex);
   1490 }
   1491 
   1492 void D3D12Device::UnmapIndexBuffer(u32 used_index_count)
   1493 {
   1494   const u32 upload_size = sizeof(DrawIndex) * used_index_count;
   1495   s_stats.buffer_streamed += upload_size;
   1496   m_index_buffer.CommitMemory(upload_size);
   1497 }
   1498 
   1499 void D3D12Device::PushUniformBuffer(const void* data, u32 data_size)
   1500 {
   1501   static constexpr std::array<u8, static_cast<u8>(GPUPipeline::Layout::MaxCount)> push_parameters = {
   1502     0, // SingleTextureAndUBO
   1503     2, // SingleTextureAndPushConstants
   1504     1, // SingleTextureBufferAndPushConstants
   1505     0, // MultiTextureAndUBO
   1506     2, // MultiTextureAndPushConstants
   1507   };
   1508 
   1509   DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE);
   1510   if (m_dirty_flags & DIRTY_FLAG_PIPELINE_LAYOUT)
   1511   {
   1512     m_dirty_flags &= ~DIRTY_FLAG_PIPELINE_LAYOUT;
   1513     UpdateRootSignature();
   1514   }
   1515 
   1516   s_stats.buffer_streamed += data_size;
   1517 
   1518   const u32 push_param =
   1519     push_parameters[static_cast<u8>(m_current_pipeline_layout)] + BoolToUInt8(IsUsingROVRootSignature());
   1520   GetCommandList()->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0);
   1521 }
   1522 
   1523 void* D3D12Device::MapUniformBuffer(u32 size)
   1524 {
   1525   const u32 used_space = Common::AlignUpPow2(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
   1526   if (!m_uniform_buffer.ReserveMemory(used_space + MAX_UNIFORM_BUFFER_SIZE,
   1527                                       D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT))
   1528   {
   1529     SubmitCommandListAndRestartRenderPass("out of uniform space");
   1530     if (!m_uniform_buffer.ReserveMemory(used_space + MAX_UNIFORM_BUFFER_SIZE,
   1531                                         D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT))
   1532       Panic("Failed to allocate uniform space.");
   1533   }
   1534 
   1535   return m_uniform_buffer.GetCurrentHostPointer();
   1536 }
   1537 
   1538 void D3D12Device::UnmapUniformBuffer(u32 size)
   1539 {
   1540   s_stats.buffer_streamed += size;
   1541   m_uniform_buffer_position = m_uniform_buffer.GetCurrentOffset();
   1542   m_uniform_buffer.CommitMemory(size);
   1543   m_dirty_flags |= DIRTY_FLAG_CONSTANT_BUFFER;
   1544 }
   1545 
   1546 bool D3D12Device::CreateRootSignatures(Error* error)
   1547 {
   1548   D3D12::RootSignatureBuilder rsb;
   1549 
   1550   for (u32 rov = 0; rov < 2; rov++)
   1551   {
   1552     if (rov && !m_features.raster_order_views)
   1553       break;
   1554 
   1555     {
   1556       auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::SingleTextureAndUBO)];
   1557 
   1558       rsb.SetInputAssemblerFlag();
   1559       rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
   1560       rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
   1561       rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
   1562       if (rov)
   1563       {
   1564         rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
   1565                                D3D12_SHADER_VISIBILITY_PIXEL);
   1566       }
   1567       if (!(rs = rsb.Create(error, true)))
   1568         return false;
   1569       D3D12::SetObjectName(rs.Get(), "Single Texture + UBO Pipeline Layout");
   1570     }
   1571 
   1572     {
   1573       auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::SingleTextureAndPushConstants)];
   1574 
   1575       rsb.SetInputAssemblerFlag();
   1576       rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
   1577       rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
   1578       if (rov)
   1579       {
   1580         rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
   1581                                D3D12_SHADER_VISIBILITY_PIXEL);
   1582       }
   1583       rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
   1584       if (!(rs = rsb.Create(error, true)))
   1585         return false;
   1586       D3D12::SetObjectName(rs.Get(), "Single Texture Pipeline Layout");
   1587     }
   1588 
   1589     {
   1590       auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::SingleTextureBufferAndPushConstants)];
   1591 
   1592       rsb.SetInputAssemblerFlag();
   1593       rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
   1594       if (rov)
   1595       {
   1596         rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
   1597                                D3D12_SHADER_VISIBILITY_PIXEL);
   1598       }
   1599       rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
   1600       if (!(rs = rsb.Create(error, true)))
   1601         return false;
   1602       D3D12::SetObjectName(rs.Get(), "Single Texture Buffer + UBO Pipeline Layout");
   1603     }
   1604 
   1605     {
   1606       auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::MultiTextureAndUBO)];
   1607 
   1608       rsb.SetInputAssemblerFlag();
   1609       rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
   1610       rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS,
   1611                              D3D12_SHADER_VISIBILITY_PIXEL);
   1612       rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
   1613       if (rov)
   1614       {
   1615         rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
   1616                                D3D12_SHADER_VISIBILITY_PIXEL);
   1617       }
   1618       if (!(rs = rsb.Create(error, true)))
   1619         return false;
   1620       D3D12::SetObjectName(rs.Get(), "Multi Texture + UBO Pipeline Layout");
   1621     }
   1622 
   1623     {
   1624       auto& rs = m_root_signatures[rov][static_cast<u8>(GPUPipeline::Layout::MultiTextureAndPushConstants)];
   1625 
   1626       rsb.SetInputAssemblerFlag();
   1627       rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
   1628       rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS,
   1629                              D3D12_SHADER_VISIBILITY_PIXEL);
   1630       if (rov)
   1631       {
   1632         rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS,
   1633                                D3D12_SHADER_VISIBILITY_PIXEL);
   1634       }
   1635       rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
   1636       if (!(rs = rsb.Create(error, true)))
   1637         return false;
   1638       D3D12::SetObjectName(rs.Get(), "Multi Texture Pipeline Layout");
   1639     }
   1640   }
   1641 
   1642   return true;
   1643 }
   1644 
   1645 void D3D12Device::DestroyRootSignatures()
   1646 {
   1647   m_root_signatures.enumerate([](auto& it) { it.Reset(); });
   1648 }
   1649 
   1650 void D3D12Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTexture* ds,
   1651                                    GPUPipeline::RenderPassFlag flags)
   1652 {
   1653   DebugAssert(
   1654     !(flags & (GPUPipeline::RenderPassFlag::ColorFeedbackLoop | GPUPipeline::RenderPassFlag::SampleDepthBuffer)));
   1655 
   1656   const bool image_bind_changed = ((m_current_render_pass_flags ^ flags) & GPUPipeline::BindRenderTargetsAsImages);
   1657   bool changed =
   1658     (m_num_current_render_targets != num_rts || m_current_depth_target != ds || m_current_render_pass_flags != flags);
   1659   bool needs_ds_clear = (ds && ds->IsClearedOrInvalidated());
   1660   bool needs_rt_clear = false;
   1661 
   1662   if (InRenderPass())
   1663     EndRenderPass();
   1664 
   1665   m_current_depth_target = static_cast<D3D12Texture*>(ds);
   1666   for (u32 i = 0; i < num_rts; i++)
   1667   {
   1668     D3D12Texture* const RT = static_cast<D3D12Texture*>(rts[i]);
   1669     changed |= m_current_render_targets[i] != RT;
   1670     m_current_render_targets[i] = RT;
   1671     needs_rt_clear |= RT->IsClearedOrInvalidated();
   1672   }
   1673   for (u32 i = num_rts; i < m_num_current_render_targets; i++)
   1674     m_current_render_targets[i] = nullptr;
   1675   m_num_current_render_targets = Truncate8(num_rts);
   1676   m_current_render_pass_flags = flags;
   1677 
   1678   // Don't end render pass unless it's necessary.
   1679   if (changed)
   1680   {
   1681     if (InRenderPass())
   1682       EndRenderPass();
   1683 
   1684     // Need a root signature change if switching to UAVs.
   1685     m_dirty_flags |= image_bind_changed ? LAYOUT_DEPENDENT_DIRTY_STATE : 0;
   1686     m_dirty_flags = (flags & GPUPipeline::BindRenderTargetsAsImages) ? (m_dirty_flags | DIRTY_FLAG_RT_UAVS) :
   1687                                                                        (m_dirty_flags & ~DIRTY_FLAG_RT_UAVS);
   1688   }
   1689   else if (needs_rt_clear || needs_ds_clear)
   1690   {
   1691     if (InRenderPass())
   1692       EndRenderPass();
   1693   }
   1694 }
   1695 
   1696 void D3D12Device::BeginRenderPass()
   1697 {
   1698   DebugAssert(!InRenderPass());
   1699 
   1700   std::array<D3D12_RENDER_PASS_RENDER_TARGET_DESC, MAX_RENDER_TARGETS> rt_desc;
   1701   D3D12_RENDER_PASS_DEPTH_STENCIL_DESC ds_desc;
   1702 
   1703   D3D12_RENDER_PASS_RENDER_TARGET_DESC* rt_desc_p = nullptr;
   1704   D3D12_RENDER_PASS_DEPTH_STENCIL_DESC* ds_desc_p = nullptr;
   1705   u32 num_rt_descs = 0;
   1706 
   1707   ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
   1708 
   1709   if (m_num_current_render_targets > 0 || m_current_depth_target) [[likely]]
   1710   {
   1711     if (!IsUsingROVRootSignature()) [[likely]]
   1712     {
   1713       for (u32 i = 0; i < m_num_current_render_targets; i++)
   1714       {
   1715         D3D12Texture* const rt = m_current_render_targets[i];
   1716         rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_RENDER_TARGET);
   1717         rt->SetUseFenceValue(GetCurrentFenceValue());
   1718 
   1719         D3D12_RENDER_PASS_RENDER_TARGET_DESC& desc = rt_desc[i];
   1720         desc.cpuDescriptor = rt->GetWriteDescriptor();
   1721         desc.EndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE;
   1722 
   1723         switch (rt->GetState())
   1724         {
   1725           case GPUTexture::State::Cleared:
   1726           {
   1727             desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR;
   1728             std::memcpy(desc.BeginningAccess.Clear.ClearValue.Color, rt->GetUNormClearColor().data(),
   1729                         sizeof(desc.BeginningAccess.Clear.ClearValue.Color));
   1730             rt->SetState(GPUTexture::State::Dirty);
   1731           }
   1732           break;
   1733 
   1734           case GPUTexture::State::Invalidated:
   1735           {
   1736             desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD;
   1737             rt->SetState(GPUTexture::State::Dirty);
   1738           }
   1739           break;
   1740 
   1741           case GPUTexture::State::Dirty:
   1742           {
   1743             desc.BeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE;
   1744           }
   1745           break;
   1746 
   1747           default:
   1748             UnreachableCode();
   1749             break;
   1750         }
   1751       }
   1752 
   1753       rt_desc_p = (m_num_current_render_targets > 0) ? rt_desc.data() : nullptr;
   1754       num_rt_descs = m_num_current_render_targets;
   1755     }
   1756     else
   1757     {
   1758       // Still need to clear the RTs.
   1759       for (u32 i = 0; i < m_num_current_render_targets; i++)
   1760       {
   1761         D3D12Texture* const rt = m_current_render_targets[i];
   1762         rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
   1763         rt->SetUseFenceValue(GetCurrentFenceValue());
   1764         rt->CommitClear(cmdlist);
   1765       }
   1766     }
   1767     if (m_current_depth_target)
   1768     {
   1769       D3D12Texture* const ds = m_current_depth_target;
   1770       ds->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_DEPTH_WRITE);
   1771       ds->SetUseFenceValue(GetCurrentFenceValue());
   1772       ds_desc_p = &ds_desc;
   1773       ds_desc.cpuDescriptor = ds->GetWriteDescriptor();
   1774       ds_desc.DepthEndingAccess.Type = D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE;
   1775       ds_desc.StencilBeginningAccess = {};
   1776       ds_desc.StencilEndingAccess = {};
   1777 
   1778       switch (ds->GetState())
   1779       {
   1780         case GPUTexture::State::Cleared:
   1781         {
   1782           ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR;
   1783           ds_desc.DepthBeginningAccess.Clear.ClearValue.DepthStencil.Depth = ds->GetClearDepth();
   1784           ds->SetState(GPUTexture::State::Dirty);
   1785         }
   1786         break;
   1787 
   1788         case GPUTexture::State::Invalidated:
   1789         {
   1790           ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_DISCARD;
   1791           ds->SetState(GPUTexture::State::Dirty);
   1792         }
   1793         break;
   1794 
   1795         case GPUTexture::State::Dirty:
   1796         {
   1797           ds_desc.DepthBeginningAccess.Type = D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE;
   1798         }
   1799         break;
   1800 
   1801         default:
   1802           UnreachableCode();
   1803           break;
   1804       }
   1805 
   1806       ds_desc_p = &ds_desc;
   1807     }
   1808   }
   1809   else
   1810   {
   1811     // Re-rendering to swap chain.
   1812     const auto& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer];
   1813     rt_desc[0] = {swap_chain_buf.second,
   1814                   {D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_PRESERVE, {}},
   1815                   {D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, {}}};
   1816     rt_desc_p = &rt_desc[0];
   1817     num_rt_descs = 1;
   1818   }
   1819 
   1820   // All textures should be in shader read only optimal already, but just in case..
   1821   const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
   1822   for (u32 i = 0; i < num_textures; i++)
   1823   {
   1824     if (m_current_textures[i])
   1825       m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
   1826   }
   1827 
   1828   DebugAssert(rt_desc_p || ds_desc_p || IsUsingROVRootSignature());
   1829   cmdlist->BeginRenderPass(num_rt_descs, rt_desc_p, ds_desc_p, D3D12_RENDER_PASS_FLAG_NONE);
   1830 
   1831   // TODO: Stats
   1832   m_in_render_pass = true;
   1833   s_stats.num_render_passes++;
   1834 
   1835   // If this is a new command buffer, bind the pipeline and such.
   1836   if (m_dirty_flags & DIRTY_FLAG_INITIAL)
   1837     SetInitialPipelineState();
   1838 }
   1839 
   1840 void D3D12Device::BeginSwapChainRenderPass(u32 clear_color)
   1841 {
   1842   DebugAssert(!InRenderPass());
   1843 
   1844   ID3D12GraphicsCommandList4* const cmdlist = GetCommandList();
   1845   const auto& swap_chain_buf = m_swap_chain_buffers[m_current_swap_chain_buffer];
   1846 
   1847   D3D12Texture::TransitionSubresourceToState(cmdlist, swap_chain_buf.first.Get(), 0, D3D12_RESOURCE_STATE_COMMON,
   1848                                              D3D12_RESOURCE_STATE_RENDER_TARGET);
   1849 
   1850   // All textures should be in shader read only optimal already, but just in case..
   1851   const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
   1852   for (u32 i = 0; i < num_textures; i++)
   1853   {
   1854     if (m_current_textures[i])
   1855       m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
   1856   }
   1857 
   1858   D3D12_RENDER_PASS_RENDER_TARGET_DESC rt_desc = {swap_chain_buf.second,
   1859                                                   {D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR, {}},
   1860                                                   {D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_PRESERVE, {}}};
   1861   GSVector4::store<false>(rt_desc.BeginningAccess.Clear.ClearValue.Color, GSVector4::rgba32(clear_color));
   1862   cmdlist->BeginRenderPass(1, &rt_desc, nullptr, D3D12_RENDER_PASS_FLAG_NONE);
   1863 
   1864   std::memset(m_current_render_targets.data(), 0, sizeof(m_current_render_targets));
   1865   m_num_current_render_targets = 0;
   1866   m_dirty_flags =
   1867     (m_dirty_flags & ~DIRTY_FLAG_RT_UAVS) | ((IsUsingROVRootSignature()) ? DIRTY_FLAG_PIPELINE_LAYOUT : 0);
   1868   m_current_render_pass_flags = GPUPipeline::NoRenderPassFlags;
   1869   m_current_depth_target = nullptr;
   1870   m_in_render_pass = true;
   1871   s_stats.num_render_passes++;
   1872 
   1873   // Clear pipeline, it's likely incompatible.
   1874   m_current_pipeline = nullptr;
   1875 }
   1876 
   1877 bool D3D12Device::InRenderPass()
   1878 {
   1879   return m_in_render_pass;
   1880 }
   1881 
   1882 void D3D12Device::EndRenderPass()
   1883 {
   1884   DebugAssert(m_in_render_pass);
   1885 
   1886   // TODO: stats
   1887   m_in_render_pass = false;
   1888 
   1889   GetCommandList()->EndRenderPass();
   1890 }
   1891 
   1892 void D3D12Device::SetPipeline(GPUPipeline* pipeline)
   1893 {
   1894   // First draw? Bind everything.
   1895   if (m_dirty_flags & DIRTY_FLAG_INITIAL)
   1896   {
   1897     m_current_pipeline = static_cast<D3D12Pipeline*>(pipeline);
   1898     if (!m_current_pipeline)
   1899       return;
   1900 
   1901     SetInitialPipelineState();
   1902     return;
   1903   }
   1904   else if (m_current_pipeline == pipeline)
   1905   {
   1906     return;
   1907   }
   1908 
   1909   m_current_pipeline = static_cast<D3D12Pipeline*>(pipeline);
   1910 
   1911   ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
   1912   cmdlist->SetPipelineState(m_current_pipeline->GetPipeline());
   1913 
   1914   if (D3D12_PRIMITIVE_TOPOLOGY topology = m_current_pipeline->GetTopology(); topology != m_current_topology)
   1915   {
   1916     m_current_topology = topology;
   1917     cmdlist->IASetPrimitiveTopology(topology);
   1918   }
   1919 
   1920   if (u32 vertex_stride = m_current_pipeline->GetVertexStride();
   1921       vertex_stride > 0 && m_current_vertex_stride != vertex_stride)
   1922   {
   1923     m_current_vertex_stride = vertex_stride;
   1924     SetVertexBuffer(cmdlist);
   1925   }
   1926 
   1927   // TODO: we don't need to change the blend constant if blending isn't on.
   1928   if (u32 blend_constants = m_current_pipeline->GetBlendConstants(); m_current_blend_constant != blend_constants)
   1929   {
   1930     m_current_blend_constant = blend_constants;
   1931     cmdlist->OMSetBlendFactor(m_current_pipeline->GetBlendConstantsF().data());
   1932   }
   1933 
   1934   if (GPUPipeline::Layout layout = m_current_pipeline->GetLayout(); m_current_pipeline_layout != layout)
   1935   {
   1936     m_current_pipeline_layout = layout;
   1937     m_dirty_flags |= LAYOUT_DEPENDENT_DIRTY_STATE & (IsUsingROVRootSignature() ? ~0u : ~DIRTY_FLAG_RT_UAVS);
   1938   }
   1939 }
   1940 
   1941 void D3D12Device::UnbindPipeline(D3D12Pipeline* pl)
   1942 {
   1943   if (m_current_pipeline != pl)
   1944     return;
   1945 
   1946   m_current_pipeline = nullptr;
   1947 }
   1948 
   1949 bool D3D12Device::IsRenderTargetBound(const GPUTexture* tex) const
   1950 {
   1951   for (u32 i = 0; i < m_num_current_render_targets; i++)
   1952   {
   1953     if (m_current_render_targets[i] == tex)
   1954       return true;
   1955   }
   1956 
   1957   return false;
   1958 }
   1959 
   1960 void D3D12Device::InvalidateCachedState()
   1961 {
   1962   m_dirty_flags = ALL_DIRTY_STATE &
   1963                   ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) ? ~0u : ~DIRTY_FLAG_RT_UAVS);
   1964   m_in_render_pass = false;
   1965   m_current_pipeline = nullptr;
   1966   m_current_vertex_stride = 0;
   1967   m_current_blend_constant = 0;
   1968   m_current_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
   1969 }
   1970 
   1971 void D3D12Device::SetInitialPipelineState()
   1972 {
   1973   DebugAssert(m_current_pipeline);
   1974   m_dirty_flags &= ~DIRTY_FLAG_INITIAL;
   1975 
   1976   ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
   1977 
   1978   m_current_vertex_stride = m_current_pipeline->GetVertexStride();
   1979   SetVertexBuffer(cmdlist);
   1980   const D3D12_INDEX_BUFFER_VIEW ib_view = {m_index_buffer.GetGPUPointer(), m_index_buffer.GetSize(),
   1981                                            DXGI_FORMAT_R16_UINT};
   1982   cmdlist->IASetIndexBuffer(&ib_view);
   1983 
   1984   cmdlist->SetPipelineState(m_current_pipeline->GetPipeline());
   1985   m_current_pipeline_layout = m_current_pipeline->GetLayout();
   1986 
   1987   m_current_topology = m_current_pipeline->GetTopology();
   1988   cmdlist->IASetPrimitiveTopology(m_current_topology);
   1989 
   1990   m_current_blend_constant = m_current_pipeline->GetBlendConstants();
   1991   cmdlist->OMSetBlendFactor(m_current_pipeline->GetBlendConstantsF().data());
   1992 
   1993   SetViewport(cmdlist);
   1994   SetScissor(cmdlist);
   1995 }
   1996 
   1997 void D3D12Device::SetVertexBuffer(ID3D12GraphicsCommandList4* cmdlist)
   1998 {
   1999   const D3D12_VERTEX_BUFFER_VIEW vb_view = {m_vertex_buffer.GetGPUPointer(), m_vertex_buffer.GetSize(),
   2000                                             m_current_vertex_stride};
   2001   cmdlist->IASetVertexBuffers(0, 1, &vb_view);
   2002 }
   2003 
   2004 void D3D12Device::SetViewport(ID3D12GraphicsCommandList4* cmdlist)
   2005 {
   2006   const D3D12_VIEWPORT vp = {static_cast<float>(m_current_viewport.left),
   2007                              static_cast<float>(m_current_viewport.top),
   2008                              static_cast<float>(m_current_viewport.width()),
   2009                              static_cast<float>(m_current_viewport.height()),
   2010                              0.0f,
   2011                              1.0f};
   2012   cmdlist->RSSetViewports(1, &vp);
   2013 }
   2014 
   2015 void D3D12Device::SetScissor(ID3D12GraphicsCommandList4* cmdlist)
   2016 {
   2017   static_assert(sizeof(GSVector4i) == sizeof(D3D12_RECT));
   2018   cmdlist->RSSetScissorRects(1, reinterpret_cast<const D3D12_RECT*>(&m_current_scissor));
   2019 }
   2020 
   2021 void D3D12Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler)
   2022 {
   2023   D3D12Texture* T = static_cast<D3D12Texture*>(texture);
   2024   if (m_current_textures[slot] != T)
   2025   {
   2026     m_current_textures[slot] = T;
   2027     m_dirty_flags |= DIRTY_FLAG_TEXTURES;
   2028 
   2029     if (T)
   2030     {
   2031       T->CommitClear();
   2032       T->SetUseFenceValue(GetCurrentFenceValue());
   2033       if (T->GetResourceState() != D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)
   2034       {
   2035         if (InRenderPass())
   2036           EndRenderPass();
   2037         T->TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
   2038       }
   2039     }
   2040   }
   2041 
   2042   const D3D12DescriptorHandle& handle =
   2043     sampler ? static_cast<D3D12Sampler*>(sampler)->GetDescriptor() : m_point_sampler;
   2044   if (m_current_samplers[slot] != handle)
   2045   {
   2046     m_current_samplers[slot] = handle;
   2047     m_dirty_flags |= DIRTY_FLAG_SAMPLERS;
   2048   }
   2049 }
   2050 
   2051 void D3D12Device::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer)
   2052 {
   2053   DebugAssert(slot == 0);
   2054   if (m_current_texture_buffer == buffer)
   2055     return;
   2056 
   2057   m_current_texture_buffer = static_cast<D3D12TextureBuffer*>(buffer);
   2058   if (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
   2059     m_dirty_flags |= DIRTY_FLAG_TEXTURES;
   2060 }
   2061 
   2062 void D3D12Device::UnbindTexture(D3D12Texture* tex)
   2063 {
   2064   for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
   2065   {
   2066     if (m_current_textures[i] == tex)
   2067     {
   2068       m_current_textures[i] = nullptr;
   2069       m_dirty_flags |= DIRTY_FLAG_TEXTURES;
   2070     }
   2071   }
   2072 
   2073   if (tex->IsRenderTarget() || tex->IsRWTexture())
   2074   {
   2075     for (u32 i = 0; i < m_num_current_render_targets; i++)
   2076     {
   2077       if (m_current_render_targets[i] == tex)
   2078       {
   2079         if (InRenderPass())
   2080           EndRenderPass();
   2081         m_current_render_targets[i] = nullptr;
   2082       }
   2083     }
   2084   }
   2085   else if (tex->IsDepthStencil())
   2086   {
   2087     if (m_current_depth_target == tex)
   2088     {
   2089       if (InRenderPass())
   2090         EndRenderPass();
   2091       m_current_depth_target = nullptr;
   2092     }
   2093   }
   2094 }
   2095 
   2096 void D3D12Device::UnbindTextureBuffer(D3D12TextureBuffer* buf)
   2097 {
   2098   if (m_current_texture_buffer != buf)
   2099     return;
   2100 
   2101   m_current_texture_buffer = nullptr;
   2102 
   2103   if (m_current_pipeline_layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
   2104     m_dirty_flags |= DIRTY_FLAG_TEXTURES;
   2105 }
   2106 
   2107 void D3D12Device::SetViewport(const GSVector4i rc)
   2108 {
   2109   if (m_current_viewport.eq(rc))
   2110     return;
   2111 
   2112   m_current_viewport = rc;
   2113 
   2114   if (m_dirty_flags & DIRTY_FLAG_INITIAL)
   2115     return;
   2116 
   2117   SetViewport(GetCommandList());
   2118 }
   2119 
   2120 void D3D12Device::SetScissor(const GSVector4i rc)
   2121 {
   2122   if (m_current_scissor.eq(rc))
   2123     return;
   2124 
   2125   m_current_scissor = rc;
   2126 
   2127   if (m_dirty_flags & DIRTY_FLAG_INITIAL)
   2128     return;
   2129 
   2130   SetScissor(GetCommandList());
   2131 }
   2132 
   2133 void D3D12Device::PreDrawCheck()
   2134 {
   2135   // TODO: Flushing cmdbuffer because of descriptor OOM will lose push constants.
   2136 
   2137   DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
   2138   const u32 dirty = std::exchange(m_dirty_flags, 0);
   2139   if (dirty != 0)
   2140   {
   2141     if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT)
   2142     {
   2143       UpdateRootSignature();
   2144       if (!UpdateRootParameters(dirty))
   2145       {
   2146         SubmitCommandListAndRestartRenderPass("out of descriptors");
   2147         PreDrawCheck();
   2148         return;
   2149       }
   2150     }
   2151     else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS))
   2152     {
   2153       if (!UpdateRootParameters(dirty))
   2154       {
   2155         SubmitCommandListAndRestartRenderPass("out of descriptors");
   2156         PreDrawCheck();
   2157         return;
   2158       }
   2159     }
   2160   }
   2161 
   2162   if (!InRenderPass())
   2163     BeginRenderPass();
   2164 }
   2165 
   2166 bool D3D12Device::IsUsingROVRootSignature() const
   2167 {
   2168   return ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != 0);
   2169 }
   2170 
   2171 void D3D12Device::UpdateRootSignature()
   2172 {
   2173   GetCommandList()->SetGraphicsRootSignature(
   2174     m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast<u8>(m_current_pipeline_layout)].Get());
   2175 }
   2176 
   2177 template<GPUPipeline::Layout layout>
   2178 bool D3D12Device::UpdateParametersForLayout(u32 dirty)
   2179 {
   2180   ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
   2181 
   2182   if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO)
   2183   {
   2184     if (dirty & DIRTY_FLAG_CONSTANT_BUFFER)
   2185       cmdlist->SetGraphicsRootConstantBufferView(2, m_uniform_buffer.GetGPUPointer() + m_uniform_buffer_position);
   2186   }
   2187 
   2188   constexpr u32 num_textures = GetActiveTexturesForLayout(layout);
   2189   if (dirty & DIRTY_FLAG_TEXTURES && num_textures > 0)
   2190   {
   2191     D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator;
   2192     D3D12DescriptorHandle gpu_handle;
   2193     if (!allocator.Allocate(num_textures, &gpu_handle))
   2194       return false;
   2195 
   2196     if constexpr (num_textures == 1)
   2197     {
   2198       m_device->CopyDescriptorsSimple(
   2199         1, gpu_handle, m_current_textures[0] ? m_current_textures[0]->GetSRVDescriptor() : m_null_srv_descriptor,
   2200         D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
   2201     }
   2202     else
   2203     {
   2204       D3D12_CPU_DESCRIPTOR_HANDLE src_handles[MAX_TEXTURE_SAMPLERS];
   2205       UINT src_sizes[MAX_TEXTURE_SAMPLERS];
   2206       for (u32 i = 0; i < num_textures; i++)
   2207       {
   2208         src_handles[i] = m_current_textures[i] ? m_current_textures[i]->GetSRVDescriptor() : m_null_srv_descriptor;
   2209         src_sizes[i] = 1;
   2210       }
   2211       m_device->CopyDescriptors(1, &gpu_handle.cpu_handle, &num_textures, num_textures, src_handles, src_sizes,
   2212                                 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
   2213     }
   2214 
   2215     cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
   2216   }
   2217 
   2218   if (dirty & DIRTY_FLAG_SAMPLERS && num_textures > 0)
   2219   {
   2220     auto& allocator = m_command_lists[m_current_command_list].sampler_allocator;
   2221     D3D12DescriptorHandle gpu_handle;
   2222     if constexpr (num_textures == 1)
   2223     {
   2224       if (!allocator.LookupSingle(m_device.Get(), &gpu_handle, m_current_samplers[0]))
   2225         return false;
   2226     }
   2227     else
   2228     {
   2229       if (!allocator.LookupGroup(m_device.Get(), &gpu_handle, m_current_samplers.data()))
   2230         return false;
   2231     }
   2232 
   2233     cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle);
   2234   }
   2235 
   2236   if (dirty & DIRTY_FLAG_TEXTURES && layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
   2237   {
   2238     D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator;
   2239     D3D12DescriptorHandle gpu_handle;
   2240     if (!allocator.Allocate(1, &gpu_handle))
   2241       return false;
   2242 
   2243     m_device->CopyDescriptorsSimple(
   2244       1, gpu_handle, m_current_texture_buffer ? m_current_texture_buffer->GetDescriptor() : m_null_srv_descriptor,
   2245       D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
   2246     cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
   2247   }
   2248 
   2249   if (dirty & DIRTY_FLAG_RT_UAVS)
   2250   {
   2251     DebugAssert(m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages);
   2252 
   2253     D3D12DescriptorAllocator& allocator = m_command_lists[m_current_command_list].descriptor_allocator;
   2254     D3D12DescriptorHandle gpu_handle;
   2255     if (!allocator.Allocate(MAX_IMAGE_RENDER_TARGETS, &gpu_handle))
   2256       return false;
   2257 
   2258     D3D12_CPU_DESCRIPTOR_HANDLE src_handles[MAX_IMAGE_RENDER_TARGETS];
   2259     UINT src_sizes[MAX_IMAGE_RENDER_TARGETS];
   2260     const UINT dst_size = MAX_IMAGE_RENDER_TARGETS;
   2261     for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
   2262     {
   2263       src_handles[i] =
   2264         m_current_render_targets[i] ? m_current_render_targets[i]->GetSRVDescriptor() : m_null_srv_descriptor;
   2265       src_sizes[i] = 1;
   2266     }
   2267     m_device->CopyDescriptors(1, &gpu_handle.cpu_handle, &dst_size, MAX_IMAGE_RENDER_TARGETS, src_handles, src_sizes,
   2268                               D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
   2269 
   2270     constexpr u32 rov_param =
   2271       (layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants) ?
   2272         1 :
   2273         ((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 3 :
   2274                                                                                                                      2);
   2275     cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
   2276   }
   2277 
   2278   return true;
   2279 }
   2280 
   2281 bool D3D12Device::UpdateRootParameters(u32 dirty)
   2282 {
   2283   switch (m_current_pipeline_layout)
   2284   {
   2285     case GPUPipeline::Layout::SingleTextureAndUBO:
   2286       return UpdateParametersForLayout<GPUPipeline::Layout::SingleTextureAndUBO>(dirty);
   2287 
   2288     case GPUPipeline::Layout::SingleTextureAndPushConstants:
   2289       return UpdateParametersForLayout<GPUPipeline::Layout::SingleTextureAndPushConstants>(dirty);
   2290 
   2291     case GPUPipeline::Layout::SingleTextureBufferAndPushConstants:
   2292       return UpdateParametersForLayout<GPUPipeline::Layout::SingleTextureBufferAndPushConstants>(dirty);
   2293 
   2294     case GPUPipeline::Layout::MultiTextureAndUBO:
   2295       return UpdateParametersForLayout<GPUPipeline::Layout::MultiTextureAndUBO>(dirty);
   2296 
   2297     case GPUPipeline::Layout::MultiTextureAndPushConstants:
   2298       return UpdateParametersForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
   2299 
   2300     default:
   2301       UnreachableCode();
   2302   }
   2303 }
   2304 
   2305 void D3D12Device::Draw(u32 vertex_count, u32 base_vertex)
   2306 {
   2307   PreDrawCheck();
   2308   s_stats.num_draws++;
   2309   GetCommandList()->DrawInstanced(vertex_count, 1, base_vertex, 0);
   2310 }
   2311 
   2312 void D3D12Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
   2313 {
   2314   PreDrawCheck();
   2315   s_stats.num_draws++;
   2316   GetCommandList()->DrawIndexedInstanced(index_count, 1, base_index, base_vertex, 0);
   2317 }
   2318 
   2319 void D3D12Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type)
   2320 {
   2321   Panic("Barriers are not supported");
   2322 }