duckstation

DuckStation, archived from the revision just before upstream relicensed it as a proprietary software project; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

d3d12_stream_buffer.cpp (9248B)


      1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR PolyForm-Strict-1.0.0)
      3 
      4 #include "d3d12_stream_buffer.h"
      5 #include "d3d12_device.h"
      6 
      7 #include "common/align.h"
      8 #include "common/assert.h"
      9 #include "common/error.h"
     10 #include "common/log.h"
     11 
     12 #include "D3D12MemAlloc.h"
     13 
     14 #include <algorithm>
     15 
     16 Log_SetChannel(D3D12StreamBuffer);
     17 
// Default-constructed empty; the actual GPU buffer is created in Create().
D3D12StreamBuffer::D3D12StreamBuffer() = default;
     19 
D3D12StreamBuffer::~D3D12StreamBuffer()
{
  // Unmaps and releases the buffer/allocation. The defer argument comes from
  // the default declared in the header — presumably non-deferred; confirm there.
  Destroy();
}
     24 
     25 bool D3D12StreamBuffer::Create(u32 size, Error* error)
     26 {
     27   const D3D12_RESOURCE_DESC resource_desc = {
     28     D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
     29     D3D12_RESOURCE_FLAG_NONE};
     30 
     31   D3D12MA::ALLOCATION_DESC allocationDesc = {};
     32   allocationDesc.Flags = D3D12MA::ALLOCATION_FLAG_COMMITTED;
     33   allocationDesc.HeapType = D3D12_HEAP_TYPE_UPLOAD;
     34 
     35   Microsoft::WRL::ComPtr<ID3D12Resource> buffer;
     36   Microsoft::WRL::ComPtr<D3D12MA::Allocation> allocation;
     37   HRESULT hr = D3D12Device::GetInstance().GetAllocator()->CreateResource(
     38     &allocationDesc, &resource_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.ReleaseAndGetAddressOf(),
     39     IID_PPV_ARGS(buffer.GetAddressOf()));
     40   if (FAILED(hr)) [[unlikely]]
     41   {
     42     Error::SetHResult(error, "CreateResource() for stream buffer failed: ", hr);
     43     return false;
     44   }
     45 
     46   static const D3D12_RANGE read_range = {};
     47   u8* host_pointer;
     48   hr = buffer->Map(0, &read_range, reinterpret_cast<void**>(&host_pointer));
     49   if (FAILED(hr)) [[unlikely]]
     50   {
     51     Error::SetHResult(error, "Map() for stream buffer failed: ", hr);
     52     return false;
     53   }
     54 
     55   Destroy(true);
     56 
     57   m_buffer = std::move(buffer);
     58   m_allocation = std::move(allocation);
     59   m_host_pointer = host_pointer;
     60   m_size = size;
     61   m_gpu_pointer = m_buffer->GetGPUVirtualAddress();
     62   return true;
     63 }
     64 
// Ensures at least `num_bytes` of contiguous, `alignment`-aligned space is
// available at m_current_offset, either in front of the GPU's read position,
// by wrapping to the start of the buffer, or by waiting on an already-
// submitted fence. Returns false when none of these can free enough room,
// in which case the caller must submit the current command list and retry.
bool D3D12StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
{
  // Worst case: aligning the current offset can consume up to `alignment`
  // extra bytes, so budget for it up front.
  const u32 required_bytes = num_bytes + alignment;

  // Check for sane allocations
  if (num_bytes > m_size) [[unlikely]]
  {
    ERROR_LOG("Attempting to allocate {} bytes from a {} byte stream buffer", static_cast<u32>(num_bytes),
              static_cast<u32>(m_size));
    Panic("Stream buffer overflow");
  }

  // Is the GPU behind or up to date with our current offset?
  UpdateCurrentFencePosition();
  if (m_current_offset >= m_current_gpu_position)
  {
    // At offset 0 no alignment padding can be needed, so only require num_bytes.
    const u32 aligned_required_bytes = (m_current_offset > 0) ? required_bytes : num_bytes;
    const u32 remaining_bytes = m_size - m_current_offset;
    if (aligned_required_bytes <= remaining_bytes)
    {
      // Place at the current position, after the GPU position.
      m_current_offset = Common::AlignUp(m_current_offset, alignment);
      m_current_space = m_size - m_current_offset;
      return true;
    }

    // Check for space at the start of the buffer
    // We use < here because we don't want to have the case of m_current_offset ==
    // m_current_gpu_position. That would mean the code above would assume the
    // GPU has caught up to us, which it hasn't.
    if (required_bytes < m_current_gpu_position)
    {
      // Reset offset to zero, since we're allocating behind the gpu now
      m_current_offset = 0;
      m_current_space = m_current_gpu_position;
      return true;
    }
  }

  // Is the GPU ahead of our current offset?
  if (m_current_offset < m_current_gpu_position)
  {
    // We have from m_current_offset..m_current_gpu_position space to use.
    const u32 remaining_bytes = m_current_gpu_position - m_current_offset;
    // Strict < again so the offset never lines up exactly with the GPU position.
    if (required_bytes < remaining_bytes)
    {
      // Place at the current position, since this is still behind the GPU.
      m_current_offset = Common::AlignUp(m_current_offset, alignment);
      m_current_space = m_current_gpu_position - m_current_offset;
      return true;
    }
  }

  // Can we find a fence to wait on that will give us enough memory?
  if (WaitForClearSpace(required_bytes))
  {
    // WaitForClearSpace() set offset/space; apply the alignment padding to both.
    const u32 align_diff = Common::AlignUp(m_current_offset, alignment) - m_current_offset;
    m_current_offset += align_diff;
    m_current_space -= align_diff;
    return true;
  }

  // We tried everything we could, and still couldn't get anything. This means that too much space
  // in the buffer is being used by the command buffer currently being recorded. Therefore, the
  // only option is to execute it, and wait until it's done.
  return false;
}
    132 
    133 void D3D12StreamBuffer::CommitMemory(u32 final_num_bytes)
    134 {
    135   DebugAssert((m_current_offset + final_num_bytes) <= m_size);
    136   DebugAssert(final_num_bytes <= m_current_space);
    137   m_current_offset += final_num_bytes;
    138   m_current_space -= final_num_bytes;
    139 }
    140 
// Unmaps and releases the buffer, and resets all stream-tracking state.
// When `defer` is true, the resource is handed to the device's deferred-
// destruction queue so the GPU can finish with it first; otherwise the
// ComPtr Reset() calls release it immediately.
void D3D12StreamBuffer::Destroy(bool defer)
{
  if (m_host_pointer)
  {
    // Mark the whole buffer as written so the driver flushes it on unmap.
    const D3D12_RANGE written_range = {0, m_size};
    m_buffer->Unmap(0, &written_range);
    m_host_pointer = nullptr;
  }

  if (m_buffer && defer)
    D3D12Device::GetInstance().DeferResourceDestruction(std::move(m_allocation), std::move(m_buffer));
  // Harmless on the moved-from ComPtrs above; releases immediately otherwise.
  m_buffer.Reset();
  m_allocation.Reset();

  m_current_offset = 0;
  m_current_space = 0;
  m_current_gpu_position = 0;
  m_tracked_fences.clear();
}
    160 
// Associates the current write offset with the device's current fence value,
// so UpdateGPUPosition() can later determine how far the GPU has consumed
// once that fence is signaled.
void D3D12StreamBuffer::UpdateCurrentFencePosition()
{
  // Don't create a tracking entry if the GPU is caught up with the buffer.
  if (m_current_offset == m_current_gpu_position)
    return;

  // Has the offset changed since the last fence?
  const u64 fence = D3D12Device::GetInstance().GetCurrentFenceValue();
  if (!m_tracked_fences.empty() && m_tracked_fences.back().first == fence)
  {
    // Still haven't executed a command buffer, so just update the offset.
    m_tracked_fences.back().second = m_current_offset;
    return;
  }

  // New fence value: refresh the GPU position first, then append a
  // (fence, offset) pair for it.
  UpdateGPUPosition();
  m_tracked_fences.emplace_back(fence, m_current_offset);
}
    179 
    180 void D3D12StreamBuffer::UpdateGPUPosition()
    181 {
    182   auto start = m_tracked_fences.begin();
    183   auto end = start;
    184 
    185   const u64 completed_counter = D3D12Device::GetInstance().GetCompletedFenceValue();
    186   while (end != m_tracked_fences.end() && completed_counter >= end->first)
    187   {
    188     m_current_gpu_position = end->second;
    189     ++end;
    190   }
    191 
    192   if (start != end)
    193     m_tracked_fences.erase(start, end);
    194 }
    195 
// Searches the tracked fences for the earliest one that, once waited on,
// frees at least `num_bytes` of contiguous space; waits on it and updates
// offset/space/GPU position accordingly. Returns false if no already-
// submitted fence suffices (i.e. the space is held by the command list
// currently being recorded, which the caller must submit first).
bool D3D12StreamBuffer::WaitForClearSpace(u32 num_bytes)
{
  // State to apply if a suitable fence is found.
  u32 new_offset = 0;
  u32 new_space = 0;
  u32 new_gpu_position = 0;

  auto iter = m_tracked_fences.begin();
  for (; iter != m_tracked_fences.end(); ++iter)
  {
    // Would this fence bring us in line with the GPU?
    // This is the "last resort" case, where a command buffer execution has been forced
    // after no additional data has been written to it, so we can assume that after the
    // fence has been signaled the entire buffer is now consumed.
    u32 gpu_position = iter->second;
    if (m_current_offset == gpu_position)
    {
      new_offset = 0;
      new_space = m_size;
      new_gpu_position = 0;
      break;
    }

    // Assuming that we wait for this fence, are we allocating in front of the GPU?
    if (m_current_offset > gpu_position)
    {
      // This would suggest the GPU has now followed us and wrapped around, so we have from
      // m_current_position..m_size free, as well as and 0..gpu_position.
      const u32 remaining_space_after_offset = m_size - m_current_offset;
      if (remaining_space_after_offset >= num_bytes)
      {
        // Switch to allocating in front of the GPU, using the remainder of the buffer.
        new_offset = m_current_offset;
        new_space = m_size - m_current_offset;
        new_gpu_position = gpu_position;
        break;
      }

      // We can wrap around to the start, behind the GPU, if there is enough space.
      // We use > here because otherwise we'd end up lining up with the GPU, and then the
      // allocator would assume that the GPU has consumed what we just wrote.
      if (gpu_position > num_bytes)
      {
        new_offset = 0;
        new_space = gpu_position;
        new_gpu_position = gpu_position;
        break;
      }
    }
    else
    {
      // We're currently allocating behind the GPU. This would give us between the current
      // offset and the GPU position worth of space to work with. Again, > because we can't
      // align the GPU position with the buffer offset.
      u32 available_space_inbetween = gpu_position - m_current_offset;
      if (available_space_inbetween > num_bytes)
      {
        // Leave the offset as-is, but update the GPU position.
        new_offset = m_current_offset;
        new_space = gpu_position - m_current_offset;
        new_gpu_position = gpu_position;
        break;
      }
    }
  }

  // Did any fences satisfy this condition?
  // Has the command buffer been executed yet? If not, the caller should execute it.
  if (iter == m_tracked_fences.end() || iter->first == D3D12Device::GetInstance().GetCurrentFenceValue())
    return false;

  // Wait until this fence is signaled. This will fire the callback, updating the GPU position.
  D3D12Device::GetInstance().WaitForFence(iter->first);
  // "Last resort" case (offset == recorded position): the whole buffer is
  // reclaimed, so every tracked fence is stale. Otherwise drop entries up to
  // and including the fence we waited on.
  m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
  m_current_offset = new_offset;
  m_current_space = new_space;
  m_current_gpu_position = new_gpu_position;
  return true;
}