d3d12_stream_buffer.cpp
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR PolyForm-Strict-1.0.0)

#include "d3d12_stream_buffer.h"
#include "d3d12_device.h"

#include "common/align.h"
#include "common/assert.h"
#include "common/error.h"
#include "common/log.h"

#include "D3D12MemAlloc.h"

#include <algorithm>

Log_SetChannel(D3D12StreamBuffer);

D3D12StreamBuffer::D3D12StreamBuffer() = default;

D3D12StreamBuffer::~D3D12StreamBuffer()
{
  Destroy();
}

bool D3D12StreamBuffer::Create(u32 size, Error* error)
{
  const D3D12_RESOURCE_DESC resource_desc = {
    D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
    D3D12_RESOURCE_FLAG_NONE};

  D3D12MA::ALLOCATION_DESC allocationDesc = {};
  allocationDesc.Flags = D3D12MA::ALLOCATION_FLAG_COMMITTED;
  allocationDesc.HeapType = D3D12_HEAP_TYPE_UPLOAD;

  Microsoft::WRL::ComPtr<ID3D12Resource> buffer;
  Microsoft::WRL::ComPtr<D3D12MA::Allocation> allocation;
  HRESULT hr = D3D12Device::GetInstance().GetAllocator()->CreateResource(
    &allocationDesc, &resource_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.ReleaseAndGetAddressOf(),
    IID_PPV_ARGS(buffer.GetAddressOf()));
  if (FAILED(hr)) [[unlikely]]
  {
    Error::SetHResult(error, "CreateResource() for stream buffer failed: ", hr);
    return false;
  }

  static const D3D12_RANGE read_range = {};
  u8* host_pointer;
  hr = buffer->Map(0, &read_range, reinterpret_cast<void**>(&host_pointer));
  if (FAILED(hr)) [[unlikely]]
  {
    Error::SetHResult(error, "Map() for stream buffer failed: ", hr);
    return false;
  }

  Destroy(true);

  m_buffer = std::move(buffer);
  m_allocation = std::move(allocation);
  m_host_pointer = host_pointer;
  m_size = size;
  m_gpu_pointer = m_buffer->GetGPUVirtualAddress();
  return true;
}

bool D3D12StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
{
  const u32 required_bytes = num_bytes + alignment;

  // Check for sane allocations
  if (num_bytes > m_size) [[unlikely]]
  {
    ERROR_LOG("Attempting to allocate {} bytes from a {} byte stream buffer", static_cast<u32>(num_bytes),
              static_cast<u32>(m_size));
    Panic("Stream buffer overflow");
  }

  // Is the GPU behind or up to date with our current offset?
  UpdateCurrentFencePosition();
  if (m_current_offset >= m_current_gpu_position)
  {
    const u32 aligned_required_bytes = (m_current_offset > 0) ? required_bytes : num_bytes;
    const u32 remaining_bytes = m_size - m_current_offset;
    if (aligned_required_bytes <= remaining_bytes)
    {
      // Place at the current position, after the GPU position.
      m_current_offset = Common::AlignUp(m_current_offset, alignment);
      m_current_space = m_size - m_current_offset;
      return true;
    }

    // Check for space at the start of the buffer
    // We use < here because we don't want to have the case of m_current_offset ==
    // m_current_gpu_position. That would mean the code above would assume the
    // GPU has caught up to us, which it hasn't.
    if (required_bytes < m_current_gpu_position)
    {
      // Reset offset to zero, since we're allocating behind the gpu now
      m_current_offset = 0;
      m_current_space = m_current_gpu_position;
      return true;
    }
  }

  // Is the GPU ahead of our current offset?
  if (m_current_offset < m_current_gpu_position)
  {
    // We have from m_current_offset..m_current_gpu_position space to use.
    const u32 remaining_bytes = m_current_gpu_position - m_current_offset;
    if (required_bytes < remaining_bytes)
    {
      // Place at the current position, since this is still behind the GPU.
      m_current_offset = Common::AlignUp(m_current_offset, alignment);
      m_current_space = m_current_gpu_position - m_current_offset;
      return true;
    }
  }

  // Can we find a fence to wait on that will give us enough memory?
  if (WaitForClearSpace(required_bytes))
  {
    const u32 align_diff = Common::AlignUp(m_current_offset, alignment) - m_current_offset;
    m_current_offset += align_diff;
    m_current_space -= align_diff;
    return true;
  }

  // We tried everything we could, and still couldn't get anything. This means that too much space
  // in the buffer is being used by the command buffer currently being recorded. Therefore, the
  // only option is to execute it, and wait until it's done.
  return false;
}
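
// Added note (not part of the upstream file): a minimal usage sketch of the Reserve/Commit
// pattern implemented above. The accessor names (GetCurrentOffset, GetCurrentHostPointer,
// GetGPUPointer) and the device submit call are assumptions about the surrounding interfaces,
// not taken from this file:
//
//   if (!sbuffer.ReserveMemory(data_size, alignment))
//   {
//     // Even waiting on fences did not free enough space, so the command list currently being
//     // recorded must be submitted before retrying (hypothetical device call).
//     D3D12Device::GetInstance().SubmitCommandList();
//     if (!sbuffer.ReserveMemory(data_size, alignment))
//       return false;
//   }
//   const u32 offset = sbuffer.GetCurrentOffset();                    // assumed accessor
//   std::memcpy(sbuffer.GetCurrentHostPointer(), data, data_size);    // assumed accessor
//   sbuffer.CommitMemory(data_size);
//   // Bind the data at sbuffer.GetGPUPointer() + offset, e.g. as a vertex buffer view.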

void D3D12StreamBuffer::CommitMemory(u32 final_num_bytes)
{
  DebugAssert((m_current_offset + final_num_bytes) <= m_size);
  DebugAssert(final_num_bytes <= m_current_space);
  m_current_offset += final_num_bytes;
  m_current_space -= final_num_bytes;
}

void D3D12StreamBuffer::Destroy(bool defer)
{
  if (m_host_pointer)
  {
    const D3D12_RANGE written_range = {0, m_size};
    m_buffer->Unmap(0, &written_range);
    m_host_pointer = nullptr;
  }

  if (m_buffer && defer)
    D3D12Device::GetInstance().DeferResourceDestruction(std::move(m_allocation), std::move(m_buffer));
  m_buffer.Reset();
  m_allocation.Reset();

  m_current_offset = 0;
  m_current_space = 0;
  m_current_gpu_position = 0;
  m_tracked_fences.clear();
}

void D3D12StreamBuffer::UpdateCurrentFencePosition()
{
  // Don't create a tracking entry if the GPU is caught up with the buffer.
  if (m_current_offset == m_current_gpu_position)
    return;

  // Has the offset changed since the last fence?
  const u64 fence = D3D12Device::GetInstance().GetCurrentFenceValue();
  if (!m_tracked_fences.empty() && m_tracked_fences.back().first == fence)
  {
    // Still haven't executed a command buffer, so just update the offset.
    m_tracked_fences.back().second = m_current_offset;
    return;
  }

  UpdateGPUPosition();
  m_tracked_fences.emplace_back(fence, m_current_offset);
}

void D3D12StreamBuffer::UpdateGPUPosition()
{
  auto start = m_tracked_fences.begin();
  auto end = start;

  const u64 completed_counter = D3D12Device::GetInstance().GetCompletedFenceValue();
  while (end != m_tracked_fences.end() && completed_counter >= end->first)
  {
    m_current_gpu_position = end->second;
    ++end;
  }

  if (start != end)
    m_tracked_fences.erase(start, end);
}
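
// Added note (illustration, numbers are hypothetical): consider a 64 KiB buffer with
// m_current_offset = 48 KiB, m_current_gpu_position = 16 KiB, and a tracked fence that was
// recorded with an offset of 40 KiB. Once that fence is signalled, the GPU has consumed all data
// written before it, so 48..64 KiB remains free in front of the write position and 0..40 KiB
// becomes usable behind the new GPU position. WaitForClearSpace() below scans the tracked fences
// in submission order and stops at the first one whose completion yields at least num_bytes,
// preferring to keep allocating in front of the GPU before wrapping back to offset zero.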

bool D3D12StreamBuffer::WaitForClearSpace(u32 num_bytes)
{
  u32 new_offset = 0;
  u32 new_space = 0;
  u32 new_gpu_position = 0;

  auto iter = m_tracked_fences.begin();
  for (; iter != m_tracked_fences.end(); ++iter)
  {
    // Would this fence bring us in line with the GPU?
    // This is the "last resort" case, where a command buffer execution has been forced
    // after no additional data has been written to it, so we can assume that after the
    // fence has been signaled the entire buffer is now consumed.
    u32 gpu_position = iter->second;
    if (m_current_offset == gpu_position)
    {
      new_offset = 0;
      new_space = m_size;
      new_gpu_position = 0;
      break;
    }

    // Assuming that we wait for this fence, are we allocating in front of the GPU?
    if (m_current_offset > gpu_position)
    {
      // This would suggest the GPU has now followed us and wrapped around, so we have
      // m_current_offset..m_size free, as well as 0..gpu_position.
      const u32 remaining_space_after_offset = m_size - m_current_offset;
      if (remaining_space_after_offset >= num_bytes)
      {
        // Switch to allocating in front of the GPU, using the remainder of the buffer.
        new_offset = m_current_offset;
        new_space = m_size - m_current_offset;
        new_gpu_position = gpu_position;
        break;
      }

      // We can wrap around to the start, behind the GPU, if there is enough space.
      // We use > here because otherwise we'd end up lining up with the GPU, and then the
      // allocator would assume that the GPU has consumed what we just wrote.
      if (gpu_position > num_bytes)
      {
        new_offset = 0;
        new_space = gpu_position;
        new_gpu_position = gpu_position;
        break;
      }
    }
    else
    {
      // We're currently allocating behind the GPU. This gives us the space between the
      // current offset and the GPU position to work with. Again, > because we can't
      // align the GPU position with the buffer offset.
      u32 available_space_inbetween = gpu_position - m_current_offset;
      if (available_space_inbetween > num_bytes)
      {
        // Leave the offset as-is, but update the GPU position.
        new_offset = m_current_offset;
        new_space = gpu_position - m_current_offset;
        new_gpu_position = gpu_position;
        break;
      }
    }
  }

  // Did any fences satisfy this condition?
  // Has the command buffer been executed yet? If not, the caller should execute it.
  if (iter == m_tracked_fences.end() || iter->first == D3D12Device::GetInstance().GetCurrentFenceValue())
    return false;

  // Wait until this fence is signaled. This will fire the callback, updating the GPU position.
  D3D12Device::GetInstance().WaitForFence(iter->first);
  m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
  m_current_offset = new_offset;
  m_current_space = new_space;
  m_current_gpu_position = new_gpu_position;
  return true;
}
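
// Added note (illustrative sketch, not part of the upstream file): creating a stream buffer is a
// single Create() call. The 16 MiB size is an arbitrary example, and Error::GetDescription() is
// assumed from common/error.h.
//
//   D3D12StreamBuffer vertex_stream;
//   Error error;
//   if (!vertex_stream.Create(16 * 1024 * 1024, &error))
//   {
//     ERROR_LOG("Failed to create stream buffer: {}", error.GetDescription());
//     return false;
//   }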