vulkan_stream_buffer.cpp
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#include "vulkan_stream_buffer.h"
#include "vulkan_builders.h"
#include "vulkan_device.h"

#include "common/align.h"
#include "common/assert.h"
#include "common/bitutils.h"
#include "common/log.h"

Log_SetChannel(VulkanDevice);

VulkanStreamBuffer::VulkanStreamBuffer() = default;

VulkanStreamBuffer::VulkanStreamBuffer(VulkanStreamBuffer&& move)
  : m_size(move.m_size), m_current_offset(move.m_current_offset), m_current_space(move.m_current_space),
    m_current_gpu_position(move.m_current_gpu_position), m_allocation(move.m_allocation), m_buffer(move.m_buffer),
    m_host_pointer(move.m_host_pointer), m_tracked_fences(std::move(move.m_tracked_fences))
{
  move.m_size = 0;
  move.m_current_offset = 0;
  move.m_current_space = 0;
  move.m_current_gpu_position = 0;
  move.m_allocation = VK_NULL_HANDLE;
  move.m_buffer = VK_NULL_HANDLE;
  move.m_host_pointer = nullptr;
}

VulkanStreamBuffer::~VulkanStreamBuffer()
{
  if (IsValid())
    Destroy(true);
}

VulkanStreamBuffer& VulkanStreamBuffer::operator=(VulkanStreamBuffer&& move)
{
  if (IsValid())
    Destroy(true);

  std::swap(m_size, move.m_size);
  std::swap(m_current_offset, move.m_current_offset);
  std::swap(m_current_space, move.m_current_space);
  std::swap(m_current_gpu_position, move.m_current_gpu_position);
  std::swap(m_allocation, move.m_allocation);
  std::swap(m_buffer, move.m_buffer);
  std::swap(m_host_pointer, move.m_host_pointer);
  std::swap(m_tracked_fences, move.m_tracked_fences);

  return *this;
}

bool VulkanStreamBuffer::Create(VkBufferUsageFlags usage, u32 size)
{
  const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
                                  nullptr,
                                  0,
                                  static_cast<VkDeviceSize>(size),
                                  usage,
                                  VK_SHARING_MODE_EXCLUSIVE,
                                  0,
                                  nullptr};

  VmaAllocationCreateInfo aci = {};
  aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
  aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
  aci.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;

  VmaAllocationInfo ai = {};
  VkBuffer new_buffer = VK_NULL_HANDLE;
  VmaAllocation new_allocation = VK_NULL_HANDLE;
  VkResult res =
    vmaCreateBuffer(VulkanDevice::GetInstance().GetAllocator(), &bci, &aci, &new_buffer, &new_allocation, &ai);
  if (res != VK_SUCCESS)
  {
    LOG_VULKAN_ERROR(res, "vmaCreateBuffer failed: ");
    return false;
  }

  if (IsValid())
    Destroy(true);

  // Replace with the new buffer
  m_size = size;
  m_current_offset = 0;
  m_current_gpu_position = 0;
  m_tracked_fences.clear();
  m_allocation = new_allocation;
  m_buffer = new_buffer;
  m_host_pointer = static_cast<u8*>(ai.pMappedData);
  return true;
}

void VulkanStreamBuffer::Destroy(bool defer)
{
  if (m_buffer != VK_NULL_HANDLE)
  {
    if (defer)
      VulkanDevice::GetInstance().DeferBufferDestruction(m_buffer, m_allocation);
    else
      vmaDestroyBuffer(VulkanDevice::GetInstance().GetAllocator(), m_buffer, m_allocation);
  }

  m_size = 0;
  m_current_offset = 0;
  m_current_gpu_position = 0;
  m_tracked_fences.clear();
  m_buffer = VK_NULL_HANDLE;
  m_allocation = VK_NULL_HANDLE;
  m_host_pointer = nullptr;
}
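// A minimal creation sketch (illustrative only, not taken from this file): the usage flags and
// 16 MiB size below are assumptions, and callers pick whatever suits the data being streamed.
// Create() may be called on an already-valid buffer; the old buffer's destruction is deferred,
// and on failure the existing buffer is left untouched.
//
//   VulkanStreamBuffer vertex_stream;
//   if (!vertex_stream.Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 16 * 1024 * 1024))
//     return false; // allocation failed; any previously-created buffer remains valid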
bool VulkanStreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
{
  const u32 required_bytes = num_bytes + alignment;

  // Check for sane allocations
  if (required_bytes > m_size) [[unlikely]]
  {
    ERROR_LOG("Attempting to allocate {} bytes from a {} byte stream buffer", num_bytes, m_size);
    Panic("Stream buffer overflow");
  }

  UpdateGPUPosition();

  // Is the GPU behind or up to date with our current offset?
  if (m_current_offset >= m_current_gpu_position)
  {
    const u32 remaining_bytes = m_size - m_current_offset;
    if (required_bytes <= remaining_bytes)
    {
      // Place at the current position, after the GPU position.
      m_current_offset = Common::AlignUp(m_current_offset, alignment);
      m_current_space = m_size - m_current_offset;
      return true;
    }

    // Check for space at the start of the buffer
    // We use < here because we don't want to have the case of m_current_offset ==
    // m_current_gpu_position. That would mean the code above would assume the
    // GPU has caught up to us, which it hasn't.
    if (required_bytes < m_current_gpu_position)
    {
      // Reset offset to zero, since we're allocating behind the GPU now.
      m_current_offset = 0;
      m_current_space = m_current_gpu_position - 1;
      return true;
    }
  }

  // Is the GPU ahead of our current offset?
  if (m_current_offset < m_current_gpu_position)
  {
    // We have from m_current_offset..m_current_gpu_position space to use.
    const u32 remaining_bytes = m_current_gpu_position - m_current_offset;
    if (required_bytes < remaining_bytes)
    {
      // Place at the current position, since this is still behind the GPU.
      m_current_offset = Common::AlignUp(m_current_offset, alignment);
      m_current_space = m_current_gpu_position - m_current_offset - 1;
      return true;
    }
  }

  // Can we find a fence to wait on that will give us enough memory?
  if (WaitForClearSpace(required_bytes))
  {
    const u32 align_diff = Common::AlignUp(m_current_offset, alignment) - m_current_offset;
    m_current_offset += align_diff;
    m_current_space -= align_diff;
    return true;
  }

  // We tried everything we could, and still couldn't get anything. This means that too much space
  // in the buffer is being used by the command buffer currently being recorded. Therefore, the
  // only option is to execute it, and wait until it's done.
  return false;
}

void VulkanStreamBuffer::CommitMemory(u32 final_num_bytes)
{
  DebugAssert((m_current_offset + final_num_bytes) <= m_size);
  DebugAssert(final_num_bytes <= m_current_space);

  // For non-coherent mappings, flush the memory range
  vmaFlushAllocation(VulkanDevice::GetInstance().GetAllocator(), m_allocation, m_current_offset, final_num_bytes);

  m_current_offset += final_num_bytes;
  m_current_space -= final_num_bytes;
  UpdateCurrentFencePosition();
}
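// Typical caller-side pattern, as a hedged sketch: the accessors (GetCurrentHostPointer(),
// GetCurrentOffset(), GetBuffer()) are assumed to be declared in vulkan_stream_buffer.h and are
// not defined in this file, and Vertex/num_vertices are placeholders. Reserve first, copy through
// the persistent mapping, commit exactly what was written, and bind at the offset captured after
// the reserve (which may have been aligned up).
//
//   const u32 upload_size = num_vertices * sizeof(Vertex); // hypothetical payload
//   if (!stream.ReserveMemory(upload_size, sizeof(Vertex)))
//   {
//     // Out of space: the in-flight command buffer has to be submitted before retrying
//     // (see the note after WaitForClearSpace() below).
//   }
//   const VkDeviceSize bind_offset = stream.GetCurrentOffset();
//   std::memcpy(stream.GetCurrentHostPointer(), vertices, upload_size);
//   stream.CommitMemory(upload_size);
//
//   const VkBuffer buffer = stream.GetBuffer();
//   vkCmdBindVertexBuffers(cmdbuf, 0, 1, &buffer, &bind_offset);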
void VulkanStreamBuffer::UpdateCurrentFencePosition()
{
  // Has the offset changed since the last fence?
  const u64 counter = VulkanDevice::GetInstance().GetCurrentFenceCounter();
  if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter)
  {
    // Still haven't executed a command buffer, so just update the offset.
    m_tracked_fences.back().second = m_current_offset;
    return;
  }

  // New command buffer, so start tracking the current offset against its fence counter.
  m_tracked_fences.emplace_back(counter, m_current_offset);
}

void VulkanStreamBuffer::UpdateGPUPosition()
{
  auto start = m_tracked_fences.begin();
  auto end = start;

  const u64 completed_counter = VulkanDevice::GetInstance().GetCompletedFenceCounter();
  while (end != m_tracked_fences.end() && completed_counter >= end->first)
  {
    m_current_gpu_position = end->second;
    ++end;
  }

  if (start != end)
  {
    m_tracked_fences.erase(start, end);
    if (m_current_offset == m_current_gpu_position)
    {
      // GPU is all caught up now.
      m_current_offset = 0;
      m_current_gpu_position = 0;
      m_current_space = m_size;
    }
  }
}
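// Worked example of the fence tracking above (the numbers are assumptions for illustration):
// with a 1024-byte buffer, suppose 256 bytes are committed under fence counter 10 and another 256
// under counter 11, so m_tracked_fences = {(10, 256), (11, 512)} and m_current_offset = 512. Once
// GetCompletedFenceCounter() reaches 10, UpdateGPUPosition() sets m_current_gpu_position to 256
// and drops that entry; ReserveMemory() can then either keep allocating from offset 512 (512
// bytes remain to the end) or, for requests smaller than 256 bytes, wrap back to offset 0 behind
// the GPU.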
bool VulkanStreamBuffer::WaitForClearSpace(u32 num_bytes)
{
  u32 new_offset = 0;
  u32 new_space = 0;
  u32 new_gpu_position = 0;

  auto iter = m_tracked_fences.begin();
  for (; iter != m_tracked_fences.end(); ++iter)
  {
    // Would this fence bring us in line with the GPU?
    // This is the "last resort" case, where a command buffer execution has been forced
    // after no additional data has been written to it, so we can assume that after the
    // fence has been signaled the entire buffer is now consumed.
    const u32 gpu_position = iter->second;
    if (m_current_offset == gpu_position)
    {
      new_offset = 0;
      new_space = m_size;
      new_gpu_position = 0;
      break;
    }

    // Assuming that we wait for this fence, are we allocating in front of the GPU?
    if (m_current_offset > gpu_position)
    {
      // This would suggest the GPU has now followed us and wrapped around, so we have from
      // m_current_offset..m_size free, as well as 0..gpu_position.
      const u32 remaining_space_after_offset = m_size - m_current_offset;
      if (remaining_space_after_offset >= num_bytes)
      {
        // Switch to allocating in front of the GPU, using the remainder of the buffer.
        new_offset = m_current_offset;
        new_space = m_size - m_current_offset;
        new_gpu_position = gpu_position;
        break;
      }

      // We can wrap around to the start, behind the GPU, if there is enough space.
      // We use > here because otherwise we'd end up lining up with the GPU, and then the
      // allocator would assume that the GPU has consumed what we just wrote.
      if (gpu_position > num_bytes)
      {
        new_offset = 0;
        new_space = gpu_position - 1;
        new_gpu_position = gpu_position;
        break;
      }
    }
    else
    {
      // We're currently allocating behind the GPU. This would give us between the current
      // offset and the GPU position worth of space to work with. Again, > because we can't
      // align the GPU position with the buffer offset.
      const u32 available_space_inbetween = gpu_position - m_current_offset;
      if (available_space_inbetween > num_bytes)
      {
        // Leave the offset as-is, but update the GPU position.
        new_offset = m_current_offset;
        new_space = available_space_inbetween - 1;
        new_gpu_position = gpu_position;
        break;
      }
    }
  }

  // Did any fences satisfy this condition?
  // Has the command buffer been executed yet? If not, the caller should execute it.
  if (iter == m_tracked_fences.end() || iter->first == VulkanDevice::GetInstance().GetCurrentFenceCounter())
    return false;

  // Wait until this fence is signaled. This will fire the callback, updating the GPU position.
  VulkanDevice::GetInstance().WaitForFenceCounter(iter->first);
  m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
  m_current_offset = new_offset;
  m_current_space = new_space;
  m_current_gpu_position = new_gpu_position;
  return true;
}
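// When ReserveMemory() ultimately returns false, the remaining space is held by the command
// buffer still being recorded, so the owning device has to submit it and wait before retrying.
// A hedged sketch of that fallback; the submit call shown here is illustrative only, since the
// actual VulkanDevice submission API is declared outside this file:
//
//   if (!stream.ReserveMemory(size, alignment))
//   {
//     VulkanDevice::GetInstance().SubmitCommandBufferAndRestartRenderPass("out of stream buffer space");
//     if (!stream.ReserveMemory(size, alignment))
//       Panic("Failed to reserve stream buffer space after submit");
//   }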