metal_stream_buffer.mm (8243B)
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

#include "metal_stream_buffer.h"
#include "metal_device.h"

#include "common/align.h"
#include "common/assert.h"
#include "common/log.h"

Log_SetChannel(MetalDevice);

MetalStreamBuffer::MetalStreamBuffer() = default;

// Tears the buffer down when IsValid() is true.
MetalStreamBuffer::~MetalStreamBuffer()
{
  if (IsValid())
    Destroy();
}

// Allocates a shared, write-combined MTLBuffer of `size` bytes on `device` and makes it the
// backing store of this stream buffer, destroying any previously held buffer first.
// Returns false if the allocation fails; in that case the existing state is left untouched.
bool MetalStreamBuffer::Create(id<MTLDevice> device, u32 size)
{
  @autoreleasepool
  {
    const MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined;

    // -newBufferWithLength:options: is a "new" method, so the returned object is already owned (+1).
    // Hand that reference to the autorelease pool so that the retain below is the single reference
    // Destroy() has to balance. Without this, every Create() would leak one reference and the
    // buffer would never be deallocated.
    id<MTLBuffer> new_buffer = [[device newBufferWithLength:size options:options] autorelease];
    if (new_buffer == nil)
    {
      ERROR_LOG("Failed to create buffer.");
      return false;
    }

    // Only drop the old buffer once we know the replacement exists.
    if (IsValid())
      Destroy();

    // Replace with the new buffer
    m_size = size;
    m_current_offset = 0;
    m_current_gpu_position = 0;
    m_tracked_fences.clear();
    m_buffer = [new_buffer retain];
    m_host_pointer = static_cast<u8*>([new_buffer contents]);
    return true;
  }
}

// Releases the MTLBuffer and resets size, offsets, tracked fences and the host pointer.
void MetalStreamBuffer::Destroy()
{
  m_size = 0;
  m_current_offset = 0;
  m_current_gpu_position = 0;
  m_tracked_fences.clear();
  [m_buffer release]; // Balances the retain taken in Create(); messaging nil is a no-op.
  m_buffer = nil;
  m_host_pointer = nullptr;
}

// Tries to make at least `num_bytes` available at an offset aligned to `alignment`.
// On success, m_current_offset is the aligned write position and m_current_space the number of
// bytes that may be written there; the caller is expected to follow up with CommitMemory().
// Returns false when no space could be found, even after waiting on an already-submitted fence.
// Requests larger than the whole buffer trigger Panic().
bool MetalStreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
{
  // Worst case, aligning the offset costs up to `alignment` extra bytes.
  const u32 required_bytes = num_bytes + alignment;

  // Check for sane allocations
  if (required_bytes > m_size) [[unlikely]]
  {
    ERROR_LOG("Attempting to allocate {} bytes from a {} byte stream buffer", num_bytes, m_size);
    Panic("Stream buffer overflow");
    return false;
  }

  UpdateGPUPosition();

  // Is the GPU behind or up to date with our current offset?
  if (m_current_offset >= m_current_gpu_position)
  {
    const u32 remaining_bytes = m_size - m_current_offset;
    if (required_bytes <= remaining_bytes)
    {
      // Place at the current position, after the GPU position.
      m_current_offset = Common::AlignUp(m_current_offset, alignment);
      m_current_space = m_size - m_current_offset;
      return true;
    }

    // Check for space at the start of the buffer
    // We use < here because we don't want to have the case of m_current_offset ==
    // m_current_gpu_position. That would mean the code above would assume the
    // GPU has caught up to us, which it hasn't.
    if (required_bytes < m_current_gpu_position)
    {
      // Reset offset to zero, since we're allocating behind the gpu now
      m_current_offset = 0;
      m_current_space = m_current_gpu_position - 1;
      return true;
    }
  }

  // Is the GPU ahead of our current offset?
  if (m_current_offset < m_current_gpu_position)
  {
    // We have from m_current_offset..m_current_gpu_position space to use.
    const u32 remaining_bytes = m_current_gpu_position - m_current_offset;
    if (required_bytes < remaining_bytes)
    {
      // Place at the current position, since this is still behind the GPU.
      m_current_offset = Common::AlignUp(m_current_offset, alignment);
      m_current_space = m_current_gpu_position - m_current_offset - 1;
      return true;
    }
  }

  // Can we find a fence to wait on that will give us enough memory?
  if (WaitForClearSpace(required_bytes))
  {
    // WaitForClearSpace() leaves the offset unaligned, so pay the alignment cost here.
    const u32 align_diff = Common::AlignUp(m_current_offset, alignment) - m_current_offset;
    m_current_offset += align_diff;
    m_current_space -= align_diff;
    return true;
  }

  // We tried everything we could, and still couldn't get anything. This means that too much space
  // in the buffer is being used by the command buffer currently being recorded. Therefore, the
  // only option is to execute it, and wait until it's done.
  return false;
}

// Marks `final_num_bytes` starting at the current offset as written, advancing the offset,
// shrinking the remaining space, and recording the new offset against the current fence counter.
void MetalStreamBuffer::CommitMemory(u32 final_num_bytes)
{
  DebugAssert((m_current_offset + final_num_bytes) <= m_size);
  DebugAssert(final_num_bytes <= m_current_space);

  m_current_offset += final_num_bytes;
  m_current_space -= final_num_bytes;
  UpdateCurrentFencePosition();
}

// Associates the current offset with the device's current fence counter, either by updating the
// most recent tracked entry or by appending a new one.
void MetalStreamBuffer::UpdateCurrentFencePosition()
{
  // Has the offset changed since the last fence?
  const u64 counter = MetalDevice::GetInstance().GetCurrentFenceCounter();
  if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter)
  {
    // Still haven't executed a command buffer, so just update the offset.
    m_tracked_fences.back().second = m_current_offset;
    return;
  }

  // The fence counter differs from the last tracked entry (or nothing is tracked yet),
  // so start a new entry for it.
  m_tracked_fences.emplace_back(counter, m_current_offset);
}

// Consumes every tracked fence whose counter the device reports as completed, moving
// m_current_gpu_position to the offset recorded by the last of them.
void MetalStreamBuffer::UpdateGPUPosition()
{
  auto start = m_tracked_fences.begin();
  auto end = start;

  // Walk the leading run of fences that have already completed.
  const u64 completed_counter = MetalDevice::GetInstance().GetCompletedFenceCounter();
  while (end != m_tracked_fences.end() && completed_counter >= end->first)
  {
    m_current_gpu_position = end->second;
    ++end;
  }

  if (start != end)
  {
    m_tracked_fences.erase(start, end);
    if (m_current_offset == m_current_gpu_position)
    {
      // GPU is all caught up now.
      m_current_offset = 0;
      m_current_gpu_position = 0;
      m_current_space = m_size;
    }
  }
}

// Looks for the first tracked fence which, once signaled, would leave at least `num_bytes`
// available, waits on it, and updates offset/space/GPU position accordingly.
// Returns false without waiting if no fence qualifies, or if the qualifying fence belongs to the
// current (not yet executed) fence counter.
bool MetalStreamBuffer::WaitForClearSpace(u32 num_bytes)
{
  u32 new_offset = 0;
  u32 new_space = 0;
  u32 new_gpu_position = 0;

  auto iter = m_tracked_fences.begin();
  for (; iter != m_tracked_fences.end(); ++iter)
  {
    // Would this fence bring us in line with the GPU?
    // This is the "last resort" case, where a command buffer execution has been forced
    // after no additional data has been written to it, so we can assume that after the
    // fence has been signaled the entire buffer is now consumed.
    u32 gpu_position = iter->second;
    if (m_current_offset == gpu_position)
    {
      new_offset = 0;
      new_space = m_size;
      new_gpu_position = 0;
      break;
    }

    // Assuming that we wait for this fence, are we allocating in front of the GPU?
    if (m_current_offset > gpu_position)
    {
      // This would suggest the GPU has now followed us and wrapped around, so we have from
      // m_current_offset..m_size free, as well as 0..gpu_position.
      const u32 remaining_space_after_offset = m_size - m_current_offset;
      if (remaining_space_after_offset >= num_bytes)
      {
        // Switch to allocating in front of the GPU, using the remainder of the buffer.
        new_offset = m_current_offset;
        new_space = m_size - m_current_offset;
        new_gpu_position = gpu_position;
        break;
      }

      // We can wrap around to the start, behind the GPU, if there is enough space.
      // We use > here because otherwise we'd end up lining up with the GPU, and then the
      // allocator would assume that the GPU has consumed what we just wrote.
      if (gpu_position > num_bytes)
      {
        new_offset = 0;
        new_space = gpu_position - 1;
        new_gpu_position = gpu_position;
        break;
      }
    }
    else
    {
      // We're currently allocating behind the GPU. This would give us between the current
      // offset and the GPU position worth of space to work with. Again, > because we can't
      // align the GPU position with the buffer offset.
      u32 available_space_inbetween = gpu_position - m_current_offset;
      if (available_space_inbetween > num_bytes)
      {
        // Leave the offset as-is, but update the GPU position.
        new_offset = m_current_offset;
        new_space = available_space_inbetween - 1;
        new_gpu_position = gpu_position;
        break;
      }
    }
  }

  // Did any fences satisfy this condition?
  // Has the command buffer been executed yet? If not, the caller should execute it.
  MetalDevice& dev = MetalDevice::GetInstance();
  if (iter == m_tracked_fences.end() || iter->first == dev.GetCurrentFenceCounter())
    return false;

  // Wait until this fence is signaled, then stop tracking it and everything before it.
  // In the "caught up" case (offsets equal) every tracked fence is dropped instead.
  dev.WaitForFenceCounter(iter->first);
  m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
  m_current_offset = new_offset;
  m_current_space = new_space;
  m_current_gpu_position = new_gpu_position;
  return true;
}