opengl_stream_buffer.cpp (9833B)
1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR PolyForm-Strict-1.0.0) 3 4 #include "opengl_stream_buffer.h" 5 6 #include "common/align.h" 7 #include "common/assert.h" 8 9 #include <array> 10 #include <cstdio> 11 12 OpenGLStreamBuffer::OpenGLStreamBuffer(GLenum target, GLuint buffer_id, u32 size) 13 : m_target(target), m_buffer_id(buffer_id), m_size(size) 14 { 15 } 16 17 OpenGLStreamBuffer::~OpenGLStreamBuffer() 18 { 19 glDeleteBuffers(1, &m_buffer_id); 20 } 21 22 void OpenGLStreamBuffer::Bind() 23 { 24 glBindBuffer(m_target, m_buffer_id); 25 } 26 27 void OpenGLStreamBuffer::Unbind() 28 { 29 glBindBuffer(m_target, 0); 30 } 31 32 void OpenGLStreamBuffer::SetDebugName(std::string_view name) 33 { 34 #ifdef _DEBUG 35 if (glObjectLabel) 36 { 37 glObjectLabel(GL_BUFFER, GetGLBufferId(), static_cast<GLsizei>(name.length()), 38 static_cast<const GLchar*>(name.data())); 39 } 40 #endif 41 } 42 43 namespace { 44 45 // Uses glBufferSubData() to update. Preferred for drivers which don't support {ARB,EXT}_buffer_storage. 46 class BufferSubDataStreamBuffer final : public OpenGLStreamBuffer 47 { 48 public: 49 ~BufferSubDataStreamBuffer() override { Common::AlignedFree(m_cpu_buffer); } 50 51 MappingResult Map(u32 alignment, u32 min_size) override 52 { 53 return MappingResult{static_cast<void*>(m_cpu_buffer), 0, 0, m_size / alignment}; 54 } 55 56 u32 Unmap(u32 used_size) override 57 { 58 if (used_size == 0) 59 return 0; 60 61 glBindBuffer(m_target, m_buffer_id); 62 glBufferSubData(m_target, 0, used_size, m_cpu_buffer); 63 return 0; 64 } 65 66 u32 GetChunkSize() const override { return m_size; } 67 68 static std::unique_ptr<OpenGLStreamBuffer> Create(GLenum target, u32 size) 69 { 70 glGetError(); 71 72 GLuint buffer_id; 73 glGenBuffers(1, &buffer_id); 74 glBindBuffer(target, buffer_id); 75 glBufferData(target, size, nullptr, GL_STREAM_DRAW); 76 77 GLenum err = glGetError(); 78 if (err != GL_NO_ERROR) 79 { 80 glBindBuffer(target, 0); 81 glDeleteBuffers(1, &buffer_id); 82 return {}; 83 } 84 85 return std::unique_ptr<OpenGLStreamBuffer>(new BufferSubDataStreamBuffer(target, buffer_id, size)); 86 } 87 88 private: 89 BufferSubDataStreamBuffer(GLenum target, GLuint buffer_id, u32 size) : OpenGLStreamBuffer(target, buffer_id, size) 90 { 91 m_cpu_buffer = static_cast<u8*>(Common::AlignedMalloc(size, 32)); 92 if (!m_cpu_buffer) 93 Panic("Failed to allocate CPU storage for GL buffer"); 94 } 95 96 u8* m_cpu_buffer; 97 }; 98 99 // Uses BufferData() to orphan the buffer after every update. Used on Mali where BufferSubData forces a sync. 100 class BufferDataStreamBuffer final : public OpenGLStreamBuffer 101 { 102 public: 103 ~BufferDataStreamBuffer() override { Common::AlignedFree(m_cpu_buffer); } 104 105 MappingResult Map(u32 alignment, u32 min_size) override 106 { 107 return MappingResult{static_cast<void*>(m_cpu_buffer), 0, 0, m_size / alignment}; 108 } 109 110 u32 Unmap(u32 used_size) override 111 { 112 if (used_size == 0) 113 return 0; 114 115 glBindBuffer(m_target, m_buffer_id); 116 glBufferData(m_target, used_size, m_cpu_buffer, GL_STREAM_DRAW); 117 return 0; 118 } 119 120 u32 GetChunkSize() const override { return m_size; } 121 122 static std::unique_ptr<OpenGLStreamBuffer> Create(GLenum target, u32 size) 123 { 124 glGetError(); 125 126 GLuint buffer_id; 127 glGenBuffers(1, &buffer_id); 128 glBindBuffer(target, buffer_id); 129 glBufferData(target, size, nullptr, GL_STREAM_DRAW); 130 131 GLenum err = glGetError(); 132 if (err != GL_NO_ERROR) 133 { 134 glBindBuffer(target, 0); 135 glDeleteBuffers(1, &buffer_id); 136 return {}; 137 } 138 139 return std::unique_ptr<OpenGLStreamBuffer>(new BufferDataStreamBuffer(target, buffer_id, size)); 140 } 141 142 private: 143 BufferDataStreamBuffer(GLenum target, GLuint buffer_id, u32 size) : OpenGLStreamBuffer(target, buffer_id, size) 144 { 145 m_cpu_buffer = static_cast<u8*>(Common::AlignedMalloc(size, 32)); 146 if (!m_cpu_buffer) 147 Panic("Failed to allocate CPU storage for GL buffer"); 148 } 149 150 u8* m_cpu_buffer; 151 }; 152 153 // Base class for implementations which require syncing. 154 class SyncingStreamBuffer : public OpenGLStreamBuffer 155 { 156 public: 157 enum : u32 158 { 159 NUM_SYNC_POINTS = 16 160 }; 161 162 virtual ~SyncingStreamBuffer() override 163 { 164 for (u32 i = m_available_block_index; i <= m_used_block_index; i++) 165 { 166 DebugAssert(m_sync_objects[i]); 167 glDeleteSync(m_sync_objects[i]); 168 } 169 } 170 171 protected: 172 SyncingStreamBuffer(GLenum target, GLuint buffer_id, u32 size) 173 : OpenGLStreamBuffer(target, buffer_id, size), m_bytes_per_block((size + (NUM_SYNC_POINTS)-1) / NUM_SYNC_POINTS) 174 { 175 } 176 177 ALWAYS_INLINE u32 GetSyncIndexForOffset(u32 offset) { return offset / m_bytes_per_block; } 178 179 ALWAYS_INLINE void AddSyncsForOffset(u32 offset) 180 { 181 const u32 end = GetSyncIndexForOffset(offset); 182 for (; m_used_block_index < end; m_used_block_index++) 183 { 184 DebugAssert(!m_sync_objects[m_used_block_index]); 185 m_sync_objects[m_used_block_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); 186 } 187 } 188 189 ALWAYS_INLINE void WaitForSync(GLsync& sync) 190 { 191 glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); 192 glDeleteSync(sync); 193 sync = nullptr; 194 } 195 196 ALWAYS_INLINE void EnsureSyncsWaitedForOffset(u32 offset) 197 { 198 const u32 end = std::min<u32>(GetSyncIndexForOffset(offset) + 1, NUM_SYNC_POINTS); 199 for (; m_available_block_index < end; m_available_block_index++) 200 { 201 DebugAssert(m_sync_objects[m_available_block_index]); 202 WaitForSync(m_sync_objects[m_available_block_index]); 203 } 204 } 205 206 void AllocateSpace(u32 size) 207 { 208 // add sync objects for writes since the last allocation 209 AddSyncsForOffset(m_position); 210 211 // wait for sync objects for the space we want to use 212 EnsureSyncsWaitedForOffset(m_position + size); 213 214 // wrap-around? 215 if ((m_position + size) > m_size) 216 { 217 // current position ... buffer end 218 AddSyncsForOffset(m_size); 219 220 // rewind, and try again 221 m_position = 0; 222 223 // wait for the sync at the start of the buffer 224 WaitForSync(m_sync_objects[0]); 225 m_available_block_index = 1; 226 227 // and however much more we need to satisfy the allocation 228 EnsureSyncsWaitedForOffset(size); 229 m_used_block_index = 0; 230 } 231 } 232 233 u32 GetChunkSize() const override { return m_size / NUM_SYNC_POINTS; } 234 235 u32 m_position = 0; 236 u32 m_used_block_index = 0; 237 u32 m_available_block_index = NUM_SYNC_POINTS; 238 u32 m_bytes_per_block; 239 std::array<GLsync, NUM_SYNC_POINTS> m_sync_objects{}; 240 }; 241 242 class BufferStorageStreamBuffer : public SyncingStreamBuffer 243 { 244 public: 245 ~BufferStorageStreamBuffer() override 246 { 247 glBindBuffer(m_target, m_buffer_id); 248 glUnmapBuffer(m_target); 249 glBindBuffer(m_target, 0); 250 } 251 252 MappingResult Map(u32 alignment, u32 min_size) override 253 { 254 if (m_position > 0) 255 m_position = Common::AlignUp(m_position, alignment); 256 257 AllocateSpace(min_size); 258 DebugAssert((m_position + min_size) <= (m_available_block_index * m_bytes_per_block)); 259 260 const u32 free_space_in_block = ((m_available_block_index * m_bytes_per_block) - m_position); 261 return MappingResult{static_cast<void*>(m_mapped_ptr + m_position), m_position, m_position / alignment, 262 free_space_in_block / alignment}; 263 } 264 265 u32 Unmap(u32 used_size) override 266 { 267 DebugAssert((m_position + used_size) <= m_size); 268 if (!m_coherent) 269 { 270 if (GLAD_GL_VERSION_4_5 || GLAD_GL_ARB_direct_state_access) 271 { 272 glFlushMappedNamedBufferRange(m_buffer_id, m_position, used_size); 273 } 274 else 275 { 276 Bind(); 277 glFlushMappedBufferRange(m_target, m_position, used_size); 278 } 279 } 280 281 const u32 prev_position = m_position; 282 m_position += used_size; 283 return prev_position; 284 } 285 286 static std::unique_ptr<OpenGLStreamBuffer> Create(GLenum target, u32 size, bool coherent = true) 287 { 288 glGetError(); 289 290 GLuint buffer_id; 291 glGenBuffers(1, &buffer_id); 292 glBindBuffer(target, buffer_id); 293 294 const u32 flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); 295 const u32 map_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT); 296 if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage) 297 glBufferStorage(target, size, nullptr, flags); 298 else if (GLAD_GL_EXT_buffer_storage) 299 glBufferStorageEXT(target, size, nullptr, flags); 300 301 GLenum err = glGetError(); 302 if (err != GL_NO_ERROR) 303 { 304 glBindBuffer(target, 0); 305 glDeleteBuffers(1, &buffer_id); 306 return {}; 307 } 308 309 u8* mapped_ptr = static_cast<u8*>(glMapBufferRange(target, 0, size, map_flags)); 310 AssertMsg(mapped_ptr, "Persistent buffer was mapped"); 311 312 return std::unique_ptr<OpenGLStreamBuffer>( 313 new BufferStorageStreamBuffer(target, buffer_id, size, mapped_ptr, coherent)); 314 } 315 316 private: 317 BufferStorageStreamBuffer(GLenum target, GLuint buffer_id, u32 size, u8* mapped_ptr, bool coherent) 318 : SyncingStreamBuffer(target, buffer_id, size), m_mapped_ptr(mapped_ptr), m_coherent(coherent) 319 { 320 } 321 322 u8* m_mapped_ptr; 323 bool m_coherent; 324 }; 325 326 } // namespace 327 328 std::unique_ptr<OpenGLStreamBuffer> OpenGLStreamBuffer::Create(GLenum target, u32 size) 329 { 330 std::unique_ptr<OpenGLStreamBuffer> buf; 331 if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage || GLAD_GL_EXT_buffer_storage) 332 { 333 buf = BufferStorageStreamBuffer::Create(target, size); 334 if (buf) 335 return buf; 336 } 337 338 // BufferSubData is slower on all drivers except NVIDIA... 339 #if 0 340 const char* vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); 341 if (std::strcmp(vendor, "ARM") == 0 || std::strcmp(vendor, "Qualcomm") == 0) 342 { 343 // Mali and Adreno drivers can't do sub-buffer tracking... 344 return BufferDataStreamBuffer::Create(target, size); 345 } 346 347 return BufferSubDataStreamBuffer::Create(target, size); 348 #else 349 return BufferDataStreamBuffer::Create(target, size); 350 #endif 351 }