duckstation

duckstation, archived from the revision just before upstream changed it to a proprietary software project; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

opengl_stream_buffer.cpp (9833B)


      1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR PolyForm-Strict-1.0.0)
      3 
      4 #include "opengl_stream_buffer.h"
      5 
      6 #include "common/align.h"
      7 #include "common/assert.h"
      8 
      9 #include <array>
     10 #include <cstdio>
     11 
     12 OpenGLStreamBuffer::OpenGLStreamBuffer(GLenum target, GLuint buffer_id, u32 size)
     13   : m_target(target), m_buffer_id(buffer_id), m_size(size)
     14 {
     15 }
     16 
     17 OpenGLStreamBuffer::~OpenGLStreamBuffer()
     18 {
     19   glDeleteBuffers(1, &m_buffer_id);
     20 }
     21 
     22 void OpenGLStreamBuffer::Bind()
     23 {
     24   glBindBuffer(m_target, m_buffer_id);
     25 }
     26 
     27 void OpenGLStreamBuffer::Unbind()
     28 {
     29   glBindBuffer(m_target, 0);
     30 }
     31 
     32 void OpenGLStreamBuffer::SetDebugName(std::string_view name)
     33 {
     34 #ifdef _DEBUG
     35   if (glObjectLabel)
     36   {
     37     glObjectLabel(GL_BUFFER, GetGLBufferId(), static_cast<GLsizei>(name.length()),
     38                   static_cast<const GLchar*>(name.data()));
     39   }
     40 #endif
     41 }
     42 
     43 namespace {
     44 
     45 // Uses glBufferSubData() to update. Preferred for drivers which don't support {ARB,EXT}_buffer_storage.
     46 class BufferSubDataStreamBuffer final : public OpenGLStreamBuffer
     47 {
     48 public:
     49   ~BufferSubDataStreamBuffer() override { Common::AlignedFree(m_cpu_buffer); }
     50 
     51   MappingResult Map(u32 alignment, u32 min_size) override
     52   {
     53     return MappingResult{static_cast<void*>(m_cpu_buffer), 0, 0, m_size / alignment};
     54   }
     55 
     56   u32 Unmap(u32 used_size) override
     57   {
     58     if (used_size == 0)
     59       return 0;
     60 
     61     glBindBuffer(m_target, m_buffer_id);
     62     glBufferSubData(m_target, 0, used_size, m_cpu_buffer);
     63     return 0;
     64   }
     65 
     66   u32 GetChunkSize() const override { return m_size; }
     67 
     68   static std::unique_ptr<OpenGLStreamBuffer> Create(GLenum target, u32 size)
     69   {
     70     glGetError();
     71 
     72     GLuint buffer_id;
     73     glGenBuffers(1, &buffer_id);
     74     glBindBuffer(target, buffer_id);
     75     glBufferData(target, size, nullptr, GL_STREAM_DRAW);
     76 
     77     GLenum err = glGetError();
     78     if (err != GL_NO_ERROR)
     79     {
     80       glBindBuffer(target, 0);
     81       glDeleteBuffers(1, &buffer_id);
     82       return {};
     83     }
     84 
     85     return std::unique_ptr<OpenGLStreamBuffer>(new BufferSubDataStreamBuffer(target, buffer_id, size));
     86   }
     87 
     88 private:
     89   BufferSubDataStreamBuffer(GLenum target, GLuint buffer_id, u32 size) : OpenGLStreamBuffer(target, buffer_id, size)
     90   {
     91     m_cpu_buffer = static_cast<u8*>(Common::AlignedMalloc(size, 32));
     92     if (!m_cpu_buffer)
     93       Panic("Failed to allocate CPU storage for GL buffer");
     94   }
     95 
     96   u8* m_cpu_buffer;
     97 };
     98 
     99 // Uses BufferData() to orphan the buffer after every update. Used on Mali where BufferSubData forces a sync.
    100 class BufferDataStreamBuffer final : public OpenGLStreamBuffer
    101 {
    102 public:
    103   ~BufferDataStreamBuffer() override { Common::AlignedFree(m_cpu_buffer); }
    104 
    105   MappingResult Map(u32 alignment, u32 min_size) override
    106   {
    107     return MappingResult{static_cast<void*>(m_cpu_buffer), 0, 0, m_size / alignment};
    108   }
    109 
    110   u32 Unmap(u32 used_size) override
    111   {
    112     if (used_size == 0)
    113       return 0;
    114 
    115     glBindBuffer(m_target, m_buffer_id);
    116     glBufferData(m_target, used_size, m_cpu_buffer, GL_STREAM_DRAW);
    117     return 0;
    118   }
    119 
    120   u32 GetChunkSize() const override { return m_size; }
    121 
    122   static std::unique_ptr<OpenGLStreamBuffer> Create(GLenum target, u32 size)
    123   {
    124     glGetError();
    125 
    126     GLuint buffer_id;
    127     glGenBuffers(1, &buffer_id);
    128     glBindBuffer(target, buffer_id);
    129     glBufferData(target, size, nullptr, GL_STREAM_DRAW);
    130 
    131     GLenum err = glGetError();
    132     if (err != GL_NO_ERROR)
    133     {
    134       glBindBuffer(target, 0);
    135       glDeleteBuffers(1, &buffer_id);
    136       return {};
    137     }
    138 
    139     return std::unique_ptr<OpenGLStreamBuffer>(new BufferDataStreamBuffer(target, buffer_id, size));
    140   }
    141 
    142 private:
    143   BufferDataStreamBuffer(GLenum target, GLuint buffer_id, u32 size) : OpenGLStreamBuffer(target, buffer_id, size)
    144   {
    145     m_cpu_buffer = static_cast<u8*>(Common::AlignedMalloc(size, 32));
    146     if (!m_cpu_buffer)
    147       Panic("Failed to allocate CPU storage for GL buffer");
    148   }
    149 
    150   u8* m_cpu_buffer;
    151 };
    152 
    153 // Base class for implementations which require syncing.
    154 class SyncingStreamBuffer : public OpenGLStreamBuffer
    155 {
    156 public:
    157   enum : u32
    158   {
    159     NUM_SYNC_POINTS = 16
    160   };
    161 
    162   virtual ~SyncingStreamBuffer() override
    163   {
    164     for (u32 i = m_available_block_index; i <= m_used_block_index; i++)
    165     {
    166       DebugAssert(m_sync_objects[i]);
    167       glDeleteSync(m_sync_objects[i]);
    168     }
    169   }
    170 
    171 protected:
    172   SyncingStreamBuffer(GLenum target, GLuint buffer_id, u32 size)
    173     : OpenGLStreamBuffer(target, buffer_id, size), m_bytes_per_block((size + (NUM_SYNC_POINTS)-1) / NUM_SYNC_POINTS)
    174   {
    175   }
    176 
    177   ALWAYS_INLINE u32 GetSyncIndexForOffset(u32 offset) { return offset / m_bytes_per_block; }
    178 
    179   ALWAYS_INLINE void AddSyncsForOffset(u32 offset)
    180   {
    181     const u32 end = GetSyncIndexForOffset(offset);
    182     for (; m_used_block_index < end; m_used_block_index++)
    183     {
    184       DebugAssert(!m_sync_objects[m_used_block_index]);
    185       m_sync_objects[m_used_block_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
    186     }
    187   }
    188 
    189   ALWAYS_INLINE void WaitForSync(GLsync& sync)
    190   {
    191     glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
    192     glDeleteSync(sync);
    193     sync = nullptr;
    194   }
    195 
    196   ALWAYS_INLINE void EnsureSyncsWaitedForOffset(u32 offset)
    197   {
    198     const u32 end = std::min<u32>(GetSyncIndexForOffset(offset) + 1, NUM_SYNC_POINTS);
    199     for (; m_available_block_index < end; m_available_block_index++)
    200     {
    201       DebugAssert(m_sync_objects[m_available_block_index]);
    202       WaitForSync(m_sync_objects[m_available_block_index]);
    203     }
    204   }
    205 
    206   void AllocateSpace(u32 size)
    207   {
    208     // add sync objects for writes since the last allocation
    209     AddSyncsForOffset(m_position);
    210 
    211     // wait for sync objects for the space we want to use
    212     EnsureSyncsWaitedForOffset(m_position + size);
    213 
    214     // wrap-around?
    215     if ((m_position + size) > m_size)
    216     {
    217       // current position ... buffer end
    218       AddSyncsForOffset(m_size);
    219 
    220       // rewind, and try again
    221       m_position = 0;
    222 
    223       // wait for the sync at the start of the buffer
    224       WaitForSync(m_sync_objects[0]);
    225       m_available_block_index = 1;
    226 
    227       // and however much more we need to satisfy the allocation
    228       EnsureSyncsWaitedForOffset(size);
    229       m_used_block_index = 0;
    230     }
    231   }
    232 
    233   u32 GetChunkSize() const override { return m_size / NUM_SYNC_POINTS; }
    234 
    235   u32 m_position = 0;
    236   u32 m_used_block_index = 0;
    237   u32 m_available_block_index = NUM_SYNC_POINTS;
    238   u32 m_bytes_per_block;
    239   std::array<GLsync, NUM_SYNC_POINTS> m_sync_objects{};
    240 };
    241 
    242 class BufferStorageStreamBuffer : public SyncingStreamBuffer
    243 {
    244 public:
    245   ~BufferStorageStreamBuffer() override
    246   {
    247     glBindBuffer(m_target, m_buffer_id);
    248     glUnmapBuffer(m_target);
    249     glBindBuffer(m_target, 0);
    250   }
    251 
    252   MappingResult Map(u32 alignment, u32 min_size) override
    253   {
    254     if (m_position > 0)
    255       m_position = Common::AlignUp(m_position, alignment);
    256 
    257     AllocateSpace(min_size);
    258     DebugAssert((m_position + min_size) <= (m_available_block_index * m_bytes_per_block));
    259 
    260     const u32 free_space_in_block = ((m_available_block_index * m_bytes_per_block) - m_position);
    261     return MappingResult{static_cast<void*>(m_mapped_ptr + m_position), m_position, m_position / alignment,
    262                          free_space_in_block / alignment};
    263   }
    264 
    265   u32 Unmap(u32 used_size) override
    266   {
    267     DebugAssert((m_position + used_size) <= m_size);
    268     if (!m_coherent)
    269     {
    270       if (GLAD_GL_VERSION_4_5 || GLAD_GL_ARB_direct_state_access)
    271       {
    272         glFlushMappedNamedBufferRange(m_buffer_id, m_position, used_size);
    273       }
    274       else
    275       {
    276         Bind();
    277         glFlushMappedBufferRange(m_target, m_position, used_size);
    278       }
    279     }
    280 
    281     const u32 prev_position = m_position;
    282     m_position += used_size;
    283     return prev_position;
    284   }
    285 
    286   static std::unique_ptr<OpenGLStreamBuffer> Create(GLenum target, u32 size, bool coherent = true)
    287   {
    288     glGetError();
    289 
    290     GLuint buffer_id;
    291     glGenBuffers(1, &buffer_id);
    292     glBindBuffer(target, buffer_id);
    293 
    294     const u32 flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
    295     const u32 map_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT);
    296     if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage)
    297       glBufferStorage(target, size, nullptr, flags);
    298     else if (GLAD_GL_EXT_buffer_storage)
    299       glBufferStorageEXT(target, size, nullptr, flags);
    300 
    301     GLenum err = glGetError();
    302     if (err != GL_NO_ERROR)
    303     {
    304       glBindBuffer(target, 0);
    305       glDeleteBuffers(1, &buffer_id);
    306       return {};
    307     }
    308 
    309     u8* mapped_ptr = static_cast<u8*>(glMapBufferRange(target, 0, size, map_flags));
    310     AssertMsg(mapped_ptr, "Persistent buffer was mapped");
    311 
    312     return std::unique_ptr<OpenGLStreamBuffer>(
    313       new BufferStorageStreamBuffer(target, buffer_id, size, mapped_ptr, coherent));
    314   }
    315 
    316 private:
    317   BufferStorageStreamBuffer(GLenum target, GLuint buffer_id, u32 size, u8* mapped_ptr, bool coherent)
    318     : SyncingStreamBuffer(target, buffer_id, size), m_mapped_ptr(mapped_ptr), m_coherent(coherent)
    319   {
    320   }
    321 
    322   u8* m_mapped_ptr;
    323   bool m_coherent;
    324 };
    325 
    326 } // namespace
    327 
    328 std::unique_ptr<OpenGLStreamBuffer> OpenGLStreamBuffer::Create(GLenum target, u32 size)
    329 {
    330   std::unique_ptr<OpenGLStreamBuffer> buf;
    331   if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage || GLAD_GL_EXT_buffer_storage)
    332   {
    333     buf = BufferStorageStreamBuffer::Create(target, size);
    334     if (buf)
    335       return buf;
    336   }
    337 
    338   // BufferSubData is slower on all drivers except NVIDIA...
    339 #if 0
    340   const char* vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
    341   if (std::strcmp(vendor, "ARM") == 0 || std::strcmp(vendor, "Qualcomm") == 0)
    342   {
    343     // Mali and Adreno drivers can't do sub-buffer tracking...
    344     return BufferDataStreamBuffer::Create(target, size);
    345   }
    346 
    347   return BufferSubDataStreamBuffer::Create(target, size);
    348 #else
    349   return BufferDataStreamBuffer::Create(target, size);
    350 #endif
    351 }