duckstation

duckstation, archived from the revision just before upstream changed it to a proprietary software project; this version is the libre one
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

metal_stream_buffer.mm (8243B)


      1 // SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
      3 
      4 #include "metal_stream_buffer.h"
      5 #include "metal_device.h"
      6 
      7 #include "common/align.h"
      8 #include "common/assert.h"
      9 #include "common/log.h"
     10 
     11 Log_SetChannel(MetalDevice);
     12 
     // Default construction: nothing happens beyond the members' own initialisation.
     // A usable buffer only exists after a successful call to Create().
     MetalStreamBuffer::MetalStreamBuffer() = default;

     // Tears the buffer down on destruction, but only when IsValid() reports that
     // there is something to tear down.
     MetalStreamBuffer::~MetalStreamBuffer()
     {
       if (!IsValid())
         return;

       Destroy();
     }
     20 
     // Allocates a CPU-visible Metal buffer of `size` bytes on `device` and resets the
     // allocator bookkeeping so that streaming starts again from offset zero.
     //
     // Any buffer currently held is destroyed, but only after the new allocation has
     // succeeded; on failure the existing state is left untouched.
     //
     // Returns true on success, false if Metal could not allocate the buffer.
     bool MetalStreamBuffer::Create(id<MTLDevice> device, u32 size)
     {
       @autoreleasepool
       {
         // Shared storage so the CPU can write directly into the buffer; write-combined
         // caching because the CPU side only ever writes through the mapped pointer here.
         const MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeWriteCombined;

         // This file manages reference counts manually (see the explicit release in
         // Destroy()). Methods in the "new" family hand back an object the caller already
         // owns (+1), so no additional retain is required to keep it alive.
         id<MTLBuffer> new_buffer = [device newBufferWithLength:size options:options];
         if (new_buffer == nil)
         {
           ERROR_LOG("Failed to create buffer.");
           return false;
         }

         if (IsValid())
           Destroy();

         // Replace with the new buffer
         m_size = size;
         m_current_offset = 0;
         m_current_gpu_position = 0;
         m_tracked_fences.clear();

         // Take over the +1 reference returned above. Retaining again here would leave
         // the count at +2, which the single release in Destroy() could never balance,
         // leaking the whole buffer every time it is destroyed or re-created.
         m_buffer = new_buffer;
         m_host_pointer = static_cast<u8*>([new_buffer contents]);
         return true;
       }
     }
     47 
     // Gives up the Metal buffer and returns the allocator bookkeeping to its empty state.
     void MetalStreamBuffer::Destroy()
     {
       // Drop our reference first. Messaging nil is a no-op, so this is harmless when
       // no buffer is currently held.
       [m_buffer release];
       m_buffer = nil;
       m_host_pointer = nullptr;

       // With the buffer gone, none of the recorded positions mean anything any more.
       m_tracked_fences.clear();
       m_current_gpu_position = 0;
       m_current_offset = 0;
       m_size = 0;
     }
     58 
     // Finds room for `num_bytes` (plus padding for `alignment`) in the ring buffer.
     //
     // On success m_current_offset is aligned and points at the reserved region, and
     // m_current_space holds how many bytes may be written there; the caller finishes
     // the allocation with CommitMemory().
     //
     // Returns false when no space can be found, even after waiting on a tracked fence.
     // In that situation too much of the buffer is in use by the command buffer that is
     // currently being recorded, so the only option left is to execute it and wait.
     bool MetalStreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
     {
       // Budget for the worst case, where aligning the offset costs a full `alignment`.
       const u32 worst_case_bytes = num_bytes + alignment;

       // A request that can never fit, no matter how empty the buffer is.
       if (worst_case_bytes > m_size) [[unlikely]]
       {
         ERROR_LOG("Attempting to allocate {} bytes from a {} byte stream buffer", num_bytes, m_size);
         Panic("Stream buffer overflow");
         return false;
       }

       // Pick up any fences that have completed since the last reservation.
       UpdateGPUPosition();

       if (m_current_offset >= m_current_gpu_position)
       {
         // We are writing at or ahead of the GPU: the tail of the buffer is free.
         const u32 tail_bytes = m_size - m_current_offset;
         if (worst_case_bytes <= tail_bytes)
         {
           m_current_offset = Common::AlignUp(m_current_offset, alignment);
           m_current_space = m_size - m_current_offset;
           return true;
         }

         // Otherwise try wrapping to the start of the buffer. The comparison is strict
         // because ending up with m_current_offset == m_current_gpu_position would look
         // as if the GPU had caught up with us, which it has not.
         if (m_current_gpu_position > worst_case_bytes)
         {
           m_current_offset = 0;
           m_current_space = m_current_gpu_position - 1;
           return true;
         }
       }
       else
       {
         // We are writing behind the GPU: only the gap up to its position is free.
         // Strict comparison again, so we never line up exactly with the GPU.
         const u32 gap_bytes = m_current_gpu_position - m_current_offset;
         if (gap_bytes > worst_case_bytes)
         {
           m_current_offset = Common::AlignUp(m_current_offset, alignment);
           m_current_space = m_current_gpu_position - m_current_offset - 1;
           return true;
         }
       }

       // Nothing is free right now. See whether waiting on a tracked fence helps.
       if (!WaitForClearSpace(worst_case_bytes))
         return false;

       // WaitForClearSpace() chose an unaligned offset; pay for the padding out of the
       // space it reported.
       const u32 aligned_offset = Common::AlignUp(m_current_offset, alignment);
       const u32 padding = aligned_offset - m_current_offset;
       m_current_offset = aligned_offset;
       m_current_space -= padding;
       return true;
     }
    126 
    // Completes an allocation: marks `final_num_bytes` of the current region as used
    // and records the new write offset against the current fence counter.
    //
    // Debug builds assert that the commit stays within both the buffer and the space
    // currently tracked in m_current_space.
    void MetalStreamBuffer::CommitMemory(u32 final_num_bytes)
    {
      DebugAssert((m_current_offset + final_num_bytes) <= m_size);
      DebugAssert(final_num_bytes <= m_current_space);

      // Shrink the free region and move the write cursor past the committed bytes.
      m_current_space -= final_num_bytes;
      m_current_offset += final_num_bytes;

      UpdateCurrentFencePosition();
    }
    136 
    // Records the current write offset against the device's current fence counter.
    //
    // m_tracked_fences holds (fence counter, buffer offset) pairs. At most one entry is
    // kept per counter value: repeated commits under the same counter overwrite the
    // offset of the newest entry instead of appending another one.
    void MetalStreamBuffer::UpdateCurrentFencePosition()
    {
      const u64 current_counter = MetalDevice::GetInstance().GetCurrentFenceCounter();

      const bool needs_new_entry = m_tracked_fences.empty() || m_tracked_fences.back().first != current_counter;
      if (needs_new_entry)
      {
        // First commit seen under this counter value.
        m_tracked_fences.emplace_back(current_counter, m_current_offset);
      }
      else
      {
        // The counter has not moved since the last commit, so just move the offset along.
        m_tracked_fences.back().second = m_current_offset;
      }
    }
    151 
    // Retires every tracked fence whose counter the device reports as completed, and
    // advances m_current_gpu_position to the offset recorded by the newest of them.
    //
    // If that leaves the GPU position equal to the write offset, the allocator is
    // rewound to the start of the buffer with all of it available.
    void MetalStreamBuffer::UpdateGPUPosition()
    {
      const u64 completed_counter = MetalDevice::GetInstance().GetCompletedFenceCounter();

      // Walk the completed fences from the front; entries are appended in commit order,
      // so the last one visited carries the most recent offset.
      auto first_pending = m_tracked_fences.begin();
      for (; first_pending != m_tracked_fences.end() && first_pending->first <= completed_counter; ++first_pending)
        m_current_gpu_position = first_pending->second;

      // No fence completed since the last call, so there is nothing to retire.
      if (first_pending == m_tracked_fences.begin())
        return;

      m_tracked_fences.erase(m_tracked_fences.begin(), first_pending);

      if (m_current_offset != m_current_gpu_position)
        return;

      // GPU is all caught up now, so start over from the beginning of the buffer.
      m_current_offset = 0;
      m_current_gpu_position = 0;
      m_current_space = m_size;
    }
    176 
    // Looks for the earliest tracked fence whose completion would free at least
    // `num_bytes`, waits for it, and then updates the allocator state to match.
    //
    // Returns false without waiting when no tracked fence would free enough space, or
    // when the fence that would belongs to the device's current fence counter. In the
    // latter case the command buffer has not been executed yet, so the caller should
    // execute it.
    //
    // On success m_current_offset, m_current_space and m_current_gpu_position describe
    // the newly available region. The offset is not aligned here.
    bool MetalStreamBuffer::WaitForClearSpace(u32 num_bytes)
    {
      // State to adopt once the chosen fence has been waited on.
      u32 offset_after_wait = 0;
      u32 space_after_wait = 0;
      u32 gpu_position_after_wait = 0;

      auto candidate = m_tracked_fences.begin();
      while (candidate != m_tracked_fences.end())
      {
        const u32 fence_position = candidate->second;

        // This fence brings the GPU in line with our write offset. This is the "last
        // resort" case, where a command buffer execution was forced without any further
        // data being written, so once it is signaled the entire buffer has been consumed.
        if (fence_position == m_current_offset)
        {
          offset_after_wait = 0;
          space_after_wait = m_size;
          gpu_position_after_wait = 0;
          break;
        }

        if (fence_position < m_current_offset)
        {
          // After this fence we would be allocating in front of the GPU, which leaves
          // m_current_offset..m_size free as well as 0..fence_position.
          const u32 tail_bytes = m_size - m_current_offset;
          if (num_bytes <= tail_bytes)
          {
            // Keep going in front of the GPU, using the remainder of the buffer.
            offset_after_wait = m_current_offset;
            space_after_wait = m_size - m_current_offset;
            gpu_position_after_wait = fence_position;
            break;
          }

          // Wrap around to the start, behind the GPU. The comparison is strict because
          // lining up exactly with the GPU would make the allocator believe that the
          // GPU has already consumed what we are about to write.
          if (num_bytes < fence_position)
          {
            offset_after_wait = 0;
            space_after_wait = fence_position - 1;
            gpu_position_after_wait = fence_position;
            break;
          }
        }
        else
        {
          // We are allocating behind the GPU, so this fence would open up the gap
          // between our offset and its position. Strict again, for the same reason.
          const u32 gap_bytes = fence_position - m_current_offset;
          if (num_bytes < gap_bytes)
          {
            // Leave the offset as-is, but update the GPU position.
            offset_after_wait = m_current_offset;
            space_after_wait = gap_bytes - 1;
            gpu_position_after_wait = fence_position;
            break;
          }
        }

        ++candidate;
      }

      MetalDevice& device = MetalDevice::GetInstance();

      // No fence satisfied the request.
      if (candidate == m_tracked_fences.end())
        return false;

      // The only fence that would help has not been submitted yet; waiting on it here
      // is not an option, so leave it to the caller to execute the command buffer.
      if (candidate->first == device.GetCurrentFenceCounter())
        return false;

      // Wait until this fence is signaled.
      device.WaitForFenceCounter(candidate->first);

      // Retire everything up to and including the fence we waited on. In the
      // fully-caught-up case every tracked fence is dropped instead.
      auto retire_end = m_tracked_fences.end();
      if (candidate->second != m_current_offset)
        retire_end = ++candidate;
      m_tracked_fences.erase(m_tracked_fences.begin(), retire_end);

      m_current_offset = offset_after_wait;
      m_current_space = space_after_wait;
      m_current_gpu_position = gpu_position_after_wait;
      return true;
    }
    255 }