duckstation

DuckStation, archived from the revision just before upstream relicensed the project as proprietary software; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

media_capture.cpp (104134B)


      1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR PolyForm-Strict-1.0.0)
      3 
      4 #include "media_capture.h"
      5 #include "gpu_device.h"
      6 #include "host.h"
      7 
      8 #include "common/align.h"
      9 #include "common/assert.h"
     10 #include "common/dynamic_library.h"
     11 #include "common/error.h"
     12 #include "common/file_system.h"
     13 #include "common/gsvector.h"
     14 #include "common/log.h"
     15 #include "common/path.h"
     16 #include "common/string_util.h"
     17 #include "common/threading.h"
     18 
     19 #include "IconsFontAwesome5.h"
     20 #include "fmt/format.h"
     21 
     22 #include <algorithm>
     23 #include <atomic>
     24 #include <condition_variable>
     25 #include <cstring>
     26 #include <deque>
     27 #include <limits>
     28 #include <mutex>
     29 #include <string>
     30 #include <thread>
     31 
     32 #ifdef _WIN32
     33 #include "common/windows_headers.h"
     34 
     35 #include <Mferror.h>
     36 #include <codecapi.h>
     37 #include <mfapi.h>
     38 #include <mfidl.h>
     39 #include <mfreadwrite.h>
     40 #include <wrl/client.h>
     41 
     42 #pragma comment(lib, "mfuuid")
     43 #endif
     44 
     45 #ifndef __ANDROID__
     46 
     47 #ifdef _MSC_VER
     48 #pragma warning(push)
     49 #pragma warning(disable : 4244) // warning C4244: 'return': conversion from 'int' to 'uint8_t', possible loss of data
     50 #endif
     51 
     52 extern "C" {
     53 #include "libavcodec/avcodec.h"
     54 #include "libavcodec/version.h"
     55 #include "libavformat/avformat.h"
     56 #include "libavformat/version.h"
     57 #include "libavutil/dict.h"
     58 #include "libavutil/opt.h"
     59 #include "libavutil/version.h"
     60 #include "libswresample/swresample.h"
     61 #include "libswresample/version.h"
     62 #include "libswscale/swscale.h"
     63 #include "libswscale/version.h"
     64 }
     65 
     66 #ifdef _MSC_VER
     67 #pragma warning(pop)
     68 #endif
     69 
     70 #endif
     71 
     72 Log_SetChannel(MediaCapture);
     73 
     74 namespace {
     75 
// Capture width/height must be multiples of these values; BeginCapture()
// rejects unaligned dimensions (presumably required by the encoders'
// planar/YUV formats — confirm against the backend transforms).
static constexpr u32 VIDEO_WIDTH_ALIGNMENT = 8;
static constexpr u32 VIDEO_HEIGHT_ALIGNMENT = 8;
     78 
// Shared scaffolding for all media-capture backends. Owns the pending video
// frame ring (download -> map -> encode pipeline), the interleaved stereo
// audio ring buffer, and the worker thread that feeds data to the
// backend-specific encoder hooks (SendFrame/ProcessAudioPackets/Internal*).
class ALIGN_TO_CACHE_LINE MediaCaptureBase : public MediaCapture
{
public:
  // Number of frames that may sit in the GPU-readback (map) stage at once.
  static constexpr u32 NUM_FRAMES_IN_FLIGHT = 3;
  // Total slots in the pending-frame ring (map stage + encode stage).
  static constexpr u32 MAX_PENDING_FRAMES = NUM_FRAMES_IN_FLIGHT * 2;
  // Captured audio is always interleaved stereo.
  static constexpr u32 AUDIO_CHANNELS = 2;

  virtual ~MediaCaptureBase() override;

  bool BeginCapture(float fps, float aspect, u32 width, u32 height, GPUTexture::Format texture_format, u32 sample_rate,
                    std::string path, bool capture_video, std::string_view video_codec, u32 video_bitrate,
                    std::string_view video_codec_args, bool capture_audio, std::string_view audio_codec,
                    u32 audio_bitrate, std::string_view audio_codec_args, Error* error) override final;

  const std::string& GetPath() const override final;
  std::string GetNextCapturePath() const override final;
  u32 GetVideoWidth() const override final;
  u32 GetVideoHeight() const override final;
  float GetVideoFPS() const override final;

  float GetCaptureThreadUsage() const override final;
  float GetCaptureThreadTime() const override final;
  void UpdateCaptureThreadUsage(double pct_divider, double time_divider) override final;

  GPUTexture* GetRenderTexture() override final;
  bool DeliverVideoFrame(GPUTexture* stex) override final;
  bool DeliverAudioFrames(const s16* frames, u32 num_frames) override final;
  bool EndCapture(Error* error) override final;
  void Flush() override final;

protected:
  // One slot in the video pipeline; cycles Unused -> NeedsMap ->
  // NeedsEncoding -> Unused as the frame flows through the stages.
  struct PendingFrame
  {
    enum class State
    {
      Unused,
      NeedsMap,
      NeedsEncoding
    };

    std::unique_ptr<GPUDownloadTexture> tex; // CPU-readable copy of the frame
    s64 pts;                                 // presentation timestamp, in frames
    State state;
  };

  // Capacity of the audio ring buffer, in stereo frames.
  ALWAYS_INLINE u32 GetAudioBufferSizeInFrames() const
  {
    return (static_cast<u32>(m_audio_buffer.size()) / AUDIO_CHANNELS);
  }

  void ProcessFramePendingMap(std::unique_lock<std::mutex>& lock);
  void ProcessAllInFlightFrames(std::unique_lock<std::mutex>& lock);
  void EncoderThreadEntryPoint();
  void StartEncoderThread();
  void StopEncoderThread(std::unique_lock<std::mutex>& lock);
  void DeleteOutputFile();

  // Backend hooks implemented by the concrete capture classes.
  virtual void ClearState();
  virtual bool SendFrame(const PendingFrame& pf, Error* error) = 0;
  virtual bool ProcessAudioPackets(s64 video_pts, Error* error) = 0;

  virtual bool InternalBeginCapture(float fps, float aspect, u32 sample_rate, bool capture_video,
                                    std::string_view video_codec, u32 video_bitrate, std::string_view video_codec_args,
                                    bool capture_audio, std::string_view audio_codec, u32 audio_bitrate,
                                    std::string_view audio_codec_args, Error* error) = 0;
  virtual bool InternalEndCapture(std::unique_lock<std::mutex>& lock, Error* error);

  // Guards the frame-pipeline and queue state below.
  mutable std::mutex m_lock;
  std::string m_path;
  std::atomic_bool m_capturing{false};
  std::atomic_bool m_encoding_error{false};

  GPUTexture::Format m_video_render_texture_format = GPUTexture::Format::Unknown;
  u32 m_video_width = 0;
  u32 m_video_height = 0;
  float m_video_fps = 0;
  s64 m_next_video_pts = 0;
  std::unique_ptr<GPUTexture> m_render_texture;

  s64 m_next_audio_pts = 0;
  u32 m_audio_frame_pos = 0;
  u32 m_audio_frame_size = 0;

  // Encoder worker thread plus CPU-time bookkeeping for the OSD statistics.
  Threading::Thread m_encoder_thread;
  u64 m_encoder_thread_last_time = 0;
  float m_encoder_thread_usage = 0.0f;
  float m_encoder_thread_time = 0.0f;

  // Frame ring: producer appends at m_pending_frames_pos; the map and encode
  // stages consume at their respective positions, counting via the
  // m_frames_pending_* fields.
  std::condition_variable m_frame_ready_cv;
  std::condition_variable m_frame_encoded_cv;
  std::array<PendingFrame, MAX_PENDING_FRAMES> m_pending_frames = {};
  u32 m_pending_frames_pos = 0;
  u32 m_frames_pending_map = 0;
  u32 m_frames_map_consume_pos = 0;
  u32 m_frames_pending_encode = 0;
  u32 m_frames_encode_consume_pos = 0;

  // Interleaved-stereo audio ring buffer shared with the encoder thread; the
  // read position lives on its own cache line to avoid false sharing.
  DynamicHeapArray<s16> m_audio_buffer;
  std::atomic<u32> m_audio_buffer_size{0};
  u32 m_audio_buffer_write_pos = 0;
  ALIGN_TO_CACHE_LINE u32 m_audio_buffer_read_pos = 0;

  // Shared across all backends.
  [[maybe_unused]] static inline std::mutex s_load_mutex;
};
    184 
// Defaulted out-of-line; members release their resources via RAII.
MediaCaptureBase::~MediaCaptureBase() = default;
    186 
    187 bool MediaCaptureBase::BeginCapture(float fps, float aspect, u32 width, u32 height, GPUTexture::Format texture_format,
    188                                     u32 sample_rate, std::string path, bool capture_video, std::string_view video_codec,
    189                                     u32 video_bitrate, std::string_view video_codec_args, bool capture_audio,
    190                                     std::string_view audio_codec, u32 audio_bitrate, std::string_view audio_codec_args,
    191                                     Error* error)
    192 {
    193   m_video_render_texture_format = texture_format;
    194   m_video_width = width;
    195   m_video_height = height;
    196   m_video_fps = fps;
    197 
    198   if (path.empty())
    199   {
    200     Error::SetStringView(error, "No path specified.");
    201     return false;
    202   }
    203   else if (capture_video &&
    204            (fps == 0.0f || m_video_width == 0 || !Common::IsAlignedPow2(m_video_width, VIDEO_WIDTH_ALIGNMENT) ||
    205             m_video_height == 0 || !Common::IsAlignedPow2(m_video_height, VIDEO_HEIGHT_ALIGNMENT)))
    206   {
    207     Error::SetStringView(error, "Invalid video dimensions/rate.");
    208     return false;
    209   }
    210 
    211   m_path = std::move(path);
    212   m_capturing.store(true, std::memory_order_release);
    213 
    214   // allocate audio buffer, dynamic based on sample rate
    215   if (capture_audio)
    216     m_audio_buffer.resize(sample_rate * MAX_PENDING_FRAMES * AUDIO_CHANNELS);
    217 
    218   INFO_LOG("Initializing capture:");
    219   if (capture_video)
    220   {
    221     INFO_LOG("  Video: FPS={}, Aspect={}, Codec={}, Bitrate={}, Args={}", fps, aspect, video_codec, video_bitrate,
    222              video_codec_args);
    223   }
    224   if (capture_audio)
    225   {
    226     INFO_LOG("  Audio: SampleRate={}, Codec={}, Bitrate={}, Args={}", sample_rate, audio_codec, audio_bitrate,
    227              audio_codec_args);
    228   }
    229 
    230   if (!InternalBeginCapture(fps, aspect, sample_rate, capture_video, video_codec, video_bitrate, video_codec_args,
    231                             capture_audio, audio_codec, audio_bitrate, audio_codec_args, error))
    232   {
    233     ClearState();
    234     return false;
    235   }
    236 
    237   StartEncoderThread();
    238   return true;
    239 }
    240 
    241 GPUTexture* MediaCaptureBase::GetRenderTexture()
    242 {
    243   if (m_render_texture) [[likely]]
    244     return m_render_texture.get();
    245 
    246   m_render_texture = g_gpu_device->CreateTexture(m_video_width, m_video_height, 1, 1, 1, GPUTexture::Type::RenderTarget,
    247                                                  m_video_render_texture_format);
    248   if (!m_render_texture) [[unlikely]]
    249   {
    250     ERROR_LOG("Failed to create {}x{} render texture.", m_video_width, m_video_height);
    251     return nullptr;
    252   }
    253 
    254   return m_render_texture.get();
    255 }
    256 
// Queues the rendered frame (stex) into the readback pipeline. May block if
// the encoder thread is behind. Returns false when the capture should stop
// (encoder error, or the download texture could not be created).
bool MediaCaptureBase::DeliverVideoFrame(GPUTexture* stex)
{
  std::unique_lock<std::mutex> lock(m_lock);

  // If the encoder thread reported an error, stop the capture.
  if (m_encoding_error.load(std::memory_order_acquire))
    return false;

  // Too many frames waiting on readback; retire the oldest one first.
  if (m_frames_pending_map >= NUM_FRAMES_IN_FLIGHT)
    ProcessFramePendingMap(lock);

  PendingFrame& pf = m_pending_frames[m_pending_frames_pos];

  // It shouldn't be pending map, but the encode thread might be lagging.
  DebugAssert(pf.state != PendingFrame::State::NeedsMap);
  if (pf.state == PendingFrame::State::NeedsEncoding)
  {
    m_frame_encoded_cv.wait(lock, [&pf]() { return pf.state == PendingFrame::State::Unused; });
  }

  // (Re)create the download texture if this slot has none, or the source
  // dimensions changed since last use.
  if (!pf.tex || pf.tex->GetWidth() != static_cast<u32>(stex->GetWidth()) ||
      pf.tex->GetHeight() != static_cast<u32>(stex->GetHeight()))
  {
    pf.tex.reset();
    pf.tex = g_gpu_device->CreateDownloadTexture(stex->GetWidth(), stex->GetHeight(), stex->GetFormat());
    if (!pf.tex)
    {
      ERROR_LOG("Failed to create {}x{} download texture", stex->GetWidth(), stex->GetHeight());
      return false;
    }

#ifdef _DEBUG
    GL_OBJECT_NAME_FMT(pf.tex, "GSCapture {}x{} Download Texture", stex->GetWidth(), stex->GetHeight());
#endif
  }

  // Copy the frame into the download texture and hand the slot to the map
  // stage; the actual CPU mapping happens later in ProcessFramePendingMap().
  pf.tex->CopyFromTexture(0, 0, stex, 0, 0, m_video_width, m_video_height, 0, 0);
  pf.pts = m_next_video_pts++;
  pf.state = PendingFrame::State::NeedsMap;

  m_pending_frames_pos = (m_pending_frames_pos + 1) % MAX_PENDING_FRAMES;
  m_frames_pending_map++;
  return true;
}
    301 
// Takes the oldest frame in the map stage, flushes/maps its download texture
// (with m_lock temporarily released), and moves it to the encode stage.
// Called with lock held; returns with it held again.
void MediaCaptureBase::ProcessFramePendingMap(std::unique_lock<std::mutex>& lock)
{
  DebugAssert(m_frames_pending_map > 0);

  PendingFrame& pf = m_pending_frames[m_frames_map_consume_pos];
  DebugAssert(pf.state == PendingFrame::State::NeedsMap);

  // Flushing is potentially expensive, so we leave it unlocked in case the encode thread
  // needs to pick up another frame while we're waiting.
  lock.unlock();

  if (pf.tex->NeedsFlush())
    pf.tex->Flush();

  // Even if the map failed, we need to kick it to the encode thread anyway, because
  // otherwise our queue indices will get desynchronized.
  if (!pf.tex->Map(0, 0, m_video_width, m_video_height))
    WARNING_LOG("Failed to map previously flushed frame.");

  lock.lock();

  // Kick to encoder thread!
  pf.state = PendingFrame::State::NeedsEncoding;
  m_frames_map_consume_pos = (m_frames_map_consume_pos + 1) % MAX_PENDING_FRAMES;
  m_frames_pending_map--;
  m_frames_pending_encode++;
  m_frame_ready_cv.notify_one();
}
    330 
// Worker loop for the encoder thread: consumes frames from the encode stage,
// pushes them to the backend (SendFrame), and interleaves audio packet
// processing. Exits once capture has stopped and the queue is drained.
void MediaCaptureBase::EncoderThreadEntryPoint()
{
  Threading::SetNameOfCurrentThread("Media Capture Encoding");

  Error error;
  std::unique_lock<std::mutex> lock(m_lock);

  for (;;)
  {
    // Sleep until a frame is ready, or shutdown is requested.
    m_frame_ready_cv.wait(
      lock, [this]() { return (m_frames_pending_encode > 0 || !m_capturing.load(std::memory_order_acquire)); });
    if (m_frames_pending_encode == 0 && !m_capturing.load(std::memory_order_acquire))
      break;

    PendingFrame& pf = m_pending_frames[m_frames_encode_consume_pos];
    DebugAssert(!IsCapturingVideo() || pf.state == PendingFrame::State::NeedsEncoding);

    // Encoding is slow; don't hold the queue lock while doing it.
    lock.unlock();

    bool okay = !m_encoding_error;

    // If the frame failed to map, this will be false, and we'll just skip it.
    if (okay && IsCapturingVideo() && pf.tex->IsMapped())
      okay = SendFrame(pf, &error);

    // Encode as many audio frames while the video is ahead.
    if (okay && IsCapturingAudio())
      okay = ProcessAudioPackets(pf.pts, &error);

    lock.lock();

    // If we had an encoding error, tell the GS thread to shut down the capture (later).
    if (!okay) [[unlikely]]
    {
      ERROR_LOG("Encoding error: {}", error.GetDescription());
      m_encoding_error.store(true, std::memory_order_release);
    }

    // Done with this frame! Wait for the next.
    pf.state = PendingFrame::State::Unused;
    m_frames_encode_consume_pos = (m_frames_encode_consume_pos + 1) % MAX_PENDING_FRAMES;
    m_frames_pending_encode--;
    m_frame_encoded_cv.notify_all();
  }
}
    376 
// Spawns the encoder worker thread. Must only be called once per capture,
// after m_capturing has been set.
void MediaCaptureBase::StartEncoderThread()
{
  INFO_LOG("Starting encoder thread.");
  DebugAssert(m_capturing.load(std::memory_order_acquire) && !m_encoder_thread.Joinable());
  m_encoder_thread.Start([this]() { EncoderThreadEntryPoint(); });
}
    383 
// Wakes and joins the encoder thread. Caller must hold lock (on m_lock) and
// have already cleared m_capturing; the lock is dropped around the join so
// the worker can finish draining.
void MediaCaptureBase::StopEncoderThread(std::unique_lock<std::mutex>& lock)
{
  // Thread will exit when m_capturing is false.
  DebugAssert(!m_capturing.load(std::memory_order_acquire));

  if (m_encoder_thread.Joinable())
  {
    INFO_LOG("Stopping encoder thread.");

    // Might be sleeping, so wake it before joining.
    m_frame_ready_cv.notify_one();
    lock.unlock();
    m_encoder_thread.Join();
    lock.lock();
  }
}
    400 
    401 void MediaCaptureBase::ProcessAllInFlightFrames(std::unique_lock<std::mutex>& lock)
    402 {
    403   while (m_frames_pending_map > 0)
    404     ProcessFramePendingMap(lock);
    405 
    406   while (m_frames_pending_encode > 0)
    407   {
    408     m_frame_encoded_cv.wait(lock, [this]() { return (m_frames_pending_encode == 0 || m_encoding_error); });
    409   }
    410 }
    411 
    412 bool MediaCaptureBase::DeliverAudioFrames(const s16* frames, u32 num_frames)
    413 {
    414   if (!IsCapturingAudio())
    415     return true;
    416   else if (!m_capturing.load(std::memory_order_acquire))
    417     return false;
    418 
    419   const u32 audio_buffer_size = GetAudioBufferSizeInFrames();
    420   if ((audio_buffer_size - m_audio_buffer_size.load(std::memory_order_acquire)) < num_frames)
    421   {
    422     // Need to wait for it to drain a bit.
    423     std::unique_lock<std::mutex> lock(m_lock);
    424     m_frame_encoded_cv.wait(lock, [this, &num_frames, &audio_buffer_size]() {
    425       return (!m_capturing.load(std::memory_order_acquire) ||
    426               ((audio_buffer_size - m_audio_buffer_size.load(std::memory_order_acquire)) >= num_frames));
    427     });
    428     if (!m_capturing.load(std::memory_order_acquire))
    429       return false;
    430   }
    431 
    432   for (u32 remaining_frames = num_frames;;)
    433   {
    434     const u32 contig_frames = std::min(audio_buffer_size - m_audio_buffer_write_pos, remaining_frames);
    435     std::memcpy(&m_audio_buffer[m_audio_buffer_write_pos * AUDIO_CHANNELS], frames,
    436                 sizeof(s16) * AUDIO_CHANNELS * contig_frames);
    437     m_audio_buffer_write_pos = (m_audio_buffer_write_pos + contig_frames) % audio_buffer_size;
    438     remaining_frames -= contig_frames;
    439     if (remaining_frames == 0)
    440       break;
    441   }
    442 
    443   const u32 buffer_size = m_audio_buffer_size.fetch_add(num_frames, std::memory_order_release) + num_frames;
    444   if (!IsCapturingVideo() && buffer_size >= m_audio_frame_size)
    445   {
    446     // If we're not capturing video, push "frames" when we hit the audio packet size.
    447     std::unique_lock<std::mutex> lock(m_lock);
    448     if (!m_capturing.load(std::memory_order_acquire))
    449       return false;
    450 
    451     PendingFrame& pf = m_pending_frames[m_pending_frames_pos];
    452     pf.state = PendingFrame::State::NeedsEncoding;
    453     m_pending_frames_pos = (m_pending_frames_pos + 1) % MAX_PENDING_FRAMES;
    454 
    455     m_frames_pending_encode++;
    456     m_frame_ready_cv.notify_one();
    457   }
    458 
    459   return true;
    460 }
    461 
// Common teardown: drains in-flight frames (unless an encoding error already
// occurred), clears the capturing flag, and joins the encoder thread.
// Returns false if the capture ended due to an encoder error. Backends
// override this to also finalize their container, calling down to here.
bool MediaCaptureBase::InternalEndCapture(std::unique_lock<std::mutex>& lock, Error* error)
{
  DebugAssert(m_capturing.load(std::memory_order_acquire));

  const bool had_error = m_encoding_error.load(std::memory_order_acquire);
  if (!had_error)
    ProcessAllInFlightFrames(lock);

  m_capturing.store(false, std::memory_order_release);
  StopEncoderThread(lock);
  return !had_error;
}
    474 
    475 void MediaCaptureBase::ClearState()
    476 {
    477   m_next_video_pts = 0;
    478   m_next_audio_pts = 0;
    479 
    480   m_pending_frames = {};
    481   m_pending_frames_pos = 0;
    482   m_frames_pending_map = 0;
    483   m_frames_map_consume_pos = 0;
    484   m_frames_pending_encode = 0;
    485   m_frames_encode_consume_pos = 0;
    486 
    487   m_audio_buffer_read_pos = 0;
    488   m_audio_buffer_write_pos = 0;
    489   m_audio_buffer_size.store(0, std::memory_order_release);
    490   m_audio_frame_pos = 0;
    491   m_audio_buffer_size = 0;
    492   m_audio_buffer.deallocate();
    493 
    494   m_encoding_error.store(false, std::memory_order_release);
    495 }
    496 
    497 bool MediaCaptureBase::EndCapture(Error* error)
    498 {
    499   std::unique_lock<std::mutex> lock(m_lock);
    500   if (!InternalEndCapture(lock, error))
    501   {
    502     DeleteOutputFile();
    503     ClearState();
    504     return false;
    505   }
    506 
    507   ClearState();
    508   return true;
    509 }
    510 
// Path of the file currently being captured to.
const std::string& MediaCaptureBase::GetPath() const
{
  return m_path;
}
    515 
    516 std::string MediaCaptureBase::GetNextCapturePath() const
    517 {
    518   const std::string_view ext = Path::GetExtension(m_path);
    519   std::string_view name = Path::GetFileTitle(m_path);
    520 
    521   // Should end with a number.
    522   u32 partnum = 2;
    523   std::string_view::size_type pos = name.rfind("_part");
    524   if (pos != std::string_view::npos)
    525   {
    526     std::string_view::size_type cpos = pos + 5;
    527     for (; cpos < name.length(); cpos++)
    528     {
    529       if (name[cpos] < '0' || name[cpos] > '9')
    530         break;
    531     }
    532     if (cpos == name.length())
    533     {
    534       // Has existing part number, so add to it.
    535       partnum = StringUtil::FromChars<u32>(name.substr(pos + 5)).value_or(1) + 1;
    536       name = name.substr(0, pos);
    537     }
    538   }
    539 
    540   // If we haven't started a new file previously, add "_part2".
    541   return Path::BuildRelativePath(m_path, fmt::format("{}_part{:03d}.{}", name, partnum, ext));
    542 }
    543 
// Width of the captured video, in pixels.
u32 MediaCaptureBase::GetVideoWidth() const
{
  return m_video_width;
}
    548 
// Height of the captured video, in pixels.
u32 MediaCaptureBase::GetVideoHeight() const
{
  return m_video_height;
}
    553 
// Frame rate the capture was started with.
float MediaCaptureBase::GetVideoFPS() const
{
  return m_video_fps;
}
    558 
// Last computed encoder-thread CPU usage (see UpdateCaptureThreadUsage()).
float MediaCaptureBase::GetCaptureThreadUsage() const
{
  return m_encoder_thread_usage;
}
    563 
// Last computed encoder-thread time figure (see UpdateCaptureThreadUsage()).
float MediaCaptureBase::GetCaptureThreadTime() const
{
  return m_encoder_thread_time;
}
    568 
    569 void MediaCaptureBase::UpdateCaptureThreadUsage(double pct_divider, double time_divider)
    570 {
    571   const u64 time = m_encoder_thread.GetCPUTime();
    572   const u64 delta = time - m_encoder_thread_last_time;
    573   m_encoder_thread_usage = static_cast<float>(static_cast<double>(delta) * pct_divider);
    574   m_encoder_thread_time = static_cast<float>(static_cast<double>(delta) * time_divider);
    575   m_encoder_thread_last_time = time;
    576 }
    577 
// Drains all in-flight video frames and drops any buffered audio so the CPU
// thread is not delayed (used e.g. when emulation pauses/fast-forwards).
void MediaCaptureBase::Flush()
{
  std::unique_lock<std::mutex> lock(m_lock);

  if (m_encoding_error)
    return;

  ProcessAllInFlightFrames(lock);

  if (IsCapturingAudio())
  {
    // Clear any buffered audio frames out, we don't want to delay the CPU thread.
    const u32 audio_frames = m_audio_buffer_size.load(std::memory_order_acquire);
    if (audio_frames > 0)
      WARNING_LOG("Dropping {} audio frames for buffer clear.", audio_frames);

    m_audio_buffer_read_pos = 0;
    m_audio_buffer_write_pos = 0;
    m_audio_buffer_size.store(0, std::memory_order_release);
  }
}
    599 
    600 void MediaCaptureBase::DeleteOutputFile()
    601 {
    602   if (m_path.empty())
    603     return;
    604 
    605   Error error;
    606   if (FileSystem::DeleteFile(m_path.c_str(), &error))
    607   {
    608     INFO_LOG("Deleted output file {}", Path::GetFileName(m_path));
    609     m_path = {};
    610   }
    611   else
    612   {
    613     ERROR_LOG("Failed to delete output file '{}': {}", Path::GetFileName(m_path), error.GetDescription());
    614   }
    615 }
    616 
    617 #ifdef _WIN32
    618 
    619 #define VISIT_MFPLAT_IMPORTS(X)                                                                                        \
    620   X(MFCreateMediaType)                                                                                                 \
    621   X(MFCreateMemoryBuffer)                                                                                              \
    622   X(MFCreateSample)                                                                                                    \
    623   X(MFHeapFree)                                                                                                        \
    624   X(MFShutdown)                                                                                                        \
    625   X(MFStartup)                                                                                                         \
    626   X(MFTEnumEx)
    627 
    628 #define VISIT_MFREADWRITE_IMPORTS(X) X(MFCreateSinkWriterFromURL)
    629 
    630 #define VISIT_MF_IMPORTS(X) X(MFTranscodeGetAudioOutputAvailableTypes)
    631 
// Windows Media Foundation capture backend. Writes via an IMFSinkWriter,
// with optional RGB->NV12 conversion and a (possibly asynchronous) encoder
// MFT for video. The MF DLLs are loaded dynamically at first use.
class MediaCaptureMF final : public MediaCaptureBase
{
  template<class T>
  using ComPtr = Microsoft::WRL::ComPtr<T>;

  // MF timestamps/durations are expressed in 100ns units (1e7 per second).
  static constexpr u32 TEN_NANOSECONDS = 10 * 1000 * 1000;
  static constexpr DWORD INVALID_STREAM_INDEX = std::numeric_limits<DWORD>::max();
  static constexpr u32 AUDIO_BITS_PER_SAMPLE = sizeof(s16) * 8;

  // Input/intermediate formats handed to the MF transforms.
  static constexpr const GUID& AUDIO_INPUT_MEDIA_FORMAT = MFAudioFormat_PCM;
  static constexpr const GUID& VIDEO_RGB_MEDIA_FORMAT = MFVideoFormat_RGB32;
  static constexpr const GUID& VIDEO_YUV_MEDIA_FORMAT = MFVideoFormat_NV12;

public:
  ~MediaCaptureMF() override;

  static std::unique_ptr<MediaCapture> Create(Error* error);
  static ContainerList GetContainerList();
  static CodecList GetVideoCodecList(const char* container);
  static CodecList GetAudioCodecList(const char* container);

  bool IsCapturingAudio() const override;
  bool IsCapturingVideo() const override;
  time_t GetElapsedTime() const override;

protected:
  void ClearState() override;
  bool SendFrame(const PendingFrame& pf, Error* error) override;
  bool ProcessAudioPackets(s64 video_pts, Error* error) override;
  bool InternalBeginCapture(float fps, float aspect, u32 sample_rate, bool capture_video, std::string_view video_codec,
                            u32 video_bitrate, std::string_view video_codec_args, bool capture_audio,
                            std::string_view audio_codec, u32 audio_bitrate, std::string_view audio_codec_args,
                            Error* error) override;
  bool InternalEndCapture(std::unique_lock<std::mutex>& lock, Error* error) override;

private:
  ComPtr<IMFTransform> CreateVideoYUVTransform(ComPtr<IMFMediaType>* output_type, Error* error);
  ComPtr<IMFTransform> CreateVideoEncodeTransform(std::string_view codec, u32 bitrate, IMFMediaType* input_type,
                                                  ComPtr<IMFMediaType>* output_type, bool* use_async_transform,
                                                  Error* error);
  bool GetAudioTypes(std::string_view codec, ComPtr<IMFMediaType>* input_type, ComPtr<IMFMediaType>* output_type,
                     u32 sample_rate, u32 bitrate, Error* error);
  void ConvertVideoFrame(u8* dst, size_t dst_stride, const u8* src, size_t src_stride, u32 width, u32 height) const;

  bool ProcessVideoOutputSamples(Error* error); // synchronous
  bool ProcessVideoEvents(Error* error);        // asynchronous

  ComPtr<IMFSinkWriter> m_sink_writer;

  // Sink writer stream indices; INVALID_STREAM_INDEX when the stream is off.
  DWORD m_video_stream_index = INVALID_STREAM_INDEX;
  DWORD m_audio_stream_index = INVALID_STREAM_INDEX;

  // Per-sample durations, in 100ns MF units.
  LONGLONG m_video_sample_duration = 0;
  LONGLONG m_audio_sample_duration = 0;

  u32 m_frame_rate_numerator = 0;

  // RGB->YUV converter MFT plus its scratch sample, and the encoder MFT
  // (with an event generator when it runs asynchronously).
  ComPtr<IMFTransform> m_video_yuv_transform;
  ComPtr<IMFSample> m_video_yuv_sample;
  ComPtr<IMFTransform> m_video_encode_transform;
  ComPtr<IMFMediaEventGenerator> m_video_encode_event_generator;
  std::deque<ComPtr<IMFSample>> m_pending_video_samples;
  ComPtr<IMFSample> m_video_output_sample;
  u32 m_wanted_video_samples = 0;
  DWORD m_video_sample_size = 0;

  // Dynamically-resolved MF entry points (see LoadMediaFoundation()).
#define DECLARE_IMPORT(X) static inline decltype(X)* wrap_##X;
  VISIT_MFPLAT_IMPORTS(DECLARE_IMPORT);
  VISIT_MFREADWRITE_IMPORTS(DECLARE_IMPORT);
  VISIT_MF_IMPORTS(DECLARE_IMPORT);
#undef DECLARE_IMPORT

  static bool LoadMediaFoundation(Error* error);
  static void UnloadMediaFoundation();

  static inline DynamicLibrary s_mfplat_library;
  static inline DynamicLibrary s_mfreadwrite_library;
  static inline DynamicLibrary s_mf_library;
  static inline bool s_library_loaded = false;
};
    712 
// Descriptor for one selectable Media Foundation video codec.
struct MediaFoundationVideoCodec
{
  const char* name;         // short identifier for this codec entry
  const char* display_name; // human-readable description
  const GUID& guid;         // MF video subtype for the codec
  bool require_hardware;    // presumably restricts selection to hardware MFTs — confirm in encoder setup
};
// Descriptor for one selectable Media Foundation audio codec.
struct MediaFoundationAudioCodec
{
  const char* name;         // short identifier for this codec entry
  const char* display_name; // human-readable description
  const GUID& guid;         // MF audio subtype for the codec
  u32 min_bitrate;          // lower bitrate bound (presumably kbps — confirm)
  u32 max_bitrate;          // upper bitrate bound (presumably kbps — confirm)
};
    728 static constexpr const MediaFoundationVideoCodec s_media_foundation_video_codecs[] = {
    729   {"h264", "H.264 with Software Encoding", MFVideoFormat_H264, false},
    730   {"h264_hw", "H.264 with Hardware Encoding", MFVideoFormat_H264, true},
    731   {"h265", "H.265 with Software Encoding", MFVideoFormat_H265, false},
    732   {"h265_hw", "H.265 with Hardware Encoding", MFVideoFormat_H265, true},
    733   {"hevc", "HEVC with Software Encoding", MFVideoFormat_HEVC, false},
    734   {"hevc_hw", "HEVC with Hardware Encoding", MFVideoFormat_HEVC, true},
    735   {"vp9", "VP9 with Software Encoding", MFVideoFormat_VP90, false},
    736   {"vp9_hw", "VP9 with Hardware Encoding", MFVideoFormat_VP90, true},
    737   {"av1", "AV1 with Software Encoding", MFVideoFormat_AV1, false},
    738   {"av1_hw", "AV1 with Hardware Encoding", MFVideoFormat_AV1, false},
    739 };
// Audio codecs offered for MF capture, with per-codec bitrate bounds
// (units not visible here; presumably kbps given the AAC/MP3 ranges —
// confirm against the encoder configuration). PCM is unconstrained.
static constexpr const MediaFoundationAudioCodec s_media_foundation_audio_codecs[] = {
  {"aac", "Advanced Audio Coding", MFAudioFormat_AAC, 64, 224},
  {"mp3", "MPEG-2 Audio Layer III", MFAudioFormat_MP3, 64, 320},
  {"pcm", "Uncompressed PCM", MFAudioFormat_PCM, 0, std::numeric_limits<u32>::max()},
};
    745 
// Loads mfplat/mfreadwrite/mf.dll on demand, resolves every import listed in
// the VISIT_*_IMPORTS macros, and calls MFStartup(). Idempotent; serialized
// with s_load_mutex. On any failure everything is unloaded and false is
// returned with error describing the cause. Registers UnloadMediaFoundation()
// via atexit() on first success.
bool MediaCaptureMF::LoadMediaFoundation(Error* error)
{
  std::unique_lock lock(s_load_mutex);
  if (s_library_loaded)
    return true;

  bool result = s_mfplat_library.Open("mfplat.dll", error);
  result = result && s_mfreadwrite_library.Open("mfreadwrite.dll", error);
  result = result && s_mf_library.Open("mf.dll", error);

  // Resolve each wrapped entry point from the DLL that exports it.
#define RESOLVE_IMPORT(X) result = result && s_mfplat_library.GetSymbol(#X, &wrap_##X);
  VISIT_MFPLAT_IMPORTS(RESOLVE_IMPORT);
#undef RESOLVE_IMPORT

#define RESOLVE_IMPORT(X) result = result && s_mfreadwrite_library.GetSymbol(#X, &wrap_##X);
  VISIT_MFREADWRITE_IMPORTS(RESOLVE_IMPORT);
#undef RESOLVE_IMPORT

#define RESOLVE_IMPORT(X) result = result && s_mf_library.GetSymbol(#X, &wrap_##X);
  VISIT_MF_IMPORTS(RESOLVE_IMPORT);
#undef RESOLVE_IMPORT

  HRESULT hr;
  if (result && FAILED(hr = wrap_MFStartup(MF_VERSION, MFSTARTUP_NOSOCKET))) [[unlikely]]
  {
    Error::SetHResult(error, "MFStartup() failed: ", hr);
    result = false;
  }

  if (result) [[likely]]
  {
    s_library_loaded = true;
    std::atexit(&MediaCaptureMF::UnloadMediaFoundation);
    return true;
  }

  UnloadMediaFoundation();

  Error::AddPrefix(error, TRANSLATE_SV("MediaCapture", "Failed to load Media Foundation libraries: "));
  return false;
}
    787 
// Clears all resolved import pointers and closes the Media Foundation DLLs, in reverse
// order of loading. Safe to call on a partially-loaded state (pointers are just nulled).
void MediaCaptureMF::UnloadMediaFoundation()
{
#define CLEAR_IMPORT(X) wrap_##X = nullptr;
  VISIT_MF_IMPORTS(CLEAR_IMPORT);
  VISIT_MFREADWRITE_IMPORTS(CLEAR_IMPORT);
  VISIT_MFPLAT_IMPORTS(CLEAR_IMPORT);
#undef CLEAR_IMPORT

  s_mf_library.Close();
  s_mfreadwrite_library.Close();
  s_mfplat_library.Close();
  s_library_loaded = false;
}
    801 
    802 #undef VISIT_MF_IMPORTS
    803 #undef VISIT_MFREADWRITE_IMPORTS
    804 #undef VISIT_MFPLAT_IMPORTS
    805 
// Defaulted: all members (ComPtrs, containers) release themselves via RAII.
MediaCaptureMF::~MediaCaptureMF() = default;
    807 
    808 std::unique_ptr<MediaCapture> MediaCaptureMF::Create(Error* error)
    809 {
    810   if (!LoadMediaFoundation(error))
    811     return nullptr;
    812 
    813   return std::make_unique<MediaCaptureMF>();
    814 }
    815 
    816 MediaCapture::ContainerList MediaCaptureMF::GetContainerList()
    817 {
    818   return {
    819     {"avi", "Audio Video Interleave"},
    820     {"mp4", "MPEG-4 Part 14"},
    821     {"mp3", "MPEG-2 Audio Layer III"},
    822     {"wav", "Waveform Audio File Format"},
    823   };
    824 }
    825 
    826 MediaCapture::ContainerList MediaCaptureMF::GetAudioCodecList(const char* container)
    827 {
    828   ContainerList ret;
    829   ret.reserve(std::size(s_media_foundation_audio_codecs));
    830   for (const MediaFoundationAudioCodec& codec : s_media_foundation_audio_codecs)
    831     ret.emplace_back(codec.name, codec.display_name);
    832   return ret;
    833 }
    834 
    835 MediaCapture::ContainerList MediaCaptureMF::GetVideoCodecList(const char* container)
    836 {
    837   ContainerList ret;
    838   ret.reserve(std::size(s_media_foundation_video_codecs));
    839   for (const MediaFoundationVideoCodec& codec : s_media_foundation_video_codecs)
    840     ret.emplace_back(codec.name, codec.display_name);
    841   return ret;
    842 }
    843 
    844 bool MediaCaptureMF::IsCapturingVideo() const
    845 {
    846   return (m_video_stream_index != INVALID_STREAM_INDEX);
    847 }
    848 
    849 bool MediaCaptureMF::IsCapturingAudio() const
    850 {
    851   return (m_audio_stream_index != INVALID_STREAM_INDEX);
    852 }
    853 
    854 time_t MediaCaptureMF::GetElapsedTime() const
    855 {
    856   if (IsCapturingVideo())
    857     return static_cast<time_t>(static_cast<LONGLONG>(m_next_video_pts * m_video_sample_duration) / TEN_NANOSECONDS);
    858   else
    859     return static_cast<time_t>(static_cast<LONGLONG>(m_next_audio_pts * m_audio_sample_duration) / TEN_NANOSECONDS);
    860 }
    861 
// Sets up the capture pipeline: RGB->YUV processor and encoder MFTs for video, input/output
// media types for audio, then creates the sink writer, adds the streams, and begins writing.
// On any failure after writer creation, the writer is dropped and the output file deleted.
// NOTE(review): aspect, video_codec_args and audio_codec_args are unused in this backend —
// presumably consumed by other MediaCapture implementations; confirm against the base class.
bool MediaCaptureMF::InternalBeginCapture(float fps, float aspect, u32 sample_rate, bool capture_video,
                                          std::string_view video_codec, u32 video_bitrate,
                                          std::string_view video_codec_args, bool capture_audio,
                                          std::string_view audio_codec, u32 audio_bitrate,
                                          std::string_view audio_codec_args, Error* error)
{
  HRESULT hr;

  ComPtr<IMFMediaType> video_media_type;
  bool use_async_video_transform = false;

  if (capture_video)
  {
    // Frame rate is expressed as (fps * 10^7) / 10^7 so fractional rates survive the ratio.
    m_frame_rate_numerator = static_cast<u32>(fps * TEN_NANOSECONDS);
    m_video_sample_duration = static_cast<LONGLONG>(static_cast<double>(TEN_NANOSECONDS) / static_cast<double>(fps));

    // Build RGB->YUV converter first; its output type becomes the encoder's input type.
    ComPtr<IMFMediaType> yuv_media_type;
    if (!(m_video_yuv_transform = CreateVideoYUVTransform(&yuv_media_type, error)) ||
        !(m_video_encode_transform = CreateVideoEncodeTransform(video_codec, video_bitrate, yuv_media_type.Get(),
                                                                &video_media_type, &use_async_video_transform, error)))
    {
      return false;
    }
  }

  ComPtr<IMFMediaType> audio_input_type, audio_output_type;
  if (capture_audio)
  {
    if (!GetAudioTypes(audio_codec, &audio_input_type, &audio_output_type, sample_rate, audio_bitrate, error))
      return false;

    // only used when not capturing video
    m_audio_frame_size = static_cast<u32>(static_cast<float>(sample_rate) / fps);

    m_audio_sample_duration =
      static_cast<LONGLONG>(static_cast<double>(TEN_NANOSECONDS) / static_cast<double>(sample_rate));
  }

  if (FAILED(hr = wrap_MFCreateSinkWriterFromURL(StringUtil::UTF8StringToWideString(m_path).c_str(), nullptr, nullptr,
                                                 m_sink_writer.GetAddressOf())))
  {
    Error::SetHResult(error, "MFCreateSinkWriterFromURL() failed: ", hr);
    return false;
  }

  // From here on, errors fall through on the shared hr so the writer/file can be cleaned up.
  if (capture_video)
  {
    if (SUCCEEDED(hr) && FAILED(hr = m_sink_writer->AddStream(video_media_type.Get(), &m_video_stream_index)))
      [[unlikely]]
    {
      Error::SetHResult(error, "Video AddStream() failed: ", hr);
    }

    if (SUCCEEDED(hr) && FAILED(hr = m_sink_writer->SetInputMediaType(m_video_stream_index, video_media_type.Get(),
                                                                      nullptr))) [[unlikely]]
    {
      Error::SetHResult(error, "Video SetInputMediaType() failed: ", hr);
    }
  }

  if (capture_audio)
  {
    if (SUCCEEDED(hr) && FAILED(hr = m_sink_writer->AddStream(audio_output_type.Get(), &m_audio_stream_index)))
      [[unlikely]]
    {
      Error::SetHResult(error, "Audio AddStream() failed: ", hr);
    }

    // audio_input_type may be null, in which case the writer takes the output type as-is.
    if (SUCCEEDED(hr) && audio_input_type &&
        FAILED(hr = m_sink_writer->SetInputMediaType(m_audio_stream_index, audio_input_type.Get(), nullptr)))
      [[unlikely]]
    {
      Error::SetHResult(error, "Audio SetInputMediaType() failed: ", hr);
    }
  }

  if (SUCCEEDED(hr) && FAILED(hr = m_sink_writer->BeginWriting()))
    Error::SetHResult(error, "BeginWriting() failed: ", hr);

  // Async (hardware) encoders are driven through their event generator instead of polling.
  if (use_async_video_transform)
  {
    if (SUCCEEDED(hr) && FAILED(hr = m_video_encode_transform.As(&m_video_encode_event_generator)))
      Error::SetHResult(error, "Getting video encode event generator failed: ", hr);
  }

  if (capture_video && SUCCEEDED(hr) &&
      FAILED(hr = m_video_encode_transform->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0)))
  {
    Error::SetHResult(error, "MFT_MESSAGE_NOTIFY_START_OF_STREAM failed: ", hr);
  }

  if (FAILED(hr))
  {
    // Don't leave a half-written file behind.
    m_sink_writer.Reset();
    DeleteOutputFile();
    return false;
  }

  return true;
}
    962 
// Finishes the capture: flushes the base class, drains the video encoder (end-of-stream
// notification followed by pulling any remaining output), then finalizes the sink writer.
// hr doubles as a success flag for the boolean-returning helpers.
bool MediaCaptureMF::InternalEndCapture(std::unique_lock<std::mutex>& lock, Error* error)
{
  HRESULT hr = MediaCaptureBase::InternalEndCapture(lock, error) ? S_OK : E_FAIL;

  // need to drain all input frames
  if (m_video_encode_transform)
  {
    if (SUCCEEDED(hr) && FAILED(hr = m_video_encode_transform->ProcessMessage(MFT_MESSAGE_NOTIFY_END_OF_STREAM, 0)))
    {
      Error::SetHResult(error, "MFT_MESSAGE_NOTIFY_END_OF_STREAM failed: ", hr);
      return false;
    }

    // Async (hardware) encoders are drained via their event queue, sync ones by polling.
    if (m_video_encode_event_generator)
      hr = ProcessVideoEvents(error) ? S_OK : E_FAIL;
    else
      hr = ProcessVideoOutputSamples(error) ? S_OK : E_FAIL;
  }

  if (SUCCEEDED(hr) && FAILED(hr = m_sink_writer->Finalize())) [[unlikely]]
    Error::SetHResult(error, "Finalize() failed: ", hr);

  m_sink_writer.Reset();
  return SUCCEEDED(hr);
}
    988 
    989 MediaCaptureMF::ComPtr<IMFTransform> MediaCaptureMF::CreateVideoYUVTransform(ComPtr<IMFMediaType>* output_type,
    990                                                                              Error* error)
    991 {
    992   const MFT_REGISTER_TYPE_INFO input_type_info = {.guidMajorType = MFMediaType_Video,
    993                                                   .guidSubtype = VIDEO_RGB_MEDIA_FORMAT};
    994   const MFT_REGISTER_TYPE_INFO output_type_info = {.guidMajorType = MFMediaType_Video,
    995                                                    .guidSubtype = VIDEO_YUV_MEDIA_FORMAT};
    996 
    997   IMFActivate** transforms = nullptr;
    998   UINT32 num_transforms = 0;
    999   HRESULT hr = wrap_MFTEnumEx(MFT_CATEGORY_VIDEO_PROCESSOR, MFT_ENUM_FLAG_SORTANDFILTER, &input_type_info,
   1000                               &output_type_info, &transforms, &num_transforms);
   1001   if (FAILED(hr)) [[unlikely]]
   1002   {
   1003     Error::SetHResult(error, "YUV MFTEnumEx() failed: ", hr);
   1004     return nullptr;
   1005   }
   1006   else if (num_transforms == 0) [[unlikely]]
   1007   {
   1008     Error::SetStringView(error, "No video processors found.");
   1009     return nullptr;
   1010   }
   1011 
   1012   ComPtr<IMFTransform> transform;
   1013   hr = transforms[0]->ActivateObject(IID_PPV_ARGS(transform.GetAddressOf()));
   1014   if (transforms)
   1015     wrap_MFHeapFree(transforms);
   1016   if (FAILED(hr)) [[unlikely]]
   1017   {
   1018     Error::SetHResult(error, "YUV ActivateObject() failed: ", hr);
   1019     return nullptr;
   1020   }
   1021 
   1022   ComPtr<IMFMediaType> input_type;
   1023   if (FAILED(hr = wrap_MFCreateMediaType(input_type.GetAddressOf())) ||
   1024       FAILED(hr = wrap_MFCreateMediaType(output_type->GetAddressOf()))) [[unlikely]]
   1025   {
   1026     Error::SetHResult(error, "YUV MFCreateMediaType() failed: ", hr);
   1027     return nullptr;
   1028   }
   1029 
   1030   if (FAILED(hr = input_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video)) ||
   1031       FAILED(hr = input_type->SetGUID(MF_MT_SUBTYPE, VIDEO_RGB_MEDIA_FORMAT)) ||
   1032       FAILED(hr = input_type->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive)) ||
   1033       FAILED(hr = MFSetAttributeSize(input_type.Get(), MF_MT_FRAME_SIZE, m_video_width, m_video_height)) ||
   1034       FAILED(hr = (*output_type)->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video)) ||
   1035       FAILED(hr = (*output_type)->SetGUID(MF_MT_SUBTYPE, VIDEO_YUV_MEDIA_FORMAT)) ||
   1036       FAILED(hr = (*output_type)->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive)) ||
   1037       FAILED(hr = MFSetAttributeSize(output_type->Get(), MF_MT_FRAME_SIZE, m_video_width, m_video_height)) ||
   1038       FAILED(hr = MFSetAttributeRatio(output_type->Get(), MF_MT_FRAME_RATE, m_frame_rate_numerator, TEN_NANOSECONDS)))
   1039     [[unlikely]]
   1040   {
   1041     Error::SetHResult(error, "YUV setting attributes failed: ", hr);
   1042     return nullptr;
   1043   }
   1044 
   1045   if (FAILED(hr = transform->SetOutputType(0, output_type->Get(), 0))) [[unlikely]]
   1046   {
   1047     Error::SetHResult(error, "YUV SetOutputType() failed: ", hr);
   1048     return nullptr;
   1049   }
   1050 
   1051   if (FAILED(hr = transform->SetInputType(0, input_type.Get(), 0))) [[unlikely]]
   1052   {
   1053     Error::SetHResult(error, "YUV SetInputType() failed: ", hr);
   1054     return nullptr;
   1055   }
   1056 
   1057   return transform;
   1058 }
   1059 
   1060 MediaCaptureMF::ComPtr<IMFTransform> MediaCaptureMF::CreateVideoEncodeTransform(std::string_view codec, u32 bitrate,
   1061                                                                                 IMFMediaType* input_type,
   1062                                                                                 ComPtr<IMFMediaType>* output_type,
   1063                                                                                 bool* use_async_transform, Error* error)
   1064 {
   1065   const MFT_REGISTER_TYPE_INFO input_type_info = {.guidMajorType = MFMediaType_Video,
   1066                                                   .guidSubtype = VIDEO_YUV_MEDIA_FORMAT};
   1067   MFT_REGISTER_TYPE_INFO output_type_info = {.guidMajorType = MFMediaType_Video, .guidSubtype = MFVideoFormat_H264};
   1068   bool hardware = false;
   1069   if (!codec.empty())
   1070   {
   1071     bool found = false;
   1072     for (const MediaFoundationVideoCodec& tcodec : s_media_foundation_video_codecs)
   1073     {
   1074       if (StringUtil::EqualNoCase(codec, tcodec.name))
   1075       {
   1076         output_type_info.guidSubtype = tcodec.guid;
   1077         hardware = tcodec.require_hardware;
   1078         found = true;
   1079         break;
   1080       }
   1081     }
   1082     if (!found)
   1083     {
   1084       Error::SetStringFmt(error, "Unknown video codec '{}'", codec);
   1085       return nullptr;
   1086     }
   1087   }
   1088 
   1089   IMFActivate** transforms = nullptr;
   1090   UINT32 num_transforms = 0;
   1091   HRESULT hr =
   1092     wrap_MFTEnumEx(MFT_CATEGORY_VIDEO_ENCODER, (hardware ? MFT_ENUM_FLAG_HARDWARE : 0) | MFT_ENUM_FLAG_SORTANDFILTER,
   1093                    &input_type_info, &output_type_info, &transforms, &num_transforms);
   1094   if (FAILED(hr)) [[unlikely]]
   1095   {
   1096     Error::SetHResult(error, "Encoder MFTEnumEx() failed: ", hr);
   1097     return nullptr;
   1098   }
   1099   else if (num_transforms == 0) [[unlikely]]
   1100   {
   1101     Error::SetStringView(error, "No video encoders found.");
   1102     return nullptr;
   1103   }
   1104 
   1105   ComPtr<IMFTransform> transform;
   1106   hr = transforms[0]->ActivateObject(IID_PPV_ARGS(transform.GetAddressOf()));
   1107   if (transforms)
   1108     wrap_MFHeapFree(transforms);
   1109   if (FAILED(hr)) [[unlikely]]
   1110   {
   1111     Error::SetHResult(error, "Encoder ActivateObject() failed: ", hr);
   1112     return nullptr;
   1113   }
   1114 
   1115   *use_async_transform = false;
   1116   if (hardware)
   1117   {
   1118     ComPtr<IMFAttributes> attributes;
   1119     if (FAILED(transform->GetAttributes(attributes.GetAddressOf()))) [[unlikely]]
   1120     {
   1121       Error::SetHResult(error, "YUV GetAttributes() failed: ", hr);
   1122       return nullptr;
   1123     }
   1124     UINT32 async_supported;
   1125     *use_async_transform =
   1126       (SUCCEEDED(hr = attributes->GetUINT32(MF_TRANSFORM_ASYNC, &async_supported)) && async_supported == TRUE &&
   1127        SUCCEEDED(hr = attributes->SetUINT32(MF_TRANSFORM_ASYNC_UNLOCK, 1)));
   1128     if (use_async_transform)
   1129       INFO_LOG("Using async video transform.");
   1130   }
   1131 
   1132   if (FAILED(hr = wrap_MFCreateMediaType(output_type->GetAddressOf()))) [[unlikely]]
   1133   {
   1134     Error::SetHResult(error, "Encoder MFCreateMediaType() failed: ", hr);
   1135     return nullptr;
   1136   }
   1137 
   1138   constexpr u32 par_numerator = 1;
   1139   constexpr u32 par_denominator = 1;
   1140 
   1141   u32 profile = 0;
   1142   if (output_type_info.guidSubtype == MFVideoFormat_H264)
   1143     profile = eAVEncH264VProfile_Main;
   1144   else if (output_type_info.guidSubtype == MFVideoFormat_H265)
   1145     profile = eAVEncH265VProfile_Main_420_8;
   1146   else if (output_type_info.guidSubtype == MFVideoFormat_VP90)
   1147     profile = eAVEncVP9VProfile_420_8;
   1148 
   1149   if (FAILED(hr = (*output_type)->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video)) ||
   1150       FAILED(hr = (*output_type)->SetGUID(MF_MT_SUBTYPE, output_type_info.guidSubtype)) ||
   1151       FAILED(hr = (*output_type)->SetUINT32(MF_MT_AVG_BITRATE, bitrate * 1000)) ||
   1152       FAILED(hr = (*output_type)->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive)) ||
   1153       FAILED(hr = (*output_type)->SetUINT32(MF_MT_MPEG2_PROFILE, profile)) ||
   1154       FAILED(hr = MFSetAttributeSize(output_type->Get(), MF_MT_FRAME_SIZE, m_video_width, m_video_height)) ||
   1155       FAILED(hr = MFSetAttributeRatio(output_type->Get(), MF_MT_FRAME_RATE, m_frame_rate_numerator, TEN_NANOSECONDS)) ||
   1156       FAILED(hr = MFSetAttributeRatio(output_type->Get(), MF_MT_PIXEL_ASPECT_RATIO, par_numerator, par_denominator)))
   1157     [[unlikely]]
   1158   {
   1159     Error::SetHResult(error, "Encoder setting attributes failed: ", hr);
   1160     return nullptr;
   1161   }
   1162 
   1163   if (FAILED(hr = transform->SetOutputType(0, output_type->Get(), 0))) [[unlikely]]
   1164   {
   1165     Error::SetHResult(error, "Encoder SetOutputType() failed: ", hr);
   1166     return nullptr;
   1167   }
   1168 
   1169   if (FAILED(hr = transform->SetInputType(0, input_type, 0))) [[unlikely]]
   1170   {
   1171     Error::SetHResult(error, "Encoder SetInputType() failed: ", hr);
   1172     return nullptr;
   1173   }
   1174 
   1175   MFT_OUTPUT_STREAM_INFO osi;
   1176   if (FAILED(hr = transform->GetOutputStreamInfo(0, &osi))) [[unlikely]]
   1177   {
   1178     Error::SetHResult(error, "Encoder GetOutputStreamInfo() failed: ", hr);
   1179     return nullptr;
   1180   }
   1181 
   1182   if (!(osi.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES))
   1183   {
   1184     if (osi.cbSize == 0)
   1185     {
   1186       Error::SetStringFmt(error, "Invalid sample size for non-output-providing stream");
   1187       return nullptr;
   1188     }
   1189 
   1190     m_video_sample_size = osi.cbSize;
   1191   }
   1192 
   1193   INFO_LOG("Video sample size: {}", m_video_sample_size);
   1194   return transform;
   1195 }
   1196 
// Copies a captured frame into the MF media buffer, flipping vertically when the device's
// readback origin is top-left (done by starting at the last source row and negating the
// stride), and swizzling RGBA->BGRA when the render texture is RGBA8. The non-RGBA8 path
// assumes the source is already in the buffer's channel order — presumably BGRA; confirm
// against VIDEO_RGB_MEDIA_FORMAT.
ALWAYS_INLINE_RELEASE void MediaCaptureMF::ConvertVideoFrame(u8* dst, size_t dst_stride, const u8* src,
                                                             size_t src_stride, u32 width, u32 height) const
{
  if (!g_gpu_device->UsesLowerLeftOrigin())
  {
    // Walk the source bottom-up: start at the last row and step backwards via an
    // unsigned-wrapping negative stride.
    src += src_stride * (height - 1);
    src_stride = static_cast<size_t>(-static_cast<std::make_signed_t<size_t>>(src_stride));
  }

  if (m_video_render_texture_format == GPUTexture::Format::RGBA8)
  {
    // need to convert rgba -> bgra, as well as flipping vertically
    const u32 vector_width = 4;
    const u32 aligned_width = Common::AlignDownPow2(width, vector_width);
    for (u32 remaining_rows = height;;)
    {
      const u8* row_src = src;
      u8* row_dst = dst;

      // Vectorized body: shuffle swaps R and B in each of the 4 pixels per 16-byte load.
      u32 x = 0;
      for (; x < aligned_width; x += vector_width)
      {
        static constexpr GSVector4i mask = GSVector4i::cxpr8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15);
        GSVector4i::store<false>(row_dst, GSVector4i::load<false>(row_src).shuffle8(mask));
        row_src += vector_width * sizeof(u32);
        row_dst += vector_width * sizeof(u32);
      }

      // Scalar tail for widths that aren't a multiple of 4 pixels.
      for (; x < width; x++)
      {
        row_dst[0] = row_src[2];
        row_dst[1] = row_src[1];
        row_dst[2] = row_src[0];
        row_dst[3] = row_src[3];
        row_src += sizeof(u32);
        row_dst += sizeof(u32);
      }

      src += src_stride;
      dst += dst_stride;

      remaining_rows--;
      if (remaining_rows == 0)
        break;
    }
  }
  else
  {
    // only flip
    const u32 copy_width = sizeof(u32) * width;
    for (u32 remaining_rows = height;;)
    {
      const u8* row_src = src;
      u8* row_dst = dst;
      std::memcpy(row_dst, row_src, copy_width);
      src += src_stride;
      dst += dst_stride;

      remaining_rows--;
      if (remaining_rows == 0)
        break;
    }
  }
}
   1261 
// Resets all backend-specific state after a capture ends or fails, on top of the base
// class's cleanup. Releases every COM object and zeroes timing/stream bookkeeping.
void MediaCaptureMF::ClearState()
{
  MediaCaptureBase::ClearState();

  m_sink_writer.Reset();

  // Invalid indices mark the streams as not capturing (see IsCapturingVideo/Audio).
  m_video_stream_index = INVALID_STREAM_INDEX;
  m_audio_stream_index = INVALID_STREAM_INDEX;

  m_video_sample_duration = 0;
  m_audio_sample_duration = 0;
  m_frame_rate_numerator = 0;

  // Transforms, in-flight samples, and async-encoder bookkeeping.
  m_video_yuv_transform.Reset();
  m_video_yuv_sample.Reset();
  m_video_encode_transform.Reset();
  m_video_encode_event_generator.Reset();
  m_pending_video_samples.clear();
  m_video_output_sample.Reset();
  m_wanted_video_samples = 0;
  m_video_sample_size = 0;
}
   1284 
// Pushes one captured frame through the pipeline: copies/converts the readback texture into
// an MF sample, stamps its time/duration, feeds it to the RGB->YUV transform, and drains the
// transform's output into m_pending_video_samples, which are then forwarded to the encoder
// (event-driven for async hardware encoders, polled otherwise).
bool MediaCaptureMF::SendFrame(const PendingFrame& pf, Error* error)
{
  const u32 buffer_stride = m_video_width * sizeof(u32);
  const u32 buffer_size = buffer_stride * m_video_height;

  HRESULT hr;
  ComPtr<IMFMediaBuffer> buffer;
  if (FAILED(hr = wrap_MFCreateMemoryBuffer(buffer_size, buffer.GetAddressOf()))) [[unlikely]]
  {
    Error::SetHResult(error, "MFCreateMemoryBuffer() failed: ", hr);
    return false;
  }

  BYTE* buffer_data;
  if (FAILED(hr = buffer->Lock(&buffer_data, nullptr, nullptr))) [[unlikely]]
  {
    Error::SetHResult(error, "Lock() failed: ", hr);
    return false;
  }

  // Swizzle/flip the readback texture straight into the locked MF buffer.
  ConvertVideoFrame(buffer_data, buffer_stride, pf.tex->GetMapPointer(), pf.tex->GetMapPitch(), m_video_width,
                    m_video_height);
  buffer->Unlock();

  if (FAILED(hr = buffer->SetCurrentLength(buffer_size))) [[unlikely]]
  {
    Error::SetHResult(error, "SetCurrentLength() failed: ", hr);
    return false;
  }

  ComPtr<IMFSample> sample;
  if (FAILED(hr = wrap_MFCreateSample(sample.GetAddressOf()))) [[unlikely]]
  {
    Error::SetHResult(error, "MFCreateSample() failed: ", hr);
    return false;
  }

  if (FAILED(hr = sample->AddBuffer(buffer.Get()))) [[unlikely]]
  {
    Error::SetHResult(error, "AddBuffer() failed: ", hr);
    return false;
  }

  // Timestamp/duration are in 100ns units, derived from the frame's pts.
  const LONGLONG timestamp = static_cast<LONGLONG>(pf.pts) * m_video_sample_duration;
  if (FAILED(hr = sample->SetSampleTime(timestamp))) [[unlikely]]
  {
    Error::SetHResult(error, "SetSampleTime() failed: ", hr);
    return false;
  }

  if (FAILED(hr = sample->SetSampleDuration(m_video_sample_duration))) [[unlikely]]
  {
    Error::SetHResult(error, "SetSampleDuration() failed: ", hr);
    return false;
  }

  //////////////////////////////////////////////////////////////////////////
  // RGB -> YUV
  //////////////////////////////////////////////////////////////////////////

  if (FAILED(hr = m_video_yuv_transform->ProcessInput(0, sample.Get(), 0))) [[unlikely]]
  {
    Error::SetHResult(error, "YUV ProcessInput() failed: ", hr);
    return false;
  }

  // Drain the converter until it asks for more input; each produced YUV sample is queued
  // for the encoder.
  for (;;)
  {
    if (!m_video_yuv_sample)
    {
      // Lazily (re)allocate the output sample; it's moved into the pending queue below.
      ComPtr<IMFMediaBuffer> yuv_membuf;
      if (FAILED(hr = wrap_MFCreateMemoryBuffer(buffer_size, yuv_membuf.GetAddressOf()))) [[unlikely]]
      {
        Error::SetHResult(error, "YUV MFCreateMemoryBuffer() failed: ", hr);
        return false;
      }

      if (FAILED(hr = wrap_MFCreateSample(m_video_yuv_sample.GetAddressOf()))) [[unlikely]]
      {
        Error::SetHResult(error, "YUV MFCreateSample() failed: ", hr);
        return false;
      }
      if (FAILED(hr = m_video_yuv_sample->AddBuffer(yuv_membuf.Get()))) [[unlikely]]
      {
        Error::SetHResult(error, "YUV AddBuffer() failed: ", hr);
        return false;
      }
    }

    DWORD status;
    MFT_OUTPUT_DATA_BUFFER yuv_buf = {.pSample = m_video_yuv_sample.Get()};
    hr = m_video_yuv_transform->ProcessOutput(0, 1, &yuv_buf, &status);
    if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT)
      break;

    if (FAILED(hr)) [[unlikely]]
    {
      Error::SetHResult(error, "YUV ProcessOutput() failed: ", hr);
      return false;
    }
    if (yuv_buf.pEvents)
      yuv_buf.pEvents->Release();

    m_pending_video_samples.push_back(std::move(m_video_yuv_sample));

    // Forward queued samples to the encoder using whichever drive model it supports.
    if (m_video_encode_event_generator)
    {
      if (!ProcessVideoEvents(error)) [[unlikely]]
        return false;
    }
    else
    {
      if (!ProcessVideoOutputSamples(error)) [[unlikely]]
        return false;
    }
  }

  return true;
}
   1404 
   1405 bool MediaCaptureMF::ProcessVideoOutputSamples(Error* error)
   1406 {
   1407   HRESULT hr;
   1408 
   1409   for (;;)
   1410   {
   1411     while (!m_pending_video_samples.empty())
   1412     {
   1413       if (FAILED(hr = m_video_encode_transform->ProcessInput(0, m_pending_video_samples.front().Get(), 0))) [[unlikely]]
   1414       {
   1415         Error::SetHResult(error, "Video ProcessInput() failed: ", hr);
   1416         return false;
   1417       }
   1418       m_pending_video_samples.pop_front();
   1419     }
   1420 
   1421     if (m_video_sample_size > 0 && !m_video_output_sample)
   1422     {
   1423       ComPtr<IMFMediaBuffer> video_membuf;
   1424       if (FAILED(hr = wrap_MFCreateMemoryBuffer(m_video_sample_size, video_membuf.GetAddressOf()))) [[unlikely]]
   1425       {
   1426         Error::SetHResult(error, "YUV MFCreateMemoryBuffer() failed: ", hr);
   1427         return false;
   1428       }
   1429 
   1430       if (FAILED(hr = wrap_MFCreateSample(m_video_output_sample.GetAddressOf()))) [[unlikely]]
   1431       {
   1432         Error::SetHResult(error, "YUV MFCreateSample() failed: ", hr);
   1433         return false;
   1434       }
   1435       if (FAILED(hr = m_video_output_sample->AddBuffer(video_membuf.Get()))) [[unlikely]]
   1436       {
   1437         Error::SetHResult(error, "YUV AddBuffer() failed: ", hr);
   1438         return false;
   1439       }
   1440     }
   1441 
   1442     MFT_OUTPUT_DATA_BUFFER video_buf = {.pSample = m_video_output_sample.Get()};
   1443     DWORD status;
   1444     hr = m_video_encode_transform->ProcessOutput(0, 1, &video_buf, &status);
   1445     if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT)
   1446       break;
   1447 
   1448     if (FAILED(hr)) [[unlikely]]
   1449     {
   1450       Error::SetHResult(error, "Video ProcessOutput() failed: ", hr);
   1451       return false;
   1452     }
   1453     if (video_buf.pEvents)
   1454       video_buf.pEvents->Release();
   1455 
   1456     hr = m_sink_writer->WriteSample(m_video_stream_index, video_buf.pSample);
   1457     if (FAILED(hr)) [[unlikely]]
   1458     {
   1459       Error::SetHResult(error, "Video WriteSample() failed: ", hr);
   1460       return false;
   1461     }
   1462 
   1463     // might be transform-provided
   1464     if (m_video_output_sample)
   1465       m_video_output_sample.Reset();
   1466     else
   1467       video_buf.pSample->Release();
   1468   }
   1469 
   1470   return true;
   1471 }
   1472 
   1473 bool MediaCaptureMF::ProcessVideoEvents(Error* error)
   1474 {
   1475   HRESULT hr;
   1476 
   1477   for (;;)
   1478   {
   1479     // push any wanted input
   1480     while (m_wanted_video_samples > 0)
   1481     {
   1482       if (m_pending_video_samples.empty())
   1483         break;
   1484 
   1485       if (FAILED(hr = m_video_encode_transform->ProcessInput(0, m_pending_video_samples.front().Get(), 0))) [[unlikely]]
   1486       {
   1487         Error::SetHResult(error, "Video ProcessInput() failed: ", hr);
   1488         return false;
   1489       }
   1490       m_pending_video_samples.pop_front();
   1491 
   1492       m_wanted_video_samples--;
   1493     }
   1494 
   1495     ComPtr<IMFMediaEvent> event;
   1496     hr = m_video_encode_event_generator->GetEvent(MF_EVENT_FLAG_NO_WAIT, event.GetAddressOf());
   1497     if (hr == MF_E_NO_EVENTS_AVAILABLE)
   1498       return true;
   1499 
   1500     if (FAILED(hr)) [[unlikely]]
   1501     {
   1502       Error::SetHResult(error, "GetEvent() failed: ", hr);
   1503       return false;
   1504     }
   1505 
   1506     MediaEventType type;
   1507     if (FAILED(hr = event->GetType(&type))) [[unlikely]]
   1508     {
   1509       Error::SetHResult(error, "GetEvent() failed: ", hr);
   1510       return false;
   1511     }
   1512 
   1513     UINT32 stream_id = 0;
   1514     if (type == METransformNeedInput || type == METransformHaveOutput)
   1515     {
   1516       if (FAILED(hr = event->GetUINT32(MF_EVENT_MFT_INPUT_STREAM_ID, &stream_id)))
   1517       {
   1518         Error::SetHResult(error, "Get stream ID failed: ", hr);
   1519         return false;
   1520       }
   1521       else if (stream_id != 0)
   1522       {
   1523         Error::SetStringFmt(error, "Unexpected stream ID {}", stream_id);
   1524         return false;
   1525       }
   1526     }
   1527 
   1528     switch (type)
   1529     {
   1530       case METransformNeedInput:
   1531       {
   1532         m_wanted_video_samples++;
   1533       }
   1534       break;
   1535 
   1536       case METransformHaveOutput:
   1537       {
   1538         if (m_video_sample_size > 0 && !m_video_output_sample)
   1539         {
   1540           ComPtr<IMFMediaBuffer> video_membuf;
   1541           if (FAILED(hr = wrap_MFCreateMemoryBuffer(m_video_sample_size, video_membuf.GetAddressOf()))) [[unlikely]]
   1542           {
   1543             Error::SetHResult(error, "YUV MFCreateMemoryBuffer() failed: ", hr);
   1544             return false;
   1545           }
   1546 
   1547           if (FAILED(hr = wrap_MFCreateSample(m_video_output_sample.GetAddressOf()))) [[unlikely]]
   1548           {
   1549             Error::SetHResult(error, "YUV MFCreateSample() failed: ", hr);
   1550             return false;
   1551           }
   1552           if (FAILED(hr = m_video_output_sample->AddBuffer(video_membuf.Get()))) [[unlikely]]
   1553           {
   1554             Error::SetHResult(error, "YUV AddBuffer() failed: ", hr);
   1555             return false;
   1556           }
   1557         }
   1558 
   1559         MFT_OUTPUT_DATA_BUFFER video_buf = {.pSample = m_video_output_sample.Get()};
   1560         DWORD status;
   1561         if (FAILED(hr = m_video_encode_transform->ProcessOutput(0, 1, &video_buf, &status))) [[unlikely]]
   1562         {
   1563           Error::SetHResult(error, "Video ProcessOutput() failed: ", hr);
   1564           return false;
   1565         }
   1566         if (video_buf.pEvents)
   1567           video_buf.pEvents->Release();
   1568 
   1569         hr = m_sink_writer->WriteSample(m_video_stream_index, video_buf.pSample);
   1570         if (FAILED(hr)) [[unlikely]]
   1571         {
   1572           Error::SetHResult(error, "Video WriteSample() failed: ", hr);
   1573           return false;
   1574         }
   1575 
   1576         // might be transform-provided
   1577         if (m_video_output_sample)
   1578           m_video_output_sample.Reset();
   1579         else
   1580           video_buf.pSample->Release();
   1581       }
   1582       break;
   1583 
   1584       default:
   1585         WARNING_LOG("Unhandled video event {}", static_cast<u32>(type));
   1586         break;
   1587     }
   1588   }
   1589 }
   1590 
   1591 bool MediaCaptureMF::GetAudioTypes(std::string_view codec, ComPtr<IMFMediaType>* input_type,
   1592                                    ComPtr<IMFMediaType>* output_type, u32 sample_rate, u32 bitrate, Error* error)
   1593 {
   1594   GUID output_subtype = MFAudioFormat_AAC;
   1595   if (!codec.empty())
   1596   {
   1597     bool found = false;
   1598     for (const MediaFoundationAudioCodec& tcodec : s_media_foundation_audio_codecs)
   1599     {
   1600       if (StringUtil::EqualNoCase(codec, tcodec.name))
   1601       {
   1602         output_subtype = tcodec.guid;
   1603         bitrate = std::clamp(bitrate, tcodec.min_bitrate, tcodec.max_bitrate);
   1604         found = true;
   1605         break;
   1606       }
   1607     }
   1608     if (!found)
   1609     {
   1610       Error::SetStringFmt(error, "Unknown audio codec '{}'", codec);
   1611       return false;
   1612     }
   1613   }
   1614 
   1615   HRESULT hr;
   1616   if (FAILED(hr = wrap_MFCreateMediaType(input_type->GetAddressOf()))) [[unlikely]]
   1617   {
   1618     Error::SetHResult(error, "Audio MFCreateMediaType() failed: ", hr);
   1619     return false;
   1620   }
   1621 
   1622   const u32 block_align = AUDIO_CHANNELS * (AUDIO_BITS_PER_SAMPLE / 8);
   1623   const u32 bytes_per_second = block_align * sample_rate;
   1624 
   1625   if (FAILED(hr = (*input_type)->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio)) ||
   1626       FAILED(hr = (*input_type)->SetGUID(MF_MT_SUBTYPE, AUDIO_INPUT_MEDIA_FORMAT)) ||
   1627       FAILED(hr = (*input_type)->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, AUDIO_CHANNELS)) ||
   1628       FAILED(hr = (*input_type)->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, AUDIO_BITS_PER_SAMPLE)) ||
   1629       FAILED(hr = (*input_type)->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, sample_rate)) ||
   1630       FAILED(hr = (*input_type)->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, block_align)) ||
   1631       FAILED(hr = (*input_type)->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, bytes_per_second)) ||
   1632       FAILED(hr = (*input_type)->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE))) [[unlikely]]
   1633   {
   1634     Error::SetHResult(error, "Audio setting attributes failed: ", hr);
   1635     return false;
   1636   }
   1637 
   1638   // If our input type is PCM, no need for an input type, it's the same as output.
   1639   if (output_subtype == AUDIO_INPUT_MEDIA_FORMAT)
   1640   {
   1641     *output_type = std::move(*input_type);
   1642     return true;
   1643   }
   1644 
   1645   ComPtr<IMFCollection> output_types_collection;
   1646   DWORD output_types_collection_size = 0;
   1647   hr = wrap_MFTranscodeGetAudioOutputAvailableTypes(output_subtype, 0, nullptr, output_types_collection.GetAddressOf());
   1648   if (FAILED(hr) || FAILED(hr = output_types_collection->GetElementCount(&output_types_collection_size))) [[unlikely]]
   1649   {
   1650     Error::SetHResult(error, "MFTranscodeGetAudioOutputAvailableTypes() failed: ", hr);
   1651     return false;
   1652   }
   1653 
   1654   std::vector<std::pair<ComPtr<IMFMediaType>, u32>> output_types;
   1655   for (DWORD i = 0; i < output_types_collection_size; i++)
   1656   {
   1657     ComPtr<IUnknown> current_output_type;
   1658     ComPtr<IMFMediaType> current_output_type_c;
   1659     if (SUCCEEDED(hr = output_types_collection->GetElement(i, current_output_type.GetAddressOf())) &&
   1660         SUCCEEDED(current_output_type.As(&current_output_type_c)))
   1661     {
   1662       UINT32 current_channel_count, current_sample_rate;
   1663       if (SUCCEEDED(current_output_type_c->GetUINT32(MF_MT_AUDIO_NUM_CHANNELS, &current_channel_count)) &&
   1664           SUCCEEDED(current_output_type_c->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, &current_sample_rate)) &&
   1665           current_channel_count == AUDIO_CHANNELS && current_sample_rate == sample_rate)
   1666       {
   1667         u32 current_bitrate;
   1668         if (SUCCEEDED(current_output_type_c->GetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &current_bitrate)))
   1669           current_bitrate *= 8;
   1670         else if (FAILED(current_output_type_c->GetUINT32(MF_MT_AVG_BITRATE, &current_bitrate)))
   1671           continue;
   1672 
   1673         output_types.emplace_back(std::move(current_output_type_c), current_bitrate);
   1674       }
   1675     }
   1676   }
   1677 
   1678   // pick the closest bitrate
   1679   const u32 bitrate_kbps = bitrate * 1000;
   1680   std::pair<ComPtr<IMFMediaType>, u32>* selected_output_type = nullptr;
   1681   for (auto it = output_types.begin(); it != output_types.end(); ++it)
   1682   {
   1683     if (it->second >= bitrate_kbps &&
   1684         (!selected_output_type || (selected_output_type->second - bitrate_kbps) > (it->second - bitrate_kbps)))
   1685     {
   1686       selected_output_type = &(*it);
   1687     }
   1688   }
   1689   if (!selected_output_type)
   1690   {
   1691     Error::SetStringView(error, "Unable to find a matching audio output type.");
   1692     return false;
   1693   }
   1694 
   1695   *output_type = std::move(selected_output_type->first);
   1696   return true;
   1697 }
   1698 
// Drains buffered PCM audio frames into the sink writer. When video is also
// being captured, audio is only written while its timestamp trails the current
// video timestamp (keeps the streams interleaved); otherwise everything pending
// is flushed. Returns false (with `error` set) on any Media Foundation failure.
bool MediaCaptureMF::ProcessAudioPackets(s64 video_pts, Error* error)
{
  const u32 max_audio_buffer_size = GetAudioBufferSizeInFrames();
  HRESULT hr;

  // Acquire-load pairs with the writer's release when frames are appended,
  // presumably from another thread — NOTE(review): confirm the producer side.
  u32 pending_frames = m_audio_buffer_size.load(std::memory_order_acquire);
  while (pending_frames > 0 && (!IsCapturingVideo() ||
                                ((m_next_audio_pts * m_audio_sample_duration) < (video_pts * m_video_sample_duration))))
  {
    // Grab as many source frames as we can. The ring buffer can wrap, so only
    // the contiguous run up to the end of the buffer is copied per iteration.
    const u32 contig_frames = std::min(pending_frames, max_audio_buffer_size - m_audio_buffer_read_pos);
    DebugAssert(contig_frames > 0);

    // Interleaved s16 samples: frames * channels * 2 bytes.
    const u32 buffer_size = contig_frames * sizeof(s16) * AUDIO_CHANNELS;
    ComPtr<IMFMediaBuffer> buffer;
    if (FAILED(hr = wrap_MFCreateMemoryBuffer(buffer_size, buffer.GetAddressOf()))) [[unlikely]]
    {
      Error::SetHResult(error, "Audio MFCreateMemoryBuffer() failed: ", hr);
      return false;
    }

    BYTE* buffer_data;
    if (FAILED(hr = buffer->Lock(&buffer_data, nullptr, nullptr))) [[unlikely]]
    {
      Error::SetHResult(error, "Audio Lock() failed: ", hr);
      return false;
    }

    // Copy the contiguous span out of the ring buffer into the MF buffer.
    std::memcpy(buffer_data, &m_audio_buffer[m_audio_buffer_read_pos * AUDIO_CHANNELS], buffer_size);
    buffer->Unlock();

    if (FAILED(hr = buffer->SetCurrentLength(buffer_size))) [[unlikely]]
    {
      Error::SetHResult(error, "Audio SetCurrentLength() failed: ", hr);
      return false;
    }

    // Wrap the buffer in a sample so it can carry timing information.
    ComPtr<IMFSample> sample;
    if (FAILED(hr = wrap_MFCreateSample(sample.GetAddressOf()))) [[unlikely]]
    {
      Error::SetHResult(error, "Audio MFCreateSample() failed: ", hr);
      return false;
    }

    if (FAILED(hr = sample->AddBuffer(buffer.Get()))) [[unlikely]]
    {
      Error::SetHResult(error, "Audio AddBuffer() failed: ", hr);
      return false;
    }

    // Time/duration are expressed in m_audio_sample_duration ticks per frame.
    // NOTE(review): MF sample times are conventionally 100 ns units — confirm
    // m_audio_sample_duration is derived accordingly where it is computed.
    const LONGLONG timestamp = static_cast<LONGLONG>(m_next_audio_pts) * m_audio_sample_duration;
    if (FAILED(hr = sample->SetSampleTime(timestamp))) [[unlikely]]
    {
      Error::SetHResult(error, "Audio SetSampleTime() failed: ", hr);
      return false;
    }

    const LONGLONG duration = static_cast<LONGLONG>(contig_frames) * m_audio_sample_duration;
    if (FAILED(hr = sample->SetSampleDuration(duration))) [[unlikely]]
    {
      Error::SetHResult(error, "Audio SetSampleDuration() failed: ", hr);
      return false;
    }

    m_next_audio_pts += contig_frames;

    hr = m_sink_writer->WriteSample(m_audio_stream_index, sample.Get());
    if (FAILED(hr)) [[unlikely]]
    {
      Error::SetHResult(error, "Audio WriteSample() failed: ", hr);
      return false;
    }

    // Advance the read cursor (with wraparound) and return the consumed space
    // to the producer; acq_rel keeps the memcpy above ordered before the sub.
    m_audio_buffer_read_pos = (m_audio_buffer_read_pos + contig_frames) % max_audio_buffer_size;
    m_audio_buffer_size.fetch_sub(contig_frames, std::memory_order_acq_rel);
    m_audio_frame_pos += contig_frames;
    pending_frames -= contig_frames;
  }

  return true;
}
   1780 
   1781 #endif
   1782 
   1783 #ifndef __ANDROID__
   1784 
   1785 // We're using deprecated fields because we're targeting multiple ffmpeg versions.
   1786 #if defined(_MSC_VER)
   1787 #pragma warning(disable : 4996) // warning C4996: 'AVCodecContext::channels': was declared deprecated
   1788 #elif defined(__clang__)
   1789 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
   1790 #elif defined(__GNUC__)
   1791 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
   1792 #endif
   1793 
   1794 // Compatibility with both ffmpeg 4.x and 5.x.
   1795 #if (LIBAVFORMAT_VERSION_MAJOR < 59)
   1796 #define ff_const59
   1797 #else
   1798 #define ff_const59 const
   1799 #endif
   1800 
   1801 #define VISIT_AVCODEC_IMPORTS(X)                                                                                       \
   1802   X(avcodec_find_encoder_by_name)                                                                                      \
   1803   X(avcodec_find_encoder)                                                                                              \
   1804   X(avcodec_alloc_context3)                                                                                            \
   1805   X(avcodec_open2)                                                                                                     \
   1806   X(avcodec_free_context)                                                                                              \
   1807   X(avcodec_send_frame)                                                                                                \
   1808   X(avcodec_receive_packet)                                                                                            \
   1809   X(avcodec_parameters_from_context)                                                                                   \
   1810   X(avcodec_get_hw_config)                                                                                             \
   1811   X(av_codec_iterate)                                                                                                  \
   1812   X(av_packet_alloc)                                                                                                   \
   1813   X(av_packet_free)                                                                                                    \
   1814   X(av_packet_rescale_ts)                                                                                              \
   1815   X(av_packet_unref)
   1816 
   1817 #define VISIT_AVFORMAT_IMPORTS(X)                                                                                      \
   1818   X(avformat_alloc_output_context2)                                                                                    \
   1819   X(avformat_new_stream)                                                                                               \
   1820   X(avformat_write_header)                                                                                             \
   1821   X(av_guess_format)                                                                                                   \
   1822   X(av_interleaved_write_frame)                                                                                        \
   1823   X(av_write_trailer)                                                                                                  \
   1824   X(avformat_free_context)                                                                                             \
   1825   X(avformat_query_codec)                                                                                              \
   1826   X(avio_open)                                                                                                         \
   1827   X(avio_closep)
   1828 
   1829 #if LIBAVUTIL_VERSION_MAJOR < 57
   1830 #define AVUTIL_57_IMPORTS(X)
   1831 #else
   1832 #define AVUTIL_57_IMPORTS(X)                                                                                           \
   1833   X(av_channel_layout_default)                                                                                         \
   1834   X(av_channel_layout_copy)                                                                                            \
   1835   X(av_opt_set_chlayout)
   1836 #endif
   1837 
   1838 #define VISIT_AVUTIL_IMPORTS(X)                                                                                        \
   1839   AVUTIL_57_IMPORTS(X)                                                                                                 \
   1840   X(av_frame_alloc)                                                                                                    \
   1841   X(av_frame_get_buffer)                                                                                               \
   1842   X(av_frame_free)                                                                                                     \
   1843   X(av_frame_make_writable)                                                                                            \
   1844   X(av_strerror)                                                                                                       \
   1845   X(av_reduce)                                                                                                         \
   1846   X(av_dict_parse_string)                                                                                              \
   1847   X(av_dict_get)                                                                                                       \
   1848   X(av_dict_free)                                                                                                      \
   1849   X(av_opt_set_int)                                                                                                    \
   1850   X(av_opt_set_sample_fmt)                                                                                             \
   1851   X(av_compare_ts)                                                                                                     \
   1852   X(av_get_bytes_per_sample)                                                                                           \
   1853   X(av_sample_fmt_is_planar)                                                                                           \
   1854   X(av_d2q)                                                                                                            \
   1855   X(av_hwdevice_get_type_name)                                                                                         \
   1856   X(av_hwdevice_ctx_create)                                                                                            \
   1857   X(av_hwframe_ctx_alloc)                                                                                              \
   1858   X(av_hwframe_ctx_init)                                                                                               \
   1859   X(av_hwframe_transfer_data)                                                                                          \
   1860   X(av_hwframe_get_buffer)                                                                                             \
   1861   X(av_buffer_ref)                                                                                                     \
   1862   X(av_buffer_unref)
   1863 
   1864 #define VISIT_SWSCALE_IMPORTS(X)                                                                                       \
   1865   X(sws_getCachedContext)                                                                                              \
   1866   X(sws_scale)                                                                                                         \
   1867   X(sws_freeContext)
   1868 
   1869 #define VISIT_SWRESAMPLE_IMPORTS(X)                                                                                    \
   1870   X(swr_alloc)                                                                                                         \
   1871   X(swr_init)                                                                                                          \
   1872   X(swr_free)                                                                                                          \
   1873   X(swr_convert)                                                                                                       \
   1874   X(swr_next_pts)
   1875 
   1876 class MediaCaptureFFmpeg final : public MediaCaptureBase
   1877 {
   1878 public:
   1879   ~MediaCaptureFFmpeg() override = default;
   1880 
   1881   static std::unique_ptr<MediaCapture> Create(Error* error);
   1882   static ContainerList GetContainerList();
   1883   static CodecList GetVideoCodecList(const char* container);
   1884   static CodecList GetAudioCodecList(const char* container);
   1885 
   1886   bool IsCapturingAudio() const override;
   1887   bool IsCapturingVideo() const override;
   1888   time_t GetElapsedTime() const override;
   1889 
   1890 protected:
   1891   void ClearState() override;
   1892   bool SendFrame(const PendingFrame& pf, Error* error) override;
   1893   bool ProcessAudioPackets(s64 video_pts, Error* error) override;
   1894   bool InternalBeginCapture(float fps, float aspect, u32 sample_rate, bool capture_video, std::string_view video_codec,
   1895                             u32 video_bitrate, std::string_view video_codec_args, bool capture_audio,
   1896                             std::string_view audio_codec, u32 audio_bitrate, std::string_view audio_codec_args,
   1897                             Error* error) override;
   1898   bool InternalEndCapture(std::unique_lock<std::mutex>& lock, Error* error) override;
   1899 
   1900 private:
   1901   static void SetAVError(Error* error, std::string_view prefix, int errnum);
   1902   static CodecList GetCodecListForContainer(const char* container, AVMediaType type);
   1903 
   1904   bool IsUsingHardwareVideoEncoding();
   1905 
   1906   bool ReceivePackets(AVCodecContext* codec_context, AVStream* stream, AVPacket* packet, Error* error);
   1907 
   1908   AVFormatContext* m_format_context = nullptr;
   1909 
   1910   AVCodecContext* m_video_codec_context = nullptr;
   1911   AVStream* m_video_stream = nullptr;
   1912   AVFrame* m_converted_video_frame = nullptr; // YUV
   1913   AVFrame* m_hw_video_frame = nullptr;
   1914   AVPacket* m_video_packet = nullptr;
   1915   SwsContext* m_sws_context = nullptr;
   1916   AVDictionary* m_video_codec_arguments = nullptr;
   1917   AVBufferRef* m_video_hw_context = nullptr;
   1918   AVBufferRef* m_video_hw_frames = nullptr;
   1919 
   1920   AVCodecContext* m_audio_codec_context = nullptr;
   1921   AVStream* m_audio_stream = nullptr;
   1922   AVFrame* m_converted_audio_frame = nullptr;
   1923   AVPacket* m_audio_packet = nullptr;
   1924   SwrContext* m_swr_context = nullptr;
   1925   AVDictionary* m_audio_codec_arguments = nullptr;
   1926 
   1927   AVPixelFormat m_video_pixel_format = AV_PIX_FMT_NONE;
   1928   u32 m_audio_frame_bps = 0;
   1929   bool m_audio_frame_planar = false;
   1930 
   1931 #define DECLARE_IMPORT(X) static inline decltype(X)* wrap_##X;
   1932   VISIT_AVCODEC_IMPORTS(DECLARE_IMPORT);
   1933   VISIT_AVFORMAT_IMPORTS(DECLARE_IMPORT);
   1934   VISIT_AVUTIL_IMPORTS(DECLARE_IMPORT);
   1935   VISIT_SWSCALE_IMPORTS(DECLARE_IMPORT);
   1936   VISIT_SWRESAMPLE_IMPORTS(DECLARE_IMPORT);
   1937 #undef DECLARE_IMPORT
   1938 
   1939   static bool LoadFFmpeg(Error* error);
   1940   static void UnloadFFmpeg();
   1941 
   1942   static inline DynamicLibrary s_avcodec_library;
   1943   static inline DynamicLibrary s_avformat_library;
   1944   static inline DynamicLibrary s_avutil_library;
   1945   static inline DynamicLibrary s_swscale_library;
   1946   static inline DynamicLibrary s_swresample_library;
   1947   static inline bool s_library_loaded = false;
   1948 };
   1949 
   1950 bool MediaCaptureFFmpeg::LoadFFmpeg(Error* error)
   1951 {
   1952   std::unique_lock lock(s_load_mutex);
   1953   if (s_library_loaded)
   1954     return true;
   1955 
   1956   static constexpr auto open_dynlib = [](DynamicLibrary& lib, const char* name, int major_version, Error* error) {
   1957     std::string full_name(DynamicLibrary::GetVersionedFilename(name, major_version));
   1958     return lib.Open(full_name.c_str(), error);
   1959   };
   1960 
   1961   bool result = true;
   1962 
   1963   result = result && open_dynlib(s_avutil_library, "avutil", LIBAVUTIL_VERSION_MAJOR, error);
   1964   result = result && open_dynlib(s_avcodec_library, "avcodec", LIBAVCODEC_VERSION_MAJOR, error);
   1965   result = result && open_dynlib(s_avformat_library, "avformat", LIBAVFORMAT_VERSION_MAJOR, error);
   1966   result = result && open_dynlib(s_swscale_library, "swscale", LIBSWSCALE_VERSION_MAJOR, error);
   1967   result = result && open_dynlib(s_swresample_library, "swresample", LIBSWRESAMPLE_VERSION_MAJOR, error);
   1968 
   1969 #define RESOLVE_IMPORT(X) result = result && s_avcodec_library.GetSymbol(#X, &wrap_##X);
   1970   VISIT_AVCODEC_IMPORTS(RESOLVE_IMPORT);
   1971 #undef RESOLVE_IMPORT
   1972 
   1973 #define RESOLVE_IMPORT(X) result = result && s_avformat_library.GetSymbol(#X, &wrap_##X);
   1974   VISIT_AVFORMAT_IMPORTS(RESOLVE_IMPORT);
   1975 #undef RESOLVE_IMPORT
   1976 
   1977 #define RESOLVE_IMPORT(X) result = result && s_avutil_library.GetSymbol(#X, &wrap_##X);
   1978   VISIT_AVUTIL_IMPORTS(RESOLVE_IMPORT);
   1979 #undef RESOLVE_IMPORT
   1980 
   1981 #define RESOLVE_IMPORT(X) result = result && s_swscale_library.GetSymbol(#X, &wrap_##X);
   1982   VISIT_SWSCALE_IMPORTS(RESOLVE_IMPORT);
   1983 #undef RESOLVE_IMPORT
   1984 
   1985 #define RESOLVE_IMPORT(X) result = result && s_swresample_library.GetSymbol(#X, &wrap_##X);
   1986   VISIT_SWRESAMPLE_IMPORTS(RESOLVE_IMPORT);
   1987 #undef RESOLVE_IMPORT
   1988 
   1989   if (result)
   1990   {
   1991     s_library_loaded = true;
   1992     std::atexit(&MediaCaptureFFmpeg::UnloadFFmpeg);
   1993     return true;
   1994   }
   1995 
   1996   UnloadFFmpeg();
   1997 
   1998   Error::SetStringFmt(
   1999     error,
   2000     TRANSLATE_FS(
   2001       "MediaCapture",
   2002       "You may be missing one or more files, or are using the incorrect version. This build of DuckStation requires:\n"
   2003       "  libavcodec: {}\n"
   2004       "  libavformat: {}\n"
   2005       "  libavutil: {}\n"
   2006       "  libswscale: {}\n"
   2007       "  libswresample: {}\n"),
   2008     LIBAVCODEC_VERSION_MAJOR, LIBAVFORMAT_VERSION_MAJOR, LIBAVUTIL_VERSION_MAJOR, LIBSWSCALE_VERSION_MAJOR,
   2009     LIBSWRESAMPLE_VERSION_MAJOR);
   2010   return false;
   2011 }
   2012 
   2013 void MediaCaptureFFmpeg::UnloadFFmpeg()
   2014 {
   2015 #define CLEAR_IMPORT(X) wrap_##X = nullptr;
   2016   VISIT_AVCODEC_IMPORTS(CLEAR_IMPORT);
   2017   VISIT_AVFORMAT_IMPORTS(CLEAR_IMPORT);
   2018   VISIT_AVUTIL_IMPORTS(CLEAR_IMPORT);
   2019   VISIT_SWSCALE_IMPORTS(CLEAR_IMPORT);
   2020   VISIT_SWRESAMPLE_IMPORTS(CLEAR_IMPORT);
   2021 #undef CLEAR_IMPORT
   2022 
   2023   s_swresample_library.Close();
   2024   s_swscale_library.Close();
   2025   s_avutil_library.Close();
   2026   s_avformat_library.Close();
   2027   s_avcodec_library.Close();
   2028   s_library_loaded = false;
   2029 }
   2030 
   2031 #undef VISIT_AVCODEC_IMPORTS
   2032 #undef VISIT_AVFORMAT_IMPORTS
   2033 #undef VISIT_AVUTIL_IMPORTS
   2034 #undef VISIT_SWSCALE_IMPORTS
   2035 #undef VISIT_SWRESAMPLE_IMPORTS
   2036 
   2037 void MediaCaptureFFmpeg::SetAVError(Error* error, std::string_view prefix, int errnum)
   2038 {
   2039   char errbuf[128];
   2040   wrap_av_strerror(errnum, errbuf, sizeof(errbuf));
   2041 
   2042   Error::SetStringFmt(error, "{} {}", prefix, errbuf);
   2043 }
   2044 
   2045 bool MediaCaptureFFmpeg::IsCapturingAudio() const
   2046 {
   2047   return (m_audio_stream != nullptr);
   2048 }
   2049 
   2050 bool MediaCaptureFFmpeg::IsCapturingVideo() const
   2051 {
   2052   return (m_video_stream != nullptr);
   2053 }
   2054 
   2055 time_t MediaCaptureFFmpeg::GetElapsedTime() const
   2056 {
   2057   std::unique_lock<std::mutex> lock(m_lock);
   2058   s64 seconds;
   2059   if (m_video_stream)
   2060   {
   2061     seconds = (m_next_video_pts * static_cast<s64>(m_video_codec_context->time_base.num)) /
   2062               static_cast<s64>(m_video_codec_context->time_base.den);
   2063   }
   2064   else
   2065   {
   2066     DebugAssert(IsCapturingAudio());
   2067     seconds = (m_next_audio_pts * static_cast<s64>(m_audio_codec_context->time_base.num)) /
   2068               static_cast<s64>(m_audio_codec_context->time_base.den);
   2069   }
   2070 
   2071   return seconds;
   2072 }
   2073 
   2074 bool MediaCaptureFFmpeg::IsUsingHardwareVideoEncoding()
   2075 {
   2076   return (m_video_hw_context != nullptr);
   2077 }
   2078 
   2079 bool MediaCaptureFFmpeg::InternalBeginCapture(float fps, float aspect, u32 sample_rate, bool capture_video,
   2080                                               std::string_view video_codec, u32 video_bitrate,
   2081                                               std::string_view video_codec_args, bool capture_audio,
   2082                                               std::string_view audio_codec, u32 audio_bitrate,
   2083                                               std::string_view audio_codec_args, Error* error)
   2084 {
   2085   ff_const59 AVOutputFormat* output_format = wrap_av_guess_format(nullptr, m_path.c_str(), nullptr);
   2086   if (!output_format)
   2087   {
   2088     Error::SetStringFmt(error, "Failed to get output format for '{}'", Path::GetFileName(m_path));
   2089     return false;
   2090   }
   2091 
   2092   int res = wrap_avformat_alloc_output_context2(&m_format_context, output_format, nullptr, m_path.c_str());
   2093   if (res < 0)
   2094   {
   2095     SetAVError(error, "avformat_alloc_output_context2() failed: ", res);
   2096     return false;
   2097   }
   2098 
   2099   // find the codec id
   2100   if (capture_video)
   2101   {
   2102     const AVCodec* vcodec = nullptr;
   2103     if (!video_codec.empty())
   2104     {
   2105       vcodec = wrap_avcodec_find_encoder_by_name(TinyString(video_codec).c_str());
   2106       if (!vcodec)
   2107       {
   2108         Error::SetStringFmt(error, "Video codec {} not found.", video_codec);
   2109         return false;
   2110       }
   2111     }
   2112 
   2113     // FFmpeg decides whether mp4, mkv, etc should use h264 or mpeg4 as their default codec by whether x264 was enabled
   2114     // But there's a lot of other h264 encoders (e.g. hardware encoders) we may want to use instead
   2115     if (!vcodec && wrap_avformat_query_codec(output_format, AV_CODEC_ID_H264, FF_COMPLIANCE_NORMAL))
   2116       vcodec = wrap_avcodec_find_encoder(AV_CODEC_ID_H264);
   2117     if (!vcodec)
   2118       vcodec = wrap_avcodec_find_encoder(output_format->video_codec);
   2119 
   2120     if (!vcodec)
   2121     {
   2122       Error::SetStringView(error, "Failed to find video encoder.");
   2123       return false;
   2124     }
   2125 
   2126     m_video_codec_context = wrap_avcodec_alloc_context3(vcodec);
   2127     if (!m_video_codec_context)
   2128     {
   2129       Error::SetStringView(error, "Failed to allocate video codec context.");
   2130       return false;
   2131     }
   2132 
   2133     m_video_codec_context->codec_type = AVMEDIA_TYPE_VIDEO;
   2134     m_video_codec_context->bit_rate = video_bitrate * 1000;
   2135     m_video_codec_context->width = m_video_width;
   2136     m_video_codec_context->height = m_video_height;
   2137     m_video_codec_context->sample_aspect_ratio = wrap_av_d2q(aspect, 100000);
   2138     wrap_av_reduce(&m_video_codec_context->time_base.num, &m_video_codec_context->time_base.den, 10000,
   2139                    static_cast<s64>(static_cast<double>(fps) * 10000.0), std::numeric_limits<s32>::max());
   2140 
   2141     // Map input pixel format.
   2142     static constexpr const std::pair<GPUTexture::Format, AVPixelFormat> texture_pf_mapping[] = {
   2143       {GPUTexture::Format::RGBA8, AV_PIX_FMT_RGBA},
   2144       {GPUTexture::Format::BGRA8, AV_PIX_FMT_BGRA},
   2145     };
   2146     if (const auto pf_mapping =
   2147           std::find_if(std::begin(texture_pf_mapping), std::end(texture_pf_mapping),
   2148                        [this](const auto& it) { return (it.first == m_video_render_texture_format); });
   2149         pf_mapping != std::end(texture_pf_mapping))
   2150     {
   2151       m_video_pixel_format = pf_mapping->second;
   2152     }
   2153     else
   2154     {
   2155       Error::SetStringFmt(error, "Unhandled input pixel format {}",
   2156                           GPUTexture::GetFormatName(m_video_render_texture_format));
   2157       return false;
   2158     }
   2159 
   2160     // Default to YUV 4:2:0 if the codec doesn't specify a pixel format.
   2161     AVPixelFormat sw_pix_fmt = AV_PIX_FMT_YUV420P;
   2162     if (vcodec->pix_fmts)
   2163     {
   2164       // Prefer YUV420 given the choice, but otherwise fall back to whatever it supports.
   2165       sw_pix_fmt = vcodec->pix_fmts[0];
   2166       for (u32 i = 0; vcodec->pix_fmts[i] != AV_PIX_FMT_NONE; i++)
   2167       {
   2168         if (vcodec->pix_fmts[i] == AV_PIX_FMT_YUV420P)
   2169         {
   2170           sw_pix_fmt = vcodec->pix_fmts[i];
   2171           break;
   2172         }
   2173       }
   2174     }
   2175     m_video_codec_context->pix_fmt = sw_pix_fmt;
   2176 
   2177     // Can we use hardware encoding?
   2178     const AVCodecHWConfig* hwconfig = wrap_avcodec_get_hw_config(vcodec, 0);
   2179     if (hwconfig && hwconfig->pix_fmt != AV_PIX_FMT_NONE && hwconfig->pix_fmt != sw_pix_fmt)
   2180     {
   2181       // First index isn't our preferred pixel format, try the others, but fall back if one doesn't exist.
   2182       int index = 1;
   2183       while (const AVCodecHWConfig* next_hwconfig = wrap_avcodec_get_hw_config(vcodec, index++))
   2184       {
   2185         if (next_hwconfig->pix_fmt == sw_pix_fmt)
   2186         {
   2187           hwconfig = next_hwconfig;
   2188           break;
   2189         }
   2190       }
   2191     }
   2192 
   2193     if (hwconfig)
   2194     {
   2195       Error hw_error;
   2196 
   2197       INFO_LOG("Trying to use {} hardware device for video encoding.",
   2198                wrap_av_hwdevice_get_type_name(hwconfig->device_type));
   2199       res = wrap_av_hwdevice_ctx_create(&m_video_hw_context, hwconfig->device_type, nullptr, nullptr, 0);
   2200       if (res < 0)
   2201       {
   2202         SetAVError(&hw_error, "av_hwdevice_ctx_create() failed: ", res);
   2203         ERROR_LOG(hw_error.GetDescription());
   2204       }
   2205       else
   2206       {
   2207         m_video_hw_frames = wrap_av_hwframe_ctx_alloc(m_video_hw_context);
   2208         if (!m_video_hw_frames)
   2209         {
   2210           ERROR_LOG("s_video_hw_frames() failed");
   2211           wrap_av_buffer_unref(&m_video_hw_context);
   2212         }
   2213         else
   2214         {
   2215           AVHWFramesContext* frames_ctx = reinterpret_cast<AVHWFramesContext*>(m_video_hw_frames->data);
   2216           frames_ctx->format = (hwconfig->pix_fmt != AV_PIX_FMT_NONE) ? hwconfig->pix_fmt : sw_pix_fmt;
   2217           frames_ctx->sw_format = sw_pix_fmt;
   2218           frames_ctx->width = m_video_codec_context->width;
   2219           frames_ctx->height = m_video_codec_context->height;
   2220           res = wrap_av_hwframe_ctx_init(m_video_hw_frames);
   2221           if (res < 0)
   2222           {
   2223             SetAVError(&hw_error, "av_hwframe_ctx_init() failed: ", res);
   2224             ERROR_LOG(hw_error.GetDescription());
   2225             wrap_av_buffer_unref(&m_video_hw_frames);
   2226             wrap_av_buffer_unref(&m_video_hw_context);
   2227           }
   2228           else
   2229           {
   2230             m_video_codec_context->hw_frames_ctx = wrap_av_buffer_ref(m_video_hw_frames);
   2231             if (hwconfig->pix_fmt != AV_PIX_FMT_NONE)
   2232               m_video_codec_context->pix_fmt = hwconfig->pix_fmt;
   2233           }
   2234         }
   2235       }
   2236 
   2237       if (!m_video_hw_context)
   2238       {
   2239         ERROR_LOG("Failed to create hardware encoder, using software encoding.");
   2240         hwconfig = nullptr;
   2241       }
   2242     }
   2243 
   2244     if (!video_codec_args.empty())
   2245     {
   2246       res = wrap_av_dict_parse_string(&m_video_codec_arguments, SmallString(video_codec_args).c_str(), "=", ":", 0);
   2247       if (res < 0)
   2248       {
   2249         SetAVError(error, "av_dict_parse_string() for video failed: ", res);
   2250         return false;
   2251       }
   2252     }
   2253 
   2254     if (output_format->flags & AVFMT_GLOBALHEADER)
   2255       m_video_codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
   2256 
   2257     bool has_pixel_format_override = wrap_av_dict_get(m_video_codec_arguments, "pixel_format", nullptr, 0);
   2258 
   2259     res = wrap_avcodec_open2(m_video_codec_context, vcodec, &m_video_codec_arguments);
   2260     if (res < 0)
   2261     {
   2262       SetAVError(error, "avcodec_open2() for video failed: ", res);
   2263       return false;
   2264     }
   2265 
   2266     // If the user overrode the pixel format, get that now
   2267     if (has_pixel_format_override)
   2268       sw_pix_fmt = m_video_codec_context->pix_fmt;
   2269 
   2270     m_converted_video_frame = wrap_av_frame_alloc();
   2271     m_hw_video_frame = IsUsingHardwareVideoEncoding() ? wrap_av_frame_alloc() : nullptr;
   2272     if (!m_converted_video_frame || (IsUsingHardwareVideoEncoding() && !m_hw_video_frame))
   2273     {
   2274       SetAVError(error, "Failed to allocate frame: ", AVERROR(ENOMEM));
   2275       return false;
   2276     }
   2277 
   2278     m_converted_video_frame->format = sw_pix_fmt;
   2279     m_converted_video_frame->width = m_video_codec_context->width;
   2280     m_converted_video_frame->height = m_video_codec_context->height;
   2281     res = wrap_av_frame_get_buffer(m_converted_video_frame, 0);
   2282     if (res < 0)
   2283     {
   2284       SetAVError(error, "av_frame_get_buffer() for converted frame failed: ", res);
   2285       return false;
   2286     }
   2287 
   2288     if (IsUsingHardwareVideoEncoding())
   2289     {
   2290       m_hw_video_frame->format = m_video_codec_context->pix_fmt;
   2291       m_hw_video_frame->width = m_video_codec_context->width;
   2292       m_hw_video_frame->height = m_video_codec_context->height;
   2293       res = wrap_av_hwframe_get_buffer(m_video_hw_frames, m_hw_video_frame, 0);
   2294       if (res < 0)
   2295       {
   2296         SetAVError(error, "av_frame_get_buffer() for HW frame failed: ", res);
   2297         return false;
   2298       }
   2299     }
   2300 
   2301     m_video_stream = wrap_avformat_new_stream(m_format_context, vcodec);
   2302     if (!m_video_stream)
   2303     {
   2304       SetAVError(error, "avformat_new_stream() for video failed: ", res);
   2305       return false;
   2306     }
   2307 
   2308     res = wrap_avcodec_parameters_from_context(m_video_stream->codecpar, m_video_codec_context);
   2309     if (res < 0)
   2310     {
   2311       SetAVError(error, "avcodec_parameters_from_context() for video failed: ", AVERROR(ENOMEM));
   2312       return false;
   2313     }
   2314 
   2315     m_video_stream->time_base = m_video_codec_context->time_base;
   2316     m_video_stream->sample_aspect_ratio = m_video_codec_context->sample_aspect_ratio;
   2317 
   2318     m_video_packet = wrap_av_packet_alloc();
   2319     if (!m_video_packet)
   2320     {
   2321       SetAVError(error, "av_packet_alloc() for video failed: ", AVERROR(ENOMEM));
   2322       return false;
   2323     }
   2324   }
   2325 
   2326   if (capture_audio)
   2327   {
   2328     const AVCodec* acodec = nullptr;
   2329     if (!audio_codec.empty())
   2330     {
   2331       acodec = wrap_avcodec_find_encoder_by_name(TinyString(audio_codec).c_str());
   2332       if (!acodec)
   2333       {
   2334         Error::SetStringFmt(error, "Audio codec {} not found.", video_codec);
   2335         return false;
   2336       }
   2337     }
   2338     if (!acodec)
   2339       acodec = wrap_avcodec_find_encoder(output_format->audio_codec);
   2340     if (!acodec)
   2341     {
   2342       Error::SetStringView(error, "Failed to find audio encoder.");
   2343       return false;
   2344     }
   2345 
   2346     m_audio_codec_context = wrap_avcodec_alloc_context3(acodec);
   2347     if (!m_audio_codec_context)
   2348     {
   2349       Error::SetStringView(error, "Failed to allocate audio codec context.");
   2350       return false;
   2351     }
   2352 
   2353     m_audio_codec_context->codec_type = AVMEDIA_TYPE_AUDIO;
   2354     m_audio_codec_context->bit_rate = audio_bitrate * 1000;
   2355     m_audio_codec_context->sample_fmt = AV_SAMPLE_FMT_S16;
   2356     m_audio_codec_context->sample_rate = sample_rate;
   2357     m_audio_codec_context->time_base = {1, static_cast<int>(sample_rate)};
   2358 #if LIBAVUTIL_VERSION_MAJOR < 57
   2359     m_audio_codec_context->channels = AUDIO_CHANNELS;
   2360     m_audio_codec_context->channel_layout = AV_CH_LAYOUT_STEREO;
   2361 #else
   2362     wrap_av_channel_layout_default(&m_audio_codec_context->ch_layout, AUDIO_CHANNELS);
   2363 #endif
   2364 
   2365     bool supports_format = false;
   2366     for (const AVSampleFormat* p = acodec->sample_fmts; *p != AV_SAMPLE_FMT_NONE; p++)
   2367     {
   2368       if (*p == m_audio_codec_context->sample_fmt)
   2369       {
   2370         supports_format = true;
   2371         break;
   2372       }
   2373     }
   2374     if (!supports_format)
   2375     {
   2376       WARNING_LOG("Audio codec '{}' does not support S16 samples, using default.", acodec->name);
   2377       m_audio_codec_context->sample_fmt = acodec->sample_fmts[0];
   2378       m_swr_context = wrap_swr_alloc();
   2379       if (!m_swr_context)
   2380       {
   2381         SetAVError(error, "swr_alloc() failed: ", AVERROR(ENOMEM));
   2382         return false;
   2383       }
   2384 
   2385       wrap_av_opt_set_int(m_swr_context, "in_channel_count", AUDIO_CHANNELS, 0);
   2386       wrap_av_opt_set_int(m_swr_context, "in_sample_rate", sample_rate, 0);
   2387       wrap_av_opt_set_sample_fmt(m_swr_context, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
   2388       wrap_av_opt_set_int(m_swr_context, "out_channel_count", AUDIO_CHANNELS, 0);
   2389       wrap_av_opt_set_int(m_swr_context, "out_sample_rate", sample_rate, 0);
   2390       wrap_av_opt_set_sample_fmt(m_swr_context, "out_sample_fmt", m_audio_codec_context->sample_fmt, 0);
   2391 
   2392 #if LIBAVUTIL_VERSION_MAJOR >= 59
   2393       wrap_av_opt_set_chlayout(m_swr_context, "in_chlayout", &m_audio_codec_context->ch_layout, 0);
   2394       wrap_av_opt_set_chlayout(m_swr_context, "out_chlayout", &m_audio_codec_context->ch_layout, 0);
   2395 #endif
   2396 
   2397       res = wrap_swr_init(m_swr_context);
   2398       if (res < 0)
   2399       {
   2400         SetAVError(error, "swr_init() failed: ", res);
   2401         return false;
   2402       }
   2403     }
   2404 
   2405     // TODO: Check channel layout support
   2406 
   2407     if (!audio_codec_args.empty())
   2408     {
   2409       res = wrap_av_dict_parse_string(&m_audio_codec_arguments, SmallString(audio_codec_args).c_str(), "=", ":", 0);
   2410       if (res < 0)
   2411       {
   2412         SetAVError(error, "av_dict_parse_string() for audio failed: ", res);
   2413         return false;
   2414       }
   2415     }
   2416 
   2417     if (output_format->flags & AVFMT_GLOBALHEADER)
   2418       m_audio_codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
   2419 
   2420     res = wrap_avcodec_open2(m_audio_codec_context, acodec, &m_audio_codec_arguments);
   2421     if (res < 0)
   2422     {
   2423       SetAVError(error, "avcodec_open2() for audio failed: ", res);
   2424       return false;
   2425     }
   2426 
   2427     // Use packet size for frame if it supports it... but most don't.
   2428     if (acodec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
   2429       m_audio_frame_size = static_cast<u32>(static_cast<float>(sample_rate) / fps);
   2430     else
   2431       m_audio_frame_size = m_audio_codec_context->frame_size;
   2432     if (m_audio_frame_size >= m_audio_buffer.size())
   2433     {
   2434       SetAVError(error,
   2435                  TinyString::from_format("Audio frame size {} exceeds buffer size {}", m_audio_frame_size,
   2436                                          m_audio_buffer.size()),
   2437                  AVERROR(EINVAL));
   2438       return false;
   2439     }
   2440 
   2441     m_audio_frame_bps = wrap_av_get_bytes_per_sample(m_audio_codec_context->sample_fmt);
   2442     m_audio_frame_planar = (wrap_av_sample_fmt_is_planar(m_audio_codec_context->sample_fmt) != 0);
   2443 
   2444     m_converted_audio_frame = wrap_av_frame_alloc();
   2445     if (!m_converted_audio_frame)
   2446     {
   2447       SetAVError(error, "Failed to allocate audio frame: ", AVERROR(ENOMEM));
   2448       return false;
   2449     }
   2450 
   2451     m_converted_audio_frame->format = m_audio_codec_context->sample_fmt;
   2452     m_converted_audio_frame->nb_samples = m_audio_frame_size;
   2453 #if LIBAVUTIL_VERSION_MAJOR < 57
   2454     m_converted_audio_frame->channels = AUDIO_CHANNELS;
   2455     m_converted_audio_frame->channel_layout = m_audio_codec_context->channel_layout;
   2456 #else
   2457     wrap_av_channel_layout_copy(&m_converted_audio_frame->ch_layout, &m_audio_codec_context->ch_layout);
   2458 #endif
   2459     res = wrap_av_frame_get_buffer(m_converted_audio_frame, 0);
   2460     if (res < 0)
   2461     {
   2462       SetAVError(error, "av_frame_get_buffer() for audio frame failed: ", res);
   2463       return false;
   2464     }
   2465 
   2466     m_audio_stream = wrap_avformat_new_stream(m_format_context, acodec);
   2467     if (!m_audio_stream)
   2468     {
   2469       SetAVError(error, "avformat_new_stream() for audio failed: ", AVERROR(ENOMEM));
   2470       return false;
   2471     }
   2472 
   2473     res = wrap_avcodec_parameters_from_context(m_audio_stream->codecpar, m_audio_codec_context);
   2474     if (res < 0)
   2475     {
   2476       SetAVError(error, "avcodec_parameters_from_context() for audio failed: ", res);
   2477       return false;
   2478     }
   2479 
   2480     m_audio_stream->time_base = m_audio_codec_context->time_base;
   2481 
   2482     m_audio_packet = wrap_av_packet_alloc();
   2483     if (!m_audio_packet)
   2484     {
   2485       SetAVError(error, "av_packet_alloc() for audio failed: ", AVERROR(ENOMEM));
   2486       return false;
   2487     }
   2488   }
   2489 
   2490   res = wrap_avio_open(&m_format_context->pb, m_path.c_str(), AVIO_FLAG_WRITE);
   2491   if (res < 0)
   2492   {
   2493     SetAVError(error, "avio_open() failed: ", res);
   2494     return false;
   2495   }
   2496 
   2497   res = wrap_avformat_write_header(m_format_context, nullptr);
   2498   if (res < 0)
   2499   {
   2500     SetAVError(error, "avformat_write_header() failed: ", res);
   2501     return false;
   2502   }
   2503 
   2504   return true;
   2505 }
   2506 
   2507 bool MediaCaptureFFmpeg::InternalEndCapture(std::unique_lock<std::mutex>& lock, Error* error)
   2508 {
   2509   int res = MediaCaptureBase::InternalEndCapture(lock, error) ? 0 : -1;
   2510   if (res == 0)
   2511   {
   2512     // end of stream
   2513     if (m_video_stream)
   2514     {
   2515       res = wrap_avcodec_send_frame(m_video_codec_context, nullptr);
   2516       if (res < 0)
   2517         SetAVError(error, "avcodec_send_frame() for video EOS failed: ", res);
   2518       else
   2519         res = ReceivePackets(m_video_codec_context, m_video_stream, m_video_packet, error) ? 0 : -1;
   2520     }
   2521     if (m_audio_stream)
   2522     {
   2523       res = wrap_avcodec_send_frame(m_audio_codec_context, nullptr);
   2524       if (res < 0)
   2525         SetAVError(error, "avcodec_send_frame() for audio EOS failed: ", res);
   2526       else
   2527         res = ReceivePackets(m_audio_codec_context, m_audio_stream, m_audio_packet, error) ? 0 : -1;
   2528     }
   2529 
   2530     // end of file!
   2531     if (res == 0)
   2532     {
   2533       res = wrap_av_write_trailer(m_format_context);
   2534       if (res < 0)
   2535         SetAVError(error, "av_write_trailer() failed: ", res);
   2536     }
   2537   }
   2538 
   2539   return (res == 0);
   2540 }
   2541 
void MediaCaptureFFmpeg::ClearState()
{
  // Releases all FFmpeg state. Every member is individually guarded, so this is
  // safe to call on a partially-initialized capture and doubles as the
  // failure-path cleanup. Order matters: the file is closed and the frames /
  // contexts are freed before the format context that references them.

  // Close the output file first, while the format context is still alive.
  if (m_format_context)
  {
    int res = wrap_avio_closep(&m_format_context->pb);
    if (res < 0) [[unlikely]]
    {
      // Nothing actionable at teardown time; just log the failure.
      Error close_error;
      SetAVError(&close_error, "avio_closep() failed: ", res);
      ERROR_LOG(close_error.GetDescription());
    }
  }

  // Video state: scaler, packet, frames, hardware frame/device refs, codec.
  if (m_sws_context)
  {
    wrap_sws_freeContext(m_sws_context);
    m_sws_context = nullptr;
  }
  if (m_video_packet)
    wrap_av_packet_free(&m_video_packet);
  if (m_converted_video_frame)
    wrap_av_frame_free(&m_converted_video_frame);
  if (m_hw_video_frame)
    wrap_av_frame_free(&m_hw_video_frame);
  if (m_video_hw_frames)
    wrap_av_buffer_unref(&m_video_hw_frames);
  if (m_video_hw_context)
    wrap_av_buffer_unref(&m_video_hw_context);
  if (m_video_codec_context)
    wrap_avcodec_free_context(&m_video_codec_context);
  // Stream is owned by the format context (avformat_new_stream); freed with it below.
  m_video_stream = nullptr;

  // Audio state: resampler, packet, frame, codec.
  if (m_swr_context)
    wrap_swr_free(&m_swr_context);
  if (m_audio_packet)
    wrap_av_packet_free(&m_audio_packet);
  if (m_converted_audio_frame)
    wrap_av_frame_free(&m_converted_audio_frame);
  if (m_audio_codec_context)
    wrap_avcodec_free_context(&m_audio_codec_context);
  // Stream is owned by the format context; freed with it below.
  m_audio_stream = nullptr;

  // Finally the container itself, then any leftover codec argument dictionaries.
  if (m_format_context)
  {
    wrap_avformat_free_context(m_format_context);
    m_format_context = nullptr;
  }
  if (m_video_codec_arguments)
    wrap_av_dict_free(&m_video_codec_arguments);
  if (m_audio_codec_arguments)
    wrap_av_dict_free(&m_audio_codec_arguments);
}
   2594 
   2595 bool MediaCaptureFFmpeg::ReceivePackets(AVCodecContext* codec_context, AVStream* stream, AVPacket* packet, Error* error)
   2596 {
   2597   for (;;)
   2598   {
   2599     int res = wrap_avcodec_receive_packet(codec_context, packet);
   2600     if (res == AVERROR(EAGAIN) || res == AVERROR_EOF)
   2601     {
   2602       // no more data available
   2603       break;
   2604     }
   2605     else if (res < 0) [[unlikely]]
   2606     {
   2607       SetAVError(error, "avcodec_receive_packet() failed: ", res);
   2608       return false;
   2609     }
   2610 
   2611     packet->stream_index = stream->index;
   2612 
   2613     // in case the frame rate changed...
   2614     wrap_av_packet_rescale_ts(packet, codec_context->time_base, stream->time_base);
   2615 
   2616     res = wrap_av_interleaved_write_frame(m_format_context, packet);
   2617     if (res < 0) [[unlikely]]
   2618     {
   2619       SetAVError(error, "av_interleaved_write_frame() failed: ", res);
   2620       return false;
   2621     }
   2622 
   2623     wrap_av_packet_unref(packet);
   2624   }
   2625 
   2626   return true;
   2627 }
   2628 
   2629 bool MediaCaptureFFmpeg::SendFrame(const PendingFrame& pf, Error* error)
   2630 {
   2631   const u8* source_ptr = pf.tex->GetMapPointer();
   2632   const int source_width = static_cast<int>(pf.tex->GetWidth());
   2633   const int source_height = static_cast<int>(pf.tex->GetHeight());
   2634 
   2635   // OpenGL lower-left flip.
   2636   int source_pitch = static_cast<int>(pf.tex->GetMapPitch());
   2637   if (g_gpu_device->UsesLowerLeftOrigin())
   2638   {
   2639     source_ptr = source_ptr + static_cast<size_t>(source_pitch) * static_cast<u32>(source_height - 1);
   2640     source_pitch = -source_pitch;
   2641   }
   2642 
   2643   // In case a previous frame is still using the frame.
   2644   wrap_av_frame_make_writable(m_converted_video_frame);
   2645 
   2646   m_sws_context = wrap_sws_getCachedContext(m_sws_context, source_width, source_height, m_video_pixel_format,
   2647                                             m_converted_video_frame->width, m_converted_video_frame->height,
   2648                                             static_cast<AVPixelFormat>(m_converted_video_frame->format), SWS_BICUBIC,
   2649                                             nullptr, nullptr, nullptr);
   2650   if (!m_sws_context) [[unlikely]]
   2651   {
   2652     Error::SetStringView(error, "sws_getCachedContext() failed");
   2653     return false;
   2654   }
   2655 
   2656   wrap_sws_scale(m_sws_context, reinterpret_cast<const u8**>(&source_ptr), &source_pitch, 0, source_height,
   2657                  m_converted_video_frame->data, m_converted_video_frame->linesize);
   2658 
   2659   AVFrame* frame_to_send = m_converted_video_frame;
   2660   if (IsUsingHardwareVideoEncoding())
   2661   {
   2662     // Need to transfer the frame to hardware.
   2663     const int res = wrap_av_hwframe_transfer_data(m_hw_video_frame, m_converted_video_frame, 0);
   2664     if (res < 0) [[unlikely]]
   2665     {
   2666       SetAVError(error, "av_hwframe_transfer_data() failed: ", res);
   2667       return false;
   2668     }
   2669 
   2670     frame_to_send = m_hw_video_frame;
   2671   }
   2672 
   2673   // Set the correct PTS before handing it off.
   2674   frame_to_send->pts = pf.pts;
   2675 
   2676   const int res = wrap_avcodec_send_frame(m_video_codec_context, frame_to_send);
   2677   if (res < 0) [[unlikely]]
   2678   {
   2679     SetAVError(error, "avcodec_send_frame() failed: ", res);
   2680     return false;
   2681   }
   2682 
   2683   return ReceivePackets(m_video_codec_context, m_video_stream, m_video_packet, error);
   2684 }
   2685 
bool MediaCaptureFFmpeg::ProcessAudioPackets(s64 video_pts, Error* error)
{
  // Moves buffered samples out of the audio ring buffer into the encoder frame,
  // converting the sample format when required, and submits each completed
  // frame for encoding. When video is being captured, audio is only consumed
  // while its PTS is behind the supplied video PTS, keeping the two streams
  // interleaved. Returns false with error set on any converter/encoder failure.
  const u32 max_audio_buffer_size = GetAudioBufferSizeInFrames();

  u32 pending_frames = m_audio_buffer_size.load(std::memory_order_acquire);
  while (pending_frames > 0 &&
         (!m_video_codec_context || wrap_av_compare_ts(video_pts, m_video_codec_context->time_base, m_next_audio_pts,
                                                       m_audio_codec_context->time_base) > 0))
  {
    // In case the encoder is still using it.
    // NOTE(review): the return value of av_frame_make_writable() is ignored
    // here; a failed reallocation would go unnoticed.
    if (m_audio_frame_pos == 0)
      wrap_av_frame_make_writable(m_converted_audio_frame);

    // Grab as many source frames as we can: bounded by ring-buffer wraparound
    // (contig_frames) and by the space left in the current output frame.
    const u32 contig_frames = std::min(pending_frames, max_audio_buffer_size - m_audio_buffer_read_pos);
    const u32 this_batch = std::min(m_audio_frame_size - m_audio_frame_pos, contig_frames);

    // Do we need to convert the sample format?
    if (!m_swr_context)
    {
      // No, just copy frames out of staging buffer.
      if (m_audio_frame_planar)
      {
        // Encoder wants planar S16: de-interleave one channel at a time.
        // This is slow. Hopefully doesn't happen in too many configurations.
        for (u32 i = 0; i < AUDIO_CHANNELS; i++)
        {
          u8* output = m_converted_audio_frame->data[i] + m_audio_frame_pos * m_audio_frame_bps;
          const u8* input = reinterpret_cast<u8*>(&m_audio_buffer[m_audio_buffer_read_pos * AUDIO_CHANNELS + i]);
          for (u32 j = 0; j < this_batch; j++)
          {
            std::memcpy(output, input, sizeof(s16));
            input += sizeof(s16) * AUDIO_CHANNELS;
            output += m_audio_frame_bps;
          }
        }
      }
      else
      {
        // Interleaved S16 output: one straight memcpy covers all channels.
        std::memcpy(m_converted_audio_frame->data[0] + m_audio_frame_pos * m_audio_frame_bps * AUDIO_CHANNELS,
                    &m_audio_buffer[m_audio_buffer_read_pos * AUDIO_CHANNELS],
                    this_batch * sizeof(s16) * AUDIO_CHANNELS);
      }
    }
    else
    {
      // Use swresample to convert.
      const u8* input = reinterpret_cast<u8*>(&m_audio_buffer[m_audio_buffer_read_pos * AUDIO_CHANNELS]);

      // Might be planar, so offset both buffers.
      u8* output[AUDIO_CHANNELS];
      if (m_audio_frame_planar)
      {
        for (u32 i = 0; i < AUDIO_CHANNELS; i++)
          output[i] = m_converted_audio_frame->data[i] + (m_audio_frame_pos * m_audio_frame_bps);
      }
      else
      {
        output[0] = m_converted_audio_frame->data[0] + (m_audio_frame_pos * m_audio_frame_bps * AUDIO_CHANNELS);
      }

      // In/out sample rates match, so this presumably yields exactly this_batch
      // output samples — TODO confirm against swr_convert() buffering semantics.
      const int res = wrap_swr_convert(m_swr_context, output, this_batch, &input, this_batch);
      if (res < 0)
      {
        SetAVError(error, "swr_convert() failed: ", res);
        return false;
      }
    }

    // Advance the ring-buffer read cursor (with wraparound) and frame position.
    m_audio_buffer_read_pos = (m_audio_buffer_read_pos + this_batch) % max_audio_buffer_size;
    m_audio_buffer_size.fetch_sub(this_batch);
    m_audio_frame_pos += this_batch;
    pending_frames -= this_batch;

    // Do we have a complete frame?
    if (m_audio_frame_pos == m_audio_frame_size)
    {
      m_audio_frame_pos = 0;

      if (!m_swr_context)
      {
        // PTS is simply frames.
        m_converted_audio_frame->pts = m_next_audio_pts;
      }
      else
      {
        // Let swresample account for samples it has buffered internally.
        m_converted_audio_frame->pts = wrap_swr_next_pts(m_swr_context, m_next_audio_pts);
      }

      // Increment PTS.
      m_next_audio_pts += m_audio_frame_size;

      // Send off for encoding.
      int res = wrap_avcodec_send_frame(m_audio_codec_context, m_converted_audio_frame);
      if (res < 0) [[unlikely]]
      {
        SetAVError(error, "avcodec_send_frame() for audio failed: ", res);
        return false;
      }

      // Write any packets back to the output file.
      if (!ReceivePackets(m_audio_codec_context, m_audio_stream, m_audio_packet, error)) [[unlikely]]
        return false;
    }
  }

  return true;
}
   2794 
   2795 std::unique_ptr<MediaCapture> MediaCaptureFFmpeg::Create(Error* error)
   2796 {
   2797   if (!LoadFFmpeg(error))
   2798     return nullptr;
   2799 
   2800   return std::make_unique<MediaCaptureFFmpeg>();
   2801 }
   2802 
   2803 MediaCapture::ContainerList MediaCaptureFFmpeg::GetContainerList()
   2804 {
   2805   return {
   2806     {"avi", "Audio Video Interleave"}, {"mp4", "MPEG-4 Part 14"},         {"mkv", "Matroska Media Container"},
   2807     {"mov", "QuickTime File Format"},  {"mp3", "MPEG-2 Audio Layer III"}, {"wav", "Waveform Audio File Format"},
   2808   };
   2809 }
   2810 
MediaCaptureBase::CodecList MediaCaptureFFmpeg::GetCodecListForContainer(const char* container, AVMediaType type)
{
  // Enumerates FFmpeg encoders of the given media type (video or audio) that
  // can be muxed into the specified container, as (name, description) pairs.
  // Returns an empty list when FFmpeg can't be loaded or the container is unknown.
  CodecList ret;

  Error error;
  if (!LoadFFmpeg(&error))
  {
    ERROR_LOG("FFmpeg load failed: {}", error.GetDescription());
    return ret;
  }

  // Guess the container format from a dummy filename; "mp4" if unspecified.
  const AVOutputFormat* output_format =
    wrap_av_guess_format(nullptr, fmt::format("video.{}", container ? container : "mp4").c_str(), nullptr);
  if (!output_format)
  {
    ERROR_LOG("av_guess_format() failed");
    return ret;
  }

  void* iter = nullptr;
  const AVCodec* codec;
  while ((codec = wrap_av_codec_iterate(&iter)) != nullptr)
  {
    // Only consider codecs of the requested type that have an encoder which is
    // also resolvable by name (that's how the codec is selected at capture time).
    if (codec->type != type || !wrap_avcodec_find_encoder(codec->id) || !wrap_avcodec_find_encoder_by_name(codec->name))
      continue;

    // Skip codecs that can't be muxed into this container.
    if (!wrap_avformat_query_codec(output_format, codec->id, FF_COMPLIANCE_NORMAL))
      continue;

    // Skip duplicate names.
    if (std::find_if(ret.begin(), ret.end(), [codec](const auto& it) { return it.first == codec->name; }) != ret.end())
      continue;

    ret.emplace_back(codec->name, codec->long_name ? codec->long_name : codec->name);
  }

  return ret;
}
   2849 
MediaCapture::CodecList MediaCaptureFFmpeg::GetVideoCodecList(const char* container)
{
  // Convenience wrapper: video encoders usable with this container.
  return GetCodecListForContainer(container, AVMEDIA_TYPE_VIDEO);
}
   2854 
MediaCapture::CodecList MediaCaptureFFmpeg::GetAudioCodecList(const char* container)
{
  // Convenience wrapper: audio encoders usable with this container.
  return GetCodecListForContainer(container, AVMEDIA_TYPE_AUDIO);
}
   2859 
   2860 #endif
   2861 
   2862 } // namespace
   2863 
// Short (settings) names for each available capture backend. Entry order must
// match the MediaCaptureBackend enum; the static_asserts below verify the count.
static constexpr const std::array s_backend_names = {
#ifdef _WIN32
  "MediaFoundation",
#endif
#ifndef __ANDROID__
  "FFmpeg",
#endif
};
// Human-readable, translatable backend names, in the same order as s_backend_names.
static constexpr const std::array s_backend_display_names = {
#ifdef _WIN32
  TRANSLATE_NOOP("MediaCapture", "Media Foundation"),
#endif
#ifndef __ANDROID__
  TRANSLATE_NOOP("MediaCapture", "FFmpeg"),
#endif
};
static_assert(s_backend_names.size() == static_cast<size_t>(MediaCaptureBackend::MaxCount));
static_assert(s_backend_display_names.size() == static_cast<size_t>(MediaCaptureBackend::MaxCount));
   2882 
   2883 MediaCapture::~MediaCapture() = default;
   2884 
   2885 std::optional<MediaCaptureBackend> MediaCapture::ParseBackendName(const char* str)
   2886 {
   2887   int index = 0;
   2888   for (const char* name : s_backend_names)
   2889   {
   2890     if (std::strcmp(name, str) == 0)
   2891       return static_cast<MediaCaptureBackend>(index);
   2892 
   2893     index++;
   2894   }
   2895 
   2896   return std::nullopt;
   2897 }
   2898 
const char* MediaCapture::GetBackendName(MediaCaptureBackend backend)
{
  // No bounds check: callers must pass a valid enum value.
  return s_backend_names[static_cast<size_t>(backend)];
}
   2903 
const char* MediaCapture::GetBackendDisplayName(MediaCaptureBackend backend)
{
  // Runs the display name through the translation system before returning it.
  return Host::TranslateToCString("MediaCapture", s_backend_display_names[static_cast<size_t>(backend)]);
}
   2908 
void MediaCapture::AdjustVideoSize(u32* width, u32* height)
{
  // Rounds the capture dimensions up (in place) to the required power-of-two
  // alignment for width/height.
  *width = Common::AlignUpPow2(*width, VIDEO_WIDTH_ALIGNMENT);
  *height = Common::AlignUpPow2(*height, VIDEO_HEIGHT_ALIGNMENT);
}
   2914 
   2915 MediaCapture::ContainerList MediaCapture::GetContainerList(MediaCaptureBackend backend)
   2916 {
   2917   ContainerList ret;
   2918   switch (backend)
   2919   {
   2920 #ifdef _WIN32
   2921     case MediaCaptureBackend::MediaFoundation:
   2922       ret = MediaCaptureMF::GetContainerList();
   2923       break;
   2924 #endif
   2925 #ifndef __ANDROID__
   2926     case MediaCaptureBackend::FFmpeg:
   2927       ret = MediaCaptureFFmpeg::GetContainerList();
   2928       break;
   2929 #endif
   2930     default:
   2931       break;
   2932   }
   2933   return ret;
   2934 }
   2935 
   2936 MediaCapture::CodecList MediaCapture::GetVideoCodecList(MediaCaptureBackend backend, const char* container)
   2937 {
   2938   CodecList ret;
   2939   switch (backend)
   2940   {
   2941 #ifdef _WIN32
   2942     case MediaCaptureBackend::MediaFoundation:
   2943       ret = MediaCaptureMF::GetVideoCodecList(container);
   2944       break;
   2945 #endif
   2946 #ifndef __ANDROID__
   2947     case MediaCaptureBackend::FFmpeg:
   2948       ret = MediaCaptureFFmpeg::GetVideoCodecList(container);
   2949       break;
   2950 #endif
   2951     default:
   2952       break;
   2953   }
   2954   return ret;
   2955 }
   2956 
   2957 MediaCapture::CodecList MediaCapture::GetAudioCodecList(MediaCaptureBackend backend, const char* container)
   2958 {
   2959   CodecList ret;
   2960   switch (backend)
   2961   {
   2962 #ifdef _WIN32
   2963     case MediaCaptureBackend::MediaFoundation:
   2964       ret = MediaCaptureMF::GetAudioCodecList(container);
   2965       break;
   2966 #endif
   2967 #ifndef __ANDROID__
   2968     case MediaCaptureBackend::FFmpeg:
   2969       ret = MediaCaptureFFmpeg::GetAudioCodecList(container);
   2970       break;
   2971 #endif
   2972     default:
   2973       break;
   2974   }
   2975   return ret;
   2976 }
   2977 
   2978 std::unique_ptr<MediaCapture> MediaCapture::Create(MediaCaptureBackend backend, Error* error)
   2979 {
   2980   switch (backend)
   2981   {
   2982 #ifdef _WIN32
   2983     case MediaCaptureBackend::MediaFoundation:
   2984       return MediaCaptureMF::Create(error);
   2985 #endif
   2986 #ifndef __ANDROID__
   2987     case MediaCaptureBackend::FFmpeg:
   2988       return MediaCaptureFFmpeg::Create(error);
   2989 #endif
   2990     default:
   2991       return nullptr;
   2992   }
   2993 }