media_capture.cpp (104134B)
1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR PolyForm-Strict-1.0.0) 3 4 #include "media_capture.h" 5 #include "gpu_device.h" 6 #include "host.h" 7 8 #include "common/align.h" 9 #include "common/assert.h" 10 #include "common/dynamic_library.h" 11 #include "common/error.h" 12 #include "common/file_system.h" 13 #include "common/gsvector.h" 14 #include "common/log.h" 15 #include "common/path.h" 16 #include "common/string_util.h" 17 #include "common/threading.h" 18 19 #include "IconsFontAwesome5.h" 20 #include "fmt/format.h" 21 22 #include <algorithm> 23 #include <atomic> 24 #include <condition_variable> 25 #include <cstring> 26 #include <deque> 27 #include <limits> 28 #include <mutex> 29 #include <string> 30 #include <thread> 31 32 #ifdef _WIN32 33 #include "common/windows_headers.h" 34 35 #include <Mferror.h> 36 #include <codecapi.h> 37 #include <mfapi.h> 38 #include <mfidl.h> 39 #include <mfreadwrite.h> 40 #include <wrl/client.h> 41 42 #pragma comment(lib, "mfuuid") 43 #endif 44 45 #ifndef __ANDROID__ 46 47 #ifdef _MSC_VER 48 #pragma warning(push) 49 #pragma warning(disable : 4244) // warning C4244: 'return': conversion from 'int' to 'uint8_t', possible loss of data 50 #endif 51 52 extern "C" { 53 #include "libavcodec/avcodec.h" 54 #include "libavcodec/version.h" 55 #include "libavformat/avformat.h" 56 #include "libavformat/version.h" 57 #include "libavutil/dict.h" 58 #include "libavutil/opt.h" 59 #include "libavutil/version.h" 60 #include "libswresample/swresample.h" 61 #include "libswresample/version.h" 62 #include "libswscale/swscale.h" 63 #include "libswscale/version.h" 64 } 65 66 #ifdef _MSC_VER 67 #pragma warning(pop) 68 #endif 69 70 #endif 71 72 Log_SetChannel(MediaCapture); 73 74 namespace { 75 76 static constexpr u32 VIDEO_WIDTH_ALIGNMENT = 8; 77 static constexpr u32 VIDEO_HEIGHT_ALIGNMENT = 8; 78 79 class ALIGN_TO_CACHE_LINE MediaCaptureBase : public MediaCapture 80 { 81 public: 82 
static constexpr u32 NUM_FRAMES_IN_FLIGHT = 3; 83 static constexpr u32 MAX_PENDING_FRAMES = NUM_FRAMES_IN_FLIGHT * 2; 84 static constexpr u32 AUDIO_CHANNELS = 2; 85 86 virtual ~MediaCaptureBase() override; 87 88 bool BeginCapture(float fps, float aspect, u32 width, u32 height, GPUTexture::Format texture_format, u32 sample_rate, 89 std::string path, bool capture_video, std::string_view video_codec, u32 video_bitrate, 90 std::string_view video_codec_args, bool capture_audio, std::string_view audio_codec, 91 u32 audio_bitrate, std::string_view audio_codec_args, Error* error) override final; 92 93 const std::string& GetPath() const override final; 94 std::string GetNextCapturePath() const override final; 95 u32 GetVideoWidth() const override final; 96 u32 GetVideoHeight() const override final; 97 float GetVideoFPS() const override final; 98 99 float GetCaptureThreadUsage() const override final; 100 float GetCaptureThreadTime() const override final; 101 void UpdateCaptureThreadUsage(double pct_divider, double time_divider) override final; 102 103 GPUTexture* GetRenderTexture() override final; 104 bool DeliverVideoFrame(GPUTexture* stex) override final; 105 bool DeliverAudioFrames(const s16* frames, u32 num_frames) override final; 106 bool EndCapture(Error* error) override final; 107 void Flush() override final; 108 109 protected: 110 struct PendingFrame 111 { 112 enum class State 113 { 114 Unused, 115 NeedsMap, 116 NeedsEncoding 117 }; 118 119 std::unique_ptr<GPUDownloadTexture> tex; 120 s64 pts; 121 State state; 122 }; 123 124 ALWAYS_INLINE u32 GetAudioBufferSizeInFrames() const 125 { 126 return (static_cast<u32>(m_audio_buffer.size()) / AUDIO_CHANNELS); 127 } 128 129 void ProcessFramePendingMap(std::unique_lock<std::mutex>& lock); 130 void ProcessAllInFlightFrames(std::unique_lock<std::mutex>& lock); 131 void EncoderThreadEntryPoint(); 132 void StartEncoderThread(); 133 void StopEncoderThread(std::unique_lock<std::mutex>& lock); 134 void DeleteOutputFile(); 135 136 
virtual void ClearState(); 137 virtual bool SendFrame(const PendingFrame& pf, Error* error) = 0; 138 virtual bool ProcessAudioPackets(s64 video_pts, Error* error) = 0; 139 140 virtual bool InternalBeginCapture(float fps, float aspect, u32 sample_rate, bool capture_video, 141 std::string_view video_codec, u32 video_bitrate, std::string_view video_codec_args, 142 bool capture_audio, std::string_view audio_codec, u32 audio_bitrate, 143 std::string_view audio_codec_args, Error* error) = 0; 144 virtual bool InternalEndCapture(std::unique_lock<std::mutex>& lock, Error* error); 145 146 mutable std::mutex m_lock; 147 std::string m_path; 148 std::atomic_bool m_capturing{false}; 149 std::atomic_bool m_encoding_error{false}; 150 151 GPUTexture::Format m_video_render_texture_format = GPUTexture::Format::Unknown; 152 u32 m_video_width = 0; 153 u32 m_video_height = 0; 154 float m_video_fps = 0; 155 s64 m_next_video_pts = 0; 156 std::unique_ptr<GPUTexture> m_render_texture; 157 158 s64 m_next_audio_pts = 0; 159 u32 m_audio_frame_pos = 0; 160 u32 m_audio_frame_size = 0; 161 162 Threading::Thread m_encoder_thread; 163 u64 m_encoder_thread_last_time = 0; 164 float m_encoder_thread_usage = 0.0f; 165 float m_encoder_thread_time = 0.0f; 166 167 std::condition_variable m_frame_ready_cv; 168 std::condition_variable m_frame_encoded_cv; 169 std::array<PendingFrame, MAX_PENDING_FRAMES> m_pending_frames = {}; 170 u32 m_pending_frames_pos = 0; 171 u32 m_frames_pending_map = 0; 172 u32 m_frames_map_consume_pos = 0; 173 u32 m_frames_pending_encode = 0; 174 u32 m_frames_encode_consume_pos = 0; 175 176 DynamicHeapArray<s16> m_audio_buffer; 177 std::atomic<u32> m_audio_buffer_size{0}; 178 u32 m_audio_buffer_write_pos = 0; 179 ALIGN_TO_CACHE_LINE u32 m_audio_buffer_read_pos = 0; 180 181 // Shared across all backends. 
182 [[maybe_unused]] static inline std::mutex s_load_mutex; 183 }; 184 185 MediaCaptureBase::~MediaCaptureBase() = default; 186 187 bool MediaCaptureBase::BeginCapture(float fps, float aspect, u32 width, u32 height, GPUTexture::Format texture_format, 188 u32 sample_rate, std::string path, bool capture_video, std::string_view video_codec, 189 u32 video_bitrate, std::string_view video_codec_args, bool capture_audio, 190 std::string_view audio_codec, u32 audio_bitrate, std::string_view audio_codec_args, 191 Error* error) 192 { 193 m_video_render_texture_format = texture_format; 194 m_video_width = width; 195 m_video_height = height; 196 m_video_fps = fps; 197 198 if (path.empty()) 199 { 200 Error::SetStringView(error, "No path specified."); 201 return false; 202 } 203 else if (capture_video && 204 (fps == 0.0f || m_video_width == 0 || !Common::IsAlignedPow2(m_video_width, VIDEO_WIDTH_ALIGNMENT) || 205 m_video_height == 0 || !Common::IsAlignedPow2(m_video_height, VIDEO_HEIGHT_ALIGNMENT))) 206 { 207 Error::SetStringView(error, "Invalid video dimensions/rate."); 208 return false; 209 } 210 211 m_path = std::move(path); 212 m_capturing.store(true, std::memory_order_release); 213 214 // allocate audio buffer, dynamic based on sample rate 215 if (capture_audio) 216 m_audio_buffer.resize(sample_rate * MAX_PENDING_FRAMES * AUDIO_CHANNELS); 217 218 INFO_LOG("Initializing capture:"); 219 if (capture_video) 220 { 221 INFO_LOG(" Video: FPS={}, Aspect={}, Codec={}, Bitrate={}, Args={}", fps, aspect, video_codec, video_bitrate, 222 video_codec_args); 223 } 224 if (capture_audio) 225 { 226 INFO_LOG(" Audio: SampleRate={}, Codec={}, Bitrate={}, Args={}", sample_rate, audio_codec, audio_bitrate, 227 audio_codec_args); 228 } 229 230 if (!InternalBeginCapture(fps, aspect, sample_rate, capture_video, video_codec, video_bitrate, video_codec_args, 231 capture_audio, audio_codec, audio_bitrate, audio_codec_args, error)) 232 { 233 ClearState(); 234 return false; 235 } 236 237 
StartEncoderThread(); 238 return true; 239 } 240 241 GPUTexture* MediaCaptureBase::GetRenderTexture() 242 { 243 if (m_render_texture) [[likely]] 244 return m_render_texture.get(); 245 246 m_render_texture = g_gpu_device->CreateTexture(m_video_width, m_video_height, 1, 1, 1, GPUTexture::Type::RenderTarget, 247 m_video_render_texture_format); 248 if (!m_render_texture) [[unlikely]] 249 { 250 ERROR_LOG("Failed to create {}x{} render texture.", m_video_width, m_video_height); 251 return nullptr; 252 } 253 254 return m_render_texture.get(); 255 } 256 257 bool MediaCaptureBase::DeliverVideoFrame(GPUTexture* stex) 258 { 259 std::unique_lock<std::mutex> lock(m_lock); 260 261 // If the encoder thread reported an error, stop the capture. 262 if (m_encoding_error.load(std::memory_order_acquire)) 263 return false; 264 265 if (m_frames_pending_map >= NUM_FRAMES_IN_FLIGHT) 266 ProcessFramePendingMap(lock); 267 268 PendingFrame& pf = m_pending_frames[m_pending_frames_pos]; 269 270 // It shouldn't be pending map, but the encode thread might be lagging. 
271 DebugAssert(pf.state != PendingFrame::State::NeedsMap); 272 if (pf.state == PendingFrame::State::NeedsEncoding) 273 { 274 m_frame_encoded_cv.wait(lock, [&pf]() { return pf.state == PendingFrame::State::Unused; }); 275 } 276 277 if (!pf.tex || pf.tex->GetWidth() != static_cast<u32>(stex->GetWidth()) || 278 pf.tex->GetHeight() != static_cast<u32>(stex->GetHeight())) 279 { 280 pf.tex.reset(); 281 pf.tex = g_gpu_device->CreateDownloadTexture(stex->GetWidth(), stex->GetHeight(), stex->GetFormat()); 282 if (!pf.tex) 283 { 284 ERROR_LOG("Failed to create {}x{} download texture", stex->GetWidth(), stex->GetHeight()); 285 return false; 286 } 287 288 #ifdef _DEBUG 289 GL_OBJECT_NAME_FMT(pf.tex, "GSCapture {}x{} Download Texture", stex->GetWidth(), stex->GetHeight()); 290 #endif 291 } 292 293 pf.tex->CopyFromTexture(0, 0, stex, 0, 0, m_video_width, m_video_height, 0, 0); 294 pf.pts = m_next_video_pts++; 295 pf.state = PendingFrame::State::NeedsMap; 296 297 m_pending_frames_pos = (m_pending_frames_pos + 1) % MAX_PENDING_FRAMES; 298 m_frames_pending_map++; 299 return true; 300 } 301 302 void MediaCaptureBase::ProcessFramePendingMap(std::unique_lock<std::mutex>& lock) 303 { 304 DebugAssert(m_frames_pending_map > 0); 305 306 PendingFrame& pf = m_pending_frames[m_frames_map_consume_pos]; 307 DebugAssert(pf.state == PendingFrame::State::NeedsMap); 308 309 // Flushing is potentially expensive, so we leave it unlocked in case the encode thread 310 // needs to pick up another thread while we're waiting. 311 lock.unlock(); 312 313 if (pf.tex->NeedsFlush()) 314 pf.tex->Flush(); 315 316 // Even if the map failed, we need to kick it to the encode thread anyway, because 317 // otherwise our queue indices will get desynchronized. 318 if (!pf.tex->Map(0, 0, m_video_width, m_video_height)) 319 WARNING_LOG("Failed to map previously flushed frame."); 320 321 lock.lock(); 322 323 // Kick to encoder thread! 
324 pf.state = PendingFrame::State::NeedsEncoding; 325 m_frames_map_consume_pos = (m_frames_map_consume_pos + 1) % MAX_PENDING_FRAMES; 326 m_frames_pending_map--; 327 m_frames_pending_encode++; 328 m_frame_ready_cv.notify_one(); 329 } 330 331 void MediaCaptureBase::EncoderThreadEntryPoint() 332 { 333 Threading::SetNameOfCurrentThread("Media Capture Encoding"); 334 335 Error error; 336 std::unique_lock<std::mutex> lock(m_lock); 337 338 for (;;) 339 { 340 m_frame_ready_cv.wait( 341 lock, [this]() { return (m_frames_pending_encode > 0 || !m_capturing.load(std::memory_order_acquire)); }); 342 if (m_frames_pending_encode == 0 && !m_capturing.load(std::memory_order_acquire)) 343 break; 344 345 PendingFrame& pf = m_pending_frames[m_frames_encode_consume_pos]; 346 DebugAssert(!IsCapturingVideo() || pf.state == PendingFrame::State::NeedsEncoding); 347 348 lock.unlock(); 349 350 bool okay = !m_encoding_error; 351 352 // If the frame failed to map, this will be false, and we'll just skip it. 353 if (okay && IsCapturingVideo() && pf.tex->IsMapped()) 354 okay = SendFrame(pf, &error); 355 356 // Encode as many audio frames while the video is ahead. 357 if (okay && IsCapturingAudio()) 358 okay = ProcessAudioPackets(pf.pts, &error); 359 360 lock.lock(); 361 362 // If we had an encoding error, tell the GS thread to shut down the capture (later). 363 if (!okay) [[unlikely]] 364 { 365 ERROR_LOG("Encoding error: {}", error.GetDescription()); 366 m_encoding_error.store(true, std::memory_order_release); 367 } 368 369 // Done with this frame! Wait for the next. 
370 pf.state = PendingFrame::State::Unused; 371 m_frames_encode_consume_pos = (m_frames_encode_consume_pos + 1) % MAX_PENDING_FRAMES; 372 m_frames_pending_encode--; 373 m_frame_encoded_cv.notify_all(); 374 } 375 } 376 377 void MediaCaptureBase::StartEncoderThread() 378 { 379 INFO_LOG("Starting encoder thread."); 380 DebugAssert(m_capturing.load(std::memory_order_acquire) && !m_encoder_thread.Joinable()); 381 m_encoder_thread.Start([this]() { EncoderThreadEntryPoint(); }); 382 } 383 384 void MediaCaptureBase::StopEncoderThread(std::unique_lock<std::mutex>& lock) 385 { 386 // Thread will exit when s_capturing is false. 387 DebugAssert(!m_capturing.load(std::memory_order_acquire)); 388 389 if (m_encoder_thread.Joinable()) 390 { 391 INFO_LOG("Stopping encoder thread."); 392 393 // Might be sleeping, so wake it before joining. 394 m_frame_ready_cv.notify_one(); 395 lock.unlock(); 396 m_encoder_thread.Join(); 397 lock.lock(); 398 } 399 } 400 401 void MediaCaptureBase::ProcessAllInFlightFrames(std::unique_lock<std::mutex>& lock) 402 { 403 while (m_frames_pending_map > 0) 404 ProcessFramePendingMap(lock); 405 406 while (m_frames_pending_encode > 0) 407 { 408 m_frame_encoded_cv.wait(lock, [this]() { return (m_frames_pending_encode == 0 || m_encoding_error); }); 409 } 410 } 411 412 bool MediaCaptureBase::DeliverAudioFrames(const s16* frames, u32 num_frames) 413 { 414 if (!IsCapturingAudio()) 415 return true; 416 else if (!m_capturing.load(std::memory_order_acquire)) 417 return false; 418 419 const u32 audio_buffer_size = GetAudioBufferSizeInFrames(); 420 if ((audio_buffer_size - m_audio_buffer_size.load(std::memory_order_acquire)) < num_frames) 421 { 422 // Need to wait for it to drain a bit. 
423 std::unique_lock<std::mutex> lock(m_lock); 424 m_frame_encoded_cv.wait(lock, [this, &num_frames, &audio_buffer_size]() { 425 return (!m_capturing.load(std::memory_order_acquire) || 426 ((audio_buffer_size - m_audio_buffer_size.load(std::memory_order_acquire)) >= num_frames)); 427 }); 428 if (!m_capturing.load(std::memory_order_acquire)) 429 return false; 430 } 431 432 for (u32 remaining_frames = num_frames;;) 433 { 434 const u32 contig_frames = std::min(audio_buffer_size - m_audio_buffer_write_pos, remaining_frames); 435 std::memcpy(&m_audio_buffer[m_audio_buffer_write_pos * AUDIO_CHANNELS], frames, 436 sizeof(s16) * AUDIO_CHANNELS * contig_frames); 437 m_audio_buffer_write_pos = (m_audio_buffer_write_pos + contig_frames) % audio_buffer_size; 438 remaining_frames -= contig_frames; 439 if (remaining_frames == 0) 440 break; 441 } 442 443 const u32 buffer_size = m_audio_buffer_size.fetch_add(num_frames, std::memory_order_release) + num_frames; 444 if (!IsCapturingVideo() && buffer_size >= m_audio_frame_size) 445 { 446 // If we're not capturing video, push "frames" when we hit the audio packet size. 
447 std::unique_lock<std::mutex> lock(m_lock); 448 if (!m_capturing.load(std::memory_order_acquire)) 449 return false; 450 451 PendingFrame& pf = m_pending_frames[m_pending_frames_pos]; 452 pf.state = PendingFrame::State::NeedsEncoding; 453 m_pending_frames_pos = (m_pending_frames_pos + 1) % MAX_PENDING_FRAMES; 454 455 m_frames_pending_encode++; 456 m_frame_ready_cv.notify_one(); 457 } 458 459 return true; 460 } 461 462 bool MediaCaptureBase::InternalEndCapture(std::unique_lock<std::mutex>& lock, Error* error) 463 { 464 DebugAssert(m_capturing.load(std::memory_order_acquire)); 465 466 const bool had_error = m_encoding_error.load(std::memory_order_acquire); 467 if (!had_error) 468 ProcessAllInFlightFrames(lock); 469 470 m_capturing.store(false, std::memory_order_release); 471 StopEncoderThread(lock); 472 return !had_error; 473 } 474 475 void MediaCaptureBase::ClearState() 476 { 477 m_next_video_pts = 0; 478 m_next_audio_pts = 0; 479 480 m_pending_frames = {}; 481 m_pending_frames_pos = 0; 482 m_frames_pending_map = 0; 483 m_frames_map_consume_pos = 0; 484 m_frames_pending_encode = 0; 485 m_frames_encode_consume_pos = 0; 486 487 m_audio_buffer_read_pos = 0; 488 m_audio_buffer_write_pos = 0; 489 m_audio_buffer_size.store(0, std::memory_order_release); 490 m_audio_frame_pos = 0; 491 m_audio_buffer_size = 0; 492 m_audio_buffer.deallocate(); 493 494 m_encoding_error.store(false, std::memory_order_release); 495 } 496 497 bool MediaCaptureBase::EndCapture(Error* error) 498 { 499 std::unique_lock<std::mutex> lock(m_lock); 500 if (!InternalEndCapture(lock, error)) 501 { 502 DeleteOutputFile(); 503 ClearState(); 504 return false; 505 } 506 507 ClearState(); 508 return true; 509 } 510 511 const std::string& MediaCaptureBase::GetPath() const 512 { 513 return m_path; 514 } 515 516 std::string MediaCaptureBase::GetNextCapturePath() const 517 { 518 const std::string_view ext = Path::GetExtension(m_path); 519 std::string_view name = Path::GetFileTitle(m_path); 520 521 // Should end 
with a number. 522 u32 partnum = 2; 523 std::string_view::size_type pos = name.rfind("_part"); 524 if (pos != std::string_view::npos) 525 { 526 std::string_view::size_type cpos = pos + 5; 527 for (; cpos < name.length(); cpos++) 528 { 529 if (name[cpos] < '0' || name[cpos] > '9') 530 break; 531 } 532 if (cpos == name.length()) 533 { 534 // Has existing part number, so add to it. 535 partnum = StringUtil::FromChars<u32>(name.substr(pos + 5)).value_or(1) + 1; 536 name = name.substr(0, pos); 537 } 538 } 539 540 // If we haven't started a new file previously, add "_part2". 541 return Path::BuildRelativePath(m_path, fmt::format("{}_part{:03d}.{}", name, partnum, ext)); 542 } 543 544 u32 MediaCaptureBase::GetVideoWidth() const 545 { 546 return m_video_width; 547 } 548 549 u32 MediaCaptureBase::GetVideoHeight() const 550 { 551 return m_video_height; 552 } 553 554 float MediaCaptureBase::GetVideoFPS() const 555 { 556 return m_video_fps; 557 } 558 559 float MediaCaptureBase::GetCaptureThreadUsage() const 560 { 561 return m_encoder_thread_usage; 562 } 563 564 float MediaCaptureBase::GetCaptureThreadTime() const 565 { 566 return m_encoder_thread_time; 567 } 568 569 void MediaCaptureBase::UpdateCaptureThreadUsage(double pct_divider, double time_divider) 570 { 571 const u64 time = m_encoder_thread.GetCPUTime(); 572 const u64 delta = time - m_encoder_thread_last_time; 573 m_encoder_thread_usage = static_cast<float>(static_cast<double>(delta) * pct_divider); 574 m_encoder_thread_time = static_cast<float>(static_cast<double>(delta) * time_divider); 575 m_encoder_thread_last_time = time; 576 } 577 578 void MediaCaptureBase::Flush() 579 { 580 std::unique_lock<std::mutex> lock(m_lock); 581 582 if (m_encoding_error) 583 return; 584 585 ProcessAllInFlightFrames(lock); 586 587 if (IsCapturingAudio()) 588 { 589 // Clear any buffered audio frames out, we don't want to delay the CPU thread. 
590 const u32 audio_frames = m_audio_buffer_size.load(std::memory_order_acquire); 591 if (audio_frames > 0) 592 WARNING_LOG("Dropping {} audio frames for buffer clear.", audio_frames); 593 594 m_audio_buffer_read_pos = 0; 595 m_audio_buffer_write_pos = 0; 596 m_audio_buffer_size.store(0, std::memory_order_release); 597 } 598 } 599 600 void MediaCaptureBase::DeleteOutputFile() 601 { 602 if (m_path.empty()) 603 return; 604 605 Error error; 606 if (FileSystem::DeleteFile(m_path.c_str(), &error)) 607 { 608 INFO_LOG("Deleted output file {}", Path::GetFileName(m_path)); 609 m_path = {}; 610 } 611 else 612 { 613 ERROR_LOG("Failed to delete output file '{}': {}", Path::GetFileName(m_path), error.GetDescription()); 614 } 615 } 616 617 #ifdef _WIN32 618 619 #define VISIT_MFPLAT_IMPORTS(X) \ 620 X(MFCreateMediaType) \ 621 X(MFCreateMemoryBuffer) \ 622 X(MFCreateSample) \ 623 X(MFHeapFree) \ 624 X(MFShutdown) \ 625 X(MFStartup) \ 626 X(MFTEnumEx) 627 628 #define VISIT_MFREADWRITE_IMPORTS(X) X(MFCreateSinkWriterFromURL) 629 630 #define VISIT_MF_IMPORTS(X) X(MFTranscodeGetAudioOutputAvailableTypes) 631 632 class MediaCaptureMF final : public MediaCaptureBase 633 { 634 template<class T> 635 using ComPtr = Microsoft::WRL::ComPtr<T>; 636 637 static constexpr u32 TEN_NANOSECONDS = 10 * 1000 * 1000; 638 static constexpr DWORD INVALID_STREAM_INDEX = std::numeric_limits<DWORD>::max(); 639 static constexpr u32 AUDIO_BITS_PER_SAMPLE = sizeof(s16) * 8; 640 641 static constexpr const GUID& AUDIO_INPUT_MEDIA_FORMAT = MFAudioFormat_PCM; 642 static constexpr const GUID& VIDEO_RGB_MEDIA_FORMAT = MFVideoFormat_RGB32; 643 static constexpr const GUID& VIDEO_YUV_MEDIA_FORMAT = MFVideoFormat_NV12; 644 645 public: 646 ~MediaCaptureMF() override; 647 648 static std::unique_ptr<MediaCapture> Create(Error* error); 649 static ContainerList GetContainerList(); 650 static CodecList GetVideoCodecList(const char* container); 651 static CodecList GetAudioCodecList(const char* container); 652 653 bool 
IsCapturingAudio() const override; 654 bool IsCapturingVideo() const override; 655 time_t GetElapsedTime() const override; 656 657 protected: 658 void ClearState() override; 659 bool SendFrame(const PendingFrame& pf, Error* error) override; 660 bool ProcessAudioPackets(s64 video_pts, Error* error) override; 661 bool InternalBeginCapture(float fps, float aspect, u32 sample_rate, bool capture_video, std::string_view video_codec, 662 u32 video_bitrate, std::string_view video_codec_args, bool capture_audio, 663 std::string_view audio_codec, u32 audio_bitrate, std::string_view audio_codec_args, 664 Error* error) override; 665 bool InternalEndCapture(std::unique_lock<std::mutex>& lock, Error* error) override; 666 667 private: 668 ComPtr<IMFTransform> CreateVideoYUVTransform(ComPtr<IMFMediaType>* output_type, Error* error); 669 ComPtr<IMFTransform> CreateVideoEncodeTransform(std::string_view codec, u32 bitrate, IMFMediaType* input_type, 670 ComPtr<IMFMediaType>* output_type, bool* use_async_transform, 671 Error* error); 672 bool GetAudioTypes(std::string_view codec, ComPtr<IMFMediaType>* input_type, ComPtr<IMFMediaType>* output_type, 673 u32 sample_rate, u32 bitrate, Error* error); 674 void ConvertVideoFrame(u8* dst, size_t dst_stride, const u8* src, size_t src_stride, u32 width, u32 height) const; 675 676 bool ProcessVideoOutputSamples(Error* error); // synchronous 677 bool ProcessVideoEvents(Error* error); // asynchronous 678 679 ComPtr<IMFSinkWriter> m_sink_writer; 680 681 DWORD m_video_stream_index = INVALID_STREAM_INDEX; 682 DWORD m_audio_stream_index = INVALID_STREAM_INDEX; 683 684 LONGLONG m_video_sample_duration = 0; 685 LONGLONG m_audio_sample_duration = 0; 686 687 u32 m_frame_rate_numerator = 0; 688 689 ComPtr<IMFTransform> m_video_yuv_transform; 690 ComPtr<IMFSample> m_video_yuv_sample; 691 ComPtr<IMFTransform> m_video_encode_transform; 692 ComPtr<IMFMediaEventGenerator> m_video_encode_event_generator; 693 std::deque<ComPtr<IMFSample>> m_pending_video_samples; 
694 ComPtr<IMFSample> m_video_output_sample; 695 u32 m_wanted_video_samples = 0; 696 DWORD m_video_sample_size = 0; 697 698 #define DECLARE_IMPORT(X) static inline decltype(X)* wrap_##X; 699 VISIT_MFPLAT_IMPORTS(DECLARE_IMPORT); 700 VISIT_MFREADWRITE_IMPORTS(DECLARE_IMPORT); 701 VISIT_MF_IMPORTS(DECLARE_IMPORT); 702 #undef DECLARE_IMPORT 703 704 static bool LoadMediaFoundation(Error* error); 705 static void UnloadMediaFoundation(); 706 707 static inline DynamicLibrary s_mfplat_library; 708 static inline DynamicLibrary s_mfreadwrite_library; 709 static inline DynamicLibrary s_mf_library; 710 static inline bool s_library_loaded = false; 711 }; 712 713 struct MediaFoundationVideoCodec 714 { 715 const char* name; 716 const char* display_name; 717 const GUID& guid; 718 bool require_hardware; 719 }; 720 struct MediaFoundationAudioCodec 721 { 722 const char* name; 723 const char* display_name; 724 const GUID& guid; 725 u32 min_bitrate; 726 u32 max_bitrate; 727 }; 728 static constexpr const MediaFoundationVideoCodec s_media_foundation_video_codecs[] = { 729 {"h264", "H.264 with Software Encoding", MFVideoFormat_H264, false}, 730 {"h264_hw", "H.264 with Hardware Encoding", MFVideoFormat_H264, true}, 731 {"h265", "H.265 with Software Encoding", MFVideoFormat_H265, false}, 732 {"h265_hw", "H.265 with Hardware Encoding", MFVideoFormat_H265, true}, 733 {"hevc", "HEVC with Software Encoding", MFVideoFormat_HEVC, false}, 734 {"hevc_hw", "HEVC with Hardware Encoding", MFVideoFormat_HEVC, true}, 735 {"vp9", "VP9 with Software Encoding", MFVideoFormat_VP90, false}, 736 {"vp9_hw", "VP9 with Hardware Encoding", MFVideoFormat_VP90, true}, 737 {"av1", "AV1 with Software Encoding", MFVideoFormat_AV1, false}, 738 {"av1_hw", "AV1 with Hardware Encoding", MFVideoFormat_AV1, false}, 739 }; 740 static constexpr const MediaFoundationAudioCodec s_media_foundation_audio_codecs[] = { 741 {"aac", "Advanced Audio Coding", MFAudioFormat_AAC, 64, 224}, 742 {"mp3", "MPEG-2 Audio Layer III", 
MFAudioFormat_MP3, 64, 320}, 743 {"pcm", "Uncompressed PCM", MFAudioFormat_PCM, 0, std::numeric_limits<u32>::max()}, 744 }; 745 746 bool MediaCaptureMF::LoadMediaFoundation(Error* error) 747 { 748 std::unique_lock lock(s_load_mutex); 749 if (s_library_loaded) 750 return true; 751 752 bool result = s_mfplat_library.Open("mfplat.dll", error); 753 result = result && s_mfreadwrite_library.Open("mfreadwrite.dll", error); 754 result = result && s_mf_library.Open("mf.dll", error); 755 756 #define RESOLVE_IMPORT(X) result = result && s_mfplat_library.GetSymbol(#X, &wrap_##X); 757 VISIT_MFPLAT_IMPORTS(RESOLVE_IMPORT); 758 #undef RESOLVE_IMPORT 759 760 #define RESOLVE_IMPORT(X) result = result && s_mfreadwrite_library.GetSymbol(#X, &wrap_##X); 761 VISIT_MFREADWRITE_IMPORTS(RESOLVE_IMPORT); 762 #undef RESOLVE_IMPORT 763 764 #define RESOLVE_IMPORT(X) result = result && s_mf_library.GetSymbol(#X, &wrap_##X); 765 VISIT_MF_IMPORTS(RESOLVE_IMPORT); 766 #undef RESOLVE_IMPORT 767 768 HRESULT hr; 769 if (result && FAILED(hr = wrap_MFStartup(MF_VERSION, MFSTARTUP_NOSOCKET))) [[unlikely]] 770 { 771 Error::SetHResult(error, "MFStartup() failed: ", hr); 772 result = false; 773 } 774 775 if (result) [[likely]] 776 { 777 s_library_loaded = true; 778 std::atexit(&MediaCaptureMF::UnloadMediaFoundation); 779 return true; 780 } 781 782 UnloadMediaFoundation(); 783 784 Error::AddPrefix(error, TRANSLATE_SV("MediaCapture", "Failed to load Media Foundation libraries: ")); 785 return false; 786 } 787 788 void MediaCaptureMF::UnloadMediaFoundation() 789 { 790 #define CLEAR_IMPORT(X) wrap_##X = nullptr; 791 VISIT_MF_IMPORTS(CLEAR_IMPORT); 792 VISIT_MFREADWRITE_IMPORTS(CLEAR_IMPORT); 793 VISIT_MFPLAT_IMPORTS(CLEAR_IMPORT); 794 #undef CLEAR_IMPORT 795 796 s_mf_library.Close(); 797 s_mfreadwrite_library.Close(); 798 s_mfplat_library.Close(); 799 s_library_loaded = false; 800 } 801 802 #undef VISIT_MF_IMPORTS 803 #undef VISIT_MFREADWRITE_IMPORTS 804 #undef VISIT_MFPLAT_IMPORTS 805 806 
MediaCaptureMF::~MediaCaptureMF() = default; 807 808 std::unique_ptr<MediaCapture> MediaCaptureMF::Create(Error* error) 809 { 810 if (!LoadMediaFoundation(error)) 811 return nullptr; 812 813 return std::make_unique<MediaCaptureMF>(); 814 } 815 816 MediaCapture::ContainerList MediaCaptureMF::GetContainerList() 817 { 818 return { 819 {"avi", "Audio Video Interleave"}, 820 {"mp4", "MPEG-4 Part 14"}, 821 {"mp3", "MPEG-2 Audio Layer III"}, 822 {"wav", "Waveform Audio File Format"}, 823 }; 824 } 825 826 MediaCapture::ContainerList MediaCaptureMF::GetAudioCodecList(const char* container) 827 { 828 ContainerList ret; 829 ret.reserve(std::size(s_media_foundation_audio_codecs)); 830 for (const MediaFoundationAudioCodec& codec : s_media_foundation_audio_codecs) 831 ret.emplace_back(codec.name, codec.display_name); 832 return ret; 833 } 834 835 MediaCapture::ContainerList MediaCaptureMF::GetVideoCodecList(const char* container) 836 { 837 ContainerList ret; 838 ret.reserve(std::size(s_media_foundation_video_codecs)); 839 for (const MediaFoundationVideoCodec& codec : s_media_foundation_video_codecs) 840 ret.emplace_back(codec.name, codec.display_name); 841 return ret; 842 } 843 844 bool MediaCaptureMF::IsCapturingVideo() const 845 { 846 return (m_video_stream_index != INVALID_STREAM_INDEX); 847 } 848 849 bool MediaCaptureMF::IsCapturingAudio() const 850 { 851 return (m_audio_stream_index != INVALID_STREAM_INDEX); 852 } 853 854 time_t MediaCaptureMF::GetElapsedTime() const 855 { 856 if (IsCapturingVideo()) 857 return static_cast<time_t>(static_cast<LONGLONG>(m_next_video_pts * m_video_sample_duration) / TEN_NANOSECONDS); 858 else 859 return static_cast<time_t>(static_cast<LONGLONG>(m_next_audio_pts * m_audio_sample_duration) / TEN_NANOSECONDS); 860 } 861 862 bool MediaCaptureMF::InternalBeginCapture(float fps, float aspect, u32 sample_rate, bool capture_video, 863 std::string_view video_codec, u32 video_bitrate, 864 std::string_view video_codec_args, bool capture_audio, 865 
std::string_view audio_codec, u32 audio_bitrate, 866 std::string_view audio_codec_args, Error* error) 867 { 868 HRESULT hr; 869 870 ComPtr<IMFMediaType> video_media_type; 871 bool use_async_video_transform = false; 872 873 if (capture_video) 874 { 875 m_frame_rate_numerator = static_cast<u32>(fps * TEN_NANOSECONDS); 876 m_video_sample_duration = static_cast<LONGLONG>(static_cast<double>(TEN_NANOSECONDS) / static_cast<double>(fps)); 877 878 ComPtr<IMFMediaType> yuv_media_type; 879 if (!(m_video_yuv_transform = CreateVideoYUVTransform(&yuv_media_type, error)) || 880 !(m_video_encode_transform = CreateVideoEncodeTransform(video_codec, video_bitrate, yuv_media_type.Get(), 881 &video_media_type, &use_async_video_transform, error))) 882 { 883 return false; 884 } 885 } 886 887 ComPtr<IMFMediaType> audio_input_type, audio_output_type; 888 if (capture_audio) 889 { 890 if (!GetAudioTypes(audio_codec, &audio_input_type, &audio_output_type, sample_rate, audio_bitrate, error)) 891 return false; 892 893 // only used when not capturing video 894 m_audio_frame_size = static_cast<u32>(static_cast<float>(sample_rate) / fps); 895 896 m_audio_sample_duration = 897 static_cast<LONGLONG>(static_cast<double>(TEN_NANOSECONDS) / static_cast<double>(sample_rate)); 898 } 899 900 if (FAILED(hr = wrap_MFCreateSinkWriterFromURL(StringUtil::UTF8StringToWideString(m_path).c_str(), nullptr, nullptr, 901 m_sink_writer.GetAddressOf()))) 902 { 903 Error::SetHResult(error, "MFCreateSinkWriterFromURL() failed: ", hr); 904 return false; 905 } 906 907 if (capture_video) 908 { 909 if (SUCCEEDED(hr) && FAILED(hr = m_sink_writer->AddStream(video_media_type.Get(), &m_video_stream_index))) 910 [[unlikely]] 911 { 912 Error::SetHResult(error, "Video AddStream() failed: ", hr); 913 } 914 915 if (SUCCEEDED(hr) && FAILED(hr = m_sink_writer->SetInputMediaType(m_video_stream_index, video_media_type.Get(), 916 nullptr))) [[unlikely]] 917 { 918 Error::SetHResult(error, "Video SetInputMediaType() failed: ", hr); 919 } 
920 } 921 922 if (capture_audio) 923 { 924 if (SUCCEEDED(hr) && FAILED(hr = m_sink_writer->AddStream(audio_output_type.Get(), &m_audio_stream_index))) 925 [[unlikely]] 926 { 927 Error::SetHResult(error, "Audio AddStream() failed: ", hr); 928 } 929 930 if (SUCCEEDED(hr) && audio_input_type && 931 FAILED(hr = m_sink_writer->SetInputMediaType(m_audio_stream_index, audio_input_type.Get(), nullptr))) 932 [[unlikely]] 933 { 934 Error::SetHResult(error, "Audio SetInputMediaType() failed: ", hr); 935 } 936 } 937 938 if (SUCCEEDED(hr) && FAILED(hr = m_sink_writer->BeginWriting())) 939 Error::SetHResult(error, "BeginWriting() failed: ", hr); 940 941 if (use_async_video_transform) 942 { 943 if (SUCCEEDED(hr) && FAILED(hr = m_video_encode_transform.As(&m_video_encode_event_generator))) 944 Error::SetHResult(error, "Getting video encode event generator failed: ", hr); 945 } 946 947 if (capture_video && SUCCEEDED(hr) && 948 FAILED(hr = m_video_encode_transform->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0))) 949 { 950 Error::SetHResult(error, "MFT_MESSAGE_NOTIFY_START_OF_STREAM failed: ", hr); 951 } 952 953 if (FAILED(hr)) 954 { 955 m_sink_writer.Reset(); 956 DeleteOutputFile(); 957 return false; 958 } 959 960 return true; 961 } 962 963 bool MediaCaptureMF::InternalEndCapture(std::unique_lock<std::mutex>& lock, Error* error) 964 { 965 HRESULT hr = MediaCaptureBase::InternalEndCapture(lock, error) ? S_OK : E_FAIL; 966 967 // need to drain all input frames 968 if (m_video_encode_transform) 969 { 970 if (SUCCEEDED(hr) && FAILED(hr = m_video_encode_transform->ProcessMessage(MFT_MESSAGE_NOTIFY_END_OF_STREAM, 0))) 971 { 972 Error::SetHResult(error, "MFT_MESSAGE_NOTIFY_END_OF_STREAM failed: ", hr); 973 return false; 974 } 975 976 if (m_video_encode_event_generator) 977 hr = ProcessVideoEvents(error) ? S_OK : E_FAIL; 978 else 979 hr = ProcessVideoOutputSamples(error) ? 
S_OK : E_FAIL; 980 } 981 982 if (SUCCEEDED(hr) && FAILED(hr = m_sink_writer->Finalize())) [[unlikely]] 983 Error::SetHResult(error, "Finalize() failed: ", hr); 984 985 m_sink_writer.Reset(); 986 return SUCCEEDED(hr); 987 } 988 989 MediaCaptureMF::ComPtr<IMFTransform> MediaCaptureMF::CreateVideoYUVTransform(ComPtr<IMFMediaType>* output_type, 990 Error* error) 991 { 992 const MFT_REGISTER_TYPE_INFO input_type_info = {.guidMajorType = MFMediaType_Video, 993 .guidSubtype = VIDEO_RGB_MEDIA_FORMAT}; 994 const MFT_REGISTER_TYPE_INFO output_type_info = {.guidMajorType = MFMediaType_Video, 995 .guidSubtype = VIDEO_YUV_MEDIA_FORMAT}; 996 997 IMFActivate** transforms = nullptr; 998 UINT32 num_transforms = 0; 999 HRESULT hr = wrap_MFTEnumEx(MFT_CATEGORY_VIDEO_PROCESSOR, MFT_ENUM_FLAG_SORTANDFILTER, &input_type_info, 1000 &output_type_info, &transforms, &num_transforms); 1001 if (FAILED(hr)) [[unlikely]] 1002 { 1003 Error::SetHResult(error, "YUV MFTEnumEx() failed: ", hr); 1004 return nullptr; 1005 } 1006 else if (num_transforms == 0) [[unlikely]] 1007 { 1008 Error::SetStringView(error, "No video processors found."); 1009 return nullptr; 1010 } 1011 1012 ComPtr<IMFTransform> transform; 1013 hr = transforms[0]->ActivateObject(IID_PPV_ARGS(transform.GetAddressOf())); 1014 if (transforms) 1015 wrap_MFHeapFree(transforms); 1016 if (FAILED(hr)) [[unlikely]] 1017 { 1018 Error::SetHResult(error, "YUV ActivateObject() failed: ", hr); 1019 return nullptr; 1020 } 1021 1022 ComPtr<IMFMediaType> input_type; 1023 if (FAILED(hr = wrap_MFCreateMediaType(input_type.GetAddressOf())) || 1024 FAILED(hr = wrap_MFCreateMediaType(output_type->GetAddressOf()))) [[unlikely]] 1025 { 1026 Error::SetHResult(error, "YUV MFCreateMediaType() failed: ", hr); 1027 return nullptr; 1028 } 1029 1030 if (FAILED(hr = input_type->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video)) || 1031 FAILED(hr = input_type->SetGUID(MF_MT_SUBTYPE, VIDEO_RGB_MEDIA_FORMAT)) || 1032 FAILED(hr = 
input_type->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive)) || 1033 FAILED(hr = MFSetAttributeSize(input_type.Get(), MF_MT_FRAME_SIZE, m_video_width, m_video_height)) || 1034 FAILED(hr = (*output_type)->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video)) || 1035 FAILED(hr = (*output_type)->SetGUID(MF_MT_SUBTYPE, VIDEO_YUV_MEDIA_FORMAT)) || 1036 FAILED(hr = (*output_type)->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive)) || 1037 FAILED(hr = MFSetAttributeSize(output_type->Get(), MF_MT_FRAME_SIZE, m_video_width, m_video_height)) || 1038 FAILED(hr = MFSetAttributeRatio(output_type->Get(), MF_MT_FRAME_RATE, m_frame_rate_numerator, TEN_NANOSECONDS))) 1039 [[unlikely]] 1040 { 1041 Error::SetHResult(error, "YUV setting attributes failed: ", hr); 1042 return nullptr; 1043 } 1044 1045 if (FAILED(hr = transform->SetOutputType(0, output_type->Get(), 0))) [[unlikely]] 1046 { 1047 Error::SetHResult(error, "YUV SetOutputType() failed: ", hr); 1048 return nullptr; 1049 } 1050 1051 if (FAILED(hr = transform->SetInputType(0, input_type.Get(), 0))) [[unlikely]] 1052 { 1053 Error::SetHResult(error, "YUV SetInputType() failed: ", hr); 1054 return nullptr; 1055 } 1056 1057 return transform; 1058 } 1059 1060 MediaCaptureMF::ComPtr<IMFTransform> MediaCaptureMF::CreateVideoEncodeTransform(std::string_view codec, u32 bitrate, 1061 IMFMediaType* input_type, 1062 ComPtr<IMFMediaType>* output_type, 1063 bool* use_async_transform, Error* error) 1064 { 1065 const MFT_REGISTER_TYPE_INFO input_type_info = {.guidMajorType = MFMediaType_Video, 1066 .guidSubtype = VIDEO_YUV_MEDIA_FORMAT}; 1067 MFT_REGISTER_TYPE_INFO output_type_info = {.guidMajorType = MFMediaType_Video, .guidSubtype = MFVideoFormat_H264}; 1068 bool hardware = false; 1069 if (!codec.empty()) 1070 { 1071 bool found = false; 1072 for (const MediaFoundationVideoCodec& tcodec : s_media_foundation_video_codecs) 1073 { 1074 if (StringUtil::EqualNoCase(codec, tcodec.name)) 1075 { 1076 output_type_info.guidSubtype = 
tcodec.guid; 1077 hardware = tcodec.require_hardware; 1078 found = true; 1079 break; 1080 } 1081 } 1082 if (!found) 1083 { 1084 Error::SetStringFmt(error, "Unknown video codec '{}'", codec); 1085 return nullptr; 1086 } 1087 } 1088 1089 IMFActivate** transforms = nullptr; 1090 UINT32 num_transforms = 0; 1091 HRESULT hr = 1092 wrap_MFTEnumEx(MFT_CATEGORY_VIDEO_ENCODER, (hardware ? MFT_ENUM_FLAG_HARDWARE : 0) | MFT_ENUM_FLAG_SORTANDFILTER, 1093 &input_type_info, &output_type_info, &transforms, &num_transforms); 1094 if (FAILED(hr)) [[unlikely]] 1095 { 1096 Error::SetHResult(error, "Encoder MFTEnumEx() failed: ", hr); 1097 return nullptr; 1098 } 1099 else if (num_transforms == 0) [[unlikely]] 1100 { 1101 Error::SetStringView(error, "No video encoders found."); 1102 return nullptr; 1103 } 1104 1105 ComPtr<IMFTransform> transform; 1106 hr = transforms[0]->ActivateObject(IID_PPV_ARGS(transform.GetAddressOf())); 1107 if (transforms) 1108 wrap_MFHeapFree(transforms); 1109 if (FAILED(hr)) [[unlikely]] 1110 { 1111 Error::SetHResult(error, "Encoder ActivateObject() failed: ", hr); 1112 return nullptr; 1113 } 1114 1115 *use_async_transform = false; 1116 if (hardware) 1117 { 1118 ComPtr<IMFAttributes> attributes; 1119 if (FAILED(transform->GetAttributes(attributes.GetAddressOf()))) [[unlikely]] 1120 { 1121 Error::SetHResult(error, "YUV GetAttributes() failed: ", hr); 1122 return nullptr; 1123 } 1124 UINT32 async_supported; 1125 *use_async_transform = 1126 (SUCCEEDED(hr = attributes->GetUINT32(MF_TRANSFORM_ASYNC, &async_supported)) && async_supported == TRUE && 1127 SUCCEEDED(hr = attributes->SetUINT32(MF_TRANSFORM_ASYNC_UNLOCK, 1))); 1128 if (use_async_transform) 1129 INFO_LOG("Using async video transform."); 1130 } 1131 1132 if (FAILED(hr = wrap_MFCreateMediaType(output_type->GetAddressOf()))) [[unlikely]] 1133 { 1134 Error::SetHResult(error, "Encoder MFCreateMediaType() failed: ", hr); 1135 return nullptr; 1136 } 1137 1138 constexpr u32 par_numerator = 1; 1139 constexpr u32 
par_denominator = 1; 1140 1141 u32 profile = 0; 1142 if (output_type_info.guidSubtype == MFVideoFormat_H264) 1143 profile = eAVEncH264VProfile_Main; 1144 else if (output_type_info.guidSubtype == MFVideoFormat_H265) 1145 profile = eAVEncH265VProfile_Main_420_8; 1146 else if (output_type_info.guidSubtype == MFVideoFormat_VP90) 1147 profile = eAVEncVP9VProfile_420_8; 1148 1149 if (FAILED(hr = (*output_type)->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video)) || 1150 FAILED(hr = (*output_type)->SetGUID(MF_MT_SUBTYPE, output_type_info.guidSubtype)) || 1151 FAILED(hr = (*output_type)->SetUINT32(MF_MT_AVG_BITRATE, bitrate * 1000)) || 1152 FAILED(hr = (*output_type)->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive)) || 1153 FAILED(hr = (*output_type)->SetUINT32(MF_MT_MPEG2_PROFILE, profile)) || 1154 FAILED(hr = MFSetAttributeSize(output_type->Get(), MF_MT_FRAME_SIZE, m_video_width, m_video_height)) || 1155 FAILED(hr = MFSetAttributeRatio(output_type->Get(), MF_MT_FRAME_RATE, m_frame_rate_numerator, TEN_NANOSECONDS)) || 1156 FAILED(hr = MFSetAttributeRatio(output_type->Get(), MF_MT_PIXEL_ASPECT_RATIO, par_numerator, par_denominator))) 1157 [[unlikely]] 1158 { 1159 Error::SetHResult(error, "Encoder setting attributes failed: ", hr); 1160 return nullptr; 1161 } 1162 1163 if (FAILED(hr = transform->SetOutputType(0, output_type->Get(), 0))) [[unlikely]] 1164 { 1165 Error::SetHResult(error, "Encoder SetOutputType() failed: ", hr); 1166 return nullptr; 1167 } 1168 1169 if (FAILED(hr = transform->SetInputType(0, input_type, 0))) [[unlikely]] 1170 { 1171 Error::SetHResult(error, "Encoder SetInputType() failed: ", hr); 1172 return nullptr; 1173 } 1174 1175 MFT_OUTPUT_STREAM_INFO osi; 1176 if (FAILED(hr = transform->GetOutputStreamInfo(0, &osi))) [[unlikely]] 1177 { 1178 Error::SetHResult(error, "Encoder GetOutputStreamInfo() failed: ", hr); 1179 return nullptr; 1180 } 1181 1182 if (!(osi.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES)) 1183 { 1184 if (osi.cbSize == 0) 1185 { 
1186 Error::SetStringFmt(error, "Invalid sample size for non-output-providing stream"); 1187 return nullptr; 1188 } 1189 1190 m_video_sample_size = osi.cbSize; 1191 } 1192 1193 INFO_LOG("Video sample size: {}", m_video_sample_size); 1194 return transform; 1195 } 1196 1197 ALWAYS_INLINE_RELEASE void MediaCaptureMF::ConvertVideoFrame(u8* dst, size_t dst_stride, const u8* src, 1198 size_t src_stride, u32 width, u32 height) const 1199 { 1200 if (!g_gpu_device->UsesLowerLeftOrigin()) 1201 { 1202 src += src_stride * (height - 1); 1203 src_stride = static_cast<size_t>(-static_cast<std::make_signed_t<size_t>>(src_stride)); 1204 } 1205 1206 if (m_video_render_texture_format == GPUTexture::Format::RGBA8) 1207 { 1208 // need to convert rgba -> bgra, as well as flipping vertically 1209 const u32 vector_width = 4; 1210 const u32 aligned_width = Common::AlignDownPow2(width, vector_width); 1211 for (u32 remaining_rows = height;;) 1212 { 1213 const u8* row_src = src; 1214 u8* row_dst = dst; 1215 1216 u32 x = 0; 1217 for (; x < aligned_width; x += vector_width) 1218 { 1219 static constexpr GSVector4i mask = GSVector4i::cxpr8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15); 1220 GSVector4i::store<false>(row_dst, GSVector4i::load<false>(row_src).shuffle8(mask)); 1221 row_src += vector_width * sizeof(u32); 1222 row_dst += vector_width * sizeof(u32); 1223 } 1224 1225 for (; x < width; x++) 1226 { 1227 row_dst[0] = row_src[2]; 1228 row_dst[1] = row_src[1]; 1229 row_dst[2] = row_src[0]; 1230 row_dst[3] = row_src[3]; 1231 row_src += sizeof(u32); 1232 row_dst += sizeof(u32); 1233 } 1234 1235 src += src_stride; 1236 dst += dst_stride; 1237 1238 remaining_rows--; 1239 if (remaining_rows == 0) 1240 break; 1241 } 1242 } 1243 else 1244 { 1245 // only flip 1246 const u32 copy_width = sizeof(u32) * width; 1247 for (u32 remaining_rows = height;;) 1248 { 1249 const u8* row_src = src; 1250 u8* row_dst = dst; 1251 std::memcpy(row_dst, row_src, copy_width); 1252 src += src_stride; 1253 dst += 
dst_stride; 1254 1255 remaining_rows--; 1256 if (remaining_rows == 0) 1257 break; 1258 } 1259 } 1260 } 1261 1262 void MediaCaptureMF::ClearState() 1263 { 1264 MediaCaptureBase::ClearState(); 1265 1266 m_sink_writer.Reset(); 1267 1268 m_video_stream_index = INVALID_STREAM_INDEX; 1269 m_audio_stream_index = INVALID_STREAM_INDEX; 1270 1271 m_video_sample_duration = 0; 1272 m_audio_sample_duration = 0; 1273 m_frame_rate_numerator = 0; 1274 1275 m_video_yuv_transform.Reset(); 1276 m_video_yuv_sample.Reset(); 1277 m_video_encode_transform.Reset(); 1278 m_video_encode_event_generator.Reset(); 1279 m_pending_video_samples.clear(); 1280 m_video_output_sample.Reset(); 1281 m_wanted_video_samples = 0; 1282 m_video_sample_size = 0; 1283 } 1284 1285 bool MediaCaptureMF::SendFrame(const PendingFrame& pf, Error* error) 1286 { 1287 const u32 buffer_stride = m_video_width * sizeof(u32); 1288 const u32 buffer_size = buffer_stride * m_video_height; 1289 1290 HRESULT hr; 1291 ComPtr<IMFMediaBuffer> buffer; 1292 if (FAILED(hr = wrap_MFCreateMemoryBuffer(buffer_size, buffer.GetAddressOf()))) [[unlikely]] 1293 { 1294 Error::SetHResult(error, "MFCreateMemoryBuffer() failed: ", hr); 1295 return false; 1296 } 1297 1298 BYTE* buffer_data; 1299 if (FAILED(hr = buffer->Lock(&buffer_data, nullptr, nullptr))) [[unlikely]] 1300 { 1301 Error::SetHResult(error, "Lock() failed: ", hr); 1302 return false; 1303 } 1304 1305 ConvertVideoFrame(buffer_data, buffer_stride, pf.tex->GetMapPointer(), pf.tex->GetMapPitch(), m_video_width, 1306 m_video_height); 1307 buffer->Unlock(); 1308 1309 if (FAILED(hr = buffer->SetCurrentLength(buffer_size))) [[unlikely]] 1310 { 1311 Error::SetHResult(error, "SetCurrentLength() failed: ", hr); 1312 return false; 1313 } 1314 1315 ComPtr<IMFSample> sample; 1316 if (FAILED(hr = wrap_MFCreateSample(sample.GetAddressOf()))) [[unlikely]] 1317 { 1318 Error::SetHResult(error, "MFCreateSample() failed: ", hr); 1319 return false; 1320 } 1321 1322 if (FAILED(hr = 
sample->AddBuffer(buffer.Get()))) [[unlikely]] 1323 { 1324 Error::SetHResult(error, "AddBuffer() failed: ", hr); 1325 return false; 1326 } 1327 1328 const LONGLONG timestamp = static_cast<LONGLONG>(pf.pts) * m_video_sample_duration; 1329 if (FAILED(hr = sample->SetSampleTime(timestamp))) [[unlikely]] 1330 { 1331 Error::SetHResult(error, "SetSampleTime() failed: ", hr); 1332 return false; 1333 } 1334 1335 if (FAILED(hr = sample->SetSampleDuration(m_video_sample_duration))) [[unlikely]] 1336 { 1337 Error::SetHResult(error, "SetSampleDuration() failed: ", hr); 1338 return false; 1339 } 1340 1341 ////////////////////////////////////////////////////////////////////////// 1342 // RGB -> YUV 1343 ////////////////////////////////////////////////////////////////////////// 1344 1345 if (FAILED(hr = m_video_yuv_transform->ProcessInput(0, sample.Get(), 0))) [[unlikely]] 1346 { 1347 Error::SetHResult(error, "YUV ProcessInput() failed: ", hr); 1348 return false; 1349 } 1350 1351 for (;;) 1352 { 1353 if (!m_video_yuv_sample) 1354 { 1355 ComPtr<IMFMediaBuffer> yuv_membuf; 1356 if (FAILED(hr = wrap_MFCreateMemoryBuffer(buffer_size, yuv_membuf.GetAddressOf()))) [[unlikely]] 1357 { 1358 Error::SetHResult(error, "YUV MFCreateMemoryBuffer() failed: ", hr); 1359 return false; 1360 } 1361 1362 if (FAILED(hr = wrap_MFCreateSample(m_video_yuv_sample.GetAddressOf()))) [[unlikely]] 1363 { 1364 Error::SetHResult(error, "YUV MFCreateSample() failed: ", hr); 1365 return false; 1366 } 1367 if (FAILED(hr = m_video_yuv_sample->AddBuffer(yuv_membuf.Get()))) [[unlikely]] 1368 { 1369 Error::SetHResult(error, "YUV AddBuffer() failed: ", hr); 1370 return false; 1371 } 1372 } 1373 1374 DWORD status; 1375 MFT_OUTPUT_DATA_BUFFER yuv_buf = {.pSample = m_video_yuv_sample.Get()}; 1376 hr = m_video_yuv_transform->ProcessOutput(0, 1, &yuv_buf, &status); 1377 if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) 1378 break; 1379 1380 if (FAILED(hr)) [[unlikely]] 1381 { 1382 Error::SetHResult(error, "YUV ProcessOutput() 
failed: ", hr); 1383 return false; 1384 } 1385 if (yuv_buf.pEvents) 1386 yuv_buf.pEvents->Release(); 1387 1388 m_pending_video_samples.push_back(std::move(m_video_yuv_sample)); 1389 1390 if (m_video_encode_event_generator) 1391 { 1392 if (!ProcessVideoEvents(error)) [[unlikely]] 1393 return false; 1394 } 1395 else 1396 { 1397 if (!ProcessVideoOutputSamples(error)) [[unlikely]] 1398 return false; 1399 } 1400 } 1401 1402 return true; 1403 } 1404 1405 bool MediaCaptureMF::ProcessVideoOutputSamples(Error* error) 1406 { 1407 HRESULT hr; 1408 1409 for (;;) 1410 { 1411 while (!m_pending_video_samples.empty()) 1412 { 1413 if (FAILED(hr = m_video_encode_transform->ProcessInput(0, m_pending_video_samples.front().Get(), 0))) [[unlikely]] 1414 { 1415 Error::SetHResult(error, "Video ProcessInput() failed: ", hr); 1416 return false; 1417 } 1418 m_pending_video_samples.pop_front(); 1419 } 1420 1421 if (m_video_sample_size > 0 && !m_video_output_sample) 1422 { 1423 ComPtr<IMFMediaBuffer> video_membuf; 1424 if (FAILED(hr = wrap_MFCreateMemoryBuffer(m_video_sample_size, video_membuf.GetAddressOf()))) [[unlikely]] 1425 { 1426 Error::SetHResult(error, "YUV MFCreateMemoryBuffer() failed: ", hr); 1427 return false; 1428 } 1429 1430 if (FAILED(hr = wrap_MFCreateSample(m_video_output_sample.GetAddressOf()))) [[unlikely]] 1431 { 1432 Error::SetHResult(error, "YUV MFCreateSample() failed: ", hr); 1433 return false; 1434 } 1435 if (FAILED(hr = m_video_output_sample->AddBuffer(video_membuf.Get()))) [[unlikely]] 1436 { 1437 Error::SetHResult(error, "YUV AddBuffer() failed: ", hr); 1438 return false; 1439 } 1440 } 1441 1442 MFT_OUTPUT_DATA_BUFFER video_buf = {.pSample = m_video_output_sample.Get()}; 1443 DWORD status; 1444 hr = m_video_encode_transform->ProcessOutput(0, 1, &video_buf, &status); 1445 if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) 1446 break; 1447 1448 if (FAILED(hr)) [[unlikely]] 1449 { 1450 Error::SetHResult(error, "Video ProcessOutput() failed: ", hr); 1451 return false; 1452 } 
1453 if (video_buf.pEvents) 1454 video_buf.pEvents->Release(); 1455 1456 hr = m_sink_writer->WriteSample(m_video_stream_index, video_buf.pSample); 1457 if (FAILED(hr)) [[unlikely]] 1458 { 1459 Error::SetHResult(error, "Video WriteSample() failed: ", hr); 1460 return false; 1461 } 1462 1463 // might be transform-provided 1464 if (m_video_output_sample) 1465 m_video_output_sample.Reset(); 1466 else 1467 video_buf.pSample->Release(); 1468 } 1469 1470 return true; 1471 } 1472 1473 bool MediaCaptureMF::ProcessVideoEvents(Error* error) 1474 { 1475 HRESULT hr; 1476 1477 for (;;) 1478 { 1479 // push any wanted input 1480 while (m_wanted_video_samples > 0) 1481 { 1482 if (m_pending_video_samples.empty()) 1483 break; 1484 1485 if (FAILED(hr = m_video_encode_transform->ProcessInput(0, m_pending_video_samples.front().Get(), 0))) [[unlikely]] 1486 { 1487 Error::SetHResult(error, "Video ProcessInput() failed: ", hr); 1488 return false; 1489 } 1490 m_pending_video_samples.pop_front(); 1491 1492 m_wanted_video_samples--; 1493 } 1494 1495 ComPtr<IMFMediaEvent> event; 1496 hr = m_video_encode_event_generator->GetEvent(MF_EVENT_FLAG_NO_WAIT, event.GetAddressOf()); 1497 if (hr == MF_E_NO_EVENTS_AVAILABLE) 1498 return true; 1499 1500 if (FAILED(hr)) [[unlikely]] 1501 { 1502 Error::SetHResult(error, "GetEvent() failed: ", hr); 1503 return false; 1504 } 1505 1506 MediaEventType type; 1507 if (FAILED(hr = event->GetType(&type))) [[unlikely]] 1508 { 1509 Error::SetHResult(error, "GetEvent() failed: ", hr); 1510 return false; 1511 } 1512 1513 UINT32 stream_id = 0; 1514 if (type == METransformNeedInput || type == METransformHaveOutput) 1515 { 1516 if (FAILED(hr = event->GetUINT32(MF_EVENT_MFT_INPUT_STREAM_ID, &stream_id))) 1517 { 1518 Error::SetHResult(error, "Get stream ID failed: ", hr); 1519 return false; 1520 } 1521 else if (stream_id != 0) 1522 { 1523 Error::SetStringFmt(error, "Unexpected stream ID {}", stream_id); 1524 return false; 1525 } 1526 } 1527 1528 switch (type) 1529 { 1530 
case METransformNeedInput: 1531 { 1532 m_wanted_video_samples++; 1533 } 1534 break; 1535 1536 case METransformHaveOutput: 1537 { 1538 if (m_video_sample_size > 0 && !m_video_output_sample) 1539 { 1540 ComPtr<IMFMediaBuffer> video_membuf; 1541 if (FAILED(hr = wrap_MFCreateMemoryBuffer(m_video_sample_size, video_membuf.GetAddressOf()))) [[unlikely]] 1542 { 1543 Error::SetHResult(error, "YUV MFCreateMemoryBuffer() failed: ", hr); 1544 return false; 1545 } 1546 1547 if (FAILED(hr = wrap_MFCreateSample(m_video_output_sample.GetAddressOf()))) [[unlikely]] 1548 { 1549 Error::SetHResult(error, "YUV MFCreateSample() failed: ", hr); 1550 return false; 1551 } 1552 if (FAILED(hr = m_video_output_sample->AddBuffer(video_membuf.Get()))) [[unlikely]] 1553 { 1554 Error::SetHResult(error, "YUV AddBuffer() failed: ", hr); 1555 return false; 1556 } 1557 } 1558 1559 MFT_OUTPUT_DATA_BUFFER video_buf = {.pSample = m_video_output_sample.Get()}; 1560 DWORD status; 1561 if (FAILED(hr = m_video_encode_transform->ProcessOutput(0, 1, &video_buf, &status))) [[unlikely]] 1562 { 1563 Error::SetHResult(error, "Video ProcessOutput() failed: ", hr); 1564 return false; 1565 } 1566 if (video_buf.pEvents) 1567 video_buf.pEvents->Release(); 1568 1569 hr = m_sink_writer->WriteSample(m_video_stream_index, video_buf.pSample); 1570 if (FAILED(hr)) [[unlikely]] 1571 { 1572 Error::SetHResult(error, "Video WriteSample() failed: ", hr); 1573 return false; 1574 } 1575 1576 // might be transform-provided 1577 if (m_video_output_sample) 1578 m_video_output_sample.Reset(); 1579 else 1580 video_buf.pSample->Release(); 1581 } 1582 break; 1583 1584 default: 1585 WARNING_LOG("Unhandled video event {}", static_cast<u32>(type)); 1586 break; 1587 } 1588 } 1589 } 1590 1591 bool MediaCaptureMF::GetAudioTypes(std::string_view codec, ComPtr<IMFMediaType>* input_type, 1592 ComPtr<IMFMediaType>* output_type, u32 sample_rate, u32 bitrate, Error* error) 1593 { 1594 GUID output_subtype = MFAudioFormat_AAC; 1595 if 
(!codec.empty()) 1596 { 1597 bool found = false; 1598 for (const MediaFoundationAudioCodec& tcodec : s_media_foundation_audio_codecs) 1599 { 1600 if (StringUtil::EqualNoCase(codec, tcodec.name)) 1601 { 1602 output_subtype = tcodec.guid; 1603 bitrate = std::clamp(bitrate, tcodec.min_bitrate, tcodec.max_bitrate); 1604 found = true; 1605 break; 1606 } 1607 } 1608 if (!found) 1609 { 1610 Error::SetStringFmt(error, "Unknown audio codec '{}'", codec); 1611 return false; 1612 } 1613 } 1614 1615 HRESULT hr; 1616 if (FAILED(hr = wrap_MFCreateMediaType(input_type->GetAddressOf()))) [[unlikely]] 1617 { 1618 Error::SetHResult(error, "Audio MFCreateMediaType() failed: ", hr); 1619 return false; 1620 } 1621 1622 const u32 block_align = AUDIO_CHANNELS * (AUDIO_BITS_PER_SAMPLE / 8); 1623 const u32 bytes_per_second = block_align * sample_rate; 1624 1625 if (FAILED(hr = (*input_type)->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio)) || 1626 FAILED(hr = (*input_type)->SetGUID(MF_MT_SUBTYPE, AUDIO_INPUT_MEDIA_FORMAT)) || 1627 FAILED(hr = (*input_type)->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, AUDIO_CHANNELS)) || 1628 FAILED(hr = (*input_type)->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, AUDIO_BITS_PER_SAMPLE)) || 1629 FAILED(hr = (*input_type)->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, sample_rate)) || 1630 FAILED(hr = (*input_type)->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, block_align)) || 1631 FAILED(hr = (*input_type)->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, bytes_per_second)) || 1632 FAILED(hr = (*input_type)->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE))) [[unlikely]] 1633 { 1634 Error::SetHResult(error, "Audio setting attributes failed: ", hr); 1635 return false; 1636 } 1637 1638 // If our input type is PCM, no need for an input type, it's the same as output. 
1639 if (output_subtype == AUDIO_INPUT_MEDIA_FORMAT) 1640 { 1641 *output_type = std::move(*input_type); 1642 return true; 1643 } 1644 1645 ComPtr<IMFCollection> output_types_collection; 1646 DWORD output_types_collection_size = 0; 1647 hr = wrap_MFTranscodeGetAudioOutputAvailableTypes(output_subtype, 0, nullptr, output_types_collection.GetAddressOf()); 1648 if (FAILED(hr) || FAILED(hr = output_types_collection->GetElementCount(&output_types_collection_size))) [[unlikely]] 1649 { 1650 Error::SetHResult(error, "MFTranscodeGetAudioOutputAvailableTypes() failed: ", hr); 1651 return false; 1652 } 1653 1654 std::vector<std::pair<ComPtr<IMFMediaType>, u32>> output_types; 1655 for (DWORD i = 0; i < output_types_collection_size; i++) 1656 { 1657 ComPtr<IUnknown> current_output_type; 1658 ComPtr<IMFMediaType> current_output_type_c; 1659 if (SUCCEEDED(hr = output_types_collection->GetElement(i, current_output_type.GetAddressOf())) && 1660 SUCCEEDED(current_output_type.As(¤t_output_type_c))) 1661 { 1662 UINT32 current_channel_count, current_sample_rate; 1663 if (SUCCEEDED(current_output_type_c->GetUINT32(MF_MT_AUDIO_NUM_CHANNELS, ¤t_channel_count)) && 1664 SUCCEEDED(current_output_type_c->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, ¤t_sample_rate)) && 1665 current_channel_count == AUDIO_CHANNELS && current_sample_rate == sample_rate) 1666 { 1667 u32 current_bitrate; 1668 if (SUCCEEDED(current_output_type_c->GetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, ¤t_bitrate))) 1669 current_bitrate *= 8; 1670 else if (FAILED(current_output_type_c->GetUINT32(MF_MT_AVG_BITRATE, ¤t_bitrate))) 1671 continue; 1672 1673 output_types.emplace_back(std::move(current_output_type_c), current_bitrate); 1674 } 1675 } 1676 } 1677 1678 // pick the closest bitrate 1679 const u32 bitrate_kbps = bitrate * 1000; 1680 std::pair<ComPtr<IMFMediaType>, u32>* selected_output_type = nullptr; 1681 for (auto it = output_types.begin(); it != output_types.end(); ++it) 1682 { 1683 if (it->second >= bitrate_kbps && 1684 
(!selected_output_type || (selected_output_type->second - bitrate_kbps) > (it->second - bitrate_kbps))) 1685 { 1686 selected_output_type = &(*it); 1687 } 1688 } 1689 if (!selected_output_type) 1690 { 1691 Error::SetStringView(error, "Unable to find a matching audio output type."); 1692 return false; 1693 } 1694 1695 *output_type = std::move(selected_output_type->first); 1696 return true; 1697 } 1698 1699 bool MediaCaptureMF::ProcessAudioPackets(s64 video_pts, Error* error) 1700 { 1701 const u32 max_audio_buffer_size = GetAudioBufferSizeInFrames(); 1702 HRESULT hr; 1703 1704 u32 pending_frames = m_audio_buffer_size.load(std::memory_order_acquire); 1705 while (pending_frames > 0 && (!IsCapturingVideo() || 1706 ((m_next_audio_pts * m_audio_sample_duration) < (video_pts * m_video_sample_duration)))) 1707 { 1708 // Grab as many source frames as we can. 1709 const u32 contig_frames = std::min(pending_frames, max_audio_buffer_size - m_audio_buffer_read_pos); 1710 DebugAssert(contig_frames > 0); 1711 1712 const u32 buffer_size = contig_frames * sizeof(s16) * AUDIO_CHANNELS; 1713 ComPtr<IMFMediaBuffer> buffer; 1714 if (FAILED(hr = wrap_MFCreateMemoryBuffer(buffer_size, buffer.GetAddressOf()))) [[unlikely]] 1715 { 1716 Error::SetHResult(error, "Audio MFCreateMemoryBuffer() failed: ", hr); 1717 return false; 1718 } 1719 1720 BYTE* buffer_data; 1721 if (FAILED(hr = buffer->Lock(&buffer_data, nullptr, nullptr))) [[unlikely]] 1722 { 1723 Error::SetHResult(error, "Audio Lock() failed: ", hr); 1724 return false; 1725 } 1726 1727 std::memcpy(buffer_data, &m_audio_buffer[m_audio_buffer_read_pos * AUDIO_CHANNELS], buffer_size); 1728 buffer->Unlock(); 1729 1730 if (FAILED(hr = buffer->SetCurrentLength(buffer_size))) [[unlikely]] 1731 { 1732 Error::SetHResult(error, "Audio SetCurrentLength() failed: ", hr); 1733 return false; 1734 } 1735 1736 ComPtr<IMFSample> sample; 1737 if (FAILED(hr = wrap_MFCreateSample(sample.GetAddressOf()))) [[unlikely]] 1738 { 1739 Error::SetHResult(error, 
"Audio MFCreateSample() failed: ", hr); 1740 return false; 1741 } 1742 1743 if (FAILED(hr = sample->AddBuffer(buffer.Get()))) [[unlikely]] 1744 { 1745 Error::SetHResult(error, "Audio AddBuffer() failed: ", hr); 1746 return false; 1747 } 1748 1749 const LONGLONG timestamp = static_cast<LONGLONG>(m_next_audio_pts) * m_audio_sample_duration; 1750 if (FAILED(hr = sample->SetSampleTime(timestamp))) [[unlikely]] 1751 { 1752 Error::SetHResult(error, "Audio SetSampleTime() failed: ", hr); 1753 return false; 1754 } 1755 1756 const LONGLONG duration = static_cast<LONGLONG>(contig_frames) * m_audio_sample_duration; 1757 if (FAILED(hr = sample->SetSampleDuration(duration))) [[unlikely]] 1758 { 1759 Error::SetHResult(error, "Audio SetSampleDuration() failed: ", hr); 1760 return false; 1761 } 1762 1763 m_next_audio_pts += contig_frames; 1764 1765 hr = m_sink_writer->WriteSample(m_audio_stream_index, sample.Get()); 1766 if (FAILED(hr)) [[unlikely]] 1767 { 1768 Error::SetHResult(error, "Audio WriteSample() failed: ", hr); 1769 return false; 1770 } 1771 1772 m_audio_buffer_read_pos = (m_audio_buffer_read_pos + contig_frames) % max_audio_buffer_size; 1773 m_audio_buffer_size.fetch_sub(contig_frames, std::memory_order_acq_rel); 1774 m_audio_frame_pos += contig_frames; 1775 pending_frames -= contig_frames; 1776 } 1777 1778 return true; 1779 } 1780 1781 #endif 1782 1783 #ifndef __ANDROID__ 1784 1785 // We're using deprecated fields because we're targeting multiple ffmpeg versions. 1786 #if defined(_MSC_VER) 1787 #pragma warning(disable : 4996) // warning C4996: 'AVCodecContext::channels': was declared deprecated 1788 #elif defined(__clang__) 1789 #pragma clang diagnostic ignored "-Wdeprecated-declarations" 1790 #elif defined(__GNUC__) 1791 #pragma GCC diagnostic ignored "-Wdeprecated-declarations" 1792 #endif 1793 1794 // Compatibility with both ffmpeg 4.x and 5.x. 
#if (LIBAVFORMAT_VERSION_MAJOR < 59)
#define ff_const59
#else
#define ff_const59 const
#endif

// X-macro lists of every symbol resolved at runtime from the ffmpeg shared
// libraries; each entry becomes a wrap_* function pointer in MediaCaptureFFmpeg.

#define VISIT_AVCODEC_IMPORTS(X) \
  X(avcodec_find_encoder_by_name) \
  X(avcodec_find_encoder) \
  X(avcodec_alloc_context3) \
  X(avcodec_open2) \
  X(avcodec_free_context) \
  X(avcodec_send_frame) \
  X(avcodec_receive_packet) \
  X(avcodec_parameters_from_context) \
  X(avcodec_get_hw_config) \
  X(av_codec_iterate) \
  X(av_packet_alloc) \
  X(av_packet_free) \
  X(av_packet_rescale_ts) \
  X(av_packet_unref)

#define VISIT_AVFORMAT_IMPORTS(X) \
  X(avformat_alloc_output_context2) \
  X(avformat_new_stream) \
  X(avformat_write_header) \
  X(av_guess_format) \
  X(av_interleaved_write_frame) \
  X(av_write_trailer) \
  X(avformat_free_context) \
  X(avformat_query_codec) \
  X(avio_open) \
  X(avio_closep)

// Channel-layout helpers only exist from libavutil 57 (ffmpeg 5.x) onwards.
#if LIBAVUTIL_VERSION_MAJOR < 57
#define AVUTIL_57_IMPORTS(X)
#else
#define AVUTIL_57_IMPORTS(X) \
  X(av_channel_layout_default) \
  X(av_channel_layout_copy) \
  X(av_opt_set_chlayout)
#endif

#define VISIT_AVUTIL_IMPORTS(X) \
  AVUTIL_57_IMPORTS(X) \
  X(av_frame_alloc) \
  X(av_frame_get_buffer) \
  X(av_frame_free) \
  X(av_frame_make_writable) \
  X(av_strerror) \
  X(av_reduce) \
  X(av_dict_parse_string) \
  X(av_dict_get) \
  X(av_dict_free) \
  X(av_opt_set_int) \
  X(av_opt_set_sample_fmt) \
  X(av_compare_ts) \
  X(av_get_bytes_per_sample) \
  X(av_sample_fmt_is_planar) \
  X(av_d2q) \
  X(av_hwdevice_get_type_name) \
  X(av_hwdevice_ctx_create) \
  X(av_hwframe_ctx_alloc) \
  X(av_hwframe_ctx_init) \
  X(av_hwframe_transfer_data) \
  X(av_hwframe_get_buffer) \
  X(av_buffer_ref) \
  X(av_buffer_unref)

#define VISIT_SWSCALE_IMPORTS(X) \
  X(sws_getCachedContext) \
  X(sws_scale) \
  X(sws_freeContext)
// X-macro listing the libswresample entry points resolved dynamically at runtime.
#define VISIT_SWRESAMPLE_IMPORTS(X) \
  X(swr_alloc) \
  X(swr_init) \
  X(swr_free) \
  X(swr_convert) \
  X(swr_next_pts)

// MediaCapture backend implemented on top of dynamically-loaded FFmpeg libraries.
// Every FFmpeg call goes through the wrap_* function pointers resolved in LoadFFmpeg().
class MediaCaptureFFmpeg final : public MediaCaptureBase
{
public:
  ~MediaCaptureFFmpeg() override = default;

  // Loads FFmpeg (if not already loaded) and returns a new capture instance.
  static std::unique_ptr<MediaCapture> Create(Error* error);
  static ContainerList GetContainerList();
  static CodecList GetVideoCodecList(const char* container);
  static CodecList GetAudioCodecList(const char* container);

  bool IsCapturingAudio() const override;
  bool IsCapturingVideo() const override;
  time_t GetElapsedTime() const override;

protected:
  void ClearState() override;
  bool SendFrame(const PendingFrame& pf, Error* error) override;
  bool ProcessAudioPackets(s64 video_pts, Error* error) override;
  bool InternalBeginCapture(float fps, float aspect, u32 sample_rate, bool capture_video, std::string_view video_codec,
                            u32 video_bitrate, std::string_view video_codec_args, bool capture_audio,
                            std::string_view audio_codec, u32 audio_bitrate, std::string_view audio_codec_args,
                            Error* error) override;
  bool InternalEndCapture(std::unique_lock<std::mutex>& lock, Error* error) override;

private:
  // Formats an FFmpeg error number (via av_strerror) into an Error with the given prefix.
  static void SetAVError(Error* error, std::string_view prefix, int errnum);
  // Enumerates encoders of the given media type usable with the given container.
  static CodecList GetCodecListForContainer(const char* container, AVMediaType type);

  bool IsUsingHardwareVideoEncoding();

  // Drains all packets currently available from the codec and writes them to the output.
  bool ReceivePackets(AVCodecContext* codec_context, AVStream* stream, AVPacket* packet, Error* error);

  AVFormatContext* m_format_context = nullptr;

  // Video encoding state.
  AVCodecContext* m_video_codec_context = nullptr;
  AVStream* m_video_stream = nullptr;
  AVFrame* m_converted_video_frame = nullptr; // YUV
  AVFrame* m_hw_video_frame = nullptr;
  AVPacket* m_video_packet = nullptr;
  SwsContext* m_sws_context = nullptr;
  AVDictionary* m_video_codec_arguments = nullptr;
  AVBufferRef* m_video_hw_context = nullptr;
  AVBufferRef* m_video_hw_frames = nullptr;

  // Audio encoding state.
  AVCodecContext* m_audio_codec_context = nullptr;
  AVStream* m_audio_stream = nullptr;
  AVFrame* m_converted_audio_frame = nullptr;
  AVPacket* m_audio_packet = nullptr;
  SwrContext* m_swr_context = nullptr;
  AVDictionary* m_audio_codec_arguments = nullptr;

  AVPixelFormat m_video_pixel_format = AV_PIX_FMT_NONE;
  u32 m_audio_frame_bps = 0; // bytes per sample in the encoder's sample format
  bool m_audio_frame_planar = false;

// Declare a static wrap_* function pointer for each imported FFmpeg symbol.
#define DECLARE_IMPORT(X) static inline decltype(X)* wrap_##X;
  VISIT_AVCODEC_IMPORTS(DECLARE_IMPORT);
  VISIT_AVFORMAT_IMPORTS(DECLARE_IMPORT);
  VISIT_AVUTIL_IMPORTS(DECLARE_IMPORT);
  VISIT_SWSCALE_IMPORTS(DECLARE_IMPORT);
  VISIT_SWRESAMPLE_IMPORTS(DECLARE_IMPORT);
#undef DECLARE_IMPORT

  static bool LoadFFmpeg(Error* error);
  static void UnloadFFmpeg();

  static inline DynamicLibrary s_avcodec_library;
  static inline DynamicLibrary s_avformat_library;
  static inline DynamicLibrary s_avutil_library;
  static inline DynamicLibrary s_swscale_library;
  static inline DynamicLibrary s_swresample_library;
  static inline bool s_library_loaded = false;
};

// Opens the five FFmpeg shared libraries and resolves all imported symbols.
// Safe to call repeatedly; guarded by s_load_mutex / s_library_loaded.
bool MediaCaptureFFmpeg::LoadFFmpeg(Error* error)
{
  std::unique_lock lock(s_load_mutex);
  if (s_library_loaded)
    return true;

  static constexpr auto open_dynlib = [](DynamicLibrary& lib, const char* name, int major_version, Error* error) {
    std::string full_name(DynamicLibrary::GetVersionedFilename(name, major_version));
    return lib.Open(full_name.c_str(), error);
  };

  bool result = true;

  result = result && open_dynlib(s_avutil_library, "avutil", LIBAVUTIL_VERSION_MAJOR, error);
  result = result && open_dynlib(s_avcodec_library, "avcodec", LIBAVCODEC_VERSION_MAJOR, error);
  result = result && open_dynlib(s_avformat_library, "avformat", LIBAVFORMAT_VERSION_MAJOR, error);
  result = result && open_dynlib(s_swscale_library, "swscale", LIBSWSCALE_VERSION_MAJOR, error);
  result = result && open_dynlib(s_swresample_library, "swresample", LIBSWRESAMPLE_VERSION_MAJOR, error);

// Resolve each symbol from the library it belongs to; any failure clears `result`.
#define RESOLVE_IMPORT(X) result = result && s_avcodec_library.GetSymbol(#X, &wrap_##X);
  VISIT_AVCODEC_IMPORTS(RESOLVE_IMPORT);
#undef RESOLVE_IMPORT

#define RESOLVE_IMPORT(X) result = result && s_avformat_library.GetSymbol(#X, &wrap_##X);
  VISIT_AVFORMAT_IMPORTS(RESOLVE_IMPORT);
#undef RESOLVE_IMPORT

#define RESOLVE_IMPORT(X) result = result && s_avutil_library.GetSymbol(#X, &wrap_##X);
  VISIT_AVUTIL_IMPORTS(RESOLVE_IMPORT);
#undef RESOLVE_IMPORT

#define RESOLVE_IMPORT(X) result = result && s_swscale_library.GetSymbol(#X, &wrap_##X);
  VISIT_SWSCALE_IMPORTS(RESOLVE_IMPORT);
#undef RESOLVE_IMPORT

#define RESOLVE_IMPORT(X) result = result && s_swresample_library.GetSymbol(#X, &wrap_##X);
  VISIT_SWRESAMPLE_IMPORTS(RESOLVE_IMPORT);
#undef RESOLVE_IMPORT

  if (result)
  {
    s_library_loaded = true;
    // Registered only on the first successful load, so it runs at most once.
    std::atexit(&MediaCaptureFFmpeg::UnloadFFmpeg);
    return true;
  }

  UnloadFFmpeg();

  Error::SetStringFmt(
    error,
    TRANSLATE_FS(
      "MediaCapture",
      "You may be missing one or more files, or are using the incorrect version. This build of DuckStation requires:\n"
      " libavcodec: {}\n"
      " libavformat: {}\n"
      " libavutil: {}\n"
      " libswscale: {}\n"
      " libswresample: {}\n"),
    LIBAVCODEC_VERSION_MAJOR, LIBAVFORMAT_VERSION_MAJOR, LIBAVUTIL_VERSION_MAJOR, LIBSWSCALE_VERSION_MAJOR,
    LIBSWRESAMPLE_VERSION_MAJOR);
  return false;
}

// Clears all resolved symbols and closes the libraries in reverse load order.
void MediaCaptureFFmpeg::UnloadFFmpeg()
{
#define CLEAR_IMPORT(X) wrap_##X = nullptr;
  VISIT_AVCODEC_IMPORTS(CLEAR_IMPORT);
  VISIT_AVFORMAT_IMPORTS(CLEAR_IMPORT);
  VISIT_AVUTIL_IMPORTS(CLEAR_IMPORT);
  VISIT_SWSCALE_IMPORTS(CLEAR_IMPORT);
  VISIT_SWRESAMPLE_IMPORTS(CLEAR_IMPORT);
#undef CLEAR_IMPORT

  s_swresample_library.Close();
  s_swscale_library.Close();
  s_avutil_library.Close();
  s_avformat_library.Close();
  s_avcodec_library.Close();
  s_library_loaded = false;
}

// The import lists are not needed past this point.
#undef VISIT_AVCODEC_IMPORTS
#undef VISIT_AVFORMAT_IMPORTS
#undef VISIT_AVUTIL_IMPORTS
#undef VISIT_SWSCALE_IMPORTS
#undef VISIT_SWRESAMPLE_IMPORTS

void MediaCaptureFFmpeg::SetAVError(Error* error, std::string_view prefix, int errnum)
{
  char errbuf[128];
  wrap_av_strerror(errnum, errbuf, sizeof(errbuf));

  Error::SetStringFmt(error, "{} {}", prefix, errbuf);
}

bool MediaCaptureFFmpeg::IsCapturingAudio() const
{
  return (m_audio_stream != nullptr);
}

bool MediaCaptureFFmpeg::IsCapturingVideo() const
{
  return (m_video_stream != nullptr);
}

// Returns whole seconds captured so far, derived from the next PTS of whichever
// stream is active (video preferred), scaled by that codec's time base.
time_t MediaCaptureFFmpeg::GetElapsedTime() const
{
  std::unique_lock<std::mutex> lock(m_lock);
  s64 seconds;
  if (m_video_stream)
  {
    seconds = (m_next_video_pts * static_cast<s64>(m_video_codec_context->time_base.num)) /
              static_cast<s64>(m_video_codec_context->time_base.den);
  }
  else
  {
    DebugAssert(IsCapturingAudio());
    seconds = (m_next_audio_pts *
static_cast<s64>(m_audio_codec_context->time_base.num)) /
              static_cast<s64>(m_audio_codec_context->time_base.den);
  }

  return seconds;
}

bool MediaCaptureFFmpeg::IsUsingHardwareVideoEncoding()
{
  return (m_video_hw_context != nullptr);
}

// Creates the muxer and (optionally) video/audio encoders, opens the output
// file, and writes the container header. Any failure returns false with
// `error` set; allocated state is released later via ClearState().
bool MediaCaptureFFmpeg::InternalBeginCapture(float fps, float aspect, u32 sample_rate, bool capture_video,
                                              std::string_view video_codec, u32 video_bitrate,
                                              std::string_view video_codec_args, bool capture_audio,
                                              std::string_view audio_codec, u32 audio_bitrate,
                                              std::string_view audio_codec_args, Error* error)
{
  // Choose the container from the output file name/extension.
  ff_const59 AVOutputFormat* output_format = wrap_av_guess_format(nullptr, m_path.c_str(), nullptr);
  if (!output_format)
  {
    Error::SetStringFmt(error, "Failed to get output format for '{}'", Path::GetFileName(m_path));
    return false;
  }

  int res = wrap_avformat_alloc_output_context2(&m_format_context, output_format, nullptr, m_path.c_str());
  if (res < 0)
  {
    SetAVError(error, "avformat_alloc_output_context2() failed: ", res);
    return false;
  }

  // find the codec id
  if (capture_video)
  {
    // An explicitly-named codec takes priority; a bad name is a hard error.
    const AVCodec* vcodec = nullptr;
    if (!video_codec.empty())
    {
      vcodec = wrap_avcodec_find_encoder_by_name(TinyString(video_codec).c_str());
      if (!vcodec)
      {
        Error::SetStringFmt(error, "Video codec {} not found.", video_codec);
        return false;
      }
    }

    // FFmpeg decides whether mp4, mkv, etc should use h264 or mpeg4 as their default codec by whether x264 was enabled
    // But there's a lot of other h264 encoders (e.g. hardware encoders) we may want to use instead
    if (!vcodec && wrap_avformat_query_codec(output_format, AV_CODEC_ID_H264, FF_COMPLIANCE_NORMAL))
      vcodec = wrap_avcodec_find_encoder(AV_CODEC_ID_H264);
    if (!vcodec)
      vcodec = wrap_avcodec_find_encoder(output_format->video_codec);

    if (!vcodec)
    {
      Error::SetStringView(error, "Failed to find video encoder.");
      return false;
    }

    m_video_codec_context = wrap_avcodec_alloc_context3(vcodec);
    if (!m_video_codec_context)
    {
      Error::SetStringView(error, "Failed to allocate video codec context.");
      return false;
    }

    m_video_codec_context->codec_type = AVMEDIA_TYPE_VIDEO;
    m_video_codec_context->bit_rate = video_bitrate * 1000; // video_bitrate is in kbps, bit_rate is bits/sec
    m_video_codec_context->width = m_video_width;
    m_video_codec_context->height = m_video_height;
    m_video_codec_context->sample_aspect_ratio = wrap_av_d2q(aspect, 100000);
    // Express 1/fps as a reduced rational time base (fps scaled by 10000 for precision).
    wrap_av_reduce(&m_video_codec_context->time_base.num, &m_video_codec_context->time_base.den, 10000,
                   static_cast<s64>(static_cast<double>(fps) * 10000.0), std::numeric_limits<s32>::max());

    // Map input pixel format.
    static constexpr const std::pair<GPUTexture::Format, AVPixelFormat> texture_pf_mapping[] = {
      {GPUTexture::Format::RGBA8, AV_PIX_FMT_RGBA},
      {GPUTexture::Format::BGRA8, AV_PIX_FMT_BGRA},
    };
    if (const auto pf_mapping =
          std::find_if(std::begin(texture_pf_mapping), std::end(texture_pf_mapping),
                       [this](const auto& it) { return (it.first == m_video_render_texture_format); });
        pf_mapping != std::end(texture_pf_mapping))
    {
      m_video_pixel_format = pf_mapping->second;
    }
    else
    {
      Error::SetStringFmt(error, "Unhandled input pixel format {}",
                          GPUTexture::GetFormatName(m_video_render_texture_format));
      return false;
    }

    // Default to YUV 4:2:0 if the codec doesn't specify a pixel format.
    AVPixelFormat sw_pix_fmt = AV_PIX_FMT_YUV420P;
    if (vcodec->pix_fmts)
    {
      // Prefer YUV420 given the choice, but otherwise fall back to whatever it supports.
      sw_pix_fmt = vcodec->pix_fmts[0];
      for (u32 i = 0; vcodec->pix_fmts[i] != AV_PIX_FMT_NONE; i++)
      {
        if (vcodec->pix_fmts[i] == AV_PIX_FMT_YUV420P)
        {
          sw_pix_fmt = vcodec->pix_fmts[i];
          break;
        }
      }
    }
    m_video_codec_context->pix_fmt = sw_pix_fmt;

    // Can we use hardware encoding?
    const AVCodecHWConfig* hwconfig = wrap_avcodec_get_hw_config(vcodec, 0);
    if (hwconfig && hwconfig->pix_fmt != AV_PIX_FMT_NONE && hwconfig->pix_fmt != sw_pix_fmt)
    {
      // First index isn't our preferred pixel format, try the others, but fall back if one doesn't exist.
      int index = 1;
      while (const AVCodecHWConfig* next_hwconfig = wrap_avcodec_get_hw_config(vcodec, index++))
      {
        if (next_hwconfig->pix_fmt == sw_pix_fmt)
        {
          hwconfig = next_hwconfig;
          break;
        }
      }
    }

    if (hwconfig)
    {
      // Hardware setup failures are logged but non-fatal; we fall back to software.
      Error hw_error;

      INFO_LOG("Trying to use {} hardware device for video encoding.",
               wrap_av_hwdevice_get_type_name(hwconfig->device_type));
      res = wrap_av_hwdevice_ctx_create(&m_video_hw_context, hwconfig->device_type, nullptr, nullptr, 0);
      if (res < 0)
      {
        SetAVError(&hw_error, "av_hwdevice_ctx_create() failed: ", res);
        ERROR_LOG(hw_error.GetDescription());
      }
      else
      {
        m_video_hw_frames = wrap_av_hwframe_ctx_alloc(m_video_hw_context);
        if (!m_video_hw_frames)
        {
          ERROR_LOG("s_video_hw_frames() failed");
          wrap_av_buffer_unref(&m_video_hw_context);
        }
        else
        {
          AVHWFramesContext* frames_ctx = reinterpret_cast<AVHWFramesContext*>(m_video_hw_frames->data);
          frames_ctx->format = (hwconfig->pix_fmt != AV_PIX_FMT_NONE) ? hwconfig->pix_fmt : sw_pix_fmt;
          frames_ctx->sw_format = sw_pix_fmt;
          frames_ctx->width = m_video_codec_context->width;
          frames_ctx->height = m_video_codec_context->height;
          res = wrap_av_hwframe_ctx_init(m_video_hw_frames);
          if (res < 0)
          {
            SetAVError(&hw_error, "av_hwframe_ctx_init() failed: ", res);
            ERROR_LOG(hw_error.GetDescription());
            wrap_av_buffer_unref(&m_video_hw_frames);
            wrap_av_buffer_unref(&m_video_hw_context);
          }
          else
          {
            m_video_codec_context->hw_frames_ctx = wrap_av_buffer_ref(m_video_hw_frames);
            if (hwconfig->pix_fmt != AV_PIX_FMT_NONE)
              m_video_codec_context->pix_fmt = hwconfig->pix_fmt;
          }
        }
      }

      if (!m_video_hw_context)
      {
        ERROR_LOG("Failed to create hardware encoder, using software encoding.");
        hwconfig = nullptr;
      }
    }

    if (!video_codec_args.empty())
    {
      res = wrap_av_dict_parse_string(&m_video_codec_arguments, SmallString(video_codec_args).c_str(), "=", ":", 0);
      if (res < 0)
      {
        SetAVError(error, "av_dict_parse_string() for video failed: ", res);
        return false;
      }
    }

    if (output_format->flags & AVFMT_GLOBALHEADER)
      m_video_codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    // Checked before avcodec_open2() consumes the dictionary entries.
    bool has_pixel_format_override = wrap_av_dict_get(m_video_codec_arguments, "pixel_format", nullptr, 0);

    res = wrap_avcodec_open2(m_video_codec_context, vcodec, &m_video_codec_arguments);
    if (res < 0)
    {
      SetAVError(error, "avcodec_open2() for video failed: ", res);
      return false;
    }

    // If the user overrode the pixel format, get that now
    if (has_pixel_format_override)
      sw_pix_fmt = m_video_codec_context->pix_fmt;

    m_converted_video_frame = wrap_av_frame_alloc();
    m_hw_video_frame = IsUsingHardwareVideoEncoding() ?
wrap_av_frame_alloc() : nullptr; 2272 if (!m_converted_video_frame || (IsUsingHardwareVideoEncoding() && !m_hw_video_frame)) 2273 { 2274 SetAVError(error, "Failed to allocate frame: ", AVERROR(ENOMEM)); 2275 return false; 2276 } 2277 2278 m_converted_video_frame->format = sw_pix_fmt; 2279 m_converted_video_frame->width = m_video_codec_context->width; 2280 m_converted_video_frame->height = m_video_codec_context->height; 2281 res = wrap_av_frame_get_buffer(m_converted_video_frame, 0); 2282 if (res < 0) 2283 { 2284 SetAVError(error, "av_frame_get_buffer() for converted frame failed: ", res); 2285 return false; 2286 } 2287 2288 if (IsUsingHardwareVideoEncoding()) 2289 { 2290 m_hw_video_frame->format = m_video_codec_context->pix_fmt; 2291 m_hw_video_frame->width = m_video_codec_context->width; 2292 m_hw_video_frame->height = m_video_codec_context->height; 2293 res = wrap_av_hwframe_get_buffer(m_video_hw_frames, m_hw_video_frame, 0); 2294 if (res < 0) 2295 { 2296 SetAVError(error, "av_frame_get_buffer() for HW frame failed: ", res); 2297 return false; 2298 } 2299 } 2300 2301 m_video_stream = wrap_avformat_new_stream(m_format_context, vcodec); 2302 if (!m_video_stream) 2303 { 2304 SetAVError(error, "avformat_new_stream() for video failed: ", res); 2305 return false; 2306 } 2307 2308 res = wrap_avcodec_parameters_from_context(m_video_stream->codecpar, m_video_codec_context); 2309 if (res < 0) 2310 { 2311 SetAVError(error, "avcodec_parameters_from_context() for video failed: ", AVERROR(ENOMEM)); 2312 return false; 2313 } 2314 2315 m_video_stream->time_base = m_video_codec_context->time_base; 2316 m_video_stream->sample_aspect_ratio = m_video_codec_context->sample_aspect_ratio; 2317 2318 m_video_packet = wrap_av_packet_alloc(); 2319 if (!m_video_packet) 2320 { 2321 SetAVError(error, "av_packet_alloc() for video failed: ", AVERROR(ENOMEM)); 2322 return false; 2323 } 2324 } 2325 2326 if (capture_audio) 2327 { 2328 const AVCodec* acodec = nullptr; 2329 if (!audio_codec.empty()) 
2330 { 2331 acodec = wrap_avcodec_find_encoder_by_name(TinyString(audio_codec).c_str()); 2332 if (!acodec) 2333 { 2334 Error::SetStringFmt(error, "Audio codec {} not found.", video_codec); 2335 return false; 2336 } 2337 } 2338 if (!acodec) 2339 acodec = wrap_avcodec_find_encoder(output_format->audio_codec); 2340 if (!acodec) 2341 { 2342 Error::SetStringView(error, "Failed to find audio encoder."); 2343 return false; 2344 } 2345 2346 m_audio_codec_context = wrap_avcodec_alloc_context3(acodec); 2347 if (!m_audio_codec_context) 2348 { 2349 Error::SetStringView(error, "Failed to allocate audio codec context."); 2350 return false; 2351 } 2352 2353 m_audio_codec_context->codec_type = AVMEDIA_TYPE_AUDIO; 2354 m_audio_codec_context->bit_rate = audio_bitrate * 1000; 2355 m_audio_codec_context->sample_fmt = AV_SAMPLE_FMT_S16; 2356 m_audio_codec_context->sample_rate = sample_rate; 2357 m_audio_codec_context->time_base = {1, static_cast<int>(sample_rate)}; 2358 #if LIBAVUTIL_VERSION_MAJOR < 57 2359 m_audio_codec_context->channels = AUDIO_CHANNELS; 2360 m_audio_codec_context->channel_layout = AV_CH_LAYOUT_STEREO; 2361 #else 2362 wrap_av_channel_layout_default(&m_audio_codec_context->ch_layout, AUDIO_CHANNELS); 2363 #endif 2364 2365 bool supports_format = false; 2366 for (const AVSampleFormat* p = acodec->sample_fmts; *p != AV_SAMPLE_FMT_NONE; p++) 2367 { 2368 if (*p == m_audio_codec_context->sample_fmt) 2369 { 2370 supports_format = true; 2371 break; 2372 } 2373 } 2374 if (!supports_format) 2375 { 2376 WARNING_LOG("Audio codec '{}' does not support S16 samples, using default.", acodec->name); 2377 m_audio_codec_context->sample_fmt = acodec->sample_fmts[0]; 2378 m_swr_context = wrap_swr_alloc(); 2379 if (!m_swr_context) 2380 { 2381 SetAVError(error, "swr_alloc() failed: ", AVERROR(ENOMEM)); 2382 return false; 2383 } 2384 2385 wrap_av_opt_set_int(m_swr_context, "in_channel_count", AUDIO_CHANNELS, 0); 2386 wrap_av_opt_set_int(m_swr_context, "in_sample_rate", sample_rate, 0); 2387 
wrap_av_opt_set_sample_fmt(m_swr_context, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
      wrap_av_opt_set_int(m_swr_context, "out_channel_count", AUDIO_CHANNELS, 0);
      wrap_av_opt_set_int(m_swr_context, "out_sample_rate", sample_rate, 0);
      wrap_av_opt_set_sample_fmt(m_swr_context, "out_sample_fmt", m_audio_codec_context->sample_fmt, 0);

#if LIBAVUTIL_VERSION_MAJOR >= 59
      wrap_av_opt_set_chlayout(m_swr_context, "in_chlayout", &m_audio_codec_context->ch_layout, 0);
      wrap_av_opt_set_chlayout(m_swr_context, "out_chlayout", &m_audio_codec_context->ch_layout, 0);
#endif

      res = wrap_swr_init(m_swr_context);
      if (res < 0)
      {
        SetAVError(error, "swr_init() failed: ", res);
        return false;
      }
    }

    // TODO: Check channel layout support

    if (!audio_codec_args.empty())
    {
      res = wrap_av_dict_parse_string(&m_audio_codec_arguments, SmallString(audio_codec_args).c_str(), "=", ":", 0);
      if (res < 0)
      {
        SetAVError(error, "av_dict_parse_string() for audio failed: ", res);
        return false;
      }
    }

    if (output_format->flags & AVFMT_GLOBALHEADER)
      m_audio_codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    res = wrap_avcodec_open2(m_audio_codec_context, acodec, &m_audio_codec_arguments);
    if (res < 0)
    {
      SetAVError(error, "avcodec_open2() for audio failed: ", res);
      return false;
    }

    // Use packet size for frame if it supports it... but most don't.
    if (acodec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
      m_audio_frame_size = static_cast<u32>(static_cast<float>(sample_rate) / fps);
    else
      m_audio_frame_size = m_audio_codec_context->frame_size;
    // The staging ring buffer must be able to hold at least one whole frame.
    if (m_audio_frame_size >= m_audio_buffer.size())
    {
      SetAVError(error,
                 TinyString::from_format("Audio frame size {} exceeds buffer size {}", m_audio_frame_size,
                                         m_audio_buffer.size()),
                 AVERROR(EINVAL));
      return false;
    }

    m_audio_frame_bps = wrap_av_get_bytes_per_sample(m_audio_codec_context->sample_fmt);
    m_audio_frame_planar = (wrap_av_sample_fmt_is_planar(m_audio_codec_context->sample_fmt) != 0);

    m_converted_audio_frame = wrap_av_frame_alloc();
    if (!m_converted_audio_frame)
    {
      SetAVError(error, "Failed to allocate audio frame: ", AVERROR(ENOMEM));
      return false;
    }

    m_converted_audio_frame->format = m_audio_codec_context->sample_fmt;
    m_converted_audio_frame->nb_samples = m_audio_frame_size;
#if LIBAVUTIL_VERSION_MAJOR < 57
    m_converted_audio_frame->channels = AUDIO_CHANNELS;
    m_converted_audio_frame->channel_layout = m_audio_codec_context->channel_layout;
#else
    wrap_av_channel_layout_copy(&m_converted_audio_frame->ch_layout, &m_audio_codec_context->ch_layout);
#endif
    res = wrap_av_frame_get_buffer(m_converted_audio_frame, 0);
    if (res < 0)
    {
      SetAVError(error, "av_frame_get_buffer() for audio frame failed: ", res);
      return false;
    }

    m_audio_stream = wrap_avformat_new_stream(m_format_context, acodec);
    if (!m_audio_stream)
    {
      SetAVError(error, "avformat_new_stream() for audio failed: ", AVERROR(ENOMEM));
      return false;
    }

    res = wrap_avcodec_parameters_from_context(m_audio_stream->codecpar, m_audio_codec_context);
    if (res < 0)
    {
      SetAVError(error, "avcodec_parameters_from_context() for audio failed: ", res);
      return false;
    }

    m_audio_stream->time_base = m_audio_codec_context->time_base;

    m_audio_packet = wrap_av_packet_alloc();
    if (!m_audio_packet)
    {
      SetAVError(error, "av_packet_alloc() for audio failed: ", AVERROR(ENOMEM));
      return false;
    }
  }

  // Open the output file and write the container header.
  res = wrap_avio_open(&m_format_context->pb, m_path.c_str(), AVIO_FLAG_WRITE);
  if (res < 0)
  {
    SetAVError(error, "avio_open() failed: ", res);
    return false;
  }

  res = wrap_avformat_write_header(m_format_context, nullptr);
  if (res < 0)
  {
    SetAVError(error, "avformat_write_header() failed: ", res);
    return false;
  }

  return true;
}

// Flushes both encoders (EOS frame + remaining packets) and finalizes the container.
bool MediaCaptureFFmpeg::InternalEndCapture(std::unique_lock<std::mutex>& lock, Error* error)
{
  int res = MediaCaptureBase::InternalEndCapture(lock, error) ? 0 : -1;
  if (res == 0)
  {
    // end of stream
    if (m_video_stream)
    {
      res = wrap_avcodec_send_frame(m_video_codec_context, nullptr);
      if (res < 0)
        SetAVError(error, "avcodec_send_frame() for video EOS failed: ", res);
      else
        res = ReceivePackets(m_video_codec_context, m_video_stream, m_video_packet, error) ? 0 : -1;
    }
    // NOTE(review): a video flush failure is overwritten by the audio flush result
    // here, so only the last stream's status reaches the caller — confirm intended.
    if (m_audio_stream)
    {
      res = wrap_avcodec_send_frame(m_audio_codec_context, nullptr);
      if (res < 0)
        SetAVError(error, "avcodec_send_frame() for audio EOS failed: ", res);
      else
        res = ReceivePackets(m_audio_codec_context, m_audio_stream, m_audio_packet, error) ? 0 : -1;
    }

    // end of file!
    if (res == 0)
    {
      res = wrap_av_write_trailer(m_format_context);
      if (res < 0)
        SetAVError(error, "av_write_trailer() failed: ", res);
    }
  }

  return (res == 0);
}

// Releases every FFmpeg object allocated during capture. Runs after capture
// ends (successfully or not), so each pointer is null-checked before freeing.
void MediaCaptureFFmpeg::ClearState()
{
  // Close the output file handle first.
  if (m_format_context)
  {
    int res = wrap_avio_closep(&m_format_context->pb);
    if (res < 0) [[unlikely]]
    {
      Error close_error;
      SetAVError(&close_error, "avio_closep() failed: ", res);
      ERROR_LOG(close_error.GetDescription());
    }
  }

  if (m_sws_context)
  {
    wrap_sws_freeContext(m_sws_context);
    m_sws_context = nullptr;
  }
  if (m_video_packet)
    wrap_av_packet_free(&m_video_packet);
  if (m_converted_video_frame)
    wrap_av_frame_free(&m_converted_video_frame);
  if (m_hw_video_frame)
    wrap_av_frame_free(&m_hw_video_frame);
  if (m_video_hw_frames)
    wrap_av_buffer_unref(&m_video_hw_frames);
  if (m_video_hw_context)
    wrap_av_buffer_unref(&m_video_hw_context);
  if (m_video_codec_context)
    wrap_avcodec_free_context(&m_video_codec_context);
  m_video_stream = nullptr; // stream itself is owned by m_format_context

  if (m_swr_context)
    wrap_swr_free(&m_swr_context);
  if (m_audio_packet)
    wrap_av_packet_free(&m_audio_packet);
  if (m_converted_audio_frame)
    wrap_av_frame_free(&m_converted_audio_frame);
  if (m_audio_codec_context)
    wrap_avcodec_free_context(&m_audio_codec_context);
  m_audio_stream = nullptr; // stream itself is owned by m_format_context

  if (m_format_context)
  {
    wrap_avformat_free_context(m_format_context);
    m_format_context = nullptr;
  }
  if (m_video_codec_arguments)
    wrap_av_dict_free(&m_video_codec_arguments);
  if (m_audio_codec_arguments)
    wrap_av_dict_free(&m_audio_codec_arguments);
}

// Pulls every packet currently available from the encoder and muxes it into the
// output file, rescaling timestamps from codec time base to stream time base.
bool MediaCaptureFFmpeg::ReceivePackets(AVCodecContext* codec_context, AVStream* stream, AVPacket* packet, Error* error)
{
  for (;;)
  {
    int res = wrap_avcodec_receive_packet(codec_context, packet);
    if (res == AVERROR(EAGAIN) || res == AVERROR_EOF)
    {
      // no more data available
      break;
    }
    else if (res < 0) [[unlikely]]
    {
      SetAVError(error, "avcodec_receive_packet() failed: ", res);
      return false;
    }

    packet->stream_index = stream->index;

    // in case the frame rate changed...
    wrap_av_packet_rescale_ts(packet, codec_context->time_base, stream->time_base);

    res = wrap_av_interleaved_write_frame(m_format_context, packet);
    if (res < 0) [[unlikely]]
    {
      SetAVError(error, "av_interleaved_write_frame() failed: ", res);
      return false;
    }

    wrap_av_packet_unref(packet);
  }

  return true;
}

// Converts one rendered frame to the encoder's pixel format via swscale,
// uploads it to the hardware frame when HW encoding is active, and submits it.
bool MediaCaptureFFmpeg::SendFrame(const PendingFrame& pf, Error* error)
{
  const u8* source_ptr = pf.tex->GetMapPointer();
  const int source_width = static_cast<int>(pf.tex->GetWidth());
  const int source_height = static_cast<int>(pf.tex->GetHeight());

  // OpenGL lower-left flip.
  int source_pitch = static_cast<int>(pf.tex->GetMapPitch());
  if (g_gpu_device->UsesLowerLeftOrigin())
  {
    // Point at the last row and walk backwards with a negative pitch.
    source_ptr = source_ptr + static_cast<size_t>(source_pitch) * static_cast<u32>(source_height - 1);
    source_pitch = -source_pitch;
  }

  // In case a previous frame is still using the frame.
  wrap_av_frame_make_writable(m_converted_video_frame);

  m_sws_context = wrap_sws_getCachedContext(m_sws_context, source_width, source_height, m_video_pixel_format,
                                            m_converted_video_frame->width, m_converted_video_frame->height,
                                            static_cast<AVPixelFormat>(m_converted_video_frame->format), SWS_BICUBIC,
                                            nullptr, nullptr, nullptr);
  if (!m_sws_context) [[unlikely]]
  {
    Error::SetStringView(error, "sws_getCachedContext() failed");
    return false;
  }

  wrap_sws_scale(m_sws_context, reinterpret_cast<const u8**>(&source_ptr), &source_pitch, 0, source_height,
                 m_converted_video_frame->data, m_converted_video_frame->linesize);

  AVFrame* frame_to_send = m_converted_video_frame;
  if (IsUsingHardwareVideoEncoding())
  {
    // Need to transfer the frame to hardware.
    const int res = wrap_av_hwframe_transfer_data(m_hw_video_frame, m_converted_video_frame, 0);
    if (res < 0) [[unlikely]]
    {
      SetAVError(error, "av_hwframe_transfer_data() failed: ", res);
      return false;
    }

    frame_to_send = m_hw_video_frame;
  }

  // Set the correct PTS before handing it off.
frame_to_send->pts = pf.pts;

  const int res = wrap_avcodec_send_frame(m_video_codec_context, frame_to_send);
  if (res < 0) [[unlikely]]
  {
    SetAVError(error, "avcodec_send_frame() failed: ", res);
    return false;
  }

  return ReceivePackets(m_video_codec_context, m_video_stream, m_video_packet, error);
}

// Drains buffered S16 samples from the staging ring buffer into encoder-sized
// frames and encodes them, keeping the audio PTS no further ahead than the
// current video PTS (when video is also being captured).
bool MediaCaptureFFmpeg::ProcessAudioPackets(s64 video_pts, Error* error)
{
  const u32 max_audio_buffer_size = GetAudioBufferSizeInFrames();

  u32 pending_frames = m_audio_buffer_size.load(std::memory_order_acquire);
  while (pending_frames > 0 &&
         (!m_video_codec_context || wrap_av_compare_ts(video_pts, m_video_codec_context->time_base, m_next_audio_pts,
                                                       m_audio_codec_context->time_base) > 0))
  {
    // In case the encoder is still using it.
    if (m_audio_frame_pos == 0)
      wrap_av_frame_make_writable(m_converted_audio_frame);

    // Grab as many source frames as we can.
    // Limited by: what's pending, what's contiguous before the ring wraps, and
    // what's left to fill in the current encoder frame.
    const u32 contig_frames = std::min(pending_frames, max_audio_buffer_size - m_audio_buffer_read_pos);
    const u32 this_batch = std::min(m_audio_frame_size - m_audio_frame_pos, contig_frames);

    // Do we need to convert the sample format?
    if (!m_swr_context)
    {
      // No, just copy frames out of staging buffer.
      if (m_audio_frame_planar)
      {
        // This is slow. Hopefully doesn't happen in too many configurations.
        // De-interleave: one channel at a time into its own plane.
        for (u32 i = 0; i < AUDIO_CHANNELS; i++)
        {
          u8* output = m_converted_audio_frame->data[i] + m_audio_frame_pos * m_audio_frame_bps;
          const u8* input = reinterpret_cast<u8*>(&m_audio_buffer[m_audio_buffer_read_pos * AUDIO_CHANNELS + i]);
          for (u32 j = 0; j < this_batch; j++)
          {
            std::memcpy(output, input, sizeof(s16));
            input += sizeof(s16) * AUDIO_CHANNELS;
            output += m_audio_frame_bps;
          }
        }
      }
      else
      {
        // Direct copy - optimal.
        std::memcpy(m_converted_audio_frame->data[0] + m_audio_frame_pos * m_audio_frame_bps * AUDIO_CHANNELS,
                    &m_audio_buffer[m_audio_buffer_read_pos * AUDIO_CHANNELS],
                    this_batch * sizeof(s16) * AUDIO_CHANNELS);
      }
    }
    else
    {
      // Use swresample to convert.
      const u8* input = reinterpret_cast<u8*>(&m_audio_buffer[m_audio_buffer_read_pos * AUDIO_CHANNELS]);

      // Might be planar, so offset both buffers.
      u8* output[AUDIO_CHANNELS];
      if (m_audio_frame_planar)
      {
        for (u32 i = 0; i < AUDIO_CHANNELS; i++)
          output[i] = m_converted_audio_frame->data[i] + (m_audio_frame_pos * m_audio_frame_bps);
      }
      else
      {
        output[0] = m_converted_audio_frame->data[0] + (m_audio_frame_pos * m_audio_frame_bps * AUDIO_CHANNELS);
      }

      const int res = wrap_swr_convert(m_swr_context, output, this_batch, &input, this_batch);
      if (res < 0)
      {
        SetAVError(error, "swr_convert() failed: ", res);
        return false;
      }
    }

    // Advance the ring-buffer read position and frame fill position.
    m_audio_buffer_read_pos = (m_audio_buffer_read_pos + this_batch) % max_audio_buffer_size;
    m_audio_buffer_size.fetch_sub(this_batch);
    m_audio_frame_pos += this_batch;
    pending_frames -= this_batch;

    // Do we have a complete frame?
    if (m_audio_frame_pos == m_audio_frame_size)
    {
      m_audio_frame_pos = 0;

      if (!m_swr_context)
      {
        // PTS is simply frames.
        m_converted_audio_frame->pts = m_next_audio_pts;
      }
      else
      {
        m_converted_audio_frame->pts = wrap_swr_next_pts(m_swr_context, m_next_audio_pts);
      }

      // Increment PTS.
      m_next_audio_pts += m_audio_frame_size;

      // Send off for encoding.
      int res = wrap_avcodec_send_frame(m_audio_codec_context, m_converted_audio_frame);
      if (res < 0) [[unlikely]]
      {
        SetAVError(error, "avcodec_send_frame() for audio failed: ", res);
        return false;
      }

      // Write any packets back to the output file.
      if (!ReceivePackets(m_audio_codec_context, m_audio_stream, m_audio_packet, error)) [[unlikely]]
        return false;
    }
  }

  return true;
}

std::unique_ptr<MediaCapture> MediaCaptureFFmpeg::Create(Error* error)
{
  if (!LoadFFmpeg(error))
    return nullptr;

  return std::make_unique<MediaCaptureFFmpeg>();
}

MediaCapture::ContainerList MediaCaptureFFmpeg::GetContainerList()
{
  return {
    {"avi", "Audio Video Interleave"}, {"mp4", "MPEG-4 Part 14"}, {"mkv", "Matroska Media Container"},
    {"mov", "QuickTime File Format"}, {"mp3", "MPEG-2 Audio Layer III"}, {"wav", "Waveform Audio File Format"},
  };
}

// Enumerates all registered encoders of the requested media type that can be
// muxed into the given container (defaults to mp4 when container is null).
MediaCaptureBase::CodecList MediaCaptureFFmpeg::GetCodecListForContainer(const char* container, AVMediaType type)
{
  CodecList ret;

  Error error;
  if (!LoadFFmpeg(&error))
  {
    ERROR_LOG("FFmpeg load failed: {}", error.GetDescription());
    return ret;
  }

  // Guess the muxer from a dummy filename carrying the container extension.
  const AVOutputFormat* output_format =
    wrap_av_guess_format(nullptr, fmt::format("video.{}", container ? container : "mp4").c_str(), nullptr);
  if (!output_format)
  {
    ERROR_LOG("av_guess_format() failed");
    return ret;
  }

  void* iter = nullptr;
  const AVCodec* codec;
  while ((codec = wrap_av_codec_iterate(&iter)) != nullptr)
  {
    // only consider codecs of the requested media type that have a usable encoder
    if (codec->type != type || !wrap_avcodec_find_encoder(codec->id) || !wrap_avcodec_find_encoder_by_name(codec->name))
      continue;

    if (!wrap_avformat_query_codec(output_format, codec->id, FF_COMPLIANCE_NORMAL))
      continue;

    // skip names we've already added
    if (std::find_if(ret.begin(), ret.end(), [codec](const auto& it) { return it.first == codec->name; }) != ret.end())
      continue;

    ret.emplace_back(codec->name, codec->long_name ?
codec->long_name : codec->name);
  }

  return ret;
}

MediaCapture::CodecList MediaCaptureFFmpeg::GetVideoCodecList(const char* container)
{
  return GetCodecListForContainer(container, AVMEDIA_TYPE_VIDEO);
}

MediaCapture::CodecList MediaCaptureFFmpeg::GetAudioCodecList(const char* container)
{
  return GetCodecListForContainer(container, AVMEDIA_TYPE_AUDIO);
}

#endif

} // namespace

// Backend name tables; entry order must match the MediaCaptureBackend enum,
// which the static_asserts below verify by count.
static constexpr const std::array s_backend_names = {
#ifdef _WIN32
  "MediaFoundation",
#endif
#ifndef __ANDROID__
  "FFmpeg",
#endif
};
static constexpr const std::array s_backend_display_names = {
#ifdef _WIN32
  TRANSLATE_NOOP("MediaCapture", "Media Foundation"),
#endif
#ifndef __ANDROID__
  TRANSLATE_NOOP("MediaCapture", "FFmpeg"),
#endif
};
static_assert(s_backend_names.size() == static_cast<size_t>(MediaCaptureBackend::MaxCount));
static_assert(s_backend_display_names.size() == static_cast<size_t>(MediaCaptureBackend::MaxCount));

MediaCapture::~MediaCapture() = default;

// Maps a backend name string back to its enum value, or nullopt if unknown.
std::optional<MediaCaptureBackend> MediaCapture::ParseBackendName(const char* str)
{
  int index = 0;
  for (const char* name : s_backend_names)
  {
    if (std::strcmp(name, str) == 0)
      return static_cast<MediaCaptureBackend>(index);

    index++;
  }

  return std::nullopt;
}

const char* MediaCapture::GetBackendName(MediaCaptureBackend backend)
{
  return s_backend_names[static_cast<size_t>(backend)];
}

const char* MediaCapture::GetBackendDisplayName(MediaCaptureBackend backend)
{
  return Host::TranslateToCString("MediaCapture", s_backend_display_names[static_cast<size_t>(backend)]);
}

// Rounds the capture dimensions up to the encoder-friendly alignment multiples.
void MediaCapture::AdjustVideoSize(u32* width, u32* height)
{
  *width = Common::AlignUpPow2(*width, VIDEO_WIDTH_ALIGNMENT);
  *height = Common::AlignUpPow2(*height, VIDEO_HEIGHT_ALIGNMENT);
}

// Dispatches to the compiled-in backend's container enumeration.
MediaCapture::ContainerList MediaCapture::GetContainerList(MediaCaptureBackend backend)
{
  ContainerList ret;
  switch (backend)
  {
#ifdef _WIN32
    case MediaCaptureBackend::MediaFoundation:
      ret = MediaCaptureMF::GetContainerList();
      break;
#endif
#ifndef __ANDROID__
    case MediaCaptureBackend::FFmpeg:
      ret = MediaCaptureFFmpeg::GetContainerList();
      break;
#endif
    default:
      break;
  }
  return ret;
}

MediaCapture::CodecList MediaCapture::GetVideoCodecList(MediaCaptureBackend backend, const char* container)
{
  CodecList ret;
  switch (backend)
  {
#ifdef _WIN32
    case MediaCaptureBackend::MediaFoundation:
      ret = MediaCaptureMF::GetVideoCodecList(container);
      break;
#endif
#ifndef __ANDROID__
    case MediaCaptureBackend::FFmpeg:
      ret = MediaCaptureFFmpeg::GetVideoCodecList(container);
      break;
#endif
    default:
      break;
  }
  return ret;
}

MediaCapture::CodecList MediaCapture::GetAudioCodecList(MediaCaptureBackend backend, const char* container)
{
  CodecList ret;
  switch (backend)
  {
#ifdef _WIN32
    case MediaCaptureBackend::MediaFoundation:
      ret = MediaCaptureMF::GetAudioCodecList(container);
      break;
#endif
#ifndef __ANDROID__
    case MediaCaptureBackend::FFmpeg:
      ret = MediaCaptureFFmpeg::GetAudioCodecList(container);
      break;
#endif
    default:
      break;
  }
  return ret;
}

// Creates a capture instance for the requested backend.
std::unique_ptr<MediaCapture> MediaCapture::Create(MediaCaptureBackend backend, Error* error)
{
  switch (backend)
  {
#ifdef _WIN32
    case MediaCaptureBackend::MediaFoundation:
      return MediaCaptureMF::Create(error);
#endif
#ifndef __ANDROID__
    case MediaCaptureBackend::FFmpeg:
      return MediaCaptureFFmpeg::Create(error);
#endif
default: 2991 return nullptr; 2992 } 2993 }