freesurround_decoder.h (6998B)
1 // SPDX-FileCopyrightText: 2007-2010 Christian Kothe, 2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: GPL-2.0+ 3 4 #pragma once 5 6 #include "kiss_fftr.h" 7 8 #include <array> 9 #include <cmath> 10 #include <complex> 11 #include <span> 12 #include <vector> 13 14 /** 15 * The FreeSurround decoder. 16 */ 17 class FreeSurroundDecoder 18 { 19 public: 20 /** 21 * The supported output channel setups. 22 * A channel setup is defined by the set of channels that are present. Here is a graphic 23 * of the cs_5point1 setup: http://en.wikipedia.org/wiki/File:5_1_channels_(surround_sound)_label.svg 24 */ 25 enum class ChannelSetup 26 { 27 Stereo, 28 Surround41, 29 Surround51, 30 Surround71, 31 Legacy, // same channels as cs_5point1 but different upmixing transform; does not support the focus control 32 MaxCount 33 }; 34 35 static constexpr int grid_res = 21; // resolution of the lookup grid 36 using LUT = const float (*)[grid_res]; 37 38 /** 39 * Create an instance of the decoder. 40 * @param setup The output channel setup -- determines the number of output channels 41 * and their place in the sound field. 42 * @param blocksize Granularity at which data is processed by the decode() function. 43 * Must be a power of two and should correspond to ca. 10ms worth of single-channel 44 * samples (default is 4096 for 44.1Khz data). Do not make it shorter or longer 45 * than 5ms to 20ms since the granularity at which locations are decoded 46 * changes with this. 47 */ 48 FreeSurroundDecoder(ChannelSetup setup = ChannelSetup::Surround51, unsigned blocksize = 4096); 49 ~FreeSurroundDecoder(); 50 51 /** 52 * Decode a chunk of stereo sound. The output is delayed by half of the blocksize. 53 * This function is the only one needed for straightforward decoding. 54 * @param input Contains exactly blocksize (multiplexed) stereo samples, i.e. 2*blocksize numbers. 55 * @return A pointer to an internal buffer of exactly blocksize (multiplexed) multichannel samples. 56 * The actual number of values depends on the number of output channels in the chosen 57 * channel setup. 58 */ 59 float* Decode(float* input); 60 61 /** 62 * Flush the internal buffer. 63 */ 64 void Flush(); 65 66 // --- soundfield transformations 67 // These functions allow to set up geometric transformations of the sound field after it has been decoded. 68 // The sound field is best pictured as a 2-dimensional square with the listener in its 69 // center which can be shifted or stretched in various ways before it is sent to the 70 // speakers. The order in which these transformations are applied is as listed below. 71 72 /** 73 * Allows to wrap the soundfield around the listener in a circular manner. 74 * Determines the angle of the frontal sound stage relative to the listener, in degrees. 75 * A setting of 90° corresponds to standard surround decoding, 180° stretches the front stage from 76 * ear to ear, 270° wraps it around most of the head. The side and rear content of the sound 77 * field is compressed accordingly behind the listerer. (default: 90, range: [0°..360°]) 78 */ 79 void SetCircularWrap(float v); 80 81 /** 82 * Allows to shift the soundfield forward or backward. 83 * Value range: [-1.0..+1.0]. 0 is no offset, positive values move the sound 84 * forward, negative values move it backwards. (default: 0) 85 */ 86 void SetShift(float v); 87 88 /** 89 * Allows to scale the soundfield backwards. 90 * Value range: [0.0..+5.0] -- 0 is all compressed to the front, 1 is no change, 5 is scaled 5x backwards (default: 1) 91 */ 92 void SetDepth(float v); 93 94 /** 95 * Allows to control the localization (i.e., focality) of sources. 96 * Value range: [-1.0..+1.0] -- 0 means unchanged, positive means more localized, negative means more ambient 97 * (default: 0) 98 */ 99 void SetFocus(float v); 100 101 // --- rendering parameters 102 // These parameters control how the sound field is mapped onto speakers. 103 104 /** 105 * Set the presence of the front center channel(s). 106 * Value range: [0.0..1.0] -- fully present at 1.0, fully replaced by left/right at 0.0 (default: 1). 107 * The default of 1.0 results in spec-conformant decoding ("movie mode") while a value of 0.7 is 108 * better suited for music reproduction (which is usually mixed without a center channel). 109 */ 110 void SetCenterImage(float v); 111 112 /** 113 * Set the front stereo separation. 114 * Value range: [0.0..inf] -- 1.0 is default, 0.0 is mono. 115 */ 116 void SetFrontSeparation(float v); 117 118 /** 119 * Set the rear stereo separation. 120 * Value range: [0.0..inf] -- 1.0 is default, 0.0 is mono. 121 */ 122 void SetRearSeparation(float v); 123 124 // --- bass redirection (to LFE) 125 126 /** 127 * Enable/disable LFE channel (default: false = disabled) 128 */ 129 void SetBassRedirection(bool v); 130 131 /** 132 * Set the lower end of the transition band, in Hz/Nyquist (default: 40/22050). 133 */ 134 void SetLowCutoff(float v); 135 136 /** 137 * Set the upper end of the transition band, in Hz/Nyquist (default: 90/22050). 138 */ 139 void SetHighCutoff(float v); 140 141 // --- info 142 143 /** 144 * Number of samples currently held in the buffer. 145 */ 146 unsigned GetSamplesBuffered(); 147 148 private: 149 using cplx = std::complex<double>; 150 151 struct ChannelMap 152 { 153 std::span<const LUT> luts; 154 const float* xsf; 155 }; 156 157 static const std::array<ChannelMap, static_cast<size_t>(ChannelSetup::MaxCount)> s_channel_maps; 158 159 void BufferedDecode(float* input); 160 161 // get the index (and fractional offset!) in a piecewise-linear channel allocation grid 162 static int MapToGrid(double& x); 163 164 // constants 165 const ChannelMap& cmap; // the channel setup 166 unsigned N, C; // number of samples per input/output block, number of output channels 167 168 // parameters 169 float circular_wrap; // angle of the front soundstage around the listener (90°=default) 170 float shift; // forward/backward offset of the soundstage 171 float depth; // backward extension of the soundstage 172 float focus; // localization of the sound events 173 float center_image; // presence of the center speaker 174 float front_separation; // front stereo separation 175 float rear_separation; // rear stereo separation 176 float lo_cut, hi_cut; // LFE cutoff frequencies 177 bool use_lfe; // whether to use the LFE channel 178 179 // FFT data structures 180 std::vector<double> lt, rt, dst; // left total, right total (source arrays), time-domain destination buffer array 181 std::vector<cplx> lf, rf; // left total / right total in frequency domain 182 kiss_fftr_cfg forward = nullptr; 183 kiss_fftr_cfg inverse = nullptr; // FFT buffers 184 185 // buffers 186 bool buffer_empty = true; // whether the buffer is currently empty or dirty 187 std::vector<float> inbuf; // stereo input buffer (multiplexed) 188 std::vector<float> outbuf; // multichannel output buffer (multiplexed) 189 std::vector<double> wnd; // the window function, precomputed 190 std::vector<std::vector<cplx>> signal; // the signal to be constructed in every channel, in the frequency domain 191 };