// surroundize/pw_decoder.cpp

#include "pw_decoder.hpp"

#include <algorithm>
#include <cmath>
#include <cstring>

// Based on PipeWire
/* SPDX-FileCopyrightText: Copyright © 2018 Wim Taymans */
/* SPDX-License-Identifier: MIT */

// Copies `n_samples` frames from `src` to `dst`, scaling every value by `vol`.
// A frame is `NConseq` adjacent floats; consecutive frames start `src_stride`
// (input) and `dst_stride` (output) floats apart.
// A gain of exactly 0 writes zeros without reading `src`; a gain of exactly 1
// copies the values unscaled.
template <std::size_t NConseq = 1>
static void Vol(float* dst, std::size_t dst_stride,
                const float* src, std::size_t src_stride, std::size_t n_samples,
                float vol)
{
  if (vol == 0.f)
  {
    // Muted: only the destination is touched on this path.
    for (std::size_t frame = 0; frame < n_samples; ++frame)
      std::fill_n(dst + frame * dst_stride, NConseq, 0.f);
    return;
  }

  if (vol == 1.f)
  {
    // Unity gain: plain element-wise copy.
    for (std::size_t frame = 0; frame < n_samples; ++frame)
    {
      float* const out = dst + frame * dst_stride;
      const float* const in = src + frame * src_stride;
      for (std::size_t k = 0; k < NConseq; ++k)
        out[k] = in[k];
    }
    return;
  }

  // General case: scale every value.
  for (std::size_t frame = 0; frame < n_samples; ++frame)
  {
    float* const out = dst + frame * dst_stride;
    const float* const in = src + frame * src_stride;
    for (std::size_t k = 0; k < NConseq; ++k)
      out[k] = in[k] * vol;
  }
}

// Writes the mean of each interleaved (first, second) pair of `src` to `dst`,
// advancing the output by `dst_stride` floats per pair.
// A trailing unpaired sample in `src` is ignored.
static void Avg(float* dst, std::size_t dst_stride, std::span<const float> src)
{
  // Count whole pairs up front: stepping an unsigned counter down by two from
  // an odd size would skip zero, wrap around and run far past both buffers.
  const std::size_t n_pairs = src.size() / 2;
  for (std::size_t i = 0; i < n_pairs; ++i)
    dst[i * dst_stride] = (src[2 * i] + src[2 * i + 1]) * .5f;
}

// Writes the difference (first - second) of each interleaved pair of `src` to
// `dst`, advancing the output by `dst_stride` floats per pair.
// A trailing unpaired sample in `src` is ignored.
static void Sub(float* dst, std::size_t dst_stride, std::span<const float> src)
{
  // Count whole pairs up front: stepping an unsigned counter down by two from
  // an odd size would skip zero, wrap around and run far past both buffers.
  const std::size_t n_pairs = src.size() / 2;
  for (std::size_t i = 0; i < n_pairs; ++i)
    dst[i * dst_stride] = src[2 * i] - src[2 * i + 1];
}


// Filters `samples` strided values from `src` into `dst` through two passes of
// the same biquad in series, scaling the result by `vol`.
// When `vol` is zero or the filter is inactive the filter is skipped and the
// call reduces to Vol(); the stored filter state is left untouched in that case.
// `src` and `dst` may refer to the same samples: each input is read before the
// matching output is written.
static void lr4_process_c(
  struct lr4 *lr4, float *dst, std::size_t dst_stride,
  const float *src, std::size_t src_stride, const float vol, std::size_t samples)
{
  if (vol == 0.0f || !lr4->active) {
    Vol(dst, dst_stride, src, src_stride, samples, vol);
    return;
  }

  // Work on local copies so the loop does not go through the pointer.
  const float b0 = lr4->bq.b0;
  const float b1 = lr4->bq.b1;
  const float b2 = lr4->bq.b2;
  const float a1 = lr4->bq.a1;
  const float a2 = lr4->bq.a2;
  float x1 = lr4->x1;
  float x2 = lr4->x2;
  float y1 = lr4->y1;
  float y2 = lr4->y2;

  for (std::size_t i = 0; i < samples; ++i, dst += dst_stride, src += src_stride) {
    const float x = *src;

    // First biquad pass: x -> y, state in x1/x2.
    const float y = b0 * x          + x1;
    x1 = b1 * x - a1 * y + x2;
    x2 = b2 * x - a2 * y;

    // Second biquad pass with the same coefficients: y -> z, state in y1/y2.
    const float z = b0 * y          + y1;
    y1 = b1 * y - a1 * z + y2;
    y2 = b2 * y - a2 * z;

    *dst = z * vol;
  }

  // Anything that is not a normal float (denormal, NaN, infinity) is stored as
  // zero so a bad state cannot carry over into the next call.
  const auto flush = [](float v) { return std::isnormal(v) ? v : 0.0f; };
  lr4->x1 = flush(x1);
  lr4->x2 = flush(x2);
  lr4->y1 = flush(y1);
  lr4->y2 = flush(y2);
}

// Stores the biquad coefficients in `bq`, normalised so that a0 becomes 1:
// every other coefficient is multiplied by 1/a0 in double precision and then
// narrowed to float.
static void set_coefficient(struct biquad *bq, double b0, double b1, double b2,
                            double a0, double a1, double a2)
{
  const double norm = 1 / a0;
  const auto normalised = [norm](double coeff) {
    return static_cast<float>(coeff * norm);
  };

  bq->b0 = normalised(b0);
  bq->b1 = normalised(b1);
  bq->b2 = normalised(b2);
  bq->a1 = normalised(a1);
  bq->a2 = normalised(a2);
}

/* Q = 1 / sqrt(2), also resulting Q value when S = 1 */
#define BIQUAD_DEFAULT_Q 0.707106781186548

/* Configures `bq` as a second-order lowpass and clears its history.
 * `cutoff` is clamped to [0, 1] and mapped to the angle pi * cutoff.
 * A `Q` of zero or less selects BIQUAD_DEFAULT_Q.
 */
static void biquad_set_lowpass(struct biquad *bq, double cutoff, double Q)
{
  /* Start from a clean state. */
  bq->x1 = 0;
  bq->x2 = 0;

  /* Keep the cutoff inside 0..1. */
  cutoff = fmax(0.0, fmin(cutoff, 1.0));

  /* Degenerate ends of the range: at 1 the z-transform is 1 (pass-through),
   * at 0 nothing gets through. Either way the clamped cutoff itself is the
   * only non-zero numerator coefficient.
   */
  if (cutoff == 0 || cutoff == 1) {
    set_coefficient(bq, cutoff, 0, 0, 1, 0, 0);
    return;
  }

  /* Fall back to the default Q when none was supplied. */
  const double q = Q <= 0 ? BIQUAD_DEFAULT_Q : Q;

  /* Lowpass prototype H(s) = 1 / (s^2 + s/Q + 1). */
  const double w0 = M_PI * cutoff;
  const double k = cos(w0);
  const double alpha = sin(w0) / (2 * q);
  const double one_minus_k = 1 - k;

  set_coefficient(bq,
                  one_minus_k / 2, one_minus_k, one_minus_k / 2,
                  1 + alpha, -2 * k, 1 - alpha);
}

// Configures `bq` as a pass-through (b0 = 1, every other coefficient 0) and
// clears its history.
static void biquad_set_none(biquad* bq)
{
  set_coefficient(bq, 1, 0, 0, 1, 0, 0);

  // Drop whatever state a previous configuration left behind.
  bq->x2 = 0;
  bq->x1 = 0;
}

// (Re)configures `lr4`: a lowpass at `freq` when `lowpass` is true, otherwise a
// pass-through biquad. The filter history is cleared and `active` records
// whether the lowpass is in use.
static void lr4_set(struct lr4 *lr4, bool lowpass, float freq)
{
  lr4->active = lowpass;

  // Start both filter passes from silence.
  lr4->x1 = 0;
  lr4->x2 = 0;
  lr4->y1 = 0;
  lr4->y2 = 0;

  if (lowpass)
    biquad_set_lowpass(&lr4->bq, freq, 0); // Q of 0 selects the default Q
  else
    biquad_set_none(&lr4->bq);
}

// Pushes `n_samples` strided values from `src` through the delay line `buffer`
// and writes the delayed signal, convolved with `taps` and scaled by `vol`, to
// `dst`.
// `buffer` is used as two halves: every sample is stored at the write position
// and again one half further on, so a run of reads starting anywhere in the
// first half never has to wrap. `*pos` holds the write position on entry and is
// updated on exit.
// `src` and `dst` may refer to the same samples: each input is read before the
// matching output is written.
void PWDecoder::DelayConvolveRun(
  std::span<float> buffer, uint32_t *pos,
  float *dst, std::size_t dst_stride,
  const float *src, std::size_t src_stride, const float vol,
  std::size_t n_samples)
{
  const auto half = uint32_t(buffer.size() / 2);
  const auto tap_count = taps.size();
  // Distance from the write position to the first sample the taps read.
  const auto read_offset = half - delay - tap_count-1;
  uint32_t cursor = *pos;

  if (tap_count == 1) {
    // Pure delay: a single read per sample, no multiply by the tap.
    for (std::size_t i = 0; i < n_samples; ++i, dst += dst_stride, src += src_stride) {
      const float in = *src;
      buffer[cursor + half] = in;
      buffer[cursor] = in;

      *dst = buffer[cursor + read_offset] * vol;

      cursor = cursor + 1 >= half ? 0 : cursor + 1;
    }
  } else {
    // Delay followed by a dot product with the taps.
    for (std::size_t i = 0; i < n_samples; ++i, dst += dst_stride, src += src_stride) {
      const float in = *src;
      buffer[cursor + half] = in;
      buffer[cursor] = in;

      float acc = 0.0f;
      for (std::size_t j = 0; j < tap_count; ++j)
        acc += taps[j] * buffer[cursor+read_offset+j];
      *dst = acc * vol;

      cursor = cursor + 1 >= half ? 0 : cursor + 1;
    }
  }

  *pos = cursor;
}

// Fills `taps` with a four-term cosine-sum window spanning the whole range
// (the coefficients match the Blackman-Nuttall window).
// The phase is divided by size - 1, so the result is only meaningful for two
// or more taps.
static inline void blackman_window(std::span<float> taps)
{
  const float last = float(taps.size()-1);
  std::size_t n = 0;
  for (float& tap : taps) {
    const float w = 2.0f * float(M_PI) * float(n) / last;
    tap = 0.3635819f - 0.4891775f * cosf(w)
      + 0.1365995f * cosf(2 * w) - 0.0106411f * cosf(3 * w);
    ++n;
  }
}

// Multiplies the window already stored in `taps` by a Hilbert-style impulse
// response centred on index size / 2: taps at an odd distance k from the centre
// are scaled by (1 - cos(pi * k)) / (pi * k), taps at an even distance
// (including the centre itself) become zero.
static inline void hilbert_generate(std::span<float> taps)
{
  // Signed distance of the current tap from the centre; starts at -size/2.
  int k = -int(taps.size() / 2);
  for (float& tap : taps) {
    if ((k & 1) == 0) {
      tap = 0.0f;
    } else {
      const float pk = static_cast<float>(M_PI) * k;
      tap *= (1.0f - cosf(pk)) / pk;
    }
    ++k;
  }
}

// Reverses the order of `taps` in place.
static inline void reverse_taps(std::span<float> taps)
{
  std::reverse(taps.begin(), taps.end());
}


// 1 / sqrt(2): gain applied to the derived (non-front) channels.
static constexpr float SQRT1_2 = 0.707106781f;

// Up-mixes interleaved stereo `src` into four interleaved channels in `buf`:
// slots 0-1 are the unmodified input pair, slots 2-3 the derived pair.
// Without psd the derived pair is the input pair scaled by SQRT1_2. With psd
// it is the difference of the two inputs, run through the delay/convolver;
// slot 3 gets the inverted signal.
void PWDecoder::Mix4(std::span<const float> src)
{
  constexpr std::size_t kStride = 4;
  const std::size_t frames = src.size() / 2;
  buf.resize(frames * kStride);

  float* const front = buf.data();
  float* const derived = front + 2;

  Vol<2>(front, kStride, src.data(), 2, frames, 1.f);

  if (psd) {
    Sub(derived, kStride, src);

    // Slot 3 must be produced first: the second call overwrites slot 2, which
    // is the input of both calls.
    DelayConvolveRun(buffer[1], &pos[1], derived + 1, kStride, derived, kStride,
                     -SQRT1_2, frames);
    DelayConvolveRun(buffer[0], &pos[0], derived, kStride, derived, kStride,
                     SQRT1_2, frames);
  } else {
    Vol<2>(derived, kStride, src.data(), 2, frames, SQRT1_2);
  }
}

// Fills the first four slots of every `dst_stride`-wide frame in `buf` from
// interleaved stereo `src`:
//   slots 0-1: the input pair (minus a share of their sum when widen != 0),
//   slot 2:    half the sum of the pair, through lr4[0], scaled by SQRT1_2,
//   slot 3:    the unscaled slot-2 value through lr4[1], scaled by 0.5, or
//              silence when lfe_cutoff is not positive.
// `buf` must already hold at least src.size() / 2 frames of `dst_stride`
// floats; this function does not resize it.
void PWDecoder::Mix3p1Base(std::span<const float> src, std::size_t dst_stride)
{
  const std::size_t frames = src.size() / 2;
  const float centre_gain = SQRT1_2;
  const float lfe_gain = lfe_cutoff > 0 ? .5f : .0f;

  float* const out = buf.data();

  if (widen != 0.0f) {
    const float* in = src.data();
    float* frame = out;
    for (std::size_t n = 0; n < frames; ++n, in += 2, frame += dst_stride) {
      const float first = in[0];
      const float second = in[1];
      const float sum = first + second;
      const float bleed = sum * widen;
      frame[0] = (first - bleed);
      frame[1] = (second - bleed);
      frame[2] = sum * 0.5f;
    }
  } else {
    Vol<2>(out, dst_stride, src.data(), 2, frames, 1.f);
    Avg(out + 2, dst_stride, src);
  }

  // Slot 3 first: it reads the still-unfiltered slot 2, which the second call
  // then filters in place.
  lr4_process_c(&lr4[1], out + 3, dst_stride, out + 2, dst_stride,
                lfe_gain, frames);
  lr4_process_c(&lr4[0], out + 2, dst_stride, out + 2, dst_stride,
                centre_gain, frames);
}

// Up-mixes interleaved stereo `src` into four interleaved channels in `buf`;
// all four slots of every frame are produced by Mix3p1Base.
void PWDecoder::Mix3p1(std::span<const float> src)
{
  // Size the output in whole frames, as the other mixers do. Sizing it as
  // src.size() * 2 left two trailing floats that no mixer step writes whenever
  // the input had an odd number of samples.
  const auto n_samples = src.size() / 2;
  buf.resize(n_samples * 4);
  Mix3p1Base(src, 4);
}

// Up-mixes interleaved stereo `src` into six interleaved channels in `buf`:
// slots 0-3 come from Mix3p1Base, slots 4-5 are the derived pair.
// Without psd the derived pair is the input pair scaled by SQRT1_2. With psd
// it is the difference of the two inputs, run through the delay/convolver;
// slot 5 gets the inverted signal.
void PWDecoder::Mix5p1(std::span<const float> src)
{
  constexpr std::size_t kStride = 6;
  const std::size_t frames = src.size() / 2;
  buf.resize(frames * kStride);
  Mix3p1Base(src, kStride);

  float* const derived = buf.data() + 4;

  if (psd) {
    Sub(derived, kStride, src);

    // Slot 5 must be produced first: the second call overwrites slot 4, which
    // is the input of both calls.
    DelayConvolveRun(buffer[1], &pos[1], derived + 1, kStride, derived, kStride,
                     -SQRT1_2, frames);
    DelayConvolveRun(buffer[0], &pos[0], derived, kStride, derived, kStride,
                     SQRT1_2, frames);
  } else {
    Vol<2>(derived, kStride, src.data(), 2, frames, SQRT1_2);
  }
}

// Up-mixes interleaved stereo `src` into eight interleaved channels in `buf`:
// slots 0-3 come from Mix3p1Base, slots 4-5 are always the input pair scaled
// by SQRT1_2, and slots 6-7 are the derived pair.
// Without psd the derived pair equals slots 4-5. With psd it is the difference
// of the two inputs, run through the delay/convolver; slot 7 gets the inverted
// signal.
void PWDecoder::Mix7p1(std::span<const float> src)
{
  constexpr std::size_t kStride = 8;
  const std::size_t frames = src.size() / 2;
  buf.resize(frames * kStride);
  Mix3p1Base(src, kStride);

  float* const side = buf.data() + 4;
  float* const derived = buf.data() + 6;

  Vol<2>(side, kStride, src.data(), 2, frames, SQRT1_2);

  if (psd) {
    Sub(derived, kStride, src);

    // Slot 7 must be produced first: the second call overwrites slot 6, which
    // is the input of both calls.
    DelayConvolveRun(buffer[1], &pos[1], derived + 1, kStride, derived, kStride,
                     -SQRT1_2, frames);
    DelayConvolveRun(buffer[0], &pos[0], derived, kStride, derived, kStride,
                     SQRT1_2, frames);
  } else {
    Vol<2>(derived, kStride, src.data(), 2, frames, SQRT1_2);
  }
}

// Length, in samples, of each delay line. DelayConvolveRun uses half of the
// allocated storage as its ring length, so this bounds `delay` plus the number
// of taps (see the clamp in Init).
static constexpr std::size_t BUFFER_SIZE = 4096;
// Allocates both delay lines at twice BUFFER_SIZE: DelayConvolveRun stores
// every incoming sample at the write position and again BUFFER_SIZE entries
// further on, so the second half mirrors the first.
PWDecoder::PWDecoder()
{
  buffer[0].resize(2 * BUFFER_SIZE);
  buffer[1].resize(2 * BUFFER_SIZE);
}

// (Re)initialises the decoder for sample rate `rate` (in Hz) from the current
// option values: clears both delay lines and their write positions, rebuilds
// the tap set, derives the delay in samples from rear_delay_ms and, unless the
// channel setup is QUAD, reconfigures both lowpass filters.
// A negative `rate` is ignored and leaves the decoder untouched.
void PWDecoder::Init(float rate)
{
  if (rate < 0) return;
  this->rate = rate;

  pos = {0, 0};
  std::fill(buffer[0].begin(), buffer[0].end(), 0);
  std::fill(buffer[1].begin(), buffer[1].end(), 0);

  if (n_taps <= 1)
  {
    // No phase shift requested: a single unit tap makes the convolver a plain
    // delay.
    n_taps = 1;
    taps.assign({1.f});
  }
  else
  {
    // Force an odd tap count inside 15..255, then build the reversed,
    // windowed Hilbert taps.
    n_taps = std::clamp<std::uint32_t>(n_taps, 15, 255) | 1;
    taps.resize(n_taps);
    blackman_window(taps);
    hilbert_generate(taps);
    reverse_taps(taps);
  }

  // DelayConvolveRun reads starting at offset BUFFER_SIZE - delay - taps - 1
  // from the write position. That offset has to stay at least 1: at 0 the
  // oldest tap lands on the sample that was just written, and below 0 the
  // unsigned arithmetic wraps and the read leaves the buffer. The previous
  // clamp (delay + taps <= BUFFER_SIZE) still allowed an offset of -1.
  // Clamping in float also keeps negative, NaN and oversized delays away from
  // the float-to-unsigned conversion, where they would be undefined behaviour.
  const float max_delay = float(BUFFER_SIZE - taps.size() - 2);
  const float wanted_delay = rear_delay_ms * rate / 1000.0f;
  delay = 0;
  if (wanted_delay > 0.f)
    delay = std::uint32_t(std::min(wanted_delay, max_delay));

  if (channel_setup != ChannelSetup::QUAD)
  {
    // NOTE(review): biquad_set_lowpass maps a cutoff of 1.0 to the angle pi, so
    // passing cutoff / rate (rather than 2 * cutoff / rate) appears to place
    // the corner at half the configured frequency - confirm this is intended.
    lr4_set(&lr4[1], lfe_cutoff > 0, lfe_cutoff / rate);
    lr4_set(&lr4[0], fc_cutoff > 0, fc_cutoff / rate);
  }
}

std::vector<Option> PWDecoder::GetOptions()
{
  return {
    {
      "channel_setup", "SETUP",
      "Output channel setup. One of: quad, 3.1, 5.1, 7.1. Default: 5.1",
      [this](std::string_view sv)
      {
        /**/ if (sv == "quad") channel_setup = ChannelSetup::QUAD;
        else if (sv == "3.1")  channel_setup = ChannelSetup::_3P1;
        else if (sv == "5.1")  channel_setup = ChannelSetup::_5P1;
        else if (sv == "7.1")  channel_setup = ChannelSetup::_7P1;
        else throw std::runtime_error("Invalid channel setup " + std::string{sv});
      }
    },
    {
      "psd", "BOOL",
      "Enable Passive Surround Decoding. The rear channels as produced from the front left and right ambient sound (the difference between the channels). A delay and optional phase shift are added to the rear signal to make the sound bigger. If disabled, front is just copied to rear.",
      [this](std::string_view sv) { psd = FromString<bool>(sv); }
    },
    {
      "widen", "FLOAT",
      "Subtracts some of the front center signal from the stereo channels. This moves the dialogs more to the center speaker and leaves the ambient sound in the stereo channels.\nOnly active when Front Center is produced.",
      [this](std::string_view sv) { widen = FromString<float>(sv); }
    },
    {
      "hilbert_taps", "INT",
      "This option will apply a 90 degree phase shift to the rear channels to improve specialization. Taps needs to be between 15 and 255 with more accurate results (and more CPU consumption) for higher values.\nThis is only active when the psd up-mix method is used. 0 to disable.",
      [this](std::string_view sv)
      {
        n_taps = FromString<std::uint32_t>(sv);
        Init(rate);
      }
    },
    {
      "rear_delay", "FLOAT",
      "Apply a delay in milliseconds when up-mixing the rear channels. This improves specialization of the sound. A typical delay of 12 milliseconds is the default.\nThis is only active when the psd up-mix method is used.",
      [this](std::string_view sv)
      {
        rear_delay_ms = FromString<float>(sv);
        Init(rate);
      }
    },
    {
      "lfe_cutoff", "FLOAT",
      "Apply a lowpass filter to the low frequency effects. The value is expressed in Hz. Typical subwoofers have a cutoff at around 150 and 200. 0 disables the feature. Default: 150",
      [this](std::string_view sv)
      {
        lfe_cutoff = FromString<float>(sv);
        Init(rate);
      }
    },
    {
      "fc_cutoff", "FLOAT",
      "Apply a lowpass filter to the front center frequency. The value is expressed in Hz.\nSince the front center contains the dialogs, a typical cutoff frequency is 12000 Hz.",
      [this](std::string_view sv)
      {
        lfe_cutoff = FromString<float>(sv);
        Init(rate);
      }
    },
  };
}

// Returns the output channel layout for the current channel setup, in the
// order the channels are interleaved in the buffer returned by Decode().
// Throws std::runtime_error if channel_setup holds none of the known values.
std::vector<Channel::E> PWDecoder::GetChannels() const
{
  using E = Channel::E;
  switch (channel_setup)
  {
  case ChannelSetup::QUAD:
    return { E::FRONT_LEFT, E::FRONT_RIGHT, E::REAR_LEFT, E::REAR_RIGHT };
  case ChannelSetup::_3P1:
    return { E::FRONT_LEFT, E::FRONT_RIGHT, E::FRONT_CENTER, E::LFE };
  case ChannelSetup::_5P1:
    return { E::FRONT_LEFT, E::FRONT_RIGHT, E::FRONT_CENTER, E::LFE,
             E::REAR_LEFT, E::REAR_RIGHT };
  case ChannelSetup::_7P1:
    return { E::FRONT_LEFT, E::FRONT_RIGHT, E::FRONT_CENTER, E::LFE,
             E::SIDE_LEFT, E::SIDE_RIGHT, E::REAR_LEFT, E::REAR_RIGHT };
  }
  // Only reachable with a value outside the enumerators handled above. Without
  // this, control would run off the end of a non-void function, which is
  // undefined behaviour.
  throw std::runtime_error("Invalid channel setup");
}

// Up-mixes interleaved stereo samples `in` with the mixer for the current
// channel setup and returns a view of the interleaved result.
// The view refers to the decoder's internal buffer, which the next call
// resizes and overwrites.
// A trailing unpaired sample in `in` is ignored.
std::span<const float> PWDecoder::Decode(std::span<const float> in)
{
  // The mixers consume the input two samples at a time and their pair helpers
  // run past the buffers when handed an odd count, so hand them only whole
  // pairs.
  in = in.first(in.size() - in.size() % 2);

  switch (channel_setup)
  {
  case ChannelSetup::QUAD: Mix4(in); break;
  case ChannelSetup::_3P1: Mix3p1(in); break;
  case ChannelSetup::_5P1: Mix5p1(in); break;
  case ChannelSetup::_7P1: Mix7p1(in); break;
  }
  return buf;
}