_formant_shifter_8cpp_source.html

/*  SPDX-License-Identifier: GPL-2.0-or-later */

/*!********************************************************************


  Audacity: A Digital Audio Editor


  FormantShifter.cpp


  Matthieu Hodgkinson


**********************************************************************/

#include "FormantShifter.h"
#include "FormantShifterLoggerInterface.h"
#include "MapToPositiveHalfIndex.h"
#include "MathApprox.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <fstream>


namespace
{
/*!
 * Resamples the positive-frequency half of a spectrum in place, using linear
 * interpolation: output bin `i` is read from input position `i / factor`.
 *
 * @param x Buffer of `fftSize / 2 + 1` values; overwritten with the result.
 * @param fftSize Size of the full FFT of which `x` is the positive half.
 * @param factor Resampling factor. Bins at and beyond
 * `(fftSize / 2 + 1) * factor` have no source data and are set to zero.
 * @return The number of leading bins that were interpolated, i.e. the index
 * of the first zeroed bin (or `fftSize / 2 + 1` when nothing was zeroed).
 */
size_t ResampleFreqDomain(float* x, size_t fftSize, double factor)
{
   const size_t numBins = fftSize / 2 + 1;
   const size_t numKept = std::min(numBins, size_t(numBins * factor));

   // Interpolate into scratch storage so that reads from `x` never see a
   // value that this call has already overwritten.
   std::vector<float> resampled(numKept);
   size_t bin = 0;
   for (float& out : resampled)
   {
      const int lowerPos = bin / factor;
      const float upperShare = 1.f * bin / factor - lowerPos;
      // Source positions go through MapToPositiveHalfIndex before being used
      // as indices into `x` (see MapToPositiveHalfIndex.h).
      const auto lowerBin = MapToPositiveHalfIndex(lowerPos, fftSize);
      const auto upperBin = MapToPositiveHalfIndex(lowerPos + 1, fftSize);
      out = (1 - upperShare) * x[lowerBin] + upperShare * x[upperBin];
      ++bin;
   }

   std::copy(resampled.begin(), resampled.end(), x);
   // Empty range (hence a no-op) when every bin was interpolated.
   std::fill(x + numKept, x + numBins, 0.f);
   return numKept;
}
} // namespace


/*!
 * Only stores its arguments. The FFT and the working buffers are set up
 * later, by `Reset(fftSize)`.
 *
 * @param sampleRate Stored in `mSampleRate`.
 * @param cutoffQuefrency Stored in the `cutoffQuefrency` member; `Process`
 * does nothing when it is 0.
 * @param logger Stored by reference in `mLogger`, so it must outlive this
 * object.
 */
FormantShifter::FormantShifter(
   int sampleRate, double cutoffQuefrency,
   FormantShifterLoggerInterface& logger)
    : cutoffQuefrency(cutoffQuefrency)
    , mSampleRate(sampleRate)
    , mLogger(logger)
{
}


/*!
 * Creates the FFT for the given size and sizes all working buffers
 * accordingly.
 *
 * @param fftSize Size of the FFT; the envelope and weight buffers get
 * `fftSize / 2 + 1` entries and the cepstrum buffer gets `fftSize`.
 */
void FormantShifter::Reset(size_t fftSize)
{
   const auto positiveHalfSize = fftSize / 2 + 1;

   mFft = std::make_unique<staffpad::audio::FourierTransform>(
      static_cast<int32_t>(fftSize));

   // Single-channel buffers used by the transforms.
   mEnvelope.setSize(1, positiveHalfSize);
   mCepstrum.setSize(1, fftSize);

   // Per-bin real-valued scratch vectors.
   mEnvelopeReal.resize(positiveHalfSize);
   mWeights.resize(positiveHalfSize);
}


/*!
 * Releases the FFT. `Process` checks `mFft` and returns without touching the
 * spectrum until `Reset(fftSize)` is called again.
 */
void FormantShifter::Reset()
{
   mFft = nullptr;
}


/*!
 * Re-weights `spec` in place so that its spectral envelope is resampled by
 * `factor`.
 *
 * The envelope is estimated by liftering the cepstrum of the log-magnitude
 * spectrum. Each bin of `spec` is then multiplied by the ratio of the
 * resampled envelope to the current envelope.
 *
 * Does nothing if `factor` is not a finite positive number, if
 * `cutoffQuefrency` is not positive, or if no FFT is currently set up (see
 * `Reset(fftSize)` and `Reset()`).
 *
 * @param powSpec Power spectrum; `fftSize / 2 + 1` values are read.
 * @param spec Complex spectrum; `fftSize / 2 + 1` values are modified.
 * @param factor Envelope resampling factor; must be finite and positive.
 */
void FormantShifter::Process(
   const float* powSpec, std::complex<float>* spec, double factor)
{
   assert(factor > 0);
   // Release-build fallback for the assertion above. The comparisons are
   // negated so that NaN is rejected too: `factor <= 0` is false for NaN,
   // which would then reach float-to-integer conversions whose behaviour is
   // undefined. A non-positive cutoff is rejected because it would make the
   // liftering range below start before, and end after, the cepstrum buffer.
   if (
      !(factor > 0) || !std::isfinite(factor) || !(cutoffQuefrency > 0) ||
      !mFft)
      return;

   const auto fftSize = mFft->getSize();
   const auto numBins = fftSize / 2 + 1;

   mLogger.Log(fftSize, "fftSize");

   // Take the log of the normalized magnitude. (This assumes that
   // the window averages to 1.)
   std::complex<float>* pEnv = mEnvelope.getPtr(0);
   const float normalizer = FastLog2(fftSize);
   std::transform(powSpec, powSpec + numBins, pEnv, [&](float power) {
      return .5f * FastLog2(power) - normalizer;
   });

   // Get the cosine transform of the log magnitude, aka the cepstrum.
   mFft->inverseReal(mEnvelope, mCepstrum);
   auto pCepst = mCepstrum.getPtr(0);
   mLogger.Log(pCepst, fftSize, "cepstrum");

   // "Lifter" the cepstrum: zero everything between the cutoff bin and its
   // mirror image. The range check is done on the floating-point value,
   // before converting to int, so that an out-of-range product can neither
   // overflow the conversion nor yield an out-of-bounds fill.
   const double cutoffBins = mSampleRate * cutoffQuefrency * factor;
   if (cutoffBins >= 0 && cutoffBins < fftSize / 2)
   {
      const auto binCutoff = static_cast<int>(cutoffBins);
      std::fill(pCepst + binCutoff + 1, pCepst + fftSize - binCutoff, 0.f);
   }
   mLogger.Log(pCepst, fftSize, "cepstrumLiftered");

   // Get the envelope back.
   mFft->forwardReal(mCepstrum, mEnvelope);
   std::transform(
      pEnv, pEnv + numBins, mEnvelopeReal.begin(),
      [fftSize = fftSize](const std::complex<float>& env) {
         return std::exp2(env.real() / fftSize);
      });
   mLogger.Log(mEnvelopeReal.data(), numBins, "envelope");

   // Get the weights, which are the ratio of the desired envelope to the
   // current envelope. First store the reciprocal of the current envelope
   // (0 where it is zero, subnormal, infinite or NaN) ...
   std::transform(
      mEnvelopeReal.begin(), mEnvelopeReal.end(), mWeights.begin(),
      [](float env) { return std::isnormal(env) ? 1.f / env : 0.f; });

   // ... then resample the envelope in place to get the desired one ...
   const auto lastNonZeroedBin =
      ResampleFreqDomain(mEnvelopeReal.data(), fftSize, factor);

   mLogger.Log(mEnvelopeReal.data(), numBins, "envelopeResampled");

   // ... and finally multiply the two.
   std::transform(
      mEnvelopeReal.begin(), mEnvelopeReal.end(), mWeights.begin(),
      mWeights.begin(), [](float env, float weight) {
         // Limit the weights to 100. Our purpose is to add (or remove) energy
         // to formants, and it doesn't need to be by more than that. This way
         // we also avoid unreasonable amplification.
         // NOTE(review): the original comment equated 100 with 20dB, but a
         // factor of 100 applied to spectral magnitudes is 40dB - confirm
         // which limit was intended.
         return std::min(env * weight, 100.f);
      });

   // Say the signal was downsampled to pitch it up. The factor is then less
   // than 1, and the resampler had to zero out the upper part of the envelope
   // bins. For these, rather than zeroing the spec too, it sounds better
   // to keep the original, even if no envelope correction is applied, else the
   // signal loses a bit of clarity. At such high frequencies, we probably
   // don't need a smooth frequency-domain transition and a jump is fine. (This
   // is visible in the spec, in case you're curious.)
   // `lastNonZeroedBin` is the index of the first bin the resampler zeroed.
   std::fill(mWeights.begin() + lastNonZeroedBin, mWeights.end(), 1.f);

   mLogger.Log(mWeights.data(), mWeights.size(), "weights");

   mLogger.Log(
      spec, numBins, "magnitude",
      [fftSize = fftSize](const std::complex<float>& spec) {
         return std::abs(spec) / fftSize;
      });

   // Now apply the weights.
   std::transform(
      spec, spec + numBins, mWeights.begin(), spec,
      std::multiplies<std::complex<float>>());

   mLogger.Log(
      spec, numBins, "weightedMagnitude",
      [fftSize = fftSize](const std::complex<float>& spec) {
         return std::abs(spec) / fftSize;
      });

   mLogger.ProcessFinished(spec, fftSize);
}

min
int min(int a, int b)
Definition: CompareAudioCommand.cpp:114

FormantShifter.h

FormantShifterLoggerInterface.h

MapToPositiveHalfIndex.h

MapToPositiveHalfIndex
constexpr auto MapToPositiveHalfIndex(int index, int fullSize)
Useful when dealing with symmetric spectra reduced only to their positive half. See tests below for m...
Definition: MapToPositiveHalfIndex.h:12

MathApprox.h

FastLog2
constexpr float FastLog2(float x)
Approximates the base-2 logarithm of a float to two decimal places, adapted from https://stackoverflo...
Definition: MathApprox.h:32

FormantShifter::mFft
std::unique_ptr< staffpad::audio::FourierTransform > mFft
Definition: FormantShifter.h:62

FormantShifter::Reset
void Reset()
Definition: FormantShifter.cpp:63

FormantShifter::cutoffQuefrency
const double cutoffQuefrency
Definition: FormantShifter.h:30

FormantShifter::mEnvelopeReal
std::vector< float > mEnvelopeReal
Definition: FormantShifter.h:65

FormantShifter::mWeights
std::vector< float > mWeights
Definition: FormantShifter.h:66

FormantShifter::mCepstrum
staffpad::SamplesReal mCepstrum
Definition: FormantShifter.h:64

FormantShifter::mSampleRate
const int mSampleRate
Definition: FormantShifter.h:60

FormantShifter::mEnvelope
staffpad::SamplesComplex mEnvelope
Definition: FormantShifter.h:63

FormantShifter::mLogger
FormantShifterLoggerInterface & mLogger
Definition: FormantShifter.h:61

FormantShifter::FormantShifter
FormantShifter(int sampleRate, double cutoffQuefrency, FormantShifterLoggerInterface &logger)
Definition: FormantShifter.cpp:43

FormantShifter::Process
void Process(const float *powerSpectrum, std::complex< float > *spectrum, double factor)
Processes spectrum in place, or does nothing if Reset(fftSize) wasn't called or Reset() was called si...
Definition: FormantShifter.cpp:68

FormantShifterLoggerInterface
Definition: FormantShifterLoggerInterface.h:19

FormantShifterLoggerInterface::Log
virtual void Log(int value, const char *name) const =0

FormantShifterLoggerInterface::ProcessFinished
virtual void ProcessFinished(std::complex< float > *spectrum, size_t fftSize)=0
If not already, disables the logging and marks the spectrum with an audible event to make clear where...

staffpad::SamplesFloat::setSize
void setSize(int32_t numChannels, int32_t samples)
Definition: SamplesFloat.h:21

staffpad::SamplesFloat::getPtr
T * getPtr(int32_t channel)
Definition: SamplesFloat.h:48

size
size_t size
Definition: ffmpeg-2.3.6-single-header.h:412

anonymous_namespace{ClipSegmentTest.cpp}::sampleRate
constexpr auto sampleRate
Definition: ClipSegmentTest.cpp:20

anonymous_namespace{FormantShifter.cpp}::ResampleFreqDomain
size_t ResampleFreqDomain(float *x, size_t fftSize, double factor)
Definition: FormantShifter.cpp:23

details::end
const char * end(const char *str) noexcept
Definition: StringUtils.h:106

fast_float::detail::power
constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept
Definition: fast_float.h:1454

staffpad::vo::copy
void copy(const T *src, T *dst, int32_t n)
Definition: VectorOps.h:40