Audacity 3.2.0
FormantShifter.cpp
Go to the documentation of this file.
1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*!********************************************************************
3
4 Audacity: A Digital Audio Editor
5
6 FormantShifter.cpp
7
8 Matthieu Hodgkinson
9
10**********************************************************************/
11#include "FormantShifter.h"
14#include "MathApprox.h"
15#include <algorithm>
16#include <cassert>
17#include <fstream>
18
19namespace
20{
21// `x` has length `fftSize/2+1`.
22// Returns the last bin that wasn't zeroed.
23size_t ResampleFreqDomain(float* x, size_t fftSize, double factor)
24{
25 const auto size = fftSize / 2 + 1;
26 const auto end = std::min(size, size_t(size * factor));
27 std::vector<float> tmp(end);
28 for (size_t i = 0; i < end; ++i)
29 {
30 const int int_pos = i / factor;
31 const float frac_pos = 1.f * i / factor - int_pos;
32 const auto k = MapToPositiveHalfIndex(int_pos, fftSize);
33 const auto l = MapToPositiveHalfIndex(int_pos + 1, fftSize);
34 tmp[i] = (1 - frac_pos) * x[k] + frac_pos * x[l];
35 }
36 std::copy(tmp.begin(), tmp.begin() + end, x);
37 if (end < size)
38 std::fill(x + end, x + size, 0.f);
39 return end;
40}
41} // namespace
42
44 int sampleRate, double cutoffQuefrency,
46 : cutoffQuefrency { cutoffQuefrency }
47 , mSampleRate { sampleRate }
48 , mLogger { logger }
49{
50}
51
52void FormantShifter::Reset(size_t fftSize)
53{
54 mFft = std::make_unique<staffpad::audio::FourierTransform>(
55 static_cast<int32_t>(fftSize));
56 const auto numBins = fftSize / 2 + 1;
57 mEnvelope.setSize(1, numBins);
58 mCepstrum.setSize(1, fftSize);
59 mEnvelopeReal.resize(numBins);
60 mWeights.resize(numBins);
61}
62
64{
65 mFft.reset();
66}
67
69 const float* powSpec, std::complex<float>* spec, double factor)
70{
71 assert(factor > 0);
72 if (factor <= 0 || cutoffQuefrency == 0 || !mFft)
73 return;
74
75 const auto fftSize = mFft->getSize();
76 const auto numBins = fftSize / 2 + 1;
77
78 mLogger.Log(fftSize, "fftSize");
79
80 // Take the log of the normalized magnitude. (This assumes that
81 // the window averages to 1.)
82 std::complex<float>* pEnv = mEnvelope.getPtr(0);
83 const float normalizer = FastLog2(fftSize);
84 std::transform(powSpec, powSpec + numBins, pEnv, [&](float power) {
85 return .5f * FastLog2(power) - normalizer;
86 });
87
88 // Get the cosine transform of the log magnitude, aka the cepstrum.
89 mFft->inverseReal(mEnvelope, mCepstrum);
90 auto pCepst = mCepstrum.getPtr(0);
91 mLogger.Log(pCepst, fftSize, "cepstrum");
92
93 // "Lifter" the cepstrum.
94 const auto binCutoff = int(mSampleRate * cutoffQuefrency * factor);
95 if (binCutoff < fftSize / 2)
96 std::fill(pCepst + binCutoff + 1, pCepst + fftSize - binCutoff, 0.f);
97 mLogger.Log(pCepst, fftSize, "cepstrumLiftered");
98
99 // Get the envelope back.
100 mFft->forwardReal(mCepstrum, mEnvelope);
101 std::transform(
102 pEnv, pEnv + numBins, mEnvelopeReal.begin(),
103 [fftSize = fftSize](const std::complex<float>& env) {
104 return std::exp2(env.real() / fftSize);
105 });
106 mLogger.Log(mEnvelopeReal.data(), numBins, "envelope");
107
108 // Get the weights, which are the ratio of the desired envelope to the
109 // current envelope (which has the effect of downsampling).
110 std::transform(
111 mEnvelopeReal.begin(), mEnvelopeReal.end(), mWeights.begin(),
112 [](float env) { return std::isnormal(env) ? 1.f / env : 0.f; });
113
114 const auto lastNonZeroedBin =
115 ResampleFreqDomain(mEnvelopeReal.data(), fftSize, factor);
116
117 mLogger.Log(mEnvelopeReal.data(), numBins, "envelopeResampled");
118 std::transform(
119 mEnvelopeReal.begin(), mEnvelopeReal.end(), mWeights.begin(),
120 mWeights.begin(), [](float env, float weight) {
121 // Limit the weights to 100, which corresponds to 20dB.
122 // Our purpose is to add (or remove) energy to formants, and it doesn't
123 // need to be by more than that. This way we also avoid unreasonable
124 // amplification.
125 return std::min(env * weight, 100.f);
126 });
127
128 // Say the signal was downsampled to pitch it up. The factor is then less
129 // than 1, and the resampler had to zero out the upper part of the envelope
130 // bins. For these, rather than zeroing the spec too, it sounds better
131 // to keep the original, even if no envelope correction is applied, else the
132 // signal looses a bit of clarity. At such high frequencies, we probably
133 // don't need a smooth frequency-domain transition and a jump is fine. (This
134 // is visible in the spec, in case you're curious.)
135 std::fill(mWeights.begin() + lastNonZeroedBin, mWeights.end(), 1.f);
136
137 mLogger.Log(mWeights.data(), mWeights.size(), "weights");
138
139 mLogger.Log(
140 spec, numBins, "magnitude",
141 [fftSize = fftSize](const std::complex<float>& spec) {
142 return std::abs(spec) / fftSize;
143 });
144
145 // Now apply the weights.
146 std::transform(
147 spec, spec + numBins, mWeights.begin(), spec,
148 std::multiplies<std::complex<float>>());
149
150 mLogger.Log(
151 spec, numBins, "weightedMagnitude",
152 [fftSize = fftSize](const std::complex<float>& spec) {
153 return std::abs(spec) / fftSize;
154 });
155
156 mLogger.ProcessFinished(spec, fftSize);
157}
int min(int a, int b)
constexpr auto MapToPositiveHalfIndex(int index, int fullSize)
Useful when dealing with symmetric spectra reduced only to their positive half. See tests below for m...
constexpr float FastLog2(float x)
Approximates the base-2 logarithm of a float to two decimal places, adapted from https://stackoverflo...
Definition: MathApprox.h:32
std::unique_ptr< staffpad::audio::FourierTransform > mFft
const double cutoffQuefrency
std::vector< float > mEnvelopeReal
std::vector< float > mWeights
staffpad::SamplesReal mCepstrum
const int mSampleRate
staffpad::SamplesComplex mEnvelope
FormantShifterLoggerInterface & mLogger
FormantShifter(int sampleRate, double cutoffQuefrency, FormantShifterLoggerInterface &logger)
void Process(const float *powerSpectrum, std::complex< float > *spectrum, double factor)
Processes spectrum in place, or does nothing if Reset(fftSize) wasn't called or Reset() was called si...
virtual void Log(int value, const char *name) const =0
virtual void ProcessFinished(std::complex< float > *spectrum, size_t fftSize)=0
If not already, disables the logging and marks the spectrum with an audible event to make clear where...
void setSize(int32_t numChannels, int32_t samples)
Definition: SamplesFloat.h:21
T * getPtr(int32_t channel)
Definition: SamplesFloat.h:48
size_t ResampleFreqDomain(float *x, size_t fftSize, double factor)
const char * end(const char *str) noexcept
Definition: StringUtils.h:106
constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept
Definition: fast_float.h:1454
void copy(const T *src, T *dst, int32_t n)
Definition: VectorOps.h:40