Audacity 3.2.0
FormantShifter.cpp
Go to the documentation of this file.
1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*!********************************************************************
3
4 Audacity: A Digital Audio Editor
5
6 FormantShifter.cpp
7
8 Matthieu Hodgkinson
9
10**********************************************************************/
11#include "FormantShifter.h"
14#include "MathApprox.h"
15#include <algorithm>
16#include <cassert>
17#include <fstream>
18
19namespace
20{
21// `x` has length `fftSize/2+1`.
22// Returns the last bin that wasn't zeroed.
23size_t ResampleFreqDomain(float* x, size_t fftSize, double factor)
24{
25 const auto size = fftSize / 2 + 1;
26 const auto end = std::min(size, size_t(size * factor));
27 std::vector<float> tmp(end);
28 for (size_t i = 0; i < end; ++i)
29 {
30 const int int_pos = i / factor;
31 const float frac_pos = 1.f * i / factor - int_pos;
32 const auto k = MapToPositiveHalfIndex(int_pos, fftSize);
33 const auto l = MapToPositiveHalfIndex(int_pos + 1, fftSize);
34 tmp[i] = (1 - frac_pos) * x[k] + frac_pos * x[l];
35 }
36 std::copy(tmp.begin(), tmp.begin() + end, x);
37 if (end < size)
38 std::fill(x + end, x + size, 0.f);
39 return end;
40}
41} // namespace
42
44 int sampleRate, double cutoffQuefrency,
46 : cutoffQuefrency { cutoffQuefrency }
47 , mSampleRate { sampleRate }
48 , mLogger { logger }
49{
50}
51
52void FormantShifter::Reset(size_t fftSize)
53{
54 mFft = std::make_unique<staffpad::audio::FourierTransform>(fftSize);
55 const auto numBins = fftSize / 2 + 1;
56 mEnvelope.setSize(1, numBins);
57 mCepstrum.setSize(1, fftSize);
58 mEnvelopeReal.resize(numBins);
59 mWeights.resize(numBins);
60}
61
63{
64 mFft.reset();
65}
66
68 const float* powSpec, std::complex<float>* spec, double factor)
69{
70 assert(factor > 0);
71 if (factor <= 0 || cutoffQuefrency == 0 || !mFft)
72 return;
73
74 const auto fftSize = mFft->getSize();
75 const auto numBins = fftSize / 2 + 1;
76
77 mLogger.Log(fftSize, "fftSize");
78
79 // Take the log of the normalized magnitude. (This assumes that
80 // the window averages to 1.)
81 std::complex<float>* pEnv = mEnvelope.getPtr(0);
82 const float normalizer = FastLog2(fftSize);
83 std::transform(powSpec, powSpec + numBins, pEnv, [&](float power) {
84 return .5f * FastLog2(power) - normalizer;
85 });
86
87 // Get the cosine transform of the log magnitude, aka the cepstrum.
88 mFft->inverseReal(mEnvelope, mCepstrum);
89 auto pCepst = mCepstrum.getPtr(0);
90 mLogger.Log(pCepst, fftSize, "cepstrum");
91
92 // "Lifter" the cepstrum.
93 const auto binCutoff = int(mSampleRate * cutoffQuefrency * factor);
94 if (binCutoff < fftSize / 2)
95 std::fill(pCepst + binCutoff + 1, pCepst + fftSize - binCutoff, 0.f);
96 mLogger.Log(pCepst, fftSize, "cepstrumLiftered");
97
98 // Get the envelope back.
99 mFft->forwardReal(mCepstrum, mEnvelope);
100 std::transform(
101 pEnv, pEnv + numBins, mEnvelopeReal.begin(),
102 [fftSize = fftSize](const std::complex<float>& env) {
103 return std::exp2(env.real() / fftSize);
104 });
105 mLogger.Log(mEnvelopeReal.data(), numBins, "envelope");
106
107 // Get the weights, which are the ratio of the desired envelope to the
108 // current envelope (which has the effect of downsampling).
109 std::transform(
110 mEnvelopeReal.begin(), mEnvelopeReal.end(), mWeights.begin(),
111 [](float env) { return std::isnormal(env) ? 1.f / env : 0.f; });
112
113 const auto lastNonZeroedBin =
114 ResampleFreqDomain(mEnvelopeReal.data(), fftSize, factor);
115
116 mLogger.Log(mEnvelopeReal.data(), numBins, "envelopeResampled");
117 std::transform(
118 mEnvelopeReal.begin(), mEnvelopeReal.end(), mWeights.begin(),
119 mWeights.begin(), [](float env, float weight) {
120 // Limit the weights to 100, which corresponds to 20dB.
121 // Our purpose is to add (or remove) energy to formants, and it doesn't
122 // need to be by more than that. This way we also avoid unreasonable
123 // amplification.
124 return std::min(env * weight, 100.f);
125 });
126
127 // Say the signal was downsampled to pitch it up. The factor is then less
128 // than 1, and the resampler had to zero out the upper part of the envelope
129 // bins. For these, rather than zeroing the spec too, it sounds better
130 // to keep the original, even if no envelope correction is applied, else the
131 // signal looses a bit of clarity. At such high frequencies, we probably
132 // don't need a smooth frequency-domain transition and a jump is fine. (This
133 // is visible in the spec, in case you're curious.)
134 std::fill(mWeights.begin() + lastNonZeroedBin, mWeights.end(), 1.f);
135
136 mLogger.Log(mWeights.data(), mWeights.size(), "weights");
137
138 mLogger.Log(
139 spec, numBins, "magnitude",
140 [fftSize = fftSize](const std::complex<float>& spec) {
141 return std::abs(spec) / fftSize;
142 });
143
144 // Now apply the weights.
145 std::transform(
146 spec, spec + numBins, mWeights.begin(), spec,
147 std::multiplies<std::complex<float>>());
148
149 mLogger.Log(
150 spec, numBins, "weightedMagnitude",
151 [fftSize = fftSize](const std::complex<float>& spec) {
152 return std::abs(spec) / fftSize;
153 });
154
155 mLogger.ProcessFinished(spec, fftSize);
156}
int min(int a, int b)
constexpr auto MapToPositiveHalfIndex(int index, int fullSize)
Useful when dealing with symmetric spectra reduced only to their positive half. See tests below for m...
constexpr float FastLog2(float x)
Approximates the base-2 logarithm of a float to two decimal places, adapted from https://stackoverflo...
Definition: MathApprox.h:32
std::unique_ptr< staffpad::audio::FourierTransform > mFft
const double cutoffQuefrency
std::vector< float > mEnvelopeReal
std::vector< float > mWeights
staffpad::SamplesReal mCepstrum
const int mSampleRate
staffpad::SamplesComplex mEnvelope
FormantShifterLoggerInterface & mLogger
FormantShifter(int sampleRate, double cutoffQuefrency, FormantShifterLoggerInterface &logger)
void Process(const float *powerSpectrum, std::complex< float > *spectrum, double factor)
Processes spectrum in place, or does nothing if Reset(fftSize) wasn't called or Reset() was called si...
virtual void Log(int value, const char *name) const =0
virtual void ProcessFinished(std::complex< float > *spectrum, size_t fftSize)=0
If not already, disables the logging and marks the spectrum with an audible event to make clear where...
void setSize(int32_t numChannels, int32_t samples)
Definition: SamplesFloat.h:21
T * getPtr(int32_t channel)
Definition: SamplesFloat.h:48
size_t ResampleFreqDomain(float *x, size_t fftSize, double factor)
const char * end(const char *str) noexcept
Definition: StringUtils.h:106
constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept
Definition: fast_float.h:1454
void copy(const T *src, T *dst, int32_t n)
Definition: VectorOps.h:40