Audacity 3.2.0
FormatClassifier.cpp
Go to the documentation of this file.
1/**********************************************************************
2
3 Audacity: A Digital Audio Editor
4
5 FormatClassifier.cpp
6
7 Philipp Sibler
8
9******************************************************************//*******************************************************************/
20#include "FormatClassifier.h"
21
22#include <stdint.h>
23#include <cmath>
24#include <cfloat>
25#include <vector>
26
27#include <wx/defs.h>
28
29#include "sndfile.h"
30
32 mReader(filename),
33 mMeter(cSiglen)
34{
35 // Define the classification classes
36 for ( auto endianness : {
39 } )
40 for ( auto format : {
47 } )
48 mClasses.push_back( { format, endianness } );
49
50 // Build feature vectors
51 mMonoFeat = Floats{ mClasses.size() };
52 mStereoFeat = Floats{ mClasses.size() };
53
54#ifdef FORMATCLASSIFIER_SIGNAL_DEBUG
55 // Build a debug writer
56 char dfile [1024];
57 sprintf(dfile, "%s.sig", filename);
58 mpWriter = std::make_unique<DebugWriter>(dfile);
59#endif
60
61 // Run it
62 Run();
63
64#ifdef FORMATCLASSIFIER_SIGNAL_DEBUG
65 for (unsigned int n = 0; n < mClasses.size(); n++)
66 {
67 wxPrintf("Class [%i] Machine [%i]: Mono: %3.7f Stereo: %3.7f\n", mClasses[n].format, mClasses[n].endian, mMonoFeat[n], mStereoFeat[n]);
68 }
69#endif
70
71}
72
74{
75}
76
78{
79 return mResultFormat;
80}
81
83{
84 int format = SF_FORMAT_RAW;
85
86 switch(mResultFormat.format)
87 {
89 format |= SF_FORMAT_PCM_S8;
90 break;
92 format |= SF_FORMAT_PCM_16;
93 break;
95 format |= SF_FORMAT_PCM_32;
96 break;
98 format |= SF_FORMAT_PCM_U8;
99 break;
101 format |= SF_FORMAT_FLOAT;
102 break;
104 format |= SF_FORMAT_DOUBLE;
105 break;
106 default:
107 format |= SF_FORMAT_PCM_16;
108 break;
109 }
110
111 switch(mResultFormat.endian)
112 {
114 format |= SF_ENDIAN_LITTLE;
115 break;
117 format |= SF_ENDIAN_BIG;
118 break;
119 }
120
121 return format;
122}
123
125{
126 return mResultChannels;
127}
128
130{
131 // Calc the mono feature vector
132 for (unsigned int n = 0; n < mClasses.size(); n++)
133 {
134 // Read the signal
135 ReadSignal(mClasses[n], 1);
136#ifdef FORMATCLASSIFIER_SIGNAL_DEBUG
137 mpWriter->WriteSignal(mSigBuffer, cSiglen);
138#endif
139
140 // Do some simple preprocessing
141 // Remove DC offset
142 float smean = Mean(mSigBuffer.get(), cSiglen);
143 Sub(mSigBuffer.get(), smean, cSiglen);
144 // Normalize to +- 1.0
145 Abs(mSigBuffer.get(), mAuxBuffer.get(), cSiglen);
146 float smax = Max(mAuxBuffer.get(), cSiglen);
147 Div(mSigBuffer.get(), smax, cSiglen);
148
149 // Now actually fill the feature vector
150 // Low to high band power ratio
151 float pLo = mMeter.CalcPower(mSigBuffer.get(), 0.15f, 0.3f);
152 float pHi = mMeter.CalcPower(mSigBuffer.get(), 0.45f, 0.1f);
153 mMonoFeat[n] = pLo / pHi;
154 }
155
156 // Calc the stereo feature vector
157 for (unsigned int n = 0; n < mClasses.size(); n++)
158 {
159 // Read the signal
160 ReadSignal(mClasses[n], 2);
161#ifdef FORMATCLASSIFIER_SIGNAL_DEBUG
162 mpWriter->WriteSignal(mSigBuffer, cSiglen);
163#endif
164
165 // Do some simple preprocessing
166 // Remove DC offset
167 float smean = Mean(mSigBuffer.get(), cSiglen);
168 Sub(mSigBuffer.get(), smean, cSiglen);
169 // Normalize to +- 1.0
170 Abs(mSigBuffer.get(), mAuxBuffer.get(), cSiglen);
171 float smax = Max(mAuxBuffer.get(), cSiglen);
172 Div(mSigBuffer.get(), smax, cSiglen);
173
174 // Now actually fill the feature vector
175 // Low to high band power ratio
176 float pLo = mMeter.CalcPower(mSigBuffer.get(), 0.15f, 0.3f);
177 float pHi = mMeter.CalcPower(mSigBuffer.get(), 0.45f, 0.1f);
178 mStereoFeat[n] = pLo / pHi;
179 }
180
181 // Get the results
182 size_t midx, sidx;
183 float monoMax = Max(mMonoFeat.get(), mClasses.size(), &midx);
184 float stereoMax = Max(mStereoFeat.get(), mClasses.size(), &sidx);
185
186 if (monoMax > stereoMax)
187 {
188 mResultChannels = 1;
189 mResultFormat = mClasses[midx];
190 }
191 else
192 {
193 mResultChannels = 2;
194 mResultFormat = mClasses[sidx];
195 }
196
197}
198
200{
201 size_t actRead = 0;
202 unsigned int n = 0;
203
204 mReader.Reset();
205
206 // Do a dummy read of 1024 bytes to skip potential header information
208
209 do
210 {
211 actRead = mReader.ReadSamples(mRawBuffer.get(), cSiglen, stride, format.format, format.endian);
212
213 if (n == 0)
214 {
216 }
217 else
218 {
219 if (actRead == cSiglen)
220 {
222
223 // Integrate signals
224 Add(mSigBuffer.get(), mAuxBuffer.get(), cSiglen);
225
226 // Do some dummy reads to break signal coherence
227 mReader.ReadSamples(mRawBuffer.get(), n + 1, stride, format.format, format.endian);
228 }
229 }
230
231 n++;
232
233 } while ((n < cNumInts) && (actRead == cSiglen));
234
235}
236
238{
239 switch(format.format)
240 {
242 ToFloat((int8_t*) in, out, cSiglen);
243 break;
245 ToFloat((int16_t*) in, out, cSiglen);
246 break;
248 ToFloat((int32_t*) in, out, cSiglen);
249 break;
251 ToFloat((uint8_t*) in, out, cSiglen);
252 break;
254 ToFloat((uint16_t*) in, out, cSiglen);
255 break;
257 ToFloat((uint32_t*) in, out, cSiglen);
258 break;
260 ToFloat((float*) in, out, cSiglen);
261 break;
263 ToFloat((double*) in, out, cSiglen);
264 break;
265 }
266}
267
268void FormatClassifier::Add(float* in1, float* in2, size_t len)
269{
270 for (unsigned int n = 0; n < len; n++)
271 {
272 in1[n] += in2[n];
273 }
274}
275
276void FormatClassifier::Sub(float* in, float subt, size_t len)
277{
278 for (unsigned int n = 0; n < len; n++)
279 {
280 in[n] -= subt;
281 }
282}
283
284void FormatClassifier::Div(float* in, float div, size_t len)
285{
286 for (unsigned int n = 0; n < len; n++)
287 {
288 in[n] /= div;
289 }
290}
291
292
293void FormatClassifier::Abs(float* in, float* out, size_t len)
294{
295 for (unsigned int n = 0; n < len; n++)
296 {
297 if (in[n] < 0.0f)
298 {
299 out[n] = -in[n];
300 }
301 else
302 {
303 out[n] = in[n];
304 }
305 }
306}
307
308float FormatClassifier::Mean(float* in, size_t len)
309{
310 float mean = 0.0f;
311
312 for (unsigned int n = 0; n < len; n++)
313 {
314 mean += in[n];
315 }
316
317 mean /= len;
318
319 return mean;
320}
321
322float FormatClassifier::Max(float* in, size_t len)
323{
324 size_t dummyidx;
325 return Max(in, len, &dummyidx);
326}
327
328float FormatClassifier::Max(float* in, size_t len, size_t* maxidx)
329{
330 float max = -FLT_MAX;
331 *maxidx = 0;
332
333 for (unsigned int n = 0; n < len; n++)
334 {
335 if (in[n] > max)
336 {
337 max = in[n];
338 *maxidx = n;
339 }
340 }
341
342 return max;
343}
344
345template<class T> void FormatClassifier::ToFloat(T* in, float* out, size_t len)
346{
347 for(unsigned int n = 0; n < len; n++)
348 {
349 out[n] = (float) in[n];
350 }
351}
FormatClassT mResultFormat
void ToFloat(T *in, float *out, size_t len)
void Abs(float *in, float *out, size_t len)
MultiFormatReader mReader
void ConvertSamples(void *in, float *out, FormatClassT format)
void Sub(float *in, float subt, size_t len)
FormatClassT GetResultFormat()
static const size_t cNumInts
ArrayOf< uint8_t > mRawBuffer
void Div(float *in, float div, size_t len)
unsigned mResultChannels
unsigned GetResultChannels()
void Add(float *in1, float *in2, size_t len)
FormatVectorT mClasses
float Mean(float *in, size_t len)
FormatClassifier(const char *filename)
SpecPowerCalculation mMeter
static const size_t cSiglen
float Max(float *in, size_t len)
void ReadSignal(FormatClassT format, size_t stride)
size_t ReadSamples(void *buffer, size_t len, MultiFormatReader::FormatT format, MachineEndianness::EndiannessT end)
float CalcPower(float *sig, float fc, float bw)
MultiFormatReader::FormatT format
MachineEndianness::EndiannessT endian