Audacity 3.2.0
FormatClassifier.cpp
Go to the documentation of this file.
1/**********************************************************************
2
3 Audacity: A Digital Audio Editor
4
5 FormatClassifier.cpp
6
7 Philipp Sibler
8
9******************************************************************//*******************************************************************/
20#include "FormatClassifier.h"
21
22#include <stdint.h>
23#include <cmath>
24#include <cfloat>
25#include <vector>
26#include <cstdio>
27
28#include <wx/defs.h>
29
30#include "sndfile.h"
31
// Constructor initializer list and body. The signature line is not present in
// this listing; the initializers below use an identifier named `filename`.
// Work done here, in order:
//   1. construct the reader from `filename` and the power meter from cSiglen,
//   2. fill mClasses with one entry per (format, endianness) combination,
//   3. size the mono and stereo feature buffers from mClasses.size(),
//   4. call Run() right away.
33 mReader(filename),
34 mMeter(cSiglen)
35{
36 // Define the classification classes
 // NOTE(review): the brace-enclosed lists iterated by the two loops below are
 // not visible in this listing, so the exact set of classes cannot be stated
 // here. Endianness is the outer loop and format the inner one, which fixes the
 // ordering of mClasses (all formats for the first endianness come first).
37 for ( auto endianness : {
40 } )
41 for ( auto format : {
48 } )
49 mClasses.push_back( { format, endianness } );
50
51 // Build feature vectors
 // Presumably one float per class is allocated for each hypothesis -- confirm
 // against the definition of Floats.
52 mMonoFeat = Floats{ mClasses.size() };
53 mStereoFeat = Floats{ mClasses.size() };
54
55#ifdef FORMATCLASSIFIER_SIGNAL_DEBUG
56 // Build a debug writer
 // NOTE(review): sprintf does not bound its output; a filename longer than
 // 1024 - sizeof(".sig") characters overflows dfile. Debug builds only, but
 // snprintf(dfile, sizeof(dfile), ...) would be the safe form.
57 char dfile [1024];
58 sprintf(dfile, "%s.sig", filename);
59 mpWriter = std::make_unique<DebugWriter>(dfile);
60#endif
61
62 // Run it
63 Run();
64
65#ifdef FORMATCLASSIFIER_SIGNAL_DEBUG
 // Dump the per-class feature values that Run() produced.
66 for (unsigned int n = 0; n < mClasses.size(); n++)
67 {
68 wxPrintf("Class [%i] Machine [%i]: Mono: %3.7f Stereo: %3.7f\n", mClasses[n].format, mClasses[n].endian, mMonoFeat[n], mStereoFeat[n]);
69 }
70#endif
71
72}
73
// Empty function body. The signature line is not present in this listing
// (presumably the destructor -- confirm against the header).
75{
76}
77
// Accessor body: returns the stored mResultFormat member.
// The signature line is not present in this listing (presumably
// GetResultFormat() -- confirm).
79{
80 return mResultFormat;
81}
82
// Translates mResultFormat into a libsndfile format code: SF_FORMAT_RAW OR-ed
// with a sample-encoding flag and an endianness flag.
// The signature line and all `case` labels are missing from this listing, so
// which enumerator maps to which SF_FORMAT_* flag cannot be confirmed here.
84{
85 int format = SF_FORMAT_RAW;
86
 // Pick the sample-encoding flag from the detected sample format.
87 switch(mResultFormat.format)
88 {
90 format |= SF_FORMAT_PCM_S8;
91 break;
93 format |= SF_FORMAT_PCM_16;
94 break;
96 format |= SF_FORMAT_PCM_32;
97 break;
99 format |= SF_FORMAT_PCM_U8;
100 break;
102 format |= SF_FORMAT_FLOAT;
103 break;
105 format |= SF_FORMAT_DOUBLE;
106 break;
 // Any format without its own case falls back to 16-bit PCM.
107 default:
108 format |= SF_FORMAT_PCM_16;
109 break;
110 }
111
 // Add the endianness flag. No default branch is visible, so an endianness
 // value matching neither case appears to leave the endian bits unset.
112 switch(mResultFormat.endian)
113 {
115 format |= SF_ENDIAN_LITTLE;
116 break;
118 format |= SF_ENDIAN_BIG;
119 break;
120 }
121
122 return format;
123}
124
// Accessor body: returns the stored mResultChannels member, which the
// classification routine sets to 1 or 2.
// The signature line is not present in this listing (presumably
// GetResultChannels() -- confirm).
126{
127 return mResultChannels;
128}
129
// Classification routine. The signature line is not present in this listing
// (presumably Run(), which the constructor calls -- confirm).
//
// For every candidate class the file is read twice, once with stride 1 and
// once with stride 2. Each time the signal has its mean removed, is scaled by
// its peak absolute value, and is reduced to one feature: the ratio of two
// CalcPower() results (arguments 0.15/0.3 over arguments 0.45/0.1). The class
// and stride with the largest feature become mResultFormat / mResultChannels.
//
// NOTE(review): the two loops are identical apart from the stride and the
// destination array; they are candidates for a shared helper.
131{
132 // Calc the mono feature vector
133 for (unsigned int n = 0; n < mClasses.size(); n++)
134 {
135 // Read the signal
136 ReadSignal(mClasses[n], 1);
137#ifdef FORMATCLASSIFIER_SIGNAL_DEBUG
138 mpWriter->WriteSignal(mSigBuffer, cSiglen);
139#endif
140
141 // Do some simple preprocessing
142 // Remove DC offset
143 float smean = Mean(mSigBuffer.get(), cSiglen);
144 Sub(mSigBuffer.get(), smean, cSiglen);
145 // Normalize to +- 1.0
 // mAuxBuffer is used as scratch space for the absolute values.
 // NOTE(review): if every sample is zero after the offset removal, smax is 0
 // and Div() divides by zero.
146 Abs(mSigBuffer.get(), mAuxBuffer.get(), cSiglen);
147 float smax = Max(mAuxBuffer.get(), cSiglen);
148 Div(mSigBuffer.get(), smax, cSiglen);
149
150 // Now actually fill the feature vector
151 // Low to high band power ratio
152 float pLo = mMeter.CalcPower(mSigBuffer.get(), 0.15f, 0.3f);
153 float pHi = mMeter.CalcPower(mSigBuffer.get(), 0.45f, 0.1f);
154 mMonoFeat[n] = pLo / pHi;
155 }
156
157 // Calc the stereo feature vector
 // Same processing as above, but the signal is read with a stride of 2.
158 for (unsigned int n = 0; n < mClasses.size(); n++)
159 {
160 // Read the signal
161 ReadSignal(mClasses[n], 2);
162#ifdef FORMATCLASSIFIER_SIGNAL_DEBUG
163 mpWriter->WriteSignal(mSigBuffer, cSiglen);
164#endif
165
166 // Do some simple preprocessing
167 // Remove DC offset
168 float smean = Mean(mSigBuffer.get(), cSiglen);
169 Sub(mSigBuffer.get(), smean, cSiglen);
170 // Normalize to +- 1.0
171 Abs(mSigBuffer.get(), mAuxBuffer.get(), cSiglen);
172 float smax = Max(mAuxBuffer.get(), cSiglen);
173 Div(mSigBuffer.get(), smax, cSiglen);
174
175 // Now actually fill the feature vector
176 // Low to high band power ratio
177 float pLo = mMeter.CalcPower(mSigBuffer.get(), 0.15f, 0.3f);
178 float pHi = mMeter.CalcPower(mSigBuffer.get(), 0.45f, 0.1f);
179 mStereoFeat[n] = pLo / pHi;
180 }
181
182 // Get the results
 // Max() only replaces its running maximum on a strict '>' comparison, so a
 // NaN feature (e.g. from 0/0 above) is never selected.
183 size_t midx, sidx;
184 float monoMax = Max(mMonoFeat.get(), mClasses.size(), &midx);
185 float stereoMax = Max(mStereoFeat.get(), mClasses.size(), &sidx);
186
 // Mono wins only when strictly better; a tie is resolved as stereo.
187 if (monoMax > stereoMax)
188 {
189 mResultChannels = 1;
190 mResultFormat = mClasses[midx];
191 }
192 else
193 {
194 mResultChannels = 2;
195 mResultFormat = mClasses[sidx];
196 }
197
198}
199
// Reads up to cNumInts blocks of cSiglen samples from the file, interpreted as
// `format` with the given `stride`, and sums them into mSigBuffer.
// The signature line and several statements (the header-skipping read and the
// sample-conversion calls) are missing from this listing; comments below only
// describe what is visible.
201{
202 size_t actRead = 0;
203 unsigned int n = 0;
204
 // Start again from the reader's initial position for every candidate class.
205 mReader.Reset();
206
207 // Do a dummy read of 1024 bytes to skip potential header information
 // NOTE(review): the statement this comment describes is not in the listing.
209
210 do
211 {
212 actRead = mReader.ReadSamples(mRawBuffer.get(), cSiglen, stride, format.format, format.endian);
213
214 if (n == 0)
215 {
 // First block: the body is missing here (presumably converts the raw
 // samples straight into mSigBuffer -- confirm).
217 }
218 else
219 {
 // Later blocks are only accumulated when a full block was read.
220 if (actRead == cSiglen)
221 {
 // A statement is missing here (presumably converts the raw samples
 // into mAuxBuffer before the addition below -- confirm).
223
224 // Integrate signals
225 Add(mSigBuffer.get(), mAuxBuffer.get(), cSiglen);
226
227 // Do some dummy reads to break signal coherence
 // The number of discarded samples grows with the block index (n + 1).
228 mReader.ReadSamples(mRawBuffer.get(), n + 1, stride, format.format, format.endian);
229 }
230 }
231
232 n++;
233
 // Stop after cNumInts blocks or at the first short read.
234 } while ((n < cNumInts) && (actRead == cSiglen));
235
236}
237
// Converts cSiglen raw samples at `in` to float at `out`, reinterpreting `in`
// according to format.format and delegating to the ToFloat<T> template.
// The signature line and all `case` labels are missing from this listing, so
// the enumerator-to-type mapping cannot be confirmed here.
// No default branch is visible: a format value without a case appears to leave
// `out` untouched.
239{
240 switch(format.format)
241 {
243 ToFloat((int8_t*) in, out, cSiglen);
244 break;
246 ToFloat((int16_t*) in, out, cSiglen);
247 break;
249 ToFloat((int32_t*) in, out, cSiglen);
250 break;
252 ToFloat((uint8_t*) in, out, cSiglen);
253 break;
255 ToFloat((uint16_t*) in, out, cSiglen);
256 break;
258 ToFloat((uint32_t*) in, out, cSiglen);
259 break;
261 ToFloat((float*) in, out, cSiglen);
262 break;
264 ToFloat((double*) in, out, cSiglen);
265 break;
266 }
267}
268
269void FormatClassifier::Add(float* in1, float* in2, size_t len)
270{
271 for (unsigned int n = 0; n < len; n++)
272 {
273 in1[n] += in2[n];
274 }
275}
276
277void FormatClassifier::Sub(float* in, float subt, size_t len)
278{
279 for (unsigned int n = 0; n < len; n++)
280 {
281 in[n] -= subt;
282 }
283}
284
285void FormatClassifier::Div(float* in, float div, size_t len)
286{
287 for (unsigned int n = 0; n < len; n++)
288 {
289 in[n] /= div;
290 }
291}
292
293
294void FormatClassifier::Abs(float* in, float* out, size_t len)
295{
296 for (unsigned int n = 0; n < len; n++)
297 {
298 if (in[n] < 0.0f)
299 {
300 out[n] = -in[n];
301 }
302 else
303 {
304 out[n] = in[n];
305 }
306 }
307}
308
309float FormatClassifier::Mean(float* in, size_t len)
310{
311 float mean = 0.0f;
312
313 for (unsigned int n = 0; n < len; n++)
314 {
315 mean += in[n];
316 }
317
318 mean /= len;
319
320 return mean;
321}
322
323float FormatClassifier::Max(float* in, size_t len)
324{
325 size_t dummyidx;
326 return Max(in, len, &dummyidx);
327}
328
329float FormatClassifier::Max(float* in, size_t len, size_t* maxidx)
330{
331 float max = -FLT_MAX;
332 *maxidx = 0;
333
334 for (unsigned int n = 0; n < len; n++)
335 {
336 if (in[n] > max)
337 {
338 max = in[n];
339 *maxidx = n;
340 }
341 }
342
343 return max;
344}
345
346template<class T> void FormatClassifier::ToFloat(T* in, float* out, size_t len)
347{
348 for(unsigned int n = 0; n < len; n++)
349 {
350 out[n] = (float) in[n];
351 }
352}
int format
Definition: ExportPCM.cpp:56
FormatClassT mResultFormat
void ToFloat(T *in, float *out, size_t len)
void Abs(float *in, float *out, size_t len)
MultiFormatReader mReader
void ConvertSamples(void *in, float *out, FormatClassT format)
void Sub(float *in, float subt, size_t len)
FormatClassT GetResultFormat()
static const size_t cNumInts
ArrayOf< uint8_t > mRawBuffer
void Div(float *in, float div, size_t len)
unsigned mResultChannels
unsigned GetResultChannels()
void Add(float *in1, float *in2, size_t len)
FormatVectorT mClasses
float Mean(float *in, size_t len)
FormatClassifier(const char *filename)
SpecPowerCalculation mMeter
static const size_t cSiglen
float Max(float *in, size_t len)
void ReadSignal(FormatClassT format, size_t stride)
size_t ReadSamples(void *buffer, size_t len, MultiFormatReader::FormatT format, MachineEndianness::EndiannessT end)
float CalcPower(float *sig, float fc, float bw)
MultiFormatReader::FormatT format
MachineEndianness::EndiannessT endian