Audacity  2.2.2
FormatClassifier.cpp
Go to the documentation of this file.
1 /**********************************************************************
2 
3  Audacity: A Digital Audio Editor
4 
5  FormatClassifier.cpp
6 
7  Philipp Sibler
8 
9 ******************************************************************//*******************************************************************/
20 #include "FormatClassifier.h"
21 
22 #include <stdint.h>
23 #include <cmath>
24 #include <cfloat>
25 #include <vector>
26 #include <cstdio>
27 
28 #include <wx/defs.h>
29 
30 #include "MultiFormatReader.h"
31 #include "sndfile.h"
32 
// Constructs the classifier for `filename`: initialises the reader with the
// file name and the power meter with cSiglen, appends the candidate classes,
// allocates the two feature vectors and then runs the classification right
// away via Run().
33 FormatClassifier::FormatClassifier(const char* filename) :
34  mReader(filename),
35  mMeter(cSiglen)
36 {
37  FormatClassT fClass;
38 
39  // Define the classification classes
// NOTE(review): no assignment to fClass is visible between the push_back
// calls below (the embedded line numbers skip the lines in between), so the
// per-class format/endian values cannot be confirmed from this listing.
// As shown, twelve entries are appended in two groups of six.
42  mClasses.push_back(fClass);
44  mClasses.push_back(fClass);
46  mClasses.push_back(fClass);
48  mClasses.push_back(fClass);
50  mClasses.push_back(fClass);
52  mClasses.push_back(fClass);
53 
56  mClasses.push_back(fClass);
58  mClasses.push_back(fClass);
60  mClasses.push_back(fClass);
62  mClasses.push_back(fClass);
64  mClasses.push_back(fClass);
66  mClasses.push_back(fClass);
67 
68  // Build feature vectors
// Both vectors are constructed from mClasses.size() and are indexed by class
// number (see the debug print below).
69  mMonoFeat = Floats{ mClasses.size() };
70  mStereoFeat = Floats{ mClasses.size() };
71 
72 #ifdef FORMATCLASSIFIER_SIGNAL_DEBUG
73  // Build a debug writer
// NOTE(review): sprintf is unbounded here; "%s.sig" needs strlen(filename) + 5
// bytes, so a filename of 1020 or more characters overflows dfile.
// Consider snprintf(dfile, sizeof(dfile), ...).
74  char dfile [1024];
75  sprintf(dfile, "%s.sig", filename);
76  mpWriter = std::make_unique<DebugWriter>(dfile);
77 #endif
78 
79  // Run it
80  Run();
81 
82 #ifdef FORMATCLASSIFIER_SIGNAL_DEBUG
// Debug builds only: print, for every class, its format and endian fields
// together with the mono and stereo feature values.
83  for (unsigned int n = 0; n < mClasses.size(); n++)
84  {
85  wxPrintf("Class [%i] Machine [%i]: Mono: %3.7f Stereo: %3.7f\n", mClasses[n].format, mClasses[n].endian, mMonoFeat[n], mStereoFeat[n]);
86  }
87 #endif
88 
89 }
90 
92 {
93 }
94 
96 {
97  return mResultFormat;
98 }
99 
101 {
102  int format = SF_FORMAT_RAW;
103 
104  switch(mResultFormat.format)
105  {
107  format |= SF_FORMAT_PCM_S8;
108  break;
110  format |= SF_FORMAT_PCM_16;
111  break;
113  format |= SF_FORMAT_PCM_32;
114  break;
116  format |= SF_FORMAT_PCM_U8;
117  break;
119  format |= SF_FORMAT_FLOAT;
120  break;
122  format |= SF_FORMAT_DOUBLE;
123  break;
124  default:
125  format |= SF_FORMAT_PCM_16;
126  break;
127  }
128 
129  switch(mResultFormat.endian)
130  {
132  format |= SF_ENDIAN_LITTLE;
133  break;
135  format |= SF_ENDIAN_BIG;
136  break;
137  }
138 
139  return format;
140 }
141 
143 {
144  return mResultChannels;
145 }
146 
148 {
// Scores every entry of mClasses twice -- once reading with stride 1 (stored
// in mMonoFeat) and once with stride 2 (stored in mStereoFeat) -- and then
// stores the best-scoring class in mResultFormat and the matching channel
// count in mResultChannels.
149  // Calc the mono feature vector
150  for (unsigned int n = 0; n < mClasses.size(); n++)
151  {
152  // Read the signal
153  ReadSignal(mClasses[n], 1);
154 #ifdef FORMATCLASSIFIER_SIGNAL_DEBUG
155  mpWriter->WriteSignal(mSigBuffer, cSiglen);
156 #endif
157 
158  // Do some simple preprocessing
159  // Remove DC offset
160  float smean = Mean(mSigBuffer.get(), cSiglen);
161  Sub(mSigBuffer.get(), smean, cSiglen);
162  // Normalize to +- 1.0
// mAuxBuffer receives |mSigBuffer|; its maximum is the peak magnitude.
// NOTE(review): smax is 0 when every sample equals the mean (e.g. silence);
// Div() does not check its divisor, so the buffer would then hold NaN values.
163  Abs(mSigBuffer.get(), mAuxBuffer.get(), cSiglen);
164  float smax = Max(mAuxBuffer.get(), cSiglen);
165  Div(mSigBuffer.get(), smax, cSiglen);
166 
167  // Now actually fill the feature vector
168  // Low to high band power ratio
// The second and third CalcPower arguments are named fc and bw in its
// declaration; presumably centre frequency and bandwidth -- TODO confirm units.
// NOTE(review): pHi is not checked for zero before the division.
169  float pLo = mMeter.CalcPower(mSigBuffer.get(), 0.15f, 0.3f);
170  float pHi = mMeter.CalcPower(mSigBuffer.get(), 0.45f, 0.1f);
171  mMonoFeat[n] = pLo / pHi;
172  }
173 
174  // Calc the stereo feature vector
// Same processing as the mono loop above; only the stride passed to
// ReadSignal (2 instead of 1) and the destination vector differ.
175  for (unsigned int n = 0; n < mClasses.size(); n++)
176  {
177  // Read the signal
178  ReadSignal(mClasses[n], 2);
179 #ifdef FORMATCLASSIFIER_SIGNAL_DEBUG
180  mpWriter->WriteSignal(mSigBuffer, cSiglen);
181 #endif
182 
183  // Do some simple preprocessing
184  // Remove DC offset
185  float smean = Mean(mSigBuffer.get(), cSiglen);
186  Sub(mSigBuffer.get(), smean, cSiglen);
187  // Normalize to +- 1.0
188  Abs(mSigBuffer.get(), mAuxBuffer.get(), cSiglen);
189  float smax = Max(mAuxBuffer.get(), cSiglen);
190  Div(mSigBuffer.get(), smax, cSiglen);
191 
192  // Now actually fill the feature vector
193  // Low to high band power ratio
194  float pLo = mMeter.CalcPower(mSigBuffer.get(), 0.15f, 0.3f);
195  float pHi = mMeter.CalcPower(mSigBuffer.get(), 0.45f, 0.1f);
196  mStereoFeat[n] = pLo / pHi;
197  }
198 
199  // Get the results
// midx / sidx receive the index of the largest mono / stereo feature.
200  size_t midx, sidx;
201  float monoMax = Max(mMonoFeat.get(), mClasses.size(), &midx);
202  float stereoMax = Max(mStereoFeat.get(), mClasses.size(), &sidx);
203 
// Mono wins only when its best score is strictly greater; a tie selects stereo.
204  if (monoMax > stereoMax)
205  {
206  mResultChannels = 1;
207  mResultFormat = mClasses[midx];
208  }
209  else
210  {
211  mResultChannels = 2;
212  mResultFormat = mClasses[sidx];
213  }
214 
215 }
216 
218 {
219  size_t actRead = 0;
220  unsigned int n = 0;
221 
222  mReader.Reset();
223 
224  // Do a dummy read of 1024 bytes to skip potential header information
226 
227  do
228  {
229  actRead = mReader.ReadSamples(mRawBuffer.get(), cSiglen, stride, format.format, format.endian);
230 
231  if (n == 0)
232  {
234  }
235  else
236  {
237  if (actRead == cSiglen)
238  {
240 
241  // Integrate signals
242  Add(mSigBuffer.get(), mAuxBuffer.get(), cSiglen);
243 
244  // Do some dummy reads to break signal coherence
245  mReader.ReadSamples(mRawBuffer.get(), n + 1, stride, format.format, format.endian);
246  }
247  }
248 
249  n++;
250 
251  } while ((n < cNumInts) && (actRead == cSiglen));
252 
253 }
254 
256 {
257  switch(format.format)
258  {
260  ToFloat((int8_t*) in, out, cSiglen);
261  break;
263  ToFloat((int16_t*) in, out, cSiglen);
264  break;
266  ToFloat((int32_t*) in, out, cSiglen);
267  break;
269  ToFloat((uint8_t*) in, out, cSiglen);
270  break;
272  ToFloat((uint16_t*) in, out, cSiglen);
273  break;
275  ToFloat((uint32_t*) in, out, cSiglen);
276  break;
278  ToFloat((float*) in, out, cSiglen);
279  break;
281  ToFloat((double*) in, out, cSiglen);
282  break;
283  }
284 }
285 
286 void FormatClassifier::Add(float* in1, float* in2, size_t len)
287 {
288  for (unsigned int n = 0; n < len; n++)
289  {
290  in1[n] += in2[n];
291  }
292 }
293 
294 void FormatClassifier::Sub(float* in, float subt, size_t len)
295 {
296  for (unsigned int n = 0; n < len; n++)
297  {
298  in[n] -= subt;
299  }
300 }
301 
302 void FormatClassifier::Div(float* in, float div, size_t len)
303 {
304  for (unsigned int n = 0; n < len; n++)
305  {
306  in[n] /= div;
307  }
308 }
309 
310 
311 void FormatClassifier::Abs(float* in, float* out, size_t len)
312 {
313  for (unsigned int n = 0; n < len; n++)
314  {
315  if (in[n] < 0.0f)
316  {
317  out[n] = -in[n];
318  }
319  else
320  {
321  out[n] = in[n];
322  }
323  }
324 }
325 
326 float FormatClassifier::Mean(float* in, size_t len)
327 {
328  float mean = 0.0f;
329 
330  for (unsigned int n = 0; n < len; n++)
331  {
332  mean += in[n];
333  }
334 
335  mean /= len;
336 
337  return mean;
338 }
339 
340 float FormatClassifier::Max(float* in, size_t len)
341 {
342  size_t dummyidx;
343  return Max(in, len, &dummyidx);
344 }
345 
346 float FormatClassifier::Max(float* in, size_t len, size_t* maxidx)
347 {
348  float max = -FLT_MAX;
349  *maxidx = 0;
350 
351  for (unsigned int n = 0; n < len; n++)
352  {
353  if (in[n] > max)
354  {
355  max = in[n];
356  *maxidx = n;
357  }
358  }
359 
360  return max;
361 }
362 
363 template<class T> void FormatClassifier::ToFloat(T* in, float* out, size_t len)
364 {
365  for(unsigned int n = 0; n < len; n++)
366  {
367  out[n] = (float) in[n];
368  }
369 }
void Add(float *in1, float *in2, size_t len)
float Mean(float *in, size_t len)
FormatClassT GetResultFormat()
FormatVectorT mClasses
size_t ReadSamples(void *buffer, size_t len, MultiFormatReader::FormatT format, MachineEndianness::EndiannessT end)
static const size_t cSiglen
unsigned mResultChannels
void Div(float *in, float div, size_t len)
void ConvertSamples(void *in, float *out, FormatClassT format)
FormatClassT mResultFormat
static const size_t cNumInts
int format
Definition: ExportPCM.cpp:56
float CalcPower(float *sig, float fc, float bw)
FormatClassifier(const char *filename)
void ToFloat(T *in, float *out, size_t len)
MultiFormatReader::FormatT format
float Max(float *in, size_t len)
MachineEndianness::EndiannessT endian
SpecPowerCalculation mMeter
unsigned GetResultChannels()
void Sub(float *in, float subt, size_t len)
void Abs(float *in, float *out, size_t len)
MultiFormatReader mReader
void ReadSignal(FormatClassT format, size_t stride)
ArrayOf< uint8_t > mRawBuffer