Audacity  3.0.3
VoiceKey.cpp
Go to the documentation of this file.
1 /**********************************************************************
2 
3  Audacity: A Digital Audio Editor
4 
5  VoiceKey.cpp
6 
7  ?? Dominic Mazzoni
8  ?? Shane Muller
9 
10 *******************************************************************//*******************************************************************/
19 
20 
21 
22 #include "VoiceKey.h"
23 
24 #include <wx/string.h>
25 #include <math.h>
26 #include <stdio.h>
27 
28 #include <wx/textfile.h>
29 #include <wx/intl.h>
30 #include <iostream>
31 
32 #include "WaveTrack.h"
34 #include "widgets/wxPanelWrapper.h"
35 
36 using std::cout;
37 using std::endl;
38 
39 
40 
42 {
43 
44  mWindowSize = 0.01; //size of analysis window in seconds
45 
46  mEnergyMean = .0006; // reasonable initial levels assuming sampling rate of
47  mEnergySD = .0002; // 44100 hertz
48  mSignChangesMean = .08;
49  mSignChangesSD= .02;
52 
53  AdjustThreshold(2);
54 
55  mSilentWindowSize = .05; //Amount of time (in seconds) below threshold to call it silence
56  mSignalWindowSize = .05; //Amount of time (in seconds) above threshold to call it signal
57 
58 
59  mUseEnergy = true;
60  mUseSignChangesLow = false;
61  mUseSignChangesHigh = false;
64 
65 
66 };
67 
68 
70 {
71 };
72 
73 
74 
75 //---------------------------------------------------------------------------
76 // VoiceKey::On/Off Forward/Backward
77 // This operates in two phases:
78 // First, you take chunks of samples that are WindowSize big.
79 // If you have a run of them where something passes the threshold for SignalWindowSize seconds,
80 // you return to the last empty block and scan forward one sample at a time until you find the
81 // starting point of the speech.
82 
83 
84 
85 
86 //Move forward to find an ON region.
88  const WaveTrack & t, sampleCount start, sampleCount len)
89 {
90 
91  if((mWindowSize) >= (len + 10).as_double() ){
92 
93  /* i18n-hint: Voice key is an experimental/incomplete feature that
94  is used to navigate in vocal recordings, to move forwards and
95  backwards by words. So 'key' is being used in the sense of an index.
96  This error message means that you've selected too short
97  a region of audio to be able to use this feature.*/
98  AudacityMessageBox( XO("Selection is too small to use voice key.") );
99  return start;
100  }
101  else {
102 
103  //Change the millisecond-based parameters into sample-based parameters
104  double rate = t.GetRate(); //Translates seconds to samples
105  size_t WindowSizeInt = (rate * mWindowSize); //Size of window to examine
106  size_t SignalWindowSizeInt = (rate * mSignalWindowSize); //This much signal is necessary to trip key
107 
108  auto samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
109  auto lastsubthresholdsample = start; //start this off at the selection start
110  // keeps track of the sample number of the last sample to not exceed the threshold
111 
112  int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
113 
114 
115  //This loop goes through the selection a block at a time. If a long enough run
116  //of above-threshold blocks occur, we return to the last sub-threshold block and
117  //go through one sample at a time.
118  //If there are fewer than 10 samples leftover, don't bother.
119 
120  for(auto i = start; samplesleft >= 10;
121  i += (WindowSizeInt - 1) , samplesleft -= (WindowSizeInt - 1)) {
122 
123  //Set blocksize so that it is the right size
124  const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
125 
126  //Test whether we are above threshold (the number of stats)
127  if(AboveThreshold(t,i,blocksize))
128  {
129  blockruns++; //Hit
130  } else {
131  blockruns=0; //Miss--start over
132  lastsubthresholdsample = i;
133  }
134 
135  //If the blockrun is long enough, break out of the loop early:
136  if(blockruns > mSignalWindowSize/mWindowSize)
137  break;
138 
139  }
140 
141  //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
142  if(samplesleft > 10) {
143 
144 
145  //Calculate how many to scan through--we only have to go through (at most)
146  //the first window + 1 samples--but we need another window samples to draw from.
147  size_t remaining = 2*WindowSizeInt+1;
148 
149  //To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
150  //Only go through the first SignalWindowSizeInt samples, and choose the first that trips the key.
151  Floats buffer{ remaining };
152  t.GetFloats(buffer.get(),
153  lastsubthresholdsample, remaining);
154 
155 
156 
157  //Initialize these trend markers atrend and ztrend. They keep track of the
158  //up/down trends at the start and end of the evaluation window.
159  int atrend = sgn(buffer[1]-buffer[0]);
160  int ztrend = sgn(buffer[WindowSizeInt+1]-buffer[WindowSizeInt]);
161 
162 
163  double erg=0;
164  double sc=0;
165  double dc=0;
166 
167  //Get initial test statistic values.
168  if(mUseEnergy)
169  erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt);
170 
172  sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt);
173 
175  dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
176 
177 
178  //Now, go through the sound again, sample by sample.
179  wxASSERT(WindowSizeInt < SignalWindowSizeInt);
180  size_t i;
181  for(i = 0; i + WindowSizeInt < SignalWindowSizeInt; i++) {
182 
183  int tests = 0;
184  int testThreshold = 0;
185  //Update the test statistics
186  if(mUseEnergy)
187  {
188  TestEnergyUpdate(erg, WindowSizeInt,buffer[i],buffer[i+WindowSizeInt+1]);
189  tests += (int)(erg>mThresholdEnergy);
190  testThreshold++;
191  }
193  {
194  TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
195  tests += (int)(sc < mThresholdSignChangesLower);
196  testThreshold++;
197  }
198 
200  {
201  TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
202  tests += (int)(sc > mThresholdSignChangesUpper);
203  testThreshold++;
204  }
205 
207  {
208  TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
209  tests += (int)(dc < mThresholdDirectionChangesLower);
210  testThreshold++;
211  }
212 
214  {
215  TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
216  tests += (int)(dc > mThresholdDirectionChangesUpper);
217  testThreshold++;
218  }
219 
220 
221 
222  if(tests >= testThreshold)
223  { //Finish off on the first hit
224  break;
225  }
226  }
227 
228  //When we get here, i+lastsubthresholdsample is the best guess for where the word starts
229  return i + lastsubthresholdsample;
230  }
231  else {
232  //If we failed to find anything, return the start position
233  return start ;
234  }
235  }
236 }
237 
238 //Move backward from end to find an ON region.
240  const WaveTrack & t, sampleCount end, sampleCount len)
241 {
242 
243 
244  if((mWindowSize) >= (len + 10).as_double() ){
245 
246  AudacityMessageBox( XO("Selection is too small to use voice key.") );
247  return end;
248  }
249  else {
250 
251  //Change the millisecond-based parameters into sample-based parameters
252  double rate = t.GetRate(); //Translates seconds to samples
253  size_t WindowSizeInt = (rate * mWindowSize); //Size of window to examine
254  //unsigned int SilentWindowSizeInt = (unsigned int)(rate * mSilentWindowSize); //This much signal is necessary to trip key
255 
256  auto samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
257  auto lastsubthresholdsample = end; //start this off at the end
258  // keeps track of the sample number of the last sample to not exceed the threshold
259 
260  int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
261 
262 
263  //This loop goes through the selection a block at a time in reverse order. If a long enough run
264  //of above-threshold blocks occur, we return to the last sub-threshold block and
265  //go through one sample at a time.
266  //If there are fewer than 10 samples leftover, don't bother.
267  for(auto i = end - WindowSizeInt; samplesleft >= 10;
268  i -= (WindowSizeInt - 1) , samplesleft -= (WindowSizeInt - 1)) {
269 
270  //Set blocksize so that it is the right size
271 
272  const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
273 
274 
275  //Test whether we are above threshold
276  if(AboveThreshold(t,i,blocksize))
277  {
278  blockruns++; //Hit
279  }
280  else
281  {
282  blockruns=0; //Miss--start over
283  lastsubthresholdsample = i+WindowSizeInt;
284  }
285 
286  //If the blockrun is long enough, break out of the loop early:
287  if(blockruns > mSilentWindowSize/mWindowSize)
288  break;
289 
290  }
291 
292  //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
293  if(samplesleft > 10) {
294 
295  //Calculate how many to scan through--we only have to go through (at most)
296  //the first window + 1 samples--but we need another window samples to draw from.
297  size_t remaining = 2*WindowSizeInt+1;
298 
299  //To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
300  //Only go through the first mSilentWindowSizeInt samples, and choose the first that trips the key.
301  Floats buffer{ remaining };
302  t.GetFloats(buffer.get(),
303  lastsubthresholdsample - remaining, remaining);
304 
305  //Initialize these trend markers atrend and ztrend. They keep track of the
306  //up/down trends at the start and end of the evaluation window.
307  int atrend = sgn(buffer[remaining - 2]-buffer[remaining - 1]);
308 
309  int ztrend = sgn(buffer[remaining - WindowSizeInt - 2] -
310  buffer[remaining - WindowSizeInt
311  // PVS-Studio detected a probable error here
312  // when it read - 2.
313  // is - 1 correct?
314  // This code is unused. I didn't study further.
315  - 1
316  ]);
317 
318  double erg=0;
319  double sc = 0;
320  double dc = 0;
321 
322  //Get initial test statistic values.
323  if(mUseEnergy)
324  erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt);
326  sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt);
328  dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
329 
330  //Now, go through the sound again, sample by sample.
331  size_t i;
332  for(i = remaining - 1; i > WindowSizeInt; i--) {
333  int tests = 0;
334  int testThreshold = 0;
335  //Update the test statistics
336  if(mUseEnergy)
337  {
338  TestEnergyUpdate(erg, WindowSizeInt,buffer[i],buffer[i+WindowSizeInt+1]);
339  tests += (int)(erg>mThresholdEnergy);
340  testThreshold++;
341  }
343  {
344  TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
345  tests += (int)(sc < mThresholdSignChangesLower);
346  testThreshold++;
347  }
349  {
350  TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
351  tests += (int)(sc > mThresholdSignChangesUpper);
352  testThreshold++;
353  }
355  {
356  TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
357  tests += (int)(dc < mThresholdDirectionChangesLower);
358  testThreshold++;
359  }
361  {
362  TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
363  tests += (int)(dc > mThresholdDirectionChangesUpper);
364  testThreshold++;
365  }
366 
367  if(tests >= testThreshold)
368  { //Finish off on the first hit
369  break;
370  }
371  }
372 
373  //When we get here, lastsubthresholdsample - remaining + i (buffer index i mapped back to a track position) is the best guess for where the word starts
374  return lastsubthresholdsample - remaining + i;
375  }
376  else {
377  //If we failed to find anything, return the start position
378  return end ;
379  }
380  }
381 }
382 
383 
384 //Move forward from the start to find an OFF region.
386  const WaveTrack & t, sampleCount start, sampleCount len)
387 {
388 
389  if((mWindowSize) >= (len + 10).as_double() ){
390  AudacityMessageBox( XO("Selection is too small to use voice key.") );
391 
392  return start;
393  }
394  else {
395 
396 
397  //Change the millisecond-based parameters into sample-based parameters
398  double rate = t.GetRate(); //Translates seconds to samples
399  unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize); //Size of window to examine
400  unsigned int SilentWindowSizeInt = (unsigned int)(rate * mSilentWindowSize); //This much signal is necessary to trip key
401 
402  sampleCount samplesleft ( len.as_double() - WindowSizeInt ); //Indexes the number of samples remaining in the selection
403  auto lastsubthresholdsample = start; //start this off at the selection start
404  // keeps track of the sample number of the last sample to not exceed the threshold
405 
406  int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
407 
408  //This loop goes through the selection a block at a time. If a long enough run
409  //of above-threshold blocks occur, we return to the last sub-threshold block and
410  //go through one sample at a time.
411  //If there are fewer than 10 samples leftover, don't bother.
412  for(auto i = start; samplesleft >= 10;
413  i += (WindowSizeInt - 1) , samplesleft -= (WindowSizeInt - 1)) {
414 
415  //Set blocksize so that it is the right size
416  const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
417 
418  if(!AboveThreshold(t,i,blocksize))
419  {
420  blockruns++; //Hit
421  }
422  else
423  {
424  blockruns=0; //Above threshold--start over
425  lastsubthresholdsample = i;
426  }
427 
428  //If the blockrun is long enough, break out of the loop early:
429  if(blockruns > mSilentWindowSize/mWindowSize)
430  break;
431 
432  }
433 
434  //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
435  if(samplesleft > 10) {
436 
437 
438  //Calculate how many to scan through--we only have to go through (at most)
439  //the first window + 1 samples--but we need another window samples to draw from.
440  size_t remaining = 2*WindowSizeInt+1;
441 
442  //To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
443  //Only go through the first SilentWindowSizeInt samples, and choose the first that trips the key.
444  Floats buffer{ remaining };
445  t.GetFloats(buffer.get(),
446  lastsubthresholdsample, remaining);
447 
448  //Initialize these trend markers atrend and ztrend. They keep track of the
449  //up/down trends at the start and end of the evaluation window.
450  int atrend = sgn(buffer[1]-buffer[0]);
451  int ztrend = sgn(buffer[WindowSizeInt+1]-buffer[WindowSizeInt]);
452 
453 
454  double erg=0;
455  double sc=0;
456  double dc=0;
457 
458  //Get initial test statistic values.
459  if(mUseEnergy)
460  erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt);
462  sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt);
464  dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
465 
466  //Now, go through the sound again, sample by sample.
467  size_t i;
468  for(i = 0; i < SilentWindowSizeInt - WindowSizeInt; i++) {
469  int tests = 0;
470  int testThreshold = 0;
471  //Update the test statistics
472  if(mUseEnergy)
473  {
474  TestEnergyUpdate(erg, WindowSizeInt,buffer[i],buffer[i+WindowSizeInt+1]);
475  tests += (int)(erg>mThresholdEnergy);
476  testThreshold++;
477  }
479  {
480  TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
481  tests += (int)(sc < mThresholdSignChangesLower);
482  testThreshold++;
483  }
485  {
486  TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
487  tests += (int)(sc > mThresholdSignChangesUpper);
488  testThreshold++;
489  }
491  {
492  TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
493  tests += (int)(dc < mThresholdDirectionChangesLower);
494  testThreshold++;
495  }
497  {
498  TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
499  tests += (int)(dc > mThresholdDirectionChangesUpper);
500  testThreshold++;
501  }
502 
503  if(tests < testThreshold)
504  { //Finish off on the first below-threshold block
505  break;
506  }
507  }
508 
509  //When we get here, i+lastsubthresholdsample is the best guess for where the word starts
510  return i + lastsubthresholdsample;
511  }
512  else {
513  //If we failed to find anything, return the start position
514  return start ;
515  }
516  }
517 }
518 
519 
520 //Move backward from the end to find an OFF region
522  const WaveTrack & t, sampleCount end, sampleCount len)
523 {
524 
525 
526  if((mWindowSize) >= (len + 10).as_double() ){
527 
528  AudacityMessageBox( XO("Selection is too small to use voice key.") );
529  return end;
530  }
531  else {
532 
533  //Change the millisecond-based parameters into sample-based parameters
534  double rate = t.GetRate(); //Translates seconds to samples
535  unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize); //Size of window to examine
536  //unsigned int SilentWindowSizeInt = (unsigned int)(rate * mSilentWindowSize); //This much signal is necessary to trip key
537 
538  auto samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
539  auto lastsubthresholdsample = end; //start this off at the end
540  // keeps track of the sample number of the last sample to not exceed the threshold
541 
542  int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
543 
544  //This loop goes through the selection a block at a time in reverse order. If a long enough run
545  //of above-threshold blocks occur, we return to the last sub-threshold block and
546  //go through one sample at a time.
547  //If there are fewer than 10 samples leftover, don't bother.
548  for(auto i = end - WindowSizeInt; samplesleft >= 10;
549  i -= (WindowSizeInt - 1), samplesleft -= (WindowSizeInt -1 )) {
550 
551  //Set blocksize so that it is the right size
552  const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
553 
554  if(!AboveThreshold(t,i,blocksize))
555  {
556 
557  blockruns++; //Hit
558  }
559  else
560  {
561  blockruns=0; //Miss--start over
562  lastsubthresholdsample = i+WindowSizeInt;
563 
564  }
565 
566  //If the blockrun is long enough, break out of the loop early:
567  if(blockruns > mSilentWindowSize/mWindowSize)
568  break;
569 
570  }
571 
572  //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
573  if(samplesleft > 10) {
574 
575  //Calculate how many to scan through--we only have to go through (at most)
576  //the first window + 1 samples--but we need another window samples to draw from.
577  const size_t remaining = 2*WindowSizeInt+1;
578 
579  //To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
580  //Only go through the first SilentWindowSizeInt samples, and choose the first that trips the key.
581  Floats buffer{ remaining };
582  t.GetFloats(buffer.get(),
583  lastsubthresholdsample - remaining, remaining);
584 
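585  //Initialize the trend markers atrend and ztrend. They keep track of the
586  //up/down trends at the start and end of the remaining window. NOTE(review): the ztrend expression below subtracts buffer[remaining - WindowSizeInt - 2] from itself, so it always evaluates sgn of zero; OnBackward uses "- 1" for the second index -- confirm which is intended.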
587  int atrend = sgn(buffer[remaining - 2] - buffer[remaining - 1]);
588  int ztrend =
589  sgn(buffer[remaining - WindowSizeInt - 2] -
590  buffer[remaining - WindowSizeInt - 2]);
591 
592  double erg=0;
593  double sc=0;
594  double dc=0;
595  //Get initial test statistic values.
596  if(mUseEnergy)
597  erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt);
599  sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt);
601  dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
602 
603  //Now, go through the sound again, sample by sample.
604  size_t i;
605  for(i = remaining - 1; i > WindowSizeInt; i--) {
606 
607  int tests = 0;
608  int testThreshold = 0;
609  //Update the test statistics
610  if(mUseEnergy)
611  {
612  TestEnergyUpdate(erg, WindowSizeInt,buffer[i],buffer[i+WindowSizeInt+1]);
613  tests += (int)(erg>mThresholdEnergy);
614  testThreshold++;
615  }
617  {
618  TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
619  tests += (int)(sc < mThresholdSignChangesLower);
620  testThreshold++;
621  }
623  {
624  TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
625  tests += (int)(sc > mThresholdSignChangesUpper);
626  testThreshold++;
627  }
629  {
630  TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
631  tests += (int)(dc < mThresholdDirectionChangesLower);
632  testThreshold++;
633  }
635  {
636  TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
637  tests += (int)(dc > mThresholdDirectionChangesUpper);
638  testThreshold++;
639  }
640 
641 
642 
643  if(tests < testThreshold)
644  { //Finish off on the first hit
645  break;
646  }
647  }
648 
649  //When we get here, lastsubthresholdsample - remaining + i (buffer index i mapped back to a track position) is the best guess for where the word starts
650  return lastsubthresholdsample - remaining + i;
651  }
652  else {
653  //If we failed to find anything, return the start position
654  return end ;
655  }
656  }
657 }
658 
659 //This tests whether a specified block region is above or below threshold.
661  const WaveTrack & t, sampleCount start, sampleCount len)
662 {
663 
664  double erg=0;
665  double sc=0;
666  double dc=0; //These store three statistics: energy, signchanges, and directionchanges
667  int tests =0; //Keeps track of how many statistics surpass the threshold.
668  int testThreshold=0; //Keeps track of the threshold.
669 
670  //Calculate the test statistics
671  if(mUseEnergy)
672  {
673  testThreshold++;
674  erg = TestEnergy(t, start,len);
675  tests +=(int)(erg > mThresholdEnergy);
676 #if 0
677  std::cout << "Energy: " << erg << " " <<mThresholdEnergy << std::endl;
678 #endif
679  }
680 
682  {
683  testThreshold++;
684  sc = TestSignChanges(t,start,len);
685  tests += (int)(sc < mThresholdSignChangesLower);
686 #if 0
687  std::cout << "SignChanges: " << sc << " " <<mThresholdSignChangesLower<< " < " << mThresholdSignChangesUpper << std::endl;
688 #endif
689 
690  }
692  {
693  testThreshold++;
694  sc = TestSignChanges(t,start,len);
695  tests += (int)(sc > mThresholdSignChangesUpper);
696 #if 0
697  std::cout << "SignChanges: " << sc << " " <<mThresholdSignChangesLower<< " < " << mThresholdSignChangesUpper << std::endl;
698 #endif
699 
700  }
701 
702 
704  {
705  testThreshold++;
706  dc = TestDirectionChanges(t,start,len);
707  tests += (int)(dc < mThresholdDirectionChangesLower);
708 #if 0
709  std::cout << "DirectionChanges: " << dc << " " <<mThresholdDirectionChangesLower<< " < " << mThresholdDirectionChangesUpper << std::endl;
710 #endif
711  }
713  {
714  testThreshold++;
715  dc = TestDirectionChanges(t,start,len);
716  tests += (int)(dc > mThresholdDirectionChangesUpper);
717 #if 0
718  std::cout << "DirectionChanges: " << dc << " " <<mThresholdDirectionChangesLower<< " < " << mThresholdDirectionChangesUpper << std::endl;
719 #endif
720  }
721 
722  //Test whether we are above threshold (the number of stats)
723  return (tests >= testThreshold);
724 
725 }
726 
727 //This adjusts the threshold. Larger values of t expand the noise region,
728 //making more things be classified as noise (and requiring a stronger signal).
730 {
731 
738 };
739 
740 
741 //This 'calibrates' the voicekey to noise
743 {
744  //To calibrate the noise, we need to scan the sample block just like in the voicekey and
745  //calculate the mean and standard deviation of the test statistics.
746  //Then, we set the BaselineThreshold to be one
747 
748  wxBusyCursor busy;
749 
750  //initialize some sample statistics: sums of X and X^2
751 
752  double sumerg, sumerg2;
753  double sumsc, sumsc2;
754  double sumdc, sumdc2;
755  double erg, sc, dc;
756  //Now, change the millisecond-based parameters into sample-based parameters
757  //(This depends on WaveTrack t)
758  double rate = t.GetRate();
759  unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize);
760  // unsigned int SignalWindowSizeInt = (unsigned int)(rate * mSignalWindowSize);
761 
762 
763  //Get the first test statistics
764 
765  //Calibrate all of the statistic, because they might be
766  //changed later.
767 
768  // if(mUseEnergy)
769  erg = TestEnergy(t, start, WindowSizeInt);
770 
771  // if(mUseSignChanges)
772  sc = TestSignChanges(t,start, WindowSizeInt);
773 
774  // if(mUseDirectionChanges)
775  dc = TestDirectionChanges(t,start,WindowSizeInt);
776 
777  sumerg =0.0;
778  sumerg2 = 0.0;
779  sumsc =0.0;
780  sumsc2 = 0.0;
781  sumdc =0.0;
782  sumdc2 =0.0;
783 
784 
785  // int n = len - WindowSizeInt; //This is how many samples we have
786  auto samplesleft = len - WindowSizeInt;
787  int samples=0;
788 
789  for(auto i = start; samplesleft >= 10;
790  i += (WindowSizeInt - 1), samplesleft -= (WindowSizeInt -1) ) {
791  //Take samples chunk-by-chunk.
792  //Normally, this should be in WindowSizeInt chunks, but at the end (if there are more than 10
793  //samples left) take a chunk that eats the rest of the samples.
794 
795  samples++; //Increment the number of samples we have
796  const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
797 
798  erg = TestEnergy(t, i, blocksize);
799  sumerg +=(double)erg;
800  sumerg2 += pow((double)erg,2);
801 
802  sc = TestSignChanges(t,i, blocksize);
803  sumsc += (double)sc;
804  sumsc2 += pow((double)sc,2);
805 
806 
807  dc = TestDirectionChanges(t,i,blocksize);
808  sumdc += (double)dc;
809  sumdc2 += pow((double)dc,2);
810  }
811 
812  mEnergyMean = sumerg / samples;
813  mEnergySD = sqrt(sumerg2/samples - mEnergyMean*mEnergyMean);
814 
815  mSignChangesMean = sumsc / samples;
816  mSignChangesSD = sqrt(sumsc2 / samples - mSignChangesMean * mSignChangesMean);
817 
818  mDirectionChangesMean = sumdc / samples;
820 
821  auto text = XO("Calibration Results\n");
822  text +=
823  /* i18n-hint: %1.4f is replaced by a number. sd stands for 'Standard Deviations'*/
824  XO("Energy -- mean: %1.4f sd: (%1.4f)\n")
825  .Format( mEnergyMean, mEnergySD );
826  text +=
827  XO("Sign Changes -- mean: %1.4f sd: (%1.4f)\n")
828  .Format( mSignChangesMean, mSignChangesSD );
829  text +=
830  XO("Direction Changes -- mean: %1.4f sd: (%1.4f)\n")
833  nullptr,
834  text,
835  XO("Calibration Complete"),
836  wxOK | wxICON_INFORMATION,
837  wxPoint(-1, -1)
838  }
839  .ShowModal();
840 
842 }
843 
844 
845 void VoiceKey::SetKeyType(bool erg, bool scLow , bool scHigh,
846  bool dcLow, bool dcHigh)
847 {
848  mUseEnergy = erg;
849  mUseSignChangesLow = scLow;
850  mUseSignChangesHigh = scHigh;
851  mUseDirectionChangesLow = dcLow;
852  mUseDirectionChangesHigh = dcHigh;
853 }
854 
855 
856 //This might continue over a number of blocks.
858  const WaveTrack & t, sampleCount start, sampleCount len)
859 {
860 
861  double sum = 1;
862  auto s = start; //Keep track of start
863  auto originalLen = len; //Keep track of the length of block to process (its not the length of t)
864  const auto blockSize = limitSampleBufferSize(
865  t.GetMaxBlockSize(), len); //Determine size of sampling buffer
866  Floats buffer{ blockSize }; //Get a sampling buffer
867 
868  while(len > 0)
869  {
870  //Figure out how much to grab
871  auto block = limitSampleBufferSize ( t.GetBestBlockSize(s), len );
872 
873  t.GetFloats(buffer.get(), s,block); //grab the block;
874 
875  //Now, go through the block and calculate energy
876  for(decltype(block) i = 0; i< block; i++)
877  {
878  sum += buffer[i]*buffer[i];
879  }
880 
881  len -= block;
882  s += block;
883  }
884 
885  return sum / originalLen.as_double();
886 }
887 
888 
889 //This will update RMSE by adding one element and subtracting another
890 void VoiceKey::TestEnergyUpdate (double & prevErg, int len, const float & drop, const float & add)
891 {
892  //This is an updating formula for RMSE. It will only recalculate what's changed.
893  prevErg = prevErg + (double)(fabs(add) - fabs(drop))/len;
894 
895 }
896 
897 
899  const WaveTrack & t, sampleCount start, sampleCount len)
900 {
901 
902 
903  auto s = start; //Keep track of start
904  auto originalLen = len; //Keep track of the length of block to process (its not the length of t)
905  const auto blockSize = limitSampleBufferSize(
906  t.GetMaxBlockSize(), len); //Determine size of sampling buffer
907  unsigned long signchanges = 1;
908  int currentsign=0;
909 
910  Floats buffer{ blockSize }; //Get a sampling buffer
911 
912  while(len > 0) {
913  //Figure out how much to grab
914  auto block = limitSampleBufferSize ( t.GetBestBlockSize(s), len );
915 
916  t.GetFloats(buffer.get(), s, block); //grab the block;
917 
918  if (len == originalLen)
919  {
920  //The first time through, set stuff up special.
921  currentsign = sgn(buffer[0]);
922  }
923 
924  //Now, go through the block and calculate zero crossings
925 
926  for(decltype(block) i = 0; i< block; i++)
927  {
928  if( sgn(buffer[i]) != currentsign)
929  {
930  currentsign = sgn(buffer[i]);
931  signchanges++;
932  }
933 
934  }
935  len -= block;
936  s += block;
937  }
938  return (double)signchanges / originalLen.as_double();
939 }
940 
941 void VoiceKey::TestSignChangesUpdate(double & currentsignchanges, int len,
942  const float & a1,
943  const float & a2,
944  const float & z1,
945  const float & z2)
946 {
947 
948  if(sgn(a1)!=sgn(a2)) currentsignchanges -= 1.0/len;
949  if(sgn(z1)!=sgn(z2)) currentsignchanges += 1.0/len;
950 
951 }
952 
953 
955  const WaveTrack & t, sampleCount start, sampleCount len)
956 {
957 
958 
959  auto s = start; //Keep track of start
960  auto originalLen = len; //Keep track of the length of block to process (its not the length of t)
961  const auto blockSize = limitSampleBufferSize(
962  t.GetMaxBlockSize(), len); //Determine size of sampling buffer
963  unsigned long directionchanges = 1;
964  float lastval=float(0);
965  int lastdirection=1;
966 
967  Floats buffer{ blockSize }; //Get a sampling buffer
968 
969  while(len > 0) {
970  //Figure out how much to grab
971  auto block = limitSampleBufferSize ( t.GetBestBlockSize(s), len );
972 
973  t.GetFloats(buffer.get(), s, block); //grab the block;
974 
975  if (len == originalLen) {
976  //The first time through, set stuff up special.
977  lastval = buffer[0];
978  }
979 
980  //Now, go through the block and calculate zero crossings
981 
982 
983  for(decltype(block) i = 0; i< block; i++){
984 
985  if( sgn(buffer[i]-lastval) != lastdirection) {
986  directionchanges++;
987  lastdirection = sgn(buffer[i] - lastval);
988  }
989  lastval = buffer[i];
990 
991  }
992  len -= block;
993  s += block;
994  }
995  return (double)directionchanges/originalLen.as_double();
996 }
997 
998 
999 
1000 
1001 // This method does an updating by looking at the trends
1002 // This will change currentdirections and atrend/trend, so be warned.
1003 void VoiceKey::TestDirectionChangesUpdate(double & currentdirectionchanges, int len,
1004  int & atrend, const float & a1, const float & a2,
1005  int & ztrend, const float & z1, const float & z2)
1006 {
1007 
1008  if(sgn(a2 - a1)!= atrend ) {
1009  //Here, the direction shifted for the item we're dropping.
1010  currentdirectionchanges -= 1.0/len;
1011  atrend = sgn(a2-a1);
1012  }
1013  if(sgn(z2 - z1)!= ztrend){
1014  //Here, the direction shifts when we add an item
1015  currentdirectionchanges += 1.0/len;
1016  ztrend = sgn(z2-z1);
1017  }
1018 
1019 }
WaveTrack.h
VoiceKey::AboveThreshold
bool AboveThreshold(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:660
VoiceKey::mSignalWindowSize
double mSignalWindowSize
Definition: VoiceKey.h:80
WaveTrack
A Track that contains audio waveform data.
Definition: WaveTrack.h:70
AudacityMessageBox
int AudacityMessageBox(const TranslatableString &message, const TranslatableString &caption, long style, wxWindow *parent, int x, int y)
Definition: AudacityMessageBox.cpp:17
VoiceKey::OffBackward
sampleCount OffBackward(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:521
VoiceKey::AdjustThreshold
void AdjustThreshold(double t)
Definition: VoiceKey.cpp:729
VoiceKey::VoiceKey
VoiceKey()
Definition: VoiceKey.cpp:41
VoiceKey::TestEnergy
double TestEnergy(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:857
sgn
int sgn(int number)
Definition: VoiceKey.h:98
XO
#define XO(s)
Definition: Internat.h:31
wxPanelWrapper.h
VoiceKey::TestDirectionChanges
double TestDirectionChanges(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:954
VoiceKey::OnForward
sampleCount OnForward(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:87
VoiceKey::TestSignChanges
double TestSignChanges(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:898
AudacityMessageDialog
Wrap wxMessageDialog so that caption IS translatable.
Definition: wxPanelWrapper.h:215
VoiceKey::mUseSignChangesLow
bool mUseSignChangesLow
Definition: VoiceKey.h:73
VoiceKey::mEnergyMean
double mEnergyMean
Definition: VoiceKey.h:58
VoiceKey::mWindowSize
double mWindowSize
Definition: VoiceKey.h:54
VoiceKey::TestEnergyUpdate
void TestEnergyUpdate(double &prevErg, int length, const float &drop, const float &add)
Definition: VoiceKey.cpp:890
sampleCount::as_double
double as_double() const
Definition: SampleCount.h:45
VoiceKey::SetKeyType
void SetKeyType(bool erg, bool scLow, bool scHigh, bool dcLow, bool dcHigh)
Definition: VoiceKey.cpp:845
VoiceKey::mThresholdSignChangesUpper
double mThresholdSignChangesUpper
Definition: VoiceKey.h:67
VoiceKey::TestDirectionChangesUpdate
void TestDirectionChangesUpdate(double &currentdirectionchanges, int length, int &atrend, const float &a1, const float &a2, int &ztrend, const float &z1, const float &z2)
Definition: VoiceKey.cpp:1003
VoiceKey.h
VoiceKey::mUseDirectionChangesLow
bool mUseDirectionChangesLow
Definition: VoiceKey.h:75
VoiceKey::mUseDirectionChangesHigh
bool mUseDirectionChangesHigh
Definition: VoiceKey.h:76
VoiceKey::mUseEnergy
bool mUseEnergy
Definition: VoiceKey.h:72
VoiceKey::mSignChangesMean
double mSignChangesMean
Definition: VoiceKey.h:60
VoiceKey::mThresholdDirectionChangesUpper
double mThresholdDirectionChangesUpper
Definition: VoiceKey.h:69
VoiceKey::TestSignChangesUpdate
void TestSignChangesUpdate(double &currentsignchanges, int length, const float &a1, const float &a2, const float &z1, const float &z2)
Definition: VoiceKey.cpp:941
VoiceKey::mThresholdEnergy
double mThresholdEnergy
Definition: VoiceKey.h:65
VoiceKey::~VoiceKey
~VoiceKey()
Definition: VoiceKey.cpp:69
VoiceKey::mSignChangesSD
double mSignChangesSD
Definition: VoiceKey.h:61
WaveTrack::GetMaxBlockSize
size_t GetMaxBlockSize() const
Definition: WaveTrack.cpp:1800
VoiceKey::CalibrateNoise
void CalibrateNoise(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:742
sampleCount
Positions or offsets within audio files need a wide type.
Definition: SampleCount.h:18
VoiceKey::mSilentWindowSize
double mSilentWindowSize
Definition: VoiceKey.h:79
VoiceKey::mEnergySD
double mEnergySD
Definition: VoiceKey.h:59
AudacityMessageBox.h
VoiceKey::OffForward
sampleCount OffForward(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:385
VoiceKey::mThresholdSignChangesLower
double mThresholdSignChangesLower
Definition: VoiceKey.h:66
VoiceKey::OnBackward
sampleCount OnBackward(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:239
VoiceKey::mDirectionChangesSD
double mDirectionChangesSD
Definition: VoiceKey.h:63
WaveTrack::GetBestBlockSize
size_t GetBestBlockSize(sampleCount t) const
Definition: WaveTrack.cpp:1782
VoiceKey::mThresholdDirectionChangesLower
double mThresholdDirectionChangesLower
Definition: VoiceKey.h:68
VoiceKey::mDirectionChangesMean
double mDirectionChangesMean
Definition: VoiceKey.h:62
VoiceKey::mThresholdAdjustment
double mThresholdAdjustment
Definition: VoiceKey.h:56
VoiceKey::mUseSignChangesHigh
bool mUseSignChangesHigh
Definition: VoiceKey.h:74
limitSampleBufferSize
size_t limitSampleBufferSize(size_t bufferSize, sampleCount limit)
Definition: SampleCount.cpp:23
WaveTrack::GetFloats
bool GetFloats(float *buffer, sampleCount start, size_t len, fillFormat fill=fillZero, bool mayThrow=true, sampleCount *pNumWithinClips=nullptr) const
Retrieve samples from a track in floating-point format, regardless of the storage format.
Definition: WaveTrack.h:269
ArrayOf< float >
WaveTrack::GetRate
double GetRate() const
Definition: WaveTrack.cpp:482