Audacity 3.2.0
VoiceKey.cpp
Go to the documentation of this file.
1/**********************************************************************
2
3 Audacity: A Digital Audio Editor
4
5 VoiceKey.cpp
6
7 ?? Dominic Mazzoni
8 ?? Shane Muller
9
10*******************************************************************//*******************************************************************/
19
20
21
22#include "VoiceKey.h"
23
24#include <wx/string.h>
25#include <math.h>
26#include <stdio.h>
27
28#include <wx/textfile.h>
29#include <wx/intl.h>
30#include <iostream>
31
32#include "WaveTrack.h"
35
36using std::cout;
37using std::endl;
38
39
40
42{
43
44 mWindowSize = 0.01; //size of analysis window in seconds
45
46 mEnergyMean = .0006; // reasonable initial levels assuming sampling rate of
47 mEnergySD = .0002; // 44100 hertz
48 mSignChangesMean = .08;
49 mSignChangesSD= .02;
52
54
55 mSilentWindowSize = .05; //Amount of time (in seconds) below threshold to call it silence
56 mSignalWindowSize = .05; //Amount of time (in seconds) above threshold to call it signal
57
58
59 mUseEnergy = true;
60 mUseSignChangesLow = false;
61 mUseSignChangesHigh = false;
64
65
66};
67
68
70{
71};
72
73
74
75//---------------------------------------------------------------------------
76// VoiceKey::On/Off Forward/Backward
77// This operates in two phases:
78// First, you take chunks of samples that are WindowSize big.
79// If you have a run of them where something passes the threshold for SignalWindowSize seconds,
80// you return to the last empty block and scan forward one sample at a time until you find the
81// starting point of the speech.
82
83
84
85
86//Move forward to find an ON region.
88 const WaveTrack & t, sampleCount start, sampleCount len)
89{
90
91 if((mWindowSize) >= (len + 10).as_double() ){
92
93 /* i18n-hint: Voice key is an experimental/incomplete feature that
94 is used to navigate in vocal recordings, to move forwards and
95 backwards by words. So 'key' is being used in the sense of an index.
96 This error message means that you've selected too short
97 a region of audio to be able to use this feature.*/
98 AudacityMessageBox( XO("Selection is too small to use voice key.") );
99 return start;
100 }
101 else {
102
103 //Change the millisecond-based parameters into sample-based parameters
104 double rate = t.GetRate(); //Translates seconds to samples
105 size_t WindowSizeInt = (rate * mWindowSize); //Size of window to examine
106 size_t SignalWindowSizeInt = (rate * mSignalWindowSize); //This much signal is necessary to trip key
107
108 auto samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
109 auto lastsubthresholdsample = start; //start this off at the selection start
110 // keeps track of the sample number of the last sample to not exceed the threshold
111
112 int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
113
114
115 //This loop goes through the selection a block at a time. If a long enough run
116 //of above-threshold blocks occur, we return to the last sub-threshold block and
117 //go through one sample at a time.
118 //If there are fewer than 10 samples leftover, don't bother.
119
120 for(auto i = start; samplesleft >= 10;
121 i += (WindowSizeInt - 1) , samplesleft -= (WindowSizeInt - 1)) {
122
123 //Set blocksize so that it is the right size
124 const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
125
126 //Test whether we are above threshold (the number of stats)
127 if(AboveThreshold(t,i,blocksize))
128 {
129 blockruns++; //Hit
130 } else {
131 blockruns=0; //Miss--start over
132 lastsubthresholdsample = i;
133 }
134
135 //If the blockrun is long enough, break out of the loop early:
136 if(blockruns > mSignalWindowSize/mWindowSize)
137 break;
138
139 }
140
141 //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
142 if(samplesleft > 10) {
143
144
145 //Calculate how many to scan through--we only have to go through (at most)
146 //the first window + 1 samples--but we need another window samples to draw from.
147 size_t remaining = 2*WindowSizeInt+1;
148
149 //To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
150 //Only go through the first SignalWindowSizeInt samples, and choose the first that trips the key.
151 Floats buffer{ remaining };
152 t.GetFloats(buffer.get(),
153 lastsubthresholdsample, remaining);
154
155
156
157 //Initialize these trend markers atrend and ztrend. They keep track of the
158 //up/down trends at the start and end of the evaluation window.
159 int atrend = sgn(buffer[1]-buffer[0]);
160 int ztrend = sgn(buffer[WindowSizeInt+1]-buffer[WindowSizeInt]);
161
162
163 double erg=0;
164 double sc=0;
165 double dc=0;
166
167 //Get initial test statistic values.
168 if(mUseEnergy)
169 erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt);
170
172 sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt);
173
175 dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
176
177
178 //Now, go through the sound again, sample by sample.
179 wxASSERT(WindowSizeInt < SignalWindowSizeInt);
180 size_t i;
181 for(i = 0; i + WindowSizeInt < SignalWindowSizeInt; i++) {
182
183 int tests = 0;
184 int testThreshold = 0;
185 //Update the test statistics
186 if(mUseEnergy)
187 {
188 TestEnergyUpdate(erg, WindowSizeInt,buffer[i],buffer[i+WindowSizeInt+1]);
189 tests += (int)(erg>mThresholdEnergy);
190 testThreshold++;
191 }
193 {
194 TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
195 tests += (int)(sc < mThresholdSignChangesLower);
196 testThreshold++;
197 }
198
200 {
201 TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
202 tests += (int)(sc > mThresholdSignChangesUpper);
203 testThreshold++;
204 }
205
207 {
208 TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
209 tests += (int)(dc < mThresholdDirectionChangesLower);
210 testThreshold++;
211 }
212
214 {
215 TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
216 tests += (int)(dc > mThresholdDirectionChangesUpper);
217 testThreshold++;
218 }
219
220
221
222 if(tests >= testThreshold)
223 { //Finish off on the first hit
224 break;
225 }
226 }
227
228 //When we get here, i+lastsubthresholdsample is the best guess for where the word starts
229 return i + lastsubthresholdsample;
230 }
231 else {
232 //If we failed to find anything, return the start position
233 return start ;
234 }
235 }
236}
237
238//Move backward from end to find an ON region.
240 const WaveTrack & t, sampleCount end, sampleCount len)
241{
242
243
244 if((mWindowSize) >= (len + 10).as_double() ){
245
246 AudacityMessageBox( XO("Selection is too small to use voice key.") );
247 return end;
248 }
249 else {
250
251 //Change the millisecond-based parameters into sample-based parameters
252 double rate = t.GetRate(); //Translates seconds to samples
253 size_t WindowSizeInt = (rate * mWindowSize); //Size of window to examine
254 //unsigned int SilentWindowSizeInt = (unsigned int)(rate * mSilentWindowSize); //This much signal is necessary to trip key
255
256 auto samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
257 auto lastsubthresholdsample = end; //start this off at the end
258 // keeps track of the sample number of the last sample to not exceed the threshold
259
260 int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
261
262
263 //This loop goes through the selection a block at a time in reverse order. If a long enough run
264 //of above-threshold blocks occur, we return to the last sub-threshold block and
265 //go through one sample at a time.
266 //If there are fewer than 10 samples leftover, don't bother.
267 for(auto i = end - WindowSizeInt; samplesleft >= 10;
268 i -= (WindowSizeInt - 1) , samplesleft -= (WindowSizeInt - 1)) {
269
270 //Set blocksize so that it is the right size
271
272 const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
273
274
275 //Test whether we are above threshold
276 if(AboveThreshold(t,i,blocksize))
277 {
278 blockruns++; //Hit
279 }
280 else
281 {
282 blockruns=0; //Miss--start over
283 lastsubthresholdsample = i+WindowSizeInt;
284 }
285
286 //If the blockrun is long enough, break out of the loop early:
287 if(blockruns > mSilentWindowSize/mWindowSize)
288 break;
289
290 }
291
292 //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
293 if(samplesleft > 10) {
294
295 //Calculate how many to scan through--we only have to go through (at most)
296 //the first window + 1 samples--but we need another window samples to draw from.
297 size_t remaining = 2*WindowSizeInt+1;
298
299 //To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
300 //Only go through the first mSilentWindowSizeInt samples, and choose the first that trips the key.
301 Floats buffer{ remaining };
302 t.GetFloats(buffer.get(),
303 lastsubthresholdsample - remaining, remaining);
304
305 //Initialize these trend markers atrend and ztrend. They keep track of the
306 //up/down trends at the start and end of the evaluation window.
307 int atrend = sgn(buffer[remaining - 2]-buffer[remaining - 1]);
308
309 int ztrend = sgn(buffer[remaining - WindowSizeInt - 2] -
310 buffer[remaining - WindowSizeInt
311 // PVS-Studio detected a probable error here
312 // when it read - 2.
313 // is - 1 correct?
314 // This code is unused. I didn't study further.
315 - 1
316 ]);
317
318 double erg=0;
319 double sc = 0;
320 double dc = 0;
321
322 //Get initial test statistic values.
323 if(mUseEnergy)
324 erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt);
326 sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt);
328 dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
329
330 //Now, go through the sound again, sample by sample.
331 size_t i;
332 for(i = remaining - 1; i > WindowSizeInt; i--) {
333 int tests = 0;
334 int testThreshold = 0;
335 //Update the test statistics
336 if(mUseEnergy)
337 {
338 TestEnergyUpdate(erg, WindowSizeInt,buffer[i],buffer[i+WindowSizeInt+1]);
339 tests += (int)(erg>mThresholdEnergy);
340 testThreshold++;
341 }
343 {
344 TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
345 tests += (int)(sc < mThresholdSignChangesLower);
346 testThreshold++;
347 }
349 {
350 TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
351 tests += (int)(sc > mThresholdSignChangesUpper);
352 testThreshold++;
353 }
355 {
356 TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
357 tests += (int)(dc < mThresholdDirectionChangesLower);
358 testThreshold++;
359 }
361 {
362 TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
363 tests += (int)(dc > mThresholdDirectionChangesUpper);
364 testThreshold++;
365 }
366
367 if(tests >= testThreshold)
368 { //Finish off on the first hit
369 break;
370 }
371 }
372
373 //When we get here, i+lastsubthresholdsample is the best guess for where the word starts
374 return lastsubthresholdsample - remaining + i;
375 }
376 else {
377 //If we failed to find anything, return the start position
378 return end ;
379 }
380 }
381}
382
383
384//Move forward from the start to find an OFF region.
386 const WaveTrack & t, sampleCount start, sampleCount len)
387{
388
389 if((mWindowSize) >= (len + 10).as_double() ){
390 AudacityMessageBox( XO("Selection is too small to use voice key.") );
391
392 return start;
393 }
394 else {
395
396
397 //Change the millisecond-based parameters into sample-based parameters
398 double rate = t.GetRate(); //Translates seconds to samples
399 unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize); //Size of window to examine
400 unsigned int SilentWindowSizeInt = (unsigned int)(rate * mSilentWindowSize); //This much signal is necessary to trip key
401
402 sampleCount samplesleft ( len.as_double() - WindowSizeInt ); //Indexes the number of samples remaining in the selection
403 auto lastsubthresholdsample = start; //start this off at the selection start
404 // keeps track of the sample number of the last sample to not exceed the threshold
405
406 int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
407
408 //This loop goes through the selection a block at a time. If a long enough run
409 //of above-threshold blocks occur, we return to the last sub-threshold block and
410 //go through one sample at a time.
411 //If there are fewer than 10 samples leftover, don't bother.
412 for(auto i = start; samplesleft >= 10;
413 i += (WindowSizeInt - 1) , samplesleft -= (WindowSizeInt - 1)) {
414
415 //Set blocksize so that it is the right size
416 const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
417
418 if(!AboveThreshold(t,i,blocksize))
419 {
420 blockruns++; //Hit
421 }
422 else
423 {
424 blockruns=0; //Above threshold--start over
425 lastsubthresholdsample = i;
426 }
427
428 //If the blockrun is long enough, break out of the loop early:
429 if(blockruns > mSilentWindowSize/mWindowSize)
430 break;
431
432 }
433
434 //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
435 if(samplesleft > 10) {
436
437
438 //Calculate how many to scan through--we only have to go through (at most)
439 //the first window + 1 samples--but we need another window samples to draw from.
440 size_t remaining = 2*WindowSizeInt+1;
441
442 //To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
443 //Only go through the first SilentWindowSizeInt samples, and choose the first that trips the key.
444 Floats buffer{ remaining };
445 t.GetFloats(buffer.get(),
446 lastsubthresholdsample, remaining);
447
448 //Initialize these trend markers atrend and ztrend. They keep track of the
449 //up/down trends at the start and end of the evaluation window.
450 int atrend = sgn(buffer[1]-buffer[0]);
451 int ztrend = sgn(buffer[WindowSizeInt+1]-buffer[WindowSizeInt]);
452
453
454 double erg=0;
455 double sc=0;
456 double dc=0;
457
458 //Get initial test statistic values.
459 if(mUseEnergy)
460 erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt);
462 sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt);
464 dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
465
466 //Now, go through the sound again, sample by sample.
467 size_t i;
468 for(i = 0; i < SilentWindowSizeInt - WindowSizeInt; i++) {
469 int tests = 0;
470 int testThreshold = 0;
471 //Update the test statistics
472 if(mUseEnergy)
473 {
474 TestEnergyUpdate(erg, WindowSizeInt,buffer[i],buffer[i+WindowSizeInt+1]);
475 tests += (int)(erg>mThresholdEnergy);
476 testThreshold++;
477 }
479 {
480 TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
481 tests += (int)(sc < mThresholdSignChangesLower);
482 testThreshold++;
483 }
485 {
486 TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
487 tests += (int)(sc > mThresholdSignChangesUpper);
488 testThreshold++;
489 }
491 {
492 TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
493 tests += (int)(dc < mThresholdDirectionChangesLower);
494 testThreshold++;
495 }
497 {
498 TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
499 tests += (int)(dc > mThresholdDirectionChangesUpper);
500 testThreshold++;
501 }
502
503 if(tests < testThreshold)
504 { //Finish off on the first below-threshold block
505 break;
506 }
507 }
508
509 //When we get here, i+lastsubthresholdsample is the best guess for where the word starts
510 return i + lastsubthresholdsample;
511 }
512 else {
513 //If we failed to find anything, return the start position
514 return start ;
515 }
516 }
517}
518
519
520//Move backward from the end to find an OFF region
522 const WaveTrack & t, sampleCount end, sampleCount len)
523{
524
525
526 if((mWindowSize) >= (len + 10).as_double() ){
527
528 AudacityMessageBox( XO("Selection is too small to use voice key.") );
529 return end;
530 }
531 else {
532
533 //Change the millisecond-based parameters into sample-based parameters
534 double rate = t.GetRate(); //Translates seconds to samples
535 unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize); //Size of window to examine
536 //unsigned int SilentWindowSizeInt = (unsigned int)(rate * mSilentWindowSize); //This much signal is necessary to trip key
537
538 auto samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
539 auto lastsubthresholdsample = end; //start this off at the end
540 // keeps track of the sample number of the last sample to not exceed the threshold
541
542 int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
543
544 //This loop goes through the selection a block at a time in reverse order. If a long enough run
545 //of above-threshold blocks occur, we return to the last sub-threshold block and
546 //go through one sample at a time.
547 //If there are fewer than 10 samples leftover, don't bother.
548 for(auto i = end - WindowSizeInt; samplesleft >= 10;
549 i -= (WindowSizeInt - 1), samplesleft -= (WindowSizeInt -1 )) {
550
551 //Set blocksize so that it is the right size
552 const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
553
554 if(!AboveThreshold(t,i,blocksize))
555 {
556
557 blockruns++; //Hit
558 }
559 else
560 {
561 blockruns=0; //Miss--start over
562 lastsubthresholdsample = i+WindowSizeInt;
563
564 }
565
566 //If the blockrun is long enough, break out of the loop early:
567 if(blockruns > mSilentWindowSize/mWindowSize)
568 break;
569
570 }
571
572 //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
573 if(samplesleft > 10) {
574
575 //Calculate how many to scan through--we only have to go through (at most)
576 //the first window + 1 samples--but we need another window samples to draw from.
577 const size_t remaining = 2*WindowSizeInt+1;
578
579 //To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
580 //Only go through the first SilentWindowSizeInt samples, and choose the first that trips the key.
581 Floats buffer{ remaining };
582 t.GetFloats(buffer.get(),
583 lastsubthresholdsample - remaining, remaining);
584
585 //Initialize these trend markers atrend and ztrend. They keep track of the
586 //up/down trends at the start and end of the remaining window.
587 int atrend = sgn(buffer[remaining - 2] - buffer[remaining - 1]);
588 int ztrend =
589 sgn(buffer[remaining - WindowSizeInt - 2] -
590 buffer[remaining - WindowSizeInt - 2]);
591
592 double erg=0;
593 double sc=0;
594 double dc=0;
595 //Get initial test statistic values.
596 if(mUseEnergy)
597 erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt);
599 sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt);
601 dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
602
603 //Now, go through the sound again, sample by sample.
604 size_t i;
605 for(i = remaining - 1; i > WindowSizeInt; i--) {
606
607 int tests = 0;
608 int testThreshold = 0;
609 //Update the test statistics
610 if(mUseEnergy)
611 {
612 TestEnergyUpdate(erg, WindowSizeInt,buffer[i],buffer[i+WindowSizeInt+1]);
613 tests += (int)(erg>mThresholdEnergy);
614 testThreshold++;
615 }
617 {
618 TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
619 tests += (int)(sc < mThresholdSignChangesLower);
620 testThreshold++;
621 }
623 {
624 TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
625 tests += (int)(sc > mThresholdSignChangesUpper);
626 testThreshold++;
627 }
629 {
630 TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
631 tests += (int)(dc < mThresholdDirectionChangesLower);
632 testThreshold++;
633 }
635 {
636 TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
637 tests += (int)(dc > mThresholdDirectionChangesUpper);
638 testThreshold++;
639 }
640
641
642
643 if(tests < testThreshold)
644 { //Finish off on the first hit
645 break;
646 }
647 }
648
649 //When we get here, i+lastsubthresholdsample is the best guess for where the word starts
650 return lastsubthresholdsample - remaining + i;
651 }
652 else {
653 //If we failed to find anything, return the start position
654 return end ;
655 }
656 }
657}
658
659//This tests whether a specified block region is above or below threshold.
661 const WaveTrack & t, sampleCount start, sampleCount len)
662{
663
664 double erg=0;
665 double sc=0;
666 double dc=0; //These store three statistics: energy, signchanges, and directionchanges
667 int tests =0; //Keeps track of how many statistics surpass the threshold.
668 int testThreshold=0; //Keeps track of the threshold.
669
670 //Calculate the test statistics
671 if(mUseEnergy)
672 {
673 testThreshold++;
674 erg = TestEnergy(t, start,len);
675 tests +=(int)(erg > mThresholdEnergy);
676#if 0
677 std::cout << "Energy: " << erg << " " <<mThresholdEnergy << std::endl;
678#endif
679 }
680
682 {
683 testThreshold++;
684 sc = TestSignChanges(t,start,len);
685 tests += (int)(sc < mThresholdSignChangesLower);
686#if 0
687 std::cout << "SignChanges: " << sc << " " <<mThresholdSignChangesLower<< " < " << mThresholdSignChangesUpper << std::endl;
688#endif
689
690 }
692 {
693 testThreshold++;
694 sc = TestSignChanges(t,start,len);
695 tests += (int)(sc > mThresholdSignChangesUpper);
696#if 0
697 std::cout << "SignChanges: " << sc << " " <<mThresholdSignChangesLower<< " < " << mThresholdSignChangesUpper << std::endl;
698#endif
699
700 }
701
702
704 {
705 testThreshold++;
706 dc = TestDirectionChanges(t,start,len);
707 tests += (int)(dc < mThresholdDirectionChangesLower);
708#if 0
709 std::cout << "DirectionChanges: " << dc << " " <<mThresholdDirectionChangesLower<< " < " << mThresholdDirectionChangesUpper << std::endl;
710#endif
711 }
713 {
714 testThreshold++;
715 dc = TestDirectionChanges(t,start,len);
716 tests += (int)(dc > mThresholdDirectionChangesUpper);
717#if 0
718 std::cout << "DirectionChanges: " << dc << " " <<mThresholdDirectionChangesLower<< " < " << mThresholdDirectionChangesUpper << std::endl;
719#endif
720 }
721
722 //Test whether we are above threshold (the number of stats)
723 return (tests >= testThreshold);
724
725}
726
727//This adjusts the threshold. Larger values of t expand the noise region,
728//making more things be classified as noise (and requiring a stronger signal).
730{
731
738};
739
740
741//This 'calibrates' the voicekey to noise
743{
744 //To calibrate the noise, we need to scan the sample block just like in the voicekey and
745 //calculate the mean and standard deviation of the test statistics.
746 //Then, we set the BaselineThreshold to be one
747
748 wxBusyCursor busy;
749
750 //initialize some sample statistics: sums of X and X^2
751
752 double sumerg, sumerg2;
753 double sumsc, sumsc2;
754 double sumdc, sumdc2;
755 double erg, sc, dc;
756 //Now, change the millisecond-based parameters into sample-based parameters
757 //(This depends on WaveTrack t)
758 double rate = t.GetRate();
759 unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize);
760 // unsigned int SignalWindowSizeInt = (unsigned int)(rate * mSignalWindowSize);
761
762
763 //Get the first test statistics
764
765 //Calibrate all of the statistic, because they might be
766 //changed later.
767
768 // if(mUseEnergy)
769 erg = TestEnergy(t, start, WindowSizeInt);
770
771 // if(mUseSignChanges)
772 sc = TestSignChanges(t,start, WindowSizeInt);
773
774 // if(mUseDirectionChanges)
775 dc = TestDirectionChanges(t,start,WindowSizeInt);
776
777 sumerg =0.0;
778 sumerg2 = 0.0;
779 sumsc =0.0;
780 sumsc2 = 0.0;
781 sumdc =0.0;
782 sumdc2 =0.0;
783
784
785 // int n = len - WindowSizeInt; //This is how many samples we have
786 auto samplesleft = len - WindowSizeInt;
787 int samples=0;
788
789 for(auto i = start; samplesleft >= 10;
790 i += (WindowSizeInt - 1), samplesleft -= (WindowSizeInt -1) ) {
791 //Take samples chunk-by-chunk.
792 //Normally, this should be in WindowSizeInt chunks, but at the end (if there are more than 10
793 //samples left) take a chunk that eats the rest of the samples.
794
795 samples++; //Increment the number of samples we have
796 const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
797
798 erg = TestEnergy(t, i, blocksize);
799 sumerg +=(double)erg;
800 sumerg2 += pow((double)erg,2);
801
802 sc = TestSignChanges(t,i, blocksize);
803 sumsc += (double)sc;
804 sumsc2 += pow((double)sc,2);
805
806
807 dc = TestDirectionChanges(t,i,blocksize);
808 sumdc += (double)dc;
809 sumdc2 += pow((double)dc,2);
810 }
811
812 mEnergyMean = sumerg / samples;
813 mEnergySD = sqrt(sumerg2/samples - mEnergyMean*mEnergyMean);
814
815 mSignChangesMean = sumsc / samples;
816 mSignChangesSD = sqrt(sumsc2 / samples - mSignChangesMean * mSignChangesMean);
817
818 mDirectionChangesMean = sumdc / samples;
820
821 auto text = XO("Calibration Results\n");
822 text +=
823 /* i18n-hint: %1.4f is replaced by a number. sd stands for 'Standard Deviations'*/
824 XO("Energy -- mean: %1.4f sd: (%1.4f)\n")
825 .Format( mEnergyMean, mEnergySD );
826 text +=
827 XO("Sign Changes -- mean: %1.4f sd: (%1.4f)\n")
829 text +=
830 XO("Direction Changes -- mean: %1.4f sd: (%1.4f)\n")
833 nullptr,
834 text,
835 XO("Calibration Complete"),
836 wxOK | wxICON_INFORMATION,
837 wxPoint(-1, -1)
838 }
839 .ShowModal();
840
842}
843
844
845void VoiceKey::SetKeyType(bool erg, bool scLow , bool scHigh,
846 bool dcLow, bool dcHigh)
847{
848 mUseEnergy = erg;
849 mUseSignChangesLow = scLow;
850 mUseSignChangesHigh = scHigh;
853}
854
855
856//This might continue over a number of blocks.
858 const WaveTrack & t, sampleCount start, sampleCount len)
859{
860
861 double sum = 1;
862 auto s = start; //Keep track of start
863 auto originalLen = len; //Keep track of the length of block to process (its not the length of t)
864 const auto blockSize = limitSampleBufferSize(
865 t.GetMaxBlockSize(), len); //Determine size of sampling buffer
866 Floats buffer{ blockSize }; //Get a sampling buffer
867
868 while(len > 0)
869 {
870 //Figure out how much to grab
871 auto block = limitSampleBufferSize ( t.GetBestBlockSize(s), len );
872
873 t.GetFloats(buffer.get(), s,block); //grab the block;
874
875 //Now, go through the block and calculate energy
876 for(decltype(block) i = 0; i< block; i++)
877 {
878 sum += buffer[i]*buffer[i];
879 }
880
881 len -= block;
882 s += block;
883 }
884
885 return sum / originalLen.as_double();
886}
887
888
889//This will update RMSE by adding one element and subtracting another
890void VoiceKey::TestEnergyUpdate (double & prevErg, int len, const float & drop, const float & add)
891{
892 //This is an updating formula for RMSE. It will only recalculate what's changed.
893 prevErg = prevErg + (double)(fabs(add) - fabs(drop))/len;
894
895}
896
897
899 const WaveTrack & t, sampleCount start, sampleCount len)
900{
901
902
903 auto s = start; //Keep track of start
904 auto originalLen = len; //Keep track of the length of block to process (its not the length of t)
905 const auto blockSize = limitSampleBufferSize(
906 t.GetMaxBlockSize(), len); //Determine size of sampling buffer
907 unsigned long signchanges = 1;
908 int currentsign=0;
909
910 Floats buffer{ blockSize }; //Get a sampling buffer
911
912 while(len > 0) {
913 //Figure out how much to grab
914 auto block = limitSampleBufferSize ( t.GetBestBlockSize(s), len );
915
916 t.GetFloats(buffer.get(), s, block); //grab the block;
917
918 if (len == originalLen)
919 {
920 //The first time through, set stuff up special.
921 currentsign = sgn(buffer[0]);
922 }
923
924 //Now, go through the block and calculate zero crossings
925
926 for(decltype(block) i = 0; i< block; i++)
927 {
928 if( sgn(buffer[i]) != currentsign)
929 {
930 currentsign = sgn(buffer[i]);
931 signchanges++;
932 }
933
934 }
935 len -= block;
936 s += block;
937 }
938 return (double)signchanges / originalLen.as_double();
939}
940
941void VoiceKey::TestSignChangesUpdate(double & currentsignchanges, int len,
942 const float & a1,
943 const float & a2,
944 const float & z1,
945 const float & z2)
946{
947
948 if(sgn(a1)!=sgn(a2)) currentsignchanges -= 1.0/len;
949 if(sgn(z1)!=sgn(z2)) currentsignchanges += 1.0/len;
950
951}
952
953
955 const WaveTrack & t, sampleCount start, sampleCount len)
956{
957
958
959 auto s = start; //Keep track of start
960 auto originalLen = len; //Keep track of the length of block to process (its not the length of t)
961 const auto blockSize = limitSampleBufferSize(
962 t.GetMaxBlockSize(), len); //Determine size of sampling buffer
963 unsigned long directionchanges = 1;
964 float lastval=float(0);
965 int lastdirection=1;
966
967 Floats buffer{ blockSize }; //Get a sampling buffer
968
969 while(len > 0) {
970 //Figure out how much to grab
971 auto block = limitSampleBufferSize ( t.GetBestBlockSize(s), len );
972
973 t.GetFloats(buffer.get(), s, block); //grab the block;
974
975 if (len == originalLen) {
976 //The first time through, set stuff up special.
977 lastval = buffer[0];
978 }
979
980 //Now, go through the block and calculate zero crossings
981
982
983 for(decltype(block) i = 0; i< block; i++){
984
985 if( sgn(buffer[i]-lastval) != lastdirection) {
986 directionchanges++;
987 lastdirection = sgn(buffer[i] - lastval);
988 }
989 lastval = buffer[i];
990
991 }
992 len -= block;
993 s += block;
994 }
995 return (double)directionchanges/originalLen.as_double();
996}
997
998
999
1000
1001// This method does an updating by looking at the trends
1002// This will change currentdirections and atrend/trend, so be warned.
1003void VoiceKey::TestDirectionChangesUpdate(double & currentdirectionchanges, int len,
1004 int & atrend, const float & a1, const float & a2,
1005 int & ztrend, const float & z1, const float & z2)
1006{
1007
1008 if(sgn(a2 - a1)!= atrend ) {
1009 //Here, the direction shifted for the item we're dropping.
1010 currentdirectionchanges -= 1.0/len;
1011 atrend = sgn(a2-a1);
1012 }
1013 if(sgn(z2 - z1)!= ztrend){
1014 //Here, the direction shifts when we add an item
1015 currentdirectionchanges += 1.0/len;
1016 ztrend = sgn(z2-z1);
1017 }
1018
1019}
int AudacityMessageBox(const TranslatableString &message, const TranslatableString &caption, long style, wxWindow *parent, int x, int y)
#define XO(s)
Definition: Internat.h:31
size_t limitSampleBufferSize(size_t bufferSize, sampleCount limit)
Definition: SampleCount.cpp:23
int sgn(int number)
Definition: VoiceKey.h:98
Wrap wxMessageDialog so that caption IS translatable.
bool GetFloats(float *buffer, sampleCount start, size_t len, fillFormat fill=fillZero, bool mayThrow=true, sampleCount *pNumWithinClips=nullptr) const
Retrieve samples from a track in floating-point format, regardless of the storage format.
Definition: SampleTrack.h:65
double mThresholdAdjustment
Definition: VoiceKey.h:56
bool mUseSignChangesHigh
Definition: VoiceKey.h:74
void TestDirectionChangesUpdate(double &currentdirectionchanges, int length, int &atrend, const float &a1, const float &a2, int &ztrend, const float &z1, const float &z2)
Definition: VoiceKey.cpp:1003
sampleCount OnBackward(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:239
VoiceKey()
Definition: VoiceKey.cpp:41
double TestDirectionChanges(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:954
double mSignalWindowSize
Definition: VoiceKey.h:80
bool mUseSignChangesLow
Definition: VoiceKey.h:73
double mThresholdDirectionChangesUpper
Definition: VoiceKey.h:69
void AdjustThreshold(double t)
Definition: VoiceKey.cpp:729
double mThresholdDirectionChangesLower
Definition: VoiceKey.h:68
double TestEnergy(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:857
bool mUseDirectionChangesLow
Definition: VoiceKey.h:75
double mEnergySD
Definition: VoiceKey.h:59
double mSignChangesSD
Definition: VoiceKey.h:61
double mSignChangesMean
Definition: VoiceKey.h:60
sampleCount OffForward(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:385
void TestEnergyUpdate(double &prevErg, int length, const float &drop, const float &add)
Definition: VoiceKey.cpp:890
bool AboveThreshold(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:660
double mThresholdSignChangesLower
Definition: VoiceKey.h:66
double mWindowSize
Definition: VoiceKey.h:54
bool mUseDirectionChangesHigh
Definition: VoiceKey.h:76
double mThresholdEnergy
Definition: VoiceKey.h:65
double TestSignChanges(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:898
double mEnergyMean
Definition: VoiceKey.h:58
void CalibrateNoise(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:742
~VoiceKey()
Definition: VoiceKey.cpp:69
bool mUseEnergy
Definition: VoiceKey.h:72
void SetKeyType(bool erg, bool scLow, bool scHigh, bool dcLow, bool dcHigh)
Definition: VoiceKey.cpp:845
double mSilentWindowSize
Definition: VoiceKey.h:79
void TestSignChangesUpdate(double &currentsignchanges, int length, const float &a1, const float &a2, const float &z1, const float &z2)
Definition: VoiceKey.cpp:941
sampleCount OffBackward(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:521
double mThresholdSignChangesUpper
Definition: VoiceKey.h:67
sampleCount OnForward(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:87
double mDirectionChangesSD
Definition: VoiceKey.h:63
double mDirectionChangesMean
Definition: VoiceKey.h:62
A Track that contains audio waveform data.
Definition: WaveTrack.h:57
size_t GetMaxBlockSize() const override
This returns a nonnegative number of samples meant to size a memory buffer.
Definition: WaveTrack.cpp:1806
size_t GetBestBlockSize(sampleCount t) const override
This returns a nonnegative number of samples meant to size a memory buffer.
Definition: WaveTrack.cpp:1788
double GetRate() const override
Definition: WaveTrack.cpp:481
Positions or offsets within audio files need a wide type.
Definition: SampleCount.h:18
double as_double() const
Definition: SampleCount.h:45
auto end(const Ptr< Type, BaseDeleter > &p)
Enables range-for, if Traits<Type>::iterated_type is defined.
Definition: PackedArray.h:126