Audacity 3.2.0
VoiceKey.cpp
Go to the documentation of this file.
1/**********************************************************************
2
3 Audacity: A Digital Audio Editor
4
5 VoiceKey.cpp
6
7 ?? Dominic Mazzoni
8 ?? Shane Muller
9
10*******************************************************************//*******************************************************************/
19
20
21
22#include "VoiceKey.h"
23
24#include <math.h>
25
26#include <wx/textfile.h>
27#include <iostream>
28
29#include "WaveTrack.h"
30#include "AudacityMessageBox.h"
31#include "wxPanelWrapper.h"
32
33using std::cout;
34using std::endl;
35
36
37
39{
40
41 mWindowSize = 0.01; //size of analysis window in seconds
42
43 mEnergyMean = .0006; // reasonable initial levels assuming sampling rate of
44 mEnergySD = .0002; // 44100 hertz
45 mSignChangesMean = .08;
46 mSignChangesSD= .02;
49
51
52 mSilentWindowSize = .05; //Amount of time (in seconds) below threshold to call it silence
53 mSignalWindowSize = .05; //Amount of time (in seconds) above threshold to call it signal
54
55
56 mUseEnergy = true;
57 mUseSignChangesLow = false;
58 mUseSignChangesHigh = false;
61
62
63};
64
65
67{
68};
69
70
71
72//---------------------------------------------------------------------------
73// VoiceKey::On/Off Forward/Backward
74// Each of these searches operates in two phases:
75// First, the selection is examined in chunks of samples that are WindowSize big.
76// Second, once a run of those chunks has passed the threshold for SignalWindowSize seconds,
77// the search returns to the last sub-threshold block and scans one sample at a time until it
78// finds the starting point of the speech.
79
80
81
82
83//Move forward to find an ON region.
85 const WaveChannel & t, sampleCount start, sampleCount len)
86{
87
88 if((mWindowSize) >= (len + 10).as_double() ){
89
90 /* i18n-hint: Voice key is an experimental/incomplete feature that
91 is used to navigate in vocal recordings, to move forwards and
92 backwards by words. So 'key' is being used in the sense of an index.
93 This error message means that you've selected too short
94 a region of audio to be able to use this feature.*/
95 AudacityMessageBox( XO("Selection is too small to use voice key.") );
96 return start;
97 }
98 else {
99
100 //Change the millisecond-based parameters into sample-based parameters
101 double rate = t.GetRate(); //Translates seconds to samples
102 size_t WindowSizeInt = (rate * mWindowSize); //Size of window to examine
103 size_t SignalWindowSizeInt = (rate * mSignalWindowSize); //This much signal is necessary to trip key
104
105 auto samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
106 auto lastsubthresholdsample = start; //start this off at the selection start
107 // keeps track of the sample number of the last sample to not exceed the threshold
108
109 int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
110
111
112 //This loop goes through the selection a block at a time. If a long enough run
113 //of above-threshold blocks occur, we return to the last sub-threshold block and
114 //go through one sample at a time.
115 //If there are fewer than 10 samples leftover, don't bother.
116
117 for(auto i = start; samplesleft >= 10;
118 i += (WindowSizeInt - 1) , samplesleft -= (WindowSizeInt - 1)) {
119
120 //Set blocksize so that it is the right size
121 const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
122
123 //Test whether we are above threshold (the number of stats)
124 if(AboveThreshold(t,i,blocksize))
125 {
126 blockruns++; //Hit
127 } else {
128 blockruns=0; //Miss--start over
129 lastsubthresholdsample = i;
130 }
131
132 //If the blockrun is long enough, break out of the loop early:
133 if(blockruns > mSignalWindowSize/mWindowSize)
134 break;
135
136 }
137
138 //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
139 if(samplesleft > 10) {
140
141
142 //Calculate how many to scan through--we only have to go through (at most)
143 //the first window + 1 samples--but we need another window samples to draw from.
144 size_t remaining = 2*WindowSizeInt+1;
145
146 //To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
147 //Only go through the first SignalWindowSizeInt samples, and choose the first that trips the key.
148 Floats buffer{ remaining };
149 t.GetFloats(buffer.get(),
150 lastsubthresholdsample, remaining);
151
152
153
154 //Initialize these trend markers atrend and ztrend. They keep track of the
155 //up/down trends at the start and end of the evaluation window.
156 int atrend = sgn(buffer[1]-buffer[0]);
157 int ztrend = sgn(buffer[WindowSizeInt+1]-buffer[WindowSizeInt]);
158
159
160 double erg=0;
161 double sc=0;
162 double dc=0;
163
164 //Get initial test statistic values.
165 if(mUseEnergy)
166 erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt);
167
169 sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt);
170
172 dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
173
174
175 //Now, go through the sound again, sample by sample.
176 wxASSERT(WindowSizeInt < SignalWindowSizeInt);
177 size_t i;
178 for(i = 0; i + WindowSizeInt < SignalWindowSizeInt; i++) {
179
180 int tests = 0;
181 int testThreshold = 0;
182 //Update the test statistics
183 if(mUseEnergy)
184 {
185 TestEnergyUpdate(erg, WindowSizeInt,buffer[i],buffer[i+WindowSizeInt+1]);
186 tests += (int)(erg>mThresholdEnergy);
187 testThreshold++;
188 }
190 {
191 TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
192 tests += (int)(sc < mThresholdSignChangesLower);
193 testThreshold++;
194 }
195
197 {
198 TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
199 tests += (int)(sc > mThresholdSignChangesUpper);
200 testThreshold++;
201 }
202
204 {
205 TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
206 tests += (int)(dc < mThresholdDirectionChangesLower);
207 testThreshold++;
208 }
209
211 {
212 TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
213 tests += (int)(dc > mThresholdDirectionChangesUpper);
214 testThreshold++;
215 }
216
217
218
219 if(tests >= testThreshold)
220 { //Finish off on the first hit
221 break;
222 }
223 }
224
225 //When we get here, i+lastsubthresholdsample is the best guess for where the word starts
226 return i + lastsubthresholdsample;
227 }
228 else {
229 //If we failed to find anything, return the start position
230 return start ;
231 }
232 }
233}
234
235//Move backward from end to find an ON region.
237 const WaveChannel & t, sampleCount end, sampleCount len)
238{
239
240
241 if((mWindowSize) >= (len + 10).as_double() ){
242
243 AudacityMessageBox( XO("Selection is too small to use voice key.") );
244 return end;
245 }
246 else {
247
248 //Change the millisecond-based parameters into sample-based parameters
249 double rate = t.GetRate(); //Translates seconds to samples
250 size_t WindowSizeInt = (rate * mWindowSize); //Size of window to examine
251 //unsigned int SilentWindowSizeInt = (unsigned int)(rate * mSilentWindowSize); //This much signal is necessary to trip key
252
253 auto samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
254 auto lastsubthresholdsample = end; //start this off at the end
255 // keeps track of the sample number of the last sample to not exceed the threshold
256
257 int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
258
259
260 //This loop goes through the selection a block at a time in reverse order. If a long enough run
261 //of above-threshold blocks occur, we return to the last sub-threshold block and
262 //go through one sample at a time.
263 //If there are fewer than 10 samples leftover, don't bother.
264 for(auto i = end - WindowSizeInt; samplesleft >= 10;
265 i -= (WindowSizeInt - 1) , samplesleft -= (WindowSizeInt - 1)) {
266
267 //Set blocksize so that it is the right size
268
269 const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
270
271
272 //Test whether we are above threshold
273 if(AboveThreshold(t,i,blocksize))
274 {
275 blockruns++; //Hit
276 }
277 else
278 {
279 blockruns=0; //Miss--start over
280 lastsubthresholdsample = i+WindowSizeInt;
281 }
282
283 //If the blockrun is long enough, break out of the loop early:
284 if(blockruns > mSilentWindowSize/mWindowSize)
285 break;
286
287 }
288
289 //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
290 if(samplesleft > 10) {
291
292 //Calculate how many to scan through--we only have to go through (at most)
293 //the first window + 1 samples--but we need another window samples to draw from.
294 size_t remaining = 2*WindowSizeInt+1;
295
296 //To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
297 //Only go through the first mSilentWindowSizeInt samples, and choose the first that trips the key.
298 Floats buffer{ remaining };
299 t.GetFloats(buffer.get(),
300 lastsubthresholdsample - remaining, remaining);
301
302 //Initialize these trend markers atrend and ztrend. They keep track of the
303 //up/down trends at the start and end of the evaluation window.
304 int atrend = sgn(buffer[remaining - 2]-buffer[remaining - 1]);
305
306 int ztrend = sgn(buffer[remaining - WindowSizeInt - 2] -
307 buffer[remaining - WindowSizeInt
308 // PVS-Studio detected a probable error here
309 // when it read - 2.
310 // is - 1 correct?
311 // This code is unused. I didn't study further.
312 - 1
313 ]);
314
315 double erg=0;
316 double sc = 0;
317 double dc = 0;
318
319 //Get initial test statistic values.
320 if(mUseEnergy)
321 erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt);
323 sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt);
325 dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
326
327 //Now, go through the sound again, sample by sample.
328 size_t i;
329 for(i = remaining - 1; i > WindowSizeInt; i--) {
330 int tests = 0;
331 int testThreshold = 0;
332 //Update the test statistics
333 if(mUseEnergy)
334 {
335 TestEnergyUpdate(erg, WindowSizeInt,buffer[i],buffer[i+WindowSizeInt+1]);
336 tests += (int)(erg>mThresholdEnergy);
337 testThreshold++;
338 }
340 {
341 TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
342 tests += (int)(sc < mThresholdSignChangesLower);
343 testThreshold++;
344 }
346 {
347 TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
348 tests += (int)(sc > mThresholdSignChangesUpper);
349 testThreshold++;
350 }
352 {
353 TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
354 tests += (int)(dc < mThresholdDirectionChangesLower);
355 testThreshold++;
356 }
358 {
359 TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
360 tests += (int)(dc > mThresholdDirectionChangesUpper);
361 testThreshold++;
362 }
363
364 if(tests >= testThreshold)
365 { //Finish off on the first hit
366 break;
367 }
368 }
369
370 //When we get here, i+lastsubthresholdsample is the best guess for where the word starts
371 return lastsubthresholdsample - remaining + i;
372 }
373 else {
374 //If we failed to find anything, return the start position
375 return end ;
376 }
377 }
378}
379
380
381//Move forward from the start to find an OFF region.
383 const WaveChannel & t, sampleCount start, sampleCount len)
384{
385
386 if((mWindowSize) >= (len + 10).as_double() ){
387 AudacityMessageBox( XO("Selection is too small to use voice key.") );
388
389 return start;
390 }
391 else {
392
393
394 //Change the millisecond-based parameters into sample-based parameters
395 double rate = t.GetRate(); //Translates seconds to samples
396 unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize); //Size of window to examine
397 unsigned int SilentWindowSizeInt = (unsigned int)(rate * mSilentWindowSize); //This much signal is necessary to trip key
398
399 sampleCount samplesleft ( len.as_double() - WindowSizeInt ); //Indexes the number of samples remaining in the selection
400 auto lastsubthresholdsample = start; //start this off at the selection start
401 // keeps track of the sample number of the last sample to not exceed the threshold
402
403 int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
404
405 //This loop goes through the selection a block at a time. If a long enough run
406 //of above-threshold blocks occur, we return to the last sub-threshold block and
407 //go through one sample at a time.
408 //If there are fewer than 10 samples leftover, don't bother.
409 for(auto i = start; samplesleft >= 10;
410 i += (WindowSizeInt - 1) , samplesleft -= (WindowSizeInt - 1)) {
411
412 //Set blocksize so that it is the right size
413 const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
414
415 if(!AboveThreshold(t,i,blocksize))
416 {
417 blockruns++; //Hit
418 }
419 else
420 {
421 blockruns=0; //Above threshold--start over
422 lastsubthresholdsample = i;
423 }
424
425 //If the blockrun is long enough, break out of the loop early:
426 if(blockruns > mSilentWindowSize/mWindowSize)
427 break;
428
429 }
430
431 //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
432 if(samplesleft > 10) {
433
434
435 //Calculate how many to scan through--we only have to go through (at most)
436 //the first window + 1 samples--but we need another window samples to draw from.
437 size_t remaining = 2*WindowSizeInt+1;
438
439 //To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
440 //Only go through the first SilentWindowSizeInt samples, and choose the first that trips the key.
441 Floats buffer{ remaining };
442 t.GetFloats(buffer.get(),
443 lastsubthresholdsample, remaining);
444
445 //Initialize these trend markers atrend and ztrend. They keep track of the
446 //up/down trends at the start and end of the evaluation window.
447 int atrend = sgn(buffer[1]-buffer[0]);
448 int ztrend = sgn(buffer[WindowSizeInt+1]-buffer[WindowSizeInt]);
449
450
451 double erg=0;
452 double sc=0;
453 double dc=0;
454
455 //Get initial test statistic values.
456 if(mUseEnergy)
457 erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt);
459 sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt);
461 dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
462
463 //Now, go through the sound again, sample by sample.
464 size_t i;
465 for(i = 0; i < SilentWindowSizeInt - WindowSizeInt; i++) {
466 int tests = 0;
467 int testThreshold = 0;
468 //Update the test statistics
469 if(mUseEnergy)
470 {
471 TestEnergyUpdate(erg, WindowSizeInt,buffer[i],buffer[i+WindowSizeInt+1]);
472 tests += (int)(erg>mThresholdEnergy);
473 testThreshold++;
474 }
476 {
477 TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
478 tests += (int)(sc < mThresholdSignChangesLower);
479 testThreshold++;
480 }
482 {
483 TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
484 tests += (int)(sc > mThresholdSignChangesUpper);
485 testThreshold++;
486 }
488 {
489 TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
490 tests += (int)(dc < mThresholdDirectionChangesLower);
491 testThreshold++;
492 }
494 {
495 TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
496 tests += (int)(dc > mThresholdDirectionChangesUpper);
497 testThreshold++;
498 }
499
500 if(tests < testThreshold)
501 { //Finish off on the first below-threshold block
502 break;
503 }
504 }
505
506 //When we get here, i+lastsubthresholdsample is the best guess for where the word starts
507 return i + lastsubthresholdsample;
508 }
509 else {
510 //If we failed to find anything, return the start position
511 return start ;
512 }
513 }
514}
515
516
517//Move backward from the end to find an OFF region
519 const WaveChannel & t, sampleCount end, sampleCount len)
520{
521
522
523 if((mWindowSize) >= (len + 10).as_double() ){
524
525 AudacityMessageBox( XO("Selection is too small to use voice key.") );
526 return end;
527 }
528 else {
529
530 //Change the millisecond-based parameters into sample-based parameters
531 double rate = t.GetRate(); //Translates seconds to samples
532 unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize); //Size of window to examine
533 //unsigned int SilentWindowSizeInt = (unsigned int)(rate * mSilentWindowSize); //This much signal is necessary to trip key
534
535 auto samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
536 auto lastsubthresholdsample = end; //start this off at the end
537 // keeps track of the sample number of the last sample to not exceed the threshold
538
539 int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
540
541 //This loop goes through the selection a block at a time in reverse order. If a long enough run
542 //of above-threshold blocks occur, we return to the last sub-threshold block and
543 //go through one sample at a time.
544 //If there are fewer than 10 samples leftover, don't bother.
545 for(auto i = end - WindowSizeInt; samplesleft >= 10;
546 i -= (WindowSizeInt - 1), samplesleft -= (WindowSizeInt -1 )) {
547
548 //Set blocksize so that it is the right size
549 const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
550
551 if(!AboveThreshold(t,i,blocksize))
552 {
553
554 blockruns++; //Hit
555 }
556 else
557 {
558 blockruns=0; //Miss--start over
559 lastsubthresholdsample = i+WindowSizeInt;
560
561 }
562
563 //If the blockrun is long enough, break out of the loop early:
564 if(blockruns > mSilentWindowSize/mWindowSize)
565 break;
566
567 }
568
569 //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
570 if(samplesleft > 10) {
571
572 //Calculate how many to scan through--we only have to go through (at most)
573 //the first window + 1 samples--but we need another window samples to draw from.
574 const size_t remaining = 2*WindowSizeInt+1;
575
576 //To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
577 //Only go through the first SilentWindowSizeInt samples, and choose the first that trips the key.
578 Floats buffer{ remaining };
579 t.GetFloats(buffer.get(),
580 lastsubthresholdsample - remaining, remaining);
581
582 //Initialize these trend markers atrend and ztrend. They keep track of the
583 //up/down trends at the start and end of the remaining window.
584 int atrend = sgn(buffer[remaining - 2] - buffer[remaining - 1]);
585 int ztrend =
586 sgn(buffer[remaining - WindowSizeInt - 2] -
587 buffer[remaining - WindowSizeInt - 2]);
588
589 double erg=0;
590 double sc=0;
591 double dc=0;
592 //Get initial test statistic values.
593 if(mUseEnergy)
594 erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt);
596 sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt);
598 dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
599
600 //Now, go through the sound again, sample by sample.
601 size_t i;
602 for(i = remaining - 1; i > WindowSizeInt; i--) {
603
604 int tests = 0;
605 int testThreshold = 0;
606 //Update the test statistics
607 if(mUseEnergy)
608 {
609 TestEnergyUpdate(erg, WindowSizeInt,buffer[i],buffer[i+WindowSizeInt+1]);
610 tests += (int)(erg>mThresholdEnergy);
611 testThreshold++;
612 }
614 {
615 TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
616 tests += (int)(sc < mThresholdSignChangesLower);
617 testThreshold++;
618 }
620 {
621 TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
622 tests += (int)(sc > mThresholdSignChangesUpper);
623 testThreshold++;
624 }
626 {
627 TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
628 tests += (int)(dc < mThresholdDirectionChangesLower);
629 testThreshold++;
630 }
632 {
633 TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
634 tests += (int)(dc > mThresholdDirectionChangesUpper);
635 testThreshold++;
636 }
637
638
639
640 if(tests < testThreshold)
641 { //Finish off on the first hit
642 break;
643 }
644 }
645
646 //When we get here, i+lastsubthresholdsample is the best guess for where the word starts
647 return lastsubthresholdsample - remaining + i;
648 }
649 else {
650 //If we failed to find anything, return the start position
651 return end ;
652 }
653 }
654}
655
656//This tests whether a specified block region is above or below threshold.
658 const WaveChannel & t, sampleCount start, sampleCount len)
659{
660
661 double erg=0;
662 double sc=0;
663 double dc=0; //These store three statistics: energy, signchanges, and directionchanges
664 int tests =0; //Keeps track of how many statistics surpass the threshold.
665 int testThreshold=0; //Keeps track of the threshold.
666
667 //Calculate the test statistics
668 if(mUseEnergy)
669 {
670 testThreshold++;
671 erg = TestEnergy(t, start,len);
672 tests +=(int)(erg > mThresholdEnergy);
673#if 0
674 std::cout << "Energy: " << erg << " " <<mThresholdEnergy << std::endl;
675#endif
676 }
677
679 {
680 testThreshold++;
681 sc = TestSignChanges(t,start,len);
682 tests += (int)(sc < mThresholdSignChangesLower);
683#if 0
684 std::cout << "SignChanges: " << sc << " " <<mThresholdSignChangesLower<< " < " << mThresholdSignChangesUpper << std::endl;
685#endif
686
687 }
689 {
690 testThreshold++;
691 sc = TestSignChanges(t,start,len);
692 tests += (int)(sc > mThresholdSignChangesUpper);
693#if 0
694 std::cout << "SignChanges: " << sc << " " <<mThresholdSignChangesLower<< " < " << mThresholdSignChangesUpper << std::endl;
695#endif
696
697 }
698
699
701 {
702 testThreshold++;
703 dc = TestDirectionChanges(t,start,len);
704 tests += (int)(dc < mThresholdDirectionChangesLower);
705#if 0
706 std::cout << "DirectionChanges: " << dc << " " <<mThresholdDirectionChangesLower<< " < " << mThresholdDirectionChangesUpper << std::endl;
707#endif
708 }
710 {
711 testThreshold++;
712 dc = TestDirectionChanges(t,start,len);
713 tests += (int)(dc > mThresholdDirectionChangesUpper);
714#if 0
715 std::cout << "DirectionChanges: " << dc << " " <<mThresholdDirectionChangesLower<< " < " << mThresholdDirectionChangesUpper << std::endl;
716#endif
717 }
718
719 //Test whether we are above threshold (the number of stats)
720 return (tests >= testThreshold);
721
722}
723
724//This adjusts the threshold. Larger values of t expand the noise region,
725//making more things be classified as noise (and requiring a stronger signal).
727{
728
735};
736
737
738//This 'calibrates' the voicekey to noise
740{
741 //To calibrate the noise, we need to scan the sample block just like in the voicekey and
742 //calculate the mean and standard deviation of the test statistics.
743 //Then, we set the BaselineThreshold to be one
744
745 wxBusyCursor busy;
746
747 //initialize some sample statistics: sums of X and X^2
748
749 double sumerg, sumerg2;
750 double sumsc, sumsc2;
751 double sumdc, sumdc2;
752 double erg, sc, dc;
753 //Now, change the millisecond-based parameters into sample-based parameters
754 //(This depends on WaveTrack t)
755 double rate = t.GetRate();
756 unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize);
757 // unsigned int SignalWindowSizeInt = (unsigned int)(rate * mSignalWindowSize);
758
759
760 //Get the first test statistics
761
762 //Calibrate all of the statistic, because they might be
763 //changed later.
764
765 // if(mUseEnergy)
766 erg = TestEnergy(t, start, WindowSizeInt);
767
768 // if(mUseSignChanges)
769 sc = TestSignChanges(t,start, WindowSizeInt);
770
771 // if(mUseDirectionChanges)
772 dc = TestDirectionChanges(t,start,WindowSizeInt);
773
774 sumerg =0.0;
775 sumerg2 = 0.0;
776 sumsc =0.0;
777 sumsc2 = 0.0;
778 sumdc =0.0;
779 sumdc2 =0.0;
780
781
782 // int n = len - WindowSizeInt; //This is how many samples we have
783 auto samplesleft = len - WindowSizeInt;
784 int samples=0;
785
786 for(auto i = start; samplesleft >= 10;
787 i += (WindowSizeInt - 1), samplesleft -= (WindowSizeInt -1) ) {
788 //Take samples chunk-by-chunk.
789 //Normally, this should be in WindowSizeInt chunks, but at the end (if there are more than 10
790 //samples left) take a chunk that eats the rest of the samples.
791
792 samples++; //Increment the number of samples we have
793 const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
794
795 erg = TestEnergy(t, i, blocksize);
796 sumerg +=(double)erg;
797 sumerg2 += pow((double)erg,2);
798
799 sc = TestSignChanges(t,i, blocksize);
800 sumsc += (double)sc;
801 sumsc2 += pow((double)sc,2);
802
803
804 dc = TestDirectionChanges(t,i,blocksize);
805 sumdc += (double)dc;
806 sumdc2 += pow((double)dc,2);
807 }
808
809 mEnergyMean = sumerg / samples;
810 mEnergySD = sqrt(sumerg2/samples - mEnergyMean*mEnergyMean);
811
812 mSignChangesMean = sumsc / samples;
814
815 mDirectionChangesMean = sumdc / samples;
817
818 auto text = XO("Calibration Results\n");
819 text +=
820 /* i18n-hint: %1.4f is replaced by a number. sd stands for 'Standard Deviations'*/
821 XO("Energy -- mean: %1.4f sd: (%1.4f)\n")
822 .Format( mEnergyMean, mEnergySD );
823 text +=
824 XO("Sign Changes -- mean: %1.4f sd: (%1.4f)\n")
826 text +=
827 XO("Direction Changes -- mean: %1.4f sd: (%1.4f)\n")
830 nullptr,
831 text,
832 XO("Calibration Complete"),
833 wxOK | wxICON_INFORMATION,
834 wxPoint(-1, -1)
835 }
836 .ShowModal();
837
839}
840
841
842void VoiceKey::SetKeyType(bool erg, bool scLow , bool scHigh,
843 bool dcLow, bool dcHigh)
844{
845 mUseEnergy = erg;
846 mUseSignChangesLow = scLow;
847 mUseSignChangesHigh = scHigh;
850}
851
852
853//This might continue over a number of blocks.
855 const WaveChannel & t, sampleCount start, sampleCount len)
856{
857
858 double sum = 1;
859 auto s = start; //Keep track of start
860 auto originalLen = len; //Keep track of the length of block to process (its not the length of t)
861 const auto blockSize = limitSampleBufferSize(
862 t.GetMaxBlockSize(), len); //Determine size of sampling buffer
863 Floats buffer{ blockSize }; //Get a sampling buffer
864
865 while(len > 0)
866 {
867 //Figure out how much to grab
868 auto block = limitSampleBufferSize ( t.GetBestBlockSize(s), len );
869
870 t.GetFloats(buffer.get(), s,block); //grab the block;
871
872 //Now, go through the block and calculate energy
873 for(decltype(block) i = 0; i< block; i++)
874 {
875 sum += buffer[i]*buffer[i];
876 }
877
878 len -= block;
879 s += block;
880 }
881
882 return sum / originalLen.as_double();
883}
884
885
886//This will update RMSE by adding one element and subtracting another
887void VoiceKey::TestEnergyUpdate (double & prevErg, int len, const float & drop, const float & add)
888{
889 //This is an updating formula for RMSE. It will only recalculate what's changed.
890 prevErg = prevErg + (double)(fabs(add) - fabs(drop))/len;
891
892}
893
894
896 const WaveChannel & t, sampleCount start, sampleCount len)
897{
898
899
900 auto s = start; //Keep track of start
901 auto originalLen = len; //Keep track of the length of block to process (its not the length of t)
902 const auto blockSize = limitSampleBufferSize(
903 t.GetMaxBlockSize(), len); //Determine size of sampling buffer
904 unsigned long signchanges = 1;
905 int currentsign=0;
906
907 Floats buffer{ blockSize }; //Get a sampling buffer
908
909 while(len > 0) {
910 //Figure out how much to grab
911 auto block = limitSampleBufferSize ( t.GetBestBlockSize(s), len );
912
913 t.GetFloats(buffer.get(), s, block); //grab the block;
914
915 if (len == originalLen)
916 {
917 //The first time through, set stuff up special.
918 currentsign = sgn(buffer[0]);
919 }
920
921 //Now, go through the block and calculate zero crossings
922
923 for(decltype(block) i = 0; i< block; i++)
924 {
925 if( sgn(buffer[i]) != currentsign)
926 {
927 currentsign = sgn(buffer[i]);
928 signchanges++;
929 }
930
931 }
932 len -= block;
933 s += block;
934 }
935 return (double)signchanges / originalLen.as_double();
936}
937
938void VoiceKey::TestSignChangesUpdate(double & currentsignchanges, int len,
939 const float & a1,
940 const float & a2,
941 const float & z1,
942 const float & z2)
943{
944
945 if(sgn(a1)!=sgn(a2)) currentsignchanges -= 1.0/len;
946 if(sgn(z1)!=sgn(z2)) currentsignchanges += 1.0/len;
947
948}
949
950
952 const WaveChannel & t, sampleCount start, sampleCount len)
953{
954
955
956 auto s = start; //Keep track of start
957 auto originalLen = len; //Keep track of the length of block to process (its not the length of t)
958 const auto blockSize = limitSampleBufferSize(
959 t.GetMaxBlockSize(), len); //Determine size of sampling buffer
960 unsigned long directionchanges = 1;
961 float lastval=float(0);
962 int lastdirection=1;
963
964 Floats buffer{ blockSize }; //Get a sampling buffer
965
966 while(len > 0) {
967 //Figure out how much to grab
968 auto block = limitSampleBufferSize ( t.GetBestBlockSize(s), len );
969
970 t.GetFloats(buffer.get(), s, block); //grab the block;
971
972 if (len == originalLen) {
973 //The first time through, set stuff up special.
974 lastval = buffer[0];
975 }
976
977 //Now, go through the block and calculate zero crossings
978
979
980 for(decltype(block) i = 0; i< block; i++){
981
982 if( sgn(buffer[i]-lastval) != lastdirection) {
983 directionchanges++;
984 lastdirection = sgn(buffer[i] - lastval);
985 }
986 lastval = buffer[i];
987
988 }
989 len -= block;
990 s += block;
991 }
992 return (double)directionchanges/originalLen.as_double();
993}
994
995
996
997
998// This method does an updating by looking at the trends
999// This will change currentdirections and atrend/trend, so be warned.
1000void VoiceKey::TestDirectionChangesUpdate(double & currentdirectionchanges, int len,
1001 int & atrend, const float & a1, const float & a2,
1002 int & ztrend, const float & z1, const float & z2)
1003{
1004
1005 if(sgn(a2 - a1)!= atrend ) {
1006 //Here, the direction shifted for the item we're dropping.
1007 currentdirectionchanges -= 1.0/len;
1008 atrend = sgn(a2-a1);
1009 }
1010 if(sgn(z2 - z1)!= ztrend){
1011 //Here, the direction shifts when we add an item
1012 currentdirectionchanges += 1.0/len;
1013 ztrend = sgn(z2-z1);
1014 }
1015
1016}
int AudacityMessageBox(const TranslatableString &message, const TranslatableString &caption, long style, wxWindow *parent, int x, int y)
XO("Cut/Copy/Paste")
size_t limitSampleBufferSize(size_t bufferSize, sampleCount limit)
Definition: SampleCount.cpp:22
int sgn(int number)
Definition: VoiceKey.h:98
Wrap wxMessageDialog so that caption IS translatable.
double TestDirectionChanges(const WaveChannel &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:951
double mThresholdAdjustment
Definition: VoiceKey.h:56
bool mUseSignChangesHigh
Definition: VoiceKey.h:74
void TestDirectionChangesUpdate(double &currentdirectionchanges, int length, int &atrend, const float &a1, const float &a2, int &ztrend, const float &z1, const float &z2)
Definition: VoiceKey.cpp:1000
VoiceKey()
Definition: VoiceKey.cpp:38
sampleCount OffForward(const WaveChannel &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:382
double mSignalWindowSize
Definition: VoiceKey.h:80
bool mUseSignChangesLow
Definition: VoiceKey.h:73
sampleCount OnBackward(const WaveChannel &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:236
double mThresholdDirectionChangesUpper
Definition: VoiceKey.h:69
void AdjustThreshold(double t)
Definition: VoiceKey.cpp:726
sampleCount OnForward(const WaveChannel &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:84
double mThresholdDirectionChangesLower
Definition: VoiceKey.h:68
bool AboveThreshold(const WaveChannel &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:657
bool mUseDirectionChangesLow
Definition: VoiceKey.h:75
double mEnergySD
Definition: VoiceKey.h:59
double mSignChangesSD
Definition: VoiceKey.h:61
sampleCount OffBackward(const WaveChannel &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:518
double mSignChangesMean
Definition: VoiceKey.h:60
void TestEnergyUpdate(double &prevErg, int length, const float &drop, const float &add)
Definition: VoiceKey.cpp:887
double mThresholdSignChangesLower
Definition: VoiceKey.h:66
double mWindowSize
Definition: VoiceKey.h:54
bool mUseDirectionChangesHigh
Definition: VoiceKey.h:76
double TestEnergy(const WaveChannel &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:854
double mThresholdEnergy
Definition: VoiceKey.h:65
double TestSignChanges(const WaveChannel &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:895
void CalibrateNoise(const WaveChannel &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:739
double mEnergyMean
Definition: VoiceKey.h:58
~VoiceKey()
Definition: VoiceKey.cpp:66
bool mUseEnergy
Definition: VoiceKey.h:72
void SetKeyType(bool erg, bool scLow, bool scHigh, bool dcLow, bool dcHigh)
Definition: VoiceKey.cpp:842
double mSilentWindowSize
Definition: VoiceKey.h:79
void TestSignChangesUpdate(double &currentsignchanges, int length, const float &a1, const float &a2, const float &z1, const float &z2)
Definition: VoiceKey.cpp:938
double mThresholdSignChangesUpper
Definition: VoiceKey.h:67
double mDirectionChangesSD
Definition: VoiceKey.h:63
double mDirectionChangesMean
Definition: VoiceKey.h:62
double GetRate() const override
Definition: WaveTrack.cpp:793
bool GetFloats(float *buffer, sampleCount start, size_t len, fillFormat fill=FillFormat::fillZero, bool mayThrow=true, sampleCount *pNumWithinClips=nullptr) const
"narrow" overload fetches from the unique channel
Definition: WaveTrack.h:129
size_t GetBestBlockSize(sampleCount t) const
A hint for sizing of well aligned fetches.
Definition: WaveTrack.h:850
size_t GetMaxBlockSize() const
Definition: WaveTrack.h:858
Positions or offsets within audio files need a wide type.
Definition: SampleCount.h:19
double as_double() const
Definition: SampleCount.h:46
const char * end(const char *str) noexcept
Definition: StringUtils.h:106
__finl float_x4 __vecc sqrt(const float_x4 &a)
void add(const T *src1, const T *src2, T *dst, int32_t n)
Definition: VectorOps.h:46