Audacity  2.2.2
Public Member Functions | Private Member Functions | Private Attributes | List of all members
VoiceKey Class Reference

This implements a voice key, detecting either the next "ON" or "OFF" point. More...

#include <VoiceKey.h>

Public Member Functions

 VoiceKey ()
 
 ~VoiceKey ()
 
sampleCount OnForward (const WaveTrack &t, sampleCount start, sampleCount len)
 
sampleCount OnBackward (const WaveTrack &t, sampleCount start, sampleCount len)
 
sampleCount OffForward (const WaveTrack &t, sampleCount start, sampleCount len)
 
sampleCount OffBackward (const WaveTrack &t, sampleCount start, sampleCount len)
 
void CalibrateNoise (const WaveTrack &t, sampleCount start, sampleCount len)
 
void AdjustThreshold (double t)
 
bool AboveThreshold (const WaveTrack &t, sampleCount start, sampleCount len)
 
void SetKeyType (bool erg, bool scLow, bool scHigh, bool dcLow, bool dcHigh)
 

Private Member Functions

double TestEnergy (const WaveTrack &t, sampleCount start, sampleCount len)
 
double TestSignChanges (const WaveTrack &t, sampleCount start, sampleCount len)
 
double TestDirectionChanges (const WaveTrack &t, sampleCount start, sampleCount len)
 
void TestEnergyUpdate (double &prevErg, int length, const float &drop, const float &add)
 
void TestSignChangesUpdate (double &currentsignchanges, int length, const float &a1, const float &a2, const float &z1, const float &z2)
 
void TestDirectionChangesUpdate (double &currentdirectionchanges, int length, int &atrend, const float &a1, const float &a2, int &ztrend, const float &z1, const float &z2)
 

Private Attributes

double mWindowSize
 
double mThresholdAdjustment
 
double mEnergyMean
 
double mEnergySD
 
double mSignChangesMean
 
double mSignChangesSD
 
double mDirectionChangesMean
 
double mDirectionChangesSD
 
double mThresholdEnergy
 
double mThresholdSignChangesLower
 
double mThresholdSignChangesUpper
 
double mThresholdDirectionChangesLower
 
double mThresholdDirectionChangesUpper
 
bool mUseEnergy
 
bool mUseSignChangesLow
 
bool mUseSignChangesHigh
 
bool mUseDirectionChangesLow
 
bool mUseDirectionChangesHigh
 
double mSilentWindowSize
 
double mSignalWindowSize
 

Detailed Description

This implements a voice key, detecting either the next "ON" or "OFF" point.

Definition at line 33 of file VoiceKey.h.

Constructor & Destructor Documentation

VoiceKey::VoiceKey ( )

Definition at line 42 of file VoiceKey.cpp.

References AdjustThreshold(), mDirectionChangesMean, mDirectionChangesSD, mEnergyMean, mEnergySD, mSignalWindowSize, mSignChangesMean, mSignChangesSD, mSilentWindowSize, mUseDirectionChangesHigh, mUseDirectionChangesLow, mUseEnergy, mUseSignChangesHigh, mUseSignChangesLow, and mWindowSize.

43 {
44 
45  mWindowSize = 0.01; //size of analysis window in seconds
46 
47  mEnergyMean = .0006; // reasonable initial levels assuming sampling rate of
48  mEnergySD = .0002; // 44100 hertz
49  mSignChangesMean = .08;
50  mSignChangesSD= .02;
53 
54  AdjustThreshold(2);
55 
56  mSilentWindowSize = .05; //Amount of time (in seconds) below threshold to call it silence
57  mSignalWindowSize = .05; //Amount of time (in seconds) above threshold to call it signal
58 
59 
60  mUseEnergy = true;
61  mUseSignChangesLow = false;
62  mUseSignChangesHigh = false;
65 
66 
67 };
void AdjustThreshold(double t)
Definition: VoiceKey.cpp:730
bool mUseSignChangesLow
Definition: VoiceKey.h:73
double mSignalWindowSize
Definition: VoiceKey.h:80
double mEnergyMean
Definition: VoiceKey.h:58
double mSilentWindowSize
Definition: VoiceKey.h:79
double mSignChangesSD
Definition: VoiceKey.h:61
double mWindowSize
Definition: VoiceKey.h:54
bool mUseDirectionChangesLow
Definition: VoiceKey.h:75
bool mUseDirectionChangesHigh
Definition: VoiceKey.h:76
bool mUseEnergy
Definition: VoiceKey.h:72
double mSignChangesMean
Definition: VoiceKey.h:60
bool mUseSignChangesHigh
Definition: VoiceKey.h:74
double mEnergySD
Definition: VoiceKey.h:59
double mDirectionChangesMean
Definition: VoiceKey.h:62
double mDirectionChangesSD
Definition: VoiceKey.h:63
VoiceKey::~VoiceKey ( )

Definition at line 70 of file VoiceKey.cpp.

71 {
72 };

Member Function Documentation

bool VoiceKey::AboveThreshold ( const WaveTrack & t,
sampleCount  start,
sampleCount  len 
)

Definition at line 661 of file VoiceKey.cpp.

References mThresholdDirectionChangesLower, mThresholdDirectionChangesUpper, mThresholdEnergy, mThresholdSignChangesLower, mThresholdSignChangesUpper, mUseDirectionChangesHigh, mUseDirectionChangesLow, mUseEnergy, mUseSignChangesHigh, mUseSignChangesLow, TestDirectionChanges(), TestEnergy(), and TestSignChanges().

Referenced by OffBackward(), OffForward(), OnBackward(), and OnForward().

663 {
664 
665  double erg=0;
666  double sc=0;
667  double dc=0; //These store three statistics: energy, signchanges, and directionchanges
668  int tests =0; //Keeps track of how many statistics surpass the threshold.
669  int testThreshold=0; //Keeps track of the threshold.
670 
671  //Calculate the test statistics
672  if(mUseEnergy)
673  {
674  testThreshold++;
675  erg = TestEnergy(t, start,len);
676  tests +=(int)(erg > mThresholdEnergy);
677 #if 0
678  std::cout << "Energy: " << erg << " " <<mThresholdEnergy << std::endl;
679 #endif
680  }
681 
683  {
684  testThreshold++;
685  sc = TestSignChanges(t,start,len);
686  tests += (int)(sc < mThresholdSignChangesLower);
687 #if 0
688  std::cout << "SignChanges: " << sc << " " <<mThresholdSignChangesLower<< " < " << mThresholdSignChangesUpper << std::endl;
689 #endif
690 
691  }
693  {
694  testThreshold++;
695  sc = TestSignChanges(t,start,len);
696  tests += (int)(sc > mThresholdSignChangesUpper);
697 #if 0
698  std::cout << "SignChanges: " << sc << " " <<mThresholdSignChangesLower<< " < " << mThresholdSignChangesUpper << std::endl;
699 #endif
700 
701  }
702 
703 
705  {
706  testThreshold++;
707  dc = TestDirectionChanges(t,start,len);
708  tests += (int)(dc < mThresholdDirectionChangesLower);
709 #if 0
710  std::cout << "DirectionChanges: " << dc << " " <<mThresholdDirectionChangesLower<< " < " << mThresholdDirectionChangesUpper << std::endl;
711 #endif
712  }
714  {
715  testThreshold++;
716  dc = TestDirectionChanges(t,start,len);
717  tests += (int)(dc > mThresholdDirectionChangesUpper);
718 #if 0
719  std::cout << "DirectionChanges: " << dc << " " <<mThresholdDirectionChangesLower<< " < " << mThresholdDirectionChangesUpper << std::endl;
720 #endif
721  }
722 
723  //Test whether we are above threshold (the number of stats)
724  return (tests >= testThreshold);
725 
726 }
double TestEnergy(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:849
double mThresholdDirectionChangesUpper
Definition: VoiceKey.h:69
bool mUseSignChangesLow
Definition: VoiceKey.h:73
bool mUseDirectionChangesLow
Definition: VoiceKey.h:75
double mThresholdSignChangesUpper
Definition: VoiceKey.h:67
double TestDirectionChanges(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:946
bool mUseDirectionChangesHigh
Definition: VoiceKey.h:76
bool mUseEnergy
Definition: VoiceKey.h:72
bool mUseSignChangesHigh
Definition: VoiceKey.h:74
double TestSignChanges(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:890
double mThresholdDirectionChangesLower
Definition: VoiceKey.h:68
double mThresholdSignChangesLower
Definition: VoiceKey.h:66
double mThresholdEnergy
Definition: VoiceKey.h:65
void VoiceKey::AdjustThreshold ( double  t)

Definition at line 730 of file VoiceKey.cpp.

References mDirectionChangesMean, mDirectionChangesSD, mEnergyMean, mEnergySD, mSignChangesMean, mSignChangesSD, mThresholdAdjustment, mThresholdDirectionChangesLower, mThresholdDirectionChangesUpper, mThresholdEnergy, mThresholdSignChangesLower, and mThresholdSignChangesUpper.

Referenced by CalibrateNoise(), and VoiceKey().

731 {
732 
739 };
double mThresholdAdjustment
Definition: VoiceKey.h:56
double mThresholdDirectionChangesUpper
Definition: VoiceKey.h:69
double mEnergyMean
Definition: VoiceKey.h:58
double mSignChangesSD
Definition: VoiceKey.h:61
double mThresholdSignChangesUpper
Definition: VoiceKey.h:67
double mSignChangesMean
Definition: VoiceKey.h:60
double mEnergySD
Definition: VoiceKey.h:59
double mDirectionChangesMean
Definition: VoiceKey.h:62
double mThresholdDirectionChangesLower
Definition: VoiceKey.h:68
double mThresholdSignChangesLower
Definition: VoiceKey.h:66
double mDirectionChangesSD
Definition: VoiceKey.h:63
double mThresholdEnergy
Definition: VoiceKey.h:65
void VoiceKey::CalibrateNoise ( const WaveTrack & t,
sampleCount  start,
sampleCount  len 
)

Definition at line 743 of file VoiceKey.cpp.

References _(), AdjustThreshold(), WaveTrack::GetRate(), limitSampleBufferSize(), mDirectionChangesMean, mDirectionChangesSD, mEnergyMean, mEnergySD, mSignChangesMean, mSignChangesSD, mThresholdAdjustment, mWindowSize, TestDirectionChanges(), TestEnergy(), and TestSignChanges().

744 {
745  //To calibrate the noise, we need to scan the sample block just like in the voicekey and
746  //calculate the mean and standard deviation of the test statistics.
747  //Then, we set the BaselineThreshold to be one
748 
749  wxBusyCursor busy;
750 
751  //initialize some sample statistics: sums of X and X^2
752 
753  double sumerg, sumerg2;
754  double sumsc, sumsc2;
755  double sumdc, sumdc2;
756  double erg, sc, dc;
757  //Now, change the seconds-based parameters into sample-based parameters
758  //(This depends on WaveTrack t)
759  double rate = t.GetRate();
760  unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize);
761  // unsigned int SignalWindowSizeInt = (unsigned int)(rate * mSignalWindowSize);
762 
763 
764  //Get the first test statistics
765 
766  //Calibrate all of the statistics, because they might be
767  //changed later.
768 
769  // if(mUseEnergy)
770  erg = TestEnergy(t, start, WindowSizeInt);
771 
772  // if(mUseSignChanges)
773  sc = TestSignChanges(t,start, WindowSizeInt);
774 
775  // if(mUseDirectionChanges)
776  dc = TestDirectionChanges(t,start,WindowSizeInt);
777 
778  sumerg =0.0;
779  sumerg2 = 0.0;
780  sumsc =0.0;
781  sumsc2 = 0.0;
782  sumdc =0.0;
783  sumdc2 =0.0;
784 
785 
786  // int n = len - WindowSizeInt; //This is how many samples we have
787  auto samplesleft = len - WindowSizeInt;
788  int samples=0;
789 
790  for(auto i = start; samplesleft >= 10;
791  i += (WindowSizeInt - 1), samplesleft -= (WindowSizeInt -1) ) {
792  //Take samples chunk-by-chunk.
793  //Normally, this should be in WindowSizeInt chunks, but at the end (if there are more than 10
794  //samples left) take a chunk that eats the rest of the samples.
795 
796  samples++; //Increment the number of samples we have
797  const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
798 
799  erg = TestEnergy(t, i, blocksize);
800  sumerg +=(double)erg;
801  sumerg2 += pow((double)erg,2);
802 
803  sc = TestSignChanges(t,i, blocksize);
804  sumsc += (double)sc;
805  sumsc2 += pow((double)sc,2);
806 
807 
808  dc = TestDirectionChanges(t,i,blocksize);
809  sumdc += (double)dc;
810  sumdc2 += pow((double)dc,2);
811  }
812 
813  mEnergyMean = sumerg / samples;
814  mEnergySD = sqrt(sumerg2/samples - mEnergyMean*mEnergyMean);
815 
816  mSignChangesMean = sumsc / samples;
817  mSignChangesSD = sqrt(sumsc2 / samples - mSignChangesMean * mSignChangesMean);
818 
819  mDirectionChangesMean = sumdc / samples;
821 
822  wxString text = _("Calibration Results\n");
823  /* i18n-hint: %1.4f is replaced by a number. sd stands for 'Standard Deviations'*/
824  text += wxString::Format(_("Energy -- mean: %1.4f sd: (%1.4f)\n"),mEnergyMean,mEnergySD);
825  text+= wxString::Format(_("Sign Changes -- mean: %1.4f sd: (%1.4f)\n"),mSignChangesMean,mSignChangesSD);
826  text += wxString::Format(_("Direction Changes -- mean: %1.4f sd: (%1.4f)\n"), mDirectionChangesMean, mDirectionChangesSD);
827  AudacityMessageDialog{ NULL, text,
828  _("Calibration Complete"),
829  wxOK | wxICON_INFORMATION,
830  wxPoint(-1, -1) }
831  .ShowModal();
832 
834 }
void AdjustThreshold(double t)
Definition: VoiceKey.cpp:730
double mThresholdAdjustment
Definition: VoiceKey.h:56
double TestEnergy(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:849
Wrap wxMessageDialog so that caption IS translatable.
Definition: ErrorDialog.h:129
double mEnergyMean
Definition: VoiceKey.h:58
size_t limitSampleBufferSize(size_t bufferSize, sampleCount limit)
Definition: Types.h:178
double mSignChangesSD
Definition: VoiceKey.h:61
double mWindowSize
Definition: VoiceKey.h:54
double TestDirectionChanges(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:946
double mSignChangesMean
Definition: VoiceKey.h:60
double mEnergySD
Definition: VoiceKey.h:59
double TestSignChanges(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:890
_("Move Track &Down")+wxT("\t")+(GetActiveProject() -> GetCommandManager() ->GetKeyFromName(wxT("TrackMoveDown")).Raw()), OnMoveTrack) POPUP_MENU_ITEM(OnMoveTopID, _("Move Track to &Top")+wxT("\t")+(GetActiveProject() ->GetCommandManager() ->GetKeyFromName(wxT("TrackMoveTop")).Raw()), OnMoveTrack) POPUP_MENU_ITEM(OnMoveBottomID, _("Move Track to &Bottom")+wxT("\t")+(GetActiveProject() ->GetCommandManager() ->GetKeyFromName(wxT("TrackMoveBottom")).Raw()), OnMoveTrack)#define SET_TRACK_NAME_PLUGIN_SYMBOLclass SetTrackNameCommand:public AudacityCommand
double mDirectionChangesMean
Definition: VoiceKey.h:62
double mDirectionChangesSD
Definition: VoiceKey.h:63
double GetRate() const
Definition: WaveTrack.cpp:398
sampleCount VoiceKey::OffBackward ( const WaveTrack & t,
sampleCount  start,
sampleCount  len 
)

Definition at line 522 of file VoiceKey.cpp.

References _(), AboveThreshold(), AudacityMessageBox(), floatSample, WaveTrack::Get(), WaveTrack::GetRate(), limitSampleBufferSize(), mSilentWindowSize, mThresholdDirectionChangesLower, mThresholdDirectionChangesUpper, mThresholdEnergy, mThresholdSignChangesLower, mThresholdSignChangesUpper, mUseDirectionChangesHigh, mUseDirectionChangesLow, mUseEnergy, mUseSignChangesHigh, mUseSignChangesLow, mWindowSize, sgn(), TestDirectionChanges(), TestDirectionChangesUpdate(), TestEnergy(), TestEnergyUpdate(), TestSignChanges(), and TestSignChangesUpdate().

524 {
525 
526 
527  if((mWindowSize) >= (len + 10).as_double() ){
528 
529  AudacityMessageBox(_("Selection is too small to use voice key."));
530  return end;
531  }
532  else {
533 
534  //Change the seconds-based parameters into sample-based parameters
535  double rate = t.GetRate(); //Translates seconds to samples
536  unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize); //Size of window to examine
537  //unsigned int SilentWindowSizeInt = (unsigned int)(rate * mSilentWindowSize); //This much signal is necessary to trip key
538 
539  auto samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
540  auto lastsubthresholdsample = end; //start this off at the end
541  // keeps track of the sample number of the last sample to not exceed the threshold
542 
543  int blockruns=0; //keeps track of the number of consecutive below-threshold blocks
544 
545  //This loop goes through the selection a block at a time in reverse order. If a long enough run
546  //of below-threshold blocks occurs, we return to the last above-threshold block and
547  //go through one sample at a time.
548  //If there are fewer than 10 samples leftover, don't bother.
549  for(auto i = end - WindowSizeInt; samplesleft >= 10;
550  i -= (WindowSizeInt - 1), samplesleft -= (WindowSizeInt -1 )) {
551 
552  //Set blocksize so that it is the right size
553  const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
554 
555  if(!AboveThreshold(t,i,blocksize))
556  {
557 
558  blockruns++; //Hit
559  }
560  else
561  {
562  blockruns=0; //Miss--start over
563  lastsubthresholdsample = i+WindowSizeInt;
564 
565  }
566 
567  //If the blockrun is long enough, break out of the loop early:
568  if(blockruns > mSilentWindowSize/mWindowSize)
569  break;
570 
571  }
572 
573  //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
574  if(samplesleft > 10) {
575 
576  //Calculate how many to scan through--we only have to go through (at most)
577  //the first window + 1 samples--but we need another window samples to draw from.
578  const size_t remaining = 2*WindowSizeInt+1;
579 
580  //To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
581  //Only go through the first SilentWindowSizeInt samples, and choose the first that trips the key.
582  Floats buffer{ remaining };
583  t.Get((samplePtr)buffer.get(), floatSample,
584  lastsubthresholdsample - remaining, remaining);
585 
586  //Initialize these trend markers atrend and ztrend. They keep track of the
587  //up/down trends at the start and end of the remaining window.
588  int atrend = sgn(buffer[remaining - 2] - buffer[remaining - 1]);
589  int ztrend =
590  sgn(buffer[remaining - WindowSizeInt - 2] -
591  buffer[remaining - WindowSizeInt - 2]);
592 
593  double erg=0;
594  double sc=0;
595  double dc=0;
596  //Get initial test statistic values.
597  if(mUseEnergy)
598  erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt);
600  sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt);
602  dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
603 
604  //Now, go through the sound again, sample by sample.
605  size_t i;
606  for(i = remaining - 1; i > WindowSizeInt; i--) {
607 
608  int tests = 0;
609  int testThreshold = 0;
610  //Update the test statistics
611  if(mUseEnergy)
612  {
613  TestEnergyUpdate(erg, WindowSizeInt,buffer[i],buffer[i+WindowSizeInt+1]);
614  tests += (int)(erg>mThresholdEnergy);
615  testThreshold++;
616  }
618  {
619  TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
620  tests += (int)(sc < mThresholdSignChangesLower);
621  testThreshold++;
622  }
624  {
625  TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
626  tests += (int)(sc > mThresholdSignChangesUpper);
627  testThreshold++;
628  }
630  {
631  TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
632  tests += (int)(dc < mThresholdDirectionChangesLower);
633  testThreshold++;
634  }
636  {
637  TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
638  tests += (int)(dc > mThresholdDirectionChangesUpper);
639  testThreshold++;
640  }
641 
642 
643 
644  if(tests < testThreshold)
645  { //Finish off on the first hit
646  break;
647  }
648  }
649 
650  //When we get here, lastsubthresholdsample - remaining + i is the best guess for the OFF point
651  return lastsubthresholdsample - remaining + i;
652  }
653  else {
654  //If we failed to find anything, return the end position
655  return end ;
656  }
657  }
658 }
double TestEnergy(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:849
int AudacityMessageBox(const wxString &message, const wxString &caption=AudacityMessageBoxCaptionStr(), long style=wxOK|wxCENTRE, wxWindow *parent=NULL, int x=wxDefaultCoord, int y=wxDefaultCoord)
Definition: ErrorDialog.h:92
double mThresholdDirectionChangesUpper
Definition: VoiceKey.h:69
bool AboveThreshold(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:661
void TestEnergyUpdate(double &prevErg, int length, const float &drop, const float &add)
Definition: VoiceKey.cpp:882
bool mUseSignChangesLow
Definition: VoiceKey.h:73
size_t limitSampleBufferSize(size_t bufferSize, sampleCount limit)
Definition: Types.h:178
double mSilentWindowSize
Definition: VoiceKey.h:79
double mWindowSize
Definition: VoiceKey.h:54
bool mUseDirectionChangesLow
Definition: VoiceKey.h:75
double mThresholdSignChangesUpper
Definition: VoiceKey.h:67
double TestDirectionChanges(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:946
bool mUseDirectionChangesHigh
Definition: VoiceKey.h:76
bool mUseEnergy
Definition: VoiceKey.h:72
char * samplePtr
Definition: Types.h:203
bool mUseSignChangesHigh
Definition: VoiceKey.h:74
double TestSignChanges(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:890
_("Move Track &Down")+wxT("\t")+(GetActiveProject() -> GetCommandManager() ->GetKeyFromName(wxT("TrackMoveDown")).Raw()), OnMoveTrack) POPUP_MENU_ITEM(OnMoveTopID, _("Move Track to &Top")+wxT("\t")+(GetActiveProject() ->GetCommandManager() ->GetKeyFromName(wxT("TrackMoveTop")).Raw()), OnMoveTrack) POPUP_MENU_ITEM(OnMoveBottomID, _("Move Track to &Bottom")+wxT("\t")+(GetActiveProject() ->GetCommandManager() ->GetKeyFromName(wxT("TrackMoveBottom")).Raw()), OnMoveTrack)#define SET_TRACK_NAME_PLUGIN_SYMBOLclass SetTrackNameCommand:public AudacityCommand
double mThresholdDirectionChangesLower
Definition: VoiceKey.h:68
double mThresholdSignChangesLower
Definition: VoiceKey.h:66
void TestDirectionChangesUpdate(double &currentdirectionchanges, int length, int &atrend, const float &a1, const float &a2, int &ztrend, const float &z1, const float &z2)
Definition: VoiceKey.cpp:995
double GetRate() const
Definition: WaveTrack.cpp:398
bool Get(samplePtr buffer, sampleFormat format, sampleCount start, size_t len, fillFormat fill=fillZero, bool mayThrow=true, sampleCount *pNumCopied=nullptr) const
Definition: WaveTrack.cpp:1971
int sgn(int number)
Definition: VoiceKey.h:98
double mThresholdEnergy
Definition: VoiceKey.h:65
void TestSignChangesUpdate(double &currentsignchanges, int length, const float &a1, const float &a2, const float &z1, const float &z2)
Definition: VoiceKey.cpp:933
sampleCount VoiceKey::OffForward ( const WaveTrack & t,
sampleCount  start,
sampleCount  len 
)

Definition at line 386 of file VoiceKey.cpp.

References _(), AboveThreshold(), sampleCount::as_double(), AudacityMessageBox(), floatSample, WaveTrack::Get(), WaveTrack::GetRate(), limitSampleBufferSize(), mSilentWindowSize, mThresholdDirectionChangesLower, mThresholdDirectionChangesUpper, mThresholdEnergy, mThresholdSignChangesLower, mThresholdSignChangesUpper, mUseDirectionChangesHigh, mUseDirectionChangesLow, mUseEnergy, mUseSignChangesHigh, mUseSignChangesLow, mWindowSize, sgn(), TestDirectionChanges(), TestDirectionChangesUpdate(), TestEnergy(), TestEnergyUpdate(), TestSignChanges(), and TestSignChangesUpdate().

388 {
389 
390  if((mWindowSize) >= (len + 10).as_double() ){
391  AudacityMessageBox(_("Selection is too small to use voice key."));
392 
393  return start;
394  }
395  else {
396 
397 
398  //Change the seconds-based parameters into sample-based parameters
399  double rate = t.GetRate(); //Translates seconds to samples
400  unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize); //Size of window to examine
401  unsigned int SilentWindowSizeInt = (unsigned int)(rate * mSilentWindowSize); //This much signal is necessary to trip key
402 
403  sampleCount samplesleft ( len.as_double() - WindowSizeInt ); //Indexes the number of samples remaining in the selection
404  auto lastsubthresholdsample = start; //start this off at the selection start
405  // keeps track of the sample number of the last sample to not exceed the threshold
406 
407  int blockruns=0; //keeps track of the number of consecutive below-threshold blocks
408 
409  //This loop goes through the selection a block at a time. If a long enough run
410  //of below-threshold blocks occurs, we return to the last above-threshold block and
411  //go through one sample at a time.
412  //If there are fewer than 10 samples leftover, don't bother.
413  for(auto i = start; samplesleft >= 10;
414  i += (WindowSizeInt - 1) , samplesleft -= (WindowSizeInt - 1)) {
415 
416  //Set blocksize so that it is the right size
417  const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
418 
419  if(!AboveThreshold(t,i,blocksize))
420  {
421  blockruns++; //Hit
422  }
423  else
424  {
425  blockruns=0; //Above threshold--start over
426  lastsubthresholdsample = i;
427  }
428 
429  //If the blockrun is long enough, break out of the loop early:
430  if(blockruns > mSilentWindowSize/mWindowSize)
431  break;
432 
433  }
434 
435  //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
436  if(samplesleft > 10) {
437 
438 
439  //Calculate how many to scan through--we only have to go through (at most)
440  //the first window + 1 samples--but we need another window samples to draw from.
441  size_t remaining = 2*WindowSizeInt+1;
442 
443  //To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
444  //Only go through the first SilentWindowSizeInt samples, and choose the first that trips the key.
445  Floats buffer{ remaining };
446  t.Get((samplePtr)buffer.get(), floatSample,
447  lastsubthresholdsample, remaining);
448 
449  //Initialize these trend markers atrend and ztrend. They keep track of the
450  //up/down trends at the start and end of the evaluation window.
451  int atrend = sgn(buffer[1]-buffer[0]);
452  int ztrend = sgn(buffer[WindowSizeInt+1]-buffer[WindowSizeInt]);
453 
454 
455  double erg=0;
456  double sc=0;
457  double dc=0;
458 
459  //Get initial test statistic values.
460  if(mUseEnergy)
461  erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt);
463  sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt);
465  dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
466 
467  //Now, go through the sound again, sample by sample.
468  size_t i;
469  for(i = 0; i < SilentWindowSizeInt - WindowSizeInt; i++) {
470  int tests = 0;
471  int testThreshold = 0;
472  //Update the test statistics
473  if(mUseEnergy)
474  {
475  TestEnergyUpdate(erg, WindowSizeInt,buffer[i],buffer[i+WindowSizeInt+1]);
476  tests += (int)(erg>mThresholdEnergy);
477  testThreshold++;
478  }
480  {
481  TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
482  tests += (int)(sc < mThresholdSignChangesLower);
483  testThreshold++;
484  }
486  {
487  TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
488  tests += (int)(sc > mThresholdSignChangesUpper);
489  testThreshold++;
490  }
492  {
493  TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
494  tests += (int)(dc < mThresholdDirectionChangesLower);
495  testThreshold++;
496  }
498  {
499  TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
500  tests += (int)(dc > mThresholdDirectionChangesUpper);
501  testThreshold++;
502  }
503 
504  if(tests < testThreshold)
505  { //Finish off on the first below-threshold block
506  break;
507  }
508  }
509 
510  //When we get here, i+lastsubthresholdsample is the best guess for the OFF point
511  return i + lastsubthresholdsample;
512  }
513  else {
514  //If we failed to find anything, return the start position
515  return start ;
516  }
517  }
518 }
double TestEnergy(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:849
double as_double() const
Definition: Types.h:88
int AudacityMessageBox(const wxString &message, const wxString &caption=AudacityMessageBoxCaptionStr(), long style=wxOK|wxCENTRE, wxWindow *parent=NULL, int x=wxDefaultCoord, int y=wxDefaultCoord)
Definition: ErrorDialog.h:92
double mThresholdDirectionChangesUpper
Definition: VoiceKey.h:69
bool AboveThreshold(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:661
void TestEnergyUpdate(double &prevErg, int length, const float &drop, const float &add)
Definition: VoiceKey.cpp:882
bool mUseSignChangesLow
Definition: VoiceKey.h:73
size_t limitSampleBufferSize(size_t bufferSize, sampleCount limit)
Definition: Types.h:178
double mSilentWindowSize
Definition: VoiceKey.h:79
double mWindowSize
Definition: VoiceKey.h:54
bool mUseDirectionChangesLow
Definition: VoiceKey.h:75
double mThresholdSignChangesUpper
Definition: VoiceKey.h:67
double TestDirectionChanges(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:946
bool mUseDirectionChangesHigh
Definition: VoiceKey.h:76
bool mUseEnergy
Definition: VoiceKey.h:72
char * samplePtr
Definition: Types.h:203
bool mUseSignChangesHigh
Definition: VoiceKey.h:74
double TestSignChanges(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:890
_("Move Track &Down")+wxT("\t")+(GetActiveProject() -> GetCommandManager() ->GetKeyFromName(wxT("TrackMoveDown")).Raw()), OnMoveTrack) POPUP_MENU_ITEM(OnMoveTopID, _("Move Track to &Top")+wxT("\t")+(GetActiveProject() ->GetCommandManager() ->GetKeyFromName(wxT("TrackMoveTop")).Raw()), OnMoveTrack) POPUP_MENU_ITEM(OnMoveBottomID, _("Move Track to &Bottom")+wxT("\t")+(GetActiveProject() ->GetCommandManager() ->GetKeyFromName(wxT("TrackMoveBottom")).Raw()), OnMoveTrack)#define SET_TRACK_NAME_PLUGIN_SYMBOLclass SetTrackNameCommand:public AudacityCommand
double mThresholdDirectionChangesLower
Definition: VoiceKey.h:68
double mThresholdSignChangesLower
Definition: VoiceKey.h:66
void TestDirectionChangesUpdate(double &currentdirectionchanges, int length, int &atrend, const float &a1, const float &a2, int &ztrend, const float &z1, const float &z2)
Definition: VoiceKey.cpp:995
double GetRate() const
Definition: WaveTrack.cpp:398
bool Get(samplePtr buffer, sampleFormat format, sampleCount start, size_t len, fillFormat fill=fillZero, bool mayThrow=true, sampleCount *pNumCopied=nullptr) const
Definition: WaveTrack.cpp:1971
int sgn(int number)
Definition: VoiceKey.h:98
double mThresholdEnergy
Definition: VoiceKey.h:65
void TestSignChangesUpdate(double &currentsignchanges, int length, const float &a1, const float &a2, const float &z1, const float &z2)
Definition: VoiceKey.cpp:933
sampleCount VoiceKey::OnBackward ( const WaveTrack t,
sampleCount  start,
sampleCount  len 
)

Definition at line 240 of file VoiceKey.cpp.

References _(), AboveThreshold(), AudacityMessageBox(), floatSample, WaveTrack::Get(), WaveTrack::GetRate(), limitSampleBufferSize(), mSilentWindowSize, mThresholdDirectionChangesLower, mThresholdDirectionChangesUpper, mThresholdEnergy, mThresholdSignChangesLower, mThresholdSignChangesUpper, mUseDirectionChangesHigh, mUseDirectionChangesLow, mUseEnergy, mUseSignChangesHigh, mUseSignChangesLow, mWindowSize, sgn(), TestDirectionChanges(), TestDirectionChangesUpdate(), TestEnergy(), TestEnergyUpdate(), TestSignChanges(), and TestSignChangesUpdate().

242 {
243 
244 
245  if((mWindowSize) >= (len + 10).as_double() ){
246 
247  AudacityMessageBox(_("Selection is too small to use voice key."));
248  return end;
249  }
250  else {
251 
252  //Change the millisecond-based parameters into sample-based parameters
253  double rate = t.GetRate(); //Translates seconds to samples
254  size_t WindowSizeInt = (rate * mWindowSize); //Size of window to examine
255  //unsigned int SilentWindowSizeInt = (unsigned int)(rate * mSilentWindowSize); //This much signal is necessary to trip key
256 
257  auto samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
258  auto lastsubthresholdsample = end; //start this off at the end
259  // keeps track of the sample number of the last sample to not exceed the threshold
260 
261  int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
262 
263 
264  //This loop goes through the selection a block at a time in reverse order. If a long enough run
265  //of above-threshold blocks occurs, we return to the last sub-threshold block and
266  //go through one sample at a time.
267  //If there are fewer than 10 samples leftover, don't bother.
268  for(auto i = end - WindowSizeInt; samplesleft >= 10;
269  i -= (WindowSizeInt - 1) , samplesleft -= (WindowSizeInt - 1)) {
270 
271  //Set blocksize so that it is the right size
272 
273  const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
274 
275 
276  //Test whether we are above threshold
277  if(AboveThreshold(t,i,blocksize))
278  {
279  blockruns++; //Hit
280  }
281  else
282  {
283  blockruns=0; //Miss--start over
284  lastsubthresholdsample = i+WindowSizeInt;
285  }
286 
287  //If the blockrun is long enough, break out of the loop early:
288  if(blockruns > mSilentWindowSize/mWindowSize)
289  break;
290 
291  }
292 
293  //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
294  if(samplesleft > 10) {
295 
296  //Calculate how many to scan through--we only have to go through (at most)
297  //the first window + 1 samples--but we need another window samples to draw from.
298  size_t remaining = 2*WindowSizeInt+1;
299 
300  //To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
301  //Only go through the first mSilentWindowSizeInt samples, and choose the first that trips the key.
302  Floats buffer{ remaining };
303  t.Get((samplePtr)buffer.get(), floatSample,
304  lastsubthresholdsample - remaining, remaining);
305 
306  //Initialize these trend markers atrend and ztrend. They keep track of the
307  //up/down trends at the start and end of the evaluation window.
308  int atrend = sgn(buffer[remaining - 2]-buffer[remaining - 1]);
309 
310  int ztrend = sgn(buffer[remaining - WindowSizeInt - 2] -
311  buffer[remaining - WindowSizeInt
312  // PVS-Studio detected a probable error here
313  // when it read - 2.
314  // is - 1 correct?
315  // This code is unused. I didn't study further.
316  - 1
317  ]);
318 
319  double erg=0;
320  double sc = 0;
321  double dc = 0;
322 
323  //Get initial test statistic values.
324  if(mUseEnergy)
325  erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt);
327  sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt);
329  dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
330 
331  //Now, go through the sound again, sample by sample.
332  size_t i;
333  for(i = remaining - 1; i > WindowSizeInt; i--) {
334  int tests = 0;
335  int testThreshold = 0;
336  //Update the test statistics
337  if(mUseEnergy)
338  {
339  TestEnergyUpdate(erg, WindowSizeInt,buffer[i],buffer[i+WindowSizeInt+1]);
340  tests += (int)(erg>mThresholdEnergy);
341  testThreshold++;
342  }
344  {
345  TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
346  tests += (int)(sc < mThresholdSignChangesLower);
347  testThreshold++;
348  }
350  {
351  TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
352  tests += (int)(sc > mThresholdSignChangesUpper);
353  testThreshold++;
354  }
356  {
357  TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
358  tests += (int)(dc < mThresholdDirectionChangesLower);
359  testThreshold++;
360  }
362  {
363  TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
364  tests += (int)(dc > mThresholdDirectionChangesUpper);
365  testThreshold++;
366  }
367 
368  if(tests >= testThreshold)
369  { //Finish off on the first hit
370  break;
371  }
372  }
373 
374  //When we get here, lastsubthresholdsample - remaining + i is the best guess for where the word starts
375  return lastsubthresholdsample - remaining + i;
376  }
377  else {
378  //If we failed to find anything, return the end position
379  return end ;
380  }
381  }
382 }
double TestEnergy(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:849
int AudacityMessageBox(const wxString &message, const wxString &caption=AudacityMessageBoxCaptionStr(), long style=wxOK|wxCENTRE, wxWindow *parent=NULL, int x=wxDefaultCoord, int y=wxDefaultCoord)
Definition: ErrorDialog.h:92
double mThresholdDirectionChangesUpper
Definition: VoiceKey.h:69
bool AboveThreshold(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:661
void TestEnergyUpdate(double &prevErg, int length, const float &drop, const float &add)
Definition: VoiceKey.cpp:882
bool mUseSignChangesLow
Definition: VoiceKey.h:73
size_t limitSampleBufferSize(size_t bufferSize, sampleCount limit)
Definition: Types.h:178
double mSilentWindowSize
Definition: VoiceKey.h:79
double mWindowSize
Definition: VoiceKey.h:54
bool mUseDirectionChangesLow
Definition: VoiceKey.h:75
double mThresholdSignChangesUpper
Definition: VoiceKey.h:67
double TestDirectionChanges(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:946
bool mUseDirectionChangesHigh
Definition: VoiceKey.h:76
bool mUseEnergy
Definition: VoiceKey.h:72
char * samplePtr
Definition: Types.h:203
bool mUseSignChangesHigh
Definition: VoiceKey.h:74
double TestSignChanges(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:890
_("Move Track &Down")+wxT("\t")+(GetActiveProject() -> GetCommandManager() ->GetKeyFromName(wxT("TrackMoveDown")).Raw()), OnMoveTrack) POPUP_MENU_ITEM(OnMoveTopID, _("Move Track to &Top")+wxT("\t")+(GetActiveProject() ->GetCommandManager() ->GetKeyFromName(wxT("TrackMoveTop")).Raw()), OnMoveTrack) POPUP_MENU_ITEM(OnMoveBottomID, _("Move Track to &Bottom")+wxT("\t")+(GetActiveProject() ->GetCommandManager() ->GetKeyFromName(wxT("TrackMoveBottom")).Raw()), OnMoveTrack)#define SET_TRACK_NAME_PLUGIN_SYMBOLclass SetTrackNameCommand:public AudacityCommand
double mThresholdDirectionChangesLower
Definition: VoiceKey.h:68
double mThresholdSignChangesLower
Definition: VoiceKey.h:66
void TestDirectionChangesUpdate(double &currentdirectionchanges, int length, int &atrend, const float &a1, const float &a2, int &ztrend, const float &z1, const float &z2)
Definition: VoiceKey.cpp:995
double GetRate() const
Definition: WaveTrack.cpp:398
bool Get(samplePtr buffer, sampleFormat format, sampleCount start, size_t len, fillFormat fill=fillZero, bool mayThrow=true, sampleCount *pNumCopied=nullptr) const
Definition: WaveTrack.cpp:1971
int sgn(int number)
Definition: VoiceKey.h:98
double mThresholdEnergy
Definition: VoiceKey.h:65
void TestSignChangesUpdate(double &currentsignchanges, int length, const float &a1, const float &a2, const float &z1, const float &z2)
Definition: VoiceKey.cpp:933
sampleCount VoiceKey::OnForward ( const WaveTrack t,
sampleCount  start,
sampleCount  len 
)

Definition at line 88 of file VoiceKey.cpp.

References _(), AboveThreshold(), AudacityMessageBox(), floatSample, WaveTrack::Get(), WaveTrack::GetRate(), limitSampleBufferSize(), mSignalWindowSize, mThresholdDirectionChangesLower, mThresholdDirectionChangesUpper, mThresholdEnergy, mThresholdSignChangesLower, mThresholdSignChangesUpper, mUseDirectionChangesHigh, mUseDirectionChangesLow, mUseEnergy, mUseSignChangesHigh, mUseSignChangesLow, mWindowSize, sgn(), TestDirectionChanges(), TestDirectionChangesUpdate(), TestEnergy(), TestEnergyUpdate(), TestSignChanges(), and TestSignChangesUpdate().

90 {
91 
92  if((mWindowSize) >= (len + 10).as_double() ){
93 
94  /* i18n-hint: Voice key is an experimental/incomplete feature that
95  is used to navigate in vocal recordings, to move forwards and
96  backwards by words. So 'key' is being used in the sense of an index.
97  This error message means that you've selected too short
98  a region of audio to be able to use this feature.*/
99  AudacityMessageBox(_("Selection is too small to use voice key."));
100  return start;
101  }
102  else {
103 
104  //Change the millisecond-based parameters into sample-based parameters
105  double rate = t.GetRate(); //Translates seconds to samples
106  size_t WindowSizeInt = (rate * mWindowSize); //Size of window to examine
107  size_t SignalWindowSizeInt = (rate * mSignalWindowSize); //This much signal is necessary to trip key
108 
109  auto samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
110  auto lastsubthresholdsample = start; //start this off at the selection start
111  // keeps track of the sample number of the last sample to not exceed the threshold
112 
113  int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
114 
115 
116  //This loop goes through the selection a block at a time. If a long enough run
117  //of above-threshold blocks occurs, we return to the last sub-threshold block and
118  //go through one sample at a time.
119  //If there are fewer than 10 samples leftover, don't bother.
120 
121  for(auto i = start; samplesleft >= 10;
122  i += (WindowSizeInt - 1) , samplesleft -= (WindowSizeInt - 1)) {
123 
124  //Set blocksize so that it is the right size
125  const auto blocksize = limitSampleBufferSize( WindowSizeInt, samplesleft);
126 
127  //Test whether we are above threshold (the number of stats)
128  if(AboveThreshold(t,i,blocksize))
129  {
130  blockruns++; //Hit
131  } else {
132  blockruns=0; //Miss--start over
133  lastsubthresholdsample = i;
134  }
135 
136  //If the blockrun is long enough, break out of the loop early:
137  if(blockruns > mSignalWindowSize/mWindowSize)
138  break;
139 
140  }
141 
142  //Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
143  if(samplesleft > 10) {
144 
145 
146  //Calculate how many to scan through--we only have to go through (at most)
147  //the first window + 1 samples--but we need another window samples to draw from.
148  size_t remaining = 2*WindowSizeInt+1;
149 
150  //To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
151  //Only go through the first SignalWindowSizeInt samples, and choose the first that trips the key.
152  Floats buffer{ remaining };
153  t.Get((samplePtr)buffer.get(), floatSample,
154  lastsubthresholdsample, remaining);
155 
156 
157 
158  //Initialize these trend markers atrend and ztrend. They keep track of the
159  //up/down trends at the start and end of the evaluation window.
160  int atrend = sgn(buffer[1]-buffer[0]);
161  int ztrend = sgn(buffer[WindowSizeInt+1]-buffer[WindowSizeInt]);
162 
163 
164  double erg=0;
165  double sc=0;
166  double dc=0;
167 
168  //Get initial test statistic values.
169  if(mUseEnergy)
170  erg = TestEnergy(t, lastsubthresholdsample, WindowSizeInt);
171 
173  sc = TestSignChanges(t,lastsubthresholdsample, WindowSizeInt);
174 
176  dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
177 
178 
179  //Now, go through the sound again, sample by sample.
180  wxASSERT(WindowSizeInt < SignalWindowSizeInt);
181  size_t i;
182  for(i = 0; i + WindowSizeInt < SignalWindowSizeInt; i++) {
183 
184  int tests = 0;
185  int testThreshold = 0;
186  //Update the test statistics
187  if(mUseEnergy)
188  {
189  TestEnergyUpdate(erg, WindowSizeInt,buffer[i],buffer[i+WindowSizeInt+1]);
190  tests += (int)(erg>mThresholdEnergy);
191  testThreshold++;
192  }
194  {
195  TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
196  tests += (int)(sc < mThresholdSignChangesLower);
197  testThreshold++;
198  }
199 
201  {
202  TestSignChangesUpdate(sc,WindowSizeInt,buffer[i],buffer[i+1],buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
203  tests += (int)(sc > mThresholdSignChangesUpper);
204  testThreshold++;
205  }
206 
208  {
209  TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
210  tests += (int)(dc < mThresholdDirectionChangesLower);
211  testThreshold++;
212  }
213 
215  {
216  TestDirectionChangesUpdate(dc,WindowSizeInt,atrend,buffer[i],buffer[i+1],ztrend,buffer[i+WindowSizeInt],buffer[i+WindowSizeInt+1]);
217  tests += (int)(dc > mThresholdDirectionChangesUpper);
218  testThreshold++;
219  }
220 
221 
222 
223  if(tests >= testThreshold)
224  { //Finish off on the first hit
225  break;
226  }
227  }
228 
229  //When we get here, i+lastsubthresholdsample is the best guess for where the word starts
230  return i + lastsubthresholdsample;
231  }
232  else {
233  //If we failed to find anything, return the start position
234  return start ;
235  }
236  }
237 }
double TestEnergy(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:849
int AudacityMessageBox(const wxString &message, const wxString &caption=AudacityMessageBoxCaptionStr(), long style=wxOK|wxCENTRE, wxWindow *parent=NULL, int x=wxDefaultCoord, int y=wxDefaultCoord)
Definition: ErrorDialog.h:92
double mThresholdDirectionChangesUpper
Definition: VoiceKey.h:69
bool AboveThreshold(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:661
void TestEnergyUpdate(double &prevErg, int length, const float &drop, const float &add)
Definition: VoiceKey.cpp:882
bool mUseSignChangesLow
Definition: VoiceKey.h:73
double mSignalWindowSize
Definition: VoiceKey.h:80
size_t limitSampleBufferSize(size_t bufferSize, sampleCount limit)
Definition: Types.h:178
double mWindowSize
Definition: VoiceKey.h:54
bool mUseDirectionChangesLow
Definition: VoiceKey.h:75
double mThresholdSignChangesUpper
Definition: VoiceKey.h:67
double TestDirectionChanges(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:946
bool mUseDirectionChangesHigh
Definition: VoiceKey.h:76
bool mUseEnergy
Definition: VoiceKey.h:72
char * samplePtr
Definition: Types.h:203
bool mUseSignChangesHigh
Definition: VoiceKey.h:74
double TestSignChanges(const WaveTrack &t, sampleCount start, sampleCount len)
Definition: VoiceKey.cpp:890
_("Move Track &Down")+wxT("\t")+(GetActiveProject() -> GetCommandManager() ->GetKeyFromName(wxT("TrackMoveDown")).Raw()), OnMoveTrack) POPUP_MENU_ITEM(OnMoveTopID, _("Move Track to &Top")+wxT("\t")+(GetActiveProject() ->GetCommandManager() ->GetKeyFromName(wxT("TrackMoveTop")).Raw()), OnMoveTrack) POPUP_MENU_ITEM(OnMoveBottomID, _("Move Track to &Bottom")+wxT("\t")+(GetActiveProject() ->GetCommandManager() ->GetKeyFromName(wxT("TrackMoveBottom")).Raw()), OnMoveTrack)#define SET_TRACK_NAME_PLUGIN_SYMBOLclass SetTrackNameCommand:public AudacityCommand
double mThresholdDirectionChangesLower
Definition: VoiceKey.h:68
double mThresholdSignChangesLower
Definition: VoiceKey.h:66
void TestDirectionChangesUpdate(double &currentdirectionchanges, int length, int &atrend, const float &a1, const float &a2, int &ztrend, const float &z1, const float &z2)
Definition: VoiceKey.cpp:995
double GetRate() const
Definition: WaveTrack.cpp:398
bool Get(samplePtr buffer, sampleFormat format, sampleCount start, size_t len, fillFormat fill=fillZero, bool mayThrow=true, sampleCount *pNumCopied=nullptr) const
Definition: WaveTrack.cpp:1971
int sgn(int number)
Definition: VoiceKey.h:98
double mThresholdEnergy
Definition: VoiceKey.h:65
void TestSignChangesUpdate(double &currentsignchanges, int length, const float &a1, const float &a2, const float &z1, const float &z2)
Definition: VoiceKey.cpp:933
void VoiceKey::SetKeyType ( bool  erg,
bool  scLow,
bool  scHigh,
bool  dcLow,
bool  dcHigh 
)

Definition at line 837 of file VoiceKey.cpp.

References mUseDirectionChangesHigh, mUseDirectionChangesLow, mUseEnergy, mUseSignChangesHigh, and mUseSignChangesLow.

839 {
840  mUseEnergy = erg;
841  mUseSignChangesLow = scLow;
842  mUseSignChangesHigh = scHigh;
843  mUseDirectionChangesLow = dcLow;
844  mUseDirectionChangesHigh = dcHigh;
845 }
bool mUseSignChangesLow
Definition: VoiceKey.h:73
bool mUseDirectionChangesLow
Definition: VoiceKey.h:75
bool mUseDirectionChangesHigh
Definition: VoiceKey.h:76
bool mUseEnergy
Definition: VoiceKey.h:72
bool mUseSignChangesHigh
Definition: VoiceKey.h:74
double VoiceKey::TestDirectionChanges ( const WaveTrack t,
sampleCount  start,
sampleCount  len 
)
private

Definition at line 946 of file VoiceKey.cpp.

References sampleCount::as_double(), floatSample, WaveTrack::Get(), WaveTrack::GetBestBlockSize(), WaveTrack::GetMaxBlockSize(), limitSampleBufferSize(), and sgn().

Referenced by AboveThreshold(), CalibrateNoise(), OffBackward(), OffForward(), OnBackward(), and OnForward().

948 {
949 
950 
951  auto s = start; //Keep track of start
952  auto originalLen = len; //Keep track of the length of block to process (it's not the length of t)
953  const auto blockSize = limitSampleBufferSize(
954  t.GetMaxBlockSize(), len); //Determine size of sampling buffer
955  unsigned long directionchanges = 1;
956  float lastval=float(0);
957  int lastdirection=1;
958 
959  Floats buffer{ blockSize }; //Get a sampling buffer
960 
961  while(len > 0) {
962  //Figure out how much to grab
963  auto block = limitSampleBufferSize ( t.GetBestBlockSize(s), len );
964 
965  t.Get((samplePtr)buffer.get(), floatSample, s, block); //grab the block;
966 
967  if (len == originalLen) {
968  //The first time through, set stuff up special.
969  lastval = buffer[0];
970  }
971 
972  //Now, go through the block and count direction changes
973 
974 
975  for(decltype(block) i = 0; i< block; i++){
976 
977  if( sgn(buffer[i]-lastval) != lastdirection) {
978  directionchanges++;
979  lastdirection = sgn(buffer[i] - lastval);
980  }
981  lastval = buffer[i];
982 
983  }
984  len -= block;
985  s += block;
986  }
987  return (double)directionchanges/originalLen.as_double();
988 }
double as_double() const
Definition: Types.h:88
size_t GetBestBlockSize(sampleCount t) const
Definition: WaveTrack.cpp:1607
size_t limitSampleBufferSize(size_t bufferSize, sampleCount limit)
Definition: Types.h:178
char * samplePtr
Definition: Types.h:203
size_t GetMaxBlockSize() const
Definition: WaveTrack.cpp:1625
bool Get(samplePtr buffer, sampleFormat format, sampleCount start, size_t len, fillFormat fill=fillZero, bool mayThrow=true, sampleCount *pNumCopied=nullptr) const
Definition: WaveTrack.cpp:1971
int sgn(int number)
Definition: VoiceKey.h:98
void VoiceKey::TestDirectionChangesUpdate ( double &  currentdirectionchanges,
int  length,
int &  atrend,
const float &  a1,
const float &  a2,
int &  ztrend,
const float &  z1,
const float &  z2 
)
private

Definition at line 995 of file VoiceKey.cpp.

References sgn().

Referenced by OffBackward(), OffForward(), OnBackward(), and OnForward().

998 {
999 
1000  if(sgn(a2 - a1)!= atrend ) {
1001  //Here, the direction shifted for the item we're dropping.
1002  currentdirectionchanges -= 1.0/len;
1003  atrend = sgn(a2-a1);
1004  }
1005  if(sgn(z2 - z1)!= ztrend){
1006  //Here, the direction shifts when we add an item
1007  currentdirectionchanges += 1.0/len;
1008  ztrend = sgn(z2-z1);
1009  }
1010 
1011 }
int sgn(int number)
Definition: VoiceKey.h:98
double VoiceKey::TestEnergy ( const WaveTrack t,
sampleCount  start,
sampleCount  len 
)
private

Definition at line 849 of file VoiceKey.cpp.

References sampleCount::as_double(), floatSample, WaveTrack::Get(), WaveTrack::GetBestBlockSize(), WaveTrack::GetMaxBlockSize(), and limitSampleBufferSize().

Referenced by AboveThreshold(), CalibrateNoise(), OffBackward(), OffForward(), OnBackward(), and OnForward().

851 {
852 
853  double sum = 1;
854  auto s = start; //Keep track of start
855  auto originalLen = len; //Keep track of the length of block to process (it's not the length of t)
856  const auto blockSize = limitSampleBufferSize(
857  t.GetMaxBlockSize(), len); //Determine size of sampling buffer
858  Floats buffer{ blockSize }; //Get a sampling buffer
859 
860  while(len > 0)
861  {
862  //Figure out how much to grab
863  auto block = limitSampleBufferSize ( t.GetBestBlockSize(s), len );
864 
865  t.Get((samplePtr)buffer.get(), floatSample, s,block); //grab the block;
866 
867  //Now, go through the block and calculate energy
868  for(decltype(block) i = 0; i< block; i++)
869  {
870  sum += buffer[i]*buffer[i];
871  }
872 
873  len -= block;
874  s += block;
875  }
876 
877  return sum / originalLen.as_double();
878 }
double as_double() const
Definition: Types.h:88
size_t GetBestBlockSize(sampleCount t) const
Definition: WaveTrack.cpp:1607
size_t limitSampleBufferSize(size_t bufferSize, sampleCount limit)
Definition: Types.h:178
char * samplePtr
Definition: Types.h:203
size_t GetMaxBlockSize() const
Definition: WaveTrack.cpp:1625
bool Get(samplePtr buffer, sampleFormat format, sampleCount start, size_t len, fillFormat fill=fillZero, bool mayThrow=true, sampleCount *pNumCopied=nullptr) const
Definition: WaveTrack.cpp:1971
void VoiceKey::TestEnergyUpdate ( double &  prevErg,
int  length,
const float &  drop,
const float &  add 
)
private

Definition at line 882 of file VoiceKey.cpp.

Referenced by OffBackward(), OffForward(), OnBackward(), and OnForward().

883 {
884  //This is an updating formula for the windowed energy statistic: it adds fabs(add)/len and removes fabs(drop)/len, so it only recalculates what's changed.
885  prevErg = prevErg + (double)(fabs(add) - fabs(drop))/len;
886 
887 }
double VoiceKey::TestSignChanges ( const WaveTrack t,
sampleCount  start,
sampleCount  len 
)
private

Definition at line 890 of file VoiceKey.cpp.

References sampleCount::as_double(), floatSample, WaveTrack::Get(), WaveTrack::GetBestBlockSize(), WaveTrack::GetMaxBlockSize(), limitSampleBufferSize(), and sgn().

Referenced by AboveThreshold(), CalibrateNoise(), OffBackward(), OffForward(), OnBackward(), and OnForward().

892 {
893 
894 
895  auto s = start; //Keep track of start
896  auto originalLen = len; //Keep track of the length of block to process (it's not the length of t)
897  const auto blockSize = limitSampleBufferSize(
898  t.GetMaxBlockSize(), len); //Determine size of sampling buffer
899  unsigned long signchanges = 1;
900  int currentsign=0;
901 
902  Floats buffer{ blockSize }; //Get a sampling buffer
903 
904  while(len > 0) {
905  //Figure out how much to grab
906  auto block = limitSampleBufferSize ( t.GetBestBlockSize(s), len );
907 
908  t.Get((samplePtr)buffer.get(), floatSample, s, block); //grab the block;
909 
910  if (len == originalLen)
911  {
912  //The first time through, set stuff up special.
913  currentsign = sgn(buffer[0]);
914  }
915 
916  //Now, go through the block and calculate zero crossings
917 
918  for(decltype(block) i = 0; i< block; i++)
919  {
920  if( sgn(buffer[i]) != currentsign)
921  {
922  currentsign = sgn(buffer[i]);
923  signchanges++;
924  }
925 
926  }
927  len -= block;
928  s += block;
929  }
930  return (double)signchanges / originalLen.as_double();
931 }
double as_double() const
Definition: Types.h:88
size_t GetBestBlockSize(sampleCount t) const
Definition: WaveTrack.cpp:1607
size_t limitSampleBufferSize(size_t bufferSize, sampleCount limit)
Definition: Types.h:178
char * samplePtr
Definition: Types.h:203
size_t GetMaxBlockSize() const
Definition: WaveTrack.cpp:1625
bool Get(samplePtr buffer, sampleFormat format, sampleCount start, size_t len, fillFormat fill=fillZero, bool mayThrow=true, sampleCount *pNumCopied=nullptr) const
Definition: WaveTrack.cpp:1971
int sgn(int number)
Definition: VoiceKey.h:98
void VoiceKey::TestSignChangesUpdate ( double &  currentsignchanges,
int  length,
const float &  a1,
const float &  a2,
const float &  z1,
const float &  z2 
)
private

Definition at line 933 of file VoiceKey.cpp.

References sgn().

Referenced by OffBackward(), OffForward(), OnBackward(), and OnForward().

938 {
939 
940  if(sgn(a1)!=sgn(a2)) currentsignchanges -= 1.0/len;
941  if(sgn(z1)!=sgn(z2)) currentsignchanges += 1.0/len;
942 
943 }
int sgn(int number)
Definition: VoiceKey.h:98

Member Data Documentation

double VoiceKey::mDirectionChangesMean
private

Definition at line 62 of file VoiceKey.h.

Referenced by AdjustThreshold(), CalibrateNoise(), and VoiceKey().

double VoiceKey::mDirectionChangesSD
private

Definition at line 63 of file VoiceKey.h.

Referenced by AdjustThreshold(), CalibrateNoise(), and VoiceKey().

double VoiceKey::mEnergyMean
private

Definition at line 58 of file VoiceKey.h.

Referenced by AdjustThreshold(), CalibrateNoise(), and VoiceKey().

double VoiceKey::mEnergySD
private

Definition at line 59 of file VoiceKey.h.

Referenced by AdjustThreshold(), CalibrateNoise(), and VoiceKey().

double VoiceKey::mSignalWindowSize
private

Definition at line 80 of file VoiceKey.h.

Referenced by OnForward(), and VoiceKey().

double VoiceKey::mSignChangesMean
private

Definition at line 60 of file VoiceKey.h.

Referenced by AdjustThreshold(), CalibrateNoise(), and VoiceKey().

double VoiceKey::mSignChangesSD
private

Definition at line 61 of file VoiceKey.h.

Referenced by AdjustThreshold(), CalibrateNoise(), and VoiceKey().

double VoiceKey::mSilentWindowSize
private

Definition at line 79 of file VoiceKey.h.

Referenced by OffBackward(), OffForward(), OnBackward(), and VoiceKey().

double VoiceKey::mThresholdAdjustment
private

Definition at line 56 of file VoiceKey.h.

Referenced by AdjustThreshold(), and CalibrateNoise().

double VoiceKey::mThresholdDirectionChangesLower
private
double VoiceKey::mThresholdDirectionChangesUpper
private
double VoiceKey::mThresholdEnergy
private
double VoiceKey::mThresholdSignChangesLower
private
double VoiceKey::mThresholdSignChangesUpper
private
bool VoiceKey::mUseDirectionChangesHigh
private
bool VoiceKey::mUseDirectionChangesLow
private
bool VoiceKey::mUseEnergy
private
bool VoiceKey::mUseSignChangesHigh
private
bool VoiceKey::mUseSignChangesLow
private
double VoiceKey::mWindowSize
private

Definition at line 54 of file VoiceKey.h.

Referenced by CalibrateNoise(), OffBackward(), OffForward(), OnBackward(), OnForward(), and VoiceKey().


The documentation for this class was generated from the following files: