19#ifdef EXPERIMENTAL_EQ_SSE_THREADED
20#include "../Project.h"
24#include "../float_cast.h"
30#include <wx/tooltip.h>
35#include "../RealFFTf48x.h"
59void cpuid(
int CPUInfo[4],
int InfoType){
60 __asm__ __volatile__ (
// Set to true by GetMathCaps() the first time it runs, so its capability
// probe is only performed once.
72bool sMathCapsInitialized =
false;
// Bit mask of MATH_FUNCTION_* flags selecting the processing path. Starts as
// SSE + threaded and is read/modified through the *MathPath* accessors.
77int sMathPath=MATH_FUNCTION_SSE|MATH_FUNCTION_THREADED;
79void EffectEqualization48x::SetMathPath(
int mathPath) { sMathPath=mathPath; };
81int EffectEqualization48x::GetMathPath() {
return sMathPath; };
83void EffectEqualization48x::AddMathPathOption(
int mathPath) { sMathPath|=mathPath; };
85void EffectEqualization48x::RemoveMathPathOption(
int mathPath) { sMathPath&=~mathPath; };
// Fills in the shared sMathCaps record from cpuid() the first time it is
// called; later calls skip the probe because sMathCapsInitialized is set.
// NOTE(review): the braces, the declaration of `info` and the return
// statement are not present in this view, so only the visible statements
// are described here.
87MathCaps *EffectEqualization48x::GetMathCaps()
89 if(!sMathCapsInitialized)
91 sMathCapsInitialized=
true;
// Start with every capability cleared; the cpuid() queries below switch
// individual flags on.
92 sMathCaps.x64 =
false;
93 sMathCaps.MMX =
false;
94 sMathCaps.SSE =
false;
95 sMathCaps.SSE2 =
false;
96 sMathCaps.SSE3 =
false;
97 sMathCaps.SSSE3 =
false;
98 sMathCaps.SSE41 =
false;
99 sMathCaps.SSE42 =
false;
100 sMathCaps.SSE4a =
false;
101 sMathCaps.AVX =
false;
102 sMathCaps.XOP =
false;
103 sMathCaps.FMA3 =
false;
104 sMathCaps.FMA4 =
false;
// Query 0x80000000 and keep info[0]; it is compared against 0x80000001 below
// to decide whether the extended query may be issued.
110 cpuid(info, 0x80000000);
111 int nExIds = info[0];
// Query 0x00000001: each capability is a single bit of info[3] or info[2].
// (presumably info[3] is EDX and info[2] is ECX — confirm against the
// register order used by the cpuid() wrapper)
115 cpuid(info,0x00000001);
116 sMathCaps.MMX = (info[3] & ((int)1 << 23)) != 0;
117 sMathCaps.SSE = (info[3] & ((int)1 << 25)) != 0;
118 sMathCaps.SSE2 = (info[3] & ((int)1 << 26)) != 0;
119 sMathCaps.SSE3 = (info[2] & ((int)1 << 0)) != 0;
121 sMathCaps.SSSE3 = (info[2] & ((int)1 << 9)) != 0;
122 sMathCaps.SSE41 = (info[2] & ((int)1 << 19)) != 0;
123 sMathCaps.SSE42 = (info[2] & ((int)1 << 20)) != 0;
125 sMathCaps.AVX = (info[2] & ((int)1 << 28)) != 0;
126 sMathCaps.FMA3 = (info[2] & ((int)1 << 12)) != 0;
// Query 0x80000001 only when the first query reported it as available; it
// supplies the x64, SSE4a, FMA4 and XOP flags.
129 if (nExIds >= 0x80000001){
130 cpuid(info,0x80000001);
131 sMathCaps.x64 = (info[3] & ((int)1 << 29)) != 0;
132 sMathCaps.SSE4a = (info[2] & ((int)1 << 6)) != 0;
133 sMathCaps.FMA4 = (info[2] & ((int)1 << 16)) != 0;
134 sMathCaps.XOP = (info[2] & ((int)1 << 11)) != 0;
// Resets the selected math path to SSE + threaded.
// NOTE(review): any condition guarding this assignment is not visible here.
137 sMathPath=MATH_FUNCTION_SSE|MATH_FUNCTION_THREADED;
142void * malloc_simd(
const size_t size)
145 return _aligned_malloc(
size, 16);
146#elif defined __linux__
147 return memalign (16,
size);
148#elif defined __MACH__
155void free_simd::operator() (
void* mem)
const
164EffectEqualization48x::EffectEqualization48x():
165 mThreadCount(0),mFilterSize(0),mWindowSize(0),mBlockSize(0),mWorkerDataCount(0),mBlocksPerBuffer(20),
166 mScratchBufferSize(0),mSubBufferSize(0),mThreaded(false),
167 mBenching(false),mBufferCount(0)
171EffectEqualization48x::~EffectEqualization48x()
// Rebuilds the processing buffers and the EQWorker objects: derives the
// filter/window/block sizes from mEffectEqualization, makes one malloc_simd
// allocation, and points each BufferInfo entry at its own slice of it.
// nThreads: threading is requested when this is greater than zero.
// NOTE(review): several lines (braces, branches, the return) are missing from
// this view; comments describe only the visible statements.
175bool EffectEqualization48x::AllocateBuffersWorkers(
int nThreads)
// Drop whatever a previous call set up before sizing things again.
178 FreeBuffersWorkers();
// (mM-1) with its low four bits cleared, i.e. rounded down to a multiple of 16.
179 mFilterSize=(mEffectEqualization->mM-1)&(~15);
180 mWindowSize=mEffectEqualization->windowSize;
181 wxASSERT(mFilterSize < mWindowSize);
// Samples consumed per window once room for the filter tail is reserved.
182 mBlockSize=mWindowSize-mFilterSize;
183 auto threadCount = wxThread::GetCPUCount();
// Threaded only if the caller asked for it and a positive CPU count was reported.
184 mThreaded = (nThreads > 0 && threadCount > 0);
187 mThreadCount = threadCount;
// Two more buffer sets than there are threads.
188 mWorkerDataCount=mThreadCount+2;
// Eight interleaved buffers when the AVX flag is set in sMathPath, otherwise four.
194 mBufferCount=sMathPath&MATH_FUNCTION_AVX?8:4;
// NOTE(review): this size already includes sizeof(float), yet the allocation
// below multiplies it by sizeof(float) again and the indexing below uses it as
// a count of floats. That looks like over-allocation rather than an overrun —
// confirm the intended units.
206 mScratchBufferSize=mWindowSize*3*
sizeof(float)*mBufferCount;
207 mSubBufferSize=mBlockSize*(mBufferCount*(mBlocksPerBuffer-1));
// One allocation shared by every worker's buffers.
// NOTE(review): no null check on the malloc_simd result is visible here.
208 mBigBuffer.reset( (
float *)malloc_simd(
sizeof(
float) * (mSubBufferSize + mFilterSize + mScratchBufferSize) * mWorkerDataCount) );
210 mBufferInfo.reinit(mWorkerDataCount);
211 for(
int i=0;i<mWorkerDataCount;i++) {
212 mBufferInfo[i].mFftWindowSize=mWindowSize;
213 mBufferInfo[i].mFftFilterSize=mFilterSize;
214 mBufferInfo[i].mBufferLength=mBlockSize*mBlocksPerBuffer;
215 mBufferInfo[i].mContiguousBufferSize=mSubBufferSize;
// Worker i owns (mSubBufferSize+mScratchBufferSize) floats; its scratch area
// begins right after the first mSubBufferSize of them.
216 mBufferInfo[i].mScratchBuffer=&mBigBuffer[(mSubBufferSize+mScratchBufferSize)*i+mSubBufferSize];
// Source and destination point at the same memory. Consecutive buffers start
// (mBufferLength - mBlockSize) floats apart, so each overlaps the next by one block.
217 for(
int j=0;j<mBufferCount;j++)
218 mBufferInfo[i].mBufferDest[j]=mBufferInfo[i].mBufferSouce[j]=&mBigBuffer[j*(mBufferInfo[i].mBufferLength-mBlockSize)+(mSubBufferSize+mScratchBufferSize)*i];
// One EQWorker per thread; all of them share the BufferInfo list and mDataMutex.
223 mEQWorkers.reinit(mThreadCount);
224 for(
int i=0;i<mThreadCount;i++) {
225 mEQWorkers[i].SetData( mBufferInfo.get(), mWorkerDataCount, &mDataMutex,
this);
226 mEQWorkers[i].Create();
// Shuts the workers down in two passes: every worker is first told to leave
// its loop, and only then is each one waited on.
// NOTE(review): the surrounding condition, the buffer release and the return
// value are not visible in this view.
233bool EffectEqualization48x::FreeBuffersWorkers()
236 for(
int i=0;i<mThreadCount;i++) {
237 mEQWorkers[i].ExitLoop();
239 for(
int i=0;i<mThreadCount;i++) {
240 mEQWorkers[i].Wait();
254#pragma warning(disable: 4702)
258 flags&=~(MATH_FUNCTION_BITREVERSE_TABLE|MATH_FUNCTION_SIN_COS_TABLE);
261 case MATH_FUNCTION_SSE:
262 return ProcessOne4x(count, track, start, len);
264 case MATH_FUNCTION_SSE|MATH_FUNCTION_THREADED:
265 return ProcessOne1x4xThreaded(count, track, start, len);
267 case MATH_FUNCTION_THREADED:
268 case MATH_FUNCTION_THREADED|MATH_FUNCTION_SEGMENTED_CODE:
269 return ProcessOne1x4xThreaded(count, track, start, len, 1);
271 case MATH_FUNCTION_SEGMENTED_CODE:
272 return ProcessOne1x(count, track, start, len);
275 return !mEffectEqualization->ProcessOne(count, track, start, len);
284 mEffectEqualization=effectEqualization;
286 mEffectEqualization->CopyInputTracks();
287 bool bBreakLoop =
false;
291 mEffectEqualization->mM=(mEffectEqualization->mM&(~15))+1;
292 AllocateBuffersWorkers(sMathPath&MATH_FUNCTION_THREADED);
293 auto cleanup =
finally( [&] { FreeBuffersWorkers(); } );
296 mEffectEqualization->mOutputTracks->Selected<
WaveTrack >() ) {
299 double t0 = mEffectEqualization->mT0 < trackStart? trackStart: mEffectEqualization->mT0;
300 double t1 = mEffectEqualization->mT1 > trackEnd? trackEnd: mEffectEqualization->mT1;
305 auto len =
end - start;
306 bBreakLoop=RunFunctionSelect(sMathPath, count, track, start, len);
313 mEffectEqualization->ReplaceProcessedTracks(!bBreakLoop);
// Processes the selection twice — once into a duplicated track list and once
// into the effect's normal output tracks — and then calls DeltaTrack on the
// output track and a track taken from the duplicate list.
// NOTE(review): loop headers, braces and several declarations are missing from
// this view; comments describe only the visible statements.
317bool EffectEqualization48x::TrackCompare()
319 mEffectEqualization->CopyInputTracks();
320 bool bBreakLoop =
false;
// Clears the low four bits of mM and adds one, so mM-1 is a multiple of 16.
324 mEffectEqualization->mM=(mEffectEqualization->mM&(~15))+1;
325 AllocateBuffersWorkers(sMathPath&MATH_FUNCTION_THREADED);
// Scope guard: buffers and workers are released however this function exits.
326 auto cleanup =
finally( [&] { FreeBuffersWorkers(); } );
329 std::vector<const Track*> SecondIMap;
330 std::vector<Track*> SecondOMap;
335 auto &SecondOutputTracks = *pSecondOutputTracks;
338 mEffectEqualization->inputTracks()->Any<
const WaveTrack >()) {
// Duplicate every selected (or sync-lock selected) wave track into the second list.
341 if (aTrack->GetSelected() || aTrack->IsSyncLockSelected())
343 auto o = mEffectEqualization->mFactory->DuplicateWaveTrack( *aTrack );
// NOTE(review): both push_back calls target SecondIMap, and SecondOMap is never
// filled in the visible lines — confirm whether the second one was meant for
// SecondOMap.
344 SecondIMap.push_back(aTrack);
345 SecondIMap.push_back(o.get());
346 SecondOutputTracks.Add( o );
350 for(
int i = 0; i < 2; i++) {
// NOTE(review): pass 0 sets sMathPath to 0; pass 1 assigns sMathPath to itself,
// which does not bring back the value it had before pass 0. Unless the lines
// missing from this view restore it, both passes run with sMathPath == 0 and
// the file-level setting stays 0 afterwards. Saving the value before the loop
// and assigning it back on pass 1 would restore it.
351 i?sMathPath=sMathPath:sMathPath=0;
// Pass 0 iterates the duplicate list, pass 1 the effect's output tracks.
354 ( i ? mEffectEqualization->mOutputTracks.get()
355 : &SecondOutputTracks ) -> Selected< WaveTrack >() ) {
// Clamp the effect's [mT0, mT1] range to the track's own extent.
358 double t0 = mEffectEqualization->mT0 < trackStart? trackStart: mEffectEqualization->mT0;
359 double t1 = mEffectEqualization->mT1 > trackEnd? trackEnd: mEffectEqualization->mT1;
364 auto len =
end - start;
365 bBreakLoop=RunFunctionSelect(sMathPath, count, track, start, len);
// First selected track of the duplicate list.
// NOTE(review): whether iter2 is advanced inside the loop below is not visible here.
373 auto iter2 = (SecondOutputTracks.Selected<
const WaveTrack >()).first;
374 auto track2 = *iter2;
376 mEffectEqualization->mOutputTracks->Selected<
WaveTrack >() ) {
379 double t0 = mEffectEqualization->mT0 < trackStart? trackStart: mEffectEqualization->mT0;
380 double t1 = mEffectEqualization->mT1 > trackEnd? trackEnd: mEffectEqualization->mT1;
385 auto len =
end - start;
386 DeltaTrack(track, track2, start, len);
// The argument is true when the last RunFunctionSelect call returned false.
390 mEffectEqualization->ReplaceProcessedTracks(!bBreakLoop);
394bool EffectEqualization48x::DeltaTrack(
400 Floats buffer1{ trackBlockSize };
401 Floats buffer2{ trackBlockSize };
405 auto originalLen = len;
406 auto currentSample = start;
412 for(
decltype(curretLength) i=0;i<curretLength;i++)
413 buffer1[i]-=buffer2[i];
415 currentSample+=curretLength;
420 ProcessTail(t, output.get(), start, len);
424#include <wx/stopwatch.h>
428 mEffectEqualization=effectEqualization;
429 mEffectEqualization->CopyInputTracks();
430 bool bBreakLoop =
false;
434 mEffectEqualization->mM=(mEffectEqualization->mM&(~15))+1;
435 AllocateBuffersWorkers(MATH_FUNCTION_THREADED);
436 auto cleanup =
finally( [&] { FreeBuffersWorkers(); } );
437 long times[] = { 0,0,0,0,0 };
440 for(
int i = 0; i < 5 && !bBreakLoop; i++) {
443 case 0: localMathPath=MATH_FUNCTION_SSE|MATH_FUNCTION_THREADED;
447 case 1: localMathPath=MATH_FUNCTION_SSE;
451 case 2: localMathPath=MATH_FUNCTION_SEGMENTED_CODE;
453 case 3: localMathPath=MATH_FUNCTION_THREADED|MATH_FUNCTION_SEGMENTED_CODE;
455 case 4: localMathPath=0;
457 default: localMathPath=-1;
459 if(localMathPath >= 0) {
463 mEffectEqualization->mOutputTracks->Selected<
WaveTrack >() ) {
466 double t0 = mEffectEqualization->mT0 < trackStart? trackStart: mEffectEqualization->mT0;
467 double t1 = mEffectEqualization->mT1 > trackEnd? trackEnd: mEffectEqualization->mT1;
472 auto len =
end - start;
473 bBreakLoop=RunFunctionSelect( localMathPath, count, track, start, len);
479 times[i]=timer.Time();
484 mEffectEqualization->ReplaceProcessedTracks(bBreakLoop);
486 wxTimeSpan tsSSEThreaded(0, 0, 0, times[0]);
487 wxTimeSpan tsSSE(0, 0, 0, times[1]);
488 wxTimeSpan tsDefaultEnhanced(0, 0, 0, times[2]);
489 wxTimeSpan tsDefaultThreaded(0, 0, 0, times[3]);
490 wxTimeSpan tsDefault(0, 0, 0, times[4]);
492 mEffectEqualization->MessageBox(
494"Benchmark times:\nOriginal: %s\nDefault Segmented: %s\nDefault Threaded: %s\nSSE: %s\nSSE Threaded: %s\n")
496 tsDefault.Format(
wxT(
"%M:%S.%l")),
497 tsDefaultEnhanced.Format(
wxT(
"%M:%S.%l")),
498 tsDefaultThreaded.Format(
wxT(
"%M:%S.%l")),
499 tsSSE.Format(
wxT(
"%M:%S.%l")),
500 tsSSEThreaded.Format(
wxT(
"%M:%S.%l")) ) );
519 std::vector<std::pair<double, double> > clipStartEndTimes;
520 std::vector<std::pair<double, double> > clipRealStartEndTimes;
521 for (
const auto &clip: t->GetClips())
526 clipStartT = clip->GetStartTime();
527 clipEndT = clip->GetEndTime();
528 if( clipEndT <= startT )
530 if( clipStartT >= startT + lenT )
534 clipRealStartEndTimes.push_back(std::pair<double,double>(clipStartT,clipEndT));
536 if( clipStartT < startT )
538 if( clipEndT > startT + lenT )
539 clipEndT = startT + lenT;
542 clipStartEndTimes.push_back(std::pair<double,double>(clipStartT,clipEndT));
545 for(
unsigned int i=0;i<clipStartEndTimes.size();i++)
548 t->
Clear(clipStartEndTimes[i].first,clipStartEndTimes[i].second);
550 auto toClipOutput = output->
Copy(clipStartEndTimes[i].first-startT, clipStartEndTimes[i].second-startT);
552 t->
Paste(clipStartEndTimes[i].first, toClipOutput.get());
555 if( (clipRealStartEndTimes[i].first != clipStartEndTimes[i].first ||
556 clipRealStartEndTimes[i].second != clipStartEndTimes[i].second) &&
557 !(clipRealStartEndTimes[i].first <= startT &&
558 clipRealStartEndTimes[i].second >= startT+lenT) )
559 t->
Join(clipRealStartEndTimes[i].first,clipRealStartEndTimes[i].second);
// Runs ProcessBuffer1x over a single caller-supplied buffer by wrapping it in
// a temporary BufferInfo.
// sourceBuffer: samples to filter; destBuffer: where results are written;
// bufferLength: stored as mContiguousBufferSize.
// Returns whatever ProcessBuffer1x returns.
564bool EffectEqualization48x::ProcessBuffer(
fft_type *sourceBuffer,
fft_type *destBuffer,
size_t bufferLength)
566 BufferInfo bufferInfo;
567 bufferInfo.mContiguousBufferSize=bufferLength;
568 bufferInfo.mBufferSouce[0]=sourceBuffer;
569 bufferInfo.mBufferDest[0]=destBuffer;
// Scratch space is taken from sourceBuffer itself, starting mSubBufferSize
// floats in. NOTE(review): this assumes the caller's buffer extends that far —
// confirm against callers.
570 bufferInfo.mScratchBuffer=&sourceBuffer[mSubBufferSize];
571 return ProcessBuffer1x(&bufferInfo);
// Scalar (one buffer at a time) filtering of the buffers described by
// bufferInfo: each block is copied into a work window, filtered with Filter1x,
// and written to the destination together with the tail left by the previous
// block.
// NOTE(review): braces and several conditions are missing from this view;
// comments describe only the visible statements.
574bool EffectEqualization48x::ProcessBuffer1x(BufferInfo *bufferInfo)
// One buffer when a contiguous size is given, otherwise four.
// NOTE(review): the 4 is fixed here and does not follow mBufferCount.
576 int bufferCount=bufferInfo->mContiguousBufferSize?1:4;
577 for(
int bufferIndex=0;bufferIndex<bufferCount;bufferIndex++)
// A non-zero contiguous size overrides the per-buffer length.
579 auto bufferLength=bufferInfo->mBufferLength;
580 if(bufferInfo->mContiguousBufferSize)
581 bufferLength=bufferInfo->mContiguousBufferSize;
583 auto blockCount=bufferLength/mBlockSize;
584 auto lastBlockSize=bufferLength%mBlockSize;
// The first two windows of the scratch area alternate as the work window;
// Filter1x gets the region after them as its own scratch.
588 float *workBuffer=bufferInfo->mScratchBuffer;
589 float *scratchBuffer=&workBuffer[mWindowSize*2];
590 float *sourceBuffer=bufferInfo->mBufferSouce[bufferIndex];
591 float *destBuffer=bufferInfo->mBufferDest[bufferIndex];
592 for(
size_t runx=0;runx<blockCount;runx++)
// Even and odd runs use different windows, so the previous run's tail is
// still available while the current one is processed.
594 float *currentBuffer=&workBuffer[mWindowSize*(runx&1)];
595 for(
int i=0;i<mBlockSize;i++)
596 currentBuffer[i]=sourceBuffer[i];
597 sourceBuffer+=mBlockSize;
// NOTE(review): "¤tBuffer" is a mis-decoded "&currentBuffer" (the text
// "&curren" was turned into the currency sign); the intended expression is the
// address of currentBuffer[mBlockSize].
598 float *currentFilter=¤tBuffer[mBlockSize];
599 for(
int i=0;i<mFilterSize;i++)
602 Filter1x(mWindowSize, currentBuffer, scratchBuffer);
603 float *writeEnd=currentBuffer+mBlockSize;
605 writeEnd=currentBuffer+(lastBlockSize+mFilterSize);
// Adds the first mFilterSize filtered samples to the samples stored past
// mBlockSize in the other window, and writes the sums out.
// NOTE(review): the condition choosing between this loop and the skip that
// follows it is not visible here.
607 float *lastOverrun=&workBuffer[mWindowSize*((runx+1)&1)+mBlockSize];
608 for(
int j=0;j<mFilterSize;j++)
609 *destBuffer++= *currentBuffer++ + *lastOverrun++;
// Alternative to the summing loop: step over the first mFilterSize/2 samples.
611 currentBuffer+=mFilterSize>>1;
// Copy the remainder of the window up to writeEnd.
612 while(currentBuffer<writeEnd)
613 *destBuffer++ = *currentBuffer++;
619bool EffectEqualization48x::ProcessOne1x(
int count,
WaveTrack * t,
629 mEffectEqualization->TrackProgress(count, 0.0);
630 int subBufferSize=mBufferCount==8?(mSubBufferSize>>1):mSubBufferSize;
631 auto bigRuns=len/(subBufferSize-mBlockSize);
632 int trackBlocksPerBig=subBufferSize/trackBlockSize;
633 int trackLeftovers=subBufferSize-trackBlocksPerBig*trackBlockSize;
634 size_t singleProcessLength;
638 singleProcessLength =
639 ((mFilterSize>>1)*bigRuns + len%(bigRuns*(subBufferSize-mBlockSize)))
641 auto currentSample=start;
642 bool bBreakLoop =
false;
643 for(
int bigRun=0;bigRun<bigRuns;bigRun++)
646 for(
int i=0;i<trackBlocksPerBig;i++) {
648 currentSample+=trackBlockSize;
652 currentSample+=trackLeftovers;
654 currentSample-=mBlockSize+(mFilterSize>>1);
656 ProcessBuffer1x(mBufferInfo.get());
657 bBreakLoop=mEffectEqualization->TrackProgress(count, (
double)(bigRun)/bigRuns.as_double());
660 output->
Append((
samplePtr)&mBigBuffer[(bigRun?mBlockSize:0)+(mFilterSize>>1)],
floatSample, subBufferSize-((bigRun?mBlockSize:0)+(mFilterSize>>1)));
662 if(singleProcessLength && !bBreakLoop) {
663 t->
Get((
samplePtr)mBigBuffer.get(),
floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
664 ProcessBuffer(mBigBuffer.get(), mBigBuffer.get(), singleProcessLength+mBlockSize+(mFilterSize>>1));
665 output->
Append((
samplePtr)&mBigBuffer[bigRuns > 0 ? mBlockSize : 0],
floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
669 ProcessTail(t, output.get(), start, len);
// Filters one window in place: forward transform with RealFFTf1x, multiply
// each bin by the filter response held in mFilterFuncR/mFilterFuncI, then
// ReorderToTime1x writes the result back into buffer.
// len: window length; only len/2 is used here, as the bin count.
// buffer: input samples, overwritten with the filtered output.
// scratchBuffer: receives the multiplied spectrum before the final call.
// NOTE(review): declarations of i, real and imag and the braces are missing
// from this view.
673void EffectEqualization48x::Filter1x(
size_t len,
674 float *buffer,
float *scratchBuffer)
679 RealFFTf1x(buffer, mEffectEqualization->hFFT.get());
684 float filterFuncR, filterFuncI;
// Slot 0 is scaled by the real filter value only.
685 filterFuncR = mEffectEqualization->mFilterFuncR[0];
686 scratchBuffer[0] = buffer[0] * filterFuncR;
687 auto halfLength = (len / 2);
// Bit 0 of sMathPath chooses between the stored BitReversed table and
// computing the index with SmallRB.
// (presumably bit 0 is MATH_FUNCTION_BITREVERSE_TABLE — confirm)
689 bool useBitReverseTable=sMathPath&1;
691 for(i = 1; i < halfLength; i++)
693 if(useBitReverseTable) {
694 real=buffer[mEffectEqualization->hFFT->BitReversed[i] ];
695 imag=buffer[mEffectEqualization->hFFT->BitReversed[i]+1];
697 int bitReversed=
SmallRB(i,mEffectEqualization->hFFT->pow2Bits);
698 real=buffer[bitReversed];
699 imag=buffer[bitReversed+1];
701 filterFuncR=mEffectEqualization->mFilterFuncR[i];
702 filterFuncI=mEffectEqualization->mFilterFuncI[i];
// Complex product (real + j*imag) * (filterFuncR + j*filterFuncI), stored as
// an interleaved real/imaginary pair.
704 scratchBuffer[2*i ] = real*filterFuncR - imag*filterFuncI;
705 scratchBuffer[2*i+1] = real*filterFuncI + imag*filterFuncR;
// Slot 1 is scaled by the real filter value at index halfLength.
708 filterFuncR=mEffectEqualization->mFilterFuncR[halfLength];
709 scratchBuffer[1] = buffer[1] * filterFuncR;
713 ReorderToTime1x(mEffectEqualization->hFFT.get(), scratchBuffer, buffer);
716bool EffectEqualization48x::ProcessBuffer4x(BufferInfo *bufferInfo)
719 if(bufferInfo->mBufferLength%mBlockSize)
722 auto blockCount=bufferInfo->mBufferLength/mBlockSize;
724 __m128 *readBlocks[4];
725 __m128 *writeBlocks[4];
726 for(
int i=0;i<4;i++) {
727 readBlocks[i]=(__m128 *)bufferInfo->mBufferSouce[i];
728 writeBlocks[i]=(__m128 *)bufferInfo->mBufferDest[i];
731 __m128 *swizzledBuffer128=(__m128 *)bufferInfo->mScratchBuffer;
732 __m128 *scratchBuffer=&swizzledBuffer128[mWindowSize*2];
734 for(
size_t run4x=0;run4x<blockCount;run4x++)
737 __m128 *currentSwizzledBlock=&swizzledBuffer128[mWindowSize*(run4x&1)];
738 for(
int i=0,j=0;j<mBlockSize;i++,j+=4) {
739 __m128 tmp0 = _mm_shuffle_ps(readBlocks[0][i], readBlocks[1][i], _MM_SHUFFLE(1,0,1,0));
740 __m128 tmp1 = _mm_shuffle_ps(readBlocks[0][i], readBlocks[1][i], _MM_SHUFFLE(3,2,3,2));
741 __m128 tmp2 = _mm_shuffle_ps(readBlocks[2][i], readBlocks[3][i], _MM_SHUFFLE(1,0,1,0));
742 __m128 tmp3 = _mm_shuffle_ps(readBlocks[2][i], readBlocks[3][i], _MM_SHUFFLE(3,2,3,2));
743 currentSwizzledBlock[j] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(2,0,2,0));
744 currentSwizzledBlock[j+1] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(3,1,3,1));
745 currentSwizzledBlock[j+2] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(2,0,2,0));
746 currentSwizzledBlock[j+3] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(3,1,3,1));
748 __m128 *thisOverrun128=¤tSwizzledBlock[mBlockSize];
749 for(
int i=0;i<mFilterSize;i++)
750 thisOverrun128[i]=_mm_set1_ps(0.0);
751 Filter4x(mWindowSize, (
float *)currentSwizzledBlock, (
float *)scratchBuffer);
752 int writeStart=0, writeToStart=0;
753 int writeEnd=mBlockSize;
756 __m128 *lastOverrun128=&swizzledBuffer128[mWindowSize*((run4x+1)&1)+mBlockSize];
758 for(
int i=0,j=0;j<mFilterSize;i++,j+=4) {
759 __m128 tmps0 = _mm_add_ps(currentSwizzledBlock[j], lastOverrun128[j]);
760 __m128 tmps1 = _mm_add_ps(currentSwizzledBlock[j+1], lastOverrun128[j+1]);
761 __m128 tmps2 = _mm_add_ps(currentSwizzledBlock[j+2], lastOverrun128[j+2]);
762 __m128 tmps3 = _mm_add_ps(currentSwizzledBlock[j+3], lastOverrun128[j+3]);
763 __m128 tmp0 = _mm_shuffle_ps(tmps1, tmps0, _MM_SHUFFLE(0,1,0,1));
764 __m128 tmp1 = _mm_shuffle_ps(tmps1, tmps0, _MM_SHUFFLE(2,3,2,3));
765 __m128 tmp2 = _mm_shuffle_ps(tmps3, tmps2, _MM_SHUFFLE(0,1,0,1));
766 __m128 tmp3 = _mm_shuffle_ps(tmps3, tmps2, _MM_SHUFFLE(2,3,2,3));
767 writeBlocks[0][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
768 writeBlocks[1][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(0,2,0,2));
769 writeBlocks[2][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(1,3,1,3));
770 writeBlocks[3][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(0,2,0,2));
772 writeStart=mFilterSize;
773 writeToStart=mFilterSize>>2;
775 for(
int i=writeToStart,j=writeStart;j<writeEnd;i++,j+=4) {
776 __m128 tmp0 = _mm_shuffle_ps(currentSwizzledBlock[j+1], currentSwizzledBlock[j], _MM_SHUFFLE(0,1,0,1));
777 __m128 tmp1 = _mm_shuffle_ps(currentSwizzledBlock[j+1], currentSwizzledBlock[j], _MM_SHUFFLE(2,3,2,3));
778 __m128 tmp2 = _mm_shuffle_ps(currentSwizzledBlock[j+3], currentSwizzledBlock[j+2], _MM_SHUFFLE(0,1,0,1));
779 __m128 tmp3 = _mm_shuffle_ps(currentSwizzledBlock[j+3], currentSwizzledBlock[j+2], _MM_SHUFFLE(2,3,2,3));
780 writeBlocks[0][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
781 writeBlocks[1][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(0,2,0,2));
782 writeBlocks[2][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(1,3,1,3));
783 writeBlocks[3][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(0,2,0,2));
789 for(
int i=writeToStart,j=writeStart;j<writeEnd;i++,j+=4) {
790 __m128 tmp0 = _mm_shuffle_ps(currentSwizzledBlock[j+1], currentSwizzledBlock[j], _MM_SHUFFLE(0,1,0,1));
791 __m128 tmp2 = _mm_shuffle_ps(currentSwizzledBlock[j+3], currentSwizzledBlock[j+2], _MM_SHUFFLE(0,1,0,1));
792 writeBlocks[0][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
795 for(
int i=0;i<4;i++) {
796 readBlocks[i]+=mBlockSize>>2;
797 writeBlocks[i]+=mBlockSize>>2;
803bool EffectEqualization48x::ProcessOne4x(
int count,
WaveTrack * t,
806 int subBufferSize=mBufferCount==8?(mSubBufferSize>>1):mSubBufferSize;
808 if(len<subBufferSize)
809 return ProcessOne1x(count, t, start, len);
816 mEffectEqualization->TrackProgress(count, 0.0);
817 auto bigRuns = len/(subBufferSize-mBlockSize);
818 int trackBlocksPerBig=subBufferSize/trackBlockSize;
819 int trackLeftovers=subBufferSize-trackBlocksPerBig*trackBlockSize;
820 size_t singleProcessLength =
821 ((mFilterSize>>1)*bigRuns + len%(bigRuns*(subBufferSize-mBlockSize)))
823 auto currentSample=start;
825 bool bBreakLoop =
false;
826 for(
int bigRun=0;bigRun<bigRuns;bigRun++)
829 for(
int i=0;i<trackBlocksPerBig;i++) {
831 currentSample+=trackBlockSize;
835 currentSample+=trackLeftovers;
837 currentSample-=mBlockSize+(mFilterSize>>1);
839 ProcessBuffer4x(mBufferInfo.get());
840 bBreakLoop=mEffectEqualization->TrackProgress(count, (
double)(bigRun)/bigRuns.as_double());
843 output->
Append((
samplePtr)&mBigBuffer[(bigRun?mBlockSize:0)+(mFilterSize>>1)],
floatSample, subBufferSize-((bigRun?mBlockSize:0)+(mFilterSize>>1)));
845 if(singleProcessLength && !bBreakLoop) {
846 t->
Get((
samplePtr)mBigBuffer.get(),
floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
847 ProcessBuffer(mBigBuffer.get(), mBigBuffer.get(), singleProcessLength+mBlockSize+(mFilterSize>>1));
848 output->
Append((
samplePtr)&mBigBuffer[bigRuns > 0 ? mBlockSize : 0],
floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
853 ProcessTail(t, output.get(), start, len);
857#include <wx/thread.h>
// Worker thread body: claims a ready buffer, processes it with the routine
// selected by mProcessingType, and marks it done.
// NOTE(review): the outer loop, the declaration of i, the case labels and the
// braces are missing from this view.
859void *EQWorker::Entry()
// Scan for the first BufferReady entry while holding the shared mutex, and
// mark it BufferBusy.
864 wxMutexLocker locker( *mMutex );
865 for(; i < mBufferInfoCount; i++) {
866 if(mBufferInfoList[i].mBufferStatus==BufferReady) {
867 mBufferInfoList[i].mBufferStatus=BufferBusy;
// i is still in range only if the scan stopped on an entry.
872 if ( i < mBufferInfoCount ) {
873 switch (mProcessingType)
876 mEffectEqualization48x->ProcessBuffer1x(&mBufferInfoList[i]);
879 mEffectEqualization48x->ProcessBuffer4x(&mBufferInfoList[i]);
// NOTE(review): whether this status write happens with the mutex held is not
// visible here.
882 mBufferInfoList[i].mBufferStatus=BufferDone;
888bool EffectEqualization48x::ProcessOne1x4xThreaded(
int count,
WaveTrack * t,
891 int subBufferSize=mBufferCount==8?(mSubBufferSize>>1):mSubBufferSize;
896 return ProcessOne4x(count, t, start, len);
897 if(mThreadCount<=0 || blockCount<256)
898 return ProcessOne4x(count, t, start, len);
900 for(
int i=0;i<mThreadCount;i++)
901 mEQWorkers[i].mProcessingType=processingType;
907 mEffectEqualization->TrackProgress(count, 0.0);
908 auto bigRuns = len/(subBufferSize-mBlockSize);
909 int trackBlocksPerBig=subBufferSize/trackBlockSize;
910 int trackLeftovers=subBufferSize-trackBlocksPerBig*trackBlockSize;
911 size_t singleProcessLength =
912 ((mFilterSize>>1)*bigRuns + len%(bigRuns*(subBufferSize-mBlockSize)))
914 auto currentSample=start;
916 int bigBlocksRead=mWorkerDataCount, bigBlocksWritten=0;
919 auto maxPreFill = bigRuns < mWorkerDataCount ? bigRuns : mWorkerDataCount;
920 for(
int i=0;i<maxPreFill;i++)
923 for(
int j=0;j<trackBlocksPerBig;j++) {
924 t->
Get((
samplePtr)&mBufferInfo[i].mBufferSouce[0][j*trackBlockSize],
floatSample, currentSample, trackBlockSize);
925 currentSample+=trackBlockSize;
928 t->
Get((
samplePtr)&mBufferInfo[i].mBufferSouce[0][trackBlocksPerBig*trackBlockSize],
floatSample, currentSample, trackLeftovers);
929 currentSample+=trackLeftovers;
931 currentSample-=mBlockSize+(mFilterSize>>1);
932 mBufferInfo[i].mBufferStatus=BufferReady;
935 bool bBreakLoop =
false;
936 while(bigBlocksWritten<bigRuns && !bBreakLoop) {
937 bBreakLoop=mEffectEqualization->TrackProgress(count, (
double)(bigBlocksWritten)/bigRuns.as_double());
940 wxMutexLocker locker( mDataMutex );
942 while((mBufferInfo[currentIndex].mBufferStatus==BufferDone) && (bigBlocksWritten<bigRuns)) {
943 output->
Append((
samplePtr)&mBufferInfo[currentIndex].mBufferDest[0][(bigBlocksWritten?mBlockSize:0)+(mFilterSize>>1)],
floatSample, subBufferSize-((bigBlocksWritten?mBlockSize:0)+(mFilterSize>>1)));
945 if(bigBlocksRead<bigRuns) {
947 for(
int j=0;j<trackBlocksPerBig;j++) {
948 t->
Get((
samplePtr)&mBufferInfo[currentIndex].mBufferSouce[0][j*trackBlockSize],
floatSample, currentSample, trackBlockSize);
949 currentSample+=trackBlockSize;
952 t->
Get((
samplePtr)&mBufferInfo[currentIndex].mBufferSouce[0][trackBlocksPerBig*trackBlockSize],
floatSample, currentSample, trackLeftovers);
953 currentSample+=trackLeftovers;
955 currentSample-=mBlockSize+(mFilterSize>>1);
956 mBufferInfo[currentIndex].mBufferStatus=BufferReady;
958 }
else mBufferInfo[currentIndex].mBufferStatus=BufferEmpty;
959 currentIndex=(currentIndex+1)%mWorkerDataCount;
962 if(singleProcessLength && !bBreakLoop) {
963 t->
Get((
samplePtr)mBigBuffer.get(),
floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
964 ProcessBuffer(mBigBuffer.get(), mBigBuffer.get(), singleProcessLength+mBlockSize+(mFilterSize>>1));
969 ProcessTail(t, output.get(), start, len);
973void EffectEqualization48x::Filter4x(
size_t len,
974 float *buffer,
float *scratchBuffer)
977 __m128 real128, imag128;
979 RealFFTf4x(buffer, mEffectEqualization->hFFT.get());
983 __m128 *localFFTBuffer=(__m128 *)scratchBuffer;
984 __m128 *localBuffer=(__m128 *)buffer;
986 __m128 filterFuncR, filterFuncI;
987 filterFuncR = _mm_set1_ps(mEffectEqualization->mFilterFuncR[0]);
988 localFFTBuffer[0] = _mm_mul_ps(localBuffer[0], filterFuncR);
989 auto halfLength = (len / 2);
991 bool useBitReverseTable = sMathPath & 1;
993 for(i = 1; i < halfLength; i++)
995 if(useBitReverseTable) {
996 real128=localBuffer[mEffectEqualization->hFFT->BitReversed[i] ];
997 imag128=localBuffer[mEffectEqualization->hFFT->BitReversed[i]+1];
999 int bitReversed=
SmallRB(i,mEffectEqualization->hFFT->pow2Bits);
1000 real128=localBuffer[bitReversed];
1001 imag128=localBuffer[bitReversed+1];
1003 filterFuncR=_mm_set1_ps(mEffectEqualization->mFilterFuncR[i]);
1004 filterFuncI=_mm_set1_ps(mEffectEqualization->mFilterFuncI[i]);
1005 localFFTBuffer[2*i ] = _mm_sub_ps( _mm_mul_ps(real128, filterFuncR), _mm_mul_ps(imag128, filterFuncI));
1006 localFFTBuffer[2*i+1] = _mm_add_ps( _mm_mul_ps(real128, filterFuncI), _mm_mul_ps(imag128, filterFuncR));
1009 filterFuncR=_mm_set1_ps(mEffectEqualization->mFilterFuncR[halfLength]);
1010 localFFTBuffer[1] = _mm_mul_ps(localBuffer[1], filterFuncR);
1014 ReorderToTime4x(mEffectEqualization->hFFT.get(), scratchBuffer, buffer);
1021bool EffectEqualization48x::ProcessBuffer8x(BufferInfo *bufferInfo)
1024 if(bufferInfo->mBufferLength%mBlockSize || mBufferCount!=8)
1027 auto blockCount=bufferInfo->mBufferLength/mBlockSize;
1029 __m128 *readBlocks[8];
1030 __m128 *writeBlocks[8];
1031 for(
int i=0;i<8;i++) {
1032 readBlocks[i]=(__m128 *)bufferInfo->mBufferSouce[i];
1033 writeBlocks[i]=(__m128 *)bufferInfo->mBufferDest[i];
1036 __m128 *swizzledBuffer128=(__m128 *)bufferInfo->mScratchBuffer;
1037 __m128 *scratchBuffer=&swizzledBuffer128[mWindowSize*4];
1039 int doubleFilter=mFilterSize<<1;
1040 int doubleWindow=mWindowSize<<1;
1041 int doubleBlock=mBlockSize<<1;
1042 for(
int run4x=0;run4x<blockCount;run4x++)
1045 __m128 *currentSwizzledBlock=&swizzledBuffer128[doubleWindow*(run4x&1)];
1046 for(
int i=0,j=0;j<doubleBlock;i++,j+=8) {
1047 __m128 tmp0 = _mm_shuffle_ps(readBlocks[0][i], readBlocks[1][i], _MM_SHUFFLE(1,0,1,0));
1048 __m128 tmp1 = _mm_shuffle_ps(readBlocks[0][i], readBlocks[1][i], _MM_SHUFFLE(3,2,3,2));
1049 __m128 tmp2 = _mm_shuffle_ps(readBlocks[2][i], readBlocks[3][i], _MM_SHUFFLE(1,0,1,0));
1050 __m128 tmp3 = _mm_shuffle_ps(readBlocks[2][i], readBlocks[3][i], _MM_SHUFFLE(3,2,3,2));
1051 currentSwizzledBlock[j] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(2,0,2,0));
1052 currentSwizzledBlock[j+2] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(3,1,3,1));
1053 currentSwizzledBlock[j+4] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(2,0,2,0));
1054 currentSwizzledBlock[j+6] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(3,1,3,1));
1055 tmp0 = _mm_shuffle_ps(readBlocks[4][i], readBlocks[5][i], _MM_SHUFFLE(1,0,1,0));
1056 tmp1 = _mm_shuffle_ps(readBlocks[4][i], readBlocks[5][i], _MM_SHUFFLE(3,2,3,2));
1057 tmp2 = _mm_shuffle_ps(readBlocks[6][i], readBlocks[7][i], _MM_SHUFFLE(1,0,1,0));
1058 tmp3 = _mm_shuffle_ps(readBlocks[6][i], readBlocks[7][i], _MM_SHUFFLE(3,2,3,2));
1059 currentSwizzledBlock[j+1] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(2,0,2,0));
1060 currentSwizzledBlock[j+3] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(3,1,3,1));
1061 currentSwizzledBlock[j+5] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(2,0,2,0));
1062 currentSwizzledBlock[j+7] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(3,1,3,1));
1064 __m128 *thisOverrun128=¤tSwizzledBlock[doubleBlock];
1065 for(
int i=0;i<doubleFilter;i++)
1066 thisOverrun128[i]=_mm_set1_ps(0.0);
1067 Filter8x(mWindowSize, (
float *)currentSwizzledBlock, (
float *)scratchBuffer);
1068 int writeStart=0, writeToStart=0;
1069 int writeEnd=doubleBlock;
1072 __m128 *lastOverrun128=&swizzledBuffer128[doubleWindow*((run4x+1)&1)+doubleBlock];
1074 for(
int i=0,j=0;j<doubleFilter;i++,j+=8) {
1075 __m128 tmps0 = _mm_add_ps(currentSwizzledBlock[j], lastOverrun128[j]);
1076 __m128 tmps1 = _mm_add_ps(currentSwizzledBlock[j+2], lastOverrun128[j+2]);
1077 __m128 tmps2 = _mm_add_ps(currentSwizzledBlock[j+4], lastOverrun128[j+4]);
1078 __m128 tmps3 = _mm_add_ps(currentSwizzledBlock[j+6], lastOverrun128[j+6]);
1079 __m128 tmp0 = _mm_shuffle_ps(tmps1, tmps0, _MM_SHUFFLE(0,1,0,1));
1080 __m128 tmp1 = _mm_shuffle_ps(tmps1, tmps0, _MM_SHUFFLE(2,3,2,3));
1081 __m128 tmp2 = _mm_shuffle_ps(tmps3, tmps2, _MM_SHUFFLE(0,1,0,1));
1082 __m128 tmp3 = _mm_shuffle_ps(tmps3, tmps2, _MM_SHUFFLE(2,3,2,3));
1083 writeBlocks[0][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
1084 writeBlocks[1][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(0,2,0,2));
1085 writeBlocks[2][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(1,3,1,3));
1086 writeBlocks[3][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(0,2,0,2));
1087 tmps0 = _mm_add_ps(currentSwizzledBlock[j+1], lastOverrun128[j+1]);
1088 tmps1 = _mm_add_ps(currentSwizzledBlock[j+3], lastOverrun128[j+3]);
1089 tmps2 = _mm_add_ps(currentSwizzledBlock[j+5], lastOverrun128[j+5]);
1090 tmps3 = _mm_add_ps(currentSwizzledBlock[j+7], lastOverrun128[j+7]);
1091 tmp0 = _mm_shuffle_ps(tmps1, tmps0, _MM_SHUFFLE(0,1,0,1));
1092 tmp1 = _mm_shuffle_ps(tmps1, tmps0, _MM_SHUFFLE(2,3,2,3));
1093 tmp2 = _mm_shuffle_ps(tmps3, tmps2, _MM_SHUFFLE(0,1,0,1));
1094 tmp3 = _mm_shuffle_ps(tmps3, tmps2, _MM_SHUFFLE(2,3,2,3));
1095 writeBlocks[4][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
1096 writeBlocks[5][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(0,2,0,2));
1097 writeBlocks[6][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(1,3,1,3));
1098 writeBlocks[7][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(0,2,0,2));
1100 writeStart=doubleFilter;
1101 writeToStart=mFilterSize>>2;
1103 for(
int i=writeToStart,j=writeStart;j<writeEnd;i++,j+=8) {
1104 __m128 tmp0 = _mm_shuffle_ps(currentSwizzledBlock[j+2], currentSwizzledBlock[j], _MM_SHUFFLE(0,1,0,1));
1105 __m128 tmp1 = _mm_shuffle_ps(currentSwizzledBlock[j+2], currentSwizzledBlock[j], _MM_SHUFFLE(2,3,2,3));
1106 __m128 tmp2 = _mm_shuffle_ps(currentSwizzledBlock[j+6], currentSwizzledBlock[j+4], _MM_SHUFFLE(0,1,0,1));
1107 __m128 tmp3 = _mm_shuffle_ps(currentSwizzledBlock[j+6], currentSwizzledBlock[j+4], _MM_SHUFFLE(2,3,2,3));
1108 writeBlocks[0][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
1109 writeBlocks[1][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(0,2,0,2));
1110 writeBlocks[2][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(1,3,1,3));
1111 writeBlocks[3][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(0,2,0,2));
1112 tmp0 = _mm_shuffle_ps(currentSwizzledBlock[j+3], currentSwizzledBlock[j+1], _MM_SHUFFLE(0,1,0,1));
1113 tmp1 = _mm_shuffle_ps(currentSwizzledBlock[j+3], currentSwizzledBlock[j+1], _MM_SHUFFLE(2,3,2,3));
1114 tmp2 = _mm_shuffle_ps(currentSwizzledBlock[j+7], currentSwizzledBlock[j+5], _MM_SHUFFLE(0,1,0,1));
1115 tmp3 = _mm_shuffle_ps(currentSwizzledBlock[j+7], currentSwizzledBlock[j+5], _MM_SHUFFLE(2,3,2,3));
1116 writeBlocks[4][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
1117 writeBlocks[5][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(0,2,0,2));
1118 writeBlocks[6][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(1,3,1,3));
1119 writeBlocks[7][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(0,2,0,2));
1125 for(
int i=writeToStart,j=writeStart;j<writeEnd;i++,j+=8) {
1126 __m128 tmp0 = _mm_shuffle_ps(currentSwizzledBlock[j+2], currentSwizzledBlock[j], _MM_SHUFFLE(0,1,0,1));
1127 __m128 tmp2 = _mm_shuffle_ps(currentSwizzledBlock[j+6], currentSwizzledBlock[j+4], _MM_SHUFFLE(0,1,0,1));
1128 writeBlocks[0][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
1131 for(
int i=0;i<8;i++) {
1132 readBlocks[i]+=mBlockSize>>2;
1133 writeBlocks[i]+=mBlockSize>>2;
// ProcessOne8x: processes `len` samples of track `t` beginning at `start` in
// "big runs" of mSubBufferSize samples, appends each processed run to
// `output`, then handles the remainder with ProcessBuffer and ProcessTail.
// `count` is only forwarded to TrackProgress and ProcessOne4x here.
// NOTE(review): this listing has gaps (the rest of the signature, braces, the
// reads inside the run loop, the declarations of `output` and
// `trackBlockSize`); the comments below describe only the visible statements.
1139bool EffectEqualization48x::ProcessOne8x(
int count,
WaveTrack * t,
// Falls back to the 4x implementation; the condition guarding this return is
// not visible in this extract.
1145 return ProcessOne4x(count, t, start, len);
1152 mEffectEqualization->TrackProgress(count, 0.0);
// Each big run advances the read position by mSubBufferSize-mBlockSize samples
// net of the rewind below (and of mFilterSize>>1), hence this divisor.
1153 int bigRuns=len/(mSubBufferSize-mBlockSize);
// A big run is read as trackBlocksPerBig whole track blocks plus
// trackLeftovers extra samples, which together total mSubBufferSize.
1154 int trackBlocksPerBig=mSubBufferSize/trackBlockSize;
1155 int trackLeftovers=mSubBufferSize-trackBlocksPerBig*trackBlockSize;
// Samples left for the single ProcessBuffer pass after all big runs.
// NOTE(review): if bigRuns is 0 the modulo below has a zero divisor; no guard
// is visible in this extract - confirm the early return above excludes it.
1156 int singleProcessLength=(mFilterSize>>1)*bigRuns + len%(bigRuns*(mSubBufferSize-mBlockSize));
1157 auto currentSample=start;
1159 bool bBreakLoop =
false;
1160 for(
int bigRun=0;bigRun<bigRuns;bigRun++)
// Advance the read position one track block at a time (the read itself is on
// a line missing from this extract).
1163 for(
int i=0;i<trackBlocksPerBig;i++) {
1165 currentSample+=trackBlockSize;
1167 if(trackLeftovers) {
1169 currentSample+=trackLeftovers;
// Rewind so the next run starts mBlockSize+(mFilterSize>>1) samples before the
// end of this one, i.e. consecutive runs overlap by that amount.
1171 currentSample-=mBlockSize+(mFilterSize>>1);
// NOTE(review): an 8x routine calling ProcessBuffer4x - confirm intended.
1173 ProcessBuffer4x(mBufferInfo);
// Assignment inside the condition is deliberate: bBreakLoop stores the
// TrackProgress result and is tested again after the loop. The statement
// executed when it is true is not visible here.
1174 if (bBreakLoop=mEffectEqualization->TrackProgress(count, (
double)(bigRun)/(
double)bigRuns))
// Append the processed run, skipping the first mFilterSize>>1 samples and, for
// every run after the first, a further mBlockSize samples; the appended length
// shrinks by the same amount.
1178 output->
Append((
samplePtr)&mBigBuffer[(bigRun?mBlockSize:0)+(mFilterSize>>1)],
floatSample, mSubBufferSize-((bigRun?mBlockSize:0)+(mFilterSize>>1)));
// Remainder: skipped when nothing is left or when bBreakLoop was set.
1180 if(singleProcessLength && !bBreakLoop) {
// Read the remainder plus the mBlockSize+(mFilterSize>>1) overlap into
// mBigBuffer and process it in place (source and destination are the same).
1181 t->
Get((
samplePtr)mBigBuffer.get(),
floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
1182 ProcessBuffer(mBigBuffer.get(), mBigBuffer.get(), singleProcessLength+mBlockSize+(mFilterSize>>1));
1187 ProcessTail(t, output.get(), start, len);
// ProcessOne8xThreaded: same big-run scheme as ProcessOne8x, but the runs are
// read into the mBufferInfo[] slots and handed over through mBufferStatus
// (BufferReady -> BufferDone) instead of being processed inline.
// Presumably worker threads perform the BufferReady -> BufferDone step; they
// are not visible in this extract - confirm.
// NOTE(review): this listing has gaps (the rest of the signature, braces, the
// declaration of `currentIndex`, and the increments of bigBlocksRead and
// bigBlocksWritten); the comments below describe only the visible statements.
1191bool EffectEqualization48x::ProcessOne8xThreaded(
int count,
WaveTrack * t,
// Falls back to the 4x implementation; the condition guarding this first
// return is not visible in this extract.
1197 return ProcessOne4x(count, t, start, len);
// Also falls back when no threads are configured or the job is under 256
// blocks.
1198 if(mThreadCount<=0 || blockCount<256)
1199 return ProcessOne4x(count, t, start, len);
1205 mEffectEqualization->TrackProgress(count, 0.0);
// Each big run advances the read position by mSubBufferSize-mBlockSize samples
// net of the rewind below (and of mFilterSize>>1), hence this divisor.
1206 int bigRuns=len/(mSubBufferSize-mBlockSize);
// A big run is read as trackBlocksPerBig whole track blocks plus
// trackLeftovers extra samples, which together total mSubBufferSize.
1207 int trackBlocksPerBig=mSubBufferSize/trackBlockSize;
1208 int trackLeftovers=mSubBufferSize-trackBlocksPerBig*trackBlockSize;
// Samples left for the single ProcessBuffer pass after all big runs.
// NOTE(review): if bigRuns is 0 the modulo below has a zero divisor; confirm
// the early returns above exclude that case.
1209 int singleProcessLength=(mFilterSize>>1)*bigRuns + len%(bigRuns*(mSubBufferSize-mBlockSize));
1210 auto currentSample=start;
// bigBlocksRead starts at mWorkerDataCount because the loop below pre-fills
// that many slots before any output is written.
// NOTE(review): the pre-fill is not bounded by bigRuns - confirm bigRuns is
// always at least mWorkerDataCount here.
1212 int bigBlocksRead=mWorkerDataCount, bigBlocksWritten=0;
// Pre-fill: read one full big run into every slot and mark it BufferReady.
1215 for(
int i=0;i<mWorkerDataCount;i++)
1218 for(
int j=0;j<trackBlocksPerBig;j++) {
1219 t->
Get((
samplePtr)&mBufferInfo[i].mBufferSouce[0][j*trackBlockSize],
floatSample, currentSample, trackBlockSize);
1220 currentSample+=trackBlockSize;
1222 if(trackLeftovers) {
1223 t->
Get((
samplePtr)&mBufferInfo[i].mBufferSouce[0][trackBlocksPerBig*trackBlockSize],
floatSample, currentSample, trackLeftovers);
1224 currentSample+=trackLeftovers;
// Rewind so the next run overlaps this one by mBlockSize+(mFilterSize>>1)
// samples.
1226 currentSample-=mBlockSize+(mFilterSize>>1);
1227 mBufferInfo[i].mBufferStatus=BufferReady;
1230 bool bBreakLoop =
false;
// Drain loop: runs until every big run has been appended to `output`.
1231 while(bigBlocksWritten<bigRuns) {
// Assignment inside the condition is deliberate: bBreakLoop is tested again
// after the loop. The statement executed when it is true is not visible here.
1232 if (bBreakLoop=mEffectEqualization->TrackProgress(count, (
double)(bigBlocksWritten)/(
double)bigRuns))
// Takes mDataMutex for the rest of the enclosing scope before the slot
// statuses are inspected and changed (scope extent not visible here).
1236 wxMutexLocker locker( mDataMutex );
// Slots are consumed in index order; the walk stops at the first slot that is
// not yet BufferDone.
1238 while((mBufferInfo[currentIndex].mBufferStatus==BufferDone) && (bigBlocksWritten<bigRuns)) {
// Append the finished run, skipping the first mFilterSize>>1 samples and, for
// every run after the first, a further mBlockSize samples.
1239 output->
Append((
samplePtr)&mBufferInfo[currentIndex].mBufferDest[0][(bigBlocksWritten?mBlockSize:0)+(mFilterSize>>1)],
floatSample, mSubBufferSize-((bigBlocksWritten?mBlockSize:0)+(mFilterSize>>1)));
// Refill the slot just drained if input remains; otherwise mark it
// BufferEmpty (see the else below).
1241 if(bigBlocksRead<bigRuns) {
1243 for(
int j=0;j<trackBlocksPerBig;j++) {
1244 t->
Get((
samplePtr)&mBufferInfo[currentIndex].mBufferSouce[0][j*trackBlockSize],
floatSample, currentSample, trackBlockSize);
1245 currentSample+=trackBlockSize;
1247 if(trackLeftovers) {
1248 t->
Get((
samplePtr)&mBufferInfo[currentIndex].mBufferSouce[0][trackBlocksPerBig*trackBlockSize],
floatSample, currentSample, trackLeftovers);
1249 currentSample+=trackLeftovers;
1251 currentSample-=mBlockSize+(mFilterSize>>1);
1252 mBufferInfo[currentIndex].mBufferStatus=BufferReady;
1254 }
else mBufferInfo[currentIndex].mBufferStatus=BufferEmpty;
// Move to the next slot, wrapping around after the last one.
1255 currentIndex=(currentIndex+1)%mWorkerDataCount;
// Remainder: skipped when nothing is left or when bBreakLoop was set.
1258 if(singleProcessLength && !bBreakLoop) {
// Read the remainder plus the mBlockSize+(mFilterSize>>1) overlap into
// mBigBuffer and process it in place (source and destination are the same).
1259 t->
Get((
samplePtr)mBigBuffer.get(),
floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
1260 ProcessBuffer(mBigBuffer.get(), mBigBuffer.get(), singleProcessLength+mBlockSize+(mFilterSize>>1));
1265 ProcessTail(t, output.get(), start, len);
// Filter8x: transforms `buffer` with RealFFTf8x, multiplies the result by the
// filter held in mFilterFuncR / mFilterFuncI, writes the product to
// `scratchBuffer`, inverse-transforms it and reorders it back into `buffer`.
// All arithmetic is on __m256 values, so each operation covers eight packed
// floats (presumably eight interleaved signals - confirm against the caller).
//   len           - transform length; the multiply loop runs to len/2.
//   buffer        - input samples, overwritten with the filtered output.
//   scratchBuffer - work area receiving the filtered spectrum.
// NOTE(review): this listing has gaps (braces, the `else`, the declaration of
// `i`); the comments below describe only the visible statements.
1272void EffectEqualization48x::Filter8x(
size_t len,
1273 float *buffer,
float *scratchBuffer)
1276 __m256 real256, imag256;
// Forward transform, in place on `buffer`.
1278 RealFFTf8x(buffer, mEffectEqualization->hFFT);
// View both float buffers as arrays of __m256.
1282 __m256 *localFFTBuffer=(__m256 *)scratchBuffer;
1283 __m256 *localBuffer=(__m256 *)buffer;
1285 __m256 filterFuncR, filterFuncI;
// Element 0 is scaled by the real filter value only (presumably the DC term).
1286 filterFuncR = _mm256_set1_ps(mEffectEqualization->mFilterFuncR[0]);
1287 localFFTBuffer[0] = _mm256_mul_ps(localBuffer[0], filterFuncR);
1288 auto halfLength = (len / 2);
// Bit 0 of sMathPath selects the BitReversed lookup table over computing the
// index with SmallRB.
1290 bool useBitReverseTable = sMathPath & 1;
1292 for(i = 1; i < halfLength; i++)
// Fetch the real/imaginary pair for index i from its bit-reversed position.
1294 if(useBitReverseTable) {
1295 real256=localBuffer[mEffectEqualization->hFFT->BitReversed[i] ];
1296 imag256=localBuffer[mEffectEqualization->hFFT->BitReversed[i]+1];
// Presumably the else branch (its line is missing from this extract): compute
// the bit-reversed index instead of looking it up.
1298 int bitReversed=
SmallRB(i,mEffectEqualization->hFFT->pow2Bits);
1299 real256=localBuffer[bitReversed];
1300 imag256=localBuffer[bitReversed+1];
// Broadcast the filter coefficient for index i to all eight lanes.
1302 filterFuncR=_mm256_set1_ps(mEffectEqualization->mFilterFuncR[i]);
1303 filterFuncI=_mm256_set1_ps(mEffectEqualization->mFilterFuncI[i]);
// Complex multiply (real + j*imag) * (filterR + j*filterI), stored as an
// adjacent real/imaginary pair at 2*i and 2*i+1.
1304 localFFTBuffer[2*i ] = _mm256_sub_ps( _mm256_mul_ps(real256, filterFuncR), _mm256_mul_ps(imag256, filterFuncI));
1305 localFFTBuffer[2*i+1] = _mm256_add_ps( _mm256_mul_ps(real256, filterFuncI), _mm256_mul_ps(imag256, filterFuncR));
// Element 1 is scaled by the real filter value at index halfLength only
// (presumably the Nyquist term packed into slot 1 - confirm).
1308 filterFuncR=_mm256_set1_ps(mEffectEqualization->mFilterFuncR[halfLength]);
1309 localFFTBuffer[1] = _mm256_mul_ps(localBuffer[1], filterFuncR);
// Inverse transform in place on the scratch buffer, then reorder the result
// into `buffer`.
1312 InverseRealFFTf8x(scratchBuffer, mEffectEqualization->hFFT);
1313 ReorderToTime8x(mEffectEqualization->hFFT, scratchBuffer, buffer);
void RealFFTf4x(fft_type *, FFTParam *, int functionType=-1)
void TableUsage(int iMask)
void RealFFTf1x(fft_type *, FFTParam *, int functionType=-1)
void ReorderToTime4x(FFTParam *hFFT, fft_type *buffer, fft_type *TimeOut, int functionType=-1)
void ReorderToTime1x(FFTParam *hFFT, fft_type *buffer, fft_type *TimeOut, int functionType=-1)
void InverseRealFFTf4x(fft_type *, FFTParam *, int functionType=-1)
void InverseRealFFTf1x(fft_type *, FFTParam *, int functionType=-1)
int SmallRB(int bits, int numberBits)
size_t limitSampleBufferSize(size_t bufferSize, sampleCount limit)
Subclass & Get(const RegisteredFactory &key)
Get reference to an attachment, creating on demand if not present, down-cast it to Subclass.
An Effect that modifies volume in different frequency bands.
static TrackListHolder Create(AudacityProject *pOwner)
A Track that contains audio waveform data.
double GetStartTime() const override
Implement WideSampleSequence.
void ConvertToSampleFormat(sampleFormat format, const std::function< void(size_t)> &progressReport={})
bool Append(size_t iChannel, constSamplePtr buffer, sampleFormat format, size_t len, unsigned int stride=1, sampleFormat effectiveFormat=widestSampleFormat) override
void Join(double t0, double t1, const ProgressReporter &reportProgress)
void Clear(double t0, double t1) override
void Paste(double t0, const Track &src) override
double GetEndTime() const override
Implement WideSampleSequence.
size_t GetMaxBlockSize() const
Holder EmptyCopy(size_t nChannels, const SampleBlockFactoryPtr &pFactory={}) const
Track::Holder Copy(double t0, double t1, bool forClipboard=true) const override
Create new tracks and don't modify this track.
double LongSamplesToTime(sampleCount pos) const
sampleCount TimeToLongSamples(double t0) const
Positions or offsets within audio files need a wide type.
const char * end(const char *str) noexcept