17#if defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_X64) || \
18 (defined(_M_IX86_FP) && _M_IX86_FP >= 2))
19#define USE_SSE2_COMPLEX 1
31 return ::malloc(bytes);
/// Copies `n` elements from `src` to `dst` with a raw byte copy.
///
/// @param src  Source buffer holding at least `n` elements.
/// @param dst  Destination buffer with room for at least `n` elements.
/// @param n    Number of elements to copy; values <= 0 copy nothing.
///
/// The copy is done with memcpy, so the two ranges must not overlap.
template <typename T>
inline void copy(const T* src, T* dst, int32_t n)
{
  // A negative count would wrap to an enormous size_t byte count below,
  // so treat non-positive counts as "nothing to do", like the loop-based
  // helpers in this file do implicitly.
  if (n <= 0)
    return;

  memcpy(dst, src, static_cast<size_t>(n) * sizeof(T));
}
/// Element-wise sum: dst[i] = src1[i] + src2[i] for i in [0, n).
///
/// @param src1  First operand buffer (at least `n` elements).
/// @param src2  Second operand buffer (at least `n` elements).
/// @param dst   Output buffer (at least `n` elements).
/// @param n     Number of elements to process; nothing happens for n <= 0.
template <typename T>
inline void add(const T* src1, const T* src2, T* dst, int32_t n)
{
  for (int32_t i = 0; i < n; i++)
  {
    dst[i] = src1[i] + src2[i];
  }
}
/// Element-wise difference: dst[i] = src2[i] - src1[i] for i in [0, n).
///
/// Note the operand order: the FIRST source is subtracted from the SECOND.
/// NOTE(review): this order is kept exactly as found; confirm that callers
/// rely on "src2 - src1" before changing it.
///
/// @param src1  Subtrahend buffer (at least `n` elements).
/// @param src2  Minuend buffer (at least `n` elements).
/// @param dst   Output buffer (at least `n` elements).
/// @param n     Number of elements to process; nothing happens for n <= 0.
template <typename T>
inline void subtract(const T* src1, const T* src2, T* dst, int32_t n)
{
  for (int32_t i = 0; i < n; i++)
  {
    dst[i] = src2[i] - src1[i];
  }
}
62 for (int32_t i = 0; i < n; i++)
63 dst[i] = src[i] * constant;
69 for (int32_t i = 0; i < n; i++)
70 dst[i] += src[i] * constant;
/// Element-wise product: dst[i] = src1[i] * src2[i] for i in [0, n).
///
/// @param src1  First factor buffer (at least `n` elements).
/// @param src2  Second factor buffer (at least `n` elements).
/// @param dst   Output buffer (at least `n` elements).
/// @param n     Number of elements to process; nothing happens for n <= 0.
template <typename T>
inline void multiply(const T* src1, const T* src2, T* dst, int32_t n)
{
  for (int32_t i = 0; i < n; i++)
  {
    dst[i] = src1[i] * src2[i];
  }
}
83 std::fill(dst, dst + n, 0.f);
87inline void findMaxElement(
const T* src, int32_t n, int32_t& maxIndex, T& maxValue)
92 for (int32_t i = 1; i < n; i++)
94 if (src[i] > maxValue)
104inline void calcPhases(
const std::complex<float>* src,
float* dst, int32_t n)
108 [](
const __m128 rp,
const __m128 ip, __m128& out)
112inline void calcNorms(
const std::complex<float>* src,
float* dst, int32_t n)
116 [](
const __m128 rp,
const __m128 ip, __m128& out)
121 const float* oldPhase,
const float* newPhase, std::complex<float>* dst,
125 oldPhase, newPhase, dst, n);
/// Scalar implementation: writes the argument (phase angle, via std::arg)
/// of each complex input sample to the output buffer.
///
/// @param src  Complex input buffer (at least `n` elements).
/// @param dst  Output buffer receiving one phase per input (at least `n`).
/// @param n    Number of elements to process; nothing happens for n <= 0.
inline void calcPhases(const std::complex<float>* src, float* dst, int32_t n)
{
  for (int32_t i = 0; i < n; i++)
  {
    dst[i] = std::arg(src[i]);
  }
}
134inline void calcNorms(
const std::complex<float>* src,
float* dst, int32_t n)
136 for (int32_t i = 0; i < n; i++)
/// Scalar implementation: rotates each complex value in `dst` in place by
/// multiplying it with the unit phasor (cos(theta), sin(theta)).
///
/// theta is `newPhase[i] - oldPhase[i]` when `oldPhase` is non-null, and
/// simply `newPhase[i]` when `oldPhase` is null.
///
/// @param oldPhase  Optional buffer of previous phases; may be null.
/// @param newPhase  Buffer of target phases (at least `n` elements).
/// @param dst       Complex buffer modified in place (at least `n` elements).
/// @param n         Number of elements to process; nothing happens for n <= 0.
inline void rotate(
  const float* oldPhase, const float* newPhase, std::complex<float>* dst,
  int32_t n)
{
  // The null check does not depend on the index, so evaluate it once.
  const bool hasOldPhase = oldPhase != nullptr;

  for (int32_t i = 0; i < n; i++)
  {
    const float theta =
      hasOldPhase ? newPhase[i] - oldPhase[i] : newPhase[i];
    dst[i] *= std::complex<float>(cosf(theta), sinf(theta));
  }
}
__m128 atan2_ps(__m128 y, __m128 x)
__m128 norm(__m128 x, __m128 y)
void perform_parallel_simd_aligned(const std::complex< float > *input, float *output, int n, const fnc &f)
void rotate_parallel_simd_aligned(const float *oldPhase, const float *newPhase, std::complex< float > *output, int n)
void * allocate(int32_t bytes)
void subtract(const T *src1, const T *src2, T *dst, int32_t n)
void multiply(const T *src1, const T *src2, T *dst, int32_t n)
void calcPhases(const std::complex< float > *src, float *dst, int32_t n)
void rotate(const float *oldPhase, const float *newPhase, std::complex< float > *dst, int32_t n)
void setToZero(T *dst, int32_t n)
void findMaxElement(const T *src, int32_t n, int32_t &maxIndex, T &maxValue)
void add(const T *src1, const T *src2, T *dst, int32_t n)
void calcNorms(const std::complex< float > *src, float *dst, int32_t n)
void constantMultiplyAndAdd(const T *src, T constant, T *dst, int32_t n)
void copy(const T *src, T *dst, int32_t n)
void constantMultiply(const T *src, T constant, T *dst, int32_t n)