Audacity 3.2.0
SimdTypes_sse2.h
Go to the documentation of this file.
1/*
2SSE2 simd types
3*/
4
5#pragma once
6
// Compiler-specific function decorations used by every helper in this file:
//   __finl - force the function to be inlined
//   __vecc - MSVC's __vectorcall calling convention; expands to nothing on
//            other compilers
// `defined(...)` is used instead of testing the bare identifier so that
// builds with -Wundef stay warning-free on non-MSVC compilers.
#if defined(_MSC_VER)
#define __finl __forceinline
#define __vecc __vectorcall
#else
#define __finl inline __attribute__((always_inline))
#define __vecc
#endif
14
15#include <emmintrin.h>
16
17namespace staffpad::audio::simd {
18
// This jumps through some hoops to get the same level of support for
// clang and MSVC. With clang, the SSE2 types are built-in and have some
// arithmetic operators defined. On MSVC, the SSE2 types are structs with
// no operators. To get both to the same level, and to be able to write
// algorithms "naturally", everything needs to be wrapped in a struct.
25struct float_x4
26{
27 __m128 s;
29 {
30 }
31
33 __finl float_x4(float val)
34 {
35 s = _mm_set1_ps(val);
36 }
37
38 __finl float_x4(const __m128 &val) : s(val)
39 {
40 }
41
42#if __clang__
43private:
44 // this helper class allows writing to the single registers for clang
45 // __mm128 is a built-in type -> we can't return a float& reference.
46 // this is just syntax sugar and clang will remove it during builds.
47 //
48 // it allows to write
49 // float_x4 a;
50 // a[1] = 2.f;
51 struct RegisterAccessWrapper
52 {
53 __m128 &val;
54 int i;
55
56 void operator=(float x)
57 {
58 val[i] = x;
59 }
60 operator float() noexcept
61 {
62 return val[i];
63 }
64 };
65
66public:
67 __finl RegisterAccessWrapper operator[](int n)
68 {
69 RegisterAccessWrapper raw = {s, n};
70 return raw;
71 }
72
73 __finl const float operator[](int n) const
74 {
75 return s[n];
76 }
77#elif _MSC_VER
78 // on msvc returning a ref to a sub-register is possible
79 __finl float &operator[](int n)
80 {
81 return s.m128_f32[n];
82 }
83
84 __finl const float operator[](int n) const
85 {
86 return s.m128_f32[n];
87 }
88#endif
89};
90
91__finl float_x4 __vecc float_x4_from_float(float x)
92{
93 return _mm_set1_ps(x);
94}
95
96__finl float_x4 __vecc float_x4_load_aligned(const float *x)
97{
98 return _mm_load_ps(x);
99}
100
101__finl void __vecc store_aligned(const float_x4 &a, float *x)
102{
103 _mm_store_ps(x, a.s);
104}
105
106__finl float_x4 __vecc unzip1(const float_x4 &a, const float_x4 &b)
107{
108 return _mm_shuffle_ps(a.s, b.s, _MM_SHUFFLE(2, 0, 2, 0));
109}
110
111__finl float_x4 __vecc unzip2(const float_x4 &a, const float_x4 &b)
112{
113 return _mm_shuffle_ps(a.s, b.s, _MM_SHUFFLE(3, 1, 3, 1));
114}
115
116__finl float_x4 __vecc operator+(float_x4 a, float_x4 b)
117{
118 return _mm_add_ps(a.s, b.s);
119}
120
121__finl float_x4 __vecc operator-(float_x4 a, float_x4 b)
122{
123 return _mm_sub_ps(a.s, b.s);
124}
125
126__finl float_x4 __vecc operator*(float_x4 a, float_x4 b)
127{
128 return _mm_mul_ps(a.s, b.s);
129}
130
131__finl float_x4 __vecc sqrt(const float_x4 &a)
132{
133 return _mm_sqrt_ps(a.s);
134}
135
136__finl float __vecc rint(float x)
137{
138 __m128i A = _mm_cvtps_epi32(_mm_set_ss(x));
139 return _mm_cvtss_f32(_mm_cvtepi32_ps(A));
140}
141
142__finl float_x4 __vecc rint(const float_x4 &a)
143{
144 __m128i A = _mm_cvtps_epi32(a.s);
145 return _mm_cvtepi32_ps(A);
146}
147
148} // namespace staffpad::audio::simd
#define __finl
#define __vecc
#define A(N)
Definition: ToChars.cpp:62
__finl float_x4 __vecc float_x4_from_float(float x)
__finl float_x4 __vecc operator+(float_x4 a, float_x4 b)
__finl float_x4 __vecc operator-(float_x4 a, float_x4 b)
__finl float __vecc rint(float a)
__finl float_x4 __vecc unzip1(const float_x4 &a, const float_x4 &b)
__finl float_x4 __vecc float_x4_load_aligned(const float *x)
__finl void __vecc store_aligned(const float_x4 &a, float *x)
__finl float_x4 __vecc sqrt(const float_x4 &a)
__finl float_x4 __vecc operator*(float_x4 a, float_x4 b)
__finl float_x4 __vecc unzip2(const float_x4 &a, const float_x4 &b)
__finl float & operator[](int n)
__finl float_x4(const __m128 &val)
__finl float_x4(float val)
enables math like: float_x4 a = 0.5f * float_x4{1.f, 2.f, 3.f, 4.f};