Audacity 3.2.0
SimdTypes_neon.h
Go to the documentation of this file.
1/*
2 Neon version of SIMD types.
3 */
4
5#pragma once
6
7#if _MSC_VER
8#include <arm64_neon.h>
9#define __finl __forceinline
10#define __vecc __vectorcall
11#else
12#include <arm_neon.h>
13#define __finl inline __attribute__((always_inline))
14#define __vecc
15#endif
16
17#include <cmath>
18
19namespace staffpad::audio::simd {
20
22{
23 float32x4_t s;
24
26 {
27 }
28
30 __finl float_x4(float val)
31 {
32 s = vdupq_n_f32(val);
33 }
34
35 __finl float_x4(const float32x4_t &val) : s(val)
36 {
37 }
38
40 __finl float_x4(float v0, float v1, float v2, float v3)
41 {
42#if _MSC_VER // aggregate initializer won't work unless we have {.n128_f32 = ..} in c++20
43 s.n128_f32[0] = v0;
44 s.n128_f32[1] = v1;
45 s.n128_f32[2] = v2;
46 s.n128_f32[3] = v3;
47#elif __clang__
48 s = {v0, v1, v2, v3};
49#else
50 float f[4] = {v0, v1, v2, v3};
51 s = vld1q_f32(f);
52#endif
53 }
54
55#if __clang__
56private:
57 // this helper class allows writing to individual lanes of the register on clang:
58 // float32x4_t is a built-in vector type -> we can't return a float& reference to a lane.
59 // this is just syntactic sugar and clang will remove it during builds.
60 //
61 // it allows writing
62 // float_x4 a;
63 // a[1] = 2.f;
64 struct RegisterAccessWrapper
65 {
66 float32x4_t &val;
67 int i;
68
69 void operator=(float x)
70 {
71 val[i] = x;
72 }
73 operator float() noexcept
74 {
75 return val[i];
76 }
77 };
78
79public:
80 __finl RegisterAccessWrapper operator[](int n)
81 {
82 RegisterAccessWrapper raw = {s, n};
83 return raw;
84 }
85
86 __finl const float operator[](int n) const
87 {
88 return s[n];
89 }
90#elif _MSC_VER
91 // on msvc returning a ref to a sub-register is possible
92 __finl float &operator[](int n)
93 {
94 return s.n128_f32[n];
95 }
96
97 __finl const float operator[](int n) const
98 {
99 return s.n128_f32[n];
100 }
101#endif
102};
103
105{
106 return vdupq_n_f32(x);
107}
108
110{
111 return vld1q_f32(x);
112}
113
114__finl void __vecc store_aligned(const float_x4 &a, float *x)
115{
116 vst1q_f32(x, a.s);
117}
118
120{
121 return vuzp1q_f32(a.s, b.s);
122}
123
125{
126 return vuzp2q_f32(a.s, b.s);
127}
128
130{
131 return vaddq_f32(a.s, b.s);
132}
133
135{
136 return vsubq_f32(a.s, b.s);
137}
138
140{
141 return vmulq_f32(a.s, b.s);
142}
143
145{
146 return vsqrtq_f32(a.s);
147}
148
149__finl float __vecc rint(float a)
150{
151 return std::rint(a);
152}
153
155{
156 return vrndnq_f32(a.s);
157}
158
159} // namespace staffpad::audio::simd
#define __finl
#define __vecc
__finl float_x4 __vecc float_x4_from_float(float x)
__finl float_x4 __vecc operator+(float_x4 a, float_x4 b)
__finl float_x4 __vecc operator-(float_x4 a, float_x4 b)
__finl float __vecc rint(float a)
__finl float_x4 __vecc unzip1(const float_x4 &a, const float_x4 &b)
__finl float_x4 __vecc float_x4_load_aligned(const float *x)
__finl void __vecc store_aligned(const float_x4 &a, float *x)
__finl float_x4 __vecc sqrt(const float_x4 &a)
__finl float_x4 __vecc operator*(float_x4 a, float_x4 b)
__finl float_x4 __vecc rint(const float_x4 &a)
__finl float_x4 __vecc unzip2(const float_x4 &a, const float_x4 &b)
__finl float & operator[](int n)
__finl float_x4(const float32x4_t &val)
__finl float_x4(float val)
enables math like: float_x4 a = 0.5f * float_x4{1.f, 2.f, 3.f, 4.f};
__finl float_x4(float v0, float v1, float v2, float v3)
enables assignments like: float_x4 a = {1.f, 2.f, 3.f, 4.f};