Audacity 3.2.0
SimdTypes_neon.h
Go to the documentation of this file.
1/*
2 Neon version of SIMD types.
3 */
4
5#pragma once
6
7#if _MSC_VER
8#include <arm64_neon.h>
9#define __finl __forceinline
10#define __vecc __vectorcall
11#else
12#include <arm_neon.h>
13#define __finl inline __attribute__((always_inline))
14#define __vecc
15#endif
16
17namespace staffpad::audio::simd {
18
20{
21 float32x4_t s;
22
24 {
25 }
26
28 __finl float_x4(float val)
29 {
30 s = vdupq_n_f32(val);
31 }
32
33 __finl float_x4(const float32x4_t &val) : s(val)
34 {
35 }
36
38 __finl float_x4(float v0, float v1, float v2, float v3)
39 {
40#if _MSC_VER // aggregate initializer won't work unless we have {.n128_f32 = ..} in c++20
41 s.n128_f32[0] = v0;
42 s.n128_f32[1] = v1;
43 s.n128_f32[2] = v2;
44 s.n128_f32[3] = v3;
45#else
46 s = {v0, v1, v2, v3};
47#endif
48 }
49
50#if __clang__
51private:
52 // this helper class allows writing to the single registers for clang
53 // __mm128 is a built-in type -> we can't return a float& reference.
54 // this is just syntax sugar and clang will remove it during builds.
55 //
56 // it allows to write
57 // float_x4 a;
58 // a[1] = 2.f;
59 struct RegisterAccessWrapper
60 {
61 float32x4_t &val;
62 int i;
63
64 void operator=(float x)
65 {
66 val[i] = x;
67 }
68 operator float() noexcept
69 {
70 return val[i];
71 }
72 };
73
74public:
75 __finl RegisterAccessWrapper operator[](int n)
76 {
77 RegisterAccessWrapper raw = {s, n};
78 return raw;
79 }
80
81 __finl const float operator[](int n) const
82 {
83 return s[n];
84 }
85#elif _MSC_VER
86 // on msvc returning a ref to a sub-register is possible
87 __finl float &operator[](int n)
88 {
89 return s.n128_f32[n];
90 }
91
92 __finl const float operator[](int n) const
93 {
94 return s.n128_f32[n];
95 }
96#endif
97};
98
100{
101 return vdupq_n_f32(x);
102}
103
105{
106 return vld1q_f32(x);
107}
108
109__finl void __vecc store_aligned(const float_x4 &a, float *x)
110{
111 vst1q_f32(x, a.s);
112}
113
115{
116 return vuzp1q_f32(a.s, b.s);
117}
118
120{
121 return vuzp2q_f32(a.s, b.s);
122}
123
125{
126 return vaddq_f32(a.s, b.s);
127}
128
130{
131 return vsubq_f32(a.s, b.s);
132}
133
135{
136 return vmulq_f32(a.s, b.s);
137}
138
140{
141 return vsqrtq_f32(a.s);
142}
143
145{
146 return vrndnq_f32(a.s);
147}
148
149} // namespace staffpad::audio::simd
#define __finl
#define __vecc
__finl float_x4 __vecc float_x4_from_float(float x)
__finl float_x4 __vecc operator+(float_x4 a, float_x4 b)
__finl float_x4 __vecc operator-(float_x4 a, float_x4 b)
__finl float_x4 __vecc unzip1(const float_x4 &a, const float_x4 &b)
__finl float_x4 __vecc float_x4_load_aligned(const float *x)
__finl void __vecc store_aligned(const float_x4 &a, float *x)
__finl float_x4 __vecc sqrt(const float_x4 &a)
__finl float_x4 __vecc operator*(float_x4 a, float_x4 b)
__finl float_x4 __vecc rint(const float_x4 &a)
__finl float_x4 __vecc unzip2(const float_x4 &a, const float_x4 &b)
__finl float & operator[](int n)
__finl float_x4(const float32x4_t &val)
__finl float_x4(float val)
enables math like: float_x4 a = 0.5f * float_x4{1.f, 2.f, 3.f, 4.f};
__finl float_x4(float v0, float v1, float v2, float v3)
enables assignments like: float_x4 a = {1.f, 2.f, 3.f, 4.f};