Audacity 3.2.0
SimdTypes.h
Go to the documentation of this file.
1/*
2 Simd-types for parallel dsp processing.
3 Aligned memory allocation for simd vectors.
4 */
5
6#pragma once
7#include <cassert>
8#include <cstdlib>
9
// Compiler-specific decorations used on the SIMD helper functions:
//   __finl  forces inlining (__forceinline on MSVC, always_inline elsewhere)
//   __vecc  selects the __vectorcall calling convention on MSVC and expands
//           to nothing on other compilers
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation; the names are kept because code outside this block
// uses them.
#if defined(_MSC_VER)
#define __finl __forceinline
#define __vecc __vectorcall
#else
#define __finl inline __attribute__((always_inline))
#define __vecc
#endif
17
18#if defined(__SSE2__) || (defined(_M_AMD64) || defined(_M_X64)) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
19#include "SimdTypes_sse2.h"
20#elif defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM64)
21#include "SimdTypes_neon.h"
22#else
23#include "SimdTypes_scalar.h"
24#endif
25
27
/// Reserve aligned memory. Needs to be freed with aligned_free().
///
/// Over-allocates with std::malloc, rounds the address up to a multiple of
/// `alignment` and stores the pointer returned by std::malloc in the
/// pointer-sized slot directly in front of the returned address.
///
/// @param required_bytes number of usable bytes requested
/// @param alignment      required alignment in bytes; must be a non-zero
///                       power of two
/// @return the aligned pointer, or nullptr if `alignment` is invalid, the
///         padded size does not fit in size_t, or std::malloc fails
inline void *aligned_malloc(size_t required_bytes, size_t alignment)
{
  // The mask arithmetic below is only valid for a non-zero power of two; a
  // zero alignment would produce a null "aligned" address.
  if (alignment == 0 || (alignment & (alignment - 1)) != 0)
    return nullptr;

  // Worst-case padding to reach an aligned address plus room for the
  // bookkeeping pointer.
  const size_t offset = alignment - 1 + sizeof(void *);

  // Unsigned wrap-around would make malloc return a buffer that is far too
  // small for the request.
  const size_t total_bytes = required_bytes + offset;
  if (total_bytes < required_bytes)
    return nullptr;

  void *raw = std::malloc(total_bytes);
  if (raw == nullptr)
    return nullptr;

  // figure out aligned position
  const size_t aligned_address =
      (reinterpret_cast<size_t>(raw) + offset) & ~(alignment - 1);
  void *aligned = reinterpret_cast<void *>(aligned_address);

  // write malloced pointer in front of aligned data
  static_cast<void **>(aligned)[-1] = raw;
  return aligned;
}
41
/// Free memory allocated with aligned_malloc().
///
/// @param p pointer previously returned by aligned_malloc(); nullptr is
///          accepted and ignored
inline void aligned_free(void *p)
{
  if (p == nullptr)
    return;

  // The pointer-sized slot directly in front of `p` holds the address that
  // std::malloc originally returned; that is the one that has to be freed.
  std::free(static_cast<void **>(p)[-1]);
}
48
50template <typename cls>
51inline cls *aligned_new(int alignment)
52{
53 void *mem = aligned_malloc(sizeof(cls), alignment);
54 return new (mem) cls();
55}
56
58template <typename cls>
59inline void aligned_delete(cls *obj)
60{
61 if (obj != nullptr)
62 {
63 obj->~cls();
64 aligned_free((void *)obj);
65 }
66}
67
/// Check whether the address held by `obj` is a multiple of `alignment`.
///
/// @param obj       pointer whose address is tested
/// @param alignment alignment in bytes; the test masks the address with
///                  `alignment - 1`, so it is only meaningful for powers
///                  of two
/// @return true if none of the address bits selected by `alignment - 1`
///         are set
template <typename T>
inline bool is_aligned(T *obj, int alignment)
{
  const size_t address = reinterpret_cast<size_t>(obj);
  const size_t low_bits_mask = static_cast<size_t>(alignment - 1);
  return (address & low_bits_mask) == 0;
}
73
85// two buffers read/write
86template <typename fnc>
87__finl void perform_parallel_simd_aligned(float *a, float *b, int n, const fnc &f)
88{
89 // fnc& f needs to be a lambda of type [](auto &a, auto &b){}.
90 // the autos will be float_x4/float
91 constexpr int N = 4;
92 constexpr int byte_size = sizeof(float);
93
94 assert(is_aligned(a, N * byte_size) && is_aligned(b, N * byte_size));
95
96 for (int i = 0; i <= n - N; i += N)
97 {
98 auto x = float_x4_load_aligned(a + i);
99 auto y = float_x4_load_aligned(b + i);
100 f(x, y);
101 store_aligned(x, a + i);
102 store_aligned(y, b + i);
103 }
104 // deal with last partial packet
105 for (int i = n & (~(N - 1)); i < n; ++i)
106 f(a[i], b[i]);
107}
108
110template <typename fnc>
111__finl void perform_parallel_simd_aligned(float *a, int n, const fnc &f)
112{
113 // fnc& f needs to be a lambda of type [](auto &a){}.
114 constexpr int N = 4;
115 constexpr int byte_size = sizeof(float);
116 assert(is_aligned(a, N * byte_size));
117
118 for (int i = 0; i <= n - N; i += N)
119 {
120 auto x = float_x4_load_aligned(a + i);
121 f(x);
122 store_aligned(x, a + i);
123 }
124 // deal with last partial packet
125 for (int i = n & (~(N - 1)); i < n; ++i)
126 f(a[i]);
127}
128
129} // namespace staffpad::audio::simd
#define __finl
Definition: SimdTypes.h:14
cls * aligned_new(int alignment)
create a C++ class at a memory-aligned spot that needs to be deleted using aligned_delete
Definition: SimdTypes.h:51
void aligned_free(void *p)
free memory allocated with aligned_malloc
Definition: SimdTypes.h:43
void * aligned_malloc(size_t required_bytes, size_t alignment)
reserve aligned memory. Needs to be freed with aligned_free()
Definition: SimdTypes.h:29
bool is_aligned(T *obj, int alignment)
Definition: SimdTypes.h:69
__finl void perform_parallel_simd_aligned(float *a, float *b, int n, const fnc &f)
Definition: SimdTypes.h:87
__finl float_x4 __vecc float_x4_load_aligned(const float *x)
__finl void __vecc store_aligned(const float_x4 &a, float *x)
void aligned_delete(cls *obj)
Definition: SimdTypes.h:59
void free(void *ptr)
Definition: VectorOps.h:34