11#define __finl __forceinline
12#define __vecc __vectorcall
14#define __finl inline __attribute__((always_inline))
18#if defined(__SSE2__) || (defined(_M_AMD64) || defined(_M_X64)) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
20#elif defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM64)
31 auto offset = alignment - 1 +
sizeof(
void *);
32 auto p1 = std::malloc(required_bytes + offset);
36 void *p2 = (
void *)(((
size_t)(p1) + offset) & ~(alignment - 1));
38 ((
void **)p2)[-1] = p1;
46 free(((
void **)p)[-1]);
50template <
typename cls>
54 return new (mem) cls();
58template <
typename cls>
71 return (((
size_t)obj) & (alignment - 1)) == 0;
86template <
typename fnc>
92 constexpr int byte_size =
sizeof(float);
96 for (
int i = 0; i <= n - N; i += N)
105 for (
int i = n & (~(N - 1)); i < n; ++i)
110template <
typename fnc>
115 constexpr int byte_size =
sizeof(float);
118 for (
int i = 0; i <= n - N; i += N)
125 for (
int i = n & (~(N - 1)); i < n; ++i)
cls * aligned_new(int alignment)
Constructs a C++ object of type cls at a memory-aligned address; the object must be deleted using aligned_delete().
void aligned_free(void *p)
Frees memory that was allocated with aligned_malloc().
void * aligned_malloc(size_t required_bytes, size_t alignment)
Reserves aligned memory. The returned block must be freed with aligned_free().
bool is_aligned(T *obj, int alignment)
__finl void perform_parallel_simd_aligned(float *a, float *b, int n, const fnc &f)
__finl float_x4 __vecc float_x4_load_aligned(const float *x)
__finl void __vecc store_aligned(const float_x4 &a, float *x)
void aligned_delete(cls *obj)