hayati ayguen | eeb17fc | 2020-04-13 04:02:07 +0200 | [diff] [blame] | 1 | |
| 2 | #include "pffft.h" |
| 3 | |
| 4 | #include <stdlib.h> |
| 5 | |
/* SSE and co like 16-byte aligned pointers;
 * with a 64-byte alignment, we are even aligned on L2 cache lines... */
#define MALLOC_V4SF_ALIGNMENT 64

/*
 * Allocate nb_bytes of storage whose address is a multiple of
 * MALLOC_V4SF_ALIGNMENT.
 *
 * The request is over-allocated by MALLOC_V4SF_ALIGNMENT bytes. The returned
 * address is the first alignment boundary strictly above the raw malloc()
 * pointer, and the raw pointer is saved in the pointer-sized slot just below
 * the returned address so that Valigned_free() can hand it back to free().
 *
 * Returns NULL when malloc() fails, or when nb_bytes is so large that adding
 * the alignment padding would wrap around size_t.
 * The result must be released with Valigned_free(), never with free().
 */
static void * Valigned_malloc(size_t nb_bytes) {
  const size_t alignment = MALLOC_V4SF_ALIGNMENT;
  void *raw;
  void *aligned;

  /* Guard the padded size against size_t wrap-around: without this check a
   * huge request would silently become a tiny allocation. */
  if (nb_bytes > (size_t) -1 - alignment) return NULL;

  raw = malloc(nb_bytes + alignment);
  if (!raw) return NULL;

  /* Adding a full alignment before masking always advances the address by
   * 1..alignment bytes; since malloc() results are aligned for any object
   * type, that gap is large enough to hold the saved raw pointer. */
  aligned = (void *) (((size_t) raw + alignment) & ~(alignment - 1));
  ((void **) aligned)[-1] = raw;
  return aligned;
}
| 17 | |
/*
 * Release a block previously returned by Valigned_malloc().
 *
 * The pointer that malloc() originally produced is read back from the
 * pointer-sized slot located immediately below p and passed to free().
 * A null p is ignored.
 */
static void Valigned_free(void *p) {
  void **slot;

  if (!p) return;

  slot = (void **) p;
  free(slot[-1]);
}
| 21 | |
| 22 | |
/*
 * Round N up to the nearest power of two; a value that already is a power
 * of two is returned unchanged.
 *
 * Technique: https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
 * After subtracting one, the highest set bit is smeared into every lower bit
 * position, and adding one back then carries into the next power of two.
 * The shift sequence 1, 2, 4, 8, 16 covers a 32-bit value.
 *
 * N == 0 yields 0.
 * NOTE(review): for N above 2^30 the rounded value does not fit a 32-bit int,
 * and negative N is not handled specially -- callers are presumably expected
 * to pass values in 1..2^30; confirm against call sites.
 */
static int next_power_of_two(int N) {
  unsigned bits = (unsigned) N - 1u;
  unsigned shift;

  for (shift = 1; shift <= 16; shift <<= 1)
    bits |= bits >> shift;

  return (int) (bits + 1u);
}
| 36 | |
/*
 * Return 1 when N is a positive power of two (1, 2, 4, ...), 0 otherwise.
 * Zero and negative values are never reported as powers of two.
 */
static int is_power_of_two(int N) {
  /* https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2 */
  /* Reject N <= 0 before the bit test: this keeps INT_MIN away from the
   * N - 1 below, which would otherwise be a signed overflow (undefined
   * behavior) and could misreport INT_MIN as a power of two. */
  return N > 0 && (N & (N - 1)) == 0;
}
| 42 | |
| 43 | static int min_fft_size(pffft_transform_t transform) { |
| 44 | /* unfortunately, the fft size must be a multiple of 16 for complex FFTs |
| 45 | and 32 for real FFTs -- a lot of stuff would need to be rewritten to |
| 46 | handle other cases (or maybe just switch to a scalar fft, I don't know..) */ |
| 47 | int simdSz = pffft_simd_size(); |
| 48 | if (transform == PFFFT_REAL) |
| 49 | return ( 2 * simdSz * simdSz ); |
| 50 | else if (transform == PFFFT_COMPLEX) |
| 51 | return ( simdSz * simdSz ); |
| 52 | else |
| 53 | return 1; |
| 54 | } |
| 55 | |
| 56 | |
/* Public pffft_ entry point: forwards the request to the file-local
 * Valigned_malloc() and returns its result unchanged. */
void *pffft_aligned_malloc(size_t nb_bytes)
{
  void *block = Valigned_malloc(nb_bytes);
  return block;
}
/* Public pffft_ entry point: hands p to the file-local Valigned_free(). */
void pffft_aligned_free(void *p)
{
  Valigned_free(p);
}
/* Public pffft_ entry point: returns whatever the file-local
 * next_power_of_two() computes for N. */
int pffft_next_power_of_two(int N)
{
  const int rounded = next_power_of_two(N);
  return rounded;
}
/* Public pffft_ entry point: returns whatever the file-local
 * is_power_of_two() reports for N. */
int pffft_is_power_of_two(int N)
{
  const int flag = is_power_of_two(N);
  return flag;
}
| 61 | int pffft_min_fft_size(pffft_transform_t transform) { return min_fft_size(transform); } |
| 62 | |
/* Public pffftd_ entry point: forwards the request to the file-local
 * Valigned_malloc() and returns its result unchanged. */
void *pffftd_aligned_malloc(size_t nb_bytes)
{
  void *block = Valigned_malloc(nb_bytes);
  return block;
}
/* Public pffftd_ entry point: hands p to the file-local Valigned_free(). */
void pffftd_aligned_free(void *p)
{
  Valigned_free(p);
}
/* Public pffftd_ entry point: returns whatever the file-local
 * next_power_of_two() computes for N. */
int pffftd_next_power_of_two(int N)
{
  const int rounded = next_power_of_two(N);
  return rounded;
}
/* Public pffftd_ entry point: returns whatever the file-local
 * is_power_of_two() reports for N. */
int pffftd_is_power_of_two(int N)
{
  const int flag = is_power_of_two(N);
  return flag;
}
| 67 | int pffftd_min_fft_size(pffft_transform_t transform) { return min_fft_size(transform); } |
| 68 | |