dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 1 | /* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com ) |
| 2 | |
| 3 | Based on original fortran 77 code from FFTPACKv4 from NETLIB, |
| 4 | authored by Dr Paul Swarztrauber of NCAR, in 1985. |
| 5 | |
| 6 | As confirmed by the NCAR fftpack software curators, the following |
| 7 | FFTPACKv5 license applies to FFTPACKv4 sources. My changes are |
| 8 | released under the same terms. |
| 9 | |
| 10 | FFTPACK license: |
| 11 | |
| 12 | http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html |
| 13 | |
| 14 | Copyright (c) 2004 the University Corporation for Atmospheric |
| 15 | Research ("UCAR"). All rights reserved. Developed by NCAR's |
| 16 | Computational and Information Systems Laboratory, UCAR, |
| 17 | www.cisl.ucar.edu. |
| 18 | |
| 19 | Redistribution and use of the Software in source and binary forms, |
| 20 | with or without modification, is permitted provided that the |
| 21 | following conditions are met: |
| 22 | |
| 23 | - Neither the names of NCAR's Computational and Information Systems |
| 24 | Laboratory, the University Corporation for Atmospheric Research, |
| 25 | nor the names of its sponsors or contributors may be used to |
| 26 | endorse or promote products derived from this Software without |
| 27 | specific prior written permission. |
| 28 | |
| 29 | - Redistributions of source code must retain the above copyright |
| 30 | notices, this list of conditions, and the disclaimer below. |
| 31 | |
| 32 | - Redistributions in binary form must reproduce the above copyright |
| 33 | notice, this list of conditions, and the disclaimer below in the |
| 34 | documentation and/or other materials provided with the |
| 35 | distribution. |
| 36 | |
| 37 | THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 38 | EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF |
| 39 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 40 | NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT |
| 41 | HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, |
| 42 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| 43 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| 44 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE |
| 45 | SOFTWARE. |
| 46 | */ |
| 47 | /* |
| 48 | NOTE: This file is adapted from Julien Pommier's original PFFFT, |
| 49 | which works on 32 bit floating point precision using SSE instructions, |
| 50 | to work with 64 bit floating point precision using AVX instructions. |
| 51 | Author: Dario Mambro @ https://github.com/unevens/pffft |
| 52 | */ |
| 53 | /* |
| 54 | PFFFT : a Pretty Fast FFT. |
| 55 | |
| 56 | This is basically an adaptation of the single precision fftpack |
| 57 | (v4) as found on netlib taking advantage of SIMD instruction found |
| 58 | on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON). |
| 59 | |
| 60 | For architectures where no SIMD instruction is available, the code |
| 61 | falls back to a scalar version. |
| 62 | |
| 63 | Restrictions: |
| 64 | |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 65 | - 1D transforms only, with 64-bit double precision. |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 66 | |
| 67 | - supports only transforms for inputs of length N of the form |
| 68 | N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128, |
| 69 | 144, 160, etc are all acceptable lengths). Performance is best for |
| 70 | 128<=N<=8192. |
| 71 | |
| 72 | - all (double*) pointers in the functions below are expected to |
| 73 | have an "simd-compatible" alignment, that is 32 bytes on x86 and |
| 74 | powerpc CPUs. |
| 75 | |
| 76 | You can allocate such buffers with the functions |
| 77 | pffft_aligned_malloc / pffft_aligned_free (or with stuff like |
| 78 | posix_memalign..) |
| 79 | |
| 80 | */ |
| 81 | |
| 82 | #ifndef PFFFT_DOUBLE_H |
| 83 | #define PFFFT_DOUBLE_H |
| 84 | |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 85 | #include <stddef.h> /* for size_t */ |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 86 | |
| 87 | #ifdef __cplusplus |
| 88 | extern "C" { |
| 89 | #endif |
| 90 | |
| 91 | /* opaque struct holding internal stuff (precomputed twiddle factors) |
| 92 | this struct can be shared by many threads as it contains only |
| 93 | read-only data. |
| 94 | */ |
dario mambro | b42ce92 | 2020-03-24 20:30:39 +0100 | [diff] [blame] | 95 | typedef struct PFFFTD_Setup PFFFTD_Setup; |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 96 | |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 97 | #ifndef PFFFT_COMMON_ENUMS |
| 98 | #define PFFFT_COMMON_ENUMS |
| 99 | |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 100 | /* direction of the transform */ |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 101 | typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t; |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 102 | |
| 103 | /* type of transform */ |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 104 | typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t; |
| 105 | |
| 106 | #endif |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 107 | |
| 108 | /* |
| 109 | prepare for performing transforms of size N -- the returned |
dario mambro | b42ce92 | 2020-03-24 20:30:39 +0100 | [diff] [blame] | 110 | PFFFTD_Setup structure is read-only so it can safely be shared by |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 111 | multiple concurrent threads. |
| 112 | */ |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 113 | PFFFTD_Setup *pffftd_new_setup(int N, pffft_transform_t transform); |
dario mambro | b42ce92 | 2020-03-24 20:30:39 +0100 | [diff] [blame] | 114 | void pffftd_destroy_setup(PFFFTD_Setup *); |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 115 | /* |
| 116 | Perform a Fourier transform , The z-domain data is stored in the |
| 117 | most efficient order for transforming it back, or using it for |
| 118 | convolution. If you need to have its content sorted in the |
| 119 | "usual" way, that is as an array of interleaved complex numbers, |
| 120 | either use pffft_transform_ordered , or call pffft_zreorder after |
| 121 | the forward fft, and before the backward fft. |
| 122 | |
| 123 | Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x. |
| 124 | Typically you will want to scale the backward transform by 1/N. |
| 125 | |
| 126 | The 'work' pointer should point to an area of N (2*N for complex |
| 127 | fft) doubles, properly aligned. If 'work' is NULL, then stack will |
| 128 | be used instead (this is probably the best strategy for small |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 129 | FFTs, say for N < 16384). Threads usually have a small stack, that |
| 130 | there's no sufficient amount of memory, usually leading to a crash! |
| 131 | Use the heap with pffft_aligned_malloc() in this case. |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 132 | |
| 133 | input and output may alias. |
| 134 | */ |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 135 | void pffftd_transform(PFFFTD_Setup *setup, const double *input, double *output, double *work, pffft_direction_t direction); |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 136 | |
| 137 | /* |
| 138 | Similar to pffft_transform, but makes sure that the output is |
| 139 | ordered as expected (interleaved complex numbers). This is |
| 140 | similar to calling pffft_transform and then pffft_zreorder. |
| 141 | |
| 142 | input and output may alias. |
| 143 | */ |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 144 | void pffftd_transform_ordered(PFFFTD_Setup *setup, const double *input, double *output, double *work, pffft_direction_t direction); |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 145 | |
| 146 | /* |
| 147 | call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(..., |
| 148 | PFFFT_FORWARD) if you want to have the frequency components in |
| 149 | the correct "canonical" order, as interleaved complex numbers. |
| 150 | |
| 151 | (for real transforms, both 0-frequency and half frequency |
| 152 | components, which are real, are assembled in the first entry as |
| 153 | F(0)+i*F(n/2+1). Note that the original fftpack did place |
| 154 | F(n/2+1) at the end of the arrays). |
| 155 | |
| 156 | input and output should not alias. |
| 157 | */ |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 158 | void pffftd_zreorder(PFFFTD_Setup *setup, const double *input, double *output, pffft_direction_t direction); |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 159 | |
| 160 | /* |
| 161 | Perform a multiplication of the frequency components of dft_a and |
| 162 | dft_b and accumulate them into dft_ab. The arrays should have |
| 163 | been obtained with pffft_transform(.., PFFFT_FORWARD) and should |
| 164 | *not* have been reordered with pffft_zreorder (otherwise just |
| 165 | perform the operation yourself as the dft coefs are stored as |
| 166 | interleaved complex numbers). |
| 167 | |
| 168 | the operation performed is: dft_ab += (dft_a * fdt_b)*scaling |
| 169 | |
| 170 | The dft_a, dft_b and dft_ab pointers may alias. |
| 171 | */ |
dario mambro | b42ce92 | 2020-03-24 20:30:39 +0100 | [diff] [blame] | 172 | void pffftd_zconvolve_accumulate(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double *dft_ab, double scaling); |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 173 | |
Dario Mambro | add5b1d | 2020-03-27 20:02:03 +0100 | [diff] [blame] | 174 | /* |
hayati ayguen | eeb17fc | 2020-04-13 04:02:07 +0200 | [diff] [blame] | 175 | Perform a multiplication of the frequency components of dft_a and |
| 176 | dft_b and put result in dft_ab. The arrays should have |
| 177 | been obtained with pffft_transform(.., PFFFT_FORWARD) and should |
| 178 | *not* have been reordered with pffft_zreorder (otherwise just |
| 179 | perform the operation yourself as the dft coefs are stored as |
| 180 | interleaved complex numbers). |
| 181 | |
| 182 | the operation performed is: dft_ab = (dft_a * fdt_b)*scaling |
| 183 | |
| 184 | The dft_a, dft_b and dft_ab pointers may alias. |
Dario Mambro | add5b1d | 2020-03-27 20:02:03 +0100 | [diff] [blame] | 185 | */ |
| 186 | void pffftd_zconvolve_no_accu(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double*dft_ab, double scaling); |
| 187 | |
hayati ayguen | eeb17fc | 2020-04-13 04:02:07 +0200 | [diff] [blame] | 188 | /* return 4 or 1 wether support AVX instructions was enabled when building pffft-double.c */ |
| 189 | int pffftd_simd_size(); |
| 190 | |
| 191 | /* return string identifier of used architecture (AVX/..) */ |
| 192 | const char * pffftd_simd_arch(); |
| 193 | |
| 194 | |
| 195 | /* following functions are identical to the pffft_ functions */ |
Dario Mambro | add5b1d | 2020-03-27 20:02:03 +0100 | [diff] [blame] | 196 | |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 197 | /* simple helper to get minimum possible fft size */ |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 198 | int pffftd_min_fft_size(pffft_transform_t transform); |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 199 | |
| 200 | /* simple helper to determine next power of 2 |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 201 | - without inexact/rounding floating point operations |
hayati ayguen | 653f77b | 2020-03-25 07:46:31 +0100 | [diff] [blame] | 202 | */ |
dario mambro | b42ce92 | 2020-03-24 20:30:39 +0100 | [diff] [blame] | 203 | int pffftd_next_power_of_two(int N); |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 204 | |
| 205 | /* simple helper to determine if power of 2 - returns bool */ |
dario mambro | b42ce92 | 2020-03-24 20:30:39 +0100 | [diff] [blame] | 206 | int pffftd_is_power_of_two(int N); |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 207 | |
| 208 | /* |
| 209 | the double buffers must have the correct alignment (32-byte boundary |
| 210 | on intel and powerpc). This function may be used to obtain such |
| 211 | correctly aligned buffers. |
| 212 | */ |
dario mambro | b42ce92 | 2020-03-24 20:30:39 +0100 | [diff] [blame] | 213 | void *pffftd_aligned_malloc(size_t nb_bytes); |
| 214 | void pffftd_aligned_free(void *); |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 215 | |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 216 | #ifdef __cplusplus |
| 217 | } |
| 218 | #endif |
| 219 | |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 220 | #endif /* PFFFT_DOUBLE_H */ |
| 221 | |