blob: d83c06d4eba08d0943c80ae3721cee1183ac97d9 [file] [log] [blame]
dario mambro58504632020-03-24 14:49:50 +01001/* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com )
2
3 Based on original fortran 77 code from FFTPACKv4 from NETLIB,
4 authored by Dr Paul Swarztrauber of NCAR, in 1985.
5
6 As confirmed by the NCAR fftpack software curators, the following
7 FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
8 released under the same terms.
9
10 FFTPACK license:
11
12 http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
13
14 Copyright (c) 2004 the University Corporation for Atmospheric
15 Research ("UCAR"). All rights reserved. Developed by NCAR's
16 Computational and Information Systems Laboratory, UCAR,
17 www.cisl.ucar.edu.
18
19 Redistribution and use of the Software in source and binary forms,
20 with or without modification, is permitted provided that the
21 following conditions are met:
22
23 - Neither the names of NCAR's Computational and Information Systems
24 Laboratory, the University Corporation for Atmospheric Research,
25 nor the names of its sponsors or contributors may be used to
26 endorse or promote products derived from this Software without
27 specific prior written permission.
28
29 - Redistributions of source code must retain the above copyright
30 notices, this list of conditions, and the disclaimer below.
31
32 - Redistributions in binary form must reproduce the above copyright
33 notice, this list of conditions, and the disclaimer below in the
34 documentation and/or other materials provided with the
35 distribution.
36
37 THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
38 EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
39 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
40 NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
41 HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
42 EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
43 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
44 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
45 SOFTWARE.
46*/
47/*
48 NOTE: This file is adapted from Julien Pommier's original PFFFT,
49 which works on 32 bit floating point precision using SSE instructions,
50 to work with 64 bit floating point precision using AVX instructions.
51 Author: Dario Mambro @ https://github.com/unevens/pffft
52*/
53/*
54 PFFFT : a Pretty Fast FFT.
55
56 This is basically an adaptation of the single precision fftpack
57 (v4) as found on netlib, taking advantage of SIMD instructions found
58 on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON).
59
60 For architectures where no SIMD instruction is available, the code
61 falls back to a scalar version.
62
63 Restrictions:
64
hayati ayguen01d26a72020-03-26 09:02:09 +010065 - 1D transforms only, with 64-bit double precision.
dario mambro58504632020-03-24 14:49:50 +010066
67 - supports only transforms for inputs of length N of the form
68 N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128,
69 144, 160, etc are all acceptable lengths). Performance is best for
70 128<=N<=8192.
71
72 - all (double*) pointers in the functions below are expected to
73 have a "simd-compatible" alignment, that is 32 bytes on x86 and
74 powerpc CPUs.
75
76 You can allocate such buffers with the functions
77 pffftd_aligned_malloc / pffftd_aligned_free (or with functions like
78 posix_memalign...)
79
80*/
81
82#ifndef PFFFT_DOUBLE_H
83#define PFFFT_DOUBLE_H
84
hayati ayguenc974c1d2020-03-29 03:39:30 +020085#include <stddef.h> /* for size_t */
dario mambro58504632020-03-24 14:49:50 +010086
87#ifdef __cplusplus
88extern "C" {
89#endif
90
91 /* Opaque struct holding internal data (precomputed twiddle factors).
92 This struct can be shared by many threads, as it contains only
93 read-only data.
94 */
dario mambrob42ce922020-03-24 20:30:39 +010095 typedef struct PFFFTD_Setup PFFFTD_Setup;
dario mambro58504632020-03-24 14:49:50 +010096
hayati ayguen01d26a72020-03-26 09:02:09 +010097#ifndef PFFFT_COMMON_ENUMS
98#define PFFFT_COMMON_ENUMS
99
dario mambro58504632020-03-24 14:49:50 +0100100 /* direction of the transform: forward or backward; passed as the 'direction' argument of the transform and reorder functions */
hayati ayguen01d26a72020-03-26 09:02:09 +0100101 typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
dario mambro58504632020-03-24 14:49:50 +0100102
103 /* type of transform: real or complex; chosen when creating a setup */
hayati ayguen01d26a72020-03-26 09:02:09 +0100104 typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
105
106#endif /* PFFFT_COMMON_ENUMS */
dario mambro58504632020-03-24 14:49:50 +0100107
108 /*
109 Prepare for performing transforms of size N -- the returned
dario mambrob42ce922020-03-24 20:30:39 +0100110 PFFFTD_Setup structure is read-only so it can safely be shared by
dario mambro58504632020-03-24 14:49:50 +0100111 multiple concurrent threads. 'transform' selects a real or a complex
 transform. NOTE(review): the setup is presumably released with
 pffftd_destroy_setup() -- confirm against the implementation.
112 */
hayati ayguen01d26a72020-03-26 09:02:09 +0100113 PFFFTD_Setup *pffftd_new_setup(int N, pffft_transform_t transform);
dario mambrob42ce922020-03-24 20:30:39 +0100114 void pffftd_destroy_setup(PFFFTD_Setup *);
dario mambro58504632020-03-24 14:49:50 +0100115 /*
116 Perform a Fourier transform. The z-domain data is stored in the
117 most efficient order for transforming it back, or using it for
118 convolution. If you need to have its content sorted in the
119 "usual" way, that is as an array of interleaved complex numbers,
120 either use pffftd_transform_ordered, or call pffftd_zreorder after
121 the forward fft, and before the backward fft.
122
123 Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
124 Typically you will want to scale the backward transform by 1/N.
125
126 The 'work' pointer should point to an area of N (2*N for complex
127 fft) doubles, properly aligned. If 'work' is NULL, then the stack will
128 be used instead (this is probably the best strategy for small
hayati ayguen01d26a72020-03-26 09:02:09 +0100129 FFTs, say for N < 16384). Threads usually have a small stack, so
130 there may not be a sufficient amount of memory, usually leading to a crash!
131 Use the heap with pffftd_aligned_malloc() in this case.
dario mambro58504632020-03-24 14:49:50 +0100132
133 input and output may alias.
134 */
hayati ayguen01d26a72020-03-26 09:02:09 +0100135 void pffftd_transform(PFFFTD_Setup *setup, const double *input, double *output, double *work, pffft_direction_t direction);
dario mambro58504632020-03-24 14:49:50 +0100136
137 /*
138 Similar to pffftd_transform, but makes sure that the output is
139 ordered as expected (interleaved complex numbers). This is
140 similar to calling pffftd_transform and then pffftd_zreorder.
141
142 input and output may alias.
143 */
hayati ayguen01d26a72020-03-26 09:02:09 +0100144 void pffftd_transform_ordered(PFFFTD_Setup *setup, const double *input, double *output, double *work, pffft_direction_t direction);
dario mambro58504632020-03-24 14:49:50 +0100145
146 /*
147 Call pffftd_zreorder(.., PFFFT_FORWARD) after pffftd_transform(...,
148 PFFFT_FORWARD) if you want to have the frequency components in
149 the correct "canonical" order, as interleaved complex numbers.
150
151 (for real transforms, both 0-frequency and half frequency
152 components, which are real, are assembled in the first entry as
153 F(0)+i*F(n/2+1). Note that the original fftpack did place
154 F(n/2+1) at the end of the arrays).
155
156 input and output should not alias.
157 */
hayati ayguen01d26a72020-03-26 09:02:09 +0100158 void pffftd_zreorder(PFFFTD_Setup *setup, const double *input, double *output, pffft_direction_t direction);
dario mambro58504632020-03-24 14:49:50 +0100159
160 /*
161 Perform a multiplication of the frequency components of dft_a and
162 dft_b and accumulate them into dft_ab. The arrays should have
163 been obtained with pffftd_transform(.., PFFFT_FORWARD) and should
164 *not* have been reordered with pffftd_zreorder (otherwise just
165 perform the operation yourself as the dft coefs are stored as
166 interleaved complex numbers).
167
168 the operation performed is: dft_ab += (dft_a * dft_b)*scaling
169
170 The dft_a, dft_b and dft_ab pointers may alias.
171 */
dario mambrob42ce922020-03-24 20:30:39 +0100172 void pffftd_zconvolve_accumulate(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double *dft_ab, double scaling);
dario mambro58504632020-03-24 14:49:50 +0100173
Dario Mambroadd5b1d2020-03-27 20:02:03 +0100174 /*
hayati aygueneeb17fc2020-04-13 04:02:07 +0200175 Perform a multiplication of the frequency components of dft_a and
176 dft_b and put the result in dft_ab. The arrays should have
177 been obtained with pffftd_transform(.., PFFFT_FORWARD) and should
178 *not* have been reordered with pffftd_zreorder (otherwise just
179 perform the operation yourself as the dft coefs are stored as
180 interleaved complex numbers).
181
182 the operation performed is: dft_ab = (dft_a * dft_b)*scaling
183
184 The dft_a, dft_b and dft_ab pointers may alias.
Dario Mambroadd5b1d2020-03-27 20:02:03 +0100185 */
186 void pffftd_zconvolve_no_accu(PFFFTD_Setup *setup, const double *dft_a, const double *dft_b, double*dft_ab, double scaling);
187
hayati aygueneeb17fc2020-04-13 04:02:07 +0200188 /* return 4 or 1 depending on whether support for AVX instructions was enabled when building pffft-double.c.
 NOTE(review): in C before C23 an empty parameter list `()` leaves the parameters unspecified; `(void)` would make this a real prototype. */
189 int pffftd_simd_size();
190
191 /* return a string identifier of the SIMD architecture in use (AVX/..).
 NOTE(review): in C before C23 an empty parameter list `()` leaves the parameters unspecified; `(void)` would make this a real prototype. */
192 const char * pffftd_simd_arch();
193
194
195 /* following functions are identical to the pffft_ functions */
Dario Mambroadd5b1d2020-03-27 20:02:03 +0100196
dario mambro58504632020-03-24 14:49:50 +0100197 /* simple helper to get the minimum possible fft size for the given transform type (real or complex) */
hayati ayguen01d26a72020-03-26 09:02:09 +0100198 int pffftd_min_fft_size(pffft_transform_t transform);
dario mambro58504632020-03-24 14:49:50 +0100199
200 /* simple helper to determine the next power of 2 for N
hayati ayguen01d26a72020-03-26 09:02:09 +0100201 - without inexact/rounding floating point operations.
 NOTE(review): whether N itself is returned when it already is a power
 of 2 is not visible from this header -- confirm in the implementation.
hayati ayguen653f77b2020-03-25 07:46:31 +0100202 */
dario mambrob42ce922020-03-24 20:30:39 +0100203 int pffftd_next_power_of_two(int N);
dario mambro58504632020-03-24 14:49:50 +0100204
205 /* simple helper to determine whether N is a power of 2 - returns a boolean value as an int */
dario mambrob42ce922020-03-24 20:30:39 +0100206 int pffftd_is_power_of_two(int N);
dario mambro58504632020-03-24 14:49:50 +0100207
208 /*
209 The double buffers must have the correct alignment (32-byte boundary
210 on intel and powerpc). pffftd_aligned_malloc may be used to obtain such
211 correctly aligned buffers of nb_bytes bytes. NOTE(review): such buffers
 are presumably to be released with pffftd_aligned_free -- confirm.
212 */
dario mambrob42ce922020-03-24 20:30:39 +0100213 void *pffftd_aligned_malloc(size_t nb_bytes);
214 void pffftd_aligned_free(void *);
dario mambro58504632020-03-24 14:49:50 +0100215
dario mambro58504632020-03-24 14:49:50 +0100216#ifdef __cplusplus
217}
218#endif
219
hayati ayguen01d26a72020-03-26 09:02:09 +0100220#endif /* PFFFT_DOUBLE_H */
221