blob: 31bb731010b8d8bc6cd0273694b40dab1c36624d [file] [log] [blame]
Julien Pommier432b3e82013-01-12 19:28:03 +01001/* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com )
Julien Pommier370d2092011-11-19 18:04:25 +01002
3 Based on original fortran 77 code from FFTPACKv4 from NETLIB,
4 authored by Dr Paul Swarztrauber of NCAR, in 1985.
5
6 As confirmed by the NCAR fftpack software curators, the following
7 FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
8 released under the same terms.
9
10 FFTPACK license:
11
12 http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
13
14 Copyright (c) 2004 the University Corporation for Atmospheric
15 Research ("UCAR"). All rights reserved. Developed by NCAR's
16 Computational and Information Systems Laboratory, UCAR,
17 www.cisl.ucar.edu.
18
19 Redistribution and use of the Software in source and binary forms,
20 with or without modification, is permitted provided that the
21 following conditions are met:
22
23 - Neither the names of NCAR's Computational and Information Systems
24 Laboratory, the University Corporation for Atmospheric Research,
25 nor the names of its sponsors or contributors may be used to
26 endorse or promote products derived from this Software without
27 specific prior written permission.
28
29 - Redistributions of source code must retain the above copyright
30 notices, this list of conditions, and the disclaimer below.
31
32 - Redistributions in binary form must reproduce the above copyright
33 notice, this list of conditions, and the disclaimer below in the
34 documentation and/or other materials provided with the
35 distribution.
36
37 THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
38 EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
39 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
40 NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
41 HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
42 EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
43 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
44 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
45 SOFTWARE.
46*/
47
48/*
49 PFFFT : a Pretty Fast FFT.
50
51 This is basically an adaptation of the single precision fftpack
   (v4) as found on netlib, taking advantage of SIMD instructions found
53 on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON).
54
55 For architectures where no SIMD instruction is available, the code
56 falls back to a scalar version.
57
58 Restrictions:
59
60 - 1D transforms only, with 32-bit single precision.
61
62 - supports only transforms for inputs of length N of the form
Julien Pommier0302e8a2012-10-11 18:04:09 +020063 N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128,
64 144, 160, etc are all acceptable lengths). Performance is best for
65 128<=N<=8192.
Julien Pommier370d2092011-11-19 18:04:25 +010066
67 - all (float*) pointers in the functions below are expected to
   have a "simd-compatible" alignment, that is 16 bytes on x86 and
69 powerpc CPUs.
70
71 You can allocate such buffers with the functions
72 pffft_aligned_malloc / pffft_aligned_free (or with stuff like
73 posix_memalign..)
74
75*/
76
77#ifndef PFFFT_H
78#define PFFFT_H
79
hayati ayguenc974c1d2020-03-29 03:39:30 +020080#include <stddef.h> /* for size_t */
Julien Pommier370d2092011-11-19 18:04:25 +010081
82#ifdef __cplusplus
83extern "C" {
84#endif
85
86 /* opaque struct holding internal stuff (precomputed twiddle factors)
87 this struct can be shared by many threads as it contains only
88 read-only data.
89 */
90 typedef struct PFFFT_Setup PFFFT_Setup;
91
hayati ayguen01d26a72020-03-26 09:02:09 +010092#ifndef PFFFT_COMMON_ENUMS
93#define PFFFT_COMMON_ENUMS
94
Julien Pommier370d2092011-11-19 18:04:25 +010095 /* direction of the transform */
96 typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
97
98 /* type of transform */
99 typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
100
hayati ayguen01d26a72020-03-26 09:02:09 +0100101#endif
102
Julien Pommier370d2092011-11-19 18:04:25 +0100103 /*
104 prepare for performing transforms of size N -- the returned
105 PFFFT_Setup structure is read-only so it can safely be shared by
106 multiple concurrent threads.
107 */
108 PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform);
109 void pffft_destroy_setup(PFFFT_Setup *);
110 /*
     Perform a Fourier transform. The z-domain data is stored in the
112 most efficient order for transforming it back, or using it for
113 convolution. If you need to have its content sorted in the
114 "usual" way, that is as an array of interleaved complex numbers,
115 either use pffft_transform_ordered , or call pffft_zreorder after
116 the forward fft, and before the backward fft.
117
118 Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
119 Typically you will want to scale the backward transform by 1/N.
120
121 The 'work' pointer should point to an area of N (2*N for complex
122 fft) floats, properly aligned. If 'work' is NULL, then stack will
Julien Pommier432b3e82013-01-12 19:28:03 +0100123 be used instead (this is probably the best strategy for small
     FFTs, say for N < 16384). Threads usually have a small stack, so
     there may not be a sufficient amount of memory, usually leading to a crash!
     Use the heap with pffft_aligned_malloc() in this case.
Julien Pommier370d2092011-11-19 18:04:25 +0100127
128 input and output may alias.
129 */
hayati ayguen01d26a72020-03-26 09:02:09 +0100130 void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
Julien Pommier370d2092011-11-19 18:04:25 +0100131
132 /*
133 Similar to pffft_transform, but makes sure that the output is
134 ordered as expected (interleaved complex numbers). This is
135 similar to calling pffft_transform and then pffft_zreorder.
136
137 input and output may alias.
138 */
hayati ayguen01d26a72020-03-26 09:02:09 +0100139 void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
Julien Pommier370d2092011-11-19 18:04:25 +0100140
141 /*
142 call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(...,
143 PFFFT_FORWARD) if you want to have the frequency components in
144 the correct "canonical" order, as interleaved complex numbers.
145
146 (for real transforms, both 0-frequency and half frequency
147 components, which are real, are assembled in the first entry as
148 F(0)+i*F(n/2+1). Note that the original fftpack did place
149 F(n/2+1) at the end of the arrays).
150
151 input and output should not alias.
152 */
hayati ayguen01d26a72020-03-26 09:02:09 +0100153 void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction);
Julien Pommier370d2092011-11-19 18:04:25 +0100154
155 /*
156 Perform a multiplication of the frequency components of dft_a and
157 dft_b and accumulate them into dft_ab. The arrays should have
158 been obtained with pffft_transform(.., PFFFT_FORWARD) and should
159 *not* have been reordered with pffft_zreorder (otherwise just
160 perform the operation yourself as the dft coefs are stored as
161 interleaved complex numbers).
162
     the operation performed is: dft_ab += (dft_a * dft_b)*scaling
164
165 The dft_a, dft_b and dft_ab pointers may alias.
166 */
hayati ayguen01d26a72020-03-26 09:02:09 +0100167 void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
Julien Pommier370d2092011-11-19 18:04:25 +0100168
hayati ayguene6cffc92020-02-28 19:57:30 +0100169 /*
170 Perform a multiplication of the frequency components of dft_a and
171 dft_b and put result in dft_ab. The arrays should have
172 been obtained with pffft_transform(.., PFFFT_FORWARD) and should
173 *not* have been reordered with pffft_zreorder (otherwise just
174 perform the operation yourself as the dft coefs are stored as
175 interleaved complex numbers).
hayati aygueneeb17fc2020-04-13 04:02:07 +0200176
     the operation performed is: dft_ab = (dft_a * dft_b)*scaling
hayati aygueneeb17fc2020-04-13 04:02:07 +0200178
hayati ayguene6cffc92020-02-28 19:57:30 +0100179 The dft_a, dft_b and dft_ab pointers may alias.
180 */
181 void pffft_zconvolve_no_accu(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
182
/* return 4 or 1 depending on whether support for SSE/NEON/Altivec
   instructions was enabled when building pffft.c.

   Declared with (void) so that this is a real prototype in C: an empty
   parameter list would leave the argument list unchecked. */
int pffft_simd_size(void);

/* return string identifier of used architecture (SSE/NEON/Altivec/..).
   Takes no arguments; (void) makes that explicit to C compilers. */
const char *pffft_simd_arch(void);

188
189
190 /* following functions are identical to the pffftd_ functions */
191
hayati ayguene6cffc92020-02-28 19:57:30 +0100192 /* simple helper to get minimum possible fft size */
193 int pffft_min_fft_size(pffft_transform_t transform);
194
195 /* simple helper to determine next power of 2
196 - without inexact/rounding floating point operations
197 */
198 int pffft_next_power_of_two(int N);
199
200 /* simple helper to determine if power of 2 - returns bool */
201 int pffft_is_power_of_two(int N);
202
Julien Pommier370d2092011-11-19 18:04:25 +0100203 /*
204 the float buffers must have the correct alignment (16-byte boundary
205 on intel and powerpc). This function may be used to obtain such
206 correctly aligned buffers.
207 */
208 void *pffft_aligned_malloc(size_t nb_bytes);
209 void pffft_aligned_free(void *);
210
Julien Pommier370d2092011-11-19 18:04:25 +0100211#ifdef __cplusplus
212}
213#endif
214
hayati ayguen01d26a72020-03-26 09:02:09 +0100215#endif /* PFFFT_H */
216