blob: e3a9e9e83fa59f21b6fbecd3ffd7661790f3fffc [file] [log] [blame]
Julien Pommier370d2092011-11-19 18:04:25 +01001/*
Julien Pommier432b3e82013-01-12 19:28:03 +01002 Copyright (c) 2013 Julien Pommier.
hayati ayguenca112412020-04-13 00:19:40 +02003 Copyright (c) 2019 Hayati Ayguen ( h_ayguen@web.de )
Julien Pommier370d2092011-11-19 18:04:25 +01004
5 Small test & bench for PFFFT, comparing its performance with the scalar FFTPACK, FFTW, and Apple vDSP
6
7 How to build:
8
Julien Pommier836bc4b2011-11-20 10:58:07 +01009 on linux, with fftw3:
10 gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm
Julien Pommier370d2092011-11-19 18:04:25 +010011
Julien Pommier836bc4b2011-11-20 10:58:07 +010012 on macos, without fftw3:
hayati ayguen3673ac02019-12-22 07:09:56 +010013 clang -o test_pffft -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -framework Accelerate
Julien Pommier370d2092011-11-19 18:04:25 +010014
Julien Pommier836bc4b2011-11-20 10:58:07 +010015 on macos, with fftw3:
hayati ayguen3673ac02019-12-22 07:09:56 +010016 clang -o test_pffft -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework Accelerate
17
18 as alternative: replace clang by gcc.
Julien Pommier836bc4b2011-11-20 10:58:07 +010019
20 on windows, with visual c++:
21 cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft.c pffft.c fftpack.c
Julien Pommier370d2092011-11-19 18:04:25 +010022
23 build without SIMD instructions:
Julien Pommier370d2092011-11-19 18:04:25 +010024 gcc -o test_pffft -DPFFFT_SIMD_DISABLE -O3 -Wall -W pffft.c test_pffft.c fftpack.c -lm
25
26 */
27
hayati ayguenca112412020-04-13 00:19:40 +020028#define CONCAT_TOKENS(A, B) A ## B
hayati ayguen223c62a2020-06-13 01:59:49 +020029#define CONCAT_THREE_TOKENS(A, B, C) A ## B ## C
hayati ayguenca112412020-04-13 00:19:40 +020030
31#ifdef PFFFT_ENABLE_FLOAT
Julien Pommier370d2092011-11-19 18:04:25 +010032#include "pffft.h"
hayati ayguenca112412020-04-13 00:19:40 +020033
34typedef float pffft_scalar;
35typedef PFFFT_Setup PFFFT_SETUP;
36#define PFFFT_FUNC(F) CONCAT_TOKENS(pffft_, F)
37
38#else
39/*
40Note: adapted for double precision dynamic range version.
41*/
42#include "pffft_double.h"
43
44typedef double pffft_scalar;
45typedef PFFFTD_Setup PFFFT_SETUP;
46#define PFFFT_FUNC(F) CONCAT_TOKENS(pffftd_, F)
47#endif
48
hayati ayguenca112412020-04-13 00:19:40 +020049#ifdef PFFFT_ENABLE_FLOAT
50
Julien Pommier370d2092011-11-19 18:04:25 +010051#include "fftpack.h"
52
hayati ayguen4c3a87a2019-12-23 08:48:00 +010053#ifdef HAVE_GREEN_FFTS
54#include "fftext.h"
55#endif
56
57#ifdef HAVE_KISS_FFT
58#include <kiss_fft.h>
59#include <kiss_fftr.h>
60#endif
61
hayati ayguenca112412020-04-13 00:19:40 +020062#endif
hayati ayguen4c3a87a2019-12-23 08:48:00 +010063
hayati ayguen1c17fd42020-06-12 18:21:26 +000064#ifdef HAVE_POCKET_FFT
hayati ayguen223c62a2020-06-13 01:59:49 +020065#include <pocketfft_double.h>
66#include <pocketfft_single.h>
hayati ayguen1c17fd42020-06-12 18:21:26 +000067#endif
68
hayati ayguen223c62a2020-06-13 01:59:49 +020069#ifdef PFFFT_ENABLE_FLOAT
70 #define POCKFFTR_PRE(R) CONCAT_TOKENS(rffts, R)
71 #define POCKFFTC_PRE(R) CONCAT_TOKENS(cffts, R)
72 #define POCKFFTR_MID(L,R) CONCAT_THREE_TOKENS(L, rffts, R)
73 #define POCKFFTC_MID(L,R) CONCAT_THREE_TOKENS(L, cffts, R)
74#else
75 #define POCKFFTR_PRE(R) CONCAT_TOKENS(rfft, R)
76 #define POCKFFTC_PRE(R) CONCAT_TOKENS(cfft, R)
77 #define POCKFFTR_MID(L,R) CONCAT_THREE_TOKENS(L, rfft, R)
78 #define POCKFFTC_MID(L,R) CONCAT_THREE_TOKENS(L, cfft, R)
79#endif
80
81
hayati ayguen1c17fd42020-06-12 18:21:26 +000082
Julien Pommier370d2092011-11-19 18:04:25 +010083#include <math.h>
84#include <stdio.h>
85#include <stdlib.h>
86#include <time.h>
87#include <assert.h>
88#include <string.h>
89
90#ifdef HAVE_SYS_TIMES
91# include <sys/times.h>
92# include <unistd.h>
93#endif
94
95#ifdef HAVE_VECLIB
hayati ayguen3673ac02019-12-22 07:09:56 +010096# include <Accelerate/Accelerate.h>
Julien Pommier370d2092011-11-19 18:04:25 +010097#endif
98
99#ifdef HAVE_FFTW
100# include <fftw3.h>
hayati ayguenca112412020-04-13 00:19:40 +0200101
102#ifdef PFFFT_ENABLE_FLOAT
103typedef fftwf_plan FFTW_PLAN;
104typedef fftwf_complex FFTW_COMPLEX;
105#define FFTW_FUNC(F) CONCAT_TOKENS(fftwf_, F)
106#else
107typedef fftw_plan FFTW_PLAN;
108typedef fftw_complex FFTW_COMPLEX;
109#define FFTW_FUNC(F) CONCAT_TOKENS(fftw_, F)
Julien Pommier370d2092011-11-19 18:04:25 +0100110#endif
111
hayati ayguenca112412020-04-13 00:19:40 +0200112#endif /* HAVE_FFTW */
113
hayati ayguenc974c1d2020-03-29 03:39:30 +0200114#ifndef M_LN2
115 #define M_LN2 0.69314718055994530942 /* log_e 2 */
116#endif
117
hayati ayguencd60b962019-12-23 11:42:31 +0100118
hayati ayguen1c17fd42020-06-12 18:21:26 +0000119#define NUM_FFT_ALGOS 9
hayati ayguencd60b962019-12-23 11:42:31 +0100120enum {
121 ALGO_FFTPACK = 0,
122 ALGO_VECLIB,
123 ALGO_FFTW_ESTIM,
124 ALGO_FFTW_AUTO,
125 ALGO_GREEN,
126 ALGO_KISS,
hayati ayguen1c17fd42020-06-12 18:21:26 +0000127 ALGO_POCKET,
128 ALGO_PFFFT_U, /* = 7 */
129 ALGO_PFFFT_O /* = 8 */
hayati ayguencd60b962019-12-23 11:42:31 +0100130};
131
hayati ayguenb9804a22019-12-27 00:35:47 +0100132#define NUM_TYPES 7
hayati ayguencd60b962019-12-23 11:42:31 +0100133enum {
134 TYPE_PREP = 0, /* time for preparation in ms */
135 TYPE_DUR_NS = 1, /* time per fft in ns */
136 TYPE_DUR_FASTEST = 2, /* relative time to fastest */
hayati ayguen42ee6f12019-12-25 23:08:04 +0100137 TYPE_REL_PFFFT = 3, /* relative time to ALGO_PFFFT */
138 TYPE_ITER = 4, /* # of iterations in measurement */
hayati ayguenb9804a22019-12-27 00:35:47 +0100139 TYPE_MFLOPS = 5, /* MFlops/sec */
140 TYPE_DUR_TOT = 6 /* test duration in sec */
hayati ayguencd60b962019-12-23 11:42:31 +0100141};
hayati ayguenc974c1d2020-03-29 03:39:30 +0200142/* double tmeas[NUM_TYPES][NUM_FFT_ALGOS]; */
hayati ayguencd60b962019-12-23 11:42:31 +0100143
144const char * algoName[NUM_FFT_ALGOS] = {
hayati ayguenb9804a22019-12-27 00:35:47 +0100145 "FFTPack ",
146 "vDSP (vec) ",
hayati ayguenca112412020-04-13 00:19:40 +0200147 "FFTW F(estim)",
148 "FFTW F(auto) ",
hayati ayguenb9804a22019-12-27 00:35:47 +0100149 "Green ",
150 "Kiss ",
hayati ayguen1c17fd42020-06-12 18:21:26 +0000151 "Pocket ",
hayati ayguenb9804a22019-12-27 00:35:47 +0100152 "PFFFT-U(simd)", /* unordered */
153 "PFFFT (simd) " /* ordered */
hayati ayguencd60b962019-12-23 11:42:31 +0100154};
155
156
157int compiledInAlgo[NUM_FFT_ALGOS] = {
hayati ayguenca112412020-04-13 00:19:40 +0200158#ifdef PFFFT_ENABLE_FLOAT
hayati ayguencd60b962019-12-23 11:42:31 +0100159 1, /* "FFTPack " */
hayati ayguenca112412020-04-13 00:19:40 +0200160#else
161 0, /* "FFTPack " */
162#endif
163#if defined(HAVE_VECLIB) && defined(PFFFT_ENABLE_FLOAT)
hayati ayguencd60b962019-12-23 11:42:31 +0100164 1, /* "vDSP (vec) " */
165#else
166 0,
167#endif
hayati ayguenca112412020-04-13 00:19:40 +0200168#if defined(HAVE_FFTW)
hayati ayguencd60b962019-12-23 11:42:31 +0100169 1, /* "FFTW(estim)" */
170 1, /* "FFTW (auto)" */
171#else
172 0, 0,
173#endif
hayati ayguenca112412020-04-13 00:19:40 +0200174#if defined(HAVE_GREEN_FFTS) && defined(PFFFT_ENABLE_FLOAT)
hayati ayguencd60b962019-12-23 11:42:31 +0100175 1, /* "Green " */
176#else
177 0,
178#endif
hayati ayguenca112412020-04-13 00:19:40 +0200179#if defined(HAVE_KISS_FFT) && defined(PFFFT_ENABLE_FLOAT)
hayati ayguencd60b962019-12-23 11:42:31 +0100180 1, /* "Kiss " */
181#else
182 0,
183#endif
hayati ayguen223c62a2020-06-13 01:59:49 +0200184#if defined(HAVE_POCKET_FFT)
hayati ayguen1c17fd42020-06-12 18:21:26 +0000185 1, /* "Pocket " */
186#else
187 0,
188#endif
hayati ayguenb9804a22019-12-27 00:35:47 +0100189 1, /* "PFFFT_U " */
190 1 /* "PFFFT_O " */
hayati ayguencd60b962019-12-23 11:42:31 +0100191};
192
193const char * algoTableHeader[NUM_FFT_ALGOS][2] = {
194{ "| real FFTPack ", "| cplx FFTPack " },
195{ "| real vDSP ", "| cplx vDSP " },
196{ "|real FFTWestim", "|cplx FFTWestim" },
197{ "|real FFTWauto ", "|cplx FFTWauto " },
198{ "| real Green ", "| cplx Green " },
199{ "| real Kiss ", "| cplx Kiss " },
hayati ayguen1c17fd42020-06-12 18:21:26 +0000200{ "| real Pocket ", "| cplx Pocket " },
hayati ayguenb9804a22019-12-27 00:35:47 +0100201{ "| real PFFFT-U ", "| cplx PFFFT-U " },
hayati ayguencd60b962019-12-23 11:42:31 +0100202{ "| real PFFFT ", "| cplx PFFFT " } };
203
204const char * typeText[NUM_TYPES] = {
205 "preparation in ms",
206 "time per fft in ns",
207 "relative to fastest",
hayati ayguen42ee6f12019-12-25 23:08:04 +0100208 "relative to pffft",
209 "measured_num_iters",
hayati ayguenb9804a22019-12-27 00:35:47 +0100210 "mflops",
211 "test duration in sec"
hayati ayguencd60b962019-12-23 11:42:31 +0100212};
213
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100214const char * typeFilenamePart[NUM_TYPES] = {
215 "1-preparation-in-ms",
216 "2-timePerFft-in-ns",
217 "3-rel-fastest",
hayati ayguen42ee6f12019-12-25 23:08:04 +0100218 "4-rel-pffft",
219 "5-num-iter",
hayati ayguenb9804a22019-12-27 00:35:47 +0100220 "6-mflops",
221 "7-duration-in-sec"
222};
223
224#define SAVE_ALL_TYPES 0
225
226const int saveType[NUM_TYPES] = {
227 1, /* "1-preparation-in-ms" */
228 0, /* "2-timePerFft-in-ns" */
229 0, /* "3-rel-fastest" */
230 1, /* "4-rel-pffft" */
231 1, /* "5-num-iter" */
232 1, /* "6-mflops" */
233 1, /* "7-duration-in-sec" */
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100234};
235
hayati ayguencd60b962019-12-23 11:42:31 +0100236
Julien Pommier370d2092011-11-19 18:04:25 +0100237#define MAX(x,y) ((x)>(y)?(x):(y))
hayati ayguen42ee6f12019-12-25 23:08:04 +0100238#define MIN(x,y) ((x)<(y)?(x):(y))
Julien Pommier370d2092011-11-19 18:04:25 +0100239
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100240unsigned Log2(unsigned v) {
241 /* we don't need speed records .. obvious way is good enough */
242 /* https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogObvious */
243 /* Find the log base 2 of an integer with the MSB N set in O(N) operations (the obvious way):
244 * unsigned v: 32-bit word to find the log base 2 of */
245 unsigned r = 0; /* r will be lg(v) */
246 while (v >>= 1)
247 {
248 r++;
249 }
250 return r;
251}
252
253
Julien Pommier370d2092011-11-19 18:04:25 +0100254double frand() {
255 return rand()/(double)RAND_MAX;
256}
257
258#if defined(HAVE_SYS_TIMES)
259 inline double uclock_sec(void) {
260 static double ttclk = 0.;
hayati ayguenb9804a22019-12-27 00:35:47 +0100261 struct tms t;
262 if (ttclk == 0.)
263 ttclk = sysconf(_SC_CLK_TCK);
264 times(&t);
265 /* use only the user time of this process - not realtime, which depends on OS-scheduler .. */
266 return ((double)t.tms_utime)) / ttclk;
Julien Pommier370d2092011-11-19 18:04:25 +0100267 }
268# else
Julien Pommier836bc4b2011-11-20 10:58:07 +0100269 double uclock_sec(void)
Julien Pommier370d2092011-11-19 18:04:25 +0100270{ return (double)clock()/(double)CLOCKS_PER_SEC; }
271#endif
272
273
274/* compare results with the regular fftpack */
275void pffft_validate_N(int N, int cplx) {
hayati ayguenca112412020-04-13 00:19:40 +0200276
277#ifdef PFFFT_ENABLE_FLOAT
278
Julien Pommier370d2092011-11-19 18:04:25 +0100279 int Nfloat = N*(cplx?2:1);
hayati ayguenca112412020-04-13 00:19:40 +0200280 int Nbytes = Nfloat * sizeof(pffft_scalar);
Julien Pommier0302e8a2012-10-11 18:04:09 +0200281 float *ref, *in, *out, *tmp, *tmp2;
hayati ayguenca112412020-04-13 00:19:40 +0200282 PFFFT_SETUP *s = PFFFT_FUNC(new_setup)(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL);
Julien Pommier370d2092011-11-19 18:04:25 +0100283 int pass;
Julien Pommier0302e8a2012-10-11 18:04:09 +0200284
hayati ayguenca112412020-04-13 00:19:40 +0200285
Julien Pommier0302e8a2012-10-11 18:04:09 +0200286 if (!s) { printf("Skipping N=%d, not supported\n", N); return; }
hayati ayguenca112412020-04-13 00:19:40 +0200287 ref = PFFFT_FUNC(aligned_malloc)(Nbytes);
288 in = PFFFT_FUNC(aligned_malloc)(Nbytes);
289 out = PFFFT_FUNC(aligned_malloc)(Nbytes);
290 tmp = PFFFT_FUNC(aligned_malloc)(Nbytes);
291 tmp2 = PFFFT_FUNC(aligned_malloc)(Nbytes);
Julien Pommier0302e8a2012-10-11 18:04:09 +0200292
Julien Pommier370d2092011-11-19 18:04:25 +0100293 for (pass=0; pass < 2; ++pass) {
Julien Pommier836bc4b2011-11-20 10:58:07 +0100294 float ref_max = 0;
Julien Pommier370d2092011-11-19 18:04:25 +0100295 int k;
hayati ayguenc974c1d2020-03-29 03:39:30 +0200296 /* printf("N=%d pass=%d cplx=%d\n", N, pass, cplx); */
297 /* compute reference solution with FFTPACK */
Julien Pommier370d2092011-11-19 18:04:25 +0100298 if (pass == 0) {
hayati ayguenca112412020-04-13 00:19:40 +0200299 float *wrk = malloc(2*Nbytes+15*sizeof(pffft_scalar));
Julien Pommier370d2092011-11-19 18:04:25 +0100300 for (k=0; k < Nfloat; ++k) {
hayati ayguenee17cb02020-08-28 04:47:22 +0200301 ref[k] = in[k] = (float)( frand()*2-1 );
302 out[k] = 1e30F;
Julien Pommier370d2092011-11-19 18:04:25 +0100303 }
304 if (!cplx) {
305 rffti(N, wrk);
306 rfftf(N, ref, wrk);
hayati ayguenc974c1d2020-03-29 03:39:30 +0200307 /* use our ordering for real ffts instead of the one of fftpack */
Julien Pommier370d2092011-11-19 18:04:25 +0100308 {
309 float refN=ref[N-1];
310 for (k=N-2; k >= 1; --k) ref[k+1] = ref[k];
311 ref[1] = refN;
312 }
313 } else {
314 cffti(N, wrk);
315 cfftf(N, ref, wrk);
316 }
317 free(wrk);
318 }
319
hayati ayguenee17cb02020-08-28 04:47:22 +0200320 for (k = 0; k < Nfloat; ++k) ref_max = MAX(ref_max, (float)( fabs(ref[k]) ));
Julien Pommier370d2092011-11-19 18:04:25 +0100321
322
hayati ayguenc974c1d2020-03-29 03:39:30 +0200323 /* pass 0 : non canonical ordering of transform coefficients */
Julien Pommier370d2092011-11-19 18:04:25 +0100324 if (pass == 0) {
hayati ayguenc974c1d2020-03-29 03:39:30 +0200325 /* test forward transform, with different input / output */
hayati ayguenca112412020-04-13 00:19:40 +0200326 PFFFT_FUNC(transform)(s, in, tmp, 0, PFFFT_FORWARD);
Julien Pommier370d2092011-11-19 18:04:25 +0100327 memcpy(tmp2, tmp, Nbytes);
328 memcpy(tmp, in, Nbytes);
hayati ayguenca112412020-04-13 00:19:40 +0200329 PFFFT_FUNC(transform)(s, tmp, tmp, 0, PFFFT_FORWARD);
Julien Pommier370d2092011-11-19 18:04:25 +0100330 for (k = 0; k < Nfloat; ++k) {
331 assert(tmp2[k] == tmp[k]);
332 }
333
hayati ayguenc974c1d2020-03-29 03:39:30 +0200334 /* test reordering */
hayati ayguenca112412020-04-13 00:19:40 +0200335 PFFFT_FUNC(zreorder)(s, tmp, out, PFFFT_FORWARD);
336 PFFFT_FUNC(zreorder)(s, out, tmp, PFFFT_BACKWARD);
Julien Pommier370d2092011-11-19 18:04:25 +0100337 for (k = 0; k < Nfloat; ++k) {
338 assert(tmp2[k] == tmp[k]);
339 }
hayati ayguenca112412020-04-13 00:19:40 +0200340 PFFFT_FUNC(zreorder)(s, tmp, out, PFFFT_FORWARD);
Julien Pommier370d2092011-11-19 18:04:25 +0100341 } else {
hayati ayguenc974c1d2020-03-29 03:39:30 +0200342 /* pass 1 : canonical ordering of transform coeffs. */
hayati ayguenca112412020-04-13 00:19:40 +0200343 PFFFT_FUNC(transform_ordered)(s, in, tmp, 0, PFFFT_FORWARD);
Julien Pommier370d2092011-11-19 18:04:25 +0100344 memcpy(tmp2, tmp, Nbytes);
345 memcpy(tmp, in, Nbytes);
hayati ayguenca112412020-04-13 00:19:40 +0200346 PFFFT_FUNC(transform_ordered)(s, tmp, tmp, 0, PFFFT_FORWARD);
Julien Pommier370d2092011-11-19 18:04:25 +0100347 for (k = 0; k < Nfloat; ++k) {
348 assert(tmp2[k] == tmp[k]);
349 }
350 memcpy(out, tmp, Nbytes);
351 }
352
353 {
354 for (k=0; k < Nfloat; ++k) {
355 if (!(fabs(ref[k] - out[k]) < 1e-3*ref_max)) {
356 printf("%s forward PFFFT mismatch found for N=%d\n", (cplx?"CPLX":"REAL"), N);
357 exit(1);
358 }
359 }
hayati ayguenca112412020-04-13 00:19:40 +0200360
361 if (pass == 0) PFFFT_FUNC(transform)(s, tmp, out, 0, PFFFT_BACKWARD);
362 else PFFFT_FUNC(transform_ordered)(s, tmp, out, 0, PFFFT_BACKWARD);
Julien Pommier370d2092011-11-19 18:04:25 +0100363 memcpy(tmp2, out, Nbytes);
364 memcpy(out, tmp, Nbytes);
hayati ayguenca112412020-04-13 00:19:40 +0200365 if (pass == 0) PFFFT_FUNC(transform)(s, out, out, 0, PFFFT_BACKWARD);
366 else PFFFT_FUNC(transform_ordered)(s, out, out, 0, PFFFT_BACKWARD);
Julien Pommier370d2092011-11-19 18:04:25 +0100367 for (k = 0; k < Nfloat; ++k) {
368 assert(tmp2[k] == out[k]);
369 out[k] *= 1.f/N;
370 }
371 for (k = 0; k < Nfloat; ++k) {
372 if (fabs(in[k] - out[k]) > 1e-3 * ref_max) {
373 printf("pass=%d, %s IFFFT does not match for N=%d\n", pass, (cplx?"CPLX":"REAL"), N); break;
374 exit(1);
375 }
376 }
377 }
378
hayati ayguenc974c1d2020-03-29 03:39:30 +0200379 /* quick test of the circular convolution in fft domain */
Julien Pommier432b3e82013-01-12 19:28:03 +0100380 {
381 float conv_err = 0, conv_max = 0;
Julien Pommier370d2092011-11-19 18:04:25 +0100382
hayati ayguenca112412020-04-13 00:19:40 +0200383 PFFFT_FUNC(zreorder)(s, ref, tmp, PFFFT_FORWARD);
Julien Pommier432b3e82013-01-12 19:28:03 +0100384 memset(out, 0, Nbytes);
hayati ayguenca112412020-04-13 00:19:40 +0200385 PFFFT_FUNC(zconvolve_accumulate)(s, ref, ref, out, 1.0);
386 PFFFT_FUNC(zreorder)(s, out, tmp2, PFFFT_FORWARD);
Julien Pommier432b3e82013-01-12 19:28:03 +0100387
388 for (k=0; k < Nfloat; k += 2) {
389 float ar = tmp[k], ai=tmp[k+1];
390 if (cplx || k > 0) {
391 tmp[k] = ar*ar - ai*ai;
392 tmp[k+1] = 2*ar*ai;
393 } else {
394 tmp[0] = ar*ar;
395 tmp[1] = ai*ai;
396 }
Julien Pommier370d2092011-11-19 18:04:25 +0100397 }
Julien Pommier432b3e82013-01-12 19:28:03 +0100398
399 for (k=0; k < Nfloat; ++k) {
400 float d = fabs(tmp[k] - tmp2[k]), e = fabs(tmp[k]);
401 if (d > conv_err) conv_err = d;
402 if (e > conv_max) conv_max = e;
403 }
404 if (conv_err > 1e-5*conv_max) {
405 printf("zconvolve error ? %g %g\n", conv_err, conv_max); exit(1);
406 }
Julien Pommier370d2092011-11-19 18:04:25 +0100407 }
408
409 }
410
Julien Pommier836bc4b2011-11-20 10:58:07 +0100411 printf("%s PFFFT is OK for N=%d\n", (cplx?"CPLX":"REAL"), N); fflush(stdout);
hayati ayguenca112412020-04-13 00:19:40 +0200412
413 PFFFT_FUNC(destroy_setup)(s);
414 PFFFT_FUNC(aligned_free)(ref);
415 PFFFT_FUNC(aligned_free)(in);
416 PFFFT_FUNC(aligned_free)(out);
417 PFFFT_FUNC(aligned_free)(tmp);
418 PFFFT_FUNC(aligned_free)(tmp2);
419
420#endif /* PFFFT_ENABLE_FLOAT */
Julien Pommier370d2092011-11-19 18:04:25 +0100421}
422
423void pffft_validate(int cplx) {
Julien Pommier432b3e82013-01-12 19:28:03 +0100424 static int Ntest[] = { 16, 32, 64, 96, 128, 160, 192, 256, 288, 384, 5*96, 512, 576, 5*128, 800, 864, 1024, 2048, 2592, 4000, 4096, 12000, 36864, 0};
Julien Pommier370d2092011-11-19 18:04:25 +0100425 int k;
426 for (k = 0; Ntest[k]; ++k) {
427 int N = Ntest[k];
428 if (N == 16 && !cplx) continue;
Julien Pommier370d2092011-11-19 18:04:25 +0100429 pffft_validate_N(N, cplx);
430 }
431}
432
hayati ayguencd60b962019-12-23 11:42:31 +0100433int array_output_format = 1;
Julien Pommier836bc4b2011-11-20 10:58:07 +0100434
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100435
436void print_table(const char *txt, FILE *tableFile) {
437 fprintf(stdout, "%s", txt);
438 if (tableFile && tableFile != stdout)
439 fprintf(tableFile, "%s", txt);
440}
441
442void print_table_flops(float mflops, FILE *tableFile) {
443 fprintf(stdout, "|%11.0f ", mflops);
444 if (tableFile && tableFile != stdout)
445 fprintf(tableFile, "|%11.0f ", mflops);
446}
447
448void print_table_fftsize(int N, FILE *tableFile) {
449 fprintf(stdout, "|%9d ", N);
450 if (tableFile && tableFile != stdout)
451 fprintf(tableFile, "|%9d ", N);
452}
453
454double show_output(const char *name, int N, int cplx, float flops, float t0, float t1, int max_iter, FILE *tableFile) {
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100455 double T = (double)(t1-t0)/2/max_iter * 1e9;
Julien Pommier836bc4b2011-11-20 10:58:07 +0100456 float mflops = flops/1e6/(t1 - t0 + 1e-16);
457 if (array_output_format) {
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100458 if (flops != -1)
459 print_table_flops(mflops, tableFile);
460 else
hayati ayguenca112412020-04-13 00:19:40 +0200461 print_table("| n/a ", tableFile);
Julien Pommier836bc4b2011-11-20 10:58:07 +0100462 } else {
463 if (flops != -1) {
464 printf("N=%5d, %s %16s : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), name, mflops, (t1-t0)/2/max_iter * 1e9, max_iter);
465 }
466 }
467 fflush(stdout);
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100468 return T;
Julien Pommier836bc4b2011-11-20 10:58:07 +0100469}
Julien Pommier370d2092011-11-19 18:04:25 +0100470
hayati ayguenb9804a22019-12-27 00:35:47 +0100471double cal_benchmark(int N, int cplx) {
472 const int log2N = Log2(N);
473 int Nfloat = (cplx ? N*2 : N);
hayati ayguenca112412020-04-13 00:19:40 +0200474 int Nbytes = Nfloat * sizeof(pffft_scalar);
475 pffft_scalar *X = PFFFT_FUNC(aligned_malloc)(Nbytes), *Y = PFFFT_FUNC(aligned_malloc)(Nbytes), *Z = PFFFT_FUNC(aligned_malloc)(Nbytes);
hayati ayguenb9804a22019-12-27 00:35:47 +0100476 double t0, t1, tstop, T, nI;
477 int k, iter;
478
hayati ayguenca112412020-04-13 00:19:40 +0200479 assert( PFFFT_FUNC(is_power_of_two)(N) );
hayati ayguenb9804a22019-12-27 00:35:47 +0100480 for (k = 0; k < Nfloat; ++k) {
481 X[k] = sqrtf(k+1);
482 }
483
484 /* PFFFT-U (unordered) benchmark */
hayati ayguenca112412020-04-13 00:19:40 +0200485 PFFFT_SETUP *s = PFFFT_FUNC(new_setup)(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL);
hayati ayguenb9804a22019-12-27 00:35:47 +0100486 assert(s);
487 iter = 0;
488 t0 = uclock_sec();
489 tstop = t0 + 0.25; /* benchmark duration: 250 ms */
490 do {
491 for ( k = 0; k < 512; ++k ) {
hayati ayguenca112412020-04-13 00:19:40 +0200492 PFFFT_FUNC(transform)(s, X, Z, Y, PFFFT_FORWARD);
493 PFFFT_FUNC(transform)(s, X, Z, Y, PFFFT_BACKWARD);
hayati ayguenb9804a22019-12-27 00:35:47 +0100494 ++iter;
495 }
496 t1 = uclock_sec();
497 } while ( t1 < tstop );
hayati ayguenca112412020-04-13 00:19:40 +0200498 PFFFT_FUNC(destroy_setup)(s);
499 PFFFT_FUNC(aligned_free)(X);
500 PFFFT_FUNC(aligned_free)(Y);
501 PFFFT_FUNC(aligned_free)(Z);
hayati ayguenb9804a22019-12-27 00:35:47 +0100502
503 T = ( t1 - t0 ); /* duration per fft() */
504 nI = ((double)iter) * ( log2N * N ); /* number of iterations "normalized" to O(N) = N*log2(N) */
505 return (nI / T); /* normalized iterations per second */
506}
507
508
509
510void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, double tmeas[NUM_TYPES][NUM_FFT_ALGOS], int haveAlgo[NUM_FFT_ALGOS], FILE *tableFile ) {
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100511 const int log2N = Log2(N);
hayati ayguenca112412020-04-13 00:19:40 +0200512 int nextPow2N = PFFFT_FUNC(next_power_of_two)(N);
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100513 int log2NextN = Log2(nextPow2N);
hayati ayguenca112412020-04-13 00:19:40 +0200514 int pffftPow2N = nextPow2N;
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100515
516 int Nfloat = (cplx ? MAX(nextPow2N, pffftPow2N)*2 : MAX(nextPow2N, pffftPow2N));
hayati ayguenb9804a22019-12-27 00:35:47 +0100517 int Nmax, k, iter;
hayati ayguenca112412020-04-13 00:19:40 +0200518 int Nbytes = Nfloat * sizeof(pffft_scalar);
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100519
hayati ayguenca112412020-04-13 00:19:40 +0200520 pffft_scalar *X = PFFFT_FUNC(aligned_malloc)(Nbytes + sizeof(pffft_scalar)), *Y = PFFFT_FUNC(aligned_malloc)(Nbytes + 2*sizeof(pffft_scalar) ), *Z = PFFFT_FUNC(aligned_malloc)(Nbytes);
hayati ayguenb9804a22019-12-27 00:35:47 +0100521 double te, t0, t1, tstop, flops, Tfastest;
Julien Pommier370d2092011-11-19 18:04:25 +0100522
hayati ayguenb9804a22019-12-27 00:35:47 +0100523 const double max_test_duration = 0.150; /* test duration 150 ms */
524 double numIter = max_test_duration * iterCal / ( log2N * N ); /* number of iteration for max_test_duration */
525 const int step_iter = MAX(1, ((int)(0.01 * numIter)) ); /* one hundredth */
526 int max_iter = MAX(1, ((int)numIter) ); /* minimum 1 iteration */
527
528 const float checkVal = 12345.0F;
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100529
530 /* printf("benchmark_ffts(N = %d, cplx = %d): Nfloat = %d, X_mem = 0x%p, X = %p\n", N, cplx, Nfloat, X_mem, X); */
Julien Pommier370d2092011-11-19 18:04:25 +0100531
hayati ayguenca112412020-04-13 00:19:40 +0200532 memset( X, 0, Nfloat * sizeof(pffft_scalar) );
hayati ayguenb9804a22019-12-27 00:35:47 +0100533 if ( Nfloat < 32 ) {
534 for (k = 0; k < Nfloat; k += 4)
535 X[k] = sqrtf(k+1);
536 } else {
537 for (k = 0; k < Nfloat; k += (Nfloat/16) )
538 X[k] = sqrtf(k+1);
Julien Pommier370d2092011-11-19 18:04:25 +0100539 }
540
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100541 for ( k = 0; k < NUM_TYPES; ++k )
542 {
543 for ( iter = 0; iter < NUM_FFT_ALGOS; ++iter )
544 tmeas[k][iter] = 0.0;
545 }
546
547
hayati ayguenc974c1d2020-03-29 03:39:30 +0200548 /* FFTPack benchmark */
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100549 Nmax = (cplx ? N*2 : N);
550 X[Nmax] = checkVal;
hayati ayguenca112412020-04-13 00:19:40 +0200551#ifdef PFFFT_ENABLE_FLOAT
Julien Pommier370d2092011-11-19 18:04:25 +0100552 {
hayati ayguenca112412020-04-13 00:19:40 +0200553 float *wrk = malloc(2*Nbytes + 15*sizeof(pffft_scalar));
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100554 te = uclock_sec();
Julien Pommier370d2092011-11-19 18:04:25 +0100555 if (cplx) cffti(N, wrk);
556 else rffti(N, wrk);
hayati ayguenb9804a22019-12-27 00:35:47 +0100557 t0 = uclock_sec();
558 tstop = t0 + max_test_duration;
559 max_iter = 0;
560 do {
561 for ( k = 0; k < step_iter; ++k ) {
562 if (cplx) {
563 assert( X[Nmax] == checkVal );
564 cfftf(N, X, wrk);
565 assert( X[Nmax] == checkVal );
566 cfftb(N, X, wrk);
567 assert( X[Nmax] == checkVal );
568 } else {
569 assert( X[Nmax] == checkVal );
570 rfftf(N, X, wrk);
571 assert( X[Nmax] == checkVal );
572 rfftb(N, X, wrk);
573 assert( X[Nmax] == checkVal );
574 }
575 ++max_iter;
Julien Pommier370d2092011-11-19 18:04:25 +0100576 }
hayati ayguenb9804a22019-12-27 00:35:47 +0100577 t1 = uclock_sec();
578 } while ( t1 < tstop );
579
Julien Pommier370d2092011-11-19 18:04:25 +0100580 free(wrk);
hayati ayguenb9804a22019-12-27 00:35:47 +0100581
hayati ayguenc974c1d2020-03-29 03:39:30 +0200582 flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */
hayati ayguenb9804a22019-12-27 00:35:47 +0100583 tmeas[TYPE_ITER][ALGO_FFTPACK] = max_iter;
hayati ayguen42ee6f12019-12-25 23:08:04 +0100584 tmeas[TYPE_MFLOPS][ALGO_FFTPACK] = flops/1e6/(t1 - t0 + 1e-16);
hayati ayguenb9804a22019-12-27 00:35:47 +0100585 tmeas[TYPE_DUR_TOT][ALGO_FFTPACK] = t1 - t0;
586 tmeas[TYPE_DUR_NS][ALGO_FFTPACK] = show_output("FFTPack", N, cplx, flops, t0, t1, max_iter, tableFile);
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100587 tmeas[TYPE_PREP][ALGO_FFTPACK] = (t0 - te) * 1e3;
588 haveAlgo[ALGO_FFTPACK] = 1;
Julien Pommier370d2092011-11-19 18:04:25 +0100589 }
hayati ayguenca112412020-04-13 00:19:40 +0200590#endif
Julien Pommier370d2092011-11-19 18:04:25 +0100591
hayati ayguenca112412020-04-13 00:19:40 +0200592#if defined(HAVE_VECLIB) && defined(PFFFT_ENABLE_FLOAT)
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100593 Nmax = (cplx ? nextPow2N*2 : nextPow2N);
594 X[Nmax] = checkVal;
595 te = uclock_sec();
hayati ayguenca112412020-04-13 00:19:40 +0200596 if ( 1 || PFFFT_FUNC(is_power_of_two)(N) ) {
Julien Pommier370d2092011-11-19 18:04:25 +0100597 FFTSetup setup;
598
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100599 setup = vDSP_create_fftsetup(log2NextN, FFT_RADIX2);
Julien Pommier370d2092011-11-19 18:04:25 +0100600 DSPSplitComplex zsamples;
601 zsamples.realp = &X[0];
602 zsamples.imagp = &X[Nfloat/2];
hayati ayguenb9804a22019-12-27 00:35:47 +0100603 t0 = uclock_sec();
604 tstop = t0 + max_test_duration;
605 max_iter = 0;
606 do {
607 for ( k = 0; k < step_iter; ++k ) {
608 if (cplx) {
609 assert( X[Nmax] == checkVal );
610 vDSP_fft_zip(setup, &zsamples, 1, log2NextN, kFFTDirection_Forward);
611 assert( X[Nmax] == checkVal );
612 vDSP_fft_zip(setup, &zsamples, 1, log2NextN, kFFTDirection_Inverse);
613 assert( X[Nmax] == checkVal );
614 } else {
615 assert( X[Nmax] == checkVal );
616 vDSP_fft_zrip(setup, &zsamples, 1, log2NextN, kFFTDirection_Forward);
617 assert( X[Nmax] == checkVal );
618 vDSP_fft_zrip(setup, &zsamples, 1, log2NextN, kFFTDirection_Inverse);
619 assert( X[Nmax] == checkVal );
620 }
621 ++max_iter;
Julien Pommier370d2092011-11-19 18:04:25 +0100622 }
hayati ayguenb9804a22019-12-27 00:35:47 +0100623 t1 = uclock_sec();
624 } while ( t1 < tstop );
Julien Pommier370d2092011-11-19 18:04:25 +0100625
hayati ayguenb9804a22019-12-27 00:35:47 +0100626 vDSP_destroy_fftsetup(setup);
hayati ayguenc974c1d2020-03-29 03:39:30 +0200627 flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */
hayati ayguenb9804a22019-12-27 00:35:47 +0100628 tmeas[TYPE_ITER][ALGO_VECLIB] = max_iter;
hayati ayguen42ee6f12019-12-25 23:08:04 +0100629 tmeas[TYPE_MFLOPS][ALGO_VECLIB] = flops/1e6/(t1 - t0 + 1e-16);
hayati ayguenb9804a22019-12-27 00:35:47 +0100630 tmeas[TYPE_DUR_TOT][ALGO_VECLIB] = t1 - t0;
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100631 tmeas[TYPE_DUR_NS][ALGO_VECLIB] = show_output("vDSP", N, cplx, flops, t0, t1, max_iter, tableFile);
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100632 tmeas[TYPE_PREP][ALGO_VECLIB] = (t0 - te) * 1e3;
633 haveAlgo[ALGO_VECLIB] = 1;
Julien Pommier836bc4b2011-11-20 10:58:07 +0100634 } else {
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100635 show_output("vDSP", N, cplx, -1, -1, -1, -1, tableFile);
Julien Pommier370d2092011-11-19 18:04:25 +0100636 }
637#endif
hayati ayguencd60b962019-12-23 11:42:31 +0100638
hayati ayguenca112412020-04-13 00:19:40 +0200639#if defined(HAVE_FFTW)
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100640 Nmax = (cplx ? N*2 : N);
641 X[Nmax] = checkVal;
Julien Pommier370d2092011-11-19 18:04:25 +0100642 {
hayati ayguencd60b962019-12-23 11:42:31 +0100643 /* int flags = (N <= (256*1024) ? FFTW_MEASURE : FFTW_ESTIMATE); measure takes a lot of time on largest ffts */
644 int flags = FFTW_ESTIMATE;
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100645 te = uclock_sec();
hayati ayguenca112412020-04-13 00:19:40 +0200646
647 FFTW_PLAN planf, planb;
648 FFTW_COMPLEX *in = (FFTW_COMPLEX*) FFTW_FUNC(malloc)(sizeof(FFTW_COMPLEX) * N);
649 FFTW_COMPLEX *out = (FFTW_COMPLEX*) FFTW_FUNC(malloc)(sizeof(FFTW_COMPLEX) * N);
650 memset(in, 0, sizeof(FFTW_COMPLEX) * N);
Julien Pommier370d2092011-11-19 18:04:25 +0100651 if (cplx) {
hayati ayguenca112412020-04-13 00:19:40 +0200652 planf = FFTW_FUNC(plan_dft_1d)(N, in, out, FFTW_FORWARD, flags);
653 planb = FFTW_FUNC(plan_dft_1d)(N, in, out, FFTW_BACKWARD, flags);
Julien Pommier370d2092011-11-19 18:04:25 +0100654 } else {
hayati ayguenca112412020-04-13 00:19:40 +0200655 planf = FFTW_FUNC(plan_dft_r2c_1d)(N, (pffft_scalar*)in, out, flags);
656 planb = FFTW_FUNC(plan_dft_c2r_1d)(N, in, (pffft_scalar*)out, flags);
Julien Pommier370d2092011-11-19 18:04:25 +0100657 }
658
hayati ayguenb9804a22019-12-27 00:35:47 +0100659 t0 = uclock_sec();
660 tstop = t0 + max_test_duration;
661 max_iter = 0;
662 do {
663 for ( k = 0; k < step_iter; ++k ) {
664 assert( X[Nmax] == checkVal );
hayati ayguenca112412020-04-13 00:19:40 +0200665 FFTW_FUNC(execute)(planf);
hayati ayguenb9804a22019-12-27 00:35:47 +0100666 assert( X[Nmax] == checkVal );
hayati ayguenca112412020-04-13 00:19:40 +0200667 FFTW_FUNC(execute)(planb);
hayati ayguenb9804a22019-12-27 00:35:47 +0100668 assert( X[Nmax] == checkVal );
669 ++max_iter;
670 }
671 t1 = uclock_sec();
672 } while ( t1 < tstop );
Julien Pommier370d2092011-11-19 18:04:25 +0100673
hayati ayguenca112412020-04-13 00:19:40 +0200674 FFTW_FUNC(destroy_plan)(planf);
675 FFTW_FUNC(destroy_plan)(planb);
676 FFTW_FUNC(free)(in); FFTW_FUNC(free)(out);
Julien Pommier370d2092011-11-19 18:04:25 +0100677
hayati ayguenc974c1d2020-03-29 03:39:30 +0200678 flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */
hayati ayguenb9804a22019-12-27 00:35:47 +0100679 tmeas[TYPE_ITER][ALGO_FFTW_ESTIM] = max_iter;
hayati ayguen42ee6f12019-12-25 23:08:04 +0100680 tmeas[TYPE_MFLOPS][ALGO_FFTW_ESTIM] = flops/1e6/(t1 - t0 + 1e-16);
hayati ayguenb9804a22019-12-27 00:35:47 +0100681 tmeas[TYPE_DUR_TOT][ALGO_FFTW_ESTIM] = t1 - t0;
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100682 tmeas[TYPE_DUR_NS][ALGO_FFTW_ESTIM] = show_output((flags == FFTW_MEASURE ? algoName[ALGO_FFTW_AUTO] : algoName[ALGO_FFTW_ESTIM]), N, cplx, flops, t0, t1, max_iter, tableFile);
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100683 tmeas[TYPE_PREP][ALGO_FFTW_ESTIM] = (t0 - te) * 1e3;
684 haveAlgo[ALGO_FFTW_ESTIM] = 1;
Julien Pommier370d2092011-11-19 18:04:25 +0100685 }
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100686 Nmax = (cplx ? N*2 : N);
687 X[Nmax] = checkVal;
hayati ayguencd60b962019-12-23 11:42:31 +0100688 do {
689 /* int flags = (N <= (256*1024) ? FFTW_MEASURE : FFTW_ESTIMATE); measure takes a lot of time on largest ffts */
690 /* int flags = FFTW_MEASURE; */
hayati ayguenb9804a22019-12-27 00:35:47 +0100691#if ( defined(__arm__) || defined(__aarch64__) || defined(__arm64__) )
692 int limitFFTsize = 31; /* takes over a second on Raspberry Pi 3 B+ -- and much much more on higher ffts sizes! */
693#else
694 int limitFFTsize = 2400; /* take over a second on i7 for fft size 2400 */
695#endif
696 int flags = (N < limitFFTsize ? FFTW_MEASURE : (withFFTWfullMeas ? FFTW_MEASURE : FFTW_ESTIMATE));
697
hayati ayguencd60b962019-12-23 11:42:31 +0100698 if (flags == FFTW_ESTIMATE) {
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100699 show_output((flags == FFTW_MEASURE ? algoName[ALGO_FFTW_AUTO] : algoName[ALGO_FFTW_ESTIM]), N, cplx, -1, -1, -1, -1, tableFile);
hayati ayguencd60b962019-12-23 11:42:31 +0100700 /* copy values from estimation */
hayati ayguenb9804a22019-12-27 00:35:47 +0100701 tmeas[TYPE_ITER][ALGO_FFTW_AUTO] = tmeas[TYPE_ITER][ALGO_FFTW_ESTIM];
702 tmeas[TYPE_DUR_TOT][ALGO_FFTW_AUTO] = tmeas[TYPE_DUR_TOT][ALGO_FFTW_ESTIM];
hayati ayguencd60b962019-12-23 11:42:31 +0100703 tmeas[TYPE_DUR_NS][ALGO_FFTW_AUTO] = tmeas[TYPE_DUR_NS][ALGO_FFTW_ESTIM];
704 tmeas[TYPE_PREP][ALGO_FFTW_AUTO] = tmeas[TYPE_PREP][ALGO_FFTW_ESTIM];
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100705 } else {
hayati ayguencd60b962019-12-23 11:42:31 +0100706 te = uclock_sec();
hayati ayguenca112412020-04-13 00:19:40 +0200707 FFTW_PLAN planf, planb;
708 FFTW_COMPLEX *in = (FFTW_COMPLEX*) FFTW_FUNC(malloc)(sizeof(FFTW_COMPLEX) * N);
709 FFTW_COMPLEX *out = (FFTW_COMPLEX*) FFTW_FUNC(malloc)(sizeof(FFTW_COMPLEX) * N);
710 memset(in, 0, sizeof(FFTW_COMPLEX) * N);
hayati ayguencd60b962019-12-23 11:42:31 +0100711 if (cplx) {
hayati ayguenca112412020-04-13 00:19:40 +0200712 planf = FFTW_FUNC(plan_dft_1d)(N, in, out, FFTW_FORWARD, flags);
713 planb = FFTW_FUNC(plan_dft_1d)(N, in, out, FFTW_BACKWARD, flags);
hayati ayguencd60b962019-12-23 11:42:31 +0100714 } else {
hayati ayguenca112412020-04-13 00:19:40 +0200715 planf = FFTW_FUNC(plan_dft_r2c_1d)(N, (pffft_scalar*)in, out, flags);
716 planb = FFTW_FUNC(plan_dft_c2r_1d)(N, in, (pffft_scalar*)out, flags);
hayati ayguencd60b962019-12-23 11:42:31 +0100717 }
718
hayati ayguenb9804a22019-12-27 00:35:47 +0100719 t0 = uclock_sec();
720 tstop = t0 + max_test_duration;
721 max_iter = 0;
722 do {
723 for ( k = 0; k < step_iter; ++k ) {
724 assert( X[Nmax] == checkVal );
hayati ayguenca112412020-04-13 00:19:40 +0200725 FFTW_FUNC(execute)(planf);
hayati ayguenb9804a22019-12-27 00:35:47 +0100726 assert( X[Nmax] == checkVal );
hayati ayguenca112412020-04-13 00:19:40 +0200727 FFTW_FUNC(execute)(planb);
hayati ayguenb9804a22019-12-27 00:35:47 +0100728 assert( X[Nmax] == checkVal );
729 ++max_iter;
730 }
731 t1 = uclock_sec();
732 } while ( t1 < tstop );
hayati ayguencd60b962019-12-23 11:42:31 +0100733
hayati ayguenca112412020-04-13 00:19:40 +0200734 FFTW_FUNC(destroy_plan)(planf);
735 FFTW_FUNC(destroy_plan)(planb);
736 FFTW_FUNC(free)(in); FFTW_FUNC(free)(out);
hayati ayguencd60b962019-12-23 11:42:31 +0100737
hayati ayguenc974c1d2020-03-29 03:39:30 +0200738 flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */
hayati ayguenb9804a22019-12-27 00:35:47 +0100739 tmeas[TYPE_ITER][ALGO_FFTW_AUTO] = max_iter;
hayati ayguen42ee6f12019-12-25 23:08:04 +0100740 tmeas[TYPE_MFLOPS][ALGO_FFTW_AUTO] = flops/1e6/(t1 - t0 + 1e-16);
hayati ayguenb9804a22019-12-27 00:35:47 +0100741 tmeas[TYPE_DUR_TOT][ALGO_FFTW_AUTO] = t1 - t0;
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100742 tmeas[TYPE_DUR_NS][ALGO_FFTW_AUTO] = show_output((flags == FFTW_MEASURE ? algoName[ALGO_FFTW_AUTO] : algoName[ALGO_FFTW_ESTIM]), N, cplx, flops, t0, t1, max_iter, tableFile);
hayati ayguencd60b962019-12-23 11:42:31 +0100743 tmeas[TYPE_PREP][ALGO_FFTW_AUTO] = (t0 - te) * 1e3;
744 haveAlgo[ALGO_FFTW_AUTO] = 1;
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100745 }
hayati ayguencd60b962019-12-23 11:42:31 +0100746 } while (0);
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100747#else
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100748 (void)withFFTWfullMeas;
hayati ayguenca112412020-04-13 00:19:40 +0200749#endif
Julien Pommier370d2092011-11-19 18:04:25 +0100750
hayati ayguenca112412020-04-13 00:19:40 +0200751#if defined(HAVE_GREEN_FFTS) && defined(PFFFT_ENABLE_FLOAT)
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100752 Nmax = (cplx ? nextPow2N*2 : nextPow2N);
753 X[Nmax] = checkVal;
hayati ayguenca112412020-04-13 00:19:40 +0200754 if ( 1 || PFFFT_FUNC(is_power_of_two)(N) )
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100755 {
756 te = uclock_sec();
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100757 fftInit(log2NextN);
758
hayati ayguenb9804a22019-12-27 00:35:47 +0100759 t0 = uclock_sec();
760 tstop = t0 + max_test_duration;
761 max_iter = 0;
762 do {
763 for ( k = 0; k < step_iter; ++k ) {
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100764 if (cplx) {
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100765 assert( X[Nmax] == checkVal );
766 ffts(X, log2NextN, 1);
767 assert( X[Nmax] == checkVal );
768 iffts(X, log2NextN, 1);
769 assert( X[Nmax] == checkVal );
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100770 } else {
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100771 rffts(X, log2NextN, 1);
772 riffts(X, log2NextN, 1);
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100773 }
hayati ayguenb9804a22019-12-27 00:35:47 +0100774
775 ++max_iter;
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100776 }
777 t1 = uclock_sec();
hayati ayguenb9804a22019-12-27 00:35:47 +0100778 } while ( t1 < tstop );
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100779
hayati ayguenb9804a22019-12-27 00:35:47 +0100780 fftFree();
781
hayati ayguenc974c1d2020-03-29 03:39:30 +0200782 flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */
hayati ayguenb9804a22019-12-27 00:35:47 +0100783 tmeas[TYPE_ITER][ALGO_GREEN] = max_iter;
784 tmeas[TYPE_MFLOPS][ALGO_GREEN] = flops/1e6/(t1 - t0 + 1e-16);
785 tmeas[TYPE_DUR_TOT][ALGO_GREEN] = t1 - t0;
786 tmeas[TYPE_DUR_NS][ALGO_GREEN] = show_output("Green", N, cplx, flops, t0, t1, max_iter, tableFile);
787 tmeas[TYPE_PREP][ALGO_GREEN] = (t0 - te) * 1e3;
788 haveAlgo[ALGO_GREEN] = 1;
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100789 } else {
790 show_output("Green", N, cplx, -1, -1, -1, -1, tableFile);
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100791 }
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100792#endif
793
hayati ayguenca112412020-04-13 00:19:40 +0200794#if defined(HAVE_KISS_FFT) && defined(PFFFT_ENABLE_FLOAT)
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100795 Nmax = (cplx ? nextPow2N*2 : nextPow2N);
796 X[Nmax] = checkVal;
hayati ayguenca112412020-04-13 00:19:40 +0200797 if ( 1 || PFFFT_FUNC(is_power_of_two)(N) )
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100798 {
799 kiss_fft_cfg stf;
800 kiss_fft_cfg sti;
801 kiss_fftr_cfg stfr;
802 kiss_fftr_cfg stir;
803
804 te = uclock_sec();
805 if (cplx) {
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100806 stf = kiss_fft_alloc(nextPow2N, 0, 0, 0);
807 sti = kiss_fft_alloc(nextPow2N, 1, 0, 0);
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100808 } else {
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100809 stfr = kiss_fftr_alloc(nextPow2N, 0, 0, 0);
810 stir = kiss_fftr_alloc(nextPow2N, 1, 0, 0);
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100811 }
812
hayati ayguenb9804a22019-12-27 00:35:47 +0100813 t0 = uclock_sec();
814 tstop = t0 + max_test_duration;
815 max_iter = 0;
816 do {
817 for ( k = 0; k < step_iter; ++k ) {
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100818 if (cplx) {
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100819 assert( X[Nmax] == checkVal );
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100820 kiss_fft(stf, (const kiss_fft_cpx *)X, (kiss_fft_cpx *)Y);
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100821 assert( X[Nmax] == checkVal );
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100822 kiss_fft(sti, (const kiss_fft_cpx *)Y, (kiss_fft_cpx *)X);
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100823 assert( X[Nmax] == checkVal );
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100824 } else {
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100825 assert( X[Nmax] == checkVal );
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100826 kiss_fftr(stfr, X, (kiss_fft_cpx *)Y);
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100827 assert( X[Nmax] == checkVal );
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100828 kiss_fftri(stir, (const kiss_fft_cpx *)Y, X);
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100829 assert( X[Nmax] == checkVal );
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100830 }
hayati ayguenb9804a22019-12-27 00:35:47 +0100831 ++max_iter;
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100832 }
833 t1 = uclock_sec();
hayati ayguenb9804a22019-12-27 00:35:47 +0100834 } while ( t1 < tstop );
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100835
hayati ayguenb9804a22019-12-27 00:35:47 +0100836 kiss_fft_cleanup();
837
hayati ayguenc974c1d2020-03-29 03:39:30 +0200838 flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */
hayati ayguenb9804a22019-12-27 00:35:47 +0100839 tmeas[TYPE_ITER][ALGO_KISS] = max_iter;
840 tmeas[TYPE_MFLOPS][ALGO_KISS] = flops/1e6/(t1 - t0 + 1e-16);
841 tmeas[TYPE_DUR_TOT][ALGO_KISS] = t1 - t0;
842 tmeas[TYPE_DUR_NS][ALGO_KISS] = show_output("Kiss", N, cplx, flops, t0, t1, max_iter, tableFile);
843 tmeas[TYPE_PREP][ALGO_KISS] = (t0 - te) * 1e3;
844 haveAlgo[ALGO_KISS] = 1;
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100845 } else {
846 show_output("Kiss", N, cplx, -1, -1, -1, -1, tableFile);
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100847 }
848#endif
849
hayati ayguen223c62a2020-06-13 01:59:49 +0200850#if defined(HAVE_POCKET_FFT)
851
hayati ayguen1c17fd42020-06-12 18:21:26 +0000852 Nmax = (cplx ? nextPow2N*2 : nextPow2N);
853 X[Nmax] = checkVal;
854 if ( 1 || PFFFT_FUNC(is_power_of_two)(N) )
855 {
hayati ayguen223c62a2020-06-13 01:59:49 +0200856 POCKFFTR_PRE(_plan) planr;
857 POCKFFTC_PRE(_plan) planc;
hayati ayguen1c17fd42020-06-12 18:21:26 +0000858
859 te = uclock_sec();
860 if (cplx) {
hayati ayguen223c62a2020-06-13 01:59:49 +0200861 planc = POCKFFTC_MID(make_,_plan)(nextPow2N);
hayati ayguen1c17fd42020-06-12 18:21:26 +0000862 } else {
hayati ayguen223c62a2020-06-13 01:59:49 +0200863 planr = POCKFFTR_MID(make_,_plan)(nextPow2N);
hayati ayguen1c17fd42020-06-12 18:21:26 +0000864 }
865
866 t0 = uclock_sec();
867 tstop = t0 + max_test_duration;
868 max_iter = 0;
869 do {
870 for ( k = 0; k < step_iter; ++k ) {
871 if (cplx) {
872 assert( X[Nmax] == checkVal );
873 memcpy(Y, X, 2*nextPow2N * sizeof(pffft_scalar) );
hayati ayguen223c62a2020-06-13 01:59:49 +0200874 POCKFFTC_PRE(_forward)(planc, Y, 1.);
hayati ayguen1c17fd42020-06-12 18:21:26 +0000875 assert( X[Nmax] == checkVal );
876 memcpy(X, Y, 2*nextPow2N * sizeof(pffft_scalar) );
hayati ayguen223c62a2020-06-13 01:59:49 +0200877 POCKFFTC_PRE(_backward)(planc, X, 1./nextPow2N);
hayati ayguen1c17fd42020-06-12 18:21:26 +0000878 assert( X[Nmax] == checkVal );
879 } else {
880 assert( X[Nmax] == checkVal );
881 memcpy(Y, X, nextPow2N * sizeof(pffft_scalar) );
hayati ayguen223c62a2020-06-13 01:59:49 +0200882 POCKFFTR_PRE(_forward)(planr, Y, 1.);
hayati ayguen1c17fd42020-06-12 18:21:26 +0000883 assert( X[Nmax] == checkVal );
884 memcpy(X, Y, nextPow2N * sizeof(pffft_scalar) );
hayati ayguen223c62a2020-06-13 01:59:49 +0200885 POCKFFTR_PRE(_backward)(planr, X, 1./nextPow2N);
hayati ayguen1c17fd42020-06-12 18:21:26 +0000886 assert( X[Nmax] == checkVal );
887 }
888 ++max_iter;
889 }
890 t1 = uclock_sec();
891 } while ( t1 < tstop );
892
893 if (cplx) {
hayati ayguen223c62a2020-06-13 01:59:49 +0200894 POCKFFTC_MID(destroy_,_plan)(planc);
hayati ayguen1c17fd42020-06-12 18:21:26 +0000895 } else {
hayati ayguen223c62a2020-06-13 01:59:49 +0200896 POCKFFTR_MID(destroy_,_plan)(planr);
hayati ayguen1c17fd42020-06-12 18:21:26 +0000897 }
898
899 flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */
900 tmeas[TYPE_ITER][ALGO_POCKET] = max_iter;
901 tmeas[TYPE_MFLOPS][ALGO_POCKET] = flops/1e6/(t1 - t0 + 1e-16);
902 tmeas[TYPE_DUR_TOT][ALGO_POCKET] = t1 - t0;
903 tmeas[TYPE_DUR_NS][ALGO_POCKET] = show_output("Pocket", N, cplx, flops, t0, t1, max_iter, tableFile);
904 tmeas[TYPE_PREP][ALGO_POCKET] = (t0 - te) * 1e3;
905 haveAlgo[ALGO_POCKET] = 1;
906 } else {
907 show_output("Pocket", N, cplx, -1, -1, -1, -1, tableFile);
908 }
909#endif
910
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100911
hayati ayguenc974c1d2020-03-29 03:39:30 +0200912 /* PFFFT-U (unordered) benchmark */
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100913 Nmax = (cplx ? pffftPow2N*2 : pffftPow2N);
914 X[Nmax] = checkVal;
hayati ayguenca112412020-04-13 00:19:40 +0200915 if ( pffftPow2N >= PFFFT_FUNC(min_fft_size)(cplx ? PFFFT_COMPLEX : PFFFT_REAL) )
Julien Pommier836bc4b2011-11-20 10:58:07 +0100916 {
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100917 te = uclock_sec();
hayati ayguenca112412020-04-13 00:19:40 +0200918 PFFFT_SETUP *s = PFFFT_FUNC(new_setup)(pffftPow2N, cplx ? PFFFT_COMPLEX : PFFFT_REAL);
Julien Pommier0302e8a2012-10-11 18:04:09 +0200919 if (s) {
hayati ayguenb9804a22019-12-27 00:35:47 +0100920 t0 = uclock_sec();
921 tstop = t0 + max_test_duration;
922 max_iter = 0;
923 do {
924 for ( k = 0; k < step_iter; ++k ) {
925 assert( X[Nmax] == checkVal );
hayati ayguenca112412020-04-13 00:19:40 +0200926 PFFFT_FUNC(transform)(s, X, Z, Y, PFFFT_FORWARD);
hayati ayguenb9804a22019-12-27 00:35:47 +0100927 assert( X[Nmax] == checkVal );
hayati ayguenca112412020-04-13 00:19:40 +0200928 PFFFT_FUNC(transform)(s, X, Z, Y, PFFFT_BACKWARD);
hayati ayguenb9804a22019-12-27 00:35:47 +0100929 assert( X[Nmax] == checkVal );
930 ++max_iter;
931 }
932 t1 = uclock_sec();
933 } while ( t1 < tstop );
934
hayati ayguenca112412020-04-13 00:19:40 +0200935 PFFFT_FUNC(destroy_setup)(s);
hayati ayguenbc8d4a82019-12-25 01:27:33 +0100936
hayati ayguenc974c1d2020-03-29 03:39:30 +0200937 flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */
hayati ayguenb9804a22019-12-27 00:35:47 +0100938 tmeas[TYPE_ITER][ALGO_PFFFT_U] = max_iter;
939 tmeas[TYPE_MFLOPS][ALGO_PFFFT_U] = flops/1e6/(t1 - t0 + 1e-16);
940 tmeas[TYPE_DUR_TOT][ALGO_PFFFT_U] = t1 - t0;
941 tmeas[TYPE_DUR_NS][ALGO_PFFFT_U] = show_output("PFFFT-U", N, cplx, flops, t0, t1, max_iter, tableFile);
942 tmeas[TYPE_PREP][ALGO_PFFFT_U] = (t0 - te) * 1e3;
943 haveAlgo[ALGO_PFFFT_U] = 1;
944 }
hayati ayguenca112412020-04-13 00:19:40 +0200945 } else {
946 show_output("PFFFT-U", N, cplx, -1, -1, -1, -1, tableFile);
hayati ayguenb9804a22019-12-27 00:35:47 +0100947 }
hayati ayguenca112412020-04-13 00:19:40 +0200948
949
950 if ( pffftPow2N >= PFFFT_FUNC(min_fft_size)(cplx ? PFFFT_COMPLEX : PFFFT_REAL) )
hayati ayguenb9804a22019-12-27 00:35:47 +0100951 {
952 te = uclock_sec();
hayati ayguenca112412020-04-13 00:19:40 +0200953 PFFFT_SETUP *s = PFFFT_FUNC(new_setup)(pffftPow2N, cplx ? PFFFT_COMPLEX : PFFFT_REAL);
hayati ayguenb9804a22019-12-27 00:35:47 +0100954 if (s) {
955 t0 = uclock_sec();
956 tstop = t0 + max_test_duration;
957 max_iter = 0;
958 do {
959 for ( k = 0; k < step_iter; ++k ) {
960 assert( X[Nmax] == checkVal );
hayati ayguenca112412020-04-13 00:19:40 +0200961 PFFFT_FUNC(transform_ordered)(s, X, Z, Y, PFFFT_FORWARD);
hayati ayguenb9804a22019-12-27 00:35:47 +0100962 assert( X[Nmax] == checkVal );
hayati ayguenca112412020-04-13 00:19:40 +0200963 PFFFT_FUNC(transform_ordered)(s, X, Z, Y, PFFFT_BACKWARD);
hayati ayguenb9804a22019-12-27 00:35:47 +0100964 assert( X[Nmax] == checkVal );
965 ++max_iter;
966 }
967 t1 = uclock_sec();
968 } while ( t1 < tstop );
969
hayati ayguenca112412020-04-13 00:19:40 +0200970 PFFFT_FUNC(destroy_setup)(s);
hayati ayguenb9804a22019-12-27 00:35:47 +0100971
hayati ayguenc974c1d2020-03-29 03:39:30 +0200972 flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */
hayati ayguenb9804a22019-12-27 00:35:47 +0100973 tmeas[TYPE_ITER][ALGO_PFFFT_O] = max_iter;
974 tmeas[TYPE_MFLOPS][ALGO_PFFFT_O] = flops/1e6/(t1 - t0 + 1e-16);
975 tmeas[TYPE_DUR_TOT][ALGO_PFFFT_O] = t1 - t0;
976 tmeas[TYPE_DUR_NS][ALGO_PFFFT_O] = show_output("PFFFT", N, cplx, flops, t0, t1, max_iter, tableFile);
977 tmeas[TYPE_PREP][ALGO_PFFFT_O] = (t0 - te) * 1e3;
978 haveAlgo[ALGO_PFFFT_O] = 1;
Julien Pommier0302e8a2012-10-11 18:04:09 +0200979 }
hayati ayguenca112412020-04-13 00:19:40 +0200980 } else {
981 show_output("PFFFT", N, cplx, -1, -1, -1, -1, tableFile);
Julien Pommier836bc4b2011-11-20 10:58:07 +0100982 }
983
hayati ayguencd60b962019-12-23 11:42:31 +0100984 if (!array_output_format)
hayati ayguen4c3a87a2019-12-23 08:48:00 +0100985 {
986 printf("prepare/ms: ");
987 for ( iter = 0; iter < NUM_FFT_ALGOS; ++iter )
988 {
989 if ( haveAlgo[iter] && tmeas[TYPE_DUR_NS][iter] > 0.0 ) {
990 printf("%s %.3f ", algoName[iter], tmeas[TYPE_PREP][iter] );
991 }
992 }
993 printf("\n");
994 }
995 Tfastest = 0.0;
996 for ( iter = 0; iter < NUM_FFT_ALGOS; ++iter )
997 {
998 if ( Tfastest == 0.0 || ( tmeas[TYPE_DUR_NS][iter] != 0.0 && tmeas[TYPE_DUR_NS][iter] < Tfastest ) )
999 Tfastest = tmeas[TYPE_DUR_NS][iter];
1000 }
1001 if ( Tfastest > 0.0 )
1002 {
hayati ayguencd60b962019-12-23 11:42:31 +01001003 if (!array_output_format)
1004 printf("relative fast: ");
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001005 for ( iter = 0; iter < NUM_FFT_ALGOS; ++iter )
1006 {
1007 if ( haveAlgo[iter] && tmeas[TYPE_DUR_NS][iter] > 0.0 ) {
1008 tmeas[TYPE_DUR_FASTEST][iter] = tmeas[TYPE_DUR_NS][iter] / Tfastest;
hayati ayguencd60b962019-12-23 11:42:31 +01001009 if (!array_output_format)
1010 printf("%s %.3f ", algoName[iter], tmeas[TYPE_DUR_FASTEST][iter] );
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001011 }
1012 }
hayati ayguencd60b962019-12-23 11:42:31 +01001013 if (!array_output_format)
1014 printf("\n");
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001015 }
1016
1017 {
hayati ayguencd60b962019-12-23 11:42:31 +01001018 if (!array_output_format)
1019 printf("relative pffft: ");
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001020 for ( iter = 0; iter < NUM_FFT_ALGOS; ++iter )
1021 {
1022 if ( haveAlgo[iter] && tmeas[TYPE_DUR_NS][iter] > 0.0 ) {
hayati ayguenb9804a22019-12-27 00:35:47 +01001023 tmeas[TYPE_REL_PFFFT][iter] = tmeas[TYPE_DUR_NS][iter] / tmeas[TYPE_DUR_NS][ALGO_PFFFT_O];
hayati ayguencd60b962019-12-23 11:42:31 +01001024 if (!array_output_format)
1025 printf("%s %.3f ", algoName[iter], tmeas[TYPE_REL_PFFFT][iter] );
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001026 }
1027 }
hayati ayguencd60b962019-12-23 11:42:31 +01001028 if (!array_output_format)
1029 printf("\n");
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001030 }
1031
Julien Pommier836bc4b2011-11-20 10:58:07 +01001032 if (!array_output_format) {
1033 printf("--\n");
1034 }
Julien Pommier370d2092011-11-19 18:04:25 +01001035
hayati ayguenca112412020-04-13 00:19:40 +02001036 PFFFT_FUNC(aligned_free)(X);
1037 PFFFT_FUNC(aligned_free)(Y);
1038 PFFFT_FUNC(aligned_free)(Z);
Julien Pommier370d2092011-11-19 18:04:25 +01001039}
1040
hayati ayguenca112412020-04-13 00:19:40 +02001041
1042/* small functions inside pffft.c that will detect (compiler) bugs with respect to simd instructions */
1043void validate_pffft_simd();
1044int validate_pffft_simd_ex(FILE * DbgOut);
1045void validate_pffftd_simd();
1046int validate_pffftd_simd_ex(FILE * DbgOut);
Julien Pommier370d2092011-11-19 18:04:25 +01001047
Julien Pommier836bc4b2011-11-20 10:58:07 +01001048
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001049
1050int main(int argc, char **argv) {
1051 /* unfortunately, the fft size must be a multiple of 16 for complex FFTs
1052 and 32 for real FFTs -- a lot of stuff would need to be rewritten to
1053 handle other cases (or maybe just switch to a scalar fft, I don't know..) */
1054
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001055#if 0 /* include powers of 2 ? */
1056#define NUMNONPOW2LENS 23
1057 int NnonPow2[NUMNONPOW2LENS] = {
1058 64, 96, 128, 160, 192, 256, 384, 5*96, 512, 5*128,
1059 3*256, 800, 1024, 2048, 2400, 4096, 8192, 9*1024, 16384, 32768,
1060 256*1024, 1024*1024, -1 };
1061#else
1062#define NUMNONPOW2LENS 11
1063 int NnonPow2[NUMNONPOW2LENS] = {
1064 96, 160, 192, 384, 5*96, 5*128,3*256, 800, 2400, 9*1024,
1065 -1 };
1066#endif
1067
hayati ayguen482d2322020-08-26 14:51:34 +02001068#define NUMPOW2FFTLENS 22
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001069#define MAXNUMFFTLENS MAX( NUMPOW2FFTLENS, NUMNONPOW2LENS )
hayati ayguen482d2322020-08-26 14:51:34 +02001070 int Npow2[NUMPOW2FFTLENS]; /* exp = 1 .. 21, -1 */
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001071 const int *Nvalues = NULL;
1072 double tmeas[2][MAXNUMFFTLENS][NUM_TYPES][NUM_FFT_ALGOS];
hayati ayguenb9804a22019-12-27 00:35:47 +01001073 double iterCalReal, iterCalCplx;
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001074
1075 int benchReal=1, benchCplx=1, withFFTWfullMeas=0, outputTable2File=1, usePow2=1;
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001076 int realCplxIdx, typeIdx;
1077 int i, k;
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001078 FILE *tableFile = NULL;
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001079
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001080 int haveAlgo[NUM_FFT_ALGOS];
hayati ayguenca112412020-04-13 00:19:40 +02001081 char acCsvFilename[64];
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001082
1083 for ( k = 1; k <= NUMPOW2FFTLENS; ++k )
1084 Npow2[k-1] = (k == NUMPOW2FFTLENS) ? -1 : (1 << k);
1085 Nvalues = Npow2; /* set default .. for comparisons .. */
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001086
1087 for ( i = 0; i < NUM_FFT_ALGOS; ++i )
1088 haveAlgo[i] = 0;
1089
hayati ayguenca112412020-04-13 00:19:40 +02001090 printf("pffft architecture: '%s'\n", PFFFT_FUNC(simd_arch)());
1091 printf("pffft SIMD size: %d\n", PFFFT_FUNC(simd_size)());
1092 printf("pffft min real fft: %d\n", PFFFT_FUNC(min_fft_size)(PFFFT_REAL));
1093 printf("pffft min complex fft: %d\n", PFFFT_FUNC(min_fft_size)(PFFFT_COMPLEX));
1094 printf("\n");
1095
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001096 for ( i = 1; i < argc; ++i ) {
hayati ayguencd60b962019-12-23 11:42:31 +01001097 if (!strcmp(argv[i], "--array-format") || !strcmp(argv[i], "--table")) {
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001098 array_output_format = 1;
1099 }
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001100 else if (!strcmp(argv[i], "--no-tab")) {
hayati ayguencd60b962019-12-23 11:42:31 +01001101 array_output_format = 0;
1102 }
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001103 else if (!strcmp(argv[i], "--real")) {
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001104 benchCplx = 0;
1105 }
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001106 else if (!strcmp(argv[i], "--cplx")) {
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001107 benchReal = 0;
1108 }
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001109 else if (!strcmp(argv[i], "--fftw-full-measure")) {
1110 withFFTWfullMeas = 1;
1111 }
1112 else if (!strcmp(argv[i], "--non-pow2")) {
1113 Nvalues = NnonPow2;
1114 usePow2 = 0;
1115 }
1116 else /* if (!strcmp(argv[i], "--help")) */ {
1117 printf("usage: %s [--array-format|--table] [--no-tab] [--real|--cplx] [--fftw-full-measure] [--non-pow2]\n", argv[0]);
1118 exit(0);
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001119 }
Julien Pommier836bc4b2011-11-20 10:58:07 +01001120 }
1121
hayati ayguencd60b962019-12-23 11:42:31 +01001122#ifdef HAVE_FFTW
hayati ayguenca112412020-04-13 00:19:40 +02001123#ifdef PFFFT_ENABLE_DOUBLE
1124 algoName[ALGO_FFTW_ESTIM] = "FFTW D(estim)";
1125 algoName[ALGO_FFTW_AUTO] = "FFTW D(auto) ";
1126#endif
1127
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001128 if (withFFTWfullMeas)
hayati ayguencd60b962019-12-23 11:42:31 +01001129 {
hayati ayguenca112412020-04-13 00:19:40 +02001130#ifdef PFFFT_ENABLE_FLOAT
1131 algoName[ALGO_FFTW_AUTO] = "FFTWF(meas)"; /* "FFTW (auto)" */
1132#else
1133 algoName[ALGO_FFTW_AUTO] = "FFTWD(meas)"; /* "FFTW (auto)" */
1134#endif
hayati ayguencd60b962019-12-23 11:42:31 +01001135 algoTableHeader[NUM_FFT_ALGOS][0] = "|real FFTWmeas "; /* "|real FFTWauto " */
1136 algoTableHeader[NUM_FFT_ALGOS][0] = "|cplx FFTWmeas "; /* "|cplx FFTWauto " */
1137 }
hayati ayguencd60b962019-12-23 11:42:31 +01001138#endif
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001139
hayati ayguenca112412020-04-13 00:19:40 +02001140 if ( PFFFT_FUNC(simd_size)() == 1 )
1141 {
1142 algoName[ALGO_PFFFT_U] = "PFFFTU scal-1";
1143 algoName[ALGO_PFFFT_O] = "PFFFT scal-1 ";
1144 }
1145 else if ( !strcmp(PFFFT_FUNC(simd_arch)(), "4xScalar") )
1146 {
1147 algoName[ALGO_PFFFT_U] = "PFFFT-U scal4";
1148 algoName[ALGO_PFFFT_O] = "PFFFT scal-4 ";
1149 }
1150
hayati ayguenb9804a22019-12-27 00:35:47 +01001151
1152 clock();
1153 /* double TClockDur = 1.0 / CLOCKS_PER_SEC;
1154 printf("clock() duration for CLOCKS_PER_SEC = %f sec = %f ms\n", TClockDur, 1000.0 * TClockDur );
1155 */
1156
1157 /* calibrate test duration */
1158 {
1159 double t0, t1, dur;
1160 printf("calibrating fft benchmark duration at size N = 512 ..\n");
1161 t0 = uclock_sec();
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001162 if (benchReal) {
hayati ayguenb9804a22019-12-27 00:35:47 +01001163 iterCalReal = cal_benchmark(512, 0 /* real fft */);
1164 printf("real fft iterCal = %f\n", iterCalReal);
Julien Pommier836bc4b2011-11-20 10:58:07 +01001165 }
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001166 if (benchCplx) {
hayati ayguenb9804a22019-12-27 00:35:47 +01001167 iterCalCplx = cal_benchmark(512, 1 /* cplx fft */);
1168 printf("cplx fft iterCal = %f\n", iterCalCplx);
Julien Pommier836bc4b2011-11-20 10:58:07 +01001169 }
hayati ayguenb9804a22019-12-27 00:35:47 +01001170 t1 = uclock_sec();
1171 dur = t1 - t0;
hayati ayguenca112412020-04-13 00:19:40 +02001172 printf("calibration done in %f sec.\n\n", dur);
hayati ayguenb9804a22019-12-27 00:35:47 +01001173 }
1174
1175 if (!array_output_format) {
1176 if (benchReal) {
1177 for (i=0; Nvalues[i] > 0; ++i)
1178 benchmark_ffts(Nvalues[i], 0 /* real fft */, withFFTWfullMeas, iterCalReal, tmeas[0][i], haveAlgo, NULL);
1179 }
1180 if (benchCplx) {
1181 for (i=0; Nvalues[i] > 0; ++i)
1182 benchmark_ffts(Nvalues[i], 1 /* cplx fft */, withFFTWfullMeas, iterCalCplx, tmeas[1][i], haveAlgo, NULL);
1183 }
1184
Julien Pommier836bc4b2011-11-20 10:58:07 +01001185 } else {
Julien Pommier836bc4b2011-11-20 10:58:07 +01001186
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001187 if (outputTable2File) {
1188 tableFile = fopen( usePow2 ? "bench-fft-table-pow2.txt" : "bench-fft-table-non2.txt", "w");
1189 }
hayati ayguencd60b962019-12-23 11:42:31 +01001190 /* print table headers */
hayati ayguenca112412020-04-13 00:19:40 +02001191 printf("table shows MFlops; higher values indicate faster computation\n\n");
1192
hayati ayguencd60b962019-12-23 11:42:31 +01001193 {
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001194 print_table("| input len ", tableFile);
hayati ayguencd60b962019-12-23 11:42:31 +01001195 for (realCplxIdx = 0; realCplxIdx < 2; ++realCplxIdx)
1196 {
1197 if ( (realCplxIdx == 0 && !benchReal) || (realCplxIdx == 1 && !benchCplx) )
1198 continue;
1199 for (k=0; k < NUM_FFT_ALGOS; ++k)
1200 {
1201 if ( compiledInAlgo[k] )
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001202 print_table(algoTableHeader[k][realCplxIdx], tableFile);
hayati ayguencd60b962019-12-23 11:42:31 +01001203 }
1204 }
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001205 print_table("|\n", tableFile);
hayati ayguencd60b962019-12-23 11:42:31 +01001206 }
1207 /* print table value seperators */
1208 {
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001209 print_table("|----------", tableFile);
hayati ayguencd60b962019-12-23 11:42:31 +01001210 for (realCplxIdx = 0; realCplxIdx < 2; ++realCplxIdx)
1211 {
1212 if ( (realCplxIdx == 0 && !benchReal) || (realCplxIdx == 1 && !benchCplx) )
1213 continue;
1214 for (k=0; k < NUM_FFT_ALGOS; ++k)
1215 {
1216 if ( compiledInAlgo[k] )
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001217 print_table(":|-------------", tableFile);
hayati ayguencd60b962019-12-23 11:42:31 +01001218 }
1219 }
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001220 print_table(":|\n", tableFile);
hayati ayguencd60b962019-12-23 11:42:31 +01001221 }
1222
Julien Pommier836bc4b2011-11-20 10:58:07 +01001223 for (i=0; Nvalues[i] > 0; ++i) {
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001224 {
hayati ayguenb9804a22019-12-27 00:35:47 +01001225 double t0, t1;
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001226 print_table_fftsize(Nvalues[i], tableFile);
hayati ayguenb9804a22019-12-27 00:35:47 +01001227 t0 = uclock_sec();
1228 if (benchReal)
1229 benchmark_ffts(Nvalues[i], 0, withFFTWfullMeas, iterCalReal, tmeas[0][i], haveAlgo, tableFile);
1230 if (benchCplx)
1231 benchmark_ffts(Nvalues[i], 1, withFFTWfullMeas, iterCalCplx, tmeas[1][i], haveAlgo, tableFile);
1232 t1 = uclock_sec();
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001233 print_table("|\n", tableFile);
hayati ayguenb9804a22019-12-27 00:35:47 +01001234 /* printf("all ffts for size %d took %f sec\n", Nvalues[i], t1-t0); */
1235 (void)t0;
1236 (void)t1;
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001237 }
Julien Pommier836bc4b2011-11-20 10:58:07 +01001238 }
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001239 fprintf(stdout, " (numbers are given in MFlops)\n");
1240 if (outputTable2File) {
1241 fclose(tableFile);
1242 }
Julien Pommier370d2092011-11-19 18:04:25 +01001243 }
Julien Pommier836bc4b2011-11-20 10:58:07 +01001244
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001245 printf("\n");
1246 printf("now writing .csv files ..\n");
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001247
1248 for (realCplxIdx = 0; realCplxIdx < 2; ++realCplxIdx)
1249 {
1250 if ( (benchReal && realCplxIdx == 0) || (benchCplx && realCplxIdx == 1) )
1251 {
1252 for (typeIdx = 0; typeIdx < NUM_TYPES; ++typeIdx)
1253 {
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001254 FILE *f = NULL;
hayati ayguenb9804a22019-12-27 00:35:47 +01001255 if ( !(SAVE_ALL_TYPES || saveType[typeIdx]) )
1256 continue;
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001257 acCsvFilename[0] = 0;
1258#ifdef PFFFT_SIMD_DISABLE
1259 strcat(acCsvFilename, "scal-");
1260#else
1261 strcat(acCsvFilename, "simd-");
1262#endif
1263 strcat(acCsvFilename, (realCplxIdx == 0 ? "real-" : "cplx-"));
1264 strcat(acCsvFilename, ( usePow2 ? "pow2-" : "non2-"));
hayati ayguenca112412020-04-13 00:19:40 +02001265 assert( strlen(acCsvFilename) + strlen(typeFilenamePart[typeIdx]) + 5 < (sizeof(acCsvFilename) / sizeof(acCsvFilename[0])) );
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001266 strcat(acCsvFilename, typeFilenamePart[typeIdx]);
1267 strcat(acCsvFilename, ".csv");
1268 f = fopen(acCsvFilename, "w");
1269 if (!f)
1270 continue;
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001271 {
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001272 fprintf(f, "size, log2, ");
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001273 for (k=0; k < NUM_FFT_ALGOS; ++k)
1274 if ( haveAlgo[k] )
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001275 fprintf(f, "%s, ", algoName[k]);
1276 fprintf(f, "\n");
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001277 }
1278 for (i=0; Nvalues[i] > 0; ++i)
1279 {
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001280 {
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001281 fprintf(f, "%d, %.3f, ", Nvalues[i], log10((double)Nvalues[i])/log10(2.0) );
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001282 for (k=0; k < NUM_FFT_ALGOS; ++k)
1283 if ( haveAlgo[k] )
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001284 fprintf(f, "%f, ", tmeas[realCplxIdx][i][typeIdx][k]);
1285 fprintf(f, "\n");
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001286 }
1287 }
hayati ayguenbc8d4a82019-12-25 01:27:33 +01001288 fclose(f);
hayati ayguen4c3a87a2019-12-23 08:48:00 +01001289 }
1290 }
1291 }
1292
Julien Pommier370d2092011-11-19 18:04:25 +01001293 return 0;
1294}
hayati ayguen3673ac02019-12-22 07:09:56 +01001295