Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 1 | /* |
Julien Pommier | 432b3e8 | 2013-01-12 19:28:03 +0100 | [diff] [blame] | 2 | Copyright (c) 2013 Julien Pommier. |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 3 | Copyright (c) 2019 Hayati Ayguen ( h_ayguen@web.de ) |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 4 | |
| 5 | Small test & bench for PFFFT, comparing its performance with the scalar FFTPACK, FFTW, and Apple vDSP |
| 6 | |
| 7 | How to build: |
| 8 | |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 9 | on linux, with fftw3: |
| 10 | gcc -o test_pffft -DHAVE_FFTW -msse -mfpmath=sse -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -lm |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 11 | |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 12 | on macos, without fftw3: |
hayati ayguen | 3673ac0 | 2019-12-22 07:09:56 +0100 | [diff] [blame] | 13 | clang -o test_pffft -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -framework Accelerate |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 14 | |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 15 | on macos, with fftw3: |
hayati ayguen | 3673ac0 | 2019-12-22 07:09:56 +0100 | [diff] [blame] | 16 | clang -o test_pffft -DHAVE_FFTW -DHAVE_VECLIB -O3 -Wall -W pffft.c test_pffft.c fftpack.c -L/usr/local/lib -I/usr/local/include/ -lfftw3f -framework Accelerate |
| 17 | |
| 18 | as alternative: replace clang by gcc. |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 19 | |
| 20 | on windows, with visual c++: |
| 21 | cl /Ox -D_USE_MATH_DEFINES /arch:SSE test_pffft.c pffft.c fftpack.c |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 22 | |
| 23 | build without SIMD instructions: |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 24 | gcc -o test_pffft -DPFFFT_SIMD_DISABLE -O3 -Wall -W pffft.c test_pffft.c fftpack.c -lm |
| 25 | |
| 26 | */ |
| 27 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 28 | #define CONCAT_TOKENS(A, B) A ## B |
hayati ayguen | 223c62a | 2020-06-13 01:59:49 +0200 | [diff] [blame] | 29 | #define CONCAT_THREE_TOKENS(A, B, C) A ## B ## C |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 30 | |
| 31 | #ifdef PFFFT_ENABLE_FLOAT |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 32 | #include "pffft.h" |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 33 | |
| 34 | typedef float pffft_scalar; |
| 35 | typedef PFFFT_Setup PFFFT_SETUP; |
| 36 | #define PFFFT_FUNC(F) CONCAT_TOKENS(pffft_, F) |
| 37 | |
| 38 | #else |
| 39 | /* |
| 40 | Note: adapted for double precision dynamic range version. |
| 41 | */ |
| 42 | #include "pffft_double.h" |
| 43 | |
| 44 | typedef double pffft_scalar; |
| 45 | typedef PFFFTD_Setup PFFFT_SETUP; |
| 46 | #define PFFFT_FUNC(F) CONCAT_TOKENS(pffftd_, F) |
| 47 | #endif |
| 48 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 49 | #ifdef PFFFT_ENABLE_FLOAT |
| 50 | |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 51 | #include "fftpack.h" |
| 52 | |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 53 | #ifdef HAVE_GREEN_FFTS |
| 54 | #include "fftext.h" |
| 55 | #endif |
| 56 | |
| 57 | #ifdef HAVE_KISS_FFT |
| 58 | #include <kiss_fft.h> |
| 59 | #include <kiss_fftr.h> |
| 60 | #endif |
| 61 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 62 | #endif |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 63 | |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 64 | #ifdef HAVE_POCKET_FFT |
hayati ayguen | 223c62a | 2020-06-13 01:59:49 +0200 | [diff] [blame] | 65 | #include <pocketfft_double.h> |
| 66 | #include <pocketfft_single.h> |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 67 | #endif |
| 68 | |
hayati ayguen | 223c62a | 2020-06-13 01:59:49 +0200 | [diff] [blame] | 69 | #ifdef PFFFT_ENABLE_FLOAT |
| 70 | #define POCKFFTR_PRE(R) CONCAT_TOKENS(rffts, R) |
| 71 | #define POCKFFTC_PRE(R) CONCAT_TOKENS(cffts, R) |
| 72 | #define POCKFFTR_MID(L,R) CONCAT_THREE_TOKENS(L, rffts, R) |
| 73 | #define POCKFFTC_MID(L,R) CONCAT_THREE_TOKENS(L, cffts, R) |
| 74 | #else |
| 75 | #define POCKFFTR_PRE(R) CONCAT_TOKENS(rfft, R) |
| 76 | #define POCKFFTC_PRE(R) CONCAT_TOKENS(cfft, R) |
| 77 | #define POCKFFTR_MID(L,R) CONCAT_THREE_TOKENS(L, rfft, R) |
| 78 | #define POCKFFTC_MID(L,R) CONCAT_THREE_TOKENS(L, cfft, R) |
| 79 | #endif |
| 80 | |
| 81 | |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 82 | |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 83 | #include <math.h> |
| 84 | #include <stdio.h> |
| 85 | #include <stdlib.h> |
| 86 | #include <time.h> |
| 87 | #include <assert.h> |
| 88 | #include <string.h> |
| 89 | |
| 90 | #ifdef HAVE_SYS_TIMES |
| 91 | # include <sys/times.h> |
| 92 | # include <unistd.h> |
| 93 | #endif |
| 94 | |
| 95 | #ifdef HAVE_VECLIB |
hayati ayguen | 3673ac0 | 2019-12-22 07:09:56 +0100 | [diff] [blame] | 96 | # include <Accelerate/Accelerate.h> |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 97 | #endif |
| 98 | |
| 99 | #ifdef HAVE_FFTW |
| 100 | # include <fftw3.h> |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 101 | |
| 102 | #ifdef PFFFT_ENABLE_FLOAT |
| 103 | typedef fftwf_plan FFTW_PLAN; |
| 104 | typedef fftwf_complex FFTW_COMPLEX; |
| 105 | #define FFTW_FUNC(F) CONCAT_TOKENS(fftwf_, F) |
| 106 | #else |
| 107 | typedef fftw_plan FFTW_PLAN; |
| 108 | typedef fftw_complex FFTW_COMPLEX; |
| 109 | #define FFTW_FUNC(F) CONCAT_TOKENS(fftw_, F) |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 110 | #endif |
| 111 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 112 | #endif /* HAVE_FFTW */ |
| 113 | |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 114 | #ifndef M_LN2 |
| 115 | #define M_LN2 0.69314718055994530942 /* log_e 2 */ |
| 116 | #endif |
| 117 | |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 118 | |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 119 | #define NUM_FFT_ALGOS 9 |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 120 | enum { |
| 121 | ALGO_FFTPACK = 0, |
| 122 | ALGO_VECLIB, |
| 123 | ALGO_FFTW_ESTIM, |
| 124 | ALGO_FFTW_AUTO, |
| 125 | ALGO_GREEN, |
| 126 | ALGO_KISS, |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 127 | ALGO_POCKET, |
| 128 | ALGO_PFFFT_U, /* = 7 */ |
| 129 | ALGO_PFFFT_O /* = 8 */ |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 130 | }; |
| 131 | |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 132 | #define NUM_TYPES 7 |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 133 | enum { |
| 134 | TYPE_PREP = 0, /* time for preparation in ms */ |
| 135 | TYPE_DUR_NS = 1, /* time per fft in ns */ |
| 136 | TYPE_DUR_FASTEST = 2, /* relative time to fastest */ |
hayati ayguen | 42ee6f1 | 2019-12-25 23:08:04 +0100 | [diff] [blame] | 137 | TYPE_REL_PFFFT = 3, /* relative time to ALGO_PFFFT */ |
| 138 | TYPE_ITER = 4, /* # of iterations in measurement */ |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 139 | TYPE_MFLOPS = 5, /* MFlops/sec */ |
| 140 | TYPE_DUR_TOT = 6 /* test duration in sec */ |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 141 | }; |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 142 | /* double tmeas[NUM_TYPES][NUM_FFT_ALGOS]; */ |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 143 | |
| 144 | const char * algoName[NUM_FFT_ALGOS] = { |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 145 | "FFTPack ", |
| 146 | "vDSP (vec) ", |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 147 | "FFTW F(estim)", |
| 148 | "FFTW F(auto) ", |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 149 | "Green ", |
| 150 | "Kiss ", |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 151 | "Pocket ", |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 152 | "PFFFT-U(simd)", /* unordered */ |
| 153 | "PFFFT (simd) " /* ordered */ |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 154 | }; |
| 155 | |
| 156 | |
| 157 | int compiledInAlgo[NUM_FFT_ALGOS] = { |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 158 | #ifdef PFFFT_ENABLE_FLOAT |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 159 | 1, /* "FFTPack " */ |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 160 | #else |
| 161 | 0, /* "FFTPack " */ |
| 162 | #endif |
| 163 | #if defined(HAVE_VECLIB) && defined(PFFFT_ENABLE_FLOAT) |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 164 | 1, /* "vDSP (vec) " */ |
| 165 | #else |
| 166 | 0, |
| 167 | #endif |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 168 | #if defined(HAVE_FFTW) |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 169 | 1, /* "FFTW(estim)" */ |
| 170 | 1, /* "FFTW (auto)" */ |
| 171 | #else |
| 172 | 0, 0, |
| 173 | #endif |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 174 | #if defined(HAVE_GREEN_FFTS) && defined(PFFFT_ENABLE_FLOAT) |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 175 | 1, /* "Green " */ |
| 176 | #else |
| 177 | 0, |
| 178 | #endif |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 179 | #if defined(HAVE_KISS_FFT) && defined(PFFFT_ENABLE_FLOAT) |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 180 | 1, /* "Kiss " */ |
| 181 | #else |
| 182 | 0, |
| 183 | #endif |
hayati ayguen | 223c62a | 2020-06-13 01:59:49 +0200 | [diff] [blame] | 184 | #if defined(HAVE_POCKET_FFT) |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 185 | 1, /* "Pocket " */ |
| 186 | #else |
| 187 | 0, |
| 188 | #endif |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 189 | 1, /* "PFFFT_U " */ |
| 190 | 1 /* "PFFFT_O " */ |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 191 | }; |
| 192 | |
| 193 | const char * algoTableHeader[NUM_FFT_ALGOS][2] = { |
| 194 | { "| real FFTPack ", "| cplx FFTPack " }, |
| 195 | { "| real vDSP ", "| cplx vDSP " }, |
| 196 | { "|real FFTWestim", "|cplx FFTWestim" }, |
| 197 | { "|real FFTWauto ", "|cplx FFTWauto " }, |
| 198 | { "| real Green ", "| cplx Green " }, |
| 199 | { "| real Kiss ", "| cplx Kiss " }, |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 200 | { "| real Pocket ", "| cplx Pocket " }, |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 201 | { "| real PFFFT-U ", "| cplx PFFFT-U " }, |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 202 | { "| real PFFFT ", "| cplx PFFFT " } }; |
| 203 | |
| 204 | const char * typeText[NUM_TYPES] = { |
| 205 | "preparation in ms", |
| 206 | "time per fft in ns", |
| 207 | "relative to fastest", |
hayati ayguen | 42ee6f1 | 2019-12-25 23:08:04 +0100 | [diff] [blame] | 208 | "relative to pffft", |
| 209 | "measured_num_iters", |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 210 | "mflops", |
| 211 | "test duration in sec" |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 212 | }; |
| 213 | |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 214 | const char * typeFilenamePart[NUM_TYPES] = { |
| 215 | "1-preparation-in-ms", |
| 216 | "2-timePerFft-in-ns", |
| 217 | "3-rel-fastest", |
hayati ayguen | 42ee6f1 | 2019-12-25 23:08:04 +0100 | [diff] [blame] | 218 | "4-rel-pffft", |
| 219 | "5-num-iter", |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 220 | "6-mflops", |
| 221 | "7-duration-in-sec" |
| 222 | }; |
| 223 | |
| 224 | #define SAVE_ALL_TYPES 0 |
| 225 | |
| 226 | const int saveType[NUM_TYPES] = { |
| 227 | 1, /* "1-preparation-in-ms" */ |
| 228 | 0, /* "2-timePerFft-in-ns" */ |
| 229 | 0, /* "3-rel-fastest" */ |
| 230 | 1, /* "4-rel-pffft" */ |
| 231 | 1, /* "5-num-iter" */ |
| 232 | 1, /* "6-mflops" */ |
| 233 | 1, /* "7-duration-in-sec" */ |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 234 | }; |
| 235 | |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 236 | |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 237 | #define MAX(x,y) ((x)>(y)?(x):(y)) |
hayati ayguen | 42ee6f1 | 2019-12-25 23:08:04 +0100 | [diff] [blame] | 238 | #define MIN(x,y) ((x)<(y)?(x):(y)) |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 239 | |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 240 | unsigned Log2(unsigned v) { |
| 241 | /* we don't need speed records .. obvious way is good enough */ |
| 242 | /* https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogObvious */ |
| 243 | /* Find the log base 2 of an integer with the MSB N set in O(N) operations (the obvious way): |
| 244 | * unsigned v: 32-bit word to find the log base 2 of */ |
| 245 | unsigned r = 0; /* r will be lg(v) */ |
| 246 | while (v >>= 1) |
| 247 | { |
| 248 | r++; |
| 249 | } |
| 250 | return r; |
| 251 | } |
| 252 | |
| 253 | |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 254 | double frand() { |
| 255 | return rand()/(double)RAND_MAX; |
| 256 | } |
| 257 | |
| 258 | #if defined(HAVE_SYS_TIMES) |
| 259 | inline double uclock_sec(void) { |
| 260 | static double ttclk = 0.; |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 261 | struct tms t; |
| 262 | if (ttclk == 0.) |
| 263 | ttclk = sysconf(_SC_CLK_TCK); |
| 264 | times(&t); |
| 265 | /* use only the user time of this process - not realtime, which depends on OS-scheduler .. */ |
| 266 | return ((double)t.tms_utime)) / ttclk; |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 267 | } |
| 268 | # else |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 269 | double uclock_sec(void) |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 270 | { return (double)clock()/(double)CLOCKS_PER_SEC; } |
| 271 | #endif |
| 272 | |
| 273 | |
| 274 | /* compare results with the regular fftpack */ |
| 275 | void pffft_validate_N(int N, int cplx) { |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 276 | |
| 277 | #ifdef PFFFT_ENABLE_FLOAT |
| 278 | |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 279 | int Nfloat = N*(cplx?2:1); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 280 | int Nbytes = Nfloat * sizeof(pffft_scalar); |
Julien Pommier | 0302e8a | 2012-10-11 18:04:09 +0200 | [diff] [blame] | 281 | float *ref, *in, *out, *tmp, *tmp2; |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 282 | PFFFT_SETUP *s = PFFFT_FUNC(new_setup)(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 283 | int pass; |
Julien Pommier | 0302e8a | 2012-10-11 18:04:09 +0200 | [diff] [blame] | 284 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 285 | |
Julien Pommier | 0302e8a | 2012-10-11 18:04:09 +0200 | [diff] [blame] | 286 | if (!s) { printf("Skipping N=%d, not supported\n", N); return; } |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 287 | ref = PFFFT_FUNC(aligned_malloc)(Nbytes); |
| 288 | in = PFFFT_FUNC(aligned_malloc)(Nbytes); |
| 289 | out = PFFFT_FUNC(aligned_malloc)(Nbytes); |
| 290 | tmp = PFFFT_FUNC(aligned_malloc)(Nbytes); |
| 291 | tmp2 = PFFFT_FUNC(aligned_malloc)(Nbytes); |
Julien Pommier | 0302e8a | 2012-10-11 18:04:09 +0200 | [diff] [blame] | 292 | |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 293 | for (pass=0; pass < 2; ++pass) { |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 294 | float ref_max = 0; |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 295 | int k; |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 296 | /* printf("N=%d pass=%d cplx=%d\n", N, pass, cplx); */ |
| 297 | /* compute reference solution with FFTPACK */ |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 298 | if (pass == 0) { |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 299 | float *wrk = malloc(2*Nbytes+15*sizeof(pffft_scalar)); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 300 | for (k=0; k < Nfloat; ++k) { |
hayati ayguen | ee17cb0 | 2020-08-28 04:47:22 +0200 | [diff] [blame] | 301 | ref[k] = in[k] = (float)( frand()*2-1 ); |
| 302 | out[k] = 1e30F; |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 303 | } |
| 304 | if (!cplx) { |
| 305 | rffti(N, wrk); |
| 306 | rfftf(N, ref, wrk); |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 307 | /* use our ordering for real ffts instead of the one of fftpack */ |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 308 | { |
| 309 | float refN=ref[N-1]; |
| 310 | for (k=N-2; k >= 1; --k) ref[k+1] = ref[k]; |
| 311 | ref[1] = refN; |
| 312 | } |
| 313 | } else { |
| 314 | cffti(N, wrk); |
| 315 | cfftf(N, ref, wrk); |
| 316 | } |
| 317 | free(wrk); |
| 318 | } |
| 319 | |
hayati ayguen | ee17cb0 | 2020-08-28 04:47:22 +0200 | [diff] [blame] | 320 | for (k = 0; k < Nfloat; ++k) ref_max = MAX(ref_max, (float)( fabs(ref[k]) )); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 321 | |
| 322 | |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 323 | /* pass 0 : non canonical ordering of transform coefficients */ |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 324 | if (pass == 0) { |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 325 | /* test forward transform, with different input / output */ |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 326 | PFFFT_FUNC(transform)(s, in, tmp, 0, PFFFT_FORWARD); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 327 | memcpy(tmp2, tmp, Nbytes); |
| 328 | memcpy(tmp, in, Nbytes); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 329 | PFFFT_FUNC(transform)(s, tmp, tmp, 0, PFFFT_FORWARD); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 330 | for (k = 0; k < Nfloat; ++k) { |
| 331 | assert(tmp2[k] == tmp[k]); |
| 332 | } |
| 333 | |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 334 | /* test reordering */ |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 335 | PFFFT_FUNC(zreorder)(s, tmp, out, PFFFT_FORWARD); |
| 336 | PFFFT_FUNC(zreorder)(s, out, tmp, PFFFT_BACKWARD); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 337 | for (k = 0; k < Nfloat; ++k) { |
| 338 | assert(tmp2[k] == tmp[k]); |
| 339 | } |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 340 | PFFFT_FUNC(zreorder)(s, tmp, out, PFFFT_FORWARD); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 341 | } else { |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 342 | /* pass 1 : canonical ordering of transform coeffs. */ |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 343 | PFFFT_FUNC(transform_ordered)(s, in, tmp, 0, PFFFT_FORWARD); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 344 | memcpy(tmp2, tmp, Nbytes); |
| 345 | memcpy(tmp, in, Nbytes); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 346 | PFFFT_FUNC(transform_ordered)(s, tmp, tmp, 0, PFFFT_FORWARD); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 347 | for (k = 0; k < Nfloat; ++k) { |
| 348 | assert(tmp2[k] == tmp[k]); |
| 349 | } |
| 350 | memcpy(out, tmp, Nbytes); |
| 351 | } |
| 352 | |
| 353 | { |
| 354 | for (k=0; k < Nfloat; ++k) { |
| 355 | if (!(fabs(ref[k] - out[k]) < 1e-3*ref_max)) { |
| 356 | printf("%s forward PFFFT mismatch found for N=%d\n", (cplx?"CPLX":"REAL"), N); |
| 357 | exit(1); |
| 358 | } |
| 359 | } |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 360 | |
| 361 | if (pass == 0) PFFFT_FUNC(transform)(s, tmp, out, 0, PFFFT_BACKWARD); |
| 362 | else PFFFT_FUNC(transform_ordered)(s, tmp, out, 0, PFFFT_BACKWARD); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 363 | memcpy(tmp2, out, Nbytes); |
| 364 | memcpy(out, tmp, Nbytes); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 365 | if (pass == 0) PFFFT_FUNC(transform)(s, out, out, 0, PFFFT_BACKWARD); |
| 366 | else PFFFT_FUNC(transform_ordered)(s, out, out, 0, PFFFT_BACKWARD); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 367 | for (k = 0; k < Nfloat; ++k) { |
| 368 | assert(tmp2[k] == out[k]); |
| 369 | out[k] *= 1.f/N; |
| 370 | } |
| 371 | for (k = 0; k < Nfloat; ++k) { |
| 372 | if (fabs(in[k] - out[k]) > 1e-3 * ref_max) { |
| 373 | printf("pass=%d, %s IFFFT does not match for N=%d\n", pass, (cplx?"CPLX":"REAL"), N); break; |
| 374 | exit(1); |
| 375 | } |
| 376 | } |
| 377 | } |
| 378 | |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 379 | /* quick test of the circular convolution in fft domain */ |
Julien Pommier | 432b3e8 | 2013-01-12 19:28:03 +0100 | [diff] [blame] | 380 | { |
| 381 | float conv_err = 0, conv_max = 0; |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 382 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 383 | PFFFT_FUNC(zreorder)(s, ref, tmp, PFFFT_FORWARD); |
Julien Pommier | 432b3e8 | 2013-01-12 19:28:03 +0100 | [diff] [blame] | 384 | memset(out, 0, Nbytes); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 385 | PFFFT_FUNC(zconvolve_accumulate)(s, ref, ref, out, 1.0); |
| 386 | PFFFT_FUNC(zreorder)(s, out, tmp2, PFFFT_FORWARD); |
Julien Pommier | 432b3e8 | 2013-01-12 19:28:03 +0100 | [diff] [blame] | 387 | |
| 388 | for (k=0; k < Nfloat; k += 2) { |
| 389 | float ar = tmp[k], ai=tmp[k+1]; |
| 390 | if (cplx || k > 0) { |
| 391 | tmp[k] = ar*ar - ai*ai; |
| 392 | tmp[k+1] = 2*ar*ai; |
| 393 | } else { |
| 394 | tmp[0] = ar*ar; |
| 395 | tmp[1] = ai*ai; |
| 396 | } |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 397 | } |
Julien Pommier | 432b3e8 | 2013-01-12 19:28:03 +0100 | [diff] [blame] | 398 | |
| 399 | for (k=0; k < Nfloat; ++k) { |
| 400 | float d = fabs(tmp[k] - tmp2[k]), e = fabs(tmp[k]); |
| 401 | if (d > conv_err) conv_err = d; |
| 402 | if (e > conv_max) conv_max = e; |
| 403 | } |
| 404 | if (conv_err > 1e-5*conv_max) { |
| 405 | printf("zconvolve error ? %g %g\n", conv_err, conv_max); exit(1); |
| 406 | } |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 407 | } |
| 408 | |
| 409 | } |
| 410 | |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 411 | printf("%s PFFFT is OK for N=%d\n", (cplx?"CPLX":"REAL"), N); fflush(stdout); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 412 | |
| 413 | PFFFT_FUNC(destroy_setup)(s); |
| 414 | PFFFT_FUNC(aligned_free)(ref); |
| 415 | PFFFT_FUNC(aligned_free)(in); |
| 416 | PFFFT_FUNC(aligned_free)(out); |
| 417 | PFFFT_FUNC(aligned_free)(tmp); |
| 418 | PFFFT_FUNC(aligned_free)(tmp2); |
| 419 | |
| 420 | #endif /* PFFFT_ENABLE_FLOAT */ |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 421 | } |
| 422 | |
| 423 | void pffft_validate(int cplx) { |
Julien Pommier | 432b3e8 | 2013-01-12 19:28:03 +0100 | [diff] [blame] | 424 | static int Ntest[] = { 16, 32, 64, 96, 128, 160, 192, 256, 288, 384, 5*96, 512, 576, 5*128, 800, 864, 1024, 2048, 2592, 4000, 4096, 12000, 36864, 0}; |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 425 | int k; |
| 426 | for (k = 0; Ntest[k]; ++k) { |
| 427 | int N = Ntest[k]; |
| 428 | if (N == 16 && !cplx) continue; |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 429 | pffft_validate_N(N, cplx); |
| 430 | } |
| 431 | } |
| 432 | |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 433 | int array_output_format = 1; |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 434 | |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 435 | |
| 436 | void print_table(const char *txt, FILE *tableFile) { |
| 437 | fprintf(stdout, "%s", txt); |
| 438 | if (tableFile && tableFile != stdout) |
| 439 | fprintf(tableFile, "%s", txt); |
| 440 | } |
| 441 | |
| 442 | void print_table_flops(float mflops, FILE *tableFile) { |
| 443 | fprintf(stdout, "|%11.0f ", mflops); |
| 444 | if (tableFile && tableFile != stdout) |
| 445 | fprintf(tableFile, "|%11.0f ", mflops); |
| 446 | } |
| 447 | |
| 448 | void print_table_fftsize(int N, FILE *tableFile) { |
| 449 | fprintf(stdout, "|%9d ", N); |
| 450 | if (tableFile && tableFile != stdout) |
| 451 | fprintf(tableFile, "|%9d ", N); |
| 452 | } |
| 453 | |
| 454 | double show_output(const char *name, int N, int cplx, float flops, float t0, float t1, int max_iter, FILE *tableFile) { |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 455 | double T = (double)(t1-t0)/2/max_iter * 1e9; |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 456 | float mflops = flops/1e6/(t1 - t0 + 1e-16); |
| 457 | if (array_output_format) { |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 458 | if (flops != -1) |
| 459 | print_table_flops(mflops, tableFile); |
| 460 | else |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 461 | print_table("| n/a ", tableFile); |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 462 | } else { |
| 463 | if (flops != -1) { |
| 464 | printf("N=%5d, %s %16s : %6.0f MFlops [t=%6.0f ns, %d runs]\n", N, (cplx?"CPLX":"REAL"), name, mflops, (t1-t0)/2/max_iter * 1e9, max_iter); |
| 465 | } |
| 466 | } |
| 467 | fflush(stdout); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 468 | return T; |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 469 | } |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 470 | |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 471 | double cal_benchmark(int N, int cplx) { |
| 472 | const int log2N = Log2(N); |
| 473 | int Nfloat = (cplx ? N*2 : N); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 474 | int Nbytes = Nfloat * sizeof(pffft_scalar); |
| 475 | pffft_scalar *X = PFFFT_FUNC(aligned_malloc)(Nbytes), *Y = PFFFT_FUNC(aligned_malloc)(Nbytes), *Z = PFFFT_FUNC(aligned_malloc)(Nbytes); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 476 | double t0, t1, tstop, T, nI; |
| 477 | int k, iter; |
| 478 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 479 | assert( PFFFT_FUNC(is_power_of_two)(N) ); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 480 | for (k = 0; k < Nfloat; ++k) { |
| 481 | X[k] = sqrtf(k+1); |
| 482 | } |
| 483 | |
| 484 | /* PFFFT-U (unordered) benchmark */ |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 485 | PFFFT_SETUP *s = PFFFT_FUNC(new_setup)(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 486 | assert(s); |
| 487 | iter = 0; |
| 488 | t0 = uclock_sec(); |
| 489 | tstop = t0 + 0.25; /* benchmark duration: 250 ms */ |
| 490 | do { |
| 491 | for ( k = 0; k < 512; ++k ) { |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 492 | PFFFT_FUNC(transform)(s, X, Z, Y, PFFFT_FORWARD); |
| 493 | PFFFT_FUNC(transform)(s, X, Z, Y, PFFFT_BACKWARD); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 494 | ++iter; |
| 495 | } |
| 496 | t1 = uclock_sec(); |
| 497 | } while ( t1 < tstop ); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 498 | PFFFT_FUNC(destroy_setup)(s); |
| 499 | PFFFT_FUNC(aligned_free)(X); |
| 500 | PFFFT_FUNC(aligned_free)(Y); |
| 501 | PFFFT_FUNC(aligned_free)(Z); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 502 | |
| 503 | T = ( t1 - t0 ); /* duration per fft() */ |
| 504 | nI = ((double)iter) * ( log2N * N ); /* number of iterations "normalized" to O(N) = N*log2(N) */ |
| 505 | return (nI / T); /* normalized iterations per second */ |
| 506 | } |
| 507 | |
| 508 | |
| 509 | |
| 510 | void benchmark_ffts(int N, int cplx, int withFFTWfullMeas, double iterCal, double tmeas[NUM_TYPES][NUM_FFT_ALGOS], int haveAlgo[NUM_FFT_ALGOS], FILE *tableFile ) { |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 511 | const int log2N = Log2(N); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 512 | int nextPow2N = PFFFT_FUNC(next_power_of_two)(N); |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 513 | int log2NextN = Log2(nextPow2N); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 514 | int pffftPow2N = nextPow2N; |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 515 | |
| 516 | int Nfloat = (cplx ? MAX(nextPow2N, pffftPow2N)*2 : MAX(nextPow2N, pffftPow2N)); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 517 | int Nmax, k, iter; |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 518 | int Nbytes = Nfloat * sizeof(pffft_scalar); |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 519 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 520 | pffft_scalar *X = PFFFT_FUNC(aligned_malloc)(Nbytes + sizeof(pffft_scalar)), *Y = PFFFT_FUNC(aligned_malloc)(Nbytes + 2*sizeof(pffft_scalar) ), *Z = PFFFT_FUNC(aligned_malloc)(Nbytes); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 521 | double te, t0, t1, tstop, flops, Tfastest; |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 522 | |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 523 | const double max_test_duration = 0.150; /* test duration 150 ms */ |
| 524 | double numIter = max_test_duration * iterCal / ( log2N * N ); /* number of iteration for max_test_duration */ |
| 525 | const int step_iter = MAX(1, ((int)(0.01 * numIter)) ); /* one hundredth */ |
| 526 | int max_iter = MAX(1, ((int)numIter) ); /* minimum 1 iteration */ |
| 527 | |
| 528 | const float checkVal = 12345.0F; |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 529 | |
| 530 | /* printf("benchmark_ffts(N = %d, cplx = %d): Nfloat = %d, X_mem = 0x%p, X = %p\n", N, cplx, Nfloat, X_mem, X); */ |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 531 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 532 | memset( X, 0, Nfloat * sizeof(pffft_scalar) ); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 533 | if ( Nfloat < 32 ) { |
| 534 | for (k = 0; k < Nfloat; k += 4) |
| 535 | X[k] = sqrtf(k+1); |
| 536 | } else { |
| 537 | for (k = 0; k < Nfloat; k += (Nfloat/16) ) |
| 538 | X[k] = sqrtf(k+1); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 539 | } |
| 540 | |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 541 | for ( k = 0; k < NUM_TYPES; ++k ) |
| 542 | { |
| 543 | for ( iter = 0; iter < NUM_FFT_ALGOS; ++iter ) |
| 544 | tmeas[k][iter] = 0.0; |
| 545 | } |
| 546 | |
| 547 | |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 548 | /* FFTPack benchmark */ |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 549 | Nmax = (cplx ? N*2 : N); |
| 550 | X[Nmax] = checkVal; |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 551 | #ifdef PFFFT_ENABLE_FLOAT |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 552 | { |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 553 | float *wrk = malloc(2*Nbytes + 15*sizeof(pffft_scalar)); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 554 | te = uclock_sec(); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 555 | if (cplx) cffti(N, wrk); |
| 556 | else rffti(N, wrk); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 557 | t0 = uclock_sec(); |
| 558 | tstop = t0 + max_test_duration; |
| 559 | max_iter = 0; |
| 560 | do { |
| 561 | for ( k = 0; k < step_iter; ++k ) { |
| 562 | if (cplx) { |
| 563 | assert( X[Nmax] == checkVal ); |
| 564 | cfftf(N, X, wrk); |
| 565 | assert( X[Nmax] == checkVal ); |
| 566 | cfftb(N, X, wrk); |
| 567 | assert( X[Nmax] == checkVal ); |
| 568 | } else { |
| 569 | assert( X[Nmax] == checkVal ); |
| 570 | rfftf(N, X, wrk); |
| 571 | assert( X[Nmax] == checkVal ); |
| 572 | rfftb(N, X, wrk); |
| 573 | assert( X[Nmax] == checkVal ); |
| 574 | } |
| 575 | ++max_iter; |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 576 | } |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 577 | t1 = uclock_sec(); |
| 578 | } while ( t1 < tstop ); |
| 579 | |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 580 | free(wrk); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 581 | |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 582 | flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 583 | tmeas[TYPE_ITER][ALGO_FFTPACK] = max_iter; |
hayati ayguen | 42ee6f1 | 2019-12-25 23:08:04 +0100 | [diff] [blame] | 584 | tmeas[TYPE_MFLOPS][ALGO_FFTPACK] = flops/1e6/(t1 - t0 + 1e-16); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 585 | tmeas[TYPE_DUR_TOT][ALGO_FFTPACK] = t1 - t0; |
| 586 | tmeas[TYPE_DUR_NS][ALGO_FFTPACK] = show_output("FFTPack", N, cplx, flops, t0, t1, max_iter, tableFile); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 587 | tmeas[TYPE_PREP][ALGO_FFTPACK] = (t0 - te) * 1e3; |
| 588 | haveAlgo[ALGO_FFTPACK] = 1; |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 589 | } |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 590 | #endif |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 591 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 592 | #if defined(HAVE_VECLIB) && defined(PFFFT_ENABLE_FLOAT) |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 593 | Nmax = (cplx ? nextPow2N*2 : nextPow2N); |
| 594 | X[Nmax] = checkVal; |
| 595 | te = uclock_sec(); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 596 | if ( 1 || PFFFT_FUNC(is_power_of_two)(N) ) { |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 597 | FFTSetup setup; |
| 598 | |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 599 | setup = vDSP_create_fftsetup(log2NextN, FFT_RADIX2); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 600 | DSPSplitComplex zsamples; |
| 601 | zsamples.realp = &X[0]; |
| 602 | zsamples.imagp = &X[Nfloat/2]; |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 603 | t0 = uclock_sec(); |
| 604 | tstop = t0 + max_test_duration; |
| 605 | max_iter = 0; |
| 606 | do { |
| 607 | for ( k = 0; k < step_iter; ++k ) { |
| 608 | if (cplx) { |
| 609 | assert( X[Nmax] == checkVal ); |
| 610 | vDSP_fft_zip(setup, &zsamples, 1, log2NextN, kFFTDirection_Forward); |
| 611 | assert( X[Nmax] == checkVal ); |
| 612 | vDSP_fft_zip(setup, &zsamples, 1, log2NextN, kFFTDirection_Inverse); |
| 613 | assert( X[Nmax] == checkVal ); |
| 614 | } else { |
| 615 | assert( X[Nmax] == checkVal ); |
| 616 | vDSP_fft_zrip(setup, &zsamples, 1, log2NextN, kFFTDirection_Forward); |
| 617 | assert( X[Nmax] == checkVal ); |
| 618 | vDSP_fft_zrip(setup, &zsamples, 1, log2NextN, kFFTDirection_Inverse); |
| 619 | assert( X[Nmax] == checkVal ); |
| 620 | } |
| 621 | ++max_iter; |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 622 | } |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 623 | t1 = uclock_sec(); |
| 624 | } while ( t1 < tstop ); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 625 | |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 626 | vDSP_destroy_fftsetup(setup); |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 627 | flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 628 | tmeas[TYPE_ITER][ALGO_VECLIB] = max_iter; |
hayati ayguen | 42ee6f1 | 2019-12-25 23:08:04 +0100 | [diff] [blame] | 629 | tmeas[TYPE_MFLOPS][ALGO_VECLIB] = flops/1e6/(t1 - t0 + 1e-16); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 630 | tmeas[TYPE_DUR_TOT][ALGO_VECLIB] = t1 - t0; |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 631 | tmeas[TYPE_DUR_NS][ALGO_VECLIB] = show_output("vDSP", N, cplx, flops, t0, t1, max_iter, tableFile); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 632 | tmeas[TYPE_PREP][ALGO_VECLIB] = (t0 - te) * 1e3; |
| 633 | haveAlgo[ALGO_VECLIB] = 1; |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 634 | } else { |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 635 | show_output("vDSP", N, cplx, -1, -1, -1, -1, tableFile); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 636 | } |
| 637 | #endif |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 638 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 639 | #if defined(HAVE_FFTW) |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 640 | Nmax = (cplx ? N*2 : N); |
| 641 | X[Nmax] = checkVal; |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 642 | { |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 643 | /* int flags = (N <= (256*1024) ? FFTW_MEASURE : FFTW_ESTIMATE); measure takes a lot of time on largest ffts */ |
| 644 | int flags = FFTW_ESTIMATE; |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 645 | te = uclock_sec(); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 646 | |
| 647 | FFTW_PLAN planf, planb; |
| 648 | FFTW_COMPLEX *in = (FFTW_COMPLEX*) FFTW_FUNC(malloc)(sizeof(FFTW_COMPLEX) * N); |
| 649 | FFTW_COMPLEX *out = (FFTW_COMPLEX*) FFTW_FUNC(malloc)(sizeof(FFTW_COMPLEX) * N); |
| 650 | memset(in, 0, sizeof(FFTW_COMPLEX) * N); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 651 | if (cplx) { |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 652 | planf = FFTW_FUNC(plan_dft_1d)(N, in, out, FFTW_FORWARD, flags); |
| 653 | planb = FFTW_FUNC(plan_dft_1d)(N, in, out, FFTW_BACKWARD, flags); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 654 | } else { |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 655 | planf = FFTW_FUNC(plan_dft_r2c_1d)(N, (pffft_scalar*)in, out, flags); |
| 656 | planb = FFTW_FUNC(plan_dft_c2r_1d)(N, in, (pffft_scalar*)out, flags); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 657 | } |
| 658 | |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 659 | t0 = uclock_sec(); |
| 660 | tstop = t0 + max_test_duration; |
| 661 | max_iter = 0; |
| 662 | do { |
| 663 | for ( k = 0; k < step_iter; ++k ) { |
| 664 | assert( X[Nmax] == checkVal ); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 665 | FFTW_FUNC(execute)(planf); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 666 | assert( X[Nmax] == checkVal ); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 667 | FFTW_FUNC(execute)(planb); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 668 | assert( X[Nmax] == checkVal ); |
| 669 | ++max_iter; |
| 670 | } |
| 671 | t1 = uclock_sec(); |
| 672 | } while ( t1 < tstop ); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 673 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 674 | FFTW_FUNC(destroy_plan)(planf); |
| 675 | FFTW_FUNC(destroy_plan)(planb); |
| 676 | FFTW_FUNC(free)(in); FFTW_FUNC(free)(out); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 677 | |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 678 | flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 679 | tmeas[TYPE_ITER][ALGO_FFTW_ESTIM] = max_iter; |
hayati ayguen | 42ee6f1 | 2019-12-25 23:08:04 +0100 | [diff] [blame] | 680 | tmeas[TYPE_MFLOPS][ALGO_FFTW_ESTIM] = flops/1e6/(t1 - t0 + 1e-16); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 681 | tmeas[TYPE_DUR_TOT][ALGO_FFTW_ESTIM] = t1 - t0; |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 682 | tmeas[TYPE_DUR_NS][ALGO_FFTW_ESTIM] = show_output((flags == FFTW_MEASURE ? algoName[ALGO_FFTW_AUTO] : algoName[ALGO_FFTW_ESTIM]), N, cplx, flops, t0, t1, max_iter, tableFile); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 683 | tmeas[TYPE_PREP][ALGO_FFTW_ESTIM] = (t0 - te) * 1e3; |
| 684 | haveAlgo[ALGO_FFTW_ESTIM] = 1; |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 685 | } |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 686 | Nmax = (cplx ? N*2 : N); |
| 687 | X[Nmax] = checkVal; |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 688 | do { |
| 689 | /* int flags = (N <= (256*1024) ? FFTW_MEASURE : FFTW_ESTIMATE); measure takes a lot of time on largest ffts */ |
| 690 | /* int flags = FFTW_MEASURE; */ |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 691 | #if ( defined(__arm__) || defined(__aarch64__) || defined(__arm64__) ) |
| 692 | int limitFFTsize = 31; /* takes over a second on Raspberry Pi 3 B+ -- and much much more on higher ffts sizes! */ |
| 693 | #else |
| 694 | int limitFFTsize = 2400; /* take over a second on i7 for fft size 2400 */ |
| 695 | #endif |
| 696 | int flags = (N < limitFFTsize ? FFTW_MEASURE : (withFFTWfullMeas ? FFTW_MEASURE : FFTW_ESTIMATE)); |
| 697 | |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 698 | if (flags == FFTW_ESTIMATE) { |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 699 | show_output((flags == FFTW_MEASURE ? algoName[ALGO_FFTW_AUTO] : algoName[ALGO_FFTW_ESTIM]), N, cplx, -1, -1, -1, -1, tableFile); |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 700 | /* copy values from estimation */ |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 701 | tmeas[TYPE_ITER][ALGO_FFTW_AUTO] = tmeas[TYPE_ITER][ALGO_FFTW_ESTIM]; |
| 702 | tmeas[TYPE_DUR_TOT][ALGO_FFTW_AUTO] = tmeas[TYPE_DUR_TOT][ALGO_FFTW_ESTIM]; |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 703 | tmeas[TYPE_DUR_NS][ALGO_FFTW_AUTO] = tmeas[TYPE_DUR_NS][ALGO_FFTW_ESTIM]; |
| 704 | tmeas[TYPE_PREP][ALGO_FFTW_AUTO] = tmeas[TYPE_PREP][ALGO_FFTW_ESTIM]; |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 705 | } else { |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 706 | te = uclock_sec(); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 707 | FFTW_PLAN planf, planb; |
| 708 | FFTW_COMPLEX *in = (FFTW_COMPLEX*) FFTW_FUNC(malloc)(sizeof(FFTW_COMPLEX) * N); |
| 709 | FFTW_COMPLEX *out = (FFTW_COMPLEX*) FFTW_FUNC(malloc)(sizeof(FFTW_COMPLEX) * N); |
| 710 | memset(in, 0, sizeof(FFTW_COMPLEX) * N); |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 711 | if (cplx) { |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 712 | planf = FFTW_FUNC(plan_dft_1d)(N, in, out, FFTW_FORWARD, flags); |
| 713 | planb = FFTW_FUNC(plan_dft_1d)(N, in, out, FFTW_BACKWARD, flags); |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 714 | } else { |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 715 | planf = FFTW_FUNC(plan_dft_r2c_1d)(N, (pffft_scalar*)in, out, flags); |
| 716 | planb = FFTW_FUNC(plan_dft_c2r_1d)(N, in, (pffft_scalar*)out, flags); |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 717 | } |
| 718 | |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 719 | t0 = uclock_sec(); |
| 720 | tstop = t0 + max_test_duration; |
| 721 | max_iter = 0; |
| 722 | do { |
| 723 | for ( k = 0; k < step_iter; ++k ) { |
| 724 | assert( X[Nmax] == checkVal ); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 725 | FFTW_FUNC(execute)(planf); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 726 | assert( X[Nmax] == checkVal ); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 727 | FFTW_FUNC(execute)(planb); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 728 | assert( X[Nmax] == checkVal ); |
| 729 | ++max_iter; |
| 730 | } |
| 731 | t1 = uclock_sec(); |
| 732 | } while ( t1 < tstop ); |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 733 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 734 | FFTW_FUNC(destroy_plan)(planf); |
| 735 | FFTW_FUNC(destroy_plan)(planb); |
| 736 | FFTW_FUNC(free)(in); FFTW_FUNC(free)(out); |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 737 | |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 738 | flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 739 | tmeas[TYPE_ITER][ALGO_FFTW_AUTO] = max_iter; |
hayati ayguen | 42ee6f1 | 2019-12-25 23:08:04 +0100 | [diff] [blame] | 740 | tmeas[TYPE_MFLOPS][ALGO_FFTW_AUTO] = flops/1e6/(t1 - t0 + 1e-16); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 741 | tmeas[TYPE_DUR_TOT][ALGO_FFTW_AUTO] = t1 - t0; |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 742 | tmeas[TYPE_DUR_NS][ALGO_FFTW_AUTO] = show_output((flags == FFTW_MEASURE ? algoName[ALGO_FFTW_AUTO] : algoName[ALGO_FFTW_ESTIM]), N, cplx, flops, t0, t1, max_iter, tableFile); |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 743 | tmeas[TYPE_PREP][ALGO_FFTW_AUTO] = (t0 - te) * 1e3; |
| 744 | haveAlgo[ALGO_FFTW_AUTO] = 1; |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 745 | } |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 746 | } while (0); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 747 | #else |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 748 | (void)withFFTWfullMeas; |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 749 | #endif |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 750 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 751 | #if defined(HAVE_GREEN_FFTS) && defined(PFFFT_ENABLE_FLOAT) |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 752 | Nmax = (cplx ? nextPow2N*2 : nextPow2N); |
| 753 | X[Nmax] = checkVal; |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 754 | if ( 1 || PFFFT_FUNC(is_power_of_two)(N) ) |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 755 | { |
| 756 | te = uclock_sec(); |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 757 | fftInit(log2NextN); |
| 758 | |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 759 | t0 = uclock_sec(); |
| 760 | tstop = t0 + max_test_duration; |
| 761 | max_iter = 0; |
| 762 | do { |
| 763 | for ( k = 0; k < step_iter; ++k ) { |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 764 | if (cplx) { |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 765 | assert( X[Nmax] == checkVal ); |
| 766 | ffts(X, log2NextN, 1); |
| 767 | assert( X[Nmax] == checkVal ); |
| 768 | iffts(X, log2NextN, 1); |
| 769 | assert( X[Nmax] == checkVal ); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 770 | } else { |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 771 | rffts(X, log2NextN, 1); |
| 772 | riffts(X, log2NextN, 1); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 773 | } |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 774 | |
| 775 | ++max_iter; |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 776 | } |
| 777 | t1 = uclock_sec(); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 778 | } while ( t1 < tstop ); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 779 | |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 780 | fftFree(); |
| 781 | |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 782 | flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 783 | tmeas[TYPE_ITER][ALGO_GREEN] = max_iter; |
| 784 | tmeas[TYPE_MFLOPS][ALGO_GREEN] = flops/1e6/(t1 - t0 + 1e-16); |
| 785 | tmeas[TYPE_DUR_TOT][ALGO_GREEN] = t1 - t0; |
| 786 | tmeas[TYPE_DUR_NS][ALGO_GREEN] = show_output("Green", N, cplx, flops, t0, t1, max_iter, tableFile); |
| 787 | tmeas[TYPE_PREP][ALGO_GREEN] = (t0 - te) * 1e3; |
| 788 | haveAlgo[ALGO_GREEN] = 1; |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 789 | } else { |
| 790 | show_output("Green", N, cplx, -1, -1, -1, -1, tableFile); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 791 | } |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 792 | #endif |
| 793 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 794 | #if defined(HAVE_KISS_FFT) && defined(PFFFT_ENABLE_FLOAT) |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 795 | Nmax = (cplx ? nextPow2N*2 : nextPow2N); |
| 796 | X[Nmax] = checkVal; |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 797 | if ( 1 || PFFFT_FUNC(is_power_of_two)(N) ) |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 798 | { |
| 799 | kiss_fft_cfg stf; |
| 800 | kiss_fft_cfg sti; |
| 801 | kiss_fftr_cfg stfr; |
| 802 | kiss_fftr_cfg stir; |
| 803 | |
| 804 | te = uclock_sec(); |
| 805 | if (cplx) { |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 806 | stf = kiss_fft_alloc(nextPow2N, 0, 0, 0); |
| 807 | sti = kiss_fft_alloc(nextPow2N, 1, 0, 0); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 808 | } else { |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 809 | stfr = kiss_fftr_alloc(nextPow2N, 0, 0, 0); |
| 810 | stir = kiss_fftr_alloc(nextPow2N, 1, 0, 0); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 811 | } |
| 812 | |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 813 | t0 = uclock_sec(); |
| 814 | tstop = t0 + max_test_duration; |
| 815 | max_iter = 0; |
| 816 | do { |
| 817 | for ( k = 0; k < step_iter; ++k ) { |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 818 | if (cplx) { |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 819 | assert( X[Nmax] == checkVal ); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 820 | kiss_fft(stf, (const kiss_fft_cpx *)X, (kiss_fft_cpx *)Y); |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 821 | assert( X[Nmax] == checkVal ); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 822 | kiss_fft(sti, (const kiss_fft_cpx *)Y, (kiss_fft_cpx *)X); |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 823 | assert( X[Nmax] == checkVal ); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 824 | } else { |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 825 | assert( X[Nmax] == checkVal ); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 826 | kiss_fftr(stfr, X, (kiss_fft_cpx *)Y); |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 827 | assert( X[Nmax] == checkVal ); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 828 | kiss_fftri(stir, (const kiss_fft_cpx *)Y, X); |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 829 | assert( X[Nmax] == checkVal ); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 830 | } |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 831 | ++max_iter; |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 832 | } |
| 833 | t1 = uclock_sec(); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 834 | } while ( t1 < tstop ); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 835 | |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 836 | kiss_fft_cleanup(); |
| 837 | |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 838 | flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 839 | tmeas[TYPE_ITER][ALGO_KISS] = max_iter; |
| 840 | tmeas[TYPE_MFLOPS][ALGO_KISS] = flops/1e6/(t1 - t0 + 1e-16); |
| 841 | tmeas[TYPE_DUR_TOT][ALGO_KISS] = t1 - t0; |
| 842 | tmeas[TYPE_DUR_NS][ALGO_KISS] = show_output("Kiss", N, cplx, flops, t0, t1, max_iter, tableFile); |
| 843 | tmeas[TYPE_PREP][ALGO_KISS] = (t0 - te) * 1e3; |
| 844 | haveAlgo[ALGO_KISS] = 1; |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 845 | } else { |
| 846 | show_output("Kiss", N, cplx, -1, -1, -1, -1, tableFile); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 847 | } |
| 848 | #endif |
| 849 | |
hayati ayguen | 223c62a | 2020-06-13 01:59:49 +0200 | [diff] [blame] | 850 | #if defined(HAVE_POCKET_FFT) |
| 851 | |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 852 | Nmax = (cplx ? nextPow2N*2 : nextPow2N); |
| 853 | X[Nmax] = checkVal; |
| 854 | if ( 1 || PFFFT_FUNC(is_power_of_two)(N) ) |
| 855 | { |
hayati ayguen | 223c62a | 2020-06-13 01:59:49 +0200 | [diff] [blame] | 856 | POCKFFTR_PRE(_plan) planr; |
| 857 | POCKFFTC_PRE(_plan) planc; |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 858 | |
| 859 | te = uclock_sec(); |
| 860 | if (cplx) { |
hayati ayguen | 223c62a | 2020-06-13 01:59:49 +0200 | [diff] [blame] | 861 | planc = POCKFFTC_MID(make_,_plan)(nextPow2N); |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 862 | } else { |
hayati ayguen | 223c62a | 2020-06-13 01:59:49 +0200 | [diff] [blame] | 863 | planr = POCKFFTR_MID(make_,_plan)(nextPow2N); |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 864 | } |
| 865 | |
| 866 | t0 = uclock_sec(); |
| 867 | tstop = t0 + max_test_duration; |
| 868 | max_iter = 0; |
| 869 | do { |
| 870 | for ( k = 0; k < step_iter; ++k ) { |
| 871 | if (cplx) { |
| 872 | assert( X[Nmax] == checkVal ); |
| 873 | memcpy(Y, X, 2*nextPow2N * sizeof(pffft_scalar) ); |
hayati ayguen | 223c62a | 2020-06-13 01:59:49 +0200 | [diff] [blame] | 874 | POCKFFTC_PRE(_forward)(planc, Y, 1.); |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 875 | assert( X[Nmax] == checkVal ); |
| 876 | memcpy(X, Y, 2*nextPow2N * sizeof(pffft_scalar) ); |
hayati ayguen | 223c62a | 2020-06-13 01:59:49 +0200 | [diff] [blame] | 877 | POCKFFTC_PRE(_backward)(planc, X, 1./nextPow2N); |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 878 | assert( X[Nmax] == checkVal ); |
| 879 | } else { |
| 880 | assert( X[Nmax] == checkVal ); |
| 881 | memcpy(Y, X, nextPow2N * sizeof(pffft_scalar) ); |
hayati ayguen | 223c62a | 2020-06-13 01:59:49 +0200 | [diff] [blame] | 882 | POCKFFTR_PRE(_forward)(planr, Y, 1.); |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 883 | assert( X[Nmax] == checkVal ); |
| 884 | memcpy(X, Y, nextPow2N * sizeof(pffft_scalar) ); |
hayati ayguen | 223c62a | 2020-06-13 01:59:49 +0200 | [diff] [blame] | 885 | POCKFFTR_PRE(_backward)(planr, X, 1./nextPow2N); |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 886 | assert( X[Nmax] == checkVal ); |
| 887 | } |
| 888 | ++max_iter; |
| 889 | } |
| 890 | t1 = uclock_sec(); |
| 891 | } while ( t1 < tstop ); |
| 892 | |
| 893 | if (cplx) { |
hayati ayguen | 223c62a | 2020-06-13 01:59:49 +0200 | [diff] [blame] | 894 | POCKFFTC_MID(destroy_,_plan)(planc); |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 895 | } else { |
hayati ayguen | 223c62a | 2020-06-13 01:59:49 +0200 | [diff] [blame] | 896 | POCKFFTR_MID(destroy_,_plan)(planr); |
hayati ayguen | 1c17fd4 | 2020-06-12 18:21:26 +0000 | [diff] [blame] | 897 | } |
| 898 | |
| 899 | flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ |
| 900 | tmeas[TYPE_ITER][ALGO_POCKET] = max_iter; |
| 901 | tmeas[TYPE_MFLOPS][ALGO_POCKET] = flops/1e6/(t1 - t0 + 1e-16); |
| 902 | tmeas[TYPE_DUR_TOT][ALGO_POCKET] = t1 - t0; |
| 903 | tmeas[TYPE_DUR_NS][ALGO_POCKET] = show_output("Pocket", N, cplx, flops, t0, t1, max_iter, tableFile); |
| 904 | tmeas[TYPE_PREP][ALGO_POCKET] = (t0 - te) * 1e3; |
| 905 | haveAlgo[ALGO_POCKET] = 1; |
| 906 | } else { |
| 907 | show_output("Pocket", N, cplx, -1, -1, -1, -1, tableFile); |
| 908 | } |
| 909 | #endif |
| 910 | |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 911 | |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 912 | /* PFFFT-U (unordered) benchmark */ |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 913 | Nmax = (cplx ? pffftPow2N*2 : pffftPow2N); |
| 914 | X[Nmax] = checkVal; |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 915 | if ( pffftPow2N >= PFFFT_FUNC(min_fft_size)(cplx ? PFFFT_COMPLEX : PFFFT_REAL) ) |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 916 | { |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 917 | te = uclock_sec(); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 918 | PFFFT_SETUP *s = PFFFT_FUNC(new_setup)(pffftPow2N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); |
Julien Pommier | 0302e8a | 2012-10-11 18:04:09 +0200 | [diff] [blame] | 919 | if (s) { |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 920 | t0 = uclock_sec(); |
| 921 | tstop = t0 + max_test_duration; |
| 922 | max_iter = 0; |
| 923 | do { |
| 924 | for ( k = 0; k < step_iter; ++k ) { |
| 925 | assert( X[Nmax] == checkVal ); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 926 | PFFFT_FUNC(transform)(s, X, Z, Y, PFFFT_FORWARD); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 927 | assert( X[Nmax] == checkVal ); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 928 | PFFFT_FUNC(transform)(s, X, Z, Y, PFFFT_BACKWARD); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 929 | assert( X[Nmax] == checkVal ); |
| 930 | ++max_iter; |
| 931 | } |
| 932 | t1 = uclock_sec(); |
| 933 | } while ( t1 < tstop ); |
| 934 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 935 | PFFFT_FUNC(destroy_setup)(s); |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 936 | |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 937 | flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 938 | tmeas[TYPE_ITER][ALGO_PFFFT_U] = max_iter; |
| 939 | tmeas[TYPE_MFLOPS][ALGO_PFFFT_U] = flops/1e6/(t1 - t0 + 1e-16); |
| 940 | tmeas[TYPE_DUR_TOT][ALGO_PFFFT_U] = t1 - t0; |
| 941 | tmeas[TYPE_DUR_NS][ALGO_PFFFT_U] = show_output("PFFFT-U", N, cplx, flops, t0, t1, max_iter, tableFile); |
| 942 | tmeas[TYPE_PREP][ALGO_PFFFT_U] = (t0 - te) * 1e3; |
| 943 | haveAlgo[ALGO_PFFFT_U] = 1; |
| 944 | } |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 945 | } else { |
| 946 | show_output("PFFFT-U", N, cplx, -1, -1, -1, -1, tableFile); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 947 | } |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 948 | |
| 949 | |
| 950 | if ( pffftPow2N >= PFFFT_FUNC(min_fft_size)(cplx ? PFFFT_COMPLEX : PFFFT_REAL) ) |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 951 | { |
| 952 | te = uclock_sec(); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 953 | PFFFT_SETUP *s = PFFFT_FUNC(new_setup)(pffftPow2N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 954 | if (s) { |
| 955 | t0 = uclock_sec(); |
| 956 | tstop = t0 + max_test_duration; |
| 957 | max_iter = 0; |
| 958 | do { |
| 959 | for ( k = 0; k < step_iter; ++k ) { |
| 960 | assert( X[Nmax] == checkVal ); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 961 | PFFFT_FUNC(transform_ordered)(s, X, Z, Y, PFFFT_FORWARD); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 962 | assert( X[Nmax] == checkVal ); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 963 | PFFFT_FUNC(transform_ordered)(s, X, Z, Y, PFFFT_BACKWARD); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 964 | assert( X[Nmax] == checkVal ); |
| 965 | ++max_iter; |
| 966 | } |
| 967 | t1 = uclock_sec(); |
| 968 | } while ( t1 < tstop ); |
| 969 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 970 | PFFFT_FUNC(destroy_setup)(s); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 971 | |
hayati ayguen | c974c1d | 2020-03-29 03:39:30 +0200 | [diff] [blame] | 972 | flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); /* see http://www.fftw.org/speed/method.html */ |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 973 | tmeas[TYPE_ITER][ALGO_PFFFT_O] = max_iter; |
| 974 | tmeas[TYPE_MFLOPS][ALGO_PFFFT_O] = flops/1e6/(t1 - t0 + 1e-16); |
| 975 | tmeas[TYPE_DUR_TOT][ALGO_PFFFT_O] = t1 - t0; |
| 976 | tmeas[TYPE_DUR_NS][ALGO_PFFFT_O] = show_output("PFFFT", N, cplx, flops, t0, t1, max_iter, tableFile); |
| 977 | tmeas[TYPE_PREP][ALGO_PFFFT_O] = (t0 - te) * 1e3; |
| 978 | haveAlgo[ALGO_PFFFT_O] = 1; |
Julien Pommier | 0302e8a | 2012-10-11 18:04:09 +0200 | [diff] [blame] | 979 | } |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 980 | } else { |
| 981 | show_output("PFFFT", N, cplx, -1, -1, -1, -1, tableFile); |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 982 | } |
| 983 | |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 984 | if (!array_output_format) |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 985 | { |
| 986 | printf("prepare/ms: "); |
| 987 | for ( iter = 0; iter < NUM_FFT_ALGOS; ++iter ) |
| 988 | { |
| 989 | if ( haveAlgo[iter] && tmeas[TYPE_DUR_NS][iter] > 0.0 ) { |
| 990 | printf("%s %.3f ", algoName[iter], tmeas[TYPE_PREP][iter] ); |
| 991 | } |
| 992 | } |
| 993 | printf("\n"); |
| 994 | } |
| 995 | Tfastest = 0.0; |
| 996 | for ( iter = 0; iter < NUM_FFT_ALGOS; ++iter ) |
| 997 | { |
| 998 | if ( Tfastest == 0.0 || ( tmeas[TYPE_DUR_NS][iter] != 0.0 && tmeas[TYPE_DUR_NS][iter] < Tfastest ) ) |
| 999 | Tfastest = tmeas[TYPE_DUR_NS][iter]; |
| 1000 | } |
| 1001 | if ( Tfastest > 0.0 ) |
| 1002 | { |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1003 | if (!array_output_format) |
| 1004 | printf("relative fast: "); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1005 | for ( iter = 0; iter < NUM_FFT_ALGOS; ++iter ) |
| 1006 | { |
| 1007 | if ( haveAlgo[iter] && tmeas[TYPE_DUR_NS][iter] > 0.0 ) { |
| 1008 | tmeas[TYPE_DUR_FASTEST][iter] = tmeas[TYPE_DUR_NS][iter] / Tfastest; |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1009 | if (!array_output_format) |
| 1010 | printf("%s %.3f ", algoName[iter], tmeas[TYPE_DUR_FASTEST][iter] ); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1011 | } |
| 1012 | } |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1013 | if (!array_output_format) |
| 1014 | printf("\n"); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1015 | } |
| 1016 | |
| 1017 | { |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1018 | if (!array_output_format) |
| 1019 | printf("relative pffft: "); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1020 | for ( iter = 0; iter < NUM_FFT_ALGOS; ++iter ) |
| 1021 | { |
| 1022 | if ( haveAlgo[iter] && tmeas[TYPE_DUR_NS][iter] > 0.0 ) { |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 1023 | tmeas[TYPE_REL_PFFFT][iter] = tmeas[TYPE_DUR_NS][iter] / tmeas[TYPE_DUR_NS][ALGO_PFFFT_O]; |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1024 | if (!array_output_format) |
| 1025 | printf("%s %.3f ", algoName[iter], tmeas[TYPE_REL_PFFFT][iter] ); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1026 | } |
| 1027 | } |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1028 | if (!array_output_format) |
| 1029 | printf("\n"); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1030 | } |
| 1031 | |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 1032 | if (!array_output_format) { |
| 1033 | printf("--\n"); |
| 1034 | } |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 1035 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 1036 | PFFFT_FUNC(aligned_free)(X); |
| 1037 | PFFFT_FUNC(aligned_free)(Y); |
| 1038 | PFFFT_FUNC(aligned_free)(Z); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 1039 | } |
| 1040 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 1041 | |
| 1042 | /* small functions inside pffft.c that will detect (compiler) bugs with respect to simd instructions */ |
| 1043 | void validate_pffft_simd(); |
| 1044 | int validate_pffft_simd_ex(FILE * DbgOut); |
| 1045 | void validate_pffftd_simd(); |
| 1046 | int validate_pffftd_simd_ex(FILE * DbgOut); |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 1047 | |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 1048 | |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1049 | |
| 1050 | int main(int argc, char **argv) { |
| 1051 | /* unfortunately, the fft size must be a multiple of 16 for complex FFTs |
| 1052 | and 32 for real FFTs -- a lot of stuff would need to be rewritten to |
| 1053 | handle other cases (or maybe just switch to a scalar fft, I don't know..) */ |
| 1054 | |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1055 | #if 0 /* include powers of 2 ? */ |
| 1056 | #define NUMNONPOW2LENS 23 |
| 1057 | int NnonPow2[NUMNONPOW2LENS] = { |
| 1058 | 64, 96, 128, 160, 192, 256, 384, 5*96, 512, 5*128, |
| 1059 | 3*256, 800, 1024, 2048, 2400, 4096, 8192, 9*1024, 16384, 32768, |
| 1060 | 256*1024, 1024*1024, -1 }; |
| 1061 | #else |
| 1062 | #define NUMNONPOW2LENS 11 |
| 1063 | int NnonPow2[NUMNONPOW2LENS] = { |
| 1064 | 96, 160, 192, 384, 5*96, 5*128,3*256, 800, 2400, 9*1024, |
| 1065 | -1 }; |
| 1066 | #endif |
| 1067 | |
hayati ayguen | 482d232 | 2020-08-26 14:51:34 +0200 | [diff] [blame] | 1068 | #define NUMPOW2FFTLENS 22 |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1069 | #define MAXNUMFFTLENS MAX( NUMPOW2FFTLENS, NUMNONPOW2LENS ) |
hayati ayguen | 482d232 | 2020-08-26 14:51:34 +0200 | [diff] [blame] | 1070 | int Npow2[NUMPOW2FFTLENS]; /* exp = 1 .. 21, -1 */ |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1071 | const int *Nvalues = NULL; |
| 1072 | double tmeas[2][MAXNUMFFTLENS][NUM_TYPES][NUM_FFT_ALGOS]; |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 1073 | double iterCalReal, iterCalCplx; |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1074 | |
| 1075 | int benchReal=1, benchCplx=1, withFFTWfullMeas=0, outputTable2File=1, usePow2=1; |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1076 | int realCplxIdx, typeIdx; |
| 1077 | int i, k; |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1078 | FILE *tableFile = NULL; |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1079 | |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1080 | int haveAlgo[NUM_FFT_ALGOS]; |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 1081 | char acCsvFilename[64]; |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1082 | |
| 1083 | for ( k = 1; k <= NUMPOW2FFTLENS; ++k ) |
| 1084 | Npow2[k-1] = (k == NUMPOW2FFTLENS) ? -1 : (1 << k); |
| 1085 | Nvalues = Npow2; /* set default .. for comparisons .. */ |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1086 | |
| 1087 | for ( i = 0; i < NUM_FFT_ALGOS; ++i ) |
| 1088 | haveAlgo[i] = 0; |
| 1089 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 1090 | printf("pffft architecture: '%s'\n", PFFFT_FUNC(simd_arch)()); |
| 1091 | printf("pffft SIMD size: %d\n", PFFFT_FUNC(simd_size)()); |
| 1092 | printf("pffft min real fft: %d\n", PFFFT_FUNC(min_fft_size)(PFFFT_REAL)); |
| 1093 | printf("pffft min complex fft: %d\n", PFFFT_FUNC(min_fft_size)(PFFFT_COMPLEX)); |
| 1094 | printf("\n"); |
| 1095 | |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1096 | for ( i = 1; i < argc; ++i ) { |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1097 | if (!strcmp(argv[i], "--array-format") || !strcmp(argv[i], "--table")) { |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1098 | array_output_format = 1; |
| 1099 | } |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1100 | else if (!strcmp(argv[i], "--no-tab")) { |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1101 | array_output_format = 0; |
| 1102 | } |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1103 | else if (!strcmp(argv[i], "--real")) { |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1104 | benchCplx = 0; |
| 1105 | } |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1106 | else if (!strcmp(argv[i], "--cplx")) { |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1107 | benchReal = 0; |
| 1108 | } |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1109 | else if (!strcmp(argv[i], "--fftw-full-measure")) { |
| 1110 | withFFTWfullMeas = 1; |
| 1111 | } |
| 1112 | else if (!strcmp(argv[i], "--non-pow2")) { |
| 1113 | Nvalues = NnonPow2; |
| 1114 | usePow2 = 0; |
| 1115 | } |
| 1116 | else /* if (!strcmp(argv[i], "--help")) */ { |
| 1117 | printf("usage: %s [--array-format|--table] [--no-tab] [--real|--cplx] [--fftw-full-measure] [--non-pow2]\n", argv[0]); |
| 1118 | exit(0); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1119 | } |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 1120 | } |
| 1121 | |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1122 | #ifdef HAVE_FFTW |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 1123 | #ifdef PFFFT_ENABLE_DOUBLE |
| 1124 | algoName[ALGO_FFTW_ESTIM] = "FFTW D(estim)"; |
| 1125 | algoName[ALGO_FFTW_AUTO] = "FFTW D(auto) "; |
| 1126 | #endif |
| 1127 | |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1128 | if (withFFTWfullMeas) |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1129 | { |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 1130 | #ifdef PFFFT_ENABLE_FLOAT |
| 1131 | algoName[ALGO_FFTW_AUTO] = "FFTWF(meas)"; /* "FFTW (auto)" */ |
| 1132 | #else |
| 1133 | algoName[ALGO_FFTW_AUTO] = "FFTWD(meas)"; /* "FFTW (auto)" */ |
| 1134 | #endif |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1135 | algoTableHeader[NUM_FFT_ALGOS][0] = "|real FFTWmeas "; /* "|real FFTWauto " */ |
| 1136 | algoTableHeader[NUM_FFT_ALGOS][0] = "|cplx FFTWmeas "; /* "|cplx FFTWauto " */ |
| 1137 | } |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1138 | #endif |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1139 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 1140 | if ( PFFFT_FUNC(simd_size)() == 1 ) |
| 1141 | { |
| 1142 | algoName[ALGO_PFFFT_U] = "PFFFTU scal-1"; |
| 1143 | algoName[ALGO_PFFFT_O] = "PFFFT scal-1 "; |
| 1144 | } |
| 1145 | else if ( !strcmp(PFFFT_FUNC(simd_arch)(), "4xScalar") ) |
| 1146 | { |
| 1147 | algoName[ALGO_PFFFT_U] = "PFFFT-U scal4"; |
| 1148 | algoName[ALGO_PFFFT_O] = "PFFFT scal-4 "; |
| 1149 | } |
| 1150 | |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 1151 | |
| 1152 | clock(); |
| 1153 | /* double TClockDur = 1.0 / CLOCKS_PER_SEC; |
| 1154 | printf("clock() duration for CLOCKS_PER_SEC = %f sec = %f ms\n", TClockDur, 1000.0 * TClockDur ); |
| 1155 | */ |
| 1156 | |
| 1157 | /* calibrate test duration */ |
| 1158 | { |
| 1159 | double t0, t1, dur; |
| 1160 | printf("calibrating fft benchmark duration at size N = 512 ..\n"); |
| 1161 | t0 = uclock_sec(); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1162 | if (benchReal) { |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 1163 | iterCalReal = cal_benchmark(512, 0 /* real fft */); |
| 1164 | printf("real fft iterCal = %f\n", iterCalReal); |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 1165 | } |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1166 | if (benchCplx) { |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 1167 | iterCalCplx = cal_benchmark(512, 1 /* cplx fft */); |
| 1168 | printf("cplx fft iterCal = %f\n", iterCalCplx); |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 1169 | } |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 1170 | t1 = uclock_sec(); |
| 1171 | dur = t1 - t0; |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 1172 | printf("calibration done in %f sec.\n\n", dur); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 1173 | } |
| 1174 | |
| 1175 | if (!array_output_format) { |
| 1176 | if (benchReal) { |
| 1177 | for (i=0; Nvalues[i] > 0; ++i) |
| 1178 | benchmark_ffts(Nvalues[i], 0 /* real fft */, withFFTWfullMeas, iterCalReal, tmeas[0][i], haveAlgo, NULL); |
| 1179 | } |
| 1180 | if (benchCplx) { |
| 1181 | for (i=0; Nvalues[i] > 0; ++i) |
| 1182 | benchmark_ffts(Nvalues[i], 1 /* cplx fft */, withFFTWfullMeas, iterCalCplx, tmeas[1][i], haveAlgo, NULL); |
| 1183 | } |
| 1184 | |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 1185 | } else { |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 1186 | |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1187 | if (outputTable2File) { |
| 1188 | tableFile = fopen( usePow2 ? "bench-fft-table-pow2.txt" : "bench-fft-table-non2.txt", "w"); |
| 1189 | } |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1190 | /* print table headers */ |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 1191 | printf("table shows MFlops; higher values indicate faster computation\n\n"); |
| 1192 | |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1193 | { |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1194 | print_table("| input len ", tableFile); |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1195 | for (realCplxIdx = 0; realCplxIdx < 2; ++realCplxIdx) |
| 1196 | { |
| 1197 | if ( (realCplxIdx == 0 && !benchReal) || (realCplxIdx == 1 && !benchCplx) ) |
| 1198 | continue; |
| 1199 | for (k=0; k < NUM_FFT_ALGOS; ++k) |
| 1200 | { |
| 1201 | if ( compiledInAlgo[k] ) |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1202 | print_table(algoTableHeader[k][realCplxIdx], tableFile); |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1203 | } |
| 1204 | } |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1205 | print_table("|\n", tableFile); |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1206 | } |
| 1207 | /* print table value seperators */ |
| 1208 | { |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1209 | print_table("|----------", tableFile); |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1210 | for (realCplxIdx = 0; realCplxIdx < 2; ++realCplxIdx) |
| 1211 | { |
| 1212 | if ( (realCplxIdx == 0 && !benchReal) || (realCplxIdx == 1 && !benchCplx) ) |
| 1213 | continue; |
| 1214 | for (k=0; k < NUM_FFT_ALGOS; ++k) |
| 1215 | { |
| 1216 | if ( compiledInAlgo[k] ) |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1217 | print_table(":|-------------", tableFile); |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1218 | } |
| 1219 | } |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1220 | print_table(":|\n", tableFile); |
hayati ayguen | cd60b96 | 2019-12-23 11:42:31 +0100 | [diff] [blame] | 1221 | } |
| 1222 | |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 1223 | for (i=0; Nvalues[i] > 0; ++i) { |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1224 | { |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 1225 | double t0, t1; |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1226 | print_table_fftsize(Nvalues[i], tableFile); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 1227 | t0 = uclock_sec(); |
| 1228 | if (benchReal) |
| 1229 | benchmark_ffts(Nvalues[i], 0, withFFTWfullMeas, iterCalReal, tmeas[0][i], haveAlgo, tableFile); |
| 1230 | if (benchCplx) |
| 1231 | benchmark_ffts(Nvalues[i], 1, withFFTWfullMeas, iterCalCplx, tmeas[1][i], haveAlgo, tableFile); |
| 1232 | t1 = uclock_sec(); |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1233 | print_table("|\n", tableFile); |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 1234 | /* printf("all ffts for size %d took %f sec\n", Nvalues[i], t1-t0); */ |
| 1235 | (void)t0; |
| 1236 | (void)t1; |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1237 | } |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 1238 | } |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1239 | fprintf(stdout, " (numbers are given in MFlops)\n"); |
| 1240 | if (outputTable2File) { |
| 1241 | fclose(tableFile); |
| 1242 | } |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 1243 | } |
Julien Pommier | 836bc4b | 2011-11-20 10:58:07 +0100 | [diff] [blame] | 1244 | |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1245 | printf("\n"); |
| 1246 | printf("now writing .csv files ..\n"); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1247 | |
| 1248 | for (realCplxIdx = 0; realCplxIdx < 2; ++realCplxIdx) |
| 1249 | { |
| 1250 | if ( (benchReal && realCplxIdx == 0) || (benchCplx && realCplxIdx == 1) ) |
| 1251 | { |
| 1252 | for (typeIdx = 0; typeIdx < NUM_TYPES; ++typeIdx) |
| 1253 | { |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1254 | FILE *f = NULL; |
hayati ayguen | b9804a2 | 2019-12-27 00:35:47 +0100 | [diff] [blame] | 1255 | if ( !(SAVE_ALL_TYPES || saveType[typeIdx]) ) |
| 1256 | continue; |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1257 | acCsvFilename[0] = 0; |
| 1258 | #ifdef PFFFT_SIMD_DISABLE |
| 1259 | strcat(acCsvFilename, "scal-"); |
| 1260 | #else |
| 1261 | strcat(acCsvFilename, "simd-"); |
| 1262 | #endif |
| 1263 | strcat(acCsvFilename, (realCplxIdx == 0 ? "real-" : "cplx-")); |
| 1264 | strcat(acCsvFilename, ( usePow2 ? "pow2-" : "non2-")); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 1265 | assert( strlen(acCsvFilename) + strlen(typeFilenamePart[typeIdx]) + 5 < (sizeof(acCsvFilename) / sizeof(acCsvFilename[0])) ); |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1266 | strcat(acCsvFilename, typeFilenamePart[typeIdx]); |
| 1267 | strcat(acCsvFilename, ".csv"); |
| 1268 | f = fopen(acCsvFilename, "w"); |
| 1269 | if (!f) |
| 1270 | continue; |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1271 | { |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1272 | fprintf(f, "size, log2, "); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1273 | for (k=0; k < NUM_FFT_ALGOS; ++k) |
| 1274 | if ( haveAlgo[k] ) |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1275 | fprintf(f, "%s, ", algoName[k]); |
| 1276 | fprintf(f, "\n"); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1277 | } |
| 1278 | for (i=0; Nvalues[i] > 0; ++i) |
| 1279 | { |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1280 | { |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1281 | fprintf(f, "%d, %.3f, ", Nvalues[i], log10((double)Nvalues[i])/log10(2.0) ); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1282 | for (k=0; k < NUM_FFT_ALGOS; ++k) |
| 1283 | if ( haveAlgo[k] ) |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1284 | fprintf(f, "%f, ", tmeas[realCplxIdx][i][typeIdx][k]); |
| 1285 | fprintf(f, "\n"); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1286 | } |
| 1287 | } |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 1288 | fclose(f); |
hayati ayguen | 4c3a87a | 2019-12-23 08:48:00 +0100 | [diff] [blame] | 1289 | } |
| 1290 | } |
| 1291 | } |
| 1292 | |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 1293 | return 0; |
| 1294 | } |
hayati ayguen | 3673ac0 | 2019-12-22 07:09:56 +0100 | [diff] [blame] | 1295 | |