hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 1 | /* |
| 2 | Copyright (c) 2013 Julien Pommier. |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 3 | Copyright (c) 2019 Hayati Ayguen ( h_ayguen@web.de ) |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 4 | */ |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 5 | |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 6 | #define _WANT_SNAN 1 |
| 7 | |
| 8 | #include "pffft.h" |
| 9 | #include "pffastconv.h" |
| 10 | |
| 11 | #include <math.h> |
| 12 | #include <float.h> |
| 13 | #include <limits.h> |
| 14 | #include <inttypes.h> |
| 15 | #include <stdio.h> |
| 16 | #include <stdlib.h> |
| 17 | #include <time.h> |
| 18 | #include <assert.h> |
| 19 | #include <string.h> |
| 20 | |
| 21 | #ifdef HAVE_SYS_TIMES |
| 22 | # include <sys/times.h> |
| 23 | # include <unistd.h> |
| 24 | #endif |
| 25 | |
| 26 | /* |
| 27 | vector support macros: the rest of the code is independant of |
| 28 | SSE/Altivec/NEON -- adding support for other platforms with 4-element |
| 29 | vectors should be limited to these macros |
| 30 | */ |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 31 | #if 0 |
| 32 | #include "simd/pf_float.h" |
| 33 | #endif |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 34 | |
| 35 | #if defined(_MSC_VER) |
| 36 | # define RESTRICT __restrict |
| 37 | #elif defined(__GNUC__) |
| 38 | # define RESTRICT __restrict |
| 39 | #else |
| 40 | # define RESTRICT |
| 41 | #endif |
| 42 | |
| 43 | |
| 44 | #if defined(_MSC_VER) |
| 45 | #pragma warning( disable : 4244 ) |
| 46 | #endif |
| 47 | |
| 48 | |
| 49 | #ifdef SNANF |
| 50 | #define INVALID_FLOAT_VAL SNANF |
| 51 | #elif defined(SNAN) |
| 52 | #define INVALID_FLOAT_VAL SNAN |
| 53 | #elif defined(NAN) |
| 54 | #define INVALID_FLOAT_VAL NAN |
| 55 | #elif defined(INFINITY) |
| 56 | #define INVALID_FLOAT_VAL INFINITY |
| 57 | #else |
| 58 | #define INVALID_FLOAT_VAL FLT_MAX |
| 59 | #endif |
| 60 | |
| 61 | |
| 62 | #if defined(HAVE_SYS_TIMES) |
| 63 | inline double uclock_sec(void) { |
| 64 | static double ttclk = 0.; |
| 65 | struct tms t; |
| 66 | if (ttclk == 0.) |
| 67 | ttclk = sysconf(_SC_CLK_TCK); |
| 68 | times(&t); |
| 69 | /* use only the user time of this process - not realtime, which depends on OS-scheduler .. */ |
| 70 | return ((double)t.tms_utime)) / ttclk; |
| 71 | } |
| 72 | # else |
| 73 | double uclock_sec(void) |
| 74 | { return (double)clock()/(double)CLOCKS_PER_SEC; } |
| 75 | #endif |
| 76 | |
| 77 | |
| 78 | |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 79 | typedef int (*pfnConvolution) (void * setup, const float * X, int len, float *Y, const float *Yref, int applyFlush); |
| 80 | typedef void* (*pfnConvSetup) (float *Hfwd, int Nf, int * BlkLen, int flags); |
| 81 | typedef pfnConvolution (*pfnGetConvFnPtr) (void * setup); |
| 82 | typedef void (*pfnConvDestroy) (void * setup); |
| 83 | |
| 84 | |
| 85 | struct ConvSetup |
| 86 | { |
| 87 | pfnConvolution pfn; |
| 88 | int N; |
| 89 | int B; |
| 90 | float * H; |
| 91 | int flags; |
| 92 | }; |
| 93 | |
| 94 | |
| 95 | void * convSetupRev( float * H, int N, int * BlkLen, int flags ) |
| 96 | { |
| 97 | struct ConvSetup * s = pffastconv_malloc( sizeof(struct ConvSetup) ); |
| 98 | int i, Nr = N; |
| 99 | if (flags & PFFASTCONV_CPLX_INP_OUT) |
| 100 | Nr *= 2; |
| 101 | Nr += 4; |
| 102 | s->pfn = NULL; |
| 103 | s->N = N; |
| 104 | s->B = *BlkLen; |
| 105 | s->H = pffastconv_malloc((unsigned)Nr * sizeof(float)); |
| 106 | s->flags = flags; |
| 107 | memset(s->H, 0, (unsigned)Nr * sizeof(float)); |
| 108 | if (flags & PFFASTCONV_CPLX_INP_OUT) |
| 109 | { |
| 110 | for ( i = 0; i < N; ++i ) { |
| 111 | s->H[2*(N-1 -i) ] = H[i]; |
| 112 | s->H[2*(N-1 -i)+1] = H[i]; |
| 113 | } |
| 114 | /* simpler detection of overruns */ |
| 115 | s->H[ 2*N ] = INVALID_FLOAT_VAL; |
| 116 | s->H[ 2*N +1 ] = INVALID_FLOAT_VAL; |
| 117 | s->H[ 2*N +2 ] = INVALID_FLOAT_VAL; |
| 118 | s->H[ 2*N +3 ] = INVALID_FLOAT_VAL; |
| 119 | } |
| 120 | else |
| 121 | { |
| 122 | for ( i = 0; i < N; ++i ) |
| 123 | s->H[ N-1 -i ] = H[i]; |
| 124 | /* simpler detection of overruns */ |
| 125 | s->H[ N ] = INVALID_FLOAT_VAL; |
| 126 | s->H[ N +1 ] = INVALID_FLOAT_VAL; |
| 127 | s->H[ N +2 ] = INVALID_FLOAT_VAL; |
| 128 | s->H[ N +3 ] = INVALID_FLOAT_VAL; |
| 129 | } |
| 130 | return s; |
| 131 | } |
| 132 | |
| 133 | void convDestroyRev( void * setup ) |
| 134 | { |
| 135 | struct ConvSetup * s = (struct ConvSetup*)setup; |
| 136 | pffastconv_free(s->H); |
| 137 | pffastconv_free(setup); |
| 138 | } |
| 139 | |
| 140 | |
| 141 | pfnConvolution ConvGetFnPtrRev( void * setup ) |
| 142 | { |
| 143 | struct ConvSetup * s = (struct ConvSetup*)setup; |
| 144 | if (!s) |
| 145 | return NULL; |
| 146 | return s->pfn; |
| 147 | } |
| 148 | |
| 149 | |
| 150 | void convSimdDestroy( void * setup ) |
| 151 | { |
| 152 | convDestroyRev(setup); |
| 153 | } |
| 154 | |
| 155 | |
| 156 | void * fastConvSetup( float * H, int N, int * BlkLen, int flags ) |
| 157 | { |
| 158 | void * p = pffastconv_new_setup( H, N, BlkLen, flags ); |
| 159 | if (!p) |
| 160 | printf("fastConvSetup(N = %d, *BlkLen = %d, flags = %d) = NULL\n", N, *BlkLen, flags); |
| 161 | return p; |
| 162 | } |
| 163 | |
| 164 | |
| 165 | void fastConvDestroy( void * setup ) |
| 166 | { |
| 167 | pffastconv_destroy_setup( (PFFASTCONV_Setup*)setup ); |
| 168 | } |
| 169 | |
| 170 | |
| 171 | |
| 172 | int slow_conv_R(void * setup, const float * input, int len, float *output, const float *Yref, int applyFlush) |
| 173 | { |
| 174 | struct ConvSetup * p = (struct ConvSetup*)setup; |
| 175 | const float * RESTRICT X = input; |
| 176 | const float * RESTRICT Hrev = p->H; |
| 177 | float * RESTRICT Y = output; |
| 178 | const int Nr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * p->N; |
| 179 | const int lenNr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * (len - p->N); |
| 180 | int i, j; |
| 181 | (void)Yref; |
| 182 | (void)applyFlush; |
| 183 | |
| 184 | if (p->flags & PFFASTCONV_CPLX_INP_OUT) |
| 185 | { |
| 186 | for ( i = 0; i <= lenNr; i += 2 ) |
| 187 | { |
| 188 | float sumRe = 0.0F, sumIm = 0.0F; |
| 189 | for ( j = 0; j < Nr; j += 2 ) |
| 190 | { |
| 191 | sumRe += X[i+j ] * Hrev[j]; |
| 192 | sumIm += X[i+j+1] * Hrev[j+1]; |
| 193 | } |
| 194 | Y[i ] = sumRe; |
| 195 | Y[i+1] = sumIm; |
| 196 | } |
| 197 | return i/2; |
| 198 | } |
| 199 | else |
| 200 | { |
| 201 | for ( i = 0; i <= lenNr; ++i ) |
| 202 | { |
| 203 | float sum = 0.0F; |
| 204 | for (j = 0; j < Nr; ++j ) |
| 205 | sum += X[i+j] * Hrev[j]; |
| 206 | Y[i] = sum; |
| 207 | } |
| 208 | return i; |
| 209 | } |
| 210 | } |
| 211 | |
| 212 | |
| 213 | |
| 214 | int slow_conv_A(void * setup, const float * input, int len, float *output, const float *Yref, int applyFlush) |
| 215 | { |
| 216 | float sum[4]; |
| 217 | struct ConvSetup * p = (struct ConvSetup*)setup; |
| 218 | const float * RESTRICT X = input; |
| 219 | const float * RESTRICT Hrev = p->H; |
| 220 | float * RESTRICT Y = output; |
| 221 | const int Nr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * p->N; |
| 222 | const int lenNr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * (len - p->N); |
| 223 | int i, j; |
| 224 | (void)Yref; |
| 225 | (void)applyFlush; |
| 226 | |
| 227 | if (p->flags & PFFASTCONV_CPLX_INP_OUT) |
| 228 | { |
| 229 | if ( (Nr & 3) == 0 ) |
| 230 | { |
| 231 | for ( i = 0; i <= lenNr; i += 2 ) |
| 232 | { |
| 233 | sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; |
| 234 | for (j = 0; j < Nr; j += 4 ) |
| 235 | { |
| 236 | sum[0] += X[i+j] * Hrev[j]; |
| 237 | sum[1] += X[i+j+1] * Hrev[j+1]; |
| 238 | sum[2] += X[i+j+2] * Hrev[j+2]; |
| 239 | sum[3] += X[i+j+3] * Hrev[j+3]; |
| 240 | } |
| 241 | Y[i ] = sum[0] + sum[2]; |
| 242 | Y[i+1] = sum[1] + sum[3]; |
| 243 | } |
| 244 | } |
| 245 | else |
| 246 | { |
| 247 | const int M = Nr & (~3); |
| 248 | for ( i = 0; i <= lenNr; i += 2 ) |
| 249 | { |
| 250 | float tailSumRe = 0.0F, tailSumIm = 0.0F; |
| 251 | sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; |
| 252 | for (j = 0; j < M; j += 4 ) |
| 253 | { |
| 254 | sum[0] += X[i+j ] * Hrev[j ]; |
| 255 | sum[1] += X[i+j+1] * Hrev[j+1]; |
| 256 | sum[2] += X[i+j+2] * Hrev[j+2]; |
| 257 | sum[3] += X[i+j+3] * Hrev[j+3]; |
| 258 | } |
| 259 | for ( ; j < Nr; j += 2 ) { |
| 260 | tailSumRe += X[i+j ] * Hrev[j ]; |
| 261 | tailSumIm += X[i+j+1] * Hrev[j+1]; |
| 262 | } |
| 263 | Y[i ] = ( sum[0] + sum[2] ) + tailSumRe; |
| 264 | Y[i+1] = ( sum[1] + sum[3] ) + tailSumIm; |
| 265 | } |
| 266 | } |
| 267 | return i/2; |
| 268 | } |
| 269 | else |
| 270 | { |
| 271 | if ( (Nr & 3) == 0 ) |
| 272 | { |
| 273 | for ( i = 0; i <= lenNr; ++i ) |
| 274 | { |
| 275 | sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; |
| 276 | for (j = 0; j < Nr; j += 4 ) |
| 277 | { |
| 278 | sum[0] += X[i+j] * Hrev[j]; |
| 279 | sum[1] += X[i+j+1] * Hrev[j+1]; |
| 280 | sum[2] += X[i+j+2] * Hrev[j+2]; |
| 281 | sum[3] += X[i+j+3] * Hrev[j+3]; |
| 282 | } |
| 283 | Y[i] = sum[0] + sum[1] + sum[2] + sum[3]; |
| 284 | } |
| 285 | return i; |
| 286 | } |
| 287 | else |
| 288 | { |
| 289 | const int M = Nr & (~3); |
| 290 | /* printf("A: Nr = %d, M = %d, H[M] = %f, H[M+1] = %f, H[M+2] = %f, H[M+3] = %f\n", Nr, M, Hrev[M], Hrev[M+1], Hrev[M+2], Hrev[M+3] ); */ |
| 291 | for ( i = 0; i <= lenNr; ++i ) |
| 292 | { |
| 293 | float tailSum = 0.0; |
| 294 | sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; |
| 295 | for (j = 0; j < M; j += 4 ) |
| 296 | { |
| 297 | sum[0] += X[i+j] * Hrev[j]; |
| 298 | sum[1] += X[i+j+1] * Hrev[j+1]; |
| 299 | sum[2] += X[i+j+2] * Hrev[j+2]; |
| 300 | sum[3] += X[i+j+3] * Hrev[j+3]; |
| 301 | } |
| 302 | for ( ; j < Nr; ++j ) |
| 303 | tailSum += X[i+j] * Hrev[j]; |
| 304 | Y[i] = (sum[0] + sum[1]) + (sum[2] + sum[3]) + tailSum; |
| 305 | } |
| 306 | return i; |
| 307 | } |
| 308 | } |
| 309 | } |
| 310 | |
| 311 | |
| 312 | int slow_conv_B(void * setup, const float * input, int len, float *output, const float *Yref, int applyFlush) |
| 313 | { |
| 314 | float sum[4]; |
| 315 | struct ConvSetup * p = (struct ConvSetup*)setup; |
| 316 | (void)Yref; |
| 317 | (void)applyFlush; |
| 318 | if (p->flags & PFFASTCONV_SYMMETRIC) |
| 319 | { |
| 320 | const float * RESTRICT X = input; |
| 321 | const float * RESTRICT Hrev = p->H; |
| 322 | float * RESTRICT Y = output; |
| 323 | const int Nr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * p->N; |
| 324 | const int lenNr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * (len - p->N); |
| 325 | const int h = Nr / 2 -4; |
| 326 | const int E = Nr -4; |
| 327 | int i, j; |
| 328 | |
| 329 | if (p->flags & PFFASTCONV_CPLX_INP_OUT) |
| 330 | { |
| 331 | for ( i = 0; i <= lenNr; i += 2 ) |
| 332 | { |
| 333 | const int k = i + E; |
| 334 | sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; |
| 335 | for (j = 0; j <= h; j += 4 ) |
| 336 | { |
| 337 | sum[0] += Hrev[j ] * ( X[i+j ] + X[k-j+2] ); |
| 338 | sum[1] += Hrev[j+1] * ( X[i+j+1] + X[k-j+3] ); |
| 339 | sum[2] += Hrev[j+2] * ( X[i+j+2] + X[k-j ] ); |
| 340 | sum[3] += Hrev[j+3] * ( X[i+j+3] + X[k-j+1] ); |
| 341 | } |
| 342 | Y[i ] = sum[0] + sum[2]; |
| 343 | Y[i+1] = sum[1] + sum[3]; |
| 344 | } |
| 345 | return i/2; |
| 346 | } |
| 347 | else |
| 348 | { |
| 349 | for ( i = 0; i <= lenNr; ++i ) |
| 350 | { |
| 351 | const int k = i + E; |
| 352 | sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; |
| 353 | for (j = 0; j <= h; j += 4 ) |
| 354 | { |
| 355 | sum[0] += Hrev[j ] * ( X[i+j ] + X[k-j+3] ); |
| 356 | sum[1] += Hrev[j+1] * ( X[i+j+1] + X[k-j+2] ); |
| 357 | sum[2] += Hrev[j+2] * ( X[i+j+2] + X[k-j+1] ); |
| 358 | sum[3] += Hrev[j+3] * ( X[i+j+3] + X[k-j ] ); |
| 359 | } |
| 360 | Y[i] = sum[0] + sum[1] + sum[2] + sum[3]; |
| 361 | } |
| 362 | return i; |
| 363 | } |
| 364 | } |
| 365 | else |
| 366 | { |
| 367 | const float * RESTRICT X = input; |
| 368 | const float * RESTRICT Hrev = p->H; |
| 369 | float * RESTRICT Y = output; |
| 370 | const int Nr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * p->N; |
| 371 | const int lenNr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * (len - p->N); |
| 372 | int i, j; |
| 373 | |
| 374 | if (p->flags & PFFASTCONV_CPLX_INP_OUT) |
| 375 | { |
| 376 | for ( i = 0; i <= lenNr; i += 2 ) |
| 377 | { |
| 378 | sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; |
| 379 | for (j = 0; j < Nr; j += 4 ) |
| 380 | { |
| 381 | sum[0] += X[i+j] * Hrev[j]; |
| 382 | sum[1] += X[i+j+1] * Hrev[j+1]; |
| 383 | sum[2] += X[i+j+2] * Hrev[j+2]; |
| 384 | sum[3] += X[i+j+3] * Hrev[j+3]; |
| 385 | } |
| 386 | Y[i ] = sum[0] + sum[2]; |
| 387 | Y[i+1] = sum[1] + sum[3]; |
| 388 | } |
| 389 | return i/2; |
| 390 | } |
| 391 | else |
| 392 | { |
| 393 | if ( (Nr & 3) == 0 ) |
| 394 | { |
| 395 | for ( i = 0; i <= lenNr; ++i ) |
| 396 | { |
| 397 | sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; |
| 398 | for (j = 0; j < Nr; j += 4 ) |
| 399 | { |
| 400 | sum[0] += X[i+j] * Hrev[j]; |
| 401 | sum[1] += X[i+j+1] * Hrev[j+1]; |
| 402 | sum[2] += X[i+j+2] * Hrev[j+2]; |
| 403 | sum[3] += X[i+j+3] * Hrev[j+3]; |
| 404 | } |
| 405 | Y[i] = (sum[0] + sum[1]) + (sum[2] + sum[3]); |
| 406 | } |
| 407 | return i; |
| 408 | } |
| 409 | else |
| 410 | { |
| 411 | const int M = Nr & (~3); |
| 412 | /* printf("B: Nr = %d\n", Nr ); */ |
| 413 | for ( i = 0; i <= lenNr; ++i ) |
| 414 | { |
| 415 | float tailSum = 0.0; |
| 416 | sum[0] = sum[1] = sum[2] = sum[3] = 0.0F; |
| 417 | for (j = 0; j < M; j += 4 ) |
| 418 | { |
| 419 | sum[0] += X[i+j] * Hrev[j]; |
| 420 | sum[1] += X[i+j+1] * Hrev[j+1]; |
| 421 | sum[2] += X[i+j+2] * Hrev[j+2]; |
| 422 | sum[3] += X[i+j+3] * Hrev[j+3]; |
| 423 | } |
| 424 | for ( ; j < Nr; ++j ) |
| 425 | tailSum += X[i+j] * Hrev[j]; |
| 426 | Y[i] = (sum[0] + sum[1]) + (sum[2] + sum[3]) + tailSum; |
| 427 | } |
| 428 | return i; |
| 429 | } |
| 430 | } |
| 431 | } |
| 432 | |
| 433 | } |
| 434 | |
| 435 | |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 436 | int fast_conv(void * setup, const float * X, int len, float *Y, const float *Yref, int applyFlush) |
| 437 | { |
| 438 | (void)Yref; |
| 439 | return pffastconv_apply( (PFFASTCONV_Setup*)setup, X, len, Y, applyFlush ); |
| 440 | } |
| 441 | |
| 442 | |
| 443 | |
| 444 | void printFirst( const float * V, const char * st, const int N, const int perLine ) |
| 445 | { |
| 446 | (void)V; (void)st; (void)N; (void)perLine; |
| 447 | return; |
| 448 | #if 0 |
| 449 | int i; |
| 450 | for ( i = 0; i < N; ++i ) |
| 451 | { |
| 452 | if ( (i % perLine) == 0 ) |
| 453 | printf("\n%s[%d]", st, i); |
| 454 | printf("\t%.1f", V[i]); |
| 455 | } |
| 456 | printf("\n"); |
| 457 | #endif |
| 458 | } |
| 459 | |
| 460 | |
| 461 | |
| 462 | #define NUMY 11 |
| 463 | |
| 464 | |
| 465 | int test(int FILTERLEN, int convFlags, const int testOutLen, int printDbg, int printSpeed) { |
| 466 | double t0, t1, tstop, td, tdref; |
| 467 | float *X, *H; |
| 468 | float *Y[NUMY]; |
| 469 | int64_t outN[NUMY]; |
| 470 | /* 256 KFloats or 16 MFloats data */ |
| 471 | #if 1 |
| 472 | const int len = testOutLen ? (1 << 18) : (1 << 24); |
| 473 | #elif 0 |
| 474 | const int len = testOutLen ? (1 << 18) : (1 << 13); |
| 475 | #else |
| 476 | const int len = testOutLen ? (1 << 18) : (1024); |
| 477 | #endif |
| 478 | const int cplxFactor = ( convFlags & PFFASTCONV_CPLX_INP_OUT ) ? 2 : 1; |
| 479 | const int lenC = len / cplxFactor; |
| 480 | |
| 481 | int yi, yc, posMaxErr; |
| 482 | float yRangeMin, yRangeMax, yErrLimit, maxErr = 0.0; |
| 483 | int i, j, numErrOverLimit, iter; |
| 484 | int retErr = 0; |
| 485 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 486 | /* 0 1 2 3 4 5 6 7 8 9 */ |
| 487 | pfnConvSetup aSetup[NUMY] = { convSetupRev, convSetupRev, convSetupRev, fastConvSetup, fastConvSetup, fastConvSetup, fastConvSetup, fastConvSetup, fastConvSetup, fastConvSetup }; |
| 488 | pfnConvDestroy aDestroy[NUMY] = { convDestroyRev, convDestroyRev, convDestroyRev, fastConvDestroy, fastConvDestroy, fastConvDestroy, fastConvDestroy, fastConvDestroy, fastConvDestroy, fastConvDestroy }; |
| 489 | pfnGetConvFnPtr aGetFnPtr[NUMY] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; |
| 490 | pfnConvolution aConv[NUMY] = { slow_conv_R, slow_conv_A, slow_conv_B, fast_conv, fast_conv, fast_conv, fast_conv, fast_conv, fast_conv, fast_conv }; |
| 491 | const char * convText[NUMY] = { "R(non-simd)", "A(non-simd)", "B(non-simd)", "fast_conv_64", "fast_conv_128", "fast_conv_256", "fast_conv_512", "fast_conv_1K", "fast_conv_2K", "fast_conv_4K" }; |
| 492 | int aFastAlgo[NUMY] = { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 }; |
| 493 | void * aSetupCfg[NUMY] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL }; |
| 494 | int aBlkLen[NUMY] = { 1024, 1024, 1024, 64, 128, 256, 512, 1024, 2048, 4096 }; |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 495 | #if 1 |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 496 | int aRunAlgo[NUMY] = { 1, 1, 1, FILTERLEN<64, FILTERLEN<128, FILTERLEN<256, FILTERLEN<512, FILTERLEN<1024, FILTERLEN<2048, FILTERLEN<4096 }; |
| 497 | #elif 0 |
| 498 | int aRunAlgo[NUMY] = { 1, 0, 0, 0 && FILTERLEN<64, 1 && FILTERLEN<128, 1 && FILTERLEN<256, 0 && FILTERLEN<512, 0 && FILTERLEN<1024, 0 && FILTERLEN<2048, 0 && FILTERLEN<4096 }; |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 499 | #else |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 500 | int aRunAlgo[NUMY] = { 1, 1, 1, 0 && FILTERLEN<64, 0 && FILTERLEN<128, 1 && FILTERLEN<256, 0 && FILTERLEN<512, 0 && FILTERLEN<1024, 0 && FILTERLEN<2048, 0 && FILTERLEN<4096 }; |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 501 | #endif |
| 502 | double aSpeedFactor[NUMY], aDuration[NUMY], procSmpPerSec[NUMY]; |
| 503 | |
| 504 | X = pffastconv_malloc( (unsigned)(len+4) * sizeof(float) ); |
| 505 | for ( i=0; i < NUMY; ++i) |
| 506 | { |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 507 | if ( 1 || i < 2 ) |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 508 | Y[i] = pffastconv_malloc( (unsigned)len * sizeof(float) ); |
| 509 | else |
| 510 | Y[i] = Y[1]; |
| 511 | |
hayati ayguen | e6cffc9 | 2020-02-28 19:57:30 +0100 | [diff] [blame] | 512 | Y[i][0] = 123.F; /* test for pffft_zconvolve_no_accu() */ |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 513 | aSpeedFactor[i] = -1.0; |
| 514 | aDuration[i] = -1.0; |
| 515 | procSmpPerSec[i] = -1.0; |
| 516 | } |
| 517 | |
| 518 | H = pffastconv_malloc((unsigned)FILTERLEN * sizeof(float)); |
| 519 | |
| 520 | /* initialize input */ |
| 521 | if ( convFlags & PFFASTCONV_CPLX_INP_OUT ) |
| 522 | { |
| 523 | for ( i = 0; i < lenC; ++i ) |
| 524 | { |
| 525 | X[2*i ] = (float)(i % 4093); /* 4094 is a prime number. see https://en.wikipedia.org/wiki/List_of_prime_numbers */ |
| 526 | X[2*i+1] = (float)((i+2048) % 4093); |
| 527 | } |
| 528 | } |
| 529 | else |
| 530 | { |
| 531 | for ( i = 0; i < len; ++i ) |
| 532 | X[i] = (float)(i % 4093); /* 4094 is a prime number. see https://en.wikipedia.org/wiki/List_of_prime_numbers */ |
| 533 | } |
| 534 | X[ len ] = INVALID_FLOAT_VAL; |
| 535 | X[ len +1 ] = INVALID_FLOAT_VAL; |
| 536 | X[ len +2 ] = INVALID_FLOAT_VAL; |
| 537 | X[ len +3 ] = INVALID_FLOAT_VAL; |
| 538 | |
| 539 | if (!testOutLen) |
| 540 | printFirst( X, "X", 64, 8 ); |
| 541 | |
| 542 | /* filter coeffs */ |
| 543 | memset( H, 0, FILTERLEN * sizeof(float) ); |
| 544 | #if 1 |
| 545 | if ( convFlags & PFFASTCONV_SYMMETRIC ) |
| 546 | { |
| 547 | const int half = FILTERLEN / 2; |
| 548 | for ( j = 0; j < half; ++j ) { |
| 549 | switch (j % 3) { |
| 550 | case 0: H[j] = H[FILTERLEN-1-j] = -1.0F; break; |
| 551 | case 1: H[j] = H[FILTERLEN-1-j] = 1.0F; break; |
| 552 | case 2: H[j] = H[FILTERLEN-1-j] = 0.5F; break; |
| 553 | } |
| 554 | } |
| 555 | } |
| 556 | else |
| 557 | { |
| 558 | for ( j = 0; j < FILTERLEN; ++j ) { |
| 559 | switch (j % 3) { |
| 560 | case 0: H[j] = -1.0F; break; |
| 561 | case 1: H[j] = 1.0F; break; |
| 562 | case 2: H[j] = 0.5F; break; |
| 563 | } |
| 564 | } |
| 565 | } |
| 566 | #else |
| 567 | H[0] = 1.0F; |
| 568 | H[FILTERLEN -1] = 1.0F; |
| 569 | #endif |
| 570 | if (!testOutLen) |
| 571 | printFirst( H, "H", FILTERLEN, 8 ); |
| 572 | |
| 573 | printf("\n"); |
| 574 | printf("filterLen = %d\t%s%s\t%s:\n", FILTERLEN, |
| 575 | ((convFlags & PFFASTCONV_CPLX_INP_OUT)?"cplx":"real"), |
| 576 | (convFlags & PFFASTCONV_CPLX_INP_OUT)?((convFlags & PFFASTCONV_CPLX_SINGLE_FFT)?" single":" 2x") : "", |
| 577 | ((convFlags & PFFASTCONV_SYMMETRIC)?"symmetric":"non-sym") ); |
| 578 | |
| 579 | while (1) |
| 580 | { |
| 581 | |
| 582 | for ( yi = 0; yi < NUMY; ++yi ) |
| 583 | { |
| 584 | if (!aRunAlgo[yi]) |
| 585 | continue; |
| 586 | |
| 587 | aSetupCfg[yi] = aSetup[yi]( H, FILTERLEN, &aBlkLen[yi], convFlags ); |
| 588 | |
| 589 | /* get effective apply function ptr */ |
| 590 | if ( aSetupCfg[yi] && aGetFnPtr[yi] ) |
| 591 | aConv[yi] = aGetFnPtr[yi]( aSetupCfg[yi] ); |
| 592 | |
| 593 | if ( aSetupCfg[yi] && aConv[yi] ) { |
| 594 | if (testOutLen) |
| 595 | { |
| 596 | t0 = uclock_sec(); |
| 597 | outN[yi] = aConv[yi]( aSetupCfg[yi], X, lenC, Y[yi], Y[0], 1 /* applyFlush */ ); |
| 598 | t1 = uclock_sec(); |
| 599 | td = t1 - t0; |
| 600 | } |
| 601 | else |
| 602 | { |
| 603 | const int blkLen = 4096; /* required for 'fast_conv_4K' */ |
| 604 | int64_t offC = 0, offS, Nout; |
| 605 | int k; |
| 606 | iter = 0; |
| 607 | outN[yi] = 0; |
| 608 | t0 = uclock_sec(); |
| 609 | tstop = t0 + 0.25; /* benchmark duration: 250 ms */ |
| 610 | do { |
| 611 | for ( k = 0; k < 128 && offC +blkLen < lenC; ++k ) |
| 612 | { |
| 613 | offS = cplxFactor * offC; |
| 614 | Nout = aConv[yi]( aSetupCfg[yi], X +offS, blkLen, Y[yi] +offS, Y[0], (offC +blkLen >= lenC) /* applyFlush */ ); |
| 615 | offC += Nout; |
| 616 | ++iter; |
| 617 | if ( !Nout ) |
| 618 | break; |
| 619 | if ( offC +blkLen >= lenC ) |
| 620 | { |
| 621 | outN[yi] += offC; |
| 622 | offC = 0; |
| 623 | } |
| 624 | } |
| 625 | t1 = uclock_sec(); |
| 626 | } while ( t1 < tstop ); |
| 627 | outN[yi] += offC; |
| 628 | td = t1 - t0; |
hayati ayguen | e6cffc9 | 2020-02-28 19:57:30 +0100 | [diff] [blame] | 629 | procSmpPerSec[yi] = cplxFactor * (double)outN[yi] / td; |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 630 | } |
| 631 | } |
| 632 | else |
| 633 | { |
| 634 | t0 = t1 = td = 0.0; |
| 635 | outN[yi] = 0; |
| 636 | } |
| 637 | aDuration[yi] = td; |
| 638 | if ( yi == 0 ) { |
| 639 | const float * Yvals = Y[0]; |
| 640 | const int64_t refOutLen = cplxFactor * outN[0]; |
| 641 | tdref = td; |
| 642 | if (printDbg) { |
| 643 | printf("convolution '%s' took: %f ms\n", convText[yi], td*1000.0); |
| 644 | printf(" convolution '%s' output size %" PRId64 " == (cplx) len %d + %" PRId64 "\n", convText[yi], outN[yi], len / cplxFactor, outN[yi] - len / cplxFactor); |
| 645 | } |
| 646 | aSpeedFactor[yi] = 1.0; |
| 647 | /* */ |
| 648 | yRangeMin = FLT_MAX; |
| 649 | yRangeMax = FLT_MIN; |
| 650 | for ( i = 0; i < refOutLen; ++i ) |
| 651 | { |
| 652 | if ( yRangeMax < Yvals[i] ) yRangeMax = Yvals[i]; |
| 653 | if ( yRangeMin > Yvals[i] ) yRangeMin = Yvals[i]; |
| 654 | } |
| 655 | yErrLimit = fabsf(yRangeMax - yRangeMin) / ( 100.0F * 1000.0F ); |
| 656 | /* yErrLimit = 0.01F; */ |
| 657 | if (testOutLen) { |
| 658 | if (1) { |
| 659 | printf("reference output len = %" PRId64 " smp\n", outN[0]); |
| 660 | printf("reference output range |%.1f ..%.1f| = %.1f ==> err limit = %f\n", yRangeMin, yRangeMax, yRangeMax - yRangeMin, yErrLimit); |
| 661 | } |
| 662 | printFirst( Yvals, "Yref", 64, 8 ); |
| 663 | } |
| 664 | } |
| 665 | else |
| 666 | { |
| 667 | aSpeedFactor[yi] = tdref / td; |
| 668 | if (printDbg) { |
| 669 | printf("\nconvolution '%s' took: %f ms == %f %% == %f X\n", convText[yi], td*1000.0, td * 100 / tdref, tdref / td); |
| 670 | printf(" convolution '%s' output size %" PRId64 " == (cplx) len %d + %" PRId64 "\n", convText[yi], outN[yi], len / cplxFactor, outN[yi] - len / cplxFactor); |
| 671 | } |
| 672 | } |
| 673 | } |
| 674 | |
| 675 | int iMaxSpeedSlowAlgo = -1; |
| 676 | int iFirstFastAlgo = -1; |
| 677 | int iMaxSpeedFastAlgo = -1; |
| 678 | int iPrintedRefOutLen = 0; |
| 679 | { |
| 680 | for ( yc = 1; yc < NUMY; ++yc ) |
| 681 | { |
| 682 | if (!aRunAlgo[yc]) |
| 683 | continue; |
| 684 | if (aFastAlgo[yc]) { |
| 685 | if ( iMaxSpeedFastAlgo < 0 || aSpeedFactor[yc] > aSpeedFactor[iMaxSpeedFastAlgo] ) |
| 686 | iMaxSpeedFastAlgo = yc; |
| 687 | |
| 688 | if (iFirstFastAlgo < 0) |
| 689 | iFirstFastAlgo = yc; |
| 690 | } |
| 691 | else |
| 692 | { |
| 693 | if ( iMaxSpeedSlowAlgo < 0 || aSpeedFactor[yc] > aSpeedFactor[iMaxSpeedSlowAlgo] ) |
| 694 | iMaxSpeedSlowAlgo = yc; |
| 695 | } |
| 696 | } |
| 697 | |
| 698 | if (printSpeed) |
| 699 | { |
| 700 | if (testOutLen) |
| 701 | { |
| 702 | if (iMaxSpeedSlowAlgo >= 0 ) |
| 703 | printf("fastest slow algorithm is '%s' at speed %f X ; abs duration %f ms\n", convText[iMaxSpeedSlowAlgo], aSpeedFactor[iMaxSpeedSlowAlgo], 1000.0 * aDuration[iMaxSpeedSlowAlgo]); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 704 | if (0 != iMaxSpeedSlowAlgo && aRunAlgo[0]) |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 705 | printf("slow algorithm '%s' at speed %f X ; abs duration %f ms\n", convText[0], aSpeedFactor[0], 1000.0 * aDuration[0]); |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 706 | if (1 != iMaxSpeedSlowAlgo && aRunAlgo[1]) |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 707 | printf("slow algorithm '%s' at speed %f X ; abs duration %f ms\n", convText[1], aSpeedFactor[1], 1000.0 * aDuration[1]); |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 708 | |
| 709 | if (iFirstFastAlgo >= 0 && iFirstFastAlgo != iMaxSpeedFastAlgo && aRunAlgo[iFirstFastAlgo]) |
| 710 | printf("first fast algorithm is '%s' at speed %f X ; abs duration %f ms\n", convText[iFirstFastAlgo], aSpeedFactor[iFirstFastAlgo], 1000.0 * aDuration[iFirstFastAlgo]); |
| 711 | if (iFirstFastAlgo >= 0 && iFirstFastAlgo+1 != iMaxSpeedFastAlgo && iFirstFastAlgo+1 < NUMY && aRunAlgo[iFirstFastAlgo+1]) |
| 712 | printf("2nd fast algorithm is '%s' at speed %f X ; abs duration %f ms\n", convText[iFirstFastAlgo+1], aSpeedFactor[iFirstFastAlgo+1], 1000.0 * aDuration[iFirstFastAlgo+1]); |
| 713 | |
| 714 | if ( 0 <= iMaxSpeedFastAlgo && iMaxSpeedFastAlgo < NUMY && aRunAlgo[iMaxSpeedFastAlgo] ) |
| 715 | { |
| 716 | printf("fastest fast algorithm is '%s' at speed %f X ; abs duration %f ms\n", convText[iMaxSpeedFastAlgo], aSpeedFactor[iMaxSpeedFastAlgo], 1000.0 * aDuration[iMaxSpeedFastAlgo]); |
| 717 | if ( 0 <= iMaxSpeedSlowAlgo && iMaxSpeedSlowAlgo < NUMY && aRunAlgo[iMaxSpeedSlowAlgo] ) |
| 718 | printf("fast / slow ratio: %f X\n", aSpeedFactor[iMaxSpeedFastAlgo] / aSpeedFactor[iMaxSpeedSlowAlgo] ); |
| 719 | } |
| 720 | printf("\n"); |
| 721 | } |
| 722 | else |
| 723 | { |
| 724 | for ( yc = 0; yc < NUMY; ++yc ) |
| 725 | { |
| 726 | if (!aRunAlgo[yc] || procSmpPerSec[yc] <= 0.0) |
| 727 | continue; |
hayati ayguen | e6cffc9 | 2020-02-28 19:57:30 +0100 | [diff] [blame] | 728 | printf("algo '%s':\t%.2f MSmp\tin\t%.1f ms\t= %g kSmpPerSec\n", |
| 729 | convText[yc], (double)outN[yc]/(1000.0 * 1000.0), 1000.0 * aDuration[yc], procSmpPerSec[yc] * 0.001 ); |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 730 | } |
| 731 | } |
| 732 | |
| 733 | } |
| 734 | } |
| 735 | |
| 736 | |
| 737 | for ( yc = 1; yc < NUMY; ++yc ) |
| 738 | { |
| 739 | const float * Yref; |
| 740 | const float * Ycurr; |
| 741 | int outMin; |
| 742 | |
| 743 | if (!aRunAlgo[yc]) |
| 744 | continue; |
| 745 | |
| 746 | if (printDbg) |
| 747 | printf("\n"); |
| 748 | |
| 749 | if ( outN[yc] == 0 ) |
| 750 | { |
| 751 | printf("output size 0: '%s' not implemented\n", convText[yc]); |
| 752 | } |
| 753 | else if ( outN[0] != outN[yc] /* && aFastAlgo[yc] */ && testOutLen ) |
| 754 | { |
| 755 | if (!iPrintedRefOutLen) |
| 756 | { |
| 757 | printf("reference output size = %" PRId64 ", delta to (cplx) input length = %" PRId64 " smp\n", outN[0], (len / cplxFactor) - outN[0]); |
| 758 | iPrintedRefOutLen = 1; |
| 759 | } |
| 760 | printf("output size doesn't match!: ref (FILTERLEN %d) returned %" PRId64 " smp, '%s' returned %" PRId64 " smp : delta = %" PRId64 " smp\n", |
| 761 | FILTERLEN, outN[0], convText[yc], outN[yc], outN[yc] - outN[0] ); |
| 762 | retErr = 1; |
| 763 | } |
| 764 | |
| 765 | posMaxErr = 0; |
| 766 | maxErr = -1.0; |
| 767 | Yref = Y[0]; |
| 768 | Ycurr = Y[yc]; |
| 769 | outMin = ( outN[yc] < outN[0] ) ? outN[yc] : outN[0]; |
| 770 | numErrOverLimit = 0; |
| 771 | for ( i = 0; i < outMin; ++i ) |
| 772 | { |
| 773 | if ( numErrOverLimit < 6 && fabs(Ycurr[i] - Yref[i]) >= yErrLimit ) |
| 774 | { |
| 775 | printf("algo '%s': at %d: ***ERROR*** = %f, errLimit = %f, ref = %f, actual = %f\n", |
| 776 | convText[yc], i, fabs(Ycurr[i] - Yref[i]), yErrLimit, Yref[i], Ycurr[i] ); |
| 777 | ++numErrOverLimit; |
| 778 | } |
| 779 | |
| 780 | if ( fabs(Ycurr[i] - Yref[i]) > maxErr ) |
| 781 | { |
| 782 | maxErr = fabsf(Ycurr[i] - Yref[i]); |
| 783 | posMaxErr = i; |
| 784 | } |
| 785 | } |
| 786 | |
| 787 | if ( printDbg || (iMaxSpeedSlowAlgo == i) || (iMaxSpeedFastAlgo == i) ) |
| 788 | printf("max difference for '%s' is %g at sample idx %d of max inp 4093-1 == %f %%\n", convText[yc], maxErr, posMaxErr, maxErr * 100.0 / 4092.0 ); |
| 789 | } |
| 790 | |
| 791 | break; |
| 792 | } |
| 793 | |
| 794 | pffastconv_free(X); |
| 795 | for ( i=0; i < NUMY; ++i) |
| 796 | { |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 797 | if ( 1 || i < 2 ) |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 798 | pffastconv_free( Y[i] ); |
| 799 | if (!aRunAlgo[i]) |
| 800 | continue; |
| 801 | aDestroy[i]( aSetupCfg[i] ); |
| 802 | } |
| 803 | |
| 804 | pffastconv_free(H); |
| 805 | |
| 806 | return retErr; |
| 807 | } |
| 808 | |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 809 | /* small functions inside pffft.c that will detect (compiler) bugs with respect to simd instructions */ |
| 810 | void validate_pffft_simd(); |
| 811 | int validate_pffft_simd_ex(FILE * DbgOut); |
| 812 | |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 813 | |
| 814 | int main(int argc, char **argv) |
| 815 | { |
| 816 | int result = 0; |
| 817 | int i, k, M, flagsA, flagsB, flagsC, testOutLen, printDbg, printSpeed; |
| 818 | int testOutLens = 1, benchConv = 1, quickTest = 0, slowTest = 0; |
| 819 | int testReal = 1, testCplx = 1, testSymetric = 0; |
| 820 | |
| 821 | for ( i = 1; i < argc; ++i ) { |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 822 | |
| 823 | if (!strcmp(argv[i], "--test-simd")) { |
| 824 | int numErrs = validate_pffft_simd_ex(stdout); |
| 825 | fprintf( ( numErrs != 0 ? stderr : stdout ), "validate_pffft_simd_ex() returned %d errors!\n", numErrs); |
| 826 | return ( numErrs > 0 ? 1 : 0 ); |
| 827 | } |
| 828 | |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 829 | if (!strcmp(argv[i], "--no-len")) { |
| 830 | testOutLens = 0; |
| 831 | } |
| 832 | else if (!strcmp(argv[i], "--no-bench")) { |
| 833 | benchConv = 0; |
| 834 | } |
| 835 | else if (!strcmp(argv[i], "--quick")) { |
| 836 | quickTest = 1; |
| 837 | } |
| 838 | else if (!strcmp(argv[i], "--slow")) { |
| 839 | slowTest = 1; |
| 840 | } |
| 841 | else if (!strcmp(argv[i], "--real")) { |
| 842 | testCplx = 0; |
| 843 | } |
| 844 | else if (!strcmp(argv[i], "--cplx")) { |
| 845 | testReal = 0; |
| 846 | } |
| 847 | else if (!strcmp(argv[i], "--sym")) { |
| 848 | testSymetric = 1; |
| 849 | } |
| 850 | else /* if (!strcmp(argv[i], "--help")) */ { |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 851 | printf("usage: %s [--test-simd] [--no-len] [--no-bench] [--quick|--slow] [--real|--cplx] [--sym]\n", argv[0]); |
hayati ayguen | b2d2936 | 2020-01-02 00:06:09 +0100 | [diff] [blame] | 852 | exit(1); |
| 853 | } |
| 854 | } |
| 855 | |
| 856 | |
| 857 | if (testOutLens) |
| 858 | { |
| 859 | for ( k = 0; k < 3; ++k ) |
| 860 | { |
| 861 | if ( (k == 0 && !testReal) || (k > 0 && !testCplx) ) |
| 862 | continue; |
| 863 | printf("\n\n==========\n"); |
| 864 | printf("testing %s %s output lengths ..\n", (k == 0 ? "real" : "cplx"), ( k == 0 ? "" : (k==1 ? "2x" : "single") ) ); |
| 865 | printf("==========\n"); |
| 866 | flagsA = (k == 0) ? 0 : PFFASTCONV_CPLX_INP_OUT; |
| 867 | flagsB = flagsA | ( testSymetric ? PFFASTCONV_SYMMETRIC : 0 ); |
| 868 | flagsC = flagsB | PFFASTCONV_CPLX_SINGLE_FFT; |
| 869 | testOutLen = 1; |
| 870 | printDbg = 0; |
| 871 | printSpeed = 0; |
| 872 | for ( M = 128 - 4; M <= (quickTest ? 128+16 : 256); ++M ) |
| 873 | { |
| 874 | if ( (M % 16) != 0 && testSymetric ) |
| 875 | continue; |
| 876 | result |= test(M, flagsB, testOutLen, printDbg, printSpeed); |
| 877 | } |
| 878 | } |
| 879 | } |
| 880 | |
| 881 | if (benchConv) |
| 882 | { |
| 883 | for ( k = 0; k < 3; ++k ) |
| 884 | { |
| 885 | if ( (k == 0 && !testReal) || (k > 0 && !testCplx) ) |
| 886 | continue; |
| 887 | printf("\n\n==========\n"); |
| 888 | printf("starting %s %s benchmark against linear convolutions ..\n", (k == 0 ? "real" : "cplx"), ( k == 0 ? "" : (k==1 ? "2x" : "single") ) ); |
| 889 | printf("==========\n"); |
| 890 | flagsA = (k == 0) ? 0 : PFFASTCONV_CPLX_INP_OUT; |
| 891 | flagsB = flagsA | ( testSymetric ? PFFASTCONV_SYMMETRIC : 0 ); |
| 892 | flagsC = flagsB | ( k == 2 ? PFFASTCONV_CPLX_SINGLE_FFT : 0 ); |
| 893 | testOutLen = 0; |
| 894 | printDbg = 0; |
| 895 | printSpeed = 1; |
| 896 | if (!slowTest) { |
| 897 | result |= test( 32, flagsC, testOutLen, printDbg, printSpeed); |
| 898 | result |= test( 32+ 16, flagsC, testOutLen, printDbg, printSpeed); |
| 899 | result |= test( 64, flagsC, testOutLen, printDbg, printSpeed); |
| 900 | result |= test( 64+ 32, flagsC, testOutLen, printDbg, printSpeed); |
| 901 | result |= test(128, flagsC, testOutLen, printDbg, printSpeed); |
| 902 | } |
| 903 | if (!quickTest) { |
| 904 | result |= test(128+ 64, flagsC, testOutLen, printDbg, printSpeed); |
| 905 | result |= test(256, flagsC, testOutLen, printDbg, printSpeed); |
| 906 | result |= test(256+128, flagsC, testOutLen, printDbg, printSpeed); |
| 907 | result |= test(512, flagsC, testOutLen, printDbg, printSpeed); |
| 908 | result |= test(1024, flagsC, testOutLen, printDbg, printSpeed); |
| 909 | } |
| 910 | } |
| 911 | } |
| 912 | |
| 913 | return result; |
| 914 | } |
| 915 | |