Blame - test_pffastconv.c - platform/external/pffft

blob: 90d36ca3372c634073128bb458718b4adae64766 [file] [log] [blame]

hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	1	/*
				2	Copyright (c) 2013 Julien Pommier.
hayati ayguen	ca11241	2020-04-13 00:19:40 +0200	[diff] [blame]	3	Copyright (c) 2019 Hayati Ayguen ( h_ayguen@web.de )
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	4	*/
hayati ayguen	ca11241	2020-04-13 00:19:40 +0200	[diff] [blame]	5
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	6	#define _WANT_SNAN 1
				7
				8	#include "pffft.h"
				9	#include "pffastconv.h"
				10
				11	#include <math.h>
				12	#include <float.h>
				13	#include <limits.h>
				14	#include <inttypes.h>
				15	#include <stdio.h>
				16	#include <stdlib.h>
				17	#include <time.h>
				18	#include <assert.h>
				19	#include <string.h>
				20
				21	#ifdef HAVE_SYS_TIMES
				22	# include <sys/times.h>
				23	# include <unistd.h>
				24	#endif
				25
				26	/*
				27	vector support macros: the rest of the code is independent of
				28	SSE/Altivec/NEON -- adding support for other platforms with 4-element
				29	vectors should be limited to these macros
				30	*/
hayati ayguen	ca11241	2020-04-13 00:19:40 +0200	[diff] [blame]	31	#if 0
				32	#include "simd/pf_float.h"
				33	#endif
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	34
				35	#if defined(_MSC_VER)
				36	# define RESTRICT __restrict
				37	#elif defined(__GNUC__)
				38	# define RESTRICT __restrict
				39	#else
				40	# define RESTRICT
				41	#endif
				42
				43
				44	#if defined(_MSC_VER)
				45	#pragma warning( disable : 4244 )
				46	#endif
				47
				48
				49	#ifdef SNANF
				50	#define INVALID_FLOAT_VAL SNANF
				51	#elif defined(SNAN)
				52	#define INVALID_FLOAT_VAL SNAN
				53	#elif defined(NAN)
				54	#define INVALID_FLOAT_VAL NAN
				55	#elif defined(INFINITY)
				56	#define INVALID_FLOAT_VAL INFINITY
				57	#else
				58	#define INVALID_FLOAT_VAL FLT_MAX
				59	#endif
				60
				61
				62	#if defined(HAVE_SYS_TIMES)
				63	inline double uclock_sec(void) {
				64	static double ttclk = 0.;
				65	struct tms t;
				66	if (ttclk == 0.)
				67	ttclk = sysconf(_SC_CLK_TCK);
				68	times(&t);
				69	/* use only the user time of this process - not realtime, which depends on OS-scheduler .. */
				70	return ((double)t.tms_utime)) / ttclk;
				71	}
				72	# else
				73	double uclock_sec(void)
				74	{ return (double)clock()/(double)CLOCKS_PER_SEC; }
				75	#endif
				76
				77
				78
/*
 * Function pointer types shared by all convolution variants under test.
 *
 * pfnConvolution  : process 'len' input samples from X into Y and return the
 *                   number of produced output samples.
 * pfnConvSetup    : create the per-algorithm state for filter Hfwd with Nf taps.
 * pfnGetConvFnPtr : optionally query the convolution function from a setup.
 * pfnConvDestroy  : release a state created by a pfnConvSetup function.
 */
typedef int (*pfnConvolution) (void * setup, const float * X, int len, float * Y, const float * Yref, int applyFlush);
typedef void * (*pfnConvSetup) (float * Hfwd, int Nf, int * BlkLen, int flags);
typedef pfnConvolution (*pfnGetConvFnPtr) (void * setup);
typedef void (*pfnConvDestroy) (void * setup);


/* State of the direct (non-FFT) reference convolutions, built by convSetupRev(). */
struct ConvSetup
{
  pfnConvolution pfn;  /* convolution function to use; convSetupRev() stores NULL */
  int N;               /* number of filter taps */
  int B;               /* block length as passed in by the caller */
  float * H;           /* time-reversed filter taps followed by 4 guard values */
  int flags;           /* PFFASTCONV_* flags given at setup */
};
				93
				94
				95	void * convSetupRev( float * H, int N, int * BlkLen, int flags )
				96	{
				97	struct ConvSetup * s = pffastconv_malloc( sizeof(struct ConvSetup) );
				98	int i, Nr = N;
				99	if (flags & PFFASTCONV_CPLX_INP_OUT)
				100	Nr *= 2;
				101	Nr += 4;
				102	s->pfn = NULL;
				103	s->N = N;
				104	s->B = *BlkLen;
				105	s->H = pffastconv_malloc((unsigned)Nr * sizeof(float));
				106	s->flags = flags;
				107	memset(s->H, 0, (unsigned)Nr * sizeof(float));
				108	if (flags & PFFASTCONV_CPLX_INP_OUT)
				109	{
				110	for ( i = 0; i < N; ++i ) {
				111	s->H[2*(N-1 -i) ] = H[i];
				112	s->H[2*(N-1 -i)+1] = H[i];
				113	}
				114	/* simpler detection of overruns */
				115	s->H[ 2*N ] = INVALID_FLOAT_VAL;
				116	s->H[ 2*N +1 ] = INVALID_FLOAT_VAL;
				117	s->H[ 2*N +2 ] = INVALID_FLOAT_VAL;
				118	s->H[ 2*N +3 ] = INVALID_FLOAT_VAL;
				119	}
				120	else
				121	{
				122	for ( i = 0; i < N; ++i )
				123	s->H[ N-1 -i ] = H[i];
				124	/* simpler detection of overruns */
				125	s->H[ N ] = INVALID_FLOAT_VAL;
				126	s->H[ N +1 ] = INVALID_FLOAT_VAL;
				127	s->H[ N +2 ] = INVALID_FLOAT_VAL;
				128	s->H[ N +3 ] = INVALID_FLOAT_VAL;
				129	}
				130	return s;
				131	}
				132
				133	void convDestroyRev( void * setup )
				134	{
				135	struct ConvSetup * s = (struct ConvSetup*)setup;
				136	pffastconv_free(s->H);
				137	pffastconv_free(setup);
				138	}
				139
				140
				141	pfnConvolution ConvGetFnPtrRev( void * setup )
				142	{
				143	struct ConvSetup * s = (struct ConvSetup*)setup;
				144	if (!s)
				145	return NULL;
				146	return s->pfn;
				147	}
				148
				149
				150	void convSimdDestroy( void * setup )
				151	{
				152	convDestroyRev(setup);
				153	}
				154
				155
				156	void * fastConvSetup( float * H, int N, int * BlkLen, int flags )
				157	{
				158	void * p = pffastconv_new_setup( H, N, BlkLen, flags );
				159	if (!p)
				160	printf("fastConvSetup(N = %d, BlkLen = %d, flags = %d) = NULL\n", N, BlkLen, flags);
				161	return p;
				162	}
				163
				164
				165	void fastConvDestroy( void * setup )
				166	{
				167	pffastconv_destroy_setup( (PFFASTCONV_Setup*)setup );
				168	}
				169
				170
				171
				172	int slow_conv_R(void * setup, const float * input, int len, float output, const float Yref, int applyFlush)
				173	{
				174	struct ConvSetup * p = (struct ConvSetup*)setup;
				175	const float * RESTRICT X = input;
				176	const float * RESTRICT Hrev = p->H;
				177	float * RESTRICT Y = output;
				178	const int Nr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * p->N;
				179	const int lenNr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * (len - p->N);
				180	int i, j;
				181	(void)Yref;
				182	(void)applyFlush;
				183
				184	if (p->flags & PFFASTCONV_CPLX_INP_OUT)
				185	{
				186	for ( i = 0; i <= lenNr; i += 2 )
				187	{
				188	float sumRe = 0.0F, sumIm = 0.0F;
				189	for ( j = 0; j < Nr; j += 2 )
				190	{
				191	sumRe += X[i+j ] * Hrev[j];
				192	sumIm += X[i+j+1] * Hrev[j+1];
				193	}
				194	Y[i ] = sumRe;
				195	Y[i+1] = sumIm;
				196	}
				197	return i/2;
				198	}
				199	else
				200	{
				201	for ( i = 0; i <= lenNr; ++i )
				202	{
				203	float sum = 0.0F;
				204	for (j = 0; j < Nr; ++j )
				205	sum += X[i+j] * Hrev[j];
				206	Y[i] = sum;
				207	}
				208	return i;
				209	}
				210	}
				211
				212
				213
				214	int slow_conv_A(void * setup, const float * input, int len, float output, const float Yref, int applyFlush)
				215	{
				216	float sum[4];
				217	struct ConvSetup * p = (struct ConvSetup*)setup;
				218	const float * RESTRICT X = input;
				219	const float * RESTRICT Hrev = p->H;
				220	float * RESTRICT Y = output;
				221	const int Nr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * p->N;
				222	const int lenNr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * (len - p->N);
				223	int i, j;
				224	(void)Yref;
				225	(void)applyFlush;
				226
				227	if (p->flags & PFFASTCONV_CPLX_INP_OUT)
				228	{
				229	if ( (Nr & 3) == 0 )
				230	{
				231	for ( i = 0; i <= lenNr; i += 2 )
				232	{
				233	sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
				234	for (j = 0; j < Nr; j += 4 )
				235	{
				236	sum[0] += X[i+j] * Hrev[j];
				237	sum[1] += X[i+j+1] * Hrev[j+1];
				238	sum[2] += X[i+j+2] * Hrev[j+2];
				239	sum[3] += X[i+j+3] * Hrev[j+3];
				240	}
				241	Y[i ] = sum[0] + sum[2];
				242	Y[i+1] = sum[1] + sum[3];
				243	}
				244	}
				245	else
				246	{
				247	const int M = Nr & (~3);
				248	for ( i = 0; i <= lenNr; i += 2 )
				249	{
				250	float tailSumRe = 0.0F, tailSumIm = 0.0F;
				251	sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
				252	for (j = 0; j < M; j += 4 )
				253	{
				254	sum[0] += X[i+j ] * Hrev[j ];
				255	sum[1] += X[i+j+1] * Hrev[j+1];
				256	sum[2] += X[i+j+2] * Hrev[j+2];
				257	sum[3] += X[i+j+3] * Hrev[j+3];
				258	}
				259	for ( ; j < Nr; j += 2 ) {
				260	tailSumRe += X[i+j ] * Hrev[j ];
				261	tailSumIm += X[i+j+1] * Hrev[j+1];
				262	}
				263	Y[i ] = ( sum[0] + sum[2] ) + tailSumRe;
				264	Y[i+1] = ( sum[1] + sum[3] ) + tailSumIm;
				265	}
				266	}
				267	return i/2;
				268	}
				269	else
				270	{
				271	if ( (Nr & 3) == 0 )
				272	{
				273	for ( i = 0; i <= lenNr; ++i )
				274	{
				275	sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
				276	for (j = 0; j < Nr; j += 4 )
				277	{
				278	sum[0] += X[i+j] * Hrev[j];
				279	sum[1] += X[i+j+1] * Hrev[j+1];
				280	sum[2] += X[i+j+2] * Hrev[j+2];
				281	sum[3] += X[i+j+3] * Hrev[j+3];
				282	}
				283	Y[i] = sum[0] + sum[1] + sum[2] + sum[3];
				284	}
				285	return i;
				286	}
				287	else
				288	{
				289	const int M = Nr & (~3);
				290	/* printf("A: Nr = %d, M = %d, H[M] = %f, H[M+1] = %f, H[M+2] = %f, H[M+3] = %f\n", Nr, M, Hrev[M], Hrev[M+1], Hrev[M+2], Hrev[M+3] ); */
				291	for ( i = 0; i <= lenNr; ++i )
				292	{
				293	float tailSum = 0.0;
				294	sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
				295	for (j = 0; j < M; j += 4 )
				296	{
				297	sum[0] += X[i+j] * Hrev[j];
				298	sum[1] += X[i+j+1] * Hrev[j+1];
				299	sum[2] += X[i+j+2] * Hrev[j+2];
				300	sum[3] += X[i+j+3] * Hrev[j+3];
				301	}
				302	for ( ; j < Nr; ++j )
				303	tailSum += X[i+j] * Hrev[j];
				304	Y[i] = (sum[0] + sum[1]) + (sum[2] + sum[3]) + tailSum;
				305	}
				306	return i;
				307	}
				308	}
				309	}
				310
				311
				312	int slow_conv_B(void * setup, const float * input, int len, float output, const float Yref, int applyFlush)
				313	{
				314	float sum[4];
				315	struct ConvSetup * p = (struct ConvSetup*)setup;
				316	(void)Yref;
				317	(void)applyFlush;
				318	if (p->flags & PFFASTCONV_SYMMETRIC)
				319	{
				320	const float * RESTRICT X = input;
				321	const float * RESTRICT Hrev = p->H;
				322	float * RESTRICT Y = output;
				323	const int Nr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * p->N;
				324	const int lenNr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * (len - p->N);
				325	const int h = Nr / 2 -4;
				326	const int E = Nr -4;
				327	int i, j;
				328
				329	if (p->flags & PFFASTCONV_CPLX_INP_OUT)
				330	{
				331	for ( i = 0; i <= lenNr; i += 2 )
				332	{
				333	const int k = i + E;
				334	sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
				335	for (j = 0; j <= h; j += 4 )
				336	{
				337	sum[0] += Hrev[j ] * ( X[i+j ] + X[k-j+2] );
				338	sum[1] += Hrev[j+1] * ( X[i+j+1] + X[k-j+3] );
				339	sum[2] += Hrev[j+2] * ( X[i+j+2] + X[k-j ] );
				340	sum[3] += Hrev[j+3] * ( X[i+j+3] + X[k-j+1] );
				341	}
				342	Y[i ] = sum[0] + sum[2];
				343	Y[i+1] = sum[1] + sum[3];
				344	}
				345	return i/2;
				346	}
				347	else
				348	{
				349	for ( i = 0; i <= lenNr; ++i )
				350	{
				351	const int k = i + E;
				352	sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
				353	for (j = 0; j <= h; j += 4 )
				354	{
				355	sum[0] += Hrev[j ] * ( X[i+j ] + X[k-j+3] );
				356	sum[1] += Hrev[j+1] * ( X[i+j+1] + X[k-j+2] );
				357	sum[2] += Hrev[j+2] * ( X[i+j+2] + X[k-j+1] );
				358	sum[3] += Hrev[j+3] * ( X[i+j+3] + X[k-j ] );
				359	}
				360	Y[i] = sum[0] + sum[1] + sum[2] + sum[3];
				361	}
				362	return i;
				363	}
				364	}
				365	else
				366	{
				367	const float * RESTRICT X = input;
				368	const float * RESTRICT Hrev = p->H;
				369	float * RESTRICT Y = output;
				370	const int Nr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * p->N;
				371	const int lenNr = ((p->flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1) * (len - p->N);
				372	int i, j;
				373
				374	if (p->flags & PFFASTCONV_CPLX_INP_OUT)
				375	{
				376	for ( i = 0; i <= lenNr; i += 2 )
				377	{
				378	sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
				379	for (j = 0; j < Nr; j += 4 )
				380	{
				381	sum[0] += X[i+j] * Hrev[j];
				382	sum[1] += X[i+j+1] * Hrev[j+1];
				383	sum[2] += X[i+j+2] * Hrev[j+2];
				384	sum[3] += X[i+j+3] * Hrev[j+3];
				385	}
				386	Y[i ] = sum[0] + sum[2];
				387	Y[i+1] = sum[1] + sum[3];
				388	}
				389	return i/2;
				390	}
				391	else
				392	{
				393	if ( (Nr & 3) == 0 )
				394	{
				395	for ( i = 0; i <= lenNr; ++i )
				396	{
				397	sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
				398	for (j = 0; j < Nr; j += 4 )
				399	{
				400	sum[0] += X[i+j] * Hrev[j];
				401	sum[1] += X[i+j+1] * Hrev[j+1];
				402	sum[2] += X[i+j+2] * Hrev[j+2];
				403	sum[3] += X[i+j+3] * Hrev[j+3];
				404	}
				405	Y[i] = (sum[0] + sum[1]) + (sum[2] + sum[3]);
				406	}
				407	return i;
				408	}
				409	else
				410	{
				411	const int M = Nr & (~3);
				412	/* printf("B: Nr = %d\n", Nr ); */
				413	for ( i = 0; i <= lenNr; ++i )
				414	{
				415	float tailSum = 0.0;
				416	sum[0] = sum[1] = sum[2] = sum[3] = 0.0F;
				417	for (j = 0; j < M; j += 4 )
				418	{
				419	sum[0] += X[i+j] * Hrev[j];
				420	sum[1] += X[i+j+1] * Hrev[j+1];
				421	sum[2] += X[i+j+2] * Hrev[j+2];
				422	sum[3] += X[i+j+3] * Hrev[j+3];
				423	}
				424	for ( ; j < Nr; ++j )
				425	tailSum += X[i+j] * Hrev[j];
				426	Y[i] = (sum[0] + sum[1]) + (sum[2] + sum[3]) + tailSum;
				427	}
				428	return i;
				429	}
				430	}
				431	}
				432
				433	}
				434
				435
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	436	int fast_conv(void * setup, const float * X, int len, float Y, const float Yref, int applyFlush)
				437	{
				438	(void)Yref;
				439	return pffastconv_apply( (PFFASTCONV_Setup*)setup, X, len, Y, applyFlush );
				440	}
				441
				442
				443
				444	void printFirst( const float * V, const char * st, const int N, const int perLine )
				445	{
				446	(void)V; (void)st; (void)N; (void)perLine;
				447	return;
				448	#if 0
				449	int i;
				450	for ( i = 0; i < N; ++i )
				451	{
				452	if ( (i % perLine) == 0 )
				453	printf("\n%s[%d]", st, i);
				454	printf("\t%.1f", V[i]);
				455	}
				456	printf("\n");
				457	#endif
				458	}
				459
				460
				461
				462	#define NUMY 11
				463
				464
				465	int test(int FILTERLEN, int convFlags, const int testOutLen, int printDbg, int printSpeed) {
				466	double t0, t1, tstop, td, tdref;
				467	float X, H;
				468	float *Y[NUMY];
				469	int64_t outN[NUMY];
				470	/* 256 KFloats or 16 MFloats data */
				471	#if 1
				472	const int len = testOutLen ? (1 << 18) : (1 << 24);
				473	#elif 0
				474	const int len = testOutLen ? (1 << 18) : (1 << 13);
				475	#else
				476	const int len = testOutLen ? (1 << 18) : (1024);
				477	#endif
				478	const int cplxFactor = ( convFlags & PFFASTCONV_CPLX_INP_OUT ) ? 2 : 1;
				479	const int lenC = len / cplxFactor;
				480
				481	int yi, yc, posMaxErr;
				482	float yRangeMin, yRangeMax, yErrLimit, maxErr = 0.0;
				483	int i, j, numErrOverLimit, iter;
				484	int retErr = 0;
				485
hayati ayguen	ca11241	2020-04-13 00:19:40 +0200	[diff] [blame]	486	/* 0 1 2 3 4 5 6 7 8 9 */
				487	pfnConvSetup aSetup[NUMY] = { convSetupRev, convSetupRev, convSetupRev, fastConvSetup, fastConvSetup, fastConvSetup, fastConvSetup, fastConvSetup, fastConvSetup, fastConvSetup };
				488	pfnConvDestroy aDestroy[NUMY] = { convDestroyRev, convDestroyRev, convDestroyRev, fastConvDestroy, fastConvDestroy, fastConvDestroy, fastConvDestroy, fastConvDestroy, fastConvDestroy, fastConvDestroy };
				489	pfnGetConvFnPtr aGetFnPtr[NUMY] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, };
				490	pfnConvolution aConv[NUMY] = { slow_conv_R, slow_conv_A, slow_conv_B, fast_conv, fast_conv, fast_conv, fast_conv, fast_conv, fast_conv, fast_conv };
				491	const char * convText[NUMY] = { "R(non-simd)", "A(non-simd)", "B(non-simd)", "fast_conv_64", "fast_conv_128", "fast_conv_256", "fast_conv_512", "fast_conv_1K", "fast_conv_2K", "fast_conv_4K" };
				492	int aFastAlgo[NUMY] = { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 };
				493	void * aSetupCfg[NUMY] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL };
				494	int aBlkLen[NUMY] = { 1024, 1024, 1024, 64, 128, 256, 512, 1024, 2048, 4096 };
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	495	#if 1
hayati ayguen	ca11241	2020-04-13 00:19:40 +0200	[diff] [blame]	496	int aRunAlgo[NUMY] = { 1, 1, 1, FILTERLEN<64, FILTERLEN<128, FILTERLEN<256, FILTERLEN<512, FILTERLEN<1024, FILTERLEN<2048, FILTERLEN<4096 };
				497	#elif 0
				498	int aRunAlgo[NUMY] = { 1, 0, 0, 0 && FILTERLEN<64, 1 && FILTERLEN<128, 1 && FILTERLEN<256, 0 && FILTERLEN<512, 0 && FILTERLEN<1024, 0 && FILTERLEN<2048, 0 && FILTERLEN<4096 };
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	499	#else
hayati ayguen	ca11241	2020-04-13 00:19:40 +0200	[diff] [blame]	500	int aRunAlgo[NUMY] = { 1, 1, 1, 0 && FILTERLEN<64, 0 && FILTERLEN<128, 1 && FILTERLEN<256, 0 && FILTERLEN<512, 0 && FILTERLEN<1024, 0 && FILTERLEN<2048, 0 && FILTERLEN<4096 };
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	501	#endif
				502	double aSpeedFactor[NUMY], aDuration[NUMY], procSmpPerSec[NUMY];
				503
				504	X = pffastconv_malloc( (unsigned)(len+4) * sizeof(float) );
				505	for ( i=0; i < NUMY; ++i)
				506	{
hayati ayguen	ca11241	2020-04-13 00:19:40 +0200	[diff] [blame]	507	if ( 1 \|\| i < 2 )
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	508	Y[i] = pffastconv_malloc( (unsigned)len * sizeof(float) );
				509	else
				510	Y[i] = Y[1];
				511
hayati ayguen	e6cffc9	2020-02-28 19:57:30 +0100	[diff] [blame]	512	Y[i][0] = 123.F; /* test for pffft_zconvolve_no_accu() */
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	513	aSpeedFactor[i] = -1.0;
				514	aDuration[i] = -1.0;
				515	procSmpPerSec[i] = -1.0;
				516	}
				517
				518	H = pffastconv_malloc((unsigned)FILTERLEN * sizeof(float));
				519
				520	/* initialize input */
				521	if ( convFlags & PFFASTCONV_CPLX_INP_OUT )
				522	{
				523	for ( i = 0; i < lenC; ++i )
				524	{
				525	X[2i ] = (float)(i % 4093); / 4094 is a prime number. see https://en.wikipedia.org/wiki/List_of_prime_numbers */
				526	X[2*i+1] = (float)((i+2048) % 4093);
				527	}
				528	}
				529	else
				530	{
				531	for ( i = 0; i < len; ++i )
				532	X[i] = (float)(i % 4093); /* 4094 is a prime number. see https://en.wikipedia.org/wiki/List_of_prime_numbers */
				533	}
				534	X[ len ] = INVALID_FLOAT_VAL;
				535	X[ len +1 ] = INVALID_FLOAT_VAL;
				536	X[ len +2 ] = INVALID_FLOAT_VAL;
				537	X[ len +3 ] = INVALID_FLOAT_VAL;
				538
				539	if (!testOutLen)
				540	printFirst( X, "X", 64, 8 );
				541
				542	/* filter coeffs */
				543	memset( H, 0, FILTERLEN * sizeof(float) );
				544	#if 1
				545	if ( convFlags & PFFASTCONV_SYMMETRIC )
				546	{
				547	const int half = FILTERLEN / 2;
				548	for ( j = 0; j < half; ++j ) {
				549	switch (j % 3) {
				550	case 0: H[j] = H[FILTERLEN-1-j] = -1.0F; break;
				551	case 1: H[j] = H[FILTERLEN-1-j] = 1.0F; break;
				552	case 2: H[j] = H[FILTERLEN-1-j] = 0.5F; break;
				553	}
				554	}
				555	}
				556	else
				557	{
				558	for ( j = 0; j < FILTERLEN; ++j ) {
				559	switch (j % 3) {
				560	case 0: H[j] = -1.0F; break;
				561	case 1: H[j] = 1.0F; break;
				562	case 2: H[j] = 0.5F; break;
				563	}
				564	}
				565	}
				566	#else
				567	H[0] = 1.0F;
				568	H[FILTERLEN -1] = 1.0F;
				569	#endif
				570	if (!testOutLen)
				571	printFirst( H, "H", FILTERLEN, 8 );
				572
				573	printf("\n");
				574	printf("filterLen = %d\t%s%s\t%s:\n", FILTERLEN,
				575	((convFlags & PFFASTCONV_CPLX_INP_OUT)?"cplx":"real"),
				576	(convFlags & PFFASTCONV_CPLX_INP_OUT)?((convFlags & PFFASTCONV_CPLX_SINGLE_FFT)?" single":" 2x") : "",
				577	((convFlags & PFFASTCONV_SYMMETRIC)?"symmetric":"non-sym") );
				578
				579	while (1)
				580	{
				581
				582	for ( yi = 0; yi < NUMY; ++yi )
				583	{
				584	if (!aRunAlgo[yi])
				585	continue;
				586
				587	aSetupCfg[yi] = aSetup[yi]( H, FILTERLEN, &aBlkLen[yi], convFlags );
				588
				589	/* get effective apply function ptr */
				590	if ( aSetupCfg[yi] && aGetFnPtr[yi] )
				591	aConv[yi] = aGetFnPtr[yi]( aSetupCfg[yi] );
				592
				593	if ( aSetupCfg[yi] && aConv[yi] ) {
				594	if (testOutLen)
				595	{
				596	t0 = uclock_sec();
				597	outN[yi] = aConv[yi]( aSetupCfg[yi], X, lenC, Y[yi], Y[0], 1 /* applyFlush */ );
				598	t1 = uclock_sec();
				599	td = t1 - t0;
				600	}
				601	else
				602	{
				603	const int blkLen = 4096; /* required for 'fast_conv_4K' */
				604	int64_t offC = 0, offS, Nout;
				605	int k;
				606	iter = 0;
				607	outN[yi] = 0;
				608	t0 = uclock_sec();
				609	tstop = t0 + 0.25; /* benchmark duration: 250 ms */
				610	do {
				611	for ( k = 0; k < 128 && offC +blkLen < lenC; ++k )
				612	{
				613	offS = cplxFactor * offC;
				614	Nout = aConv[yi]( aSetupCfg[yi], X +offS, blkLen, Y[yi] +offS, Y[0], (offC +blkLen >= lenC) /* applyFlush */ );
				615	offC += Nout;
				616	++iter;
				617	if ( !Nout )
				618	break;
				619	if ( offC +blkLen >= lenC )
				620	{
				621	outN[yi] += offC;
				622	offC = 0;
				623	}
				624	}
				625	t1 = uclock_sec();
				626	} while ( t1 < tstop );
				627	outN[yi] += offC;
				628	td = t1 - t0;
hayati ayguen	e6cffc9	2020-02-28 19:57:30 +0100	[diff] [blame]	629	procSmpPerSec[yi] = cplxFactor * (double)outN[yi] / td;
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	630	}
				631	}
				632	else
				633	{
				634	t0 = t1 = td = 0.0;
				635	outN[yi] = 0;
				636	}
				637	aDuration[yi] = td;
				638	if ( yi == 0 ) {
				639	const float * Yvals = Y[0];
				640	const int64_t refOutLen = cplxFactor * outN[0];
				641	tdref = td;
				642	if (printDbg) {
				643	printf("convolution '%s' took: %f ms\n", convText[yi], td*1000.0);
				644	printf(" convolution '%s' output size %" PRId64 " == (cplx) len %d + %" PRId64 "\n", convText[yi], outN[yi], len / cplxFactor, outN[yi] - len / cplxFactor);
				645	}
				646	aSpeedFactor[yi] = 1.0;
				647	/* */
				648	yRangeMin = FLT_MAX;
				649	yRangeMax = FLT_MIN;
				650	for ( i = 0; i < refOutLen; ++i )
				651	{
				652	if ( yRangeMax < Yvals[i] ) yRangeMax = Yvals[i];
				653	if ( yRangeMin > Yvals[i] ) yRangeMin = Yvals[i];
				654	}
				655	yErrLimit = fabsf(yRangeMax - yRangeMin) / ( 100.0F * 1000.0F );
				656	/* yErrLimit = 0.01F; */
				657	if (testOutLen) {
				658	if (1) {
				659	printf("reference output len = %" PRId64 " smp\n", outN[0]);
				660	printf("reference output range \|%.1f ..%.1f\| = %.1f ==> err limit = %f\n", yRangeMin, yRangeMax, yRangeMax - yRangeMin, yErrLimit);
				661	}
				662	printFirst( Yvals, "Yref", 64, 8 );
				663	}
				664	}
				665	else
				666	{
				667	aSpeedFactor[yi] = tdref / td;
				668	if (printDbg) {
				669	printf("\nconvolution '%s' took: %f ms == %f %% == %f X\n", convText[yi], td1000.0, td 100 / tdref, tdref / td);
				670	printf(" convolution '%s' output size %" PRId64 " == (cplx) len %d + %" PRId64 "\n", convText[yi], outN[yi], len / cplxFactor, outN[yi] - len / cplxFactor);
				671	}
				672	}
				673	}
				674
				675	int iMaxSpeedSlowAlgo = -1;
				676	int iFirstFastAlgo = -1;
				677	int iMaxSpeedFastAlgo = -1;
				678	int iPrintedRefOutLen = 0;
				679	{
				680	for ( yc = 1; yc < NUMY; ++yc )
				681	{
				682	if (!aRunAlgo[yc])
				683	continue;
				684	if (aFastAlgo[yc]) {
				685	if ( iMaxSpeedFastAlgo < 0 \|\| aSpeedFactor[yc] > aSpeedFactor[iMaxSpeedFastAlgo] )
				686	iMaxSpeedFastAlgo = yc;
				687
				688	if (iFirstFastAlgo < 0)
				689	iFirstFastAlgo = yc;
				690	}
				691	else
				692	{
				693	if ( iMaxSpeedSlowAlgo < 0 \|\| aSpeedFactor[yc] > aSpeedFactor[iMaxSpeedSlowAlgo] )
				694	iMaxSpeedSlowAlgo = yc;
				695	}
				696	}
				697
				698	if (printSpeed)
				699	{
				700	if (testOutLen)
				701	{
				702	if (iMaxSpeedSlowAlgo >= 0 )
				703	printf("fastest slow algorithm is '%s' at speed %f X ; abs duration %f ms\n", convText[iMaxSpeedSlowAlgo], aSpeedFactor[iMaxSpeedSlowAlgo], 1000.0 * aDuration[iMaxSpeedSlowAlgo]);
hayati ayguen	ca11241	2020-04-13 00:19:40 +0200	[diff] [blame]	704	if (0 != iMaxSpeedSlowAlgo && aRunAlgo[0])
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	705	printf("slow algorithm '%s' at speed %f X ; abs duration %f ms\n", convText[0], aSpeedFactor[0], 1000.0 * aDuration[0]);
hayati ayguen	ca11241	2020-04-13 00:19:40 +0200	[diff] [blame]	706	if (1 != iMaxSpeedSlowAlgo && aRunAlgo[1])
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	707	printf("slow algorithm '%s' at speed %f X ; abs duration %f ms\n", convText[1], aSpeedFactor[1], 1000.0 * aDuration[1]);
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	708
				709	if (iFirstFastAlgo >= 0 && iFirstFastAlgo != iMaxSpeedFastAlgo && aRunAlgo[iFirstFastAlgo])
				710	printf("first fast algorithm is '%s' at speed %f X ; abs duration %f ms\n", convText[iFirstFastAlgo], aSpeedFactor[iFirstFastAlgo], 1000.0 * aDuration[iFirstFastAlgo]);
				711	if (iFirstFastAlgo >= 0 && iFirstFastAlgo+1 != iMaxSpeedFastAlgo && iFirstFastAlgo+1 < NUMY && aRunAlgo[iFirstFastAlgo+1])
				712	printf("2nd fast algorithm is '%s' at speed %f X ; abs duration %f ms\n", convText[iFirstFastAlgo+1], aSpeedFactor[iFirstFastAlgo+1], 1000.0 * aDuration[iFirstFastAlgo+1]);
				713
				714	if ( 0 <= iMaxSpeedFastAlgo && iMaxSpeedFastAlgo < NUMY && aRunAlgo[iMaxSpeedFastAlgo] )
				715	{
				716	printf("fastest fast algorithm is '%s' at speed %f X ; abs duration %f ms\n", convText[iMaxSpeedFastAlgo], aSpeedFactor[iMaxSpeedFastAlgo], 1000.0 * aDuration[iMaxSpeedFastAlgo]);
				717	if ( 0 <= iMaxSpeedSlowAlgo && iMaxSpeedSlowAlgo < NUMY && aRunAlgo[iMaxSpeedSlowAlgo] )
				718	printf("fast / slow ratio: %f X\n", aSpeedFactor[iMaxSpeedFastAlgo] / aSpeedFactor[iMaxSpeedSlowAlgo] );
				719	}
				720	printf("\n");
				721	}
				722	else
				723	{
				724	for ( yc = 0; yc < NUMY; ++yc )
				725	{
				726	if (!aRunAlgo[yc] \|\| procSmpPerSec[yc] <= 0.0)
				727	continue;
hayati ayguen	e6cffc9	2020-02-28 19:57:30 +0100	[diff] [blame]	728	printf("algo '%s':\t%.2f MSmp\tin\t%.1f ms\t= %g kSmpPerSec\n",
				729	convText[yc], (double)outN[yc]/(1000.0 * 1000.0), 1000.0 * aDuration[yc], procSmpPerSec[yc] * 0.001 );
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	730	}
				731	}
				732
				733	}
				734	}
				735
				736
				737	for ( yc = 1; yc < NUMY; ++yc )
				738	{
				739	const float * Yref;
				740	const float * Ycurr;
				741	int outMin;
				742
				743	if (!aRunAlgo[yc])
				744	continue;
				745
				746	if (printDbg)
				747	printf("\n");
				748
				749	if ( outN[yc] == 0 )
				750	{
				751	printf("output size 0: '%s' not implemented\n", convText[yc]);
				752	}
				753	else if ( outN[0] != outN[yc] /* && aFastAlgo[yc] */ && testOutLen )
				754	{
				755	if (!iPrintedRefOutLen)
				756	{
				757	printf("reference output size = %" PRId64 ", delta to (cplx) input length = %" PRId64 " smp\n", outN[0], (len / cplxFactor) - outN[0]);
				758	iPrintedRefOutLen = 1;
				759	}
				760	printf("output size doesn't match!: ref (FILTERLEN %d) returned %" PRId64 " smp, '%s' returned %" PRId64 " smp : delta = %" PRId64 " smp\n",
				761	FILTERLEN, outN[0], convText[yc], outN[yc], outN[yc] - outN[0] );
				762	retErr = 1;
				763	}
				764
				765	posMaxErr = 0;
				766	maxErr = -1.0;
				767	Yref = Y[0];
				768	Ycurr = Y[yc];
				769	outMin = ( outN[yc] < outN[0] ) ? outN[yc] : outN[0];
				770	numErrOverLimit = 0;
				771	for ( i = 0; i < outMin; ++i )
				772	{
				773	if ( numErrOverLimit < 6 && fabs(Ycurr[i] - Yref[i]) >= yErrLimit )
				774	{
				775	printf("algo '%s': at %d: *ERROR* = %f, errLimit = %f, ref = %f, actual = %f\n",
				776	convText[yc], i, fabs(Ycurr[i] - Yref[i]), yErrLimit, Yref[i], Ycurr[i] );
				777	++numErrOverLimit;
				778	}
				779
				780	if ( fabs(Ycurr[i] - Yref[i]) > maxErr )
				781	{
				782	maxErr = fabsf(Ycurr[i] - Yref[i]);
				783	posMaxErr = i;
				784	}
				785	}
				786
				787	if ( printDbg \|\| (iMaxSpeedSlowAlgo == i) \|\| (iMaxSpeedFastAlgo == i) )
				788	printf("max difference for '%s' is %g at sample idx %d of max inp 4093-1 == %f %%\n", convText[yc], maxErr, posMaxErr, maxErr * 100.0 / 4092.0 );
				789	}
				790
				791	break;
				792	}
				793
				794	pffastconv_free(X);
				795	for ( i=0; i < NUMY; ++i)
				796	{
hayati ayguen	ca11241	2020-04-13 00:19:40 +0200	[diff] [blame]	797	if ( 1 \|\| i < 2 )
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	798	pffastconv_free( Y[i] );
				799	if (!aRunAlgo[i])
				800	continue;
				801	aDestroy[i]( aSetupCfg[i] );
				802	}
				803
				804	pffastconv_free(H);
				805
				806	return retErr;
				807	}
				808
/* small functions inside pffft.c that will detect (compiler) bugs with respect to simd instructions */
void validate_pffft_simd(void);
int validate_pffft_simd_ex(FILE * DbgOut);
				812
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	813
				814	int main(int argc, char **argv)
				815	{
				816	int result = 0;
				817	int i, k, M, flagsA, flagsB, flagsC, testOutLen, printDbg, printSpeed;
				818	int testOutLens = 1, benchConv = 1, quickTest = 0, slowTest = 0;
				819	int testReal = 1, testCplx = 1, testSymetric = 0;
				820
				821	for ( i = 1; i < argc; ++i ) {
hayati ayguen	ca11241	2020-04-13 00:19:40 +0200	[diff] [blame]	822
				823	if (!strcmp(argv[i], "--test-simd")) {
				824	int numErrs = validate_pffft_simd_ex(stdout);
				825	fprintf( ( numErrs != 0 ? stderr : stdout ), "validate_pffft_simd_ex() returned %d errors!\n", numErrs);
				826	return ( numErrs > 0 ? 1 : 0 );
				827	}
				828
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	829	if (!strcmp(argv[i], "--no-len")) {
				830	testOutLens = 0;
				831	}
				832	else if (!strcmp(argv[i], "--no-bench")) {
				833	benchConv = 0;
				834	}
				835	else if (!strcmp(argv[i], "--quick")) {
				836	quickTest = 1;
				837	}
				838	else if (!strcmp(argv[i], "--slow")) {
				839	slowTest = 1;
				840	}
				841	else if (!strcmp(argv[i], "--real")) {
				842	testCplx = 0;
				843	}
				844	else if (!strcmp(argv[i], "--cplx")) {
				845	testReal = 0;
				846	}
				847	else if (!strcmp(argv[i], "--sym")) {
				848	testSymetric = 1;
				849	}
				850	else /* if (!strcmp(argv[i], "--help")) */ {
hayati ayguen	ca11241	2020-04-13 00:19:40 +0200	[diff] [blame]	851	printf("usage: %s [--test-simd] [--no-len] [--no-bench] [--quick\|--slow] [--real\|--cplx] [--sym]\n", argv[0]);
hayati ayguen	b2d2936	2020-01-02 00:06:09 +0100	[diff] [blame]	852	exit(1);
				853	}
				854	}
				855
				856
				857	if (testOutLens)
				858	{
				859	for ( k = 0; k < 3; ++k )
				860	{
				861	if ( (k == 0 && !testReal) \|\| (k > 0 && !testCplx) )
				862	continue;
				863	printf("\n\n==========\n");
				864	printf("testing %s %s output lengths ..\n", (k == 0 ? "real" : "cplx"), ( k == 0 ? "" : (k==1 ? "2x" : "single") ) );
				865	printf("==========\n");
				866	flagsA = (k == 0) ? 0 : PFFASTCONV_CPLX_INP_OUT;
				867	flagsB = flagsA \| ( testSymetric ? PFFASTCONV_SYMMETRIC : 0 );
				868	flagsC = flagsB \| PFFASTCONV_CPLX_SINGLE_FFT;
				869	testOutLen = 1;
				870	printDbg = 0;
				871	printSpeed = 0;
				872	for ( M = 128 - 4; M <= (quickTest ? 128+16 : 256); ++M )
				873	{
				874	if ( (M % 16) != 0 && testSymetric )
				875	continue;
				876	result \|= test(M, flagsB, testOutLen, printDbg, printSpeed);
				877	}
				878	}
				879	}
				880
				881	if (benchConv)
				882	{
				883	for ( k = 0; k < 3; ++k )
				884	{
				885	if ( (k == 0 && !testReal) \|\| (k > 0 && !testCplx) )
				886	continue;
				887	printf("\n\n==========\n");
				888	printf("starting %s %s benchmark against linear convolutions ..\n", (k == 0 ? "real" : "cplx"), ( k == 0 ? "" : (k==1 ? "2x" : "single") ) );
				889	printf("==========\n");
				890	flagsA = (k == 0) ? 0 : PFFASTCONV_CPLX_INP_OUT;
				891	flagsB = flagsA \| ( testSymetric ? PFFASTCONV_SYMMETRIC : 0 );
				892	flagsC = flagsB \| ( k == 2 ? PFFASTCONV_CPLX_SINGLE_FFT : 0 );
				893	testOutLen = 0;
				894	printDbg = 0;
				895	printSpeed = 1;
				896	if (!slowTest) {
				897	result \|= test( 32, flagsC, testOutLen, printDbg, printSpeed);
				898	result \|= test( 32+ 16, flagsC, testOutLen, printDbg, printSpeed);
				899	result \|= test( 64, flagsC, testOutLen, printDbg, printSpeed);
				900	result \|= test( 64+ 32, flagsC, testOutLen, printDbg, printSpeed);
				901	result \|= test(128, flagsC, testOutLen, printDbg, printSpeed);
				902	}
				903	if (!quickTest) {
				904	result \|= test(128+ 64, flagsC, testOutLen, printDbg, printSpeed);
				905	result \|= test(256, flagsC, testOutLen, printDbg, printSpeed);
				906	result \|= test(256+128, flagsC, testOutLen, printDbg, printSpeed);
				907	result \|= test(512, flagsC, testOutLen, printDbg, printSpeed);
				908	result \|= test(1024, flagsC, testOutLen, printDbg, printSpeed);
				909	}
				910	}
				911	}
				912
				913	return result;
				914	}
				915