Blame - pffft.h - platform/external/pffft

blob: 31bb731010b8d8bc6cd0273694b40dab1c36624d [file] [log] [blame]

Julien Pommier	432b3e8	2013-01-12 19:28:03 +0100	[diff] [blame]	1	/* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com )
Julien Pommier	370d209	2011-11-19 18:04:25 +0100	[diff] [blame]	2
				3	Based on original fortran 77 code from FFTPACKv4 from NETLIB,
				4	authored by Dr Paul Swarztrauber of NCAR, in 1985.
				5
				6	As confirmed by the NCAR fftpack software curators, the following
				7	FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
				8	released under the same terms.
				9
				10	FFTPACK license:
				11
				12	http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
				13
				14	Copyright (c) 2004 the University Corporation for Atmospheric
				15	Research ("UCAR"). All rights reserved. Developed by NCAR's
				16	Computational and Information Systems Laboratory, UCAR,
				17	www.cisl.ucar.edu.
				18
				19	Redistribution and use of the Software in source and binary forms,
				20	with or without modification, is permitted provided that the
				21	following conditions are met:
				22
				23	- Neither the names of NCAR's Computational and Information Systems
				24	Laboratory, the University Corporation for Atmospheric Research,
				25	nor the names of its sponsors or contributors may be used to
				26	endorse or promote products derived from this Software without
				27	specific prior written permission.
				28
				29	- Redistributions of source code must retain the above copyright
				30	notices, this list of conditions, and the disclaimer below.
				31
				32	- Redistributions in binary form must reproduce the above copyright
				33	notice, this list of conditions, and the disclaimer below in the
				34	documentation and/or other materials provided with the
				35	distribution.
				36
				37	THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
				38	EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
				39	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
				40	NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
				41	HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
				42	EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
				43	ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
				44	CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
				45	SOFTWARE.
				46	*/
				47
				48	/*
				49	PFFFT : a Pretty Fast FFT.
				50
				51	This is basically an adaptation of the single precision fftpack
				52	(v4) as found on netlib taking advantage of SIMD instructions found
				53	on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON).
				54
				55	For architectures where no SIMD instruction is available, the code
				56	falls back to a scalar version.
				57
				58	Restrictions:
				59
				60	- 1D transforms only, with 32-bit single precision.
				61
				62	- supports only transforms for inputs of length N of the form
Julien Pommier	0302e8a	2012-10-11 18:04:09 +0200	[diff] [blame]	63	N=(2^a)(3^b)(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128,
				64	144, 160, etc are all acceptable lengths). Performance is best for
				65	128<=N<=8192.
Julien Pommier	370d209	2011-11-19 18:04:25 +0100	[diff] [blame]	66
				67	- all (float*) pointers in the functions below are expected to
				68	have a "simd-compatible" alignment, that is 16 bytes on x86 and
				69	powerpc CPUs.
				70
				71	You can allocate such buffers with the functions
				72	pffft_aligned_malloc / pffft_aligned_free (or with stuff like
				73	posix_memalign..)
				74
				75	*/
				76
				77	#ifndef PFFFT_H
				78	#define PFFFT_H
				79
hayati ayguen	c974c1d	2020-03-29 03:39:30 +0200	[diff] [blame]	80	#include <stddef.h> /* for size_t */
Julien Pommier	370d209	2011-11-19 18:04:25 +0100	[diff] [blame]	81
				82	#ifdef __cplusplus
				83	extern "C" {
				84	#endif
				85
				86	/* opaque struct holding internal stuff (precomputed twiddle factors)
				87	this struct can be shared by many threads as it contains only
				88	read-only data.
				89	*/
				90	typedef struct PFFFT_Setup PFFFT_Setup;
				91
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	92	#ifndef PFFFT_COMMON_ENUMS
				93	#define PFFFT_COMMON_ENUMS
				94
Julien Pommier	370d209	2011-11-19 18:04:25 +0100	[diff] [blame]	95	/* direction of the transform */
				96	typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
				97
				98	/* type of transform */
				99	typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
				100
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	101	#endif
				102
Julien Pommier	370d209	2011-11-19 18:04:25 +0100	[diff] [blame]	103	/*
				104	prepare for performing transforms of size N -- the returned
				105	PFFFT_Setup structure is read-only so it can safely be shared by
				106	multiple concurrent threads.
				107	*/
				108	PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform);
				109	void pffft_destroy_setup(PFFFT_Setup *);
				110	/*
				111	Perform a Fourier transform. The z-domain data is stored in the
				112	most efficient order for transforming it back, or using it for
				113	convolution. If you need to have its content sorted in the
				114	"usual" way, that is as an array of interleaved complex numbers,
				115	either use pffft_transform_ordered, or call pffft_zreorder after
				116	the forward fft, and before the backward fft.
				117
				118	Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
				119	Typically you will want to scale the backward transform by 1/N.
				120
				121	The 'work' pointer should point to an area of N (2*N for complex
				122	fft) floats, properly aligned. If 'work' is NULL, then the stack will
Julien Pommier	432b3e8	2013-01-12 19:28:03 +0100	[diff] [blame]	123	be used instead (this is probably the best strategy for small
hayati ayguen	4807c2b	2020-01-10 00:00:17 +0100	[diff] [blame]	124	FFTs, say for N < 16384). Threads usually have a small stack, which
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	125	may not provide enough memory, usually leading to a crash!
hayati ayguen	4807c2b	2020-01-10 00:00:17 +0100	[diff] [blame]	126	Use the heap with pffft_aligned_malloc() in this case.
Julien Pommier	370d209	2011-11-19 18:04:25 +0100	[diff] [blame]	127
				128	input and output may alias.
				129	*/
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	130	void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
Julien Pommier	370d209	2011-11-19 18:04:25 +0100	[diff] [blame]	131
				132	/*
				133	Similar to pffft_transform, but makes sure that the output is
				134	ordered as expected (interleaved complex numbers). This is
				135	similar to calling pffft_transform and then pffft_zreorder.
				136
				137	input and output may alias.
				138	*/
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	139	void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
Julien Pommier	370d209	2011-11-19 18:04:25 +0100	[diff] [blame]	140
				141	/*
				142	call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(...,
				143	PFFFT_FORWARD) if you want to have the frequency components in
				144	the correct "canonical" order, as interleaved complex numbers.
				145
				146	(for real transforms, both 0-frequency and half frequency
				147	components, which are real, are assembled in the first entry as
				148	F(0)+i*F(n/2+1). Note that the original fftpack did place
				149	F(n/2+1) at the end of the arrays).
				150
				151	input and output should not alias.
				152	*/
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	153	void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction);
Julien Pommier	370d209	2011-11-19 18:04:25 +0100	[diff] [blame]	154
				155	/*
				156	Perform a multiplication of the frequency components of dft_a and
				157	dft_b and accumulate them into dft_ab. The arrays should have
				158	been obtained with pffft_transform(.., PFFFT_FORWARD) and should
				159	not have been reordered with pffft_zreorder (otherwise just
				160	perform the operation yourself as the dft coefs are stored as
				161	interleaved complex numbers).
				162
				163	the operation performed is: dft_ab += (dft_a * dft_b)*scaling
				164
				165	The dft_a, dft_b and dft_ab pointers may alias.
				166	*/
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	167	void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
Julien Pommier	370d209	2011-11-19 18:04:25 +0100	[diff] [blame]	168
hayati ayguen	e6cffc9	2020-02-28 19:57:30 +0100	[diff] [blame]	169	/*
				170	Perform a multiplication of the frequency components of dft_a and
				171	dft_b and put result in dft_ab. The arrays should have
				172	been obtained with pffft_transform(.., PFFFT_FORWARD) and should
				173	not have been reordered with pffft_zreorder (otherwise just
				174	perform the operation yourself as the dft coefs are stored as
				175	interleaved complex numbers).
hayati ayguen	eeb17fc	2020-04-13 04:02:07 +0200	[diff] [blame]	176
hayati ayguen	e6cffc9	2020-02-28 19:57:30 +0100	[diff] [blame]	177	the operation performed is: dft_ab = (dft_a * dft_b)*scaling
hayati ayguen	eeb17fc	2020-04-13 04:02:07 +0200	[diff] [blame]	178
hayati ayguen	e6cffc9	2020-02-28 19:57:30 +0100	[diff] [blame]	179	The dft_a, dft_b and dft_ab pointers may alias.
				180	*/
				181	void pffft_zconvolve_no_accu(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
				182
hayati ayguen	eeb17fc	2020-04-13 04:02:07 +0200	[diff] [blame]	183	/* return 4 or 1 depending on whether support for SSE/NEON/Altivec instructions was enabled when building pffft.c */
				184	int pffft_simd_size();
				185
				186	/* return string identifier of used architecture (SSE/NEON/Altivec/..) */
				187	const char * pffft_simd_arch();
				188
				189
				190	/* following functions are identical to the pffftd_ functions */
				191
hayati ayguen	e6cffc9	2020-02-28 19:57:30 +0100	[diff] [blame]	192	/* simple helper to get minimum possible fft size */
				193	int pffft_min_fft_size(pffft_transform_t transform);
				194
				195	/* simple helper to determine next power of 2
				196	- without inexact/rounding floating point operations
				197	*/
				198	int pffft_next_power_of_two(int N);
				199
				200	/* simple helper to determine if power of 2 - returns bool */
				201	int pffft_is_power_of_two(int N);
				202
Julien Pommier	370d209	2011-11-19 18:04:25 +0100	[diff] [blame]	203	/*
				204	the float buffers must have the correct alignment (16-byte boundary
				205	on intel and powerpc). This function may be used to obtain such
				206	correctly aligned buffers.
				207	*/
				208	void *pffft_aligned_malloc(size_t nb_bytes);
				209	void pffft_aligned_free(void *);
				210
Julien Pommier	370d209	2011-11-19 18:04:25 +0100	[diff] [blame]	211	#ifdef __cplusplus
				212	}
				213	#endif
				214
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	215	#endif /* PFFFT_H */
				216