Blame - pffft_double.h - platform/external/pffft

blob: d83c06d4eba08d0943c80ae3721cee1183ac97d9 [file] [log] [blame]

dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	1	/* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com )
				2
				3	Based on original fortran 77 code from FFTPACKv4 from NETLIB,
				4	authored by Dr Paul Swarztrauber of NCAR, in 1985.
				5
				6	As confirmed by the NCAR fftpack software curators, the following
				7	FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
				8	released under the same terms.
				9
				10	FFTPACK license:
				11
				12	http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
				13
				14	Copyright (c) 2004 the University Corporation for Atmospheric
				15	Research ("UCAR"). All rights reserved. Developed by NCAR's
				16	Computational and Information Systems Laboratory, UCAR,
				17	www.cisl.ucar.edu.
				18
				19	Redistribution and use of the Software in source and binary forms,
				20	with or without modification, is permitted provided that the
				21	following conditions are met:
				22
				23	- Neither the names of NCAR's Computational and Information Systems
				24	Laboratory, the University Corporation for Atmospheric Research,
				25	nor the names of its sponsors or contributors may be used to
				26	endorse or promote products derived from this Software without
				27	specific prior written permission.
				28
				29	- Redistributions of source code must retain the above copyright
				30	notices, this list of conditions, and the disclaimer below.
				31
				32	- Redistributions in binary form must reproduce the above copyright
				33	notice, this list of conditions, and the disclaimer below in the
				34	documentation and/or other materials provided with the
				35	distribution.
				36
				37	THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
				38	EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
				39	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
				40	NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
				41	HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
				42	EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
				43	ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
				44	CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
				45	SOFTWARE.
				46	*/
				47	/*
				48	NOTE: This file is adapted from Julien Pommier's original PFFFT,
				49	which works on 32 bit floating point precision using SSE instructions,
				50	to work with 64 bit floating point precision using AVX instructions.
				51	Author: Dario Mambro @ https://github.com/unevens/pffft
				52	*/
				53	/*
				54	PFFFT : a Pretty Fast FFT.
				55
				56	This is basically an adaptation of the single precision fftpack
				57	(v4) as found on netlib taking advantage of SIMD instruction found
				58	on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON).
				59
				60	For architectures where no SIMD instruction is available, the code
				61	falls back to a scalar version.
				62
				63	Restrictions:
				64
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	65	- 1D transforms only, with 64-bit double precision.
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	66
				67	- supports only transforms for inputs of length N of the form
				68	N=(2^a)(3^b)(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128,
				69	144, 160, etc are all acceptable lengths). Performance is best for
				70	128<=N<=8192.
				71
				72	- all (double*) pointers in the functions below are expected to
				73	have an "simd-compatible" alignment, that is 32 bytes on x86 and
				74	powerpc CPUs.
				75
				76	You can allocate such buffers with the functions
				77	pffft_aligned_malloc / pffft_aligned_free (or with stuff like
				78	posix_memalign..)
				79
				80	*/
				81
				82	#ifndef PFFFT_DOUBLE_H
				83	#define PFFFT_DOUBLE_H
				84
hayati ayguen	c974c1d	2020-03-29 03:39:30 +0200	[diff] [blame]	85	#include <stddef.h> /* for size_t */
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	86
				87	#ifdef __cplusplus
				88	extern "C" {
				89	#endif
				90
				91	/* opaque struct holding internal stuff (precomputed twiddle factors)
				92	this struct can be shared by many threads as it contains only
				93	read-only data.
				94	*/
dario mambro	b42ce92	2020-03-24 20:30:39 +0100	[diff] [blame]	95	typedef struct PFFFTD_Setup PFFFTD_Setup;
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	96
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	97	#ifndef PFFFT_COMMON_ENUMS
				98	#define PFFFT_COMMON_ENUMS
				99
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	100	/* direction of the transform */
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	101	typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	102
				103	/* type of transform */
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	104	typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
				105
				106	#endif
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	107
				108	/*
				109	prepare for performing transforms of size N -- the returned
dario mambro	b42ce92	2020-03-24 20:30:39 +0100	[diff] [blame]	110	PFFFTD_Setup structure is read-only so it can safely be shared by
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	111	multiple concurrent threads.
				112	*/
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	113	PFFFTD_Setup *pffftd_new_setup(int N, pffft_transform_t transform);
dario mambro	b42ce92	2020-03-24 20:30:39 +0100	[diff] [blame]	114	void pffftd_destroy_setup(PFFFTD_Setup *);
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	115	/*
				116	Perform a Fourier transform , The z-domain data is stored in the
				117	most efficient order for transforming it back, or using it for
				118	convolution. If you need to have its content sorted in the
				119	"usual" way, that is as an array of interleaved complex numbers,
				120	either use pffft_transform_ordered , or call pffft_zreorder after
				121	the forward fft, and before the backward fft.
				122
				123	Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
				124	Typically you will want to scale the backward transform by 1/N.
				125
				126	The 'work' pointer should point to an area of N (2*N for complex
				127	fft) doubles, properly aligned. If 'work' is NULL, then stack will
				128	be used instead (this is probably the best strategy for small
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	129	FFTs, say for N < 16384). Threads usually have a small stack, that
				130	there's no sufficient amount of memory, usually leading to a crash!
				131	Use the heap with pffft_aligned_malloc() in this case.
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	132
				133	input and output may alias.
				134	*/
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	135	void pffftd_transform(PFFFTD_Setup setup, const double input, double output, double work, pffft_direction_t direction);
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	136
				137	/*
				138	Similar to pffft_transform, but makes sure that the output is
				139	ordered as expected (interleaved complex numbers). This is
				140	similar to calling pffft_transform and then pffft_zreorder.
				141
				142	input and output may alias.
				143	*/
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	144	void pffftd_transform_ordered(PFFFTD_Setup setup, const double input, double output, double work, pffft_direction_t direction);
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	145
				146	/*
				147	call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(...,
				148	PFFFT_FORWARD) if you want to have the frequency components in
				149	the correct "canonical" order, as interleaved complex numbers.
				150
				151	(for real transforms, both 0-frequency and half frequency
				152	components, which are real, are assembled in the first entry as
				153	F(0)+i*F(n/2+1). Note that the original fftpack did place
				154	F(n/2+1) at the end of the arrays).
				155
				156	input and output should not alias.
				157	*/
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	158	void pffftd_zreorder(PFFFTD_Setup setup, const double input, double *output, pffft_direction_t direction);
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	159
				160	/*
				161	Perform a multiplication of the frequency components of dft_a and
				162	dft_b and accumulate them into dft_ab. The arrays should have
				163	been obtained with pffft_transform(.., PFFFT_FORWARD) and should
				164	not have been reordered with pffft_zreorder (otherwise just
				165	perform the operation yourself as the dft coefs are stored as
				166	interleaved complex numbers).
				167
				168	the operation performed is: dft_ab += (dft_a * fdt_b)*scaling
				169
				170	The dft_a, dft_b and dft_ab pointers may alias.
				171	*/
dario mambro	b42ce92	2020-03-24 20:30:39 +0100	[diff] [blame]	172	void pffftd_zconvolve_accumulate(PFFFTD_Setup setup, const double dft_a, const double dft_b, double dft_ab, double scaling);
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	173
Dario Mambro	add5b1d	2020-03-27 20:02:03 +0100	[diff] [blame]	174	/*
hayati ayguen	eeb17fc	2020-04-13 04:02:07 +0200	[diff] [blame]	175	Perform a multiplication of the frequency components of dft_a and
				176	dft_b and put result in dft_ab. The arrays should have
				177	been obtained with pffft_transform(.., PFFFT_FORWARD) and should
				178	not have been reordered with pffft_zreorder (otherwise just
				179	perform the operation yourself as the dft coefs are stored as
				180	interleaved complex numbers).
				181
				182	the operation performed is: dft_ab = (dft_a * fdt_b)*scaling
				183
				184	The dft_a, dft_b and dft_ab pointers may alias.
Dario Mambro	add5b1d	2020-03-27 20:02:03 +0100	[diff] [blame]	185	*/
				186	void pffftd_zconvolve_no_accu(PFFFTD_Setup setup, const double dft_a, const double dft_b, doubledft_ab, double scaling);
				187
hayati ayguen	eeb17fc	2020-04-13 04:02:07 +0200	[diff] [blame]	188	/* return 4 or 1 wether support AVX instructions was enabled when building pffft-double.c */
				189	int pffftd_simd_size();
				190
				191	/* return string identifier of used architecture (AVX/..) */
				192	const char * pffftd_simd_arch();
				193
				194
				195	/* following functions are identical to the pffft_ functions */
Dario Mambro	add5b1d	2020-03-27 20:02:03 +0100	[diff] [blame]	196
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	197	/* simple helper to get minimum possible fft size */
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	198	int pffftd_min_fft_size(pffft_transform_t transform);
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	199
				200	/* simple helper to determine next power of 2
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	201	- without inexact/rounding floating point operations
hayati ayguen	653f77b	2020-03-25 07:46:31 +0100	[diff] [blame]	202	*/
dario mambro	b42ce92	2020-03-24 20:30:39 +0100	[diff] [blame]	203	int pffftd_next_power_of_two(int N);
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	204
				205	/* simple helper to determine if power of 2 - returns bool */
dario mambro	b42ce92	2020-03-24 20:30:39 +0100	[diff] [blame]	206	int pffftd_is_power_of_two(int N);
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	207
				208	/*
				209	the double buffers must have the correct alignment (32-byte boundary
				210	on intel and powerpc). This function may be used to obtain such
				211	correctly aligned buffers.
				212	*/
dario mambro	b42ce92	2020-03-24 20:30:39 +0100	[diff] [blame]	213	void *pffftd_aligned_malloc(size_t nb_bytes);
				214	void pffftd_aligned_free(void *);
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	215
dario mambro	5850463	2020-03-24 14:49:50 +0100	[diff] [blame]	216	#ifdef __cplusplus
				217	}
				218	#endif
				219
hayati ayguen	01d26a7	2020-03-26 09:02:09 +0100	[diff] [blame]	220	#endif /* PFFFT_DOUBLE_H */
				221