Julien Pommier | 432b3e8 | 2013-01-12 19:28:03 +0100 | [diff] [blame] | 1 | /* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com ) |
hayati ayguen | e6cffc9 | 2020-02-28 19:57:30 +0100 | [diff] [blame] | 2 | Copyright (c) 2020 Hayati Ayguen ( h_ayguen@web.de ) |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 3 | |
| 4 | Based on original fortran 77 code from FFTPACKv4 from NETLIB |
| 5 | (http://www.netlib.org/fftpack), authored by Dr Paul Swarztrauber |
| 6 | of NCAR, in 1985. |
| 7 | |
| 8 | As confirmed by the NCAR fftpack software curators, the following |
| 9 | FFTPACKv5 license applies to FFTPACKv4 sources. My changes are |
| 10 | released under the same terms. |
| 11 | |
| 12 | FFTPACK license: |
| 13 | |
| 14 | http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html |
| 15 | |
| 16 | Copyright (c) 2004 the University Corporation for Atmospheric |
| 17 | Research ("UCAR"). All rights reserved. Developed by NCAR's |
| 18 | Computational and Information Systems Laboratory, UCAR, |
| 19 | www.cisl.ucar.edu. |
| 20 | |
| 21 | Redistribution and use of the Software in source and binary forms, |
| 22 | with or without modification, is permitted provided that the |
| 23 | following conditions are met: |
| 24 | |
| 25 | - Neither the names of NCAR's Computational and Information Systems |
| 26 | Laboratory, the University Corporation for Atmospheric Research, |
| 27 | nor the names of its sponsors or contributors may be used to |
| 28 | endorse or promote products derived from this Software without |
| 29 | specific prior written permission. |
| 30 | |
| 31 | - Redistributions of source code must retain the above copyright |
| 32 | notices, this list of conditions, and the disclaimer below. |
| 33 | |
| 34 | - Redistributions in binary form must reproduce the above copyright |
| 35 | notice, this list of conditions, and the disclaimer below in the |
| 36 | documentation and/or other materials provided with the |
| 37 | distribution. |
| 38 | |
| 39 | THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 40 | EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF |
| 41 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 42 | NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT |
| 43 | HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, |
| 44 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| 45 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| 46 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE |
| 47 | SOFTWARE. |
| 48 | |
| 49 | |
| 50 | PFFFT : a Pretty Fast FFT. |
| 51 | |
| 52 | This file is largely based on the original FFTPACK implementation, modified in |
| 53 | order to take advantage of SIMD instructions of modern CPUs. |
| 54 | */ |
| 55 | |
| 56 | /* |
| 57 | ChangeLog: |
| 58 | - 2011/10/02, version 1: This is the very first release of this file. |
| 59 | */ |
| 60 | |
hayati ayguen | 3673ac0 | 2019-12-22 07:09:56 +0100 | [diff] [blame] | 61 | #include "pffft.h" |
| 62 | |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 63 | /* detect compiler flavour: defines COMPILER_MSVC when _MSC_VER is defined, otherwise COMPILER_GCC when __GNUC__ is defined; neither macro is defined for any other compiler */ |
| 64 | #if defined(_MSC_VER) |
| 65 | # define COMPILER_MSVC |
| 66 | #elif defined(__GNUC__) |
| 67 | # define COMPILER_GCC |
| 68 | #endif |
| 69 | |
meng ke | a82d034 | 2018-11-23 03:48:01 +0800 | [diff] [blame] | 70 | #include <stdlib.h> |
hayati ayguen | bc8d4a8 | 2019-12-25 01:27:33 +0100 | [diff] [blame] | 71 | #include <stdint.h> |
meng ke | a82d034 | 2018-11-23 03:48:01 +0800 | [diff] [blame] | 72 | #include <stdio.h> |
| 73 | #include <math.h> |
| 74 | #include <assert.h> |
| 75 | |
/*
  Compiler-specific helper macros, selected by COMPILER_GCC / COMPILER_MSVC:

    ALWAYS_INLINE(return_type)  wraps a function's return type with the
                                compiler's forced-inline spelling.
    NEVER_INLINE(return_type)   wraps a function's return type with the
                                compiler's no-inline spelling.
    RESTRICT                    expands to __restrict in both branches.
    VLA_ARRAY_ON_STACK(type__, varname__, size__)
                                declares varname__ as storage for size__
                                elements of type__: an array declaration in
                                the GCC branch, a pointer initialised from
                                _alloca() in the MSVC branch.

  NOTE(review): the GCC expansion of VLA_ARRAY_ON_STACK ends with ';' while
  the MSVC expansion does not, so the two branches differ in whether the
  call site must supply the semicolon -- confirm against the call sites.
  NOTE(review): size__ is not parenthesised in the MSVC expansion, so an
  argument such as `a + b` would be multiplied incorrectly -- confirm that
  callers only pass simple expressions.
  NOTE(review): when neither COMPILER_GCC nor COMPILER_MSVC is defined, none
  of these macros is defined here.
*/
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 76 | #if defined(COMPILER_GCC) |
Julien Pommier | 432b3e8 | 2013-01-12 19:28:03 +0100 | [diff] [blame] | 77 | # define ALWAYS_INLINE(return_type) inline return_type __attribute__ ((always_inline)) |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 78 | # define NEVER_INLINE(return_type) return_type __attribute__ ((noinline)) |
| 79 | # define RESTRICT __restrict |
| 80 | # define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__]; |
| 81 | #elif defined(COMPILER_MSVC) |
| 82 | # define ALWAYS_INLINE(return_type) __forceinline return_type |
| 83 | # define NEVER_INLINE(return_type) __declspec(noinline) return_type |
| 84 | # define RESTRICT __restrict |
Julien Pommier | 2a19584 | 2012-10-11 11:11:41 +0200 | [diff] [blame] | 85 | # define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca(size__ * sizeof(type__)) |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 86 | #endif |
| 87 | |
| 88 | |
/*
  When building with MSVC, disable warnings 4244, 4305, 4204 and 4456 from
  this point on.
  NOTE(review): per the MSVC documentation these appear to be C4244
  (conversion with possible loss of data), C4305 (truncation, e.g. double to
  float), C4204 (non-constant aggregate initializer extension) and C4456
  (declaration hides a previous local declaration) -- confirm.
*/
R. Martinho Fernandes | a57d697 | 2015-10-06 18:13:12 +0200 | [diff] [blame] | 89 | #ifdef COMPILER_MSVC |
meng ke | 2f55d81 | 2018-11-23 04:03:17 +0800 | [diff] [blame] | 90 | #pragma warning( disable : 4244 4305 4204 4456 ) |
R. Martinho Fernandes | a57d697 | 2015-10-06 18:13:12 +0200 | [diff] [blame] | 91 | #endif |
| 92 | |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 93 | /* |
| 94 | vector support macros: the rest of the code is independent of |
| 95 | SSE/Altivec/NEON -- adding support for other platforms with 4-element |
| 96 | vectors should be limited to these macros |
| 97 | */ |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 98 | #include "simd/pf_float.h" |
Julien Pommier | 370d209 | 2011-11-19 18:04:25 +0100 | [diff] [blame] | 99 | |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 100 | /* bind the generic SETUP_STRUCT / FUNC_* names to the identifiers listed below (FUNC_COS / FUNC_SIN to cosf / sinf) before pffft_priv_impl.h is included at the end of this block; NOTE(review): presumably that header is written in terms of these names so the same implementation text can be compared against other bindings -- confirm */ |
| 101 | #define SETUP_STRUCT PFFFT_Setup |
| 102 | #define FUNC_NEW_SETUP pffft_new_setup |
| 103 | #define FUNC_DESTROY pffft_destroy_setup |
| 104 | #define FUNC_TRANSFORM_UNORDRD pffft_transform |
| 105 | #define FUNC_TRANSFORM_ORDERED pffft_transform_ordered |
| 106 | #define FUNC_ZREORDER pffft_zreorder |
| 107 | #define FUNC_ZCONVOLVE_ACCUMULATE pffft_zconvolve_accumulate |
| 108 | #define FUNC_ZCONVOLVE_NO_ACCU pffft_zconvolve_no_accu |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 109 | |
| 110 | #define FUNC_ALIGNED_MALLOC pffft_aligned_malloc |
| 111 | #define FUNC_ALIGNED_FREE pffft_aligned_free |
| 112 | #define FUNC_SIMD_SIZE pffft_simd_size |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 113 | #define FUNC_SIMD_ARCH pffft_simd_arch |
| 114 | #define FUNC_VALIDATE_SIMD_A validate_pffft_simd |
| 115 | #define FUNC_VALIDATE_SIMD_EX validate_pffft_simd_ex |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 116 | |
| 117 | #define FUNC_CPLX_FINALIZE pffft_cplx_finalize |
| 118 | #define FUNC_CPLX_PREPROCESS pffft_cplx_preprocess |
| 119 | #define FUNC_REAL_PREPROCESS_4X4 pffft_real_preprocess_4x4 |
| 120 | #define FUNC_REAL_PREPROCESS pffft_real_preprocess |
| 121 | #define FUNC_REAL_FINALIZE_4X4 pffft_real_finalize_4x4 |
| 122 | #define FUNC_REAL_FINALIZE pffft_real_finalize |
| 123 | #define FUNC_TRANSFORM_INTERNAL pffft_transform_internal |
| 124 | |
| 125 | #define FUNC_COS cosf |
| 126 | #define FUNC_SIN sinf |
| 127 | |
| 128 | |
hayati ayguen | 88918bb | 2020-03-26 22:34:46 +0100 | [diff] [blame] | 129 | #include "pffft_priv_impl.h" |
hayati ayguen | e6cffc9 | 2020-02-28 19:57:30 +0100 | [diff] [blame] | 130 | |
hayati ayguen | e6cffc9 | 2020-02-28 19:57:30 +0100 | [diff] [blame] | 131 | |