dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 1 | /* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com ) |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 2 | Copyright (c) 2020 Hayati Ayguen ( h_ayguen@web.de ) |
dario mambro | c2be344 | 2020-03-26 19:40:30 +0100 | [diff] [blame] | 3 | Copyright (c) 2020 Dario Mambro ( dario.mambro@gmail.com ) |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 4 | |
| 5 | Based on original fortran 77 code from FFTPACKv4 from NETLIB |
| 6 | (http://www.netlib.org/fftpack), authored by Dr Paul Swarztrauber |
| 7 | of NCAR, in 1985. |
| 8 | |
| 9 | As confirmed by the NCAR fftpack software curators, the following |
| 10 | FFTPACKv5 license applies to FFTPACKv4 sources. My changes are |
| 11 | released under the same terms. |
| 12 | |
| 13 | FFTPACK license: |
| 14 | |
| 15 | http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html |
| 16 | |
| 17 | Copyright (c) 2004 the University Corporation for Atmospheric |
| 18 | Research ("UCAR"). All rights reserved. Developed by NCAR's |
| 19 | Computational and Information Systems Laboratory, UCAR, |
| 20 | www.cisl.ucar.edu. |
| 21 | |
| 22 | Redistribution and use of the Software in source and binary forms, |
| 23 | with or without modification, is permitted provided that the |
| 24 | following conditions are met: |
| 25 | |
| 26 | - Neither the names of NCAR's Computational and Information Systems |
| 27 | Laboratory, the University Corporation for Atmospheric Research, |
| 28 | nor the names of its sponsors or contributors may be used to |
| 29 | endorse or promote products derived from this Software without |
| 30 | specific prior written permission. |
| 31 | |
| 32 | - Redistributions of source code must retain the above copyright |
| 33 | notices, this list of conditions, and the disclaimer below. |
| 34 | |
| 35 | - Redistributions in binary form must reproduce the above copyright |
| 36 | notice, this list of conditions, and the disclaimer below in the |
| 37 | documentation and/or other materials provided with the |
| 38 | distribution. |
| 39 | |
| 40 | THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 41 | EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF |
| 42 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 43 | NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT |
| 44 | HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, |
| 45 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| 46 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| 47 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE |
| 48 | SOFTWARE. |
| 49 | |
| 50 | |
| 51 | PFFFT : a Pretty Fast FFT. |
| 52 | |
| 53 | This file is largely based on the original FFTPACK implementation, modified in |
| 54 | order to take advantage of SIMD instructions of modern CPUs. |
| 55 | */ |
| 56 | |
| 57 | /* |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 58 | NOTE: This file is adapted from Julien Pommier's original PFFFT, |
| 59 | which works on 32 bit floating point precision using SSE instructions, |
| 60 | to work with 64 bit floating point precision using AVX instructions. |
| 61 | Author: Dario Mambro @ https://github.com/unevens/pffft |
| 62 | */ |
| 63 | |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 64 | #include "pffft_double.h" |
| 65 | |
/* Detect the compiler flavour. _MSC_VER is tested first, so it wins if a
   toolchain happens to define both macros. */
#if defined(_MSC_VER)
#  define COMPILER_MSVC
#elif defined(__GNUC__)
#  define COMPILER_GCC
#endif

/* Pull in the header that declares alloca()/_alloca() for this toolchain. */
#if defined(COMPILER_MSVC)
#  define _USE_MATH_DEFINES   /* has to be seen before <math.h> to get M_PI & co. */
#  include <malloc.h>
#elif defined(__MINGW32__) || defined(__MINGW64__)
#  include <malloc.h>         /* MinGW provides alloca() here; <alloca.h> is typically missing */
#else
#  include <alloca.h>
#endif

#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <math.h>
#include <assert.h>

/*
   Compiler abstraction macros.

   ALWAYS_INLINE(return_type)  : wraps the return type of a function that
                                 must be inlined.
   NEVER_INLINE(return_type)   : wraps the return type of a function that
                                 must not be inlined.
   RESTRICT                    : the compiler's spelling of 'restrict'.
   VLA_ARRAY_ON_STACK(type__, varname__, size__)
                               : declares 'varname__' as stack storage for
                                 'size__' elements of 'type__'. GCC uses a
                                 real variable-length array, MSVC (which has
                                 no VLAs) uses _alloca().

   The GCC variant of VLA_ARRAY_ON_STACK keeps its historical trailing ';'
   so that existing call sites expand exactly as before.
*/
#if defined(COMPILER_GCC)
#  define ALWAYS_INLINE(return_type) inline return_type __attribute__ ((always_inline))
#  define NEVER_INLINE(return_type) return_type __attribute__ ((noinline))
#  define RESTRICT __restrict
#  define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__];
#elif defined(COMPILER_MSVC)
#  define ALWAYS_INLINE(return_type) __forceinline return_type
#  define NEVER_INLINE(return_type) __declspec(noinline) return_type
#  define RESTRICT __restrict
   /* size__ is parenthesized so that an argument such as 'n + pad' is
      multiplied as a whole instead of under-allocating. */
#  define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca((size__) * sizeof(type__))
#endif


#ifdef COMPILER_MSVC
/* Silenced MSVC warnings:
     4244 - conversion with possible loss of data
     4305 - truncation from one type to another
     4204 - nonstandard extension: non-constant aggregate initializer
     4456 - declaration hides a previous local declaration */
#  pragma warning( disable : 4244 4305 4204 4456 )
#endif
| 102 | |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 103 | /* |
| 104 | vector support macros: the rest of the code is independent of |
| 105 | AVX -- adding support for other platforms with 4-element |
| 106 | vectors should be limited to these macros |
| 107 | */ |
hayati ayguen | ca11241 | 2020-04-13 00:19:40 +0200 | [diff] [blame] | 108 | #include "simd/pf_double.h" |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 109 | |
/*
   Bind the generic names below to their double-precision counterparts.
   They are presumably consumed by pffft_priv_impl.h, which is included
   right after this section, so that the same implementation text can be
   compared with (and shared by) the single-precision build.
*/

/* from here on, every 'float' token in this translation unit means 'double' */
#define float                       double

/* setup object and public transform entry points */
#define SETUP_STRUCT                PFFFTD_Setup
#define FUNC_NEW_SETUP              pffftd_new_setup
#define FUNC_DESTROY                pffftd_destroy_setup
#define FUNC_TRANSFORM_UNORDRD      pffftd_transform
#define FUNC_TRANSFORM_ORDERED      pffftd_transform_ordered
#define FUNC_ZREORDER               pffftd_zreorder
#define FUNC_ZCONVOLVE_ACCUMULATE   pffftd_zconvolve_accumulate
#define FUNC_ZCONVOLVE_NO_ACCU      pffftd_zconvolve_no_accu

/* aligned memory helpers and SIMD introspection / validation */
#define FUNC_ALIGNED_MALLOC         pffftd_aligned_malloc
#define FUNC_ALIGNED_FREE           pffftd_aligned_free
#define FUNC_SIMD_SIZE              pffftd_simd_size
#define FUNC_SIMD_ARCH              pffftd_simd_arch
#define FUNC_VALIDATE_SIMD_A        validate_pffftd_simd
#define FUNC_VALIDATE_SIMD_EX       validate_pffftd_simd_ex

/* pre-/post-processing passes and the transform driver */
#define FUNC_CPLX_FINALIZE          pffftd_cplx_finalize
#define FUNC_CPLX_PREPROCESS        pffftd_cplx_preprocess
#define FUNC_REAL_PREPROCESS_4X4    pffftd_real_preprocess_4x4
#define FUNC_REAL_PREPROCESS        pffftd_real_preprocess
#define FUNC_REAL_FINALIZE_4X4      pffftd_real_finalize_4x4
#define FUNC_REAL_FINALIZE          pffftd_real_finalize
#define FUNC_TRANSFORM_INTERNAL     pffftd_transform_internal

/* double-precision math routines */
#define FUNC_COS                    cos
#define FUNC_SIN                    sin
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 139 | |
hayati ayguen | 88918bb | 2020-03-26 22:34:46 +0100 | [diff] [blame] | 140 | #include "pffft_priv_impl.h" |
hayati ayguen | 01d26a7 | 2020-03-26 09:02:09 +0100 | [diff] [blame] | 141 | |
dario mambro | 5850463 | 2020-03-24 14:49:50 +0100 | [diff] [blame] | 142 | |