blob: ebbd4daff31228384fad846a0c6f7e9040ceb3e4 [file] [log] [blame]
hayati ayguenb2d29362020-01-02 00:06:09 +01001/* Copyright (c) 2019 Hayati Ayguen ( h_ayguen@web.de )
2
3 Redistribution and use of the Software in source and binary forms,
4 with or without modification, is permitted provided that the
5 following conditions are met:
6
7 - Neither the names of PFFFT, PFFASTCONV, nor the names of its
8 sponsors or contributors may be used to endorse or promote products
9 derived from this Software without specific prior written permission.
10
11 - Redistributions of source code must retain the above copyright
12 notices, this list of conditions, and the disclaimer below.
13
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions, and the disclaimer below in the
16 documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
23 HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
27 SOFTWARE.
28*/
29
/*
   PFFASTCONV : a Pretty Fast Fast Convolution

   This is basically the implementation of fast convolution,
   utilizing the FFT (pffft).

   Restrictions:

   - 1D transforms only, with 32-bit single precision.

   - all (float*) pointers in the functions below are expected to
     have a "SIMD-compatible" alignment, that is 16 bytes on x86 and
     powerpc CPUs.

   You can allocate such buffers with the functions
   pffft_aligned_malloc / pffft_aligned_free (or with stuff like
   posix_memalign..)

*/
49
50#ifndef PFFASTCONV_H
51#define PFFASTCONV_H
52
53#include <stddef.h> // for size_t
hayati ayguen55be34f2020-01-15 11:37:06 +010054#include "pffft.h"
55
hayati ayguenb2d29362020-01-02 00:06:09 +010056
57#ifdef __cplusplus
58extern "C" {
59#endif
60
/* Opaque handle for the internal convolution state.
 * A setup contains temporary data that is computed within the
 * convolution, so one instance can't be shared by many threads.
 */
struct PFFASTCONV_Setup;
typedef struct PFFASTCONV_Setup PFFASTCONV_Setup;
66
/* Option bits for the convolution setup; every enumerator occupies its own bit. */
typedef enum {
    /* Set when input and output are complex, with real and imaginary
     * parts interleaved in both vectors.
     * input[] then holds inputLen complex values (2 * inputLen floats)
     * and output[] is written with complex values as well.
     * Without this flag the input is interpreted as a real vector.
     */
    PFFASTCONV_CPLX_INP_OUT = 0x01,

    /* Set when filterCoeffs is complex, with real and imaginary parts
     * interleaved: filterCoeffs[] holds filterLen complex values
     * (2 * filterLen floats).
     * Without this flag the filter is interpreted as a real vector.
     * ATTENTION: this is not implemented yet!
     */
    PFFASTCONV_CPLX_FILTER = 0x02,

    /* Set PFFASTCONV_DIRECT_INP only when all of these conditions are met:
     *  1- the input vector X is aligned
     *  2- (all) inputLen <= output blockLen
     *  3- X has a minimum length of the output blockLen
     *  4- the additional samples from inputLen .. blockLen-1
     *     contain valid, small and non-NaN samples (ideally zero)
     *
     * This option is ignored when PFFASTCONV_CPLX_INP_OUT is set.
     */
    PFFASTCONV_DIRECT_INP = 0x04,

    /* Set PFFASTCONV_DIRECT_OUT only when all of these conditions are met:
     *  1- the output vector Y is aligned
     *  2- (all) inputLen <= output blockLen
     *  3- Y has a minimum length of the output blockLen
     *
     * This option is ignored when PFFASTCONV_CPLX_INP_OUT is set.
     */
    PFFASTCONV_DIRECT_OUT = 0x08,

    /* Hint to process complex data with one single FFT.
     * The default is to use 2 FFTs: one for the real part and one for
     * the imaginary part.
     */
    PFFASTCONV_CPLX_SINGLE_FFT = 0x10,

    /* Purely informational: the filter is symmetric .. and filterLen
     * is a multiple of 8.
     */
    PFFASTCONV_SYMMETRIC = 0x20
} pffastconv_flags_t;
114
115 /*
116 prepare for performing fast convolution(s) of 'filterLen' with input 'blockLen'.
117 The output 'blockLen' might be bigger to allow the fast convolution.
118
119 'flags' are bitmask over the 'pffastconv_flags_t' enum.
120
121 PFFASTCONV_Setup structure can't be shared accross multiple filters
122 or concurrent threads.
123 */
hayati ayguen55be34f2020-01-15 11:37:06 +0100124 PFFASTCONV_Setup * pffastconv_new_setup( const PFFFT_FLOAT * filterCoeffs, int filterLen, int * blockLen, int flags );
hayati ayguenb2d29362020-01-02 00:06:09 +0100125
126 void pffastconv_destroy_setup(PFFASTCONV_Setup *);
127
128 /*
129 Perform the fast convolution.
130
131 'input' and 'output' don't need to be aligned - unless any of
132 PFFASTCONV_DIRECT_INP or PFFASTCONV_DIRECT_OUT is set in 'flags'.
133
134 inputLen > output 'blockLen' (from pffastconv_new_setup()) is allowed.
135 in this case, multiple FFTs are called internally, to process the
136 input[].
137
138 'output' vector must have size >= (inputLen - filterLen + 1)
139
140 set bool option 'applyFlush' to process the full input[].
141 with this option, 'tail samples' of input are also processed.
142 This might be inefficient, because the FFT is called to produce
143 few(er) output samples, than possible.
144 This option is useful to process the last samples of an input (file)
145 or to reduce latency.
146
147 return value is the number of produced samples in output[].
148 the same amount of samples is processed from input[]. to continue
149 processing, the caller must save/move the remaining samples of
150 input[].
151
152 */
hayati ayguen55be34f2020-01-15 11:37:06 +0100153 int pffastconv_apply(PFFASTCONV_Setup * s, const PFFFT_FLOAT *input, int inputLen, PFFFT_FLOAT *output, int applyFlush);
hayati ayguenb2d29362020-01-02 00:06:09 +0100154
/*
 * Allocation helpers of the convolution module.
 * pffastconv_malloc() takes the requested size in bytes.
 * NOTE(review): presumably these hand out / release buffers with the
 * SIMD-compatible alignment this header asks for, like
 * pffft_aligned_malloc / pffft_aligned_free - confirm against the
 * implementation.
 */
void *pffastconv_malloc(size_t nb_bytes);
void pffastconv_free(void *p);
157
/*
 * Returns 4 or 1, depending on whether support for SSE/Altivec
 * instructions was enabled when building pffft.c.
 *
 * Declared with (void): in C an empty parameter list would leave the
 * parameters unspecified instead of declaring "no arguments".
 */
int pffastconv_simd_size(void);
160
161
162#ifdef __cplusplus
163}
164#endif
165
166#endif /* PFFASTCONV_H */