blob: 8bb2a65e978fcc9fc6f8a884d0812fc73fa8003e [file] [log] [blame]
hayati ayguenb2d29362020-01-02 00:06:09 +01001/*
hayati ayguenca112412020-04-13 00:19:40 +02002 Copyright (c) 2019 Hayati Ayguen ( h_ayguen@web.de )
hayati ayguenb2d29362020-01-02 00:06:09 +01003 */
4
5#include "pffastconv.h"
6#include "pffft.h"
7
hayati ayguenb2d29362020-01-02 00:06:09 +01008#include <stdlib.h>
9#include <stdint.h>
10#include <stdio.h>
11#include <math.h>
12#include <assert.h>
13#include <string.h>
14
15#define FASTCONV_DBG_OUT 0
16
17
hayati ayguenca112412020-04-13 00:19:40 +020018/* detect compiler flavour */
19#if defined(_MSC_VER)
hayati ayguenb2d29362020-01-02 00:06:09 +010020# define RESTRICT __restrict
hayati ayguenb2d29362020-01-02 00:06:09 +010021#pragma warning( disable : 4244 4305 4204 4456 )
hayati ayguenca112412020-04-13 00:19:40 +020022#elif defined(__GNUC__)
23# define RESTRICT __restrict
hayati ayguenb2d29362020-01-02 00:06:09 +010024#endif
25
hayati ayguenb2d29362020-01-02 00:06:09 +010026
hayati ayguenb2d29362020-01-02 00:06:09 +010027void *pffastconv_malloc(size_t nb_bytes)
28{
hayati ayguenca112412020-04-13 00:19:40 +020029 return pffft_aligned_malloc(nb_bytes);
hayati ayguenb2d29362020-01-02 00:06:09 +010030}
31
32void pffastconv_free(void *p)
33{
hayati ayguenca112412020-04-13 00:19:40 +020034 pffft_aligned_free(p);
hayati ayguenb2d29362020-01-02 00:06:09 +010035}
36
37int pffastconv_simd_size()
38{
hayati ayguenca112412020-04-13 00:19:40 +020039 return pffft_simd_size();
hayati ayguenb2d29362020-01-02 00:06:09 +010040}
41
42
43
44struct PFFASTCONV_Setup
45{
46 float * Xt; /* input == x in time domain - copy for alignment */
47 float * Xf; /* input == X in freq domain */
48 float * Hf; /* filterCoeffs == H in freq domain */
49 float * Mf; /* input * filterCoeffs in freq domain */
50 PFFFT_Setup *st;
51 int filterLen; /* convolution length */
52 int Nfft; /* FFT/block length */
53 int flags;
54 float scale;
55};
56
57
58PFFASTCONV_Setup * pffastconv_new_setup( const float * filterCoeffs, int filterLen, int * blockLen, int flags )
59{
60 PFFASTCONV_Setup * s = NULL;
61 const int cplxFactor = ( (flags & PFFASTCONV_CPLX_INP_OUT) && (flags & PFFASTCONV_CPLX_SINGLE_FFT) ) ? 2 : 1;
hayati ayguen3dab35c2020-02-27 05:13:21 +010062 const int minFftLen = 2*pffft_simd_size()*pffft_simd_size();
hayati ayguene6cffc92020-02-28 19:57:30 +010063 int i, Nfft = 2 * pffft_next_power_of_two(filterLen -1);
hayati ayguenb2d29362020-01-02 00:06:09 +010064#if FASTCONV_DBG_OUT
65 const int iOldBlkLen = *blockLen;
66#endif
67
hayati ayguen3dab35c2020-02-27 05:13:21 +010068 if ( Nfft < minFftLen )
69 Nfft = minFftLen;
70
hayati ayguenb2d29362020-01-02 00:06:09 +010071 if ( flags & PFFASTCONV_CPLX_FILTER )
72 return NULL;
73
74 s = pffastconv_malloc( sizeof(struct PFFASTCONV_Setup) );
75
76 if ( *blockLen > Nfft ) {
77 Nfft = *blockLen;
hayati ayguene6cffc92020-02-28 19:57:30 +010078 Nfft = pffft_next_power_of_two(Nfft);
hayati ayguenb2d29362020-01-02 00:06:09 +010079 }
80 *blockLen = Nfft; /* this is in (complex) samples */
81
82 Nfft *= cplxFactor;
83
84 if ( (flags & PFFASTCONV_DIRECT_INP) && !(flags & PFFASTCONV_CPLX_INP_OUT) )
85 s->Xt = NULL;
86 else
87 s->Xt = pffastconv_malloc((unsigned)Nfft * sizeof(float));
88 s->Xf = pffastconv_malloc((unsigned)Nfft * sizeof(float));
89 s->Hf = pffastconv_malloc((unsigned)Nfft * sizeof(float));
90 s->Mf = pffastconv_malloc((unsigned)Nfft * sizeof(float));
91 s->st = pffft_new_setup(Nfft, PFFFT_REAL); /* with complex: we do 2 x fft() */
92 s->filterLen = filterLen; /* filterLen == convolution length == length of impulse response */
93 if ( cplxFactor == 2 )
94 s->filterLen = 2 * filterLen - 1;
95 s->Nfft = Nfft; /* FFT/block length */
96 s->flags = flags;
97 s->scale = (float)( 1.0 / Nfft );
98
99 memset( s->Xt, 0, (unsigned)Nfft * sizeof(float) );
hayati ayguene6cffc92020-02-28 19:57:30 +0100100 if ( flags & PFFASTCONV_CORRELATION ) {
101 for ( i = 0; i < filterLen; ++i )
102 s->Xt[ ( Nfft - cplxFactor * i ) & (Nfft -1) ] = filterCoeffs[ i ];
103 } else {
104 for ( i = 0; i < filterLen; ++i )
105 s->Xt[ ( Nfft - cplxFactor * i ) & (Nfft -1) ] = filterCoeffs[ filterLen - 1 - i ];
106 }
hayati ayguenb2d29362020-01-02 00:06:09 +0100107
108 pffft_transform(s->st, s->Xt, s->Hf, /* tmp = */ s->Mf, PFFFT_FORWARD);
109
110#if FASTCONV_DBG_OUT
111 printf("\n fastConvSetup(filterLen = %d, blockLen %d) --> blockLen %d, OutLen = %d\n"
112 , filterLen, iOldBlkLen, *blockLen, Nfft - filterLen +1 );
113#endif
114
115 return s;
116}
117
118
119void pffastconv_destroy_setup( PFFASTCONV_Setup * s )
120{
121 if (!s)
122 return;
123 pffft_destroy_setup(s->st);
124 pffastconv_free(s->Mf);
125 pffastconv_free(s->Hf);
126 pffastconv_free(s->Xf);
127 if ( s->Xt )
128 pffastconv_free(s->Xt);
129 pffastconv_free(s);
130}
131
132
133int pffastconv_apply(PFFASTCONV_Setup * s, const float *input_, int cplxInputLen, float *output_, int applyFlush)
134{
135 const float * RESTRICT X = input_;
136 float * RESTRICT Y = output_;
137 const int Nfft = s->Nfft;
138 const int filterLen = s->filterLen;
139 const int flags = s->flags;
140 const int cplxFactor = ( (flags & PFFASTCONV_CPLX_INP_OUT) && (flags & PFFASTCONV_CPLX_SINGLE_FFT) ) ? 2 : 1;
141 const int inputLen = cplxFactor * cplxInputLen;
142 int inpOff, procLen, numOut = 0, j, part, cplxOff;
143
144 /* applyFlush != 0:
145 * inputLen - inpOff -filterLen + 1 > 0
146 * <=> inputLen -filterLen + 1 > inpOff
147 * <=> inpOff < inputLen -filterLen + 1
148 *
149 * applyFlush == 0:
150 * inputLen - inpOff >= Nfft
151 * <=> inputLen - Nfft >= inpOff
152 * <=> inpOff <= inputLen - Nfft
153 * <=> inpOff < inputLen - Nfft + 1
154 */
155
156 if ( cplxFactor == 2 )
157 {
158 const int maxOff = applyFlush ? (inputLen -filterLen + 1) : (inputLen - Nfft + 1);
159#if 0
160 printf( "*** inputLen %d, filterLen %d, Nfft %d => maxOff %d\n", inputLen, filterLen, Nfft, maxOff);
161#endif
162 for ( inpOff = 0; inpOff < maxOff; inpOff += numOut )
163 {
164 procLen = ( (inputLen - inpOff) >= Nfft ) ? Nfft : (inputLen - inpOff);
165 numOut = ( procLen - filterLen + 1 ) & ( ~1 );
166 if (!numOut)
167 break;
168#if 0
169 if (!inpOff)
170 printf("*** inpOff = %d, numOut = %d\n", inpOff, numOut);
171 if (inpOff + filterLen + 2 >= maxOff )
172 printf("*** inpOff = %d, inpOff + numOut = %d\n", inpOff, inpOff + numOut);
173#endif
174
175 if ( flags & PFFASTCONV_DIRECT_INP )
176 {
177 pffft_transform(s->st, X + inpOff, s->Xf, /* tmp = */ s->Mf, PFFFT_FORWARD);
178 }
179 else
180 {
181 memcpy( s->Xt, X + inpOff, (unsigned)procLen * sizeof(float) );
182 if ( procLen < Nfft )
183 memset( s->Xt + procLen, 0, (unsigned)(Nfft - procLen) * sizeof(float) );
184
185 pffft_transform(s->st, s->Xt, s->Xf, /* tmp = */ s->Mf, PFFFT_FORWARD);
186 }
187
hayati ayguene6cffc92020-02-28 19:57:30 +0100188 pffft_zconvolve_no_accu(s->st, s->Xf, s->Hf, /* tmp = */ s->Mf, s->scale);
hayati ayguenb2d29362020-01-02 00:06:09 +0100189
190 if ( flags & PFFASTCONV_DIRECT_OUT )
191 {
192 pffft_transform(s->st, s->Mf, Y + inpOff, s->Xf, PFFFT_BACKWARD);
193 }
194 else
195 {
196 pffft_transform(s->st, s->Mf, s->Xf, /* tmp = */ s->Xt, PFFFT_BACKWARD);
197 memcpy( Y + inpOff, s->Xf, (unsigned)numOut * sizeof(float) );
198 }
199 }
200 return inpOff / cplxFactor;
201 }
202 else
203 {
204 const int maxOff = applyFlush ? (inputLen -filterLen + 1) : (inputLen - Nfft + 1);
205 const int numParts = (flags & PFFASTCONV_CPLX_INP_OUT) ? 2 : 1;
206
207 for ( inpOff = 0; inpOff < maxOff; inpOff += numOut )
208 {
209 procLen = ( (inputLen - inpOff) >= Nfft ) ? Nfft : (inputLen - inpOff);
210 numOut = procLen - filterLen + 1;
211
212 for ( part = 0; part < numParts; ++part ) /* iterate per real/imag component */
213 {
214
215 if ( flags & PFFASTCONV_CPLX_INP_OUT )
216 {
217 cplxOff = 2 * inpOff + part;
218 for ( j = 0; j < procLen; ++j )
219 s->Xt[j] = X[cplxOff + 2 * j];
220 if ( procLen < Nfft )
221 memset( s->Xt + procLen, 0, (unsigned)(Nfft - procLen) * sizeof(float) );
hayati ayguenca112412020-04-13 00:19:40 +0200222
hayati ayguenb2d29362020-01-02 00:06:09 +0100223 pffft_transform(s->st, s->Xt, s->Xf, /* tmp = */ s->Mf, PFFFT_FORWARD);
224 }
225 else if ( flags & PFFASTCONV_DIRECT_INP )
226 {
227 pffft_transform(s->st, X + inpOff, s->Xf, /* tmp = */ s->Mf, PFFFT_FORWARD);
228 }
229 else
230 {
231 memcpy( s->Xt, X + inpOff, (unsigned)procLen * sizeof(float) );
232 if ( procLen < Nfft )
233 memset( s->Xt + procLen, 0, (unsigned)(Nfft - procLen) * sizeof(float) );
234
235 pffft_transform(s->st, s->Xt, s->Xf, /* tmp = */ s->Mf, PFFFT_FORWARD);
236 }
hayati ayguenca112412020-04-13 00:19:40 +0200237
hayati ayguene6cffc92020-02-28 19:57:30 +0100238 pffft_zconvolve_no_accu(s->st, s->Xf, s->Hf, /* tmp = */ s->Mf, s->scale);
hayati ayguenb2d29362020-01-02 00:06:09 +0100239
240 if ( flags & PFFASTCONV_CPLX_INP_OUT )
241 {
242 pffft_transform(s->st, s->Mf, s->Xf, /* tmp = */ s->Xt, PFFFT_BACKWARD);
243
244 cplxOff = 2 * inpOff + part;
245 for ( j = 0; j < numOut; ++j )
246 Y[ cplxOff + 2 * j ] = s->Xf[j];
247 }
248 else if ( flags & PFFASTCONV_DIRECT_OUT )
249 {
250 pffft_transform(s->st, s->Mf, Y + inpOff, s->Xf, PFFFT_BACKWARD);
251 }
252 else
253 {
254 pffft_transform(s->st, s->Mf, s->Xf, /* tmp = */ s->Xt, PFFFT_BACKWARD);
255 memcpy( Y + inpOff, s->Xf, (unsigned)numOut * sizeof(float) );
256 }
257
258 }
259 }
260
261 return inpOff;
262 }
263}
264