Blame - drivers/staging/echo/fir.h - kernel/msm-4.9

blob: 277d20e43f14c64420d460daf40daff361d2bbe3 [file] [log] [blame]

David Rowe	10602db	2008-10-06 21:41:46 -0700	[diff] [blame]	1	/*
				2	* SpanDSP - a series of DSP components for telephony
				3	*
				4	* fir.h - General telephony FIR routines
				5	*
				6	* Written by Steve Underwood <steveu@coppice.org>
				7	*
				8	* Copyright (C) 2002 Steve Underwood
				9	*
				10	* All rights reserved.
				11	*
				12	* This program is free software; you can redistribute it and/or modify
				13	* it under the terms of the GNU General Public License version 2, as
				14	* published by the Free Software Foundation.
				15	*
				16	* This program is distributed in the hope that it will be useful,
				17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				19	* GNU General Public License for more details.
				20	*
				21	* You should have received a copy of the GNU General Public License
				22	* along with this program; if not, write to the Free Software
				23	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
				24	*
				25	* $Id: fir.h,v 1.8 2006/10/24 13:45:28 steveu Exp $
				26	*/
				27
				28	/*! \page fir_page FIR filtering
				29	\section fir_page_sec_1 What does it do?
				30	???.
				31
				32	\section fir_page_sec_2 How does it work?
				33	???.
				34	*/
				35
				36	#if !defined(_FIR_H_)
				37	#define _FIR_H_
				38
				39	/*
				40	Blackfin NOTES & IDEAS:
				41
				42	A simple dot product function is used to implement the filter. This performs
				43	just one MAC/cycle which is inefficient but was easy to implement as a first
				44	pass. The current Blackfin code also uses an unrolled form of the filter
				45	history to avoid 0 length hardware loop issues. This is wasteful of
				46	memory.
				47
				48	Ideas for improvement:
				49
				50	1/ Rewrite filter for dual MAC inner loop. The issue here is handling
				51	history sample offsets that are 16 bit aligned - the dual MAC needs
				52	32 bit aligmnent. There are some good examples in libbfdsp.
				53
				54	2/ Use the hardware circular buffer facility tohalve memory usage.
				55
				56	3/ Consider using internal memory.
				57
				58	Using less memory might also improve speed as cache misses will be
				59	reduced. A drop in MIPs and memory approaching 50% should be
				60	possible.
				61
				62	The foreground and background filters currenlty use a total of
				63	about 10 MIPs/ch as measured with speedtest.c on a 256 TAP echo
				64	can.
				65	*/
				66
				67	#if defined(USE_MMX) \|\| defined(USE_SSE2)
				68	#include "mmx.h"
				69	#endif
				70
				71	/*!
				72	16 bit integer FIR descriptor. This defines the working state for a single
				73	instance of an FIR filter using 16 bit integer coefficients.
				74	*/
				75	typedef struct
				76	{
				77	int taps;
				78	int curr_pos;
				79	const int16_t *coeffs;
				80	int16_t *history;
				81	} fir16_state_t;
				82
				83	/*!
				84	32 bit integer FIR descriptor. This defines the working state for a single
				85	instance of an FIR filter using 32 bit integer coefficients, and filtering
				86	16 bit integer data.
				87	*/
				88	typedef struct
				89	{
				90	int taps;
				91	int curr_pos;
				92	const int32_t *coeffs;
				93	int16_t *history;
				94	} fir32_state_t;
				95
				96	/*!
				97	Floating point FIR descriptor. This defines the working state for a single
				98	instance of an FIR filter using floating point coefficients and data.
				99	*/
				100	typedef struct
				101	{
				102	int taps;
				103	int curr_pos;
				104	const float *coeffs;
				105	float *history;
				106	} fir_float_state_t;
				107
				108	#ifdef __cplusplus
				109	extern "C" {
				110	#endif
				111
				112	static __inline__ const int16_t fir16_create(fir16_state_t fir,
				113	const int16_t *coeffs,
				114	int taps)
				115	{
				116	fir->taps = taps;
				117	fir->curr_pos = taps - 1;
				118	fir->coeffs = coeffs;
Tzafrir Cohen	f55ccbf	2008-10-12 08:13:21 +0200	[diff] [blame^]	119	#if defined(USE_MMX) \|\| defined(USE_SSE2) \|\| defined(__bfin__)
David Rowe	10602db	2008-10-06 21:41:46 -0700	[diff] [blame]	120	if ((fir->history = malloc(2tapssizeof(int16_t))))
				121	memset(fir->history, 0, 2tapssizeof(int16_t));
				122	#else
				123	if ((fir->history = (int16_t ) malloc(tapssizeof(int16_t))))
				124	memset(fir->history, 0, taps*sizeof(int16_t));
				125	#endif
				126	return fir->history;
				127	}
				128	/- End of function --------------------------------------------------------/
				129
				130	static __inline__ void fir16_flush(fir16_state_t *fir)
				131	{
Tzafrir Cohen	f55ccbf	2008-10-12 08:13:21 +0200	[diff] [blame^]	132	#if defined(USE_MMX) \|\| defined(USE_SSE2) \|\| defined(__bfin__)
David Rowe	10602db	2008-10-06 21:41:46 -0700	[diff] [blame]	133	memset(fir->history, 0, 2fir->tapssizeof(int16_t));
				134	#else
				135	memset(fir->history, 0, fir->taps*sizeof(int16_t));
				136	#endif
				137	}
				138	/- End of function --------------------------------------------------------/
				139
				140	static __inline__ void fir16_free(fir16_state_t *fir)
				141	{
				142	free(fir->history);
				143	}
				144	/- End of function --------------------------------------------------------/
				145
Tzafrir Cohen	f55ccbf	2008-10-12 08:13:21 +0200	[diff] [blame^]	146	#ifdef __bfin__
David Rowe	10602db	2008-10-06 21:41:46 -0700	[diff] [blame]	147	static inline int32_t dot_asm(short x, short y, int len)
				148	{
				149	int dot;
				150
				151	len--;
				152
				153	__asm__
				154	(
				155	"I0 = %1;\n\t"
				156	"I1 = %2;\n\t"
				157	"A0 = 0;\n\t"
				158	"R0.L = W[I0++] \|\| R1.L = W[I1++];\n\t"
				159	"LOOP dot%= LC0 = %3;\n\t"
				160	"LOOP_BEGIN dot%=;\n\t"
				161	"A0 += R0.L * R1.L (IS) \|\| R0.L = W[I0++] \|\| R1.L = W[I1++];\n\t"
				162	"LOOP_END dot%=;\n\t"
				163	"A0 += R0.L*R1.L (IS);\n\t"
				164	"R0 = A0;\n\t"
				165	"%0 = R0;\n\t"
				166	: "=&d" (dot)
				167	: "a" (x), "a" (y), "a" (len)
				168	: "I0", "I1", "A1", "A0", "R0", "R1"
				169	);
				170
				171	return dot;
				172	}
				173	#endif
				174	/- End of function --------------------------------------------------------/
				175
				176	static __inline__ int16_t fir16(fir16_state_t *fir, int16_t sample)
				177	{
				178	int32_t y;
				179	#if defined(USE_MMX)
				180	int i;
				181	mmx_t *mmx_coeffs;
				182	mmx_t *mmx_hist;
				183
				184	fir->history[fir->curr_pos] = sample;
				185	fir->history[fir->curr_pos + fir->taps] = sample;
				186
				187	mmx_coeffs = (mmx_t *) fir->coeffs;
				188	mmx_hist = (mmx_t *) &fir->history[fir->curr_pos];
				189	i = fir->taps;
				190	pxor_r2r(mm4, mm4);
				191	/* 8 samples per iteration, so the filter must be a multiple of 8 long. */
				192	while (i > 0)
				193	{
				194	movq_m2r(mmx_coeffs[0], mm0);
				195	movq_m2r(mmx_coeffs[1], mm2);
				196	movq_m2r(mmx_hist[0], mm1);
				197	movq_m2r(mmx_hist[1], mm3);
				198	mmx_coeffs += 2;
				199	mmx_hist += 2;
				200	pmaddwd_r2r(mm1, mm0);
				201	pmaddwd_r2r(mm3, mm2);
				202	paddd_r2r(mm0, mm4);
				203	paddd_r2r(mm2, mm4);
				204	i -= 8;
				205	}
				206	movq_r2r(mm4, mm0);
				207	psrlq_i2r(32, mm0);
				208	paddd_r2r(mm0, mm4);
				209	movd_r2m(mm4, y);
				210	emms();
				211	#elif defined(USE_SSE2)
				212	int i;
				213	xmm_t *xmm_coeffs;
				214	xmm_t *xmm_hist;
				215
				216	fir->history[fir->curr_pos] = sample;
				217	fir->history[fir->curr_pos + fir->taps] = sample;
				218
				219	xmm_coeffs = (xmm_t *) fir->coeffs;
				220	xmm_hist = (xmm_t *) &fir->history[fir->curr_pos];
				221	i = fir->taps;
				222	pxor_r2r(xmm4, xmm4);
				223	/* 16 samples per iteration, so the filter must be a multiple of 16 long. */
				224	while (i > 0)
				225	{
				226	movdqu_m2r(xmm_coeffs[0], xmm0);
				227	movdqu_m2r(xmm_coeffs[1], xmm2);
				228	movdqu_m2r(xmm_hist[0], xmm1);
				229	movdqu_m2r(xmm_hist[1], xmm3);
				230	xmm_coeffs += 2;
				231	xmm_hist += 2;
				232	pmaddwd_r2r(xmm1, xmm0);
				233	pmaddwd_r2r(xmm3, xmm2);
				234	paddd_r2r(xmm0, xmm4);
				235	paddd_r2r(xmm2, xmm4);
				236	i -= 16;
				237	}
				238	movdqa_r2r(xmm4, xmm0);
				239	psrldq_i2r(8, xmm0);
				240	paddd_r2r(xmm0, xmm4);
				241	movdqa_r2r(xmm4, xmm0);
				242	psrldq_i2r(4, xmm0);
				243	paddd_r2r(xmm0, xmm4);
				244	movd_r2m(xmm4, y);
Tzafrir Cohen	f55ccbf	2008-10-12 08:13:21 +0200	[diff] [blame^]	245	#elif defined(__bfin__)
David Rowe	10602db	2008-10-06 21:41:46 -0700	[diff] [blame]	246	fir->history[fir->curr_pos] = sample;
				247	fir->history[fir->curr_pos + fir->taps] = sample;
				248	y = dot_asm((int16_t*)fir->coeffs, &fir->history[fir->curr_pos], fir->taps);
				249	#else
				250	int i;
				251	int offset1;
				252	int offset2;
				253
				254	fir->history[fir->curr_pos] = sample;
				255
				256	offset2 = fir->curr_pos;
				257	offset1 = fir->taps - offset2;
				258	y = 0;
				259	for (i = fir->taps - 1; i >= offset1; i--)
				260	y += fir->coeffs[i]*fir->history[i - offset1];
				261	for ( ; i >= 0; i--)
				262	y += fir->coeffs[i]*fir->history[i + offset2];
				263	#endif
				264	if (fir->curr_pos <= 0)
				265	fir->curr_pos = fir->taps;
				266	fir->curr_pos--;
				267	return (int16_t) (y >> 15);
				268	}
				269	/- End of function --------------------------------------------------------/
				270
				271	static __inline__ const int16_t fir32_create(fir32_state_t fir,
				272	const int32_t *coeffs,
				273	int taps)
				274	{
				275	fir->taps = taps;
				276	fir->curr_pos = taps - 1;
				277	fir->coeffs = coeffs;
				278	fir->history = (int16_t ) malloc(tapssizeof(int16_t));
				279	if (fir->history)
				280	memset(fir->history, '\0', taps*sizeof(int16_t));
				281	return fir->history;
				282	}
				283	/- End of function --------------------------------------------------------/
				284
				285	static __inline__ void fir32_flush(fir32_state_t *fir)
				286	{
				287	memset(fir->history, 0, fir->taps*sizeof(int16_t));
				288	}
				289	/- End of function --------------------------------------------------------/
				290
				291	static __inline__ void fir32_free(fir32_state_t *fir)
				292	{
				293	free(fir->history);
				294	}
				295	/- End of function --------------------------------------------------------/
				296
				297	static __inline__ int16_t fir32(fir32_state_t *fir, int16_t sample)
				298	{
				299	int i;
				300	int32_t y;
				301	int offset1;
				302	int offset2;
				303
				304	fir->history[fir->curr_pos] = sample;
				305	offset2 = fir->curr_pos;
				306	offset1 = fir->taps - offset2;
				307	y = 0;
				308	for (i = fir->taps - 1; i >= offset1; i--)
				309	y += fir->coeffs[i]*fir->history[i - offset1];
				310	for ( ; i >= 0; i--)
				311	y += fir->coeffs[i]*fir->history[i + offset2];
				312	if (fir->curr_pos <= 0)
				313	fir->curr_pos = fir->taps;
				314	fir->curr_pos--;
				315	return (int16_t) (y >> 15);
				316	}
				317	/- End of function --------------------------------------------------------/
				318
				319	#ifndef __KERNEL__
				320	static __inline__ const float fir_float_create(fir_float_state_t fir,
				321	const float *coeffs,
				322	int taps)
				323	{
				324	fir->taps = taps;
				325	fir->curr_pos = taps - 1;
				326	fir->coeffs = coeffs;
				327	fir->history = (float ) malloc(tapssizeof(float));
				328	if (fir->history)
				329	memset(fir->history, '\0', taps*sizeof(float));
				330	return fir->history;
				331	}
				332	/- End of function --------------------------------------------------------/
				333
				334	static __inline__ void fir_float_free(fir_float_state_t *fir)
				335	{
				336	free(fir->history);
				337	}
				338	/- End of function --------------------------------------------------------/
				339
				340	static __inline__ int16_t fir_float(fir_float_state_t *fir, int16_t sample)
				341	{
				342	int i;
				343	float y;
				344	int offset1;
				345	int offset2;
				346
				347	fir->history[fir->curr_pos] = sample;
				348
				349	offset2 = fir->curr_pos;
				350	offset1 = fir->taps - offset2;
				351	y = 0;
				352	for (i = fir->taps - 1; i >= offset1; i--)
				353	y += fir->coeffs[i]*fir->history[i - offset1];
				354	for ( ; i >= 0; i--)
				355	y += fir->coeffs[i]*fir->history[i + offset2];
				356	if (fir->curr_pos <= 0)
				357	fir->curr_pos = fir->taps;
				358	fir->curr_pos--;
				359	return (int16_t) y;
				360	}
				361	/- End of function --------------------------------------------------------/
				362	#endif
				363
				364	#ifdef __cplusplus
				365	}
				366	#endif
				367
				368	#endif
				369	/- End of file ------------------------------------------------------------/