Blame - common/ih264_inter_pred_filters.c - platform/external/libavc

blob: 7d1e4079b65fe0751de561d724f78ef4fd567b5b [file] [log] [blame]

Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	1	/******************************************************************************
				2	*
				3	* Copyright (C) 2015 The Android Open Source Project
				4	*
				5	* Licensed under the Apache License, Version 2.0 (the "License");
				6	* you may not use this file except in compliance with the License.
				7	* You may obtain a copy of the License at:
				8	*
				9	* http://www.apache.org/licenses/LICENSE-2.0
				10	*
				11	* Unless required by applicable law or agreed to in writing, software
				12	* distributed under the License is distributed on an "AS IS" BASIS,
				13	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	* See the License for the specific language governing permissions and
				15	* limitations under the License.
				16	*
				17	*****************************************************************************
				18	* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
				19	*/
				20	/**
				21	*******************************************************************************
				22	* @file
				23	* ih264_inter_pred_filters.c
				24	*
				25	* @brief
				26	* Contains function definitions for inter prediction interpolation filters
				27	*
				28	* @author
				29	* Ittiam
				30	*
				31	* @par List of Functions:
				32	* - ih264_inter_pred_luma_copy
				33	* - ih264_interleave_copy
				34	* - ih264_inter_pred_luma_horz
				35	* - ih264_inter_pred_luma_vert
				36	* - ih264_inter_pred_luma_horz_hpel_vert_hpel
				37	* - ih264_inter_pred_luma_horz_qpel
				38	* - ih264_inter_pred_luma_vert_qpel
				39	* - ih264_inter_pred_luma_horz_qpel_vert_qpel
				40	* - ih264_inter_pred_luma_horz_hpel_vert_qpel
				41	* - ih264_inter_pred_luma_horz_qpel_vert_hpel
				42	* - ih264_inter_pred_luma_bilinear
				43	* - ih264_inter_pred_chroma
				44	*
				45	* @remarks
				46	* None
				47	*
				48	*******************************************************************************
				49	*/
				50
				51	/*****************************************************************************/
				52	/* File Includes */
				53	/*****************************************************************************/
				54
				55	/* User include files */
				56	#include "ih264_typedefs.h"
				57	#include "ih264_macros.h"
				58	#include "ih264_platform_macros.h"
				59	#include "ih264_inter_pred_filters.h"
				60
				61
				62	/*****************************************************************************/
				63	/* Constant Data variables */
				64	/*****************************************************************************/
				65
				66	/* coefficients for 6 tap filtering*/
				67	const WORD32 ih264_g_six_tap[3] ={1,-5,20};
				68
				69
				70	/*****************************************************************************/
				71	/* Function definitions . */
				72	/*****************************************************************************/
				73	/**
				74	*******************************************************************************
				75	*
				76	* @brief
				77	* Interprediction luma function for copy
				78	*
				79	* @par Description:
				80	* Copies the array of width 'wd' and height 'ht' from the location pointed
				81	* by 'src' to the location pointed by 'dst'
				82	*
				83	* @param[in] pu1_src
				84	* UWORD8 pointer to the source
				85	*
				86	* @param[out] pu1_dst
				87	* UWORD8 pointer to the destination
				88	*
				89	* @param[in] src_strd
				90	* integer source stride
				91	*
				92	* @param[in] dst_strd
				93	* integer destination stride
				94	*
				95	*
				96	* @param[in] ht
				97	* integer height of the array
				98	*
				99	* @param[in] wd
				100	* integer width of the array
				101	*
				102	* @returns
				103	*
				104	* @remarks
				105	* None
				106	*
				107	*******************************************************************************
				108	*/
				109
				110	void ih264_inter_pred_luma_copy(UWORD8 *pu1_src,
				111	UWORD8 *pu1_dst,
				112	WORD32 src_strd,
				113	WORD32 dst_strd,
				114	WORD32 ht,
				115	WORD32 wd,
				116	UWORD8* pu1_tmp,
				117	WORD32 dydx)
				118	{
				119	WORD32 row, col;
				120	UNUSED(pu1_tmp);
				121	UNUSED(dydx);
				122	for(row = 0; row < ht; row++)
				123	{
				124	for(col = 0; col < wd; col++)
				125	{
				126	pu1_dst[col] = pu1_src[col];
				127	}
				128
				129	pu1_src += src_strd;
				130	pu1_dst += dst_strd;
				131	}
				132	}
				133
				134	/**
				135	*******************************************************************************
				136	*
				137	* @brief
				138	* Fucntion for copying to an interleaved destination
				139	*
				140	* @par Description:
				141	* Copies the array of width 'wd' and height 'ht' from the location pointed
				142	* by 'src' to the location pointed by 'dst'
				143	*
				144	* @param[in] pu1_src
				145	* UWORD8 pointer to the source
				146	*
				147	* @param[out] pu1_dst
				148	* UWORD8 pointer to the destination
				149	*
				150	* @param[in] src_strd
				151	* integer source stride
				152	*
				153	* @param[in] dst_strd
				154	* integer destination stride
				155	*
				156	* @param[in] ht
				157	* integer height of the array
				158	*
				159	* @param[in] wd
				160	* integer width of the array
				161	*
				162	* @returns
				163	*
				164	* @remarks
				165	* The alternate elements of src will be copied to alternate locations in dsr
				166	* Other locations are not touched
				167	*
				168	*******************************************************************************
				169	*/
				170	void ih264_interleave_copy(UWORD8 *pu1_src,
				171	UWORD8 *pu1_dst,
				172	WORD32 src_strd,
				173	WORD32 dst_strd,
				174	WORD32 ht,
				175	WORD32 wd)
				176	{
				177	WORD32 row, col;
				178	wd *= 2;
				179
				180	for(row = 0; row < ht; row++)
				181	{
				182	for(col = 0; col < wd; col+=2)
				183	{
				184	pu1_dst[col] = pu1_src[col];
				185	}
				186
				187	pu1_src += src_strd;
				188	pu1_dst += dst_strd;
				189	}
				190	}
				191
				192	/**
				193	*******************************************************************************
				194	*
				195	* @brief
				196	* Interprediction luma filter for horizontal input
				197	*
				198	* @par Description:
				199	* Applies a 6 tap horizontal filter .The output is clipped to 8 bits
				200	* sec 8.4.2.2.1 titled "Luma sample interpolation process"
				201	*
				202	* @param[in] pu1_src
				203	* UWORD8 pointer to the source
				204	*
				205	* @param[out] pu1_dst
				206	* UWORD8 pointer to the destination
				207	*
				208	* @param[in] src_strd
				209	* integer source stride
				210	*
				211	* @param[in] dst_strd
				212	* integer destination stride
				213	*
				214	* @param[in] ht
				215	* integer height of the array
				216	*
				217	* @param[in] wd
				218	* integer width of the array
				219	*
				220	* @returns
				221	*
				222	* @remarks
				223	* None
				224	*
				225	*******************************************************************************
				226	*/
				227	void ih264_inter_pred_luma_horz(UWORD8 *pu1_src,
				228	UWORD8 *pu1_dst,
				229	WORD32 src_strd,
				230	WORD32 dst_strd,
				231	WORD32 ht,
				232	WORD32 wd,
				233	UWORD8* pu1_tmp,
				234	WORD32 dydx)
				235	{
				236	WORD32 row, col;
				237	WORD16 i2_tmp;
				238	UNUSED(pu1_tmp);
				239	UNUSED(dydx);
				240
				241	for(row = 0; row < ht; row++)
				242	{
				243	for(col = 0; col < wd; col++)
				244	{
				245	i2_tmp = 0;/ih264_g_six_tap[] is the array containing the filter coeffs/
				246	i2_tmp = ih264_g_six_tap[0] *
				247	(pu1_src[col - 2] + pu1_src[col + 3])
				248	+ ih264_g_six_tap[1] *
				249	(pu1_src[col - 1] + pu1_src[col + 2])
				250	+ ih264_g_six_tap[2] *
				251	(pu1_src[col] + pu1_src[col + 1]);
				252	i2_tmp = (i2_tmp + 16) >> 5;
				253	pu1_dst[col] = CLIP_U8(i2_tmp);
				254	}
				255
				256	pu1_src += src_strd;
				257	pu1_dst += dst_strd;
				258	}
				259
				260	}
				261
				262	/**
				263	*******************************************************************************
				264	*
				265	* @brief
				266	* Interprediction luma filter for vertical input
				267	*
				268	* @par Description:
				269	* Applies a 6 tap vertical filter.The output is clipped to 8 bits
				270	* sec 8.4.2.2.1 titled "Luma sample interpolation process"
				271	*
				272	* @param[in] pu1_src
				273	* UWORD8 pointer to the source
				274	*
				275	* @param[out] pu1_dst
				276	* UWORD8 pointer to the destination
				277	*
				278	* @param[in] src_strd
				279	* integer source stride
				280	*
				281	* @param[in] dst_strd
				282	* integer destination stride
				283	*
				284	* @param[in] ht
				285	* integer height of the array
				286	*
				287	* @param[in] wd
				288	* integer width of the array
				289	*
				290	* @returns
				291	*
				292	* @remarks
				293	* None
				294	*
				295	*******************************************************************************
				296	*/
				297	void ih264_inter_pred_luma_vert(UWORD8 *pu1_src,
				298	UWORD8 *pu1_dst,
				299	WORD32 src_strd,
				300	WORD32 dst_strd,
				301	WORD32 ht,
				302	WORD32 wd,
				303	UWORD8* pu1_tmp,
				304	WORD32 dydx)
				305	{
				306	WORD32 row, col;
				307	WORD16 i2_tmp;
				308	UNUSED(pu1_tmp);
				309	UNUSED(dydx);
				310
				311	for(row = 0; row < ht; row++)
				312	{
				313	for(col = 0; col < wd; col++)
				314	{
				315	i2_tmp = 0; /ih264_g_six_tap[] is the array containing the filter coeffs/
				316	i2_tmp = ih264_g_six_tap[0] *
				317	(pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd])
				318	+ ih264_g_six_tap[1] *
				319	(pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd])
				320	+ ih264_g_six_tap[2] *
				321	(pu1_src[col] + pu1_src[col + 1 * src_strd]);
				322	i2_tmp = (i2_tmp + 16) >> 5;
				323	pu1_dst[col] = CLIP_U8(i2_tmp);
				324	}
				325	pu1_src += src_strd;
				326	pu1_dst += dst_strd;
				327	}
				328	}
				329
				330	/*!
				331	**************************************************************************
				332	* \if Function name : ih264_inter_pred_luma_horz_hpel_vert_hpel \endif
				333	*
				334	* \brief
				335	* This function implements a two stage cascaded six tap filter. It
				336	* applies the six tap filter in the horizontal direction on the
				337	* predictor values, followed by applying the same filter in the
				338	* vertical direction on the output of the first stage. The six tap
				339	* filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
				340	* interpolation process"
				341	*
				342	* \param pu1_src: Pointer to the buffer containing the predictor values.
				343	* pu1_src could point to the frame buffer or the predictor buffer.
				344	* \param pu1_dst: Pointer to the destination buffer where the output of
				345	* the six tap filter is stored.
				346	* \param ht: Height of the rectangular pixel grid to be interpolated
				347	* \param wd: Width of the rectangular pixel grid to be interpolated
				348	* \param src_strd: Width of the buffer pointed to by pu1_src.
				349	* \param dst_strd: Width of the destination buffer
				350	* \param pu1_tmp: temporary buffer.
				351	* \param dydx: x and y reference offset for qpel calculations: UNUSED in this function.
				352	*
				353	* \return
				354	* None.
				355	*
				356	* \note
				357	* This function takes the 8 bit predictor values, applies the six tap
				358	* filter in the horizontal direction and outputs the result clipped to
				359	* 8 bit precision. The input is stored in the buffer pointed to by
				360	* pu1_src while the output is stored in the buffer pointed by pu1_dst.
				361	* Both pu1_src and pu1_dst could point to the same buffer i.e. the
				362	* six tap filter could be done in place.
				363	*
				364	**************************************************************************
				365	*/
				366	void ih264_inter_pred_luma_horz_hpel_vert_hpel(UWORD8 *pu1_src,
				367	UWORD8 *pu1_dst,
				368	WORD32 src_strd,
				369	WORD32 dst_strd,
				370	WORD32 ht,
				371	WORD32 wd,
				372	UWORD8* pu1_tmp,
				373	WORD32 dydx)
				374	{
				375	WORD32 row, col;
				376	WORD32 tmp;
				377	WORD16* pi2_pred1_temp;
				378	WORD16* pi2_pred1;
				379	UNUSED(dydx);
				380	pi2_pred1_temp = (WORD16*)pu1_tmp;
				381	pi2_pred1_temp += 2;
				382	pi2_pred1 = pi2_pred1_temp;
				383	for(row = 0; row < ht; row++)
				384	{
				385	for(col = -2; col < wd + 3; col++)
				386	{
				387	tmp = 0;/ih264_g_six_tap[] is the array containing the filter coeffs/
				388	tmp = ih264_g_six_tap[0] *
				389	(pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd])
				390	+ ih264_g_six_tap[1] *
				391	(pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd])
				392	+ ih264_g_six_tap[2] *
				393	(pu1_src[col] + pu1_src[col + 1 * src_strd]);
				394	pi2_pred1_temp[col] = tmp;
				395	}
				396	pu1_src += src_strd;
				397	pi2_pred1_temp = pi2_pred1_temp + wd + 5;
				398	}
				399
				400	for(row = 0; row < ht; row++)
				401	{
				402	for(col = 0; col < wd; col++)
				403	{
				404	tmp = 0;/ih264_g_six_tap[] is the array containing the filter coeffs/
				405	tmp = ih264_g_six_tap[0] *
				406	(pi2_pred1[col - 2] + pi2_pred1[col + 3])
				407	+ ih264_g_six_tap[1] *
				408	(pi2_pred1[col - 1] + pi2_pred1[col + 2])
				409	+ ih264_g_six_tap[2] * (pi2_pred1[col] + pi2_pred1[col + 1]);
				410	tmp = (tmp + 512) >> 10;
				411	pu1_dst[col] = CLIP_U8(tmp);
				412	}
				413	pi2_pred1 += (wd + 5);
				414	pu1_dst += dst_strd;
				415	}
				416	}
				417
				418	/*!
				419	**************************************************************************
				420	* \if Function name : ih264_inter_pred_luma_horz_qpel \endif
				421	*
				422	* \brief
				423	* This routine applies the six tap filter to the predictors in the
				424	* horizontal direction. The six tap filtering operation is described in
				425	* sec 8.4.2.2.1 titled "Luma sample interpolation process"
				426	*
				427	* \param pu1_src: Pointer to the buffer containing the predictor values.
				428	* pu1_src could point to the frame buffer or the predictor buffer.
				429	* \param pu1_dst: Pointer to the destination buffer where the output of
				430	* the six tap filter is stored.
				431	* \param ht: Height of the rectangular pixel grid to be interpolated
				432	* \param wd: Width of the rectangular pixel grid to be interpolated
				433	* \param src_strd: Width of the buffer pointed to by pu1_src.
				434	* \param dst_strd: Width of the destination buffer
				435	* \param pu1_tmp: temporary buffer: UNUSED in this function
				436	* \param dydx: x and y reference offset for qpel calculations.
				437	*
				438	* \return
				439	* None.
				440	*
				441	* \note
				442	* This function takes the 8 bit predictor values, applies the six tap
				443	* filter in the horizontal direction and outputs the result clipped to
				444	* 8 bit precision. The input is stored in the buffer pointed to by
				445	* pu1_src while the output is stored in the buffer pointed by pu1_dst.
				446	* Both pu1_src and pu1_dst could point to the same buffer i.e. the
				447	* six tap filter could be done in place.
				448	*
				449	**************************************************************************
				450	*/
				451	void ih264_inter_pred_luma_horz_qpel(UWORD8 *pu1_src,
				452	UWORD8 *pu1_dst,
				453	WORD32 src_strd,
				454	WORD32 dst_strd,
				455	WORD32 ht,
				456	WORD32 wd,
				457	UWORD8* pu1_tmp,
				458	WORD32 dydx)
				459	{
				460	WORD32 row, col;
				461	UWORD8 *pu1_pred1;
				462	WORD32 x_offset = dydx & 0x3;
				463	UNUSED(pu1_tmp);
				464	pu1_pred1 = pu1_src + (x_offset >> 1);
				465
				466	for(row = 0; row < ht; row++)
				467	{
				468	for(col = 0; col < wd; col++, pu1_src++, pu1_dst++)
				469	{
				470	WORD16 i2_temp;
				471	/* The logic below implements the following equation
				472	i2_temp = puc_pred[-2] - 5 * (puc_pred[-1] + puc_pred[2]) +
				473	20 * (puc_pred[0] + puc_pred[1]) + puc_pred[3]; */
				474	i2_temp = pu1_src[-2] + pu1_src[3]
				475	- (pu1_src[-1] + pu1_src[2])
				476	+ ((pu1_src[0] + pu1_src[1] - pu1_src[-1] - pu1_src[2]) << 2)
				477	+ ((pu1_src[0] + pu1_src[1]) << 4);
				478	i2_temp = (i2_temp + 16) >> 5;
				479	i2_temp = CLIP_U8(i2_temp);
				480	pu1_dst = (i2_temp + pu1_pred1 + 1) >> 1;
				481
				482	pu1_pred1++;
				483	}
				484	pu1_dst += dst_strd - wd;
				485	pu1_src += src_strd - wd;
				486	pu1_pred1 += src_strd - wd;
				487	}
				488	}
				489
				490	/*!
				491	**************************************************************************
				492	* \if Function name : ih264_inter_pred_luma_vert_qpel \endif
				493	*
				494	* \brief
				495	* This routine applies the six tap filter to the predictors in the
				496	* vertical direction and interpolates them to obtain pixels at quarter vertical
				497	* positions (0, 1/4) and (0, 3/4). The six tap filtering operation is
				498	* described in sec 8.4.2.2.1 titled "Luma sample interpolation process"
				499	*
				500	* \param pu1_src: Pointer to the buffer containing the predictor values.
				501	* pu1_src could point to the frame buffer or the predictor buffer.
				502	* \param pu1_dst: Pointer to the destination buffer where the output of
				503	* the six tap filter is stored.
				504	* \param ht: Height of the rectangular pixel grid to be interpolated
				505	* \param wd: Width of the rectangular pixel grid to be interpolated
				506	* \param src_strd: Width of the buffer pointed to by puc_pred.
				507	* \param dst_strd: Width of the destination buffer
				508	* \param pu1_tmp: temporary buffer: UNUSED in this function
				509	* \param dydx: x and y reference offset for qpel calculations.
				510	*
				511	* \return
				512	* void
				513	*
				514	* \note
				515	* This function takes the 8 bit predictor values, applies the six tap
				516	* filter in the vertical direction and outputs the result clipped to
				517	* 8 bit precision. The input is stored in the buffer pointed to by
				518	* puc_pred while the output is stored in the buffer pointed by puc_dest.
				519	* Both puc_pred and puc_dest could point to the same buffer i.e. the
				520	* six tap filter could be done in place.
				521	*
				522	* \para <title>
				523	* <paragraph>
				524	* ...
				525	**************************************************************************
				526	*/
				527	void ih264_inter_pred_luma_vert_qpel(UWORD8 *pu1_src,
				528	UWORD8 *pu1_dst,
				529	WORD32 src_strd,
				530	WORD32 dst_strd,
				531	WORD32 ht,
				532	WORD32 wd,
				533	UWORD8* pu1_tmp,
				534	WORD32 dydx)
				535	{
				536	WORD32 row, col;
				537	WORD32 y_offset = dydx >> 2;
				538	WORD32 off1, off2, off3;
				539	UWORD8 *pu1_pred1;
				540	UNUSED(pu1_tmp);
				541	y_offset = y_offset & 0x3;
				542
				543	off1 = src_strd;
				544	off2 = src_strd << 1;
				545	off3 = off1 + off2;
				546
				547	pu1_pred1 = pu1_src + (y_offset >> 1) * src_strd;
				548
				549	for(row = 0; row < ht; row++)
				550	{
				551	for(col = 0; col < wd; col++, pu1_dst++, pu1_src++, pu1_pred1++)
				552	{
				553	WORD16 i2_temp;
				554	/* The logic below implements the following equation
				555	i16_temp = puc_pred[-2src_strd] + puc_pred[3src_strd] -
				556	5 * (puc_pred[-1src_strd] + puc_pred[2src_strd]) +
				557	20 * (puc_pred[0] + puc_pred[src_strd]); */
				558	i2_temp = pu1_src[-off2] + pu1_src[off3]
				559	- (pu1_src[-off1] + pu1_src[off2])
				560	+ ((pu1_src[0] + pu1_src[off1] - pu1_src[-off1] - pu1_src[off2]) << 2)
				561	+ ((pu1_src[0] + pu1_src[off1]) << 4);
				562	i2_temp = (i2_temp + 16) >> 5;
				563	i2_temp = CLIP_U8(i2_temp);
				564
				565	pu1_dst = (i2_temp + pu1_pred1 + 1) >> 1;
				566	}
				567	pu1_src += src_strd - wd;
				568	pu1_pred1 += src_strd - wd;
				569	pu1_dst += dst_strd - wd;
				570	}
				571	}
				572
				573	/*!
				574	**************************************************************************
				575	* \if Function name : ih264_inter_pred_luma_horz_qpel_vert_qpel \endif
				576	*
				577	* \brief
				578	* This routine applies the six tap filter to the predictors in the
				579	* vertical and horizontal direction and averages them to get pixels at locations
				580	* (1/4,1/4), (1/4, 3/4), (3/4, 1/4) & (3/4, 3/4). The six tap filtering operation
				581	* is described in sec 8.4.2.2.1 titled "Luma sample interpolation process"
				582	*
				583	* \param pu1_src: Pointer to the buffer containing the predictor values.
				584	* pu1_src could point to the frame buffer or the predictor buffer.
				585	* \param pu1_dst: Pointer to the destination buffer where the output of
				586	* the six tap filter is stored.
				587	* \param wd: Width of the rectangular pixel grid to be interpolated
				588	* \param ht: Height of the rectangular pixel grid to be interpolated
				589	* \param src_strd: Width of the buffer pointed to by puc_pred.
				590	* \param dst_strd: Width of the destination buffer
				591	* \param pu1_tmp: temporary buffer, UNUSED in this function
				592	* \param dydx: x and y reference offset for qpel calculations.
				593	*
				594	* \return
				595	* void
				596	*
				597	* \note
				598	* This function takes the 8 bit predictor values, applies the six tap
				599	* filter in the vertical direction and outputs the result clipped to
				600	* 8 bit precision. The input is stored in the buffer pointed to by
				601	* puc_pred while the output is stored in the buffer pointed by puc_dest.
				602	* Both puc_pred and puc_dest could point to the same buffer i.e. the
				603	* six tap filter could be done in place.
				604	*
				605	* \para <title>
				606	* <paragraph>
				607	* ...
				608	**************************************************************************
				609	*/
				610	void ih264_inter_pred_luma_horz_qpel_vert_qpel(UWORD8 *pu1_src,
				611	UWORD8 *pu1_dst,
				612	WORD32 src_strd,
				613	WORD32 dst_strd,
				614	WORD32 ht,
				615	WORD32 wd,
				616	UWORD8* pu1_tmp,
				617	WORD32 dydx)
				618	{
				619	WORD32 row, col;
				620	WORD32 x_offset = dydx & 0x3;
				621	WORD32 y_offset = dydx >> 2;
				622
				623	WORD32 off1, off2, off3;
				624	UWORD8* pu1_pred_vert, *pu1_pred_horz;
				625	UNUSED(pu1_tmp);
				626	y_offset = y_offset & 0x3;
				627
				628	off1 = src_strd;
				629	off2 = src_strd << 1;
				630	off3 = off1 + off2;
				631
				632	pu1_pred_horz = pu1_src + (y_offset >> 1) * src_strd;
				633	pu1_pred_vert = pu1_src + (x_offset >> 1);
				634
				635	for(row = 0; row < ht; row++)
				636	{
				637	for(col = 0; col < wd;
				638	col++, pu1_dst++, pu1_pred_vert++, pu1_pred_horz++)
				639	{
				640	WORD16 i2_temp_vert, i2_temp_horz;
				641	/* The logic below implements the following equation
				642	i2_temp = puc_pred[-2src_strd] + puc_pred[3src_strd] -
				643	5 * (puc_pred[-1src_strd] + puc_pred[2src_strd]) +
				644	20 * (puc_pred[0] + puc_pred[src_strd]); */
				645	i2_temp_vert = pu1_pred_vert[-off2] + pu1_pred_vert[off3]
				646	- (pu1_pred_vert[-off1] + pu1_pred_vert[off2])
				647	+ ((pu1_pred_vert[0] + pu1_pred_vert[off1]
				648	- pu1_pred_vert[-off1]
				649	- pu1_pred_vert[off2]) << 2)
				650	+ ((pu1_pred_vert[0] + pu1_pred_vert[off1]) << 4);
				651	i2_temp_vert = (i2_temp_vert + 16) >> 5;
				652	i2_temp_vert = CLIP_U8(i2_temp_vert);
				653
				654	/* The logic below implements the following equation
				655	i16_temp = puc_pred[-2] - 5 * (puc_pred[-1] + puc_pred[2]) +
				656	20 * (puc_pred[0] + puc_pred[1]) + puc_pred[3]; */
				657	i2_temp_horz = pu1_pred_horz[-2] + pu1_pred_horz[3]
				658	- (pu1_pred_horz[-1] + pu1_pred_horz[2])
				659	+ ((pu1_pred_horz[0] + pu1_pred_horz[1]
				660	- pu1_pred_horz[-1]
				661	- pu1_pred_horz[2]) << 2)
				662	+ ((pu1_pred_horz[0] + pu1_pred_horz[1]) << 4);
				663	i2_temp_horz = (i2_temp_horz + 16) >> 5;
				664	i2_temp_horz = CLIP_U8(i2_temp_horz);
				665	*pu1_dst = (i2_temp_vert + i2_temp_horz + 1) >> 1;
				666	}
				667	pu1_pred_vert += (src_strd - wd);
				668	pu1_pred_horz += (src_strd - wd);
				669	pu1_dst += (dst_strd - wd);
				670	}
				671	}
				672
				673	/*!
				674	**************************************************************************
				675	* \if Function name : ih264_inter_pred_luma_horz_qpel_vert_hpel \endif
				676	*
				677	* \brief
				678	* This routine applies the six tap filter to the predictors in the vertical
				679	* and horizontal direction to obtain the pixel at (1/2,1/2). It then interpolates
				680	* pixel at (0,1/2) and (1/2,1/2) to obtain pixel at (1/4,1/2). Similarly for (3/4,1/2).
				681	* The six tap filtering operation is described in sec 8.4.2.2.1 titled
				682	* "Luma sample interpolation process"
				683	*
				684	* \param pu1_src: Pointer to the buffer containing the predictor values.
				685	* pu1_src could point to the frame buffer or the predictor buffer.
				686	* \param pu1_dst: Pointer to the destination buffer where the output of
				687	* the six tap filter followed by interpolation is stored.
				688	* \param wd: Width of the rectangular pixel grid to be interpolated
				689	* \param ht: Height of the rectangular pixel grid to be interpolated
				690	* \param src_strd: Width of the buffer pointed to by puc_pred.
				691	* \param dst_strd: Width of the destination buffer
				692	* \param pu1_tmp: buffer to store temporary output after 1st 6-tap filter.
				693	* \param dydx: x and y reference offset for qpel calculations.
				694	*
				695	* \return
				696	* void
				697	*
				698	* \note
				699	* This function takes the 8 bit predictor values, applies the six tap
				700	* filter in the vertical direction and outputs the result clipped to
				701	* 8 bit precision. The input is stored in the buffer pointed to by
				702	* puc_pred while the output is stored in the buffer pointed by puc_dest.
				703	* Both puc_pred and puc_dest could point to the same buffer i.e. the
				704	* six tap filter could be done in place.
				705	*
				706	* \para <title>
				707	* <paragraph>
				708	* ...
				709	**************************************************************************
				710	*/
				711	void ih264_inter_pred_luma_horz_qpel_vert_hpel(UWORD8 *pu1_src,
				712	UWORD8 *pu1_dst,
				713	WORD32 src_strd,
				714	WORD32 dst_strd,
				715	WORD32 ht,
				716	WORD32 wd,
				717	UWORD8* pu1_tmp,
				718	WORD32 dydx)
				719	{
				720	WORD32 row, col;
				721	WORD32 tmp;
				722	WORD16* pi2_pred1_temp, *pi2_pred1;
				723	UWORD8* pu1_dst_tmp;
				724	WORD32 x_offset = dydx & 0x3;
				725	WORD16 i2_macro;
				726
				727	pi2_pred1_temp = (WORD16*)pu1_tmp;
				728	pi2_pred1_temp += 2;
				729	pi2_pred1 = pi2_pred1_temp;
				730	pu1_dst_tmp = pu1_dst;
				731
				732	for(row = 0; row < ht; row++)
				733	{
				734	for(col = -2; col < wd + 3; col++)
				735	{
				736	tmp = 0;/ih264_g_six_tap[] is the array containing the filter coeffs/
				737	tmp = ih264_g_six_tap[0] *
				738	(pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd])
				739	+ ih264_g_six_tap[1] *
				740	(pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd])
				741	+ ih264_g_six_tap[2] *
				742	(pu1_src[col] + pu1_src[col + 1 * src_strd]);
				743	pi2_pred1_temp[col] = tmp;
				744	}
				745
				746	pu1_src += src_strd;
				747	pi2_pred1_temp = pi2_pred1_temp + wd + 5;
				748	}
				749
				750	pi2_pred1_temp = pi2_pred1;
				751	for(row = 0; row < ht; row++)
				752	{
				753	for(col = 0; col < wd; col++)
				754	{
				755	tmp = 0;/ih264_g_six_tap[] is the array containing the filter coeffs/
				756	tmp = ih264_g_six_tap[0] *
				757	(pi2_pred1[col - 2] + pi2_pred1[col + 3])
				758	+ ih264_g_six_tap[1] *
				759	(pi2_pred1[col - 1] + pi2_pred1[col + 2])
				760	+ ih264_g_six_tap[2] *
				761	(pi2_pred1[col] + pi2_pred1[col + 1]);
				762	tmp = (tmp + 512) >> 10;
				763	pu1_dst[col] = CLIP_U8(tmp);
				764	}
				765	pi2_pred1 += (wd + 5);
				766	pu1_dst += dst_strd;
				767	}
				768
				769	pu1_dst = pu1_dst_tmp;
				770	pi2_pred1_temp += (x_offset >> 1);
				771	for(row = ht; row != 0; row--)
				772	{
				773	for(col = wd; col != 0; col--, pu1_dst++, pi2_pred1_temp++)
				774	{
				775	UWORD8 uc_temp;
				776	/* Clipping the output of the six tap filter obtained from the
				777	first stage of the 2d filter stage */
				778	pi2_pred1_temp = (pi2_pred1_temp + 16) >> 5;
				779	i2_macro = (*pi2_pred1_temp);
				780	uc_temp = CLIP_U8(i2_macro);
				781	pu1_dst = (pu1_dst + uc_temp + 1) >> 1;
				782	}
				783	pi2_pred1_temp += 5;
				784	pu1_dst += dst_strd - wd;
				785	}
				786	}
				787
				788	/*!
				789	**************************************************************************
				790	* \if Function name : ih264_inter_pred_luma_horz_hpel_vert_qpel \endif
				791	*
				792	* \brief
				793	* This routine applies the six tap filter to the predictors in the horizontal
				794	* and vertical direction to obtain the pixel at (1/2,1/2). It then interpolates
				795	* pixel at (1/2,0) and (1/2,1/2) to obtain pixel at (1/2,1/4). Similarly for (1/2,3/4).
				796	* The six tap filtering operation is described in sec 8.4.2.2.1 titled
				797	* "Luma sample interpolation process"
				798	*
				799	* \param pu1_src: Pointer to the buffer containing the predictor values.
				800	* pu1_src could point to the frame buffer or the predictor buffer.
				801	* \param pu1_dst: Pointer to the destination buffer where the output of
				802	* the six tap filter followed by interpolation is stored.
				803	* \param wd: Width of the rectangular pixel grid to be interpolated
				804	* \param ht: Height of the rectangular pixel grid to be interpolated
				805	* \param src_strd: Width of the buffer pointed to by puc_pred.
				806	* \param dst_strd: Width of the destination buffer
				807	* \param pu1_tmp: buffer to store temporary output after 1st 6-tap filter.
				808	* \param dydx: x and y reference offset for qpel calculations.
				809	*
				810	* \return
				811	* void
				812	*
				813	* \note
				814	* This function takes the 8 bit predictor values, applies the six tap
				815	* filter in the vertical direction and outputs the result clipped to
				816	* 8 bit precision. The input is stored in the buffer pointed to by
				817	* puc_pred while the output is stored in the buffer pointed by puc_dest.
				818	* Both puc_pred and puc_dest could point to the same buffer i.e. the
				819	* six tap filter could be done in place.
				820	*
				821	* \para <title>
				822	* <paragraph>
				823	* ...
				824	**************************************************************************
				825	*/
				826	void ih264_inter_pred_luma_horz_hpel_vert_qpel(UWORD8 *pu1_src,
				827	UWORD8 *pu1_dst,
				828	WORD32 src_strd,
				829	WORD32 dst_strd,
				830	WORD32 ht,
				831	WORD32 wd,
				832	UWORD8* pu1_tmp,
				833	WORD32 dydx)
				834	{
				835
				836	WORD32 row, col;
				837	WORD32 tmp;
				838	WORD32 y_offset = dydx >> 2;
				839	WORD16* pi2_pred1_temp, *pi2_pred1;
				840	UWORD8* pu1_dst_tmp;
				841	//WORD32 x_offset = dydx & 0x3;
				842	WORD16 i2_macro;
				843
				844	y_offset = y_offset & 0x3;
				845
				846	pi2_pred1_temp = (WORD16*)pu1_tmp;
				847	pi2_pred1_temp += 2 * wd;
				848	pi2_pred1 = pi2_pred1_temp;
				849	pu1_dst_tmp = pu1_dst;
				850	pu1_src -= 2 * src_strd;
				851	for(row = -2; row < ht + 3; row++)
				852	{
				853	for(col = 0; col < wd; col++)
				854	{
				855	tmp = 0;/ih264_g_six_tap[] is the array containing the filter coeffs/
				856	tmp = ih264_g_six_tap[0] * (pu1_src[col - 2] + pu1_src[col + 3])
				857	+ ih264_g_six_tap[1] * (pu1_src[col - 1] + pu1_src[col + 2])
				858	+ ih264_g_six_tap[2] * (pu1_src[col] + pu1_src[col + 1]);
				859	pi2_pred1_temp[col - 2 * wd] = tmp;
				860	}
				861
				862	pu1_src += src_strd;
				863	pi2_pred1_temp += wd;
				864	}
				865	pi2_pred1_temp = pi2_pred1;
				866	for(row = 0; row < ht; row++)
				867	{
				868	for(col = 0; col < wd; col++)
				869	{
				870	tmp = 0;/ih264_g_six_tap[] is the array containing the filter coeffs/
				871	tmp = ih264_g_six_tap[0] * (pi2_pred1[col - 2 * wd] + pi2_pred1[col + 3 * wd])
				872	+ ih264_g_six_tap[1] * (pi2_pred1[col - 1 * wd] + pi2_pred1[col + 2 * wd])
				873	+ ih264_g_six_tap[2] * (pi2_pred1[col] + pi2_pred1[col + 1 * wd]);
				874	tmp = (tmp + 512) >> 10;
				875	pu1_dst[col] = CLIP_U8(tmp);
				876	}
				877	pi2_pred1 += wd;
				878	pu1_dst += dst_strd;
				879	}
				880	pu1_dst = pu1_dst_tmp;
				881	pi2_pred1_temp += (y_offset >> 1) * wd;
				882	for(row = ht; row != 0; row--)
				883
				884	{
				885	for(col = wd; col != 0; col--, pu1_dst++, pi2_pred1_temp++)
				886	{
				887	UWORD8 u1_temp;
				888	/* Clipping the output of the six tap filter obtained from the
				889	first stage of the 2d filter stage */
				890	pi2_pred1_temp = (pi2_pred1_temp + 16) >> 5;
				891	i2_macro = (*pi2_pred1_temp);
				892	u1_temp = CLIP_U8(i2_macro);
				893	pu1_dst = (pu1_dst + u1_temp + 1) >> 1;
				894	}
				895	//pi16_pred1_temp += wd;
				896	pu1_dst += dst_strd - wd;
				897	}
				898	}
				899
				900	/**
				901	*******************************************************************************
				902	* function:ih264_inter_pred_luma_bilinear
				903	*
				904	* @brief
				905	* This routine applies the bilinear filter to the predictors .
				906	* The filtering operation is described in
				907	* sec 8.4.2.2.1 titled "Luma sample interpolation process"
				908	*
				909	* @par Description:
				910	\note
				911	* This function is called to obtain pixels lying at the following
				912	* locations (1/4,1), (3/4,1),(1,1/4), (1,3/4) ,(1/4,1/2), (3/4,1/2),(1/2,1/4), (1/2,3/4),(3/4,1/4),(1/4,3/4),(3/4,3/4)&& (1/4,1/4) .
				913	* The function averages the two adjacent values from the two input arrays in horizontal direction.
				914	*
				915	*
				916	* @param[in] pu1_src1:
				917	* UWORD8 Pointer to the buffer containing the first input array.
				918	*
				919	* @param[in] pu1_src2:
				920	* UWORD8 Pointer to the buffer containing the second input array.
				921	*
				922	* @param[out] pu1_dst
				923	* UWORD8 pointer to the destination where the output of bilinear filter is stored.
				924	*
				925	* @param[in] src_strd1
				926	* Stride of the first input buffer
				927	*
				928	* @param[in] src_strd2
				929	* Stride of the second input buffer
				930	*
				931	* @param[in] dst_strd
				932	* integer destination stride of pu1_dst
				933	*
				934	* @param[in] ht
				935	* integer height of the array
				936	*
				937	* @param[in] wd
				938	* integer width of the array
				939	*
				940	* @returns
				941	*
				942	* @remarks
				943	* None
				944	*
				945	*******************************************************************************
				946	*/
				947	void ih264_inter_pred_luma_bilinear(UWORD8 *pu1_src1,
				948	UWORD8 *pu1_src2,
				949	UWORD8 *pu1_dst,
				950	WORD32 src_strd1,
				951	WORD32 src_strd2,
				952	WORD32 dst_strd,
				953	WORD32 ht,
				954	WORD32 wd)
				955	{
				956	WORD32 row, col;
				957	WORD16 i2_tmp;
				958
				959	for(row = 0; row < ht; row++)
				960	{
				961	for(col = 0; col < wd; col++)
				962	{
				963	i2_tmp = pu1_src1[col] + pu1_src2[col];
				964	i2_tmp = (i2_tmp + 1) >> 1;
				965	pu1_dst[col] = CLIP_U8(i2_tmp);
				966	}
				967	pu1_src1 += src_strd1;
				968	pu1_src2 += src_strd2;
				969	pu1_dst += dst_strd;
				970	}
				971
				972	}
				973
				974	/**
				975	*******************************************************************************
				976	*
				977	* @brief
				978	* Interprediction chroma filter
				979	*
				980	* @par Description:
				981	* Applies filtering to chroma samples as mentioned in
				982	* sec 8.4.2.2.2 titled "chroma sample interpolation process"
				983	*
				984	* @param[in] pu1_src
				985	* UWORD8 pointer to the source containing alternate U and V samples
				986	*
				987	* @param[out] pu1_dst
				988	* UWORD8 pointer to the destination
				989	*
				990	* @param[in] src_strd
				991	* integer source stride
				992	*
				993	* @param[in] dst_strd
				994	* integer destination stride
				995	*
				996	* @param[in] u1_dx
				997	* dx value where the sample is to be produced(refer sec 8.4.2.2.2 )
				998	*
				999	* @param[in] u1_dy
				1000	* dy value where the sample is to be produced(refer sec 8.4.2.2.2 )
				1001	*
				1002	* @param[in] ht
				1003	* integer height of the array
				1004	*
				1005	* @param[in] wd
				1006	* integer width of the array
				1007	*
				1008	* @returns
				1009	*
				1010	* @remarks
				1011	* None
				1012	*
				1013	*******************************************************************************
				1014	*/
				1015	void ih264_inter_pred_chroma(UWORD8 *pu1_src,
				1016	UWORD8 *pu1_dst,
				1017	WORD32 src_strd,
				1018	WORD32 dst_strd,
				1019	WORD32 dx,
				1020	WORD32 dy,
				1021	WORD32 ht,
				1022	WORD32 wd)
				1023	{
				1024	WORD32 row, col;
				1025	WORD16 i2_tmp;
				1026
				1027	for(row = 0; row < ht; row++)
				1028	{
				1029	for(col = 0; col < 2 * wd; col++)
				1030	{
				1031	i2_tmp = 0; /* applies equation (8-266) in section 8.4.2.2.2 */
				1032	i2_tmp = (8 - dx) * (8 - dy) * pu1_src[col]
				1033	+ (dx) * (8 - dy) * pu1_src[col + 2]
				1034	+ (8 - dx) * (dy) * (pu1_src + src_strd)[col]
				1035	+ (dx) * (dy) * (pu1_src + src_strd)[col + 2];
				1036	i2_tmp = (i2_tmp + 32) >> 6;
				1037	pu1_dst[col] = CLIP_U8(i2_tmp);
				1038	}
				1039	pu1_src += src_strd;
				1040	pu1_dst += dst_strd;
				1041	}
				1042	}