Blame - encoder/ime_distortion_metrics.c - platform/external/libavc

blob: f8c44df8e03a1b3592cc97c70a0480dce4be252c [file] [log] [blame]

Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	1	/******************************************************************************
				2	*
				3	* Copyright (C) 2015 The Android Open Source Project
				4	*
				5	* Licensed under the Apache License, Version 2.0 (the "License");
				6	* you may not use this file except in compliance with the License.
				7	* You may obtain a copy of the License at:
				8	*
				9	* http://www.apache.org/licenses/LICENSE-2.0
				10	*
				11	* Unless required by applicable law or agreed to in writing, software
				12	* distributed under the License is distributed on an "AS IS" BASIS,
				13	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	* See the License for the specific language governing permissions and
				15	* limitations under the License.
				16	*
				17	*****************************************************************************
				18	* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
				19	*/
				20
				21	/**
				22	******************************************************************************
				23	* @file ime_distortion_metrics.c
				24	*
				25	* @brief
				26	* This file contains definitions of routines that compute distortion
				27	* between two macro/sub blocks of identical dimensions
				28	*
				29	* @author
				30	* Ittiam
				31	*
				32	* @par List of Functions:
				33	* - ime_sub_pel_compute_sad_16x16()
				34	* - ime_calculate_sad4_prog()
				35	* - ime_calculate_sad3_prog()
				36	* - ime_calculate_sad2_prog()
				37	* - ime_compute_sad_16x16()
				38	* - ime_compute_sad_16x16_fast()
				39	* - ime_compute_sad_16x16_ea8()
				40	* - ime_compute_sad_8x8()
				41	* - ime_compute_sad_4x4()
				42	* - ime_compute_sad_16x8()
				43	* - ime_compute_satqd_16x16_lumainter()
				44	* - ime_compute_satqd_8x16_chroma()
				45	* - ime_compute_satqd_16x16_lumaintra()
				46	*
				47	*
				48	* @remarks
				49	* None
				50	*
				51	*******************************************************************************
				52	*/
				53
				54	/*****************************************************************************/
				55	/* File Includes */
				56	/*****************************************************************************/
				57
				58	/* System include files */
				59	#include <stdio.h>
				60	#include <stdlib.h>
				61	#include <string.h>
				62
				63	/* User include files */
				64	#include "ime_typedefs.h"
				65	#include "ime_defs.h"
				66	#include "ime_macros.h"
				67	#include "ime_statistics.h"
				68	#include "ime_platform_macros.h"
				69	#include "ime_distortion_metrics.h"
				70
				71
				72	/*****************************************************************************/
				73	/* Function Definitions */
				74	/*****************************************************************************/
				75
				76	/**
				77	******************************************************************************
				78	*
				79	* @brief computes distortion (SAD) at all subpel points about the src location
				80	*
				81	* @par Description
				82	* This functions computes SAD at all points at a subpel distance from the
				83	* current source location.
				84	*
				85	* @param[in] pu1_src
				86	* UWORD8 pointer to the source
				87	*
				88	* @param[out] pu1_ref_half_x
				89	* UWORD8 pointer to half pel buffer
				90	*
				91	* @param[out] pu1_ref_half_y
				92	* UWORD8 pointer to half pel buffer
				93	*
				94	* @param[out] pu1_ref_half_xy
				95	* UWORD8 pointer to half pel buffer
				96	*
				97	* @param[in] src_strd
				98	* integer source stride
				99	*
				100	* @param[in] ref_strd
				101	* integer ref stride
				102	*
				103	* @param[out] pi4_sad
				104	* integer evaluated sad
				105	* pi4_sad[0] - half x
				106	* pi4_sad[1] - half x - 1
				107	* pi4_sad[2] - half y
				108	* pi4_sad[3] - half y - 1
				109	* pi4_sad[4] - half xy
				110	* pi4_sad[5] - half xy - 1
				111	* pi4_sad[6] - half xy - strd
				112	* pi4_sad[7] - half xy - 1 - strd
				113	*
				114	* @remarks
				115	*
				116	******************************************************************************
				117	*/
				118	void ime_sub_pel_compute_sad_16x16(UWORD8 *pu1_src,
				119	UWORD8 *pu1_ref_half_x,
				120	UWORD8 *pu1_ref_half_y,
				121	UWORD8 *pu1_ref_half_xy,
				122	WORD32 src_strd,
				123	WORD32 ref_strd,
				124	WORD32 *pi4_sad)
				125	{
				126	UWORD8 *pu1_ref_half_x_left = pu1_ref_half_x - 1;
				127	UWORD8 *pu1_ref_half_y_top = pu1_ref_half_y - ref_strd;
				128	UWORD8 *pu1_ref_half_xy_left = pu1_ref_half_xy - 1;
				129	UWORD8 *pu1_ref_half_xy_top = pu1_ref_half_xy - ref_strd;
				130	UWORD8 *pu1_ref_half_xy_top_left = pu1_ref_half_xy - ref_strd - 1;
				131
				132	WORD32 row, col;
				133
				134	memset(pi4_sad, 0, 8 * sizeof(WORD32));
				135
				136	for(row = 0; row < MB_SIZE; row++)
				137	{
				138	for(col = 0; col < MB_SIZE; col++)
				139	{
				140	WORD32 src;
				141	WORD32 diff;
				142
				143	src = pu1_src[col];
				144
				145	diff = src - pu1_ref_half_x[col];
				146	pi4_sad[0] += ABS(diff);
				147
				148	diff = src - pu1_ref_half_x_left[col];
				149	pi4_sad[1] += ABS(diff);
				150
				151	diff = src - pu1_ref_half_y[col];
				152	pi4_sad[2] += ABS(diff);
				153
				154	diff = src - pu1_ref_half_y_top[col];
				155	pi4_sad[3] += ABS(diff);
				156
				157	diff = src - pu1_ref_half_xy[col];
				158	pi4_sad[4] += ABS(diff);
				159
				160	diff = src - pu1_ref_half_xy_left[col];
				161	pi4_sad[5] += ABS(diff);
				162
				163	diff = src - pu1_ref_half_xy_top[col];
				164	pi4_sad[6] += ABS(diff);
				165
				166	diff = src - pu1_ref_half_xy_top_left[col];
				167	pi4_sad[7] += ABS(diff);
				168	}
				169
				170	pu1_src += src_strd;
				171
				172	pu1_ref_half_x += ref_strd;
				173	pu1_ref_half_x_left += ref_strd;
				174
				175	pu1_ref_half_y += ref_strd;
				176	pu1_ref_half_y_top += ref_strd;
				177
				178	pu1_ref_half_xy += ref_strd;
				179	pu1_ref_half_xy_left += ref_strd;
				180	pu1_ref_half_xy_top += ref_strd;
				181	pu1_ref_half_xy_top_left += ref_strd;
				182	}
				183	}
				184
				185	/**
				186	*******************************************************************************
				187	*
				188	* @brief compute sad
				189	*
				190	* @par Description: This function computes the sad at vertices of diamond grid
				191	* centered at reference pointer and at unit distance from it.
				192	*
				193	* @param[in] pu1_ref
				194	* UWORD8 pointer to the reference
				195	*
				196	* @param[out] pu1_src
				197	* UWORD8 pointer to the source
				198	*
				199	* @param[in] ref_strd
				200	* integer reference stride
				201	*
				202	* @param[in] src_strd
				203	* integer source stride
				204	*
				205	* @param[out] pi4_sad
				206	* pointer to integer array evaluated sad
				207	*
				208	* @returns sad at all evaluated vertexes
				209	*
				210	* @remarks none
				211	*
				212	*******************************************************************************
				213	*/
				214	void ime_calculate_sad4_prog(UWORD8 *pu1_ref,
				215	UWORD8 *pu1_src,
				216	WORD32 ref_strd,
				217	WORD32 src_strd,
				218	WORD32 *pi4_sad)
				219	{
				220
				221	/* reference ptrs at unit 1 distance in diamond pattern centered at pu1_ref */
				222	UWORD8 *left_ptr = pu1_ref - 1;
				223	UWORD8 *right_ptr = pu1_ref + 1;
				224	UWORD8 *top_ptr = pu1_ref - ref_strd;
				225	UWORD8 *bot_ptr = pu1_ref + ref_strd;
				226
				227	/* temp var */
				228	WORD32 count2, count3;
				229	UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
				230	UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
				231
				232	memset(pi4_sad, 0, 4 * sizeof(WORD32));
				233
				234	for(count2 = MB_SIZE; count2 > 0; count2--)
				235	{
				236	for(count3 = MB_SIZE; count3 > 0 ; count3--)
				237	{
				238	WORD32 src;
				239	WORD32 diff;
				240
				241	src = *pu1_src++;
				242
				243	diff = src - *left_ptr++;
				244	pi4_sad[0] += ABS(diff);
				245
				246	diff = src - *right_ptr++;
				247	pi4_sad[1] += ABS(diff);
				248
				249	diff = src - *top_ptr++;
				250	pi4_sad[2] += ABS(diff);
				251
				252	diff = src - *bot_ptr++;
				253	pi4_sad[3] += ABS(diff);
				254	}
				255
				256	bot_ptr += u4_ref_buf_offset;
				257	left_ptr += u4_ref_buf_offset;
				258	right_ptr += u4_ref_buf_offset;
				259	top_ptr += u4_ref_buf_offset;
				260
				261	pu1_src += u4_cur_buf_offset;
				262	}
				263
				264	}
				265
				266	/**
				267	*******************************************************************************
				268	*
				269	* @brief compute sad
				270	*
				271	* @par Description: This function computes the sad at vertices of diamond grid
				272	* centered at reference pointer and at unit distance from it.
				273	*
				274	* @param[in] pu1_ref1, pu1_ref2, pu1_ref3
				275	* UWORD8 pointer to the reference
				276	*
				277	* @param[out] pu1_src
				278	* UWORD8 pointer to the source
				279	*
				280	* @param[in] ref_strd
				281	* integer reference stride
				282	*
				283	* @param[in] src_strd
				284	* integer source stride
				285	*
				286	* @param[out] pi4_sad
				287	* pointer to integer array evaluated sad
				288	*
				289	* @returns sad at all evaluated vertexes
				290	*
				291	* @remarks none
				292	*
				293	*******************************************************************************
				294	*/
				295	void ime_calculate_sad3_prog(UWORD8 *pu1_ref1,
				296	UWORD8 *pu1_ref2,
				297	UWORD8 *pu1_ref3,
				298	UWORD8 *pu1_src,
				299	WORD32 ref_strd,
				300	WORD32 src_strd,
				301	WORD32 *pi4_sad)
				302	{
				303	/* temp var */
				304	WORD32 i;
				305	UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
				306	UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
				307
				308	for(i = 16; i > 0; i--)
				309	{
				310	USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
				311	USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
				312	USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
				313	pu1_src += 4;
				314	pu1_ref1 += 4;
				315	pu1_ref2 += 4;
				316	pu1_ref3 += 4;
				317
				318	USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
				319	USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
				320	USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
				321	pu1_src += 4;
				322	pu1_ref1 += 4;
				323	pu1_ref2 += 4;
				324	pu1_ref3 += 4;
				325
				326	USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
				327	USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
				328	USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
				329	pu1_src += 4;
				330	pu1_ref1 += 4;
				331	pu1_ref2 += 4;
				332	pu1_ref3 += 4;
				333
				334	USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
				335	USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
				336	USADA8(pu1_src, pu1_ref3, pi4_sad[2]);
				337	pu1_src += 4;
				338	pu1_ref1 += 4;
				339	pu1_ref2 += 4;
				340	pu1_ref3 += 4;
				341
				342	pu1_src += u4_cur_buf_offset;
				343	pu1_ref1 += u4_ref_buf_offset;
				344	pu1_ref2 += u4_ref_buf_offset;
				345	pu1_ref3 += u4_ref_buf_offset;
				346	}
				347
				348	}
				349
				350	/**
				351	*******************************************************************************
				352	*
				353	* @brief compute sad
				354	*
				355	* @par Description: This function computes the sad at vertices of diamond grid
				356	* centered at reference pointer and at unit distance from it.
				357	*
				358	* @param[in] pu1_ref1, pu1_ref2
				359	* UWORD8 pointer to the reference
				360	*
				361	* @param[out] pu1_src
				362	* UWORD8 pointer to the source
				363	*
				364	* @param[in] ref_strd
				365	* integer reference stride
				366	*
				367	* @param[in] src_strd
				368	* integer source stride
				369	*
				370	* @param[out] pi4_sad
				371	* pointer to integer array evaluated sad
				372	*
				373	* @returns sad at all evaluated vertexes
				374	*
				375	* @remarks none
				376	*
				377	*******************************************************************************
				378	*/
				379	void ime_calculate_sad2_prog(UWORD8 *pu1_ref1,
				380	UWORD8 *pu1_ref2,
				381	UWORD8 *pu1_src,
				382	WORD32 ref_strd,
				383	WORD32 src_strd,
				384	WORD32 *pi4_sad)
				385	{
				386	/* temp var */
				387	WORD32 i;
				388	UWORD32 u4_ref_buf_offset = ref_strd - MB_SIZE;
				389	UWORD32 u4_cur_buf_offset = src_strd - MB_SIZE;
				390
				391	for(i = 16; i > 0; i--)
				392	{
				393	USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
				394	USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
				395	pu1_src += 4;
				396	pu1_ref1 += 4;
				397	pu1_ref2 += 4;
				398
				399	USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
				400	USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
				401	pu1_src += 4;
				402	pu1_ref1 += 4;
				403	pu1_ref2 += 4;
				404
				405	USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
				406	USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
				407	pu1_src += 4;
				408	pu1_ref1 += 4;
				409	pu1_ref2 += 4;
				410
				411	USADA8(pu1_src, pu1_ref1, pi4_sad[0]);
				412	USADA8(pu1_src, pu1_ref2, pi4_sad[1]);
				413	pu1_src += 4;
				414	pu1_ref1 += 4;
				415	pu1_ref2 += 4;
				416
				417	pu1_src += u4_cur_buf_offset;
				418	pu1_ref1 += u4_ref_buf_offset;
				419	pu1_ref2 += u4_ref_buf_offset;
				420	}
				421
				422	}
				423
				424	/**
				425	******************************************************************************
				426	*
				427	* @brief computes distortion (SAD) between 2 16x16 blocks
				428	*
				429	* @par Description
				430	* This functions computes SAD between 2 16x16 blocks. There is a provision
				431	* for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
				432	* compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
				433	*
				434	* @param[in] pu1_src
				435	* UWORD8 pointer to the source
				436	*
				437	* @param[out] pu1_dst
				438	* UWORD8 pointer to the destination
				439	*
				440	* @param[in] src_strd
				441	* integer source stride
				442	*
				443	* @param[in] dst_strd
				444	* integer destination stride
				445	*
				446	* @param[in] i4_max_sad
				447	* integer maximum allowed distortion
				448	*
				449	* @param[out] pi4_mb_distortion
				450	* integer evaluated sad
				451	*
				452	* @remarks
				453	*
				454	******************************************************************************
				455	*/
				456	void ime_compute_sad_16x16(UWORD8 *pu1_src,
				457	UWORD8 *pu1_est,
				458	WORD32 src_strd,
				459	WORD32 est_strd,
				460	WORD32 i4_max_sad,
				461	WORD32 *pi4_mb_distortion)
				462	{
				463	WORD32 i4_sad = 0;
				464	UWORD32 u4_src_offset = src_strd - 16;
				465	UWORD32 u4_est_offset = est_strd - 16;
				466	UWORD32 i;
				467
				468	GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16);
				469
				470	for(i = 16; i > 0; i--)
				471	{
				472	USADA8(pu1_src, pu1_est, i4_sad);
				473	pu1_src += 4;
				474	pu1_est += 4;
				475
				476	USADA8(pu1_src, pu1_est, i4_sad);
				477	pu1_src += 4;
				478	pu1_est += 4;
				479
				480	USADA8(pu1_src, pu1_est, i4_sad);
				481	pu1_src += 4;
				482	pu1_est += 4;
				483
				484	USADA8(pu1_src, pu1_est, i4_sad);
				485	pu1_src += 4;
				486	pu1_est += 4;
				487
				488	/* early exit */
				489	if(i4_max_sad < i4_sad)
				490	{
				491
				492	GATHER_16x16_SAD_EE_STATS(gu4_16x16_sad_ee_stats, 16-i);
				493
				494	*pi4_mb_distortion = i4_sad;
				495	return ;
				496	}
				497	pu1_src += u4_src_offset;
				498	pu1_est += u4_est_offset;
				499	}
				500
				501	*pi4_mb_distortion = i4_sad;
				502	return ;
				503	}
				504
				505	/**
				506	******************************************************************************
				507	*
				508	* @brief computes distortion (SAD) between 2 16x16 blocks (fast mode)
				509	*
				510	* @par Description
				511	* This functions computes SAD between 2 16x16 blocks. There is a provision
				512	* for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
				513	* compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
				514	*
				515	* @param[in] pu1_src
				516	* UWORD8 pointer to the source
				517	*
				518	* @param[out] pu1_dst
				519	* UWORD8 pointer to the destination
				520	*
				521	* @param[in] src_strd
				522	* integer source stride
				523	*
				524	* @param[in] dst_strd
				525	* integer destination stride
				526	*
				527	* @param[in] i4_max_sad
				528	* integer maximum allowed distortion
				529	*
				530	* @param[out] pi4_mb_distortion
				531	* integer evaluated sad
				532	*
				533	* @remarks
				534	*
				535	******************************************************************************
				536	*/
				537	void ime_compute_sad_16x16_fast(UWORD8 *pu1_src,
				538	UWORD8 *pu1_est,
				539	WORD32 src_strd,
				540	WORD32 est_strd,
				541	WORD32 i4_max_sad,
				542	WORD32 *pi4_mb_distortion)
				543	{
				544
				545	WORD32 i4_sad = 0;
				546	UWORD32 u4_src_offset = 2 * src_strd - 16;
				547	UWORD32 u4_est_offset = 2 * est_strd - 16;
				548	UWORD32 i;
				549
				550	UNUSED(i4_max_sad);
				551
				552	for(i = 16; i > 0; i-= 2)
				553	{
				554	USADA8(pu1_src, pu1_est, i4_sad);
				555	pu1_src += 4;
				556	pu1_est += 4;
				557
				558	USADA8(pu1_src, pu1_est, i4_sad);
				559	pu1_src += 4;
				560	pu1_est += 4;
				561
				562	USADA8(pu1_src, pu1_est, i4_sad);
				563	pu1_src += 4;
				564	pu1_est += 4;
				565
				566	USADA8(pu1_src, pu1_est, i4_sad);
				567	pu1_src += 4;
				568	pu1_est += 4;
				569
				570	pu1_src += u4_src_offset;
				571	pu1_est += u4_est_offset;
				572	}
				573
				574	*pi4_mb_distortion = (i4_sad << 1);
				575	return ;
				576	}
				577
				578	/**
				579	******************************************************************************
				580	*
				581	* @brief computes distortion (SAD) between 2 8x8 blocks
				582	*
				583	* @par Description
				584	* This functions computes SAD between 2 8x8 blocks. There is a provision
				585	* for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
				586	* compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
				587	*
				588	* @param[in] pu1_src
				589	* UWORD8 pointer to the source
				590	*
				591	* @param[out] pu1_dst
				592	* UWORD8 pointer to the destination
				593	*
				594	* @param[in] src_strd
				595	* integer source stride
				596	*
				597	* @param[in] dst_strd
				598	* integer destination stride
				599	*
				600	* @param[in] u4_max_sad
				601	* integer maximum allowed distortion
				602	*
				603	* @param[out] i4_sad
				604	* integer evaluated sad
				605	*
				606	* @remarks
				607	*
				608	******************************************************************************
				609	*/
				610
				611	void ime_compute_sad_8x8(UWORD8 *pu1_src,
				612	UWORD8 *pu1_est,
				613	WORD32 src_strd,
				614	WORD32 est_strd,
				615	WORD32 i4_max_sad,
				616	WORD32 *pi4_mb_distortion)
				617	{
				618	WORD32 i4_sad = 0;
				619	UWORD32 u4_src_offset = src_strd - 8;
				620	UWORD32 u4_est_offset = est_strd - 8;
				621	UWORD32 i, j;
				622	WORD16 temp;
				623
				624	for(i = 8; i > 0; i--)
				625	{
				626	for(j = 8; j > 0; j--)
				627	{
				628	/* SAD */
				629	temp = pu1_src++ - pu1_est++;
				630	i4_sad += ABS(temp);
				631	}
				632	/* early exit */
				633	if(i4_max_sad < i4_sad)
				634	{
				635	*pi4_mb_distortion = i4_sad;
				636	return;
				637	}
				638	pu1_src += u4_src_offset;
				639	pu1_est += u4_est_offset;
				640	}
				641	*pi4_mb_distortion = i4_sad;
				642	}
				643
				644	/**
				645	******************************************************************************
				646	*
				647	* @brief computes distortion (SAD) between 2 4x4 blocks
				648	*
				649	* @par Description
				650	* This functions computes SAD between 2 4x4 blocks. There is a provision
				651	* for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
				652	* compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
				653	*
				654	* @param[in] pu1_src
				655	* UWORD8 pointer to the source
				656	*
				657	* @param[out] pu1_dst
				658	* UWORD8 pointer to the destination
				659	*
				660	* @param[in] src_strd
				661	* integer source stride
				662	*
				663	* @param[in] dst_strd
				664	* integer destination stride
				665	*
				666	* @param[in] u4_max_sad
				667	* integer maximum allowed distortion
				668	*
				669	* @param[out] pi4_mb_distortion
				670	* integer evaluated sad
				671	*
				672	* @remarks
				673	*
				674	******************************************************************************
				675	*/
				676	void ime_compute_sad_4x4
				677	(
				678	UWORD8 *pu1_src,
				679	UWORD8 *pu1_est,
				680	WORD32 src_strd,
				681	WORD32 est_strd,
				682	WORD32 i4_max_sad,
				683	WORD32 *pi4_mb_distortion
				684	)
				685	{
				686	WORD32 i4_sad = 0;
				687
				688	UNUSED(i4_max_sad);
				689
				690	USADA8(pu1_src, pu1_est, i4_sad);
				691	pu1_src += src_strd;
				692	pu1_est += est_strd;
				693
				694	USADA8(pu1_src, pu1_est, i4_sad);
				695	pu1_src += src_strd;
				696	pu1_est += est_strd;
				697
				698	USADA8(pu1_src, pu1_est, i4_sad);
				699	pu1_src += src_strd;
				700	pu1_est += est_strd;
				701
				702	USADA8(pu1_src, pu1_est, i4_sad);
				703	*pi4_mb_distortion = i4_sad;
				704	}
				705
				706
				707	/**
				708	******************************************************************************
				709	*
				710	* @brief computes distortion (SAD) between 2 16x8 blocks
				711	*
				712	*
				713	* @par Description
				714	* This functions computes SAD between 2 16x8 blocks. There is a provision
				715	* for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
				716	* compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
				717	*
				718	* @param[in] pu1_src
				719	* UWORD8 pointer to the source
				720	*
				721	* @param[out] pu1_dst
				722	* UWORD8 pointer to the destination
				723	*
				724	* @param[in] src_strd
				725	* integer source stride
				726	*
				727	* @param[in] dst_strd
				728	* integer destination stride
				729	*
				730	* @param[in] u4_max_sad
				731	* integer maximum allowed distortion
				732	*
				733	* @param[out] pi4_mb_distortion
				734	* integer evaluated sad
				735	*
				736	* @remarks
				737	*
				738	******************************************************************************
				739	*/
				740	void ime_compute_sad_16x8
				741	(
				742	UWORD8 *pu1_src,
				743	UWORD8 *pu1_est,
				744	WORD32 src_strd,
				745	WORD32 est_strd,
				746	WORD32 i4_max_sad,
				747	WORD32 *pi4_mb_distortion
				748	)
				749	{
				750	WORD32 i4_sad = 0;
				751	UWORD32 u4_src_offset = src_strd - 16;
				752	UWORD32 u4_est_offset = est_strd - 16;
				753	UWORD32 i, j;
				754	WORD16 temp;
				755
				756	GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8);
				757
				758	for(i = 8; i > 0; i--)
				759	{
				760	for(j = 16; j > 0; j--)
				761	{
				762	/* SAD */
				763	temp = pu1_src++ - pu1_est++;
				764	i4_sad += ABS(temp);
				765	}
				766	/* early exit */
				767	if(i4_max_sad < i4_sad)
				768	{
				769
				770	GATHER_16x8_SAD_EE_STATS(gu4_16x8_sad_ee_stats, 8-i);
				771
				772	*pi4_mb_distortion = i4_sad;
				773
				774	return;
				775	}
				776	pu1_src += u4_src_offset;
				777	pu1_est += u4_est_offset;
				778	}
				779
				780	*pi4_mb_distortion = i4_sad;
				781	return;
				782
				783	}
				784
				785	/**
				786	******************************************************************************
				787	*
				788	* @brief computes distortion (SAD) between 2 16x16 blocks
				789	*
				790	* @par Description
				791	* This functions computes SAD between 2 16x16 blocks. There is a provision
				792	* for early exit if the up-to computed SAD exceeds maximum allowed SAD. To
				793	* compute the distortion of the entire block set u4_max_sad to USHRT_MAX.
				794	*
				795	* @param[in] pu1_src
				796	* UWORD8 pointer to the source
				797	*
				798	* @param[out] pu1_dst
				799	* UWORD8 pointer to the destination
				800	*
				801	* @param[in] src_strd
				802	* integer source stride
				803	*
				804	* @param[in] dst_strd
				805	* integer destination stride
				806	*
				807	* @param[in] i4_max_sad
				808	* integer maximum allowed distortion
				809	*
				810	* @param[out] pi4_mb_distortion
				811	* integer evaluated sad
				812	*
				813	* @remarks
				814	*
				815	******************************************************************************
				816	*/
				817	void ime_compute_sad_16x16_ea8(UWORD8 *pu1_src,
				818	UWORD8 *pu1_est,
				819	WORD32 src_strd,
				820	WORD32 est_strd,
				821	WORD32 i4_max_sad,
				822	WORD32 *pi4_mb_distortion)
				823	{
				824	WORD32 i4_sad = 0;
				825	UWORD32 u4_src_offset = src_strd - 16;
				826	UWORD32 u4_est_offset = est_strd - 16;
				827	UWORD32 i, j;
				828	WORD16 temp;
				829	UWORD8 *pu1_src_temp = pu1_src + src_strd;
				830	UWORD8 *pu1_est_temp = pu1_est + est_strd;
				831
				832	for(i = 16; i > 0; i -= 2)
				833	{
				834	for(j = 16; j > 0; j--)
				835	{
				836	/* SAD */
				837	temp = pu1_src++ - pu1_est++;
				838	i4_sad += ABS(temp);
				839	}
				840
				841	pu1_src += (u4_src_offset + src_strd);
				842	pu1_est += (u4_est_offset + est_strd);
				843
				844	}
				845
				846	/* early exit */
				847	if(i4_max_sad < i4_sad)
				848	{
				849	*pi4_mb_distortion = i4_sad;
				850	return;
				851	}
				852
				853	pu1_src = pu1_src_temp;
				854	pu1_est = pu1_est_temp;
				855
				856	for(i = 16; i > 0; i -= 2)
				857	{
				858	for(j = 16; j > 0; j--)
				859	{
				860	/* SAD */
				861	temp = pu1_src++ - pu1_est++;
				862	i4_sad += ABS(temp);
				863	}
				864
				865	pu1_src += u4_src_offset + src_strd;
				866	pu1_est += u4_est_offset + est_strd;
				867	}
				868
				869	*pi4_mb_distortion = i4_sad;
				870	return;
				871	}
				872
				873
				874	/**
				875	*******************************************************************************
				876	*
				877	* @brief This function computes SAD between two 16x16 blocks
				878	* It also computes if the block will be zero after H264 transform and quant for
				879	* Intra 16x16 blocks
				880	*
				881	* @param[in] pu1_src
				882	* UWORD8 pointer to the source
				883	*
				884	* @param[out] pu1_dst
				885	* UWORD8 pointer to the destination
				886	*
				887	* @param[in] src_strd
				888	* integer source stride
				889	*
				890	* @param[in] dst_strd
				891	* integer destination stride
				892	*
				893	* @param[in] pu2_thrsh
				894	* Threshold for each element of transofrmed quantized block
				895	*
				896	* @param[out] pi4_mb_distortion
				897	* integer evaluated sad
				898	*
				899	* @param[out] pu4_is_zero
				900	* Poitner to store if the block is zero after transform and quantization
				901	*
				902	* @remarks
				903	*
				904	******************************************************************************
				905	*/
				906	void ime_compute_satqd_16x16_lumainter(UWORD8 *pu1_src,
				907	UWORD8 *pu1_est,
				908	WORD32 src_strd,
				909	WORD32 est_strd,
				910	UWORD16 *pu2_thrsh,
				911	WORD32 *pi4_mb_distortion,
				912	UWORD32 *pu4_is_non_zero)
				913	{
				914	UWORD32 i,j;
				915	WORD16 s1,s2,s3,s4,sad_1,sad_2,ls1,ls2,ls3,ls4,ls5,ls6,ls7,ls8;
				916	UWORD8 pu1_src_lp,pu1_est_lp;
				917	UWORD32 sad = 0;
				918
				919	(*pi4_mb_distortion) = 0;
				920	for(i=0;i<4;i++)
				921	{
				922	for(j=0;j<4;j++)
				923	{
				924	pu1_src_lp = pu1_src + 4*j;
				925	pu1_est_lp = pu1_est + 4*j;
				926
				927	s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
				928	s4 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
				929
				930	pu1_src_lp += src_strd;
				931	pu1_est_lp += est_strd;
				932
				933	s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
				934	s3 = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
				935
				936	pu1_src_lp += src_strd;
				937	pu1_est_lp += est_strd;
				938
				939	s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
				940	s3 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
				941
				942	pu1_src_lp += src_strd;
				943	pu1_est_lp += est_strd;
				944
				945	s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
				946	s4 += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
				947
				948	sad_1 = s1+s2+s3+s4;
				949
				950	if(sad == 0)
				951	{
				952	sad_2 = sad_1<<1;
				953
				954	ls1 = sad_2 -(s2 + s3);
				955	ls2 = sad_2 -(s1 + s4);
				956	ls3 = sad_2 -(s3 + s4);
				957	ls4 = sad_2 -(s3 - (s1<<1));
				958	ls5 = sad_2 -(s4 - (s2<<1));
				959	ls6 = sad_2 -(s1 + s2);
				960	ls7 = sad_2 -(s2 - (s4<<1));
				961	ls8 = sad_2 -(s1 - (s3<<1));
				962
				963	if(
				964	pu2_thrsh[8] <= sad_1 \|\|
				965	pu2_thrsh[0] <= ls2 \|\|
				966	pu2_thrsh[1] <= ls1 \|\|
				967	pu2_thrsh[2] <= ls8 \|\|
				968	pu2_thrsh[3] <= ls5 \|\|
				969
				970	pu2_thrsh[4] <= ls6 \|\|
				971	pu2_thrsh[5] <= ls3 \|\|
				972	pu2_thrsh[6] <= ls7 \|\|
				973	pu2_thrsh[7] <= ls4
				974
				975	)sad = 1;
				976	}
				977	(*pi4_mb_distortion) += sad_1;
				978	}
				979	pu1_src += (src_strd *4);
				980	pu1_est += (est_strd *4);
				981	}
				982	*pu4_is_non_zero = sad;
				983	}
				984
				985
				986	/**
				987	******************************************************************************
				988	*
				989	* @brief computes distortion (SAD and SAQTD) between 2 16x8 (interleaved) chroma blocks
				990	*
				991	*
				992	* @par Description
				993	* This function computes the SAD between two 16x8 chroma blocks (interleaved).
				994	* It also checks the SATQD (sum of absolute transformed quantized differences) between the blocks.
				995	* If the SATQD is zero, it gives back zero.
				996	* Otherwise the SAD is returned.
				997	* There is no provision for early exit.
				998	*
				999	* The transform done here is the transform for chroma blocks in H264
				1000	*
				1001	* @param[in] pu1_src
				1002	* UWORD8 pointer to the source
				1003	*
				1004	* @param[out] pu1_dst
				1005	* UWORD8 pointer to the destination
				1006	*
				1007	* @param[in] src_strd
				1008	* integer source stride
				1009	*
				1010	* @param[in] dst_strd
				1011	* integer destination stride
				1012	*
				1013	* @param[in] pu2_thrsh
				1014	* Threshold for each element of the transformed quantized block
				1015	*
				1016	* @param[out] pi4_mb_distortion
				1017	* integer evaluated sad
				1018	*
				1019	* @remarks
				1020	* Function code is not updated.
				1021	* Will require debugging and minor modifications
				1022	*
				1023	******************************************************************************
				1024	*/
				1025	void ime_compute_satqd_8x16_chroma(UWORD8 *pu1_src,
				1026	UWORD8 *pu1_est,
				1027	WORD32 src_strd,
				1028	WORD32 est_strd,
				1029	WORD32 max_sad,
				1030	UWORD16 *thrsh)
				1031	{
				1032	WORD32 i,j,plane;
				1033	WORD16 s1,s2,s3,s4,sad_1,sad_2,ls1,ls2,ls3,ls4,ls5,ls6,ls7,ls8;
				1034	UWORD8 pu1_src_lp,pu1_est_lp,pu1_src_plane,pu1_est_plane;
				1035	WORD32 sad =0;
				1036	UNUSED(max_sad);
				1037
				1038	pu1_src_plane = pu1_src;
				1039	pu1_est_plane = pu1_est;
				1040
				1041	for(plane =0;plane<2;plane++)
				1042	{
				1043	for(i=0;i<4;i++)
				1044	{
				1045	for(j=0;j<4;j++)
				1046	{
				1047	pu1_src_lp = pu1_src + 8*j;
				1048	pu1_est_lp = pu1_est + 8*j;
				1049
				1050	s1 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
				1051	s4 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
				1052
				1053	pu1_src_lp += src_strd;
				1054	pu1_est_lp += est_strd;
				1055
				1056	s2 = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
				1057	s3 = ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
				1058
				1059	pu1_src_lp += src_strd;
				1060	pu1_est_lp += est_strd;
				1061
				1062	s2 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
				1063	s3 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
				1064
				1065	pu1_src_lp += src_strd;
				1066	pu1_est_lp += est_strd;
				1067
				1068	s1 += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[6] - (WORD16)pu1_est_lp[6]);
				1069	s4 += ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2])+ ABS((WORD16)pu1_src_lp[4] - (WORD16)pu1_est_lp[4]);
				1070
				1071	sad_1 = s1+s2+s3+s4;
				1072	sad_2 = sad_1<<1;
				1073
				1074	ls1 = sad_2 -(s2 + s3);
				1075	ls2 = sad_2 -(s1 + s4);
				1076	ls3 = sad_2 -(s3 + s4);
				1077	ls4 = sad_2 -(s3 - (s1<<1));
				1078	ls5 = sad_2 -(s4 - (s2<<1));
				1079	ls6 = sad_2 -(s1 + s2);
				1080	ls7 = sad_2 -(s2 - (s4<<1));
				1081	ls8 = sad_2 -(s1 - (s3<<1));
				1082
				1083	if(
				1084	//thrsh[0] > sad_1 && Chroma Dc is checked later
				1085	thrsh[1] > ls1 &&
				1086	thrsh[2] > sad_1 &&
				1087	thrsh[3] > ls2 &&
				1088
				1089	thrsh[4] > ls3 &&
				1090	thrsh[5] > ls4 &&
				1091	thrsh[6] > ls3 &&
				1092	thrsh[7] > ls5 &&
				1093
				1094	thrsh[8] > sad_1 &&
				1095	thrsh[9] > ls1 &&
				1096	thrsh[10]> sad_1 &&
				1097	thrsh[11]> ls2 &&
				1098
				1099	thrsh[12]> ls6 &&
				1100	thrsh[13]> ls7 &&
				1101	thrsh[14]> ls6 &&
				1102	thrsh[15]> ls8
				1103	)
				1104	{
				1105	/set current sad to be zero/
				1106	}
				1107	else
				1108	return ;
				1109
				1110	sad += sad_1;
				1111	}
				1112	pu1_src += (src_strd *4);
				1113	pu1_est += (est_strd *4);
				1114	}
				1115	if(sad < (thrsh[0]<<1))sad = 0;
				1116	else return ;
				1117
				1118	pu1_src = pu1_src_plane+1;
				1119	pu1_est = pu1_est_plane+1;
				1120	}
				1121	return ;
				1122	}
				1123
				1124
				1125	/**
				1126	******************************************************************************
				1127	*
				1128	* @brief computes distortion (SAD and SAQTD) between 2 16x16 blocks
				1129	*
				1130	* @par Description
				1131	* This functions computes SAD between 2 16x16 blocks.
				1132	* It also checks if the SATDD(Sum of absolute transformed wuqntized differnce beteern the blocks
				1133	* If SAQTD is zero, it gives back zero
				1134	* Other wise sad is retrned
				1135	* There is no provison for early exit
				1136	*
				1137	* The transform done here is the transform for inter 16x16 blocks in H264
				1138	*
				1139	* @param[in] pu1_src
				1140	* UWORD8 pointer to the source
				1141	*
				1142	* @param[out] pu1_dst
				1143	* UWORD8 pointer to the destination
				1144	*
				1145	* @param[in] src_strd
				1146	* integer source stride
				1147	*
				1148	* @param[in] dst_strd
				1149	* integer destination stride
				1150	*
				1151	* @param[in] pu2_thrsh
				1152	* Threshold for each element of transofrmed quantized block
				1153	*
				1154	* @param[out] pi4_mb_distortion
				1155	* integer evaluated sad
				1156	*
				1157	* @remarks
				1158	*
				1159	******************************************************************************
				1160	*/
				1161	void ime_compute_satqd_16x16_lumaintra(UWORD8 *pu1_src,
				1162	UWORD8 *pu1_est,
				1163	WORD32 src_strd,
				1164	WORD32 est_strd,
				1165	WORD32 max_sad,
				1166	UWORD16 *thrsh,
				1167	WORD32 *pi4_mb_distortion,
				1168	UWORD8 *sig_nz_sad)
				1169	{
				1170	UWORD32 i,j;
				1171	WORD16 s1[4],s2[4],s3[4],s4[4],sad[4];
				1172	UWORD8 pu1_src_lp,pu1_est_lp;
				1173	UWORD8 *sig_sad_dc;
				1174	UWORD32 nz_sad_sig = 0;
				1175	UNUSED(max_sad);
				1176	*pi4_mb_distortion =0;
				1177
				1178	sig_sad_dc = sig_nz_sad;
				1179	sig_nz_sad++;
				1180
				1181	for(i=0;i<4;i++)
				1182	{
				1183	for(j=0;j<4;j++)
				1184	{
				1185	pu1_src_lp = pu1_src + 4*j;
				1186	pu1_est_lp = pu1_est + 4*j;
				1187
				1188	s1[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
				1189	s4[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
				1190
				1191	pu1_src_lp += src_strd;
				1192	pu1_est_lp += est_strd;
				1193
				1194	s2[j] = ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
				1195	s3[j] = ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
				1196
				1197	pu1_src_lp += src_strd;
				1198	pu1_est_lp += est_strd;
				1199
				1200	s2[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
				1201	s3[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
				1202
				1203	pu1_src_lp += src_strd;
				1204	pu1_est_lp += est_strd;
				1205
				1206	s1[j] += ABS((WORD16)pu1_src_lp[0] - (WORD16)pu1_est_lp[0])+ ABS((WORD16)pu1_src_lp[3] - (WORD16)pu1_est_lp[3]);
				1207	s4[j] += ABS((WORD16)pu1_src_lp[1] - (WORD16)pu1_est_lp[1])+ ABS((WORD16)pu1_src_lp[2] - (WORD16)pu1_est_lp[2]);
				1208
				1209	sad[j] = ((s1[j]+s2[j]+s3[j]+s4[j])<<1);
				1210	}
				1211
				1212	for(j=0;j<4;j++)
				1213	{
				1214
				1215	if(
				1216	//thrsh[0] > (sad[j] >> 1) &&Dc goes in the other part
				1217	thrsh[1] > (sad[j] -(s2[j] + s3[j])) &&
				1218	thrsh[2] > (sad[j]>>1) &&
				1219	thrsh[3] > (sad[j] -(s1[j] + s4[j])) &&
				1220
				1221	thrsh[4] > (sad[j] -(s3[j] + s4[j])) &&
				1222	thrsh[5] > (sad[j] -(s3[j] - (s1[j]<<1))) &&
				1223	thrsh[6] > (sad[j] -(s3[j] + s4[j])) &&
				1224	thrsh[7] > (sad[j] -(s4[j] - (s2[j]<<1))) &&
				1225
				1226	thrsh[8] > (sad[j]>>1) &&
				1227	thrsh[9] > (sad[j] -(s2[j] + s3[j])) &&
				1228	thrsh[10]> (sad[j]>>1) &&
				1229	thrsh[11]> (sad[j] -(s1[j] + s4[j])) &&
				1230
				1231	thrsh[12]> (sad[j] -(s1[j] + s2[j])) &&
				1232	thrsh[13]> (sad[j] -(s2[j] - (s4[j]<<1))) &&
				1233	thrsh[14]> (sad[j] -(s1[j] + s2[j])) &&
				1234	thrsh[15]> (sad[j] -(s1[j] - (s3[j]<<1)))
				1235	)
				1236	{
				1237	//sad[j] = 0; /set current sad to be zero/
				1238	sig_nz_sad[j] = 0;/Signal that the sad is zero/
				1239	}
				1240	else
				1241	{
				1242	sig_nz_sad[j] = 1;/signal that sad is non zero/
				1243	nz_sad_sig = 1;
				1244	}
				1245
				1246	(*pi4_mb_distortion) += (sad[j]>>1);
				1247	//if((pi4_mb_distortion) >= max_sad)return; /return or some thing*/
				1248	}
				1249
				1250	sig_nz_sad += 4;
				1251	pu1_src += (src_strd *4);
				1252	pu1_est += (est_strd *4);
				1253	}
				1254
				1255	if((*pi4_mb_distortion) < thrsh[0]<<2)
				1256	{
				1257	*sig_sad_dc = 0;
				1258	if(nz_sad_sig == 0)(*pi4_mb_distortion) = 0;
				1259	}
				1260	else *sig_sad_dc = 1;
				1261	}
				1262
Harinarayanan K K	134291e	2015-06-18 16:03:38 +0530	[diff] [blame]	1263