Blame - encoder/ih264e_core_coding.c - platform/external/libavc

blob: 5b36aef1722ef293958e80cdc8e19a49ab8c5f07 [file] [log] [blame]

Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	1	/******************************************************************************
				2	*
				3	* Copyright (C) 2015 The Android Open Source Project
				4	*
				5	* Licensed under the Apache License, Version 2.0 (the "License");
				6	* you may not use this file except in compliance with the License.
				7	* You may obtain a copy of the License at:
				8	*
				9	* http://www.apache.org/licenses/LICENSE-2.0
				10	*
				11	* Unless required by applicable law or agreed to in writing, software
				12	* distributed under the License is distributed on an "AS IS" BASIS,
				13	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	* See the License for the specific language governing permissions and
				15	* limitations under the License.
				16	*
				17	*****************************************************************************
				18	* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
				19	*/
				20
				21	/**
				22	*******************************************************************************
				23	* @file
				24	* ih264e_core_coding.c
				25	*
				26	* @brief
				27	* This file contains routines that perform luma and chroma core coding for
				28	* intra macroblocks
				29	*
				30	* @author
				31	* ittiam
				32	*
				33	* @par List of Functions:
				34	* - ih264e_pack_l_mb_i16()
				35	* - ih264e_pack_c_mb_i8()
				36	* - ih264e_code_luma_intra_macroblock_16x16()
				37	* - ih264e_code_luma_intra_macroblock_4x4()
				38	* - ih264e_code_chroma_intra_macroblock_8x8()
				39	*
				40	* @remarks
				41	* None
				42	*
				43	*******************************************************************************
				44	*/
				45
				46	/*****************************************************************************/
				47	/* File Includes */
				48	/*****************************************************************************/
				49
				50	/* System include files */
				51	#include <stdio.h>
				52	#include <string.h>
				53	#include <assert.h>
				54
				55	/* User include files */
				56	#include "ih264e_config.h"
				57	#include "ih264_typedefs.h"
				58	#include "ih264_platform_macros.h"
				59	#include "iv2.h"
				60	#include "ive2.h"
Harish Mahendrakar	c72323e	2015-04-28 19:07:40 +0530	[diff] [blame]	61	#include "ih264_macros.h"
Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	62	#include "ih264_defs.h"
				63	#include "ih264e_defs.h"
				64	#include "ih264_trans_data.h"
				65	#include "ih264e_error.h"
				66	#include "ih264e_bitstream.h"
				67	#include "ime_distortion_metrics.h"
Harinarayanan K K	134291e	2015-06-18 16:03:38 +0530	[diff] [blame]	68	#include "ime_defs.h"
Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	69	#include "ime_structs.h"
				70	#include "ih264_structs.h"
				71	#include "ih264_trans_quant_itrans_iquant.h"
				72	#include "ih264_inter_pred_filters.h"
				73	#include "ih264_mem_fns.h"
				74	#include "ih264_padding.h"
				75	#include "ih264_intra_pred_filters.h"
				76	#include "ih264_deblk_edge_filters.h"
Harinarayanan K K	134291e	2015-06-18 16:03:38 +0530	[diff] [blame]	77	#include "ih264_cabac_tables.h"
Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	78	#include "irc_cntrl_param.h"
				79	#include "irc_frame_info_collector.h"
				80	#include "ih264e_rate_control.h"
Harinarayanan K K	134291e	2015-06-18 16:03:38 +0530	[diff] [blame]	81	#include "ih264e_cabac_structs.h"
Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	82	#include "ih264e_structs.h"
				83	#include "ih264e_globals.h"
				84	#include "ih264e_core_coding.h"
				85	#include "ih264e_mc.h"
				86
				87
				88	/*****************************************************************************/
				89	/* Function Definitions */
				90	/*****************************************************************************/
				91
				92	/**
				93	*******************************************************************************
				94	*
				95	* @brief
				96	* This function performs does the DCT transform then Hadamard transform
				97	* and quantization for a macroblock when the mb mode is intra 16x16 mode
				98	*
				99	* @par Description:
				100	* First cf4 is done on all 16 4x4 blocks of the 16x16 input block.
				101	* Then hadamard transform is done on the DC coefficients
				102	* Quantization is then performed on the 16x16 block, 4x4 wise
				103	*
				104	* @param[in] pu1_src
				105	* Pointer to source sub-block
				106	*
				107	* @param[in] pu1_pred
				108	* Pointer to prediction sub-block
				109	*
				110	* @param[in] pi2_out
				111	* Pointer to residual sub-block
				112	* The output will be in linear format
				113	* The first 16 continuous locations will contain the values of Dc block
				114	* After DC block and a stride 1st AC block will follow
				115	* After one more stride next AC block will follow
				116	* The blocks will be in raster scan order
				117	*
				118	* @param[in] src_strd
				119	* Source stride
				120	*
				121	* @param[in] pred_strd
				122	* Prediction stride
				123	*
				124	* @param[in] dst_strd
				125	* Destination stride
				126	*
				127	* @param[in] pu2_scale_matrix
				128	* The quantization matrix for 4x4 transform
				129	*
				130	* @param[in] pu2_threshold_matrix
				131	* Threshold matrix
				132	*
				133	* @param[in] u4_qbits
				134	* 15+QP/6
				135	*
				136	* @param[in] u4_round_factor
				137	* Round factor for quant
				138	*
				139	* @param[out] pu1_nnz
				140	* Memory to store the non-zeros after transform
				141	* The first byte will be the nnz of DC block
				142	* From the next byte the AC nnzs will be stored in raster scan order
				143	*
				144	* @param u4_dc_flag
				145	* Signals if Dc transform is to be done or not
				146	* 1 -> Dc transform will be done
				147	* 0 -> Dc transform will not be done
				148	*
				149	* @remarks
				150	*
				151	*******************************************************************************
				152	*/
				153	void ih264e_luma_16x16_resi_trans_dctrans_quant(codec_t *ps_codec,
				154	UWORD8 *pu1_src,
				155	UWORD8 *pu1_pred,
				156	WORD16 *pi2_out,
				157	WORD32 src_strd,
				158	WORD32 pred_strd,
				159	WORD32 dst_strd,
				160	const UWORD16 *pu2_scale_matrix,
				161	const UWORD16 *pu2_threshold_matrix,
				162	UWORD32 u4_qbits,
				163	UWORD32 u4_round_factor,
				164	UWORD8 *pu1_nnz,
				165	UWORD32 u4_dc_flag)
				166
				167	{
				168	WORD32 blk_cntr;
				169	WORD32 i4_offsetx, i4_offsety;
				170	UWORD8 pu1_curr_src, pu1_curr_pred;
				171
				172	WORD16 *pi2_dc_str = pi2_out;
				173
				174	/* Move to the ac addresses */
				175	pu1_nnz++;
				176	pi2_out += dst_strd;
				177
				178	for (blk_cntr = 0; blk_cntr < NUM_LUMA4x4_BLOCKS_IN_MB; blk_cntr++)
				179	{
				180	IND2SUB_LUMA_MB(blk_cntr, i4_offsetx, i4_offsety);
				181
				182	pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd;
				183	pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd;
				184
				185	ps_codec->pf_resi_trans_quant_4x4(pu1_curr_src, pu1_curr_pred,
				186	pi2_out + blk_cntr * dst_strd,
				187	src_strd, pred_strd, pu2_scale_matrix,
				188	pu2_threshold_matrix, u4_qbits,
				189	u4_round_factor, &pu1_nnz[blk_cntr],
				190	&pi2_dc_str[blk_cntr]);
				191
				192	}
				193
				194	if (!u4_dc_flag)
				195	return;
				196
				197	/*
				198	* In case of i16x16, we need to remove the contribution of dc coeffs into
				199	* nnz of each block. We are doing that in the packing function
				200	*/
				201
				202	/* Adjust pointers to point to dc values */
				203	pi2_out -= dst_strd;
				204	pu1_nnz--;
				205
				206	u4_qbits++;
				207	u4_round_factor <<= 1;
				208
				209	ps_codec->pf_hadamard_quant_4x4(pi2_dc_str, pi2_out, pu2_scale_matrix,
				210	pu2_threshold_matrix, u4_qbits,
				211	u4_round_factor, &pu1_nnz[0]);
				212	}
				213
				214	/**
				215	*******************************************************************************
				216	*
				217	* @brief
				218	* This function performs the intra 16x16 inverse transform process for H264
				219	* it includes inverse Dc transform, inverse quant and then inverse transform
				220	*
				221	* @par Description:
				222	*
				223	* @param[in] pi2_src
				224	* Input data, 16x16 size
				225	* First 16 mem locations will have the Dc coffs in rater scan order in linear fashion
				226	* after a stride 1st AC clock will be present again in raster can order
				227	* Then each AC block of the 16x16 block will follow in raster scan order
				228	*
				229	* @param[in] pu1_pred
				230	* The predicted data, 16x16 size
				231	* Block by block form
				232	*
				233	* @param[in] pu1_out
				234	* Output 16x16
				235	* In block by block form
				236	*
				237	* @param[in] src_strd
				238	* Source stride
				239	*
				240	* @param[in] pred_strd
				241	* input stride for prediction buffer
				242	*
				243	* @param[in] out_strd
				244	* input stride for output buffer
				245	*
				246	* @param[in] pu2_iscale_mat
				247	* Inverse quantization matrix for 4x4 transform
				248	*
				249	* @param[in] pu2_weigh_mat
				250	* weight matrix of 4x4 transform
				251	*
				252	* @param[in] qp_div
				253	* QP/6
				254	*
				255	* @param[in] pi4_tmp
				256	* Input temporary buffer
				257	* needs to be at least 20 in size
				258	*
				259	* @param[in] pu4_cntrl
				260	* Controls the transform path
				261	* total Last 17 bits are used
				262	* the 16th th bit will correspond to DC block
				263	* and 32-17 will correspond to the ac blocks in raster scan order
				264	* bit equaling zero indicates that the entire 4x4 block is zero for DC
				265	* For AC blocks a bit equaling zero will mean that all 15 AC coffs of the block is nonzero
				266	*
				267	* @param[in] pi4_tmp
				268	* Input temporary buffer
				269	* needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size
				270	*
				271	* @returns
				272	* none
				273	*
				274	* @remarks
				275	* The all zero case must be taken care outside
				276	*
				277	*******************************************************************************
				278	*/
				279	void ih264e_luma_16x16_idctrans_iquant_itrans_recon(codec_t *ps_codec,
				280	WORD16 *pi2_src,
				281	UWORD8 *pu1_pred,
				282	UWORD8 *pu1_out,
				283	WORD32 src_strd,
				284	WORD32 pred_strd,
				285	WORD32 out_strd,
				286	const UWORD16 *pu2_iscale_mat,
				287	const UWORD16 *pu2_weigh_mat,
				288	UWORD32 qp_div,
				289	UWORD32 u4_cntrl,
				290	UWORD32 u4_dc_trans_flag,
				291	WORD32 *pi4_tmp)
				292	{
				293	/* Start index for inverse quant in a 4x4 block */
				294	WORD32 iq_start_idx = (u4_dc_trans_flag == 0) ? 0 : 1;
				295
				296	/* Cntrl bits for 4x4 transforms
				297	* u4_blk_cntrl : controls if a 4x4 block should be processed in ac path
				298	* u4_dc_cntrl : controls is a 4x4 block is to be processed in dc path
				299	* : dc block must contain only single dc coefficient
				300	* u4_empty_blk_cntrl : control fot 4x4 block with no coeffs, ie no dc and ac
				301	* : ie not (ac or dc)
				302	*/
				303	UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
				304
				305	/* tmp registers for block ids */
				306	UWORD32 u4_blk_id;
				307
				308	/* Subscrripts */
				309	WORD32 i4_offset_x, i4_offset_y;
				310
				311	UWORD8 pu1_cur_prd_blk, pu1_cur_out_blk;
				312
				313	/* Src and stride for dc coeffs */
				314	UWORD32 u4_dc_inc;
				315	WORD16 *pi2_dc_src;
				316
				317	/*
				318	* For intra blocks we need to do inverse dc transform
				319	* In case if intra blocks, its here that we populate the dc bits in cntrl
				320	* as they cannot be populated any earlier
				321	*/
				322	if (u4_dc_trans_flag)
				323	{
				324	UWORD32 cntr, u4_dc_cntrl;
				325	/* Do inv hadamard and place the results at the start of each AC block */
				326	ps_codec->pf_ihadamard_scaling_4x4(pi2_src, pi2_src, pu2_iscale_mat,
				327	pu2_weigh_mat, qp_div, pi4_tmp);
				328
				329	/* Update the cntrl flag */
				330	u4_dc_cntrl = 0;
				331	for (cntr = 0; cntr < DC_COEFF_CNT_LUMA_MB; cntr++)
				332	{
				333	u4_dc_cntrl \|= ((pi2_src[cntr] != 0) << (15 - cntr));
				334	}
				335	/* Mark dc bits as 1 if corresponding ac bit is 0 */
				336	u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
				337	/* Combine both ac and dc bits */
				338	u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA)
				339	\| (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_LUMA);
				340	}
				341
				342	/* Source for dc coeffs
				343	* If the block is intra, we have to read dc values from first row of src
				344	* then stride for each block is 1, other wise its src stride
				345	*/
				346	pi2_dc_src = (iq_start_idx == 0) ? (pi2_src + src_strd) : pi2_src;
				347	u4_dc_inc = (iq_start_idx == 0) ? src_strd : 1;
				348
				349	/* The AC blocks starts from 2nd row */
				350	pi2_src += src_strd;
				351
				352	/* Get the block bits */
				353	u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA);
				354	u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_LUMA) << 16;
				355	u4_empty_blk_cntrl = (~(u4_dc_cntrl \| u4_blk_cntrl)) & 0xFFFF0000;
				356
				357	/* Get first block to process */
				358	DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
				359	while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
				360	{
				361	/* Compute address of src blocks */
				362	WORD32 i4_src_offset = u4_dc_inc * u4_blk_id;
				363
				364	IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
				365
				366	/* Compute address of out and pred blocks */
				367	pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
				368	pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
				369
				370	/* Do inv dc transform */
				371	ps_codec->pf_iquant_itrans_recon_4x4_dc(pi2_dc_src + i4_src_offset,
				372	pu1_cur_prd_blk,
				373	pu1_cur_out_blk, pred_strd,
				374	out_strd, pu2_iscale_mat,
				375	pu2_weigh_mat, qp_div, NULL,
				376	iq_start_idx,
				377	pi2_dc_src + i4_src_offset);
				378	/* Get next DC block to process */
				379	DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
				380	}
				381
				382	/* now process ac/mixed blocks */
				383	DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
				384	while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
				385	{
				386
				387	WORD32 i4_src_offset = src_strd * u4_blk_id;
				388
				389	IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
				390
				391	pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
				392	pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
				393
				394	ps_codec->pf_iquant_itrans_recon_4x4(pi2_src + i4_src_offset,
				395	pu1_cur_prd_blk, pu1_cur_out_blk,
				396	pred_strd, out_strd,
				397	pu2_iscale_mat, pu2_weigh_mat,
				398	qp_div, (WORD16*) pi4_tmp,
				399	iq_start_idx,
				400	pi2_dc_src + u4_blk_id);
				401
				402	DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
				403	}
				404
				405	/* Now process empty blocks */
				406	DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
				407	while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
				408	{
				409	IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
				410
				411	pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
				412	pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
				413
				414	ps_codec->pf_inter_pred_luma_copy(pu1_cur_prd_blk, pu1_cur_out_blk,
				415	pred_strd, out_strd, SIZE_4X4_BLK_HRZ,
				416	SIZE_4X4_BLK_VERT, 0, 0);
				417
				418	DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
				419	}
				420	}
				421
				422	/**
				423	*******************************************************************************
				424	*
				425	* @brief
				426	* This function performs does the DCT transform then Hadamard transform
				427	* and quantization for a chroma macroblock
				428	*
				429	* @par Description:
				430	* First cf4 is done on all 16 4x4 blocks of the 8x8input block
				431	* Then hadamard transform is done on the DC coefficients
				432	* Quantization is then performed on the 8x8 block, 4x4 wise
				433	*
				434	* @param[in] pu1_src
				435	* Pointer to source sub-block
				436	* The input is in interleaved format for two chroma planes
				437	*
				438	* @param[in] pu1_pred
				439	* Pointer to prediction sub-block
				440	* Prediction is in inter leaved format
				441	*
				442	* @param[in] pi2_out
				443	* Pointer to residual sub-block
				444	* The output will be in linear format
				445	* The first 4 continuous locations will contain the values of DC block for U
				446	* and then next 4 will contain for V.
				447	* After DC block and a stride 1st AC block of U plane will follow
				448	* After one more stride next AC block of V plane will follow
				449	* The blocks will be in raster scan order
				450	*
				451	* After all the AC blocks of U plane AC blocks of V plane will follow in exact
				452	* same way
				453	*
				454	* @param[in] src_strd
				455	* Source stride
				456	*
				457	* @param[in] pred_strd
				458	* Prediction stride
				459	*
				460	* @param[in] dst_strd
				461	* Destination stride
				462	*
				463	* @param[in] pu2_scale_matrix
				464	* The quantization matrix for 4x4 transform
				465	*
				466	* @param[in] pu2_threshold_matrix
				467	* Threshold matrix
				468	*
				469	* @param[in] u4_qbits
				470	* 15+QP/6
				471	*
				472	* @param[in] u4_round_factor
				473	* Round factor for quant
				474	*
				475	* @param[out] pu1_nnz
				476	* Memory to store the non-zeros after transform
				477	* The first byte will be the nnz od DC block for U plane
				478	* From the next byte the AC nnzs will be storerd in raster scan order
				479	* The fifth byte will be nnz of Dc block of V plane
				480	* Then Ac blocks will follow
				481	*
				482	* @param u4_dc_flag
				483	* Signals if Dc transform is to be done or not
				484	* 1 -> Dc transform will be done
				485	* 0 -> Dc transform will not be done
				486	*
				487	* @remarks
				488	*
				489	*******************************************************************************
				490	*/
				491	void ih264e_chroma_8x8_resi_trans_dctrans_quant(codec_t *ps_codec,
				492	UWORD8 *pu1_src,
				493	UWORD8 *pu1_pred,
				494	WORD16 *pi2_out,
				495	WORD32 src_strd,
				496	WORD32 pred_strd,
				497	WORD32 out_strd,
				498	const UWORD16 *pu2_scale_matrix,
				499	const UWORD16 *pu2_threshold_matrix,
				500	UWORD32 u4_qbits,
				501	UWORD32 u4_round_factor,
				502	UWORD8 *pu1_nnz_c)
				503	{
				504	WORD32 blk_cntr;
				505	WORD32 i4_offsetx, i4_offsety;
				506	UWORD8 pu1_curr_src, pu1_curr_pred;
				507
				508	WORD16 pi2_dc_str[8];
				509	UWORD8 au1_dcnnz[2];
				510
				511	/* Move to the ac addresses */
				512	pu1_nnz_c++;
				513	pi2_out += out_strd;
				514
				515	for (blk_cntr = 0; blk_cntr < NUM_CHROMA4x4_BLOCKS_IN_MB; blk_cntr++)
				516	{
				517	IND2SUB_CHROMA_MB(blk_cntr, i4_offsetx, i4_offsety);
				518
				519	pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd;
				520	pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd;
				521
				522	/* For chroma, v plane nnz is populated from position 5 */
				523	ps_codec->pf_resi_trans_quant_chroma_4x4(
				524	pu1_curr_src, pu1_curr_pred,
				525	pi2_out + blk_cntr * out_strd, src_strd, pred_strd,
				526	pu2_scale_matrix, pu2_threshold_matrix, u4_qbits,
				527	u4_round_factor, &pu1_nnz_c[blk_cntr + (blk_cntr > 3)],
				528	&pi2_dc_str[blk_cntr]);
				529	}
				530
				531	/* Adjust pointers to point to dc values */
				532	pi2_out -= out_strd;
				533	pu1_nnz_c--;
				534
				535	u4_qbits++;
				536	u4_round_factor <<= 1;
				537
				538	ps_codec->pf_hadamard_quant_2x2_uv(pi2_dc_str, pi2_out, pu2_scale_matrix,
				539	pu2_threshold_matrix, u4_qbits,
				540	u4_round_factor, au1_dcnnz);
				541
				542	/* Copy the dc nnzs */
				543	pu1_nnz_c[0] = au1_dcnnz[0];
				544	pu1_nnz_c[5] = au1_dcnnz[1];
				545
				546	}
				547
				548	/**
				549	*******************************************************************************
				550	* @brief
				551	* This function performs the inverse transform with process for chroma MB of H264
				552	*
				553	* @par Description:
				554	* Does inverse DC transform ,inverse quantization inverse transform
				555	*
				556	* @param[in] pi2_src
				557	* Input data, 16x16 size
				558	* The input is in the form of, first 4 locations will contain DC coeffs of
				559	* U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane
				560	* in raster scan order will follow, each block as linear array in raster scan order.
				561	* After a stride next AC block will follow. After all AC blocks of U plane
				562	* V plane AC blocks will follow in exact same order.
				563	*
				564	* @param[in] pu1_pred
				565	* The predicted data, 8x16 size, U and V interleaved
				566	*
				567	* @param[in] pu1_out
				568	* Output 8x16, U and V interleaved
				569	*
				570	* @param[in] src_strd
				571	* Source stride
				572	*
				573	* @param[in] pred_strd
				574	* input stride for prediction buffer
				575	*
				576	* @param[in] out_strd
				577	* input stride for output buffer
				578	*
				579	* @param[in] pu2_iscale_mat
				580	* Inverse quantization martix for 4x4 transform
				581	*
				582	* @param[in] pu2_weigh_mat
				583	* weight matrix of 4x4 transform
				584	*
				585	* @param[in] qp_div
				586	* QP/6
				587	*
				588	* @param[in] pi4_tmp
				589	* Input temporary buffer
				590	* needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of Dc cofss for chroma * number of planes
				591	* in size
				592	*
				593	* @param[in] pu4_cntrl
				594	* Controls the transform path
				595	* the 15 th bit will correspond to DC block of U plane , 14th will indicate the V plane Dc block
				596	* 32-28 bits will indicate AC blocks of U plane in raster scan order
				597	* 27-23 bits will indicate AC blocks of V plane in rater scan order
				598	* The bit 1 implies that there is at least one non zero coeff in a block
				599	*
				600	* @returns
				601	* none
				602	*
				603	* @remarks
				604	*******************************************************************************
				605	*/
				606	void ih264e_chroma_8x8_idctrans_iquant_itrans_recon(codec_t *ps_codec,
				607	WORD16 *pi2_src,
				608	UWORD8 *pu1_pred,
				609	UWORD8 *pu1_out,
				610	WORD32 src_strd,
				611	WORD32 pred_strd,
				612	WORD32 out_strd,
				613	const UWORD16 *pu2_iscale_mat,
				614	const UWORD16 *pu2_weigh_mat,
				615	UWORD32 qp_div,
				616	UWORD32 u4_cntrl,
				617	WORD32 *pi4_tmp)
				618	{
				619	/* Cntrl bits for 4x4 transforms
				620	* u4_blk_cntrl : controls if a 4x4 block should be processed in ac path
				621	* u4_dc_cntrl : controls is a 4x4 block is to be processed in dc path
				622	* : dc block must contain only single dc coefficient
				623	* u4_empty_blk_cntrl : control fot 4x4 block with no coeffs, ie no dc and ac
				624	* : ie not (ac or dc)
				625	*/
				626
				627	UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
				628
				629	/* tmp registers for block ids */
				630	WORD32 u4_blk_id;
				631
				632	/* Offsets for pointers */
				633	WORD32 i4_offset_x, i4_offset_y;
				634
				635	/* Pointer to 4x4 blocks */
				636	UWORD8 pu1_cur_4x4_prd_blk, pu1_cur_4x4_out_blk;
				637
				638	/* Tmp register for pointer to dc coffs */
				639	WORD16 *pi2_dc_src;
				640
				641	WORD16 i2_zero = 0;
				642
				643	/* Increment for dc block */
				644	WORD32 i4_dc_inc;
				645
				646	/*
				647	* Lets do the inverse transform for dc coeffs in chroma
				648	*/
				649	if (u4_cntrl & CNTRL_FLAG_DCBLK_MASK_CHROMA)
				650	{
				651	UWORD32 cntr, u4_dc_cntrl;
				652	/* Do inv hadamard for u an v block */
				653
				654	ps_codec->pf_ihadamard_scaling_2x2_uv(pi2_src, pi2_src, pu2_iscale_mat,
				655	pu2_weigh_mat, qp_div, NULL);
				656	/*
				657	* Update the cntrl flag
				658	* Flag is updated as follows bits 15-11 -> u block dc bits
				659	*/
				660	u4_dc_cntrl = 0;
				661	for (cntr = 0; cntr < 8; cntr++)
				662	{
				663	u4_dc_cntrl \|= ((pi2_src[cntr] != 0) << (15 - cntr));
				664	}
				665
				666	/* Mark dc bits as 1 if corresponding ac bit is 0 */
				667	u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
				668	/* Combine both ac and dc bits */
				669	u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA)
				670	\| (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_CHROMA);
				671
				672	/* Since we populated the dc coffs, we have to read them from there */
				673	pi2_dc_src = pi2_src;
				674	i4_dc_inc = 1;
				675	}
				676	else
				677	{
				678	u4_cntrl = u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA;
				679	pi2_dc_src = &i2_zero;
				680	i4_dc_inc = 0;
				681	}
				682
				683	/* Get the block bits */
				684	u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA);
				685	u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_CHROMA) << 16;
				686	u4_empty_blk_cntrl = (~(u4_dc_cntrl \| u4_blk_cntrl)) & 0xFF000000;
				687
				688	/* The AC blocks starts from 2nd row */
				689	pi2_src += src_strd;
				690
				691	DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
				692	while (u4_blk_id < 8)
				693	{
				694	WORD32 dc_src_offset = u4_blk_id * i4_dc_inc;
				695
				696	IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
				697
				698	pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
				699	pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
				700
				701	ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc(
				702	pi2_dc_src + dc_src_offset, pu1_cur_4x4_prd_blk,
				703	pu1_cur_4x4_out_blk, pred_strd, out_strd, NULL, NULL, 0,
				704	NULL, pi2_dc_src + dc_src_offset);
				705	/* Get next DC block to process */
				706	DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
				707	}
				708
				709	/* now process ac/mixed blocks */
				710	DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
				711	while (u4_blk_id < 8)
				712	{
				713	WORD32 i4_src_offset = src_strd * u4_blk_id;
				714	WORD32 dc_src_offset = i4_dc_inc * u4_blk_id;
				715
				716	IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
				717
				718	pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
				719	pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
				720
				721	ps_codec->pf_iquant_itrans_recon_chroma_4x4(pi2_src + i4_src_offset,
				722	pu1_cur_4x4_prd_blk,
				723	pu1_cur_4x4_out_blk,
				724	pred_strd, out_strd,
				725	pu2_iscale_mat,
				726	pu2_weigh_mat, qp_div,
				727	(WORD16 *) pi4_tmp,
				728	pi2_dc_src + dc_src_offset);
				729
				730	DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
				731	}
				732
				733	/* Now process empty blocks */
				734	DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
				735	while (u4_blk_id < 8)
				736	{
				737	IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
				738
				739	pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
				740	pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
				741
				742	ps_codec->pf_interleave_copy(pu1_cur_4x4_prd_blk, pu1_cur_4x4_out_blk,
				743	pred_strd, out_strd, SIZE_4X4_BLK_VERT,
				744	SIZE_4X4_BLK_HRZ);
				745
				746	DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
				747	}
				748	}
				749
				750	/**
				751	******************************************************************************
				752	*
				753	* @brief This function packs residue of an i16x16 luma mb for entropy coding
				754	*
				755	* @par Description
				756	* An i16 macro block contains two classes of units, dc 4x4 block and
				757	* 4x4 ac blocks. while packing the mb, the dc block is sent first, and
				758	* the 16 ac blocks are sent next in scan order. Each and every block is
				759	* represented by 3 parameters (nnz, significant coefficient map and the
				760	* residue coefficients itself). If a 4x4 unit does not have any coefficients
				761	* then only nnz is sent. Inside a 4x4 block the individual coefficients are
				762	* sent in scan order.
				763	*
				764	* The first byte of each block will be nnz of the block, if it is non zero,
				765	* a 2 byte significance map is sent. This is followed by nonzero coefficients.
				766	* This is repeated for 1 dc + 16 ac blocks.
				767	*
				768	* @param[in] pi2_res_mb
				769	* pointer to residue mb
				770	*
				771	* @param[in, out] pv_mb_coeff_data
				772	* buffer pointing to packed residue coefficients
				773	*
				774	* @param[in] u4_res_strd
				775	* residual block stride
				776	*
				777	* @param[out] u1_cbp_l
				778	* coded block pattern luma
				779	*
				780	* @param[in] pu1_nnz
				781	* number of non zero coefficients in each 4x4 unit
				782	*
				783	* @param[out]
				784	* Control signal for inverse transform of 16x16 blocks
				785	*
				786	* @return none
				787	*
				788	* @ remarks
				789	*
				790	******************************************************************************
				791	*/
				792	void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb,
				793	void **pv_mb_coeff_data,
				794	WORD32 i4_res_strd,
				795	UWORD8 *u1_cbp_l,
				796	UWORD8 *pu1_nnz,
				797	UWORD32 *pu4_cntrl)
				798	{
				799	/* pointer to packed sub block buffer space */
				800	tu_sblk_coeff_data_t ps_mb_coeff_data = (pv_mb_coeff_data), *ps_mb_coeff_data_ac;
				801
				802	/* no of non zero coefficients in the current sub block */
				803	UWORD32 u4_nnz_cnt;
				804
				805	/* significant coefficient map */
				806	UWORD32 u4_s_map;
				807
				808	/* pointer to scanning matrix */
				809	const UWORD8 *pu1_scan_order;
				810
				811	/* number of non zeros in sub block */
				812	UWORD32 u4_nnz;
				813
				814	/* coeff scan order */
				815	const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
				816
				817	/* temp var */
				818	UWORD32 coeff_cnt, mask, b4,u4_cntrl=0;
				819
				820	/DC and AC coeff pointers/
				821	WORD16 pi2_res_mb_ac,pi2_res_mb_dc;
				822
				823	/********************************************************/
				824	/* pack dc coeff data for entropy coding */
				825	/********************************************************/
				826
				827	pi2_res_mb_dc = pi2_res_mb;
				828	pu1_scan_order = gu1_luma_scan_order_dc;
				829
				830	u4_nnz = *pu1_nnz;
				831	u4_cntrl = 0;
				832
				833	/* write number of non zero coefficients */
				834	ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
				835
				836	if (u4_nnz)
				837	{
				838	for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
				839	{
				840	if (pi2_res_mb_dc[pu1_scan_order[coeff_cnt]])
				841	{
				842	/* write residue */
				843	ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_dc[pu1_scan_order[coeff_cnt]];
				844	u4_s_map \|= mask;
				845	}
				846	mask <<= 1;
				847	}
				848	/* write significant coeff map */
				849	ps_mb_coeff_data->i4_sig_map_nnz \|= (u4_s_map << 16);
Harish Mahendrakar	c72323e	2015-04-28 19:07:40 +0530	[diff] [blame]	850	(*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	851
				852	u4_cntrl = 0x00008000;// Set DC bit in ctrl code
				853	}
				854	else
				855	{
				856	(*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
				857	}
				858
				859	/********************************************************/
				860	/* pack ac coeff data for entropy coding */
				861	/********************************************************/
				862
				863	pu1_nnz ++;
				864	pu1_scan_order = gu1_luma_scan_order;
				865	pi2_res_mb += i4_res_strd; /Move to AC block/
				866
				867	ps_mb_coeff_data_ac = (*pv_mb_coeff_data);
				868
				869	for (b4 = 0; b4 < 16; b4++)
				870	{
				871	ps_mb_coeff_data = (*pv_mb_coeff_data);
				872
				873	u4_nnz = pu1_nnz[u1_scan_order[b4]];
				874
				875	/* Jump according to the scan order */
				876	pi2_res_mb_ac = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]);
				877
				878	/*
				879	* Since this is a i16x16 block, we should not count dc coeff on indi
				880	* vidual 4x4 blocks to nnz. But due to the implementation of 16x16
				881	* trans function, we add dc's nnz to u4_nnz too. Hence we adjust that
				882	* here
				883	*/
				884	u4_nnz -= (pi2_res_mb_ac[0] != 0);
				885
				886	/* write number of non zero coefficients */
				887	ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
				888
				889	if (u4_nnz)
				890	{
				891	for (u4_nnz_cnt = 0, coeff_cnt = 1, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
				892	{
				893	if (pi2_res_mb_ac[pu1_scan_order[coeff_cnt]])
				894	{
				895	/* write residue */
				896	ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_ac[pu1_scan_order[coeff_cnt]];
				897	u4_s_map \|= mask;
				898	}
				899	mask <<= 1;
				900	}
				901	/* write significant coeff map */
				902	ps_mb_coeff_data->i4_sig_map_nnz \|= (u4_s_map << 16);
Harish Mahendrakar	c72323e	2015-04-28 19:07:40 +0530	[diff] [blame]	903	(*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	904	*u1_cbp_l = 15;
				905
				906	u4_cntrl \|= (1 << (31 - u1_scan_order[b4]));
				907	}
				908	else
				909	{
				910	(*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
				911	}
				912
				913	}
				914
				915	if (!(*u1_cbp_l))
				916	{
				917	(*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
				918	}
				919
				920	/* Store the cntrl signal */
				921	(*pu4_cntrl) = u4_cntrl;
				922	return;
				923	}
				924
				925	/**
				926	******************************************************************************
				927	*
				928	* @brief This function packs residue of an p16x16 luma mb for entropy coding
				929	*
				930	* @par Description
				931	* A p16x16 macro block contains two classes of units 16 4x4 ac blocks.
				932	* while packing the mb, the dc block is sent first, and
				933	* the 16 ac blocks are sent next in scan order. Each and every block is
				934	* represented by 3 parameters (nnz, significant coefficient map and the
				935	* residue coefficients itself). If a 4x4 unit does not have any coefficients
				936	* then only nnz is sent. Inside a 4x4 block the individual coefficients are
				937	* sent in scan order.
				938	*
				939	* The first byte of each block will be nnz of the block, if it is non zero,
				940	* a 2 byte significance map is sent. This is followed by nonzero coefficients.
				941	* This is repeated for 1 dc + 16 ac blocks.
				942	*
				943	* @param[in] pi2_res_mb
				944	* pointer to residue mb
				945	*
				946	* @param[in, out] pv_mb_coeff_data
				947	* buffer pointing to packed residue coefficients
				948	*
				949	* @param[in] i4_res_strd
				950	* residual block stride
				951	*
				952	* @param[out] u1_cbp_l
				953	* coded block pattern luma
				954	*
				955	* @param[in] pu1_nnz
				956	* number of non zero coefficients in each 4x4 unit
				957	*
				958	* @param[out] pu4_cntrl
				959	* Control signal for inverse transform
				960	*
				961	* @return none
				962	*
				963	* @remarks Killing coffs not yet coded
				964	*
				965	******************************************************************************
				966	*/
				967	void ih264e_pack_l_mb(WORD16 *pi2_res_mb,
				968	void **pv_mb_coeff_data,
				969	WORD32 i4_res_strd,
				970	UWORD8 *u1_cbp_l,
				971	UWORD8 *pu1_nnz,
				972	UWORD32 u4_thres_resi,
				973	UWORD32 *pu4_cntrl)
				974	{
				975	/* pointer to packed sub block buffer space */
				976	tu_sblk_coeff_data_t ps_mb_coeff_data, ps_mb_coeff_data_b8, *ps_mb_coeff_data_mb;
				977
				978	/* no of non zero coefficients in the current sub block */
				979	UWORD32 u4_nnz_cnt;
				980
				981	/* significant coefficient map */
				982	UWORD32 u4_s_map;
				983
				984	/* pointer to scanning matrix */
				985	const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
				986
				987	/* number of non zeros in sub block */
				988	UWORD32 u4_nnz;
				989
				990	/* pointer to residual sub block */
				991	WORD16 *pi2_res_sb;
				992
				993	/* coeff scan order */
				994	const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
				995
				996	/* coeff cost */
				997	const UWORD8 *pu1_coeff_cost = gu1_coeff_cost;
				998
				999	/* temp var */
				1000	UWORD32 u4_mb_coeff_cost = 0, u4_b8_coeff_cost = 0, coeff_cnt, mask, u4_cntrl = 0, b4, b8;
				1001
				1002	/* temp var */
				1003	WORD32 i4_res_val, i4_run = -1, dcac_block;
				1004
				1005	/* When Hadamard transform is disabled, first row values are dont care, ignore them */
				1006	pi2_res_mb += i4_res_strd;
				1007
				1008	/* When Hadamard transform is disabled, first unit value is dont care, ignore this */
				1009	pu1_nnz ++;
				1010
				1011	ps_mb_coeff_data_mb = ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);
				1012
				1013	/********************************************************/
				1014	/* pack coeff data for entropy coding */
				1015	/********************************************************/
				1016
				1017	for (b4 = 0; b4 < 16; b4++)
				1018	{
				1019	ps_mb_coeff_data = (*pv_mb_coeff_data);
				1020
				1021	b8 = b4 >> 2;
				1022
				1023	u4_nnz = pu1_nnz[u1_scan_order[b4]];
				1024
				1025	/* Jump according to the scan order */
				1026	pi2_res_sb = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]);
				1027
				1028	/* write number of non zero coefficients */
				1029	ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
				1030
				1031	if (u4_nnz)
				1032	{
				1033	for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
				1034	{
				1035	/* number of runs of zero before, this is used to compute coeff cost */
				1036	i4_run++;
				1037
				1038	i4_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
				1039
				1040	if (i4_res_val)
				1041	{
				1042	/* write residue */
				1043	ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i4_res_val;
				1044	u4_s_map \|= mask;
				1045
				1046	if (u4_thres_resi)
				1047	{
				1048	/* compute coeff cost */
				1049	if (i4_res_val == 1 \|\| i4_res_val == -1)
				1050	{
				1051	if (i4_run < 6)
				1052	u4_b8_coeff_cost += pu1_coeff_cost[i4_run];
				1053	}
				1054	else
				1055	u4_b8_coeff_cost += 9;
				1056
				1057	i4_run = -1;
				1058	}
				1059	}
				1060
				1061	mask <<= 1;
				1062	}
				1063
				1064	/* write significant coeff map */
				1065	ps_mb_coeff_data->i4_sig_map_nnz \|= (u4_s_map << 16);
Harish Mahendrakar	c72323e	2015-04-28 19:07:40 +0530	[diff] [blame]	1066	(*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	1067
				1068	/* cbp */
				1069	*u1_cbp_l \|= (1 << b8);
				1070
				1071	/* Cntrl map for inverse transform computation
				1072	*
				1073	* If coeff_cnt is zero, it means that only nonzero was a dc coeff
				1074	* Hence we have to set the 16 - u1_scan_order[b4]) position instead
				1075	* of 31 - u1_scan_order[b4]
				1076	*/
				1077	dcac_block = (coeff_cnt == 0)?16:31;
				1078	u4_cntrl \|= (1 << (dcac_block - u1_scan_order[b4]));
				1079	}
				1080	else
				1081	{
				1082	(*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
				1083	}
				1084
				1085	/* Decide if the 8x8 unit has to be sent for entropy coding? */
				1086	if ((b4+1) % 4 == 0)
				1087	{
				1088	if ( u4_thres_resi && (u4_b8_coeff_cost <= LUMA_SUB_BLOCK_SKIP_THRESHOLD) &&
				1089	(*u1_cbp_l & (1 << b8)) )
				1090	{
				1091
				1092
				1093	/*
				1094	* When we want to reset the full 8x8 block, we have to reset
				1095	* both the dc and ac coeff bits hence we have the symmetric
				1096	* arrangement of bits
				1097	*/
				1098	const UWORD32 cntrl_mask_map[4] = {0xcc00cc00, 0x33003300, 0x00cc00cc, 0x00330033};
				1099
				1100	/* restore cbp */
				1101	u1_cbp_l = (u1_cbp_l & (~(1 << b8)));
				1102
				1103	/* correct cntrl flag */
				1104	u4_cntrl = u4_cntrl & (~cntrl_mask_map[(b4 >> 2)]);
				1105
				1106	/* correct nnz */
				1107	pu1_nnz[u1_scan_order[b4 - 3]] = 0;
				1108	pu1_nnz[u1_scan_order[b4 - 2]] = 0;
				1109	pu1_nnz[u1_scan_order[b4 - 1]] = 0;
				1110	pu1_nnz[u1_scan_order[b4]] = 0;
				1111
				1112	/* reset blk cost */
				1113	u4_b8_coeff_cost = 0;
				1114	}
				1115
				1116	if (!(*u1_cbp_l & (1 << b8)))
				1117	{
				1118	(*pv_mb_coeff_data) = ps_mb_coeff_data_b8;
				1119	}
				1120
				1121	u4_mb_coeff_cost += u4_b8_coeff_cost;
				1122
				1123	u4_b8_coeff_cost = 0;
				1124	i4_run = -1;
				1125	ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);
				1126	}
				1127	}
				1128
				1129	if (u4_thres_resi && (u4_mb_coeff_cost <= LUMA_BLOCK_SKIP_THRESHOLD)
				1130	&& (*u1_cbp_l))
				1131	{
				1132	(*pv_mb_coeff_data) = ps_mb_coeff_data_mb;
				1133	*u1_cbp_l = 0;
				1134	u4_cntrl = 0;
				1135	memset(pu1_nnz, 0, 16);
				1136	}
				1137
				1138	(*pu4_cntrl) = u4_cntrl;
				1139
				1140	return;
				1141	}
				1142
				1143	/**
				1144	******************************************************************************
				1145	*
				1146	* @brief This function packs residue of an i8x8 chroma mb for entropy coding
				1147	*
				1148	* @par Description
				1149	* An i8 chroma macro block contains two classes of units, dc 2x2 block and
				1150	* 4x4 ac blocks. while packing the mb, the dc block is sent first, and
				1151	* the 4 ac blocks are sent next in scan order. Each and every block is
				1152	* represented by 3 parameters (nnz, significant coefficient map and the
				1153	* residue coefficients itself). If a 4x4 unit does not have any coefficients
				1154	* then only nnz is sent. Inside a 4x4 block the individual coefficients are
				1155	* sent in scan order.
				1156	*
				1157	* The first byte of each block will be nnz of the block, if it is non zero,
				1158	* a 2 byte significance map is sent. This is followed by nonzero coefficients.
				1159	* This is repeated for 1 dc + 4 ac blocks.
				1160	*
				1161	* @param[in] pi2_res_mb
				1162	* pointer to residue mb
				1163	*
				1164	* @param[in, out] pv_mb_coeff_data
				1165	* buffer pointing to packed residue coefficients
				1166	*
				1167	* @param[in] u4_res_strd
				1168	* residual block stride
				1169	*
				1170	* @param[out] u1_cbp_c
				1171	* coded block pattern chroma
				1172	*
				1173	* @param[in] pu1_nnz
				1174	* number of non zero coefficients in each 4x4 unit
				1175	*
				1176	* @param[out] pu1_nnz
				1177	* Control signal for inverse transform
				1178	*
				1179	* @param[in] u4_swap_uv
				1180	* Swaps the order of U and V planes in entropy bitstream
				1181	*
				1182	* @return none
				1183	*
				1184	* @ remarks
				1185	*
				1186	******************************************************************************
				1187	*/
				1188	void ih264e_pack_c_mb(WORD16 *pi2_res_mb,
				1189	void **pv_mb_coeff_data,
				1190	WORD32 i4_res_strd,
				1191	UWORD8 *u1_cbp_c,
				1192	UWORD8 *pu1_nnz,
				1193	UWORD32 u4_thres_resi,
				1194	UWORD32 *pu4_cntrl,
				1195	UWORD32 u4_swap_uv)
				1196	{
				1197	/* pointer to packed sub block buffer space */
				1198	tu_sblk_coeff_data_t ps_mb_coeff_data = (pv_mb_coeff_data);
				1199	tu_sblk_coeff_data_t ps_mb_coeff_data_dc, ps_mb_coeff_data_ac;
				1200
				1201	/* nnz pointer */
				1202	UWORD8 pu1_nnz_ac, pu1_nnz_dc;
				1203
				1204	/* nnz counter */
				1205	UWORD32 u4_nnz_cnt;
				1206
				1207	/* significant coefficient map */
				1208	UWORD32 u4_s_map;
				1209
				1210	/* pointer to scanning matrix */
				1211	const UWORD8 *pu1_scan_order;
				1212
				1213	/* no of non zero coefficients in the current sub block */
				1214	UWORD32 u4_nnz;
				1215
				1216	/* pointer to residual sub block, res val */
				1217	WORD16 *pi2_res_sb, i2_res_val;
				1218
				1219	/* temp var */
				1220	UWORD32 coeff_cnt, mask, b4,plane;
				1221
				1222	/* temp var */
				1223	UWORD32 u4_coeff_cost;
				1224	WORD32 i4_run;
				1225
				1226	/* coeff cost */
				1227	const UWORD8 *pu1_coeff_cost = gu1_coeff_cost;
				1228
				1229	/* pointer to packed buffer space */
				1230	UWORD32 *pu4_mb_coeff_data = NULL;
				1231
				1232	/* ac coded block pattern */
				1233	UWORD8 u1_cbp_ac;
				1234
				1235	/* Variable to store the current bit pos in cntrl variable*/
				1236	UWORD32 cntrl_pos = 0;
				1237
				1238	/********************************************************/
				1239	/* pack dc coeff data for entropy coding */
				1240	/********************************************************/
				1241	pu1_scan_order = gu1_chroma_scan_order_dc;
				1242	pi2_res_sb = pi2_res_mb;
				1243	pu1_nnz_dc = pu1_nnz;
				1244	(*pu4_cntrl) = 0;
				1245	cntrl_pos = 15;
				1246	ps_mb_coeff_data_dc = (*pv_mb_coeff_data);
				1247
				1248	/* Color space conversion between SP_UV and SP_VU
				1249	* We always assume SP_UV for all the processing
				1250	* Hence to get proper stream output we need to swap U and V channels here
				1251	*
				1252	* For that there are two paths we need to look for
				1253	* One is the path to bitstream , these variables should have the proper input
				1254	* configured UV or VU
Harinarayanan K K	134291e	2015-06-18 16:03:38 +0530	[diff] [blame]	1255	* For the other path the inverse transform variables should have what ever ordering the
Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	1256	* input had
				1257	*/
				1258
				1259	if (u4_swap_uv)
				1260	{
				1261	pu1_nnz_dc += 5;/* Move to NNZ of V planve */
				1262	pi2_res_sb += 4;/* Move to DC coff of V plane */
				1263
				1264	cntrl_pos = 14; /* Control bit for V plane */
				1265	}
				1266
				1267	for (plane = 0; plane < 2; plane++)
				1268	{
				1269	ps_mb_coeff_data = (*pv_mb_coeff_data);
				1270
				1271	u4_nnz = *pu1_nnz_dc;
				1272	/* write number of non zero coefficients U/V */
				1273	ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
				1274
				1275	if (u4_nnz)
				1276	{
				1277	for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
				1278	{
				1279	i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
				1280	if (i2_res_val)
				1281	{
				1282	/* write residue U/V */
				1283	ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
				1284	u4_s_map \|= mask;
				1285	}
				1286	mask <<= 1;
				1287	}
				1288	/* write significant coeff map U/V */
				1289	ps_mb_coeff_data->i4_sig_map_nnz \|= (u4_s_map << 16);
Harish Mahendrakar	c72323e	2015-04-28 19:07:40 +0530	[diff] [blame]	1290	(*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	1291	*u1_cbp_c = 1;
				1292
				1293	(*pu4_cntrl) \|= (1 << cntrl_pos);
				1294	}
				1295	else
				1296	{
				1297	(*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
				1298	}
				1299
				1300	if (u4_swap_uv)
				1301	{
				1302	cntrl_pos++; /* Control bit for U plane */
				1303	pu1_nnz_dc -= 5; /* Move to NNZ of U plane */
				1304	pi2_res_sb -= 4; /* Move to DC coff of U plane */
				1305
				1306	}
				1307	else
				1308	{
				1309	cntrl_pos--; /* Control bit for U plane */
				1310	pu1_nnz_dc += 5; /* 4 for AC NNZ and 1 for DC */
				1311	pi2_res_sb += 4; /* Move to DC coff of V plane */
				1312	}
				1313	}
				1314
				1315	/********************************************************/
				1316	/* pack ac coeff data for entropy coding */
				1317	/********************************************************/
				1318
				1319	pu1_scan_order = gu1_chroma_scan_order;
				1320	ps_mb_coeff_data_ac = (*pv_mb_coeff_data);
				1321
				1322	if (u4_swap_uv)
				1323	{
				1324	pi2_res_sb = pi2_res_mb + i4_res_strd * 5; /* Move to V plane ,ie 1dc row+ 4 ac row */
				1325	cntrl_pos = 27; /* The control bits are to be added for V bloc ie 31-4 th bit */
				1326	pu1_nnz_ac = pu1_nnz + 6;/Move the nnz to V block NNZ 1 dc + 1dc + 4 ac /
				1327	}
				1328	else
				1329	{
				1330	pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to U plane ,ie 1dc row */
				1331	cntrl_pos = 31;
				1332	pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc */
				1333	}
				1334
				1335	for (plane = 0; plane < 2; plane++)
				1336	{
				1337	pu4_mb_coeff_data = (*pv_mb_coeff_data);
				1338
				1339	u4_coeff_cost = 0;
				1340	i4_run = -1;
				1341
				1342	/* get the current cbp, so that it automatically
				1343	* gets reverted in case of zero ac values */
				1344	u1_cbp_ac = *u1_cbp_c;
				1345
				1346	for (b4 = 0; b4 < 4; b4++)
				1347	{
				1348	ps_mb_coeff_data = (*pv_mb_coeff_data);
				1349
				1350	u4_nnz = *pu1_nnz_ac;
				1351
				1352	/*
				1353	* We are scanning only ac coeffs, but the nnz is for the
				1354	* complete 4x4 block. Hence we have to discount the nnz contributed
				1355	* by the dc coefficient
				1356	*/
				1357	u4_nnz -= (pi2_res_sb[0]!=0);
				1358
				1359	/* write number of non zero coefficients U/V */
				1360	ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
				1361
				1362	if (u4_nnz)
				1363	{
				1364	for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
				1365	{
				1366	i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
				1367
				1368	i4_run++;
				1369
				1370	if (i2_res_val)
				1371	{
				1372	/* write residue U/V */
				1373	ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
				1374	u4_s_map \|= mask;
				1375
				1376	if ( u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD) )
				1377	{
				1378	/* compute coeff cost */
				1379	if (i2_res_val == 1 \|\| i2_res_val == -1)
				1380	{
				1381	if (i4_run < 6)
				1382	u4_coeff_cost += pu1_coeff_cost[i4_run];
				1383	}
				1384	else
				1385	u4_coeff_cost += 9;
				1386
				1387	i4_run = -1;
				1388	}
				1389	}
				1390	mask <<= 1;
				1391	}
				1392
				1393	/* write significant coeff map U/V */
				1394	ps_mb_coeff_data->i4_sig_map_nnz \|= (u4_s_map << 16);
Harish Mahendrakar	c72323e	2015-04-28 19:07:40 +0530	[diff] [blame]	1395	(*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	1396	u1_cbp_ac = 2;
				1397
				1398	(*pu4_cntrl) \|= 1 << cntrl_pos;
				1399	}
				1400	else
				1401	{
				1402	(*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
				1403	}
				1404
				1405	pu1_nnz_ac++;
				1406	pi2_res_sb += i4_res_strd;
				1407	cntrl_pos--;
				1408	}
				1409
				1410	/* reset block */
				1411	if (u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD))
				1412	{
				1413	pu4_mb_coeff_data[0] = 0;
				1414	pu4_mb_coeff_data[1] = 0;
				1415	pu4_mb_coeff_data[2] = 0;
				1416	pu4_mb_coeff_data[3] = 0;
				1417	(*pv_mb_coeff_data) = pu4_mb_coeff_data + 4;
				1418
				1419	/* Generate the control signal */
				1420	/* Zero out the current plane's AC coefficients */
				1421	(*pu4_cntrl) &= ((plane == u4_swap_uv) ? 0x0FFFFFFF : 0xF0FFFFFF);
				1422
				1423	/* Similarly do for the NNZ also */
				1424	*(pu1_nnz_ac - 4) = 0;
				1425	*(pu1_nnz_ac - 3) = 0;
				1426	*(pu1_nnz_ac - 2) = 0;
				1427	*(pu1_nnz_ac - 1) = 0;
				1428	}
				1429	else
				1430	{
				1431	*u1_cbp_c = u1_cbp_ac;
				1432	}
				1433
				1434	if (u4_swap_uv)
				1435	{
				1436	pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to V plane ,ie 1dc row+ 4 ac row + 1 dc row */
				1437	cntrl_pos = 31; /* The control bits are to be added for V bloc ie 31-4 th bit */
				1438	pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc + 1dc + 4 ac */
				1439
				1440	pu1_nnz_ac = pu1_nnz + 1;
				1441	}
				1442	else
				1443	pu1_nnz_ac = pu1_nnz + 6; /* Go to nnz of V plane */
				1444	}
				1445
				1446	/* restore the ptr basing on cbp */
				1447	if (*u1_cbp_c == 0)
				1448	{
				1449	(*pv_mb_coeff_data) = ps_mb_coeff_data_dc;
				1450	}
				1451	else if (*u1_cbp_c == 1)
				1452	{
				1453	(*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
				1454	}
				1455
				1456	return ;
				1457	}
				1458
				1459	/**
				1460	*******************************************************************************
				1461	*
				1462	* @brief performs luma core coding when intra mode is i16x16
				1463	*
				1464	* @par Description:
				1465	* If the current mb is to be coded as intra of mb type i16x16, the mb is first
				1466	* predicted using one of i16x16 prediction filters, basing on the intra mode
				1467	* chosen. Then, error is computed between the input blk and the estimated blk.
				1468	* This error is transformed (hierarchical transform i.e., dct followed by hada-
				1469	* -mard), quantized. The quantized coefficients are packed in scan order for
				1470	* entropy coding.
				1471	*
				1472	* @param[in] ps_proc_ctxt
				1473	* pointer to the current macro block context
				1474	*
				1475	* @returns u1_cbp_l
				1476	* coded block pattern luma
				1477	*
				1478	* @remarks none
				1479	*
				1480	*******************************************************************************
				1481	*/
				1482
				1483	UWORD8 ih264e_code_luma_intra_macroblock_16x16(process_ctxt_t *ps_proc)
				1484	{
				1485	/* Codec Context */
				1486	codec_t *ps_codec = ps_proc->ps_codec;
				1487
				1488	/* pointer to ref macro block */
				1489	UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
				1490
				1491	/* pointer to src macro block */
				1492	UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
				1493
				1494	/* pointer to prediction macro block */
				1495	UWORD8 *pu1_pred_mb = NULL;
				1496
				1497	/* pointer to residual macro block */
				1498	WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
				1499
				1500	/* strides */
				1501	WORD32 i4_src_strd = ps_proc->i4_src_strd;
				1502	WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
				1503	WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
				1504	WORD32 i4_res_strd = ps_proc->i4_res_strd;
				1505
				1506	/* intra mode */
				1507	UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode;
				1508
				1509	/* coded block pattern */
				1510	UWORD8 u1_cbp_l = 0;
				1511
				1512	/* number of non zero coeffs*/
				1513	UWORD32 au4_nnz[5];
				1514	UWORD8 pu1_nnz = (UWORD8 )au4_nnz;
				1515
				1516	/Cntrol signal for itrans/
				1517	UWORD32 u4_cntrl;
				1518
				1519	/* quantization parameters */
				1520	quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
				1521
				1522	/* pointer to packed mb coeff data */
				1523	void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
				1524
				1525	/* init nnz */
				1526	au4_nnz[0] = 0;
				1527	au4_nnz[1] = 0;
				1528	au4_nnz[2] = 0;
				1529	au4_nnz[3] = 0;
				1530	au4_nnz[4] = 0;
				1531
				1532	if (u1_intra_mode == PLANE_I16x16)
				1533	{
				1534	pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16_plane;
				1535	}
				1536	else
				1537	{
				1538	pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16;
				1539	}
				1540
				1541	/********************************************************/
				1542	/* error estimation, */
				1543	/* transform */
				1544	/* quantization */
				1545	/********************************************************/
				1546	ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
				1547	pu1_pred_mb, pi2_res_mb,
				1548	i4_src_strd, i4_pred_strd,
				1549	i4_res_strd,
				1550	ps_qp_params->pu2_scale_mat,
				1551	ps_qp_params->pu2_thres_mat,
				1552	ps_qp_params->u1_qbits,
				1553	ps_qp_params->u4_dead_zone,
				1554	pu1_nnz, ENABLE_DC_TRANSFORM);
				1555
				1556	/********************************************************/
				1557	/* pack coeff data for entropy coding */
				1558	/********************************************************/
				1559	ih264e_pack_l_mb_i16(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l,
				1560	pu1_nnz, &u4_cntrl);
				1561
				1562	/********************************************************/
				1563	/* ierror estimation, */
				1564	/* itransform */
				1565	/* iquantization */
				1566	/********************************************************/
				1567	/*
				1568	*if refernce frame is not to be computed
				1569	*we only need the right and bottom border 4x4 blocks to predict next intra
				1570	*blocks, hence only compute them
				1571	*/
				1572	if (!ps_proc->u4_compute_recon)
				1573	{
				1574	u4_cntrl &= 0x111F8000;
				1575	}
				1576
				1577	if (u4_cntrl)
				1578	{
				1579	ih264e_luma_16x16_idctrans_iquant_itrans_recon(
				1580	ps_codec, pi2_res_mb, pu1_pred_mb, pu1_ref_mb,
				1581	i4_res_strd, i4_pred_strd, i4_rec_strd,
				1582	ps_qp_params->pu2_iscale_mat,
				1583	ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
				1584	u4_cntrl, ENABLE_DC_TRANSFORM,
				1585	ps_proc->pv_scratch_buff);
				1586	}
				1587	else
				1588	{
				1589	ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb, i4_pred_strd,
				1590	i4_rec_strd, MB_SIZE, MB_SIZE, NULL,
				1591	0);
				1592	}
				1593
				1594	return (u1_cbp_l);
				1595	}
				1596
				1597
				1598	/**
				1599	*******************************************************************************
				1600	*
				1601	* @brief performs luma core coding when intra mode is i4x4
				1602	*
				1603	* @par Description:
				1604	* If the current mb is to be coded as intra of mb type i4x4, the mb is first
				1605	* predicted using one of i4x4 prediction filters, basing on the intra mode
				1606	* chosen. Then, error is computed between the input blk and the estimated blk.
				1607	* This error is dct transformed and quantized. The quantized coefficients are
				1608	* packed in scan order for entropy coding.
				1609	*
				1610	* @param[in] ps_proc_ctxt
				1611	* pointer to the current macro block context
				1612	*
				1613	* @returns u1_cbp_l
				1614	* coded block pattern luma
				1615	*
				1616	* @remarks
				1617	* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
				1618	* mentioned in h.264 specification
				1619	*
				1620	*******************************************************************************
				1621	*/
				1622	UWORD8 ih264e_code_luma_intra_macroblock_4x4(process_ctxt_t *ps_proc)
				1623	{
				1624	/* Codec Context */
				1625	codec_t *ps_codec = ps_proc->ps_codec;
				1626
				1627	/* pointer to ref macro block */
				1628	UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
				1629
				1630	/* pointer to src macro block */
				1631	UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
				1632
				1633	/* pointer to prediction macro block */
				1634	UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
				1635
				1636	/* pointer to residual macro block */
				1637	WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
				1638
				1639	/* strides */
				1640	WORD32 i4_src_strd = ps_proc->i4_src_strd;
				1641	WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
				1642	WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
				1643
				1644	/* pointer to neighbors: left, top, top-left */
				1645	UWORD8 *pu1_mb_a;
				1646	UWORD8 *pu1_mb_b;
				1647	UWORD8 *pu1_mb_c;
				1648	UWORD8 *pu1_mb_d;
				1649
				1650	/* intra mode */
				1651	UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode;
				1652
				1653	/* neighbor availability */
				1654	WORD32 i4_ngbr_avbl;
				1655
				1656	/* neighbor pels for intra prediction */
				1657	UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
				1658
				1659	/* coded block pattern */
				1660	UWORD8 u1_cbp_l = 0;
				1661
				1662	/* number of non zero coeffs*/
				1663	UWORD8 u1_nnz;
				1664
				1665	/* quantization parameters */
				1666	quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
				1667
				1668	/* pointer to packed mb coeff data */
				1669	void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
				1670
				1671	/* pointer to packed mb coeff data */
				1672	tu_sblk_coeff_data_t ps_mb_coeff_data, ps_mb_coeff_data_b8;
				1673
				1674	/* no of non zero coefficients in the current sub block */
				1675	UWORD32 u4_nnz_cnt;
				1676
				1677	/* significant coefficient map */
				1678	UWORD32 u4_s_map;
				1679
				1680	/* pointer to scanning matrix */
				1681	const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
				1682
				1683	/Dummy variable for 4x4 trans fucntion/
				1684	WORD16 i2_dc_dummy;
				1685
				1686	/* temp var */
				1687	UWORD32 i, b8, b4, u1_blk_x, u1_blk_y, u1_pix_x, u1_pix_y, coeff_cnt, mask;
				1688
				1689	/* Process 16 4x4 lum sub-blocks of the MB in scan order */
				1690	for (b8 = 0; b8 < 4; b8++)
				1691	{
				1692	u1_blk_x = GET_BLK_RASTER_POS_X(b8) << 3;
				1693	u1_blk_y = GET_BLK_RASTER_POS_Y(b8) << 3;
				1694
				1695	/* if in case cbp for the 8x8 block is zero, send no residue */
				1696	ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
				1697
				1698	for (b4 = 0; b4 < 4; b4++)
				1699	{
				1700	/* index of pel in MB */
				1701	u1_pix_x = u1_blk_x + (GET_SUB_BLK_RASTER_POS_X(b4) << 2);
				1702	u1_pix_y = u1_blk_y + (GET_SUB_BLK_RASTER_POS_Y(b4) << 2);
				1703
				1704	/* Initialize source and reference pointers */
				1705	pu1_curr_mb = ps_proc->pu1_src_buf_luma + u1_pix_x + (u1_pix_y * i4_src_strd);
				1706	pu1_ref_mb = ps_proc->pu1_rec_buf_luma + u1_pix_x + (u1_pix_y * i4_rec_strd);
				1707
				1708	/* pointer to left of ref macro block */
				1709	pu1_mb_a = pu1_ref_mb - 1;
				1710	/* pointer to top of ref macro block */
				1711	pu1_mb_b = pu1_ref_mb - i4_rec_strd;
				1712	/* pointer to topright of ref macro block */
				1713	pu1_mb_c = pu1_mb_b + 4;
				1714	/* pointer to topleft macro block */
				1715	pu1_mb_d = pu1_mb_b - 1;
				1716
				1717	/* compute neighbor availability */
				1718	i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
				1719
				1720	/* sub block intra mode */
				1721	u1_intra_mode = ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4];
				1722
				1723	/********************************************************/
				1724	/* gather prediction pels from neighbors for prediction */
				1725	/********************************************************/
				1726	/* left pels */
				1727	if (i4_ngbr_avbl & LEFT_MB_AVAILABLE_MASK)
				1728	{
				1729	for (i = 0; i < 4; i++)
				1730	pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_rec_strd];
				1731	}
				1732	else
				1733	{
				1734	memset(pu1_ngbr_pels_i4, 0, 4);
				1735	}
				1736
				1737	/* top pels */
				1738	if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
				1739	{
				1740	memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
				1741	}
				1742	else
				1743	{
				1744	memset(pu1_ngbr_pels_i4 + 5, 0, 4);
				1745	}
				1746	/* top left pels */
				1747	if (i4_ngbr_avbl & TOP_LEFT_MB_AVAILABLE_MASK)
				1748	{
				1749	pu1_ngbr_pels_i4[4] = *pu1_mb_d;
				1750	}
				1751	else
				1752	{
				1753	pu1_ngbr_pels_i4[4] = 0;
				1754	}
				1755	/* top right pels */
				1756	if (i4_ngbr_avbl & TOP_RIGHT_MB_AVAILABLE_MASK)
				1757	{
				1758	memcpy(pu1_ngbr_pels_i4+8+1,pu1_mb_c,4);
				1759	}
				1760	else if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
				1761	{
				1762	memset(pu1_ngbr_pels_i4+8+1,pu1_ngbr_pels_i4[8],4);
				1763	}
				1764
				1765	/********************************************************/
				1766	/* prediction */
				1767	/********************************************************/
				1768	(ps_codec->apf_intra_pred_4_l)[u1_intra_mode](pu1_ngbr_pels_i4,
				1769	pu1_pred_mb, 0,
				1770	i4_pred_strd,
				1771	i4_ngbr_avbl);
				1772
				1773	/********************************************************/
				1774	/* error estimation, */
				1775	/* transform */
				1776	/* quantization */
				1777	/********************************************************/
				1778	ps_codec->pf_resi_trans_quant_4x4(pu1_curr_mb, pu1_pred_mb,
				1779	pi2_res_mb, i4_src_strd,
				1780	i4_pred_strd,
				1781	ps_qp_params->pu2_scale_mat,
				1782	ps_qp_params->pu2_thres_mat,
				1783	ps_qp_params->u1_qbits,
				1784	ps_qp_params->u4_dead_zone,
				1785	&u1_nnz, &i2_dc_dummy);
				1786
				1787	/********************************************************/
				1788	/* pack coeff data for entropy coding */
				1789	/********************************************************/
				1790	ps_mb_coeff_data = *pv_mb_coeff_data;
				1791
				1792	/* write number of non zero coefficients */
				1793	ps_mb_coeff_data->i4_sig_map_nnz = u1_nnz;
				1794
				1795	if (u1_nnz)
				1796	{
				1797	for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u1_nnz; coeff_cnt++)
				1798	{
				1799	if (pi2_res_mb[pu1_scan_order[coeff_cnt]])
				1800	{
				1801	/* write residue */
				1802	ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]];
				1803	u4_s_map \|= mask;
				1804	}
				1805	mask <<= 1;
				1806	}
				1807	/* write significant coeff map */
				1808	ps_mb_coeff_data->i4_sig_map_nnz \|= (u4_s_map << 16);
				1809
				1810	/* update ptr to coeff data */
Harish Mahendrakar	c72323e	2015-04-28 19:07:40 +0530	[diff] [blame]	1811	(*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	1812
				1813	/* cbp */
				1814	u1_cbp_l \|= (1 << b8);
				1815	}
				1816	else
				1817	{
				1818	(*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
				1819	}
				1820
				1821	/********************************************************/
				1822	/* ierror estimation, */
				1823	/* itransform */
				1824	/* iquantization */
				1825	/********************************************************/
Martin Storsjo	17c7e8e	2015-07-02 12:06:04 +0300	[diff] [blame]	1826	if (u1_nnz)
				1827	ps_codec->pf_iquant_itrans_recon_4x4(
				1828	pi2_res_mb, pu1_pred_mb, pu1_ref_mb,
				1829	/No input stride,/i4_pred_strd,
				1830	i4_rec_strd, ps_qp_params->pu2_iscale_mat,
				1831	ps_qp_params->pu2_weigh_mat,
				1832	ps_qp_params->u1_qp_div,
				1833	ps_proc->pv_scratch_buff, 0, 0);
				1834	else
				1835	ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb,
				1836	i4_pred_strd, i4_rec_strd,
				1837	BLK_SIZE, BLK_SIZE, NULL,
				1838	0);
Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	1839
				1840	}
				1841
				1842	/* if the 8x8 block has no residue, nothing needs to be sent to entropy */
				1843	if (!(u1_cbp_l & (1 << b8)))
				1844	{
				1845	*pv_mb_coeff_data = ps_mb_coeff_data_b8;
				1846	}
				1847	}
				1848
				1849	return (u1_cbp_l);
				1850	}
				1851
				1852	/**
				1853	*******************************************************************************
				1854	*
				1855	* @brief performs luma core coding when intra mode is i4x4
				1856	*
				1857	* @par Description:
				1858	* If the current mb is to be coded as intra of mb type i4x4, the mb is first
				1859	* predicted using one of i4x4 prediction filters, basing on the intra mode
				1860	* chosen. Then, error is computed between the input blk and the estimated blk.
				1861	* This error is dct transformed and quantized. The quantized coefficients are
				1862	* packed in scan order for entropy coding.
				1863	*
				1864	* @param[in] ps_proc_ctxt
				1865	* pointer to the current macro block context
				1866	*
				1867	* @returns u1_cbp_l
				1868	* coded block pattern luma
				1869	*
				1870	* @remarks
				1871	* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
				1872	* mentioned in h.264 specification
				1873	*
				1874	*******************************************************************************
				1875	*/
				1876	UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on(process_ctxt_t *ps_proc)
				1877	{
				1878	/* Codec Context */
				1879	codec_t *ps_codec = ps_proc->ps_codec;
				1880
				1881	/* pointer to ref macro block */
				1882	UWORD8 *pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4;
				1883
				1884	/* pointer to recon buffer */
				1885	UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma;
				1886
				1887	/* pointer to residual macro block */
				1888	WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
				1889
				1890	/* strides */
				1891	WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
				1892
				1893	/* number of non zero coeffs*/
				1894	UWORD8 pu1_nnz = (UWORD8 )ps_proc->au4_nnz_intra_4x4;
				1895
				1896	/* coded block pattern */
				1897	UWORD8 u1_cbp_l = 0;
				1898
				1899	/* pointer to packed mb coeff data */
				1900	void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
				1901
				1902	/* pointer to packed mb coeff data */
				1903	tu_sblk_coeff_data_t ps_mb_coeff_data, ps_mb_coeff_data_b8;
				1904
				1905	/* no of non zero coefficients in the current sub block */
				1906	UWORD32 u4_nnz_cnt;
				1907
				1908	/* significant coefficient map */
				1909	UWORD32 u4_s_map;
				1910
				1911	/* pointer to scanning matrix */
				1912	const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
				1913
				1914	/* temp var */
				1915	UWORD32 b8, b4, coeff_cnt, mask;
				1916
				1917	/* Process 16 4x4 lum sub-blocks of the MB in scan order */
				1918	for (b8 = 0; b8 < 4; b8++)
				1919	{
				1920	/* if in case cbp for the 8x8 block is zero, send no residue */
				1921	ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
				1922
				1923	for (b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE)
				1924	{
				1925	/********************************************************/
				1926	/* pack coeff data for entropy coding */
				1927	/********************************************************/
				1928	ps_mb_coeff_data = *pv_mb_coeff_data;
				1929
				1930	/* write number of non zero coefficients */
				1931	ps_mb_coeff_data->i4_sig_map_nnz = *pu1_nnz;
				1932
				1933	if (*pu1_nnz)
				1934	{
				1935	for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < *pu1_nnz; coeff_cnt++)
				1936	{
				1937	if (pi2_res_mb[pu1_scan_order[coeff_cnt]])
				1938	{
				1939	/* write residue */
				1940	ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]];
				1941	u4_s_map \|= mask;
				1942	}
				1943	mask <<= 1;
				1944	}
				1945	/* write significant coeff map */
				1946	ps_mb_coeff_data->i4_sig_map_nnz \|= (u4_s_map << 16);
				1947
				1948	/* update ptr to coeff data */
Harish Mahendrakar	c72323e	2015-04-28 19:07:40 +0530	[diff] [blame]	1949	(*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt);
Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	1950
				1951	/* cbp */
				1952	u1_cbp_l \|= (1 << b8);
				1953	}
				1954	else
				1955	{
				1956	(*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
				1957	}
				1958	}
				1959
				1960	/* if the 8x8 block has no residue, nothing needs to be sent to entropy */
				1961	if (!(u1_cbp_l & (1 << b8)))
				1962	{
				1963	*pv_mb_coeff_data = ps_mb_coeff_data_b8;
				1964	}
				1965	}
				1966
				1967	/* memcpy recon */
				1968	ps_codec->pf_inter_pred_luma_copy(pu1_ref_mb_intra_4x4, pu1_rec_mb, MB_SIZE, i4_rec_strd, MB_SIZE, MB_SIZE, NULL, 0);
				1969
				1970	return (u1_cbp_l);
				1971	}
				1972
				1973
				1974	/**
				1975	*******************************************************************************
				1976	*
				1977	* @brief performs chroma core coding for intra macro blocks
				1978	*
				1979	* @par Description:
				1980	* If the current MB is to be intra coded with mb type chroma I8x8, the MB is
				1981	* first predicted using intra 8x8 prediction filters. The predicted data is
				1982	* compared with the input for error and the error is transformed. The DC
				1983	* coefficients of each transformed sub blocks are further transformed using
				1984	* Hadamard transform. The resulting coefficients are quantized, packed and sent
				1985	* for entropy coding.
				1986	*
				1987	* @param[in] ps_proc_ctxt
				1988	* pointer to the current macro block context
				1989	*
				1990	* @returns u1_cbp_c
				1991	* coded block pattern chroma
				1992	*
				1993	* @remarks
				1994	* The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order
				1995	* mentioned in h.264 specification
				1996	*
				1997	*******************************************************************************
				1998	*/
				1999	UWORD8 ih264e_code_chroma_intra_macroblock_8x8(process_ctxt_t *ps_proc)
				2000	{
				2001	/* Codec Context */
				2002	codec_t *ps_codec = ps_proc->ps_codec;
				2003
				2004	/* pointer to ref macro block */
				2005	UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma;
				2006
				2007	/* pointer to src macro block */
				2008	UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
				2009
				2010	/* pointer to prediction macro block */
				2011	UWORD8 *pu1_pred_mb = NULL;
				2012
				2013	/* pointer to residual macro block */
				2014	WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
				2015
				2016	/* strides */
Martin Storsjo	53c6878	2015-06-09 16:25:51 +0300	[diff] [blame]	2017	WORD32 i4_src_strd = ps_proc->i4_src_chroma_strd;
Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	2018	WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
				2019	WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
				2020	WORD32 i4_res_strd = ps_proc->i4_res_strd;
				2021
				2022	/* intra mode */
				2023	UWORD8 u1_intra_mode = ps_proc->u1_c_i8_mode;
				2024
				2025	/* coded block pattern */
				2026	UWORD8 u1_cbp_c = 0;
				2027
				2028	/* number of non zero coeffs*/
				2029	UWORD8 au1_nnz[18] = {0};
				2030
				2031	/* quantization parameters */
				2032	quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
				2033
				2034	/* Control signal for inverse transform */
				2035	UWORD32 u4_cntrl;
				2036
				2037	/* pointer to packed mb coeff data */
				2038	void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
				2039
				2040	/* See if we need to swap U and V plances for entropy */
				2041	UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
				2042
				2043	if (PLANE_CH_I8x8 == u1_intra_mode)
				2044	{
				2045	pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma_plane;
				2046	}
				2047	else
				2048	{
				2049	pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma;
				2050	}
				2051
				2052	/********************************************************/
				2053	/* error estimation, */
				2054	/* transform */
				2055	/* quantization */
				2056	/********************************************************/
				2057	ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
				2058	pu1_pred_mb, pi2_res_mb,
				2059	i4_src_strd, i4_pred_strd,
				2060	i4_res_strd,
				2061	ps_qp_params->pu2_scale_mat,
				2062	ps_qp_params->pu2_thres_mat,
				2063	ps_qp_params->u1_qbits,
				2064	ps_qp_params->u4_dead_zone,
				2065	au1_nnz);
				2066
				2067	/********************************************************/
				2068	/* pack coeff data for entropy coding */
				2069	/********************************************************/
				2070	ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c,
				2071	au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
				2072
				2073	/********************************************************/
				2074	/* ierror estimation, */
				2075	/* itransform */
				2076	/* iquantization */
				2077	/********************************************************/
				2078	ih264e_chroma_8x8_idctrans_iquant_itrans_recon(ps_codec, pi2_res_mb,
				2079	pu1_pred_mb, pu1_ref_mb,
				2080	i4_res_strd, i4_pred_strd,
				2081	i4_rec_strd,
				2082	ps_qp_params->pu2_iscale_mat,
				2083	ps_qp_params->pu2_weigh_mat,
				2084	ps_qp_params->u1_qp_div,
				2085	u4_cntrl,
				2086	ps_proc->pv_scratch_buff);
				2087	return (u1_cbp_c);
				2088	}
				2089
				2090
				2091	/**
				2092	*******************************************************************************
				2093	*
				2094	* @brief performs luma core coding when mode is inter
				2095	*
				2096	* @par Description:
				2097	* If the current mb is to be coded as inter the mb is predicted based on the
				2098	* sub mb partitions and corresponding motion vectors generated by ME. Then,
				2099	* error is computed between the input blk and the estimated blk. This error is
				2100	* transformed, quantized. The quantized coefficients are packed in scan order
				2101	* for entropy coding
				2102	*
				2103	* @param[in] ps_proc_ctxt
				2104	* pointer to the current macro block context
				2105	*
				2106	* @returns u1_cbp_l
				2107	* coded block pattern luma
				2108	*
				2109	* @remarks none
				2110	*
				2111	*******************************************************************************
				2112	*/
				2113
				2114	UWORD8 ih264e_code_luma_inter_macroblock_16x16(process_ctxt_t *ps_proc)
				2115	{
				2116	/* Codec Context */
				2117	codec_t *ps_codec = ps_proc->ps_codec;
				2118
				2119	/* pointer to ref macro block */
				2120	UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma;
				2121
				2122	/* pointer to src macro block */
				2123	UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
				2124
				2125	/* pointer to prediction macro block */
				2126	UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
				2127
				2128	/* pointer to residual macro block */
				2129	WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
				2130
				2131	/* strides */
				2132	WORD32 i4_src_strd = ps_proc->i4_src_strd;
				2133	WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
				2134	WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
				2135	WORD32 i4_res_strd = ps_proc->i4_res_strd;
				2136
				2137	/* coded block pattern */
				2138	UWORD8 u1_cbp_l = 0;
				2139
				2140	/Control signal of itrans/
				2141	UWORD32 u4_cntrl;
				2142
				2143	/* number of non zero coeffs*/
				2144	UWORD8 pu1_nnz = (UWORD8 )ps_proc->au4_nnz;
				2145
				2146	/* quantization parameters */
				2147	quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
				2148
				2149	/* pointer to packed mb coeff data */
				2150	void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
				2151
				2152	/* pseudo pred buffer */
				2153	UWORD8 *pu1_pseudo_pred = pu1_pred_mb;
				2154
				2155	/* pseudo pred buffer stride */
				2156	WORD32 i4_pseudo_pred_strd = i4_pred_strd;
				2157
				2158	/* init nnz */
				2159	ps_proc->au4_nnz[0] = 0;
				2160	ps_proc->au4_nnz[1] = 0;
				2161	ps_proc->au4_nnz[2] = 0;
				2162	ps_proc->au4_nnz[3] = 0;
				2163	ps_proc->au4_nnz[4] = 0;
				2164
				2165	/********************************************************/
				2166	/* prediction */
				2167	/********************************************************/
				2168	ih264e_motion_comp_luma(ps_proc, &pu1_pseudo_pred, &i4_pseudo_pred_strd);
				2169
				2170	/********************************************************/
				2171	/* error estimation, */
				2172	/* transform */
				2173	/* quantization */
				2174	/********************************************************/
				2175	if (ps_proc->u4_min_sad_reached == 0 \|\| ps_proc->u4_min_sad != 0)
				2176	{
				2177	ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
				2178	pu1_pseudo_pred, pi2_res_mb,
				2179	i4_src_strd,
				2180	i4_pseudo_pred_strd,
				2181	i4_res_strd,
				2182	ps_qp_params->pu2_scale_mat,
				2183	ps_qp_params->pu2_thres_mat,
				2184	ps_qp_params->u1_qbits,
				2185	ps_qp_params->u4_dead_zone,
				2186	pu1_nnz,
				2187	DISABLE_DC_TRANSFORM);
				2188
				2189	/********************************************************/
				2190	/* pack coeff data for entropy coding */
				2191	/********************************************************/
				2192	ih264e_pack_l_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l,
				2193	pu1_nnz, ps_codec->u4_thres_resi, &u4_cntrl);
				2194	}
				2195	else
				2196	{
				2197	u1_cbp_l = 0;
				2198	u4_cntrl = 0;
				2199	}
				2200
				2201	/********************************************************/
				2202	/* ierror estimation, */
				2203	/* itransform */
				2204	/* iquantization */
				2205	/********************************************************/
				2206
				2207	/*If the frame is not to be used for P frame reference or dumping recon
				2208	* we only will use the reocn for only predicting intra Mbs
				2209	* THis will need only right and bottom edge 4x4 blocks recon
				2210	* Hence we selectively enable them using control signal(including DC)
				2211	*/
				2212	if (ps_proc->u4_compute_recon != 1)
				2213	{
				2214	u4_cntrl &= 0x111F0000;
				2215	}
				2216
				2217	if (u4_cntrl)
				2218	{
				2219	ih264e_luma_16x16_idctrans_iquant_itrans_recon(
				2220	ps_codec, pi2_res_mb, pu1_pseudo_pred, pu1_rec_mb,
				2221	i4_res_strd, i4_pseudo_pred_strd, i4_rec_strd,
				2222	ps_qp_params->pu2_iscale_mat,
				2223	ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
				2224	u4_cntrl /Cntrl/, DISABLE_DC_TRANSFORM,
				2225	ps_proc->pv_scratch_buff);
				2226	}
				2227	else
				2228	{
				2229	ps_codec->pf_inter_pred_luma_copy(pu1_pseudo_pred, pu1_rec_mb,
				2230	i4_pseudo_pred_strd, i4_rec_strd,
				2231	MB_SIZE, MB_SIZE, NULL, 0);
				2232	}
				2233
				2234
				2235	return (u1_cbp_l);
				2236	}
				2237
				2238	/**
				2239	*******************************************************************************
				2240	*
				2241	* @brief performs chroma core coding for inter macro blocks
				2242	*
				2243	* @par Description:
				2244	* If the current mb is to be coded as inter predicted mb,based on the sub mb partitions
				2245	* and corresponding motion vectors generated by ME ,prediction is done.
				2246	* Then, error is computed between the input blk and the estimated blk.
				2247	* This error is transformed , quantized. The quantized coefficients
				2248	* are packed in scan order for
				2249	* entropy coding.
				2250	*
				2251	* @param[in] ps_proc_ctxt
				2252	* pointer to the current macro block context
				2253	*
				2254	* @returns u1_cbp_l
				2255	* coded block pattern chroma
				2256	*
				2257	* @remarks none
				2258	*
				2259	*******************************************************************************
				2260	*/
				2261	UWORD8 ih264e_code_chroma_inter_macroblock_8x8(process_ctxt_t *ps_proc)
				2262	{
				2263	/* Codec Context */
				2264	codec_t *ps_codec = ps_proc->ps_codec;
				2265
				2266	/* pointer to ref macro block */
				2267	UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_chroma;
				2268
				2269	/* pointer to src macro block */
				2270	UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
				2271
				2272	/* pointer to prediction macro block */
				2273	UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
				2274
				2275	/* pointer to residual macro block */
				2276	WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
				2277
				2278	/* strides */
Martin Storsjo	53c6878	2015-06-09 16:25:51 +0300	[diff] [blame]	2279	WORD32 i4_src_strd = ps_proc->i4_src_chroma_strd;
Hamsalekha S	8d3d303	2015-03-13 21:24:58 +0530	[diff] [blame]	2280	WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
				2281	WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
				2282	WORD32 i4_res_strd = ps_proc->i4_res_strd;
				2283
				2284	/* coded block pattern */
				2285	UWORD8 u1_cbp_c = 0;
				2286
				2287	/Control signal for inverse transform/
				2288	UWORD32 u4_cntrl;
				2289
				2290	/* number of non zero coeffs*/
				2291	UWORD8 au1_nnz[10] = {0};
				2292
				2293	/* quantization parameters */
				2294	quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
				2295
				2296	/* pointer to packed mb coeff data */
				2297	void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
				2298
				2299	/See if we need to swap U and V plances for entropy/
				2300	UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
				2301
				2302	/********************************************************/
				2303	/* prediction */
				2304	/********************************************************/
				2305	ih264e_motion_comp_chroma(ps_proc);
				2306
				2307	/********************************************************/
				2308	/* error estimation, */
				2309	/* transform */
				2310	/* quantization */
				2311	/********************************************************/
				2312	ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
				2313	pu1_pred_mb, pi2_res_mb,
				2314	i4_src_strd, i4_pred_strd,
				2315	i4_res_strd,
				2316	ps_qp_params->pu2_scale_mat,
				2317	ps_qp_params->pu2_thres_mat,
				2318	ps_qp_params->u1_qbits,
				2319	ps_qp_params->u4_dead_zone,
				2320	au1_nnz);
				2321
				2322	/********************************************************/
				2323	/* pack coeff data for entropy coding */
				2324	/********************************************************/
				2325	ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c,
				2326	au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
				2327
				2328	/********************************************************/
				2329	/* ierror estimation, */
				2330	/* itransform */
				2331	/* iquantization */
				2332	/********************************************************/
				2333
				2334	/* If the frame is not to be used for P frame reference or dumping recon
				2335	* we only will use the reocn for only predicting intra Mbs
				2336	* THis will need only right and bottom edge 4x4 blocks recon
				2337	* Hence we selectively enable them using control signal(including DC)
				2338	*/
				2339	if (!ps_proc->u4_compute_recon)
				2340	{
				2341	u4_cntrl &= 0x7700C000;
				2342	}
				2343
				2344	if (u4_cntrl)
				2345	{
				2346	ih264e_chroma_8x8_idctrans_iquant_itrans_recon(
				2347	ps_codec, pi2_res_mb, pu1_pred_mb, pu1_rec_mb,
				2348	i4_res_strd, i4_pred_strd, i4_rec_strd,
				2349	ps_qp_params->pu2_iscale_mat,
				2350	ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
				2351	u4_cntrl, ps_proc->pv_scratch_buff);
				2352	}
				2353	else
				2354	{
				2355	ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_rec_mb, i4_pred_strd,
				2356	i4_rec_strd, MB_SIZE >> 1, MB_SIZE,
				2357	NULL, 0);
				2358	}
				2359
				2360	return (u1_cbp_c);
				2361	}