/******************************************************************************
 *
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
20
21/**
22 *******************************************************************************
23 * @file
24 * ih264e_core_coding.c
25 *
26 * @brief
27 * This file contains routines that perform luma and chroma core coding for
28 * intra macroblocks
29 *
30 * @author
31 * ittiam
32 *
33 * @par List of Functions:
34 * - ih264e_pack_l_mb_i16()
35 * - ih264e_pack_c_mb_i8()
36 * - ih264e_code_luma_intra_macroblock_16x16()
37 * - ih264e_code_luma_intra_macroblock_4x4()
38 * - ih264e_code_chroma_intra_macroblock_8x8()
39 *
40 * @remarks
41 * None
42 *
43 *******************************************************************************
44 */
45
46/*****************************************************************************/
47/* File Includes */
48/*****************************************************************************/
49
50/* System include files */
51#include <stdio.h>
52#include <string.h>
53#include <assert.h>
54
55/* User include files */
56#include "ih264e_config.h"
57#include "ih264_typedefs.h"
58#include "ih264_platform_macros.h"
59#include "iv2.h"
60#include "ive2.h"
61#include "ih264_defs.h"
62#include "ih264e_defs.h"
63#include "ih264_trans_data.h"
64#include "ih264e_error.h"
65#include "ih264e_bitstream.h"
66#include "ime_distortion_metrics.h"
67#include "ime_structs.h"
68#include "ih264_structs.h"
69#include "ih264_trans_quant_itrans_iquant.h"
70#include "ih264_inter_pred_filters.h"
71#include "ih264_mem_fns.h"
72#include "ih264_padding.h"
73#include "ih264_intra_pred_filters.h"
74#include "ih264_deblk_edge_filters.h"
75#include "irc_cntrl_param.h"
76#include "irc_frame_info_collector.h"
77#include "ih264e_rate_control.h"
78#include "ih264e_structs.h"
79#include "ih264e_globals.h"
80#include "ih264e_core_coding.h"
81#include "ih264e_mc.h"
82
83
84/*****************************************************************************/
85/* Function Definitions */
86/*****************************************************************************/
87
88/**
89*******************************************************************************
90*
91* @brief
92* This function performs does the DCT transform then Hadamard transform
93* and quantization for a macroblock when the mb mode is intra 16x16 mode
94*
95* @par Description:
96* First cf4 is done on all 16 4x4 blocks of the 16x16 input block.
97* Then hadamard transform is done on the DC coefficients
98* Quantization is then performed on the 16x16 block, 4x4 wise
99*
100* @param[in] pu1_src
101* Pointer to source sub-block
102*
103* @param[in] pu1_pred
104* Pointer to prediction sub-block
105*
106* @param[in] pi2_out
107* Pointer to residual sub-block
108* The output will be in linear format
109* The first 16 continuous locations will contain the values of Dc block
110* After DC block and a stride 1st AC block will follow
111* After one more stride next AC block will follow
112* The blocks will be in raster scan order
113*
114* @param[in] src_strd
115* Source stride
116*
117* @param[in] pred_strd
118* Prediction stride
119*
120* @param[in] dst_strd
121* Destination stride
122*
123* @param[in] pu2_scale_matrix
124* The quantization matrix for 4x4 transform
125*
126* @param[in] pu2_threshold_matrix
127* Threshold matrix
128*
129* @param[in] u4_qbits
130* 15+QP/6
131*
132* @param[in] u4_round_factor
133* Round factor for quant
134*
135* @param[out] pu1_nnz
136* Memory to store the non-zeros after transform
137* The first byte will be the nnz of DC block
138* From the next byte the AC nnzs will be stored in raster scan order
139*
140* @param u4_dc_flag
141* Signals if Dc transform is to be done or not
142* 1 -> Dc transform will be done
143* 0 -> Dc transform will not be done
144*
145* @remarks
146*
147*******************************************************************************
148*/
149void ih264e_luma_16x16_resi_trans_dctrans_quant(codec_t *ps_codec,
150 UWORD8 *pu1_src,
151 UWORD8 *pu1_pred,
152 WORD16 *pi2_out,
153 WORD32 src_strd,
154 WORD32 pred_strd,
155 WORD32 dst_strd,
156 const UWORD16 *pu2_scale_matrix,
157 const UWORD16 *pu2_threshold_matrix,
158 UWORD32 u4_qbits,
159 UWORD32 u4_round_factor,
160 UWORD8 *pu1_nnz,
161 UWORD32 u4_dc_flag)
162
163{
164 WORD32 blk_cntr;
165 WORD32 i4_offsetx, i4_offsety;
166 UWORD8 *pu1_curr_src, *pu1_curr_pred;
167
168 WORD16 *pi2_dc_str = pi2_out;
169
170 /* Move to the ac addresses */
171 pu1_nnz++;
172 pi2_out += dst_strd;
173
174 for (blk_cntr = 0; blk_cntr < NUM_LUMA4x4_BLOCKS_IN_MB; blk_cntr++)
175 {
176 IND2SUB_LUMA_MB(blk_cntr, i4_offsetx, i4_offsety);
177
178 pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd;
179 pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd;
180
181 ps_codec->pf_resi_trans_quant_4x4(pu1_curr_src, pu1_curr_pred,
182 pi2_out + blk_cntr * dst_strd,
183 src_strd, pred_strd, pu2_scale_matrix,
184 pu2_threshold_matrix, u4_qbits,
185 u4_round_factor, &pu1_nnz[blk_cntr],
186 &pi2_dc_str[blk_cntr]);
187
188 }
189
190 if (!u4_dc_flag)
191 return;
192
193 /*
194 * In case of i16x16, we need to remove the contribution of dc coeffs into
195 * nnz of each block. We are doing that in the packing function
196 */
197
198 /* Adjust pointers to point to dc values */
199 pi2_out -= dst_strd;
200 pu1_nnz--;
201
202 u4_qbits++;
203 u4_round_factor <<= 1;
204
205 ps_codec->pf_hadamard_quant_4x4(pi2_dc_str, pi2_out, pu2_scale_matrix,
206 pu2_threshold_matrix, u4_qbits,
207 u4_round_factor, &pu1_nnz[0]);
208}
209
210/**
211*******************************************************************************
212*
213* @brief
214* This function performs the intra 16x16 inverse transform process for H264
215* it includes inverse Dc transform, inverse quant and then inverse transform
216*
217* @par Description:
218*
219* @param[in] pi2_src
220* Input data, 16x16 size
221* First 16 mem locations will have the Dc coffs in rater scan order in linear fashion
222* after a stride 1st AC clock will be present again in raster can order
223* Then each AC block of the 16x16 block will follow in raster scan order
224*
225* @param[in] pu1_pred
226* The predicted data, 16x16 size
227* Block by block form
228*
229* @param[in] pu1_out
230* Output 16x16
231* In block by block form
232*
233* @param[in] src_strd
234* Source stride
235*
236* @param[in] pred_strd
237* input stride for prediction buffer
238*
239* @param[in] out_strd
240* input stride for output buffer
241*
242* @param[in] pu2_iscale_mat
243* Inverse quantization matrix for 4x4 transform
244*
245* @param[in] pu2_weigh_mat
246* weight matrix of 4x4 transform
247*
248* @param[in] qp_div
249* QP/6
250*
251* @param[in] pi4_tmp
252* Input temporary buffer
253* needs to be at least 20 in size
254*
255* @param[in] pu4_cntrl
256* Controls the transform path
257* total Last 17 bits are used
258* the 16th th bit will correspond to DC block
259* and 32-17 will correspond to the ac blocks in raster scan order
260* bit equaling zero indicates that the entire 4x4 block is zero for DC
261* For AC blocks a bit equaling zero will mean that all 15 AC coffs of the block is nonzero
262*
263* @param[in] pi4_tmp
264* Input temporary buffer
265* needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size
266*
267* @returns
268* none
269*
270* @remarks
271* The all zero case must be taken care outside
272*
273*******************************************************************************
274*/
275void ih264e_luma_16x16_idctrans_iquant_itrans_recon(codec_t *ps_codec,
276 WORD16 *pi2_src,
277 UWORD8 *pu1_pred,
278 UWORD8 *pu1_out,
279 WORD32 src_strd,
280 WORD32 pred_strd,
281 WORD32 out_strd,
282 const UWORD16 *pu2_iscale_mat,
283 const UWORD16 *pu2_weigh_mat,
284 UWORD32 qp_div,
285 UWORD32 u4_cntrl,
286 UWORD32 u4_dc_trans_flag,
287 WORD32 *pi4_tmp)
288{
289 /* Start index for inverse quant in a 4x4 block */
290 WORD32 iq_start_idx = (u4_dc_trans_flag == 0) ? 0 : 1;
291
292 /* Cntrl bits for 4x4 transforms
293 * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path
294 * u4_dc_cntrl : controls is a 4x4 block is to be processed in dc path
295 * : dc block must contain only single dc coefficient
296 * u4_empty_blk_cntrl : control fot 4x4 block with no coeffs, ie no dc and ac
297 * : ie not (ac or dc)
298 */
299 UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
300
301 /* tmp registers for block ids */
302 UWORD32 u4_blk_id;
303
304 /* Subscrripts */
305 WORD32 i4_offset_x, i4_offset_y;
306
307 UWORD8 *pu1_cur_prd_blk, *pu1_cur_out_blk;
308
309 /* Src and stride for dc coeffs */
310 UWORD32 u4_dc_inc;
311 WORD16 *pi2_dc_src;
312
313 /*
314 * For intra blocks we need to do inverse dc transform
315 * In case if intra blocks, its here that we populate the dc bits in cntrl
316 * as they cannot be populated any earlier
317 */
318 if (u4_dc_trans_flag)
319 {
320 UWORD32 cntr, u4_dc_cntrl;
321 /* Do inv hadamard and place the results at the start of each AC block */
322 ps_codec->pf_ihadamard_scaling_4x4(pi2_src, pi2_src, pu2_iscale_mat,
323 pu2_weigh_mat, qp_div, pi4_tmp);
324
325 /* Update the cntrl flag */
326 u4_dc_cntrl = 0;
327 for (cntr = 0; cntr < DC_COEFF_CNT_LUMA_MB; cntr++)
328 {
329 u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr));
330 }
331 /* Mark dc bits as 1 if corresponding ac bit is 0 */
332 u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
333 /* Combine both ac and dc bits */
334 u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA)
335 | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_LUMA);
336 }
337
338 /* Source for dc coeffs
339 * If the block is intra, we have to read dc values from first row of src
340 * then stride for each block is 1, other wise its src stride
341 */
342 pi2_dc_src = (iq_start_idx == 0) ? (pi2_src + src_strd) : pi2_src;
343 u4_dc_inc = (iq_start_idx == 0) ? src_strd : 1;
344
345 /* The AC blocks starts from 2nd row */
346 pi2_src += src_strd;
347
348 /* Get the block bits */
349 u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA);
350 u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_LUMA) << 16;
351 u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFFFF0000;
352
353 /* Get first block to process */
354 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
355 while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
356 {
357 /* Compute address of src blocks */
358 WORD32 i4_src_offset = u4_dc_inc * u4_blk_id;
359
360 IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
361
362 /* Compute address of out and pred blocks */
363 pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
364 pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
365
366 /* Do inv dc transform */
367 ps_codec->pf_iquant_itrans_recon_4x4_dc(pi2_dc_src + i4_src_offset,
368 pu1_cur_prd_blk,
369 pu1_cur_out_blk, pred_strd,
370 out_strd, pu2_iscale_mat,
371 pu2_weigh_mat, qp_div, NULL,
372 iq_start_idx,
373 pi2_dc_src + i4_src_offset);
374 /* Get next DC block to process */
375 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
376 }
377
378 /* now process ac/mixed blocks */
379 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
380 while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
381 {
382
383 WORD32 i4_src_offset = src_strd * u4_blk_id;
384
385 IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
386
387 pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
388 pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
389
390 ps_codec->pf_iquant_itrans_recon_4x4(pi2_src + i4_src_offset,
391 pu1_cur_prd_blk, pu1_cur_out_blk,
392 pred_strd, out_strd,
393 pu2_iscale_mat, pu2_weigh_mat,
394 qp_div, (WORD16*) pi4_tmp,
395 iq_start_idx,
396 pi2_dc_src + u4_blk_id);
397
398 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
399 }
400
401 /* Now process empty blocks */
402 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
403 while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB)
404 {
405 IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
406
407 pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
408 pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
409
410 ps_codec->pf_inter_pred_luma_copy(pu1_cur_prd_blk, pu1_cur_out_blk,
411 pred_strd, out_strd, SIZE_4X4_BLK_HRZ,
412 SIZE_4X4_BLK_VERT, 0, 0);
413
414 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
415 }
416}
417
418/**
419*******************************************************************************
420*
421* @brief
422* This function performs does the DCT transform then Hadamard transform
423* and quantization for a chroma macroblock
424*
425* @par Description:
426* First cf4 is done on all 16 4x4 blocks of the 8x8input block
427* Then hadamard transform is done on the DC coefficients
428* Quantization is then performed on the 8x8 block, 4x4 wise
429*
430* @param[in] pu1_src
431* Pointer to source sub-block
432* The input is in interleaved format for two chroma planes
433*
434* @param[in] pu1_pred
435* Pointer to prediction sub-block
436* Prediction is in inter leaved format
437*
438* @param[in] pi2_out
439* Pointer to residual sub-block
440* The output will be in linear format
441* The first 4 continuous locations will contain the values of DC block for U
442* and then next 4 will contain for V.
443* After DC block and a stride 1st AC block of U plane will follow
444* After one more stride next AC block of V plane will follow
445* The blocks will be in raster scan order
446*
447* After all the AC blocks of U plane AC blocks of V plane will follow in exact
448* same way
449*
450* @param[in] src_strd
451* Source stride
452*
453* @param[in] pred_strd
454* Prediction stride
455*
456* @param[in] dst_strd
457* Destination stride
458*
459* @param[in] pu2_scale_matrix
460* The quantization matrix for 4x4 transform
461*
462* @param[in] pu2_threshold_matrix
463* Threshold matrix
464*
465* @param[in] u4_qbits
466* 15+QP/6
467*
468* @param[in] u4_round_factor
469* Round factor for quant
470*
471* @param[out] pu1_nnz
472* Memory to store the non-zeros after transform
473* The first byte will be the nnz od DC block for U plane
474* From the next byte the AC nnzs will be storerd in raster scan order
475* The fifth byte will be nnz of Dc block of V plane
476* Then Ac blocks will follow
477*
478* @param u4_dc_flag
479* Signals if Dc transform is to be done or not
480* 1 -> Dc transform will be done
481* 0 -> Dc transform will not be done
482*
483* @remarks
484*
485*******************************************************************************
486*/
487void ih264e_chroma_8x8_resi_trans_dctrans_quant(codec_t *ps_codec,
488 UWORD8 *pu1_src,
489 UWORD8 *pu1_pred,
490 WORD16 *pi2_out,
491 WORD32 src_strd,
492 WORD32 pred_strd,
493 WORD32 out_strd,
494 const UWORD16 *pu2_scale_matrix,
495 const UWORD16 *pu2_threshold_matrix,
496 UWORD32 u4_qbits,
497 UWORD32 u4_round_factor,
498 UWORD8 *pu1_nnz_c)
499{
500 WORD32 blk_cntr;
501 WORD32 i4_offsetx, i4_offsety;
502 UWORD8 *pu1_curr_src, *pu1_curr_pred;
503
504 WORD16 pi2_dc_str[8];
505 UWORD8 au1_dcnnz[2];
506
507 /* Move to the ac addresses */
508 pu1_nnz_c++;
509 pi2_out += out_strd;
510
511 for (blk_cntr = 0; blk_cntr < NUM_CHROMA4x4_BLOCKS_IN_MB; blk_cntr++)
512 {
513 IND2SUB_CHROMA_MB(blk_cntr, i4_offsetx, i4_offsety);
514
515 pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd;
516 pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd;
517
518 /* For chroma, v plane nnz is populated from position 5 */
519 ps_codec->pf_resi_trans_quant_chroma_4x4(
520 pu1_curr_src, pu1_curr_pred,
521 pi2_out + blk_cntr * out_strd, src_strd, pred_strd,
522 pu2_scale_matrix, pu2_threshold_matrix, u4_qbits,
523 u4_round_factor, &pu1_nnz_c[blk_cntr + (blk_cntr > 3)],
524 &pi2_dc_str[blk_cntr]);
525 }
526
527 /* Adjust pointers to point to dc values */
528 pi2_out -= out_strd;
529 pu1_nnz_c--;
530
531 u4_qbits++;
532 u4_round_factor <<= 1;
533
534 ps_codec->pf_hadamard_quant_2x2_uv(pi2_dc_str, pi2_out, pu2_scale_matrix,
535 pu2_threshold_matrix, u4_qbits,
536 u4_round_factor, au1_dcnnz);
537
538 /* Copy the dc nnzs */
539 pu1_nnz_c[0] = au1_dcnnz[0];
540 pu1_nnz_c[5] = au1_dcnnz[1];
541
542}
543
544/**
545*******************************************************************************
546* @brief
547* This function performs the inverse transform with process for chroma MB of H264
548*
549* @par Description:
550* Does inverse DC transform ,inverse quantization inverse transform
551*
552* @param[in] pi2_src
553* Input data, 16x16 size
554* The input is in the form of, first 4 locations will contain DC coeffs of
555* U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane
556* in raster scan order will follow, each block as linear array in raster scan order.
557* After a stride next AC block will follow. After all AC blocks of U plane
558* V plane AC blocks will follow in exact same order.
559*
560* @param[in] pu1_pred
561* The predicted data, 8x16 size, U and V interleaved
562*
563* @param[in] pu1_out
564* Output 8x16, U and V interleaved
565*
566* @param[in] src_strd
567* Source stride
568*
569* @param[in] pred_strd
570* input stride for prediction buffer
571*
572* @param[in] out_strd
573* input stride for output buffer
574*
575* @param[in] pu2_iscale_mat
576* Inverse quantization martix for 4x4 transform
577*
578* @param[in] pu2_weigh_mat
579* weight matrix of 4x4 transform
580*
581* @param[in] qp_div
582* QP/6
583*
584* @param[in] pi4_tmp
585* Input temporary buffer
586* needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of Dc cofss for chroma * number of planes
587* in size
588*
589* @param[in] pu4_cntrl
590* Controls the transform path
591* the 15 th bit will correspond to DC block of U plane , 14th will indicate the V plane Dc block
592* 32-28 bits will indicate AC blocks of U plane in raster scan order
593* 27-23 bits will indicate AC blocks of V plane in rater scan order
594* The bit 1 implies that there is at least one non zero coeff in a block
595*
596* @returns
597* none
598*
599* @remarks
600*******************************************************************************
601*/
602void ih264e_chroma_8x8_idctrans_iquant_itrans_recon(codec_t *ps_codec,
603 WORD16 *pi2_src,
604 UWORD8 *pu1_pred,
605 UWORD8 *pu1_out,
606 WORD32 src_strd,
607 WORD32 pred_strd,
608 WORD32 out_strd,
609 const UWORD16 *pu2_iscale_mat,
610 const UWORD16 *pu2_weigh_mat,
611 UWORD32 qp_div,
612 UWORD32 u4_cntrl,
613 WORD32 *pi4_tmp)
614{
615 /* Cntrl bits for 4x4 transforms
616 * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path
617 * u4_dc_cntrl : controls is a 4x4 block is to be processed in dc path
618 * : dc block must contain only single dc coefficient
619 * u4_empty_blk_cntrl : control fot 4x4 block with no coeffs, ie no dc and ac
620 * : ie not (ac or dc)
621 */
622
623 UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl;
624
625 /* tmp registers for block ids */
626 WORD32 u4_blk_id;
627
628 /* Offsets for pointers */
629 WORD32 i4_offset_x, i4_offset_y;
630
631 /* Pointer to 4x4 blocks */
632 UWORD8 *pu1_cur_4x4_prd_blk, *pu1_cur_4x4_out_blk;
633
634 /* Tmp register for pointer to dc coffs */
635 WORD16 *pi2_dc_src;
636
637 WORD16 i2_zero = 0;
638
639 /* Increment for dc block */
640 WORD32 i4_dc_inc;
641
642 /*
643 * Lets do the inverse transform for dc coeffs in chroma
644 */
645 if (u4_cntrl & CNTRL_FLAG_DCBLK_MASK_CHROMA)
646 {
647 UWORD32 cntr, u4_dc_cntrl;
648 /* Do inv hadamard for u an v block */
649
650 ps_codec->pf_ihadamard_scaling_2x2_uv(pi2_src, pi2_src, pu2_iscale_mat,
651 pu2_weigh_mat, qp_div, NULL);
652 /*
653 * Update the cntrl flag
654 * Flag is updated as follows bits 15-11 -> u block dc bits
655 */
656 u4_dc_cntrl = 0;
657 for (cntr = 0; cntr < 8; cntr++)
658 {
659 u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr));
660 }
661
662 /* Mark dc bits as 1 if corresponding ac bit is 0 */
663 u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl);
664 /* Combine both ac and dc bits */
665 u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA)
666 | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_CHROMA);
667
668 /* Since we populated the dc coffs, we have to read them from there */
669 pi2_dc_src = pi2_src;
670 i4_dc_inc = 1;
671 }
672 else
673 {
674 u4_cntrl = u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA;
675 pi2_dc_src = &i2_zero;
676 i4_dc_inc = 0;
677 }
678
679 /* Get the block bits */
680 u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA);
681 u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_CHROMA) << 16;
682 u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFF000000;
683
684 /* The AC blocks starts from 2nd row */
685 pi2_src += src_strd;
686
687 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
688 while (u4_blk_id < 8)
689 {
690 WORD32 dc_src_offset = u4_blk_id * i4_dc_inc;
691
692 IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
693
694 pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
695 pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
696
697 ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc(
698 pi2_dc_src + dc_src_offset, pu1_cur_4x4_prd_blk,
699 pu1_cur_4x4_out_blk, pred_strd, out_strd, NULL, NULL, 0,
700 NULL, pi2_dc_src + dc_src_offset);
701 /* Get next DC block to process */
702 DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id);
703 }
704
705 /* now process ac/mixed blocks */
706 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
707 while (u4_blk_id < 8)
708 {
709 WORD32 i4_src_offset = src_strd * u4_blk_id;
710 WORD32 dc_src_offset = i4_dc_inc * u4_blk_id;
711
712 IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
713
714 pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
715 pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
716
717 ps_codec->pf_iquant_itrans_recon_chroma_4x4(pi2_src + i4_src_offset,
718 pu1_cur_4x4_prd_blk,
719 pu1_cur_4x4_out_blk,
720 pred_strd, out_strd,
721 pu2_iscale_mat,
722 pu2_weigh_mat, qp_div,
723 (WORD16 *) pi4_tmp,
724 pi2_dc_src + dc_src_offset);
725
726 DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id);
727 }
728
729 /* Now process empty blocks */
730 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
731 while (u4_blk_id < 8)
732 {
733 IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y);
734
735 pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd;
736 pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd;
737
738 ps_codec->pf_interleave_copy(pu1_cur_4x4_prd_blk, pu1_cur_4x4_out_blk,
739 pred_strd, out_strd, SIZE_4X4_BLK_VERT,
740 SIZE_4X4_BLK_HRZ);
741
742 DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id);
743 }
744}
745
746/**
747******************************************************************************
748*
749* @brief This function packs residue of an i16x16 luma mb for entropy coding
750*
751* @par Description
752* An i16 macro block contains two classes of units, dc 4x4 block and
753* 4x4 ac blocks. while packing the mb, the dc block is sent first, and
754* the 16 ac blocks are sent next in scan order. Each and every block is
755* represented by 3 parameters (nnz, significant coefficient map and the
756* residue coefficients itself). If a 4x4 unit does not have any coefficients
757* then only nnz is sent. Inside a 4x4 block the individual coefficients are
758* sent in scan order.
759*
760* The first byte of each block will be nnz of the block, if it is non zero,
761* a 2 byte significance map is sent. This is followed by nonzero coefficients.
762* This is repeated for 1 dc + 16 ac blocks.
763*
764* @param[in] pi2_res_mb
765* pointer to residue mb
766*
767* @param[in, out] pv_mb_coeff_data
768* buffer pointing to packed residue coefficients
769*
770* @param[in] u4_res_strd
771* residual block stride
772*
773* @param[out] u1_cbp_l
774* coded block pattern luma
775*
776* @param[in] pu1_nnz
777* number of non zero coefficients in each 4x4 unit
778*
779* @param[out]
780* Control signal for inverse transform of 16x16 blocks
781*
782* @return none
783*
784* @ remarks
785*
786******************************************************************************
787*/
788void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb,
789 void **pv_mb_coeff_data,
790 WORD32 i4_res_strd,
791 UWORD8 *u1_cbp_l,
792 UWORD8 *pu1_nnz,
793 UWORD32 *pu4_cntrl)
794{
795 /* pointer to packed sub block buffer space */
796 tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data), *ps_mb_coeff_data_ac;
797
798 /* no of non zero coefficients in the current sub block */
799 UWORD32 u4_nnz_cnt;
800
801 /* significant coefficient map */
802 UWORD32 u4_s_map;
803
804 /* pointer to scanning matrix */
805 const UWORD8 *pu1_scan_order;
806
807 /* number of non zeros in sub block */
808 UWORD32 u4_nnz;
809
810 /* coeff scan order */
811 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
812
813 /* temp var */
814 UWORD32 coeff_cnt, mask, b4,u4_cntrl=0;
815
816 /*DC and AC coeff pointers*/
817 WORD16 *pi2_res_mb_ac,*pi2_res_mb_dc;
818
819 /********************************************************/
820 /* pack dc coeff data for entropy coding */
821 /********************************************************/
822
823 pi2_res_mb_dc = pi2_res_mb;
824 pu1_scan_order = gu1_luma_scan_order_dc;
825
826 u4_nnz = *pu1_nnz;
827 u4_cntrl = 0;
828
829 /* write number of non zero coefficients */
830 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
831
832 if (u4_nnz)
833 {
834 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
835 {
836 if (pi2_res_mb_dc[pu1_scan_order[coeff_cnt]])
837 {
838 /* write residue */
839 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_dc[pu1_scan_order[coeff_cnt]];
840 u4_s_map |= mask;
841 }
842 mask <<= 1;
843 }
844 /* write significant coeff map */
845 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
846 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
847
848 u4_cntrl = 0x00008000;// Set DC bit in ctrl code
849 }
850 else
851 {
852 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
853 }
854
855 /********************************************************/
856 /* pack ac coeff data for entropy coding */
857 /********************************************************/
858
859 pu1_nnz ++;
860 pu1_scan_order = gu1_luma_scan_order;
861 pi2_res_mb += i4_res_strd; /*Move to AC block*/
862
863 ps_mb_coeff_data_ac = (*pv_mb_coeff_data);
864
865 for (b4 = 0; b4 < 16; b4++)
866 {
867 ps_mb_coeff_data = (*pv_mb_coeff_data);
868
869 u4_nnz = pu1_nnz[u1_scan_order[b4]];
870
871 /* Jump according to the scan order */
872 pi2_res_mb_ac = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]);
873
874 /*
875 * Since this is a i16x16 block, we should not count dc coeff on indi
876 * vidual 4x4 blocks to nnz. But due to the implementation of 16x16
877 * trans function, we add dc's nnz to u4_nnz too. Hence we adjust that
878 * here
879 */
880 u4_nnz -= (pi2_res_mb_ac[0] != 0);
881
882 /* write number of non zero coefficients */
883 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
884
885 if (u4_nnz)
886 {
887 for (u4_nnz_cnt = 0, coeff_cnt = 1, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
888 {
889 if (pi2_res_mb_ac[pu1_scan_order[coeff_cnt]])
890 {
891 /* write residue */
892 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_ac[pu1_scan_order[coeff_cnt]];
893 u4_s_map |= mask;
894 }
895 mask <<= 1;
896 }
897 /* write significant coeff map */
898 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
899 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
900 *u1_cbp_l = 15;
901
902 u4_cntrl |= (1 << (31 - u1_scan_order[b4]));
903 }
904 else
905 {
906 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
907 }
908
909 }
910
911 if (!(*u1_cbp_l))
912 {
913 (*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
914 }
915
916 /* Store the cntrl signal */
917 (*pu4_cntrl) = u4_cntrl;
918 return;
919}
920
/**
******************************************************************************
*
* @brief This function packs residue of a p16x16 luma mb for entropy coding
*
* @par Description
*  A p16x16 macro block is packed as 16 4x4 blocks; unlike an i16x16 macro
*  block there is no separate dc block, and the first row of the residue
*  buffer and the first nnz entry are ignored. The 16 blocks are sent in scan
*  order. Each and every block is represented by 3 parameters (nnz,
*  significant coefficient map and the residue coefficients itself). If a 4x4
*  unit does not have any coefficients then only nnz is sent. Inside a 4x4
*  block the individual coefficients are sent in scan order.
*
*  For every block the nnz is written first; if it is non zero, the
*  significance map is stored in the upper 16 bits of the same field and is
*  followed by the nonzero coefficients. This is repeated for the 16 4x4
*  blocks.
*
* @param[in] pi2_res_mb
*  pointer to residue mb
*
* @param[in, out] pv_mb_coeff_data
*  buffer pointing to packed residue coefficients
*
* @param[in] i4_res_strd
*  residual block stride
*
* @param[out] u1_cbp_l
*  coded block pattern luma
*
* @param[in] pu1_nnz
*  number of non zero coefficients in each 4x4 unit
*
* @param[in] u4_thres_resi
*  when non zero, a coefficient cost is accumulated for every 8x8 unit and
*  compared against LUMA_SUB_BLOCK_SKIP_THRESHOLD
*
* @param[out] pu4_cntrl
*  Control signal for inverse transform
*
* @return none
*
* @remarks Killing coeffs not yet coded
*
******************************************************************************
*/
963void ih264e_pack_l_mb(WORD16 *pi2_res_mb,
964 void **pv_mb_coeff_data,
965 WORD32 i4_res_strd,
966 UWORD8 *u1_cbp_l,
967 UWORD8 *pu1_nnz,
968 UWORD32 u4_thres_resi,
969 UWORD32 *pu4_cntrl)
970{
971 /* pointer to packed sub block buffer space */
972 tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8, *ps_mb_coeff_data_mb;
973
974 /* no of non zero coefficients in the current sub block */
975 UWORD32 u4_nnz_cnt;
976
977 /* significant coefficient map */
978 UWORD32 u4_s_map;
979
980 /* pointer to scanning matrix */
981 const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
982
983 /* number of non zeros in sub block */
984 UWORD32 u4_nnz;
985
986 /* pointer to residual sub block */
987 WORD16 *pi2_res_sb;
988
989 /* coeff scan order */
990 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
991
992 /* coeff cost */
993 const UWORD8 *pu1_coeff_cost = gu1_coeff_cost;
994
995 /* temp var */
996 UWORD32 u4_mb_coeff_cost = 0, u4_b8_coeff_cost = 0, coeff_cnt, mask, u4_cntrl = 0, b4, b8;
997
998 /* temp var */
999 WORD32 i4_res_val, i4_run = -1, dcac_block;
1000
1001 /* When Hadamard transform is disabled, first row values are dont care, ignore them */
1002 pi2_res_mb += i4_res_strd;
1003
1004 /* When Hadamard transform is disabled, first unit value is dont care, ignore this */
1005 pu1_nnz ++;
1006
1007 ps_mb_coeff_data_mb = ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);
1008
1009 /********************************************************/
1010 /* pack coeff data for entropy coding */
1011 /********************************************************/
1012
1013 for (b4 = 0; b4 < 16; b4++)
1014 {
1015 ps_mb_coeff_data = (*pv_mb_coeff_data);
1016
1017 b8 = b4 >> 2;
1018
1019 u4_nnz = pu1_nnz[u1_scan_order[b4]];
1020
1021 /* Jump according to the scan order */
1022 pi2_res_sb = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]);
1023
1024 /* write number of non zero coefficients */
1025 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
1026
1027 if (u4_nnz)
1028 {
1029 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
1030 {
1031 /* number of runs of zero before, this is used to compute coeff cost */
1032 i4_run++;
1033
1034 i4_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
1035
1036 if (i4_res_val)
1037 {
1038 /* write residue */
1039 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i4_res_val;
1040 u4_s_map |= mask;
1041
1042 if (u4_thres_resi)
1043 {
1044 /* compute coeff cost */
1045 if (i4_res_val == 1 || i4_res_val == -1)
1046 {
1047 if (i4_run < 6)
1048 u4_b8_coeff_cost += pu1_coeff_cost[i4_run];
1049 }
1050 else
1051 u4_b8_coeff_cost += 9;
1052
1053 i4_run = -1;
1054 }
1055 }
1056
1057 mask <<= 1;
1058 }
1059
1060 /* write significant coeff map */
1061 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1062 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
1063
1064 /* cbp */
1065 *u1_cbp_l |= (1 << b8);
1066
1067 /* Cntrl map for inverse transform computation
1068 *
1069 * If coeff_cnt is zero, it means that only nonzero was a dc coeff
1070 * Hence we have to set the 16 - u1_scan_order[b4]) position instead
1071 * of 31 - u1_scan_order[b4]
1072 */
1073 dcac_block = (coeff_cnt == 0)?16:31;
1074 u4_cntrl |= (1 << (dcac_block - u1_scan_order[b4]));
1075 }
1076 else
1077 {
1078 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1079 }
1080
1081 /* Decide if the 8x8 unit has to be sent for entropy coding? */
1082 if ((b4+1) % 4 == 0)
1083 {
1084 if ( u4_thres_resi && (u4_b8_coeff_cost <= LUMA_SUB_BLOCK_SKIP_THRESHOLD) &&
1085 (*u1_cbp_l & (1 << b8)) )
1086 {
1087
1088
1089 /*
1090 * When we want to reset the full 8x8 block, we have to reset
1091 * both the dc and ac coeff bits hence we have the symmetric
1092 * arrangement of bits
1093 */
1094 const UWORD32 cntrl_mask_map[4] = {0xcc00cc00, 0x33003300, 0x00cc00cc, 0x00330033};
1095
1096 /* restore cbp */
1097 *u1_cbp_l = (*u1_cbp_l & (~(1 << b8)));
1098
1099 /* correct cntrl flag */
1100 u4_cntrl = u4_cntrl & (~cntrl_mask_map[(b4 >> 2)]);
1101
1102 /* correct nnz */
1103 pu1_nnz[u1_scan_order[b4 - 3]] = 0;
1104 pu1_nnz[u1_scan_order[b4 - 2]] = 0;
1105 pu1_nnz[u1_scan_order[b4 - 1]] = 0;
1106 pu1_nnz[u1_scan_order[b4]] = 0;
1107
1108 /* reset blk cost */
1109 u4_b8_coeff_cost = 0;
1110 }
1111
1112 if (!(*u1_cbp_l & (1 << b8)))
1113 {
1114 (*pv_mb_coeff_data) = ps_mb_coeff_data_b8;
1115 }
1116
1117 u4_mb_coeff_cost += u4_b8_coeff_cost;
1118
1119 u4_b8_coeff_cost = 0;
1120 i4_run = -1;
1121 ps_mb_coeff_data_b8 = (*pv_mb_coeff_data);
1122 }
1123 }
1124
1125 if (u4_thres_resi && (u4_mb_coeff_cost <= LUMA_BLOCK_SKIP_THRESHOLD)
1126 && (*u1_cbp_l))
1127 {
1128 (*pv_mb_coeff_data) = ps_mb_coeff_data_mb;
1129 *u1_cbp_l = 0;
1130 u4_cntrl = 0;
1131 memset(pu1_nnz, 0, 16);
1132 }
1133
1134 (*pu4_cntrl) = u4_cntrl;
1135
1136 return;
1137}
1138
1139/**
1140******************************************************************************
1141*
1142* @brief This function packs residue of an i8x8 chroma mb for entropy coding
1143*
1144* @par Description
1145* An i8 chroma macro block contains two classes of units, dc 2x2 block and
1146* 4x4 ac blocks. while packing the mb, the dc block is sent first, and
1147* the 4 ac blocks are sent next in scan order. Each and every block is
1148* represented by 3 parameters (nnz, significant coefficient map and the
1149* residue coefficients itself). If a 4x4 unit does not have any coefficients
1150* then only nnz is sent. Inside a 4x4 block the individual coefficients are
1151* sent in scan order.
1152*
1153* The first byte of each block will be nnz of the block, if it is non zero,
1154* a 2 byte significance map is sent. This is followed by nonzero coefficients.
1155* This is repeated for 1 dc + 4 ac blocks.
1156*
1157* @param[in] pi2_res_mb
1158* pointer to residue mb
1159*
1160* @param[in, out] pv_mb_coeff_data
1161* buffer pointing to packed residue coefficients
1162*
1163* @param[in] u4_res_strd
1164* residual block stride
1165*
1166* @param[out] u1_cbp_c
1167* coded block pattern chroma
1168*
1169* @param[in] pu1_nnz
1170* number of non zero coefficients in each 4x4 unit
1171*
1172* @param[out] pu1_nnz
1173* Control signal for inverse transform
1174*
1175* @param[in] u4_swap_uv
1176* Swaps the order of U and V planes in entropy bitstream
1177*
1178* @return none
1179*
1180* @ remarks
1181*
1182******************************************************************************
1183*/
1184void ih264e_pack_c_mb(WORD16 *pi2_res_mb,
1185 void **pv_mb_coeff_data,
1186 WORD32 i4_res_strd,
1187 UWORD8 *u1_cbp_c,
1188 UWORD8 *pu1_nnz,
1189 UWORD32 u4_thres_resi,
1190 UWORD32 *pu4_cntrl,
1191 UWORD32 u4_swap_uv)
1192{
1193 /* pointer to packed sub block buffer space */
1194 tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data);
1195 tu_sblk_coeff_data_t *ps_mb_coeff_data_dc, *ps_mb_coeff_data_ac;
1196
1197 /* nnz pointer */
1198 UWORD8 *pu1_nnz_ac, *pu1_nnz_dc;
1199
1200 /* nnz counter */
1201 UWORD32 u4_nnz_cnt;
1202
1203 /* significant coefficient map */
1204 UWORD32 u4_s_map;
1205
1206 /* pointer to scanning matrix */
1207 const UWORD8 *pu1_scan_order;
1208
1209 /* no of non zero coefficients in the current sub block */
1210 UWORD32 u4_nnz;
1211
1212 /* pointer to residual sub block, res val */
1213 WORD16 *pi2_res_sb, i2_res_val;
1214
1215 /* temp var */
1216 UWORD32 coeff_cnt, mask, b4,plane;
1217
1218 /* temp var */
1219 UWORD32 u4_coeff_cost;
1220 WORD32 i4_run;
1221
1222 /* coeff cost */
1223 const UWORD8 *pu1_coeff_cost = gu1_coeff_cost;
1224
1225 /* pointer to packed buffer space */
1226 UWORD32 *pu4_mb_coeff_data = NULL;
1227
1228 /* ac coded block pattern */
1229 UWORD8 u1_cbp_ac;
1230
1231 /* Variable to store the current bit pos in cntrl variable*/
1232 UWORD32 cntrl_pos = 0;
1233
1234 /********************************************************/
1235 /* pack dc coeff data for entropy coding */
1236 /********************************************************/
1237 pu1_scan_order = gu1_chroma_scan_order_dc;
1238 pi2_res_sb = pi2_res_mb;
1239 pu1_nnz_dc = pu1_nnz;
1240 (*pu4_cntrl) = 0;
1241 cntrl_pos = 15;
1242 ps_mb_coeff_data_dc = (*pv_mb_coeff_data);
1243
1244 /* Color space conversion between SP_UV and SP_VU
1245 * We always assume SP_UV for all the processing
1246 * Hence to get proper stream output we need to swap U and V channels here
1247 *
1248 * For that there are two paths we need to look for
1249 * One is the path to bitstream , these variables should have the proper input
1250 * configured UV or VU
1251 * For the other path the inverse transform variables should have ehat ever 0ordering the
1252 * input had
1253 */
1254
1255 if (u4_swap_uv)
1256 {
1257 pu1_nnz_dc += 5;/* Move to NNZ of V planve */
1258 pi2_res_sb += 4;/* Move to DC coff of V plane */
1259
1260 cntrl_pos = 14; /* Control bit for V plane */
1261 }
1262
1263 for (plane = 0; plane < 2; plane++)
1264 {
1265 ps_mb_coeff_data = (*pv_mb_coeff_data);
1266
1267 u4_nnz = *pu1_nnz_dc;
1268 /* write number of non zero coefficients U/V */
1269 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
1270
1271 if (u4_nnz)
1272 {
1273 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
1274 {
1275 i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
1276 if (i2_res_val)
1277 {
1278 /* write residue U/V */
1279 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
1280 u4_s_map |= mask;
1281 }
1282 mask <<= 1;
1283 }
1284 /* write significant coeff map U/V */
1285 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1286 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
1287 *u1_cbp_c = 1;
1288
1289 (*pu4_cntrl) |= (1 << cntrl_pos);
1290 }
1291 else
1292 {
1293 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1294 }
1295
1296 if (u4_swap_uv)
1297 {
1298 cntrl_pos++; /* Control bit for U plane */
1299 pu1_nnz_dc -= 5; /* Move to NNZ of U plane */
1300 pi2_res_sb -= 4; /* Move to DC coff of U plane */
1301
1302 }
1303 else
1304 {
1305 cntrl_pos--; /* Control bit for U plane */
1306 pu1_nnz_dc += 5; /* 4 for AC NNZ and 1 for DC */
1307 pi2_res_sb += 4; /* Move to DC coff of V plane */
1308 }
1309 }
1310
1311 /********************************************************/
1312 /* pack ac coeff data for entropy coding */
1313 /********************************************************/
1314
1315 pu1_scan_order = gu1_chroma_scan_order;
1316 ps_mb_coeff_data_ac = (*pv_mb_coeff_data);
1317
1318 if (u4_swap_uv)
1319 {
1320 pi2_res_sb = pi2_res_mb + i4_res_strd * 5; /* Move to V plane ,ie 1dc row+ 4 ac row */
1321 cntrl_pos = 27; /* The control bits are to be added for V bloc ie 31-4 th bit */
1322 pu1_nnz_ac = pu1_nnz + 6;/*Move the nnz to V block NNZ 1 dc + 1dc + 4 ac */
1323 }
1324 else
1325 {
1326 pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to U plane ,ie 1dc row */
1327 cntrl_pos = 31;
1328 pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc */
1329 }
1330
1331 for (plane = 0; plane < 2; plane++)
1332 {
1333 pu4_mb_coeff_data = (*pv_mb_coeff_data);
1334
1335 u4_coeff_cost = 0;
1336 i4_run = -1;
1337
1338 /* get the current cbp, so that it automatically
1339 * gets reverted in case of zero ac values */
1340 u1_cbp_ac = *u1_cbp_c;
1341
1342 for (b4 = 0; b4 < 4; b4++)
1343 {
1344 ps_mb_coeff_data = (*pv_mb_coeff_data);
1345
1346 u4_nnz = *pu1_nnz_ac;
1347
1348 /*
1349 * We are scanning only ac coeffs, but the nnz is for the
1350 * complete 4x4 block. Hence we have to discount the nnz contributed
1351 * by the dc coefficient
1352 */
1353 u4_nnz -= (pi2_res_sb[0]!=0);
1354
1355 /* write number of non zero coefficients U/V */
1356 ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz;
1357
1358 if (u4_nnz)
1359 {
1360 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++)
1361 {
1362 i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]];
1363
1364 i4_run++;
1365
1366 if (i2_res_val)
1367 {
1368 /* write residue U/V */
1369 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val;
1370 u4_s_map |= mask;
1371
1372 if ( u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD) )
1373 {
1374 /* compute coeff cost */
1375 if (i2_res_val == 1 || i2_res_val == -1)
1376 {
1377 if (i4_run < 6)
1378 u4_coeff_cost += pu1_coeff_cost[i4_run];
1379 }
1380 else
1381 u4_coeff_cost += 9;
1382
1383 i4_run = -1;
1384 }
1385 }
1386 mask <<= 1;
1387 }
1388
1389 /* write significant coeff map U/V */
1390 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1391 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
1392 u1_cbp_ac = 2;
1393
1394 (*pu4_cntrl) |= 1 << cntrl_pos;
1395 }
1396 else
1397 {
1398 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1399 }
1400
1401 pu1_nnz_ac++;
1402 pi2_res_sb += i4_res_strd;
1403 cntrl_pos--;
1404 }
1405
1406 /* reset block */
1407 if (u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD))
1408 {
1409 pu4_mb_coeff_data[0] = 0;
1410 pu4_mb_coeff_data[1] = 0;
1411 pu4_mb_coeff_data[2] = 0;
1412 pu4_mb_coeff_data[3] = 0;
1413 (*pv_mb_coeff_data) = pu4_mb_coeff_data + 4;
1414
1415 /* Generate the control signal */
1416 /* Zero out the current plane's AC coefficients */
1417 (*pu4_cntrl) &= ((plane == u4_swap_uv) ? 0x0FFFFFFF : 0xF0FFFFFF);
1418
1419 /* Similarly do for the NNZ also */
1420 *(pu1_nnz_ac - 4) = 0;
1421 *(pu1_nnz_ac - 3) = 0;
1422 *(pu1_nnz_ac - 2) = 0;
1423 *(pu1_nnz_ac - 1) = 0;
1424 }
1425 else
1426 {
1427 *u1_cbp_c = u1_cbp_ac;
1428 }
1429
1430 if (u4_swap_uv)
1431 {
1432 pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to V plane ,ie 1dc row+ 4 ac row + 1 dc row */
1433 cntrl_pos = 31; /* The control bits are to be added for V bloc ie 31-4 th bit */
1434 pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc + 1dc + 4 ac */
1435
1436 pu1_nnz_ac = pu1_nnz + 1;
1437 }
1438 else
1439 pu1_nnz_ac = pu1_nnz + 6; /* Go to nnz of V plane */
1440 }
1441
1442 /* restore the ptr basing on cbp */
1443 if (*u1_cbp_c == 0)
1444 {
1445 (*pv_mb_coeff_data) = ps_mb_coeff_data_dc;
1446 }
1447 else if (*u1_cbp_c == 1)
1448 {
1449 (*pv_mb_coeff_data) = ps_mb_coeff_data_ac;
1450 }
1451
1452 return ;
1453}
1454
1455/**
1456*******************************************************************************
1457*
1458* @brief performs luma core coding when intra mode is i16x16
1459*
1460* @par Description:
1461* If the current mb is to be coded as intra of mb type i16x16, the mb is first
1462* predicted using one of i16x16 prediction filters, basing on the intra mode
1463* chosen. Then, error is computed between the input blk and the estimated blk.
1464* This error is transformed (hierarchical transform i.e., dct followed by hada-
1465* -mard), quantized. The quantized coefficients are packed in scan order for
1466* entropy coding.
1467*
1468* @param[in] ps_proc_ctxt
1469* pointer to the current macro block context
1470*
1471* @returns u1_cbp_l
1472* coded block pattern luma
1473*
1474* @remarks none
1475*
1476*******************************************************************************
1477*/
1478
1479UWORD8 ih264e_code_luma_intra_macroblock_16x16(process_ctxt_t *ps_proc)
1480{
1481 /* Codec Context */
1482 codec_t *ps_codec = ps_proc->ps_codec;
1483
1484 /* pointer to ref macro block */
1485 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
1486
1487 /* pointer to src macro block */
1488 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
1489
1490 /* pointer to prediction macro block */
1491 UWORD8 *pu1_pred_mb = NULL;
1492
1493 /* pointer to residual macro block */
1494 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
1495
1496 /* strides */
1497 WORD32 i4_src_strd = ps_proc->i4_src_strd;
1498 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1499 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1500 WORD32 i4_res_strd = ps_proc->i4_res_strd;
1501
1502 /* intra mode */
1503 UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode;
1504
1505 /* coded block pattern */
1506 UWORD8 u1_cbp_l = 0;
1507
1508 /* number of non zero coeffs*/
1509 UWORD32 au4_nnz[5];
1510 UWORD8 *pu1_nnz = (UWORD8 *)au4_nnz;
1511
1512 /*Cntrol signal for itrans*/
1513 UWORD32 u4_cntrl;
1514
1515 /* quantization parameters */
1516 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1517
1518 /* pointer to packed mb coeff data */
1519 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
1520
1521 /* init nnz */
1522 au4_nnz[0] = 0;
1523 au4_nnz[1] = 0;
1524 au4_nnz[2] = 0;
1525 au4_nnz[3] = 0;
1526 au4_nnz[4] = 0;
1527
1528 if (u1_intra_mode == PLANE_I16x16)
1529 {
1530 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16_plane;
1531 }
1532 else
1533 {
1534 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16;
1535 }
1536
1537 /********************************************************/
1538 /* error estimation, */
1539 /* transform */
1540 /* quantization */
1541 /********************************************************/
1542 ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
1543 pu1_pred_mb, pi2_res_mb,
1544 i4_src_strd, i4_pred_strd,
1545 i4_res_strd,
1546 ps_qp_params->pu2_scale_mat,
1547 ps_qp_params->pu2_thres_mat,
1548 ps_qp_params->u1_qbits,
1549 ps_qp_params->u4_dead_zone,
1550 pu1_nnz, ENABLE_DC_TRANSFORM);
1551
1552 /********************************************************/
1553 /* pack coeff data for entropy coding */
1554 /********************************************************/
1555 ih264e_pack_l_mb_i16(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l,
1556 pu1_nnz, &u4_cntrl);
1557
1558 /********************************************************/
1559 /* ierror estimation, */
1560 /* itransform */
1561 /* iquantization */
1562 /********************************************************/
1563 /*
1564 *if refernce frame is not to be computed
1565 *we only need the right and bottom border 4x4 blocks to predict next intra
1566 *blocks, hence only compute them
1567 */
1568 if (!ps_proc->u4_compute_recon)
1569 {
1570 u4_cntrl &= 0x111F8000;
1571 }
1572
1573 if (u4_cntrl)
1574 {
1575 ih264e_luma_16x16_idctrans_iquant_itrans_recon(
1576 ps_codec, pi2_res_mb, pu1_pred_mb, pu1_ref_mb,
1577 i4_res_strd, i4_pred_strd, i4_rec_strd,
1578 ps_qp_params->pu2_iscale_mat,
1579 ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
1580 u4_cntrl, ENABLE_DC_TRANSFORM,
1581 ps_proc->pv_scratch_buff);
1582 }
1583 else
1584 {
1585 ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb, i4_pred_strd,
1586 i4_rec_strd, MB_SIZE, MB_SIZE, NULL,
1587 0);
1588 }
1589
1590 return (u1_cbp_l);
1591}
1592
1593
1594/**
1595*******************************************************************************
1596*
1597* @brief performs luma core coding when intra mode is i4x4
1598*
1599* @par Description:
1600* If the current mb is to be coded as intra of mb type i4x4, the mb is first
1601* predicted using one of i4x4 prediction filters, basing on the intra mode
1602* chosen. Then, error is computed between the input blk and the estimated blk.
1603* This error is dct transformed and quantized. The quantized coefficients are
1604* packed in scan order for entropy coding.
1605*
1606* @param[in] ps_proc_ctxt
1607* pointer to the current macro block context
1608*
1609* @returns u1_cbp_l
1610* coded block pattern luma
1611*
1612* @remarks
1613* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
1614* mentioned in h.264 specification
1615*
1616*******************************************************************************
1617*/
1618UWORD8 ih264e_code_luma_intra_macroblock_4x4(process_ctxt_t *ps_proc)
1619{
1620 /* Codec Context */
1621 codec_t *ps_codec = ps_proc->ps_codec;
1622
1623 /* pointer to ref macro block */
1624 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
1625
1626 /* pointer to src macro block */
1627 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
1628
1629 /* pointer to prediction macro block */
1630 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
1631
1632 /* pointer to residual macro block */
1633 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
1634
1635 /* strides */
1636 WORD32 i4_src_strd = ps_proc->i4_src_strd;
1637 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1638 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1639
1640 /* pointer to neighbors: left, top, top-left */
1641 UWORD8 *pu1_mb_a;
1642 UWORD8 *pu1_mb_b;
1643 UWORD8 *pu1_mb_c;
1644 UWORD8 *pu1_mb_d;
1645
1646 /* intra mode */
1647 UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode;
1648
1649 /* neighbor availability */
1650 WORD32 i4_ngbr_avbl;
1651
1652 /* neighbor pels for intra prediction */
1653 UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
1654
1655 /* coded block pattern */
1656 UWORD8 u1_cbp_l = 0;
1657
1658 /* number of non zero coeffs*/
1659 UWORD8 u1_nnz;
1660
1661 /* quantization parameters */
1662 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1663
1664 /* pointer to packed mb coeff data */
1665 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
1666
1667 /* pointer to packed mb coeff data */
1668 tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8;
1669
1670 /* no of non zero coefficients in the current sub block */
1671 UWORD32 u4_nnz_cnt;
1672
1673 /* significant coefficient map */
1674 UWORD32 u4_s_map;
1675
1676 /* pointer to scanning matrix */
1677 const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
1678
1679 /*Dummy variable for 4x4 trans fucntion*/
1680 WORD16 i2_dc_dummy;
1681
1682 /* temp var */
1683 UWORD32 i, b8, b4, u1_blk_x, u1_blk_y, u1_pix_x, u1_pix_y, coeff_cnt, mask;
1684
1685 /* Process 16 4x4 lum sub-blocks of the MB in scan order */
1686 for (b8 = 0; b8 < 4; b8++)
1687 {
1688 u1_blk_x = GET_BLK_RASTER_POS_X(b8) << 3;
1689 u1_blk_y = GET_BLK_RASTER_POS_Y(b8) << 3;
1690
1691 /* if in case cbp for the 8x8 block is zero, send no residue */
1692 ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
1693
1694 for (b4 = 0; b4 < 4; b4++)
1695 {
1696 /* index of pel in MB */
1697 u1_pix_x = u1_blk_x + (GET_SUB_BLK_RASTER_POS_X(b4) << 2);
1698 u1_pix_y = u1_blk_y + (GET_SUB_BLK_RASTER_POS_Y(b4) << 2);
1699
1700 /* Initialize source and reference pointers */
1701 pu1_curr_mb = ps_proc->pu1_src_buf_luma + u1_pix_x + (u1_pix_y * i4_src_strd);
1702 pu1_ref_mb = ps_proc->pu1_rec_buf_luma + u1_pix_x + (u1_pix_y * i4_rec_strd);
1703
1704 /* pointer to left of ref macro block */
1705 pu1_mb_a = pu1_ref_mb - 1;
1706 /* pointer to top of ref macro block */
1707 pu1_mb_b = pu1_ref_mb - i4_rec_strd;
1708 /* pointer to topright of ref macro block */
1709 pu1_mb_c = pu1_mb_b + 4;
1710 /* pointer to topleft macro block */
1711 pu1_mb_d = pu1_mb_b - 1;
1712
1713 /* compute neighbor availability */
1714 i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
1715
1716 /* sub block intra mode */
1717 u1_intra_mode = ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4];
1718
1719 /********************************************************/
1720 /* gather prediction pels from neighbors for prediction */
1721 /********************************************************/
1722 /* left pels */
1723 if (i4_ngbr_avbl & LEFT_MB_AVAILABLE_MASK)
1724 {
1725 for (i = 0; i < 4; i++)
1726 pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_rec_strd];
1727 }
1728 else
1729 {
1730 memset(pu1_ngbr_pels_i4, 0, 4);
1731 }
1732
1733 /* top pels */
1734 if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
1735 {
1736 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
1737 }
1738 else
1739 {
1740 memset(pu1_ngbr_pels_i4 + 5, 0, 4);
1741 }
1742 /* top left pels */
1743 if (i4_ngbr_avbl & TOP_LEFT_MB_AVAILABLE_MASK)
1744 {
1745 pu1_ngbr_pels_i4[4] = *pu1_mb_d;
1746 }
1747 else
1748 {
1749 pu1_ngbr_pels_i4[4] = 0;
1750 }
1751 /* top right pels */
1752 if (i4_ngbr_avbl & TOP_RIGHT_MB_AVAILABLE_MASK)
1753 {
1754 memcpy(pu1_ngbr_pels_i4+8+1,pu1_mb_c,4);
1755 }
1756 else if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK)
1757 {
1758 memset(pu1_ngbr_pels_i4+8+1,pu1_ngbr_pels_i4[8],4);
1759 }
1760
1761 /********************************************************/
1762 /* prediction */
1763 /********************************************************/
1764 (ps_codec->apf_intra_pred_4_l)[u1_intra_mode](pu1_ngbr_pels_i4,
1765 pu1_pred_mb, 0,
1766 i4_pred_strd,
1767 i4_ngbr_avbl);
1768
1769 /********************************************************/
1770 /* error estimation, */
1771 /* transform */
1772 /* quantization */
1773 /********************************************************/
1774 ps_codec->pf_resi_trans_quant_4x4(pu1_curr_mb, pu1_pred_mb,
1775 pi2_res_mb, i4_src_strd,
1776 i4_pred_strd,
1777 ps_qp_params->pu2_scale_mat,
1778 ps_qp_params->pu2_thres_mat,
1779 ps_qp_params->u1_qbits,
1780 ps_qp_params->u4_dead_zone,
1781 &u1_nnz, &i2_dc_dummy);
1782
1783 /********************************************************/
1784 /* pack coeff data for entropy coding */
1785 /********************************************************/
1786 ps_mb_coeff_data = *pv_mb_coeff_data;
1787
1788 /* write number of non zero coefficients */
1789 ps_mb_coeff_data->i4_sig_map_nnz = u1_nnz;
1790
1791 if (u1_nnz)
1792 {
1793 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u1_nnz; coeff_cnt++)
1794 {
1795 if (pi2_res_mb[pu1_scan_order[coeff_cnt]])
1796 {
1797 /* write residue */
1798 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]];
1799 u4_s_map |= mask;
1800 }
1801 mask <<= 1;
1802 }
1803 /* write significant coeff map */
1804 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1805
1806 /* update ptr to coeff data */
1807 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
1808
1809 /* cbp */
1810 u1_cbp_l |= (1 << b8);
1811 }
1812 else
1813 {
1814 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1815 }
1816
1817 /********************************************************/
1818 /* ierror estimation, */
1819 /* itransform */
1820 /* iquantization */
1821 /********************************************************/
1822 /* If the frame is not to be used for P frame reference or dumping recon
1823 * we only will use the recon for only predicting intra Mbs
1824 * This will need only right and bottom edge 4x4 blocks recon
1825 * Hence we selectively enable them
1826 */
1827 if (ps_proc->u4_compute_recon || (0xF888 & (1 << ((b8 << 2) + b4))))
1828 {
1829 if (u1_nnz)
1830 ps_codec->pf_iquant_itrans_recon_4x4(
1831 pi2_res_mb, pu1_pred_mb, pu1_ref_mb,
1832 /*No input stride,*/i4_pred_strd,
1833 i4_rec_strd, ps_qp_params->pu2_iscale_mat,
1834 ps_qp_params->pu2_weigh_mat,
1835 ps_qp_params->u1_qp_div,
1836 ps_proc->pv_scratch_buff, 0, 0);
1837 else
1838 ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb,
1839 i4_pred_strd, i4_rec_strd,
1840 BLK_SIZE, BLK_SIZE, NULL,
1841 0);
1842 }
1843
1844 }
1845
1846 /* if the 8x8 block has no residue, nothing needs to be sent to entropy */
1847 if (!(u1_cbp_l & (1 << b8)))
1848 {
1849 *pv_mb_coeff_data = ps_mb_coeff_data_b8;
1850 }
1851 }
1852
1853 return (u1_cbp_l);
1854}
1855
1856/**
1857*******************************************************************************
1858*
1859* @brief performs luma core coding when intra mode is i4x4
1860*
1861* @par Description:
1862* If the current mb is to be coded as intra of mb type i4x4, the mb is first
1863* predicted using one of i4x4 prediction filters, basing on the intra mode
1864* chosen. Then, error is computed between the input blk and the estimated blk.
1865* This error is dct transformed and quantized. The quantized coefficients are
1866* packed in scan order for entropy coding.
1867*
1868* @param[in] ps_proc_ctxt
1869* pointer to the current macro block context
1870*
1871* @returns u1_cbp_l
1872* coded block pattern luma
1873*
1874* @remarks
1875* The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order
1876* mentioned in h.264 specification
1877*
1878*******************************************************************************
1879*/
1880UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on(process_ctxt_t *ps_proc)
1881{
1882 /* Codec Context */
1883 codec_t *ps_codec = ps_proc->ps_codec;
1884
1885 /* pointer to ref macro block */
1886 UWORD8 *pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4;
1887
1888 /* pointer to recon buffer */
1889 UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma;
1890
1891 /* pointer to residual macro block */
1892 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
1893
1894 /* strides */
1895 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1896
1897 /* number of non zero coeffs*/
1898 UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4;
1899
1900 /* coded block pattern */
1901 UWORD8 u1_cbp_l = 0;
1902
1903 /* pointer to packed mb coeff data */
1904 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
1905
1906 /* pointer to packed mb coeff data */
1907 tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8;
1908
1909 /* no of non zero coefficients in the current sub block */
1910 UWORD32 u4_nnz_cnt;
1911
1912 /* significant coefficient map */
1913 UWORD32 u4_s_map;
1914
1915 /* pointer to scanning matrix */
1916 const UWORD8 *pu1_scan_order = gu1_luma_scan_order;
1917
1918 /* temp var */
1919 UWORD32 b8, b4, coeff_cnt, mask;
1920
1921 /* Process 16 4x4 lum sub-blocks of the MB in scan order */
1922 for (b8 = 0; b8 < 4; b8++)
1923 {
1924 /* if in case cbp for the 8x8 block is zero, send no residue */
1925 ps_mb_coeff_data_b8 = *pv_mb_coeff_data;
1926
1927 for (b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE)
1928 {
1929 /********************************************************/
1930 /* pack coeff data for entropy coding */
1931 /********************************************************/
1932 ps_mb_coeff_data = *pv_mb_coeff_data;
1933
1934 /* write number of non zero coefficients */
1935 ps_mb_coeff_data->i4_sig_map_nnz = *pu1_nnz;
1936
1937 if (*pu1_nnz)
1938 {
1939 for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < *pu1_nnz; coeff_cnt++)
1940 {
1941 if (pi2_res_mb[pu1_scan_order[coeff_cnt]])
1942 {
1943 /* write residue */
1944 ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]];
1945 u4_s_map |= mask;
1946 }
1947 mask <<= 1;
1948 }
1949 /* write significant coeff map */
1950 ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16);
1951
1952 /* update ptr to coeff data */
1953 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + u4_nnz_cnt;
1954
1955 /* cbp */
1956 u1_cbp_l |= (1 << b8);
1957 }
1958 else
1959 {
1960 (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue;
1961 }
1962 }
1963
1964 /* if the 8x8 block has no residue, nothing needs to be sent to entropy */
1965 if (!(u1_cbp_l & (1 << b8)))
1966 {
1967 *pv_mb_coeff_data = ps_mb_coeff_data_b8;
1968 }
1969 }
1970
1971 /* memcpy recon */
1972 ps_codec->pf_inter_pred_luma_copy(pu1_ref_mb_intra_4x4, pu1_rec_mb, MB_SIZE, i4_rec_strd, MB_SIZE, MB_SIZE, NULL, 0);
1973
1974 return (u1_cbp_l);
1975}
1976
1977
1978/**
1979*******************************************************************************
1980*
1981* @brief performs chroma core coding for intra macro blocks
1982*
1983* @par Description:
1984* If the current MB is to be intra coded with mb type chroma I8x8, the MB is
1985* first predicted using intra 8x8 prediction filters. The predicted data is
1986* compared with the input for error and the error is transformed. The DC
1987* coefficients of each transformed sub blocks are further transformed using
1988* Hadamard transform. The resulting coefficients are quantized, packed and sent
1989* for entropy coding.
1990*
1991* @param[in] ps_proc_ctxt
1992* pointer to the current macro block context
1993*
1994* @returns u1_cbp_c
1995* coded block pattern chroma
1996*
1997* @remarks
1998* The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order
1999* mentioned in h.264 specification
2000*
2001*******************************************************************************
2002*/
2003UWORD8 ih264e_code_chroma_intra_macroblock_8x8(process_ctxt_t *ps_proc)
2004{
2005 /* Codec Context */
2006 codec_t *ps_codec = ps_proc->ps_codec;
2007
2008 /* pointer to ref macro block */
2009 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma;
2010
2011 /* pointer to src macro block */
2012 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
2013
2014 /* pointer to prediction macro block */
2015 UWORD8 *pu1_pred_mb = NULL;
2016
2017 /* pointer to residual macro block */
2018 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
2019
2020 /* strides */
2021 WORD32 i4_src_strd = ps_proc->i4_src_strd;
2022 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
2023 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
2024 WORD32 i4_res_strd = ps_proc->i4_res_strd;
2025
2026 /* intra mode */
2027 UWORD8 u1_intra_mode = ps_proc->u1_c_i8_mode;
2028
2029 /* coded block pattern */
2030 UWORD8 u1_cbp_c = 0;
2031
2032 /* number of non zero coeffs*/
2033 UWORD8 au1_nnz[18] = {0};
2034
2035 /* quantization parameters */
2036 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
2037
2038 /* Control signal for inverse transform */
2039 UWORD32 u4_cntrl;
2040
2041 /* pointer to packed mb coeff data */
2042 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
2043
2044 /* See if we need to swap U and V plances for entropy */
2045 UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
2046
2047 if (PLANE_CH_I8x8 == u1_intra_mode)
2048 {
2049 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma_plane;
2050 }
2051 else
2052 {
2053 pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma;
2054 }
2055
2056 /********************************************************/
2057 /* error estimation, */
2058 /* transform */
2059 /* quantization */
2060 /********************************************************/
2061 ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
2062 pu1_pred_mb, pi2_res_mb,
2063 i4_src_strd, i4_pred_strd,
2064 i4_res_strd,
2065 ps_qp_params->pu2_scale_mat,
2066 ps_qp_params->pu2_thres_mat,
2067 ps_qp_params->u1_qbits,
2068 ps_qp_params->u4_dead_zone,
2069 au1_nnz);
2070
2071 /********************************************************/
2072 /* pack coeff data for entropy coding */
2073 /********************************************************/
2074 ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c,
2075 au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
2076
2077 /********************************************************/
2078 /* ierror estimation, */
2079 /* itransform */
2080 /* iquantization */
2081 /********************************************************/
2082 ih264e_chroma_8x8_idctrans_iquant_itrans_recon(ps_codec, pi2_res_mb,
2083 pu1_pred_mb, pu1_ref_mb,
2084 i4_res_strd, i4_pred_strd,
2085 i4_rec_strd,
2086 ps_qp_params->pu2_iscale_mat,
2087 ps_qp_params->pu2_weigh_mat,
2088 ps_qp_params->u1_qp_div,
2089 u4_cntrl,
2090 ps_proc->pv_scratch_buff);
2091 return (u1_cbp_c);
2092}
2093
2094
2095/**
2096*******************************************************************************
2097*
2098* @brief performs luma core coding when mode is inter
2099*
2100* @par Description:
2101* If the current mb is to be coded as inter the mb is predicted based on the
2102* sub mb partitions and corresponding motion vectors generated by ME. Then,
2103* error is computed between the input blk and the estimated blk. This error is
2104* transformed, quantized. The quantized coefficients are packed in scan order
2105* for entropy coding
2106*
2107* @param[in] ps_proc_ctxt
2108* pointer to the current macro block context
2109*
2110* @returns u1_cbp_l
2111* coded block pattern luma
2112*
2113* @remarks none
2114*
2115*******************************************************************************
2116*/
2117
2118UWORD8 ih264e_code_luma_inter_macroblock_16x16(process_ctxt_t *ps_proc)
2119{
2120 /* Codec Context */
2121 codec_t *ps_codec = ps_proc->ps_codec;
2122
2123 /* pointer to ref macro block */
2124 UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma;
2125
2126 /* pointer to src macro block */
2127 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
2128
2129 /* pointer to prediction macro block */
2130 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
2131
2132 /* pointer to residual macro block */
2133 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
2134
2135 /* strides */
2136 WORD32 i4_src_strd = ps_proc->i4_src_strd;
2137 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
2138 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
2139 WORD32 i4_res_strd = ps_proc->i4_res_strd;
2140
2141 /* coded block pattern */
2142 UWORD8 u1_cbp_l = 0;
2143
2144 /*Control signal of itrans*/
2145 UWORD32 u4_cntrl;
2146
2147 /* number of non zero coeffs*/
2148 UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz;
2149
2150 /* quantization parameters */
2151 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
2152
2153 /* pointer to packed mb coeff data */
2154 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
2155
2156 /* pseudo pred buffer */
2157 UWORD8 *pu1_pseudo_pred = pu1_pred_mb;
2158
2159 /* pseudo pred buffer stride */
2160 WORD32 i4_pseudo_pred_strd = i4_pred_strd;
2161
2162 /* init nnz */
2163 ps_proc->au4_nnz[0] = 0;
2164 ps_proc->au4_nnz[1] = 0;
2165 ps_proc->au4_nnz[2] = 0;
2166 ps_proc->au4_nnz[3] = 0;
2167 ps_proc->au4_nnz[4] = 0;
2168
2169 /********************************************************/
2170 /* prediction */
2171 /********************************************************/
2172 ih264e_motion_comp_luma(ps_proc, &pu1_pseudo_pred, &i4_pseudo_pred_strd);
2173
2174 /********************************************************/
2175 /* error estimation, */
2176 /* transform */
2177 /* quantization */
2178 /********************************************************/
2179 if (ps_proc->u4_min_sad_reached == 0 || ps_proc->u4_min_sad != 0)
2180 {
2181 ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
2182 pu1_pseudo_pred, pi2_res_mb,
2183 i4_src_strd,
2184 i4_pseudo_pred_strd,
2185 i4_res_strd,
2186 ps_qp_params->pu2_scale_mat,
2187 ps_qp_params->pu2_thres_mat,
2188 ps_qp_params->u1_qbits,
2189 ps_qp_params->u4_dead_zone,
2190 pu1_nnz,
2191 DISABLE_DC_TRANSFORM);
2192
2193 /********************************************************/
2194 /* pack coeff data for entropy coding */
2195 /********************************************************/
2196 ih264e_pack_l_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l,
2197 pu1_nnz, ps_codec->u4_thres_resi, &u4_cntrl);
2198 }
2199 else
2200 {
2201 u1_cbp_l = 0;
2202 u4_cntrl = 0;
2203 }
2204
2205 /********************************************************/
2206 /* ierror estimation, */
2207 /* itransform */
2208 /* iquantization */
2209 /********************************************************/
2210
2211 /*If the frame is not to be used for P frame reference or dumping recon
2212 * we only will use the reocn for only predicting intra Mbs
2213 * THis will need only right and bottom edge 4x4 blocks recon
2214 * Hence we selectively enable them using control signal(including DC)
2215 */
2216 if (ps_proc->u4_compute_recon != 1)
2217 {
2218 u4_cntrl &= 0x111F0000;
2219 }
2220
2221 if (u4_cntrl)
2222 {
2223 ih264e_luma_16x16_idctrans_iquant_itrans_recon(
2224 ps_codec, pi2_res_mb, pu1_pseudo_pred, pu1_rec_mb,
2225 i4_res_strd, i4_pseudo_pred_strd, i4_rec_strd,
2226 ps_qp_params->pu2_iscale_mat,
2227 ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
2228 u4_cntrl /*Cntrl*/, DISABLE_DC_TRANSFORM,
2229 ps_proc->pv_scratch_buff);
2230 }
2231 else
2232 {
2233 ps_codec->pf_inter_pred_luma_copy(pu1_pseudo_pred, pu1_rec_mb,
2234 i4_pseudo_pred_strd, i4_rec_strd,
2235 MB_SIZE, MB_SIZE, NULL, 0);
2236 }
2237
2238
2239 return (u1_cbp_l);
2240}
2241
2242/**
2243*******************************************************************************
2244*
2245* @brief performs chroma core coding for inter macro blocks
2246*
2247* @par Description:
2248* If the current mb is to be coded as inter predicted mb,based on the sub mb partitions
2249* and corresponding motion vectors generated by ME ,prediction is done.
2250* Then, error is computed between the input blk and the estimated blk.
2251* This error is transformed , quantized. The quantized coefficients
2252* are packed in scan order for
2253* entropy coding.
2254*
2255* @param[in] ps_proc_ctxt
2256* pointer to the current macro block context
2257*
2258* @returns u1_cbp_l
2259* coded block pattern chroma
2260*
2261* @remarks none
2262*
2263*******************************************************************************
2264*/
2265UWORD8 ih264e_code_chroma_inter_macroblock_8x8(process_ctxt_t *ps_proc)
2266{
2267 /* Codec Context */
2268 codec_t *ps_codec = ps_proc->ps_codec;
2269
2270 /* pointer to ref macro block */
2271 UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_chroma;
2272
2273 /* pointer to src macro block */
2274 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
2275
2276 /* pointer to prediction macro block */
2277 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
2278
2279 /* pointer to residual macro block */
2280 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf;
2281
2282 /* strides */
2283 WORD32 i4_src_strd = ps_proc->i4_src_strd;
2284 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
2285 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
2286 WORD32 i4_res_strd = ps_proc->i4_res_strd;
2287
2288 /* coded block pattern */
2289 UWORD8 u1_cbp_c = 0;
2290
2291 /*Control signal for inverse transform*/
2292 UWORD32 u4_cntrl;
2293
2294 /* number of non zero coeffs*/
2295 UWORD8 au1_nnz[10] = {0};
2296
2297 /* quantization parameters */
2298 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1];
2299
2300 /* pointer to packed mb coeff data */
2301 void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data);
2302
2303 /*See if we need to swap U and V plances for entropy*/
2304 UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU;
2305
2306 /********************************************************/
2307 /* prediction */
2308 /********************************************************/
2309 ih264e_motion_comp_chroma(ps_proc);
2310
2311 /********************************************************/
2312 /* error estimation, */
2313 /* transform */
2314 /* quantization */
2315 /********************************************************/
2316 ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb,
2317 pu1_pred_mb, pi2_res_mb,
2318 i4_src_strd, i4_pred_strd,
2319 i4_res_strd,
2320 ps_qp_params->pu2_scale_mat,
2321 ps_qp_params->pu2_thres_mat,
2322 ps_qp_params->u1_qbits,
2323 ps_qp_params->u4_dead_zone,
2324 au1_nnz);
2325
2326 /********************************************************/
2327 /* pack coeff data for entropy coding */
2328 /********************************************************/
2329 ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c,
2330 au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv);
2331
2332 /********************************************************/
2333 /* ierror estimation, */
2334 /* itransform */
2335 /* iquantization */
2336 /********************************************************/
2337
2338 /* If the frame is not to be used for P frame reference or dumping recon
2339 * we only will use the reocn for only predicting intra Mbs
2340 * THis will need only right and bottom edge 4x4 blocks recon
2341 * Hence we selectively enable them using control signal(including DC)
2342 */
2343 if (!ps_proc->u4_compute_recon)
2344 {
2345 u4_cntrl &= 0x7700C000;
2346 }
2347
2348 if (u4_cntrl)
2349 {
2350 ih264e_chroma_8x8_idctrans_iquant_itrans_recon(
2351 ps_codec, pi2_res_mb, pu1_pred_mb, pu1_rec_mb,
2352 i4_res_strd, i4_pred_strd, i4_rec_strd,
2353 ps_qp_params->pu2_iscale_mat,
2354 ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div,
2355 u4_cntrl, ps_proc->pv_scratch_buff);
2356 }
2357 else
2358 {
2359 ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_rec_mb, i4_pred_strd,
2360 i4_rec_strd, MB_SIZE >> 1, MB_SIZE,
2361 NULL, 0);
2362 }
2363
2364 return (u1_cbp_c);
2365}