| /****************************************************************************** |
| * |
| * Copyright (C) 2015 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at: |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| ***************************************************************************** |
| * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| */ |
| |
| /** |
| ******************************************************************************* |
| * @file |
| * ih264e_core_coding.c |
| * |
| * @brief |
| * This file contains routines that perform luma and chroma core coding for |
| * intra macroblocks |
| * |
| * @author |
| * ittiam |
| * |
| * @par List of Functions: |
| * - ih264e_pack_l_mb_i16() |
| * - ih264e_pack_c_mb_i8() |
| * - ih264e_code_luma_intra_macroblock_16x16() |
| * - ih264e_code_luma_intra_macroblock_4x4() |
| * - ih264e_code_chroma_intra_macroblock_8x8() |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| |
| /*****************************************************************************/ |
| /* File Includes */ |
| /*****************************************************************************/ |
| |
| /* System include files */ |
| #include <stdio.h> |
| #include <string.h> |
| #include <assert.h> |
| |
| /* User include files */ |
| #include "ih264e_config.h" |
| #include "ih264_typedefs.h" |
| #include "ih264_platform_macros.h" |
| #include "iv2.h" |
| #include "ive2.h" |
| #include "ih264_macros.h" |
| #include "ih264_defs.h" |
| #include "ih264e_defs.h" |
| #include "ih264_trans_data.h" |
| #include "ih264e_error.h" |
| #include "ih264e_bitstream.h" |
| #include "ime_distortion_metrics.h" |
| #include "ime_defs.h" |
| #include "ime_structs.h" |
| #include "ih264_structs.h" |
| #include "ih264_trans_quant_itrans_iquant.h" |
| #include "ih264_inter_pred_filters.h" |
| #include "ih264_mem_fns.h" |
| #include "ih264_padding.h" |
| #include "ih264_intra_pred_filters.h" |
| #include "ih264_deblk_edge_filters.h" |
| #include "ih264_cabac_tables.h" |
| #include "irc_cntrl_param.h" |
| #include "irc_frame_info_collector.h" |
| #include "ih264e_rate_control.h" |
| #include "ih264e_cabac_structs.h" |
| #include "ih264e_structs.h" |
| #include "ih264e_globals.h" |
| #include "ih264e_core_coding.h" |
| #include "ih264e_mc.h" |
| |
| |
| /*****************************************************************************/ |
| /* Function Definitions */ |
| /*****************************************************************************/ |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * This function performs does the DCT transform then Hadamard transform |
| * and quantization for a macroblock when the mb mode is intra 16x16 mode |
| * |
| * @par Description: |
| * First cf4 is done on all 16 4x4 blocks of the 16x16 input block. |
| * Then hadamard transform is done on the DC coefficients |
| * Quantization is then performed on the 16x16 block, 4x4 wise |
| * |
| * @param[in] pu1_src |
| * Pointer to source sub-block |
| * |
| * @param[in] pu1_pred |
| * Pointer to prediction sub-block |
| * |
| * @param[in] pi2_out |
| * Pointer to residual sub-block |
| * The output will be in linear format |
| * The first 16 continuous locations will contain the values of Dc block |
| * After DC block and a stride 1st AC block will follow |
| * After one more stride next AC block will follow |
| * The blocks will be in raster scan order |
| * |
| * @param[in] src_strd |
| * Source stride |
| * |
| * @param[in] pred_strd |
| * Prediction stride |
| * |
| * @param[in] dst_strd |
| * Destination stride |
| * |
| * @param[in] pu2_scale_matrix |
| * The quantization matrix for 4x4 transform |
| * |
| * @param[in] pu2_threshold_matrix |
| * Threshold matrix |
| * |
| * @param[in] u4_qbits |
| * 15+QP/6 |
| * |
| * @param[in] u4_round_factor |
| * Round factor for quant |
| * |
| * @param[out] pu1_nnz |
| * Memory to store the non-zeros after transform |
| * The first byte will be the nnz of DC block |
| * From the next byte the AC nnzs will be stored in raster scan order |
| * |
| * @param u4_dc_flag |
| * Signals if Dc transform is to be done or not |
| * 1 -> Dc transform will be done |
| * 0 -> Dc transform will not be done |
| * |
| * @remarks |
| * |
| ******************************************************************************* |
| */ |
| void ih264e_luma_16x16_resi_trans_dctrans_quant(codec_t *ps_codec, |
| UWORD8 *pu1_src, |
| UWORD8 *pu1_pred, |
| WORD16 *pi2_out, |
| WORD32 src_strd, |
| WORD32 pred_strd, |
| WORD32 dst_strd, |
| const UWORD16 *pu2_scale_matrix, |
| const UWORD16 *pu2_threshold_matrix, |
| UWORD32 u4_qbits, |
| UWORD32 u4_round_factor, |
| UWORD8 *pu1_nnz, |
| UWORD32 u4_dc_flag) |
| |
| { |
| WORD32 blk_cntr; |
| WORD32 i4_offsetx, i4_offsety; |
| UWORD8 *pu1_curr_src, *pu1_curr_pred; |
| |
| WORD16 *pi2_dc_str = pi2_out; |
| |
| /* Move to the ac addresses */ |
| pu1_nnz++; |
| pi2_out += dst_strd; |
| |
| for (blk_cntr = 0; blk_cntr < NUM_LUMA4x4_BLOCKS_IN_MB; blk_cntr++) |
| { |
| IND2SUB_LUMA_MB(blk_cntr, i4_offsetx, i4_offsety); |
| |
| pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd; |
| pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd; |
| |
| ps_codec->pf_resi_trans_quant_4x4(pu1_curr_src, pu1_curr_pred, |
| pi2_out + blk_cntr * dst_strd, |
| src_strd, pred_strd, pu2_scale_matrix, |
| pu2_threshold_matrix, u4_qbits, |
| u4_round_factor, &pu1_nnz[blk_cntr], |
| &pi2_dc_str[blk_cntr]); |
| |
| } |
| |
| if (!u4_dc_flag) |
| return; |
| |
| /* |
| * In case of i16x16, we need to remove the contribution of dc coeffs into |
| * nnz of each block. We are doing that in the packing function |
| */ |
| |
| /* Adjust pointers to point to dc values */ |
| pi2_out -= dst_strd; |
| pu1_nnz--; |
| |
| u4_qbits++; |
| u4_round_factor <<= 1; |
| |
| ps_codec->pf_hadamard_quant_4x4(pi2_dc_str, pi2_out, pu2_scale_matrix, |
| pu2_threshold_matrix, u4_qbits, |
| u4_round_factor, &pu1_nnz[0]); |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * This function performs the intra 16x16 inverse transform process for H264 |
| * it includes inverse Dc transform, inverse quant and then inverse transform |
| * |
| * @par Description: |
| * |
| * @param[in] pi2_src |
| * Input data, 16x16 size |
| * First 16 mem locations will have the Dc coffs in rater scan order in linear fashion |
| * after a stride 1st AC clock will be present again in raster can order |
| * Then each AC block of the 16x16 block will follow in raster scan order |
| * |
| * @param[in] pu1_pred |
| * The predicted data, 16x16 size |
| * Block by block form |
| * |
| * @param[in] pu1_out |
| * Output 16x16 |
| * In block by block form |
| * |
| * @param[in] src_strd |
| * Source stride |
| * |
| * @param[in] pred_strd |
| * input stride for prediction buffer |
| * |
| * @param[in] out_strd |
| * input stride for output buffer |
| * |
| * @param[in] pu2_iscale_mat |
| * Inverse quantization matrix for 4x4 transform |
| * |
| * @param[in] pu2_weigh_mat |
| * weight matrix of 4x4 transform |
| * |
| * @param[in] qp_div |
| * QP/6 |
| * |
| * @param[in] pi4_tmp |
| * Input temporary buffer |
| * needs to be at least 20 in size |
| * |
| * @param[in] pu4_cntrl |
| * Controls the transform path |
| * total Last 17 bits are used |
| * the 16th th bit will correspond to DC block |
| * and 32-17 will correspond to the ac blocks in raster scan order |
| * bit equaling zero indicates that the entire 4x4 block is zero for DC |
| * For AC blocks a bit equaling zero will mean that all 15 AC coffs of the block is nonzero |
| * |
| * @param[in] pi4_tmp |
| * Input temporary buffer |
| * needs to be at least COFF_CNT_SUB_BLK_4x4+COFF_CNT_SUB_BLK_4x4 size |
| * |
| * @returns |
| * none |
| * |
| * @remarks |
| * The all zero case must be taken care outside |
| * |
| ******************************************************************************* |
| */ |
| void ih264e_luma_16x16_idctrans_iquant_itrans_recon(codec_t *ps_codec, |
| WORD16 *pi2_src, |
| UWORD8 *pu1_pred, |
| UWORD8 *pu1_out, |
| WORD32 src_strd, |
| WORD32 pred_strd, |
| WORD32 out_strd, |
| const UWORD16 *pu2_iscale_mat, |
| const UWORD16 *pu2_weigh_mat, |
| UWORD32 qp_div, |
| UWORD32 u4_cntrl, |
| UWORD32 u4_dc_trans_flag, |
| WORD32 *pi4_tmp) |
| { |
| /* Start index for inverse quant in a 4x4 block */ |
| WORD32 iq_start_idx = (u4_dc_trans_flag == 0) ? 0 : 1; |
| |
| /* Cntrl bits for 4x4 transforms |
| * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path |
| * u4_dc_cntrl : controls is a 4x4 block is to be processed in dc path |
| * : dc block must contain only single dc coefficient |
| * u4_empty_blk_cntrl : control fot 4x4 block with no coeffs, ie no dc and ac |
| * : ie not (ac or dc) |
| */ |
| UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl; |
| |
| /* tmp registers for block ids */ |
| UWORD32 u4_blk_id; |
| |
| /* Subscrripts */ |
| WORD32 i4_offset_x, i4_offset_y; |
| |
| UWORD8 *pu1_cur_prd_blk, *pu1_cur_out_blk; |
| |
| /* Src and stride for dc coeffs */ |
| UWORD32 u4_dc_inc; |
| WORD16 *pi2_dc_src; |
| |
| /* |
| * For intra blocks we need to do inverse dc transform |
| * In case if intra blocks, its here that we populate the dc bits in cntrl |
| * as they cannot be populated any earlier |
| */ |
| if (u4_dc_trans_flag) |
| { |
| UWORD32 cntr, u4_dc_cntrl; |
| /* Do inv hadamard and place the results at the start of each AC block */ |
| ps_codec->pf_ihadamard_scaling_4x4(pi2_src, pi2_src, pu2_iscale_mat, |
| pu2_weigh_mat, qp_div, pi4_tmp); |
| |
| /* Update the cntrl flag */ |
| u4_dc_cntrl = 0; |
| for (cntr = 0; cntr < DC_COEFF_CNT_LUMA_MB; cntr++) |
| { |
| u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr)); |
| } |
| /* Mark dc bits as 1 if corresponding ac bit is 0 */ |
| u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl); |
| /* Combine both ac and dc bits */ |
| u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA) |
| | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_LUMA); |
| } |
| |
| /* Source for dc coeffs |
| * If the block is intra, we have to read dc values from first row of src |
| * then stride for each block is 1, other wise its src stride |
| */ |
| pi2_dc_src = (iq_start_idx == 0) ? (pi2_src + src_strd) : pi2_src; |
| u4_dc_inc = (iq_start_idx == 0) ? src_strd : 1; |
| |
| /* The AC blocks starts from 2nd row */ |
| pi2_src += src_strd; |
| |
| /* Get the block bits */ |
| u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_LUMA); |
| u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_LUMA) << 16; |
| u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFFFF0000; |
| |
| /* Get first block to process */ |
| DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id); |
| while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB) |
| { |
| /* Compute address of src blocks */ |
| WORD32 i4_src_offset = u4_dc_inc * u4_blk_id; |
| |
| IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); |
| |
| /* Compute address of out and pred blocks */ |
| pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; |
| pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; |
| |
| /* Do inv dc transform */ |
| ps_codec->pf_iquant_itrans_recon_4x4_dc(pi2_dc_src + i4_src_offset, |
| pu1_cur_prd_blk, |
| pu1_cur_out_blk, pred_strd, |
| out_strd, pu2_iscale_mat, |
| pu2_weigh_mat, qp_div, NULL, |
| iq_start_idx, |
| pi2_dc_src + i4_src_offset); |
| /* Get next DC block to process */ |
| DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id); |
| } |
| |
| /* now process ac/mixed blocks */ |
| DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id); |
| while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB) |
| { |
| |
| WORD32 i4_src_offset = src_strd * u4_blk_id; |
| |
| IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); |
| |
| pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; |
| pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; |
| |
| ps_codec->pf_iquant_itrans_recon_4x4(pi2_src + i4_src_offset, |
| pu1_cur_prd_blk, pu1_cur_out_blk, |
| pred_strd, out_strd, |
| pu2_iscale_mat, pu2_weigh_mat, |
| qp_div, (WORD16*) pi4_tmp, |
| iq_start_idx, |
| pi2_dc_src + u4_blk_id); |
| |
| DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id); |
| } |
| |
| /* Now process empty blocks */ |
| DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id); |
| while (u4_blk_id < NUM_LUMA4x4_BLOCKS_IN_MB) |
| { |
| IND2SUB_LUMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); |
| |
| pu1_cur_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; |
| pu1_cur_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; |
| |
| ps_codec->pf_inter_pred_luma_copy(pu1_cur_prd_blk, pu1_cur_out_blk, |
| pred_strd, out_strd, SIZE_4X4_BLK_HRZ, |
| SIZE_4X4_BLK_VERT, 0, 0); |
| |
| DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id); |
| } |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * This function performs does the DCT transform then Hadamard transform |
| * and quantization for a chroma macroblock |
| * |
| * @par Description: |
| * First cf4 is done on all 16 4x4 blocks of the 8x8input block |
| * Then hadamard transform is done on the DC coefficients |
| * Quantization is then performed on the 8x8 block, 4x4 wise |
| * |
| * @param[in] pu1_src |
| * Pointer to source sub-block |
| * The input is in interleaved format for two chroma planes |
| * |
| * @param[in] pu1_pred |
| * Pointer to prediction sub-block |
| * Prediction is in inter leaved format |
| * |
| * @param[in] pi2_out |
| * Pointer to residual sub-block |
| * The output will be in linear format |
| * The first 4 continuous locations will contain the values of DC block for U |
| * and then next 4 will contain for V. |
| * After DC block and a stride 1st AC block of U plane will follow |
| * After one more stride next AC block of V plane will follow |
| * The blocks will be in raster scan order |
| * |
| * After all the AC blocks of U plane AC blocks of V plane will follow in exact |
| * same way |
| * |
| * @param[in] src_strd |
| * Source stride |
| * |
| * @param[in] pred_strd |
| * Prediction stride |
| * |
| * @param[in] dst_strd |
| * Destination stride |
| * |
| * @param[in] pu2_scale_matrix |
| * The quantization matrix for 4x4 transform |
| * |
| * @param[in] pu2_threshold_matrix |
| * Threshold matrix |
| * |
| * @param[in] u4_qbits |
| * 15+QP/6 |
| * |
| * @param[in] u4_round_factor |
| * Round factor for quant |
| * |
| * @param[out] pu1_nnz |
| * Memory to store the non-zeros after transform |
| * The first byte will be the nnz od DC block for U plane |
| * From the next byte the AC nnzs will be storerd in raster scan order |
| * The fifth byte will be nnz of Dc block of V plane |
| * Then Ac blocks will follow |
| * |
| * @param u4_dc_flag |
| * Signals if Dc transform is to be done or not |
| * 1 -> Dc transform will be done |
| * 0 -> Dc transform will not be done |
| * |
| * @remarks |
| * |
| ******************************************************************************* |
| */ |
| void ih264e_chroma_8x8_resi_trans_dctrans_quant(codec_t *ps_codec, |
| UWORD8 *pu1_src, |
| UWORD8 *pu1_pred, |
| WORD16 *pi2_out, |
| WORD32 src_strd, |
| WORD32 pred_strd, |
| WORD32 out_strd, |
| const UWORD16 *pu2_scale_matrix, |
| const UWORD16 *pu2_threshold_matrix, |
| UWORD32 u4_qbits, |
| UWORD32 u4_round_factor, |
| UWORD8 *pu1_nnz_c) |
| { |
| WORD32 blk_cntr; |
| WORD32 i4_offsetx, i4_offsety; |
| UWORD8 *pu1_curr_src, *pu1_curr_pred; |
| |
| WORD16 pi2_dc_str[8]; |
| UWORD8 au1_dcnnz[2]; |
| |
| /* Move to the ac addresses */ |
| pu1_nnz_c++; |
| pi2_out += out_strd; |
| |
| for (blk_cntr = 0; blk_cntr < NUM_CHROMA4x4_BLOCKS_IN_MB; blk_cntr++) |
| { |
| IND2SUB_CHROMA_MB(blk_cntr, i4_offsetx, i4_offsety); |
| |
| pu1_curr_src = pu1_src + i4_offsetx + i4_offsety * src_strd; |
| pu1_curr_pred = pu1_pred + i4_offsetx + i4_offsety * pred_strd; |
| |
| /* For chroma, v plane nnz is populated from position 5 */ |
| ps_codec->pf_resi_trans_quant_chroma_4x4( |
| pu1_curr_src, pu1_curr_pred, |
| pi2_out + blk_cntr * out_strd, src_strd, pred_strd, |
| pu2_scale_matrix, pu2_threshold_matrix, u4_qbits, |
| u4_round_factor, &pu1_nnz_c[blk_cntr + (blk_cntr > 3)], |
| &pi2_dc_str[blk_cntr]); |
| } |
| |
| /* Adjust pointers to point to dc values */ |
| pi2_out -= out_strd; |
| pu1_nnz_c--; |
| |
| u4_qbits++; |
| u4_round_factor <<= 1; |
| |
| ps_codec->pf_hadamard_quant_2x2_uv(pi2_dc_str, pi2_out, pu2_scale_matrix, |
| pu2_threshold_matrix, u4_qbits, |
| u4_round_factor, au1_dcnnz); |
| |
| /* Copy the dc nnzs */ |
| pu1_nnz_c[0] = au1_dcnnz[0]; |
| pu1_nnz_c[5] = au1_dcnnz[1]; |
| |
| } |
| |
| /** |
| ******************************************************************************* |
| * @brief |
| * This function performs the inverse transform with process for chroma MB of H264 |
| * |
| * @par Description: |
| * Does inverse DC transform ,inverse quantization inverse transform |
| * |
| * @param[in] pi2_src |
| * Input data, 16x16 size |
| * The input is in the form of, first 4 locations will contain DC coeffs of |
| * U plane, next 4 will contain DC coeffs of V plane, then AC blocks of U plane |
| * in raster scan order will follow, each block as linear array in raster scan order. |
| * After a stride next AC block will follow. After all AC blocks of U plane |
| * V plane AC blocks will follow in exact same order. |
| * |
| * @param[in] pu1_pred |
| * The predicted data, 8x16 size, U and V interleaved |
| * |
| * @param[in] pu1_out |
| * Output 8x16, U and V interleaved |
| * |
| * @param[in] src_strd |
| * Source stride |
| * |
| * @param[in] pred_strd |
| * input stride for prediction buffer |
| * |
| * @param[in] out_strd |
| * input stride for output buffer |
| * |
| * @param[in] pu2_iscale_mat |
| * Inverse quantization martix for 4x4 transform |
| * |
| * @param[in] pu2_weigh_mat |
| * weight matrix of 4x4 transform |
| * |
| * @param[in] qp_div |
| * QP/6 |
| * |
| * @param[in] pi4_tmp |
| * Input temporary buffer |
| * needs to be at least COFF_CNT_SUB_BLK_4x4 + Number of Dc cofss for chroma * number of planes |
| * in size |
| * |
| * @param[in] pu4_cntrl |
| * Controls the transform path |
| * the 15 th bit will correspond to DC block of U plane , 14th will indicate the V plane Dc block |
| * 32-28 bits will indicate AC blocks of U plane in raster scan order |
| * 27-23 bits will indicate AC blocks of V plane in rater scan order |
| * The bit 1 implies that there is at least one non zero coeff in a block |
| * |
| * @returns |
| * none |
| * |
| * @remarks |
| ******************************************************************************* |
| */ |
| void ih264e_chroma_8x8_idctrans_iquant_itrans_recon(codec_t *ps_codec, |
| WORD16 *pi2_src, |
| UWORD8 *pu1_pred, |
| UWORD8 *pu1_out, |
| WORD32 src_strd, |
| WORD32 pred_strd, |
| WORD32 out_strd, |
| const UWORD16 *pu2_iscale_mat, |
| const UWORD16 *pu2_weigh_mat, |
| UWORD32 qp_div, |
| UWORD32 u4_cntrl, |
| WORD32 *pi4_tmp) |
| { |
| /* Cntrl bits for 4x4 transforms |
| * u4_blk_cntrl : controls if a 4x4 block should be processed in ac path |
| * u4_dc_cntrl : controls is a 4x4 block is to be processed in dc path |
| * : dc block must contain only single dc coefficient |
| * u4_empty_blk_cntrl : control fot 4x4 block with no coeffs, ie no dc and ac |
| * : ie not (ac or dc) |
| */ |
| |
| UWORD32 u4_blk_cntrl, u4_dc_cntrl, u4_empty_blk_cntrl; |
| |
| /* tmp registers for block ids */ |
| WORD32 u4_blk_id; |
| |
| /* Offsets for pointers */ |
| WORD32 i4_offset_x, i4_offset_y; |
| |
| /* Pointer to 4x4 blocks */ |
| UWORD8 *pu1_cur_4x4_prd_blk, *pu1_cur_4x4_out_blk; |
| |
| /* Tmp register for pointer to dc coffs */ |
| WORD16 *pi2_dc_src; |
| |
| WORD16 i2_zero = 0; |
| |
| /* Increment for dc block */ |
| WORD32 i4_dc_inc; |
| |
| /* |
| * Lets do the inverse transform for dc coeffs in chroma |
| */ |
| if (u4_cntrl & CNTRL_FLAG_DCBLK_MASK_CHROMA) |
| { |
| UWORD32 cntr, u4_dc_cntrl; |
| /* Do inv hadamard for u an v block */ |
| |
| ps_codec->pf_ihadamard_scaling_2x2_uv(pi2_src, pi2_src, pu2_iscale_mat, |
| pu2_weigh_mat, qp_div, NULL); |
| /* |
| * Update the cntrl flag |
| * Flag is updated as follows bits 15-11 -> u block dc bits |
| */ |
| u4_dc_cntrl = 0; |
| for (cntr = 0; cntr < 8; cntr++) |
| { |
| u4_dc_cntrl |= ((pi2_src[cntr] != 0) << (15 - cntr)); |
| } |
| |
| /* Mark dc bits as 1 if corresponding ac bit is 0 */ |
| u4_dc_cntrl = (~(u4_cntrl >> 16) & u4_dc_cntrl); |
| /* Combine both ac and dc bits */ |
| u4_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA) |
| | (u4_dc_cntrl & CNTRL_FLAG_DC_MASK_CHROMA); |
| |
| /* Since we populated the dc coffs, we have to read them from there */ |
| pi2_dc_src = pi2_src; |
| i4_dc_inc = 1; |
| } |
| else |
| { |
| u4_cntrl = u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA; |
| pi2_dc_src = &i2_zero; |
| i4_dc_inc = 0; |
| } |
| |
| /* Get the block bits */ |
| u4_blk_cntrl = (u4_cntrl & CNTRL_FLAG_AC_MASK_CHROMA); |
| u4_dc_cntrl = (u4_cntrl & CNTRL_FLAG_DC_MASK_CHROMA) << 16; |
| u4_empty_blk_cntrl = (~(u4_dc_cntrl | u4_blk_cntrl)) & 0xFF000000; |
| |
| /* The AC blocks starts from 2nd row */ |
| pi2_src += src_strd; |
| |
| DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id); |
| while (u4_blk_id < 8) |
| { |
| WORD32 dc_src_offset = u4_blk_id * i4_dc_inc; |
| |
| IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); |
| |
| pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; |
| pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; |
| |
| ps_codec->pf_iquant_itrans_recon_chroma_4x4_dc( |
| pi2_dc_src + dc_src_offset, pu1_cur_4x4_prd_blk, |
| pu1_cur_4x4_out_blk, pred_strd, out_strd, NULL, NULL, 0, |
| NULL, pi2_dc_src + dc_src_offset); |
| /* Get next DC block to process */ |
| DEQUEUE_BLKID_FROM_CONTROL(u4_dc_cntrl, u4_blk_id); |
| } |
| |
| /* now process ac/mixed blocks */ |
| DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id); |
| while (u4_blk_id < 8) |
| { |
| WORD32 i4_src_offset = src_strd * u4_blk_id; |
| WORD32 dc_src_offset = i4_dc_inc * u4_blk_id; |
| |
| IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); |
| |
| pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; |
| pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; |
| |
| ps_codec->pf_iquant_itrans_recon_chroma_4x4(pi2_src + i4_src_offset, |
| pu1_cur_4x4_prd_blk, |
| pu1_cur_4x4_out_blk, |
| pred_strd, out_strd, |
| pu2_iscale_mat, |
| pu2_weigh_mat, qp_div, |
| (WORD16 *) pi4_tmp, |
| pi2_dc_src + dc_src_offset); |
| |
| DEQUEUE_BLKID_FROM_CONTROL(u4_blk_cntrl, u4_blk_id); |
| } |
| |
| /* Now process empty blocks */ |
| DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id); |
| while (u4_blk_id < 8) |
| { |
| IND2SUB_CHROMA_MB(u4_blk_id, i4_offset_x, i4_offset_y); |
| |
| pu1_cur_4x4_prd_blk = pu1_pred + i4_offset_x + i4_offset_y * pred_strd; |
| pu1_cur_4x4_out_blk = pu1_out + i4_offset_x + i4_offset_y * out_strd; |
| |
| ps_codec->pf_interleave_copy(pu1_cur_4x4_prd_blk, pu1_cur_4x4_out_blk, |
| pred_strd, out_strd, SIZE_4X4_BLK_VERT, |
| SIZE_4X4_BLK_HRZ); |
| |
| DEQUEUE_BLKID_FROM_CONTROL(u4_empty_blk_cntrl, u4_blk_id); |
| } |
| } |
| |
| /** |
| ****************************************************************************** |
| * |
| * @brief This function packs residue of an i16x16 luma mb for entropy coding |
| * |
| * @par Description |
| * An i16 macro block contains two classes of units, dc 4x4 block and |
| * 4x4 ac blocks. while packing the mb, the dc block is sent first, and |
| * the 16 ac blocks are sent next in scan order. Each and every block is |
| * represented by 3 parameters (nnz, significant coefficient map and the |
| * residue coefficients itself). If a 4x4 unit does not have any coefficients |
| * then only nnz is sent. Inside a 4x4 block the individual coefficients are |
| * sent in scan order. |
| * |
| * The first byte of each block will be nnz of the block, if it is non zero, |
| * a 2 byte significance map is sent. This is followed by nonzero coefficients. |
| * This is repeated for 1 dc + 16 ac blocks. |
| * |
| * @param[in] pi2_res_mb |
| * pointer to residue mb |
| * |
| * @param[in, out] pv_mb_coeff_data |
| * buffer pointing to packed residue coefficients |
| * |
| * @param[in] u4_res_strd |
| * residual block stride |
| * |
| * @param[out] u1_cbp_l |
| * coded block pattern luma |
| * |
| * @param[in] pu1_nnz |
| * number of non zero coefficients in each 4x4 unit |
| * |
| * @param[out] |
| * Control signal for inverse transform of 16x16 blocks |
| * |
| * @return none |
| * |
| * @ remarks |
| * |
| ****************************************************************************** |
| */ |
| void ih264e_pack_l_mb_i16(WORD16 *pi2_res_mb, |
| void **pv_mb_coeff_data, |
| WORD32 i4_res_strd, |
| UWORD8 *u1_cbp_l, |
| UWORD8 *pu1_nnz, |
| UWORD32 *pu4_cntrl) |
| { |
| /* pointer to packed sub block buffer space */ |
| tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data), *ps_mb_coeff_data_ac; |
| |
| /* no of non zero coefficients in the current sub block */ |
| UWORD32 u4_nnz_cnt; |
| |
| /* significant coefficient map */ |
| UWORD32 u4_s_map; |
| |
| /* pointer to scanning matrix */ |
| const UWORD8 *pu1_scan_order; |
| |
| /* number of non zeros in sub block */ |
| UWORD32 u4_nnz; |
| |
| /* coeff scan order */ |
| const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; |
| |
| /* temp var */ |
| UWORD32 coeff_cnt, mask, b4,u4_cntrl=0; |
| |
| /*DC and AC coeff pointers*/ |
| WORD16 *pi2_res_mb_ac,*pi2_res_mb_dc; |
| |
| /********************************************************/ |
| /* pack dc coeff data for entropy coding */ |
| /********************************************************/ |
| |
| pi2_res_mb_dc = pi2_res_mb; |
| pu1_scan_order = gu1_luma_scan_order_dc; |
| |
| u4_nnz = *pu1_nnz; |
| u4_cntrl = 0; |
| |
| /* write number of non zero coefficients */ |
| ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; |
| |
| if (u4_nnz) |
| { |
| for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) |
| { |
| if (pi2_res_mb_dc[pu1_scan_order[coeff_cnt]]) |
| { |
| /* write residue */ |
| ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_dc[pu1_scan_order[coeff_cnt]]; |
| u4_s_map |= mask; |
| } |
| mask <<= 1; |
| } |
| /* write significant coeff map */ |
| ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); |
| (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); |
| |
| u4_cntrl = 0x00008000;// Set DC bit in ctrl code |
| } |
| else |
| { |
| (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; |
| } |
| |
| /********************************************************/ |
| /* pack ac coeff data for entropy coding */ |
| /********************************************************/ |
| |
| pu1_nnz ++; |
| pu1_scan_order = gu1_luma_scan_order; |
| pi2_res_mb += i4_res_strd; /*Move to AC block*/ |
| |
| ps_mb_coeff_data_ac = (*pv_mb_coeff_data); |
| |
| for (b4 = 0; b4 < 16; b4++) |
| { |
| ps_mb_coeff_data = (*pv_mb_coeff_data); |
| |
| u4_nnz = pu1_nnz[u1_scan_order[b4]]; |
| |
| /* Jump according to the scan order */ |
| pi2_res_mb_ac = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]); |
| |
| /* |
| * Since this is a i16x16 block, we should not count dc coeff on indi |
| * vidual 4x4 blocks to nnz. But due to the implementation of 16x16 |
| * trans function, we add dc's nnz to u4_nnz too. Hence we adjust that |
| * here |
| */ |
| u4_nnz -= (pi2_res_mb_ac[0] != 0); |
| |
| /* write number of non zero coefficients */ |
| ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; |
| |
| if (u4_nnz) |
| { |
| for (u4_nnz_cnt = 0, coeff_cnt = 1, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) |
| { |
| if (pi2_res_mb_ac[pu1_scan_order[coeff_cnt]]) |
| { |
| /* write residue */ |
| ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb_ac[pu1_scan_order[coeff_cnt]]; |
| u4_s_map |= mask; |
| } |
| mask <<= 1; |
| } |
| /* write significant coeff map */ |
| ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); |
| (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); |
| *u1_cbp_l = 15; |
| |
| u4_cntrl |= (1 << (31 - u1_scan_order[b4])); |
| } |
| else |
| { |
| (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; |
| } |
| |
| } |
| |
| if (!(*u1_cbp_l)) |
| { |
| (*pv_mb_coeff_data) = ps_mb_coeff_data_ac; |
| } |
| |
| /* Store the cntrl signal */ |
| (*pu4_cntrl) = u4_cntrl; |
| return; |
| } |
| |
| /** |
| ****************************************************************************** |
| * |
| * @brief This function packs residue of an p16x16 luma mb for entropy coding |
| * |
| * @par Description |
| * A p16x16 macro block contains two classes of units 16 4x4 ac blocks. |
| * while packing the mb, the dc block is sent first, and |
| * the 16 ac blocks are sent next in scan order. Each and every block is |
| * represented by 3 parameters (nnz, significant coefficient map and the |
| * residue coefficients itself). If a 4x4 unit does not have any coefficients |
| * then only nnz is sent. Inside a 4x4 block the individual coefficients are |
| * sent in scan order. |
| * |
| * The first byte of each block will be nnz of the block, if it is non zero, |
| * a 2 byte significance map is sent. This is followed by nonzero coefficients. |
| * This is repeated for 1 dc + 16 ac blocks. |
| * |
| * @param[in] pi2_res_mb |
| * pointer to residue mb |
| * |
| * @param[in, out] pv_mb_coeff_data |
| * buffer pointing to packed residue coefficients |
| * |
| * @param[in] i4_res_strd |
| * residual block stride |
| * |
| * @param[out] u1_cbp_l |
| * coded block pattern luma |
| * |
| * @param[in] pu1_nnz |
| * number of non zero coefficients in each 4x4 unit |
| * |
| * @param[out] pu4_cntrl |
| * Control signal for inverse transform |
| * |
| * @return none |
| * |
| * @remarks Killing coffs not yet coded |
| * |
| ****************************************************************************** |
| */ |
| void ih264e_pack_l_mb(WORD16 *pi2_res_mb, |
| void **pv_mb_coeff_data, |
| WORD32 i4_res_strd, |
| UWORD8 *u1_cbp_l, |
| UWORD8 *pu1_nnz, |
| UWORD32 u4_thres_resi, |
| UWORD32 *pu4_cntrl) |
| { |
| /* pointer to packed sub block buffer space */ |
| tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8, *ps_mb_coeff_data_mb; |
| |
| /* no of non zero coefficients in the current sub block */ |
| UWORD32 u4_nnz_cnt; |
| |
| /* significant coefficient map */ |
| UWORD32 u4_s_map; |
| |
| /* pointer to scanning matrix */ |
| const UWORD8 *pu1_scan_order = gu1_luma_scan_order; |
| |
| /* number of non zeros in sub block */ |
| UWORD32 u4_nnz; |
| |
| /* pointer to residual sub block */ |
| WORD16 *pi2_res_sb; |
| |
| /* coeff scan order */ |
| const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; |
| |
| /* coeff cost */ |
| const UWORD8 *pu1_coeff_cost = gu1_coeff_cost; |
| |
| /* temp var */ |
| UWORD32 u4_mb_coeff_cost = 0, u4_b8_coeff_cost = 0, coeff_cnt, mask, u4_cntrl = 0, b4, b8; |
| |
| /* temp var */ |
| WORD32 i4_res_val, i4_run = -1, dcac_block; |
| |
| /* When Hadamard transform is disabled, first row values are dont care, ignore them */ |
| pi2_res_mb += i4_res_strd; |
| |
| /* When Hadamard transform is disabled, first unit value is dont care, ignore this */ |
| pu1_nnz ++; |
| |
| ps_mb_coeff_data_mb = ps_mb_coeff_data_b8 = (*pv_mb_coeff_data); |
| |
| /********************************************************/ |
| /* pack coeff data for entropy coding */ |
| /********************************************************/ |
| |
| for (b4 = 0; b4 < 16; b4++) |
| { |
| ps_mb_coeff_data = (*pv_mb_coeff_data); |
| |
| b8 = b4 >> 2; |
| |
| u4_nnz = pu1_nnz[u1_scan_order[b4]]; |
| |
| /* Jump according to the scan order */ |
| pi2_res_sb = pi2_res_mb + (i4_res_strd * u1_scan_order[b4]); |
| |
| /* write number of non zero coefficients */ |
| ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; |
| |
| if (u4_nnz) |
| { |
| for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) |
| { |
| /* number of runs of zero before, this is used to compute coeff cost */ |
| i4_run++; |
| |
| i4_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]]; |
| |
| if (i4_res_val) |
| { |
| /* write residue */ |
| ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i4_res_val; |
| u4_s_map |= mask; |
| |
| if (u4_thres_resi) |
| { |
| /* compute coeff cost */ |
| if (i4_res_val == 1 || i4_res_val == -1) |
| { |
| if (i4_run < 6) |
| u4_b8_coeff_cost += pu1_coeff_cost[i4_run]; |
| } |
| else |
| u4_b8_coeff_cost += 9; |
| |
| i4_run = -1; |
| } |
| } |
| |
| mask <<= 1; |
| } |
| |
| /* write significant coeff map */ |
| ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); |
| (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); |
| |
| /* cbp */ |
| *u1_cbp_l |= (1 << b8); |
| |
| /* Cntrl map for inverse transform computation |
| * |
| * If coeff_cnt is zero, it means that only nonzero was a dc coeff |
| * Hence we have to set the 16 - u1_scan_order[b4]) position instead |
| * of 31 - u1_scan_order[b4] |
| */ |
| dcac_block = (coeff_cnt == 0)?16:31; |
| u4_cntrl |= (1 << (dcac_block - u1_scan_order[b4])); |
| } |
| else |
| { |
| (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; |
| } |
| |
| /* Decide if the 8x8 unit has to be sent for entropy coding? */ |
| if ((b4+1) % 4 == 0) |
| { |
| if ( u4_thres_resi && (u4_b8_coeff_cost <= LUMA_SUB_BLOCK_SKIP_THRESHOLD) && |
| (*u1_cbp_l & (1 << b8)) ) |
| { |
| |
| |
| /* |
| * When we want to reset the full 8x8 block, we have to reset |
| * both the dc and ac coeff bits hence we have the symmetric |
| * arrangement of bits |
| */ |
| const UWORD32 cntrl_mask_map[4] = {0xcc00cc00, 0x33003300, 0x00cc00cc, 0x00330033}; |
| |
| /* restore cbp */ |
| *u1_cbp_l = (*u1_cbp_l & (~(1 << b8))); |
| |
| /* correct cntrl flag */ |
| u4_cntrl = u4_cntrl & (~cntrl_mask_map[(b4 >> 2)]); |
| |
| /* correct nnz */ |
| pu1_nnz[u1_scan_order[b4 - 3]] = 0; |
| pu1_nnz[u1_scan_order[b4 - 2]] = 0; |
| pu1_nnz[u1_scan_order[b4 - 1]] = 0; |
| pu1_nnz[u1_scan_order[b4]] = 0; |
| |
| /* reset blk cost */ |
| u4_b8_coeff_cost = 0; |
| } |
| |
| if (!(*u1_cbp_l & (1 << b8))) |
| { |
| (*pv_mb_coeff_data) = ps_mb_coeff_data_b8; |
| } |
| |
| u4_mb_coeff_cost += u4_b8_coeff_cost; |
| |
| u4_b8_coeff_cost = 0; |
| i4_run = -1; |
| ps_mb_coeff_data_b8 = (*pv_mb_coeff_data); |
| } |
| } |
| |
| if (u4_thres_resi && (u4_mb_coeff_cost <= LUMA_BLOCK_SKIP_THRESHOLD) |
| && (*u1_cbp_l)) |
| { |
| (*pv_mb_coeff_data) = ps_mb_coeff_data_mb; |
| *u1_cbp_l = 0; |
| u4_cntrl = 0; |
| memset(pu1_nnz, 0, 16); |
| } |
| |
| (*pu4_cntrl) = u4_cntrl; |
| |
| return; |
| } |
| |
| /** |
| ****************************************************************************** |
| * |
| * @brief This function packs residue of an i8x8 chroma mb for entropy coding |
| * |
| * @par Description |
| * An i8 chroma macro block contains two classes of units, dc 2x2 block and |
| * 4x4 ac blocks. while packing the mb, the dc block is sent first, and |
| * the 4 ac blocks are sent next in scan order. Each and every block is |
| * represented by 3 parameters (nnz, significant coefficient map and the |
| * residue coefficients itself). If a 4x4 unit does not have any coefficients |
| * then only nnz is sent. Inside a 4x4 block the individual coefficients are |
| * sent in scan order. |
| * |
| * The first byte of each block will be nnz of the block, if it is non zero, |
| * a 2 byte significance map is sent. This is followed by nonzero coefficients. |
| * This is repeated for 1 dc + 4 ac blocks. |
| * |
| * @param[in] pi2_res_mb |
| * pointer to residue mb |
| * |
| * @param[in, out] pv_mb_coeff_data |
| * buffer pointing to packed residue coefficients |
| * |
| * @param[in] u4_res_strd |
| * residual block stride |
| * |
| * @param[out] u1_cbp_c |
| * coded block pattern chroma |
| * |
| * @param[in] pu1_nnz |
| * number of non zero coefficients in each 4x4 unit |
| * |
| * @param[out] pu1_nnz |
| * Control signal for inverse transform |
| * |
| * @param[in] u4_swap_uv |
| * Swaps the order of U and V planes in entropy bitstream |
| * |
| * @return none |
| * |
| * @ remarks |
| * |
| ****************************************************************************** |
| */ |
| void ih264e_pack_c_mb(WORD16 *pi2_res_mb, |
| void **pv_mb_coeff_data, |
| WORD32 i4_res_strd, |
| UWORD8 *u1_cbp_c, |
| UWORD8 *pu1_nnz, |
| UWORD32 u4_thres_resi, |
| UWORD32 *pu4_cntrl, |
| UWORD32 u4_swap_uv) |
| { |
| /* pointer to packed sub block buffer space */ |
| tu_sblk_coeff_data_t *ps_mb_coeff_data = (*pv_mb_coeff_data); |
| tu_sblk_coeff_data_t *ps_mb_coeff_data_dc, *ps_mb_coeff_data_ac; |
| |
| /* nnz pointer */ |
| UWORD8 *pu1_nnz_ac, *pu1_nnz_dc; |
| |
| /* nnz counter */ |
| UWORD32 u4_nnz_cnt; |
| |
| /* significant coefficient map */ |
| UWORD32 u4_s_map; |
| |
| /* pointer to scanning matrix */ |
| const UWORD8 *pu1_scan_order; |
| |
| /* no of non zero coefficients in the current sub block */ |
| UWORD32 u4_nnz; |
| |
| /* pointer to residual sub block, res val */ |
| WORD16 *pi2_res_sb, i2_res_val; |
| |
| /* temp var */ |
| UWORD32 coeff_cnt, mask, b4,plane; |
| |
| /* temp var */ |
| UWORD32 u4_coeff_cost; |
| WORD32 i4_run; |
| |
| /* coeff cost */ |
| const UWORD8 *pu1_coeff_cost = gu1_coeff_cost; |
| |
| /* pointer to packed buffer space */ |
| UWORD32 *pu4_mb_coeff_data = NULL; |
| |
| /* ac coded block pattern */ |
| UWORD8 u1_cbp_ac; |
| |
| /* Variable to store the current bit pos in cntrl variable*/ |
| UWORD32 cntrl_pos = 0; |
| |
| /********************************************************/ |
| /* pack dc coeff data for entropy coding */ |
| /********************************************************/ |
| pu1_scan_order = gu1_chroma_scan_order_dc; |
| pi2_res_sb = pi2_res_mb; |
| pu1_nnz_dc = pu1_nnz; |
| (*pu4_cntrl) = 0; |
| cntrl_pos = 15; |
| ps_mb_coeff_data_dc = (*pv_mb_coeff_data); |
| |
| /* Color space conversion between SP_UV and SP_VU |
| * We always assume SP_UV for all the processing |
| * Hence to get proper stream output we need to swap U and V channels here |
| * |
| * For that there are two paths we need to look for |
| * One is the path to bitstream , these variables should have the proper input |
| * configured UV or VU |
| * For the other path the inverse transform variables should have what ever ordering the |
| * input had |
| */ |
| |
| if (u4_swap_uv) |
| { |
| pu1_nnz_dc += 5;/* Move to NNZ of V planve */ |
| pi2_res_sb += 4;/* Move to DC coff of V plane */ |
| |
| cntrl_pos = 14; /* Control bit for V plane */ |
| } |
| |
| for (plane = 0; plane < 2; plane++) |
| { |
| ps_mb_coeff_data = (*pv_mb_coeff_data); |
| |
| u4_nnz = *pu1_nnz_dc; |
| /* write number of non zero coefficients U/V */ |
| ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; |
| |
| if (u4_nnz) |
| { |
| for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) |
| { |
| i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]]; |
| if (i2_res_val) |
| { |
| /* write residue U/V */ |
| ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val; |
| u4_s_map |= mask; |
| } |
| mask <<= 1; |
| } |
| /* write significant coeff map U/V */ |
| ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); |
| (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); |
| *u1_cbp_c = 1; |
| |
| (*pu4_cntrl) |= (1 << cntrl_pos); |
| } |
| else |
| { |
| (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; |
| } |
| |
| if (u4_swap_uv) |
| { |
| cntrl_pos++; /* Control bit for U plane */ |
| pu1_nnz_dc -= 5; /* Move to NNZ of U plane */ |
| pi2_res_sb -= 4; /* Move to DC coff of U plane */ |
| |
| } |
| else |
| { |
| cntrl_pos--; /* Control bit for U plane */ |
| pu1_nnz_dc += 5; /* 4 for AC NNZ and 1 for DC */ |
| pi2_res_sb += 4; /* Move to DC coff of V plane */ |
| } |
| } |
| |
| /********************************************************/ |
| /* pack ac coeff data for entropy coding */ |
| /********************************************************/ |
| |
| pu1_scan_order = gu1_chroma_scan_order; |
| ps_mb_coeff_data_ac = (*pv_mb_coeff_data); |
| |
| if (u4_swap_uv) |
| { |
| pi2_res_sb = pi2_res_mb + i4_res_strd * 5; /* Move to V plane ,ie 1dc row+ 4 ac row */ |
| cntrl_pos = 27; /* The control bits are to be added for V bloc ie 31-4 th bit */ |
| pu1_nnz_ac = pu1_nnz + 6;/*Move the nnz to V block NNZ 1 dc + 1dc + 4 ac */ |
| } |
| else |
| { |
| pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to U plane ,ie 1dc row */ |
| cntrl_pos = 31; |
| pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc */ |
| } |
| |
| for (plane = 0; plane < 2; plane++) |
| { |
| pu4_mb_coeff_data = (*pv_mb_coeff_data); |
| |
| u4_coeff_cost = 0; |
| i4_run = -1; |
| |
| /* get the current cbp, so that it automatically |
| * gets reverted in case of zero ac values */ |
| u1_cbp_ac = *u1_cbp_c; |
| |
| for (b4 = 0; b4 < 4; b4++) |
| { |
| ps_mb_coeff_data = (*pv_mb_coeff_data); |
| |
| u4_nnz = *pu1_nnz_ac; |
| |
| /* |
| * We are scanning only ac coeffs, but the nnz is for the |
| * complete 4x4 block. Hence we have to discount the nnz contributed |
| * by the dc coefficient |
| */ |
| u4_nnz -= (pi2_res_sb[0]!=0); |
| |
| /* write number of non zero coefficients U/V */ |
| ps_mb_coeff_data->i4_sig_map_nnz = u4_nnz; |
| |
| if (u4_nnz) |
| { |
| for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u4_nnz; coeff_cnt++) |
| { |
| i2_res_val = pi2_res_sb[pu1_scan_order[coeff_cnt]]; |
| |
| i4_run++; |
| |
| if (i2_res_val) |
| { |
| /* write residue U/V */ |
| ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = i2_res_val; |
| u4_s_map |= mask; |
| |
| if ( u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD) ) |
| { |
| /* compute coeff cost */ |
| if (i2_res_val == 1 || i2_res_val == -1) |
| { |
| if (i4_run < 6) |
| u4_coeff_cost += pu1_coeff_cost[i4_run]; |
| } |
| else |
| u4_coeff_cost += 9; |
| |
| i4_run = -1; |
| } |
| } |
| mask <<= 1; |
| } |
| |
| /* write significant coeff map U/V */ |
| ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); |
| (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); |
| u1_cbp_ac = 2; |
| |
| (*pu4_cntrl) |= 1 << cntrl_pos; |
| } |
| else |
| { |
| (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; |
| } |
| |
| pu1_nnz_ac++; |
| pi2_res_sb += i4_res_strd; |
| cntrl_pos--; |
| } |
| |
| /* reset block */ |
| if (u4_thres_resi && (u4_coeff_cost < CHROMA_BLOCK_SKIP_THRESHOLD)) |
| { |
| pu4_mb_coeff_data[0] = 0; |
| pu4_mb_coeff_data[1] = 0; |
| pu4_mb_coeff_data[2] = 0; |
| pu4_mb_coeff_data[3] = 0; |
| (*pv_mb_coeff_data) = pu4_mb_coeff_data + 4; |
| |
| /* Generate the control signal */ |
| /* Zero out the current plane's AC coefficients */ |
| (*pu4_cntrl) &= ((plane == u4_swap_uv) ? 0x0FFFFFFF : 0xF0FFFFFF); |
| |
| /* Similarly do for the NNZ also */ |
| *(pu1_nnz_ac - 4) = 0; |
| *(pu1_nnz_ac - 3) = 0; |
| *(pu1_nnz_ac - 2) = 0; |
| *(pu1_nnz_ac - 1) = 0; |
| } |
| else |
| { |
| *u1_cbp_c = u1_cbp_ac; |
| } |
| |
| if (u4_swap_uv) |
| { |
| pi2_res_sb = pi2_res_mb + i4_res_strd; /* Move to V plane ,ie 1dc row+ 4 ac row + 1 dc row */ |
| cntrl_pos = 31; /* The control bits are to be added for V bloc ie 31-4 th bit */ |
| pu1_nnz_ac = pu1_nnz + 1; /* Move the nnz to V block NNZ 1 dc + 1dc + 4 ac */ |
| |
| pu1_nnz_ac = pu1_nnz + 1; |
| } |
| else |
| pu1_nnz_ac = pu1_nnz + 6; /* Go to nnz of V plane */ |
| } |
| |
| /* restore the ptr basing on cbp */ |
| if (*u1_cbp_c == 0) |
| { |
| (*pv_mb_coeff_data) = ps_mb_coeff_data_dc; |
| } |
| else if (*u1_cbp_c == 1) |
| { |
| (*pv_mb_coeff_data) = ps_mb_coeff_data_ac; |
| } |
| |
| return ; |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief performs luma core coding when intra mode is i16x16 |
| * |
| * @par Description: |
| * If the current mb is to be coded as intra of mb type i16x16, the mb is first |
| * predicted using one of i16x16 prediction filters, basing on the intra mode |
| * chosen. Then, error is computed between the input blk and the estimated blk. |
| * This error is transformed (hierarchical transform i.e., dct followed by hada- |
| * -mard), quantized. The quantized coefficients are packed in scan order for |
| * entropy coding. |
| * |
| * @param[in] ps_proc_ctxt |
| * pointer to the current macro block context |
| * |
| * @returns u1_cbp_l |
| * coded block pattern luma |
| * |
| * @remarks none |
| * |
| ******************************************************************************* |
| */ |
| |
| UWORD8 ih264e_code_luma_intra_macroblock_16x16(process_ctxt_t *ps_proc) |
| { |
| /* Codec Context */ |
| codec_t *ps_codec = ps_proc->ps_codec; |
| |
| /* pointer to ref macro block */ |
| UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma; |
| |
| /* pointer to src macro block */ |
| UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma; |
| |
| /* pointer to prediction macro block */ |
| UWORD8 *pu1_pred_mb = NULL; |
| |
| /* pointer to residual macro block */ |
| WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; |
| |
| /* strides */ |
| WORD32 i4_src_strd = ps_proc->i4_src_strd; |
| WORD32 i4_rec_strd = ps_proc->i4_rec_strd; |
| WORD32 i4_pred_strd = ps_proc->i4_pred_strd; |
| WORD32 i4_res_strd = ps_proc->i4_res_strd; |
| |
| /* intra mode */ |
| UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode; |
| |
| /* coded block pattern */ |
| UWORD8 u1_cbp_l = 0; |
| |
| /* number of non zero coeffs*/ |
| UWORD32 au4_nnz[5]; |
| UWORD8 *pu1_nnz = (UWORD8 *)au4_nnz; |
| |
| /*Cntrol signal for itrans*/ |
| UWORD32 u4_cntrl; |
| |
| /* quantization parameters */ |
| quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; |
| |
| /* pointer to packed mb coeff data */ |
| void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); |
| |
| /* init nnz */ |
| au4_nnz[0] = 0; |
| au4_nnz[1] = 0; |
| au4_nnz[2] = 0; |
| au4_nnz[3] = 0; |
| au4_nnz[4] = 0; |
| |
| if (u1_intra_mode == PLANE_I16x16) |
| { |
| pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16_plane; |
| } |
| else |
| { |
| pu1_pred_mb = ps_proc->pu1_pred_mb_intra_16x16; |
| } |
| |
| /********************************************************/ |
| /* error estimation, */ |
| /* transform */ |
| /* quantization */ |
| /********************************************************/ |
| ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb, |
| pu1_pred_mb, pi2_res_mb, |
| i4_src_strd, i4_pred_strd, |
| i4_res_strd, |
| ps_qp_params->pu2_scale_mat, |
| ps_qp_params->pu2_thres_mat, |
| ps_qp_params->u1_qbits, |
| ps_qp_params->u4_dead_zone, |
| pu1_nnz, ENABLE_DC_TRANSFORM); |
| |
| /********************************************************/ |
| /* pack coeff data for entropy coding */ |
| /********************************************************/ |
| ih264e_pack_l_mb_i16(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l, |
| pu1_nnz, &u4_cntrl); |
| |
| /********************************************************/ |
| /* ierror estimation, */ |
| /* itransform */ |
| /* iquantization */ |
| /********************************************************/ |
| /* |
| *if refernce frame is not to be computed |
| *we only need the right and bottom border 4x4 blocks to predict next intra |
| *blocks, hence only compute them |
| */ |
| if (!ps_proc->u4_compute_recon) |
| { |
| u4_cntrl &= 0x111F8000; |
| } |
| |
| if (u4_cntrl) |
| { |
| ih264e_luma_16x16_idctrans_iquant_itrans_recon( |
| ps_codec, pi2_res_mb, pu1_pred_mb, pu1_ref_mb, |
| i4_res_strd, i4_pred_strd, i4_rec_strd, |
| ps_qp_params->pu2_iscale_mat, |
| ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div, |
| u4_cntrl, ENABLE_DC_TRANSFORM, |
| ps_proc->pv_scratch_buff); |
| } |
| else |
| { |
| ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb, i4_pred_strd, |
| i4_rec_strd, MB_SIZE, MB_SIZE, NULL, |
| 0); |
| } |
| |
| return (u1_cbp_l); |
| } |
| |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief performs luma core coding when intra mode is i4x4 |
| * |
| * @par Description: |
| * If the current mb is to be coded as intra of mb type i4x4, the mb is first |
| * predicted using one of i4x4 prediction filters, basing on the intra mode |
| * chosen. Then, error is computed between the input blk and the estimated blk. |
| * This error is dct transformed and quantized. The quantized coefficients are |
| * packed in scan order for entropy coding. |
| * |
| * @param[in] ps_proc_ctxt |
| * pointer to the current macro block context |
| * |
| * @returns u1_cbp_l |
| * coded block pattern luma |
| * |
| * @remarks |
| * The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order |
| * mentioned in h.264 specification |
| * |
| ******************************************************************************* |
| */ |
| UWORD8 ih264e_code_luma_intra_macroblock_4x4(process_ctxt_t *ps_proc) |
| { |
| /* Codec Context */ |
| codec_t *ps_codec = ps_proc->ps_codec; |
| |
| /* pointer to ref macro block */ |
| UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma; |
| |
| /* pointer to src macro block */ |
| UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma; |
| |
| /* pointer to prediction macro block */ |
| UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; |
| |
| /* pointer to residual macro block */ |
| WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; |
| |
| /* strides */ |
| WORD32 i4_src_strd = ps_proc->i4_src_strd; |
| WORD32 i4_rec_strd = ps_proc->i4_rec_strd; |
| WORD32 i4_pred_strd = ps_proc->i4_pred_strd; |
| |
| /* pointer to neighbors: left, top, top-left */ |
| UWORD8 *pu1_mb_a; |
| UWORD8 *pu1_mb_b; |
| UWORD8 *pu1_mb_c; |
| UWORD8 *pu1_mb_d; |
| |
| /* intra mode */ |
| UWORD8 u1_intra_mode = ps_proc->u1_l_i16_mode; |
| |
| /* neighbor availability */ |
| WORD32 i4_ngbr_avbl; |
| |
| /* neighbor pels for intra prediction */ |
| UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels; |
| |
| /* coded block pattern */ |
| UWORD8 u1_cbp_l = 0; |
| |
| /* number of non zero coeffs*/ |
| UWORD8 u1_nnz; |
| |
| /* quantization parameters */ |
| quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; |
| |
| /* pointer to packed mb coeff data */ |
| void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); |
| |
| /* pointer to packed mb coeff data */ |
| tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8; |
| |
| /* no of non zero coefficients in the current sub block */ |
| UWORD32 u4_nnz_cnt; |
| |
| /* significant coefficient map */ |
| UWORD32 u4_s_map; |
| |
| /* pointer to scanning matrix */ |
| const UWORD8 *pu1_scan_order = gu1_luma_scan_order; |
| |
| /*Dummy variable for 4x4 trans fucntion*/ |
| WORD16 i2_dc_dummy; |
| |
| /* temp var */ |
| UWORD32 i, b8, b4, u1_blk_x, u1_blk_y, u1_pix_x, u1_pix_y, coeff_cnt, mask; |
| |
| /* Process 16 4x4 lum sub-blocks of the MB in scan order */ |
| for (b8 = 0; b8 < 4; b8++) |
| { |
| u1_blk_x = GET_BLK_RASTER_POS_X(b8) << 3; |
| u1_blk_y = GET_BLK_RASTER_POS_Y(b8) << 3; |
| |
| /* if in case cbp for the 8x8 block is zero, send no residue */ |
| ps_mb_coeff_data_b8 = *pv_mb_coeff_data; |
| |
| for (b4 = 0; b4 < 4; b4++) |
| { |
| /* index of pel in MB */ |
| u1_pix_x = u1_blk_x + (GET_SUB_BLK_RASTER_POS_X(b4) << 2); |
| u1_pix_y = u1_blk_y + (GET_SUB_BLK_RASTER_POS_Y(b4) << 2); |
| |
| /* Initialize source and reference pointers */ |
| pu1_curr_mb = ps_proc->pu1_src_buf_luma + u1_pix_x + (u1_pix_y * i4_src_strd); |
| pu1_ref_mb = ps_proc->pu1_rec_buf_luma + u1_pix_x + (u1_pix_y * i4_rec_strd); |
| |
| /* pointer to left of ref macro block */ |
| pu1_mb_a = pu1_ref_mb - 1; |
| /* pointer to top of ref macro block */ |
| pu1_mb_b = pu1_ref_mb - i4_rec_strd; |
| /* pointer to topright of ref macro block */ |
| pu1_mb_c = pu1_mb_b + 4; |
| /* pointer to topleft macro block */ |
| pu1_mb_d = pu1_mb_b - 1; |
| |
| /* compute neighbor availability */ |
| i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4]; |
| |
| /* sub block intra mode */ |
| u1_intra_mode = ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4]; |
| |
| /********************************************************/ |
| /* gather prediction pels from neighbors for prediction */ |
| /********************************************************/ |
| /* left pels */ |
| if (i4_ngbr_avbl & LEFT_MB_AVAILABLE_MASK) |
| { |
| for (i = 0; i < 4; i++) |
| pu1_ngbr_pels_i4[4 - 1 - i] = pu1_mb_a[i * i4_rec_strd]; |
| } |
| else |
| { |
| memset(pu1_ngbr_pels_i4, 0, 4); |
| } |
| |
| /* top pels */ |
| if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK) |
| { |
| memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4); |
| } |
| else |
| { |
| memset(pu1_ngbr_pels_i4 + 5, 0, 4); |
| } |
| /* top left pels */ |
| if (i4_ngbr_avbl & TOP_LEFT_MB_AVAILABLE_MASK) |
| { |
| pu1_ngbr_pels_i4[4] = *pu1_mb_d; |
| } |
| else |
| { |
| pu1_ngbr_pels_i4[4] = 0; |
| } |
| /* top right pels */ |
| if (i4_ngbr_avbl & TOP_RIGHT_MB_AVAILABLE_MASK) |
| { |
| memcpy(pu1_ngbr_pels_i4+8+1,pu1_mb_c,4); |
| } |
| else if (i4_ngbr_avbl & TOP_MB_AVAILABLE_MASK) |
| { |
| memset(pu1_ngbr_pels_i4+8+1,pu1_ngbr_pels_i4[8],4); |
| } |
| |
| /********************************************************/ |
| /* prediction */ |
| /********************************************************/ |
| (ps_codec->apf_intra_pred_4_l)[u1_intra_mode](pu1_ngbr_pels_i4, |
| pu1_pred_mb, 0, |
| i4_pred_strd, |
| i4_ngbr_avbl); |
| |
| /********************************************************/ |
| /* error estimation, */ |
| /* transform */ |
| /* quantization */ |
| /********************************************************/ |
| ps_codec->pf_resi_trans_quant_4x4(pu1_curr_mb, pu1_pred_mb, |
| pi2_res_mb, i4_src_strd, |
| i4_pred_strd, |
| ps_qp_params->pu2_scale_mat, |
| ps_qp_params->pu2_thres_mat, |
| ps_qp_params->u1_qbits, |
| ps_qp_params->u4_dead_zone, |
| &u1_nnz, &i2_dc_dummy); |
| |
| /********************************************************/ |
| /* pack coeff data for entropy coding */ |
| /********************************************************/ |
| ps_mb_coeff_data = *pv_mb_coeff_data; |
| |
| /* write number of non zero coefficients */ |
| ps_mb_coeff_data->i4_sig_map_nnz = u1_nnz; |
| |
| if (u1_nnz) |
| { |
| for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < u1_nnz; coeff_cnt++) |
| { |
| if (pi2_res_mb[pu1_scan_order[coeff_cnt]]) |
| { |
| /* write residue */ |
| ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]]; |
| u4_s_map |= mask; |
| } |
| mask <<= 1; |
| } |
| /* write significant coeff map */ |
| ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); |
| |
| /* update ptr to coeff data */ |
| (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); |
| |
| /* cbp */ |
| u1_cbp_l |= (1 << b8); |
| } |
| else |
| { |
| (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; |
| } |
| |
| /********************************************************/ |
| /* ierror estimation, */ |
| /* itransform */ |
| /* iquantization */ |
| /********************************************************/ |
| if (u1_nnz) |
| ps_codec->pf_iquant_itrans_recon_4x4( |
| pi2_res_mb, pu1_pred_mb, pu1_ref_mb, |
| /*No input stride,*/i4_pred_strd, |
| i4_rec_strd, ps_qp_params->pu2_iscale_mat, |
| ps_qp_params->pu2_weigh_mat, |
| ps_qp_params->u1_qp_div, |
| ps_proc->pv_scratch_buff, 0, 0); |
| else |
| ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_ref_mb, |
| i4_pred_strd, i4_rec_strd, |
| BLK_SIZE, BLK_SIZE, NULL, |
| 0); |
| |
| } |
| |
| /* if the 8x8 block has no residue, nothing needs to be sent to entropy */ |
| if (!(u1_cbp_l & (1 << b8))) |
| { |
| *pv_mb_coeff_data = ps_mb_coeff_data_b8; |
| } |
| } |
| |
| return (u1_cbp_l); |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief performs luma core coding when intra mode is i4x4 |
| * |
| * @par Description: |
| * If the current mb is to be coded as intra of mb type i4x4, the mb is first |
| * predicted using one of i4x4 prediction filters, basing on the intra mode |
| * chosen. Then, error is computed between the input blk and the estimated blk. |
| * This error is dct transformed and quantized. The quantized coefficients are |
| * packed in scan order for entropy coding. |
| * |
| * @param[in] ps_proc_ctxt |
| * pointer to the current macro block context |
| * |
| * @returns u1_cbp_l |
| * coded block pattern luma |
| * |
| * @remarks |
| * The traversal of 4x4 subblocks in the 16x16 macroblock is as per the scan order |
| * mentioned in h.264 specification |
| * |
| ******************************************************************************* |
| */ |
| UWORD8 ih264e_code_luma_intra_macroblock_4x4_rdopt_on(process_ctxt_t *ps_proc) |
| { |
| /* Codec Context */ |
| codec_t *ps_codec = ps_proc->ps_codec; |
| |
| /* pointer to ref macro block */ |
| UWORD8 *pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4; |
| |
| /* pointer to recon buffer */ |
| UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma; |
| |
| /* pointer to residual macro block */ |
| WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4; |
| |
| /* strides */ |
| WORD32 i4_rec_strd = ps_proc->i4_rec_strd; |
| |
| /* number of non zero coeffs*/ |
| UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4; |
| |
| /* coded block pattern */ |
| UWORD8 u1_cbp_l = 0; |
| |
| /* pointer to packed mb coeff data */ |
| void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); |
| |
| /* pointer to packed mb coeff data */ |
| tu_sblk_coeff_data_t *ps_mb_coeff_data, *ps_mb_coeff_data_b8; |
| |
| /* no of non zero coefficients in the current sub block */ |
| UWORD32 u4_nnz_cnt; |
| |
| /* significant coefficient map */ |
| UWORD32 u4_s_map; |
| |
| /* pointer to scanning matrix */ |
| const UWORD8 *pu1_scan_order = gu1_luma_scan_order; |
| |
| /* temp var */ |
| UWORD32 b8, b4, coeff_cnt, mask; |
| |
| /* Process 16 4x4 lum sub-blocks of the MB in scan order */ |
| for (b8 = 0; b8 < 4; b8++) |
| { |
| /* if in case cbp for the 8x8 block is zero, send no residue */ |
| ps_mb_coeff_data_b8 = *pv_mb_coeff_data; |
| |
| for (b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE) |
| { |
| /********************************************************/ |
| /* pack coeff data for entropy coding */ |
| /********************************************************/ |
| ps_mb_coeff_data = *pv_mb_coeff_data; |
| |
| /* write number of non zero coefficients */ |
| ps_mb_coeff_data->i4_sig_map_nnz = *pu1_nnz; |
| |
| if (*pu1_nnz) |
| { |
| for (u4_nnz_cnt = 0, coeff_cnt = 0, mask = 1, u4_s_map = 0; u4_nnz_cnt < *pu1_nnz; coeff_cnt++) |
| { |
| if (pi2_res_mb[pu1_scan_order[coeff_cnt]]) |
| { |
| /* write residue */ |
| ps_mb_coeff_data->ai2_residue[u4_nnz_cnt++] = pi2_res_mb[pu1_scan_order[coeff_cnt]]; |
| u4_s_map |= mask; |
| } |
| mask <<= 1; |
| } |
| /* write significant coeff map */ |
| ps_mb_coeff_data->i4_sig_map_nnz |= (u4_s_map << 16); |
| |
| /* update ptr to coeff data */ |
| (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue + ALIGN2(u4_nnz_cnt); |
| |
| /* cbp */ |
| u1_cbp_l |= (1 << b8); |
| } |
| else |
| { |
| (*pv_mb_coeff_data) = ps_mb_coeff_data->ai2_residue; |
| } |
| } |
| |
| /* if the 8x8 block has no residue, nothing needs to be sent to entropy */ |
| if (!(u1_cbp_l & (1 << b8))) |
| { |
| *pv_mb_coeff_data = ps_mb_coeff_data_b8; |
| } |
| } |
| |
| /* memcpy recon */ |
| ps_codec->pf_inter_pred_luma_copy(pu1_ref_mb_intra_4x4, pu1_rec_mb, MB_SIZE, i4_rec_strd, MB_SIZE, MB_SIZE, NULL, 0); |
| |
| return (u1_cbp_l); |
| } |
| |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief performs chroma core coding for intra macro blocks |
| * |
| * @par Description: |
| * If the current MB is to be intra coded with mb type chroma I8x8, the MB is |
| * first predicted using intra 8x8 prediction filters. The predicted data is |
| * compared with the input for error and the error is transformed. The DC |
| * coefficients of each transformed sub blocks are further transformed using |
| * Hadamard transform. The resulting coefficients are quantized, packed and sent |
| * for entropy coding. |
| * |
| * @param[in] ps_proc_ctxt |
| * pointer to the current macro block context |
| * |
| * @returns u1_cbp_c |
| * coded block pattern chroma |
| * |
| * @remarks |
| * The traversal of 4x4 subblocks in the 8x8 macroblock is as per the scan order |
| * mentioned in h.264 specification |
| * |
| ******************************************************************************* |
| */ |
| UWORD8 ih264e_code_chroma_intra_macroblock_8x8(process_ctxt_t *ps_proc) |
| { |
| /* Codec Context */ |
| codec_t *ps_codec = ps_proc->ps_codec; |
| |
| /* pointer to ref macro block */ |
| UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma; |
| |
| /* pointer to src macro block */ |
| UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma; |
| |
| /* pointer to prediction macro block */ |
| UWORD8 *pu1_pred_mb = NULL; |
| |
| /* pointer to residual macro block */ |
| WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; |
| |
| /* strides */ |
| WORD32 i4_src_strd = ps_proc->i4_src_chroma_strd; |
| WORD32 i4_rec_strd = ps_proc->i4_rec_strd; |
| WORD32 i4_pred_strd = ps_proc->i4_pred_strd; |
| WORD32 i4_res_strd = ps_proc->i4_res_strd; |
| |
| /* intra mode */ |
| UWORD8 u1_intra_mode = ps_proc->u1_c_i8_mode; |
| |
| /* coded block pattern */ |
| UWORD8 u1_cbp_c = 0; |
| |
| /* number of non zero coeffs*/ |
| UWORD8 au1_nnz[18] = {0}; |
| |
| /* quantization parameters */ |
| quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1]; |
| |
| /* Control signal for inverse transform */ |
| UWORD32 u4_cntrl; |
| |
| /* pointer to packed mb coeff data */ |
| void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); |
| |
| /* See if we need to swap U and V plances for entropy */ |
| UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU; |
| |
| if (PLANE_CH_I8x8 == u1_intra_mode) |
| { |
| pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma_plane; |
| } |
| else |
| { |
| pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma; |
| } |
| |
| /********************************************************/ |
| /* error estimation, */ |
| /* transform */ |
| /* quantization */ |
| /********************************************************/ |
| ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb, |
| pu1_pred_mb, pi2_res_mb, |
| i4_src_strd, i4_pred_strd, |
| i4_res_strd, |
| ps_qp_params->pu2_scale_mat, |
| ps_qp_params->pu2_thres_mat, |
| ps_qp_params->u1_qbits, |
| ps_qp_params->u4_dead_zone, |
| au1_nnz); |
| |
| /********************************************************/ |
| /* pack coeff data for entropy coding */ |
| /********************************************************/ |
| ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c, |
| au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv); |
| |
| /********************************************************/ |
| /* ierror estimation, */ |
| /* itransform */ |
| /* iquantization */ |
| /********************************************************/ |
| ih264e_chroma_8x8_idctrans_iquant_itrans_recon(ps_codec, pi2_res_mb, |
| pu1_pred_mb, pu1_ref_mb, |
| i4_res_strd, i4_pred_strd, |
| i4_rec_strd, |
| ps_qp_params->pu2_iscale_mat, |
| ps_qp_params->pu2_weigh_mat, |
| ps_qp_params->u1_qp_div, |
| u4_cntrl, |
| ps_proc->pv_scratch_buff); |
| return (u1_cbp_c); |
| } |
| |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief performs luma core coding when mode is inter |
| * |
| * @par Description: |
| * If the current mb is to be coded as inter the mb is predicted based on the |
| * sub mb partitions and corresponding motion vectors generated by ME. Then, |
| * error is computed between the input blk and the estimated blk. This error is |
| * transformed, quantized. The quantized coefficients are packed in scan order |
| * for entropy coding |
| * |
| * @param[in] ps_proc_ctxt |
| * pointer to the current macro block context |
| * |
| * @returns u1_cbp_l |
| * coded block pattern luma |
| * |
| * @remarks none |
| * |
| ******************************************************************************* |
| */ |
| |
| UWORD8 ih264e_code_luma_inter_macroblock_16x16(process_ctxt_t *ps_proc) |
| { |
| /* Codec Context */ |
| codec_t *ps_codec = ps_proc->ps_codec; |
| |
| /* pointer to ref macro block */ |
| UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_luma; |
| |
| /* pointer to src macro block */ |
| UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma; |
| |
| /* pointer to prediction macro block */ |
| UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; |
| |
| /* pointer to residual macro block */ |
| WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; |
| |
| /* strides */ |
| WORD32 i4_src_strd = ps_proc->i4_src_strd; |
| WORD32 i4_rec_strd = ps_proc->i4_rec_strd; |
| WORD32 i4_pred_strd = ps_proc->i4_pred_strd; |
| WORD32 i4_res_strd = ps_proc->i4_res_strd; |
| |
| /* coded block pattern */ |
| UWORD8 u1_cbp_l = 0; |
| |
| /*Control signal of itrans*/ |
| UWORD32 u4_cntrl; |
| |
| /* number of non zero coeffs*/ |
| UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz; |
| |
| /* quantization parameters */ |
| quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; |
| |
| /* pointer to packed mb coeff data */ |
| void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); |
| |
| /* pseudo pred buffer */ |
| UWORD8 *pu1_pseudo_pred = pu1_pred_mb; |
| |
| /* pseudo pred buffer stride */ |
| WORD32 i4_pseudo_pred_strd = i4_pred_strd; |
| |
| /* init nnz */ |
| ps_proc->au4_nnz[0] = 0; |
| ps_proc->au4_nnz[1] = 0; |
| ps_proc->au4_nnz[2] = 0; |
| ps_proc->au4_nnz[3] = 0; |
| ps_proc->au4_nnz[4] = 0; |
| |
| /********************************************************/ |
| /* prediction */ |
| /********************************************************/ |
| ih264e_motion_comp_luma(ps_proc, &pu1_pseudo_pred, &i4_pseudo_pred_strd); |
| |
| /********************************************************/ |
| /* error estimation, */ |
| /* transform */ |
| /* quantization */ |
| /********************************************************/ |
| if (ps_proc->u4_min_sad_reached == 0 || ps_proc->u4_min_sad != 0) |
| { |
| ih264e_luma_16x16_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb, |
| pu1_pseudo_pred, pi2_res_mb, |
| i4_src_strd, |
| i4_pseudo_pred_strd, |
| i4_res_strd, |
| ps_qp_params->pu2_scale_mat, |
| ps_qp_params->pu2_thres_mat, |
| ps_qp_params->u1_qbits, |
| ps_qp_params->u4_dead_zone, |
| pu1_nnz, |
| DISABLE_DC_TRANSFORM); |
| |
| /********************************************************/ |
| /* pack coeff data for entropy coding */ |
| /********************************************************/ |
| ih264e_pack_l_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_l, |
| pu1_nnz, ps_codec->u4_thres_resi, &u4_cntrl); |
| } |
| else |
| { |
| u1_cbp_l = 0; |
| u4_cntrl = 0; |
| } |
| |
| /********************************************************/ |
| /* ierror estimation, */ |
| /* itransform */ |
| /* iquantization */ |
| /********************************************************/ |
| |
| /*If the frame is not to be used for P frame reference or dumping recon |
| * we only will use the reocn for only predicting intra Mbs |
| * THis will need only right and bottom edge 4x4 blocks recon |
| * Hence we selectively enable them using control signal(including DC) |
| */ |
| if (ps_proc->u4_compute_recon != 1) |
| { |
| u4_cntrl &= 0x111F0000; |
| } |
| |
| if (u4_cntrl) |
| { |
| ih264e_luma_16x16_idctrans_iquant_itrans_recon( |
| ps_codec, pi2_res_mb, pu1_pseudo_pred, pu1_rec_mb, |
| i4_res_strd, i4_pseudo_pred_strd, i4_rec_strd, |
| ps_qp_params->pu2_iscale_mat, |
| ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div, |
| u4_cntrl /*Cntrl*/, DISABLE_DC_TRANSFORM, |
| ps_proc->pv_scratch_buff); |
| } |
| else |
| { |
| ps_codec->pf_inter_pred_luma_copy(pu1_pseudo_pred, pu1_rec_mb, |
| i4_pseudo_pred_strd, i4_rec_strd, |
| MB_SIZE, MB_SIZE, NULL, 0); |
| } |
| |
| |
| return (u1_cbp_l); |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief performs chroma core coding for inter macro blocks |
| * |
| * @par Description: |
| * If the current mb is to be coded as inter predicted mb,based on the sub mb partitions |
| * and corresponding motion vectors generated by ME ,prediction is done. |
| * Then, error is computed between the input blk and the estimated blk. |
| * This error is transformed , quantized. The quantized coefficients |
| * are packed in scan order for |
| * entropy coding. |
| * |
| * @param[in] ps_proc_ctxt |
| * pointer to the current macro block context |
| * |
| * @returns u1_cbp_l |
| * coded block pattern chroma |
| * |
| * @remarks none |
| * |
| ******************************************************************************* |
| */ |
| UWORD8 ih264e_code_chroma_inter_macroblock_8x8(process_ctxt_t *ps_proc) |
| { |
| /* Codec Context */ |
| codec_t *ps_codec = ps_proc->ps_codec; |
| |
| /* pointer to ref macro block */ |
| UWORD8 *pu1_rec_mb = ps_proc->pu1_rec_buf_chroma; |
| |
| /* pointer to src macro block */ |
| UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma; |
| |
| /* pointer to prediction macro block */ |
| UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; |
| |
| /* pointer to residual macro block */ |
| WORD16 *pi2_res_mb = ps_proc->pi2_res_buf; |
| |
| /* strides */ |
| WORD32 i4_src_strd = ps_proc->i4_src_chroma_strd; |
| WORD32 i4_rec_strd = ps_proc->i4_rec_strd; |
| WORD32 i4_pred_strd = ps_proc->i4_pred_strd; |
| WORD32 i4_res_strd = ps_proc->i4_res_strd; |
| |
| /* coded block pattern */ |
| UWORD8 u1_cbp_c = 0; |
| |
| /*Control signal for inverse transform*/ |
| UWORD32 u4_cntrl; |
| |
| /* number of non zero coeffs*/ |
| UWORD8 au1_nnz[10] = {0}; |
| |
| /* quantization parameters */ |
| quant_params_t *ps_qp_params = ps_proc->ps_qp_params[1]; |
| |
| /* pointer to packed mb coeff data */ |
| void **pv_mb_coeff_data = &(ps_proc->pv_mb_coeff_data); |
| |
| /*See if we need to swap U and V plances for entropy*/ |
| UWORD32 u4_swap_uv = ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU; |
| |
| /********************************************************/ |
| /* prediction */ |
| /********************************************************/ |
| ih264e_motion_comp_chroma(ps_proc); |
| |
| /********************************************************/ |
| /* error estimation, */ |
| /* transform */ |
| /* quantization */ |
| /********************************************************/ |
| ih264e_chroma_8x8_resi_trans_dctrans_quant(ps_codec, pu1_curr_mb, |
| pu1_pred_mb, pi2_res_mb, |
| i4_src_strd, i4_pred_strd, |
| i4_res_strd, |
| ps_qp_params->pu2_scale_mat, |
| ps_qp_params->pu2_thres_mat, |
| ps_qp_params->u1_qbits, |
| ps_qp_params->u4_dead_zone, |
| au1_nnz); |
| |
| /********************************************************/ |
| /* pack coeff data for entropy coding */ |
| /********************************************************/ |
| ih264e_pack_c_mb(pi2_res_mb, pv_mb_coeff_data, i4_res_strd, &u1_cbp_c, |
| au1_nnz, ps_codec->u4_thres_resi, &u4_cntrl, u4_swap_uv); |
| |
| /********************************************************/ |
| /* ierror estimation, */ |
| /* itransform */ |
| /* iquantization */ |
| /********************************************************/ |
| |
| /* If the frame is not to be used for P frame reference or dumping recon |
| * we only will use the reocn for only predicting intra Mbs |
| * THis will need only right and bottom edge 4x4 blocks recon |
| * Hence we selectively enable them using control signal(including DC) |
| */ |
| if (!ps_proc->u4_compute_recon) |
| { |
| u4_cntrl &= 0x7700C000; |
| } |
| |
| if (u4_cntrl) |
| { |
| ih264e_chroma_8x8_idctrans_iquant_itrans_recon( |
| ps_codec, pi2_res_mb, pu1_pred_mb, pu1_rec_mb, |
| i4_res_strd, i4_pred_strd, i4_rec_strd, |
| ps_qp_params->pu2_iscale_mat, |
| ps_qp_params->pu2_weigh_mat, ps_qp_params->u1_qp_div, |
| u4_cntrl, ps_proc->pv_scratch_buff); |
| } |
| else |
| { |
| ps_codec->pf_inter_pred_luma_copy(pu1_pred_mb, pu1_rec_mb, i4_pred_strd, |
| i4_rec_strd, MB_SIZE >> 1, MB_SIZE, |
| NULL, 0); |
| } |
| |
| return (u1_cbp_c); |
| } |