| /****************************************************************************** |
| * |
| * Copyright (C) 2018 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at: |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| ***************************************************************************** |
| * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| */ |
| /** |
| ******************************************************************************* |
| * @file |
| * ihevce_deblk.c |
| * |
| * @brief |
| * Contains definition for the ctb level deblk function |
| * |
| * @author |
| * ittiam |
| * |
| * @List of Functions: |
| * ihevce_deblk_populate_qp_map() |
| * ihevce_deblk_ctb() |
| * ihevce_hbd_deblk_ctb() |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| |
| /*****************************************************************************/ |
| /* File Includes */ |
| /*****************************************************************************/ |
| /* System include files */ |
| #include <stdio.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <assert.h> |
| #include <stdarg.h> |
| #include <math.h> |
| |
| /* User include files */ |
| #include "ihevc_typedefs.h" |
| #include "itt_video_api.h" |
| #include "ihevce_api.h" |
| |
| #include "rc_cntrl_param.h" |
| #include "rc_frame_info_collector.h" |
| #include "rc_look_ahead_params.h" |
| |
| #include "ihevc_defs.h" |
| #include "ihevc_debug.h" |
| #include "ihevc_structs.h" |
| #include "ihevc_platform_macros.h" |
| #include "ihevc_deblk.h" |
| #include "ihevc_deblk_tables.h" |
| #include "ihevc_common_tables.h" |
| #include "ihevc_itrans_recon.h" |
| #include "ihevc_chroma_itrans_recon.h" |
| #include "ihevc_chroma_intra_pred.h" |
| #include "ihevc_intra_pred.h" |
| #include "ihevc_inter_pred.h" |
| #include "ihevc_mem_fns.h" |
| #include "ihevc_padding.h" |
| #include "ihevc_weighted_pred.h" |
| #include "ihevc_sao.h" |
| #include "ihevc_resi_trans.h" |
| #include "ihevc_quant_iquant_ssd.h" |
| #include "ihevc_cabac_tables.h" |
| |
| #include "ihevce_defs.h" |
| #include "ihevce_hle_interface.h" |
| #include "ihevce_lap_enc_structs.h" |
| #include "ihevce_multi_thrd_structs.h" |
| #include "ihevce_me_common_defs.h" |
| #include "ihevce_had_satd.h" |
| #include "ihevce_error_codes.h" |
| #include "ihevce_bitstream.h" |
| #include "ihevce_cabac.h" |
| #include "ihevce_rdoq_macros.h" |
| #include "ihevce_function_selector.h" |
| #include "ihevce_enc_structs.h" |
| #include "ihevce_entropy_structs.h" |
| #include "ihevce_cmn_utils_instr_set_router.h" |
| #include "ihevce_enc_loop_structs.h" |
| #include "ihevce_common_utils.h" |
| #include "ihevce_global_tables.h" |
| #include "ihevce_deblk.h" |
| #include "ihevce_tile_interface.h" |
| |
| /*****************************************************************************/ |
| /* Function Definitions */ |
| /*****************************************************************************/ |
| |
| /*! |
| ****************************************************************************** |
| * \if Function name : ihevce_deblk_populate_qp_map \endif |
| * |
| * \brief |
| * |
| * |
| ***************************************************************************** |
| */ |
| void ihevce_deblk_populate_qp_map( |
| ihevce_enc_loop_ctxt_t *ps_ctxt, |
| deblk_ctbrow_prms_t *ps_deblk_ctb_row_params, |
| ctb_enc_loop_out_t *ps_ctb_out_dblk, |
| WORD32 vert_ctr, |
| frm_ctb_ctxt_t *ps_frm_ctb_prms, |
| ihevce_tile_params_t *ps_col_tile_params) |
| { |
| ctb_enc_loop_out_t *ps_ctb_out; |
| WORD32 ctb_ctr, ctb_start, ctb_end; |
| WORD32 tile_qp_offset, tile_qp_size, i4_offset_for_last_cu_qp; |
| /* Create the Qp map for the entire current CTB-row for deblocking purpose(only)*/ |
| /* Do this iff cur pic is referred or recon dump is enabled or psnr calc is on*/ |
| /*Qp of the last CU of previous CTB row*/ |
| WORD8 i1_last_cu_qp; |
| /*A pointer pointing to the top 4x4 block's Qp for all CTb rows*/ |
| WORD8 *pi1_qp_top_4x4_ctb_row = |
| ps_deblk_ctb_row_params->api1_qp_top_4x4_ctb_row[ps_ctxt->i4_enc_frm_id] + |
| (ps_deblk_ctb_row_params->u4_qp_top_4x4_buf_size * ps_ctxt->i4_bitrate_instance_num); |
| |
| UWORD32 u4_qp_top_4x4_buf_strd = ps_deblk_ctb_row_params->u4_qp_top_4x4_buf_strd; |
| |
| /*The Qp map which has to be populated*/ |
| UWORD32 u4_qp_buffer_stride = ps_deblk_ctb_row_params->u4_qp_buffer_stride; |
| WORD8 *pi1_ctb_tile_qp = ps_deblk_ctb_row_params->pi1_ctb_row_qp; |
| |
| /*Temporary pointers to Qp map at CTB level*/ |
| WORD8 *pi1_ctb_qp_map_tile; |
| |
| i4_offset_for_last_cu_qp = ps_ctxt->pi4_offset_for_last_cu_qp[ps_ctxt->i4_tile_col_idx]; |
| /* total QPs to be copied for current row is : */ |
| tile_qp_size = i4_offset_for_last_cu_qp + 1; |
| /*Pointing to the first CTB of current CTB row*/ |
| ps_ctb_out = ps_ctb_out_dblk; |
| /* Offset req. for the row QP to the tile start */ |
| tile_qp_offset = ps_col_tile_params->i4_first_ctb_x * (ps_frm_ctb_prms->i4_ctb_size / 4); |
| |
| ctb_start = ps_col_tile_params->i4_first_ctb_x; |
| ctb_end = |
| (ps_col_tile_params->i4_first_ctb_x + ps_col_tile_params->i4_curr_tile_wd_in_ctb_unit); |
| |
| if(vert_ctr) /*Not first CTB row of frame*/ |
| { |
| /*copy from top4x4_array data stored by upper CTB-row to qp-map*/ |
| memcpy( |
| pi1_ctb_tile_qp, |
| (pi1_qp_top_4x4_ctb_row + (vert_ctr - 1) * u4_qp_top_4x4_buf_strd + tile_qp_offset), |
| tile_qp_size); |
| } |
| |
| /*pu1_ctb_row_qp points to top4x4 row in Qp-map. |
| Now pointing pu1_ctb_qp_map to cur 4x4 row*/ |
| pi1_ctb_qp_map_tile = pi1_ctb_tile_qp + u4_qp_buffer_stride; |
| |
| /* This i1_last_cu_qp will be conditionally overwritten later */ |
| i1_last_cu_qp = ps_ctxt->i4_frame_qp; |
| |
| /* -- Loop over all the CTBs in a CTB-row for populating the Qp-map ----- */ |
| for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++) |
| { |
| WORD32 cu_ctr; |
| cu_enc_loop_out_t *ps_curr_cu; |
| |
| /* Update i1_last_cu_qp based on CTB's position in tile */ |
| update_last_coded_cu_qp( |
| (ps_deblk_ctb_row_params->pi1_ctb_row_qp + i4_offset_for_last_cu_qp), |
| ps_ctxt->i1_entropy_coding_sync_enabled_flag, |
| ps_frm_ctb_prms, |
| ps_ctxt->i4_frame_qp, |
| vert_ctr, |
| ctb_ctr, |
| &i1_last_cu_qp); |
| |
| /* store the pointer of first cu of current ctb */ |
| ps_curr_cu = ps_ctb_out->ps_enc_cu; |
| |
| /* --------- loop over all the CUs in the CTB --------------- */ |
| for(cu_ctr = 0; cu_ctr < ps_ctb_out->u1_num_cus_in_ctb; cu_ctr++) |
| { |
| UWORD8 u1_vert_4x4, u1_horz_4x4; //for_loop counters |
| WORD8 *pi1_cu_qp_map; |
| |
| WORD8 i1_qp, i1_qp_left, i1_qp_top; |
| |
| pi1_cu_qp_map = pi1_ctb_qp_map_tile + |
| (ps_curr_cu->b3_cu_pos_y * 2) * u4_qp_buffer_stride + |
| (ps_curr_cu->b3_cu_pos_x * 2); |
| |
| /*If the current CU is coded in skip_mode/zero_CBF then |
| for deblocking, Qp of the previously coded CU will be used*/ |
| if(ps_curr_cu->b1_skip_flag || ps_curr_cu->b1_no_residual_syntax_flag) |
| { |
| if(0 == ps_curr_cu->b3_cu_pos_x) |
| i1_qp_left = i1_last_cu_qp; |
| else |
| i1_qp_left = *(pi1_cu_qp_map - 1); |
| |
| if(0 == ps_curr_cu->b3_cu_pos_y) |
| i1_qp_top = i1_last_cu_qp; |
| else |
| i1_qp_top = *(pi1_cu_qp_map - u4_qp_buffer_stride); |
| |
| i1_qp = (i1_qp_left + i1_qp_top + 1) / 2; |
| |
| if(0 == ps_curr_cu->b1_first_cu_in_qg) |
| { |
| i1_qp = i1_last_cu_qp; |
| } |
| } |
| else |
| { |
| i1_qp = ps_curr_cu->i1_cu_qp; |
| } |
| |
| i1_last_cu_qp = i1_qp; |
| |
| /*---- Loop for populating Qp map for the current CU -------*/ |
| for(u1_vert_4x4 = 0; u1_vert_4x4 < (ps_curr_cu->b4_cu_size * 2); u1_vert_4x4++) |
| { |
| for(u1_horz_4x4 = 0; u1_horz_4x4 < (ps_curr_cu->b4_cu_size * 2); u1_horz_4x4++) |
| { |
| pi1_cu_qp_map[u1_horz_4x4] = i1_qp; |
| } |
| pi1_cu_qp_map += u4_qp_buffer_stride; |
| } |
| /*Update Qp-map ptr. Qp map is at 4x4 level but b4_cu_size is at 8x8 level*/ |
| ps_curr_cu++; |
| } |
| pi1_ctb_qp_map_tile += (ps_frm_ctb_prms->i4_ctb_size / 4); //one qp per 4x4 block. |
| ps_ctb_out++; |
| |
| } //for(ctb_ctr = 0; ctb_ctr < num_ctbs_horz; ctb_ctr++) |
| |
| /*fill into the top4x4_array Qp for the lower CTB-row from bottom part of cur CTB row*/ |
| memcpy( |
| (pi1_qp_top_4x4_ctb_row + vert_ctr * u4_qp_top_4x4_buf_strd + tile_qp_offset), |
| (pi1_ctb_tile_qp + (ps_frm_ctb_prms->i4_ctb_size / 4) * u4_qp_buffer_stride), |
| tile_qp_size); |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * Deblock CTB level function. |
| * |
| * @par Description: |
| * For a given CTB, deblocking on both vertical and |
| * horizontal edges is done. Both the luma and chroma |
| * blocks are processed |
| * |
| * @param[in] |
| * ps_deblk: Pointer to the deblock context |
| * last_col: if the CTB is the last CTB of current CTB-row value is 1 else 0 |
| * ps_deblk_ctb_row_params: deblk ctb row params |
| * |
| * @returns |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| void ihevce_deblk_ctb( |
| deblk_ctb_params_t *ps_deblk, WORD32 last_col, deblk_ctbrow_prms_t *ps_deblk_ctb_row_params) |
| { |
| WORD32 ctb_size; |
| UWORD32 u4_bs; |
| WORD32 bs_lz; /*Leading zeros in boundary strength*/ |
| WORD32 qp_p, qp_q; |
| UWORD8 *pu1_src; |
| UWORD8 *pu1_src_uv; |
| UWORD8 *pu1_curr_src; |
| WORD32 col_size; |
| WORD32 col, row, i4_edge_count; |
| WORD32 num_columns_for_vert_filt; |
| WORD32 num_blks_for_vert_filt; |
| WORD32 num_rows_for_horz_filt; |
| |
| ihevc_deblk_chroma_horz_ft *pf_deblk_chroma_horz; |
| ihevc_deblk_chroma_horz_ft *pf_deblk_chroma_vert; |
| |
| /* Filter flags are packed along with the qp info. |
| 6 out of the 8 bits correspond to qp and 1 to filter flag. */ |
| /* filter_p and filter_q are initialized to 1. |
| They are to be extracted along with the qp info. */ |
| WORD32 filter_p, filter_q; |
| WORD8 *pi1_ctb_row_qp_p, *pi1_ctb_row_qp_temp; |
| WORD8 *pi1_ctb_row_qp_q; |
| |
| func_selector_t *ps_func_slector = ps_deblk->ps_func_selector; |
| |
| WORD32 left_luma_edge_filter_flag = ps_deblk->i4_deblock_left_ctb_edge; |
| WORD32 top_luma_edge_filter_flag = ps_deblk->i4_deblock_top_ctb_edge; |
| WORD32 left_chroma_edge_filter_flag = ps_deblk->i4_deblock_left_ctb_edge; |
| WORD32 top_chroma_edge_filter_flag = ps_deblk->i4_deblock_top_ctb_edge; |
| UWORD32 *bs_vert = ps_deblk_ctb_row_params->pu4_ctb_row_bs_vert; |
| UWORD32 *bs_horz = ps_deblk_ctb_row_params->pu4_ctb_row_bs_horz; |
| UWORD32 *bs_vert_uv = bs_vert; |
| UWORD32 *bs_horz_uv = bs_horz; |
| UWORD32 u4_qp_buffer_stride = ps_deblk_ctb_row_params->u4_qp_buffer_stride; |
| UWORD8 u1_is_422 = (ps_deblk->u1_chroma_array_type == 2); |
| |
| if(u1_is_422) |
| { |
| pf_deblk_chroma_horz = ps_func_slector->ihevc_deblk_422chroma_horz_fptr; |
| pf_deblk_chroma_vert = ps_func_slector->ihevc_deblk_422chroma_vert_fptr; |
| } |
| else |
| { |
| pf_deblk_chroma_horz = ps_func_slector->ihevc_deblk_chroma_horz_fptr; |
| pf_deblk_chroma_vert = ps_func_slector->ihevc_deblk_chroma_vert_fptr; |
| } |
| |
| ctb_size = ps_deblk->i4_ctb_size; |
| |
| /* The PCM filter flag and bypass trans flag are always set to 1 in encoder profile */ |
| /* Can be removed during optimization */ |
| filter_q = 1; |
| filter_p = 1; |
| |
| ////////////////////////////////////////////////////////////////////////////// |
| /* Luma Veritcal Edge */ |
| pu1_src = ps_deblk->pu1_ctb_y; |
| pi1_ctb_row_qp_temp = ps_deblk_ctb_row_params->pi1_ctb_row_qp + u4_qp_buffer_stride; |
| num_columns_for_vert_filt = ctb_size / 8; |
| num_blks_for_vert_filt = ctb_size / 4; |
| |
| for(i4_edge_count = 0; i4_edge_count < num_columns_for_vert_filt; i4_edge_count++) |
| { |
| u4_bs = *bs_vert; |
| /* get the current 4x4 vertical pointer */ |
| pu1_curr_src = pu1_src; |
| pi1_ctb_row_qp_q = pi1_ctb_row_qp_temp + (i4_edge_count << 1); |
| |
| /* If the current edge is not the 1st edge of frame or slice */ |
| if(1 == left_luma_edge_filter_flag) |
| { |
| for(row = 0; row < num_blks_for_vert_filt;) |
| { |
| bs_lz = CLZ(u4_bs) >> 1; |
| /* If BS = 0, skip the egde filtering */ |
| if(0 != bs_lz) |
| { |
| u4_bs = u4_bs << (bs_lz << 1); |
| pu1_curr_src += ((bs_lz << 2) * ps_deblk->i4_luma_pic_stride); |
| pi1_ctb_row_qp_q += (bs_lz * u4_qp_buffer_stride); |
| row += bs_lz; |
| continue; |
| } |
| qp_p = *(pi1_ctb_row_qp_q - 1); |
| qp_q = *pi1_ctb_row_qp_q; |
| |
| ps_func_slector->ihevc_deblk_luma_vert_fptr( |
| pu1_curr_src, |
| ps_deblk->i4_luma_pic_stride, |
| (u4_bs >> 30), /* bits 31 and 30 are extracted */ |
| qp_p, |
| qp_q, |
| ps_deblk->i4_beta_offset_div2, |
| ps_deblk->i4_tc_offset_div2, |
| filter_p, |
| filter_q); |
| |
| u4_bs = u4_bs << 2; |
| pu1_curr_src += (ps_deblk->i4_luma_pic_stride << 2); |
| pi1_ctb_row_qp_q += u4_qp_buffer_stride; |
| row++; |
| } |
| } |
| |
| /* Increment the boundary strength and src pointer for the next column */ |
| bs_vert += 1; |
| pu1_src += 8; |
| |
| /* Enable for the next edges of ctb*/ |
| left_luma_edge_filter_flag = 1; |
| } |
| |
| ////////////////////////////////////////////////////////////////////////////// |
| /* Chroma Veritcal Edge */ |
| pu1_src_uv = ps_deblk->pu1_ctb_uv; |
| pi1_ctb_row_qp_temp = ps_deblk_ctb_row_params->pi1_ctb_row_qp + u4_qp_buffer_stride; |
| |
| /* Column spacing is 4 for each chroma component */ |
| /* and hence 8 when they are interleaved. */ |
| /* But, only those columns with a x co-ordinate */ |
| /* that is divisiblee by 8 are filtered */ |
| /* Hence, denominator is 16 */ |
| num_columns_for_vert_filt = ctb_size / 16; |
| /* blk_size is 4 and chroma_ctb_height is ctb_size/2 */ |
| num_blks_for_vert_filt = (0 == u1_is_422) ? (ctb_size / 2) / 4 : (ctb_size) / 4; |
| |
| for(i4_edge_count = 0; i4_edge_count < num_columns_for_vert_filt; i4_edge_count++) |
| { |
| /* Every alternate boundary strength value is used for 420 chroma */ |
| u4_bs = *(bs_vert_uv) & ((0 == u1_is_422) ? 0x88888888 : 0xaaaaaaaa); |
| pu1_curr_src = pu1_src_uv; |
| pi1_ctb_row_qp_q = pi1_ctb_row_qp_temp + (i4_edge_count << 2); |
| |
| /* If the current edge is not the 1st edge of frame or slice */ |
| if(1 == left_chroma_edge_filter_flag) |
| { |
| /* Each 'bs' is 2 bits long */ |
| /* The divby4 in 420 is */ |
| /* necessitated by the fact that */ |
| /* chroma ctb_ht is half that of luma */ |
| WORD32 i4_log2_num_bits_per_bs = ((0 == u1_is_422) + 1); |
| /* i4_sub_heightC = 2 for 420 */ |
| /* i4_sub_heightC = 1 for 422 */ |
| WORD32 i4_sub_heightC = i4_log2_num_bits_per_bs; |
| |
| for(row = 0; row < num_blks_for_vert_filt;) |
| { |
| bs_lz = CLZ(u4_bs) >> i4_log2_num_bits_per_bs; |
| |
| /* If BS = 0, skip the egde filtering */ |
| if(0 != bs_lz) |
| { |
| row += bs_lz; |
| u4_bs = u4_bs << (bs_lz << i4_log2_num_bits_per_bs); |
| /* '<<2' because of blk_size being 4x4 */ |
| pu1_curr_src += ((bs_lz << 2) * ps_deblk->i4_chroma_pic_stride); |
| |
| /* In 420, every alternate QP row is skipped, because chroma height */ |
| /* In 422, no row is skipped */ |
| pi1_ctb_row_qp_q += ((u4_qp_buffer_stride << (i4_sub_heightC - 1)) * bs_lz); |
| |
| continue; |
| } |
| |
| qp_p = *(pi1_ctb_row_qp_q - i4_sub_heightC); |
| qp_q = *pi1_ctb_row_qp_q; |
| |
| pf_deblk_chroma_vert( |
| pu1_curr_src, |
| ps_deblk->i4_chroma_pic_stride, |
| qp_p, |
| qp_q, |
| ps_deblk->i4_cb_qp_indx_offset, |
| ps_deblk->i4_cr_qp_indx_offset, |
| ps_deblk->i4_tc_offset_div2, |
| filter_p, |
| filter_q); |
| |
| u4_bs = u4_bs << (1 << i4_log2_num_bits_per_bs); |
| pu1_curr_src += (ps_deblk->i4_chroma_pic_stride << 2); |
| pi1_ctb_row_qp_q += (u4_qp_buffer_stride << (i4_sub_heightC - 1)); |
| row++; |
| } |
| } |
| /* Increment the boundary strength by 2 and src pointer for the next column */ |
| /* As the edge filtering happens for alternate column */ |
| bs_vert_uv += 2; |
| pu1_src_uv += 16; |
| left_chroma_edge_filter_flag = 1; |
| } |
| |
| ////////////////////////////////////////////////////////////////////////////// |
| |
| /* Luma Horizontal Edge */ |
| pu1_src = ps_deblk->pu1_ctb_y; |
| col_size = ctb_size / 4; |
| |
| /* If the ctb is the 1st ctb of row, */ |
| /* Decrement the loop count to exclude filtering of last 4 pixels */ |
| /* else shift the src pointer by 4 pixels to do filtering for shifted ctb */ |
| if(ps_deblk->i4_deblock_left_ctb_edge == 1) |
| { |
| pu1_src -= 4; |
| /*If the ctb is at the horizonatl end of PIC*/ |
| /* Increase the column size to filter last 4 pixels */ |
| col_size += last_col; |
| } |
| else if(!last_col) |
| { |
| col_size -= 1; |
| } |
| { |
| UWORD8 *pu1_src_temp = pu1_src; |
| //pu1_ctb_row_qp_p and pu1_ctb_row_qp_q point to alternate rows |
| pi1_ctb_row_qp_p = ps_deblk_ctb_row_params->pi1_ctb_row_qp; |
| |
| num_rows_for_horz_filt = ctb_size / 8; |
| |
| for(i4_edge_count = 0; i4_edge_count < num_rows_for_horz_filt; i4_edge_count++) |
| { |
| WORD32 col_size_temp = col_size; |
| pi1_ctb_row_qp_q = pi1_ctb_row_qp_p + u4_qp_buffer_stride; |
| pu1_src = pu1_src_temp + (i4_edge_count * 8 * ps_deblk->i4_luma_pic_stride); |
| |
| if(1 == top_luma_edge_filter_flag) |
| { |
| //Deblock the last vertical_4x4_column of previous CTB |
| if(ps_deblk->i4_deblock_left_ctb_edge == 1) |
| { |
| u4_bs = ps_deblk->au1_prev_bs[i4_edge_count] & 0x3; |
| if(u4_bs != 0) |
| { |
| qp_p = *(pi1_ctb_row_qp_p - 1); |
| qp_q = *(pi1_ctb_row_qp_q - 1); |
| |
| ps_func_slector->ihevc_deblk_luma_horz_fptr( |
| pu1_src, |
| ps_deblk->i4_luma_pic_stride, |
| u4_bs, |
| qp_p, |
| qp_q, |
| ps_deblk->i4_beta_offset_div2, |
| ps_deblk->i4_tc_offset_div2, |
| 1, |
| 1); |
| } |
| |
| pu1_src += 4; |
| col_size_temp--; |
| } |
| //Start deblocking current CTB |
| u4_bs = *(bs_horz); |
| |
| for(col = 0; col < col_size_temp;) |
| { |
| bs_lz = CLZ(u4_bs) >> 1; |
| if(0 != bs_lz) |
| { |
| u4_bs = u4_bs << (bs_lz << 1); |
| pu1_src += 4 * bs_lz; |
| col += bs_lz; |
| continue; |
| } |
| qp_p = *(pi1_ctb_row_qp_p + col); |
| qp_q = *(pi1_ctb_row_qp_q + col); |
| |
| ps_func_slector->ihevc_deblk_luma_horz_fptr( |
| pu1_src, |
| ps_deblk->i4_luma_pic_stride, |
| u4_bs >> (sizeof(u4_bs) * 8 - 2), |
| qp_p, |
| qp_q, |
| ps_deblk->i4_beta_offset_div2, |
| ps_deblk->i4_tc_offset_div2, |
| filter_p, |
| filter_q); |
| |
| pu1_src += 4; |
| u4_bs = u4_bs << 2; |
| col++; |
| } |
| //Store the last vertical_4x4 column of CTB's info for next CTB deblocking |
| u4_bs = *bs_horz; |
| ps_deblk->au1_prev_bs[i4_edge_count] = |
| (UWORD8)(((u4_bs << ((ctb_size >> 1) - 2))) >> 30); |
| } |
| bs_horz += 1; |
| pi1_ctb_row_qp_p += (u4_qp_buffer_stride << 1); |
| top_luma_edge_filter_flag = 1; |
| } |
| } |
| |
| ////////////////////////////////////////////////////////////////////////////// |
| /* Chroma Horizontal Edge */ |
| pu1_src_uv = ps_deblk->pu1_ctb_uv; |
| col_size = ctb_size / 8; |
| |
| /* If the ctb is the 1st ctb of row, */ |
| /* Decrement the loop count to exclude filtering of last 4 pixels */ |
| /* else shift the src pointer by 8 (uv) pixels to do filtering for shifted ctb */ |
| if(ps_deblk->i4_deblock_left_ctb_edge == 1) |
| { |
| pu1_src_uv -= 8; |
| |
| /*If the ctb is at the horizonatl end of PIC*/ |
| /* Increase the column size to filter last 8 (uv) pixels */ |
| col_size += last_col; |
| } |
| else if(!last_col) |
| { |
| col_size--; |
| } |
| |
| { |
| UWORD8 *pu1_src_temp = pu1_src_uv; |
| |
| //pu1_ctb_row_qp_p and pu1_ctb_row_qp_q point to alternate rows |
| pi1_ctb_row_qp_p = ps_deblk_ctb_row_params->pi1_ctb_row_qp; |
| num_rows_for_horz_filt = ctb_size / ((0 == u1_is_422) ? 16 : 8); |
| |
| for(i4_edge_count = 0; i4_edge_count < num_rows_for_horz_filt; i4_edge_count++) |
| { |
| WORD32 col_size_temp = col_size; |
| |
| pi1_ctb_row_qp_q = pi1_ctb_row_qp_p + u4_qp_buffer_stride; |
| pu1_src_uv = pu1_src_temp + (i4_edge_count * 8 * ps_deblk->i4_chroma_pic_stride); |
| |
| if(1 == top_chroma_edge_filter_flag) |
| { |
| //Deblock the last vertical _4x4_column of previous CTB |
| if(ps_deblk->i4_deblock_left_ctb_edge == 1) |
| { |
| u4_bs = ps_deblk->au1_prev_bs_uv[i4_edge_count] & 0x2; |
| |
| if(u4_bs == 2) |
| { |
| qp_p = *(pi1_ctb_row_qp_p - 1); |
| qp_q = *(pi1_ctb_row_qp_q - 1); |
| |
| pf_deblk_chroma_horz( |
| pu1_src_uv, |
| ps_deblk->i4_chroma_pic_stride, |
| qp_p, |
| qp_q, |
| ps_deblk->i4_cb_qp_indx_offset, |
| ps_deblk->i4_cr_qp_indx_offset, |
| ps_deblk->i4_tc_offset_div2, |
| 1, |
| 1); |
| } |
| |
| pu1_src_uv += 8; |
| col_size_temp--; |
| } |
| |
| //Start deblocking current CTB |
| u4_bs = *(bs_horz_uv)&0x88888888; |
| |
| for(col = 0; col < col_size_temp;) |
| { |
| bs_lz = CLZ(u4_bs) >> 2; |
| |
| if(0 != bs_lz) |
| { |
| u4_bs = u4_bs << (bs_lz << 2); |
| pu1_src_uv += (8 * bs_lz); |
| |
| col += bs_lz; |
| continue; |
| } |
| |
| qp_p = *(pi1_ctb_row_qp_p + (col << 1)); |
| qp_q = *(pi1_ctb_row_qp_q + (col << 1)); |
| |
| pf_deblk_chroma_horz( |
| pu1_src_uv, |
| ps_deblk->i4_chroma_pic_stride, |
| qp_p, |
| qp_q, |
| ps_deblk->i4_cb_qp_indx_offset, |
| ps_deblk->i4_cr_qp_indx_offset, |
| ps_deblk->i4_tc_offset_div2, |
| filter_p, |
| filter_q); |
| |
| pu1_src_uv += 8; |
| u4_bs = u4_bs << 4; |
| col++; |
| } |
| |
| //Store the last vertical_4x4 column of CTB's info for next CTB deblocking |
| u4_bs = *bs_horz_uv; |
| ps_deblk->au1_prev_bs_uv[i4_edge_count] = |
| (UWORD8)(((u4_bs << ((ctb_size >> 1) - 4))) >> 30); |
| } |
| |
| bs_horz_uv += ((0 == u1_is_422) + 1); |
| pi1_ctb_row_qp_p += (u4_qp_buffer_stride << ((0 == u1_is_422) + 1)); |
| top_chroma_edge_filter_flag = 1; |
| } |
| } |
| |
| return; |
| } |