| /****************************************************************************** |
| * |
| * Copyright (C) 2015 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at: |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| ***************************************************************************** |
| * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| */ |
| /** |
| ******************************************************************************* |
| * @file |
| * ih264e_me.c |
| * |
| * @brief |
| * |
| * |
| * @author |
| * Ittiam |
| * |
| * @par List of Functions: |
| * - |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| |
| /*****************************************************************************/ |
| /* File Includes */ |
| /*****************************************************************************/ |
| |
| /* System include files */ |
| #include <stdio.h> |
| #include <assert.h> |
| #include <limits.h> |
| #include <string.h> |
| |
| /* User include files */ |
| #include "ime_typedefs.h" |
| #include "ime_distortion_metrics.h" |
| #include "ime_defs.h" |
| #include "ime_structs.h" |
| #include "ime.h" |
| #include "ime_macros.h" |
| #include "ime_statistics.h" |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief Diamond Search |
| * |
| * @par Description: |
| * This function computes the sad at vertices of several layers of diamond grid |
| * at a time. The number of layers of diamond grid that would be evaluated is |
| * configurable.The function computes the sad at vertices of a diamond grid. If |
| * the sad at the center of the diamond grid is lesser than the sad at any other |
| * point of the diamond grid, the function marks the candidate Mb partition as |
| * mv. |
| * |
| * @param[in] ps_mb_part |
| * pointer to current mb partition ctxt with respect to ME |
| * |
| * @param[in] ps_me_ctxt |
| * pointer to me context |
| * |
| * @param[in] u4_lambda_motion |
| * lambda motion |
| * |
| * @param[in] u4_enable_fast_sad |
| * enable/disable fast sad computation |
| * |
| * @returns mv pair & corresponding distortion and cost |
| * |
| * @remarks Diamond Srch, radius is 1 |
| * |
| ******************************************************************************* |
| */ |
| void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist) |
| { |
| /* MB partition info */ |
| mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist]; |
| |
| /* lagrange parameter */ |
| UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion; |
| |
| /* srch range*/ |
| WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n; |
| WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s; |
| WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e; |
| WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w; |
| |
| /* enabled fast sad computation */ |
| // UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad; |
| |
| /* pointer to src macro block */ |
| UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma; |
| UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]; |
| |
| /* strides */ |
| WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd; |
| WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd; |
| |
| /* least cost */ |
| WORD32 i4_cost_least = ps_mb_part->i4_mb_cost; |
| |
| /* least sad */ |
| WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion; |
| |
| /* mv pair */ |
| WORD16 i2_mvx, i2_mvy; |
| |
| /* mv bits */ |
| UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits; |
| |
| /* temp var */ |
| WORD32 i4_cost[4]; |
| WORD32 i4_sad[4]; |
| UWORD8 *pu1_ref; |
| WORD16 i2_mv_u_x, i2_mv_u_y; |
| |
| /* Diamond search Iteration Max Cnt */ |
| UWORD32 u4_num_layers = ps_me_ctxt->u4_num_layers; |
| |
| /* temp var */ |
| // UWORD8 u1_prev_jump = NONE; |
| // UWORD8 u1_curr_jump = NONE; |
| // UWORD8 u1_next_jump; |
| // WORD32 mask_arr[5] = {15, 13, 14, 7, 11}; |
| // WORD32 mask; |
| // UWORD8 *apu1_ref[4]; |
| // WORD32 i, cnt; |
| // WORD32 dia[4][2] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}}; |
| |
| /* mv with best sad during initial evaluation */ |
| i2_mvx = ps_mb_part->s_mv_curr.i2_mvx; |
| i2_mvy = ps_mb_part->s_mv_curr.i2_mvy; |
| |
| i2_mv_u_x = i2_mvx; |
| i2_mv_u_y = i2_mvy; |
| |
| while (u4_num_layers--) |
| { |
| /* FIXME : is this the write way to check for out of bounds ? */ |
| if ( (i2_mvx - 1 < i4_srch_range_w) || |
| (i2_mvx + 1 > i4_srch_range_e) || |
| (i2_mvy - 1 < i4_srch_range_n) || |
| (i2_mvy + 1 > i4_srch_range_s) ) |
| { |
| break; |
| } |
| |
| pu1_ref = pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd); |
| |
| ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref, |
| pu1_curr_mb, |
| i4_ref_strd, |
| i4_src_strd, |
| i4_sad); |
| |
| DEBUG_SAD_HISTOGRAM_ADD(i4_sad[0], 2); |
| DEBUG_SAD_HISTOGRAM_ADD(i4_sad[1], 2); |
| DEBUG_SAD_HISTOGRAM_ADD(i4_sad[2], 2); |
| DEBUG_SAD_HISTOGRAM_ADD(i4_sad[3], 2); |
| |
| /* compute cost */ |
| i4_cost[0] = i4_sad[0] + u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx] |
| + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] ); |
| i4_cost[1] = i4_sad[1] + u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx] |
| + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] ); |
| i4_cost[2] = i4_sad[2] + u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] |
| + pu1_mv_bits[((i2_mvy - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] ); |
| i4_cost[3] = i4_sad[3] + u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] |
| + pu1_mv_bits[((i2_mvy + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] ); |
| |
| |
| if (i4_cost_least > i4_cost[0]) |
| { |
| i4_cost_least = i4_cost[0]; |
| i4_distortion_least = i4_sad[0]; |
| |
| i2_mv_u_x = (i2_mvx - 1); |
| i2_mv_u_y = i2_mvy; |
| } |
| |
| if (i4_cost_least > i4_cost[1]) |
| { |
| i4_cost_least = i4_cost[1]; |
| i4_distortion_least = i4_sad[1]; |
| |
| i2_mv_u_x = (i2_mvx + 1); |
| i2_mv_u_y = i2_mvy; |
| } |
| |
| if (i4_cost_least > i4_cost[2]) |
| { |
| i4_cost_least = i4_cost[2]; |
| i4_distortion_least = i4_sad[2]; |
| |
| i2_mv_u_x = i2_mvx; |
| i2_mv_u_y = i2_mvy - 1; |
| } |
| |
| if (i4_cost_least > i4_cost[3]) |
| { |
| i4_cost_least = i4_cost[3]; |
| i4_distortion_least = i4_sad[3]; |
| |
| i2_mv_u_x = i2_mvx; |
| i2_mv_u_y = i2_mvy + 1; |
| } |
| |
| if( (i2_mv_u_x == i2_mvx) && (i2_mv_u_y == i2_mvy)) |
| { |
| ps_mb_part->u4_exit = 1; |
| break; |
| } |
| else |
| { |
| i2_mvx = i2_mv_u_x; |
| i2_mvy = i2_mv_u_y; |
| } |
| |
| |
| } |
| |
| if (i4_cost_least < ps_mb_part->i4_mb_cost) |
| { |
| ps_mb_part->i4_mb_cost = i4_cost_least; |
| ps_mb_part->i4_mb_distortion = i4_distortion_least; |
| ps_mb_part->s_mv_curr.i2_mvx = i2_mvx; |
| ps_mb_part->s_mv_curr.i2_mvy = i2_mvy; |
| } |
| |
| } |
| |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief This function computes the best motion vector among the tentative mv |
| * candidates chosen. |
| * |
| * @par Description: |
| * This function determines the position in the search window at which the motion |
| * estimation should begin in order to minimise the number of search iterations. |
| * |
| * @param[in] ps_mb_part |
| * pointer to current mb partition ctxt with respect to ME |
| * |
| * @param[in] u4_lambda_motion |
| * lambda motion |
| * |
| * @param[in] u4_fast_flag |
| * enable/disable fast sad computation |
| * |
| * @returns mv pair & corresponding distortion and cost |
| * |
| * @remarks none |
| * |
| ******************************************************************************* |
| */ |
| |
| void ime_evaluate_init_srchposn_16x16 |
| ( |
| me_ctxt_t *ps_me_ctxt, |
| WORD32 i4_reflist |
| ) |
| { |
| UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion; |
| |
| /* candidate mv cnt */ |
| UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates[i4_reflist]; |
| |
| /* list of candidate mvs */ |
| ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search[i4_reflist]; |
| |
| /* pointer to src macro block */ |
| UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma; |
| UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]; |
| |
| /* strides */ |
| WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd; |
| WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd; |
| |
| /* enabled fast sad computation */ |
| UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad; |
| |
| /* SAD(distortion metric) of an 8x8 block */ |
| WORD32 i4_mb_distortion; |
| |
| /* cost = distortion + u4_lambda_motion * rate */ |
| WORD32 i4_mb_cost, i4_mb_cost_least = INT_MAX, i4_distortion_least = INT_MAX; |
| |
| /* mb partitions info */ |
| mb_part_ctxt *ps_mb_part = &(ps_me_ctxt->as_mb_part[i4_reflist]); |
| |
| /* mv bits */ |
| UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits; |
| |
| /* temp var */ |
| UWORD32 i, j; |
| WORD32 i4_srch_pos_idx = 0; |
| UWORD8 *pu1_ref = NULL; |
| |
| /* Carry out a search using each of the motion vector pairs identified above as predictors. */ |
| /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */ |
| for(i = 0; i < u4_num_candidates; i++) |
| { |
| /* compute sad */ |
| WORD32 c_sad = 1; |
| |
| for(j = 0; j < i; j++ ) |
| { |
| if ( (ps_mv_list[i].i2_mvx == ps_mv_list[j].i2_mvx) && |
| (ps_mv_list[i].i2_mvy == ps_mv_list[j].i2_mvy) ) |
| { |
| c_sad = 0; |
| break; |
| } |
| } |
| if(c_sad) |
| { |
| /* adjust ref pointer */ |
| pu1_ref = pu1_ref_mb + ps_mv_list[i].i2_mvx + (ps_mv_list[i].i2_mvy * i4_ref_strd); |
| |
| /* compute distortion */ |
| ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion); |
| |
| DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3); |
| /* compute cost */ |
| i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ (ps_mv_list[i].i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] |
| + pu1_mv_bits[(ps_mv_list[i].i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] ); |
| |
| if (i4_mb_cost < i4_mb_cost_least) |
| { |
| i4_mb_cost_least = i4_mb_cost; |
| |
| i4_distortion_least = i4_mb_distortion; |
| |
| i4_srch_pos_idx = i; |
| } |
| } |
| } |
| |
| if (i4_mb_cost_least < ps_mb_part->i4_mb_cost) |
| { |
| ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx; |
| ps_mb_part->i4_mb_cost = i4_mb_cost_least; |
| ps_mb_part->i4_mb_distortion = i4_distortion_least; |
| ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[i4_srch_pos_idx].i2_mvx; |
| ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[i4_srch_pos_idx].i2_mvy; |
| } |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief Searches for the best matching full pixel predictor within the search |
| * range |
| * |
| * @par Description: |
| * This function begins by computing the mv predict vector for the current mb. |
| * This is used for cost computations. Further basing on the algo. chosen, it |
| * looks through a set of candidate vectors that best represent the mb a least |
| * cost and returns this information. |
| * |
| * @param[in] ps_proc |
| * pointer to current proc ctxt |
| * |
| * @param[in] ps_me_ctxt |
| * pointer to me context |
| * |
| * @returns mv pair & corresponding distortion and cost |
| * |
| * @remarks none |
| * |
| ******************************************************************************* |
| */ |
| void ime_full_pel_motion_estimation_16x16 |
| ( |
| me_ctxt_t *ps_me_ctxt, |
| WORD32 i4_ref_list |
| ) |
| { |
| /* mb part info */ |
| mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_ref_list]; |
| |
| /******************************************************************/ |
| /* Modify Search range about initial candidate instead of zero mv */ |
| /******************************************************************/ |
| /* |
| * FIXME: The motion vectors in a way can become unbounded. It may so happen that |
| * MV might exceed the limit of the profile configured. |
| */ |
| ps_me_ctxt->i4_srch_range_w = MAX(ps_me_ctxt->i4_srch_range_w, |
| -ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx); |
| ps_me_ctxt->i4_srch_range_e = MIN(ps_me_ctxt->i4_srch_range_e, |
| ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx); |
| ps_me_ctxt->i4_srch_range_n = MAX(ps_me_ctxt->i4_srch_range_n, |
| -ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy); |
| ps_me_ctxt->i4_srch_range_s = MIN(ps_me_ctxt->i4_srch_range_s, |
| ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy); |
| |
| /************************************************************/ |
| /* Traverse about best initial candidate for mv */ |
| /************************************************************/ |
| |
| switch (ps_me_ctxt->u4_me_speed_preset) |
| { |
| case DMND_SRCH: |
| ime_diamond_search_16x16(ps_me_ctxt, i4_ref_list); |
| break; |
| default: |
| assert(0); |
| break; |
| } |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief Searches for the best matching sub pixel predictor within the search |
| * range |
| * |
| * @par Description: |
| * This function begins by searching across all sub pixel sample points |
| * around the full pel motion vector. The vector with least cost is chosen as |
| * the mv for the current mb. If the skip mode is not evaluated while analysing |
| * the initial search candidates then analyse it here and update the mv. |
| * |
| * @param[in] ps_proc |
| * pointer to current proc ctxt |
| * |
| * @param[in] ps_me_ctxt |
| * pointer to me context |
| * |
| * @returns none |
| * |
| * @remarks none |
| * |
| ******************************************************************************* |
| */ |
| void ime_sub_pel_motion_estimation_16x16 |
| ( |
| me_ctxt_t *ps_me_ctxt, |
| WORD32 i4_reflist |
| ) |
| { |
| /* pointers to src & ref macro block */ |
| UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma; |
| |
| /* pointers to ref. half pel planes */ |
| UWORD8 *pu1_ref_mb_half_x; |
| UWORD8 *pu1_ref_mb_half_y; |
| UWORD8 *pu1_ref_mb_half_xy; |
| |
| /* pointers to ref. half pel planes */ |
| UWORD8 *pu1_ref_mb_half_x_temp; |
| UWORD8 *pu1_ref_mb_half_y_temp; |
| UWORD8 *pu1_ref_mb_half_xy_temp; |
| |
| /* strides */ |
| WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd; |
| |
| WORD32 i4_ref_strd = ps_me_ctxt->u4_subpel_buf_strd; |
| |
| /* mb partitions info */ |
| mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist]; |
| |
| /* SAD(distortion metric) of an mb */ |
| WORD32 i4_mb_distortion; |
| WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion; |
| |
| /* cost = distortion + u4_lambda_motion * rate */ |
| WORD32 i4_mb_cost; |
| WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost; |
| |
| /*Best half pel buffer*/ |
| UWORD8 *pu1_best_hpel_buf = NULL; |
| |
| /* mv bits */ |
| UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits; |
| |
| /* Motion vectors in full-pel units */ |
| WORD16 mv_x, mv_y; |
| |
| /* lambda - lagrange constant */ |
| UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion; |
| |
| /* Flags to check if half pel points needs to be evaluated */ |
| /**************************************/ |
| /* 1 bit for each half pel candidate */ |
| /* bit 0 - half x = 1, half y = 0 */ |
| /* bit 1 - half x = -1, half y = 0 */ |
| /* bit 2 - half x = 0, half y = 1 */ |
| /* bit 3 - half x = 0, half y = -1 */ |
| /* bit 4 - half x = 1, half y = 1 */ |
| /* bit 5 - half x = -1, half y = 1 */ |
| /* bit 6 - half x = 1, half y = -1 */ |
| /* bit 7 - half x = -1, half y = -1 */ |
| /**************************************/ |
| /* temp var */ |
| WORD16 i2_mv_u_x, i2_mv_u_y; |
| WORD32 i, j; |
| WORD32 ai4_sad[8]; |
| |
| WORD32 i4_srch_pos_idx = ps_mb_part->i4_srch_pos_idx; |
| |
| i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx; |
| i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy; |
| |
| /************************************************************/ |
| /* Evaluate half pel */ |
| /************************************************************/ |
| mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2; |
| mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2; |
| |
| |
| /**************************************************************/ |
| /* ps_me_ctxt->pu1_half_x points to the half pel pixel on the */ |
| /* left side of full pel */ |
| /* ps_me_ctxt->pu1_half_y points to the half pel pixel on the */ |
| /* top side of full pel */ |
| /* ps_me_ctxt->pu1_half_xy points to the half pel pixel */ |
| /* on the top left side of full pel */ |
| /* for the function pf_ime_sub_pel_compute_sad_16x16 the */ |
| /* default postions are */ |
| /* ps_me_ctxt->pu1_half_x = right halp_pel */ |
| /* ps_me_ctxt->pu1_half_y = bottom halp_pel */ |
| /* ps_me_ctxt->pu1_half_xy = bottom right halp_pel */ |
| /* Hence corresponding adjustments made here */ |
| /**************************************************************/ |
| |
| pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->apu1_subpel_buffs[0] + 1; |
| pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->apu1_subpel_buffs[1] + 1 + i4_ref_strd; |
| pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy = ps_me_ctxt->apu1_subpel_buffs[2] + 1 + i4_ref_strd; |
| |
| ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x, |
| pu1_ref_mb_half_y, |
| pu1_ref_mb_half_xy, |
| i4_src_strd, i4_ref_strd, |
| ai4_sad); |
| |
| /* Half x plane */ |
| for(i = 0; i < 2; i++) |
| { |
| WORD32 mv_x_tmp = (mv_x << 2) + 2; |
| WORD32 mv_y_tmp = (mv_y << 2); |
| |
| mv_x_tmp -= (i * 4); |
| |
| i4_mb_distortion = ai4_sad[i]; |
| |
| /* compute cost */ |
| i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] |
| + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ); |
| |
| if (i4_mb_cost < i4_mb_cost_least) |
| { |
| i4_mb_cost_least = i4_mb_cost; |
| |
| i4_distortion_least = i4_mb_distortion; |
| |
| i2_mv_u_x = mv_x_tmp; |
| |
| i2_mv_u_y = mv_y_tmp; |
| |
| #ifndef HP_PL /*choosing whether left or right half_x*/ |
| ps_me_ctxt->apu1_subpel_buffs[0] = pu1_ref_mb_half_x_temp - i; |
| pu1_best_hpel_buf = pu1_ref_mb_half_x_temp - i; |
| |
| i4_srch_pos_idx = 0; |
| #endif |
| } |
| |
| } |
| |
| /* Half y plane */ |
| for(i = 0; i < 2; i++) |
| { |
| WORD32 mv_x_tmp = (mv_x << 2); |
| WORD32 mv_y_tmp = (mv_y << 2) + 2; |
| |
| mv_y_tmp -= (i * 4); |
| |
| i4_mb_distortion = ai4_sad[2 + i]; |
| |
| /* compute cost */ |
| i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] |
| + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ); |
| |
| if (i4_mb_cost < i4_mb_cost_least) |
| { |
| i4_mb_cost_least = i4_mb_cost; |
| |
| i4_distortion_least = i4_mb_distortion; |
| |
| i2_mv_u_x = mv_x_tmp; |
| |
| i2_mv_u_y = mv_y_tmp; |
| |
| #ifndef HP_PL/*choosing whether top or bottom half_y*/ |
| ps_me_ctxt->apu1_subpel_buffs[1] = pu1_ref_mb_half_y_temp - i*(i4_ref_strd); |
| pu1_best_hpel_buf = pu1_ref_mb_half_y_temp - i*(i4_ref_strd); |
| |
| i4_srch_pos_idx = 1; |
| #endif |
| } |
| |
| } |
| |
| /* Half xy plane */ |
| for(j = 0; j < 2; j++) |
| { |
| for(i = 0; i < 2; i++) |
| { |
| WORD32 mv_x_tmp = (mv_x << 2) + 2; |
| WORD32 mv_y_tmp = (mv_y << 2) + 2; |
| |
| mv_x_tmp -= (i * 4); |
| mv_y_tmp -= (j * 4); |
| |
| i4_mb_distortion = ai4_sad[4 + i + 2 * j]; |
| |
| /* compute cost */ |
| i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] |
| + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ); |
| |
| if (i4_mb_cost < i4_mb_cost_least) |
| { |
| i4_mb_cost_least = i4_mb_cost; |
| |
| i4_distortion_least = i4_mb_distortion; |
| |
| i2_mv_u_x = mv_x_tmp; |
| |
| i2_mv_u_y = mv_y_tmp; |
| |
| #ifndef HP_PL /*choosing between four half_xy */ |
| ps_me_ctxt->apu1_subpel_buffs[2] = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i; |
| pu1_best_hpel_buf = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i; |
| |
| i4_srch_pos_idx = 2; |
| #endif |
| } |
| |
| } |
| } |
| |
| if (i4_mb_cost_least < ps_mb_part->i4_mb_cost) |
| { |
| ps_mb_part->i4_mb_cost = i4_mb_cost_least; |
| ps_mb_part->i4_mb_distortion = i4_distortion_least; |
| ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x; |
| ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y; |
| ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf; |
| ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx; |
| } |
| } |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief This function computes cost of skip macroblocks |
| * |
| * @par Description: |
| * |
| * @param[in] ps_me_ctxt |
| * pointer to me ctxt |
| * |
| * |
| * @returns none |
| * |
| * @remarks |
| * NOTE: while computing the skip cost, do not enable early exit from compute |
| * sad function because, a negative bias gets added later |
| * Note tha the last ME candidate in me ctxt is taken as skip motion vector |
| * |
| ******************************************************************************* |
| */ |
| void ime_compute_skip_cost |
| ( |
| me_ctxt_t *ps_me_ctxt, |
| ime_mv_t *ps_skip_mv, |
| mb_part_ctxt *ps_smb_part_info, |
| UWORD32 u4_use_stat_sad, |
| WORD32 i4_reflist, |
| WORD32 i4_is_slice_type_b |
| ) |
| { |
| |
| /* SAD(distortion metric) of an mb */ |
| WORD32 i4_mb_distortion; |
| |
| /* cost = distortion + u4_lambda_motion * rate */ |
| WORD32 i4_mb_cost; |
| |
| /* temp var */ |
| UWORD8 *pu1_ref = NULL; |
| |
| ime_mv_t s_skip_mv; |
| |
| s_skip_mv.i2_mvx = (ps_skip_mv->i2_mvx +2)>>2; |
| s_skip_mv.i2_mvy = (ps_skip_mv->i2_mvy +2)>>2; |
| |
| /* Check if the skip mv is out of bounds or subpel */ |
| { |
| /* skip mv */ |
| ime_mv_t s_clip_skip_mv; |
| |
| s_clip_skip_mv.i2_mvx = CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, s_skip_mv.i2_mvx); |
| s_clip_skip_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, s_skip_mv.i2_mvy); |
| |
| if ((s_clip_skip_mv.i2_mvx != s_skip_mv.i2_mvx) || |
| (s_clip_skip_mv.i2_mvy != s_skip_mv.i2_mvy) || |
| (ps_skip_mv->i2_mvx & 0x3) || |
| (ps_skip_mv->i2_mvy & 0x3)) |
| { |
| return ; |
| } |
| } |
| |
| |
| /* adjust ref pointer */ |
| pu1_ref = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] + s_skip_mv.i2_mvx |
| + (s_skip_mv.i2_mvy * ps_me_ctxt->i4_rec_strd); |
| |
| if(u4_use_stat_sad == 1) |
| { |
| UWORD32 u4_is_nonzero; |
| |
| ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16( |
| ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd, |
| ps_me_ctxt->i4_rec_strd, ps_me_ctxt->pu2_sad_thrsh, |
| &i4_mb_distortion, &u4_is_nonzero); |
| |
| if (u4_is_nonzero == 0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad) |
| { |
| ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */ |
| ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0) ? 0 : i4_mb_distortion; |
| } |
| } |
| else |
| { |
| ps_me_ctxt->pf_ime_compute_sad_16x16[ps_me_ctxt->u4_enable_fast_sad]( |
| ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd, |
| ps_me_ctxt->i4_rec_strd, INT_MAX, &i4_mb_distortion); |
| |
| if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad) |
| { |
| ps_me_ctxt->i4_min_sad = i4_mb_distortion; |
| ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */ |
| } |
| } |
| |
| |
| /* for skip mode cost & distortion are identical |
| * But we shall add a bias to favor skip mode. |
| * Doc. JVT B118 Suggests SKIP_BIAS as 16. |
| * TODO : Empirical analysis of SKIP_BIAS is necessary */ |
| |
| i4_mb_cost = i4_mb_distortion - (ps_me_ctxt->u4_lambda_motion * (ps_me_ctxt->i4_skip_bias[0] + ps_me_ctxt->i4_skip_bias[1] * i4_is_slice_type_b)); |
| |
| if (i4_mb_cost <= ps_smb_part_info->i4_mb_cost) |
| { |
| ps_smb_part_info->i4_mb_cost = i4_mb_cost; |
| ps_smb_part_info->i4_mb_distortion = i4_mb_distortion; |
| ps_smb_part_info->s_mv_curr.i2_mvx = s_skip_mv.i2_mvx; |
| ps_smb_part_info->s_mv_curr.i2_mvy = s_skip_mv.i2_mvy; |
| } |
| } |
| |