/******************************************************************************
*
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
/**
 *******************************************************************************
 * @file
 *  ihevc_inter_pred.c
 *
 * @brief
 *  Calculates the prediction samples for a given CTB
 *
 * @author
 *  Srinivas T
 *
 * @par List of Functions:
 *  - ihevcd_inter_pred_ctb()
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
37#include <stdio.h>
38#include <stddef.h>
39#include <stdlib.h>
40#include <string.h>
41#include <assert.h>
42
43#include "ihevc_typedefs.h"
44#include "iv.h"
45#include "ivd.h"
46#include "ihevcd_cxa.h"
47#include "ithread.h"
48
49#include "ihevc_defs.h"
50#include "ihevc_debug.h"
51#include "ihevc_structs.h"
52#include "ihevc_macros.h"
53#include "ihevc_platform_macros.h"
54#include "ihevc_cabac_tables.h"
55#include "ihevc_weighted_pred.h"
56
57#include "ihevc_error.h"
58#include "ihevc_common_tables.h"
59
60#include "ihevcd_trace.h"
61#include "ihevcd_defs.h"
62#include "ihevcd_function_selector.h"
63#include "ihevcd_structs.h"
64#include "ihevcd_error.h"
65#include "ihevcd_nal.h"
66#include "ihevcd_bitstream.h"
67#include "ihevcd_job_queue.h"
68#include "ihevcd_utils.h"
69
70#include "ihevc_inter_pred.h"
71#include "ihevcd_profile.h"
72
73WORD8 luma_filter[4][NTAPS_LUMA] =
74{
75 { 0, 0, 0, 64, 0, 0, 0, 0 },
76 { -1, 4, -10, 58, 17, -5, 1, 0 },
77 { -1, 4, -11, 40, 40, -11, 4, -1 },
78 { 0, 1, -5, 17, 58, -10, 4, -1 } };
79
80/* The filter uses only the first four elements in each array */
81WORD8 chroma_filter[8][NTAPS_LUMA] =
82{
83 { 0, 64, 0, 0, 0, 0, 0, 0 },
84 { -2, 58, 10, -2, 0, 0, 0, 0 },
85 { -4, 54, 16, -2, 0, 0, 0, 0 },
86 { -6, 46, 28, -4, 0, 0, 0, 0 },
87 { -4, 36, 36, -4, 0, 0, 0, 0 },
88 { -4, 28, 46, -6, 0, 0, 0, 0 },
89 { -2, 16, 54, -4, 0, 0, 0, 0 },
90 { -2, 10, 58, -2, 0, 0, 0, 0 } };
91
92/**
93*******************************************************************************
94*
95* @brief
96* Inter prediction CTB level function
97*
98* @par Description:
99* For a given CTB, Inter prediction followed by weighted prediction is
100* done for all the PUs present in the CTB
101*
102* @param[in] ps_ctb
103* Pointer to the CTB context
104*
105* @returns
106*
107* @remarks
108*
109*
110*******************************************************************************
111*/
112
113void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc)
114{
115 UWORD8 *ref_pic_luma_l0, *ref_pic_chroma_l0;
116 UWORD8 *ref_pic_luma_l1, *ref_pic_chroma_l1;
117
118 UWORD8 *ref_pic_l0 = NULL, *ref_pic_l1 = NULL;
119
120 slice_header_t *ps_slice_hdr;
121 sps_t *ps_sps;
122 pps_t *ps_pps;
123 pu_t *ps_pu;
124 codec_t *ps_codec;
125 WORD32 pu_indx;
126 WORD32 pu_x, pu_y;
127 WORD32 pu_wd, pu_ht;
128 WORD32 i4_pu_cnt;
129 WORD32 cur_ctb_idx;
130
131 WORD32 clr_indx;
132 WORD32 ntaps;
133
134
135
136 WORD32 ai2_xint[2] = { 0, 0 }, ai2_yint[2] = { 0, 0 };
137 WORD32 ai2_xfrac[2] = { 0, 0 }, ai2_yfrac[2] = { 0, 0 };
138
139 WORD32 weighted_pred, bi_pred;
140
141 WORD32 ref_strd;
142 UWORD8 *pu1_dst_luma, *pu1_dst_chroma;
143
144 UWORD8 *pu1_dst;
145
146 WORD16 *pi2_tmp1, *pi2_tmp2;
147
148 WORD32 luma_weight_l0, luma_weight_l1;
149 WORD32 chroma_weight_l0_cb, chroma_weight_l1_cb, chroma_weight_l0_cr, chroma_weight_l1_cr;
150 WORD32 luma_offset_l0, luma_offset_l1;
151 WORD32 chroma_offset_l0_cb, chroma_offset_l1_cb, chroma_offset_l0_cr, chroma_offset_l1_cr;
152 WORD32 shift, lvl_shift1, lvl_shift2;
153
154 pf_inter_pred func_ptr1, func_ptr2, func_ptr3, func_ptr4;
155 WORD32 func_indx1, func_indx2, func_indx3, func_indx4;
156 void *func_src;
157 void *func_dst;
158 WORD32 func_src_strd;
159 WORD32 func_dst_strd;
160 WORD8 *func_coeff;
161 WORD32 func_wd;
162 WORD32 func_ht;
163 WORD32 next_ctb_idx;
164 WORD8(*coeff)[8];
165 WORD32 chroma_yuv420sp_vu;
166
167 PROFILE_DISABLE_INTER_PRED();
168 ps_codec = ps_proc->ps_codec;
169 ps_slice_hdr = ps_proc->ps_slice_hdr;
170 ps_pps = ps_proc->ps_pps;
171 ps_sps = ps_proc->ps_sps;
172 cur_ctb_idx = ps_proc->i4_ctb_x
173 + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
174 /*
175 * In case of tiles, the next ctb belonging to the same tile must be used to get the PU index
176 */
177
178 next_ctb_idx = ps_proc->i4_next_pu_ctb_cnt;
179 i4_pu_cnt = ps_proc->pu4_pic_pu_idx[next_ctb_idx] - ps_proc->pu4_pic_pu_idx[cur_ctb_idx];
180
181 ps_pu = ps_proc->ps_pu;
182 ref_strd = ps_codec->i4_strd;
183 pi2_tmp1 = ps_proc->pi2_inter_pred_tmp_buf1;
184 pi2_tmp2 = ps_proc->pi2_inter_pred_tmp_buf2;
185 pu1_dst_luma = ps_proc->pu1_cur_pic_luma;
186 pu1_dst_chroma = ps_proc->pu1_cur_pic_chroma;
187
188 chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU);
189
190 ASSERT(PSLICE == ps_slice_hdr->i1_slice_type || BSLICE == ps_slice_hdr->i1_slice_type);
191
192 ref_pic_luma_l0 = NULL;
193 ref_pic_chroma_l0 = NULL;
194
195 luma_weight_l0 = 0;
196 chroma_weight_l0_cb = 0;
197 chroma_weight_l0_cr = 0;
198
199 luma_offset_l0 = 0;
200 chroma_offset_l0_cb = 0;
201 chroma_offset_l0_cr = 0;
202
203 ref_pic_luma_l1 = NULL;
204 ref_pic_chroma_l1 = NULL;
205
206 luma_weight_l1 = 0;
207 chroma_weight_l1_cb = 0;
208 chroma_weight_l1_cr = 0;
209
210 luma_offset_l1 = 0;
211 chroma_offset_l1_cb = 0;
212 chroma_offset_l1_cr = 0;
213
214 for(pu_indx = 0; pu_indx < i4_pu_cnt; pu_indx++, ps_pu++)
215 {
216 /* If the PU is intra then proceed to the next */
217 if(1 == ps_pu->b1_intra_flag)
218 continue;
219 pu_x = (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) + (ps_pu->b4_pos_x << 2);
220 pu_y = (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) + (ps_pu->b4_pos_y << 2);
221
222 pu_wd = (ps_pu->b4_wd + 1) << 2;
223 pu_ht = (ps_pu->b4_ht + 1) << 2;
224
225 weighted_pred = (ps_slice_hdr->i1_slice_type == PSLICE) ? ps_pps->i1_weighted_pred_flag :
226 ps_pps->i1_weighted_bipred_flag;
227 bi_pred = (ps_pu->b2_pred_mode == PRED_BI);
228
229#ifdef GPU_BUILD
230 if(ps_proc->u4_gpu_inter_flag == 1)
231 {
232 /* Only 16x16 PUs have been implemented on opencl device */
233 if((pu_wd % 16 == 0) && (pu_ht % 16 == 0) && (weighted_pred == 0))
234 {
235 //printf("Skipping Inter\n");
236 continue;
237 }
238 }
239#endif
240 if(ps_pu->b2_pred_mode != PRED_L1)
241 {
242 pic_buf_t *ps_pic_buf_l0;
243
244 ps_pic_buf_l0 = (pic_buf_t *)((ps_slice_hdr->as_ref_pic_list0[ps_pu->mv.i1_l0_ref_idx].pv_pic_buf));
245
246 ref_pic_luma_l0 = ps_pic_buf_l0->pu1_luma;
247 ref_pic_chroma_l0 = ps_pic_buf_l0->pu1_chroma;
248
249 luma_weight_l0 = ps_slice_hdr->s_wt_ofst.i2_luma_weight_l0[ps_pu->mv.i1_l0_ref_idx];
250 chroma_weight_l0_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l0_cb[ps_pu->mv.i1_l0_ref_idx];
251 chroma_weight_l0_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l0_cr[ps_pu->mv.i1_l0_ref_idx];
252
253 luma_offset_l0 = ps_slice_hdr->s_wt_ofst.i2_luma_offset_l0[ps_pu->mv.i1_l0_ref_idx];
254 chroma_offset_l0_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l0_cb[ps_pu->mv.i1_l0_ref_idx];
255 chroma_offset_l0_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l0_cr[ps_pu->mv.i1_l0_ref_idx];
256 }
257
258 if(ps_pu->b2_pred_mode != PRED_L0)
259 {
260 pic_buf_t *ps_pic_buf_l1;
261 ps_pic_buf_l1 = (pic_buf_t *)((ps_slice_hdr->as_ref_pic_list1[ps_pu->mv.i1_l1_ref_idx].pv_pic_buf));
262 ref_pic_luma_l1 = ps_pic_buf_l1->pu1_luma;
263 ref_pic_chroma_l1 = ps_pic_buf_l1->pu1_chroma;
264
265 luma_weight_l1 = ps_slice_hdr->s_wt_ofst.i2_luma_weight_l1[ps_pu->mv.i1_l1_ref_idx];
266 chroma_weight_l1_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l1_cb[ps_pu->mv.i1_l1_ref_idx];
267 chroma_weight_l1_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l1_cr[ps_pu->mv.i1_l1_ref_idx];
268
269 luma_offset_l1 = ps_slice_hdr->s_wt_ofst.i2_luma_offset_l1[ps_pu->mv.i1_l1_ref_idx];
270 chroma_offset_l1_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l1_cb[ps_pu->mv.i1_l1_ref_idx];
271 chroma_offset_l1_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l1_cr[ps_pu->mv.i1_l1_ref_idx];
272 }
273
274 /*luma and chroma components*/
275 for(clr_indx = 0; clr_indx < 2; clr_indx++)
276 {
277 PROFILE_DISABLE_INTER_PRED_LUMA(clr_indx);
278 PROFILE_DISABLE_INTER_PRED_CHROMA(clr_indx);
279
280 if(clr_indx == 0)
281 {
282 WORD32 mv;
283 if(ps_pu->b2_pred_mode != PRED_L1)
284 {
285 mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2));
286 ai2_xint[0] = pu_x + (mv >> 2);
287 ai2_xfrac[0] = mv & 3;
288
289 mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2));
290 ai2_yint[0] = pu_y + (mv >> 2);
291 ai2_yfrac[0] = mv & 3;
292
293 ai2_xfrac[0] &= ps_codec->i4_mv_frac_mask;
294 ai2_yfrac[0] &= ps_codec->i4_mv_frac_mask;
295
296
297 ref_pic_l0 = ref_pic_luma_l0 + ai2_yint[0] * ref_strd
298 + ai2_xint[0];
299 }
300
301 if(ps_pu->b2_pred_mode != PRED_L0)
302 {
303
304 mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2));
305 ai2_xint[1] = pu_x + (mv >> 2);
306 ai2_xfrac[1] = mv & 3;
307
308 mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2));
309 ai2_yint[1] = pu_y + (mv >> 2);
310 ai2_yfrac[1] = mv & 3;
311
312 ref_pic_l1 = ref_pic_luma_l1 + ai2_yint[1] * ref_strd
313 + ai2_xint[1];
314 ai2_xfrac[1] &= ps_codec->i4_mv_frac_mask;
315 ai2_yfrac[1] &= ps_codec->i4_mv_frac_mask;
316
317 }
318
319 pu1_dst = pu1_dst_luma + pu_y * ref_strd + pu_x;
320
321 ntaps = NTAPS_LUMA;
322 coeff = luma_filter;
323 }
324
325 else
326 {
327 WORD32 mv;
328 /* xint is upshifted by 1 because the chroma components are */
329 /* interleaved which is not the assumption made by standard */
330 if(ps_pu->b2_pred_mode != PRED_L1)
331 {
332 mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2));
333 ai2_xint[0] = (pu_x / 2 + (mv >> 3)) << 1;
334 ai2_xfrac[0] = mv & 7;
335
336 mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2));
337 ai2_yint[0] = pu_y / 2 + (mv >> 3);
338 ai2_yfrac[0] = mv & 7;
339
340 ref_pic_l0 = ref_pic_chroma_l0 + ai2_yint[0] * ref_strd
341 + ai2_xint[0];
342
343 ai2_xfrac[0] &= ps_codec->i4_mv_frac_mask;
344 ai2_yfrac[0] &= ps_codec->i4_mv_frac_mask;
345
346 }
347
348 if(ps_pu->b2_pred_mode != PRED_L0)
349 {
350 mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2));
351 ai2_xint[1] = (pu_x / 2 + (mv >> 3)) << 1;
352 ai2_xfrac[1] = mv & 7;
353
354 mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2));
355 ai2_yint[1] = pu_y / 2 + (mv >> 3);
356 ai2_yfrac[1] = mv & 7;
357
358 ref_pic_l1 = ref_pic_chroma_l1 + ai2_yint[1] * ref_strd
359 + ai2_xint[1];
360 ai2_xfrac[1] &= ps_codec->i4_mv_frac_mask;
361 ai2_yfrac[1] &= ps_codec->i4_mv_frac_mask;
362
363 }
364
365 pu1_dst = pu1_dst_chroma + pu_y * ref_strd / 2 + pu_x;
366
367 ntaps = NTAPS_CHROMA;
368 coeff = chroma_filter;
369 }
370
371 if(ps_pu->b2_pred_mode != PRED_L1)
372 {
373 func_indx1 = 4 * (weighted_pred || bi_pred) + 1 + 11 * clr_indx;
374 func_indx1 += ai2_xfrac[0] ? 2 : 0;
375 func_indx1 += ai2_yfrac[0] ? 1 : 0;
376
377 func_indx2 = (ai2_xfrac[0] && ai2_yfrac[0])
378 * (9 + (weighted_pred || bi_pred)) + 11 * clr_indx;
379
380 func_ptr1 = ps_codec->apf_inter_pred[func_indx1];
381 func_ptr2 = ps_codec->apf_inter_pred[func_indx2];
382 }
383 else
384 {
385 func_ptr1 = NULL;
386 func_ptr2 = NULL;
387 }
388 if(ps_pu->b2_pred_mode != PRED_L0)
389 {
390 func_indx3 = 4 * (weighted_pred || bi_pred) + 1 + 11 * clr_indx;
391 func_indx3 += ai2_xfrac[1] ? 2 : 0;
392 func_indx3 += ai2_yfrac[1] ? 1 : 0;
393
394 func_indx4 = (ai2_xfrac[1] && ai2_yfrac[1])
395 * (9 + (weighted_pred || bi_pred)) + 11 * clr_indx;
396
397 func_ptr3 = ps_codec->apf_inter_pred[func_indx3];
398 func_ptr4 = ps_codec->apf_inter_pred[func_indx4];
399 }
400 else
401 {
402 func_ptr3 = NULL;
403 func_ptr4 = NULL;
404 }
405
406 /*Function 1*/
407 if(func_ptr1 != NULL)
408 {
409 func_src_strd = ref_strd;
410 func_src = (ai2_xfrac[0] && ai2_yfrac[0]) ?
411 ref_pic_l0 - (ntaps / 2 - 1) * func_src_strd :
412 ref_pic_l0;
413 func_dst = (weighted_pred || bi_pred) ?
414 (void *)pi2_tmp1 : (void *)pu1_dst;
415 if(ai2_xfrac[0] && ai2_yfrac[0])
416 {
417 func_dst = pi2_tmp1;
418 }
419
420 func_dst_strd = (weighted_pred || bi_pred
421 || (ai2_xfrac[0] && ai2_yfrac[0])) ?
422 pu_wd : ref_strd;
423 func_coeff = ai2_xfrac[0] ?
424 coeff[ai2_xfrac[0]] : coeff[ai2_yfrac[0]];
425 func_wd = pu_wd >> clr_indx;
426 func_ht = pu_ht >> clr_indx;
427 func_ht += (ai2_xfrac[0] && ai2_yfrac[0]) ? ntaps - 1 : 0;
428 func_ptr1(func_src, func_dst, func_src_strd, func_dst_strd,
429 func_coeff, func_ht, func_wd);
430 }
431
432 /*Function 2*/
433 if(func_ptr2 != NULL)
434 {
435 func_src_strd = pu_wd;
436 func_src = pi2_tmp1 + (ntaps / 2 - 1) * func_src_strd;
437 func_dst = (weighted_pred || bi_pred) ?
438 (void *)pi2_tmp1 : (void *)pu1_dst;
439
440 func_dst_strd = (weighted_pred || bi_pred) ?
441 pu_wd : ref_strd;
442 func_coeff = coeff[ai2_yfrac[0]];
443 func_wd = pu_wd >> clr_indx;
444 func_ht = pu_ht >> clr_indx;
445 func_ptr2(func_src, func_dst, func_src_strd, func_dst_strd,
446 func_coeff, func_ht, func_wd);
447 }
448
449 if(func_ptr3 != NULL)
450 {
451 func_src_strd = ref_strd;
452 func_src = (ai2_xfrac[1] && ai2_yfrac[1]) ?
453 ref_pic_l1 - (ntaps / 2 - 1) * func_src_strd :
454 ref_pic_l1;
455
456 func_dst = (weighted_pred || bi_pred) ?
457 (void *)pi2_tmp2 : (void *)pu1_dst;
458 if(ai2_xfrac[1] && ai2_yfrac[1])
459 {
460 func_dst = pi2_tmp2;
461 }
462 func_dst_strd = (weighted_pred || bi_pred
463 || (ai2_xfrac[1] && ai2_yfrac[1])) ?
464 pu_wd : ref_strd;
465 func_coeff = ai2_xfrac[1] ?
466 coeff[ai2_xfrac[1]] : coeff[ai2_yfrac[1]];
467 func_wd = pu_wd >> clr_indx;
468 func_ht = pu_ht >> clr_indx;
469 func_ht += (ai2_xfrac[1] && ai2_yfrac[1]) ? ntaps - 1 : 0;
470 func_ptr3(func_src, func_dst, func_src_strd, func_dst_strd,
471 func_coeff, func_ht, func_wd);
472
473 }
474
475 if(func_ptr4 != NULL)
476 {
477 func_src_strd = pu_wd;
478 func_src = pi2_tmp2 + (ntaps / 2 - 1) * func_src_strd;
479
480 func_dst = (weighted_pred || bi_pred) ?
481 (void *)pi2_tmp2 : (void *)pu1_dst;
482 func_dst_strd = (weighted_pred || bi_pred) ?
483 pu_wd : ref_strd;
484 func_coeff = coeff[ai2_yfrac[1]];
485 func_wd = pu_wd >> clr_indx;
486 func_ht = pu_ht >> clr_indx;
487 func_ptr4(func_src, func_dst, func_src_strd, func_dst_strd,
488 func_coeff, func_ht, func_wd);
489
490 }
491
492 PROFILE_DISABLE_INTER_PRED_LUMA_AVERAGING(clr_indx);
493 PROFILE_DISABLE_INTER_PRED_CHROMA_AVERAGING(clr_indx);
494
495
496 if((weighted_pred != 0) && (bi_pred != 0))
497 {
498 lvl_shift1 = 0;
499 lvl_shift2 = 0;
500 if((0 == clr_indx) && (ai2_xfrac[0] && ai2_yfrac[0]))
501 lvl_shift1 = (1 << 13);
502
503 if((0 == clr_indx) && (ai2_xfrac[1] && ai2_yfrac[1]))
504 lvl_shift2 = (1 << 13);
505
506
507 if(0 == clr_indx)
508 {
509 shift = ps_slice_hdr->s_wt_ofst.i1_luma_log2_weight_denom
510 + SHIFT_14_MINUS_BIT_DEPTH + 1;
511
512 ps_codec->s_func_selector.ihevc_weighted_pred_bi_fptr(pi2_tmp1,
513 pi2_tmp2,
514 pu1_dst,
515 pu_wd,
516 pu_wd,
517 ref_strd,
518 luma_weight_l0,
519 luma_offset_l0,
520 luma_weight_l1,
521 luma_offset_l1,
522 shift,
523 lvl_shift1,
524 lvl_shift2,
525 pu_ht,
526 pu_wd);
527 }
528 else
529 {
530 shift = ps_slice_hdr->s_wt_ofst.i1_chroma_log2_weight_denom
531 + SHIFT_14_MINUS_BIT_DEPTH + 1;
532
533 if(chroma_yuv420sp_vu)
534 {
535 ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr(pi2_tmp1,
536 pi2_tmp2,
537 pu1_dst,
538 pu_wd,
539 pu_wd,
540 ref_strd,
541 chroma_weight_l0_cr,
542 chroma_weight_l0_cb,
543 chroma_offset_l0_cr,
544 chroma_offset_l0_cb,
545 chroma_weight_l1_cr,
546 chroma_weight_l1_cb,
547 chroma_offset_l1_cr,
548 chroma_offset_l1_cb,
549 shift,
550 lvl_shift1,
551 lvl_shift2,
552 pu_ht >> 1,
553 pu_wd >> 1);
554 }
555 else
556 {
557 ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr(pi2_tmp1,
558 pi2_tmp2,
559 pu1_dst,
560 pu_wd,
561 pu_wd,
562 ref_strd,
563 chroma_weight_l0_cb,
564 chroma_weight_l0_cr,
565 chroma_offset_l0_cb,
566 chroma_offset_l0_cr,
567 chroma_weight_l1_cb,
568 chroma_weight_l1_cr,
569 chroma_offset_l1_cb,
570 chroma_offset_l1_cr,
571 shift,
572 lvl_shift1,
573 lvl_shift2,
574 pu_ht >> 1,
575 pu_wd >> 1);
576 }
577 }
578 }
579
580 else if((weighted_pred != 0) && (bi_pred == 0))
581 {
582 lvl_shift1 = 0;
583 if(ps_pu->b2_pred_mode == PRED_L0)
584 {
585 if((0 == clr_indx) && (ai2_xfrac[0] && ai2_yfrac[0]))
586 lvl_shift1 = (1 << 13);
587 }
588 else
589 {
590 if((0 == clr_indx) && (ai2_xfrac[1] && ai2_yfrac[1]))
591 lvl_shift1 = (1 << 13);
592 }
593
594 if(0 == clr_indx)
595 {
596 shift = ps_slice_hdr->s_wt_ofst.i1_luma_log2_weight_denom
597 + SHIFT_14_MINUS_BIT_DEPTH;
598
599 ps_codec->s_func_selector.ihevc_weighted_pred_uni_fptr(ps_pu->b2_pred_mode == PRED_L0 ? pi2_tmp1 : pi2_tmp2,
600 pu1_dst,
601 pu_wd,
602 ref_strd,
603 ps_pu->b2_pred_mode == PRED_L0 ? luma_weight_l0 : luma_weight_l1,
604 ps_pu->b2_pred_mode == PRED_L0 ? luma_offset_l0 : luma_offset_l1,
605 shift,
606 lvl_shift1,
607 pu_ht,
608 pu_wd);
609 }
610 else
611 {
612 shift = ps_slice_hdr->s_wt_ofst.i1_chroma_log2_weight_denom
613 + SHIFT_14_MINUS_BIT_DEPTH;
614
615 if(chroma_yuv420sp_vu)
616 {
617 ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr(ps_pu->b2_pred_mode == PRED_L0 ? pi2_tmp1 : pi2_tmp2,
618 pu1_dst,
619 pu_wd,
620 ref_strd,
621 ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cr : chroma_weight_l1_cr,
622 ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cb : chroma_weight_l1_cb,
623 ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cr : chroma_offset_l1_cr,
624 ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cb : chroma_offset_l1_cb,
625 shift,
626 lvl_shift1,
627 pu_ht >> 1,
628 pu_wd >> 1);
629 }
630 else
631 {
632 ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr(ps_pu->b2_pred_mode == PRED_L0 ? pi2_tmp1 : pi2_tmp2,
633 pu1_dst,
634 pu_wd,
635 ref_strd,
636 ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cb : chroma_weight_l1_cb,
637 ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cr : chroma_weight_l1_cr,
638 ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cb : chroma_offset_l1_cb,
639 ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cr : chroma_offset_l1_cr,
640 shift,
641 lvl_shift1,
642 pu_ht >> 1,
643 pu_wd >> 1);
644 }
645 }
646 }
647
648 else if((weighted_pred == 0) && (bi_pred != 0))
649 {
650 lvl_shift1 = 0;
651 lvl_shift2 = 0;
652 if((0 == clr_indx) && (ai2_xfrac[0] && ai2_yfrac[0]))
653 lvl_shift1 = (1 << 13);
654
655 if((0 == clr_indx) && (ai2_xfrac[1] && ai2_yfrac[1]))
656 lvl_shift2 = (1 << 13);
657
658 if(clr_indx != 0)
659 {
660 pu_ht = (pu_ht >> 1);
661 }
662 ps_codec->s_func_selector.ihevc_weighted_pred_bi_default_fptr(pi2_tmp1,
663 pi2_tmp2,
664 pu1_dst,
665 pu_wd,
666 pu_wd,
667 ref_strd,
668 lvl_shift1,
669 lvl_shift2,
670 pu_ht,
671 pu_wd);
672
673 }
674 }
675 }
676}