/******************************************************************************
 *
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
20
/**
*******************************************************************************
* @file
*  ih264e_process.c
*
* @brief
*  Contains functions for codec thread
*
* @author
*  Harish
*
* @par List of Functions:
*  - ih264e_generate_sps_pps()
*  - ih264e_init_entropy_ctxt()
*  - ih264e_entropy()
*  - ih264e_pack_header_data()
*  - ih264e_update_proc_ctxt()
*  - ih264e_init_proc_ctxt()
*  - ih264e_pad_recon_buffer()
*  - ih264e_dblk_pad_hpel_processing_n_mbs()
*  - ih264e_process()
*  - ih264e_set_rc_pic_params()
*  - ih264e_update_rc_post_enc()
*  - ih264e_process_thread()
*
* @remarks
*  None
*
*******************************************************************************
*/
51
52/*****************************************************************************/
53/* File Includes */
54/*****************************************************************************/
55
56/* System include files */
57#include <stdio.h>
58#include <stddef.h>
59#include <stdlib.h>
60#include <string.h>
61#include <limits.h>
62#include <assert.h>
63
64/* User include files */
65#include "ih264_typedefs.h"
66#include "iv2.h"
67#include "ive2.h"
68#include "ih264_defs.h"
69#include "ih264_debug.h"
70#include "ime_distortion_metrics.h"
71#include "ime_structs.h"
72#include "ih264_defs.h"
73#include "ih264_error.h"
74#include "ih264_structs.h"
75#include "ih264_trans_quant_itrans_iquant.h"
76#include "ih264_inter_pred_filters.h"
77#include "ih264_mem_fns.h"
78#include "ih264_padding.h"
79#include "ih264_intra_pred_filters.h"
80#include "ih264_deblk_edge_filters.h"
81#include "ih264_platform_macros.h"
82#include "ih264_macros.h"
83#include "ih264_error.h"
84#include "ih264_buf_mgr.h"
85#include "ih264e_error.h"
86#include "ih264e_bitstream.h"
87#include "ih264_structs.h"
88#include "ih264_common_tables.h"
89#include "ih264_list.h"
90#include "ih264e_defs.h"
91#include "irc_cntrl_param.h"
92#include "irc_frame_info_collector.h"
93#include "ih264e_rate_control.h"
94#include "ih264e_structs.h"
95#include "ih264e_process.h"
96#include "ithread.h"
97#include "ih264e_intra_modes_eval.h"
98#include "ih264e_encode_header.h"
99#include "ih264e_globals.h"
100#include "ih264e_config.h"
101#include "ih264e_trace.h"
102#include "ih264e_statistics.h"
103#include "ih264_cavlc_tables.h"
104#include "ih264e_cavlc.h"
105#include "ih264e_deblk.h"
106#include "ih264e_me.h"
107#include "ih264e_debug.h"
108#include "ih264e_process.h"
109#include "ih264e_master.h"
110#include "ih264e_utils.h"
111#include "irc_mem_req_and_acq.h"
112#include "irc_cntrl_param.h"
113#include "irc_frame_info_collector.h"
114#include "irc_rate_control_api.h"
115#include "ih264e_platform_macros.h"
116#include "ih264_padding.h"
117#include "ime_statistics.h"
118
119
120/*****************************************************************************/
121/* Function Definitions */
122/*****************************************************************************/
123
124/**
125******************************************************************************
126*
127* @brief This function generates sps, pps set on request
128*
129* @par Description
130* When the encoder is set in header generation mode, the following function
131* is called. This generates sps and pps headers and returns the control back
132* to caller.
133*
134* @param[in] ps_codec
135* pointer to codec context
136*
137* @return success or failure error code
138*
139******************************************************************************
140*/
141IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
142{
143 /* choose between ping-pong process buffer set */
144 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
145
146 /* entropy ctxt */
147 entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
148
149 /* Bitstream structure */
150 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
151
152 /* sps */
153 sps_t *ps_sps = NULL;
154
155 /* pps */
156 pps_t *ps_pps = NULL;
157
158 /* output buff */
159 out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel];
160
161
162 /********************************************************************/
163 /* initialize the bit stream buffer */
164 /********************************************************************/
165 ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize);
166
167 /********************************************************************/
168 /* BEGIN HEADER GENERATION */
169 /********************************************************************/
170 /*ps_codec->i4_pps_id ++;*/
171 ps_codec->i4_pps_id %= MAX_PPS_CNT;
172
173 /*ps_codec->i4_sps_id ++;*/
174 ps_codec->i4_sps_id %= MAX_SPS_CNT;
175
176 /* populate sps header */
177 ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
178 ih264e_populate_sps(ps_codec, ps_sps);
179
180 /* populate pps header */
181 ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
182 ih264e_populate_pps(ps_codec, ps_pps);
183
184 ps_entropy->i4_error_code = IH264E_SUCCESS;
185
186 /* generate sps */
187 ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps);
188
189 /* generate pps */
190 ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
191
192 /* queue output buffer */
193 ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset;
194
195 return ps_entropy->i4_error_code;
196}
197
198/**
199*******************************************************************************
200*
201* @brief initialize entropy context.
202*
203* @par Description:
204* Before invoking the call to perform to entropy coding the entropy context
205* associated with the job needs to be initialized. This involves the start
206* mb address, end mb address, slice index and the pointer to location at
207* which the mb residue info and mb header info are packed.
208*
209* @param[in] ps_proc
210* Pointer to the current process context
211*
212* @returns error status
213*
214* @remarks none
215*
216*******************************************************************************
217*/
218IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
219{
220 /* codec context */
221 codec_t *ps_codec = ps_proc->ps_codec;
222
223 /* entropy ctxt */
224 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
225
226 /* start address */
227 ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x;
228
229 /* end address */
230 ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt;
231
232 /* slice index */
233 ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add];
234
235 /* sof */
236 /* @ start of frame or start of a new slice, set sof flag */
237 if (ps_entropy->i4_mb_start_add == 0)
238 {
239 ps_entropy->i4_sof = 1;
240 }
241
242 if (ps_entropy->i4_mb_x == 0)
243 {
244 /* packed mb coeff data */
245 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
246 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
247
248 /* packed mb header data */
249 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
250 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
251 }
252
253 return IH264E_SUCCESS;
254}
255
256/**
257*******************************************************************************
258*
259* @brief entry point for entropy coding
260*
261* @par Description
262* This function calls lower level functions to perform entropy coding for a
263* group (n rows) of mb's. After encoding 1 row of mb's, the function takes
264* back the control, updates the ctxt and calls lower level functions again.
265* This process is repeated till all the rows or group of mb's (which ever is
266* minimum) are coded
267*
268* @param[in] ps_proc
269* process context
270*
271* @returns error status
272*
273* @remarks
274*
275*******************************************************************************
276*/
277#define GET_NUM_BITS(ps_bitstream) ((ps_bitstream->u4_strm_buf_offset << 3) + WORD_SIZE - ps_bitstream->i4_bits_left_in_cw)
278
279IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
280{
281 /* codec context */
282 codec_t *ps_codec = ps_proc->ps_codec;
283
284 /* entropy context */
285 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
286
287 /* sps */
288 sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
289
290 /* pps */
291 pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
292
293 /* slice header */
294 slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
295
296 /* slice type */
297 WORD32 i4_slice_type = ps_proc->i4_slice_type;
298
299 /* Bitstream structure */
300 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
301
302 /* output buff */
303 out_buf_t s_out_buf;
304
305 /* proc map */
306 UWORD8 *pu1_proc_map;
307
308 /* entropy map */
309 UWORD8 *pu1_entropy_map_curr;
310
311 /* proc base idx */
312 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
313
314 /* temp var */
315 WORD32 i4_wd_mbs, i4_ht_mbs;
316 UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
317
318 /********************************************************************/
319 /* BEGIN INIT */
320 /********************************************************************/
321
322 /* entropy encode start address */
323 u4_mb_idx = ps_entropy->i4_mb_start_add;
324
325 /* entropy encode end address */
326 u4_mb_end_idx = ps_entropy->i4_mb_end_add;
327
328 /* width in mbs */
329 i4_wd_mbs = ps_entropy->i4_wd_mbs;
330
331 /* height in mbs */
332 i4_ht_mbs = ps_entropy->i4_ht_mbs;
333
334 /* total mb cnt */
335 u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
336
337 /* proc map */
338 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
339
340 /* entropy map */
341 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
342
343 /********************************************************************/
344 /* @ start of frame / slice, */
345 /* initialize the output buffer, */
346 /* initialize the bit stream buffer, */
347 /* check if sps and pps headers have to be generated, */
348 /* populate and generate slice header */
349 /********************************************************************/
350 if (ps_entropy->i4_sof)
351 {
352 /********************************************************************/
353 /* initialize the output buffer */
354 /********************************************************************/
355 s_out_buf = ps_codec->as_out_buf[ctxt_sel];
356
357 /* is last frame to encode */
358 s_out_buf.u4_is_last = ps_entropy->u4_is_last;
359
360 /* frame idx */
361 s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
362 s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
363
364 /********************************************************************/
365 /* initialize the bit stream buffer */
366 /********************************************************************/
367 ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
368
369 /********************************************************************/
370 /* BEGIN HEADER GENERATION */
371 /********************************************************************/
372 if (1 == ps_entropy->i4_gen_header)
373 {
374 /* generate sps */
375 ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps);
376
377 /* generate pps */
378 ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
379
380 /* reset i4_gen_header */
381 ps_entropy->i4_gen_header = 0;
382 }
383
384 /* populate slice header */
385 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
386
387 /* generate slice header */
388 ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
389 ps_pps, ps_sps);
390
391 /* once start of frame / slice is done, you can reset it */
392 /* it is the responsibility of the caller to set this flag */
393 ps_entropy->i4_sof = 0;
394 }
395
396 /* begin entropy coding for the mb set */
397 while (u4_mb_idx < u4_mb_end_idx)
398 {
399 /* init ptrs/indices */
400 if (ps_entropy->i4_mb_x == i4_wd_mbs)
401 {
402 ps_entropy->i4_mb_y ++;
403 ps_entropy->i4_mb_x = 0;
404
405 /* packed mb coeff data */
406 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
407 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
408
409 /* packed mb header data */
410 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
411 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
412
413 /* proc map */
414 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
415
416 /* entropy map */
417 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
418 }
419
420 DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
421 ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
422 ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
423
424 /* wait until the curr mb is core coded */
425 /* The wait for curr mb to be core coded is essential when entropy is launched
426 * as a separate job
427 */
428 while (1)
429 {
430 volatile UWORD8 *pu1_buf1;
431 WORD32 idx = ps_entropy->i4_mb_x;
432
433 pu1_buf1 = pu1_proc_map + idx;
434 if(*pu1_buf1)
435 break;
436 ithread_yield();
437 }
438
439 /* write mb layer */
440 ps_codec->pf_write_mb_syntax_layer[i4_slice_type](ps_entropy);
441
442 /* set entropy map */
443 pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
444
445 u4_mb_idx ++;
446 ps_entropy->i4_mb_x ++;
447
448 if (ps_entropy->i4_mb_x == i4_wd_mbs)
449 {
450 /* if slices are enabled */
451 if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
452 {
453 /* current slice index */
454 WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
455
456 /* slice map */
457 UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
458
459 /* No need to open a slice at end of frame. The current slice can be closed at the time
460 * of signaling eof flag.
461 */
462 if ( (u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx != pu1_slice_idx[u4_mb_idx]))
463 {
464 /* mb skip run */
465 if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
466 {
467 if (*ps_entropy->pi4_mb_skip_run)
468 {
469 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
470 *ps_entropy->pi4_mb_skip_run = 0;
471 }
472 }
473
474 /* put rbsp trailing bits for the previous slice */
475 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
476
477 /* update slice header pointer */
478 i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
479 ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
480 ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
481
482 /* populate slice header */
483 ps_entropy->i4_mb_start_add = u4_mb_idx;
484 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
485
486 /* generate slice header */
487 ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
488 ps_pps, ps_sps);
489 }
490 }
491
492 /* Dont execute any further instructions until store synchronization took place */
493 DATA_SYNC();
494 }
495 }
496
497 /* check for eof */
498 if (u4_mb_idx == u4_mb_cnt)
499 {
500 /* set end of frame flag */
501 ps_entropy->i4_eof = 1;
502 }
503
504 if (ps_entropy->i4_eof)
505 {
506 /* mb skip run */
507 if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
508 {
509 if (*ps_entropy->pi4_mb_skip_run)
510 {
511 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
512 *ps_entropy->pi4_mb_skip_run = 0;
513 }
514 }
515
516 /* put rbsp trailing bits */
517 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
518
519 /* update current frame stats to rc library */
520 if (IVE_RC_NONE != ps_codec->s_cfg.e_rc_mode)
521 {
522 /* number of bytes to stuff */
523 WORD32 i4_stuff_bytes;
524
525 /* update */
526 i4_stuff_bytes = ih264e_update_rc_post_enc(ps_codec, ctxt_sel, ps_proc->i4_pic_cnt);
527
528 /* cbr rc - house keeping */
529 if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
530 {
531 ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
532 }
533 else if (i4_stuff_bytes)
534 {
535 /* add filler nal units */
536 ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
537 }
538 }
539
540 /********************************************************************/
541 /* signal the output */
542 /********************************************************************/
543 ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = ps_entropy->ps_bitstrm->u4_strm_buf_offset;
544
545 DEBUG("entropy status %x", ps_entropy->i4_error_code);
546 }
547
548 /* allow threads to dequeue entropy jobs */
549 ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
550
551 return ps_entropy->i4_error_code;
552}
553
554/**
555*******************************************************************************
556*
557* @brief Packs header information of a mb in to a buffer
558*
559* @par Description:
560* After the deciding the mode info of a macroblock, the syntax elements
561* associated with the mb are packed and stored. The entropy thread unpacks
562* this buffer and generates the end bit stream.
563*
564* @param[in] ps_proc
565* Pointer to the current process context
566*
567* @returns error status
568*
569* @remarks none
570*
571*******************************************************************************
572*/
573IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
574{
575 /* curr mb type */
576 UWORD32 u4_mb_type = ps_proc->u4_mb_type;
577
578 /* pack mb syntax layer of curr mb (used for entropy coding) */
579 if (u4_mb_type == I4x4)
580 {
581 /* pointer to mb header storage space */
582 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
583
584 /* temp var */
585 WORD32 i4, byte;
586
587 /* mb type plus mode */
588 *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
589
590 /* cbp */
591 *pu1_ptr++ = ps_proc->u4_cbp;
592
593 /* mb qp delta */
594 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
595
596 /* sub mb modes */
597 for (i4 = 0; i4 < 16; i4 ++)
598 {
599 byte = 0;
600
601 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
602 ps_proc->au1_intra_luma_mb_4x4_modes[i4])
603 {
604 byte |= 1;
605 }
606 else
607 {
608
609 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
610 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
611 {
612 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
613 }
614 else
615 {
616 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
617 }
618 }
619
620 i4++;
621
622 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
623 ps_proc->au1_intra_luma_mb_4x4_modes[i4])
624 {
625 byte |= 16;
626 }
627 else
628 {
629
630 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
631 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
632 {
633 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
634 }
635 else
636 {
637 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
638 }
639 }
640
641 *pu1_ptr++ = byte;
642 }
643
644 /* end of mb layer */
645 ps_proc->pv_mb_header_data = pu1_ptr;
646 }
647 else if (u4_mb_type == I16x16)
648 {
649 /* pointer to mb header storage space */
650 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
651
652 /* mb type plus mode */
653 *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
654
655 /* cbp */
656 *pu1_ptr++ = ps_proc->u4_cbp;
657
658 /* mb qp delta */
659 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
660
661 /* end of mb layer */
662 ps_proc->pv_mb_header_data = pu1_ptr;
663 }
664 else if (u4_mb_type == P16x16)
665 {
666 /* pointer to mb header storage space */
667 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
668
669 WORD16 *i2_mv_ptr;
670
671 /* mb type plus mode */
672 *pu1_ptr++ = u4_mb_type;
673
674 /* cbp */
675 *pu1_ptr++ = ps_proc->u4_cbp;
676
677 /* mb qp delta */
678 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
679
680 i2_mv_ptr = (WORD16 *)pu1_ptr;
681
682 *i2_mv_ptr++ = ps_proc->ps_pu->s_l0_mv.i2_mvx - ps_proc->ps_pred_mv->i2_mvx;
683
684 *i2_mv_ptr++ = ps_proc->ps_pu->s_l0_mv.i2_mvy - ps_proc->ps_pred_mv->i2_mvy;
685
686 /* end of mb layer */
687 ps_proc->pv_mb_header_data = i2_mv_ptr;
688 }
689 else if (u4_mb_type == PSKIP)
690 {
691 /* pointer to mb header storage space */
692 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
693
694 /* mb type plus mode */
695 *pu1_ptr++ = u4_mb_type;
696
697 /* end of mb layer */
698 ps_proc->pv_mb_header_data = pu1_ptr;
699 }
700
701 return IH264E_SUCCESS;
702}
703
704/**
705*******************************************************************************
706*
707* @brief update process context after encoding an mb. This involves preserving
708* the current mb information for later use, initialize the proc ctxt elements to
709* encode next mb.
710*
711* @par Description:
712* This function performs house keeping tasks after encoding an mb.
713* After encoding an mb, various elements of the process context needs to be
714* updated to encode the next mb. For instance, the source, recon and reference
715* pointers, mb indices have to be adjusted to the next mb. The slice index of
716* the current mb needs to be updated. If mb qp modulation is enabled, then if
717* the qp changes the quant param structure needs to be updated. Also to encoding
718* the next mb, the current mb info is used as part of mode prediction or mv
719* prediction. Hence the current mb info has to preserved at top/top left/left
720* locations.
721*
722* @param[in] ps_proc
723* Pointer to the current process context
724*
725* @returns none
726*
727* @remarks none
728*
729*******************************************************************************
730*/
731WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
732{
733 /* error status */
734 WORD32 error_status = IH264_SUCCESS;
735
736 /* codec context */
737 codec_t *ps_codec = ps_proc->ps_codec;
738
739 /* curr mb indices */
740 WORD32 i4_mb_x = ps_proc->i4_mb_x;
741 WORD32 i4_mb_y = ps_proc->i4_mb_y;
742
743 /* mb syntax elements of neighbors */
744 mb_info_t *ps_left_syn = &ps_proc->s_left_mb_syntax_ele;
745 mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
746 mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
747
748 /* curr mb type */
749 UWORD32 u4_mb_type = ps_proc->u4_mb_type;
750
751 /* curr mb type */
752 UWORD32 u4_is_intra = ps_proc->u4_is_intra;
753
754 /* width in mbs */
755 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
756
757 /*height in mbs*/
758 WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
759
760 /* proc map */
761 UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
762
763 /* deblk context */
764 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
765
766 /* deblk bs context */
767 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
768
769 /* top row motion vector info */
770 enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
771
772 /* top left mb motion vector */
773 enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
774
775 /* left mb motion vector */
776 enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
777
778 /* sub mb modes */
779 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
780
Hamsalekha S8d3d3032015-03-13 21:24:58 +0530781 /*************************************************************/
782 /* During MV prediction, when top right mb is not available, */
783 /* top left mb info. is used for prediction. Hence the curr */
784 /* top, which will be top left for the next mb needs to be */
785 /* preserved before updating it with curr mb info. */
786 /*************************************************************/
787
788 /* mb type, mb class, csbp */
789 *ps_top_left_syn = *ps_top_syn;
790
791 if (ps_proc->i4_slice_type == PSLICE)
792 {
793 /*****************************************/
794 /* update top left with top info results */
795 /*****************************************/
796
797 /* mv */
798 *ps_top_left_mb_pu = *ps_top_row_pu;
799 }
800
801 /*************************************************/
802 /* update top and left with curr mb info results */
803 /*************************************************/
804
805 /* mb type */
806 ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
807
808 /* mb class */
809 ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
810
811 /* csbp */
812 ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
813
814 /* distortion */
815 ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
816
817 if (u4_is_intra)
818 {
819 /* mb / sub mb modes */
820 if (I16x16 == u4_mb_type)
821 {
822 pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
823 }
824 else if (I4x4 == u4_mb_type)
825 {
826 ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
827 ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
828 }
829 else if (I8x8 == u4_mb_type)
830 {
831 memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
832 memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
833 }
834
835 if (ps_proc->i4_slice_type == PSLICE)
836 {
837 /* mv */
838 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
839
840// /* reset ngbr mv's */
841// ps_top_row_pu->i1_l0_ref_idx = -1;
842// ps_top_row_pu->s_l0_mv = zero_mv;
843//
844// *ps_left_mb_pu = *ps_top_row_pu;
845 }
846 }
847 else
848 {
849 /* mv */
850 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
851 }
852
853 /*
854 * Mark that the MB has been coded intra
855 * So that future AIRs can skip it
856 */
857 ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
858
859 /**************************************************/
860 /* pack mb header info. for entropy coding */
861 /**************************************************/
862 ih264e_pack_header_data(ps_proc);
863
864 /* update previous mb qp */
865 ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
866
867 /* store qp */
868 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
869
870 /*
871 * We need to sync the cache to make sure that the nmv content of proc
872 * is updated to cache properly
873 */
874 DATA_SYNC();
875
876 /* Just before finishing the row, enqueue the job in to entropy queue.
877 * The master thread depending on its convenience shall dequeue it and
878 * performs entropy.
879 *
880 * WARN !! Placing this block post proc map update can cause queuing of
881 * entropy jobs in out of order.
882 */
883 if (i4_mb_x == i4_wd_mbs - 1)
884 {
885 /* job structures */
886 job_t s_job;
887
888 /* job class */
889 s_job.i4_cmd = CMD_ENTROPY;
890
891 /* number of mbs to be processed in the current job */
892 s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
893
894 /* job start index x */
895 s_job.i2_mb_x = 0;
896
897 /* job start index y */
898 s_job.i2_mb_y = ps_proc->i4_mb_y;
899
900 /* proc base idx */
901 s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt & 1) ? (MAX_PROCESS_CTXT / 2): 0 ;
902
903 /* queue the job */
904 error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
905
906 if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
907 ih264_list_terminate(ps_codec->pv_entropy_jobq);
908 }
909
910 /* update proc map */
911 pu1_proc_map[i4_mb_x] = 1;
912
913 /**************************************************/
914 /* update proc ctxt elements for encoding next mb */
915 /**************************************************/
916 /* update indices */
917 i4_mb_x ++;
918 ps_proc->i4_mb_x = i4_mb_x;
919
920 if (ps_proc->i4_mb_x == i4_wd_mbs)
921 {
922 ps_proc->i4_mb_y++;
923 ps_proc->i4_mb_x = 0;
924 }
925
926 /* update slice index */
927 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
928
929 /* update buffers pointers */
930 ps_proc->pu1_src_buf_luma += MB_SIZE;
931 ps_proc->pu1_rec_buf_luma += MB_SIZE;
932 ps_proc->pu1_ref_buf_luma += MB_SIZE;
933
934 /*
935 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
936 * the stride per MB is MB_SIZE
937 */
938 ps_proc->pu1_src_buf_chroma += MB_SIZE;
939 ps_proc->pu1_rec_buf_chroma += MB_SIZE;
940 ps_proc->pu1_ref_buf_chroma += MB_SIZE;
941
Hamsalekha S8d3d3032015-03-13 21:24:58 +0530942
943 /* Reset cost, distortion params */
944 ps_proc->i4_mb_cost = INT_MAX;
945 ps_proc->i4_mb_distortion = SHRT_MAX;
946
947 ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
948
949 ps_proc->pu4_mb_pu_cnt += 1;
950
951 /* deblk ctxts */
952 if (ps_proc->u4_disable_deblock_level != 1)
953 {
954 /* indices */
955 ps_bs->i4_mb_x = ps_proc->i4_mb_x;
956 ps_bs->i4_mb_y = ps_proc->i4_mb_y;
957
958#ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
959 ps_deblk->i4_mb_x ++;
960
961 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
962 /*
963 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
964 * the stride per MB is MB_SIZE
965 */
966 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
967#endif
968 }
969
970 return error_status;
971}
972
973/**
974*******************************************************************************
975*
976* @brief initialize process context.
977*
978* @par Description:
979* Before dispatching the current job to process thread, the process context
980* associated with the job is initialized. Usually every job aims to encode one
981* row of mb's. Basing on the row indices provided by the job, the process
982* context's buffer ptrs, slice indices and other elements that are necessary
983* during core-coding are initialized.
984*
985* @param[in] ps_proc
986* Pointer to the current process context
987*
988* @returns error status
989*
990* @remarks none
991*
992*******************************************************************************
993*/
994IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
995{
996 /* codec context */
997 codec_t *ps_codec = ps_proc->ps_codec;
998
999 /* nmb processing context*/
1000 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1001
1002 /* indices */
1003 WORD32 i4_mb_x, i4_mb_y;
1004
1005 /* strides */
1006 WORD32 i4_src_strd = ps_proc->i4_src_strd;
1007 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1008
1009 /* quant params */
1010 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1011
1012 /* deblk ctxt */
1013 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1014
1015 /* deblk bs context */
1016 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
1017
1018 /* Pointer to mv_buffer of current frame */
1019 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
1020
1021 /* Pointers for color space conversion */
1022 UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
1023
1024 /* Pad the MB to support non standard sizes */
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301025 UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301026 UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301027 UWORD16 u2_num_rows = MB_SIZE;
1028 WORD32 convert_uv_only;
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301029
1030 /********************************************************************/
1031 /* BEGIN INIT */
1032 /********************************************************************/
1033
1034 i4_mb_x = ps_proc->i4_mb_x;
1035 i4_mb_y = ps_proc->i4_mb_y;
1036
1037 /* Number of mbs processed in one loop of process function */
1038 ps_proc->i4_nmb_ntrpy = (ps_proc->i4_wd_mbs > MAX_NMB) ? MAX_NMB : ps_proc->i4_wd_mbs;
1039 ps_proc->u4_nmb_me = (ps_proc->i4_wd_mbs > MAX_NMB)? MAX_NMB : ps_proc->i4_wd_mbs;
1040
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301041 convert_uv_only = 1;
1042 if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1))
1043 {
1044 u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
1045 ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
1046 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
1047 convert_uv_only = 0;
1048
1049 }
1050 else
1051 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
1052
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301053 /* init buffer pointers */
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301054
1055 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301056 ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1057 ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1058 ps_proc->pu1_ref_buf_luma = ps_proc->pu1_ref_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1059 ps_proc->pu1_ref_buf_chroma = ps_proc->pu1_ref_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1060
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301061
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301062 /*
1063 * Do color space conversion
1064 * NOTE : We assume there that the number of MB's to process will not span multiple rows
1065 */
1066 switch (ps_codec->s_cfg.e_inp_color_fmt)
1067 {
1068 case IV_YUV_420SP_UV:
1069 case IV_YUV_420SP_VU:
1070 break;
1071
1072 case IV_YUV_420P :
1073 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1074 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1075
1076 pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1077 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1078
1079 pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
1080 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
1081
1082 ps_codec->pf_ih264e_conv_420p_to_420sp(
1083 pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
1084 ps_proc->pu1_src_buf_luma,
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301085 ps_proc->pu1_src_buf_chroma, u2_num_rows,
1086 ps_codec->s_cfg.u4_disp_wd,
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301087 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
1088 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
1089 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301090 ps_proc->i4_src_strd, ps_proc->i4_src_strd,
1091 convert_uv_only);
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301092 break;
1093
1094 case IV_YUV_422ILE :
1095 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
1096 + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1097
1098 ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
1099 ps_proc->pu1_src_buf_luma,
1100 ps_proc->pu1_src_buf_chroma,
1101 ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301102 ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301103 ps_proc->i4_src_strd, ps_proc->i4_src_strd,
1104 ps_proc->i4_src_strd,
1105 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
1106 break;
1107
1108 default:
1109 break;
1110 }
1111
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301112 if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0) &&
1113 (ps_proc->i4_src_strd > (WORD32)ps_codec->s_cfg.u4_disp_wd) )
1114 {
1115 UWORD32 u4_pad_wd, u4_pad_ht;
1116 u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
1117 u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
1118 u4_pad_ht = MB_SIZE;
1119 if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1120 u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));
1121
1122 ih264_pad_right_luma(
1123 ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
1124 ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);
1125
1126 ih264_pad_right_chroma(
1127 ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
1128 ps_proc->i4_src_strd, u4_pad_ht / 2, u4_pad_wd);
1129 }
1130
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301131 /* pad bottom edge */
1132 if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
1133 {
1134 ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301135 ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301136
1137 ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd / 2,
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301138 ps_proc->i4_src_strd, ps_proc->i4_src_strd, (u4_pad_bottom_sz / 2));
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301139 }
1140
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301141
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301142 /* packed mb coeff data */
1143 ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
1144
1145 /* packed mb header data */
1146 ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
1147
1148 /* slice index */
1149 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
1150
1151 /*********************************************************************/
1152 /* ih264e_init_quant_params() routine is called at the pic init level*/
1153 /* this would have initialized the qp. */
1154 /* TODO_LATER: currently it is assumed that quant params donot change*/
1155 /* across mb's. When they do calculate update ps_qp_params accordingly*/
1156 /*********************************************************************/
1157
1158 /* init mv buffer ptr */
1159 ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
1160
1161 if (i4_mb_y == 0)
1162 {
1163 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
1164 }
1165 else
1166 {
1167 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
1168 }
1169
1170 ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
1171
1172 /* mb type */
1173 ps_proc->u4_mb_type = I16x16;
1174
1175 /* lambda */
1176 ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];
1177
1178 /* mb distortion */
1179 ps_proc->i4_mb_distortion = SHRT_MAX;
1180
1181 if (i4_mb_x == 0)
1182 {
1183 ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
1184
1185 ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
1186
1187 ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
1188
1189 if (i4_mb_y == 0)
1190 {
1191 memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
1192 }
1193 }
1194
1195 /* mb cost */
1196 ps_proc->i4_mb_cost = INT_MAX;
1197
1198 /**********************/
1199 /* init deblk context */
1200 /**********************/
1201 ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
1202 /* deblk lags the current mb proc by 1 row */
1203 /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
1204 /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */
1205 /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */
1206 ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
1207
1208 /* buffer ptrs */
1209 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
1210 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
1211
1212 /* init deblk bs context */
1213 /* mb indices */
1214 ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1215 ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1216
1217 /* init n_mb_process context */
1218 ps_n_mb_ctxt->i4_mb_x = 0;
1219 ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
1220 ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
1221
1222 return IH264E_SUCCESS;
1223}
1224
1225/**
1226*******************************************************************************
1227*
1228* @brief This function performs luma & chroma padding
1229*
1230* @par Description:
1231*
1232* @param[in] ps_proc
1233* Process context corresponding to the job
1234*
1235* @param[in] pu1_curr_pic_luma
1236* Pointer to luma buffer
1237*
1238* @param[in] pu1_curr_pic_chroma
1239* Pointer to chroma buffer
1240*
1241* @param[in] i4_mb_x
1242* mb index x
1243*
1244* @param[in] i4_mb_y
1245* mb index y
1246*
1247* @param[in] i4_pad_ht
1248* number of rows to be padded
1249*
1250* @returns error status
1251*
1252* @remarks none
1253*
1254*******************************************************************************
1255*/
1256IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
1257 UWORD8 *pu1_curr_pic_luma,
1258 UWORD8 *pu1_curr_pic_chroma,
1259 WORD32 i4_mb_x,
1260 WORD32 i4_mb_y,
1261 WORD32 i4_pad_ht)
1262{
1263 /* codec context */
1264 codec_t *ps_codec = ps_proc->ps_codec;
1265
1266 /* strides */
1267 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1268
1269 if (i4_mb_x == 0)
1270 {
1271 /* padding left luma */
1272 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
1273
1274 /* padding left chroma */
1275 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
1276 }
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301277 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301278 {
1279 /* padding right luma */
1280 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
1281
1282 /* padding right chroma */
1283 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
1284
1285 if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
1286 {
1287 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
1288 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
1289
1290 /* padding bottom luma */
1291 ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
1292
1293 /* padding bottom chroma */
1294 ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1295 }
1296 }
1297
1298 if (i4_mb_y == 0)
1299 {
1300 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
1301 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
1302 WORD32 wd = MB_SIZE;
1303
1304 if (i4_mb_x == 0)
1305 {
1306 pu1_rec_luma -= PAD_LEFT;
1307 pu1_rec_chroma -= PAD_LEFT;
1308
1309 wd += PAD_LEFT;
1310 }
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301311 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301312 {
1313 wd += PAD_RIGHT;
1314 }
1315
1316 /* padding top luma */
1317 ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
1318
1319 /* padding top chroma */
1320 ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
1321 }
1322
1323 return IH264E_SUCCESS;
1324}
1325
1326
1327
1328
1329/**
1330*******************************************************************************
1331*
1332* @brief This function performs deblocking, padding and halfpel generation for
1333* 'n' MBs
1334*
1335* @par Description:
1336*
1337* @param[in] ps_proc
1338* Process context corresponding to the job
1339*
1340* @param[in] pu1_curr_pic_luma
1341* Current MB being processed(Luma)
1342*
1343* @param[in] pu1_curr_pic_chroma
1344* Current MB being processed(Chroma)
1345*
1346* @param[in] i4_mb_x
1347* Column value of current MB processed
1348*
1349* @param[in] i4_mb_y
1350* Curent row processed
1351*
1352* @returns error status
1353*
1354* @remarks none
1355*
1356*******************************************************************************
1357*/
1358IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
1359 UWORD8 *pu1_curr_pic_luma,
1360 UWORD8 *pu1_curr_pic_chroma,
1361 WORD32 i4_mb_x,
1362 WORD32 i4_mb_y)
1363{
1364 /* codec context */
1365 codec_t *ps_codec = ps_proc->ps_codec;
1366
1367 /* n_mb processing context */
1368 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1369
1370 /* deblk context */
1371 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1372
1373 /* strides */
1374 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1375
1376 /* loop variables */
1377 WORD32 row, i, j, col;
1378
1379 /* Padding Width */
1380 UWORD32 u4_pad_wd;
1381
1382 /* deblk_map of the row being deblocked */
1383 UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
1384
1385 /* deblk_map_previous row */
1386 UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
1387
1388 WORD32 u4_pad_top = 0;
1389
1390 WORD32 u4_deblk_prev_row = 0;
1391
1392 /* Number of mbs to be processed */
1393 WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
1394
1395 /* Number of mbs actually processed
1396 * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
1397 WORD32 i4_n_mb_process_count = 0;
1398
1399 UWORD8 *pu1_pad_bottom_src = NULL;
1400
1401 UWORD8 *pu1_pad_src_luma = NULL;
1402 UWORD8 *pu1_pad_src_chroma = NULL;
1403
1404 if (ps_proc->u4_disable_deblock_level == 1)
1405 {
1406 /* If left most MB is processed, then pad left */
1407 if (i4_mb_x == 0)
1408 {
1409 /* padding left luma */
1410 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1411
1412 /* padding left chroma */
1413 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1414 }
1415 /*last col*/
1416 if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
1417 {
1418 /* padding right luma */
1419 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1420
1421 /* padding right chroma */
1422 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1423 }
1424 }
1425
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301426 if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301427 {
1428 /* if number of mb's to be processed are less than 'N', go back.
1429 * exception to the above clause is end of row */
1430 if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
1431 {
1432 return IH264E_SUCCESS;
1433 }
1434 else
1435 {
1436 i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
1437
1438 u4_deblk_prev_row = 1;
1439
1440 /* checking whether the top rows are deblocked */
1441 for (col = 0; col < i4_n_mb_process_count; col++)
1442 {
1443 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
1444 }
1445
1446 /* checking whether the top right MB is deblocked */
1447 if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
1448 {
1449 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
1450 }
1451
1452 /* performing deblocking for required number of MBs */
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301453 if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301454 {
1455 /* Top or Top right MBs not deblocked */
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301456 if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301457 {
1458 return IH264E_SUCCESS;
1459 }
1460
1461 for (row = 0; row < i4_n_mb_process_count; row++)
1462 {
1463 ih264e_deblock_mb(ps_proc, ps_deblk);
1464
1465 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1466
1467 if (ps_deblk->i4_mb_y > 0)
1468 {
1469 if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
1470 {
1471 /* padding left luma */
1472 ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
1473
1474 /* padding left chroma */
1475 ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1476 }
1477
1478 if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
1479 {
1480 /* padding right luma */
1481 ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1482
1483 /* padding right chroma */
1484 ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1485 }
1486 }
1487 ps_deblk->i4_mb_x++;
1488
1489 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1490 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1491
1492 }
1493 }
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301494 else if(i4_mb_y > 0)
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301495 {
1496 ps_deblk->i4_mb_x += i4_n_mb_process_count;
1497
1498 ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
1499 ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
1500 }
1501
1502 if (i4_mb_y == 2)
1503 {
1504 u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
1505 u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
1506
1507 if (ps_n_mb_ctxt->i4_mb_x == 0)
1508 {
1509 u4_pad_wd += PAD_LEFT;
1510 u4_pad_top = -PAD_LEFT;
1511 }
1512
1513 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1514 {
1515 u4_pad_wd += PAD_RIGHT;
1516 }
1517
1518 /* padding top luma */
1519 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
1520
1521 /* padding top chroma */
1522 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
1523 }
1524
1525 ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
1526
1527 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1528 {
1529 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1530 {
1531 /* Bottom Padding is done in one stretch for the entire width */
1532 if (ps_proc->u4_disable_deblock_level != 1)
1533 {
1534 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
1535
1536 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
1537
1538 ps_n_mb_ctxt->i4_mb_x = 0;
1539 ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
1540 ps_deblk->i4_mb_x = 0;
1541 ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
1542
1543 /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
1544 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1545
1546 i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
1547
1548 j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
1549
1550 for (i = 0; i < j; i++)
1551 {
1552 for (col = 0; col < i4_n_mbs; col++)
1553 {
1554 ih264e_deblock_mb(ps_proc, ps_deblk);
1555
1556 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1557
1558 ps_deblk->i4_mb_x++;
1559 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1560 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1561 ps_n_mb_ctxt->i4_mb_x++;
1562 }
1563 }
1564
1565 for (col = 0; col < i4_n_mb_process_count; col++)
1566 {
1567 ih264e_deblock_mb(ps_proc, ps_deblk);
1568
1569 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1570
1571 ps_deblk->i4_mb_x++;
1572 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1573 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1574 ps_n_mb_ctxt->i4_mb_x++;
1575 }
1576
1577 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
1578
1579 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
1580
1581 /* padding left luma */
1582 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1583
1584 /* padding left chroma */
1585 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1586
1587 pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1588 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1589
1590 /* padding left luma */
1591 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1592
1593 /* padding left chroma */
1594 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1595
1596 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1597
1598 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1599
1600 /* padding right luma */
1601 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1602
1603 /* padding right chroma */
1604 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1605
1606 pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1607 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1608
1609 /* padding right luma */
1610 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1611
1612 /* padding right chroma */
1613 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1614
1615 }
1616
Harish Mahendrakarc72323e2015-04-28 19:07:40 +05301617 /* In case height is less than 2 MBs pad top */
1618 if (ps_proc->i4_ht_mbs <= 2)
1619 {
1620 UWORD8 *pu1_pad_top_src;
1621 /* padding top luma */
1622 pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
1623 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
1624
1625 /* padding top chroma */
1626 pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
1627 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
1628 }
1629
Hamsalekha S8d3d3032015-03-13 21:24:58 +05301630 /* padding bottom luma */
1631 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
1632 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
1633
1634 /* padding bottom chroma */
1635 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
1636 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1637 }
1638 }
1639 }
1640 }
1641
1642 return IH264E_SUCCESS;
1643}
1644
1645
1646/**
1647*******************************************************************************
1648*
1649* @brief This function performs luma & chroma core coding for a set of mb's.
1650*
1651* @par Description:
1652* The mb to be coded is taken and is evaluated over a predefined set of modes
1653* (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
1654* is selected and using intra/inter prediction filters, prediction is carried out.
1655* The deviation between src and pred signal constitutes error signal. This error
1656* signal is transformed (hierarchical transform if necessary) and quantized. The
1657* quantized residue is packed in to entropy buffer for entropy coding. This is
1658* repeated for all the mb's enlisted under the job.
1659*
1660* @param[in] ps_proc
1661* Process context corresponding to the job
1662*
1663* @returns error status
1664*
1665* @remarks none
1666*
1667*******************************************************************************
1668*/
1669WORD32 ih264e_process(process_ctxt_t *ps_proc)
1670{
1671 /* error status */
1672 WORD32 error_status = IH264_SUCCESS;
1673
1674 /* codec context */
1675 codec_t *ps_codec = ps_proc->ps_codec;
1676
1677 /* cbp luma, chroma */
1678 UWORD32 u4_cbp_l, u4_cbp_c;
1679
1680 /* width in mbs */
1681 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
1682
1683 /* loop var */
1684 WORD32 i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
1685
1686 /* valid modes */
1687 UWORD32 u4_valid_modes = 0;
1688
1689 /* gate threshold */
1690 WORD32 i4_gate_threshold = 0;
1691
1692 /* is intra */
1693 WORD32 luma_idx, chroma_idx, is_intra;
1694
1695 /* temp variables */
1696 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
1697
1698 /* list of modes for evaluation */
1699 if (ps_proc->i4_slice_type == ISLICE)
1700 {
1701 /* enable intra 16x16 */
1702 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1703
1704 /* enable intra 8x8 */
1705 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
1706
1707 /* enable intra 4x4 */
1708 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1709 }
1710 else if (ps_proc->i4_slice_type == PSLICE)
1711 {
1712 /* enable intra 16x16 */
1713 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1714
1715 /* enable intra 4x4 */
1716 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1717 {
1718 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1719 }
1720
1721 /* enable inter 16x16 */
1722 u4_valid_modes |= (1 << P16x16);
1723 }
1724
1725
1726 /* init entropy */
1727 ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
1728 ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
1729 ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
1730
1731 /* compute recon when :
1732 * 1. current frame is to be used as a reference
1733 * 2. dump recon for bit stream sanity check
1734 */
1735 ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
1736 ps_codec->s_cfg.u4_enable_recon;
1737
1738 /* Encode 'n' macroblocks,
1739 * 'n' being the number of mbs dictated by current proc ctxt */
1740 for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
1741 {
1742 /* since we have not yet found sad, we have not yet got min sad */
1743 /* we need to initialize these variables for each MB */
1744 /* TODO how to get the min sad into the codec */
1745 ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
1746 ps_proc->u4_min_sad_reached = 0;
1747
1748 /* mb analysis */
1749 {
1750 /* temp var */
1751 WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
1752
1753 /* force intra refresh ? */
1754 WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
1755 (ps_proc->pu1_is_intra_coded[i4_mb_id] != 0) ||
1756 (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
1757
1758 /* evaluate inter 16x16 modes */
1759 if (u4_valid_modes & (1 << P16x16))
1760 {
1761 /* compute nmb me */
1762 if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
1763 {
1764 ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
1765 i4_wd_mbs - ps_proc->i4_mb_x));
1766 }
1767
1768 /* set pointers to ME data appropriately for other modules to use */
1769 {
1770 UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
1771
1772 /* get the min sad condition for current mb */
1773 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
1774 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
1775
1776 ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].s_skip_mv);
1777 ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
1778 ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].s_pred_mv);
1779
1780 ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
1781 ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
1782 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
1783 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
1784 ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
1785
1786 /* get the best sub pel buffer */
1787 ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
1788 ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
1789 }
1790 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
1791 }
1792 else
1793 {
1794 /* Derive neighbor availability for the current macroblock */
1795 ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
1796
1797 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
1798 }
1799
1800 /*
1801 * If air says intra, we need to force the following code path to evaluate intra
1802 * The easy way is just to say that the inter cost is too much
1803 */
1804 if (!i4_air_enable_inter)
1805 {
1806 ps_proc->u4_min_sad_reached = 0;
1807 ps_proc->i4_mb_cost = INT_MAX;
1808 ps_proc->i4_mb_distortion = INT_MAX;
1809 }
1810 else if (ps_proc->u4_mb_type == PSKIP)
1811 {
1812 goto UPDATE_MB_INFO;
1813 }
1814
1815 /* wait until the proc of [top + 1] mb is computed.
1816 * We wait till the proc dependencies are satisfied */
1817 if(ps_proc->i4_mb_y > 0)
1818 {
1819 /* proc map */
1820 UWORD8 *pu1_proc_map_top;
1821
1822 pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
1823
1824 while (1)
1825 {
1826 volatile UWORD8 *pu1_buf;
1827 WORD32 idx = i4_mb_idx + 1;
1828
1829 idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
1830 pu1_buf = pu1_proc_map_top + idx;
1831 if(*pu1_buf)
1832 break;
1833 ithread_yield();
1834 }
1835 }
1836
1837 /* If we already have the minimum sad, there is no point in searching for sad again */
1838 if (ps_proc->u4_min_sad_reached == 0)
1839 {
1840 /* intra gating in inter slices */
1841 /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
1842 if (i4_air_enable_inter && ps_proc->i4_slice_type == PSLICE && ps_codec->u4_inter_gate)
1843 {
1844 /* distortion of neighboring blocks */
1845 WORD32 i4_distortion[4];
1846
1847 i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
1848
1849 i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
1850
1851 i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
1852
1853 i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
1854
1855 i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
1856
1857 }
1858
1859 /* If we are going to force intra we need to evaluate intra irrespective of gating */
1860 if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
1861 {
1862 /* evaluate intra 4x4 modes */
1863 if (u4_valid_modes & (1 << I4x4))
1864 {
1865 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1866 {
1867 ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
1868 }
1869 else
1870 {
1871 ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
1872 }
1873 }
1874
1875 /* evaluate intra 16x16 modes */
1876 if (u4_valid_modes & (1 << I16x16))
1877 {
1878 ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
1879 }
1880
1881 /* evaluate intra 8x8 modes */
1882 if (u4_valid_modes & (1 << I8x8))
1883 {
1884 ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
1885 }
1886 }
1887
1888 }
1889 }
1890
1891 /* is intra */
1892 if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
1893 {
1894 luma_idx = ps_proc->u4_mb_type;
1895 chroma_idx = 0;
1896 is_intra = 1;
1897
1898 /* evaluate chroma blocks for intra */
1899 ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
1900 }
1901 else
1902 {
1903 luma_idx = 3;
1904 chroma_idx = 1;
1905 is_intra = 0;
1906 }
1907 ps_proc->u4_is_intra = is_intra;
1908
1909 /* redo MV pred of neighbors in the case intra mb */
1910 /* TODO : currently called unconditionally, needs to be called only in the case of intra
1911 * to modify neighbors */
1912 if (ps_proc->i4_slice_type != ISLICE)
1913 {
1914 ih264e_mv_pred(ps_proc);
1915 }
1916
1917 /* Perform luma mb core coding */
1918 u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
1919
1920 /* Perform chroma mb core coding */
1921 u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
1922
1923 /* coded block pattern */
1924 ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
1925
1926 /* mb skip */
1927 if (is_intra == 0)
1928 {
1929 if (ps_proc->u4_cbp == 0)
1930 {
1931 /* get skip mv */
1932 UWORD32 u4_for_me = 0;
1933 ih264e_find_skip_motion_vector(ps_proc,u4_for_me);
1934
1935 /* skip ? */
1936 if (ps_proc->ps_skip_mv->i2_mvx == ps_proc->ps_pu->s_l0_mv.i2_mvx &&
1937 ps_proc->ps_skip_mv->i2_mvy == ps_proc->ps_pu->s_l0_mv.i2_mvy)
1938 {
1939 ps_proc->u4_mb_type = PSKIP;
1940 }
1941 }
1942 }
1943
1944UPDATE_MB_INFO:
1945
1946 /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
1947 ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
1948
1949 /**********************************************************************/
1950 /* if disable deblock level is '0' this implies enable deblocking for */
1951 /* all edges of all macroblocks with out any restrictions */
1952 /* */
1953 /* if disable deblock level is '1' this implies disable deblocking for*/
1954 /* all edges of all macroblocks with out any restrictions */
1955 /* */
1956 /* if disable deblock level is '2' this implies enable deblocking for */
1957 /* all edges of all macroblocks except edges overlapping with slice */
1958 /* boundaries. This option is not currently supported by the encoder */
1959 /* hence the slice map should be of no significance to perform debloc */
1960 /* king */
1961 /**********************************************************************/
1962
1963 if (ps_proc->u4_compute_recon)
1964 {
1965 /* deblk context */
1966 /* src pointers */
1967 UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
1968 UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
1969
1970 /* src indices */
1971 UWORD32 i4_mb_x = ps_proc->i4_mb_x;
1972 UWORD32 i4_mb_y = ps_proc->i4_mb_y;
1973
1974 /* compute blocking strength */
1975 if (ps_proc->u4_disable_deblock_level != 1)
1976 {
1977 ih264e_compute_bs(ps_proc);
1978 }
1979
1980 /* nmb deblocking and hpel and padding */
1981 ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
1982 pu1_cur_pic_chroma, i4_mb_x,
1983 i4_mb_y);
1984 }
1985
1986 /* update the context for coding the next mb */
1987 error_status |= ih264e_update_proc_ctxt(ps_proc);
1988
1989 /* Once the last row is processed, mark the buffer status appropriately */
1990 if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
1991 {
1992 /* Pointer to current picture buffer structure */
1993 pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
1994
1995 /* Pointer to current picture's mv buffer structure */
1996 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
1997
1998 /**********************************************************************/
1999 /* if disable deblock level is '0' this implies enable deblocking for */
2000 /* all edges of all macroblocks with out any restrictions */
2001 /* */
2002 /* if disable deblock level is '1' this implies disable deblocking for*/
2003 /* all edges of all macroblocks with out any restrictions */
2004 /* */
2005 /* if disable deblock level is '2' this implies enable deblocking for */
2006 /* all edges of all macroblocks except edges overlapping with slice */
2007 /* boundaries. This option is not currently supported by the encoder */
2008 /* hence the slice map should be of no significance to perform debloc */
2009 /* king */
2010 /**********************************************************************/
2011 error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
2012
2013 error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
2014
2015 if (ps_codec->s_cfg.u4_enable_recon)
2016 {
2017 /* pic cnt */
2018 ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
2019
2020 /* rec buffers */
2021 ps_codec->as_rec_buf[ctxt_sel].s_pic_buf = *ps_proc->ps_cur_pic;
2022
2023 /* is last? */
2024 ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
2025
2026 /* frame time stamp */
2027 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
2028 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
2029 }
2030
2031 }
2032 }
2033
2034 DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
2035
2036 return error_status;
2037}
2038
2039/**
2040*******************************************************************************
2041*
2042* @brief
2043* function to receive frame qp and pic type before encoding
2044*
2045* @par Description:
2046* Before encoding the frame, this function calls the rc library for frame qp
2047* and picture type
2048*
2049* @param[in] ps_codec
2050* Pointer to codec context
2051*
2052* @param[in] pic_cnt
2053* pic count
2054*
2055* @param[out] pi4_pic_type
2056* pic type
2057
2058* @returns skip_src
2059* if the source frame rate and target frame rate are not identical, the encoder
2060* skips few source frames. skip_src is set when the source need not be encoded.
2061*
2062* @remarks none
2063*
2064*******************************************************************************
2065*/
2066WORD32 ih264e_set_rc_pic_params(codec_t *ps_codec, WORD32 cur_pic_cnt, WORD32 *pi4_pic_type)
2067{
2068 /* rate control context */
2069 rate_control_ctxt_t *ps_rate_control = &ps_codec->s_rate_control;
2070
2071 /* frame qp */
2072 UWORD8 u1_frame_qp;
2073
2074 /* pic type */
2075 PIC_TYPE_T pic_type = PIC_NA;
2076
2077 /* should src be skipped */
2078 WORD32 skip_src = 0;
2079
2080 /* temp var */
2081 WORD32 delta_time_stamp = 1;
2082
2083 /* see if the app requires any specific frame */
2084 if (ps_codec->force_curr_frame_type == IV_IDR_FRAME || ps_codec->force_curr_frame_type == IV_I_FRAME)
2085 {
2086 irc_force_I_frame(ps_codec->s_rate_control.pps_rate_control_api);
2087 }
2088
2089 /* call rate control lib to get curr pic type and qp to be used */
2090 skip_src = ih264e_rc_pre_enc(ps_rate_control->pps_rate_control_api,
2091 ps_rate_control->pps_pd_frm_rate,
2092 ps_rate_control->pps_time_stamp,
2093 ps_rate_control->pps_frame_time,
2094 delta_time_stamp,
2095 (ps_codec->s_cfg.i4_wd_mbs * ps_codec->s_cfg.i4_ht_mbs),
2096 &ps_rate_control->e_pic_type,
2097 &u1_frame_qp);
2098
2099 switch (ps_rate_control->e_pic_type)
2100 {
2101 case I_PIC:
2102 pic_type = PIC_I;
2103 break;
2104
2105 case P_PIC:
2106 pic_type = PIC_P;
2107 break;
2108
2109 case B_PIC:
2110 pic_type = PIC_B;
2111 break;
2112
2113 default:
2114 break;
2115 }
2116
2117 /* is idr? */
2118 if ((0 == cur_pic_cnt % ps_codec->s_cfg.u4_idr_frm_interval) ||
2119 ps_codec->force_curr_frame_type == IV_IDR_FRAME)
2120 {
2121 pic_type = PIC_IDR;
2122 }
2123
2124 /* force frame tag is not sticky */
2125 if (ps_codec->force_curr_frame_type == IV_IDR_FRAME || ps_codec->force_curr_frame_type == IV_I_FRAME)
2126 {
2127 ps_codec->force_curr_frame_type = IV_NA_FRAME;
2128 }
2129
2130 /* qp */
2131 ps_codec->u4_frame_qp = gau1_mpeg2_to_h264_qmap[u1_frame_qp];
2132
2133 /* pic type */
2134 *pi4_pic_type = pic_type;
2135
2136 return skip_src;
2137}
2138
2139/**
2140*******************************************************************************
2141*
2142* @brief
2143* Function to update rc context after encoding
2144*
2145* @par Description
2146* This function updates the rate control context after the frame is encoded.
2147* Number of bits consumed by the current frame, frame distortion, frame cost,
2148* number of intra/inter mb's, ... are passed on to rate control context for
2149* updating the rc model.
2150*
2151* @param[in] ps_codec
2152* Handle to codec context
2153*
2154* @param[in] ctxt_sel
2155* frame context selector
2156*
2157* @param[in] pic_cnt
2158* pic count
2159*
2160* @returns i4_stuffing_byte
2161* number of stuffing bytes (if necessary)
2162*
2163* @remarks
2164*
2165*******************************************************************************
2166*/
2167WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 pic_cnt)
2168{
2169 /* proc set base idx */
2170 WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
2171
2172 /* proc ctxt */
2173 process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
2174
2175 /* frame qp */
2176 UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
2177
2178 /* cbr rc return status */
2179 WORD32 i4_stuffing_byte = 0;
2180
2181 /* current frame stats */
2182 frame_info_t s_frame_info;
2183 picture_type_e rc_pic_type;
2184
2185 /* temp var */
2186 WORD32 i, j;
2187
2188 /********************************************************************/
2189 /* BEGIN INIT */
2190 /********************************************************************/
2191
2192 /* init frame info */
2193 irc_init_frame_info(&s_frame_info);
2194
2195 /* get frame info */
2196 for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
2197 {
2198 /*****************************************************************/
2199 /* One frame can be encoded by max of u4_num_cores threads */
2200 /* Accumulating the num mbs, sad, qp and intra_mb_cost from */
2201 /* u4_num_cores threads */
2202 /*****************************************************************/
2203 for (j = 0; j< MAX_MB_TYPE; j++)
2204 {
2205 s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
2206
2207 s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
2208
2209 s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
2210 }
2211
2212 s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
2213
2214 s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
2215
2216 /*****************************************************************/
2217 /* gather number of residue and header bits consumed by the frame*/
2218 /*****************************************************************/
2219 ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
2220 }
2221
2222 /* get pic type */
2223 switch (ps_codec->pic_type)
2224 {
2225 case PIC_I:
2226 case PIC_IDR:
2227 rc_pic_type = I_PIC;
2228 break;
2229 case PIC_P:
2230 rc_pic_type = P_PIC;
2231 break;
2232 case PIC_B:
2233 rc_pic_type = B_PIC;
2234 break;
2235 default:
2236 assert(0);
2237 break;
2238 }
2239
2240 /* update rc lib with current frame stats */
2241 i4_stuffing_byte = ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
2242 &(s_frame_info),
2243 ps_codec->s_rate_control.pps_pd_frm_rate,
2244 ps_codec->s_rate_control.pps_time_stamp,
2245 ps_codec->s_rate_control.pps_frame_time,
2246 (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
2247 &rc_pic_type,
2248 pic_cnt,
2249 &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
2250 u1_frame_qp,
2251 &ps_codec->s_rate_control.num_intra_in_prev_frame,
2252 &ps_codec->s_rate_control.i4_avg_activity);
2253
2254 /* in case the frame needs to be skipped, the frame num should not be incremented */
2255 if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
2256 {
2257 ps_codec->i4_frame_num --;
2258 }
2259
2260 return i4_stuffing_byte;
2261}
2262
2263/**
2264*******************************************************************************
2265*
2266* @brief
2267* entry point of a spawned encoder thread
2268*
2269* @par Description:
2270* The encoder thread dequeues a proc/entropy job from the encoder queue and
2271* calls necessary routines.
2272*
2273* @param[in] pv_proc
2274* Process context corresponding to the thread
2275*
2276* @returns error status
2277*
2278* @remarks
2279*
2280*******************************************************************************
2281*/
2282WORD32 ih264e_process_thread(void *pv_proc)
2283{
2284 /* error status */
2285 IH264_ERROR_T ret = IH264_SUCCESS;
2286 WORD32 error_status = IH264_SUCCESS;
2287
2288 /* proc ctxt */
2289 process_ctxt_t *ps_proc = pv_proc;
2290
2291 /* codec ctxt */
2292 codec_t *ps_codec = ps_proc->ps_codec;
2293
2294 /* structure to represent a processing job entry */
2295 job_t s_job;
2296
2297 /* blocking call : entropy dequeue is non-blocking till all
2298 * the proc jobs are processed */
2299 WORD32 is_blocking = 0;
2300
2301 /* set affinity */
2302 ithread_set_affinity(ps_proc->i4_id);
2303
2304 while(1)
2305 {
2306 /* dequeue a job from the entropy queue */
2307 {
2308 int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
2309
2310 /* codec context selector */
2311 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
2312
2313 volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
2314
2315 /* have the lock */
2316 if (error == 0)
2317 {
2318 if (*pu4_buf == 0)
2319 {
2320 /* no entropy threads are active, try dequeuing a job from the entropy queue */
2321 ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
2322 if (IH264_SUCCESS == ret)
2323 {
2324 *pu4_buf = 1;
2325 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2326 goto WORKER;
2327 }
2328 else if(is_blocking)
2329 {
2330 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2331 break;
2332 }
2333 }
2334 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2335 }
2336 }
2337
2338 /* dequeue a job from the process queue */
2339 ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
2340 if (IH264_SUCCESS != ret)
2341 {
2342 if(ps_proc->i4_id)
2343 break;
2344 else
2345 {
2346 is_blocking = 1;
2347 continue;
2348 }
2349 }
2350
2351WORKER:
2352 /* choose appropriate proc context based on proc_base_idx */
2353 ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
2354
2355 switch (s_job.i4_cmd)
2356 {
2357 case CMD_PROCESS:
2358 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
2359 ps_proc->i4_mb_x = s_job.i2_mb_x;
2360 ps_proc->i4_mb_y = s_job.i2_mb_y;
2361
2362 /* init process context */
2363 ih264e_init_proc_ctxt(ps_proc);
2364
2365 /* core code all mbs enlisted under the current job */
2366 error_status |= ih264e_process(ps_proc);
2367 break;
2368
2369 case CMD_ENTROPY:
2370 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
2371 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
2372 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
2373
2374 /* init entropy */
2375 ih264e_init_entropy_ctxt(ps_proc);
2376
2377 /* entropy code all mbs enlisted under the current job */
2378 error_status |= ih264e_entropy(ps_proc);
2379 break;
2380
2381 default:
2382 error_status |= IH264_FAIL;
2383 break;
2384 }
2385 }
2386
2387 /* send error code */
2388 ps_proc->i4_error_code = error_status;
2389 return ret;
2390}