Added support for Main Profile toolsets in encoder.

Added support for CABAC entropy coding.
Added support for B slices.
Fixed an issue in rate control constant QP mode.

Change-Id: Ib759d35e8e943f941aa9b8bbff0362d92c619994
diff --git a/encoder/ih264e_mc.c b/encoder/ih264e_mc.c
index 2dd0974..2b19dd1 100644
--- a/encoder/ih264e_mc.c
+++ b/encoder/ih264e_mc.c
@@ -19,25 +19,25 @@
 */
 
 /**
-*******************************************************************************
-* @file
-*  ih264e_mc.c
-*
-* @brief
-*  Contains definition of functions for motion compensation
-*
-* @author
-*  ittiam
-*
-* @par List of Functions:
-*  - ih264e_motion_comp_luma()
-*  - ih264e_motion_comp_chroma()
-*
-* @remarks
-*  None
-*
-*******************************************************************************
-*/
+ *******************************************************************************
+ * @file
+ *  ih264e_mc.c
+ *
+ * @brief
+ *  Contains definition of functions for motion compensation
+ *
+ * @author
+ *  ittiam
+ *
+ * @par List of Functions:
+ *  - ih264e_motion_comp_luma()
+ *  - ih264e_motion_comp_chroma()
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
 
 /*****************************************************************************/
 /* File Includes                                                             */
@@ -52,6 +52,7 @@
 #include "iv2.h"
 #include "ive2.h"
 #include "ime_distortion_metrics.h"
+#include "ime_defs.h"
 #include "ime_structs.h"
 #include "ih264_structs.h"
 #include "ih264_inter_pred_filters.h"
@@ -60,57 +61,52 @@
 #include "ih264_intra_pred_filters.h"
 #include "ih264_deblk_edge_filters.h"
 #include "ih264_trans_quant_itrans_iquant.h"
-#include "ih264_inter_pred_filters.h"
-#include "ih264_mem_fns.h"
-#include "ih264_padding.h"
-#include "ih264_intra_pred_filters.h"
-#include "ih264_deblk_edge_filters.h"
+#include "ih264_cabac_tables.h"
 #include "ih264e_defs.h"
 #include "ih264e_error.h"
 #include "ih264e_bitstream.h"
 #include "irc_cntrl_param.h"
 #include "irc_frame_info_collector.h"
 #include "ih264e_rate_control.h"
+#include "ih264e_cabac_structs.h"
 #include "ih264e_structs.h"
 #include "ih264e_mc.h"
 #include "ih264e_half_pel.h"
 
-
 /*****************************************************************************/
 /* Function Definitions                                                      */
 /*****************************************************************************/
 
 /**
-******************************************************************************
-*
-* @brief
-*  performs motion compensation for a luma mb for the given mv.
-*
-* @par Description
-*  This routine performs motion compensation of an inter mb. When the inter
-*  mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer
-*  to pred buffer. In this case the function returns pointer and stride of the
-*  ref. buffer and this info is used in place of pred buffer else where.
-*  In other cases, the pred buffer is populated via copy / filtering + copy
-*  (q pel cases) and returned.
-*
-* @param[in] ps_proc
-*  pointer to current proc ctxt
-*
-* @param[out] pu1_pseudo_pred
-*  pseudo prediction buffer
-*
-* @param[out] u4_pseudo_pred_strd
-*  pseudo pred buffer stride
-*
-* @return  none
-*
-* @remarks Assumes half pel buffers for the entire frame are populated.
-*
-******************************************************************************
-*/
-void ih264e_motion_comp_luma(process_ctxt_t *ps_proc,
-                             UWORD8 **pu1_pseudo_pred,
+ ******************************************************************************
+ *
+ * @brief
+ *  performs motion compensation for a luma mb for the given mv.
+ *
+ * @par Description
+ *  This routine performs motion compensation of an inter mb. When the inter
+ *  mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer
+ *  to pred buffer. In this case the function returns pointer and stride of the
+ *  ref. buffer and this info is used in place of pred buffer elsewhere.
+ *  In other cases, the pred buffer is populated via copy / filtering + copy
+ *  (q pel cases) and returned.
+ *
+ * @param[in] ps_proc
+ *  pointer to current proc ctxt
+ *
+ * @param[out] pu1_pseudo_pred
+ *  pseudo prediction buffer
+ *
+ * @param[out] u4_pseudo_pred_strd
+ *  pseudo pred buffer stride
+ *
+ * @return  none
+ *
+ * @remarks Assumes half pel buffers for the entire frame are populated.
+ *
+ ******************************************************************************
+ */
+void ih264e_motion_comp_luma(process_ctxt_t *ps_proc, UWORD8 **pu1_pseudo_pred,
                              WORD32 *pi4_pseudo_pred_strd)
 {
     /* codec context */
@@ -152,51 +148,96 @@
     /* half / qpel coefficient */
     UWORD32 u4_subpel_factor;
 
+    /* BIPRED Flag */
+    WORD32 i4_bipred_flag;
+
     /* temp var */
     UWORD32 u4_lkup_idx1;
 
     /* Init */
     i4_ref_strd[0] = ps_proc->i4_rec_strd;
 
-    i4_ref_strd[1] = i4_ref_strd[2] = i4_ref_strd[3] = ps_me_ctxt->u4_hp_buf_strd;
+    i4_ref_strd[1] = i4_ref_strd[2] = i4_ref_strd[3] =
+                    ps_me_ctxt->u4_subpel_buf_strd;
 
-    for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; u4_num_prtn++)
+    for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions;
+                    u4_num_prtn++)
     {
+        mv_t *ps_curr_mv;
+
         /* update ptr to curr partition */
         ps_curr_pu = ps_proc->ps_pu + u4_num_prtn;
 
+        /* Default to no bipred */
+        i4_bipred_flag = 0;
+
+        switch (ps_curr_pu->b2_pred_mode)
+        {
+            case PRED_L0:
+                ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv;
+                pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0];
+                break;
+
+            case PRED_L1:
+                ps_curr_mv = &ps_curr_pu->s_me_info[1].s_mv;
+                pu1_ref[0] = ps_proc->apu1_ref_buf_luma[1];
+                break;
+
+            case PRED_BI:
+                /*
+                 * In case of PRED_BI, we only need to ensure that
+                 * the reference buffer that gets selected is
+                 * ps_proc->pu1_best_subpel_buf
+                 */
+
+                /* Dummy */
+                ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv;
+                pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0];
+
+                i4_bipred_flag = 1;
+                break;
+
+            default:
+                ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv;
+                pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0];
+                break;
+
+        }
 
         /* get full pel mv's (full pel units) */
-        u4_mv_x_full = ps_curr_pu->s_l0_mv.i2_mvx >> 2;
-        u4_mv_y_full = ps_curr_pu->s_l0_mv.i2_mvy >> 2;
+        u4_mv_x_full = ps_curr_mv->i2_mvx >> 2;
+        u4_mv_y_full = ps_curr_mv->i2_mvy >> 2;
 
         /* get half pel mv's */
-        u4_mv_x_hpel = (ps_curr_pu->s_l0_mv.i2_mvx & 0x2) >> 1;
-        u4_mv_y_hpel = (ps_curr_pu->s_l0_mv.i2_mvy & 0x2) >> 1;
+        u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
+        u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
 
         /* get quarter pel mv's */
-        u4_mv_x_qpel = (ps_curr_pu->s_l0_mv.i2_mvx & 0x1);
-        u4_mv_y_qpel = (ps_curr_pu->s_l0_mv.i2_mvy & 0x1);
+        u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
+        u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
 
         /* width and height of partition */
         wd = (ps_curr_pu->b4_wd + 1) << 2;
         ht = (ps_curr_pu->b4_ht + 1) << 2;
 
         /* decision ? qpel/hpel, fpel */
-        u4_subpel_factor = (u4_mv_y_hpel << 3) + (u4_mv_x_hpel << 2) + (u4_mv_y_qpel << 1) + (u4_mv_x_qpel);
+        u4_subpel_factor = (u4_mv_y_hpel << 3) + (u4_mv_x_hpel << 2)
+                        + (u4_mv_y_qpel << 1) + (u4_mv_x_qpel);
 
-        /* update ref buffer ptrs */
-        pu1_ref[0] = ps_proc->pu1_ref_buf_luma + (u4_mv_y_full * i4_ref_strd[0]) + u4_mv_x_full;
+        /* Move ref to position given by MV */
+        pu1_ref[0] += ((u4_mv_y_full * i4_ref_strd[0]) + u4_mv_x_full);
 
-        pu1_ref[1] =  ps_proc->pu1_best_subpel_buf;
+        /* Sub pel ptrs / Bipred pointers init */
+        pu1_ref[1] = ps_proc->pu1_best_subpel_buf;
         i4_ref_strd[1] = ps_proc->u4_bst_spel_buf_strd;
 
-
         /* update pred buff ptr */
-        pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd + 4 * ps_curr_pu->b4_pos_x;
+        pu1_pred = ps_proc->pu1_pred_mb
+                        + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd
+                        + 4 * ps_curr_pu->b4_pos_x;
 
-        /*u4_lkup_idx1 will be non zero for half pel*/
-        u4_lkup_idx1 = (u4_subpel_factor >> 2 ) != 0 ;
+        /* u4_lkup_idx1 will be non zero for half pel and bipred */
+        u4_lkup_idx1 = ((u4_subpel_factor >> 2) != 0) || i4_bipred_flag;
 
         {
             /********************************************************************/
@@ -218,7 +259,11 @@
              */
             else
             {
-                ps_codec->pf_inter_pred_luma_copy(pu1_ref[u4_lkup_idx1], pu1_pred, i4_ref_strd[u4_lkup_idx1], i4_pred_strd, ht, wd, NULL, 0);
+                ps_codec->pf_inter_pred_luma_copy(pu1_ref[u4_lkup_idx1],
+                                                  pu1_pred,
+                                                  i4_ref_strd[u4_lkup_idx1],
+                                                  i4_pred_strd, ht, wd, NULL,
+                                                  0);
             }
 
         }
@@ -226,24 +271,24 @@
 }
 
 /**
-******************************************************************************
-*
-* @brief
-*  performs motion compensation for chroma mb
-*
-* @par   Description
-*  Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
-*  according to the motion vectors given
-*
-* @param[in] ps_proc
-*  pointer to current proc ctxt
-*
-* @return  none
-*
-* @remarks Assumes half pel and quarter pel buffers for the entire frame are
-*  populated.
-******************************************************************************
-*/
+ ******************************************************************************
+ *
+ * @brief
+ *  performs motion compensation for chroma mb
+ *
+ * @par   Description
+ *  Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
+ *  according to the motion vectors given
+ *
+ * @param[in] ps_proc
+ *  pointer to current proc ctxt
+ *
+ * @return  none
+ *
+ * @remarks Assumes half pel and quarter pel buffers for the entire frame are
+ *  populated.
+ ******************************************************************************
+ */
 void ih264e_motion_comp_chroma(process_ctxt_t *ps_proc)
 {
     /* codec context */
@@ -283,38 +328,122 @@
     WORD32 u4_mv_y;
     UWORD8 u1_dx, u1_dy;
 
-    for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; u4_num_prtn++)
+    for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions;
+                    u4_num_prtn++)
     {
-        ps_curr_pu =ps_proc->ps_pu + u4_num_prtn;
+        mv_t *ps_curr_mv;
 
-        u4_mv_x = ps_curr_pu->s_l0_mv.i2_mvx >> 3;
-        u4_mv_y = ps_curr_pu->s_l0_mv.i2_mvy >> 3;
+        ps_curr_pu = ps_proc->ps_pu + u4_num_prtn;
 
-        /*  corresponds to full pel motion vector in luma, but in chroma corresponds to pel formed with dx, dy =4*/
-        u4_mv_x_full = (ps_curr_pu->s_l0_mv.i2_mvx & 0x4) >> 2;
-        u4_mv_y_full = (ps_curr_pu->s_l0_mv.i2_mvy & 0x4) >> 2;
+        if (ps_curr_pu->b2_pred_mode != PRED_BI)
+        {
+            ps_curr_mv = &ps_curr_pu->s_me_info[ps_curr_pu->b2_pred_mode].s_mv;
+            pu1_ref = ps_proc->apu1_ref_buf_chroma[ps_curr_pu->b2_pred_mode];
 
-        /* get half pel mv's */
-        u4_mv_x_hpel = (ps_curr_pu->s_l0_mv.i2_mvx & 0x2) >> 1;
-        u4_mv_y_hpel = (ps_curr_pu->s_l0_mv.i2_mvy & 0x2) >> 1;
+            u4_mv_x = ps_curr_mv->i2_mvx >> 3;
+            u4_mv_y = ps_curr_mv->i2_mvy >> 3;
 
-        /* get quarter pel mv's */
-        u4_mv_x_qpel = (ps_curr_pu->s_l0_mv.i2_mvx & 0x1);
-        u4_mv_y_qpel = (ps_curr_pu->s_l0_mv.i2_mvy & 0x1);
+            /* corresponds to full pel motion vector in luma, but in chroma corresponds to pel formed with dx, dy = 4 */
+            u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2;
+            u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2;
 
-        /* width and height of sub macro block */
-        wd = (ps_curr_pu->b4_wd + 1) << 1;
-        ht = (ps_curr_pu->b4_ht + 1) << 1;
+            /* get half pel mv's */
+            u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
+            u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
 
-        /* move the pointers so that they point to the motion compensated locations */
-        pu1_ref = ps_proc->pu1_ref_buf_chroma + (u4_mv_y * i4_ref_strd) + (u4_mv_x << 1);
+            /* get quarter pel mv's */
+            u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
+            u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
 
-        pu1_pred = ps_proc->pu1_pred_mb + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd + 2 * ps_curr_pu->b4_pos_x;
+            /* width and height of sub macro block */
+            wd = (ps_curr_pu->b4_wd + 1) << 1;
+            ht = (ps_curr_pu->b4_ht + 1) << 1;
 
-        u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel);
-        u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel);
+            /* move the pointers so that they point to the motion compensated locations */
+            pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1));
 
-        ps_codec->pf_inter_pred_chroma(pu1_ref, pu1_pred, i4_ref_strd, i4_pred_strd,
-                                   u1_dx, u1_dy, ht, wd);
+            pu1_pred = ps_proc->pu1_pred_mb
+                            + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd
+                            + 2 * ps_curr_pu->b4_pos_x;
+
+            u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel);
+            u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel);
+
+            /* cases where u1_dx = 0 or u1_dy = 0 are dealt separately in neon with
+             * separate functions for better performance
+             *
+             * ih264_inter_pred_chroma_dx_zero_a9q
+             * and
+             * ih264_inter_pred_chroma_dy_zero_a9q
+             */
+
+            ps_codec->pf_inter_pred_chroma(pu1_ref, pu1_pred, i4_ref_strd,
+                                           i4_pred_strd, u1_dx, u1_dy, ht, wd);
+        }
+        else /* If the pred mode is PRED_BI */
+        {
+            /*
+             * We need to interpolate the L0 and L1 ref pics with the chroma MV
+             * then use them to average for bilinear inter pred
+             */
+            WORD32 i4_predmode;
+            UWORD8 *pu1_ref_buf[2];
+
+            /* Temporary buffers to store the interpolated value from L0 and L1 */
+            pu1_ref_buf[PRED_L0] = ps_proc->apu1_subpel_buffs[0];
+            pu1_ref_buf[PRED_L1] = ps_proc->apu1_subpel_buffs[1];
+
+
+            for (i4_predmode = 0; i4_predmode < PRED_BI; i4_predmode++)
+            {
+                ps_curr_mv = &ps_curr_pu->s_me_info[i4_predmode].s_mv;
+                pu1_ref = ps_proc->apu1_ref_buf_chroma[i4_predmode];
+
+                u4_mv_x = ps_curr_mv->i2_mvx >> 3;
+                u4_mv_y = ps_curr_mv->i2_mvy >> 3;
+
+                /*
+                 * corresponds to full pel motion vector in luma, but in chroma
+                 * corresponds to pel formed with dx, dy = 4
+                 */
+                u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2;
+                u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2;
+
+                /* get half pel mv's */
+                u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
+                u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
+
+                /* get quarter pel mv's */
+                u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
+                u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
+
+                /* width and height of sub macro block */
+                wd = (ps_curr_pu->b4_wd + 1) << 1;
+                ht = (ps_curr_pu->b4_ht + 1) << 1;
+
+                /* move the pointers so that they point to the motion compensated locations */
+                pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1));
+
+                pu1_pred = ps_proc->pu1_pred_mb
+                                + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd
+                                + 2 * ps_curr_pu->b4_pos_x;
+
+                u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1)
+                                + (u4_mv_x_qpel);
+                u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1)
+                                + (u4_mv_y_qpel);
+
+                ps_codec->pf_inter_pred_chroma(pu1_ref,
+                                               pu1_ref_buf[i4_predmode],
+                                               i4_ref_strd, MB_SIZE, u1_dx,
+                                               u1_dy, ht, wd);
+            }
+
+            ps_codec->pf_inter_pred_luma_bilinear(pu1_ref_buf[PRED_L0],
+                                                  pu1_ref_buf[PRED_L1], pu1_pred,
+                                                  MB_SIZE, MB_SIZE,
+                                                  i4_pred_strd, MB_SIZE >> 1,
+                                                  MB_SIZE);
+        }
     }
 }