Initial version

Change-Id: I7efe9a589cd24edf86e8d086b40c27cbbf8b4017
diff --git a/common/ih264_weighted_pred.c b/common/ih264_weighted_pred.c
new file mode 100755
index 0000000..d5d73f2
--- /dev/null
+++ b/common/ih264_weighted_pred.c
@@ -0,0 +1,495 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : ih264_weighted_pred.c                                */
+/*                                                                           */
+/*  Description       : Contains function definitions for weighted           */
+/*                      prediction functions                                 */
+/*                                                                           */
+/*  List of Functions : ih264_default_weighted_pred_luma()                   */
+/*                      ih264_default_weighted_pred_chroma()                 */
+/*                      ih264_weighted_pred_luma()                           */
+/*                      ih264_weighted_pred_chroma()                         */
+/*                      ih264_weighted_bipred_luma()                         */
+/*                      ih264_weighted_bipred_chroma()                       */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 01 2015   Kaushik         Initial version                      */
+/*                      Senthoor                                             */
+/*                                                                           */
+/*****************************************************************************/
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* User include files */
+#include "ih264_typedefs.h"
+#include "ih264_macros.h"
+#include "ih264_platform_macros.h"
+#include "ih264_weighted_pred.h"
+
+/*****************************************************************************/
+/*  Function definitions .                                                   */
+/*****************************************************************************/
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264_default_weighted_pred_luma                         */
+/*                                                                           */
+/*  Description   : This function performs the default weighted prediction   */
+/*                  as described in sec 8.4.2.3.1 titled "Default weighted   */
+/*                  sample prediction process" for luma. The function gets   */
+/*                  two ht x wd blocks, calculates their rounded-average and */
+/*                  stores it in the destination block. (ht,wd) can be       */
+/*                  (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16).   */
+/*                                                                           */
+/*  Inputs        : puc_src1  - Pointer to source 1                          */
+/*                  puc_src2  - Pointer to source 2                          */
+/*                  puc_dst   - Pointer to destination                       */
+/*                  src_strd1 - stride for source 1                          */
+/*                  src_strd1 - stride for source 2                          */
+/*                  dst_strd  - stride for destination                       */
+/*                  ht        - height of the block                          */
+/*                  wd        - width of the block                           */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 01 2015   Kaushik         Initial Version                      */
+/*                      Senthoor                                             */
+/*                                                                           */
+/*****************************************************************************/
+void ih264_default_weighted_pred_luma(UWORD8 *pu1_src1,
+                                      UWORD8 *pu1_src2,
+                                      UWORD8 *pu1_dst,
+                                      WORD32 src_strd1,
+                                      WORD32 src_strd2,
+                                      WORD32 dst_strd,
+                                      WORD32 ht,
+                                      WORD32 wd)
+{
+    WORD32 i, j;
+
+    src_strd1 -= wd;
+    src_strd2 -= wd;
+    dst_strd -= wd;
+
+    for(i = 0; i < ht; i++)
+    {
+        for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
+            *pu1_dst = (*pu1_src1 + *pu1_src2 + 1) >> 1;
+
+        pu1_src1 += src_strd1;
+        pu1_src2 += src_strd2;
+        pu1_dst += dst_strd;
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264_default_weighted_pred_chroma                       */
+/*                                                                           */
+/*  Description   : This function performs the default weighted prediction   */
+/*                  as described in sec 8.4.2.3.1 titled "Default weighted   */
+/*                  sample prediction process" for chroma. The function gets */
+/*                  two ht x wd blocks, calculates their rounded-average and */
+/*                  stores it in the destination block. (ht,wd) can be       */
+/*                  (2,2), (4,2) , (2,4), (4,4), (8,4), (4,8) or (8,8).      */
+/*                                                                           */
+/*  Inputs        : puc_src1  - Pointer to source 1                          */
+/*                  puc_src2  - Pointer to source 2                          */
+/*                  puc_dst   - Pointer to destination                       */
+/*                  src_strd1 - stride for source 1                          */
+/*                  src_strd1 - stride for source 2                          */
+/*                  dst_strd  - stride for destination                       */
+/*                  ht        - height of the block                          */
+/*                  wd        - width of the block                           */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 01 2015   Kaushik         Initial Version                      */
+/*                      Senthoor                                             */
+/*                                                                           */
+/*****************************************************************************/
+void ih264_default_weighted_pred_chroma(UWORD8 *pu1_src1,
+                                        UWORD8 *pu1_src2,
+                                        UWORD8 *pu1_dst,
+                                        WORD32 src_strd1,
+                                        WORD32 src_strd2,
+                                        WORD32 dst_strd,
+                                        WORD32 ht,
+                                        WORD32 wd)
+{
+    WORD32 i, j;
+
+    wd = wd << 1;
+
+    src_strd1 -= wd;
+    src_strd2 -= wd;
+    dst_strd -= wd;
+
+    for(i = 0; i < ht; i++)
+    {
+        for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
+            *pu1_dst = (*pu1_src1 + *pu1_src2 + 1) >> 1;
+
+        pu1_src1 += src_strd1;
+        pu1_src2 += src_strd2;
+        pu1_dst += dst_strd;
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264_weighted_pred_luma                                 */
+/*                                                                           */
+/*  Description   : This function performs the weighted prediction as        */
+/*                  described in sec 8.4.2.3.2 titled "Weighted sample       */
+/*                  prediction process" for luma. The function gets one      */
+/*                  ht x wd block, weights it, rounds it off, offsets it,    */
+/*                  saturates it to unsigned 8-bit and stores it in the      */
+/*                  destination block. (ht,wd) can be (4,4), (8,4), (4,8),   */
+/*                  (8,8), (16,8), (8,16) or (16,16).                        */
+/*                                                                           */
+/*  Inputs        : puc_src  - Pointer to source                             */
+/*                  puc_dst  - Pointer to destination                        */
+/*                  src_strd - stride for source                             */
+/*                  dst_strd - stride for destination                        */
+/*                  log_wd   - number of bits to be rounded off              */
+/*                  wt       - weight value                                  */
+/*                  ofst     - offset value                                  */
+/*                  ht       - height of the block                           */
+/*                  wd       - width of the block                            */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 01 2015   Kaushik         Initial Version                      */
+/*                      Senthoor                                             */
+/*                                                                           */
+/*****************************************************************************/
+void ih264_weighted_pred_luma(UWORD8 *pu1_src,
+                              UWORD8 *pu1_dst,
+                              WORD32 src_strd,
+                              WORD32 dst_strd,
+                              WORD32 log_wd,
+                              WORD32 wt,
+                              WORD32 ofst,
+                              WORD32 ht,
+                              WORD32 wd)
+{
+    WORD32 i, j;
+
+    wt = (WORD16)(wt & 0xffff);
+    ofst = (WORD8)(ofst & 0xff);
+
+    src_strd -= wd;
+    dst_strd -= wd;
+
+    if(log_wd >= 1)
+    {
+        WORD32 i_ofst = (1 << (log_wd - 1)) + (ofst << log_wd);
+        for(i = 0; i < ht; i++)
+        {
+            for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
+                *pu1_dst = CLIP_U8((wt * (*pu1_src) + i_ofst) >> log_wd);
+
+            pu1_src += src_strd;
+            pu1_dst += dst_strd;
+        }
+    }
+    else
+    {
+        for(i = 0; i < ht; i++)
+        {
+            for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
+                *pu1_dst = CLIP_U8(wt * (*pu1_src) + ofst);
+
+            pu1_src += src_strd;
+            pu1_dst += dst_strd;
+        }
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264_weighted_pred_chroma                               */
+/*                                                                           */
+/*  Description   : This function performs the weighted prediction as        */
+/*                  described in sec 8.4.2.3.2 titled "Weighted sample       */
+/*                  prediction process" for chroma. The function gets one    */
+/*                  ht x wd block, weights it, rounds it off, offsets it,    */
+/*                  saturates it to unsigned 8-bit and stores it in the      */
+/*                  destination block. (ht,wd) can be (2,2), (4,2), (2,4),   */
+/*                  (4,4), (8,4), (4,8) or (8,8).                            */
+/*                                                                           */
+/*  Inputs        : puc_src  - Pointer to source                             */
+/*                  puc_dst  - Pointer to destination                        */
+/*                  src_strd - stride for source                             */
+/*                  dst_strd - stride for destination                        */
+/*                  log_wd   - number of bits to be rounded off              */
+/*                  wt       - weight values for u and v                     */
+/*                  ofst     - offset values for u and v                     */
+/*                  ht       - height of the block                           */
+/*                  wd       - width of the block                            */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 01 2015   Kaushik         Initial Version                      */
+/*                      Senthoor                                             */
+/*                                                                           */
+/*****************************************************************************/
+void ih264_weighted_pred_chroma(UWORD8 *pu1_src,
+                                UWORD8 *pu1_dst,
+                                WORD32 src_strd,
+                                WORD32 dst_strd,
+                                WORD32 log_wd,
+                                WORD32 wt,
+                                WORD32 ofst,
+                                WORD32 ht,
+                                WORD32 wd)
+{
+    WORD32 i, j;
+    WORD32 wt_u, wt_v;
+    WORD32 ofst_u, ofst_v;
+
+    wt_u = (WORD16)(wt & 0xffff);
+    wt_v = (WORD16)(wt >> 16);
+
+    ofst_u = (WORD8)(ofst & 0xff);
+    ofst_v = (WORD8)(ofst >> 8);
+
+    src_strd -= wd << 1;
+    dst_strd -= wd << 1;
+
+    if(log_wd >= 1)
+    {
+        ofst_u = (1 << (log_wd - 1)) + (ofst_u << log_wd);
+        ofst_v = (1 << (log_wd - 1)) + (ofst_v << log_wd);
+
+        for(i = 0; i < ht; i++)
+        {
+            for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
+            {
+                *pu1_dst = CLIP_U8((wt_u * (*pu1_src) + ofst_u) >> log_wd);
+                pu1_src++;
+                pu1_dst++;
+                *pu1_dst = CLIP_U8((wt_v * (*pu1_src) + ofst_v) >> log_wd);
+            }
+            pu1_src += src_strd;
+            pu1_dst += dst_strd;
+        }
+    }
+    else
+    {
+        for(i = 0; i < ht; i++)
+        {
+            for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
+            {
+                *pu1_dst = CLIP_U8(wt_u * (*pu1_src) + ofst_u);
+                pu1_src++;
+                pu1_dst++;
+                *pu1_dst = CLIP_U8(wt_v * (*pu1_src) + ofst_v);
+            }
+            pu1_src += src_strd;
+            pu1_dst += dst_strd;
+        }
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264_weighted_bi_pred_luma                              */
+/*                                                                           */
+/*  Description   : This function performs the weighted biprediction as      */
+/*                  described in sec 8.4.2.3.2 titled "Weighted sample       */
+/*                  prediction process" for luma. The function gets two      */
+/*                  ht x wd blocks, weights them, adds them, rounds off the  */
+/*                  sum, offsets it, saturates it to unsigned 8-bit and      */
+/*                  stores it in the destination block. (ht,wd) can be       */
+/*                  (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16).   */
+/*                                                                           */
+/*  Inputs        : puc_src1  - Pointer to source 1                          */
+/*                  puc_src2  - Pointer to source 2                          */
+/*                  puc_dst   - Pointer to destination                       */
+/*                  src_strd1 - stride for source 1                          */
+/*                  src_strd2 - stride for source 2                          */
+/*                  dst_strd2 - stride for destination                       */
+/*                  log_wd    - number of bits to be rounded off             */
+/*                  wt1       - weight value for source 1                    */
+/*                  wt2       - weight value for source 2                    */
+/*                  ofst1     - offset value for source 1                    */
+/*                  ofst2     - offset value for source 2                    */
+/*                  ht        - height of the block                          */
+/*                  wd        - width of the block                           */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 01 2015   Kaushik         Initial Version                      */
+/*                      Senthoor                                             */
+/*                                                                           */
+/*****************************************************************************/
+void ih264_weighted_bi_pred_luma(UWORD8 *pu1_src1,
+                                 UWORD8 *pu1_src2,
+                                 UWORD8 *pu1_dst,
+                                 WORD32 src_strd1,
+                                 WORD32 src_strd2,
+                                 WORD32 dst_strd,
+                                 WORD32 log_wd,
+                                 WORD32 wt1,
+                                 WORD32 wt2,
+                                 WORD32 ofst1,
+                                 WORD32 ofst2,
+                                 WORD32 ht,
+                                 WORD32 wd)
+{
+    WORD32 i, j;
+    WORD32 shft, ofst;
+
+    ofst1 = (WORD8)(ofst1 & 0xff);
+    ofst2 = (WORD8)(ofst2 & 0xff);
+    wt1 = (WORD16)(wt1 & 0xffff);
+    wt2 = (WORD16)(wt2 & 0xffff);
+    ofst = (ofst1 + ofst2 + 1) >> 1;
+
+    shft = log_wd + 1;
+    ofst = (1 << log_wd) + (ofst << shft);
+
+    src_strd1 -= wd;
+    src_strd2 -= wd;
+    dst_strd -= wd;
+
+    for(i = 0; i < ht; i++)
+    {
+        for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
+            *pu1_dst = CLIP_U8((wt1 * (*pu1_src1) + wt2 * (*pu1_src2) + ofst) >> shft);
+
+        pu1_src1 += src_strd1;
+        pu1_src2 += src_strd2;
+        pu1_dst += dst_strd;
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : ih264_weighted_bi_pred_chroma                            */
+/*                                                                           */
+/*  Description   : This function performs the weighted biprediction as      */
+/*                  described in sec 8.4.2.3.2 titled "Weighted sample       */
+/*                  prediction process" for chroma. The function gets two    */
+/*                  ht x wd blocks, weights them, adds them, rounds off the  */
+/*                  sum, offsets it, saturates it to unsigned 8-bit and      */
+/*                  stores it in the destination block. (ht,wd) can be       */
+/*                  (2,2), (4,2), (2,4), (4,4), (8,4), (4,8) or (8,8).       */
+/*                                                                           */
+/*  Inputs        : puc_src1  - Pointer to source 1                          */
+/*                  puc_src2  - Pointer to source 2                          */
+/*                  puc_dst   - Pointer to destination                       */
+/*                  src_strd1 - stride for source 1                          */
+/*                  src_strd2 - stride for source 2                          */
+/*                  dst_strd2 - stride for destination                       */
+/*                  log_wd    - number of bits to be rounded off             */
+/*                  wt1       - weight values for u and v in source 1        */
+/*                  wt2       - weight values for u and v in source 2        */
+/*                  ofst1     - offset value for u and v in source 1         */
+/*                  ofst2     - offset value for u and v in source 2         */
+/*                  ht        - height of the block                          */
+/*                  wd        - width of the block                           */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 01 2015   Kaushik         Initial Version                      */
+/*                      Senthoor                                             */
+/*                                                                           */
+/*****************************************************************************/
+void ih264_weighted_bi_pred_chroma(UWORD8 *pu1_src1,
+                                   UWORD8 *pu1_src2,
+                                   UWORD8 *pu1_dst,
+                                   WORD32 src_strd1,
+                                   WORD32 src_strd2,
+                                   WORD32 dst_strd,
+                                   WORD32 log_wd,
+                                   WORD32 wt1,
+                                   WORD32 wt2,
+                                   WORD32 ofst1,
+                                   WORD32 ofst2,
+                                   WORD32 ht,
+                                   WORD32 wd)
+{
+    WORD32 i, j;
+    WORD32 wt1_u, wt1_v, wt2_u, wt2_v;
+    WORD32 ofst1_u, ofst1_v, ofst2_u, ofst2_v;
+    WORD32 ofst_u, ofst_v;
+    WORD32 shft;
+
+    ofst1_u = (WORD8)(ofst1 & 0xff);
+    ofst1_v = (WORD8)(ofst1 >> 8);
+    ofst2_u = (WORD8)(ofst2 & 0xff);
+    ofst2_v = (WORD8)(ofst2 >> 8);
+    wt1_u = (WORD16)(wt1 & 0xffff);
+    wt1_v = (WORD16)(wt1 >> 16);
+    wt2_u = (WORD16)(wt2 & 0xffff);
+    wt2_v = (WORD16)(wt2 >> 16);
+    ofst_u = (ofst1_u + ofst2_u + 1) >> 1;
+    ofst_v = (ofst1_v + ofst2_v + 1) >> 1;
+
+    src_strd1 -= wd << 1;
+    src_strd2 -= wd << 1;
+    dst_strd -= wd << 1;
+
+    shft = log_wd + 1;
+    ofst_u = (1 << log_wd) + (ofst_u << shft);
+    ofst_v = (1 << log_wd) + (ofst_v << shft);
+
+    for(i = 0; i < ht; i++)
+    {
+        for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
+        {
+            *pu1_dst = CLIP_U8((wt1_u * (*pu1_src1) + wt2_u * (*pu1_src2) + ofst_u) >> shft);
+            pu1_src1++;
+            pu1_src2++;
+            pu1_dst++;
+            *pu1_dst = CLIP_U8((wt1_v * (*pu1_src1) + wt2_v * (*pu1_src2) + ofst_v) >> shft);
+        }
+        pu1_src1 += src_strd1;
+        pu1_src2 += src_strd2;
+        pu1_dst += dst_strd;
+    }
+}