Initial version Change-Id: I5a9473876e596e7461e91f971b0243f694f7e8fb

commit: aed24eee7ddfc93f1436b0c1679431bd286879b4 [log] [tgz]
author: Venkatarama Avadhani <venkatarama.avadhani@ittiam.com> Wed Mar 11 10:08:57 2015 +0530
committer: Harish Mahendrakar <harish.mahendrakar@ittiam.com> Tue Apr 07 18:19:15 2015 +0530
tree: 9399f32cdfa15ac9720ded9c8a8093876ba03376
parent: 839aea316dc98d258d75f7e2878b21db032a82c1 [diff]
diff --git a/Android.mk b/Android.mk
new file mode 100644
index 0000000..4668c52
--- /dev/null
+++ b/Android.mk

@@ -0,0 +1,6 @@
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+# decoder
+include $(LOCAL_PATH)/decoder.mk
+

diff --git a/common/arm/impeg2_format_conv.s b/common/arm/impeg2_format_conv.s
new file mode 100644
index 0000000..c07edda
--- /dev/null
+++ b/common/arm/impeg2_format_conv.s

@@ -0,0 +1,391 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+
+@/*
+@//----------------------------------------------------------------------------
+@// File Name            : impeg2_format_conv.s
+@//
+@// Description          : This file has the YUV420P to YUV420SP format
+@//                        conversion routines for the MPEG2 decoder on neon platform.
+@//
+@// Reference Document   :
+@//
+@// Revision History     :
+@//      Date            Author                  Detail Description
+@//   ------------    ----------------    ----------------------------------
+@//   Jul 07, 2008     Naveen Kumar T                Created
+@//
+@//-------------------------------------------------------------------------
+@*/
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Include Files
+@// ----------------------------------------------------------------------------
+@*/
+.text
+.p2align 2
+.equ log2_16 ,  4                       @// log2(16); not referenced in this file
+.equ log2_2  ,  1                       @// log2(2); not referenced in this file
+@/*
+@// ----------------------------------------------------------------------------
+@// Struct/Union Types and Define
+@// ----------------------------------------------------------------------------
+@*/
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Static Global Data section variables
+@// ----------------------------------------------------------------------------
+@*/
+@//--------------------------- NONE --------------------------------------------
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Static Prototype Functions
+@// ----------------------------------------------------------------------------
+@*/
+@// -------------------------- NONE --------------------------------------------
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Exported functions
+@// ----------------------------------------------------------------------------
+@*/
+
+@/*****************************************************************************
+@*                                                                            *
+@*  Function Name    : impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q()           *
+@*                                                                            *
+@*  Description      : This function converts the image from YUV420P color    *
+@*                     space to 420SP color space (UV interleaved).           *
+@*                                                                            *
+@*  Arguments        : R0           pu1_y                                     *
+@*                     R1           pu1_u                                     *
+@*                     R2           pu1_v                                     *
+@*                     R3           pu1_dest_y                                *
+@*                     [R13 #24]    pu1_dest_uv                               *
+@*                     [R13 #28]    u2_height                                 *
+@*                     [R13 #32]    u2_width                                  *
+@*                     [R13 #36]    u2_stridey                                *
+@*                     [R13 #40]    u2_strideu                                *
+@*                     [R13 #44]    u2_stridev                                *
+@*                     [R13 #48]    u2_dest_stride_y                          *
+@*                     [R13 #52]    u2_dest_stride_uv                         *
+@*                     [R13 #56]    convert_uv_only (1 => chroma only)        *
+@*                     (offsets are from SP after the 24 byte register push)  *
+@*  Values Returned  : None                                                   *
+@*                                                                            *
+@*  Register Usage   : R0 - R8, R12, Q0                                       *
+@*                                                                            *
+@*  Stack Usage      : 24 Bytes                                               *
+@*                                                                            *
+@*  Interruptibility : Interruptible                                          *
+@*                                                                            *
+@*  Known Limitations                                                         *
+@*       Assumptions: Image Width:     Assumed to be even and greater than    *
+@*                     or equal to 16 (non multiple of 16 tail is re-copied)  *
+@*                     Image Height:    Assumed to be even.                   *
+@*                                                                            *
+@*  Revision History :                                                        *
+@*         DD MM YYYY   Author(s)       Changes (Describe the changes made)   *
+@*         07 06 2010   Varshita        Draft                                 *
+@*         07 06 2010   Naveen Kr T     Completed                             *
+@*                                                                            *
+@*****************************************************************************/
+                .global impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q
+impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q:
+
+    @// Save the callee-saved registers used below (24 bytes)
+    stmfd           sp!, {r4-r8, lr}
+
+    ldr             r4, [sp, #56]       @// Load convert_uv_only
+
+    cmp             r4, #1              @// 1 => skip the luma copy
+    beq             yuv420sp_uv_chroma
+    @/* Luma plane: straight copy, 16 bytes per iteration */
+    @// Load the parameters from stack
+    ldr             r4, [sp, #28]       @// Load u2_height from stack
+
+    ldr             r5, [sp, #32]       @// Load u2_width from stack
+
+    ldr             r7, [sp, #36]       @// Load u2_stridey from stack
+
+    ldr             r8, [sp, #48]       @// Load u2_dest_stride_y from stack
+
+    sub             r7, r7, r5          @// Source increment (stride - width)
+
+    sub             r8, r8, r5          @// Destination increment (stride - width)
+
+
+yuv420sp_uv_row_loop_y:
+    mov             r6, r5              @// r6 = bytes left in this row
+
+yuv420sp_uv_col_loop_y:
+    pld             [r0, #128]
+    vld1.8          {q0}, [r0]!
+    vst1.8          {q0}, [r3]!
+    sub             r6, r6, #16
+    cmp             r6, #15
+    bgt             yuv420sp_uv_col_loop_y
+
+    cmp             r6, #0
+    beq             yuv420sp_uv_row_loop_end_y
+    @// Width is not a multiple of 16: step both pointers back so that one more
+    @// 16 byte copy ends exactly at the end of the row (overlapping bytes are
+    @// simply copied again). Ex: if width is 162, the loop above handles 160
+    @// bytes, then source and destination are moved back to offset 146.
+    rsb             r6, r6, #16
+    sub             r0, r0, r6
+    sub             r3, r3, r6
+
+    vld1.8          {q0}, [r0]!
+    vst1.8          {q0}, [r3]!
+
+yuv420sp_uv_row_loop_end_y:
+    add             r0, r0, r7
+    add             r3, r3, r8
+    subs            r4, r4, #1
+    bgt             yuv420sp_uv_row_loop_y
+
+yuv420sp_uv_chroma:
+
+    ldr             r3, [sp, #24]       @// Load pu1_dest_uv from stack
+
+    ldr             r4, [sp, #28]       @// Load u2_height from stack
+
+    ldr             r5, [sp, #32]       @// Load u2_width from stack
+
+
+    ldr             r7, [sp, #40]       @// Load u2_strideu from stack
+    ldr             r12, [sp, #44]      @// Load u2_stridev from stack
+    ldr             r8, [sp, #52]       @// Load u2_dest_stride_uv from stack
+
+    sub             r7, r7, r5, lsr #1  @// U source increment
+
+    sub             r8, r8, r5          @// Destination increment
+
+    mov             r5, r5, lsr #1      @// Chroma width  = width / 2
+    mov             r4, r4, lsr #1      @// Chroma height = height / 2
+    sub             r12, r12, r5        @// V source increment (r5 is width / 2 here)
+yuv420sp_uv_row_loop_uv:
+    mov             r6, r5
+
+
+yuv420sp_uv_col_loop_uv:
+    pld             [r1, #128]
+    pld             [r2, #128]
+    vld1.8          d0, [r1]!           @// 8 U samples
+    vld1.8          d1, [r2]!           @// 8 V samples
+    vst2.8          {d0, d1}, [r3]!     @// Store interleaved U0 V0 U1 V1 ...
+    sub             r6, r6, #8
+    cmp             r6, #7
+    bgt             yuv420sp_uv_col_loop_uv
+
+    cmp             r6, #0
+    beq             yuv420sp_uv_row_loop_end_uv
+    @// Chroma width is not a multiple of 8: step U and V back (destination by
+    @// twice as much) so that one more 8 + 8 byte pass ends exactly at the end
+    @// of the row. Ex: if width is 162, chroma width is 81, the loop above
+    @// handles 80 samples and U/V are then moved back to offset 73.
+    rsb             r6, r6, #8
+    sub             r1, r1, r6
+    sub             r2, r2, r6
+    sub             r3, r3, r6, lsl #1
+
+    vld1.8          d0, [r1]!
+    vld1.8          d1, [r2]!
+    vst2.8          {d0, d1}, [r3]!
+
+yuv420sp_uv_row_loop_end_uv:
+    add             r1, r1, r7
+    add             r2, r2, r12         @// V advances by its own stride
+    add             r3, r3, r8
+    subs            r4, r4, #1
+    bgt             yuv420sp_uv_row_loop_uv
+    @// Restore the registers and return
+    ldmfd           sp!, {r4-r8, pc}
+
+
+
+
+
+@/*****************************************************************************
+@*                                                                            *
+@*  Function Name    : impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q()           *
+@*                                                                            *
+@*  Description      : This function converts the image from YUV420P color    *
+@*                     space to 420SP color space (VU interleaved).           *
+@*                     It is the same as the UV interleaved routine except    *
+@*                     that the chroma VLD1.8 loads put V in d0 and U in d1,  *
+@*                     so that VST2.8 writes V first.                         *
+@*                                                                            *
+@*  Arguments        : R0           pu1_y                                     *
+@*                     R1           pu1_u                                     *
+@*                     R2           pu1_v                                     *
+@*                     R3           pu1_dest_y                                *
+@*                     [R13 #24]    pu1_dest_uv                               *
+@*                     [R13 #28]    u2_height                                 *
+@*                     [R13 #32]    u2_width                                  *
+@*                     [R13 #36]    u2_stridey                                *
+@*                     [R13 #40]    u2_strideu                                *
+@*                     [R13 #44]    u2_stridev                                *
+@*                     [R13 #48]    u2_dest_stride_y                          *
+@*                     [R13 #52]    u2_dest_stride_uv                         *
+@*                     [R13 #56]    convert_uv_only (1 => chroma only)        *
+@*                     (offsets are from SP after the 24 byte register push)  *
+@*  Values Returned  : None                                                   *
+@*                                                                            *
+@*  Register Usage   : R0 - R8, R12, Q0                                       *
+@*                                                                            *
+@*  Stack Usage      : 24 Bytes                                               *
+@*                                                                            *
+@*  Interruptibility : Interruptible                                          *
+@*                                                                            *
+@*  Known Limitations                                                         *
+@*       Assumptions: Image Width:     Assumed to be even and greater than    *
+@*                     or equal to 16 (non multiple of 16 tail is re-copied)  *
+@*                     Image Height:    Assumed to be even.                   *
+@*                                                                            *
+@*  Revision History :                                                        *
+@*         DD MM YYYY   Author(s)       Changes (Describe the changes made)   *
+@*         07 06 2010   Varshita        Draft                                 *
+@*         07 06 2010   Naveen Kr T     Completed                             *
+@*                                                                            *
+@*****************************************************************************/
+
+                .global impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q
+impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q:
+
+    @// Save the callee-saved registers used below (24 bytes)
+    stmfd           sp!, {r4-r8, lr}
+
+    ldr             r4, [sp, #56]       @// Load convert_uv_only
+
+    cmp             r4, #1              @// 1 => skip the luma copy
+    beq             yuv420sp_vu_chroma
+
+    @/* Luma plane: straight copy, 16 bytes per iteration */
+    @// Load the parameters from stack
+    ldr             r4, [sp, #28]       @// Load u2_height from stack
+
+    ldr             r5, [sp, #32]       @// Load u2_width from stack
+
+    ldr             r7, [sp, #36]       @// Load u2_stridey from stack
+
+    ldr             r8, [sp, #48]       @// Load u2_dest_stride_y from stack
+
+    sub             r7, r7, r5          @// Source increment (stride - width)
+
+    sub             r8, r8, r5          @// Destination increment (stride - width)
+
+
+yuv420sp_vu_row_loop_y:
+    mov             r6, r5              @// r6 = bytes left in this row
+
+yuv420sp_vu_col_loop_y:
+    pld             [r0, #128]
+    vld1.8          {q0}, [r0]!
+    vst1.8          {q0}, [r3]!
+    sub             r6, r6, #16
+    cmp             r6, #15
+    bgt             yuv420sp_vu_col_loop_y
+
+    cmp             r6, #0
+    beq             yuv420sp_vu_row_loop_end_y
+    @// Width is not a multiple of 16: step both pointers back so that one more
+    @// 16 byte copy ends exactly at the end of the row (overlapping bytes are
+    @// simply copied again). Ex: if width is 162, the loop above handles 160
+    @// bytes, then source and destination are moved back to offset 146.
+    rsb             r6, r6, #16
+    sub             r0, r0, r6
+    sub             r3, r3, r6
+
+    vld1.8          {q0}, [r0]!
+    vst1.8          {q0}, [r3]!
+
+yuv420sp_vu_row_loop_end_y:
+    add             r0, r0, r7
+    add             r3, r3, r8
+    subs            r4, r4, #1
+    bgt             yuv420sp_vu_row_loop_y
+
+yuv420sp_vu_chroma:
+
+    ldr             r3, [sp, #24]       @// Load pu1_dest_uv from stack
+
+    ldr             r4, [sp, #28]       @// Load u2_height from stack
+
+    ldr             r5, [sp, #32]       @// Load u2_width from stack
+
+
+    ldr             r7, [sp, #40]       @// Load u2_strideu from stack
+    ldr             r12, [sp, #44]      @// Load u2_stridev from stack
+    ldr             r8, [sp, #52]       @// Load u2_dest_stride_uv from stack
+
+    sub             r7, r7, r5, lsr #1  @// U source increment
+
+    sub             r8, r8, r5          @// Destination increment
+
+    mov             r5, r5, lsr #1      @// Chroma width  = width / 2
+    mov             r4, r4, lsr #1      @// Chroma height = height / 2
+    sub             r12, r12, r5        @// V source increment (r5 is width / 2 here)
+yuv420sp_vu_row_loop_uv:
+    mov             r6, r5
+
+
+yuv420sp_vu_col_loop_uv:
+    pld             [r1, #128]
+    pld             [r2, #128]
+    vld1.8          d1, [r1]!           @// 8 U samples
+    vld1.8          d0, [r2]!           @// 8 V samples
+    vst2.8          {d0, d1}, [r3]!     @// Store interleaved V0 U0 V1 U1 ...
+    sub             r6, r6, #8
+    cmp             r6, #7
+    bgt             yuv420sp_vu_col_loop_uv
+
+    cmp             r6, #0
+    beq             yuv420sp_vu_row_loop_end_uv
+    @// Chroma width is not a multiple of 8: step U and V back (destination by
+    @// twice as much) so that one more 8 + 8 byte pass ends exactly at the end
+    @// of the row. Ex: if width is 162, chroma width is 81, the loop above
+    @// handles 80 samples and U/V are then moved back to offset 73.
+    rsb             r6, r6, #8
+    sub             r1, r1, r6
+    sub             r2, r2, r6
+    sub             r3, r3, r6, lsl #1
+
+    vld1.8          d1, [r1]!
+    vld1.8          d0, [r2]!
+    vst2.8          {d0, d1}, [r3]!
+
+yuv420sp_vu_row_loop_end_uv:
+    add             r1, r1, r7
+    add             r2, r2, r12         @// V advances by its own stride
+    add             r3, r3, r8
+    subs            r4, r4, #1
+    bgt             yuv420sp_vu_row_loop_uv
+    @// Restore the registers and return
+    ldmfd           sp!, {r4-r8, pc}
+
+
+
+
+

diff --git a/common/arm/impeg2_idct.s b/common/arm/impeg2_idct.s
new file mode 100644
index 0000000..22225bf
--- /dev/null
+++ b/common/arm/impeg2_idct.s

@@ -0,0 +1,1204 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+
+@/*
+@//----------------------------------------------------------------------------
+@// File Name            : impeg2_idct.s
+@//
+@// Description          : This file has the Idct Implementations for the
+@//                        MPEG2 SP decoder on neon platform.
+@//
+@// Reference Document   :
+@//
+@// Revision History     :
+@//      Date            Author                  Detail Description
+@//   ------------    ----------------    ----------------------------------
+@//   Feb 22, 2008     Naveen Kumar T                Created
+@//
+@//-------------------------------------------------------------------------
+@*/
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Include Files
+@// ----------------------------------------------------------------------------
+@*/
+
+.text
+.p2align 2
+.equ idct_stg1_shift       ,            12     @// right shift applied after the first multiply
+.equ idct_stg2_shift       ,            16     @// right shift applied after the second multiply
+.equ idct_stg1_round     ,          (1 << (idct_stg1_shift - 1))    @// rounding term added before the stage 1 shift
+.equ idct_stg2_round     ,          (1 << (idct_stg2_shift - 1))    @// rounding term added before the stage 2 shift
+@/*
+@// ----------------------------------------------------------------------------
+@// Struct/Union Types and Define
+@// ----------------------------------------------------------------------------
+@*/
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Static Global Data section variables
+@// ----------------------------------------------------------------------------
+@*/
+@//--------------------------- NONE --------------------------------------------
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Static Prototype Functions
+@// ----------------------------------------------------------------------------
+@*/
+@// -------------------------- NONE --------------------------------------------
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Exported functions
+@// ----------------------------------------------------------------------------
+@*/
+
+    .extern gai2_impeg2_idct_q15
+.hidden gai2_impeg2_idct_q15
+    .extern gai2_impeg2_idct_q11
+.hidden gai2_impeg2_idct_q11
+    .extern gai2_impeg2_idct_first_col_q15
+.hidden gai2_impeg2_idct_first_col_q15
+    .extern gai2_impeg2_idct_first_col_q11
+.hidden gai2_impeg2_idct_first_col_q11
+    .extern gai2_impeg2_mismatch_stg2_additive
+.hidden gai2_impeg2_mismatch_stg2_additive
+@// Each word below is (table - label - 8); the code does "add rX, rX, pc" at <label>, where pc reads as label + 8 (ARM state assumed)
+gai2_impeg2_idct_q15_addr1:
+    .long gai2_impeg2_idct_q15 - q15lbl1 - 8                    @// used in impeg2_idct_recon_dc_a9q
+gai2_impeg2_idct_q15_addr2:
+    .long gai2_impeg2_idct_q15 - q15lbl2 - 8                    @// used in impeg2_idct_recon_dc_mismatch_a9q
+gai2_impeg2_idct_q11_addr1:
+    .long gai2_impeg2_idct_q11 - q11lbl1 - 8                    @// used in impeg2_idct_recon_dc_a9q
+gai2_impeg2_idct_q11_addr2:
+    .long gai2_impeg2_idct_q11 - q11lbl2 - 8                    @// used in impeg2_idct_recon_dc_mismatch_a9q
+gai2_impeg2_idct_first_col_q15_addr1:
+    .long gai2_impeg2_idct_first_col_q15 - fcq15_lbl1 - 8       @// used in impeg2_idct_recon_a9q
+gai2_impeg2_idct_first_col_q15_addr2:
+    .long gai2_impeg2_idct_first_col_q15 - fcq15_lbl2 - 8
+gai2_impeg2_idct_first_col_q15_addr3:
+    .long gai2_impeg2_idct_first_col_q15 - fcq15_lbl3 - 8
+gai2_impeg2_mismatch_stg2_additive_addr:
+    .long gai2_impeg2_mismatch_stg2_additive - additive_lbl - 8 @// used in impeg2_idct_recon_dc_mismatch_a9q
+gai2_impeg2_idct_first_col_q11_addr1:
+    .long gai2_impeg2_idct_first_col_q11 - fcq11_lbl1 - 8
+gai2_impeg2_idct_first_col_q11_addr2:
+    .long gai2_impeg2_idct_first_col_q11 - fcq11_lbl2 - 8
+
+    .global impeg2_idct_recon_dc_a9q
+impeg2_idct_recon_dc_a9q:
+    stmfd           sp!, {r4, r6, r12, lr}
+    @//DC only 8x8 reconstruction: adds one constant to all 64 prediction pixels
+    @//r0: pi2_src (only pi2_src[0] is read)
+    @//r1: pi2_tmp - not used, used as pred_strd
+    @//r2: pu1_pred, r3: pu1_dst, r4/r12/r14: scratch, r6: dst_strd
+    @//NEON: only caller-saved registers are used (q0-q3, q8-q15); q4-q7 must
+    @//      be preserved by a callee and are therefore left untouched
+
+    ldr             r1, [sp, #20]       @//pred_strd
+    ldr             r6, [sp, #24]       @//dst_strd
+
+    ldr             r14, gai2_impeg2_idct_q15_addr1
+q15lbl1:
+    add             r14, r14, pc        @//r14 = &gai2_impeg2_idct_q15[0]
+    ldrsh           r12, [r14]          @//first q15 coefficient
+    ldrsh           r4, [r0]            @//DC coefficient
+
+    vld1.8          d0, [r2], r1        @//prediction row 0
+    mul             r4, r4, r12
+
+    vld1.8          d1, [r2], r1        @//prediction row 1
+    add             r4, #idct_stg1_round
+
+    vld1.8          d2, [r2], r1        @//prediction row 2
+    asr             r4, r4, #idct_stg1_shift
+
+    ldr             r14, gai2_impeg2_idct_q11_addr1
+q11lbl1:
+    add             r14, r14, pc        @//r14 = &gai2_impeg2_idct_q11[0]
+    ldrsh           r12, [r14]          @//first q11 coefficient
+
+    vld1.8          d3, [r2], r1        @//prediction row 3
+    mul             r4, r4, r12
+
+    vld1.8          d4, [r2], r1        @//prediction row 4
+    add             r4, #idct_stg2_round
+
+    vld1.8          d5, [r2], r1        @//prediction row 5
+    asr             r4, r4, #idct_stg2_shift
+
+    vld1.8          d6, [r2], r1        @//prediction row 6
+    vdup.s16        q15, r4             @//q15 = residual value in all 8 lanes
+
+
+    vld1.8          d7, [r2], r1        @//prediction row 7
+
+    vaddw.u8        q12, q15, d0        @//row 0: pred + residual (16 bit)
+
+    vaddw.u8        q13, q15, d1
+    vqmovun.s16     d0, q12             @//saturate to 0..255; q12 is free again
+
+    vaddw.u8        q14, q15, d2
+    vqmovun.s16     d1, q13
+    vst1.8          d0, [r3], r6
+
+    vaddw.u8        q12, q15, d3        @//q12 reused for row 3
+    vqmovun.s16     d2, q14
+    vst1.8          d1, [r3], r6
+
+    vaddw.u8        q8, q15, d4
+    vqmovun.s16     d3, q12
+    vst1.8          d2, [r3], r6
+
+    vaddw.u8        q9, q15, d5
+    vqmovun.s16     d4, q8
+    vst1.8          d3, [r3], r6
+
+    vaddw.u8        q10, q15, d6
+    vqmovun.s16     d5, q9
+    vst1.8          d4, [r3], r6
+
+    vaddw.u8        q11, q15, d7
+    vqmovun.s16     d6, q10
+    vst1.8          d5, [r3], r6
+
+    vqmovun.s16     d7, q11
+    vst1.8          d6, [r3], r6
+
+
+    vst1.8          d7, [r3], r6
+
+    ldmfd           sp!, {r4, r6, r12, pc}
+
+
+
+
+    .global impeg2_idct_recon_dc_mismatch_a9q
+impeg2_idct_recon_dc_mismatch_a9q:
+    stmfd           sp!, {r4-r12, lr}
+    @//r0: pi2_src, r2: pu1_pred, r3: pu1_dst; NEON uses caller-saved q0, q1, q8-q11, d30 only
+    ldr             r1, [sp, #44]       @//pred_strd
+    ldr             r6, [sp, #48]       @//dst_strd
+
+    ldr             r14, gai2_impeg2_idct_q15_addr2
+q15lbl2:
+    add             r14, r14, pc        @//r14 = &gai2_impeg2_idct_q15[0]
+    ldrsh           r12, [r14]          @//first q15 coefficient
+    ldrsh           r4, [r0]            @//DC coefficient
+
+    mul             r4, r4, r12
+    add             r4, #idct_stg1_round
+    asr             r4, r4, #idct_stg1_shift
+
+    ldr             r14, gai2_impeg2_idct_q11_addr2
+q11lbl2:
+    add             r14, r14, pc        @//r14 = &gai2_impeg2_idct_q11[0]
+    ldrsh           r12, [r14]          @//first q11 coefficient
+    mul             r4, r4, r12         @//stage 2 product, not yet rounded/shifted
+    vdup.s32        q0, r4              @//q0 = 32 bit DC term in all 4 lanes
+
+    mov             r14, #16            @//Increment for table read
+    ldr             r4, gai2_impeg2_mismatch_stg2_additive_addr
+additive_lbl:
+    add             r4, r4, pc          @//r4 = &gai2_impeg2_mismatch_stg2_additive[0]
+    @// Row 0: (DC term + additive) rounded to upper 16 bits, plus prediction
+    vld1.16         {q1}, [r4], r14     @//8 additive values for this row
+
+    vld1.8          d30, [r2], r1       @//8 prediction pixels
+    vmovl.s16       q8, d2              @//widen additive values to 32 bit
+    vmovl.s16       q9, d3
+    vraddhn.s32     d20, q0, q8         @//(sum + 0x8000) >> 16
+    vraddhn.s32     d21, q0, q9
+    vaddw.u8        q11, q10, d30       @//add prediction
+    vqmovun.s16     d30, q11            @//saturate to 0..255
+    vst1.8          d30, [r3], r6
+    @// Row 1
+    vld1.16         {q1}, [r4], r14
+    vld1.8          d30, [r2], r1
+    vmovl.s16       q8, d2
+    vmovl.s16       q9, d3
+    vraddhn.s32     d20, q0, q8
+    vraddhn.s32     d21, q0, q9
+    vaddw.u8        q11, q10, d30
+    vqmovun.s16     d30, q11
+    vst1.8          d30, [r3], r6
+    @// Row 2
+    vld1.16         {q1}, [r4], r14
+    vld1.8          d30, [r2], r1
+    vmovl.s16       q8, d2
+    vmovl.s16       q9, d3
+    vraddhn.s32     d20, q0, q8
+    vraddhn.s32     d21, q0, q9
+    vaddw.u8        q11, q10, d30
+    vqmovun.s16     d30, q11
+    vst1.8          d30, [r3], r6
+    @// Row 3
+    vld1.16         {q1}, [r4], r14
+    vld1.8          d30, [r2], r1
+    vmovl.s16       q8, d2
+    vmovl.s16       q9, d3
+    vraddhn.s32     d20, q0, q8
+    vraddhn.s32     d21, q0, q9
+    vaddw.u8        q11, q10, d30
+    vqmovun.s16     d30, q11
+    vst1.8          d30, [r3], r6
+    @// Row 4
+    vld1.16         {q1}, [r4], r14
+    vld1.8          d30, [r2], r1
+    vmovl.s16       q8, d2
+    vmovl.s16       q9, d3
+    vraddhn.s32     d20, q0, q8
+    vraddhn.s32     d21, q0, q9
+    vaddw.u8        q11, q10, d30
+    vqmovun.s16     d30, q11
+    vst1.8          d30, [r3], r6
+    @// Row 5
+    vld1.16         {q1}, [r4], r14
+    vld1.8          d30, [r2], r1
+    vmovl.s16       q8, d2
+    vmovl.s16       q9, d3
+    vraddhn.s32     d20, q0, q8
+    vraddhn.s32     d21, q0, q9
+    vaddw.u8        q11, q10, d30
+    vqmovun.s16     d30, q11
+    vst1.8          d30, [r3], r6
+    @// Row 6
+    vld1.16         {q1}, [r4], r14
+    vld1.8          d30, [r2], r1
+    vmovl.s16       q8, d2
+    vmovl.s16       q9, d3
+    vraddhn.s32     d20, q0, q8
+    vraddhn.s32     d21, q0, q9
+    vaddw.u8        q11, q10, d30
+    vqmovun.s16     d30, q11
+    vst1.8          d30, [r3], r6
+    @// Row 7
+    vld1.16         {q1}, [r4], r14
+    vld1.8          d30, [r2], r1
+    vmovl.s16       q8, d2
+    vmovl.s16       q9, d3
+    vraddhn.s32     d20, q0, q8
+    vraddhn.s32     d21, q0, q9
+    vaddw.u8        q11, q10, d30
+    vqmovun.s16     d30, q11
+    vst1.8          d30, [r3], r6
+
+
+    ldmfd           sp!, {r4-r12, pc}
+
+
+
+
+@/**
+@ *******************************************************************************
+@ *
+@ * ;brief
+@ *  This function performs Inverse transform  and reconstruction for 8x8
+@ * input block
+@ *
+@ * ;par Description:
+@ *  Performs inverse transform and adds the prediction  data and clips output
+@ * to 8 bit
+@ *
+@ * ;param[in] pi2_src
+@ *  Input 8x8 coefficients
+@ *
+@ * ;param[in] pi2_tmp
+@ *  Temporary 8x8 buffer for storing inverse
+@ *
+@ *  transform
+@ *  1st stage output
+@ *
+@ * ;param[in] pu1_pred
+@ *  Prediction 8x8 block
+@ *
+@ * ;param[out] pu1_dst
+@ *  Output 8x8 block
+@ *
+@ * ;param[in] src_strd
+@ *  Input stride
+@ *
+@ * ;param[in] pred_strd
+@ *  Prediction stride
+@ *
+@ * ;param[in] dst_strd
+@ *  Output Stride
+@ *
+@ * ;param[in] shift
+@ *  Output shift
+@ *
+@ * ;param[in] zero_cols
+@ *  Zero columns in pi2_src
+@ *
+@ * ;returns  Void
+@ *
+@ * ;remarks
+@ *  None
+@ *
+@ *******************************************************************************
+@ */
+
+@void impeg2_itrans_recon_8x8(WORD16 *pi2_src,
+@                            WORD16 *pi2_tmp,
+@                            UWORD8 *pu1_pred,
+@                            UWORD8 *pu1_dst,
+@                            WORD32 src_strd,
+@                            WORD32 pred_strd,
+@                            WORD32 dst_strd,
+@                            WORD32 zero_cols,
+@                            WORD32 zero_rows               )
+
+@**************Variables Vs Registers*************************
+@   r0 => *pi2_src
+@   r1 => *pi2_tmp
+@   r2 => *pu1_pred
+@   r3 => *pu1_dst
+@   src_strd
+@   pred_strd
+@   dst_strd
+@   zero_cols
+
+
+
+    .global impeg2_idct_recon_a9q
+impeg2_idct_recon_a9q:
+@//Register Usage Reference     - loading and Until IDCT of columns
+@// Cosine Constants    -   D0
+@// Sine Constants      -   D1
+@// Row 0 First Half    -   D2      -   y0
+@// Row 1 First Half    -   D6      -   y1
+@// Row 2 First Half    -   D3      -   y2
+@// Row 3 First Half    -   D7      -   y3
+@// Row 4 First Half    -   D10     -   y4
+@// Row 5 First Half    -   D14     -   y5
+@// Row 6 First Half    -   D11     -   y6
+@// Row 7 First Half    -   D15     -   y7
+
+@// Row 0 Second Half   -   D4      -   y0
+@// Row 1 Second Half   -   D8      -   y1
+@// Row 2 Second Half   -   D5      -   y2
+@// Row 3 Second Half   -   D9      -   y3
+@// Row 4 Second Half   -   D12     -   y4
+@// Row 5 Second Half   -   D16     -   y5
+@// Row 6 Second Half   -   D13     -   y6
+@// Row 7 Second Half   -   D17     -   y7
+
+    @// Body of the 8x8 inverse DCT + prediction-add routine.
+    @// Register roles as used by the code below:
+    @//   r0       - source coefficients (16-bit); r9 = r0 + 2 source rows
+    @//   r2       - prediction rows (8-bit); r4 = r2 + 2 prediction rows
+    @//   r3       - destination rows (8-bit); r0 is reused as r3 + 2 destination rows
+    @//   r6/r8/r7 - source / prediction / destination strides (stack arguments)
+    @//   r11, r12 - stack arguments, masked to 8 bits; a value >= 0xf0 selects the
+    @//              reduced paths skip_last4_rows (r11) and skip_last4cols (r12)
+    @//              (presumably zero-row / zero-column masks - confirm with the caller)
+    @// Step 1 : save callee-saved state, fetch stack arguments, load all constants
+    stmfd           sp!, {r4-r12, lr}
+    vpush           {d8-d15}            @ q4-q7 are written below; d8-d15 are callee-saved
+    @ sp stays at the bottom of the save area (40 bytes core + 64 bytes NEON) so the
+    @ saved registers are never left below sp; stack arguments start at sp + 104
+    ldr             r8, [sp, #108]      @ prediction stride
+    ldr             r7, [sp, #112]      @ destination stride
+    ldr             r6, [sp, #104]      @ src stride
+    ldr             r12, [sp, #116]
+    ldr             r11, [sp, #120]
+    mov             r6, r6, lsl #1      @ x sizeof(word16)
+    add             r9, r0, r6, lsl #1  @ 2 rows
+
+    add             r10, r6, r6, lsl #1 @ 3 rows
+
+    sub             r10, r10, #8        @ - 4 cols * sizeof(WORD16)
+    sub             r5, r6, #8          @ src_strd - 4 cols * sizeof(WORD16)
+
+
+    ldr             r14, gai2_impeg2_idct_first_col_q15_addr1
+fcq15_lbl1:
+    add             r14, r14, pc        @ r14 = literal + pc
+    vld1.16         {d0, d1}, [r14]     @//D0,D1 are used for storing the constant data
+
+    @//Step 2 Load all the input data
+    @//Step 3 Operate first 4 columns at a time
+
+    and             r11, r11, #0xff
+    and             r12, r12, #0xff
+
+    cmp             r11, #0xf0
+    bge             skip_last4_rows
+
+
+    @ Full first-stage path: all eight source rows are loaded, interleaved with the
+    @ multiplies for columns 0-3
+    vld1.16         d2, [r0]!
+    vld1.16         d3, [r9]!
+    vld1.16         d4, [r0], r5
+    vmull.s16       q10, d2, d0[0]      @// y0 * cos4(part of c0 and c1)
+    vld1.16         d5, [r9], r5
+    vmull.s16       q9, d3, d1[2]       @// y2 * sin2 (Q3 is freed by this time)(part of d1)
+    vld1.16         d6, [r0]!
+    vld1.16         d7, [r9]!
+    vmull.s16       q12, d6, d0[1]      @// y1 * cos1(part of b0)
+    vld1.16         d8, [r0], r10
+    vmull.s16       q13, d6, d0[3]      @// y1 * cos3(part of b1)
+    vld1.16         d9, [r9], r10
+    vmull.s16       q14, d6, d1[1]      @// y1 * sin3(part of b2)
+    vld1.16         d10, [r0]!
+    vmull.s16       q15, d6, d1[3]      @// y1 * sin1(part of b3)
+    vld1.16         d11, [r9]!
+    vmlal.s16       q12, d7, d0[3]      @// y1 * cos1 + y3 * cos3(part of b0)
+    vld1.16         d12, [r0], r5
+    vmlsl.s16       q13, d7, d1[3]      @// y1 * cos3 - y3 * sin1(part of b1)
+    vld1.16         d13, [r9], r5
+    vmlsl.s16       q14, d7, d0[1]      @// y1 * sin3 - y3 * cos1(part of b2)
+    vld1.16         d14, [r0]!
+    vmlsl.s16       q15, d7, d1[1]      @// y1 * sin1 - y3 * sin3(part of b3)
+    vld1.16         d15, [r9]!
+    vmull.s16       q11, d10, d0[0]     @// y4 * cos4(part of c0 and c1)
+    vld1.16         d16, [r0], r10
+    vmull.s16       q3, d3, d0[2]       @// y2 * cos2(part of d0)
+    vld1.16         d17, [r9], r10
+
+    @/* The following load sequence was used when the input is not aligned */
+@// VLD1.16     D2,[r0]!
+@// VLD1.16     D3,[r2]!
+@// VLD1.16     D4,[r0]!
+@// VLD1.16     D5,[r2]!
+@// VLD1.16     D6,[r0]!
+@// VLD1.16     D7,[r2]!
+@// VLD1.16     D8,[r0],r3
+@// VLD1.16     D9,[r2],r3
+@// VLD1.16     D10,[r0]!
+@// VLD1.16     D11,[r2]!
+@// VLD1.16     D12,[r0]!
+@// VLD1.16     D13,[r2]!
+@// VLD1.16     D14,[r0]!
+@// VLD1.16     D15,[r2]!
+@// VLD1.16     D16,[r0],r3
+@// VLD1.16     D17,[r2],r3
+
+
+    vmlal.s16       q12, d14, d1[1]     @// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
+    vmlsl.s16       q13, d14, d0[1]     @// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
+    vmlal.s16       q14, d14, d1[3]     @// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
+    vmlal.s16       q15, d14, d0[3]     @// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
+
+    vmlsl.s16       q9, d11, d0[2]      @// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
+    vmlal.s16       q3, d11, d1[2]      @// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
+
+    vadd.s32        q5, q10, q11        @// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
+    vsub.s32        q10, q10, q11       @// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
+
+    vmlal.s16       q12, d15, d1[3]     @// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of r0,r7)
+    vmlsl.s16       q13, d15, d1[1]     @// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of r1,r6)
+    vmlal.s16       q14, d15, d0[3]     @// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of r2,r5)
+    vmlsl.s16       q15, d15, d0[1]     @// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of r3,r4)
+
+    vadd.s32        q7, q5, q3          @// a0 = c0 + d0(part of r0,r7)
+    vsub.s32        q5, q5, q3          @// a3 = c0 - d0(part of r3,r4)
+    vsub.s32        q11, q10, q9        @// a2 = c1 - d1(part of r2,r5)
+    vadd.s32        q9, q10, q9         @// a1 = c1 + d1(part of r1,r6)
+
+    vadd.s32        q10, q7, q12        @// a0 + b0(part of r0)
+    vsub.s32        q3, q7, q12         @// a0 - b0(part of r7)
+
+    vadd.s32        q12, q11, q14       @// a2 + b2(part of r2)
+    vsub.s32        q11, q11, q14       @// a2 - b2(part of r5)
+
+    vadd.s32        q14, q9, q13        @// a1 + b1(part of r1)
+    vsub.s32        q9, q9, q13         @// a1 - b1(part of r6)
+
+    vadd.s32        q13, q5, q15        @// a3 + b3(part of r3)
+    vsub.s32        q15, q5, q15        @// a3 - b3(part of r4)
+
+    vqrshrn.s32     d2, q10, #idct_stg1_shift @// r0 = (a0 + b0 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d15, q3, #idct_stg1_shift @// r7 = (a0 - b0 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d3, q12, #idct_stg1_shift @// r2 = (a2 + b2 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d14, q11, #idct_stg1_shift @// r5 = (a2 - b2 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d6, q14, #idct_stg1_shift @// r1 = (a1 + b1 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d11, q9, #idct_stg1_shift @// r6 = (a1 - b1 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d7, q13, #idct_stg1_shift @// r3 = (a3 + b3 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d10, q15, #idct_stg1_shift @// r4 = (a3 - b3 + rnd) >> 7(IDCT_STG1_SHIFT)
+
+
+    b               last4_cols
+
+
+
+skip_last4_rows:
+    @ Reduced first-stage path: only source rows 0-3 are loaded and the registers
+    @ that would hold rows 4-7 of columns 4-7 (q6, q8) are cleared, so all y4..y7
+    @ terms drop out of the column transform for columns 0-3
+
+    ldr             r14, gai2_impeg2_idct_first_col_q15_addr2
+fcq15_lbl2:
+    add             r14, r14, pc        @ r14 = literal + pc
+    vld1.16         {d0, d1}, [r14]     @//D0,D1 are used for storing the constant data
+
+    vld1.16         d2, [r0]!
+    vld1.16         d3, [r9]!
+    vld1.16         d4, [r0], r5
+    vld1.16         d5, [r9], r5
+    vld1.16         d6, [r0]!
+    vld1.16         d7, [r9]!
+    vld1.16         d8, [r0], r10
+    vld1.16         d9, [r9], r10
+
+
+    vmov.s16        q6, #0
+    vmov.s16        q8, #0
+
+
+    vmull.s16       q12, d6, d0[1]      @// y1 * cos1(part of b0)
+    vmull.s16       q13, d6, d0[3]      @// y1 * cos3(part of b1)
+    vmull.s16       q14, d6, d1[1]      @// y1 * sin3(part of b2)
+    vmull.s16       q15, d6, d1[3]      @// y1 * sin1(part of b3)
+
+    vmlal.s16       q12, d7, d0[3]      @// y1 * cos1 + y3 * cos3(part of b0)
+    vmlsl.s16       q13, d7, d1[3]      @// y1 * cos3 - y3 * sin1(part of b1)
+    vmlsl.s16       q14, d7, d0[1]      @// y1 * sin3 - y3 * cos1(part of b2)
+    vmlsl.s16       q15, d7, d1[1]      @// y1 * sin1 - y3 * sin3(part of b3)
+
+    vmull.s16       q9, d3, d1[2]       @// y2 * sin2 (Q3 is freed by this time)(part of d1)
+    vmull.s16       q3, d3, d0[2]       @// y2 * cos2(part of d0)
+
+    vmull.s16       q10, d2, d0[0]      @// y0 * cos4(part of c0 and c1)
+
+
+    @ With y4 absent, c0 and c1 are both y0 * cos4 (q10)
+    vadd.s32        q7, q10, q3         @// a0 = c0 + d0(part of r0,r7)
+    vsub.s32        q5, q10, q3         @// a3 = c0 - d0(part of r3,r4)
+    vsub.s32        q11, q10, q9        @// a2 = c1 - d1(part of r2,r5)
+    vadd.s32        q9, q10, q9         @// a1 = c1 + d1(part of r1,r6)
+
+    vadd.s32        q10, q7, q12        @// a0 + b0(part of r0)
+    vsub.s32        q3, q7, q12         @// a0 - b0(part of r7)
+
+    vadd.s32        q12, q11, q14       @// a2 + b2(part of r2)
+    vsub.s32        q11, q11, q14       @// a2 - b2(part of r5)
+
+    vadd.s32        q14, q9, q13        @// a1 + b1(part of r1)
+    vsub.s32        q9, q9, q13         @// a1 - b1(part of r6)
+
+    vadd.s32        q13, q5, q15        @// a3 + b3(part of r3)
+    vsub.s32        q15, q5, q15        @// a3 - b3(part of r4)
+
+    vqrshrn.s32     d2, q10, #idct_stg1_shift @// r0 = (a0 + b0 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d15, q3, #idct_stg1_shift @// r7 = (a0 - b0 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d3, q12, #idct_stg1_shift @// r2 = (a2 + b2 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d14, q11, #idct_stg1_shift @// r5 = (a2 - b2 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d6, q14, #idct_stg1_shift @// r1 = (a1 + b1 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d11, q9, #idct_stg1_shift @// r6 = (a1 - b1 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d7, q13, #idct_stg1_shift @// r3 = (a3 + b3 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d10, q15, #idct_stg1_shift @// r4 = (a3 - b3 + rnd) >> 7(IDCT_STG1_SHIFT)
+
+
+last4_cols:
+    @ First stage for columns 4-7 (inputs d4,d5,d8,d9,d12,d13,d16,d17); bypassed
+    @ entirely when r12 >= 0xf0
+
+    cmp             r12, #0xf0
+    bge             skip_last4cols
+
+    ldr             r14, gai2_impeg2_idct_first_col_q15_addr3
+fcq15_lbl3:
+    add             r14, r14, pc        @ r14 = literal + pc
+    vld1.16         {d0, d1}, [r14]     @//D0,D1 are used for storing the constant data
+
+    vmull.s16       q12, d8, d0[1]      @// y1 * cos1(part of b0)
+    vmull.s16       q13, d8, d0[3]      @// y1 * cos3(part of b1)
+    vmull.s16       q14, d8, d1[1]      @// y1 * sin3(part of b2)
+    vmull.s16       q15, d8, d1[3]      @// y1 * sin1(part of b3)
+
+    vmlal.s16       q12, d9, d0[3]      @// y1 * cos1 + y3 * cos3(part of b0)
+    vmlsl.s16       q13, d9, d1[3]      @// y1 * cos3 - y3 * sin1(part of b1)
+    vmlsl.s16       q14, d9, d0[1]      @// y1 * sin3 - y3 * cos1(part of b2)
+    vmlsl.s16       q15, d9, d1[1]      @// y1 * sin1 - y3 * sin3(part of b3)
+
+    vmull.s16       q9, d5, d1[2]       @// y2 * sin2 (Q4 is freed by this time)(part of d1)
+    vmull.s16       q4, d5, d0[2]       @// y2 * cos2(part of d0)
+
+    vmull.s16       q10, d4, d0[0]      @// y0 * cos4(part of c0 and c1)
+    vmull.s16       q11, d12, d0[0]     @// y4 * cos4(part of c0 and c1)
+
+    vmlal.s16       q12, d16, d1[1]     @// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
+    vmlsl.s16       q13, d16, d0[1]     @// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
+    vmlal.s16       q14, d16, d1[3]     @// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
+    vmlal.s16       q15, d16, d0[3]     @// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
+
+    vmlsl.s16       q9, d13, d0[2]      @// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
+    vmlal.s16       q4, d13, d1[2]      @// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
+
+    vadd.s32        q6, q10, q11        @// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
+    vsub.s32        q10, q10, q11       @// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
+
+    vmlal.s16       q12, d17, d1[3]     @// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of e0,e7)
+    vmlsl.s16       q13, d17, d1[1]     @// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of e1,e6)
+    vmlal.s16       q14, d17, d0[3]     @// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of e2,e5)
+    vmlsl.s16       q15, d17, d0[1]     @// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of e3,e4)
+
+    vadd.s32        q8, q6, q4          @// a0 = c0 + d0(part of e0,e7)
+    vsub.s32        q6, q6, q4          @// a3 = c0 - d0(part of e3,e4)
+    vsub.s32        q11, q10, q9        @// a2 = c1 - d1(part of e2,e5)
+    vadd.s32        q9, q10, q9         @// a1 = c1 + d1(part of e1,e6)
+
+    vadd.s32        q10, q8, q12        @// a0 + b0(part of e0)
+    vsub.s32        q4, q8, q12         @// a0 - b0(part of e7)
+
+    vadd.s32        q12, q11, q14       @// a2 + b2(part of e2)
+    vsub.s32        q11, q11, q14       @// a2 - b2(part of e5)
+
+    vadd.s32        q14, q9, q13        @// a1 + b1(part of e1)
+    vsub.s32        q9, q9, q13         @// a1 - b1(part of e6)
+
+    vadd.s32        q13, q6, q15        @// a3 + b3(part of e3)
+    vsub.s32        q15, q6, q15        @// a3 - b3(part of e4)
+
+    vqrshrn.s32     d4, q10, #idct_stg1_shift @// r0 = (a0 + b0 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d17, q4, #idct_stg1_shift @// r7 = (a0 - b0 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d5, q12, #idct_stg1_shift @// r2 = (a2 + b2 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d16, q11, #idct_stg1_shift @// r5 = (a2 - b2 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d8, q14, #idct_stg1_shift @// r1 = (a1 + b1 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d13, q9, #idct_stg1_shift @// r6 = (a1 - b1 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d9, q13, #idct_stg1_shift @// r3 = (a3 + b3 + rnd) >> 7(IDCT_STG1_SHIFT)
+    vqrshrn.s32     d12, q15, #idct_stg1_shift @// r4 = (a3 - b3 + rnd) >> 7(IDCT_STG1_SHIFT)
+    b               end_skip_last4cols
+
+
+
+skip_last4cols:
+    @ Second stage when columns 4-7 were bypassed: only the first and third
+    @ quadrants are transposed and only the y0..y3 terms enter the row transform
+
+    ldr             r14, gai2_impeg2_idct_first_col_q11_addr1
+fcq11_lbl1:
+    add             r14, r14, pc        @ r14 = literal + pc
+    vld1.16         {d0, d1}, [r14]     @//D0,D1 are used for storing the constant data
+
+
+    vtrn.16         q1, q3              @//[r3,r1],[r2,r0] first quadrant transposing
+
+    vtrn.16         q5, q7              @//[r7,r5],[r6,r4] third quadrant transposing
+
+
+    vtrn.32         d6, d7              @//r0,r1,r2,r3 first quadrant transposing continued.....
+    vtrn.32         d2, d3              @//r0,r1,r2,r3 first quadrant transposing continued.....
+
+    vtrn.32         d10, d11            @//r4,r5,r6,r7 third quadrant transposing continued.....
+    vtrn.32         d14, d15            @//r4,r5,r6,r7 third quadrant transposing continued.....
+
+
+    vmull.s16       q12, d6, d0[1]      @// y1 * cos1(part of b0)
+    vmull.s16       q13, d6, d0[3]      @// y1 * cos3(part of b1)
+    vmull.s16       q14, d6, d1[1]      @// y1 * sin3(part of b2)
+    vmull.s16       q15, d6, d1[3]      @// y1 * sin1(part of b3)
+
+    vmlal.s16       q12, d7, d0[3]      @// y1 * cos1 + y3 * cos3(part of b0)
+    vmlsl.s16       q13, d7, d1[3]      @// y1 * cos3 - y3 * sin1(part of b1)
+    vmlsl.s16       q14, d7, d0[1]      @// y1 * sin3 - y3 * cos1(part of b2)
+    vmlsl.s16       q15, d7, d1[1]      @// y1 * sin1 - y3 * sin3(part of b3)
+
+    vmull.s16       q10, d2, d0[0]      @// y0 * cos4(part of c0 and c1)
+@   VMULL.S16   Q11,D4,D0[0]                    ;// y4 * cos4(part of c0 and c1)
+
+    vmull.s16       q9, d3, d1[2]       @// y2 * sin2 (Q3 is freed by this time)(part of d1)
+    vmull.s16       q3, d3, d0[2]       @// y2 * cos2(part of d0)
+
+
+    vsub.s32        q11, q10, q3        @// a3 = c0 - d0(part of r3,r4)
+    vadd.s32        q2, q10, q3         @// a0 = c0 + d0(part of r0,r7)
+
+
+    vadd.s32        q1, q2, q12
+
+    vsub.s32        q3, q2, q12
+
+    vadd.s32        q4, q11, q15
+
+    vsub.s32        q12, q11, q15
+
+    vqrshrn.s32     d5, q4, #idct_stg2_shift
+    vqrshrn.s32     d2, q1, #idct_stg2_shift
+    vqrshrn.s32     d9, q3, #idct_stg2_shift
+    vqrshrn.s32     d6, q12, #idct_stg2_shift
+
+    vsub.s32        q11, q10, q9        @// a2 = c1 - d1(part of r2,r5)
+    vadd.s32        q9, q10, q9         @// a1 = c1 + d1(part of r1,r6)
+
+
+    vadd.s32        q15, q11, q14
+
+    vsub.s32        q12, q11, q14
+
+    vadd.s32        q14, q9, q13
+
+    vsub.s32        q11, q9, q13
+    vqrshrn.s32     d4, q15, #idct_stg2_shift
+    vqrshrn.s32     d7, q12, #idct_stg2_shift
+    vqrshrn.s32     d3, q14, #idct_stg2_shift
+    vqrshrn.s32     d8, q11, #idct_stg2_shift
+
+
+    @ Second four rows; the transposes of the first four output rows are
+    @ interleaved with the multiplies
+    vmull.s16       q12, d14, d0[1]     @// y1 * cos1(part of b0)
+
+    vmull.s16       q13, d14, d0[3]     @// y1 * cos3(part of b1)
+    vmull.s16       q14, d14, d1[1]     @// y1 * sin3(part of b2)
+    vmull.s16       q15, d14, d1[3]     @// y1 * sin1(part of b3)
+
+    vmlal.s16       q12, d15, d0[3]     @// y1 * cos1 + y3 * cos3(part of b0)
+    vtrn.16         d2, d3
+    vmlsl.s16       q13, d15, d1[3]     @// y1 * cos3 - y3 * sin1(part of b1)
+    vtrn.16         d4, d5
+    vmlsl.s16       q14, d15, d0[1]     @// y1 * sin3 - y3 * cos1(part of b2)
+    vtrn.16         d6, d7
+    vmlsl.s16       q15, d15, d1[1]     @// y1 * sin1 - y3 * sin3(part of b3)
+    vtrn.16         d8, d9
+    vmull.s16       q10, d10, d0[0]     @// y0 * cos4(part of c0 and c1)
+    vtrn.32         d2, d4
+
+    vtrn.32         d3, d5
+    vmull.s16       q9, d11, d1[2]      @// y2 * sin2 (Q7 is freed by this time)(part of d1)
+    vtrn.32         d6, d8
+    vmull.s16       q7, d11, d0[2]      @// y2 * cos2(part of d0)
+    vtrn.32         d7, d9
+
+
+    add             r4, r2, r8, lsl #1  @ r4 = r2 + pred_strd * 2    => r4 points to 3rd row of pred data
+
+
+    add             r5, r8, r8, lsl #1  @ r5 = 3 * pred_strd
+
+
+    add             r0, r3, r7, lsl #1  @ r0 points to 3rd row of dest data
+
+
+    add             r10, r7, r7, lsl #1 @ r10 = 3 * dst_strd
+
+
+    vswp            d3, d6
+
+
+    vswp            d5, d8
+
+
+    vsub.s32        q11, q10, q7        @// a3 = c0 - d0(part of r3,r4)
+    vadd.s32        q6, q10, q7         @// a0 = c0 + d0(part of r0,r7)
+
+
+    vadd.s32        q0, q6, q12
+
+
+    vsub.s32        q12, q6, q12
+
+
+    vadd.s32        q6, q11, q15
+
+
+    vsub.s32        q7, q11, q15
+
+    vqrshrn.s32     d10, q0, #idct_stg2_shift
+    vqrshrn.s32     d17, q12, #idct_stg2_shift
+    vqrshrn.s32     d13, q6, #idct_stg2_shift
+    vqrshrn.s32     d14, q7, #idct_stg2_shift
+
+    vsub.s32        q11, q10, q9        @// a2 = c1 - d1(part of r2,r5)
+    vadd.s32        q9, q10, q9         @// a1 = c1 + d1(part of r1,r6)
+
+
+    vadd.s32        q0, q11, q14
+
+
+    vsub.s32        q12, q11, q14
+
+
+    vadd.s32        q14, q9, q13
+
+
+    vsub.s32        q13, q9, q13
+    vld1.8          d18, [r2], r8       @ pred row 0
+
+    vqrshrn.s32     d12, q0, #idct_stg2_shift
+    vld1.8          d20, [r2], r5       @ pred row 1
+
+
+    vqrshrn.s32     d15, q12, #idct_stg2_shift
+    vld1.8          d19, [r2], r8       @ pred row 4
+
+
+    vqrshrn.s32     d11, q14, #idct_stg2_shift
+    vld1.8          d22, [r4], r8       @ pred row 2
+
+
+    vqrshrn.s32     d16, q13, #idct_stg2_shift
+    vld1.8          d21, [r2], r5       @ pred row 5
+
+
+    b               pred_buff_addition
+end_skip_last4cols:
+    @ Second stage when all eight columns went through the first stage
+
+    ldr             r14, gai2_impeg2_idct_first_col_q11_addr2
+fcq11_lbl2:
+    add             r14, r14, pc        @ r14 = literal + pc
+    vld1.16         {d0, d1}, [r14]     @//D0,D1 are used for storing the constant data
+
+
+@/* Now that the IDCT of the columns is done, transpose so that the row IDCT can be done efficiently (step 5) */
+    vtrn.16         q1, q3              @//[r3,r1],[r2,r0] first quadrant transposing
+    vtrn.16         q2, q4              @//[r3,r1],[r2,r0] second quadrant transposing
+    vtrn.16         q5, q7              @//[r7,r5],[r6,r4] third quadrant transposing
+    vtrn.16         q6, q8              @//[r7,r5],[r6,r4] fourth quadrant transposing
+
+    vtrn.32         d6, d7              @//r0,r1,r2,r3 first quadrant transposing continued.....
+    vtrn.32         d2, d3              @//r0,r1,r2,r3 first quadrant transposing continued.....
+    vtrn.32         d4, d5              @//r0,r1,r2,r3 second quadrant transposing continued.....
+    vtrn.32         d8, d9              @//r0,r1,r2,r3 second quadrant transposing continued.....
+    vtrn.32         d10, d11            @//r4,r5,r6,r7 third quadrant transposing continued.....
+    vtrn.32         d14, d15            @//r4,r5,r6,r7 third quadrant transposing continued.....
+    vtrn.32         d12, d13            @//r4,r5,r6,r7 fourth quadrant transposing continued.....
+    vtrn.32         d16, d17            @//r4,r5,r6,r7 fourth quadrant transposing continued.....
+
+    @//step6 Operate on first four rows and find their idct
+    @//Register Usage Reference     - storing and IDCT of rows
+@// Cosine Constants    -   D0
+@// Sine Constants      -   D1
+@// Element 0 First four    -   D2      -   y0
+@// Element 1 First four    -   D6      -   y1
+@// Element 2 First four    -   D3      -   y2
+@// Element 3 First four    -   D7      -   y3
+@// Element 4 First four    -   D4      -   y4
+@// Element 5 First four    -   D8      -   y5
+@// Element 6 First four    -   D5      -   y6
+@// Element 7 First four    -   D9      -   y7
+@// Element 0 Second four   -   D10     -   y0
+@// Element 1 Second four   -   D14     -   y1
+@// Element 2 Second four   -   D11     -   y2
+@// Element 3 Second four   -   D15     -   y3
+@// Element 4 Second four   -   D12     -   y4
+@// Element 5 Second four   -   D16     -   y5
+@// Element 6 Second four   -   D13     -   y6
+@// Element 7 Second four   -   D17     -   y7
+
+    @// Map between first kernel code seq and current
+@//     D2  ->  D2
+@//     D6  ->  D6
+@//     D3  ->  D3
+@//     D7  ->  D7
+@//     D10 ->  D4
+@//     D14 ->  D8
+@//     D11 ->  D5
+@//     D15 ->  D9
+@//     Q3  ->  Q3
+@//     Q5  ->  Q2
+@//     Q7  ->  Q4
+
+    vmull.s16       q12, d6, d0[1]      @// y1 * cos1(part of b0)
+    vmull.s16       q13, d6, d0[3]      @// y1 * cos3(part of b1)
+    vmull.s16       q14, d6, d1[1]      @// y1 * sin3(part of b2)
+    vmull.s16       q15, d6, d1[3]      @// y1 * sin1(part of b3)
+
+    vmlal.s16       q12, d7, d0[3]      @// y1 * cos1 + y3 * cos3(part of b0)
+    vmlsl.s16       q13, d7, d1[3]      @// y1 * cos3 - y3 * sin1(part of b1)
+    vmlsl.s16       q14, d7, d0[1]      @// y1 * sin3 - y3 * cos1(part of b2)
+    vmlsl.s16       q15, d7, d1[1]      @// y1 * sin1 - y3 * sin3(part of b3)
+
+    vmull.s16       q10, d2, d0[0]      @// y0 * cos4(part of c0 and c1)
+    vmull.s16       q11, d4, d0[0]      @// y4 * cos4(part of c0 and c1)
+
+    vmull.s16       q9, d3, d1[2]       @// y2 * sin2 (Q3 is freed by this time)(part of d1)
+    vmull.s16       q3, d3, d0[2]       @// y2 * cos2(part of d0)
+
+
+    vmlal.s16       q12, d8, d1[1]      @// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
+    vmlsl.s16       q13, d8, d0[1]      @// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
+    vmlal.s16       q14, d8, d1[3]      @// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
+    vmlal.s16       q15, d8, d0[3]      @// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
+
+    vmlsl.s16       q9, d5, d0[2]       @// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
+    vmlal.s16       q3, d5, d1[2]       @// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
+
+    vadd.s32        q1, q10, q11        @// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
+    vsub.s32        q10, q10, q11       @// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
+
+    vmlal.s16       q12, d9, d1[3]      @// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of r0,r7)
+    vmlsl.s16       q13, d9, d1[1]      @// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of r1,r6)
+    vmlal.s16       q14, d9, d0[3]      @// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of r2,r5)
+    vmlsl.s16       q15, d9, d0[1]      @// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of r3,r4)
+
+    vsub.s32        q11, q1, q3         @// a3 = c0 - d0(part of r3,r4)
+    vadd.s32        q2, q1, q3          @// a0 = c0 + d0(part of r0,r7)
+
+
+    vadd.s32        q1, q2, q12
+
+    vsub.s32        q3, q2, q12
+
+    vadd.s32        q4, q11, q15
+
+    vsub.s32        q12, q11, q15
+
+    vqrshrn.s32     d5, q4, #idct_stg2_shift
+    vqrshrn.s32     d2, q1, #idct_stg2_shift
+    vqrshrn.s32     d9, q3, #idct_stg2_shift
+    vqrshrn.s32     d6, q12, #idct_stg2_shift
+
+    vsub.s32        q11, q10, q9        @// a2 = c1 - d1(part of r2,r5)
+    vadd.s32        q9, q10, q9         @// a1 = c1 + d1(part of r1,r6)
+
+
+    vadd.s32        q15, q11, q14
+
+    vsub.s32        q12, q11, q14
+
+    vadd.s32        q14, q9, q13
+
+    vsub.s32        q11, q9, q13
+    vqrshrn.s32     d4, q15, #idct_stg2_shift
+    vqrshrn.s32     d7, q12, #idct_stg2_shift
+    vqrshrn.s32     d3, q14, #idct_stg2_shift
+    vqrshrn.s32     d8, q11, #idct_stg2_shift
+
+
+    @ Second four rows; the transposes of the first four output rows and the
+    @ pointer setup for the prediction add are interleaved with the multiplies
+    vmull.s16       q12, d14, d0[1]     @// y1 * cos1(part of b0)
+
+    vmull.s16       q13, d14, d0[3]     @// y1 * cos3(part of b1)
+    vmull.s16       q14, d14, d1[1]     @// y1 * sin3(part of b2)
+    vmull.s16       q15, d14, d1[3]     @// y1 * sin1(part of b3)
+
+    vmlal.s16       q12, d15, d0[3]     @// y1 * cos1 + y3 * cos3(part of b0)
+    vtrn.16         d2, d3
+    vmlsl.s16       q13, d15, d1[3]     @// y1 * cos3 - y3 * sin1(part of b1)
+    vtrn.16         d4, d5
+    vmlsl.s16       q14, d15, d0[1]     @// y1 * sin3 - y3 * cos1(part of b2)
+    vtrn.16         d6, d7
+    vmlsl.s16       q15, d15, d1[1]     @// y1 * sin1 - y3 * sin3(part of b3)
+    vtrn.16         d8, d9
+    vmull.s16       q10, d10, d0[0]     @// y0 * cos4(part of c0 and c1)
+    vtrn.32         d2, d4
+    vmull.s16       q11, d12, d0[0]     @// y4 * cos4(part of c0 and c1)
+    vtrn.32         d3, d5
+    vmull.s16       q9, d11, d1[2]      @// y2 * sin2 (Q7 is freed by this time)(part of d1)
+    vtrn.32         d6, d8
+    vmull.s16       q7, d11, d0[2]      @// y2 * cos2(part of d0)
+    vtrn.32         d7, d9
+    vmlal.s16       q12, d16, d1[1]     @// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
+
+    add             r4, r2, r8, lsl #1  @ r4 = r2 + pred_strd * 2    => r4 points to 3rd row of pred data
+    vmlsl.s16       q13, d16, d0[1]     @// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
+
+    add             r5, r8, r8, lsl #1  @ r5 = 3 * pred_strd
+    vmlal.s16       q14, d16, d1[3]     @// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
+
+    add             r0, r3, r7, lsl #1  @ r0 points to 3rd row of dest data
+    vmlal.s16       q15, d16, d0[3]     @// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
+
+    add             r10, r7, r7, lsl #1 @ r10 = 3 * dst_strd
+    vmlsl.s16       q9, d13, d0[2]      @// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
+
+
+    vmlal.s16       q7, d13, d1[2]      @// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
+
+    vadd.s32        q6, q10, q11        @// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
+    vsub.s32        q10, q10, q11       @// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
+
+    vmlal.s16       q12, d17, d1[3]     @// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of r0,r7)
+    vswp            d3, d6
+    vmlsl.s16       q13, d17, d1[1]     @// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of r1,r6)
+
+    vswp            d5, d8
+    vmlal.s16       q14, d17, d0[3]     @// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of r2,r5)
+    vmlsl.s16       q15, d17, d0[1]     @// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of r3,r4)
+
+    vsub.s32        q11, q6, q7         @// a3 = c0 - d0(part of r3,r4)
+    vadd.s32        q6, q6, q7          @// a0 = c0 + d0(part of r0,r7)
+
+
+    vadd.s32        q0, q6, q12
+
+
+    vsub.s32        q12, q6, q12
+
+
+    vadd.s32        q6, q11, q15
+
+
+    vsub.s32        q7, q11, q15
+
+    vqrshrn.s32     d10, q0, #idct_stg2_shift
+    vqrshrn.s32     d17, q12, #idct_stg2_shift
+    vqrshrn.s32     d13, q6, #idct_stg2_shift
+    vqrshrn.s32     d14, q7, #idct_stg2_shift
+
+    vsub.s32        q11, q10, q9        @// a2 = c1 - d1(part of r2,r5)
+    vadd.s32        q9, q10, q9         @// a1 = c1 + d1(part of r1,r6)
+
+
+    vadd.s32        q0, q11, q14
+
+
+    vsub.s32        q12, q11, q14
+
+
+    vadd.s32        q14, q9, q13
+
+
+    vsub.s32        q13, q9, q13
+    vld1.8          d18, [r2], r8       @ pred row 0
+
+    vqrshrn.s32     d12, q0, #idct_stg2_shift
+    vld1.8          d20, [r2], r5       @ pred row 1
+
+
+    vqrshrn.s32     d15, q12, #idct_stg2_shift
+    vld1.8          d19, [r2], r8       @ pred row 4
+
+
+    vqrshrn.s32     d11, q14, #idct_stg2_shift
+    vld1.8          d22, [r4], r8       @ pred row 2
+
+
+    vqrshrn.s32     d16, q13, #idct_stg2_shift
+    vld1.8          d21, [r2], r5       @ pred row 5
+
+
+
+pred_buff_addition:
+    @ Common tail: finish transposing the second four output rows, load the
+    @ remaining prediction rows, add prediction to the residual with widening
+    @ adds, saturate to unsigned 8 bits and store the eight destination rows
+
+    vtrn.16         d10, d11
+    vld1.8          d24, [r4], r5       @ pred row 3
+
+    vtrn.16         d12, d13
+    vld1.8          d23, [r4], r8       @ pred row 6
+
+    vaddw.u8        q1, q1, d18
+    vld1.8          d25, [r4], r5       @ pred row 7
+
+    vtrn.16         d14, d15
+    vaddw.u8        q2, q2, d22
+
+    vtrn.16         d16, d17
+    vaddw.u8        q3, q3, d20
+
+    vtrn.32         d10, d12
+    vaddw.u8        q4, q4, d24
+
+    vtrn.32         d11, d13
+    vtrn.32         d14, d16
+    vtrn.32         d15, d17
+
+    vswp            d11, d14
+    vswp            d13, d16
+
+@ Row values held in the q registers at this point:
+
+@ Q1: r0
+@ Q3: r1
+@ Q2: r2
+@ Q4: r3
+@ Q5: r4
+@ Q7: r5
+@ Q6: r6
+@ Q8: r7
+
+
+    @ Add prediction to rows 4-7 while narrowing rows 0-3 with saturation
+
+    vaddw.u8        q5, q5, d19
+    vqmovun.s16     d2, q1
+    vaddw.u8        q7, q7, d21
+    vqmovun.s16     d4, q2
+    vaddw.u8        q6, q6, d23
+    vqmovun.s16     d6, q3
+    vaddw.u8        q8, q8, d25
+    vqmovun.s16     d8, q4
+
+
+    @ r3 walks destination rows 0,1,4,5 and r0 walks rows 2,3,6,7
+
+    vst1.8          {d2}, [r3], r7
+    vqmovun.s16     d10, q5
+    vst1.8          {d6}, [r3], r10
+    vqmovun.s16     d14, q7
+    vst1.8          {d4}, [r0], r7
+    vqmovun.s16     d12, q6
+    vst1.8          {d8}, [r0], r10
+    vqmovun.s16     d16, q8
+
+
+    vst1.8          {d10}, [r3], r7
+    vst1.8          {d14}, [r3], r10
+    vst1.8          {d12}, [r0], r7
+    vst1.8          {d16}, [r0], r10
+
+
+    @ Restore callee-saved NEON and core registers in reverse order and return
+    vpop            {d8-d15}
+    ldmfd           sp!, {r4-r12, pc}
+
+
+

diff --git a/common/arm/impeg2_inter_pred.s b/common/arm/impeg2_inter_pred.s
new file mode 100644
index 0000000..f1b3dde
--- /dev/null
+++ b/common/arm/impeg2_inter_pred.s

@@ -0,0 +1,801 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+
+@/*
+@//----------------------------------------------------------------------------
+@// File Name            : impeg2_inter_pred.s
+@//
+@// Description          : This file has motion compensation related
+@//                        interpolation functions on Neon + CortexA-8 platform
+@//
+@// Reference Document   :
+@//
+@// Revision History     :
+@//      Date            Author                  Detail Description
+@//   ------------    ----------------    ----------------------------------
+@//   18 jun 2010     S Hamsalekha              Created
+@//
+@//-------------------------------------------------------------------------
+@*/
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Include Files
+@// ----------------------------------------------------------------------------
+@*/
+.text
+.p2align 2
+
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Struct/Union Types and Define
+@// ----------------------------------------------------------------------------
+@*/
+
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Static Global Data section variables
+@// ----------------------------------------------------------------------------
+@*/
+@// -------------------------- NONE --------------------------------------------
+
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Static Prototype Functions
+@// ----------------------------------------------------------------------------
+@*/
+@// -------------------------- NONE --------------------------------------------
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Exported functions
+@// ----------------------------------------------------------------------------
+@*/
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name      :   impeg2_copy_mb_a9q()
+@//
+@// Detail Description : Copies one MB worth of data from src to the dst:
+@//                      16 rows of 16 bytes for Y, followed by 8 rows of
+@//                      8 bytes each for U and for V.
+@//
+@// Inputs             : r0 - pointer to src. Three plane pointers are read
+@//                           from it: y at [r0], u at [r0, #4], v at [r0, #8]
+@//                      r1 - pointer to dst, read with the same layout as src
+@//                      r2 - source width, used as the Y row stride in bytes
+@//                           and halved for the U and V rows
+@//                      r3 - destination width, used as the Y row stride in
+@//                           bytes and halved for the U and V rows
+@// Registers Used     : r4, r5, d0, d1
+@//
+@// Stack Usage        : 12 bytes
+@//
+@// Outputs            : The Y, U and V planes addressed through dst
+@//
+@// Return Data        : None
+@//
+@// Programming Note   : r2 and r3 are modified (halved) inside the function.
+@//                      The loads and stores carry no alignment qualifier.
+@//-----------------------------------------------------------------------------
+@*/
+
+
+
+        .global impeg2_copy_mb_a9q
+
+
+impeg2_copy_mb_a9q:
+
+    stmfd           r13!, {r4, r5, r14}
+
+
+    ldr             r4, [r0]            @src->y
+    ldr             r5, [r1]            @dst->y
+    @Y row 1: read 16 bytes from the src and write them to the dst
+    vld1.8          {d0, d1}, [r4], r2  @Load and increment src
+    vst1.8          {d0, d1}, [r5], r3  @Store and increment dst
+
+    @//Repeat 15 times for y (rows 2 to 16)
+    vld1.8          {d0, d1}, [r4], r2  @Load and increment src
+    vst1.8          {d0, d1}, [r5], r3  @Store and increment dst
+    vld1.8          {d0, d1}, [r4], r2  @Load and increment src
+    vst1.8          {d0, d1}, [r5], r3  @Store and increment dst
+    vld1.8          {d0, d1}, [r4], r2  @Load and increment src
+    vst1.8          {d0, d1}, [r5], r3  @Store and increment dst
+    vld1.8          {d0, d1}, [r4], r2  @Load and increment src
+    vst1.8          {d0, d1}, [r5], r3  @Store and increment dst
+    vld1.8          {d0, d1}, [r4], r2  @Load and increment src
+    vst1.8          {d0, d1}, [r5], r3  @Store and increment dst
+    vld1.8          {d0, d1}, [r4], r2  @Load and increment src
+    vst1.8          {d0, d1}, [r5], r3  @Store and increment dst
+    vld1.8          {d0, d1}, [r4], r2  @Load and increment src
+    vst1.8          {d0, d1}, [r5], r3  @Store and increment dst
+    vld1.8          {d0, d1}, [r4], r2  @Load and increment src
+    vst1.8          {d0, d1}, [r5], r3  @Store and increment dst
+    vld1.8          {d0, d1}, [r4], r2  @Load and increment src
+    vst1.8          {d0, d1}, [r5], r3  @Store and increment dst
+    vld1.8          {d0, d1}, [r4], r2  @Load and increment src
+    vst1.8          {d0, d1}, [r5], r3  @Store and increment dst
+    vld1.8          {d0, d1}, [r4], r2  @Load and increment src
+    vst1.8          {d0, d1}, [r5], r3  @Store and increment dst
+    vld1.8          {d0, d1}, [r4], r2  @Load and increment src
+    vst1.8          {d0, d1}, [r5], r3  @Store and increment dst
+    vld1.8          {d0, d1}, [r4], r2  @Load and increment src
+    vst1.8          {d0, d1}, [r5], r3  @Store and increment dst
+    vld1.8          {d0, d1}, [r4], r2  @Load and increment src
+    vst1.8          {d0, d1}, [r5], r3  @Store and increment dst
+    vld1.8          {d0, d1}, [r4], r2  @Load and increment src
+    vst1.8          {d0, d1}, [r5], r3  @Store and increment dst
+
+    @The chroma rows below step by half of the luma stride
+    mov             r2, r2, lsr #1      @src_offset /= 2
+    mov             r3, r3, lsr #1      @dst_offset /= 2
+
+    ldr             r4, [r0, #4]        @src->u
+    ldr             r5, [r1, #4]        @dst->u
+    @U row 1: read 8 bytes from the src and write them to the dst
+    vld1.8          {d0}, [r4], r2      @Load and increment src
+    vst1.8          {d0}, [r5], r3      @Store and increment dst
+
+    @//Repeat 7 times for u (rows 2 to 8)
+    vld1.8          {d0}, [r4], r2      @Load and increment src
+    vst1.8          {d0}, [r5], r3      @Store and increment dst
+    vld1.8          {d0}, [r4], r2      @Load and increment src
+    vst1.8          {d0}, [r5], r3      @Store and increment dst
+    vld1.8          {d0}, [r4], r2      @Load and increment src
+    vst1.8          {d0}, [r5], r3      @Store and increment dst
+    vld1.8          {d0}, [r4], r2      @Load and increment src
+    vst1.8          {d0}, [r5], r3      @Store and increment dst
+    vld1.8          {d0}, [r4], r2      @Load and increment src
+    vst1.8          {d0}, [r5], r3      @Store and increment dst
+    vld1.8          {d0}, [r4], r2      @Load and increment src
+    vst1.8          {d0}, [r5], r3      @Store and increment dst
+    vld1.8          {d0}, [r4], r2      @Load and increment src
+    vst1.8          {d0}, [r5], r3      @Store and increment dst
+
+    ldr             r4, [r0, #8]        @src->v
+    ldr             r5, [r1, #8]        @dst->v
+    @V row 1: read 8 bytes from the src and write them to the dst
+    vld1.8          {d0}, [r4], r2      @Load and increment src
+    vst1.8          {d0}, [r5], r3      @Store and increment dst
+
+    @//Repeat 7 times for v (rows 2 to 8)
+    vld1.8          {d0}, [r4], r2      @Load and increment src
+    vst1.8          {d0}, [r5], r3      @Store and increment dst
+    vld1.8          {d0}, [r4], r2      @Load and increment src
+    vst1.8          {d0}, [r5], r3      @Store and increment dst
+    vld1.8          {d0}, [r4], r2      @Load and increment src
+    vst1.8          {d0}, [r5], r3      @Store and increment dst
+    vld1.8          {d0}, [r4], r2      @Load and increment src
+    vst1.8          {d0}, [r5], r3      @Store and increment dst
+    vld1.8          {d0}, [r4], r2      @Load and increment src
+    vst1.8          {d0}, [r5], r3      @Store and increment dst
+    vld1.8          {d0}, [r4], r2      @Load and increment src
+    vst1.8          {d0}, [r5], r3      @Store and increment dst
+    vld1.8          {d0}, [r4], r2      @Load and increment src
+    vst1.8          {d0}, [r5], r3      @Store and increment dst
+
+    ldmfd           r13!, {r4, r5, pc}
+
+
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name      :   impeg2_mc_fullx_halfy_8x8_a9q()
+@//
+@// Detail Description : Motion compensation of an 8x8 block for a motion
+@//                      vector that is full-pel horizontally and half-pel
+@//                      vertically. Each output row is the rounded average
+@//                      of two vertically adjacent reference rows:
+@//                        out[y][x] = (ref[y][x] + ref[y + 1][x] + 1) >> 1
+@//                      Nine reference rows of 8 bytes are read and eight
+@//                      rows of 8 bytes are written.
+@//
+@// Inputs             : r0 - out     : Current Block Pointer
+@//                      r1 - ref     : Reference Block Pointer
+@//                      r2 - ref_wid : Reference Block Width (row stride)
+@//                      r3 - out_wid : Current Block Width (row stride)
+@//
+@// Registers Used     : r14, d0-d7, d16, d17
+@//
+@// Stack Usage        : 4 bytes
+@//
+@// Outputs            : The Motion Compensated Block
+@//
+@// Return Data        : None
+@//
+@// Programming Note   : Only caller-saved NEON registers are used. d8-d15
+@//                      have to be preserved across calls under the AAPCS,
+@//                      so the ninth reference row and the row 4 result are
+@//                      kept in d16 and d17 rather than d8 and d9.
+@//-----------------------------------------------------------------------------
+@*/
+
+        .global impeg2_mc_fullx_halfy_8x8_a9q
+
+impeg2_mc_fullx_halfy_8x8_a9q:
+
+    stmfd           r13!, {r14}
+    add             r14, r1, r2         @// r14 -> row 2, walks rows 2, 4, 6, 8
+    mov             r2, r2, lsl #1      @// both pointers step two rows per load
+
+@/* Load 8 + 1 rows from reference block */
+@/* vrhadd.u8 gives the rounded average (a + b + 1) >> 1 in one instruction */
+@/* Rows are paired as q0 = {row1, row5}, q1 = {row2, row6},               */
+@/* q2 = {row3, row7}, q3 = {row4, row8} so three q-wide averages produce  */
+@/* six of the eight output rows                                           */
+    vld1.8          {d0}, [r1], r2      @// row 1 = D0
+    vld1.8          {d2}, [r14], r2     @// row 2 = D2
+    vld1.8          {d4}, [r1], r2      @// row 3 = D4
+    vld1.8          {d6}, [r14], r2     @// row 4 = D6
+    vld1.8          {d1}, [r1], r2      @// row 5 = D1
+    vld1.8          {d3}, [r14], r2     @// row 6 = D3
+    vrhadd.u8       d17, d1, d6         @// estimated row 4 = D17 (rows 4, 5);
+                                        @// done before D1 is overwritten below
+    vld1.8          {d5}, [r1], r2      @// row 7 = D5
+    vrhadd.u8       q0, q0, q1          @// estimated row 1 = D0, row 5 = D1
+    vld1.8          {d7}, [r14], r2     @// row 8 = D7
+    vrhadd.u8       q1, q1, q2          @// estimated row 2 = D2, row 6 = D3
+    vld1.8          {d16}, [r1], r2     @// row 9 = D16
+    vrhadd.u8       q2, q2, q3          @// estimated row 3 = D4, row 7 = D5
+
+    add             r14, r0, r3         @// r14 -> output row 2
+    mov             r3, r3, lsl #1      @// both pointers step two rows per store
+
+@/* Store the eight rows calculated above */
+    vst1.8          {d2}, [r14], r3     @// second row = D2
+    vrhadd.u8       d7, d7, d16         @// estimated row 8 = D7 (rows 8, 9)
+    vst1.8          {d0}, [r0], r3      @// first row = D0
+    vst1.8          {d17}, [r14], r3    @// fourth row = D17
+    vst1.8          {d4}, [r0], r3      @// third row = D4
+    vst1.8          {d3}, [r14], r3     @// sixth row = D3
+    vst1.8          {d1}, [r0], r3      @// fifth row = D1
+    vst1.8          {d7}, [r14], r3     @// eighth row = D7
+    vst1.8          {d5}, [r0], r3      @// seventh row = D5
+
+    ldmfd           sp!, {pc}
+
+
+
+
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name      :   impeg2_mc_halfx_fully_8x8_a9q()
+@//
+@// Detail Description : Motion compensation of an 8x8 block for a motion
+@//                      vector that is half-pel horizontally and full-pel
+@//                      vertically. Each output pixel is the rounded
+@//                      average of two horizontally adjacent reference
+@//                      pixels:
+@//                        out[y][x] = (ref[y][x] + ref[y][x + 1] + 1) >> 1
+@//
+@// Inputs             : r0 - out     : Current Block Pointer
+@//                      r1 - ref     : Reference Block Pointer
+@//                      r2 - ref_wid : Reference Block Width (row stride)
+@//                      r3 - out_wid : Current Block Width (row stride)
+@//
+@// Registers Used     : r12, r14, d0-d7, d16-d18, d20-d22, d24-d26, d28-d30
+@//
+@// Stack Usage        : 8 bytes
+@//
+@// Outputs            : The Motion Compensated Block
+@//
+@// Return Data        : None
+@//
+@// Programming Note   : 16 bytes are loaded from every reference row even
+@//                      though only the first 9 contribute to the result.
+@//                      Only caller-saved NEON registers are used. d8-d15
+@//                      have to be preserved across calls under the AAPCS,
+@//                      so the temporaries live in d24-d30 instead.
+@//-----------------------------------------------------------------------------
+@*/
+
+
+
+        .global impeg2_mc_halfx_fully_8x8_a9q
+
+
+
+impeg2_mc_halfx_fully_8x8_a9q:
+
+    stmfd           sp!, {r12, lr}
+
+    add             r14, r1, r2, lsl #2 @r14 -> row5 of ref (rows 5-8)
+
+    add             r12, r0, r3, lsl#2  @r12 -> row5 of out (rows 5-8)
+
+    vld1.8          {d0, d1}, [r1], r2  @load 16 pixels of  row1
+
+    vld1.8          {d2, d3}, [r14], r2 @ row5
+
+    vld1.8          {d4, d5}, [r1], r2  @load 16 pixels row2
+
+    vld1.8          {d6, d7}, [r14], r2 @row6
+
+    @The shifted copies go to the low half of q12, q14, q8 and q10. The
+    @following loads fill the high halves, so that each q-wide vrhadd below
+    @averages two rows against their shifted versions at once.
+
+    vext.8          d24, d0, d1, #1     @Extract pixels (1-8) of row1
+
+    vext.8          d28, d2, d3, #1     @Extract pixels (1-8) of row5
+
+    vext.8          d16, d4, d5, #1     @Extract pixels (1-8) of row2
+
+    vext.8          d20, d6, d7, #1     @Extract pixels (1-8) of row6
+
+
+    vld1.8          {d25, d26}, [r1], r2 @load row3
+
+    vld1.8          {d29, d30}, [r14], r2 @load row7
+
+    vld1.8          {d17, d18}, [r1], r2 @load  row4
+
+    vld1.8          {d21, d22}, [r14], r2 @load  row8
+
+
+    vext.8          d1, d25, d26, #1    @Extract pixels (1-8) of row3
+
+    vext.8          d3, d29, d30, #1    @Extract pixels (1-8) of row7
+
+    vext.8          d5, d17, d18, #1    @Extract pixels (1-8) of row4
+
+    vext.8          d7, d21, d22, #1    @Extract pixels (1-8) of row8
+
+
+    vrhadd.u8       q0, q0, q12         @operate on row1 and row3
+
+    vrhadd.u8       q1, q1, q14         @operate on row5 and row7
+
+    vrhadd.u8       q2, q2, q8          @operate on row2 and row4
+
+    vrhadd.u8       q3, q3, q10         @operate on row6 and row8
+
+    vst1.8          d0, [r0], r3        @store row1
+
+    vst1.8          d2, [r12], r3       @store row5
+
+    vst1.8          d4, [r0], r3        @store row2
+
+    vst1.8          d6, [r12], r3       @store row6
+
+    vst1.8          d1, [r0], r3        @store row3
+
+    vst1.8          d3, [r12], r3       @store row7
+
+    vst1.8          d5, [r0], r3        @store row4
+
+    vst1.8          d7, [r12], r3       @store row8
+
+
+
+    ldmfd           sp!, {r12, pc}
+
+
+
+
+
+
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name      :   impeg2_mc_halfx_halfy_8x8_a9q()
+@//
+@// Detail Description : Motion compensation of an 8x8 block for a motion
+@//                      vector that is half-pel in both directions. Each
+@//                      output pixel is the rounded average of a 2x2
+@//                      neighbourhood of reference pixels:
+@//                        out[y][x] = (ref[y][x]     + ref[y][x + 1] +
+@//                                     ref[y + 1][x] + ref[y + 1][x + 1] + 2)
+@//                                    >> 2
+@//                      Nine reference rows are read and eight rows of
+@//                      8 bytes are written.
+@//
+@// Inputs             : r0 - out     : Current Block Pointer
+@//                      r1 - ref     : Reference Block Pointer
+@//                      r2 - ref_wid : Reference Block Width (row stride)
+@//                      r3 - out_wid : Current Block Width (row stride)
+@//
+@// Registers Used     : r14, q0-q15
+@//
+@// Stack Usage        : 68 bytes (4 for r14, 64 for d8-d15)
+@//
+@// Outputs            : The Motion Compensated Block
+@//
+@// Return Data        : None
+@//
+@// Programming Note   : 16 bytes are loaded from every reference row even
+@//                      though only the first 9 contribute to the result.
+@//                      Every q register is in use, so d8-d15 (which have
+@//                      to be preserved across calls under the AAPCS) are
+@//                      saved on entry and restored before returning.
+@//-----------------------------------------------------------------------------
+@*/
+
+
+        .global impeg2_mc_halfx_halfy_8x8_a9q
+
+impeg2_mc_halfx_halfy_8x8_a9q:
+
+    stmfd           sp!, {r14}
+
+    vpush           {d8-d15}            @callee-saved, clobbered below
+
+    add             r14, r1, r2, lsl #2 @r14 -> row5 of ref (rows 5-9)
+
+    vld1.8          {d0, d1}, [r1], r2  @load 16 pixels of  row1
+
+    vld1.8          {d2, d3}, [r14], r2 @ row5
+
+    vld1.8          {d4, d5}, [r1], r2  @load 16 pixels row2
+
+    vld1.8          {d6, d7}, [r14], r2 @row6
+
+    @After each vext the even d register holds pixels (0-7) of a row and the
+    @odd d register holds pixels (1-8) of the same row.
+
+    vext.8          d1, d0, d1, #1      @Extract pixels (1-8) of row1
+
+    vext.8          d3, d2, d3, #1      @Extract pixels (1-8) of row5
+
+    vext.8          d5, d4, d5, #1      @Extract pixels (1-8) of row2
+
+    vext.8          d7, d6, d7, #1      @Extract pixels (1-8) of row6
+
+
+    vld1.8          {d8, d9}, [r1], r2  @load row3
+
+    vld1.8          {d10, d11}, [r14], r2 @load row7
+
+    vld1.8          {d12, d13}, [r1], r2 @load  row4
+
+    vld1.8          {d14, d15}, [r14], r2 @load  row8
+
+    vext.8          d9, d8, d9, #1      @Extract pixels (1-8) of row3
+
+    vld1.8          {d16, d17}, [r14], r2 @load  row9
+
+    vext.8          d11, d10, d11, #1   @Extract pixels (1-8) of row7
+
+    vext.8          d13, d12, d13, #1   @Extract pixels (1-8) of row4
+
+    vext.8          d15, d14, d15, #1   @Extract pixels (1-8) of row8
+
+    vext.8          d17, d16, d17, #1   @Extract pixels (1-8) of row9
+
+
+    @interpolation in x direction: widen to 16 bit and add each pixel to its
+    @right-hand neighbour (no rounding or shift yet)
+
+    vaddl.u8        q0, d0, d1          @operate row1
+
+    vaddl.u8        q1, d2, d3          @operate row5
+
+    vaddl.u8        q2, d4, d5          @operate row2
+
+    vaddl.u8        q3, d6, d7          @operate row6
+
+    vaddl.u8        q4, d8, d9          @operate row3
+
+    vaddl.u8        q5, d10, d11        @operate row7
+
+    vaddl.u8        q6, d12, d13        @operate row4
+
+    vaddl.u8        q7, d14, d15        @operate row8
+
+    vaddl.u8        q8, d16, d17        @operate row9
+
+    @interpolation in y direction: add the sums of adjacent rows, then
+    @vrshrn #2 rounds, divides by 4 and narrows back to 8 bit
+
+    add             r14, r0, r3, lsl #2 @r14 -> row5 of out (rows 5-8)
+
+    vadd.u16        q9, q0, q2          @operate row1 and row2
+
+    vadd.u16        q13, q1, q3         @operate row5 and row6
+
+    vadd.u16        q10, q2, q4         @operate row2 and row3
+
+    vadd.u16        q14, q3, q5         @operate row6 and row7
+
+    vrshrn.u16      d18, q9, #2         @row1
+
+    vrshrn.u16      d26, q13, #2        @row5
+
+    vrshrn.u16      d20, q10, #2        @row2
+
+    vrshrn.u16      d28, q14, #2        @row6
+
+    vadd.u16        q11, q4, q6         @operate row3 and row4
+
+    vst1.8          d18, [r0], r3       @store row1
+
+    vadd.u16        q15, q5, q7         @operate row7 and row8
+
+    vst1.8          d26, [r14], r3      @store row5
+
+    vadd.u16        q12, q6, q1         @operate row4 and row5
+
+    vst1.8          d20, [r0], r3       @store row2
+
+    vadd.u16        q7, q7, q8          @operate row8 and row9
+
+    vst1.8          d28, [r14], r3      @store row6
+
+
+    vrshrn.u16      d22, q11, #2        @row3
+
+    vrshrn.u16      d30, q15, #2        @row7
+
+    vrshrn.u16      d24, q12, #2        @row4
+
+    vrshrn.u16      d14, q7, #2         @row8
+
+
+    vst1.8          d22, [r0], r3       @store row3
+    vst1.8          d30, [r14], r3      @store row7
+    vst1.8          d24, [r0], r3       @store row4
+    vst1.8          d14, [r14], r3      @store row8
+
+
+    vpop            {d8-d15}            @restore callee-saved registers
+
+    ldmfd           sp!, {pc}
+
+
+
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name      :   impeg2_mc_fullx_fully_8x8_a9q()
+@//
+@// Detail Description : Copies an 8x8 block (eight rows of 8 bytes) from the
+@//                      reference buffer to the current frame buffer. No
+@//                      interpolation is performed.
+@//
+@// Inputs             : r0 - out     : Current Block Pointer
+@//                      r1 - ref     : Reference Block Pointer
+@//                      r2 - ref_wid : Reference Block Width (row stride)
+@//                      r3 - out_wid : Current Block Width (row stride)
+@//
+@// Registers Used     : r12, r14, d0-d3
+
+@//
+@// Stack Usage        : 8 bytes
+@//
+@// Outputs            : The Motion Compensated Block
+@//
+@// Return Data        : None
+@//
+@// Programming Note   : Rows 1-4 are handled through r1/r0 and rows 5-8
+@//                      through r14/r12, interleaved.
+@//-----------------------------------------------------------------------------
+@*/
+
+
+        .global impeg2_mc_fullx_fully_8x8_a9q
+impeg2_mc_fullx_fully_8x8_a9q:
+
+
+    stmfd           sp!, {r12, lr}
+
+    add             r14, r1, r2, lsl #2 @r14 -> row5 of ref
+
+    add             r12, r0, r3, lsl #2 @r12 -> row5 of out
+
+
+    vld1.8          d0, [r1], r2        @load row1
+
+    vld1.8          d1, [r14], r2       @load row5
+
+    vld1.8          d2, [r1], r2        @load row2
+
+    vld1.8          d3, [r14], r2       @load row6
+
+
+    vst1.8          d0, [r0], r3        @store row1
+
+    vst1.8          d1, [r12], r3       @store row5
+
+    vst1.8          d2, [r0], r3        @store row2
+
+    vst1.8          d3, [r12], r3       @store row6
+
+
+    vld1.8          d0, [r1], r2        @load row3
+
+    vld1.8          d1, [r14], r2       @load row7
+
+    vld1.8          d2, [r1], r2        @load row4
+
+    vld1.8          d3, [r14], r2       @load row8
+
+
+    vst1.8          d0, [r0], r3        @store row3
+
+    vst1.8          d1, [r12], r3       @store row7
+
+    vst1.8          d2, [r0], r3        @store row4
+
+    vst1.8          d3, [r12], r3       @store row8
+
+
+    ldmfd           sp!, {r12, pc}
+
+
+
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name      :   impeg2_interpolate_a9q()
+@//
+@// Detail Description : Averages two macroblock buffers with rounding,
+@//                      (a + b + 1) >> 1, and writes the result to the
+@//                      destination: 16 rows of 16 bytes for Y, followed by
+@//                      8 rows of 8 bytes each for U and for V.
+@//
+@// Inputs             : r0 - pointer to src1. Three plane pointers are read
+@//                           from it: y at [r0], u at [r0, #4], v at [r0, #8]
+@//                      r1 - pointer to src2, read with the same layout
+@//                      r2 - dest buf, read with the same layout
+@//                      r3 - dst stride for Y rows; halved for U and V rows
+@// Registers Used     : r4, r5, r7, r12, r14, d0-d7, d16-d23
+@//
+@// Stack Usage        : 20 bytes
+@//
+@// Outputs            : The Motion Compensated Block
+@//
+@// Return Data        : None
+@//
+@// Programming Note   : Both sources are read contiguously (16 bytes per Y
+@//                      row, 8 bytes per U/V row); only the destination is
+@//                      addressed with a stride.
+@//                      Only caller-saved NEON registers are used. d8-d15
+@//                      have to be preserved across calls under the AAPCS,
+@//                      so the src2 rows live in q8-q11 rather than q4-q7.
+@//-----------------------------------------------------------------------------
+@*/
+
+
+        .global impeg2_interpolate_a9q
+
+
+impeg2_interpolate_a9q:
+
+    stmfd           r13!, {r4, r5, r7, r12, r14}
+
+    ldr             r4, [r0, #0]        @ptr_y src1
+
+    ldr             r5, [r1, #0]        @ptr_y src2
+
+    ldr             r7, [r2, #0]        @ptr_y dst buf
+
+    mov             r12, #4             @counter for number of blocks
+                                        @(4 passes of 4 rows = 16 Y rows)
+
+
+interp_lumablocks_stride:
+
+    vld1.8          {d0, d1}, [r4]!     @row1 src1
+
+    vld1.8          {d2, d3}, [r4]!     @row2 src1
+
+    vld1.8          {d4, d5}, [r4]!     @row3 src1
+
+    vld1.8          {d6, d7}, [r4]!     @row4 src1
+
+
+    vld1.8          {d16, d17}, [r5]!   @row1 src2
+
+    vld1.8          {d18, d19}, [r5]!   @row2 src2
+
+    vld1.8          {d20, d21}, [r5]!   @row3 src2
+
+    vld1.8          {d22, d23}, [r5]!   @row4 src2
+
+
+    vrhadd.u8       q0, q0, q8          @operate on row1
+
+    vrhadd.u8       q1, q1, q9          @operate on row2
+
+    vrhadd.u8       q2, q2, q10         @operate on row3
+
+    vrhadd.u8       q3, q3, q11         @operate on row4
+
+
+    vst1.8          {d0, d1}, [r7], r3  @row1
+
+    vst1.8          {d2, d3}, [r7], r3  @row2
+
+    vst1.8          {d4, d5}, [r7], r3  @row3
+
+    vst1.8          {d6, d7}, [r7], r3  @row4
+
+    subs            r12, r12, #1
+
+    bne             interp_lumablocks_stride
+
+
+    mov             r3, r3, lsr #1      @stride >> 1
+
+    ldr             r4, [r0, #4]        @ptr_u src1
+
+    ldr             r5, [r1, #4]        @ptr_u src2
+
+    ldr             r7 , [r2, #4]       @ptr_u dst buf
+
+    mov             r12, #2             @counter for number of blocks
+                                        @(pass 1 = U plane, pass 2 = V plane)
+
+
+@chroma blocks
+
+interp_chromablocks_stride:
+
+    vld1.8          {d0, d1}, [r4]!     @row1 & 2 src1
+
+    vld1.8          {d2, d3}, [r4]!     @row3 & 4 src1
+
+    vld1.8          {d4, d5}, [r4]!     @row5 & 6 src1
+
+    vld1.8          {d6, d7}, [r4]!     @row7 & 8 src1
+
+
+    vld1.8          {d16, d17}, [r5]!   @row1 & 2 src2
+
+    vld1.8          {d18, d19}, [r5]!   @row3 & 4 src2
+
+    vld1.8          {d20, d21}, [r5]!   @row5 & 6 src2
+
+    vld1.8          {d22, d23}, [r5]!   @row7 & 8 src2
+
+
+    vrhadd.u8       q0, q0, q8          @operate on row1 & 2
+
+    vrhadd.u8       q1, q1, q9          @operate on row3 & 4
+
+    vrhadd.u8       q2, q2, q10         @operate on row5 & 6
+
+    vrhadd.u8       q3, q3, q11         @operate on row7 & 8
+
+
+    vst1.8          {d0}, [r7], r3      @row1
+
+    vst1.8          {d1}, [r7], r3      @row2
+
+    vst1.8          {d2}, [r7], r3      @row3
+
+    vst1.8          {d3}, [r7], r3      @row4
+
+    vst1.8          {d4}, [r7], r3      @row5
+
+    vst1.8          {d5}, [r7], r3      @row6
+
+    vst1.8          {d6}, [r7], r3      @row7
+
+    vst1.8          {d7}, [r7], r3      @row8
+
+
+    @Point at the V plane for the second pass. After the second pass these
+    @three reloads are redundant but harmless.
+
+    ldr             r4, [r0, #8]        @ptr_v src1
+
+    ldr             r5, [r1, #8]        @ptr_v src2
+
+    ldr             r7, [r2, #8]        @ptr_v dst buf
+
+    subs            r12, r12, #1
+
+    bne             interp_chromablocks_stride
+
+
+    ldmfd           r13!, {r4, r5, r7, r12, pc}
+
+
+
+
+

diff --git a/common/arm/impeg2_mem_func.s b/common/arm/impeg2_mem_func.s
new file mode 100755
index 0000000..869b7d7
--- /dev/null
+++ b/common/arm/impeg2_mem_func.s

@@ -0,0 +1,177 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2015 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+
+@/*
+@//----------------------------------------------------------------------------
+@// File Name            : impeg2_mem_func.s
+@//
+@// Description          : This file has motion compensation related
+@//                        interpolation functions on Neon + CortexA-8 platform
+@//
+@// Reference Document   :
+@//
+@// Revision History     :
+@//      Date            Author                  Detail Description
+@//   ------------    ----------------    ----------------------------------
+@//   18 jun 2010     S Hamsalekha              Created
+@//
+@//-------------------------------------------------------------------------
+@*/
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Include Files
+@// ----------------------------------------------------------------------------
+@*/
+.text
+.p2align 2
+
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Struct/Union Types and Define
+@// ----------------------------------------------------------------------------
+@*/
+
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Static Global Data section variables
+@// ----------------------------------------------------------------------------
+@*/
+@// -------------------------- NONE --------------------------------------------
+
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Static Prototype Functions
+@// ----------------------------------------------------------------------------
+@*/
+@// -------------------------- NONE --------------------------------------------
+
+@/*
+@// ----------------------------------------------------------------------------
+@// Exported functions
+@// ----------------------------------------------------------------------------
+@*/
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name      : impeg2_memset_8bit_8x8_block_a9q()
+@//
+@// Detail Description : This routine initialises the contents of an 8x8 block
+@//                      of bytes to a particular value. Eight rows of 8 bytes
+@//                      are written (64 bytes in total), with consecutive
+@//                      rows u4_dst_width bytes apart. The stores carry no
+@//                      alignment qualifier.
+@//
+@// Inputs             : r0: pi2_blk_mat : Block Pointer
+@//                      r1: u2_val      : Value with which the block is
+@//                                        initialized (low 8 bits are used)
+@//                      r2: u4_dst_width: Destination Width (row stride)
+@//
+@// Registers Used     : d0
+@//
+@// Stack Usage        : 4 bytes
+@//
+@// Outputs            : Block Matrix Initialized to given value
+@//
+@// Return Data        : None
+@//
+@// Programming Note   : None
+@//-----------------------------------------------------------------------------
+@*/
+        .global impeg2_memset_8bit_8x8_block_a9q
+impeg2_memset_8bit_8x8_block_a9q:
+    str             lr, [sp, #-4]!
+
+    vdup.8          d0, r1              @//replicate the 8-bit value in r1 into all 8 lanes of d0
+
+    vst1.8          {d0}, [r0], r2      @//Store the row 1
+    vst1.8          {d0}, [r0], r2      @//Store the row 2
+    vst1.8          {d0}, [r0], r2      @//Store the row 3
+    vst1.8          {d0}, [r0], r2      @//Store the row 4
+    vst1.8          {d0}, [r0], r2      @//Store the row 5
+    vst1.8          {d0}, [r0], r2      @//Store the row 6
+    vst1.8          {d0}, [r0], r2      @//Store the row 7
+    vst1.8          {d0}, [r0], r2      @//Store the row 8
+
+    ldr             pc, [sp], #4
+
+
+
+
+
+
+
+@/*
+@//---------------------------------------------------------------------------
+@// Function Name      :   impeg2_memset0_16bit_8x8_linear_block_a9q()
+@//
+@// Detail Description : memsets 128 byte long linear buf to 0, i.e. an 8x8
+@//                      block of 16-bit values stored contiguously. It is
+@//                      written as eight stores of 16 bytes each.
+@//
+@// Inputs             : r0 - Buffer
+@// Registers Used     : q0
+
+@//
+@// Stack Usage        : 4 bytes
+@//
+@// Outputs            : None
+@//
+@// Return Data        : None
+@//
+@// Programming Note   : r0 is advanced by 128 bytes inside the function.
+@//                      The stores carry no alignment qualifier.
+@//-----------------------------------------------------------------------------
+@*/
+
+
+
+        .global impeg2_memset0_16bit_8x8_linear_block_a9q
+
+
+impeg2_memset0_16bit_8x8_linear_block_a9q:
+
+    stmfd           r13!, {r14}
+
+    vmov.i16        q0, #0
+
+@Each store below zeroes one row: eight 16-bit values = 16 bytes
+
+    vst1.16         {d0, d1} , [r0]!    @row1
+
+    vst1.16         {d0, d1} , [r0]!    @row2
+
+    vst1.16         {d0, d1} , [r0]!    @row3
+
+    vst1.16         {d0, d1} , [r0]!    @row4
+
+    vst1.16         {d0, d1} , [r0]!    @row5
+
+    vst1.16         {d0, d1} , [r0]!    @row6
+
+    vst1.16         {d0, d1} , [r0]!    @row7
+
+    vst1.16         {d0, d1} , [r0]!    @row8
+
+
+
+    ldmfd           r13!, {pc}
+
+
+
+

diff --git a/common/arm/impeg2_platform_macros.h b/common/arm/impeg2_platform_macros.h
new file mode 100644
index 0000000..11db302
--- /dev/null
+++ b/common/arm/impeg2_platform_macros.h

@@ -0,0 +1,75 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef __IMPEG2_PLATFORM_MACROS_H__
+#define __IMPEG2_PLATFORM_MACROS_H__
+
+
+/*
+ * Byte-swaps the 32-bit value u4_temp1 and assigns the result to u4_temp2.
+ *
+ * Both arguments are fully parenthesised so that expression arguments
+ * (e.g. "a | b" or "*p") are shifted and masked as a whole.
+ * Notes for callers:
+ *  - u4_temp1 is evaluated four times, so it must be free of side effects.
+ *  - The shifts assume an unsigned 32-bit operand.
+ *  - The expansion already ends in ';' (kept for existing call sites).
+ */
+#define CONV_LE_TO_BE(u4_temp2,u4_temp1) (u4_temp2) =                          \
+                                         ((u4_temp1) << 24) |                  \
+                                         (((u4_temp1) & 0xff00) << 8) |        \
+                                         (((u4_temp1) & 0xff0000) >> 8) |      \
+                                         ((u4_temp1) >> 24);
+
+/* Returns the number of leading zero bits in u4_word; a zero input yields 32. */
+static __inline  UWORD32 CLZ(UWORD32 u4_word)
+{
+    /* __builtin_clz() has no defined result for 0, hence the explicit case. */
+    return u4_word ? (UWORD32)__builtin_clz(u4_word) : 32;
+}
+/* Saturates x to the unsigned 8-bit range using the ARM USAT instruction. */
+static __inline WORD32 CLIP_U8(WORD32 x)
+{
+    WORD32 i4_clipped;
+
+    __asm__("usat %0, #8, %1" : "=r"(i4_clipped) : "r"(x));
+    return i4_clipped;
+}
+
+/* Saturates x to the signed 8-bit range using the ARM SSAT instruction. */
+static __inline WORD32 CLIP_S8(WORD32 x)
+{
+    WORD32 i4_clipped;
+
+    __asm__("ssat %0, #8, %1" : "=r"(i4_clipped) : "r"(x));
+    return i4_clipped;
+}
+
+/* Saturates x to the unsigned 12-bit range using the ARM USAT instruction. */
+static __inline WORD32 CLIP_U12(WORD32 x)
+{
+    WORD32 i4_clipped;
+
+    __asm__("usat %0, #12, %1" : "=r"(i4_clipped) : "r"(x));
+    return i4_clipped;
+}
+
+/* Saturates x to the signed 12-bit range using the ARM SSAT instruction. */
+static __inline WORD32 CLIP_S12(WORD32 x)
+{
+    WORD32 i4_clipped;
+
+    __asm__("ssat %0, #12, %1" : "=r"(i4_clipped) : "r"(x));
+    return i4_clipped;
+}
+
+/* Saturates x to the unsigned 16-bit range using the ARM USAT instruction. */
+static __inline WORD32 CLIP_U16(WORD32 x)
+{
+    WORD32 i4_clipped;
+
+    __asm__("usat %0, #16, %1" : "=r"(i4_clipped) : "r"(x));
+    return i4_clipped;
+}
+/* Saturates x to the signed 16-bit range using the ARM SSAT instruction. */
+static __inline WORD32 CLIP_S16(WORD32 x)
+{
+    WORD32 i4_clipped;
+
+    __asm__("ssat %0, #16, %1" : "=r"(i4_clipped) : "r"(x));
+    return i4_clipped;
+}
+
+/* INLINE expands to nothing on this platform. */
+#define INLINE
+/*
+ * Data prefetch hint for the address x.
+ * This header declares no __pld(), and it already depends on GCC-style
+ * builtins (__builtin_clz), so the hint is expressed with the compiler
+ * builtin. A prefetch never changes program results.
+ */
+#define PLD(x) __builtin_prefetch(x)
+
+#endif /* __IMPEG2_PLATFORM_MACROS_H__ */

diff --git a/common/armv8/impeg2_format_conv.s b/common/armv8/impeg2_format_conv.s
new file mode 100644
index 0000000..48baf04
--- /dev/null
+++ b/common/armv8/impeg2_format_conv.s

@@ -0,0 +1,409 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+
+///*
+////----------------------------------------------------------------------------
+//// File Name            : impeg2_format_conv.s
+////
+//// Description          : This file has the YUV420P to YUV420SP format
+////                        conversion routines for the MPEG2 decoder on NEON.
+////
+//// Reference Document   :
+////
+//// Revision History     :
+////      Date            Author                  Detail Description
+////   ------------    ----------------    ----------------------------------
+////   Jul 07, 2008     Naveen Kumar T                Created
+////
+////-------------------------------------------------------------------------
+//*/
+
+///*
+//// ----------------------------------------------------------------------------
+//// Include Files
+//// ----------------------------------------------------------------------------
+//*/
+.set log2_16                    ,      4
+.set log2_2                     ,      1
+
+.text
+.include "impeg2_neon_macros.s"
+///*
+//// ----------------------------------------------------------------------------
+//// Struct/Union Types and Define
+//// ----------------------------------------------------------------------------
+//*/
+
+///*
+//// ----------------------------------------------------------------------------
+//// Static Global Data section variables
+//// ----------------------------------------------------------------------------
+//*/
+////--------------------------- NONE --------------------------------------------
+
+///*
+//// ----------------------------------------------------------------------------
+//// Static Prototype Functions
+//// ----------------------------------------------------------------------------
+//*/
+//// -------------------------- NONE --------------------------------------------
+
+///*
+//// ----------------------------------------------------------------------------
+//// Exported functions
+//// ----------------------------------------------------------------------------
+//*/
+
+
+///*****************************************************************************
+//*                                                                            *
+//*  Function Name    : impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_av8()           *
+//*                                                                            *
+//*  Description      : This function converts the image from YUV420P color    *
+//*                     space to 420SP color space (UV interleaved).           *
+//*                     The luma copy is skipped if convert_uv_only is 1.      *
+//*                                                                            *
+//*  Arguments        : x0          pu1_y                                      *
+//*                     x1          pu1_u                                      *
+//*                     x2          pu1_v                                      *
+//*                     x3          pu1_dest_y                                 *
+//*                     x4          pu1_dest_uv                                *
+//*                     x5          u2_height                                  *
+//*                     x6          u2_width                                   *
+//*                     x7          u2_stridey                                 *
+//*                     sp, #80     u2_strideu                                 *
+//*                     sp, #88     u2_stridev                                 *
+//*                     sp, #96     u2_dest_stride_y                           *
+//*                     sp, #104    u2_dest_stride_uv                          *
+//*                     sp, #112    convert_uv_only                            *
+//*                     (sp offsets are as seen after the prologue)            *
+//*                                                                            *
+//*  Values Returned  : None                                                   *
+//*                                                                            *
+//*  Register Usage   : x7-x10, w14, x16, x20, v0, v1                          *
+//*                                                                            *
+//*  Stack Usage      : 80 Bytes                                               *
+//*                                                                            *
+//*  Interruptibility : Interruptible                                          *
+//*                                                                            *
+//*  Known Limitations                                                         *
+//*       Assumptions: Image Width:     Assumed to be greater than or equal    *
+//*                     to 16. A width that is not a multiple of 16 is         *
+//*                     finished with one overlapping 16 byte copy per row.    *
+//*                     Image Height:    Assumed to be even.                   *
+//*                                                                            *
+//*  Revision History :                                                        *
+//*         DD MM YYYY   Author(s)       Changes (Describe the changes made)   *
+//*         07 06 2010   Varshita        Draft                                 *
+//*         07 06 2010   Naveen Kr T     Completed                             *
+//*                                                                            *
+//*****************************************************************************/
+.global impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_av8
+impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_av8:
+
+    //// Arguments on entry (stack offsets are valid after the prologue below)
+    //    pu1_y,                - x0
+    //    pu1_u,                - x1
+    //    pu1_v,                - x2
+    //    pu1_dest_y,           - x3
+    //    pu1_dest_uv,          - x4
+    //    u2_height,            - x5
+    //    u2_width,             - x6
+    //    u2_stridey,           - x7
+    //    u2_strideu,           - sp, #80
+    //    u2_stridev,           - sp, #88
+    //    u2_dest_stride_y,     - sp, #96
+    //    u2_dest_stride_uv,    - sp, #104
+    //    convert_uv_only       - sp, #112
+    //// push_v_regs / pop_v_regs come from impeg2_neon_macros.s
+    //// NOTE(review): x5-x7 are used as full 64 bit values without extension;
+    //// confirm the C prototype guarantees clean upper bits.
+    push_v_regs
+    stp             x19, x20, [sp, #-16]!
+
+    ldr             w14, [sp, #112]     //// Load convert_uv_only
+
+    cmp             w14, #1
+    beq             yuv420sp_uv_chroma  //// Only chroma requested: skip the luma copy
+
+    ///* Luma plane: plain copy, 16 bytes per iteration */
+
+    ldr             w8, [sp, #96]       //// Load u2_dest_stride_y from stack
+    uxtw            x8, w8
+
+    sub             x7, x7, x6          //// Source increment (stride - width)
+
+    sub             x8, x8, x6          //// Destination increment (stride - width)
+
+    mov             x10, x5             //// Luma row counter; x5 (height) must
+                                        //// stay intact for the chroma pass
+
+yuv420sp_uv_row_loop_y:
+    mov             x16, x6
+
+yuv420sp_uv_col_loop_y:
+    prfm            pldl1keep, [x0, #128]
+    ld1             {v0.8b, v1.8b}, [x0], #16
+    st1             {v0.8b, v1.8b}, [x3], #16
+    sub             x16, x16, #16
+    cmp             x16, #15
+    bgt             yuv420sp_uv_col_loop_y
+
+    cmp             x16, #0
+    beq             yuv420sp_uv_row_loop__y
+    ////Width is not a multiple of 16: move both pointers back so that the last
+    ////16 bytes of the row are copied (re-copying bytes that are already done).
+    ////Ex if width is 162, above loop will process 160 pixels. Both source and
+    ////destination are then moved to the 146th pixel and 16 more bytes are copied.
+    sub             x20, x16, #16
+    neg             x16, x20            //// x16 = 16 - remaining pixels
+    sub             x0, x0, x16
+    sub             x3, x3, x16
+
+    ld1             {v0.8b, v1.8b}, [x0], #16
+    st1             {v0.8b, v1.8b}, [x3], #16
+
+yuv420sp_uv_row_loop__y:
+    add             x0, x0, x7
+    add             x3, x3, x8
+    subs            x10, x10, #1
+    bgt             yuv420sp_uv_row_loop_y
+
+yuv420sp_uv_chroma:
+    ldr             w7, [sp, #80]       //// Load u2_strideu from stack
+    sxtw            x7, w7
+
+    ldr             w9, [sp, #88]       //// Load u2_stridev from stack
+    sxtw            x9, w9
+
+    ldr             w8, [sp, #104]      //// Load u2_dest_stride_uv from stack
+    sxtw            x8, w8
+
+    sub             x7, x7, x6, lsr #1  //// U source increment (stride - width / 2)
+    sub             x9, x9, x6, lsr #1  //// V source increment (stride - width / 2)
+
+    sub             x8, x8, x6          //// Destination increment
+
+    lsr             x6, x6, #1          //// Chroma width  = width / 2
+    lsr             x5, x5, #1          //// Chroma height = height / 2
+yuv420sp_uv_row_loop_uv:
+    mov             x16, x6
+
+
+yuv420sp_uv_col_loop_uv:
+    prfm            pldl1keep, [x1, #128]
+    prfm            pldl1keep, [x2, #128]
+
+    ld1             {v0.8b}, [x1], #8   //// 8 U samples
+    ld1             {v1.8b}, [x2], #8   //// 8 V samples
+    st2             {v0.8b, v1.8b}, [x4], #16 //// Interleave as U0 V0 U1 V1 ...
+
+    sub             x16, x16, #8
+    cmp             x16, #7
+    bgt             yuv420sp_uv_col_loop_uv
+
+    cmp             x16, #0
+    beq             yuv420sp_uv_row_loop__uv
+    ////Chroma width is not a multiple of 8: move the U and V pointers back by
+    ////(8 - remaining) samples and the destination by twice that, then convert
+    ////the last 8 samples of the row again.
+    sub             x20, x16, #8
+    neg             x16, x20            //// x16 = 8 - remaining samples
+    sub             x1, x1, x16
+    sub             x2, x2, x16
+    sub             x4, x4, x16, lsl #1
+
+    ld1             {v0.8b}, [x1], #8
+    ld1             {v1.8b}, [x2], #8
+    st2             {v0.8b, v1.8b}, [x4], #16
+
+yuv420sp_uv_row_loop__uv:
+    add             x1, x1, x7
+    add             x2, x2, x9
+    add             x4, x4, x8
+    subs            x5, x5, #1
+    bgt             yuv420sp_uv_row_loop_uv
+    ////POP THE REGISTERS
+    ldp             x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+
+
+
+
+///*****************************************************************************
+//*                                                                            *
+//*  Function Name    : impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_av8()           *
+//*                                                                            *
+//*  Description      : This function converts the image from YUV420P color    *
+//*                     space to 420SP color space (VU interleaved).           *
+//*                     It matches the UV variant in this file except that     *
+//*                     the chroma loads swap the registers, so V is stored    *
+//*                     first. The luma copy is skipped if convert_uv_only     *
+//*                     is 1.                                                  *
+//*                                                                            *
+//*  Arguments        : x0          pu1_y                                      *
+//*                     x1          pu1_u                                      *
+//*                     x2          pu1_v                                      *
+//*                     x3          pu1_dest_y                                 *
+//*                     x4          pu1_dest_uv                                *
+//*                     x5          u2_height                                  *
+//*                     x6          u2_width                                   *
+//*                     x7          u2_stridey                                 *
+//*                     sp, #80     u2_strideu                                 *
+//*                     sp, #88     u2_stridev                                 *
+//*                     sp, #96     u2_dest_stride_y                           *
+//*                     sp, #104    u2_dest_stride_uv                          *
+//*                     sp, #112    convert_uv_only                            *
+//*                     (sp offsets are as seen after the prologue)            *
+//*                                                                            *
+//*  Values Returned  : None                                                   *
+//*                                                                            *
+//*  Register Usage   : x7-x10, w14, x16, x20, v0, v1                          *
+//*                                                                            *
+//*  Stack Usage      : 80 Bytes                                               *
+//*                                                                            *
+//*  Interruptibility : Interruptible                                          *
+//*                                                                            *
+//*  Known Limitations                                                         *
+//*       Assumptions: Image Width:     Assumed to be greater than or equal    *
+//*                     to 16. A width that is not a multiple of 16 is         *
+//*                     finished with one overlapping 16 byte copy per row.    *
+//*                     Image Height:    Assumed to be even.                   *
+//*                                                                            *
+//*  Revision History :                                                        *
+//*         DD MM YYYY   Author(s)       Changes (Describe the changes made)   *
+//*         07 06 2010   Varshita        Draft                                 *
+//*         07 06 2010   Naveen Kr T     Completed                             *
+//*                                                                            *
+//*****************************************************************************/
+
+.global impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_av8
+impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_av8:
+
+    //// Arguments on entry (stack offsets are valid after the prologue below)
+    //    pu1_y,                - x0
+    //    pu1_u,                - x1
+    //    pu1_v,                - x2
+    //    pu1_dest_y,           - x3
+    //    pu1_dest_uv,          - x4
+    //    u2_height,            - x5
+    //    u2_width,             - x6
+    //    u2_stridey,           - x7
+    //    u2_strideu,           - sp, #80
+    //    u2_stridev,           - sp, #88
+    //    u2_dest_stride_y,     - sp, #96
+    //    u2_dest_stride_uv,    - sp, #104
+    //    convert_uv_only       - sp, #112
+    //// push_v_regs / pop_v_regs come from impeg2_neon_macros.s
+    //// NOTE(review): x5-x7 are used as full 64 bit values without extension;
+    //// confirm the C prototype guarantees clean upper bits.
+    push_v_regs
+    stp             x19, x20, [sp, #-16]!
+
+    ldr             w14, [sp, #112]     //// Load convert_uv_only
+
+    cmp             w14, #1
+    beq             yuv420sp_vu_chroma  //// Only chroma requested: skip the luma copy
+
+    ///* Luma plane: plain copy, 16 bytes per iteration */
+
+    ldr             w8, [sp, #96]       //// Load u2_dest_stride_y from stack
+    uxtw            x8, w8
+
+    sub             x7, x7, x6          //// Source increment (stride - width)
+
+    sub             x8, x8, x6          //// Destination increment (stride - width)
+
+    mov             x10, x5             //// Luma row counter; x5 (height) must
+                                        //// stay intact for the chroma pass
+
+yuv420sp_vu_row_loop_y:
+    mov             x16, x6
+
+yuv420sp_vu_col_loop_y:
+    prfm            pldl1keep, [x0, #128]
+    ld1             {v0.8b, v1.8b}, [x0], #16
+    st1             {v0.8b, v1.8b}, [x3], #16
+    sub             x16, x16, #16
+    cmp             x16, #15
+    bgt             yuv420sp_vu_col_loop_y
+
+    cmp             x16, #0
+    beq             yuv420sp_vu_row_loop__y
+    ////Width is not a multiple of 16: move both pointers back so that the last
+    ////16 bytes of the row are copied (re-copying bytes that are already done).
+    ////Ex if width is 162, above loop will process 160 pixels. Both source and
+    ////destination are then moved to the 146th pixel and 16 more bytes are copied.
+    sub             x20, x16, #16
+    neg             x16, x20            //// x16 = 16 - remaining pixels
+    sub             x0, x0, x16
+    sub             x3, x3, x16
+
+    ld1             {v0.8b, v1.8b}, [x0], #16
+    st1             {v0.8b, v1.8b}, [x3], #16
+
+yuv420sp_vu_row_loop__y:
+    add             x0, x0, x7
+    add             x3, x3, x8
+    subs            x10, x10, #1
+    bgt             yuv420sp_vu_row_loop_y
+
+yuv420sp_vu_chroma:
+    ldr             w7, [sp, #80]       //// Load u2_strideu from stack
+    sxtw            x7, w7
+
+    ldr             w9, [sp, #88]       //// Load u2_stridev from stack
+    sxtw            x9, w9
+
+    ldr             w8, [sp, #104]      //// Load u2_dest_stride_uv from stack
+    sxtw            x8, w8
+
+    sub             x7, x7, x6, lsr #1  //// U source increment (stride - width / 2)
+    sub             x9, x9, x6, lsr #1  //// V source increment (stride - width / 2)
+
+    sub             x8, x8, x6          //// Destination increment
+
+    lsr             x6, x6, #1          //// Chroma width  = width / 2
+    lsr             x5, x5, #1          //// Chroma height = height / 2
+yuv420sp_vu_row_loop_uv:
+    mov             x16, x6
+
+
+yuv420sp_vu_col_loop_uv:
+    prfm            pldl1keep, [x1, #128]
+    prfm            pldl1keep, [x2, #128]
+    ld1             {v1.8b}, [x1], #8   //// 8 U samples into the second register
+    ld1             {v0.8b}, [x2], #8   //// 8 V samples into the first register
+    st2             {v0.8b, v1.8b}, [x4], #16 //// Interleave as V0 U0 V1 U1 ...
+    sub             x16, x16, #8
+    cmp             x16, #7
+    bgt             yuv420sp_vu_col_loop_uv
+
+    cmp             x16, #0
+    beq             yuv420sp_vu_row_loop__uv
+    ////Chroma width is not a multiple of 8: move the U and V pointers back by
+    ////(8 - remaining) samples and the destination by twice that, then convert
+    ////the last 8 samples of the row again.
+    sub             x20, x16, #8
+    neg             x16, x20            //// x16 = 8 - remaining samples
+    sub             x1, x1, x16
+    sub             x2, x2, x16
+    sub             x4, x4, x16, lsl #1
+
+    ld1             {v1.8b}, [x1], #8
+    ld1             {v0.8b}, [x2], #8
+    st2             {v0.8b, v1.8b}, [x4], #16
+
+yuv420sp_vu_row_loop__uv:
+    add             x1, x1, x7
+    add             x2, x2, x9
+    add             x4, x4, x8
+    subs            x5, x5, #1
+    bgt             yuv420sp_vu_row_loop_uv
+    ////POP THE REGISTERS
+    ldp             x19, x20, [sp], #16
+    pop_v_regs
+    ret
+

diff --git a/common/armv8/impeg2_idct.s b/common/armv8/impeg2_idct.s
new file mode 100644
index 0000000..4956e54
--- /dev/null
+++ b/common/armv8/impeg2_idct.s

@@ -0,0 +1,1247 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+// *******************************************************************************
+// * @file
+// *  impeg2_idct.s
+// *
+// * @brief
+// *  contains function definitions for single stage  inverse transform
+// *
+// * @author
+// *  anand s
+// *
+// * @par list of functions:
+// *  - impeg2_idct_recon_dc_av8()
+// *
+// * @remarks
+// *  none
+// *
+// *******************************************************************************
+//*/
+
+///**
+// *******************************************************************************
+// *
+// * @brief
+// *  this function performs inverse transform  and reconstruction for 8x8
+// * input block
+// *
+// * @par description:
+// *  performs inverse transform and adds the prediction  data and clips output
+// * to 8 bit
+// *
+// * @param[in] pi2_src
+// *  input 8x8 coefficients
+// *
+// * @param[in] pi2_tmp
+// *  temporary 8x8 buffer for storing inverse
+// *
+// *  transform
+// *  1st stage output
+// *
+// * @param[in] pu1_pred
+// *  prediction 8x8 block
+// *
+// * @param[out] pu1_dst
+// *  output 8x8 block
+// *
+// * @param[in] src_strd
+// *  input stride
+// *
+// * @param[in] pred_strd
+// *  prediction stride
+// *
+// * @param[in] dst_strd
+// *  output stride
+// *
+// * @param[in] shift
+// *  output shift
+// *
+// * @param[in] zero_cols
+// *  zero columns in pi2_src
+// *
+// * @returns  void
+// *
+// * @remarks
+// *  none
+// *
+// *******************************************************************************
+// */
+
+//void impeg2_itrans_recon_8x8(word16 *pi2_src,
+//                            word16 *pi2_tmp,
+//                            uword8 *pu1_pred,
+//                            uword8 *pu1_dst,
+//                            word32 src_strd,
+//                            word32 pred_strd,
+//                            word32 dst_strd,
+//                            word32 zero_cols
+//                             word32    zero_rows                )
+
+//**************variables vs registers*************************
+//    x0 => *pi2_src
+//    x1 => *pi2_tmp
+//    x2 => *pu1_pred
+//    x3 => *pu1_dst
+//    src_strd
+//    pred_strd
+//    dst_strd
+//    zero_cols
+
+
+
+.text
+.align 4
+.include "impeg2_neon_macros.s"
+// Fixed point descaling constants: each round value is half of (1 << shift) and is added before the right shift
+.set idct_stg1_shift       ,            12      // stage 1 right shift
+.set idct_stg2_shift       ,            16      // stage 2 right shift
+.set idct_stg1_round        ,           (1 << (idct_stg1_shift - 1))    // stage 1 rounding term
+.set idct_stg2_round        ,           (1 << (idct_stg2_shift - 1))    // stage 2 rounding term
+
+.extern gai2_impeg2_idct_q15
+.extern gai2_impeg2_idct_q11
+.extern gai2_impeg2_idct_first_col_q15
+.extern gai2_impeg2_idct_first_col_q11
+.extern gai2_impeg2_mismatch_stg2_additive
+// DC only IDCT + reconstruction: one value derived from pi2_src[0] is added to an 8x8 prediction block and saturated to 8 bits
+.global impeg2_idct_recon_dc_av8
+impeg2_idct_recon_dc_av8:
+    // Old register list comment (presumably from the 32 bit version): STMFD sp!,{x4,x6,x12,x14}; push_v_regs is a macro from impeg2_neon_macros.s
+    push_v_regs
+    ////x0: pi2_src - only the first 16 bit coefficient is read
+    ////x1: pi2_tmp - not used
+    ////x2: pu1_pred
+    ////x3: pu1_dst
+    ////x4: used as scratch (the incoming value is overwritten)
+    ////x5: pred_strd
+    ////x6: dst_strd
+    //// The scalar DC computation below is interleaved with the eight prediction row loads
+    ldrsh           x4, [x0]            // x4 = pi2_src[0], sign extended
+    adrp            x14, :got:gai2_impeg2_idct_q15
+    ldr             x14, [x14, #:got_lo12:gai2_impeg2_idct_q15]
+    ldrsh           x12, [x14]          // x12 = first entry of gai2_impeg2_idct_q15
+
+    ld1             {v0.8b}, [x2], x5   // prediction row 0
+    mul             x4, x4, x12
+
+    ld1             {v1.8b}, [x2], x5   // prediction row 1
+    add             x4, x4, #idct_stg1_round
+
+    ld1             {v2.8b}, [x2], x5   // prediction row 2
+    asr             x4, x4, #idct_stg1_shift    // stage 1 result
+
+    adrp            x14, :got:gai2_impeg2_idct_q11
+    ldr             x14, [x14, #:got_lo12:gai2_impeg2_idct_q11]
+    ldrsh           x12, [x14]          // x12 = first entry of gai2_impeg2_idct_q11
+
+    ld1             {v3.8b}, [x2], x5   // prediction row 3
+    mul             x4, x4, x12
+
+    ld1             {v4.8b}, [x2], x5   // prediction row 4
+    add             x4, x4, #idct_stg2_round
+
+    ld1             {v5.8b}, [x2], x5   // prediction row 5
+    asr             x4, x4, #idct_stg2_shift    // stage 2 result = value added to every pixel
+
+    ld1             {v6.8b}, [x2], x5   // prediction row 6
+    dup             v30.8h, w4          // broadcast the low 16 bits to 8 lanes
+
+
+    ld1             {v7.8b}, [x2], x5   // prediction row 7
+    //// Per row: widen the prediction bytes and add the DC value, narrow with unsigned saturation, store
+    uaddw           v8.8h, v30.8h , v0.8b
+
+    uaddw           v10.8h, v30.8h , v1.8b
+    sqxtun          v0.8b, v8.8h
+
+    uaddw           v12.8h, v30.8h , v2.8b
+    sqxtun          v1.8b, v10.8h
+    st1             {v0.8b}, [x3], x6   // output row 0
+
+    uaddw           v14.8h, v30.8h , v3.8b
+    sqxtun          v2.8b, v12.8h
+    st1             {v1.8b}, [x3], x6   // output row 1
+
+    uaddw           v16.8h, v30.8h , v4.8b
+    sqxtun          v3.8b, v14.8h
+    st1             {v2.8b}, [x3], x6   // output row 2
+
+    uaddw           v18.8h, v30.8h , v5.8b
+    sqxtun          v4.8b, v16.8h
+    st1             {v3.8b}, [x3], x6   // output row 3
+
+    uaddw           v20.8h, v30.8h , v6.8b
+    sqxtun          v5.8b, v18.8h
+    st1             {v4.8b}, [x3], x6   // output row 4
+
+    uaddw           v22.8h, v30.8h , v7.8b
+    sqxtun          v6.8b, v20.8h
+    st1             {v5.8b}, [x3], x6   // output row 5
+
+    sqxtun          v7.8b, v22.8h
+    st1             {v6.8b}, [x3], x6   // output row 6
+
+
+    st1             {v7.8b}, [x3], x6   // output row 7
+
+    // Old register list comment: LDMFD sp!,{x4,x6,x12,pc}; pop_v_regs is the counterpart of push_v_regs
+    pop_v_regs
+    ret
+
+
+// DC IDCT + reconstruction with a per pixel additive term read from gai2_impeg2_mismatch_stg2_additive
+.global impeg2_idct_recon_dc_mismatch_av8
+.extern gai2_impeg2_idct_last_row_q11
+.extern gai2_impeg2_mismatch_stg1_outp
+impeg2_idct_recon_dc_mismatch_av8:
+    // Registers read: x0 pi2_src, x2 pu1_pred, x3 pu1_dst, x5 prediction stride, x6 destination stride; x4, x12, x14 are scratch
+    push_v_regs
+    //// Stage 1: (pi2_src[0] * gai2_impeg2_idct_q15[0] + idct_stg1_round) >> idct_stg1_shift
+    ldrsh           x4, [x0]
+    adrp            x14, :got:gai2_impeg2_idct_q15
+    ldr             x14, [x14, #:got_lo12:gai2_impeg2_idct_q15]
+    ldrsh           x12, [x14]
+
+    mul             x4, x4, x12
+    add             x4, x4, #idct_stg1_round
+    asr             x4, x4, #idct_stg1_shift
+    //// Stage 2 product only; its rounding and the shift by 16 are done per lane by raddhn below
+    adrp            x14, :got:gai2_impeg2_idct_q11
+    ldr             x14, [x14, #:got_lo12:gai2_impeg2_idct_q11]
+    ldrsh           x12, [x14]
+    mul             x4, x4, x12
+    dup             v0.4s, w4           // broadcast the 32 bit product to 4 lanes
+
+    mov             x14, #16            ////Increment for table read (8 halfwords per row)
+    adrp            x4, :got:gai2_impeg2_mismatch_stg2_additive
+    ldr             x4, [x4, #:got_lo12:gai2_impeg2_mismatch_stg2_additive]
+    //// Row 1 (the same nine instruction sequence is repeated for all eight rows)
+    ld1             {v2.4h, v3.4h}, [x4], x14   // 8 additive terms for this row
+    ld1             {v30.8b}, [x2], x5          // 8 prediction pixels
+    sxtl            v8.4s, v2.4h                // widen the terms to 32 bits
+    sxtl            v10.4s, v3.4h
+    raddhn          v12.4h, v0.4s, v8.4s        // rounded high half of (product + term), lanes 0-3
+    raddhn2         v12.8h, v0.4s, v10.4s       // same for lanes 4-7
+    uaddw           v14.8h, v12.8h , v30.8b     // add the widened prediction
+    sqxtun          v30.8b, v14.8h              // narrow with unsigned saturation
+    st1             {v30.8b}, [x3], x6          // store the output row
+    //// Row 2
+    ld1             {v2.4h, v3.4h}, [x4], x14
+    ld1             {v30.8b}, [x2], x5
+    sxtl            v8.4s, v2.4h
+    sxtl            v10.4s, v3.4h
+    raddhn          v12.4h, v0.4s, v8.4s
+    raddhn2         v12.8h, v0.4s, v10.4s
+    uaddw           v14.8h, v12.8h , v30.8b
+    sqxtun          v30.8b, v14.8h
+    st1             {v30.8b}, [x3], x6
+    //// Row 3
+    ld1             {v2.4h, v3.4h}, [x4], x14
+    ld1             {v30.8b}, [x2], x5
+    sxtl            v8.4s, v2.4h
+    sxtl            v10.4s, v3.4h
+    raddhn          v12.4h, v0.4s, v8.4s
+    raddhn2         v12.8h, v0.4s, v10.4s
+    uaddw           v14.8h, v12.8h , v30.8b
+    sqxtun          v30.8b, v14.8h
+    st1             {v30.8b}, [x3], x6
+    //// Row 4
+    ld1             {v2.4h, v3.4h}, [x4], x14
+    ld1             {v30.8b}, [x2], x5
+    sxtl            v8.4s, v2.4h
+    sxtl            v10.4s, v3.4h
+    raddhn          v12.4h, v0.4s, v8.4s
+    raddhn2         v12.8h, v0.4s, v10.4s
+    uaddw           v14.8h, v12.8h , v30.8b
+    sqxtun          v30.8b, v14.8h
+    st1             {v30.8b}, [x3], x6
+    //// Row 5
+    ld1             {v2.4h, v3.4h}, [x4], x14
+    ld1             {v30.8b}, [x2], x5
+    sxtl            v8.4s, v2.4h
+    sxtl            v10.4s, v3.4h
+    raddhn          v12.4h, v0.4s, v8.4s
+    raddhn2         v12.8h, v0.4s, v10.4s
+    uaddw           v14.8h, v12.8h , v30.8b
+    sqxtun          v30.8b, v14.8h
+    st1             {v30.8b}, [x3], x6
+    //// Row 6
+    ld1             {v2.4h, v3.4h}, [x4], x14
+    ld1             {v30.8b}, [x2], x5
+    sxtl            v8.4s, v2.4h
+    sxtl            v10.4s, v3.4h
+    raddhn          v12.4h, v0.4s, v8.4s
+    raddhn2         v12.8h, v0.4s, v10.4s
+    uaddw           v14.8h, v12.8h , v30.8b
+    sqxtun          v30.8b, v14.8h
+    st1             {v30.8b}, [x3], x6
+    //// Row 7
+    ld1             {v2.4h, v3.4h}, [x4], x14
+    ld1             {v30.8b}, [x2], x5
+    sxtl            v8.4s, v2.4h
+    sxtl            v10.4s, v3.4h
+    raddhn          v12.4h, v0.4s, v8.4s
+    raddhn2         v12.8h, v0.4s, v10.4s
+    uaddw           v14.8h, v12.8h , v30.8b
+    sqxtun          v30.8b, v14.8h
+    st1             {v30.8b}, [x3], x6
+    //// Row 8
+    ld1             {v2.4h, v3.4h}, [x4], x14
+    ld1             {v30.8b}, [x2], x5
+    sxtl            v8.4s, v2.4h
+    sxtl            v10.4s, v3.4h
+    raddhn          v12.4h, v0.4s, v8.4s
+    raddhn2         v12.8h, v0.4s, v10.4s
+    uaddw           v14.8h, v12.8h , v30.8b
+    sqxtun          v30.8b, v14.8h
+    st1             {v30.8b}, [x3], x6
+
+
+    // Old register list comment: LDMFD sp!,{x4-x12,pc}; pop_v_regs is the counterpart of push_v_regs
+    pop_v_regs
+    ret
+
+.globl impeg2_idct_recon_av8
+
+.type impeg2_idct_recon_av8, %function
+
+impeg2_idct_recon_av8:
+////register usage.extern        - loading and until idct of columns
+////    cosine constants     -     d0
+////    sine constants         -     d1
+////    row 0 first half     -     d2        -    y0
+////    row 1 first half     -     d6        -    y1
+////    row 2 first half     -     d3        -    y2
+////    row 3 first half     -     d7        -    y3
+////    row 4 first half     -     d10        -    y4
+////    row 5 first half     -     d14        -    y5
+////    row 6 first half     -     d11        -    y6
+////    row 7 first half     -     d15        -    y7
+
+////    row 0 second half    -     d4        -    y0
+////    row 1 second half    -     d8      -    y1
+////    row 2 second half    -     d5      -    y2
+////    row 3 second half    -     d9      -    y3
+////    row 4 second half    -     d12     -    y4
+////    row 5 second half    -     d16     -    y5
+////    row 6 second half    -     d13     -    y6
+////    row 7 second half    -     d17     -    y7
+
+    //// copy the input pointer to another register
+    //// step 1 : load all constants
+    // stmfd sp!,{x4-x12,x14}
+
+    ldr             w11, [sp]           // zero rows
+
+    push_v_regs
+    stp             x19, x20, [sp, #-16]!
+
+    mov             x12, x7             // zero columns
+    mov             x8, x5              // prediction stride
+    mov             x7, x6              // destination stride
+    mov             x6, x4              // src stride
+    lsl             x6, x6, #1          // x sizeof(word16)
+    add             x9, x0, x6, lsl #1  // 2 rows
+
+    add             x10, x6, x6, lsl #1 // 3 rows
+
+    sub             x10, x10, #8        // - 4 cols * sizeof(word16)
+    sub             x5, x6, #8          // src_strd - 4 cols * sizeof(word16)
+
+    adrp            x14, :got:gai2_impeg2_idct_first_col_q15
+    ldr             x14, [x14, #:got_lo12:gai2_impeg2_idct_first_col_q15]
+    ld1             {v0.4h, v1.4h}, [x14] ////d0,d1 are used for storing the constant data
+
+    ////step 2 load all the input data
+    ////step 3 operate on the first 4 columns at a time
+
+    and             x11, x11, #0xff
+    and             x12, x12, #0xff
+
+    cmp             x11, #0xf0
+    bge             skip_last4_rows
+
+
+    ld1             {v2.4h}, [x0], #8
+    ld1             {v3.4h}, [x9], #8
+    ld1             {v4.4h}, [x0], x5
+    smull           v20.4s, v2.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
+    ld1             {v5.4h}, [x9], x5
+    smull           v18.4s, v3.4h, v1.4h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
+    ld1             {v6.4h}, [x0], #8
+    ld1             {v7.4h}, [x9], #8
+    smull           v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0)
+    ld1             {v8.4h}, [x0], x10
+    smull           v26.4s, v6.4h, v0.4h[3] //// y1 * cos3(part of b1)
+    ld1             {v9.4h}, [x9], x10
+    smull           v28.4s, v6.4h, v1.4h[1] //// y1 * sin3(part of b2)
+    ld1             {v10.4h}, [x0], #8
+    smull           v30.4s, v6.4h, v1.4h[3] //// y1 * sin1(part of b3)
+    ld1             {v11.4h}, [x9], #8
+    smlal           v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
+    ld1             {v12.4h}, [x0], x5
+    smlsl           v26.4s, v7.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
+    ld1             {v13.4h}, [x9], x5
+    smlsl           v28.4s, v7.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
+    ld1             {v14.4h}, [x0], #8
+    smlsl           v30.4s, v7.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
+    ld1             {v15.4h}, [x9], #8
+    smull           v22.4s, v10.4h, v0.4h[0] //// y4 * cos4(part of c0 and c1)
+    ld1             {v16.4h}, [x0], x10
+    smull           v6.4s, v3.4h, v0.4h[2] //// y2 * cos2(part of d0)
+    ld1             {v17.4h}, [x9], x10
+
+    ///* the following (now commented out) was activated when alignment is not there */
+////    vld1.16        d2,[x0]!
+////    vld1.16        d3,[x2]!
+////    vld1.16        d4,[x0]!
+////    vld1.16        d5,[x2]!
+////    vld1.16        d6,[x0]!
+////    vld1.16        d7,[x2]!
+////    vld1.16        d8,[x0],x3
+////    vld1.16        d9,[x2],x3
+////    vld1.16        d10,[x0]!
+////    vld1.16        d11,[x2]!
+////    vld1.16        d12,[x0]!
+////    vld1.16        d13,[x2]!
+////    vld1.16        d14,[x0]!
+////    vld1.16        d15,[x2]!
+////    vld1.16        d16,[x0],x3
+////    vld1.16        d17,[x2],x3
+
+
+
+
+    smlal           v24.4s, v14.4h, v1.4h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
+    smlsl           v26.4s, v14.4h, v0.4h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
+    smlal           v28.4s, v14.4h, v1.4h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
+    smlal           v30.4s, v14.4h, v0.4h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
+
+    smlsl           v18.4s, v11.4h, v0.4h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
+    smlal           v6.4s, v11.4h, v1.4h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
+
+    add             v10.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
+    sub             v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
+
+    smlal           v24.4s, v15.4h, v1.4h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of x0,x7)
+    smlsl           v26.4s, v15.4h, v1.4h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of x1,x6)
+    smlal           v28.4s, v15.4h, v0.4h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of x2,x5)
+    smlsl           v30.4s, v15.4h, v0.4h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of x3,x4)
+
+    add             v14.4s, v10.4s , v6.4s ////    a0 = c0 + d0(part of x0,x7)
+    sub             v10.4s, v10.4s , v6.4s //// a3 = c0 - d0(part of x3,x4)
+    sub             v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
+    add             v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
+
+    add             v20.4s, v14.4s , v24.4s //// a0 + b0(part of x0)
+    sub             v6.4s, v14.4s , v24.4s //// a0 - b0(part of x7)
+
+    add             v24.4s, v22.4s , v28.4s //// a2 + b2(part of x2)
+    sub             v22.4s, v22.4s , v28.4s //// a2 - b2(part of x5)
+
+    add             v28.4s, v18.4s , v26.4s //// a1 + b1(part of x1)
+    sub             v18.4s, v18.4s , v26.4s //// a1 - b1(part of x6)
+
+    add             v26.4s, v10.4s , v30.4s //// a3 + b3(part of x3)
+    sub             v30.4s, v10.4s , v30.4s //// a3 - b3(part of x4)
+
+    sqrshrn         v2.4h, v20.4s, #idct_stg1_shift //// x0 = (a0 + b0 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v15.4h, v6.4s, #idct_stg1_shift //// x7 = (a0 - b0 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v3.4h, v24.4s, #idct_stg1_shift //// x2 = (a2 + b2 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v14.4h, v22.4s, #idct_stg1_shift //// x5 = (a2 - b2 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v6.4h, v28.4s, #idct_stg1_shift //// x1 = (a1 + b1 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v11.4h, v18.4s, #idct_stg1_shift //// x6 = (a1 - b1 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v7.4h, v26.4s, #idct_stg1_shift //// x3 = (a3 + b3 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v10.4h, v30.4s, #idct_stg1_shift //// x4 = (a3 - b3 + rnd) >> 7(IDCT_STG1_SHIFT)
+
+
+    b               last4_cols
+
+
+
+skip_last4_rows:
+    adrp            x14, :got:gai2_impeg2_idct_first_col_q15
+    ldr             x14, [x14, #:got_lo12:gai2_impeg2_idct_first_col_q15]
+    ld1             {v0.4h, v1.4h}, [x14]
+
+    ld1             {v2.4h}, [x0], #8
+    ld1             {v3.4h}, [x9], #8
+    ld1             {v4.4h}, [x0], x5
+    ld1             {v5.4h}, [x9], x5
+    ld1             {v6.4h}, [x0], #8
+    ld1             {v7.4h}, [x9], #8
+    ld1             {v8.4h}, [x0], x10
+    ld1             {v9.4h}, [x9], x10
+
+
+
+    movi            v12.4h, #0
+    movi            v13.4h, #0
+    movi            v16.4h, #0
+    movi            v17.4h, #0
+
+
+
+
+    smull           v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0)
+    smull           v26.4s, v6.4h, v0.4h[3] //// y1 * cos3(part of b1)
+    smull           v28.4s, v6.4h, v1.4h[1] //// y1 * sin3(part of b2)
+    smull           v30.4s, v6.4h, v1.4h[3] //// y1 * sin1(part of b3)
+
+    smlal           v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
+    smlsl           v26.4s, v7.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
+    smlsl           v28.4s, v7.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
+    smlsl           v30.4s, v7.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
+
+    smull           v18.4s, v3.4h, v1.4h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
+    smull           v6.4s, v3.4h, v0.4h[2] //// y2 * cos2(part of d0)
+
+    smull           v20.4s, v2.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
+
+
+    add             v14.4s, v20.4s , v6.4s ////    a0 = c0 + d0(part of x0,x7)
+    sub             v10.4s, v20.4s , v6.4s //// a3 = c0 - d0(part of x3,x4)
+    sub             v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
+    add             v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
+
+    add             v20.4s, v14.4s , v24.4s //// a0 + b0(part of x0)
+    sub             v6.4s, v14.4s , v24.4s //// a0 - b0(part of x7)
+
+    add             v24.4s, v22.4s , v28.4s //// a2 + b2(part of x2)
+    sub             v22.4s, v22.4s , v28.4s //// a2 - b2(part of x5)
+
+    add             v28.4s, v18.4s , v26.4s //// a1 + b1(part of x1)
+    sub             v18.4s, v18.4s , v26.4s //// a1 - b1(part of x6)
+
+    add             v26.4s, v10.4s , v30.4s //// a3 + b3(part of x3)
+    sub             v30.4s, v10.4s , v30.4s //// a3 - b3(part of x4)
+
+    sqrshrn         v2.4h, v20.4s, #idct_stg1_shift //// x0 = (a0 + b0 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v15.4h, v6.4s, #idct_stg1_shift //// x7 = (a0 - b0 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v3.4h, v24.4s, #idct_stg1_shift //// x2 = (a2 + b2 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v14.4h, v22.4s, #idct_stg1_shift //// x5 = (a2 - b2 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v6.4h, v28.4s, #idct_stg1_shift //// x1 = (a1 + b1 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v11.4h, v18.4s, #idct_stg1_shift //// x6 = (a1 - b1 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v7.4h, v26.4s, #idct_stg1_shift //// x3 = (a3 + b3 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v10.4h, v30.4s, #idct_stg1_shift //// x4 = (a3 - b3 + rnd) >> 7(IDCT_STG1_SHIFT)
+
+
+last4_cols:
+    adrp            x14, :got:gai2_impeg2_idct_first_col_q15
+    ldr             x14, [x14, #:got_lo12:gai2_impeg2_idct_first_col_q15]
+    ld1             {v0.4h, v1.4h}, [x14]
+
+
+    cmp             x12, #0xf0
+    bge             skip_last4cols
+
+    smull           v24.4s, v8.4h, v0.4h[1] //// y1 * cos1(part of b0)
+    smull           v26.4s, v8.4h, v0.4h[3] //// y1 * cos3(part of b1)
+    smull           v28.4s, v8.4h, v1.4h[1] //// y1 * sin3(part of b2)
+    smull           v30.4s, v8.4h, v1.4h[3] //// y1 * sin1(part of b3)
+
+    smlal           v24.4s, v9.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
+    smlsl           v26.4s, v9.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
+    smlsl           v28.4s, v9.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
+    smlsl           v30.4s, v9.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
+
+    smull           v18.4s, v5.4h, v1.4h[2] //// y2 * sin2 (q4 is freed by this time)(part of d1)
+    smull           v8.4s, v5.4h, v0.4h[2] //// y2 * cos2(part of d0)
+
+    smull           v20.4s, v4.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
+    smull           v22.4s, v12.4h, v0.4h[0] //// y4 * cos4(part of c0 and c1)
+
+    smlal           v24.4s, v16.4h, v1.4h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
+    smlsl           v26.4s, v16.4h, v0.4h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
+    smlal           v28.4s, v16.4h, v1.4h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
+    smlal           v30.4s, v16.4h, v0.4h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
+
+    smlsl           v18.4s, v13.4h, v0.4h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
+    smlal           v8.4s, v13.4h, v1.4h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
+
+    add             v12.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
+    sub             v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
+
+    smlal           v24.4s, v17.4h, v1.4h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of e0,e7)
+    smlsl           v26.4s, v17.4h, v1.4h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of e1,e6)
+    smlal           v28.4s, v17.4h, v0.4h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of e2,e5)
+    smlsl           v30.4s, v17.4h, v0.4h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of e3,e4)
+
+    add             v16.4s, v12.4s , v8.4s ////    a0 = c0 + d0(part of e0,e7)
+    sub             v12.4s, v12.4s , v8.4s //// a3 = c0 - d0(part of e3,e4)
+    sub             v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of e2,e5)
+    add             v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of e1,e6)
+
+    add             v20.4s, v16.4s , v24.4s //// a0 + b0(part of e0)
+    sub             v8.4s, v16.4s , v24.4s //// a0 - b0(part of e7)
+
+    add             v24.4s, v22.4s , v28.4s //// a2 + b2(part of e2)
+    sub             v22.4s, v22.4s , v28.4s //// a2 - b2(part of e5)
+
+    add             v28.4s, v18.4s , v26.4s //// a1 + b1(part of e1)
+    sub             v18.4s, v18.4s , v26.4s //// a1 - b1(part of e6)
+
+    add             v26.4s, v12.4s , v30.4s //// a3 + b3(part of e3)
+    sub             v30.4s, v12.4s , v30.4s //// a3 - b3(part of x4)
+
+    sqrshrn         v4.4h, v20.4s, #idct_stg1_shift //// x0 = (a0 + b0 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v17.4h, v8.4s, #idct_stg1_shift //// x7 = (a0 - b0 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v5.4h, v24.4s, #idct_stg1_shift //// x2 = (a2 + b2 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v16.4h, v22.4s, #idct_stg1_shift //// x5 = (a2 - b2 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v8.4h, v28.4s, #idct_stg1_shift //// x1 = (a1 + b1 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v13.4h, v18.4s, #idct_stg1_shift //// x6 = (a1 - b1 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v9.4h, v26.4s, #idct_stg1_shift //// x3 = (a3 + b3 + rnd) >> 7(IDCT_STG1_SHIFT)
+    sqrshrn         v12.4h, v30.4s, #idct_stg1_shift //// x4 = (a3 - b3 + rnd) >> 7(IDCT_STG1_SHIFT)
+    b               end_skip_last4cols
+
+
+
+skip_last4cols:
+    adrp            x14, :got:gai2_impeg2_idct_first_col_q11
+    ldr             x14, [x14, #:got_lo12:gai2_impeg2_idct_first_col_q11]
+    ld1             {v0.4h, v1.4h}, [x14]
+
+    umov            x15, v25.d[0]
+
+    trn1            v25.4h, v2.4h, v6.4h
+    trn2            v29.4h, v2.4h, v6.4h ////[x3,x1],[x2,x0] first qudrant transposing
+
+    trn1            v27.4h, v3.4h, v7.4h
+    trn2            v31.4h, v3.4h, v7.4h ////[x3,x1],[x2,x0] first qudrant transposing
+
+    trn1            v6.2s, v29.2s, v31.2s
+    trn2            v7.2s, v29.2s, v31.2s ////x0,x1,x2,x3 first qudrant transposing continued.....
+    trn1            v2.2s, v25.2s, v27.2s
+    trn2            v3.2s, v25.2s, v27.2s ////x0,x1,x2,x3 first qudrant transposing continued.....
+
+
+    trn1            v25.4h, v10.4h, v14.4h
+    trn2            v29.4h, v10.4h, v14.4h ////[x7,x5],[x6,x4] third qudrant transposing
+
+    trn1            v27.4h, v11.4h, v15.4h
+    trn2            v31.4h, v11.4h, v15.4h ////[x7,x5],[x6,x4] third qudrant transposing
+
+    trn1            v10.2s, v25.2s, v27.2s
+    trn2            v11.2s, v25.2s, v27.2s ////x4,x5,x6,x7 third qudrant transposing continued.....
+    trn1            v14.2s, v29.2s, v31.2s
+    trn2            v15.2s, v29.2s, v31.2s ////x4,x5,x6,x7 third qudrant transposing continued.....
+
+    mov             v25.d[0], x15
+
+    smull           v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0)
+    smull           v26.4s, v6.4h, v0.4h[3] //// y1 * cos3(part of b1)
+    smull           v28.4s, v6.4h, v1.4h[1] //// y1 * sin3(part of b2)
+    smull           v30.4s, v6.4h, v1.4h[3] //// y1 * sin1(part of b3)
+
+    smlal           v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
+    smlsl           v26.4s, v7.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
+    smlsl           v28.4s, v7.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
+    smlsl           v30.4s, v7.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
+
+    smull           v20.4s, v2.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
+//    vmull.s16    q11,d4,d0[0]                    @// y4 * cos4(part of c0 and c1)
+
+    smull           v18.4s, v3.4h, v1.4h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
+    smull           v6.4s, v3.4h, v0.4h[2] //// y2 * cos2(part of d0)
+
+
+
+
+    sub             v22.4s, v20.4s , v6.4s //// a3 = c0 - d0(part of x3,x4)
+    add             v4.4s, v20.4s , v6.4s ////    a0 = c0 + d0(part of x0,x7)
+
+
+    add             v2.4s, v4.4s , v24.4s
+
+    sub             v6.4s, v4.4s , v24.4s
+
+    add             v8.4s, v22.4s , v30.4s
+
+    sub             v24.4s, v22.4s , v30.4s
+
+    sqrshrn         v5.4h, v8.4s, #idct_stg2_shift
+    sqrshrn         v2.4h, v2.4s, #idct_stg2_shift
+    sqrshrn         v9.4h, v6.4s, #idct_stg2_shift
+    sqrshrn         v6.4h, v24.4s, #idct_stg2_shift
+
+    sub             v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
+    add             v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
+
+
+    add             v30.4s, v22.4s , v28.4s
+
+    sub             v24.4s, v22.4s , v28.4s
+
+    add             v28.4s, v18.4s , v26.4s
+
+    sub             v22.4s, v18.4s , v26.4s
+    sqrshrn         v4.4h, v30.4s, #idct_stg2_shift
+    sqrshrn         v7.4h, v24.4s, #idct_stg2_shift
+    sqrshrn         v3.4h, v28.4s, #idct_stg2_shift
+    sqrshrn         v8.4h, v22.4s, #idct_stg2_shift
+
+
+
+    umov            x19, v25.d[0]
+    umov            x20, v25.d[1]
+
+    trn1            v27.4h, v2.4h, v3.4h
+    trn2            v29.4h, v2.4h, v3.4h
+    trn1            v25.4h, v4.4h, v5.4h
+    trn2            v31.4h, v4.4h, v5.4h
+
+    trn1            v2.2s, v27.2s, v25.2s
+    trn2            v4.2s, v27.2s, v25.2s
+    trn1            v3.2s, v29.2s, v31.2s
+    trn2            v5.2s, v29.2s, v31.2s
+
+    trn1            v27.4h, v6.4h, v7.4h
+    trn2            v29.4h, v6.4h, v7.4h
+    trn1            v25.4h, v8.4h, v9.4h
+    trn2            v31.4h, v8.4h, v9.4h
+
+    trn1            v6.2s, v27.2s, v25.2s
+    trn2            v8.2s, v27.2s, v25.2s
+    trn1            v7.2s, v29.2s, v31.2s
+    trn2            v9.2s, v29.2s, v31.2s
+
+    mov             v25.d[0], x19
+    mov             v25.d[1], x20
+
+    smull           v24.4s, v14.4h, v0.4h[1] //// y1 * cos1(part of b0)
+
+    smull           v26.4s, v14.4h, v0.4h[3] //// y1 * cos3(part of b1)
+    smull           v28.4s, v14.4h, v1.4h[1] //// y1 * sin3(part of b2)
+    smull           v30.4s, v14.4h, v1.4h[3] //// y1 * sin1(part of b3)
+
+    smlal           v24.4s, v15.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
+    smlsl           v26.4s, v15.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
+    smlsl           v28.4s, v15.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
+    smlsl           v30.4s, v15.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
+    smull           v20.4s, v10.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
+    smull           v18.4s, v11.4h, v1.4h[2] //// y2 * sin2 (q7 is freed by this time)(part of d1)
+    smull           v14.4s, v11.4h, v0.4h[2] //// y2 * cos2(part of d0)
+
+
+    add             x4, x2, x8, lsl #1  // x4 = x2 + pred_strd * 2    => x4 points to 3rd row of pred data
+
+
+    add             x5, x8, x8, lsl #1  //
+
+
+    add             x0, x3, x7, lsl #1  // x0 points to 3rd row of dest data
+
+
+    add             x10, x7, x7, lsl #1 //
+
+    // swapping v3 and v6
+    mov             v31.d[0], v3.d[0]
+    mov             v3.d[0], v6.d[0]
+    mov             v6.d[0], v31.d[0]
+
+    // swapping v5 and v8
+    mov             v31.d[0], v5.d[0]
+    mov             v5.d[0], v8.d[0]
+    mov             v8.d[0], v31.d[0]
+
+
+    sub             v22.4s, v20.4s , v14.4s //// a3 = c0 - d0(part of x3,x4)
+    add             v12.4s, v20.4s , v14.4s ////    a0 = c0 + d0(part of x0,x7)
+
+
+    add             v0.4s, v12.4s , v24.4s
+
+
+    sub             v24.4s, v12.4s , v24.4s
+
+
+    add             v12.4s, v22.4s , v30.4s
+
+
+    sub             v14.4s, v22.4s , v30.4s
+
+    sqrshrn         v10.4h, v0.4s, #idct_stg2_shift
+    sqrshrn         v17.4h, v24.4s, #idct_stg2_shift
+    sqrshrn         v13.4h, v12.4s, #idct_stg2_shift
+    sqrshrn         v14.4h, v14.4s, #idct_stg2_shift
+
+    sub             v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
+    add             v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
+
+
+    add             v0.4s, v22.4s , v28.4s
+
+
+    sub             v24.4s, v22.4s , v28.4s
+
+
+    add             v28.4s, v18.4s , v26.4s
+
+
+    sub             v26.4s, v18.4s , v26.4s
+    ld1             {v18.8b}, [x2], x8
+
+    sqrshrn         v12.4h, v0.4s, #idct_stg2_shift
+    ld1             {v20.8b}, [x2], x5
+
+
+    sqrshrn         v15.4h, v24.4s, #idct_stg2_shift
+    ld1             {v19.8b}, [x2], x8
+
+
+
+
+    sqrshrn         v11.4h, v28.4s, #idct_stg2_shift
+    ld1             {v22.8b}, [x4], x8
+
+
+
+
+    sqrshrn         v16.4h, v26.4s, #idct_stg2_shift
+    ld1             {v21.8b}, [x2], x5
+
+
+    b               pred_buff_addition
+end_skip_last4cols:
+    adrp            x14, :got:gai2_impeg2_idct_first_col_q11
+    ldr             x14, [x14, #:got_lo12:gai2_impeg2_idct_first_col_q11]
+    ld1             {v0.4h, v1.4h}, [x14]
+
+
+    umov            x19, v25.d[0]
+    umov            x20, v25.d[1]
+
+///* now that the idct of columns is done, transpose so that the row idct can be done efficiently (step 5) */
+    trn1            v27.4h, v2.4h, v6.4h
+    trn2            v29.4h, v2.4h, v6.4h ////[x3,x1],[x2,x0] first qudrant transposing
+    trn1            v25.4h, v3.4h, v7.4h
+    trn2            v31.4h, v3.4h, v7.4h ////[x3,x1],[x2,x0] first qudrant transposing
+
+    trn1            v2.2s, v27.2s, v25.2s
+    trn2            v3.2s, v27.2s, v25.2s ////x0,x1,x2,x3 first qudrant transposing continued.....
+    trn1            v6.2s, v29.2s, v31.2s
+    trn2            v7.2s, v29.2s, v31.2s ////x0,x1,x2,x3 first qudrant transposing continued.....
+
+    trn1            v27.4h, v4.4h, v8.4h
+    trn2            v29.4h, v4.4h, v8.4h ////[x3,x1],[x2,x0] second qudrant transposing
+    trn1            v25.4h, v5.4h, v9.4h
+    trn2            v31.4h, v5.4h, v9.4h ////[x3,x1],[x2,x0] second qudrant transposing
+
+    trn1            v4.2s, v27.2s, v25.2s
+    trn2            v5.2s, v27.2s, v25.2s ////x0,x1,x2,x3 second qudrant transposing continued.....
+    trn1            v8.2s, v29.2s, v31.2s
+    trn2            v9.2s, v29.2s, v31.2s ////x0,x1,x2,x3 second qudrant transposing continued.....
+
+    trn1            v27.4h, v10.4h, v14.4h
+    trn2            v29.4h, v10.4h, v14.4h ////[x7,x5],[x6,x4] third qudrant transposing
+    trn1            v25.4h, v11.4h, v15.4h
+    trn2            v31.4h, v11.4h, v15.4h ////[x7,x5],[x6,x4] third qudrant transposing
+
+    trn1            v10.2s, v27.2s, v25.2s
+    trn2            v11.2s, v27.2s, v25.2s ////x4,x5,x6,x7 third qudrant transposing continued.....
+    trn1            v14.2s, v29.2s, v31.2s
+    trn2            v15.2s, v29.2s, v31.2s ////x4,x5,x6,x7 third qudrant transposing continued.....
+
+    trn1            v27.4h, v12.4h, v16.4h
+    trn2            v29.4h, v12.4h, v16.4h ////[x7,x5],[x6,x4] fourth qudrant transposing
+    trn1            v25.4h, v13.4h, v17.4h
+    trn2            v31.4h, v13.4h, v17.4h ////[x7,x5],[x6,x4] fourth qudrant transposing
+
+    trn1            v12.2s, v27.2s, v25.2s
+    trn2            v13.2s, v27.2s, v25.2s ////x4,x5,x6,x7 fourth qudrant transposing continued.....
+    trn1            v16.2s, v29.2s, v31.2s
+    trn2            v17.2s, v29.2s, v31.2s ////x4,x5,x6,x7 fourth qudrant transposing continued.....
+
+    mov             v25.d[0], x19
+    mov             v25.d[1], x20
+
+    ////step6 operate on first four rows and find their idct
+    ////register usage (dN below corresponds to vN.4h in the code) - storing and idct of rows
+////    cosine constants     -     d0
+////    sine constants         -     d1
+////    element 0 first four     -     d2        -    y0
+////    element 1 first four     -     d6        -    y1
+////    element 2 first four     -     d3        -    y2
+////    element 3 first four     -     d7        -    y3
+////    element 4 first four     -     d4        -    y4
+////    element 5 first four     -     d8        -    y5
+////    element 6 first four     -     d5        -    y6
+////    element 7 first four     -     d9        -    y7
+////    element 0 second four    -     d10        -    y0
+////    element 1 second four    -     d14     -    y1
+////    element 2 second four    -     d11     -    y2
+////    element 3 second four    -     d15     -    y3
+////    element 4 second four    -     d12     -    y4
+////    element 5 second four    -     d16     -    y5
+////    element 6 second four    -     d13     -    y6
+////    element 7 second four    -     d17     -    y7
+
+    //// map between first kernel code seq and current
+////        d2    ->    d2
+////        d6    ->    d6
+////        d3    ->    d3
+////        d7    ->    d7
+////        d10    ->    d4
+////        d14    ->    d8
+////        d11    ->    d5
+////        d15    ->    d9
+////        q3    ->    q3
+////        q5    ->    q2
+////        q7    ->    q4
+
+    smull           v24.4s, v6.4h, v0.4h[1] //// y1 * cos1(part of b0)
+    smull           v26.4s, v6.4h, v0.4h[3] //// y1 * cos3(part of b1)
+    smull           v28.4s, v6.4h, v1.4h[1] //// y1 * sin3(part of b2)
+    smull           v30.4s, v6.4h, v1.4h[3] //// y1 * sin1(part of b3)
+
+    smlal           v24.4s, v7.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
+    smlsl           v26.4s, v7.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
+    smlsl           v28.4s, v7.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
+    smlsl           v30.4s, v7.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
+
+    smull           v20.4s, v2.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
+    smull           v22.4s, v4.4h, v0.4h[0] //// y4 * cos4(part of c0 and c1)
+
+    smull           v18.4s, v3.4h, v1.4h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
+    smull           v6.4s, v3.4h, v0.4h[2] //// y2 * cos2(part of d0)
+
+
+    smlal           v24.4s, v8.4h, v1.4h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
+    smlsl           v26.4s, v8.4h, v0.4h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
+    smlal           v28.4s, v8.4h, v1.4h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
+    smlal           v30.4s, v8.4h, v0.4h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
+
+    smlsl           v18.4s, v5.4h, v0.4h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
+    smlal           v6.4s, v5.4h, v1.4h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
+
+    add             v2.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
+    sub             v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
+
+    smlal           v24.4s, v9.4h, v1.4h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of x0,x7)
+    smlsl           v26.4s, v9.4h, v1.4h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of x1,x6)
+    smlal           v28.4s, v9.4h, v0.4h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of x2,x5)
+    smlsl           v30.4s, v9.4h, v0.4h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of x3,x4)
+
+    sub             v22.4s, v2.4s , v6.4s //// a3 = c0 - d0(part of x3,x4)
+    add             v4.4s, v2.4s , v6.4s ////    a0 = c0 + d0(part of x0,x7)
+
+
+    add             v2.4s, v4.4s , v24.4s
+
+    sub             v6.4s, v4.4s , v24.4s
+
+    add             v8.4s, v22.4s , v30.4s
+
+    sub             v24.4s, v22.4s , v30.4s
+
+    sqrshrn         v5.4h, v8.4s, #idct_stg2_shift
+    sqrshrn         v2.4h, v2.4s, #idct_stg2_shift
+    sqrshrn         v9.4h, v6.4s, #idct_stg2_shift
+    sqrshrn         v6.4h, v24.4s, #idct_stg2_shift
+
+    sub             v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
+    add             v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
+
+
+    add             v30.4s, v22.4s , v28.4s
+
+    sub             v24.4s, v22.4s , v28.4s
+
+    add             v28.4s, v18.4s , v26.4s
+
+    sub             v22.4s, v18.4s , v26.4s
+    sqrshrn         v4.4h, v30.4s, #idct_stg2_shift
+    sqrshrn         v7.4h, v24.4s, #idct_stg2_shift
+    sqrshrn         v3.4h, v28.4s, #idct_stg2_shift
+    sqrshrn         v8.4h, v22.4s, #idct_stg2_shift
+
+
+
+    umov            x19, v25.d[0]
+    umov            x20, v25.d[1]
+
+    trn1            v27.4h, v2.4h, v3.4h
+    trn2            v29.4h, v2.4h, v3.4h
+    trn1            v25.4h, v4.4h, v5.4h
+    trn2            v31.4h, v4.4h, v5.4h
+
+    trn1            v2.2s, v27.2s, v25.2s
+    trn2            v4.2s, v27.2s, v25.2s
+    trn1            v3.2s, v29.2s, v31.2s
+    trn2            v5.2s, v29.2s, v31.2s
+
+    trn1            v27.4h, v6.4h, v7.4h
+    trn2            v29.4h, v6.4h, v7.4h
+    trn1            v25.4h, v8.4h, v9.4h
+    trn2            v31.4h, v8.4h, v9.4h
+
+    trn1            v6.2s, v27.2s, v25.2s
+    trn2            v8.2s, v27.2s, v25.2s
+    trn1            v7.2s, v29.2s, v31.2s
+    trn2            v9.2s, v29.2s, v31.2s
+
+    mov             v25.d[0], x19
+    mov             v25.d[1], x20
+
+
+
+    smull           v24.4s, v14.4h, v0.4h[1] //// y1 * cos1(part of b0)
+    smull           v26.4s, v14.4h, v0.4h[3] //// y1 * cos3(part of b1)
+    smull           v28.4s, v14.4h, v1.4h[1] //// y1 * sin3(part of b2)
+    smull           v30.4s, v14.4h, v1.4h[3] //// y1 * sin1(part of b3)
+    smlal           v24.4s, v15.4h, v0.4h[3] //// y1 * cos1 + y3 * cos3(part of b0)
+    smlsl           v26.4s, v15.4h, v1.4h[3] //// y1 * cos3 - y3 * sin1(part of b1)
+    smlsl           v28.4s, v15.4h, v0.4h[1] //// y1 * sin3 - y3 * cos1(part of b2)
+    smlsl           v30.4s, v15.4h, v1.4h[1] //// y1 * sin1 - y3 * sin3(part of b3)
+    smull           v20.4s, v10.4h, v0.4h[0] //// y0 * cos4(part of c0 and c1)
+    smull           v22.4s, v12.4h, v0.4h[0] //// y4 * cos4(part of c0 and c1)
+    smull           v18.4s, v11.4h, v1.4h[2] //// y2 * sin2 (q7 is freed by this time)(part of d1)
+    smull           v14.4s, v11.4h, v0.4h[2] //// y2 * cos2(part of d0)
+    smlal           v24.4s, v16.4h, v1.4h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
+
+    add             x4, x2, x8, lsl #1  // x4 = x2 + pred_strd * 2    => x4 points to 3rd row of pred data
+    smlsl           v26.4s, v16.4h, v0.4h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
+
+    add             x5, x8, x8, lsl #1  //
+    smlal           v28.4s, v16.4h, v1.4h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
+
+    add             x0, x3, x7, lsl #1  // x0 points to 3rd row of dest data
+    smlal           v30.4s, v16.4h, v0.4h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
+
+    add             x10, x7, x7, lsl #1 //
+    smlsl           v18.4s, v13.4h, v0.4h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
+
+
+    smlal           v14.4s, v13.4h, v1.4h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
+
+    add             v12.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
+    sub             v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
+
+    smlal           v24.4s, v17.4h, v1.4h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(part of x0,x7)
+
+    // swapping v3 and v6
+    mov             v31.d[0], v3.d[0]
+    mov             v3.d[0], v6.d[0]
+    mov             v6.d[0], v31.d[0]
+
+    smlsl           v26.4s, v17.4h, v1.4h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(part of x1,x6)
+    // swapping v5 and v8
+    mov             v31.d[0], v5.d[0]
+    mov             v5.d[0], v8.d[0]
+    mov             v8.d[0], v31.d[0]
+
+    smlal           v28.4s, v17.4h, v0.4h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(part of x2,x5)
+    smlsl           v30.4s, v17.4h, v0.4h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(part of x3,x4)
+
+    sub             v22.4s, v12.4s , v14.4s //// a3 = c0 - d0(part of x3,x4)
+    add             v12.4s, v12.4s , v14.4s ////    a0 = c0 + d0(part of x0,x7)
+
+
+    add             v0.4s, v12.4s , v24.4s
+
+
+    sub             v24.4s, v12.4s , v24.4s
+
+
+    add             v12.4s, v22.4s , v30.4s
+
+
+    sub             v14.4s, v22.4s , v30.4s
+
+    sqrshrn         v10.4h, v0.4s, #idct_stg2_shift
+    sqrshrn         v17.4h, v24.4s, #idct_stg2_shift
+    sqrshrn         v13.4h, v12.4s, #idct_stg2_shift
+    sqrshrn         v14.4h, v14.4s, #idct_stg2_shift
+
+    sub             v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
+    add             v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
+
+
+    add             v0.4s, v22.4s , v28.4s
+
+
+    sub             v24.4s, v22.4s , v28.4s
+
+
+    add             v28.4s, v18.4s , v26.4s
+
+
+    sub             v26.4s, v18.4s , v26.4s
+    ld1             {v18.8b}, [x2], x8
+
+    sqrshrn         v12.4h, v0.4s, #idct_stg2_shift
+    ld1             {v20.8b}, [x2], x5
+
+
+    sqrshrn         v15.4h, v24.4s, #idct_stg2_shift
+    ld1             {v19.8b}, [x2], x8
+
+
+
+
+    sqrshrn         v11.4h, v28.4s, #idct_stg2_shift
+    ld1             {v22.8b}, [x4], x8
+
+
+
+
+    sqrshrn         v16.4h, v26.4s, #idct_stg2_shift
+    ld1             {v21.8b}, [x2], x5
+
+
+
+
+pred_buff_addition:
+
+    umov            x19, v25.d[0]
+    umov            x20, v25.d[1]
+
+    trn1            v27.4h, v10.4h, v11.4h
+    trn2            v29.4h, v10.4h, v11.4h
+    trn1            v25.4h, v12.4h, v13.4h
+    trn2            v31.4h, v12.4h, v13.4h
+
+    trn1            v10.2s, v27.2s, v25.2s
+    trn2            v12.2s, v27.2s, v25.2s
+    trn1            v11.2s, v29.2s, v31.2s
+    trn2            v13.2s, v29.2s, v31.2s
+
+    trn1            v27.4h, v14.4h, v15.4h
+    trn2            v29.4h, v14.4h, v15.4h
+    trn1            v25.4h, v16.4h, v17.4h
+    trn2            v31.4h, v16.4h, v17.4h
+
+    trn1            v14.2s, v27.2s, v25.2s
+    trn2            v16.2s, v27.2s, v25.2s
+    trn1            v15.2s, v29.2s, v31.2s
+    trn2            v17.2s, v29.2s, v31.2s
+
+
+    mov             v25.d[0], x19
+    mov             v25.d[1], x20
+
+
+    ld1             {v24.8b}, [x4], x5
+    ld1             {v23.8b}, [x4], x8
+    ld1             {v25.8b}, [x4], x5
+    mov             v2.d[1], v3.d[0]
+    mov             v4.d[1], v5.d[0]
+    mov             v6.d[1], v7.d[0]
+    mov             v8.d[1], v9.d[0]
+    uaddw           v2.8h, v2.8h , v18.8b
+    uaddw           v4.8h, v4.8h , v22.8b
+    uaddw           v6.8h, v6.8h , v20.8b
+    uaddw           v8.8h, v8.8h , v24.8b
+
+    // swapping v11 and v14
+    mov             v31.d[0], v11.d[0]
+    mov             v11.d[0], v14.d[0]
+    mov             v14.d[0], v31.d[0]
+
+    // swapping v13 and v16
+    mov             v31.d[0], v13.d[0]
+    mov             v13.d[0], v16.d[0]
+    mov             v16.d[0], v31.d[0]
+// row values stored in the q register.
+
+//q1 :x0
+//q3: x1
+//q2: x2
+//q4: x3
+//q5: x4
+//q7: x5
+//q6: x6
+//q8: x7
+
+
+
+///// adding the prediction buffer
+
+
+
+
+
+
+
+
+
+    // load prediction data
+
+
+
+
+
+    //adding recon with prediction
+
+
+
+
+    mov             v10.d[1], v11.d[0]
+    mov             v12.d[1], v13.d[0]
+    mov             v14.d[1], v15.d[0]
+    mov             v16.d[1], v17.d[0]
+    uaddw           v10.8h, v10.8h , v19.8b
+    sqxtun          v2.8b, v2.8h
+    uaddw           v14.8h, v14.8h , v21.8b
+    sqxtun          v4.8b, v4.8h
+    uaddw           v12.8h, v12.8h , v23.8b
+    sqxtun          v6.8b, v6.8h
+    uaddw           v16.8h, v16.8h , v25.8b
+    sqxtun          v8.8b, v8.8h
+
+
+
+
+
+
+
+    st1             {v2.8b}, [x3], x7
+    sqxtun          v10.8b, v10.8h
+    st1             {v6.8b}, [x3], x10
+    sqxtun          v14.8b, v14.8h
+    st1             {v4.8b}, [x0], x7
+    sqxtun          v12.8b, v12.8h
+    st1             {v8.8b}, [x0], x10
+    sqxtun          v16.8b, v16.8h
+
+
+
+
+
+
+
+    st1             {v10.8b}, [x3], x7
+    st1             {v14.8b}, [x3], x10
+    st1             {v12.8b}, [x0], x7
+    st1             {v16.8b}, [x0], x10
+
+
+
+
+    // ldmfd sp!,{x4-x12,pc}
+    ldp             x19, x20, [sp], #16
+    pop_v_regs
+    ret
+
+
+
+

diff --git a/common/armv8/impeg2_inter_pred.s b/common/armv8/impeg2_inter_pred.s
new file mode 100644
index 0000000..98ade45
--- /dev/null
+++ b/common/armv8/impeg2_inter_pred.s

@@ -0,0 +1,814 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+
+///*
+////----------------------------------------------------------------------------
+//// File Name            : impeg2_inter_pred.s
+////
+//// Description          : This file has motion compensation related
+////                        interpolation functions for ARMv8 (AArch64) NEON
+////
+//// Reference Document   :
+////
+//// Revision History     :
+////      Date            Author                  Detail Description
+////   ------------    ----------------    ----------------------------------
+////   18 jun 2010      S Hamsalekha              Created
+////
+////-------------------------------------------------------------------------
+//*/
+
+///*
+//// ----------------------------------------------------------------------------
+//// Include Files
+//// ----------------------------------------------------------------------------
+//*/
+//              PRESERVE8
+.text
+.include "impeg2_neon_macros.s"
+
+///*
+//// ----------------------------------------------------------------------------
+//// Struct/Union Types and Define
+//// ----------------------------------------------------------------------------
+//*/
+
+
+///*
+//// ----------------------------------------------------------------------------
+//// Static Global Data section variables
+//// ----------------------------------------------------------------------------
+//*/
+//// -------------------------- NONE --------------------------------------------
+
+
+///*
+//// ----------------------------------------------------------------------------
+//// Static Prototype Functions
+//// ----------------------------------------------------------------------------
+//*/
+//// -------------------------- NONE --------------------------------------------
+
+///*
+//// ----------------------------------------------------------------------------
+//// Exported functions
+//// ----------------------------------------------------------------------------
+//*/
+
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name      :   impeg2_copy_mb_av8()
+////
+//// Detail Description : Copies one MB worth of data from src to the dst
+////
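+//// Inputs             : x0 - pointer to src buffer descriptor (y, u, v plane pointers at offsets 0, 8, 16)
+////                      x1 - pointer to dst buffer descriptor (same layout)
+////                      x2 - source luma stride in bytes (halved for chroma)
+////                      x3 - destination luma stride in bytes (halved for chroma)
+//// Registers Used     : x4, x5, v0, v1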
+////
+//// Stack Usage        : 64 bytes
+////
+//// Outputs            :
+////
+//// Return Data        : None
+////
+//// Programming Note   : <program limitation>
+////-----------------------------------------------------------------------------
+//*/
+
+
+
+.global impeg2_copy_mb_av8
+// Copies one macroblock: 16 rows x 16 bytes of luma, then 8 rows x 8 bytes of U and 8 rows x 8 bytes of V.
+// x0 (src) and x1 (dst) each point to three 64-bit plane pointers at offsets 0 (y), 8 (u) and 16 (v).
+impeg2_copy_mb_av8:
+// NOTE(review): x2/x3 are consumed as full 64-bit post-increment strides; confirm callers pass them with valid upper 32 bits.
+// push_v_regs/pop_v_regs come from impeg2_neon_macros.s (not visible here); only v0/v1 are written in this routine.
+    push_v_regs
+
+
+    ldr             x4, [x0]            //src->y
+    ldr             x5, [x1]            //dst->y
+
+    //Luma row 1: load 16 bytes, step src by x2; store 16 bytes, step dst by x3
+    ld1             {v0.8b, v1.8b}, [x4], x2 //Load and increment src
+    st1             {v0.8b, v1.8b}, [x5], x3 //Store and increment dst
+
+    //Remaining 15 luma rows, fully unrolled
+    ld1             {v0.8b, v1.8b}, [x4], x2 //Load and increment src
+    st1             {v0.8b, v1.8b}, [x5], x3 //Store and increment dst
+    ld1             {v0.8b, v1.8b}, [x4], x2 //Load and increment src
+    st1             {v0.8b, v1.8b}, [x5], x3 //Store and increment dst
+    ld1             {v0.8b, v1.8b}, [x4], x2 //Load and increment src
+    st1             {v0.8b, v1.8b}, [x5], x3 //Store and increment dst
+    ld1             {v0.8b, v1.8b}, [x4], x2 //Load and increment src
+    st1             {v0.8b, v1.8b}, [x5], x3 //Store and increment dst
+    ld1             {v0.8b, v1.8b}, [x4], x2 //Load and increment src
+    st1             {v0.8b, v1.8b}, [x5], x3 //Store and increment dst
+    ld1             {v0.8b, v1.8b}, [x4], x2 //Load and increment src
+    st1             {v0.8b, v1.8b}, [x5], x3 //Store and increment dst
+    ld1             {v0.8b, v1.8b}, [x4], x2 //Load and increment src
+    st1             {v0.8b, v1.8b}, [x5], x3 //Store and increment dst
+    ld1             {v0.8b, v1.8b}, [x4], x2 //Load and increment src
+    st1             {v0.8b, v1.8b}, [x5], x3 //Store and increment dst
+    ld1             {v0.8b, v1.8b}, [x4], x2 //Load and increment src
+    st1             {v0.8b, v1.8b}, [x5], x3 //Store and increment dst
+    ld1             {v0.8b, v1.8b}, [x4], x2 //Load and increment src
+    st1             {v0.8b, v1.8b}, [x5], x3 //Store and increment dst
+    ld1             {v0.8b, v1.8b}, [x4], x2 //Load and increment src
+    st1             {v0.8b, v1.8b}, [x5], x3 //Store and increment dst
+    ld1             {v0.8b, v1.8b}, [x4], x2 //Load and increment src
+    st1             {v0.8b, v1.8b}, [x5], x3 //Store and increment dst
+    ld1             {v0.8b, v1.8b}, [x4], x2 //Load and increment src
+    st1             {v0.8b, v1.8b}, [x5], x3 //Store and increment dst
+    ld1             {v0.8b, v1.8b}, [x4], x2 //Load and increment src
+    st1             {v0.8b, v1.8b}, [x5], x3 //Store and increment dst
+    ld1             {v0.8b, v1.8b}, [x4], x2 //Load and increment src
+    st1             {v0.8b, v1.8b}, [x5], x3 //Store and increment dst
+
+    lsr             x2, x2, #1          //chroma src stride = luma src stride / 2
+    lsr             x3, x3, #1          //chroma dst stride = luma dst stride / 2
+
+    ldr             x4, [x0, #8]        //src->u
+    ldr             x5, [x1, #8]        //dst->u
+
+    //U row 1: 8 bytes per row
+    ld1             {v0.8b}, [x4], x2   //Load and increment src
+    st1             {v0.8b}, [x5], x3   //Store and increment dst
+
+    //Remaining 7 rows of u
+    ld1             {v0.8b}, [x4], x2   //Load and increment src
+    st1             {v0.8b}, [x5], x3   //Store and increment dst
+    ld1             {v0.8b}, [x4], x2   //Load and increment src
+    st1             {v0.8b}, [x5], x3   //Store and increment dst
+    ld1             {v0.8b}, [x4], x2   //Load and increment src
+    st1             {v0.8b}, [x5], x3   //Store and increment dst
+    ld1             {v0.8b}, [x4], x2   //Load and increment src
+    st1             {v0.8b}, [x5], x3   //Store and increment dst
+    ld1             {v0.8b}, [x4], x2   //Load and increment src
+    st1             {v0.8b}, [x5], x3   //Store and increment dst
+    ld1             {v0.8b}, [x4], x2   //Load and increment src
+    st1             {v0.8b}, [x5], x3   //Store and increment dst
+    ld1             {v0.8b}, [x4], x2   //Load and increment src
+    st1             {v0.8b}, [x5], x3   //Store and increment dst
+
+    ldr             x4, [x0, #16]       //src->v
+    ldr             x5, [x1, #16]       //dst->v
+
+    //V row 1: 8 bytes per row, same halved strides as u
+    ld1             {v0.8b}, [x4], x2   //Load and increment src
+    st1             {v0.8b}, [x5], x3   //Store and increment dst
+
+    //Remaining 7 rows of v
+    ld1             {v0.8b}, [x4], x2   //Load and increment src
+    st1             {v0.8b}, [x5], x3   //Store and increment dst
+    ld1             {v0.8b}, [x4], x2   //Load and increment src
+    st1             {v0.8b}, [x5], x3   //Store and increment dst
+    ld1             {v0.8b}, [x4], x2   //Load and increment src
+    st1             {v0.8b}, [x5], x3   //Store and increment dst
+    ld1             {v0.8b}, [x4], x2   //Load and increment src
+    st1             {v0.8b}, [x5], x3   //Store and increment dst
+    ld1             {v0.8b}, [x4], x2   //Load and increment src
+    st1             {v0.8b}, [x5], x3   //Store and increment dst
+    ld1             {v0.8b}, [x4], x2   //Load and increment src
+    st1             {v0.8b}, [x5], x3   //Store and increment dst
+    ld1             {v0.8b}, [x4], x2   //Load and increment src
+    st1             {v0.8b}, [x5], x3   //Store and increment dst
+
+// Undo push_v_regs and return; no value is returned in x0.
+    pop_v_regs
+    ret
+
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name      :   impeg2_mc_fullx_halfy_8x8_av8()
+////
+//// Detail Description : This function pastes the reference block in the
+////                      current frame buffer. This function is called for
+////                      blocks that are not coded and have motion vectors
+////                      with full pel horizontal and half pel vertical
+////                      resolution.
+////
+//// Inputs             : x0 - out     : Current Block Pointer
+////                      x1 - ref     : Reference Block Pointer
+////                      x2 - ref_wid : Reference Block Width
+////                      x3 - out_wid : Current Block Width
+////
+//// Registers Used     : x14, v0-v9
+////
+//// Stack Usage        : 64 bytes
+////
+//// Outputs            : The Motion Compensated Block
+////
+//// Return Data        : None
+////
+//// Programming Note   : <program limitation>
+////-----------------------------------------------------------------------------
+//*/
+
+.global impeg2_mc_fullx_halfy_8x8_av8
+// 8x8 block, vertical half-pel: out row n = (ref row n + ref row n+1 + 1) >> 1, using 9 reference rows.
+impeg2_mc_fullx_halfy_8x8_av8:
+// x1 walks the odd-numbered reference rows and x14 the even-numbered ones, each stepping by 2 * ref_wid.
+// push_v_regs is defined in impeg2_neon_macros.s (not visible here); v8 and v9 are written below.
+    push_v_regs
+    add             x14, x1, x2         // x14 = ref + ref_wid (ref row 2)
+    lsl             x2, x2, #1          // step two rows at a time
+
+///* Load 8 + 1 rows from reference block */
+///* urhadd computes (a + b + 1) >> 1 per byte, so no separate rounding step is needed */
+    ld1             {v0.8b}, [x1], x2   //// ref row 1 -> v0
+    ld1             {v2.8b}, [x14], x2  //// ref row 2 -> v2
+    ld1             {v4.8b}, [x1], x2   //// ref row 3 -> v4
+    ld1             {v6.8b}, [x14], x2  //// ref row 4 -> v6
+    ld1             {v1.8b}, [x1], x2   //// ref row 5 -> v1
+    ld1             {v3.8b}, [x14], x2  //// ref row 6 -> v3
+    urhadd          v9.8b, v1.8b , v6.8b //// out row 4 = avg(ref row 5, ref row 4); must precede the overwrite of v1
+    ld1             {v5.8b}, [x1], x2   //// ref row 7 -> v5
+    urhadd          v0.16b, v0.16b , v2.16b //// out row 1 = avg(ref row 1, ref row 2); only the low 8 bytes are stored
+    urhadd          v1.16b, v1.16b , v3.16b //// out row 5 = avg(ref row 5, ref row 6)
+    ld1             {v7.8b}, [x14], x2  //// ref row 8 -> v7
+    urhadd          v2.16b, v2.16b , v4.16b //// out row 2 = avg(ref row 2, ref row 3)
+    urhadd          v3.16b, v3.16b , v5.16b //// out row 6 = avg(ref row 6, ref row 7)
+    ld1             {v8.8b}, [x1], x2   //// ref row 9 -> v8
+    urhadd          v4.16b, v4.16b , v6.16b //// out row 3 = avg(ref row 3, ref row 4)
+    urhadd          v5.16b, v5.16b , v7.16b //// out row 7 = avg(ref row 7, ref row 8)
+
+    add             x14, x0, x3         // x14 = out + out_wid (out row 2)
+    lsl             x3, x3, #1          // step two output rows at a time
+
+///* Store the eight rows calculated above: x0 takes odd rows, x14 even rows */
+    st1             {v2.8b}, [x14], x3  //// out row 2
+    urhadd          v7.8b, v7.8b , v8.8b //// out row 8 = avg(ref row 8, ref row 9)
+    st1             {v0.8b}, [x0], x3   //// out row 1
+    st1             {v9.8b}, [x14], x3  //// out row 4
+    st1             {v4.8b}, [x0], x3   //// out row 3
+    st1             {v3.8b}, [x14], x3  //// out row 6
+    st1             {v1.8b}, [x0], x3   //// out row 5
+    st1             {v7.8b}, [x14], x3  //// out row 8
+    st1             {v5.8b}, [x0], x3   //// out row 7
+
+// Undo push_v_regs and return.
+    pop_v_regs
+    ret
+
+
+
+
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name      :   impeg2_mc_halfx_fully_8x8_av8()
+////
+//// Detail Description : This function pastes the reference block in the
+////                      current frame buffer. This function is called for
+////                      blocks that are not coded and have motion vectors
+////                      with half pel horizontal and full pel vertical
+////                      resolution, and VopRoundingType is 0.
+////
+//// Inputs             : x0 - out     : Current Block Pointer
+////                      x1 - ref     : Reference Block Pointer
+////                      x2 - ref_wid : Reference Block Width
+////                      x3 - out_wid : Current Block Width
+////
+//// Registers Used     : x12, x14, v0-v10, v12-v14, v16-v18, v20-v22
+
+////
+//// Stack Usage        : 64 bytes
+////
+//// Outputs            : The Motion Compensated Block
+////
+//// Return Data        : None
+////
+//// Programming Note   : <program limitation>
+////-----------------------------------------------------------------------------
+//*/
+
+
+
+.global impeg2_mc_halfx_fully_8x8_av8
+// 8x8 block, horizontal half-pel: out[x] = (ref[x] + ref[x+1] + 1) >> 1 for each of 8 rows.
+// Rows 1-4 are processed through x1/x0 and rows 5-8 through x14/x12.
+// NOTE(review): x2/x3 are used as 64-bit strides; confirm callers pass them with valid upper 32 bits.
+impeg2_mc_halfx_fully_8x8_av8:
+
+    // push_v_regs is defined in impeg2_neon_macros.s (not visible here); v8-v10 and v12-v14 are written below.
+    push_v_regs
+
+    add             x14, x1, x2, lsl #2 // x14 = ref + 4 * ref_wid (ref row 5)
+
+    add             x12, x0, x3, lsl#2  // x12 = out + 4 * out_wid (out row 5)
+
+    ld1             {v0.8b, v1.8b}, [x1], x2 //load 16 pixels of  row1
+
+    ld1             {v2.8b, v3.8b}, [x14], x2 // row5
+
+
+    ld1             {v4.8b, v5.8b}, [x1], x2 //load 16 pixels row2
+
+    ld1             {v6.8b, v7.8b}, [x14], x2 //row6
+
+    // ext #1 over the 16 loaded bytes yields bytes 1..8, i.e. the row shifted left by one pixel
+    ext             v8.8b, v0.8b , v1.8b , #1 // row1 shifted
+
+    ext             v12.8b, v2.8b , v3.8b , #1 // row5 shifted
+
+    ext             v16.8b, v4.8b , v5.8b , #1 // row2 shifted
+
+    ext             v20.8b, v6.8b , v7.8b , #1 // row6 shifted
+
+
+    ld1             {v9.8b, v10.8b}, [x1], x2 //load row3
+
+    ld1             {v13.8b, v14.8b}, [x14], x2 //load row7
+
+    ld1             {v17.8b, v18.8b}, [x1], x2 //load  row4
+
+    ld1             {v21.8b, v22.8b}, [x14], x2 //load  row8
+
+    // v1/v3/v5/v7 held the upper 8 bytes of rows 1/5/2/6, already consumed by the ext above, so they are reused
+    ext             v1.8b, v9.8b , v10.8b , #1 // row3 shifted
+
+    ext             v3.8b, v13.8b , v14.8b , #1 // row7 shifted
+
+
+
+    ext             v5.8b, v17.8b , v18.8b , #1 // row4 shifted
+
+    ext             v7.8b, v21.8b , v22.8b , #1 // row8 shifted
+
+    // rounding average of each row with its shifted copy; only the low 8 bytes of each result are stored
+    urhadd          v0.16b, v0.16b , v8.16b //row1
+    urhadd          v1.16b, v1.16b , v9.16b //row3
+
+    urhadd          v2.16b, v2.16b , v12.16b //row5
+    urhadd          v3.16b, v3.16b , v13.16b //row7
+
+
+    urhadd          v4.16b, v4.16b , v16.16b //row2
+    urhadd          v5.16b, v5.16b , v17.16b //row4
+
+
+    urhadd          v6.16b, v6.16b , v20.16b //row6
+    urhadd          v7.16b, v7.16b , v21.16b //row8
+
+    st1             {v0.8b}, [x0], x3   //store row1
+
+    st1             {v2.8b}, [x12], x3  //store row5
+
+    st1             {v4.8b}, [x0], x3   //store row2
+
+    st1             {v6.8b}, [x12], x3  //store row6
+
+    st1             {v1.8b}, [x0], x3   //store row3
+
+    st1             {v3.8b}, [x12], x3  //store row7
+
+    st1             {v5.8b}, [x0], x3   //store row4
+
+    st1             {v7.8b}, [x12], x3  //store row8
+
+
+
+    // Undo push_v_regs and return.
+    pop_v_regs
+    ret
+
+
+
+
+
+
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name      :   impeg2_mc_halfx_halfy_8x8_av8()
+////
+//// Detail Description : This function pastes the reference block in the
+////                      current frame buffer. This function is called for
+////                      blocks that are not coded and have motion vectors
+////                      with half pel resolution in both directions, and
+////                      VopRoundingType is 0.
+////
+//// Inputs             : x0 - out     : Current Block Pointer
+////                      x1 - ref     : Reference Block Pointer
+////                      x2 - ref_wid : Reference Block Width
+////                      x3 - out_wid : Current Block Width
+////
+//// Registers Used     : x14, v0-v18, v20, v22, v24, v26, v28, v30
+
+////
+//// Stack Usage        : 64 bytes
+////
+//// Outputs            : The Motion Compensated Block
+////
+//// Return Data        : None
+////
+//// Programming Note   : <program limitation>
+////-----------------------------------------------------------------------------
+//*/
+
+
+.global impeg2_mc_halfx_halfy_8x8_av8
+// 8x8 block, half-pel in x and y: out = (p[y][x] + p[y][x+1] + p[y+1][x] + p[y+1][x+1] + 2) >> 2, using 9 reference rows.
+impeg2_mc_halfx_halfy_8x8_av8:
+
+    // push_v_regs is defined in impeg2_neon_macros.s (not visible here); v8-v15 are written below.
+    push_v_regs
+
+    add             x14, x1, x2, lsl #2 // x14 = ref + 4 * ref_wid (ref row 5)
+
+    ld1             {v0.8b, v1.8b}, [x1], x2 //load 16 pixels of  row1
+
+    ld1             {v2.8b, v3.8b}, [x14], x2 // row5
+
+    ld1             {v4.8b, v5.8b}, [x1], x2 //load 16 pixels row2
+
+    ld1             {v6.8b, v7.8b}, [x14], x2 //row6
+    // ext #1 over the 16 loaded bytes yields bytes 1..8 (row shifted by one pixel); it overwrites the upper-half register
+    ext             v1.8b, v0.8b , v1.8b , #1 // row1 shifted
+
+
+
+    ext             v3.8b, v2.8b , v3.8b , #1 // row5 shifted
+
+
+
+    ext             v5.8b, v4.8b , v5.8b , #1 // row2 shifted
+
+    ext             v7.8b, v6.8b , v7.8b , #1 // row6 shifted
+
+
+
+
+    ld1             {v8.8b, v9.8b}, [x1], x2 //load row3
+
+
+
+    ld1             {v10.8b, v11.8b}, [x14], x2 //load row7
+
+    ld1             {v12.8b, v13.8b}, [x1], x2 //load  row4
+
+    ld1             {v14.8b, v15.8b}, [x14], x2 //load  row8
+
+    ext             v9.8b, v8.8b , v9.8b , #1 // row3 shifted
+
+    ld1             {v16.8b, v17.8b}, [x14], x2 //load  row9
+
+
+
+
+
+    ext             v11.8b, v10.8b , v11.8b , #1 // row7 shifted
+
+
+
+    ext             v13.8b, v12.8b , v13.8b , #1 // row4 shifted
+
+
+
+    ext             v15.8b, v14.8b , v15.8b , #1 // row8 shifted
+
+    ext             v17.8b, v16.8b , v17.8b , #1 // row9 shifted
+
+
+    //interpolation in x direction: widen to 16 bit and add each pixel to its right neighbour
+
+    uaddl           v0.8h, v0.8b, v1.8b //operate row1
+
+    uaddl           v2.8h, v2.8b, v3.8b //operate row5
+
+    uaddl           v4.8h, v4.8b, v5.8b //operate row2
+
+    uaddl           v6.8h, v6.8b, v7.8b //operate row6
+
+    uaddl           v8.8h, v8.8b, v9.8b //operate row3
+
+    uaddl           v10.8h, v10.8b, v11.8b //operate row7
+
+    uaddl           v12.8h, v12.8b, v13.8b //operate row4
+
+    uaddl           v14.8h, v14.8b, v15.8b //operate row8
+
+    uaddl           v16.8h, v16.8b, v17.8b //operate row9
+
+    //interpolation in y direction: add horizontal sums of adjacent rows, then rshrn #2 gives (sum + 2) >> 2
+
+    add             x14, x0, x3, lsl #2 // x14 = out + 4 * out_wid (out row 5)
+
+
+
+    add             v18.8h, v0.8h , v4.8h //operate row1 and row2
+
+    add             v26.8h, v2.8h , v6.8h //operate row5 and row6
+
+    add             v20.8h, v4.8h , v8.8h //operate row2 and row3
+
+    add             v28.8h, v6.8h , v10.8h //operate row6 and row7
+
+    rshrn           v18.8b, v18.8h, #2  //row1
+
+    rshrn           v26.8b, v26.8h, #2  //row5
+
+    rshrn           v20.8b, v20.8h, #2  //row2
+
+    rshrn           v28.8b, v28.8h, #2  //row6
+
+    add             v22.8h, v8.8h , v12.8h //operate row3 and row4
+
+    st1             {v18.8b}, [x0], x3  //store row1
+
+    add             v30.8h, v10.8h , v14.8h //operate row7 and row8
+
+    st1             {v26.8b}, [x14], x3 //store row5
+
+    add             v24.8h, v12.8h , v2.8h //operate row4 and row5
+
+    st1             {v20.8b}, [x0], x3  //store row2
+
+    add             v14.8h, v14.8h , v16.8h //operate row8 and row9 (v14 reused; row7+row8 was already taken into v30)
+
+    st1             {v28.8b}, [x14], x3 //store row6
+
+
+
+    rshrn           v22.8b, v22.8h, #2  //row3
+
+    rshrn           v30.8b, v30.8h, #2  //row7
+
+    rshrn           v24.8b, v24.8h, #2  //row4
+
+    rshrn           v14.8b, v14.8h, #2  //row8
+
+
+    st1             {v22.8b}, [x0], x3  //store row3
+    st1             {v30.8b}, [x14], x3 //store row7
+    st1             {v24.8b}, [x0], x3  //store row4
+    st1             {v14.8b}, [x14], x3 //store row8
+
+
+
+    // Undo push_v_regs and return.
+    pop_v_regs
+    ret
+
+
+
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name      :   impeg2_mc_fullx_fully_8x8_av8()
+////
+//// Detail Description : This function pastes the reference block in the
+////                      current frame buffer. This function is called for
+////                      blocks that are not coded and have motion vectors
+////                      with full pel resolution (plain 8x8 copy).
+////
+//// Inputs             : x0 - out     : Current Block Pointer
+////                      x1 - ref     : Reference Block Pointer
+////                      x2 - ref_wid : Reference Block Width
+////                      x3 - out_wid : Current Block Width
+////
+//// Registers Used     : x12, x14, v0-v3
+
+////
+//// Stack Usage        : 64 bytes
+////
+//// Outputs            : The Motion Compensated Block
+////
+//// Return Data        : None
+////
+//// Programming Note   : <program limitation>
+////-----------------------------------------------------------------------------
+//*/
+
+
+.global impeg2_mc_fullx_fully_8x8_av8
+impeg2_mc_fullx_fully_8x8_av8:
+// Copies an 8x8 byte block from ref (x1, stride x2) to out (x0, stride x3) with no interpolation.
+// Rows 1-4 go through x1/x0 and rows 5-8 through x14/x12, interleaved.
+    // push_v_regs is defined in impeg2_neon_macros.s (not visible here); only v0-v3 are written in this routine.
+    push_v_regs
+
+    add             x14, x1, x2, lsl #2 // x14 = ref + 4 * ref_wid (ref row 5)
+
+    add             x12, x0, x3, lsl #2 // x12 = out + 4 * out_wid (out row 5)
+
+
+    ld1             {v0.8b}, [x1], x2   //load row1
+
+    ld1             {v1.8b}, [x14], x2  //load row5
+
+    ld1             {v2.8b}, [x1], x2   //load row2
+
+    ld1             {v3.8b}, [x14], x2  //load row6
+
+
+    st1             {v0.8b}, [x0], x3   //store row1
+
+    st1             {v1.8b}, [x12], x3  //store row5
+
+    st1             {v2.8b}, [x0], x3   //store row2
+
+    st1             {v3.8b}, [x12], x3  //store row6
+
+
+    ld1             {v0.8b}, [x1], x2   //load row3
+
+    ld1             {v1.8b}, [x14], x2  //load row7
+
+    ld1             {v2.8b}, [x1], x2   //load row4
+
+    ld1             {v3.8b}, [x14], x2  //load row8
+
+
+    st1             {v0.8b}, [x0], x3   //store row3
+
+    st1             {v1.8b}, [x12], x3  //store row7
+
+    st1             {v2.8b}, [x0], x3   //store row4
+
+    st1             {v3.8b}, [x12], x3  //store row8
+
+
+    // Undo push_v_regs and return.
+    pop_v_regs
+    ret
+
+
+
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name      :   impeg2_interpolate_av8()
+////
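+//// Detail Description : averages two prediction buffers with rounding
+////                      ((a + b + 1) >> 1) and writes the result to dst
+////
+//// Inputs             : x0 - pointer to src1 (y, u, v plane pointers at offsets 0, 8, 16)
+////                      x1 - pointer to src2 (same layout)
+////                      x2 - dest buf (same layout)
+////                      x3 - dst luma stride (halved for chroma)
+//// Registers Used     : x4, x5, x7, x12, v0-v15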
+////
+//// Stack Usage        : 64 bytes
+////
+//// Outputs            : The Motion Compensated Block
+////
+//// Return Data        : None
+////
+//// Programming Note   : <program limitation>
+////-----------------------------------------------------------------------------
+//*/
+
+
+.global impeg2_interpolate_av8
+// Writes the rounded byte-wise average of src1 and src2 to dst for one macroblock: 16x16 luma, then 8x8 U and 8x8 V.
+// Both sources are read with fixed post-increments (16 bytes per luma row, 8 per chroma row); only dst uses the x3 stride.
+impeg2_interpolate_av8:
+
+// push_v_regs is defined in impeg2_neon_macros.s (not visible here); v8-v15 are written below.
+    push_v_regs
+
+    ldr             x4, [x0, #0]        //ptr_y src1
+
+    ldr             x5, [x1, #0]        //ptr_y src2
+
+    ldr             x7, [x2, #0]        //ptr_y dst buf
+
+    mov             x12, #4             //4 iterations x 4 rows = 16 luma rows
+
+    // NOTE(review): x3 is used as a 64-bit stride; confirm callers pass it with valid upper 32 bits.
+interp_lumablocks_stride:
+    ld1             {v0.16b}, [x4], #16 //row1 src1
+
+    ld1             {v2.16b}, [x4], #16 //row2 src1
+
+    ld1             {v4.16b}, [x4], #16 //row3 src1
+
+    ld1             {v6.16b}, [x4], #16 //row4 src1
+
+
+    ld1             {v8.16b}, [x5], #16 //row1 src2
+
+    ld1             {v10.16b}, [x5], #16 //row2 src2
+
+    ld1             {v12.16b}, [x5], #16 //row3 src2
+
+    ld1             {v14.16b}, [x5], #16 //row4 src2
+    // urhadd = (a + b + 1) >> 1 per byte
+    urhadd          v0.16b, v0.16b , v8.16b //operate on row1
+
+    urhadd          v2.16b, v2.16b , v10.16b //operate on row2
+
+    urhadd          v4.16b, v4.16b , v12.16b //operate on row3
+
+    urhadd          v6.16b, v6.16b , v14.16b //operate on row4
+    st1             {v0.16b}, [x7], x3  //row1
+
+    st1             {v2.16b}, [x7], x3  //row2
+
+    st1             {v4.16b}, [x7], x3  //row3
+
+    st1             {v6.16b}, [x7], x3  //row4
+
+    subs            x12, x12, #1
+
+    bne             interp_lumablocks_stride
+
+
+    lsr             x3, x3, #1          //stride >> 1
+
+    ldr             x4, [x0, #8]        //ptr_u src1
+
+    ldr             x5, [x1, #8]        //ptr_u src2
+
+    ldr             x7 , [x2, #8]       //ptr_u dst buf
+
+    mov             x12, #2             //2 passes: first U, then V
+
+
+
+//chroma blocks: each pass handles one whole 8x8 plane (64 source bytes per buffer)
+
+interp_chromablocks_stride:
+    ld1             {v0.8b, v1.8b}, [x4], #16 //row1 & 2 src1
+
+    ld1             {v2.8b, v3.8b}, [x4], #16 //row3 & 4 src1
+
+    ld1             {v4.8b, v5.8b}, [x4], #16 //row5 & 6 src1
+
+    ld1             {v6.8b, v7.8b}, [x4], #16 //row7 & 8 src1
+
+
+    ld1             {v8.8b, v9.8b}, [x5], #16 //row1 & 2 src2
+
+    ld1             {v10.8b, v11.8b}, [x5], #16 //row3 & 4 src2
+
+    ld1             {v12.8b, v13.8b}, [x5], #16 //row5 & 6 src2
+
+    ld1             {v14.8b, v15.8b}, [x5], #16 //row7 & 8 src2
+    // each register holds one 8-byte row in its low half; only the low 8 bytes of each result are stored
+    urhadd          v0.16b, v0.16b , v8.16b //row1
+    urhadd          v1.16b, v1.16b , v9.16b //row2
+
+    urhadd          v2.16b, v2.16b , v10.16b //row3
+    urhadd          v3.16b, v3.16b , v11.16b //row4
+
+    urhadd          v4.16b, v4.16b , v12.16b //row5
+    urhadd          v5.16b, v5.16b , v13.16b //row6
+
+    urhadd          v6.16b, v6.16b , v14.16b //row7
+    urhadd          v7.16b, v7.16b , v15.16b //row8
+
+    st1             {v0.8b}, [x7], x3   //row1
+
+    st1             {v1.8b}, [x7], x3   //row2
+
+    st1             {v2.8b}, [x7], x3   //row3
+
+    st1             {v3.8b}, [x7], x3   //row4
+
+    st1             {v4.8b}, [x7], x3   //row5
+
+    st1             {v5.8b}, [x7], x3   //row6
+
+    st1             {v6.8b}, [x7], x3   //row7
+
+    st1             {v7.8b}, [x7], x3   //row8
+
+    // switch to the V plane for the second pass; these loads also run after the last pass, where the values go unused
+    ldr             x4, [x0, #16]       //ptr_v src1
+
+    ldr             x5, [x1, #16]       //ptr_v src2
+
+    ldr             x7, [x2, #16]       //ptr_v dst buf
+
+    subs            x12, x12, #1
+
+    bne             interp_chromablocks_stride
+
+
+    // Undo push_v_regs and return.
+    pop_v_regs
+    ret
+
+
+
+

diff --git a/common/armv8/impeg2_mem_func.s b/common/armv8/impeg2_mem_func.s
new file mode 100644
index 0000000..f0bb590
--- /dev/null
+++ b/common/armv8/impeg2_mem_func.s

@@ -0,0 +1,181 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+
+///*
+////----------------------------------------------------------------------------
+//// File Name            : impeg2_mem_func.s
+////
+//// Description          : This file has the 8x8 block memset functions
+////                        for the ARMv8 (AArch64) NEON platform
+////
+//// Reference Document   :
+////
+//// Revision History     :
+////      Date            Author                  Detail Description
+////   ------------    ----------------    ----------------------------------
+////   18 jun 2010      S Hamsalekha              Created
+////
+////-------------------------------------------------------------------------
+//*/
+
+///*
+//// ----------------------------------------------------------------------------
+//// Include Files
+//// ----------------------------------------------------------------------------
+//*/
+//              PRESERVE8
+.text
+.include "impeg2_neon_macros.s"
+///*
+//// ----------------------------------------------------------------------------
+//// Struct/Union Types and Define
+//// ----------------------------------------------------------------------------
+//*/
+
+
+///*
+//// ----------------------------------------------------------------------------
+//// Static Global Data section variables
+//// ----------------------------------------------------------------------------
+//*/
+//// -------------------------- NONE --------------------------------------------
+
+
+///*
+//// ----------------------------------------------------------------------------
+//// Static Prototype Functions
+//// ----------------------------------------------------------------------------
+//*/
+//// -------------------------- NONE --------------------------------------------
+
+///*
+//// ----------------------------------------------------------------------------
+//// Exported functions
+//// ----------------------------------------------------------------------------
+//*/
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name      : impeg2_memset_8bit_8x8_block_av8()
+////
+//// Detail Description : This routine initialises an 8x8 block of bytes to a
+////                      particular value. Eight rows of 8 bytes each are
+////                      written; after every row the destination pointer is
+////                      advanced by the stride held in x2.
+////
+//// Inputs             : x0 : pointer to the first row of the block
+////                      w1 : value with which the block is initialized
+////                           (the low 8 bits are replicated)
+////                      x2 : distance in bytes between successive rows
+////
+//// Registers Used     : v0, x0 (post-incremented)
+////
+//// Stack Usage        : None
+////
+//// Outputs            : Block initialized to the given value
+////
+//// Return Data        : None
+////
+//// Programming Note   : x2 is consumed as a full 64-bit post-increment.
+////                      NOTE(review): if the C prototype passes the stride as
+////                      a 32-bit integer the upper half of x2 is not
+////                      guaranteed to be extended - confirm against the
+////                      C declaration.
+////-----------------------------------------------------------------------------
+//*/
+.global impeg2_memset_8bit_8x8_block_av8
+impeg2_memset_8bit_8x8_block_av8:
+    // Only v0 is written here and v0-v7 are caller-saved, so the callee-saved
+    // d8-d15 do not have to be spilled around this routine.
+
+    dup             v0.8b, w1           ////replicate the low byte of w1 into all 8 lanes
+
+    st1             {v0.8b}, [x0], x2   ////Store the row 1
+    st1             {v0.8b}, [x0], x2   ////Store the row 2
+    st1             {v0.8b}, [x0], x2   ////Store the row 3
+    st1             {v0.8b}, [x0], x2   ////Store the row 4
+    st1             {v0.8b}, [x0], x2   ////Store the row 5
+    st1             {v0.8b}, [x0], x2   ////Store the row 6
+    st1             {v0.8b}, [x0], x2   ////Store the row 7
+    st1             {v0.8b}, [x0], x2   ////Store the row 8
+
+    ret
+
+
+
+
+
+
+///*
+////---------------------------------------------------------------------------
+//// Function Name      :   impeg2_memset0_16bit_8x8_linear_block_av8()
+////
+//// Detail Description : memsets a linear 8x8 block of 16-bit values to 0.
+////                      Eight consecutive stores of 8 halfwords (16 bytes)
+////                      each are issued, i.e. 128 contiguous bytes are
+////                      cleared starting at x0.
+////
+//// Inputs             : x0 - pointer to the start of the buffer
+////
+//// Registers Used     : v0, x0 (post-incremented)
+////
+//// Stack Usage        : None
+////
+//// Outputs            : 64 halfwords at the given address set to zero
+////
+//// Return Data        : None
+////
+//// Programming Note   : The rows are assumed to be contiguous; there is no
+////                      stride argument.
+////-----------------------------------------------------------------------------
+//*/
+
+
+
+.global impeg2_memset0_16bit_8x8_linear_block_av8
+
+
+impeg2_memset0_16bit_8x8_linear_block_av8:
+
+    // Only v0 is written here and v0-v7 are caller-saved, so the callee-saved
+    // d8-d15 do not have to be spilled around this routine.
+
+    movi            v0.8h, #0
+
+    //Each store clears one row of 8 halfwords and advances x0 by 16 bytes
+
+    st1             {v0.8h} , [x0], #16 //row1
+
+    st1             {v0.8h} , [x0], #16 //row2
+
+    st1             {v0.8h} , [x0], #16 //row3
+
+    st1             {v0.8h} , [x0], #16 //row4
+
+    st1             {v0.8h} , [x0], #16 //row5
+
+    st1             {v0.8h} , [x0], #16 //row6
+
+    st1             {v0.8h} , [x0], #16 //row7
+
+    st1             {v0.8h} , [x0], #16 //row8
+
+
+
+    ret
+
+
+
+

diff --git a/common/armv8/impeg2_neon_macros.s b/common/armv8/impeg2_neon_macros.s
new file mode 100644
index 0000000..452ba45
--- /dev/null
+++ b/common/armv8/impeg2_neon_macros.s

@@ -0,0 +1,58 @@
+//******************************************************************************
+//*
+//* Copyright (C) 2015 The Android Open Source Project
+//*
+//* Licensed under the Apache License, Version 2.0 (the "License");
+//* you may not use this file except in compliance with the License.
+//* You may obtain a copy of the License at:
+//*
+//* http://www.apache.org/licenses/LICENSE-2.0
+//*
+//* Unless required by applicable law or agreed to in writing, software
+//* distributed under the License is distributed on an "AS IS" BASIS,
+//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//* See the License for the specific language governing permissions and
+//* limitations under the License.
+//*
+//*****************************************************************************
+//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+//*/
+///**
+//*******************************************************************************
+//* @file
+//*  impeg2_neon_macros.s
+//*
+//* @brief
+//*  Contains assembly macros
+//*
+//* @author
+//*  Naveen SR
+//*
+//* @par List of Functions:
+//*
+//*
+//* @remarks
+//*  None
+//*
+//*******************************************************************************
+
+
+.macro push_v_regs
+    // Spill d8-d15 below the current sp. The first store reserves the whole
+    // 64-byte area; the others fill the remaining slots at fixed offsets.
+    // Layout after the macro:
+    //   [sp]     d14, d15
+    //   [sp+16]  d12, d13
+    //   [sp+32]  d10, d11
+    //   [sp+48]  d8,  d9
+    stp             d14, d15, [sp, #-64]!
+    stp             d12, d13, [sp, #16]
+    stp             d10, d11, [sp, #32]
+    stp             d8, d9, [sp, #48]
+.endm
+.macro pop_v_regs
+    // Reload d8-d15 from the 64-byte area at sp (d14,d15 lowest, d8,d9
+    // highest); the last load releases the whole area in one step.
+    ldp             d8, d9, [sp, #48]
+    ldp             d10, d11, [sp, #32]
+    ldp             d12, d13, [sp, #16]
+    ldp             d14, d15, [sp], #64
+.endm
+
+// Exchanges the contents of the two register operands with three EORs, so
+// no scratch register is needed. The operands must be two different
+// registers: if the same register is passed twice, the first EOR clears it.
+.macro swp reg1, reg2
+    eor             \reg1, \reg1, \reg2
+    eor             \reg2, \reg1, \reg2
+    eor             \reg1, \reg1, \reg2
+.endm
+
+

diff --git a/common/armv8/impeg2_platform_macros.h b/common/armv8/impeg2_platform_macros.h
new file mode 100644
index 0000000..ff31034
--- /dev/null
+++ b/common/armv8/impeg2_platform_macros.h

@@ -0,0 +1,49 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef __IMPEG2_PLATFORM_MACROS_H__
+#define __IMPEG2_PLATFORM_MACROS_H__
+
+/* Reverses the byte order of the 32-bit value u4_temp1 and assigns the       */
+/* result to u4_temp2. Both arguments are parenthesized so that expression    */
+/* arguments associate correctly. u4_temp1 is evaluated four times, so it     */
+/* must not have side effects. The expansion ends in ';' (kept for existing   */
+/* callers), so it must be used as a statement.                               */
+#define CONV_LE_TO_BE(u4_temp2,u4_temp1) (u4_temp2) =                          \
+                                         (((u4_temp1) << 24) |                 \
+                                         (((u4_temp1) & 0xff00) << 8) |        \
+                                         (((u4_temp1) & 0xff0000) >> 8) |      \
+                                         ((u4_temp1) >> 24));
+
+/* Counts the leading zero bits of u4_word. A zero input is handled          */
+/* explicitly and yields 32, so __builtin_clz() is never called with 0.      */
+static __inline  UWORD32 CLZ(UWORD32 u4_word)
+{
+    return (0 == u4_word) ? 32 : (UWORD32)__builtin_clz(u4_word);
+}
+
+/* Saturating clips. Each expansion is fully parenthesized so it can be used  */
+/* inside a larger expression. The argument is evaluated more than once, so   */
+/* it must not have side effects.                                             */
+#define CLIP_U8(x) (((x) > 255) ? (255) : (((x) < 0) ? (0) : (x)))
+#define CLIP_S8(x) (((x) > 127) ? (127) : (((x) < -128) ? (-128) : (x)))
+
+#define CLIP_U12(x) (((x) > 4095) ? (4095) : (((x) < 0) ? (0) : (x)))
+#define CLIP_S12(x) (((x) > 2047) ? (2047) : (((x) < -2048) ? (-2048) : (x)))
+
+#define CLIP_U16(x) (((x) > 65535) ? (65535) : (((x) < 0) ? (0) : (x)))
+/* Signed 16-bit range is [-32768, 32767] */
+#define CLIP_S16(x) (((x) > 32767) ? (32767) : (((x) < -32768) ? (-32768) : (x)))
+
+/* INLINE expands to nothing in this platform header */
+#define INLINE
+/* Prefetch hint, forwarded to __pld(). __pld is not declared in this header; */
+/* NOTE(review): presumably provided by the toolchain - confirm for AArch64.  */
+#define PLD(x) __pld(x)
+
+#endif /* __IMPEG2_PLATFORM_MACROS_H__ */

diff --git a/common/impeg2_buf_mgr.c b/common/impeg2_buf_mgr.c
new file mode 100644
index 0000000..c4aca4a
--- /dev/null
+++ b/common/impeg2_buf_mgr.c

@@ -0,0 +1,411 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+*  impeg2_buf_mgr.c
+*
+* @brief
+*  Contains function definitions for buffer management
+*
+* @author
+*  Srinivas T
+*
+* @par List of Functions:
+*   - impeg2_buf_mgr_init()
+*   - impeg2_buf_mgr_add()
+*   - impeg2_buf_mgr_get_next_free()
+*   - impeg2_buf_mgr_check_free()
+*   - impeg2_buf_mgr_release()
+*   - impeg2_buf_mgr_set_status()
+*   - impeg2_buf_mgr_get_status()
+*   - impeg2_buf_mgr_get_buf()
+*   - impeg2_buf_mgr_get_num_active_buf()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include "iv_datatypedef.h"
+#include "impeg2_defs.h"
+#include "impeg2_buf_mgr.h"
+
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Resets a buffer manager to its empty state.
+*
+* @par Description:
+*  Sets the maximum buffer count to BUF_MGR_MAX_CNT, clears the active buffer
+*  count and, for every slot, clears the status word and the buffer pointer
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @returns  None
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+void impeg2_buf_mgr_init(
+                buf_mgr_t *ps_buf_mgr)
+{
+    WORD32 i4_slot = BUF_MGR_MAX_CNT;
+
+    /* Walk all slots and leave each one unused: no buffer, no status bits */
+    while(i4_slot-- > 0)
+    {
+        ps_buf_mgr->apv_ptr[i4_slot] = NULL;
+        ps_buf_mgr->au4_status[i4_slot] = 0;
+    }
+
+    ps_buf_mgr->u4_active_buf_cnt = 0;
+    ps_buf_mgr->u4_max_buf_cnt = BUF_MGR_MAX_CNT;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Registers a buffer with the buffer manager under a given id.
+*
+* @par Description:
+*  Stores the buffer pointer in the slot selected by the id. The slot must
+*  either be empty or already hold the same pointer. The active buffer count
+*  is not modified by this function.
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @param[in] pv_ptr
+*  Pointer to the buffer to be added
+*
+* @param[in] i4_buf_id
+*  Slot in which the buffer is stored; valid range is 0 to u4_max_buf_cnt - 1
+*
+* @returns  Returns 0 on success, -1 if the id is out of range or the slot
+*  already holds a different buffer
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+WORD32 impeg2_buf_mgr_add(
+                buf_mgr_t *ps_buf_mgr,
+                void *pv_ptr,
+                WORD32 i4_buf_id)
+{
+
+    /* Check if buffer ID is within allowed range. Negative IDs are rejected */
+    /* too, since they would index before the start of the slot arrays       */
+    if((i4_buf_id < 0) ||
+       (i4_buf_id >= (WORD32)ps_buf_mgr->u4_max_buf_cnt))
+    {
+        return (-1);
+    }
+
+    /* Check if the current ID is being used to hold some other buffer */
+    if((ps_buf_mgr->apv_ptr[i4_buf_id] != NULL) &&
+       (ps_buf_mgr->apv_ptr[i4_buf_id] != pv_ptr))
+    {
+        return (-1);
+    }
+    ps_buf_mgr->apv_ptr[i4_buf_id] = pv_ptr;
+
+    return 0;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Claims the first free buffer.
+*
+* @par Description:
+*  Scans the slots in increasing id order for one that holds a buffer and has
+*  a status of zero. The first such slot gets its status set to 1 (DEC), its
+*  id is written to the caller and its buffer pointer is returned.
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @param[out] pi4_buf_id
+*  Receives the id of the claimed buffer; left untouched when none is free
+*
+* @returns  Pointer to the claimed buffer, NULL when no buffer is free
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+void* impeg2_buf_mgr_get_next_free(
+                buf_mgr_t *ps_buf_mgr,
+                WORD32 *pi4_buf_id)
+{
+    WORD32 i4_slot;
+
+    for(i4_slot = 0; i4_slot < (WORD32)ps_buf_mgr->u4_max_buf_cnt; i4_slot++)
+    {
+        void *pv_buf = ps_buf_mgr->apv_ptr[i4_slot];
+
+        /* Skip slots that are empty or still have a status bit set */
+        if((NULL == pv_buf) || (0 != ps_buf_mgr->au4_status[i4_slot]))
+        {
+            continue;
+        }
+
+        /* Claim the slot: DEC is set to 1 */
+        ps_buf_mgr->au4_status[i4_slot] = 1;
+        *pi4_buf_id = i4_slot;
+        return pv_buf;
+    }
+
+    return NULL;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Checks the buffer manager for free buffers.
+*
+* @par Description:
+*  Reports whether any slot holds a buffer whose status is zero. No state is
+*  modified.
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @returns  Returns 1 if a free buffer is available, 0 otherwise
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+WORD32 impeg2_buf_mgr_check_free(
+                buf_mgr_t *ps_buf_mgr)
+{
+    UWORD32 u4_slot = 0;
+
+    while(u4_slot < ps_buf_mgr->u4_max_buf_cnt)
+    {
+        /* A slot is free when it has a buffer and no status bit is set */
+        if((ps_buf_mgr->apv_ptr[u4_slot]) &&
+           (0 == ps_buf_mgr->au4_status[u4_slot]))
+        {
+            return 1;
+        }
+        u4_slot++;
+    }
+
+    return 0;
+
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Resets status bits of a buffer.
+*
+* @par Description:
+*  Clears the status bits contained in the mask for the buffer with the given
+*  id. If only the DEC bit (value 1) remains afterwards, the status is
+*  cleared completely so that the buffer becomes free again.
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @param[in] i4_buf_id
+*  ID of the buffer status to be released; valid range is
+*  0 to u4_max_buf_cnt - 1
+*
+* @param[in] u4_mask
+*  Contains the bits that are to be reset
+*
+* @returns  0 if success, -1 if the id is out of range or none of the mask
+*  bits is currently set
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+WORD32 impeg2_buf_mgr_release(
+                buf_mgr_t *ps_buf_mgr,
+                WORD32 i4_buf_id,
+                UWORD32 u4_mask)
+{
+    /* Reject ids outside the slot range. Negative ids are rejected too, */
+    /* since they would index before the start of the status array       */
+    if((i4_buf_id < 0) ||
+       (i4_buf_id >= (WORD32)ps_buf_mgr->u4_max_buf_cnt))
+    {
+        return (-1);
+    }
+
+    /* Releasing bits that are not set is reported as an error */
+    if(0 == (ps_buf_mgr->au4_status[i4_buf_id] & u4_mask))
+    {
+        return (-1);
+    }
+
+    ps_buf_mgr->au4_status[i4_buf_id] &= ~u4_mask;
+
+    /* If both the REF and DISP are zero, DEC is set to zero */
+    if(ps_buf_mgr->au4_status[i4_buf_id] == 1)
+    {
+        ps_buf_mgr->au4_status[i4_buf_id] = 0;
+    }
+
+    return 0;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets status bits of a buffer.
+*
+* @par Description:
+*  Sets the status bits contained in the mask for the buffer with the given
+*  id. The call fails if any of the requested bits is already set.
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @param[in] i4_buf_id
+*  ID of the buffer whose status needs to be modified; valid range is
+*  0 to u4_max_buf_cnt - 1
+*
+* @param[in] u4_mask
+*  Contains the bits that are to be set
+*
+* @returns  0 if success, -1 if the id is out of range or any of the mask
+*  bits is already set
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+WORD32 impeg2_buf_mgr_set_status(
+                buf_mgr_t *ps_buf_mgr,
+                WORD32 i4_buf_id,
+                UWORD32 u4_mask)
+{
+    /* Reject ids outside the slot range. Negative ids are rejected too, */
+    /* since they would index before the start of the status array       */
+    if((i4_buf_id < 0) ||
+       (i4_buf_id >= (WORD32)ps_buf_mgr->u4_max_buf_cnt))
+    {
+        return (-1);
+    }
+
+
+    /* Setting a bit that is already set is reported as an error */
+    if((ps_buf_mgr->au4_status[i4_buf_id] & u4_mask) != 0)
+    {
+        return (-1);
+    }
+
+    ps_buf_mgr->au4_status[i4_buf_id] |= u4_mask;
+    return 0;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Returns the status of a buffer.
+*
+* @par Description:
+*  Reads the status word of the slot selected by the id. The id is used as
+*  an array index without any range check.
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @param[in] i4_buf_id
+*  ID of the buffer status required; must be a valid slot index
+*
+* @returns  Status of the buffer corresponding to the id
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+UWORD32 impeg2_buf_mgr_get_status(
+                buf_mgr_t *ps_buf_mgr,
+                WORD32 i4_buf_id)
+{
+    UWORD32 u4_status = ps_buf_mgr->au4_status[i4_buf_id];
+
+    return u4_status;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Gets a buffer from the buffer manager.
+*
+* @par Description:
+*  Returns the buffer pointer stored in the slot selected by the id
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @param[in] i4_buf_id
+*  ID of the buffer required; valid range is 0 to u4_max_buf_cnt - 1
+*
+* @returns  Pointer stored for the id (NULL if the slot is empty), or NULL
+*  if the id is out of range
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+void* impeg2_buf_mgr_get_buf(
+                buf_mgr_t *ps_buf_mgr,
+                WORD32 i4_buf_id)
+{
+    /* An id outside the slot range would read outside the pointer array */
+    if((i4_buf_id < 0) ||
+       (i4_buf_id >= (WORD32)ps_buf_mgr->u4_max_buf_cnt))
+    {
+        return NULL;
+    }
+
+    return ps_buf_mgr->apv_ptr[i4_buf_id];
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Gets the buffer count of the buffer manager.
+*
+* @par Description:
+*  Returns the u4_max_buf_cnt field of the buffer manager.
+*  NOTE(review): despite the function name, u4_active_buf_cnt is not the
+*  value returned - confirm that this is what callers expect.
+*
+* @param[in] ps_buf_mgr
+*  Pointer to the buffer manager
+*
+* @returns  Value of u4_max_buf_cnt
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+UWORD32 impeg2_buf_mgr_get_num_active_buf(
+                buf_mgr_t *ps_buf_mgr)
+{
+    UWORD32 u4_cnt = ps_buf_mgr->u4_max_buf_cnt;
+
+    return u4_cnt;
+}

diff --git a/common/impeg2_buf_mgr.h b/common/impeg2_buf_mgr.h
new file mode 100644
index 0000000..6b1cbef
--- /dev/null
+++ b/common/impeg2_buf_mgr.h

@@ -0,0 +1,115 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2_buf_mgr.h
+*
+* @brief
+*  Function declarations used for buffer management
+*
+* @author
+*  Srinivas T
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+#ifndef _IMPEG2_BUF_MGR_H_
+#define _IMPEG2_BUF_MGR_H_
+
+/* Number of slots in the status and pointer arrays of buf_mgr_t */
+#define BUF_MGR_MAX_CNT 64
+
+/* Status bits kept per buffer: bit 0 - DEC, bit 1 - REF, bit 2 - DISP */
+#define BUF_MGR_DEC         1
+#define BUF_MGR_REF         (1 << 1)
+#define BUF_MGR_DISP        (1 << 2)
+
+/* State of one buffer manager: a fixed table of buffer slots, each with a */
+/* buffer pointer and a status word                                         */
+typedef struct
+{
+    /**
+     * Upper bound used when validating and scanning buffer ids
+     */
+    UWORD32 u4_max_buf_cnt;
+
+    /**
+     * Active buffer count
+     */
+    UWORD32 u4_active_buf_cnt;
+    /**
+     *  Status word of each slot; zero means no status bit is set
+     */
+    UWORD32 au4_status[BUF_MGR_MAX_CNT];
+    /* The last three bits of status are:   */
+    /* Bit 0 - DEC                          */
+    /* Bit 1 - REF                          */
+    /* Bit 2 - DISP                         */
+
+    /* Buffer pointer of each slot */
+    void    *apv_ptr[BUF_MGR_MAX_CNT];
+}buf_mgr_t;
+
+// initializes the buffer manager structure
+void impeg2_buf_mgr_init(
+                buf_mgr_t *ps_buf_mgr);
+
+// Add buffer to buffer manager under the given id. 0: success, -1: fail
+// (id not below u4_max_buf_cnt, or the id already holds a different buffer)
+WORD32 impeg2_buf_mgr_add(
+                buf_mgr_t *ps_buf_mgr,
+                void *pv_ptr,
+                WORD32 buf_id);
+
+// this function will set the buffer status to DEC and return the buffer;
+// returns NULL when no buffer is free
+void* impeg2_buf_mgr_get_next_free(
+                buf_mgr_t *ps_buf_mgr,
+                WORD32 *pi4_id);
+
+// this function will check if there are any free buffers
+// (1: a free buffer exists, 0: none)
+WORD32 impeg2_buf_mgr_check_free(
+                buf_mgr_t *ps_buf_mgr);
+
+// mask will have who released it: DISP:REF:DEC
+WORD32 impeg2_buf_mgr_release(
+                buf_mgr_t *ps_buf_mgr,
+                WORD32 id,
+                UWORD32 mask);
+
+// sets the status to one or all of DISP:REF:DEC
+WORD32 impeg2_buf_mgr_set_status(
+                buf_mgr_t *ps_buf_mgr,
+                WORD32 id,
+                UWORD32 mask);
+
+// Gets status of the buffer
+UWORD32 impeg2_buf_mgr_get_status(
+                buf_mgr_t *ps_buf_mgr,
+                WORD32 id);
+
+// pass the ID - buffer will be returned
+void* impeg2_buf_mgr_get_buf(
+                buf_mgr_t *ps_buf_mgr,
+                WORD32 id);
+
+// will return number of active buffers
+// NOTE(review): the definition returns u4_max_buf_cnt - confirm intent
+UWORD32 impeg2_buf_mgr_get_num_active_buf(
+                buf_mgr_t *ps_buf_mgr);
+
+
+
+#endif  //_IMPEG2_BUF_MGR_H_

diff --git a/common/impeg2_defs.h b/common/impeg2_defs.h
new file mode 100644
index 0000000..f1523f2
--- /dev/null
+++ b/common/impeg2_defs.h

@@ -0,0 +1,331 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef __IMPEG2_DEFS_H__
+#define __IMPEG2_DEFS_H__
+
+#include <assert.h>
+
+/* Decoder needs at least 4 reference buffers in order to support format conversion in a thread and
+to support B pictures. Because of format conversion in a thread, codec delay is now 2 frames instead of 1.
+To reduce this delay, format conversion has to wait for MB status before converting for B pictures.
+To avoid this check the delay is increased to 2 and hence number of reference frames minimum is 4 */
+#define NUM_INT_FRAME_BUFFERS                     4
+
+
+#define MAX_WIDTH               4096
+#define MAX_HEIGHT              2160
+
+#define MIN_WIDTH               16
+#define MIN_HEIGHT              16
+
+
+#define MAX_FRM_SIZE            (MAX_WIDTH * MAX_HEIGHT * 2)  /* Supports only 420P and 422ILE */
+
+#define DEC_ORDER               0
+
+/* Parenthesized so the product stays one operand inside larger expressions */
+#define MAX_BITSTREAM_BUFFER_SIZE       (2000 * 1024)
+
+
+/******************************************************************************
+* MPEG2 Start code and other code definitions
+*******************************************************************************/
+#define START_CODE_PREFIX               0x000001
+#define SEQUENCE_HEADER_CODE            0x000001B3
+#define EXTENSION_START_CODE            0x000001B5
+#define USER_DATA_START_CODE            0x000001B2
+#define GOP_START_CODE                  0x000001B8
+#define PICTURE_START_CODE              0x00000100
+#define SEQUENCE_END_CODE               0x000001B7
+#define RESERVED_START_CODE             0x000001B0
+#define MB_ESCAPE_CODE                  0x008
+
+/******************************************************************************
+* MPEG2 Length of various codes definitions
+*******************************************************************************/
+#define START_CODE_LEN                  32
+#define START_CODE_PREFIX_LEN           24
+#define MB_ESCAPE_CODE_LEN              11
+#define EXT_ID_LEN                      4
+#define MB_QUANT_SCALE_CODE_LEN         5
+#define MB_DCT_TYPE_LEN                 1
+#define MB_MOTION_TYPE_LEN              2
+#define BYTE_LEN                        8
+
+/******************************************************************************
+* MPEG1 code definitions
+*******************************************************************************/
+#define MB_STUFFING_CODE                0x00F
+
+/******************************************************************************
+* MPEG1 Length of various codes definitions
+*******************************************************************************/
+#define MB_STUFFING_CODE_LEN             11
+
+/******************************************************************************
+* MPEG2 MB definitions
+*******************************************************************************/
+#define MPEG2_INTRA_MB                  0x04
+#define MPEG2_INTRAQ_MB                 0x44
+#define MPEG2_INTER_MB                  0x28
+#define MB_MOTION_BIDIRECT              0x30
+#define MB_INTRA_OR_PATTERN             0x0C
+
+/******************************************************************************
+* Tools definitions
+*******************************************************************************/
+#define SPATIAL_SCALABILITY             0x01
+#define TEMPORAL_SCALABILITY            0x03
+
+/******************************************************************************
+* Extension IDs definitions
+*******************************************************************************/
+#define SEQ_DISPLAY_EXT_ID              0x02
+#define SEQ_SCALABLE_EXT_ID             0x05
+#define QUANT_MATRIX_EXT_ID             0x03
+#define COPYRIGHT_EXT_ID                0x04
+#define PIC_DISPLAY_EXT_ID              0x07
+#define PIC_SPATIAL_SCALABLE_EXT_ID     0x09
+#define PIC_TEMPORAL_SCALABLE_EXT_ID    0x0A
+#define CAMERA_PARAM_EXT_ID             0x0B
+#define ITU_T_EXT_ID                    0x0C
+/******************************************************************************
+* Extension IDs Length definitions
+*******************************************************************************/
+#define CAMERA_PARAMETER_EXTENSION_LEN  377
+#define COPYRIGHT_EXTENSION_LEN          88
+#define GROUP_OF_PICTURE_LEN             59
+
+
+/******************************************************************************
+* MPEG2 Picture structure definitions
+*******************************************************************************/
+#define TOP_FIELD                       1
+#define BOTTOM_FIELD                    2
+#define FRAME_PICTURE                   3
+
+/******************************************************************************
+* MPEG2 Profile definitions
+*******************************************************************************/
+#define MPEG2_SIMPLE_PROFILE            0x05
+#define MPEG2_MAIN_PROFILE              0x04
+
+/******************************************************************************
+* MPEG2 Level definitions
+*******************************************************************************/
+#define MPEG2_LOW_LEVEL                 0x0a
+#define MPEG2_MAIN_LEVEL                0x08
+
+/******************************************************************************
+* MPEG2 Prediction types
+*******************************************************************************/
+#define FIELD_PRED                      0
+#define FRAME_PRED                      1
+#define DUAL_PRED                       2
+/* Negative value is parenthesized so it stays a single operand */
+#define RESERVED                        (-1)
+#define MC_16X8_PRED                    3
+
+/*****************************************************************************
+* MPEG2 Motion vector format
+******************************************************************************/
+#define FIELD_MV                        0
+#define FRAME_MV                        1
+
+/******************************************************************************/
+/* General Video related definitions                                          */
+/******************************************************************************/
+
+#define BLK_SIZE 8
+#define NUM_COEFFS ((BLK_SIZE)*(BLK_SIZE))
+#define LUMA_BLK_SIZE (2 * (BLK_SIZE))
+#define CHROMA_BLK_SIZE (BLK_SIZE)
+#define  BLOCKS_IN_MB            6
+#define  MB_SIZE                16
+#define  MB_CHROMA_SIZE          8
+#define  NUM_PELS_IN_BLOCK      64
+#define  NUM_LUMA_BLKS           4
+#define  NUM_CHROMA_BLKS         2
+#define  MAX_COLR_COMPS          3
+#define  Y_LUMA                  0
+#define  U_CHROMA                1
+#define  V_CHROMA                2
+#define  MB_LUMA_MEM_SIZE           ((MB_SIZE) * (MB_SIZE))
+#define  MB_CHROMA_MEM_SIZE         ((MB_SIZE/2) * (MB_SIZE/2))
+
+#define BITS_IN_INT     32
+/******************************************************************************/
+/* MPEG2 Motion compensation related definitions                              */
+/******************************************************************************/
+#define REF_FRM_MB_WIDTH    18
+#define REF_FRM_MB_HEIGHT   18
+#define REF_FLD_MB_WIDTH    18
+#define REF_FLD_MB_HEIGHT   10
+
+/******************************************************************************/
+/* Maximum number of bits per MB                                              */
+/******************************************************************************/
+#define I_MB_BIT_SIZE       90
+#define P_MB_BIT_SIZE       90
+#define B_MB_BIT_SIZE       150
+
+/******************************************************************************/
+/* Aspect ratio related definitions                                           */
+/******************************************************************************/
+/* MPEG-1 codes */
+#define MPG1_1_1            0x1
+#define MPG1_PAL_16_9       0x3
+#define MPG1_NTSC_16_9      0x6
+#define MPG1_NTSC_4_3       0x8
+#define MPG1_PAL_4_3        0xC
+
+/* MPEG-2 codes */
+#define MPG2_1_1            0x1
+#define MPG2_4_3            0x2
+#define MPG2_16_9           0x3
+
+/******************************************************************************/
+/* Inverse Quantizer Output range                                             */
+/******************************************************************************/
+/* Negative limits are parenthesized so they expand safely inside expressions */
+#define IQ_OUTPUT_MAX 2047
+#define IQ_OUTPUT_MIN (-2048)
+
+/******************************************************************************/
+/* IDCT Output range                                                          */
+/******************************************************************************/
+#define IDCT_OUTPUT_MAX  255
+#define IDCT_OUTPUT_MIN (-256)
+
+/******************************************************************************/
+/* Output pixel range                                                         */
+/******************************************************************************/
+#define PEL_VALUE_MAX 255
+#define PEL_VALUE_MIN 0
+
+/******************************************************************************/
+/* inv scan types                                                             */
+/******************************************************************************/
+#define ZIG_ZAG_SCAN            0
+#define VERTICAL_SCAN           1
+
+/******************************************************************************/
+/* Related VLD codes                                                          */
+/******************************************************************************/
+#define ESC_CODE_VALUE          0x0058
+#define EOB_CODE_VALUE          0x07D0
+
+#define END_OF_BLOCK            0x01
+#define ESCAPE_CODE             0x06
+
+/* NOTE(review): both macros below carry the same value - confirm this is intended */
+#define END_OF_BLOCK_ZERO       0x01FF
+#define END_OF_BLOCK_ONE        0x01FF
+
+/******************** Idct Specific ***************/
+#define TRANS_SIZE_8            8
+#define IDCT_STG1_SHIFT         12
+#define IDCT_STG2_SHIFT         16
+
+/* Rounding term for each stage: half of (1 << shift) */
+#define IDCT_STG1_ROUND         (1 << ((IDCT_STG1_SHIFT) - 1))
+#define IDCT_STG2_ROUND         (1 << ((IDCT_STG2_SHIFT) - 1))
+
+
+/******************************************************************************
+* Sample Version Definitions
+*******************************************************************************/
+#define SAMPLE_VERS_MAX_FRAMES_DECODE   999
+
+#define MAX_FRAME_BUFFER        7
+
+/* Picture coding type; numbering starts at 1 */
+typedef enum
+{
+    I_PIC = 1,
+    P_PIC = 2,
+    B_PIC = 3,
+    D_PIC = 4
+} e_pic_type_t;
+
+/* Video standard selector */
+typedef enum
+{
+    MPEG_2_VIDEO = 0,
+    MPEG_1_VIDEO = 1
+} e_video_type_t;
+
+/* Prediction direction */
+typedef enum
+{
+    FORW     = 0,
+    BACK     = 1,
+    BIDIRECT = 2
+} e_pred_direction_t;
+
+/* Field selector */
+typedef enum
+{
+    TOP    = 0,
+    BOTTOM = 1
+} e_field_t;
+
+/* Ordinal indices for motion vectors (first/second/...) */
+enum
+{
+    FIRST  = 0,
+    SECOND = 1,
+    THIRD  = 2,
+    FOURTH = 3
+};
+
+/* Indices of the two motion vector components */
+enum
+{
+    MV_X = 0,
+    MV_Y = 1
+};
+
+/* Kinds of interpolation possible in motion compensation: full or half
+sample position in X combined with full or half sample position in Y */
+typedef enum
+{
+    FULL_XFULL_Y = 0,
+    FULL_XHALF_Y = 1,
+    HALF_XFULL_Y = 2,
+    HALF_XHALF_Y = 3
+} e_sample_type_t;
+/* Params of the reference buffer used as input to MC */
+typedef enum
+{
+    /* frame prediction in P frame picture */
+    MC_FRM_FW_OR_BK_1MV      = 0,
+    /* field prediction in P frame picture */
+    MC_FRM_FW_OR_BK_2MV      = 1,
+    /* frame prediction in B frame picture */
+    MC_FRM_FW_AND_BK_2MV     = 2,
+    /* field prediction in B frame picture */
+    MC_FRM_FW_AND_BK_4MV     = 3,
+    /* dual prime prediction in P frame picture */
+    MC_FRM_FW_DUAL_PRIME_1MV = 4,
+    /* frame prediction in P field picture */
+    MC_FLD_FW_OR_BK_1MV      = 5,
+    /* 16x8 prediction in P field picture */
+    MC_FLD_FW_OR_BK_2MV      = 6,
+    /* field prediction in B field picture */
+    MC_FLD_FW_AND_BK_2MV     = 7,
+    /* 16x8 prediction in B field picture */
+    MC_FLD_FW_AND_BK_4MV     = 8,
+    /* dual prime prediction in P field picture */
+    MC_FLD_FW_DUAL_PRIME_1MV = 9
+} e_mb_type_t;
+
+#endif /* __IMPEG2_DEFS_H__ */
+

diff --git a/common/impeg2_disp_mgr.c b/common/impeg2_disp_mgr.c
new file mode 100644
index 0000000..f5ede84
--- /dev/null
+++ b/common/impeg2_disp_mgr.c

@@ -0,0 +1,172 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2_disp_mgr.c
+*
+* @brief
+*  Contains function definitions for display management
+*
+* @author
+*  Srinivas T
+*
+* @par List of Functions:
+*   - impeg2_disp_mgr_init()
+*   - impeg2_disp_mgr_add()
+*   - impeg2_disp_mgr_get()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include "iv_datatypedef.h"
+#include "impeg2_defs.h"
+#include "impeg2_disp_mgr.h"
+
+/**
+*******************************************************************************
+*
+* @brief
+*    Resets a display buffer manager to its empty state
+*
+* @par Description:
+*    Sets every slot pointer to NULL and rewinds the read and write indices
+*    to zero. The buffer id array is left untouched.
+*
+* @param[in] ps_disp_mgr
+*  Pointer to the display buffer management structure
+*
+* @returns none
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+void impeg2_disp_mgr_init(
+                disp_mgr_t *ps_disp_mgr)
+{
+    WORD32 i4_slot = 0;
+
+    /* Nothing queued and nothing consumed yet */
+    ps_disp_mgr->i4_rd_idx = 0;
+    ps_disp_mgr->i4_wr_idx = 0;
+
+    while(i4_slot < DISP_MGR_MAX_CNT)
+    {
+        ps_disp_mgr->apv_ptr[i4_slot] = NULL;
+        i4_slot++;
+    }
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*     Queues a buffer for display
+*
+* @par Description:
+*      Stores the buffer pointer and its id in the slot selected by the write
+*      index (modulo DISP_MGR_MAX_CNT) and then advances the write index. No
+*      check is made for entries that have not been read yet.
+*
+* @param[in] ps_disp_mgr
+*  Pointer to the display buffer management structure
+*
+* @param[in] pv_ptr
+*  Pointer to the display buffer
+*
+* @param[in] i4_buf_id
+*  ID of the display buffer
+*
+* @returns  0 (the function has no failure path)
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+WORD32 impeg2_disp_mgr_add(disp_mgr_t *ps_disp_mgr,
+                          void *pv_ptr,
+                          WORD32 i4_buf_id)
+{
+    /* Slot for this entry: the write index folded into the fixed-size arrays */
+    const WORD32 i4_slot = ps_disp_mgr->i4_wr_idx % DISP_MGR_MAX_CNT;
+
+    ps_disp_mgr->ai4_buf_id[i4_slot] = i4_buf_id;
+    ps_disp_mgr->apv_ptr[i4_slot] = pv_ptr;
+    ps_disp_mgr->i4_wr_idx += 1;
+
+    return 0;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Fetches the next queued display buffer
+*
+* @par Description:
+*  Returns the entry at the read index (modulo DISP_MGR_MAX_CNT) and advances
+*  the read index. Nothing is returned when the read index has caught up with
+*  the write index, or when the selected slot holds a NULL pointer; in the
+*  latter case the read index is not advanced.
+*
+* @param[in] ps_disp_mgr
+*  Pointer to the display buffer structure
+*
+* @param[out]  pi4_buf_id
+*  Receives the buffer id of the returned buffer, or -1 when NULL is returned
+*
+* @returns  Pointer to the next display buffer, or NULL if none is available
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+void* impeg2_disp_mgr_get(disp_mgr_t *ps_disp_mgr, WORD32 *pi4_buf_id)
+{
+    WORD32 i4_slot;
+    void *pv_buf;
+
+    /* Default for every path that hands back NULL */
+    *pi4_buf_id = -1;
+
+    if(ps_disp_mgr->i4_rd_idx >= ps_disp_mgr->i4_wr_idx)
+    {
+        return NULL;
+    }
+
+    i4_slot = ps_disp_mgr->i4_rd_idx % DISP_MGR_MAX_CNT;
+    pv_buf = ps_disp_mgr->apv_ptr[i4_slot];
+
+    /* Only a non-NULL entry is consumed */
+    if(NULL != pv_buf)
+    {
+        *pi4_buf_id = ps_disp_mgr->ai4_buf_id[i4_slot];
+        ps_disp_mgr->i4_rd_idx++;
+    }
+
+    return pv_buf;
+}

diff --git a/common/impeg2_disp_mgr.h b/common/impeg2_disp_mgr.h
new file mode 100644
index 0000000..96b01b0
--- /dev/null
+++ b/common/impeg2_disp_mgr.h

@@ -0,0 +1,67 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2_disp_mgr.h
+*
+* @brief
+*  Function declarations used for display management
+*
+* @author
+*  Srinivas T
+*
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+#ifndef _IMPEG2_DISP_MGR_H_
+#define _IMPEG2_DISP_MGR_H_
+
+#define DISP_MGR_MAX_CNT 64 /* Number of slots in each per-slot array of disp_mgr_t */
+#define DEFAULT_POC 0x7FFFFFFF /* NOTE(review): not used by any declaration in this header - confirm it is still needed */
+
+typedef struct
+{
+    /**
+     * Queued display buffer pointers, one per slot
+     */
+    void    *apv_ptr[DISP_MGR_MAX_CNT];
+
+    WORD32   ai4_buf_id[DISP_MGR_MAX_CNT]; /* Buffer id paired with apv_ptr[] at the same slot */
+
+    WORD32  i4_wr_idx; /* Write index; presumably reduced modulo DISP_MGR_MAX_CNT by the implementation - confirm */
+
+    WORD32  i4_rd_idx; /* Read index; presumably reduced modulo DISP_MGR_MAX_CNT by the implementation - confirm */
+}disp_mgr_t;
+
+/* Resets the display buffer manager */
+void impeg2_disp_mgr_init(disp_mgr_t *ps_disp_mgr);
+
+/* Queues pv_ptr together with i4_buf_id */
+WORD32 impeg2_disp_mgr_add(disp_mgr_t *ps_disp_mgr, void *pv_ptr, WORD32 i4_buf_id);
+
+/* Returns the next queued buffer pointer and writes its id to *pi4_buf_id */
+void *impeg2_disp_mgr_get(disp_mgr_t *ps_disp_mgr, WORD32 *pi4_buf_id);
+
+#endif  //_IMPEG2_DISP_MGR_H_

diff --git a/common/impeg2_format_conv.c b/common/impeg2_format_conv.c
new file mode 100644
index 0000000..ec0bcfb
--- /dev/null
+++ b/common/impeg2_format_conv.c

@@ -0,0 +1,401 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : impeg2_format_conv.c                                 */
+/*                                                                           */
+/*  Description       : Contains functions that copy a planar YUV420 frame   */
+/*                      or convert it to YUV422 interleaved and YUV420       */
+/*                      semi-planar layouts                                  */
+/*                                                                           */
+/*  List of Functions : impeg2_copy_frm_yuv420p()                            */
+/*                      impeg2_fmt_conv_yuv420p_to_yuv422ile()               */
+/*                      impeg2_fmt_conv_yuv420p_to_yuv420sp_vu()             */
+/*                      impeg2_fmt_conv_yuv420p_to_yuv420sp_uv()             */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         28 08 2007  Naveen Kumar T        Draft                           */
+/*                                                                           */
+/*****************************************************************************/
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+
+/* User include files */
+#include <stdio.h>
+#include <string.h>
+#include "iv_datatypedef.h"
+#include "iv.h"
+#include "ithread.h"
+
+#include "iv_datatypedef.h"
+#include "impeg2_macros.h"
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+
+#include "impeg2_job_queue.h"
+#include "impeg2_format_conv.h"
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2_copy_frm_yuv420p()                                */
+/*                                                                           */
+/*  Description   : Copies a planar YUV420 frame plane by plane into a       */
+/*                  planar YUV420 destination; no format change is made.     */
+/*                                                                           */
+/*  Inputs        : pu1_src_y,       -   UWORD8 pointer to source y plane.   */
+/*                  pu1_src_u,       -   UWORD8 pointer to source u plane.   */
+/*                  pu1_src_v,       -   UWORD8 pointer to source v plane.   */
+/*                  pu1_dst_y,       -   UWORD8 pointer to dest y plane.     */
+/*                  pu1_dst_u,       -   UWORD8 pointer to dest u plane.     */
+/*                  pu1_dst_v,       -   UWORD8 pointer to dest v plane.     */
+/*                  u4_width,        -   Width of image.                     */
+/*                  u4_height,       -   Height of image.                    */
+/*                  u4_src_stride_y  -   Stride in pixels of source Y plane. */
+/*                  u4_src_stride_u  -   Stride in pixels of source U plane. */
+/*                  u4_src_stride_v  -   Stride in pixels of source V plane. */
+/*                  u4_dst_stride_y  -   Stride in pixels of dest Y plane.   */
+/*                  u4_dst_stride_u  -   Stride in pixels of dest U plane.   */
+/*                  u4_dst_stride_v  -   Stride in pixels of dest V plane.   */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Rows are copied with memcpy(): u4_height luma rows of    */
+/*                  u4_width bytes, then u4_height/2 rows of u4_width/2      */
+/*                  bytes for each chroma plane (odd sizes round down).      */
+/*                                                                           */
+/*  Outputs       : None                                                     */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         29 08 2007  Naveen Kumar T        Draft                           */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2_copy_frm_yuv420p(UWORD8 *pu1_src_y,
+                             UWORD8 *pu1_src_u,
+                             UWORD8 *pu1_src_v,
+                             UWORD8 *pu1_dst_y,
+                             UWORD8 *pu1_dst_u,
+                             UWORD8 *pu1_dst_v,
+                             UWORD32 u4_width,
+                             UWORD32 u4_height,
+                             UWORD32 u4_src_stride_y,
+                             UWORD32 u4_src_stride_u,
+                             UWORD32 u4_src_stride_v,
+                             UWORD32 u4_dst_stride_y,
+                             UWORD32 u4_dst_stride_u,
+                             UWORD32 u4_dst_stride_v)
+{
+    const WORD32 i4_luma_rows   = (WORD32) u4_height;
+    const WORD32 i4_chroma_rows = u4_height >> 1;
+    const WORD32 i4_chroma_cols = u4_width >> 1;
+    WORD32 i4_rows_left;
+
+    /* Luma plane */
+    for(i4_rows_left = i4_luma_rows; i4_rows_left > 0; i4_rows_left--)
+    {
+        memcpy(pu1_dst_y, pu1_src_y, u4_width);
+        pu1_src_y += u4_src_stride_y;
+        pu1_dst_y += u4_dst_stride_y;
+    }
+
+    /* U plane */
+    for(i4_rows_left = i4_chroma_rows; i4_rows_left > 0; i4_rows_left--)
+    {
+        memcpy(pu1_dst_u, pu1_src_u, i4_chroma_cols);
+        pu1_src_u += u4_src_stride_u;
+        pu1_dst_u += u4_dst_stride_u;
+    }
+
+    /* V plane */
+    for(i4_rows_left = i4_chroma_rows; i4_rows_left > 0; i4_rows_left--)
+    {
+        memcpy(pu1_dst_v, pu1_src_v, i4_chroma_cols);
+        pu1_src_v += u4_src_stride_v;
+        pu1_dst_v += u4_dst_stride_v;
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2_fmt_conv_yuv420p_to_yuv422ile()                   */
+/*                                                                           */
+/*  Description   : Converts planar YUV420 to packed YUV422: each 32-bit     */
+/*                  word holds U | Y0 << 8 | V << 16 | Y1 << 24, and every   */
+/*                  chroma row is used for two consecutive luma rows.        */
+/*                                                                           */
+/*  Inputs        : pu1_y            -   UWORD8 pointer to y plane.          */
+/*                  pu1_u            -   UWORD8 pointer to u plane.          */
+/*                  pu1_v            -   UWORD8 pointer to v plane.          */
+/*                  pv_yuv422i       -   Pointer to packed yuv422i image.    */
+/*                  u4_width         -   Width of the Y plane.               */
+/*                  u4_height        -   Height of the Y plane.              */
+/*                  u4_stride_y      -   Stride in pixels of Y plane.        */
+/*                  u4_stride_u      -   Stride in pixels of U plane.        */
+/*                  u4_stride_v      -   Stride in pixels of V plane.        */
+/*                  u4_stride_yuv422i-   Stride in pixels of yuv422i image.  */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : One row at a time; an odd trailing pixel is emitted      */
+/*                  with Y0 duplicated into the Y1 position.                 */
+/*                                                                           */
+/*  Outputs       : None                                                     */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         29 08 2007  Naveen Kumar T        Draft                           */
+/*                                                                           */
+/*****************************************************************************/
+
+void impeg2_fmt_conv_yuv420p_to_yuv422ile(register UWORD8 *pu1_y,
+                     register UWORD8 *pu1_u,
+                     register UWORD8 *pu1_v,
+                     void *pv_yuv422i,
+                     UWORD32 u4_width,
+                     UWORD32 u4_height,
+                     UWORD32 u4_stride_y,
+                     UWORD32 u4_stride_u,
+                     UWORD32 u4_stride_v,
+                     UWORD32 u4_stride_yuv422i)
+{
+    /* Chroma samples read, and 32-bit words written, per row */
+    const UWORD32 u4_chroma_cols = (u4_width + 1) >> 1;
+    /* Number of complete Y0/Y1 pairs in a row */
+    const UWORD32 u4_pair_cnt = u4_width >> 1;
+
+    /* Row-end adjustments that move each pointer to the start of its next row. */
+    /* They are kept in 32 bits: 16-bit offsets and counters would truncate for */
+    /* row padding of 65536 or more and for heights or pair counts above 32767  */
+    const WORD32 i4_skip_y   = (WORD32)(u4_stride_y - u4_width);
+    const WORD32 i4_skip_u   = (WORD32)(u4_stride_u - u4_chroma_cols);
+    const WORD32 i4_skip_v   = (WORD32)(u4_stride_v - u4_chroma_cols);
+    /* Output stride is in pixels (2 bytes each), the output pointer in words */
+    const WORD32 i4_skip_dst = (WORD32)((u4_stride_yuv422i >> 1) - u4_chroma_cols);
+
+    /* Output is written as native 32-bit words; the in-memory byte order */
+    /* U,Y0,V,Y1 therefore holds on little-endian hosts                    */
+    UWORD32 *pu4_yuv422i = (UWORD32 *)pv_yuv422i;
+    UWORD32 u4_row, u4_col;
+
+    for(u4_row = 0; u4_row < u4_height; u4_row++)
+    {
+        for(u4_col = 0; u4_col < u4_pair_cnt; u4_col++)
+        {
+            const UWORD32 u4_y0 = *pu1_y++;
+            const UWORD32 u4_y1 = *pu1_y++;
+            const UWORD32 u4_v  = *pu1_v++;
+            const UWORD32 u4_u  = *pu1_u++;
+
+            *pu4_yuv422i++ = (u4_y1 << 24) | (u4_v << 16) | (u4_y0 << 8) | u4_u;
+        }
+
+        /* Odd width: the last luma sample fills both Y slots of the final word */
+        if(u4_width & 0x1)
+        {
+            const UWORD32 u4_y0 = *pu1_y++;
+            const UWORD32 u4_v  = *pu1_v++;
+            const UWORD32 u4_u  = *pu1_u++;
+
+            *pu4_yuv422i++ = (u4_y0 << 24) | (u4_v << 16) | (u4_y0 << 8) | u4_u;
+        }
+
+        pu1_y += i4_skip_y;
+
+        if(0 == (u4_row & 0x1))
+        {
+            /* First row of a pair: rewind so the next luma row reuses this chroma row */
+            pu1_u -= u4_chroma_cols;
+            pu1_v -= u4_chroma_cols;
+        }
+        else
+        {
+            /* Second row of a pair: move on to the next chroma row */
+            pu1_u += i4_skip_u;
+            pu1_v += i4_skip_v;
+        }
+
+        pu4_yuv422i += i4_skip_dst;
+    }
+}
+
+
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2_fmt_conv_yuv420p_to_yuv420sp_vu()                 */
+/*                                                                           */
+/*  Description   : Converts planar YUV420 to semi-planar YUV420 with the    */
+/*                  chroma plane interleaved as V,U,V,U...                   */
+/*                                                                           */
+/*  Inputs        : pu1_y, pu1_u, pu1_v - Source Y, U and V planes.          */
+/*                  pu1_dest_y          - Destination Y plane.               */
+/*                  pu1_dest_uv         - Destination interleaved chroma.    */
+/*                  u4_height           - Height of the Y plane.             */
+/*                  u4_width            - Width of the Y plane.              */
+/*                  u4_stridey/u/v      - Source strides of Y, U, V planes.  */
+/*                  u4_dest_stride_y    - Destination Y stride.              */
+/*                  u4_dest_stride_uv   - Destination chroma stride (bytes). */
+/*                  u4_convert_uv_only  - Non-zero skips the Y copy.         */
+/*                                                                           */
+/*  Processing    : Y rows are copied with memcpy(). Each chroma row emits   */
+/*                  (u4_width + 1) / 2 V,U pairs; the destination pointer    */
+/*                  then advances by the stride minus the bytes written.     */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2_fmt_conv_yuv420p_to_yuv420sp_vu(UWORD8 *pu1_y, UWORD8 *pu1_u, UWORD8 *pu1_v,
+                                     UWORD8 *pu1_dest_y, UWORD8 *pu1_dest_uv,
+                                     UWORD32 u4_height,  UWORD32 u4_width,UWORD32 u4_stridey,
+                                     UWORD32 u4_strideu, UWORD32 u4_stridev,
+                                     UWORD32 u4_dest_stride_y, UWORD32 u4_dest_stride_uv,
+                                     UWORD32 u4_convert_uv_only
+                                     )
+
+{
+    /* 32-bit counters: a 16-bit row counter can never reach heights above 65535 */
+    UWORD32 u4_row, u4_col;
+    /* Chroma plane dimensions, rounded up for odd luma sizes */
+    const UWORD32 u4_rows_uv = (u4_height + 1) >> 1;
+    const UWORD32 u4_cols_uv = (u4_width + 1) >> 1;
+
+    /* Copy Y buffer */
+    if(0 == u4_convert_uv_only)
+    {
+        UWORD8 *pu1_src = pu1_y;
+        UWORD8 *pu1_dst = pu1_dest_y;
+
+        for(u4_row = 0; u4_row < u4_height; u4_row++)
+        {
+            memcpy(pu1_dst, pu1_src, u4_width);
+            pu1_dst += u4_dest_stride_y;
+            pu1_src += u4_stridey;
+        }
+    }
+
+    /* Interleave Cr and Cb buffers */
+    for(u4_row = 0; u4_row < u4_rows_uv; u4_row++)
+    {
+        for(u4_col = 0; u4_col < u4_cols_uv; u4_col++)
+        {
+            *pu1_dest_uv++ = *pu1_v++;
+            *pu1_dest_uv++ = *pu1_u++;
+        }
+
+        /* 2 * u4_cols_uv bytes were written, which is u4_width + 1 for odd  */
+        /* widths; subtracting u4_width instead would shift every following  */
+        /* row by one byte                                                   */
+        pu1_dest_uv += u4_dest_stride_uv - (u4_cols_uv << 1);
+        pu1_u += u4_strideu - u4_cols_uv;
+        pu1_v += u4_stridev - u4_cols_uv;
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2_fmt_conv_yuv420p_to_yuv420sp_uv()                 */
+/*                                                                           */
+/*  Description   : Converts planar YUV420 to semi-planar YUV420 with the    */
+/*                  chroma plane interleaved as U,V,U,V...                   */
+/*                                                                           */
+/*  Inputs        : pu1_y, pu1_u, pu1_v - Source Y, U and V planes.          */
+/*                  pu1_dest_y          - Destination Y plane.               */
+/*                  pu1_dest_uv         - Destination interleaved chroma.    */
+/*                  u4_height           - Height of the Y plane.             */
+/*                  u4_width            - Width of the Y plane.              */
+/*                  u4_stridey/u/v      - Source strides of Y, U, V planes.  */
+/*                  u4_dest_stride_y    - Destination Y stride.              */
+/*                  u4_dest_stride_uv   - Destination chroma stride (bytes). */
+/*                  u4_convert_uv_only  - Non-zero skips the Y copy.         */
+/*                                                                           */
+/*  Processing    : Y rows are copied with memcpy(). Each chroma row emits   */
+/*                  (u4_width + 1) / 2 U,V pairs; the destination pointer    */
+/*                  then advances by the stride minus the bytes written.     */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2_fmt_conv_yuv420p_to_yuv420sp_uv(UWORD8 *pu1_y, UWORD8 *pu1_u, UWORD8 *pu1_v,
+                                     UWORD8 *pu1_dest_y, UWORD8 *pu1_dest_uv,
+                                     UWORD32 u4_height,  UWORD32 u4_width,UWORD32 u4_stridey,
+                                     UWORD32 u4_strideu, UWORD32 u4_stridev,
+                                     UWORD32 u4_dest_stride_y, UWORD32 u4_dest_stride_uv,
+                                     UWORD32 u4_convert_uv_only)
+
+{
+    /* 32-bit counters: a 16-bit row counter can never reach heights above 65535 */
+    UWORD32 u4_row, u4_col;
+    /* Chroma plane dimensions, rounded up for odd luma sizes */
+    const UWORD32 u4_rows_uv = (u4_height + 1) >> 1;
+    const UWORD32 u4_cols_uv = (u4_width + 1) >> 1;
+
+    /* Copy Y buffer */
+    if(0 == u4_convert_uv_only)
+    {
+        UWORD8 *pu1_src = pu1_y;
+        UWORD8 *pu1_dst = pu1_dest_y;
+
+        for(u4_row = 0; u4_row < u4_height; u4_row++)
+        {
+            memcpy(pu1_dst, pu1_src, u4_width);
+            pu1_dst += u4_dest_stride_y;
+            pu1_src += u4_stridey;
+        }
+    }
+
+    /* Interleave Cb and Cr buffers */
+    for(u4_row = 0; u4_row < u4_rows_uv; u4_row++)
+    {
+        for(u4_col = 0; u4_col < u4_cols_uv; u4_col++)
+        {
+            *pu1_dest_uv++ = *pu1_u++;
+            *pu1_dest_uv++ = *pu1_v++;
+        }
+
+        /* 2 * u4_cols_uv bytes were written, which is u4_width + 1 for odd  */
+        /* widths; subtracting u4_width instead would shift every following  */
+        /* row by one byte                                                   */
+        pu1_dest_uv += u4_dest_stride_uv - (u4_cols_uv << 1);
+        pu1_u += u4_strideu - u4_cols_uv;
+        pu1_v += u4_stridev - u4_cols_uv;
+    }
+
+}
+
+

diff --git a/common/impeg2_format_conv.h b/common/impeg2_format_conv.h
new file mode 100644
index 0000000..52400d3
--- /dev/null
+++ b/common/impeg2_format_conv.h

@@ -0,0 +1,133 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : impeg2_format_conv.h                                */
+/*                                                                           */
+/*  Description       : Contains coefficients and constants required for     */
+/*                      converting from rgb and gray color spaces to yuv422i */
+/*                      color space                                          */
+/*                                                                           */
+/*  List of Functions : None                                                 */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         27 08 2007  Naveen Kumar T        Draft                           */
+/*                                                                           */
+/*****************************************************************************/
+
+#ifndef __IMPEG2_FORMAT_CONV_H__
+#define __IMPEG2_FORMAT_CONV_H__
+
+/*****************************************************************************/
+/* Constant Definitions                                                      */
+/*****************************************************************************/
+/* Integer RGB -> YUV conversion coefficients: COEFF_<n>_<plane> is the weight
+ * of input component <n> in the given output plane.
+ * NOTE(review): <n> = 0/1/2 presumably selects R/G/B - confirm against the
+ * code that uses these macros.
+ * CONST_RGB_YUV1 equals 16 << 8 and CONST_RGB_YUV2 equals 128 << 8;
+ * presumably the luma and chroma offsets in the same fixed-point scale -
+ * TODO confirm. */
+#define COEFF_0_Y       66
+#define COEFF_1_Y       129
+#define COEFF_2_Y       25
+#define COEFF_0_U       -38
+#define COEFF_1_U       -75
+#define COEFF_2_U       112
+#define COEFF_0_V       112
+#define COEFF_1_V       -94
+#define COEFF_2_V       -18
+#define CONST_RGB_YUV1  4096
+#define CONST_RGB_YUV2  32768
+#define CONST_GRAY_YUV  128
+#define COEF_2_V2_U  0xFFEE0070
+/* Pairs of the coefficients above packed as 16-bit two's complement values
+ * into one 32-bit word: the upper half-word holds the first coefficient named
+ * in the macro and the lower half-word the second, e.g. COF_2Y_0Y is
+ * (25 << 16) | 66, i.e. COEFF_2_Y and COEFF_0_Y. COEF_2_V2_U above packs
+ * COEFF_2_V (-18 = 0xFFEE) and COEFF_2_U (112 = 0x0070) the same way. */
+#define COF_2Y_0Y          0X00190042
+#define COF_1U_0U          0XFFB5FFDA
+#define COF_1V_0V          0XFFA20070
+
+/*****************************************************************************/
+/* Enums
+ *
+ * input_format_t lists colour formats by numeric identifier (0 to 7).
+ * NOTE(review): nothing else in this header refers to this type, and the
+ * meaning of each format is known here only from its name. */
+/*****************************************************************************/
+typedef enum {
+GRAY_SCALE   = 0,
+YUV444      = 1,
+YUV420      = 2,
+YUV422H     = 3,
+YUV422V     = 4,
+YUV411      = 5,
+RGB24       = 6,
+RGB24i      = 7
+}input_format_t;
+
+/*****************************************************************************/
+/* Function Declarations                                                     */
+/*****************************************************************************
+ * pf_copy_yuv420p_buf_t: function type taking three source planes (Y, U, V),
+ * three destination planes, the frame width and height (in that order) and
+ * one stride per source and destination plane.
+ * NOTE(review): presumably a plane-by-plane copy of a planar YUV 4:2:0
+ * frame; the implementation is not visible here - confirm.
+ *****************************************************************************/
+typedef void pf_copy_yuv420p_buf_t(UWORD8 *pu1_src_y,
+                                   UWORD8 *pu1_src_u,
+                                   UWORD8 *pu1_src_v,
+                                   UWORD8 *pu1_dst_y,
+                                   UWORD8 *pu1_dst_u,
+                                   UWORD8 *pu1_dst_v,
+                                   UWORD32 u4_width,
+                                   UWORD32 u4_height,
+                                   UWORD32 u4_src_stride_y,
+                                   UWORD32 u4_src_stride_u,
+                                   UWORD32 u4_src_stride_v,
+                                   UWORD32 u4_dst_stride_y,
+                                   UWORD32 u4_dst_stride_u,
+                                   UWORD32 u4_dst_stride_v);
+/* pf_fmt_conv_yuv420p_to_yuv422ile_t: function type taking three source
+ * planes (Y, U, V), a single untyped destination buffer, the frame width and
+ * height (in that order), one stride per source plane and one destination
+ * stride.
+ * NOTE(review): presumably converts planar YUV 4:2:0 to interleaved
+ * YUV 4:2:2; the units of u4_stride_yuv422i are not visible here - confirm. */
+typedef void pf_fmt_conv_yuv420p_to_yuv422ile_t(UWORD8 *pu1_y,
+                                                UWORD8 *pu1_u,
+                                                UWORD8 *pu1_v,
+                                                void *pv_yuv422i,
+                                                UWORD32 u4_width,
+                                                UWORD32 u4_height,
+                                                UWORD32 u4_stride_y,
+                                                UWORD32 u4_stride_u,
+                                                UWORD32 u4_stride_v,
+                                                UWORD32 u4_stride_yuv422i);
+
+/**
+ * Function type for converting a planar YUV 4:2:0 frame to a semi-planar
+ * frame (separate luma plane plus one interleaved chroma plane).
+ *
+ * Unlike the other function types in this header, height is passed BEFORE
+ * width.
+ *
+ * @param[in]  pu1_y               Source luma plane
+ * @param[in]  pu1_u               Source U plane
+ * @param[in]  pu1_v               Source V plane
+ * @param[out] pu1_dest_y          Destination luma plane
+ * @param[out] pu1_dest_uv         Destination interleaved chroma plane
+ * @param[in]  u4_height           Luma height in rows
+ * @param[in]  u4_width            Luma width in samples
+ * @param[in]  u4_stridey          Source luma stride
+ * @param[in]  u4_strideu          Source U stride
+ * @param[in]  u4_stridev          Source V stride
+ * @param[in]  u4_dest_stride_y    Destination luma stride
+ * @param[in]  u4_dest_stride_uv   Destination chroma stride
+ * @param[in]  u4_convert_uv_only  Non-zero skips the luma copy
+ */
+typedef void pf_fmt_conv_yuv420p_to_yuv420sp_t(UWORD8 *pu1_y,
+                                               UWORD8 *pu1_u,
+                                               UWORD8 *pu1_v,
+                                               UWORD8 *pu1_dest_y,
+                                               UWORD8 *pu1_dest_uv,
+                                               UWORD32 u4_height,
+                                               UWORD32 u4_width,
+                                               UWORD32 u4_stridey,
+                                               UWORD32 u4_strideu,
+                                               UWORD32 u4_stridev,
+                                               UWORD32 u4_dest_stride_y,
+                                               UWORD32 u4_dest_stride_uv,
+                                               UWORD32 u4_convert_uv_only);
+/* Function declarations written through the function types above, so each
+ * definition must match the corresponding type exactly.
+ * The _uv / _vu suffix names the order of the interleaved chroma samples. */
+pf_copy_yuv420p_buf_t impeg2_copy_frm_yuv420p;
+pf_fmt_conv_yuv420p_to_yuv422ile_t impeg2_fmt_conv_yuv420p_to_yuv422ile;
+pf_fmt_conv_yuv420p_to_yuv420sp_t impeg2_fmt_conv_yuv420p_to_yuv420sp_vu;
+pf_fmt_conv_yuv420p_to_yuv420sp_t impeg2_fmt_conv_yuv420p_to_yuv420sp_uv;
+/* NOTE(review): the _a9q suffix presumably marks ARM Cortex-A9 NEON
+ * variants - confirm against the platform sources. */
+pf_fmt_conv_yuv420p_to_yuv420sp_t impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q;
+pf_fmt_conv_yuv420p_to_yuv420sp_t impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q;
+/* NOTE(review): the _av8 suffix presumably marks ARMv8 variants - confirm. */
+pf_fmt_conv_yuv420p_to_yuv420sp_t impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_av8;
+pf_fmt_conv_yuv420p_to_yuv420sp_t impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_av8;
+
+
+#endif /* __IMPEG2_FORMAT_CONV_H__ */

diff --git a/common/impeg2_globals.c b/common/impeg2_globals.c
new file mode 100644
index 0000000..9193ef7
--- /dev/null
+++ b/common/impeg2_globals.c

@@ -0,0 +1,351 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#include <stdio.h>
+#include "iv_datatypedef.h"
+#include "iv.h"
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_globals.h"
+
+/* Table for converting the quantizer_scale_code to quantizer_scale
+ * 32 entries, indexed by quantizer_scale_code. The step between successive
+ * entries grows from 1 to 2, 4 and finally 8 as the code increases. */
+const UWORD8 gau1_impeg2_non_linear_quant_scale[] =
+{
+    0, 1, 2, 3, 4, 5, 6, 7,
+    8,10,12,14,16,18,20,22,
+    24,28,32,36,40,44,48,52,
+    56,64,72,80,88,96,104,112
+};
+
+
+/* Default quantizer matrix to be used for intra blocks
+ * 64 entries, written here as 8 rows of 8.
+ * NOTE(review): whether the entries are stored in raster or scan order is
+ * not visible here - confirm against the code that loads the matrix. */
+const UWORD8 gau1_impeg2_intra_quant_matrix_default[] =
+{
+    8, 16, 19, 22, 26, 27, 29, 34,
+    16, 16, 22, 24, 27, 29, 34, 37,
+    19, 22, 26, 27, 29, 34, 34, 38,
+    22, 22, 26, 27, 29, 34, 37, 40,
+    22, 26, 27, 29, 32, 35, 40, 48,
+    26, 27, 29, 32, 35, 40, 48, 58,
+    26, 27, 29, 34, 38, 46, 56, 69,
+    27, 29, 35, 38, 46, 56, 69, 83
+};
+
+/* Default quantizer matrix to be used for inter blocks
+ * 64 entries, all equal to 16. */
+const UWORD8 gau1_impeg2_inter_quant_matrix_default[] =
+{
+    16,16,16,16,16,16,16,16,
+    16,16,16,16,16,16,16,16,
+    16,16,16,16,16,16,16,16,
+    16,16,16,16,16,16,16,16,
+    16,16,16,16,16,16,16,16,
+    16,16,16,16,16,16,16,16,
+    16,16,16,16,16,16,16,16,
+    16,16,16,16,16,16,16,16
+};
+
+/* Table to perform inverse scan when the scan direction is zigzag
+ * 64 entries forming a permutation of 0..63: entry n is the position inside
+ * the 8x8 block of the n-th coefficient in scan order.
+ * NOTE(review): positions are presumably row * 8 + column - confirm. */
+const UWORD8 gau1_impeg2_inv_scan_zig_zag[] =
+{
+     0,  1,  8, 16,  9,  2,  3, 10,
+    17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36,
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+
+/*  Table to perform inverse scan when the direction of scanning is vertical
+ *  64 entries forming a permutation of 0..63: entry n is the position inside
+ *  the 8x8 block of the n-th coefficient in scan order.
+ *  NOTE(review): positions are presumably row * 8 + column - confirm. */
+const UWORD8 gau1_impeg2_inv_scan_vertical[] =
+{
+    0, 8, 16, 24, 1, 9, 2, 10,
+    17, 25, 32, 40, 48, 56, 57, 49,
+    41, 33, 26, 18, 3, 11, 4, 12,
+    19, 27, 34, 42, 50, 58, 35, 43,
+    51, 59, 20, 28, 5, 13, 6, 14,
+    21, 29, 36, 44, 52, 60, 37, 45,
+    53, 61, 22, 30, 7, 15, 23, 31,
+    38, 46, 54, 62, 39, 47, 55, 63
+};
+
+/*****************************************************************************/
+/* Tables that indicate which interpolation type is to be used               */
+/*****************************************************************************/
+/* Chroma: one 16-entry sub-table per sign combination of the motion vector
+ * components (named in the comment above each sub-table). Entries take the
+ * values 0 to 3.
+ * NOTE(review): the values presumably select the FXFY / HXFY / FXHY / HXHY
+ * cases in the order used by gau2_impeg2_chroma_interp_inp2 - confirm. */
+const UWORD16 gau2_impeg2_chroma_interp_mv[][16] =
+{
+    /* Pos X Pos Y */
+    {
+        0,  0,  1,  1,
+        0,  0,  1,  1,
+        2,  2,  3,  3,
+        2,  2,  3,  3
+    },
+    /* Neg X Pos Y */
+    {
+        0,  1,  1,  0,
+        0,  1,  1,  0,
+        2,  3,  3,  2,
+        2,  3,  3,  2
+    },
+    /* Pos X Neg Y */
+    {
+        0,  0,  1,  1,
+        2,  2,  3,  3,
+        2,  2,  3,  3,
+        0,  0,  1,  1
+    },
+    /* Neg X Neg Y */
+    {
+        0,  1,  1, 0,
+        2,  3,  3, 2,
+        2,  3,  3, 2,
+        0,  1,  1, 0
+    }
+};
+/*****************************************************************************/
+/* Input #1 Offset in bytes                                                  */
+/*****************************************************************************/
+/* Chroma: same layout as gau2_impeg2_chroma_interp_mv (one 16-entry
+ * sub-table per motion vector sign combination). Non-zero offsets appear
+ * only in the last column for negative X (4) and in the last row for
+ * negative Y (72); the corner entry for both is their sum (76). */
+const UWORD16 gau2_impeg2_chroma_interp_inp1[][16] =
+{
+    /* Pos X Pos Y */
+    {
+        0,  0,  0,  0,
+        0,  0,  0,  0,
+        0,  0,  0,  0,
+        0,  0,  0,  0
+    },
+    /* Neg X Pos Y */
+    {
+        0,  0,  0,  4,
+        0,  0,  0,  4,
+        0,  0,  0,  4,
+        0,  0,  0,  4
+    },
+    /* Pos X Neg Y */
+    {
+        0,  0,  0,  0,
+        0,  0,  0,  0,
+        0,  0,  0,  0,
+        72, 72, 72, 72
+    },
+    /* Neg X Neg Y */
+    {
+        0,  0,  0,  4,
+        0,  0,  0,  4,
+        0,  0,  0,  4,
+        72, 72, 72, 76
+    }
+};
+/* Luma: 16 entries written as 4 rows of 4. The first two rows hold the
+ * offsets 1 and 3, the last two rows hold the same values plus 36.
+ * NOTE(review): how the table is indexed is not visible here - presumably
+ * from the low bits of the motion vector components; confirm. */
+const UWORD16 gau2_impeg2_luma_interp_inp1[] =
+{
+    1,  1,  3,  3,
+    1,  1,  3,  3,
+    37, 37, 39, 39,
+    37, 37, 39, 39
+};
+/*****************************************************************************/
+/* Input #2 Offset from Input #1 in bytes                                    */
+/*****************************************************************************/
+/*
+    FXFY  0,
+    HXFY  2,
+    FXHY 36,
+    HXHY 36
+
+    The luma table below has 16 entries written as 4 rows of 4: even rows
+    alternate between the FXFY (0) and HXFY (2) offsets, odd rows hold 36
+    throughout.
+    NOTE(review): F / H presumably stand for full / half sample position in
+    X and Y - confirm.
+*/
+const UWORD16 gau2_impeg2_luma_interp_inp2[] =
+{
+      0,  2,  0,  2,
+     36, 36, 36, 36,
+      0,  2,  0,  2,
+     36, 36, 36, 36
+};
+const UWORD16 gau2_impeg2_chroma_interp_inp2[] =
+{
+    /* FXFY (chroma offsets: one entry per case, in the order listed above) */
+    0,
+    /* HXFY */
+    4,
+    /* FXHY */
+    72,
+    /* HXHY */
+    72
+};
+
+/*****************************************************************************/
+/* Corresponds to Table 6-4 frame_rate_value  of the standard                */
+/*****************************************************************************/
+/*
+    frame_rate_code frame_rate_value
+
+    0000            Forbidden
+    0001            24 000 ÷ 1001
+    0010            24
+    0011            25
+    0100            30 000 ÷ 1001
+    0101            30
+    0110            50
+    0111            60 000 ÷ 1001
+    1000            60
+    1001            Reserved
+    ....
+    1111            Reserved
+
+    Each row of the table below is {numerator, denominator} of the frame
+    rate, indexed by frame_rate_code. Integer rates use a denominator of
+    1000, e.g. code 2 is 24000 / 1000.
+    The table has only 9 rows (codes 0 to 8).
+    NOTE(review): callers presumably reject the reserved codes 9 to 15
+    before indexing - verify.
+*/
+const UWORD16 gau2_impeg2_frm_rate_code[][2] =
+{
+    {1    ,    1}, /* Forbidden */
+    {24000, 1001},
+    {24000, 1000},
+    {25000, 1000},
+    {30000, 1001},
+    {30000, 1000},
+    {50000, 1000},
+    {60000, 1001},
+    {60000, 1000}
+    /* Rest reserved */
+};
+/* 8x8 inverse DCT coefficient matrix in Q15 (scaled by 32768), stored as 64
+ * consecutive values and addressed as [row * 8 + column].
+ * Row k, column n holds cos((2n + 1) * k * pi / 16); row 0 holds cos(pi / 4)
+ * throughout. */
+const WORD16 gai2_impeg2_idct_q15[] =
+{
+    23170,    23170,    23170,    23170,    23170,    23170,    23170,    23170,
+    32138,    27246,    18205,     6393,    -6393,   -18205,   -27246,   -32138,
+    30274,    12540,   -12540,   -30274,   -30274,   -12540,    12540,    30274,
+    27246,    -6393,   -32138,   -18205,    18205,    32138,     6393,   -27246,
+    23170,   -23170,   -23170,    23170,    23170,   -23170,   -23170,    23170,
+    18205,   -32138,     6393,    27246,   -27246,    -6393,    32138,   -18205,
+    12540,   -30274,    30274,   -12540,   -12540,    30274,   -30274,    12540,
+     6393,   -18205,    27246,   -32138,    32138,   -27246,    18205,    -6393,
+};
+/* The same 8x8 inverse DCT coefficient matrix as gai2_impeg2_idct_q15, in
+ * Q11 (scaled by 2048), addressed as [row * 8 + column]. */
+const WORD16 gai2_impeg2_idct_q11[] =
+{
+    1448,     1448,     1448,     1448,     1448,     1448,     1448,     1448,
+    2009,     1703,     1138,      400,     -400,    -1138,    -1703,    -2009,
+    1892,      784,     -784,    -1892,    -1892,     -784,      784,     1892,
+    1703,     -400,    -2009,    -1138,     1138,     2009,      400,    -1703,
+    1448,    -1448,    -1448,     1448,     1448,    -1448,    -1448,     1448,
+    1138,    -2009,      400,     1703,    -1703,     -400,     2009,    -1138,
+     784,    -1892,     1892,     -784,     -784,     1892,    -1892,      784,
+     400,    -1138,     1703,    -2009,     2009,    -1703,     1138,     -400,
+};
+/* Q15 coefficients taken from the even rows (first table) and odd rows
+ * (second table) of gai2_impeg2_idct_q15, arranged as one coefficient pair
+ * repeated four times per row.
+ * NOTE(review): presumably laid out this way for SIMD implementations of
+ * the transform - confirm against the code that uses them. */
+const WORD16 gai2_impeg2_idct_even_8_q15[][8] =
+{
+    {  23170,  23170,  23170,  23170,  23170,  23170,  23170,  23170  },
+    {  12540, -30274,  12540, -30274,  12540, -30274,  12540, -30274  },
+    {  30274,  12540,  30274,  12540,  30274,  12540,  30274,  12540  },
+    {  23170, -23170,  23170, -23170,  23170, -23170,  23170, -23170  }
+};
+const WORD16 gai2_impeg2_idct_odd_8_q15[][8] =
+{
+    {  32138,  27246,  32138,  27246,  32138,  27246,  32138,  27246 },
+    {  18205,   6393,  18205,   6393,  18205,   6393,  18205,   6393 },
+    {  27246,  -6393,  27246,  -6393,  27246,  -6393,  27246,  -6393 },
+    {  32138,  18205,  32138,  18205,  32138,  18205,  32138,  18205 },
+    {  18205, -32138,  18205, -32138,  18205, -32138,  18205, -32138 },
+    {  6393,   27246,   6393,  27246,   6393,  27246,   6393,  27246 },
+    {  6393,  -18205,   6393, -18205,   6393, -18205,   6393, -18205 },
+    {  27246, -32138,  27246, -32138,  27246, -32138,  27246, -32138 },
+};
+/* Q11 counterparts of gai2_impeg2_idct_even_8_q15 and
+ * gai2_impeg2_idct_odd_8_q15: coefficient pairs from the even and odd rows
+ * of gai2_impeg2_idct_q11, each pair repeated four times per row. */
+const WORD16 gai2_impeg2_idct_even_8_q11[][8] =
+{
+    {   1448,   1448,   1448,   1448,   1448,   1448,   1448,   1448 },
+    {    784,  -1892,    784,  -1892,    784,  -1892,    784,  -1892 },
+    {   1892,    784,   1892,    784,   1892,    784,   1892,    784 },
+    {   1448,  -1448,   1448,  -1448,   1448,  -1448,   1448,  -1448 }
+};
+const WORD16 gai2_impeg2_idct_odd_8_q11[][8] =
+{
+    {   2009,   1703,   2009,   1703,   2009,   1703,   2009,   1703 },
+    {   1138,    400,   1138,    400,   1138,    400,   1138,    400 },
+    {   1703,   -400,   1703,   -400,   1703,   -400,   1703,   -400 },
+    {   2009,   1138,   2009,   1138,   2009,   1138,   2009,   1138 },
+    {   1138,  -2009,   1138,  -2009,   1138,  -2009,   1138,  -2009 },
+    {    400,   1703,    400,   1703,    400,   1703,    400,   1703 },
+    {    400,  -1138,    400,  -1138,    400,  -1138,    400,  -1138 },
+    {   1703,  -2009,   1703,  -2009,   1703,  -2009,   1703,  -2009 },
+};
+
+
+
+/*****************************************************************************/
+/* Last row IDCT Coefficients in Q11 format                                  */
+/*****************************************************************************
+ * Identical to row 7 (the final 8 values) of gai2_impeg2_idct_q11.
+ *****************************************************************************/
+const WORD16 gai2_impeg2_idct_last_row_q11[] =
+{
+     400,    -1138,     1703,    -2009,     2009,    -1703,     1138,     -400,
+};
+/* First column (column 0 of every row) of gai2_impeg2_idct_q15. */
+const WORD16 gai2_impeg2_idct_first_col_q15[] =
+{
+   23170,    32138,    30274,    27246,    23170,    18205,    12540,     6393,
+};
+/* First column (column 0 of every row) of gai2_impeg2_idct_q11. */
+const WORD16 gai2_impeg2_idct_first_col_q11[] =
+{
+     1448,    2009,     1892,     1703,     1448,     1138,      784,      400,
+};
+
+/*****************************************************************************/
+/* Output of first stage dct (using gai2_impeg2_idct_q15 as coeffs)          */
+/* for a 1D data (0, 0, 0, 0, 0, 0, 0, 1)                                    */
+/*****************************************************************************/
+/* The values equal the last row of gai2_impeg2_idct_q15 divided by 4096 and
+ * rounded to the nearest integer (e.g. 6393 / 4096 rounds to 2). */
+const WORD16 gai2_impeg2_mismatch_stg1_outp[] =
+{
+    2, -4, 7, -8, 8, -7, 4, -2
+};
+/* 8x8 table (64 values) in which entry [i * 8 + j] equals
+ * gai2_impeg2_mismatch_stg1_outp[i] * gai2_impeg2_idct_last_row_q11[j],
+ * e.g. 2 * 400 = 800 and 7 * -1138 = -7966. */
+const WORD16 gai2_impeg2_mismatch_stg2_additive[] =
+{
+     800,   -2276,  3406,   -4018,  4018,   -3406,  2276,   -800,
+     -1600, 4552,   -6812,  8036,   -8036,  6812,   -4552,  1600,
+     2800,  -7966,  11921,  -14063, 14063,  -11921, 7966,   -2800,
+     -3200, 9104,   -13624, 16072,  -16072, 13624,  -9104,  3200,
+     3200,  -9104,  13624,  -16072, 16072,  -13624, 9104,   -3200,
+     -2800, 7966,   -11921, 14063,  -14063, 11921,  -7966,  2800,
+     1600,  -4552,  6812,   -8036,  8036,   -6812,  4552,   -1600,
+     -800,  2276,   -3406,  4018,   -4018,  3406,   -2276,  800,
+};
+
+/* Read-only buffer of 64 zero bytes.
+ * NOTE(review): presumably used wherever an all-zero 8x8 block of bytes is
+ * needed - confirm against the callers. */
+const UWORD8 gau1_impeg2_zerobuf[] =
+{
+    0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0,
+};
+/*****************************************************************************/
+/* Tables of offset needed to address block in an MB                         */
+/*****************************************************************************
+ * Each table has four entries, one per block. The x offsets are 0 or 8 and
+ * the frame y offsets are 0 or 8; the field y offsets are 0 or 1.
+ * NOTE(review): the four entries presumably correspond to the four 8x8 luma
+ * blocks of a 16x16 macroblock, with _fld / _frm selecting field or frame
+ * organisation - confirm against the callers.
+ *****************************************************************************/
+const WORD16  gai2_impeg2_blk_y_off_fld[]  = {0,0,1,1};
+const WORD16  gai2_impeg2_blk_y_off_frm[]  = {0,0,8,8};
+const WORD16  gai2_impeg2_blk_x_off[]      = {0,8,0,8};

diff --git a/common/impeg2_globals.h b/common/impeg2_globals.h
new file mode 100755
index 0000000..e8c6865
--- /dev/null
+++ b/common/impeg2_globals.h

@@ -0,0 +1,57 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef __IMPEG2_GLOBALS_H__
+#define __IMPEG2_GLOBALS_H__
+/* Declarations of the constant tables defined in impeg2_globals.c.
+ * This header includes nothing itself, so the UWORD8 / UWORD16 / WORD16
+ * types must already be declared by the including file (impeg2_globals.c
+ * includes iv_datatypedef.h first).
+ *
+ * Quantisation, inverse scan and frame rate tables. */
+extern const UWORD8 gau1_impeg2_non_linear_quant_scale[];
+extern const UWORD8 gau1_impeg2_intra_quant_matrix_default[];
+extern const UWORD8 gau1_impeg2_inter_quant_matrix_default[];
+extern const UWORD8  gau1_impeg2_inv_scan_vertical[];
+extern const UWORD8  gau1_impeg2_inv_scan_zig_zag[];
+extern const UWORD16 gau2_impeg2_frm_rate_code[][2];
+/* Interpolation type and input offset tables. */
+extern const UWORD16 gau2_impeg2_chroma_interp_mv[][16];
+extern const UWORD16 gau2_impeg2_chroma_interp_inp1[][16];
+extern const UWORD16 gau2_impeg2_luma_interp_inp1[];
+extern const UWORD16 gau2_impeg2_luma_interp_inp2[];
+extern const UWORD16 gau2_impeg2_chroma_interp_inp2[];
+/* 8x8 inverse DCT coefficient matrices in Q15 and Q11. */
+extern const WORD16  gai2_impeg2_idct_q15[];
+extern const WORD16  gai2_impeg2_idct_q11[];
+/* Single rows / columns of the matrices above and the mismatch tables. */
+extern const WORD16 gai2_impeg2_mismatch_stg1_outp[];
+extern const WORD16 gai2_impeg2_idct_last_row_q11[];
+extern const WORD16 gai2_impeg2_idct_first_col_q15[];
+extern const WORD16 gai2_impeg2_idct_first_col_q11[];
+extern const WORD16 gai2_impeg2_mismatch_stg2_additive[];
+/* Block offsets inside a macroblock. */
+extern const WORD16  gai2_impeg2_blk_y_off_fld[];
+extern const WORD16  gai2_impeg2_blk_y_off_frm[];
+extern const WORD16  gai2_impeg2_blk_x_off[];
+/* Buffer of zero bytes. */
+extern const UWORD8 gau1_impeg2_zerobuf[];
+/* Rearranged coefficient tables; the row counts given here (8 and 4) must
+ * match the number of initialiser rows in impeg2_globals.c. */
+extern const WORD16 gai2_impeg2_idct_odd_8_q15[8][8];
+extern const WORD16 gai2_impeg2_idct_odd_8_q11[8][8];
+
+extern const WORD16 gai2_impeg2_idct_even_8_q11[4][8];
+extern const WORD16 gai2_impeg2_idct_even_8_q15[4][8];
+
+#endif /* __IMPEG2_GLOBALS_H__ */

diff --git a/common/impeg2_idct.c b/common/impeg2_idct.c
new file mode 100644
index 0000000..6834260
--- /dev/null
+++ b/common/impeg2_idct.c

@@ -0,0 +1,500 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : impeg2_idct.c                                        */
+/*                                                                           */
+/*  Description       : Contains 2d idct and inverse quantization functions  */
+/*                                                                           */
+/*  List of Functions : impeg2_idct_recon_dc()                               */
+/*                      impeg2_idct_recon_dc_mismatch()                      */
+/*                      impeg2_idct_recon()                                  */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         10 09 2005   Hairsh M        First Version                        */
+/*                                                                           */
+/*****************************************************************************/
+/*
+  IEEE - 1180 results for this IDCT
+  L                           256         256         5           5           300         300         384         384         Thresholds
+  H                           255         255         5           5           300         300         383         383
+  sign                        1           -1          1           -1          1           -1          1           -1
+  Peak Error                  1           1           1           1           1           1           1           1           1
+  Peak Mean Square Error      0.0191      0.0188      0.0108      0.0111      0.0176      0.0188      0.0165      0.0177      0.06
+  Overall Mean Square Error   0.01566406  0.01597656  0.0091875   0.00908906  0.01499063  0.01533281  0.01432344  0.01412344  0.02
+  Peak Mean Error             0.0027      0.0026      0.0028      0.002       0.0017      0.0033      0.0031      0.0025      0.015
+  Overall Mean Error          0.00002656  -0.00031406 0.00016875  0.00005469  -0.00003125 0.00011406  0.00009219  0.00004219  0.0015
+  */
+#include <stdio.h>
+#include <string.h>
+
+#include "iv_datatypedef.h"
+#include "iv.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+
+#include "impeg2_macros.h"
+#include "impeg2_globals.h"
+#include "impeg2_idct.h"
+
+
+/**
+ * Reconstructs a TRANS_SIZE_8 x TRANS_SIZE_8 block from its DC coefficient
+ * alone.
+ *
+ * pi2_src[0] is carried through both transform stages using element 0 of the
+ * Q15 and Q11 coefficient tables. The resulting constant is added to every
+ * prediction sample and the sum is clipped with CLIP_U8 before it is stored.
+ *
+ * @param[in]  pi2_src       Coefficient block; only element 0 is read
+ * @param[in]  pi2_tmp       Unused
+ * @param[in]  pu1_pred      Prediction block
+ * @param[out] pu1_dst       Reconstructed output block
+ * @param[in]  i4_src_strd   Unused
+ * @param[in]  i4_pred_strd  Distance between consecutive rows of pu1_pred
+ * @param[in]  i4_dst_strd   Distance between consecutive rows of pu1_dst
+ * @param[in]  i4_zero_cols  Unused
+ * @param[in]  i4_zero_rows  Unused
+ */
+void impeg2_idct_recon_dc(WORD16 *pi2_src,
+                            WORD16 *pi2_tmp,
+                            UWORD8 *pu1_pred,
+                            UWORD8 *pu1_dst,
+                            WORD32 i4_src_strd,
+                            WORD32 i4_pred_strd,
+                            WORD32 i4_dst_strd,
+                            WORD32 i4_zero_cols,
+                            WORD32 i4_zero_rows)
+{
+    WORD32 i4_stg1_out, i4_dc;
+    WORD32 i4_row, i4_col;
+
+    UNUSED(pi2_tmp);
+    UNUSED(i4_src_strd);
+    UNUSED(i4_zero_cols);
+    UNUSED(i4_zero_rows);
+
+    /* First stage: DC coefficient times the Q15 DC weight, rounded */
+    i4_stg1_out = pi2_src[0] * gai2_impeg2_idct_q15[0];
+    i4_stg1_out = ((i4_stg1_out + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT);
+
+    /* Second stage: same again with the Q11 DC weight */
+    i4_dc = i4_stg1_out * gai2_impeg2_idct_q11[0];
+    i4_dc = ((i4_dc + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT);
+
+    /* The residual is the same for every sample of the block */
+    for(i4_row = 0; i4_row < TRANS_SIZE_8; i4_row++)
+    {
+        for(i4_col = 0; i4_col < TRANS_SIZE_8; i4_col++)
+        {
+            WORD32 i4_pix = i4_dc + pu1_pred[i4_col];
+
+            pu1_dst[i4_col] = CLIP_U8(i4_pix);
+        }
+        pu1_pred += i4_pred_strd;
+        pu1_dst  += i4_dst_strd;
+    }
+}
+/**
+ * Reconstructs a TRANS_SIZE_8 x TRANS_SIZE_8 block from its DC coefficient
+ * plus a per-sample additive term.
+ *
+ * pi2_src[0] goes through the first transform stage and is multiplied by the
+ * Q11 DC weight. Before the second-stage rounding, the matching entry of
+ * gai2_impeg2_mismatch_stg2_additive (read in raster order) is added for each
+ * sample. The result is added to the prediction and clipped with CLIP_U8.
+ * NOTE(review): the additive table presumably accounts for the mismatch
+ * control adjustment of the last coefficient - confirm.
+ *
+ * @param[in]  pi2_src       Coefficient block; only element 0 is read
+ * @param[in]  pi2_tmp       Unused
+ * @param[in]  pu1_pred      Prediction block
+ * @param[out] pu1_dst       Reconstructed output block
+ * @param[in]  i4_src_strd   Unused
+ * @param[in]  i4_pred_strd  Distance between consecutive rows of pu1_pred
+ * @param[in]  i4_dst_strd   Distance between consecutive rows of pu1_dst
+ * @param[in]  i4_zero_cols  Unused
+ * @param[in]  i4_zero_rows  Unused
+ */
+void impeg2_idct_recon_dc_mismatch(WORD16 *pi2_src,
+                            WORD16 *pi2_tmp,
+                            UWORD8 *pu1_pred,
+                            UWORD8 *pu1_dst,
+                            WORD32 i4_src_strd,
+                            WORD32 i4_pred_strd,
+                            WORD32 i4_dst_strd,
+                            WORD32 i4_zero_cols,
+                            WORD32 i4_zero_rows)
+
+{
+    WORD32 i4_stg1_out, i4_dc_term;
+    WORD32 i4_row, i4_col;
+    const WORD16 *pi2_additive = gai2_impeg2_mismatch_stg2_additive;
+
+    UNUSED(pi2_tmp);
+    UNUSED(i4_src_strd);
+    UNUSED(i4_zero_cols);
+    UNUSED(i4_zero_rows);
+
+    /* First stage on the DC coefficient */
+    i4_stg1_out = pi2_src[0] * gai2_impeg2_idct_q15[0];
+    i4_stg1_out = ((i4_stg1_out + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT);
+
+    /* Second-stage DC product, kept unrounded so the additive can be applied */
+    i4_dc_term = i4_stg1_out * gai2_impeg2_idct_q11[0];
+
+    for(i4_row = 0; i4_row < TRANS_SIZE_8; i4_row++)
+    {
+        for(i4_col = 0; i4_col < TRANS_SIZE_8; i4_col++)
+        {
+            /* One additive entry is consumed per output sample */
+            WORD32 i4_pix = i4_dc_term + *pi2_additive++;
+
+            i4_pix = ((i4_pix + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT);
+            i4_pix += pu1_pred[i4_col];
+            pu1_dst[i4_col] = CLIP_U8(i4_pix);
+        }
+
+        pu1_pred += i4_pred_strd;
+        pu1_dst  += i4_dst_strd;
+    }
+
+}
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  This function performs Inverse transform  and reconstruction for 8x8
+ * input block
+ *
+ * @par Description:
+ *  Performs inverse transform and adds the prediction  data and clips output
+ * to 8 bit
+ *
+ * @param[in] pi2_src
+ *  Input 8x8 coefficients
+ *
+ * @param[in] pi2_tmp
+ *  Temporary 8x8 buffer for storing the output of the first stage of the
+ *  inverse transform
+ *
+ * @param[in] pu1_pred
+ *  Prediction 8x8 block
+ *
+ * @param[out] pu1_dst
+ *  Output 8x8 block
+ *
+ * @param[in] src_strd
+ *  Input stride
+ *
+ * @param[in] pred_strd
+ *  Prediction stride
+ *
+ * @param[in] dst_strd
+ *  Output Stride
+ *
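+ * @param[in] i4_zero_cols
+ *  Bit mask of zero columns in pi2_src: bit n set means column n is all zero
+ *
+ * @param[in] i4_zero_rows
+ *  Bit mask of zero rows in pi2_src: when bits 4-7 are all set, only the
+ *  first four rows of the input are used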
+ *
+ * @returns  Void
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+
+void impeg2_idct_recon(WORD16 *pi2_src,
+                        WORD16 *pi2_tmp,
+                        UWORD8 *pu1_pred,
+                        UWORD8 *pu1_dst,
+                        WORD32 i4_src_strd,
+                        WORD32 i4_pred_strd,
+                        WORD32 i4_dst_strd,
+                        WORD32 i4_zero_cols,
+                        WORD32 i4_zero_rows)
+{
+    WORD32 j, k;
+    WORD32 ai4_e[4], ai4_o[4];
+    WORD32 ai4_ee[2], ai4_eo[2];
+    WORD32 i4_add;
+    WORD32 i4_shift;
+    WORD16 *pi2_tmp_orig;
+    WORD32 i4_trans_size;
+    WORD32 i4_zero_rows_2nd_stage = i4_zero_cols;
+    WORD32 i4_row_limit_2nd_stage;
+
+    i4_trans_size = TRANS_SIZE_8;
+
+    pi2_tmp_orig = pi2_tmp;
+
+    if((i4_zero_cols & 0xF0) == 0xF0)
+        i4_row_limit_2nd_stage = 4;
+    else
+        i4_row_limit_2nd_stage = TRANS_SIZE_8;
+
+
+    if((i4_zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */
+    {
+        /************************************************************************************************/
+        /**********************************START - IT_RECON_8x8******************************************/
+        /************************************************************************************************/
+
+        /* Inverse Transform 1st stage */
+        i4_shift = IDCT_STG1_SHIFT;
+        i4_add = 1 << (i4_shift - 1);
+
+        for(j = 0; j < i4_row_limit_2nd_stage; j++)
+        {
+            /* Checking for Zero Cols */
+            if((i4_zero_cols & 1) == 1)
+            {
+                memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16));
+            }
+            else
+            {
+                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+                for(k = 0; k < 4; k++)
+                {
+                    ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
+                                    + gai2_impeg2_idct_q15[3 * 8 + k]
+                                                    * pi2_src[3 * i4_src_strd];
+                }
+                ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd];
+                ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd];
+                ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0];
+                ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0];
+
+                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
+                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
+                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
+                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
+                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
+                for(k = 0; k < 4; k++)
+                {
+                    pi2_tmp[k] =
+                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
+                    pi2_tmp[k + 4] =
+                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
+                }
+            }
+            pi2_src++;
+            pi2_tmp += i4_trans_size;
+            i4_zero_cols = i4_zero_cols >> 1;
+        }
+
+        pi2_tmp = pi2_tmp_orig;
+
+        /* Inverse Transform 2nd stage */
+        i4_shift = IDCT_STG2_SHIFT;
+        i4_add = 1 << (i4_shift - 1);
+        if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
+        {
+            for(j = 0; j < i4_trans_size; j++)
+            {
+                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+                for(k = 0; k < 4; k++)
+                {
+                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
+                                    + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
+                }
+                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
+                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
+                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
+                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
+
+                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
+                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
+                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
+                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
+                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
+                for(k = 0; k < 4; k++)
+                {
+                    WORD32 itrans_out;
+                    itrans_out =
+                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
+                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
+                    itrans_out =
+                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
+                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
+                }
+                pi2_tmp++;
+                pu1_pred += i4_pred_strd;
+                pu1_dst += i4_dst_strd;
+            }
+        }
+        else /* All rows of output of 1st stage are non-zero */
+        {
+            for(j = 0; j < i4_trans_size; j++)
+            {
+                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+                for(k = 0; k < 4; k++)
+                {
+                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
+                                    + gai2_impeg2_idct_q11[3 * 8 + k]
+                                                    * pi2_tmp[3 * i4_trans_size]
+                                    + gai2_impeg2_idct_q11[5 * 8 + k]
+                                                    * pi2_tmp[5 * i4_trans_size]
+                                    + gai2_impeg2_idct_q11[7 * 8 + k]
+                                                    * pi2_tmp[7 * i4_trans_size];
+                }
+
+                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
+                                + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
+                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
+                                + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
+                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
+                                + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
+                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
+                                + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
+
+                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
+                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
+                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
+                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
+                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
+                for(k = 0; k < 4; k++)
+                {
+                    WORD32 itrans_out;
+                    itrans_out =
+                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
+                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
+                    itrans_out =
+                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
+                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
+                }
+                pi2_tmp++;
+                pu1_pred += i4_pred_strd;
+                pu1_dst += i4_dst_strd;
+            }
+        }
+        /************************************************************************************************/
+        /************************************END - IT_RECON_8x8******************************************/
+        /************************************************************************************************/
+    }
+    else /* All rows of input are non-zero */
+    {
+        /************************************************************************************************/
+        /**********************************START - IT_RECON_8x8******************************************/
+        /************************************************************************************************/
+
+        /* Inverse Transform 1st stage */
+        i4_shift = IDCT_STG1_SHIFT;
+        i4_add = 1 << (i4_shift - 1);
+
+        for(j = 0; j < i4_row_limit_2nd_stage; j++)
+        {
+            /* Checking for Zero Cols */
+            if((i4_zero_cols & 1) == 1)
+            {
+                memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16));
+            }
+            else
+            {
+                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+                for(k = 0; k < 4; k++)
+                {
+                    ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
+                                    + gai2_impeg2_idct_q15[3 * 8 + k]
+                                                    * pi2_src[3 * i4_src_strd]
+                                    + gai2_impeg2_idct_q15[5 * 8 + k]
+                                                    * pi2_src[5 * i4_src_strd]
+                                    + gai2_impeg2_idct_q15[7 * 8 + k]
+                                                    * pi2_src[7 * i4_src_strd];
+                }
+
+                ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd]
+                                + gai2_impeg2_idct_q15[6 * 8 + 0] * pi2_src[6 * i4_src_strd];
+                ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd]
+                                + gai2_impeg2_idct_q15[6 * 8 + 1] * pi2_src[6 * i4_src_strd];
+                ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0]
+                                + gai2_impeg2_idct_q15[4 * 8 + 0] * pi2_src[4 * i4_src_strd];
+                ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0]
+                                + gai2_impeg2_idct_q15[4 * 8 + 1] * pi2_src[4 * i4_src_strd];
+
+                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
+                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
+                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
+                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
+                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
+                for(k = 0; k < 4; k++)
+                {
+                    pi2_tmp[k] =
+                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
+                    pi2_tmp[k + 4] =
+                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
+                }
+            }
+            pi2_src++;
+            pi2_tmp += i4_trans_size;
+            i4_zero_cols = i4_zero_cols >> 1;
+        }
+
+        pi2_tmp = pi2_tmp_orig;
+
+        /* Inverse Transform 2nd stage */
+        i4_shift = IDCT_STG2_SHIFT;
+        i4_add = 1 << (i4_shift - 1);
+        if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
+        {
+            for(j = 0; j < i4_trans_size; j++)
+            {
+                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+                for(k = 0; k < 4; k++)
+                {
+                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
+                                    + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
+                }
+                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
+                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
+                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
+                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
+
+                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
+                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
+                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
+                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
+                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
+                for(k = 0; k < 4; k++)
+                {
+                    WORD32 itrans_out;
+                    itrans_out =
+                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
+                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
+                    itrans_out =
+                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
+                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
+                }
+                pi2_tmp++;
+                pu1_pred += i4_pred_strd;
+                pu1_dst += i4_dst_strd;
+            }
+        }
+        else /* All rows of output of 1st stage are non-zero */
+        {
+            for(j = 0; j < i4_trans_size; j++)
+            {
+                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+                for(k = 0; k < 4; k++)
+                {
+                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
+                                    + gai2_impeg2_idct_q11[3 * 8 + k]
+                                                    * pi2_tmp[3 * i4_trans_size]
+                                    + gai2_impeg2_idct_q11[5 * 8 + k]
+                                                    * pi2_tmp[5 * i4_trans_size]
+                                    + gai2_impeg2_idct_q11[7 * 8 + k]
+                                                    * pi2_tmp[7 * i4_trans_size];
+                }
+
+                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
+                                + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
+                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
+                                + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
+                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
+                                + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
+                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
+                                + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
+
+                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
+                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
+                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
+                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
+                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
+                for(k = 0; k < 4; k++)
+                {
+                    WORD32 itrans_out;
+                    itrans_out =
+                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
+                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
+                    itrans_out =
+                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
+                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
+                }
+                pi2_tmp++;
+                pu1_pred += i4_pred_strd;
+                pu1_dst += i4_dst_strd;
+            }
+        }
+        /************************************************************************************************/
+        /************************************END - IT_RECON_8x8******************************************/
+        /************************************************************************************************/
+    }
+}
+

diff --git a/common/impeg2_idct.h b/common/impeg2_idct.h
new file mode 100644
index 0000000..80defde
--- /dev/null
+++ b/common/impeg2_idct.h

@@ -0,0 +1,66 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef __IMPEG2_IDCT_H__
+#define __IMPEG2_IDCT_H__
+
+
+/*****************************************************************************/
+/* Function Declarations (parameter notes inferred from names - confirm)     */
+/*****************************************************************************/
+
+typedef void  pf_idct_recon_t(WORD16 *pi2_src,    /* source (coefficient) block */
+                            WORD16 *pi2_tmp,      /* temporary / scratch buffer */
+                            UWORD8 *pu1_pred,     /* prediction block */
+                            UWORD8 *pu1_dst,      /* destination (reconstructed) block */
+                            WORD32 src_strd,      /* stride of pi2_src */
+                            WORD32 pred_strd,     /* stride of pu1_pred */
+                            WORD32 dst_strd,      /* stride of pu1_dst */
+                            WORD32 zero_cols,     /* zero-column information */
+                            WORD32 zero_rows);    /* zero-row information */
+
+/* ARM assembly modules currently ignore the non_zero_cols (presumably zero_cols) argument */
+pf_idct_recon_t impeg2_idct_recon_dc;
+
+pf_idct_recon_t impeg2_idct_recon_dc_mismatch;
+
+pf_idct_recon_t impeg2_idct_recon;
+
+
+pf_idct_recon_t impeg2_idct_recon_dc_a9q;
+
+pf_idct_recon_t impeg2_idct_recon_dc_mismatch_a9q;
+
+pf_idct_recon_t impeg2_idct_recon_a9q;
+
+
+pf_idct_recon_t impeg2_idct_recon_dc_av8;
+
+pf_idct_recon_t impeg2_idct_recon_dc_mismatch_av8;
+
+pf_idct_recon_t impeg2_idct_recon_av8;
+
+pf_idct_recon_t impeg2_idct_recon_sse42;
+
+pf_idct_recon_t impeg2_idct_recon_dc_mismatch_sse42;
+
+pf_idct_recon_t impeg2_idct_recon_dc_sse42;
+
+#endif /* #ifndef __IMPEG2_IDCT_H__ */
+

diff --git a/common/impeg2_inter_pred.c b/common/impeg2_inter_pred.c
new file mode 100644
index 0000000..019fa5c
--- /dev/null
+++ b/common/impeg2_inter_pred.c

@@ -0,0 +1,467 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2_inter_pred.c
+*
+* @brief
+*  Contains MC function definitions for MPEG2 decoder
+*
+* @author
+*  Harish
+*
+* @par List of Functions:
+* - impeg2_copy_mb()
+* - impeg2_interpolate()
+* - impeg2_mc_halfx_halfy_8x8()
+* - impeg2_mc_halfx_fully_8x8()
+* - impeg2_mc_fullx_halfy_8x8()
+* - impeg2_mc_fullx_fully_8x8()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include "iv_datatypedef.h"
+#include "iv.h"
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+
+#include "impeg2_inter_pred.h"
+#include "impeg2_globals.h"
+#include "impeg2_macros.h"
+#include "impeg2_idct.h"
+
+/*******************************************************************************
+*  Function Name   : impeg2_copy_mb
+*
+*  Description     : Copies the Y, U and V components of one macroblock from
+*                    ps_src_buf to ps_dst_buf
+*
+*  Arguments       :
+*  ps_src_buf      : Source buffer (pu1_y, pu1_u and pu1_v planes are read)
+*  ps_dst_buf      : Destination buffer (pu1_y, pu1_u and pu1_v are written)
+*  u4_src_wd       : Source luma width (row pitch); halved for U and V
+*  u4_dst_wd       : Destination luma width (row pitch); halved for U and V
+*
+*  Processing      : Y is copied as MB_SIZE rows of MB_SIZE bytes. U and V are
+*                    each copied as (MB_SIZE >> 1) rows of (MB_SIZE >> 1)
+*                    bytes, using the halved widths
+*
+*  Globals         : None
+*  Values Returned : None
+*******************************************************************************/
+void impeg2_copy_mb(yuv_buf_t *ps_src_buf,
+                    yuv_buf_t *ps_dst_buf,
+                    UWORD32 u4_src_wd,
+                    UWORD32 u4_dst_wd)
+{
+    UWORD8 *pu1_src;
+    UWORD8 *pu1_dst;
+    UWORD32 i;
+    UWORD32 u4_rows = MB_SIZE;
+    UWORD32 u4_cols = MB_SIZE;
+
+    /*******************************************************/
+    /* copy Y: MB_SIZE rows of MB_SIZE bytes               */
+    /*******************************************************/
+    pu1_src = ps_src_buf->pu1_y;
+    pu1_dst = ps_dst_buf->pu1_y;
+    for(i = 0; i < u4_rows; i++)
+    {
+        memcpy(pu1_dst, pu1_src, u4_cols);
+        pu1_src += u4_src_wd;
+        pu1_dst += u4_dst_wd;
+    }
+
+    u4_src_wd >>= 1; /* U and V use half the luma widths and half the block dimensions */
+    u4_dst_wd >>= 1;
+    u4_rows >>= 1;
+    u4_cols >>= 1;
+
+    /*******************************************************/
+    /* copy U (widths, rows and cols were halved above)    */
+    /*******************************************************/
+    pu1_src = ps_src_buf->pu1_u;
+    pu1_dst = ps_dst_buf->pu1_u;
+    for(i = 0; i < u4_rows; i++)
+    {
+        memcpy(pu1_dst, pu1_src, u4_cols);
+
+        pu1_src += u4_src_wd;
+        pu1_dst += u4_dst_wd;
+    }
+    /*******************************************************/
+    /* copy V (same halved geometry as U)                  */
+    /*******************************************************/
+    pu1_src = ps_src_buf->pu1_v;
+    pu1_dst = ps_dst_buf->pu1_v;
+    for(i = 0; i < u4_rows; i++)
+    {
+        memcpy(pu1_dst, pu1_src, u4_cols);
+
+        pu1_src += u4_src_wd;
+        pu1_dst += u4_dst_wd;
+    }
+
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2_interpolate                                       */
+/*                                                                           */
+/*  Description   : Rounded average of two macroblock buffers, written to    */
+/*                  the Y, U and V planes of ps_buf_dst                      */
+/*                                                                           */
+/*  Inputs        : ps_buf_src1 - First source                               */
+/*                  ps_buf_src2 - Second source                              */
+/*                  u4_stride   - Luma pitch of ps_buf_dst; halved for U/V   */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : dst = (src1 + src2 + 1) >> 1 for every byte. Sources are */
+/*                  read as packed rows (MB_SIZE / MB_CHROMA_SIZE bytes);    */
+/*                  only the destination pointer is advanced by the stride   */
+/*                                                                           */
+/*  Outputs       : ps_buf_dst  - Rounded average of the two sources         */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : No size checks; all 3 buffers must hold a full MB        */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         14 09 2005   Harish M        First Version                        */
+/*         15 09 2010   Venkat          Added stride                         */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2_interpolate(yuv_buf_t *ps_buf_src1,
+                        yuv_buf_t *ps_buf_src2,
+                        yuv_buf_t *ps_buf_dst,
+                        UWORD32 u4_stride)
+{
+
+    UWORD32 i,j;
+    UWORD8 *pu1_src1,*pu1_src2,*pu1_dst;
+    pu1_src1 = ps_buf_src1->pu1_y; /* Y plane: MB_SIZE x MB_SIZE bytes */
+    pu1_src2 = ps_buf_src2->pu1_y;
+    pu1_dst  = ps_buf_dst->pu1_y;
+    for(i = MB_SIZE; i > 0; i--)
+    {
+        for(j = MB_SIZE; j > 0; j--)
+        {
+            *pu1_dst++ = ((*pu1_src1++) + (*pu1_src2++) + 1) >> 1; /* average, rounding up */
+        }
+
+        pu1_dst += u4_stride - MB_SIZE; /* move to the start of the next destination row */
+
+    }
+
+    u4_stride >>= 1; /* U and V rows use half the luma pitch */
+
+    pu1_src1 = ps_buf_src1->pu1_u; /* U plane: MB_CHROMA_SIZE x MB_CHROMA_SIZE bytes */
+    pu1_src2 = ps_buf_src2->pu1_u;
+    pu1_dst  = ps_buf_dst->pu1_u;
+    for(i = MB_CHROMA_SIZE; i > 0 ; i--)
+    {
+        for(j = MB_CHROMA_SIZE; j > 0; j--)
+        {
+            *pu1_dst++ = ((*pu1_src1++) + (*pu1_src2++) + 1) >> 1;
+        }
+
+        pu1_dst += u4_stride - MB_CHROMA_SIZE;
+    }
+
+    pu1_src1 = ps_buf_src1->pu1_v; /* V plane: same geometry as U */
+    pu1_src2 = ps_buf_src2->pu1_v;
+    pu1_dst  = ps_buf_dst->pu1_v;
+    for(i = MB_CHROMA_SIZE; i > 0 ; i--)
+    {
+        for(j = MB_CHROMA_SIZE; j > 0; j--)
+        {
+            *pu1_dst++ = ((*pu1_src1++) + (*pu1_src2++) + 1) >> 1;
+        }
+
+        pu1_dst += u4_stride - MB_CHROMA_SIZE;
+    }
+
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2_mc_halfx_halfy_8x8()                              */
+/*                                                                           */
+/*  Description   : Builds the 8 x 8 block at half-pel offset (0.5,0.5) from */
+/*                  pu1_ref and places it as a block starting at the         */
+/*                  current position of pu1_out                              */
+/*                                                                           */
+/*  Inputs        : pu1_ref    - Reference frame from which the block will be*/
+/*                               extracted                                   */
+/*                  u4_ref_wid - Width of the reference frame                */
+/*                  u4_out_wid - Width of the output frame                   */
+/*                  Block width and height are fixed at BLK_SIZE (they are   */
+/*                  not parameters of this function)                         */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Point to the (0,0),(1,0),(0,1),(1,1) positions in the ref*/
+/*                  frame. Average these four values with rounding, i.e.     */
+/*                  (P0 + P1 + P2 + P3 + 2) >> 2, to get the value at        */
+/*                  (0.5,0.5). A 9 x 9 reference block yields 8 x 8 outputs  */
+/*                                                                           */
+/*  Outputs       : pu1_out - Output containing the extracted block          */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         05 09 2005   Harish M        First Version                        */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2_mc_halfx_halfy_8x8(UWORD8 *pu1_out,
+                            UWORD8 *pu1_ref,
+                            UWORD32 u4_ref_wid,
+                            UWORD32 u4_out_wid)
+{
+    UWORD8 *pu1_ref_p0,*pu1_ref_p1,*pu1_ref_p2,*pu1_ref_p3;
+    UWORD32 i,j;
+    /* P0-P3 are the pixels in the reference frame and Q is the value being */
+    /* estimated as the rounded average of those four pixels                */
+    /*
+       P0 P1
+         Q
+       P2 P3
+    */
+
+    pu1_ref_p0 = pu1_ref;
+    pu1_ref_p1 = pu1_ref + 1;
+    pu1_ref_p2 = pu1_ref + u4_ref_wid;
+    pu1_ref_p3 = pu1_ref + u4_ref_wid + 1;
+
+    for(i = 0; i < BLK_SIZE; i++)
+    {
+        for(j = 0; j < BLK_SIZE; j++)
+        {
+            *pu1_out++ =   (( (*pu1_ref_p0++ )
+                        + (*pu1_ref_p1++ )
+                        + (*pu1_ref_p2++ )
+                        + (*pu1_ref_p3++ ) + 2 ) >> 2);
+        }
+        pu1_ref_p0 += u4_ref_wid - BLK_SIZE; /* step all four taps to the next reference row */
+        pu1_ref_p1 += u4_ref_wid - BLK_SIZE;
+        pu1_ref_p2 += u4_ref_wid - BLK_SIZE;
+        pu1_ref_p3 += u4_ref_wid - BLK_SIZE;
+
+        pu1_out    += u4_out_wid - BLK_SIZE; /* step to the next output row */
+    }
+    return;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2_mc_halfx_fully_8x8()                              */
+/*                                                                           */
+/*  Description   : Builds the 8 x 8 block at half-pel offset (0.5,0) from   */
+/*                  pu1_ref and places it as a block starting at the         */
+/*                  current position of pu1_out                              */
+/*                                                                           */
+/*  Inputs        : pu1_ref    - Reference frame from which the block will be*/
+/*                               extracted                                   */
+/*                  u4_ref_wid - Width of the reference frame                */
+/*                  u4_out_wid - Width of the output frame                   */
+/*                  Block width and height are fixed at BLK_SIZE (they are   */
+/*                  not parameters of this function)                         */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Point to the (0,0) and (1,0) positions in the ref frame. */
+/*                  Average these two values with rounding, i.e.             */
+/*                  (P0 + P1 + 1) >> 1, to get the value at (0.5,0). A 9 x 8 */
+/*                  reference block yields the 8 x 8 output                  */
+/*                                                                           */
+/*  Outputs       : pu1_out - Output containing the extracted block          */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         05 09 2005   Harish M        First Version                        */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2_mc_halfx_fully_8x8(UWORD8 *pu1_out,
+                            UWORD8 *pu1_ref,
+                            UWORD32 u4_ref_wid,
+                            UWORD32 u4_out_wid)
+{
+    UWORD8 *pu1_ref_p0, *pu1_ref_p1;
+    UWORD32 i,j;
+
+    /* P0 and P1 are the pixels in the reference frame and Q is the value   */
+    /* being estimated as their rounded average                             */
+    /*
+       P0 Q P1
+    */
+
+    pu1_ref_p0 = pu1_ref;
+    pu1_ref_p1 = pu1_ref + 1;
+
+    for(i = 0; i < BLK_SIZE; i++)
+    {
+        for(j = 0; j < BLK_SIZE; j++)
+        {
+            *pu1_out++ =   ((( *pu1_ref_p0++ )
+                        + (*pu1_ref_p1++) + 1 ) >> 1);
+        }
+        pu1_ref_p0 += u4_ref_wid - BLK_SIZE; /* step both taps to the next reference row */
+        pu1_ref_p1 += u4_ref_wid - BLK_SIZE;
+
+        pu1_out    += u4_out_wid - BLK_SIZE; /* step to the next output row */
+    }
+    return;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2_mc_fullx_halfy_8x8()                                 */
+/*                                                                           */
+/*  Description   : Gets the buffer from (0,0.5) to (8,8.5)                  */
+/*                  and the above block of size 8 x 8 will be placed as a    */
+/*                  block from the current position of out_buf               */
+/*                                                                           */
+/*  Inputs        : pu1_ref    - Reference frame from which the block will be*/
+/*                               extracted                                   */
+/*                  u4_ref_wid - Width of the reference frame                */
+/*                  u4_out_wid - Width of the output frame                   */
+/*                  Block width and height are fixed at BLK_SIZE (they are   */
+/*                  not parameters of this function)                         */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Point to the (0,0) and (0,1)   position in the ref frame */
+/*                  Interpolate these two values to get the value at(0,0.5)  */
+/*                  Repeat this to get an 8 x 8 block using 8 x 9 block from */
+/*                  reference frame                                          */
+/*                                                                           */
+/*  Outputs       : out -  Output containing the extracted block             */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         05 09 2005   Harish M        First Version                        */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2_mc_fullx_halfy_8x8(UWORD8 *pu1_out,
+                            UWORD8 *pu1_ref,
+                            UWORD32 u4_ref_wid,
+                            UWORD32 u4_out_wid)
+{
+    /*
+     * Every output sample Q is the rounded average of two vertically
+     * adjacent reference samples:
+     *
+     *     upper
+     *       Q
+     *     lower
+     */
+    UWORD8 *pu1_upper = pu1_ref;
+    UWORD8 *pu1_lower = pu1_ref + u4_ref_wid;
+    UWORD32 u4_rows_left;
+    UWORD32 u4_cols_left;
+
+    for(u4_rows_left = BLK_SIZE; u4_rows_left > 0; u4_rows_left--)
+    {
+        for(u4_cols_left = BLK_SIZE; u4_cols_left > 0; u4_cols_left--)
+        {
+            /* +1 before the shift rounds the average to nearest */
+            *pu1_out = ((*pu1_upper + *pu1_lower + 1) >> 1);
+
+            pu1_out++;
+            pu1_upper++;
+            pu1_lower++;
+        }
+
+        /* All three pointers sit BLK_SIZE past the row start; move them */
+        /* to the start of the next row of their respective buffers      */
+        pu1_upper += u4_ref_wid - BLK_SIZE;
+        pu1_lower += u4_ref_wid - BLK_SIZE;
+        pu1_out   += u4_out_wid - BLK_SIZE;
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2_mc_fullx_fully_8x8()                                 */
+/*                                                                           */
+/*  Description   : Gets the buffer from (x,y) to (x+8,y+8)                  */
+/*                  and the above block of size 8 x 8 will be placed as a    */
+/*                  block from the current position of out_buf               */
+/*                                                                           */
+/*  Inputs        : ref - Reference frame from which the 8 x 8 block will    */
+/*                        be extracted.                                      */
+/*                  ref_wid - Width of reference frame                       */
+/*                  out_wid - Width of the output frame                      */
+/*                  Block width and height are fixed at BLK_SIZE             */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Point to the (0,0) position in the ref frame             */
+/*                  Get an 8 x 8 block from reference frame                  */
+/*                                                                           */
+/*  Outputs       : out -  Output containing the extracted block             */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         05 09 2005   Harish M        First Version                        */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2_mc_fullx_fully_8x8(UWORD8 *pu1_out,
+                            UWORD8 *pu1_ref,
+                            UWORD32 u4_ref_wid,
+                            UWORD32 u4_out_wid)
+{
+    /* Integer-sample position: no filtering, each row is copied verbatim */
+    UWORD32 u4_row = 0;
+
+    while(u4_row < BLK_SIZE)
+    {
+        memcpy(pu1_out, pu1_ref, BLK_SIZE);
+
+        /* Advance both buffers by their own stride */
+        pu1_out += u4_out_wid;
+        pu1_ref += u4_ref_wid;
+        u4_row++;
+    }
+}

diff --git a/common/impeg2_inter_pred.h b/common/impeg2_inter_pred.h
new file mode 100644
index 0000000..be3b0e5
--- /dev/null
+++ b/common/impeg2_inter_pred.h

@@ -0,0 +1,103 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef __IMPEG2_INTER_PRED_H__
+#define __IMPEG2_INTER_PRED_H__
+
+
+typedef struct /* Three 8-bit plane pointers, one per Y/U/V component */
+{
+    UWORD8 *pu1_y; /* Y plane (presumably luma) */
+    UWORD8 *pu1_u; /* U plane (presumably first chroma component) */
+    UWORD8 *pu1_v; /* V plane (presumably second chroma component) */
+}yuv_buf_t;
+
+typedef struct /* Same layout as yuv_buf_t but with 16-bit signed samples */
+{
+    WORD16 *pi2_y; /* Y plane, 16-bit samples */
+    WORD16 *pi2_u; /* U plane, 16-bit samples */
+    WORD16 *pi2_v; /* V plane, 16-bit samples */
+}yuv_buf16_t;
+
+/**
+ * Picture buffer
+ *
+ * Groups the three plane pointers of one picture with the bookkeeping
+ * fields below (timestamp, reference flag, buffer-manager ID).
+ */
+typedef struct
+{
+    UWORD8 *pu1_y; /* Y plane of the picture */
+    UWORD8 *pu1_u; /* U plane of the picture */
+    UWORD8 *pu1_v; /* V plane of the picture */
+
+    /** Used to store display Timestamp for current buffer */
+    WORD32 u4_ts; /* NOTE(review): u4_ prefix but declared signed WORD32 */
+    UWORD8 u1_used_as_ref; /* presumably non-zero while used as a reference - confirm */
+
+    /**
+     * buffer ID from buffer manager
+     */
+    WORD32 i4_buf_id;
+
+}pic_buf_t;
+
+typedef void pf_copy_mb_t (yuv_buf_t *src_buf, /* Function type: macroblock copy from src_buf ... */
+                   yuv_buf_t *dst_buf,         /* ... into dst_buf */
+                   UWORD32 src_wd,             /* width (stride) of the source buffer */
+                   UWORD32 dst_wd);            /* width (stride) of the destination buffer */
+
+typedef void pf_interpred_t(UWORD8 *out,UWORD8 *ref, UWORD32 ref_wid,  UWORD32 out_wid); /* Function type of the impeg2_mc_*_8x8 kernels: writes a block to out from ref, each buffer with its own width */
+
+typedef void pf_interpolate_t(yuv_buf_t *buf_src1, /* Function type: combines buf_src1 ... */
+                              yuv_buf_t *buf_src2, /* ... with buf_src2 ... */
+                              yuv_buf_t *buf_dst,  /* ... into buf_dst */
+                              UWORD32 stride);     /* single stride argument; which buffers it applies to is not visible here */
+
+pf_interpolate_t impeg2_interpolate; /* un-suffixed name: presumably the portable C version - confirm */
+pf_interpolate_t impeg2_interpolate_a9q; /* _a9q: presumably an ARM (Cortex-A9 class) variant - confirm */
+pf_interpolate_t impeg2_interpolate_av8; /* _av8: AV8 variant (see "AV8 Declarations" below) */
+
+pf_copy_mb_t impeg2_copy_mb; /* macroblock copy, same variant naming as above */
+pf_copy_mb_t impeg2_copy_mb_a9q;
+pf_copy_mb_t impeg2_copy_mb_av8;
+
+pf_interpred_t impeg2_mc_halfx_halfy_8x8; /* block kernels named by x/y sample position (half or full) */
+pf_interpred_t impeg2_mc_halfx_fully_8x8;
+pf_interpred_t impeg2_mc_fullx_halfy_8x8; /* averages each sample with the one a row below */
+pf_interpred_t impeg2_mc_fullx_fully_8x8; /* plain row-by-row copy */
+
+pf_interpred_t impeg2_mc_halfx_halfy_8x8_a9q; /* _a9q counterparts of the four kernels above */
+pf_interpred_t impeg2_mc_halfx_fully_8x8_a9q;
+pf_interpred_t impeg2_mc_fullx_halfy_8x8_a9q;
+pf_interpred_t impeg2_mc_fullx_fully_8x8_a9q;
+
+/* AV8 Declarations */
+pf_interpred_t impeg2_mc_halfx_halfy_8x8_av8;
+pf_interpred_t impeg2_mc_halfx_fully_8x8_av8;
+pf_interpred_t impeg2_mc_fullx_halfy_8x8_av8;
+pf_interpred_t impeg2_mc_fullx_fully_8x8_av8;
+
+
+/* SSE4.2 Declarations*/
+pf_copy_mb_t impeg2_copy_mb_sse42;
+pf_interpolate_t impeg2_interpolate_sse42;
+pf_interpred_t impeg2_mc_halfx_halfy_8x8_sse42;
+pf_interpred_t impeg2_mc_halfx_fully_8x8_sse42;
+pf_interpred_t impeg2_mc_fullx_halfy_8x8_sse42;
+pf_interpred_t impeg2_mc_fullx_fully_8x8_sse42;
+
+#endif /* #ifndef __IMPEG2_INTER_PRED_H__  */

diff --git a/common/impeg2_job_queue.c b/common/impeg2_job_queue.c
new file mode 100644
index 0000000..d36ce7c
--- /dev/null
+++ b/common/impeg2_job_queue.c

@@ -0,0 +1,530 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2_job_queue.c
+*
+* @brief
+*  Contains functions for job queue
+*
+* @author
+*  Harish
+*
+* @par List of Functions:
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "iv_datatypedef.h"
+#include "iv.h"
+#include "ithread.h"
+#include "impeg2_macros.h"
+#include "impeg2_job_queue.h"
+
+/**
+*******************************************************************************
+*
+* @brief Returns size for job queue context. Does not include job queue buffer
+* requirements
+*
+* @par   Description
+* Returns size for job queue context. Does not include job queue buffer
+* requirements. Buffer size required to store the jobs should be allocated in
+* addition to the value returned here.
+*
+* @returns Size of the job queue context
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 impeg2_jobq_ctxt_size(void)
+{
+    /* Context = jobq_t header followed by the storage for its mutex.     */
+    /* The job buffer itself is not included; callers add that on top.    */
+    /* (void) makes the definition a prototype matching the header.       */
+    WORD32 i4_size;
+
+    i4_size = sizeof(jobq_t);
+    i4_size += ithread_get_mutex_lock_size();
+
+    return i4_size;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*   Locks the jobq context
+*
+* @par   Description
+*   Locks the jobq context by calling ithread_mutex_lock()
+*
+* @param[in] ps_jobq
+*   Job Queue context
+*
+* @returns IV_FAIL if mutex lock fails else IV_SUCCESS
+*
+* @remarks
+*
+*******************************************************************************
+*/
+IV_API_CALL_STATUS_T impeg2_jobq_lock(jobq_t *ps_jobq)
+{
+    /* Any non-zero result from ithread_mutex_lock() is treated as failure */
+    WORD32 i4_lock_status = ithread_mutex_lock(ps_jobq->pv_mutex);
+
+    return (0 != i4_lock_status) ? IV_FAIL : IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*   Unlocks the jobq context
+*
+* @par   Description
+*   Unlocks the jobq context by calling ithread_mutex_unlock()
+*
+* @param[in] ps_jobq
+*   Job Queue context
+*
+* @returns IV_FAIL if mutex unlock fails else IV_SUCCESS
+*
+* @remarks
+*
+*******************************************************************************
+*/
+
+IV_API_CALL_STATUS_T impeg2_jobq_unlock(jobq_t *ps_jobq)
+{
+    /* Any non-zero result from ithread_mutex_unlock() is treated as failure */
+    WORD32 i4_unlock_status = ithread_mutex_unlock(ps_jobq->pv_mutex);
+
+    return (0 != i4_unlock_status) ? IV_FAIL : IV_SUCCESS;
+}
+/**
+*******************************************************************************
+*
+* @brief
+*   Yields the thread
+*
+* @par   Description
+*   Yields by calling impeg2_jobq_unlock(), ithread_yield() and then
+* impeg2_jobq_lock(). The jobq is unlocked first so that it can be accessed
+* by other threads. If unlock is not done before calling yield then no other
+* thread can access the jobq functions and update jobq.
+*
+* @param[in] ps_jobq
+*   Job Queue context
+*
+* @returns IV_FAIL if mutex unlock or lock fails else IV_SUCCESS
+*
+* @remarks
+*
+*******************************************************************************
+*/
+IV_API_CALL_STATUS_T impeg2_jobq_yield(jobq_t *ps_jobq)
+{
+    IV_API_CALL_STATUS_T e_status;
+
+    /* Drop the mutex first so other threads can touch the queue */
+    e_status = impeg2_jobq_unlock(ps_jobq);
+    if(IV_SUCCESS != e_status)
+    {
+        return e_status;
+    }
+
+    ithread_yield();
+
+    /* Take the mutex back; the outcome of that is the outcome of the call */
+    e_status = impeg2_jobq_lock(ps_jobq);
+    if(IV_SUCCESS != e_status)
+    {
+        return e_status;
+    }
+
+    return IV_SUCCESS;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief Releases the job queue mutex
+*
+* @par   Description
+* Frees the jobq context by destroying the mutex it owns. The memory holding
+* the context and the job buffer is not released here.
+*
+* @param[in] ps_jobq
+* Job Queue context
+*
+* @returns IV_SUCCESS if the mutex was destroyed, else IV_FAIL
+*
+* @remarks
+* Since it will be called only once by master thread this is not thread safe.
+*
+*******************************************************************************
+*/
+IV_API_CALL_STATUS_T impeg2_jobq_free(jobq_t *ps_jobq)
+{
+    /* Only the mutex is torn down; zero from the destroy call means success */
+    WORD32 i4_destroy_status = ithread_mutex_destroy(ps_jobq->pv_mutex);
+
+    if(0 != i4_destroy_status)
+    {
+        return IV_FAIL;
+    }
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the job queue
+*
+* @par   Description
+* Initializes the jobq context and sets write and read pointers to start of
+* job queue buffer
+*
+* @param[in] pv_buf
+* Memory for job queue buffer and job queue context
+*
+* @param[in] i4_buf_size
+* Size of the total memory allocated
+*
+* @returns Pointer to job queue context, or NULL if the memory is too small
+*
+* @remarks
+* Since it will be called only once by master thread this is not thread safe.
+*
+*******************************************************************************
+*/
+void* impeg2_jobq_init(void *pv_buf, WORD32 i4_buf_size)
+{
+    /*
+     * Lays out the caller-supplied memory as:
+     *     [ jobq_t ][ mutex storage ][ job buffer ... ]
+     * and returns the jobq_t at its start. Returns NULL when pv_buf is NULL
+     * or when nothing would be left for the job buffer.
+     */
+    jobq_t *ps_jobq;
+    UWORD8 *pu1_buf;
+    WORD32 i4_mutex_size;
+    WORD32 i4_ctxt_size;
+
+    i4_mutex_size = ithread_get_mutex_lock_size();
+    i4_ctxt_size = (WORD32)sizeof(jobq_t) + i4_mutex_size;
+
+    /* Validate BEFORE writing anything: the context fields live inside   */
+    /* pv_buf, so touching them with an undersized buffer would write     */
+    /* out of bounds.                                                     */
+    if((NULL == pv_buf) || (i4_buf_size <= i4_ctxt_size))
+        return NULL;
+
+    pu1_buf = (UWORD8 *)pv_buf;
+
+    ps_jobq = (jobq_t *)pu1_buf;
+    pu1_buf += sizeof(jobq_t);
+
+    /* Mutex storage sits immediately after the context header */
+    ps_jobq->pv_mutex = pu1_buf;
+    pu1_buf += i4_mutex_size;
+
+    ithread_mutex_init(ps_jobq->pv_mutex);
+
+    /* Whatever remains is the job buffer; the queue starts out empty */
+    ps_jobq->pv_buf_base = pu1_buf;
+    ps_jobq->pv_buf_wr = pu1_buf;
+    ps_jobq->pv_buf_rd = pu1_buf;
+    ps_jobq->pv_buf_end = pu1_buf + (i4_buf_size - i4_ctxt_size);
+    ps_jobq->i4_terminate = 0;
+
+    return ps_jobq;
+}
+/**
+*******************************************************************************
+*
+* @brief
+*   Resets the jobq context
+*
+* @par   Description
+*   Resets the jobq context by initializing job queue context elements
+*
+* @param[in] ps_jobq
+*   Job Queue context
+*
+* @returns IV_FAIL if lock unlock fails else IV_SUCCESS
+*
+* @remarks
+*
+*******************************************************************************
+*/
+IV_API_CALL_STATUS_T impeg2_jobq_reset(jobq_t *ps_jobq)
+{
+    IV_API_CALL_STATUS_T e_status;
+
+    e_status = impeg2_jobq_lock(ps_jobq);
+    if(IV_SUCCESS != e_status)
+    {
+        return e_status;
+    }
+
+    /* Rewind both cursors to the buffer base and clear the terminate flag */
+    ps_jobq->i4_terminate = 0;
+    ps_jobq->pv_buf_rd = ps_jobq->pv_buf_base;
+    ps_jobq->pv_buf_wr = ps_jobq->pv_buf_base;
+
+    e_status = impeg2_jobq_unlock(ps_jobq);
+    if(IV_SUCCESS != e_status)
+    {
+        return e_status;
+    }
+
+    return e_status;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*   Deinitializes the jobq context
+*
+* @par   Description
+*   Deinitializes the jobq context by calling impeg2_jobq_reset()
+* and then destroying the mutex created
+*
+* @param[in] ps_jobq
+*   Job Queue context
+*
+* @returns IV_FAIL if lock unlock or mutex destroy fails else IV_SUCCESS
+*
+* @remarks
+*
+*******************************************************************************
+*/
+IV_API_CALL_STATUS_T impeg2_jobq_deinit(jobq_t *ps_jobq)
+{
+    IV_API_CALL_STATUS_T e_status;
+
+    /* Put the queue back into its empty state before tearing it down */
+    e_status = impeg2_jobq_reset(ps_jobq);
+    if(IV_SUCCESS != e_status)
+    {
+        return e_status;
+    }
+
+    /* Non-zero from the destroy call means the mutex could not be destroyed */
+    if(0 != ithread_mutex_destroy(ps_jobq->pv_mutex))
+    {
+        return IV_FAIL;
+    }
+
+    return IV_SUCCESS;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*   Terminates the jobq
+*
+* @par   Description
+*   Terminates the jobq by setting a flag in context.
+*
+* @param[in] ps_jobq
+*   Job Queue context
+*
+* @returns IV_FAIL if lock unlock fails else IV_SUCCESS
+*
+* @remarks
+*
+*******************************************************************************
+*/
+
+IV_API_CALL_STATUS_T impeg2_jobq_terminate(jobq_t *ps_jobq)
+{
+    IV_API_CALL_STATUS_T e_status;
+
+    e_status = impeg2_jobq_lock(ps_jobq);
+    if(IV_SUCCESS != e_status)
+    {
+        return e_status;
+    }
+
+    /* impeg2_jobq_dequeue() gives up with IV_FAIL once this is set and */
+    /* no further job is available                                      */
+    ps_jobq->i4_terminate = 1;
+
+    e_status = impeg2_jobq_unlock(ps_jobq);
+    if(IV_SUCCESS != e_status)
+    {
+        return e_status;
+    }
+
+    return e_status;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief Adds a job to the queue
+*
+* @par   Description
+* Adds a job to the queue and updates wr address to next location.
+* Format/content of the job structure is abstracted and hence size of the job
+* buffer is being passed.
+*
+* @param[in] ps_jobq
+*   Job Queue context
+*
+* @param[in] pv_job
+*   Pointer to the location that contains details of the job to be added
+*
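+* @param[in] i4_job_size
+*   Size of the job buffer
+*
+* @param[in] i4_blocking
+*   To signal if the write is blocking or non-blocking. Currently unused.
+*
+* @param[in] i4_lock
+*   Non-zero to take the jobq mutex for the duration of the call
+*
+* @returns IV_SUCCESS if the job was added; IV_FAIL if it does not fit in the
+*   remaining buffer or if the mutex could not be locked/unlocked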
+*
+* @remarks
+* Job Queue buffer is assumed to be allocated to handle worst case number of jobs
+* Wrap around is not supported
+*
+*******************************************************************************
+*/
+IV_API_CALL_STATUS_T impeg2_jobq_queue(jobq_t *ps_jobq,
+                                       void *pv_job,
+                                       WORD32 i4_job_size,
+                                       WORD32 i4_blocking,
+                                       WORD32 i4_lock)
+{
+    IV_API_CALL_STATUS_T e_status;
+    UWORD8 *pu1_wr;
+    UWORD8 *pu1_wr_next;
+
+    UNUSED(i4_blocking);
+
+    if(i4_lock)
+    {
+        e_status = impeg2_jobq_lock(ps_jobq);
+        if(IV_SUCCESS != e_status)
+        {
+            return e_status;
+        }
+    }
+
+    pu1_wr = (UWORD8 *)ps_jobq->pv_buf_wr;
+    pu1_wr_next = pu1_wr + i4_job_size;
+
+    if(pu1_wr_next <= (UWORD8 *)ps_jobq->pv_buf_end)
+    {
+        /* Job fits: append it and move the write cursor past it */
+        memcpy(ps_jobq->pv_buf_wr, pv_job, i4_job_size);
+        ps_jobq->pv_buf_wr = pu1_wr_next;
+        e_status = IV_SUCCESS;
+    }
+    else
+    {
+        /* Buffer exhausted. Wrap around is not implemented: it would  */
+        /* have to wait for the reader to consume the first job_size   */
+        /* bytes from the beginning of the job queue.                  */
+        e_status = IV_FAIL;
+    }
+
+    /* The terminate flag is cleared on every call, fit or not */
+    ps_jobq->i4_terminate = 0;
+
+    if(i4_lock)
+    {
+        IV_API_CALL_STATUS_T e_unlock_status = impeg2_jobq_unlock(ps_jobq);
+        if(IV_SUCCESS != e_unlock_status)
+        {
+            return e_unlock_status;
+        }
+    }
+
+    return e_status;
+}
+/**
+*******************************************************************************
+*
+* @brief Gets next from the Job queue
+*
+* @par   Description
+* Gets next job from the job queue and updates rd address to next location.
+* Format/content of the job structure is abstracted and hence size of the job
+* buffer is being passed. If it is a blocking call and if there is no new job
+* then this function unlocks the mutex, calls yield and then locks it back,
+* and continues till a job is available or terminate is set
+*
+* @param[in] ps_jobq
+*   Job Queue context
+*
+* @param[out] pv_job
+*   Pointer to the location that contains details of the job to be written
+*
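+* @param[in] i4_job_size
+*   Size of the job buffer
+*
+* @param[in] i4_blocking
+*   To signal if the read is blocking or non-blocking.
+*
+* @param[in] i4_lock
+*   Non-zero to take the jobq mutex for the duration of the call
+*
+* @returns IV_SUCCESS if a job was copied to pv_job, else IV_FAIL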
+*
+* @remarks
+* Job Queue buffer is assumed to be allocated to handle worst case number of jobs
+* Wrap around is not supported
+*
+*******************************************************************************
+*/
+IV_API_CALL_STATUS_T impeg2_jobq_dequeue(jobq_t *ps_jobq,
+                                         void *pv_job,
+                                         WORD32 i4_job_size,
+                                         WORD32 i4_blocking,
+                                         WORD32 i4_lock)
+{
+    /*
+     * Copies the next i4_job_size bytes at the read cursor into pv_job and
+     * advances the cursor. Returns IV_SUCCESS when a job was delivered and
+     * IV_FAIL when:
+     *   - the read cursor is too close to the end of the buffer (wrap
+     *     around is not supported),
+     *   - no job is available and terminate has been set,
+     *   - no job is available and the call is not a blocking + locking one.
+     * A lock/unlock failure is returned as-is.
+     */
+    IV_API_CALL_STATUS_T e_ret = IV_FAIL;
+    IV_API_CALL_STATUS_T e_ret_tmp;
+    volatile UWORD8 *pu1_buf;
+    if(i4_lock)
+    {
+        e_ret_tmp = impeg2_jobq_lock(ps_jobq);
+        RETURN_IF((e_ret_tmp != IV_SUCCESS), e_ret_tmp);
+    }
+    pu1_buf = (UWORD8 *)ps_jobq->pv_buf_rd;
+
+
+    if((UWORD8 *)ps_jobq->pv_buf_end >= (pu1_buf + i4_job_size))
+    {
+        while(1)
+        {
+            /* Re-read the cursor each pass: it may have moved while the */
+            /* mutex was released inside impeg2_jobq_yield()             */
+            pu1_buf = (UWORD8 *)ps_jobq->pv_buf_rd;
+            if((UWORD8 *)ps_jobq->pv_buf_wr >= (pu1_buf + i4_job_size))
+            {
+                memcpy(pv_job, ps_jobq->pv_buf_rd, i4_job_size);
+                ps_jobq->pv_buf_rd = (UWORD8 *)ps_jobq->pv_buf_rd + i4_job_size;
+                e_ret = IV_SUCCESS;
+                break;
+            }
+            else
+            {
+                /* If all the entries have been dequeued, then break and return */
+                if(1 == ps_jobq->i4_terminate)
+                {
+                    e_ret = IV_FAIL;
+                    break;
+                }
+
+                if((1 == i4_blocking) && (1 == i4_lock))
+                {
+                    /* Let other threads run (and possibly queue a job), */
+                    /* then check again                                  */
+                    impeg2_jobq_yield(ps_jobq);
+
+                }
+                else
+                {
+                    /* If there is no job available,
+                     * and this is non blocking call then return fail.
+                     * The break is essential: without it the loop keeps
+                     * spinning, with the mutex held when i4_lock is set. */
+                    e_ret = IV_FAIL;
+                    break;
+                }
+            }
+        }
+    }
+    else
+    {
+        /* Handle wrap around case */
+        /* Wait for pv_buf_rd to consume first i4_job_size number of bytes
+         * from the beginning of job queue
+         */
+        e_ret = IV_FAIL;
+    }
+    if(i4_lock)
+    {
+        e_ret_tmp = impeg2_jobq_unlock(ps_jobq);
+        RETURN_IF((e_ret_tmp != IV_SUCCESS), e_ret_tmp);
+    }
+
+    return e_ret;
+}

diff --git a/common/impeg2_job_queue.h b/common/impeg2_job_queue.h
new file mode 100644
index 0000000..46d8bb9
--- /dev/null
+++ b/common/impeg2_job_queue.h

@@ -0,0 +1,72 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2_job_queue.h
+*
+* @brief
+*  Contains functions for job queue
+*
+* @author
+*  Harish
+*
+* @par List of Functions:
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _IMPEG2_JOB_QUEUE_H_
+#define _IMPEG2_JOB_QUEUE_H_
+
+typedef struct /* Job queue context: cursors into one linear job buffer plus its mutex */
+{
+    /** Pointer to buffer base which contains the jobs */
+    void *pv_buf_base;
+
+    /** Pointer to current address where new job can be added */
+    void *pv_buf_wr; /* advanced by the job size on each successful impeg2_jobq_queue() */
+
+    /** Pointer to current address from where next job can be obtained */
+    void *pv_buf_rd; /* advanced by the job size on each successful impeg2_jobq_dequeue() */
+
+    /** Pointer to end of job buffer */
+    void *pv_buf_end; /* queue/dequeue fail rather than wrap when a job would cross this */
+
+    /** Mutex used to keep the functions thread-safe */
+    void *pv_mutex;
+
+    /** Flag to indicate jobq has to be terminated */
+    WORD32 i4_terminate; /* set to 1 by impeg2_jobq_terminate(); cleared by init, reset and queue */
+}jobq_t;
+
+WORD32 impeg2_jobq_ctxt_size(void); /* bytes needed for jobq_t plus its mutex; job buffer not included */
+void* impeg2_jobq_init(void *pv_buf, WORD32 buf_size); /* builds the context inside pv_buf; NULL if buf_size is too small */
+IV_API_CALL_STATUS_T impeg2_jobq_free(jobq_t *ps_jobq); /* destroys the mutex only */
+IV_API_CALL_STATUS_T impeg2_jobq_reset(jobq_t *ps_jobq); /* rewinds read/write cursors and clears terminate */
+IV_API_CALL_STATUS_T impeg2_jobq_deinit(jobq_t *ps_jobq); /* reset followed by mutex destroy */
+IV_API_CALL_STATUS_T impeg2_jobq_terminate(jobq_t *ps_jobq); /* sets the terminate flag */
+IV_API_CALL_STATUS_T impeg2_jobq_queue(jobq_t *ps_jobq, void *pv_job, WORD32 job_size, WORD32 blocking, WORD32 lock); /* appends one job; blocking is unused */
+IV_API_CALL_STATUS_T impeg2_jobq_dequeue(jobq_t *ps_jobq, void *pv_job, WORD32 job_size, WORD32 blocking, WORD32 lock); /* copies the next job to pv_job; yields while empty only if blocking and lock are both 1 */
+
+#endif /* _IMPEG2_JOB_QUEUE_H_ */

diff --git a/common/impeg2_macros.h b/common/impeg2_macros.h
new file mode 100644
index 0000000..366510f
--- /dev/null
+++ b/common/impeg2_macros.h

@@ -0,0 +1,60 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef __IMPEG2_MACROS_H__
+#define __IMPEG2_MACROS_H__
+
+/*
+ * Generic helper macros. These are plain textual macros: several of them
+ * evaluate an argument more than once, so do not pass expressions with
+ * side effects (e.g. ABS(i++)).
+ */
+
+/* Absolute value of x */
+#define ABS(x) ((x) < 0 ? (-1 * (x)) : (x))
+
+/* Larger of x and y */
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+
+/* Smaller of x and y */
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+
+/* Clamps the lvalue Number into [Min, Max] in place. Expands to a bare      */
+/* if / else-if statement, so put it inside braces when it is the body of    */
+/* another if that has its own else.                                         */
+#define CLIP(Number,Max,Min)    if((Number) > (Max)) (Number) = (Max); \
+else if((Number) < (Min)) (Number) = (Min)
+
+/* -1 for negative values, 1 otherwise (zero included) */
+#define SIGN(Number)    (((Number) < 0) ? -1 : 1)
+
+
+/* Bit field val[msb:lsb], returned as UWORD16 */
+#define BITS(val,msb,lsb) (UWORD16)((((val) >> (lsb)) & ((1 << ((msb) - (lsb) + 1)) - 1)))
+
+/* Single bit 'bit' of val, returned as UWORD16 */
+#define BIT(val,bit)      (UWORD16)(((val) >> (bit)) & 0x1)
+
+/* Non-zero when lowerLimit <= val <= upperLimit (note: upper limit first) */
+#define IS_VAL_IN_RANGE(val,upperLimit,lowerLimit) ((val) >= (lowerLimit) && (val) <= (upperLimit))
+
+/* dword shifted right by 16 / low 16 bits of dword. The argument is        */
+/* parenthesized so that expressions such as LSW(a | b) expand correctly.   */
+#define MSW(dword)        ((dword) >> 16)
+#define LSW(dword)        ((dword) & 0xFFFF)
+/* Adds 1 to strictly positive values, then shifts right by one */
+#define DIV_2_RND(mv) (((mv) + ((mv) > 0)) >> 1)
+/* 1 for negative values, 0 otherwise */
+#define IS_NEG(Number)    (((Number) < 0) ? 1 : 0)
+
+/* Round x up to the next multiple of 128 / 64 / 32 / 16 / 8 */
+#define ALIGN128(x) ((((x) + 127) >> 7) << 7)
+#define ALIGN64(x)  ((((x) + 63) >> 6) << 6)
+#define ALIGN32(x)  ((((x) + 31) >> 5) << 5)
+#define ALIGN16(x)  ((((x) + 15) >> 4) << 4)
+#define ALIGN8(x)   ((((x) + 7) >> 3) << 3)
+
+
+/* Returns retval from the enclosing function when cond is true. Expands to */
+/* a braced if statement; avoid using it as the body of an if/else.         */
+#define RETURN_IF(cond, retval) if(cond) {return (retval);}
+/* Marks a parameter or variable as intentionally unused */
+#define UNUSED(x) ((void)(x))
+
+
+/* Thin wrapper over the standard assert(); <assert.h> must be included */
+#define ASSERT(x) assert(x)
+
+
+#endif  /* __IMPEG2_MACROS_H__ */

diff --git a/common/impeg2_mem_func.c b/common/impeg2_mem_func.c
new file mode 100644
index 0000000..9268c01
--- /dev/null
+++ b/common/impeg2_mem_func.c

@@ -0,0 +1,87 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2_mem_func.c
+*
+* @brief
+*  Contains utility function definitions for MPEG2 codec
+*
+* @author
+*  Harish
+*
+* @par List of Functions:
+* - impeg2_memset0_16bit_8x8_linear_block()
+* - impeg2_memset_8bit_8x8_block()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include "iv_datatypedef.h"
+#include "impeg2_defs.h"
+
+/*******************************************************************************
+*  Function Name   : impeg2_memset0_16bit_8x8_linear_block
+*
+*  Description     : memsets residual buf to 0
+*
+*  Arguments       : destination buffer
+*
+*  Values Returned : None
+*******************************************************************************/
+
+
+void impeg2_memset0_16bit_8x8_linear_block (WORD16 *pi2_buf)
+{
+    /* Clears 64 consecutive 16-bit entries starting at pi2_buf */
+    memset(pi2_buf, 0, sizeof(*pi2_buf) * 64);
+}
+
+
+
+/*******************************************************************************
+*  Function Name   : impeg2_memset_8bit_8x8_block
+*
+*  Description     : memsets residual buf to value
+*
+*  Arguments       : destination buffer, value and stride
+*
+*  Values Returned : None
+*******************************************************************************/
+
+
+void impeg2_memset_8bit_8x8_block(UWORD8 *pu1_dst, WORD32 u4_dc_val, WORD32 u4_dst_wd)
+{
+    /* Fills BLK_SIZE rows of BLK_SIZE bytes with u4_dc_val; consecutive */
+    /* rows start u4_dst_wd bytes apart in the destination.              */
+    WORD32 i4_row;
+
+    for(i4_row = 0; i4_row < BLK_SIZE; i4_row++)
+    {
+        memset(pu1_dst, u4_dc_val, BLK_SIZE);
+        pu1_dst += u4_dst_wd;
+    }
+}
+
+
+

diff --git a/common/impeg2_mem_func.h b/common/impeg2_mem_func.h
new file mode 100644
index 0000000..f73702c
--- /dev/null
+++ b/common/impeg2_mem_func.h

@@ -0,0 +1,41 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#ifndef IMPEG2_MEM_FUNC_H_
+#define IMPEG2_MEM_FUNC_H_
+
+typedef void pf_memset0_one_16bit_buf_t (WORD16 *buf); /* Function type: zero-fills one block of 16-bit entries at buf */
+typedef void pf_memset_8bit_t (UWORD8 *dst, WORD32 dc_val, WORD32 dst_wd); /* Function type: fills a block at dst with dc_val; dst_wd is the distance between rows */
+
+pf_memset0_one_16bit_buf_t impeg2_memset0_16bit_8x8_linear_block; /* C version: clears 64 contiguous WORD16 entries */
+pf_memset0_one_16bit_buf_t impeg2_memset0_16bit_8x8_linear_block_a9q; /* _a9q: presumably an ARM (Cortex-A9 class) variant - confirm */
+
+pf_memset0_one_16bit_buf_t impeg2_memset0_16bit_8x8_linear_block_sse42; /* _sse42: presumably an x86 SSE4.2 variant - confirm */
+
+pf_memset0_one_16bit_buf_t impeg2_memset0_16bit_8x8_linear_block_av8; /* _av8: presumably an ARMv8 variant - confirm */
+
+pf_memset_8bit_t impeg2_memset_8bit_8x8_block; /* C version: fills BLK_SIZE rows of BLK_SIZE bytes */
+pf_memset_8bit_t impeg2_memset_8bit_8x8_block_a9q; /* variants follow the same suffix naming as above */
+
+pf_memset_8bit_t impeg2_memset_8bit_8x8_block_sse42;
+
+pf_memset_8bit_t impeg2_memset_8bit_8x8_block_av8;
+
+#endif /* IMPEG2_MEM_FUNC_H_ */

diff --git a/common/ithread.c b/common/ithread.c
new file mode 100644
index 0000000..76fdad3
--- /dev/null
+++ b/common/ithread.c

@@ -0,0 +1,453 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : ithread.c                                            */
+/*                                                                           */
+/*  Description       : Contains abstraction for threads, mutex and semaphores*/
+/*                                                                           */
+/*  List of Functions :                                                      */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 09 2012   Harish          Initial Version                      */
+/*****************************************************************************/
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+#include <string.h>
+#include "iv_datatypedef.h"
+#include "ithread.h"
+#include <sys/types.h>
+
+#ifndef X86_MSVC
+//#define PTHREAD_AFFINITY
+//#define SYSCALL_AFFINITY
+
+#ifdef PTHREAD_AFFINITY
+#define _GNU_SOURCE
+#define __USE_GNU
+#endif
+
+#include <pthread.h>
+#include <sched.h>
+#include <semaphore.h>
+#include <unistd.h>
+
+
+#endif
+#if 0
+#include <sys/syscall.h>
+#endif
+
+#ifdef X86_MSVC
+
+#include <windows.h>
+#define SEM_MAX_COUNT       100
+#define SEM_INCREMENT_COUNT 1
+
+UWORD32 ithread_get_handle_size(void)
+{   /* Bytes of storage that ithread_create() fills with the thread HANDLE. */
+    return sizeof(HANDLE);
+}
+
+UWORD32 ithread_get_mutex_lock_size(void)
+{   /* A mutex is kept as one Win32 HANDLE (written by ithread_mutex_init). */
+    return sizeof(HANDLE);
+}
+
+/* Starts a Win32 thread running strt(argument) and stores its HANDLE in
+ * *thread_handle. attribute is ignored. Returns 0 on success and -1 when
+ * thread_handle is NULL or the thread could not be created. */
+WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument)
+{
+    HANDLE h_thread;
+
+    (void)attribute;
+    if(NULL == thread_handle)
+        return -1;
+    h_thread = CreateThread(NULL,                         /* Attributes      */
+                            1024 * 128,                   /* Stack size      */
+                            (LPTHREAD_START_ROUTINE)strt, /* Thread function */
+                            argument,                     /* Parameters      */
+                            0,                            /* Creation flags  */
+                            NULL);                        /* Thread ID       */
+    *(HANDLE *)thread_handle = h_thread;
+    /* CreateThread signals failure with a NULL handle; report it to the caller. */
+    return (NULL == h_thread) ? -1 : 0;
+}
+
+/* Blocks until the thread whose HANDLE is stored at thread_handle finishes,
+ * then closes that handle. val_ptr is ignored. Returns 0 on success and -1
+ * when thread_handle is NULL or the wait does not complete normally. */
+WORD32 ithread_join(void *thread_handle, void ** val_ptr)
+{
+    HANDLE h_thread;
+
+    (void)val_ptr;
+    if(NULL == thread_handle)
+        return -1;
+
+    h_thread = *(HANDLE *)thread_handle;
+    if(WAIT_OBJECT_0 != WaitForSingleObject(h_thread, INFINITE))
+        return -1;   /* wait failed: do not claim the thread was joined */
+
+    CloseHandle(h_thread);
+    return 0;
+}
+
+void ithread_exit(void *thread_handle)
+{   /* Terminates the thread whose HANDLE is stored at thread_handle; a NULL pointer is ignored. */
+    HANDLE *ppv_thread_handle;
+    HANDLE thread_handle_value;
+    DWORD thread_exit_code;
+    /* NOTE(review): ithread.h names this parameter val_ptr and the pthread build hands it to pthread_exit(); here it is read as a pointer to a thread HANDLE. */
+    if(0 == thread_handle)
+        return;
+
+    ppv_thread_handle = (HANDLE *)thread_handle;
+    thread_handle_value = *ppv_thread_handle;
+    /* GetExitCodeThread returns non-zero when the query succeeds; the thread is then terminated with the code just read. */
+    if( 0 != GetExitCodeThread(thread_handle_value, &thread_exit_code))
+    {
+        TerminateThread(thread_handle_value, thread_exit_code);
+    }
+
+    return;
+}
+
+WORD32 ithread_get_mutex_struct_size(void)
+{   /* Same quantity as ithread_get_mutex_lock_size(), returned as a signed value. */
+    return sizeof(HANDLE);
+}
+
+/* Creates the lock as a semaphore with initial and maximum count 1 and
+ * stores its HANDLE in *mutex. Returns 0 on success and -1 when mutex is
+ * NULL or the semaphore could not be created. */
+WORD32 ithread_mutex_init(void *mutex)
+{
+    HANDLE h_lock;
+
+    if(NULL == mutex)
+        return -1;
+    h_lock = CreateSemaphore(NULL, 1, 1, NULL);
+    *(HANDLE *)mutex = h_lock;
+    return (NULL == h_lock) ? -1 : 0; /* NULL handle means creation failed */
+}
+
+/* Closes the semaphore HANDLE stored at mutex. Returns 0 on success and -1
+ * when mutex is NULL or CloseHandle fails (as ithread_sem_destroy does). */
+WORD32 ithread_mutex_destroy(void *mutex)
+{
+    HANDLE h_lock;
+
+    if(NULL == mutex)
+        return -1;
+    h_lock = *(HANDLE *)mutex;
+    if(FALSE == CloseHandle(h_lock))
+        return -1;
+    return 0;
+}
+
+/* Acquires the lock by waiting without timeout on the semaphore HANDLE
+ * stored at mutex. Returns 0 once the wait succeeds, 1 for any other wait
+ * result, and -1 when mutex is NULL. */
+WORD32 ithread_mutex_lock(void *mutex)
+{
+    HANDLE h_lock;
+    DWORD  wait_status;
+
+    if(NULL == mutex)
+    {
+        return -1;
+    }
+
+    h_lock = *(HANDLE *)mutex;
+    wait_status = WaitForSingleObject(h_lock, INFINITE);
+
+    /* Anything other than WAIT_OBJECT_0 is reported as 1, not -1. */
+    return (WAIT_OBJECT_0 == wait_status) ? 0 : 1;
+}
+
+/* Releases the lock by returning one count to the semaphore HANDLE stored
+ * at mutex. Returns 0 on success and -1 when mutex is NULL or
+ * ReleaseSemaphore reports failure (zero). */
+WORD32 ithread_mutex_unlock(void *mutex)
+{
+    HANDLE h_lock;
+    DWORD  released;
+
+    if(NULL == mutex)
+    {
+        return -1;
+    }
+
+    h_lock = *(HANDLE *)mutex;
+    released = ReleaseSemaphore(h_lock, 1, NULL);
+
+    return (0 != released) ? 0 : -1;
+}
+
+void ithread_yield(void) { } /* Empty body: this build does nothing to give up the processor. */
+
+void ithread_usleep(UWORD32 u4_time_us)
+{   /* Sleep() takes milliseconds; round up so a sub-millisecond request is not cut to Sleep(0). */
+    UWORD32 u4_time_ms = (u4_time_us / 1000) + ((u4_time_us % 1000) ? 1 : 0);
+    Sleep(u4_time_ms);
+}
+
+void ithread_msleep(UWORD32 u4_time_ms)
+{   /* Sleep() already works in milliseconds, so the value is passed straight through. */
+    Sleep((DWORD)u4_time_ms);
+}
+
+void ithread_sleep(UWORD32 u4_time)
+{   /* u4_time is scaled by 1000 into the millisecond count that Sleep() expects. */
+    UWORD32 u4_millis = 1000 * u4_time;
+    Sleep(u4_millis);
+}
+
+UWORD32 ithread_get_sem_struct_size(void)
+{   /* A semaphore is kept as one Win32 HANDLE (written by ithread_sem_init). */
+    return sizeof(HANDLE);
+}
+
+/* Creates a counting semaphore with initial count value and a ceiling of
+ * SEM_MAX_COUNT, storing its HANDLE in *sem. pshared is ignored. Returns 0
+ * on success and -1 when sem is NULL or the semaphore cannot be created. */
+WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value)
+{
+    HANDLE h_sem;
+
+    (void)pshared;
+    if(NULL == sem)
+        return -1;
+
+    h_sem = CreateSemaphore(NULL, (LONG)value, SEM_MAX_COUNT, NULL);
+    *(HANDLE *)sem = h_sem;
+    return (NULL == h_sem) ? -1 : 0; /* NULL handle means creation failed */
+}
+
+/* Adds SEM_INCREMENT_COUNT to the semaphore whose HANDLE is stored at sem.
+ * Returns 0 when ReleaseSemaphore succeeds, otherwise -1 (also for NULL). */
+WORD32 ithread_sem_post(void *sem)
+{
+    HANDLE *ph_sem = (HANDLE *)sem;
+    BOOL    posted;
+
+    if(NULL == sem)
+    {
+        return -1;
+    }
+
+    posted = ReleaseSemaphore(*ph_sem, SEM_INCREMENT_COUNT, NULL);
+
+    return posted ? 0 : -1;
+}
+
+/* Waits without timeout on the semaphore whose HANDLE is stored at sem
+ * (as written by ithread_sem_init). Returns 0 only when the wait is
+ * satisfied (WAIT_OBJECT_0); every other outcome, including a NULL sem,
+ * yields -1. */
+WORD32 ithread_sem_wait(void *sem)
+{
+    HANDLE *sem_handle = (HANDLE *)sem;
+    DWORD   result;
+
+    if(NULL == sem)
+        return -1;
+
+    /* Wait on the semaphore object indefinitely */
+    result = WaitForSingleObject(*sem_handle, INFINITE);
+
+    /* WAIT_FAILED and any other non-success status must be reported as an
+     * error; returning 0 here would let the caller proceed without having
+     * taken a count from the semaphore. */
+    if(WAIT_OBJECT_0 != result)
+    {
+        return -1;
+    }
+    return 0;
+}
+
+/* Closes the semaphore HANDLE stored at sem. Returns 0 on success and -1
+ * when sem is NULL or CloseHandle reports FALSE. */
+WORD32 ithread_sem_destroy(void *sem)
+{
+    HANDLE *ph_sem = (HANDLE *)sem;
+    WORD32  status = 0;
+
+    if(NULL == sem)
+    {
+        return -1;
+    }
+
+    if(FALSE == CloseHandle(*ph_sem))
+        status = -1;
+    return status;
+}
+
+WORD32 ithread_set_affinity(WORD32 core_id)
+{   /* No affinity handling in this build: core_id is unused and 1 is always returned. */
+    return 1;
+}
+
+#else
+UWORD32 ithread_get_handle_size(void)
+{   /* Thread handles are pthread_t objects in this build. */
+    return (UWORD32)sizeof(pthread_t);
+}
+
+UWORD32 ithread_get_mutex_lock_size(void)
+{   /* Mutexes are pthread_mutex_t objects in this build. */
+    return (UWORD32)sizeof(pthread_mutex_t);
+}
+
+
+WORD32 ithread_create(void *thread_handle, void *attribute, void *strt, void *argument)
+{   /* Runs strt(argument) on a new thread; attribute is ignored and NULL (default) attributes are used. */
+    (void)attribute;
+    return pthread_create((pthread_t *)thread_handle, NULL, (void *(*)(void *))strt, argument);
+}
+
+WORD32 ithread_join(void *thread_handle, void ** val_ptr)
+{   /* val_ptr is ignored: pthread_join() is given NULL for the exit value. */
+    (void)val_ptr;
+    /* thread_handle points at the pthread_t to wait for. */
+    return pthread_join(*(pthread_t *)thread_handle, NULL);
+}
+
+void ithread_exit(void *val_ptr)
+{   /* Ends the calling thread with val_ptr as its exit value; pthread_exit() does not return. */
+    pthread_exit(val_ptr);
+}
+
+WORD32 ithread_get_mutex_struct_size(void)
+{   /* Same quantity as ithread_get_mutex_lock_size(), returned as a signed value. */
+    return (WORD32)sizeof(pthread_mutex_t);
+}
+WORD32 ithread_mutex_init(void *mutex)
+{   /* Initialises the pthread_mutex_t at mutex with NULL (default) attributes; returns pthread_mutex_init()'s result. */
+    return pthread_mutex_init((pthread_mutex_t *) mutex, NULL);
+}
+
+WORD32 ithread_mutex_destroy(void *mutex)
+{   /* Destroys the pthread_mutex_t at mutex; returns pthread_mutex_destroy()'s result. */
+    return pthread_mutex_destroy((pthread_mutex_t *) mutex);
+}
+
+WORD32 ithread_mutex_lock(void *mutex)
+{   /* Locks the pthread_mutex_t at mutex; returns pthread_mutex_lock()'s result. */
+    return pthread_mutex_lock((pthread_mutex_t *)mutex);
+}
+
+WORD32 ithread_mutex_unlock(void *mutex)
+{   /* Unlocks the pthread_mutex_t at mutex; returns pthread_mutex_unlock()'s result. */
+    return pthread_mutex_unlock((pthread_mutex_t *)mutex);
+}
+
+void ithread_yield(void)
+{   /* Offers the processor to other runnable threads; the result of sched_yield() is not checked. */
+    (void)sched_yield();
+}
+
+void ithread_sleep(UWORD32 u4_time)
+{   /* sleep() takes whole seconds, so no microsecond product can wrap in 32 bits or exceed usleep()'s limit. */
+    sleep(u4_time);
+}
+
+void ithread_msleep(UWORD32 u4_time_ms)
+{   /* NOTE(review): the product wraps for u4_time_ms > 4294967, and POSIX allows usleep() to reject arguments >= 1000000. */
+    usleep(u4_time_ms * 1000);
+}
+
+void ithread_usleep(UWORD32 u4_time_us)
+{   /* Passes the microsecond count to usleep() unchanged; its return value is not checked. */
+    usleep(u4_time_us);
+}
+
+UWORD32 ithread_get_sem_struct_size(void)
+{   /* Semaphores are sem_t objects in this build. */
+    return (UWORD32)sizeof(sem_t);
+}
+
+
+WORD32 ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value)
+{   /* Forwards pshared and the initial count value to sem_init() for the sem_t at sem; returns its result. */
+    return sem_init((sem_t *)sem,pshared,value);
+}
+
+WORD32 ithread_sem_post(void *sem)
+{   /* Posts to the sem_t at sem; returns sem_post()'s result. */
+    return sem_post((sem_t *)sem);
+}
+
+
+WORD32 ithread_sem_wait(void *sem)
+{   /* Waits on the sem_t at sem; returns sem_wait()'s result. */
+    return sem_wait((sem_t *)sem);
+}
+
+
+WORD32 ithread_sem_destroy(void *sem)
+{   /* Destroys the sem_t at sem; returns sem_destroy()'s result. */
+    return sem_destroy((sem_t *)sem);
+}
+
+
+WORD32 ithread_set_affinity(WORD32 core_id)
+{
+    /* Pins the calling thread to core_id when an affinity backend is compiled
+     * in. PTHREAD_AFFINITY: returns -1 for an out-of-range core_id, otherwise
+     * pthread_setaffinity_np()'s result. SYSCALL_AFFINITY: returns -1 on a bad
+     * core_id or syscall failure. In all remaining cases returns 1. */
+#ifdef PTHREAD_AFFINITY
+    cpu_set_t cpuset;
+    int num_cores = sysconf(_SC_NPROCESSORS_ONLN);
+    pthread_t cur_thread = pthread_self();
+
+    /* A negative core_id is never a valid CPU index. */
+    if (core_id < 0 || core_id >= num_cores)
+        return -1;
+
+    CPU_ZERO(&cpuset);
+    CPU_SET(core_id, &cpuset);
+    return pthread_setaffinity_np(cur_thread, sizeof(cpu_set_t), &cpuset);
+#elif defined(SYSCALL_AFFINITY)
+    /* NOTE(review): this branch also needs <sys/syscall.h>, which is still
+     * under "#if 0" near the top of this file. */
+    WORD32 i4_sys_res;
+    UWORD32 u4_mask;
+    pid_t pid = gettid();
+    /* The mask is one 32-bit word, so only cores 0..31 can be selected. */
+    if (core_id < 0 || core_id >= 32)
+        return -1;
+    u4_mask = ((UWORD32)1) << core_id;
+    i4_sys_res = syscall(__NR_sched_setaffinity, pid, sizeof(u4_mask), &u4_mask);
+    if (i4_sys_res)
+        return -1;
+#endif
+    ((void)(core_id));
+    return 1;
+}
+#endif

diff --git a/common/ithread.h b/common/ithread.h
new file mode 100644
index 0000000..eb75d20
--- /dev/null
+++ b/common/ithread.h

@@ -0,0 +1,80 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ithread.h
+*
+* @brief
+*  This file contains all the necessary structure and  enumeration
+* definitions needed for the Application  Program Interface(API) of the
+* Thread Abstraction Layer
+*
+* @author
+*  Harish
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+#ifndef __ITHREAD_H__
+#define __ITHREAD_H__
+
+UWORD32 ithread_get_handle_size(void); /* size in bytes of one thread handle object */
+
+UWORD32 ithread_get_mutex_lock_size(void); /* size in bytes of one mutex object */
+
+WORD32  ithread_create(void *thread_handle, void *attribute, void *strt, void *argument); /* starts strt(argument); ithread.c ignores attribute */
+
+void    ithread_exit(void *val_ptr); /* pthread build: ends the calling thread with val_ptr as its exit value */
+
+WORD32  ithread_join(void *thread_id, void ** val_ptr); /* waits for the thread to finish; ithread.c never writes *val_ptr */
+
+WORD32  ithread_get_mutex_struct_size(void); /* mutex object size again, as a signed value */
+
+WORD32 ithread_mutex_init(void *mutex); /* mutex must point at ithread_get_mutex_lock_size() bytes */
+
+WORD32 ithread_mutex_destroy(void *mutex);
+
+WORD32  ithread_mutex_lock(void *mutex); /* 0 on success */
+
+WORD32  ithread_mutex_unlock(void *mutex); /* 0 on success */
+
+void    ithread_yield(void);
+
+void    ithread_sleep(UWORD32 u4_time); /* u4_time in seconds */
+
+void    ithread_msleep(UWORD32 u4_time_ms); /* milliseconds */
+
+void    ithread_usleep(UWORD32 u4_time_us); /* microseconds */
+
+UWORD32 ithread_get_sem_struct_size(void); /* size in bytes of one semaphore object */
+
+WORD32  ithread_sem_init(void *sem,WORD32 pshared,UWORD32 value); /* value is the initial count; pshared is only used by the pthread build */
+
+WORD32  ithread_sem_post(void *sem);
+
+WORD32  ithread_sem_wait(void *sem);
+
+WORD32  ithread_sem_destroy(void *sem);
+
+WORD32 ithread_set_affinity(WORD32 core_id); /* returns 1 when no affinity backend is compiled in (see ithread.c) */
+#endif /* __ITHREAD_H__ */

diff --git a/common/iv.h b/common/iv.h
new file mode 100644
index 0000000..3941497
--- /dev/null
+++ b/common/iv.h

@@ -0,0 +1,420 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  iv.h
+*
+* @brief
+*  This file contains all the necessary structure and  enumeration
+* definitions needed for the Application  Program Interface(API) of the
+* Ittiam Video and Image  codecs
+*
+* @author
+*  100239(RCY)
+*
+* @par List of Functions:
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+
+#ifndef _IV_H
+#define _IV_H
+
+/*****************************************************************************/
+/* Constant Macros                                                           */
+/*****************************************************************************/
+
+
+/*****************************************************************************/
+/* Typedefs                                                                  */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums                                                                     */
+/*****************************************************************************/
+
+
+/* IV_API_CALL_STATUS_T: pass/fail status returned to the application for   */
+/* the current API call                                                     */
+
+typedef enum{
+    IV_STATUS_NA                                = 0x7FFFFFFF, /* presumably "not applicable" - TODO confirm */
+    IV_SUCCESS                                  = 0x0,
+    IV_FAIL                                     = 0x1,
+}IV_API_CALL_STATUS_T;
+
+/* IV_MEM_TYPE_T: type of memory (internal/external) combined with its      */
+/* cacheable/non-cacheable and persistent/scratch attributes                */
+
+typedef enum {
+    IV_NA_MEM_TYPE                              = 0x7FFFFFFF, /* presumably "not applicable" - TODO confirm */
+    IV_INTERNAL_CACHEABLE_PERSISTENT_MEM        = 0x1,
+    IV_INTERNAL_CACHEABLE_SCRATCH_MEM           = 0x2,
+    IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM        = 0x3,
+    IV_EXTERNAL_CACHEABLE_SCRATCH_MEM           = 0x4,
+    IV_INTERNAL_NONCACHEABLE_PERSISTENT_MEM     = 0x5,
+    IV_INTERNAL_NONCACHEABLE_SCRATCH_MEM        = 0x6,
+    IV_EXTERNAL_NONCACHEABLE_PERSISTENT_MEM     = 0x7,
+    IV_EXTERNAL_NONCACHEABLE_SCRATCH_MEM        = 0x8
+}IV_MEM_TYPE_T;
+
+/* IV_COLOR_FORMAT_T: this enumeration lists the color formats that are     */
+/* used by the video/image codecs                                           */
+
+typedef enum {
+    IV_CHROMA_NA                            = 0x7FFFFFFF, /* presumably "not applicable" - TODO confirm */
+    IV_YUV_420P                             = 0x1,
+    IV_YUV_422P                             = 0x2,
+    IV_420_UV_INTL                          = 0x3,
+    IV_YUV_422IBE                           = 0x4,
+    IV_YUV_422ILE                           = 0x5,
+    IV_YUV_444P                             = 0x6,
+    IV_YUV_411P                             = 0x7,
+    IV_GRAY                                 = 0x8,
+    IV_RGB_565                              = 0x9,
+    IV_RGB_24                               = 0xa,
+    IV_YUV_420SP_UV                         = 0xb,
+    IV_YUV_420SP_VU                         = 0xc,
+    IV_RGBA_8888                            = 0xd
+}IV_COLOR_FORMAT_T;
+
+/* IV_PICTURE_CODING_TYPE_T: VOP/frame coding type enumeration              */
+
+typedef enum {
+    IV_NA_FRAME                             = 0x7FFFFFFF, /* presumably "not applicable" - TODO confirm */
+    IV_I_FRAME                              = 0x0,
+    IV_P_FRAME                              = 0x1,
+    IV_B_FRAME                              = 0x2,
+    IV_IDR_FRAME                            = 0x3,
+    IV_II_FRAME                             = 0x4,
+    IV_IP_FRAME                             = 0x5,
+    IV_IB_FRAME                             = 0x6,
+    IV_PI_FRAME                             = 0x7,
+    IV_PP_FRAME                             = 0x8,
+    IV_PB_FRAME                             = 0x9,
+    IV_BI_FRAME                             = 0xa,
+    IV_BP_FRAME                             = 0xb,
+    IV_BB_FRAME                             = 0xc,
+    IV_MBAFF_I_FRAME                        = 0xd,
+    IV_MBAFF_P_FRAME                        = 0xe,
+    IV_MBAFF_B_FRAME                        = 0xf,
+    IV_MBAFF_IDR_FRAME                      = 0x10,
+    IV_NOT_CODED_FRAME                      = 0x11,
+    IV_FRAMETYPE_DEFAULT                    = IV_I_FRAME /* alias: same value as IV_I_FRAME */
+}IV_PICTURE_CODING_TYPE_T;
+
+/* IV_FLD_TYPE_T: field type enumeration (top or bottom field)              */
+
+typedef enum {
+    IV_NA_FLD                               = 0x7FFFFFFF, /* presumably "not applicable" - TODO confirm */
+    IV_TOP_FLD                              = 0x0,
+    IV_BOT_FLD                              = 0x1,
+    IV_FLD_TYPE_DEFAULT                     = IV_TOP_FLD /* alias: same value as IV_TOP_FLD */
+}IV_FLD_TYPE_T;
+
+/* IV_CONTENT_TYPE_T: video content type (progressive or interlaced forms)   */
+
+typedef enum {
+    IV_CONTENTTYPE_NA                       = 0x7FFFFFFF, /* presumably "not applicable" - TODO confirm */
+    IV_PROGRESSIVE                          = 0x0,
+    IV_INTERLACED                           = 0x1,
+    IV_PROGRESSIVE_FRAME                    = 0x2,
+    IV_INTERLACED_FRAME                     = 0x3,
+    IV_INTERLACED_TOPFIELD                  = 0x4,
+    IV_INTERLACED_BOTTOMFIELD               = 0x5,
+    IV_CONTENTTYPE_DEFAULT                  = IV_PROGRESSIVE, /* alias: same value as IV_PROGRESSIVE */
+}IV_CONTENT_TYPE_T;
+
+/* IV_API_COMMAND_TYPE_T: API command type, carried in the e_cmd field      */
+typedef enum {
+    IV_CMD_NA                           = 0x7FFFFFFF, /* presumably "not applicable" - TODO confirm */
+    IV_CMD_GET_NUM_MEM_REC              = 0x0, /* used with iv_num_mem_rec_ip_t / iv_num_mem_rec_op_t */
+    IV_CMD_FILL_NUM_MEM_REC             = 0x1, /* used with iv_fill_mem_rec_ip_t / iv_fill_mem_rec_op_t */
+    IV_CMD_RETRIEVE_MEMREC              = 0x2, /* used with iv_retrieve_mem_rec_ip_t / iv_retrieve_mem_rec_op_t */
+    IV_CMD_INIT                         = 0x3,
+    IV_CMD_DUMMY_ELEMENT                = 0x4,
+}IV_API_COMMAND_TYPE_T;
+
+/*****************************************************************************/
+/* Structure                                                                 */
+/*****************************************************************************/
+
+/* iv_obj_t: This structure defines the handle for the codec instance        */
+
+typedef struct{
+    /**
+     * Size of the structure
+     */
+    UWORD32                                     u4_size;
+
+    /**
+     * Pointer to the API function pointer table of the codec
+     */
+    void                                        *pv_fxns;
+
+    /**
+     * Pointer to the handle of the codec
+     */
+    void                                        *pv_codec_handle;
+}iv_obj_t;
+
+/* iv_mem_rec_t: memory record through which the codec communicates its     */
+/* memory requirements to the application via the appropriate API           */
+/* functions                                                                 */
+
+typedef struct {
+    /**
+     * Size of the structure
+     */
+    UWORD32                                     u4_size;
+
+    /**
+     * Pointer to the memory allocated by the application
+     */
+    void                                        *pv_base;
+
+    /**
+     * Size of the memory to be allocated
+     */
+    UWORD32                                     u4_mem_size;
+
+    /**
+     * Alignment of the memory pointer
+     */
+    UWORD32                                     u4_mem_alignment;
+    /**
+     * Nature of the memory to be allocated (see IV_MEM_TYPE_T)
+     */
+    IV_MEM_TYPE_T                               e_mem_type;
+}iv_mem_rec_t;
+
+/* iv_yuv_buf_t: attributes of a YUV buffer - pointer, width, height and stride for each of Y, Cb and Cr */
+
+typedef struct {
+    /**
+     * Size of the structure
+     */
+    UWORD32                                     u4_size;
+
+    /**
+     * Pointer to Luma (Y) Buffer
+     */
+
+    void                                        *pv_y_buf;
+    /**
+     * Pointer to Chroma (Cb) Buffer
+     */
+    void                                        *pv_u_buf;
+
+    /**
+     * Pointer to Chroma (Cr) Buffer
+     */
+    void                                        *pv_v_buf;
+
+    /**
+     * Width of the Luma (Y) Buffer
+     */
+    UWORD32                                     u4_y_wd;
+
+    /**
+     * Height of the Luma (Y) Buffer
+     */
+    UWORD32                                     u4_y_ht;
+
+    /**
+     * Stride/Pitch of the Luma (Y) Buffer
+     */
+    UWORD32                                     u4_y_strd;
+
+    /**
+     * Width of the Chroma (Cb) Buffer
+     */
+    UWORD32                                     u4_u_wd;
+
+    /**
+     * Height of the Chroma (Cb) Buffer
+     */
+    UWORD32                                     u4_u_ht;
+
+    /**
+     * Stride/Pitch of the Chroma (Cb) Buffer
+     */
+    UWORD32                                     u4_u_strd;
+
+    /**
+     * Width of the Chroma (Cr) Buffer
+     */
+    UWORD32                                     u4_v_wd;
+
+    /**
+     * Height of the Chroma (Cr) Buffer
+     */
+    UWORD32                                     u4_v_ht;
+
+    /**
+     * Stride/Pitch of the Chroma (Cr) Buffer
+     */
+    UWORD32                                     u4_v_strd;
+}iv_yuv_buf_t;
+
+/*****************************************************************************/
+/*  Get Number of Memory Records                                             */
+/*****************************************************************************/
+
+/* IV_API_COMMAND_TYPE_T::e_cmd = IV_CMD_GET_NUM_MEM_REC                     */
+
+
+typedef struct {
+    /**
+     * Size of the structure
+     */
+    UWORD32                                     u4_size;
+
+    /**
+     * Command; IV_CMD_GET_NUM_MEM_REC for this request
+     */
+    IV_API_COMMAND_TYPE_T                       e_cmd;
+}iv_num_mem_rec_ip_t;
+
+
+typedef struct{
+    /**
+     * Size of the structure
+     */
+    UWORD32                                     u4_size;
+
+    /**
+     * Error code
+     */
+    UWORD32                                     u4_error_code;
+
+    /**
+     * Number of memory records
+     */
+    UWORD32                                     u4_num_mem_rec;
+}iv_num_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/*  Fill Memory Records                                                      */
+/*****************************************************************************/
+
+/* IV_API_COMMAND_TYPE_T::e_cmd = IV_CMD_FILL_NUM_MEM_REC                    */
+
+
+typedef struct {
+    /**
+     * Size of the structure
+     */
+    UWORD32                                     u4_size;
+
+    /**
+     * Command; IV_CMD_FILL_NUM_MEM_REC for this request
+     */
+    IV_API_COMMAND_TYPE_T                       e_cmd;
+
+    /**
+     * Pointer to an array of memory-record structures that the codec fills
+     * with details of its memory resource requirements
+     */
+    iv_mem_rec_t                                *pv_mem_rec_location;
+
+    /**
+     * Maximum width for which the codec should report memory requirements
+     */
+    UWORD32                                     u4_max_frm_wd;
+
+    /**
+     * Maximum height for which the codec should report memory requirements
+     */
+    UWORD32                                     u4_max_frm_ht;
+}iv_fill_mem_rec_ip_t;
+
+
+typedef struct{
+    /**
+     * Size of the structure
+     */
+    UWORD32                                     u4_size;
+
+    /**
+     * Error code
+     */
+    UWORD32                                     u4_error_code;
+
+    /**
+     * Number of memory record structures filled by the codec
+     */
+    UWORD32                                     u4_num_mem_rec_filled;
+}iv_fill_mem_rec_op_t;
+
+
+/*****************************************************************************/
+/*  Retrieve Memory Records                                                  */
+/*****************************************************************************/
+
+/* IV_API_COMMAND_TYPE_T::e_cmd = IV_CMD_RETRIEVE_MEMREC                     */
+
+
+
+typedef struct {
+    /**
+     * Size of the structure
+     */
+    UWORD32                                     u4_size;
+
+    /**
+     * Command; IV_CMD_RETRIEVE_MEMREC for this request
+     */
+    IV_API_COMMAND_TYPE_T                       e_cmd;
+
+    /**
+     * Array of structures that the codec fills with all the (memory) resources it holds
+     */
+    iv_mem_rec_t                                *pv_mem_rec_location;
+}iv_retrieve_mem_rec_ip_t;
+
+
+typedef struct{
+    /**
+     * Size of the structure
+     */
+    UWORD32                                     u4_size;
+
+    /**
+     * Error code
+     */
+    UWORD32                                     u4_error_code;
+
+    /**
+     * Number of memory records filled by the codec
+     */
+    UWORD32                                     u4_num_mem_rec_filled;
+}iv_retrieve_mem_rec_op_t;
+
+
+
+#endif /* _IV_H */
+

diff --git a/common/iv_datatypedef.h b/common/iv_datatypedef.h
new file mode 100644
index 0000000..3c45942
--- /dev/null
+++ b/common/iv_datatypedef.h

@@ -0,0 +1,81 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : iv_datatypedef.h                                     */
+/*                                                                           */
+/*  Description       : This file contains all the necessary data type       */
+/*                      definitions.                                         */
+/*                                                                           */
+/*  List of Functions : None                                                 */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         29 12 2006  Rajendra C Y          Draft                           */
+/*                                                                           */
+/*****************************************************************************/
+
+#ifndef __IV_DATATYPEDEF_H__
+#define __IV_DATATYPEDEF_H__
+
+/*****************************************************************************/
+/* Typedefs                                                                  */
+/*****************************************************************************/
+
+typedef int             WORD32;     /* assumes int is 32 bits wide   */
+typedef unsigned int    UWORD32;
+
+typedef short           WORD16;     /* assumes short is 16 bits wide */
+typedef unsigned short  UWORD16;
+/* Plain char may be unsigned (e.g. under the ARM ABI), so WORD8 names signed char explicitly. */
+typedef signed char     WORD8;
+typedef unsigned char   UWORD8;
+
+typedef char            CHAR;       /* text characters; signedness is the platform default */
+#ifndef NULL
+#define NULL            ((void *)0)
+
+#endif
+
+typedef enum
+{
+    IT_FALSE = 0,   /* logical false */
+    IT_TRUE  = 1    /* logical true  */
+} IT_BOOL;
+
+
+typedef enum
+{
+    IT_ERROR = -1,  /* failure */
+    IT_OK    = 0    /* success */
+} IT_STATUS;
+
+/*****************************************************************************/
+/* Input and Output Parameter identifiers                                    */
+/*****************************************************************************/
+#define                 IT_IN   /* expands to nothing: marks a parameter as an input  */
+#define                 IT_OUT  /* expands to nothing: marks a parameter as an output */
+
+
+#endif /* __IV_DATATYPEDEF_H__ */
+

diff --git a/common/mips/impeg2_platform_macros.h b/common/mips/impeg2_platform_macros.h
new file mode 100644
index 0000000..05ff6da
--- /dev/null
+++ b/common/mips/impeg2_platform_macros.h

@@ -0,0 +1,49 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef __IMPEG2_PLATFORM_MACROS_H__
+#define __IMPEG2_PLATFORM_MACROS_H__
+
+
+/* Byte-reverses u4_temp1 into u4_temp2 (assumes a 32-bit unsigned value - confirm); u4_temp1 is evaluated four times; the trailing ';' is kept for existing call sites. */
+#define     CONV_LE_TO_BE(u4_temp2,u4_temp1)                                                \
+    (u4_temp2) = ((u4_temp1) << 24) | (((u4_temp1) & 0xff00) << 8) |                        \
+                 (((u4_temp1) & 0xff0000) >> 8) | ((u4_temp1) >> 24);
+/* Counts the leading zero bits of u4_word; a zero word yields 32 (GCC leaves __builtin_clz(0) undefined). */
+static __inline UWORD32 CLZ(UWORD32 u4_word)
+{
+    if(0 == u4_word)
+        return 32;
+    return (UWORD32)__builtin_clz(u4_word);
+}
+
+
+/* Saturating clamps, fully parenthesized; x may be evaluated more than once, so pass side-effect-free expressions. */
+#define CLIP_U8(x)  (((x) > 255) ? (255) : (((x) < 0) ? (0) : (x)))
+#define CLIP_S8(x)  (((x) > 127) ? (127) : (((x) < -128) ? (-128) : (x)))
+#define CLIP_U12(x) (((x) > 4095) ? (4095) : (((x) < 0) ? (0) : (x)))
+#define CLIP_S12(x) (((x) > 2047) ? (2047) : (((x) < -2048) ? (-2048) : (x)))
+#define CLIP_U16(x) (((x) > 65535) ? (65535) : (((x) < 0) ? (0) : (x)))
+/* Signed 16-bit range is [-32768, 32767] */
+#define CLIP_S16(x) (((x) > 32767) ? (32767) : (((x) < -32768) ? (-32768) : (x)))
+#define PLD(x)   /* expands to nothing in this (common/mips) header; the argument is discarded */
+/* INLINE likewise expands to nothing in this header */
+#define INLINE
+
+#endif /* __IMPEG2_PLATFORM_MACROS_H__ */

diff --git a/common/x86/impeg2_idct_recon_sse42_intr.c b/common/x86/impeg2_idct_recon_sse42_intr.c
new file mode 100755
index 0000000..4142032
--- /dev/null
+++ b/common/x86/impeg2_idct_recon_sse42_intr.c

@@ -0,0 +1,2205 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ *  impeg2_idct_recon_sse42_intr.c
+ *
+ * @brief
+ *  Contains function definitions for inverse  quantization, inverse
+ * transform and reconstruction
+ *
+ * @author
+ *  100470
+ *  100592 (edited by)
+ *
+ * @par List of Functions:
+ *  - impeg2_idct_recon_sse42()
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+#include <stdio.h>
+#include <string.h>
+#include "iv_datatypedef.h"
+#include "impeg2_macros.h"
+#include "impeg2_defs.h"
+#include "impeg2_globals.h"
+
+#include <immintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include <tmmintrin.h>
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  This function performs inverse quantization, inverse transform and
+ * reconstruction for an 8x8 input block
+ *
+ * @par Description:
+ *  Performs inverse quantization , inverse transform  and adds the
+ * prediction data and clips output to 8 bit
+ *
+ * @param[in] pi2_src
+ *  Input 8x8 coefficients
+ *
+ * @param[in] pi2_tmp
+ *  Temporary 8x8 buffer for storing inverse
+ *  transform 1st stage output
+ *
+ * @param[in] pu1_pred
+ *  Prediction 8x8 block
+ *
+ * @param[in] pi2_dequant_coeff
+ *  Dequant Coeffs
+ *
+ * @param[out] pu1_dst
+ *  Output 8x8 block
+ *
+ * @param[in] src_strd
+ *  Input stride
+ *
+ * @param[in] qp_div
+ *  Quantization parameter / 6
+ *
+ * @param[in] qp_rem
+ *  Quantization parameter % 6
+ *
+ * @param[in] pred_strd
+ *  Prediction stride
+ *
+ * @param[in] dst_strd
+ *  Output Stride
+ *
+ * @param[in] zero_cols
+ *  Zero columns in pi2_src
+ *
+ * @returns  Void
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+
+
+void impeg2_idct_recon_sse42(WORD16 *pi2_src,
+                                  WORD16 *pi2_tmp,
+                                  UWORD8 *pu1_pred,
+                                  UWORD8 *pu1_dst,
+                                  WORD32 src_strd,
+                                  WORD32 pred_strd,
+                                  WORD32 dst_strd,
+                                  WORD32 zero_cols,
+                                  WORD32 zero_rows)
+{
+    __m128i m_temp_reg_0;
+    __m128i m_temp_reg_1;
+    __m128i m_temp_reg_2;
+    __m128i m_temp_reg_3;
+    __m128i m_temp_reg_5;
+    __m128i m_temp_reg_6;
+    __m128i m_temp_reg_7;
+    __m128i m_temp_reg_4;
+    __m128i m_temp_reg_10;
+    __m128i m_temp_reg_11;
+    __m128i m_temp_reg_12;
+    __m128i m_temp_reg_13;
+    __m128i m_temp_reg_14;
+    __m128i m_temp_reg_15;
+    __m128i m_temp_reg_16;
+    __m128i m_temp_reg_17;
+    __m128i m_temp_reg_20;
+    __m128i m_temp_reg_21;
+    __m128i m_temp_reg_22;
+    __m128i m_temp_reg_23;
+    __m128i m_temp_reg_24;
+    __m128i m_temp_reg_25;
+    __m128i m_temp_reg_26;
+    __m128i m_temp_reg_27;
+    __m128i m_temp_reg_30;
+    __m128i m_temp_reg_31;
+    __m128i m_temp_reg_32;
+    __m128i m_temp_reg_33;
+    __m128i m_temp_reg_34;
+    __m128i m_temp_reg_35;
+    __m128i m_temp_reg_36;
+    __m128i m_temp_reg_37;
+    __m128i m_temp_reg_40;
+    __m128i m_temp_reg_41;
+    __m128i m_temp_reg_42;
+    __m128i m_temp_reg_43;
+    __m128i m_temp_reg_44;
+    __m128i m_temp_reg_45;
+    __m128i m_temp_reg_46;
+    __m128i m_temp_reg_47;
+    __m128i m_temp_reg_50;
+    __m128i m_temp_reg_51;
+    __m128i m_temp_reg_52;
+    __m128i m_temp_reg_53;
+    __m128i m_temp_reg_54;
+    __m128i m_temp_reg_55;
+    __m128i m_temp_reg_56;
+    __m128i m_temp_reg_57;
+    __m128i m_temp_reg_60;
+    __m128i m_temp_reg_61;
+    __m128i m_temp_reg_62;
+    __m128i m_temp_reg_63;
+    __m128i m_temp_reg_64;
+    __m128i m_temp_reg_65;
+    __m128i m_temp_reg_66;
+    __m128i m_temp_reg_67;
+    __m128i m_temp_reg_70;
+    __m128i m_temp_reg_71;
+    __m128i m_temp_reg_72;
+    __m128i m_temp_reg_73;
+    __m128i m_temp_reg_74;
+    __m128i m_temp_reg_75;
+    __m128i m_temp_reg_76;
+    __m128i m_temp_reg_77;
+    __m128i m_coeff1, m_coeff2, m_coeff3, m_coeff4;
+
+    WORD32 check_row_stage_1;   /* Lokesh */
+    WORD32 check_row_stage_2;   /* Lokesh */
+
+    __m128i m_rdng_factor;
+    WORD32 i4_shift = IDCT_STG1_SHIFT;
+    UNUSED(pi2_tmp);
+    check_row_stage_1   = ((zero_rows & 0xF0) != 0xF0) ? 1 : 0;
+    check_row_stage_2   = ((zero_cols & 0xF0) != 0xF0) ? 1 : 0;
+
+    m_temp_reg_70 = _mm_loadu_si128((__m128i *)pi2_src);
+    pi2_src += src_strd;
+    m_temp_reg_71 = _mm_loadu_si128((__m128i *)pi2_src);
+    pi2_src += src_strd;
+    m_temp_reg_72 = _mm_loadu_si128((__m128i *)pi2_src);
+    pi2_src += src_strd;
+    m_temp_reg_73 = _mm_loadu_si128((__m128i *)pi2_src);
+    pi2_src += src_strd;
+
+    m_temp_reg_74 = _mm_loadu_si128((__m128i *)pi2_src);
+    pi2_src += src_strd;
+    m_temp_reg_75 = _mm_loadu_si128((__m128i *)pi2_src);
+    pi2_src += src_strd;
+    m_temp_reg_76 = _mm_loadu_si128((__m128i *)pi2_src);
+    pi2_src += src_strd;
+    m_temp_reg_77 = _mm_loadu_si128((__m128i *)pi2_src);
+
+    if(!check_row_stage_2)
+    {
+        if(!check_row_stage_1)
+        {
+            /* ee0 is present in the registers m_temp_reg_10 and m_temp_reg_11 */
+            /* ee1 is present in the registers m_temp_reg_12 and m_temp_reg_13 */
+            {
+                //Interleaving 0,4 row in 0 , 1 Rishab
+                /*coef2 for m_temp_reg_12 and m_temp_reg_13 , coef1 for m_temp_reg_10 and m_temp_reg_11*/
+                m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[3][0]);
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[0][0]);
+
+                m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_70, m_temp_reg_74);
+
+                m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
+                m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2);
+
+            }
+
+
+            /* eo0 is present in the registers m_temp_reg_14 and m_temp_reg_15 */
+            /* eo1 is present in the registers m_temp_reg_16 and m_temp_reg_17 */
+            /* as upper 8 bytes are zeros so m_temp_reg_15 and m_temp_reg_17 are not used*/
+            {
+
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[1][0]); //sub 2B*36-6B*83 ,2T*36-6T*83
+                m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[2][0]); //add 2B*83+6B*36 ,2T*83+6T*36
+
+                /* Combining instructions to eliminate them based on zero_rows : Lokesh */
+                //Interleaving 2,6 row in 4, 5 Rishab
+                m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_72, m_temp_reg_76);
+
+                m_temp_reg_16 = _mm_madd_epi16(m_temp_reg_4, m_coeff1);
+                m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_4, m_coeff2);
+
+
+                /* Loading coeff for computing o0, o1, o2 and o3 in the next block */
+
+                m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[2][0]);
+                m_coeff4 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[3][0]);
+
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[0][0]);
+                m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[1][0]);
+
+
+
+                /* e */
+
+                /* e0 stored in m_temp_reg_40 and m_temp_reg_41 */
+                /* e1 stored in m_temp_reg_42 and m_temp_reg_43 */
+                /* e3 stored in m_temp_reg_46 and m_temp_reg_47 */
+                /* e2 stored in m_temp_reg_44 and m_temp_reg_45 */
+                m_temp_reg_42 = _mm_add_epi32(m_temp_reg_12, m_temp_reg_16);
+                m_temp_reg_44 = _mm_sub_epi32(m_temp_reg_12, m_temp_reg_16);
+
+                m_temp_reg_40 = _mm_add_epi32(m_temp_reg_10, m_temp_reg_14);
+                m_temp_reg_46 = _mm_sub_epi32(m_temp_reg_10, m_temp_reg_14);
+
+            }
+
+            /* o */
+            {
+
+                /* o0 stored in m_temp_reg_30 and m_temp_reg_31 */
+                {
+
+                    m_temp_reg_60 = _mm_unpacklo_epi16(m_temp_reg_71, m_temp_reg_73);
+                    //o0:1B*89+3B*75,5B*50+7B*18
+                    m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_60, m_coeff1);
+
+                    m_rdng_factor = _mm_cvtsi32_si128((1 << (i4_shift - 1)));
+                    m_rdng_factor = _mm_shuffle_epi32(m_rdng_factor, 0x0000);
+
+
+
+                    /* Column 0 of destination computed here */
+                    /* It is stored in m_temp_reg_50 */
+                    /* Column 7 of destination computed here */
+                    /* It is stored in m_temp_reg_57 */
+                    /* Upper 8 bytes of both registers are zero due to zero_cols*/
+
+
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30);
+                    m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_40, m_temp_reg_30);
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor);
+                    m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor);
+
+                    m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift);
+                    m_temp_reg_63 = _mm_setzero_si128();
+                    m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift);
+
+                    //o1:1B*75-3B*18,5B*89+7B*50
+                    m_temp_reg_32 = _mm_madd_epi16(m_temp_reg_60, m_coeff3);
+
+                    m_temp_reg_50 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63);
+                    m_temp_reg_57 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_63);
+
+                    /* Loading coeff for computing o2  in the next block */
+
+                    m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[4][0]);
+                    m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[5][0]);
+
+                    /* o1 stored in m_temp_reg_32 and m_temp_reg_33 */
+
+
+
+                    /* Column 1 of destination computed here */
+                    /* It is stored in m_temp_reg_51 */
+                    /* Column 6 of destination computed here */
+                    /* It is stored in m_temp_reg_56 */
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_32);
+                    m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_42, m_temp_reg_32);
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor);
+                    m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor);
+
+                    m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift);
+                    m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift);
+
+                    //o2:1B*50-3B*89,5B*18+7B*75
+                    m_temp_reg_34 = _mm_madd_epi16(m_temp_reg_60, m_coeff1);
+
+                    m_temp_reg_51 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63);
+                    m_temp_reg_56 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_63);
+
+
+                    /* o2 stored in m_temp_reg_34 and m_temp_reg_35 */
+
+                    /* Loading coeff for computing o3  in the next block */
+
+                    m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[6][0]);
+                    m_coeff4 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[7][0]);
+
+
+
+                    /* Column 2 of destination computed here */
+                    /* It is stored in m_temp_reg_52 */
+                    /* Column 5 of destination computed here */
+                    /* It is stored in m_temp_reg_55 */
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_34);
+                    m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_44, m_temp_reg_34);
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor);
+                    m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor);
+
+                    m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift);
+                    m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift);
+
+                    //o3:1B*18-3B*50,5B*75-7B*89
+                    m_temp_reg_36 = _mm_madd_epi16(m_temp_reg_60, m_coeff3);
+
+                    m_temp_reg_52 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63);
+                    m_temp_reg_55 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_63);
+
+
+
+                    /* o3 stored in m_temp_reg_36 and m_temp_reg_37 */
+
+
+
+                    /* Column 3 of destination computed here */
+                    /* It is stored in m_temp_reg_53 */
+                    /* Column 4 of destination computed here */
+                    /* It is stored in m_temp_reg_54 */
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_46, m_temp_reg_36);
+                    m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_46, m_temp_reg_36);
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor);
+                    m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor);
+
+                    m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift);
+                    m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift);
+
+
+                    m_temp_reg_53 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63);
+                    m_temp_reg_54 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_63);
+                }
+            }
+
+            /* Transpose of the destination 8x8 matrix done here */
+            /* and ultimately stored in registers m_temp_reg_50 to m_temp_reg_57 */
+            /* respectively */
+            {
+                m_temp_reg_10 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_51);
+                m_temp_reg_11 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_53);
+                m_temp_reg_0 = _mm_unpacklo_epi32(m_temp_reg_10, m_temp_reg_11);
+                m_temp_reg_1 = _mm_unpackhi_epi32(m_temp_reg_10, m_temp_reg_11);
+
+                m_temp_reg_12 = _mm_unpacklo_epi16(m_temp_reg_54, m_temp_reg_55);
+                m_temp_reg_13 = _mm_unpacklo_epi16(m_temp_reg_56, m_temp_reg_57);
+
+                m_temp_reg_4 = _mm_unpacklo_epi32(m_temp_reg_12, m_temp_reg_13);
+                m_temp_reg_5 = _mm_unpackhi_epi32(m_temp_reg_12, m_temp_reg_13);
+
+                m_temp_reg_50 = _mm_unpacklo_epi64(m_temp_reg_0, m_temp_reg_4);
+                m_temp_reg_51 = _mm_unpackhi_epi64(m_temp_reg_0, m_temp_reg_4);
+                m_temp_reg_52 = _mm_unpacklo_epi64(m_temp_reg_1, m_temp_reg_5);
+                m_temp_reg_53 = _mm_unpackhi_epi64(m_temp_reg_1, m_temp_reg_5);
+
+                m_temp_reg_54 = _mm_setzero_si128();
+                m_temp_reg_55 = _mm_setzero_si128();
+                m_temp_reg_56 = _mm_setzero_si128();
+                m_temp_reg_57 = _mm_setzero_si128();
+            }
+        }
+        else
+        {
+            /* ee0 is present in the registers m_temp_reg_10 and m_temp_reg_11 */
+            /* ee1 is present in the registers m_temp_reg_12 and m_temp_reg_13 */
+            {
+                //Interleaving 0,4 row in 0 , 1 Rishab
+                /*coef2 for m_temp_reg_12 and m_temp_reg_13 , coef1 for m_temp_reg_10 and m_temp_reg_11*/
+                m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[3][0]);
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[0][0]);
+
+                m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_70, m_temp_reg_74);
+
+                m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
+                m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2);
+
+            }
+
+
+            /* eo0 is present in the registers m_temp_reg_14 and m_temp_reg_15 */
+            /* eo1 is present in the registers m_temp_reg_16 and m_temp_reg_17 */
+            /* as upper 8 bytes are zeros so m_temp_reg_15 and m_temp_reg_17 are not used*/
+            {
+
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[1][0]); //sub 2B*36-6B*83 ,2T*36-6T*83
+                m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[2][0]); //add 2B*83+6B*36 ,2T*83+6T*36
+
+                /* Combining instructions to eliminate them based on zero_rows : Lokesh */
+                //Interleaving 2,6 row in 4, 5 Rishab
+                m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_72, m_temp_reg_76);
+
+                m_temp_reg_16 = _mm_madd_epi16(m_temp_reg_4, m_coeff1);
+                m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_4, m_coeff2);
+
+
+                /* Loading coeff for computing o0, o1, o2 and o3 in the next block */
+
+                m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[2][0]);
+                m_coeff4 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[3][0]);
+
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[0][0]);
+                m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[1][0]);
+
+
+
+                /* e */
+
+                /* e0 stored in m_temp_reg_40 and m_temp_reg_41 */
+                /* e1 stored in m_temp_reg_42 and m_temp_reg_43 */
+                /* e3 stored in m_temp_reg_46 and m_temp_reg_47 */
+                /* e2 stored in m_temp_reg_44 and m_temp_reg_45 */
+                m_temp_reg_42 = _mm_add_epi32(m_temp_reg_12, m_temp_reg_16);
+                m_temp_reg_44 = _mm_sub_epi32(m_temp_reg_12, m_temp_reg_16);
+
+                m_temp_reg_40 = _mm_add_epi32(m_temp_reg_10, m_temp_reg_14);
+                m_temp_reg_46 = _mm_sub_epi32(m_temp_reg_10, m_temp_reg_14);
+
+            }
+
+            /* o */
+            {
+
+                /* o0 stored in m_temp_reg_30 and m_temp_reg_31 */
+                {
+
+                    m_temp_reg_60 = _mm_unpacklo_epi16(m_temp_reg_71, m_temp_reg_73);
+                    m_temp_reg_64 = _mm_unpacklo_epi16(m_temp_reg_75, m_temp_reg_77);
+                    //o0:1B*89+3B*75,5B*50+7B*18
+                    m_temp_reg_20 = _mm_madd_epi16(m_temp_reg_60, m_coeff1);
+                    m_temp_reg_24 = _mm_madd_epi16(m_temp_reg_64, m_coeff2);
+
+                    m_rdng_factor = _mm_cvtsi32_si128((1 << (i4_shift - 1)));
+                    m_rdng_factor = _mm_shuffle_epi32(m_rdng_factor, 0x0000);
+
+                    m_temp_reg_30 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_24);
+
+
+
+                    /* Column 0 of destination computed here */
+                    /* It is stored in m_temp_reg_50 */
+                    /* Column 7 of destination computed here */
+                    /* It is stored in m_temp_reg_57 */
+                    /* Upper 8 bytes of both registers are zero due to zero_cols*/
+
+
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30);
+                    m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_40, m_temp_reg_30);
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor);
+                    m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor);
+
+                    m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift);
+                    m_temp_reg_63 = _mm_setzero_si128();
+                    m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift);
+
+                    //o1:1B*75-3B*18,5B*89+7B*50
+                    m_temp_reg_22 = _mm_madd_epi16(m_temp_reg_60, m_coeff3);
+                    m_temp_reg_26 = _mm_madd_epi16(m_temp_reg_64, m_coeff4);
+
+                    m_temp_reg_50 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63);
+                    m_temp_reg_57 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_63);
+
+                    /* Loading coeff for computing o2  in the next block */
+
+                    m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[4][0]);
+                    m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[5][0]);
+
+                    /* o1 stored in m_temp_reg_32 and m_temp_reg_33 */
+                    m_temp_reg_32 = _mm_sub_epi32(m_temp_reg_22, m_temp_reg_26);
+
+
+
+                    /* Column 1 of destination computed here */
+                    /* It is stored in m_temp_reg_51 */
+                    /* Column 6 of destination computed here */
+                    /* It is stored in m_temp_reg_56 */
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_32);
+                    m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_42, m_temp_reg_32);
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor);
+                    m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor);
+
+                    m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift);
+                    m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift);
+
+                    //o2:1B*50-3B*89,5B*18+7B*75
+                    m_temp_reg_20 = _mm_madd_epi16(m_temp_reg_60, m_coeff1);
+                    m_temp_reg_24 = _mm_madd_epi16(m_temp_reg_64, m_coeff2);
+
+                    m_temp_reg_51 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63);
+                    m_temp_reg_56 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_63);
+
+
+                    /* o2 stored in m_temp_reg_34 and m_temp_reg_35 */
+
+                    /* Loading coeff for computing o3  in the next block */
+
+                    m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[6][0]);
+                    m_coeff4 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[7][0]);
+
+                    m_temp_reg_34 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_24);
+
+
+                    /* Column 2 of destination computed here */
+                    /* It is stored in m_temp_reg_52 */
+                    /* Column 5 of destination computed here */
+                    /* It is stored in m_temp_reg_55 */
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_34);
+                    m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_44, m_temp_reg_34);
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor);
+                    m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor);
+
+                    m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift);
+                    m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift);
+
+                    //o3:1B*18-3B*50,5B*75-7B*89
+                    m_temp_reg_22 = _mm_madd_epi16(m_temp_reg_60, m_coeff3);
+                    m_temp_reg_26 = _mm_madd_epi16(m_temp_reg_64, m_coeff4);
+
+                    m_temp_reg_52 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63);
+                    m_temp_reg_55 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_63);
+
+
+
+                    /* o3 stored in m_temp_reg_36 and m_temp_reg_37 */
+
+                    m_temp_reg_36 = _mm_add_epi32(m_temp_reg_22, m_temp_reg_26);
+
+
+                    /* Column 3 of destination computed here */
+                    /* It is stored in m_temp_reg_53 */
+                    /* Column 4 of destination computed here */
+                    /* It is stored in m_temp_reg_54 */
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_46, m_temp_reg_36);
+                    m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_46, m_temp_reg_36);
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor);
+                    m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor);
+
+                    m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift);
+                    m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift);
+
+
+                    m_temp_reg_53 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63);
+                    m_temp_reg_54 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_63);
+                }
+            }
+
+            /* Transpose of the destination 8x8 matrix done here */
+            /* and ultimately stored in registers m_temp_reg_50 to m_temp_reg_57 */
+            /* respectively */
+            {
+                m_temp_reg_10 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_51);
+                m_temp_reg_11 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_53);
+                m_temp_reg_0 = _mm_unpacklo_epi32(m_temp_reg_10, m_temp_reg_11);
+                m_temp_reg_1 = _mm_unpackhi_epi32(m_temp_reg_10, m_temp_reg_11);
+
+                m_temp_reg_12 = _mm_unpacklo_epi16(m_temp_reg_54, m_temp_reg_55);
+                m_temp_reg_13 = _mm_unpacklo_epi16(m_temp_reg_56, m_temp_reg_57);
+                m_temp_reg_4 = _mm_unpacklo_epi32(m_temp_reg_12, m_temp_reg_13);
+                m_temp_reg_5 = _mm_unpackhi_epi32(m_temp_reg_12, m_temp_reg_13);
+
+                m_temp_reg_50 = _mm_unpacklo_epi64(m_temp_reg_0, m_temp_reg_4);
+                m_temp_reg_51 = _mm_unpackhi_epi64(m_temp_reg_0, m_temp_reg_4);
+                m_temp_reg_52 = _mm_unpacklo_epi64(m_temp_reg_1, m_temp_reg_5);
+                m_temp_reg_53 = _mm_unpackhi_epi64(m_temp_reg_1, m_temp_reg_5);
+
+                m_temp_reg_54 = _mm_setzero_si128();
+                m_temp_reg_55 = _mm_setzero_si128();
+                m_temp_reg_56 = _mm_setzero_si128();
+                m_temp_reg_57 = _mm_setzero_si128();
+            }
+        }
+
+        /* Stage 2 */
+        i4_shift = IDCT_STG2_SHIFT;
+        {
+            /* ee0 is present in the registers m_temp_reg_10 and m_temp_reg_11 */
+            /* ee1 is present in the registers m_temp_reg_12 and m_temp_reg_13 */
+            {
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q11[0][0]); //add
+                m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q11[3][0]); //sub
+
+                m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_54);
+                m_temp_reg_1 = _mm_unpackhi_epi16(m_temp_reg_50, m_temp_reg_54);
+
+                m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
+                m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2);
+                m_temp_reg_11 = _mm_madd_epi16(m_temp_reg_1, m_coeff1);
+                m_temp_reg_13 = _mm_madd_epi16(m_temp_reg_1, m_coeff2);
+
+
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q11[1][0]);
+                m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q11[2][0]);
+            }
+
+
+            /* eo0 is present in the registers m_temp_reg_14 and m_temp_reg_15 */
+            /* eo1 is present in the registers m_temp_reg_16 and m_temp_reg_17 */
+            {
+
+                m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_56);
+                m_temp_reg_1 = _mm_unpackhi_epi16(m_temp_reg_52, m_temp_reg_56);
+
+
+                m_temp_reg_16 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
+                m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_0, m_coeff2);
+                m_temp_reg_17 = _mm_madd_epi16(m_temp_reg_1, m_coeff1);
+                m_temp_reg_15 = _mm_madd_epi16(m_temp_reg_1, m_coeff2);
+
+                /* Loading coeff for computing o0 in the next block */
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[0][0]);
+
+
+                m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_51, m_temp_reg_53);
+                m_temp_reg_1 = _mm_unpackhi_epi16(m_temp_reg_51, m_temp_reg_53);
+
+
+
+                /* e */
+
+                /* e0 stored in m_temp_reg_40 and m_temp_reg_41 */
+                /* e1 stored in m_temp_reg_42 and m_temp_reg_43 */
+                /* e3 stored in m_temp_reg_46 and m_temp_reg_47 */
+                /* e2 stored in m_temp_reg_44 and m_temp_reg_45 */
+                m_temp_reg_42 = _mm_add_epi32(m_temp_reg_12, m_temp_reg_16);
+                m_temp_reg_44 = _mm_sub_epi32(m_temp_reg_12, m_temp_reg_16);
+
+                m_temp_reg_40 = _mm_add_epi32(m_temp_reg_10, m_temp_reg_14);
+                m_temp_reg_46 = _mm_sub_epi32(m_temp_reg_10, m_temp_reg_14);
+
+                m_temp_reg_43 = _mm_add_epi32(m_temp_reg_13, m_temp_reg_17);
+                m_temp_reg_45 = _mm_sub_epi32(m_temp_reg_13, m_temp_reg_17);
+
+                m_temp_reg_41 = _mm_add_epi32(m_temp_reg_11, m_temp_reg_15);
+                m_temp_reg_47 = _mm_sub_epi32(m_temp_reg_11, m_temp_reg_15);
+
+            }
+
+            /* o */
+            {
+
+                /* o0 stored in m_temp_reg_30 and m_temp_reg_31 */
+                {
+                    //o0:1B*89+3B*75,1T*89+3T*75
+                    m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
+                    m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_1, m_coeff1);
+
+                    m_rdng_factor = _mm_cvtsi32_si128((1 << (i4_shift - 1)));
+                    m_rdng_factor = _mm_shuffle_epi32(m_rdng_factor, 0x0000);
+                    /* Loading coeff for computing o1 in the next block */
+                    m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[2][0]);
+
+
+
+                    /* Column 0 of destination computed here */
+                    /* It is stored in m_temp_reg_50 */
+                    /* Column 7 of destination computed here */
+                    /* It is stored in m_temp_reg_57 */
+
+                    m_temp_reg_2 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30);
+                    m_temp_reg_6 = _mm_sub_epi32(m_temp_reg_40, m_temp_reg_30);
+
+                    m_temp_reg_3 = _mm_add_epi32(m_temp_reg_41, m_temp_reg_31);
+                    m_temp_reg_7 = _mm_sub_epi32(m_temp_reg_41, m_temp_reg_31);
+
+                    m_temp_reg_2 = _mm_add_epi32(m_temp_reg_2, m_rdng_factor);
+                    m_temp_reg_3 = _mm_add_epi32(m_temp_reg_3, m_rdng_factor);
+                    m_temp_reg_6 = _mm_add_epi32(m_temp_reg_6, m_rdng_factor);
+                    m_temp_reg_7 = _mm_add_epi32(m_temp_reg_7, m_rdng_factor);
+
+                    //o1:1B*75-3B*18,1T*75-3T*18
+                    m_temp_reg_32 = _mm_madd_epi16(m_temp_reg_0, m_coeff3);
+                    m_temp_reg_33 = _mm_madd_epi16(m_temp_reg_1, m_coeff3);
+
+                    m_temp_reg_2 = _mm_srai_epi32(m_temp_reg_2, i4_shift);
+                    m_temp_reg_3 = _mm_srai_epi32(m_temp_reg_3, i4_shift);
+                    m_temp_reg_6 = _mm_srai_epi32(m_temp_reg_6, i4_shift);
+                    m_temp_reg_7 = _mm_srai_epi32(m_temp_reg_7, i4_shift);
+
+                    m_temp_reg_50 = _mm_packs_epi32(m_temp_reg_2, m_temp_reg_3);
+                    m_temp_reg_57 = _mm_packs_epi32(m_temp_reg_6, m_temp_reg_7);
+
+
+                    /* o1 stored in m_temp_reg_32 and m_temp_reg_33 */
+
+
+                    /* Loading coeff for computing o2  in the next block */
+                    m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[4][0]);
+
+
+
+                    /* Column 1 of destination computed here */
+                    /* It is stored in m_temp_reg_51 */
+                    /* Column 6 of destination computed here */
+                    /* It is stored in m_temp_reg_56 */
+
+                    m_temp_reg_2 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_32);
+                    m_temp_reg_6 = _mm_sub_epi32(m_temp_reg_42, m_temp_reg_32);
+
+                    m_temp_reg_3 = _mm_add_epi32(m_temp_reg_43, m_temp_reg_33);
+                    m_temp_reg_7 = _mm_sub_epi32(m_temp_reg_43, m_temp_reg_33);
+
+                    m_temp_reg_2 = _mm_add_epi32(m_temp_reg_2, m_rdng_factor);
+                    m_temp_reg_3 = _mm_add_epi32(m_temp_reg_3, m_rdng_factor);
+                    m_temp_reg_6 = _mm_add_epi32(m_temp_reg_6, m_rdng_factor);
+                    m_temp_reg_7 = _mm_add_epi32(m_temp_reg_7, m_rdng_factor);
+
+                    //o2:1B*50-3B*89,1T*50-3T*89
+                    m_temp_reg_34 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
+                    m_temp_reg_35 = _mm_madd_epi16(m_temp_reg_1, m_coeff1);
+
+                    m_temp_reg_2 = _mm_srai_epi32(m_temp_reg_2, i4_shift);
+                    m_temp_reg_3 = _mm_srai_epi32(m_temp_reg_3, i4_shift);
+                    m_temp_reg_6 = _mm_srai_epi32(m_temp_reg_6, i4_shift);
+                    m_temp_reg_7 = _mm_srai_epi32(m_temp_reg_7, i4_shift);
+
+                    m_temp_reg_51 = _mm_packs_epi32(m_temp_reg_2, m_temp_reg_3);
+                    m_temp_reg_56 = _mm_packs_epi32(m_temp_reg_6, m_temp_reg_7);
+
+
+                    /* o2 stored in m_temp_reg_34 and m_temp_reg_35 */
+
+                    /* Loading coeff for computing o3  in the next block */
+
+                    m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[6][0]);
+
+
+                    /* Column 2 of destination computed here */
+                    /* It is stored in m_temp_reg_52 */
+                    /* Column 5 of destination computed here */
+                    /* It is stored in m_temp_reg_55 */
+
+                    m_temp_reg_2 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_34);
+                    m_temp_reg_6 = _mm_sub_epi32(m_temp_reg_44, m_temp_reg_34);
+
+                    m_temp_reg_3 = _mm_add_epi32(m_temp_reg_45, m_temp_reg_35);
+                    m_temp_reg_7 = _mm_sub_epi32(m_temp_reg_45, m_temp_reg_35);
+
+                    m_temp_reg_2 = _mm_add_epi32(m_temp_reg_2, m_rdng_factor);
+                    m_temp_reg_3 = _mm_add_epi32(m_temp_reg_3, m_rdng_factor);
+                    m_temp_reg_6 = _mm_add_epi32(m_temp_reg_6, m_rdng_factor);
+                    m_temp_reg_7 = _mm_add_epi32(m_temp_reg_7, m_rdng_factor);
+
+                    //o3:1B*18-3B*50,1T*18-3T*50
+                    m_temp_reg_36 = _mm_madd_epi16(m_temp_reg_0, m_coeff3);
+                    m_temp_reg_37 = _mm_madd_epi16(m_temp_reg_1, m_coeff3);
+
+                    m_temp_reg_2 = _mm_srai_epi32(m_temp_reg_2, i4_shift);
+                    m_temp_reg_3 = _mm_srai_epi32(m_temp_reg_3, i4_shift);
+                    m_temp_reg_6 = _mm_srai_epi32(m_temp_reg_6, i4_shift);
+                    m_temp_reg_7 = _mm_srai_epi32(m_temp_reg_7, i4_shift);
+
+
+                    m_temp_reg_52 = _mm_packs_epi32(m_temp_reg_2, m_temp_reg_3);
+                    m_temp_reg_55 = _mm_packs_epi32(m_temp_reg_6, m_temp_reg_7);
+
+
+
+                    /* o3 stored in m_temp_reg_36 and m_temp_reg_37 */
+
+
+                    /* Column 3 of destination computed here */
+                    /* It is stored in m_temp_reg_53 */
+                    /* Column 4 of destination computed here */
+                    /* It is stored in m_temp_reg_54 */
+
+                    m_temp_reg_20 = _mm_add_epi32(m_temp_reg_46, m_temp_reg_36);
+                    m_temp_reg_22 = _mm_sub_epi32(m_temp_reg_46, m_temp_reg_36);
+
+                    m_temp_reg_21 = _mm_add_epi32(m_temp_reg_47, m_temp_reg_37);
+                    m_temp_reg_23 = _mm_sub_epi32(m_temp_reg_47, m_temp_reg_37);
+
+                    m_temp_reg_20 = _mm_add_epi32(m_temp_reg_20, m_rdng_factor);
+                    m_temp_reg_21 = _mm_add_epi32(m_temp_reg_21, m_rdng_factor);
+                    m_temp_reg_22 = _mm_add_epi32(m_temp_reg_22, m_rdng_factor);
+                    m_temp_reg_23 = _mm_add_epi32(m_temp_reg_23, m_rdng_factor);
+
+                    m_temp_reg_20 = _mm_srai_epi32(m_temp_reg_20, i4_shift);
+                    m_temp_reg_21 = _mm_srai_epi32(m_temp_reg_21, i4_shift);
+                    m_temp_reg_22 = _mm_srai_epi32(m_temp_reg_22, i4_shift);
+                    m_temp_reg_23 = _mm_srai_epi32(m_temp_reg_23, i4_shift);
+
+                    m_temp_reg_53 = _mm_packs_epi32(m_temp_reg_20, m_temp_reg_21);
+                    m_temp_reg_54 = _mm_packs_epi32(m_temp_reg_22, m_temp_reg_23);
+                }
+            }
+
+            /* Transpose of the destination 8x8 matrix done here */
+            /* and ultimately stored in registers m_temp_reg_10 to m_temp_reg_17 */
+            /* respectively */
+            {
+                m_temp_reg_10 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_51);
+                m_temp_reg_11 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_53);
+                m_temp_reg_14 = _mm_unpackhi_epi16(m_temp_reg_50, m_temp_reg_51);
+                m_temp_reg_15 = _mm_unpackhi_epi16(m_temp_reg_52, m_temp_reg_53);
+                m_temp_reg_0 = _mm_unpacklo_epi32(m_temp_reg_10, m_temp_reg_11);
+                m_temp_reg_1 = _mm_unpackhi_epi32(m_temp_reg_10, m_temp_reg_11);
+                m_temp_reg_2 = _mm_unpacklo_epi32(m_temp_reg_14, m_temp_reg_15);
+                m_temp_reg_3 = _mm_unpackhi_epi32(m_temp_reg_14, m_temp_reg_15);
+
+                m_temp_reg_12 = _mm_unpacklo_epi16(m_temp_reg_54, m_temp_reg_55);
+                m_temp_reg_13 = _mm_unpacklo_epi16(m_temp_reg_56, m_temp_reg_57);
+                m_temp_reg_16 = _mm_unpackhi_epi16(m_temp_reg_54, m_temp_reg_55);
+                m_temp_reg_17 = _mm_unpackhi_epi16(m_temp_reg_56, m_temp_reg_57);
+                m_temp_reg_4 = _mm_unpacklo_epi32(m_temp_reg_12, m_temp_reg_13);
+                m_temp_reg_5 = _mm_unpackhi_epi32(m_temp_reg_12, m_temp_reg_13);
+                m_temp_reg_6 = _mm_unpacklo_epi32(m_temp_reg_16, m_temp_reg_17);
+                m_temp_reg_7 = _mm_unpackhi_epi32(m_temp_reg_16, m_temp_reg_17);
+                m_temp_reg_10 = _mm_unpacklo_epi64(m_temp_reg_0, m_temp_reg_4);
+                m_temp_reg_11 = _mm_unpackhi_epi64(m_temp_reg_0, m_temp_reg_4);
+                m_temp_reg_12 = _mm_unpacklo_epi64(m_temp_reg_1, m_temp_reg_5);
+                m_temp_reg_13 = _mm_unpackhi_epi64(m_temp_reg_1, m_temp_reg_5);
+
+                m_temp_reg_14 = _mm_unpacklo_epi64(m_temp_reg_2, m_temp_reg_6);
+                m_temp_reg_15 = _mm_unpackhi_epi64(m_temp_reg_2, m_temp_reg_6);
+                m_temp_reg_16 = _mm_unpacklo_epi64(m_temp_reg_3, m_temp_reg_7);
+                m_temp_reg_17 = _mm_unpackhi_epi64(m_temp_reg_3, m_temp_reg_7);
+            }
+
+            /* Recon and store */
+            {
+                m_temp_reg_0 = _mm_loadl_epi64((__m128i *)pu1_pred);
+                pu1_pred += pred_strd;
+                m_temp_reg_1 = _mm_loadl_epi64((__m128i *)pu1_pred);
+                pu1_pred += pred_strd;
+                m_temp_reg_2 = _mm_loadl_epi64((__m128i *)pu1_pred);
+                pu1_pred += pred_strd;
+                m_temp_reg_3 = _mm_loadl_epi64((__m128i *)pu1_pred);
+                pu1_pred += pred_strd;
+                m_temp_reg_4 = _mm_loadl_epi64((__m128i *)pu1_pred);
+                pu1_pred += pred_strd;
+                m_temp_reg_5 = _mm_loadl_epi64((__m128i *)pu1_pred);
+                pu1_pred += pred_strd;
+                m_temp_reg_6 = _mm_loadl_epi64((__m128i *)pu1_pred);
+                pu1_pred += pred_strd;
+                m_temp_reg_7 = _mm_loadl_epi64((__m128i *)pu1_pred);
+
+                m_temp_reg_50 = _mm_setzero_si128();
+                m_temp_reg_0 = _mm_unpacklo_epi8(m_temp_reg_0, m_temp_reg_50);
+                m_temp_reg_1 = _mm_unpacklo_epi8(m_temp_reg_1, m_temp_reg_50);
+                m_temp_reg_2 = _mm_unpacklo_epi8(m_temp_reg_2, m_temp_reg_50);
+                m_temp_reg_3 = _mm_unpacklo_epi8(m_temp_reg_3, m_temp_reg_50);
+                m_temp_reg_4 = _mm_unpacklo_epi8(m_temp_reg_4, m_temp_reg_50);
+                m_temp_reg_5 = _mm_unpacklo_epi8(m_temp_reg_5, m_temp_reg_50);
+                m_temp_reg_6 = _mm_unpacklo_epi8(m_temp_reg_6, m_temp_reg_50);
+                m_temp_reg_7 = _mm_unpacklo_epi8(m_temp_reg_7, m_temp_reg_50);
+
+                m_temp_reg_50 = _mm_add_epi16(m_temp_reg_10, m_temp_reg_0);
+                m_temp_reg_51 = _mm_add_epi16(m_temp_reg_11, m_temp_reg_1);
+                m_temp_reg_52 = _mm_add_epi16(m_temp_reg_12, m_temp_reg_2);
+                m_temp_reg_53 = _mm_add_epi16(m_temp_reg_13, m_temp_reg_3);
+                m_temp_reg_54 = _mm_add_epi16(m_temp_reg_14, m_temp_reg_4);
+                m_temp_reg_55 = _mm_add_epi16(m_temp_reg_15, m_temp_reg_5);
+                m_temp_reg_56 = _mm_add_epi16(m_temp_reg_16, m_temp_reg_6);
+                m_temp_reg_57 = _mm_add_epi16(m_temp_reg_17, m_temp_reg_7);
+
+                m_temp_reg_50 = _mm_packus_epi16(m_temp_reg_50, m_temp_reg_50);
+                m_temp_reg_51 = _mm_packus_epi16(m_temp_reg_51, m_temp_reg_51);
+                m_temp_reg_52 = _mm_packus_epi16(m_temp_reg_52, m_temp_reg_52);
+                m_temp_reg_53 = _mm_packus_epi16(m_temp_reg_53, m_temp_reg_53);
+                m_temp_reg_54 = _mm_packus_epi16(m_temp_reg_54, m_temp_reg_54);
+                m_temp_reg_55 = _mm_packus_epi16(m_temp_reg_55, m_temp_reg_55);
+                m_temp_reg_56 = _mm_packus_epi16(m_temp_reg_56, m_temp_reg_56);
+                m_temp_reg_57 = _mm_packus_epi16(m_temp_reg_57, m_temp_reg_57);
+
+                _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_50);
+                pu1_dst += dst_strd;
+                _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_51);
+                pu1_dst += dst_strd;
+                _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_52);
+                pu1_dst += dst_strd;
+                _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_53);
+                pu1_dst += dst_strd;
+                _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_54);
+                pu1_dst += dst_strd;
+                _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_55);
+                pu1_dst += dst_strd;
+                _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_56);
+                pu1_dst += dst_strd;
+                _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_57);
+                pu1_dst += dst_strd;
+            }
+        }
+    }
+    else
+
+    {
+
+        /* ee0 is present in the registers m_temp_reg_10 and m_temp_reg_11 */
+        /* ee1 is present in the registers m_temp_reg_12 and m_temp_reg_13 */
+        if(!check_row_stage_1)
+        {
+            /* ee0 is present in the registers m_temp_reg_10 and m_temp_reg_11 */
+            /* ee1 is present in the registers m_temp_reg_12 and m_temp_reg_13 */
+            {
+                //Interleaving 0,4 row in 0 , 1 Rishab
+                /*coef2 for m_temp_reg_12 and m_temp_reg_13 , coef1 for m_temp_reg_10 and m_temp_reg_11*/
+                m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[3][0]);
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[0][0]);
+
+                m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_70, m_temp_reg_74);
+                m_temp_reg_1 = _mm_unpackhi_epi16(m_temp_reg_70, m_temp_reg_74);
+
+                m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
+                m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2);
+
+
+                m_temp_reg_11 = _mm_madd_epi16(m_temp_reg_1, m_coeff1);
+                m_temp_reg_13 = _mm_madd_epi16(m_temp_reg_1, m_coeff2);
+            }
+
+
+            /* eo0 is present in the registers m_temp_reg_14 and m_temp_reg_15 */
+            /* eo1 is present in the registers m_temp_reg_16 and m_temp_reg_17 */
+            {
+
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[1][0]); //sub 2B*36-6B*83 ,2T*36-6T*83
+                m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[2][0]); //add 2B*83+6B*36 ,2T*83+6T*36
+
+                /* Combining instructions to eliminate them based on zero_rows : Lokesh */
+                //Interleaving 2,6 row in 4, 5 Rishab
+                m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_72, m_temp_reg_76);
+                m_temp_reg_5 = _mm_unpackhi_epi16(m_temp_reg_72, m_temp_reg_76);
+
+                m_temp_reg_16 = _mm_madd_epi16(m_temp_reg_4, m_coeff1);
+                m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_4, m_coeff2);
+
+                m_temp_reg_17 = _mm_madd_epi16(m_temp_reg_5, m_coeff1);
+                m_temp_reg_15 = _mm_madd_epi16(m_temp_reg_5, m_coeff2);
+
+
+
+                /* Loading coeff for computing o0 and o1 in the next block */
+
+                m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[2][0]);
+                //m_coeff4 = _mm_loadu_si128((__m128i *) &gai2_impeg2_idct_odd_8_q15[3][0]);
+
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[0][0]);
+                //m_coeff2 = _mm_loadu_si128((__m128i *) &gai2_impeg2_idct_odd_8_q15[1][0]);
+
+            }
+
+            /* e */
+            {
+                /* e0 stored in m_temp_reg_40 and m_temp_reg_41 */
+                /* e1 stored in m_temp_reg_42 and m_temp_reg_43 */
+                /* e3 stored in m_temp_reg_46 and m_temp_reg_47 */
+                /* e2 stored in m_temp_reg_44 and m_temp_reg_45 */
+                m_temp_reg_42 = _mm_add_epi32(m_temp_reg_12, m_temp_reg_16);
+                m_temp_reg_44 = _mm_sub_epi32(m_temp_reg_12, m_temp_reg_16);
+
+                m_temp_reg_40 = _mm_add_epi32(m_temp_reg_10, m_temp_reg_14);
+                m_temp_reg_46 = _mm_sub_epi32(m_temp_reg_10, m_temp_reg_14);
+
+                m_temp_reg_43 = _mm_add_epi32(m_temp_reg_13, m_temp_reg_17);
+                m_temp_reg_45 = _mm_sub_epi32(m_temp_reg_13, m_temp_reg_17);
+
+                m_temp_reg_41 = _mm_add_epi32(m_temp_reg_11, m_temp_reg_15);
+                m_temp_reg_47 = _mm_sub_epi32(m_temp_reg_11, m_temp_reg_15);
+
+            }
+
+            /* o */
+            {
+
+                /* o0 stored in m_temp_reg_30 and m_temp_reg_31 */
+                {
+
+                    m_temp_reg_60 = _mm_unpacklo_epi16(m_temp_reg_71, m_temp_reg_73);
+                    m_temp_reg_61 = _mm_unpackhi_epi16(m_temp_reg_71, m_temp_reg_73);
+                    //o0:1B*89+3B*75,1T*89+3T*75
+                    m_temp_reg_30 = _mm_madd_epi16(m_temp_reg_60, m_coeff1);
+                    m_temp_reg_31 = _mm_madd_epi16(m_temp_reg_61, m_coeff1);
+
+                    m_rdng_factor = _mm_cvtsi32_si128((1 << (i4_shift - 1)));
+                    m_rdng_factor = _mm_shuffle_epi32(m_rdng_factor, 0x0000);
+
+                }
+
+                /* Column 0 of destination computed here */
+                /* It is stored in m_temp_reg_50 */
+                /* Column 7 of destination computed here */
+                /* It is stored in m_temp_reg_57 */
+                {
+
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30);
+                    m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_40, m_temp_reg_30);
+
+                    m_temp_reg_63 = _mm_add_epi32(m_temp_reg_41, m_temp_reg_31);
+                    m_temp_reg_67 = _mm_sub_epi32(m_temp_reg_41, m_temp_reg_31);
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor);
+                    m_temp_reg_63 = _mm_add_epi32(m_temp_reg_63, m_rdng_factor);
+                    m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor);
+                    m_temp_reg_67 = _mm_add_epi32(m_temp_reg_67, m_rdng_factor);
+
+                    m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift);
+                    m_temp_reg_63 = _mm_srai_epi32(m_temp_reg_63, i4_shift);
+                    m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift);
+                    m_temp_reg_67 = _mm_srai_epi32(m_temp_reg_67, i4_shift);
+
+                    //o1:1B*75-3B*18,1T*75-3T*18
+                    m_temp_reg_32 = _mm_madd_epi16(m_temp_reg_60, m_coeff3);
+                    m_temp_reg_33 = _mm_madd_epi16(m_temp_reg_61, m_coeff3);
+
+                    m_temp_reg_50 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63);
+                    m_temp_reg_57 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_67);
+
+                    /* Loading coeff for computing o2  in the next block */
+
+                    m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[4][0]);
+
+                }
+
+                /* Column 1 of destination computed here */
+                /* It is stored in m_temp_reg_51 */
+                /* Column 6 of destination computed here */
+                /* It is stored in m_temp_reg_56 */
+                {
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_32);
+                    m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_42, m_temp_reg_32);
+
+                    m_temp_reg_63 = _mm_add_epi32(m_temp_reg_43, m_temp_reg_33);
+                    m_temp_reg_67 = _mm_sub_epi32(m_temp_reg_43, m_temp_reg_33);
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor);
+                    m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor);
+                    m_temp_reg_63 = _mm_add_epi32(m_temp_reg_63, m_rdng_factor);
+                    m_temp_reg_67 = _mm_add_epi32(m_temp_reg_67, m_rdng_factor);
+
+                    m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift);
+                    m_temp_reg_63 = _mm_srai_epi32(m_temp_reg_63, i4_shift);
+                    m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift);
+                    m_temp_reg_67 = _mm_srai_epi32(m_temp_reg_67, i4_shift);
+
+                    //o2:1B*50-3B*89,1T*50-3T*89
+                    m_temp_reg_34 = _mm_madd_epi16(m_temp_reg_60, m_coeff1);
+                    m_temp_reg_35 = _mm_madd_epi16(m_temp_reg_61, m_coeff1);
+
+                    m_temp_reg_51 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63);
+                    m_temp_reg_56 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_67);
+
+
+                    /* o2 stored in m_temp_reg_34 and m_temp_reg_35 */
+
+
+                    /* Loading coeff for computing o3  in the next block */
+
+                    m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[6][0]);
+
+                }
+
+                /* Column 2 of destination computed here */
+                /* It is stored in m_temp_reg_52 */
+                /* Column 5 of destination computed here */
+                /* It is stored in m_temp_reg_55 */
+                {
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_34);
+                    m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_44, m_temp_reg_34);
+
+                    m_temp_reg_63 = _mm_add_epi32(m_temp_reg_45, m_temp_reg_35);
+                    m_temp_reg_67 = _mm_sub_epi32(m_temp_reg_45, m_temp_reg_35);
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor);
+                    m_temp_reg_63 = _mm_add_epi32(m_temp_reg_63, m_rdng_factor);
+                    m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor);
+                    m_temp_reg_67 = _mm_add_epi32(m_temp_reg_67, m_rdng_factor);
+
+                    m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift);
+                    m_temp_reg_63 = _mm_srai_epi32(m_temp_reg_63, i4_shift);
+                    m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift);
+                    m_temp_reg_67 = _mm_srai_epi32(m_temp_reg_67, i4_shift);
+
+                    //o3:1B*18-3B*50,1T*18-3T*50
+                    m_temp_reg_36 = _mm_madd_epi16(m_temp_reg_60, m_coeff3);
+                    m_temp_reg_37 = _mm_madd_epi16(m_temp_reg_61, m_coeff3);
+
+                    m_temp_reg_52 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63);
+                    m_temp_reg_55 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_67);
+
+
+
+                    /* o3 stored in m_temp_reg_36 and m_temp_reg_37 */
+
+
+                }
+
+                /* Column 3 of destination computed here */
+                /* It is stored in m_temp_reg_53 */
+                /* Column 4 of destination computed here */
+                /* It is stored in m_temp_reg_54 */
+                {
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_46, m_temp_reg_36);
+                    m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_46, m_temp_reg_36);
+
+                    m_temp_reg_63 = _mm_add_epi32(m_temp_reg_47, m_temp_reg_37);
+                    m_temp_reg_67 = _mm_sub_epi32(m_temp_reg_47, m_temp_reg_37);
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor);
+                    m_temp_reg_63 = _mm_add_epi32(m_temp_reg_63, m_rdng_factor);
+                    m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor);
+                    m_temp_reg_67 = _mm_add_epi32(m_temp_reg_67, m_rdng_factor);
+
+                    m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift);
+                    m_temp_reg_63 = _mm_srai_epi32(m_temp_reg_63, i4_shift);
+                    m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift);
+                    m_temp_reg_67 = _mm_srai_epi32(m_temp_reg_67, i4_shift);
+
+                    m_temp_reg_53 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63);
+                    m_temp_reg_54 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_67);
+                }
+            }
+
+            /* Transpose of the destination 8x8 matrix done here */
+            /* and ultimately stored in registers m_temp_reg_50 to m_temp_reg_57 */
+            /* respectively */
+            {
+
+
+                m_temp_reg_10 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_51);
+                m_temp_reg_11 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_53);
+                m_temp_reg_14 = _mm_unpackhi_epi16(m_temp_reg_50, m_temp_reg_51);
+                m_temp_reg_15 = _mm_unpackhi_epi16(m_temp_reg_52, m_temp_reg_53);
+                m_temp_reg_0 = _mm_unpacklo_epi32(m_temp_reg_10, m_temp_reg_11);
+                m_temp_reg_1 = _mm_unpackhi_epi32(m_temp_reg_10, m_temp_reg_11);
+                m_temp_reg_2 = _mm_unpacklo_epi32(m_temp_reg_14, m_temp_reg_15);
+                m_temp_reg_3 = _mm_unpackhi_epi32(m_temp_reg_14, m_temp_reg_15);
+
+                m_temp_reg_12 = _mm_unpacklo_epi16(m_temp_reg_54, m_temp_reg_55);
+                m_temp_reg_13 = _mm_unpacklo_epi16(m_temp_reg_56, m_temp_reg_57);
+                m_temp_reg_16 = _mm_unpackhi_epi16(m_temp_reg_54, m_temp_reg_55);
+                m_temp_reg_17 = _mm_unpackhi_epi16(m_temp_reg_56, m_temp_reg_57);
+                m_temp_reg_4 = _mm_unpacklo_epi32(m_temp_reg_12, m_temp_reg_13);
+                m_temp_reg_5 = _mm_unpackhi_epi32(m_temp_reg_12, m_temp_reg_13);
+                m_temp_reg_6 = _mm_unpacklo_epi32(m_temp_reg_16, m_temp_reg_17);
+                m_temp_reg_7 = _mm_unpackhi_epi32(m_temp_reg_16, m_temp_reg_17);
+
+                m_temp_reg_50 = _mm_unpacklo_epi64(m_temp_reg_0, m_temp_reg_4);
+                m_temp_reg_51 = _mm_unpackhi_epi64(m_temp_reg_0, m_temp_reg_4);
+                m_temp_reg_52 = _mm_unpacklo_epi64(m_temp_reg_1, m_temp_reg_5);
+                m_temp_reg_53 = _mm_unpackhi_epi64(m_temp_reg_1, m_temp_reg_5);
+
+                m_temp_reg_54 = _mm_unpacklo_epi64(m_temp_reg_2, m_temp_reg_6);
+                m_temp_reg_55 = _mm_unpackhi_epi64(m_temp_reg_2, m_temp_reg_6);
+                m_temp_reg_56 = _mm_unpacklo_epi64(m_temp_reg_3, m_temp_reg_7);
+                m_temp_reg_57 = _mm_unpackhi_epi64(m_temp_reg_3, m_temp_reg_7);
+            }
+        }
+        else
+        {
+
+            /* ee0 is present in the registers m_temp_reg_10 and m_temp_reg_11 */
+            /* ee1 is present in the registers m_temp_reg_12 and m_temp_reg_13 */
+            {
+                //Interleaving 0,4 row in 0 , 1 Rishab
+                /*coef2 for m_temp_reg_12 and m_temp_reg_13 , coef1 for m_temp_reg_10 and m_temp_reg_11*/
+                m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[3][0]);
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[0][0]);
+
+                m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_70, m_temp_reg_74);
+                m_temp_reg_1 = _mm_unpackhi_epi16(m_temp_reg_70, m_temp_reg_74);
+
+                m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
+                m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2);
+
+
+                m_temp_reg_11 = _mm_madd_epi16(m_temp_reg_1, m_coeff1);
+                m_temp_reg_13 = _mm_madd_epi16(m_temp_reg_1, m_coeff2);
+            }
+
+
+            /* eo0 is present in the registers m_temp_reg_14 and m_temp_reg_15 */
+            /* eo1 is present in the registers m_temp_reg_16 and m_temp_reg_17 */
+            {
+
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[1][0]); //sub 2B*36-6B*83 ,2T*36-6T*83
+                m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q15[2][0]); //add 2B*83+6B*36 ,2T*83+6T*36
+
+                /* Combining instructions to eliminate them based on zero_rows : Lokesh */
+                //Interleaving 2,6 row in 4, 5 Rishab
+                m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_72, m_temp_reg_76);
+                m_temp_reg_5 = _mm_unpackhi_epi16(m_temp_reg_72, m_temp_reg_76);
+
+                m_temp_reg_16 = _mm_madd_epi16(m_temp_reg_4, m_coeff1);
+                m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_4, m_coeff2);
+
+                m_temp_reg_17 = _mm_madd_epi16(m_temp_reg_5, m_coeff1);
+                m_temp_reg_15 = _mm_madd_epi16(m_temp_reg_5, m_coeff2);
+
+
+
+                /* Loading coeff for computing o0 and o1 in the next block */
+
+                m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[2][0]);
+                m_coeff4 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[3][0]);
+
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[0][0]);
+                m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[1][0]);
+
+            }
+
+            /* e */
+            {
+                /* e0 stored in m_temp_reg_40 and m_temp_reg_41 */
+                /* e1 stored in m_temp_reg_42 and m_temp_reg_43 */
+                /* e3 stored in m_temp_reg_46 and m_temp_reg_47 */
+                /* e2 stored in m_temp_reg_44 and m_temp_reg_45 */
+                m_temp_reg_42 = _mm_add_epi32(m_temp_reg_12, m_temp_reg_16);
+                m_temp_reg_44 = _mm_sub_epi32(m_temp_reg_12, m_temp_reg_16);
+
+                m_temp_reg_40 = _mm_add_epi32(m_temp_reg_10, m_temp_reg_14);
+                m_temp_reg_46 = _mm_sub_epi32(m_temp_reg_10, m_temp_reg_14);
+
+                m_temp_reg_43 = _mm_add_epi32(m_temp_reg_13, m_temp_reg_17);
+                m_temp_reg_45 = _mm_sub_epi32(m_temp_reg_13, m_temp_reg_17);
+
+                m_temp_reg_41 = _mm_add_epi32(m_temp_reg_11, m_temp_reg_15);
+                m_temp_reg_47 = _mm_sub_epi32(m_temp_reg_11, m_temp_reg_15);
+
+            }
+
+            /* o */
+            {
+
+                /* o0 stored in m_temp_reg_30 and m_temp_reg_31 */
+                {
+
+                    m_temp_reg_60 = _mm_unpacklo_epi16(m_temp_reg_71, m_temp_reg_73);
+                    m_temp_reg_61 = _mm_unpackhi_epi16(m_temp_reg_71, m_temp_reg_73);
+                    m_temp_reg_64 = _mm_unpacklo_epi16(m_temp_reg_75, m_temp_reg_77);
+                    m_temp_reg_65 = _mm_unpackhi_epi16(m_temp_reg_75, m_temp_reg_77);
+                    //o0:1B*89+3B*75,1T*89+3T*75,5B*50+7B*18,5T*50+7T*18
+                    m_temp_reg_20 = _mm_madd_epi16(m_temp_reg_60, m_coeff1);
+                    m_temp_reg_21 = _mm_madd_epi16(m_temp_reg_61, m_coeff1);
+                    m_temp_reg_24 = _mm_madd_epi16(m_temp_reg_64, m_coeff2);
+                    m_temp_reg_25 = _mm_madd_epi16(m_temp_reg_65, m_coeff2);
+
+
+                    m_rdng_factor = _mm_cvtsi32_si128((1 << (i4_shift - 1)));
+                    m_rdng_factor = _mm_shuffle_epi32(m_rdng_factor, 0x0000);
+
+                    m_temp_reg_30 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_24);
+                    m_temp_reg_31 = _mm_add_epi32(m_temp_reg_21, m_temp_reg_25);
+                }
+
+                /* Column 0 of destination computed here */
+                /* It is stored in m_temp_reg_50 */
+                /* Column 7 of destination computed here */
+                /* It is stored in m_temp_reg_57 */
+                {
+
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30);
+                    m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_40, m_temp_reg_30);
+
+                    m_temp_reg_63 = _mm_add_epi32(m_temp_reg_41, m_temp_reg_31);
+                    m_temp_reg_67 = _mm_sub_epi32(m_temp_reg_41, m_temp_reg_31);
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor);
+                    m_temp_reg_63 = _mm_add_epi32(m_temp_reg_63, m_rdng_factor);
+                    m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor);
+                    m_temp_reg_67 = _mm_add_epi32(m_temp_reg_67, m_rdng_factor);
+
+                    m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift);
+                    m_temp_reg_63 = _mm_srai_epi32(m_temp_reg_63, i4_shift);
+                    m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift);
+                    m_temp_reg_67 = _mm_srai_epi32(m_temp_reg_67, i4_shift);
+
+                    //o1:1B*75-3B*18,1T*75-3T*18,5B*89+7B*50,5T*89+7T*50
+                    m_temp_reg_22 = _mm_madd_epi16(m_temp_reg_60, m_coeff3);
+                    m_temp_reg_26 = _mm_madd_epi16(m_temp_reg_64, m_coeff4);
+                    m_temp_reg_23 = _mm_madd_epi16(m_temp_reg_61, m_coeff3);
+                    m_temp_reg_27 = _mm_madd_epi16(m_temp_reg_65, m_coeff4);
+
+                    m_temp_reg_50 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63);
+                    m_temp_reg_57 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_67);
+
+                    /* Loading coeff for computing o2  in the next block */
+
+                    m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[4][0]);
+                    m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[5][0]);
+
+                    /* o1 stored in m_temp_reg_32 and m_temp_reg_33 */
+                    m_temp_reg_32 = _mm_sub_epi32(m_temp_reg_22, m_temp_reg_26);
+                    m_temp_reg_33 = _mm_sub_epi32(m_temp_reg_23, m_temp_reg_27);
+                }
+
+                /* Column 1 of destination computed here */
+                /* It is stored in m_temp_reg_51 */
+                /* Column 6 of destination computed here */
+                /* It is stored in m_temp_reg_56 */
+                {
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_32);
+                    m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_42, m_temp_reg_32);
+
+                    m_temp_reg_63 = _mm_add_epi32(m_temp_reg_43, m_temp_reg_33);
+                    m_temp_reg_67 = _mm_sub_epi32(m_temp_reg_43, m_temp_reg_33);
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor);
+                    m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor);
+                    m_temp_reg_63 = _mm_add_epi32(m_temp_reg_63, m_rdng_factor);
+                    m_temp_reg_67 = _mm_add_epi32(m_temp_reg_67, m_rdng_factor);
+
+                    m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift);
+                    m_temp_reg_63 = _mm_srai_epi32(m_temp_reg_63, i4_shift);
+                    m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift);
+                    m_temp_reg_67 = _mm_srai_epi32(m_temp_reg_67, i4_shift);
+
+                    //o2:1B*50-3B*89,1T*50-3T*89,5B*18+7B*75,5T*18+7T*75
+                    m_temp_reg_20 = _mm_madd_epi16(m_temp_reg_60, m_coeff1);
+                    m_temp_reg_24 = _mm_madd_epi16(m_temp_reg_64, m_coeff2);
+                    m_temp_reg_21 = _mm_madd_epi16(m_temp_reg_61, m_coeff1);
+                    m_temp_reg_25 = _mm_madd_epi16(m_temp_reg_65, m_coeff2);
+
+                    m_temp_reg_51 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63);
+                    m_temp_reg_56 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_67);
+
+
+                    /* o2 stored in m_temp_reg_34 and m_temp_reg_35 */
+
+
+                    /* Loading coeff for computing o3  in the next block */
+
+                    m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[6][0]);
+                    m_coeff4 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q15[7][0]);
+
+                    m_temp_reg_34 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_24);
+                    m_temp_reg_35 = _mm_add_epi32(m_temp_reg_21, m_temp_reg_25);
+                }
+
+                /* Column 2 of destination computed here */
+                /* It is stored in m_temp_reg_52 */
+                /* Column 5 of destination computed here */
+                /* It is stored in m_temp_reg_55 */
+                {
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_34);
+                    m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_44, m_temp_reg_34);
+
+                    m_temp_reg_63 = _mm_add_epi32(m_temp_reg_45, m_temp_reg_35);
+                    m_temp_reg_67 = _mm_sub_epi32(m_temp_reg_45, m_temp_reg_35);
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor);
+                    m_temp_reg_63 = _mm_add_epi32(m_temp_reg_63, m_rdng_factor);
+                    m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor);
+                    m_temp_reg_67 = _mm_add_epi32(m_temp_reg_67, m_rdng_factor);
+
+                    m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift);
+                    m_temp_reg_63 = _mm_srai_epi32(m_temp_reg_63, i4_shift);
+                    m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift);
+                    m_temp_reg_67 = _mm_srai_epi32(m_temp_reg_67, i4_shift);
+
+                    //o3:1B*18-3B*50,1T*18-3T*50,5B*75-7B*89,5T*75-7T*89
+                    m_temp_reg_22 = _mm_madd_epi16(m_temp_reg_60, m_coeff3);
+                    m_temp_reg_26 = _mm_madd_epi16(m_temp_reg_64, m_coeff4);
+                    m_temp_reg_23 = _mm_madd_epi16(m_temp_reg_61, m_coeff3);
+                    m_temp_reg_27 = _mm_madd_epi16(m_temp_reg_65, m_coeff4);
+
+                    m_temp_reg_52 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63);
+                    m_temp_reg_55 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_67);
+
+
+
+                    /* o3 stored in m_temp_reg_36 and m_temp_reg_37 */
+
+
+                    m_temp_reg_36 = _mm_add_epi32(m_temp_reg_22, m_temp_reg_26);
+                    m_temp_reg_37 = _mm_add_epi32(m_temp_reg_23, m_temp_reg_27);
+                }
+
+                /* Column 3 of destination computed here */
+                /* It is stored in m_temp_reg_53 */
+                /* Column 4 of destination computed here */
+                /* It is stored in m_temp_reg_54 */
+                {
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_46, m_temp_reg_36);
+                    m_temp_reg_66 = _mm_sub_epi32(m_temp_reg_46, m_temp_reg_36);
+
+                    m_temp_reg_63 = _mm_add_epi32(m_temp_reg_47, m_temp_reg_37);
+                    m_temp_reg_67 = _mm_sub_epi32(m_temp_reg_47, m_temp_reg_37);
+
+                    m_temp_reg_62 = _mm_add_epi32(m_temp_reg_62, m_rdng_factor);
+                    m_temp_reg_63 = _mm_add_epi32(m_temp_reg_63, m_rdng_factor);
+                    m_temp_reg_66 = _mm_add_epi32(m_temp_reg_66, m_rdng_factor);
+                    m_temp_reg_67 = _mm_add_epi32(m_temp_reg_67, m_rdng_factor);
+
+                    m_temp_reg_62 = _mm_srai_epi32(m_temp_reg_62, i4_shift);
+                    m_temp_reg_63 = _mm_srai_epi32(m_temp_reg_63, i4_shift);
+                    m_temp_reg_66 = _mm_srai_epi32(m_temp_reg_66, i4_shift);
+                    m_temp_reg_67 = _mm_srai_epi32(m_temp_reg_67, i4_shift);
+
+                    m_temp_reg_53 = _mm_packs_epi32(m_temp_reg_62, m_temp_reg_63);
+                    m_temp_reg_54 = _mm_packs_epi32(m_temp_reg_66, m_temp_reg_67);
+                }
+            }
+
+            /* Transpose of the destination 8x8 matrix done here */
+            /* and ultimately stored in registers m_temp_reg_50 to m_temp_reg_57 */
+            /* respectively */
+            {
+
+
+                m_temp_reg_10 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_51);
+                m_temp_reg_11 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_53);
+                m_temp_reg_14 = _mm_unpackhi_epi16(m_temp_reg_50, m_temp_reg_51);
+                m_temp_reg_15 = _mm_unpackhi_epi16(m_temp_reg_52, m_temp_reg_53);
+                m_temp_reg_0 = _mm_unpacklo_epi32(m_temp_reg_10, m_temp_reg_11);
+                m_temp_reg_1 = _mm_unpackhi_epi32(m_temp_reg_10, m_temp_reg_11);
+                m_temp_reg_2 = _mm_unpacklo_epi32(m_temp_reg_14, m_temp_reg_15);
+                m_temp_reg_3 = _mm_unpackhi_epi32(m_temp_reg_14, m_temp_reg_15);
+
+                m_temp_reg_12 = _mm_unpacklo_epi16(m_temp_reg_54, m_temp_reg_55);
+                m_temp_reg_13 = _mm_unpacklo_epi16(m_temp_reg_56, m_temp_reg_57);
+                m_temp_reg_16 = _mm_unpackhi_epi16(m_temp_reg_54, m_temp_reg_55);
+                m_temp_reg_17 = _mm_unpackhi_epi16(m_temp_reg_56, m_temp_reg_57);
+                m_temp_reg_4 = _mm_unpacklo_epi32(m_temp_reg_12, m_temp_reg_13);
+                m_temp_reg_5 = _mm_unpackhi_epi32(m_temp_reg_12, m_temp_reg_13);
+                m_temp_reg_6 = _mm_unpacklo_epi32(m_temp_reg_16, m_temp_reg_17);
+                m_temp_reg_7 = _mm_unpackhi_epi32(m_temp_reg_16, m_temp_reg_17);
+
+                m_temp_reg_50 = _mm_unpacklo_epi64(m_temp_reg_0, m_temp_reg_4);
+                m_temp_reg_51 = _mm_unpackhi_epi64(m_temp_reg_0, m_temp_reg_4);
+                m_temp_reg_52 = _mm_unpacklo_epi64(m_temp_reg_1, m_temp_reg_5);
+                m_temp_reg_53 = _mm_unpackhi_epi64(m_temp_reg_1, m_temp_reg_5);
+
+                m_temp_reg_54 = _mm_unpacklo_epi64(m_temp_reg_2, m_temp_reg_6);
+                m_temp_reg_55 = _mm_unpackhi_epi64(m_temp_reg_2, m_temp_reg_6);
+                m_temp_reg_56 = _mm_unpacklo_epi64(m_temp_reg_3, m_temp_reg_7);
+                m_temp_reg_57 = _mm_unpackhi_epi64(m_temp_reg_3, m_temp_reg_7);
+            }
+        }
+        /* Stage 2 */
+
+        i4_shift = IDCT_STG2_SHIFT;
+
+        {
+
+            /* ee0 is present in the registers m_temp_reg_10 and m_temp_reg_11 */
+            /* ee1 is present in the registers m_temp_reg_12 and m_temp_reg_13 */
+            {
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q11[0][0]); //add
+                m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q11[3][0]); //sub
+
+                m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_54);
+                m_temp_reg_1 = _mm_unpackhi_epi16(m_temp_reg_50, m_temp_reg_54);
+
+                m_temp_reg_10 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
+                m_temp_reg_12 = _mm_madd_epi16(m_temp_reg_0, m_coeff2);
+                m_temp_reg_11 = _mm_madd_epi16(m_temp_reg_1, m_coeff1);
+                m_temp_reg_13 = _mm_madd_epi16(m_temp_reg_1, m_coeff2);
+
+
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q11[1][0]);
+                m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_even_8_q11[2][0]);
+            }
+
+
+            /* eo0 is present in the registers m_temp_reg_14 and m_temp_reg_15 */
+            /* eo1 is present in the registers m_temp_reg_16 and m_temp_reg_17 */
+            {
+                m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_56);
+                m_temp_reg_1 = _mm_unpackhi_epi16(m_temp_reg_52, m_temp_reg_56);
+
+
+                m_temp_reg_16 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
+                m_temp_reg_14 = _mm_madd_epi16(m_temp_reg_0, m_coeff2);
+                m_temp_reg_17 = _mm_madd_epi16(m_temp_reg_1, m_coeff1);
+                m_temp_reg_15 = _mm_madd_epi16(m_temp_reg_1, m_coeff2);
+
+                /* Loading coeff for computing o0 in the next block */
+                m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[0][0]);
+                m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[1][0]);
+
+
+                m_temp_reg_0 = _mm_unpacklo_epi16(m_temp_reg_51, m_temp_reg_53);
+                m_temp_reg_1 = _mm_unpackhi_epi16(m_temp_reg_51, m_temp_reg_53);
+            }
+
+            /* e */
+            {
+                /* e0 stored in m_temp_reg_40 and m_temp_reg_41 */
+                /* e1 stored in m_temp_reg_42 and m_temp_reg_43 */
+                /* e3 stored in m_temp_reg_46 and m_temp_reg_47 */
+                /* e2 stored in m_temp_reg_44 and m_temp_reg_45 */
+                m_temp_reg_42 = _mm_add_epi32(m_temp_reg_12, m_temp_reg_16);
+                m_temp_reg_44 = _mm_sub_epi32(m_temp_reg_12, m_temp_reg_16);
+
+                m_temp_reg_40 = _mm_add_epi32(m_temp_reg_10, m_temp_reg_14);
+                m_temp_reg_46 = _mm_sub_epi32(m_temp_reg_10, m_temp_reg_14);
+
+                m_temp_reg_43 = _mm_add_epi32(m_temp_reg_13, m_temp_reg_17);
+                m_temp_reg_45 = _mm_sub_epi32(m_temp_reg_13, m_temp_reg_17);
+
+                m_temp_reg_41 = _mm_add_epi32(m_temp_reg_11, m_temp_reg_15);
+                m_temp_reg_47 = _mm_sub_epi32(m_temp_reg_11, m_temp_reg_15);
+
+            }
+
+            /* o */
+            {
+                m_temp_reg_4 = _mm_unpacklo_epi16(m_temp_reg_55, m_temp_reg_57);
+                m_temp_reg_5 = _mm_unpackhi_epi16(m_temp_reg_55, m_temp_reg_57);
+
+                /* o0 stored in m_temp_reg_30 and m_temp_reg_31 */
+                {
+                    //o0:1B*89+3B*75,1T*89+3T*75,5B*50+7B*18,5T*50+7T*18
+                    m_temp_reg_20 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
+                    m_temp_reg_21 = _mm_madd_epi16(m_temp_reg_1, m_coeff1);
+                    m_temp_reg_24 = _mm_madd_epi16(m_temp_reg_4, m_coeff2);
+                    m_temp_reg_25 = _mm_madd_epi16(m_temp_reg_5, m_coeff2);
+
+                    m_rdng_factor = _mm_cvtsi32_si128((1 << (i4_shift - 1)));
+                    m_rdng_factor = _mm_shuffle_epi32(m_rdng_factor, 0x0000);
+                    /* Loading coeff for computing o1 in the next block */
+                    m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[2][0]);
+                    m_coeff4 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[3][0]);
+
+                    m_temp_reg_30 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_24);
+                    m_temp_reg_31 = _mm_add_epi32(m_temp_reg_21, m_temp_reg_25);
+                }
+
+                /* Column 0 of destination computed here */
+                /* It is stored in m_temp_reg_50 */
+                /* Column 7 of destination computed here */
+                /* It is stored in m_temp_reg_57 */
+                {
+                    m_temp_reg_2 = _mm_add_epi32(m_temp_reg_40, m_temp_reg_30);
+                    m_temp_reg_6 = _mm_sub_epi32(m_temp_reg_40, m_temp_reg_30);
+
+                    m_temp_reg_3 = _mm_add_epi32(m_temp_reg_41, m_temp_reg_31);
+                    m_temp_reg_7 = _mm_sub_epi32(m_temp_reg_41, m_temp_reg_31);
+
+                    m_temp_reg_2 = _mm_add_epi32(m_temp_reg_2, m_rdng_factor);
+                    m_temp_reg_3 = _mm_add_epi32(m_temp_reg_3, m_rdng_factor);
+                    m_temp_reg_6 = _mm_add_epi32(m_temp_reg_6, m_rdng_factor);
+                    m_temp_reg_7 = _mm_add_epi32(m_temp_reg_7, m_rdng_factor);
+
+                    m_temp_reg_2 = _mm_srai_epi32(m_temp_reg_2, i4_shift);
+                    m_temp_reg_3 = _mm_srai_epi32(m_temp_reg_3, i4_shift);
+                    m_temp_reg_6 = _mm_srai_epi32(m_temp_reg_6, i4_shift);
+                    m_temp_reg_7 = _mm_srai_epi32(m_temp_reg_7, i4_shift);
+
+                    //o1:1B*75-3B*18,1T*75-3T*18,5B*89+7B*50,5T*89+7T*50
+                    m_temp_reg_22 = _mm_madd_epi16(m_temp_reg_0, m_coeff3);
+                    m_temp_reg_26 = _mm_madd_epi16(m_temp_reg_4, m_coeff4);
+                    m_temp_reg_23 = _mm_madd_epi16(m_temp_reg_1, m_coeff3);
+                    m_temp_reg_27 = _mm_madd_epi16(m_temp_reg_5, m_coeff4);
+
+                    m_temp_reg_50 = _mm_packs_epi32(m_temp_reg_2, m_temp_reg_3);
+                    m_temp_reg_57 = _mm_packs_epi32(m_temp_reg_6, m_temp_reg_7);
+
+
+                    /* o1 stored in m_temp_reg_32 and m_temp_reg_33 */
+
+
+                    /* Loading coeff for computing o2  in the next block */
+                    m_coeff1 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[4][0]);
+                    m_coeff2 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[5][0]);
+
+                    m_temp_reg_32 = _mm_sub_epi32(m_temp_reg_22, m_temp_reg_26);
+                    m_temp_reg_33 = _mm_sub_epi32(m_temp_reg_23, m_temp_reg_27);
+                }
+
+                /* Column 1 of destination computed here */
+                /* It is stored in m_temp_reg_51 */
+                /* Column 6 of destination computed here */
+                /* It is stored in m_temp_reg_56 */
+                {
+                    m_temp_reg_2 = _mm_add_epi32(m_temp_reg_42, m_temp_reg_32);
+                    m_temp_reg_6 = _mm_sub_epi32(m_temp_reg_42, m_temp_reg_32);
+
+                    m_temp_reg_3 = _mm_add_epi32(m_temp_reg_43, m_temp_reg_33);
+                    m_temp_reg_7 = _mm_sub_epi32(m_temp_reg_43, m_temp_reg_33);
+
+                    m_temp_reg_2 = _mm_add_epi32(m_temp_reg_2, m_rdng_factor);
+                    m_temp_reg_3 = _mm_add_epi32(m_temp_reg_3, m_rdng_factor);
+                    m_temp_reg_6 = _mm_add_epi32(m_temp_reg_6, m_rdng_factor);
+                    m_temp_reg_7 = _mm_add_epi32(m_temp_reg_7, m_rdng_factor);
+
+                    m_temp_reg_2 = _mm_srai_epi32(m_temp_reg_2, i4_shift);
+                    m_temp_reg_3 = _mm_srai_epi32(m_temp_reg_3, i4_shift);
+                    m_temp_reg_6 = _mm_srai_epi32(m_temp_reg_6, i4_shift);
+                    m_temp_reg_7 = _mm_srai_epi32(m_temp_reg_7, i4_shift);
+
+                    //o2:1B*50-3B*89,1T*50-3T*89,5B*18+7B*75,5T*18+7T*75
+                    m_temp_reg_20 = _mm_madd_epi16(m_temp_reg_0, m_coeff1);
+                    m_temp_reg_24 = _mm_madd_epi16(m_temp_reg_4, m_coeff2);
+                    m_temp_reg_21 = _mm_madd_epi16(m_temp_reg_1, m_coeff1);
+                    m_temp_reg_25 = _mm_madd_epi16(m_temp_reg_5, m_coeff2);
+
+                    m_temp_reg_51 = _mm_packs_epi32(m_temp_reg_2, m_temp_reg_3);
+                    m_temp_reg_56 = _mm_packs_epi32(m_temp_reg_6, m_temp_reg_7);
+
+
+                    /* o2 stored in m_temp_reg_34 and m_temp_reg_35 */
+
+                    /* Loading coeff for computing o3  in the next block */
+
+                    m_coeff3 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[6][0]);
+                    m_coeff4 = _mm_loadu_si128((__m128i *)&gai2_impeg2_idct_odd_8_q11[7][0]);
+
+                    m_temp_reg_34 = _mm_add_epi32(m_temp_reg_20, m_temp_reg_24);
+                    m_temp_reg_35 = _mm_add_epi32(m_temp_reg_21, m_temp_reg_25);
+                }
+
+                /* Column 2 of destination computed here */
+                /* It is stored in m_temp_reg_52 */
+                /* Column 5 of destination computed here */
+                /* It is stored in m_temp_reg_55 */
+                {
+                    m_temp_reg_2 = _mm_add_epi32(m_temp_reg_44, m_temp_reg_34);
+                    m_temp_reg_6 = _mm_sub_epi32(m_temp_reg_44, m_temp_reg_34);
+
+                    m_temp_reg_3 = _mm_add_epi32(m_temp_reg_45, m_temp_reg_35);
+                    m_temp_reg_7 = _mm_sub_epi32(m_temp_reg_45, m_temp_reg_35);
+
+                    m_temp_reg_2 = _mm_add_epi32(m_temp_reg_2, m_rdng_factor);
+                    m_temp_reg_3 = _mm_add_epi32(m_temp_reg_3, m_rdng_factor);
+                    m_temp_reg_6 = _mm_add_epi32(m_temp_reg_6, m_rdng_factor);
+                    m_temp_reg_7 = _mm_add_epi32(m_temp_reg_7, m_rdng_factor);
+
+                    m_temp_reg_2 = _mm_srai_epi32(m_temp_reg_2, i4_shift);
+                    m_temp_reg_3 = _mm_srai_epi32(m_temp_reg_3, i4_shift);
+                    m_temp_reg_6 = _mm_srai_epi32(m_temp_reg_6, i4_shift);
+                    m_temp_reg_7 = _mm_srai_epi32(m_temp_reg_7, i4_shift);
+
+                    //o3:1B*18-3B*50,1T*18-3T*50,5B*75-7B*89,5T*75-7T*89
+                    m_temp_reg_22 = _mm_madd_epi16(m_temp_reg_0, m_coeff3);
+                    m_temp_reg_26 = _mm_madd_epi16(m_temp_reg_4, m_coeff4);
+                    m_temp_reg_23 = _mm_madd_epi16(m_temp_reg_1, m_coeff3);
+                    m_temp_reg_27 = _mm_madd_epi16(m_temp_reg_5, m_coeff4);
+
+                    m_temp_reg_52 = _mm_packs_epi32(m_temp_reg_2, m_temp_reg_3);
+                    m_temp_reg_55 = _mm_packs_epi32(m_temp_reg_6, m_temp_reg_7);
+
+
+
+                    /* o3 stored in m_temp_reg_36 and m_temp_reg_37 */
+
+
+                    m_temp_reg_36 = _mm_add_epi32(m_temp_reg_22, m_temp_reg_26);
+                    m_temp_reg_37 = _mm_add_epi32(m_temp_reg_23, m_temp_reg_27);
+                }
+
+                /* Column 3 of destination computed here */
+                /* It is stored in m_temp_reg_53 */
+                /* Column 4 of destination computed here */
+                /* It is stored in m_temp_reg_54 */
+                {
+                    m_temp_reg_20 = _mm_add_epi32(m_temp_reg_46, m_temp_reg_36);
+                    m_temp_reg_22 = _mm_sub_epi32(m_temp_reg_46, m_temp_reg_36);
+
+                    m_temp_reg_21 = _mm_add_epi32(m_temp_reg_47, m_temp_reg_37);
+                    m_temp_reg_23 = _mm_sub_epi32(m_temp_reg_47, m_temp_reg_37);
+
+                    m_temp_reg_20 = _mm_add_epi32(m_temp_reg_20, m_rdng_factor);
+                    m_temp_reg_21 = _mm_add_epi32(m_temp_reg_21, m_rdng_factor);
+                    m_temp_reg_22 = _mm_add_epi32(m_temp_reg_22, m_rdng_factor);
+                    m_temp_reg_23 = _mm_add_epi32(m_temp_reg_23, m_rdng_factor);
+
+                    m_temp_reg_20 = _mm_srai_epi32(m_temp_reg_20, i4_shift);
+                    m_temp_reg_21 = _mm_srai_epi32(m_temp_reg_21, i4_shift);
+                    m_temp_reg_22 = _mm_srai_epi32(m_temp_reg_22, i4_shift);
+                    m_temp_reg_23 = _mm_srai_epi32(m_temp_reg_23, i4_shift);
+
+                    m_temp_reg_53 = _mm_packs_epi32(m_temp_reg_20, m_temp_reg_21);
+                    m_temp_reg_54 = _mm_packs_epi32(m_temp_reg_22, m_temp_reg_23);
+                }
+            }
+
+            /* Transpose of the destination 8x8 matrix done here */
+            /* and ultimately stored in registers m_temp_reg_10 to m_temp_reg_17 */
+            /* respectively */
+            {
+                m_temp_reg_10 = _mm_unpacklo_epi16(m_temp_reg_50, m_temp_reg_51);
+                m_temp_reg_11 = _mm_unpacklo_epi16(m_temp_reg_52, m_temp_reg_53);
+                m_temp_reg_14 = _mm_unpackhi_epi16(m_temp_reg_50, m_temp_reg_51);
+                m_temp_reg_15 = _mm_unpackhi_epi16(m_temp_reg_52, m_temp_reg_53);
+                m_temp_reg_0 = _mm_unpacklo_epi32(m_temp_reg_10, m_temp_reg_11);
+                m_temp_reg_1 = _mm_unpackhi_epi32(m_temp_reg_10, m_temp_reg_11);
+                m_temp_reg_2 = _mm_unpacklo_epi32(m_temp_reg_14, m_temp_reg_15);
+                m_temp_reg_3 = _mm_unpackhi_epi32(m_temp_reg_14, m_temp_reg_15);
+
+                m_temp_reg_12 = _mm_unpacklo_epi16(m_temp_reg_54, m_temp_reg_55);
+                m_temp_reg_13 = _mm_unpacklo_epi16(m_temp_reg_56, m_temp_reg_57);
+                m_temp_reg_16 = _mm_unpackhi_epi16(m_temp_reg_54, m_temp_reg_55);
+                m_temp_reg_17 = _mm_unpackhi_epi16(m_temp_reg_56, m_temp_reg_57);
+                m_temp_reg_4 = _mm_unpacklo_epi32(m_temp_reg_12, m_temp_reg_13);
+                m_temp_reg_5 = _mm_unpackhi_epi32(m_temp_reg_12, m_temp_reg_13);
+                m_temp_reg_6 = _mm_unpacklo_epi32(m_temp_reg_16, m_temp_reg_17);
+                m_temp_reg_7 = _mm_unpackhi_epi32(m_temp_reg_16, m_temp_reg_17);
+                m_temp_reg_10 = _mm_unpacklo_epi64(m_temp_reg_0, m_temp_reg_4);
+                m_temp_reg_11 = _mm_unpackhi_epi64(m_temp_reg_0, m_temp_reg_4);
+                m_temp_reg_12 = _mm_unpacklo_epi64(m_temp_reg_1, m_temp_reg_5);
+                m_temp_reg_13 = _mm_unpackhi_epi64(m_temp_reg_1, m_temp_reg_5);
+
+                m_temp_reg_14 = _mm_unpacklo_epi64(m_temp_reg_2, m_temp_reg_6);
+                m_temp_reg_15 = _mm_unpackhi_epi64(m_temp_reg_2, m_temp_reg_6);
+                m_temp_reg_16 = _mm_unpacklo_epi64(m_temp_reg_3, m_temp_reg_7);
+                m_temp_reg_17 = _mm_unpackhi_epi64(m_temp_reg_3, m_temp_reg_7);
+            }
+
+            /* Recon and store */
+            {
+                m_temp_reg_0 = _mm_loadl_epi64((__m128i *)pu1_pred);
+                pu1_pred += pred_strd;
+                m_temp_reg_1 = _mm_loadl_epi64((__m128i *)pu1_pred);
+                pu1_pred += pred_strd;
+                m_temp_reg_2 = _mm_loadl_epi64((__m128i *)pu1_pred);
+                pu1_pred += pred_strd;
+                m_temp_reg_3 = _mm_loadl_epi64((__m128i *)pu1_pred);
+                pu1_pred += pred_strd;
+                m_temp_reg_4 = _mm_loadl_epi64((__m128i *)pu1_pred);
+                pu1_pred += pred_strd;
+                m_temp_reg_5 = _mm_loadl_epi64((__m128i *)pu1_pred);
+                pu1_pred += pred_strd;
+                m_temp_reg_6 = _mm_loadl_epi64((__m128i *)pu1_pred);
+                pu1_pred += pred_strd;
+                m_temp_reg_7 = _mm_loadl_epi64((__m128i *)pu1_pred);
+
+
+                m_temp_reg_50 = _mm_setzero_si128();
+                m_temp_reg_0 = _mm_unpacklo_epi8(m_temp_reg_0, m_temp_reg_50);
+                m_temp_reg_1 = _mm_unpacklo_epi8(m_temp_reg_1, m_temp_reg_50);
+                m_temp_reg_2 = _mm_unpacklo_epi8(m_temp_reg_2, m_temp_reg_50);
+                m_temp_reg_3 = _mm_unpacklo_epi8(m_temp_reg_3, m_temp_reg_50);
+                m_temp_reg_4 = _mm_unpacklo_epi8(m_temp_reg_4, m_temp_reg_50);
+                m_temp_reg_5 = _mm_unpacklo_epi8(m_temp_reg_5, m_temp_reg_50);
+                m_temp_reg_6 = _mm_unpacklo_epi8(m_temp_reg_6, m_temp_reg_50);
+                m_temp_reg_7 = _mm_unpacklo_epi8(m_temp_reg_7, m_temp_reg_50);
+
+                m_temp_reg_50 = _mm_add_epi16(m_temp_reg_10, m_temp_reg_0);
+                m_temp_reg_51 = _mm_add_epi16(m_temp_reg_11, m_temp_reg_1);
+                m_temp_reg_52 = _mm_add_epi16(m_temp_reg_12, m_temp_reg_2);
+                m_temp_reg_53 = _mm_add_epi16(m_temp_reg_13, m_temp_reg_3);
+                m_temp_reg_54 = _mm_add_epi16(m_temp_reg_14, m_temp_reg_4);
+                m_temp_reg_55 = _mm_add_epi16(m_temp_reg_15, m_temp_reg_5);
+                m_temp_reg_56 = _mm_add_epi16(m_temp_reg_16, m_temp_reg_6);
+                m_temp_reg_57 = _mm_add_epi16(m_temp_reg_17, m_temp_reg_7);
+
+                m_temp_reg_50 = _mm_packus_epi16(m_temp_reg_50, m_temp_reg_50);
+                m_temp_reg_51 = _mm_packus_epi16(m_temp_reg_51, m_temp_reg_51);
+                m_temp_reg_52 = _mm_packus_epi16(m_temp_reg_52, m_temp_reg_52);
+                m_temp_reg_53 = _mm_packus_epi16(m_temp_reg_53, m_temp_reg_53);
+                m_temp_reg_54 = _mm_packus_epi16(m_temp_reg_54, m_temp_reg_54);
+                m_temp_reg_55 = _mm_packus_epi16(m_temp_reg_55, m_temp_reg_55);
+                m_temp_reg_56 = _mm_packus_epi16(m_temp_reg_56, m_temp_reg_56);
+                m_temp_reg_57 = _mm_packus_epi16(m_temp_reg_57, m_temp_reg_57);
+
+                _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_50);
+                pu1_dst += dst_strd;
+                _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_51);
+                pu1_dst += dst_strd;
+                _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_52);
+                pu1_dst += dst_strd;
+                _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_53);
+                pu1_dst += dst_strd;
+                _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_54);
+                pu1_dst += dst_strd;
+                _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_55);
+                pu1_dst += dst_strd;
+                _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_56);
+                pu1_dst += dst_strd;
+                _mm_storel_epi64((__m128i *)pu1_dst, m_temp_reg_57);
+                pu1_dst += dst_strd;
+
+            }
+
+
+        }
+
+
+    }
+}
+
+void impeg2_idct_recon_dc_mismatch_sse42(WORD16 *pi2_src,
+                            WORD16 *pi2_tmp,
+                            UWORD8 *pu1_pred,
+                            UWORD8 *pu1_dst,
+                            WORD32 src_strd,
+                            WORD32 pred_strd,
+                            WORD32 dst_strd,
+                            WORD32 zero_cols,
+                            WORD32 zero_rows)
+{
+    WORD32 val;
+    __m128i value_4x32b, mismatch_stg2_additive;
+    __m128i pred_r, pred_half0, pred_half1;
+    __m128i temp0, temp1;
+    __m128i round_stg2 = _mm_set1_epi32(IDCT_STG2_ROUND);
+
+    UNUSED(pi2_tmp);
+    UNUSED(src_strd);
+    UNUSED(zero_cols);
+    UNUSED(zero_rows);
+
+    val = pi2_src[0] * gai2_impeg2_idct_q15[0];
+    val = ((val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT);
+    val *= gai2_impeg2_idct_q11[0];
+    value_4x32b = _mm_set1_epi32(val);
+
+    // Row 0 processing
+    mismatch_stg2_additive = _mm_loadu_si128((__m128i *) gai2_impeg2_mismatch_stg2_additive);
+    pred_r = _mm_loadl_epi64((__m128i *) pu1_pred);
+    pred_r =  _mm_cvtepu8_epi16(pred_r);
+    temp0 = _mm_cvtepi16_epi32(mismatch_stg2_additive);
+    mismatch_stg2_additive = _mm_srli_si128(mismatch_stg2_additive, 8);
+    pred_half0 = _mm_cvtepu16_epi32(pred_r);
+    temp1 = _mm_cvtepi16_epi32(mismatch_stg2_additive);
+
+    pred_r = _mm_srli_si128(pred_r, 8);
+
+    temp0 = _mm_add_epi32(temp0, value_4x32b);
+    temp1 = _mm_add_epi32(temp1, value_4x32b);
+    temp0 = _mm_add_epi32(temp0, round_stg2);
+    temp1 = _mm_add_epi32(temp1, round_stg2);
+    pred_half1 = _mm_cvtepu16_epi32(pred_r);
+    temp0 = _mm_srai_epi32(temp0, IDCT_STG2_SHIFT);
+    temp1 = _mm_srai_epi32(temp1, IDCT_STG2_SHIFT);
+    temp0 = _mm_add_epi32(temp0, pred_half0);
+    temp1 = _mm_add_epi32(temp1, pred_half1);
+
+    temp0 = _mm_packus_epi32(temp0, temp1);
+    temp0 = _mm_packus_epi16(temp0, temp1);
+
+    _mm_storel_epi64((__m128i *)pu1_dst, temp0);
+
+    // Row 1 processing
+    mismatch_stg2_additive = _mm_loadu_si128((__m128i *) (gai2_impeg2_mismatch_stg2_additive + 8));
+    pred_r = _mm_loadl_epi64((__m128i *) (pu1_pred + pred_strd));
+    pred_r =  _mm_cvtepu8_epi16(pred_r);
+    temp0 = _mm_cvtepi16_epi32(mismatch_stg2_additive);
+    mismatch_stg2_additive = _mm_srli_si128(mismatch_stg2_additive, 8);
+    pred_half0 = _mm_cvtepu16_epi32(pred_r);
+    temp1 = _mm_cvtepi16_epi32(mismatch_stg2_additive);
+
+    pred_r = _mm_srli_si128(pred_r, 8);
+
+    temp0 = _mm_add_epi32(temp0, value_4x32b);
+    temp1 = _mm_add_epi32(temp1, value_4x32b);
+    temp0 = _mm_add_epi32(temp0, round_stg2);
+    temp1 = _mm_add_epi32(temp1, round_stg2);
+    pred_half1 = _mm_cvtepu16_epi32(pred_r);
+    temp0 = _mm_srai_epi32(temp0, IDCT_STG2_SHIFT);
+    temp1 = _mm_srai_epi32(temp1, IDCT_STG2_SHIFT);
+    temp0 = _mm_add_epi32(temp0, pred_half0);
+    temp1 = _mm_add_epi32(temp1, pred_half1);
+
+    temp0 = _mm_packus_epi32(temp0, temp1);
+    temp0 = _mm_packus_epi16(temp0, temp1);
+
+    _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), temp0);
+
+    // Row 2 processing
+    mismatch_stg2_additive = _mm_loadu_si128((__m128i *) (gai2_impeg2_mismatch_stg2_additive + 16));
+    pred_r = _mm_loadl_epi64((__m128i *) (pu1_pred + 2 * pred_strd));
+    pred_r =  _mm_cvtepu8_epi16(pred_r);
+    temp0 = _mm_cvtepi16_epi32(mismatch_stg2_additive);
+    mismatch_stg2_additive = _mm_srli_si128(mismatch_stg2_additive, 8);
+    pred_half0 = _mm_cvtepu16_epi32(pred_r);
+    temp1 = _mm_cvtepi16_epi32(mismatch_stg2_additive);
+
+    pred_r = _mm_srli_si128(pred_r, 8);
+
+    temp0 = _mm_add_epi32(temp0, value_4x32b);
+    temp1 = _mm_add_epi32(temp1, value_4x32b);
+    temp0 = _mm_add_epi32(temp0, round_stg2);
+    temp1 = _mm_add_epi32(temp1, round_stg2);
+    pred_half1 = _mm_cvtepu16_epi32(pred_r);
+    temp0 = _mm_srai_epi32(temp0, IDCT_STG2_SHIFT);
+    temp1 = _mm_srai_epi32(temp1, IDCT_STG2_SHIFT);
+    temp0 = _mm_add_epi32(temp0, pred_half0);
+    temp1 = _mm_add_epi32(temp1, pred_half1);
+
+    temp0 = _mm_packus_epi32(temp0, temp1);
+    temp0 = _mm_packus_epi16(temp0, temp1);
+
+    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), temp0);
+
+    // Row 3 processing
+    mismatch_stg2_additive = _mm_loadu_si128((__m128i *) (gai2_impeg2_mismatch_stg2_additive + 24));
+    pred_r = _mm_loadl_epi64((__m128i *) (pu1_pred + 3 * pred_strd));
+    pred_r =  _mm_cvtepu8_epi16(pred_r);
+    temp0 = _mm_cvtepi16_epi32(mismatch_stg2_additive);
+    mismatch_stg2_additive = _mm_srli_si128(mismatch_stg2_additive, 8);
+    pred_half0 = _mm_cvtepu16_epi32(pred_r);
+    temp1 = _mm_cvtepi16_epi32(mismatch_stg2_additive);
+
+    pred_r = _mm_srli_si128(pred_r, 8);
+
+    temp0 = _mm_add_epi32(temp0, value_4x32b);
+    temp1 = _mm_add_epi32(temp1, value_4x32b);
+    temp0 = _mm_add_epi32(temp0, round_stg2);
+    temp1 = _mm_add_epi32(temp1, round_stg2);
+    pred_half1 = _mm_cvtepu16_epi32(pred_r);
+    temp0 = _mm_srai_epi32(temp0, IDCT_STG2_SHIFT);
+    temp1 = _mm_srai_epi32(temp1, IDCT_STG2_SHIFT);
+    temp0 = _mm_add_epi32(temp0, pred_half0);
+    temp1 = _mm_add_epi32(temp1, pred_half1);
+
+    temp0 = _mm_packus_epi32(temp0, temp1);
+    temp0 = _mm_packus_epi16(temp0, temp1);
+
+    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), temp0);
+
+    // Row 4 processing
+    mismatch_stg2_additive = _mm_loadu_si128((__m128i *) (gai2_impeg2_mismatch_stg2_additive + 32));
+    pred_r = _mm_loadl_epi64((__m128i *) (pu1_pred + 4 * pred_strd));
+    pred_r =  _mm_cvtepu8_epi16(pred_r);
+    temp0 = _mm_cvtepi16_epi32(mismatch_stg2_additive);
+    mismatch_stg2_additive = _mm_srli_si128(mismatch_stg2_additive, 8);
+    pred_half0 = _mm_cvtepu16_epi32(pred_r);
+    temp1 = _mm_cvtepi16_epi32(mismatch_stg2_additive);
+
+    pred_r = _mm_srli_si128(pred_r, 8);
+
+    temp0 = _mm_add_epi32(temp0, value_4x32b);
+    temp1 = _mm_add_epi32(temp1, value_4x32b);
+    temp0 = _mm_add_epi32(temp0, round_stg2);
+    temp1 = _mm_add_epi32(temp1, round_stg2);
+    pred_half1 = _mm_cvtepu16_epi32(pred_r);
+    temp0 = _mm_srai_epi32(temp0, IDCT_STG2_SHIFT);
+    temp1 = _mm_srai_epi32(temp1, IDCT_STG2_SHIFT);
+    temp0 = _mm_add_epi32(temp0, pred_half0);
+    temp1 = _mm_add_epi32(temp1, pred_half1);
+
+    temp0 = _mm_packus_epi32(temp0, temp1);
+    temp0 = _mm_packus_epi16(temp0, temp1);
+
+    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), temp0);
+
+    // Row 5 processing
+    mismatch_stg2_additive = _mm_loadu_si128((__m128i *) (gai2_impeg2_mismatch_stg2_additive + 40));
+    pred_r = _mm_loadl_epi64((__m128i *) (pu1_pred + 5 * pred_strd));
+    pred_r =  _mm_cvtepu8_epi16(pred_r);
+    temp0 = _mm_cvtepi16_epi32(mismatch_stg2_additive);
+    mismatch_stg2_additive = _mm_srli_si128(mismatch_stg2_additive, 8);
+    pred_half0 = _mm_cvtepu16_epi32(pred_r);
+    temp1 = _mm_cvtepi16_epi32(mismatch_stg2_additive);
+
+    pred_r = _mm_srli_si128(pred_r, 8);
+
+    temp0 = _mm_add_epi32(temp0, value_4x32b);
+    temp1 = _mm_add_epi32(temp1, value_4x32b);
+    temp0 = _mm_add_epi32(temp0, round_stg2);
+    temp1 = _mm_add_epi32(temp1, round_stg2);
+    pred_half1 = _mm_cvtepu16_epi32(pred_r);
+    temp0 = _mm_srai_epi32(temp0, IDCT_STG2_SHIFT);
+    temp1 = _mm_srai_epi32(temp1, IDCT_STG2_SHIFT);
+    temp0 = _mm_add_epi32(temp0, pred_half0);
+    temp1 = _mm_add_epi32(temp1, pred_half1);
+
+    temp0 = _mm_packus_epi32(temp0, temp1);
+    temp0 = _mm_packus_epi16(temp0, temp1);
+
+    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), temp0);
+
+    // Row 6 processing
+    mismatch_stg2_additive = _mm_loadu_si128((__m128i *) (gai2_impeg2_mismatch_stg2_additive + 48));
+    pred_r = _mm_loadl_epi64((__m128i *) (pu1_pred + 6 * pred_strd));
+    pred_r =  _mm_cvtepu8_epi16(pred_r);
+    temp0 = _mm_cvtepi16_epi32(mismatch_stg2_additive);
+    mismatch_stg2_additive = _mm_srli_si128(mismatch_stg2_additive, 8);
+    pred_half0 = _mm_cvtepu16_epi32(pred_r);
+    temp1 = _mm_cvtepi16_epi32(mismatch_stg2_additive);
+
+    pred_r = _mm_srli_si128(pred_r, 8);
+
+    temp0 = _mm_add_epi32(temp0, value_4x32b);
+    temp1 = _mm_add_epi32(temp1, value_4x32b);
+    temp0 = _mm_add_epi32(temp0, round_stg2);
+    temp1 = _mm_add_epi32(temp1, round_stg2);
+    pred_half1 = _mm_cvtepu16_epi32(pred_r);
+    temp0 = _mm_srai_epi32(temp0, IDCT_STG2_SHIFT);
+    temp1 = _mm_srai_epi32(temp1, IDCT_STG2_SHIFT);
+    temp0 = _mm_add_epi32(temp0, pred_half0);
+    temp1 = _mm_add_epi32(temp1, pred_half1);
+
+    temp0 = _mm_packus_epi32(temp0, temp1);
+    temp0 = _mm_packus_epi16(temp0, temp1);
+
+    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), temp0);
+
+    // Row 7 processing
+    mismatch_stg2_additive = _mm_loadu_si128((__m128i *) (gai2_impeg2_mismatch_stg2_additive + 56));
+    pred_r = _mm_loadl_epi64((__m128i *) (pu1_pred + 7 * pred_strd));
+    pred_r =  _mm_cvtepu8_epi16(pred_r);
+    temp0 = _mm_cvtepi16_epi32(mismatch_stg2_additive);
+    mismatch_stg2_additive = _mm_srli_si128(mismatch_stg2_additive, 8);
+    pred_half0 = _mm_cvtepu16_epi32(pred_r);
+    temp1 = _mm_cvtepi16_epi32(mismatch_stg2_additive);
+
+    pred_r = _mm_srli_si128(pred_r, 8);
+
+    temp0 = _mm_add_epi32(temp0, value_4x32b);
+    temp1 = _mm_add_epi32(temp1, value_4x32b);
+    temp0 = _mm_add_epi32(temp0, round_stg2);
+    temp1 = _mm_add_epi32(temp1, round_stg2);
+    pred_half1 = _mm_cvtepu16_epi32(pred_r);
+    temp0 = _mm_srai_epi32(temp0, IDCT_STG2_SHIFT);
+    temp1 = _mm_srai_epi32(temp1, IDCT_STG2_SHIFT);
+    temp0 = _mm_add_epi32(temp0, pred_half0);
+    temp1 = _mm_add_epi32(temp1, pred_half1);
+
+    temp0 = _mm_packus_epi32(temp0, temp1);
+    temp0 = _mm_packus_epi16(temp0, temp1);
+
+    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), temp0);
+}
+
+void impeg2_idct_recon_dc_sse42(WORD16 *pi2_src,   /* coefficient block; only pi2_src[0] is read */
+                            WORD16 *pi2_tmp,       /* unused */
+                            UWORD8 *pu1_pred,      /* prediction block; 8 bytes are read from each of 8 rows */
+                            UWORD8 *pu1_dst,       /* output block; 8 bytes are written to each of 8 rows */
+                            WORD32 src_strd,       /* unused */
+                            WORD32 pred_strd,      /* byte offset between consecutive rows of pu1_pred */
+                            WORD32 dst_strd,       /* byte offset between consecutive rows of pu1_dst */
+                            WORD32 zero_cols,      /* unused */
+                            WORD32 zero_rows)      /* unused */
+{   /* DC-only IDCT + reconstruction: one value derived from pi2_src[0] is added to all 64 prediction pixels, saturating to 8 bits */
+    WORD32 val;                                    /* the single residual value added to every pixel */
+    __m128i value_4x32b, pred_r0, pred_r1, temp0, temp1, temp2, temp3;
+
+    UNUSED(pi2_tmp);
+    UNUSED(src_strd);
+    UNUSED(zero_cols);
+    UNUSED(zero_rows);
+    /* Scalar form of the two IDCT stages for the DC term: multiply by the first table entry, round, shift - once per stage */
+    val = pi2_src[0] * gai2_impeg2_idct_q15[0];
+    val = ((val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT);
+    val = val * gai2_impeg2_idct_q11[0];
+    val = ((val + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT);
+
+    value_4x32b = _mm_set1_epi32(val);             /* broadcast val to all four 32-bit lanes */
+
+    //Row 0-1 processing: widen 8 pred bytes of each row to 32 bits, add val, saturate back down to 8 bits
+    pred_r0 = _mm_loadl_epi64((__m128i *) pu1_pred);
+    pred_r1 = _mm_loadl_epi64((__m128i *) (pu1_pred + pred_strd));
+    pred_r0 =  _mm_cvtepu8_epi16(pred_r0);
+    pred_r1 =  _mm_cvtepu8_epi16(pred_r1);
+
+    temp0 = _mm_cvtepu16_epi32(pred_r0);           /* first row, pixels 0-3 */
+    pred_r0 = _mm_srli_si128(pred_r0, 8);          /* move pixels 4-7 into the low half */
+    temp2 = _mm_cvtepu16_epi32(pred_r1);           /* second row, pixels 0-3 */
+    pred_r1 = _mm_srli_si128(pred_r1, 8);
+    temp1 = _mm_cvtepu16_epi32(pred_r0);           /* first row, pixels 4-7 */
+    temp3 = _mm_cvtepu16_epi32(pred_r1);           /* second row, pixels 4-7 */
+
+    temp0 = _mm_add_epi32(temp0, value_4x32b);
+    temp2 = _mm_add_epi32(temp2, value_4x32b);
+    temp1 = _mm_add_epi32(temp1, value_4x32b);
+    temp3 = _mm_add_epi32(temp3, value_4x32b);
+    temp0 = _mm_packus_epi32(temp0, temp1);        /* 32 -> 16 bit with unsigned saturation; 8 pixels of the first row */
+    temp2 = _mm_packus_epi32(temp2, temp3);
+    temp0 = _mm_packus_epi16(temp0, temp1);        /* 16 -> 8 bit with unsigned saturation; second operand only fills the high 8 bytes, which are never stored */
+    temp2 = _mm_packus_epi16(temp2, temp3);
+    _mm_storel_epi64((__m128i *)(pu1_dst), temp0); /* store the low 8 bytes only */
+    _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), temp2);
+
+    //Row 2-3 processing (same steps after advancing both pointers by two rows)
+    pu1_pred += 2 * pred_strd;
+    pu1_dst += 2 * dst_strd;
+
+    pred_r0 = _mm_loadl_epi64((__m128i *) pu1_pred);
+    pred_r1 = _mm_loadl_epi64((__m128i *) (pu1_pred + pred_strd));
+    pred_r0 =  _mm_cvtepu8_epi16(pred_r0);
+    pred_r1 =  _mm_cvtepu8_epi16(pred_r1);
+
+    temp0 = _mm_cvtepu16_epi32(pred_r0);
+    pred_r0 = _mm_srli_si128(pred_r0, 8);
+    temp2 = _mm_cvtepu16_epi32(pred_r1);
+    pred_r1 = _mm_srli_si128(pred_r1, 8);
+    temp1 = _mm_cvtepu16_epi32(pred_r0);
+    temp3 = _mm_cvtepu16_epi32(pred_r1);
+
+    temp0 = _mm_add_epi32(temp0, value_4x32b);
+    temp2 = _mm_add_epi32(temp2, value_4x32b);
+    temp1 = _mm_add_epi32(temp1, value_4x32b);
+    temp3 = _mm_add_epi32(temp3, value_4x32b);
+    temp0 = _mm_packus_epi32(temp0, temp1);
+    temp2 = _mm_packus_epi32(temp2, temp3);
+    temp0 = _mm_packus_epi16(temp0, temp1);
+    temp2 = _mm_packus_epi16(temp2, temp3);
+    _mm_storel_epi64((__m128i *)(pu1_dst), temp0);
+    _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), temp2);
+
+    //Row 4-5 processing (same steps after advancing both pointers by two rows)
+    pu1_pred += 2 * pred_strd;
+    pu1_dst += 2 * dst_strd;
+
+    pred_r0 = _mm_loadl_epi64((__m128i *) pu1_pred);
+    pred_r1 = _mm_loadl_epi64((__m128i *) (pu1_pred + pred_strd));
+    pred_r0 =  _mm_cvtepu8_epi16(pred_r0);
+    pred_r1 =  _mm_cvtepu8_epi16(pred_r1);
+
+    temp0 = _mm_cvtepu16_epi32(pred_r0);
+    pred_r0 = _mm_srli_si128(pred_r0, 8);
+    temp2 = _mm_cvtepu16_epi32(pred_r1);
+    pred_r1 = _mm_srli_si128(pred_r1, 8);
+    temp1 = _mm_cvtepu16_epi32(pred_r0);
+    temp3 = _mm_cvtepu16_epi32(pred_r1);
+
+    temp0 = _mm_add_epi32(temp0, value_4x32b);
+    temp2 = _mm_add_epi32(temp2, value_4x32b);
+    temp1 = _mm_add_epi32(temp1, value_4x32b);
+    temp3 = _mm_add_epi32(temp3, value_4x32b);
+    temp0 = _mm_packus_epi32(temp0, temp1);
+    temp2 = _mm_packus_epi32(temp2, temp3);
+    temp0 = _mm_packus_epi16(temp0, temp1);
+    temp2 = _mm_packus_epi16(temp2, temp3);
+    _mm_storel_epi64((__m128i *)(pu1_dst), temp0);
+    _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), temp2);
+
+    //Row 6-7 processing (same steps after advancing both pointers by two rows)
+    pu1_pred += 2 * pred_strd;
+    pu1_dst += 2 * dst_strd;
+
+    pred_r0 = _mm_loadl_epi64((__m128i *) pu1_pred);
+    pred_r1 = _mm_loadl_epi64((__m128i *) (pu1_pred + pred_strd));
+    pred_r0 =  _mm_cvtepu8_epi16(pred_r0);
+    pred_r1 =  _mm_cvtepu8_epi16(pred_r1);
+
+    temp0 = _mm_cvtepu16_epi32(pred_r0);
+    pred_r0 = _mm_srli_si128(pred_r0, 8);
+    temp2 = _mm_cvtepu16_epi32(pred_r1);
+    pred_r1 = _mm_srli_si128(pred_r1, 8);
+    temp1 = _mm_cvtepu16_epi32(pred_r0);
+    temp3 = _mm_cvtepu16_epi32(pred_r1);
+
+    temp0 = _mm_add_epi32(temp0, value_4x32b);
+    temp2 = _mm_add_epi32(temp2, value_4x32b);
+    temp1 = _mm_add_epi32(temp1, value_4x32b);
+    temp3 = _mm_add_epi32(temp3, value_4x32b);
+    temp0 = _mm_packus_epi32(temp0, temp1);
+    temp2 = _mm_packus_epi32(temp2, temp3);
+    temp0 = _mm_packus_epi16(temp0, temp1);
+    temp2 = _mm_packus_epi16(temp2, temp3);
+    _mm_storel_epi64((__m128i *)(pu1_dst), temp0);
+    _mm_storel_epi64((__m128i *)(pu1_dst + dst_strd), temp2);
+}

diff --git a/common/x86/impeg2_inter_pred_sse42_intr.c b/common/x86/impeg2_inter_pred_sse42_intr.c
new file mode 100644
index 0000000..4599afa
--- /dev/null
+++ b/common/x86/impeg2_inter_pred_sse42_intr.c

@@ -0,0 +1,899 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ *  impeg2_inter_pred_sse42_intr.c
+ *
+ * @brief
+ *  Contains Motion compensation function definitions for MPEG2 decoder
+ *
+ * @author
+ *  Mohit [100664]
+ *
+ * - impeg2_copy_mb_sse42()
+ * - impeg2_interpolate_sse42()
+ * - impeg2_mc_halfx_halfy_8x8_sse42()
+ * - impeg2_mc_halfx_fully_8x8_sse42()
+ * - impeg2_mc_fullx_halfy_8x8_sse42()
+ * - impeg2_mc_fullx_fully_8x8_sse42()
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+#include <stdio.h>
+#include <string.h>
+#include "iv_datatypedef.h"
+#include "impeg2_macros.h"
+#include "impeg2_defs.h"
+#include "impeg2_inter_pred.h"
+
+#include <immintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include <tmmintrin.h>
+
+/*******************************************************************************
+*  Function Name   : impeg2_copy_mb_sse42
+*
+*  Description     : copies 3 components to the frame from mc_buf
+*                    (16 rows x 16 bytes of Y, 8 rows x 8 bytes each of U and V)
+*  Arguments       :
+*  src_buf         : Source Buffer (pu1_y, pu1_u and pu1_v are read)
+*  dst_buf         : Destination Buffer (pu1_y, pu1_u and pu1_v are written)
+*  src_wd          : Byte offset between source rows for Y; halved for U and V
+*  dst_wd          : Byte offset between destination rows for Y; halved for U, V
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2_copy_mb_sse42(yuv_buf_t *src_buf,
+                    yuv_buf_t *dst_buf,
+                    UWORD32 src_wd,
+                    UWORD32 dst_wd)
+{
+    UWORD8 *src;
+    UWORD8 *dst;
+    __m128i src_r0, src_r1, src_r2, src_r3;
+
+    /*******************************************************/
+    /* copy Y                                              */
+    /*******************************************************/
+    src = src_buf->pu1_y;
+    dst = dst_buf->pu1_y;
+    // Row 0-3: four unaligned 16-byte loads followed by four unaligned 16-byte stores
+    src_r0 = _mm_loadu_si128((__m128i *) (src));
+    src_r1 = _mm_loadu_si128((__m128i *) (src + src_wd));
+    src_r2 = _mm_loadu_si128((__m128i *) (src + 2 * src_wd));
+    src_r3 = _mm_loadu_si128((__m128i *) (src + 3 * src_wd));
+
+    _mm_storeu_si128((__m128i *) dst, src_r0);
+    _mm_storeu_si128((__m128i *) (dst + dst_wd), src_r1);
+    _mm_storeu_si128((__m128i *) (dst + 2 * dst_wd), src_r2);
+    _mm_storeu_si128((__m128i *) (dst + 3 * dst_wd), src_r3);
+
+    // Row 4-7 (both pointers advance by four rows, then the same load/store group repeats)
+    src += 4 * src_wd;
+    dst += 4 * dst_wd;
+    src_r0 = _mm_loadu_si128((__m128i *) (src));
+    src_r1 = _mm_loadu_si128((__m128i *) (src + src_wd));
+    src_r2 = _mm_loadu_si128((__m128i *) (src + 2 * src_wd));
+    src_r3 = _mm_loadu_si128((__m128i *) (src + 3 * src_wd));
+
+    _mm_storeu_si128((__m128i *) dst, src_r0);
+    _mm_storeu_si128((__m128i *) (dst + dst_wd), src_r1);
+    _mm_storeu_si128((__m128i *) (dst + 2 * dst_wd), src_r2);
+    _mm_storeu_si128((__m128i *) (dst + 3 * dst_wd), src_r3);
+
+    // Row 8-11
+    src += 4 * src_wd;
+    dst += 4 * dst_wd;
+    src_r0 = _mm_loadu_si128((__m128i *) (src));
+    src_r1 = _mm_loadu_si128((__m128i *) (src + src_wd));
+    src_r2 = _mm_loadu_si128((__m128i *) (src + 2 * src_wd));
+    src_r3 = _mm_loadu_si128((__m128i *) (src + 3 * src_wd));
+
+    _mm_storeu_si128((__m128i *) dst, src_r0);
+    _mm_storeu_si128((__m128i *) (dst + dst_wd), src_r1);
+    _mm_storeu_si128((__m128i *) (dst + 2 * dst_wd), src_r2);
+    _mm_storeu_si128((__m128i *) (dst + 3 * dst_wd), src_r3);
+
+    // Row 12-15
+    src += 4 * src_wd;
+    dst += 4 * dst_wd;
+    src_r0 = _mm_loadu_si128((__m128i *) (src));
+    src_r1 = _mm_loadu_si128((__m128i *) (src + src_wd));
+    src_r2 = _mm_loadu_si128((__m128i *) (src + 2 * src_wd));
+    src_r3 = _mm_loadu_si128((__m128i *) (src + 3 * src_wd));
+
+    _mm_storeu_si128((__m128i *) dst, src_r0);
+    _mm_storeu_si128((__m128i *) (dst + dst_wd), src_r1);
+    _mm_storeu_si128((__m128i *) (dst + 2 * dst_wd), src_r2);
+    _mm_storeu_si128((__m128i *) (dst + 3 * dst_wd), src_r3);
+
+    src_wd >>= 1;   /* U and V rows are addressed with half the Y row offset */
+    dst_wd >>= 1;
+
+    /*******************************************************/
+    /* copy U                                              */
+    /*******************************************************/
+    src = src_buf->pu1_u;
+    dst = dst_buf->pu1_u;
+
+    // Row 0-3: 8 bytes per row, moved with 64-bit loads and stores
+    src_r0 =  _mm_loadl_epi64((__m128i *)src);
+    src_r1 =  _mm_loadl_epi64((__m128i *)(src + src_wd));
+    src_r2 =  _mm_loadl_epi64((__m128i *)(src + 2 * src_wd));
+    src_r3 =  _mm_loadl_epi64((__m128i *)(src + 3 * src_wd));
+
+    _mm_storel_epi64((__m128i *)dst, src_r0);
+    _mm_storel_epi64((__m128i *)(dst + dst_wd), src_r1);
+    _mm_storel_epi64((__m128i *)(dst + 2 * dst_wd), src_r2);
+    _mm_storel_epi64((__m128i *)(dst + 3 * dst_wd), src_r3);
+
+    // Row 4-7
+    src += 4 * src_wd;
+    dst += 4 * dst_wd;
+
+    src_r0 =  _mm_loadl_epi64((__m128i *)src);
+    src_r1 =  _mm_loadl_epi64((__m128i *)(src + src_wd));
+    src_r2 =  _mm_loadl_epi64((__m128i *)(src + 2 * src_wd));
+    src_r3 =  _mm_loadl_epi64((__m128i *)(src + 3 * src_wd));
+
+    _mm_storel_epi64((__m128i *)dst, src_r0);
+    _mm_storel_epi64((__m128i *)(dst + dst_wd), src_r1);
+    _mm_storel_epi64((__m128i *)(dst + 2 * dst_wd), src_r2);
+    _mm_storel_epi64((__m128i *)(dst + 3 * dst_wd), src_r3);
+
+    /*******************************************************/
+    /* copy V                                              */
+    /*******************************************************/
+    src = src_buf->pu1_v;
+    dst = dst_buf->pu1_v;
+    // Row 0-3: same 8-byte-per-row copy as for U, using the halved row offsets
+    src_r0 =  _mm_loadl_epi64((__m128i *)src);
+    src_r1 =  _mm_loadl_epi64((__m128i *)(src + src_wd));
+    src_r2 =  _mm_loadl_epi64((__m128i *)(src + 2 * src_wd));
+    src_r3 =  _mm_loadl_epi64((__m128i *)(src + 3 * src_wd));
+
+    _mm_storel_epi64((__m128i *)dst, src_r0);
+    _mm_storel_epi64((__m128i *)(dst + dst_wd), src_r1);
+    _mm_storel_epi64((__m128i *)(dst + 2 * dst_wd), src_r2);
+    _mm_storel_epi64((__m128i *)(dst + 3 * dst_wd), src_r3);
+
+    // Row 4-7
+    src += 4 * src_wd;
+    dst += 4 * dst_wd;
+
+    src_r0 =  _mm_loadl_epi64((__m128i *)src);
+    src_r1 =  _mm_loadl_epi64((__m128i *)(src + src_wd));
+    src_r2 =  _mm_loadl_epi64((__m128i *)(src + 2 * src_wd));
+    src_r3 =  _mm_loadl_epi64((__m128i *)(src + 3 * src_wd));
+
+    _mm_storel_epi64((__m128i *)dst, src_r0);
+    _mm_storel_epi64((__m128i *)(dst + dst_wd), src_r1);
+    _mm_storel_epi64((__m128i *)(dst + 2 * dst_wd), src_r2);
+    _mm_storel_epi64((__m128i *)(dst + 3 * dst_wd), src_r3);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2_interpolate_sse42                                 */
+/*                                                                           */
+/*  Description   : averages the contents of buf_src1 and buf_src2 and stores*/
+/*                  result in buf_dst                                        */
+/*                                                                           */
+/*  Inputs        : buf_src1 -  First Source                                 */
+/*                  buf_src2 -  Second Source                                */
+/*                  stride   -  Pitch of buf_dst rows for Y; halved for U, V */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Avg the values from two sources and store the result in  */
+/*                  destination buffer                                       */
+/*                  Source rows are read 16 bytes (Y) / 8 bytes (U, V) apart */
+/*  Outputs       : buf_dst  -  Avg of contents of buf_src1 and buf_src2     */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : Assumes that all 3 buffers are of same size              */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2_interpolate_sse42(yuv_buf_t *buf_src1,
+                        yuv_buf_t *buf_src2,
+                        yuv_buf_t *buf_dst,
+                        UWORD32 stride)
+{
+    UWORD8 *src1, *src2;
+    UWORD8 *dst;
+    __m128i src1_r0, src1_r1, src1_r2, src1_r3;
+    __m128i src2_r0, src2_r1, src2_r2, src2_r3;
+
+    /*******************************************************/
+    /* interpolate Y                                       */
+    /*******************************************************/
+    src1 = buf_src1->pu1_y;
+    src2 = buf_src2->pu1_y;
+    dst  = buf_dst->pu1_y;
+    // Row 0-3: both sources are read at a fixed 16-byte row offset; only dst uses stride
+    src1_r0 = _mm_loadu_si128((__m128i *) (src1));
+    src1_r1 = _mm_loadu_si128((__m128i *) (src1 + 16));
+    src1_r2 = _mm_loadu_si128((__m128i *) (src1 + 2 * 16));
+    src1_r3 = _mm_loadu_si128((__m128i *) (src1 + 3 * 16));
+
+    src2_r0 = _mm_loadu_si128((__m128i *) (src2));
+    src2_r1 = _mm_loadu_si128((__m128i *) (src2 + 16));
+    src2_r2 = _mm_loadu_si128((__m128i *) (src2 + 2 * 16));
+    src2_r3 = _mm_loadu_si128((__m128i *) (src2 + 3 * 16));
+
+    src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0);   /* per-byte unsigned average, (a + b + 1) >> 1 */
+    src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1);
+    src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2);
+    src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3);
+
+    _mm_storeu_si128((__m128i *) dst, src1_r0);
+    _mm_storeu_si128((__m128i *) (dst + stride), src1_r1);
+    _mm_storeu_si128((__m128i *) (dst + 2 * stride), src1_r2);
+    _mm_storeu_si128((__m128i *) (dst + 3 * stride), src1_r3);
+
+    // Row 4-7 (sources advance by 4 * 16 bytes, dst by four strides)
+    src1 += 4 * 16;
+    src2 += 4 * 16;
+    dst += 4 * stride;
+    src1_r0 = _mm_loadu_si128((__m128i *) (src1));
+    src1_r1 = _mm_loadu_si128((__m128i *) (src1 + 16));
+    src1_r2 = _mm_loadu_si128((__m128i *) (src1 + 2 * 16));
+    src1_r3 = _mm_loadu_si128((__m128i *) (src1 + 3 * 16));
+
+    src2_r0 = _mm_loadu_si128((__m128i *) (src2));
+    src2_r1 = _mm_loadu_si128((__m128i *) (src2 + 16));
+    src2_r2 = _mm_loadu_si128((__m128i *) (src2 + 2 * 16));
+    src2_r3 = _mm_loadu_si128((__m128i *) (src2 + 3 * 16));
+
+    src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0);
+    src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1);
+    src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2);
+    src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3);
+
+    _mm_storeu_si128((__m128i *) dst, src1_r0);
+    _mm_storeu_si128((__m128i *) (dst + stride), src1_r1);
+    _mm_storeu_si128((__m128i *) (dst + 2 * stride), src1_r2);
+    _mm_storeu_si128((__m128i *) (dst + 3 * stride), src1_r3);
+
+    // Row 8-11
+    src1 += 4 * 16;
+    src2 += 4 * 16;
+    dst += 4 * stride;
+    src1_r0 = _mm_loadu_si128((__m128i *) (src1));
+    src1_r1 = _mm_loadu_si128((__m128i *) (src1 + 16));
+    src1_r2 = _mm_loadu_si128((__m128i *) (src1 + 2 * 16));
+    src1_r3 = _mm_loadu_si128((__m128i *) (src1 + 3 * 16));
+
+    src2_r0 = _mm_loadu_si128((__m128i *) (src2));
+    src2_r1 = _mm_loadu_si128((__m128i *) (src2 + 16));
+    src2_r2 = _mm_loadu_si128((__m128i *) (src2 + 2 * 16));
+    src2_r3 = _mm_loadu_si128((__m128i *) (src2 + 3 * 16));
+
+    src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0);
+    src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1);
+    src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2);
+    src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3);
+
+    _mm_storeu_si128((__m128i *) dst, src1_r0);
+    _mm_storeu_si128((__m128i *) (dst + stride), src1_r1);
+    _mm_storeu_si128((__m128i *) (dst + 2 * stride), src1_r2);
+    _mm_storeu_si128((__m128i *) (dst + 3 * stride), src1_r3);
+
+    // Row 12-15
+    src1 += 4 * 16;
+    src2 += 4 * 16;
+    dst += 4 * stride;
+    src1_r0 = _mm_loadu_si128((__m128i *) (src1));
+    src1_r1 = _mm_loadu_si128((__m128i *) (src1 + 16));
+    src1_r2 = _mm_loadu_si128((__m128i *) (src1 + 2 * 16));
+    src1_r3 = _mm_loadu_si128((__m128i *) (src1 + 3 * 16));
+
+    src2_r0 = _mm_loadu_si128((__m128i *) (src2));
+    src2_r1 = _mm_loadu_si128((__m128i *) (src2 + 16));
+    src2_r2 = _mm_loadu_si128((__m128i *) (src2 + 2 * 16));
+    src2_r3 = _mm_loadu_si128((__m128i *) (src2 + 3 * 16));
+
+    src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0);
+    src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1);
+    src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2);
+    src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3);
+
+    _mm_storeu_si128((__m128i *) dst, src1_r0);
+    _mm_storeu_si128((__m128i *) (dst + stride), src1_r1);
+    _mm_storeu_si128((__m128i *) (dst + 2 * stride), src1_r2);
+    _mm_storeu_si128((__m128i *) (dst + 3 * stride), src1_r3);
+
+    stride >>= 1;   /* U and V destination rows use half the Y row offset */
+
+    /*******************************************************/
+    /* interpolate U                                       */
+    /*******************************************************/
+    src1 = buf_src1->pu1_u;
+    src2 = buf_src2->pu1_u;
+    dst  = buf_dst->pu1_u;
+    // Row 0-3: 8 bytes per row; sources are read at a fixed 8-byte row offset
+    src1_r0 = _mm_loadl_epi64((__m128i *) (src1));
+    src1_r1 = _mm_loadl_epi64((__m128i *) (src1 + 8));
+    src1_r2 = _mm_loadl_epi64((__m128i *) (src1 + 2 * 8));
+    src1_r3 = _mm_loadl_epi64((__m128i *) (src1 + 3 * 8));
+
+    src2_r0 = _mm_loadl_epi64((__m128i *) (src2));
+    src2_r1 = _mm_loadl_epi64((__m128i *) (src2 + 8));
+    src2_r2 = _mm_loadl_epi64((__m128i *) (src2 + 2 * 8));
+    src2_r3 = _mm_loadl_epi64((__m128i *) (src2 + 3 * 8));
+
+    src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0);
+    src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1);
+    src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2);
+    src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3);
+
+    _mm_storel_epi64((__m128i *) dst, src1_r0);
+    _mm_storel_epi64((__m128i *) (dst + stride), src1_r1);
+    _mm_storel_epi64((__m128i *) (dst + 2 * stride), src1_r2);
+    _mm_storel_epi64((__m128i *) (dst + 3 * stride), src1_r3);
+
+    // Row 4-7
+    src1 += 4 * 8;
+    src2 += 4 * 8;
+    dst += 4 * stride;
+
+    src1_r0 = _mm_loadl_epi64((__m128i *) (src1));
+    src1_r1 = _mm_loadl_epi64((__m128i *) (src1 + 8));
+    src1_r2 = _mm_loadl_epi64((__m128i *) (src1 + 2 * 8));
+    src1_r3 = _mm_loadl_epi64((__m128i *) (src1 + 3 * 8));
+
+    src2_r0 = _mm_loadl_epi64((__m128i *) (src2));
+    src2_r1 = _mm_loadl_epi64((__m128i *) (src2 + 8));
+    src2_r2 = _mm_loadl_epi64((__m128i *) (src2 + 2 * 8));
+    src2_r3 = _mm_loadl_epi64((__m128i *) (src2 + 3 * 8));
+
+    src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0);
+    src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1);
+    src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2);
+    src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3);
+
+    _mm_storel_epi64((__m128i *) dst, src1_r0);
+    _mm_storel_epi64((__m128i *) (dst + stride), src1_r1);
+    _mm_storel_epi64((__m128i *) (dst + 2 * stride), src1_r2);
+    _mm_storel_epi64((__m128i *) (dst + 3 * stride), src1_r3);
+
+    /*******************************************************/
+    /* interpolate V                                       */
+    /*******************************************************/
+    src1 = buf_src1->pu1_v;
+    src2 = buf_src2->pu1_v;
+    dst  = buf_dst->pu1_v;
+
+    // Row 0-3: same 8-byte-per-row averaging as for U
+    src1_r0 = _mm_loadl_epi64((__m128i *) (src1));
+    src1_r1 = _mm_loadl_epi64((__m128i *) (src1 + 8));
+    src1_r2 = _mm_loadl_epi64((__m128i *) (src1 + 2 * 8));
+    src1_r3 = _mm_loadl_epi64((__m128i *) (src1 + 3 * 8));
+
+    src2_r0 = _mm_loadl_epi64((__m128i *) (src2));
+    src2_r1 = _mm_loadl_epi64((__m128i *) (src2 + 8));
+    src2_r2 = _mm_loadl_epi64((__m128i *) (src2 + 2 * 8));
+    src2_r3 = _mm_loadl_epi64((__m128i *) (src2 + 3 * 8));
+
+    src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0);
+    src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1);
+    src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2);
+    src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3);
+
+    _mm_storel_epi64((__m128i *) dst, src1_r0);
+    _mm_storel_epi64((__m128i *) (dst + stride), src1_r1);
+    _mm_storel_epi64((__m128i *) (dst + 2 * stride), src1_r2);
+    _mm_storel_epi64((__m128i *) (dst + 3 * stride), src1_r3);
+
+    // Row 4-7
+    src1 += 4 * 8;
+    src2 += 4 * 8;
+    dst += 4 * stride;
+
+    src1_r0 = _mm_loadl_epi64((__m128i *) (src1));
+    src1_r1 = _mm_loadl_epi64((__m128i *) (src1 + 8));
+    src1_r2 = _mm_loadl_epi64((__m128i *) (src1 + 2 * 8));
+    src1_r3 = _mm_loadl_epi64((__m128i *) (src1 + 3 * 8));
+
+    src2_r0 = _mm_loadl_epi64((__m128i *) (src2));
+    src2_r1 = _mm_loadl_epi64((__m128i *) (src2 + 8));
+    src2_r2 = _mm_loadl_epi64((__m128i *) (src2 + 2 * 8));
+    src2_r3 = _mm_loadl_epi64((__m128i *) (src2 + 3 * 8));
+
+    src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0);
+    src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1);
+    src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2);
+    src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3);
+
+    _mm_storel_epi64((__m128i *) dst, src1_r0);
+    _mm_storel_epi64((__m128i *) (dst + stride), src1_r1);
+    _mm_storel_epi64((__m128i *) (dst + 2 * stride), src1_r2);
+    _mm_storel_epi64((__m128i *) (dst + 3 * stride), src1_r3);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2_mc_halfx_halfy_8x8_sse42()                                 */
+/*                                                                           */
+/*  Description   : Gets the buffer from (0.5,0.5) to (8.5,8.5)              */
+/*                  and the above block of size 8 x 8 will be placed as a    */
+/*                  block from the current position of out_buf               */
+/*                                                                           */
+/*  Inputs        : ref - Reference frame from which the block will be       */
+/*                        extracted.                                         */
+/*                  ref_wid - Width of reference frame                       */
+/*                  out_wid - Width of the output frame                      */
+/*                  The block width and height are fixed at 8 x 8 and are    */
+/*                  not passed as parameters                                 */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Point to the (0,0),(1,0),(0,1),(1,1) positions in        */
+/*                  the ref frame. Interpolate these four values to get the  */
+/*                  value at (0.5,0.5). Repeat this to get an 8 x 8 block    */
+/*                  using a 9 x 9 block from the reference frame             */
+/*                                                                           */
+/*  Outputs       : out -  Output containing the extracted block             */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2_mc_halfx_halfy_8x8_sse42(UWORD8 *out,
+                            UWORD8 *ref,
+                            UWORD32 ref_wid,
+                            UWORD32 out_wid)
+{
+    UWORD8 *ref_p0,*ref_p1,*ref_p2,*ref_p3;
+    /* P0-P3 are the four reference pixels surrounding Q, the half-pel      */
+    /* value being estimated: Q = (P0 + P1 + P2 + P3 + 2) >> 2              */
+    /*
+       P0 P1
+         Q
+       P2 P3
+    */
+    __m128i src_r0, src_r0_1, src_r1, src_r1_1;
+    __m128i tmp0, tmp1;                     // take turns holding the previous row's horizontal sum
+    __m128i value_2 = _mm_set1_epi16(2);    // rounding term added before the >> 2
+
+    ref_p0 = ref;                           // P0: row 0
+    ref_p1 = ref + 1;                       // P1: row 0, one pixel to the right
+    ref_p2 = ref + ref_wid;                 // P2: row 1
+    ref_p3 = ref + ref_wid + 1;             // P3: row 1, one pixel to the right
+
+    src_r0 = _mm_loadl_epi64((__m128i *) (ref_p0));     // Row 0: 8 bytes at P0
+    src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p1));   // Row 0: 8 bytes at P1
+    src_r1 = _mm_loadl_epi64((__m128i *) (ref_p2));     // Row 1: 8 bytes at P2
+    src_r1_1 = _mm_loadl_epi64((__m128i *) (ref_p3));   // Row 1: 8 bytes at P3
+
+    src_r0 =  _mm_cvtepu8_epi16(src_r0);                // zero-extend to 16 bit so the 4-pixel sum cannot overflow
+    src_r0_1 =  _mm_cvtepu8_epi16(src_r0_1);
+    src_r1 =  _mm_cvtepu8_epi16(src_r1);
+    src_r1_1 =  _mm_cvtepu8_epi16(src_r1_1);
+
+    tmp0 = _mm_add_epi16(src_r0, src_r0_1);             // Row 0 horizontal sum (P0 + P1)
+    tmp1 = _mm_add_epi16(src_r1, src_r1_1);             // Row 1 horizontal sum (P2 + P3); reused for output row 1
+    tmp0 = _mm_add_epi16(tmp0, tmp1);                   // Output row 0: vertical sum of rows 0 and 1
+    tmp0 = _mm_add_epi16(tmp0, value_2);                // + 2 for rounding
+    tmp0 =  _mm_srli_epi16(tmp0, 2);                    // >> 2 (divide by 4)
+    tmp0 = _mm_packus_epi16(tmp0, value_2);             // back to 8 bit; upper half (from value_2) is never stored
+
+    _mm_storel_epi64((__m128i *)out, tmp0);             // low 8 bytes = output row 0
+
+    // Output row 1: only ref_p2/ref_p3 advance from here on; they now point at reference row 2
+    ref_p2 += ref_wid;
+    ref_p3 += ref_wid;
+    out += out_wid;
+
+    src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2));     // Row 2
+    src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3));   // Row 2, one pixel to the right
+
+    src_r0 =  _mm_cvtepu8_epi16(src_r0);
+    src_r0_1 =  _mm_cvtepu8_epi16(src_r0_1);
+
+    tmp0 = _mm_add_epi16(src_r0, src_r0_1);         // Row 2 horizontal sum (kept in tmp0 for output row 2)
+    tmp1 = _mm_add_epi16(tmp0, tmp1);               // Output row 1: rows 1 + 2 (tmp1 held row 1's sum)
+    tmp1 = _mm_add_epi16(tmp1, value_2);
+    tmp1 =  _mm_srli_epi16(tmp1, 2);
+    tmp1 = _mm_packus_epi16(tmp1, value_2);
+
+    _mm_storel_epi64((__m128i *)out, tmp1);         // output row 1
+
+    // Output row 2: advance to reference row 3
+    ref_p2 += ref_wid;
+    ref_p3 += ref_wid;
+    out += out_wid;
+
+    src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2));     // Row 3
+    src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3));   // Row 3, one pixel to the right
+
+    src_r0 =  _mm_cvtepu8_epi16(src_r0);
+    src_r0_1 =  _mm_cvtepu8_epi16(src_r0_1);
+
+    tmp1 = _mm_add_epi16(src_r0, src_r0_1);         // Row 3 horizontal sum (kept in tmp1 for output row 3)
+
+    tmp0 = _mm_add_epi16(tmp0, tmp1);               // Output row 2: rows 2 + 3
+    tmp0 = _mm_add_epi16(tmp0, value_2);
+    tmp0 =  _mm_srli_epi16(tmp0, 2);
+    tmp0 = _mm_packus_epi16(tmp0, value_2);
+
+    _mm_storel_epi64((__m128i *)out, tmp0);         // output row 2
+
+    // Output row 3: advance to reference row 4
+    ref_p2 += ref_wid;
+    ref_p3 += ref_wid;
+    out += out_wid;
+
+    src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2));     // Row 4
+    src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3));   // Row 4, one pixel to the right
+
+    src_r0 =  _mm_cvtepu8_epi16(src_r0);
+    src_r0_1 =  _mm_cvtepu8_epi16(src_r0_1);
+
+    tmp0 = _mm_add_epi16(src_r0, src_r0_1);         // Row 4 horizontal sum (kept in tmp0 for output row 4)
+
+    tmp1 = _mm_add_epi16(tmp0, tmp1);               // Output row 3: rows 3 + 4
+    tmp1 = _mm_add_epi16(tmp1, value_2);
+    tmp1 =  _mm_srli_epi16(tmp1, 2);
+    tmp1 = _mm_packus_epi16(tmp1, value_2);
+
+    _mm_storel_epi64((__m128i *)out, tmp1);         // output row 3
+
+    // Output row 4: advance to reference row 5
+    ref_p2 += ref_wid;
+    ref_p3 += ref_wid;
+    out += out_wid;
+
+    src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2));     // Row 5
+    src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3));   // Row 5, one pixel to the right
+
+    src_r0 =  _mm_cvtepu8_epi16(src_r0);
+    src_r0_1 =  _mm_cvtepu8_epi16(src_r0_1);
+
+    tmp1 = _mm_add_epi16(src_r0, src_r0_1);     // Row 5 horizontal sum (kept in tmp1 for output row 5)
+
+    tmp0 = _mm_add_epi16(tmp0, tmp1);           // Output row 4: rows 4 + 5
+    tmp0 = _mm_add_epi16(tmp0, value_2);
+    tmp0 =  _mm_srli_epi16(tmp0, 2);
+    tmp0 = _mm_packus_epi16(tmp0, value_2);
+
+    _mm_storel_epi64((__m128i *)out, tmp0);     // output row 4
+
+    // Output row 5: advance to reference row 6
+    ref_p2 += ref_wid;
+    ref_p3 += ref_wid;
+    out += out_wid;
+
+    src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2));     // Row 6
+    src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3));   // Row 6, one pixel to the right
+
+    src_r0 =  _mm_cvtepu8_epi16(src_r0);
+    src_r0_1 =  _mm_cvtepu8_epi16(src_r0_1);
+
+    tmp0 = _mm_add_epi16(src_r0, src_r0_1);             // Row 6 horizontal sum (kept in tmp0 for output row 6)
+
+    tmp1 = _mm_add_epi16(tmp0, tmp1);                   // Output row 5: rows 5 + 6
+    tmp1 = _mm_add_epi16(tmp1, value_2);
+    tmp1 =  _mm_srli_epi16(tmp1, 2);
+    tmp1 = _mm_packus_epi16(tmp1, value_2);
+
+    _mm_storel_epi64((__m128i *)out, tmp1);             // output row 5
+
+    // Output row 6: advance to reference row 7
+    ref_p2 += ref_wid;
+    ref_p3 += ref_wid;
+    out += out_wid;
+
+    src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2));     // Row 7
+    src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3));   // Row 7, one pixel to the right
+
+    src_r0 =  _mm_cvtepu8_epi16(src_r0);
+    src_r0_1 =  _mm_cvtepu8_epi16(src_r0_1);
+
+    tmp1 = _mm_add_epi16(src_r0, src_r0_1);             // Row 7 horizontal sum (kept in tmp1 for output row 7)
+
+    tmp0 = _mm_add_epi16(tmp0, tmp1);                   // Output row 6: rows 6 + 7
+    tmp0 = _mm_add_epi16(tmp0, value_2);
+    tmp0 =  _mm_srli_epi16(tmp0, 2);
+    tmp0 = _mm_packus_epi16(tmp0, value_2);
+
+    _mm_storel_epi64((__m128i *)out, tmp0);             // output row 6
+
+    // Output row 7: advance to reference row 8 (the 9th and last row read)
+    ref_p2 += ref_wid;
+    ref_p3 += ref_wid;
+    out += out_wid;
+
+    src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2));     // Row 8
+    src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3));   // Row 8, one pixel to the right
+
+    src_r0 =  _mm_cvtepu8_epi16(src_r0);
+    src_r0_1 =  _mm_cvtepu8_epi16(src_r0_1);
+
+    tmp0 = _mm_add_epi16(src_r0, src_r0_1);             // Row 8 horizontal sum
+
+    tmp1 = _mm_add_epi16(tmp0, tmp1);                   // Output row 7: rows 7 + 8
+    tmp1 = _mm_add_epi16(tmp1, value_2);
+    tmp1 =  _mm_srli_epi16(tmp1, 2);
+    tmp1 = _mm_packus_epi16(tmp1, value_2);
+
+    _mm_storel_epi64((__m128i *)out, tmp1);             // output row 7
+
+    return;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2_mc_halfx_fully_8x8_sse42()                        */
+/*                                                                           */
+/*  Description   : Gets the buffer from (0.5,0) to (8.5,8)                  */
+/*                  and the above block of size 8 x 8 will be placed as a    */
+/*                  block from the current position of out_buf               */
+/*                                                                           */
+/*  Inputs        : ref - Reference frame from which the block will be       */
+/*                        extracted.                                         */
+/*                  ref_wid - Width of reference frame                       */
+/*                  out_wid - Width of the output frame                      */
+/*                  Note: the block size is fixed at 8 x 8; no block width   */
+/*                  or height is passed in.                                  */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Point to the (0,0) and (1,0) position in the ref frame   */
+/*                  Interpolate these two values to get the value at(0.5,0)  */
+/*                  Repeat this to get an 8 x 8 block using 9 x 8 block from */
+/*                  reference frame                                          */
+/*                                                                           */
+/*  Outputs       : out -  Output containing the extracted block             */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2_mc_halfx_fully_8x8_sse42(UWORD8 *out,
+                            UWORD8 *ref,
+                            UWORD32 ref_wid,
+                            UWORD32 out_wid)
+{
+    UWORD8 *ref_p0,*ref_p1;
+    __m128i src_r0, src_r0_1, src_r1, src_r1_1;
+    /* P0 and P1 are horizontally adjacent reference pixels and Q is the    */
+    /* value being estimated: Q = (P0 + P1 + 1) >> 1 (via _mm_avg_epu8)     */
+    /*
+       P0 Q P1
+    */
+
+    ref_p0 = ref;           // P0: start of the current row pair
+    ref_p1 = ref + 1;       // P1: same rows, one pixel to the right
+
+    // Row 0 and 1 (two rows are processed per step)
+    src_r0 = _mm_loadl_epi64((__m128i *) (ref_p0));     //Row 0, 8 bytes at P0
+    src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p1));   //Row 0, 8 bytes at P1
+    src_r1 = _mm_loadl_epi64((__m128i *) (ref_p0 + ref_wid));       //Row 1
+    src_r1_1 = _mm_loadl_epi64((__m128i *) (ref_p1 + ref_wid));
+
+    src_r0 = _mm_avg_epu8(src_r0, src_r0_1);    // rounded byte-wise average, no widening needed
+    src_r1 = _mm_avg_epu8(src_r1, src_r1_1);
+
+    _mm_storel_epi64((__m128i *)out, src_r0);               // low 8 bytes = output row 0
+    _mm_storel_epi64((__m128i *)(out + out_wid), src_r1);   // output row 1
+
+    // Row 2 and 3
+    ref_p0 += 2*ref_wid;
+    ref_p1 += 2*ref_wid;
+    out += 2*out_wid;
+
+    src_r0 = _mm_loadl_epi64((__m128i *) (ref_p0));     //Row 2
+    src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p1));
+    src_r1 = _mm_loadl_epi64((__m128i *) (ref_p0 + ref_wid));       //Row 3
+    src_r1_1 = _mm_loadl_epi64((__m128i *) (ref_p1 + ref_wid));
+
+    src_r0 = _mm_avg_epu8(src_r0, src_r0_1);
+    src_r1 = _mm_avg_epu8(src_r1, src_r1_1);
+
+    _mm_storel_epi64((__m128i *)out, src_r0);               // output row 2
+    _mm_storel_epi64((__m128i *)(out + out_wid), src_r1);   // output row 3
+
+    // Row 4 and 5
+    ref_p0 += 2*ref_wid;
+    ref_p1 += 2*ref_wid;
+    out += 2*out_wid;
+
+    src_r0 = _mm_loadl_epi64((__m128i *) (ref_p0));     //Row 4
+    src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p1));
+    src_r1 = _mm_loadl_epi64((__m128i *) (ref_p0 + ref_wid));       //Row 5
+    src_r1_1 = _mm_loadl_epi64((__m128i *) (ref_p1 + ref_wid));
+
+    src_r0 = _mm_avg_epu8(src_r0, src_r0_1);
+    src_r1 = _mm_avg_epu8(src_r1, src_r1_1);
+
+    _mm_storel_epi64((__m128i *)out, src_r0);               // output row 4
+    _mm_storel_epi64((__m128i *)(out + out_wid), src_r1);   // output row 5
+
+    // Row 6 and 7
+    ref_p0 += 2*ref_wid;
+    ref_p1 += 2*ref_wid;
+    out += 2*out_wid;
+
+    src_r0 = _mm_loadl_epi64((__m128i *) (ref_p0));     //Row 6
+    src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p1));
+    src_r1 = _mm_loadl_epi64((__m128i *) (ref_p0 + ref_wid));       //Row 7
+    src_r1_1 = _mm_loadl_epi64((__m128i *) (ref_p1 + ref_wid));
+
+    src_r0 = _mm_avg_epu8(src_r0, src_r0_1);
+    src_r1 = _mm_avg_epu8(src_r1, src_r1_1);
+
+    _mm_storel_epi64((__m128i *)out, src_r0);               // output row 6
+    _mm_storel_epi64((__m128i *)(out + out_wid), src_r1);   // output row 7
+
+    return;
+}
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2_mc_fullx_halfy_8x8_sse42()                        */
+/*                                                                           */
+/*  Description   : Gets the buffer from (0,0.5) to (8,8.5)                  */
+/*                  and the above block of size 8 x 8 will be placed as a    */
+/*                  block from the current position of out_buf               */
+/*                                                                           */
+/*  Inputs        : ref - Reference frame from which the block will be       */
+/*                        extracted.                                         */
+/*                  ref_wid - Width of reference frame                       */
+/*                  out_wid - Width of the output frame                      */
+/*                  Note: the block size is fixed at 8 x 8; no block width   */
+/*                  or height is passed in.                                  */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Point to the (0,0) and (0,1)   position in the ref frame */
+/*                  Interpolate these two values to get the value at(0,0.5)  */
+/*                  Repeat this to get an 8 x 8 block using 8 x 9 block from */
+/*                  reference frame                                          */
+/*                                                                           */
+/*  Outputs       : out -  Output containing the extracted block             */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2_mc_fullx_halfy_8x8_sse42(UWORD8 *out,
+                            UWORD8 *ref,
+                            UWORD32 ref_wid,
+                            UWORD32 out_wid)
+{
+    __m128i src_r0, src_r1, src_r2, temp0, temp1;
+    /* P0 and P1 are vertically adjacent reference pixels; x is the value   */
+    /* being estimated: x = (P0 + P1 + 1) >> 1 (via _mm_avg_epu8)           */
+    /*
+       P0
+        x
+       P1
+    */
+    src_r0 = _mm_loadl_epi64((__m128i *)ref);               //Row 0
+    src_r1 = _mm_loadl_epi64((__m128i *)(ref + ref_wid));   //Row 1
+    src_r2 = _mm_loadl_epi64((__m128i *)(ref + 2 * ref_wid));   //Row 2 (carried into the next step)
+    temp0 = _mm_avg_epu8(src_r0, src_r1);                   // rounded average of rows 0 and 1
+    temp1 = _mm_avg_epu8(src_r1, src_r2);                   // rounded average of rows 1 and 2
+    _mm_storel_epi64((__m128i *)out, temp0);                //Row 0
+    _mm_storel_epi64((__m128i *)(out + out_wid), temp1);    //Row 1
+
+    ref+= 3*ref_wid;        // ref now points at reference row 3
+    out+= 2*out_wid;
+
+    src_r0 = _mm_loadl_epi64((__m128i *)ref);               //Row 3
+    src_r1 = _mm_loadl_epi64((__m128i *)(ref + ref_wid));   //Row 4 (carried into the next step)
+    temp0 = _mm_avg_epu8(src_r2, src_r0);                   // rows 2 and 3
+    temp1 = _mm_avg_epu8(src_r0, src_r1);                   // rows 3 and 4
+    _mm_storel_epi64((__m128i *)out, temp0);                //Row 2
+    _mm_storel_epi64((__m128i *)(out + out_wid), temp1);    //Row 3
+
+    ref += 2*ref_wid;       // ref now points at reference row 5
+    out+= 2*out_wid;
+
+    src_r2 = _mm_loadl_epi64((__m128i *)ref);               //Row 5
+    src_r0 = _mm_loadl_epi64((__m128i *)(ref + ref_wid));   //Row 6 (carried into the next step)
+    temp0 = _mm_avg_epu8(src_r1, src_r2);                   // rows 4 and 5
+    temp1 = _mm_avg_epu8(src_r2, src_r0);                   // rows 5 and 6
+    _mm_storel_epi64((__m128i *)out, temp0);                //Row 4
+    _mm_storel_epi64((__m128i *)(out + out_wid), temp1);    //Row 5
+
+    ref += 2*ref_wid;       // ref now points at reference row 7
+    out+= 2*out_wid;
+
+    src_r1 = _mm_loadl_epi64((__m128i *)ref);               //Row 7
+    src_r2 = _mm_loadl_epi64((__m128i *) (ref + ref_wid));  //Row 8 (the 9th and last row read)
+    temp0 = _mm_avg_epu8(src_r0, src_r1);                   // rows 6 and 7
+    temp1 = _mm_avg_epu8(src_r1, src_r2);                   // rows 7 and 8
+    _mm_storel_epi64((__m128i *)out, temp0);                //Row 6
+    _mm_storel_epi64((__m128i *)(out + out_wid), temp1);    //Row 7
+
+    return;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2_mc_fullx_fully_8x8_sse42()                        */
+/*                                                                           */
+/*  Description   : Gets the buffer from (x,y) to (x+8,y+8)                  */
+/*                  and the above block of size 8 x 8 will be placed as a    */
+/*                  block from the current position of out_buf               */
+/*                                                                           */
+/*  Inputs        : ref - Reference frame from which the block will be       */
+/*                        extracted.                                         */
+/*                  ref_wid - Width of reference frame                       */
+/*                  out_wid - Width of the output frame                      */
+/*                  Note: the block size is fixed at 8 x 8; no block width   */
+/*                  or height is passed in.                                  */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Point to the (0,0) position in the ref frame             */
+/*                  Get an 8 x 8 block from reference frame                  */
+/*                                                                           */
+/*  Outputs       : out -  Output containing the extracted block             */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2_mc_fullx_fully_8x8_sse42(UWORD8 *out,
+                            UWORD8 *ref,
+                            UWORD32 ref_wid,
+                            UWORD32 out_wid)
+{
+    __m128i src_r0, src_r1, src_r2, src_r3;
+    // Row 0-3: plain copy, 8 bytes per row, four rows loaded before they are stored
+    src_r0 =  _mm_loadl_epi64((__m128i *)ref);
+    src_r1 =  _mm_loadl_epi64((__m128i *)(ref + ref_wid));
+    src_r2 =  _mm_loadl_epi64((__m128i *)(ref + 2 * ref_wid));
+    src_r3 =  _mm_loadl_epi64((__m128i *)(ref + 3 * ref_wid));
+
+    _mm_storel_epi64((__m128i *)out, src_r0);                   // only the low 8 bytes are written
+    _mm_storel_epi64((__m128i *)(out + out_wid), src_r1);
+    _mm_storel_epi64((__m128i *)(out + 2 * out_wid), src_r2);
+    _mm_storel_epi64((__m128i *)(out + 3 * out_wid), src_r3);
+
+    // Row 4-7: same copy after stepping both pointers down four rows
+    ref += 4 * ref_wid;
+    out += 4 * out_wid;
+
+    src_r0 =  _mm_loadl_epi64((__m128i *)ref);
+    src_r1 =  _mm_loadl_epi64((__m128i *)(ref + ref_wid));
+    src_r2 =  _mm_loadl_epi64((__m128i *)(ref + 2 * ref_wid));
+    src_r3 =  _mm_loadl_epi64((__m128i *)(ref + 3 * ref_wid));
+
+    _mm_storel_epi64((__m128i *)out, src_r0);
+    _mm_storel_epi64((__m128i *)(out + out_wid), src_r1);
+    _mm_storel_epi64((__m128i *)(out + 2 * out_wid), src_r2);
+    _mm_storel_epi64((__m128i *)(out + 3 * out_wid), src_r3);
+    return;
+}

diff --git a/common/x86/impeg2_mem_func_sse42_intr.c b/common/x86/impeg2_mem_func_sse42_intr.c
new file mode 100644
index 0000000..de7de8f
--- /dev/null
+++ b/common/x86/impeg2_mem_func_sse42_intr.c

@@ -0,0 +1,100 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ *  impeg2_mem_func_sse42_intr.c
+ *
+ * @brief
+ *  Contains utility function definitions for MPEG2 codec
+ *
+ * @author
+ *  Mohit [100664]
+ *
+* @par List of Functions:
+* - impeg2_memset0_16bit_8x8_linear_block_sse42()
+* - impeg2_memset_8bit_8x8_block_sse42()
+ *
+ * @remarks
+ *  None
+ *
+ *******************************************************************************
+ */
+#include <stdio.h>
+#include <string.h>
+#include "iv_datatypedef.h"
+#include "impeg2_defs.h"
+
+#include <immintrin.h>
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include <tmmintrin.h>
+
+/*******************************************************************************
+*  Function Name   : impeg2_memset0_16bit_8x8_linear_block_sse42
+*
+*  Description     : memsets residual buf to 0 (eight 16-byte stores)
+*
+*  Arguments       : destination buffer
+*
+*  Values Returned : None
+*******************************************************************************/
+
+
+void impeg2_memset0_16bit_8x8_linear_block_sse42 (WORD16 *buf)
+ {
+    __m128i zero_8x8_16b = _mm_set1_epi16(0);                   /* all-zero 128-bit vector */
+    _mm_storeu_si128((__m128i *) buf, zero_8x8_16b);            /* unaligned store, so buf needs no 16-byte alignment */
+    _mm_storeu_si128((__m128i *) (buf + 8), zero_8x8_16b);      /* step of 8 elements per store; contiguous if WORD16 is 2 bytes - TODO confirm typedef */
+    _mm_storeu_si128((__m128i *) (buf + 16), zero_8x8_16b);
+    _mm_storeu_si128((__m128i *) (buf + 24), zero_8x8_16b);
+    _mm_storeu_si128((__m128i *) (buf + 32), zero_8x8_16b);
+    _mm_storeu_si128((__m128i *) (buf + 40), zero_8x8_16b);
+    _mm_storeu_si128((__m128i *) (buf + 48), zero_8x8_16b);
+    _mm_storeu_si128((__m128i *) (buf + 56), zero_8x8_16b);     /* last store covers elements 56..63 */
+}
+
+
+
+/*******************************************************************************
+*  Function Name   : impeg2_memset_8bit_8x8_block_sse42
+*
+*  Description     : fills an 8x8 block of bytes in dst with the low 8 bits of value
+*
+*  Arguments       : destination buffer, value and stride
+*
+*  Values Returned : None
+*******************************************************************************/
+
+
+void impeg2_memset_8bit_8x8_block_sse42(UWORD8 *dst, WORD32 dc_val, WORD32 dst_wd)
+{
+    __m128i value = _mm_set1_epi8((WORD8)dc_val);               /* cast keeps only the low 8 bits of dc_val, replicated to every byte */
+
+    _mm_storel_epi64((__m128i *)dst, value);                    /* each store writes 8 bytes = one row */
+    _mm_storel_epi64((__m128i *) (dst + dst_wd), value);        /* rows are dst_wd bytes apart */
+    _mm_storel_epi64((__m128i *) (dst + 2 * dst_wd), value);
+    _mm_storel_epi64((__m128i *) (dst + 3 * dst_wd), value);
+    _mm_storel_epi64((__m128i *) (dst + 4 * dst_wd), value);
+    _mm_storel_epi64((__m128i *) (dst + 5 * dst_wd), value);
+    _mm_storel_epi64((__m128i *) (dst + 6 * dst_wd), value);
+    _mm_storel_epi64((__m128i *) (dst + 7 * dst_wd), value);
+}

diff --git a/common/x86/impeg2_platform_macros.h b/common/x86/impeg2_platform_macros.h
new file mode 100644
index 0000000..05ff6da
--- /dev/null
+++ b/common/x86/impeg2_platform_macros.h

@@ -0,0 +1,49 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef __IMPEG2_PLATFORM_MACROS_H__
+#define __IMPEG2_PLATFORM_MACROS_H__
+
+/* Byte-swaps u4_temp1 into u4_temp2; assumes 32-bit unsigned operands. Expands to a full statement (keeps its own ';'). */
+#define     CONV_LE_TO_BE(u4_temp2,u4_temp1)    (u4_temp2) = ((u4_temp1) << 24) |               \
+                                                             (((u4_temp1) & 0xff00) << 8) |     \
+                                                             (((u4_temp1) & 0xff0000) >> 8) |   \
+                                                             ((u4_temp1) >> 24);
+static __inline UWORD32 CLZ(UWORD32 u4_word)    /* returns the number of leading zero bits in u4_word */
+{
+    if(u4_word)
+        return (__builtin_clz(u4_word));        /* the builtin's result is undefined for 0, hence the guard */
+    else
+        return 32;                              /* zero input: report 32 */
+}
+
+/* Saturating clips, fully parenthesised for use inside larger expressions; x is evaluated more than once. */
+#define CLIP_U8(x) (((x) > 255) ? (255) : (((x) < 0) ? (0) : (x)))
+#define CLIP_S8(x) (((x) > 127) ? (127) : (((x) < -128) ? (-128) : (x)))
+
+#define CLIP_U12(x) (((x) > 4095) ? (4095) : (((x) < 0) ? (0) : (x)))
+#define CLIP_S12(x) (((x) > 2047) ? (2047) : (((x) < -2048) ? (-2048) : (x)))
+
+#define CLIP_U16(x) (((x) > 65535) ? (65535) : (((x) < 0) ? (0) : (x)))
+#define CLIP_S16(x) (((x) > 32767) ? (32767) : (((x) < -32768) ? (-32768) : (x)))
+#define PLD(x)      /* prefetch hint: expands to nothing on this platform */
+
+#define INLINE      /* expands to nothing on this platform */
+
+#endif /* __IMPEG2_PLATFORM_MACROS_H__ */

diff --git a/decoder.arm.mk b/decoder.arm.mk
new file mode 100644
index 0000000..6e02ce8
--- /dev/null
+++ b/decoder.arm.mk

@@ -0,0 +1,22 @@
+libmpeg2d_inc_dir_arm   +=  $(LOCAL_PATH)/decoder/arm
+libmpeg2d_inc_dir_arm   +=  $(LOCAL_PATH)/common/arm
+# Selector source and flags added for every ARM build, with or without NEON
+libmpeg2d_srcs_c_arm    +=  decoder/arm/impeg2d_function_selector.c
+libmpeg2d_cflags_arm    += -DDISABLE_NEONINTR  -DARM -DARMGCC
+
+LOCAL_ARM_MODE         := arm
+# NEON builds add the a9q selector plus the assembly kernels; otherwise NEON is disabled via -DDISABLE_NEON
+ifeq ($(ARCH_ARM_HAVE_NEON),true)
+libmpeg2d_srcs_c_arm    +=  decoder/arm/impeg2d_function_selector_a9q.c
+libmpeg2d_srcs_asm_arm    +=  common/arm/impeg2_format_conv.s
+libmpeg2d_srcs_asm_arm    +=  common/arm/impeg2_idct.s
+libmpeg2d_srcs_asm_arm    +=  common/arm/impeg2_inter_pred.s
+libmpeg2d_srcs_asm_arm    +=  common/arm/impeg2_mem_func.s
+libmpeg2d_cflags_arm += -DDEFAULT_ARCH=D_ARCH_ARM_A9Q
+else
+libmpeg2d_cflags_arm += -DDISABLE_NEON -DDEFAULT_ARCH=D_ARCH_ARM_NONEON
+endif
+# Export the collected lists to the arm-specific build variables
+LOCAL_SRC_FILES_arm += $(libmpeg2d_srcs_c_arm) $(libmpeg2d_srcs_asm_arm)
+LOCAL_C_INCLUDES_arm += $(libmpeg2d_inc_dir_arm)
+LOCAL_CFLAGS_arm += $(libmpeg2d_cflags_arm)

diff --git a/decoder.arm64.mk b/decoder.arm64.mk
new file mode 100644
index 0000000..f3a36e6
--- /dev/null
+++ b/decoder.arm64.mk

@@ -0,0 +1,31 @@
+libmpeg2d_cflags_arm64 += -DARMV8
+libmpeg2d_cflags_arm64 += -DDISABLE_NEONINTR  -DARM -DARMGCC
+
+libmpeg2d_inc_dir_arm64   +=  $(LOCAL_PATH)/decoder/arm
+libmpeg2d_inc_dir_arm64   +=  $(LOCAL_PATH)/common/armv8
+
+libmpeg2d_srcs_c_arm64    +=  decoder/arm/impeg2d_function_selector.c
+# NEON builds add the av8 selector and the armv8 assembly kernels; DEFAULT_ARCH must go into the arm64 flags
+ifeq ($(ARCH_ARM_HAVE_NEON),true)
+libmpeg2d_srcs_c_arm64      +=  decoder/arm/impeg2d_function_selector_av8.c
+
+libmpeg2d_srcs_asm_arm64    +=  common/armv8/impeg2_neon_macros.s
+libmpeg2d_srcs_asm_arm64    +=  common/armv8/impeg2_format_conv.s
+libmpeg2d_srcs_asm_arm64    +=  common/armv8/impeg2_idct.s
+libmpeg2d_srcs_asm_arm64    +=  common/armv8/impeg2_inter_pred.s
+libmpeg2d_srcs_asm_arm64    +=  common/armv8/impeg2_mem_func.s
+libmpeg2d_cflags_arm64 += -DDEFAULT_ARCH=D_ARCH_ARMV8_GENERIC
+else
+libmpeg2d_cflags_arm64 += -DDISABLE_NEON -DDEFAULT_ARCH=D_ARCH_ARM_NONEON
+endif
+
+
+
+# Export the collected lists to the arm64-specific build variables
+LOCAL_SRC_FILES_arm64 += $(libmpeg2d_srcs_c_arm64) $(libmpeg2d_srcs_asm_arm64)
+LOCAL_C_INCLUDES_arm64 += $(libmpeg2d_inc_dir_arm64)
+
+comma := ,
+LOCAL_ASFLAGS_arm64 += $(addprefix -Wa$(comma)-I,$(libmpeg2d_inc_dir_arm64))
+
+LOCAL_CFLAGS_arm64 += $(libmpeg2d_cflags_arm64)

diff --git a/decoder.mips.mk b/decoder.mips.mk
new file mode 100644
index 0000000..514eb9c
--- /dev/null
+++ b/decoder.mips.mk

@@ -0,0 +1,6 @@
+libmpeg2d_inc_dir_mips  +=  $(LOCAL_PATH)/common/mips
+# The function selector is the only MIPS-specific source added here
+libmpeg2d_srcs_c_mips   +=  decoder/mips/impeg2d_function_selector.c
+# Export the collected lists to the mips-specific build variables
+LOCAL_C_INCLUDES_mips   += $(libmpeg2d_inc_dir_mips)
+LOCAL_SRC_FILES_mips    += $(libmpeg2d_srcs_c_mips)

diff --git a/decoder.mips64.mk b/decoder.mips64.mk
new file mode 100644
index 0000000..5a3bdbc
--- /dev/null
+++ b/decoder.mips64.mk

@@ -0,0 +1,6 @@
+libmpeg2d_inc_dir_mips64    +=  $(LOCAL_PATH)/common/mips
+# The function selector is the only mips64-specific source added here
+libmpeg2d_srcs_c_mips64     +=  decoder/mips/impeg2d_function_selector.c
+# Export the mips64 lists (not the 32-bit mips ones) to the mips64 build variables
+LOCAL_C_INCLUDES_mips64     += $(libmpeg2d_inc_dir_mips64)
+LOCAL_SRC_FILES_mips64      += $(libmpeg2d_srcs_c_mips64)

diff --git a/decoder.mk b/decoder.mk
new file mode 100644
index 0000000..7edccc2
--- /dev/null
+++ b/decoder.mk

@@ -0,0 +1,55 @@
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+libmpeg2d_source_dir := $(LOCAL_PATH)
+
+## Arch-common settings
+LOCAL_MODULE := libmpeg2dec
+#LOCAL_32_BIT_ONLY := true
+
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+
+LOCAL_CFLAGS += -D_LIB -DMULTICORE -fPIC
+LOCAL_CFLAGS += -O3 -DANDROID
+
+LOCAL_C_INCLUDES := $(LOCAL_PATH)/decoder $(LOCAL_PATH)/common
+# Sources from common/ that are built for every architecture
+libmpeg2d_srcs_c    += common/impeg2_buf_mgr.c
+libmpeg2d_srcs_c    += common/impeg2_disp_mgr.c
+libmpeg2d_srcs_c    += common/impeg2_format_conv.c
+libmpeg2d_srcs_c    += common/impeg2_globals.c
+libmpeg2d_srcs_c    += common/impeg2_idct.c
+libmpeg2d_srcs_c    += common/impeg2_inter_pred.c
+libmpeg2d_srcs_c    += common/impeg2_job_queue.c
+libmpeg2d_srcs_c    += common/impeg2_mem_func.c
+
+libmpeg2d_srcs_c    += common/ithread.c
+# Sources from decoder/ that are built for every architecture
+libmpeg2d_srcs_c    += decoder/impeg2d_api_main.c
+libmpeg2d_srcs_c    += decoder/impeg2d_bitstream.c
+libmpeg2d_srcs_c    += decoder/impeg2d_debug.c
+libmpeg2d_srcs_c    += decoder/impeg2d_dec_hdr.c
+libmpeg2d_srcs_c    += decoder/impeg2d_decoder.c
+libmpeg2d_srcs_c    += decoder/impeg2d_d_pic.c
+libmpeg2d_srcs_c    += decoder/impeg2d_function_selector_generic.c
+libmpeg2d_srcs_c    += decoder/impeg2d_globals.c
+libmpeg2d_srcs_c    += decoder/impeg2d_i_pic.c
+libmpeg2d_srcs_c    += decoder/impeg2d_mc.c
+libmpeg2d_srcs_c    += decoder/impeg2d_mv_dec.c
+libmpeg2d_srcs_c    += decoder/impeg2d_pic_proc.c
+libmpeg2d_srcs_c    += decoder/impeg2d_pnb_pic.c
+libmpeg2d_srcs_c    += decoder/impeg2d_vld.c
+libmpeg2d_srcs_c    += decoder/impeg2d_vld_tables.c
+# NOTE(review): libmpeg2d_srcs_asm is never assigned in this file - presumably intentionally empty; confirm
+LOCAL_SRC_FILES := $(libmpeg2d_srcs_c) $(libmpeg2d_srcs_asm)
+
+
+# Load the arch-specific settings; each fragment appends to per-arch variables such as LOCAL_SRC_FILES_<arch>
+include $(LOCAL_PATH)/decoder.arm.mk
+include $(LOCAL_PATH)/decoder.arm64.mk
+include $(LOCAL_PATH)/decoder.x86.mk
+include $(LOCAL_PATH)/decoder.x86_64.mk
+include $(LOCAL_PATH)/decoder.mips.mk
+include $(LOCAL_PATH)/decoder.mips64.mk
+# Build libmpeg2dec as a static library
+include $(BUILD_STATIC_LIBRARY)

diff --git a/decoder.x86.mk b/decoder.x86.mk
new file mode 100644
index 0000000..ff6344c
--- /dev/null
+++ b/decoder.x86.mk

@@ -0,0 +1,21 @@
+libmpeg2d_cflags_x86 += -DX86 -DDISABLE_AVX2 -m32 -msse4.2 -mno-avx -DDEFAULT_ARCH=D_ARCH_X86_SSE42
+
+libmpeg2d_inc_dir_x86   +=  $(LOCAL_PATH)/decoder/x86
+libmpeg2d_inc_dir_x86   +=  $(LOCAL_PATH)/common/x86
+# NOTE(review): the avx2 selector is still listed although the flags set -DDISABLE_AVX2 -mno-avx - presumably guarded inside the file; confirm
+libmpeg2d_srcs_c_x86    +=  decoder/x86/impeg2d_function_selector.c
+libmpeg2d_srcs_c_x86    +=  decoder/x86/impeg2d_function_selector_avx2.c
+libmpeg2d_srcs_c_x86    +=  decoder/x86/impeg2d_function_selector_ssse3.c
+libmpeg2d_srcs_c_x86    +=  decoder/x86/impeg2d_function_selector_sse42.c
+
+# SSE4.2 intrinsic sources from common/x86
+libmpeg2d_srcs_c_x86    +=  common/x86/impeg2_idct_recon_sse42_intr.c
+libmpeg2d_srcs_c_x86    +=  common/x86/impeg2_inter_pred_sse42_intr.c
+libmpeg2d_srcs_c_x86    +=  common/x86/impeg2_mem_func_sse42_intr.c
+# Export the collected lists to the x86-specific build variables
+LOCAL_SRC_FILES_x86 += $(libmpeg2d_srcs_c_x86) $(libmpeg2d_srcs_asm_x86)
+LOCAL_C_INCLUDES_x86 += $(libmpeg2d_inc_dir_x86)
+LOCAL_CFLAGS_x86 += $(libmpeg2d_cflags_x86)
+
+
+

diff --git a/decoder.x86_64.mk b/decoder.x86_64.mk
new file mode 100644
index 0000000..72c1820
--- /dev/null
+++ b/decoder.x86_64.mk

@@ -0,0 +1,21 @@
+libmpeg2d_cflags_x86_64 += -DX86 -DDISABLE_AVX2 -m64 -msse4.2 -mno-avx  -DDEFAULT_ARCH=D_ARCH_X86_SSE42
+
+libmpeg2d_inc_dir_x86_64   +=  $(LOCAL_PATH)/decoder/x86
+libmpeg2d_inc_dir_x86_64   +=  $(LOCAL_PATH)/common/x86
+# NOTE(review): the avx2 selector is still listed although the flags set -DDISABLE_AVX2 -mno-avx - presumably guarded inside the file; confirm
+libmpeg2d_srcs_c_x86_64     +=  decoder/x86/impeg2d_function_selector.c
+libmpeg2d_srcs_c_x86_64     +=  decoder/x86/impeg2d_function_selector_avx2.c
+libmpeg2d_srcs_c_x86_64     +=  decoder/x86/impeg2d_function_selector_ssse3.c
+libmpeg2d_srcs_c_x86_64     +=  decoder/x86/impeg2d_function_selector_sse42.c
+
+# SSE4.2 intrinsic sources from common/x86
+libmpeg2d_srcs_c_x86_64     +=  common/x86/impeg2_idct_recon_sse42_intr.c
+libmpeg2d_srcs_c_x86_64     +=  common/x86/impeg2_inter_pred_sse42_intr.c
+libmpeg2d_srcs_c_x86_64     +=  common/x86/impeg2_mem_func_sse42_intr.c
+# Export the collected lists to the x86_64-specific build variables
+LOCAL_SRC_FILES_x86_64 += $(libmpeg2d_srcs_c_x86_64) $(libmpeg2d_srcs_asm_x86_64)
+LOCAL_C_INCLUDES_x86_64 += $(libmpeg2d_inc_dir_x86_64)
+LOCAL_CFLAGS_x86_64 += $(libmpeg2d_cflags_x86_64)
+
+
+

diff --git a/decoder/arm/impeg2d_function_selector.c b/decoder/arm/impeg2d_function_selector.c
new file mode 100644
index 0000000..d43e060
--- /dev/null
+++ b/decoder/arm/impeg2d_function_selector.c

@@ -0,0 +1,119 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2d_function_selector.c
+*
+* @brief
+*  Contains functions to initialize function pointers used in mpeg2
+*
+* @author
+*  Naveen
+*
+* @par List of Functions:
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "iv_datatypedef.h"
+#include "iv.h"
+
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_globals.h"
+#include "impeg2_mem_func.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_macros.h"
+
+#include "ivd.h"
+#include "impeg2d.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_structs.h"
+#include "impeg2d_vld_tables.h"
+#include "impeg2d_vld.h"
+#include "impeg2d_pic_proc.h"
+#include "impeg2d_debug.h"
+#include "impeg2d_mc.h"
+
+void impeg2d_init_function_ptr_generic(void *pv_codec);
+void impeg2d_init_function_ptr_a9q(void *pv_codec);
+#ifdef ARMV8
+void impeg2d_init_function_ptr_av8(void *pv_codec);
+#endif /* ARMV8 */
+
+/*
+ * Installs the kernel function pointers that match the processor
+ * architecture recorded in the codec state.
+ *
+ * pv_codec : the decoder state, a dec_state_t * passed as void *.
+ */
+void impeg2d_init_function_ptr(void *pv_codec)
+{
+    dec_state_t *ps_dec = (dec_state_t *)pv_codec;
+
+    /* Only ARCH_ARM_NONEON selects the generic routines. */
+    if(ARCH_ARM_NONEON == ps_dec->e_processor_arch)
+    {
+        impeg2d_init_function_ptr_generic(ps_dec);
+        return;
+    }
+
+    /*
+     * Every other value, recognised or not, gets the one set this build
+     * provides: av8 when ARMV8 is defined, a9q otherwise.
+     */
+#ifdef ARMV8
+    impeg2d_init_function_ptr_av8(ps_dec);
+#else /* !ARMV8 */
+    impeg2d_init_function_ptr_a9q(ps_dec);
+#endif /* ARMV8 */
+}
+
+/* Stores the build-time default processor architecture in the codec state. */
+void impeg2d_init_arch(void *pv_codec)
+{
+    dec_state_t *ps_dec = (dec_state_t *)pv_codec;
+
+#if !defined(DEFAULT_ARCH)
+    ps_dec->e_processor_arch = ARCH_ARM_A9Q;
+#elif DEFAULT_ARCH == D_ARCH_ARM_NONEON
+    ps_dec->e_processor_arch = ARCH_ARM_NONEON;
+#elif DEFAULT_ARCH == D_ARCH_ARMV8_GENERIC
+    ps_dec->e_processor_arch = ARCH_ARMV8_GENERIC;
+#elif DEFAULT_ARCH == D_ARCH_ARM_NEONINTR
+    ps_dec->e_processor_arch = ARCH_ARM_NEONINTR;
+#else /* any other DEFAULT_ARCH value */
+    ps_dec->e_processor_arch = ARCH_ARM_A9Q;
+#endif
+}

diff --git a/decoder/arm/impeg2d_function_selector_a9q.c b/decoder/arm/impeg2d_function_selector_a9q.c
new file mode 100644
index 0000000..024145d
--- /dev/null
+++ b/decoder/arm/impeg2d_function_selector_a9q.c

@@ -0,0 +1,100 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2d_function_selector_a9q.c
+*
+* @brief
+*  Contains functions to initialize a9q function pointers used in mpeg2
+*
+* @author
+*  Naveen
+*
+* @par List of Functions:
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "iv_datatypedef.h"
+#include "iv.h"
+
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_globals.h"
+#include "impeg2_mem_func.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_macros.h"
+
+#include "ivd.h"
+#include "impeg2d.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_structs.h"
+#include "impeg2d_vld_tables.h"
+#include "impeg2d_vld.h"
+#include "impeg2d_pic_proc.h"
+#include "impeg2d_debug.h"
+#include "impeg2d_mc.h"
+/* Installs the a9q kernel set; pv_codec is void * to match the prototype the arch dispatcher declares. */
+void impeg2d_init_function_ptr_a9q(void *pv_codec)
+{
+    dec_state_t *dec = (dec_state_t *)pv_codec;
+    /* IDCT-recon table: slots 2 and 3 both take the full impeg2_idct_recon_a9q */
+    dec->pf_idct_recon[0]                   = &impeg2_idct_recon_dc_a9q;
+    dec->pf_idct_recon[1]                   = &impeg2_idct_recon_dc_mismatch_a9q;
+    dec->pf_idct_recon[2]                   = &impeg2_idct_recon_a9q;
+    dec->pf_idct_recon[3]                   = &impeg2_idct_recon_a9q;
+    /* pf_mc[] entries are the impeg2d_mc_* routines, which carry no a9q suffix */
+    dec->pf_mc[0]                           = &impeg2d_mc_fullx_fully;
+    dec->pf_mc[1]                           = &impeg2d_mc_fullx_halfy;
+    dec->pf_mc[2]                           = &impeg2d_mc_halfx_fully;
+    dec->pf_mc[3]                           = &impeg2d_mc_halfx_halfy;
+    /* interpolate / copy_mb kernels */
+    dec->pf_interpolate                     = &impeg2_interpolate_a9q;
+    dec->pf_copy_mb                         = &impeg2_copy_mb_a9q;
+    /* 8x8 mc kernels, one per fullx/halfx and fully/halfy pairing */
+    dec->pf_fullx_halfy_8x8                 = &impeg2_mc_fullx_halfy_8x8_a9q;
+    dec->pf_halfx_fully_8x8                 = &impeg2_mc_halfx_fully_8x8_a9q;
+    dec->pf_halfx_halfy_8x8                 = &impeg2_mc_halfx_halfy_8x8_a9q;
+    dec->pf_fullx_fully_8x8                 = &impeg2_mc_fullx_fully_8x8_a9q;
+    /* 8x8 block memset kernels */
+    dec->pf_memset_8bit_8x8_block           = &impeg2_memset_8bit_8x8_block_a9q;
+    dec->pf_memset_16bit_8x8_linear_block   = &impeg2_memset0_16bit_8x8_linear_block_a9q;
+    /* Output copy/conversion: only the two yuv420sp converters have a9q versions here */
+    dec->pf_copy_yuv420p_buf                = &impeg2_copy_frm_yuv420p;
+    dec->pf_fmt_conv_yuv420p_to_yuv422ile   = &impeg2_fmt_conv_yuv420p_to_yuv422ile;
+    dec->pf_fmt_conv_yuv420p_to_yuv420sp_uv = &impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_a9q;
+    dec->pf_fmt_conv_yuv420p_to_yuv420sp_vu = &impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_a9q;
+}
+
+

diff --git a/decoder/arm/impeg2d_function_selector_av8.c b/decoder/arm/impeg2d_function_selector_av8.c
new file mode 100644
index 0000000..d163b54
--- /dev/null
+++ b/decoder/arm/impeg2d_function_selector_av8.c

@@ -0,0 +1,103 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2d_function_selector_av8.c
+*
+* @brief
+*  Contains functions to initialize av8 function pointers used in mpeg2
+*
+* @author
+*  Naveen
+*
+* @par List of Functions:
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "iv_datatypedef.h"
+#include "iv.h"
+#include "ithread.h"
+
+
+#include "impeg2_macros.h"
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_mem_func.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_job_queue.h"
+#include "impeg2_globals.h"
+
+#include "ivd.h"
+#include "impeg2d.h"
+#include "impeg2d_api.h"
+#include "impeg2d_debug.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_structs.h"
+#include "impeg2d_mc.h"
+#include "impeg2d_pic_proc.h"
+#include "impeg2d_vld_tables.h"
+#include "impeg2d_vld.h"
+#include "impeg2d_pic_proc.h"
+/* Populates the decoder state's kernel function pointers with the av8 selection. */
+void impeg2d_init_function_ptr_av8(void *pv_codec)
+{
+    dec_state_t *ps_dec = (dec_state_t *)pv_codec;
+    /* idct-recon slots; the last two share impeg2_idct_recon_av8 */
+    ps_dec->pf_idct_recon[3] = &impeg2_idct_recon_av8;
+    ps_dec->pf_idct_recon[2] = &impeg2_idct_recon_av8;
+    ps_dec->pf_idct_recon[1] = &impeg2_idct_recon_dc_mismatch_av8;
+    ps_dec->pf_idct_recon[0] = &impeg2_idct_recon_dc_av8;
+    /* 8x8 mc kernels */
+    ps_dec->pf_fullx_fully_8x8 = &impeg2_mc_fullx_fully_8x8_av8;
+    ps_dec->pf_fullx_halfy_8x8 = &impeg2_mc_fullx_halfy_8x8_av8;
+    ps_dec->pf_halfx_fully_8x8 = &impeg2_mc_halfx_fully_8x8_av8;
+    ps_dec->pf_halfx_halfy_8x8 = &impeg2_mc_halfx_halfy_8x8_av8;
+    /* pf_mc[] table, filled with the un-suffixed impeg2d_mc_* routines */
+    ps_dec->pf_mc[0] = &impeg2d_mc_fullx_fully;
+    ps_dec->pf_mc[1] = &impeg2d_mc_fullx_halfy;
+    ps_dec->pf_mc[2] = &impeg2d_mc_halfx_fully;
+    ps_dec->pf_mc[3] = &impeg2d_mc_halfx_halfy;
+    /* copy_mb / interpolate */
+    ps_dec->pf_copy_mb = &impeg2_copy_mb_av8;
+    ps_dec->pf_interpolate = &impeg2_interpolate_av8;
+    /* 8x8 block memset kernels */
+    ps_dec->pf_memset_16bit_8x8_linear_block = &impeg2_memset0_16bit_8x8_linear_block_av8;
+    ps_dec->pf_memset_8bit_8x8_block = &impeg2_memset_8bit_8x8_block_av8;
+    /* output copy and format conversion; the first two are un-suffixed routines */
+    ps_dec->pf_copy_yuv420p_buf = &impeg2_copy_frm_yuv420p;
+    ps_dec->pf_fmt_conv_yuv420p_to_yuv422ile = &impeg2_fmt_conv_yuv420p_to_yuv422ile;
+    ps_dec->pf_fmt_conv_yuv420p_to_yuv420sp_vu = &impeg2_fmt_conv_yuv420p_to_yuv420sp_vu_av8;
+    ps_dec->pf_fmt_conv_yuv420p_to_yuv420sp_uv = &impeg2_fmt_conv_yuv420p_to_yuv420sp_uv_av8;
+}

diff --git a/decoder/impeg2d.h b/decoder/impeg2d.h
new file mode 100644
index 0000000..fe38046
--- /dev/null
+++ b/decoder/impeg2d.h

@@ -0,0 +1,506 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : impeg2d.h                                            */
+/*                                                                           */
+/*  Description       : This file contains all the necessary structure and   */
+/*                      enumeration definitions needed for the Application   */
+/*                      Program Interface(API) of the Ittiam MPEG2 ASP       */
+/*                      Decoder on Cortex A8 - Neon platform                 */
+/*                                                                           */
+/*  List of Functions : impeg2d_api_function                                 */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         26 08 2010   100239(RCY)     Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+
+#ifndef __IMPEG2D_H__
+#define __IMPEG2D_H__
+
+#include "iv.h"
+#include "ivd.h"
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/*****************************************************************************/
+/* Constant Macros                                                           */
+/*****************************************************************************/
+#define EXPORT_MPEG2DEC_FULLCODEC_MEM_RECORDS   22
+
+/*****************************************************************************/
+/* Function Macros - IS_*(x): non-zero when the named IVD_* bit of x is set  */
+/*****************************************************************************/
+#define IS_IVD_CONCEALMENT_APPLIED(x)           ((x) & (1 << IVD_APPLIEDCONCEALMENT))
+#define IS_IVD_INSUFFICIENTDATA_ERROR(x)        ((x) & (1 << IVD_INSUFFICIENTDATA))
+#define IS_IVD_CORRUPTEDDATA_ERROR(x)           ((x) & (1 << IVD_CORRUPTEDDATA))
+#define IS_IVD_CORRUPTEDHEADER_ERROR(x)         ((x) & (1 << IVD_CORRUPTEDHEADER))
+#define IS_IVD_UNSUPPORTEDINPUT_ERROR(x)        ((x) & (1 << IVD_UNSUPPORTEDINPUT))
+#define IS_IVD_UNSUPPORTEDPARAM_ERROR(x)        ((x) & (1 << IVD_UNSUPPORTEDPARAM))
+#define IS_IVD_FATAL_ERROR(x)                   ((x) & (1 << IVD_FATALERROR))
+#define IS_IVD_INVALID_BITSTREAM_ERROR(x)       ((x) & (1 << IVD_INVALID_BITSTREAM))
+#define IS_IVD_INCOMPLETE_BITSTREAM_ERROR(x)    ((x) & (1 << IVD_INCOMPLETE_BITSTREAM))
+/* SET_*(x): sets the named IVD_* bit in lvalue x; x is evaluated exactly once. */
+#define SET_IVD_CONCEALMENT_APPLIED(x)          ((x) |= (1 << IVD_APPLIEDCONCEALMENT))
+#define SET_IVD_INSUFFICIENTDATA_ERROR(x)       ((x) |= (1 << IVD_INSUFFICIENTDATA))
+#define SET_IVD_CORRUPTEDDATA_ERROR(x)          ((x) |= (1 << IVD_CORRUPTEDDATA))
+#define SET_IVD_CORRUPTEDHEADER_ERROR(x)        ((x) |= (1 << IVD_CORRUPTEDHEADER))
+#define SET_IVD_UNSUPPORTEDINPUT_ERROR(x)       ((x) |= (1 << IVD_UNSUPPORTEDINPUT))
+#define SET_IVD_UNSUPPORTEDPARAM_ERROR(x)       ((x) |= (1 << IVD_UNSUPPORTEDPARAM))
+#define SET_IVD_FATAL_ERROR(x)                  ((x) |= (1 << IVD_FATALERROR))
+#define SET_IVD_INVALID_BITSTREAM_ERROR(x)      ((x) |= (1 << IVD_INVALID_BITSTREAM))
+#define SET_IVD_INCOMPLETE_BITSTREAM_ERROR(x)   ((x) |= (1 << IVD_INCOMPLETE_BITSTREAM))
+
+/*****************************************************************************/
+/* API Function Prototype                                                    */
+/*****************************************************************************/
+IV_API_CALL_STATUS_T impeg2d_api_function(iv_obj_t *ps_handle,
+                                          void *pv_api_ip,
+                                          void *pv_api_op);
+
+/*****************************************************************************/
+/* Enums                                                                     */
+/*****************************************************************************/
+/* Codec Error codes for MPEG2 ASP Decoder                                   */
+
+typedef enum
+{
+
+    IMPEG2D_UNKNOWN_ERROR = IVD_DUMMY_ELEMENT_FOR_CODEC_EXTENSIONS + 1,
+    /* API calls without init call */
+    IMPEG2D_INIT_NOT_DONE,
+    /* Query number of Memory Records API */
+    IMPEG2D_QUERY_NUM_MEM_REC_FAIL,
+
+    /* Fill Memory Records API */
+    IMPEG2D_FILL_NUM_MEM_REC_NOT_SUFFICIENT,
+
+    /* Initialize Decoder API */
+    IMPEG2D_INIT_DEC_SCR_MEM_INSUFFICIENT,
+    IMPEG2D_INIT_DEC_PER_MEM_INSUFFICIENT,
+    IMPEG2D_INIT_NUM_MEM_REC_NOT_SUFFICIENT,
+    IMPEG2D_INIT_CHROMA_FORMAT_HEIGHT_ERROR,
+
+    /* Decode Sequence Header API */
+    IMPEG2D_FRM_HDR_START_CODE_NOT_FOUND,
+    IMPEG2D_FRM_HDR_MARKER_BIT_NOT_FOUND,
+    IMPEG2D_PROF_LEVEL_NOT_SUPPORTED,
+    IMPEG2D_FMT_NOT_SUPPORTED,
+    IMPEG2D_SCALABILITIY_NOT_SUPPORTED,
+    IMPEG2D_PIC_SIZE_NOT_SUPPORTED,
+
+    /* Search for start code API */
+    //IMPEG2D_SEARCH_START_CODE_FAIL         ,
+    /* Decode Video Frame API    */
+    IMPEG2D_START_CODE_NOT_FOUND,
+    IMPEG2D_MARKER_BIT_NOT_FOUND,
+    IMPEG2D_INVALID_STUFFING,
+    IMPEG2D_PROFILE_LEVEL_NOT_SUP,
+    IMPEG2D_CHROMA_FMT_NOT_SUP,
+    IMPEG2D_SCALABLITY_NOT_SUP,
+    IMPEG2D_FRM_HDR_DECODE_ERR,
+    IMPEG2D_MB_HDR_DECODE_ERR,
+    IMPEG2D_MB_TEX_DECODE_ERR,
+    IMPEG2D_INCORRECT_QUANT_MATRIX,
+    IMPEG2D_INVALID_SKIP_MB,
+    IMPEG2D_NOT_SUPPORTED_ERR,
+    IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR,
+    IMPEG2D_INVALID_PIC_TYPE,
+    IMPEG2D_INVALID_HUFFMAN_CODE,
+    IMPEG2D_NO_FREE_BUF_ERR,
+
+    /* slice header errors */
+    IMPEG2D_INVALID_VERT_SIZE,
+    IMPEG2D_MB_DATA_DECODE_ERR,
+
+    /* Get Display Frame API */
+    IMPEG2D_GET_DISP_FRM_FAIL,
+
+    /* Sample Version limitation */
+    IMPEG2D_SAMPLE_VERSION_LIMIT_ERR,
+    /**
+     * Width/height greater than max width and max height
+     */
+    IMPEG2D_UNSUPPORTED_DIMENSIONS,
+
+    /* Unknown API Command */
+    IMPEG2D_UNKNOWN_API_COMMAND
+
+} IMPEG2D_ERROR_CODES_T;
+
+/*****************************************************************************/
+/* Extended Structures                                                       */
+/*****************************************************************************/
+/* Codec-specific control sub-commands, numbered from IVD_CMD_CTL_CODEC_SUBCMD_START */
+typedef enum
+{
+    /** Set number of cores/threads to be used */
+    IMPEG2D_CMD_CTL_SET_NUM_CORES = IVD_CMD_CTL_CODEC_SUBCMD_START,
+
+    /** Set processor details */
+    IMPEG2D_CMD_CTL_SET_PROCESSOR = IVD_CMD_CTL_CODEC_SUBCMD_START + 0x001,
+
+    /** Get display buffer dimensions */
+    IMPEG2D_CMD_CTL_GET_BUFFER_DIMENSIONS = IVD_CMD_CTL_CODEC_SUBCMD_START + 0x100
+    /* no trailing comma: keeps the list valid for C89 and C++03 includers */
+} IMPEG2D_CMD_CTL_SUB_CMDS;
+
+/*****************************************************************************/
+/*  Get Number of Memory Records                                             */
+/*****************************************************************************/
+
+typedef struct
+{
+    iv_num_mem_rec_ip_t s_ivd_num_mem_rec_ip_t;
+} impeg2d_num_mem_rec_ip_t;
+
+typedef struct
+{
+    iv_num_mem_rec_op_t s_ivd_num_mem_rec_op_t;
+} impeg2d_num_mem_rec_op_t;
+
+/*****************************************************************************/
+/*  Fill Memory Records                                                      */
+/*****************************************************************************/
+
+typedef struct
+{
+    iv_fill_mem_rec_ip_t s_ivd_fill_mem_rec_ip_t;
+    /* Flag to enable sharing of reference buffers between decoder
+     and application */
+
+    UWORD32 u4_share_disp_buf;
+
+    /* format in which codec has to give out frame data for display */
+    IV_COLOR_FORMAT_T e_output_format;
+
+} impeg2d_fill_mem_rec_ip_t;
+
+typedef struct
+{
+    iv_fill_mem_rec_op_t s_ivd_fill_mem_rec_op_t;
+} impeg2d_fill_mem_rec_op_t;
+
+/*****************************************************************************/
+/*  Retrieve Memory Records                                                  */
+/*****************************************************************************/
+
+typedef struct
+{
+    iv_retrieve_mem_rec_ip_t s_ivd_retrieve_mem_rec_ip_t;
+} impeg2d_retrieve_mem_rec_ip_t;
+
+typedef struct
+{
+    iv_retrieve_mem_rec_op_t s_ivd_retrieve_mem_rec_op_t;
+} impeg2d_retrieve_mem_rec_op_t;
+
+/*****************************************************************************/
+/*   Initialize decoder                                                      */
+/*****************************************************************************/
+
+typedef struct
+{
+    ivd_init_ip_t s_ivd_init_ip_t;
+    /* Flag to enable sharing of reference buffers between decoder
+     and application */
+    UWORD32 u4_share_disp_buf;
+
+} impeg2d_init_ip_t;
+
+typedef struct
+{
+    ivd_init_op_t s_ivd_init_op_t;
+} impeg2d_init_op_t;
+
+/*****************************************************************************/
+/*   Video Decode                                                            */
+/*****************************************************************************/
+
+typedef struct
+{
+    ivd_video_decode_ip_t s_ivd_video_decode_ip_t;
+} impeg2d_video_decode_ip_t;
+
+typedef struct
+{
+    ivd_video_decode_op_t s_ivd_video_decode_op_t;
+} impeg2d_video_decode_op_t;
+
+/*****************************************************************************/
+/*   Get Display Frame                                                       */
+/*****************************************************************************/
+
+typedef struct
+{
+    ivd_get_display_frame_ip_t s_ivd_get_display_frame_ip_t;
+} impeg2d_get_display_frame_ip_t;
+
+typedef struct
+{
+    ivd_get_display_frame_op_t s_ivd_get_display_frame_op_t;
+} impeg2d_get_display_frame_op_t;
+
+/*****************************************************************************/
+/*   Set Display Frame                                                       */
+/*****************************************************************************/
+typedef struct
+{
+    ivd_set_display_frame_ip_t s_ivd_set_display_frame_ip_t;
+} impeg2d_set_display_frame_ip_t;
+
+typedef struct
+{
+    ivd_set_display_frame_op_t s_ivd_set_display_frame_op_t;
+} impeg2d_set_display_frame_op_t;
+
+/*****************************************************************************/
+/*   Release Display Buffers                                                 */
+/*****************************************************************************/
+
+typedef struct
+{
+    ivd_rel_display_frame_ip_t s_ivd_rel_display_frame_ip_t;
+} impeg2d_rel_display_frame_ip_t;
+
+typedef struct
+{
+    ivd_rel_display_frame_op_t s_ivd_rel_display_frame_op_t;
+} impeg2d_rel_display_frame_op_t;
+
+/*****************************************************************************/
+/*   Video control  Flush                                                    */
+/*****************************************************************************/
+
+typedef struct
+{
+    ivd_ctl_flush_ip_t s_ivd_ctl_flush_ip_t;
+} impeg2d_ctl_flush_ip_t;
+
+typedef struct
+{
+    ivd_ctl_flush_op_t s_ivd_ctl_flush_op_t;
+} impeg2d_ctl_flush_op_t;
+
+/*****************************************************************************/
+/*   Video control reset                                                     */
+/*****************************************************************************/
+
+typedef struct
+{
+    ivd_ctl_reset_ip_t s_ivd_ctl_reset_ip_t;
+} impeg2d_ctl_reset_ip_t;
+
+typedef struct
+{
+    ivd_ctl_reset_op_t s_ivd_ctl_reset_op_t;
+} impeg2d_ctl_reset_op_t;
+
+/*****************************************************************************/
+/*   Video control  Set Params                                               */
+/*****************************************************************************/
+
+typedef struct
+{
+    ivd_ctl_set_config_ip_t s_ivd_ctl_set_config_ip_t;
+} impeg2d_ctl_set_config_ip_t;
+
+typedef struct
+{
+    ivd_ctl_set_config_op_t s_ivd_ctl_set_config_op_t;
+} impeg2d_ctl_set_config_op_t;
+
+/*****************************************************************************/
+/*   Video control:Get Buf Info                                              */
+/*****************************************************************************/
+
+typedef struct
+{
+    ivd_ctl_getbufinfo_ip_t s_ivd_ctl_getbufinfo_ip_t;
+} impeg2d_ctl_getbufinfo_ip_t;
+
+typedef struct
+{
+    ivd_ctl_getbufinfo_op_t s_ivd_ctl_getbufinfo_op_t;
+} impeg2d_ctl_getbufinfo_op_t;
+
+/*****************************************************************************/
+/*   Video control:Getstatus Call                                            */
+/*****************************************************************************/
+
+typedef struct
+{
+    ivd_ctl_getstatus_ip_t s_ivd_ctl_getstatus_ip_t;
+} impeg2d_ctl_getstatus_ip_t;
+
+typedef struct
+{
+    ivd_ctl_getstatus_op_t s_ivd_ctl_getstatus_op_t;
+} impeg2d_ctl_getstatus_op_t;
+
+/*****************************************************************************/
+/*   Video control:Get Version Info                                          */
+/*****************************************************************************/
+
+typedef struct
+{
+    ivd_ctl_getversioninfo_ip_t s_ivd_ctl_getversioninfo_ip_t;
+} impeg2d_ctl_getversioninfo_ip_t;
+
+typedef struct
+{
+    ivd_ctl_getversioninfo_op_t s_ivd_ctl_getversioninfo_op_t;
+} impeg2d_ctl_getversioninfo_op_t;
+
+/*****************************************************************************/
+/*   Video control:Set Num Cores                                             */
+/*****************************************************************************/
+
+typedef struct
+{
+    UWORD32 u4_size;
+    IVD_API_COMMAND_TYPE_T e_cmd;
+    IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+    UWORD32 u4_num_cores;
+} impeg2d_ctl_set_num_cores_ip_t;
+
+typedef struct
+{
+    UWORD32 u4_size;
+    UWORD32 u4_error_code;
+} impeg2d_ctl_set_num_cores_op_t;
+
+typedef struct
+{
+    /**
+     * size
+     */
+    UWORD32 u4_size;
+    /**
+     * cmd
+     */
+    IVD_API_COMMAND_TYPE_T e_cmd;
+    /**
+     * sub cmd
+     */
+    IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+    /**
+     * Processor type
+     */
+    UWORD32 u4_arch;
+    /**
+     * SOC type
+     */
+    UWORD32 u4_soc;
+
+    /**
+     * num_cores
+     */
+    UWORD32 u4_num_cores;
+
+} impeg2d_ctl_set_processor_ip_t;
+
+typedef struct
+{
+    /**
+     * size
+     */
+    UWORD32 u4_size;
+    /**
+     * error_code
+     */
+    UWORD32 u4_error_code;
+} impeg2d_ctl_set_processor_op_t;
+
+typedef struct
+{
+
+    /**
+     * size
+     */
+    UWORD32 u4_size;
+
+    /**
+     * cmd
+     */
+    IVD_API_COMMAND_TYPE_T e_cmd;
+
+    /**
+     * sub cmd
+     */
+    IVD_CONTROL_API_COMMAND_TYPE_T e_sub_cmd;
+} impeg2d_ctl_get_frame_dimensions_ip_t;
+
+typedef struct
+{
+
+    /**
+     * size
+     */
+    UWORD32 u4_size;
+
+    /**
+     * error_code
+     */
+    UWORD32 u4_error_code;
+
+    /**
+     * x_offset[3]
+     */
+    UWORD32 u4_x_offset[3];
+
+    /**
+     * y_offset[3]
+     */
+    UWORD32 u4_y_offset[3];
+
+    /**
+     * disp_wd[3]
+     */
+    UWORD32 u4_disp_wd[3];
+
+    /**
+     * disp_ht[3]
+     */
+    UWORD32 u4_disp_ht[3];
+
+    /**
+     * buffer_wd[3]
+     */
+    UWORD32 u4_buffer_wd[3];
+
+    /**
+     * buffer_ht[3]
+     */
+    UWORD32 u4_buffer_ht[3];
+} impeg2d_ctl_get_frame_dimensions_op_t;
+
+#ifdef __cplusplus
+} /* closing brace for extern "C" */
+#endif
+
+#endif /* __IMPEG2D_H__ */

diff --git a/decoder/impeg2d_api.h b/decoder/impeg2d_api.h
new file mode 100644
index 0000000..bf3943e
--- /dev/null
+++ b/decoder/impeg2d_api.h

@@ -0,0 +1,149 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : impeg2d_api.h                                        */
+/*                                                                           */
+/*  Description       : Declares the API-level entry points of the MPEG2     */
+/*                      decoder (init, control, decode and buffer calls)     */
+/*                      together with a few helper macros                    */
+/*                                                                           */
+/*  List of Functions : <List the functions defined in this file>            */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         10 10 2005   Ittiam          Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+
+#ifndef __IMPEG2D_API_H__
+#define __IMPEG2D_API_H__
+
+
+/*****************************************************************************/
+/* Constant Macros                                                           */
+/*****************************************************************************/
+
+
+/* Debug print hook: expands to a plain printf call */
+#define DEBUG_PRINT printf
+
+
+
+
+/*
+ * Number of memory records described by impeg2d_fill_mem_rec(): four per
+ * thread, one per internal frame buffer and five singletons. The expansion is
+ * fully parenthesized so it can be used inside larger expressions
+ * (e.g. 2 * NUM_MEM_RECORDS) without precedence surprises.
+ */
+#define NUM_MEM_RECORDS                 (4 * (MAX_THREADS) + (NUM_INT_FRAME_BUFFERS) + 5)
+
+
+/* Sets bit number i in a; a is modified in place and i is evaluated once */
+#define SETBIT(a,i)   ((a) |= (1 << (i)))
+
+
+/*********************/
+/* Codec Versioning  */
+/*********************/
+
+
+
+
+/*****************************************************************************/
+/* Function Declarations                                                     */
+/*****************************************************************************/
+
+/*
+ * API entry points. Each takes untyped input/output structure pointers that
+ * the implementation casts to the call-specific *_ip_t / *_op_t types.
+ */
+
+IV_API_CALL_STATUS_T impeg2d_api_num_mem_rec(void *pv_api_ip, void *pv_api_op);
+
+IV_API_CALL_STATUS_T impeg2d_api_fill_mem_rec(void *pv_api_ip, void *pv_api_op);
+
+IV_API_CALL_STATUS_T impeg2d_api_init(iv_obj_t *DECHDL,
+                                      void *ps_ip,
+                                      void *ps_op);
+
+/* Registers the application's display buffers with the decoder */
+IV_API_CALL_STATUS_T impeg2d_api_set_display_frame(iv_obj_t *DECHDL,
+                                                   void *pv_api_ip,
+                                                   void *pv_api_op);
+
+/* Releases one shared display buffer back to the picture buffer manager */
+IV_API_CALL_STATUS_T impeg2d_api_rel_display_frame(iv_obj_t *DECHDL,
+                                                   void *pv_api_ip,
+                                                   void *pv_api_op);
+
+IV_API_CALL_STATUS_T impeg2d_api_retrieve_mem_rec(iv_obj_t *DECHDL,
+                                                  void *pv_api_ip,
+                                                  void *pv_api_op);
+
+IV_API_CALL_STATUS_T impeg2d_api_ctl(iv_obj_t *DECHDL,
+                                     void *pv_api_ip,
+                                     void *pv_api_op);
+
+/* Copies the codec version string into the caller's buffer */
+IV_API_CALL_STATUS_T impeg2d_api_get_version(iv_obj_t *DECHDL,
+                                             void *pv_api_ip,
+                                             void *pv_api_op);
+
+IV_API_CALL_STATUS_T impeg2d_api_get_buf_info(iv_obj_t *DECHDL,
+                                              void *pv_api_ip,
+                                              void *pv_api_op);
+
+IV_API_CALL_STATUS_T impeg2d_api_set_flush_mode(iv_obj_t *DECHDL,
+                                                void *pv_api_ip,
+                                                void *pv_api_op);
+
+IV_API_CALL_STATUS_T impeg2d_api_set_default(iv_obj_t *DECHDL,
+                                             void *pv_api_ip,
+                                             void *pv_api_op);
+
+IV_API_CALL_STATUS_T impeg2d_api_reset(iv_obj_t *DECHDL,
+                                       void *pv_api_ip,
+                                       void *pv_api_op);
+
+/* Declared once; the original header repeated this prototype further down */
+IV_API_CALL_STATUS_T impeg2d_api_set_params(iv_obj_t *DECHDL,
+                                            void *pv_api_ip,
+                                            void *pv_api_op);
+
+IV_API_CALL_STATUS_T impeg2d_api_get_status(iv_obj_t *DECHDL,
+                                            void *pv_api_ip,
+                                            void *pv_api_op);
+
+/* Fills the memory record table (one iv_mem_rec_t per required allocation) */
+void impeg2d_fill_mem_rec(impeg2d_fill_mem_rec_ip_t *ps_ip,
+                          impeg2d_fill_mem_rec_op_t *ps_op);
+
+void impeg2d_dec_frm(void *dec,
+                     impeg2d_video_decode_ip_t *ps_ip,
+                     impeg2d_video_decode_op_t *ps_op);
+
+void impeg2d_dec_hdr(void *dec,
+                     impeg2d_video_decode_ip_t *ps_ip,
+                     impeg2d_video_decode_op_t *ps_op);
+
+IV_API_CALL_STATUS_T impeg2d_api_entity(iv_obj_t *DECHDL,
+                                        void *pv_api_ip,
+                                        void *pv_api_op);
+
+IV_API_CALL_STATUS_T impeg2d_api_check_struct_sanity(iv_obj_t *ps_handle,
+                                                     void *pv_api_ip,
+                                                     void *pv_api_op);
+
+
+
+
+#endif /* __IMPEG2D_API_H__ */
+

diff --git a/decoder/impeg2d_api_main.c b/decoder/impeg2d_api_main.c
new file mode 100755
index 0000000..451eb93
--- /dev/null
+++ b/decoder/impeg2d_api_main.c

@@ -0,0 +1,3258 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : impeg2d_api_main.c                                   */
+/*                                                                           */
+/*  Description       : Functions which receive the API call from user       */
+/*                                                                           */
+/*  List of Functions : <List the functions defined in this file>            */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         30 05 2007   Rajneesh        Creation                             */
+/*                                                                           */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+
+/* User include files */
+#include "iv_datatypedef.h"
+#include "iv.h"
+#include "ivd.h"
+#include "ithread.h"
+
+#include "impeg2_job_queue.h"
+#include "impeg2_macros.h"
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_mem_func.h"
+
+#include "impeg2d.h"
+#include "impeg2d_api.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_debug.h"
+#include "impeg2d_structs.h"
+#include "impeg2d_mc.h"
+#include "impeg2d_pic_proc.h"
+
+/* Selects NUM_FRAMES_LIMIT below: non-zero picks 10000, zero picks 0x7FFFFFFF */
+#define NUM_FRAMES_LIMIT_ENABLED 0
+
+/*
+ * INSERT_LOGO forwards its arguments to impeg2_insert_logo() when LOGO_EN is
+ * defined and expands to nothing otherwise. Note that the LOGO_EN expansion
+ * already ends with a ';'.
+ */
+#ifdef LOGO_EN
+#include "impeg2_ittiam_logo.h"
+#define INSERT_LOGO(buf_y, buf_u, buf_v, stride, x_pos, y_pos, yuv_fmt,disp_wd,disp_ht) impeg2_insert_logo(buf_y, buf_u, buf_v, stride, x_pos, y_pos, yuv_fmt,disp_wd,disp_ht);
+#else
+#define INSERT_LOGO(buf_y, buf_u, buf_v, stride, x_pos, y_pos, yuv_fmt,disp_wd,disp_ht)
+#endif
+
+/* Frame count limit; its consumer is not in this part of the file */
+#if NUM_FRAMES_LIMIT_ENABLED
+#define NUM_FRAMES_LIMIT 10000
+#else
+#define NUM_FRAMES_LIMIT 0x7FFFFFFF
+#endif
+
+/* Components of the version string assembled by VERSION() */
+#define CODEC_NAME              "MPEG2VDEC"
+#define CODEC_RELEASE_TYPE      "eval"
+#define CODEC_RELEASE_VER       "01.00"
+#define CODEC_VENDOR            "ITTIAM"
+
+/*
+ * Builds "@(#)Id:<name>_<type> Ver:<ver> Released by <vendor> Build: <date> @ <time>"
+ * in version_string using strcpy/strcat.
+ *
+ * - version_string must be a writable char buffer large enough for the whole
+ *   text; no bounds checking is performed here.
+ * - version_string is evaluated several times, so pass an expression without
+ *   side effects.
+ * - The body is wrapped in do { } while(0) so the macro behaves as a single
+ *   statement (safe in an unbraced if/else); terminate the call with ';'.
+ */
+#define VERSION(version_string, codec_name, codec_release_type, codec_release_ver, codec_vendor)    \
+    do                                                                                              \
+    {                                                                                               \
+        strcpy((version_string), "@(#)Id:");                                                        \
+        strcat((version_string), (codec_name));                                                     \
+        strcat((version_string), "_");                                                              \
+        strcat((version_string), (codec_release_type));                                             \
+        strcat((version_string), " Ver:");                                                          \
+        strcat((version_string), (codec_release_ver));                                              \
+        strcat((version_string), " Released by ");                                                  \
+        strcat((version_string), (codec_vendor));                                                   \
+        strcat((version_string), " Build: ");                                                       \
+        strcat((version_string), __DATE__);                                                         \
+        strcat((version_string), " @ ");                                                            \
+        strcat((version_string), __TIME__);                                                         \
+    } while(0)
+
+
+/*
+ * Minimum output buffer counts, one constant per output colour format.
+ * NOTE(review): the consumer is outside this part of the file; presumably the
+ * values are the number of planes per format (3 planar, 2 semi-planar,
+ * 1 interleaved) - confirm.
+ */
+#define MIN_OUT_BUFS_420    3
+#define MIN_OUT_BUFS_422ILE 1
+#define MIN_OUT_BUFS_RGB565 1
+#define MIN_OUT_BUFS_420SP  2
+
+
+/*
+ * Forward declarations of routines that are not defined in the visible part
+ * of this file. impeg2d_init_function_ptr() is called by
+ * impeg2d_set_processor() with the primary decoder state.
+ */
+void impeg2d_init_arch(void *pv_codec);
+void impeg2d_init_function_ptr(void *pv_codec);
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Releases a display buffer that is shared between decoder and application
+*
+* @par Description:
+*  Clears the error code of the output structure. When display buffer sharing
+*  is enabled on the primary decoder state and a picture buffer manager is
+*  present, the buffer identified by u4_disp_buf_id is released from the
+*  BUF_MGR_DISP state. In every other case nothing else is done.
+*
+* @param[in] ps_dechdl
+*  Codec handle; pv_codec_handle must point to the multi-core decoder state
+*
+* @param[in] pv_api_ip
+*  Pointer to ivd_rel_display_frame_ip_t
+*
+* @param[out] pv_api_op
+*  Pointer to ivd_rel_display_frame_op_t; u4_error_code is set to 0
+*
+* @returns  IV_SUCCESS in all cases
+*
+*******************************************************************************
+*/
+IV_API_CALL_STATUS_T impeg2d_api_rel_display_frame(iv_obj_t *ps_dechdl,
+                                                   void *pv_api_ip,
+                                                   void *pv_api_op)
+{
+    ivd_rel_display_frame_ip_t *ps_rel_ip = (ivd_rel_display_frame_ip_t *)pv_api_ip;
+    ivd_rel_display_frame_op_t *ps_rel_op = (ivd_rel_display_frame_op_t *)pv_api_op;
+    dec_state_multi_core_t *ps_multi_core;
+    dec_state_t *ps_dec;
+
+    ps_rel_op->u4_error_code = 0;
+
+    ps_multi_core = (dec_state_multi_core_t *)(ps_dechdl->pv_codec_handle);
+    ps_dec = ps_multi_core->ps_dec_state[0];
+
+    /* Nothing to release unless buffers are shared and a manager exists */
+    if((0 != ps_dec->u4_share_disp_buf) && (NULL != ps_dec->pv_pic_buf_mg))
+    {
+        impeg2_buf_mgr_release(ps_dec->pv_pic_buf_mg,
+                               ps_rel_ip->u4_disp_buf_id,
+                               BUF_MGR_DISP);
+    }
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Registers the application's display buffers with the decoder
+*
+* @par Description:
+*  The number of buffers taken from the input is limited to BUF_MGR_MAX_CNT.
+*  In shared display buffer mode one pic_buf_t per buffer is set up, starting
+*  at pv_pic_buf_base: luma always comes from the application buffer; chroma
+*  comes from the application buffer when the decoder's chroma format is
+*  IV_YUV_420P and from pu1_chroma_ref_buf[] otherwise (V placed
+*  (max_width * max_height) >> 2 bytes after U). Each picture buffer is added
+*  to the picture buffer manager and marked BUF_MGR_DISP. In both modes the
+*  buffer descriptors are finally copied into as_disp_buffers[].
+*
+* @param[in] ps_dechdl
+*  Codec handle; pv_codec_handle must point to the multi-core decoder state
+*
+* @param[in] pv_api_ip
+*  Pointer to ivd_set_display_frame_ip_t
+*
+* @param[out] pv_api_op
+*  Pointer to ivd_set_display_frame_op_t; u4_error_code is set to 0
+*
+* @returns  IV_SUCCESS in all cases
+*
+*******************************************************************************
+*/
+IV_API_CALL_STATUS_T impeg2d_api_set_display_frame(iv_obj_t *ps_dechdl,
+                                          void *pv_api_ip,
+                                          void *pv_api_op)
+{
+    ivd_set_display_frame_ip_t *ps_disp_ip = (ivd_set_display_frame_ip_t *)pv_api_ip;
+    ivd_set_display_frame_op_t *ps_disp_op = (ivd_set_display_frame_op_t *)pv_api_op;
+    dec_state_multi_core_t *ps_multi_core;
+    dec_state_t *ps_dec;
+    UWORD32 u4_buf_cnt;
+    UWORD32 u4_idx;
+
+    ps_disp_op->u4_error_code = 0;
+
+    /* Limit the buffer count to BUF_MGR_MAX_CNT */
+    u4_buf_cnt = ps_disp_ip->num_disp_bufs;
+    if(u4_buf_cnt > BUF_MGR_MAX_CNT)
+    {
+        u4_buf_cnt = BUF_MGR_MAX_CNT;
+    }
+
+    ps_multi_core = (dec_state_multi_core_t *)(ps_dechdl->pv_codec_handle);
+    ps_dec = ps_multi_core->ps_dec_state[0];
+
+    if(ps_dec->u4_share_disp_buf)
+    {
+        pic_buf_t *ps_pic_base = (pic_buf_t *)ps_dec->pv_pic_buf_base;
+
+        for(u4_idx = 0; u4_idx < u4_buf_cnt; u4_idx++)
+        {
+            pic_buf_t *ps_pic = ps_pic_base + u4_idx;
+
+            /* Luma always lives in the application's buffer */
+            ps_pic->pu1_y = ps_disp_ip->s_disp_buffer[u4_idx].pu1_bufs[0];
+
+            if(IV_YUV_420P != ps_dec->i4_chromaFormat)
+            {
+                /* Chroma is taken from the chroma reference buffers, U then V */
+                ps_pic->pu1_u = ps_dec->pu1_chroma_ref_buf[u4_idx];
+                ps_pic->pu1_v = ps_dec->pu1_chroma_ref_buf[u4_idx] +
+                        ((ps_dec->u2_create_max_width * ps_dec->u2_create_max_height) >> 2);
+            }
+            else
+            {
+                /* Planar 420: chroma planes are in the application's buffer too */
+                ps_pic->pu1_u = ps_disp_ip->s_disp_buffer[u4_idx].pu1_bufs[1];
+                ps_pic->pu1_v = ps_disp_ip->s_disp_buffer[u4_idx].pu1_bufs[2];
+            }
+
+            ps_pic->i4_buf_id = u4_idx;
+            ps_pic->u1_used_as_ref = 0;
+            ps_pic->u4_ts = 0;
+
+            impeg2_buf_mgr_add(ps_dec->pv_pic_buf_mg, ps_pic, u4_idx);
+            impeg2_buf_mgr_set_status(ps_dec->pv_pic_buf_mg, u4_idx, BUF_MGR_DISP);
+        }
+    }
+
+    /* Keep a copy of the buffer descriptors in the decoder state */
+    memcpy(&(ps_dec->as_disp_buffers[0]),
+           &(ps_disp_ip->s_disp_buffer),
+           u4_buf_cnt * sizeof(ivd_out_bufdesc_t));
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Sets the number of cores the decoder may use
+*
+* @par Description:
+*  A non-zero request is stored in i4_num_cores of all MAX_THREADS decoder
+*  states. Only MAX_THREADS decoder states exist, so a request above
+*  MAX_THREADS is limited to MAX_THREADS; this also keeps very large unsigned
+*  requests from turning negative in the signed i4_num_cores field. A request
+*  of 0 sets i4_num_cores of the primary decoder state to 1 and leaves the
+*  other states untouched.
+*
+* @param[in] ps_dechdl
+*  Codec handle; pv_codec_handle must point to the multi-core decoder state
+*
+* @param[in] pv_api_ip
+*  Pointer to impeg2d_ctl_set_num_cores_ip_t
+*
+* @param[out] pv_api_op
+*  Pointer to impeg2d_ctl_set_num_cores_op_t; u4_error_code is set to
+*  IV_SUCCESS
+*
+* @returns  IV_SUCCESS in all cases
+*
+*******************************************************************************
+*/
+IV_API_CALL_STATUS_T impeg2d_api_set_num_cores(iv_obj_t *ps_dechdl,
+                                               void *pv_api_ip,
+                                               void *pv_api_op)
+{
+    impeg2d_ctl_set_num_cores_ip_t   *ps_ip;
+    impeg2d_ctl_set_num_cores_op_t *ps_op;
+    dec_state_t *ps_dec_state;
+    dec_state_multi_core_t *ps_dec_state_multi_core;
+
+    ps_ip  = (impeg2d_ctl_set_num_cores_ip_t *)pv_api_ip;
+    ps_op =  (impeg2d_ctl_set_num_cores_op_t *)pv_api_op;
+
+    ps_dec_state_multi_core = (dec_state_multi_core_t *) (ps_dechdl->pv_codec_handle);
+    ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0];
+
+    if(ps_ip->u4_num_cores > 0)
+    {
+        WORD32 i;
+        WORD32 i4_num_cores = MAX_THREADS;
+
+        /* Never advertise more cores than there are decoder states */
+        if(ps_ip->u4_num_cores < (UWORD32)MAX_THREADS)
+        {
+            i4_num_cores = (WORD32)ps_ip->u4_num_cores;
+        }
+
+        for(i = 0; i < MAX_THREADS; i++)
+        {
+            ps_dec_state_multi_core->ps_dec_state[i]->i4_num_cores = i4_num_cores;
+        }
+    }
+    else
+    {
+        /* Zero is not a usable core count: fall back to single core */
+        ps_dec_state->i4_num_cores = 1;
+    }
+    ps_op->u4_error_code = IV_SUCCESS;
+
+    return IV_SUCCESS;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Selects the processor architecture and SOC the decoder is tuned for
+*
+* @par Description:
+*  Stores the requested architecture and SOC in the primary decoder state and
+*  then calls impeg2d_init_function_ptr() on that state
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to impeg2d_ctl_set_processor_ip_t; only u4_arch and u4_soc are read
+*
+* @param[out] pv_api_op
+*  Pointer to impeg2d_ctl_set_processor_op_t; u4_error_code is set to 0
+*
+* @returns  IV_SUCCESS in all cases
+*
+* @remarks
+*  The values are cast to IVD_ARCH_T / IVD_SOC_T without range checking
+*
+*******************************************************************************
+*/
+
+IV_API_CALL_STATUS_T impeg2d_set_processor(iv_obj_t *ps_codec_obj,
+                            void *pv_api_ip,
+                            void *pv_api_op)
+{
+    dec_state_multi_core_t *ps_multi_core =
+                    (dec_state_multi_core_t *)(ps_codec_obj->pv_codec_handle);
+    dec_state_t *ps_dec = ps_multi_core->ps_dec_state[0];
+    impeg2d_ctl_set_processor_ip_t *ps_proc_ip = (impeg2d_ctl_set_processor_ip_t *)pv_api_ip;
+    impeg2d_ctl_set_processor_op_t *ps_proc_op = (impeg2d_ctl_set_processor_op_t *)pv_api_op;
+
+    ps_dec->e_processor_arch = (IVD_ARCH_T)ps_proc_ip->u4_arch;
+    ps_dec->e_processor_soc = (IVD_SOC_T)ps_proc_ip->u4_soc;
+
+    /* Re-initialize the function pointers now that arch / SOC are stored */
+    impeg2d_init_function_ptr(ps_dec);
+
+    ps_proc_op->u4_error_code = 0;
+    return IV_SUCCESS;
+}
+/**
+*******************************************************************************
+*
+* @brief
+*  Fills the table of memory records required by the decoder
+*
+* @par Description:
+*  Writes one iv_mem_rec_t per required allocation, in this fixed order:
+*    1. API object (iv_obj_t)
+*    2. multi-core context (dec_state_multi_core_t)
+*    3. for each of the MAX_THREADS threads: decoder state, thread handle,
+*       motion compensation buffers (scratch memory), stack context
+*    4. picture buffer manager plus BUF_MGR_MAX_CNT pic_buf_t entries
+*    5. NUM_INT_FRAME_BUFFERS internal frame buffers
+*    6. job queue (two jobs per row of macroblocks)
+*    7. room for NUM_MEM_RECORDS iv_mem_rec_t entries
+*  Every record requests 128 byte alignment. Frame dimensions are rounded up
+*  with ALIGN16 before any size is derived from them.
+*
+* @param[in] ps_ip
+*  Fill-mem-rec input; pv_mem_rec_location is the table to fill. The optional
+*  fields u4_share_disp_buf and e_output_format are read only when u4_size
+*  shows that the caller's structure is large enough to contain them.
+*
+* @param[out] ps_op
+*  u4_num_mem_rec_filled receives the number of records written and
+*  u4_error_code is set to 0
+*
+* @returns  None
+*
+* @remarks
+*  Display buffer sharing is honoured only for IV_YUV_420P, IV_YUV_420SP_UV
+*  and IV_YUV_420SP_VU output, and never when LOGO_EN is defined
+*
+*******************************************************************************
+*/
+void impeg2d_fill_mem_rec(impeg2d_fill_mem_rec_ip_t *ps_ip,
+                  impeg2d_fill_mem_rec_op_t *ps_op)
+{
+    iv_mem_rec_t *ps_rec = ps_ip->s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location;
+    UWORD8 u1_rec_cnt = 0;
+    UWORD32 u4_frm_wd, u4_frm_ht, u4_frm_size;
+    UWORD32 u4_buf_idx;
+    WORD32 i4_thread;
+    WORD32 i4_shared, i4_out_fmt;
+    WORD32 i4_chroma_size;
+
+    u4_frm_wd = ALIGN16(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd);
+    u4_frm_ht = ALIGN16(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht);
+
+    /* Full 420 planar frame: luma plus two quarter-size chroma planes */
+    u4_frm_size = (u4_frm_wd * u4_frm_ht * 3) >> 1;
+
+    /* One chroma plane */
+    i4_chroma_size = u4_frm_wd * u4_frm_ht / 4;
+
+    /* Sharing is off unless the caller's structure carries the field */
+    i4_shared = 0;
+#ifndef LOGO_EN
+    if(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_size > offsetof(impeg2d_fill_mem_rec_ip_t, u4_share_disp_buf))
+    {
+        i4_shared = ps_ip->u4_share_disp_buf;
+    }
+#endif
+
+    /* Output format is unknown (-1) unless the caller's structure carries it */
+    i4_out_fmt = -1;
+    if(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_size > offsetof(impeg2d_fill_mem_rec_ip_t, e_output_format))
+    {
+        i4_out_fmt = ps_ip->e_output_format;
+    }
+
+    /* Sharing is supported for the 420 planar / semi-planar formats only */
+    if(!((i4_out_fmt == IV_YUV_420P) ||
+         (i4_out_fmt == IV_YUV_420SP_UV) ||
+         (i4_out_fmt == IV_YUV_420SP_VU)))
+    {
+        i4_shared = 0;
+    }
+
+    /*************************************************************************/
+    /* Record: API object                                                    */
+    /*************************************************************************/
+    ps_rec->u4_mem_size      = sizeof(iv_obj_t);
+    ps_rec->e_mem_type       = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+    ps_rec->u4_mem_alignment = 128;
+    ps_rec++;
+    u1_rec_cnt++;
+
+    /*************************************************************************/
+    /* Record: multi-core (threads) context                                  */
+    /*************************************************************************/
+    ps_rec->u4_mem_size      = sizeof(dec_state_multi_core_t);
+    ps_rec->e_mem_type       = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+    ps_rec->u4_mem_alignment = 128;
+    ps_rec++;
+    u1_rec_cnt++;
+
+    /*************************************************************************/
+    /* Four records per thread                                               */
+    /*************************************************************************/
+    for(i4_thread = 0; i4_thread < MAX_THREADS; i4_thread++)
+    {
+        /* Decoder context */
+        ps_rec->u4_mem_size      = sizeof(dec_state_t);
+        ps_rec->e_mem_type       = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+        ps_rec->u4_mem_alignment = 128;
+        ps_rec++;
+        u1_rec_cnt++;
+
+        /* Thread handle */
+        ps_rec->u4_mem_size      = ithread_get_handle_size();
+        ps_rec->e_mem_type       = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+        ps_rec->u4_mem_alignment = 128;
+        ps_rec++;
+        u1_rec_cnt++;
+
+        /* Motion compensation buffers: the forward, backward and combined   */
+        /* buffers each hold one luma and two chroma macroblock areas        */
+        ps_rec->u4_mem_size      = (3 * (MB_LUMA_MEM_SIZE)) + (6 * (MB_CHROMA_MEM_SIZE));
+        ps_rec->e_mem_type       = IV_EXTERNAL_CACHEABLE_SCRATCH_MEM;
+        ps_rec->u4_mem_alignment = 128;
+        ps_rec++;
+        u1_rec_cnt++;
+
+        /* Stack context: fixed 392 bytes                                    */
+        /* NOTE(review): the origin of this value is not visible here        */
+        ps_rec->u4_mem_size      = 392;
+        ps_rec->e_mem_type       = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+        ps_rec->u4_mem_alignment = 128;
+        ps_rec++;
+        u1_rec_cnt++;
+    }
+
+    /*************************************************************************/
+    /* Record: picture buffer manager and its picture buffer descriptors     */
+    /*************************************************************************/
+    ps_rec->u4_mem_size      = sizeof(buf_mgr_t) + sizeof(pic_buf_t) * BUF_MGR_MAX_CNT;
+    ps_rec->e_mem_type       = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+    ps_rec->u4_mem_alignment = 128;
+    ps_rec++;
+    u1_rec_cnt++;
+
+    /*************************************************************************/
+    /* Records: internal frame buffers                                       */
+    /*************************************************************************/
+    for(u4_buf_idx = 0; u4_buf_idx < NUM_INT_FRAME_BUFFERS; u4_buf_idx++)
+    {
+        if(0 == i4_shared)
+        {
+            /* Not shared: the decoder needs complete frames of its own */
+            ps_rec->u4_mem_size = u4_frm_size;
+        }
+        else if(IV_YUV_420P != i4_out_fmt)
+        {
+            /* Shared but not 420P: only the two chroma planes are needed */
+            ps_rec->u4_mem_size = i4_chroma_size * 2;
+        }
+        else
+        {
+            /* Shared 420P: a 64 byte record is requested */
+            ps_rec->u4_mem_size = 64;
+        }
+        ps_rec->e_mem_type       = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+        ps_rec->u4_mem_alignment = 128;
+        ps_rec++;
+        u1_rec_cnt++;
+    }
+
+    /*************************************************************************/
+    /* Record: job queue                                                     */
+    /*************************************************************************/
+    {
+        /* One decode job plus one format convert / frame copy job per row   */
+        /* of macroblocks                                                    */
+        WORD32 i4_mb_rows = u4_frm_ht >> 4;
+        WORD32 i4_jobs = i4_mb_rows + i4_mb_rows;
+        WORD32 i4_jobq_size = impeg2_jobq_ctxt_size();
+
+        i4_jobq_size += i4_jobs * sizeof(job_t);
+
+        ps_rec->u4_mem_size      = i4_jobq_size;
+        ps_rec->e_mem_type       = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+        ps_rec->u4_mem_alignment = 128;
+        ps_rec++;
+        u1_rec_cnt++;
+    }
+
+    /*************************************************************************/
+    /* Record: storage for NUM_MEM_RECORDS memory records                    */
+    /*************************************************************************/
+    ps_rec->u4_mem_size      = sizeof(iv_mem_rec_t) * (NUM_MEM_RECORDS);
+    ps_rec->e_mem_type       = IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM;
+    ps_rec->u4_mem_alignment = 128;
+    ps_rec++;
+    u1_rec_cnt++;
+
+    ps_op->s_ivd_fill_mem_rec_op_t.u4_num_mem_rec_filled = u1_rec_cnt;
+    ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code = 0;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Copies the codec version string into a caller supplied buffer
+*
+* @par Description:
+*  Builds the version string with the VERSION() macro and copies it, including
+*  the terminating NUL, into pv_version_buffer. The call fails when the buffer
+*  pointer is NULL, when u4_version_buffer_size is zero or does not fit in a
+*  positive WORD32, or when the buffer is too small for the string.
+*
+* @param[in] ps_dechdl
+*  Codec handle; not used
+*
+* @param[in] pv_api_ip
+*  Pointer to impeg2d_ctl_getversioninfo_ip_t
+*
+* @param[out] pv_api_op
+*  Pointer to impeg2d_ctl_getversioninfo_op_t; u4_error_code is set to
+*  IV_SUCCESS or IV_FAIL to match the return value
+*
+* @returns  IV_SUCCESS when the string was copied, IV_FAIL otherwise
+*
+* @remarks
+*  A buffer that is too small now yields IV_FAIL as the return value as well;
+*  previously only u4_error_code reported the failure
+*
+*******************************************************************************
+*/
+IV_API_CALL_STATUS_T impeg2d_api_get_version(iv_obj_t *ps_dechdl,
+                                             void *pv_api_ip,
+                                             void *pv_api_op)
+{
+    char au1_version_string[512];
+    size_t version_size;
+
+    impeg2d_ctl_getversioninfo_ip_t *ps_ip;
+    impeg2d_ctl_getversioninfo_op_t *ps_op;
+
+    UNUSED(ps_dechdl);
+
+    ps_ip = (impeg2d_ctl_getversioninfo_ip_t *)pv_api_ip;
+    ps_op = (impeg2d_ctl_getversioninfo_op_t *)pv_api_op;
+
+    /* Reject a missing buffer and sizes that are zero or negative as WORD32 */
+    if((NULL == ps_ip->s_ivd_ctl_getversioninfo_ip_t.pv_version_buffer) ||
+       ((WORD32)ps_ip->s_ivd_ctl_getversioninfo_ip_t.u4_version_buffer_size <= 0))
+    {
+        ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code = IV_FAIL;
+        return (IV_FAIL);
+    }
+
+    VERSION(au1_version_string, CODEC_NAME, CODEC_RELEASE_TYPE, CODEC_RELEASE_VER,
+            CODEC_VENDOR);
+
+    /* Bytes to copy, including the terminating NUL */
+    version_size = strlen(au1_version_string) + 1;
+
+    if(ps_ip->s_ivd_ctl_getversioninfo_ip_t.u4_version_buffer_size < version_size)
+    {
+        /* Caller's buffer cannot hold the string: report it in both places */
+        ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code = IV_FAIL;
+        return (IV_FAIL);
+    }
+
+    memcpy(ps_ip->s_ivd_ctl_getversioninfo_ip_t.pv_version_buffer,
+           au1_version_string, version_size);
+    ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code = IV_SUCCESS;
+
+    return (IV_SUCCESS);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_api_get_buf_info                                 */
+/*                                                                           */
+/*  Description   : Reports the minimum number and size of input and         */
+/*                  output buffers for the configured output color format.   */
+/*                                                                           */
+/*  Inputs        : ps_dechdl : codec handle                                 */
+/*                  pv_api_ip : impeg2d_ctl_getbufinfo_ip_t * (unused)       */
+/*  Globals       : None used directly                                       */
+/*  Outputs       : pv_api_op : impeg2d_ctl_getbufinfo_op_t *                */
+/*  Returns       : IV_SUCCESS, or IV_FAIL for an unsupported format         */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         22 10 2008    100356         Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+IV_API_CALL_STATUS_T impeg2d_api_get_buf_info(iv_obj_t *ps_dechdl,
+                                              void *pv_api_ip,
+                                              void *pv_api_op)
+{
+    impeg2d_ctl_getbufinfo_ip_t *ps_ip =
+                    (impeg2d_ctl_getbufinfo_ip_t *)pv_api_ip;
+    impeg2d_ctl_getbufinfo_op_t *ps_op =
+                    (impeg2d_ctl_getbufinfo_op_t *)pv_api_op;
+    dec_state_multi_core_t *ps_multi_core =
+                    (dec_state_multi_core_t *)(ps_dechdl->pv_codec_handle);
+    dec_state_t *ps_state = ps_multi_core->ps_dec_state[0];
+    UWORD32 u4_idx, u4_stride, u4_height, u4_luma_size;
+
+    UNUSED(ps_ip);
+
+    /* Defaults; the output count is overridden per color format below */
+    ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_num_in_bufs = 1;
+    ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_num_out_bufs = 1;
+
+    switch(ps_state->i4_chromaFormat)
+    {
+        case IV_YUV_420P:
+            ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_num_out_bufs =
+                            MIN_OUT_BUFS_420;
+            break;
+
+        case IV_YUV_420SP_UV:
+        case IV_YUV_420SP_VU:
+            ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_num_out_bufs =
+                            MIN_OUT_BUFS_420SP;
+            break;
+
+        case IV_YUV_422ILE:
+            ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_num_out_bufs =
+                            MIN_OUT_BUFS_422ILE;
+            break;
+
+        case IV_RGB_565:
+            ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_num_out_bufs =
+                            MIN_OUT_BUFS_RGB565;
+            break;
+
+        default:
+            /* Invalid chroma format; error code may be updated, verify in */
+            /* testing if needed                                           */
+            ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code =
+                            IVD_INIT_DEC_COL_FMT_NOT_SUPPORTED;
+            return IV_FAIL;
+    }
+
+    /* Start from all-zero size tables */
+    for(u4_idx = 0; u4_idx < IVD_VIDDEC_MAX_IO_BUFFERS; u4_idx++)
+    {
+        ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_in_buf_size[u4_idx] = 0;
+        ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[u4_idx] = 0;
+    }
+
+    /* Every required input buffer must hold a full bitstream buffer */
+    for(u4_idx = 0;
+        u4_idx < ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_num_in_bufs;
+        u4_idx++)
+    {
+        ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_in_buf_size[u4_idx] =
+                        MAX_BITSTREAM_BUFFER_SIZE;
+    }
+
+    /* Use the configured stride when there is one; otherwise fall back to */
+    /* the decoded width (header done) or the create-time maximum width    */
+    u4_stride = ps_state->u4_frm_buf_stride;
+    if(0 == u4_stride)
+    {
+        u4_stride = (1 == ps_state->u2_header_done) ?
+                        ps_state->u2_horizontal_size :
+                        ps_state->u2_create_max_width;
+    }
+
+    /* Frame height rounded up to a multiple of 16 */
+    u4_height = ((ps_state->u2_frame_height + 15) >> 4) << 4;
+    u4_luma_size = u4_stride * u4_height;
+
+    switch(ps_state->i4_chromaFormat)
+    {
+        case IV_YUV_420P:
+            ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[0] =
+                            u4_luma_size;
+            ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[1] =
+                            u4_luma_size >> 2;
+            ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[2] =
+                            u4_luma_size >> 2;
+            break;
+
+        case IV_YUV_420SP_UV:
+        case IV_YUV_420SP_VU:
+            ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[0] =
+                            u4_luma_size;
+            ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[1] =
+                            u4_luma_size >> 1;
+            ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[2] = 0;
+            break;
+
+        case IV_YUV_422ILE:
+            ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[0] =
+                            u4_luma_size * 2;
+            ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[2] = 0;
+            ps_op->s_ivd_ctl_getbufinfo_op_t.u4_min_out_buf_size[1] = 0;
+            break;
+
+        default:
+            /* IV_RGB_565 reaches here: its sizes stay at zero.            */
+            /* NOTE(review): confirm that zero is intended for RGB565.     */
+            break;
+    }
+
+    /* Adding initialization for 2 uninitialized values */
+    ps_op->s_ivd_ctl_getbufinfo_op_t.u4_num_disp_bufs =
+                    ps_state->u4_share_disp_buf ? NUM_INT_FRAME_BUFFERS : 1;
+    /* NOTE(review): u4_size receives MAX_FRM_SIZE, not a structure size */
+    ps_op->s_ivd_ctl_getbufinfo_op_t.u4_size = MAX_FRM_SIZE;
+
+    ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code = IV_SUCCESS;
+
+    return (IV_SUCCESS);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name :  impeg2d_api_set_flush_mode                              */
+/*                                                                           */
+/*  Description   : Sets u1_flushfrm in the first decoder state.             */
+/*                                                                           */
+/*  Inputs        : ps_dechdl : codec handle                                 */
+/*                  pv_api_ip : unused                                       */
+/*  Globals       : None used directly                                       */
+/*  Outputs       : pv_api_op : impeg2d_ctl_flush_op_t *                     */
+/*  Returns       : IV_SUCCESS                                               */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         08 06 2009    100356         RAVI                                 */
+/*                                                                           */
+/*****************************************************************************/
+IV_API_CALL_STATUS_T impeg2d_api_set_flush_mode(iv_obj_t *ps_dechdl,
+                                                void *pv_api_ip,
+                                                void *pv_api_op)
+{
+    dec_state_multi_core_t *ps_multi_core =
+                    (dec_state_multi_core_t *)(ps_dechdl->pv_codec_handle);
+    dec_state_t *ps_primary_state = ps_multi_core->ps_dec_state[0];
+    impeg2d_ctl_flush_op_t *ps_flush_op = (impeg2d_ctl_flush_op_t *)pv_api_op;
+
+    UNUSED(pv_api_ip);
+
+    /* Only the first decoder state carries the flush request */
+    ps_primary_state->u1_flushfrm = 1;
+
+    ps_flush_op->s_ivd_ctl_flush_op_t.u4_size = sizeof(impeg2d_ctl_flush_op_t);
+    ps_flush_op->s_ivd_ctl_flush_op_t.u4_error_code = IV_SUCCESS;
+
+    return (IV_SUCCESS);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name :  impeg2d_api_set_default                                 */
+/*                                                                           */
+/*  Description   : Clears u1_flushfrm, sets u2_decode_header to 1 and,      */
+/*                  once a header has been decoded, resets the frame         */
+/*                  buffer stride to the frame width.                        */
+/*                                                                           */
+/*  Inputs        : ps_dechdl : codec handle                                 */
+/*                  pv_api_ip : unused                                       */
+/*  Globals       : None used directly                                       */
+/*  Outputs       : pv_api_op : impeg2d_ctl_set_config_op_t *                */
+/*  Returns       : IV_SUCCESS                                               */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         08 06 2009    100356         RAVI                                 */
+/*                                                                           */
+/*****************************************************************************/
+IV_API_CALL_STATUS_T impeg2d_api_set_default(iv_obj_t *ps_dechdl,
+                                             void *pv_api_ip,
+                                             void *pv_api_op)
+{
+    impeg2d_ctl_set_config_op_t *ps_cfg_op =
+                    (impeg2d_ctl_set_config_op_t *)pv_api_op;
+    dec_state_multi_core_t *ps_multi_core;
+    dec_state_t *ps_primary_state;
+
+    UNUSED(pv_api_ip);
+
+    /* The output structure is completed before any decoder state is touched */
+    ps_cfg_op->s_ivd_ctl_set_config_op_t.u4_size =
+                    sizeof(impeg2d_ctl_set_config_op_t);
+    ps_cfg_op->s_ivd_ctl_set_config_op_t.u4_error_code = IV_SUCCESS;
+
+    ps_multi_core = (dec_state_multi_core_t *)(ps_dechdl->pv_codec_handle);
+    ps_primary_state = ps_multi_core->ps_dec_state[0];
+
+    ps_primary_state->u1_flushfrm = 0;
+    ps_primary_state->u2_decode_header = 1;
+
+    /* The frame width is only used once a header has been decoded */
+    if(1 == ps_primary_state->u2_header_done)
+    {
+        ps_primary_state->u4_frm_buf_stride = ps_primary_state->u2_frame_width;
+    }
+
+    return (IV_SUCCESS);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name :  impeg2d_api_reset                                       */
+/*                                                                           */
+/*  Description   : Clears u2_header_done, u4_frm_buf_stride and             */
+/*                  u2_is_mpeg2 in each of the MAX_THREADS decoder states.   */
+/*                                                                           */
+/*  Inputs        : ps_dechdl : codec handle                                 */
+/*                  pv_api_ip : unused                                       */
+/*  Globals       : None used directly                                       */
+/*  Outputs       : pv_api_op : impeg2d_ctl_reset_op_t *                     */
+/*  Returns       : IV_SUCCESS, or IV_FAIL when the codec handle is NULL     */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         08 06 2009    100356         RAVI                                 */
+/*                                                                           */
+/*****************************************************************************/
+IV_API_CALL_STATUS_T impeg2d_api_reset(iv_obj_t *ps_dechdl,
+                                       void *pv_api_ip,
+                                       void *pv_api_op)
+{
+    dec_state_t *ps_dec_state;
+    dec_state_multi_core_t *ps_dec_state_multi_core;
+    impeg2d_ctl_reset_op_t *s_ctl_reset_op = (impeg2d_ctl_reset_op_t *)pv_api_op;
+    WORD32 i4_num_threads;
+
+    UNUSED(pv_api_ip);
+
+    ps_dec_state_multi_core = (dec_state_multi_core_t *) (ps_dechdl->pv_codec_handle);
+
+    /* Check the handle BEFORE it is dereferenced. The previous code read    */
+    /* ps_dec_state[0] ahead of this test, so the NULL branch could never be */
+    /* reached safely. The failure is reported through both the error code   */
+    /* and the return value, as the other control calls in this file do.     */
+    if(NULL == ps_dec_state_multi_core)
+    {
+        s_ctl_reset_op->s_ivd_ctl_reset_op_t.u4_error_code =
+                        IMPEG2D_INIT_NOT_DONE;
+        return(IV_FAIL);
+    }
+
+    for(i4_num_threads = 0; i4_num_threads < MAX_THREADS; i4_num_threads++)
+    {
+        ps_dec_state = ps_dec_state_multi_core->ps_dec_state[i4_num_threads];
+
+        /* ----------------------------------------------------------------- */
+        /* Initializations */
+
+        ps_dec_state->u2_header_done    = 0;  /* Header decoding not done */
+        ps_dec_state->u4_frm_buf_stride = 0;
+        ps_dec_state->u2_is_mpeg2       = 0;
+    }
+
+    /* Do not leave the caller's error code unwritten on success */
+    s_ctl_reset_op->s_ivd_ctl_reset_op_t.u4_error_code = IV_SUCCESS;
+
+    return(IV_SUCCESS);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name :  impeg2d_api_set_params                                  */
+/*                                                                           */
+/*  Description   : Validates the requested decode mode, output mode, skip   */
+/*                  mode and display width, then updates the first decoder   */
+/*                  state.                                                   */
+/*                                                                           */
+/*  Inputs        : ps_dechdl : codec handle                                 */
+/*                  pv_api_ip : impeg2d_ctl_set_config_ip_t *                */
+/*  Globals       : None used directly                                       */
+/*  Outputs       : pv_api_op : impeg2d_ctl_set_config_op_t *                */
+/*  Returns       : IV_SUCCESS, or IV_FAIL when a parameter is rejected      */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         08 06 2009    100356         RAVI                                 */
+/*                                                                           */
+/*****************************************************************************/
+IV_API_CALL_STATUS_T impeg2d_api_set_params(iv_obj_t *ps_dechdl,void *pv_api_ip,void *pv_api_op)
+{
+    impeg2d_ctl_set_config_ip_t *ps_cfg_ip = (impeg2d_ctl_set_config_ip_t *)pv_api_ip;
+    impeg2d_ctl_set_config_op_t *ps_cfg_op = (impeg2d_ctl_set_config_op_t *)pv_api_op;
+    dec_state_multi_core_t *ps_multi_core = (dec_state_multi_core_t *)(ps_dechdl->pv_codec_handle);
+    dec_state_t *ps_state = ps_multi_core->ps_dec_state[0];
+    UWORD32 u4_disp_wd = ps_cfg_ip->s_ivd_ctl_set_config_ip_t.u4_disp_wd;
+    UWORD32 u4_min_disp_wd;
+
+    /* Decode mode, frame-out mode and skip mode must all be recognised */
+    if(((ps_cfg_ip->s_ivd_ctl_set_config_ip_t.e_vid_dec_mode != IVD_DECODE_HEADER) &&
+        (ps_cfg_ip->s_ivd_ctl_set_config_ip_t.e_vid_dec_mode != IVD_DECODE_FRAME)) ||
+       ((ps_cfg_ip->s_ivd_ctl_set_config_ip_t.e_frm_out_mode != IVD_DISPLAY_FRAME_OUT) &&
+        (ps_cfg_ip->s_ivd_ctl_set_config_ip_t.e_frm_out_mode != IVD_DECODE_FRAME_OUT)) ||
+       ((WORD32)ps_cfg_ip->s_ivd_ctl_set_config_ip_t.e_frm_skip_mode < IVD_SKIP_NONE))
+    {
+        ps_cfg_op->s_ivd_ctl_set_config_op_t.u4_error_code = IV_FAIL;
+        return(IV_FAIL);
+    }
+
+    /* A non-zero display width may not be smaller than the frame width     */
+    /* (header decoded) or the horizontal size (header not yet decoded)     */
+    if(1 == ps_state->u2_header_done)
+    {
+        u4_min_disp_wd = ps_state->u2_frame_width;
+    }
+    else
+    {
+        u4_min_disp_wd = ps_state->u2_horizontal_size;
+    }
+
+    if(((WORD32)u4_disp_wd < 0) ||
+       ((0 != u4_disp_wd) && (u4_disp_wd < u4_min_disp_wd)))
+    {
+        ps_cfg_op->s_ivd_ctl_set_config_op_t.u4_error_code = IV_FAIL;
+        return(IV_FAIL);
+    }
+
+    ps_state->u2_decode_header = (UWORD8)ps_cfg_ip->s_ivd_ctl_set_config_ip_t.e_vid_dec_mode;
+
+    if(0 == u4_disp_wd)
+    {
+        /* No width requested: frame width once known, otherwise zero */
+        ps_state->u4_frm_buf_stride =
+                        (1 == ps_state->u2_header_done) ? ps_state->u2_frame_width : 0;
+    }
+    else if((1 != ps_state->u2_header_done) ||
+            (u4_disp_wd > ps_state->u2_frame_width))
+    {
+        /* After the header is decoded, a width equal to the frame width */
+        /* leaves the current stride untouched                           */
+        ps_state->u4_frm_buf_stride = u4_disp_wd;
+    }
+
+    /* Selecting frame decode cancels a pending flush */
+    if(IVD_DECODE_FRAME == ps_cfg_ip->s_ivd_ctl_set_config_ip_t.e_vid_dec_mode)
+    {
+        ps_state->u1_flushfrm = 0;
+    }
+
+    ps_cfg_op->s_ivd_ctl_set_config_op_t.u4_error_code = IV_SUCCESS;
+    return(IV_SUCCESS);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name :  impeg2d_api_get_status                                  */
+/*                                                                           */
+/*  Description   : Reports picture dimensions, frame rate, content type,    */
+/*                  output color format and minimum buffer requirements.     */
+/*                                                                           */
+/*  Inputs        : ps_dechdl : codec handle                                 */
+/*                  pv_api_ip : impeg2d_ctl_getstatus_ip_t * (unused)        */
+/*  Globals       : None used directly                                       */
+/*  Outputs       : pv_api_op : impeg2d_ctl_getstatus_op_t *                 */
+/*  Returns       : IV_SUCCESS, or IV_FAIL for an unsupported format         */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         08 06 2009    100356         RAVI                                 */
+/*                                                                           */
+/*****************************************************************************/
+IV_API_CALL_STATUS_T impeg2d_api_get_status(iv_obj_t *ps_dechdl,
+                                                  void *pv_api_ip,
+                                                  void *pv_api_op)
+{
+    dec_state_t *ps_dec_state;
+    dec_state_multi_core_t *ps_dec_state_multi_core;
+    UWORD32 u4_i,u4_stride,u4_height;
+    impeg2d_ctl_getstatus_ip_t *ps_ctl_dec_ip = (impeg2d_ctl_getstatus_ip_t *)pv_api_ip;
+    impeg2d_ctl_getstatus_op_t *ps_ctl_dec_op = (impeg2d_ctl_getstatus_op_t *)pv_api_op;
+    UNUSED(ps_ctl_dec_ip);
+
+    ps_dec_state_multi_core = (dec_state_multi_core_t *) (ps_dechdl->pv_codec_handle);
+    ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0];
+
+    ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_size             = sizeof(impeg2d_ctl_getstatus_op_t);
+    ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_num_disp_bufs    = 1;
+    ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_pic_ht           = ps_dec_state->u2_frame_height;
+    ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_pic_wd           = ps_dec_state->u2_frame_width;
+    ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_frame_rate       = ps_dec_state->u2_framePeriod;
+
+    if(ps_dec_state->u2_progressive_sequence == 1)
+    {
+        ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.e_content_type = IV_PROGRESSIVE;
+    }
+    else
+    {
+        ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.e_content_type = IV_INTERLACED;
+    }
+
+    ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.e_output_chroma_format  = (IV_COLOR_FORMAT_T)ps_dec_state->i4_chromaFormat;
+    ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_num_in_bufs      = 1;
+    ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_num_out_bufs     = 1;
+
+    if(ps_dec_state->i4_chromaFormat == IV_YUV_420P)
+    {
+        ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_num_out_bufs = MIN_OUT_BUFS_420;
+    }
+    else if((ps_dec_state->i4_chromaFormat == IV_YUV_420SP_UV)
+                    || (ps_dec_state->i4_chromaFormat == IV_YUV_420SP_VU))
+    {
+        /* The semi-planar formats were missing from this chain, so they    */
+        /* were rejected as unsupported and the 420SP sizing branch below   */
+        /* could never run. impeg2d_api_get_buf_info accepts them with the  */
+        /* same MIN_OUT_BUFS_420SP count.                                   */
+        ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_num_out_bufs = MIN_OUT_BUFS_420SP;
+    }
+    else if(ps_dec_state->i4_chromaFormat == IV_YUV_422ILE)
+    {
+        ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_num_out_bufs = MIN_OUT_BUFS_422ILE;
+    }
+    else if(ps_dec_state->i4_chromaFormat == IV_RGB_565)
+    {
+        ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_num_out_bufs = MIN_OUT_BUFS_RGB565;
+    }
+    else
+    {
+        /* Invalid chroma format; error code may be updated, verify in testing if needed */
+        ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_error_code   = IVD_INIT_DEC_COL_FMT_NOT_SUPPORTED;
+        return IV_FAIL;
+    }
+
+    /* Start from all-zero size tables */
+    memset(&ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_in_buf_size[0],0,(sizeof(UWORD32)*IVD_VIDDEC_MAX_IO_BUFFERS));
+    memset(&ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[0],0,(sizeof(UWORD32)*IVD_VIDDEC_MAX_IO_BUFFERS));
+
+    for(u4_i = 0; u4_i < ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_num_in_bufs; u4_i++)
+    {
+        ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_in_buf_size[u4_i] = MAX_BITSTREAM_BUFFER_SIZE;
+    }
+
+    /* NOTE(review): unlike impeg2d_api_get_buf_info, a zero stride is used */
+    /* as-is here, which yields zero output sizes - confirm this is wanted  */
+    u4_stride = ps_dec_state->u4_frm_buf_stride;
+    /* Frame height rounded up to a multiple of 16 */
+    u4_height = ((ps_dec_state->u2_frame_height + 15) >> 4) << 4;
+
+    if(ps_dec_state->i4_chromaFormat == IV_YUV_420P)
+    {
+        ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[0] = (u4_stride * u4_height);
+        ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[1] = (u4_stride * u4_height)>>2;
+        ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[2] = (u4_stride * u4_height)>>2;
+    }
+    else if((ps_dec_state->i4_chromaFormat == IV_YUV_420SP_UV) || (ps_dec_state->i4_chromaFormat == IV_YUV_420SP_VU))
+    {
+        ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[0] = (u4_stride * u4_height);
+        ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[1] = (u4_stride * u4_height)>>1;
+        ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[2] = 0;
+    }
+    else if(ps_dec_state->i4_chromaFormat == IV_YUV_422ILE)
+    {
+        ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[0] = (u4_stride * u4_height)*2;
+        ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[1] = ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_min_out_buf_size[2] = 0;
+    }
+
+    ps_ctl_dec_op->s_ivd_ctl_getstatus_op_t.u4_error_code = IV_SUCCESS;
+
+    return(IV_SUCCESS);
+
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Gets frame dimensions/offsets
+*
+* @par Description:
+*  Reports, for each of the three output planes, the display dimensions, the
+*  buffer dimensions and the x & y offsets. Before a header has been decoded
+*  the create-time maximum dimensions are reported instead.
+*
+* @param[in] ps_codec_obj
+*  Pointer to codec object at API level
+*
+* @param[in] pv_api_ip
+*  Pointer to input argument structure (unused)
+*
+* @param[out] pv_api_op
+*  Pointer to output argument structure
+*
+* @returns  IV_SUCCESS
+*
+* @remarks
+*  For the semi-planar 4:2:0 formats the third plane is reported as empty and
+*  the widths of the second plane are doubled.
+*
+*******************************************************************************
+*/
+IV_API_CALL_STATUS_T impeg2d_get_frame_dimensions(iv_obj_t *ps_codec_obj,
+                                   void *pv_api_ip,
+                                   void *pv_api_op)
+{
+    impeg2d_ctl_get_frame_dimensions_ip_t *ps_ip =
+                    (impeg2d_ctl_get_frame_dimensions_ip_t *)pv_api_ip;
+    impeg2d_ctl_get_frame_dimensions_op_t *ps_op =
+                    (impeg2d_ctl_get_frame_dimensions_op_t *)pv_api_op;
+    dec_state_multi_core_t *ps_multi_core =
+                    (dec_state_multi_core_t *)(ps_codec_obj->pv_codec_handle);
+    dec_state_t *ps_codec = ps_multi_core->ps_dec_state[0];
+    WORD32 disp_wd, disp_ht, buffer_wd, buffer_ht;
+
+    UNUSED(ps_ip);
+
+    /* Display size: decoded size if known, else the create-time maximum */
+    if(ps_codec->u2_header_done)
+    {
+        disp_wd = ps_codec->u2_horizontal_size;
+        disp_ht = ps_codec->u2_vertical_size;
+    }
+    else
+    {
+        disp_wd = ps_codec->u2_create_max_width;
+        disp_ht = ps_codec->u2_create_max_height;
+    }
+
+    /* Buffer size: same as display unless display buffers are shared */
+    if(0 == ps_codec->u4_share_disp_buf)
+    {
+        buffer_wd = disp_wd;
+        buffer_ht = disp_ht;
+    }
+    else if(ps_codec->u2_header_done)
+    {
+        buffer_wd = ps_codec->u2_frame_width;
+        buffer_ht = ps_codec->u2_frame_height;
+    }
+    else
+    {
+        buffer_wd = ALIGN16(disp_wd);
+        buffer_ht = ALIGN16(disp_ht);
+    }
+
+    /* The buffer is never reported narrower than the frame width */
+    if(ps_codec->u2_frame_width > buffer_wd)
+    {
+        buffer_wd = ps_codec->u2_frame_width;
+    }
+
+    /* First plane */
+    ps_op->u4_disp_wd[0] = disp_wd;
+    ps_op->u4_disp_ht[0] = disp_ht;
+    ps_op->u4_buffer_wd[0] = buffer_wd;
+    ps_op->u4_buffer_ht[0] = buffer_ht;
+    ps_op->u4_x_offset[0] = 0;
+    ps_op->u4_y_offset[0] = 0;
+
+    /* Second and third planes: half of the first plane; display sizes */
+    /* round up, buffer sizes and offsets round down                    */
+    ps_op->u4_disp_wd[2] = (ps_op->u4_disp_wd[0] + 1) >> 1;
+    ps_op->u4_disp_wd[1] = ps_op->u4_disp_wd[2];
+
+    ps_op->u4_disp_ht[2] = (ps_op->u4_disp_ht[0] + 1) >> 1;
+    ps_op->u4_disp_ht[1] = ps_op->u4_disp_ht[2];
+
+    ps_op->u4_buffer_wd[2] = ps_op->u4_buffer_wd[0] >> 1;
+    ps_op->u4_buffer_wd[1] = ps_op->u4_buffer_wd[2];
+
+    ps_op->u4_buffer_ht[2] = ps_op->u4_buffer_ht[0] >> 1;
+    ps_op->u4_buffer_ht[1] = ps_op->u4_buffer_ht[2];
+
+    ps_op->u4_x_offset[2] = ps_op->u4_x_offset[0] >> 1;
+    ps_op->u4_x_offset[1] = ps_op->u4_x_offset[2];
+
+    ps_op->u4_y_offset[2] = ps_op->u4_y_offset[0] >> 1;
+    ps_op->u4_y_offset[1] = ps_op->u4_y_offset[2];
+
+    if((IV_YUV_420SP_UV == ps_codec->i4_chromaFormat)
+                    || (IV_YUV_420SP_VU == ps_codec->i4_chromaFormat))
+    {
+        /* Semi-planar: no third plane */
+        ps_op->u4_disp_wd[2] = 0;
+        ps_op->u4_disp_ht[2] = 0;
+        ps_op->u4_buffer_wd[2] = 0;
+        ps_op->u4_buffer_ht[2] = 0;
+        ps_op->u4_x_offset[2] = 0;
+        ps_op->u4_y_offset[2] = 0;
+
+        /* ...and the second plane is twice as wide */
+        ps_op->u4_disp_wd[1] <<= 1;
+        ps_op->u4_buffer_wd[1] <<= 1;
+        ps_op->u4_x_offset[1] <<= 1;
+    }
+
+    return IV_SUCCESS;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_api_function                                     */
+/*                                                                           */
+/*  Description   : Single API entry point: validates the argument           */
+/*                  structures, then dispatches on the command word.         */
+/*                                                                           */
+/*  Inputs        : ps_dechdl : codec handle                                 */
+/*                  pv_api_ip : input structure; its second UWORD32 is       */
+/*                              the command                                  */
+/*  Outputs       : pv_api_op : output structure for the command             */
+/*  Returns       : Status of the sanity check or of the handler called      */
+/*                                                                           */
+/*****************************************************************************/
+IV_API_CALL_STATUS_T impeg2d_api_function (iv_obj_t *ps_dechdl, void *pv_api_ip,void *pv_api_op)
+{
+    IV_API_CALL_STATUS_T e_status;
+    WORD32 i4_cmd;
+
+    e_status = impeg2d_api_check_struct_sanity(ps_dechdl, pv_api_ip, pv_api_op);
+    if(IV_SUCCESS != e_status)
+    {
+        return e_status;
+    }
+
+    /* The command is the second UWORD32 of the input structure */
+    i4_cmd = ((UWORD32 *)pv_api_ip)[1];
+
+    switch(i4_cmd)
+    {
+        case IV_CMD_GET_NUM_MEM_REC:
+            return impeg2d_api_num_mem_rec(pv_api_ip, pv_api_op);
+
+        case IV_CMD_FILL_NUM_MEM_REC:
+            return impeg2d_api_fill_mem_rec(pv_api_ip, pv_api_op);
+
+        case IV_CMD_INIT:
+            return impeg2d_api_init(ps_dechdl, pv_api_ip, pv_api_op);
+
+        case IVD_CMD_SET_DISPLAY_FRAME:
+            return impeg2d_api_set_display_frame(ps_dechdl, pv_api_ip, pv_api_op);
+
+        case IVD_CMD_REL_DISPLAY_FRAME:
+            return impeg2d_api_rel_display_frame(ps_dechdl, pv_api_ip, pv_api_op);
+
+        case IVD_CMD_VIDEO_DECODE:
+            return impeg2d_api_entity(ps_dechdl, pv_api_ip, pv_api_op);
+
+        case IV_CMD_RETRIEVE_MEMREC:
+            return impeg2d_api_retrieve_mem_rec(ps_dechdl, pv_api_ip, pv_api_op);
+
+        case IVD_CMD_VIDEO_CTL:
+            return impeg2d_api_ctl(ps_dechdl, pv_api_ip, pv_api_op);
+
+        default:
+            /* NOTE(review): a command with no handler returns the sanity  */
+            /* check's status, i.e. IV_SUCCESS - confirm this is intended  */
+            break;
+    }
+
+    return(e_status);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_api_num_mem_rec                                  */
+/*                                                                           */
+/*  Description   : Reports the number of memory records the library         */
+/*                  needs (NUM_MEM_RECORDS).                                 */
+/*  Inputs        : pv_api_ip : impeg2d_num_mem_rec_ip_t * (unused)          */
+/*  Globals       : None                                                     */
+/*  Processing    : Fills the size, record count and error code fields       */
+/*  Outputs       : pv_api_op : impeg2d_num_mem_rec_op_t *                   */
+/*  Returns       : IV_SUCCESS                                               */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         23 09 2010   Hamsalekha          Creation                             */
+/*                                                                           */
+/*****************************************************************************/
+
+
+IV_API_CALL_STATUS_T impeg2d_api_num_mem_rec(void *pv_api_ip,void *pv_api_op)
+{
+    /* To Query No of Memory Records */
+    impeg2d_num_mem_rec_ip_t *ps_num_rec_ip = (impeg2d_num_mem_rec_ip_t *)pv_api_ip;
+    impeg2d_num_mem_rec_op_t *ps_num_rec_op = (impeg2d_num_mem_rec_op_t *)pv_api_op;
+
+    UNUSED(ps_num_rec_ip);
+
+    ps_num_rec_op->s_ivd_num_mem_rec_op_t.u4_size = sizeof(impeg2d_num_mem_rec_op_t);
+    ps_num_rec_op->s_ivd_num_mem_rec_op_t.u4_num_mem_rec = (UWORD32)NUM_MEM_RECORDS;
+    ps_num_rec_op->s_ivd_num_mem_rec_op_t.u4_error_code = IV_SUCCESS;
+
+    return(IV_SUCCESS);
+}
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_api_fill_mem_rec                                 */
+/*                                                                           */
+/*  Description   : Fills in the details of each memory record the lib needs */
+/*  Inputs        : pv_api_ip - impeg2d_fill_mem_rec_ip_t from the caller    */
+/*                  pv_api_op - impeg2d_fill_mem_rec_op_t from the caller    */
+/*  Globals       : None                                                     */
+/*  Processing    : Delegates to impeg2d_fill_mem_rec()                      */
+/*  Outputs       : Whatever impeg2d_fill_mem_rec() writes via its arguments */
+/*  Returns       : IV_SUCCESS always                                        */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         23 09 2010   Hamsalekha          Creation                         */
+/*                                                                           */
+/*****************************************************************************/
+
+
+IV_API_CALL_STATUS_T impeg2d_api_fill_mem_rec(void *pv_api_ip,void *pv_api_op)
+{
+    /* Hand typed views of the API structures straight to the worker; */
+    /* nothing the worker may return is inspected here.               */
+    impeg2d_fill_mem_rec((impeg2d_fill_mem_rec_ip_t *)pv_api_ip,
+                         (impeg2d_fill_mem_rec_op_t *)pv_api_op);
+
+    return(IV_SUCCESS);
+}
+
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_api_init                                         */
+/*                                                                           */
+/*  Description   : Binds the caller-supplied memory records to the decoder  */
+/*                  contexts and sets their initial state                    */
+/*  Inputs        : ps_dechdl - codec handle object to be populated          */
+/*                  ps_ip     - impeg2d_init_ip_t with the memory records    */
+/*                  ps_op     - impeg2d_init_op_t receiving size/error code  */
+/*  Globals       : None                                                     */
+/*  Processing    : Walks the memory record array in a fixed order, storing  */
+/*                  each base pointer in the matching context field          */
+/*  Outputs       : ps_dechdl, ps_op and the decoder contexts are written    */
+/*  Returns       : IV_SUCCESS, or IV_FAIL with u4_error_code set in ps_op   */
+/*                                                                           */
+/*  Issues        : See NOTE(review) comments in the body                    */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         17 09 2007  Rajendra C Y          Draft                           */
+/*                                                                           */
+/*****************************************************************************/
+IV_API_CALL_STATUS_T impeg2d_api_init(iv_obj_t *ps_dechdl,
+                                      void *ps_ip,
+                                      void *ps_op)
+{
+    UWORD32 i;
+
+    void *pv;
+    UWORD32 u4_size;
+
+    dec_state_t *ps_dec_state;
+    dec_state_multi_core_t *ps_dec_state_multi_core;
+    UWORD32 u4_num_mem_rec;
+    iv_mem_rec_t *ps_mem_rec ;
+    iv_mem_rec_t *ps_frm_buf;
+    iv_obj_t *ps_dec_handle;
+    WORD32 i4_max_wd, i4_max_ht;
+
+    impeg2d_init_ip_t *ps_dec_init_ip;
+    impeg2d_init_op_t *ps_dec_init_op;
+    WORD32 i4_num_threads;
+    UWORD32 u4_share_disp_buf, u4_chroma_format;
+
+    ps_dec_init_ip = (impeg2d_init_ip_t *)ps_ip;
+    ps_dec_init_op = (impeg2d_init_op_t *)ps_op;
+
+    /* Maximum frame dimensions after ALIGN16 (presumably rounds up to a */
+    /* multiple of 16 - TODO confirm against the macro definition)       */
+    i4_max_wd = ALIGN16(ps_dec_init_ip->s_ivd_init_ip_t.u4_frm_max_wd);
+    i4_max_ht = ALIGN16(ps_dec_init_ip->s_ivd_init_ip_t.u4_frm_max_ht);
+
+    /* u4_share_disp_buf is read only when the caller's structure is large */
+    /* enough to contain it; builds with LOGO_EN never share buffers       */
+    if(ps_dec_init_ip->s_ivd_init_ip_t.u4_size > offsetof(impeg2d_init_ip_t, u4_share_disp_buf))
+    {
+#ifndef LOGO_EN
+        u4_share_disp_buf = ps_dec_init_ip->u4_share_disp_buf;
+#else
+        u4_share_disp_buf = 0;
+#endif
+    }
+    else
+    {
+        u4_share_disp_buf = 0;
+    }
+
+    u4_chroma_format = ps_dec_init_ip->s_ivd_init_ip_t.e_output_format;
+
+    /* Sharing is kept only for the 420P, 420SP_UV and 420SP_VU outputs */
+    if( (u4_chroma_format != IV_YUV_420P) &&
+        (u4_chroma_format != IV_YUV_420SP_UV) &&
+        (u4_chroma_format != IV_YUV_420SP_VU))
+    {
+        u4_share_disp_buf = 0;
+    }
+
+
+
+
+
+    /* Position the cursor on the second record; the first is not cleared */
+    ps_mem_rec = ps_dec_init_ip->s_ivd_init_ip_t.pv_mem_rec_location;
+    ps_mem_rec ++;
+
+
+    ps_dec_init_op->s_ivd_init_op_t.u4_size = sizeof(impeg2d_init_op_t);
+
+
+    /* Every record except the first is cleared to zero.                   */
+    /* NOTE(review): this dereferences all records the caller claims to    */
+    /* have supplied before u4_num_mem_rec is compared, further below,     */
+    /* with the number of records this function consumes - presumably the  */
+    /* count is validated by the caller; confirm.                          */
+    for(i = 1; i < ps_dec_init_ip->s_ivd_init_ip_t.u4_num_mem_rec; i++)
+    {
+        memset(ps_mem_rec->pv_base,0,ps_mem_rec->u4_mem_size);
+        ps_mem_rec++;
+    }
+
+    /* Rewind the cursor to the first record */
+    ps_mem_rec     = ps_dec_init_ip->s_ivd_init_ip_t.pv_mem_rec_location;
+
+
+    /* First record: an iv_obj_t whose pv_codec_handle is filled in below. */
+    /* u4_num_mem_rec counts the records consumed from here on.            */
+    ps_dec_handle  = ps_mem_rec->pv_base;
+    u4_num_mem_rec = 1;
+    ps_mem_rec++;
+
+
+
+
+
+    /* Second record: the multi-core state that owns the per-thread states */
+    ps_dec_state_multi_core = ps_mem_rec->pv_base;
+    u4_num_mem_rec++;
+    ps_mem_rec++;
+
+
+    {
+        ps_dec_handle->pv_codec_handle = (void *)ps_dec_state_multi_core; /* Initializing codec context */
+
+        /* The caller's handle gets the same context plus the API entry point */
+        ps_dechdl->pv_codec_handle =  (void *)ps_dec_state_multi_core;
+        ps_dechdl->pv_fxns = (void *)impeg2d_api_function;
+    }
+
+
+    /* Each thread consumes four consecutive records: decoder state, thread */
+    /* handle, motion compensation buffers and stack context               */
+    for(i4_num_threads = 0; i4_num_threads < MAX_THREADS; i4_num_threads++)
+    {
+    /*************************************************************************/
+    /*                      For MPEG2 Decoder Context                        */
+    /*************************************************************************/
+    ps_dec_state = ps_mem_rec->pv_base;
+
+    ps_dec_state_multi_core->ps_dec_state[i4_num_threads] = ps_dec_state;
+
+    ps_dec_state->ps_dec_state_multi_core = ps_dec_state_multi_core;
+
+    ps_dec_state->i4_num_cores = 1;
+    /* Done with the decoder state record of this thread */
+     u4_num_mem_rec++;
+     ps_mem_rec++;
+
+     /* Thread handle */
+     ps_dec_state->pv_codec_thread_handle = ps_mem_rec->pv_base;
+     u4_num_mem_rec++;
+     ps_mem_rec++;
+
+    /*************************************************************************/
+    /*                      For Motion Compensation Buffers                  */
+    /*************************************************************************/
+    /* One record is carved into nine back-to-back planes: Y, U, V for the */
+    /* forward, backward and combined buffers, in that order.              */
+    /* NOTE(review): u4_size accumulates the bytes carved out but is never */
+    /* read in this function.                                              */
+    pv = ps_mem_rec->pv_base;
+
+    /* for mc_fw_buf.pu1_y */
+
+    ps_dec_state->s_mc_fw_buf.pu1_y = pv;
+    pv = (void *)((UWORD8 *)pv + MB_LUMA_MEM_SIZE);
+
+    u4_size = sizeof(UWORD8) * MB_LUMA_MEM_SIZE;
+    /* for mc_fw_buf.pu1_u */
+
+    ps_dec_state->s_mc_fw_buf.pu1_u = pv;
+    pv = (void *)((UWORD8 *)pv + MB_CHROMA_MEM_SIZE);
+
+    u4_size += sizeof(UWORD8) * MB_CHROMA_MEM_SIZE;
+
+    /* for mc_fw_buf.pu1_v */
+
+    ps_dec_state->s_mc_fw_buf.pu1_v = pv;
+    pv = (void *)((UWORD8 *)pv + MB_CHROMA_MEM_SIZE);
+
+    u4_size += sizeof(UWORD8) * MB_CHROMA_MEM_SIZE;
+
+    /* for mc_bk_buf.pu1_y */
+
+    ps_dec_state->s_mc_bk_buf.pu1_y = pv;
+    pv = (void *)((UWORD8 *)pv + MB_LUMA_MEM_SIZE);
+
+    u4_size += sizeof(UWORD8) * MB_LUMA_MEM_SIZE;
+
+    /* for mc_bk_buf.pu1_u */
+
+    ps_dec_state->s_mc_bk_buf.pu1_u = pv;
+    pv = (void *)((UWORD8 *)pv + MB_CHROMA_MEM_SIZE);
+
+    u4_size += sizeof(UWORD8) * MB_CHROMA_MEM_SIZE;
+
+    /* for mc_bk_buf.pu1_v */
+
+    ps_dec_state->s_mc_bk_buf.pu1_v = pv;
+    pv = (void *)((UWORD8 *)pv + MB_CHROMA_MEM_SIZE);
+
+    u4_size += sizeof(UWORD8) * MB_CHROMA_MEM_SIZE;
+
+    /* for mc_buf.pu1_y */
+
+    ps_dec_state->s_mc_buf.pu1_y = pv;
+    pv = (void *)((UWORD8 *)pv + MB_LUMA_MEM_SIZE);
+
+    u4_size += sizeof(UWORD8) * MB_LUMA_MEM_SIZE;
+
+    /* for mc_buf.pu1_u */
+
+    ps_dec_state->s_mc_buf.pu1_u = pv;
+    pv = (void *)((UWORD8 *)pv + MB_CHROMA_MEM_SIZE);
+
+    u4_size += sizeof(UWORD8) * MB_CHROMA_MEM_SIZE;
+
+    /* for mc_buf.pu1_v */
+
+    ps_dec_state->s_mc_buf.pu1_v = pv;
+
+    u4_size += sizeof(UWORD8) * MB_CHROMA_MEM_SIZE;
+
+    u4_num_mem_rec++;
+    ps_mem_rec++;
+
+
+
+    /* Only the first thread's state receives a buffer manager (set below) */
+    ps_dec_state->pv_pic_buf_mg = 0;
+
+    /*************************************************************************/
+    /*        For saving stack context to support global error handling      */
+    /*************************************************************************/
+    ps_dec_state->pv_stack_cntxt = ps_mem_rec->pv_base;
+    u4_num_mem_rec++;
+    ps_mem_rec++;
+
+    }
+
+
+
+
+
+    /*************************************************************************/
+    /*                          For Picture Buffer Manager                   */
+    /*************************************************************************/
+    /* The record starts with a buf_mgr_t; the bytes after it are used as   */
+    /* the pic_buf_t array (see the internal frame buffer loop below)       */
+    ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0];
+
+    ps_dec_state->pv_pic_buf_mg = ps_mem_rec->pv_base;
+    ps_dec_state->pv_pic_buf_base = (UWORD8 *)ps_mem_rec->pv_base + sizeof(buf_mgr_t);
+
+    u4_num_mem_rec++;
+    ps_mem_rec++;
+
+
+
+    /* Seed every thread's state with the create-time dimensions and format */
+    for(i4_num_threads = 0; i4_num_threads < MAX_THREADS; i4_num_threads++)
+    {
+
+        ps_dec_state = ps_dec_state_multi_core->ps_dec_state[i4_num_threads];
+
+
+        /* --------------------------------------------------------------------- */
+        /* Initializations */
+
+        ps_dec_state->u2_header_done  = 0;  /* Header decoding not done */
+
+
+        {
+            UWORD32 u4_max_frm_width,u4_max_frm_height;
+
+            /* Same values as i4_max_wd / i4_max_ht computed above */
+            u4_max_frm_width = ALIGN16(ps_dec_init_ip->s_ivd_init_ip_t.u4_frm_max_wd);
+            u4_max_frm_height = ALIGN16(ps_dec_init_ip->s_ivd_init_ip_t.u4_frm_max_ht);
+
+            ps_dec_state->u2_create_max_width   = u4_max_frm_width;
+            ps_dec_state->u2_create_max_height  = u4_max_frm_height;
+
+            ps_dec_state->i4_chromaFormat = ps_dec_init_ip->s_ivd_init_ip_t.e_output_format;
+            ps_dec_state->u4_frm_buf_stride  = 0 ;
+            ps_dec_state->u2_frame_width  = u4_max_frm_width;
+            ps_dec_state->u2_picture_width  = u4_max_frm_width;
+            ps_dec_state->u2_horizontal_size  = u4_max_frm_width;
+
+            ps_dec_state->u2_frame_height = u4_max_frm_height;
+            ps_dec_state->u2_vertical_size = u4_max_frm_height;
+            ps_dec_state->u4_share_disp_buf = u4_share_disp_buf;
+        }
+    }
+
+
+    /* Everything from here on operates on the first thread's state */
+    ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0];
+
+    /* 422ILE output is rejected when the (aligned) height is odd.         */
+    /* NOTE(review): ps_dechdl has already been populated at this point.   */
+    if((ps_dec_state->i4_chromaFormat  == IV_YUV_422ILE)
+        &&((ps_dec_state->u2_vertical_size & 0x1) != 0))
+    {
+        //printf("Error! Height should be multiple of 2 if Chroma format is 422ILE\n");
+        ps_dec_init_op->s_ivd_init_op_t.u4_error_code = IMPEG2D_INIT_CHROMA_FORMAT_HEIGHT_ERROR;
+        return(IV_FAIL);
+
+
+    }
+
+    /* --------------------------------------------------------------------- */
+
+
+    /* The display manager and picture buffer manager are initialised for */
+    /* the first thread's state only                                       */
+    impeg2_disp_mgr_init(&ps_dec_state->s_disp_mgr);
+    impeg2_buf_mgr_init((buf_mgr_t *)ps_dec_state->pv_pic_buf_mg);
+
+    /*************************************************************************/
+    /*             Internal Frame Buffers                                    */
+    /*************************************************************************/
+
+
+    /* Fill the first frame buffer record with 128 (grey).                 */
+    /* NOTE(review): ps_frm_buf is not used after the increment, so only   */
+    /* this one record is filled.                                          */
+    {
+        ps_frm_buf = ps_mem_rec;
+        memset(ps_frm_buf->pv_base, 128, ps_frm_buf->u4_mem_size);
+        ps_frm_buf++;
+    }
+
+    if(0 == ps_dec_state->u4_share_disp_buf)
+    {
+        /* No sharing: each of the next NUM_INT_FRAME_BUFFERS records is     */
+        /* split into Y (ht * wd bytes), U and V ((ht * wd) >> 2 bytes each) */
+        /* and registered with the buffer manager under id i.                */
+        /* Assumes each record is at least that large - TODO confirm.        */
+        pic_buf_t *ps_pic_buf;
+        ps_pic_buf = (pic_buf_t *)ps_dec_state->pv_pic_buf_base;
+        for(i = 0; i < NUM_INT_FRAME_BUFFERS; i++)
+        {
+            UWORD8 *pu1_buf;
+            pu1_buf = ps_mem_rec->pv_base;
+
+            ps_pic_buf->pu1_y = pu1_buf;
+            pu1_buf += i4_max_ht * i4_max_wd;
+
+            ps_pic_buf->pu1_u = pu1_buf;
+            pu1_buf += i4_max_ht * i4_max_wd >> 2;
+
+            ps_pic_buf->pu1_v = pu1_buf;
+            pu1_buf += i4_max_ht * i4_max_wd >> 2;
+
+            ps_pic_buf->i4_buf_id = i;
+
+            ps_pic_buf->u1_used_as_ref = 0;
+
+            ps_pic_buf->u4_ts = 0;
+
+            impeg2_buf_mgr_add(ps_dec_state->pv_pic_buf_mg, ps_pic_buf, i);
+            ps_mem_rec++;
+            ps_pic_buf++;
+        }
+        u4_num_mem_rec += NUM_INT_FRAME_BUFFERS;
+    }
+    else if (ps_dec_state->i4_chromaFormat  != IV_YUV_420P)
+    {
+        /* Sharing with a non-420P output (only the two 420SP formats can   */
+        /* reach here): the records are kept as chroma reference buffers    */
+        for(i = 0; i < NUM_INT_FRAME_BUFFERS; i++)
+        {
+            ps_dec_state->pu1_chroma_ref_buf[i] = ps_mem_rec->pv_base;
+            ps_mem_rec++;
+        }
+
+        u4_num_mem_rec += NUM_INT_FRAME_BUFFERS;
+    }
+    else
+    {
+        /* Sharing with 420P output: the records are skipped unused */
+        ps_mem_rec+=NUM_INT_FRAME_BUFFERS;
+        u4_num_mem_rec += NUM_INT_FRAME_BUFFERS;
+    }
+
+
+    ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0];
+
+
+    /* Job queue record.                                                   */
+    /* NOTE(review): this record is consumed without incrementing          */
+    /* u4_num_mem_rec, so the count stored in u4_num_mem_records below is  */
+    /* one less than the number of records walked - confirm this is what   */
+    /* the retrieve path and NUM_MEM_RECORDS expect.                       */
+    ps_dec_state->pv_jobq_buf = ps_mem_rec->pv_base;
+    ps_dec_state->i4_jobq_buf_size = ps_mem_rec->u4_mem_size;
+    ps_mem_rec++;
+
+    /* Fail when more records were consumed than the caller supplied; the */
+    /* records themselves have already been dereferenced by this point    */
+    if(u4_num_mem_rec > ps_dec_init_ip->s_ivd_init_ip_t.u4_num_mem_rec)
+    {
+        ps_dec_init_op->s_ivd_init_op_t.u4_error_code = IMPEG2D_INIT_NUM_MEM_REC_NOT_SUFFICIENT;
+        return(IV_FAIL);
+
+    }
+
+    ps_dec_state->u1_flushfrm = 0;
+    ps_dec_state->u1_flushcnt = 0;
+    ps_dec_state->pv_jobq = impeg2_jobq_init(ps_dec_state->pv_jobq_buf, ps_dec_state->i4_jobq_buf_size);
+
+
+    /*************************************************************************/
+    /*   The last record consumed keeps a copy of the caller's record table  */
+    /*************************************************************************/
+    /* The copy length is this record's u4_mem_size; presumably that equals */
+    /* the size of the caller's record array - TODO confirm.                */
+    ps_dec_state->pv_memTab     = (void *)ps_mem_rec->pv_base;
+    memcpy(ps_mem_rec->pv_base,ps_dec_init_ip->s_ivd_init_ip_t.pv_mem_rec_location, ps_mem_rec->u4_mem_size);
+    /* Remember how many records were counted, for the retrieve call */
+    u4_num_mem_rec++;
+    ps_mem_rec++;
+    ps_dec_state->u4_num_mem_records = u4_num_mem_rec;
+
+
+    ps_dec_state->u4_num_frames_decoded    = 0;
+    ps_dec_state->aps_ref_pics[0] = NULL;
+    ps_dec_state->aps_ref_pics[1] = NULL;
+
+    /* Report success, then run the architecture and function pointer */
+    /* initialisation on the first thread's state                     */
+    ps_dec_init_op->s_ivd_init_op_t.u4_error_code = IV_SUCCESS;
+    impeg2d_init_arch(ps_dec_state);
+
+    impeg2d_init_function_ptr(ps_dec_state);
+
+    return(IV_SUCCESS);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_api_retrieve_mem_rec                             */
+/*                                                                           */
+/*  Description   : Copies the stored memory records back to the caller and  */
+/*                  de-initialises the job queue                             */
+/*  Inputs        : ps_dechdl - initialised codec handle                     */
+/*                  pv_api_ip - impeg2d_retrieve_mem_rec_ip_t                */
+/*                  pv_api_op - impeg2d_retrieve_mem_rec_op_t                */
+/*  Globals       : None                                                     */
+/*  Outputs       : Caller record array, error code and records-filled count */
+/*  Returns       : IV_SUCCESS always                                        */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         22 10 2008    100356         Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+IV_API_CALL_STATUS_T impeg2d_api_retrieve_mem_rec(iv_obj_t *ps_dechdl,
+                                            void *pv_api_ip,
+                                            void *pv_api_op)
+{
+    impeg2d_retrieve_mem_rec_ip_t *ps_ip = (impeg2d_retrieve_mem_rec_ip_t *)pv_api_ip;
+    impeg2d_retrieve_mem_rec_op_t *ps_op = (impeg2d_retrieve_mem_rec_op_t *)pv_api_op;
+
+    /* Destination is the caller's array; source is the table kept in the */
+    /* first thread's decoder state                                        */
+    iv_mem_rec_t *ps_dst_tab = ps_ip->s_ivd_retrieve_mem_rec_ip_t.pv_mem_rec_location;
+    dec_state_multi_core_t *ps_multi_core = (dec_state_multi_core_t *) (ps_dechdl->pv_codec_handle);
+    dec_state_t *ps_dec = ps_multi_core->ps_dec_state[0];
+    iv_mem_rec_t *ps_src_tab = ps_dec->pv_memTab;
+    UWORD32 u4_rec = 0;
+
+    /* Only these four fields are transferred; u4_size of the caller's */
+    /* records is left as it was                                        */
+    while(u4_rec < (ps_dec->u4_num_mem_records))
+    {
+        iv_mem_rec_t *ps_to   = &ps_dst_tab[u4_rec];
+        iv_mem_rec_t *ps_from = &ps_src_tab[u4_rec];
+
+        ps_to->u4_mem_size      = ps_from->u4_mem_size;
+        ps_to->u4_mem_alignment = ps_from->u4_mem_alignment;
+        ps_to->e_mem_type       = ps_from->e_mem_type;
+        ps_to->pv_base          = ps_from->pv_base;
+
+        u4_rec++;
+    }
+
+    ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code = IV_SUCCESS;
+    ps_op->s_ivd_retrieve_mem_rec_op_t.u4_num_mem_rec_filled = ps_dec->u4_num_mem_records;
+
+    /* Release the job queue before the caller reclaims the memory */
+    impeg2_jobq_deinit(ps_dec->pv_jobq);
+    IMPEG2D_PRINT_STATISTICS();
+
+    return(IV_SUCCESS);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name :   impeg2d_api_ctl                                        */
+/*                                                                           */
+/*  Description   : Routes a control call to the handler of its sub-command  */
+/*                                                                           */
+/*  Inputs        : ps_dechdl - codec handle                                 */
+/*                  pv_api_ip - input structure; word 2 is the sub-command   */
+/*                  pv_api_op - output structure passed to the handler       */
+/*  Globals       : None                                                     */
+/*  Outputs       : Whatever the selected handler writes                     */
+/*  Returns       : Status of the selected handler, IV_FAIL if none matches  */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         22 10 2008    100356         Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+IV_API_CALL_STATUS_T impeg2d_api_ctl(iv_obj_t *ps_dechdl,
+                                     void *pv_api_ip,
+                                     void *pv_api_op)
+{
+    /* The sub-command is the third 32-bit word of the input structure */
+    WORD32 i4_ctl_cmd = ((UWORD32 *)pv_api_ip)[2];
+
+    switch(i4_ctl_cmd)
+    {
+        case IVD_CMD_CTL_GETPARAMS:
+            return impeg2d_api_get_status(ps_dechdl, pv_api_ip, pv_api_op);
+
+        case IVD_CMD_CTL_SETPARAMS:
+            return impeg2d_api_set_params(ps_dechdl, pv_api_ip, pv_api_op);
+
+        case IVD_CMD_CTL_RESET:
+            return impeg2d_api_reset(ps_dechdl, pv_api_ip, pv_api_op);
+
+        case IVD_CMD_CTL_SETDEFAULT:
+            return impeg2d_api_set_default(ps_dechdl, pv_api_ip, pv_api_op);
+
+        case IVD_CMD_CTL_FLUSH:
+            return impeg2d_api_set_flush_mode(ps_dechdl, pv_api_ip, pv_api_op);
+
+        case IVD_CMD_CTL_GETBUFINFO:
+            return impeg2d_api_get_buf_info(ps_dechdl, pv_api_ip, pv_api_op);
+
+        case IVD_CMD_CTL_GETVERSION:
+            return impeg2d_api_get_version(ps_dechdl, pv_api_ip, pv_api_op);
+
+        case IMPEG2D_CMD_CTL_SET_NUM_CORES:
+            return impeg2d_api_set_num_cores(ps_dechdl, pv_api_ip, pv_api_op);
+
+        case IMPEG2D_CMD_CTL_GET_BUFFER_DIMENSIONS:
+            return impeg2d_get_frame_dimensions(ps_dechdl, pv_api_ip, pv_api_op);
+
+        case IMPEG2D_CMD_CTL_SET_PROCESSOR:
+            return impeg2d_set_processor(ps_dechdl, pv_api_ip, pv_api_op);
+
+        default:
+            /* Unknown sub-commands are rejected without touching pv_api_op */
+            return IV_FAIL;
+    }
+
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_api_check_struct_sanity                          */
+/*                                                                           */
+/*  Description   :                                                          */
+/*                                                                           */
+/*  Inputs        :                                                          */
+/*  Globals       : <Does it use any global variables?>                      */
+/*  Outputs       :                                                          */
+/*  Returns       : IV_API_CALL_STATUS_T (IV_FAIL when a check fails)        */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         22 10 2008    100356         Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+IV_API_CALL_STATUS_T impeg2d_api_check_struct_sanity(iv_obj_t *ps_handle,
+                                                    void *pv_api_ip,
+                                                    void *pv_api_op)
+{
+    WORD32  i4_cmd;
+    UWORD32 *pu4_api_ip;
+    UWORD32 *pu4_api_op;
+    WORD32 i,j;
+
+    if(NULL == pv_api_op)
+        return(IV_FAIL);
+
+    if(NULL == pv_api_ip)
+        return(IV_FAIL);
+
+    pu4_api_ip  = (UWORD32 *)pv_api_ip;
+    pu4_api_op  = (UWORD32 *)pv_api_op;
+    i4_cmd = (IVD_API_COMMAND_TYPE_T)*(pu4_api_ip + 1);
+
+    /* error checks on handle */
+    switch(i4_cmd)
+    {
+        case IV_CMD_GET_NUM_MEM_REC:
+        case IV_CMD_FILL_NUM_MEM_REC:
+            break;
+        case IV_CMD_INIT:
+            if(ps_handle == NULL)
+            {
+                *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
+                *(pu4_api_op + 1) |= IVD_HANDLE_NULL;
+                return IV_FAIL;
+            }
+
+            if(ps_handle->u4_size != sizeof(iv_obj_t))
+            {
+                *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
+                *(pu4_api_op + 1) |= IVD_HANDLE_STRUCT_SIZE_INCORRECT;
+                return IV_FAIL;
+            }
+            break;
+        case IVD_CMD_GET_DISPLAY_FRAME:
+        case IVD_CMD_VIDEO_DECODE:
+        case IV_CMD_RETRIEVE_MEMREC:
+        case IVD_CMD_SET_DISPLAY_FRAME:
+        case IVD_CMD_REL_DISPLAY_FRAME:
+        case IVD_CMD_VIDEO_CTL:
+            {
+            if(ps_handle == NULL)
+            {
+                *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
+                *(pu4_api_op + 1) |= IVD_HANDLE_NULL;
+                return IV_FAIL;
+            }
+
+            if(ps_handle->u4_size != sizeof(iv_obj_t))
+            {
+                *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
+                *(pu4_api_op + 1) |= IVD_HANDLE_STRUCT_SIZE_INCORRECT;
+                return IV_FAIL;
+            }
+            if(ps_handle->pv_fxns != impeg2d_api_function)
+            {
+                *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
+                    *(pu4_api_op + 1) |= IVD_INVALID_HANDLE_NULL;
+                return IV_FAIL;
+            }
+
+            if(ps_handle->pv_codec_handle == NULL)
+            {
+                *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
+                    *(pu4_api_op + 1) |= IVD_INVALID_HANDLE_NULL;
+                return IV_FAIL;
+            }
+            }
+            break;
+        default:
+            *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
+            *(pu4_api_op + 1) |= IVD_INVALID_API_CMD;
+            return IV_FAIL;
+    }
+
+    switch(i4_cmd)
+    {
+        case IV_CMD_GET_NUM_MEM_REC:
+            {
+                impeg2d_num_mem_rec_ip_t *ps_ip = (impeg2d_num_mem_rec_ip_t *)pv_api_ip;
+                impeg2d_num_mem_rec_op_t *ps_op = (impeg2d_num_mem_rec_op_t *)pv_api_op;
+                ps_op->s_ivd_num_mem_rec_op_t.u4_error_code = 0;
+
+                if(ps_ip->s_ivd_num_mem_rec_ip_t.u4_size != sizeof(impeg2d_num_mem_rec_ip_t))
+                {
+                    ps_op->s_ivd_num_mem_rec_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_num_mem_rec_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
+                    return(IV_FAIL);
+                }
+
+                if(ps_op->s_ivd_num_mem_rec_op_t.u4_size != sizeof(impeg2d_num_mem_rec_op_t))
+                {
+                    ps_op->s_ivd_num_mem_rec_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_num_mem_rec_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
+                    return(IV_FAIL);
+                }
+            }
+            break;
+        case IV_CMD_FILL_NUM_MEM_REC:
+            {
+                impeg2d_fill_mem_rec_ip_t *ps_ip = (impeg2d_fill_mem_rec_ip_t *)pv_api_ip;
+                impeg2d_fill_mem_rec_op_t *ps_op = (impeg2d_fill_mem_rec_op_t *)pv_api_op;
+                iv_mem_rec_t                  *ps_mem_rec;
+
+                ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code = 0;
+
+                if(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_size != sizeof(impeg2d_fill_mem_rec_ip_t))
+                {
+                    ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
+                    return(IV_FAIL);
+                }
+
+                if(ps_op->s_ivd_fill_mem_rec_op_t.u4_size != sizeof(impeg2d_fill_mem_rec_op_t))
+                {
+                    ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
+                    return(IV_FAIL);
+                }
+
+                if(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd < MIN_WIDTH)
+                {
+                    ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= IVD_REQUESTED_WIDTH_NOT_SUPPPORTED;
+                    return(IV_FAIL);
+                }
+
+                if(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd > MAX_WIDTH)
+                {
+                    ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= IVD_REQUESTED_WIDTH_NOT_SUPPPORTED;
+                    return(IV_FAIL);
+                }
+
+                if(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht < MIN_HEIGHT)
+                {
+                    ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= IVD_REQUESTED_HEIGHT_NOT_SUPPPORTED;
+                    return(IV_FAIL);
+                }
+
+                if(ps_ip->s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht > MAX_HEIGHT)
+                {
+                    ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= IVD_REQUESTED_HEIGHT_NOT_SUPPPORTED;
+                    return(IV_FAIL);
+                }
+
+                if(NULL == ps_ip->s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location)
+                {
+                    ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= IVD_NUM_REC_NOT_SUFFICIENT;
+                    return(IV_FAIL);
+                }
+
+                /* check memrecords sizes are correct */
+                ps_mem_rec  = ps_ip->s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location;
+                for(i=0;i<NUM_MEM_RECORDS;i++)
+                {
+                    if(ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t))
+                    {
+                        ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                        ps_op->s_ivd_fill_mem_rec_op_t.u4_error_code |= IVD_MEM_REC_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+                }
+            }
+            break;
+
+        case IV_CMD_INIT: /* Validate the arguments of the decoder init call before any state is touched. */
+            {
+                impeg2d_init_ip_t *ps_ip = (impeg2d_init_ip_t *)pv_api_ip;
+                impeg2d_init_op_t *ps_op = (impeg2d_init_op_t *)pv_api_op;
+                iv_mem_rec_t          *ps_mem_rec;
+                UWORD32 u4_tot_num_mem_recs;
+
+                ps_op->s_ivd_init_op_t.u4_error_code = 0;
+                /* Every failure below sets bit IVD_UNSUPPORTEDPARAM plus a specific code and returns IV_FAIL. */
+                if(ps_ip->s_ivd_init_ip_t.u4_size != sizeof(impeg2d_init_ip_t))
+                {
+                    ps_op->s_ivd_init_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_init_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
+                    return(IV_FAIL);
+                }
+
+                if(ps_op->s_ivd_init_op_t.u4_size != sizeof(impeg2d_init_op_t))
+                {
+                    ps_op->s_ivd_init_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_init_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
+                    return(IV_FAIL);
+                }
+
+                u4_tot_num_mem_recs = NUM_MEM_RECORDS;
+
+                /* Too many records are rejected here; too few are rejected further   */
+                /* down. Together the two checks only let exactly NUM_MEM_RECORDS      */
+                /* through, which is what keeps the fixed-size arrays below in bounds. */
+                if(ps_ip->s_ivd_init_ip_t.u4_num_mem_rec > u4_tot_num_mem_recs)
+                {
+                    ps_op->s_ivd_init_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_NOT_SUFFICIENT;
+                    return(IV_FAIL);
+                }
+                /* Requested maximum frame size must lie within [MIN_WIDTH, MAX_WIDTH] x [MIN_HEIGHT, MAX_HEIGHT]. */
+                if(ps_ip->s_ivd_init_ip_t.u4_frm_max_wd < MIN_WIDTH)
+                {
+                    ps_op->s_ivd_init_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_WIDTH_NOT_SUPPPORTED;
+                    return(IV_FAIL);
+                }
+
+                if(ps_ip->s_ivd_init_ip_t.u4_frm_max_wd > MAX_WIDTH)
+                {
+                    ps_op->s_ivd_init_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_WIDTH_NOT_SUPPPORTED;
+                    return(IV_FAIL);
+                }
+
+                if(ps_ip->s_ivd_init_ip_t.u4_frm_max_ht < MIN_HEIGHT)
+                {
+                    ps_op->s_ivd_init_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_HEIGHT_NOT_SUPPPORTED;
+                    return(IV_FAIL);
+                }
+
+                if(ps_ip->s_ivd_init_ip_t.u4_frm_max_ht > MAX_HEIGHT)
+                {
+                    ps_op->s_ivd_init_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_HEIGHT_NOT_SUPPPORTED;
+                    return(IV_FAIL);
+                }
+                /* The record array itself must be supplied; it is dereferenced by every check that follows. */
+                if(NULL == ps_ip->s_ivd_init_ip_t.pv_mem_rec_location)
+                {
+                    ps_op->s_ivd_init_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_init_op_t.u4_error_code |= IVD_NUM_REC_NOT_SUFFICIENT;
+                    return(IV_FAIL);
+                }
+                /* Only these four output colour formats pass: 420P, 422ILE, 420SP_UV, 420SP_VU. */
+                if((ps_ip->s_ivd_init_ip_t.e_output_format != IV_YUV_420P) &&
+                    (ps_ip->s_ivd_init_ip_t.e_output_format != IV_YUV_422ILE)&&(ps_ip->s_ivd_init_ip_t.e_output_format != IV_YUV_420SP_UV)&&(ps_ip->s_ivd_init_ip_t.e_output_format != IV_YUV_420SP_VU))
+                {
+                    ps_op->s_ivd_init_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_COL_FMT_NOT_SUPPORTED;
+                    return(IV_FAIL);
+                }
+
+                /* verify number of mem records */
+                if(ps_ip->s_ivd_init_ip_t.u4_num_mem_rec < NUM_MEM_RECORDS)
+                {
+                    ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_NOT_SUFFICIENT;
+                    return IV_FAIL;
+                }
+
+                ps_mem_rec  = ps_ip->s_ivd_init_ip_t.pv_mem_rec_location;
+                /* verify whether first memrecord is handle or not (check currently disabled) */
+                /*
+                if(ps_mem_rec->pv_base != ps_handle)
+                {
+                     // indicate the incorrect handle error
+                    ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INVALID_HANDLE;
+                    return IV_FAIL;
+                }
+*/
+                /* check memrecords sizes are correct */
+                for(i=0;i < (WORD32)ps_ip->s_ivd_init_ip_t.u4_num_mem_rec ; i++)
+                {
+                    if(ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t))
+                    {
+                        ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                        ps_op->s_ivd_init_op_t.u4_error_code |= IVD_MEM_REC_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+                }
+
+                /* verify memtabs for overlapping regions */
+                {
+                    UWORD8 *pau1_start[NUM_MEM_RECORDS];
+                    UWORD8 *pau1_end[NUM_MEM_RECORDS];
+                    /* Each record is treated as the inclusive byte range               */
+                    /* [pv_base, pv_base + u4_mem_size - 1] and compared with all earlier records. */
+                    pau1_start[0] = (UWORD8 *)(ps_mem_rec[0].pv_base);
+                    pau1_end[0]   = (UWORD8 *)(ps_mem_rec[0].pv_base) + ps_mem_rec[0].u4_mem_size - 1;
+                    for(i = 1; i < (WORD32)ps_ip->s_ivd_init_ip_t.u4_num_mem_rec; i++)
+                    {
+                        /* This array is populated to check memtab overlap */
+                        pau1_start[i] = (UWORD8 *)(ps_mem_rec[i].pv_base);
+                        pau1_end[i]   = (UWORD8 *)(ps_mem_rec[i].pv_base) + ps_mem_rec[i].u4_mem_size - 1;
+
+                        for(j = 0; j < i; j++)
+                        {
+                            if((pau1_start[i] >= pau1_start[j]) && (pau1_start[i] <= pau1_end[j])) /* start of i lies inside j */
+                            {
+                                ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                                ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_OVERLAP_ERR;
+                                return IV_FAIL;
+                            }
+
+                            if((pau1_end[i] >= pau1_start[j]) && (pau1_end[i] <= pau1_end[j])) /* end of i lies inside j */
+                            {
+                                ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                                ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_OVERLAP_ERR;
+                                return IV_FAIL;
+                            }
+
+                            if((pau1_start[i] < pau1_start[j]) && (pau1_end[i] > pau1_end[j])) /* i completely encloses j */
+                            {
+                                ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                                ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_OVERLAP_ERR;
+                                return IV_FAIL;
+                            }
+                        }
+                    }
+                }
+
+                /* Re-enter the API with a NULL handle to obtain the record count and */
+                /* the per-record requirements for the requested configuration, then   */
+                /* compare the caller's records against those requirements.            */
+                {
+                    iv_mem_rec_t    as_mem_rec_ittiam_api[NUM_MEM_RECORDS];
+
+                    impeg2d_fill_mem_rec_ip_t s_fill_mem_rec_ip;
+                    impeg2d_fill_mem_rec_op_t s_fill_mem_rec_op;
+                    IV_API_CALL_STATUS_T e_status;
+                    WORD32 i4_num_memrec;
+                    {
+
+                        iv_num_mem_rec_ip_t s_no_of_mem_rec_query_ip;
+                        iv_num_mem_rec_op_t s_no_of_mem_rec_query_op;
+
+
+                        s_no_of_mem_rec_query_ip.u4_size = sizeof(iv_num_mem_rec_ip_t);
+                        s_no_of_mem_rec_query_op.u4_size = sizeof(iv_num_mem_rec_op_t);
+
+                        s_no_of_mem_rec_query_ip.e_cmd   = IV_CMD_GET_NUM_MEM_REC;
+                        impeg2d_api_function(NULL,
+                                                    (void *)&s_no_of_mem_rec_query_ip,
+                                                    (void *)&s_no_of_mem_rec_query_op);
+                        /* The status returned by the query above is not examined. */
+                        i4_num_memrec  = s_no_of_mem_rec_query_op.u4_num_mem_rec;
+
+
+
+                    }
+
+                    /* NOTE(review): as_mem_rec_ittiam_api holds NUM_MEM_RECORDS entries; the loops below assume the queried count does not exceed that - confirm. */
+                    /* initialize mem records array with sizes */
+                    for(i = 0; i < i4_num_memrec; i++)
+                    {
+                        as_mem_rec_ittiam_api[i].u4_size = sizeof(iv_mem_rec_t);
+                    }
+
+                    s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_size                   = sizeof(impeg2d_fill_mem_rec_ip_t);
+                    s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.e_cmd                     = IV_CMD_FILL_NUM_MEM_REC;
+                    s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd             = ps_ip->s_ivd_init_ip_t.u4_frm_max_wd;
+                    s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht             = ps_ip->s_ivd_init_ip_t.u4_frm_max_ht;
+                    s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location       = as_mem_rec_ittiam_api;
+                    s_fill_mem_rec_ip.u4_share_disp_buf                                 = ps_ip->u4_share_disp_buf;
+                    s_fill_mem_rec_ip.e_output_format                                   = ps_ip->s_ivd_init_ip_t.e_output_format;
+                    s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_size                   = sizeof(impeg2d_fill_mem_rec_op_t);
+
+                    /* A failing fill call propagates its error code to the init output unchanged. */
+                    e_status = impeg2d_api_function(NULL,
+                                                (void *)&s_fill_mem_rec_ip,
+                                                (void *)&s_fill_mem_rec_op);
+                    if(IV_FAIL == e_status)
+                    {
+                        ps_op->s_ivd_init_op_t.u4_error_code = s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_error_code;
+                        return(IV_FAIL);
+                    }
+
+                    /* Per record: base must be non-NULL, alignment must match exactly, */
+                    /* size must be at least the required size, and the memory type     */
+                    /* must match (with one tolerated substitution, see below).         */
+                    for(i = 0; i < i4_num_memrec; i ++)
+                    {
+                        if(ps_mem_rec[i].pv_base == NULL)
+                        {
+                            ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                            ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_BASE_NULL;
+                            return IV_FAIL;
+                        }
+#ifdef CHECK_ALIGN
+
+                        if((UWORD32)(ps_mem_rec[i].pv_base) & (ps_mem_rec[i].u4_mem_alignment - 1)) /* optional: base address must honour the record's own alignment */
+                        {
+                            ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                            ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_ALIGNMENT_ERR;
+                            return IV_FAIL;
+                        }
+#endif //CHECK_ALIGN
+                        if(ps_mem_rec[i].u4_mem_alignment != as_mem_rec_ittiam_api[i].u4_mem_alignment)
+                        {
+                            ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                            ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_ALIGNMENT_ERR;
+                            return IV_FAIL;
+                        }
+
+                        if(ps_mem_rec[i].u4_mem_size < as_mem_rec_ittiam_api[i].u4_mem_size)
+                        {
+                            ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                            ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_INSUFFICIENT_SIZE;
+                            return IV_FAIL;
+                        }
+
+                        if(ps_mem_rec[i].e_mem_type != as_mem_rec_ittiam_api[i].e_mem_type)
+                        {
+                            if (IV_EXTERNAL_CACHEABLE_SCRATCH_MEM == as_mem_rec_ittiam_api[i].e_mem_type)
+                            {
+                                if (IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM == ps_mem_rec[i].e_mem_type)
+                                {
+                                    continue; /* persistent memory is accepted where scratch memory was required */
+                                }
+                            }
+                            ps_op->s_ivd_init_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                            ps_op->s_ivd_init_op_t.u4_error_code |= IVD_INIT_DEC_MEM_REC_INCORRECT_TYPE;
+                            return IV_FAIL;
+                        }
+                    }
+                }
+
+
+            }
+            break;
+
+        case IVD_CMD_GET_DISPLAY_FRAME: /* Validate the arguments of a get-display-frame call. */
+            {
+                impeg2d_get_display_frame_ip_t *ps_ip = (impeg2d_get_display_frame_ip_t *)pv_api_ip;
+                impeg2d_get_display_frame_op_t *ps_op = (impeg2d_get_display_frame_op_t *)pv_api_op;
+
+                ps_op->s_ivd_get_display_frame_op_t.u4_error_code = 0;
+                /* Every failure below sets bit IVD_UNSUPPORTEDPARAM plus a specific code and returns IV_FAIL. */
+                if(ps_ip->s_ivd_get_display_frame_ip_t.u4_size != sizeof(impeg2d_get_display_frame_ip_t))
+                {
+                    ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
+                    return(IV_FAIL);
+                }
+
+                if(ps_op->s_ivd_get_display_frame_op_t.u4_size != sizeof(impeg2d_get_display_frame_op_t))
+                {
+                    ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
+                    return(IV_FAIL);
+                }
+                /* At least one output buffer must be supplied. */
+                if(ps_ip->s_ivd_get_display_frame_ip_t.s_out_buffer.u4_num_bufs == 0)
+                {
+                    ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUFS;
+                    return IV_FAIL;
+                }
+                /* NOTE(review): u4_num_bufs is not compared with the capacity of pu1_bufs[] here - confirm it is bounded elsewhere. */
+                for(i = 0; i< (WORD32)ps_ip->s_ivd_get_display_frame_ip_t.s_out_buffer.u4_num_bufs;i++)
+                {
+                    if(ps_ip->s_ivd_get_display_frame_ip_t.s_out_buffer.pu1_bufs[i] == NULL) /* each buffer pointer must be set */
+                    {
+                        ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                        ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_OP_BUF_NULL;
+                        return IV_FAIL;
+                    }
+
+                    if(ps_ip->s_ivd_get_display_frame_ip_t.s_out_buffer.u4_min_out_buf_size[i] == 0) /* each buffer must have a non-zero size */
+                    {
+                        ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                        ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUF_SIZE;
+                        return IV_FAIL;
+                    }
+                    /* Disabled duplicate of the zero-size check directly above, kept as found:
+                    if(ps_ip->s_ivd_get_display_frame_ip_t.s_out_buffer.u4_min_out_buf_size[i] == 0)
+                    {
+                        ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                        ps_op->s_ivd_get_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUF_SIZE;
+                        return IV_FAIL;
+                    }
+                    */
+                }
+            }
+            break;
+       case IVD_CMD_REL_DISPLAY_FRAME: /* Validate the arguments of a release-display-frame call. */
+            {
+                impeg2d_rel_display_frame_ip_t *ps_ip = (impeg2d_rel_display_frame_ip_t *)pv_api_ip;
+                impeg2d_rel_display_frame_op_t *ps_op = (impeg2d_rel_display_frame_op_t *)pv_api_op;
+
+                ps_op->s_ivd_rel_display_frame_op_t.u4_error_code = 0;
+                /* Unlike most cases, either the impeg2d_ wrapper size or the base ivd_ structure size is accepted. */
+                if ((ps_ip->s_ivd_rel_display_frame_ip_t.u4_size != sizeof(impeg2d_rel_display_frame_ip_t))
+                        && (ps_ip->s_ivd_rel_display_frame_ip_t.u4_size != sizeof(ivd_rel_display_frame_ip_t)))
+                {
+                    ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
+                    return(IV_FAIL);
+                }
+
+                if((ps_op->s_ivd_rel_display_frame_op_t.u4_size != sizeof(impeg2d_rel_display_frame_op_t)) &&
+                        (ps_op->s_ivd_rel_display_frame_op_t.u4_size != sizeof(ivd_rel_display_frame_op_t)))
+                {
+                    ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_rel_display_frame_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
+                    return(IV_FAIL);
+                }
+
+            }
+            break;
+
+
+        case IVD_CMD_SET_DISPLAY_FRAME: /* Validate the arguments of a set-display-frame call. */
+            {
+                impeg2d_set_display_frame_ip_t *ps_ip = (impeg2d_set_display_frame_ip_t *)pv_api_ip;
+                impeg2d_set_display_frame_op_t *ps_op = (impeg2d_set_display_frame_op_t *)pv_api_op;
+                UWORD32 j, i; /* block-local unsigned indices: j walks display buffers, i walks the planes of one buffer */
+
+                ps_op->s_ivd_set_display_frame_op_t.u4_error_code = 0;
+                /* Either the impeg2d_ wrapper size or the base ivd_ structure size is accepted. */
+                if ((ps_ip->s_ivd_set_display_frame_ip_t.u4_size != sizeof(impeg2d_set_display_frame_ip_t))
+                        && (ps_ip->s_ivd_set_display_frame_ip_t.u4_size != sizeof(ivd_set_display_frame_ip_t)))
+                {
+                    ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
+                    return(IV_FAIL);
+                }
+
+                if((ps_op->s_ivd_set_display_frame_op_t.u4_size != sizeof(impeg2d_set_display_frame_op_t)) &&
+                        (ps_op->s_ivd_set_display_frame_op_t.u4_size != sizeof(ivd_set_display_frame_op_t)))
+                {
+                    ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
+                    return(IV_FAIL);
+                }
+                /* At least one display buffer must be registered. */
+                if(ps_ip->s_ivd_set_display_frame_ip_t.num_disp_bufs == 0)
+                {
+                    ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUFS;
+                    return IV_FAIL;
+                }
+                /* Every display buffer needs a non-zero plane count, and every plane a non-NULL pointer and a non-zero size. */
+                for(j = 0; j < ps_ip->s_ivd_set_display_frame_ip_t.num_disp_bufs; j++)
+                {
+                    if(ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].u4_num_bufs == 0)
+                    {
+                        ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                        ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUFS;
+                        return IV_FAIL;
+                    }
+
+                    for(i=0;i< ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].u4_num_bufs;i++)
+                    {
+                        if(ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].pu1_bufs[i] == NULL)
+                        {
+                            ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                            ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_OP_BUF_NULL;
+                            return IV_FAIL;
+                        }
+
+                        if(ps_ip->s_ivd_set_display_frame_ip_t.s_disp_buffer[j].u4_min_out_buf_size[i] == 0)
+                        {
+                            ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                            ps_op->s_ivd_set_display_frame_op_t.u4_error_code |= IVD_DISP_FRM_ZERO_OP_BUF_SIZE;
+                            return IV_FAIL;
+                        }
+                    }
+                }
+            }
+            break;
+
+        case IVD_CMD_VIDEO_DECODE: /* Validate the arguments of a video-decode call; only the two structure sizes are checked here. */
+            {
+                impeg2d_video_decode_ip_t *ps_ip = (impeg2d_video_decode_ip_t *)pv_api_ip;
+                impeg2d_video_decode_op_t *ps_op = (impeg2d_video_decode_op_t *)pv_api_op;
+
+                ps_op->s_ivd_video_decode_op_t.u4_error_code = 0;
+                /* A size mismatch sets bit IVD_UNSUPPORTEDPARAM plus a specific code and returns IV_FAIL. */
+                if(ps_ip->s_ivd_video_decode_ip_t.u4_size != sizeof(impeg2d_video_decode_ip_t))
+                {
+                    ps_op->s_ivd_video_decode_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_video_decode_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
+                    return(IV_FAIL);
+                }
+
+                if(ps_op->s_ivd_video_decode_op_t.u4_size != sizeof(impeg2d_video_decode_op_t))
+                {
+                    ps_op->s_ivd_video_decode_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_video_decode_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
+                    return(IV_FAIL);
+                }
+
+            }
+            break;
+
+        case IV_CMD_RETRIEVE_MEMREC: /* Validate the arguments of a retrieve-mem-rec call. */
+            {
+                impeg2d_retrieve_mem_rec_ip_t *ps_ip = (impeg2d_retrieve_mem_rec_ip_t *)pv_api_ip;
+                impeg2d_retrieve_mem_rec_op_t *ps_op = (impeg2d_retrieve_mem_rec_op_t *)pv_api_op;
+                iv_mem_rec_t          *ps_mem_rec;
+
+                ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code = 0;
+
+                /* Every failure below sets bit IVD_UNSUPPORTEDPARAM plus a specific code and returns IV_FAIL. */
+                if(ps_ip->s_ivd_retrieve_mem_rec_ip_t.u4_size != sizeof(impeg2d_retrieve_mem_rec_ip_t))
+                {
+                    ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
+                    return(IV_FAIL);
+                }
+
+                if(ps_op->s_ivd_retrieve_mem_rec_op_t.u4_size != sizeof(impeg2d_retrieve_mem_rec_op_t))
+                {
+                    ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
+                    return(IV_FAIL);
+                }
+
+                ps_mem_rec  = ps_ip->s_ivd_retrieve_mem_rec_ip_t.pv_mem_rec_location;
+                /* The record array is dereferenced by the loop below, so a missing   */
+                /* array is rejected first, with the same error code the fill-mem-rec */
+                /* and init checks use for a NULL pv_mem_rec_location.                */
+                if(NULL == ps_mem_rec)
+                {
+                    ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                    ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= IVD_NUM_REC_NOT_SUFFICIENT;
+                    return(IV_FAIL);
+                }
+
+                /* check memrecords sizes are correct */
+                for(i=0;i < NUM_MEM_RECORDS ; i++)
+                {
+                    if(ps_mem_rec[i].u4_size != sizeof(iv_mem_rec_t))
+                    {
+                        ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                        ps_op->s_ivd_retrieve_mem_rec_op_t.u4_error_code |= IVD_MEM_REC_STRUCT_SIZE_INCORRECT;
+                        return IV_FAIL;
+                    }
+                }
+            }
+            break;
+
+        case IVD_CMD_VIDEO_CTL:
+            {
+                UWORD32 *pu4_ptr_cmd;
+                UWORD32 u4_sub_command;
+
+                pu4_ptr_cmd = (UWORD32 *)pv_api_ip;
+                pu4_ptr_cmd += 2;
+                u4_sub_command = *pu4_ptr_cmd;
+
+                switch(u4_sub_command)
+                {
+                    case IVD_CMD_CTL_SETPARAMS: /* Set-params: check the input structure size here, then share the output check below. */
+                        {
+                            impeg2d_ctl_set_config_ip_t *ps_ip;
+                            impeg2d_ctl_set_config_op_t *ps_op;
+                            ps_ip = (impeg2d_ctl_set_config_ip_t *)pv_api_ip;
+                            ps_op = (impeg2d_ctl_set_config_op_t *)pv_api_op;
+
+                            ps_op->s_ivd_ctl_set_config_op_t.u4_error_code = 0;
+
+                            if(ps_ip->s_ivd_ctl_set_config_ip_t.u4_size != sizeof(impeg2d_ctl_set_config_ip_t))
+                            {
+                                ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                                ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
+                                return IV_FAIL;
+                            }
+                        } /* fallthrough: no break here, so SETPARAMS continues into the output-size check of SETDEFAULT */
+                    case IVD_CMD_CTL_SETDEFAULT: /* Set-default: only the output structure size is checked. */
+                        {
+                            impeg2d_ctl_set_config_op_t *ps_op;
+                            ps_op = (impeg2d_ctl_set_config_op_t *)pv_api_op;
+                            ps_op->s_ivd_ctl_set_config_op_t.u4_error_code   = 0;
+
+                            if(ps_op->s_ivd_ctl_set_config_op_t.u4_size != sizeof(impeg2d_ctl_set_config_op_t))
+                            {
+                                ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                                ps_op->s_ivd_ctl_set_config_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
+                                return IV_FAIL;
+                            }
+                        }
+                        break;
+
+                    case IVD_CMD_CTL_GETPARAMS: /* Get-status control call: both structure sizes must match this build exactly. */
+                        {
+                            impeg2d_ctl_getstatus_ip_t *ps_ip;
+                            impeg2d_ctl_getstatus_op_t *ps_op;
+
+                            ps_ip = (impeg2d_ctl_getstatus_ip_t *)pv_api_ip;
+                            ps_op = (impeg2d_ctl_getstatus_op_t *)pv_api_op;
+
+                            ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code   = 0;
+                            /* A mismatch sets bit IVD_UNSUPPORTEDPARAM plus a specific code and returns IV_FAIL. */
+                            if(ps_ip->s_ivd_ctl_getstatus_ip_t.u4_size != sizeof(impeg2d_ctl_getstatus_ip_t))
+                            {
+                                ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                                ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
+                                return IV_FAIL;
+                            }
+                            if(ps_op->s_ivd_ctl_getstatus_op_t.u4_size != sizeof(impeg2d_ctl_getstatus_op_t))
+                            {
+                                ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                                ps_op->s_ivd_ctl_getstatus_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
+                                return IV_FAIL;
+                            }
+                        }
+                        break;
+
+                    case IVD_CMD_CTL_GETBUFINFO: /* Get-buffer-info control call: both structure sizes must match this build exactly. */
+                        {
+                            impeg2d_ctl_getbufinfo_ip_t *ps_ip;
+                            impeg2d_ctl_getbufinfo_op_t *ps_op;
+                            ps_ip = (impeg2d_ctl_getbufinfo_ip_t *)pv_api_ip;
+                            ps_op = (impeg2d_ctl_getbufinfo_op_t *)pv_api_op;
+
+                            ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code  = 0;
+                            /* A mismatch sets bit IVD_UNSUPPORTEDPARAM plus a specific code and returns IV_FAIL. */
+                            if(ps_ip->s_ivd_ctl_getbufinfo_ip_t.u4_size != sizeof(impeg2d_ctl_getbufinfo_ip_t))
+                            {
+                                ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                                ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
+                                return IV_FAIL;
+                            }
+                            if(ps_op->s_ivd_ctl_getbufinfo_op_t.u4_size != sizeof(impeg2d_ctl_getbufinfo_op_t))
+                            {
+                                ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                                ps_op->s_ivd_ctl_getbufinfo_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
+                                return IV_FAIL;
+                            }
+                        }
+                        break;
+
+                    case IVD_CMD_CTL_GETVERSION: /* Get-version control call: both structure sizes must match this build exactly. */
+                        {
+                            impeg2d_ctl_getversioninfo_ip_t *ps_ip;
+                            impeg2d_ctl_getversioninfo_op_t *ps_op;
+                            ps_ip = (impeg2d_ctl_getversioninfo_ip_t *)pv_api_ip;
+                            ps_op = (impeg2d_ctl_getversioninfo_op_t *)pv_api_op;
+
+                            ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code  = 0;
+                            /* A mismatch sets bit IVD_UNSUPPORTEDPARAM plus a specific code and returns IV_FAIL. */
+                            if(ps_ip->s_ivd_ctl_getversioninfo_ip_t.u4_size != sizeof(impeg2d_ctl_getversioninfo_ip_t))
+                            {
+                                ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                                ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
+                                return IV_FAIL;
+                            }
+                            if(ps_op->s_ivd_ctl_getversioninfo_op_t.u4_size != sizeof(impeg2d_ctl_getversioninfo_op_t))
+                            {
+                                ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                                ps_op->s_ivd_ctl_getversioninfo_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
+                                return IV_FAIL;
+                            }
+                        }
+                        break;
+
+                    case IVD_CMD_CTL_FLUSH: /* Flush control call: both structure sizes must match this build exactly. */
+                        {
+                            impeg2d_ctl_flush_ip_t *ps_ip;
+                            impeg2d_ctl_flush_op_t *ps_op;
+                            ps_ip = (impeg2d_ctl_flush_ip_t *)pv_api_ip;
+                            ps_op = (impeg2d_ctl_flush_op_t *)pv_api_op;
+
+                            ps_op->s_ivd_ctl_flush_op_t.u4_error_code = 0;
+                            /* A mismatch sets bit IVD_UNSUPPORTEDPARAM plus a specific code and returns IV_FAIL. */
+                            if(ps_ip->s_ivd_ctl_flush_ip_t.u4_size != sizeof(impeg2d_ctl_flush_ip_t))
+                            {
+                                ps_op->s_ivd_ctl_flush_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                                ps_op->s_ivd_ctl_flush_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
+                                return IV_FAIL;
+                            }
+                            if(ps_op->s_ivd_ctl_flush_op_t.u4_size != sizeof(impeg2d_ctl_flush_op_t))
+                            {
+                                ps_op->s_ivd_ctl_flush_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                                ps_op->s_ivd_ctl_flush_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
+                                return IV_FAIL;
+                            }
+                        }
+                        break;
+
+                    case IVD_CMD_CTL_RESET:
+                        {
+                            impeg2d_ctl_reset_ip_t *ps_ip;
+                            impeg2d_ctl_reset_op_t *ps_op;
+                            ps_ip = (impeg2d_ctl_reset_ip_t *)pv_api_ip;
+                            ps_op = (impeg2d_ctl_reset_op_t *)pv_api_op;
+
+                            ps_op->s_ivd_ctl_reset_op_t.u4_error_code    = 0;
+
+                            if(ps_ip->s_ivd_ctl_reset_ip_t.u4_size != sizeof(impeg2d_ctl_reset_ip_t))
+                            {
+                                ps_op->s_ivd_ctl_reset_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                                ps_op->s_ivd_ctl_reset_op_t.u4_error_code |= IVD_IP_API_STRUCT_SIZE_INCORRECT;
+                                return IV_FAIL;
+                            }
+                            if(ps_op->s_ivd_ctl_reset_op_t.u4_size != sizeof(impeg2d_ctl_reset_op_t))
+                            {
+                                ps_op->s_ivd_ctl_reset_op_t.u4_error_code  |= 1 << IVD_UNSUPPORTEDPARAM;
+                                ps_op->s_ivd_ctl_reset_op_t.u4_error_code |= IVD_OP_API_STRUCT_SIZE_INCORRECT;
+                                return IV_FAIL;
+                            }
+                        }
+                        break;
+
+                    case IMPEG2D_CMD_CTL_GET_BUFFER_DIMENSIONS:
+                    {
+                        impeg2d_ctl_get_frame_dimensions_ip_t *ps_ip;
+                        impeg2d_ctl_get_frame_dimensions_op_t *ps_op;
+
+                        ps_ip =
+                                        (impeg2d_ctl_get_frame_dimensions_ip_t *)pv_api_ip;
+                        ps_op =
+                                        (impeg2d_ctl_get_frame_dimensions_op_t *)pv_api_op;
+
+                        if(ps_ip->u4_size
+                                        != sizeof(impeg2d_ctl_get_frame_dimensions_ip_t))
+                        {
+                            ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                            ps_op->u4_error_code |=
+                                            IVD_IP_API_STRUCT_SIZE_INCORRECT;
+                            return IV_FAIL;
+                        }
+
+                        if(ps_op->u4_size
+                                        != sizeof(impeg2d_ctl_get_frame_dimensions_op_t))
+                        {
+                            ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                            ps_op->u4_error_code |=
+                                            IVD_OP_API_STRUCT_SIZE_INCORRECT;
+                            return IV_FAIL;
+                        }
+
+                        break;
+                    }
+
+                    case IMPEG2D_CMD_CTL_SET_NUM_CORES:
+                    {
+                        impeg2d_ctl_set_num_cores_ip_t *ps_ip;
+                        impeg2d_ctl_set_num_cores_op_t *ps_op;
+
+                        ps_ip = (impeg2d_ctl_set_num_cores_ip_t *)pv_api_ip;
+                        ps_op = (impeg2d_ctl_set_num_cores_op_t *)pv_api_op;
+
+                        if(ps_ip->u4_size
+                                        != sizeof(impeg2d_ctl_set_num_cores_ip_t))
+                        {
+                            ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                            ps_op->u4_error_code |=
+                                            IVD_IP_API_STRUCT_SIZE_INCORRECT;
+                            return IV_FAIL;
+                        }
+
+                        if(ps_op->u4_size
+                                        != sizeof(impeg2d_ctl_set_num_cores_op_t))
+                        {
+                            ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                            ps_op->u4_error_code |=
+                                            IVD_OP_API_STRUCT_SIZE_INCORRECT;
+                            return IV_FAIL;
+                        }
+
+#ifdef MULTICORE
+                        if((ps_ip->u4_num_cores < 1) || (ps_ip->u4_num_cores > MAX_THREADS))
+#else
+                        if(ps_ip->u4_num_cores != 1)
+#endif
+                        {
+                            ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                            return IV_FAIL;
+                        }
+                        break;
+                    }
+                    case IMPEG2D_CMD_CTL_SET_PROCESSOR:
+                    {
+                        impeg2d_ctl_set_processor_ip_t *ps_ip;
+                        impeg2d_ctl_set_processor_op_t *ps_op;
+
+                        ps_ip = (impeg2d_ctl_set_processor_ip_t *)pv_api_ip;
+                        ps_op = (impeg2d_ctl_set_processor_op_t *)pv_api_op;
+
+                        if(ps_ip->u4_size
+                                        != sizeof(impeg2d_ctl_set_processor_ip_t))
+                        {
+                            ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                            ps_op->u4_error_code |=
+                                            IVD_IP_API_STRUCT_SIZE_INCORRECT;
+                            return IV_FAIL;
+                        }
+
+                        if(ps_op->u4_size
+                                        != sizeof(impeg2d_ctl_set_processor_op_t))
+                        {
+                            ps_op->u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+                            ps_op->u4_error_code |=
+                                            IVD_OP_API_STRUCT_SIZE_INCORRECT;
+                            return IV_FAIL;
+                        }
+
+                        break;
+                    }
+                    default:
+                        break;
+
+                }
+            }
+            break;
+
+        default:
+            {            *(pu4_api_op + 1) |= 1 << IVD_UNSUPPORTEDPARAM;
+                         *(pu4_api_op + 1) |= IVD_UNSUPPORTED_API_CMD;
+                         return IV_FAIL;
+            }
+
+
+    }
+
+    return IV_SUCCESS;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name :   impeg2d_api_entity                                     */
+/*                                                                           */
+/*  Description   : Handles one video decode call: runs impeg2d_dec_hdr when */
+/*                  required, then flushes one picture or decodes one frame. */
+/*  Inputs        : Codec handle, decode input and decode output structures  */
+/*  Globals       : None referenced directly                                 */
+/*  Outputs       : Output structure is zeroed and then filled in            */
+/*  Returns       : IV_SUCCESS or IV_FAIL                                    */
+/*                                                                           */
+/*  Issues        : none                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         22 10 2008    100356         Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+
+
+IV_API_CALL_STATUS_T impeg2d_api_entity(iv_obj_t *ps_dechdl,
+                                        void *pv_api_ip,
+                                        void *pv_api_op)
+{
+    iv_obj_t *ps_dec_handle;
+    dec_state_t *ps_dec_state;
+    dec_state_multi_core_t *ps_dec_state_multi_core;
+
+    impeg2d_video_decode_ip_t    *ps_dec_ip;
+
+    impeg2d_video_decode_op_t    *ps_dec_op;
+    WORD32 bytes_remaining; /* input byte count, reduced by what header decode consumed */
+    pic_buf_t *ps_disp_pic;
+
+
+
+    ps_dec_ip = (impeg2d_video_decode_ip_t    *)pv_api_ip;
+    ps_dec_op = (impeg2d_video_decode_op_t    *)pv_api_op;
+    /* Start from a zeroed output structure; its size field is rewritten below. */
+    memset(ps_dec_op,0,sizeof(impeg2d_video_decode_op_t));
+
+    ps_dec_op->s_ivd_video_decode_op_t.u4_size = sizeof(impeg2d_video_decode_op_t);
+    ps_dec_op->s_ivd_video_decode_op_t.u4_output_present = 0;
+    bytes_remaining = ps_dec_ip->s_ivd_video_decode_ip_t.u4_num_Bytes;
+
+    ps_dec_handle = (iv_obj_t *)ps_dechdl;
+    /* NOTE(review): pv_api_ip and pv_api_op have already been dereferenced above. */
+    if(ps_dechdl == NULL)
+    {
+        return(IV_FAIL);
+    }
+
+
+    /* The state used below is entry 0 of the multi-core state's ps_dec_state array. */
+    ps_dec_state_multi_core  = ps_dec_handle->pv_codec_handle;
+    ps_dec_state = ps_dec_state_multi_core->ps_dec_state[0];
+    /* The display frame descriptor used by the state is the one in the output structure. */
+    ps_dec_state->ps_disp_frm_buf = &(ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf);
+    if(0 == ps_dec_state->u4_share_disp_buf) /* planes come from the caller's output buffers */
+    {
+        ps_dec_state->ps_disp_frm_buf->pv_y_buf  = ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[0];
+        ps_dec_state->ps_disp_frm_buf->pv_u_buf  = ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[1];
+        ps_dec_state->ps_disp_frm_buf->pv_v_buf  = ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[2];
+    }
+    /* Per-call reset of the display picture, decoded flag, consumed bytes and error code. */
+    ps_dec_state->ps_disp_pic = NULL;
+    ps_dec_state->i4_frame_decoded = 0;
+    /* reset bytes consumed */
+    ps_dec_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = 0;
+
+    ps_dec_op->s_ivd_video_decode_op_t.u4_error_code           = IV_SUCCESS;
+    /* A NULL bitstream pointer is accepted only when u1_flushfrm is set. */
+    if((ps_dec_ip->s_ivd_video_decode_ip_t.pv_stream_buffer == NULL)&&(ps_dec_state->u1_flushfrm==0))
+    {
+        ps_dec_op->s_ivd_video_decode_op_t.u4_error_code |= 1 << IVD_UNSUPPORTEDPARAM;
+        ps_dec_op->s_ivd_video_decode_op_t.u4_error_code |= IVD_DEC_FRM_BS_BUF_NULL;
+        return IV_FAIL;
+    }
+
+    /* Fail once more than NUM_FRAMES_LIMIT frames have been decoded. */
+    if (ps_dec_state->u4_num_frames_decoded > NUM_FRAMES_LIMIT)
+    {
+        ps_dec_op->s_ivd_video_decode_op_t.u4_error_code       = IMPEG2D_SAMPLE_VERSION_LIMIT_ERR;
+        return(IV_FAIL);
+    }
+    /* Header decode: when u2_header_done is 0 or u2_decode_header is 1, and not flushing. */
+    if(((0 == ps_dec_state->u2_header_done) || (ps_dec_state->u2_decode_header == 1)) && (ps_dec_state->u1_flushfrm == 0))
+    {
+        impeg2d_dec_hdr(ps_dec_state,ps_dec_ip ,ps_dec_op);
+        bytes_remaining -= ps_dec_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed;
+    }
+    /* Frame path: skipped when u2_decode_header is 1; needs remaining bytes or flush mode. */
+    if((1 != ps_dec_state->u2_decode_header) && ((bytes_remaining > 0) || ps_dec_state->u1_flushfrm))
+    {
+        if(ps_dec_state->u1_flushfrm) /* flush mode: no decoding in this branch */
+        {
+            if(ps_dec_state->aps_ref_pics[1] != NULL)
+            {
+                impeg2_disp_mgr_add(&ps_dec_state->s_disp_mgr, ps_dec_state->aps_ref_pics[1], ps_dec_state->aps_ref_pics[1]->i4_buf_id);
+                impeg2_buf_mgr_release(ps_dec_state->pv_pic_buf_mg, ps_dec_state->aps_ref_pics[1]->i4_buf_id, BUF_MGR_REF);
+                impeg2_buf_mgr_release(ps_dec_state->pv_pic_buf_mg, ps_dec_state->aps_ref_pics[0]->i4_buf_id, BUF_MGR_REF);
+                /* NOTE(review): aps_ref_pics[0] is dereferenced above without its own NULL check. */
+                ps_dec_state->aps_ref_pics[1] = NULL;
+                ps_dec_state->aps_ref_pics[0] = NULL;
+
+            }
+            else if(ps_dec_state->aps_ref_pics[0] != NULL)
+            {
+                impeg2_disp_mgr_add(&ps_dec_state->s_disp_mgr, ps_dec_state->aps_ref_pics[0], ps_dec_state->aps_ref_pics[0]->i4_buf_id);
+                impeg2_buf_mgr_release(ps_dec_state->pv_pic_buf_mg, ps_dec_state->aps_ref_pics[0]->i4_buf_id, BUF_MGR_REF);
+
+                ps_dec_state->aps_ref_pics[0] = NULL;
+            }
+            ps_dec_ip->s_ivd_video_decode_ip_t.u4_size                 = sizeof(impeg2d_video_decode_ip_t);
+            ps_dec_op->s_ivd_video_decode_op_t.u4_size                 = sizeof(impeg2d_video_decode_op_t);
+            /* Take one picture from the display manager; NULL means no output for this call. */
+            ps_disp_pic = impeg2_disp_mgr_get(&ps_dec_state->s_disp_mgr, &ps_dec_state->i4_disp_buf_id);
+
+            ps_dec_state->ps_disp_pic = ps_disp_pic;
+            if(ps_disp_pic == NULL)
+            {
+                ps_dec_op->s_ivd_video_decode_op_t.u4_output_present = 0;
+            }
+            else
+            {
+                WORD32 fmt_conv; /* 1: impeg2d_format_convert() is called below */
+                if(0 == ps_dec_state->u4_share_disp_buf)
+                {
+                    ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.pv_y_buf  = ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[0];
+                    ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.pv_u_buf  = ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[1];
+                    ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.pv_v_buf  = ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[2];
+                    fmt_conv = 1;
+                }
+                else
+                {
+                    ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.pv_y_buf  = ps_disp_pic->pu1_y;
+                    if(IV_YUV_420P == ps_dec_state->i4_chromaFormat)
+                    {
+                        ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.pv_u_buf  = ps_disp_pic->pu1_u;
+                        ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.pv_v_buf  = ps_disp_pic->pu1_v;
+                        fmt_conv = 0;
+                    }
+                    else
+                    {
+                        UWORD8 *pu1_buf;
+
+                        pu1_buf = ps_dec_state->as_disp_buffers[ps_disp_pic->i4_buf_id].pu1_bufs[1];
+                        ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.pv_u_buf  = pu1_buf;
+
+                        pu1_buf = ps_dec_state->as_disp_buffers[ps_disp_pic->i4_buf_id].pu1_bufs[2];
+                        ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.pv_v_buf  = pu1_buf;
+                        fmt_conv = 1;
+                    }
+                }
+                /* Fill the output planes through impeg2d_format_convert() when flagged above. */
+                if(fmt_conv == 1)
+                {
+                    impeg2d_format_convert(ps_dec_state, ps_disp_pic,
+                                         &(ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf),
+                                         0, ps_dec_state->u2_vertical_size);
+                }
+                if(0 == ps_dec_state->u4_share_disp_buf)
+                    impeg2_buf_mgr_release(ps_dec_state->pv_pic_buf_mg, ps_disp_pic->i4_buf_id, BUF_MGR_DISP);
+
+                ps_dec_op->s_ivd_video_decode_op_t.u4_pic_ht = ps_dec_state->u2_vertical_size;
+                ps_dec_op->s_ivd_video_decode_op_t.u4_pic_wd = ps_dec_state->u2_horizontal_size;
+                ps_dec_op->s_ivd_video_decode_op_t.u4_output_present = 1;
+
+                ps_dec_op->s_ivd_video_decode_op_t.u4_disp_buf_id = ps_disp_pic->i4_buf_id;
+                ps_dec_op->s_ivd_video_decode_op_t.u4_ts = ps_disp_pic->u4_ts;
+
+                ps_dec_op->s_ivd_video_decode_op_t.e_output_format = (IV_COLOR_FORMAT_T)ps_dec_state->i4_chromaFormat;
+
+                ps_dec_op->s_ivd_video_decode_op_t.u4_is_ref_flag = (B_PIC != ps_dec_state->e_pic_type);
+
+                ps_dec_op->s_ivd_video_decode_op_t.u4_progressive_frame_flag           = IV_PROGRESSIVE;
+
+                ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_y_wd = ps_dec_state->u2_horizontal_size;
+                ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_y_strd = ps_dec_state->u4_frm_buf_stride;
+                ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_y_ht = ps_dec_state->u2_vertical_size;
+                /* Chroma defaults: half the luma width, stride and height. */
+                ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_wd = ps_dec_state->u2_horizontal_size >> 1;
+                ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_strd = ps_dec_state->u4_frm_buf_stride >> 1;
+                ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_ht = ps_dec_state->u2_vertical_size >> 1;
+
+                ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_wd = ps_dec_state->u2_horizontal_size >> 1;
+                ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_strd = ps_dec_state->u4_frm_buf_stride >> 1;
+                ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_ht = ps_dec_state->u2_vertical_size >> 1;
+                ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_size = sizeof(ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf);
+                /* Format-specific overrides of the chroma fields set above. */
+                switch(ps_dec_state->i4_chromaFormat)
+                {
+                    case IV_YUV_420SP_UV:
+                    case IV_YUV_420SP_VU:
+                        ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_wd = ps_dec_state->u2_horizontal_size;
+                        ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_strd = ps_dec_state->u4_frm_buf_stride;
+                    break;
+                    case IV_YUV_422ILE:
+                        ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_wd = 0;
+                        ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_ht = 0;
+                        ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_wd = 0;
+                        ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_ht = 0;
+                    break;
+                    default:
+                    break;
+                }
+
+
+            }
+            if(ps_dec_op->s_ivd_video_decode_op_t.u4_output_present)
+            {
+                if(1 == ps_dec_op->s_ivd_video_decode_op_t.u4_output_present)
+                {
+                    INSERT_LOGO(ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[0],
+                                ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[1],
+                                ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[2],
+                                ps_dec_state->u4_frm_buf_stride,
+                                ps_dec_state->u2_horizontal_size,
+                                ps_dec_state->u2_vertical_size,
+                                ps_dec_state->i4_chromaFormat,
+                                ps_dec_state->u2_horizontal_size,
+                                ps_dec_state->u2_vertical_size);
+                }
+                return(IV_SUCCESS);
+            }
+            else
+            {
+                ps_dec_state->u1_flushfrm = 0;
+                /* Flush mode was cleared above; this call still reports failure. */
+                return(IV_FAIL);
+            }
+            /* Both branches above return, so the flush path never reaches the code below. */
+        }
+        else if(ps_dec_state->u1_flushfrm==0) /* always true after the if above */
+        {
+            ps_dec_ip->s_ivd_video_decode_ip_t.u4_size                 = sizeof(impeg2d_video_decode_ip_t);
+            ps_dec_op->s_ivd_video_decode_op_t.u4_size                 = sizeof(impeg2d_video_decode_op_t);
+            if(ps_dec_ip->s_ivd_video_decode_ip_t.u4_num_Bytes < 4) /* all input is reported consumed */
+            {
+                ps_dec_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = ps_dec_ip->s_ivd_video_decode_ip_t.u4_num_Bytes;
+                return(IV_FAIL);
+            }
+            /* With shared display buffers, fail when impeg2_buf_mgr_check_free() returns 0. */
+            if(1 == ps_dec_state->u4_share_disp_buf)
+            {
+                if(0 == impeg2_buf_mgr_check_free(ps_dec_state->pv_pic_buf_mg))
+                {
+                    ps_dec_op->s_ivd_video_decode_op_t.u4_error_code =
+                                    (IMPEG2D_ERROR_CODES_T)IVD_DEC_REF_BUF_NULL;
+                    return IV_FAIL;
+                }
+            }
+
+            /* Output format and frame geometry are filled in before the frame is decoded. */
+            ps_dec_op->s_ivd_video_decode_op_t.e_output_format = (IV_COLOR_FORMAT_T)ps_dec_state->i4_chromaFormat;
+
+            ps_dec_op->s_ivd_video_decode_op_t.u4_is_ref_flag = (B_PIC != ps_dec_state->e_pic_type);
+
+            ps_dec_op->s_ivd_video_decode_op_t.u4_progressive_frame_flag           = IV_PROGRESSIVE;
+
+            ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_y_wd = ps_dec_state->u2_horizontal_size;
+            ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_y_strd = ps_dec_state->u4_frm_buf_stride;
+            ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_y_ht = ps_dec_state->u2_vertical_size;
+            /* Chroma defaults: half the luma width, stride and height. */
+            ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_wd = ps_dec_state->u2_horizontal_size >> 1;
+            ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_strd = ps_dec_state->u4_frm_buf_stride >> 1;
+            ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_ht = ps_dec_state->u2_vertical_size >> 1;
+
+            ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_wd = ps_dec_state->u2_horizontal_size >> 1;
+            ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_strd = ps_dec_state->u4_frm_buf_stride >> 1;
+            ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_ht = ps_dec_state->u2_vertical_size >> 1;
+            ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_size = sizeof(ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf);
+            /* Format-specific overrides of the chroma fields set above. */
+            switch(ps_dec_state->i4_chromaFormat)
+            {
+                case IV_YUV_420SP_UV:
+                case IV_YUV_420SP_VU:
+                    ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_wd = ps_dec_state->u2_horizontal_size;
+                    ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_strd = ps_dec_state->u4_frm_buf_stride;
+                break;
+                case IV_YUV_422ILE:
+                    ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_wd = 0;
+                    ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_u_ht = 0;
+                    ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_wd = 0;
+                    ps_dec_op->s_ivd_video_decode_op_t.s_disp_frm_buf.u4_v_ht = 0;
+                break;
+                default:
+                break;
+            }
+            /* NOTE(review): u1_flushfrm is 0 on this path, so the else branch below looks unreachable. */
+            if( ps_dec_state->u1_flushfrm == 0)
+            {
+                ps_dec_state->u1_flushcnt    = 0;
+
+                /*************************************************************************/
+                /*                              Frame Decode                             */
+                /*************************************************************************/
+
+                impeg2d_dec_frm(ps_dec_state,ps_dec_ip,ps_dec_op);
+                /* Output flag, picture type and frame count are updated only when no error is reported. */
+                if (IVD_ERROR_NONE ==
+                        ps_dec_op->s_ivd_video_decode_op_t.u4_error_code)
+                {
+                    if(ps_dec_state->ps_disp_pic)
+                    ps_dec_op->s_ivd_video_decode_op_t.u4_output_present = 1;
+                    else
+                    ps_dec_op->s_ivd_video_decode_op_t.u4_output_present = 0;
+                    if(ps_dec_state->u1_first_frame_done == 0)
+                    {
+                        ps_dec_state->u1_first_frame_done = 1;
+                    }
+                    /* Map the decoder picture type to the API frame type; D_PIC is reported as IV_I_FRAME. */
+                    switch(ps_dec_state->e_pic_type)
+                    {
+                        case I_PIC :
+                        ps_dec_op->s_ivd_video_decode_op_t.e_pic_type = IV_I_FRAME;
+                        break;
+
+                        case P_PIC:
+                        ps_dec_op->s_ivd_video_decode_op_t.e_pic_type = IV_P_FRAME;
+                        break;
+
+                        case B_PIC:
+                        ps_dec_op->s_ivd_video_decode_op_t.e_pic_type = IV_B_FRAME;
+                        break;
+
+                        case D_PIC:
+                        ps_dec_op->s_ivd_video_decode_op_t.e_pic_type = IV_I_FRAME;
+                        break;
+
+                        default :
+                        ps_dec_op->s_ivd_video_decode_op_t.e_pic_type = IV_FRAMETYPE_DEFAULT;
+                        break;
+                    }
+                    /* Counter compared against NUM_FRAMES_LIMIT at the top of this function. */
+                    ps_dec_state->u4_num_frames_decoded++;
+                }
+            }
+            else
+            {
+                ps_dec_state->u1_flushcnt++;
+            }
+        }
+        if(ps_dec_state->ps_disp_pic)
+        {
+            ps_dec_op->s_ivd_video_decode_op_t.u4_disp_buf_id = ps_dec_state->ps_disp_pic->i4_buf_id;
+            ps_dec_op->s_ivd_video_decode_op_t.u4_ts = ps_dec_state->ps_disp_pic->u4_ts;
+            /* With non-shared display buffers the BUF_MGR_DISP release is issued here. */
+            if(0 == ps_dec_state->u4_share_disp_buf)
+            {
+                impeg2_buf_mgr_release(ps_dec_state->pv_pic_buf_mg, ps_dec_state->ps_disp_pic->i4_buf_id, BUF_MGR_DISP);
+            }
+        }
+
+        if(1 == ps_dec_op->s_ivd_video_decode_op_t.u4_output_present)
+        {
+            INSERT_LOGO(ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[0],
+                        ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[1],
+                        ps_dec_ip->s_ivd_video_decode_ip_t.s_out_buffer.pu1_bufs[2],
+                        ps_dec_state->u4_frm_buf_stride,
+                        ps_dec_state->u2_horizontal_size,
+                        ps_dec_state->u2_vertical_size,
+                        ps_dec_state->i4_chromaFormat,
+                        ps_dec_state->u2_horizontal_size,
+                        ps_dec_state->u2_vertical_size);
+        }
+
+    }
+    /* Written unconditionally, replacing any value assigned earlier in this call. */
+    ps_dec_op->s_ivd_video_decode_op_t.u4_progressive_frame_flag = 1;
+    ps_dec_op->s_ivd_video_decode_op_t.e4_fld_type     = ps_dec_state->s_disp_op.e4_fld_type;
+
+    /* A non-zero error code makes the call fail. */
+    if(ps_dec_op->s_ivd_video_decode_op_t.u4_error_code)
+        return IV_FAIL;
+    else
+        return IV_SUCCESS;
+}

diff --git a/decoder/impeg2d_bitstream.c b/decoder/impeg2d_bitstream.c
new file mode 100644
index 0000000..92d3785
--- /dev/null
+++ b/decoder/impeg2d_bitstream.c

@@ -0,0 +1,335 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : impeg2d_bitstream.c                                  */
+/*                                                                           */
+/*  Description       : Bitstream reading routines of the MPEG-2 decoder     */
+/*                      (stream initialisation, single-bit reads and bit     */
+/*                      flushing).                                           */
+/*                                                                           */
+/*  List of Functions : <List the functions defined in this file>            */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         10 01 2005   Ittiam          Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+#include <stdlib.h>
+
+#include "iv_datatypedef.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_macros.h"
+#include "impeg2d_bitstream.h"
+
+#define BIT(val,bit)      ((UWORD16)(((val) >> (bit)) & 0x1)) /* bit number 'bit' of 'val', as 0 or 1 */
+/******************************************************************************
+*
+*  Function Name    : impeg2d_bit_stream_init
+*
+*  Description      : Binds a stream context to a byte buffer and preloads
+*                     the current and next 32-bit words. Bytes in front of
+*                     the next 4-byte boundary are packed into the first
+*                     word and the starting bit offset is set to match.
+*  Arguments        :
+*  ps_stream        : Stream context to initialise.
+*  pu1_byte_buf     : Start address of the bitstream buffer.
+*  u4_max_offset    : Buffer size in bytes; stored in bits with the starting
+*                     bit offset added.
+*
+*  Values Returned  : None
+*******************************************************************************/
+void impeg2d_bit_stream_init(stream_t *ps_stream,
+                             UWORD8 *pu1_byte_buf,
+                             UWORD32 u4_max_offset)
+{
+    UWORD8  *pu1_src = (UWORD8 *)pu1_byte_buf;
+    UWORD32 *pu4_src;
+    UWORD32  u4_raw, u4_swapped;
+
+    ps_stream->pv_bs_buf = pu1_byte_buf;
+
+    /* The low two address bits tell how far the buffer start is from a
+       4-byte boundary. For a misaligned start, the bytes up to that boundary
+       are gathered into the upper part of the first word and the bit offset
+       skips the unused low bytes. */
+    switch(((size_t)pu1_src) & 3)
+    {
+        case 1:
+            u4_raw  = ((UWORD32)(*pu1_src++)) << 8;
+            u4_raw += ((UWORD32)(*pu1_src++)) << 16;
+            u4_raw += ((UWORD32)(*pu1_src++)) << 24;
+            pu4_src = (UWORD32 *)pu1_src;
+            ps_stream->u4_offset = 8;
+            break;
+
+        case 2:
+            u4_raw  = ((UWORD32)(*pu1_src++)) << 16;
+            u4_raw += ((UWORD32)(*pu1_src++)) << 24;
+            pu4_src = (UWORD32 *)pu1_src;
+            ps_stream->u4_offset = 16;
+            break;
+
+        case 3:
+            u4_raw  = (((UWORD32)(*pu1_src++)) << 24);
+            pu4_src = (UWORD32 *)pu1_src;
+            ps_stream->u4_offset = 24;
+            break;
+
+        default:
+            /* Already aligned: the first word is read whole. */
+            pu4_src = (UWORD32 *)pu1_src;
+            u4_raw  = *pu4_src++;
+            ps_stream->u4_offset = 0;
+            break;
+    }
+
+    /* Swap the byte order so that bits run from MSB to LSB in stream order */
+    CONV_LE_TO_BE(u4_swapped,u4_raw)
+
+    /* Fetch the following word before publishing the current one */
+    u4_raw = *pu4_src++;
+    ps_stream->u4_buf = u4_swapped;
+
+    CONV_LE_TO_BE(u4_swapped,u4_raw)
+    ps_stream->u4_buf_nxt = u4_swapped;
+
+    ps_stream->pu4_buf_aligned = pu4_src;
+
+    /* Limit expressed in bits, shifted by the starting offset chosen above */
+    ps_stream->u4_max_offset = (u4_max_offset << 3) + ps_stream->u4_offset;
+}
+
+
+
+/******************************************************************************
+*
+*  Function Name    : impeg2d_bit_stream_get_bit
+*
+*  Description      : Returns the bit at the current read position of the
+*                     stream and moves the position forward by one bit. When
+*                     the bit just read was bit 31 of the current word, the
+*                     next word becomes current and a fresh word is fetched
+*                     from the aligned buffer pointer.
+*  Arguments        :
+*  ps_stream        : Stream context to read from.
+*
+*  Values Returned  : The bit read(0/1)
+*******************************************************************************/
+INLINE UWORD8 impeg2d_bit_stream_get_bit(stream_t *ps_stream)
+{
+    UWORD32 u4_pos     = ps_stream->u4_offset;
+    UWORD32 u4_bit_idx = u4_pos & 0x1F;
+    UWORD32 u4_value   = ps_stream->u4_buf;
+
+    /* Position 0 is the MSB of the word: shift the wanted bit down to the
+       least significant position. */
+    u4_value = u4_value >> (BITS_IN_INT - u4_bit_idx - 1);
+
+    if(31 == u4_bit_idx)
+    {
+        /* Current word exhausted: promote the next word and fetch a new one */
+        UWORD32 u4_fetched;
+
+        ps_stream->u4_buf = ps_stream->u4_buf_nxt;
+        u4_fetched        = *(ps_stream->pu4_buf_aligned)++;
+
+        CONV_LE_TO_BE(ps_stream->u4_buf_nxt,u4_fetched)
+    }
+
+    ps_stream->u4_offset = u4_pos + 1;
+
+    return (u4_value & 0x1);
+}
+/******************************************************************************
+*
+*  Function Name    : impeg2d_bit_stream_flush
+*
+*  Description      : This is a Bitstream processing function. It discards
+*                     u4_no_of_bits bits by advancing the bit offset and,
+*                     when a 32-bit word boundary is reached or crossed,
+*                     reloading the current and look-ahead words (this is
+*                     what the FLUSH_BITS macro expands to).
+*
+*  Arguments        :
+*  pv_ctxt          : Pointer to the Bitstream (cast to stream_t *).
+*  u4_no_of_bits    : No of bits to be skipped. FLUSH_BITS reloads at most
+*                     one word per use.
+*
+*  Values Returned  : None
+*******************************************************************************/
+INLINE void impeg2d_bit_stream_flush(void* pv_ctxt, UWORD32 u4_no_of_bits)
+{
+    stream_t *ps_stream = (stream_t *)pv_ctxt;
+    /* FLUSH_BITS expands to a complete braced block, hence no trailing ';' */
+    FLUSH_BITS(ps_stream->u4_offset,ps_stream->u4_buf,ps_stream->u4_buf_nxt,u4_no_of_bits,ps_stream->pu4_buf_aligned)
+    return;
+}
+/******************************************************************************
+*
+*  Function Name    : impeg2d_bit_stream_flush_to_byte_boundary
+*
+*  Description      : This is a Bitstream processing function. If the bit
+*                     offset is not a multiple of 8, it skips the remaining
+*                     bits of the current byte so that the offset lands on
+*                     the next byte boundary. An already aligned stream is
+*                     left untouched.
+*
+*  Arguments        :
+*  pv_ctxt          : Pointer to the Bitstream (cast to stream_t *).
+*
+*  Values Returned  : None
+*******************************************************************************/
+INLINE void impeg2d_bit_stream_flush_to_byte_boundary(void* pv_ctxt)
+{
+    stream_t *ps_stream = (stream_t *)pv_ctxt;
+
+    /* Number of bits already consumed from the current byte (0..7) */
+    const UWORD32 u4_bits_into_byte = ps_stream->u4_offset & 0x7;
+
+    if(0 == u4_bits_into_byte)
+    {
+        /* Already byte aligned, nothing to skip */
+        return;
+    }
+
+    impeg2d_bit_stream_flush(ps_stream, 8 - u4_bits_into_byte);
+}
+
+
+/******************************************************************************
+*
+*  Function Name    : impeg2d_bit_stream_nxt
+*
+*  Description      : This is a Bitstream processing function. It returns
+*                     the next i4_no_of_bits bits of the stream without
+*                     changing any stream state (a "peek"). Bits are taken
+*                     from the current word and, when the requested field
+*                     extends beyond it, from the look-ahead word.
+*
+*  Arguments        :
+*  ps_stream        : Pointer to the Bitstream.
+*  i4_no_of_bits    : No of bits to be returned. Must be greater than 0
+*                     (asserted); the shift by (32 - i4_no_of_bits) limits
+*                     it to 32.
+*
+*  Values Returned  : The requested bits, right justified: the first bit of
+*                     the stream ends up as the most significant bit of the
+*                     returned field.
+*******************************************************************************/
+INLINE UWORD32 impeg2d_bit_stream_nxt( stream_t  *ps_stream, WORD32 i4_no_of_bits)
+{
+    UWORD8      u1_bit_pos;
+    UWORD32     u4_result;
+
+    ASSERT(i4_no_of_bits > 0);
+
+    u1_bit_pos = ps_stream->u4_offset & 0x1F;
+
+    /* Drop the bits of the current word that were already consumed, then
+       right justify the requested field */
+    u4_result = (ps_stream->u4_buf << u1_bit_pos) >> (32 - i4_no_of_bits);
+
+    /* Position just past the last requested bit, relative to the current word */
+    u1_bit_pos += i4_no_of_bits;
+
+    if(u1_bit_pos > 32)
+    {
+        /* The field spills into the look-ahead word: append its leading bits */
+        u1_bit_pos &= (BITS_IN_INT - 1);
+        u4_result |= ps_stream->u4_buf_nxt >> (BITS_IN_INT - u1_bit_pos);
+    }
+
+    return (u4_result);
+}
+/******************************************************************************
+*
+*  Function Name    : impeg2d_bit_stream_get
+*
+*  Description      : This is a Bitstream processing function. It returns
+*                     the next u4_num_bits bits of the stream and then
+*                     consumes them, i.e. a peek with impeg2d_bit_stream_nxt
+*                     followed by impeg2d_bit_stream_flush.
+*
+*  Arguments        :
+*  pv_ctxt          : Pointer to the Bitstream (cast to stream_t *).
+*  u4_num_bits      : No of bits to be read. It is handed unchanged to
+*                     impeg2d_bit_stream_nxt, which asserts that it is
+*                     greater than 0.
+*
+*  Values Returned  : The bits read, right justified, as returned by
+*                     impeg2d_bit_stream_nxt.
+*******************************************************************************/
+
+INLINE UWORD32 impeg2d_bit_stream_get(void* pv_ctxt, UWORD32 u4_num_bits)
+{
+    stream_t *ps_stream = (stream_t *)pv_ctxt;
+    UWORD32 u4_value;
+
+    /* Look at the bits first, then advance past them */
+    u4_value = impeg2d_bit_stream_nxt(ps_stream, u4_num_bits);
+    impeg2d_bit_stream_flush(ps_stream, u4_num_bits);
+
+    return(u4_value);
+}
+
+
+
+/******************************************************************************
+*
+*  Function Name    : impeg2d_bit_stream_num_bits_read
+*
+*  Description      : This is a Bitstream processing function. It reports
+*                     the stream's bit offset reduced by eight times the low
+*                     two address bits of pv_bs_buf. The stream itself is
+*                     neither read nor modified.
+*                     Presumably this is the number of bits consumed counted
+*                     from the first byte of pv_bs_buf, with u4_offset being
+*                     counted from the preceding 4-byte aligned address -
+*                     confirm against impeg2d_bit_stream_init.
+*  Arguments        :
+*  pv_ctxt          : Pointer to the Bitstream (cast to stream_t *).
+*
+*  Values Returned  : u4_offset - 8 * (address of pv_bs_buf & 0x3)
+*******************************************************************************/
+INLINE UWORD32 impeg2d_bit_stream_num_bits_read(void* pv_ctxt)
+{
+    stream_t *ps_stream = (stream_t *)pv_ctxt;
+
+    /* Distance in bytes of pv_bs_buf from the 4-byte aligned address below it */
+    const size_t misalign_bytes = (size_t)(ps_stream->pv_bs_buf) & 0x3;
+
+    return (UWORD32)(ps_stream->u4_offset - (misalign_bytes << 3));
+}
+
+

diff --git a/decoder/impeg2d_bitstream.h b/decoder/impeg2d_bitstream.h
new file mode 100644
index 0000000..4ce4013
--- /dev/null
+++ b/decoder/impeg2d_bitstream.h

@@ -0,0 +1,156 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : impeg2d_bitstream.h                                       */
+/*                                                                           */
+/*  Description       : Bitstream reader state (stream_t), inline bit-access */
+/*                      macros and prototypes of the bitstream functions     */
+/*                      used by the MPEG-2 decoder                           */
+/*                                                                           */
+/*  List of Functions : None (declarations and macros only)                  */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         10 01 2005   Ittiam          Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+#ifndef __IMPEG2D_BITSTREAM_H__
+#define __IMPEG2D_BITSTREAM_H__
+
+
+
+/* Structure for the stream: state of the 32-bit word based bitstream reader */
+typedef struct _stream_t
+{
+    void    *pv_bs_buf;               /* Pointer to buffer containing the
+                                        bitstream                    */
+
+    UWORD32  *pu4_buf_aligned;         /* Word pointer into the buffer after alignment correction.
+                                         It points to the next word that will be loaded
+                                         into u4_buf_nxt */
+
+    UWORD32  u4_offset;                  /* Offset of the current bit, counted in bits. Its low
+                                          5 bits select the bit inside u4_buf (0 = MSB) */
+
+    UWORD32  u4_buf;                  /* Buffer storing the current word */
+
+    UWORD32  u4_buf_nxt;              /* Buffer storing the next Word */
+
+    UWORD32  u4_max_offset;            /* Max bit stream buffer offset for error checks. It is
+                                         compared with u4_offset, i.e. it is counted in
+                                         bits, not bytes */
+} stream_t;
+/* Consumes one bit from 'stream' with impeg2d_bit_stream_get(). The bit is
+ * expected to be 1, but a different value is deliberately ignored. The 'dec'
+ * argument is not used by the expansion. */
+#define GET_MARKER_BIT(dec,stream)                                             \
+{                                                                              \
+    if (impeg2d_bit_stream_get(stream,1) != 0x1) {                             \
+    /* No need to return error if marker is not present. */                    \
+    }                                                                          \
+}
+
+/* Define A macro for inlining of FlushBits.
+ *
+ * Advances u4_offset by u4_no_bits. When the advance reaches or crosses the
+ * end of the current 32-bit word, u4_buf_nxt is moved into u4_buf and the next
+ * word is loaded through pu4_buf_aligned (post-incremented) and passed through
+ * CONV_LE_TO_BE into u4_buf_nxt. At most one word is reloaded per use, so
+ * (u4_offset & 0x1f) + u4_no_bits must stay below 64.
+ *
+ * Expands to a braced block that declares a local u4_temp; the uses visible in
+ * this change put no semicolon after the invocation. Arguments are pasted
+ * without parentheses and some are evaluated more than once, so pass plain
+ * lvalues / simple values only, and none named u4_temp.
+ * NOTE(review): the load is not checked against the end of the buffer.
+ */
+#define     FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_no_bits,pu4_buf_aligned) \
+{                                                                              \
+        UWORD32     u4_temp;                                                   \
+                                                                               \
+        if (((u4_offset & 0x1f) + u4_no_bits)>= 32)                            \
+        {                                                                      \
+            u4_buf              = u4_buf_nxt;                                  \
+                                                                               \
+            u4_temp             = *(pu4_buf_aligned)++;                        \
+                                                                               \
+            CONV_LE_TO_BE(u4_buf_nxt,u4_temp)                                  \
+        }                                                                      \
+        u4_offset               += u4_no_bits;                                 \
+}
+
+/* Macro to initialize the variables from stream.
+ *
+ * Copies u4_buf, u4_buf_nxt, u4_offset and pu4_buf_aligned out of *stream into
+ * the caller's variables passed as the first four arguments. 'stream' is used
+ * as 'stream->field' without parentheses, so pass a plain pointer name.
+ */
+#define GET_TEMP_STREAM_DATA(u4_buf,u4_buf_nxt,u4_offset,pu4_buf_aligned,stream)    \
+{                                                                                   \
+    u4_buf = stream->u4_buf;                                                        \
+    u4_buf_nxt = stream->u4_buf_nxt;                                                \
+    u4_offset = stream->u4_offset;                                                     \
+    pu4_buf_aligned = stream->pu4_buf_aligned;                                      \
+}
+
+/* Macro to put the stream variable values back.
+ *
+ * Stores the caller's u4_buf, u4_buf_nxt, u4_offset and pu4_buf_aligned values
+ * into the corresponding fields of *stream. 'stream' is used as
+ * 'stream->field' without parentheses, so pass a plain pointer name.
+ */
+#define PUT_TEMP_STREAM_DATA(u4_buf,u4_buf_nxt,u4_offset,pu4_buf_aligned,stream)    \
+{                                                                                   \
+    stream->u4_buf = u4_buf;                                                        \
+    stream->u4_buf_nxt = u4_buf_nxt;                                                \
+    stream->u4_offset = u4_offset;                                                     \
+    stream->pu4_buf_aligned = pu4_buf_aligned;                                      \
+}
+
+/* Macro to implement the next bits (peek) inline (ibits_nxt_inline).
+ *
+ * Stores in u4_bits the next no_of_bits bits of the stream, right justified.
+ * Bits are taken from u4_buf and, when the field extends beyond it, from
+ * u4_buf_nxt. Nothing is consumed: u4_buf, u4_buf_nxt and u4_offset are only
+ * read. The shift by (32 - no_of_bits) requires no_of_bits to be 1..32.
+ *
+ * Expands to a braced block that declares locals u4_bit_ptr and u4_temp, so
+ * do not pass variables with those names. Arguments are pasted without
+ * parentheses.
+ */
+#define IBITS_NXT(u4_buf, u4_buf_nxt, u4_offset, u4_bits, no_of_bits)              \
+{                                                                                   \
+    UWORD8 u4_bit_ptr;                                                              \
+    UWORD32 u4_temp;                                                                \
+                                                                                    \
+    u4_bit_ptr  = u4_offset & 0x1F;                                                 \
+    u4_bits     = u4_buf << u4_bit_ptr;                                             \
+                                                                                    \
+    u4_bit_ptr  += no_of_bits;                                                      \
+                                                                                    \
+    if(32 < u4_bit_ptr)                                                             \
+    {                                                                               \
+        /*  Read bits from the next word if necessary */                            \
+        u4_temp     = u4_buf_nxt;                                           \
+        u4_bit_ptr  &= (BITS_IN_INT - 1);                                           \
+                                                                                    \
+        u4_temp     = (u4_temp >> (BITS_IN_INT - u4_bit_ptr));                      \
+                                                                                    \
+    /* u4_temp consists of bits,if any that had to be read from the next word*/     \
+    /* of the buffer.The bits read from both the words are concatenated and*/       \
+    /* moved to the least significant positions of 'u4_bits'*/                      \
+            u4_bits = (u4_bits >> (32 - no_of_bits)) | u4_temp;                     \
+        }                                                                           \
+        else                                                                        \
+        {                                                                           \
+            u4_bits = (u4_bits >> (32 - no_of_bits));                               \
+        }                                                                           \
+}
+
+/* Macro to implement the get bits inline (ibits_get_inline).
+ *
+ * Reads no_of_bits bits into u4_bits with IBITS_NXT and then consumes them
+ * with FLUSH_BITS, which updates u4_offset, u4_buf, u4_buf_nxt and
+ * pu4_buf_aligned. Both inner macros expand to braced blocks and are invoked
+ * here without a trailing semicolon.
+ */
+#define IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,no_of_bits)   \
+{                                                                                   \
+    IBITS_NXT(u4_buf, u4_buf_nxt, u4_offset, u4_bits, no_of_bits)                   \
+    FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,no_of_bits,pu4_buf_aligned)              \
+}
+/* Prototypes of the bitstream access functions. The 'ctxt' arguments are
+ * void pointers that the functions cast to stream_t *.
+ * NOTE(review): most of these are declared INLINE although this header holds
+ * no function bodies; confirm how INLINE is defined for the target compilers.
+ */
+void impeg2d_bit_stream_init(stream_t *stream,
+                             UWORD8 *byteBuf,
+                             UWORD32 u4_max_offset);
+INLINE UWORD8 impeg2d_bit_stream_get_bit(stream_t *stream);
+INLINE void impeg2d_bit_stream_flush(void* ctxt, UWORD32 NoOfBits);
+INLINE void impeg2d_bit_stream_flush_to_byte_boundary(void* ctxt);
+INLINE UWORD32 impeg2d_bit_stream_nxt(stream_t *stream, WORD32 NoOfBits);
+
+INLINE UWORD32 impeg2d_bit_stream_get(void* ctxt, UWORD32 numBits);
+INLINE UWORD32 impeg2d_bit_stream_num_bits_read(void* ctxt);
+
+
+
+
+
+
+
+#endif /* __IMPEG2D_BITSTREAM_H__ */

diff --git a/decoder/impeg2d_d_pic.c b/decoder/impeg2d_d_pic.c
new file mode 100644
index 0000000..a90e16d
--- /dev/null
+++ b/decoder/impeg2d_d_pic.c

@@ -0,0 +1,251 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#include "iv_datatypedef.h"
+#include "impeg2_defs.h"
+#include "impeg2_globals.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_mem_func.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_buf_mgr.h"
+
+#include "impeg2d.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_structs.h"
+#include "impeg2d_vld.h"
+#include "impeg2d_vld_tables.h"
+/* Block size constants: BLK_SIZE is 8, the luma size is twice that and the
+ * chroma size equals it (presumably edge lengths in pixels - confirm).
+ * NOTE(review): none of the three is referenced anywhere else in this file. */
+#define BLK_SIZE 8
+#define LUMA_BLK_SIZE (2 * (BLK_SIZE))
+#define CHROMA_BLK_SIZE (BLK_SIZE)
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_get_luma_dc_diff                                 */
+/*                                                                           */
+/*  Description   : Reads the DC differential of one luma block. The size    */
+/*                  (bit count) of the differential is decoded first with    */
+/*                  the luma DC size VLC table; a size of zero means a       */
+/*                  differential of zero and no further bits are read.       */
+/*                                                                           */
+/*  Inputs        : ps_stream - Input stream                                 */
+/*                                                                           */
+/*  Processing    : The raw field is read with impeg2d_bit_stream_get. When  */
+/*                  its most significant bit is clear the value is           */
+/*                  negative and (2^size - 1) is subtracted from it.         */
+/*                                                                           */
+/*  Returns       : dc_diff - dc differential used in dc prediction          */
+/*                                                                           */
+/*****************************************************************************/
+WORD16  impeg2d_get_luma_dc_diff(stream_t *ps_stream)
+{
+    WORD16  i2_diff;
+
+    /* Number of bits used to code the differential */
+    const UWORD16 u2_num_bits = impeg2d_dec_vld_symbol(ps_stream,
+                                                       gai2_impeg2d_dct_dc_size[0],
+                                                       MPEG2_DCT_DC_LUMA_SIZE_LEN)
+                              + MPEG2_DCT_DC_SIZE_OFFSET;
+
+    if (0 == u2_num_bits)
+    {
+        return 0;
+    }
+
+    i2_diff = impeg2d_bit_stream_get(ps_stream, u2_num_bits);
+
+    /* A cleared top bit marks a negative differential */
+    if (0 == (i2_diff & (1 << (u2_num_bits - 1))))
+    {
+        i2_diff -= (1 << u2_num_bits) - 1;
+    }
+
+    return i2_diff;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_get_chroma_dc_diff                               */
+/*                                                                           */
+/*  Description   : Reads the DC differential of one chroma block. The size  */
+/*                  (bit count) of the differential is decoded first with    */
+/*                  the chroma DC size VLC table; a size of zero means a     */
+/*                  differential of zero and no further bits are read.       */
+/*                                                                           */
+/*  Inputs        : ps_stream - Input stream                                 */
+/*                                                                           */
+/*  Processing    : The raw field is read with impeg2d_bit_stream_get. When  */
+/*                  its most significant bit is clear the value is           */
+/*                  negative and (2^size - 1) is subtracted from it.         */
+/*                                                                           */
+/*  Returns       : dc_diff - dc differential used in dc prediction          */
+/*                                                                           */
+/*****************************************************************************/
+WORD16  impeg2d_get_chroma_dc_diff(stream_t *ps_stream)
+{
+    WORD16  i2_diff;
+
+    /* Number of bits used to code the differential */
+    const UWORD16 u2_num_bits = impeg2d_dec_vld_symbol(ps_stream,
+                                                       gai2_impeg2d_dct_dc_size[1],
+                                                       MPEG2_DCT_DC_CHROMA_SIZE_LEN)
+                              + MPEG2_DCT_DC_SIZE_OFFSET;
+
+    if (0 == u2_num_bits)
+    {
+        return 0;
+    }
+
+    i2_diff = impeg2d_bit_stream_get(ps_stream, u2_num_bits);
+
+    /* A cleared top bit marks a negative differential */
+    if (0 == (i2_diff & (1 << (u2_num_bits - 1))))
+    {
+        i2_diff -= (1 << u2_num_bits) - 1;
+    }
+
+    return i2_diff;
+}
+/*******************************************************************************
+*  Function Name   : impeg2d_dec_d_slice
+*
+*  Description     : Decodes the macroblocks of one slice of a D picture.
+*                    Only DC values are decoded: for each of the
+*                    NUM_LUMA_BLKS luma blocks and for the U and V blocks a
+*                    DC differential is read, added to the running predictor
+*                    in u2_def_dc_pred[], clipped with CLIP_U8 and handed to
+*                    pf_memset_8bit_8x8_block (presumably fills the 8x8
+*                    destination block with that value - confirm).
+*                    The loop ends when no macroblocks are left or when the
+*                    next 23 bits of the stream are all zero.
+*
+*  Arguments       :
+*  ps_dec          : Decoder state
+*
+*  Values Returned : IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR when the bit offset
+*                    has moved past u4_max_offset, otherwise IVD_ERROR_NONE
+*******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_dec_d_slice(dec_state_t *ps_dec)
+{
+    UWORD32 i;
+    yuv_buf_t *ps_cur_frm_buf  = &ps_dec->s_cur_frm_buf;
+
+    stream_t   *ps_stream       = &ps_dec->s_bit_stream;
+    UWORD8   *pu1_vld_buf;
+
+    WORD16 i2_dc_diff;
+    UWORD32 u4_frame_width = ps_dec->u2_frame_width;
+    UWORD32 u4_frm_offset = 0;
+    if(ps_dec->u2_picture_structure != FRAME_PICTURE)
+    {
+        u4_frame_width <<= 1; /* field picture: destination stride is two frame rows */
+        if(ps_dec->u2_picture_structure == BOTTOM_FIELD)
+        {
+            u4_frm_offset = ps_dec->u2_frame_width; /* bottom field starts one frame row down */
+        }
+    }
+
+    do
+    {
+        /* One iteration decodes one macroblock */
+        UWORD32 u4_x_offset, u4_y_offset;
+        UWORD32 u4_blk_pos;
+        WORD16 i2_dc_val;
+
+        UWORD32 u4_dst_x_offset     = u4_frm_offset + (ps_dec->u2_mb_x << 4);
+        UWORD32 u4_dst_y_offset     = (ps_dec->u2_mb_y << 4) * u4_frame_width;
+        UWORD8 *pu1_vld_buf8        = ps_cur_frm_buf->pu1_y + u4_dst_x_offset + u4_dst_y_offset;
+        UWORD32 u4_dst_wd           = u4_frame_width;
+        /*------------------------------------------------------------------*/
+        /* Discard the Macroblock stuffing in case of MPEG-1 stream         */
+        /*------------------------------------------------------------------*/
+        while(impeg2d_bit_stream_nxt(ps_stream,MB_STUFFING_CODE_LEN) == MB_STUFFING_CODE)
+            impeg2d_bit_stream_flush(ps_stream,MB_STUFFING_CODE_LEN);
+
+        /*------------------------------------------------------------------*/
+        /* Flush 2 bits from bitstream [MB_Type and MacroBlockAddrIncrement]*/
+        /* The first bit is skipped; the second is read and expected to be  */
+        /* 1, but a different value is ignored.                             */
+        /*------------------------------------------------------------------*/
+        impeg2d_bit_stream_flush(ps_stream,1);
+
+        if(impeg2d_bit_stream_get(ps_stream, 1) != 0x01)
+        {
+            /* Ignore and continue decoding. */
+        }
+
+        /* Process LUMA blocks of the MB */
+        for(i = 0; i < NUM_LUMA_BLKS; ++i)
+        {
+            /* Locate block i inside the macroblock using the offset tables */
+            u4_x_offset    = gai2_impeg2_blk_x_off[i];
+            u4_y_offset    = gai2_impeg2_blk_y_off_frm[i] ;
+            u4_blk_pos     = (u4_y_offset * u4_dst_wd) + u4_x_offset;
+            pu1_vld_buf     = pu1_vld_buf8 + u4_blk_pos;
+
+            i2_dc_diff = impeg2d_get_luma_dc_diff(ps_stream);
+            i2_dc_val = ps_dec->u2_def_dc_pred[Y_LUMA] + i2_dc_diff;
+            ps_dec->u2_def_dc_pred[Y_LUMA] = i2_dc_val; /* predictor keeps the unclipped value */
+            i2_dc_val = CLIP_U8(i2_dc_val);
+
+            ps_dec->pf_memset_8bit_8x8_block(pu1_vld_buf, i2_dc_val, u4_dst_wd);
+        }
+
+
+
+        /* Process U block of the MB */
+        /* Chroma offsets are derived from the luma ones: x and stride are halved, the y offset is quartered */
+        u4_dst_x_offset                >>= 1;
+        u4_dst_y_offset                >>= 2;
+        u4_dst_wd                      >>= 1;
+        pu1_vld_buf                     = ps_cur_frm_buf->pu1_u + u4_dst_x_offset + u4_dst_y_offset;
+        i2_dc_diff                     = impeg2d_get_chroma_dc_diff(ps_stream);
+        i2_dc_val                      = ps_dec->u2_def_dc_pred[U_CHROMA] + i2_dc_diff;
+        ps_dec->u2_def_dc_pred[U_CHROMA]    = i2_dc_val;
+        i2_dc_val = CLIP_U8(i2_dc_val);
+        ps_dec->pf_memset_8bit_8x8_block(pu1_vld_buf, i2_dc_val, u4_dst_wd);
+
+
+        /* Process V block of the MB */
+        /* Same offsets and stride as the U block, V plane base pointer */
+        pu1_vld_buf                     = ps_cur_frm_buf->pu1_v + u4_dst_x_offset + u4_dst_y_offset;
+        i2_dc_diff                     = impeg2d_get_chroma_dc_diff(ps_stream);
+        i2_dc_val                      = ps_dec->u2_def_dc_pred[V_CHROMA] + i2_dc_diff;
+        ps_dec->u2_def_dc_pred[V_CHROMA]    = i2_dc_val;
+        i2_dc_val = CLIP_U8(i2_dc_val);
+        ps_dec->pf_memset_8bit_8x8_block(pu1_vld_buf, i2_dc_val, u4_dst_wd);
+
+        /* Common MB processing Steps */
+
+
+        ps_dec->u2_num_mbs_left--;
+        ps_dec->u2_mb_x++;
+
+        if(ps_dec->s_bit_stream.u4_offset > ps_dec->s_bit_stream.u4_max_offset)
+        {
+            return IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR;
+        }
+        else if ((ps_dec->u2_mb_x == ps_dec->u2_num_horiz_mb) && ((ps_dec->s_bit_stream.u4_offset + START_CODE_PREFIX_LEN) < ps_dec->s_bit_stream.u4_max_offset))
+        {
+            /* End of an MB row with more than START_CODE_PREFIX_LEN bits left: move to the next row */
+            ps_dec->u2_mb_x = 0;
+            ps_dec->u2_mb_y++;
+
+        }
+
+        /* Flush end of macro block */
+        impeg2d_bit_stream_flush(ps_stream,1);
+    }
+    while(ps_dec->u2_num_mbs_left != 0 && impeg2d_bit_stream_nxt(&ps_dec->s_bit_stream,23) != 0x0); /* stop on 23 zero bits */
+    return (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE;
+}/* End of impeg2d_dec_d_slice() */

diff --git a/decoder/impeg2d_debug.c b/decoder/impeg2d_debug.c
new file mode 100644
index 0000000..ff33290
--- /dev/null
+++ b/decoder/impeg2d_debug.c

@@ -0,0 +1,509 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#include <string.h>
+#include <stdio.h>
+
+#include "iv_datatypedef.h"
+#include "iv.h"
+#include "ivd.h"
+#include "impeg2d.h"
+
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_macros.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_mem_func.h"
+#include "impeg2_globals.h"
+
+#include "impeg2d_bitstream.h"
+#include "impeg2d_api.h"
+#include "impeg2d_structs.h"
+#include "impeg2d_debug.h"
+
+#if STATISTICS
+WORD32 gai4_impeg2d_idct_inp_last_nonzero_histogram[64] = {0};      /* bin = last_row * 8 + last_col */
+WORD32 gai4_impeg2d_idct_inp_num_nonzero_histogram[64] = {0};       /* bin = non-zero coeffs in the block */
+WORD32 gai4_impeg2d_idct_inp_last_non_zero_row_histogram[8] = {0};  /* bin = highest bit set in the row mask */
+/* The same three histograms, collected at the inverse-quantiser input */
+WORD32 gai4_impeg2d_iqnt_inp_last_nonzero_histogram[64] = {0};
+WORD32 gai4_impeg2d_iqnt_inp_num_nonzero_histogram[64] = {0};
+WORD32 gai4_impeg2d_iqnt_inp_last_non_zero_row_histogram[8] = {0};
+/* IDCT input counters; gi4_impeg2d_idct_cnt is the number of blocks examined */
+WORD32 gi4_impeg2d_idct_inp_only_first_coeff = 0;         /* blocks whose single non-zero coeff is [0] */
+WORD32 gi4_impeg2d_idct_inp_only_last_coeff = 0;          /* blocks whose single non-zero coeff is [63] */
+WORD32 gi4_impeg2d_idct_inp_only_first_n_last_coeff = 0;  /* two non-zero coeffs: [0] and [63] == 1 */
+WORD32 gi4_impeg2d_idct_cnt = 0;
+
+/* IQnt input counters, with the same meaning as the IDCT ones above */
+WORD32 gi4_impeg2d_iqnt_inp_only_first_coeff = 0;
+WORD32 gi4_impeg2d_iqnt_inp_only_last_coeff = 0;
+WORD32 gi4_impeg2d_iqnt_inp_only_first_n_last_coeff = 0;
+WORD32 gi4_impeg2d_iqnt_cnt = 0;
+
+
+/* Records statistics for one 8x8 block at the inverse-quantiser input.      */
+/* pi2_iqnt_inp is read as [row * 8 + col]. i4_non_zero_cols/_rows are the    */
+/* caller's occupancy bitmasks (bit n = column/row n holds a non-zero value); */
+/* they are re-derived here and a disagreement is reported on stdout.         */
+void impeg2d_iqnt_inp_statistics(WORD16 *pi2_iqnt_inp,
+                                 WORD32 i4_non_zero_cols,
+                                 WORD32 i4_non_zero_rows)
+{
+    WORD32 i, j;
+    WORD32 i4_last_row = 0, i4_last_col = 0;
+    WORD32 i4_num_non_zero = 0;
+    WORD32 i4_non_zero_cols_computed = 0;
+    WORD32 i4_non_zero_rows_computed = 0;
+    WORD32 i4_num_bin;
+
+    /* One pass over the block: rebuild both masks, track last position/count */
+    for(i = 0; i < 8; i++)
+    {
+        for(j = 0; j < 8; j++)
+        {
+            if(pi2_iqnt_inp[i * 8 + j])
+            {
+                i4_non_zero_cols_computed |= (1 << j);
+                i4_non_zero_rows_computed |= (1 << i);
+                i4_last_col = MAX(i4_last_col, j);
+                i4_last_row = MAX(i4_last_row, i);
+                i4_num_non_zero++;
+            }
+        }
+    }
+
+    if(i4_non_zero_cols_computed != i4_non_zero_cols)
+    {
+        printf("IQ Input: Invalid non_zero_cols 0x%x non_zero_cols_computed 0x%x\n", i4_non_zero_cols, i4_non_zero_cols_computed);
+    }
+    if(i4_non_zero_rows_computed != i4_non_zero_rows)
+    {
+        printf("IQ Input: Invalid non_zero_rows 0x%x non_zero_rows_computed 0x%x\n", i4_non_zero_rows, i4_non_zero_rows_computed);
+    }
+
+    /* Only masks 0x01..0xFF map onto the 8 row bins; anything else is skipped */
+    if((i4_non_zero_rows > 0) && (i4_non_zero_rows <= 0xFF))
+    {
+        gai4_impeg2d_iqnt_inp_last_non_zero_row_histogram[32 - CLZ(i4_non_zero_rows) - 1]++;
+    }
+    gai4_impeg2d_iqnt_inp_last_nonzero_histogram[i4_last_row * 8 + i4_last_col]++;
+
+    /* The count spans 0..64 but only 64 bins exist: fold 64 into bin 63 */
+    i4_num_bin = (i4_num_non_zero > 63) ? 63 : i4_num_non_zero;
+    gai4_impeg2d_iqnt_inp_num_nonzero_histogram[i4_num_bin]++;
+    gi4_impeg2d_iqnt_cnt++;
+
+    /* Single non-zero coefficient: note whether it is (7,7) or (0,0) */
+    if(i4_num_non_zero == 1)
+    {
+        if(pi2_iqnt_inp[7 * 8 + 7])
+            gi4_impeg2d_iqnt_inp_only_last_coeff++;
+        if(pi2_iqnt_inp[0])
+            gi4_impeg2d_iqnt_inp_only_first_coeff++;
+    }
+    /* Two non-zero coefficients: (0,0) set together with (7,7) equal to 1 */
+    if(i4_num_non_zero == 2)
+    {
+        if((pi2_iqnt_inp[0]) && (1 == pi2_iqnt_inp[7 * 8 + 7]))
+            gi4_impeg2d_iqnt_inp_only_first_n_last_coeff++;
+    }
+}
+
+/* Records statistics for one 8x8 block at the IDCT input.                    */
+/* pi2_idct_inp is read as [row * 8 + col]. i4_non_zero_cols/_rows are the    */
+/* caller's occupancy bitmasks (bit n = column/row n holds a non-zero value); */
+/* they are re-derived here and a disagreement is reported on stdout.         */
+/* Updates the gai4_impeg2d_idct_inp_* histograms and gi4_impeg2d_idct_*.     */
+void impeg2d_idct_inp_statistics(WORD16 *pi2_idct_inp,
+                                 WORD32 i4_non_zero_cols,
+                                 WORD32 i4_non_zero_rows)
+{
+    WORD32 i, j;
+    WORD32 i4_last_row = 0, i4_last_col = 0;
+    WORD32 i4_num_non_zero = 0;
+    WORD32 i4_non_zero_cols_computed = 0;
+    WORD32 i4_non_zero_rows_computed = 0;
+    WORD32 i4_num_bin;
+
+    /* One pass over the block: rebuild both masks, track last position/count */
+    for(i = 0; i < 8; i++)
+    {
+        for(j = 0; j < 8; j++)
+        {
+            if(pi2_idct_inp[i * 8 + j])
+            {
+                i4_non_zero_cols_computed |= (1 << j);
+                i4_non_zero_rows_computed |= (1 << i);
+                i4_last_col = MAX(i4_last_col, j);
+                i4_last_row = MAX(i4_last_row, i);
+                i4_num_non_zero++;
+            }
+        }
+    }
+
+    if(i4_non_zero_cols_computed != i4_non_zero_cols)
+    {
+        printf("IDCT Input: Invalid non_zero_cols 0x%x non_zero_cols_computed 0x%x\n", i4_non_zero_cols, i4_non_zero_cols_computed);
+    }
+    if(i4_non_zero_rows_computed != i4_non_zero_rows)
+    {
+        printf("IDCT Input: Invalid non_zero_rows 0x%x non_zero_rows_computed 0x%x\n", i4_non_zero_rows, i4_non_zero_rows_computed);
+    }
+
+    /* Only masks 0x01..0xFF map onto the 8 row bins; anything else is skipped */
+    if((i4_non_zero_rows > 0) && (i4_non_zero_rows <= 0xFF))
+    {
+        gai4_impeg2d_idct_inp_last_non_zero_row_histogram[32 - CLZ(i4_non_zero_rows) - 1]++;
+    }
+    gai4_impeg2d_idct_inp_last_nonzero_histogram[i4_last_row * 8 + i4_last_col]++;
+
+    /* The count spans 0..64 but only 64 bins exist: fold 64 into bin 63 */
+    i4_num_bin = (i4_num_non_zero > 63) ? 63 : i4_num_non_zero;
+    gai4_impeg2d_idct_inp_num_nonzero_histogram[i4_num_bin]++;
+    gi4_impeg2d_idct_cnt++;
+
+    /* Single non-zero coefficient: note whether it is (7,7) or (0,0) */
+    if(i4_num_non_zero == 1)
+    {
+        if(pi2_idct_inp[7 * 8 + 7])
+            gi4_impeg2d_idct_inp_only_last_coeff++;
+        if(pi2_idct_inp[0])
+            gi4_impeg2d_idct_inp_only_first_coeff++;
+    }
+
+    /* Two non-zero coefficients: (0,0) set together with (7,7) equal to 1 */
+    if(i4_num_non_zero == 2)
+    {
+        if((pi2_idct_inp[0]) && (1 == pi2_idct_inp[7 * 8 + 7]))
+            gi4_impeg2d_idct_inp_only_first_n_last_coeff++;
+    }
+}
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_print_idct_inp_statistics                        */
+/*                                                                           */
+/*  Description   : Prints the figures collected by                          */
+/*                  impeg2d_idct_inp_statistics() as percentages.            */
+/*  Inputs        : None                                                     */
+/*  Globals       : gai4_impeg2d_idct_inp_*_histogram, gi4_impeg2d_idct_cnt, */
+/*                  gi4_impeg2d_idct_inp_only_*_coeff (all read only)        */
+/*  Processing    : The only-first/only-last counters are shown relative     */
+/*                  to gi4_impeg2d_idct_cnt, each histogram bin relative     */
+/*                  to the sum of its histogram. 64-bin histograms are       */
+/*                  printed as 8 rows of 8, plain and then cumulative.       */
+/*  Outputs       : Text on stdout                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : Totals of zero are not guarded against.                  */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2d_print_idct_inp_statistics()
+{
+    WORD32 i4_idx;
+    WORD32 i4_total;
+    WORD32 i4_running;
+    const WORD32 *pi4_hist;
+
+    /* Position of the last non-zero coefficient: total over all 64 bins */
+    pi4_hist = gai4_impeg2d_idct_inp_last_nonzero_histogram;
+    i4_total = 0;
+    for(i4_idx = 0; i4_idx < 64; i4_idx++)
+    {
+        i4_total += pi4_hist[i4_idx];
+    }
+
+    printf("IDCT input : Only last coeff non-zero %8.2f\n", (gi4_impeg2d_idct_inp_only_last_coeff * 100.0) / gi4_impeg2d_idct_cnt);
+    printf("IDCT input : Only first coeff non-zero (Includes DC + mismatch) %8.2f\n", (gi4_impeg2d_idct_inp_only_first_coeff * 100.0) / gi4_impeg2d_idct_cnt);
+
+    printf("IDCT input : Last non-zero coeff histogram\n");
+    for(i4_idx = 0; i4_idx < 64; i4_idx++)
+    {
+        printf("%8.2f \t", pi4_hist[i4_idx] * 100.0 / i4_total);
+        if(7 == (i4_idx & 7))
+        {
+            printf("\n");
+        }
+    }
+
+    printf("IDCT input : Cumulative Last non-zero coeff histogram\n");
+    i4_running = 0;
+    for(i4_idx = 0; i4_idx < 64; i4_idx++)
+    {
+        i4_running += pi4_hist[i4_idx];
+        printf("%8.2f \t", i4_running * 100.0 / i4_total);
+        if(7 == (i4_idx & 7))
+        {
+            printf("\n");
+        }
+    }
+
+    /* Number of non-zero coefficients per block: same pair of tables */
+    printf("IDCT input : Number of non-zero coeff histogram\n");
+    pi4_hist = gai4_impeg2d_idct_inp_num_nonzero_histogram;
+    i4_total = 0;
+    for(i4_idx = 0; i4_idx < 64; i4_idx++)
+    {
+        i4_total += pi4_hist[i4_idx];
+    }
+    for(i4_idx = 0; i4_idx < 64; i4_idx++)
+    {
+        printf("%8.2f \t", pi4_hist[i4_idx] * 100.0 / i4_total);
+        if(7 == (i4_idx & 7))
+        {
+            printf("\n");
+        }
+    }
+
+    printf("IDCT input : Cumulative number of non-zero coeffs histogram\n");
+    i4_running = 0;
+    for(i4_idx = 0; i4_idx < 64; i4_idx++)
+    {
+        i4_running += pi4_hist[i4_idx];
+        printf("%8.2f \t", i4_running * 100.0 / i4_total);
+        if(7 == (i4_idx & 7))
+        {
+            printf("\n");
+        }
+    }
+
+    /* Last non-zero row: eight bins printed on a single line */
+    printf("IDCT input : Last non-zero row histogram\n");
+    pi4_hist = gai4_impeg2d_idct_inp_last_non_zero_row_histogram;
+    i4_total = 0;
+    for(i4_idx = 0; i4_idx < 8; i4_idx++)
+    {
+        i4_total += pi4_hist[i4_idx];
+    }
+    for(i4_idx = 0; i4_idx < 8; i4_idx++)
+    {
+        printf("%8.2f \t", pi4_hist[i4_idx] * 100.0 / i4_total);
+    }
+    printf("\n");
+}
+
+/* Prints the statistics gathered by impeg2d_iqnt_inp_statistics() to stdout. */
+/* Every figure is a percentage: the "only ... coeff" counters are relative   */
+/* to gi4_impeg2d_iqnt_cnt, each histogram bin is relative to the sum of its  */
+/* own histogram. The 64-entry histograms are printed as 8 rows of 8 values,  */
+/* first plain and then cumulative. Totals of zero are not guarded against.   */
+void impeg2d_print_iqnt_inp_statistics()
+{
+    WORD32 i, j;
+    WORD32 i4_sum;
+    WORD32 i4_accumulator;
+
+    /* Total of the last-non-zero-position histogram */
+    i4_sum = 0;
+    for(i = 0; i < 8; i++)
+    {
+        for(j = 0; j < 8; j++)
+        {
+            i4_sum += gai4_impeg2d_iqnt_inp_last_nonzero_histogram[i * 8 + j];
+        }
+    }
+    /* Both counters are normalised by the IQnt block count, not the IDCT one */
+    printf("IQnt input : Only last coeff non-zero %8.2f\n", (gi4_impeg2d_iqnt_inp_only_last_coeff * 100.0) / gi4_impeg2d_iqnt_cnt);
+    printf("IQnt input : Only first coeff non-zero (Includes DC + mismatch) %8.2f\n", (gi4_impeg2d_iqnt_inp_only_first_coeff * 100.0) / gi4_impeg2d_iqnt_cnt);
+
+    printf("IQnt input : Last non-zero coeff histogram\n");
+    for(i = 0; i < 8; i++)
+    {
+        for(j = 0; j < 8; j++)
+        {
+            double val = gai4_impeg2d_iqnt_inp_last_nonzero_histogram[i * 8 + j] * 100.0 / i4_sum;
+            printf("%8.2f \t", val);
+        }
+        printf("\n");
+    }
+
+    printf("IQnt input : Cumulative Last non-zero coeff histogram\n");
+    i4_accumulator = 0;
+    for(i = 0; i < 8; i++)
+    {
+        for(j = 0; j < 8; j++)
+        {
+            double val;
+            i4_accumulator += gai4_impeg2d_iqnt_inp_last_nonzero_histogram[i * 8 + j];
+            val = i4_accumulator * 100.0 / i4_sum;
+            printf("%8.2f \t", val);
+        }
+        printf("\n");
+    }
+
+    /* Same pair of tables for the number of non-zero coefficients per block */
+    printf("IQnt input : Number of non-zero coeff histogram\n");
+    i4_sum = 0;
+    for(i = 0; i < 8; i++)
+    {
+        for(j = 0; j < 8; j++)
+        {
+            i4_sum += gai4_impeg2d_iqnt_inp_num_nonzero_histogram[i * 8 + j];
+        }
+    }
+    for(i = 0; i < 8; i++)
+    {
+        for(j = 0; j < 8; j++)
+        {
+            double val = gai4_impeg2d_iqnt_inp_num_nonzero_histogram[i * 8 + j] * 100.0 / i4_sum;
+            printf("%8.2f \t", val);
+        }
+        printf("\n");
+    }
+
+    printf("IQnt input : Cumulative number of non-zero coeffs histogram\n");
+    i4_accumulator = 0;
+    for(i = 0; i < 8; i++)
+    {
+        for(j = 0; j < 8; j++)
+        {
+            double val;
+            i4_accumulator += gai4_impeg2d_iqnt_inp_num_nonzero_histogram[i * 8 + j];
+            val = i4_accumulator * 100.0 / i4_sum;
+            printf("%8.2f \t", val);
+        }
+        printf("\n");
+    }
+
+    /* Last non-zero row: i4_accumulator holds the total of the eight bins */
+    printf("IQnt input : Last non-zero row histogram\n");
+    {
+        i4_accumulator = 0;
+        for(i = 0; i < 8; i++)
+        {
+            i4_accumulator += gai4_impeg2d_iqnt_inp_last_non_zero_row_histogram[i];
+        }
+        for(i = 0; i < 8; i++)
+        {
+            double val = gai4_impeg2d_iqnt_inp_last_non_zero_row_histogram[i] * 100.0 / i4_accumulator;
+            printf("%8.2f \t", val);
+        }
+        printf("\n");
+    }
+}
+
+/* Prints the IDCT-input statistics followed by the IQnt-input statistics. */
+void impeg2d_print_statistics(void)
+{
+    impeg2d_print_idct_inp_statistics();
+    impeg2d_print_iqnt_inp_statistics();
+}
+
+#endif
+
+#if DEBUG_MB
+
+static UWORD32  u4_debug_frm = 12;   /* frame number impeg2d_trace_mb_start() looks for */
+static UWORD32  u4_debug_mb_x = 3;   /* value its u4_mb_x argument is compared with */
+static UWORD32  u4_debug_mb_y = 0;   /* value its u4_mb_y argument is compared with */
+/* Counter advanced by impeg2d_frm_num_set() and returned by impeg2d_frm_num_get() */
+static UWORD32  u4_debug_frm_num = 0;
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_trace_mb_start                                   */
+/*                                                                           */
+/*  Description   : Debug hook: tests whether the current frame number and   */
+/*                  the given position equal the u4_debug_* targets.         */
+/*  Inputs        : u4_mb_x, u4_mb_y - values tested against the targets     */
+/*  Globals       : u4_debug_frm, u4_debug_mb_x, u4_debug_mb_y (read only)   */
+/*  Processing    : Frame number comes from impeg2d_frm_num_get().           */
+/*  Outputs       : None                                                     */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : Branch taken on a match is empty (no visible effect).    */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         13 07 2002   Ittiam          Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2d_trace_mb_start(UWORD32 u4_mb_x, UWORD32 u4_mb_y)
+{
+    const UWORD32 u4_cur_frm = impeg2d_frm_num_get();
+
+    if((u4_cur_frm == u4_debug_frm) &&
+       (u4_mb_x == u4_debug_mb_x) &&
+       (u4_mb_y == u4_debug_mb_y))
+    {
+        /* Nothing is executed on a match */
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_frm_num_set                                      */
+/*                                                                           */
+/*  Description   : Advances the debug frame counter by one.                 */
+/*                  The counter is file-local and is read back through       */
+/*                  impeg2d_frm_num_get().                                   */
+/*  Inputs        : None                                                     */
+/*  Globals       : u4_debug_frm_num (incremented)                           */
+/*  Processing    : Adds one to u4_debug_frm_num on every call.              */
+/*  Outputs       : None                                                     */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : Despite the name no value is supplied by the caller;     */
+/*                  the counter can only be advanced.                        */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         13 07 2002   Ittiam          Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2d_frm_num_set(void)
+{
+    u4_debug_frm_num += 1;
+}
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_frm_num_get                                      */
+/*                                                                           */
+/*  Description   : Returns the current value of the debug frame counter,    */
+/*                  i.e. the number of impeg2d_frm_num_set() calls made      */
+/*                  so far.                                                  */
+/*  Inputs        : None                                                     */
+/*  Globals       : u4_debug_frm_num (read only)                             */
+/*  Processing    : Plain read of the counter;                               */
+/*                  no state is modified.                                    */
+/*  Outputs       : None                                                     */
+/*  Returns       : Current value of u4_debug_frm_num                        */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         13 07 2002   Ittiam          Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+UWORD32 impeg2d_frm_num_get(void)
+{
+    return u4_debug_frm_num;
+}
+
+#endif

diff --git a/decoder/impeg2d_debug.h b/decoder/impeg2d_debug.h
new file mode 100644
index 0000000..5780427
--- /dev/null
+++ b/decoder/impeg2d_debug.h

@@ -0,0 +1,121 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : impeg2d_debug.h                                      */
+/*                                                                           */
+/*  Description       : Compile-time switches and wrapper macros for the     */
+/*                      decoder's macroblock tracing, input statistics and   */
+/*                      profiling aids.                                      */
+/*                                                                           */
+/*  List of Functions : None defined here (declarations and macros only)     */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         10 01 2005   Ittiam          Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+#ifndef __IMPEG2D_DEBUG_H__
+#define __IMPEG2D_DEBUG_H__
+
+
+/*************************************************************************/
+/* DEBUG                                                                 */
+/*************************************************************************/
+#define DEBUG_MB        0
+/* Non-zero DEBUG_MB maps the wrappers below onto the helpers in         */
+/* impeg2d_debug.c; otherwise every wrapper expands to nothing.          */
+#if DEBUG_MB
+void impeg2d_trace_mb_start(UWORD32 mb_x, UWORD32 mb_y);
+void impeg2d_frm_num_set(void);
+UWORD32 impeg2d_frm_num_get(void);
+/* The wrappers expand to calls, so any argument expression is accepted */
+#define IMPEG2D_TRACE_MB_START(mb_x, mb_y) impeg2d_trace_mb_start(mb_x, mb_y)
+#define IMPEG2D_FRM_NUM_SET()              impeg2d_frm_num_set()
+#define IMPEG2D_FRM_NUM_GET()              impeg2d_frm_num_get()
+#else
+#define IMPEG2D_TRACE_MB_START(mb_x, mb_y)
+#define IMPEG2D_FRM_NUM_SET()
+#define IMPEG2D_FRM_NUM_GET()
+#endif
+
+
+#define STATISTICS  0
+/* Non-zero STATISTICS maps the wrappers below onto impeg2d_debug.c; otherwise they expand to nothing */
+#if STATISTICS
+void impeg2d_idct_inp_statistics(WORD16 *pi2_idct_inp, WORD32 non_zero_cols, WORD32 non_zero_rows);
+void impeg2d_iqnt_inp_statistics(WORD16 *pi2_iqnt_inp, WORD32 non_zero_cols, WORD32 non_zero_rows);
+void impeg2d_print_statistics(void);
+#define IMPEG2D_IDCT_INP_STATISTICS(pi2_idct_inp, non_zero_cols, non_zero_rows)  impeg2d_idct_inp_statistics(pi2_idct_inp, non_zero_cols, non_zero_rows)
+#define IMPEG2D_IQNT_INP_STATISTICS(pi2_iqnt_inp, non_zero_cols, non_zero_rows)  impeg2d_iqnt_inp_statistics(pi2_iqnt_inp, non_zero_cols, non_zero_rows)
+#define IMPEG2D_PRINT_STATISTICS()            impeg2d_print_statistics()
+#else
+#define IMPEG2D_IDCT_INP_STATISTICS(pi2_idct_inp, non_zero_cols, non_zero_rows)
+#define IMPEG2D_IQNT_INP_STATISTICS(pi2_iqnt_inp, non_zero_cols, non_zero_rows)
+#define IMPEG2D_PRINT_STATISTICS()
+#endif
+
+
+#if 0 /* change to 1 to define every PROFILE_DIS_* switch listed here */
+#define PROFILE_DIS_SKIP_MB
+#define PROFILE_DIS_MC
+#define PROFILE_DIS_INVQUANT
+#define PROFILE_DIS_IDCT
+#define PROFILE_DIS_MEMSET_RESBUF
+#endif
+/* Each PROFILE_DIS_* switch selects the active form of the matching          */
+/* PROFILE_DISABLE_* macro below: a bare return, an if(0) prefix, or nothing. */
+#ifdef PROFILE_DIS_SKIP_MB
+#define PROFILE_DISABLE_SKIP_MB() return;
+#else
+#define PROFILE_DISABLE_SKIP_MB()
+#endif
+
+#ifdef PROFILE_DIS_MC
+#define PROFILE_DISABLE_MC_IF0 if(0)
+#define PROFILE_DISABLE_MC_RETURN return;
+#else
+#define PROFILE_DISABLE_MC_IF0
+#define PROFILE_DISABLE_MC_RETURN
+#endif
+
+#ifdef PROFILE_DIS_INVQUANT
+#define PROFILE_DISABLE_INVQUANT_IF0 if(0)
+#else
+#define PROFILE_DISABLE_INVQUANT_IF0
+#endif
+
+#ifdef PROFILE_DIS_IDCT
+#define PROFILE_DISABLE_IDCT_IF0 if(0)
+#else
+#define PROFILE_DISABLE_IDCT_IF0
+#endif
+
+#ifdef PROFILE_DIS_MEMSET_RESBUF
+#define PROFILE_DISABLE_MEMSET_RESBUF_IF0 if(0)
+#else
+#define PROFILE_DISABLE_MEMSET_RESBUF_IF0
+#endif
+
+
+#endif /* __IMPEG2D_DEBUG_H__ */

diff --git a/decoder/impeg2d_dec_hdr.c b/decoder/impeg2d_dec_hdr.c
new file mode 100644
index 0000000..15e61fb
--- /dev/null
+++ b/decoder/impeg2d_dec_hdr.c

@@ -0,0 +1,1733 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#include <string.h>
+
+#include "iv_datatypedef.h"
+#include "iv.h"
+#include "ivd.h"
+#include "impeg2_macros.h"
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_mem_func.h"
+#include "impeg2_platform_macros.h"
+#include "ithread.h"
+#include "impeg2_job_queue.h"
+
+#include "impeg2d.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_api.h"
+#include "impeg2d_structs.h"
+#include "impeg2_globals.h"
+#include "impeg2d_pic_proc.h"
+
+
+
+/******************************************************************************
+*  Function Name   : impeg2d_next_start_code
+*
+*  Description     : Byte-aligns the stream, then drops one byte at a time
+*                    until the next START_CODE_PREFIX_LEN bits equal
+*                    START_CODE_PREFIX or u4_offset reaches u4_max_offset.
+*  Arguments       :
+*  ps_dec          : Decoder Context
+*
+*  Values Returned : None
+******************************************************************************/
+void impeg2d_next_start_code(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+
+    impeg2d_bit_stream_flush_to_byte_boundary(ps_stream);
+    /* Keep dropping bytes while no prefix is next and the limit is not reached */
+    while (!((START_CODE_PREFIX == impeg2d_bit_stream_nxt(ps_stream, START_CODE_PREFIX_LEN))
+        || (ps_stream->u4_offset >= ps_stream->u4_max_offset)))
+    {
+        impeg2d_bit_stream_get(ps_stream, 8);
+    }
+}
+/******************************************************************************
+*
+*  Function Name   : impeg2d_next_code
+*
+*  Description     : Byte-aligns the stream, then drops one byte at a time
+*                    until the next START_CODE_LEN bits equal
+*                    u4_start_code_val or u4_offset exceeds u4_max_offset.
+*                    The value of each dropped byte is ignored.
+*
+*  Arguments       :
+*  ps_dec            : Decoder Context
+*  u4_start_code_val : Code to stop in front of
+*
+*  Values Returned : None
+******************************************************************************/
+void impeg2d_next_code(dec_state_t *ps_dec, UWORD32 u4_start_code_val)
+{
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+
+    impeg2d_bit_stream_flush_to_byte_boundary(ps_stream);
+    while (!((u4_start_code_val == impeg2d_bit_stream_nxt(ps_stream, START_CODE_LEN))
+        || (ps_stream->u4_offset > ps_stream->u4_max_offset)))
+    {
+        /* The byte read here is thrown away; a non-zero (non-stuffing) */
+        /* value is deliberately not reported as an error.              */
+        (void)impeg2d_bit_stream_get(ps_stream, 8);
+    }
+}
+/******************************************************************************
+*  Function Name   : impeg2d_peek_next_start_code
+*
+*  Description     : Byte-aligns the stream, then drops one byte at a time
+*                    until the next START_CODE_PREFIX_LEN bits equal
+*                    START_CODE_PREFIX or u4_offset exceeds u4_max_offset.
+*                    Despite the name, the skipped bytes are consumed.
+*  Arguments       :
+*  ps_dec          : Decoder Context
+*  Values Returned : None
+******************************************************************************/
+void impeg2d_peek_next_start_code(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+
+    impeg2d_bit_stream_flush_to_byte_boundary(ps_stream);
+    /* Keep dropping bytes while no prefix is next and the limit is not passed */
+    while (!((START_CODE_PREFIX == impeg2d_bit_stream_nxt(ps_stream, START_CODE_PREFIX_LEN))
+        || (ps_stream->u4_offset > ps_stream->u4_max_offset)))
+    {
+        impeg2d_bit_stream_get(ps_stream, 8);
+    }
+}
+/******************************************************************************
+*
+*  Function Name   : impeg2d_dec_seq_hdr
+*
+*  Description     : Decodes the sequence header: picture size, aspect ratio
+*                    info, frame rate code and both quantiser matrices.
+*  Arguments       :
+*  ps_dec          : Decoder Context
+*
+*  Values Returned : IVD_ERROR_NONE on success, otherwise an error code
+******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_dec_seq_hdr(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream;
+    ps_stream = &ps_dec->s_bit_stream;
+    UWORD16 u2_height;
+    UWORD16 u2_width;
+
+    if (impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN) != SEQUENCE_HEADER_CODE)
+    {
+        impeg2d_bit_stream_flush(ps_stream,START_CODE_LEN);
+        return IMPEG2D_FRM_HDR_START_CODE_NOT_FOUND;
+
+    }
+    impeg2d_bit_stream_flush(ps_stream,START_CODE_LEN);
+    /* Width and height: 12 bits each */
+    u2_width    = impeg2d_bit_stream_get(ps_stream,12);
+    u2_height   = impeg2d_bit_stream_get(ps_stream,12);
+
+    if ((u2_width != ps_dec->u2_horizontal_size)
+                    || (u2_height != ps_dec->u2_vertical_size))
+    {
+        if (0 == ps_dec->u2_header_done)
+        {
+            /* This is the first time we are reading the resolution */
+            ps_dec->u2_horizontal_size = u2_width;
+            ps_dec->u2_vertical_size = u2_height;
+            if (0 == ps_dec->u4_frm_buf_stride)
+            {
+                ps_dec->u4_frm_buf_stride  = (UWORD32) ALIGN16(u2_width);
+            }
+        }
+        else
+        {
+            if((u2_width > ps_dec->u2_create_max_width)
+                            || (u2_height > ps_dec->u2_create_max_height))
+            {
+                IMPEG2D_ERROR_CODES_T e_error = IMPEG2D_UNSUPPORTED_DIMENSIONS;
+                /* Record the requested size; presumably used for a later re-init */
+                ps_dec->u2_reinit_max_height   = u2_height;
+                ps_dec->u2_reinit_max_width    = u2_width;
+
+                return e_error;
+            }
+            else
+            {
+                /* The resolution has changed */
+                return (IMPEG2D_ERROR_CODES_T)IVD_RES_CHANGED;
+            }
+        }
+    }
+    /* The stored size must not exceed the create-time maximum */
+    if((ps_dec->u2_horizontal_size > ps_dec->u2_create_max_width)
+                    || (ps_dec->u2_vertical_size > ps_dec->u2_create_max_height))
+    {
+        IMPEG2D_ERROR_CODES_T e_error = IMPEG2D_UNSUPPORTED_DIMENSIONS;
+        return SET_IVD_FATAL_ERROR(e_error);
+    }
+
+
+    /*------------------------------------------------------------------------*/
+    /* aspect_ratio_info (4 bits), kept in the decoder context                */
+    /* without further interpretation here                                    */
+    /*------------------------------------------------------------------------*/
+    ps_dec->u2_aspect_ratio_info = impeg2d_bit_stream_get(ps_stream,4);
+
+    /*------------------------------------------------------------------------*/
+    /* Frame rate code(4 bits)                                                */
+    /*------------------------------------------------------------------------*/
+    ps_dec->u2_frame_rate_code = impeg2d_bit_stream_get(ps_stream,4);
+    /*------------------------------------------------------------------------*/
+    /* Flush the following as they are not being used                         */
+    /* bit_rate_value (18 bits)                                               */
+    /*------------------------------------------------------------------------*/
+    impeg2d_bit_stream_flush(ps_stream,18);
+    GET_MARKER_BIT(ps_dec,ps_stream);
+    /*------------------------------------------------------------------------*/
+    /* Flush the following as they are not being used                         */
+    /* vbv_buffer_size_value(10 bits), constrained_parameter_flag (1 bit)     */
+    /*------------------------------------------------------------------------*/
+    impeg2d_bit_stream_flush(ps_stream,11);
+
+    /*------------------------------------------------------------------------*/
+    /* Quantization matrix for the intra blocks                               */
+    /*------------------------------------------------------------------------*/
+    if(impeg2d_bit_stream_get_bit(ps_stream) == 1)
+    {
+        UWORD16 i;
+        for(i = 0; i < NUM_PELS_IN_BLOCK; i++)
+        {
+            ps_dec->au1_intra_quant_matrix[gau1_impeg2_inv_scan_zig_zag[i]] =  (UWORD8)impeg2d_bit_stream_get(ps_stream,8);
+        }
+
+    }
+    else
+    {
+        memcpy(ps_dec->au1_intra_quant_matrix,gau1_impeg2_intra_quant_matrix_default,
+                NUM_PELS_IN_BLOCK);
+    }
+
+    /*------------------------------------------------------------------------*/
+    /* Quantization matrix for the inter blocks                               */
+    /*------------------------------------------------------------------------*/
+    if(impeg2d_bit_stream_get_bit(ps_stream) == 1)
+    {
+        UWORD16 i;
+        for(i = 0; i < NUM_PELS_IN_BLOCK; i++)
+        {
+            ps_dec->au1_inter_quant_matrix[gau1_impeg2_inv_scan_zig_zag[i]] =   (UWORD8)impeg2d_bit_stream_get(ps_stream,8);
+        }
+    }
+    else
+    {
+        memcpy(ps_dec->au1_inter_quant_matrix,gau1_impeg2_inter_quant_matrix_default,
+            NUM_PELS_IN_BLOCK);
+    }
+    impeg2d_next_start_code(ps_dec);
+
+    return (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE;
+}
+
+/******************************************************************************
+*
+*  Function Name   : impeg2d_dec_seq_ext
+*
+*  Description     : Parses the sequence extension. Rejects streams whose
+*                    profile/level escape bit is set or whose chroma format
+*                    field is not 0x1, adds the two extension bits of each
+*                    picture dimension (shifted left by 12) to the stored
+*                    sizes and records progressive_sequence and the frame
+*                    rate extension fields.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : IVD_ERROR_NONE on success,
+*                    IMPEG2D_FRM_HDR_START_CODE_NOT_FOUND when the next start
+*                    code is not the extension start code,
+*                    IMPEG2D_PROF_LEVEL_NOT_SUPPORTED when the escape bit is 1,
+*                    IMPEG2D_CHROMA_FMT_NOT_SUP when chroma format is not 0x1
+******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_dec_seq_ext(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+    UWORD32 u4_start_code;
+    UWORD32 u4_esc_bit;
+    UWORD32 u4_profile;
+    UWORD32 u4_level;
+    UWORD32 u4_chroma_format;
+    UWORD32 u4_size_msbs;
+
+    /* The start code is consumed whether or not it is the expected one */
+    u4_start_code = impeg2d_bit_stream_nxt(ps_stream, START_CODE_LEN);
+    impeg2d_bit_stream_flush(ps_stream, START_CODE_LEN);
+    if (EXTENSION_START_CODE != u4_start_code)
+    {
+        return IMPEG2D_FRM_HDR_START_CODE_NOT_FOUND;
+    }
+
+    /* extension_start_code_identifier (4 bits) is not needed */
+    impeg2d_bit_stream_flush(ps_stream, 4);
+
+    /*----------------------------------------------------------------------*/
+    /* profile_and_level_indication                                         */
+    /* [7:7] 1 Escape bit                                                   */
+    /* [6:4] 3 Profile identification                                       */
+    /* [3:0] 4 Level identification                                         */
+    /* Only the escape bit is checked; profile and level are read to keep   */
+    /* the bitstream position correct and are otherwise ignored.            */
+    /*----------------------------------------------------------------------*/
+    u4_esc_bit = impeg2d_bit_stream_get_bit(ps_stream);
+    u4_profile = impeg2d_bit_stream_get(ps_stream, 3);
+    u4_level   = impeg2d_bit_stream_get(ps_stream, 4);
+    UNUSED(u4_profile);
+    UNUSED(u4_level);
+    if (1 == u4_esc_bit)
+    {
+        return IMPEG2D_PROF_LEVEL_NOT_SUPPORTED;
+    }
+
+    ps_dec->u2_progressive_sequence = impeg2d_bit_stream_get_bit(ps_stream);
+
+    /* chroma_format (2 bits): only the value 0x1 is accepted */
+    u4_chroma_format = impeg2d_bit_stream_get(ps_stream, 2);
+    if (0x1 != u4_chroma_format)
+    {
+        return IMPEG2D_CHROMA_FMT_NOT_SUP;
+    }
+
+    /* horizontal_size_extension: 2 most significant bits of the width */
+    u4_size_msbs = impeg2d_bit_stream_get(ps_stream, 2);
+    ps_dec->u2_horizontal_size += (u4_size_msbs << 12);
+
+    /* vertical_size_extension: 2 most significant bits of the height */
+    u4_size_msbs = impeg2d_bit_stream_get(ps_stream, 2);
+    ps_dec->u2_vertical_size += (u4_size_msbs << 12);
+
+    /*-----------------------------------------------------------------------*/
+    /* Skipped because they are not used:                                    */
+    /* bit_rate_extension          12                                        */
+    /* marker_bit                   1                                        */
+    /* vbv_buffer_size_extension    8                                        */
+    /* low_delay                    1                                        */
+    /*-----------------------------------------------------------------------*/
+    impeg2d_bit_stream_flush(ps_stream, 12);
+    GET_MARKER_BIT(ps_dec,ps_stream);
+    impeg2d_bit_stream_flush(ps_stream, 9);
+
+    /* frame_rate_extension_n (2 bits) and frame_rate_extension_d (5 bits) */
+    ps_dec->u2_frame_rate_extension_n = impeg2d_bit_stream_get(ps_stream, 2);
+    ps_dec->u2_frame_rate_extension_d = impeg2d_bit_stream_get(ps_stream, 5);
+
+    return (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE;
+}
+
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_dec_seq_disp_ext
+*
+*  Description     : Equivalent of sequence_display_extension() of the
+*                    standard. Only the display sizes are stored in the
+*                    decoder context; every other field is read and dropped.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : None
+******************************************************************************/
+void impeg2d_dec_seq_disp_ext(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+    UWORD32 u4_colour_description;
+
+    /*
+    Syntax being parsed:
+
+    sequence_display_extension()
+    {
+        extension_start_code_identifier 4
+        video_format                    3
+        colour_description              1
+        if (colour_description)
+        {
+            colour_primaries            8
+            transfer_characteristics    8
+            matrix_coefficients         8
+        }
+        display_horizontal_size         14
+        marker_bit                      1
+        display_vertical_size           14
+        next_start_code()
+    }
+    */
+
+    /* extension_start_code_identifier + video_format */
+    impeg2d_bit_stream_get(ps_stream, 7);
+
+    u4_colour_description = impeg2d_bit_stream_get_bit(ps_stream);
+    if (1 == u4_colour_description)
+    {
+        /* colour_primaries, transfer_characteristics, matrix_coefficients */
+        impeg2d_bit_stream_get(ps_stream, 24);
+    }
+
+    ps_dec->u2_display_horizontal_size = impeg2d_bit_stream_get(ps_stream, 14);
+    GET_MARKER_BIT(ps_dec,ps_stream);
+    ps_dec->u2_display_vertical_size = impeg2d_bit_stream_get(ps_stream, 14);
+
+    impeg2d_next_start_code(ps_dec);
+}
+
+
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_dec_seq_scale_ext
+*
+*  Description     : Handler for sequence_scalable_extension() of the
+*                    standard. No bits are parsed here; the function always
+*                    reports that scalability is not supported.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context (unused)
+*
+*  Values Returned : Always IMPEG2D_SCALABILITIY_NOT_SUPPORTED
+*******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_dec_seq_scale_ext(dec_state_t *ps_dec)
+{
+    const IMPEG2D_ERROR_CODES_T e_status = IMPEG2D_SCALABILITIY_NOT_SUPPORTED;
+
+    UNUSED(ps_dec);
+
+    return e_status;
+}
+
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_dec_quant_matrix_ext
+*
+*  Description     : Reads the optional intra and non-intra quantizer
+*                    matrices from the stream. Coefficients arrive in zig-zag
+*                    order and are stored at the positions given by
+*                    gau1_impeg2_inv_scan_zig_zag. A matrix whose load flag
+*                    is not 1 is left as it currently is in the context.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_dec_quant_matrix_ext(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+    UWORD8 *pu1_matrix;
+    UWORD32 u4_load_flag;
+    UWORD32 u4_coef;
+
+    /* extension_start_code_identifier */
+    impeg2d_bit_stream_flush(ps_stream, 4);
+
+    /* load_intra_quantiser_matrix followed by 64 8-bit entries when set */
+    u4_load_flag = impeg2d_bit_stream_get(ps_stream, 1);
+    if (1 == u4_load_flag)
+    {
+        pu1_matrix = ps_dec->au1_intra_quant_matrix;
+        u4_coef = 0;
+        while (u4_coef < NUM_PELS_IN_BLOCK)
+        {
+            pu1_matrix[gau1_impeg2_inv_scan_zig_zag[u4_coef]] =
+                (UWORD8)impeg2d_bit_stream_get(ps_stream, 8);
+            u4_coef++;
+        }
+    }
+
+    /* load_non_intra_quantiser_matrix followed by 64 8-bit entries when set */
+    u4_load_flag = impeg2d_bit_stream_get(ps_stream, 1);
+    if (1 == u4_load_flag)
+    {
+        pu1_matrix = ps_dec->au1_inter_quant_matrix;
+        u4_coef = 0;
+        while (u4_coef < NUM_PELS_IN_BLOCK)
+        {
+            pu1_matrix[gau1_impeg2_inv_scan_zig_zag[u4_coef]] =
+                (UWORD8)impeg2d_bit_stream_get(ps_stream, 8);
+            u4_coef++;
+        }
+    }
+
+    /* The chroma intra / non-intra quantizer matrices are not needed for */
+    /* the 4:2:0 format, so parsing stops here.                           */
+    impeg2d_next_start_code(ps_dec);
+}
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_dec_pic_disp_ext
+*
+*  Description     : Equivalent of picture_display_extension() of the
+*                    standard. The frame centre offsets are read and dropped;
+*                    nothing is stored in the decoder context.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_dec_pic_disp_ext(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+    WORD16 i2_offsets_left;
+
+    /* extension_start_code_identifier */
+    impeg2d_bit_stream_flush(ps_stream, 4);
+
+    /* Work out how many (horizontal, vertical) offset pairs follow */
+    if (ps_dec->u2_progressive_sequence)
+    {
+        if (ps_dec->u2_repeat_first_field)
+        {
+            i2_offsets_left = 2 + ps_dec->u2_top_field_first;
+        }
+        else
+        {
+            i2_offsets_left = 1;
+        }
+    }
+    else
+    {
+        if (ps_dec->u2_picture_structure != FRAME_PICTURE)
+        {
+            i2_offsets_left = 1;
+        }
+        else
+        {
+            i2_offsets_left = 2 + ps_dec->u2_repeat_first_field;
+        }
+    }
+
+    while (0 != i2_offsets_left)
+    {
+        i2_offsets_left--;
+
+        /* frame_centre_horizontal_offset */
+        impeg2d_bit_stream_get(ps_stream, 16);
+        GET_MARKER_BIT(ps_dec,ps_stream);
+        /* frame_centre_vertical_offset */
+        impeg2d_bit_stream_get(ps_stream, 16);
+        GET_MARKER_BIT(ps_dec,ps_stream);
+    }
+
+    impeg2d_next_start_code(ps_dec);
+}
+
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_dec_itu_t_ext
+*
+*  Description     : Equivalent of ITU-T_extension() of the standard. Only
+*                    the extension identifier (EXT_ID_LEN bits) is flushed
+*                    before moving on to the next start code; no parameter
+*                    is kept.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_dec_itu_t_ext(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+
+    impeg2d_bit_stream_flush(ps_stream, EXT_ID_LEN);
+    impeg2d_next_start_code(ps_dec);
+}
+
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_dec_copyright_ext
+*
+*  Description     : Equivalent of copyright_extension() of the standard.
+*                    Skips COPYRIGHT_EXTENSION_LEN bits and then moves to the
+*                    next start code; no parameter is kept.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_dec_copyright_ext(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+    UWORD32 u4_bits_left;
+
+    /* Flush in chunks of 32 bits, then whatever remains.                  */
+    /* NOTE(review): presumably a single flush call is limited to 32 bits  */
+    /* - confirm against impeg2d_bit_stream_flush.                         */
+    for (u4_bits_left = COPYRIGHT_EXTENSION_LEN; u4_bits_left >= 32; u4_bits_left -= 32)
+    {
+        impeg2d_bit_stream_flush(ps_stream, 32);
+    }
+
+    if (0 != u4_bits_left)
+    {
+        impeg2d_bit_stream_flush(ps_stream, u4_bits_left);
+    }
+
+    impeg2d_next_start_code(ps_dec);
+}
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_dec_cam_param_ext
+*
+*  Description     : Equivalent of camera_parameters_extension() of the
+*                    standard. Skips CAMERA_PARAMETER_EXTENSION_LEN bits and
+*                    then moves to the next start code; no parameter is kept.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_dec_cam_param_ext(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+    UWORD32 u4_bits_left;
+
+    /* Flush in chunks of 32 bits, then whatever remains.                  */
+    /* NOTE(review): presumably a single flush call is limited to 32 bits  */
+    /* - confirm against impeg2d_bit_stream_flush.                         */
+    for (u4_bits_left = CAMERA_PARAMETER_EXTENSION_LEN; u4_bits_left >= 32; u4_bits_left -= 32)
+    {
+        impeg2d_bit_stream_flush(ps_stream, 32);
+    }
+
+    if (0 != u4_bits_left)
+    {
+        impeg2d_bit_stream_flush(ps_stream, u4_bits_left);
+    }
+
+    impeg2d_next_start_code(ps_dec);
+}
+
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_dec_grp_of_pic_hdr
+*
+*  Description     : Handles the group of pictures header by skipping
+*                    GROUP_OF_PICTURE_LEN bits. None of the GOP fields are
+*                    kept, and this function does not itself search for the
+*                    next start code.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_dec_grp_of_pic_hdr(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+    UWORD32 u4_bits_left;
+
+    /* Flush in chunks of 32 bits, then whatever remains.                  */
+    /* NOTE(review): presumably a single flush call is limited to 32 bits  */
+    /* - confirm against impeg2d_bit_stream_flush.                         */
+    for (u4_bits_left = GROUP_OF_PICTURE_LEN; u4_bits_left >= 32; u4_bits_left -= 32)
+    {
+        impeg2d_bit_stream_flush(ps_stream, 32);
+    }
+
+    if (0 != u4_bits_left)
+    {
+        impeg2d_bit_stream_flush(ps_stream, u4_bits_left);
+    }
+}
+
+
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_dec_pic_hdr
+*
+*  Description     : Parses the picture header: picture coding type and, for
+*                    P and B pictures, the full-pel flags and f_codes. When
+*                    the stream is not MPEG-2 (u2_is_mpeg2 == 0) the f_codes
+*                    read here are also copied into au2_f_code.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : IVD_ERROR_NONE on success,
+*                    IMPEG2D_INVALID_PIC_TYPE when the picture type lies
+*                    outside [I_PIC, D_PIC]
+*******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_dec_pic_hdr(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+
+    /* picture_start_code */
+    impeg2d_bit_stream_flush(ps_stream, START_CODE_LEN);
+
+    /* temporal_reference (10 bits) is not used */
+    impeg2d_bit_stream_get(ps_stream, 10);
+
+    /* picture_coding_type (3 bits) */
+    ps_dec->e_pic_type = (e_pic_type_t)impeg2d_bit_stream_get(ps_stream, 3);
+    if (!((ps_dec->e_pic_type >= I_PIC) && (ps_dec->e_pic_type <= D_PIC)))
+    {
+        /* Unknown picture type: resynchronise on a picture start code */
+        impeg2d_next_code(ps_dec, PICTURE_START_CODE);
+        return IMPEG2D_INVALID_PIC_TYPE;
+    }
+
+    /* vbv_delay (16 bits) is not used */
+    impeg2d_bit_stream_get(ps_stream, 16);
+
+    /* Forward parameters exist for P and B pictures, backward for B only */
+    if ((P_PIC == ps_dec->e_pic_type) || (B_PIC == ps_dec->e_pic_type))
+    {
+        ps_dec->u2_full_pel_forw_vector = impeg2d_bit_stream_get_bit(ps_stream);
+        ps_dec->u2_forw_f_code = impeg2d_bit_stream_get(ps_stream, 3);
+
+        if (B_PIC == ps_dec->e_pic_type)
+        {
+            ps_dec->u2_full_pel_back_vector = impeg2d_bit_stream_get_bit(ps_stream);
+            ps_dec->u2_back_f_code = impeg2d_bit_stream_get(ps_stream, 3);
+        }
+    }
+
+    if (0 == ps_dec->u2_is_mpeg2)
+    {
+        /* The same f_code is used for both components of a direction */
+        ps_dec->au2_f_code[0][1] = ps_dec->u2_forw_f_code;
+        ps_dec->au2_f_code[0][0] = ps_dec->u2_forw_f_code;
+        ps_dec->au2_f_code[1][1] = ps_dec->u2_back_f_code;
+        ps_dec->au2_f_code[1][0] = ps_dec->u2_back_f_code;
+    }
+
+    /*-----------------------------------------------------------------------*/
+    /*  Drop the extra picture information                                   */
+    /*                                                                       */
+    /*  while(nextbits() == '1')                                             */
+    /*  {                                                                    */
+    /*      extra_bit_picture         1                                      */
+    /*      extra_information_picture 8                                      */
+    /*  }                                                                    */
+    /*  extra_bit_picture             1                                      */
+    /*-----------------------------------------------------------------------*/
+    while (1 == impeg2d_bit_stream_nxt(ps_stream, 1))
+    {
+        impeg2d_bit_stream_get(ps_stream, 9);
+    }
+    impeg2d_bit_stream_get_bit(ps_stream);
+
+    impeg2d_next_start_code(ps_dec);
+
+    return (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE;
+}
+
+
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_dec_pic_coding_ext
+*
+*  Description     : Parses the picture coding extension into the decoder
+*                    context and selects the inverse scan table according to
+*                    alternate_scan.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_dec_pic_coding_ext(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+    UWORD32 u4_composite_display_flag;
+    WORD32 i4_dir;
+    WORD32 i4_comp;
+
+    /* extension start code followed by the 4-bit extension identifier */
+    impeg2d_bit_stream_flush(ps_stream, START_CODE_LEN);
+    impeg2d_bit_stream_get(ps_stream, 4);
+
+    /* f_code[0][0], f_code[0][1], f_code[1][0], f_code[1][1]: 4 bits each */
+    for (i4_dir = 0; i4_dir < 2; i4_dir++)
+    {
+        for (i4_comp = 0; i4_comp < 2; i4_comp++)
+        {
+            ps_dec->au2_f_code[i4_dir][i4_comp] = impeg2d_bit_stream_get(ps_stream, 4);
+        }
+    }
+
+    ps_dec->u2_intra_dc_precision = impeg2d_bit_stream_get(ps_stream, 2);
+    ps_dec->u2_picture_structure = impeg2d_bit_stream_get(ps_stream, 2);
+    ps_dec->u2_top_field_first = impeg2d_bit_stream_get_bit(ps_stream);
+    ps_dec->u2_frame_pred_frame_dct = impeg2d_bit_stream_get_bit(ps_stream);
+    ps_dec->u2_concealment_motion_vectors = impeg2d_bit_stream_get_bit(ps_stream);
+    ps_dec->u2_q_scale_type = impeg2d_bit_stream_get_bit(ps_stream);
+    ps_dec->u2_intra_vlc_format = impeg2d_bit_stream_get_bit(ps_stream);
+    ps_dec->u2_alternate_scan = impeg2d_bit_stream_get_bit(ps_stream);
+    ps_dec->u2_repeat_first_field = impeg2d_bit_stream_get_bit(ps_stream);
+
+    /* chroma_420_type is not used */
+    impeg2d_bit_stream_get_bit(ps_stream);
+
+    ps_dec->u2_progressive_frame = impeg2d_bit_stream_get_bit(ps_stream);
+
+    /* composite_display_flag: when set, 20 more bits follow */
+    u4_composite_display_flag = impeg2d_bit_stream_get_bit(ps_stream);
+    if (0 != u4_composite_display_flag)
+    {
+        /* v_axis, field_sequence, sub_carrier, burst_amplitude, sub_carrier_phase */
+        impeg2d_bit_stream_flush(ps_stream, 20);
+    }
+    impeg2d_next_start_code(ps_dec);
+
+    /* Pick the inverse scan table matching the scan order just signalled */
+    ps_dec->pu1_inv_scan_matrix = (VERTICAL_SCAN == ps_dec->u2_alternate_scan) ?
+        (UWORD8 *)gau1_impeg2_inv_scan_vertical :
+        (UWORD8 *)gau1_impeg2_inv_scan_zig_zag;
+}
+
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_dec_slice
+*
+*  Description     : Reads slice level parameters and calls the function that
+*                    decodes the individual MBs of the slice
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : IVD_ERROR_NONE on success,
+*                    IMPEG2D_INVALID_VERT_SIZE when the slice row is zero or
+*                    greater than u2_num_vert_mb,
+*                    otherwise the error reported by pf_decode_slice
+*******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_dec_slice(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream;
+    UWORD32 u4_slice_vertical_position;
+    UWORD32 u4_slice_vertical_position_extension;
+    IMPEG2D_ERROR_CODES_T e_error;
+
+    ps_stream = &ps_dec->s_bit_stream;
+
+    /*------------------------------------------------------------------------*/
+    /* All the profiles supported require restricted slice structure. Hence   */
+    /* there is no need to store slice_vertical_position. Note that max       */
+    /* height supported does not exceed 2800 and scalability is not supported */
+    /*------------------------------------------------------------------------*/
+
+    /* Remove the slice start code prefix; the byte that follows is          */
+    /* slice_vertical_position                                                */
+    impeg2d_bit_stream_flush(ps_stream,START_CODE_PREFIX_LEN);
+    u4_slice_vertical_position = impeg2d_bit_stream_get(ps_stream, 8);
+
+    /*------------------------------------------------------------------------*/
+    /* slice_vertical_position_extension (3 bits) is present in the MPEG-2    */
+    /* slice syntax only when vertical_size exceeds 2800. The test has to be  */
+    /* made on the picture height: the 8-bit position itself can never be     */
+    /* larger than 255, so comparing it against 2800 would never read the     */
+    /* extension. Streams that are not MPEG-2 are left untouched.             */
+    /*------------------------------------------------------------------------*/
+    if((0 != ps_dec->u2_is_mpeg2) && (ps_dec->u2_vertical_size > 2800))
+    {
+        u4_slice_vertical_position_extension = impeg2d_bit_stream_get(ps_stream, 3);
+        u4_slice_vertical_position += (u4_slice_vertical_position_extension << 7);
+    }
+
+    /* Valid rows are 1 .. u2_num_vert_mb (1-based in the bitstream) */
+    if((u4_slice_vertical_position > ps_dec->u2_num_vert_mb) ||
+       (u4_slice_vertical_position == 0))
+    {
+        return IMPEG2D_INVALID_VERT_SIZE;
+    }
+
+    /* Change mb_y to point to the (0-based) row of this slice. mb_x is only */
+    /* reset when the slice starts a row different from the current one.     */
+    u4_slice_vertical_position--;
+    if (ps_dec->u2_mb_y != u4_slice_vertical_position)
+    {
+        ps_dec->u2_mb_y    = u4_slice_vertical_position;
+        ps_dec->u2_mb_x    = 0;
+    }
+    ps_dec->u2_first_mb = 1;
+
+    /*------------------------------------------------------------------------*/
+    /* Quant scale code decoding                                              */
+    /* The 5-bit code is mapped through the non-linear table when             */
+    /* q_scale_type is set, and doubled otherwise.                            */
+    /*------------------------------------------------------------------------*/
+    {
+        UWORD16 u2_quant_scale_code;
+        u2_quant_scale_code = impeg2d_bit_stream_get(ps_stream,5);
+        ps_dec->u1_quant_scale = (ps_dec->u2_q_scale_type) ?
+            gau1_impeg2_non_linear_quant_scale[u2_quant_scale_code] : (u2_quant_scale_code << 1);
+    }
+
+    /*------------------------------------------------------------------------*/
+    /* A leading '1' introduces 9 bits that are skipped, followed by any      */
+    /* number of 9-bit extra information groups, each also starting with '1'. */
+    /* The final bit read below is the terminating extra_bit_slice.           */
+    /*------------------------------------------------------------------------*/
+    if (impeg2d_bit_stream_nxt(ps_stream,1) == 1)
+    {
+        impeg2d_bit_stream_flush(ps_stream,9);
+        /* Flush extra bit information */
+        while (impeg2d_bit_stream_nxt(ps_stream,1) == 1)
+        {
+            impeg2d_bit_stream_flush(ps_stream,9);
+        }
+    }
+    impeg2d_bit_stream_get_bit(ps_stream);
+
+    /* Reset the DC predictors to reset values given in Table 7.2 at the start*/
+    /* of slice data */
+    ps_dec->u2_def_dc_pred[Y_LUMA]   = 128 << ps_dec->u2_intra_dc_precision;
+    ps_dec->u2_def_dc_pred[U_CHROMA]   = 128 << ps_dec->u2_intra_dc_precision;
+    ps_dec->u2_def_dc_pred[V_CHROMA]   = 128 << ps_dec->u2_intra_dc_precision;
+    /*------------------------------------------------------------------------*/
+    /* pf_decode_slice implements the following pseudo code from the standard */
+    /* do                                                                     */
+    /* {                                                                      */
+    /*      macroblock()                                                      */
+    /* } while (impeg2d_bit_stream_nxt() != '000 0000 0000 0000 0000 0000')   */
+    /*------------------------------------------------------------------------*/
+
+    e_error = ps_dec->pf_decode_slice(ps_dec);
+    if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
+    {
+        return e_error;
+    }
+
+    /* Check for the MBy index instead of number of MBs left, because the
+     * number of MBs left in case of multi-thread decode is the number of MBs
+     * in that row only
+     */
+    if(ps_dec->u2_mb_y < ps_dec->u2_num_vert_mb)
+        impeg2d_next_start_code(ps_dec);
+
+    return (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE;
+}
+
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_dec_pic_data_thread
+*
+*  Description     : Per-thread picture decode loop. Each iteration decodes
+*                    one slice with impeg2d_dec_slice() and then peeks at the
+*                    next START_CODE_LEN bits; the loop continues only while
+*                    those bits form a slice start code. When more than one
+*                    core is configured, work is taken from the job queue:
+*                    a CMD_PROCESS job re-points the bitstream and sets the
+*                    row range to decode, any other job is handled as a
+*                    format conversion request. Once decoding stops, the
+*                    multi-core path drains the queue and services the
+*                    CMD_FMTCONV jobs, while the single-core path converts
+*                    the whole display picture in one call.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context of the calling thread
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_dec_pic_data_thread(dec_state_t *ps_dec)
+{
+    WORD32 i4_continue_decode;
+
+    WORD32 i4_cur_row, temp;
+    UWORD32 u4_bits_read;
+    WORD32 i4_dequeue_job;
+    IMPEG2D_ERROR_CODES_T e_error;
+
+    i4_cur_row = ps_dec->u2_mb_y + 1;
+
+    i4_continue_decode = 1;
+
+    /* A job is dequeued before the first slice (multi-core only) */
+    i4_dequeue_job = 1;
+    do
+    {
+        /* Stop once the current row is past the last MB row */
+        if(i4_cur_row > ps_dec->u2_num_vert_mb)
+        {
+            i4_continue_decode = 0;
+            break;
+        }
+
+        {
+            if((ps_dec->i4_num_cores> 1) && (i4_dequeue_job))
+            {
+                job_t s_job;
+                IV_API_CALL_STATUS_T e_ret;
+                UWORD8 *pu1_buf;
+
+                /* NOTE(review): a non-success return presumably means the */
+                /* queue has no more jobs - confirm in impeg2_jobq_dequeue */
+                e_ret = impeg2_jobq_dequeue(ps_dec->pv_jobq, &s_job, sizeof(s_job), 1, 1);
+                if(e_ret != IV_SUCCESS)
+                    break;
+
+                if(CMD_PROCESS == s_job.i4_cmd)
+                {
+                    /* Restart the bitstream reader at the job's offset in */
+                    /* the input buffer and take over the job's row range. */
+                    /* NOTE(review): the reason for the extra 8 bytes in   */
+                    /* the size is not visible here - confirm.             */
+                    pu1_buf = ps_dec->pu1_inp_bits_buf + s_job.i4_bistream_ofst;
+                    impeg2d_bit_stream_init(&(ps_dec->s_bit_stream), pu1_buf,
+                            (ps_dec->u4_num_inp_bytes - s_job.i4_bistream_ofst) + 8);
+                    i4_cur_row      = s_job.i2_start_mb_y;
+                    ps_dec->i4_start_mb_y = s_job.i2_start_mb_y;
+                    ps_dec->i4_end_mb_y = s_job.i2_end_mb_y;
+                    ps_dec->u2_mb_x = 0;
+                    ps_dec->u2_mb_y = ps_dec->i4_start_mb_y;
+                    ps_dec->u2_num_mbs_left = (ps_dec->i4_end_mb_y - ps_dec->i4_start_mb_y) * ps_dec->u2_num_horiz_mb;
+
+                }
+                else
+                {
+                    /* Any other command: convert the job's rows. MB rows  */
+                    /* are scaled by 16 (<< 4) to get pixel rows and the   */
+                    /* end is clipped to the picture height. Decoding in   */
+                    /* this thread stops after the conversion.             */
+                    WORD32 start_row;
+                    WORD32 num_rows;
+                    start_row = s_job.i2_start_mb_y << 4;
+                    num_rows = MIN((s_job.i2_end_mb_y << 4), ps_dec->u2_vertical_size);
+                    num_rows -= start_row;
+                    impeg2d_format_convert(ps_dec, ps_dec->ps_disp_pic,
+                                        ps_dec->ps_disp_frm_buf,
+                                        start_row, num_rows);
+                    break;
+
+                }
+
+            }
+            e_error = impeg2d_dec_slice(ps_dec);
+
+            /* On a slice error, resynchronise on the next start code */
+            if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
+            {
+                impeg2d_next_start_code(ps_dec);
+            }
+        }
+
+        /* Detecting next slice start code */
+        /* Every path through this while(1) ends in a break, so its body   */
+        /* runs exactly once per outer iteration.                          */
+        while(1)
+        {
+            // skip (dec->u4_num_cores-1) rows
+            u4_bits_read = impeg2d_bit_stream_nxt(&ps_dec->s_bit_stream,START_CODE_LEN);
+            temp = u4_bits_read & 0xFF;
+            /* Slice start code: upper bits equal 0x000001 and the low     */
+            /* byte lies in 0x01..0xAF                                     */
+            i4_continue_decode = (((u4_bits_read >> 8) == 0x01) && (temp) && (temp <= 0xAF));
+
+            if(i4_continue_decode)
+            {
+                /* If the slice is from the same row, then continue decoding without dequeue */
+                if((temp - 1) == i4_cur_row)
+                {
+                    i4_dequeue_job = 0;
+                    break;
+                }
+
+                /* Slice still below the end row of the current job:       */
+                /* follow the decoder's row; otherwise fetch a new job.    */
+                if(temp < ps_dec->i4_end_mb_y)
+                {
+                    i4_cur_row = ps_dec->u2_mb_y;
+                }
+                else
+                {
+                    i4_dequeue_job = 1;
+                }
+                break;
+
+            }
+            else
+                break;
+        }
+
+    }while(i4_continue_decode);
+    if(ps_dec->i4_num_cores > 1)
+    {
+        /* Drain the job queue; only format conversion jobs are acted on */
+        while(1)
+        {
+            job_t s_job;
+            IV_API_CALL_STATUS_T e_ret;
+
+            e_ret = impeg2_jobq_dequeue(ps_dec->pv_jobq, &s_job, sizeof(s_job), 1, 1);
+            if(e_ret != IV_SUCCESS)
+                break;
+            if(CMD_FMTCONV == s_job.i4_cmd)
+            {
+                WORD32 start_row;
+                WORD32 num_rows;
+                start_row = s_job.i2_start_mb_y << 4;
+                num_rows = MIN((s_job.i2_end_mb_y << 4), ps_dec->u2_vertical_size);
+                num_rows -= start_row;
+                impeg2d_format_convert(ps_dec, ps_dec->ps_disp_pic,
+                                    ps_dec->ps_disp_frm_buf,
+                                    start_row, num_rows);
+            }
+        }
+    }
+    else
+    {
+        /* Single core: convert the whole picture in one call, provided a  */
+        /* display picture exists and either display buffers are not       */
+        /* shared or the output format is not IV_YUV_420P.                 */
+        if((NULL != ps_dec->ps_disp_pic) && ((0 == ps_dec->u4_share_disp_buf) || (IV_YUV_420P != ps_dec->i4_chromaFormat)))
+            impeg2d_format_convert(ps_dec, ps_dec->ps_disp_pic,
+                            ps_dec->ps_disp_frm_buf,
+                            0, ps_dec->u2_vertical_size);
+    }
+}
+
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_init_thread_dec_ctxt
+*
+*  Description     : Initialises the decoder context of a worker thread from
+*                    the main decoder context: sequence and picture level
+*                    parameters, frame buffer descriptors, function pointers,
+*                    quantizer matrices and the input buffer / job queue
+*                    handles are copied field by field. The row range of the
+*                    thread is set to the whole picture and its MB position
+*                    is reset to (0, 0).
+*
+*  Arguments       :
+*  ps_dec          : Source (main) decoder context
+*  ps_dec_thd      : Destination (thread) decoder context
+*  i4_min_mb_y     : Unused
+*
+*  Values Returned : Always 0
+*******************************************************************************/
+static WORD32 impeg2d_init_thread_dec_ctxt(dec_state_t *ps_dec,
+                                           dec_state_t *ps_dec_thd,
+                                           WORD32 i4_min_mb_y)
+{
+    UNUSED(i4_min_mb_y);
+    /* Row range defaults to the full picture; position starts at the origin */
+    ps_dec_thd->i4_start_mb_y = 0;
+    ps_dec_thd->i4_end_mb_y = ps_dec->u2_num_vert_mb;
+    ps_dec_thd->u2_mb_x = 0;
+    ps_dec_thd->u2_mb_y = 0;
+    /* Stream type, dimensions and header state */
+    ps_dec_thd->u2_is_mpeg2 = ps_dec->u2_is_mpeg2;
+    ps_dec_thd->u2_frame_width = ps_dec->u2_frame_width;
+    ps_dec_thd->u2_frame_height = ps_dec->u2_frame_height;
+    ps_dec_thd->u2_picture_width = ps_dec->u2_picture_width;
+    ps_dec_thd->u2_horizontal_size = ps_dec->u2_horizontal_size;
+    ps_dec_thd->u2_vertical_size = ps_dec->u2_vertical_size;
+    ps_dec_thd->u2_create_max_width = ps_dec->u2_create_max_width;
+    ps_dec_thd->u2_create_max_height = ps_dec->u2_create_max_height;
+    ps_dec_thd->u2_header_done = ps_dec->u2_header_done;
+    ps_dec_thd->u2_decode_header = ps_dec->u2_decode_header;
+
+    ps_dec_thd->u2_num_horiz_mb = ps_dec->u2_num_horiz_mb;
+    ps_dec_thd->u2_num_vert_mb = ps_dec->u2_num_vert_mb;
+    ps_dec_thd->u2_num_flds_decoded = ps_dec->u2_num_flds_decoded;
+
+    ps_dec_thd->u4_frm_buf_stride = ps_dec->u4_frm_buf_stride;
+
+    /* Macroblock level state */
+    ps_dec_thd->u2_field_dct = ps_dec->u2_field_dct;
+    ps_dec_thd->u2_read_dct_type = ps_dec->u2_read_dct_type;
+
+    ps_dec_thd->u2_read_motion_type = ps_dec->u2_read_motion_type;
+    ps_dec_thd->u2_motion_type = ps_dec->u2_motion_type;
+
+    ps_dec_thd->pu2_mb_type = ps_dec->pu2_mb_type;
+    ps_dec_thd->u2_fld_pic = ps_dec->u2_fld_pic;
+    ps_dec_thd->u2_frm_pic = ps_dec->u2_frm_pic;
+
+    ps_dec_thd->u2_fld_parity = ps_dec->u2_fld_parity;
+
+    ps_dec_thd->au2_fcode_data[0] = ps_dec->au2_fcode_data[0];
+    ps_dec_thd->au2_fcode_data[1] = ps_dec->au2_fcode_data[1];
+
+    ps_dec_thd->u1_quant_scale = ps_dec->u1_quant_scale;
+
+    ps_dec_thd->u2_num_mbs_left = ps_dec->u2_num_mbs_left;
+    ps_dec_thd->u2_first_mb = ps_dec->u2_first_mb;
+    ps_dec_thd->u2_num_skipped_mbs = ps_dec->u2_num_skipped_mbs;
+
+    /* Current frame, recent field and reference buffer descriptors */
+    memcpy(&ps_dec_thd->s_cur_frm_buf, &ps_dec->s_cur_frm_buf, sizeof(yuv_buf_t));
+    memcpy(&ps_dec_thd->as_recent_fld[0][0], &ps_dec->as_recent_fld[0][0], sizeof(yuv_buf_t));
+    memcpy(&ps_dec_thd->as_recent_fld[0][1], &ps_dec->as_recent_fld[0][1], sizeof(yuv_buf_t));
+    memcpy(&ps_dec_thd->as_recent_fld[1][0], &ps_dec->as_recent_fld[1][0], sizeof(yuv_buf_t));
+    memcpy(&ps_dec_thd->as_recent_fld[1][1], &ps_dec->as_recent_fld[1][1], sizeof(yuv_buf_t));
+    memcpy(&ps_dec_thd->as_ref_buf, &ps_dec->as_ref_buf, sizeof(yuv_buf_t) * 2 * 2);
+
+
+    /* Function pointers selected for the main context */
+    ps_dec_thd->pf_decode_slice = ps_dec->pf_decode_slice;
+
+    ps_dec_thd->pf_vld_inv_quant = ps_dec->pf_vld_inv_quant;
+
+    memcpy(ps_dec_thd->pf_idct_recon, ps_dec->pf_idct_recon, sizeof(ps_dec->pf_idct_recon));
+
+    memcpy(ps_dec_thd->pf_mc, ps_dec->pf_mc, sizeof(ps_dec->pf_mc));
+    ps_dec_thd->pf_interpolate = ps_dec->pf_interpolate;
+    ps_dec_thd->pf_copy_mb = ps_dec->pf_copy_mb;
+    ps_dec_thd->pf_fullx_halfy_8x8              =  ps_dec->pf_fullx_halfy_8x8;
+    ps_dec_thd->pf_halfx_fully_8x8              =  ps_dec->pf_halfx_fully_8x8;
+    ps_dec_thd->pf_halfx_halfy_8x8              =  ps_dec->pf_halfx_halfy_8x8;
+    ps_dec_thd->pf_fullx_fully_8x8              =  ps_dec->pf_fullx_fully_8x8;
+
+    ps_dec_thd->pf_memset_8bit_8x8_block        =  ps_dec->pf_memset_8bit_8x8_block;
+    ps_dec_thd->pf_memset_16bit_8x8_linear_block        =  ps_dec->pf_memset_16bit_8x8_linear_block;
+    ps_dec_thd->pf_copy_yuv420p_buf             =   ps_dec->pf_copy_yuv420p_buf;
+    ps_dec_thd->pf_fmt_conv_yuv420p_to_yuv422ile    =   ps_dec->pf_fmt_conv_yuv420p_to_yuv422ile;
+    ps_dec_thd->pf_fmt_conv_yuv420p_to_yuv420sp_uv  =   ps_dec->pf_fmt_conv_yuv420p_to_yuv420sp_uv;
+    ps_dec_thd->pf_fmt_conv_yuv420p_to_yuv420sp_vu  =   ps_dec->pf_fmt_conv_yuv420p_to_yuv420sp_vu;
+
+
+    /* Quantizer matrices are copied; the inverse scan table is referenced */
+    memcpy(ps_dec_thd->au1_intra_quant_matrix, ps_dec->au1_intra_quant_matrix, NUM_PELS_IN_BLOCK * sizeof(UWORD8));
+    memcpy(ps_dec_thd->au1_inter_quant_matrix, ps_dec->au1_inter_quant_matrix, NUM_PELS_IN_BLOCK * sizeof(UWORD8));
+    ps_dec_thd->pu1_inv_scan_matrix = ps_dec->pu1_inv_scan_matrix;
+
+
+    /* Sequence extension and picture header parameters */
+    ps_dec_thd->u2_progressive_sequence = ps_dec->u2_progressive_sequence;
+    ps_dec_thd->e_pic_type =  ps_dec->e_pic_type;
+    ps_dec_thd->u2_full_pel_forw_vector = ps_dec->u2_full_pel_forw_vector;
+    ps_dec_thd->u2_forw_f_code =   ps_dec->u2_forw_f_code;
+    ps_dec_thd->u2_full_pel_back_vector = ps_dec->u2_full_pel_back_vector;
+    ps_dec_thd->u2_back_f_code = ps_dec->u2_back_f_code;
+
+    /* Motion vectors and picture coding extension parameters */
+    memcpy(ps_dec_thd->ai2_mv, ps_dec->ai2_mv, (2*2*2)*sizeof(WORD16));
+    memcpy(ps_dec_thd->au2_f_code, ps_dec->au2_f_code, (2*2)*sizeof(UWORD16));
+    ps_dec_thd->u2_intra_dc_precision = ps_dec->u2_intra_dc_precision;
+    ps_dec_thd->u2_picture_structure = ps_dec->u2_picture_structure;
+    ps_dec_thd->u2_top_field_first = ps_dec->u2_top_field_first;
+    ps_dec_thd->u2_frame_pred_frame_dct = ps_dec->u2_frame_pred_frame_dct;
+    ps_dec_thd->u2_concealment_motion_vectors = ps_dec->u2_concealment_motion_vectors;
+    ps_dec_thd->u2_q_scale_type =  ps_dec->u2_q_scale_type;
+    ps_dec_thd->u2_intra_vlc_format = ps_dec->u2_intra_vlc_format;
+    ps_dec_thd->u2_alternate_scan = ps_dec->u2_alternate_scan;
+    ps_dec_thd->u2_repeat_first_field = ps_dec->u2_repeat_first_field;
+    ps_dec_thd->u2_progressive_frame = ps_dec->u2_progressive_frame;
+    /* Input buffer and job queue handles: the same values are used by    */
+    /* both contexts (no data is duplicated here)                         */
+    ps_dec_thd->pu1_inp_bits_buf = ps_dec->pu1_inp_bits_buf;
+    ps_dec_thd->u4_num_inp_bytes = ps_dec->u4_num_inp_bytes;
+    ps_dec_thd->pv_jobq = ps_dec->pv_jobq;
+    ps_dec_thd->pv_jobq_buf = ps_dec->pv_jobq_buf;
+    ps_dec_thd->i4_jobq_buf_size = ps_dec->i4_jobq_buf_size;
+
+
+    /* Frame rate, display size and aspect ratio information */
+    ps_dec_thd->u2_frame_rate_code = ps_dec->u2_frame_rate_code;
+    ps_dec_thd->u2_frame_rate_extension_n = ps_dec->u2_frame_rate_extension_n;
+    ps_dec_thd->u2_frame_rate_extension_d = ps_dec->u2_frame_rate_extension_d;
+    ps_dec_thd->u2_framePeriod =   ps_dec->u2_framePeriod;
+    ps_dec_thd->u2_display_horizontal_size = ps_dec->u2_display_horizontal_size;
+    ps_dec_thd->u2_display_vertical_size = ps_dec->u2_display_vertical_size;
+    ps_dec_thd->u2_aspect_ratio_info = ps_dec->u2_aspect_ratio_info;
+
+    ps_dec_thd->ps_func_bi_direct = ps_dec->ps_func_bi_direct;
+    ps_dec_thd->ps_func_forw_or_back = ps_dec->ps_func_forw_or_back;
+
+    return 0;
+
+}
+
+
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_get_slice_pos
+*
+*  Description     : Resets the per-context row ranges and, when more than one
+*                    core is configured, scans a copy of the bitstream for
+*                    slice start codes and queues one CMD_PROCESS job for every
+*                    run of slices that share a row. CMD_FMTCONV jobs covering
+*                    64 picture lines each are queued afterwards when a display
+*                    picture is present, and the job queue is then terminated.
+*
+*  Arguments       :
+*  ps_dec_state_multi_core : Multi-core context; ps_dec_state[0] drives the scan
+*
+*  Values Returned : 0 on success (including the single-core early exit), else
+*                    the status returned by a failing impeg2_jobq_queue() call
+*******************************************************************************/
+WORD32 impeg2d_get_slice_pos(dec_state_multi_core_t *ps_dec_state_multi_core)
+{
+    WORD32 u4_bits;
+    WORD32 i4_row;
+
+    dec_state_t *ps_dec = ps_dec_state_multi_core->ps_dec_state[0];
+    WORD32 i4_prev_row;
+    stream_t s_bitstrm;
+    WORD32 i4_start_row;
+    WORD32 i4_slice_bistream_ofst;
+    WORD32 i;
+
+    /* Scan on a local copy so that ps_dec->s_bit_stream itself is not advanced */
+    s_bitstrm = ps_dec->s_bit_stream;
+    i4_prev_row = -1;
+
+    /* Context 0 is given the full picture; the other contexts are set to -1 */
+    ps_dec_state_multi_core->ps_dec_state[0]->i4_start_mb_y = 0;
+    ps_dec_state_multi_core->ps_dec_state[1]->i4_start_mb_y = -1;
+    ps_dec_state_multi_core->ps_dec_state[2]->i4_start_mb_y = -1;
+    ps_dec_state_multi_core->ps_dec_state[3]->i4_start_mb_y = -1;
+
+    ps_dec_state_multi_core->ps_dec_state[0]->i4_end_mb_y = ps_dec->u2_num_vert_mb;
+    ps_dec_state_multi_core->ps_dec_state[1]->i4_end_mb_y = -1;
+    ps_dec_state_multi_core->ps_dec_state[2]->i4_end_mb_y = -1;
+    ps_dec_state_multi_core->ps_dec_state[3]->i4_end_mb_y = -1;
+
+    /* Nothing is queued when only one core is in use */
+    if(ps_dec->i4_num_cores == 1)
+        return 0;
+    /* Reset the jobq to start of the jobq buffer */
+    impeg2_jobq_reset((jobq_t *)ps_dec->pv_jobq);
+
+    i4_start_row = -1;
+    i4_slice_bistream_ofst = 0;
+    while(1)
+    {
+        WORD32 i4_is_slice;
+        u4_bits = impeg2d_bit_stream_nxt(&s_bitstrm,START_CODE_LEN);
+        if(s_bitstrm.u4_offset >= s_bitstrm.u4_max_offset)
+        {
+            break;
+        }
+
+        /* Low byte of a slice start code carries the 1-based row number */
+        i4_row = u4_bits & 0xFF;
+
+        /* Detect end of frame: anything that is not 0x000001 followed by a */
+        /* row number in the range 1..u2_num_vert_mb ends the scan          */
+        i4_is_slice = (((u4_bits >> 8) == 0x01) && (i4_row) && (i4_row <= ps_dec->u2_num_vert_mb));
+        if(!i4_is_slice)
+            break;
+
+        i4_row -= 1;
+
+        if(i4_prev_row != i4_row)
+        {
+            /* Create a job for previous slice row */
+            if(i4_start_row != -1)
+            {
+                job_t s_job;
+                IV_API_CALL_STATUS_T ret;
+                s_job.i2_start_mb_y = i4_start_row;
+                s_job.i2_end_mb_y = i4_row;
+                s_job.i4_cmd = CMD_PROCESS;
+                s_job.i4_bistream_ofst = i4_slice_bistream_ofst;
+                ret = impeg2_jobq_queue(ps_dec->pv_jobq, &s_job, sizeof(s_job), 1, 0);
+                if(ret != IV_SUCCESS)
+                    return ret;
+
+            }
+            /* Store current slice's bitstream offset (bits -> bytes).        */
+            /* NOTE(review): the "& 3" term presumably compensates for        */
+            /* u4_offset being counted from a 4-byte aligned address - confirm */
+            i4_slice_bistream_ofst = s_bitstrm.u4_offset >> 3;
+            i4_slice_bistream_ofst -= (size_t)s_bitstrm.pv_bs_buf & 3;
+            i4_prev_row = i4_row;
+
+            /* Store current slice's row position */
+            i4_start_row = i4_row;
+
+        }
+
+
+        impeg2d_bit_stream_flush(&s_bitstrm, START_CODE_LEN);
+
+        /* Flush the bytes till a  start code is encountered  */
+        while(impeg2d_bit_stream_nxt(&s_bitstrm, 24) != START_CODE_PREFIX)
+        {
+            impeg2d_bit_stream_get(&s_bitstrm, 8);
+
+            if(s_bitstrm.u4_offset >= s_bitstrm.u4_max_offset)
+            {
+                break;
+            }
+        }
+    }
+
+    /* Create job for the last slice row. Skipped when the scan found no    */
+    /* slice at all, otherwise a job starting at row -1 would be queued.    */
+    if(i4_start_row != -1)
+    {
+        job_t s_job;
+        IV_API_CALL_STATUS_T e_ret;
+        s_job.i2_start_mb_y = i4_start_row;
+        s_job.i2_end_mb_y = ps_dec->u2_num_vert_mb;
+        s_job.i4_cmd = CMD_PROCESS;
+        s_job.i4_bistream_ofst = i4_slice_bistream_ofst;
+        e_ret = impeg2_jobq_queue(ps_dec->pv_jobq, &s_job, sizeof(s_job), 1, 0);
+        if(e_ret != IV_SUCCESS)
+            return e_ret;
+
+    }
+    /* Queue format conversion jobs when there is a display picture and     */
+    /* either u4_share_disp_buf is 0 or the chroma format is not 420P       */
+    if((NULL != ps_dec->ps_disp_pic) && ((0 == ps_dec->u4_share_disp_buf) || (IV_YUV_420P != ps_dec->i4_chromaFormat)))
+    {
+        /* One job per 64 picture lines, expressed in units of 16 lines */
+        for(i = 0; i < ps_dec->u2_vertical_size; i+=64)
+        {
+            job_t s_job;
+            IV_API_CALL_STATUS_T ret;
+            s_job.i2_start_mb_y = i;
+            s_job.i2_start_mb_y >>= 4;
+            s_job.i2_end_mb_y = (i + 64);
+            s_job.i2_end_mb_y >>= 4;
+            s_job.i4_cmd = CMD_FMTCONV;
+            s_job.i4_bistream_ofst = 0;
+            ret = impeg2_jobq_queue(ps_dec->pv_jobq, &s_job, sizeof(s_job), 1, 0);
+            if(ret != IV_SUCCESS)
+                return ret;
+
+        }
+    }
+
+    impeg2_jobq_terminate(ps_dec->pv_jobq);
+
+    /* Record how far the scan advanced, in bytes */
+    ps_dec->i4_bytes_consumed = s_bitstrm.u4_offset >> 3;
+    ps_dec->i4_bytes_consumed -= ((size_t)s_bitstrm.pv_bs_buf & 3);
+
+    return 0;
+}
+
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_dec_pic_data
+*
+*  Description     : Initializes the per-picture state, queues the slice jobs
+*                    through impeg2d_get_slice_pos(), launches up to
+*                    i4_num_cores - 1 worker threads running
+*                    impeg2d_dec_pic_data_thread(), runs the same routine on
+*                    the calling thread and finally joins the workers that
+*                    were launched for this picture.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : None (ps_dec->u4_error_code is written with 0)
+*******************************************************************************/
+
+void impeg2d_dec_pic_data(dec_state_t *ps_dec)
+{
+
+    WORD32 i;
+    dec_state_multi_core_t *ps_dec_state_multi_core;
+
+    UWORD32  u4_error_code;
+
+    dec_state_t *ps_dec_thd;
+    WORD32 i4_status;
+    WORD32 i4_min_mb_y;
+
+
+    /* Resetting the MB address and MB coordinates at the start of the Frame */
+    ps_dec->u2_mb_x = ps_dec->u2_mb_y = 0;
+    u4_error_code = 0;
+
+    ps_dec_state_multi_core = ps_dec->ps_dec_state_multi_core;
+    /* NOTE(review): the status returned by impeg2d_get_slice_pos() is not checked */
+    impeg2d_get_slice_pos(ps_dec_state_multi_core);
+
+    /* Clear the launch flags of every worker first. The launch loop below   */
+    /* stops at the first failure, so without this the flags of the workers  */
+    /* after the failing one would keep their value from the previous        */
+    /* picture and the join loop would join threads that were never started  */
+    /* for this picture.                                                     */
+    for(i=0; i < ps_dec->i4_num_cores - 1; i++)
+    {
+        ps_dec_state_multi_core->au4_thread_launched[i + 1] = 0;
+    }
+
+    i4_min_mb_y = 1;
+    for(i=0; i < ps_dec->i4_num_cores - 1; i++)
+    {
+        // initialize decoder context for thread
+        // launch dec->u4_num_cores-1 threads
+
+        ps_dec_thd = ps_dec_state_multi_core->ps_dec_state[i+1];
+
+        ps_dec_thd->ps_disp_pic = ps_dec->ps_disp_pic;
+        ps_dec_thd->ps_disp_frm_buf = ps_dec->ps_disp_frm_buf;
+
+        i4_status = impeg2d_init_thread_dec_ctxt(ps_dec, ps_dec_thd, i4_min_mb_y);
+
+        if(i4_status == 0)
+        {
+            ithread_create(ps_dec_thd->pv_codec_thread_handle, NULL, (void *)impeg2d_dec_pic_data_thread, ps_dec_thd);
+            ps_dec_state_multi_core->au4_thread_launched[i + 1] = 1;
+            /* The next worker context is initialized from the row after this one */
+            i4_min_mb_y = ps_dec_thd->u2_mb_y + 1;
+        }
+        else
+        {
+            /* Flag is already 0; no further workers are started */
+            break;
+        }
+    }
+
+    /* The calling thread takes part in the decoding as well */
+    impeg2d_dec_pic_data_thread(ps_dec);
+
+    // wait for threads to complete
+    for(i=0; i < (ps_dec->i4_num_cores - 1); i++)
+    {
+        if(ps_dec_state_multi_core->au4_thread_launched[i + 1] == 1)
+        {
+            ps_dec_thd = ps_dec_state_multi_core->ps_dec_state[i+1];
+            ithread_join(ps_dec_thd->pv_codec_thread_handle, NULL);
+        }
+    }
+
+    ps_dec->u4_error_code = u4_error_code;
+
+}
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_flush_ext_and_user_data
+*
+*  Description     : Skips every consecutive extension and user data section
+*                    found at the current position of the decoder bitstream.
+*                    Scanning stops once the stream offset reaches
+*                    u4_max_offset so that a truncated input cannot make the
+*                    search for the next start code run past the buffer.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_flush_ext_and_user_data(dec_state_t *ps_dec)
+{
+    UWORD32 u4_start_code;
+    stream_t *ps_stream;
+
+    ps_stream    = &ps_dec->s_bit_stream;
+    u4_start_code = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN);
+
+    while((ps_stream->u4_offset < ps_stream->u4_max_offset) &&
+            (u4_start_code == EXTENSION_START_CODE || u4_start_code == USER_DATA_START_CODE))
+    {
+        /* Drop the start code itself, then skip bytewise to the next prefix */
+        impeg2d_bit_stream_flush(ps_stream,START_CODE_LEN);
+        while((ps_stream->u4_offset < ps_stream->u4_max_offset) &&
+                (impeg2d_bit_stream_nxt(ps_stream,START_CODE_PREFIX_LEN) != START_CODE_PREFIX))
+        {
+            impeg2d_bit_stream_flush(ps_stream,8);
+        }
+        u4_start_code = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN);
+    }
+}
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_dec_user_data
+*
+*  Description     : Skips every consecutive user data section found at the
+*                    current position of the decoder bitstream; the payload is
+*                    discarded. Scanning stops once the stream offset reaches
+*                    u4_max_offset so that a truncated input cannot make the
+*                    search for the next start code run past the buffer.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_dec_user_data(dec_state_t *ps_dec)
+{
+    UWORD32 u4_start_code;
+    stream_t *ps_stream;
+
+    ps_stream    = &ps_dec->s_bit_stream;
+    u4_start_code = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN);
+
+    while((ps_stream->u4_offset < ps_stream->u4_max_offset) &&
+            (u4_start_code == USER_DATA_START_CODE))
+    {
+        /* Drop the start code itself, then skip bytewise to the next prefix */
+        impeg2d_bit_stream_flush(ps_stream,START_CODE_LEN);
+        while((ps_stream->u4_offset < ps_stream->u4_max_offset) &&
+                (impeg2d_bit_stream_nxt(ps_stream,START_CODE_PREFIX_LEN) != START_CODE_PREFIX))
+        {
+            impeg2d_bit_stream_flush(ps_stream,8);
+        }
+        u4_start_code = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN);
+    }
+}
+/*******************************************************************************
+*  Function Name   : impeg2d_dec_seq_ext_data
+*
+*  Description     : Decodes the extension data following Sequence
+*                    Extension. It flushes any user data if present.
+*                    The loop ends at the first start code that is neither an
+*                    extension nor user data, on the first error, or when the
+*                    stream offset reaches u4_max_offset.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : IVD_ERROR_NONE, or IMPEG2D_SCALABILITIY_NOT_SUPPORTED
+*                    when a sequence scalable extension is met
+*******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_dec_seq_ext_data(dec_state_t *ps_dec)
+{
+    stream_t   *ps_stream;
+    UWORD32     u4_start_code;
+    IMPEG2D_ERROR_CODES_T e_error;
+
+    e_error = (IMPEG2D_ERROR_CODES_T) IVD_ERROR_NONE;
+
+    ps_stream      = &ps_dec->s_bit_stream;
+    u4_start_code = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN);
+    /* The offset bound keeps a truncated stream from being parsed past its end */
+    while( (u4_start_code == EXTENSION_START_CODE ||
+            u4_start_code == USER_DATA_START_CODE) &&
+            (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE == e_error &&
+            (ps_stream->u4_offset < ps_stream->u4_max_offset))
+    {
+        if(u4_start_code == USER_DATA_START_CODE)
+        {
+            impeg2d_dec_user_data(ps_dec);
+        }
+        else
+        {
+            /* Drop the extension start code and peek at the extension id */
+            impeg2d_bit_stream_flush(ps_stream,START_CODE_LEN);
+            u4_start_code   = impeg2d_bit_stream_nxt(ps_stream,EXT_ID_LEN);
+            switch(u4_start_code)
+            {
+            case SEQ_DISPLAY_EXT_ID:
+                impeg2d_dec_seq_disp_ext(ps_dec);
+                break;
+            case SEQ_SCALABLE_EXT_ID:
+                e_error = IMPEG2D_SCALABILITIY_NOT_SUPPORTED;
+                break;
+            default:
+                /* In case its a reserved extension code */
+                impeg2d_bit_stream_flush(ps_stream,EXT_ID_LEN);
+                impeg2d_peek_next_start_code(ps_dec);
+                break;
+            }
+        }
+        u4_start_code = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN);
+    }
+    return e_error;
+}
+/*******************************************************************************
+*  Function Name   : impeg2d_dec_pic_ext_data
+*
+*  Description     : Decodes the extension data following Picture Coding
+*                    Extension. It flushes any user data if present.
+*                    The loop ends at the first start code that is neither an
+*                    extension nor user data, on the first error, or when the
+*                    stream offset reaches u4_max_offset.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : IVD_ERROR_NONE, or IMPEG2D_SCALABLITY_NOT_SUP when a
+*                    picture spatial/temporal scalable extension is met
+*******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_dec_pic_ext_data(dec_state_t *ps_dec)
+{
+    stream_t   *ps_stream;
+    UWORD32     u4_start_code;
+    IMPEG2D_ERROR_CODES_T e_error;
+
+    e_error = (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE;
+
+    ps_stream      = &ps_dec->s_bit_stream;
+    u4_start_code   = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN);
+    /* The offset bound keeps a truncated stream from being parsed past its end */
+    while ( (u4_start_code == EXTENSION_START_CODE ||
+            u4_start_code == USER_DATA_START_CODE) &&
+            (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE == e_error &&
+            (ps_stream->u4_offset < ps_stream->u4_max_offset))
+    {
+        if(u4_start_code == USER_DATA_START_CODE)
+        {
+            impeg2d_dec_user_data(ps_dec);
+        }
+        else
+        {
+            /* Drop the extension start code and peek at the extension id */
+            impeg2d_bit_stream_flush(ps_stream,START_CODE_LEN);
+            u4_start_code   = impeg2d_bit_stream_nxt(ps_stream,EXT_ID_LEN);
+            switch(u4_start_code)
+            {
+            case QUANT_MATRIX_EXT_ID:
+                impeg2d_dec_quant_matrix_ext(ps_dec);
+                break;
+            case COPYRIGHT_EXT_ID:
+                impeg2d_dec_copyright_ext(ps_dec);
+                break;
+            case PIC_DISPLAY_EXT_ID:
+                impeg2d_dec_pic_disp_ext(ps_dec);
+                break;
+            case CAMERA_PARAM_EXT_ID:
+                impeg2d_dec_cam_param_ext(ps_dec);
+                break;
+            case ITU_T_EXT_ID:
+                impeg2d_dec_itu_t_ext(ps_dec);
+                break;
+            case PIC_SPATIAL_SCALABLE_EXT_ID:
+            case PIC_TEMPORAL_SCALABLE_EXT_ID:
+                e_error = IMPEG2D_SCALABLITY_NOT_SUP;
+                break;
+            default:
+                /* In case its a reserved extension code */
+                impeg2d_bit_stream_flush(ps_stream,EXT_ID_LEN);
+                impeg2d_next_start_code(ps_dec);
+                break;
+            }
+        }
+        u4_start_code = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN);
+    }
+    return e_error;
+}
+
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_process_video_header
+*
+*  Description     : Seeks the sequence header start code, decodes the
+*                    sequence header and, depending on whether an extension
+*                    start code follows it, finishes the set up as an MPEG-2
+*                    or as an MPEG-1 stream.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : Result of impeg2d_init_video_state() on the normal path,
+*                    IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR when the input is
+*                    exhausted before a required header, or the error
+*                    reported by one of the header parsers
+*******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_process_video_header(dec_state_t *ps_dec)
+{
+    stream_t *ps_bits = &ps_dec->s_bit_stream;
+    IMPEG2D_ERROR_CODES_T e_status;
+
+    /* Sequence header is mandatory */
+    impeg2d_next_code(ps_dec, SEQUENCE_HEADER_CODE);
+    if(ps_bits->u4_offset >= ps_bits->u4_max_offset)
+    {
+        return IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR;
+    }
+    e_status = impeg2d_dec_seq_hdr(ps_dec);
+    if(e_status != (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE)
+    {
+        return e_status;
+    }
+
+    /* No extension start code after the sequence header: MPEG1 Decoder */
+    if(impeg2d_bit_stream_nxt(ps_bits, START_CODE_LEN) != EXTENSION_START_CODE)
+    {
+        if(ps_bits->u4_offset < ps_bits->u4_max_offset)
+        {
+            impeg2d_flush_ext_and_user_data(ps_dec);
+        }
+        return impeg2d_init_video_state(ps_dec, MPEG_1_VIDEO);
+    }
+
+    /* MPEG2 Decoder: the sequence extension must be available */
+    if(ps_bits->u4_offset >= ps_bits->u4_max_offset)
+    {
+        return IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR;
+    }
+    e_status = impeg2d_dec_seq_ext(ps_dec);
+    if(e_status != (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE)
+    {
+        return e_status;
+    }
+
+    /* Further extension / user data is parsed only if input remains */
+    if(ps_bits->u4_offset < ps_bits->u4_max_offset)
+    {
+        e_status = impeg2d_dec_seq_ext_data(ps_dec);
+        if(e_status != (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE)
+        {
+            return e_status;
+        }
+    }
+    return impeg2d_init_video_state(ps_dec, MPEG_2_VIDEO);
+}
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_process_video_bit_stream
+*
+*  Description     : Walks the start codes in the bitstream, decoding the
+*                    sequence, GOP and picture headers as they are met, until
+*                    one picture has been decoded or the stream offset reaches
+*                    u4_max_offset. ps_dec->u2_is_mpeg2 selects the MPEG-2 or
+*                    the MPEG-1 handling. Unrecognized start codes are skipped
+*                    by flushing 8 bits and searching for the next start code.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : IVD_ERROR_NONE on the normal path; the error reported by
+*                    a sequence header, sequence extension, picture header or
+*                    picture extension parser; IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR
+*                    when the input is exhausted where a sequence header or
+*                    sequence extension is expected;
+*                    IMPEG2D_FRM_HDR_START_CODE_NOT_FOUND when no picture was
+*                    decoded and the offset has moved beyond u4_max_offset
+*******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_process_video_bit_stream(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream;
+    UWORD32 u4_next_bits, u4_start_code_found;
+    IMPEG2D_ERROR_CODES_T e_error;
+
+    ps_stream = &ps_dec->s_bit_stream;
+    impeg2d_next_start_code(ps_dec);
+    /* u4_start_code_found becomes 1 once a picture has been decoded */
+    u4_start_code_found = 0;
+
+    if(ps_dec->u2_is_mpeg2)
+    {
+        /* MPEG2 decoding starts */
+        while((u4_start_code_found == 0) && (ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset))
+        {
+            u4_next_bits = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN);
+
+            if(u4_next_bits == SEQUENCE_HEADER_CODE)
+            {
+                if(ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset)
+                {
+                    e_error = impeg2d_dec_seq_hdr(ps_dec);
+                    if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
+                    {
+                        return e_error;
+                    }
+
+                    u4_start_code_found = 0;
+
+                }
+                else
+                {
+                    return IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR;
+                }
+
+
+                /* In MPEG-2 handling the sequence extension is parsed right after the sequence header */
+                if(ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset)
+                {
+                    /* NOTE(review): this declaration shadows the function-level e_error */
+                    IMPEG2D_ERROR_CODES_T e_error;
+                    e_error = impeg2d_dec_seq_ext(ps_dec);
+                    if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
+                    {
+                        return e_error;
+                    }
+                    u4_start_code_found = 0;
+
+                }
+                else
+                {
+                    return IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR;
+                }
+            }
+            else if((u4_next_bits == USER_DATA_START_CODE) || (u4_next_bits == EXTENSION_START_CODE))
+            {
+                if(ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset)
+                {
+                    /* NOTE(review): the error code returned here is discarded */
+                    impeg2d_dec_seq_ext_data(ps_dec);
+                    u4_start_code_found = 0;
+
+                }
+
+            }
+            else if((ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset)
+                    && (u4_next_bits == GOP_START_CODE))
+            {
+                impeg2d_dec_grp_of_pic_hdr(ps_dec);
+                impeg2d_dec_user_data(ps_dec);
+                u4_start_code_found = 0;
+
+            }
+            else if((ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset)
+                    && (u4_next_bits == PICTURE_START_CODE))
+            {
+
+                /* Picture: headers and extensions first, then the picture data itself */
+                e_error = impeg2d_dec_pic_hdr(ps_dec);
+                if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
+                {
+                    return e_error;
+                }
+                impeg2d_dec_pic_coding_ext(ps_dec);
+                e_error = impeg2d_dec_pic_ext_data(ps_dec);
+                if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
+                {
+                    return e_error;
+                }
+                impeg2d_pre_pic_dec_proc(ps_dec);
+                impeg2d_dec_pic_data(ps_dec);
+                impeg2d_post_pic_dec_proc(ps_dec);
+                u4_start_code_found = 1;
+            }
+            else
+
+            {
+                /* Any other start code: skip 8 bits and resume the search below */
+                FLUSH_BITS(ps_dec->s_bit_stream.u4_offset, ps_dec->s_bit_stream.u4_buf, ps_dec->s_bit_stream.u4_buf_nxt, 8, ps_dec->s_bit_stream.pu4_buf_aligned);
+
+            }
+            if(u4_start_code_found == 0)
+            {
+                impeg2d_next_start_code(ps_dec);
+            }
+        }
+        /* NOTE(review): the loop exits on offset >= max, this test uses offset > max */
+        if((u4_start_code_found == 0) && (ps_dec->s_bit_stream.u4_offset > ps_dec->s_bit_stream.u4_max_offset))
+        {
+            return IMPEG2D_FRM_HDR_START_CODE_NOT_FOUND;
+        }
+
+    }
+        /* If the stream is MPEG-1 compliant stream */
+    else
+    {
+        while((u4_start_code_found == 0) && (ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset))
+        {
+            u4_next_bits = impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN);
+
+            if(impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN) == SEQUENCE_HEADER_CODE)
+            {
+                if(ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset)
+                {
+                    e_error = impeg2d_dec_seq_hdr(ps_dec);
+                    if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
+                    {
+                        return e_error;
+                    }
+
+                    u4_start_code_found = 0;
+                }
+                else
+                {
+                    return IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR;
+                }
+            }
+            else if((ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset) && (u4_next_bits == EXTENSION_START_CODE || u4_next_bits == USER_DATA_START_CODE))
+            {
+                impeg2d_flush_ext_and_user_data(ps_dec);
+                u4_start_code_found = 0;
+            }
+
+
+            else if ((impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN) == GOP_START_CODE)
+                    && (ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset))
+            {
+                impeg2d_dec_grp_of_pic_hdr(ps_dec);
+                impeg2d_flush_ext_and_user_data(ps_dec);
+                u4_start_code_found = 0;
+            }
+            else if ((impeg2d_bit_stream_nxt(ps_stream,START_CODE_LEN) == PICTURE_START_CODE)
+                    && (ps_dec->s_bit_stream.u4_offset < ps_dec->s_bit_stream.u4_max_offset))
+            {
+
+                /* Picture: header first, extension/user data is skipped, then the picture data */
+                e_error = impeg2d_dec_pic_hdr(ps_dec);
+                if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
+                {
+                    return e_error;
+                }
+                impeg2d_flush_ext_and_user_data(ps_dec);
+                impeg2d_pre_pic_dec_proc(ps_dec);
+                impeg2d_dec_pic_data(ps_dec);
+                impeg2d_post_pic_dec_proc(ps_dec);
+                u4_start_code_found = 1;
+            }
+            else
+            {
+                /* Any other start code: skip 8 bits and resume the search below */
+                FLUSH_BITS(ps_dec->s_bit_stream.u4_offset, ps_dec->s_bit_stream.u4_buf, ps_dec->s_bit_stream.u4_buf_nxt, 8, ps_dec->s_bit_stream.pu4_buf_aligned);
+            }
+            /* NOTE(review): unlike the MPEG-2 loop, this runs even after a picture was decoded */
+            impeg2d_next_start_code(ps_dec);
+
+        }
+        /* NOTE(review): the loop exits on offset >= max, this test uses offset > max */
+        if((u4_start_code_found == 0) && (ps_dec->s_bit_stream.u4_offset > ps_dec->s_bit_stream.u4_max_offset))
+        {
+           return IMPEG2D_FRM_HDR_START_CODE_NOT_FOUND;
+        }
+    }
+
+    return (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE;
+}

diff --git a/decoder/impeg2d_dec_hdr.h b/decoder/impeg2d_dec_hdr.h
new file mode 100644
index 0000000..8bd0378
--- /dev/null
+++ b/decoder/impeg2d_dec_hdr.h

@@ -0,0 +1,51 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : impeg2d_dec_hdr.h                                   */
+/*                                                                           */
+/*  Description       : This file declares the functions that parse the      */
+/*                      MPEG-1/MPEG-2 video sequence header and the video    */
+/*                      bitstream                                            */
+/*                                                                           */
+/*  List of Functions : impeg2d_process_video_header                         */
+/*                      impeg2d_process_video_bit_stream                     */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         10 10 2005   Ittiam          Draft                                */
+/*                                                                           */
+/*****************************************************************************/
+
+#ifndef __IMPEG2D_DEC_HDR_H__
+#define __IMPEG2D_DEC_HDR_H__
+
+/*****************************************************************************/
+/* Function Declarations                                                     */
+/*****************************************************************************/
+/* Seeks and decodes the sequence header, then completes the set up as an    */
+/* MPEG-2 or MPEG-1 stream; returns an IMPEG2D_ERROR_CODES_T status           */
+IMPEG2D_ERROR_CODES_T impeg2d_process_video_header(dec_state_t *dec);
+
+/* Parses start codes until one picture has been decoded or the input is     */
+/* exhausted; returns an IMPEG2D_ERROR_CODES_T status                        */
+IMPEG2D_ERROR_CODES_T impeg2d_process_video_bit_stream(dec_state_t *dec);
+
+
+#endif /* __IMPEG2D_DEC_HDR_H__ */
+

diff --git a/decoder/impeg2d_decoder.c b/decoder/impeg2d_decoder.c
new file mode 100755
index 0000000..ae58675
--- /dev/null
+++ b/decoder/impeg2d_decoder.c

@@ -0,0 +1,292 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : impeg2d_decoder.c                                    */
+/*                                                                           */
+/*                                                                           */
+/*  Description       : This file defines the API interface for MPEG2 Decoder*/
+/*                                                                           */
+/*  List of Functions : impeg2d_dec_hdr, impeg2d_dec_frm                     */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         17 09 2007  Rajendra C Y       Creation                           */
+/*                                                                           */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+/* System include files */
+
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+/* User include files */
+#include "iv_datatypedef.h"
+#include "iv.h"
+#include "ivd.h"
+#include "ithread.h"
+
+#include "impeg2_job_queue.h"
+#include "impeg2_macros.h"
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_mem_func.h"
+
+#include "impeg2d.h"
+#include "impeg2d_api.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_debug.h"
+#include "impeg2d_structs.h"
+#include "impeg2d_mc.h"
+#include "impeg2d_pic_proc.h"
+#include "impeg2d_dec_hdr.h"
+
+void impeg2d_next_start_code(dec_state_t *ps_dec); /* not defined in this file; called from the error path of impeg2d_dec_frm() */
+void impeg2d_next_code(dec_state_t *ps_dec, UWORD32 u4_start_code_val); /* not defined in this file; called from the error path of impeg2d_dec_hdr() with SEQUENCE_HEADER_CODE */
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_dec_hdr                                          */
+/*                                                                           */
+/*  Description   : Processes the video header found in one input buffer     */
+/*  Inputs        : pv_dec (decoder state), ps_ip (stream buffer and size)   */
+/*  Globals       :                                                          */
+/*  Processing    : Inits bit stream, runs impeg2d_process_video_header      */
+/*  Outputs       : ps_op (error code, bytes consumed, picture dimensions)   */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         17 09 2007  Rajendra C Y          Draft                           */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2d_dec_hdr(void *pv_dec,impeg2d_video_decode_ip_t *ps_ip,
+                 impeg2d_video_decode_op_t *ps_op)
+{
+    dec_state_t *ps_dec = (dec_state_t *)pv_dec;
+    IMPEG2D_ERROR_CODES_T e_status;
+    UWORD32 u4_bits_read;
+    UWORD32 u4_bytes_used;
+
+    /* Every call starts with a cleared error code */
+    ps_op->s_ivd_video_decode_op_t.u4_error_code = 0;
+
+    /* Set up the bit stream over the caller's buffer */
+    impeg2d_bit_stream_init(&ps_dec->s_bit_stream,
+                            ps_ip->s_ivd_video_decode_ip_t.pv_stream_buffer,
+                            ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes);
+
+    /* Process the header */
+    e_status = impeg2d_process_video_header(ps_dec);
+
+    /**************************************************************************/
+    /* Bytes consumed is worked out the same way whether or not the header    */
+    /* was processed cleanly: bits read so far converted to whole bytes, and  */
+    /* limited to the number of bytes supplied by the caller                  */
+    /**************************************************************************/
+    u4_bits_read  = impeg2d_bit_stream_num_bits_read(&ps_dec->s_bit_stream);
+    u4_bytes_used = u4_bits_read >> 3;
+    if(u4_bytes_used > ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes)
+    {
+        u4_bytes_used = ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes;
+    }
+    ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = u4_bytes_used;
+
+    if((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_status)
+    {
+        /* Report the failure, then hand the stream to impeg2d_next_code() */
+        /* together with the sequence header code before returning         */
+        ps_op->s_ivd_video_decode_op_t.u4_error_code = e_status;
+
+        impeg2d_next_code(ps_dec, SEQUENCE_HEADER_CODE);
+        return;
+    }
+
+    /* Header processed: report the picture size. No frame is decoded by  */
+    /* this call, so the picture type is IV_NA_FRAME and the flag is zero */
+    ps_op->s_ivd_video_decode_op_t.u4_pic_ht = ps_dec->u2_vertical_size;
+    ps_op->s_ivd_video_decode_op_t.u4_pic_wd = ps_dec->u2_horizontal_size;
+
+    ps_op->s_ivd_video_decode_op_t.e_pic_type            = IV_NA_FRAME;
+    ps_op->s_ivd_video_decode_op_t.u4_frame_decoded_flag = 0;
+    ps_op->s_ivd_video_decode_op_t.u4_error_code         = IV_SUCCESS;
+
+    /* Mark the header as done and clear the decode header flag */
+    ps_dec->u2_header_done   = 1;
+    ps_dec->u2_decode_header = 0;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_frm_bytes_consumed                               */
+/*                                                                           */
+/*  Description   : Bytes consumed value that impeg2d_dec_frm reports        */
+/*  Inputs        : ps_dec (decoder state), u4_max_bytes (upper limit)       */
+/*  Globals       : None                                                     */
+/*  Processing    : With more than one core, uses i4_bytes_consumed.         */
+/*                  Otherwise rounds the bit offset up to whole bytes and    */
+/*                  subtracts (buffer address & 3). The result is limited    */
+/*                  to u4_max_bytes.                                         */
+/*  Outputs       : None                                                     */
+/*  Returns       : Number of bytes consumed                                 */
+/*                                                                           */
+/*****************************************************************************/
+static UWORD32 impeg2d_frm_bytes_consumed(dec_state_t *ps_dec,
+                                          UWORD32 u4_max_bytes)
+{
+    UWORD32 u4_consumed;
+
+    if(ps_dec->i4_num_cores > 1)
+    {
+        u4_consumed = ps_dec->i4_bytes_consumed;
+    }
+    else
+    {
+        /* Bit offset rounded up to bytes, less (buffer address & 3) */
+        u4_consumed  = (ps_dec->s_bit_stream.u4_offset + 7) >> 3;
+        u4_consumed -= ((size_t)ps_dec->s_bit_stream.pv_bs_buf & 3);
+    }
+
+    return (u4_consumed > u4_max_bytes) ? u4_max_bytes : u4_consumed;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_dec_frm                                          */
+/*                                                                           */
+/*  Description   : Processes the video bit stream in one input buffer       */
+/*  Inputs        : pv_dec (decoder state), ps_ip (stream buffer, size, ts)  */
+/*  Globals       :                                                          */
+/*  Processing    : Inits bit stream, runs impeg2d_process_video_bit_stream  */
+/*  Outputs       : ps_op (error code, bytes consumed, picture type / size)  */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         17 09 2007  Rajendra C Y          Draft                           */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2d_dec_frm(void *pv_dec,impeg2d_video_decode_ip_t *ps_ip,
+                 impeg2d_video_decode_op_t *ps_op)
+{
+    dec_state_t *ps_dec = (dec_state_t *)pv_dec;
+    UWORD32 u4_size = ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes;
+    IMPEG2D_ERROR_CODES_T e_error;
+
+    ps_op->s_ivd_video_decode_op_t.u4_error_code = 0;
+
+    IMPEG2D_FRM_NUM_SET();
+
+    /* Keep the input buffer and its size in the decoder state */
+    ps_dec->pu1_inp_bits_buf = ps_ip->s_ivd_video_decode_ip_t.pv_stream_buffer;
+    ps_dec->u4_num_inp_bytes = u4_size;
+
+    impeg2d_bit_stream_init(&ps_dec->s_bit_stream,
+                            ps_ip->s_ivd_video_decode_ip_t.pv_stream_buffer,
+                            u4_size);
+
+    /* The input timestamp is stored as the buffer id */
+    ps_dec->u4_xdmBufID = ps_ip->s_ivd_video_decode_ip_t.u4_ts;
+
+    /* Process the bit stream */
+    e_error = impeg2d_process_video_bit_stream(ps_dec);
+
+    if((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
+    {
+        ps_op->s_ivd_video_decode_op_t.u4_error_code = e_error;
+
+        if(((IMPEG2D_ERROR_CODES_T)IVD_RES_CHANGED == e_error) ||
+           (IMPEG2D_UNSUPPORTED_DIMENSIONS == e_error))
+        {
+            /* No input is consumed and the header done flag is cleared */
+            ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed = 0;
+            ps_dec->u2_header_done = 0;
+
+            /* Only the unsupported dimensions case reports the reinit size */
+            if((IMPEG2D_ERROR_CODES_T)IVD_RES_CHANGED != e_error)
+            {
+                ps_op->s_ivd_video_decode_op_t.u4_pic_ht = ps_dec->u2_reinit_max_height;
+                ps_op->s_ivd_video_decode_op_t.u4_pic_wd = ps_dec->u2_reinit_max_width;
+            }
+        }
+        else
+        {
+            /* Bytes consumed is worked out before impeg2d_next_start_code() runs */
+            ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed =
+                impeg2d_frm_bytes_consumed(ps_dec,
+                                           ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes);
+
+            impeg2d_next_start_code(ps_dec);
+        }
+
+        return;
+    }
+
+    /* Clean decode: report bytes consumed, picture size, type and status */
+    ps_op->s_ivd_video_decode_op_t.u4_num_bytes_consumed =
+        impeg2d_frm_bytes_consumed(ps_dec,
+                                   ps_ip->s_ivd_video_decode_ip_t.u4_num_Bytes);
+
+    ps_op->s_ivd_video_decode_op_t.u4_pic_ht = ps_dec->u2_vertical_size;
+    ps_op->s_ivd_video_decode_op_t.u4_pic_wd = ps_dec->u2_horizontal_size;
+
+    /* Translate the picture type; D pictures are reported as I frames */
+    switch(ps_dec->e_pic_type)
+    {
+    case I_PIC:
+    case D_PIC:
+        ps_op->s_ivd_video_decode_op_t.e_pic_type = IV_I_FRAME;
+        break;
+
+    case P_PIC:
+        ps_op->s_ivd_video_decode_op_t.e_pic_type = IV_P_FRAME;
+        break;
+
+    case B_PIC:
+        ps_op->s_ivd_video_decode_op_t.e_pic_type = IV_B_FRAME;
+        break;
+
+    default:
+        ps_op->s_ivd_video_decode_op_t.e_pic_type = IV_FRAMETYPE_DEFAULT;
+        break;
+    }
+
+    ps_op->s_ivd_video_decode_op_t.u4_frame_decoded_flag = ps_dec->i4_frame_decoded;
+    ps_op->s_ivd_video_decode_op_t.u4_new_seq            = 0;
+    ps_op->s_ivd_video_decode_op_t.u4_error_code         = ps_dec->u4_error_code;
+}

diff --git a/decoder/impeg2d_function_selector_generic.c b/decoder/impeg2d_function_selector_generic.c
new file mode 100644
index 0000000..b8cdf03
--- /dev/null
+++ b/decoder/impeg2d_function_selector_generic.c

@@ -0,0 +1,103 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2d_function_selector_generic.c
+*
+* @brief
+*  Contains functions to initialize function pointers used in mpeg2
+*
+* @author
+*  Naveen
+*
+* @par List of Functions:
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "iv_datatypedef.h"
+#include "iv.h"
+#include "ithread.h"
+
+
+#include "impeg2_macros.h"
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_mem_func.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_job_queue.h"
+#include "impeg2_globals.h"
+
+#include "ivd.h"
+#include "impeg2d.h"
+#include "impeg2d_api.h"
+#include "impeg2d_debug.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_structs.h"
+#include "impeg2d_mc.h"
+#include "impeg2d_pic_proc.h"
+#include "impeg2d_vld_tables.h"
+#include "impeg2d_vld.h"
+#include "impeg2d_pic_proc.h"
+
+void impeg2d_init_function_ptr_generic(void *pv_codec)
+{
+    dec_state_t *ps_codec = (dec_state_t *)pv_codec;
+    /* pf_idct_recon[0..3]: entries 2 and 3 share impeg2_idct_recon */
+    ps_codec->pf_idct_recon[0] = impeg2_idct_recon_dc;
+    ps_codec->pf_idct_recon[1] = impeg2_idct_recon_dc_mismatch;
+    ps_codec->pf_idct_recon[2] = impeg2_idct_recon;
+    ps_codec->pf_idct_recon[3] = impeg2_idct_recon;
+    /* pf_mc[0..3]: fullx/halfx combined with fully/halfy */
+    ps_codec->pf_mc[0] = impeg2d_mc_fullx_fully;
+    ps_codec->pf_mc[1] = impeg2d_mc_fullx_halfy;
+    ps_codec->pf_mc[2] = impeg2d_mc_halfx_fully;
+    ps_codec->pf_mc[3] = impeg2d_mc_halfx_halfy;
+    /* Interpolation and macroblock copy */
+    ps_codec->pf_copy_mb     = impeg2_copy_mb;
+    ps_codec->pf_interpolate = impeg2_interpolate;
+    /* 8x8 variants of the four fullx/halfx, fully/halfy routines */
+    ps_codec->pf_fullx_fully_8x8 = impeg2_mc_fullx_fully_8x8;
+    ps_codec->pf_fullx_halfy_8x8 = impeg2_mc_fullx_halfy_8x8;
+    ps_codec->pf_halfx_fully_8x8 = impeg2_mc_halfx_fully_8x8;
+    ps_codec->pf_halfx_halfy_8x8 = impeg2_mc_halfx_halfy_8x8;
+    /* 8x8 block memset routines */
+    ps_codec->pf_memset_8bit_8x8_block         = impeg2_memset_8bit_8x8_block;
+    ps_codec->pf_memset_16bit_8x8_linear_block = impeg2_memset0_16bit_8x8_linear_block;
+    /* Frame copy and yuv420p format conversion routines */
+    ps_codec->pf_copy_yuv420p_buf                = impeg2_copy_frm_yuv420p;
+    ps_codec->pf_fmt_conv_yuv420p_to_yuv422ile   = impeg2_fmt_conv_yuv420p_to_yuv422ile;
+    ps_codec->pf_fmt_conv_yuv420p_to_yuv420sp_uv = impeg2_fmt_conv_yuv420p_to_yuv420sp_uv;
+    ps_codec->pf_fmt_conv_yuv420p_to_yuv420sp_vu = impeg2_fmt_conv_yuv420p_to_yuv420sp_vu;
+}

diff --git a/decoder/impeg2d_globals.c b/decoder/impeg2d_globals.c
new file mode 100644
index 0000000..8c71ecf
--- /dev/null
+++ b/decoder/impeg2d_globals.c

@@ -0,0 +1,158 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#include <stdio.h>
+#include "iv_datatypedef.h"
+#include "iv.h"
+
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_globals.h"
+#include "impeg2_mem_func.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_macros.h"
+
+#include "impeg2d.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_structs.h"
+#include "impeg2d_globals.h"
+#include "impeg2d_mc.h"
+
+/*****************************************************************************/
+/* MC params table                                                           */
+/*****************************************************************************/
+const mc_type_consts_t gas_impeg2d_mc_params_luma[][2] =
+{   /* 10 rows of two entries; each entry is {mvy_cf, mv_num_cf, frm_wd_cf, src_wd_cf, rows, dst_wd, dst_offset_scale} */
+    /* frame prediction in P frame picture */
+    {{1,0,1,1,MB_SIZE,MB_SIZE,0},
+     {1,0,1,1,MB_SIZE,MB_SIZE,0}},
+    /* field prediction in P frame picture */
+    {{2,0,1,2,MB_SIZE/2,MB_SIZE*2,0},
+     {2,0,1,2,MB_SIZE/2,MB_SIZE*2,1}},
+    /* frame prediction in B frame picture */
+    {{1,0,1,1,MB_SIZE,MB_SIZE,0},
+     {1,0,1,1,MB_SIZE,MB_SIZE,0}},
+    /* field prediction in B frame picture */
+    {{2,0,1,2,MB_SIZE/2,MB_SIZE*2,0},
+     {2,0,1,2,MB_SIZE/2,MB_SIZE*2,1}},
+    /* dual prime prediction in P frame picture */
+    {{2,0,1,2,MB_SIZE/2,MB_SIZE*2,0},
+     {2,0,1,2,MB_SIZE/2,MB_SIZE*2,1}},
+    /* rows 0-4 above: frame pictures; rows 5-9 below: field pictures */
+    /* field prediction in P field picture */
+    {{1,0,2,2,MB_SIZE,MB_SIZE,0},{1,0,2,2,MB_SIZE,MB_SIZE,0}},
+    /* 16x8 prediction in P field picture */
+    {{1,0,2,2,MB_SIZE/2,MB_SIZE,0},{1,8,2,2,MB_SIZE/2,MB_SIZE,(1*MB_SIZE/2)}},
+    /* field prediction in B field picture */
+    {{1,0,2,2,MB_SIZE,MB_SIZE,0},{1,0,2,2,MB_SIZE,MB_SIZE,0}},
+    /* 16x8 prediction in B field picture */
+    {{1,0,2,2,MB_SIZE/2,MB_SIZE,0},{1,8,2,2,MB_SIZE/2,MB_SIZE,(1*MB_SIZE/2)}},
+    /* dual prime prediction in P field picture */
+    {{1,0,2,2,MB_SIZE,MB_SIZE,0},{1,0,2,2,MB_SIZE,MB_SIZE,0}}
+
+};
+
+const mc_type_consts_t gas_impeg2d_mc_params_chroma[10][2] =
+{   /* same row order as the luma table, with MB_CHROMA_SIZE; each entry is {mvy_cf, mv_num_cf, frm_wd_cf, src_wd_cf, rows, dst_wd, dst_offset_scale} */
+    /* frame prediction in P frame picture */
+    {{1,0,1,1,MB_CHROMA_SIZE,MB_CHROMA_SIZE,0},{1,0,1,1,MB_CHROMA_SIZE,MB_CHROMA_SIZE,0}},
+    /* field prediction in P frame picture */
+    {{2,0,1,2,MB_CHROMA_SIZE/2,MB_CHROMA_SIZE*2,0},{2,0,1,2,MB_CHROMA_SIZE/2,
+    MB_CHROMA_SIZE*2,1}},
+    /* frame prediction in B frame picture */
+    {{1,0,1,1,MB_CHROMA_SIZE,MB_CHROMA_SIZE,0},{1,0,1,1,MB_CHROMA_SIZE,
+    MB_CHROMA_SIZE,0}},
+    /* field prediction in B frame picture */
+    {{2,0,1,2,MB_CHROMA_SIZE/2,MB_CHROMA_SIZE*2,0},{2,0,1,2,MB_CHROMA_SIZE/2,
+    MB_CHROMA_SIZE*2,1}},
+    /* dual prime prediction in P frame picture */
+    {{2,0,1,2,MB_CHROMA_SIZE/2,MB_CHROMA_SIZE*2,0},{2,0,1,2,MB_CHROMA_SIZE/2,
+    MB_CHROMA_SIZE*2,1}},
+    /* rows 0-4 above: frame pictures; rows 5-9 below: field pictures */
+    /* field prediction in P field picture */
+    {{1,0,2,2,MB_CHROMA_SIZE,MB_CHROMA_SIZE,0},{1,0,2,2,MB_CHROMA_SIZE,
+    MB_CHROMA_SIZE,0}},
+    /* 16x8 prediction in P field picture */
+    {{1,0,2,2,MB_CHROMA_SIZE/2,MB_CHROMA_SIZE,0},{1,4,2,2,MB_CHROMA_SIZE/2,
+    MB_CHROMA_SIZE,(1*MB_CHROMA_SIZE/2)}},
+    /* field prediction in B field picture */
+    {{1,0,2,2,MB_CHROMA_SIZE,MB_CHROMA_SIZE,0},{1,0,2,2,MB_CHROMA_SIZE,
+    MB_CHROMA_SIZE,0}},
+    /* 16x8 prediction in B field picture */
+    {{1,0,2,2,MB_CHROMA_SIZE/2,MB_CHROMA_SIZE,0},{1,4,2,2,MB_CHROMA_SIZE/2,
+    MB_CHROMA_SIZE,(1*MB_CHROMA_SIZE/2)}},
+    /* dual prime prediction in P field picture */
+    {{1,0,2,2,MB_CHROMA_SIZE,MB_CHROMA_SIZE,0},{1,0,2,2,MB_CHROMA_SIZE,
+    MB_CHROMA_SIZE,0}}
+
+};
+
+/*****************************************************************************/
+/* MC function pointer table                                                 */
+/*****************************************************************************/
+const dec_mb_params_t gas_impeg2d_func_frm_fw_or_bk[4] =
+{   /* each entry: {impeg2d_dec_* function, MC_* constant, impeg2d_mc_* function}; dec_mb_params_t is declared outside this file */
+    /* index 0: 0MV */
+    {impeg2d_dec_1mv_mb,MC_FRM_FW_OR_BK_1MV,impeg2d_mc_1mv},
+    /* index 1: motion_type Field based */
+    {impeg2d_dec_2mv_fw_or_bk_mb,MC_FRM_FW_OR_BK_2MV,impeg2d_mc_fw_or_bk_mb},
+    /* index 2: motion_type Frame based (same entry as index 0) */
+    {impeg2d_dec_1mv_mb,MC_FRM_FW_OR_BK_1MV,impeg2d_mc_1mv},
+    /* index 3: motion_type Dual prime based */
+    {impeg2d_dec_frm_dual_prime,MC_FRM_FW_DUAL_PRIME_1MV,impeg2d_mc_frm_dual_prime},
+};
+
+const dec_mb_params_t gas_impeg2d_func_fld_fw_or_bk[4] =
+{   /* each entry: {impeg2d_dec_* function, MC_* constant, impeg2d_mc_* function}; dec_mb_params_t is declared outside this file */
+    /* index 0: 0MV. NOTE(review): this entry uses the MC_FRM_ constant inside the field table - confirm that is intended */
+    {impeg2d_dec_1mv_mb,MC_FRM_FW_OR_BK_1MV,impeg2d_mc_1mv},
+    /* index 1: motion_type Field based */
+    {impeg2d_dec_1mv_mb,MC_FLD_FW_OR_BK_1MV,impeg2d_mc_1mv},
+    /* index 2: motion_type 16x8 MC */
+    {impeg2d_dec_2mv_fw_or_bk_mb,MC_FLD_FW_OR_BK_2MV,impeg2d_mc_fw_or_bk_mb},
+    /* index 3: motion_type Dual prime based */
+    {impeg2d_dec_fld_dual_prime,MC_FLD_FW_DUAL_PRIME_1MV,impeg2d_mc_fld_dual_prime},
+};
+
+
+const dec_mb_params_t gas_impeg2d_func_frm_bi_direct[4] =
+{   /* each entry: {impeg2d_dec_* function, MC_* constant, impeg2d_mc_* function}; indices 0 and 3 carry NULL functions */
+    {NULL,MC_FRM_FW_OR_BK_1MV,NULL}, /* index 0: no functions installed */
+    /* index 1: motion_type Field based */
+    {impeg2d_dec_4mv_mb,MC_FRM_FW_AND_BK_4MV,impeg2d_mc_4mv},
+    /* index 2: motion_type Frame based */
+    {impeg2d_dec_2mv_interp_mb,MC_FRM_FW_AND_BK_2MV,impeg2d_mc_2mv},
+    /* index 3: Reserved not applicable */
+    {NULL,MC_FRM_FW_OR_BK_1MV,NULL},
+};
+
+const dec_mb_params_t gas_impeg2d_func_fld_bi_direct[4] =
+{   /* each entry: {impeg2d_dec_* function, MC_* constant, impeg2d_mc_* function}; indices 0 and 3 carry NULL functions */
+    {NULL,MC_FRM_FW_OR_BK_1MV,NULL}, /* index 0: no functions installed */
+    /* index 1: motion_type Field based */
+    {impeg2d_dec_2mv_interp_mb,MC_FLD_FW_AND_BK_2MV,impeg2d_mc_2mv},
+    /* index 2: motion_type 16x8 MC */
+    {impeg2d_dec_4mv_mb,MC_FLD_FW_AND_BK_4MV,impeg2d_mc_4mv},
+    /* index 3: Reserved not applicable */
+    {NULL,MC_FRM_FW_OR_BK_1MV,NULL},
+};

diff --git a/decoder/impeg2d_globals.h b/decoder/impeg2d_globals.h
new file mode 100644
index 0000000..5b9c093
--- /dev/null
+++ b/decoder/impeg2d_globals.h

@@ -0,0 +1,43 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef __IMPEG2D_GLOBALS_H__
+#define __IMPEG2D_GLOBALS_H__
+
+typedef struct /* one set of MC parameters; the luma and chroma tables declared below hold two of these per row */
+{
+    UWORD16     mvy_cf;            /* 1st initializer; tables use 2 for field and dual prime prediction in frame pictures, 1 otherwise (presumably a vertical MV factor - confirm) */
+    UWORD16     mv_num_cf;         /* 2nd; tables use 0, except 8 (luma) / 4 (chroma) in the second entry of the 16x8 rows */
+    UWORD16     frm_wd_cf;         /* 3rd; tables use 1 in frame picture rows and 2 in field picture rows */
+    UWORD16     src_wd_cf;         /* 4th; tables use 1 for frame prediction in frame pictures and 2 in every other row */
+    UWORD32      rows;             /* 5th; tables use the block size (MB_SIZE / MB_CHROMA_SIZE) or half of it */
+    UWORD32      dst_wd;           /* 6th; tables use the block size or twice the block size */
+    UWORD32      dst_offset_scale; /* 7th; tables use 0, 1 or half the block size */
+}mc_type_consts_t;
+
+extern const mc_type_consts_t gas_impeg2d_mc_params_luma[][2];   /* 10 rows, defined in impeg2d_globals.c */
+extern const mc_type_consts_t gas_impeg2d_mc_params_chroma[][2]; /* 10 rows, defined in impeg2d_globals.c */
+/* NOTE: this header includes nothing itself; UWORD16/UWORD32 and dec_mb_params_t must already be declared by the includer */
+extern const dec_mb_params_t gas_impeg2d_func_frm_fw_or_bk[];    /* 4 entries, defined in impeg2d_globals.c */
+extern const dec_mb_params_t gas_impeg2d_func_fld_fw_or_bk[];    /* 4 entries, defined in impeg2d_globals.c */
+
+extern const dec_mb_params_t gas_impeg2d_func_frm_bi_direct[];   /* 4 entries, first and last hold NULL functions */
+extern const dec_mb_params_t gas_impeg2d_func_fld_bi_direct[];   /* 4 entries, first and last hold NULL functions */
+
+#endif /* __IMPEG2D_GLOBALS_H__ */

diff --git a/decoder/impeg2d_i_pic.c b/decoder/impeg2d_i_pic.c
new file mode 100644
index 0000000..1b45350
--- /dev/null
+++ b/decoder/impeg2d_i_pic.c

@@ -0,0 +1,328 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+#include "iv_datatypedef.h"
+#include "iv.h"
+
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_globals.h"
+#include "impeg2_mem_func.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_macros.h"
+
+#include "ivd.h"
+#include "impeg2d.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_structs.h"
+#include "impeg2d_vld_tables.h"
+#include "impeg2d_vld.h"
+#include "impeg2d_pic_proc.h"
+#include "impeg2d_debug.h"
+#include "impeg2d_globals.h"
+#include "impeg2d_mv_dec.h"
+
+/*******************************************************************************
+*  Function Name   : impeg2d_dec_i_mb_params
+*
+*  Description     : Parses the macroblock level header of an intra MB from
+*                    ps_dec->s_bit_stream: the address increment, the
+*                    macroblock_type, the optional dct_type, the optional
+*                    quantiser_scale_code and, when enabled, the concealment
+*                    motion vector followed by its marker bit.
+*
+*  Arguments       :
+*  ps_dec          : Decoder state (also supplies the bitstream)
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_dec_i_mb_params(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+    UWORD16 u2_hdr_bits;
+    UWORD16 u2_num_flush_bits;
+
+    /*-----------------------------------------------------------------------*/
+    /* macroblock_address_increment                                          */
+    /*                                                                       */
+    /* Scalable modes are not supported, so an I picture has no skipped MBs  */
+    /* and the increment is normally the single bit '1'. A larger increment  */
+    /* is only honoured for the first MB of a slice (Section 6.3.17), where  */
+    /* it gives the horizontal position of that MB; the position is clamped  */
+    /* to the last MB of the row.                                            */
+    /*-----------------------------------------------------------------------*/
+    if(impeg2d_bit_stream_nxt(ps_stream,1) == 1)
+    {
+        impeg2d_bit_stream_flush(ps_stream,1);
+    }
+    else if(ps_dec->u2_first_mb && ps_dec->u2_mb_x)
+    {
+        WORD32 i4_addr_incr = impeg2d_get_mb_addr_incr(ps_stream);
+
+        ps_dec->u2_mb_x = i4_addr_incr - 1;
+        ps_dec->u2_mb_x = MIN(ps_dec->u2_mb_x, (ps_dec->u2_num_horiz_mb - 1));
+    }
+
+    /*-----------------------------------------------------------------------*/
+    /* macroblock_type ('1' or '01'), dct_type (1 bit, only when             */
+    /* u2_read_dct_type is set) and quantiser_scale_code (5 bits, only for   */
+    /* macroblock_type '01'). All of them fit in the next 8 bits, so peek    */
+    /* once and flush the number of bits actually used.                      */
+    /*-----------------------------------------------------------------------*/
+    u2_hdr_bits = impeg2d_bit_stream_nxt(ps_stream,8);
+    if(BIT(u2_hdr_bits,7) == 1)
+    {
+        /* macroblock_type '1': no quantiser_scale_code follows */
+        if(ps_dec->u2_read_dct_type)
+        {
+            ps_dec->u2_field_dct = BIT(u2_hdr_bits,6);
+            u2_num_flush_bits = 2;
+        }
+        else
+        {
+            u2_num_flush_bits = 1;
+        }
+    }
+    else
+    {
+        /* macroblock_type '01': quantiser_scale_code is present */
+        UWORD16 u2_qscale_code;
+
+        if(ps_dec->u2_read_dct_type)
+        {
+            ps_dec->u2_field_dct = BIT(u2_hdr_bits,5);
+            u2_num_flush_bits = 8;
+        }
+        else
+        {
+            /* Without dct_type the code sits one bit higher; align it */
+            u2_hdr_bits >>= 1;
+            u2_num_flush_bits = 7;
+        }
+
+        u2_qscale_code = u2_hdr_bits & 0x1F;
+        if(ps_dec->u2_q_scale_type)
+        {
+            ps_dec->u1_quant_scale = gau1_impeg2_non_linear_quant_scale[u2_qscale_code];
+        }
+        else
+        {
+            ps_dec->u1_quant_scale = (u2_qscale_code << 1);
+        }
+    }
+    impeg2d_bit_stream_flush(ps_stream,u2_num_flush_bits);
+
+    /*************************************************************************/
+    /* Concealment motion vector, when the picture carries them              */
+    /*************************************************************************/
+    if(ps_dec->u2_concealment_motion_vectors)
+    {
+        /* One extra bit precedes the vector in field pictures */
+        if(ps_dec->u2_picture_structure != FRAME_PICTURE)
+        {
+            impeg2d_bit_stream_flush(ps_stream,1);
+        }
+
+        impeg2d_dec_mv(ps_stream,
+                       ps_dec->ai2_pred_mv[FORW][FIRST],
+                       ps_dec->ai2_mv[FORW][FIRST],
+                       ps_dec->au2_f_code[FORW],
+                       0,
+                       0);
+
+        /* Consume the marker bit; a wrong marker value is ignored */
+        (void)(impeg2d_bit_stream_get(ps_stream,1));
+    }
+
+    ps_dec->u2_first_mb = 0;
+}
+/*******************************************************************************
+*  Function Name   : impeg2d_dec_i_slice
+*
+*  Description     : Decodes the macroblocks of an I slice. For every MB the
+*                    MB header is parsed, then NUM_LUMA_BLKS luma blocks and
+*                    one U and one V block are decoded with pf_vld_inv_quant
+*                    and written to the current frame buffer through
+*                    pf_idct_recon.
+*
+*  Arguments       :
+*  ps_dec          : Decoder state
+*
+*  Values Returned : IVD_ERROR_NONE (cast to IMPEG2D_ERROR_CODES_T) when the
+*                    loop ends normally, the error returned by
+*                    pf_vld_inv_quant when a block fails to decode, or
+*                    IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR when the bitstream
+*                    offset moves past u4_max_offset.
+*******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_dec_i_slice(dec_state_t *ps_dec)
+{
+    WORD16 *pi2_vld_out;
+    UWORD32 i;
+    yuv_buf_t *ps_cur_frm_buf = &ps_dec->s_cur_frm_buf;
+
+    UWORD32 u4_frame_width = ps_dec->u2_frame_width;
+    UWORD32 u4_frm_offset = 0;
+    UWORD8  *pu1_out_p;
+    IMPEG2D_ERROR_CODES_T e_error = (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE;
+
+
+    /* Buffer handed to pf_vld_inv_quant and then to pf_idct_recon */
+    pi2_vld_out = ps_dec->ai2_vld_buf;
+
+
+    /* Field picture: step two frame lines per row, and start the bottom */
+    /* field one frame line into the buffer                              */
+    if(ps_dec->u2_picture_structure != FRAME_PICTURE)
+    {
+        u4_frame_width <<= 1;
+        if(ps_dec->u2_picture_structure == BOTTOM_FIELD)
+        {
+            u4_frm_offset = ps_dec->u2_frame_width;
+        }
+    }
+
+    do
+    {
+        UWORD32 u4_x_offset,u4_y_offset;
+        UWORD32 u4_blk_pos;
+        UWORD32 u4_x_dst_offset = 0;
+        UWORD32 u4_y_dst_offset = 0;
+
+
+        IMPEG2D_TRACE_MB_START(ps_dec->u2_mb_x, ps_dec->u2_mb_y);
+
+        /* Parse the MB header; this may update u2_mb_x, u2_field_dct and */
+        /* u1_quant_scale                                                 */
+        impeg2d_dec_i_mb_params(ps_dec);
+
+        /* Top-left luma sample of this MB (16x16) in the current frame */
+        u4_x_dst_offset = u4_frm_offset + (ps_dec->u2_mb_x << 4);
+        u4_y_dst_offset = (ps_dec->u2_mb_y << 4) * u4_frame_width;
+        pu1_out_p = ps_cur_frm_buf->pu1_y + u4_x_dst_offset + u4_y_dst_offset;
+
+        for(i = 0; i < NUM_LUMA_BLKS; ++i)
+        {
+
+            e_error = ps_dec->pf_vld_inv_quant(ps_dec, pi2_vld_out,
+                                            ps_dec->pu1_inv_scan_matrix, 1, Y_LUMA, 0);
+            if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
+            {
+                return e_error;
+            }
+
+            u4_x_offset = gai2_impeg2_blk_x_off[i];
+
+            /* Block row offset inside the MB depends on frame/field DCT */
+            if(ps_dec->u2_field_dct == 0)
+                u4_y_offset = gai2_impeg2_blk_y_off_frm[i] ;
+            else
+                u4_y_offset = gai2_impeg2_blk_y_off_fld[i] ;
+
+            u4_blk_pos = u4_y_offset * u4_frame_width + u4_x_offset;
+            IMPEG2D_IDCT_INP_STATISTICS(pi2_vld_out, ps_dec->u4_non_zero_cols, ps_dec->u4_non_zero_rows);
+
+            PROFILE_DISABLE_IDCT_IF0
+            {
+                /* Select the pf_idct_recon entry: i4_idx is 0 only when   */
+                /* (u4_non_zero_cols | u4_non_zero_rows) equals 1, and the */
+                /* entry is further selected by i4_last_value_one.         */
+                /* NOTE(review): presumably idx 0 is a DC-only variant -   */
+                /* confirm against the pf_idct_recon table setup.          */
+                WORD32 i4_idx;
+                i4_idx = 1;
+                if(1 == (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows))
+                    i4_idx = 0;
+
+                /* Prediction input is gau1_impeg2_zerobuf (intra block);  */
+                /* the destination stride is doubled for field DCT so that */
+                /* rows land on alternate lines.                           */
+                ps_dec->pf_idct_recon[i4_idx * 2 + ps_dec->i4_last_value_one](pi2_vld_out,
+                                                        ps_dec->ai2_idct_stg1,
+                                                        (UWORD8 *)gau1_impeg2_zerobuf,
+                                                        pu1_out_p + u4_blk_pos,
+                                                        8,
+                                                        8,
+                                                        u4_frame_width << ps_dec->u2_field_dct,
+                                                        ~ps_dec->u4_non_zero_cols, ~ps_dec->u4_non_zero_rows);
+
+            }
+
+        }
+
+        /* Convert the luma offsets to chroma offsets: x is halved; the y */
+        /* offset is a row count times the luma stride, and both the row  */
+        /* count and the stride are halved for chroma, hence the shift by */
+        /* 2.                                                             */
+        u4_x_dst_offset >>= 1;
+        u4_y_dst_offset >>= 2;
+
+        /* Chroma blocks are always written with the frame stride */
+        /* (u4_frame_width >> 1), independent of u2_field_dct.    */
+        /* U component */
+
+        e_error = ps_dec->pf_vld_inv_quant(ps_dec, pi2_vld_out,
+                                        ps_dec->pu1_inv_scan_matrix, 1, U_CHROMA, 0);
+        if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
+        {
+            return e_error;
+        }
+
+        pu1_out_p = ps_cur_frm_buf->pu1_u + u4_x_dst_offset + u4_y_dst_offset;
+        IMPEG2D_IDCT_INP_STATISTICS(pi2_vld_out, ps_dec->u4_non_zero_cols, ps_dec->u4_non_zero_rows);
+        PROFILE_DISABLE_IDCT_IF0
+        {
+            /* Same pf_idct_recon selection as for the luma blocks */
+            WORD32 i4_idx;
+            i4_idx = 1;
+            if(1 == (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows))
+                i4_idx = 0;
+
+            ps_dec->pf_idct_recon[i4_idx * 2 + ps_dec->i4_last_value_one](pi2_vld_out,
+                                                    ps_dec->ai2_idct_stg1,
+                                                    (UWORD8 *)gau1_impeg2_zerobuf,
+                                                    pu1_out_p,
+                                                    8,
+                                                    8,
+                                                    u4_frame_width >> 1,
+                                                    ~ps_dec->u4_non_zero_cols, ~ps_dec->u4_non_zero_rows);
+
+        }
+        /* V component */
+        e_error = ps_dec->pf_vld_inv_quant(ps_dec, pi2_vld_out,
+                                        ps_dec->pu1_inv_scan_matrix, 1, V_CHROMA, 0);
+        if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
+        {
+            return e_error;
+        }
+
+        pu1_out_p = ps_cur_frm_buf->pu1_v + u4_x_dst_offset + u4_y_dst_offset;
+        IMPEG2D_IDCT_INP_STATISTICS(pi2_vld_out, ps_dec->u4_non_zero_cols, ps_dec->u4_non_zero_rows);
+        PROFILE_DISABLE_IDCT_IF0
+        {
+            /* Same pf_idct_recon selection as for the luma blocks */
+            WORD32 i4_idx;
+            i4_idx = 1;
+            if(1 == (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows))
+                i4_idx = 0;
+            ps_dec->pf_idct_recon[i4_idx * 2 + ps_dec->i4_last_value_one](pi2_vld_out,
+                                                    ps_dec->ai2_idct_stg1,
+                                                    (UWORD8 *)gau1_impeg2_zerobuf,
+                                                    pu1_out_p,
+                                                    8,
+                                                    8,
+                                                    u4_frame_width >> 1,
+                                                    ~ps_dec->u4_non_zero_cols, ~ps_dec->u4_non_zero_rows);
+        }
+        ps_dec->u2_num_mbs_left--;
+
+
+        ps_dec->u2_mb_x++;
+
+        /* Stop if the reads for this MB went past the end of the input. */
+        /* Otherwise wrap to the next MB row, but only while more than   */
+        /* START_CODE_PREFIX_LEN units of input remain.                  */
+        /* NOTE(review): when the row end is reached with less input     */
+        /* left, u2_mb_x stays equal to u2_num_horiz_mb; confirm that    */
+        /* the loop condition below always stops decoding in that state. */
+        if(ps_dec->s_bit_stream.u4_offset > ps_dec->s_bit_stream.u4_max_offset)
+        {
+            return IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR;
+        }
+        else if ((ps_dec->u2_mb_x == ps_dec->u2_num_horiz_mb) && ((ps_dec->s_bit_stream.u4_offset + START_CODE_PREFIX_LEN) < ps_dec->s_bit_stream.u4_max_offset))
+        {
+            ps_dec->u2_mb_x = 0;
+            ps_dec->u2_mb_y++;
+        }
+
+    }
+    /* Continue while MBs remain and the next 23 bits are not all zero */
+    /* (presumably the beginning of a start code - confirm)            */
+    while(ps_dec->u2_num_mbs_left != 0 && impeg2d_bit_stream_nxt(&ps_dec->s_bit_stream,23) != 0x0);
+    return e_error;
+}

diff --git a/decoder/impeg2d_mc.c b/decoder/impeg2d_mc.c
new file mode 100644
index 0000000..da13a8c
--- /dev/null
+++ b/decoder/impeg2d_mc.c

@@ -0,0 +1,1373 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2d_mc.c
+*
+* @brief
+*  Contains MC function definitions for MPEG2 decoder
+*
+* @author
+*  Harish
+*
+* @par List of Functions:
+* - impeg2d_motion_comp()
+* - impeg2d_motion_comp_recon_buf()
+* - impeg2d_mc_1mv()
+* - impeg2d_mc_fw_or_bk_mb()
+* - impeg2d_mc_frm_dual_prime()
+* - impeg2d_mc_fld_dual_prime()
+* - impeg2d_mc_4mv()
+* - impeg2d_mc_2mv()
+* - impeg2d_dec_intra_mb()
+* - impeg2d_dec_skip_p_mb()
+* - impeg2d_dec_skip_b_mb()
+* - impeg2d_dec_skip_mbs()
+* - impeg2d_dec_0mv_coded_mb()
+* - impeg2d_mc_halfx_halfy()
+* - impeg2d_mc_halfx_fully()
+* - impeg2d_mc_fullx_halfy()
+* - impeg2d_mc_fullx_fully()
+* - impeg2d_set_mc_params()
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+#include <string.h>
+
+#include "iv_datatypedef.h"
+#include "iv.h"
+
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_globals.h"
+#include "impeg2_mem_func.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_macros.h"
+
+#include "ivd.h"
+#include "impeg2d.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_structs.h"
+#include "impeg2d_globals.h"
+#include "impeg2d_pic_proc.h"
+#include "impeg2d_debug.h"
+#include "impeg2d_mv_dec.h"
+#include "impeg2d_mc.h"
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_motion_comp                                      */
+/*                                                                           */
+/*  Description   : Runs motion compensation for the Y, U and V planes of    */
+/*                  one prediction and stores the result in ps_buf, using    */
+/*                  the "res_buf" destination offsets and widths             */
+/*                                                                           */
+/*  Inputs        : ps_dec    - Decoder state (supplies the pf_mc table)     */
+/*                  ps_params - Reference pointers, MC mode, source and      */
+/*                              destination offsets/widths and block size    */
+/*                              for luma and chroma                          */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Calls ps_dec->pf_mc[] once per plane; the entry is       */
+/*                  chosen by the luma or chroma u4_mode. U and V share      */
+/*                  the chroma parameters                                    */
+/*                                                                           */
+/*  Outputs       : ps_buf    - Buffer receiving the prediction              */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         14 09 2005   Harish M        First Version                        */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2d_motion_comp(dec_state_t *ps_dec, mb_mc_params_t *ps_params,yuv_buf_t *ps_buf)
+{
+    UWORD8 *pu1_dst;
+
+    PROFILE_DISABLE_MC_RETURN;
+
+    /* Y plane */
+    pu1_dst = ps_buf->pu1_y + ps_params->s_luma.u4_dst_offset_res_buf;
+    ps_dec->pf_mc[ps_params->s_luma.u4_mode](
+                (void *)ps_dec,
+                ps_params->s_ref.pu1_y + ps_params->s_luma.u4_src_offset,
+                ps_params->s_luma.u4_src_wd,
+                pu1_dst,
+                ps_params->s_luma.u4_dst_wd_res_buf,
+                ps_params->s_luma.u4_cols,
+                ps_params->s_luma.u4_rows);
+
+    /* U plane */
+    pu1_dst = ps_buf->pu1_u + ps_params->s_chroma.u4_dst_offset_res_buf;
+    ps_dec->pf_mc[ps_params->s_chroma.u4_mode](
+                (void *)ps_dec,
+                ps_params->s_ref.pu1_u + ps_params->s_chroma.u4_src_offset,
+                ps_params->s_chroma.u4_src_wd,
+                pu1_dst,
+                ps_params->s_chroma.u4_dst_wd_res_buf,
+                ps_params->s_chroma.u4_cols,
+                ps_params->s_chroma.u4_rows);
+
+    /* V plane, same chroma parameters as U */
+    pu1_dst = ps_buf->pu1_v + ps_params->s_chroma.u4_dst_offset_res_buf;
+    ps_dec->pf_mc[ps_params->s_chroma.u4_mode](
+                (void *)ps_dec,
+                ps_params->s_ref.pu1_v + ps_params->s_chroma.u4_src_offset,
+                ps_params->s_chroma.u4_src_wd,
+                pu1_dst,
+                ps_params->s_chroma.u4_dst_wd_res_buf,
+                ps_params->s_chroma.u4_cols,
+                ps_params->s_chroma.u4_rows);
+}
+
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_motion_comp_recon_buf                            */
+/*                                                                           */
+/*  Description   : Runs motion compensation for the Y, U and V planes of    */
+/*                  one prediction and stores the result in ps_dest_buf,     */
+/*                  using the "cur_frm" destination offsets and widths       */
+/*                                                                           */
+/*  Inputs        : ps_dec    - Decoder state (supplies the pf_mc table)     */
+/*                  ps_params - Reference pointers, MC mode, source and      */
+/*                              destination offsets/widths and block size    */
+/*                              for luma and chroma                          */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Calls ps_dec->pf_mc[] once per plane; the entry is       */
+/*                  chosen by the luma or chroma u4_mode. U and V share      */
+/*                  the chroma parameters                                    */
+/*                                                                           */
+/*  Outputs       : ps_dest_buf - Buffer receiving the prediction            */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         14 09 2005   Harish M        First Version                        */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2d_motion_comp_recon_buf(dec_state_t *ps_dec,
+                                     mb_mc_params_t *ps_params,
+                                     yuv_buf_t *ps_dest_buf)
+{
+    UWORD8 *pu1_dst;
+
+    PROFILE_DISABLE_MC_RETURN;
+
+    /* Y plane */
+    pu1_dst = ps_dest_buf->pu1_y + ps_params->s_luma.u4_dst_offset_cur_frm;
+    ps_dec->pf_mc[ps_params->s_luma.u4_mode](
+                ps_dec,
+                ps_params->s_ref.pu1_y + ps_params->s_luma.u4_src_offset,
+                ps_params->s_luma.u4_src_wd,
+                pu1_dst,
+                ps_params->s_luma.u4_dst_wd_cur_frm,
+                ps_params->s_luma.u4_cols,
+                ps_params->s_luma.u4_rows);
+
+    /* U plane */
+    pu1_dst = ps_dest_buf->pu1_u + ps_params->s_chroma.u4_dst_offset_cur_frm;
+    ps_dec->pf_mc[ps_params->s_chroma.u4_mode](
+                ps_dec,
+                ps_params->s_ref.pu1_u + ps_params->s_chroma.u4_src_offset,
+                ps_params->s_chroma.u4_src_wd,
+                pu1_dst,
+                ps_params->s_chroma.u4_dst_wd_cur_frm,
+                ps_params->s_chroma.u4_cols,
+                ps_params->s_chroma.u4_rows);
+
+    /* V plane, same chroma parameters as U */
+    pu1_dst = ps_dest_buf->pu1_v + ps_params->s_chroma.u4_dst_offset_cur_frm;
+    ps_dec->pf_mc[ps_params->s_chroma.u4_mode](
+                ps_dec,
+                ps_params->s_ref.pu1_v + ps_params->s_chroma.u4_src_offset,
+                ps_params->s_chroma.u4_src_wd,
+                pu1_dst,
+                ps_params->s_chroma.u4_dst_wd_cur_frm,
+                ps_params->s_chroma.u4_cols,
+                ps_params->s_chroma.u4_rows);
+}
+
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_mc_1mv                                           */
+/*                                                                           */
+/*  Description   : Motion compensation for a macroblock that uses a single  */
+/*                  set of MC parameters; the prediction is written          */
+/*                  straight into ps_dec->s_dest_buf                         */
+/*                                                                           */
+/*  Inputs        : ps_dec - Decoder state; the parameter set used is        */
+/*                           as_mb_mc_params[e_mb_pred][FIRST]               */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Single call to impeg2d_motion_comp_recon_buf()           */
+/*                                                                           */
+/*  Outputs       : ps_dec->s_dest_buf receives the prediction               */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         14 09 2005   Harish M        First Version                        */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2d_mc_1mv(dec_state_t *ps_dec)
+{
+    mb_mc_params_t *ps_mc_params = &ps_dec->as_mb_mc_params[ps_dec->e_mb_pred][FIRST];
+    yuv_buf_t *ps_dst = &ps_dec->s_dest_buf;
+
+    impeg2d_motion_comp_recon_buf(ps_dec, ps_mc_params, ps_dst);
+}
+
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_mc_fw_or_bk_mb                                   */
+/*                                                                           */
+/*  Description   : Motion compensation using two sets of MC parameters,     */
+/*                  both written straight into ps_dec->s_dest_buf            */
+/*                                                                           */
+/*  Inputs        : ps_dec - Decoder state; the parameter sets used are      */
+/*                           as_mb_mc_params[FORW][FIRST] and                */
+/*                           as_mb_mc_params[FORW][SECOND]                   */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Two calls to impeg2d_motion_comp_recon_buf(), FIRST      */
+/*                  followed by SECOND. Only the FORW entries are read;      */
+/*                  presumably the caller stores the active direction        */
+/*                  there (to be confirmed against the caller)               */
+/*                                                                           */
+/*  Outputs       : ps_dec->s_dest_buf receives the prediction               */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         14 09 2005   Harish M        First Version                        */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2d_mc_fw_or_bk_mb(dec_state_t *ps_dec)
+{
+    yuv_buf_t *ps_dst = &ps_dec->s_dest_buf;
+
+    impeg2d_motion_comp_recon_buf(ps_dec,
+                                  &ps_dec->as_mb_mc_params[FORW][FIRST],
+                                  ps_dst);
+    impeg2d_motion_comp_recon_buf(ps_dec,
+                                  &ps_dec->as_mb_mc_params[FORW][SECOND],
+                                  ps_dst);
+}
+
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_mc_frm_dual_prime                                */
+/*                                                                           */
+/*  Description   : Dual prime motion compensation for the frame case:       */
+/*                  builds two predictions in the intermediate buffers       */
+/*                  and combines them into ps_dec->s_dest_buf                */
+/*                                                                           */
+/*  Inputs        : ps_dec - Decoder state; all four entries of              */
+/*                           as_mb_mc_params[FORW/BACK][FIRST/SECOND]        */
+/*                           are used                                        */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : FORW entries are predicted into s_mc_fw_buf and BACK     */
+/*                  entries into s_mc_bk_buf with impeg2d_motion_comp(),     */
+/*                  FIRST before SECOND; pf_interpolate() is then called     */
+/*                  with both buffers, s_dest_buf and u2_picture_width       */
+/*                                                                           */
+/*  Outputs       : ps_dec->s_dest_buf is passed to pf_interpolate() as      */
+/*                  the destination                                          */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         14 09 2005   Harish M        First Version                        */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2d_mc_frm_dual_prime(dec_state_t *ps_dec)
+{
+    yuv_buf_t *ps_fw_pred = &ps_dec->s_mc_fw_buf;
+    yuv_buf_t *ps_bk_pred = &ps_dec->s_mc_bk_buf;
+
+    /* FIRST parameter sets */
+    impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[FORW][FIRST], ps_fw_pred);
+    impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[BACK][FIRST], ps_bk_pred);
+
+    /* SECOND parameter sets */
+    impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[FORW][SECOND], ps_fw_pred);
+    impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[BACK][SECOND], ps_bk_pred);
+
+    /* Combine the two predictions into the destination buffer */
+    ps_dec->pf_interpolate(ps_fw_pred,
+                           ps_bk_pred,
+                           &ps_dec->s_dest_buf,
+                           ps_dec->u2_picture_width);
+}
+
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_mc_fld_dual_prime                                */
+/*                                                                           */
+/*  Description   : Perform motion compensation and store the resulting block*/
+/*                  in the buf                                               */
+/*                                                                           */
+/*  Inputs        : params - Parameters required to do motion compensation   */
+/*                                                                           */
+/*  Globals       :                                                          */
+/*                                                                           */
+/*  Processing    : Calls appropriate functions depending on the mode of     */
+/*                  compensation                                             */
+/*                                                                           */
+/*  Outputs       : buf       - Buffer for the motion compensation result    */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         14 09 2005   Harish M        First Version                        */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2d_mc_fld_dual_prime(dec_state_t *ps_dec)
+{
+    /* Both predictions use the forward row of the MC parameter table */
+    mb_mc_params_t *ps_fwd_params = ps_dec->as_mb_mc_params[FORW];
+
+    /* FIRST forward params fill the fw buffer, SECOND forward params fill */
+    /* the bk buffer                                                       */
+    impeg2d_motion_comp(ps_dec, &ps_fwd_params[FIRST], &ps_dec->s_mc_fw_buf);
+    impeg2d_motion_comp(ps_dec, &ps_fwd_params[SECOND], &ps_dec->s_mc_bk_buf);
+
+    /* Run the interpolate routine on the two intermediate buffers, with   */
+    /* s_dest_buf passed as the output buffer argument                     */
+    ps_dec->pf_interpolate(&ps_dec->s_mc_fw_buf,
+                           &ps_dec->s_mc_bk_buf,
+                           &ps_dec->s_dest_buf,
+                           ps_dec->u2_picture_width);
+}
+
+
+
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_mc_4mv                                      */
+/*                                                                           */
+/*  Description   : Perform motion compensation and store the resulting block*/
+/*                  in the buf                                               */
+/*                                                                           */
+/*  Inputs        : params - Parameters required to do motion compensation   */
+/*                                                                           */
+/*  Globals       :                                                          */
+/*                                                                           */
+/*  Processing    : Calls appropriate functions depending on the mode of     */
+/*                  compensation                                             */
+/*                                                                           */
+/*  Outputs       : buf       - Buffer for the motion compensation result    */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         14 09 2005   Harish M        First Version                        */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2d_mc_4mv(dec_state_t *ps_dec)
+{
+    /* Forward and backward rows of the per-MB motion compensation params */
+    mb_mc_params_t *ps_fwd_params = ps_dec->as_mb_mc_params[FORW];
+    mb_mc_params_t *ps_bwd_params = ps_dec->as_mb_mc_params[BACK];
+
+    /* Four motion compensation passes: FIRST forward/backward, then       */
+    /* SECOND forward/backward. Forward passes write the fw buffer and     */
+    /* backward passes write the bk buffer.                                */
+    impeg2d_motion_comp(ps_dec, &ps_fwd_params[FIRST], &ps_dec->s_mc_fw_buf);
+    impeg2d_motion_comp(ps_dec, &ps_bwd_params[FIRST], &ps_dec->s_mc_bk_buf);
+    impeg2d_motion_comp(ps_dec, &ps_fwd_params[SECOND], &ps_dec->s_mc_fw_buf);
+    impeg2d_motion_comp(ps_dec, &ps_bwd_params[SECOND], &ps_dec->s_mc_bk_buf);
+
+    /* Run the interpolate routine on the two intermediate buffers, with   */
+    /* s_dest_buf passed as the output buffer argument                     */
+    ps_dec->pf_interpolate(&ps_dec->s_mc_fw_buf,
+                           &ps_dec->s_mc_bk_buf,
+                           &ps_dec->s_dest_buf,
+                           ps_dec->u2_picture_width);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_mc_2mv                                         */
+/*                                                                           */
+/*  Description   : Perform motion compensation and store the resulting block*/
+/*                  in the buf                                               */
+/*                                                                           */
+/*  Inputs        : params - Parameters required to do motion compensation   */
+/*                                                                           */
+/*  Globals       :                                                          */
+/*                                                                           */
+/*  Processing    : Calls appropriate functions depending on the mode of     */
+/*                  compensation                                             */
+/*                                                                           */
+/*  Outputs       : buf       - Buffer for the motion compensation result    */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         14 09 2005   Harish M        First Version                        */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2d_mc_2mv(dec_state_t *ps_dec)
+{
+    /* Only the FIRST parameter set of each direction is used here */
+    mb_mc_params_t *ps_fwd_first = &ps_dec->as_mb_mc_params[FORW][FIRST];
+    mb_mc_params_t *ps_bwd_first = &ps_dec->as_mb_mc_params[BACK][FIRST];
+
+    /* Forward prediction into the fw buffer, backward into the bk buffer */
+    impeg2d_motion_comp(ps_dec, ps_fwd_first, &ps_dec->s_mc_fw_buf);
+    impeg2d_motion_comp(ps_dec, ps_bwd_first, &ps_dec->s_mc_bk_buf);
+
+    /* Run the interpolate routine on the two intermediate buffers, with   */
+    /* s_dest_buf passed as the output buffer argument                     */
+    ps_dec->pf_interpolate(&ps_dec->s_mc_fw_buf,
+                           &ps_dec->s_mc_bk_buf,
+                           &ps_dec->s_dest_buf,
+                           ps_dec->u2_picture_width);
+}
+
+/*****************************************************************************
+*  Function Name   : impeg2d_dec_intra_mb
+*
+*  Description     : Performs decoding of Intra MB
+*
+*  Arguments       :
+*  dec             : Decoder state
+*
+*  Values Returned : None
+*****************************************************************************/
+void impeg2d_dec_intra_mb(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream;
+
+    /* 0x3F sets the low six bits of the coded block pattern */
+    ps_dec->u2_cbp = 0x3F;
+
+    if(!ps_dec->u2_concealment_motion_vectors)
+    {
+        /* No concealment vector to read: reset the motion vector predictors */
+        memset(ps_dec->ai2_pred_mv, 0, sizeof(ps_dec->ai2_pred_mv));
+        return;
+    }
+
+    ps_stream = &ps_dec->s_bit_stream;
+
+    /* Decode the concealment motion vector */
+    impeg2d_dec_mv(ps_stream,
+                   ps_dec->ai2_pred_mv[FORW][FIRST],
+                   ps_dec->ai2_mv[FORW][FIRST],
+                   ps_dec->au2_f_code[FORW],
+                   0,
+                   ps_dec->u2_fld_pic);
+
+    /* Copy the first forward predictor into the second forward predictor */
+    ps_dec->ai2_pred_mv[FORW][SECOND][MV_X] = ps_dec->ai2_pred_mv[FORW][FIRST][MV_X];
+    ps_dec->ai2_pred_mv[FORW][SECOND][MV_Y] = ps_dec->ai2_pred_mv[FORW][FIRST][MV_Y];
+
+    /* Consume the one-bit marker; a zero value is deliberately tolerated */
+    if(0 == impeg2d_bit_stream_get(ps_stream, 1))
+    {
+        /* Ignore marker bit error */
+    }
+}
+
+/*****************************************************************************
+*  Function Name   : impeg2d_dec_skip_p_mb
+*
+*  Description     : Performs decoding needed for Skipped MB encountered in
+*                    P Pictures and B Pictures with previous MB not bi-predicted
+*
+*  Arguments       :
+*  ps_dec          : Decoder state
+*  u4_num_of_mbs   : Number of consecutive skipped MBs to process
+*
+*  Values Returned : None
+*****************************************************************************/
+void impeg2d_dec_skip_p_mb(dec_state_t *ps_dec, WORD32 u4_num_of_mbs)
+{
+    WORD16 *pi2_fwd_mv;
+    e_mb_type_t e_mc_type;
+    mb_mc_params_t *ps_fwd_mc;
+    WORD32 i4_mbs_remaining;
+    WORD32 i4_is_p_pic;
+    UWORD32 u4_dst_stride;
+    UWORD32 u4_off_x;
+    UWORD32 u4_off_y;
+    UWORD32 u4_field_offset = 0;
+    yuv_buf_t s_dst;
+
+    /* Destination stride is the frame width; for field pictures it is     */
+    /* doubled, and the bottom field starts one frame width further in.    */
+    u4_dst_stride = ps_dec->u2_frame_width;
+    if(ps_dec->u2_picture_structure != FRAME_PICTURE)
+    {
+        u4_dst_stride <<= 1;
+        if(ps_dec->u2_picture_structure == BOTTOM_FIELD)
+        {
+            u4_field_offset = ps_dec->u2_frame_width;
+        }
+    }
+
+    i4_mbs_remaining = u4_num_of_mbs;
+    while(i4_mbs_remaining > 0)
+    {
+        e_mc_type = (FRAME_PICTURE == ps_dec->u2_picture_structure)
+                        ? MC_FRM_FW_AND_BK_2MV
+                        : MC_FLD_FW_AND_BK_2MV;
+
+        ps_dec->u2_prev_intra_mb = 0;
+
+        /* Both picture types work on the FIRST forward vector and the     */
+        /* FIRST forward MC parameter set                                  */
+        pi2_fwd_mv = (WORD16 *)&(ps_dec->ai2_mv[FORW][FIRST]);
+        ps_fwd_mc  = &ps_dec->as_mb_mc_params[FORW][FIRST];
+
+        i4_is_p_pic = (ps_dec->e_pic_type == P_PIC);
+
+        if(i4_is_p_pic)
+        {
+            /* P picture: reset the predictors and use a zero vector */
+            memset(ps_dec->ai2_pred_mv, 0, sizeof(ps_dec->ai2_pred_mv));
+            pi2_fwd_mv[MV_Y] = 0;
+            pi2_fwd_mv[MV_X] = 0;
+        }
+        else
+        {
+            /* Otherwise reuse the predictor of the current prediction mode */
+            pi2_fwd_mv[MV_X] = ps_dec->ai2_pred_mv[ps_dec->e_mb_pred][FIRST][MV_X];
+            pi2_fwd_mv[MV_Y] = ps_dec->ai2_pred_mv[ps_dec->e_mb_pred][FIRST][MV_Y];
+        }
+
+        /* A skipped MB carries no coded blocks */
+        ps_dec->u2_cbp = 0;
+
+        ps_fwd_mc->s_ref = ps_dec->as_ref_buf[ps_dec->e_mb_pred][ps_dec->u2_fld_parity];
+
+        impeg2d_set_mc_params(&ps_fwd_mc->s_luma,
+                              &ps_fwd_mc->s_chroma,
+                              e_mc_type,
+                              0,
+                              pi2_fwd_mv,
+                              ps_dec->u2_mb_x,
+                              ps_dec->u2_mb_y,
+                              ps_dec->u2_frame_width,
+                              ps_dec->u2_frame_height,
+                              ps_dec->u2_picture_width);
+
+        /* Luma offsets of this MB inside the destination frame buffer */
+        u4_off_x = (ps_dec->u2_mb_x << 4) + u4_field_offset;
+        u4_off_y = (ps_dec->u2_mb_y << 4) * u4_dst_stride;
+
+        if(i4_is_p_pic)
+        {
+            s_dst.pu1_y = ps_dec->s_cur_frm_buf.pu1_y + u4_off_x + u4_off_y;
+
+            /* Chroma offsets: x is halved, y is quartered */
+            u4_off_x = u4_off_x >> 1;
+            u4_off_y = u4_off_y >> 2;
+
+            s_dst.pu1_u = ps_dec->s_cur_frm_buf.pu1_u + u4_off_x + u4_off_y;
+            s_dst.pu1_v = ps_dec->s_cur_frm_buf.pu1_v + u4_off_x + u4_off_y;
+
+            /* Advance the reference pointers to the source block */
+            ps_fwd_mc->s_ref.pu1_y += ps_fwd_mc->s_luma.u4_src_offset;
+            ps_fwd_mc->s_ref.pu1_u += ps_fwd_mc->s_chroma.u4_src_offset;
+            ps_fwd_mc->s_ref.pu1_v += ps_fwd_mc->s_chroma.u4_src_offset;
+
+            /* Straight copy of the MB from reference to destination */
+            ps_dec->pf_copy_mb(&ps_fwd_mc->s_ref,
+                               &s_dst,
+                               ps_fwd_mc->s_luma.u4_src_wd,
+                               u4_dst_stride);
+        }
+        else
+        {
+            /* Record where the result goes, then run motion compensation  */
+            /* directly into the current frame buffer                      */
+            ps_fwd_mc->s_luma.u4_dst_offset_res_buf = u4_off_x + u4_off_y;
+            ps_fwd_mc->s_luma.u4_dst_wd_res_buf     = u4_dst_stride;
+
+            /* Chroma offsets: x is halved, y is quartered */
+            u4_off_x = u4_off_x >> 1;
+            u4_off_y = u4_off_y >> 2;
+
+            ps_fwd_mc->s_chroma.u4_dst_offset_res_buf = u4_off_x + u4_off_y;
+            ps_fwd_mc->s_chroma.u4_dst_wd_res_buf     = u4_dst_stride >> 1;
+
+            impeg2d_motion_comp(ps_dec, ps_fwd_mc, &ps_dec->s_cur_frm_buf);
+        }
+
+        /********************************************************************/
+        /* Common MB processing tasks                                       */
+        /********************************************************************/
+        ps_dec->u2_mb_x++;
+        ps_dec->u2_num_mbs_left--;
+
+        /* Move to the next MB row at the end of a row, but only while the  */
+        /* stream offset plus START_CODE_PREFIX_LEN is below u4_max_offset  */
+        if((ps_dec->u2_mb_x == ps_dec->u2_num_horiz_mb) &&
+           ((ps_dec->s_bit_stream.u4_offset + START_CODE_PREFIX_LEN) < ps_dec->s_bit_stream.u4_max_offset))
+        {
+            ps_dec->u2_mb_x = 0;
+            ps_dec->u2_mb_y++;
+        }
+
+        i4_mbs_remaining--;
+    }
+}
+
+/*******************************************************************************
+*  Function Name   : impeg2d_dec_skip_b_mb
+*
+*  Description     : Performs processing needed for Skipped MB encountered in
+*                    B Pictures with previous MB bi-predicted.
+*
+*  Arguments       :
+*  ps_dec          : Decoder state
+*  u4_num_of_mbs   : Number of consecutive skipped MBs to process
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_dec_skip_b_mb(dec_state_t *ps_dec, WORD32 u4_num_of_mbs)
+{
+    WORD16 *pi2_cur_mv;
+    UWORD32 u4_dir;
+    e_mb_type_t e_mc_type;
+    mb_mc_params_t *ps_dir_mc;
+    WORD32 i4_mbs_remaining;
+    UWORD32 u4_dst_stride;
+    yuv_buf_t s_dst;
+    UWORD32 u4_off_x;
+    UWORD32 u4_off_y;
+    UWORD32 u4_field_offset = 0;
+
+    u4_dst_stride = ps_dec->u2_frame_width;
+    s_dst = ps_dec->s_cur_frm_buf;
+
+    /* For field pictures the stride is doubled, and the bottom field      */
+    /* starts one frame width further into the buffer                      */
+    if(ps_dec->u2_picture_structure != FRAME_PICTURE)
+    {
+        u4_dst_stride <<= 1;
+        if(ps_dec->u2_picture_structure == BOTTOM_FIELD)
+        {
+            u4_field_offset = ps_dec->u2_frame_width;
+        }
+    }
+
+    i4_mbs_remaining = u4_num_of_mbs;
+    while(i4_mbs_remaining > 0)
+    {
+        ps_dec->u2_prev_intra_mb = 0;
+
+        e_mc_type = (FRAME_PICTURE == ps_dec->u2_picture_structure)
+                        ? MC_FRM_FW_AND_BK_2MV
+                        : MC_FLD_FW_AND_BK_2MV;
+
+        /* Forward vector taken from the forward predictor */
+        pi2_cur_mv = (WORD16 *)&(ps_dec->ai2_mv[FORW][FIRST]);
+        pi2_cur_mv[MV_X] = ps_dec->ai2_pred_mv[FORW][FIRST][MV_X];
+        pi2_cur_mv[MV_Y] = ps_dec->ai2_pred_mv[FORW][FIRST][MV_Y];
+
+        /* Backward vector taken from the backward predictor */
+        pi2_cur_mv = (WORD16 *)&(ps_dec->ai2_mv[BACK][FIRST]);
+        pi2_cur_mv[MV_X] = ps_dec->ai2_pred_mv[BACK][FIRST][MV_X];
+        pi2_cur_mv[MV_Y] = ps_dec->ai2_pred_mv[BACK][FIRST][MV_Y];
+
+        /* A skipped MB carries no coded blocks */
+        ps_dec->u2_cbp = 0;
+
+        /* Set up the MC parameters for direction index 0 and then 1 */
+        for(u4_dir = 0; u4_dir < 2; u4_dir++)
+        {
+            pi2_cur_mv = (WORD16 *)&ps_dec->ai2_mv[u4_dir][FIRST];
+            ps_dir_mc  = &ps_dec->as_mb_mc_params[u4_dir][FIRST];
+            ps_dir_mc->s_ref = ps_dec->as_ref_buf[u4_dir][ps_dec->u2_fld_parity];
+
+            impeg2d_set_mc_params(&ps_dir_mc->s_luma,
+                                  &ps_dir_mc->s_chroma,
+                                  e_mc_type,
+                                  0,
+                                  pi2_cur_mv,
+                                  ps_dec->u2_mb_x,
+                                  ps_dec->u2_mb_y,
+                                  ps_dec->u2_frame_width,
+                                  ps_dec->u2_frame_height,
+                                  ps_dec->u2_picture_width);
+        }
+
+        /* Forward prediction into the fw buffer, backward into the bk buffer */
+        impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[FORW][FIRST], &ps_dec->s_mc_fw_buf);
+        impeg2d_motion_comp(ps_dec, &ps_dec->as_mb_mc_params[BACK][FIRST], &ps_dec->s_mc_bk_buf);
+
+        /* Luma position of this MB inside the current frame buffer */
+        u4_off_x = (ps_dec->u2_mb_x << 4) + u4_field_offset;
+        u4_off_y = (ps_dec->u2_mb_y << 4) * u4_dst_stride;
+        s_dst.pu1_y = ps_dec->s_cur_frm_buf.pu1_y + u4_off_x + u4_off_y;
+
+        /* Chroma offsets: x is halved, y is quartered */
+        u4_off_x = u4_off_x >> 1;
+        u4_off_y = u4_off_y >> 2;
+        s_dst.pu1_u = ps_dec->s_cur_frm_buf.pu1_u + u4_off_x + u4_off_y;
+        s_dst.pu1_v = ps_dec->s_cur_frm_buf.pu1_v + u4_off_x + u4_off_y;
+
+        /* Run the interpolate routine on both predictions, writing at the */
+        /* MB position computed above                                      */
+        ps_dec->pf_interpolate(&ps_dec->s_mc_fw_buf,
+                               &ps_dec->s_mc_bk_buf,
+                               &s_dst,
+                               u4_dst_stride);
+
+        /********************************************************************/
+        /* Common MB processing tasks                                       */
+        /********************************************************************/
+        ps_dec->u2_mb_x++;
+        ps_dec->u2_num_mbs_left--;
+
+        /* Move to the next MB row at the end of a row, but only while the  */
+        /* stream offset plus START_CODE_PREFIX_LEN is below u4_max_offset  */
+        if((ps_dec->u2_mb_x == ps_dec->u2_num_horiz_mb) &&
+           ((ps_dec->s_bit_stream.u4_offset + START_CODE_PREFIX_LEN) < ps_dec->s_bit_stream.u4_max_offset))
+        {
+            ps_dec->u2_mb_x = 0;
+            ps_dec->u2_mb_y++;
+        }
+
+        i4_mbs_remaining--;
+    }
+}
+/*******************************************************************************
+*  Function Name   : impeg2d_dec_skip_mbs
+*
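+*  Description     : Processes a run of skipped MBs. Dispatches to
+*                    impeg2d_dec_skip_b_mb when the current prediction mode is
+*                    BIDIRECT and to impeg2d_dec_skip_p_mb otherwise, then
+*                    resets the default DC predictors.
+*
+*  Arguments       :
+*  ps_dec          : Decoder state
+*  u2_num_skip_mbs : Number of skipped MBs to process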
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_dec_skip_mbs(dec_state_t *ps_dec, UWORD16 u2_num_skip_mbs)
+{
+    PROFILE_DISABLE_SKIP_MB();
+
+    /* Bi-directional prediction goes to the B handler; every other mode   */
+    /* goes to the P handler                                               */
+    if(ps_dec->e_mb_pred != BIDIRECT)
+    {
+        impeg2d_dec_skip_p_mb(ps_dec, u2_num_skip_mbs);
+    }
+    else
+    {
+        impeg2d_dec_skip_b_mb(ps_dec, u2_num_skip_mbs);
+    }
+
+    /* Reset the default DC predictor of every component to 128 scaled by  */
+    /* the intra DC precision                                              */
+    ps_dec->u2_def_dc_pred[Y_LUMA]   = 128 << ps_dec->u2_intra_dc_precision;
+    ps_dec->u2_def_dc_pred[U_CHROMA] = 128 << ps_dec->u2_intra_dc_precision;
+    ps_dec->u2_def_dc_pred[V_CHROMA] = 128 << ps_dec->u2_intra_dc_precision;
+}
+
+
+
+
+/*****************************************************************************
+*  Function Name   : impeg2d_dec_0mv_coded_mb
+*
+*  Description     : Decodes the MB with 0 MV but coded. This can occur in P
+*                    pictures only
+*
+*  Arguments       :
+*  dec             : Decoder state
+*
+*  Values Returned : None
+*****************************************************************************/
+void impeg2d_dec_0mv_coded_mb(dec_state_t *ps_dec)
+{
+    WORD16 *pi2_zero_mv;
+    e_mb_type_t e_mc_type;
+    mb_mc_params_t *ps_fwd_mc;
+
+    /* Frame pictures use the frame MC type, all others the field MC type */
+    e_mc_type = (FRAME_PICTURE == ps_dec->u2_picture_structure)
+                    ? MC_FRM_FW_AND_BK_2MV
+                    : MC_FLD_FW_AND_BK_2MV;
+
+    /* Reset the motion vector predictors */
+    memset(ps_dec->ai2_pred_mv, 0, sizeof(ps_dec->ai2_pred_mv));
+
+    /* Work on the FIRST forward vector and MC parameter set, referencing  */
+    /* the forward buffer of the current field parity                      */
+    pi2_zero_mv = (WORD16 *)&ps_dec->ai2_mv[FORW][FIRST];
+    ps_fwd_mc   = &ps_dec->as_mb_mc_params[FORW][FIRST];
+    ps_fwd_mc->s_ref = ps_dec->as_ref_buf[FORW][ps_dec->u2_fld_parity];
+
+    /* The motion vector of this MB is zero */
+    pi2_zero_mv[MV_X] = 0;
+    pi2_zero_mv[MV_Y] = 0;
+
+    impeg2d_set_mc_params(&ps_fwd_mc->s_luma,
+                          &ps_fwd_mc->s_chroma,
+                          e_mc_type,
+                          0,
+                          pi2_zero_mv,
+                          ps_dec->u2_mb_x,
+                          ps_dec->u2_mb_y,
+                          ps_dec->u2_frame_width,
+                          ps_dec->u2_frame_height,
+                          ps_dec->u2_picture_width);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_mc_halfx_halfy()                                 */
+/*                                                                           */
+/*  Description   : Gets the buffer from (0.5,0.5) to (8.5,8.5)              */
+/*                  and the above block of size 8 x 8 will be placed as a    */
+/*                  block from the current position of out_buf               */
+/*                                                                           */
+/*  Inputs        : ref - Reference frame from which the block will be       */
+/*                        extracted.                                         */
+/*                  ref_wid - Width of reference frame                       */
+/*                  out_wid - Width of the output frame                      */
+/*                  blk_width  - width of the block                          */
+/*                  blk_height - height of the block                         */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Point to the (0,0),(1,0),(0,1),(1,1) position in         */
+/*                  the ref frame.Interpolate these four values to get the   */
+/*                  value at(0.5,0.5).Repeat this to get an 8 x 8 block      */
+/*                  using 9 x 9 block from reference frame                   */
+/*                                                                           */
+/*  Outputs       : out -  Output containing the extracted block             */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         05 09 2005   Harish M        First Version                        */
+/*                                                                           */
+/*****************************************************************************/
+void impeg2d_mc_halfx_halfy(void *pv_dec,
+                            UWORD8 *pu1_ref,
+                            UWORD32 u4_ref_wid,
+                            UWORD8 *pu1_out,
+                            UWORD32 u4_out_wid,
+                            UWORD32 u4_blk_width,
+                            UWORD32 u4_blk_height)
+{
+    dec_state_t *ps_dec = (dec_state_t *)pv_dec;
+
+    /* Number of 8x8 tiles handled by the 8x8 kernel in each direction.    */
+    /* Zero means the block size has no kernel and the generic loop runs.  */
+    UWORD32 u4_tiles_x = 0;
+    UWORD32 u4_tiles_y = 0;
+
+    if((MB_SIZE == u4_blk_width) && (MB_SIZE == u4_blk_height))
+    {
+        /* luma 16 x 16: four tiles */
+        u4_tiles_x = 2;
+        u4_tiles_y = 2;
+    }
+    else if((BLK_SIZE == u4_blk_width) && (BLK_SIZE == u4_blk_height))
+    {
+        /* chroma 8 x 8: a single tile */
+        u4_tiles_x = 1;
+        u4_tiles_y = 1;
+    }
+    else if((MB_SIZE == u4_blk_width) && (BLK_SIZE == u4_blk_height))
+    {
+        /* 16 x 8: two tiles side by side */
+        u4_tiles_x = 2;
+        u4_tiles_y = 1;
+    }
+
+    if(0 != u4_tiles_x)
+    {
+        UWORD32 u4_tile_row, u4_tile_col;
+
+        /* Tiles are visited row by row, left to right */
+        for(u4_tile_row = 0; u4_tile_row < u4_tiles_y; u4_tile_row++)
+        {
+            for(u4_tile_col = 0; u4_tile_col < u4_tiles_x; u4_tile_col++)
+            {
+                UWORD8 *pu1_tile_out = pu1_out + (u4_tile_row * BLK_SIZE) * u4_out_wid
+                                               + (u4_tile_col * BLK_SIZE);
+                UWORD8 *pu1_tile_ref = pu1_ref + (u4_tile_row * BLK_SIZE) * u4_ref_wid
+                                               + (u4_tile_col * BLK_SIZE);
+
+                ps_dec->pf_halfx_halfy_8x8(pu1_tile_out, pu1_tile_ref, u4_ref_wid, u4_out_wid);
+            }
+        }
+    }
+    else
+    {
+        /* Generic path. Each output pixel Q is the rounded average of the */
+        /* four reference pixels around it:                                */
+        /*
+           P0 P1
+             Q
+           P2 P3
+        */
+        UWORD8 *pu1_top = pu1_ref;               /* row holding P0, P1 */
+        UWORD8 *pu1_bot = pu1_ref + u4_ref_wid;  /* row holding P2, P3 */
+        UWORD8 *pu1_dst = pu1_out;
+        UWORD32 u4_row, u4_col;
+
+        for(u4_row = 0; u4_row < u4_blk_height; u4_row++)
+        {
+            for(u4_col = 0; u4_col < u4_blk_width; u4_col++)
+            {
+                *pu1_dst++ = ((pu1_top[0] + pu1_top[1] + pu1_bot[0] + pu1_bot[1] + 2) >> 2);
+                pu1_top++;
+                pu1_bot++;
+            }
+
+            /* Step from the end of this row to the start of the next one */
+            pu1_top += u4_ref_wid - u4_blk_width;
+            pu1_bot += u4_ref_wid - u4_blk_width;
+            pu1_dst += u4_out_wid - u4_blk_width;
+        }
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_mc_halfx_fully()                                 */
+/*                                                                           */
+/*  Description   : Gets the buffer from (0.5,0) to (8.5,8)                  */
+/*                  and the above block of size 8 x 8 will be placed as a    */
+/*                  block from the current position of out_buf               */
+/*                                                                           */
+/*  Inputs        : ref - Reference frame from which the block will be       */
+/*                        extracted.                                         */
+/*                  ref_wid - Width of reference frame                       */
+/*                  out_wid - Width of the output frame                      */
+/*                  blk_width  - width of the block                          */
+/*                  blk_height - height of the block                         */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Point to the (0,0) and (1,0) position in the ref frame   */
+/*                  Interpolate these two values to get the value at(0.5,0)  */
+/*                  Repeat this to get an 8 x 8 block using 9 x 8 block from */
+/*                  reference frame                                          */
+/*                                                                           */
+/*  Outputs       : out -  Output containing the extracted block             */
+/*                                                                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         05 09 2005   Harish M        First Version                        */
+/*                                                                           */
+/*****************************************************************************/
+
+void impeg2d_mc_halfx_fully(void *pv_dec,
+                            UWORD8 *pu1_ref,
+                            UWORD32 u4_ref_wid,
+                            UWORD8 *pu1_out,
+                            UWORD32 u4_out_wid,
+                            UWORD32 u4_blk_width,
+                            UWORD32 u4_blk_height)
+{
+    dec_state_t *ps_dec = (dec_state_t *)pv_dec;
+
+    /* Number of 8x8 tiles handled by the 8x8 kernel in each direction.    */
+    /* Zero means the block size has no kernel and the generic loop runs.  */
+    UWORD32 u4_tiles_x = 0;
+    UWORD32 u4_tiles_y = 0;
+
+    if((MB_SIZE == u4_blk_width) && (MB_SIZE == u4_blk_height))
+    {
+        /* luma 16 x 16: four tiles */
+        u4_tiles_x = 2;
+        u4_tiles_y = 2;
+    }
+    else if((BLK_SIZE == u4_blk_width) && (BLK_SIZE == u4_blk_height))
+    {
+        /* chroma 8 x 8: a single tile */
+        u4_tiles_x = 1;
+        u4_tiles_y = 1;
+    }
+    else if((MB_SIZE == u4_blk_width) && (BLK_SIZE == u4_blk_height))
+    {
+        /* 16 x 8: two tiles side by side */
+        u4_tiles_x = 2;
+        u4_tiles_y = 1;
+    }
+
+    if(0 != u4_tiles_x)
+    {
+        UWORD32 u4_tile_row, u4_tile_col;
+
+        /* Tiles are visited row by row, left to right */
+        for(u4_tile_row = 0; u4_tile_row < u4_tiles_y; u4_tile_row++)
+        {
+            for(u4_tile_col = 0; u4_tile_col < u4_tiles_x; u4_tile_col++)
+            {
+                UWORD8 *pu1_tile_out = pu1_out + (u4_tile_row * BLK_SIZE) * u4_out_wid
+                                               + (u4_tile_col * BLK_SIZE);
+                UWORD8 *pu1_tile_ref = pu1_ref + (u4_tile_row * BLK_SIZE) * u4_ref_wid
+                                               + (u4_tile_col * BLK_SIZE);
+
+                ps_dec->pf_halfx_fully_8x8(pu1_tile_out, pu1_tile_ref, u4_ref_wid, u4_out_wid);
+            }
+        }
+    }
+    else
+    {
+        /* Generic path. Each output pixel Q is the rounded average of the */
+        /* two horizontally adjacent reference pixels:                     */
+        /*
+           P0 Q P1
+        */
+        UWORD8 *pu1_src = pu1_ref;
+        UWORD8 *pu1_dst = pu1_out;
+        UWORD32 u4_row, u4_col;
+
+        for(u4_row = 0; u4_row < u4_blk_height; u4_row++)
+        {
+            for(u4_col = 0; u4_col < u4_blk_width; u4_col++)
+            {
+                *pu1_dst++ = ((pu1_src[0] + pu1_src[1] + 1) >> 1);
+                pu1_src++;
+            }
+
+            /* Step from the end of this row to the start of the next one */
+            pu1_src += u4_ref_wid - u4_blk_width;
+            pu1_dst += u4_out_wid - u4_blk_width;
+        }
+    }
+}
+
+
+/*******************************************************************************
+*  Function Name   : impeg2d_mc_fullx_halfy
+*
+*  Description     : Motion compensation for a full-pel horizontal, half-pel
+*                    vertical position. Every output pixel is the rounded
+*                    average of a reference pixel and the pixel one reference
+*                    row below it: (P0 + P1 + 1) >> 1.
+*
+*  Arguments       :
+*  pv_dec          : Decoder state (cast to dec_state_t *); supplies the
+*                    pf_fullx_halfy_8x8 kernel
+*  pu1_ref         : First pixel of the block in the reference buffer
+*  u4_ref_wid      : Width (row pitch) of the reference buffer
+*  pu1_out         : First pixel of the block in the output buffer
+*  u4_out_wid      : Width (row pitch) of the output buffer
+*  u4_blk_width    : Width of the block
+*  u4_blk_height   : Height of the block
+*
+*  Processing      : MB_SIZE x MB_SIZE, BLK_SIZE x BLK_SIZE and
+*                    MB_SIZE x BLK_SIZE blocks are split into BLK_SIZE tiles
+*                    and handed to ps_dec->pf_fullx_halfy_8x8. A
+*                    BLK_SIZE x (BLK_SIZE / 2) block is interpolated by the
+*                    scalar loop. Any other block size writes nothing.
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_mc_fullx_halfy(void *pv_dec,
+                            UWORD8 *pu1_ref,
+                            UWORD32 u4_ref_wid,
+                            UWORD8 *pu1_out,
+                            UWORD32 u4_out_wid,
+                            UWORD32 u4_blk_width,
+                            UWORD32 u4_blk_height)
+{
+    dec_state_t *ps_dec = (dec_state_t *)pv_dec;
+
+    if((MB_SIZE == u4_blk_width) && (MB_SIZE == u4_blk_height))
+    {
+        /* luma 16 x 16 : four tiles visited in raster order            */
+        /* (tile 0 = top-left, 1 = top-right, 2 = bottom-left, 3 = rest) */
+        UWORD32 u4_tile;
+
+        for(u4_tile = 0; u4_tile < 4; u4_tile++)
+        {
+            UWORD32 u4_x = (u4_tile & 1) ? BLK_SIZE : 0;
+            UWORD32 u4_y = (u4_tile & 2) ? BLK_SIZE : 0;
+
+            ps_dec->pf_fullx_halfy_8x8(pu1_out + u4_y * u4_out_wid + u4_x,
+                                       pu1_ref + u4_y * u4_ref_wid + u4_x,
+                                       u4_ref_wid,
+                                       u4_out_wid);
+        }
+    }
+    else if((BLK_SIZE == u4_blk_width) && (BLK_SIZE == u4_blk_height))
+    {
+        /* chroma 8 x 8 : a single tile */
+        ps_dec->pf_fullx_halfy_8x8(pu1_out, pu1_ref, u4_ref_wid, u4_out_wid);
+    }
+    else if((MB_SIZE == u4_blk_width) && (BLK_SIZE == u4_blk_height))
+    {
+        /* 16 x 8 : left tile followed by right tile */
+        ps_dec->pf_fullx_halfy_8x8(pu1_out, pu1_ref, u4_ref_wid, u4_out_wid);
+        ps_dec->pf_fullx_halfy_8x8(pu1_out + BLK_SIZE,
+                                   pu1_ref + BLK_SIZE,
+                                   u4_ref_wid,
+                                   u4_out_wid);
+    }
+    else if((BLK_SIZE == u4_blk_width) && ((BLK_SIZE / 2) == u4_blk_height))
+    {
+        /*
+           P0
+            x       x = (P0 + P1 + 1) >> 1
+           P1
+        */
+        UWORD8 *pu1_upper = pu1_ref;
+        UWORD8 *pu1_lower = pu1_ref + u4_ref_wid;
+        UWORD8 *pu1_dst   = pu1_out;
+        UWORD32 u4_row, u4_col;
+
+        for(u4_row = 0; u4_row < u4_blk_height; u4_row++)
+        {
+            for(u4_col = 0; u4_col < u4_blk_width; u4_col++)
+            {
+                pu1_dst[u4_col] =
+                    ((pu1_upper[u4_col] + pu1_lower[u4_col] + 1) >> 1);
+            }
+
+            /* step every pointer down by one row of its own buffer */
+            pu1_upper += u4_ref_wid;
+            pu1_lower += u4_ref_wid;
+            pu1_dst   += u4_out_wid;
+        }
+    }
+}
+
+/*******************************************************************************
+*  Function Name   : impeg2d_mc_fullx_fully
+*
+*  Description     : Motion compensation for a full-pel position in both
+*                    directions: the block is copied unchanged from the
+*                    reference buffer to the output buffer.
+*
+*  Arguments       :
+*  pv_dec          : Decoder state (cast to dec_state_t *); supplies the
+*                    pf_fullx_fully_8x8 kernel
+*  pu1_ref         : First pixel of the block in the reference buffer
+*  u4_ref_wid      : Width (row pitch) of the reference buffer
+*  pu1_out         : First pixel of the block in the output buffer
+*  u4_out_wid      : Width (row pitch) of the output buffer
+*  u4_blk_width    : Width of the block
+*  u4_blk_height   : Height of the block
+*
+*  Processing      : MB_SIZE x MB_SIZE, BLK_SIZE x BLK_SIZE and
+*                    MB_SIZE x BLK_SIZE blocks are split into BLK_SIZE tiles
+*                    and handed to ps_dec->pf_fullx_fully_8x8. Every other
+*                    block size is copied one row at a time with memcpy.
+*
+*  Values Returned : None
+*******************************************************************************/
+
+void impeg2d_mc_fullx_fully(void *pv_dec,
+                            UWORD8 *pu1_ref,
+                            UWORD32 u4_ref_wid,
+                            UWORD8 *pu1_out,
+                            UWORD32 u4_out_wid,
+                            UWORD32 u4_blk_width,
+                            UWORD32 u4_blk_height)
+{
+    dec_state_t *ps_dec = (dec_state_t *)pv_dec;
+
+    if((MB_SIZE == u4_blk_width) && (MB_SIZE == u4_blk_height))
+    {
+        /* luma 16 x 16 : four tiles visited in raster order            */
+        /* (tile 0 = top-left, 1 = top-right, 2 = bottom-left, 3 = rest) */
+        UWORD32 u4_tile;
+
+        for(u4_tile = 0; u4_tile < 4; u4_tile++)
+        {
+            UWORD32 u4_x = (u4_tile & 1) ? BLK_SIZE : 0;
+            UWORD32 u4_y = (u4_tile & 2) ? BLK_SIZE : 0;
+
+            ps_dec->pf_fullx_fully_8x8(pu1_out + u4_y * u4_out_wid + u4_x,
+                                       pu1_ref + u4_y * u4_ref_wid + u4_x,
+                                       u4_ref_wid,
+                                       u4_out_wid);
+        }
+    }
+    else if((BLK_SIZE == u4_blk_width) && (BLK_SIZE == u4_blk_height))
+    {
+        /* chroma 8 x 8 : a single tile */
+        ps_dec->pf_fullx_fully_8x8(pu1_out, pu1_ref, u4_ref_wid, u4_out_wid);
+    }
+    else if((MB_SIZE == u4_blk_width) && (BLK_SIZE == u4_blk_height))
+    {
+        /* 16 x 8 : left tile followed by right tile */
+        ps_dec->pf_fullx_fully_8x8(pu1_out, pu1_ref, u4_ref_wid, u4_out_wid);
+        ps_dec->pf_fullx_fully_8x8(pu1_out + BLK_SIZE,
+                                   pu1_ref + BLK_SIZE,
+                                   u4_ref_wid,
+                                   u4_out_wid);
+    }
+    else
+    {
+        /* any other size : plain row-by-row copy */
+        UWORD8 *pu1_src = pu1_ref;
+        UWORD8 *pu1_dst = pu1_out;
+        UWORD32 u4_rows_left;
+
+        for(u4_rows_left = u4_blk_height; u4_rows_left > 0; u4_rows_left--)
+        {
+            memcpy(pu1_dst, pu1_src, u4_blk_width);
+            pu1_src += u4_ref_wid;
+            pu1_dst += u4_out_wid;
+        }
+    }
+}
+
+/*******************************************************************************
+*  Function Name   : impeg2d_set_mc_params
+*
+*  Description     : Sets the parameters for Motion Compensation of one motion
+*                    vector, for both the luma and the chroma component
+*
+*  Arguments       :
+*  ps_luma         : Parameters for luma blocks (output)
+*  ps_chroma       : Parameters for chroma blocks (output)
+*  e_type          : Motion compensation type; first index into the
+*                    gas_impeg2d_mc_params_luma / _chroma tables
+*  u2_mv_num       : Number of the motion vector; second index into the tables
+*  ai2_mv          : Motion vector; the LSB of each component is stored as a
+*                    flag in u4_mode, the remaining bits give the pixel offset
+*  u2_mb_x         : X co-ordinate of MB
+*  u2_mb_y         : Y co-ordinate of MB
+*  u2_frm_wd       : Width of the frame; used for clipping and source offset
+*  u2_frm_ht       : Height of the frame; used for clipping
+*  u2_picture_width: Width used for the destination offset / width in the
+*                    current frame
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_set_mc_params(comp_mc_params_t *ps_luma,
+                           comp_mc_params_t *ps_chroma,
+                           e_mb_type_t e_type,
+                           UWORD16 u2_mv_num,
+                           WORD16 ai2_mv[],
+                           UWORD16 u2_mb_x,
+                           UWORD16 u2_mb_y,
+                           UWORD16 u2_frm_wd,
+                           UWORD16 u2_frm_ht,
+                           UWORD16 u2_picture_width)
+{
+    const mc_type_consts_t *ps_consts;
+    UWORD32 u4_chroma_stride;
+    WORD16 i2_ref_x, i2_ref_y;
+    WORD16 i2_chroma_mvx, i2_chroma_mvy;
+    WORD16 i2_chroma_mvx_fullpel, i2_chroma_mvy_fullpel;
+
+    /*------------------------------------------------------------------------*/
+    /* Luma                                                                   */
+    /*------------------------------------------------------------------------*/
+    ps_consts = &gas_impeg2d_mc_params_luma[e_type][u2_mv_num];
+
+    /* Position of the reference block: MB origin plus the vector without its */
+    /* LSB, scaled by the per-type constants                                  */
+    i2_ref_x = MB_SIZE * u2_mb_x + (ai2_mv[MV_X]>>1);
+    i2_ref_y = (MB_SIZE * u2_mb_y
+                + (ai2_mv[MV_Y]>>1) * ps_consts->mvy_cf
+                + u2_mv_num * ps_consts->mv_num_cf) * ps_consts->frm_wd_cf;
+
+    /* Keep the block origin inside the frame */
+    CLIP(i2_ref_x, (u2_frm_wd-16), 0);
+    CLIP(i2_ref_y, (u2_frm_ht-16), 0);
+
+    ps_luma->u4_src_offset = i2_ref_x + i2_ref_y * u2_frm_wd;
+    ps_luma->u4_src_wd = u2_frm_wd * ps_consts->src_wd_cf;
+
+    ps_luma->u4_rows = ps_consts->rows;
+    ps_luma->u4_cols = MB_SIZE;
+
+    ps_luma->u4_dst_wd_res_buf = ps_consts->dst_wd;
+    ps_luma->u4_dst_offset_res_buf = ps_consts->dst_offset_scale * MB_SIZE;
+    ps_luma->u4_dst_offset_cur_frm = ps_consts->dst_offset_scale * u2_picture_width;
+
+    /* bit 1 : LSB of the x component, bit 0 : LSB of the y component */
+    ps_luma->u4_mode = ((ai2_mv[MV_X] & 1) << 1) | (ai2_mv[MV_Y] & 1);
+
+    /*------------------------------------------------------------------------*/
+    /* Chroma                                                                 */
+    /*------------------------------------------------------------------------*/
+    ps_consts = &gas_impeg2d_mc_params_chroma[e_type][u2_mv_num];
+
+    /* Halve the luma vector (IS_NEG supplies the adjustment for negative     */
+    /* values), then split it into a full-pel part and an LSB flag            */
+    i2_chroma_mvx = ((ai2_mv[MV_X] + IS_NEG(ai2_mv[MV_X]))>>1);
+    i2_chroma_mvy = ((ai2_mv[MV_Y] + IS_NEG(ai2_mv[MV_Y]))>>1);
+
+    i2_chroma_mvx_fullpel = (i2_chroma_mvx>>1);
+    i2_chroma_mvy_fullpel = (i2_chroma_mvy>>1)*ps_consts->mvy_cf;
+
+    u4_chroma_stride = (u2_frm_wd>>1);
+
+    i2_ref_x = (MB_SIZE/2) * u2_mb_x + i2_chroma_mvx_fullpel;
+    i2_ref_y = ((MB_SIZE/2) * u2_mb_y
+                + i2_chroma_mvy_fullpel
+                + u2_mv_num * ps_consts->mv_num_cf)*ps_consts->frm_wd_cf;
+
+    /* Keep the block origin inside the half-size chroma plane */
+    CLIP(i2_ref_x, ((u2_frm_wd / 2)-8), 0);
+    CLIP(i2_ref_y, ((u2_frm_ht / 2)-8), 0);
+
+    ps_chroma->u4_src_offset = i2_ref_x + i2_ref_y * u4_chroma_stride;
+    ps_chroma->u4_src_wd = (u2_frm_wd>>1) * ps_consts->src_wd_cf;
+
+    ps_chroma->u4_rows = ps_consts->rows;
+    ps_chroma->u4_cols = (MB_SIZE >> 1);
+
+    ps_chroma->u4_dst_wd_res_buf = ps_consts->dst_wd;
+    ps_chroma->u4_dst_offset_res_buf = ps_consts->dst_offset_scale * MB_CHROMA_SIZE;
+    ps_chroma->u4_dst_offset_cur_frm = ps_consts->dst_offset_scale * (u2_picture_width >> 1);
+
+    ps_chroma->u4_mode = ((i2_chroma_mvx & 1) << 1) | (i2_chroma_mvy & 1);
+
+    /*------------------------------------------------------------------------*/
+    /* Destination width in the current frame                                 */
+    /*------------------------------------------------------------------------*/
+    if(ps_luma->u4_dst_wd_res_buf == MB_SIZE * 2)
+    {
+        /* Result-buffer width is two MBs: the frame width is used, doubled   */
+        /* for luma, in place of the picture width                            */
+        ps_luma->u4_dst_wd_cur_frm = u2_frm_wd << 1;
+        ps_chroma->u4_dst_wd_cur_frm = u2_frm_wd;
+    }
+    else
+    {
+        ps_luma->u4_dst_wd_cur_frm = u2_picture_width;
+        ps_chroma->u4_dst_wd_cur_frm = u2_picture_width >> 1;
+    }
+}
+
+

diff --git a/decoder/impeg2d_mc.h b/decoder/impeg2d_mc.h
new file mode 100644
index 0000000..14f1ef9
--- /dev/null
+++ b/decoder/impeg2d_mc.h

@@ -0,0 +1,78 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2d_mc.h
+*
+* @brief
+*  Contains MC function declarations for MPEG2 codec
+*
+* @author
+*  Harish
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+#ifndef __IMPEG2D_MC_H__
+#define __IMPEG2D_MC_H__
+
+/* Per-macroblock motion vector decode routines. Each takes the decoder      */
+/* state; the 1mv / 2mv_fw_or_bk / dual prime / 4mv variants are defined in  */
+/* impeg2d_mv_dec.c.                                                         */
+void impeg2d_dec_2mv_interp_mb(dec_state_t *dec);
+void impeg2d_dec_4mv_mb(dec_state_t *dec);
+
+
+void impeg2d_dec_1mv_mb(dec_state_t *dec);
+void impeg2d_dec_2mv_fw_or_bk_mb(dec_state_t *dec);
+void impeg2d_dec_fld_dual_prime(dec_state_t *dec);
+void impeg2d_dec_frm_dual_prime(dec_state_t *dec);
+
+/* NOTE(review): presumably the motion compensation stages that pair with    */
+/* the decode routines above - confirm against the definitions.              */
+void impeg2d_mc_1mv(dec_state_t *dec);
+void impeg2d_mc_fw_or_bk_mb(dec_state_t *dec);
+void impeg2d_mc_fld_dual_prime(dec_state_t *dec);
+void impeg2d_mc_frm_dual_prime(dec_state_t *dec);
+void impeg2d_mc_4mv(dec_state_t *dec);
+void impeg2d_mc_2mv(dec_state_t *dec);
+
+/* NOTE(review): judging by the names, handlers for skipped, zero-MV coded   */
+/* and intra macroblocks - confirm against the definitions.                  */
+void impeg2d_dec_skip_mbs(dec_state_t *dec, UWORD16 num_skip_mbs);
+void impeg2d_dec_0mv_coded_mb(dec_state_t *dec);
+void impeg2d_dec_intra_mb(dec_state_t *dec);
+
+/* Fills the luma and chroma MC parameters for one motion vector of the MB   */
+/* at (mb_x, mb_y); type and mv_num select the per-type constants.           */
+void impeg2d_set_mc_params(comp_mc_params_t *luma,
+                   comp_mc_params_t *chroma,
+                   e_mb_type_t   type,
+                   UWORD16 mv_num,
+                   WORD16 mv[],
+                   UWORD16 mb_x,
+                   UWORD16 mb_y,
+                   UWORD16 frm_wd,
+                   UWORD16 frm_ht,
+                   UWORD16 picture_width);
+
+void impeg2d_motion_comp(dec_state_t *dec, mb_mc_params_t *params,yuv_buf_t *buf);
+
+/* Block predictors for the four full-pel / half-pel combinations, declared  */
+/* through the pf_mc_t type (assumed to be a function type matching          */
+/* (pv_dec, ref, ref_wid, out, out_wid, blk_width, blk_height) - confirm).   */
+pf_mc_t impeg2d_mc_halfx_halfy;
+pf_mc_t impeg2d_mc_halfx_fully;
+pf_mc_t impeg2d_mc_fullx_halfy;
+pf_mc_t impeg2d_mc_fullx_fully;
+
+
+#endif /* __IMPEG2D_MC_H__*/

diff --git a/decoder/impeg2d_mv_dec.c b/decoder/impeg2d_mv_dec.c
new file mode 100644
index 0000000..1a30146
--- /dev/null
+++ b/decoder/impeg2d_mv_dec.c

@@ -0,0 +1,499 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#include <stdio.h>
+
+#include "iv_datatypedef.h"
+#include "iv.h"
+
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_globals.h"
+#include "impeg2_mem_func.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_macros.h"
+
+#include "ivd.h"
+#include "impeg2d.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_structs.h"
+#include "impeg2d_globals.h"
+#include "impeg2d_vld_tables.h"
+#include "impeg2d_pic_proc.h"
+#include "impeg2d_debug.h"
+#include "impeg2d_mv_dec.h"
+#include "impeg2d_mc.h"
+
+/*******************************************************************************
+* Function name : impeg2d_dec_1mv
+*
+* Description   : Decodes both components of one motion vector from the
+*                 bitstream and updates the predictors. When ai2_dmv is not
+*                 NULL, a differential motion vector component is also parsed
+*                 right after each motion vector component.
+*
+* Arguments     :
+* ps_stream     : Bitstream
+* ai2_pred_mv   : Prediction for the motion vectors; updated in place
+* ai2_mv        : Motion vectors (output)
+* au2_fCode     : fcode to be used for the decoding, one entry per component
+* u2_mv_y_shift : Shift value applied to the second (index 1) component only.
+*                 This will be equal to
+*                 (mv_format == "field") && (picture_structure == "Frame picture")
+* ai2_dmv       : Differential motion vector (output); may be NULL, in which
+*                 case no dmv bits are read
+*
+* Components are processed in index order: 0 - MV_X and 1 - MV_Y
+*
+* Value Returned: None
+*******************************************************************************/
+INLINE void impeg2d_dec_1mv(stream_t *ps_stream, WORD16 ai2_pred_mv[], WORD16 ai2_mv[],UWORD16 au2_fCode[],
+           UWORD16 u2_mv_y_shift, WORD16 ai2_dmv[])
+{
+    WORD16  i2_f;
+    WORD16  i2_r_size;
+    WORD16  i2_high,i2_low,i2_range;
+    UWORD32  u4_mv_code;
+    WORD16  i2_delta;
+    UWORD16 u2_first_bit;
+    WORD32 i;
+    WORD32 ai2_shifts[2];
+    UWORD32 u4_buf;
+    UWORD32 u4_buf_nxt;
+    UWORD32 u4_offset;
+    UWORD32 *pu4_buf_aligned;
+
+    /* The first component is never shifted; the second uses the caller's shift */
+    ai2_shifts[0] = 0;
+    ai2_shifts[1] = u2_mv_y_shift;
+
+
+    /* Work on local copies of the stream state; they are stored back by      */
+    /* PUT_TEMP_STREAM_DATA at the end (presumably what the macro pair does - */
+    /* confirm against impeg2d_bitstream.h)                                   */
+    GET_TEMP_STREAM_DATA(u4_buf,u4_buf_nxt,u4_offset,pu4_buf_aligned,ps_stream)
+    for(i = 0; i < 2; i++)
+    {
+        WORD32 i4_shift = ai2_shifts[i];
+        /* Decode the motion_code */
+        /* Peek MV_CODE_LEN bits; the number actually consumed is decided below */
+        IBITS_NXT(u4_buf, u4_buf_nxt, u4_offset, u4_mv_code, MV_CODE_LEN)
+        u2_first_bit    = (u4_mv_code >> (MV_CODE_LEN - 1)) & 0x01;
+        if(u2_first_bit == 1) /* mvCode == 0 */
+        {
+            /* Zero motion_code: one bit consumed, the vector equals the      */
+            /* (shifted) predictor                                            */
+            i2_delta = 0;
+            FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,1,pu4_buf_aligned)
+
+            ai2_mv[i] = (ai2_pred_mv[i] >> i4_shift);
+
+            ai2_pred_mv[i] = (ai2_mv[i] << i4_shift);
+
+        }
+        else
+        {
+            UWORD16 u2_index;
+            UWORD16 u2_value;
+            UWORD16 u2_mv_len;
+            UWORD16 u2_abs_mvcode_minus1;
+            UWORD16 u2_sign_bit;
+
+            /* NOTE(review): an f_code of 0 makes i2_r_size negative and the  */
+            /* shift below undefined, and a large f_code overflows the WORD16 */
+            /* range values; presumably f_code is validated when the picture  */
+            /* header is parsed - confirm                                     */
+            i2_r_size   = au2_fCode[i] - 1;
+            i2_f       = 1 << i2_r_size;
+            i2_high    = (16 * i2_f) - 1;
+            i2_low     = ((-16) * i2_f);
+            i2_range   = (32 * i2_f);
+
+            /* Table lookup on the bits after the leading one: the low nibble */
+            /* of the entry is the code length, bits 8-15 hold                */
+            /* |motion_code| - 1                                              */
+            u2_index               = (u4_mv_code >> 1) & 0x1FF;
+            u2_value               = gau2_impeg2d_mv_code[u2_index];
+            u2_mv_len               = (u2_value & 0x0F);
+            u2_abs_mvcode_minus1   = (u2_value >> 8) & 0x0FF;
+            /* Drop the unused peeked bits so that the sign bit becomes the LSB */
+            u4_mv_code            >>= (MV_CODE_LEN - u2_mv_len - 1);
+            u2_sign_bit             = u4_mv_code & 0x1;
+
+            FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,(u2_mv_len + 1),pu4_buf_aligned)
+            i2_delta = u2_abs_mvcode_minus1 * i2_f + 1;
+            if(i2_r_size)
+            {
+                /* Residual of i2_r_size bits refines the magnitude */
+                UWORD32 val;
+                IBITS_GET(u4_buf, u4_buf_nxt, u4_offset, val, pu4_buf_aligned, i2_r_size)
+                i2_delta += val;
+            }
+
+            if(u2_sign_bit)
+                i2_delta = -i2_delta;
+
+            ai2_mv[i] = (ai2_pred_mv[i] >> i4_shift) + i2_delta;
+
+            /* Wrap the vector back into [i2_low, i2_high] */
+            if(ai2_mv[i] < i2_low)
+            {
+                ai2_mv[i] += i2_range;
+            }
+
+            if(ai2_mv[i] > i2_high)
+            {
+                ai2_mv[i] -= i2_range;
+            }
+            ai2_pred_mv[i] = (ai2_mv[i] << i4_shift);
+
+        }
+        if(ai2_dmv)
+        {
+            /* dmv: a zero first bit means 0; otherwise a second bit selects  */
+            /* the value from gai2_impeg2d_dec_mv                             */
+            UWORD32 u4_val;
+            ai2_dmv[i] = 0;
+            IBITS_GET(u4_buf, u4_buf_nxt, u4_offset, u4_val, pu4_buf_aligned, 1)
+            if(u4_val)
+            {
+                IBITS_GET(u4_buf, u4_buf_nxt, u4_offset, u4_val, pu4_buf_aligned, 1)
+                ai2_dmv[i] = gai2_impeg2d_dec_mv[u4_val];
+            }
+        }
+    }
+    PUT_TEMP_STREAM_DATA(u4_buf, u4_buf_nxt, u4_offset, pu4_buf_aligned, ps_stream)
+
+}
+/*******************************************************************************
+* Function name : impeg2d_dec_mv
+*
+* Description   : Optionally reads the one-bit field select, then decodes a
+*                 motion vector and updates the predictors
+*
+* Arguments     :
+* ps_stream     : Bitstream
+* ai2_pred_mv   : Prediction for the motion vectors
+* ai2_mv        : Motion vectors
+* au2_f_code    : fcode to be used for the decoding
+* u2_shift      : Shift value to be used. This will be equal to
+*                 (mv_format == "field") && (picture_structure == "Frame picture")
+* u2_fld_sel    : Non-zero when a field select bit has to be read before the
+*                 motion vector
+*
+* Value Returned: The field read from the bitstream, or TOP when u2_fld_sel
+*                 is zero
+*******************************************************************************/
+e_field_t impeg2d_dec_mv(stream_t *ps_stream, WORD16 ai2_pred_mv[], WORD16 ai2_mv[],UWORD16 au2_f_code[],
+           UWORD16 u2_shift, UWORD16 u2_fld_sel)
+{
+    /* Default when the bitstream carries no field select bit */
+    e_field_t e_fld = TOP;
+
+    if(0 != u2_fld_sel)
+    {
+        e_fld = (e_field_t)impeg2d_bit_stream_get_bit(ps_stream);
+    }
+
+    /* Plain motion vector: no differential vector is requested */
+    impeg2d_dec_1mv(ps_stream, ai2_pred_mv, ai2_mv, au2_f_code, u2_shift, NULL);
+
+    return e_fld;
+}
+
+/*****************************************************************************
+*  Function Name   : impeg2d_dec_1mv_mb
+*
+*  Description     : Decodes the single motion vector of a 1 MV MB for the
+*                    prediction direction in ps_dec->e_mb_pred, mirrors the
+*                    updated predictor into the second predictor and sets
+*                    the mc params
+*
+*  Arguments       :
+*  ps_dec          : Decoder state
+*
+*  Values Returned : None
+*****************************************************************************/
+void impeg2d_dec_1mv_mb(dec_state_t *ps_dec)
+{
+    e_pred_direction_t  e_dir  = ps_dec->e_mb_pred;
+    /* The decoded vector is always stored in the FORW/FIRST slot */
+    WORD16             *pi2_mv = (WORD16 *)&ps_dec->ai2_mv[FORW][FIRST];
+    mb_mc_params_t     *ps_mc;
+    e_field_t           e_fld;
+
+    /************************************************************************/
+    /* Decode the motion vector; a field select bit is read only for field  */
+    /* pictures                                                             */
+    /************************************************************************/
+    e_fld = impeg2d_dec_mv(&ps_dec->s_bit_stream,
+                           ps_dec->ai2_pred_mv[e_dir][FIRST],
+                           pi2_mv,
+                           ps_dec->au2_f_code[e_dir],
+                           0,
+                           ps_dec->u2_fld_pic);
+
+    /* Both predictors of this direction follow the decoded vector */
+    ps_dec->ai2_pred_mv[e_dir][SECOND][MV_X] = ps_dec->ai2_pred_mv[e_dir][FIRST][MV_X];
+    ps_dec->ai2_pred_mv[e_dir][SECOND][MV_Y] = ps_dec->ai2_pred_mv[e_dir][FIRST][MV_Y];
+
+    /************************************************************************/
+    /* Set the motion vector params                                         */
+    /************************************************************************/
+    ps_mc = &ps_dec->as_mb_mc_params[e_dir][FIRST];
+    ps_mc->s_ref = ps_dec->as_ref_buf[e_dir][e_fld];
+    impeg2d_set_mc_params(&ps_mc->s_luma,
+                          &ps_mc->s_chroma,
+                          ps_dec->s_mb_type,
+                          0,
+                          pi2_mv,
+                          ps_dec->u2_mb_x,
+                          ps_dec->u2_mb_y,
+                          ps_dec->u2_frame_width,
+                          ps_dec->u2_frame_height,
+                          ps_dec->u2_picture_width);
+}
+
+/*****************************************************************************
+*  Function Name   : impeg2d_dec_2mv_fw_or_bk_mb
+*
+*  Description     : Decodes two motion vectors, each preceded by a field
+*                    select bit, for the prediction direction in
+*                    ps_dec->e_mb_pred and sets the mc params for both
+*
+*  Arguments       :
+*  ps_dec          : Decoder state
+*
+*  Values Returned : None
+*****************************************************************************/
+void impeg2d_dec_2mv_fw_or_bk_mb(dec_state_t *ps_dec)
+{
+    stream_t           *ps_stream = &ps_dec->s_bit_stream;
+    e_pred_direction_t  e_dir     = ps_dec->e_mb_pred;
+    UWORD16             u2_mv_idx;
+
+    for(u2_mv_idx = 0; u2_mv_idx < 2; u2_mv_idx++)
+    {
+        /* Vectors and mc params are stored in the FORW slots whatever the  */
+        /* direction; only predictors, f_codes and the reference buffer     */
+        /* depend on e_dir                                                  */
+        WORD16         *pi2_mv = (WORD16 *)&ps_dec->ai2_mv[FORW][u2_mv_idx];
+        mb_mc_params_t *ps_mc  = &ps_dec->as_mb_mc_params[FORW][u2_mv_idx];
+        e_field_t       e_fld;
+
+        /********************************************************************/
+        /* Decode the field select bit and the motion vector                */
+        /********************************************************************/
+        e_fld = impeg2d_dec_mv(ps_stream,
+                               ps_dec->ai2_pred_mv[e_dir][u2_mv_idx],
+                               pi2_mv,
+                               ps_dec->au2_f_code[e_dir],
+                               ps_dec->u2_frm_pic,
+                               1);
+
+        /********************************************************************/
+        /* Set the motion vector params                                     */
+        /********************************************************************/
+        ps_mc->s_ref = ps_dec->as_ref_buf[e_dir][e_fld];
+        impeg2d_set_mc_params(&ps_mc->s_luma,
+                              &ps_mc->s_chroma,
+                              ps_dec->s_mb_type,
+                              u2_mv_idx,
+                              pi2_mv,
+                              ps_dec->u2_mb_x,
+                              ps_dec->u2_mb_y,
+                              ps_dec->u2_frame_width,
+                              ps_dec->u2_frame_height,
+                              ps_dec->u2_picture_width);
+    }
+}
+
+/*****************************************************************************
+*  Function Name   : impeg2d_dec_frm_dual_prime
+*
+*  Description     : Decodes one motion vector together with its differential
+*                    vector, derives the remaining three vectors from them
+*                    and sets the mc params for all four predictions
+*
+*  Arguments       :
+*  ps_dec          : Decoder state
+*
+*  Values Returned : None
+*****************************************************************************/
+void impeg2d_dec_frm_dual_prime(dec_state_t *ps_dec)
+{
+    WORD16 *pi2_mv1 = (WORD16 *)&(ps_dec->ai2_mv[FORW][FIRST]);
+    WORD16 *pi2_mv2 = (WORD16 *)&(ps_dec->ai2_mv[FORW][SECOND]);
+    WORD16 *pi2_mv3 = (WORD16 *)&(ps_dec->ai2_mv[BACK][FIRST]);
+    WORD16 *pi2_mv4 = (WORD16 *)&(ps_dec->ai2_mv[BACK][SECOND]);
+    WORD16  ai2_dmv[2];
+    WORD16  i2_scale_mv3, i2_scale_mv4;
+    UWORD16 u2_dir, u2_mv_idx;
+
+    /************************************************************************/
+    /* Decode the motion vector MV_X, MV_Y and dmv[0], dmv[1]               */
+    /************************************************************************/
+    impeg2d_dec_1mv(&ps_dec->s_bit_stream,
+                    ps_dec->ai2_pred_mv[FORW][FIRST],
+                    pi2_mv1,
+                    ps_dec->au2_f_code[FORW],
+                    ps_dec->u2_frm_pic,
+                    ai2_dmv);
+
+    /* The multipliers applied to the decoded vector swap with the field    */
+    /* order                                                                */
+    if(ps_dec->u2_top_field_first)
+    {
+        i2_scale_mv3 = 1;
+        i2_scale_mv4 = 3;
+    }
+    else
+    {
+        i2_scale_mv3 = 3;
+        i2_scale_mv4 = 1;
+    }
+
+    /* Second vector is a copy of the decoded one */
+    pi2_mv2[MV_X] = pi2_mv1[MV_X];
+    pi2_mv2[MV_Y] = pi2_mv1[MV_Y];
+
+    /* Third and fourth vectors: scaled, halved with rounding, plus dmv;    */
+    /* the vertical components get an extra -1 / +1                         */
+    pi2_mv3[MV_X] = ai2_dmv[0] + DIV_2_RND(pi2_mv1[MV_X] * i2_scale_mv3);
+    pi2_mv4[MV_X] = ai2_dmv[0] + DIV_2_RND(pi2_mv1[MV_X] * i2_scale_mv4);
+
+    pi2_mv3[MV_Y] = ai2_dmv[1] + DIV_2_RND(pi2_mv1[MV_Y] * i2_scale_mv3) - 1;
+    pi2_mv4[MV_Y] = ai2_dmv[1] + DIV_2_RND(pi2_mv1[MV_Y] * i2_scale_mv4) + 1;
+
+    /* Both forward predictors follow the decoded vector */
+    ps_dec->ai2_pred_mv[FORW][SECOND][MV_X] = ps_dec->ai2_pred_mv[FORW][FIRST][MV_X];
+    ps_dec->ai2_pred_mv[FORW][SECOND][MV_Y] = ps_dec->ai2_pred_mv[FORW][FIRST][MV_Y];
+
+    /************************************************************************/
+    /* Set the motion vector params                                         */
+    /************************************************************************/
+    for(u2_dir = 0; u2_dir < 2; u2_dir++)
+    {
+        for(u2_mv_idx = 0; u2_mv_idx < 2; u2_mv_idx++)
+        {
+            WORD16         *pi2_mv = (WORD16 *)&ps_dec->ai2_mv[u2_dir][u2_mv_idx];
+            mb_mc_params_t *ps_mc  = &ps_dec->as_mb_mc_params[u2_dir][u2_mv_idx];
+
+            /* All four predictions read the FORW reference; the field      */
+            /* index is (u2_mv_idx ^ u2_dir) & 1                            */
+            ps_mc->s_ref = ps_dec->as_ref_buf[FORW][(u2_mv_idx ^ u2_dir) & 1];
+            impeg2d_set_mc_params(&ps_mc->s_luma,
+                                  &ps_mc->s_chroma,
+                                  ps_dec->s_mb_type,
+                                  u2_mv_idx,
+                                  pi2_mv,
+                                  ps_dec->u2_mb_x,
+                                  ps_dec->u2_mb_y,
+                                  ps_dec->u2_frame_width,
+                                  ps_dec->u2_frame_height,
+                                  ps_dec->u2_picture_width);
+        }
+    }
+}
+/*****************************************************************************
+*  Function Name   : impeg2d_dec_fld_dual_prime
+*
+*  Description     : Decodes one motion vector together with its differential
+*                    vector, derives the second vector from them and sets
+*                    the mc params for both predictions
+*
+*  Arguments       :
+*  ps_dec          : Decoder state
+*
+*  Values Returned : None
+*****************************************************************************/
+void impeg2d_dec_fld_dual_prime(dec_state_t *ps_dec)
+{
+    WORD16 *pi2_mv1 = (WORD16 *)&(ps_dec->ai2_mv[FORW][FIRST]);
+    WORD16 *pi2_mv2 = (WORD16 *)&(ps_dec->ai2_mv[FORW][SECOND]);
+    WORD16  ai2_dmv[2];
+    UWORD16 u2_mv_idx;
+
+    /************************************************************************/
+    /* Decode the motion vector MV_X, MV_Y and dmv[0], dmv[1]               */
+    /************************************************************************/
+    impeg2d_dec_1mv(&ps_dec->s_bit_stream,
+                    ps_dec->ai2_pred_mv[FORW][FIRST],
+                    pi2_mv1,
+                    ps_dec->au2_f_code[FORW],
+                    0,
+                    ai2_dmv);
+
+    /* Second vector: decoded vector halved with rounding, plus dmv */
+    pi2_mv2[MV_X] = ai2_dmv[0] + DIV_2_RND(pi2_mv1[MV_X]);
+    pi2_mv2[MV_Y] = ai2_dmv[1] + DIV_2_RND(pi2_mv1[MV_Y]);
+
+    /* Vertical correction: -1 for a top field picture, +1 otherwise */
+    pi2_mv2[MV_Y] += (TOP_FIELD == ps_dec->u2_picture_structure) ? -1 : 1;
+
+    /* Both forward predictors follow the decoded vector */
+    ps_dec->ai2_pred_mv[FORW][SECOND][MV_X] = ps_dec->ai2_pred_mv[FORW][FIRST][MV_X];
+    ps_dec->ai2_pred_mv[FORW][SECOND][MV_Y] = ps_dec->ai2_pred_mv[FORW][FIRST][MV_Y];
+
+    /************************************************************************/
+    /* Set the motion vector params                                         */
+    /************************************************************************/
+    for(u2_mv_idx = 0; u2_mv_idx < 2; u2_mv_idx++)
+    {
+        WORD16         *pi2_mv = (WORD16 *)&ps_dec->ai2_mv[FORW][u2_mv_idx];
+        mb_mc_params_t *ps_mc  = &ps_dec->as_mb_mc_params[FORW][u2_mv_idx];
+
+        /* First vector uses the field given by u2_fld_parity, the second   */
+        /* one the other field                                              */
+        ps_mc->s_ref = ps_dec->as_ref_buf[FORW][(0 == u2_mv_idx) ? ps_dec->u2_fld_parity
+                                                                 : !ps_dec->u2_fld_parity];
+
+        /* The motion vector number passed on is 0 for both vectors */
+        impeg2d_set_mc_params(&ps_mc->s_luma,
+                              &ps_mc->s_chroma,
+                              ps_dec->s_mb_type,
+                              0,
+                              pi2_mv,
+                              ps_dec->u2_mb_x,
+                              ps_dec->u2_mb_y,
+                              ps_dec->u2_frame_width,
+                              ps_dec->u2_frame_height,
+                              ps_dec->u2_picture_width);
+    }
+}
+/*****************************************************************************
+*  Function Name   : impeg2d_dec_4mv_mb
+*
+*  Description     : Decodes the four motion vectors of an MB (two per
+*                    direction, forward and backward) and sets the motion
+*                    compensation params for each of them
+*
+*  Arguments       :
+*  ps_dec          : Decoder state
+*
+*  Values Returned : None
+*****************************************************************************/
+void impeg2d_dec_4mv_mb(dec_state_t *ps_dec)
+{
+    stream_t    *ps_bits = &ps_dec->s_bit_stream;
+    UWORD16     u2_dir;
+    UWORD16     u2_vec;
+
+    /* Outer loop: forward, then backward direction */
+    for(u2_dir = 0; u2_dir < 2; u2_dir++)
+    {
+        /* Inner loop: the two vectors that share a reference frame */
+        for(u2_vec = 0; u2_vec < 2; u2_vec++)
+        {
+            WORD16          *pi2_cur_mv = (WORD16 *)&ps_dec->ai2_mv[u2_dir][u2_vec];
+            mb_mc_params_t  *ps_params  = &ps_dec->as_mb_mc_params[u2_dir][u2_vec];
+            e_field_t       e_ref_fld;
+
+            /* Parse the vector; the returned field selects the reference */
+            e_ref_fld = impeg2d_dec_mv(ps_bits,
+                                       ps_dec->ai2_pred_mv[u2_dir][u2_vec],
+                                       pi2_cur_mv,
+                                       ps_dec->au2_f_code[u2_dir],
+                                       ps_dec->u2_frm_pic, 1);
+
+            /* Fill in the motion compensation params for this vector */
+            ps_params->s_ref = ps_dec->as_ref_buf[u2_dir][e_ref_fld];
+            impeg2d_set_mc_params(&ps_params->s_luma, &ps_params->s_chroma,
+                                  ps_dec->s_mb_type, u2_vec, pi2_cur_mv,
+                                  ps_dec->u2_mb_x, ps_dec->u2_mb_y,
+                                  ps_dec->u2_frame_width, ps_dec->u2_frame_height,
+                                  ps_dec->u2_picture_width);
+        }
+    }
+}
+/*******************************************************************************
+*  Function Name   : impeg2d_dec_2mv_interp_mb
+*
+*  Description     : Decodes one motion vector per direction (forward and
+*                    backward) for an interpolated MB and sets the motion
+*                    compensation params for each direction
+*
+*  Arguments       :
+*  ps_dec          : Decoder state
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_dec_2mv_interp_mb(dec_state_t *ps_dec)
+{
+    stream_t    *ps_bits = &ps_dec->s_bit_stream;
+    UWORD16     u2_dir;
+
+    for(u2_dir = 0; u2_dir < 2; u2_dir++)
+    {
+        WORD16          *pi2_cur_mv = (WORD16 *)&ps_dec->ai2_mv[u2_dir][FIRST];
+        mb_mc_params_t  *ps_params  = &ps_dec->as_mb_mc_params[u2_dir][FIRST];
+        e_field_t       e_ref_fld;
+
+        /* Parse the vector of this direction */
+        e_ref_fld = impeg2d_dec_mv(ps_bits,
+                                   ps_dec->ai2_pred_mv[u2_dir][FIRST],
+                                   pi2_cur_mv,
+                                   ps_dec->au2_f_code[u2_dir],
+                                   0, ps_dec->u2_fld_pic);
+
+        /* Keep the second predictor in step with the first */
+        ps_dec->ai2_pred_mv[u2_dir][SECOND][MV_X] = ps_dec->ai2_pred_mv[u2_dir][FIRST][MV_X];
+        ps_dec->ai2_pred_mv[u2_dir][SECOND][MV_Y] = ps_dec->ai2_pred_mv[u2_dir][FIRST][MV_Y];
+
+        /* Fill in the motion compensation params for this direction */
+        ps_params->s_ref = ps_dec->as_ref_buf[u2_dir][e_ref_fld];
+        impeg2d_set_mc_params(&ps_params->s_luma, &ps_params->s_chroma,
+                              ps_dec->s_mb_type, u2_dir, pi2_cur_mv,
+                              ps_dec->u2_mb_x, ps_dec->u2_mb_y,
+                              ps_dec->u2_frame_width, ps_dec->u2_frame_height,
+                              ps_dec->u2_picture_width);
+    }
+}

diff --git a/decoder/impeg2d_mv_dec.h b/decoder/impeg2d_mv_dec.h
new file mode 100644
index 0000000..f6c691e
--- /dev/null
+++ b/decoder/impeg2d_mv_dec.h

@@ -0,0 +1,28 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef __IMPEG2D_MV_DEC_H__
+#define __IMPEG2D_MV_DEC_H__
+
+/* Decodes one motion vector from the bitstream into mv[], given the        */
+/* predictor predMv[] and the f_code pair fCode[]. Callers use the returned */
+/* e_field_t value to index the reference field buffers.                    */
+/* NOTE(review): exact meaning of shift / fld_sel is not visible here -     */
+/* confirm against the definition.                                          */
+e_field_t impeg2d_dec_mv(stream_t *stream, WORD16 predMv[], WORD16 mv[],UWORD16 fCode[],
+           UWORD16 shift,UWORD16 fld_sel);
+/* Variant used for dual prime MBs: callers read two differential values    */
+/* back from dmv[] after the call, in addition to the vector in mv[].       */
+INLINE void impeg2d_dec_1mv(stream_t *stream, WORD16 predMv[], WORD16 mv[],UWORD16 fCode[],
+           UWORD16 shift,WORD16 dmv[]);
+
+#endif /* #ifndef __IMPEG2D_MV_DEC_H__  */

diff --git a/decoder/impeg2d_pic_proc.c b/decoder/impeg2d_pic_proc.c
new file mode 100755
index 0000000..3dececb
--- /dev/null
+++ b/decoder/impeg2d_pic_proc.c

@@ -0,0 +1,664 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#include <stdio.h>
+#include <string.h>
+
+#include "iv_datatypedef.h"
+#include "iv.h"
+
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_globals.h"
+#include "impeg2_mem_func.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_macros.h"
+
+#include "ivd.h"
+#include "impeg2d.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_structs.h"
+#include "impeg2d_globals.h"
+#include "impeg2d_vld_tables.h"
+#include "impeg2d_vld.h"
+#include "impeg2d_pic_proc.h"
+#include "impeg2d_debug.h"
+
+/* Forward declaration: the function is not defined in this file. It is     */
+/* called from impeg2d_init_video_state() with the decoder state.           */
+void impeg2d_init_function_ptr(void *pv_codec);
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_format_convert
+*
+*  Description     : Copies / converts a band of rows from a picture buffer
+*                    into the display buffer, using the routine that matches
+*                    the configured output chroma format. Does nothing when
+*                    there is no source picture or no rows are requested.
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*  ps_src_pic      : Source picture buffer
+*  ps_disp_frm_buf : Destination display buffer
+*  u4_start_row    : First luma row of the band
+*  u4_num_rows     : Number of rows to process
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_format_convert(dec_state_t *ps_dec,
+                            pic_buf_t *ps_src_pic,
+                            iv_yuv_buf_t    *ps_disp_frm_buf,
+                            UWORD32 u4_start_row, UWORD32 u4_num_rows)
+{
+    UWORD8  *pu1_y_in, *pu1_u_in, *pu1_v_in;
+    UWORD8  *pu1_y_out, *pu1_u_out, *pu1_v_out;
+    UWORD32 u4_chroma_row;
+
+    /* Nothing to convert */
+    if(NULL == ps_src_pic)
+        return;
+    if(NULL == ps_src_pic->pu1_y)
+        return;
+    if(0 == u4_num_rows)
+        return;
+
+    /* Chroma planes have half the number of rows */
+    u4_chroma_row = u4_start_row >> 1;
+
+    /* Source pointers, advanced to the first row of the band */
+    pu1_y_in = ps_src_pic->pu1_y + (u4_start_row * ps_dec->u2_frame_width);
+    pu1_u_in = ps_src_pic->pu1_u + (u4_chroma_row * (ps_dec->u2_frame_width >> 1));
+    pu1_v_in = ps_src_pic->pu1_v + (u4_chroma_row * (ps_dec->u2_frame_width >> 1));
+
+    /* Destination pointers, advanced likewise using the display stride */
+    pu1_y_out = (UWORD8 *)ps_disp_frm_buf->pv_y_buf + (u4_start_row * ps_dec->u4_frm_buf_stride);
+    pu1_u_out = (UWORD8 *)ps_disp_frm_buf->pv_u_buf + (u4_chroma_row * (ps_dec->u4_frm_buf_stride >> 1));
+    pu1_v_out = (UWORD8 *)ps_disp_frm_buf->pv_v_buf + (u4_chroma_row * (ps_dec->u4_frm_buf_stride >> 1));
+
+    switch(ps_dec->i4_chromaFormat)
+    {
+    case IV_YUV_420P:
+        {
+            /* Planar output: plain plane-by-plane copy */
+            ps_dec->pf_copy_yuv420p_buf(pu1_y_in, pu1_u_in, pu1_v_in,
+                                        pu1_y_out, pu1_u_out, pu1_v_out,
+                                        ps_dec->u2_frame_width,
+                                        u4_num_rows,
+                                        ps_dec->u4_frm_buf_stride,
+                                        (ps_dec->u4_frm_buf_stride >> 1),
+                                        (ps_dec->u4_frm_buf_stride >> 1),
+                                        ps_dec->u2_frame_width,
+                                        (ps_dec->u2_frame_width >> 1),
+                                        (ps_dec->u2_frame_width >> 1));
+            break;
+        }
+    case IV_YUV_422ILE:
+        {
+            void    *pv_dst_422;
+            UWORD32 u4_rows, u4_cols;
+            UWORD32 u4_strd_y, u4_strd_u, u4_strd_v;
+            UWORD32 u4_strd_422;
+
+            pv_dst_422  = (UWORD8 *)ps_disp_frm_buf->pv_y_buf +
+                          ((ps_dec->u2_vertical_size) * (ps_dec->u4_frm_buf_stride));
+            u4_rows     = u4_num_rows;
+            u4_cols     = ps_dec->u2_horizontal_size;
+            u4_strd_y   = ps_dec->u2_frame_width;
+            u4_strd_u   = u4_strd_y >> 1;
+            u4_strd_v   = u4_strd_u;
+
+            /* Fall back to the picture width when no display stride is set */
+            if(0 == ps_dec->u4_frm_buf_stride)
+            {
+                u4_strd_422 = ps_dec->u2_horizontal_size;
+            }
+            else
+            {
+                u4_strd_422 = ps_dec->u4_frm_buf_stride;
+            }
+
+            ps_dec->pf_fmt_conv_yuv420p_to_yuv422ile(pu1_y_in,
+                                                     pu1_u_in,
+                                                     pu1_v_in,
+                                                     pv_dst_422,
+                                                     u4_cols,
+                                                     u4_rows,
+                                                     u4_strd_y,
+                                                     u4_strd_u,
+                                                     u4_strd_v,
+                                                     u4_strd_422);
+            break;
+        }
+    case IV_YUV_420SP_UV:
+    case IV_YUV_420SP_VU:
+        {
+            UWORD32 u4_dst_strd_y;
+            UWORD32 u4_dst_strd_uv;
+            WORD32  i4_uv_only;
+
+            /* Interleaved chroma plane: rows are a full stride apart */
+            pu1_u_out       = (UWORD8 *)ps_disp_frm_buf->pv_u_buf +
+                              (u4_chroma_row * (ps_dec->u4_frm_buf_stride));
+            u4_dst_strd_y   = ps_dec->u4_frm_buf_stride;
+            u4_dst_strd_uv  = ((ps_dec->u4_frm_buf_stride + 1) >> 1) << 1;
+
+            /* With shared display buffers only chroma is converted */
+            i4_uv_only      = (1 == ps_dec->u4_share_disp_buf) ? 1 : 0;
+
+            if(IV_YUV_420SP_UV == ps_dec->i4_chromaFormat)
+            {
+                ps_dec->pf_fmt_conv_yuv420p_to_yuv420sp_uv(pu1_y_in,
+                                                           pu1_u_in,
+                                                           pu1_v_in,
+                                                           pu1_y_out,
+                                                           pu1_u_out,
+                                                           u4_num_rows,
+                                                           ps_dec->u2_horizontal_size,
+                                                           ps_dec->u2_frame_width,
+                                                           ps_dec->u2_frame_width >> 1,
+                                                           ps_dec->u2_frame_width >> 1,
+                                                           u4_dst_strd_y,
+                                                           u4_dst_strd_uv,
+                                                           i4_uv_only);
+            }
+            else
+            {
+                ps_dec->pf_fmt_conv_yuv420p_to_yuv420sp_vu(pu1_y_in,
+                                                           pu1_u_in,
+                                                           pu1_v_in,
+                                                           pu1_y_out,
+                                                           pu1_u_out,
+                                                           u4_num_rows,
+                                                           ps_dec->u2_horizontal_size,
+                                                           ps_dec->u2_frame_width,
+                                                           ps_dec->u2_frame_width >> 1,
+                                                           ps_dec->u2_frame_width >> 1,
+                                                           u4_dst_strd_y,
+                                                           u4_dst_strd_uv,
+                                                           i4_uv_only);
+            }
+            break;
+        }
+    default:
+        /* Other formats are left untouched */
+        break;
+    }
+}
+
+
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_get_frm_buf
+*
+*  Description     : Splits one contiguous frame allocation into Y, U and V
+*                    plane pointers. The luma plane takes width * height
+*                    bytes and each chroma plane a quarter of that.
+*
+*  Arguments       :
+*  ps_frm_buf      : YUV buffer to fill in
+*  pu1_frm         : Start of the frame memory
+*  u4_width        : Width of the frame
+*  u4_height       : Height of the frame
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_get_frm_buf(yuv_buf_t *ps_frm_buf,UWORD8 *pu1_frm,UWORD32 u4_width,UWORD32 u4_height)
+{
+    UWORD32 u4_y_bytes  = u4_width * u4_height;
+    UWORD32 u4_uv_bytes = (u4_width * u4_height) >> 2;
+    UWORD8  *pu1_plane  = pu1_frm;
+
+    /* Planes are laid out back to back: Y, then U, then V */
+    ps_frm_buf->pu1_y = pu1_plane;
+
+    pu1_plane += u4_y_bytes;
+    ps_frm_buf->pu1_u = pu1_plane;
+
+    pu1_plane += u4_uv_bytes;
+    ps_frm_buf->pu1_v = pu1_plane;
+}
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_get_bottom_field_buf
+*
+*  Description     : Gets YUV component buffers for bottom field of the frame
+*                    by stepping one row into each plane of the source buffer
+*
+*  Arguments       :
+*  ps_src_buf      : YUV buffer of the frame
+*  ps_dst_buf      : YUV buffer that receives the bottom field pointers
+*  u4_width        : Width of one luma row (chroma rows use half of it)
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_get_bottom_field_buf(yuv_buf_t *ps_src_buf,yuv_buf_t *ps_dst_buf,
+                      UWORD32 u4_width)
+{
+    UWORD32 u4_chroma_width = u4_width >> 1;
+
+    ps_dst_buf->pu1_y = ps_src_buf->pu1_y + u4_width;
+    ps_dst_buf->pu1_u = ps_src_buf->pu1_u + u4_chroma_width;
+    ps_dst_buf->pu1_v = ps_src_buf->pu1_v + u4_chroma_width;
+}
+/*******************************************************************************
+*  Function Name   : impeg2d_get_mb_addr_incr
+*
+*  Description     : Decodes the Macroblock address increment. Every escape
+*                    code found in the stream contributes 33; the VLD symbol
+*                    that follows (plus MB_ADDR_INCR_OFFSET) is added on top.
+*
+*  Arguments       :
+*  ps_stream       : Bitstream
+*
+*  Values Returned : Macroblock address increment
+*******************************************************************************/
+UWORD16 impeg2d_get_mb_addr_incr(stream_t *ps_stream)
+{
+    UWORD16 u2_incr = 0;
+
+    /* Consume all leading escape codes */
+    for(;;)
+    {
+        if(impeg2d_bit_stream_nxt(ps_stream,MB_ESCAPE_CODE_LEN) != MB_ESCAPE_CODE)
+        {
+            break;
+        }
+        impeg2d_bit_stream_flush(ps_stream,MB_ESCAPE_CODE_LEN);
+        u2_incr += 33;
+    }
+
+    /* Add the increment coded by the variable length symbol */
+    u2_incr += impeg2d_dec_vld_symbol(ps_stream,gai2_impeg2d_mb_addr_incr,MB_ADDR_INCR_LEN) +
+        MB_ADDR_INCR_OFFSET;
+
+    return u2_incr;
+}
+
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_init_video_state
+*
+*  Description     : Initializes the Video decoder state: stream type
+*                    dependent defaults, function pointers, aligned frame
+*                    dimensions, frame period and inverse scan table
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*  e_video_type    : MPEG_2_Video / MPEG_1_Video
+*
+*  Values Returned : IVD_ERROR_NONE on success,
+*                    IMPEG2D_PIC_SIZE_NOT_SUPPORTED when the aligned frame
+*                    size exceeds the create-time maximum
+*******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_init_video_state(dec_state_t *ps_dec, e_video_type_t e_video_type)
+{
+    UWORD32 u4_period_numer;
+    UWORD32 u4_period_denom;
+
+    if(MPEG_1_VIDEO != e_video_type)
+    {
+        /*-------------------------------------------------------------------*/
+        /* Bit Stream  that conforms to MPEG-2                               */
+        /*-------------------------------------------------------------------*/
+        ps_dec->u2_is_mpeg2             = 1;
+        ps_dec->u2_full_pel_forw_vector = 0;
+        ps_dec->u2_forw_f_code          = 7;
+        ps_dec->u2_full_pel_back_vector = 0;
+        ps_dec->u2_back_f_code          = 7;
+        ps_dec->pf_vld_inv_quant        = impeg2d_vld_inv_quant_mpeg2;
+    }
+    else
+    {
+        /*-------------------------------------------------------------------*/
+        /* Bit Stream  that conforms to MPEG-1 <ISO/IEC 11172-2> standard    */
+        /*-------------------------------------------------------------------*/
+        ps_dec->u2_is_mpeg2 = 0;
+
+        /*-------------------------------------------------------------------*/
+        /* force MPEG-1 parameters for proper decoder behavior               */
+        /* see ISO/IEC 13818-2 section D.9.14                                */
+        /*-------------------------------------------------------------------*/
+        ps_dec->u2_progressive_sequence       = 1;
+        ps_dec->u2_intra_dc_precision         = 0;
+        ps_dec->u2_picture_structure          = FRAME_PICTURE;
+        ps_dec->u2_frame_pred_frame_dct       = 1;
+        ps_dec->u2_concealment_motion_vectors = 0;
+        ps_dec->u2_q_scale_type               = 0;
+        ps_dec->u2_intra_vlc_format           = 0;
+        ps_dec->u2_alternate_scan             = 0;
+        ps_dec->u2_repeat_first_field         = 0;
+        ps_dec->u2_progressive_frame          = 1;
+        ps_dec->u2_frame_rate_extension_n     = 0;
+        ps_dec->u2_frame_rate_extension_d     = 0;
+
+        ps_dec->pf_vld_inv_quant              = impeg2d_vld_inv_quant_mpeg1;
+    }
+
+    impeg2d_init_function_ptr(ps_dec);
+
+    /* Frame dimensions are the picture dimensions rounded up to 16 */
+    ps_dec->u2_frame_height = ALIGN16(ps_dec->u2_vertical_size);
+    ps_dec->u2_frame_width  = ALIGN16(ps_dec->u2_horizontal_size);
+    ps_dec->u2_num_horiz_mb = (ps_dec->u2_horizontal_size + 15) >> 4;
+
+    /* Reject streams larger than what the decoder was created for */
+    if(ps_dec->u2_frame_height > ps_dec->u2_create_max_height)
+    {
+        return IMPEG2D_PIC_SIZE_NOT_SUPPORTED;
+    }
+    if(ps_dec->u2_frame_width > ps_dec->u2_create_max_width)
+    {
+        return IMPEG2D_PIC_SIZE_NOT_SUPPORTED;
+    }
+
+    ps_dec->u2_num_flds_decoded = 0;
+
+    /* Calculate the frame period from the frame rate code and extensions */
+    u4_period_numer = (UWORD32)gau2_impeg2_frm_rate_code[ps_dec->u2_frame_rate_code][1] *
+                            (UWORD32)(ps_dec->u2_frame_rate_extension_d + 1);
+    u4_period_denom = (UWORD32)gau2_impeg2_frm_rate_code[ps_dec->u2_frame_rate_code][0] *
+                            (UWORD32)(ps_dec->u2_frame_rate_extension_n + 1);
+    ps_dec->u2_framePeriod = (u4_period_numer * 1000 * 100) / u4_period_denom;
+
+    /* Pick the inverse scan table matching the scan order */
+    ps_dec->pu1_inv_scan_matrix = (VERTICAL_SCAN == ps_dec->u2_alternate_scan) ?
+                                        (UWORD8 *)gau1_impeg2_inv_scan_vertical :
+                                        (UWORD8 *)gau1_impeg2_inv_scan_zig_zag;
+
+    return (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE;
+}
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_pre_pic_dec_proc
+*
+*  Description     : Does the processing necessary before picture decoding:
+*                    acquires a picture buffer for a new frame, sets the
+*                    per-picture decoding flags, fetches the next display
+*                    picture, selects the slice decoder and sets up the
+*                    reference field buffers
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : IVD_ERROR_NONE on success,
+*                    IMPEG2D_NO_FREE_BUF_ERR when no picture buffer is free,
+*                    IMPEG2D_INVALID_PIC_TYPE for an unknown picture type
+*******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_pre_pic_dec_proc(dec_state_t *ps_dec)
+{
+    WORD32 u4_get_disp;
+    pic_buf_t *ps_disp_pic;
+    IMPEG2D_ERROR_CODES_T e_error = (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE;
+
+    u4_get_disp = 0;
+    ps_disp_pic = NULL;
+
+    /* Field Picture */
+    if(ps_dec->u2_picture_structure != FRAME_PICTURE)
+    {
+        /* A field has half the rows of a frame: MB rows = ceil(height / 32) */
+        ps_dec->u2_num_vert_mb       = (ps_dec->u2_vertical_size + 31) >> 5;
+
+        /* Only the first field of a frame acquires a new picture buffer; */
+        /* the second field keeps using the one already set up            */
+        if(ps_dec->u2_num_flds_decoded == 0)
+        {
+            pic_buf_t *ps_pic_buf;
+            u4_get_disp = 1;
+
+            ps_pic_buf = impeg2_buf_mgr_get_next_free(ps_dec->pv_pic_buf_mg, &ps_dec->i4_cur_buf_id);
+
+            if (NULL == ps_pic_buf)
+            {
+                return IMPEG2D_NO_FREE_BUF_ERR;
+            }
+
+            /* Mark the buffer as needed for both display and reference */
+            impeg2_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mg, ps_dec->i4_cur_buf_id, BUF_MGR_DISP);
+            impeg2_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mg, ps_dec->i4_cur_buf_id, BUF_MGR_REF);
+
+            ps_pic_buf->u4_ts = ps_dec->u4_inp_ts;
+            ps_dec->ps_cur_pic = ps_pic_buf;
+            ps_dec->s_cur_frm_buf.pu1_y = ps_pic_buf->pu1_y;
+            ps_dec->s_cur_frm_buf.pu1_u = ps_pic_buf->pu1_u;
+            ps_dec->s_cur_frm_buf.pu1_v = ps_pic_buf->pu1_v;
+        }
+
+        if(ps_dec->u2_picture_structure == TOP_FIELD)
+        {
+            ps_dec->u2_fld_parity = TOP;
+        }
+        else
+        {
+            ps_dec->u2_fld_parity = BOTTOM;
+        }
+        /* Field pictures: no dct_type in the stream, motion type is read */
+        ps_dec->u2_field_dct           = 0;
+        ps_dec->u2_read_dct_type        = 0;
+        ps_dec->u2_read_motion_type     = 1;
+        ps_dec->u2_fld_pic             = 1;
+        ps_dec->u2_frm_pic             = 0;
+        ps_dec->ps_func_forw_or_back     = gas_impeg2d_func_fld_fw_or_bk;
+        ps_dec->ps_func_bi_direct       = gas_impeg2d_func_fld_bi_direct;
+   }
+    /* Frame Picture */
+    else
+    {
+        pic_buf_t *ps_pic_buf;
+
+
+        /* MB rows = ceil(height / 16) */
+        ps_dec->u2_num_vert_mb       = (ps_dec->u2_vertical_size + 15) >> 4;
+        u4_get_disp = 1;
+        ps_pic_buf = impeg2_buf_mgr_get_next_free(ps_dec->pv_pic_buf_mg, &ps_dec->i4_cur_buf_id);
+
+        if (NULL == ps_pic_buf)
+        {
+            return IMPEG2D_NO_FREE_BUF_ERR;
+        }
+        /* Mark the buffer as needed for both display and reference */
+        impeg2_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mg, ps_dec->i4_cur_buf_id, BUF_MGR_DISP);
+        impeg2_buf_mgr_set_status((buf_mgr_t *)ps_dec->pv_pic_buf_mg, ps_dec->i4_cur_buf_id, BUF_MGR_REF);
+
+        ps_pic_buf->u4_ts = ps_dec->u4_inp_ts;
+        ps_dec->ps_cur_pic = ps_pic_buf;
+        ps_dec->s_cur_frm_buf.pu1_y = ps_pic_buf->pu1_y;
+        ps_dec->s_cur_frm_buf.pu1_u = ps_pic_buf->pu1_u;
+        ps_dec->s_cur_frm_buf.pu1_v = ps_pic_buf->pu1_v;
+
+
+        /* frame_pred_frame_dct decides whether dct_type and motion type */
+        /* are read from the stream or forced to fixed values            */
+        if(ps_dec->u2_frame_pred_frame_dct == 0)
+        {
+            ps_dec->u2_read_dct_type    = 1;
+            ps_dec->u2_read_motion_type = 1;
+        }
+        else
+        {
+            ps_dec->u2_read_dct_type    = 0;
+            ps_dec->u2_read_motion_type = 0;
+            ps_dec->u2_motion_type     = 2;
+            ps_dec->u2_field_dct       = 0;
+        }
+
+        ps_dec->u2_fld_parity          = TOP;
+        ps_dec->u2_fld_pic             = 0;
+        ps_dec->u2_frm_pic             = 1;
+        ps_dec->ps_func_forw_or_back     = gas_impeg2d_func_frm_fw_or_bk;
+        ps_dec->ps_func_bi_direct       = gas_impeg2d_func_frm_bi_direct;
+   }
+    /* Default DC predictors scale with the intra DC precision */
+    ps_dec->u2_def_dc_pred[Y_LUMA]   = 128 << ps_dec->u2_intra_dc_precision;
+    ps_dec->u2_def_dc_pred[U_CHROMA]   = 128 << ps_dec->u2_intra_dc_precision;
+    ps_dec->u2_def_dc_pred[V_CHROMA]   = 128 << ps_dec->u2_intra_dc_precision;
+    ps_dec->u2_num_mbs_left  = ps_dec->u2_num_horiz_mb * ps_dec->u2_num_vert_mb;
+    /* A display picture is fetched only when a new frame is started */
+    if(u4_get_disp)
+    {
+        /* No picture is fetched for display until more than one frame */
+        /* has been decoded                                            */
+        if(ps_dec->u4_num_frames_decoded > 1)
+        {
+            ps_disp_pic = impeg2_disp_mgr_get(&ps_dec->s_disp_mgr, &ps_dec->i4_disp_buf_id);
+        }
+        ps_dec->ps_disp_pic = ps_disp_pic;
+        if(ps_disp_pic)
+        {
+            /* In shared display buffer mode the display frame points   */
+            /* directly at the picture's luma plane                     */
+            if(1 == ps_dec->u4_share_disp_buf)
+            {
+                ps_dec->ps_disp_frm_buf->pv_y_buf  = ps_disp_pic->pu1_y;
+                if(IV_YUV_420P == ps_dec->i4_chromaFormat)
+                {
+                    ps_dec->ps_disp_frm_buf->pv_u_buf  = ps_disp_pic->pu1_u;
+                    ps_dec->ps_disp_frm_buf->pv_v_buf  = ps_disp_pic->pu1_v;
+                }
+                else
+                {
+                    /* Non-planar output: chroma goes to the separate */
+                    /* display buffers registered for this buffer id  */
+                    UWORD8 *pu1_buf;
+
+                    pu1_buf = ps_dec->as_disp_buffers[ps_disp_pic->i4_buf_id].pu1_bufs[1];
+                    ps_dec->ps_disp_frm_buf->pv_u_buf  = pu1_buf;
+
+                    pu1_buf = ps_dec->as_disp_buffers[ps_disp_pic->i4_buf_id].pu1_bufs[2];
+                    ps_dec->ps_disp_frm_buf->pv_v_buf  = pu1_buf;
+                }
+            }
+        }
+    }
+
+
+    /* Select the slice decoder (and MB type table for P/B pictures).      */
+    /* NOTE(review): on the default path below the function returns after  */
+    /* a picture buffer may already have been acquired and marked          */
+    /* DISP/REF above; nothing here releases it - confirm the caller does. */
+    switch(ps_dec->e_pic_type)
+    {
+    case I_PIC:
+        {
+            ps_dec->pf_decode_slice = impeg2d_dec_i_slice;
+            break;
+        }
+    case D_PIC:
+        {
+            ps_dec->pf_decode_slice = impeg2d_dec_d_slice;
+            break;
+        }
+    case P_PIC:
+        {
+            ps_dec->pf_decode_slice = impeg2d_dec_p_b_slice;
+            ps_dec->pu2_mb_type       = gau2_impeg2d_p_mb_type;
+            break;
+        }
+    case B_PIC:
+        {
+            ps_dec->pf_decode_slice = impeg2d_dec_p_b_slice;
+            ps_dec->pu2_mb_type       = gau2_impeg2d_b_mb_type;
+            break;
+        }
+    default:
+        return IMPEG2D_INVALID_PIC_TYPE;
+    }
+
+    /*************************************************************************/
+    /* Set the reference pictures                                            */
+    /*************************************************************************/
+
+    /* Error resilience: If forward and backward pictures are going to be NULL*/
+    /* then assign both to the current                                        */
+    /* if one of them NULL then we will assign the non null to the NULL one   */
+
+    if(ps_dec->e_pic_type == P_PIC)
+    {
+        /* Missing reference fields fall back to the current frame buffer */
+        if (NULL == ps_dec->as_recent_fld[1][0].pu1_y)
+        {
+            ps_dec->as_recent_fld[1][0] = ps_dec->s_cur_frm_buf;
+        }
+        if (NULL == ps_dec->as_recent_fld[1][1].pu1_y)
+        {
+            impeg2d_get_bottom_field_buf(&ps_dec->s_cur_frm_buf, &ps_dec->as_recent_fld[1][1],
+                ps_dec->u2_frame_width);
+        }
+
+        /* P pictures predict from the most recent reference fields */
+        ps_dec->as_ref_buf[FORW][TOP]    = ps_dec->as_recent_fld[1][0];
+        ps_dec->as_ref_buf[FORW][BOTTOM] = ps_dec->as_recent_fld[1][1];
+
+
+    }
+    else if(ps_dec->e_pic_type == B_PIC)
+    {
+        if((NULL == ps_dec->as_recent_fld[1][0].pu1_y) && (NULL == ps_dec->as_recent_fld[0][0].pu1_y))
+        {
+            // assign the current picture to both
+            ps_dec->as_recent_fld[1][0] = ps_dec->s_cur_frm_buf;
+            impeg2d_get_bottom_field_buf(&ps_dec->s_cur_frm_buf, &ps_dec->as_recent_fld[1][1],
+                ps_dec->u2_frame_width);
+            ps_dec->as_recent_fld[0][0] = ps_dec->s_cur_frm_buf;
+            ps_dec->as_recent_fld[0][1] = ps_dec->as_recent_fld[1][1];
+        }
+        //Assign the non-null picture to the null picture
+        else if ((NULL != ps_dec->as_recent_fld[1][0].pu1_y) && (NULL == ps_dec->as_recent_fld[0][0].pu1_y))
+        {
+            ps_dec->as_recent_fld[0][0] = ps_dec->as_recent_fld[1][0];
+            ps_dec->as_recent_fld[0][1] = ps_dec->as_recent_fld[1][1];
+        }
+        else if ((NULL == ps_dec->as_recent_fld[1][0].pu1_y) && (NULL != ps_dec->as_recent_fld[0][0].pu1_y))
+        {
+            ps_dec->as_recent_fld[1][0] = ps_dec->as_recent_fld[0][0];
+            ps_dec->as_recent_fld[1][1] = ps_dec->as_recent_fld[0][1];
+        }
+
+        /* Forward prediction uses the older pair of fields, backward */
+        /* prediction the newer pair                                  */
+        ps_dec->as_ref_buf[FORW][TOP]    = ps_dec->as_recent_fld[0][0];
+        ps_dec->as_ref_buf[FORW][BOTTOM] = ps_dec->as_recent_fld[0][1];
+        ps_dec->as_ref_buf[BACK][TOP]    = ps_dec->as_recent_fld[1][0];
+        ps_dec->as_ref_buf[BACK][BOTTOM] = ps_dec->as_recent_fld[1][1];
+
+
+    }
+
+    return e_error;
+}
+
+/*******************************************************************************
+*
+*  Function Name   : impeg2d_post_pic_dec_proc
+*
+*  Description     : Performs processing that is needed at the end of picture
+*                    decode: tracks field pairs, queues pictures for display,
+*                    rotates the reference pictures and updates the list of
+*                    recent reference fields
+*
+*  Arguments       :
+*  ps_dec          : Decoder context
+*
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_post_pic_dec_proc(dec_state_t *ps_dec)
+{
+    WORD32 i4_frame_done;
+
+    /*************************************************************************/
+    /* A frame picture is complete at once; a field picture completes the    */
+    /* frame only when it is the second field of the pair                    */
+    /*************************************************************************/
+    if(FRAME_PICTURE == ps_dec->u2_picture_structure)
+    {
+        i4_frame_done = 1;
+    }
+    else
+    {
+        ps_dec->u2_num_vert_mb = (ps_dec->u2_vertical_size + 31) >> 5;
+
+        i4_frame_done = (1 == ps_dec->u2_num_flds_decoded);
+        ps_dec->u2_num_flds_decoded = i4_frame_done ? 0 : 1;
+    }
+
+    if(i4_frame_done)
+    {
+        ps_dec->i4_frame_decoded = 1;
+
+        if(B_PIC == ps_dec->e_pic_type)
+        {
+            /* B pictures go straight to display and are never referenced */
+            impeg2_disp_mgr_add(&ps_dec->s_disp_mgr, ps_dec->ps_cur_pic, ps_dec->ps_cur_pic->i4_buf_id);
+
+            impeg2_buf_mgr_release(ps_dec->pv_pic_buf_mg, ps_dec->ps_cur_pic->i4_buf_id, BUF_MGR_REF);
+        }
+        else if(NULL == ps_dec->aps_ref_pics[0])
+        {
+            /* First reference picture of the sequence */
+            ps_dec->aps_ref_pics[0] = ps_dec->ps_cur_pic;
+        }
+        else if(NULL == ps_dec->aps_ref_pics[1])
+        {
+            /* Second reference picture: the first one can now be displayed */
+            ps_dec->aps_ref_pics[1] = ps_dec->ps_cur_pic;
+            impeg2_disp_mgr_add(&ps_dec->s_disp_mgr, ps_dec->aps_ref_pics[0], ps_dec->aps_ref_pics[0]->i4_buf_id);
+        }
+        else
+        {
+            /* Steady state: display the newer reference, drop the older */
+            /* one as reference and shift the current picture in         */
+            impeg2_disp_mgr_add(&ps_dec->s_disp_mgr, ps_dec->aps_ref_pics[1], ps_dec->aps_ref_pics[1]->i4_buf_id);
+            impeg2_buf_mgr_release(ps_dec->pv_pic_buf_mg, ps_dec->aps_ref_pics[0]->i4_buf_id, BUF_MGR_REF);
+            ps_dec->aps_ref_pics[0] = ps_dec->aps_ref_pics[1];
+            ps_dec->aps_ref_pics[1] = ps_dec->ps_cur_pic;
+        }
+    }
+
+    /*************************************************************************/
+    /* Update the list of recent reference pictures                          */
+    /*************************************************************************/
+    if(B_PIC == ps_dec->e_pic_type)
+    {
+        /* B pictures are not references: nothing to update */
+        return;
+    }
+
+    if(FRAME_PICTURE == ps_dec->u2_picture_structure)
+    {
+        /* Both fields age by one step and are replaced by the current frame */
+        ps_dec->as_recent_fld[0][0] = ps_dec->as_recent_fld[1][0];
+        ps_dec->as_recent_fld[0][1] = ps_dec->as_recent_fld[1][1];
+
+        ps_dec->as_recent_fld[1][0] = ps_dec->s_cur_frm_buf;
+        impeg2d_get_bottom_field_buf(&ps_dec->s_cur_frm_buf, &ps_dec->as_recent_fld[1][1],
+                                     ps_dec->u2_frame_width);
+    }
+    else if(TOP_FIELD == ps_dec->u2_picture_structure)
+    {
+        /* Only the top field entries move */
+        ps_dec->as_recent_fld[0][0] = ps_dec->as_recent_fld[1][0];
+        ps_dec->as_recent_fld[1][0] = ps_dec->s_cur_frm_buf;
+    }
+    else if(BOTTOM_FIELD == ps_dec->u2_picture_structure)
+    {
+        /* Only the bottom field entries move */
+        ps_dec->as_recent_fld[0][1] = ps_dec->as_recent_fld[1][1];
+        impeg2d_get_bottom_field_buf(&ps_dec->s_cur_frm_buf, &ps_dec->as_recent_fld[1][1],
+                                     ps_dec->u2_frame_width);
+    }
+}

diff --git a/decoder/impeg2d_pic_proc.h b/decoder/impeg2d_pic_proc.h
new file mode 100644
index 0000000..e97dd13
--- /dev/null
+++ b/decoder/impeg2d_pic_proc.h

@@ -0,0 +1,41 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef __IMPEG2D_PIC_PROC_H__
+#define __IMPEG2D_PIC_PROC_H__
+
+/*****************************************************************************/
+/* Function Declarations (picture-level processing and slice decoding)       */
+/*****************************************************************************/
+UWORD16 impeg2d_get_mb_addr_incr(stream_t *stream); /* returns the MB address increment read from `stream` */
+IMPEG2D_ERROR_CODES_T impeg2d_init_video_state(dec_state_t *dec, e_video_type_t videoType);
+IMPEG2D_ERROR_CODES_T impeg2d_pre_pic_dec_proc(dec_state_t *dec);
+void impeg2d_post_pic_dec_proc(dec_state_t *dec);
+IMPEG2D_ERROR_CODES_T impeg2d_dec_i_slice(dec_state_t *dec);
+IMPEG2D_ERROR_CODES_T impeg2d_dec_d_slice(dec_state_t *dec);
+IMPEG2D_ERROR_CODES_T impeg2d_dec_p_b_slice(dec_state_t *dec); /* P/B slice decode, defined in impeg2d_pnb_pic.c */
+/* This header includes nothing itself: stream_t, dec_state_t, pic_buf_t etc. must be declared by the includer first. */
+void impeg2d_format_convert(dec_state_t *ps_dec,
+                            pic_buf_t *ps_src_pic,
+                            iv_yuv_buf_t    *ps_disp_frm_buf,
+                            UWORD32 u4_start_row, UWORD32 u4_num_rows);
+
+
+#endif /* __IMPEG2D_PIC_PROC_H__  */
+

diff --git a/decoder/impeg2d_pnb_pic.c b/decoder/impeg2d_pnb_pic.c
new file mode 100644
index 0000000..036c7d1
--- /dev/null
+++ b/decoder/impeg2d_pnb_pic.c

@@ -0,0 +1,698 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#include <stdio.h>
+#include <string.h>
+
+#include "iv_datatypedef.h"
+#include "iv.h"
+
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_globals.h"
+#include "impeg2_mem_func.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_macros.h"
+
+#include "ivd.h"
+#include "impeg2d.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_structs.h"
+#include "impeg2d_vld_tables.h"
+#include "impeg2d_vld.h"
+#include "impeg2d_pic_proc.h"
+#include "impeg2d_debug.h"
+#include "impeg2d_mc.h"
+
+#define BLK_SIZE 8
+#define LUMA_BLK_SIZE (2 * (BLK_SIZE))
+#define CHROMA_BLK_SIZE (BLK_SIZE)
+
+
+/*******************************************************************************
+*  Function Name   : impeg2d_dec_p_mb_params
+*
+*  Description     : Parses one macroblock header (address increment, MB type,
+*                    motion type, DCT type, quantiser scale code, coded block
+*                    pattern) and dispatches to the matching MB-params handler.
+*                    Called by impeg2d_dec_p_b_slice() for non-B pictures.
+*  Arguments       :
+*  ps_dec          : Decoder context (bitstream is ps_dec->s_bit_stream)
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_dec_p_mb_params(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+    UWORD16 u2_mb_addr_incr;
+    UWORD16 u2_total_len; /* running bit count of the type/motion/dct/quant fields, flushed in one go below */
+    UWORD16 u2_len;
+    UWORD16 u2_mb_type; /* table entry: bits 15..8 are read as the code length, the rest is tested against MB_* masks */
+    UWORD32 u4_next_word; /* 16-bit look-ahead; shifted left as each field is taken from its top bits */
+    const dec_mb_params_t *ps_dec_mb_params;
+    if(impeg2d_bit_stream_nxt(ps_stream,1) == 1) /* leading '1' bit: consume it, no MBs are skipped */
+    {
+        impeg2d_bit_stream_flush(ps_stream,1);
+
+    }
+    else
+    {
+        u2_mb_addr_incr = impeg2d_get_mb_addr_incr(ps_stream);
+        if(0 == ps_dec->u2_first_mb) /* skipped-MB handling is done only after the first MB of the slice */
+        {
+            /****************************************************************/
+            /* If the 2nd member of a field picture pair is a P picture and */
+            /* the first one was an I picture, there cannot be any skipped  */
+            /* MBs in the second field picture                              */
+            /****************************************************************/
+            /* Disabled check (commented out in the original source, never compiled):
+            if((dec->picture_structure != FRAME_PICTURE) &&
+                (dec->f->FieldFuncCall != 0) &&
+                (dec->las->u1_last_coded_vop_type == I))
+            {
+                core0_err_handler((void *)(VOLParams),
+                    ITTMPEG2_ERR_INVALID_MB_SKIP);
+            }
+            */
+            /****************************************************************/
+            /* In MPEG-2, the last MB of the row cannot be skipped and the  */
+            /* MBAddrIncr cannot be such that it will take the current MB   */
+            /* beyond the current row                                       */
+            /* In MPEG-1, the slice could start and end anywhere and is not */
+            /* restricted to a row like in MPEG-2. Hence this check should  */
+            /* not be done for MPEG-1 streams.                              */
+            /****************************************************************/
+            if(ps_dec->u2_is_mpeg2 && ((ps_dec->u2_mb_x + u2_mb_addr_incr) > ps_dec->u2_num_horiz_mb) )
+            {
+                u2_mb_addr_incr    = ps_dec->u2_num_horiz_mb - ps_dec->u2_mb_x; /* clamp to the end of the row */
+            }
+            /* NOTE(review): no zero check on u2_mb_addr_incr is visible; (UWORD16)(0 - 1) would be 65535 - confirm callee handles it */
+            impeg2d_dec_skip_mbs(ps_dec, (UWORD16)(u2_mb_addr_incr - 1));
+        }
+        /* NOTE(review): when u2_first_mb is set the increment is discarded here; impeg2d_dec_pnb_mb_params uses it to set u2_mb_x */
+    }
+    u4_next_word = (UWORD16)impeg2d_bit_stream_nxt(ps_stream,16); /* peek only; bits are consumed by the flush further down */
+    /*-----------------------------------------------------------------------*/
+    /* MB type                                                               */
+    /*-----------------------------------------------------------------------*/
+    {
+        u2_mb_type   = ps_dec->pu2_mb_type[BITS((UWORD16)u4_next_word,15,10)]; /* lookup keyed by bits 15..10 of the window */
+        u2_len      = BITS(u2_mb_type,15,8);
+        u2_total_len = u2_len;
+        u4_next_word = (UWORD16)LSW((UWORD16)u4_next_word << u2_len);
+    }
+    /*-----------------------------------------------------------------------*/
+    /* motion type                                                           */
+    /*-----------------------------------------------------------------------*/
+    {
+        if((u2_mb_type & MB_FORW_OR_BACK) &&  ps_dec->u2_read_motion_type)
+        {
+            WORD32 i4_motion_type;
+            ps_dec->u2_motion_type = BITS((UWORD16)u4_next_word,15,14);
+            u2_total_len        += MB_MOTION_TYPE_LEN;
+            u4_next_word        = (UWORD16)LSW((UWORD16)u4_next_word << MB_MOTION_TYPE_LEN);
+            i4_motion_type     = ps_dec->u2_motion_type;
+            /* Validity check on the local copy only; ps_dec->u2_motion_type keeps the value read above */
+            if((i4_motion_type == 0) ||
+                (i4_motion_type == 4) ||
+                (i4_motion_type >  7))
+            {
+                //TODO : VANG Check for validity. NOTE(review): i4_motion_type is not read after this assignment
+                i4_motion_type = 1;
+            }
+
+        }
+    }
+    /*-----------------------------------------------------------------------*/
+    /* dct type                                                              */
+    /*-----------------------------------------------------------------------*/
+    {
+        if((u2_mb_type & MB_CODED) && ps_dec->u2_read_dct_type)
+        {
+            ps_dec->u2_field_dct = BIT((UWORD16)u4_next_word,15);
+            u2_total_len += MB_DCT_TYPE_LEN;
+            u4_next_word = (UWORD16)LSW((UWORD16)u4_next_word << MB_DCT_TYPE_LEN);
+        }
+    }
+    /*-----------------------------------------------------------------------*/
+    /* Quant scale code                                                      */
+    /*-----------------------------------------------------------------------*/
+    if(u2_mb_type & MB_QUANT)
+    {
+        UWORD16 u2_quant_scale_code;
+        u2_quant_scale_code = BITS((UWORD16)u4_next_word,15,11);
+        /* Non-linear table lookup when u2_q_scale_type is set, otherwise twice the code */
+        ps_dec->u1_quant_scale = (ps_dec->u2_q_scale_type) ?
+            gau1_impeg2_non_linear_quant_scale[u2_quant_scale_code] : (u2_quant_scale_code << 1);
+        u2_total_len += MB_QUANT_SCALE_CODE_LEN;
+    }
+    impeg2d_bit_stream_flush(ps_stream,u2_total_len); /* consume every field parsed from the look-ahead window */
+    /*-----------------------------------------------------------------------*/
+    /* Dispatch on MB type: motion-compensated, intra, or neither            */
+    /*-----------------------------------------------------------------------*/
+    ps_dec->u2_coded_mb    = (UWORD16)(u2_mb_type & MB_CODED);
+
+    if(u2_mb_type & MB_FORW_OR_BACK)
+    {
+        /* refPic is 0 when MB_MV_FORW is set and 1 otherwise; it is stored as the prediction direction */
+        UWORD16 refPic      = !(u2_mb_type & MB_MV_FORW);
+        UWORD16 index       = (ps_dec->u2_motion_type);
+        ps_dec->u2_prev_intra_mb    = 0;
+        ps_dec->e_mb_pred         = (e_pred_direction_t)refPic;
+        ps_dec_mb_params = &ps_dec->ps_func_forw_or_back[index];
+        ps_dec->s_mb_type = ps_dec_mb_params->s_mb_type;
+        ps_dec_mb_params->pf_func_mb_params(ps_dec); /* NOTE(review): called without a NULL check - confirm every table slot is populated */
+
+    }
+    else if(u2_mb_type & MB_TYPE_INTRA)
+    {
+        ps_dec->u2_prev_intra_mb    = 1;
+        impeg2d_dec_intra_mb(ps_dec);
+
+    }
+    else
+    {
+        ps_dec->u2_prev_intra_mb    = 0;
+        ps_dec->e_mb_pred = FORW;
+        ps_dec->u2_motion_type = 0;
+        impeg2d_dec_0mv_coded_mb(ps_dec); /* neither motion nor intra flag set; presumably forward prediction with a zero MV */
+    }
+
+    /*-----------------------------------------------------------------------*/
+    /* decode cbp                                                            */
+    /*-----------------------------------------------------------------------*/
+    if((u2_mb_type & MB_TYPE_INTRA))
+    {
+        ps_dec->u2_cbp  = 0x3f; /* intra: all six block bits set, nothing is read from the stream */
+        ps_dec->u2_prev_intra_mb    = 1;
+    }
+    else
+    {
+        ps_dec->u2_prev_intra_mb  = 0;
+        ps_dec->u2_def_dc_pred[Y_LUMA] = 128 << ps_dec->u2_intra_dc_precision; /* non-intra MB: reset the DC predictors */
+        ps_dec->u2_def_dc_pred[U_CHROMA] = 128 << ps_dec->u2_intra_dc_precision;
+        ps_dec->u2_def_dc_pred[V_CHROMA] = 128 << ps_dec->u2_intra_dc_precision;
+        if((ps_dec->u2_coded_mb))
+        {
+            UWORD16 cbpValue; /* table entry: low byte is the pattern, high byte the number of bits to flush */
+            cbpValue  = gau2_impeg2d_cbp_code[impeg2d_bit_stream_nxt(ps_stream,MB_CBP_LEN)];
+            ps_dec->u2_cbp  = cbpValue & 0xFF;
+            impeg2d_bit_stream_flush(ps_stream,(cbpValue >> 8) & 0x0FF);
+        }
+        else
+        {
+            ps_dec->u2_cbp  = 0;
+        }
+    }
+}
+
+
+/*******************************************************************************
+*  Function Name   : impeg2d_dec_pnb_mb_params
+*
+*  Description     : Parses one macroblock header (address increment, MB type,
+*                    motion type, DCT type, quantiser scale code, coded block
+*                    pattern); handles bidirectional MBs and sets u2_mb_x on a
+*                    slice's first MB. Called by impeg2d_dec_p_b_slice() for B_PIC.
+*  Arguments       :
+*  ps_dec          : Decoder context (bitstream is ps_dec->s_bit_stream)
+*  Values Returned : None
+*******************************************************************************/
+void impeg2d_dec_pnb_mb_params(dec_state_t *ps_dec)
+{
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+    UWORD16 u2_mb_addr_incr;
+    UWORD16 u2_total_len; /* running bit count of the type/motion/dct/quant fields, flushed in one go below */
+    UWORD16 u2_len;
+    UWORD16 u2_mb_type; /* table entry: bits 15..8 are read as the code length, the rest is tested against MB_* masks */
+    UWORD32 u4_next_word; /* 16-bit look-ahead; shifted left as each field is taken from its top bits */
+    const dec_mb_params_t *ps_dec_mb_params;
+    if(impeg2d_bit_stream_nxt(ps_stream,1) == 1) /* leading '1' bit: consume it, no MBs are skipped */
+    {
+        impeg2d_bit_stream_flush(ps_stream,1);
+        /* NOTE(review): u2_first_mb and u2_mb_x are left untouched on this path */
+    }
+    else
+    {
+        u2_mb_addr_incr = impeg2d_get_mb_addr_incr(ps_stream);
+
+        if(ps_dec->u2_first_mb)
+        {
+            /****************************************************************/
+            /* Section 6.3.17                                               */
+            /* The first MB of a slice cannot be skipped                    */
+            /* But the mb_addr_incr can be > 1, because at the beginning of */
+            /* a slice, it indicates the offset from the last MB in the     */
+            /* previous row. Hence for the first slice in a row, the        */
+            /* mb_addr_incr needs to be 1.                                  */
+            /****************************************************************/
+            /* MB_x is set to zero whenever MB_y changes.                   */
+            ps_dec->u2_mb_x = u2_mb_addr_incr - 1;
+            /* For error resilience: keep u2_mb_x inside the row */
+            ps_dec->u2_mb_x = MIN(ps_dec->u2_mb_x, (ps_dec->u2_num_horiz_mb - 1));
+
+            /****************************************************************/
+            /* mb_addr_incr is forced to 1 because this decoder uses it     */
+            /* as a count of skipped MBs (incr - 1) rather than as the      */
+            /* address increment defined by the standard (Section 6.3.17)   */
+            /****************************************************************/
+            u2_mb_addr_incr = 1;
+            ps_dec->u2_first_mb = 0;
+        }
+        else
+        {
+            /****************************************************************/
+            /* In MPEG-2, the last MB of the row cannot be skipped and the  */
+            /* mb_addr_incr cannot be such that it will take the current MB   */
+            /* beyond the current row                                       */
+            /* In MPEG-1, the slice could start and end anywhere and is not */
+            /* restricted to a row like in MPEG-2. Hence this check should  */
+            /* not be done for MPEG-1 streams.                              */
+            /****************************************************************/
+            if(ps_dec->u2_is_mpeg2 &&
+                ((ps_dec->u2_mb_x + u2_mb_addr_incr) > ps_dec->u2_num_horiz_mb))
+            {
+                u2_mb_addr_incr    = ps_dec->u2_num_horiz_mb - ps_dec->u2_mb_x; /* clamp to the end of the row */
+            }
+
+            /* NOTE(review): no zero check on u2_mb_addr_incr is visible; (UWORD16)(0 - 1) would be 65535 - confirm callee handles it */
+            impeg2d_dec_skip_mbs(ps_dec, (UWORD16)(u2_mb_addr_incr - 1));
+        }
+
+    }
+    u4_next_word = (UWORD16)impeg2d_bit_stream_nxt(ps_stream,16); /* peek only; bits are consumed by the flush further down */
+    /*-----------------------------------------------------------------------*/
+    /* MB type                                                               */
+    /*-----------------------------------------------------------------------*/
+    {
+        u2_mb_type   = ps_dec->pu2_mb_type[BITS((UWORD16)u4_next_word,15,10)]; /* lookup keyed by bits 15..10 of the window */
+        u2_len      = BITS(u2_mb_type,15,8);
+        u2_total_len = u2_len;
+        u4_next_word = (UWORD16)LSW((UWORD16)u4_next_word << u2_len);
+    }
+    /*-----------------------------------------------------------------------*/
+    /* motion type                                                           */
+    /*-----------------------------------------------------------------------*/
+    {
+        WORD32 i4_motion_type = ps_dec->u2_motion_type; /* starts from the value already held in the context */
+
+        if((u2_mb_type & MB_FORW_OR_BACK) &&  ps_dec->u2_read_motion_type)
+        {
+            ps_dec->u2_motion_type = BITS((UWORD16)u4_next_word,15,14);
+            u2_total_len += MB_MOTION_TYPE_LEN;
+            u4_next_word = (UWORD16)LSW((UWORD16)u4_next_word << MB_MOTION_TYPE_LEN);
+            i4_motion_type     = ps_dec->u2_motion_type;
+
+        }
+
+        /* Validity check on the local copy only; ps_dec->u2_motion_type is not modified here */
+        if ((u2_mb_type & MB_FORW_OR_BACK) &&
+            ((i4_motion_type == 0) ||
+            (i4_motion_type == 3) ||
+            (i4_motion_type == 4) ||
+            (i4_motion_type >= 7)))
+        {
+            //TODO: VANG Check for validity. NOTE(review): i4_motion_type is not read after this assignment
+            i4_motion_type = 1;
+        }
+
+    }
+    /*-----------------------------------------------------------------------*/
+    /* dct type                                                              */
+    /*-----------------------------------------------------------------------*/
+    {
+        if((u2_mb_type & MB_CODED) && ps_dec->u2_read_dct_type)
+        {
+            ps_dec->u2_field_dct = BIT((UWORD16)u4_next_word,15);
+            u2_total_len += MB_DCT_TYPE_LEN;
+            u4_next_word = (UWORD16)LSW((UWORD16)u4_next_word << MB_DCT_TYPE_LEN);
+        }
+    }
+    /*-----------------------------------------------------------------------*/
+    /* Quant scale code                                                      */
+    /*-----------------------------------------------------------------------*/
+    if(u2_mb_type & MB_QUANT)
+    {
+        UWORD16 u2_quant_scale_code;
+        u2_quant_scale_code = BITS((UWORD16)u4_next_word,15,11);
+        /* Non-linear table lookup when u2_q_scale_type is set, otherwise twice the code */
+        ps_dec->u1_quant_scale = (ps_dec->u2_q_scale_type) ?
+            gau1_impeg2_non_linear_quant_scale[u2_quant_scale_code] : (u2_quant_scale_code << 1);
+        u2_total_len += MB_QUANT_SCALE_CODE_LEN;
+    }
+    impeg2d_bit_stream_flush(ps_stream,u2_total_len); /* consume every field parsed from the look-ahead window */
+    /*-----------------------------------------------------------------------*/
+    /* Dispatch on MB type: bidirectional, one-directional, intra, or none   */
+    /*-----------------------------------------------------------------------*/
+    ps_dec->u2_coded_mb    = (UWORD16)(u2_mb_type & MB_CODED);
+
+    if(u2_mb_type & MB_BIDRECT)
+    {
+        UWORD16 u2_index       = (ps_dec->u2_motion_type);
+        /* Bidirectional MBs use their own handler table, indexed by the raw motion type */
+        ps_dec->u2_prev_intra_mb    = 0;
+        ps_dec->e_mb_pred         = BIDIRECT;
+        ps_dec_mb_params = &ps_dec->ps_func_bi_direct[u2_index];
+        ps_dec->s_mb_type = ps_dec_mb_params->s_mb_type;
+        ps_dec_mb_params->pf_func_mb_params(ps_dec); /* NOTE(review): called without a NULL check - confirm every table slot is populated */
+    }
+    else if(u2_mb_type & MB_FORW_OR_BACK)
+    {
+        /* u2_refPic is 0 when MB_MV_FORW is set and 1 otherwise; it is stored as the prediction direction */
+        UWORD16 u2_refPic      = !(u2_mb_type & MB_MV_FORW);
+        UWORD16 u2_index       = (ps_dec->u2_motion_type);
+        ps_dec->u2_prev_intra_mb    = 0;
+        ps_dec->e_mb_pred         = (e_pred_direction_t)u2_refPic;
+        ps_dec_mb_params = &ps_dec->ps_func_forw_or_back[u2_index];
+        ps_dec->s_mb_type = ps_dec_mb_params->s_mb_type;
+        ps_dec_mb_params->pf_func_mb_params(ps_dec); /* NOTE(review): same unchecked indirect call as above */
+
+    }
+    else if(u2_mb_type & MB_TYPE_INTRA)
+    {
+        ps_dec->u2_prev_intra_mb    = 1;
+        impeg2d_dec_intra_mb(ps_dec);
+
+    }
+    else
+    {
+        ps_dec->u2_prev_intra_mb =0;
+        ps_dec->e_mb_pred = FORW;
+        ps_dec->u2_motion_type = 0;
+        impeg2d_dec_0mv_coded_mb(ps_dec); /* neither motion nor intra flag set; presumably forward prediction with a zero MV */
+    }
+
+    /*-----------------------------------------------------------------------*/
+    /* decode cbp                                                            */
+    /*-----------------------------------------------------------------------*/
+    if((u2_mb_type & MB_TYPE_INTRA))
+    {
+        ps_dec->u2_cbp  = 0x3f; /* intra: all six block bits set, nothing is read from the stream */
+        ps_dec->u2_prev_intra_mb    = 1;
+    }
+    else
+    {
+        ps_dec->u2_prev_intra_mb  = 0;
+        ps_dec->u2_def_dc_pred[Y_LUMA] = 128 << ps_dec->u2_intra_dc_precision; /* non-intra MB: reset the DC predictors */
+        ps_dec->u2_def_dc_pred[U_CHROMA] = 128 << ps_dec->u2_intra_dc_precision;
+        ps_dec->u2_def_dc_pred[V_CHROMA] = 128 << ps_dec->u2_intra_dc_precision;
+        if((ps_dec->u2_coded_mb))
+        {
+            UWORD16 cbpValue; /* table entry: low byte is the pattern, high byte the number of bits to flush */
+            cbpValue  = gau2_impeg2d_cbp_code[impeg2d_bit_stream_nxt(ps_stream,MB_CBP_LEN)];
+            ps_dec->u2_cbp  = cbpValue & 0xFF;
+            impeg2d_bit_stream_flush(ps_stream,(cbpValue >> 8) & 0x0FF);
+        }
+        else
+        {
+            ps_dec->u2_cbp  = 0;
+        }
+    }
+}
+
+/*******************************************************************************
+*  Function Name   : impeg2d_dec_p_b_slice
+*
+*  Description     : Decodes the macroblocks of a P or B slice: parses each MB
+*                    header, runs motion compensation for non-intra MBs, then
+*                    VLD/inverse-quant and IDCT+recon for every coded block.
+*  Arguments       :
+*  ps_dec          : Decoder state
+*  Values Returned : pf_vld_inv_quant error, buffer-exceeded error, or no error
+*******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_dec_p_b_slice(dec_state_t *ps_dec)
+{
+    WORD16 *pi2_vld_out;
+    UWORD32 i;
+    yuv_buf_t *ps_cur_frm_buf      = &ps_dec->s_cur_frm_buf;
+
+    UWORD32 u4_frm_offset          = 0; /* horizontal byte offset added to luma x; non-zero only for a bottom field */
+    const dec_mb_params_t *ps_dec_mb_params;
+    IMPEG2D_ERROR_CODES_T e_error   = (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE;
+
+    pi2_vld_out = ps_dec->ai2_vld_buf;
+    memset(ps_dec->ai2_pred_mv,0,sizeof(ps_dec->ai2_pred_mv)); /* motion vector predictors start at zero for each slice */
+
+    ps_dec->u2_prev_intra_mb    = 0;
+    ps_dec->u2_first_mb       = 1;
+
+    ps_dec->u2_picture_width = ps_dec->u2_frame_width;
+    /* Field pictures: use twice the frame width as stride; a bottom field starts one frame row in */
+    if(ps_dec->u2_picture_structure != FRAME_PICTURE)
+    {
+        ps_dec->u2_picture_width <<= 1;
+        if(ps_dec->u2_picture_structure == BOTTOM_FIELD)
+        {
+            u4_frm_offset = ps_dec->u2_frame_width;
+        }
+    }
+
+    do
+    {
+        UWORD32 u4_x_offset, u4_y_offset;
+
+
+
+        UWORD32 u4_x_dst_offset = 0;
+        UWORD32 u4_y_dst_offset = 0;
+        UWORD8  *pu1_out_p;
+        UWORD8  *pu1_pred;
+        WORD32 u4_pred_strd; /* NOTE(review): declared signed (WORD32) despite the u4_ prefix */
+
+        IMPEG2D_TRACE_MB_START(ps_dec->u2_mb_x, ps_dec->u2_mb_y);
+
+        /* B pictures use the parser that handles bidirectional MBs; every other picture type uses the P parser */
+        if(ps_dec->e_pic_type == B_PIC)
+            impeg2d_dec_pnb_mb_params(ps_dec);
+        else
+            impeg2d_dec_p_mb_params(ps_dec);
+        /* NOTE(review): the same MB_START trace macro is invoked a second time, after header parsing */
+        IMPEG2D_TRACE_MB_START(ps_dec->u2_mb_x, ps_dec->u2_mb_y);
+        /* Luma destination of this MB: 16 pixels per MB in x and y */
+        u4_x_dst_offset = u4_frm_offset + (ps_dec->u2_mb_x << 4);
+        u4_y_dst_offset = (ps_dec->u2_mb_y << 4) * ps_dec->u2_picture_width;
+        pu1_out_p = ps_cur_frm_buf->pu1_y + u4_x_dst_offset + u4_y_dst_offset;
+        if(ps_dec->u2_prev_intra_mb == 0) /* set by the MB-params call above: 0 means the current MB is not intra */
+        {
+            UWORD32 offset_x, offset_y, stride;
+            UWORD16 index = (ps_dec->u2_motion_type);
+            /* Pick the MC routine table by prediction direction (non-intra MBs only) */
+            if(ps_dec->e_mb_pred == BIDIRECT)
+            {
+                ps_dec_mb_params = &ps_dec->ps_func_bi_direct[index];
+            }
+            else
+            {
+                ps_dec_mb_params = &ps_dec->ps_func_forw_or_back[index];
+            }
+
+            stride = ps_dec->u2_picture_width;
+
+            offset_x = u4_frm_offset + (ps_dec->u2_mb_x << 4);
+
+            offset_y = (ps_dec->u2_mb_y << 4);
+
+            ps_dec->s_dest_buf.pu1_y = ps_cur_frm_buf->pu1_y + offset_y * stride + offset_x;
+            /* Chroma planes use half the stride and half the x/y offsets */
+            stride = stride >> 1;
+
+            ps_dec->s_dest_buf.pu1_u = ps_cur_frm_buf->pu1_u + (offset_y >> 1) * stride
+                            + (offset_x >> 1);
+
+            ps_dec->s_dest_buf.pu1_v = ps_cur_frm_buf->pu1_v + (offset_y >> 1) * stride
+                            + (offset_x >> 1);
+            /* Macro expansion is not visible here; it directly precedes the MC call - presumably a profiling switch */
+            PROFILE_DISABLE_MC_IF0
+            ps_dec_mb_params->pf_mc(ps_dec);
+
+        }
+        for(i = 0; i < NUM_LUMA_BLKS; ++i)
+        {
+            if((ps_dec->u2_cbp & (1 << (BLOCKS_IN_MB - 1 - i))) != 0) /* luma block i is coded */
+            {
+                e_error = ps_dec->pf_vld_inv_quant(ps_dec, pi2_vld_out, ps_dec->pu1_inv_scan_matrix,
+                              ps_dec->u2_prev_intra_mb, Y_LUMA, 0);
+                if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
+                {
+                    return e_error;
+                }
+
+                u4_x_offset = gai2_impeg2_blk_x_off[i];
+                /* Block row offset within the MB comes from a different table for frame DCT and field DCT */
+                if(ps_dec->u2_field_dct == 0)
+                    u4_y_offset = gai2_impeg2_blk_y_off_frm[i] ;
+                else
+                    u4_y_offset = gai2_impeg2_blk_y_off_fld[i] ;
+
+
+
+
+
+                IMPEG2D_IDCT_INP_STATISTICS(pi2_vld_out, ps_dec->u4_non_zero_cols, ps_dec->u4_non_zero_rows);
+
+                PROFILE_DISABLE_IDCT_IF0
+                {
+                    WORD32 idx; /* first half of the pf_idct_recon index: 0 when (cols | rows) == 1, else 1 */
+                    if(1 == (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows))
+                        idx = 0;
+                    else
+                        idx = 1;
+                    /* Non-intra: predict from the destination itself (presumably already written by MC); intra: zero buffer */
+                    if(0 == ps_dec->u2_prev_intra_mb)
+                    {
+                        pu1_pred = pu1_out_p + u4_y_offset * ps_dec->u2_picture_width + u4_x_offset;
+                        u4_pred_strd = ps_dec->u2_picture_width << ps_dec->u2_field_dct; /* stride doubles for field DCT */
+                    }
+                    else
+                    {
+                        pu1_pred = (UWORD8 *)gau1_impeg2_zerobuf;
+                        u4_pred_strd = 8;
+                    }
+
+                    ps_dec->pf_idct_recon[idx * 2 + ps_dec->i4_last_value_one](pi2_vld_out,
+                                                            ps_dec->ai2_idct_stg1,
+                                                            pu1_pred,
+                                                            pu1_out_p + u4_y_offset * ps_dec->u2_picture_width + u4_x_offset,
+                                                            8,
+                                                            u4_pred_strd,
+                                                            ps_dec->u2_picture_width << ps_dec->u2_field_dct,
+                                                            ~ps_dec->u4_non_zero_cols, ~ps_dec->u4_non_zero_rows);
+                }
+            }
+
+        }
+
+        /* Chroma: x offset is halved; y byte offset is quartered (half the rows times half the stride). */
+        u4_x_dst_offset >>= 1;
+        u4_y_dst_offset >>= 2;
+
+
+        /* Chroma blocks always use the frame stride (u2_picture_width >> 1), never the field-DCT stride */
+        /* cbp bit 1 (0x02) selects the U block, bit 0 (0x01) the V block */
+        if((ps_dec->u2_cbp & 0x02) != 0)
+        {
+            pu1_out_p = ps_cur_frm_buf->pu1_u + u4_x_dst_offset + u4_y_dst_offset;
+            e_error = ps_dec->pf_vld_inv_quant(ps_dec, pi2_vld_out, ps_dec->pu1_inv_scan_matrix,
+                          ps_dec->u2_prev_intra_mb, U_CHROMA, 0);
+            if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
+            {
+                return e_error;
+            }
+
+
+            IMPEG2D_IDCT_INP_STATISTICS(pi2_vld_out, ps_dec->u4_non_zero_cols, ps_dec->u4_non_zero_rows);
+
+            PROFILE_DISABLE_IDCT_IF0
+            {
+                WORD32 idx; /* same index selection as for the luma blocks */
+                if(1 == (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows))
+                    idx = 0;
+                else
+                    idx = 1;
+
+                if(0 == ps_dec->u2_prev_intra_mb)
+                {
+                    pu1_pred = pu1_out_p;
+                    u4_pred_strd = ps_dec->u2_picture_width >> 1;
+                }
+                else
+                {
+                    pu1_pred = (UWORD8 *)gau1_impeg2_zerobuf;
+                    u4_pred_strd = 8;
+                }
+
+                ps_dec->pf_idct_recon[idx * 2 + ps_dec->i4_last_value_one](pi2_vld_out,
+                                                        ps_dec->ai2_idct_stg1,
+                                                        pu1_pred,
+                                                        pu1_out_p,
+                                                        8,
+                                                        u4_pred_strd,
+                                                        ps_dec->u2_picture_width >> 1,
+                                                        ~ps_dec->u4_non_zero_cols, ~ps_dec->u4_non_zero_rows);
+
+            }
+
+        }
+
+        /* V block: identical processing to the U block above, on the V plane */
+        if((ps_dec->u2_cbp & 0x01) != 0)
+        {
+            pu1_out_p = ps_cur_frm_buf->pu1_v + u4_x_dst_offset + u4_y_dst_offset;
+            e_error = ps_dec->pf_vld_inv_quant(ps_dec, pi2_vld_out, ps_dec->pu1_inv_scan_matrix,
+                          ps_dec->u2_prev_intra_mb, V_CHROMA, 0);
+            if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_error)
+            {
+                return e_error;
+            }
+
+
+            IMPEG2D_IDCT_INP_STATISTICS(pi2_vld_out, ps_dec->u4_non_zero_cols, ps_dec->u4_non_zero_rows);
+
+            PROFILE_DISABLE_IDCT_IF0
+            {
+                WORD32 idx;
+                if(1 == (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows))
+                    idx = 0;
+                else
+                    idx = 1;
+                if(0 == ps_dec->u2_prev_intra_mb)
+                {
+                    pu1_pred = pu1_out_p;
+                    u4_pred_strd = ps_dec->u2_picture_width >> 1;
+                }
+                else
+                {
+                    pu1_pred = (UWORD8 *)gau1_impeg2_zerobuf;
+                    u4_pred_strd = 8;
+                }
+
+                ps_dec->pf_idct_recon[idx * 2 + ps_dec->i4_last_value_one](pi2_vld_out,
+                                                        ps_dec->ai2_idct_stg1,
+                                                        pu1_pred,
+                                                        pu1_out_p,
+                                                        8,
+                                                        u4_pred_strd,
+                                                        ps_dec->u2_picture_width >> 1,
+                                                        ~ps_dec->u4_non_zero_cols, ~ps_dec->u4_non_zero_rows);
+
+            }
+        }
+
+        /* Per-MB bookkeeping; NOTE(review): u2_num_mbs_left is decremented with no underflow check visible here */
+        ps_dec->u2_num_mbs_left--;
+        ps_dec->u2_first_mb = 0;
+        ps_dec->u2_mb_x++;
+        /* Fail if the read position has passed the end of the buffer */
+        if(ps_dec->s_bit_stream.u4_offset > ps_dec->s_bit_stream.u4_max_offset)
+        {
+            return IMPEG2D_BITSTREAM_BUFF_EXCEEDED_ERR;
+        }
+        else if ((ps_dec->u2_mb_x == ps_dec->u2_num_horiz_mb) && ((ps_dec->s_bit_stream.u4_offset + START_CODE_PREFIX_LEN) < ps_dec->s_bit_stream.u4_max_offset))
+        {
+            ps_dec->u2_mb_x = 0;
+            ps_dec->u2_mb_y++;
+            /* Row wrap happens only while more than START_CODE_PREFIX_LEN of input remains past the read position */
+        }
+    }
+    while(ps_dec->u2_num_mbs_left != 0 && impeg2d_bit_stream_nxt(&ps_dec->s_bit_stream,23) != 0x0); /* stop on 23 zero bits - presumably the start of a start-code prefix */
+    return e_error;
+}

diff --git a/decoder/impeg2d_structs.h b/decoder/impeg2d_structs.h
new file mode 100755
index 0000000..63a0b03
--- /dev/null
+++ b/decoder/impeg2d_structs.h

@@ -0,0 +1,377 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef __IMPEG2D_STRUCTS_H__
+#define __IMPEG2D_STRUCTS_H__
+
+/**
+ * Commands that a job queue entry can carry.
+ * Presumably these are the values stored in job_t's i4_cmd field --
+ * confirm against the job queue producer/consumer code.
+ */
+typedef enum
+{
+    CMD_PROCESS,
+    CMD_FMTCONV,
+}e_jobq_cmd_t;
+
+/**
+ * Structure to represent a processing job entry.
+ * Each entry carries a command together with the MB row range and the
+ * bitstream offset that the command refers to.
+ */
+typedef struct
+{
+    /**
+     * Command
+     * Currently: PROCESS, FMTCONV are the only two jobs
+     * (see CMD_PROCESS / CMD_FMTCONV in e_jobq_cmd_t)
+     */
+    WORD32 i4_cmd;
+
+    /**
+     * MB y of the starting MB
+     */
+    WORD16 i2_start_mb_y;
+
+    /**
+     * MB y of the last MB
+     */
+
+    WORD16 i2_end_mb_y;
+
+    /**
+     * Bitstream offset for the current job
+     * (unit is not visible in this header -- TODO confirm bits vs. bytes)
+     */
+    WORD32 i4_bistream_ofst;
+
+}job_t;
+
+/**
+ * Motion compensation (MC) parameters for a single colour component.
+ * mb_mc_params_t holds one instance for luma and one for chroma.
+ */
+typedef struct
+{
+    /* Params of the reference buffer used as input to MC */
+    /* NOTE(review): "_wd" presumably means width/stride -- confirm at the use sites */
+    UWORD32 u4_src_wd;
+    UWORD32 u4_src_offset;
+
+    /* Params of the buffer where MC output will be written */
+    /* Separate width/offset pairs are kept for a "res_buf" and for "cur_frm" */
+    UWORD32 u4_dst_wd_res_buf;
+    UWORD32 u4_dst_wd_cur_frm;
+    UWORD32 u4_dst_offset_res_buf;
+    UWORD32 u4_dst_offset_cur_frm;
+
+    /* Operation Parameters */
+    UWORD32 u4_rows;
+    UWORD32 u4_cols;
+    UWORD32 u4_mode;
+}comp_mc_params_t;
+
+/**
+ * MC parameters for one prediction of a macroblock: a reference buffer
+ * plus per-component parameters for luma and chroma.
+ */
+typedef struct
+{
+    yuv_buf_t        s_ref;
+    comp_mc_params_t s_luma;
+    comp_mc_params_t s_chroma;
+}mb_mc_params_t;
+
+/* Forward declaration: dec_state_t stores pointers to this struct, which */
+/* is defined later in this header (after dec_state_t itself).            */
+struct _dec_mb_params_t;
+
+/**
+ * Function type (not a pointer type) for an inverse quantization routine.
+ *
+ * blk                   : block buffer operated on
+ * weighting_matrix      : quantizer weighting matrix
+ * quant_scale           : quantizer scale
+ * intra_flag            : intra / non-intra selector
+ * i4_num_coeffs         : number of entries in pi2_coeffs / pu1_pos
+ * pi2_coeffs            : coefficient values
+ * pu1_pos               : coefficient positions
+ * scan                  : scan table
+ * u2_def_dc_pred        : DC predictor
+ * u2_intra_dc_precision : intra DC precision
+ *
+ * NOTE(review): the parameter descriptions above are taken from the
+ * parameter names only; the meaning of the UWORD8 return value is not
+ * visible in this header -- confirm against the implementations.
+ */
+typedef UWORD8 pf_inv_quant_t (WORD16 *blk,
+                                UWORD8 *weighting_matrix,
+                                UWORD8 quant_scale,
+                                WORD32 intra_flag,
+                                WORD32 i4_num_coeffs,
+                                WORD16 *pi2_coeffs,
+                                UWORD8 *pu1_pos,
+                                const UWORD8   *scan,
+                                UWORD16 *u2_def_dc_pred,
+                                UWORD16 u2_intra_dc_precision);
+
+/**
+ * Function type (not a pointer type) for a combined VLD + inverse
+ * quantization routine; dec_state_t stores a pointer to one in
+ * pf_vld_inv_quant.
+ *
+ * dec        : decoder state, passed as void *
+ * out_addr   : output coefficient buffer
+ * scan       : scan table
+ * intra_flag : intra / non-intra selector
+ * colr_comp  : colour component
+ * d_picture  : D-picture flag
+ *
+ * Returns an IMPEG2D_ERROR_CODES_T status.
+ */
+typedef IMPEG2D_ERROR_CODES_T  pf_vld_inv_quant_t  (void  *dec,
+                             WORD16       *out_addr,
+                             const UWORD8 *scan,
+                             UWORD16      intra_flag,
+                             UWORD16      colr_comp,
+                             UWORD16      d_picture);
+
+/**
+ * Function type for the routines stored in dec_state_t's pf_mc[] table.
+ * NOTE(review): the parameters are unnamed here, so their meaning cannot
+ * be documented from this header -- see the implementations.
+ */
+typedef void  pf_mc_t(void *, UWORD8 *, UWORD32 , UWORD8 *, UWORD32 ,
+                 UWORD32 , UWORD32  );
+
+/**
+ * Decoder state.
+ * Aggregates the context of one decoder instance: coefficient scratch
+ * buffers, quantization matrices, function pointers, bitstream state,
+ * header fields, motion compensation parameters, picture/display buffer
+ * bookkeeping and job queue information.
+ */
+typedef struct dec_state_struct_t
+{
+    /* Scratch buffers of one 8x8 block each */
+    WORD16          ai2_vld_buf[NUM_PELS_IN_BLOCK];
+    WORD16          ai2_idct_stg1[NUM_PELS_IN_BLOCK];
+
+
+    /* Weighting matrices used for intra and non-intra blocks respectively */
+    UWORD8          au1_intra_quant_matrix[NUM_PELS_IN_BLOCK];
+    UWORD8          au1_inter_quant_matrix[NUM_PELS_IN_BLOCK];
+
+    IMPEG2D_ERROR_CODES_T (*pf_decode_slice)(struct dec_state_struct_t *);
+
+    pf_vld_inv_quant_t *pf_vld_inv_quant;
+
+    /* The slice decode loop indexes this table as [idx * 2 + i4_last_value_one] */
+    pf_idct_recon_t *pf_idct_recon[4];
+
+    pf_mc_t         *pf_mc[4];
+    pf_interpred_t  *pf_fullx_halfy_8x8;
+    pf_interpred_t  *pf_halfx_fully_8x8;
+    pf_interpred_t  *pf_halfx_halfy_8x8;
+    pf_interpred_t  *pf_fullx_fully_8x8;
+
+
+    pf_interpolate_t *pf_interpolate;
+    pf_copy_mb_t     *pf_copy_mb;
+
+    pf_memset0_one_16bit_buf_t *pf_memset_16bit_8x8_linear_block;
+    pf_memset_8bit_t    *pf_memset_8bit_8x8_block;
+    pf_copy_yuv420p_buf_t *pf_copy_yuv420p_buf;
+    pf_fmt_conv_yuv420p_to_yuv422ile_t *pf_fmt_conv_yuv420p_to_yuv422ile;
+    pf_fmt_conv_yuv420p_to_yuv420sp_t  *pf_fmt_conv_yuv420p_to_yuv420sp_uv;
+    pf_fmt_conv_yuv420p_to_yuv420sp_t  *pf_fmt_conv_yuv420p_to_yuv420sp_vu;
+
+    /* Bitstream reader state */
+    stream_t         s_bit_stream;
+/* @ */
+
+    UWORD16         u2_is_mpeg2; /* 0 if stream is MPEG1 1 otherwise */
+    UWORD16         u2_frame_width;  /* Width of the frame */
+    UWORD16         u2_frame_height; /* Height of the frame */
+    UWORD16         u2_picture_width;
+    UWORD16         u2_horizontal_size;
+    UWORD16         u2_vertical_size;
+    UWORD16         u2_create_max_width;
+    UWORD16         u2_create_max_height;
+    UWORD16         u2_reinit_max_width;
+    UWORD16         u2_reinit_max_height;
+    UWORD16         u2_header_done;
+    UWORD16         u2_decode_header;
+
+    /* Current MB position and picture size in MBs */
+    UWORD16         u2_mb_x;
+    UWORD16         u2_mb_y;
+    UWORD16         u2_num_horiz_mb;
+    UWORD16         u2_num_vert_mb;
+    UWORD16         u2_num_flds_decoded;
+    void            *pv_pic_buf_mg;
+
+    UWORD32         u4_frm_buf_stride; /* for display Buffer */
+
+    UWORD16         u2_field_dct;
+    UWORD16         u2_read_dct_type;
+
+    UWORD16         u2_read_motion_type;
+    UWORD16         u2_motion_type;
+
+    const UWORD16   *pu2_mb_type;
+    UWORD16         u2_fld_pic;
+    UWORD16         u2_frm_pic;
+
+    yuv_buf_t       s_cur_frm_buf;
+
+    UWORD16         u2_fld_parity;
+    /* DC predictors, one per colour component */
+    UWORD16         u2_def_dc_pred[MAX_COLR_COMPS];
+
+    /* Variables related to Motion Vector predictors */
+
+    WORD16          ai2_pred_mv[2][2][2];
+    e_pred_direction_t   e_mb_pred;
+    UWORD16         au2_fcode_data[2];
+
+    /* Variables related to reference pictures */
+    yuv_buf_t       as_recent_fld[2][2];
+
+    UWORD8          u1_quant_scale;
+    /* Decremented once per decoded MB by the slice decode loop */
+    UWORD16         u2_num_mbs_left;
+    UWORD16         u2_first_mb;
+    UWORD16         u2_num_skipped_mbs;
+
+    UWORD8          *pu1_inv_scan_matrix;
+
+    UWORD16         u2_progressive_sequence;
+    e_pic_type_t         e_pic_type;
+
+    UWORD16         u2_full_pel_forw_vector;
+    UWORD16         u2_forw_f_code;
+    UWORD16         u2_full_pel_back_vector;
+    UWORD16         u2_back_f_code;
+
+    WORD16          ai2_mv[2][2][2]; /* Motion vectors */
+
+    /* Bitstream code present in Picture coding extension */
+    UWORD16         au2_f_code[2][2];
+    UWORD16         u2_intra_dc_precision;
+    UWORD16         u2_picture_structure;
+    UWORD16         u2_top_field_first;
+    UWORD16         u2_frame_pred_frame_dct;
+    UWORD16         u2_concealment_motion_vectors;
+    UWORD16         u2_q_scale_type;
+    UWORD16         u2_intra_vlc_format;
+    UWORD16         u2_alternate_scan;
+    UWORD16         u2_repeat_first_field;
+    UWORD16         u2_progressive_frame;
+
+
+    /* Bitstream code related to frame rate of the bitstream */
+    UWORD16         u2_frame_rate_code;
+    UWORD16         u2_frame_rate_extension_n;
+    UWORD16         u2_frame_rate_extension_d;
+    UWORD16         u2_framePeriod;   /* Frame period in milli seconds */
+
+    /* Members related to display dimensions of bitstream */
+    /* The size values may not be returned right now. But they are read */
+    /* and can be returned if there is a requirement.                   */
+    UWORD16         u2_display_horizontal_size;
+    UWORD16         u2_display_vertical_size;
+    UWORD16         u2_aspect_ratio_info;
+
+    /* Members related to motion compensation */
+    yuv_buf_t       s_mc_fw_buf;
+    yuv_buf_t       s_mc_bk_buf;
+    yuv_buf_t       s_mc_buf;
+    mb_mc_params_t  as_mb_mc_params[2][2];
+    yuv_buf_t       as_ref_buf[2][2];
+    e_mb_type_t       s_mb_type;
+
+    yuv_buf_t       s_dest_buf;
+
+    /* Variable to handle intra MB */
+    UWORD16         u2_prev_intra_mb;
+    UWORD16         u2_coded_mb;
+
+    /* Bidirect function pointers */
+    const struct _dec_mb_params_t *ps_func_bi_direct;
+
+    /* Forw or Back function pointers */
+    const struct _dec_mb_params_t *ps_func_forw_or_back;
+
+
+    /* CBP of the current MB        */
+    UWORD16         u2_cbp;
+    void            *pv_video_scratch;
+
+
+    /* For global error handling */
+    void            *pv_stack_cntxt;
+
+/* @ */
+    WORD32          i4_chromaFormat;
+    UWORD32         u4_xdmBufID;
+    UWORD32         u4_num_mem_records;
+    /* For holding memRecords */
+    void            *pv_memTab;
+
+    UWORD8          u1_flushfrm;
+    UWORD8          u1_flushcnt;
+    iv_yuv_buf_t    as_frame_buf[MAX_FRAME_BUFFER];
+    iv_yuv_buf_t    ps_yuv_buf;
+
+    ivd_get_display_frame_op_t  s_disp_op;
+
+
+    /* Flags describing where the current 8x8 block has non-zero           */
+    /* coefficients. They are cleared at the start of impeg2d_vld_decode;  */
+    /* bit 0 is set for a non-zero first coefficient and 0x80 for a        */
+    /* non-zero 64th coefficient. A combined value of 1 is treated as the  */
+    /* "DC only" case when selecting the IDCT routine.                     */
+    UWORD32         u4_non_zero_cols;
+    UWORD32         u4_non_zero_rows;
+
+    UWORD32         u4_num_frames_decoded;
+
+    /* Adding error code variable to signal benign errors. */
+    UWORD32         u4_error_code;
+
+    WORD32          i4_num_cores;
+
+    UWORD8          u1_first_frame_done;
+
+    void            *pv_codec_thread_handle;
+    void            *ps_dec_state_multi_core;
+    UWORD32         u4_inp_ts;
+    pic_buf_t       *ps_cur_pic;
+    pic_buf_t       *ps_disp_pic;
+    pic_buf_t       *aps_ref_pics[2];
+
+    WORD32          i4_disp_buf_id;
+    WORD32          i4_cur_buf_id;
+    iv_yuv_buf_t    *ps_disp_frm_buf;
+
+    UWORD32         u4_share_disp_buf;
+    void            *pv_pic_buf_base;
+
+    disp_mgr_t      s_disp_mgr;
+    UWORD8          *pu1_chroma_ref_buf[BUF_MGR_MAX_CNT];
+    ivd_out_bufdesc_t as_disp_buffers[BUF_MGR_MAX_CNT];
+
+    /* Flag to signal last coeff in a 8x8 block is one
+    after mismatch control */
+    WORD32          i4_last_value_one;
+
+    WORD32          i4_start_mb_y;
+    WORD32          i4_end_mb_y;
+
+    /**
+     * Job queue buffer base
+     */
+    void            *pv_jobq_buf;
+
+    /**
+     * Job Queue mem tab size
+     */
+    WORD32          i4_jobq_buf_size;
+
+    /**
+     * Job Queue context
+     */
+    void            *pv_jobq;
+
+    /* Pointer to input bitstream */
+    UWORD8          *pu1_inp_bits_buf;
+
+    /* Number of bytes in the input bitstream */
+    UWORD32         u4_num_inp_bytes;
+
+    /* Bytes consumed */
+    WORD32          i4_bytes_consumed;
+
+    IVD_ARCH_T      e_processor_arch;
+
+    IVD_SOC_T       e_processor_soc;
+
+    WORD32          i4_frame_decoded;
+
+}dec_state_t;
+
+
+
+
+/* Handler pointer types; both kinds of handler receive the decoder state */
+typedef void (*func_decmb_params)(dec_state_t *);
+typedef void  (*mc_funcs)(dec_state_t *);
+/**
+ * Table entry pairing an MB type with two handlers that operate on the
+ * decoder state. dec_state_t points at entries of this type through
+ * ps_func_bi_direct and ps_func_forw_or_back.
+ */
+typedef struct _dec_mb_params_t
+{
+    func_decmb_params    pf_func_mb_params;
+    e_mb_type_t            s_mb_type;
+    mc_funcs             pf_mc;
+}dec_mb_params_t;
+
+
+
+/* Size of the per-thread arrays in dec_state_multi_core_t */
+#define MAX_THREADS     4
+
+
+/* Number of 16-line MB rows in MAX_HEIGHT lines; sizes the row offset table */
+#define MAX_MB_ROWS     (MAX_HEIGHT / 16) // number of rows for 1080p
+
+/**
+ * State shared between the decoder threads of one multi-core decode.
+ */
+typedef struct _dec_state_multi_core
+{
+    // contains the decoder state of decoder for each thread
+    dec_state_t *ps_dec_state[MAX_THREADS];
+    // one entry per thread
+    UWORD32     au4_thread_launched[MAX_THREADS];
+    // number of rows: first thread will populate the row offsets and update
+    // row_offset_cnt. Other threads should pick up offset from this thread
+    // and start decoding
+    UWORD32     au4_row_offset[MAX_MB_ROWS];
+    // NOTE(review): volatile by itself does not order the au4_row_offset
+    // writes against this counter across threads -- confirm that the users
+    // of this field add the required synchronization
+    volatile    UWORD32 u4_row_offset_cnt;
+}dec_state_multi_core_t;
+
+
+
+#endif /* #ifndef __IMPEG2D_STRUCTS_H__ */

diff --git a/decoder/impeg2d_vld.c b/decoder/impeg2d_vld.c
new file mode 100644
index 0000000..972f42a
--- /dev/null
+++ b/decoder/impeg2d_vld.c

@@ -0,0 +1,1183 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#include <string.h>
+
+#include "iv_datatypedef.h"
+#include "iv.h"
+
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_globals.h"
+#include "impeg2_mem_func.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_macros.h"
+
+#include "ivd.h"
+#include "impeg2d.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_structs.h"
+#include "impeg2d_vld_tables.h"
+#include "impeg2d_vld.h"
+#include "impeg2d_pic_proc.h"
+#include "impeg2d_debug.h"
+
+
+/*******************************************************************************
+* Function name : impeg2d_dec_vld_symbol
+*
+* Description   : Decodes one variable length symbol by walking a binary
+*                 code tree, consuming one bit of the code per step.
+*
+* Arguments     :
+* ps_stream      : Bitstream to read from
+* ai2_code_table : Code tree. Entry [node][bit] is either the index of the
+*                  next node (value > 0) or a terminating entry (value <= 0)
+* u2_max_len     : Number of bits looked at up front, i.e. the maximum
+*                  length of a code in bits
+*
+* Value Returned: The terminating (non-positive) table entry that was reached.
+*                 Only the bits actually used by the code are flushed from
+*                 the stream.
+*******************************************************************************/
+WORD16 impeg2d_dec_vld_symbol(stream_t *ps_stream,const WORD16 ai2_code_table[][2],  UWORD16 u2_max_len)
+{
+    /* Look at the longest possible code without consuming it */
+    const UWORD16 u2_peek_len  = u2_max_len;
+    const UWORD16 u2_peek_bits = impeg2d_bit_stream_nxt(ps_stream, u2_max_len);
+    UWORD16 u2_bits_left = u2_max_len;
+    WORD16  i2_node = 0;
+
+    for(;;)
+    {
+        UWORD16 u2_branch;
+
+        /* Take the next most significant unread bit of the window */
+        u2_bits_left--;
+        u2_branch = (UWORD8)((u2_peek_bits >> u2_bits_left) & 0x1);
+
+        /* Follow that branch; a non-positive entry ends the walk */
+        i2_node = ai2_code_table[i2_node][u2_branch];
+        if(i2_node <= 0)
+        {
+            break;
+        }
+    }
+
+    /* Consume exactly the bits that made up the code */
+    impeg2d_bit_stream_flush(ps_stream, (UWORD8)(u2_peek_len - u2_bits_left));
+    return i2_node;
+}
+/*******************************************************************************
+* Function name : impeg2d_fast_dec_vld_symbol
+*
+* Description   : Decodes one variable length symbol using multi-level lookup
+*                 tables, consuming several bits of the code per step.
+*
+* Arguments     :
+* ps_stream      : Bitstream to read from
+* ai2_code_table : Lookup table. Entry [i][0] is the code length in bits, or
+*                  0 when a further level has to be searched; entry [i][1]
+*                  is then the decoded value or the index of the next level
+*                  in au2_indexTable respectively
+* au2_indexTable : Per level: [level][0] is the number of bits looked up at
+*                  that level, [level][1] the base offset of that level in
+*                  ai2_code_table
+* u2_max_len     : Maximum length of a code in bits
+*
+* Value Returned: Decoded symbol. The code's bits are flushed from the stream.
+*******************************************************************************/
+WORD16 impeg2d_fast_dec_vld_symbol(stream_t *ps_stream,
+                     const WORD16  ai2_code_table[][2],
+                     const UWORD16 au2_indexTable[][2],
+                     UWORD16 u2_max_len)
+{
+    /* Bits of the code that are still unresolved, right aligned */
+    UWORD16 u2_window     = impeg2d_bit_stream_nxt(ps_stream, u2_max_len);
+    /* Lookup width and table base of the level being searched */
+    UWORD16 u2_level_bits = au2_indexTable[0][0];
+    UWORD16 u2_level_base = 0;
+
+    for(;;)
+    {
+        const UWORD16 u2_prefix   = u2_window >> (u2_max_len - u2_level_bits);
+        const WORD16  *pi2_entry  = ai2_code_table[u2_prefix + u2_level_base];
+        const UWORD16 u2_code_len = pi2_entry[0];
+
+        if(0 != u2_code_len)
+        {
+            /* Leaf entry: consume the code and hand back its value */
+            impeg2d_bit_stream_flush(ps_stream, u2_code_len);
+            return pi2_entry[1];
+        }
+
+        /* Drop the bits just looked up and descend to the next level */
+        u2_window     &= ((1 << (u2_max_len - u2_level_bits)) - 1);
+        u2_max_len    -= u2_level_bits;
+        u2_level_bits  = au2_indexTable[pi2_entry[1]][0];
+        u2_level_base  = au2_indexTable[pi2_entry[1]][1];
+    }
+}
+/******************************************************************************
+*
+*  Function Name   : impeg2d_dec_ac_coeff_zero
+*
+*  Description     : Decodes using Table B.14
+*
+*  Arguments       :
+*  ps_stream       : Bitstream; MPEG2_AC_COEFF_MAX_LEN bits are looked at.
+*                    This routine does not flush any bits itself
+*  pu2_sym_len     : Receives the length of the decoded code in bits
+*  pu2_sym_val     : Receives EOB_CODE_VALUE, ESC_CODE_VALUE or the table
+*                    entry shifted right by 5 bits
+*
+*  Values Returned : Raw table entry (length in the low 5 bits)
+*
+*  Revision History:
+*
+*         28 02 2002  AR        Creation
+*******************************************************************************/
+UWORD16 impeg2d_dec_ac_coeff_zero(stream_t *ps_stream, UWORD16* pu2_sym_len, UWORD16* pu2_sym_val)
+{
+    UWORD16 u2_bits = (UWORD16)impeg2d_bit_stream_nxt(ps_stream,MPEG2_AC_COEFF_MAX_LEN);
+    UWORD16 u2_group;
+    UWORD16 u2_group_info;
+    UWORD16 u2_index;
+    UWORD16 u2_decoded_value;
+    UWORD8  u1_shift;
+
+    /* Classify the code by the magnitude of the bits looked at */
+    if (u2_bits < 0x20)
+    {
+        u2_group = 63;
+        u2_bits  = (UWORD16)(u2_bits - 0x10);
+    }
+    else if (u2_bits < 0x40)
+    {
+        u2_group = 64;
+    }
+    else if (u2_bits < 0x0800)
+    {
+        u2_group = 31 + (u2_bits >> 6);
+    }
+    else
+    {
+        u2_group = u2_bits >> 11;
+    }
+
+    /*-----------------------------------------------------------------------
+     * Each offset table entry packs two things: the low 4 bits give the
+     * amount by which the bits read have to be shifted, the remaining bits
+     * give the start of the group inside the Ac Coeff Table.
+     *-----------------------------------------------------------------------*/
+    u2_group_info = gau2_impeg2d_offset_zero[u2_group];
+    u1_shift      = u2_group_info & 0xF;
+
+    /* Start of the group + position of this code inside the group */
+    u2_index = (u2_group_info >> 4) + (u2_bits >> u1_shift);
+
+    /* The entry holds Run, Level and the number of bits used by the code */
+    u2_decoded_value = gau2_impeg2d_dct_coeff_zero[u2_index];
+
+    if(u2_decoded_value == END_OF_BLOCK)
+    {
+        *pu2_sym_len = 2;
+        *pu2_sym_val = EOB_CODE_VALUE;
+        return(u2_decoded_value);
+    }
+
+    /* Escape and ordinary codes both keep their length in the low 5 bits */
+    *pu2_sym_len = u2_decoded_value & 0x1F;
+    if(u2_decoded_value == ESCAPE_CODE)
+    {
+        *pu2_sym_val = ESC_CODE_VALUE;
+    }
+    else
+    {
+        *pu2_sym_val = u2_decoded_value >> 5;
+    }
+    return(u2_decoded_value);
+}
+
+/******************************************************************************
+*
+*  Function Name   : impeg2d_dec_ac_coeff_one
+*
+*  Description     : Decodes using Table B.15
+*
+*  Arguments       :
+*  ps_stream       : Bitstream; MPEG2_AC_COEFF_MAX_LEN bits are looked at.
+*                    This routine does not flush any bits itself
+*  pu2_sym_len     : Receives the length of the decoded code in bits
+*  pu2_sym_val     : Receives EOB_CODE_VALUE, ESC_CODE_VALUE or the table
+*                    entry shifted right by 5 bits
+*
+*  Values Returned : Raw table entry (length in the low 5 bits)
+*
+*  Revision History:
+*
+*         28 02 2002  AR        Creation
+*******************************************************************************/
+UWORD16 impeg2d_dec_ac_coeff_one(stream_t *ps_stream, UWORD16* pu2_sym_len, UWORD16* pu2_sym_val)
+{
+    UWORD16 u2_bits = (UWORD16)impeg2d_bit_stream_nxt(ps_stream,MPEG2_AC_COEFF_MAX_LEN);
+    UWORD16 u2_index;
+    UWORD16 u2_decoded_value;
+
+    if (u2_bits >= 0x8000)
+    {
+        /* MSB of the code is 1: index directly relative to the first group */
+        UWORD16 u2_sub;
+
+        if ((u2_bits >> 12) == 0xF)
+        {
+            u2_sub = (u2_bits >> 8) & 0xF;
+        }
+        else
+        {
+            u2_sub = u2_bits >> 11;
+        }
+        u2_index = u2_sub + gau2_impeg2d_offset_one[0];
+    }
+    else if (u2_bits < 0x20)
+    {
+        /* Smallest codes: last group, selected by the low 4 bits */
+        u2_index = gau2_impeg2d_offset_one[63] + (u2_bits & 0xF);
+    }
+    else
+    {
+        /* Remaining codes go through the offset table. Each entry packs the
+         * shift for the bits read (low 4 bits) and the start of the group.
+         */
+        UWORD16 u2_group;
+        UWORD16 u2_group_info;
+        UWORD8  u1_shift;
+
+        if (u2_bits >= 0x400)
+        {
+            u2_group = u2_bits >> 10;
+        }
+        else
+        {
+            u2_group = (u2_bits >> 5) + 31;
+        }
+
+        u2_group_info = gau2_impeg2d_offset_one[u2_group];
+        u1_shift      = u2_group_info & 0xF;
+        u2_index      = (u2_group_info >> 4) + (u2_bits >> u1_shift);
+    }
+
+    /* The entry holds Run, Level and the number of bits used by the code */
+    u2_decoded_value = gau2_impeg2d_dct_coeff_one[u2_index];
+
+    if(u2_decoded_value == END_OF_BLOCK)
+    {
+        *pu2_sym_len = 4;
+        *pu2_sym_val = EOB_CODE_VALUE;
+        return(u2_decoded_value);
+    }
+
+    /* Escape and ordinary codes both keep their length in the low 5 bits */
+    *pu2_sym_len = u2_decoded_value & 0x1F;
+    if(u2_decoded_value == ESCAPE_CODE)
+    {
+        *pu2_sym_val = ESC_CODE_VALUE;
+    }
+    else
+    {
+        *pu2_sym_val = u2_decoded_value >> 5;
+    }
+    return(u2_decoded_value);
+}
+
+/******************************************************************************
+ *
+ *  Function Name   : impeg2d_vld_inv_quant_mpeg1
+ *
+ *  Description     : Runs VLD for one block and inverse quantizes the
+ *                    result with the MPEG1 inverse quantization routine
+ *
+ *  Arguments       :
+ *  pv_dec          : Decoder state (dec_state_t *)
+ *  pi2_out_addr    : Block buffer that receives the dequantized coeffs
+ *  pu1_scan        : Scan table to be used
+ *  u2_intra_flag   : 1 selects the intra weighting matrix, any other
+ *                    value the inter weighting matrix
+ *  u2_colr_comp    : 0 - Luma, 1 - U comp, 2 - V comp
+ *  u2_d_picture    : D Picture or not
+ *
+ *  Values Returned : Error code from impeg2d_vld_decode; the block is
+ *                    not inverse quantized when that call fails
+ ******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_vld_inv_quant_mpeg1(
+                             void  *pv_dec,           /* Decoder State */
+                             WORD16       *pi2_out_addr,       /*!< Address where decoded symbols will be stored */
+                             const UWORD8 *pu1_scan,          /*!< Scan table to be used */
+                             UWORD16      u2_intra_flag,      /*!< Intra Macroblock or not */
+                             UWORD16      u2_colr_comp,      /*!< 0 - Luma,1 - U comp, 2 - V comp */
+                             UWORD16      u2_d_picture        /*!< D Picture or not */
+                             )
+{
+    dec_state_t *ps_dec = (dec_state_t *)pv_dec;
+    IMPEG2D_ERROR_CODES_T e_status;
+    UWORD8  *pu1_quant_matrix;
+
+    /* Coefficient values and positions produced by VLD */
+    WORD16  ai2_levels[NUM_COEFFS];
+    UWORD8  au1_positions[NUM_COEFFS];
+    WORD32  i4_coeff_cnt;
+
+    /* Step 1: variable length decode the block */
+    e_status = impeg2d_vld_decode(ps_dec, ai2_levels, pu1_scan, au1_positions,
+                                  u2_intra_flag, u2_colr_comp, u2_d_picture,
+                                  ps_dec->u2_intra_vlc_format,
+                                  ps_dec->u2_is_mpeg2, &i4_coeff_cnt);
+    if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_status)
+    {
+        return e_status;
+    }
+
+    /* Step 2: pick the weighting matrix (YUV420, Table 7.5) */
+    if (u2_intra_flag == 1)
+    {
+        pu1_quant_matrix = ps_dec->au1_intra_quant_matrix;
+    }
+    else
+    {
+        pu1_quant_matrix = ps_dec->au1_inter_quant_matrix;
+    }
+
+    IMPEG2D_IQNT_INP_STATISTICS(pi2_out_addr, ps_dec->u4_non_zero_cols, ps_dec->u4_non_zero_rows);
+    /* Step 3: inverse quantize the VLD output */
+    PROFILE_DISABLE_INVQUANT_IF0
+
+    {
+        /* The output block is cleared unless only the first coeff is set */
+        PROFILE_DISABLE_MEMSET_RESBUF_IF0
+        if (1 != (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows))
+        {
+            ps_dec->pf_memset_16bit_8x8_linear_block (pi2_out_addr);
+        }
+
+        impeg2d_inv_quant_mpeg1(pi2_out_addr, pu1_quant_matrix,
+                                ps_dec->u1_quant_scale, u2_intra_flag,
+                                i4_coeff_cnt, ai2_levels, au1_positions,
+                                pu1_scan, &ps_dec->u2_def_dc_pred[u2_colr_comp],
+                                ps_dec->u2_intra_dc_precision);
+
+        /* Inverse quantization can make the first coeff non-zero (intra
+         * DC handling), so the flags are refreshed afterwards.
+         */
+        if (0 != pi2_out_addr[0])
+        {
+            ps_dec->u4_non_zero_cols  |= 0x1;
+            ps_dec->u4_non_zero_rows  |= 0x1;
+        }
+    }
+
+    return e_status;
+}
+
+/******************************************************************************
+  *
+  *  Function Name   : impeg2d_vld_inv_quant_mpeg2
+  *
+  *  Description     : Runs VLD for one block, inverse quantizes the result
+  *                    with the MPEG2 inverse quantization routine and
+  *                    applies mismatch control
+  *
+  *  Arguments       :
+  *  pv_dec          : Decoder state (dec_state_t *)
+  *  pi2_out_addr    : Block buffer that receives the dequantized coeffs
+  *  pu1_scan        : Scan table to be used
+  *  u2_intra_flag   : 1 selects the intra weighting matrix, any other
+  *                    value the inter weighting matrix
+  *  u2_colr_comp    : 0 - Luma, 1 - U comp, 2 - V comp
+  *  u2_d_picture    : D Picture or not
+  *
+  *  Values Returned : Error code from impeg2d_vld_decode; the block is
+  *                    not inverse quantized when that call fails
+  ******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_vld_inv_quant_mpeg2(
+                             void  *pv_dec,           /* Decoder State */
+                             WORD16       *pi2_out_addr,       /*!< Address where decoded symbols will be stored */
+                             const UWORD8 *pu1_scan,          /*!< Scan table to be used */
+                             UWORD16      u2_intra_flag,      /*!< Intra Macroblock or not */
+                             UWORD16      u2_colr_comp,      /*!< 0 - Luma,1 - U comp, 2 - V comp */
+                             UWORD16      u2_d_picture        /*!< D Picture or not */
+                             )
+{
+    dec_state_t *ps_dec = (dec_state_t *)pv_dec;
+    IMPEG2D_ERROR_CODES_T e_status;
+    UWORD8  *pu1_quant_matrix;
+    WORD32  i4_sum_is_even;
+
+    /* Coefficient values and positions produced by VLD */
+    WORD16  ai2_levels[NUM_COEFFS];
+    UWORD8  au1_positions[NUM_COEFFS];
+    WORD32  i4_coeff_cnt;
+
+    /* Step 1: variable length decode the block */
+    e_status = impeg2d_vld_decode(ps_dec, ai2_levels, pu1_scan, au1_positions,
+                                  u2_intra_flag, u2_colr_comp, u2_d_picture,
+                                  ps_dec->u2_intra_vlc_format,
+                                  ps_dec->u2_is_mpeg2, &i4_coeff_cnt);
+    if ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE != e_status)
+    {
+        return e_status;
+    }
+
+    /* Step 2: pick the weighting matrix (YUV420, Table 7.5) */
+    if (u2_intra_flag == 1)
+    {
+        pu1_quant_matrix = ps_dec->au1_intra_quant_matrix;
+    }
+    else
+    {
+        pu1_quant_matrix = ps_dec->au1_inter_quant_matrix;
+    }
+
+    /* Mismatch control flag; only set further down for blocks whose
+     * single non-zero coeff is the first one
+     */
+    ps_dec->i4_last_value_one = 0;
+
+    IMPEG2D_IQNT_INP_STATISTICS(pi2_out_addr, ps_dec->u4_non_zero_cols, ps_dec->u4_non_zero_rows);
+
+    /* Step 3: inverse quantize the VLD output */
+    PROFILE_DISABLE_INVQUANT_IF0
+
+    {
+        /* The output block is cleared unless only the first coeff is set */
+        PROFILE_DISABLE_MEMSET_RESBUF_IF0
+        if (1 != (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows))
+        {
+            ps_dec->pf_memset_16bit_8x8_linear_block (pi2_out_addr);
+        }
+
+        i4_sum_is_even = impeg2d_inv_quant_mpeg2(pi2_out_addr, pu1_quant_matrix,
+                                                 ps_dec->u1_quant_scale, u2_intra_flag,
+                                                 i4_coeff_cnt, ai2_levels,
+                                                 au1_positions, pu1_scan,
+                                                 &ps_dec->u2_def_dc_pred[u2_colr_comp],
+                                                 ps_dec->u2_intra_dc_precision);
+
+        /* Inverse quantization can make the first coeff non-zero (intra
+         * DC handling), so the flags are refreshed afterwards.
+         */
+        if (0 != pi2_out_addr[0])
+        {
+            ps_dec->u4_non_zero_cols  |= 0x1;
+            ps_dec->u4_non_zero_rows  |= 0x1;
+        }
+
+        /* Step 4: mismatch control */
+        if (1 != (ps_dec->u4_non_zero_cols | ps_dec->u4_non_zero_rows))
+        {
+            /* Toggle the LSB of the last coeff when the sum is even */
+            pi2_out_addr[63] ^= (i4_sum_is_even & 1);
+
+            if (0 != pi2_out_addr[63])
+            {
+                ps_dec->u4_non_zero_cols  |= 0x80;
+                ps_dec->u4_non_zero_rows  |= 0x80;
+            }
+        }
+        else
+        {
+            /* Only the first coeff is set: the last coeff is left alone
+             * and the flag is set when the first coeff is even
+             */
+            ps_dec->i4_last_value_one = 1 - (pi2_out_addr[0] & 1);
+        }
+    }
+
+    return e_status;
+}
+
+
+/******************************************************************************
+*
+*  Function Name   : impeg2d_vld_decode
+*
+*  Description     : Performs VLD operation for MPEG1/2
+*
+*  Arguments       :
+*  state           : VLCD state parameter
+*  regs            : Registers of VLCD
+*
+*  Values Returned : None
+******************************************************************************/
+IMPEG2D_ERROR_CODES_T impeg2d_vld_decode(
+    dec_state_t *ps_dec,
+    WORD16      *pi2_outAddr,       /*!< Address where decoded symbols will be stored */
+    const UWORD8 *pu1_scan,         /*!< Scan table to be used */
+    UWORD8      *pu1_pos,       /*!< Scan table to be used */
+    UWORD16     u2_intra_flag,      /*!< Intra Macroblock or not */
+    UWORD16     u2_chroma_flag,     /*!< Chroma Block or not */
+    UWORD16     u2_d_picture,       /*!< D Picture or not */
+    UWORD16     u2_intra_vlc_format, /*!< Intra VLC format */
+    UWORD16     u2_mpeg2,          /*!< MPEG-2 or not */
+    WORD32      *pi4_num_coeffs /*!< Returns the number of coeffs in block */
+    )
+{
+
+    UWORD32 u4_sym_len;
+
+    UWORD32 u4_decoded_value;
+    UWORD32 u4_level_first_byte;
+    WORD32  u4_level;
+    UWORD32 u4_run, u4_numCoeffs;
+    UWORD32 u4_buf;
+    UWORD32 u4_buf_nxt;
+    UWORD32 u4_offset;
+    UWORD32 *pu4_buf_aligned;
+    UWORD32 u4_bits;
+    stream_t *ps_stream = &ps_dec->s_bit_stream;
+    WORD32  u4_pos;
+    UWORD32 u4_nz_cols;
+    UWORD32 u4_nz_rows;
+
+    *pi4_num_coeffs = 0;
+
+    ps_dec->u4_non_zero_cols = 0;
+    ps_dec->u4_non_zero_rows = 0;
+    u4_nz_cols = ps_dec->u4_non_zero_cols;
+    u4_nz_rows = ps_dec->u4_non_zero_rows;
+
+    GET_TEMP_STREAM_DATA(u4_buf,u4_buf_nxt,u4_offset,pu4_buf_aligned,ps_stream)
+    /**************************************************************************/
+    /* Decode the DC coefficient in case of Intra block                       */
+    /**************************************************************************/
+    if(u2_intra_flag)
+    {
+        WORD32 dc_size;
+        WORD32 dc_diff;
+        WORD32 maxLen;
+        WORD32 idx;
+
+
+        maxLen = MPEG2_DCT_DC_SIZE_LEN;
+        idx = 0;
+        if(u2_chroma_flag != 0)
+        {
+            maxLen += 1;
+            idx++;
+        }
+
+
+        {
+            WORD16  end = 0;
+            UWORD32 maxLen_tmp = maxLen;
+            UWORD16 m_iBit;
+
+
+            /* Get the maximum number of bits needed to decode a symbol */
+            IBITS_NXT(u4_buf,u4_buf_nxt,u4_offset,u4_bits,maxLen)
+            do
+            {
+                maxLen_tmp--;
+                /* Read one bit at a time from the variable to decode the huffman code */
+                m_iBit = (UWORD8)((u4_bits >> maxLen_tmp) & 0x1);
+
+                /* Get the next node pointer or the symbol from the tree */
+                end = gai2_impeg2d_dct_dc_size[idx][end][m_iBit];
+            }while(end > 0);
+            dc_size = end + MPEG2_DCT_DC_SIZE_OFFSET;
+
+            /* Flush the appropriate number of bits from the stream */
+            FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,(maxLen - maxLen_tmp),pu4_buf_aligned)
+
+        }
+
+
+
+        if (dc_size != 0)
+        {
+            UWORD32 u4_bits;
+
+            IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned, dc_size)
+            dc_diff = u4_bits;
+
+            if ((dc_diff & (1 << (dc_size - 1))) == 0) //v Probably the prediction algo?
+                dc_diff -= (1 << dc_size) - 1;
+        }
+        else
+        {
+            dc_diff = 0;
+        }
+
+
+        pi2_outAddr[*pi4_num_coeffs]    = dc_diff;
+        /* This indicates the position of the coefficient. Since this is the DC
+         * coefficient, we put the position as 0.
+         */
+        pu1_pos[*pi4_num_coeffs]    = pu1_scan[0];
+        (*pi4_num_coeffs)++;
+
+        if (0 != dc_diff)
+        {
+            u4_nz_cols |= 0x01;
+            u4_nz_rows |= 0x01;
+        }
+
+        u4_numCoeffs = 1;
+    }
+    /**************************************************************************/
+    /* Decoding of first AC coefficient in case of non Intra block            */
+    /**************************************************************************/
+    else
+    {
+        /* First symbol can be 1s */
+        UWORD32 u4_bits;
+
+        IBITS_NXT(u4_buf,u4_buf_nxt,u4_offset,u4_bits,1)
+
+        if(u4_bits == 1)
+        {
+
+            FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,1, pu4_buf_aligned)
+            IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned, 1)
+            if(u4_bits == 1)
+            {
+                pi2_outAddr[*pi4_num_coeffs] = -1;
+            }
+            else
+            {
+                pi2_outAddr[*pi4_num_coeffs] = 1;
+            }
+
+            /* This indicates the position of the coefficient. Since this is the
+             * first coefficient of the non-intra block, we put the position as
+             * pu1_scan[0].
+             */
+            pu1_pos[*pi4_num_coeffs]    = pu1_scan[0];
+            (*pi4_num_coeffs)++;
+            u4_numCoeffs = 1;
+
+            u4_nz_cols |= 0x01;
+            u4_nz_rows |= 0x01;
+        }
+        else
+        {
+            u4_numCoeffs = 0;
+        }
+    }
+    if (1 == u2_d_picture)
+    {
+        PUT_TEMP_STREAM_DATA(u4_buf, u4_buf_nxt, u4_offset, pu4_buf_aligned, ps_stream)
+        ps_dec->u4_non_zero_cols  = u4_nz_cols;
+        ps_dec->u4_non_zero_rows  = u4_nz_rows;
+        return ((IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE);
+    }
+
+
+
+        if (1 == u2_intra_vlc_format && u2_intra_flag)
+        {
+
+            while(1)
+            {
+                //Putting the impeg2d_dec_ac_coeff_one function inline.
+
+                UWORD32 lead_zeros;
+                WORD16 DecodedValue;
+
+                u4_sym_len = 17;
+                IBITS_NXT(u4_buf,u4_buf_nxt,u4_offset,u4_bits,u4_sym_len)
+
+                DecodedValue = gau2_impeg2d_tab_one_1_9[u4_bits >> 8];
+                u4_sym_len = (DecodedValue & 0xf);
+                u4_level = DecodedValue >> 9;
+                /* One table lookup */
+                if(0 != u4_level)
+                {
+                    u4_run = ((DecodedValue >> 4) & 0x1f);
+                    u4_numCoeffs       += u4_run;
+                    u4_pos             = pu1_scan[u4_numCoeffs++ & 63];
+                    pu1_pos[*pi4_num_coeffs]    = u4_pos;
+
+                    FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_sym_len,pu4_buf_aligned)
+                    pi2_outAddr[*pi4_num_coeffs]    = u4_level;
+
+                    (*pi4_num_coeffs)++;
+                }
+                else
+                {
+                    if (DecodedValue == END_OF_BLOCK_ONE)
+                    {
+                        u4_sym_len = 4;
+
+                        break;
+                    }
+                    else
+                    {
+                        /*Second table lookup*/
+                        lead_zeros = CLZ(u4_bits) - 20;/* -16 since we are dealing with WORD32 */
+                        if (0 != lead_zeros)
+                        {
+
+                            u4_bits         = (u4_bits >> (6 - lead_zeros)) & 0x001F;
+
+                            /* Flush the number of bits */
+                            if (1 == lead_zeros)
+                            {
+                                u4_sym_len         = ((u4_bits & 0x18) >> 3) == 2 ? 11:10;
+                            }
+                            else
+                            {
+                                u4_sym_len         = 11 + lead_zeros;
+                            }
+                            /* flushing */
+                            FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_sym_len,pu4_buf_aligned)
+
+                            /* Calculate the address */
+                            u4_bits         = ((lead_zeros - 1) << 5) + u4_bits;
+
+                            DecodedValue    = gau2_impeg2d_tab_one_10_16[u4_bits];
+
+                            u4_run = BITS(DecodedValue, 8,4);
+                            u4_level = ((WORD16) DecodedValue) >> 9;
+
+                            u4_numCoeffs       += u4_run;
+                            u4_pos             = pu1_scan[u4_numCoeffs++ & 63];
+                            pu1_pos[*pi4_num_coeffs]    = u4_pos;
+                            pi2_outAddr[*pi4_num_coeffs]    = u4_level;
+                            (*pi4_num_coeffs)++;
+                        }
+                        /*********************************************************************/
+                        /* MPEG2 Escape Code                                                 */
+                        /*********************************************************************/
+                        else if(u2_mpeg2 == 1)
+                        {
+                            u4_sym_len         = 6;
+                            FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_sym_len,pu4_buf_aligned)
+                                IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,18)
+                                u4_decoded_value    = u4_bits;
+                            u4_run             = (u4_decoded_value >> 12);
+                            u4_level           = (u4_decoded_value & 0x0FFF);
+
+                            if (u4_level)
+                                u4_level = (u4_level - ((u4_level & 0x0800) << 1));
+
+                            u4_numCoeffs       += u4_run;
+                            u4_pos             = pu1_scan[u4_numCoeffs++ & 63];
+                            pu1_pos[*pi4_num_coeffs]    = u4_pos;
+                            pi2_outAddr[*pi4_num_coeffs]    = u4_level;
+                            (*pi4_num_coeffs)++;
+                        }
+                        /*********************************************************************/
+                        /* MPEG1 Escape Code                                                 */
+                        /*********************************************************************/
+                        else
+                        {
+                            /*-----------------------------------------------------------
+                            * MPEG-1 Stream
+                            *
+                            * <See D.9.3 of MPEG-2> Run-level escape syntax
+                            * Run-level values that cannot be coded with a VLC are coded
+                            * by the escape code '0000 01' followed by
+                            * either a 14-bit FLC (-127 <= level <= 127),
+                            * or a 22-bit FLC (-255 <= level <= 255).
+                            * This is described in Annex B,B.5f of MPEG-1.standard
+                            *-----------------------------------------------------------*/
+
+                            /*-----------------------------------------------------------
+                            * First 6 bits are the value of the Run. Next is First 8 bits
+                            * of Level. These bits decide whether it is 14 bit FLC or
+                            * 22-bit FLC.
+                            *
+                            * If( first 8 bits of Level == '10000000' or '00000000')
+                            *      then it is 22-bit FLC.
+                            * else
+                            *      it is 14-bit FLC.
+                            *-----------------------------------------------------------*/
+                            u4_sym_len         = 6;
+                            FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_sym_len,pu4_buf_aligned)
+                                IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,14)
+                                u4_decoded_value     = u4_bits;
+                            u4_run              = (u4_decoded_value >> 8);
+                            u4_level_first_byte = (u4_decoded_value & 0x0FF);
+                            if(u4_level_first_byte & 0x7F)
+                            {
+                                /*-------------------------------------------------------
+                                * First 8 bits of level are neither 1000000 nor 00000000
+                                * Hence 14-bit FLC (Last 8 bits are used to get level)
+                                *
+                                *  Level = (msb of Level_First_Byte is 1)?
+                                *          Level_First_Byte - 256 : Level_First_Byte
+                                *-------------------------------------------------------*/
+                                u4_level = (u4_level_first_byte -
+                                    ((u4_level_first_byte & 0x80) << 1));
+                            }
+                            else
+                            {
+                                /*-------------------------------------------------------
+                                * Next 8 bits are either 1000000 or 00000000
+                                * Hence 22-bit FLC (Last 16 bits are used to get level)
+                                *
+                                *  Level = (msb of Level_First_Byte is 1)?
+                                *          Level_Second_Byte - 256 : Level_Second_Byte
+                                *-------------------------------------------------------*/
+                                IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,8)
+                                    u4_level = u4_bits;
+                                u4_level = (u4_level - (u4_level_first_byte << 1));
+                            }
+                            u4_numCoeffs += u4_run;
+
+                            u4_pos = pu1_scan[u4_numCoeffs++ & 63];
+
+                            pu1_pos[*pi4_num_coeffs]    = u4_pos;
+                            pi2_outAddr[*pi4_num_coeffs]    = u4_level;
+                            (*pi4_num_coeffs)++;
+                        }
+                    }
+                }
+
+                u4_nz_cols |= 1 << (u4_pos & 0x7);
+                u4_nz_rows |= 1 << (u4_pos >> 0x3);
+
+
+            }
+            IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,u4_sym_len)
+            if (u4_numCoeffs > 64)
+            {
+                return IMPEG2D_MB_TEX_DECODE_ERR;
+            }
+        }
+        else
+        {
+            // Inline
+            while(1)
+            {
+
+                UWORD32 lead_zeros;
+                UWORD16 DecodedValue;
+
+                u4_sym_len = 17;
+                IBITS_NXT(u4_buf, u4_buf_nxt, u4_offset, u4_bits, u4_sym_len)
+
+
+                DecodedValue = gau2_impeg2d_tab_zero_1_9[u4_bits >> 8];
+                u4_sym_len = BITS(DecodedValue, 3, 0);
+                u4_level = ((WORD16) DecodedValue) >> 9;
+
+                if (0 != u4_level)
+                {
+                    u4_run = BITS(DecodedValue, 8,4);
+
+                    u4_numCoeffs       += u4_run;
+
+                    u4_pos                 = pu1_scan[u4_numCoeffs++ & 63];
+                    pu1_pos[*pi4_num_coeffs]    = u4_pos;
+
+                    FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_sym_len,pu4_buf_aligned)
+                    pi2_outAddr[*pi4_num_coeffs]    = u4_level;
+                    (*pi4_num_coeffs)++;
+                }
+                else
+                {
+                    if(DecodedValue == END_OF_BLOCK_ZERO)
+                    {
+                        u4_sym_len = 2;
+
+                        break;
+                    }
+                    else
+                    {
+                        lead_zeros = CLZ(u4_bits) - 20;/* -15 since we are dealing with WORD32 */
+                        /*Second table lookup*/
+                        if (0 != lead_zeros)
+                        {
+                            u4_bits         = (u4_bits >> (6 - lead_zeros)) & 0x001F;
+
+                            /* Flush the number of bits */
+                            u4_sym_len         = 11 + lead_zeros;
+
+                            /* Calculate the address */
+                            u4_bits         = ((lead_zeros - 1) << 5) + u4_bits;
+
+                            DecodedValue    = gau2_impeg2d_tab_zero_10_16[u4_bits];
+
+                            u4_run = BITS(DecodedValue, 8,4);
+                            u4_level = ((WORD16) DecodedValue) >> 9;
+
+                            u4_numCoeffs       += u4_run;
+
+                            u4_pos                 = pu1_scan[u4_numCoeffs++ & 63];
+                            pu1_pos[*pi4_num_coeffs]    = u4_pos;
+                            if (1 == lead_zeros)
+                                u4_sym_len--;
+                            /* flushing */
+                            FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_sym_len,pu4_buf_aligned)
+                            pi2_outAddr[*pi4_num_coeffs]    = u4_level;
+
+                            (*pi4_num_coeffs)++;
+                        }
+                        /*Escape Sequence*/
+                        else if(u2_mpeg2 == 1)
+                        {
+                            u4_sym_len         = 6;
+                            FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_sym_len,pu4_buf_aligned)
+                            IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,18)
+                            u4_decoded_value    = u4_bits;
+                            u4_run             = (u4_decoded_value >> 12);
+                            u4_level           = (u4_decoded_value & 0x0FFF);
+
+                            if (u4_level)
+                                u4_level = (u4_level - ((u4_level & 0x0800) << 1));
+
+                            u4_numCoeffs           += u4_run;
+
+                            u4_pos                 = pu1_scan[u4_numCoeffs++ & 63];
+                            pu1_pos[*pi4_num_coeffs]    = u4_pos;
+                            pi2_outAddr[*pi4_num_coeffs]    = u4_level;
+
+                            (*pi4_num_coeffs)++;
+                        }
+                        /*********************************************************************/
+                        /* MPEG1 Escape Code                                                 */
+                        /*********************************************************************/
+                        else
+                        {
+                            /*-----------------------------------------------------------
+                            * MPEG-1 Stream
+                            *
+                            * <See D.9.3 of MPEG-2> Run-level escape syntax
+                            * Run-level values that cannot be coded with a VLC are coded
+                            * by the escape code '0000 01' followed by
+                            * either a 14-bit FLC (-127 <= level <= 127),
+                            * or a 22-bit FLC (-255 <= level <= 255).
+                            * This is described in Annex B,B.5f of MPEG-1.standard
+                            *-----------------------------------------------------------*/
+
+                            /*-----------------------------------------------------------
+                            * First 6 bits are the value of the Run. Next is First 8 bits
+                            * of Level. These bits decide whether it is 14 bit FLC or
+                            * 22-bit FLC.
+                            *
+                            * If( first 8 bits of Level == '10000000' or '00000000')
+                            *      then it is 22-bit FLC.
+                            * else
+                            *      it is 14-bit FLC.
+                            *-----------------------------------------------------------*/
+                            u4_sym_len             = 6;
+                            FLUSH_BITS(u4_offset,u4_buf,u4_buf_nxt,u4_sym_len,pu4_buf_aligned)
+                            IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,14)
+                            u4_decoded_value        = u4_bits;
+                            u4_run                 = (u4_decoded_value >> 8);
+                            u4_level_first_byte    = (u4_decoded_value & 0x0FF);
+                            if(u4_level_first_byte & 0x7F)
+                            {
+                                /*-------------------------------------------------------
+                                * First 8 bits of level are neither 1000000 nor 00000000
+                                * Hence 14-bit FLC (Last 8 bits are used to get level)
+                                *
+                                *  Level = (msb of Level_First_Byte is 1)?
+                                *          Level_First_Byte - 256 : Level_First_Byte
+                                *-------------------------------------------------------*/
+                                u4_level = (u4_level_first_byte -
+                                    ((u4_level_first_byte & 0x80) << 1));
+                            }
+                            else
+                            {
+                                /*-------------------------------------------------------
+                                * Next 8 bits are either 1000000 or 00000000
+                                * Hence 22-bit FLC (Last 16 bits are used to get level)
+                                *
+                                *  Level = (msb of Level_First_Byte is 1)?
+                                *          Level_Second_Byte - 256 : Level_Second_Byte
+                                *-------------------------------------------------------*/
+                                IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,8)
+                                u4_level = u4_bits;
+                                u4_level = (u4_level - (u4_level_first_byte << 1));
+                            }
+                            u4_numCoeffs           += u4_run;
+
+                            u4_pos                 = pu1_scan[u4_numCoeffs++ & 63];
+                            pu1_pos[*pi4_num_coeffs]    = u4_pos;
+                            pi2_outAddr[*pi4_num_coeffs]    = u4_level;
+
+                            (*pi4_num_coeffs)++;
+                        }
+                    }
+                }
+
+                u4_nz_cols |= 1 << (u4_pos & 0x7);
+                u4_nz_rows |= 1 << (u4_pos >> 0x3);
+            }
+            if (u4_numCoeffs > 64)
+            {
+                return IMPEG2D_MB_TEX_DECODE_ERR;
+            }
+
+            IBITS_GET(u4_buf,u4_buf_nxt,u4_offset,u4_bits,pu4_buf_aligned,u4_sym_len)
+
+        }
+
+        PUT_TEMP_STREAM_DATA(u4_buf, u4_buf_nxt, u4_offset, pu4_buf_aligned, ps_stream)
+
+        ps_dec->u4_non_zero_cols  = u4_nz_cols;
+        ps_dec->u4_non_zero_rows  = u4_nz_rows;
+
+            return (IMPEG2D_ERROR_CODES_T)IVD_ERROR_NONE;
+}
+
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_inv_quant_mpeg1                                  */
+/*                                                                           */
+/*  Description   : Inverse quantizes the output of VLD (MPEG-1 rules)       */
+/*                                                                           */
+/*  Inputs        :                                                          */
+/*  pi2_blk               - Output block, indexed by block position          */
+/*  pu1_weighting_matrix  - Weights, indexed by block position               */
+/*  u1_quant_scale        - Quantization scale for inverse quant             */
+/*  u4_intra_flag         - 1 for intra block, else 0                        */
+/*  i4_num_coeffs         - Entries valid in pi2_coeffs and pu1_pos          */
+/*  pi2_coeffs            - Decoded levels; [0] is rewritten (intra)         */
+/*  pu1_pos               - Block position of each decoded level             */
+/*  pu1_scan              - Scan table; only entry 0 is read                 */
+/*  pu2_def_dc_pred       - DC predictor, updated for intra blocks           */
+/*  u2_intra_dc_precision - Intra DC is scaled by 2^(3 - this)               */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Intra DC = (level + predictor) scaled, then CLIP_S12.    */
+/*                  Others = ((2*|level| + !intra) * W * scale) >> 5,        */
+/*                  forced odd toward zero, signed, then CLIP_S12.           */
+/*                                                                           */
+/*  Outputs       : Inverse quantized values in the block                    */
+/*                                                                           */
+/*  Returns       : Always 0                                                 */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         05 09 2005   Harish M        First Version                        */
+/*                                                                           */
+/*****************************************************************************/
+UWORD8 impeg2d_inv_quant_mpeg1(WORD16 *pi2_blk,
+                              UWORD8 *pu1_weighting_matrix,
+                              UWORD8 u1_quant_scale,
+                              WORD32 u4_intra_flag,
+                              WORD32 i4_num_coeffs,
+                              WORD16 *pi2_coeffs,
+                              UWORD8 *pu1_pos,
+                              const UWORD8 *pu1_scan,
+                              UWORD16 *pu2_def_dc_pred,
+                              UWORD16 u2_intra_dc_precision)
+{
+    WORD32  i4_pos;
+    WORD32  i4_iter;
+
+    /* Reconstruct the predicted DC value for intra MB */
+    if(u4_intra_flag == 1)
+    {
+        WORD32 i4_dc;
+
+        /**********************************************************************/
+        /* Add the DC predictor to the decoded differential and store the     */
+        /* sum back as the predictor for the next block of this component     */
+        /**********************************************************************/
+        pi2_coeffs[0]    += *pu2_def_dc_pred;
+        *pu2_def_dc_pred  = pi2_coeffs[0];
+
+        /* Scale in 32 bits and clip BEFORE narrowing back to WORD16, so an
+         * out-of-range value saturates instead of wrapping. A multiply is
+         * used because left-shifting a negative value is undefined.
+         * NOTE(review): assumes u2_intra_dc_precision <= 3 - confirm at caller.
+         */
+        i4_dc = pi2_coeffs[0];
+        i4_dc = i4_dc * (1 << (3 - u2_intra_dc_precision));
+        i4_dc = CLIP_S12(i4_dc);
+
+        pi2_coeffs[0]         = (WORD16)i4_dc;
+        pi2_blk[pu1_scan[0]]  = pi2_coeffs[0];
+    }
+    /************************************************************************/
+    /* Inverse quantization of other DCT coefficients                       */
+    /************************************************************************/
+    for(i4_iter = u4_intra_flag; i4_iter < i4_num_coeffs; i4_iter++)
+    {
+        WORD16 i2_level = pi2_coeffs[i4_iter];
+        WORD16 sign;
+        WORD32 temp;
+
+        /* Position is the inverse scan of the index stored */
+        i4_pos = pu1_pos[i4_iter];
+
+        sign = SIGN(i2_level);
+
+        /* 2 * |level|; the magnitude is taken first and then doubled so that
+         * no negative value is ever left-shifted.
+         */
+        temp = ABS(i2_level);
+        temp = temp * 2;
+
+        /* pi2_coeffs has only non-zero elements. So no need to check
+         * if the coeff is non-zero.
+         */
+        temp = temp + (1 * !u4_intra_flag);
+
+        temp = temp * pu1_weighting_matrix[i4_pos] * u1_quant_scale;
+
+        temp = temp >> 5;
+
+        /* Force the magnitude to be odd by moving even values one step
+         * toward zero; a magnitude of 0 stays 0.
+         */
+        if(0 == (temp & 1))
+        {
+            temp = temp - 1;
+        }
+        if(temp < 0)
+        {
+            temp = 0;
+        }
+
+        temp = temp * sign;
+
+        temp = CLIP_S12(temp);
+
+        pi2_blk[i4_pos] = temp;
+    }
+
+    /*return value is used in the case of mpeg2 for mismatch control*/
+    return  (0);
+} /* End of inv_quant() */
+
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : impeg2d_inv_quant_mpeg2                                  */
+/*                                                                           */
+/*  Description   : Inverse quantizes the output of VLD (MPEG-2 rules)       */
+/*                                                                           */
+/*  Inputs        :                                                          */
+/*  pi2_blk               - Output block, indexed by block position          */
+/*  pu1_weighting_matrix  - Weights, indexed by block position               */
+/*  u1_quant_scale        - Quantization scale for inverse quant             */
+/*  u4_intra_flag         - 1 for intra block, else 0                        */
+/*  i4_num_coeffs         - Entries valid in pi2_coeffs and pu1_pos          */
+/*  pi2_coeffs            - Decoded levels; [0] is rewritten (intra)         */
+/*  pu1_pos               - Block position of each decoded level             */
+/*  pu1_scan              - Scan table; only entry 0 is read                 */
+/*  pu2_def_dc_pred       - DC predictor, updated for intra blocks           */
+/*  u2_intra_dc_precision - Intra DC is scaled by 2^(3 - this)               */
+/*                                                                           */
+/*  Globals       : None                                                     */
+/*                                                                           */
+/*  Processing    : Intra DC = (level + predictor) scaled, then CLIP_S12.    */
+/*                  Others = ((2*|level| + !intra) * W * scale) >> 5,        */
+/*                  signed, then CLIP_S12. All results are summed.           */
+/*                                                                           */
+/*  Outputs       : Inverse quantized values in the block                    */
+/*                                                                           */
+/*  Returns       : Low 8 bits of (sum ^ 1); bit 0 is 1 when sum is even     */
+/*                                                                           */
+/*  Issues        : None                                                     */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         05 09 2005   Harish M        First Version                        */
+/*                                                                           */
+/*****************************************************************************/
+UWORD8 impeg2d_inv_quant_mpeg2(WORD16 *pi2_blk,
+                              UWORD8 *pu1_weighting_matrix,
+                              UWORD8 u1_quant_scale,
+                              WORD32 u4_intra_flag,
+                              WORD32 i4_num_coeffs,
+                              WORD16 *pi2_coeffs,
+                              UWORD8 *pu1_pos,
+                              const UWORD8 *pu1_scan,
+                              UWORD16 *pu2_def_dc_pred,
+                              UWORD16 u2_intra_dc_precision)
+{
+
+    WORD32  i4_pos;
+    WORD32  i4_iter;
+    /* Running sum of the reconstructed coefficients, used for Mismatch
+     * control by the caller (only its parity reaches the return value).
+     */
+    UWORD32 sum = 0;
+
+    /* Reconstruct the predicted DC value for intra MB */
+    if(u4_intra_flag == 1)
+    {
+        WORD32 i4_dc;
+
+        /**********************************************************************/
+        /* Add the DC predictor to the decoded differential and store the     */
+        /* sum back as the predictor for the next block of this component     */
+        /**********************************************************************/
+        pi2_coeffs[0]    += *pu2_def_dc_pred;
+        *pu2_def_dc_pred  = pi2_coeffs[0];
+
+        /* Scale in 32 bits and clip BEFORE narrowing back to WORD16, so an
+         * out-of-range value saturates instead of wrapping. A multiply is
+         * used because left-shifting a negative value is undefined.
+         * NOTE(review): assumes u2_intra_dc_precision <= 3 - confirm at caller.
+         */
+        i4_dc = pi2_coeffs[0];
+        i4_dc = i4_dc * (1 << (3 - u2_intra_dc_precision));
+        i4_dc = CLIP_S12(i4_dc);
+
+        pi2_coeffs[0]         = (WORD16)i4_dc;
+        pi2_blk[pu1_scan[0]]  = pi2_coeffs[0];
+
+        /* Seed the sum with the DC value that was just written, rather than
+         * re-reading a fixed block index.
+         */
+        sum = pi2_coeffs[0];
+    }
+
+    /************************************************************************/
+    /* Inverse quantization of other DCT coefficients                       */
+    /************************************************************************/
+    for(i4_iter = u4_intra_flag; i4_iter < i4_num_coeffs; i4_iter++)
+    {
+        WORD16 i2_level = pi2_coeffs[i4_iter];
+        WORD16 sign;
+        WORD32 temp;
+
+        /* Position is the inverse scan of the index stored */
+        i4_pos = pu1_pos[i4_iter];
+
+        sign = SIGN(i2_level);
+
+        /* 2 * |level|; the magnitude is taken first and then doubled so that
+         * no negative value is ever left-shifted.
+         */
+        temp = ABS(i2_level);
+        temp = temp * 2;
+
+        /* Non-intra blocks add 1 to the doubled magnitude */
+        temp = temp + (1 * !u4_intra_flag);
+        temp = temp * pu1_weighting_matrix[i4_pos] * u1_quant_scale;
+
+        temp = temp >> 5;
+
+        temp = temp * sign;
+
+        temp = CLIP_S12(temp);
+
+        pi2_blk[i4_pos] = temp;
+
+        sum += temp;
+    }
+
+    /* Flipping bit 0 makes the result's low bit 1 exactly when the sum is even */
+    return (sum ^ 1);
+} /* End of inv_quant() */

diff --git a/decoder/impeg2d_vld.h b/decoder/impeg2d_vld.h
new file mode 100644
index 0000000..f52da96
--- /dev/null
+++ b/decoder/impeg2d_vld.h

@@ -0,0 +1,49 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef __IMPEG2D_VLD_H__
+#define __IMPEG2D_VLD_H__
+
+
+/* Prototypes of the VLD symbol decoders. Both read from a stream_t and take a
+ * two-column WORD16 code table plus a maximum code length; the fast variant
+ * additionally takes a two-column UWORD16 index table.
+ * NOTE(review): the definitions are not visible from this header; the exact
+ * table layout and the meaning of the returned WORD16 should be confirmed
+ * against them.
+ */
+WORD16 impeg2d_dec_vld_symbol(stream_t *stream,const WORD16 codeTable[][2],
+                 UWORD16 maxLen);
+WORD16 impeg2d_fast_dec_vld_symbol(stream_t *stream,
+                     const WORD16  codeTable[][2],
+                     const UWORD16 indexTable[][2],
+                     UWORD16 maxLen);
+/* Decodes the coefficients of one block into two parallel output arrays:
+ * outAddr receives the decoded values and pu1_pos the block position of each
+ * value; *pi4_num_coeffs receives how many entries were written.
+ */
+IMPEG2D_ERROR_CODES_T impeg2d_vld_decode(dec_state_t *dec, WORD16 *outAddr, /*!< Address where decoded symbols will be stored */
+                                            const UWORD8 *scan, /*!< Scan table to be used */
+                                            UWORD8 *pu1_pos, /*!< Receives the block position of each decoded symbol */
+                                            UWORD16 intraFlag, /*!< Intra Macroblock or not */
+                                            UWORD16 chromaFlag, /*!< Chroma Block or not */
+                                            UWORD16 dPicture, /*!< D Picture or not */
+                                            UWORD16 intraVlcFormat, /*!< Intra VLC format */
+                                            UWORD16 mpeg2, /*!< MPEG-2 or not */
+                                            WORD32 *pi4_num_coeffs /*!< Returns the number of coeffs in block */
+                                            );
+
+/* Combined VLD + inverse quantization entry points, declared through the
+ * pf_vld_inv_quant_t typedef.
+ * NOTE(review): presumably pf_vld_inv_quant_t is a function type (not a
+ * pointer type), making these prototypes rather than objects - confirm.
+ */
+pf_vld_inv_quant_t impeg2d_vld_inv_quant_mpeg1;
+pf_vld_inv_quant_t impeg2d_vld_inv_quant_mpeg2;
+
+
+/* Inverse quantization entry points for MPEG-1 and MPEG-2, declared through
+ * the pf_inv_quant_t typedef; both names are defined as functions in the
+ * decoder sources, so pf_inv_quant_t is presumably a function type.
+ */
+pf_inv_quant_t impeg2d_inv_quant_mpeg1;
+pf_inv_quant_t impeg2d_inv_quant_mpeg2;
+
+
+#endif /* #ifndef __IMPEG2D_VLD_H__ */

diff --git a/decoder/impeg2d_vld_tables.c b/decoder/impeg2d_vld_tables.c
new file mode 100644
index 0000000..dba05ec
--- /dev/null
+++ b/decoder/impeg2d_vld_tables.c

@@ -0,0 +1,465 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#include "iv_datatypedef.h"
+#include "impeg2d_vld_tables.h"
+
+/* Table to be used for decoding the MB increment value. NOTE(review): rows appear to be {bit 0, bit 1} decode-tree nodes (positive = next row, negative = leaf, leaf + MB_ADDR_INCR_OFFSET = decoded value), consistent with ISO/IEC 13818-2 Table B.1 - confirm against the decoder */
+const WORD16 gai2_impeg2d_mb_addr_incr[][2] =
+{
+  {1,-33},{3,2},{-31,-32},{5,4},{-29,-30},{7,6},{-27,-28},{13,8},{10,9},
+  {-25,-26},{12,11},{-23,-24},{-21,-22},{25,14},{16,15},{-19,-20},{20,17},
+  {19,18},{-17,-18},{-15,-16},{22,21},{-13,-14},{24,23},{-11,-12},{-9,-10},
+  {34,26},{0,27},{31,28},{30,29},{-7,-8},{-5,-6},{33,32},{-3,-4},{-1,-2},
+  {0,35},{36,0},{37,0},{-34,0}
+};
+
+/* Table to be used for decoding the MB type in case of P Pictures. NOTE(review): entries appear to be (code length << 8) | MB_* flags, looked up with the next MB_TYPE_LEN bits (cf. ISO/IEC 13818-2 Table B.3) - confirm */
+const UWORD16 gau2_impeg2d_p_mb_type[] =
+{
+    0x0100, 0x0611, 0x0512, 0x0512, 0x051a, 0x051a, 0x0501, 0x0501, 0x0308, 0x0308,
+    0x0308, 0x0308, 0x0308, 0x0308, 0x0308, 0x0308, 0x0202, 0x0202, 0x0202, 0x0202,
+    0x0202, 0x0202, 0x0202, 0x0202, 0x0202, 0x0202, 0x0202, 0x0202, 0x0202, 0x0202,
+    0x0202, 0x0202, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a,
+    0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a,
+    0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a, 0x010a,
+    0x010a, 0x010a, 0x010a, 0x010a
+};
+/* Table to be used for decoding the MB type in case of B Pictures. NOTE(review): entries appear to be (code length << 8) | MB_* flags, looked up with the next MB_TYPE_LEN bits (cf. ISO/IEC 13818-2 Table B.4) - confirm */
+const UWORD16 gau2_impeg2d_b_mb_type[] =
+{
+    0x0100, 0x0611, 0x0616, 0x061a, 0x053e, 0x053e, 0x0501, 0x0501, 0x0408, 0x0408,
+    0x0408, 0x0408, 0x040a, 0x040a, 0x040a, 0x040a, 0x0304, 0x0304, 0x0304, 0x0304,
+    0x0304, 0x0304, 0x0304, 0x0304, 0x0306, 0x0306, 0x0306, 0x0306, 0x0306, 0x0306,
+    0x0306, 0x0306, 0x022c, 0x022c, 0x022c, 0x022c, 0x022c, 0x022c, 0x022c, 0x022c,
+    0x022c, 0x022c, 0x022c, 0x022c, 0x022c, 0x022c, 0x022c, 0x022c, 0x022e, 0x022e,
+    0x022e, 0x022e, 0x022e, 0x022e, 0x022e, 0x022e, 0x022e, 0x022e, 0x022e, 0x022e,
+    0x022e, 0x022e, 0x022e, 0x022e
+};
+/* Table to be used for decoding dmvector[t]. NOTE(review): impeg2d_vld_tables.h declares this array as [4] but only 2 initializers are given here - confirm the intended size */
+const WORD16 gai2_impeg2d_dec_mv[] =
+{
+    1,-1
+};
+
+/* Table used for motion code decode */
+const UWORD16 gau2_impeg2d_mv_code[] =
+{
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0f0a, 0x0e0a, 0x0d0a, 0x0c0a, 0x0b0a, 0x0a0a, 0x0909, 0x0909,
+    0x0809, 0x0809, 0x0709, 0x0709, 0x0607, 0x0607, 0x0607, 0x0607, 0x0607, 0x0607,
+    0x0607, 0x0607, 0x0507, 0x0507, 0x0507, 0x0507, 0x0507, 0x0507, 0x0507, 0x0507,
+    0x0407, 0x0407, 0x0407, 0x0407, 0x0407, 0x0407, 0x0407, 0x0407, 0x0306, 0x0306,
+    0x0306, 0x0306, 0x0306, 0x0306, 0x0306, 0x0306, 0x0306, 0x0306, 0x0306, 0x0306,
+    0x0306, 0x0306, 0x0306, 0x0306, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204,
+    0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204,
+    0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204,
+    0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204,
+    0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204,
+    0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204,
+    0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0204, 0x0103, 0x0103,
+    0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103,
+    0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103,
+    0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103,
+    0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103,
+    0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103,
+    0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103,
+    0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103,
+    0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103,
+    0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103,
+    0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103,
+    0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103,
+    0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103,
+    0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0103, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002,
+    0x0002, 0x0002
+};
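+/* Table used for coded block pattern (cbp) decode. NOTE(review): entries appear to be (code length << 8) | cbp value, looked up with the next MB_CBP_LEN bits (cf. ISO/IEC 13818-2 Table B.9) - confirm */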
+const UWORD16 gau2_impeg2d_cbp_code[] =
+{
+    0x0100, 0x0100, 0x0927, 0x091b, 0x093b, 0x0937, 0x092f, 0x091f, 0x083a, 0x083a,
+    0x0836, 0x0836, 0x082e, 0x082e, 0x081e, 0x081e, 0x0839, 0x0839, 0x0835, 0x0835,
+    0x082d, 0x082d, 0x081d, 0x081d, 0x0826, 0x0826, 0x081a, 0x081a, 0x0825, 0x0825,
+    0x0819, 0x0819, 0x082b, 0x082b, 0x0817, 0x0817, 0x0833, 0x0833, 0x080f, 0x080f,
+    0x082a, 0x082a, 0x0816, 0x0816, 0x0832, 0x0832, 0x080e, 0x080e, 0x0829, 0x0829,
+    0x0815, 0x0815, 0x0831, 0x0831, 0x080d, 0x080d, 0x0823, 0x0823, 0x0813, 0x0813,
+    0x080b, 0x080b, 0x0807, 0x0807, 0x0722, 0x0722, 0x0722, 0x0722, 0x0712, 0x0712,
+    0x0712, 0x0712, 0x070a, 0x070a, 0x070a, 0x070a, 0x0706, 0x0706, 0x0706, 0x0706,
+    0x0721, 0x0721, 0x0721, 0x0721, 0x0711, 0x0711, 0x0711, 0x0711, 0x0709, 0x0709,
+    0x0709, 0x0709, 0x0705, 0x0705, 0x0705, 0x0705, 0x063f, 0x063f, 0x063f, 0x063f,
+    0x063f, 0x063f, 0x063f, 0x063f, 0x0603, 0x0603, 0x0603, 0x0603, 0x0603, 0x0603,
+    0x0603, 0x0603, 0x0624, 0x0624, 0x0624, 0x0624, 0x0624, 0x0624, 0x0624, 0x0624,
+    0x0618, 0x0618, 0x0618, 0x0618, 0x0618, 0x0618, 0x0618, 0x0618, 0x053e, 0x053e,
+    0x053e, 0x053e, 0x053e, 0x053e, 0x053e, 0x053e, 0x053e, 0x053e, 0x053e, 0x053e,
+    0x053e, 0x053e, 0x053e, 0x053e, 0x0502, 0x0502, 0x0502, 0x0502, 0x0502, 0x0502,
+    0x0502, 0x0502, 0x0502, 0x0502, 0x0502, 0x0502, 0x0502, 0x0502, 0x0502, 0x0502,
+    0x053d, 0x053d, 0x053d, 0x053d, 0x053d, 0x053d, 0x053d, 0x053d, 0x053d, 0x053d,
+    0x053d, 0x053d, 0x053d, 0x053d, 0x053d, 0x053d, 0x0501, 0x0501, 0x0501, 0x0501,
+    0x0501, 0x0501, 0x0501, 0x0501, 0x0501, 0x0501, 0x0501, 0x0501, 0x0501, 0x0501,
+    0x0501, 0x0501, 0x0538, 0x0538, 0x0538, 0x0538, 0x0538, 0x0538, 0x0538, 0x0538,
+    0x0538, 0x0538, 0x0538, 0x0538, 0x0538, 0x0538, 0x0538, 0x0538, 0x0534, 0x0534,
+    0x0534, 0x0534, 0x0534, 0x0534, 0x0534, 0x0534, 0x0534, 0x0534, 0x0534, 0x0534,
+    0x0534, 0x0534, 0x0534, 0x0534, 0x052c, 0x052c, 0x052c, 0x052c, 0x052c, 0x052c,
+    0x052c, 0x052c, 0x052c, 0x052c, 0x052c, 0x052c, 0x052c, 0x052c, 0x052c, 0x052c,
+    0x051c, 0x051c, 0x051c, 0x051c, 0x051c, 0x051c, 0x051c, 0x051c, 0x051c, 0x051c,
+    0x051c, 0x051c, 0x051c, 0x051c, 0x051c, 0x051c, 0x0528, 0x0528, 0x0528, 0x0528,
+    0x0528, 0x0528, 0x0528, 0x0528, 0x0528, 0x0528, 0x0528, 0x0528, 0x0528, 0x0528,
+    0x0528, 0x0528, 0x0514, 0x0514, 0x0514, 0x0514, 0x0514, 0x0514, 0x0514, 0x0514,
+    0x0514, 0x0514, 0x0514, 0x0514, 0x0514, 0x0514, 0x0514, 0x0514, 0x0530, 0x0530,
+    0x0530, 0x0530, 0x0530, 0x0530, 0x0530, 0x0530, 0x0530, 0x0530, 0x0530, 0x0530,
+    0x0530, 0x0530, 0x0530, 0x0530, 0x050c, 0x050c, 0x050c, 0x050c, 0x050c, 0x050c,
+    0x050c, 0x050c, 0x050c, 0x050c, 0x050c, 0x050c, 0x050c, 0x050c, 0x050c, 0x050c,
+    0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420,
+    0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420,
+    0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420, 0x0420,
+    0x0420, 0x0420, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410,
+    0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410,
+    0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410, 0x0410,
+    0x0410, 0x0410, 0x0410, 0x0410, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408,
+    0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408,
+    0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408,
+    0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0408, 0x0404, 0x0404, 0x0404, 0x0404,
+    0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404,
+    0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404,
+    0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x033c, 0x033c,
+    0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c,
+    0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c,
+    0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c,
+    0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c,
+    0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c,
+    0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c, 0x033c,
+    0x033c, 0x033c
+};
+
+
+/* ISO/IEC 13818-2 Table B.14: DCT coefficients table zero; the ESCAPE_CODE and END_OF_BLOCK entries are the macros from impeg2d_vld_tables.h */
+const UWORD16 gau2_impeg2d_dct_coeff_zero[] =
+{
+  2640,2608,2576,2544,12400,32848,30800,28752,26704,24656,22608,63536,61488,
+    59440,57392,55344,1295,1263,1231,1199,1167,1135,1103,1071,1039,2511,2479,
+    2447,2415,2383,2351,2319,1006,974,942,910,878,846,814,782,750,718,686,654,
+    622,590,558,526,20557,18509,10349,6285,4269,2285,2253,493,461,429,397,
+    53293,51245,49197,47149,45101,364,16460,8300,332,4236,14412,43052,41004,
+    300,38956,36908,2220,6252,268,12364,34860,32810,10314,234,4202,2186,30762,
+    28714,8266,ESCAPE_CODE,4167,18471,135,16423,14374,12326,2118,10278,26664,
+    200,24616,22568,6216,2152,168,20520,101,101,101,101,101,101,101,101,8229,
+    8229,8229,8229,8229,8229,8229,8229,6181,6181,6181,6181,6181,6181,6181,6181,
+    68,4132,2083,2083,END_OF_BLOCK,34
+};
+
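+/* Table zero (Table B.14) lookup for codes of 1-9 bits. NOTE(review): entries appear to pack level (bits 15-9, signed), run (bits 8-4) and code length (bits 3-0) - confirm against the decoder */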
+const UWORD16 gau2_impeg2d_tab_zero_1_9[] =
+{
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x01f6, 0x01f6, 0x01f6, 0x01f6, 0x01f6, 0x01f6, 0x01f6, 0x01f6,
+    0x0428, 0x0428, 0xfc28, 0xfc28, 0x0298, 0x0298, 0xfe98, 0xfe98,
+    0x0808, 0x0808, 0xf808, 0xf808, 0x0288, 0x0288, 0xfe88, 0xfe88,
+    0x0277, 0x0277, 0x0277, 0x0277, 0xfe77, 0xfe77, 0xfe77, 0xfe77,
+    0x0267, 0x0267, 0x0267, 0x0267, 0xfe67, 0xfe67, 0xfe67, 0xfe67,
+    0x0417, 0x0417, 0x0417, 0x0417, 0xfc17, 0xfc17, 0xfc17, 0xfc17,
+    0x0257, 0x0257, 0x0257, 0x0257, 0xfe57, 0xfe57, 0xfe57, 0xfe57,
+    0x02d9, 0xfed9, 0x0c09, 0xf409, 0x02c9, 0xfec9, 0x02b9, 0xfeb9,
+    0x0439, 0xfc39, 0x0619, 0xfa19, 0x0a09, 0xf609, 0x02a9, 0xfea9,
+    0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606, 0x0606,
+    0xfa06, 0xfa06, 0xfa06, 0xfa06, 0xfa06, 0xfa06, 0xfa06, 0xfa06,
+    0x0246, 0x0246, 0x0246, 0x0246, 0x0246, 0x0246, 0x0246, 0x0246,
+    0xfe46, 0xfe46, 0xfe46, 0xfe46, 0xfe46, 0xfe46, 0xfe46, 0xfe46,
+    0x0236, 0x0236, 0x0236, 0x0236, 0x0236, 0x0236, 0x0236, 0x0236,
+    0xfe36, 0xfe36, 0xfe36, 0xfe36, 0xfe36, 0xfe36, 0xfe36, 0xfe36,
+    0x0405, 0x0405, 0x0405, 0x0405, 0x0405, 0x0405, 0x0405, 0x0405,
+    0x0405, 0x0405, 0x0405, 0x0405, 0x0405, 0x0405, 0x0405, 0x0405,
+    0xfc05, 0xfc05, 0xfc05, 0xfc05, 0xfc05, 0xfc05, 0xfc05, 0xfc05,
+    0xfc05, 0xfc05, 0xfc05, 0xfc05, 0xfc05, 0xfc05, 0xfc05, 0xfc05,
+    0x0225, 0x0225, 0x0225, 0x0225, 0x0225, 0x0225, 0x0225, 0x0225,
+    0x0225, 0x0225, 0x0225, 0x0225, 0x0225, 0x0225, 0x0225, 0x0225,
+    0xfe25, 0xfe25, 0xfe25, 0xfe25, 0xfe25, 0xfe25, 0xfe25, 0xfe25,
+    0xfe25, 0xfe25, 0xfe25, 0xfe25, 0xfe25, 0xfe25, 0xfe25, 0xfe25,
+    0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214,
+    0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214,
+    0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214,
+    0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214,
+    0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14,
+    0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14,
+    0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14,
+    0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203,
+    0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203,
+    0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203,
+    0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203,
+    0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203,
+    0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203,
+    0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203,
+    0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203,
+    0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03,
+    0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03,
+    0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03,
+    0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03,
+    0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03,
+    0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03,
+    0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03,
+    0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03
+};
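+/* Table zero (Table B.14) lookup for the longer codes (10-16 bits, going by the name), grouped by number of leading zeros */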
+const UWORD16 gau2_impeg2d_tab_zero_10_16[] =
+{
+    /*Six leading zeros*/
+    0x0300, 0x0300, 0xff00, 0xff00, 0x0450, 0x0450, 0xfc50, 0xfc50,
+    0x0e00, 0x0e00, 0xf200, 0xf200, 0x0620, 0x0620, 0xfa20, 0xfa20,
+    0x0810, 0x0810, 0xf810, 0xf810, 0x02f0, 0x02f0, 0xfef0, 0xfef0,
+    0x02e0, 0x02e0, 0xfee0, 0xfee0, 0x0440, 0x0440, 0xfc40, 0xfc40,
+    /*Seven leading zeros*/
+    0x1602, 0xea02, 0x0482, 0xfc82, 0x0642, 0xfa42, 0x1402, 0xec02,
+    0x0822, 0xf822, 0x0472, 0xfc72, 0x0352, 0xff52, 0x0342, 0xff42,
+    0x1202, 0xee02, 0x0332, 0xff32, 0x0322, 0xff22, 0x0a12, 0xf612,
+    0x0632, 0xfa32, 0x1002, 0xf002, 0x0462, 0xfc62, 0x0312, 0xff12,
+    /*Eight leading zeros*/
+    0x04a3, 0xfca3, 0x0493, 0xfc93, 0x0653, 0xfa53, 0x0833, 0xf833,
+    0x0a23, 0xf623, 0x0e13, 0xf213, 0x0c13, 0xf413, 0x1e03, 0xe203,
+    0x1c03, 0xe403, 0x1a03, 0xe603, 0x1803, 0xe803, 0x03a3, 0xffa3,
+    0x0393, 0xff93, 0x0383, 0xff83, 0x0373, 0xff73, 0x0363, 0xff63,
+    /*Nine leading zeros*/
+    0x3e04, 0xc204, 0x3c04, 0xc404, 0x3a04, 0xc604, 0x3804, 0xc804,
+    0x3604, 0xca04, 0x3404, 0xcc04, 0x3204, 0xce04, 0x3004, 0xd004,
+    0x2e04, 0xd204, 0x2c04, 0xd404, 0x2a04, 0xd604, 0x2804, 0xd804,
+    0x2604, 0xda04, 0x2404, 0xdc04, 0x2204, 0xde04, 0x2004, 0xe004,
+    /*Ten leading zeros*/
+    0x5005, 0xb005, 0x4e05, 0xb205, 0x4c05, 0xb405, 0x4a05, 0xb605,
+    0x4805, 0xb805, 0x4605, 0xba05, 0x4405, 0xbc05, 0x4205, 0xbe05,
+    0x4005, 0xc005, 0x1c15, 0xe415, 0x1a15, 0xe615, 0x1815, 0xe815,
+    0x1615, 0xea15, 0x1415, 0xec15, 0x1215, 0xee15, 0x1015, 0xf015,
+    /*Eleven leading zeros*/
+    0x2416, 0xdc16, 0x2216, 0xde16, 0x2016, 0xe016, 0x1e16, 0xe216,
+    0x0666, 0xfa66, 0x0506, 0xfd06, 0x04f6, 0xfcf6, 0x04e6, 0xfce6,
+    0x04d6, 0xfcd6, 0x04c6, 0xfcc6, 0x04b6, 0xfcb6, 0x03f6, 0xfff6,
+    0x03e6, 0xffe6, 0x03d6, 0xffd6, 0x03c6, 0xffc6, 0x03b6, 0xffb6
+
+};
+
+/* ISO/IEC 13818-2 Table B.15: DCT coefficients table one (the previous comment said B.14, which is table zero) */
+const UWORD16 gau2_impeg2d_dct_coeff_one[] =
+{
+    2640,2608,2576,2544,12400,32848,30800,28752,26704,24656,22608,63536,61488,
+    59440,57392,55344,1295,1263,1231,1199,1167,1135,1103,1071,1039,2511,2479,
+    2447,2415,2383,2351,2319,1006,974,942,910,878,846,814,782,750,718,686,654,
+    622,590,558,526,20557,18509,10349,6285,4269,2285,2253,0,0,0,0,53293,51245,
+    49197,47149,45101,16460,8300,0,0,14412,43052,41004,0,38956,36908,0,6252,
+    0,12364,34860,10313,10313,28713,28713,4234,32810,30761,30761,ESCAPE_CODE,
+    14375,16423,12327,4167,230,198,8230,10278,2216,22568,360,328,26664,24616,
+    6216,2184,4133,4133,4133,4133,4133,4133,4133,4133,2117,2117,2117,2117,2117,
+    2117,2117,2117,6181,6181,6181,6181,6181,6181,6181,6181,2083,2083,
+    END_OF_BLOCK,100,18471,18471,2151,2151,20519,20519,263,263,295,295,392,424,
+    4200,8264,456,488,34,34,34,34,34,34,34,34,67,67,67,67,133,165
+};
+
+/* Table one (Table B.15) lookup for codes of 1-9 bits */
+const UWORD16 gau2_impeg2d_tab_one_1_9[] =
+{
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x01f6, 0x01f6, 0x01f6, 0x01f6, 0x01f6, 0x01f6, 0x01f6, 0x01f6,
+    0x0278, 0x0278, 0xfe78, 0xfe78, 0x0288, 0x0288, 0xfe88, 0xfe88,
+    0x0268, 0x0268, 0xfe68, 0xfe68, 0x0428, 0x0428, 0xfc28, 0xfc28,
+    0x0e07, 0x0e07, 0x0e07, 0x0e07, 0xf207, 0xf207, 0xf207, 0xf207,
+    0x0c07, 0x0c07, 0x0c07, 0x0c07, 0xf407, 0xf407, 0xf407, 0xf407,
+    0x0247, 0x0247, 0x0247, 0x0247, 0xfe47, 0xfe47, 0xfe47, 0xfe47,
+    0x0257, 0x0257, 0x0257, 0x0257, 0xfe57, 0xfe57, 0xfe57, 0xfe57,
+    0x0a19, 0xf619, 0x02b9, 0xfeb9, 0x1609, 0xea09, 0x1409, 0xec09,
+    0x02d9, 0xfed9, 0x02c9, 0xfec9, 0x0439, 0xfc39, 0x0819, 0xf819,
+    0x0226, 0x0226, 0x0226, 0x0226, 0x0226, 0x0226, 0x0226, 0x0226,
+    0xfe26, 0xfe26, 0xfe26, 0xfe26, 0xfe26, 0xfe26, 0xfe26, 0xfe26,
+    0x0416, 0x0416, 0x0416, 0x0416, 0x0416, 0x0416, 0x0416, 0x0416,
+    0xfc16, 0xfc16, 0xfc16, 0xfc16, 0xfc16, 0xfc16, 0xfc16, 0xfc16,
+    0x0236, 0x0236, 0x0236, 0x0236, 0x0236, 0x0236, 0x0236, 0x0236,
+    0xfe36, 0xfe36, 0xfe36, 0xfe36, 0xfe36, 0xfe36, 0xfe36, 0xfe36,
+    0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214,
+    0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214,
+    0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214,
+    0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214, 0x0214,
+    0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14,
+    0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14,
+    0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14,
+    0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14, 0xfe14,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff,
+    0x0605, 0x0605, 0x0605, 0x0605, 0x0605, 0x0605, 0x0605, 0x0605,
+    0x0605, 0x0605, 0x0605, 0x0605, 0x0605, 0x0605, 0x0605, 0x0605,
+    0xfa05, 0xfa05, 0xfa05, 0xfa05, 0xfa05, 0xfa05, 0xfa05, 0xfa05,
+    0xfa05, 0xfa05, 0xfa05, 0xfa05, 0xfa05, 0xfa05, 0xfa05, 0xfa05,
+    0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203,
+    0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203,
+    0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203,
+    0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203,
+    0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203,
+    0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203,
+    0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203,
+    0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203, 0x0203,
+    0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03,
+    0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03,
+    0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03,
+    0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03,
+    0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03,
+    0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03,
+    0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03,
+    0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03, 0xfe03,
+    0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404,
+    0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404,
+    0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404,
+    0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404, 0x0404,
+    0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04,
+    0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04,
+    0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04,
+    0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04, 0xfc04,
+    0x0806, 0x0806, 0x0806, 0x0806, 0x0806, 0x0806, 0x0806, 0x0806,
+    0xf806, 0xf806, 0xf806, 0xf806, 0xf806, 0xf806, 0xf806, 0xf806,
+    0x0a06, 0x0a06, 0x0a06, 0x0a06, 0x0a06, 0x0a06, 0x0a06, 0x0a06,
+    0xf606, 0xf606, 0xf606, 0xf606, 0xf606, 0xf606, 0xf606, 0xf606,
+    0x0298, 0x0298, 0xfe98, 0xfe98, 0x0618, 0x0618, 0xfa18, 0xfa18,
+    0x02a8, 0x02a8, 0xfea8, 0xfea8, 0x1008, 0x1008, 0xf008, 0xf008,
+    0x1208, 0x1208, 0xee08, 0xee08, 0x1809, 0xe809, 0x1a09, 0xe609,
+    0x0629, 0xfa29, 0x0449, 0xfc49, 0x1c09, 0xe409, 0x1e09, 0xe209
+};
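+/* Table one (Table B.15) lookup for the longer codes (10-16 bits, going by the name), grouped by number of leading zeros */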
+const UWORD16 gau2_impeg2d_tab_one_10_16[] =
+{
+    /*Six leading zeros*/
+    0x0458, 0x0458, 0x0458, 0x0458, 0xfc58, 0xfc58, 0xfc58, 0xfc58,
+    0x02e8, 0x02e8, 0x02e8, 0x02e8, 0xfee8, 0xfee8, 0xfee8, 0xfee8,
+    0x0829, 0x0829, 0xf829, 0xf829, 0x0309, 0x0309, 0xff09, 0xff09,
+    0x02f8, 0x02f8, 0x02f8, 0x02f8, 0xfef8, 0xfef8, 0xfef8, 0xfef8,
+    /*Seven leading zeros*/
+    0x000b, 0x000b, 0x048b, 0xfc8b, 0x064b, 0xfa4b, 0x000b, 0x000b,
+    0x000b, 0x000b, 0x047b, 0xfc7b, 0x035b, 0xff5b, 0x034b, 0xff4b,
+    0x000b, 0x000b, 0x033b, 0xff3b, 0x032b, 0xff2b, 0x000b, 0x000b,
+    0x063b, 0xfa3b, 0x000b, 0x000b, 0x046b, 0xfc6b, 0x031b, 0xff1b,
+    /*Eight leading zeros*/
+    0x04ac, 0xfcac, 0x049c, 0xfc9c, 0x065c, 0xfa5c, 0x083c, 0xf83c,
+    0x0a2c, 0xf62c, 0x0e1c, 0xf21c, 0x0c1c, 0xf41c, 0x000c, 0x000c,
+    0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x03ac, 0xffac,
+    0x039c, 0xff9c, 0x038c, 0xff8c, 0x037c, 0xff7c, 0x036c, 0xff6c,
+    /*Nine leading zeros*/
+    0x3e0d, 0xc20d, 0x3c0d, 0xc40d, 0x3a0d, 0xc60d, 0x380d, 0xc80d,
+    0x360d, 0xca0d, 0x340d, 0xcc0d, 0x320d, 0xce0d, 0x300d, 0xd00d,
+    0x2e0d, 0xd20d, 0x2c0d, 0xd40d, 0x2a0d, 0xd60d, 0x280d, 0xd80d,
+    0x260d, 0xda0d, 0x240d, 0xdc0d, 0x220d, 0xde0d, 0x200d, 0xe00d,
+    /*Ten leading zeros*/
+    0x500e, 0xb00e, 0x4e0e, 0xb20e, 0x4c0e, 0xb40e, 0x4a0e, 0xb60e,
+    0x480e, 0xb80e, 0x460e, 0xba0e, 0x440e, 0xbc0e, 0x420e, 0xbe0e,
+    0x400e, 0xc00e, 0x1c1e, 0xe41e, 0x1a1e, 0xe61e, 0x181e, 0xe81e,
+    0x161e, 0xea1e, 0x141e, 0xec1e, 0x121e, 0xee1e, 0x101e, 0xf01e,
+    /*Eleven leading zeros*/
+    0x241f, 0xdc1f, 0x221f, 0xde1f, 0x201f, 0xe01f, 0x1e1f, 0xe21f,
+    0x066f, 0xfa6f, 0x050f, 0xfd0f, 0x04ff, 0xfcff, 0x04ef, 0xfcef,
+    0x04df, 0xfcdf, 0x04cf, 0xfccf, 0x04bf, 0xfcbf, 0x03ff, 0xffff,
+    0x03ef, 0xffef, 0x03df, 0xffdf, 0x03cf, 0xffcf, 0x03bf, 0xffbf
+};
+
+/* Depending upon the various groups identified in the AC VLD coefficients, the
+following tables are used to index into the DCT coefficients tables zero and
+one defined above. NOTE(review): this table has 65 entries, gau2_impeg2d_offset_one has 64 - confirm intended */
+const UWORD16 gau2_impeg2d_offset_zero[] =
+{
+    0,
+    1369,
+    1434,1434,
+    1048,1048,1048,1048,
+    2012,2012,2012,2012,2012,2012,2012,2012,
+    2110,2110,2110,2110,2110,2110,2110,2110,2110,2110,2110,2110,2110,2110,
+    2110,2110,
+    258,
+    515,515,
+    772,772,772,772,
+    1158,1158,1158,1158,1158,1158,1158,1158,
+    1402,1402,1402,1402,1402,1402,1402,1402,1402,1402,1402,1402,1402,1402,
+    1402,1402,
+    0,
+    1
+};
+/* Offsets used to index into gau2_impeg2d_dct_coeff_one. NOTE(review): 64 entries here versus 65 in gau2_impeg2d_offset_zero - confirm intended */
+const UWORD16 gau2_impeg2d_offset_one[] =
+{
+    132,
+    1386,
+    1353,1353,
+    1418,1418,1418,1418,
+    1032,1032,1032,1032,1032,1032,1032,1032,
+    1996,1996,1996,1996,1996,1996,1996,1996,1996,1996,1996,1996,1996,1996,
+    1996,1996,
+    1,
+    258,258,
+    515,515,515,515,
+    756,756,756,756,756,756,756,756,
+    1142,1142,1142,1142,1142,1142,1142,1142,1142,1142,1142,1142,1142,1142,
+    1142,1142,
+    0
+};
+
+
+/* Tables to be used for decoding dct_dc_size. NOTE(review): sub-table [0] matches dct_dc_size_luminance and [1] dct_dc_size_chrominance (ISO/IEC 13818-2 Tables B.12/B.13) when leaf + MPEG2_DCT_DC_SIZE_OFFSET is taken as the size - confirm */
+const WORD16 gai2_impeg2d_dct_dc_size[][11][2] =
+{
+    {
+        {3,1},{2,4},{-12,-9},{-11,-10},{-8,5},{-7,6},{-6,7},{-5,8},{-4,9},{-3,10},{-2,-1}
+    },
+    {
+        {1,2},{-12,-11},{-10,3},{-9,4},{-8,5},{-7,6},{-6,7},{-5,8},{-4,9},{-3,10},{-2,-1}
+    }
+};
+
+

diff --git a/decoder/impeg2d_vld_tables.h b/decoder/impeg2d_vld_tables.h
new file mode 100644
index 0000000..75805af
--- /dev/null
+++ b/decoder/impeg2d_vld_tables.h

@@ -0,0 +1,88 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef __IMPEG2D_VLD_TABLES_H__
+#define __IMPEG2D_VLD_TABLES_H__
+
+/* NOTE(review): MB_ADDR_INCR_LEN is defined a second time (same value) later in this header, and MB_ADDR_INCR_OFFSET has the same value as MPEG2_MB_ADDR_INCR_OFFSET */
+#define MB_ADDR_INCR_OFFSET       34
+#define MB_ADDR_INCR_LEN          11
+#define MB_TYPE_LEN               6
+#define MV_CODE_LEN               11
+#define MB_CBP_LEN                9
+
+
+/* Macroblock-type flags: single-bit masks that can be OR-ed together, as the two combined masks at the end of this group do */
+#define MB_BIDRECT          0x20
+#define MB_QUANT            0x10
+#define MB_MV_FORW          0x8
+#define MB_MV_BACK          0x4
+#define MB_PATTERN          0x2
+#define MB_TYPE_INTRA       0x1
+#define MB_FORW_OR_BACK     (MB_MV_FORW    | MB_MV_BACK)
+#define MB_CODED            (MB_TYPE_INTRA | MB_PATTERN)
+
+/* MPEG2_* offset and length constants, followed by the END_OF_BLOCK / ESCAPE_CODE values that appear as entries in the DCT coefficient tables */
+#define MPEG2_MB_ADDR_INCR_OFFSET       34
+#define MPEG2_INTRA_MBTYPE_OFFSET       69
+#define MPEG2_INTER_MBTYPE_OFFSET       105
+#define MPEG2_BVOP_MBTYPE_OFFSET        125
+#define MPEG2_DCT_DC_SIZE_OFFSET        12
+#define MPEG2_CBP_OFFSET                64
+#define MPEG2_MOTION_CODE_OFFSET        17
+#define MPEG2_DMV_OFFSET                2
+
+#define MPEG2_AC_COEFF_MAX_LEN          16
+#define MB_ADDR_INCR_LEN                11 /* identical redefinition of the macro already defined earlier in this header */
+#define MPEG2_INTRA_MBTYPE_LEN          2
+#define MPEG2_INTER_MBTYPE_LEN          6
+
+#define MPEG2_DCT_DC_SIZE_LEN           9
+#define MPEG2_DCT_DC_LUMA_SIZE_LEN      9
+#define MPEG2_DCT_DC_CHROMA_SIZE_LEN    10
+#define MPEG2_CBP_LEN                   9
+#define MPEG2_MOTION_CODE_LEN           11
+#define MPEG2_DMV_LEN                   2
+
+#define END_OF_BLOCK                    0x01
+#define ESCAPE_CODE                     0x06
+
+/* VLD lookup tables defined in impeg2d_vld_tables.c */
+extern const WORD16  gai2_impeg2d_mb_addr_incr[][2];
+extern const WORD16  gai2_impeg2d_dct_dc_size[][11][2];
+
+extern const UWORD16 gau2_impeg2d_dct_coeff_zero[];
+extern const UWORD16 gau2_impeg2d_dct_coeff_one[];
+extern const UWORD16 gau2_impeg2d_offset_zero[];
+extern const UWORD16 gau2_impeg2d_offset_one[];
+
+extern const UWORD16 gau2_impeg2d_tab_zero_1_9[];
+extern const UWORD16 gau2_impeg2d_tab_one_1_9[];
+extern const UWORD16 gau2_impeg2d_tab_zero_10_16[];
+extern const UWORD16 gau2_impeg2d_tab_one_10_16[];
+
+extern const UWORD16 gau2_impeg2d_p_mb_type[];
+extern const UWORD16 gau2_impeg2d_b_mb_type[];
+extern const UWORD16 gau2_impeg2d_mv_code[];
+extern const WORD16  gai2_impeg2d_dec_mv[4]; /* NOTE(review): the definition in impeg2d_vld_tables.c lists only 2 initializers - confirm the intended size */
+extern const UWORD16 gau2_impeg2d_cbp_code[];
+
+
+#endif /* __IMPEG2D_VLD_TABLES_H__ */
+

diff --git a/decoder/ivd.h b/decoder/ivd.h
new file mode 100644
index 0000000..abc6604
--- /dev/null
+++ b/decoder/ivd.h

@@ -0,0 +1,948 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  ivd.h
+*
+* @brief
+*  This file contains all the structure and enumeration definitions
+*  needed for the Application Program Interface (API) of the
+*  Ittiam Video Decoders
+*
+* @author
+*  100239(RCY)
+*
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+
+#ifndef _IVD_H
+#define _IVD_H
+
+/*****************************************************************************/
+/* Constant Macros                                                           */
+/*****************************************************************************/
+#define IVD_VIDDEC_MAX_IO_BUFFERS 64
+/*****************************************************************************/
+/* Typedefs                                                                  */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums                                                                     */
+/*****************************************************************************/
+
+/* IVD_ARCH_T: processor architecture ids (ARM from 0x0, x86 from 0x100, MIPS from 0x200; ARCH_NA = 0x7FFFFFFF) */
+typedef enum
+{
+    ARCH_NA                 =   0x7FFFFFFF,
+    ARCH_ARM_NONEON         =   0x0,
+    ARCH_ARM_A9Q,
+    ARCH_ARM_A9A,
+    ARCH_ARM_A9,
+    ARCH_ARM_A7,
+    ARCH_ARM_A5,
+    ARCH_ARM_A15,
+    ARCH_ARM_NEONINTR,
+    ARCH_ARMV8_GENERIC,
+    ARCH_X86_GENERIC        =   0x100,
+    ARCH_X86_SSSE3,
+    ARCH_X86_SSE42,
+    ARCH_X86_AVX2,
+    ARCH_MIPS_GENERIC       =   0x200,
+    ARCH_MIPS_32
+}IVD_ARCH_T;
+
+/* IVD_SOC_T: SoC ids; SOC_GENERIC is 0x0 and SOC_NA is 0x7FFFFFFF    */
+typedef enum
+{
+    SOC_NA                  = 0x7FFFFFFF,
+    SOC_GENERIC             = 0x0,
+    SOC_HISI_37X            = 0x100,
+}IVD_SOC_T;
+
+/* IVD_FRAME_SKIP_MODE_T: frame skip modes. IVD_SKIP_NONE (and therefore IVD_SKIP_DEFAULT) is 0x7FFFFFFF, not 0 */
+
+typedef enum {
+    IVD_SKIP_NONE                               = 0x7FFFFFFF,
+    IVD_SKIP_P                                  = 0x1,
+    IVD_SKIP_B                                  = 0x2,
+    IVD_SKIP_I                                  = 0x3,
+    IVD_SKIP_IP                                 = 0x4,
+    IVD_SKIP_IB                                 = 0x5,
+    IVD_SKIP_PB                                 = 0x6,
+    IVD_SKIP_IPB                                = 0x7,
+    IVD_SKIP_IDR                                = 0x8,
+    IVD_SKIP_DEFAULT                            = IVD_SKIP_NONE,
+}IVD_FRAME_SKIP_MODE_T;
+
+/* IVD_VIDEO_DECODE_MODE_T: selects whether the decoder processes a frame's  */
+/* worth of data or only the header                                          */
+
+typedef enum {
+    IVD_DECODE_MODE_NA                          = 0x7FFFFFFF,
+
+    /* Codec processes all decodable units */
+    IVD_DECODE_FRAME                            = 0x0,
+
+    /* Codec decodes the header only */
+    IVD_DECODE_HEADER                           = 0x1,
+
+
+
+}IVD_VIDEO_DECODE_MODE_T;
+
+
+/* IVD_DISPLAY_FRAME_OUT_MODE_T: order in which output buffers are filled    */
+
+typedef enum {
+
+    IVD_DISPLAY_ORDER_NA                        = 0x7FFFFFFF,
+    /* Codec fills output buffers in display order */
+    IVD_DISPLAY_FRAME_OUT                       = 0x0,
+
+    /* Codec fills output buffers in decode order */
+    IVD_DECODE_FRAME_OUT                        = 0x1,
+}IVD_DISPLAY_FRAME_OUT_MODE_T;
+
+
+/* IVD_API_COMMAND_TYPE_T: decoder API commands; numbering continues from IV_CMD_DUMMY_ELEMENT, which this header does not define */
+typedef enum {
+    IVD_CMD_VIDEO_NA                          = 0x7FFFFFFF,
+    IVD_CMD_VIDEO_CTL                         = IV_CMD_DUMMY_ELEMENT + 1,
+    IVD_CMD_VIDEO_DECODE,
+    IVD_CMD_GET_DISPLAY_FRAME,
+    IVD_CMD_REL_DISPLAY_FRAME,
+    IVD_CMD_SET_DISPLAY_FRAME
+}IVD_API_COMMAND_TYPE_T;
+
+/* IVD_CONTROL_API_COMMAND_TYPE_T: sub-commands (e_sub_cmd) of IVD_CMD_VIDEO_CTL */
+
+typedef enum {
+    IVD_CMD_NA                          = 0x7FFFFFFF,
+    IVD_CMD_CTL_GETPARAMS               = 0x0,
+    IVD_CMD_CTL_SETPARAMS               = 0x1,
+    IVD_CMD_CTL_RESET                   = 0x2,
+    IVD_CMD_CTL_SETDEFAULT              = 0x3,
+    IVD_CMD_CTL_FLUSH                   = 0x4,
+    IVD_CMD_CTL_GETBUFINFO              = 0x5,
+    IVD_CMD_CTL_GETVERSION              = 0x6,
+    IVD_CMD_CTL_CODEC_SUBCMD_START      = 0x7
+}IVD_CONTROL_API_COMMAND_TYPE_T;
+
+
+/* IVD_ERROR_BITS_T: a UWORD32 container is used for reporting the error     */
+/* code to the application. The 8 bits starting from the LSB are reserved    */
+/* for the codec to report internal error details. The remaining bits are    */
+/* generic for all video decoders, with the meanings listed below; each      */
+/* enumerator is a bit position (8..16), not a mask. Unused bits are reserved*/
+/* for future extensions and are zero in the current implementation          */
+
+typedef enum {
+    /* Bit 8  - Applied concealment.                                         */
+    IVD_APPLIEDCONCEALMENT                      = 0x8,
+    /* Bit 9  - Insufficient input data.                                     */
+    IVD_INSUFFICIENTDATA                        = 0x9,
+    /* Bit 10 - Data problem/corruption.                                     */
+    IVD_CORRUPTEDDATA                           = 0xa,
+    /* Bit 11 - Header problem/corruption.                                   */
+    IVD_CORRUPTEDHEADER                         = 0xb,
+    /* Bit 12 - Unsupported feature/parameter in input.                      */
+    IVD_UNSUPPORTEDINPUT                        = 0xc,
+    /* Bit 13 - Unsupported input parameter or configuration.                */
+    IVD_UNSUPPORTEDPARAM                        = 0xd,
+    /* Bit 14 - Fatal error (stop the codec). If there is an                 */
+    /* error and this bit is not set, the error is a recoverable one.        */
+    IVD_FATALERROR                              = 0xe,
+    /* Bit 15 - Invalid bitstream. Applies when Bitstream/YUV frame          */
+    /* buffer for encode/decode call is made with non-valid or zero u4_size  */
+    /* data                                                                  */
+    IVD_INVALID_BITSTREAM                       = 0xf,
+    /* Bit 16 - Incomplete bitstream.                                        */
+    IVD_INCOMPLETE_BITSTREAM                    = 0x10,
+    IVD_ERROR_BITS_T_DUMMY_ELEMENT              = 0x7FFFFFFF
+}IVD_ERROR_BITS_T;
+
+
+/* IVD_ERROR_CODES_T: generic decoder error codes (0x0 - 0x2a); 0x300 is a marker, presumably where codec-specific codes begin */
+typedef enum {
+    IVD_ERROR_NONE                              = 0x0,
+    IVD_NUM_MEM_REC_FAILED                      = 0x1,
+    IVD_NUM_REC_NOT_SUFFICIENT                  = 0x2,
+    IVD_FILL_MEM_REC_FAILED                     = 0x3,
+    IVD_REQUESTED_WIDTH_NOT_SUPPPORTED          = 0x4,
+    IVD_REQUESTED_HEIGHT_NOT_SUPPPORTED         = 0x5,
+    IVD_INIT_DEC_FAILED                         = 0x6,
+    IVD_INIT_DEC_NOT_SUFFICIENT                 = 0x7,
+    IVD_INIT_DEC_WIDTH_NOT_SUPPPORTED           = 0x8,
+    IVD_INIT_DEC_HEIGHT_NOT_SUPPPORTED          = 0x9,
+    IVD_INIT_DEC_MEM_NOT_ALIGNED                = 0xa,
+    IVD_INIT_DEC_COL_FMT_NOT_SUPPORTED          = 0xb,
+    IVD_INIT_DEC_MEM_REC_NOT_SUFFICIENT         = 0xc,
+    IVD_GET_VERSION_DATABUFFER_SZ_INSUFFICIENT  = 0xd,
+    IVD_BUFFER_SIZE_SET_TO_ZERO                 = 0xe,
+    IVD_UNEXPECTED_END_OF_STREAM                = 0xf,
+    IVD_SEQUENCE_HEADER_NOT_DECODED             = 0x10,
+    IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED       = 0x11,
+    IVD_MAX_FRAME_LIMIT_REACHED                 = 0x12,
+    IVD_IP_API_STRUCT_SIZE_INCORRECT            = 0x13,
+    IVD_OP_API_STRUCT_SIZE_INCORRECT            = 0x14,
+    IVD_HANDLE_NULL                             = 0x15,
+    IVD_HANDLE_STRUCT_SIZE_INCORRECT            = 0x16,
+    IVD_INVALID_HANDLE_NULL                     = 0x17,
+    IVD_INVALID_API_CMD                         = 0x18,
+    IVD_UNSUPPORTED_API_CMD                     = 0x19,
+    IVD_MEM_REC_STRUCT_SIZE_INCORRECT           = 0x1a,
+    IVD_DISP_FRM_ZERO_OP_BUFS                   = 0x1b,
+    IVD_DISP_FRM_OP_BUF_NULL                    = 0x1c,
+    IVD_DISP_FRM_ZERO_OP_BUF_SIZE               = 0x1d,
+    IVD_DEC_FRM_BS_BUF_NULL                     = 0x1e,
+    IVD_SET_CONFG_INVALID_DEC_MODE              = 0x1f,
+    IVD_SET_CONFG_UNSUPPORTED_DISP_WIDTH        = 0x20,
+    IVD_RESET_FAILED                            = 0x21,
+    IVD_INIT_DEC_MEM_REC_OVERLAP_ERR            = 0x22,
+    IVD_INIT_DEC_MEM_REC_BASE_NULL              = 0x23,
+    IVD_INIT_DEC_MEM_REC_ALIGNMENT_ERR          = 0x24,
+    IVD_INIT_DEC_MEM_REC_INSUFFICIENT_SIZE      = 0x25,
+    IVD_INIT_DEC_MEM_REC_INCORRECT_TYPE         = 0x26,
+    IVD_DEC_NUMBYTES_INV                        = 0x27,
+    IVD_DEC_REF_BUF_NULL                        = 0x28,
+    IVD_DEC_FRM_SKIPPED                         = 0x29,
+    IVD_RES_CHANGED                             = 0x2a,
+    IVD_DUMMY_ELEMENT_FOR_CODEC_EXTENSIONS      = 0x300,
+}IVD_ERROR_CODES_T;
+
+
+/*****************************************************************************/
+/* Structure                                                                 */
+/*****************************************************************************/
+/* Output buffer descriptor passed to the codec; embedded in the video       */
+/* decode, get display frame and set display frame input structures          */
+typedef struct {
+
+    /**
+     * number of output buffers
+     */
+    UWORD32             u4_num_bufs;
+
+    /**
+     * list of pointers to output buffers
+     */
+    UWORD8              *pu1_bufs[IVD_VIDDEC_MAX_IO_BUFFERS];
+
+    /**
+     * size of each output buffer
+     */
+    UWORD32             u4_min_out_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS];
+
+}ivd_out_bufdesc_t;
+
+/*****************************************************************************/
+/*   Initialize decoder                                                      */
+/*****************************************************************************/
+
+/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_INIT                              */
+
+
+typedef struct {
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                 u4_size;
+
+    /**
+     *  e_cmd
+     */
+    IVD_API_COMMAND_TYPE_T                  e_cmd;
+
+    /**
+     * number of memrecords allocated on request of the codec through fill mem records
+     */
+    UWORD32                                 u4_num_mem_rec;
+    /**
+     * maximum width for which codec should be initialized
+     */
+    UWORD32                                 u4_frm_max_wd;
+    /**
+     * maximum height for which codec should be initialized
+     */
+    UWORD32                                 u4_frm_max_ht;
+    /**
+     * format in which codec has to give out frame data for display
+     */
+    IV_COLOR_FORMAT_T                       e_output_format;
+    /**
+     * pointer to memrecord array, which contains allocated resources
+     */
+    iv_mem_rec_t                            *pv_mem_rec_location;
+}ivd_init_ip_t;
+
+
+typedef struct{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                 u4_size;
+
+    /**
+     * u4_error_code
+     */
+    UWORD32                                 u4_error_code;
+}ivd_init_op_t;
+
+
+/*****************************************************************************/
+/*   Video Decode                                                            */
+/*****************************************************************************/
+
+
+/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_VIDEO_DECODE                      */
+
+
+typedef struct {
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                 u4_size;
+
+    /**
+     * e_cmd
+     */
+    IVD_API_COMMAND_TYPE_T                  e_cmd;
+
+    /**
+     * u4_ts
+     */
+    UWORD32                                 u4_ts;
+
+    /**
+     * u4_num_Bytes
+     */
+    UWORD32                                 u4_num_Bytes;
+
+    /**
+     * pv_stream_buffer
+     */
+    void                                    *pv_stream_buffer;
+
+    /**
+     * output buffer desc
+     */
+    ivd_out_bufdesc_t                       s_out_buffer;
+
+}ivd_video_decode_ip_t;
+
+
+typedef struct{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                 u4_size;
+
+    /**
+     * u4_error_code
+     */
+    UWORD32                                 u4_error_code;
+
+    /**
+     * num_bytes_consumed
+     */
+    UWORD32                                 u4_num_bytes_consumed;
+
+    /**
+     * pic_wd
+     */
+    UWORD32                                 u4_pic_wd;
+
+    /**
+     * pic_ht
+     */
+    UWORD32                                 u4_pic_ht;
+
+    /**
+     * pic_type
+     */
+    IV_PICTURE_CODING_TYPE_T                e_pic_type;
+
+    /**
+     * frame_decoded_flag
+     */
+    UWORD32                                 u4_frame_decoded_flag;
+
+    /**
+     * new_seq
+     */
+    UWORD32                                 u4_new_seq;
+
+    /**
+     * output_present
+     */
+    UWORD32                                 u4_output_present;
+
+    /**
+     * progressive_frame_flag
+     */
+    UWORD32                                 u4_progressive_frame_flag;
+
+    /**
+     * is_ref_flag
+     */
+    UWORD32                                 u4_is_ref_flag;
+
+    /**
+     * output_format
+     */
+    IV_COLOR_FORMAT_T                       e_output_format;
+
+    /**
+     * disp_frm_buf
+     */
+    iv_yuv_buf_t                            s_disp_frm_buf;
+
+    /**
+     * fld_type
+     */
+    IV_FLD_TYPE_T                           e4_fld_type;
+
+    /**
+     * ts
+     */
+    UWORD32                                 u4_ts;
+
+    /**
+     * disp_buf_id
+     */
+    UWORD32                                 u4_disp_buf_id;
+}ivd_video_decode_op_t;
+
+
+/*****************************************************************************/
+/*   Get Display Frame                                                       */
+/*****************************************************************************/
+
+
+/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_GET_DISPLAY_FRAME                 */
+
+typedef struct
+{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                 u4_size;
+
+    /**
+     * e_cmd
+     */
+    IVD_API_COMMAND_TYPE_T                  e_cmd;
+
+    /**
+     * output buffer desc
+     */
+    ivd_out_bufdesc_t                       s_out_buffer;
+
+}ivd_get_display_frame_ip_t;
+
+
+typedef struct
+{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                 u4_size;
+
+    /**
+     * error_code
+     */
+    UWORD32                                 u4_error_code;
+
+    /**
+     * progressive_frame_flag
+     */
+    UWORD32                                 u4_progressive_frame_flag;
+
+    /**
+     * pic_type
+     */
+    IV_PICTURE_CODING_TYPE_T                e_pic_type;
+
+    /**
+     * is_ref_flag
+     */
+    UWORD32                                 u4_is_ref_flag;
+
+    /**
+     * output_format
+     */
+    IV_COLOR_FORMAT_T                       e_output_format;
+
+    /**
+     * disp_frm_buf
+     */
+    iv_yuv_buf_t                            s_disp_frm_buf;
+
+    /**
+     * fld_type
+     */
+    IV_FLD_TYPE_T                           e4_fld_type;
+
+    /**
+     * ts
+     */
+    UWORD32                                 u4_ts;
+
+    /**
+     * disp_buf_id
+     */
+    UWORD32                                 u4_disp_buf_id;
+}ivd_get_display_frame_op_t;
+
+/*****************************************************************************/
+/*   Set Display Frame                                                       */
+/*****************************************************************************/
+
+
+/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_SET_DISPLAY_FRAME                 */
+
+typedef struct
+{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                 u4_size;
+
+    /**
+     * cmd
+     */
+    IVD_API_COMMAND_TYPE_T                  e_cmd;
+
+    /**
+     * num_disp_bufs - presumably the number of valid entries in s_disp_buffer (TODO confirm)
+     */
+    UWORD32                                 num_disp_bufs;
+
+    /**
+     * array of IVD_VIDDEC_MAX_IO_BUFFERS output buffer descriptors
+     */
+    ivd_out_bufdesc_t                       s_disp_buffer[IVD_VIDDEC_MAX_IO_BUFFERS];
+
+}ivd_set_display_frame_ip_t;
+
+
+typedef struct
+{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                 u4_size;
+
+    /**
+     * error code
+     */
+    UWORD32                                 u4_error_code;
+}ivd_set_display_frame_op_t;
+
+
+/*****************************************************************************/
+/*   Release Display Frame                                                       */
+/*****************************************************************************/
+
+
+/* IVD_API_COMMAND_TYPE_T::e_cmd = IVD_CMD_REL_DISPLAY_FRAME                 */
+
+typedef struct
+{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                 u4_size;
+
+    /**
+     * e_cmd
+     */
+    IVD_API_COMMAND_TYPE_T                  e_cmd;
+
+    /**
+     * disp_buf_id
+     */
+    UWORD32                                 u4_disp_buf_id;
+}ivd_rel_display_frame_ip_t;
+
+
+typedef struct
+{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                 u4_size;
+
+    /**
+     * error code
+     */
+    UWORD32                                 u4_error_code;
+}ivd_rel_display_frame_op_t;
+
+/*****************************************************************************/
+/*   Video control  Flush                                                    */
+/*****************************************************************************/
+/* IVD_API_COMMAND_TYPE_T::e_cmd            = IVD_CMD_VIDEO_CTL              */
+/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd    = IVD_CMD_CTL_FLUSH          */
+
+
+
+typedef struct{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                 u4_size;
+
+    /**
+     * cmd
+     */
+    IVD_API_COMMAND_TYPE_T                  e_cmd;
+
+    /**
+     * sub_cmd
+     */
+    IVD_CONTROL_API_COMMAND_TYPE_T          e_sub_cmd;
+}ivd_ctl_flush_ip_t;
+
+
+typedef struct{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                 u4_size;
+
+    /**
+     * error code
+     */
+    UWORD32                                 u4_error_code;
+}ivd_ctl_flush_op_t;
+
+/*****************************************************************************/
+/*   Video control reset                                                     */
+/*****************************************************************************/
+/* IVD_API_COMMAND_TYPE_T::e_cmd            = IVD_CMD_VIDEO_CTL              */
+/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd    = IVD_CMD_CTL_RESET          */
+
+
+typedef struct{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                 u4_size;
+
+    /**
+     * cmd
+     */
+    IVD_API_COMMAND_TYPE_T                  e_cmd;
+
+    /**
+     * sub_cmd
+     */
+
+    IVD_CONTROL_API_COMMAND_TYPE_T          e_sub_cmd;
+}ivd_ctl_reset_ip_t;
+
+
+typedef struct{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                 u4_size;
+
+    /**
+     * error code
+     */
+    UWORD32                                 u4_error_code;
+}ivd_ctl_reset_op_t;
+
+
+/*****************************************************************************/
+/*   Video control  Set Params                                               */
+/*****************************************************************************/
+/* IVD_API_COMMAND_TYPE_T::e_cmd        = IVD_CMD_VIDEO_CTL                  */
+/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_CTL_SETPARAMS           */
+/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_CTL_SETDEFAULT          */
+
+
+
+typedef struct {
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                     u4_size;
+
+    /**
+     * cmd
+     */
+    IVD_API_COMMAND_TYPE_T                      e_cmd;
+
+    /**
+     * sub_cmd
+     */
+    IVD_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /**
+     * vid_dec_mode
+     */
+    IVD_VIDEO_DECODE_MODE_T                     e_vid_dec_mode;
+
+    /**
+     * disp_wd
+     */
+    UWORD32                                     u4_disp_wd;
+
+    /**
+     * frm_skip_mode
+     */
+    IVD_FRAME_SKIP_MODE_T                       e_frm_skip_mode;
+
+    /**
+     * frm_out_mode
+     */
+    IVD_DISPLAY_FRAME_OUT_MODE_T                e_frm_out_mode;
+}ivd_ctl_set_config_ip_t;
+
+
+typedef struct{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                     u4_size;
+
+    /**
+     * u4_error_code
+     */
+    UWORD32                                     u4_error_code;
+}ivd_ctl_set_config_op_t;
+
+/*****************************************************************************/
+/*   Video control:Get Buf Info                                              */
+/*****************************************************************************/
+
+/* IVD_API_COMMAND_TYPE_T::e_cmd         = IVD_CMD_VIDEO_CTL                 */
+/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_CTL_GETBUFINFO          */
+
+
+typedef struct{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                     u4_size;
+
+    /**
+     *  e_cmd
+     */
+    IVD_API_COMMAND_TYPE_T                      e_cmd;
+
+    /**
+     * sub_cmd
+     */
+    IVD_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+}ivd_ctl_getbufinfo_ip_t;
+
+
+typedef struct{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                     u4_size;
+
+    /**
+     * error code
+     */
+    UWORD32                                     u4_error_code;
+
+    /**
+     * no of display buffer sets required by codec
+     */
+    UWORD32                                     u4_num_disp_bufs;
+
+    /**
+     * no of input buffers required for codec
+     */
+    UWORD32                                     u4_min_num_in_bufs;
+
+    /**
+     * no of output buffers required for codec
+     */
+    UWORD32                                     u4_min_num_out_bufs;
+
+    /**
+     * sizes of each input buffer required
+     */
+    UWORD32                                     u4_min_in_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS];
+
+    /**
+     * sizes of each output buffer required
+     */
+    UWORD32                                     u4_min_out_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS];
+}ivd_ctl_getbufinfo_op_t;
+
+
+/*****************************************************************************/
+/*   Video control:Getstatus Call                                            */
+/*****************************************************************************/
+
+
+/* IVD_API_COMMAND_TYPE_T::e_cmd        = IVD_CMD_VIDEO_CTL                  */
+/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_CTL_GETPARAMS           */
+
+
+typedef struct{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                     u4_size;
+
+    /**
+     * cmd
+     */
+    IVD_API_COMMAND_TYPE_T                      e_cmd;
+
+    /**
+     * sub_cmd
+     */
+    IVD_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+}ivd_ctl_getstatus_ip_t;
+
+
+typedef struct{
+
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                  u4_size;
+
+    /**
+     * error code
+     */
+    UWORD32                  u4_error_code;
+
+    /**
+     * number of display buffer sets required by codec
+     */
+    UWORD32                  u4_num_disp_bufs;
+
+    /**
+     * u4_pic_ht
+     */
+    UWORD32                  u4_pic_ht;
+
+    /**
+     * u4_pic_wd
+     */
+    UWORD32                  u4_pic_wd;
+
+    /**
+     * frame_rate
+     */
+    UWORD32                  u4_frame_rate;
+
+    /**
+     * u4_bit_rate
+     */
+    UWORD32                  u4_bit_rate;
+
+    /**
+     * content_type
+     */
+    IV_CONTENT_TYPE_T        e_content_type;
+
+    /**
+     * output_chroma_format
+     */
+    IV_COLOR_FORMAT_T        e_output_chroma_format;
+
+    /**
+     * number of input buffers required for codec
+     */
+    UWORD32                  u4_min_num_in_bufs;
+
+    /**
+     * number of output buffers required for codec
+     */
+    UWORD32                  u4_min_num_out_bufs;
+
+    /**
+     * size of each input buffer required
+     */
+    UWORD32                  u4_min_in_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS];
+
+    /**
+     * size of each output buffer required
+     */
+    UWORD32                  u4_min_out_buf_size[IVD_VIDDEC_MAX_IO_BUFFERS];
+}ivd_ctl_getstatus_op_t;
+
+
+/*****************************************************************************/
+/*   Video control:Get Version Info                                          */
+/*****************************************************************************/
+
+/* IVD_API_COMMAND_TYPE_T::e_cmd        = IVD_CMD_VIDEO_CTL                  */
+/* IVD_CONTROL_API_COMMAND_TYPE_T::e_sub_cmd=IVD_CMD_CTL_GETVERSION          */
+
+
+typedef struct{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                     u4_size;
+
+    /**
+     * cmd
+     */
+    IVD_API_COMMAND_TYPE_T                      e_cmd;
+
+    /**
+     * sub_cmd
+     */
+    IVD_CONTROL_API_COMMAND_TYPE_T              e_sub_cmd;
+
+    /**
+     * pv_version_buffer
+     */
+    void                                        *pv_version_buffer;
+
+    /**
+     * version_buffer_size
+     */
+    UWORD32                                     u4_version_buffer_size;
+}ivd_ctl_getversioninfo_ip_t;
+
+
+typedef struct{
+    /**
+     * u4_size of the structure
+     */
+    UWORD32                                     u4_size;
+
+    /**
+     * error code
+     */
+    UWORD32                                     u4_error_code;
+}ivd_ctl_getversioninfo_op_t;
+
+#endif /* _IVD_H */
+

diff --git a/decoder/mips/impeg2d_function_selector.c b/decoder/mips/impeg2d_function_selector.c
new file mode 100644
index 0000000..a72c1f9
--- /dev/null
+++ b/decoder/mips/impeg2d_function_selector.c

@@ -0,0 +1,83 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2d_function_selector.c
+*
+* @brief
+*  Contains functions to initialize function pointers used in mpeg2
+*
+* @author
+*  Naveen
+*
+* @par List of Functions:
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "iv_datatypedef.h"
+#include "iv.h"
+
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_globals.h"
+#include "impeg2_mem_func.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_macros.h"
+
+#include "ivd.h"
+#include "impeg2d.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_structs.h"
+#include "impeg2d_vld_tables.h"
+#include "impeg2d_vld.h"
+#include "impeg2d_pic_proc.h"
+#include "impeg2d_debug.h"
+#include "impeg2d_mc.h"
+
+void impeg2d_init_function_ptr_generic(void *pv_codec);
+
+/* MIPS build: forwards the codec handle to impeg2d_init_function_ptr_generic(); nothing else is set up here. */
+void impeg2d_init_function_ptr(void *pv_codec)
+{
+    /* The opaque handle is treated as the decoder state and passed on unchanged. */
+    impeg2d_init_function_ptr_generic((dec_state_t *)pv_codec);
+}
+
+/* MIPS build: records ARCH_NA as the processor architecture in the decoder state. */
+void impeg2d_init_arch(void *pv_codec)
+{
+    /* pv_codec is dereferenced as a dec_state_t; no NULL check is performed. */
+    ((dec_state_t *)pv_codec)->e_processor_arch = ARCH_NA;
+}

diff --git a/decoder/x86/impeg2d_function_selector.c b/decoder/x86/impeg2d_function_selector.c
new file mode 100755
index 0000000..ddadb02
--- /dev/null
+++ b/decoder/x86/impeg2d_function_selector.c

@@ -0,0 +1,117 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2d_function_selector.c
+*
+* @brief
+*  Contains functions to initialize function pointers used in mpeg2
+*
+* @author
+*  Naveen
+*
+* @par List of Functions:
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User include files */
+#include "iv_datatypedef.h"
+#include "iv.h"
+#include "ivd.h"
+#include "ithread.h"
+
+#include "impeg2_macros.h"
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_mem_func.h"
+
+#include "impeg2d.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_debug.h"
+#include "impeg2d_structs.h"
+
+void impeg2d_init_function_ptr_generic(void *pv_codec);
+void impeg2d_init_function_ptr_ssse3(void *pv_codec);
+void impeg2d_init_function_ptr_sse42(void *pv_codec);
+void impeg2d_init_function_ptr_avx2(void *pv_codec);
+
+/* Runs the generic initializer, then the initializer selected by
+ * e_processor_arch; values without a branch below use the SSE4.2 one. */
+void impeg2d_init_function_ptr(void *pv_codec)
+{
+    dec_state_t *ps_dec = (dec_state_t *)pv_codec;
+    void (*pf_arch_init)(void *) = impeg2d_init_function_ptr_sse42;
+
+    impeg2d_init_function_ptr_generic(pv_codec);
+
+    if(ARCH_X86_GENERIC == ps_dec->e_processor_arch)
+    {
+        pf_arch_init = impeg2d_init_function_ptr_generic;
+    }
+    else if(ARCH_X86_SSSE3 == ps_dec->e_processor_arch)
+    {
+        pf_arch_init = impeg2d_init_function_ptr_ssse3;
+    }
+#ifndef DISABLE_AVX2
+    else if(ARCH_X86_AVX2 == ps_dec->e_processor_arch)
+    {
+        pf_arch_init = impeg2d_init_function_ptr_avx2;
+    }
+#endif
+
+    /* ARCH_X86_SSE42, ARCH_X86_AVX2 under DISABLE_AVX2, and every other
+     * value keep the SSE4.2 initializer chosen above. */
+    pf_arch_init(pv_codec);
+}
+void impeg2d_init_arch(void *pv_codec)
+{
+    dec_state_t *ps_codec = (dec_state_t*) pv_codec;
+    /* Stores the processor architecture in the decoder state: the ARCH_*
+     * tag matching the build-time DEFAULT_ARCH, or ARCH_X86_SSE42 if unset. */
+#ifdef DEFAULT_ARCH
+#if DEFAULT_ARCH == D_ARCH_X86_SSE42
+    ps_codec->e_processor_arch = ARCH_X86_SSE42;
+#elif DEFAULT_ARCH == D_ARCH_X86_SSSE3
+    ps_codec->e_processor_arch = ARCH_X86_SSSE3;
+#elif DEFAULT_ARCH == D_ARCH_X86_AVX2
+    ps_codec->e_processor_arch = ARCH_X86_AVX2; /* ARCH_* tag, not D_ARCH_* */
+#else
+    ps_codec->e_processor_arch = ARCH_X86_GENERIC;
+#endif
+#else
+    ps_codec->e_processor_arch = ARCH_X86_SSE42; /* no DEFAULT_ARCH given */
+#endif
+}

diff --git a/decoder/x86/impeg2d_function_selector_avx2.c b/decoder/x86/impeg2d_function_selector_avx2.c
new file mode 100644
index 0000000..ab5d847
--- /dev/null
+++ b/decoder/x86/impeg2d_function_selector_avx2.c

@@ -0,0 +1,74 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2d_function_selector_avx2.c
+*
+* @brief
+*  Contains functions to initialize function pointers used in mpeg2
+*
+* @author
+*  Naveen
+*
+* @par List of Functions:
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User include files */
+#include "iv_datatypedef.h"
+#include "iv.h"
+#include "ivd.h"
+#include "ithread.h"
+
+#include "impeg2_macros.h"
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_mem_func.h"
+
+#include "impeg2d.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_debug.h"
+#include "impeg2d_structs.h"
+
+void impeg2d_init_function_ptr_avx2(void *pv_codec)
+{
+    dec_state_t *ps_dec = (dec_state_t *)pv_codec;
+    /* Writes only pf_idct_recon[0..3]; the routines named here have no ISA suffix. */
+    ps_dec->pf_idct_recon[3] = impeg2_idct_recon;
+    ps_dec->pf_idct_recon[2] = impeg2_idct_recon;
+    ps_dec->pf_idct_recon[1] = impeg2_idct_recon_dc_mismatch;
+    ps_dec->pf_idct_recon[0] = impeg2_idct_recon_dc;
+}

diff --git a/decoder/x86/impeg2d_function_selector_sse42.c b/decoder/x86/impeg2d_function_selector_sse42.c
new file mode 100644
index 0000000..a4b6673
--- /dev/null
+++ b/decoder/x86/impeg2d_function_selector_sse42.c

@@ -0,0 +1,84 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2d_function_selector_sse42.c
+*
+* @brief
+*  Contains functions to initialize function pointers used in mpeg2
+*
+* @author
+*  Naveen
+*
+* @par List of Functions:
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User include files */
+#include "iv_datatypedef.h"
+#include "iv.h"
+#include "ivd.h"
+#include "ithread.h"
+
+#include "impeg2_macros.h"
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_mem_func.h"
+
+#include "impeg2d.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_debug.h"
+#include "impeg2d_structs.h"
+
+void impeg2d_init_function_ptr_sse42(void *pv_codec)
+{
+    dec_state_t *ps_dec = (dec_state_t *)pv_codec;
+    /* Every slot below receives its _sse42 routine; pf_idct_recon[2] and [3] share one. */
+    ps_dec->pf_idct_recon[0] = impeg2_idct_recon_dc_sse42;
+    ps_dec->pf_idct_recon[1] = impeg2_idct_recon_dc_mismatch_sse42;
+    ps_dec->pf_idct_recon[2] = impeg2_idct_recon_sse42;
+    ps_dec->pf_idct_recon[3] = impeg2_idct_recon_sse42;
+    /* copy, interpolate and the four pf_*_8x8 entries */
+    ps_dec->pf_copy_mb = impeg2_copy_mb_sse42;
+    ps_dec->pf_interpolate = impeg2_interpolate_sse42;
+    ps_dec->pf_fullx_fully_8x8 = impeg2_mc_fullx_fully_8x8_sse42;
+    ps_dec->pf_fullx_halfy_8x8 = impeg2_mc_fullx_halfy_8x8_sse42;
+    ps_dec->pf_halfx_fully_8x8 = impeg2_mc_halfx_fully_8x8_sse42;
+    ps_dec->pf_halfx_halfy_8x8 = impeg2_mc_halfx_halfy_8x8_sse42;
+    /* 8x8 block memset entries */
+    ps_dec->pf_memset_8bit_8x8_block = impeg2_memset_8bit_8x8_block_sse42;
+    ps_dec->pf_memset_16bit_8x8_linear_block = impeg2_memset0_16bit_8x8_linear_block_sse42;
+}

diff --git a/decoder/x86/impeg2d_function_selector_ssse3.c b/decoder/x86/impeg2d_function_selector_ssse3.c
new file mode 100644
index 0000000..fb6c345
--- /dev/null
+++ b/decoder/x86/impeg2d_function_selector_ssse3.c

@@ -0,0 +1,74 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+*  impeg2d_function_selector_ssse3.c
+*
+* @brief
+*  Contains functions to initialize function pointers used in mpeg2
+*
+* @author
+*  Naveen
+*
+* @par List of Functions:
+* @remarks
+*  None
+*
+*******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User include files */
+#include "iv_datatypedef.h"
+#include "iv.h"
+#include "ivd.h"
+#include "ithread.h"
+
+#include "impeg2_macros.h"
+#include "impeg2_buf_mgr.h"
+#include "impeg2_disp_mgr.h"
+#include "impeg2_defs.h"
+#include "impeg2_platform_macros.h"
+#include "impeg2_inter_pred.h"
+#include "impeg2_idct.h"
+#include "impeg2_format_conv.h"
+#include "impeg2_mem_func.h"
+
+#include "impeg2d.h"
+#include "impeg2d_bitstream.h"
+#include "impeg2d_debug.h"
+#include "impeg2d_structs.h"
+
+void impeg2d_init_function_ptr_ssse3(void *pv_codec)
+{
+    dec_state_t *ps_dec = (dec_state_t *)pv_codec;
+    /* Writes only pf_idct_recon[0..3]; the routines named here have no ISA suffix. */
+    ps_dec->pf_idct_recon[3] = impeg2_idct_recon;
+    ps_dec->pf_idct_recon[2] = impeg2_idct_recon;
+    ps_dec->pf_idct_recon[1] = impeg2_idct_recon_dc_mismatch;
+    ps_dec->pf_idct_recon[0] = impeg2_idct_recon_dc;
+}

diff --git a/test/Android.mk b/test/Android.mk
new file mode 100644
index 0000000..7807003
--- /dev/null
+++ b/test/Android.mk

@@ -0,0 +1,5 @@
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+# Include decoder.mk from this directory
+include $(LOCAL_PATH)/decoder.mk

diff --git a/test/decoder.mk b/test/decoder.mk
new file mode 100644
index 0000000..2aef0f9
--- /dev/null
+++ b/test/decoder.mk

@@ -0,0 +1,13 @@
+LOCAL_PATH := $(call my-dir)
+# Builds the mpeg2dec executable from decoder/main.c, linked against libmpeg2dec.
+include $(CLEAR_VARS)
+
+LOCAL_MODULE    := mpeg2dec
+LOCAL_MODULE_TAGS := optional
+# PROFILE_ENABLE selects the timing macros and MD5_DISABLE stubs out calc_md5_cksum in main.c
+LOCAL_CFLAGS := -DPROFILE_ENABLE -DMD5_DISABLE -DARM  -fPIC
+LOCAL_C_INCLUDES += $(LOCAL_PATH)/../decoder $(LOCAL_PATH)/../common $(LOCAL_PATH)/decoder/
+LOCAL_SRC_FILES := decoder/main.c
+LOCAL_STATIC_LIBRARIES := libmpeg2dec
+
+include $(BUILD_EXECUTABLE)

diff --git a/test/decoder/main.c b/test/decoder/main.c
new file mode 100644
index 0000000..5930bd1
--- /dev/null
+++ b/test/decoder/main.c

@@ -0,0 +1,3135 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*****************************************************************************/
+/*                                                                           */
+/*  File Name         : main.c                                               */
+/*                                                                           */
+/*  Description       : Contains an application that demonstrates use of the */
+/*                      MPEG2 decoder API                                    */
+/*                                                                           */
+/*  List of Functions :                                                      */
+/*                                                                           */
+/*  Issues / Problems : None                                                 */
+/*                                                                           */
+/*  Revision History  :                                                      */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 09 2012   Harish          Initial Version                      */
+/*****************************************************************************/
+/*****************************************************************************/
+/* File Includes                                                             */
+/*****************************************************************************/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#ifdef X86_MINGW
+#include <signal.h>
+#endif
+
+#ifndef IOS
+#include <malloc.h>
+#endif
+#ifdef IOS_DISPLAY
+#include "cast_types.h"
+#else
+#include "iv_datatypedef.h"
+#endif
+
+#include "iv.h"
+#include "ivd.h"
+#include "impeg2d.h"
+#include "ithread.h"
+
+#ifdef WINDOWS_TIMER
+#include <windows.h>
+#else
+#include <sys/time.h>
+#endif
+
+#define ALIGN8(x) ((((x) + 7) >> 3) << 3)
+#define NUM_DISPLAY_BUFFERS 4
+#define DEFAULT_FPS         30
+
+
+#define ENABLE_DEGRADE 0
+#define MAX_DISP_BUFFERS    64
+#define EXTRA_DISP_BUFFERS  0
+#define STRLENGTH 1000
+
+//#define TEST_FLUSH
+#define FLUSH_FRM_CNT 100
+
+
+#ifdef IOS
+#define PATHLENMAX 500
+char filename_with_path[PATHLENMAX];
+#endif
+
+#ifdef PROFILE_ENABLE
+#ifdef WINDOWS_TIMER
+typedef  LARGE_INTEGER TIMER;
+#else
+typedef struct timeval TIMER;
+#endif
+#else
+typedef WORD32 TIMER;
+#endif
+
+#ifdef PROFILE_ENABLE
+#ifdef WINDOWS_TIMER
+#define GETTIME(timer) QueryPerformanceCounter(timer);
+#else
+#define GETTIME(timer) gettimeofday(timer,NULL);
+#endif
+
+#ifdef WINDOWS_TIMER
+#define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency)   \
+{                                                                           \
+   TIMER s_temp_time;                                                       \
+   s_temp_time.LowPart = s_end_timer.LowPart - s_start_timer.LowPart ;      \
+   s_elapsed_time = (UWORD32) ( ((DOUBLE)s_temp_time.LowPart / (DOUBLE)frequency.LowPart )  * 1000000); \
+}
+#else
+#define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency) \
+                   s_elapsed_time = ((s_end_timer.tv_sec - s_start_timer.tv_sec) * 1000000) + (s_end_timer.tv_usec - s_start_timer.tv_usec);
+#endif
+
+#else
+#define GETTIME(timer)
+#define ELAPSEDTIME(s_start_timer,s_end_timer, s_elapsed_time, frequency)
+#endif
+
+
+/* Function declarations */
+#ifndef MD5_DISABLE
+void calc_md5_cksum(UWORD8 *pu1_inbuf, UWORD32 u4_stride, UWORD32 u4_width, UWORD32 u4_height, UWORD8 *pu1_cksum_p);
+#else
+#define calc_md5_cksum(a, b, c, d, e)
+#endif
+#ifdef SDL_DISPLAY
+void* sdl_disp_init(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *);
+void sdl_alloc_disp_buffers(void *);
+void sdl_display(void *, WORD32);
+void sdl_set_disp_buffers(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **);
+void sdl_disp_deinit(void *);
+void sdl_disp_usleep(UWORD32);
+IV_COLOR_FORMAT_T sdl_get_color_fmt(void);
+UWORD32 sdl_get_stride(void);
+#endif
+
+#ifdef INTEL_CE5300
+void* gdl_disp_init(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *);
+void gdl_alloc_disp_buffers(void *);
+void gdl_display(void *, WORD32);
+void gdl_set_disp_buffers(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **);
+void gdl_disp_deinit(void *);
+void gdl_disp_usleep(UWORD32);
+IV_COLOR_FORMAT_T gdl_get_color_fmt(void);
+UWORD32 gdl_get_stride(void);
+#endif
+
+#ifdef FBDEV_DISPLAY
+void* fbd_disp_init(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *);
+void fbd_alloc_disp_buffers(void *);
+void fbd_display(void *, WORD32);
+void fbd_set_disp_buffers(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **);
+void fbd_disp_deinit(void *);
+void fbd_disp_usleep(UWORD32);
+IV_COLOR_FORMAT_T fbd_get_color_fmt(void);
+UWORD32 fbd_get_stride(void);
+#endif
+
+#ifdef IOS_DISPLAY
+void* ios_disp_init(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *);
+void ios_alloc_disp_buffers(void *);
+void ios_display(void *, WORD32);
+void ios_set_disp_buffers(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **);
+void ios_disp_deinit(void *);
+void ios_disp_usleep(UWORD32);
+IV_COLOR_FORMAT_T ios_get_color_fmt(void);
+UWORD32 ios_get_stride(void);
+#endif
+
+typedef struct
+{
+    UWORD32 u4_piclen_flag;
+    UWORD32 u4_file_save_flag;
+    UWORD32 u4_chksum_save_flag;
+    UWORD32 u4_max_frm_ts;
+    IV_COLOR_FORMAT_T e_output_chroma_format;
+    IVD_ARCH_T e_arch;
+    IVD_SOC_T e_soc;
+    UWORD32 dump_q_rd_idx;
+    UWORD32 dump_q_wr_idx;
+    WORD32  disp_q_wr_idx;
+    WORD32  disp_q_rd_idx;
+
+    void *cocodec_obj;
+    UWORD32 share_disp_buf;
+    UWORD32 num_disp_buf;
+    UWORD32 b_pic_present;
+    WORD32 i4_degrade_type;
+    WORD32 i4_degrade_pics;
+    UWORD32 u4_num_cores;
+    UWORD32 disp_delay;
+    WORD32 trace_enable;
+    CHAR ac_trace_fname[STRLENGTH];
+    CHAR ac_piclen_fname[STRLENGTH];
+    CHAR ac_ip_fname[STRLENGTH];
+    CHAR ac_op_fname[STRLENGTH];
+    CHAR ac_op_chksum_fname[STRLENGTH];
+    ivd_out_bufdesc_t s_disp_buffers[MAX_DISP_BUFFERS];
+    iv_yuv_buf_t s_disp_frm_queue[MAX_DISP_BUFFERS];
+    UWORD32 s_disp_frm_id_queue[MAX_DISP_BUFFERS];
+    UWORD32 loopback;
+    UWORD32 display;
+    UWORD32 full_screen;
+    UWORD32 fps;
+    UWORD32 max_wd;
+    UWORD32 max_ht;
+    UWORD32 max_level;
+
+    UWORD32 u4_strd;
+
+    /* For signalling to display thread */
+    UWORD32 u4_pic_wd;
+    UWORD32 u4_pic_ht;
+
+    /* For IOS display */
+    WORD32 i4_screen_wd;
+    WORD32 i4_screen_ht;
+
+    //UWORD32 u4_output_present;
+    WORD32  quit;
+    WORD32  paused;
+
+
+    void *pv_disp_ctx;
+    void *display_thread_handle;
+    WORD32 display_thread_created;
+    volatile WORD32 display_init_done;
+    volatile WORD32 display_deinit_flag;
+
+    void* (*disp_init)(UWORD32, UWORD32, WORD32, WORD32, WORD32, WORD32, WORD32, WORD32 *, WORD32 *);
+    void (*alloc_disp_buffers)(void *);
+    void (*display_buffer)(void *, WORD32);
+    void (*set_disp_buffers)(void *, WORD32, UWORD8 **, UWORD8 **, UWORD8 **);
+    void (*disp_deinit)(void *);
+    void (*disp_usleep)(UWORD32);
+    IV_COLOR_FORMAT_T(*get_color_fmt)(void);
+    UWORD32(*get_stride)(void);
+}vid_dec_ctx_t;
+
+
+
+typedef enum
+{
+    INVALID,
+    HELP,
+    VERSION,
+    INPUT_FILE,
+    OUTPUT,
+    CHKSUM,
+    SAVE_OUTPUT,
+    SAVE_CHKSUM,
+    CHROMA_FORMAT,
+    NUM_FRAMES,
+    NUM_CORES,
+
+    SHARE_DISPLAY_BUF,
+    LOOPBACK,
+    DISPLAY,
+    FULLSCREEN,
+    FPS,
+    TRACE,
+    MAX_WD,
+    MAX_HT,
+    MAX_LEVEL,
+    CONFIG,
+
+    DEGRADE_TYPE,
+    DEGRADE_PICS,
+    ARCH,
+    SOC,
+    PICLEN,
+    PICLEN_FILE,
+}ARGUMENT_T;
+
+typedef struct
+{
+    CHAR argument_shortname[4];
+    CHAR argument_name[128];
+    ARGUMENT_T argument;
+    CHAR description[512];
+}argument_t;
+
+static const argument_t argument_mapping[] =
+{   /* columns: short name, long name, ARGUMENT_T id, help text */
+    { "-h",  "--help",                   HELP,
+        "Print this help\n" },
+    { "-c", "--config",      CONFIG,
+        "config file (Default: test.cfg)\n" },
+
+    { "-v",  "--version",                VERSION,
+        "Version information\n" },
+    { "-i",  "--input",                  INPUT_FILE,
+        "Input file\n" },
+    { "-o",  "--output",                 OUTPUT,
+        "Output file\n" },
+    { "--",  "--piclen",                 PICLEN,
+        "Flag to signal if the decoder has to use a file containing number of bytes in each picture to be fed in each call\n" },
+    { "--",  "--piclen_file",                 PICLEN_FILE,
+        "File containing number of bytes in each picture - each line containing one size\n" },
+    { "--",  "--chksum",          CHKSUM,
+        "Output MD5 Checksum file\n" },
+    { "-s", "--save_output",            SAVE_OUTPUT,
+        "Save Output file\n" },
+    { "--", "--save_chksum",            SAVE_CHKSUM,
+        "Save Check sum file\n" },
+    { "--",  "--chroma_format",          CHROMA_FORMAT,
+        "Output Chroma format Supported values YUV_420P, YUV_422ILE, RGB_565, YUV_420SP_UV, YUV_420SP_VU\n" },
+    { "-n", "--num_frames",             NUM_FRAMES,
+        "Number of frames to be decoded\n" },
+    { "--", "--num_cores",              NUM_CORES,
+        "Number of cores to be used\n" },
+    { "--", "--share_display_buf",      SHARE_DISPLAY_BUF,
+        "Enable shared display buffer mode\n" },
+
+    { "--", "--loopback",      LOOPBACK,
+        "Enable playback in a loop\n" },
+    { "--", "--display",      DISPLAY,
+        "Enable display (uses SDL)\n" },
+    { "--", "--fullscreen",      FULLSCREEN,
+        "Enable full screen (Only for GDL and SDL)\n" },
+    { "--", "--fps",      FPS,
+        "FPS to be used for display \n" },
+    { "--",  "--trace",                   TRACE,   /* "-i" is taken by --input */
+        "Trace file\n" },
+    { "--", "--max_wd",      MAX_WD,
+        "Maximum width (Default: 2560) \n" },
+    { "--", "--max_ht",      MAX_HT,
+        "Maximum height (Default: 1600)\n" },
+    { "--",  "--arch", ARCH,
+        "Set Architecture. Supported values  ARM_NONEON, ARM_A9Q, ARM_A7, ARM_A5, ARM_NEONINTR, X86_GENERIC, X86_SSSE3, X86_SSE4 \n" },
+    { "--",  "--soc", SOC,
+        "Set SOC. Supported values  GENERIC, HISI_37X \n" },
+
+#if 0
+    { "--",  "--degrade_type",  DEGRADE_TYPE,
+        "Degrade type : 0: No degrade 0th bit set : Disable SAO 1st bit set : Disable deblocking 2nd bit set : Faster inter prediction filters 3rd bit set : Fastest inter prediction filters\n" },
+    { "--",  "--degrade_pics",  DEGRADE_PICS,
+        "Degrade pics : 0 : No degrade  1 : Only on non-reference frames  2 : Do not degrade every 4th or key frames  3 : All non-key frames  4 : All frames" },
+
+    { "--", "--max_level",      MAX_LEVEL,
+        "Maximum Decoder Level (Default: 50)\n" },
+#endif
+};
+
+#define PEAK_WINDOW_SIZE            8
+#define MAX_FRAME_WIDTH             2560
+#define MAX_FRAME_HEIGHT            1600
+#define MAX_LEVEL_SUPPORTED         50
+#define MAX_REF_FRAMES              16
+#define MAX_REORDER_FRAMES          16
+#define DEFAULT_SHARE_DISPLAY_BUF   0
+#define STRIDE                      0
+#define DEFAULT_NUM_CORES           1
+
+#define DUMP_SINGLE_BUF 0
+#define IV_ISFATALERROR(x)         (((x) >> IVD_FATALERROR) & 0x1)
+
+#define ivd_api_function        impeg2d_api_function
+
+#ifdef IOS
+char filename_trace[PATHLENMAX];
+#endif
+
+#if ANDROID_NDK
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : raise                                                    */
+/*                                                                           */
+/*  Description   : Needed as a workaround when the application is built in  */
+/*                  Android NDK. This is an exception to be called for divide*/
+/*                  by zero error                                            */
+/*                                                                           */
+/*  Inputs        : a                                                        */
+/*  Globals       :                                                          */
+/*  Processing    : None                                                     */
+/*                                                                           */
+/*  Outputs       :                                                          */
+/*  Returns       :                                                          */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 09 2012   100189          Initial Version                      */
+/*                                                                           */
+/*****************************************************************************/
+int raise(int a)
+{
+    fputs("Divide by zero\n", stdout); /* argument a is not used */
+    return 0;
+}
+#endif
+
+#ifdef _WIN32
+/*****************************************************************************/
+/* Function to print library calls                                           */
+/*****************************************************************************/
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : app_aligned_malloc                                       */
+/*                                                                           */
+/*  Description   : Allocates size bytes via _aligned_malloc() aligned to   */
+/*                  the requested alignment                                  */
+/*                                                                           */
+/*  Inputs        : alignment                                                */
+/*                  size                                                     */
+/*  Globals       :                                                          */
+/*  Processing    :                                                          */
+/*                                                                           */
+/*  Outputs       :                                                          */
+/*  Returns       :                                                          */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 09 2012   100189          Initial Version                      */
+/*                                                                           */
+/*****************************************************************************/
+
+void* app_aligned_malloc(WORD32 alignment, WORD32 size)
+{   /* the CRT call takes (size, alignment), the reverse of this wrapper */
+    return _aligned_malloc(size, alignment);
+}
+
+void app_aligned_free(void *pv_buf)
+{
+    /* counterpart of app_aligned_malloc(): memory obtained from
+     * _aligned_malloc() is handed back through _aligned_free() */
+    _aligned_free(pv_buf);
+}
+#endif
+
+#ifdef IOS
+void* app_aligned_malloc(WORD32 alignment, WORD32 size)
+{   /* NOTE: alignment is ignored here; plain malloc() alignment applies */
+    return malloc(size);
+}
+
+void app_aligned_free(void *pv_buf)
+{
+    /* buffer came from malloc() above, so free() releases it */
+    free(pv_buf);
+}
+#endif
+
+#if (!defined(IOS)) && (!defined(_WIN32))
+void* app_aligned_malloc(WORD32 alignment, WORD32 size)
+{   /* thin wrapper: memalign() already takes (alignment, size) in this order */
+    return memalign(alignment, size);
+}
+
+void app_aligned_free(void *pv_buf)
+{
+    /* counterpart of app_aligned_malloc(): the buffer goes back via free() */
+    free(pv_buf);
+}
+#endif
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : set_degrade                                 */
+/*                                                                           */
+/*  Description   : Control call to set degrade level       */
+/*                                                                           */
+/*                                                                           */
+/*  Inputs        : codec_obj  - Codec Handle                                */
+/*                  type - degrade level value between 0 to 4                */
+/*                    0 : No degrade                                         */
+/*                    1st bit : Disable SAO                                  */
+/*                    2nd bit : Disable Deblock                              */
+/*                    3rd bit : Faster MC for non-ref                        */
+/*                    4th bit : Fastest MC for non-ref                       */
+/*                  pics - Pictures that are degraded                        */
+/*                    0 : No degrade                                         */
+/*                    1 : Non-ref pictures                                   */
+/*                    2 : Pictures at given interval are not degraded        */
+/*                    3 : All non-key pictures                               */
+/*                    4 : All pictures                                       */
+/*  Globals       :                                                          */
+/*  Processing    : No-op for now; degrade control call is compiled out      */
+/*                                                                           */
+/*  Outputs       :                                                          */
+/*  Returns       : Control call return status                               */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 09 2012   100189          Initial Version                      */
+/*                                                                           */
+/*****************************************************************************/
+
+IV_API_CALL_STATUS_T set_degrade(void *codec_obj, UWORD32 type, WORD32 pics)
+{
+    /* Nothing in the compiled code modifies this status, so the function    */
+    /* always reports IV_SUCCESS while the block below stays disabled        */
+    IV_API_CALL_STATUS_T e_dec_status = IV_SUCCESS;
+#if 0
+    impeg2d_ctl_degrade_ip_t s_ctl_ip;
+    impeg2d_ctl_degrade_op_t s_ctl_op;
+    void *pv_api_ip, *pv_api_op;
+
+
+    s_ctl_ip.u4_size = sizeof(impeg2d_ctl_degrade_ip_t);
+    s_ctl_ip.i4_degrade_type = type;
+    s_ctl_ip.i4_nondegrade_interval = 4;
+    s_ctl_ip.i4_degrade_pics = pics;
+
+    s_ctl_op.u4_size = sizeof(impeg2d_ctl_degrade_op_t);
+
+    pv_api_ip = (void *)&s_ctl_ip;
+    pv_api_op = (void *)&s_ctl_op;
+
+    s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+    s_ctl_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IMPEG2D_CMD_CTL_DEGRADE;
+
+    e_dec_status = ivd_api_function((iv_obj_t *)codec_obj, pv_api_ip, pv_api_op);
+
+    if(IV_SUCCESS != e_dec_status)
+    {
+        printf("Error in setting degrade level \n");
+    }
+#endif
+
+    /* The arguments are not used by the compiled code; the casts presumably */
+    /* exist to keep unused-parameter warnings quiet                         */
+    (void)codec_obj;
+    (void)type;
+    (void)pics;
+
+    return e_dec_status;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : enable_skipb_frames                                      */
+/*                                                                           */
+/*  Description   : Control call to enable skipping of b frames              */
+/*                                                                           */
+/*                                                                           */
+/*  Inputs        : codec_obj : Codec handle                                 */
+/*  Globals       :                                                          */
+/*  Processing    : Calls enable skip B frames control                       */
+/*                                                                           */
+/*  Outputs       :                                                          */
+/*  Returns       : Control call return status                               */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 09 2012   100189          Initial Version                      */
+/*                                                                           */
+/*****************************************************************************/
+
+IV_API_CALL_STATUS_T enable_skipb_frames(void *codec_obj,
+                                         vid_dec_ctx_t *ps_app_ctx)
+{
+    IV_API_CALL_STATUS_T e_status;
+    ivd_ctl_set_config_ip_t s_cfg_ip;
+    ivd_ctl_set_config_op_t s_cfg_op;
+
+    /* Structure sizes of the input and output arguments */
+    s_cfg_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t);
+    s_cfg_op.u4_size = sizeof(ivd_ctl_set_config_op_t);
+
+    /* Video control command, set-params sub command */
+    s_cfg_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+    s_cfg_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
+
+    /* Skip mode is IVD_SKIP_B; display width comes from the app context's   */
+    /* u4_strd field and the remaining settings are fixed values             */
+    s_cfg_ip.e_frm_skip_mode = IVD_SKIP_B;
+    s_cfg_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT;
+    s_cfg_ip.e_vid_dec_mode = IVD_DECODE_FRAME;
+    s_cfg_ip.u4_disp_wd = ps_app_ctx->u4_strd;
+
+    e_status = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_cfg_ip,
+                                (void *)&s_cfg_op);
+    if(e_status != IV_SUCCESS)
+    {
+        printf("Error in Enable SkipB frames \n");
+    }
+
+    return e_status;
+}
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : disable_skipb_frames                                     */
+/*                                                                           */
+/*  Description   : Control call to disable skipping of b frames             */
+/*                                                                           */
+/*                                                                           */
+/*  Inputs        : codec_obj : Codec handle                                 */
+/*  Globals       :                                                          */
+/*  Processing    : Calls disable B frame skip control                       */
+/*                                                                           */
+/*  Outputs       :                                                          */
+/*  Returns       : Control call return status                               */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 09 2012   100189          Initial Version                      */
+/*                                                                           */
+/*****************************************************************************/
+
+IV_API_CALL_STATUS_T disable_skipb_frames(void *codec_obj,
+                                          vid_dec_ctx_t *ps_app_ctx)
+{
+    IV_API_CALL_STATUS_T e_status;
+    ivd_ctl_set_config_ip_t s_cfg_ip;
+    ivd_ctl_set_config_op_t s_cfg_op;
+
+    /* Structure sizes of the input and output arguments */
+    s_cfg_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t);
+    s_cfg_op.u4_size = sizeof(ivd_ctl_set_config_op_t);
+
+    /* Video control command, set-params sub command */
+    s_cfg_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+    s_cfg_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
+
+    /* Skip mode is IVD_SKIP_NONE; display width comes from the app          */
+    /* context's u4_strd field and the remaining settings are fixed values   */
+    s_cfg_ip.e_frm_skip_mode = IVD_SKIP_NONE;
+    s_cfg_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT;
+    s_cfg_ip.e_vid_dec_mode = IVD_DECODE_FRAME;
+    s_cfg_ip.u4_disp_wd = ps_app_ctx->u4_strd;
+
+    e_status = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_cfg_ip,
+                                (void *)&s_cfg_op);
+    if(e_status != IV_SUCCESS)
+    {
+        printf("Error in Disable SkipB frames\n");
+    }
+
+    return e_status;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : enable_skippb_frames                                     */
+/*                                                                           */
+/*  Description   : Control call to enable skipping of P & B frames          */
+/*                                                                           */
+/*                                                                           */
+/*  Inputs        : codec_obj : Codec handle                                 */
+/*  Globals       :                                                          */
+/*  Processing    : Calls enable skip P and B frames control                 */
+/*                                                                           */
+/*  Outputs       :                                                          */
+/*  Returns       : Control call return status                               */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 09 2012   100189          Initial Version                      */
+/*                                                                           */
+/*****************************************************************************/
+
+IV_API_CALL_STATUS_T enable_skippb_frames(void *codec_obj,
+                                          vid_dec_ctx_t *ps_app_ctx)
+{
+    IV_API_CALL_STATUS_T e_status;
+    ivd_ctl_set_config_ip_t s_cfg_ip;
+    ivd_ctl_set_config_op_t s_cfg_op;
+
+    /* Structure sizes of the input and output arguments */
+    s_cfg_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t);
+    s_cfg_op.u4_size = sizeof(ivd_ctl_set_config_op_t);
+
+    /* Video control command, set-params sub command */
+    s_cfg_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+    s_cfg_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
+
+    /* Skip mode is IVD_SKIP_PB; display width comes from the app context's  */
+    /* u4_strd field and the remaining settings are fixed values             */
+    s_cfg_ip.e_frm_skip_mode = IVD_SKIP_PB;
+    s_cfg_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT;
+    s_cfg_ip.e_vid_dec_mode = IVD_DECODE_FRAME;
+    s_cfg_ip.u4_disp_wd = ps_app_ctx->u4_strd;
+
+    e_status = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_cfg_ip,
+                                (void *)&s_cfg_op);
+    if(e_status != IV_SUCCESS)
+    {
+        printf("Error in Enable SkipPB frames\n");
+    }
+
+    return e_status;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : disable_skippb_frames                                    */
+/*                                                                           */
+/*  Description   : Control call to disable skipping of P and B frames       */
+/*                                                                           */
+/*                                                                           */
+/*  Inputs        : codec_obj : Codec handle                                 */
+/*  Globals       :                                                          */
+/*  Processing    : Calls disable P and B frame skip control                 */
+/*                                                                           */
+/*  Outputs       :                                                          */
+/*  Returns       : Control call return status                               */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 09 2012   100189          Initial Version                      */
+/*                                                                           */
+/*****************************************************************************/
+
+IV_API_CALL_STATUS_T disable_skippb_frames(void *codec_obj,
+                                           vid_dec_ctx_t *ps_app_ctx)
+{
+    IV_API_CALL_STATUS_T e_status;
+    ivd_ctl_set_config_ip_t s_cfg_ip;
+    ivd_ctl_set_config_op_t s_cfg_op;
+
+    /* Structure sizes of the input and output arguments */
+    s_cfg_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t);
+    s_cfg_op.u4_size = sizeof(ivd_ctl_set_config_op_t);
+
+    /* Video control command, set-params sub command */
+    s_cfg_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+    s_cfg_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
+
+    /* Skip mode is IVD_SKIP_NONE; display width comes from the app          */
+    /* context's u4_strd field and the remaining settings are fixed values   */
+    s_cfg_ip.e_frm_skip_mode = IVD_SKIP_NONE;
+    s_cfg_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT;
+    s_cfg_ip.e_vid_dec_mode = IVD_DECODE_FRAME;
+    s_cfg_ip.u4_disp_wd = ps_app_ctx->u4_strd;
+
+    e_status = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_cfg_ip,
+                                (void *)&s_cfg_op);
+    if(e_status != IV_SUCCESS)
+    {
+        printf("Error in Disable SkipPB frames\n");
+    }
+
+    return e_status;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : release_disp_frame                                       */
+/*                                                                           */
+/*  Description   : Calls release display control - Used to signal to the    */
+/*                  decoder that this particular buffer has been displayed   */
+/*                  and that the codec is now free to write to this buffer   */
+/*                                                                           */
+/*                                                                           */
+/*  Inputs        : codec_obj : Codec Handle                                 */
+/*                  buf_id    : Buffer Id of the buffer to be released       */
+/*                              This id would have been returned earlier by  */
+/*                              the codec                                    */
+/*  Globals       :                                                          */
+/*  Processing    : Calls Release Display call                               */
+/*                                                                           */
+/*  Outputs       :                                                          */
+/*  Returns       : Status of release display call                           */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 09 2012   100189          Initial Version                      */
+/*                                                                           */
+/*****************************************************************************/
+
+IV_API_CALL_STATUS_T release_disp_frame(void *codec_obj, UWORD32 buf_id)
+{
+    IV_API_CALL_STATUS_T e_status;
+    ivd_rel_display_frame_ip_t s_rel_ip;
+    ivd_rel_display_frame_op_t s_rel_op;
+
+    /* Structure sizes of the input and output arguments */
+    s_rel_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t);
+    s_rel_op.u4_size = sizeof(ivd_rel_display_frame_op_t);
+
+    /* Release-display command for the buffer identified by buf_id */
+    s_rel_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME;
+    s_rel_ip.u4_disp_buf_id = buf_id;
+
+    e_status = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_rel_ip,
+                                (void *)&s_rel_op);
+    if(e_status != IV_SUCCESS)
+    {
+        printf("Error in Release Disp frame\n");
+    }
+
+    return e_status;
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : get_version                                      */
+/*                                                                           */
+/*  Description   : Control call to get codec version              */
+/*                                                                           */
+/*                                                                           */
+/*  Inputs        : codec_obj : Codec handle                                 */
+/*  Globals       :                                                          */
+/*  Processing    : Calls get version control, prints version string         */
+/*                                                                           */
+/*  Outputs       :                                                          */
+/*  Returns       : Control call return status                               */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 09 2012   100189          Initial Version                      */
+/*                                                                           */
+/*****************************************************************************/
+
+IV_API_CALL_STATUS_T get_version(void *codec_obj)
+{
+    ivd_ctl_getversioninfo_ip_t s_ctl_dec_ip;
+    ivd_ctl_getversioninfo_op_t s_ctl_dec_op;
+    UWORD8 au1_buf[512];
+    IV_API_CALL_STATUS_T status;
+
+    /* Start from an all-zero buffer so that it holds a valid (empty) string */
+    /* even if the codec reports success without writing anything into it    */
+    memset(au1_buf, 0, sizeof(au1_buf));
+
+    s_ctl_dec_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+    s_ctl_dec_ip.e_sub_cmd = IVD_CMD_CTL_GETVERSION;
+    s_ctl_dec_ip.u4_size = sizeof(ivd_ctl_getversioninfo_ip_t);
+    s_ctl_dec_op.u4_size = sizeof(ivd_ctl_getversioninfo_op_t);
+    /* Give the error code a defined value; it is printed on failure and the */
+    /* codec may return before filling it in                                 */
+    s_ctl_dec_op.u4_error_code = 0;
+    s_ctl_dec_ip.pv_version_buffer = au1_buf;
+    s_ctl_dec_ip.u4_version_buffer_size = sizeof(au1_buf);
+
+    status = ivd_api_function((iv_obj_t *)codec_obj,
+                                  (void *)&(s_ctl_dec_ip),
+                                  (void *)&(s_ctl_dec_op));
+
+    if(status != IV_SUCCESS)
+    {
+        printf("Error in Getting Version number e_dec_status = %d u4_error_code = %x\n",
+               status, s_ctl_dec_op.u4_error_code);
+    }
+    else
+    {
+        /* The buffer is printed as a C string below, so make sure it is     */
+        /* terminated even if the codec filled every byte of it              */
+        au1_buf[sizeof(au1_buf) - 1] = '\0';
+
+        printf("Ittiam Decoder Version number: %s\n",
+               (char *)s_ctl_dec_ip.pv_version_buffer);
+    }
+    return status;
+}
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : codec_exit                                               */
+/*                                                                           */
+/*  Description   : handles unrecoverable errors                             */
+/*  Inputs        : Error message                                            */
+/*  Globals       : None                                                     */
+/*  Processing    : Prints error message to console and exits.               */
+/*  Outputs       : Error message to the console                             */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         07 06 2006   Sankar          Creation                             */
+/*                                                                           */
+/*****************************************************************************/
+void codec_exit(CHAR *pc_err_message)
+{
+    /* Print the message followed by a newline on stdout, then terminate    */
+    /* the process with exit status -1                                      */
+    fprintf(stdout, "%s\n", pc_err_message);
+    exit(-1);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : dump_output                                              */
+/*                                                                           */
+/*  Description   : Used to dump output YUV                                  */
+/*  Inputs        : App context, disp output desc, File pointer              */
+/*  Globals       : None                                                     */
+/*  Processing    : Dumps to a file                                          */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes (Describe the changes made)  */
+/*         07 06 2006   Sankar          Creation                             */
+/*                                                                           */
+/*****************************************************************************/
+void dump_output(vid_dec_ctx_t *ps_app_ctx,
+                 iv_yuv_buf_t *ps_disp_frm_buf,
+                 UWORD32 u4_disp_frm_id,
+                 FILE *ps_op_file,
+                 FILE *ps_op_chksum_file,
+                 WORD32 i4_op_frm_ts,
+                 UWORD32 file_save,
+                 UWORD32 chksum_save)
+
+{
+    /* Writes one display frame to ps_op_file and, for IV_YUV_420P output,   */
+    /* the per-plane results of calc_md5_cksum() to ps_op_chksum_file.       */
+    /* Pixel data is written only when file_save is non-zero and checksums   */
+    /* only when chksum_save is non-zero, whatever the output format.        */
+
+    UWORD32 i;
+    iv_yuv_buf_t s_dump_disp_frm_buf;
+    UWORD32 u4_disp_id;
+
+    memset(&s_dump_disp_frm_buf, 0, sizeof(iv_yuv_buf_t));
+
+    if(ps_app_ctx->share_disp_buf)
+    {
+        /* Shared display buffers: the incoming frame is appended to a       */
+        /* circular queue of MAX_DISP_BUFFERS entries. A frame is taken out  */
+        /* of the queue only once i4_op_frm_ts has reached disp_delay - 1    */
+        if(ps_app_ctx->dump_q_wr_idx == MAX_DISP_BUFFERS)
+            ps_app_ctx->dump_q_wr_idx = 0;
+
+        if(ps_app_ctx->dump_q_rd_idx == MAX_DISP_BUFFERS)
+            ps_app_ctx->dump_q_rd_idx = 0;
+
+        ps_app_ctx->s_disp_frm_queue[ps_app_ctx->dump_q_wr_idx] =
+                        *ps_disp_frm_buf;
+        ps_app_ctx->s_disp_frm_id_queue[ps_app_ctx->dump_q_wr_idx] =
+                        u4_disp_frm_id;
+        ps_app_ctx->dump_q_wr_idx++;
+
+        if((WORD32)i4_op_frm_ts >= (WORD32)(ps_app_ctx->disp_delay - 1))
+        {
+            s_dump_disp_frm_buf =
+                            ps_app_ctx->s_disp_frm_queue[ps_app_ctx->dump_q_rd_idx];
+            u4_disp_id =
+                            ps_app_ctx->s_disp_frm_id_queue[ps_app_ctx->dump_q_rd_idx];
+            ps_app_ctx->dump_q_rd_idx++;
+        }
+        else
+        {
+            return;
+        }
+    }
+    else
+    {
+        s_dump_disp_frm_buf = *ps_disp_frm_buf;
+        u4_disp_id = u4_disp_frm_id;
+    }
+
+    /* The dequeued buffer is released before it is written out below */
+    if(1 == ps_app_ctx->share_disp_buf)
+        release_disp_frame(ps_app_ctx->cocodec_obj, u4_disp_id);
+
+    if(0 == file_save && 0 == chksum_save)
+        return;
+
+    if(NULL == s_dump_disp_frm_buf.pv_y_buf)
+        return;
+
+    if(ps_app_ctx->e_output_chroma_format == IV_YUV_420P)
+    {
+#if DUMP_SINGLE_BUF
+        if(0 != file_save)
+        {
+            UWORD8 *buf = s_dump_disp_frm_buf.pv_y_buf - 24 - (s_dump_disp_frm_buf.u4_y_strd * 40);
+
+            UWORD32 size = s_dump_disp_frm_buf.u4_y_strd * ((s_dump_disp_frm_buf.u4_y_ht + 80) + (s_dump_disp_frm_buf.u4_u_ht + 40));
+            fwrite(buf, 1, size, ps_op_file);
+
+        }
+#else
+        if(0 != file_save)
+        {
+            UWORD8 *buf;
+
+            /* Y, U and V planes: write u4_*_wd bytes per row and advance by */
+            /* the plane's stride                                            */
+            buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf;
+            for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++)
+            {
+                fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_wd, ps_op_file);
+                buf += s_dump_disp_frm_buf.u4_y_strd;
+            }
+
+            buf = (UWORD8 *)s_dump_disp_frm_buf.pv_u_buf;
+            for(i = 0; i < s_dump_disp_frm_buf.u4_u_ht; i++)
+            {
+                fwrite(buf, 1, s_dump_disp_frm_buf.u4_u_wd, ps_op_file);
+                buf += s_dump_disp_frm_buf.u4_u_strd;
+            }
+            buf = (UWORD8 *)s_dump_disp_frm_buf.pv_v_buf;
+            for(i = 0; i < s_dump_disp_frm_buf.u4_v_ht; i++)
+            {
+                fwrite(buf, 1, s_dump_disp_frm_buf.u4_v_wd, ps_op_file);
+                buf += s_dump_disp_frm_buf.u4_v_strd;
+            }
+
+        }
+
+        if(0 != chksum_save)
+        {
+            /* One 16-byte checksum per plane, written in Y, U, V order */
+            UWORD8 au1_y_chksum[16];
+            UWORD8 au1_u_chksum[16];
+            UWORD8 au1_v_chksum[16];
+            calc_md5_cksum((UWORD8 *)s_dump_disp_frm_buf.pv_y_buf,
+                           s_dump_disp_frm_buf.u4_y_strd,
+                           s_dump_disp_frm_buf.u4_y_wd,
+                           s_dump_disp_frm_buf.u4_y_ht,
+                           au1_y_chksum);
+            calc_md5_cksum((UWORD8 *)s_dump_disp_frm_buf.pv_u_buf,
+                           s_dump_disp_frm_buf.u4_u_strd,
+                           s_dump_disp_frm_buf.u4_u_wd,
+                           s_dump_disp_frm_buf.u4_u_ht,
+                           au1_u_chksum);
+            calc_md5_cksum((UWORD8 *)s_dump_disp_frm_buf.pv_v_buf,
+                           s_dump_disp_frm_buf.u4_v_strd,
+                           s_dump_disp_frm_buf.u4_v_wd,
+                           s_dump_disp_frm_buf.u4_v_ht,
+                           au1_v_chksum);
+
+            fwrite(au1_y_chksum, sizeof(UWORD8), 16, ps_op_chksum_file);
+            fwrite(au1_u_chksum, sizeof(UWORD8), 16, ps_op_chksum_file);
+            fwrite(au1_v_chksum, sizeof(UWORD8), 16, ps_op_chksum_file);
+        }
+#endif
+    }
+    else if((ps_app_ctx->e_output_chroma_format == IV_YUV_420SP_UV)
+                    || (ps_app_ctx->e_output_chroma_format == IV_YUV_420SP_VU))
+    {
+        /* Only pixel data is produced for these formats (no checksum), and  */
+        /* only when the caller asked for it through file_save               */
+#if DUMP_SINGLE_BUF
+        if(0 != file_save)
+        {
+
+            UWORD8 *buf = s_dump_disp_frm_buf.pv_y_buf - 24 - (s_dump_disp_frm_buf.u4_y_strd * 40);
+
+            UWORD32 size = s_dump_disp_frm_buf.u4_y_strd * ((s_dump_disp_frm_buf.u4_y_ht + 80) + (s_dump_disp_frm_buf.u4_u_ht + 40));
+            fwrite(buf, 1, size, ps_op_file);
+        }
+#else
+        if(0 != file_save)
+        {
+            UWORD8 *buf;
+
+            buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf;
+            for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++)
+            {
+                fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_wd, ps_op_file);
+                buf += s_dump_disp_frm_buf.u4_y_strd;
+            }
+
+            /* The second plane is described by the u_* fields */
+            buf = (UWORD8 *)s_dump_disp_frm_buf.pv_u_buf;
+            for(i = 0; i < s_dump_disp_frm_buf.u4_u_ht; i++)
+            {
+                fwrite(buf, 1, s_dump_disp_frm_buf.u4_u_wd, ps_op_file);
+                buf += s_dump_disp_frm_buf.u4_u_strd;
+            }
+        }
+#endif
+    }
+    else if(ps_app_ctx->e_output_chroma_format == IV_RGBA_8888)
+    {
+        if(0 != file_save)
+        {
+            UWORD8 *buf;
+
+            /* Width and stride are scaled by 4 to get byte counts */
+            buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf;
+            for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++)
+            {
+                fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_wd * 4, ps_op_file);
+                buf += s_dump_disp_frm_buf.u4_y_strd * 4;
+            }
+        }
+    }
+    else
+    {
+        if(0 != file_save)
+        {
+            UWORD8 *buf;
+
+            /* All remaining formats: each row is written as u4_y_strd * 2   */
+            /* bytes, i.e. the full stride rather than the width             */
+            buf = (UWORD8 *)s_dump_disp_frm_buf.pv_y_buf;
+            for(i = 0; i < s_dump_disp_frm_buf.u4_y_ht; i++)
+            {
+                fwrite(buf, 1, s_dump_disp_frm_buf.u4_y_strd * 2, ps_op_file);
+                buf += s_dump_disp_frm_buf.u4_y_strd * 2;
+            }
+        }
+    }
+
+    fflush(ps_op_file);
+    fflush(ps_op_chksum_file);
+
+}
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : print_usage                                              */
+/*                                                                           */
+/*  Description   : Prints argument format                                   */
+/*                                                                           */
+/*                                                                           */
+/*  Inputs        :                                                          */
+/*  Globals       :                                                          */
+/*  Processing    : Prints argument format                                   */
+/*                                                                           */
+/*  Outputs       :                                                          */
+/*  Returns       :                                                          */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 09 2012   100189          Initial Version                      */
+/*                                                                           */
+/*****************************************************************************/
+
+void print_usage(void)
+{
+    WORD32 i4_count = sizeof(argument_mapping) / sizeof(argument_t);
+    WORD32 i4_idx;
+
+    printf("\nUsage:\n");
+
+    /* One entry per argument_mapping element: name followed by description. */
+    /* No newline is added here, so any line break has to come from the      */
+    /* description text itself                                               */
+    for(i4_idx = 0; i4_idx < i4_count; i4_idx++)
+    {
+        printf("%-32s\t %s", argument_mapping[i4_idx].argument_name,
+               argument_mapping[i4_idx].description);
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : get_argument                                             */
+/*                                                                           */
+/*  Description   : Gets argument for a given string                         */
+/*                                                                           */
+/*                                                                           */
+/*  Inputs        : name                                                     */
+/*  Globals       :                                                          */
+/*  Processing    : Searches the given string in the array and returns       */
+/*                  appropriate argument ID                                  */
+/*                                                                           */
+/*  Outputs       : Argument ID                                              */
+/*  Returns       : Argument ID                                              */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 09 2012   100189          Initial Version                      */
+/*                                                                           */
+/*****************************************************************************/
+
+ARGUMENT_T get_argument(CHAR *name)
+{
+    const WORD32 table_len = sizeof(argument_mapping) / sizeof(argument_t);
+    WORD32 idx;
+
+    /* Linear scan of the argument table, the first matching entry wins */
+    for(idx = 0; idx < table_len; idx++)
+    {
+        /* Long argument name */
+        if(0 == strcmp(argument_mapping[idx].argument_name, name))
+            return argument_mapping[idx].argument;
+
+        /* Short argument name; an entry whose short name is "--" is never */
+        /* matched through its short name                                  */
+        if((0 != strcmp(argument_mapping[idx].argument_shortname, "--")) &&
+           (0 == strcmp(argument_mapping[idx].argument_shortname, name)))
+            return argument_mapping[idx].argument;
+    }
+
+    /* No entry matched */
+    return INVALID;
+}
+
+/*
+ * Stores the first whitespace-delimited token of value in dst, as
+ * sscanf(value, "%s", dst) does, but only when the whole of value fits in
+ * dst_size bytes including the terminator. An oversized value is reported
+ * and dst is left unchanged, so a long command line string can no longer
+ * overflow the fixed-size name buffers of the application context.
+ */
+static void copy_string_argument(CHAR *dst, size_t dst_size,
+                                 CHAR *argument, CHAR *value)
+{
+    if(strlen(value) >= dst_size)
+    {
+        printf("Value too long for argument :  %s\n", argument);
+        return;
+    }
+    sscanf(value, "%s", dst);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : parse_argument                                           */
+/*                                                                           */
+/*  Description   : Parses a single argument and its value                   */
+/*                                                                           */
+/*                                                                           */
+/*  Inputs        : ps_app_ctx : Application context                         */
+/*                  argument   : Argument name                               */
+/*                  value      : Argument value                              */
+/*  Globals       :                                                          */
+/*  Processing    : Maps the name to an argument ID and stores the value     */
+/*                  in the matching field of the application context         */
+/*                                                                           */
+/*  Outputs       : Application context is updated                           */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 09 2012   100189          Initial Version                      */
+/*                                                                           */
+/*****************************************************************************/
+
+void parse_argument(vid_dec_ctx_t *ps_app_ctx, CHAR *argument, CHAR *value)
+{
+    ARGUMENT_T arg;
+
+    arg = get_argument(argument);
+
+    /* main() passes argv[i + 1] as the value, which is NULL for a trailing */
+    /* option without a value. Only help, version and unknown arguments can */
+    /* be handled without one.                                              */
+    if((NULL == value) && (HELP != arg) && (VERSION != arg) && (INVALID != arg))
+    {
+        printf("Missing value for argument :  %s\n", argument);
+        return;
+    }
+
+    switch(arg)
+    {
+        case HELP:
+            /* Printing the usage terminates the application */
+            print_usage();
+            exit(-1);
+        case VERSION:
+            break;
+
+        /* File names: bounded copies into the context buffers */
+        case INPUT_FILE:
+            copy_string_argument(ps_app_ctx->ac_ip_fname,
+                                 sizeof(ps_app_ctx->ac_ip_fname),
+                                 argument, value);
+            break;
+
+        case OUTPUT:
+            copy_string_argument(ps_app_ctx->ac_op_fname,
+                                 sizeof(ps_app_ctx->ac_op_fname),
+                                 argument, value);
+            break;
+
+        case CHKSUM:
+            copy_string_argument(ps_app_ctx->ac_op_chksum_fname,
+                                 sizeof(ps_app_ctx->ac_op_chksum_fname),
+                                 argument, value);
+            break;
+
+        case SAVE_OUTPUT:
+            sscanf(value, "%d", &ps_app_ctx->u4_file_save_flag);
+            break;
+
+        case SAVE_CHKSUM:
+            sscanf(value, "%d", &ps_app_ctx->u4_chksum_save_flag);
+            break;
+
+        case CHROMA_FORMAT:
+            if((strcmp(value, "YUV_420P")) == 0)
+                ps_app_ctx->e_output_chroma_format = IV_YUV_420P;
+            else if((strcmp(value, "YUV_422ILE")) == 0)
+                ps_app_ctx->e_output_chroma_format = IV_YUV_422ILE;
+            else if((strcmp(value, "RGB_565")) == 0)
+                ps_app_ctx->e_output_chroma_format = IV_RGB_565;
+            else if((strcmp(value, "RGBA_8888")) == 0)
+                ps_app_ctx->e_output_chroma_format = IV_RGBA_8888;
+            else if((strcmp(value, "YUV_420SP_UV")) == 0)
+                ps_app_ctx->e_output_chroma_format = IV_YUV_420SP_UV;
+            else if((strcmp(value, "YUV_420SP_VU")) == 0)
+                ps_app_ctx->e_output_chroma_format = IV_YUV_420SP_VU;
+            else
+            {
+                printf("\nInvalid colour format setting it to IV_YUV_420P\n");
+                ps_app_ctx->e_output_chroma_format = IV_YUV_420P;
+            }
+
+            break;
+        case NUM_FRAMES:
+            sscanf(value, "%d", &ps_app_ctx->u4_max_frm_ts);
+            break;
+
+        case NUM_CORES:
+            sscanf(value, "%d", &ps_app_ctx->u4_num_cores);
+            break;
+        case DEGRADE_PICS:
+            /* Not supported: whatever value is passed, the field is forced */
+            /* to zero                                                      */
+            ps_app_ctx->i4_degrade_pics = 0;
+            printf("degrade_pics is not supported. Setting it to zero");
+            break;
+        case DEGRADE_TYPE:
+            sscanf(value, "%d", &ps_app_ctx->i4_degrade_type);
+            break;
+        case SHARE_DISPLAY_BUF:
+            sscanf(value, "%d", &ps_app_ctx->share_disp_buf);
+            break;
+        case LOOPBACK:
+            sscanf(value, "%d", &ps_app_ctx->loopback);
+            break;
+        case DISPLAY:
+            /* Display can only be enabled when a display backend is built */
+#if defined(SDL_DISPLAY) || defined(FBDEV_DISPLAY) || defined(INTEL_CE5300) || defined(IOS_DISPLAY)
+            sscanf(value, "%d", &ps_app_ctx->display);
+#else
+            ps_app_ctx->display = 0;
+#endif
+            break;
+        case FULLSCREEN:
+            sscanf(value, "%d", &ps_app_ctx->full_screen);
+            break;
+        case FPS:
+            sscanf(value, "%d", &ps_app_ctx->fps);
+            /* Non-positive frame rates fall back to the default */
+            if(ps_app_ctx->fps <= 0)
+                ps_app_ctx->fps = DEFAULT_FPS;
+            break;
+        case MAX_WD:
+            sscanf(value, "%d", &ps_app_ctx->max_wd);
+            break;
+        case MAX_HT:
+            sscanf(value, "%d", &ps_app_ctx->max_ht);
+            break;
+        case MAX_LEVEL:
+            sscanf(value, "%d", &ps_app_ctx->max_level);
+            break;
+        case ARCH:
+            if((strcmp(value, "ARM_NONEON")) == 0)
+                ps_app_ctx->e_arch = ARCH_ARM_NONEON;
+            else if((strcmp(value, "ARM_A9Q")) == 0)
+                ps_app_ctx->e_arch = ARCH_ARM_A9Q;
+            else if((strcmp(value, "ARM_V8")) == 0)
+                ps_app_ctx->e_arch = ARCH_ARMV8_GENERIC;
+            else if((strcmp(value, "ARM_A7")) == 0)
+                ps_app_ctx->e_arch = ARCH_ARM_A7;
+            else if((strcmp(value, "ARM_A5")) == 0)
+                ps_app_ctx->e_arch = ARCH_ARM_A5;
+            else if((strcmp(value, "ARM_NEONINTR")) == 0)
+                ps_app_ctx->e_arch = ARCH_ARM_NEONINTR;
+            else if((strcmp(value, "X86_GENERIC")) == 0)
+                ps_app_ctx->e_arch = ARCH_X86_GENERIC;
+            else if((strcmp(value, "X86_SSSE3")) == 0)
+                ps_app_ctx->e_arch = ARCH_X86_SSSE3;
+            else if((strcmp(value, "X86_SSE42")) == 0)
+                ps_app_ctx->e_arch = ARCH_X86_SSE42;
+            else if((strcmp(value, "X86_AVX2")) == 0)
+                ps_app_ctx->e_arch = ARCH_X86_AVX2;
+            else if((strcmp(value, "MIPS_GENERIC")) == 0)
+                ps_app_ctx->e_arch = ARCH_MIPS_GENERIC;
+            else if((strcmp(value, "MIPS_32")) == 0)
+                ps_app_ctx->e_arch = ARCH_MIPS_32;
+            else
+            {
+                printf("\nInvalid Arch. Setting it to ARM_A9Q\n");
+                ps_app_ctx->e_arch = ARCH_ARM_A9Q;
+            }
+
+            break;
+        case SOC:
+            if((strcmp(value, "GENERIC")) == 0)
+                ps_app_ctx->e_soc = SOC_GENERIC;
+            else if((strcmp(value, "HISI_37X")) == 0)
+                ps_app_ctx->e_soc = SOC_HISI_37X;
+            else
+            {
+                /* Any other string is converted with atoi() and stored as */
+                /* the SOC value without further validation                */
+                ps_app_ctx->e_soc = atoi(value);
+            }
+            break;
+        case PICLEN:
+            sscanf(value, "%d", &ps_app_ctx->u4_piclen_flag);
+            break;
+
+        case PICLEN_FILE:
+            copy_string_argument(ps_app_ctx->ac_piclen_fname,
+                                 sizeof(ps_app_ctx->ac_piclen_fname),
+                                 argument, value);
+            break;
+
+        case INVALID:
+        default:
+            printf("Ignoring argument :  %s\n", argument);
+            break;
+    }
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : read_cfg_file                                            */
+/*                                                                           */
+/*  Description   : Reads arguments from a configuration file                */
+/*                                                                           */
+/*                                                                           */
+/*  Inputs        : ps_app_ctx  : Application context                        */
+/*                  fp_cfg_file : Configuration file handle                  */
+/*  Globals       :                                                          */
+/*  Processing    : Parses the arguments and fills in the application context*/
+/*                                                                           */
+/*  Outputs       : Arguments parsed                                         */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        :                                                          */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 09 2012   100189          Initial Version                      */
+/*                                                                           */
+/*****************************************************************************/
+
+void read_cfg_file(vid_dec_ctx_t *ps_app_ctx, FILE *fp_cfg_file)
+{
+    CHAR line[STRLENGTH];
+    CHAR value[STRLENGTH];
+    CHAR argument[STRLENGTH];
+
+    /* fgets() returns NULL at end of file and on a read error, so it alone */
+    /* terminates the loop                                                  */
+    while(NULL != fgets(line, STRLENGTH, fp_cfg_file))
+    {
+        /* Reset both tokens for every line: a line that carries only an   */
+        /* argument name must not reuse the value parsed from an earlier   */
+        /* line                                                            */
+        argument[0] = '\0';
+        value[0] = '\0';
+
+        /* A line holds an argument name followed by its value; anything   */
+        /* after the value is ignored. Both tokens are shorter than the    */
+        /* line they come from, so they fit the STRLENGTH buffers.         */
+        if(sscanf(line, "%s %s", argument, value) < 1)
+            continue;
+
+        /* value is an empty string when the line had no second token */
+        parse_argument(ps_app_ctx, argument, value);
+    }
+}
+
+/*!
+**************************************************************************
+* \if Function name : dispq_producer_dequeue \endif
+*
+* \brief
+*    Waits until the display queue has room for one more buffer and then
+*    reports the current write index. While the queue is full the function
+*    calls ithread_msleep(1) between checks; the wait is abandoned as soon
+*    as quit is set in the application context.
+*
+* \param[in]  ps_app_ctx  : Pointer to application context
+*
+* \return
+*    Current write index of the display queue, or -1 if quit was seen
+*    while waiting
+*
+* \author
+*  Ittiam
+*
+**************************************************************************
+*/
+WORD32 dispq_producer_dequeue(vid_dec_ctx_t *ps_app_ctx)
+{
+    for(;;)
+    {
+        /* The queue is full when advancing the write index by one (with */
+        /* wrap around) would land on the read index                     */
+        if(((ps_app_ctx->disp_q_wr_idx + 1) % NUM_DISPLAY_BUFFERS) != ps_app_ctx->disp_q_rd_idx)
+            break;
+
+        ithread_msleep(1);
+
+        if(ps_app_ctx->quit)
+            return (-1);
+    }
+
+    return (ps_app_ctx->disp_q_wr_idx);
+}
+
+/*!
+**************************************************************************
+* \if Function name : dispq_producer_queue \endif
+*
+* \brief
+*    Advances the write index of the display queue by one, wrapping to 0
+*    after NUM_DISPLAY_BUFFERS - 1, which hands the buffer at the previous
+*    write index over for display
+*
+* \param[in]  ps_app_ctx  : Pointer to application context
+*
+* \return
+*    Always 0
+*
+* \author
+*  Ittiam
+*
+**************************************************************************
+*/
+WORD32 dispq_producer_queue(vid_dec_ctx_t *ps_app_ctx)
+{
+    ps_app_ctx->disp_q_wr_idx += 1;
+
+    /* Wrap around at the end of the queue */
+    if(NUM_DISPLAY_BUFFERS == ps_app_ctx->disp_q_wr_idx)
+    {
+        ps_app_ctx->disp_q_wr_idx = 0;
+    }
+
+    return (0);
+}
+/*!
+**************************************************************************
+* \if Function name : dispq_consumer_dequeue \endif
+*
+* \brief
+*    Waits until the display queue holds at least one buffer and then
+*    reports the current read index. While the queue is empty the function
+*    calls ithread_msleep(1) between checks; the wait is abandoned as soon
+*    as quit is set in the application context.
+*
+* \param[in]  ps_app_ctx  : Pointer to application context
+*
+* \return
+*    Current read index of the display queue, or -1 if quit was seen
+*    while waiting
+*
+* \author
+*  Ittiam
+*
+**************************************************************************
+*/
+WORD32 dispq_consumer_dequeue(vid_dec_ctx_t *ps_app_ctx)
+{
+    for(;;)
+    {
+        /* The queue is empty while the write and read indices are equal */
+        if(ps_app_ctx->disp_q_wr_idx != ps_app_ctx->disp_q_rd_idx)
+            break;
+
+        ithread_msleep(1);
+
+        if(ps_app_ctx->quit)
+            return (-1);
+    }
+
+    return (ps_app_ctx->disp_q_rd_idx);
+}
+
+/*!
+**************************************************************************
+* \if Function name : dispq_consumer_queue \endif
+*
+* \brief
+*    Advances the read index of the display queue by one, wrapping to 0
+*    after NUM_DISPLAY_BUFFERS - 1, which releases the buffer at the
+*    previous read index
+*
+* \param[in]  ps_app_ctx  : Pointer to application context
+*
+* \return
+*    Always 0
+*
+* \author
+*  Ittiam
+*
+**************************************************************************
+*/
+WORD32 dispq_consumer_queue(vid_dec_ctx_t *ps_app_ctx)
+{
+    ps_app_ctx->disp_q_rd_idx += 1;
+
+    /* Wrap around at the end of the queue */
+    if(NUM_DISPLAY_BUFFERS == ps_app_ctx->disp_q_rd_idx)
+    {
+        ps_app_ctx->disp_q_rd_idx = 0;
+    }
+
+    return (0);
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : display_thread                                           */
+/*                                                                           */
+/*  Description   : Thread to display the frame                              */
+/*                                                                           */
+/*                                                                           */
+/*  Inputs        : pv_ctx  : Application context                            */
+/*                                                                           */
+/*  Globals       :                                                          */
+/*  Processing    : Wait for a buffer to get produced by decoder and display */
+/*                  that frame                                               */
+/*                                                                           */
+/*  Outputs       :                                                          */
+/*  Returns       : None                                                     */
+/*                                                                           */
+/*  Issues        : Pause followed by quit can lead to a deadlock.           */
+/*                  If the decoder lags initially and then speeds up, the    */
+/*                  display also runs faster until it reaches equilibrium    */
+/*                  with respect to the initial time.                        */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 05 2013   100578          Initial Version                      */
+/*                                                                           */
+/*****************************************************************************/
+
+WORD32 display_thread(void *pv_ctx)
+{
+    /* Thread entry point: pv_ctx is the application context. The display  */
+    /* is initialised through the callbacks stored in the context, queued  */
+    /* frames are shown paced by the configured fps until quit is set, and */
+    /* the display is torn down once display_deinit_flag becomes non-zero. */
+    vid_dec_ctx_t *ps_app_ctx = (vid_dec_ctx_t *)pv_ctx;
+
+
+    UWORD32 frm_duration; /* in us */
+    UWORD32 current_time;
+    UWORD32 expected_time;
+    TIMER   s_end_timer;
+    TIMER   s_first_frame_time;
+    UWORD32 first_frame_displayed;
+
+    /* NOTE(review): frequency exists only under WINDOWS_TIMER although    */
+    /* ELAPSEDTIME below always names it - presumably the macro drops the  */
+    /* argument for other timers; confirm against its definition.          */
+#ifdef WINDOWS_TIMER
+    TIMER frequency;
+#endif
+
+
+#ifdef WINDOWS_TIMER
+    QueryPerformanceFrequency(&frequency);
+#endif
+    first_frame_displayed = 0;
+    expected_time = 0;
+    /* Integer frame period; fps must be non-zero here. parse_argument()   */
+    /* replaces non-positive values with DEFAULT_FPS.                      */
+    frm_duration = 1000000 / ps_app_ctx->fps;
+
+    /* Init display and allocate display buffers */
+    ps_app_ctx->pv_disp_ctx = (void *)ps_app_ctx->disp_init(ps_app_ctx->u4_pic_wd,
+                                                            ps_app_ctx->u4_pic_ht,
+                                                            ps_app_ctx->i4_screen_wd,
+                                                            ps_app_ctx->i4_screen_ht,
+                                                            ps_app_ctx->max_wd,
+                                                            ps_app_ctx->max_ht,
+                                                            ps_app_ctx->full_screen,
+                                                            &ps_app_ctx->quit,
+                                                            &ps_app_ctx->paused);
+    ps_app_ctx->alloc_disp_buffers(ps_app_ctx->pv_disp_ctx);
+
+    /* Flag that display init and buffer allocation have completed */
+    ps_app_ctx->display_init_done = 1;
+
+    while(1)
+    {
+        WORD32 rd_idx;
+
+        /* Blocks until a frame is queued; returns -1 when quit is set     */
+        /* while waiting, which the quit check below turns into a loop exit */
+        rd_idx = dispq_consumer_dequeue(ps_app_ctx);
+        if(ps_app_ctx->quit)
+            break;
+
+        ps_app_ctx->display_buffer(ps_app_ctx->pv_disp_ctx, rd_idx);
+
+        /* The time base for pacing is taken once, right after the first   */
+        /* frame has been displayed                                        */
+        if(0 == first_frame_displayed)
+        {
+            GETTIME(&s_first_frame_time);
+            first_frame_displayed = 1;
+        }
+
+        /*********************************************************************/
+        /* Sleep based on the expected time of arrival of current buffer and */
+        /* the Current frame                                                 */
+        /*********************************************************************/
+
+        /* current_time: time elapsed since the first displayed frame */
+        GETTIME(&s_end_timer);
+        ELAPSEDTIME(s_first_frame_time, s_end_timer, current_time, frequency);
+
+        /* time in micro second */
+        expected_time += frm_duration;
+
+        //printf("current_time %d expected_time %d diff %d \n", current_time, expected_time, (expected_time - current_time));
+        /* sleep for the diff. in time */
+        /* When display is already late, the else branch makes             */
+        /* expected_time equal to current_time so the lag is not carried   */
+        /* over to later frames                                            */
+        if(current_time < expected_time)
+            ps_app_ctx->disp_usleep((expected_time - current_time));
+        else
+            expected_time += (current_time - expected_time);
+
+        /* Advance the read index: the displayed buffer is released */
+        dispq_consumer_queue(ps_app_ctx);
+
+    }
+
+
+    /* After quit, poll until display_deinit_flag is set (it is not set in */
+    /* this function) before deinitialising the display                    */
+    while(0 == ps_app_ctx->display_deinit_flag)
+    {
+        ps_app_ctx->disp_usleep(1000);
+    }
+    ps_app_ctx->disp_deinit(ps_app_ctx->pv_disp_ctx);
+
+    /* destroy the display thread */
+    ithread_exit(ps_app_ctx->display_thread_handle);
+
+    return 0;
+}
+
+/*!
+**************************************************************************
+* \if Function name : flush_output \endif
+*
+* \brief
+*    Drains the frames still held by the decoder. Each iteration issues the
+*    flush control call followed by a decode call; a frame reported by the
+*    decode call is passed to dump_output() and the output frame count is
+*    incremented. The loop ends when the output frame count reaches
+*    u4_max_frm_ts + disp_delay or when either call does not return
+*    IV_SUCCESS.
+*
+* \param[in]     codec_obj          : Codec handle
+* \param[in]     ps_app_ctx         : Pointer to application context
+* \param[in]     ps_out_buf         : Output buffers given to the decode call
+* \param[in]     pu1_bs_buf         : Stream buffer given to the decode call
+* \param[in,out] pu4_op_frm_ts      : Output frame count
+* \param[in]     ps_op_file         : Output file, passed to dump_output()
+* \param[in]     ps_op_chksum_file  : Checksum file, passed to dump_output()
+* \param[in]     u4_ip_frm_ts       : Timestamp given to the decode call
+* \param[in]     u4_bytes_remaining : Byte count given to the decode call
+*
+* \return
+*    None
+*
+**************************************************************************
+*/
+void flush_output(iv_obj_t *codec_obj,
+                  vid_dec_ctx_t *ps_app_ctx,
+                  ivd_out_bufdesc_t *ps_out_buf,
+                  UWORD8 *pu1_bs_buf,
+                  UWORD32 *pu4_op_frm_ts,
+                  FILE *ps_op_file,
+                  FILE *ps_op_chksum_file,
+                  UWORD32 u4_ip_frm_ts,
+                  UWORD32 u4_bytes_remaining)
+{
+    for(;;)
+    {
+        WORD32 status;
+        WORD32 plane;
+        ivd_ctl_flush_ip_t s_flush_ip;
+        ivd_ctl_flush_op_t s_flush_op;
+        ivd_video_decode_ip_t s_dec_ip;
+        ivd_video_decode_op_t s_dec_op;
+
+        /* Stop once the requested number of frames has been written out */
+        if(*pu4_op_frm_ts >= (ps_app_ctx->u4_max_frm_ts + ps_app_ctx->disp_delay))
+            break;
+
+        /*********************************************************************/
+        /*   API Call: Set the decoder in flush mode                         */
+        /*********************************************************************/
+        s_flush_ip.u4_size = sizeof(ivd_ctl_flush_ip_t);
+        s_flush_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+        s_flush_ip.e_sub_cmd = IVD_CMD_CTL_FLUSH;
+        s_flush_op.u4_size = sizeof(ivd_ctl_flush_op_t);
+
+        status = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_flush_ip,
+                                  (void *)&s_flush_op);
+        if(IV_SUCCESS != status)
+        {
+            printf("Error in Setting the decoder in flush mode\n");
+            break;
+        }
+
+        /* Decode call input: stream parameters and the output buffers */
+        s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t);
+        s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
+        s_dec_ip.u4_ts = u4_ip_frm_ts;
+        s_dec_ip.pv_stream_buffer = pu1_bs_buf;
+        s_dec_ip.u4_num_Bytes = u4_bytes_remaining;
+
+        s_dec_ip.s_out_buffer.u4_num_bufs = ps_out_buf->u4_num_bufs;
+        for(plane = 0; plane < 3; plane++)
+        {
+            s_dec_ip.s_out_buffer.u4_min_out_buf_size[plane] =
+                            ps_out_buf->u4_min_out_buf_size[plane];
+            s_dec_ip.s_out_buffer.pu1_bufs[plane] =
+                            ps_out_buf->pu1_bufs[plane];
+        }
+
+        s_dec_op.u4_size = sizeof(ivd_video_decode_op_t);
+
+        /*********************************************************************/
+        /*   API Call: Video Decode                                          */
+        /*********************************************************************/
+        status = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_dec_ip,
+                                  (void *)&s_dec_op);
+
+        /* A frame reported by the decode call is dumped whatever the call */
+        /* returned                                                        */
+        if(1 == s_dec_op.u4_output_present)
+        {
+            dump_output(ps_app_ctx, &(s_dec_op.s_disp_frm_buf),
+                        s_dec_op.u4_disp_buf_id, ps_op_file,
+                        ps_op_chksum_file,
+                        *pu4_op_frm_ts, ps_app_ctx->u4_file_save_flag,
+                        ps_app_ctx->u4_chksum_save_flag);
+
+            (*pu4_op_frm_ts)++;
+        }
+
+        if(IV_SUCCESS != status)
+            break;
+    }
+}
+
+#ifdef X86_MINGW
+/* SIGSEGV handler that main() registers through signal(): prints a message */
+/* and terminates the process with exit(-1).                                */
+/* signal() calls its handler with the signal number, so the handler must   */
+/* have the type void (*)(int); the number itself is not used.              */
+void sigsegv_handler(int i4_signum)
+{
+    (void)i4_signum;
+    printf("Segmentation fault, Exiting.. \n");
+    exit(-1);
+}
+#endif
+
+/* Default get_stride callback: main() stores it in the application context */
+/* before the display specific callbacks are selected. Always returns 0.    */
+UWORD32 default_get_stride(void)
+{
+    const UWORD32 u4_default_stride = 0;
+
+    return u4_default_stride;
+}
+
+
+/* Default get_color_fmt callback: main() stores it in the application      */
+/* context before the display specific callbacks are selected. Always       */
+/* returns IV_YUV_420P.                                                     */
+IV_COLOR_FORMAT_T default_get_color_fmt(void)
+{
+    const IV_COLOR_FORMAT_T e_default_fmt = IV_YUV_420P;
+
+    return e_default_fmt;
+}
+/*****************************************************************************/
+/*                                                                           */
+/*  Function Name : main                                                     */
+/*                                                                           */
+/*  Description   : Application to demonstrate codec API                     */
+/*                                                                           */
+/*                                                                           */
+/*  Inputs        : argc    - Number of arguments                            */
+/*                  argv[]  - Arguments                                      */
+/*  Globals       :                                                          */
+/*  Processing    : Shows how to use create, process, control and delete     */
+/*                                                                           */
+/*  Outputs       : Codec output in a file                                   */
+/*  Returns       :                                                          */
+/*                                                                           */
+/*  Issues        : Assumes both PROFILE_ENABLE to be                        */
+/*                  defined for multithread decode-display working           */
+/*                                                                           */
+/*  Revision History:                                                        */
+/*                                                                           */
+/*         DD MM YYYY   Author(s)       Changes                              */
+/*         07 09 2012   100189          Initial Version                      */
+/*         09 05 2013   100578          Multithread decode-display           */
+/*****************************************************************************/
+#ifdef IOS
+int vdec_main(char *homedir, char *documentdir, int screen_wd, int screen_ht)
+#else
+int main(WORD32 argc, CHAR *argv[])
+#endif
+{
+    CHAR ac_cfg_fname[STRLENGTH];
+    FILE *fp_cfg_file = NULL;
+    FILE *ps_piclen_file = NULL;
+    FILE *ps_ip_file = NULL;
+    FILE *ps_op_file = NULL;
+    FILE *ps_op_chksum_file = NULL;
+    WORD32 ret;
+    CHAR ac_error_str[STRLENGTH];
+    vid_dec_ctx_t s_app_ctx;
+    UWORD8 *pu1_bs_buf;
+
+    ivd_out_bufdesc_t *ps_out_buf;
+    UWORD32 u4_num_bytes_dec = 0;
+    UWORD32 file_pos = 0;
+    IV_API_CALL_STATUS_T e_dec_status;
+    UWORD32 u4_ip_frm_ts = 0, u4_op_frm_ts = 0;
+
+    WORD32 u4_bytes_remaining = 0;
+    void *pv_mem_rec_location;
+    UWORD32 u4_num_mem_recs;
+    UWORD32 i;
+    UWORD32 u4_ip_buf_len;
+    UWORD32 frm_cnt = 0;
+    WORD32 total_bytes_comsumed;
+
+#ifdef PROFILE_ENABLE
+    UWORD32 u4_tot_cycles = 0;
+    UWORD32 u4_tot_fmt_cycles = 0;
+    UWORD32 peak_window[PEAK_WINDOW_SIZE];
+    UWORD32 peak_window_idx = 0;
+    UWORD32 peak_avg_max = 0;
+#ifdef INTEL_CE5300
+    UWORD32 time_consumed = 0;
+    UWORD32 bytes_consumed = 0;
+#endif
+#endif
+#ifdef WINDOWS_TIMER
+    TIMER frequency;
+#endif
+    WORD32 width = 0, height = 0;
+    iv_obj_t *codec_obj;
+#if defined(GPU_BUILD) && !defined(X86)
+//    int ioctl_init();
+//    ioctl_init();
+#endif
+
+#ifdef X86_MINGW
+    //For getting printfs without any delay
+    setvbuf(stdout, NULL, _IONBF, 0);
+    setvbuf(stderr, NULL, _IONBF, 0);
+#endif
+#ifdef IOS
+    sprintf(filename_trace, "%s/iostrace.txt", homedir);
+    printf("\ntrace file name = %s", filename_trace);
+#endif
+
+#ifdef X86_MINGW
+    {
+        signal(SIGSEGV, sigsegv_handler);
+    }
+#endif
+
+
+#ifndef IOS
+    /* Usage */
+    if(argc < 2)
+    {
+        printf("Using test.cfg as configuration file \n");
+        strcpy(ac_cfg_fname, "test.cfg");
+    }
+    else if(argc == 2)
+    {
+        strcpy(ac_cfg_fname, argv[1]);
+    }
+
+#else
+    strcpy(ac_cfg_fname, "test.cfg");
+
+#endif
+
+
+    /***********************************************************************/
+    /*                  Initialize Application parameters                  */
+    /***********************************************************************/
+
+    strcpy(s_app_ctx.ac_ip_fname, "\0");
+    s_app_ctx.dump_q_wr_idx = 0;
+    s_app_ctx.dump_q_rd_idx = 0;
+    s_app_ctx.display_thread_created = 0;
+    s_app_ctx.disp_q_wr_idx = 0;
+    s_app_ctx.disp_q_rd_idx = 0;
+    s_app_ctx.disp_delay = 0;
+    s_app_ctx.loopback = 0;
+    s_app_ctx.display = 0;
+    s_app_ctx.full_screen = 0;
+    s_app_ctx.u4_piclen_flag = 0;
+    s_app_ctx.fps = DEFAULT_FPS;
+    file_pos = 0;
+    total_bytes_comsumed = 0;
+    u4_ip_frm_ts = 0;
+    u4_op_frm_ts = 0;
+#ifdef PROFILE_ENABLE
+    memset(peak_window, 0, sizeof(WORD32) * PEAK_WINDOW_SIZE);
+#endif
+    s_app_ctx.share_disp_buf = DEFAULT_SHARE_DISPLAY_BUF;
+    s_app_ctx.u4_num_cores = DEFAULT_NUM_CORES;
+    s_app_ctx.i4_degrade_type = 0;
+    s_app_ctx.i4_degrade_pics = 0;
+    s_app_ctx.max_wd = 0;
+    s_app_ctx.max_ht = 0;
+    s_app_ctx.max_level = 0;
+    s_app_ctx.e_arch = ARCH_ARM_A9Q;
+    s_app_ctx.e_soc = SOC_GENERIC;
+
+    s_app_ctx.u4_strd = STRIDE;
+
+    s_app_ctx.display_thread_handle           = malloc(ithread_get_handle_size());
+    s_app_ctx.quit          = 0;
+    s_app_ctx.paused        = 0;
+    //s_app_ctx.u4_output_present = 0;
+
+    s_app_ctx.get_stride = &default_get_stride;
+
+    s_app_ctx.get_color_fmt = &default_get_color_fmt;
+
+    /* Set function pointers for display */
+#ifdef SDL_DISPLAY
+    s_app_ctx.disp_init = &sdl_disp_init;
+    s_app_ctx.alloc_disp_buffers = &sdl_alloc_disp_buffers;
+    s_app_ctx.display_buffer = &sdl_display;
+    s_app_ctx.set_disp_buffers = &sdl_set_disp_buffers;
+    s_app_ctx.disp_deinit = &sdl_disp_deinit;
+    s_app_ctx.disp_usleep = &sdl_disp_usleep;
+    s_app_ctx.get_color_fmt = &sdl_get_color_fmt;
+    s_app_ctx.get_stride = &sdl_get_stride;
+#endif
+
+#ifdef FBDEV_DISPLAY
+    s_app_ctx.disp_init = &fbd_disp_init;
+    s_app_ctx.alloc_disp_buffers = &fbd_alloc_disp_buffers;
+    s_app_ctx.display_buffer = &fbd_display;
+    s_app_ctx.set_disp_buffers = &fbd_set_disp_buffers;
+    s_app_ctx.disp_deinit = &fbd_disp_deinit;
+    s_app_ctx.disp_usleep = &fbd_disp_usleep;
+    s_app_ctx.get_color_fmt = &fbd_get_color_fmt;
+    s_app_ctx.get_stride = &fbd_get_stride;
+#endif
+
+#ifdef INTEL_CE5300
+    s_app_ctx.disp_init = &gdl_disp_init;
+    s_app_ctx.alloc_disp_buffers = &gdl_alloc_disp_buffers;
+    s_app_ctx.display_buffer = &gdl_display;
+    s_app_ctx.set_disp_buffers = &gdl_set_disp_buffers;
+    s_app_ctx.disp_deinit = &gdl_disp_deinit;
+    s_app_ctx.disp_usleep = &gdl_disp_usleep;
+    s_app_ctx.get_color_fmt = &gdl_get_color_fmt;
+    s_app_ctx.get_stride = &gdl_get_stride;
+#endif
+
+#ifdef IOS_DISPLAY
+    s_app_ctx.disp_init = &ios_disp_init;
+    s_app_ctx.alloc_disp_buffers = &ios_alloc_disp_buffers;
+    s_app_ctx.display_buffer = &ios_display;
+    s_app_ctx.set_disp_buffers = &ios_set_disp_buffers;
+    s_app_ctx.disp_deinit = &ios_disp_deinit;
+    s_app_ctx.disp_usleep = &ios_disp_usleep;
+    s_app_ctx.get_color_fmt = &ios_get_color_fmt;
+    s_app_ctx.get_stride = &ios_get_stride;
+#endif
+
+    s_app_ctx.display_deinit_flag = 0;
+    s_app_ctx.e_output_chroma_format = IV_YUV_420SP_UV;
+    /*************************************************************************/
+    /* Parse arguments                                                       */
+    /*************************************************************************/
+
+#ifndef IOS
+    /* Read command line arguments */
+    if(argc > 2)
+    {
+        for(i = 1; i < (UWORD32)argc; i += 2)
+        {
+            if(CONFIG == get_argument(argv[i]))
+            {
+                strcpy(ac_cfg_fname, argv[i + 1]);
+                if((fp_cfg_file = fopen(ac_cfg_fname, "r")) == NULL)
+                {
+                    sprintf(ac_error_str, "Could not open Configuration file %s",
+                            ac_cfg_fname);
+                    codec_exit(ac_error_str);
+                }
+                read_cfg_file(&s_app_ctx, fp_cfg_file);
+                fclose(fp_cfg_file);
+            }
+            else
+            {
+                parse_argument(&s_app_ctx, argv[i], argv[i + 1]);
+            }
+        }
+    }
+    else
+    {
+        if((fp_cfg_file = fopen(ac_cfg_fname, "r")) == NULL)
+        {
+            sprintf(ac_error_str, "Could not open Configuration file %s",
+                    ac_cfg_fname);
+            codec_exit(ac_error_str);
+        }
+        read_cfg_file(&s_app_ctx, fp_cfg_file);
+        fclose(fp_cfg_file);
+    }
+#else
+    sprintf(filename_with_path, "%s/%s", homedir, ac_cfg_fname);
+    if((fp_cfg_file = fopen(filename_with_path, "r")) == NULL)
+    {
+        sprintf(ac_error_str, "Could not open Configuration file %s",
+                ac_cfg_fname);
+        codec_exit(ac_error_str);
+
+    }
+    read_cfg_file(&s_app_ctx, fp_cfg_file);
+    fclose(fp_cfg_file);
+
+#endif
+#ifdef PRINT_PICSIZE
+    /* If the binary is used for only getting number of bytes in each picture, then disable the following features */
+    s_app_ctx.u4_piclen_flag = 0;
+    s_app_ctx.u4_file_save_flag = 0;
+    s_app_ctx.u4_chksum_save_flag = 0;
+    s_app_ctx.i4_degrade_pics = 0;
+    s_app_ctx.i4_degrade_type = 0;
+    s_app_ctx.loopback = 0;
+    s_app_ctx.share_disp_buf = 0;
+    s_app_ctx.display = 0;
+#endif
+
+    /* If display is enabled, then turn off shared mode and get color format that is supported by display */
+    if(1 == s_app_ctx.display)
+    {
+        s_app_ctx.share_disp_buf = 0;
+        s_app_ctx.e_output_chroma_format = s_app_ctx.get_color_fmt();
+    }
+    if(strcmp(s_app_ctx.ac_ip_fname, "\0") == 0)
+    {
+        printf("\nNo input file given for decoding\n");
+        exit(-1);
+    }
+
+
+    /***********************************************************************/
+    /*          create the file object for input file                      */
+    /***********************************************************************/
+#ifdef IOS
+    sprintf(filename_with_path, "%s/%s", homedir, s_app_ctx.ac_ip_fname);
+    ps_ip_file = fopen(filename_with_path, "rb");
+#else
+    ps_ip_file = fopen(s_app_ctx.ac_ip_fname, "rb");
+#endif
+    if(NULL == ps_ip_file)
+    {
+        sprintf(ac_error_str, "Could not open input file %s",
+                s_app_ctx.ac_ip_fname);
+        codec_exit(ac_error_str);
+    }
+    /***********************************************************************/
+    /*          create the file object for piclen file                     */
+    /***********************************************************************/
+    if(1 == s_app_ctx.u4_piclen_flag)
+    {
+#ifdef IOS
+        sprintf(filename_with_path, "%s/%s", homedir, s_app_ctx.ac_piclen_fname);
+        ps_piclen_file = fopen(filename_with_path, "rb");
+#else
+        ps_piclen_file = fopen(s_app_ctx.ac_piclen_fname, "rb");
+#endif
+        if(NULL == ps_piclen_file)
+        {
+            sprintf(ac_error_str, "Could not open piclen file %s",
+                    s_app_ctx.ac_piclen_fname);
+            codec_exit(ac_error_str);
+        }
+    }
+
+    /***********************************************************************/
+    /*          create the file object for output file                     */
+    /***********************************************************************/
+    if(1 == s_app_ctx.u4_file_save_flag)
+    {
+#ifdef IOS
+        sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctx.ac_op_fname);
+        ps_op_file = fopen(filename_with_path, "wb");
+#else
+        ps_op_file = fopen(s_app_ctx.ac_op_fname, "wb");
+#endif
+
+        if(NULL == ps_op_file)
+        {
+            sprintf(ac_error_str, "Could not open output file %s",
+                    s_app_ctx.ac_op_fname);
+            codec_exit(ac_error_str);
+        }
+    }
+
+    /***********************************************************************/
+    /*          create the file object for check sum file                  */
+    /***********************************************************************/
+    if(1 == s_app_ctx.u4_chksum_save_flag)
+    {
+#if IOS
+        sprintf(filename_with_path, "%s/%s", documentdir, s_app_ctx.ac_op_chksum_fname);
+        ps_op_chksum_file = fopen(filename_with_path, "wb");
+#else
+        ps_op_chksum_file = fopen(s_app_ctx.ac_op_chksum_fname, "wb");
+#endif
+        if(NULL == ps_op_chksum_file)
+        {
+            sprintf(ac_error_str, "Could not open check sum file %s",
+                    s_app_ctx.ac_op_chksum_fname);
+            codec_exit(ac_error_str);
+        }
+    }
+    /***********************************************************************/
+    /*                      Create decoder instance                        */
+    /***********************************************************************/
+    {
+
+        ps_out_buf = (ivd_out_bufdesc_t *)malloc(sizeof(ivd_out_bufdesc_t));
+
+        {
+            iv_num_mem_rec_ip_t s_no_of_mem_rec_query_ip;
+            iv_num_mem_rec_op_t s_no_of_mem_rec_query_op;
+
+            s_no_of_mem_rec_query_ip.u4_size = sizeof(s_no_of_mem_rec_query_ip);
+            s_no_of_mem_rec_query_op.u4_size = sizeof(s_no_of_mem_rec_query_op);
+            s_no_of_mem_rec_query_ip.e_cmd = IV_CMD_GET_NUM_MEM_REC;
+
+            /*****************************************************************************/
+            /*   API Call: Get Number of Mem Records                                     */
+            /*****************************************************************************/
+            e_dec_status = ivd_api_function(
+                            NULL, (void *)&s_no_of_mem_rec_query_ip,
+                            (void *)&s_no_of_mem_rec_query_op);
+            if(IV_SUCCESS != e_dec_status)
+            {
+                sprintf(ac_error_str, "Error in get mem records");
+                codec_exit(ac_error_str);
+            }
+
+            u4_num_mem_recs = s_no_of_mem_rec_query_op.u4_num_mem_rec;
+        }
+
+        pv_mem_rec_location = malloc(u4_num_mem_recs * sizeof(iv_mem_rec_t));
+        if(pv_mem_rec_location == NULL)
+        {
+            sprintf(ac_error_str, "Allocation failure for mem_rec_location");
+            codec_exit(ac_error_str);
+
+        }
+
+        {
+            impeg2d_fill_mem_rec_ip_t s_fill_mem_rec_ip;
+            impeg2d_fill_mem_rec_op_t s_fill_mem_rec_op;
+            iv_mem_rec_t *ps_mem_rec;
+            UWORD32 total_size;
+
+            s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.e_cmd =
+                            IV_CMD_FILL_NUM_MEM_REC;
+            s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.pv_mem_rec_location =
+                            (iv_mem_rec_t *)pv_mem_rec_location;
+            s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_max_frm_wd =
+                            (s_app_ctx.max_wd == 0) ? MAX_FRAME_WIDTH : s_app_ctx.max_wd;
+            s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_max_frm_ht =
+                            (s_app_ctx.max_ht == 0) ? MAX_FRAME_HEIGHT : s_app_ctx.max_ht;
+            s_fill_mem_rec_ip.u4_share_disp_buf = s_app_ctx.share_disp_buf;
+            s_fill_mem_rec_ip.e_output_format =
+                            (IV_COLOR_FORMAT_T)s_app_ctx.e_output_chroma_format;
+
+            s_fill_mem_rec_ip.s_ivd_fill_mem_rec_ip_t.u4_size =
+                            sizeof(impeg2d_fill_mem_rec_ip_t);
+            s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_size =
+                            sizeof(impeg2d_fill_mem_rec_op_t);
+
+            ps_mem_rec = (iv_mem_rec_t *)pv_mem_rec_location;
+            for(i = 0; i < u4_num_mem_recs; i++)
+                ps_mem_rec[i].u4_size = sizeof(iv_mem_rec_t);
+
+            /*****************************************************************************/
+            /*   API Call: Fill Mem Records                                     */
+            /*****************************************************************************/
+
+            e_dec_status = ivd_api_function(NULL,
+                                                (void *)&s_fill_mem_rec_ip,
+                                                (void *)&s_fill_mem_rec_op);
+
+            u4_num_mem_recs =
+                            s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_num_mem_rec_filled;
+
+            if(IV_SUCCESS != e_dec_status)
+            {
+                sprintf(ac_error_str, "Error in fill mem records: %x", s_fill_mem_rec_op.s_ivd_fill_mem_rec_op_t.u4_error_code);
+                codec_exit(ac_error_str);
+            }
+
+            ps_mem_rec = (iv_mem_rec_t *)pv_mem_rec_location;
+            total_size = 0;
+            for(i = 0; i < u4_num_mem_recs; i++)
+            {
+                ps_mem_rec->pv_base = app_aligned_malloc(ps_mem_rec->u4_mem_alignment,
+                                                            ps_mem_rec->u4_mem_size);
+                if(ps_mem_rec->pv_base == NULL)
+                {
+                    sprintf(ac_error_str,
+                            "\nAllocation failure for mem record id %d size %d\n",
+                            i, ps_mem_rec->u4_mem_size);
+                    codec_exit(ac_error_str);
+
+                }
+                total_size += ps_mem_rec->u4_mem_size;
+
+                ps_mem_rec++;
+            }
+            //printf("\nTotal memory for codec %d\n", total_size);
+        }
+        /*****************************************************************************/
+        /*   API Call: Initialize the Decoder                                        */
+        /*****************************************************************************/
+        {
+            impeg2d_init_ip_t s_init_ip;
+            impeg2d_init_op_t s_init_op;
+            void *fxns = &ivd_api_function;
+            iv_mem_rec_t *mem_tab;
+
+            mem_tab = (iv_mem_rec_t *)pv_mem_rec_location;
+            s_init_ip.s_ivd_init_ip_t.e_cmd = (IVD_API_COMMAND_TYPE_T)IV_CMD_INIT;
+            s_init_ip.s_ivd_init_ip_t.pv_mem_rec_location = mem_tab;
+            s_init_ip.s_ivd_init_ip_t.u4_frm_max_wd = (s_app_ctx.max_wd == 0) ? MAX_FRAME_WIDTH : s_app_ctx.max_wd;
+            s_init_ip.s_ivd_init_ip_t.u4_frm_max_ht = (s_app_ctx.max_ht == 0) ? MAX_FRAME_HEIGHT : s_app_ctx.max_ht;
+
+            s_init_ip.u4_share_disp_buf = s_app_ctx.share_disp_buf;
+
+            s_init_ip.s_ivd_init_ip_t.u4_num_mem_rec = u4_num_mem_recs;
+            s_init_ip.s_ivd_init_ip_t.e_output_format =
+                            (IV_COLOR_FORMAT_T)s_app_ctx.e_output_chroma_format;
+            s_init_ip.s_ivd_init_ip_t.u4_size = sizeof(impeg2d_init_ip_t);
+            s_init_op.s_ivd_init_op_t.u4_size = sizeof(impeg2d_init_op_t);
+
+            codec_obj = (iv_obj_t *)mem_tab[0].pv_base;
+            codec_obj->pv_fxns = fxns;
+            codec_obj->u4_size = sizeof(iv_obj_t);
+
+            s_app_ctx.cocodec_obj = codec_obj;
+
+            ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_init_ip,
+                                       (void *)&s_init_op);
+            if(ret != IV_SUCCESS)
+            {
+                sprintf(ac_error_str, "Error in Init %8x\n",
+                        s_init_op.s_ivd_init_op_t.u4_error_code);
+                codec_exit(ac_error_str);
+            }
+
+            /*****************************************************************************/
+            /*  Input and output buffer allocation                                       */
+            /*****************************************************************************/
+            {
+
+                ivd_ctl_getbufinfo_ip_t s_ctl_ip;
+                ivd_ctl_getbufinfo_op_t s_ctl_op;
+
+                s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+                s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO;
+                s_ctl_ip.u4_size = sizeof(ivd_ctl_getbufinfo_ip_t);
+                s_ctl_op.u4_size = sizeof(ivd_ctl_getbufinfo_op_t);
+                ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip,
+                                           (void *)&s_ctl_op);
+                if(ret != IV_SUCCESS)
+                {
+                    sprintf(ac_error_str, "Error in Get Buf Info %x", s_ctl_op.u4_error_code);
+                    codec_exit(ac_error_str);
+                }
+
+                /* Allocate input buffer */
+                u4_ip_buf_len = s_ctl_op.u4_min_in_buf_size[0];
+                pu1_bs_buf = (UWORD8 *)malloc(u4_ip_buf_len);
+
+                if(pu1_bs_buf == NULL)
+                {
+                    sprintf(ac_error_str,
+                            "\nAllocation failure for input buffer of size %d",
+                            u4_ip_buf_len);
+                    codec_exit(ac_error_str);
+                }
+                s_app_ctx.num_disp_buf = s_ctl_op.u4_num_disp_bufs;
+                /* Allocate output buffer only if display buffers are not shared */
+                /* Or if shared and output is 420P */
+                if((0 == s_app_ctx.share_disp_buf) || (IV_YUV_420P == s_app_ctx.e_output_chroma_format))
+                {
+                    UWORD32 outlen;
+                    ps_out_buf->u4_min_out_buf_size[0] =
+                                    s_ctl_op.u4_min_out_buf_size[0];
+                    ps_out_buf->u4_min_out_buf_size[1] =
+                                    s_ctl_op.u4_min_out_buf_size[1];
+                    ps_out_buf->u4_min_out_buf_size[2] =
+                                    s_ctl_op.u4_min_out_buf_size[2];
+
+                    outlen = s_ctl_op.u4_min_out_buf_size[0];
+                    if(s_ctl_op.u4_min_num_out_bufs > 1)
+                        outlen += s_ctl_op.u4_min_out_buf_size[1];
+
+                    if(s_ctl_op.u4_min_num_out_bufs > 2)
+                        outlen += s_ctl_op.u4_min_out_buf_size[2];
+
+                    ps_out_buf->pu1_bufs[0] = (UWORD8 *)malloc(outlen);
+                    if(ps_out_buf->pu1_bufs[0] == NULL)
+                    {
+                        sprintf(ac_error_str,
+                                "\nAllocation failure for output buffer of size %d",
+                                outlen);
+                        codec_exit(ac_error_str);
+                    }
+
+                    if(s_ctl_op.u4_min_num_out_bufs > 1)
+                        ps_out_buf->pu1_bufs[1] = ps_out_buf->pu1_bufs[0]
+                                        + (s_ctl_op.u4_min_out_buf_size[0]);
+
+                    if(s_ctl_op.u4_min_num_out_bufs > 2)
+                        ps_out_buf->pu1_bufs[2] = ps_out_buf->pu1_bufs[1]
+                                        + (s_ctl_op.u4_min_out_buf_size[1]);
+
+                    ps_out_buf->u4_num_bufs = s_ctl_op.u4_min_num_out_bufs;
+                }
+
+            }
+        }
+
+    }
+
+
+    /*************************************************************************/
+    /* set num of cores                                                      */
+    /*************************************************************************/
+    {
+
+        impeg2d_ctl_set_num_cores_ip_t s_ctl_set_cores_ip;
+        impeg2d_ctl_set_num_cores_op_t s_ctl_set_cores_op;
+
+        s_ctl_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+        s_ctl_set_cores_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IMPEG2D_CMD_CTL_SET_NUM_CORES;
+        s_ctl_set_cores_ip.u4_num_cores = s_app_ctx.u4_num_cores;
+        s_ctl_set_cores_ip.u4_size = sizeof(impeg2d_ctl_set_num_cores_ip_t);
+        s_ctl_set_cores_op.u4_size = sizeof(impeg2d_ctl_set_num_cores_op_t);
+
+        ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_set_cores_ip,
+                                   (void *)&s_ctl_set_cores_op);
+        if(ret != IV_SUCCESS)
+        {
+            sprintf(ac_error_str, "\nError in setting number of cores");
+            codec_exit(ac_error_str);
+        }
+
+    }
+    /*************************************************************************/
+    /* set processor                                                         */
+    /*************************************************************************/
+
+    {
+
+        impeg2d_ctl_set_processor_ip_t s_ctl_set_num_processor_ip;
+        impeg2d_ctl_set_processor_op_t s_ctl_set_num_processor_op;
+
+        s_ctl_set_num_processor_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+        s_ctl_set_num_processor_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IMPEG2D_CMD_CTL_SET_PROCESSOR;
+        s_ctl_set_num_processor_ip.u4_arch = s_app_ctx.e_arch;
+        s_ctl_set_num_processor_ip.u4_soc = s_app_ctx.e_soc;
+        s_ctl_set_num_processor_ip.u4_size = sizeof(impeg2d_ctl_set_processor_ip_t);
+        s_ctl_set_num_processor_op.u4_size = sizeof(impeg2d_ctl_set_processor_op_t);
+
+        ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_set_num_processor_ip,
+                                   (void *)&s_ctl_set_num_processor_op);
+        if(ret != IV_SUCCESS)
+        {
+            sprintf(ac_error_str, "\nError in setting Processor type");
+            codec_exit(ac_error_str);
+        }
+
+    }
+
+
+    /*****************************************************************************/
+    /*   Decode header to get width and height and buffer sizes                  */
+    /*****************************************************************************/
+    {
+
+        ivd_ctl_set_config_ip_t s_ctl_ip;
+        ivd_ctl_set_config_op_t s_ctl_op;
+
+        ivd_video_decode_ip_t s_video_decode_ip;
+        ivd_video_decode_op_t s_video_decode_op;
+
+        s_ctl_ip.u4_disp_wd = STRIDE;
+        if(1 == s_app_ctx.display)
+            s_ctl_ip.u4_disp_wd = s_app_ctx.get_stride();
+
+        s_ctl_ip.e_frm_skip_mode = IVD_SKIP_NONE;
+        s_ctl_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT;
+        s_ctl_ip.e_vid_dec_mode = IVD_DECODE_HEADER;
+        s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+        s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
+        s_ctl_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t);
+        s_ctl_op.u4_size = sizeof(ivd_ctl_set_config_op_t);
+
+        ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip,
+                                   (void *)&s_ctl_op);
+        if(ret != IV_SUCCESS)
+        {
+            sprintf(ac_error_str,
+                    "\nError in setting the codec in header decode mode");
+            codec_exit(ac_error_str);
+        }
+
+        do
+        {
+            WORD32 numbytes;
+            if(0 == s_app_ctx.u4_piclen_flag)
+            {
+                fseek(ps_ip_file, file_pos, SEEK_SET);
+                numbytes = u4_ip_buf_len;
+            }
+            else
+            {
+                WORD32 entries;
+                entries = fscanf(ps_piclen_file, "%d\n", &numbytes);
+                if(1 != entries)
+                    numbytes = u4_ip_buf_len;
+            }
+
+            u4_bytes_remaining = fread(pu1_bs_buf, sizeof(UWORD8), numbytes,
+                                       ps_ip_file);
+
+            if(0 == u4_bytes_remaining)
+            {
+                sprintf(ac_error_str, "\nUnable to read from input file");
+                codec_exit(ac_error_str);
+            }
+
+            s_video_decode_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
+            s_video_decode_ip.u4_ts = u4_ip_frm_ts;
+            s_video_decode_ip.pv_stream_buffer = pu1_bs_buf;
+            s_video_decode_ip.u4_num_Bytes = u4_bytes_remaining;
+            s_video_decode_ip.u4_size = sizeof(ivd_video_decode_ip_t);
+            s_video_decode_op.u4_size = sizeof(ivd_video_decode_op_t);
+
+            /*****************************************************************************/
+            /*   API Call: Header Decode                                                  */
+            /*****************************************************************************/
+            ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_video_decode_ip,
+                                       (void *)&s_video_decode_op);
+
+            if(ret != IV_SUCCESS)
+            {
+                sprintf(ac_error_str, "\nError in header decode %x",
+                        s_video_decode_op.u4_error_code);
+                // codec_exit(ac_error_str);
+            }
+
+            u4_num_bytes_dec = s_video_decode_op.u4_num_bytes_consumed;
+#ifndef PROFILE_ENABLE
+            printf("%d\n", s_video_decode_op.u4_num_bytes_consumed);
+#endif
+            file_pos += u4_num_bytes_dec;
+            total_bytes_comsumed += u4_num_bytes_dec;
+        }while(ret != IV_SUCCESS);
+
+        /* copy pic_wd and pic_ht to initialize buffers */
+        s_app_ctx.u4_pic_wd = s_video_decode_op.u4_pic_wd;
+        s_app_ctx.u4_pic_ht = s_video_decode_op.u4_pic_ht;
+
+#if IOS_DISPLAY
+        s_app_ctx.i4_screen_wd = screen_wd;
+        s_app_ctx.i4_screen_ht = screen_ht;
+#endif
+
+        /* Create display thread and wait for the display buffers to be initialized */
+        if(1 == s_app_ctx.display)
+        {
+            if(0 == s_app_ctx.display_thread_created)
+            {
+                s_app_ctx.display_init_done = 0;
+                ithread_create(s_app_ctx.display_thread_handle, NULL,
+                               (void *)&display_thread, (void *)&s_app_ctx);
+                s_app_ctx.display_thread_created = 1;
+
+                while(1)
+                {
+                    if(s_app_ctx.display_init_done)
+                        break;
+
+                    ithread_msleep(1);
+                }
+            }
+
+            s_app_ctx.u4_strd = s_app_ctx.get_stride();
+        }
+    }
+
+    /*************************************************************************/
+    /* Get actual number of output buffers required, which is dependent      */
+    /* on stream properties such as width, height and level etc              */
+    /* This is needed mainly for shared display mode                         */
+    /*************************************************************************/
+    //if(1 == s_app_ctx.share_disp_buf)
+    {
+        ivd_ctl_getbufinfo_ip_t s_ctl_ip;
+        ivd_ctl_getbufinfo_op_t s_ctl_op;
+        WORD32 outlen = 0;
+
+        s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+        s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO;
+        s_ctl_ip.u4_size = sizeof(ivd_ctl_getbufinfo_ip_t);
+        s_ctl_op.u4_size = sizeof(ivd_ctl_getbufinfo_op_t);
+        ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip,
+                                   (void *)&s_ctl_op);
+        if(ret != IV_SUCCESS)
+        {
+            sprintf(ac_error_str, "Error in Get Buf Info %x", s_ctl_op.u4_error_code);
+            codec_exit(ac_error_str);
+        }
+
+#ifdef APP_EXTRA_BUFS
+        s_app_ctx.disp_delay = EXTRA_DISP_BUFFERS;
+        s_ctl_op.u4_num_disp_bufs += EXTRA_DISP_BUFFERS;
+#endif
+
+        /*****************************************************************************/
+        /*   API Call: Allocate display buffers for display buffer shared case       */
+        /*****************************************************************************/
+
+        for(i = 0; i < s_ctl_op.u4_num_disp_bufs; i++)
+        {
+
+            s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[0] =
+                            s_ctl_op.u4_min_out_buf_size[0];
+            s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[1] =
+                            s_ctl_op.u4_min_out_buf_size[1];
+            s_app_ctx.s_disp_buffers[i].u4_min_out_buf_size[2] =
+                            s_ctl_op.u4_min_out_buf_size[2];
+
+            outlen = s_ctl_op.u4_min_out_buf_size[0];
+            if(s_ctl_op.u4_min_num_out_bufs > 1)
+                outlen += s_ctl_op.u4_min_out_buf_size[1];
+
+            if(s_ctl_op.u4_min_num_out_bufs > 2)
+                outlen += s_ctl_op.u4_min_out_buf_size[2];
+
+            s_app_ctx.s_disp_buffers[i].pu1_bufs[0] = (UWORD8 *)malloc(outlen);
+
+            if(s_app_ctx.s_disp_buffers[i].pu1_bufs[0] == NULL)
+            {
+                sprintf(ac_error_str,
+                        "\nAllocation failure for output buffer of size %d",
+                        outlen);
+                codec_exit(ac_error_str);
+            }
+
+            if(s_ctl_op.u4_min_num_out_bufs > 1)
+                s_app_ctx.s_disp_buffers[i].pu1_bufs[1] =
+                                s_app_ctx.s_disp_buffers[i].pu1_bufs[0]
+                                                + (s_ctl_op.u4_min_out_buf_size[0]);
+
+            if(s_ctl_op.u4_min_num_out_bufs > 2)
+                s_app_ctx.s_disp_buffers[i].pu1_bufs[2] =
+                                s_app_ctx.s_disp_buffers[i].pu1_bufs[1]
+                                                + (s_ctl_op.u4_min_out_buf_size[1]);
+
+            s_app_ctx.s_disp_buffers[i].u4_num_bufs =
+                            s_ctl_op.u4_min_num_out_bufs;
+        }
+        s_app_ctx.num_disp_buf = s_ctl_op.u4_num_disp_bufs;
+
+        /*****************************************************************************/
+        /*   API Call: Send the allocated display buffers to codec                   */
+        /*****************************************************************************/
+        if(1 == s_app_ctx.share_disp_buf)
+        {
+            ivd_set_display_frame_ip_t s_set_display_frame_ip;
+            ivd_set_display_frame_op_t s_set_display_frame_op;
+
+            s_set_display_frame_ip.e_cmd = IVD_CMD_SET_DISPLAY_FRAME;
+            s_set_display_frame_ip.u4_size = sizeof(ivd_set_display_frame_ip_t);
+            s_set_display_frame_op.u4_size = sizeof(ivd_set_display_frame_op_t);
+
+            s_set_display_frame_ip.num_disp_bufs = s_app_ctx.num_disp_buf;
+
+            memcpy(&(s_set_display_frame_ip.s_disp_buffer),
+                   &(s_app_ctx.s_disp_buffers),
+                   s_ctl_op.u4_num_disp_bufs * sizeof(ivd_out_bufdesc_t));
+
+            ret = ivd_api_function((iv_obj_t *)codec_obj,
+                                       (void *)&s_set_display_frame_ip,
+                                       (void *)&s_set_display_frame_op);
+
+            if(IV_SUCCESS != ret)
+            {
+                sprintf(ac_error_str, "Error in Set display frame");
+                codec_exit(ac_error_str);
+            }
+        }
+
+
+    }
+
+    /*************************************************************************/
+    /* Get frame dimensions for display buffers such as x_offset,y_offset    */
+    /* etc. This information might be needed to set display buffer           */
+    /* offsets in case of shared display buffer mode                         */
+    /*************************************************************************/
+    {
+
+        impeg2d_ctl_get_frame_dimensions_ip_t s_ctl_get_frame_dimensions_ip;
+        impeg2d_ctl_get_frame_dimensions_op_t s_ctl_get_frame_dimensions_op;
+
+        s_ctl_get_frame_dimensions_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+        s_ctl_get_frame_dimensions_ip.e_sub_cmd =
+                        (IVD_CONTROL_API_COMMAND_TYPE_T)IMPEG2D_CMD_CTL_GET_BUFFER_DIMENSIONS;
+        s_ctl_get_frame_dimensions_ip.u4_size =
+                        sizeof(impeg2d_ctl_get_frame_dimensions_ip_t);
+        s_ctl_get_frame_dimensions_op.u4_size =
+                        sizeof(impeg2d_ctl_get_frame_dimensions_op_t);
+
+        ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_get_frame_dimensions_ip,
+                                   (void *)&s_ctl_get_frame_dimensions_op);
+        if(IV_SUCCESS != ret)
+        {
+            sprintf(ac_error_str, "Error in Get buffer Dimensions");
+            codec_exit(ac_error_str);
+        }
+
+/*
+        printf("Frame offsets due to padding\n");
+        printf("s_ctl_get_frame_dimensions_op.x_offset[0] %d s_ctl_get_frame_dimensions_op.y_offset[0] %d\n",
+               s_ctl_get_frame_dimensions_op.u4_x_offset[0],
+               s_ctl_get_frame_dimensions_op.u4_y_offset[0]);
+*/
+    }
+    /*************************************************************************/
+    /* Get VUI parameters                                                    */
+    /*************************************************************************/
+#if 0
+    {
+
+        impeg2d_ctl_get_vui_params_ip_t s_ctl_get_vui_params_ip;
+        impeg2d_ctl_get_vui_params_op_t s_ctl_get_vui_params_op;
+
+        s_ctl_get_vui_params_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+        s_ctl_get_vui_params_ip.e_sub_cmd =
+                        (IVD_CONTROL_API_COMMAND_TYPE_T)IMPEG2D_CMD_CTL_GET_VUI_PARAMS;
+        s_ctl_get_vui_params_ip.u4_size =
+                        sizeof(impeg2d_ctl_get_vui_params_ip_t);
+        s_ctl_get_vui_params_op.u4_size =
+                        sizeof(impeg2d_ctl_get_vui_params_op_t);
+
+        ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_get_vui_params_ip,
+                                   (void *)&s_ctl_get_vui_params_op);
+        if(IV_SUCCESS != ret)
+        {
+            sprintf(ac_error_str, "Error in Get VUI params");
+            //codec_exit(ac_error_str);
+        }
+
+    }
+#endif
+
+    /*************************************************************************/
+    /* Set the decoder in frame decode mode. It was set in header decode     */
+    /* mode earlier                                                          */
+    /*************************************************************************/
+    {
+
+        ivd_ctl_set_config_ip_t s_ctl_ip;
+        ivd_ctl_set_config_op_t s_ctl_op;
+
+        s_ctl_ip.u4_disp_wd = STRIDE;
+        if(1 == s_app_ctx.display)
+            s_ctl_ip.u4_disp_wd = s_app_ctx.get_stride();
+        s_ctl_ip.e_frm_skip_mode = IVD_SKIP_NONE;
+
+        s_ctl_ip.e_frm_out_mode = IVD_DISPLAY_FRAME_OUT;
+        s_ctl_ip.e_vid_dec_mode = IVD_DECODE_FRAME;
+        s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+        s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_SETPARAMS;
+        s_ctl_ip.u4_size = sizeof(ivd_ctl_set_config_ip_t);
+
+        s_ctl_op.u4_size = sizeof(ivd_ctl_set_config_op_t);
+
+        ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip, (void *)&s_ctl_op);
+
+        if(IV_SUCCESS != ret)
+        {
+            sprintf(ac_error_str, "Error in Set Parameters");
+            //codec_exit(ac_error_str);
+        }
+
+    }
+    /*************************************************************************/
+    /* If required disable deblocking and sao at given level                 */
+    /*************************************************************************/
+    set_degrade(codec_obj, s_app_ctx.i4_degrade_type, s_app_ctx.i4_degrade_pics);
+#ifdef WINDOWS_TIMER
+    QueryPerformanceFrequency(&frequency);
+#endif
+#ifndef PRINT_PICSIZE
+    get_version(codec_obj);
+#endif
+    while(u4_op_frm_ts < (s_app_ctx.u4_max_frm_ts + s_app_ctx.disp_delay))
+    {
+
+#ifdef TEST_FLUSH
+        if(u4_ip_frm_ts == FLUSH_FRM_CNT)
+        {
+            ivd_ctl_flush_ip_t s_ctl_ip;
+            ivd_ctl_flush_op_t s_ctl_op;
+
+            s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+            s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_FLUSH;
+            s_ctl_ip.u4_size = sizeof(ivd_ctl_flush_ip_t);
+            s_ctl_op.u4_size = sizeof(ivd_ctl_flush_op_t);
+            ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip,
+                                       (void *)&s_ctl_op);
+
+            if(ret != IV_SUCCESS)
+            {
+                printf("Error in Setting the decoder in flush mode\n");
+            }
+            file_pos = 0;
+
+            fseek(ps_ip_file, file_pos, SEEK_SET);
+
+        }
+#endif
+        if(u4_ip_frm_ts < s_app_ctx.num_disp_buf && (1 == s_app_ctx.share_disp_buf))
+        {
+            release_disp_frame(codec_obj, u4_ip_frm_ts);
+        }
+
+
+        /*************************************************************************/
+        /* set num of cores                                                      */
+        /*************************************************************************/
+#ifdef DYNAMIC_NUMCORES
+        {
+
+            impeg2d_ctl_set_num_cores_ip_t s_ctl_set_cores_ip;
+            impeg2d_ctl_set_num_cores_op_t s_ctl_set_cores_op;
+
+            s_ctl_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+            s_ctl_set_cores_ip.e_sub_cmd = IMPEG2D_CMD_CTL_SET_NUM_CORES;
+            s_ctl_set_cores_ip.u4_num_cores =  1 + 3 * (u4_ip_frm_ts % 2);
+            s_ctl_set_cores_ip.u4_size = sizeof(impeg2d_ctl_set_num_cores_ip_t);
+            s_ctl_set_cores_op.u4_size = sizeof(impeg2d_ctl_set_num_cores_op_t);
+
+            ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_set_cores_ip,
+                                       (void *)&s_ctl_set_cores_op);
+            if(ret != IV_SUCCESS)
+            {
+                sprintf(ac_error_str, "\nError in setting number of cores");
+                codec_exit(ac_error_str);
+            }
+
+        }
+#endif
+        /***********************************************************************/
+        /*   Seek the file to start of current frame, this is equivalent to    */
+        /*   having a parser which tells the start of current frame            */
+        /***********************************************************************/
+        {
+            WORD32 numbytes;
+
+            if(0 == s_app_ctx.u4_piclen_flag)
+            {
+                fseek(ps_ip_file, file_pos, SEEK_SET);
+                numbytes = u4_ip_buf_len;
+            }
+            else
+            {
+                WORD32 entries;
+                entries = fscanf(ps_piclen_file, "%d\n", &numbytes);
+                if(1 != entries)
+                    numbytes = u4_ip_buf_len;
+            }
+
+            u4_bytes_remaining = fread(pu1_bs_buf, sizeof(UWORD8),
+                                       numbytes, ps_ip_file);
+
+            if(u4_bytes_remaining == 0)
+            {
+                if(1 == s_app_ctx.loopback)
+                {
+                    file_pos = 0;
+                    if(0 == s_app_ctx.u4_piclen_flag)
+                    {
+                        fseek(ps_ip_file, file_pos, SEEK_SET);
+                        numbytes = u4_ip_buf_len;
+                    }
+                    else
+                    {
+                        WORD32 entries;
+                        entries = fscanf(ps_piclen_file, "%d\n", &numbytes);
+                        if(1 != entries)
+                            numbytes = u4_ip_buf_len;
+                    }
+
+
+                    u4_bytes_remaining = fread(pu1_bs_buf, sizeof(UWORD8),
+                                               numbytes, ps_ip_file);
+                }
+                else
+                    break;
+            }
+        }
+
+        /*********************************************************************/
+        /* Following calls can be enabled at different times                 */
+        /*********************************************************************/
+#if ENABLE_DEGRADE
+        if(u4_op_frm_ts >= 10000)
+            disable_deblocking(codec_obj, 4);
+
+        if(u4_op_frm_ts == 30000)
+            enable_deblocking(codec_obj);
+
+        if(u4_op_frm_ts == 10000)
+            enable_skippb_frames(codec_obj);
+
+        if(u4_op_frm_ts == 60000)
+            disable_skippb_frames(codec_obj);
+
+        if(u4_op_frm_ts == 30000)
+            enable_skipb_frames(codec_obj);
+
+        if(u4_op_frm_ts == 60000)
+            disable_skipb_frames(codec_obj);
+#endif
+
+
+        {
+            ivd_video_decode_ip_t s_video_decode_ip;
+            ivd_video_decode_op_t s_video_decode_op;
+#ifdef PROFILE_ENABLE
+            UWORD32 s_elapsed_time;
+            TIMER s_start_timer;
+            TIMER s_end_timer;
+#endif
+
+
+            s_video_decode_ip.e_cmd = IVD_CMD_VIDEO_DECODE;
+            s_video_decode_ip.u4_ts = u4_ip_frm_ts;
+            s_video_decode_ip.pv_stream_buffer = pu1_bs_buf;
+            s_video_decode_ip.u4_num_Bytes = u4_bytes_remaining;
+            s_video_decode_ip.u4_size = sizeof(ivd_video_decode_ip_t);
+            s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[0] =
+                            ps_out_buf->u4_min_out_buf_size[0];
+            s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[1] =
+                            ps_out_buf->u4_min_out_buf_size[1];
+            s_video_decode_ip.s_out_buffer.u4_min_out_buf_size[2] =
+                            ps_out_buf->u4_min_out_buf_size[2];
+
+            s_video_decode_ip.s_out_buffer.pu1_bufs[0] =
+                            ps_out_buf->pu1_bufs[0];
+            s_video_decode_ip.s_out_buffer.pu1_bufs[1] =
+                            ps_out_buf->pu1_bufs[1];
+            s_video_decode_ip.s_out_buffer.pu1_bufs[2] =
+                            ps_out_buf->pu1_bufs[2];
+            s_video_decode_ip.s_out_buffer.u4_num_bufs =
+                            ps_out_buf->u4_num_bufs;
+            s_video_decode_op.u4_size = sizeof(ivd_video_decode_op_t);
+
+            /* Get display buffer pointers */
+            if(1 == s_app_ctx.display)
+            {
+                WORD32 wr_idx;
+
+                wr_idx = dispq_producer_dequeue(&s_app_ctx);
+
+                if(s_app_ctx.quit)
+                    break;
+
+                s_app_ctx.set_disp_buffers(s_app_ctx.pv_disp_ctx, wr_idx,
+                                           &s_video_decode_ip.s_out_buffer.pu1_bufs[0],
+                                           &s_video_decode_ip.s_out_buffer.pu1_bufs[1],
+                                           &s_video_decode_ip.s_out_buffer.pu1_bufs[2]);
+            }
+
+            /*****************************************************************************/
+            /*   API Call: Video Decode                                                  */
+            /*****************************************************************************/
+
+            GETTIME(&s_start_timer);
+
+            ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_video_decode_ip,
+                                       (void *)&s_video_decode_op);
+
+
+            GETTIME(&s_end_timer);
+            ELAPSEDTIME(s_start_timer, s_end_timer, s_elapsed_time, frequency);
+#ifdef PROFILE_ENABLE
+            {
+                UWORD32 peak_avg, id;
+                u4_tot_cycles += s_elapsed_time;
+                peak_window[peak_window_idx++] = s_elapsed_time;
+                if(peak_window_idx == PEAK_WINDOW_SIZE)
+                    peak_window_idx = 0;
+                peak_avg = 0;
+                for(id = 0; id < PEAK_WINDOW_SIZE; id++)
+                {
+                    peak_avg += peak_window[id];
+                }
+                peak_avg /= PEAK_WINDOW_SIZE;
+                if(peak_avg > peak_avg_max)
+                    peak_avg_max = peak_avg;
+                frm_cnt++;
+
+                printf("FrameNum: %4d TimeTaken(microsec): %6d AvgTime: %6d PeakAvgTimeMax: %6d Output: %2d NumBytes: %6d \n",
+                       frm_cnt, s_elapsed_time, u4_tot_cycles / frm_cnt, peak_avg_max, s_video_decode_op.u4_output_present, s_video_decode_op.u4_num_bytes_consumed);
+
+            }
+#ifdef INTEL_CE5300
+            time_consumed += s_elapsed_time;
+            bytes_consumed += s_video_decode_op.u4_num_bytes_consumed;
+            if(!(frm_cnt % (s_app_ctx.fps)))
+            {
+                time_consumed = time_consumed / s_app_ctx.fps;
+                printf("Average decode time(micro sec) for the last second = %6d\n", time_consumed);
+                printf("Average bitrate(kb) for the last second = %6d\n", (bytes_consumed * 8) / 1024);
+                time_consumed = 0;
+                bytes_consumed = 0;
+
+            }
+#endif
+#else
+            printf("%d\n", s_video_decode_op.u4_num_bytes_consumed);
+#endif
+
+            if(IV_SUCCESS != ret)
+            {
+                printf("Error in video Frame decode : ret %x Error %x\n", ret,
+                       s_video_decode_op.u4_error_code);
+                if ((s_video_decode_op.u4_error_code & 0xFF) == IVD_RES_CHANGED)
+                {
+                    ivd_ctl_reset_ip_t s_ctl_ip;
+                    ivd_ctl_reset_op_t s_ctl_op;
+
+                    flush_output(codec_obj, &s_app_ctx, ps_out_buf,
+                                 pu1_bs_buf, &u4_op_frm_ts,
+                                 ps_op_file, ps_op_chksum_file,
+                                 u4_ip_frm_ts, u4_bytes_remaining);
+
+                    s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+                    s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_RESET;
+                    s_ctl_ip.u4_size = sizeof(ivd_ctl_reset_ip_t);
+                    s_ctl_op.u4_size = sizeof(ivd_ctl_reset_op_t);
+
+                    ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_ip,
+                                           (void *)&s_ctl_op);
+                    if(IV_SUCCESS != ret)
+                    {
+                        sprintf(ac_error_str, "Error in Reset");
+                        codec_exit(ac_error_str);
+                    }
+                    /*************************************************************************/
+                    /* set num of cores                                                      */
+                    /*************************************************************************/
+                    {
+
+                        impeg2d_ctl_set_num_cores_ip_t s_ctl_set_cores_ip;
+                        impeg2d_ctl_set_num_cores_op_t s_ctl_set_cores_op;
+
+                        s_ctl_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+                        s_ctl_set_cores_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IMPEG2D_CMD_CTL_SET_NUM_CORES;
+                        s_ctl_set_cores_ip.u4_num_cores = s_app_ctx.u4_num_cores;
+                        s_ctl_set_cores_ip.u4_size = sizeof(impeg2d_ctl_set_num_cores_ip_t);
+                        s_ctl_set_cores_op.u4_size = sizeof(impeg2d_ctl_set_num_cores_op_t);
+
+                        ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_set_cores_ip,
+                                               (void *)&s_ctl_set_cores_op);
+                        if(ret != IV_SUCCESS)
+                        {
+                            sprintf(ac_error_str, "\nError in setting number of cores");
+                            codec_exit(ac_error_str);
+                        }
+
+                    }
+                    /*************************************************************************/
+                    /* set processor                                                         */
+                    /*************************************************************************/
+
+                    {
+
+                        impeg2d_ctl_set_processor_ip_t s_ctl_set_num_processor_ip;
+                        impeg2d_ctl_set_processor_op_t s_ctl_set_num_processor_op;
+
+                        s_ctl_set_num_processor_ip.e_cmd = IVD_CMD_VIDEO_CTL;
+                        s_ctl_set_num_processor_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IMPEG2D_CMD_CTL_SET_PROCESSOR;
+                        s_ctl_set_num_processor_ip.u4_arch = s_app_ctx.e_arch;
+                        s_ctl_set_num_processor_ip.u4_soc = s_app_ctx.e_soc;
+                        s_ctl_set_num_processor_ip.u4_size = sizeof(impeg2d_ctl_set_processor_ip_t);
+                        s_ctl_set_num_processor_op.u4_size = sizeof(impeg2d_ctl_set_processor_op_t);
+
+                        ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_ctl_set_num_processor_ip,
+                                               (void *)&s_ctl_set_num_processor_op);
+                        if(ret != IV_SUCCESS)
+                        {
+                            sprintf(ac_error_str, "\nError in setting Processor type");
+                            codec_exit(ac_error_str);
+                        }
+
+                    }
+
+                }
+                else if(IMPEG2D_UNSUPPORTED_DIMENSIONS
+                                == (IMPEG2D_ERROR_CODES_T)s_video_decode_op.u4_error_code)
+                {
+                    flush_output(codec_obj, &s_app_ctx, ps_out_buf,
+                                 pu1_bs_buf, &u4_op_frm_ts,
+                                 ps_op_file, ps_op_chksum_file,
+                                 u4_ip_frm_ts, u4_bytes_remaining);
+
+                    printf("Reinit codec with width %d and height %d\n",
+                           s_video_decode_op.u4_pic_wd,
+                           s_video_decode_op.u4_pic_ht);
+
+                    break;
+                }
+            }
+
+            if((1 == s_app_ctx.display) &&
+                            (1 == s_video_decode_op.u4_output_present))
+            {
+                dispq_producer_queue(&s_app_ctx);
+            }
+
+            if(IV_B_FRAME == s_video_decode_op.e_pic_type)
+                s_app_ctx.b_pic_present |= 1;
+
+            u4_num_bytes_dec = s_video_decode_op.u4_num_bytes_consumed;
+
+            file_pos += u4_num_bytes_dec;
+            total_bytes_comsumed += u4_num_bytes_dec;
+            u4_ip_frm_ts++;
+
+
+            if(1 == s_video_decode_op.u4_output_present)
+            {
+                width = s_video_decode_op.s_disp_frm_buf.u4_y_wd;
+                height = s_video_decode_op.s_disp_frm_buf.u4_y_ht;
+                dump_output(&s_app_ctx, &(s_video_decode_op.s_disp_frm_buf),
+                            s_video_decode_op.u4_disp_buf_id, ps_op_file,
+                            ps_op_chksum_file,
+                            u4_op_frm_ts, s_app_ctx.u4_file_save_flag,
+                            s_app_ctx.u4_chksum_save_flag);
+
+                u4_op_frm_ts++;
+            }
+            else
+            {
+                if((s_video_decode_op.u4_error_code >> IVD_FATALERROR) & 1)
+                {
+                    printf("Fatal error\n");
+                    break;
+                }
+            }
+
+        }
+    }
+
+    /***********************************************************************/
+    /*      To get the last decoded frames, call process with NULL input    */
+    /***********************************************************************/
+    flush_output(codec_obj, &s_app_ctx, ps_out_buf,
+                 pu1_bs_buf, &u4_op_frm_ts,
+                 ps_op_file, ps_op_chksum_file,
+                 u4_ip_frm_ts, u4_bytes_remaining);
+
+    /* set disp_end flag */
+    s_app_ctx.quit = 1;
+
+
+#ifdef PROFILE_ENABLE
+    printf("Summary\n");
+    printf("Input filename                  : %s\n", s_app_ctx.ac_ip_fname);
+    printf("Output Width                    : %-4d\n", width);
+    printf("Output Height                   : %-4d\n", height);
+
+    if(frm_cnt)
+    {
+        double avg = u4_tot_cycles / frm_cnt;
+        double bytes_avg = total_bytes_comsumed / frm_cnt;
+        double bitrate = (bytes_avg * 8 * s_app_ctx.fps) / 1000000;
+        printf("Bitrate @ %2d fps(mbps)          : %-6.2f\n", s_app_ctx.fps, bitrate);
+        printf("Average decode time(micro sec)  : %-6d\n", (WORD32)avg);
+        printf("Avg Peak decode time(%2d frames) : %-6d\n", PEAK_WINDOW_SIZE, (WORD32)peak_avg_max);
+        avg = (u4_tot_cycles + u4_tot_fmt_cycles) * 1.0 / frm_cnt;
+
+        if(0 == s_app_ctx.share_disp_buf)
+            printf("FPS achieved (with format conv) : %-3.2f\n", 1000000 / avg);
+        else
+            printf("FPS achieved                    : %-3.2f\n", 1000000 / avg);
+    }
+#endif
+    /***********************************************************************/
+    /*   Clear the decoder, close all the files, free all the memory       */
+    /***********************************************************************/
+    if(1 == s_app_ctx.display)
+    {
+        s_app_ctx.display_deinit_flag = 1;
+        /* wait for display to finish */
+        if(s_app_ctx.display_thread_created)
+        {
+            ithread_join(s_app_ctx.display_thread_handle, NULL);
+        }
+        free(s_app_ctx.display_thread_handle);
+    }
+
+    {
+        iv_retrieve_mem_rec_ip_t s_retrieve_dec_ip;
+        iv_retrieve_mem_rec_op_t s_retrieve_dec_op;
+        s_retrieve_dec_ip.pv_mem_rec_location = (iv_mem_rec_t *)pv_mem_rec_location;
+
+        s_retrieve_dec_ip.e_cmd = IV_CMD_RETRIEVE_MEMREC;
+        s_retrieve_dec_ip.u4_size = sizeof(iv_retrieve_mem_rec_ip_t);
+        s_retrieve_dec_op.u4_size = sizeof(iv_retrieve_mem_rec_op_t);
+
+        ret = ivd_api_function((iv_obj_t *)codec_obj, (void *)&s_retrieve_dec_ip,
+                                   (void *)&s_retrieve_dec_op);
+
+        if(IV_SUCCESS != ret)
+        {
+            sprintf(ac_error_str, "Error in Retrieve Memrec");
+            codec_exit(ac_error_str);
+        }
+
+        {
+            iv_mem_rec_t *ps_mem_rec;
+            UWORD16 u2_i;
+
+            u4_num_mem_recs = s_retrieve_dec_op.u4_num_mem_rec_filled;
+
+            ps_mem_rec = s_retrieve_dec_ip.pv_mem_rec_location;
+
+            for(u2_i = 0; u2_i < u4_num_mem_recs; u2_i++)
+            {
+                app_aligned_free(ps_mem_rec->pv_base);
+                ps_mem_rec++;
+            }
+            free(s_retrieve_dec_ip.pv_mem_rec_location);
+        }
+
+    }
+    /***********************************************************************/
+    /*              Close all the files and free all the memory            */
+    /***********************************************************************/
+    {
+        fclose(ps_ip_file);
+
+        if(1 == s_app_ctx.u4_file_save_flag)
+        {
+            fclose(ps_op_file);
+        }
+        if(1 == s_app_ctx.u4_chksum_save_flag)
+        {
+            fclose(ps_op_chksum_file);
+        }
+
+    }
+
+    if(0 == s_app_ctx.share_disp_buf)
+    {
+        free(ps_out_buf->pu1_bufs[0]);
+    }
+
+    for(i = 0; i < s_app_ctx.num_disp_buf; i++)
+    {
+        free(s_app_ctx.s_disp_buffers[i].pu1_bufs[0]);
+    }
+
+    free(ps_out_buf);
+    free(pu1_bs_buf);
+
+    return (0);
+}
commit	aed24eee7ddfc93f1436b0c1679431bd286879b4	[log] [tgz]
author	Venkatarama Avadhani <venkatarama.avadhani@ittiam.com>	Wed Mar 11 10:08:57 2015 +0530
committer	Harish Mahendrakar <harish.mahendrakar@ittiam.com>	Tue Apr 07 18:19:15 2015 +0530
tree	9399f32cdfa15ac9720ded9c8a8093876ba03376
parent	839aea316dc98d258d75f7e2878b21db032a82c1 [diff]