Resolved warnings and fixed alignment in a few assembly files

Resolved warnings seen in x86 modules
Fixed alignment in a few modules
Updated comments in a few ARM modules for consistency
Fixed warnings seen in the clang build

Change-Id: I0623169b5e84a6a6f09c3d2212e754101272f5e9
diff --git a/common/arm/ih264_arm_memory_barrier.s b/common/arm/ih264_arm_memory_barrier.s
index 523218f..3816409 100644
--- a/common/arm/ih264_arm_memory_barrier.s
+++ b/common/arm/ih264_arm_memory_barrier.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @*******************************************************************************
 @* @file
 @*  ih264_arm_memory_barrier.s
@@ -39,7 +39,6 @@
 .text
 .p2align 2
 
-
 @*****************************************************************************
 @*
 @* Function Name    : ih264_arm_dsb
diff --git a/common/arm/ih264_deblk_chroma_a9.s b/common/arm/ih264_deblk_chroma_a9.s
index 66102a7..8c9960a 100644
--- a/common/arm/ih264_deblk_chroma_a9.s
+++ b/common/arm/ih264_deblk_chroma_a9.s
@@ -54,7 +54,7 @@
 .text
 .p2align 2
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -84,7 +84,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_chroma_horz_bs4_bp_a9
 
@@ -130,7 +130,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -160,7 +160,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_chroma_vert_bs4_bp_a9
 
@@ -224,7 +224,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -260,7 +260,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_chroma_horz_bslt4_bp_a9
 
@@ -326,7 +326,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -362,7 +362,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_chroma_vert_bslt4_bp_a9
 
@@ -465,7 +465,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -495,7 +495,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_chroma_vert_bs4_mbaff_bp_a9
 
@@ -543,7 +543,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -579,7 +579,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_chroma_vert_bslt4_mbaff_bp_a9
 
@@ -656,7 +656,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -692,7 +692,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_chroma_horz_bs4_a9
 
@@ -743,7 +743,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -779,7 +779,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_chroma_vert_bs4_a9
 
@@ -848,7 +848,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -893,7 +893,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_chroma_horz_bslt4_a9
 
@@ -968,7 +968,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -1013,7 +1013,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_chroma_vert_bslt4_a9
 
@@ -1119,7 +1119,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -1155,7 +1155,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_chroma_vert_bs4_mbaff_a9
 
@@ -1206,7 +1206,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -1251,7 +1251,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_chroma_vert_bslt4_mbaff_a9
 
diff --git a/common/arm/ih264_deblk_luma_a9.s b/common/arm/ih264_deblk_luma_a9.s
index 3e6a4d9..9217ed2 100644
--- a/common/arm/ih264_deblk_luma_a9.s
+++ b/common/arm/ih264_deblk_luma_a9.s
@@ -47,7 +47,7 @@
 .text
 .p2align 2
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -83,7 +83,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_luma_horz_bslt4_a9
 
@@ -187,7 +187,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -217,7 +217,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_luma_horz_bs4_a9
 
@@ -353,7 +353,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -389,7 +389,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_luma_vert_bslt4_a9
 
@@ -574,7 +574,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -604,7 +604,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_luma_vert_bs4_a9
 
@@ -800,7 +800,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -830,7 +830,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_luma_vert_bs4_mbaff_a9
 
@@ -942,7 +942,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -978,7 +978,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
     .global ih264_deblk_luma_vert_bslt4_mbaff_a9
 
diff --git a/common/arm/ih264_default_weighted_pred_a9q.s b/common/arm/ih264_default_weighted_pred_a9q.s
index 94cda46..a4688f2 100644
--- a/common/arm/ih264_default_weighted_pred_a9q.s
+++ b/common/arm/ih264_default_weighted_pred_a9q.s
@@ -17,14 +17,13 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_default_weighted_pred_a9q.s
 @*
 @* @brief
 @*  Contains function definitions for default weighted prediction.
-@* Functions are coded using NEON intrinsics and can be compiled using ARM RVCT
 @*
 @* @author
 @*  Kaushik Senthoor R
@@ -38,7 +37,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 @*******************************************************************************
 @* @function
 @*  ih264_default_weighted_pred_luma_a9q()
@@ -82,7 +81,7 @@
 @*  (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
 @*
 @*******************************************************************************
-@*/
+@*
 @void ih264_default_weighted_pred_luma_a9q(UWORD8 *pu1_src1,
 @                                          UWORD8 *pu1_src2,
 @                                          UWORD8 *pu1_dst,
@@ -256,7 +255,7 @@
 @*  (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
 @*
 @*******************************************************************************
-@*/
+@*
 @void ih264_default_weighted_pred_chroma_a9q(UWORD8 *pu1_src1,
 @                                            UWORD8 *pu1_src2,
 @                                            UWORD8 *pu1_dst,
diff --git a/common/arm/ih264_ihadamard_scaling_a9.s b/common/arm/ih264_ihadamard_scaling_a9.s
index 687099a..c7feddd 100644
--- a/common/arm/ih264_ihadamard_scaling_a9.s
+++ b/common/arm/ih264_ihadamard_scaling_a9.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @ *******************************************************************************
 @ * @file
 @ *  ih264_ihadamard_scaling_a9.s
@@ -37,7 +37,7 @@
 @ *  None
 @ *
 @ *******************************************************************************
-@ */
+@ *
 @ * @brief This function performs a 4x4 inverse hadamard transform on the 4x4 DC coefficients
 @ * of a 16x16 intra prediction macroblock, and then performs scaling.
 @ * prediction buffer
@@ -69,10 +69,10 @@
 @ * @remarks none
 @ *
 @ *******************************************************************************
-@ */
+@ *
 @ *
 @ *******************************************************************************
-@ */
+@ *
 @ void ih264_ihadamard_scaling_4x4(WORD16* pi2_src,
 @       WORD16* pi2_out,
 @       const UWORD16 *pu2_iscal_mat,
@@ -161,7 +161,7 @@
 
 
 @ *******************************************************************************
-@ */
+@ *
 @ * @brief This function performs a 2x2 inverse hadamard transform for chroma block
 @ *
 @ * @par Description:
@@ -189,10 +189,10 @@
 @ * @remarks none
 @ *
 @ *******************************************************************************
-@ */
+@ *
 @ *
 @ *******************************************************************************
-@ */
+@ *
 @ void ih264_ihadamard_scaling_2x2_uv(WORD16* pi2_src,
 @                                  WORD16* pi2_out,
 @                                  const UWORD16 *pu2_iscal_mat,
diff --git a/common/arm/ih264_inter_pred_chroma_a9q.s b/common/arm/ih264_inter_pred_chroma_a9q.s
index afd2860..6681a7c 100644
--- a/common/arm/ih264_inter_pred_chroma_a9q.s
+++ b/common/arm/ih264_inter_pred_chroma_a9q.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_inter_pred_chroma_a9q.s
@@ -36,16 +36,16 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
 @
 
-@/**
-@/**
-@/**
+@**
+@**
+@**
 @
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -85,7 +85,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
 @void ih264_inter_pred_chroma(UWORD8 *pu1_src,
 @                             UWORD8 *pu1_dst,
@@ -112,8 +112,6 @@
 
 ih264_inter_pred_chroma_a9q:
 
-
-
     stmfd         sp!, {r4-r12, r14}    @store register values to stack
     vstmdb        sp!, {d8-d15}         @push neon registers to stack
     ldr           r4, [sp, #104]
diff --git a/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s b/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s
index ea6bba0..62b4b94 100644
--- a/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s
+++ b/common/arm/ih264_inter_pred_filters_luma_horz_a9q.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_inter_pred_luma_horz_a9q.s
@@ -36,13 +36,13 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
 @
 
-@/**
-@/**
+@**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -76,7 +76,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
 @void ih264_inter_pred_luma_horz (
 @                            UWORD8 *pu1_src,
@@ -102,6 +102,9 @@
 
 ih264_inter_pred_luma_horz_a9q:
 
+
+
+
     stmfd         sp!, {r4-r12, r14}    @store register values to stack
     vstmdb        sp!, {d8-d15}         @push neon registers to stack
     ldr           r5, [sp, #104]        @Loads ht
@@ -116,7 +119,7 @@
     beq           loop_4
 
 loop_16:                                @when  wd=16
-    @// Processing row0 and row1
+    @ Processing row0 and row1
     vld1.8        {d2, d3, d4}, [r0], r2 @// Load row0                        ;for checking loop
     vext.8        d31, d2, d3, #5       @//extract a[5]                         (column1,row0)
     vld1.8        {d5, d6, d7}, [r0], r2 @// Load row1
@@ -173,7 +176,7 @@
     b             loop_16               @ loop if height == 8 or 16
 
 loop_8:
-@// Processing row0 and row1
+@ Processing row0 and row1
     vld1.8        {d5, d6}, [r0], r2    @// Load row1
     vext.8        d28, d5, d6, #5       @//extract a[5]                         (column1,row1)
     vld1.8        {d2, d3}, [r0], r2    @// Load row0
@@ -204,7 +207,7 @@
 
     beq           end_func              @ Branch if height==4
 
-    b             loop_8 @looping if height =8 or 16
+    b             loop_8                @looping if height =8 or 16
 
 loop_4:
     vld1.8        {d5, d6}, [r0], r2    @// Load row1
diff --git a/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s b/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s
index 5b29e02..65c40a6 100644
--- a/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s
+++ b/common/arm/ih264_inter_pred_filters_luma_vert_a9q.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_inter_pred_luma_vert_a9q.s
@@ -36,14 +36,14 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
 @
 
-@/**
-@/**
-@/**
+@**
+@**
+@**
 @ *******************************************************************************
 @ *
 @ * @brief
@@ -195,10 +195,10 @@
     subne         r0, r0, r2
     beq           end_func              @ Branch if height==4
 
-    b             loop_16 @ looping if height = 8 or 16
+    b             loop_16               @ looping if height = 8 or 16
 
 loop_8:
-@// Processing row0 and row1
+@ Processing row0 and row1
 
     vld1.u32      d0, [r0], r2          @ Vector load from src[0_0]
     vld1.u32      d1, [r0], r2          @ Vector load from src[1_0]
@@ -248,7 +248,7 @@
 
 
 loop_4:
-@// Processing row0 and row1
+@ Processing row0 and row1
 
     vld1.u32      d0[0], [r0], r2       @ Vector load from src[0_0]
     vld1.u32      d1[0], [r0], r2       @ Vector load from src[1_0]
diff --git a/common/arm/ih264_inter_pred_luma_bilinear_a9q.s b/common/arm/ih264_inter_pred_luma_bilinear_a9q.s
index 6a3c83d..8f049f8 100644
--- a/common/arm/ih264_inter_pred_luma_bilinear_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_bilinear_a9q.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_inter_pred_luma_bilinear_a9q.s
@@ -36,14 +36,14 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
 @
 
-@/**
-@/**
-@/**
+@**
+@**
+@**
 @ *******************************************************************************
 @ *  function:ih264_inter_pred_luma_bilinear
 @ *
@@ -89,7 +89,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
 @void ih264_inter_pred_luma_bilinear(UWORD8 *pu1_src1,
 @                                   UWORD8 *pu1_src2,
@@ -192,7 +192,7 @@
     subs          r12, r6, #8
     vst1.8        {q15}, [r2], r5       @//Store dest row7
 
-    beq           end_func @ end function if ht=8
+    beq           end_func              @ end function if ht=8
 
     vld1.8        {q0}, [r0], r3        @// Load row8 ;src1
     vaddl.u8      q10, d0, d4
@@ -275,7 +275,7 @@
     vqrshrun.s16  d31, q13, #1
     subs          r12, r6, #4
     vst1.8        {d31}, [r2], r5       @//Store dest row3
-    beq           end_func @ end function if ht=4
+    beq           end_func              @ end function if ht=4
 
     vld1.8        {d12}, [r1], r4       @// Load row4 ;src2
     vld1.8        {d8}, [r0], r3        @// Load row4 ;src1
@@ -298,7 +298,7 @@
     vqrshrun.s16  d31, q11, #1
     subs          r12, r6, #8
     vst1.8        {d31}, [r2], r5       @//Store dest row7
-    beq           end_func @ end function if ht=8
+    beq           end_func              @ end function if ht=8
 
     vld1.8        {d0}, [r0], r3        @// Load row8 ;src1
     vld1.8        {d4}, [r1], r4        @// Load row8  ;src2
@@ -367,7 +367,7 @@
     vqrshrun.s16  d31, q13, #1
     subs          r12, r6, #4
     vst1.32       d31[0], [r2], r5      @//Store dest row3
-    beq           end_func @ end function if ht=4
+    beq           end_func              @ end function if ht=4
 
     vld1.32       d12[0], [r1], r4      @// Load row4 ;src2
     vld1.32       d8[0], [r0], r3       @// Load row4 ;src1
diff --git a/common/arm/ih264_inter_pred_luma_copy_a9q.s b/common/arm/ih264_inter_pred_luma_copy_a9q.s
index 8ba2fbf..c0b0568 100644
--- a/common/arm/ih264_inter_pred_luma_copy_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_copy_a9q.s
@@ -17,8 +17,8 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
-@/**
+@**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -53,7 +53,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 @void ih264_inter_pred_luma_copy (
 @                            UWORD8 *pu1_src,
 @                            UWORD8 *pu1_dst,
@@ -182,7 +182,7 @@
     ldmfd         sp!, {r4-r12, r15}    @Reload the registers from SP
 
 
-@ /*
+@ *
 @ ********************************************************************************
 @ *
 @ * @brief This function copies a 4x4 block to destination
@@ -208,7 +208,7 @@
 @ * Currently wd and height is not used, ie a 4x4 block is always copied
 @ *
 @ *******************************************************************************
-@ */
+@ *
 @ void ih264_interleave_copy(WORD16 *pi2_src,
 @                            UWORD8 *pu1_out,
 @                            WORD32 pred_strd,
diff --git a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
index 43321a8..54183f0 100644
--- a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_inter_pred_luma_horz_hpel_vert_hpel_a9q.s
@@ -36,14 +36,14 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
 @
 
-@/**
-@/**
-@/**
+@**
+@**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -88,7 +88,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/;
+@*;
 
 @void ih264_inter_pred_luma_horz_hpel_vert_hpel(UWORD8 *pu1_src,
 @                                UWORD8 *pu1_dst,
diff --git a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
index 65a6de7..c8edf38 100644
--- a/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q.s
@@ -36,14 +36,14 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
 @
 
-@/**
-@/**
-@/**
+@**
+@**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -91,7 +91,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/;
+@*;
 
 @void ih264_inter_pred_luma_horz_hpel_vert_qpel(UWORD8 *pu1_src,
 @                                UWORD8 *pu1_dst,
@@ -835,7 +835,7 @@
     vmov          q7, q14
     vst1.32       d30, [r1], r3         @ store row 3
 
-    bgt           loop_8 @if height =8 or 16  loop
+    bgt           loop_8                @if height =8 or 16  loop
     b             end_func
 
 loop_4_start:
diff --git a/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s
index c39ae01..ab1d1d1 100644
--- a/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_inter_pred_luma_horz_qpel_a9q.s
@@ -30,19 +30,19 @@
 @*
 @* @par List of Functions:
 @*
-@*  - ih264_inter_pred_luma_horz_qpe_a9ql()
+@*  - ih264_inter_pred_luma_horz_qpel_a9q()
 @*
 @* @remarks
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
 @
 
-@/**
-@/**
+@**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -79,7 +79,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
 @void ih264_inter_pred_luma_horz (
 @                            UWORD8 *pu1_src,
@@ -126,7 +126,7 @@
     beq           loop_4
 
 loop_16:                                @when  wd=16
-    @// Processing row0 and row1
+    @ Processing row0 and row1
     vld1.8        {d2, d3, d4}, [r0], r2 @// Load row0
     vext.8        d31, d2, d3, #5       @//extract a[5]                         (column1,row0)
     vld1.8        {d5, d6, d7}, [r0], r2 @// Load row1
@@ -187,7 +187,7 @@
     b             loop_16
 
 loop_8:
-@// Processing row0 and row1
+@ Processing row0 and row1
 
     vld1.8        {d5, d6}, [r0], r2    @// Load row1
     vext.8        d28, d5, d6, #5       @//extract a[5]                         (column1,row1)
@@ -221,7 +221,7 @@
     subs          r5, r5, #2            @ 2 rows done, decrement by 2
 
     beq           end_func              @ Branch if height==4
-    b             loop_8 @looping if height == 8 or 16
+    b             loop_8                @looping if height == 8 or 16
 
 loop_4:
     vld1.8        {d5, d6}, [r0], r2    @// Load row1
diff --git a/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s
index 565cc80..3c63ca3 100644
--- a/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_inter_pred_luma_horz_qpel_vert_hpel_a9q.s
@@ -36,14 +36,14 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
 @
 
-@/**
-@/**
-@/**
+@**
+@**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -91,7 +91,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/;
+@*;
 
 @void ih264_inter_pred_luma_horz_qpel_vert_hpel(UWORD8 *pu1_src,
 @                                UWORD8 *pu1_dst,
diff --git a/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s
index 3c8b60a..cfe03a0 100644
--- a/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_inter_pred_luma_horz_qpel_vert_qpel_a9q.s
@@ -36,14 +36,11 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
 @
 
-@/**
-@/**
-@/**
 @*******************************************************************************
 @*
 @* @brief
@@ -90,7 +87,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/;
+@*;
 
 @void ih264_inter_pred_luma_horz_qpel_vert_qpel(UWORD8 *pu1_src,
 @                                UWORD8 *pu1_dst,
diff --git a/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s
index d45055e..e2c68ef 100644
--- a/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_vert_qpel_a9q.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_inter_pred_luma_vert_qpel_a9q.s
@@ -36,13 +36,11 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
 @
 
-@/**
-@/**
 @*******************************************************************************
 @*
 @* @brief
@@ -79,7 +77,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
 @void ih264_inter_pred_luma_vert (
 @                            UWORD8 *pu1_src,
@@ -211,12 +209,12 @@
     subne         r0, r0, r2
     beq           end_func              @ Branch if height==4
 
-    b             loop_16 @ looping if height = 8 or 16
+    b             loop_16               @ looping if height = 8 or 16
 
 
 loop_8:
 
-    @// Processing row0 and row1
+    @ Processing row0 and row1
     vld1.u32      d0, [r0], r2          @ Vector load from src[0_0]
     vld1.u32      d1, [r0], r2          @ Vector load from src[1_0]
     vld1.u32      d2, [r0], r2          @ Vector load from src[2_0]
@@ -270,7 +268,7 @@
     b             loop_8                @looping if height == 8 or 16
 
 loop_4:
-@// Processing row0 and row1
+@ Processing row0 and row1
 
     vld1.u32      d0[0], [r0], r2       @ Vector load from src[0_0]
     vld1.u32      d1[0], [r0], r2       @ Vector load from src[1_0]
diff --git a/common/arm/ih264_intra_pred_chroma_a9q.s b/common/arm/ih264_intra_pred_chroma_a9q.s
index d03fc55..ccd5c0d 100644
--- a/common/arm/ih264_intra_pred_chroma_a9q.s
+++ b/common/arm/ih264_intra_pred_chroma_a9q.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_intra_pred_chroma_a9q.s
@@ -39,15 +39,11 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
-@/* All the functions here are replicated from ih264_chroma_intra_pred_filters.c
+@* All the functions here are replicated from ih264_chroma_intra_pred_filters.c
 @
 
-@/**
-@/**
-@/**
-@
 .text
 .p2align 2
 
@@ -60,7 +56,7 @@
 
 scratch_intrapred_chroma_plane_addr1:
     .long ih264_gai1_intrapred_chroma_plane_coeffs2 - scrlblc2 - 8
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_chroma_8x8_mode_dc
@@ -91,7 +87,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_chroma_8x8_mode_dc(UWORD8 *pu1_src,
 @                                        UWORD8 *pu1_dst,
 @                                        WORD32 src_strd,
@@ -105,8 +101,6 @@
 @   r3 =>  dst_strd
 @   r4 =>  ui_neighboravailability
 
-
-
     .global ih264_intra_pred_chroma_8x8_mode_dc_a9q
 
 ih264_intra_pred_chroma_8x8_mode_dc_a9q:
@@ -191,10 +185,10 @@
 
 
 
-@/******************************************************************************
+@******************************************************************************
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_chroma_8x8_mode_horz
@@ -226,7 +220,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 @void ih264_intra_pred_chroma_8x8_mode_horz(UWORD8 *pu1_src,
 @                                         UWORD8 *pu1_dst,
 @                                         WORD32 src_strd,
@@ -270,7 +264,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_chroma_8x8_mode_vert
@@ -339,10 +333,10 @@
 
 
 
-@/******************************************************************************
+@******************************************************************************
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_chroma_8x8_mode_plane
@@ -373,7 +367,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_chroma_8x8_mode_plane(UWORD8 *pu1_src,
 @                                        UWORD8 *pu1_dst,
 @                                        WORD32 src_strd,
@@ -393,7 +387,6 @@
     stmfd         sp!, {r4-r10, r12, lr}
     vpush         {d8-d15}
 
-
     vld1.32       d0, [r0]
     add           r10, r0, #10
     vld1.32       d1, [r10]
@@ -542,7 +535,6 @@
 
 end_func_plane:
 
-
     vpop          {d8-d15}
     ldmfd         sp!, {r4-r10, r12, pc}
 
diff --git a/common/arm/ih264_intra_pred_luma_16x16_a9q.s b/common/arm/ih264_intra_pred_luma_16x16_a9q.s
index e38e203..0dd82f3 100644
--- a/common/arm/ih264_intra_pred_luma_16x16_a9q.s
+++ b/common/arm/ih264_intra_pred_luma_16x16_a9q.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_intra_pred_luma_16x16_a9q.s
@@ -39,14 +39,14 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
-@/* All the functions here are replicated from ih264_intra_pred_filters.c
+@* All the functions here are replicated from ih264_intra_pred_filters.c
 @
 
-@/**
-@/**
-@/**
+@**
+@**
+@**
 @
 
 .text
@@ -57,10 +57,10 @@
 .hidden ih264_gai1_intrapred_luma_plane_coeffs
 scratch_intrapred_addr1:
     .long ih264_gai1_intrapred_luma_plane_coeffs - scrlbl1 - 8
-@/**
+@**
 @*******************************************************************************
 @*
-@*ih264_intra_pred_luma_16x16_mode_vert_a9q
+@*ih264_intra_pred_luma_16x16_mode_vert
 @*
 @* @brief
 @*   Perform Intra prediction for  luma_16x16 mode:vertical
@@ -135,13 +135,13 @@
 
 
 
-@/******************************************************************************
+@******************************************************************************
 
 
-@/**
+@**
 @*******************************************************************************
 @*
-@*ih264_intra_pred_luma_16x16_mode_horz_a9q
+@*ih264_intra_pred_luma_16x16_mode_horz
 @*
 @* @brief
 @*  Perform Intra prediction for  luma_16x16 mode:horizontal
@@ -170,7 +170,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 @void ih264_intra_pred_luma_16x16_mode_horz(UWORD8 *pu1_src,
 @                                         UWORD8 *pu1_dst,
 @                                         WORD32 src_strd,
@@ -213,13 +213,13 @@
 
 
 
-@/******************************************************************************
+@******************************************************************************
 
 
-@/**
+@**
 @*******************************************************************************
 @*
-@*ih264_intra_pred_luma_16x16_mode_dc_a9q
+@*ih264_intra_pred_luma_16x16_mode_dc
 @*
 @* @brief
 @*  Perform Intra prediction for  luma_16x16 mode:DC
@@ -247,7 +247,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_luma_16x16_mode_dc(UWORD8 *pu1_src,
 @                                       UWORD8 *pu1_dst,
 @                                       WORD32 src_strd,
@@ -300,7 +300,7 @@
     vdup.u8       q0, d0[0]
     b             str_pred
 
-left_available: @ONLY LEFT AVAILABLE
+left_available:                         @ONLY LEFT AVAILABLE
     vld1.u8       {q0}, [r0]
     vpaddl.u8     q0, q0
     vadd.u16      d0, d0, d1
@@ -337,13 +337,13 @@
 
 
 
-@/******************************************************************************
+@******************************************************************************
 
 
-@/**
+@**
 @*******************************************************************************
 @*
-@*ih264_intra_pred_luma_16x16_mode_plane_a9q
+@*ih264_intra_pred_luma_16x16_mode_plane
 @*
 @* @brief
 @*  Perform Intra prediction for  luma_16x16 mode:PLANE
@@ -371,7 +371,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_luma_16x16_mode_plane(UWORD8 *pu1_src,
 @                                        UWORD8 *pu1_dst,
 @                                        WORD32 src_strd,
diff --git a/common/arm/ih264_intra_pred_luma_4x4_a9q.s b/common/arm/ih264_intra_pred_luma_4x4_a9q.s
index cb386ea..5cc7e23 100644
--- a/common/arm/ih264_intra_pred_luma_4x4_a9q.s
+++ b/common/arm/ih264_intra_pred_luma_4x4_a9q.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_intra_pred_luma_4x4_a9q.s
@@ -44,21 +44,16 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
-@/* All the functions here are replicated from ih264_intra_pred_filters.c
-@
-
-@/**
-@/**
-@/**
+@* All the functions here are replicated from ih264_intra_pred_filters.c
 @
 
 .text
 .p2align 2
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_4x4_mode_vert
@@ -128,10 +123,10 @@
 
 
 
-@/******************************************************************************
+@******************************************************************************
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_4x4_mode_horz
@@ -163,7 +158,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 @void ih264_intra_pred_luma_4x4_mode_horz(UWORD8 *pu1_src,
 @                                         UWORD8 *pu1_dst,
 @                                         WORD32 src_strd,
@@ -210,10 +205,10 @@
 
 
 
-@/******************************************************************************
+@******************************************************************************
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_4x4_mode_dc
@@ -244,7 +239,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_luma_4x4_mode_dc(UWORD8 *pu1_src,
 @                                       UWORD8 *pu1_dst,
 @                                       WORD32 src_strd,
@@ -352,7 +347,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_4x4_mode_diag_dl
@@ -383,7 +378,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_luma_4x4_mode_diag_dl(UWORD8 *pu1_src,
 @                                            UWORD8 *pu1_dst,
 @                                            WORD32 src_strd,
@@ -434,7 +429,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_4x4_mode_diag_dr
@@ -465,7 +460,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_luma_4x4_mode_diag_dr(UWORD8 *pu1_src,
 @                                            UWORD8 *pu1_dst,
 @                                            WORD32 src_strd,
@@ -514,7 +509,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_4x4_mode_vert_r
@@ -545,7 +540,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_luma_4x4_mode_vert_r(UWORD8 *pu1_src,
 @                                            UWORD8 *pu1_dst,
 @                                            WORD32 src_strd,
@@ -596,7 +591,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_4x4_mode_horz_d
@@ -627,7 +622,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_luma_4x4_mode_horz_d(UWORD8 *pu1_src,
 @                                            UWORD8 *pu1_dst,
 @                                            WORD32 src_strd,
@@ -659,7 +654,7 @@
     vqrshrun.s16  d5, q12, #2
     sub           r5, r3, #2
     vmov.8        d6, d5
-    vtrn.8        d4, d5 @
+    vtrn.8        d4, d5                @
     vst1.u16      {d5[1]}, [r1]!
     vst1.16       {d6[2]}, [r1], r5
     vst1.u16      {d4[1]}, [r1]!
@@ -678,7 +673,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_4x4_mode_vert_l
@@ -709,7 +704,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_luma_4x4_mode_vert_l(UWORD8 *pu1_src,
 @                                            UWORD8 *pu1_dst,
 @                                            WORD32 src_strd,
@@ -759,7 +754,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_4x4_mode_horz_u
@@ -790,7 +785,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_luma_4x4_mode_horz_u(UWORD8 *pu1_src,
 @                                           UWORD8 *pu1_dst,
 @                                           WORD32 src_strd,
@@ -825,9 +820,9 @@
     vext.8        d6, d5, d4, #1
     vst1.8        {d4[2]}, [r1]!
     vst1.8        {d6[0]}, [r1]!
-    vtrn.8        d6, d5 @
+    vtrn.8        d6, d5                @
     sub           r5, r3, #2
-    vtrn.8        d4, d6 @
+    vtrn.8        d4, d6                @
     vdup.8        d7, r9
     vst1.16       {d6[0]}, [r1], r5
     vst1.16       {d6[0]}, [r1]!
diff --git a/common/arm/ih264_intra_pred_luma_8x8_a9q.s b/common/arm/ih264_intra_pred_luma_8x8_a9q.s
index 6da1c95..352d29d 100644
--- a/common/arm/ih264_intra_pred_luma_8x8_a9q.s
+++ b/common/arm/ih264_intra_pred_luma_8x8_a9q.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_intra_pred_luma_8x8_a9q.s
@@ -45,17 +45,11 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
-@/* All the functions here are replicated from ih264_intra_pred_filters.c
+@* All the functions here are replicated from ih264_intra_pred_filters.c
 @
 
-@/**
-@/**
-@/**
-@
-
-
 .text
 .p2align 2
 
@@ -64,7 +58,7 @@
 scratch_intrapred_addr_8x8:
     .long ih264_gai1_intrapred_luma_8x8_horz_u -  scrlb8x8l2 - 8
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_8x8_mode_ref_filtering
@@ -95,7 +89,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_luma_8x8_mode_ref_filtering(UWORD8 *pu1_src,
 @                                                 UWORD8 *pu1_dst)
 
@@ -111,7 +105,6 @@
     stmfd         sp!, {r4-r12, r14}    @store register values to stack
     vpush         {d8-d15}
 
-
     vld1.u8       {q0}, [r0]!           @
     vld1.u8       {q1}, [r0]
     add           r0, r0, #8            @
@@ -141,6 +134,7 @@
 
 
 end_func_ref_filt:
+
     vpop          {d8-d15}
     ldmfd         sp!, {r4-r12, pc}     @Restoring registers from stack
 
@@ -149,7 +143,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_8x8_mode_vert
@@ -219,10 +213,10 @@
 
 
 
-@/******************************************************************************
+@******************************************************************************
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_8x8_mode_horz
@@ -254,7 +248,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 @void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src,
 @                                         UWORD8 *pu1_dst,
 @                                         WORD32 src_strd,
@@ -299,10 +293,10 @@
 
 
 
-@/******************************************************************************
+@******************************************************************************
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_8x8_mode_dc
@@ -333,7 +327,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src,
 @                                       UWORD8 *pu1_dst,
 @                                       WORD32 src_strd,
@@ -413,7 +407,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_8x8_mode_diag_dl
@@ -444,7 +438,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src,
 @                                            UWORD8 *pu1_dst,
 @                                            WORD32 src_strd,
@@ -506,7 +500,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_8x8_mode_diag_dr
@@ -537,7 +531,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src,
 @                                            UWORD8 *pu1_dst,
 @                                            WORD32 src_strd,
@@ -597,7 +591,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_8x8_mode_vert_r
@@ -628,7 +622,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src,
 @                                            UWORD8 *pu1_dst,
 @                                            WORD32 src_strd,
@@ -717,7 +711,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_8x8_mode_horz_d
@@ -748,7 +742,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src,
 @                                            UWORD8 *pu1_dst,
 @                                            WORD32 src_strd,
@@ -791,7 +785,7 @@
     vmov.8        q4, q2
     vmov.8        q5, q3
     sub           r6, r3, #6
-    vtrn.8        q4, q5 @
+    vtrn.8        q4, q5                @
     vmov.8        q6, q4
     vmov.8        q7, q5
     sub           r5, r3, #4
@@ -835,7 +829,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_8x8_mode_vert_l
@@ -866,7 +860,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src,
 @                                            UWORD8 *pu1_dst,
 @                                            WORD32 src_strd,
@@ -887,6 +881,7 @@
 
     stmfd         sp!, {r4-r12, r14}    @Restoring registers from stack
     vpush         {d8-d15}
+
     add           r0, r0, #9
     vld1.u8       {q0}, [r0]
     add           r0, r0, #1
@@ -935,7 +930,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @*ih264_intra_pred_luma_8x8_mode_horz_u
@@ -966,7 +961,7 @@
 @* @remarks
 @*  None
 @*
-@*******************************************************************************/
+@*******************************************************************************
 @void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src,
 @                                           UWORD8 *pu1_dst,
 @                                           WORD32 src_strd,
diff --git a/common/arm/ih264_iquant_itrans_recon_a9.s b/common/arm/ih264_iquant_itrans_recon_a9.s
index f71ca69..4e49f6a 100644
--- a/common/arm/ih264_iquant_itrans_recon_a9.s
+++ b/common/arm/ih264_iquant_itrans_recon_a9.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @ *******************************************************************************
 @ * @file
 @ *  ih264_iquant_itrans_recon_a9.s
@@ -38,8 +38,8 @@
 @ *  None
 @ *
 @ *******************************************************************************
-@*/
-@/**
+@*
+@**
 @ *******************************************************************************
 @ *
 @ * @brief
@@ -82,7 +82,7 @@
 @ *  None
 @ *
 @ *******************************************************************************
-@ */
+@ *
 @void ih264_iquant_itrans_recon_4x4(WORD16 *pi2_src,
 @                                   UWORD8 *pu1_pred,
 @                                   UWORD8 *pu1_out,
@@ -225,7 +225,7 @@
     ldmfd         sp!, {r4-r12, r15}    @Reload the registers from SP
 
 
-    @/**
+@**
 @ *******************************************************************************
 @ *
 @ * @brief
@@ -268,7 +268,7 @@
 @ *  None
 @ *
 @ *******************************************************************************
-@ */
+@ *
 @void ih264_iquant_itrans_recon_chroma_4x4(WORD16 *pi2_src,
 @                                   UWORD8 *pu1_pred,
 @                                   UWORD8 *pu1_out,
@@ -416,7 +416,7 @@
     ldmfd         sp!, {r4-r12, r15}    @Reload the registers from SP
 
 
-@/*
+@*
 @ *******************************************************************************
 @ *
 @ * @brief
@@ -459,7 +459,7 @@
 @ *  None
 @ *
 @ *******************************************************************************
-@ */
+@ *
 @void ih264_iquant_itrans_recon_8x8(WORD16 *pi2_src,
 @                                   UWORD8 *pu1_pred,
 @                                   UWORD8 *pu1_out,
diff --git a/common/arm/ih264_iquant_itrans_recon_dc_a9.s b/common/arm/ih264_iquant_itrans_recon_dc_a9.s
index 8d71bdb..97c4724 100644
--- a/common/arm/ih264_iquant_itrans_recon_dc_a9.s
+++ b/common/arm/ih264_iquant_itrans_recon_dc_a9.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @ *******************************************************************************
 @ * @file
 @ *  ih264_iquant_itrans_recon_dc_a9.s
@@ -37,8 +37,8 @@
 @ *  None
 @ *
 @ *******************************************************************************
-@*/
-@/**
+@*
+@**
 @ *******************************************************************************
 @ *
 @ * @brief
@@ -83,7 +83,7 @@
 @ *  None
 @ *
 @ *******************************************************************************
-@ */
+@ *
 @void ih264_iquant_itrans_recon_4x4_dc(WORD16 *pi2_src,
 @                                   UWORD8 *pu1_pred,
 @                                   UWORD8 *pu1_out,
@@ -167,7 +167,7 @@
 
 
 
-@/*
+@*
 @ *******************************************************************************
 @ *
 @ * @brief
@@ -212,7 +212,7 @@
 @ *  None
 @ *
 @ *******************************************************************************
-@ */
+@ *
 @void ih264_iquant_itrans_recon_8x8_dc(WORD16 *pi2_src,
 @                                   UWORD8 *pu1_pred,
 @                                   UWORD8 *pu1_out,
@@ -300,7 +300,7 @@
     ldmfd         sp!, {r4-r8, r15}
 
 
-@ /*
+@ *
 @ ********************************************************************************
 @ *
 @ * @brief This function reconstructs a 4x4 sub block from quantized resiude and
@@ -328,7 +328,7 @@
 @ * @remarks none
 @ *
 @ *******************************************************************************
-@ */
+@ *
 @ void ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 *pi2_src,
 @                                             UWORD8 *pu1_pred,
 @                                             UWORD8 *pu1_out,
@@ -368,6 +368,7 @@
 
     vmov.u16      q15, #0x00ff
 
+
     vld1.u8       d18, [r2], r0         @load out [8 bit size) -8 coeffs
     vaddw.u8      q1, q0, d2            @Add pred
     vld1.u8       d19, [r2], r0
diff --git a/common/arm/ih264_itrans_recon_a9.s b/common/arm/ih264_itrans_recon_a9.s
index 1d74da5..769d5d7 100644
--- a/common/arm/ih264_itrans_recon_a9.s
+++ b/common/arm/ih264_itrans_recon_a9.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @ *******************************************************************************
 @ * @file
 @ *  ih264_itrans_recon_neon_a9.s
@@ -33,8 +33,8 @@
 @ *  None
 @ *
 @ *******************************************************************************
-@*/
-@/**
+@*
+@**
 @ *******************************************************************************
 @ *
 @ * @brief
@@ -72,7 +72,7 @@
 @ *
 @ *
 @ *******************************************************************************
-@ */
+@ *
 @void ih264_itrans_recon_4x4(
 @       WORD16 *pi2_src,
 @       UWORD8 *pu1_pred,
diff --git a/common/arm/ih264_mem_fns_neon.s b/common/arm/ih264_mem_fns_neon.s
index 2808897..39ad9b3 100644
--- a/common/arm/ih264_mem_fns_neon.s
+++ b/common/arm/ih264_mem_fns_neon.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @ *******************************************************************************
 @ * @file
 @ *  ih264_mem_fns_neon.s
@@ -40,9 +40,9 @@
 @ *  None
 @ *
 @ *******************************************************************************
-@*/
+@*
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -65,7 +65,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 @void ih264_memcpy_mul_8(UWORD8 *pu1_dst,
 @                    UWORD8 *pu1_src,
 @                   UWORD8 num_bytes)
@@ -94,7 +94,7 @@
 
 
 @*******************************************************************************
-@*/
+@*
 @void ih264_memcpy(UWORD8 *pu1_dst,
 @                  UWORD8 *pu1_src,
 @                  UWORD8 num_bytes)
@@ -143,6 +143,8 @@
 
 
 
+
+
     .global ih264_memset_mul_8_a9q
 
 ih264_memset_mul_8_a9q:
@@ -208,6 +210,8 @@
 
 
 
+
+
     .global ih264_memset_16bit_mul_8_a9q
 
 ih264_memset_16bit_mul_8_a9q:
diff --git a/common/arm/ih264_padding_neon.s b/common/arm/ih264_padding_neon.s
index 9bab268..e7a1f91 100644
--- a/common/arm/ih264_padding_neon.s
+++ b/common/arm/ih264_padding_neon.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @ *******************************************************************************
 @ * @file
 @ *  ih264_padding_neon.s
@@ -39,10 +39,10 @@
 @ *  None
 @ *
 @ *******************************************************************************
-@*/
+@*
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief pad at the top of a 2d array
@@ -67,7 +67,7 @@
 @* @remarks none
 @*
 @*******************************************************************************
-@*/
+@*
 @void ih264_pad_top(UWORD8 *pu1_src,
 @                   WORD32 src_strd,
 @                   WORD32 wd,
@@ -110,7 +110,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -147,7 +147,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 @#if PAD_LEFT_LUMA == C
 @void ih264_pad_left_luma(UWORD8 *pu1_src,
 @                        WORD32 src_strd,
@@ -160,6 +160,7 @@
 @   r3 => pad_size
 
 
+
     .global ih264_pad_left_luma_a9q
 
 ih264_pad_left_luma_a9q:
@@ -245,7 +246,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -282,7 +283,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 @#if PAD_LEFT_CHROMA == C
 @void ih264_pad_left_chroma(UWORD8 *pu1_src,
 @                            WORD32 src_strd,
@@ -373,7 +374,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -410,7 +411,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 @#if PAD_RIGHT_LUMA == C
 @void ih264_pad_right_luma(UWORD8 *pu1_src,
 @                        WORD32 src_strd,
@@ -519,7 +520,7 @@
 
 
 
-@/**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -556,7 +557,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 @#if PAD_RIGHT_CHROMA == C
 @void ih264_pad_right_chroma(UWORD8 *pu1_src,
 @                        WORD32 src_strd,
diff --git a/common/arm/ih264_resi_trans_a9.s b/common/arm/ih264_resi_trans_a9.s
deleted file mode 100644
index 08821f5..0000000
--- a/common/arm/ih264_resi_trans_a9.s
+++ /dev/null
@@ -1,604 +0,0 @@
-@/******************************************************************************
-@ *
-@ * Copyright (C) 2015 The Android Open Source Project
-@ *
-@ * Licensed under the Apache License, Version 2.0 (the "License");
-@ * you may not use this file except in compliance with the License.
-@ * You may obtain a copy of the License at:
-@ *
-@ * http://www.apache.org/licenses/LICENSE-2.0
-@ *
-@ * Unless required by applicable law or agreed to in writing, software
-@ * distributed under the License is distributed on an "AS IS" BASIS,
-@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-@ * See the License for the specific language governing permissions and
-@ * limitations under the License.
-@ *
-@ *****************************************************************************
-@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
-@*/
-@/**
-@*******************************************************************************
-@* @file
-@*  ih264_resi_trans_a9.s
-@*
-@* @brief
-@*  Contains function definitions for residual and forward trans
-@*
-@* @author
-@*  Ittiam
-@*
-@* @par List of Functions:
-@*  ih264_resi_trans_4x4_a9
-@*  ih264_resi_trans_8x8_a9
-@* @remarks
-@*  None
-@*
-@*******************************************************************************
-
-
-.text
-.p2align 2
-@*****************************************************************************
-@*
-@* Function Name     : ih264_resi_trans_4x4_a9
-@* Description       : This function does cf4 of H264 followed by and approximate scaling
-@*
-@* Arguments         :
-@                       R0 :pointer to src buffer
-@                       R1 :pointer to pred buffer
-@                       R2 :pointer to dst buffer
-@                       R3 :src_stride
-@                       STACk :pred_stride,dst_stride
-
-@* Values Returned   : NONE
-@*
-@* Register Usage    :
-@* Stack Usage       :
-@* Cycles            : Around
-@* Interruptiaility  : Interruptable
-@*
-@* Known Limitations
-@*   \Assumptions    :
-@*
-@* Revision History  :
-@*         DD MM YYYY    Author(s)   Changes
-@*         30 12 2009    100633      First version
-@*
-@*****************************************************************************
-
-
-    .global ih264_resi_trans_4x4_a9
-    .extern g_scal_coff_h264_4x4
-g_scal_coff_h264_4x4_addr:
-    .long g_scal_coff_h264_4x4 - 4x4lbl - 8
-
-ih264_resi_trans_4x4_a9:
-
-    @R0 :pointer to src buffer
-    @R1 :pointer to pred buffer
-    @R2 :pointer to dst buffer
-    @R3 :src_stride
-    @STACk :pred_stride,dst_stride
-
-    push          {r4-r12, lr}          @push all the variables first
-
-    mov           r6, sp
-    add           r6, r6, #40           @decrement stack pointer,to accomodate two variables
-    ldmfd         r6, {r4-r5}           @load the strides into registers
-                                        @R4 pred_stride
-                                        @R5 dst_stride
-
-
-    @we have to give the stride as post inrement in VLDR1
-    @but since thr stride is from end of row 1 to start of row 2,
-    @we need to add the size of the curent row to strides ie we need to add 4 to it (4 bytes)
-    @ADD R3,#4
-    @ADD R4,#4
-    @ADD R5,#4
-    @in case of dst the stride represnts 16 bit ie 2*8bits
-    @hence we need to add #4 to it and thenm multiply by 2
-    @--------------------function loading done------------------------
-
-    @lets find residual
-    @data is like 1a -> d0[1:31]  d0[32:64]
-    @                    a b c d   # # # #
-    vld1.u8       d30, [r0], r3         @load 4 pixels of row1 current buffer
-    vld1.u8       d31, [r1], r4         @load 4 pixels of row1 pred buffer
-    @ data is like 1a -> q4[1:63]  q4[64:148]
-    @                    d8[1:63]  d9[1:63]
-    @                    a b c d   # # # #
-
-    vld1.u8       d28, [r0], r3         @load row 2 of src to d28[0]
-    vld1.u8       d29, [r1], r4         @load row2 of pred to d29[0]
-
-    vld1.u8       d26, [r0], r3         @load row 3 of src to d26[0]
-    vsubl.u8      q0, d30, d31          @curr - pred for row one
-
-    vld1.u8       d27, [r1], r4         @load row 3of pred t0 d27[0]
-    vsubl.u8      q1, d28, d29          @find row 2 of src -pred to d0
-
-    vld1.u8       d24, [r0], r3         @load row 4 of src to d24[0]
-
-    vld1.u8       d25, [r1], r4         @load row 4 of src tp d25[0]
-    vsubl.u8      q2, d26, d27          @load src-pred row 3 to d[2]
-
-    lsl           r5, r5, #2            @ multiply dst stride by since we are storing 32 bit values
-    ldr           r6, g_scal_coff_h264_4x4_addr
-4x4lbl:
-    add           r6, r6, pc            @  load the address of global array
-
-    vsubl.u8      q3, d24, d25          @load row 4 of src - pred to q6
-
-    @after this
-    @D0  -> 1a
-    @D2 -> 2a
-    @D4 -> 3a
-    @D6 -> 4a
-
-    @transpose the matrix so that we can do the horizontal transform first
-    @#1 #2  #3  #4
-    @a  b   c   d       ---- D0
-    @e  f   g   h       -----D2
-    @i  j   k   l       -----D4
-    @m  n   o   p       -----D6
-    @transpose the inner 2x2 blocks
-    vtrn.16       d0, d2
-    vld1.s16      {q10}, [r6]!          @   load the scaling values 0-7;
-    vtrn.16       d4, d6
-    @a  e   c   g
-    @b  f   d   h
-    @i  m   k   o
-    @j  n   l   p
-    vtrn.32       d0, d4
-    vtrn.32       d2, d6
-    @a  e   i   m  #1  -- D0 --- x4
-    @b  f   j   n  #2  -- D2 --- x5
-    @c  g   k   o  #3  -- D4 ----x6
-    @d  h   l   p  #4  -- D6 ----x7
-
-    @we have loaded the residuals into the registers , now we need to add and subtract them
-    @let us do the horiz transform first
-
-    vsub.s16      d5, d2, d4            @x2 = x5-x6
-    vsub.s16      d7, d0, d6            @x3 = x4-x7;
-
-    vadd.s16      d3, d2, d4            @x1 = x5+x6
-    vadd.s16      d1, d0, d6            @x0 = x4+x7
-
-
-    vshl.s16      d31, d7, #1           @
-    vshl.s16      d30, d5, #1           @
-
-    vadd.s16      d0, d1, d3            @x0 + x1;
-    vsub.s16      d4, d1, d3            @x0 - x1;
-
-    vadd.s16      d2, d31, d5           @U_SHIFT(x3,1,shft) + x2;
-    vsub.s16      d6, d7, d30           @x3 - U_SHIFT(x2,1,shft);
-
-    @taking transform again so as to make do vert transform
-    vtrn.16       d0, d2
-    vtrn.16       d4, d6
-
-    vtrn.32       d0, d4
-    vtrn.32       d2, d6
-
-    @let us do vertical transform
-    @same code as horiz
-
-    vadd.s16      d1, d0, d6            @x0 = x4+x7
-    vadd.s16      d3, d2, d4            @x1 = x5+x6
-    vsub.s16      d7, d0, d6            @x3 = x4-x7;
-    vsub.s16      d5, d2, d4            @x2 = x5-x6
-
-
-@Since we are going to do scal / quant or whatever, we are going to divide by
-@a 32 bit number. So we have to expand the values
-
-    @VADDL.S16 Q12,D1,D3;x0 + x1
-    @VSUBL.S16 Q14,D1,D3;x0 - x1
-
-    @VSHL.S16  D8,D5,#1;
-    @VSHL.S16  D9,D7,#1;
-
-    @VADDL.S16 Q13,D9,D5 ; + x2
-    @VSUBL.S16 Q15,D7,D8 ;x3 - U_SHIFT(x2,1,shft)
-
-@scaling follows
-
-@now we need to do the scaling,so load the scaling matrix
-@mutliplying by the scaling coeffient; store the results from q5-q8 ;
-
-    vadd.s16      d24, d3, d1           @x4 = x0 + x1
-    vsub.s16      d28, d1, d3           @x6 = x0 - x1
-
-    vshl.s16      d0, d7, #1            @ U_SHIFT(x3,1,shft)
-    vmull.s16     q4, d24, d20          @x4*s0
-
-    vshl.s16      d2, d5, #1            @ U_SHIFT(x2,1,shft)
-
-    vadd.s16      d26, d0, d5           @x5 = U_SHIFT(x3,1,shft) + x2
-    vmull.s16     q5, d26, d21          @x5*s1
-
-    vst1.s32      {q4}, [r2], r5        @save 4 pixels of row1 current buffer and increment pointer by stride
-
-    vld1.s16      {q10}, [r6]           @load 8-16 scaling coeffcients
-
-    vsub.s16      d30, d7, d2           @x7 = x3 - U_SHIFT(x2,1,shft)
-
-    vmull.s16     q6, d28, d20          @x6*s2
-    vst1.s32      {q5}, [r2], r5
-
-    vmull.s16     q7, d30, d21          @x7*s3
-
-
-    vst1.s32      {q6}, [r2], r5
-    vst1.s32      {q7}, [r2]
-
-    pop           {r4-r12, pc}          @pop back all variables
-
-
-
-
-@*****************************************************************************
-@* Function Name     : ih264_resi_trans_8x8_a9
-@* Description       : This function does cf8 followd by an approximate normalization of H264
-@*
-@* Arguments         :
-@*                      R0 :pointer to src buffer
-@                       R1 :pointer to pred buffer
-@                       R2 :pointer to dst buffer
-@                       R3 :src_stride
-@                       STACk :pred_stride,dst_st
-@*
-@*
-@* Values Returned   : NONE
-@*
-@* Register Usage    :
-@* Stack Usage       :
-@* Cycles            : Around
-@* Interruptiaility  : Interruptable
-@*
-@* Known Limitations
-@*   \Assumptions    :
-@*
-@* Revision History  :
-@*         DD MM YYYY    Author(s)   Changes
-@*         30 12 2009    100633      First version
-@*
-@*****************************************************************************
-
-
-    .global ih264_resi_trans_8x8_a9
-    .extern g_scal_coff_h264_8x8
-g_scal_coff_h264_8x8_addr:
-    .long g_scal_coff_h264_8x8 - 8x8lbl - 8
-
-
-ih264_resi_trans_8x8_a9:
-
-    @R0 :pointer to src buffer
-    @R1 :pointer to pred buffer
-    @R2 :pointer to dst buffer
-    @R3 :src_stride
-    @STACk :pred_stride,dst_stride
-
-    push          {r4-r12, lr}          @push all the variables first
-
-    mov           r6, sp
-    add           r6, r6, #40           @decrement stack pointer,to accomodate two variables
-    ldmfd         r6, {r4-r5}           @load the strides into registers
-                                        @R4 pred_stride
-                                        @R5 dst_stride
-
-    @we have to give the stride as post inrement in vst1
-    @in case of dst the stride represnts 16 bit ie 2*8bits
-    @hence we need to add #4 to it and thenm multiply by 2
-    @--------------------function loading done------------------------
-
-    @lets find residual
-    @data is like 1a -> d0[1:31]  d0[32:64]
-    @                    a b c d   # # # #
-    vld1.u8       d30, [r0], r3         @load 4 pixels of row1 current buffer
-    vld1.u8       d31, [r1], r4         @load 4 pixels of row1 pred buffer
-
-    vld1.u8       d28, [r0], r3         @src  rw2
-    vld1.u8       d29, [r1], r4         @pred rw2
-    vsubl.u8      q0, d30, d31          @src-pred rw1
-
-    vld1.u8       d26, [r0], r3
-    vld1.u8       d27, [r1], r4
-    vsubl.u8      q1, d28, d29
-
-    vld1.u8       d24, [r0], r3
-    vld1.u8       d25, [r1], r4
-    vsubl.u8      q2, d26, d27
-
-    vld1.u8       d22, [r0], r3
-    vld1.u8       d23, [r1], r4
-    vsubl.u8      q3, d24, d25
-
-    vld1.u8       d20, [r0], r3
-    vld1.u8       d21, [r1], r4
-    vsubl.u8      q4, d22, d23
-
-    vld1.u8       d18, [r0], r3
-    vld1.u8       d19, [r1], r4
-    vsubl.u8      q5, d20, d21
-
-    vld1.u8       d16, [r0], r3
-    vld1.u8       d17, [r1], r4
-    vsubl.u8      q6, d18, d19
-
-    lsl           r5, r5, #2
-
-
-    vsubl.u8      q7, d16, d17
-
-    @after this
-    @Q0 -> 1a
-    @Q1 -> 2a
-    @Q2 -> 3a
-    @Q3 -> 4a
-    @Q4 -> 5a
-    @Q5 -> 6a
-    @Q6 -> 7a
-    @Q7 -> 8a
-
-    @transpose the matrix so that we can do the horizontal transform first
-
-    @transpose the inner 2x2 blocks
-    vtrn.16       q0, q1
-    vtrn.16       q2, q3
-    vtrn.16       q4, q5
-    vtrn.16       q6, q7
-
-    @transpose the inner 4x4 blocks
-    vtrn.32       q0, q2
-    vtrn.32       q1, q3
-
-    vtrn.32       q4, q6
-    vtrn.32       q5, q7
-
-    @transpose the outer 8x8 blocks
-    vswp          d1, d8
-    vswp          d7, d14
-    vswp          d3, d10
-    vswp          d5, d12
-    @transpose done
-
-@@this point we will have data in Q0-Q7
-@Q7 will be populated within 2 clock cycle
-@all others are availabe @ this clock cycle
-
-    @we have loaded the residuals into the registers , now we need to add and subtract them
-    @let us do the horiz transform first
-
-    vadd.s16      q8, q0, q7            @      a0 = r0 + r7;
-    vadd.s16      q9, q1, q6            @      a1 = r1 + r6;
-    vadd.s16      q10, q2, q5           @     a2 = r2 + r5;
-    vadd.s16      q11, q3, q4           @     a3 = r3 + r4;
-
-    vsub.s16      q12, q0, q7           @     b0 = r0 - r7;
-    vsub.s16      q13, q1, q6           @     b1 = r1 - r6;
-    vsub.s16      q15, q3, q4           @     b3 = r3 - r4;
-    vsub.s16      q14, q2, q5           @     b2 = r2 - r5;
-
-    vadd.s16      q1, q8, q11           @     a4 = a0 + a3;
-    vadd.s16      q3, q9, q10           @     a5 = a1 + a2;
-    vsub.s16      q7, q9, q10           @     a7 = a1 - a2;
-    vsub.s16      q5, q8, q11           @     a6 = a0 - a3;
-
-    ldr           r6, g_scal_coff_h264_8x8_addr
-8x8lbl:
-    add           r6, r6, pc            @  load the address of global array
-
-    vadd.s16      q0, q1, q3            @      pi2_res[0] = a4 + a5;
-    vshr.s16      q8, q7, #1            @      pi2_res[2] = a6 + D_SHIFT(a7,1,shft);
-
-    vsub.s16      q4, q1, q3            @      pi2_res[4] = a4 - a5;
-
-    vadd.s16      q2, q5, q8            @
-
-
-    vshr.s16      q9, q5, #1            @      pi2_res[6] = D_SHIFT(a6,1,shft) - a7;
-    vsub.s16      q6, q9, q7            @
-
-@do not change Q0,Q2.Q4,Q6 they contain results
-@Q1,Q3,Q5,Q7 TO STORE RESULTS
-@Q8 Q9 Q10 Q11 USE @WILL
-
-    vshr.s16      q1, q12, #1           @     D_SHIFT(b0,1,shft)
-    vshr.s16      q3, q13, #1           @     D_SHIFT(b1,1,shft)
-    vshr.s16      q5, q14, #1           @     D_SHIFT(b2,1,shft)
-    vshr.s16      q7, q15, #1           @     D_SHIFT(b3,1,shft)
-
-    vadd.s16      q8, q1, q12           @     (D_SHIFT(b0,1,shft) + b0);
-    vadd.s16      q9, q3, q13           @     (D_SHIFT(b1,1,shft) + b1);
-    vadd.s16      q10, q5, q14          @    (D_SHIFT(b2,1,shft) + b2);
-    vadd.s16      q11, q7, q15          @    (D_SHIFT(b3,1,shft) + b3);
-
-    vadd.s16      q1, q14, q8           @     b2 + (D_SHIFT(b0,1,shft) + b0);
-    vsub.s16      q5, q15, q9           @     b3 - (D_SHIFT(b1,1,shft) + b1);
-    vadd.s16      q3, q15, q10          @    b3 + (D_SHIFT(b2,1,shft) + b2);
-    vsub.s16      q7, q11, q14          @    -b2 + (D_SHIFT(b3,1,shft) + b3);
-
-    vadd.s16      q8, q13, q1           @     b4 = b1 + b2 + (D_SHIFT(b0,1,shft) + b0);
-    vsub.s16      q9, q12, q3           @     b5 = b0 - b3 - (D_SHIFT(b2,1,shft) + b2);
-    vadd.s16      q10, q12, q5          @    b6 = b0 + b3 - (D_SHIFT(b1,1,shft) + b1);
-    vadd.s16      q11, q13, q7          @    b7 = b1 - b2 + (D_SHIFT(b3,1,shft) + b3);
-
-    vshr.s16      q15, q8, #2           @     D_SHIFT(b4,2,shft)
-    vshr.s16      q14, q9, #2           @     D_SHIFT(b5,2,shft);
-    vshr.s16      q13, q10, #2          @    D_SHIFT(b6,2,shft);
-    vshr.s16      q12, q11, #2          @    D_SHIFT(b7,2,shft);
-
-
-    vadd.s16      q3, q9, q13           @     pi2_res[3] = b5 + D_SHIFT(b6,2,shft);
-    vsub.s16      q5, q10, q14          @    pi2_res[5] = b6 - D_SHIFT(b5,2,shft);
-    vadd.s16      q1, q8, q12           @     pi2_res[1] = b4 + D_SHIFT(b7,2,shft);
-    vsub.s16      q7, q15, q11          @    pi2_res[7] = D_SHIFT(b4,2,shft) - b7;
-
-    @------------horiz transform done-------------------------
-    @results are in Q0-Q7
-    @all other neon registes can be used at will
-
-@doing vertical transform
-@code exact copy of horiz transform above
-
-    @transpose the inner 2x2 blocks
-    vtrn.16       q0, q1
-    vtrn.16       q2, q3
-    vtrn.16       q4, q5
-    vtrn.16       q6, q7
-
-    @transpose the inner 4x4 blocks
-    vtrn.32       q0, q2
-    vtrn.32       q1, q3
-
-    vtrn.32       q4, q6
-    vtrn.32       q5, q7
-
-    @transpose the outer 8x8 blocks
-    vswp          d1, d8
-    vswp          d3, d10
-    vswp          d5, d12
-    vswp          d7, d14
-
-    @transpose done
-
-    vadd.s16      q8, q0, q7            @      a0 = r0 + r7;
-    vadd.s16      q9, q1, q6            @      a1 = r1 + r6;
-    vadd.s16      q10, q2, q5           @     a2 = r2 + r5;
-    vadd.s16      q11, q3, q4           @     a3 = r3 + r4;
-
-    vsub.s16      q12, q0, q7           @     b0 = r0 - r7;
-    vsub.s16      q13, q1, q6           @     b1 = r1 - r6;
-    vsub.s16      q14, q2, q5           @     b2 = r2 - r5;
-    vsub.s16      q15, q3, q4           @     b3 = r3 - r4;
-
-    vadd.s16      q1, q8, q11           @     a4 = a0 + a3;
-    vadd.s16      q3, q9, q10           @     a5 = a1 + a2;
-    vsub.s16      q5, q8, q11           @     a6 = a0 - a3;
-    vsub.s16      q7, q9, q10           @     a7 = a1 - a2;
-
-
-    vadd.s16      q0, q1, q3            @      pi2_res[0] = a4 + a5;
-
-    vshr.s16      q8, q7, #1            @      pi2_res[2] = a6 + D_SHIFT(a7,1,shft);
-    @DSHIFT_TO_0 Q8,Q7,#1,#0
-    vadd.s16      q2, q5, q8            @
-
-    vsub.s16      q4, q1, q3            @      pi2_res[4] = a4 - a5;
-
-    vshr.s16      q9, q5, #1            @      pi2_res[6] = D_SHIFT(a6,1,shft) - a7;
-    vsub.s16      q6, q9, q7            @
-
-@do not change Q0,Q2.Q4,Q6 they contain results
-@Q1,Q3,Q5,Q7 TO STORE RESULTS
-@Q8 Q9 Q10 Q11 USE @WILL
-
-    vshr.s16      q1, q12, #1           @     D_SHIFT(b0,1,shft)
-    vshr.s16      q3, q13, #1           @     D_SHIFT(b1,1,shft)
-    vshr.s16      q5, q14, #1           @     D_SHIFT(b2,1,shft)
-    vshr.s16      q7, q15, #1           @     D_SHIFT(b3,1,shft)
-
-
-    vadd.s16      q8, q1, q12           @     (D_SHIFT(b0,1,shft) + b0);
-    vadd.s16      q9, q3, q13           @     (D_SHIFT(b1,1,shft) + b1);
-    vadd.s16      q10, q5, q14          @    (D_SHIFT(b2,1,shft) + b2);
-    vadd.s16      q11, q7, q15          @    (D_SHIFT(b3,1,shft) + b3);
-
-    vadd.s16      q1, q14, q8           @     b2 + (D_SHIFT(b0,1,shft) + b0);
-    vadd.s16      q3, q15, q10          @    b3 + (D_SHIFT(b2,1,shft) + b2);
-    vsub.s16      q5, q15, q9           @     b3 - (D_SHIFT(b1,1,shft) + b1);
-    vsub.s16      q7, q11, q14          @    -b2 + (D_SHIFT(b3,1,shft) + b3);
-
-    vadd.s16      q8, q13, q1           @     b4 = b1 + b2 + (D_SHIFT(b0,1,shft) + b0);
-    vsub.s16      q9, q12, q3           @     b5 = b0 - b3 - (D_SHIFT(b2,1,shft) + b2);
-    vadd.s16      q10, q12, q5          @    b6 = b0 + b3 - (D_SHIFT(b1,1,shft) + b1);
-    vadd.s16      q11, q13, q7          @    b7 = b1 - b2 + (D_SHIFT(b3,1,shft) + b3);
-
-    vshr.s16      q15, q8, #2           @     D_SHIFT(b4,2,shft)
-    vshr.s16      q14, q9, #2           @     D_SHIFT(b5,2,shft);
-    vshr.s16      q13, q10, #2          @    D_SHIFT(b6,2,shft);
-    vshr.s16      q12, q11, #2          @    D_SHIFT(b7,2,shft);
-
-
-@since we are going to scal by small values, we need not expand the guys to 32 bit bit values
-    vsub.s16      q5, q10, q14          @    pi2_res[5] = b6 - D_SHIFT(b5,2,shft);
-    vsub.s16      q7, q15, q11          @    pi2_res[7] = D_SHIFT(b4,2,shft) - b7;
-    vadd.s16      q3, q9, q13           @     pi2_res[3] = b5 + D_SHIFT(b6,2,shft);
-    vadd.s16      q1, q8, q12           @     pi2_res[1] = b4 + D_SHIFT(b7,2,shft);
-
-    @------------vert transform done-------------------------
-    @results are in Q0-Q7
-    @all other neon registes can be used at will
-
-    @scaling
-    @since the 8x8 scaling matrix repeats in 1x4,1x4 block ,
-    @we need only load 4 values for each row and in total 4 rows
-    vld1.s16      {q14-q15}, [r6]       @
-
-    @since we need to get a 32 bit o/p for two 16 bit multiplications
-    @we need a VMULL instruction
-@-----------------------------first and second row
-
-    vmull.s16     q8, d0, d28           @scale the first row first 4 elem
-    vmull.s16     q9, d28, d1           @scale the second row last 4 elemts
-
-    vmull.s16     q10, d2, d29          @ scale second row first 4 elem
-    vmull.s16     q11, d29, d3          @scale the second row last 4 elem
-    vmull.s16     q12, d4, d30          @scale third row first  4 elem
-
-    vst1.s32      {q8, q9}, [r2], r5    @ write the first row complete
-
-    vmull.s16     q13, d30, d5          @scale the third row last 4 elem
-    vmull.s16     q8, d6, d31           @scale the fourth row first 4 elem
-
-
-    vst1.s32      {q10, q11}, [r2], r5  @store the second row complete
-
-@------------------------------- 3rd and 4th row
-
-    vmull.s16     q9, d31, d7           @scale the fourth row second column
-
-    vst1.s32      {q12, q13}, [r2], r5  @store the third row complete
-
-    vmull.s16     q10, d8, d28          @scale the 5th row fisrst 4 elms
-    vmull.s16     q11, d28, d9          @scale the 5th row second 4 elems
-
-    vmull.s16     q12, d10, d29         @scale the 6th row first4 elements
-
-
-    vst1.s32      {q8, q9}, [r2], r5    @store fifth row
-
-@--------------------------------5th and 6th row
-
-    vmull.s16     q13, d29, d11         @scale 6th row sendond 4 elems
-
-    vmull.s16     q8, d12, d30          @scale 7th rw first 4 elms
-
-    vst1.s32      {q10, q11}, [r2], r5  @store 6th row second 4 elements
-
-    vmull.s16     q9, d30, d13          @scale 7th rw second 4 elms
-    vmull.s16     q10, d14, d31         @scale 8th rw forst 4 elms
-
-
-    vst1.s32      {q12, q13}, [r2], r5  @store 6th row
-
-@----------------------------------7th and 8th row
-    vmull.s16     q11, d31, d15         @scale 8th row second 4 elms
-
-    vst1.s32      {q8, q9}, [r2], r5    @store 7th row
-    vst1.s32      {q10, q11}, [r2], r5  @store 8th row
-
-@----------------------------------done writing
-
-    pop           {r4-r12, pc}          @pop back all variables
-
-
-
-
-
-
diff --git a/common/arm/ih264_resi_trans_quant_a9.s b/common/arm/ih264_resi_trans_quant_a9.s
index caf362e..bb836bd 100644
--- a/common/arm/ih264_resi_trans_quant_a9.s
+++ b/common/arm/ih264_resi_trans_quant_a9.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @*******************************************************************************
 @* @file
 @*  ih264_resi_trans_quant_a9.s
diff --git a/common/arm/ih264_weighted_bi_pred_a9q.s b/common/arm/ih264_weighted_bi_pred_a9q.s
index ccae779..33859e6 100644
--- a/common/arm/ih264_weighted_bi_pred_a9q.s
+++ b/common/arm/ih264_weighted_bi_pred_a9q.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_weighted_bi_pred_a9q.s
@@ -37,7 +37,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 @*******************************************************************************
 @* @function
 @*  ih264_weighted_bi_pred_luma_a9q()
@@ -96,7 +96,7 @@
 @*  (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
 @*
 @*******************************************************************************
-@*/
+@*
 @void ih264_weighted_bi_pred_luma_a9q(UWORD8 *pu1_src1,
 @                                     UWORD8 *pu1_src2,
 @                                     UWORD8 *pu1_dst,
@@ -411,7 +411,7 @@
 @*  (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
 @*
 @*******************************************************************************
-@*/
+@*
 @void ih264_weighted_bi_pred_chroma_a9q(UWORD8 *pu1_src1,
 @                                       UWORD8 *pu1_src2,
 @                                       UWORD8 *pu1_dst,
diff --git a/common/arm/ih264_weighted_pred_a9q.s b/common/arm/ih264_weighted_pred_a9q.s
index 1ce94d0..81d26d4 100644
--- a/common/arm/ih264_weighted_pred_a9q.s
+++ b/common/arm/ih264_weighted_pred_a9q.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_weighted_pred_a9q.s
@@ -37,7 +37,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 @*******************************************************************************
 @* @function
 @*  ih264_weighted_pred_luma_a9q()
@@ -84,7 +84,7 @@
 @*  (ht,wd) can be (4,4), (4,8), (8,4), (8,8), (8,16), (16,8) or (16,16).
 @*
 @*******************************************************************************
-@*/
+@*
 @void ih264_weighted_pred_luma_a9q(UWORD8 *pu1_src,
 @                                  UWORD8 *pu1_dst,
 @                                  WORD32 src_strd,
@@ -314,7 +314,7 @@
 @*  (ht,wd) can be (2,2), (2,4), (4,2), (4,4), (4,8), (8,4) or (8,8).
 @*
 @*******************************************************************************
-@*/
+@*
 @void ih264_weighted_pred_chroma_a9q(UWORD8 *pu1_src,
 @                                    UWORD8 *pu1_dst,
 @                                    WORD32 src_strd,