Resolved warnings and fixed alignment in a few assembly files

Resolved warnings seen in x86 modules
Fixed alignment in a few modules
Updated comments in a few ARM modules for consistency
Fixed warnings seen in the clang build

Change-Id: I0623169b5e84a6a6f09c3d2212e754101272f5e9
diff --git a/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s b/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s
index c39ae01..ab1d1d1 100644
--- a/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s
+++ b/common/arm/ih264_inter_pred_luma_horz_qpel_a9q.s
@@ -17,7 +17,7 @@
 @ *****************************************************************************
 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 @*/
-@/**
+@**
 @******************************************************************************
 @* @file
 @*  ih264_inter_pred_luma_horz_qpel_a9q.s
@@ -30,19 +30,19 @@
 @*
 @* @par List of Functions:
 @*
-@*  - ih264_inter_pred_luma_horz_qpe_a9ql()
+@*  - ih264_inter_pred_luma_horz_qpel_a9q()
 @*
 @* @remarks
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
-@/* All the functions here are replicated from ih264_inter_pred_filters.c
+@* All the functions here are replicated from ih264_inter_pred_filters.c
 @
 
-@/**
-@/**
+@**
+@**
 @*******************************************************************************
 @*
 @* @brief
@@ -79,7 +79,7 @@
 @*  None
 @*
 @*******************************************************************************
-@*/
+@*
 
 @void ih264_inter_pred_luma_horz (
 @                            UWORD8 *pu1_src,
@@ -126,7 +126,7 @@
     beq           loop_4
 
 loop_16:                                @when  wd=16
-    @// Processing row0 and row1
+    @ Processing row0 and row1
     vld1.8        {d2, d3, d4}, [r0], r2 @// Load row0
     vext.8        d31, d2, d3, #5       @//extract a[5]                         (column1,row0)
     vld1.8        {d5, d6, d7}, [r0], r2 @// Load row1
@@ -187,7 +187,7 @@
     b             loop_16
 
 loop_8:
-@// Processing row0 and row1
+@ Processing row0 and row1
 
     vld1.8        {d5, d6}, [r0], r2    @// Load row1
     vext.8        d28, d5, d6, #5       @//extract a[5]                         (column1,row1)
@@ -221,7 +221,7 @@
     subs          r5, r5, #2            @ 2 rows done, decrement by 2
 
     beq           end_func              @ Branch if height==4
-    b             loop_8 @looping if height == 8 or 16
+    b             loop_8                @looping if height == 8 or 16
 
 loop_4:
     vld1.8        {d5, d6}, [r0], r2    @// Load row1