Harish Mahendrakar | 0d8951c | 2014-05-16 10:31:13 -0700 | [diff] [blame^] | 1 | @/***************************************************************************** |
| 2 | @* |
| 3 | @* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
| 4 | @* |
| 5 | @* Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | @* you may not use this file except in compliance with the License. |
| 7 | @* You may obtain a copy of the License at: |
| 8 | @* |
| 9 | @* http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | @* |
| 11 | @* Unless required by applicable law or agreed to in writing, software |
| 12 | @* distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | @* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | @* See the License for the specific language governing permissions and |
| 15 | @* limitations under the License. |
| 16 | @* |
| 17 | @*****************************************************************************/ |
| 18 | @/** |
| 19 | @******************************************************************************* |
| 20 | @* @file |
| 21 | @* ihevc_intra_pred_luma_mode2_neon.s |
| 22 | @* |
@* @brief
@*  contains the function definition for luma intra prediction mode 2.
@*  the function is hand-written arm neon assembly (not intrinsics) and
@*  uses gnu assembler directives and comment syntax
@*
| 28 | @* |
| 29 | @* @author |
| 30 | @* yogeswaran rs |
| 31 | @* |
| 32 | @* @par list of functions: |
| 33 | @* |
| 34 | @* |
| 35 | @* @remarks |
| 36 | @* none |
| 37 | @* |
| 38 | @******************************************************************************* |
| 39 | @*/ |
| 40 | @/** |
| 41 | @******************************************************************************* |
| 42 | @* |
@* @brief
@*  luma intra prediction for mode 2
@*
@* @par description:
@*  each output row is a byte-reversed run of nt reference samples, i.e.
@*  pu1_dst[row * dst_strd + col] = pu1_ref[2 * nt - 2 - row - col]
| 47 | @* |
| 48 | @* @param[in] pu1_ref |
| 49 | @* uword8 pointer to the source |
| 50 | @* |
| 51 | @* @param[out] pu1_dst |
| 52 | @* uword8 pointer to the destination |
| 53 | @* |
| 54 | @* @param[in] src_strd |
| 55 | @* integer source stride |
| 56 | @* |
| 57 | @* @param[in] dst_strd |
| 58 | @* integer destination stride |
| 59 | @* |
@* @param[in] pi1_coeff
@*  word8 pointer to the planar coefficients
@*  (absent from the prototype below and never read by this function)
@*
@* @param[in] nt
@*  size of transform block (the code handles 4 and multiples of 8)
@*
@* @param[in] mode
@*  prediction mode (not read by this function)
| 68 | @* |
| 69 | @* @returns |
| 70 | @* |
| 71 | @* @remarks |
| 72 | @* none |
| 73 | @* |
| 74 | @******************************************************************************* |
| 75 | @*/ |
| 76 | |
| 77 | @void ihevc_intra_pred_luma_mode2(uword8 *pu1_ref, |
| 78 | @ word32 src_strd, |
| 79 | @ uword8 *pu1_dst, |
| 80 | @ word32 dst_strd, |
| 81 | @ word32 nt, |
| 82 | @ word32 mode) |
| 83 | @ |
| 84 | @**************variables vs registers***************************************** |
| 85 | @r0 => *pu1_ref |
| 86 | @r1 => src_strd |
| 87 | @r2 => *pu1_dst |
| 88 | @r3 => dst_strd |
| 89 | |
| 90 | @stack contents from #40 |
| 91 | @ nt |
| 92 | @ mode |
| 93 | @ pi1_coeff |
| 94 | |
.text
.align 4

.globl ihevc_intra_pred_luma_mode2_a9q

.type ihevc_intra_pred_luma_mode2_a9q, %function

@-----------------------------------------------------------------------------
@ void ihevc_intra_pred_luma_mode2_a9q(uword8 *pu1_ref, word32 src_strd,
@                                      uword8 *pu1_dst, word32 dst_strd,
@                                      word32 nt,       word32 mode)
@
@ syntax : gnu assembler, arm (a32) with neon
@ in     : r0 = pu1_ref
@          r1 = src_strd   (never read; r1 is reused as the tile counter)
@          r2 = pu1_dst
@          r3 = dst_strd
@          first stacked argument  = nt   (read from [sp,#40] after the push)
@          second stacked argument = mode (never read)
@ out    : pu1_dst[row * dst_strd + col] = pu1_ref[2 * nt - 2 - row - col]
@          for 0 <= row, col < nt
@ sizes  : nt == 4 takes the dedicated path at mode2_4; every other value is
@          processed in 8x8 tiles and the tile counter only reaches zero
@          when nt is a multiple of 8.
@ clobber: r0-r2, d0-d7, d16-d23, flags.  r4-r12 and r14 are saved/restored.
@
@ the byte-reversed rows are kept in d16-d23.  d8-d15 are callee-saved under
@ the aapcs and must not be modified without being preserved, whereas
@ d16-d31 may be freely overwritten, so no neon register needs to be spilled.
@
@ register roles on the 8x8 path:
@   r0 / r10 : source pointers for even / odd rows of a tile (r10 = r0 - 1)
@   r8       : -2, post-increment applied to r0 and r10 after every load
@   r6,r7,r9,r14 : destination pointers for rows 0..3 of a tile; each is
@              advanced by r5 after a store so it then addresses rows 4..7
@   r5       : 4 * dst_strd
@   r2       : destination address of the tile that will be stored next
@   r11      : columns still to be stored in the current band of 8 rows
@   r12      : columns still to be loaded in the current band of 8 rows
@   r1       : (nt / 8) * nt, decremented by 8 per tile
@-----------------------------------------------------------------------------
ihevc_intra_pred_luma_mode2_a9q:

    stmfd       sp!, {r4-r12, r14}          @ save core registers (10 words = 40 bytes)

    ldr         r4, [sp, #40]               @ r4 = nt
    mov         r8, #-2                     @ each source pointer serves every second row

    cmp         r4, #4
    beq         mode2_4

    add         r0, r0, r4, lsl #1          @ r0 = pu1_ref + 2 * nt
    sub         r0, r0, #9                  @ row 0 is built from pu1_ref[2*nt-9 .. 2*nt-2]
    sub         r10, r0, #1                 @ row 1 starts one byte lower

prologue_cpy_32:
    @ load and reverse the first 8x8 tile; scalar set-up is interleaved with
    @ the neon instructions.
    vld1.8      {d0}, [r0], r8              @ row 0
    mov         r11, r4                     @ r11 = nt columns left to store

    vld1.8      {d1}, [r10], r8             @ row 1
    mov         r6, r2                      @ r6 -> row 0 of the first tile

    vld1.8      {d2}, [r0], r8              @ row 2
    vld1.8      {d3}, [r10], r8             @ row 3
    lsr         r1, r4, #3                  @ r1 = nt / 8

    vld1.8      {d4}, [r0], r8              @ row 4
    vld1.8      {d5}, [r10], r8             @ row 5
    vld1.8      {d6}, [r0], r8              @ row 6
    mul         r1, r4, r1                  @ r1 = nt * (nt / 8) = 8 * number of tiles

    vld1.8      {d7}, [r10], r8             @ row 7
    add         r7, r6, r3                  @ r7 -> row 1

    vrev64.8    d16, d0                     @ reverse the 8 bytes of every row
    vrev64.8    d17, d1
    lsl         r5, r3, #2                  @ r5 = 4 * dst_strd

    vrev64.8    d18, d2
    vrev64.8    d19, d3
    add         r9, r7, r3                  @ r9 -> row 2

    vrev64.8    d20, d4
    subs        r1, r1, #8                  @ one tile is already in flight

    vrev64.8    d21, d5
    vrev64.8    d22, d6
    vrev64.8    d23, d7
    add         r14, r9, r3                 @ r14 -> row 3

    beq         epilogue_mode2              @ nt == 8: only one tile, just store it

    sub         r12, r4, #8                 @ columns left to load in this band

kernel_mode2:
    @ store the tile prepared by the previous iteration (or the prologue)
    @ while loading and reversing the next one.
    vst1.8      {d16}, [r6], r5             @ row 0, r6 -> row 4
    vst1.8      {d17}, [r7], r5             @ row 1, r7 -> row 5
    subs        r11, r11, #8                @ flags drive the gt/le updates up to "subs r12"

    vst1.8      {d18}, [r9], r5             @ row 2, r9 -> row 6
    addgt       r2, r2, #8                  @ band not finished: next tile is 8 columns right

    vst1.8      {d19}, [r14], r5            @ row 3, r14 -> row 7
    vst1.8      {d20}, [r6], r5             @ row 4, r6 -> row 0 of the band below
    movle       r11, r4                     @ band finished: restart the column count

    vst1.8      {d21}, [r7], r5             @ row 5
    vst1.8      {d22}, [r9], r5             @ row 6
    vst1.8      {d23}, [r14], r5            @ row 7, r14 is free from here on

    vld1.8      {d0}, [r0], r8              @ next tile, row 0
    sub         r14, r4, #8                 @ r14 = nt - 8 (column offset of the last tile)

    vld1.8      {d1}, [r10], r8
    vld1.8      {d2}, [r0], r8
    vld1.8      {d3}, [r10], r8
    vld1.8      {d4}, [r0], r8
    suble       r2, r6, r14                 @ band finished: r2 = column 0 of the band below
                                            @ (r6 already points 8 rows down, last tile column)

    vld1.8      {d5}, [r10], r8
    subs        r12, r12, #8                @ flags drive the le updates of r0 / r12 below

    vld1.8      {d6}, [r0], r8              @ last load through r0 for this tile
    mov         r6, r2                      @ r6 -> row 0 of the next tile

    vld1.8      {d7}, [r10], r8
    addle       r0, r0, r4                  @ source band finished: r0 += nt ...

    vrev64.8    d16, d0
    add         r7, r6, r3                  @ r7 -> row 1

    vrev64.8    d17, d1
    suble       r0, r0, #8                  @ ... - 8, i.e. column 0 of the next 8 rows

    vrev64.8    d18, d2
    movle       r12, r4                     @ restart the source column count

    vrev64.8    d19, d3
    add         r9, r7, r3                  @ r9 -> row 2

    vrev64.8    d20, d4
    sub         r10, r0, #1                 @ odd-row source pointer for the next tile

    vrev64.8    d21, d5
    subs        r1, r1, #8                  @ one more tile consumed

    vrev64.8    d22, d6
    add         r14, r9, r3                 @ r14 -> row 3

    vrev64.8    d23, d7

    bne         kernel_mode2

epilogue_mode2:
    @ store the last tile that is still held in d16-d23.
    vst1.8      {d16}, [r6], r5
    vst1.8      {d17}, [r7], r5
    vst1.8      {d18}, [r9], r5
    vst1.8      {d19}, [r14], r5
    vst1.8      {d20}, [r6], r5
    vst1.8      {d21}, [r7], r5
    vst1.8      {d22}, [r9], r5
    vst1.8      {d23}, [r14], r5

    b           end_func

mode2_4:
    @ nt == 4: r0 is still pu1_ref and r8 is still -2 here.  every row is
    @ loaded as 8 bytes, reversed, and only its low 32 bits are stored.
    @ NOTE(review): the 8-byte loads start at pu1_ref-1 .. pu1_ref-4, so up to
    @ four bytes below pu1_ref are read (and discarded); the caller must make
    @ sure those bytes are readable.
    sub         r0, r0, #1                  @ row 0 source: pu1_ref[-1 .. 6]
    sub         r10, r0, #1                 @ row 1 source: pu1_ref[-2 .. 5]

    vld1.8      {d0}, [r0], r8              @ row 0, r0 -> pu1_ref - 3
    add         r5, r2, r3                  @ r5 -> destination row 1
    vld1.8      {d2}, [r10], r8             @ row 1, r10 -> pu1_ref - 4
    add         r6, r5, r3                  @ r6 -> destination row 2
    vld1.8      {d4}, [r0]                  @ row 2
    add         r7, r6, r3                  @ r7 -> destination row 3
    vld1.8      {d6}, [r10]                 @ row 3

    vrev64.8    d1, d0
    vrev64.8    d3, d2

    vst1.32     {d1[0]}, [r2]               @ row 0 = pu1_ref[6], [5], [4], [3]
    vrev64.8    d5, d4
    vst1.32     {d3[0]}, [r5]               @ row 1
    vrev64.8    d7, d6
    vst1.32     {d5[0]}, [r6]               @ row 2
    vst1.32     {d7[0]}, [r7]               @ row 3

end_func:
    ldmfd       sp!, {r4-r12, r15}          @ restore core registers and return (pc <- saved lr)
| 264 | |
| 265 | |
| 266 | |
| 267 | |
| 268 | |
| 269 | |
| 270 | |