@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@*******************************************************************************
@* @file
@*  ihevc_intra_pred_luma_mode2_neon.s
@*
@* @brief
@*  contains the function definition for luma intra prediction mode 2.
@*  the function is coded in arm neon assembly using gnu assembler syntax
@*
@* @author
@*  yogeswaran rs
@*
@* @par list of functions:
@*  - ihevc_intra_pred_luma_mode2_a9q()
@*
@* @remarks
@*  none
@*
@*******************************************************************************
@*/
@/**
@*******************************************************************************
@*
@* @brief
@*  luma intra prediction for angular mode 2
@*
@* @par description:
@*  fills an nt x nt block so that, for 0 <= row, col < nt,
@*      pu1_dst[row * dst_strd + col] = pu1_ref[2 * nt - 2 - row - col]
@*  i.e. every output row is a byte-reversed run of reference samples and
@*  the run moves one sample towards pu1_ref[0] for each row
@*
@* @param[in] pu1_ref
@*  uword8 pointer to the source
@*
@* @param[in] src_strd
@*  integer source stride (not used; r1 is recycled as the tile counter)
@*
@* @param[out] pu1_dst
@*  uword8 pointer to the destination
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] nt
@*  size of transform block. nt == 4 has a dedicated path; every other value
@*  is processed in 8x8 tiles and therefore has to be a multiple of 8
@*
@* @param[in] mode
@*  type of filtering (never read by this routine)
@*
@* @returns
@*  none
@*
@* @remarks
@*  - neon scratch registers are d0-d7 and d16-d23 only. d8-d15 are
@*    callee-saved under the aapcs and are deliberately left untouched
@*  - note(review): for nt == 4 the four 8-byte loads start at pu1_ref - 1
@*    down to pu1_ref - 4. the bytes in front of pu1_ref are read but never
@*    stored; confirm that the reference buffer of the caller allows this
@*
@*******************************************************************************
@*/

@void ihevc_intra_pred_luma_mode2(uword8 *pu1_ref,
@                                 word32 src_strd,
@                                 uword8 *pu1_dst,
@                                 word32 dst_strd,
@                                 word32 nt,
@                                 word32 mode)
@
@**************variables vs registers*****************************************
@r0 => *pu1_ref
@r1 => src_strd (overwritten, used as tile counter)
@r2 => *pu1_dst
@r3 => dst_strd
@
@stack contents from #nt_offset (after the register save in the prologue)
@   nt
@   mode

.text
.align 4

.equ    nt_offset,  40                      @10 core registers pushed by stmfd, 4 bytes each

.globl ihevc_intra_pred_luma_mode2_a9q

.type ihevc_intra_pred_luma_mode2_a9q, %function

ihevc_intra_pred_luma_mode2_a9q:

    stmfd       sp!, {r4-r12, r14}          @save callee-saved core registers and lr

    ldr         r4,[sp,#nt_offset]          @r4 = nt
    mov         r8,#-2                      @post-index step: r0 and r10 each serve every other row

    cmp         r4,#4
    beq         mode2_4

    add         r0,r0,r4,lsl #1             @r0 = pu1_ref + 2 * nt
    sub         r0,r0,#9                    @r0 = &pu1_ref[2 * nt - 9]: 8-byte window of row 0
    sub         r10,r0,#1                   @r10 = window of row 1

prologue_cpy_32:

    @ load the first 8x8 tile: even rows through r0, odd rows through r10
    vld1.8      {d0},[r0],r8
    mov         r11,r4                      @r11 = columns of the current 8-row band still to be stored

    vld1.8      {d1},[r10],r8
    mov         r6, r2                      @r6 = row 0 of the destination tile

    vld1.8      {d2},[r0],r8
    vld1.8      {d3},[r10],r8
    lsr         r1, r4, #3                  @r1 = nt / 8

    vld1.8      {d4},[r0],r8
    vld1.8      {d5},[r10],r8
    vld1.8      {d6},[r0],r8
    mul         r1, r4, r1                  @r1 = nt * (nt / 8) = 8 * number of 8x8 tiles

    vld1.8      {d7},[r10],r8
    add         r7,r6,r3                    @r7 = row 1

    @ reverse each row into the caller-saved staging registers d16-d23
    vrev64.8    d16,d0
    vrev64.8    d17,d1
    lsl         r5, r3, #2                  @r5 = 4 * dst_strd: each row pointer is used for rows k and k + 4

    vrev64.8    d18,d2
    vrev64.8    d19,d3
    add         r9,r7,r3                    @r9 = row 2

    vrev64.8    d20,d4
    subs        r1,r1,#8                    @first tile accounted for; zero when nt == 8

    vrev64.8    d21,d5
    vrev64.8    d22,d6
    vrev64.8    d23,d7
    add         r14,r9,r3                   @r14 = row 3

    beq         epilogue_mode2              @single tile: only the final store remains

    sub         r12,r4,#8                   @r12 = columns of the current band still to be loaded

kernel_mode2:

    @ store the tile staged in d16-d23 while loading and reversing the next one
    vst1.8      {d16},[r6],r5
    vst1.8      {d17},[r7],r5
    subs        r11,r11,#8                  @flags: gt = more tiles in this band, le = band finished

    vst1.8      {d18},[r9],r5
    addgt       r2,r2,#8                    @same band: next tile is 8 columns to the right

    vst1.8      {d19},[r14],r5
    vst1.8      {d20},[r6],r5
    movle       r11,r4                      @band finished: restart the column counter

    vst1.8      {d21},[r7],r5
    vst1.8      {d22},[r9],r5

    vst1.8      {d23},[r14],r5
    vld1.8      {d0},[r0],r8
    sub         r14,r4,#8                   @r14 = nt - 8 (scratch here, rebuilt as row 3 pointer below)

    vld1.8      {d1},[r10],r8
    vld1.8      {d2},[r0],r8

    vld1.8      {d3},[r10],r8
    vld1.8      {d4},[r0],r8
    suble       r2, r6, r14                 @band finished: r6 sits 8 rows down at column nt - 8,
                                            @so r6 - (nt - 8) is column 0 of the next band

    vld1.8      {d5},[r10],r8
    subs        r12,r12,#8                  @flags: le = the tile just loaded closes its band

    vld1.8      {d6},[r0],r8
    mov         r6, r2                      @r6 = row 0 of the next destination tile

    vld1.8      {d7},[r10],r8
    addle       r0, r0, r4                  @band closed: rewind the source by nt columns ...

    vrev64.8    d16,d0
    add         r7, r6, r3

    vrev64.8    d17,d1
    suble       r0, r0, #8                  @... and advance by 8 rows (one byte per row)

    vrev64.8    d18,d2
    movle       r12,r4                      @band closed: restart the load counter

    vrev64.8    d19,d3
    add         r9, r7, r3

    vrev64.8    d20,d4
    sub         r10,r0,#1                   @odd-row pointer always trails r0 by one byte

    vrev64.8    d21,d5
    subs        r1, r1, #8                  @one more tile loaded; zero when it was the last one

    vrev64.8    d22,d6
    add         r14, r9, r3

    vrev64.8    d23,d7

    bne         kernel_mode2

epilogue_mode2:

    @ store the last staged tile
    vst1.8      {d16},[r6],r5
    vst1.8      {d17},[r7],r5
    vst1.8      {d18},[r9],r5
    vst1.8      {d19},[r14],r5
    vst1.8      {d20},[r6],r5
    vst1.8      {d21},[r7],r5
    vst1.8      {d22},[r9],r5
    vst1.8      {d23},[r14],r5

    b           end_func

mode2_4:

    @ nt == 4: r8 is still -2 from the function entry
    sub         r0,r0,#1                    @r0 = &pu1_ref[2 * nt - 9] with nt = 4 (row 0 window)
    sub         r10,r0,#1                   @r10 = row 1 window

    vld1.8      {d0},[r0],r8                @row 0
    add         r5,r2,r3                    @r5 = destination row 1
    vld1.8      {d2},[r10],r8               @row 1
    add         r6,r5,r3                    @r6 = destination row 2
    vld1.8      {d4},[r0]                   @row 2
    add         r7,r6,r3                    @r7 = destination row 3
    vld1.8      {d6},[r10]                  @row 3

    vrev64.8    d1,d0
    vrev64.8    d3,d2

    @ after the reversal the 4 wanted bytes are the low 32 bits of each register
    vst1.32     {d1[0]},[r2]
    vrev64.8    d5,d4
    vst1.32     {d3[0]},[r5]
    vrev64.8    d7,d6
    vst1.32     {d5[0]},[r6]
    vst1.32     {d7[0]},[r7]

end_func:
    ldmfd       sp!,{r4-r12,r15}            @restore the registers and return (pc <- saved lr)
