@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/

@/**
@******************************************************************************
@*
@* @brief Evaluate the best intra 16x16 mode (among VERT, HORZ and DC)
@*  and perform the corresponding prediction.
@*
@* @par Description
@*  This function evaluates the first three intra 16x16 modes, computes the
@*  SAD of each against the source block, and writes the block predicted
@*  with the best (minimum SAD) mode to the destination buffer.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[in] pu1_ngbr_pels_i16
@*  UWORD8 pointer to the neighbouring pels
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] u4_n_avblty
@*  availability of neighbouring pixels (bit 0: left, bit 2: top)
@*
@* @param[out] u4_intra_mode
@*  Pointer to the variable in which the best mode is returned
@*  (0: VERT, 1: HORZ, 2: DC)
@*
@* @param[out] pu4_sadmin
@*  Pointer to the variable in which the minimum SAD is returned
@*
@* @param[in] u4_valid_intra_modes
@*  Bit mask of the modes that may be selected
@*  (bit 0: VERT, bit 1: HORZ, bit 2: DC)
@*
@* @return none
@*
@******************************************************************************
@*/
@
@void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
@                                      UWORD8 *pu1_ngbr_pels_i16,
@                                      UWORD8 *pu1_dst,
@                                      UWORD32 src_strd,
@                                      UWORD32 dst_strd,
@                                      WORD32 u4_n_avblty,
@                                      UWORD32 *u4_intra_mode,
@                                      WORD32 *pu4_sadmin,
@                                      UWORD32 u4_valid_intra_modes)
@
.text
.p2align 2

    .global ih264e_evaluate_intra16x16_modes_a9q

@------------------------------------------------------------------------------
@ Stack frame bookkeeping.
@   The prologue pushes r4-r12 and lr (10 words) and later d8-d15 (8 dwords).
@   Arguments five to nine are read from the stack above those save areas.
@------------------------------------------------------------------------------
    .equ I16_GPR_SAVE_BYTES,    40      @ r4-r12, lr
    .equ I16_VFP_SAVE_BYTES,    64      @ d8-d15
    .equ I16_FRAME_BYTES,       (I16_GPR_SAVE_BYTES + I16_VFP_SAVE_BYTES)

    .equ I16_ARG_DST_STRD,      0       @ dst_strd
    .equ I16_ARG_N_AVBLTY,      4       @ u4_n_avblty
    .equ I16_ARG_INTRA_MODE,    8       @ u4_intra_mode
    .equ I16_ARG_SADMIN,        12      @ pu4_sadmin
    .equ I16_ARG_VALID_MODES,   16      @ u4_valid_intra_modes

@ Bits tested in u4_n_avblty
    .equ I16_AVAIL_LEFT,        1
    .equ I16_AVAIL_TOP,         4

@ Bits tested in u4_valid_intra_modes
    .equ I16_VALID_VERT,        1
    .equ I16_VALID_HORZ,        2
    .equ I16_VALID_DC,          4

@ Values stored through u4_intra_mode
    .equ I16_MODE_VERT,         0
    .equ I16_MODE_HORZ,         1
    .equ I16_MODE_DC,           2

    .equ I16_DC_DEFAULT,        128     @ DC bias added when no neighbour is available
    .equ I16_BAD_SAD_SHIFT,     30      @ a disallowed mode gets SAD = 1 << 30
    .equ I16_ROWS_AFTER_FIRST,  15      @ rows 1..15 are handled by the SAD loop

@------------------------------------------------------------------------------
@ ih264e_evaluate_intra16x16_modes_a9q
@
@ Register arguments on entry:
@   r0 = pu1_src
@   r1 = pu1_ngbr_pels_i16
@   r2 = pu1_dst
@   r3 = src_strd
@ Stack arguments (loaded into the registers shown, when needed):
@   dst_strd             -> r4
@   u4_n_avblty          -> r5
@   u4_intra_mode        -> r6
@   pu4_sadmin           -> r7
@   u4_valid_intra_modes -> r0
@
@ Neighbour buffer as accessed here:
@   bytes  0..15 : left column; row i of the block uses byte (15 - i)
@   byte   16    : skipped (presumably the top-left pel -- confirm with caller)
@   bytes 17..32 : top row
@
@ Long-lived NEON registers:
@   q4  = left column bytes       q5  = top row bytes
@   q15 = DC value in every lane (later the block written by the DC/VERT path)
@   q8:q9   = VERT SAD accumulators (16-bit lanes)
@   q13:q14 = HORZ SAD accumulators
@   q11:q12 = DC   SAD accumulators
@
@ r4-r12, lr and d8-d15 are saved on entry and restored on exit.
@------------------------------------------------------------------------------
ih264e_evaluate_intra16x16_modes_a9q:

    push        {r4-r12, lr}
    ldr         r5, [sp, #(I16_GPR_SAVE_BYTES + I16_ARG_N_AVBLTY)]  @ r5 = u4_n_avblty

    vpush       {d8-d15}
    vld1.32     {d8, d9}, [r1]!             @ q4 = left column, r1 -> byte 16
    sub         r6, r1, #1                  @ r6 -> byte 15 (left pel used for row 0)
    add         r1, r1, #1                  @ step over byte 16
    mov         r10, #0                     @ r10 = 1 if left is available
    vld1.32     {d10, d11}, [r1]!           @ q5 = top row
    mov         r11, #0                     @ r11 = 1 if top is available
    mov         r4, #0                      @ r4 = DC bias (0 or 128)

    @ left available?
    ands        r7, r5, #I16_AVAIL_LEFT
    movne       r10, #1

    @ top available? (the flags from this test are consumed by the movne below;
    @ the shift in between does not update them)
    ands        r8, r5, #I16_AVAIL_TOP
    mov         r9, r10, lsl #3             @ r9  = 8 * left
    movne       r11, #1
    mov         r12, r11, lsl #3            @ r12 = 8 * top
    adds        r8, r9, r12                 @ r8  = rounding term (0, 8 or 16)

    @ neither available: rounding term is zero, use the default DC bias
    moveq       r4, #I16_DC_DEFAULT

    @---- DC value ------------------------------------------------------------
    @ NOTE(review): all 32 neighbour bytes are summed regardless of the
    @ availability bits; only the rounding term and the shift depend on them.
    @ Presumably the caller zero-fills unavailable neighbours -- confirm.
    vaddl.u8    q15, d8, d9                 @ left: low half + high half
    vaddl.u8    q14, d10, d11               @ top : low half + high half
    vadd.i16    q15, q14, q15
    vadd.i16    d30, d31, d30
    vpadd.i16   d30, d30, d30
    vpadd.i16   d30, d30, d30               @ lane 0 = sum of the 32 neighbour bytes

    vmov.u16    r7, d30[0]
    add         r7, r7, r8                  @ + rounding
    add         r11, r11, #3
    add         r8, r10, r11                @ shift = 3 + left + top
    mov         r7, r7, lsr r8
    add         r7, r4, r7                  @ + DC bias
    vld1.32     {d0, d1}, [r0], r3          @ q0 = source row 0
    vdup.8      q15, r7                     @ q15 = DC value in every lane

    @---- SAD of row 0 for the three modes --------------------------------------
    ldrb        r7, [r6]
    vdup.8      q10, r7                     @ HORZ predictor for row 0
    vabdl.u8    q8, d0, d10                 @ VERT, row 0
    vabdl.u8    q9, d1, d11
    sub         r6, r6, #1                  @ next left pel
    vabdl.u8    q13, d0, d20                @ HORZ, row 0
    vabdl.u8    q14, d1, d21
    mov         r1, #I16_ROWS_AFTER_FIRST   @ loop counter
    vabdl.u8    q11, d0, d30                @ DC, row 0
    vabdl.u8    q12, d1, d31

    @---- SAD of rows 1..15 -----------------------------------------------------
.Li16_sad_row:
    vld1.32     {d2, d3}, [r0], r3          @ q1 = source row i
    vabal.u8    q11, d2, d30                @ DC
    ldrb        r7, [r6]
    vabal.u8    q12, d3, d31

    vabal.u8    q8, d2, d10                 @ VERT
    vdup.8      q10, r7                     @ HORZ predictor for row i
    sub         r6, r6, #1
    vabal.u8    q9, d3, d11

    subs        r1, r1, #1
    vabal.u8    q13, d2, d20                @ HORZ
    vabal.u8    q14, d3, d21
    bne         .Li16_sad_row

    @---- Reduce the accumulators: r8 = VERT, r9 = HORZ, r10 = DC ---------------
    vadd.i16    q9, q9, q8                  @ VERT
    vadd.i16    d18, d19, d18               @ VERT
    vpaddl.u16  d18, d18                    @ VERT
    vadd.i16    q14, q13, q14               @ HORZ
    vadd.i16    d28, d29, d28               @ HORZ
    vpaddl.u32  d18, d18                    @ VERT
    vpaddl.u16  d28, d28                    @ HORZ

    vpaddl.u32  d28, d28                    @ HORZ
    vmov.32     r8, d18[0]                  @ r8 = VERT SAD
    vadd.i16    q12, q11, q12               @ DC
    vmov.32     r9, d28[0]                  @ r9 = HORZ SAD
    mov         r11, #1
    vadd.i16    d24, d24, d25               @ DC
    mov         r11, r11, lsl #I16_BAD_SAD_SHIFT    @ r11 = SAD for a disallowed mode

    ldr         r0, [sp, #(I16_FRAME_BYTES + I16_ARG_VALID_MODES)]  @ r0 = u4_valid_intra_modes

    ands        r7, r0, #I16_VALID_VERT     @ VERT allowed?
    moveq       r8, r11
    vpaddl.u16  d24, d24                    @ DC

    ands        r6, r0, #I16_VALID_HORZ     @ HORZ allowed?
    moveq       r9, r11
    vpaddl.u32  d24, d24                    @ DC

    vmov.32     r10, d24[0]                 @ r10 = DC SAD

    ldr         r4, [sp, #(I16_FRAME_BYTES + I16_ARG_DST_STRD)]     @ r4 = dst_strd
    ldr         r7, [sp, #(I16_FRAME_BYTES + I16_ARG_SADMIN)]       @ r7 = pu4_sadmin

    ands        r6, r0, #I16_VALID_DC       @ DC allowed?
    moveq       r10, r11

    ldr         r6, [sp, #(I16_FRAME_BYTES + I16_ARG_INTRA_MODE)]   @ r6 = u4_intra_mode

    @---- Pick the minimum; ties go to VERT first, then HORZ, then DC -----------
    cmp         r8, r9
    bgt         .Li16_horz_or_dc
    cmp         r8, r10
    bgt         .Li16_pick_dc

    @ VERT wins: report it and replicate the top row
    str         r8, [r7]                    @ *pu4_sadmin
    mov         r8, #I16_MODE_VERT
    str         r8, [r6]                    @ *u4_intra_mode
    vmov        q15, q5                     @ shared store path writes q15

    b           .Li16_store_q15

.Li16_horz_or_dc:
    cmp         r9, r10
    bgt         .Li16_pick_dc

    @ HORZ wins: each row is one left pel replicated 16 times. The replication
    @ of later rows is interleaved with the stores of earlier rows.
    vdup.8      q5, d9[7]                   @ row 0
    str         r9, [r7]                    @ *pu4_sadmin
    vdup.8      q6, d9[6]                   @ row 1
    mov         r9, #I16_MODE_HORZ
    vdup.8      q7, d9[5]                   @ row 2
    vst1.32     {q5}, [r2], r4              @ row 0
    vdup.8      q8, d9[4]                   @ row 3
    str         r9, [r6]                    @ *u4_intra_mode
    vdup.8      q9, d9[3]                   @ row 4
    vst1.32     {q6}, [r2], r4              @ row 1
    vdup.8      q10, d9[2]                  @ row 5
    vst1.32     {q7}, [r2], r4              @ row 2
    vdup.8      q11, d9[1]                  @ row 6
    vst1.32     {q8}, [r2], r4              @ row 3
    vdup.8      q12, d9[0]                  @ row 7
    vst1.32     {q9}, [r2], r4              @ row 4
    vdup.8      q13, d8[7]                  @ row 8
    vst1.32     {q10}, [r2], r4             @ row 5
    vdup.8      q14, d8[6]                  @ row 9
    vst1.32     {q11}, [r2], r4             @ row 6
    vdup.8      q15, d8[5]                  @ row 10
    vst1.32     {q12}, [r2], r4             @ row 7
    vdup.8      q1, d8[4]                   @ row 11
    vst1.32     {q13}, [r2], r4             @ row 8
    vdup.8      q2, d8[3]                   @ row 12
    vst1.32     {q14}, [r2], r4             @ row 9
    vdup.8      q3, d8[2]                   @ row 13
    vst1.32     {q15}, [r2], r4             @ row 10
    vdup.8      q5, d8[1]                   @ row 14
    vst1.32     {q1}, [r2], r4              @ row 11
    vdup.8      q6, d8[0]                   @ row 15
    vst1.32     {q2}, [r2], r4              @ row 12

    vst1.32     {q3}, [r2], r4              @ row 13

    vst1.32     {q5}, [r2], r4              @ row 14

    vst1.32     {q6}, [r2], r4              @ row 15
    b           .Li16_done

.Li16_pick_dc:
    @ DC wins: q15 still holds the DC value in every lane
    str         r10, [r7]                   @ *pu4_sadmin
    mov         r10, #I16_MODE_DC
    str         r10, [r6]                   @ *u4_intra_mode

.Li16_store_q15:
    @ Shared by DC and VERT: write q15 to each of the 16 destination rows
    .rept 16
    vst1.32     {q15}, [r2], r4
    .endr

.Li16_done:
    vpop        {d8-d15}
    pop         {r4-r12, pc}                @ restore registers and return
311
312