blob: c84307f5e4520672836fe4e0c0fed8dc83628391 [file] [log] [blame]
@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
10
@ This file contains some minimum and maximum functions, optimized for
@ the ARM Neon platform. The description header can be found in
@ signal_processing_library.h
@
@ The reference C code is in file min_max_operations.c. The code here is
@ basically that code with the loop unrolled by 8 using Neon instructions,
@ and it is bit-exact with the reference.
17
kma@webrtc.org9fc62502012-11-17 00:22:46 +000018#include "webrtc/system_wrappers/interface/asm_defines.h"
19
@ Symbols exported from this file, grouped by operation.
GLOBAL_FUNCTION WebRtcSpl_MaxAbsValueW16Neon
GLOBAL_FUNCTION WebRtcSpl_MaxAbsValueW32Neon

GLOBAL_FUNCTION WebRtcSpl_MaxValueW16Neon
GLOBAL_FUNCTION WebRtcSpl_MaxValueW32Neon

GLOBAL_FUNCTION WebRtcSpl_MinValueW16Neon
GLOBAL_FUNCTION WebRtcSpl_MinValueW32Neon
26
.align 2

@ int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length);
@
@ Returns the largest absolute value in vector[0 .. length-1], saturated to
@ 32767, or -1 when vector is NULL or length <= 0.
@
@ In:      r0 = vector, r1 = length
@ Out:     r0 = result
@ Scratch: r2 (running maximum), r3, r12, q12, q13, flags
DEFINE_FUNCTION WebRtcSpl_MaxAbsValueW16Neon
  mov r2, #-1                       @ Result for the invalid-argument cases.
  cmp r0, #0
  beq MAX_ABS_W16_DONE              @ NULL vector.
  cmp r1, #0
  ble MAX_ABS_W16_DONE              @ length <= 0.

  cmp r1, #8
  blt MAX_ABS_W16_TAIL              @ Fewer than 8 samples: scalar loop only.

  vmov.i16 q12, #0                  @ q12 = eight running maxima.
  sub r1, r1, #8                    @ Bias the counter so that bge below
                                    @ loops while 8 more samples remain.

MAX_ABS_W16_BY_8:
  vld1.16 {q13}, [r0]!              @ Load 8 samples and advance the pointer.
  subs r1, r1, #8
  vabs.s16 q13, q13                 @ vabs leaves -32768 (0x8000) unchanged.
  vmax.u16 q12, q12, q13            @ Unsigned max, so 0x8000 ranks highest.
  bge MAX_ABS_W16_BY_8

  @ Fold the eight lanes of q12 into lane 0 of d24, then copy it to r2.
  vmax.u16 d24, d24, d25
  vpmax.u16 d24, d24, d24
  vpmax.u16 d24, d24, d24
  adds r1, r1, #8                   @ Undo the bias: r1 = samples left over.
  vmov.u16 r2, d24[0]               @ Zero-extended, so 0x8000 stays positive.
  beq MAX_ABS_W16_DONE              @ No leftover samples.

MAX_ABS_W16_TAIL:
  ldrsh r3, [r0], #2                @ Next sample, sign-extended.
  eor r12, r3, r3, asr #31          @ r12 = |r3|, computed as
  sub r12, r12, r3, asr #31         @ (x ^ sign) - sign.
  cmp r2, r12
  movlt r2, r12                     @ Keep the larger value.
  subs r1, r1, #1
  bne MAX_ABS_W16_TAIL

MAX_ABS_W16_DONE:
  cmp r2, #0x8000                   @ |-32768| does not fit in int16_t;
  subeq r2, r2, #1                  @ return 32767 in that case.
  mov r0, r2
  bx lr
71
kma@webrtc.org9fc62502012-11-17 00:22:46 +000072
andrew@webrtc.orga7b57da2012-10-22 18:19:23 +000073
@ int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length);
@
@ Returns the largest absolute value in vector[0 .. length-1], saturated to
@ 0x7FFFFFFF, or -1 when vector is NULL or length <= 0.
@
@ In:      r0 = vector, r1 = length
@ Out:     r0 = result
@ Scratch: r2 (running maximum, compared as unsigned), r3, r12, q11-q14, flags
DEFINE_FUNCTION WebRtcSpl_MaxAbsValueW32Neon
  cmp r0, #0
  moveq r0, #-1
  beq EXIT                  @ Return -1 for a NULL pointer.
  cmp r1, #0                @ length
  movle r0, #-1
  ble EXIT                  @ Return -1 if length <= 0.

  mov r2, #0                @ Running maximum. The scalar loop reads r2, so it
                            @ must be defined even when length < 8 and the
                            @ unrolled loop below is skipped.
  vmov.i32 q11, #0
  vmov.i32 q12, #0
  cmp r1, #8
  blt LOOP_MAX_ABS_VALUE_W32

  sub r1, #8                @ Bias the counter so that bge below loops while
                            @ 8 more samples remain.

LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32:
  vld1.32 {q13, q14}, [r0]! @ Load 8 samples and advance the pointer.
  subs r1, #8
  vabs.s32 q13, q13         @ vabs leaves 0x80000000 unchanged.
  vabs.s32 q14, q14
  vmax.u32 q11, q13         @ Unsigned max, so 0x80000000 is not lost.
  vmax.u32 q12, q14
  bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32

  @ Fold q11 and q12 into lane 0 of d24, then copy it to r2.
  vmax.u32 q12, q11
  vmax.u32 d24, d25
  vpmax.u32 d24, d24, d24
  adds r1, #8               @ Undo the bias: r1 = samples left over.
  vmov.u32 r2, d24[0]
  beq END_MAX_ABS_VALUE_W32 @ No leftover samples.

LOOP_MAX_ABS_VALUE_W32:
  ldr r3, [r0], #4
  eor r12, r3, r3, asr #31  @ r12 = |r3|, computed as (x ^ sign) - sign.
  sub r12, r12, r3, asr #31
  cmp r2, r12
  movcc r2, r12             @ Unsigned compare: keep the larger value.
  subs r1, #1
  bne LOOP_MAX_ABS_VALUE_W32

END_MAX_ABS_VALUE_W32:
  mvn r0, #0x80000000       @ r0 = 0x7FFFFFFF, the saturation limit.
  cmp r2, r0
  movcc r0, r2              @ Return the smaller of r2 and 0x7FFFFFFF.

EXIT:
  bx lr
123
@ int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, int length);
@
@ Returns the largest signed value in vector[0 .. length-1], or -32768 when
@ vector is NULL or length <= 0.
@
@ In:      r0 = vector, r1 = length
@ Out:     r0 = result, sign-extended to 32 bits
@ Scratch: r2 (running maximum, signed 32-bit), r3, q12, q13, flags
DEFINE_FUNCTION WebRtcSpl_MaxValueW16Neon
  mov r2, #0x8000           @ Initialize the return value to -32768.
  sxth r2, r2               @ Sign-extend it: the scalar loop compares r2 with
                            @ sign-extended samples, so +32768 would never be
                            @ replaced.
  cmp r0, #0
  beq END_MAX_VALUE_W16
  cmp r1, #0
  ble END_MAX_VALUE_W16

  vmov.i16 q12, #0x8000     @ q12 = eight running maxima, all -32768.
  cmp r1, #8
  blt LOOP_MAX_VALUE_W16

  sub r1, #8                @ Bias the counter so that bge below loops while
                            @ 8 more samples remain.

LOOP_UNROLLED_BY_8_MAX_VALUE_W16:
  vld1.16 {q13}, [r0]!      @ Load 8 samples and advance the pointer.
  subs r1, #8
  vmax.s16 q12, q13
  bge LOOP_UNROLLED_BY_8_MAX_VALUE_W16

  @ Fold the eight lanes of q12 into lane 0 of d24, then copy it to r2.
  vmax.s16 d24, d25
  vpmax.s16 d24, d24, d24
  vpmax.s16 d24, d24, d24
  adds r1, #8               @ Undo the bias: r1 = samples left over.
  vmov.s16 r2, d24[0]       @ Signed extract, so a negative maximum stays
                            @ negative for the signed compare below.
  beq END_MAX_VALUE_W16     @ No leftover samples.

LOOP_MAX_VALUE_W16:
  ldrsh r3, [r0], #2        @ Next sample, sign-extended.
  cmp r2, r3
  movlt r2, r3              @ Keep the larger value.
  subs r1, #1
  bne LOOP_MAX_VALUE_W16

END_MAX_VALUE_W16:
  mov r0, r2
  bx lr
162
@ int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, int length);
@
@ Returns the largest signed value in vector[0 .. length-1], or 0x80000000
@ when vector is NULL or length <= 0.
@
@ In:      r0 = vector, r1 = length
@ Out:     r0 = result
@ Scratch: r2 (running maximum), r3, q11-q14, flags
DEFINE_FUNCTION WebRtcSpl_MaxValueW32Neon
  mov r2, #0x80000000               @ Start from the smallest int32; this is
                                    @ also the invalid-argument result.
  cmp r0, #0
  beq MAX_W32_DONE                  @ NULL vector.
  cmp r1, #0
  ble MAX_W32_DONE                  @ length <= 0.

  vmov.i32 q11, #0x80000000         @ q11 and q12 hold eight running maxima.
  vmov.i32 q12, #0x80000000
  cmp r1, #8
  blt MAX_W32_TAIL                  @ Fewer than 8 samples: scalar loop only.

  sub r1, r1, #8                    @ Bias the counter so that bge below
                                    @ loops while 8 more samples remain.

MAX_W32_BY_8:
  vld1.32 {q13, q14}, [r0]!         @ Load 8 samples and advance the pointer.
  subs r1, r1, #8
  vmax.s32 q11, q11, q13
  vmax.s32 q12, q12, q14
  bge MAX_W32_BY_8

  @ Fold q11 and q12 into lane 0 of d24, then copy it to r2.
  vmax.s32 q12, q12, q11
  vpmax.s32 d24, d24, d25
  vpmax.s32 d24, d24, d24
  adds r1, r1, #8                   @ Undo the bias: r1 = samples left over.
  vmov.s32 r2, d24[0]
  beq MAX_W32_DONE                  @ No leftover samples.

MAX_W32_TAIL:
  ldr r3, [r0], #4                  @ Next sample.
  cmp r2, r3
  movlt r2, r3                      @ Keep the larger value.
  subs r1, r1, #1
  bne MAX_W32_TAIL

MAX_W32_DONE:
  mov r0, r2
  bx lr
203
@ int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, int length);
@
@ Returns the smallest signed value in vector[0 .. length-1], or 32767 when
@ vector is NULL or length <= 0.
@
@ In:      r0 = vector, r1 = length
@ Out:     r0 = result
@ Scratch: r2 (running minimum), r3, q12, q13, flags
DEFINE_FUNCTION WebRtcSpl_MinValueW16Neon
  movw r2, #0x7FFF                  @ Start from the largest int16; this is
                                    @ also the invalid-argument result.
  cmp r0, #0
  beq MIN_W16_DONE                  @ NULL vector.
  cmp r1, #0
  ble MIN_W16_DONE                  @ length <= 0.

  vmov.i16 q12, #0x7FFF             @ q12 = eight running minima.
  cmp r1, #8
  blt MIN_W16_TAIL                  @ Fewer than 8 samples: scalar loop only.

  sub r1, r1, #8                    @ Bias the counter so that bge below
                                    @ loops while 8 more samples remain.

MIN_W16_BY_8:
  vld1.16 {q13}, [r0]!              @ Load 8 samples and advance the pointer.
  subs r1, r1, #8
  vmin.s16 q12, q12, q13
  bge MIN_W16_BY_8

  @ Fold the eight lanes of q12 into lane 0 of d24 to get the minimum,
  @ then copy it to r2.
  vmin.s16 d24, d24, d25
  vpmin.s16 d24, d24, d24
  vpmin.s16 d24, d24, d24
  adds r1, r1, #8                   @ Undo the bias: r1 = samples left over.
  vmov.s16 r2, d24[0]
  sxth r2, r2                       @ r2 as a signed 32-bit value; sxth does
                                    @ not touch the flags set by adds.
  beq MIN_W16_DONE                  @ No leftover samples.

MIN_W16_TAIL:
  ldrsh r3, [r0], #2                @ Next sample, sign-extended.
  cmp r2, r3
  movge r2, r3                      @ Keep the smaller value.
  subs r1, r1, #1
  bne MIN_W16_TAIL

MIN_W16_DONE:
  mov r0, r2
  bx lr
243
@ int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, int length);
@
@ Returns the smallest signed value in vector[0 .. length-1], or 0x7FFFFFFF
@ when vector is NULL or length <= 0.
@
@ In:      r0 = vector, r1 = length
@ Out:     r0 = result
@ Scratch: r2 (running minimum), r3, q11-q14, flags
DEFINE_FUNCTION WebRtcSpl_MinValueW32Neon
  mvn r2, #0x80000000               @ r2 = 0x7FFFFFFF, the largest int32;
                                    @ also the invalid-argument result.
  cmp r0, #0
  beq MIN_W32_DONE                  @ NULL vector.
  cmp r1, #0
  ble MIN_W32_DONE                  @ length <= 0.

  vdup.32 q11, r2                   @ q11 and q12 hold eight running minima.
  vdup.32 q12, r2
  cmp r1, #8
  blt MIN_W32_TAIL                  @ Fewer than 8 samples: scalar loop only.

  sub r1, r1, #8                    @ Bias the counter so that bge below
                                    @ loops while 8 more samples remain.

MIN_W32_BY_8:
  vld1.32 {q13, q14}, [r0]!         @ Load 8 samples and advance the pointer.
  subs r1, r1, #8
  vmin.s32 q11, q11, q13
  vmin.s32 q12, q12, q14
  bge MIN_W32_BY_8

  @ Fold q11 and q12 into lane 0 of d24 to get the minimum, then copy it
  @ to r2.
  vmin.s32 q12, q12, q11
  vpmin.s32 d24, d24, d25
  vpmin.s32 d24, d24, d24
  adds r1, r1, #8                   @ Undo the bias: r1 = samples left over.
  vmov.s32 r2, d24[0]
  beq MIN_W32_DONE                  @ No leftover samples.

MIN_W32_TAIL:
  ldr r3, [r0], #4                  @ Next sample.
  cmp r2, r3
  movge r2, r3                      @ Keep the smaller value.
  subs r1, r1, #1
  bne MIN_W32_TAIL

MIN_W32_DONE:
  mov r0, r2
  bx lr