Blame - common_audio/signal_processing/min_max_operations_neon.S - fp2-dev/platform/external/chromium_org/third_party/webrtc

blob: c84307f5e4520672836fe4e0c0fed8dc83628391 [file] [log] [blame]

andrew@webrtc.org	a7b57da	2012-10-22 18:19:23 +0000	[diff] [blame]	1	@
				2	@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
				3	@
				4	@ Use of this source code is governed by a BSD-style license
				5	@ that can be found in the LICENSE file in the root of the source
				6	@ tree. An additional intellectual property rights grant can be found
				7	@ in the file PATENTS. All contributing project authors may
				8	@ be found in the AUTHORS file in the root of the source tree.
				9	@
				10
				11	@ This file contains some minimum and maximum functions, optimized for
				12	@ ARM Neon platform. The description header can be found in
				13	@ signal_processing_library.h
				14	@
				15	@ The reference C code is in file min_max_operations.c. Code here is basically
				16	@ a loop unrolling by 8 with Neon instructions. Bit-exact.
				17
kma@webrtc.org	9fc6250	2012-11-17 00:22:46 +0000	[diff] [blame]	18	#include "webrtc/system_wrappers/interface/asm_defines.h"
				19
				20	GLOBAL_FUNCTION WebRtcSpl_MaxAbsValueW16Neon
				21	GLOBAL_FUNCTION WebRtcSpl_MaxAbsValueW32Neon
				22	GLOBAL_FUNCTION WebRtcSpl_MaxValueW16Neon
				23	GLOBAL_FUNCTION WebRtcSpl_MaxValueW32Neon
				24	GLOBAL_FUNCTION WebRtcSpl_MinValueW16Neon
				25	GLOBAL_FUNCTION WebRtcSpl_MinValueW32Neon
				26
andrew@webrtc.org	a7b57da	2012-10-22 18:19:23 +0000	[diff] [blame]	27	.align 2
@ int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length);
@
@ Returns the largest absolute value in vector[0 .. length-1], capped at
@ 32767. Returns -1 when vector is NULL or length <= 0.
@
@   r0 = vector on entry, result on exit
@   r1 = length, then the count of elements still to process
@   r2 = running maximum
@   r3, r12, q12, q13 and the condition flags are scratch.
DEFINE_FUNCTION WebRtcSpl_MaxAbsValueW16Neon
  mov r2, #-1                       @ Result for invalid arguments.
  cmp r0, #0
  beq MAX_ABS_W16_DONE
  cmp r1, #0
  ble MAX_ABS_W16_DONE

  cmp r1, #8
  blt MAX_ABS_W16_SCALAR_LOOP       @ Too short for a full NEON iteration.

  vmov.i16 q12, #0                  @ Eight per-lane running maxima.
  sub r1, r1, #8                    @ Bias the count so "bge" means another
                                    @ full group of 8 is still available.

MAX_ABS_W16_VECTOR_LOOP:
  vld1.16 {q13}, [r0]!
  subs r1, r1, #8
  vabs.s16 q13, q13                 @ -32768 stays 0x8000 here.
  vmax.u16 q12, q12, q13            @ Unsigned max keeps 0x8000 as 32768.
  bge MAX_ABS_W16_VECTOR_LOOP

  @ Reduce the eight lanes of q12 to one value in lane 0 of d24.
  vmax.u16 d24, d24, d25
  vpmax.u16 d24, d24, d24
  vpmax.u16 d24, d24, d24
  vmov.u16 r2, d24[0]               @ Zero-extended, does not touch flags.
  adds r1, r1, #8                   @ Undo the bias: r1 = leftover elements.
  beq MAX_ABS_W16_DONE

MAX_ABS_W16_SCALAR_LOOP:
  ldrsh r3, [r0], #2
  eor r12, r3, r3, asr #31          @ r12 = abs(r3): xor with the sign mask,
  sub r12, r12, r3, asr #31         @ then subtract the sign mask.
  cmp r2, r12
  movlt r2, r12
  subs r1, r1, #1
  bne MAX_ABS_W16_SCALAR_LOOP

MAX_ABS_W16_DONE:
  cmp r2, #0x8000                   @ abs(-32768) does not fit in int16_t,
  subeq r2, r2, #1                  @ so report 32767 instead.
  mov r0, r2
  bx lr
				71
kma@webrtc.org	9fc6250	2012-11-17 00:22:46 +0000	[diff] [blame]	72
andrew@webrtc.org	a7b57da	2012-10-22 18:19:23 +0000	[diff] [blame]	73
				74	@ int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length);
kma@webrtc.org	9fc6250	2012-11-17 00:22:46 +0000	[diff] [blame]	75	DEFINE_FUNCTION WebRtcSpl_MaxAbsValueW32Neon
andrew@webrtc.org	a7b57da	2012-10-22 18:19:23 +0000	[diff] [blame]	76	cmp r0, #0
				77	moveq r0, #-1
				78	beq EXIT @ Return -1 for a NULL pointer.
				79	cmp r1, #0 @ length
				80	movle r0, #-1
				81	ble EXIT @ Return -1 if length <= 0.
				82
				83	vmov.i32 q11, #0
				84	vmov.i32 q12, #0
				85	cmp r1, #8
				86	blt LOOP_MAX_ABS_VALUE_W32
				87
				88	sub r1, #8 @ Counter for loops
				89
				90	LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32:
				91	vld1.32 {q13, q14}, [r0]!
				92	subs r1, #8 @ Counter for loops
				93	vabs.s32 q13, q13 @ vabs doesn't change the value of 0x80000000.
				94	vabs.s32 q14, q14
				95	vmax.u32 q11, q13 @ Use u32 so we don't lose the value 0x80000000.
				96	vmax.u32 q12, q14
				97	bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32
				98
				99	@ Find the maximum value in the Neon registers and move it to r2.
				100	vmax.u32 q12, q11
				101	vmax.u32 d24, d25
kma@webrtc.org	9fc6250	2012-11-17 00:22:46 +0000	[diff] [blame]	102	vpmax.u32 d24, d24, d24
andrew@webrtc.org	a7b57da	2012-10-22 18:19:23 +0000	[diff] [blame]	103	adds r1, #8
				104	vmov.u32 r2, d24[0]
				105	beq END_MAX_ABS_VALUE_W32
				106
				107	LOOP_MAX_ABS_VALUE_W32:
				108	ldr r3, [r0], #4
				109	eor r12, r3, r3, asr #31 @ eor and then sub, to get absolute value.
				110	sub r12, r12, r3, asr #31
				111	cmp r2, r12
				112	movcc r2, r12
				113	subs r1, #1
				114	bne LOOP_MAX_ABS_VALUE_W32
				115
				116	END_MAX_ABS_VALUE_W32:
				117	mvn r0, #0x80000000 @ Guard against the case for 0x80000000.
				118	cmp r2, r0
				119	movcc r0, r2
				120
				121	EXIT:
				122	bx lr
				123
@ int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, int length);
@
@ Returns the largest signed value in vector[0 .. length-1].
@ Returns -32768 when vector is NULL or length <= 0.
@
@   r0 = vector on entry, result on exit
@   r1 = length, then the count of elements still to process
@   r2 = running maximum, kept as a sign-extended 32-bit value
@   r3, q12, q13 and the condition flags are scratch.
DEFINE_FUNCTION WebRtcSpl_MaxValueW16Neon
  @ Seed the running maximum with -32768. The constant 0xFFFF8000 cannot be
  @ written as a single mov/mvn immediate, so build it in two steps. Seeding
  @ with +32768 instead would beat every element in the signed compare of
  @ the scalar loop.
  mov r2, #0x8000
  sxth r2, r2
  cmp r0, #0
  beq END_MAX_VALUE_W16
  cmp r1, #0
  ble END_MAX_VALUE_W16

  vmov.i16 q12, #0x8000     @ Eight per-lane running maxima, all -32768.
  cmp r1, #8
  blt LOOP_MAX_VALUE_W16

  sub r1, #8                @ Bias the count so "bge" means another full
                            @ group of 8 is still available.

LOOP_UNROLLED_BY_8_MAX_VALUE_W16:
  vld1.16 {q13}, [r0]!
  subs r1, #8
  vmax.s16 q12, q13
  bge LOOP_UNROLLED_BY_8_MAX_VALUE_W16

  @ Find the maximum value in the Neon registers and move it to r2.
  vmax.s16 d24, d25
  vpmax.s16 d24, d24, d24
  vpmax.s16 d24, d24, d24
  adds r1, #8               @ Undo the bias: r1 = leftover elements.
  vmov.s16 r2, d24[0]       @ Sign-extend, so a negative maximum compares
                            @ correctly below and is returned correctly.
  beq END_MAX_VALUE_W16

LOOP_MAX_VALUE_W16:
  ldrsh r3, [r0], #2
  cmp r2, r3
  movlt r2, r3
  subs r1, #1
  bne LOOP_MAX_VALUE_W16

END_MAX_VALUE_W16:
  mov r0, r2
  bx lr
				162
@ int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, int length);
@
@ Returns the largest signed value in vector[0 .. length-1].
@ Returns 0x80000000 when vector is NULL or length <= 0.
@
@   r0 = vector on entry, result on exit
@   r1 = length, then the count of elements still to process
@   r2 = running maximum
@   r3, q11-q14 and the condition flags are scratch.
DEFINE_FUNCTION WebRtcSpl_MaxValueW32Neon
  mov r2, #0x80000000               @ Smallest int32_t: seed and error result.
  cmp r0, #0
  beq MAX_W32_DONE
  cmp r1, #0
  ble MAX_W32_DONE

  vmov.i32 q11, #0x80000000         @ Two sets of four per-lane maxima.
  vmov.i32 q12, #0x80000000
  cmp r1, #8
  blt MAX_W32_SCALAR_LOOP           @ Too short for a full NEON iteration.

  sub r1, r1, #8                    @ Bias the count so "bge" means another
                                    @ full group of 8 is still available.

MAX_W32_VECTOR_LOOP:
  vld1.32 {q13, q14}, [r0]!
  subs r1, r1, #8
  vmax.s32 q11, q11, q13
  vmax.s32 q12, q12, q14
  bge MAX_W32_VECTOR_LOOP

  @ Reduce the eight lanes of q11/q12 to one value in lane 0 of d24.
  vmax.s32 q12, q12, q11
  vpmax.s32 d24, d24, d25
  vpmax.s32 d24, d24, d24
  vmov.s32 r2, d24[0]               @ Does not touch the flags.
  adds r1, r1, #8                   @ Undo the bias: r1 = leftover elements.
  beq MAX_W32_DONE

MAX_W32_SCALAR_LOOP:
  ldr r3, [r0], #4
  cmp r2, r3
  movlt r2, r3
  subs r1, r1, #1
  bne MAX_W32_SCALAR_LOOP

MAX_W32_DONE:
  mov r0, r2
  bx lr
				203
@ int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, int length);
@
@ Returns the smallest signed value in vector[0 .. length-1].
@ Returns 32767 when vector is NULL or length <= 0.
@
@   r0 = vector on entry, result on exit
@   r1 = length, then the count of elements still to process
@   r2 = running minimum, kept as a sign-extended 32-bit value
@   r3, q12, q13 and the condition flags are scratch.
DEFINE_FUNCTION WebRtcSpl_MinValueW16Neon
  movw r2, #0x7FFF                  @ Largest int16_t: seed and error result.
  cmp r0, #0
  beq MIN_W16_DONE
  cmp r1, #0
  ble MIN_W16_DONE

  vmov.i16 q12, #0x7FFF             @ Eight per-lane running minima.
  cmp r1, #8
  blt MIN_W16_SCALAR_LOOP           @ Too short for a full NEON iteration.

  sub r1, r1, #8                    @ Bias the count so "bge" means another
                                    @ full group of 8 is still available.

MIN_W16_VECTOR_LOOP:
  vld1.16 {q13}, [r0]!
  subs r1, r1, #8
  vmin.s16 q12, q12, q13
  bge MIN_W16_VECTOR_LOOP

  @ Find the minimum value in the Neon registers and move it to r2.
  vmin.s16 d24, d24, d25
  vpmin.s16 d24, d24, d24
  vpmin.s16 d24, d24, d24
  vmov.s16 r2, d24[0]               @ Sign-extending move; flags untouched.
  sxth r2, r2
  adds r1, r1, #8                   @ Undo the bias: r1 = leftover elements.
  beq MIN_W16_DONE

MIN_W16_SCALAR_LOOP:
  ldrsh r3, [r0], #2
  cmp r2, r3
  movge r2, r3
  subs r1, r1, #1
  bne MIN_W16_SCALAR_LOOP

MIN_W16_DONE:
  mov r0, r2
  bx lr
				243
@ int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, int length);
@
@ Returns the smallest signed value in vector[0 .. length-1].
@ Returns 0x7FFFFFFF when vector is NULL or length <= 0.
@
@   r0 = vector on entry, result on exit
@   r1 = length, then the count of elements still to process
@   r2 = running minimum
@   r3, q11-q14 and the condition flags are scratch.
DEFINE_FUNCTION WebRtcSpl_MinValueW32Neon
  mvn r2, #0x80000000               @ r2 = 0x7FFFFFFF: seed and error result.
  cmp r0, #0
  beq MIN_W32_DONE
  cmp r1, #0
  ble MIN_W32_DONE

  vdup.32 q11, r2                   @ Two sets of four per-lane minima.
  vdup.32 q12, r2
  cmp r1, #8
  blt MIN_W32_SCALAR_LOOP           @ Too short for a full NEON iteration.

  sub r1, r1, #8                    @ Bias the count so "bge" means another
                                    @ full group of 8 is still available.

MIN_W32_VECTOR_LOOP:
  vld1.32 {q13, q14}, [r0]!
  subs r1, r1, #8
  vmin.s32 q11, q11, q13
  vmin.s32 q12, q12, q14
  bge MIN_W32_VECTOR_LOOP

  @ Find the minimum value in the Neon registers and move it to r2.
  vmin.s32 q12, q12, q11
  vpmin.s32 d24, d24, d25
  vpmin.s32 d24, d24, d24
  vmov.s32 r2, d24[0]               @ Does not touch the flags.
  adds r1, r1, #8                   @ Undo the bias: r1 = leftover elements.
  beq MIN_W32_DONE

MIN_W32_SCALAR_LOOP:
  ldr r3, [r0], #4
  cmp r2, r3
  movge r2, r3
  subs r1, r1, #1
  bne MIN_W32_SCALAR_LOOP

MIN_W32_DONE:
  mov r0, r2
  bx lr