Blame - common_audio/signal_processing/vector_scaling_operations_neon.S - fp2-dev/platform/external/chromium_org/third_party/webrtc

blob: 07db741b009e652a368a7af530b4c55b594fb376 [file] [log] [blame]

andrew@webrtc.org	a7b57da	2012-10-22 18:19:23 +0000	[diff] [blame]	1	@
				2	@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
				3	@
				4	@ Use of this source code is governed by a BSD-style license
				5	@ that can be found in the LICENSE file in the root of the source
				6	@ tree. An additional intellectual property rights grant can be found
				7	@ in the file PATENTS. All contributing project authors may
				8	@ be found in the AUTHORS file in the root of the source tree.
				9	@
				10
				11	@ vector_scaling_operations_neon.s
				12	@ This file contains the function WebRtcSpl_ScaleAndAddVectorsWithRoundNeon(),
				13	@ optimized for ARM Neon platform. Output is bit-exact with the reference
				14	@ C code in vector_scaling_operations.c.
				15
kma@webrtc.org	9fc6250	2012-11-17 00:22:46 +0000	[diff] [blame]	16	#include "webrtc/system_wrappers/interface/asm_defines.h"
andrew@webrtc.org	a7b57da	2012-10-22 18:19:23 +0000	[diff] [blame]	17
@-----------------------------------------------------------------------------
@ WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
@
@ For every i in [0, length):
@   out_vector[i] = (int16_t)((in_vector1[i] * in_vector1_scale +
@                              in_vector2[i] * in_vector2_scale +
@                              round_value) >> right_shifts)
@ with round_value = (1 << right_shifts) >> 1.
@
@ Arguments, as consumed by the code below:
@   r0         in_vector1        pointer to 16-bit samples (post-incremented)
@   r1         in_vector1_scale  only the low 16 bits are used
@   r2         in_vector2        pointer to 16-bit samples (post-incremented)
@   r3         in_vector2_scale  only the low 16 bits are used
@   [sp, #0]   right_shifts      loaded as a signed 16-bit value
@   [sp, #4]   out_vector        pointer to 16-bit output samples
@   [sp, #8]   length            loaded as a 32-bit value; <= 0 returns at once
@ (stack offsets are those at function entry, before the push below)
@
@ Register roles:
@   r4  length, later the number of tail elements (length % 8)
@   r5  out_vector write pointer
@   r6  right_shifts
@   r7  scratch / vector loop counter / scalar accumulator
@   r8  scalar scratch
@   r9  round_value for the scalar loop
@   d26, d27  replicated scales;  q12  replicated -right_shifts
@   q0-q3, q14, q15  vector temporaries
@
@ r4-r9 are saved and restored. r0 and r2 are advanced past the consumed
@ input. The code does not set a return value.
@ NOTE(review): the matching C prototype is not visible in this file - confirm
@ that callers do not expect a status in r0.
@-----------------------------------------------------------------------------
GLOBAL_FUNCTION WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
.align  2
DEFINE_FUNCTION WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
  push    {r4-r9}

  @ Six registers were pushed (24 bytes), so the stack-passed arguments now
  @ sit at sp + 24, sp + 28 and sp + 32.
  ldr     r4, [sp, #32]             @ length
  ldr     r5, [sp, #28]             @ out_vector
  ldrsh   r6, [sp, #24]             @ right_shifts

  cmp     r4, #0
  ble     END                       @ Return if length <= 0.

  cmp     r4, #8
  blt     SET_ROUND_VALUE           @ Fewer than 8 elements: scalar loop only.

  vdup.16 d26, r1                   @ in_vector1_scale in every 16-bit lane
  vdup.16 d27, r3                   @ in_vector2_scale in every 16-bit lane

  @ Neon instructions can only right shift by an immediate value. To shift
  @ right by a register value, we have to do a left shift by the negative
  @ value. vrshl.s32 takes its shift count from the signed low byte of each
  @ 32-bit lane, so replicating the 16-bit value is sufficient.
  rsb     r7, r6, #0
  vdup.16 q12, r7                   @ -right_shifts

  bic     r7, r4, #7                @ Counter for LOOP_UNROLLED_BY_8:
                                    @ length / 8 * 8 (> 0 here).

LOOP_UNROLLED_BY_8:
  vld1.16 {d28, d29}, [r0]!         @ 8 x in_vector1[]
  vld1.16 {d30, d31}, [r2]!         @ 8 x in_vector2[]
  vmull.s16 q0, d28, d26            @ in_vector1[0..3] * in_vector1_scale
  vmull.s16 q1, d29, d26            @ in_vector1[4..7] * in_vector1_scale
  vmull.s16 q2, d30, d27            @ in_vector2[0..3] * in_vector2_scale
  vmull.s16 q3, d31, d27            @ in_vector2[4..7] * in_vector2_scale
  vadd.s32 q0, q2
  vadd.s32 q1, q3
  vrshl.s32 q0, q12                 @ Round shift right by right_shifts.
  vrshl.s32 q1, q12
  vmovn.i32 d0, q0                  @ Keep the low 16 bits of each result.
  vmovn.i32 d1, q1
  subs    r7, #8                    @ Flags set here drive the bgt below;
  vst1.16 {d0, d1}, [r5]!           @ the store does not modify them.
  bgt     LOOP_UNROLLED_BY_8

  @ Counter for LOOP_NO_UNROLLING: length % 8. The mask has to be 7: the
  @ vector loop already consumed length & ~7 elements, so any wider mask
  @ would send the scalar loop past the end of all three buffers.
  ands    r4, #7
  beq     END

SET_ROUND_VALUE:
  mov     r9, #1
  lsl     r9, r6
  lsr     r9, #1                    @ r9 = (1 << right_shifts) >> 1

LOOP_NO_UNROLLING:
  ldrh    r7, [r0], #2              @ in_vector1[i]
  ldrh    r8, [r2], #2              @ in_vector2[i]
  smulbb  r7, r7, r1                @ Signed 16x16 multiply of the low halves.
  smulbb  r8, r8, r3
  subs    r4, #1                    @ Flags for the bne below; the add/asr/strh
                                    @ that follow are the non-flag-setting forms.
  add     r7, r9
  add     r7, r8
  asr     r7, r6
  strh    r7, [r5], #2              @ out_vector[i] = low 16 bits of the result
  bne     LOOP_NO_UNROLLING

END:
  pop     {r4-r9}
  bx      lr