Blame - common_audio/signal_processing/filter_ar_fast_q12_armv7.S - fp2-dev/platform/external/chromium_org/third_party/webrtc

blob: ff60cc6198214c49df6e61fd883bde0be50188fa [file] [log] [blame]

andrew@webrtc.org	a7b57da	2012-10-22 18:19:23 +0000	[diff] [blame]	1	@
				2	@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
				3	@
				4	@ Use of this source code is governed by a BSD-style license
				5	@ that can be found in the LICENSE file in the root of the source
				6	@ tree. An additional intellectual property rights grant can be found
				7	@ in the file PATENTS. All contributing project authors may
				8	@ be found in the AUTHORS file in the root of the source tree.
				9	@
				10
				11	@ This file contains the function WebRtcSpl_FilterARFastQ12(), optimized for
				12	@ ARMv7 platform. The description header can be found in
				13	@ signal_processing_library.h
				14	@
				15	@ Output is bit-exact with the generic C code as in filter_ar_fast_q12.c, and
				16	@ the reference C code at end of this file.
				17
				18	@ Assumptions:
				19	@ (1) data_length > 0
				20	@ (2) coefficients_length > 1
				21
				22	@ Register usage:
				23	@
				24	@ r0: &data_in[i]
				25	@ r1: &data_out[i], for result output
				26	@ r2: &coefficients[0]
				27	@ r3: coefficients_length
				28	@ r4: Iteration counter for the outer loop.
				29	@ r5: data_out[j] as multiplication inputs
				30	@ r6: Calculated value for output data_out[]; iteration counter for inner loop
				31	@ r7: Partial sum of a filtering multiplication results
				32	@ r8: Partial sum of a filtering multiplication results
				33	@ r9: &data_out[], for filtering input; data_in[i]
				34	@ r10: coefficients[j]
				35	@ r11: Scratch
				36	@ r12: &coefficients[j]
				37
kma@webrtc.org	9fc6250	2012-11-17 00:22:46 +0000	[diff] [blame]	38	#include "webrtc/system_wrappers/interface/asm_defines.h"
andrew@webrtc.org	a7b57da	2012-10-22 18:19:23 +0000	[diff] [blame]	39
kma@webrtc.org	9fc6250	2012-11-17 00:22:46 +0000	[diff] [blame]	40	GLOBAL_FUNCTION WebRtcSpl_FilterARFastQ12
andrew@webrtc.org	a7b57da	2012-10-22 18:19:23 +0000	[diff] [blame]	41	.align 2
kma@webrtc.org	9fc6250	2012-11-17 00:22:46 +0000	[diff] [blame]	42	DEFINE_FUNCTION WebRtcSpl_FilterARFastQ12
andrew@webrtc.org	a7b57da	2012-10-22 18:19:23 +0000	[diff] [blame]	43	push {r4-r11} @ Save the registers used below (8 words = 32 bytes).
				44
				45	ldr r12, [sp, #32] @ data_length: full 32-bit int, stack argument just above the 8 saved words
				46	subs r4, r12, #1 @ Outer loop counter = data_length - 1
				47	beq ODD_LENGTH @ jump if data_length == 1
				48
				49	LOOP_LENGTH: @ Each pass produces data_out[i] and data_out[i + 1].
				50	add r12, r2, r3, lsl #1
				51	sub r12, #4 @ &coefficients[coefficients_length - 2]
				52	sub r9, r1, r3, lsl #1
				53	add r9, #2 @ &data_out[i - coefficients_length + 1]
				54	ldr r5, [r9], #4 @ data_out[i - coefficients_length + {1,2}]
				55
				56	mov r7, #0 @ sum1
				57	mov r8, #0 @ sum2
				58	subs r6, r3, #3 @ Iteration counter for inner loop.
				59	beq ODD_A_LENGTH @ branch if coefficients_length == 3
				60	blt POST_LOOP_A_LENGTH @ branch if coefficients_length == 2
				61
				62	LOOP_A_LENGTH: @ Consumes two coefficients per pass, walking down from the last one.
				63	ldr r10, [r12], #-4 @ coefficients[j - 1], coefficients[j]
				64	subs r6, #2 @ Flags set here are consumed by the bgt/blt after the multiplies.
				65	smlatt r8, r10, r5, r8 @ sum2 += coefficients[j] * data_out[i - j + 1];
				66	smlatb r7, r10, r5, r7 @ sum1 += coefficients[j] * data_out[i - j];
				67	smlabt r7, r10, r5, r7 @ sum1 += coefficients[j - 1] * data_out[i - j + 1];
				68	ldr r5, [r9], #4 @ data_out[i - j + 2], data_out[i - j + 3]
				69	smlabb r8, r10, r5, r8 @ sum2 += coefficients[j - 1] * data_out[i - j + 2];
				70	bgt LOOP_A_LENGTH
				71	blt POST_LOOP_A_LENGTH @ Counter went negative: coefficients[2] already consumed.
				72
				73	ODD_A_LENGTH: @ Counter hit zero: coefficients[2] is still pending.
				74	ldrsh r10, [r12, #2] @ Filter coefficients coefficients[2]
				75	sub r12, #2 @ &coefficients[0]
				76	smlabb r7, r10, r5, r7 @ sum1 += coefficients[2] * data_out[i - 2];
				77	smlabt r8, r10, r5, r8 @ sum2 += coefficients[2] * data_out[i - 1];
				78	ldr r5, [r9, #-2] @ data_out[i - 1], data_out[i]
				79
				80	POST_LOOP_A_LENGTH:
				81	ldr r10, [r12] @ coefficients[0], coefficients[1]
				82	smlatb r7, r10, r5, r7 @ sum1 += coefficients[1] * data_out[i - 1];
				83
				84	ldr r9, [r0], #4 @ data_in[i], data_in[i + 1]
				85	smulbb r6, r10, r9 @ output1 = coefficients[0] * data_in[i];
				86	sub r6, r7 @ output1 -= sum1;
				87
				88	sbfx r11, r6, #12, #16 @ Bits [27:12] of output1, sign-extended.
				89	ssat r7, #16, r6, asr #12 @ (output1 >> 12) saturated to 16 bits.
				90	cmp r7, r11 @ Equal only when the shift result was not saturated.
				91	addeq r6, r6, #2048 @ Add the rounding offset only in the unsaturated case.
				92	ssat r6, #16, r6, asr #12 @ Rounded, saturated result.
				93	strh r6, [r1], #2 @ Store data_out[i]
				94
				95	smlatb r8, r10, r6, r8 @ sum2 += coefficients[1] * data_out[i];
				96	smulbt r6, r10, r9 @ output2 = coefficients[0] * data_in[i + 1];
				97	sub r6, r8 @ output2 -= sum2;
				98
				99	sbfx r11, r6, #12, #16 @ Same round-and-saturate sequence as above, for output2.
				100	ssat r7, #16, r6, asr #12
				101	cmp r7, r11
				102	addeq r6, r6, #2048
				103	ssat r6, #16, r6, asr #12
				104	strh r6, [r1], #2 @ Store data_out[i + 1]
				105
				106	subs r4, #2
				107	bgt LOOP_LENGTH
				108	blt END @ For even data_length, it's done. Jump to END.
				109
				110	@ Process i = data_length -1, for the case of an odd length.
				111	ODD_LENGTH:
				112	add r12, r2, r3, lsl #1
				113	sub r12, #4 @ &coefficients[coefficients_length - 2]
				114	sub r9, r1, r3, lsl #1
				115	add r9, #2 @ &data_out[i - coefficients_length + 1]
				116	mov r7, #0 @ sum1, partial accumulator for the coefficients[j] terms
				117	mov r8, #0 @ sum1, partial accumulator for the coefficients[j - 1] terms
				118	subs r6, r3, #2 @ inner loop counter
				119	beq EVEN_A_LENGTH @ branch if coefficients_length == 2
				120
				121	LOOP2_A_LENGTH:
				122	ldr r10, [r12], #-4 @ coefficients[j - 1], coefficients[j]
				123	ldr r5, [r9], #4 @ data_out[i - j], data_out[i - j + 1]
				124	subs r6, #2
				125	smlatb r7, r10, r5, r7 @ sum1 += coefficients[j] * data_out[i - j];
				126	smlabt r8, r10, r5, r8 @ sum1 += coefficients[j - 1] * data_out[i - j + 1];
				127	bgt LOOP2_A_LENGTH
				128	addlt r12, #2 @ Odd coefficients_length: step r12 back up to &coefficients[0].
				129	blt POST_LOOP2_A_LENGTH
				130
				131	EVEN_A_LENGTH: @ coefficients[1] is still pending; r12 == &coefficients[0].
				132	ldrsh r10, [r12, #2] @ Filter coefficients coefficients[1]
				133	ldrsh r5, [r9] @ data_out[i - 1]
				134	smlabb r7, r10, r5, r7 @ sum1 += coefficients[1] * data_out[i - 1];
				135
				136	POST_LOOP2_A_LENGTH:
				137	ldrsh r10, [r12] @ Filter coefficients coefficients[0]
				138	ldrsh r9, [r0] @ data_in[i]
				139	smulbb r6, r10, r9 @ output1 = coefficients[0] * data_in[i];
				140	sub r6, r7 @ output1 -= sum1 (first partial accumulator);
				141	sub r6, r8 @ output1 -= sum1 (second partial accumulator);
				142	sbfx r8, r6, #12, #16 @ Bits [27:12] of output1, sign-extended.
				143	ssat r7, #16, r6, asr #12 @ (output1 >> 12) saturated to 16 bits.
				144	cmp r7, r8 @ Equal only when the shift result was not saturated.
				145	addeq r6, r6, #2048 @ Add the rounding offset only in the unsaturated case.
				146	ssat r6, #16, r6, asr #12 @ Rounded, saturated result.
				147	strh r6, [r1] @ Store the data_out[i]
				148
				149	END:
				150	pop {r4-r11} @ Restore the registers saved on entry.
				151	bx lr
				152
andrew@webrtc.org	a7b57da	2012-10-22 18:19:23 +0000	[diff] [blame]	153	@Reference C code:
				154	@
				155	@void WebRtcSpl_FilterARFastQ12(int16_t* data_in,
				156	@ int16_t* data_out,
				157	@ int16_t* __restrict coefficients,
				158	@ int coefficients_length,
				159	@ int data_length) {
				160	@ int i = 0;
				161	@ int j = 0;
				162	@
				163	@ for (i = 0; i < data_length - 1; i += 2) {
				164	@ int32_t output1 = 0;
				165	@ int32_t sum1 = 0;
				166	@ int32_t output2 = 0;
				167	@ int32_t sum2 = 0;
				168	@
				169	@ for (j = coefficients_length - 1; j > 2; j -= 2) {
				170	@ sum1 += coefficients[j] * data_out[i - j];
				171	@ sum1 += coefficients[j - 1] * data_out[i - j + 1];
				172	@ sum2 += coefficients[j] * data_out[i - j + 1];
				173	@ sum2 += coefficients[j - 1] * data_out[i - j + 2];
				174	@ }
				175	@
				176	@ if (j == 2) {
				177	@ sum1 += coefficients[2] * data_out[i - 2];
				178	@ sum2 += coefficients[2] * data_out[i - 1];
				179	@ }
				180	@
				181	@ sum1 += coefficients[1] * data_out[i - 1];
				182	@ output1 = coefficients[0] * data_in[i];
				183	@ output1 -= sum1;
				184	@ // Saturate and store the output.
				185	@ output1 = WEBRTC_SPL_SAT(134215679, output1, -134217728);
				186	@ data_out[i] = (int16_t)((output1 + 2048) >> 12);
				187	@
				188	@ sum2 += coefficients[1] * data_out[i];
				189	@ output2 = coefficients[0] * data_in[i + 1];
				190	@ output2 -= sum2;
				191	@ // Saturate and store the output.
				192	@ output2 = WEBRTC_SPL_SAT(134215679, output2, -134217728);
				193	@ data_out[i + 1] = (int16_t)((output2 + 2048) >> 12);
				194	@ }
				195	@
				196	@ if (i == data_length - 1) {
				197	@ int32_t output1 = 0;
				198	@ int32_t sum1 = 0;
				199	@
				200	@ for (j = coefficients_length - 1; j > 1; j -= 2) {
				201	@ sum1 += coefficients[j] * data_out[i - j];
				202	@ sum1 += coefficients[j - 1] * data_out[i - j + 1];
				203	@ }
				204	@
				205	@ if (j == 1) {
				206	@ sum1 += coefficients[1] * data_out[i - 1];
				207	@ }
				208	@
				209	@ output1 = coefficients[0] * data_in[i];
				210	@ output1 -= sum1;
				211	@ // Saturate and store the output.
				212	@ output1 = WEBRTC_SPL_SAT(134215679, output1, -134217728);
				213	@ data_out[i] = (int16_t)((output1 + 2048) >> 12);
				214	@ }
				215	@}