# MMX assist routines for sumsq
# Copyright 2001 Phil Karn, KA9Q
# May be used under the terms of the GNU Public License (GPL)

	.text

# Evaluate sum of squares of signed 16-bit input samples
# long long sumsq_mmx_assist(signed short *in,int cnt);
#
# ABI:   i386 cdecl. Stack args: 8(%ebp) = in, 12(%ebp) = cnt (count in
#        samples, not bytes). 64-bit result returned in %edx:%eax.
# Clobb: %eax, %edx, %mm0, %mm6, flags (%ebx/%ecx/%esi saved & restored).
# Note:  processes 8 samples (16 bytes) per pass; a tail of fewer than 8
#        samples is ignored here -- presumably handled by the C caller
#        (TODO confirm against the C-side sumsq wrapper).
	.global sumsq_mmx_assist
	.type sumsq_mmx_assist,@function
	.align 16
sumsq_mmx_assist:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi		# callee-saved: input pointer
	pushl %ecx		# caller-saved under cdecl; saved anyway (defensive)
	pushl %ebx		# callee-saved: movd scratch

	movl 8(%ebp),%esi	# esi = in
	movl 12(%ebp),%ecx	# ecx = samples remaining
	xor %eax,%eax		# edx:eax = 64-bit accumulator = 0
	xor %edx,%edx

# Since 4 * 32767**2 < 2**32, we can accumulate two at a time
# (each pmaddwd lane holds at most 2*32767**2, so a packed paddd of two
# lanes cannot wrap a 32-bit dword).
1:	subl $8,%ecx		# need a full group of 8 samples
	jl 2f			# signed: also exits on negative leftover
	movq (%esi),%mm0	# S0 S1 S2 S3
	pmaddwd %mm0,%mm0	# (S0^2+S1^2) (S2^2+S3^2)
	movq 8(%esi),%mm6	# S4 S5 S6 S7
	pmaddwd %mm6,%mm6	# (S4^2+S5^2) (S6^2+S7^2)
	paddd %mm6,%mm0		# (S0^2+S1^2+S4^2+S5^2)(S2^2+S3^2+S6^2+S7^2)
	movd %mm0,%ebx		# low packed dword
	addl %ebx,%eax		# fold into 64-bit accumulator...
	adcl $0,%edx		# ...propagating carry into the high half
	psrlq $32,%mm0		# bring high packed dword down
	movd %mm0,%ebx
	addl %ebx,%eax
	adcl $0,%edx
	addl $16,%esi		# advance 8 samples (16 bytes)
	jmp 1b

2:	emms			# clear MMX state so caller may use x87 FP
	popl %ebx
	popl %ecx
	popl %esi
	popl %ebp
	ret
| 48 | |
# Evaluate sum of squares of signed 16-bit input samples
# long sumsq_wd_mmx_assist(signed short *in,int cnt);
# Quick version, only safe for small numbers of small input values...
#
# ABI:   i386 cdecl. Stack args: 8(%ebp) = in, 12(%ebp) = cnt (samples).
#        32-bit result returned in %eax.
# Clobb: %eax, %ecx, %edx, %mm0, %mm1, %mm2, flags (%esi saved & restored).
# Note:  unlike sumsq_mmx_assist, the two packed partial sums in %mm2 are
#        plain 32-bit lanes with no carry widening -- they silently wrap if
#        the running sums exceed 2**31, hence the "only safe for small
#        inputs" warning above. Tail of <8 samples is ignored here.
	.global sumsq_wd_mmx_assist
	.type sumsq_wd_mmx_assist,@function
	.align 16
sumsq_wd_mmx_assist:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi		# callee-saved: input pointer

	movl 8(%ebp),%esi	# esi = in
	movl 12(%ebp),%ecx	# ecx = samples remaining (clobbered; legal in cdecl)
	pxor %mm2,%mm2		# zero sum (two packed 32-bit accumulators)

1:	subl $8,%ecx		# need a full group of 8 samples
	jl 2f
	movq (%esi),%mm0	# S0 S1 S2 S3
	pmaddwd %mm0,%mm0	# (S0*S0+S1*S1) (S2*S2+S3*S3)
	movq 8(%esi),%mm1	# S4 S5 S6 S7
	pmaddwd %mm1,%mm1	# (S4*S4+S5*S5) (S6*S6+S7*S7)
	paddd %mm1,%mm2
	paddd %mm0,%mm2		# accumulate into the two packed lanes

	addl $16,%esi		# advance 8 samples (16 bytes)
	jmp 1b

2:	movd %mm2,%eax		# even sum (low lane)
	psrlq $32,%mm2
	movd %mm2,%edx		# odd sum (high lane)
	addl %edx,%eax		# combined 32-bit result
	emms			# clear MMX state so caller may use x87 FP
	popl %esi
	popl %ebp
	ret