Blame - dotprod_mmx.c - platform/external/fec

blob: c516afe8fbb5e8f22643305f2d0b7bd32c0499bc [file] [log] [blame]

Bill Yi	4e213d5	2015-06-23 13:53:11 -0700	[diff] [blame]	1	/* 16-bit signed integer dot product
				2	* MMX assisted version; also for SSE
				3	*
				4	* Copyright 2004 Phil Karn
				5	* May be used under the terms of the GNU Lesser General Public License (LGPL)
				6	*/
#include <stdint.h>
#include <stdlib.h>
#include "fec.h"
				9
				10	struct dotprod {
				11	int len; /* Number of coefficients */
				12
				13	/* On a MMX or SSE machine, these hold 4 copies of the coefficients,
				14	* preshifted by 0,1,2,3 words to meet all possible input data
				15	* alignments (see Intel ap559 on MMX dot products).
				16	*/
				17	signed short *coeffs[4];
				18	};
				19	long dotprod_mmx_assist(signed short a,signed short b,int cnt);
				20
				21	/* Create and return a descriptor for use with the dot product function */
				22	void *initdp_mmx(signed short coeffs[],int len){
				23	struct dotprod *dp;
				24	int i,j;
				25
				26
				27	if(len == 0)
				28	return NULL;
				29
				30	dp = (struct dotprod *)calloc(1,sizeof(struct dotprod));
				31	dp->len = len;
				32
				33	/* Make 4 copies of coefficients, one for each data alignment */
				34	for(i=0;i<4;i++){
				35	dp->coeffs[i] = (signed short *)calloc(1+(len+i-1)/4,
				36	4*sizeof(signed short));
				37	for(j=0;j<len;j++)
				38	dp->coeffs[i][j+i] = coeffs[j];
				39	}
				40	return (void *)dp;
				41	}
				42
				43
				44	/* Free a dot product descriptor created earlier */
				45	void freedp_mmx(void *p){
				46	struct dotprod dp = (struct dotprod )p;
				47	int i;
				48
				49	for(i=0;i<4;i++)
				50	if(dp->coeffs[i] != NULL)
				51	free(dp->coeffs[i]);
				52	free(dp);
				53	}
				54
				55	/* Compute a dot product given a descriptor and an input array
				56	* The length is taken from the descriptor
				57	*/
				58	long dotprod_mmx(void *p,signed short a[]){
				59	struct dotprod dp = (struct dotprod )p;
				60	int al;
				61	signed short *ar;
				62
				63	/* Round input data address down to 8 byte boundary
				64	* NB: depending on the alignment of a[], memory
				65	* before a[] will be accessed. The contents don't matter since they'll
				66	* be multiplied by zero coefficients. I can't conceive of any
				67	* situation where this could cause a segfault since memory protection
				68	* in the x86 machines is done on much larger boundaries
				69	*/
				70	ar = (signed short *)((int)a & ~7);
				71
				72	/* Choose one of 4 sets of pre-shifted coefficients. al is both the
				73	* index into dp->coeffs[] and the number of 0 words padded onto
				74	* that coefficients array for alignment purposes
				75	*/
				76	al = a - ar;
				77
				78	/* Call assembler routine to do the work, passing number of 4-word blocks */
				79	return dotprod_mmx_assist(ar,dp->coeffs[al],(dp->len+al-1)/4+1);
				80	}
				81