dotprod_mmx.c - platform/external/fec - Gitiles

 /* 16-bit signed integer dot product
  * MMX assisted version; also for SSE
  *
  * Copyright 2004 Phil Karn
  * May be used under the terms of the GNU Lesser General Public License (LGPL)
  */
 #include <stdint.h>
 #include <stdlib.h>
 #include "fec.h"

 /* Descriptor for one set of dot product coefficients. It is built by
  * initdp_mmx(), read by dotprod_mmx() and released by freedp_mmx().
  */
 struct dotprod {
   int len; /* Number of coefficients */

   /* On a MMX or SSE machine, these hold 4 copies of the coefficients,
    * preshifted by 0,1,2,3 words to meet all possible input data
    * alignments (see Intel ap559 on MMX dot products).
    * initdp_mmx() stores coefficient j of copy i at index j+i, so copy i
    * begins with i zero words.
    */
   signed short *coeffs[4];
 };
 /* Assembler routine that does the work for dotprod_mmx(); it is not
  * defined in this file. As called from dotprod_mmx(): a is the input
  * address rounded down to an 8 byte boundary, b is the matching
  * pre-shifted coefficient copy and cnt is the number of 4-word blocks.
  */
 long dotprod_mmx_assist(signed short *a,signed short *b,int cnt);

 /* Create and return a descriptor for use with the dot product function.
  *
  * coeffs: the len coefficients; they are copied, so the caller keeps
  *         ownership of the array
  * len:    number of coefficients
  *
  * Returns an opaque descriptor to be released with freedp_mmx(), or NULL
  * when len is not positive or memory cannot be allocated.
  */
 void *initdp_mmx(signed short coeffs[],int len){
   struct dotprod *dp;
   int i,j;

   if(len <= 0)
     return NULL;

   dp = (struct dotprod *)calloc(1,sizeof(struct dotprod));
   if(dp == NULL)
     return NULL;
   dp->len = len;

   /* Make 4 copies of coefficients, one for each data alignment.
    * Copy i holds i zero words followed by the coefficients, in a buffer
    * that is a whole number of 4-word blocks; calloc zeroes the padding.
    */
   for(i=0;i<4;i++){
     /* Same value as 1+(len+i-1)/4, computed in size_t so that a very
      * large len cannot overflow int
      */
     size_t nblocks = 1 + ((size_t)len + (size_t)i - 1)/4;

     dp->coeffs[i] = (signed short *)calloc(nblocks,4*sizeof(signed short));
     if(dp->coeffs[i] == NULL){
       /* Out of memory: release everything allocated so far */
       while(i-- > 0)
         free(dp->coeffs[i]);
       free(dp);
       return NULL;
     }
     for(j=0;j<len;j++)
       dp->coeffs[i][j+i] = coeffs[j];
   }
   return (void *)dp;
 }


 /* Free a dot product descriptor created earlier.
  *
  * p: descriptor returned by initdp_mmx(). NULL is accepted and ignored,
  *    since initdp_mmx() returns NULL when given zero coefficients.
  */
 void freedp_mmx(void *p){
   struct dotprod *dp = (struct dotprod *)p;
   int i;

   if(dp == NULL)
     return;

   /* free(NULL) is a no-op, so the entries need no individual check */
   for(i=0;i<4;i++)
     free(dp->coeffs[i]);
   free(dp);
 }

 /* Compute a dot product given a descriptor and an input array.
  * The length is taken from the descriptor.
  *
  * p: descriptor returned by initdp_mmx()
  * a: input data
  *
  * Returns the result of dotprod_mmx_assist(), or 0 when p is NULL
  * (initdp_mmx() returns NULL for an empty coefficient set).
  */
 long dotprod_mmx(void *p,signed short a[]){
   struct dotprod *dp = (struct dotprod *)p;
   int al;
   signed short *ar;

   if(dp == NULL)
     return 0;

   /* Round input data address down to 8 byte boundary
    * NB: depending on the alignment of a[], memory
    * before a[] will be accessed. The contents don't matter since they'll
    * be multiplied by zero coefficients. I can't conceive of any
    * situation where this could cause a segfault since memory protection
    * in the x86 machines is done on much larger boundaries.
    *
    * The mask is applied in uintptr_t: casting the pointer to int would
    * drop the upper address bits wherever pointers are wider than int.
    */
   ar = (signed short *)((uintptr_t)a & ~(uintptr_t)7);

   /* Choose one of 4 sets of pre-shifted coefficients. al is both the
    * index into dp->coeffs[] and the number of 0 words padded onto
    * that coefficients array for alignment purposes
    */
   al = (int)(a - ar);

   /* Call assembler routine to do the work, passing number of 4-word blocks */
   return dotprod_mmx_assist(ar,dp->coeffs[al],(dp->len+al-1)/4+1);
 }
	/* 16-bit signed integer dot product
	* MMX assisted version; also for SSE
	*
	* Copyright 2004 Phil Karn
	* May be used under the terms of the GNU Lesser General Public License (LGPL)
	*/
	#include <stdlib.h>
	#include "fec.h"

	/* Descriptor for one set of dot product coefficients. It is built by
	* initdp_mmx(), read by dotprod_mmx() and released by freedp_mmx().
	*/
	struct dotprod {
	int len; /* Number of coefficients */

	/* On a MMX or SSE machine, these hold 4 copies of the coefficients,
	* preshifted by 0,1,2,3 words to meet all possible input data
	* alignments (see Intel ap559 on MMX dot products).
	* initdp_mmx() stores coefficient j of copy i at index j+i, so copy i
	* begins with i zero words.
	*/
	signed short *coeffs[4];
	};
	/* Assembler routine that does the work for dotprod_mmx(); it is not
	* defined in this file. Both array arguments are pointers: a is the
	* input address rounded down to an 8 byte boundary, b is the matching
	* pre-shifted coefficient copy and cnt is the number of 4-word blocks.
	*/
	long dotprod_mmx_assist(signed short *a,signed short *b,int cnt);

	/* Create and return a descriptor for use with the dot product function.
	*
	* coeffs: the len coefficients; they are copied, so the caller keeps
	*         ownership of the array
	* len:    number of coefficients
	*
	* Returns an opaque descriptor to be released with freedp_mmx(), or NULL
	* when len is not positive or memory cannot be allocated.
	*/
	void *initdp_mmx(signed short coeffs[],int len){
	  struct dotprod *dp;
	  int i,j;

	  if(len <= 0)
	    return NULL;

	  dp = (struct dotprod *)calloc(1,sizeof(struct dotprod));
	  if(dp == NULL)
	    return NULL;
	  dp->len = len;

	  /* Make 4 copies of coefficients, one for each data alignment.
	  * Copy i holds i zero words followed by the coefficients, in a buffer
	  * that is a whole number of 4-word blocks; calloc zeroes the padding.
	  */
	  for(i=0;i<4;i++){
	    /* Same value as 1+(len+i-1)/4, computed in size_t so that a very
	    * large len cannot overflow int
	    */
	    size_t nblocks = 1 + ((size_t)len + (size_t)i - 1)/4;

	    dp->coeffs[i] = (signed short *)calloc(nblocks,4*sizeof(signed short));
	    if(dp->coeffs[i] == NULL){
	      /* Out of memory: release everything allocated so far */
	      while(i-- > 0)
	        free(dp->coeffs[i]);
	      free(dp);
	      return NULL;
	    }
	    for(j=0;j<len;j++)
	      dp->coeffs[i][j+i] = coeffs[j];
	  }
	  return (void *)dp;
	}


	/* Free a dot product descriptor created earlier */
	void freedp_mmx(void *p){
	struct dotprod dp = (struct dotprod )p;
	int i;

	for(i=0;i<4;i++)
	if(dp->coeffs[i] != NULL)
	free(dp->coeffs[i]);
	free(dp);
	}

	/* Compute a dot product given a descriptor and an input array
	* The length is taken from the descriptor
	*/
	long dotprod_mmx(void *p,signed short a[]){
	struct dotprod dp = (struct dotprod )p;
	int al;
	signed short *ar;

	/* Round input data address down to 8 byte boundary
	* NB: depending on the alignment of a[], memory
	* before a[] will be accessed. The contents don't matter since they'll
	* be multiplied by zero coefficients. I can't conceive of any
	* situation where this could cause a segfault since memory protection
	* in the x86 machines is done on much larger boundaries
	*/
	ar = (signed short *)((int)a & ~7);

	/* Choose one of 4 sets of pre-shifted coefficients. al is both the
	* index into dp->coeffs[] and the number of 0 words padded onto
	* that coefficients array for alignment purposes
	*/
	al = a - ar;

	/* Call assembler routine to do the work, passing number of 4-word blocks */
	return dotprod_mmx_assist(ar,dp->coeffs[al],(dp->len+al-1)/4+1);
	}