blob: b05d2e9b1d6778c5560fc2b69f7a1de5d354e6d1 [file] [log] [blame]
/* Compute the sum of the squares of a vector of signed shorts

 * The SSE2 and MMX assist routines both operate on multiples of
 * 8 words; they differ only in their alignment requirements (8 bytes
 * for MMX, 16 bytes for SSE2)

 * Copyright 2004 Phil Karn, KA9Q
 * May be used under the terms of the GNU Lesser Public License (LGPL)
 */

#include <stdint.h>

/* SSE2 assist routine, defined outside this file. The caller below hands it
 * the pointer left after the scalar alignment loop together with the
 * remaining word count, and then skips (count & ~7) words itself, i.e. it
 * relies on the assist routine summing exactly the whole groups of 8 words.
 */
long long sumsq_sse2_assist(signed short *,int);

/* Return the sum of the squares of the cnt signed shorts starting at in.
 *
 * The work is split into three parts:
 *   1. a scalar loop over the leading words, until in sits on a 16-byte
 *      boundary (or the input runs out);
 *   2. one call to sumsq_sse2_assist() for the whole groups of 8 words;
 *   3. a scalar loop over the (at most 7) words left over.
 *
 * A count of zero or less yields 0 and reads nothing from in.
 */
long long sumsq_sse2(signed short *in,int cnt){
  long long sum = 0;

  /* Nothing to sum; also keeps a negative count from driving the loops
   * below (which only test cnt != 0) past the end of the buffer.
   */
  if(cnt <= 0)
    return 0;

  /* Handle stuff before the next 16-byte boundary. uintptr_t is used for
   * the address test because casting a pointer to int can truncate it.
   */
  while(((uintptr_t)in & 15) != 0 && cnt != 0){
    sum += (long)in[0] * in[0];
    in++;
    cnt--;
  }

  /* Whole groups of 8 words go to the assist routine */
  sum += sumsq_sse2_assist(in,cnt);
  in += cnt & ~7;
  cnt &= 7;

  /* Handle up to 7 trailing words */
  while(cnt != 0){
    sum += (long)in[0] * in[0];
    in++;
    cnt--;
  }
  return sum;
}