/* Compute the sum of the squares of a vector of signed shorts

 * MMX-assisted version (also used on SSE)

 * The SSE2 and MMX assist routines both operate on multiples of
 * 8 words; they differ only in their alignment requirements (8 bytes
 * for MMX, 16 bytes for SSE2)

 * Copyright 2004 Phil Karn, KA9Q
 * May be used under the terms of the GNU Lesser Public License (LGPL)
 */
#include <stdint.h>

/* Assist routine, defined outside this file. The wrapper below hands it
 * an 8-byte aligned pointer and afterwards skips (cnt & ~7) words itself,
 * so the routine is expected to sum exactly that many words and ignore
 * the remainder (NOTE(review): confirm against the assist implementation).
 */
long long sumsq_mmx_assist(signed short *in,int cnt);

/* Return the sum of the squares of the cnt signed 16-bit words at in.
 *
 * Words before the first 8-byte boundary and the (up to 7) words left
 * over after the last full group of 8 are summed here in plain C; the
 * aligned middle part is delegated to sumsq_mmx_assist().
 *
 * A negative cnt yields 0.
 */
long long sumsq_mmx(signed short *in,int cnt){
  long long sum = 0;

  /* A negative count can never reach 0 by decrementing, so the loops
   * below would run off the buffer; treat it as an empty vector.
   */
  if(cnt < 0)
    return 0;

  /* Handle stuff before the next 8-byte boundary.
   * uintptr_t rather than int: a pointer does not fit in an int on
   * 64-bit targets.
   */
  while(((uintptr_t)in & 7) != 0 && cnt != 0){
    /* Widen before multiplying so the product is formed in the
     * accumulator's type.
     */
    sum += (long long)in[0] * in[0];
    in++;
    cnt--;
  }

  /* Aligned bulk: the assist routine works on whole groups of 8 words */
  sum += sumsq_mmx_assist(in,cnt);
  in += cnt & ~7;   /* step over the words the assist routine covered */
  cnt &= 7;         /* what is left is less than one full group */

  /* Handle up to 7 words at end */
  while(cnt != 0){
    sum += (long long)in[0] * in[0];
    in++;
    cnt--;
  }
  return sum;
}