/* Compute the sum of the squares of a vector of signed shorts

 * The SSE2 and MMX assist routines both operate on multiples of
 * 8 words; they differ only in their alignment requirements (8 bytes
 * for MMX, 16 bytes for SSE2)

 * Copyright 2004 Phil Karn, KA9Q
 * May be used under the terms of the GNU Lesser Public License (LGPL)
 */
#include <stdint.h>

/* SSE2 helper implemented outside this file.  Per the file header it works
 * on multiples of 8 words and requires a 16-byte-aligned pointer; this
 * wrapper adds its return value to the running total.
 */
long long sumsq_sse2_assist(signed short *in,int cnt);

/* Return the sum of the squares of the cnt signed shorts starting at in.
 *
 * The work is split into three phases:
 *   1. a scalar lead-in that advances in to a 16-byte boundary,
 *   2. one call to sumsq_sse2_assist() on the aligned remainder,
 *   3. a scalar tail for the last (cnt mod 8) words.
 *
 * A zero or negative cnt yields 0 without touching memory.
 */
long long sumsq_sse2(signed short *in,int cnt){
  long long sum = 0;

  /* A negative count would otherwise drive the lead-in loop, be handed to
   * the assist routine, and turn into a bogus positive tail via cnt & 7.
   */
  if(cnt <= 0)
    return 0;

  /* Handle stuff before the next 16-byte boundary.  The address is examined
   * through uintptr_t because a pointer does not fit in an int on 64-bit
   * targets.
   */
  while(((uintptr_t)in & 15) != 0 && cnt != 0){
    long long sample = in[0];

    sum += sample * sample;
    in++;
    cnt--;
  }

  /* The assist routine is given the full remaining count; the words beyond
   * the last multiple of 8 are summed by the scalar loop below.
   */
  sum += sumsq_sse2_assist(in,cnt);
  in += cnt & ~7;
  cnt &= 7;

  /* Handle up to 7 trailing words */
  while(cnt != 0){
    long long sample = in[0];

    sum += sample * sample;
    in++;
    cnt--;
  }
  return sum;
}