/* Compute the sum of the squares of a vector of signed shorts

 * MMX-assisted version (also used on SSE)

 * The SSE2 and MMX assist routines both operate on multiples of
 * 8 words; they differ only in their alignment requirements (8 bytes
 * for MMX, 16 bytes for SSE2)

 * Copyright 2004 Phil Karn, KA9Q
 * May be used under the terms of the GNU Lesser Public License (LGPL)
 */

#include <stdint.h>

/* Assist routine defined outside this file.  This wrapper hands it an
 * 8-byte-aligned pointer and the remaining word count, and then assumes
 * it has summed the largest multiple of 8 words that fits in that count.
 */
long long sumsq_mmx_assist(signed short *in,int cnt);

/* Return the sum of the squares of the cnt signed 16-bit words at in.
 *
 * The work is split into three parts:
 *   1. a scalar loop over leading words until in is 8-byte aligned,
 *   2. a call to sumsq_mmx_assist() for the aligned middle section,
 *   3. a scalar loop over the (at most 7) words left at the end.
 *
 * A negative cnt is treated as an empty vector and yields 0.
 */
long long sumsq_mmx(signed short *in,int cnt){
  long long sum = 0;

  /* A negative count would never satisfy the "cnt != 0" loop tests
   * below and would step the pointer backwards; reject it up front.
   */
  if(cnt < 0)
    return 0;

  /* Handle stuff before the next 8-byte boundary.  The address is
   * examined through uintptr_t because a pointer does not fit in an
   * int on 64-bit targets.
   */
  while(((uintptr_t)in & 7) != 0 && cnt != 0){
    sum += (long)in[0] * in[0];
    in++;
    cnt--;
  }

  /* Aligned middle section; then skip the whole groups of 8 words
   * that the assist routine is expected to have covered.
   */
  sum += sumsq_mmx_assist(in,cnt);
  in += cnt & ~7;
  cnt &= 7;

  /* Handle up to 7 words at end */
  while(cnt != 0){
    sum += (long)in[0] * in[0];
    in++;
    cnt--;
  }
  return sum;
}