blob: 70cded027ecd3be5af3317d739afae15b42f3f54 [file] [log] [blame]
/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
 *
 * Copyright (c) 2008 Anders Carlsson
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
25
26#ifndef __MMINTRIN_H
27#define __MMINTRIN_H
28
29#ifndef __MMX__
30#error "MMX instruction set not enabled"
31#else
32
/*
 * __m64 is the 8-byte vector type every intrinsic in this header takes and
 * returns.  The attribute is spelled in its reserved __vector_size__ form so
 * that a user macro named vector_size cannot alter these declarations.
 */
typedef long long __m64 __attribute__((__vector_size__(8)));

/*
 * Lane views of the same 8 bytes: int, short and char elements.  The
 * intrinsics cast an __m64 to one of these to pick the element width.
 */
typedef int __v2si __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));
38
/*
 * Invokes the compiler's emms builtin.  Takes no arguments and returns
 * nothing; declared with (void) so that the declaration is a real prototype
 * and stray arguments are rejected at compile time.
 */
inline void __attribute__((__always_inline__)) _mm_empty(void)
{
    __builtin_ia32_emms();
}
43
Anders Carlsson365db3b2008-03-04 21:11:29 +000044inline __m64 __attribute__((__always_inline__)) _mm_cvtsi32_si64(int i)
Anders Carlsson37958ee2008-03-03 19:29:06 +000045{
46 return (__m64)(__v2si){i, 0};
47}
48
Anders Carlsson365db3b2008-03-04 21:11:29 +000049inline int __attribute__((__always_inline__)) _mm_cvtsi64_si32(__m64 m)
Anders Carlsson37958ee2008-03-03 19:29:06 +000050{
51 return ((__v2si)m)[0];
52}
53
Anders Carlsson365db3b2008-03-04 21:11:29 +000054inline __m64 __attribute__((__always_inline__)) _mm_cvtsi64_m64(long long i)
Anders Carlsson37958ee2008-03-03 19:29:06 +000055{
56 return (__m64)i;
57}
58
Anders Carlsson365db3b2008-03-04 21:11:29 +000059inline long long __attribute__((__always_inline__)) _mm_cvtm64_si64(__m64 m)
Anders Carlsson37958ee2008-03-03 19:29:06 +000060{
61 return (long long)m;
62}
63
Anders Carlsson365db3b2008-03-04 21:11:29 +000064inline __m64 __attribute__((__always_inline__)) _mm_packs_pi16(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +000065{
66 return (__m64)__builtin_ia32_packsswb((__v4hi)m1, (__v4hi)m2);
67}
68
Anders Carlsson365db3b2008-03-04 21:11:29 +000069inline __m64 __attribute__((__always_inline__)) _mm_packs_pi32(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +000070{
71 return (__m64)__builtin_ia32_packssdw((__v2si)m1, (__v2si)m2);
72}
73
Anders Carlsson365db3b2008-03-04 21:11:29 +000074inline __m64 __attribute__((__always_inline__)) _mm_packs_pu16(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +000075{
76 return (__m64)__builtin_ia32_packuswb((__v4hi)m1, (__v4hi)m2);
77}
78
Anders Carlsson365db3b2008-03-04 21:11:29 +000079inline __m64 __attribute__((__always_inline__)) _mm_unpackhi_pi8(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +000080{
81 // FIXME: use __builtin_shuffle_vector
82}
83
Anders Carlsson365db3b2008-03-04 21:11:29 +000084inline __m64 __attribute__((__always_inline__)) _mm_unpackhi_pi16(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +000085{
86 // FIXME: use __builtin_shuffle_vector
87}
88
Anders Carlsson365db3b2008-03-04 21:11:29 +000089inline __m64 __attribute__((__always_inline__)) _mm_unpackhi_pi32(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +000090{
91 // FIXME: use __builtin_shuffle_vector
92}
93
Anders Carlsson365db3b2008-03-04 21:11:29 +000094inline __m64 __attribute__((__always_inline__)) _mm_unpacklo_pi8(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +000095{
96 // FIXME: use __builtin_shuffle_vector
97}
98
Anders Carlsson365db3b2008-03-04 21:11:29 +000099inline __m64 __attribute__((__always_inline__)) _mm_unpacklo_pi16(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000100{
101 // FIXME: use __builtin_shuffle_vector
102}
103
Anders Carlsson365db3b2008-03-04 21:11:29 +0000104inline __m64 __attribute__((__always_inline__)) _mm_unpacklo_pi32(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000105{
106 // FIXME: use __builtin_shuffle_vector
107}
108
Anders Carlsson365db3b2008-03-04 21:11:29 +0000109inline __m64 __attribute__((__always_inline__)) _mm_add_pi8(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000110{
111 return (__m64)((__v8qi)m1 + (__v8qi)m2);
112}
113
Anders Carlsson365db3b2008-03-04 21:11:29 +0000114inline __m64 __attribute__((__always_inline__)) _mm_add_pi16(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000115{
116 return (__m64)((__v4hi)m1 + (__v4hi)m2);
117}
118
Anders Carlsson365db3b2008-03-04 21:11:29 +0000119inline __m64 __attribute__((__always_inline__)) _mm_add_pi32(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000120{
121 return (__m64)((__v2si)m1 + (__v2si)m2);
122}
123
Anders Carlsson365db3b2008-03-04 21:11:29 +0000124inline __m64 __attribute__((__always_inline__)) _mm_adds_pi8(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000125{
126 return (__m64)__builtin_ia32_paddsb((__v8qi)m1, (__v8qi)m2);
127}
128
Anders Carlsson365db3b2008-03-04 21:11:29 +0000129inline __m64 __attribute__((__always_inline__)) _mm_adds_pi16(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000130{
131 return (__m64)__builtin_ia32_paddsw((__v4hi)m1, (__v4hi)m2);
132}
133
Anders Carlsson365db3b2008-03-04 21:11:29 +0000134inline __m64 __attribute__((__always_inline__)) _mm_adds_pu8(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000135{
136 return (__m64)__builtin_ia32_paddusb((__v8qi)m1, (__v8qi)m2);
137}
138
Anders Carlsson365db3b2008-03-04 21:11:29 +0000139inline __m64 __attribute__((__always_inline__)) _mm_adds_pu16(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000140{
141 return (__m64)__builtin_ia32_paddusw((__v4hi)m1, (__v4hi)m2);
142}
143
Anders Carlsson365db3b2008-03-04 21:11:29 +0000144inline __m64 __attribute__((__always_inline__)) _mm_sub_pi8(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000145{
146 return (__m64)((__v8qi)m1 - (__v8qi)m2);
147}
148
Anders Carlsson365db3b2008-03-04 21:11:29 +0000149inline __m64 __attribute__((__always_inline__)) _mm_sub_pi16(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000150{
151 return (__m64)((__v4hi)m1 - (__v4hi)m2);
152}
153
Anders Carlsson365db3b2008-03-04 21:11:29 +0000154inline __m64 __attribute__((__always_inline__)) _mm_sub_pi32(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000155{
156 return (__m64)((__v2si)m1 - (__v2si)m2);
157}
158
Anders Carlsson365db3b2008-03-04 21:11:29 +0000159inline __m64 __attribute__((__always_inline__)) _mm_subs_pi8(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000160{
161 return (__m64)__builtin_ia32_psubsb((__v8qi)m1, (__v8qi)m2);
162}
163
Anders Carlsson365db3b2008-03-04 21:11:29 +0000164inline __m64 __attribute__((__always_inline__)) _mm_subs_pi16(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000165{
166 return (__m64)__builtin_ia32_psubsw((__v4hi)m1, (__v4hi)m2);
167}
168
Anders Carlsson365db3b2008-03-04 21:11:29 +0000169inline __m64 __attribute__((__always_inline__)) _mm_subs_pu8(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000170{
171 return (__m64)__builtin_ia32_psubusb((__v8qi)m1, (__v8qi)m2);
172}
173
Anders Carlsson365db3b2008-03-04 21:11:29 +0000174inline __m64 __attribute__((__always_inline__)) _mm_subs_pu16(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000175{
176 return (__m64)__builtin_ia32_psubusw((__v4hi)m1, (__v4hi)m2);
177}
178
Anders Carlsson365db3b2008-03-04 21:11:29 +0000179inline __m64 __attribute__((__always_inline__)) _mm_madd_pi16(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000180{
181 return (__m64)__builtin_ia32_pmaddwd((__v4hi)m1, (__v4hi)m2);
182}
183
Anders Carlsson365db3b2008-03-04 21:11:29 +0000184inline __m64 __attribute__((__always_inline__)) _mm_mulhi_pi16(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000185{
186 return (__m64)__builtin_ia32_pmulhw((__v4hi)m1, (__v4hi)m2);
187}
188
Anders Carlsson365db3b2008-03-04 21:11:29 +0000189inline __m64 __attribute__((__always_inline__)) _mm_mullo_pi16(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000190{
191 return (__m64)((__v4hi)m1 * (__v4hi)m2);
192}
193
Anders Carlsson365db3b2008-03-04 21:11:29 +0000194inline __m64 __attribute__((__always_inline__)) _mm_sll_pi16(__m64 m, __m64 count)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000195{
196 return (__m64)__builtin_ia32_psllw((__v4hi)m, count);
197}
198
Anders Carlsson365db3b2008-03-04 21:11:29 +0000199inline __m64 __attribute__((__always_inline__)) _mm_slli_pi16(__m64 m, int count)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000200{
201 return (__m64)__builtin_ia32_psllwi((__v4hi)m, count);
202}
203
Anders Carlsson365db3b2008-03-04 21:11:29 +0000204inline __m64 __attribute__((__always_inline__)) _mm_sll_pi32(__m64 m, __m64 count)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000205{
206 return (__m64)__builtin_ia32_pslld((__v2si)m, count);
207}
208
Anders Carlsson365db3b2008-03-04 21:11:29 +0000209inline __m64 __attribute__((__always_inline__)) _mm_slli_pi32(__m64 m, int count)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000210{
211 return (__m64)__builtin_ia32_pslldi((__v2si)m, count);
212}
213
Anders Carlsson365db3b2008-03-04 21:11:29 +0000214inline __m64 __attribute__((__always_inline__)) _mm_sll_pi64(__m64 m, __m64 count)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000215{
216 return __builtin_ia32_psllq(m, count);
217}
218
Anders Carlsson365db3b2008-03-04 21:11:29 +0000219inline __m64 __attribute__((__always_inline__)) _mm_slli_pi64(__m64 m, int count)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000220{
221 return __builtin_ia32_psllqi(m, count);
222}
223
Anders Carlsson365db3b2008-03-04 21:11:29 +0000224inline __m64 __attribute__((__always_inline__)) _mm_sra_pi16(__m64 m, __m64 count)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000225{
226 return (__m64)__builtin_ia32_psraw((__v4hi)m, count);
227}
228
Anders Carlsson365db3b2008-03-04 21:11:29 +0000229inline __m64 __attribute__((__always_inline__)) _mm_srai_pi16(__m64 m, int count)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000230{
231 return (__m64)__builtin_ia32_psrawi((__v4hi)m, count);
232}
233
Anders Carlsson365db3b2008-03-04 21:11:29 +0000234inline __m64 __attribute__((__always_inline__)) _mm_sra_pi32(__m64 m, __m64 count)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000235{
236 return (__m64)__builtin_ia32_psrad((__v2si)m, count);
237}
238
Anders Carlsson365db3b2008-03-04 21:11:29 +0000239inline __m64 __attribute__((__always_inline__)) _mm_srai_pi32(__m64 m, int count)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000240{
241 return (__m64)__builtin_ia32_psradi((__v2si)m, count);
242}
243
Anders Carlsson365db3b2008-03-04 21:11:29 +0000244inline __m64 __attribute__((__always_inline__)) _mm_srl_pi16(__m64 m, __m64 count)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000245{
246 return (__m64)__builtin_ia32_psrlw((__v4hi)m, count);
247}
248
Anders Carlsson365db3b2008-03-04 21:11:29 +0000249inline __m64 __attribute__((__always_inline__)) _mm_srli_pi16(__m64 m, int count)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000250{
251 return (__m64)__builtin_ia32_psrlwi((__v4hi)m, count);
252}
253
Anders Carlsson365db3b2008-03-04 21:11:29 +0000254inline __m64 __attribute__((__always_inline__)) _mm_srl_pi32(__m64 m, __m64 count)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000255{
256 return (__m64)__builtin_ia32_psrld((__v2si)m, count);
257}
258
Anders Carlsson365db3b2008-03-04 21:11:29 +0000259inline __m64 __attribute__((__always_inline__)) _mm_srli_pi32(__m64 m, int count)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000260{
261 return (__m64)__builtin_ia32_psrldi((__v2si)m, count);
262}
263
Anders Carlsson365db3b2008-03-04 21:11:29 +0000264inline __m64 __attribute__((__always_inline__)) _mm_srl_pi64(__m64 m, __m64 count)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000265{
266 return (__m64)__builtin_ia32_psrlq(m, count);
267}
268
Anders Carlsson365db3b2008-03-04 21:11:29 +0000269inline __m64 __attribute__((__always_inline__)) _mm_srli_pi64(__m64 m, int count)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000270{
271 return __builtin_ia32_psrlqi(m, count);
272}
273
Anders Carlsson365db3b2008-03-04 21:11:29 +0000274inline __m64 __attribute__((__always_inline__)) _mm_and_si64(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000275{
276 return m1 & m2;
277}
278
Anders Carlsson365db3b2008-03-04 21:11:29 +0000279inline __m64 __attribute__((__always_inline__)) _mm_andnot_si64(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000280{
281 return ~m1 & m2;
282}
283
Anders Carlsson365db3b2008-03-04 21:11:29 +0000284inline __m64 __attribute__((__always_inline__)) _mm_or_si64(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000285{
286 return m1 | m2;
287}
288
Anders Carlsson365db3b2008-03-04 21:11:29 +0000289inline __m64 __attribute__((__always_inline__)) _mm_xor_si64(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000290{
291 return m1 ^ m2;
292}
293
Anders Carlsson365db3b2008-03-04 21:11:29 +0000294inline __m64 __attribute__((__always_inline__)) _mm_cmpeq_pi8(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000295{
296 return (__m64)__builtin_ia32_pcmpeqb((__v8qi)m1, (__v8qi)m2);
297}
298
Anders Carlsson365db3b2008-03-04 21:11:29 +0000299inline __m64 __attribute__((__always_inline__)) _mm_cmpeq_pi16(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000300{
301 return (__m64)__builtin_ia32_pcmpeqw((__v4hi)m1, (__v4hi)m2);
302}
303
Anders Carlsson365db3b2008-03-04 21:11:29 +0000304inline __m64 __attribute__((__always_inline__)) _mm_cmpeq_pi32(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000305{
306 return (__m64)__builtin_ia32_pcmpeqd((__v2si)m1, (__v2si)m2);
307}
308
Anders Carlsson365db3b2008-03-04 21:11:29 +0000309inline __m64 __attribute__((__always_inline__)) _mm_cmpgt_pi8(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000310{
311 return (__m64)__builtin_ia32_pcmpgtb((__v8qi)m1, (__v8qi)m2);
312}
313
Anders Carlsson365db3b2008-03-04 21:11:29 +0000314inline __m64 __attribute__((__always_inline__)) _mm_cmpgt_pi16(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000315{
316 return (__m64)__builtin_ia32_pcmpgtw((__v4hi)m1, (__v4hi)m2);
317}
318
Anders Carlsson365db3b2008-03-04 21:11:29 +0000319inline __m64 __attribute__((__always_inline__)) _mm_cmpgt_pi32(__m64 m1, __m64 m2)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000320{
321 return (__m64)__builtin_ia32_pcmpgtd((__v2si)m1, (__v2si)m2);
322}
323
Anders Carlsson365db3b2008-03-04 21:11:29 +0000324inline __m64 __attribute__((__always_inline__)) _mm_setzero_si64()
Anders Carlsson37958ee2008-03-03 19:29:06 +0000325{
326 return (__m64){ 0LL };
327}
328
Anders Carlsson365db3b2008-03-04 21:11:29 +0000329inline __m64 __attribute__((__always_inline__)) _mm_set_pi32(int i1, int i0)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000330{
331 return (__m64)(__v2si){ i0, i1 };
332}
333
Anders Carlsson365db3b2008-03-04 21:11:29 +0000334inline __m64 __attribute__((__always_inline__)) _mm_set_pi16(short s3, short s2, short s1, short s0)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000335{
336 return (__m64)(__v4hi){ s0, s1, s2, s3 };
337}
338
Anders Carlsson365db3b2008-03-04 21:11:29 +0000339inline __m64 __attribute__((__always_inline__)) _mm_set_pi8(char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000340{
341 return (__m64)(__v8qi){ b0, b1, b2, b3, b4, b5, b6, b7 };
342}
343
Anders Carlsson365db3b2008-03-04 21:11:29 +0000344inline __m64 __attribute__((__always_inline__)) _mm_set1_pi32(int i)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000345{
346 return (__m64)(__v2si){ i, i };
347}
348
Anders Carlsson365db3b2008-03-04 21:11:29 +0000349inline __m64 __attribute__((__always_inline__)) _mm_set1_pi16(short s)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000350{
351 return (__m64)(__v4hi){ s };
352}
353
Anders Carlsson365db3b2008-03-04 21:11:29 +0000354inline __m64 __attribute__((__always_inline__)) _mm_set1_pi8(char b)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000355{
356 return (__m64)(__v8qi){ b };
357}
358
Anders Carlsson365db3b2008-03-04 21:11:29 +0000359inline __m64 __attribute__((__always_inline__)) _mm_setr_pi32(int i1, int i0)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000360{
361 return (__m64)(__v2si){ i1, i0 };
362}
363
Anders Carlsson365db3b2008-03-04 21:11:29 +0000364inline __m64 __attribute__((__always_inline__)) _mm_setr_pi16(short s3, short s2, short s1, short s0)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000365{
366 return (__m64)(__v4hi){ s3, s2, s1, s0 };
367}
368
Anders Carlsson365db3b2008-03-04 21:11:29 +0000369inline __m64 __attribute__((__always_inline__)) _mm_setr_pi8(char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
Anders Carlsson37958ee2008-03-03 19:29:06 +0000370{
371 return (__m64)(__v8qi){ b7, b6, b5, b4, b3, b2, b1, b0 };
372}
373
374#endif /* __MMX__ */
375
376#endif /* __MMINTRIN_H */
377