/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
 *
 * Copyright (c) 2008 Anders Carlsson
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __MMINTRIN_H
#define __MMINTRIN_H

#ifndef __MMX__
#error "MMX instruction set not enabled"
#else

typedef long long __m64 __attribute__((vector_size(8)));

typedef int __v2si __attribute__((vector_size(8)));
typedef short __v4hi __attribute__((vector_size(8)));
typedef char __v8qi __attribute__((vector_size(8)));

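/* State management: _mm_empty() issues EMMS, clearing the MMX state so that
   subsequent x87 floating-point code executes correctly. */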
inline void _mm_empty(void)
{
    __builtin_ia32_emms();
}

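/* Conversions between scalar integers and __m64: the 32-bit value occupies the
   low doubleword (MOVD); the 64-bit forms reinterpret the value directly. */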
inline __m64 _mm_cvtsi32_si64(int i)
{
    return (__m64)(__v2si){i, 0};
}

inline int _mm_cvtsi64_si32(__m64 m)
{
    return ((__v2si)m)[0];
}

inline __m64 _mm_cvtsi64_m64(long long i)
{
    return (__m64)i;
}

inline long long _mm_cvtm64_si64(__m64 m)
{
    return (long long)m;
}

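/* Pack: narrow each source element to half its width with signed (packs_pi*)
   or unsigned (packs_pu16) saturation and concatenate the results
   (PACKSSWB, PACKSSDW, PACKUSWB). */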
inline __m64 _mm_packs_pi16(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_packsswb((__v4hi)m1, (__v4hi)m2);
}

inline __m64 _mm_packs_pi32(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_packssdw((__v2si)m1, (__v2si)m2);
}

inline __m64 _mm_packs_pu16(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_packuswb((__v4hi)m1, (__v4hi)m2);
}

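/* Unpack: interleave elements from the high (unpackhi) or low (unpacklo)
   halves of the two operands (PUNPCKH*, PUNPCKL*). */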
inline __m64 _mm_unpackhi_pi8(__m64 m1, __m64 m2)
{
    /* PUNPCKHBW: interleave the high four bytes of m1 and m2. */
    return (__m64)__builtin_shufflevector((__v8qi)m1, (__v8qi)m2,
                                          4, 12, 5, 13, 6, 14, 7, 15);
}

inline __m64 _mm_unpackhi_pi16(__m64 m1, __m64 m2)
{
    /* PUNPCKHWD: interleave the high two words of m1 and m2. */
    return (__m64)__builtin_shufflevector((__v4hi)m1, (__v4hi)m2, 2, 6, 3, 7);
}

inline __m64 _mm_unpackhi_pi32(__m64 m1, __m64 m2)
{
    /* PUNPCKHDQ: interleave the high doublewords of m1 and m2. */
    return (__m64)__builtin_shufflevector((__v2si)m1, (__v2si)m2, 1, 3);
}

inline __m64 _mm_unpacklo_pi8(__m64 m1, __m64 m2)
{
    /* PUNPCKLBW: interleave the low four bytes of m1 and m2. */
    return (__m64)__builtin_shufflevector((__v8qi)m1, (__v8qi)m2,
                                          0, 8, 1, 9, 2, 10, 3, 11);
}

inline __m64 _mm_unpacklo_pi16(__m64 m1, __m64 m2)
{
    /* PUNPCKLWD: interleave the low two words of m1 and m2. */
    return (__m64)__builtin_shufflevector((__v4hi)m1, (__v4hi)m2, 0, 4, 1, 5);
}

inline __m64 _mm_unpacklo_pi32(__m64 m1, __m64 m2)
{
    /* PUNPCKLDQ: interleave the low doublewords of m1 and m2. */
    return (__m64)__builtin_shufflevector((__v2si)m1, (__v2si)m2, 0, 2);
}

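/* Addition: element-wise wrap-around addition of packed bytes, words and
   doublewords (PADDB, PADDW, PADDD). */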
inline __m64 _mm_add_pi8(__m64 m1, __m64 m2)
{
    return (__m64)((__v8qi)m1 + (__v8qi)m2);
}

inline __m64 _mm_add_pi16(__m64 m1, __m64 m2)
{
    return (__m64)((__v4hi)m1 + (__v4hi)m2);
}

inline __m64 _mm_add_pi32(__m64 m1, __m64 m2)
{
    return (__m64)((__v2si)m1 + (__v2si)m2);
}

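/* Saturating addition: results are clamped to the signed (adds_pi*) or
   unsigned (adds_pu*) range of the element type (PADDSB/W, PADDUSB/W). */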
inline __m64 _mm_adds_pi8(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_paddsb((__v8qi)m1, (__v8qi)m2);
}

inline __m64 _mm_adds_pi16(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_paddsw((__v4hi)m1, (__v4hi)m2);
}

inline __m64 _mm_adds_pu8(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_paddusb((__v8qi)m1, (__v8qi)m2);
}

inline __m64 _mm_adds_pu16(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_paddusw((__v4hi)m1, (__v4hi)m2);
}

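/* Subtraction: element-wise wrap-around subtraction (PSUBB, PSUBW, PSUBD). */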
inline __m64 _mm_sub_pi8(__m64 m1, __m64 m2)
{
    return (__m64)((__v8qi)m1 - (__v8qi)m2);
}

inline __m64 _mm_sub_pi16(__m64 m1, __m64 m2)
{
    return (__m64)((__v4hi)m1 - (__v4hi)m2);
}

inline __m64 _mm_sub_pi32(__m64 m1, __m64 m2)
{
    return (__m64)((__v2si)m1 - (__v2si)m2);
}

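/* Saturating subtraction: signed (subs_pi*) and unsigned (subs_pu*) variants
   (PSUBSB/W, PSUBUSB/W). */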
inline __m64 _mm_subs_pi8(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_psubsb((__v8qi)m1, (__v8qi)m2);
}

inline __m64 _mm_subs_pi16(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_psubsw((__v4hi)m1, (__v4hi)m2);
}

inline __m64 _mm_subs_pu8(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_psubusb((__v8qi)m1, (__v8qi)m2);
}

inline __m64 _mm_subs_pu16(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_psubusw((__v4hi)m1, (__v4hi)m2);
}

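/* Multiplication: PMADDWD multiplies packed words and adds adjacent products
   into doublewords; PMULHW and PMULLW return the high and low 16 bits of each
   word product, respectively. */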
inline __m64 _mm_madd_pi16(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_pmaddwd((__v4hi)m1, (__v4hi)m2);
}

inline __m64 _mm_mulhi_pi16(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_pmulhw((__v4hi)m1, (__v4hi)m2);
}

inline __m64 _mm_mullo_pi16(__m64 m1, __m64 m2)
{
    return (__m64)((__v4hi)m1 * (__v4hi)m2);
}

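/* Shifts: logical left (PSLL*), arithmetic right (PSRA*) and logical right
   (PSRL*) shifts of packed elements or of the whole 64-bit value; the count is
   taken from the second __m64 operand or, in the *i forms, from an integer. */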
inline __m64 _mm_sll_pi16(__m64 m, __m64 count)
{
    return (__m64)__builtin_ia32_psllw((__v4hi)m, count);
}

inline __m64 _mm_slli_pi16(__m64 m, int count)
{
    return (__m64)__builtin_ia32_psllwi((__v4hi)m, count);
}

inline __m64 _mm_sll_pi32(__m64 m, __m64 count)
{
    return (__m64)__builtin_ia32_pslld((__v2si)m, count);
}

inline __m64 _mm_slli_pi32(__m64 m, int count)
{
    return (__m64)__builtin_ia32_pslldi((__v2si)m, count);
}

inline __m64 _mm_sll_si64(__m64 m, __m64 count)
{
    return (__m64)__builtin_ia32_psllq(m, count);
}

inline __m64 _mm_slli_si64(__m64 m, int count)
{
    return (__m64)__builtin_ia32_psllqi(m, count);
}

inline __m64 _mm_sra_pi16(__m64 m, __m64 count)
{
    return (__m64)__builtin_ia32_psraw((__v4hi)m, count);
}

inline __m64 _mm_srai_pi16(__m64 m, int count)
{
    return (__m64)__builtin_ia32_psrawi((__v4hi)m, count);
}

inline __m64 _mm_sra_pi32(__m64 m, __m64 count)
{
    return (__m64)__builtin_ia32_psrad((__v2si)m, count);
}

inline __m64 _mm_srai_pi32(__m64 m, int count)
{
    return (__m64)__builtin_ia32_psradi((__v2si)m, count);
}

inline __m64 _mm_srl_pi16(__m64 m, __m64 count)
{
    return (__m64)__builtin_ia32_psrlw((__v4hi)m, count);
}

inline __m64 _mm_srli_pi16(__m64 m, int count)
{
    return (__m64)__builtin_ia32_psrlwi((__v4hi)m, count);
}

inline __m64 _mm_srl_pi32(__m64 m, __m64 count)
{
    return (__m64)__builtin_ia32_psrld((__v2si)m, count);
}

inline __m64 _mm_srli_pi32(__m64 m, int count)
{
    return (__m64)__builtin_ia32_psrldi((__v2si)m, count);
}

inline __m64 _mm_srl_si64(__m64 m, __m64 count)
{
    return (__m64)__builtin_ia32_psrlq(m, count);
}

inline __m64 _mm_srli_si64(__m64 m, int count)
{
    return (__m64)__builtin_ia32_psrlqi(m, count);
}

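/* Bitwise logic on the full 64-bit value (PAND, PANDN, POR, PXOR). */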
inline __m64 _mm_and_si64(__m64 m1, __m64 m2)
{
    return m1 & m2;
}

inline __m64 _mm_andnot_si64(__m64 m1, __m64 m2)
{
    return ~m1 & m2;
}

inline __m64 _mm_or_si64(__m64 m1, __m64 m2)
{
    return m1 | m2;
}

inline __m64 _mm_xor_si64(__m64 m1, __m64 m2)
{
    return m1 ^ m2;
}

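/* Comparisons: each element becomes all ones if the relation holds and all
   zeros otherwise (PCMPEQ*, PCMPGT*). */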
inline __m64 _mm_cmpeq_pi8(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_pcmpeqb((__v8qi)m1, (__v8qi)m2);
}

inline __m64 _mm_cmpeq_pi16(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_pcmpeqw((__v4hi)m1, (__v4hi)m2);
}

inline __m64 _mm_cmpeq_pi32(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_pcmpeqd((__v2si)m1, (__v2si)m2);
}

inline __m64 _mm_cmpgt_pi8(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_pcmpgtb((__v8qi)m1, (__v8qi)m2);
}

inline __m64 _mm_cmpgt_pi16(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_pcmpgtw((__v4hi)m1, (__v4hi)m2);
}

inline __m64 _mm_cmpgt_pi32(__m64 m1, __m64 m2)
{
    return (__m64)__builtin_ia32_pcmpgtd((__v2si)m1, (__v2si)m2);
}

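/* Initialization: _mm_set* takes arguments from the most significant element
   down to the least significant, _mm_setr* takes them in memory (reversed)
   order, and _mm_set1* broadcasts one value into every element. */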
inline __m64 _mm_setzero_si64(void)
{
    return (__m64){ 0LL };
}

inline __m64 _mm_set_pi32(int i1, int i0)
{
    return (__m64)(__v2si){ i0, i1 };
}

inline __m64 _mm_set_pi16(short s3, short s2, short s1, short s0)
{
    return (__m64)(__v4hi){ s0, s1, s2, s3 };
}

inline __m64 _mm_set_pi8(char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
{
    return (__m64)(__v8qi){ b0, b1, b2, b3, b4, b5, b6, b7 };
}

inline __m64 _mm_set1_pi32(int i)
{
    return (__m64)(__v2si){ i, i };
}

inline __m64 _mm_set1_pi16(short s)
{
    return (__m64)(__v4hi){ s, s, s, s };
}

inline __m64 _mm_set1_pi8(char b)
{
    return (__m64)(__v8qi){ b, b, b, b, b, b, b, b };
}

inline __m64 _mm_setr_pi32(int i0, int i1)
{
    return (__m64)(__v2si){ i0, i1 };
}

inline __m64 _mm_setr_pi16(short s0, short s1, short s2, short s3)
{
    return (__m64)(__v4hi){ s0, s1, s2, s3 };
}

inline __m64 _mm_setr_pi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7)
{
    return (__m64)(__v8qi){ b0, b1, b2, b3, b4, b5, b6, b7 };
}

#endif /* __MMX__ */

#endif /* __MMINTRIN_H */