blob: 9f80fe2ce2aabc38a33605486d084e142543e481 [file] [log] [blame]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32
3; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X64
4
5; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c
6
7define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
8; X32-LABEL: test_mm_add_epi8:
9; X32: # BB#0:
10; X32-NEXT: paddb %xmm1, %xmm0
11; X32-NEXT: retl
12;
13; X64-LABEL: test_mm_add_epi8:
14; X64: # BB#0:
15; X64-NEXT: paddb %xmm1, %xmm0
16; X64-NEXT: retq
17 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
18 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
19 %res = add <16 x i8> %arg0, %arg1
20 %bc = bitcast <16 x i8> %res to <2 x i64>
21 ret <2 x i64> %bc
22}
23
24define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
25; X32-LABEL: test_mm_add_epi16:
26; X32: # BB#0:
27; X32-NEXT: paddw %xmm1, %xmm0
28; X32-NEXT: retl
29;
30; X64-LABEL: test_mm_add_epi16:
31; X64: # BB#0:
32; X64-NEXT: paddw %xmm1, %xmm0
33; X64-NEXT: retq
34 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
35 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
36 %res = add <8 x i16> %arg0, %arg1
37 %bc = bitcast <8 x i16> %res to <2 x i64>
38 ret <2 x i64> %bc
39}
40
41define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
42; X32-LABEL: test_mm_add_epi32:
43; X32: # BB#0:
44; X32-NEXT: paddd %xmm1, %xmm0
45; X32-NEXT: retl
46;
47; X64-LABEL: test_mm_add_epi32:
48; X64: # BB#0:
49; X64-NEXT: paddd %xmm1, %xmm0
50; X64-NEXT: retq
51 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
52 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
53 %res = add <4 x i32> %arg0, %arg1
54 %bc = bitcast <4 x i32> %res to <2 x i64>
55 ret <2 x i64> %bc
56}
57
58define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
59; X32-LABEL: test_mm_add_epi64:
60; X32: # BB#0:
61; X32-NEXT: paddq %xmm1, %xmm0
62; X32-NEXT: retl
63;
64; X64-LABEL: test_mm_add_epi64:
65; X64: # BB#0:
66; X64-NEXT: paddq %xmm1, %xmm0
67; X64-NEXT: retq
68 %res = add <2 x i64> %a0, %a1
69 ret <2 x i64> %res
70}
71
72define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
73; X32-LABEL: test_mm_add_pd:
74; X32: # BB#0:
75; X32-NEXT: addpd %xmm1, %xmm0
76; X32-NEXT: retl
77;
78; X64-LABEL: test_mm_add_pd:
79; X64: # BB#0:
80; X64-NEXT: addpd %xmm1, %xmm0
81; X64-NEXT: retq
82 %res = fadd <2 x double> %a0, %a1
83 ret <2 x double> %res
84}
85
86define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
87; X32-LABEL: test_mm_add_sd:
88; X32: # BB#0:
89; X32-NEXT: addsd %xmm1, %xmm0
90; X32-NEXT: retl
91;
92; X64-LABEL: test_mm_add_sd:
93; X64: # BB#0:
94; X64-NEXT: addsd %xmm1, %xmm0
95; X64-NEXT: retq
96 %ext0 = extractelement <2 x double> %a0, i32 0
97 %ext1 = extractelement <2 x double> %a1, i32 0
98 %fadd = fadd double %ext0, %ext1
99 %res = insertelement <2 x double> %a0, double %fadd, i32 0
100 ret <2 x double> %res
101}
102
103define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
104; X32-LABEL: test_mm_adds_epi8:
105; X32: # BB#0:
106; X32-NEXT: paddsb %xmm1, %xmm0
107; X32-NEXT: retl
108;
109; X64-LABEL: test_mm_adds_epi8:
110; X64: # BB#0:
111; X64-NEXT: paddsb %xmm1, %xmm0
112; X64-NEXT: retq
113 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
114 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
115 %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %arg0, <16 x i8> %arg1)
116 %bc = bitcast <16 x i8> %res to <2 x i64>
117 ret <2 x i64> %bc
118}
119declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
120
121define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
122; X32-LABEL: test_mm_adds_epi16:
123; X32: # BB#0:
124; X32-NEXT: paddsw %xmm1, %xmm0
125; X32-NEXT: retl
126;
127; X64-LABEL: test_mm_adds_epi16:
128; X64: # BB#0:
129; X64-NEXT: paddsw %xmm1, %xmm0
130; X64-NEXT: retq
131 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
132 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
133 %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %arg0, <8 x i16> %arg1)
134 %bc = bitcast <8 x i16> %res to <2 x i64>
135 ret <2 x i64> %bc
136}
137declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
138
139define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
140; X32-LABEL: test_mm_adds_epu8:
141; X32: # BB#0:
142; X32-NEXT: paddusb %xmm1, %xmm0
143; X32-NEXT: retl
144;
145; X64-LABEL: test_mm_adds_epu8:
146; X64: # BB#0:
147; X64-NEXT: paddusb %xmm1, %xmm0
148; X64-NEXT: retq
149 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
150 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
151 %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %arg0, <16 x i8> %arg1)
152 %bc = bitcast <16 x i8> %res to <2 x i64>
153 ret <2 x i64> %bc
154}
155declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
156
157define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
158; X32-LABEL: test_mm_adds_epu16:
159; X32: # BB#0:
160; X32-NEXT: paddusw %xmm1, %xmm0
161; X32-NEXT: retl
162;
163; X64-LABEL: test_mm_adds_epu16:
164; X64: # BB#0:
165; X64-NEXT: paddusw %xmm1, %xmm0
166; X64-NEXT: retq
167 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
168 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
169 %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %arg0, <8 x i16> %arg1)
170 %bc = bitcast <8 x i16> %res to <2 x i64>
171 ret <2 x i64> %bc
172}
173declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
174
175define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
176; X32-LABEL: test_mm_and_pd:
177; X32: # BB#0:
178; X32-NEXT: andps %xmm1, %xmm0
179; X32-NEXT: retl
180;
181; X64-LABEL: test_mm_and_pd:
182; X64: # BB#0:
183; X64-NEXT: andps %xmm1, %xmm0
184; X64-NEXT: retq
185 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
186 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
187 %res = and <4 x i32> %arg0, %arg1
188 %bc = bitcast <4 x i32> %res to <2 x double>
189 ret <2 x double> %bc
190}
191
192define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
193; X32-LABEL: test_mm_and_si128:
194; X32: # BB#0:
195; X32-NEXT: andps %xmm1, %xmm0
196; X32-NEXT: retl
197;
198; X64-LABEL: test_mm_and_si128:
199; X64: # BB#0:
200; X64-NEXT: andps %xmm1, %xmm0
201; X64-NEXT: retq
202 %res = and <2 x i64> %a0, %a1
203 ret <2 x i64> %res
204}
205
206define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
207; X32-LABEL: test_mm_andnot_pd:
208; X32: # BB#0:
209; X32-NEXT: andnps %xmm1, %xmm0
210; X32-NEXT: retl
211;
212; X64-LABEL: test_mm_andnot_pd:
213; X64: # BB#0:
214; X64-NEXT: andnps %xmm1, %xmm0
215; X64-NEXT: retq
216 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
217 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
218 %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
219 %res = and <4 x i32> %not, %arg1
220 %bc = bitcast <4 x i32> %res to <2 x double>
221 ret <2 x double> %bc
222}
223
224define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
225; X32-LABEL: test_mm_andnot_si128:
226; X32: # BB#0:
227; X32-NEXT: pcmpeqd %xmm2, %xmm2
228; X32-NEXT: pxor %xmm2, %xmm0
229; X32-NEXT: pand %xmm1, %xmm0
230; X32-NEXT: retl
231;
232; X64-LABEL: test_mm_andnot_si128:
233; X64: # BB#0:
234; X64-NEXT: pcmpeqd %xmm2, %xmm2
235; X64-NEXT: pxor %xmm2, %xmm0
236; X64-NEXT: pand %xmm1, %xmm0
237; X64-NEXT: retq
238 %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
239 %res = and <2 x i64> %not, %a1
240 ret <2 x i64> %res
241}
242
243define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
244; X32-LABEL: test_mm_avg_epu8:
245; X32: # BB#0:
246; X32-NEXT: pavgb %xmm1, %xmm0
247; X32-NEXT: retl
248;
249; X64-LABEL: test_mm_avg_epu8:
250; X64: # BB#0:
251; X64-NEXT: pavgb %xmm1, %xmm0
252; X64-NEXT: retq
253 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
254 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
255 %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1)
256 %bc = bitcast <16 x i8> %res to <2 x i64>
257 ret <2 x i64> %bc
258}
259declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1) nounwind readnone
260
261define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
262; X32-LABEL: test_mm_avg_epu16:
263; X32: # BB#0:
264; X32-NEXT: pavgw %xmm1, %xmm0
265; X32-NEXT: retl
266;
267; X64-LABEL: test_mm_avg_epu16:
268; X64: # BB#0:
269; X64-NEXT: pavgw %xmm1, %xmm0
270; X64-NEXT: retq
271 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
272 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
273 %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %arg0, <8 x i16> %arg1)
274 %bc = bitcast <8 x i16> %res to <2 x i64>
275 ret <2 x i64> %bc
276}
277declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
278
279define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
280; X32-LABEL: test_mm_bslli_si128:
281; X32: # BB#0:
282; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
283; X32-NEXT: retl
284;
285; X64-LABEL: test_mm_bslli_si128:
286; X64: # BB#0:
287; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
288; X64-NEXT: retq
289 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
290 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
291 %bc = bitcast <16 x i8> %res to <2 x i64>
292 ret <2 x i64> %bc
293}
294
295define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
296; X32-LABEL: test_mm_bsrli_si128:
297; X32: # BB#0:
298; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
299; X32-NEXT: retl
300;
301; X64-LABEL: test_mm_bsrli_si128:
302; X64: # BB#0:
303; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
304; X64-NEXT: retq
305 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
306 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
307 %bc = bitcast <16 x i8> %res to <2 x i64>
308 ret <2 x i64> %bc
309}
310
311define void @test_mm_clflush(i8* %a0) nounwind {
312; X32-LABEL: test_mm_clflush:
313; X32: # BB#0:
314; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
315; X32-NEXT: clflush (%eax)
316; X32-NEXT: retl
317;
318; X64-LABEL: test_mm_clflush:
319; X64: # BB#0:
320; X64-NEXT: clflush (%rdi)
321; X64-NEXT: retq
322 call void @llvm.x86.sse2.clflush(i8* %a0)
323 ret void
324}
325declare void @llvm.x86.sse2.clflush(i8*) nounwind readnone
326
327define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
328; X32-LABEL: test_mm_cmpeq_epi8:
329; X32: # BB#0:
330; X32-NEXT: pcmpeqb %xmm1, %xmm0
331; X32-NEXT: retl
332;
333; X64-LABEL: test_mm_cmpeq_epi8:
334; X64: # BB#0:
335; X64-NEXT: pcmpeqb %xmm1, %xmm0
336; X64-NEXT: retq
337 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
338 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
339 %cmp = icmp eq <16 x i8> %arg0, %arg1
340 %res = sext <16 x i1> %cmp to <16 x i8>
341 %bc = bitcast <16 x i8> %res to <2 x i64>
342 ret <2 x i64> %bc
343}
344
345define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
346; X32-LABEL: test_mm_cmpeq_epi16:
347; X32: # BB#0:
348; X32-NEXT: pcmpeqw %xmm1, %xmm0
349; X32-NEXT: retl
350;
351; X64-LABEL: test_mm_cmpeq_epi16:
352; X64: # BB#0:
353; X64-NEXT: pcmpeqw %xmm1, %xmm0
354; X64-NEXT: retq
355 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
356 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
357 %cmp = icmp eq <8 x i16> %arg0, %arg1
358 %res = sext <8 x i1> %cmp to <8 x i16>
359 %bc = bitcast <8 x i16> %res to <2 x i64>
360 ret <2 x i64> %bc
361}
362
363define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
364; X32-LABEL: test_mm_cmpeq_epi32:
365; X32: # BB#0:
366; X32-NEXT: pcmpeqd %xmm1, %xmm0
367; X32-NEXT: retl
368;
369; X64-LABEL: test_mm_cmpeq_epi32:
370; X64: # BB#0:
371; X64-NEXT: pcmpeqd %xmm1, %xmm0
372; X64-NEXT: retq
373 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
374 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
375 %cmp = icmp eq <4 x i32> %arg0, %arg1
376 %res = sext <4 x i1> %cmp to <4 x i32>
377 %bc = bitcast <4 x i32> %res to <2 x i64>
378 ret <2 x i64> %bc
379}
380
381define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
382; X32-LABEL: test_mm_cmpeq_pd:
383; X32: # BB#0:
384; X32-NEXT: cmpeqpd %xmm1, %xmm0
385; X32-NEXT: retl
386;
387; X64-LABEL: test_mm_cmpeq_pd:
388; X64: # BB#0:
389; X64-NEXT: cmpeqpd %xmm1, %xmm0
390; X64-NEXT: retq
391 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
392 ret <2 x double> %res
393}
394declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
395
396define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
397; X32-LABEL: test_mm_cmpeq_sd:
398; X32: # BB#0:
399; X32-NEXT: cmpeqsd %xmm1, %xmm0
400; X32-NEXT: retl
401;
402; X64-LABEL: test_mm_cmpeq_sd:
403; X64: # BB#0:
404; X64-NEXT: cmpeqsd %xmm1, %xmm0
405; X64-NEXT: retq
406 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
407 ret <2 x double> %res
408}
409declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
410
411define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
412; X32-LABEL: test_mm_cmpge_pd:
413; X32: # BB#0:
414; X32-NEXT: cmplepd %xmm0, %xmm1
415; X32-NEXT: movapd %xmm1, %xmm0
416; X32-NEXT: retl
417;
418; X64-LABEL: test_mm_cmpge_pd:
419; X64: # BB#0:
420; X64-NEXT: cmplepd %xmm0, %xmm1
421; X64-NEXT: movapd %xmm1, %xmm0
422; X64-NEXT: retq
423 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 2)
424 ret <2 x double> %res
425}
426
427define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
428; X32-LABEL: test_mm_cmpge_sd:
429; X32: # BB#0:
430; X32-NEXT: cmplesd %xmm0, %xmm1
431; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
432; X32-NEXT: retl
433;
434; X64-LABEL: test_mm_cmpge_sd:
435; X64: # BB#0:
436; X64-NEXT: cmplesd %xmm0, %xmm1
437; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
438; X64-NEXT: retq
439 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
440 %ext0 = extractelement <2 x double> %cmp, i32 0
441 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
442 %ext1 = extractelement <2 x double> %a0, i32 1
443 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
444 ret <2 x double> %ins1
445}
446
447define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
448; X32-LABEL: test_mm_cmpgt_epi8:
449; X32: # BB#0:
450; X32-NEXT: pcmpgtb %xmm1, %xmm0
451; X32-NEXT: retl
452;
453; X64-LABEL: test_mm_cmpgt_epi8:
454; X64: # BB#0:
455; X64-NEXT: pcmpgtb %xmm1, %xmm0
456; X64-NEXT: retq
457 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
458 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
459 %cmp = icmp sgt <16 x i8> %arg0, %arg1
460 %res = sext <16 x i1> %cmp to <16 x i8>
461 %bc = bitcast <16 x i8> %res to <2 x i64>
462 ret <2 x i64> %bc
463}
464
465define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
466; X32-LABEL: test_mm_cmpgt_epi16:
467; X32: # BB#0:
468; X32-NEXT: pcmpgtw %xmm1, %xmm0
469; X32-NEXT: retl
470;
471; X64-LABEL: test_mm_cmpgt_epi16:
472; X64: # BB#0:
473; X64-NEXT: pcmpgtw %xmm1, %xmm0
474; X64-NEXT: retq
475 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
476 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
477 %cmp = icmp sgt <8 x i16> %arg0, %arg1
478 %res = sext <8 x i1> %cmp to <8 x i16>
479 %bc = bitcast <8 x i16> %res to <2 x i64>
480 ret <2 x i64> %bc
481}
482
483define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
484; X32-LABEL: test_mm_cmpgt_epi32:
485; X32: # BB#0:
486; X32-NEXT: pcmpgtd %xmm1, %xmm0
487; X32-NEXT: retl
488;
489; X64-LABEL: test_mm_cmpgt_epi32:
490; X64: # BB#0:
491; X64-NEXT: pcmpgtd %xmm1, %xmm0
492; X64-NEXT: retq
493 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
494 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
495 %cmp = icmp sgt <4 x i32> %arg0, %arg1
496 %res = sext <4 x i1> %cmp to <4 x i32>
497 %bc = bitcast <4 x i32> %res to <2 x i64>
498 ret <2 x i64> %bc
499}
500
501define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
502; X32-LABEL: test_mm_cmpgt_pd:
503; X32: # BB#0:
504; X32-NEXT: cmpltpd %xmm0, %xmm1
505; X32-NEXT: movapd %xmm1, %xmm0
506; X32-NEXT: retl
507;
508; X64-LABEL: test_mm_cmpgt_pd:
509; X64: # BB#0:
510; X64-NEXT: cmpltpd %xmm0, %xmm1
511; X64-NEXT: movapd %xmm1, %xmm0
512; X64-NEXT: retq
513 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 1)
514 ret <2 x double> %res
515}
516
517define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
518; X32-LABEL: test_mm_cmpgt_sd:
519; X32: # BB#0:
520; X32-NEXT: cmpltsd %xmm0, %xmm1
521; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
522; X32-NEXT: retl
523;
524; X64-LABEL: test_mm_cmpgt_sd:
525; X64: # BB#0:
526; X64-NEXT: cmpltsd %xmm0, %xmm1
527; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
528; X64-NEXT: retq
529 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
530 %ext0 = extractelement <2 x double> %cmp, i32 0
531 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
532 %ext1 = extractelement <2 x double> %a0, i32 1
533 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
534 ret <2 x double> %ins1
535}
536
537define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
538; X32-LABEL: test_mm_cmple_pd:
539; X32: # BB#0:
540; X32-NEXT: cmplepd %xmm1, %xmm0
541; X32-NEXT: retl
542;
543; X64-LABEL: test_mm_cmple_pd:
544; X64: # BB#0:
545; X64-NEXT: cmplepd %xmm1, %xmm0
546; X64-NEXT: retq
547 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 2)
548 ret <2 x double> %res
549}
550
551define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
552; X32-LABEL: test_mm_cmple_sd:
553; X32: # BB#0:
554; X32-NEXT: cmplesd %xmm1, %xmm0
555; X32-NEXT: retl
556;
557; X64-LABEL: test_mm_cmple_sd:
558; X64: # BB#0:
559; X64-NEXT: cmplesd %xmm1, %xmm0
560; X64-NEXT: retq
561 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
562 ret <2 x double> %res
563}
564
565define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
566; X32-LABEL: test_mm_cmplt_epi8:
567; X32: # BB#0:
568; X32-NEXT: pcmpgtb %xmm0, %xmm1
569; X32-NEXT: movdqa %xmm1, %xmm0
570; X32-NEXT: retl
571;
572; X64-LABEL: test_mm_cmplt_epi8:
573; X64: # BB#0:
574; X64-NEXT: pcmpgtb %xmm0, %xmm1
575; X64-NEXT: movdqa %xmm1, %xmm0
576; X64-NEXT: retq
577 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
578 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
579 %cmp = icmp sgt <16 x i8> %arg1, %arg0
580 %res = sext <16 x i1> %cmp to <16 x i8>
581 %bc = bitcast <16 x i8> %res to <2 x i64>
582 ret <2 x i64> %bc
583}
584
585define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
586; X32-LABEL: test_mm_cmplt_epi16:
587; X32: # BB#0:
588; X32-NEXT: pcmpgtw %xmm0, %xmm1
589; X32-NEXT: movdqa %xmm1, %xmm0
590; X32-NEXT: retl
591;
592; X64-LABEL: test_mm_cmplt_epi16:
593; X64: # BB#0:
594; X64-NEXT: pcmpgtw %xmm0, %xmm1
595; X64-NEXT: movdqa %xmm1, %xmm0
596; X64-NEXT: retq
597 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
598 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
599 %cmp = icmp sgt <8 x i16> %arg1, %arg0
600 %res = sext <8 x i1> %cmp to <8 x i16>
601 %bc = bitcast <8 x i16> %res to <2 x i64>
602 ret <2 x i64> %bc
603}
604
605define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
606; X32-LABEL: test_mm_cmplt_epi32:
607; X32: # BB#0:
608; X32-NEXT: pcmpgtd %xmm0, %xmm1
609; X32-NEXT: movdqa %xmm1, %xmm0
610; X32-NEXT: retl
611;
612; X64-LABEL: test_mm_cmplt_epi32:
613; X64: # BB#0:
614; X64-NEXT: pcmpgtd %xmm0, %xmm1
615; X64-NEXT: movdqa %xmm1, %xmm0
616; X64-NEXT: retq
617 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
618 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
619 %cmp = icmp sgt <4 x i32> %arg1, %arg0
620 %res = sext <4 x i1> %cmp to <4 x i32>
621 %bc = bitcast <4 x i32> %res to <2 x i64>
622 ret <2 x i64> %bc
623}
624
625define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
626; X32-LABEL: test_mm_cmplt_pd:
627; X32: # BB#0:
628; X32-NEXT: cmpltpd %xmm1, %xmm0
629; X32-NEXT: retl
630;
631; X64-LABEL: test_mm_cmplt_pd:
632; X64: # BB#0:
633; X64-NEXT: cmpltpd %xmm1, %xmm0
634; X64-NEXT: retq
635 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 1)
636 ret <2 x double> %res
637}
638
639define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
640; X32-LABEL: test_mm_cmplt_sd:
641; X32: # BB#0:
642; X32-NEXT: cmpltsd %xmm1, %xmm0
643; X32-NEXT: retl
644;
645; X64-LABEL: test_mm_cmplt_sd:
646; X64: # BB#0:
647; X64-NEXT: cmpltsd %xmm1, %xmm0
648; X64-NEXT: retq
649 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
650 ret <2 x double> %res
651}
652
653define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
654; X32-LABEL: test_mm_cmpneq_pd:
655; X32: # BB#0:
656; X32-NEXT: cmpneqpd %xmm1, %xmm0
657; X32-NEXT: retl
658;
659; X64-LABEL: test_mm_cmpneq_pd:
660; X64: # BB#0:
661; X64-NEXT: cmpneqpd %xmm1, %xmm0
662; X64-NEXT: retq
663 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 4)
664 ret <2 x double> %res
665}
666
667define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
668; X32-LABEL: test_mm_cmpneq_sd:
669; X32: # BB#0:
670; X32-NEXT: cmpneqsd %xmm1, %xmm0
671; X32-NEXT: retl
672;
673; X64-LABEL: test_mm_cmpneq_sd:
674; X64: # BB#0:
675; X64-NEXT: cmpneqsd %xmm1, %xmm0
676; X64-NEXT: retq
677 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
678 ret <2 x double> %res
679}
680
681define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
682; X32-LABEL: test_mm_cmpnge_pd:
683; X32: # BB#0:
684; X32-NEXT: cmpnlepd %xmm0, %xmm1
685; X32-NEXT: movapd %xmm1, %xmm0
686; X32-NEXT: retl
687;
688; X64-LABEL: test_mm_cmpnge_pd:
689; X64: # BB#0:
690; X64-NEXT: cmpnlepd %xmm0, %xmm1
691; X64-NEXT: movapd %xmm1, %xmm0
692; X64-NEXT: retq
693 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 6)
694 ret <2 x double> %res
695}
696
697define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
698; X32-LABEL: test_mm_cmpnge_sd:
699; X32: # BB#0:
700; X32-NEXT: cmpnlesd %xmm0, %xmm1
701; X32-NEXT: movaps %xmm1, %xmm0
702; X32-NEXT: retl
703;
704; X64-LABEL: test_mm_cmpnge_sd:
705; X64: # BB#0:
706; X64-NEXT: cmpnlesd %xmm0, %xmm1
707; X64-NEXT: movaps %xmm1, %xmm0
708; X64-NEXT: retq
709 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
710 ret <2 x double> %res
711}
712
713define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
714; X32-LABEL: test_mm_cmpngt_pd:
715; X32: # BB#0:
716; X32-NEXT: cmpnltpd %xmm0, %xmm1
717; X32-NEXT: movapd %xmm1, %xmm0
718; X32-NEXT: retl
719;
720; X64-LABEL: test_mm_cmpngt_pd:
721; X64: # BB#0:
722; X64-NEXT: cmpnltpd %xmm0, %xmm1
723; X64-NEXT: movapd %xmm1, %xmm0
724; X64-NEXT: retq
725 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 5)
726 ret <2 x double> %res
727}
728
729define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
730; X32-LABEL: test_mm_cmpngt_sd:
731; X32: # BB#0:
732; X32-NEXT: cmpnltsd %xmm0, %xmm1
733; X32-NEXT: movaps %xmm1, %xmm0
734; X32-NEXT: retl
735;
736; X64-LABEL: test_mm_cmpngt_sd:
737; X64: # BB#0:
738; X64-NEXT: cmpnltsd %xmm0, %xmm1
739; X64-NEXT: movaps %xmm1, %xmm0
740; X64-NEXT: retq
741 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
742 ret <2 x double> %res
743}
744
745define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
746; X32-LABEL: test_mm_cmpnle_pd:
747; X32: # BB#0:
748; X32-NEXT: cmpnlepd %xmm1, %xmm0
749; X32-NEXT: retl
750;
751; X64-LABEL: test_mm_cmpnle_pd:
752; X64: # BB#0:
753; X64-NEXT: cmpnlepd %xmm1, %xmm0
754; X64-NEXT: retq
755 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 6)
756 ret <2 x double> %res
757}
758
759define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
760; X32-LABEL: test_mm_cmpnle_sd:
761; X32: # BB#0:
762; X32-NEXT: cmpnlesd %xmm1, %xmm0
763; X32-NEXT: retl
764;
765; X64-LABEL: test_mm_cmpnle_sd:
766; X64: # BB#0:
767; X64-NEXT: cmpnlesd %xmm1, %xmm0
768; X64-NEXT: retq
769 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
770 ret <2 x double> %res
771}
772
773define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
774; X32-LABEL: test_mm_cmpnlt_pd:
775; X32: # BB#0:
776; X32-NEXT: cmpnltpd %xmm1, %xmm0
777; X32-NEXT: retl
778;
779; X64-LABEL: test_mm_cmpnlt_pd:
780; X64: # BB#0:
781; X64-NEXT: cmpnltpd %xmm1, %xmm0
782; X64-NEXT: retq
783 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 5)
784 ret <2 x double> %res
785}
786
787define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
788; X32-LABEL: test_mm_cmpnlt_sd:
789; X32: # BB#0:
790; X32-NEXT: cmpnltsd %xmm1, %xmm0
791; X32-NEXT: retl
792;
793; X64-LABEL: test_mm_cmpnlt_sd:
794; X64: # BB#0:
795; X64-NEXT: cmpnltsd %xmm1, %xmm0
796; X64-NEXT: retq
797 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
798 ret <2 x double> %res
799}
800
801define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
802; X32-LABEL: test_mm_cmpord_pd:
803; X32: # BB#0:
804; X32-NEXT: cmpordpd %xmm1, %xmm0
805; X32-NEXT: retl
806;
807; X64-LABEL: test_mm_cmpord_pd:
808; X64: # BB#0:
809; X64-NEXT: cmpordpd %xmm1, %xmm0
810; X64-NEXT: retq
811 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
812 ret <2 x double> %res
813}
814
815define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
816; X32-LABEL: test_mm_cmpord_sd:
817; X32: # BB#0:
818; X32-NEXT: cmpordsd %xmm1, %xmm0
819; X32-NEXT: retl
820;
821; X64-LABEL: test_mm_cmpord_sd:
822; X64: # BB#0:
823; X64-NEXT: cmpordsd %xmm1, %xmm0
824; X64-NEXT: retq
825 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
826 ret <2 x double> %res
827}
828
829define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
830; X32-LABEL: test_mm_cmpunord_pd:
831; X32: # BB#0:
832; X32-NEXT: cmpunordpd %xmm1, %xmm0
833; X32-NEXT: retl
834;
835; X64-LABEL: test_mm_cmpunord_pd:
836; X64: # BB#0:
837; X64-NEXT: cmpunordpd %xmm1, %xmm0
838; X64-NEXT: retq
839 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 3)
840 ret <2 x double> %res
841}
842
843define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
844; X32-LABEL: test_mm_cmpunord_sd:
845; X32: # BB#0:
846; X32-NEXT: cmpunordsd %xmm1, %xmm0
847; X32-NEXT: retl
848;
849; X64-LABEL: test_mm_cmpunord_sd:
850; X64: # BB#0:
851; X64-NEXT: cmpunordsd %xmm1, %xmm0
852; X64-NEXT: retq
853 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3)
854 ret <2 x double> %res
855}
856
857define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
858; X32-LABEL: test_mm_comieq_sd:
859; X32: # BB#0:
860; X32-NEXT: comisd %xmm1, %xmm0
861; X32-NEXT: setnp %al
862; X32-NEXT: sete %cl
863; X32-NEXT: andb %al, %cl
864; X32-NEXT: movzbl %cl, %eax
865; X32-NEXT: retl
866;
867; X64-LABEL: test_mm_comieq_sd:
868; X64: # BB#0:
869; X64-NEXT: comisd %xmm1, %xmm0
870; X64-NEXT: setnp %al
871; X64-NEXT: sete %cl
872; X64-NEXT: andb %al, %cl
873; X64-NEXT: movzbl %cl, %eax
874; X64-NEXT: retq
875 %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
876 ret i32 %res
877}
878declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
879
880define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
881; X32-LABEL: test_mm_comige_sd:
882; X32: # BB#0:
883; X32-NEXT: comisd %xmm1, %xmm0
884; X32-NEXT: setae %al
885; X32-NEXT: movzbl %al, %eax
886; X32-NEXT: retl
887;
888; X64-LABEL: test_mm_comige_sd:
889; X64: # BB#0:
890; X64-NEXT: comisd %xmm1, %xmm0
891; X64-NEXT: setae %al
892; X64-NEXT: movzbl %al, %eax
893; X64-NEXT: retq
894 %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
895 ret i32 %res
896}
897declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
898
899define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
900; X32-LABEL: test_mm_comigt_sd:
901; X32: # BB#0:
902; X32-NEXT: comisd %xmm1, %xmm0
903; X32-NEXT: seta %al
904; X32-NEXT: movzbl %al, %eax
905; X32-NEXT: retl
906;
907; X64-LABEL: test_mm_comigt_sd:
908; X64: # BB#0:
909; X64-NEXT: comisd %xmm1, %xmm0
910; X64-NEXT: seta %al
911; X64-NEXT: movzbl %al, %eax
912; X64-NEXT: retq
913 %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
914 ret i32 %res
915}
916declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
917
918define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
919; X32-LABEL: test_mm_comile_sd:
920; X32: # BB#0:
921; X32-NEXT: comisd %xmm0, %xmm1
922; X32-NEXT: setae %al
923; X32-NEXT: movzbl %al, %eax
924; X32-NEXT: retl
925;
926; X64-LABEL: test_mm_comile_sd:
927; X64: # BB#0:
928; X64-NEXT: comisd %xmm0, %xmm1
929; X64-NEXT: setae %al
930; X64-NEXT: movzbl %al, %eax
931; X64-NEXT: retq
932 %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
933 ret i32 %res
934}
935declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
936
937define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
938; X32-LABEL: test_mm_comilt_sd:
939; X32: # BB#0:
940; X32-NEXT: comisd %xmm0, %xmm1
941; X32-NEXT: seta %al
942; X32-NEXT: movzbl %al, %eax
943; X32-NEXT: retl
944;
945; X64-LABEL: test_mm_comilt_sd:
946; X64: # BB#0:
947; X64-NEXT: comisd %xmm0, %xmm1
948; X64-NEXT: seta %al
949; X64-NEXT: movzbl %al, %eax
950; X64-NEXT: retq
951 %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
952 ret i32 %res
953}
954declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
955
956define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
957; X32-LABEL: test_mm_comineq_sd:
958; X32: # BB#0:
959; X32-NEXT: comisd %xmm1, %xmm0
960; X32-NEXT: setp %al
961; X32-NEXT: setne %cl
962; X32-NEXT: orb %al, %cl
963; X32-NEXT: movzbl %cl, %eax
964; X32-NEXT: retl
965;
966; X64-LABEL: test_mm_comineq_sd:
967; X64: # BB#0:
968; X64-NEXT: comisd %xmm1, %xmm0
969; X64-NEXT: setp %al
970; X64-NEXT: setne %cl
971; X64-NEXT: orb %al, %cl
972; X64-NEXT: movzbl %cl, %eax
973; X64-NEXT: retq
974 %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
975 ret i32 %res
976}
977declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
978
979define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
980; X32-LABEL: test_mm_cvtepi32_pd:
981; X32: # BB#0:
982; X32-NEXT: cvtdq2pd %xmm0, %xmm0
983; X32-NEXT: retl
984;
985; X64-LABEL: test_mm_cvtepi32_pd:
986; X64: # BB#0:
987; X64-NEXT: cvtdq2pd %xmm0, %xmm0
988; X64-NEXT: retq
989 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
990 %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %arg0)
991 ret <2 x double> %res
992}
993declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
994
995define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
996; X32-LABEL: test_mm_cvtepi32_ps:
997; X32: # BB#0:
998; X32-NEXT: cvtdq2ps %xmm0, %xmm0
999; X32-NEXT: retl
1000;
1001; X64-LABEL: test_mm_cvtepi32_ps:
1002; X64: # BB#0:
1003; X64-NEXT: cvtdq2ps %xmm0, %xmm0
1004; X64-NEXT: retq
1005 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1006 %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %arg0)
1007 ret <4 x float> %res
1008}
1009declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
1010
1011define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
1012; X32-LABEL: test_mm_cvtpd_epi32:
1013; X32: # BB#0:
1014; X32-NEXT: cvtpd2dq %xmm0, %xmm0
1015; X32-NEXT: retl
1016;
1017; X64-LABEL: test_mm_cvtpd_epi32:
1018; X64: # BB#0:
1019; X64-NEXT: cvtpd2dq %xmm0, %xmm0
1020; X64-NEXT: retq
1021 %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
1022 %bc = bitcast <4 x i32> %res to <2 x i64>
1023 ret <2 x i64> %bc
1024}
1025declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
1026
1027define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind {
1028; X32-LABEL: test_mm_cvtpd_ps:
1029; X32: # BB#0:
1030; X32-NEXT: cvtpd2ps %xmm0, %xmm0
1031; X32-NEXT: retl
1032;
1033; X64-LABEL: test_mm_cvtpd_ps:
1034; X64: # BB#0:
1035; X64-NEXT: cvtpd2ps %xmm0, %xmm0
1036; X64-NEXT: retq
1037 %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
1038 ret <4 x float> %res
1039}
1040declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
1041
1042define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind {
1043; X32-LABEL: test_mm_cvtps_epi32:
1044; X32: # BB#0:
1045; X32-NEXT: cvtps2dq %xmm0, %xmm0
1046; X32-NEXT: retl
1047;
1048; X64-LABEL: test_mm_cvtps_epi32:
1049; X64: # BB#0:
1050; X64-NEXT: cvtps2dq %xmm0, %xmm0
1051; X64-NEXT: retq
1052 %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
1053 %bc = bitcast <4 x i32> %res to <2 x i64>
1054 ret <2 x i64> %bc
1055}
1056declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
1057
1058define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind {
1059; X32-LABEL: test_mm_cvtps_pd:
1060; X32: # BB#0:
1061; X32-NEXT: cvtps2pd %xmm0, %xmm0
1062; X32-NEXT: retl
1063;
1064; X64-LABEL: test_mm_cvtps_pd:
1065; X64: # BB#0:
1066; X64-NEXT: cvtps2pd %xmm0, %xmm0
1067; X64-NEXT: retq
1068 %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
1069 ret <2 x double> %res
1070}
1071declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
1072
1073define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
1074; X32-LABEL: test_mm_cvtsd_f64:
1075; X32: # BB#0:
1076; X32-NEXT: pushl %ebp
1077; X32-NEXT: movl %esp, %ebp
1078; X32-NEXT: andl $-8, %esp
1079; X32-NEXT: subl $8, %esp
1080; X32-NEXT: movlps %xmm0, (%esp)
1081; X32-NEXT: fldl (%esp)
1082; X32-NEXT: movl %ebp, %esp
1083; X32-NEXT: popl %ebp
1084; X32-NEXT: retl
1085;
1086; X64-LABEL: test_mm_cvtsd_f64:
1087; X64: # BB#0:
1088; X64-NEXT: retq
1089 %res = extractelement <2 x double> %a0, i32 0
1090 ret double %res
1091}
1092
1093define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
1094; X32-LABEL: test_mm_cvtsd_si32:
1095; X32: # BB#0:
1096; X32-NEXT: cvtsd2si %xmm0, %eax
1097; X32-NEXT: retl
1098;
1099; X64-LABEL: test_mm_cvtsd_si32:
1100; X64: # BB#0:
1101; X64-NEXT: cvtsd2si %xmm0, %eax
1102; X64-NEXT: retq
1103 %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
1104 ret i32 %res
1105}
1106declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
1107
1108define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
1109; X32-LABEL: test_mm_cvtsi128_si32:
1110; X32: # BB#0:
1111; X32-NEXT: movd %xmm0, %eax
1112; X32-NEXT: retl
1113;
1114; X64-LABEL: test_mm_cvtsi128_si32:
1115; X64: # BB#0:
1116; X64-NEXT: movd %xmm0, %eax
1117; X64-NEXT: retq
1118 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1119 %res = extractelement <4 x i32> %arg0, i32 0
1120 ret i32 %res
1121}
1122
1123define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
1124; X32-LABEL: test_mm_cvtsi32_sd:
1125; X32: # BB#0:
1126; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1127; X32-NEXT: cvtsi2sdl %eax, %xmm1
1128; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1129; X32-NEXT: retl
1130;
1131; X64-LABEL: test_mm_cvtsi32_sd:
1132; X64: # BB#0:
1133; X64-NEXT: cvtsi2sdl %edi, %xmm1
1134; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1135; X64-NEXT: retq
1136 %cvt = sitofp i32 %a1 to double
1137 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1138 ret <2 x double> %res
1139}
1140
1141define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
1142; X32-LABEL: test_mm_cvtsi32_si128:
1143; X32: # BB#0:
1144; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1145; X32-NEXT: retl
1146;
1147; X64-LABEL: test_mm_cvtsi32_si128:
1148; X64: # BB#0:
1149; X64-NEXT: movd %edi, %xmm0
1150; X64-NEXT: retq
1151 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
1152 %res1 = insertelement <4 x i32> %res0, i32 0, i32 1
1153 %res2 = insertelement <4 x i32> %res1, i32 0, i32 2
1154 %res3 = insertelement <4 x i32> %res2, i32 0, i32 3
1155 %res = bitcast <4 x i32> %res3 to <2 x i64>
1156 ret <2 x i64> %res
1157}
1158
1159define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
1160; X32-LABEL: test_mm_cvtss_sd:
1161; X32: # BB#0:
1162; X32-NEXT: cvtss2sd %xmm1, %xmm1
1163; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1164; X32-NEXT: retl
1165;
1166; X64-LABEL: test_mm_cvtss_sd:
1167; X64: # BB#0:
1168; X64-NEXT: cvtss2sd %xmm1, %xmm1
1169; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1170; X64-NEXT: retq
1171 %ext = extractelement <4 x float> %a1, i32 0
1172 %cvt = fpext float %ext to double
1173 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1174 ret <2 x double> %res
1175}
1176
1177define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
1178; X32-LABEL: test_mm_cvttpd_epi32:
1179; X32: # BB#0:
1180; X32-NEXT: cvttpd2dq %xmm0, %xmm0
1181; X32-NEXT: retl
1182;
1183; X64-LABEL: test_mm_cvttpd_epi32:
1184; X64: # BB#0:
1185; X64-NEXT: cvttpd2dq %xmm0, %xmm0
1186; X64-NEXT: retq
1187 %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
1188 %bc = bitcast <4 x i32> %res to <2 x i64>
1189 ret <2 x i64> %bc
1190}
1191declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
1192
1193define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
1194; X32-LABEL: test_mm_cvttps_epi32:
1195; X32: # BB#0:
1196; X32-NEXT: cvttps2dq %xmm0, %xmm0
1197; X32-NEXT: retl
1198;
1199; X64-LABEL: test_mm_cvttps_epi32:
1200; X64: # BB#0:
1201; X64-NEXT: cvttps2dq %xmm0, %xmm0
1202; X64-NEXT: retq
1203 %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
1204 %bc = bitcast <4 x i32> %res to <2 x i64>
1205 ret <2 x i64> %bc
1206}
1207declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
1208
1209define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
1210; X32-LABEL: test_mm_cvttsd_si32:
1211; X32: # BB#0:
1212; X32-NEXT: cvttsd2si %xmm0, %eax
1213; X32-NEXT: retl
1214;
1215; X64-LABEL: test_mm_cvttsd_si32:
1216; X64: # BB#0:
1217; X64-NEXT: cvttsd2si %xmm0, %eax
1218; X64-NEXT: retq
1219 %ext = extractelement <2 x double> %a0, i32 0
1220 %res = fptosi double %ext to i32
1221 ret i32 %res
1222}
1223
1224define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1225; X32-LABEL: test_mm_div_pd:
1226; X32: # BB#0:
1227; X32-NEXT: divpd %xmm1, %xmm0
1228; X32-NEXT: retl
1229;
1230; X64-LABEL: test_mm_div_pd:
1231; X64: # BB#0:
1232; X64-NEXT: divpd %xmm1, %xmm0
1233; X64-NEXT: retq
1234 %res = fdiv <2 x double> %a0, %a1
1235 ret <2 x double> %res
1236}
1237
1238define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1239; X32-LABEL: test_mm_div_sd:
1240; X32: # BB#0:
1241; X32-NEXT: divsd %xmm1, %xmm0
1242; X32-NEXT: retl
1243;
1244; X64-LABEL: test_mm_div_sd:
1245; X64: # BB#0:
1246; X64-NEXT: divsd %xmm1, %xmm0
1247; X64-NEXT: retq
1248 %ext0 = extractelement <2 x double> %a0, i32 0
1249 %ext1 = extractelement <2 x double> %a1, i32 0
1250 %fdiv = fdiv double %ext0, %ext1
1251 %res = insertelement <2 x double> %a0, double %fdiv, i32 0
1252 ret <2 x double> %res
1253}
1254
1255define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind {
1256; X32-LABEL: test_mm_extract_epi16:
1257; X32: # BB#0:
1258; X32-NEXT: pextrw $1, %xmm0, %eax
1259; X32-NEXT: movzwl %ax, %eax
1260; X32-NEXT: retl
1261;
1262; X64-LABEL: test_mm_extract_epi16:
1263; X64: # BB#0:
1264; X64-NEXT: pextrw $1, %xmm0, %eax
1265; X64-NEXT: movzwl %ax, %eax
1266; X64-NEXT: retq
1267 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1268 %ext = extractelement <8 x i16> %arg0, i32 1
1269 %res = zext i16 %ext to i32
1270 ret i32 %res
1271}
1272
1273define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
1274; X32-LABEL: test_mm_insert_epi16:
1275; X32: # BB#0:
1276; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
1277; X32-NEXT: pinsrw $1, %eax, %xmm0
1278; X32-NEXT: retl
1279;
1280; X64-LABEL: test_mm_insert_epi16:
1281; X64: # BB#0:
1282; X64-NEXT: pinsrw $1, %edi, %xmm0
1283; X64-NEXT: retq
1284 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1285 %res = insertelement <8 x i16> %arg0, i16 %a1,i32 1
1286 %bc = bitcast <8 x i16> %res to <2 x i64>
1287 ret <2 x i64> %bc
1288}
1289
1290define void @test_mm_lfence() nounwind {
1291; X32-LABEL: test_mm_lfence:
1292; X32: # BB#0:
1293; X32-NEXT: lfence
1294; X32-NEXT: retl
1295;
1296; X64-LABEL: test_mm_lfence:
1297; X64: # BB#0:
1298; X64-NEXT: lfence
1299; X64-NEXT: retq
1300 call void @llvm.x86.sse2.lfence()
1301 ret void
1302}
1303declare void @llvm.x86.sse2.lfence() nounwind readnone
1304
1305define <2 x double> @test_mm_load_pd(double* %a0) nounwind {
1306; X32-LABEL: test_mm_load_pd:
1307; X32: # BB#0:
1308; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1309; X32-NEXT: movaps (%eax), %xmm0
1310; X32-NEXT: retl
1311;
1312; X64-LABEL: test_mm_load_pd:
1313; X64: # BB#0:
1314; X64-NEXT: movaps (%rdi), %xmm0
1315; X64-NEXT: retq
1316 %arg0 = bitcast double* %a0 to <2 x double>*
1317 %res = load <2 x double>, <2 x double>* %arg0, align 16
1318 ret <2 x double> %res
1319}
1320
1321define <2 x double> @test_mm_load_sd(double* %a0) nounwind {
1322; X32-LABEL: test_mm_load_sd:
1323; X32: # BB#0:
1324; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1325; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1326; X32-NEXT: retl
1327;
1328; X64-LABEL: test_mm_load_sd:
1329; X64: # BB#0:
1330; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1331; X64-NEXT: retq
1332 %ld = load double, double* %a0, align 1
1333 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1334 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
1335 ret <2 x double> %res1
1336}
1337
1338define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind {
1339; X32-LABEL: test_mm_load_si128:
1340; X32: # BB#0:
1341; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1342; X32-NEXT: movaps (%eax), %xmm0
1343; X32-NEXT: retl
1344;
1345; X64-LABEL: test_mm_load_si128:
1346; X64: # BB#0:
1347; X64-NEXT: movaps (%rdi), %xmm0
1348; X64-NEXT: retq
1349 %res = load <2 x i64>, <2 x i64>* %a0, align 16
1350 ret <2 x i64> %res
1351}
1352
1353define <2 x double> @test_mm_load1_pd(double* %a0) nounwind {
1354; X32-LABEL: test_mm_load1_pd:
1355; X32: # BB#0:
1356; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1357; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1358; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1359; X32-NEXT: retl
1360;
1361; X64-LABEL: test_mm_load1_pd:
1362; X64: # BB#0:
1363; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1364; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1365; X64-NEXT: retq
1366 %ld = load double, double* %a0, align 8
1367 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1368 %res1 = insertelement <2 x double> %res0, double %ld, i32 1
1369 ret <2 x double> %res1
1370}
1371
1372define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind {
1373; X32-LABEL: test_mm_loadh_pd:
1374; X32: # BB#0:
1375; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1376; X32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1377; X32-NEXT: retl
1378;
1379; X64-LABEL: test_mm_loadh_pd:
1380; X64: # BB#0:
1381; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1382; X64-NEXT: retq
1383 %ld = load double, double* %a1, align 8
1384 %res = insertelement <2 x double> %a0, double %ld, i32 1
1385 ret <2 x double> %res
1386}
1387
1388define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind {
1389; X32-LABEL: test_mm_loadl_epi64:
1390; X32: # BB#0:
1391; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1392; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1393; X32-NEXT: retl
1394;
1395; X64-LABEL: test_mm_loadl_epi64:
1396; X64: # BB#0:
1397; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1398; X64-NEXT: retq
1399 %bc = bitcast <2 x i64>* %a1 to i64*
1400 %ld = load i64, i64* %bc, align 1
1401 %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
1402 %res1 = insertelement <2 x i64> %res0, i64 0, i32 1
1403 ret <2 x i64> %res1
1404}
1405
1406define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind {
1407; X32-LABEL: test_mm_loadl_pd:
1408; X32: # BB#0:
1409; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1410; X32-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1411; X32-NEXT: retl
1412;
1413; X64-LABEL: test_mm_loadl_pd:
1414; X64: # BB#0:
1415; X64-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1416; X64-NEXT: retq
1417 %ld = load double, double* %a1, align 8
1418 %res = insertelement <2 x double> %a0, double %ld, i32 0
1419 ret <2 x double> %res
1420}
1421
1422define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
1423; X32-LABEL: test_mm_loadr_pd:
1424; X32: # BB#0:
1425; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1426; X32-NEXT: movapd (%eax), %xmm0
1427; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1428; X32-NEXT: retl
1429;
1430; X64-LABEL: test_mm_loadr_pd:
1431; X64: # BB#0:
1432; X64-NEXT: movapd (%rdi), %xmm0
1433; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1434; X64-NEXT: retq
1435 %arg0 = bitcast double* %a0 to <2 x double>*
1436 %ld = load <2 x double>, <2 x double>* %arg0, align 16
1437 %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1438 ret <2 x double> %res
1439}
1440
1441define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind {
1442; X32-LABEL: test_mm_loadu_pd:
1443; X32: # BB#0:
1444; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1445; X32-NEXT: movups (%eax), %xmm0
1446; X32-NEXT: retl
1447;
1448; X64-LABEL: test_mm_loadu_pd:
1449; X64: # BB#0:
1450; X64-NEXT: movups (%rdi), %xmm0
1451; X64-NEXT: retq
1452 %arg0 = bitcast double* %a0 to <2 x double>*
1453 %res = load <2 x double>, <2 x double>* %arg0, align 1
1454 ret <2 x double> %res
1455}
1456
1457define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind {
1458; X32-LABEL: test_mm_loadu_si128:
1459; X32: # BB#0:
1460; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1461; X32-NEXT: movups (%eax), %xmm0
1462; X32-NEXT: retl
1463;
1464; X64-LABEL: test_mm_loadu_si128:
1465; X64: # BB#0:
1466; X64-NEXT: movups (%rdi), %xmm0
1467; X64-NEXT: retq
1468 %res = load <2 x i64>, <2 x i64>* %a0, align 1
1469 ret <2 x i64> %res
1470}
1471
1472define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1473; X32-LABEL: test_mm_madd_epi16:
1474; X32: # BB#0:
1475; X32-NEXT: pmaddwd %xmm1, %xmm0
1476; X32-NEXT: retl
1477;
1478; X64-LABEL: test_mm_madd_epi16:
1479; X64: # BB#0:
1480; X64-NEXT: pmaddwd %xmm1, %xmm0
1481; X64-NEXT: retq
1482 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1483 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1484 %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1)
1485 %bc = bitcast <4 x i32> %res to <2 x i64>
1486 ret <2 x i64> %bc
1487}
1488declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
1489
1490define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind {
1491; X32-LABEL: test_mm_maskmoveu_si128:
1492; X32: # BB#0:
1493; X32-NEXT: pushl %edi
1494; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
1495; X32-NEXT: maskmovdqu %xmm1, %xmm0
1496; X32-NEXT: popl %edi
1497; X32-NEXT: retl
1498;
1499; X64-LABEL: test_mm_maskmoveu_si128:
1500; X64: # BB#0:
1501; X64-NEXT: maskmovdqu %xmm1, %xmm0
1502; X64-NEXT: retq
1503 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1504 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1505 call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2)
1506 ret void
1507}
1508declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
1509
1510define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1511; X32-LABEL: test_mm_max_epi16:
1512; X32: # BB#0:
1513; X32-NEXT: pmaxsw %xmm1, %xmm0
1514; X32-NEXT: retl
1515;
1516; X64-LABEL: test_mm_max_epi16:
1517; X64: # BB#0:
1518; X64-NEXT: pmaxsw %xmm1, %xmm0
1519; X64-NEXT: retq
1520 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1521 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1522 %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %arg0, <8 x i16> %arg1)
1523 %bc = bitcast <8 x i16> %res to <2 x i64>
1524 ret <2 x i64> %bc
1525}
1526declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
1527
1528define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1529; X32-LABEL: test_mm_max_epu8:
1530; X32: # BB#0:
1531; X32-NEXT: pmaxub %xmm1, %xmm0
1532; X32-NEXT: retl
1533;
1534; X64-LABEL: test_mm_max_epu8:
1535; X64: # BB#0:
1536; X64-NEXT: pmaxub %xmm1, %xmm0
1537; X64-NEXT: retq
1538 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1539 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1540 %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %arg0, <16 x i8> %arg1)
1541 %bc = bitcast <16 x i8> %res to <2 x i64>
1542 ret <2 x i64> %bc
1543}
1544declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
1545
1546define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1547; X32-LABEL: test_mm_max_pd:
1548; X32: # BB#0:
1549; X32-NEXT: maxpd %xmm1, %xmm0
1550; X32-NEXT: retl
1551;
1552; X64-LABEL: test_mm_max_pd:
1553; X64: # BB#0:
1554; X64-NEXT: maxpd %xmm1, %xmm0
1555; X64-NEXT: retq
1556 %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
1557 ret <2 x double> %res
1558}
1559declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
1560
1561define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1562; X32-LABEL: test_mm_max_sd:
1563; X32: # BB#0:
1564; X32-NEXT: maxsd %xmm1, %xmm0
1565; X32-NEXT: retl
1566;
1567; X64-LABEL: test_mm_max_sd:
1568; X64: # BB#0:
1569; X64-NEXT: maxsd %xmm1, %xmm0
1570; X64-NEXT: retq
1571 %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
1572 ret <2 x double> %res
1573}
1574declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
1575
1576define void @test_mm_mfence() nounwind {
1577; X32-LABEL: test_mm_mfence:
1578; X32: # BB#0:
1579; X32-NEXT: mfence
1580; X32-NEXT: retl
1581;
1582; X64-LABEL: test_mm_mfence:
1583; X64: # BB#0:
1584; X64-NEXT: mfence
1585; X64-NEXT: retq
1586 call void @llvm.x86.sse2.mfence()
1587 ret void
1588}
1589declare void @llvm.x86.sse2.mfence() nounwind readnone
1590
1591define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1592; X32-LABEL: test_mm_min_epi16:
1593; X32: # BB#0:
1594; X32-NEXT: pminsw %xmm1, %xmm0
1595; X32-NEXT: retl
1596;
1597; X64-LABEL: test_mm_min_epi16:
1598; X64: # BB#0:
1599; X64-NEXT: pminsw %xmm1, %xmm0
1600; X64-NEXT: retq
1601 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1602 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1603 %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %arg0, <8 x i16> %arg1)
1604 %bc = bitcast <8 x i16> %res to <2 x i64>
1605 ret <2 x i64> %bc
1606}
1607declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
1608
1609define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1610; X32-LABEL: test_mm_min_epu8:
1611; X32: # BB#0:
1612; X32-NEXT: pminub %xmm1, %xmm0
1613; X32-NEXT: retl
1614;
1615; X64-LABEL: test_mm_min_epu8:
1616; X64: # BB#0:
1617; X64-NEXT: pminub %xmm1, %xmm0
1618; X64-NEXT: retq
1619 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1620 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1621 %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %arg0, <16 x i8> %arg1)
1622 %bc = bitcast <16 x i8> %res to <2 x i64>
1623 ret <2 x i64> %bc
1624}
1625declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
1626
1627define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1628; X32-LABEL: test_mm_min_pd:
1629; X32: # BB#0:
1630; X32-NEXT: minpd %xmm1, %xmm0
1631; X32-NEXT: retl
1632;
1633; X64-LABEL: test_mm_min_pd:
1634; X64: # BB#0:
1635; X64-NEXT: minpd %xmm1, %xmm0
1636; X64-NEXT: retq
1637 %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
1638 ret <2 x double> %res
1639}
1640declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
1641
1642define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1643; X32-LABEL: test_mm_min_sd:
1644; X32: # BB#0:
1645; X32-NEXT: minsd %xmm1, %xmm0
1646; X32-NEXT: retl
1647;
1648; X64-LABEL: test_mm_min_sd:
1649; X64: # BB#0:
1650; X64-NEXT: minsd %xmm1, %xmm0
1651; X64-NEXT: retq
1652 %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
1653 ret <2 x double> %res
1654}
1655declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
1656
1657define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind {
1658; X32-LABEL: test_mm_movemask_epi8:
1659; X32: # BB#0:
1660; X32-NEXT: pmovmskb %xmm0, %eax
1661; X32-NEXT: retl
1662;
1663; X64-LABEL: test_mm_movemask_epi8:
1664; X64: # BB#0:
1665; X64-NEXT: pmovmskb %xmm0, %eax
1666; X64-NEXT: retq
1667 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1668 %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0)
1669 ret i32 %res
1670}
1671declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
1672
1673define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind {
1674; X32-LABEL: test_mm_movemask_pd:
1675; X32: # BB#0:
1676; X32-NEXT: movmskpd %xmm0, %eax
1677; X32-NEXT: retl
1678;
1679; X64-LABEL: test_mm_movemask_pd:
1680; X64: # BB#0:
1681; X64-NEXT: movmskpd %xmm0, %eax
1682; X64-NEXT: retq
1683 %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
1684 ret i32 %res
1685}
1686declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
1687
1688define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) {
1689; X32-LABEL: test_mm_mul_epu32:
1690; X32: # BB#0:
1691; X32-NEXT: pmuludq %xmm1, %xmm0
1692; X32-NEXT: retl
1693;
1694; X64-LABEL: test_mm_mul_epu32:
1695; X64: # BB#0:
1696; X64-NEXT: pmuludq %xmm1, %xmm0
1697; X64-NEXT: retq
1698 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1699 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1700 %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %arg0, <4 x i32> %arg1)
1701 ret <2 x i64> %res
1702}
1703declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
1704
1705define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1706; X32-LABEL: test_mm_mul_pd:
1707; X32: # BB#0:
1708; X32-NEXT: mulpd %xmm1, %xmm0
1709; X32-NEXT: retl
1710;
1711; X64-LABEL: test_mm_mul_pd:
1712; X64: # BB#0:
1713; X64-NEXT: mulpd %xmm1, %xmm0
1714; X64-NEXT: retq
1715 %res = fmul <2 x double> %a0, %a1
1716 ret <2 x double> %res
1717}
1718
1719define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1720; X32-LABEL: test_mm_mul_sd:
1721; X32: # BB#0:
1722; X32-NEXT: mulsd %xmm1, %xmm0
1723; X32-NEXT: retl
1724;
1725; X64-LABEL: test_mm_mul_sd:
1726; X64: # BB#0:
1727; X64-NEXT: mulsd %xmm1, %xmm0
1728; X64-NEXT: retq
1729 %ext0 = extractelement <2 x double> %a0, i32 0
1730 %ext1 = extractelement <2 x double> %a1, i32 0
1731 %fmul = fmul double %ext0, %ext1
1732 %res = insertelement <2 x double> %a0, double %fmul, i32 0
1733 ret <2 x double> %res
1734}
1735
1736define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1737; X32-LABEL: test_mm_mulhi_epi16:
1738; X32: # BB#0:
1739; X32-NEXT: pmulhw %xmm1, %xmm0
1740; X32-NEXT: retl
1741;
1742; X64-LABEL: test_mm_mulhi_epi16:
1743; X64: # BB#0:
1744; X64-NEXT: pmulhw %xmm1, %xmm0
1745; X64-NEXT: retq
1746 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1747 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1748 %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1)
1749 %bc = bitcast <8 x i16> %res to <2 x i64>
1750 ret <2 x i64> %bc
1751}
1752declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
1753
1754define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) {
1755; X32-LABEL: test_mm_mulhi_epu16:
1756; X32: # BB#0:
1757; X32-NEXT: pmulhuw %xmm1, %xmm0
1758; X32-NEXT: retl
1759;
1760; X64-LABEL: test_mm_mulhi_epu16:
1761; X64: # BB#0:
1762; X64-NEXT: pmulhuw %xmm1, %xmm0
1763; X64-NEXT: retq
1764 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1765 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1766 %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1)
1767 %bc = bitcast <8 x i16> %res to <2 x i64>
1768 ret <2 x i64> %bc
1769}
1770declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
1771
1772define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1773; X32-LABEL: test_mm_mullo_epi16:
1774; X32: # BB#0:
1775; X32-NEXT: pmullw %xmm1, %xmm0
1776; X32-NEXT: retl
1777;
1778; X64-LABEL: test_mm_mullo_epi16:
1779; X64: # BB#0:
1780; X64-NEXT: pmullw %xmm1, %xmm0
1781; X64-NEXT: retq
1782 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1783 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1784 %res = mul <8 x i16> %arg0, %arg1
1785 %bc = bitcast <8 x i16> %res to <2 x i64>
1786 ret <2 x i64> %bc
1787}
1788
1789define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1790; X32-LABEL: test_mm_or_pd:
1791; X32: # BB#0:
1792; X32-NEXT: orps %xmm1, %xmm0
1793; X32-NEXT: retl
1794;
1795; X64-LABEL: test_mm_or_pd:
1796; X64: # BB#0:
1797; X64-NEXT: orps %xmm1, %xmm0
1798; X64-NEXT: retq
1799 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
1800 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
1801 %res = or <4 x i32> %arg0, %arg1
1802 %bc = bitcast <4 x i32> %res to <2 x double>
1803 ret <2 x double> %bc
1804}
1805
1806define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1807; X32-LABEL: test_mm_or_si128:
1808; X32: # BB#0:
1809; X32-NEXT: orps %xmm1, %xmm0
1810; X32-NEXT: retl
1811;
1812; X64-LABEL: test_mm_or_si128:
1813; X64: # BB#0:
1814; X64-NEXT: orps %xmm1, %xmm0
1815; X64-NEXT: retq
1816 %res = or <2 x i64> %a0, %a1
1817 ret <2 x i64> %res
1818}
1819
1820define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1821; X32-LABEL: test_mm_packs_epi16:
1822; X32: # BB#0:
1823; X32-NEXT: packsswb %xmm1, %xmm0
1824; X32-NEXT: retl
1825;
1826; X64-LABEL: test_mm_packs_epi16:
1827; X64: # BB#0:
1828; X64-NEXT: packsswb %xmm1, %xmm0
1829; X64-NEXT: retq
1830 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1831 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1832 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
1833 %bc = bitcast <16 x i8> %res to <2 x i64>
1834 ret <2 x i64> %bc
1835}
1836declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
1837
1838define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) {
1839; X32-LABEL: test_mm_packs_epi32:
1840; X32: # BB#0:
1841; X32-NEXT: packssdw %xmm1, %xmm0
1842; X32-NEXT: retl
1843;
1844; X64-LABEL: test_mm_packs_epi32:
1845; X64: # BB#0:
1846; X64-NEXT: packssdw %xmm1, %xmm0
1847; X64-NEXT: retq
1848 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1849 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1850 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1)
1851 %bc = bitcast <8 x i16> %res to <2 x i64>
1852 ret <2 x i64> %bc
1853}
1854declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
1855
1856define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1857; X32-LABEL: test_mm_packus_epi16:
1858; X32: # BB#0:
1859; X32-NEXT: packuswb %xmm1, %xmm0
1860; X32-NEXT: retl
1861;
1862; X64-LABEL: test_mm_packus_epi16:
1863; X64: # BB#0:
1864; X64-NEXT: packuswb %xmm1, %xmm0
1865; X64-NEXT: retq
1866 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1867 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1868 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
1869 %bc = bitcast <16 x i8> %res to <2 x i64>
1870 ret <2 x i64> %bc
1871}
1872declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
1873
1874define void @test_mm_pause() nounwind {
1875; X32-LABEL: test_mm_pause:
1876; X32: # BB#0:
1877; X32-NEXT: pause
1878; X32-NEXT: retl
1879;
1880; X64-LABEL: test_mm_pause:
1881; X64: # BB#0:
1882; X64-NEXT: pause
1883; X64-NEXT: retq
1884 call void @llvm.x86.sse2.pause()
1885 ret void
1886}
1887declare void @llvm.x86.sse2.pause() nounwind readnone
1888
1889define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1890; X32-LABEL: test_mm_sad_epu8:
1891; X32: # BB#0:
1892; X32-NEXT: psadbw %xmm1, %xmm0
1893; X32-NEXT: retl
1894;
1895; X64-LABEL: test_mm_sad_epu8:
1896; X64: # BB#0:
1897; X64-NEXT: psadbw %xmm1, %xmm0
1898; X64-NEXT: retq
1899 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1900 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1901 %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1)
1902 ret <2 x i64> %res
1903}
1904declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
1905
1906define <2 x double> @test_mm_setzero_pd() {
1907; X32-LABEL: test_mm_setzero_pd:
1908; X32: # BB#0:
1909; X32-NEXT: xorps %xmm0, %xmm0
1910; X32-NEXT: retl
1911;
1912; X64-LABEL: test_mm_setzero_pd:
1913; X64: # BB#0:
1914; X64-NEXT: xorps %xmm0, %xmm0
1915; X64-NEXT: retq
1916 ret <2 x double> zeroinitializer
1917}
1918
1919define <2 x i64> @test_mm_setzero_si128() {
1920; X32-LABEL: test_mm_setzero_si128:
1921; X32: # BB#0:
1922; X32-NEXT: xorps %xmm0, %xmm0
1923; X32-NEXT: retl
1924;
1925; X64-LABEL: test_mm_setzero_si128:
1926; X64: # BB#0:
1927; X64-NEXT: xorps %xmm0, %xmm0
1928; X64-NEXT: retq
1929 ret <2 x i64> zeroinitializer
1930}
1931
1932define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) {
1933; X32-LABEL: test_mm_shuffle_epi32:
1934; X32: # BB#0:
1935; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1936; X32-NEXT: retl
1937;
1938; X64-LABEL: test_mm_shuffle_epi32:
1939; X64: # BB#0:
1940; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1941; X64-NEXT: retq
1942 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1943 %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer
1944 %bc = bitcast <4 x i32> %res to <2 x i64>
1945 ret <2 x i64> %bc
1946}
1947
1948define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
1949; X32-LABEL: test_mm_shuffle_pd:
1950; X32: # BB#0:
1951; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
1952; X32-NEXT: retl
1953;
1954; X64-LABEL: test_mm_shuffle_pd:
1955; X64: # BB#0:
1956; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
1957; X64-NEXT: retq
1958 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
1959 ret <2 x double> %res
1960}
1961
1962define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) {
1963; X32-LABEL: test_mm_shufflehi_epi16:
1964; X32: # BB#0:
1965; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1966; X32-NEXT: retl
1967;
1968; X64-LABEL: test_mm_shufflehi_epi16:
1969; X64: # BB#0:
1970; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1971; X64-NEXT: retq
1972 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1973 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
1974 %bc = bitcast <8 x i16> %res to <2 x i64>
1975 ret <2 x i64> %bc
1976}
1977
1978define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) {
1979; X32-LABEL: test_mm_shufflelo_epi16:
1980; X32: # BB#0:
1981; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1982; X32-NEXT: retl
1983;
1984; X64-LABEL: test_mm_shufflelo_epi16:
1985; X64: # BB#0:
1986; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1987; X64-NEXT: retq
1988 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1989 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
1990 %bc = bitcast <8 x i16> %res to <2 x i64>
1991 ret <2 x i64> %bc
1992}
1993
1994define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1995; X32-LABEL: test_mm_sll_epi16:
1996; X32: # BB#0:
1997; X32-NEXT: psllw %xmm1, %xmm0
1998; X32-NEXT: retl
1999;
2000; X64-LABEL: test_mm_sll_epi16:
2001; X64: # BB#0:
2002; X64-NEXT: psllw %xmm1, %xmm0
2003; X64-NEXT: retq
2004 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2005 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2006 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1)
2007 %bc = bitcast <8 x i16> %res to <2 x i64>
2008 ret <2 x i64> %bc
2009}
2010declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
2011
2012define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2013; X32-LABEL: test_mm_sll_epi32:
2014; X32: # BB#0:
2015; X32-NEXT: pslld %xmm1, %xmm0
2016; X32-NEXT: retl
2017;
2018; X64-LABEL: test_mm_sll_epi32:
2019; X64: # BB#0:
2020; X64-NEXT: pslld %xmm1, %xmm0
2021; X64-NEXT: retq
2022 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2023 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2024 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1)
2025 %bc = bitcast <4 x i32> %res to <2 x i64>
2026 ret <2 x i64> %bc
2027}
2028declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
2029
2030define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) {
2031; X32-LABEL: test_mm_sll_epi64:
2032; X32: # BB#0:
2033; X32-NEXT: psllq %xmm1, %xmm0
2034; X32-NEXT: retl
2035;
2036; X64-LABEL: test_mm_sll_epi64:
2037; X64: # BB#0:
2038; X64-NEXT: psllq %xmm1, %xmm0
2039; X64-NEXT: retq
2040 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
2041 ret <2 x i64> %res
2042}
2043declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
2044
2045define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) {
2046; X32-LABEL: test_mm_slli_epi16:
2047; X32: # BB#0:
2048; X32-NEXT: psllw $1, %xmm0
2049; X32-NEXT: retl
2050;
2051; X64-LABEL: test_mm_slli_epi16:
2052; X64: # BB#0:
2053; X64-NEXT: psllw $1, %xmm0
2054; X64-NEXT: retq
2055 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2056 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1)
2057 %bc = bitcast <8 x i16> %res to <2 x i64>
2058 ret <2 x i64> %bc
2059}
2060declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
2061
2062define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) {
2063; X32-LABEL: test_mm_slli_epi32:
2064; X32: # BB#0:
2065; X32-NEXT: pslld $1, %xmm0
2066; X32-NEXT: retl
2067;
2068; X64-LABEL: test_mm_slli_epi32:
2069; X64: # BB#0:
2070; X64-NEXT: pslld $1, %xmm0
2071; X64-NEXT: retq
2072 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2073 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1)
2074 %bc = bitcast <4 x i32> %res to <2 x i64>
2075 ret <2 x i64> %bc
2076}
2077declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
2078
2079define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) {
2080; X32-LABEL: test_mm_slli_epi64:
2081; X32: # BB#0:
2082; X32-NEXT: psllq $1, %xmm0
2083; X32-NEXT: retl
2084;
2085; X64-LABEL: test_mm_slli_epi64:
2086; X64: # BB#0:
2087; X64-NEXT: psllq $1, %xmm0
2088; X64-NEXT: retq
2089 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1)
2090 ret <2 x i64> %res
2091}
2092declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
2093
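; The shuffle selects 5 zero bytes followed by bytes 0-10 of %a0, i.e. a whole-vector left shift by 5 bytes (pslldq $5).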
2094define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind {
2095; X32-LABEL: test_mm_slli_si128:
2096; X32: # BB#0:
2097; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
2098; X32-NEXT: retl
2099;
2100; X64-LABEL: test_mm_slli_si128:
2101; X64: # BB#0:
2102; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
2103; X64-NEXT: retq
2104 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2105 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
2106 %bc = bitcast <16 x i8> %res to <2 x i64>
2107 ret <2 x i64> %bc
2108}
2109
2110define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind {
2111; X32-LABEL: test_mm_sqrt_pd:
2112; X32: # BB#0:
2113; X32-NEXT: sqrtpd %xmm0, %xmm0
2114; X32-NEXT: retl
2115;
2116; X64-LABEL: test_mm_sqrt_pd:
2117; X64: # BB#0:
2118; X64-NEXT: sqrtpd %xmm0, %xmm0
2119; X64-NEXT: retq
2120 %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
2121 ret <2 x double> %res
2122}
2123declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
2124
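; The result's low element is the square root of %a0's low element; the high element is taken from %a1.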
2125define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2126; X32-LABEL: test_mm_sqrt_sd:
2127; X32: # BB#0:
2128; X32-NEXT: sqrtsd %xmm0, %xmm1
2129; X32-NEXT: movaps %xmm1, %xmm0
2130; X32-NEXT: retl
2131;
2132; X64-LABEL: test_mm_sqrt_sd:
2133; X64: # BB#0:
2134; X64-NEXT: sqrtsd %xmm0, %xmm1
2135; X64-NEXT: movaps %xmm1, %xmm0
2136; X64-NEXT: retq
2137 %call = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
2138 %ext0 = extractelement <2 x double> %call, i32 0
2139 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
2140 %ext1 = extractelement <2 x double> %a1, i32 1
2141 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
2142 ret <2 x double> %ins1
2143}
2144declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
2145
2146define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2147; X32-LABEL: test_mm_sra_epi16:
2148; X32: # BB#0:
2149; X32-NEXT: psraw %xmm1, %xmm0
2150; X32-NEXT: retl
2151;
2152; X64-LABEL: test_mm_sra_epi16:
2153; X64: # BB#0:
2154; X64-NEXT: psraw %xmm1, %xmm0
2155; X64-NEXT: retq
2156 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2157 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2158 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1)
2159 %bc = bitcast <8 x i16> %res to <2 x i64>
2160 ret <2 x i64> %bc
2161}
2162declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
2163
2164define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2165; X32-LABEL: test_mm_sra_epi32:
2166; X32: # BB#0:
2167; X32-NEXT: psrad %xmm1, %xmm0
2168; X32-NEXT: retl
2169;
2170; X64-LABEL: test_mm_sra_epi32:
2171; X64: # BB#0:
2172; X64-NEXT: psrad %xmm1, %xmm0
2173; X64-NEXT: retq
2174 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2175 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2176 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1)
2177 %bc = bitcast <4 x i32> %res to <2 x i64>
2178 ret <2 x i64> %bc
2179}
2180declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
2181
2182define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) {
2183; X32-LABEL: test_mm_srai_epi16:
2184; X32: # BB#0:
2185; X32-NEXT: psraw $1, %xmm0
2186; X32-NEXT: retl
2187;
2188; X64-LABEL: test_mm_srai_epi16:
2189; X64: # BB#0:
2190; X64-NEXT: psraw $1, %xmm0
2191; X64-NEXT: retq
2192 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2193 %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1)
2194 %bc = bitcast <8 x i16> %res to <2 x i64>
2195 ret <2 x i64> %bc
2196}
2197declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
2198
2199define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) {
2200; X32-LABEL: test_mm_srai_epi32:
2201; X32: # BB#0:
2202; X32-NEXT: psrad $1, %xmm0
2203; X32-NEXT: retl
2204;
2205; X64-LABEL: test_mm_srai_epi32:
2206; X64: # BB#0:
2207; X64-NEXT: psrad $1, %xmm0
2208; X64-NEXT: retq
2209 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2210 %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1)
2211 %bc = bitcast <4 x i32> %res to <2 x i64>
2212 ret <2 x i64> %bc
2213}
2214declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
2215
2216define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2217; X32-LABEL: test_mm_srl_epi16:
2218; X32: # BB#0:
2219; X32-NEXT: psrlw %xmm1, %xmm0
2220; X32-NEXT: retl
2221;
2222; X64-LABEL: test_mm_srl_epi16:
2223; X64: # BB#0:
2224; X64-NEXT: psrlw %xmm1, %xmm0
2225; X64-NEXT: retq
2226 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2227 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2228 %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1)
2229 %bc = bitcast <8 x i16> %res to <2 x i64>
2230 ret <2 x i64> %bc
2231}
2232declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
2233
2234define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2235; X32-LABEL: test_mm_srl_epi32:
2236; X32: # BB#0:
2237; X32-NEXT: psrld %xmm1, %xmm0
2238; X32-NEXT: retl
2239;
2240; X64-LABEL: test_mm_srl_epi32:
2241; X64: # BB#0:
2242; X64-NEXT: psrld %xmm1, %xmm0
2243; X64-NEXT: retq
2244 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2245 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2246 %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1)
2247 %bc = bitcast <4 x i32> %res to <2 x i64>
2248 ret <2 x i64> %bc
2249}
2250declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
2251
2252define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
2253; X32-LABEL: test_mm_srl_epi64:
2254; X32: # BB#0:
2255; X32-NEXT: psrlq %xmm1, %xmm0
2256; X32-NEXT: retl
2257;
2258; X64-LABEL: test_mm_srl_epi64:
2259; X64: # BB#0:
2260; X64-NEXT: psrlq %xmm1, %xmm0
2261; X64-NEXT: retq
2262 %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
2263 ret <2 x i64> %res
2264}
2265declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
2266
2267define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) {
2268; X32-LABEL: test_mm_srli_epi16:
2269; X32: # BB#0:
2270; X32-NEXT: psrlw $1, %xmm0
2271; X32-NEXT: retl
2272;
2273; X64-LABEL: test_mm_srli_epi16:
2274; X64: # BB#0:
2275; X64-NEXT: psrlw $1, %xmm0
2276; X64-NEXT: retq
2277 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2278 %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1)
2279 %bc = bitcast <8 x i16> %res to <2 x i64>
2280 ret <2 x i64> %bc
2281}
2282declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
2283
2284define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) {
2285; X32-LABEL: test_mm_srli_epi32:
2286; X32: # BB#0:
2287; X32-NEXT: psrld $1, %xmm0
2288; X32-NEXT: retl
2289;
2290; X64-LABEL: test_mm_srli_epi32:
2291; X64: # BB#0:
2292; X64-NEXT: psrld $1, %xmm0
2293; X64-NEXT: retq
2294 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2295 %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1)
2296 %bc = bitcast <4 x i32> %res to <2 x i64>
2297 ret <2 x i64> %bc
2298}
2299declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
2300
2301define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) {
2302; X32-LABEL: test_mm_srli_epi64:
2303; X32: # BB#0:
2304; X32-NEXT: psrlq $1, %xmm0
2305; X32-NEXT: retl
2306;
2307; X64-LABEL: test_mm_srli_epi64:
2308; X64: # BB#0:
2309; X64-NEXT: psrlq $1, %xmm0
2310; X64-NEXT: retq
2311 %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1)
2312 ret <2 x i64> %res
2313}
2314declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
2315
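; The shuffle selects bytes 5-15 of %a0 followed by 5 zero bytes, i.e. a whole-vector right shift by 5 bytes (psrldq $5).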
2316define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind {
2317; X32-LABEL: test_mm_srli_si128:
2318; X32: # BB#0:
2319; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
2320; X32-NEXT: retl
2321;
2322; X64-LABEL: test_mm_srli_si128:
2323; X64: # BB#0:
2324; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
2325; X64-NEXT: retq
2326 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2327 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
2328 %bc = bitcast <16 x i8> %res to <2 x i64>
2329 ret <2 x i64> %bc
2330}
2331
2332define void @test_mm_store_pd(double *%a0, <2 x double> %a1) {
2333; X32-LABEL: test_mm_store_pd:
2334; X32: # BB#0:
2335; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
2336; X32-NEXT: movaps %xmm0, (%eax)
2337; X32-NEXT: retl
2338;
2339; X64-LABEL: test_mm_store_pd:
2340; X64: # BB#0:
2341; X64-NEXT: movaps %xmm0, (%rdi)
2342; X64-NEXT: retq
2343 %arg0 = bitcast double* %a0 to <2 x double>*
2344 store <2 x double> %a1, <2 x double>* %arg0, align 16
2345 ret void
2346}
2347
2348define void @test_mm_store_sd(double *%a0, <2 x double> %a1) {
2349; X32-LABEL: test_mm_store_sd:
2350; X32: # BB#0:
2351; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
2352; X32-NEXT: movsd %xmm0, (%eax)
2353; X32-NEXT: retl
2354;
2355; X64-LABEL: test_mm_store_sd:
2356; X64: # BB#0:
2357; X64-NEXT: movsd %xmm0, (%rdi)
2358; X64-NEXT: retq
2359 %ext = extractelement <2 x double> %a1, i32 0
2360 store double %ext, double* %a0, align 1
2361 ret void
2362}
2363
2364define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) {
2365; X32-LABEL: test_mm_store_si128:
2366; X32: # BB#0:
2367; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
2368; X32-NEXT: movaps %xmm0, (%eax)
2369; X32-NEXT: retl
2370;
2371; X64-LABEL: test_mm_store_si128:
2372; X64: # BB#0:
2373; X64-NEXT: movaps %xmm0, (%rdi)
2374; X64-NEXT: retq
2375 store <2 x i64> %a1, <2 x i64>* %a0, align 16
2376 ret void
2377}
2378
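; The low double of %a1 is stored to both consecutive doubles at %a0.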
2379define void @test_mm_store1_sd(double *%a0, <2 x double> %a1) {
2380; X32-LABEL: test_mm_store1_sd:
2381; X32: # BB#0:
2382; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
2383; X32-NEXT: movsd %xmm0, (%eax)
2384; X32-NEXT: movsd %xmm0, 8(%eax)
2385; X32-NEXT: retl
2386;
2387; X64-LABEL: test_mm_store1_sd:
2388; X64: # BB#0:
2389; X64-NEXT: movsd %xmm0, (%rdi)
2390; X64-NEXT: movsd %xmm0, 8(%rdi)
2391; X64-NEXT: retq
2392 %ext = extractelement <2 x double> %a1, i32 0
2393 %ptr0 = getelementptr inbounds double, double* %a0, i32 0
2394 %ptr1 = getelementptr inbounds double, double* %a0, i32 1
2395 store double %ext, double* %ptr0, align 1
2396 store double %ext, double* %ptr1, align 1
2397 ret void
2398}
2399
2400define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) {
2401; X32-LABEL: test_mm_storeh_sd:
2402; X32: # BB#0:
2403; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
2404; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
2405; X32-NEXT: movsd %xmm0, (%eax)
2406; X32-NEXT: retl
2407;
2408; X64-LABEL: test_mm_storeh_sd:
2409; X64: # BB#0:
2410; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
2411; X64-NEXT: movsd %xmm0, (%rdi)
2412; X64-NEXT: retq
2413 %ext = extractelement <2 x double> %a1, i32 1
2414 store double %ext, double* %a0, align 8
2415 ret void
2416}
2417
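; Only the low 64 bits are stored: on x86-64 they go through a general-purpose register, on x86 via movlps.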
2418define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) {
2419; X32-LABEL: test_mm_storel_epi64:
2420; X32: # BB#0:
2421; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
2422; X32-NEXT: movlps %xmm0, (%eax)
2423; X32-NEXT: retl
2424;
2425; X64-LABEL: test_mm_storel_epi64:
2426; X64: # BB#0:
2427; X64-NEXT: movd %xmm0, %rax
2428; X64-NEXT: movq %rax, (%rdi)
2429; X64-NEXT: retq
2430 %ext = extractelement <2 x i64> %a1, i32 0
2431 %bc = bitcast <2 x i64> *%a0 to i64*
2432 store i64 %ext, i64* %bc, align 8
2433 ret void
2434}
2435
2436define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) {
2437; X32-LABEL: test_mm_storel_sd:
2438; X32: # BB#0:
2439; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
2440; X32-NEXT: movsd %xmm0, (%eax)
2441; X32-NEXT: retl
2442;
2443; X64-LABEL: test_mm_storel_sd:
2444; X64: # BB#0:
2445; X64-NEXT: movsd %xmm0, (%rdi)
2446; X64-NEXT: retq
2447 %ext = extractelement <2 x double> %a1, i32 0
2448 store double %ext, double* %a0, align 8
2449 ret void
2450}
2451
2452define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
2453; X32-LABEL: test_mm_storer_pd:
2454; X32: # BB#0:
2455; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
2456; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
2457; X32-NEXT: movapd %xmm0, (%eax)
2458; X32-NEXT: retl
2459;
2460; X64-LABEL: test_mm_storer_pd:
2461; X64: # BB#0:
2462; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
2463; X64-NEXT: movapd %xmm0, (%rdi)
2464; X64-NEXT: retq
2465 %arg0 = bitcast double* %a0 to <2 x double>*
2466 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
2467 store <2 x double> %shuf, <2 x double>* %arg0, align 16
2468 ret void
2469}
2470
2471define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) {
2472; X32-LABEL: test_mm_storeu_pd:
2473; X32: # BB#0:
2474; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
2475; X32-NEXT: movups %xmm0, (%eax)
2476; X32-NEXT: retl
2477;
2478; X64-LABEL: test_mm_storeu_pd:
2479; X64: # BB#0:
2480; X64-NEXT: movups %xmm0, (%rdi)
2481; X64-NEXT: retq
2482 %arg0 = bitcast double* %a0 to <2 x double>*
2483 store <2 x double> %a1, <2 x double>* %arg0, align 1
2484 ret void
2485}
2486
2487define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) {
2488; X32-LABEL: test_mm_storeu_si128:
2489; X32: # BB#0:
2490; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
2491; X32-NEXT: movups %xmm0, (%eax)
2492; X32-NEXT: retl
2493;
2494; X64-LABEL: test_mm_storeu_si128:
2495; X64: # BB#0:
2496; X64-NEXT: movups %xmm0, (%rdi)
2497; X64-NEXT: retq
2498 store <2 x i64> %a1, <2 x i64>* %a0, align 1
2499 ret void
2500}
2501
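; The !nontemporal metadata (!0, defined at the end of the file) selects the non-temporal movnt* stores.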
2502define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) {
2503; X32-LABEL: test_mm_stream_pd:
2504; X32: # BB#0:
2505; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
2506; X32-NEXT: movntps %xmm0, (%eax)
2507; X32-NEXT: retl
2508;
2509; X64-LABEL: test_mm_stream_pd:
2510; X64: # BB#0:
2511; X64-NEXT: movntps %xmm0, (%rdi)
2512; X64-NEXT: retq
2513 %arg0 = bitcast double* %a0 to <2 x double>*
2514 store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0
2515 ret void
2516}
2517
2518define void @test_mm_stream_si32(i32 *%a0, i32 %a1) {
2519; X32-LABEL: test_mm_stream_si32:
2520; X32: # BB#0:
2521; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
2522; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
2523; X32-NEXT: movntil %eax, (%ecx)
2524; X32-NEXT: retl
2525;
2526; X64-LABEL: test_mm_stream_si32:
2527; X64: # BB#0:
2528; X64-NEXT: movntil %esi, (%rdi)
2529; X64-NEXT: retq
2530 store i32 %a1, i32* %a0, align 1, !nontemporal !0
2531 ret void
2532}
2533
2534define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) {
2535; X32-LABEL: test_mm_stream_si128:
2536; X32: # BB#0:
2537; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
2538; X32-NEXT: movntps %xmm0, (%eax)
2539; X32-NEXT: retl
2540;
2541; X64-LABEL: test_mm_stream_si128:
2542; X64: # BB#0:
2543; X64-NEXT: movntps %xmm0, (%rdi)
2544; X64-NEXT: retq
2545 store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0
2546 ret void
2547}
2548
2549define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2550; X32-LABEL: test_mm_sub_epi8:
2551; X32: # BB#0:
2552; X32-NEXT: psubb %xmm1, %xmm0
2553; X32-NEXT: retl
2554;
2555; X64-LABEL: test_mm_sub_epi8:
2556; X64: # BB#0:
2557; X64-NEXT: psubb %xmm1, %xmm0
2558; X64-NEXT: retq
2559 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2560 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2561 %res = sub <16 x i8> %arg0, %arg1
2562 %bc = bitcast <16 x i8> %res to <2 x i64>
2563 ret <2 x i64> %bc
2564}
2565
2566define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2567; X32-LABEL: test_mm_sub_epi16:
2568; X32: # BB#0:
2569; X32-NEXT: psubw %xmm1, %xmm0
2570; X32-NEXT: retl
2571;
2572; X64-LABEL: test_mm_sub_epi16:
2573; X64: # BB#0:
2574; X64-NEXT: psubw %xmm1, %xmm0
2575; X64-NEXT: retq
2576 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2577 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2578 %res = sub <8 x i16> %arg0, %arg1
2579 %bc = bitcast <8 x i16> %res to <2 x i64>
2580 ret <2 x i64> %bc
2581}
2582
2583define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2584; X32-LABEL: test_mm_sub_epi32:
2585; X32: # BB#0:
2586; X32-NEXT: psubd %xmm1, %xmm0
2587; X32-NEXT: retl
2588;
2589; X64-LABEL: test_mm_sub_epi32:
2590; X64: # BB#0:
2591; X64-NEXT: psubd %xmm1, %xmm0
2592; X64-NEXT: retq
2593 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2594 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2595 %res = sub <4 x i32> %arg0, %arg1
2596 %bc = bitcast <4 x i32> %res to <2 x i64>
2597 ret <2 x i64> %bc
2598}
2599
2600define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2601; X32-LABEL: test_mm_sub_epi64:
2602; X32: # BB#0:
2603; X32-NEXT: psubq %xmm1, %xmm0
2604; X32-NEXT: retl
2605;
2606; X64-LABEL: test_mm_sub_epi64:
2607; X64: # BB#0:
2608; X64-NEXT: psubq %xmm1, %xmm0
2609; X64-NEXT: retq
2610 %res = sub <2 x i64> %a0, %a1
2611 ret <2 x i64> %res
2612}
2613
2614define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
2615; X32-LABEL: test_mm_sub_pd:
2616; X32: # BB#0:
2617; X32-NEXT: subpd %xmm1, %xmm0
2618; X32-NEXT: retl
2619;
2620; X64-LABEL: test_mm_sub_pd:
2621; X64: # BB#0:
2622; X64-NEXT: subpd %xmm1, %xmm0
2623; X64-NEXT: retq
2624 %res = fsub <2 x double> %a0, %a1
2625 ret <2 x double> %res
2626}
2627
2628define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2629; X32-LABEL: test_mm_sub_sd:
2630; X32: # BB#0:
2631; X32-NEXT: subsd %xmm1, %xmm0
2632; X32-NEXT: retl
2633;
2634; X64-LABEL: test_mm_sub_sd:
2635; X64: # BB#0:
2636; X64-NEXT: subsd %xmm1, %xmm0
2637; X64-NEXT: retq
2638 %ext0 = extractelement <2 x double> %a0, i32 0
2639 %ext1 = extractelement <2 x double> %a1, i32 0
2640 %fsub = fsub double %ext0, %ext1
2641 %res = insertelement <2 x double> %a0, double %fsub, i32 0
2642 ret <2 x double> %res
2643}
2644
2645define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2646; X32-LABEL: test_mm_subs_epi8:
2647; X32: # BB#0:
2648; X32-NEXT: psubsb %xmm1, %xmm0
2649; X32-NEXT: retl
2650;
2651; X64-LABEL: test_mm_subs_epi8:
2652; X64: # BB#0:
2653; X64-NEXT: psubsb %xmm1, %xmm0
2654; X64-NEXT: retq
2655 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2656 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2657 %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %arg0, <16 x i8> %arg1)
2658 %bc = bitcast <16 x i8> %res to <2 x i64>
2659 ret <2 x i64> %bc
2660}
2661declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
2662
2663define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2664; X32-LABEL: test_mm_subs_epi16:
2665; X32: # BB#0:
2666; X32-NEXT: psubsw %xmm1, %xmm0
2667; X32-NEXT: retl
2668;
2669; X64-LABEL: test_mm_subs_epi16:
2670; X64: # BB#0:
2671; X64-NEXT: psubsw %xmm1, %xmm0
2672; X64-NEXT: retq
2673 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2674 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2675 %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %arg0, <8 x i16> %arg1)
2676 %bc = bitcast <8 x i16> %res to <2 x i64>
2677 ret <2 x i64> %bc
2678}
2679declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
2680
2681define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2682; X32-LABEL: test_mm_subs_epu8:
2683; X32: # BB#0:
2684; X32-NEXT: psubusb %xmm1, %xmm0
2685; X32-NEXT: retl
2686;
2687; X64-LABEL: test_mm_subs_epu8:
2688; X64: # BB#0:
2689; X64-NEXT: psubusb %xmm1, %xmm0
2690; X64-NEXT: retq
2691 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2692 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2693 %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %arg0, <16 x i8> %arg1)
2694 %bc = bitcast <16 x i8> %res to <2 x i64>
2695 ret <2 x i64> %bc
2696}
2697declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
2698
2699define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2700; X32-LABEL: test_mm_subs_epu16:
2701; X32: # BB#0:
2702; X32-NEXT: psubusw %xmm1, %xmm0
2703; X32-NEXT: retl
2704;
2705; X64-LABEL: test_mm_subs_epu16:
2706; X64: # BB#0:
2707; X64-NEXT: psubusw %xmm1, %xmm0
2708; X64-NEXT: retq
2709 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2710 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2711 %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %arg0, <8 x i16> %arg1)
2712 %bc = bitcast <8 x i16> %res to <2 x i64>
2713 ret <2 x i64> %bc
2714}
2715declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
2716
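; ucomisd sets PF on an unordered result, so the eq/neq tests combine sete/setne with setnp/setp to handle NaNs.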
2717define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2718; X32-LABEL: test_mm_ucomieq_sd:
2719; X32: # BB#0:
2720; X32-NEXT: ucomisd %xmm1, %xmm0
2721; X32-NEXT: setnp %al
2722; X32-NEXT: sete %cl
2723; X32-NEXT: andb %al, %cl
2724; X32-NEXT: movzbl %cl, %eax
2725; X32-NEXT: retl
2726;
2727; X64-LABEL: test_mm_ucomieq_sd:
2728; X64: # BB#0:
2729; X64-NEXT: ucomisd %xmm1, %xmm0
2730; X64-NEXT: setnp %al
2731; X64-NEXT: sete %cl
2732; X64-NEXT: andb %al, %cl
2733; X64-NEXT: movzbl %cl, %eax
2734; X64-NEXT: retq
2735 %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
2736 ret i32 %res
2737}
2738declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
2739
2740define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2741; X32-LABEL: test_mm_ucomige_sd:
2742; X32: # BB#0:
2743; X32-NEXT: ucomisd %xmm1, %xmm0
2744; X32-NEXT: setae %al
2745; X32-NEXT: movzbl %al, %eax
2746; X32-NEXT: retl
2747;
2748; X64-LABEL: test_mm_ucomige_sd:
2749; X64: # BB#0:
2750; X64-NEXT: ucomisd %xmm1, %xmm0
2751; X64-NEXT: setae %al
2752; X64-NEXT: movzbl %al, %eax
2753; X64-NEXT: retq
2754 %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
2755 ret i32 %res
2756}
2757declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
2758
2759define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2760; X32-LABEL: test_mm_ucomigt_sd:
2761; X32: # BB#0:
2762; X32-NEXT: ucomisd %xmm1, %xmm0
2763; X32-NEXT: seta %al
2764; X32-NEXT: movzbl %al, %eax
2765; X32-NEXT: retl
2766;
2767; X64-LABEL: test_mm_ucomigt_sd:
2768; X64: # BB#0:
2769; X64-NEXT: ucomisd %xmm1, %xmm0
2770; X64-NEXT: seta %al
2771; X64-NEXT: movzbl %al, %eax
2772; X64-NEXT: retq
2773 %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
2774 ret i32 %res
2775}
2776declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
2777
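; For the le/lt tests the operands of ucomisd are commuted so the result can be read with setae/seta.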
2778define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2779; X32-LABEL: test_mm_ucomile_sd:
2780; X32: # BB#0:
2781; X32-NEXT: ucomisd %xmm0, %xmm1
2782; X32-NEXT: setae %al
2783; X32-NEXT: movzbl %al, %eax
2784; X32-NEXT: retl
2785;
2786; X64-LABEL: test_mm_ucomile_sd:
2787; X64: # BB#0:
2788; X64-NEXT: ucomisd %xmm0, %xmm1
2789; X64-NEXT: setae %al
2790; X64-NEXT: movzbl %al, %eax
2791; X64-NEXT: retq
2792 %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
2793 ret i32 %res
2794}
2795declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
2796
2797define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2798; X32-LABEL: test_mm_ucomilt_sd:
2799; X32: # BB#0:
2800; X32-NEXT: ucomisd %xmm0, %xmm1
2801; X32-NEXT: seta %al
2802; X32-NEXT: movzbl %al, %eax
2803; X32-NEXT: retl
2804;
2805; X64-LABEL: test_mm_ucomilt_sd:
2806; X64: # BB#0:
2807; X64-NEXT: ucomisd %xmm0, %xmm1
2808; X64-NEXT: seta %al
2809; X64-NEXT: movzbl %al, %eax
2810; X64-NEXT: retq
2811 %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
2812 ret i32 %res
2813}
2814declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
2815
2816define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2817; X32-LABEL: test_mm_ucomineq_sd:
2818; X32: # BB#0:
2819; X32-NEXT: ucomisd %xmm1, %xmm0
2820; X32-NEXT: setp %al
2821; X32-NEXT: setne %cl
2822; X32-NEXT: orb %al, %cl
2823; X32-NEXT: movzbl %cl, %eax
2824; X32-NEXT: retl
2825;
2826; X64-LABEL: test_mm_ucomineq_sd:
2827; X64: # BB#0:
2828; X64-NEXT: ucomisd %xmm1, %xmm0
2829; X64-NEXT: setp %al
2830; X64-NEXT: setne %cl
2831; X64-NEXT: orb %al, %cl
2832; X64-NEXT: movzbl %cl, %eax
2833; X64-NEXT: retq
2834 %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
2835 ret i32 %res
2836}
2837declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
2838
2839define <2 x double> @test_mm_undefined_pd() {
2840; X32-LABEL: test_mm_undefined_pd:
2841; X32: # BB#0:
2842; X32-NEXT: retl
2843;
2844; X64-LABEL: test_mm_undefined_pd:
2845; X64: # BB#0:
2846; X64-NEXT: retq
2847 ret <2 x double> undef
2848}
2849
2850define <2 x i64> @test_mm_undefined_si128() {
2851; X32-LABEL: test_mm_undefined_si128:
2852; X32: # BB#0:
2853; X32-NEXT: retl
2854;
2855; X64-LABEL: test_mm_undefined_si128:
2856; X64: # BB#0:
2857; X64-NEXT: retq
2858 ret <2 x i64> undef
2859}
2860
2861define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
2862; X32-LABEL: test_mm_unpackhi_epi8:
2863; X32: # BB#0:
2864; X32-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
2865; X32-NEXT: retl
2866;
2867; X64-LABEL: test_mm_unpackhi_epi8:
2868; X64: # BB#0:
2869; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
2870; X64-NEXT: retq
2871 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2872 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2873 %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
2874 %bc = bitcast <16 x i8> %res to <2 x i64>
2875 ret <2 x i64> %bc
2876}
2877
2878define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2879; X32-LABEL: test_mm_unpackhi_epi16:
2880; X32: # BB#0:
2881; X32-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2882; X32-NEXT: retl
2883;
2884; X64-LABEL: test_mm_unpackhi_epi16:
2885; X64: # BB#0:
2886; X64-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2887; X64-NEXT: retq
2888 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2889 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2890 %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
2891 %bc = bitcast <8 x i16> %res to <2 x i64>
2892 ret <2 x i64> %bc
2893}
2894
2895define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2896; X32-LABEL: test_mm_unpackhi_epi32:
2897; X32: # BB#0:
2898; X32-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2899; X32-NEXT: retl
2900;
2901; X64-LABEL: test_mm_unpackhi_epi32:
2902; X64: # BB#0:
2903; X64-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2904; X64-NEXT: retq
2905 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2906 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2907 %res = shufflevector <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
2908 %bc = bitcast <4 x i32> %res to <2 x i64>
2909 ret <2 x i64> %bc
2910}
2911
2912define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
2913; X32-LABEL: test_mm_unpackhi_epi64:
2914; X32: # BB#0:
2915; X32-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
2916; X32-NEXT: retl
2917;
2918; X64-LABEL: test_mm_unpackhi_epi64:
2919; X64: # BB#0:
2920; X64-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
2921; X64-NEXT: retq
2922 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
2923 ret <2 x i64> %res
2924}
2925
2926define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
2927; X32-LABEL: test_mm_unpackhi_pd:
2928; X32: # BB#0:
2929; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
2930; X32-NEXT: retl
2931;
2932; X64-LABEL: test_mm_unpackhi_pd:
2933; X64: # BB#0:
2934; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
2935; X64-NEXT: retq
2936 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
2937 ret <2 x double> %res
2938}
2939
2940define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
2941; X32-LABEL: test_mm_unpacklo_epi8:
2942; X32: # BB#0:
2943; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2944; X32-NEXT: retl
2945;
2946; X64-LABEL: test_mm_unpacklo_epi8:
2947; X64: # BB#0:
2948; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2949; X64-NEXT: retq
2950 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2951 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2952 %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
2953 %bc = bitcast <16 x i8> %res to <2 x i64>
2954 ret <2 x i64> %bc
2955}
2956
2957define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2958; X32-LABEL: test_mm_unpacklo_epi16:
2959; X32: # BB#0:
2960; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2961; X32-NEXT: retl
2962;
2963; X64-LABEL: test_mm_unpacklo_epi16:
2964; X64: # BB#0:
2965; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2966; X64-NEXT: retq
2967 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2968 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2969 %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
2970 %bc = bitcast <8 x i16> %res to <2 x i64>
2971 ret <2 x i64> %bc
2972}
2973
2974define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2975; X32-LABEL: test_mm_unpacklo_epi32:
2976; X32: # BB#0:
2977; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2978; X32-NEXT: retl
2979;
2980; X64-LABEL: test_mm_unpacklo_epi32:
2981; X64: # BB#0:
2982; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2983; X64-NEXT: retq
2984 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2985 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2986 %res = shufflevector <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
2987 %bc = bitcast <4 x i32> %res to <2 x i64>
2988 ret <2 x i64> %bc
2989}
2990
2991define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
2992; X32-LABEL: test_mm_unpacklo_epi64:
2993; X32: # BB#0:
2994; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2995; X32-NEXT: retl
2996;
2997; X64-LABEL: test_mm_unpacklo_epi64:
2998; X64: # BB#0:
2999; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3000; X64-NEXT: retq
3001 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
3002 ret <2 x i64> %res
3003}
3004
3005define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
3006; X32-LABEL: test_mm_unpacklo_pd:
3007; X32: # BB#0:
3008; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3009; X32-NEXT: retl
3010;
3011; X64-LABEL: test_mm_unpacklo_pd:
3012; X64: # BB#0:
3013; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3014; X64-NEXT: retq
3015 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
3016 ret <2 x double> %res
3017}
3018
3019define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
3020; X32-LABEL: test_mm_xor_pd:
3021; X32: # BB#0:
3022; X32-NEXT: xorps %xmm1, %xmm0
3023; X32-NEXT: retl
3024;
3025; X64-LABEL: test_mm_xor_pd:
3026; X64: # BB#0:
3027; X64-NEXT: xorps %xmm1, %xmm0
3028; X64-NEXT: retq
3029 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
3030 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
3031 %res = xor <4 x i32> %arg0, %arg1
3032 %bc = bitcast <4 x i32> %res to <2 x double>
3033 ret <2 x double> %bc
3034}
3035
3036define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3037; X32-LABEL: test_mm_xor_si128:
3038; X32: # BB#0:
3039; X32-NEXT: xorps %xmm1, %xmm0
3040; X32-NEXT: retl
3041;
3042; X64-LABEL: test_mm_xor_si128:
3043; X64: # BB#0:
3044; X64-NEXT: xorps %xmm1, %xmm0
3045; X64-NEXT: retq
3046 %res = xor <2 x i64> %a0, %a1
3047 ret <2 x i64> %res
3048}
3049
3050!0 = !{i32 1}
3051