; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c

define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_add_epi8:
; X32:       # BB#0:
; X32-NEXT:    paddb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_add_epi8:
; X64:       # BB#0:
; X64-NEXT:    paddb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = add <16 x i8> %arg0, %arg1
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_add_epi16:
; X32:       # BB#0:
; X32-NEXT:    paddw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_add_epi16:
; X64:       # BB#0:
; X64-NEXT:    paddw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = add <8 x i16> %arg0, %arg1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_add_epi32:
; X32:       # BB#0:
; X32-NEXT:    paddd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_add_epi32:
; X64:       # BB#0:
; X64-NEXT:    paddd %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = add <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_add_epi64:
; X32:       # BB#0:
; X32-NEXT:    paddq %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_add_epi64:
; X64:       # BB#0:
; X64-NEXT:    paddq %xmm1, %xmm0
; X64-NEXT:    retq
  %res = add <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_add_pd:
; X32:       # BB#0:
; X32-NEXT:    addpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_add_pd:
; X64:       # BB#0:
; X64-NEXT:    addpd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = fadd <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_add_sd:
; X32:       # BB#0:
; X32-NEXT:    addsd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_add_sd:
; X64:       # BB#0:
; X64-NEXT:    addsd %xmm1, %xmm0
; X64-NEXT:    retq
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fadd = fadd double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fadd, i32 0
  ret <2 x double> %res
}

define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_adds_epi8:
; X32:       # BB#0:
; X32-NEXT:    paddsb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_adds_epi8:
; X64:       # BB#0:
; X64-NEXT:    paddsb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_adds_epi16:
; X32:       # BB#0:
; X32-NEXT:    paddsw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_adds_epi16:
; X64:       # BB#0:
; X64-NEXT:    paddsw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_adds_epu8:
; X32:       # BB#0:
; X32-NEXT:    paddusb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_adds_epu8:
; X64:       # BB#0:
; X64-NEXT:    paddusb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_adds_epu16:
; X32:       # BB#0:
; X32-NEXT:    paddusw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_adds_epu16:
; X64:       # BB#0:
; X64-NEXT:    paddusw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_and_pd:
; X32:       # BB#0:
; X32-NEXT:    andps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_and_pd:
; X64:       # BB#0:
; X64-NEXT:    andps %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = and <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_and_si128:
; X32:       # BB#0:
; X32-NEXT:    andps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_and_si128:
; X64:       # BB#0:
; X64-NEXT:    andps %xmm1, %xmm0
; X64-NEXT:    retq
  %res = and <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_andnot_pd:
; X32:       # BB#0:
; X32-NEXT:    andnps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_andnot_pd:
; X64:       # BB#0:
; X64-NEXT:    andnps %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = and <4 x i32> %not, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_andnot_si128:
; X32:       # BB#0:
; X32-NEXT:    pcmpeqd %xmm2, %xmm2
; X32-NEXT:    pxor %xmm2, %xmm0
; X32-NEXT:    pand %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_andnot_si128:
; X64:       # BB#0:
; X64-NEXT:    pcmpeqd %xmm2, %xmm2
; X64-NEXT:    pxor %xmm2, %xmm0
; X64-NEXT:    pand %xmm1, %xmm0
; X64-NEXT:    retq
  %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
  %res = and <2 x i64> %not, %a1
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_avg_epu8:
; X32:       # BB#0:
; X32-NEXT:    pavgb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_avg_epu8:
; X64:       # BB#0:
; X64-NEXT:    pavgb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1) nounwind readnone

define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_avg_epu16:
; X32:       # BB#0:
; X32-NEXT:    pavgw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_avg_epu16:
; X64:       # BB#0:
; X64-NEXT:    pavgw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_bslli_si128:
; X32:       # BB#0:
; X32-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_bslli_si128:
; X64:       # BB#0:
; X64-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_bsrli_si128:
; X32:       # BB#0:
; X32-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_bsrli_si128:
; X64:       # BB#0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm_castpd_ps:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_castpd_ps:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <2 x double> %a0 to <4 x float>
  ret <4 x float> %res
}

define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm_castpd_si128:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_castpd_si128:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <2 x double> %a0 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm_castps_pd:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_castps_pd:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <4 x float> %a0 to <2 x double>
  ret <2 x double> %res
}

define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm_castps_si128:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_castps_si128:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <4 x float> %a0 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_castsi128_pd:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_castsi128_pd:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <2 x i64> %a0 to <2 x double>
  ret <2 x double> %res
}

define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_castsi128_ps:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_castsi128_ps:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <2 x i64> %a0 to <4 x float>
  ret <4 x float> %res
}

define void @test_mm_clflush(i8* %a0) nounwind {
; X32-LABEL: test_mm_clflush:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    clflush (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_clflush:
; X64:       # BB#0:
; X64-NEXT:    clflush (%rdi)
; X64-NEXT:    retq
  call void @llvm.x86.sse2.clflush(i8* %a0)
  ret void
}
declare void @llvm.x86.sse2.clflush(i8*) nounwind readnone

define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpeq_epi8:
; X32:       # BB#0:
; X32-NEXT:    pcmpeqb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpeq_epi8:
; X64:       # BB#0:
; X64-NEXT:    pcmpeqb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp eq <16 x i8> %arg0, %arg1
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpeq_epi16:
; X32:       # BB#0:
; X32-NEXT:    pcmpeqw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpeq_epi16:
; X64:       # BB#0:
; X64-NEXT:    pcmpeqw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp eq <8 x i16> %arg0, %arg1
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpeq_epi32:
; X32:       # BB#0:
; X32-NEXT:    pcmpeqd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpeq_epi32:
; X64:       # BB#0:
; X64-NEXT:    pcmpeqd %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp eq <4 x i32> %arg0, %arg1
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpeq_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpeqpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpeq_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpeqpd %xmm1, %xmm0
; X64-NEXT:    retq
  %fcmp = fcmp oeq <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpeq_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpeqsd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpeq_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpeqsd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpge_pd:
; X32:       # BB#0:
; X32-NEXT:    cmplepd %xmm0, %xmm1
; X32-NEXT:    movapd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpge_pd:
; X64:       # BB#0:
; X64-NEXT:    cmplepd %xmm0, %xmm1
; X64-NEXT:    movapd %xmm1, %xmm0
; X64-NEXT:    retq
  %fcmp = fcmp ole <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpge_sd:
; X32:       # BB#0:
; X32-NEXT:    cmplesd %xmm0, %xmm1
; X32-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpge_sd:
; X64:       # BB#0:
; X64-NEXT:    cmplesd %xmm0, %xmm1
; X64-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT:    retq
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpgt_epi8:
; X32:       # BB#0:
; X32-NEXT:    pcmpgtb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpgt_epi8:
; X64:       # BB#0:
; X64-NEXT:    pcmpgtb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp sgt <16 x i8> %arg0, %arg1
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpgt_epi16:
; X32:       # BB#0:
; X32-NEXT:    pcmpgtw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpgt_epi16:
; X64:       # BB#0:
; X64-NEXT:    pcmpgtw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg0, %arg1
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpgt_epi32:
; X32:       # BB#0:
; X32-NEXT:    pcmpgtd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpgt_epi32:
; X64:       # BB#0:
; X64-NEXT:    pcmpgtd %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp sgt <4 x i32> %arg0, %arg1
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpgt_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpltpd %xmm0, %xmm1
; X32-NEXT:    movapd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpgt_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpltpd %xmm0, %xmm1
; X64-NEXT:    movapd %xmm1, %xmm0
; X64-NEXT:    retq
  %fcmp = fcmp olt <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpgt_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpltsd %xmm0, %xmm1
; X32-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpgt_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpltsd %xmm0, %xmm1
; X64-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT:    retq
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmple_pd:
; X32:       # BB#0:
; X32-NEXT:    cmplepd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmple_pd:
; X64:       # BB#0:
; X64-NEXT:    cmplepd %xmm1, %xmm0
; X64-NEXT:    retq
  %fcmp = fcmp ole <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmple_sd:
; X32:       # BB#0:
; X32-NEXT:    cmplesd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmple_sd:
; X64:       # BB#0:
; X64-NEXT:    cmplesd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
  ret <2 x double> %res
}

define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmplt_epi8:
; X32:       # BB#0:
; X32-NEXT:    pcmpgtb %xmm0, %xmm1
; X32-NEXT:    movdqa %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmplt_epi8:
; X64:       # BB#0:
; X64-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp sgt <16 x i8> %arg1, %arg0
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmplt_epi16:
; X32:       # BB#0:
; X32-NEXT:    pcmpgtw %xmm0, %xmm1
; X32-NEXT:    movdqa %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmplt_epi16:
; X64:       # BB#0:
; X64-NEXT:    pcmpgtw %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg1, %arg0
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmplt_epi32:
; X32:       # BB#0:
; X32-NEXT:    pcmpgtd %xmm0, %xmm1
; X32-NEXT:    movdqa %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmplt_epi32:
; X64:       # BB#0:
; X64-NEXT:    pcmpgtd %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp sgt <4 x i32> %arg1, %arg0
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmplt_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpltpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmplt_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpltpd %xmm1, %xmm0
; X64-NEXT:    retq
  %fcmp = fcmp olt <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmplt_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpltsd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmplt_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpltsd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpneq_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpneqpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpneq_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpneqpd %xmm1, %xmm0
; X64-NEXT:    retq
  %fcmp = fcmp une <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpneq_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpneqsd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpneq_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpneqsd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnge_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpnlepd %xmm0, %xmm1
; X32-NEXT:    movapd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpnge_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpnlepd %xmm0, %xmm1
; X64-NEXT:    movapd %xmm1, %xmm0
; X64-NEXT:    retq
  %fcmp = fcmp ugt <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnge_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpnlesd %xmm0, %xmm1
; X32-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpnge_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpnlesd %xmm0, %xmm1
; X64-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT:    retq
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpngt_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpnltpd %xmm0, %xmm1
; X32-NEXT:    movapd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpngt_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpnltpd %xmm0, %xmm1
; X64-NEXT:    movapd %xmm1, %xmm0
; X64-NEXT:    retq
  %fcmp = fcmp uge <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpngt_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpnltsd %xmm0, %xmm1
; X32-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpngt_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpnltsd %xmm0, %xmm1
; X64-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT:    retq
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnle_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpnlepd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpnle_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpnlepd %xmm1, %xmm0
; X64-NEXT:    retq
  %fcmp = fcmp ugt <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnle_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpnlesd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpnle_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpnlesd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnlt_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpnltpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpnlt_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpnltpd %xmm1, %xmm0
; X64-NEXT:    retq
  %fcmp = fcmp uge <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnlt_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpnltsd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpnlt_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpnltsd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpord_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpordpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpord_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpordpd %xmm1, %xmm0
; X64-NEXT:    retq
  %fcmp = fcmp ord <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpord_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpordsd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpord_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpordsd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpunord_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpunordpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpunord_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpunordpd %xmm1, %xmm0
; X64-NEXT:    retq
  %fcmp = fcmp uno <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

946define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
947; X32-LABEL: test_mm_cmpunord_sd:
948; X32: # BB#0:
949; X32-NEXT: cmpunordsd %xmm1, %xmm0
950; X32-NEXT: retl
951;
952; X64-LABEL: test_mm_cmpunord_sd:
953; X64: # BB#0:
954; X64-NEXT: cmpunordsd %xmm1, %xmm0
955; X64-NEXT: retq
956 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3)
957 ret <2 x double> %res
958}
959
960define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
961; X32-LABEL: test_mm_comieq_sd:
962; X32: # BB#0:
963; X32-NEXT: comisd %xmm1, %xmm0
964; X32-NEXT: setnp %al
965; X32-NEXT: sete %cl
966; X32-NEXT: andb %al, %cl
967; X32-NEXT: movzbl %cl, %eax
968; X32-NEXT: retl
969;
970; X64-LABEL: test_mm_comieq_sd:
971; X64: # BB#0:
972; X64-NEXT: comisd %xmm1, %xmm0
973; X64-NEXT: setnp %al
974; X64-NEXT: sete %cl
975; X64-NEXT: andb %al, %cl
976; X64-NEXT: movzbl %cl, %eax
977; X64-NEXT: retq
978 %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
979 ret i32 %res
980}
981declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
982
983define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
984; X32-LABEL: test_mm_comige_sd:
985; X32: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +0000986; X32-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000987; X32-NEXT: comisd %xmm1, %xmm0
988; X32-NEXT: setae %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000989; X32-NEXT: retl
990;
991; X64-LABEL: test_mm_comige_sd:
992; X64: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +0000993; X64-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000994; X64-NEXT: comisd %xmm1, %xmm0
995; X64-NEXT: setae %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000996; X64-NEXT: retq
997 %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
998 ret i32 %res
999}
1000declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
1001
1002define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1003; X32-LABEL: test_mm_comigt_sd:
1004; X32: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00001005; X32-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001006; X32-NEXT: comisd %xmm1, %xmm0
1007; X32-NEXT: seta %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001008; X32-NEXT: retl
1009;
1010; X64-LABEL: test_mm_comigt_sd:
1011; X64: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00001012; X64-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001013; X64-NEXT: comisd %xmm1, %xmm0
1014; X64-NEXT: seta %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001015; X64-NEXT: retq
1016 %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
1017 ret i32 %res
1018}
1019declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
1020
1021define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1022; X32-LABEL: test_mm_comile_sd:
1023; X32: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00001024; X32-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001025; X32-NEXT: comisd %xmm0, %xmm1
1026; X32-NEXT: setae %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001027; X32-NEXT: retl
1028;
1029; X64-LABEL: test_mm_comile_sd:
1030; X64: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00001031; X64-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001032; X64-NEXT: comisd %xmm0, %xmm1
1033; X64-NEXT: setae %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001034; X64-NEXT: retq
1035 %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
1036 ret i32 %res
1037}
1038declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
1039
1040define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1041; X32-LABEL: test_mm_comilt_sd:
1042; X32: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00001043; X32-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001044; X32-NEXT: comisd %xmm0, %xmm1
1045; X32-NEXT: seta %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001046; X32-NEXT: retl
1047;
1048; X64-LABEL: test_mm_comilt_sd:
1049; X64: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00001050; X64-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001051; X64-NEXT: comisd %xmm0, %xmm1
1052; X64-NEXT: seta %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001053; X64-NEXT: retq
1054 %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
1055 ret i32 %res
1056}
1057declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
1058
1059define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1060; X32-LABEL: test_mm_comineq_sd:
1061; X32: # BB#0:
1062; X32-NEXT: comisd %xmm1, %xmm0
1063; X32-NEXT: setp %al
1064; X32-NEXT: setne %cl
1065; X32-NEXT: orb %al, %cl
1066; X32-NEXT: movzbl %cl, %eax
1067; X32-NEXT: retl
1068;
1069; X64-LABEL: test_mm_comineq_sd:
1070; X64: # BB#0:
1071; X64-NEXT: comisd %xmm1, %xmm0
1072; X64-NEXT: setp %al
1073; X64-NEXT: setne %cl
1074; X64-NEXT: orb %al, %cl
1075; X64-NEXT: movzbl %cl, %eax
1076; X64-NEXT: retq
1077 %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
1078 ret i32 %res
1079}
1080declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
1081
1082define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
1083; X32-LABEL: test_mm_cvtepi32_pd:
1084; X32: # BB#0:
1085; X32-NEXT: cvtdq2pd %xmm0, %xmm0
1086; X32-NEXT: retl
1087;
1088; X64-LABEL: test_mm_cvtepi32_pd:
1089; X64: # BB#0:
1090; X64-NEXT: cvtdq2pd %xmm0, %xmm0
1091; X64-NEXT: retq
1092 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
Simon Pilgrim8a5ff3c2016-05-23 22:17:36 +00001093 %ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1>
1094 %res = sitofp <2 x i32> %ext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001095 ret <2 x double> %res
1096}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001097
1098define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
1099; X32-LABEL: test_mm_cvtepi32_ps:
1100; X32: # BB#0:
1101; X32-NEXT: cvtdq2ps %xmm0, %xmm0
1102; X32-NEXT: retl
1103;
1104; X64-LABEL: test_mm_cvtepi32_ps:
1105; X64: # BB#0:
1106; X64-NEXT: cvtdq2ps %xmm0, %xmm0
1107; X64-NEXT: retq
1108 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1109 %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %arg0)
1110 ret <4 x float> %res
1111}
1112declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
1113
1114define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
1115; X32-LABEL: test_mm_cvtpd_epi32:
1116; X32: # BB#0:
1117; X32-NEXT: cvtpd2dq %xmm0, %xmm0
1118; X32-NEXT: retl
1119;
1120; X64-LABEL: test_mm_cvtpd_epi32:
1121; X64: # BB#0:
1122; X64-NEXT: cvtpd2dq %xmm0, %xmm0
1123; X64-NEXT: retq
1124 %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
1125 %bc = bitcast <4 x i32> %res to <2 x i64>
1126 ret <2 x i64> %bc
1127}
1128declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
1129
1130define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind {
1131; X32-LABEL: test_mm_cvtpd_ps:
1132; X32: # BB#0:
1133; X32-NEXT: cvtpd2ps %xmm0, %xmm0
1134; X32-NEXT: retl
1135;
1136; X64-LABEL: test_mm_cvtpd_ps:
1137; X64: # BB#0:
1138; X64-NEXT: cvtpd2ps %xmm0, %xmm0
1139; X64-NEXT: retq
1140 %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
1141 ret <4 x float> %res
1142}
1143declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
1144
1145define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind {
1146; X32-LABEL: test_mm_cvtps_epi32:
1147; X32: # BB#0:
1148; X32-NEXT: cvtps2dq %xmm0, %xmm0
1149; X32-NEXT: retl
1150;
1151; X64-LABEL: test_mm_cvtps_epi32:
1152; X64: # BB#0:
1153; X64-NEXT: cvtps2dq %xmm0, %xmm0
1154; X64-NEXT: retq
1155 %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
1156 %bc = bitcast <4 x i32> %res to <2 x i64>
1157 ret <2 x i64> %bc
1158}
1159declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
1160
1161define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind {
1162; X32-LABEL: test_mm_cvtps_pd:
1163; X32: # BB#0:
1164; X32-NEXT: cvtps2pd %xmm0, %xmm0
1165; X32-NEXT: retl
1166;
1167; X64-LABEL: test_mm_cvtps_pd:
1168; X64: # BB#0:
1169; X64-NEXT: cvtps2pd %xmm0, %xmm0
1170; X64-NEXT: retq
Simon Pilgrim8a5ff3c2016-05-23 22:17:36 +00001171 %ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
1172 %res = fpext <2 x float> %ext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001173 ret <2 x double> %res
1174}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001175
1176define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
1177; X32-LABEL: test_mm_cvtsd_f64:
1178; X32: # BB#0:
1179; X32-NEXT: pushl %ebp
1180; X32-NEXT: movl %esp, %ebp
1181; X32-NEXT: andl $-8, %esp
1182; X32-NEXT: subl $8, %esp
1183; X32-NEXT: movlps %xmm0, (%esp)
1184; X32-NEXT: fldl (%esp)
1185; X32-NEXT: movl %ebp, %esp
1186; X32-NEXT: popl %ebp
1187; X32-NEXT: retl
1188;
1189; X64-LABEL: test_mm_cvtsd_f64:
1190; X64: # BB#0:
1191; X64-NEXT: retq
1192 %res = extractelement <2 x double> %a0, i32 0
1193 ret double %res
1194}
1195
1196define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
1197; X32-LABEL: test_mm_cvtsd_si32:
1198; X32: # BB#0:
1199; X32-NEXT: cvtsd2si %xmm0, %eax
1200; X32-NEXT: retl
1201;
1202; X64-LABEL: test_mm_cvtsd_si32:
1203; X64: # BB#0:
1204; X64-NEXT: cvtsd2si %xmm0, %eax
1205; X64-NEXT: retq
1206 %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
1207 ret i32 %res
1208}
1209declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
1210
Simon Pilgrim0ea8d272016-07-19 15:07:43 +00001211define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) {
1212; X32-LABEL: test_mm_cvtsd_ss:
1213; X32: # BB#0:
1214; X32-NEXT: cvtsd2ss %xmm1, %xmm0
1215; X32-NEXT: retl
1216;
1217; X64-LABEL: test_mm_cvtsd_ss:
1218; X64: # BB#0:
1219; X64-NEXT: cvtsd2ss %xmm1, %xmm0
1220; X64-NEXT: retq
1221 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
1222 ret <4 x float> %res
1223}
1224declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
1225
Simon Pilgrim019e1022016-07-26 10:41:28 +00001226define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
1227; X32-LABEL: test_mm_cvtsd_ss_load:
1228; X32: # BB#0:
1229; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1230; X32-NEXT: movaps (%eax), %xmm1
1231; X32-NEXT: cvtsd2ss %xmm1, %xmm0
1232; X32-NEXT: retl
1233;
1234; X64-LABEL: test_mm_cvtsd_ss_load:
1235; X64: # BB#0:
1236; X64-NEXT: movaps (%rdi), %xmm1
1237; X64-NEXT: cvtsd2ss %xmm1, %xmm0
1238; X64-NEXT: retq
1239 %a1 = load <2 x double>, <2 x double>* %p1
1240 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
1241 ret <4 x float> %res
1242}
1243
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001244define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
1245; X32-LABEL: test_mm_cvtsi128_si32:
1246; X32: # BB#0:
1247; X32-NEXT: movd %xmm0, %eax
1248; X32-NEXT: retl
1249;
1250; X64-LABEL: test_mm_cvtsi128_si32:
1251; X64: # BB#0:
1252; X64-NEXT: movd %xmm0, %eax
1253; X64-NEXT: retq
1254 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1255 %res = extractelement <4 x i32> %arg0, i32 0
1256 ret i32 %res
1257}
1258
1259define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
1260; X32-LABEL: test_mm_cvtsi32_sd:
1261; X32: # BB#0:
1262; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1263; X32-NEXT: cvtsi2sdl %eax, %xmm1
1264; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1265; X32-NEXT: retl
1266;
1267; X64-LABEL: test_mm_cvtsi32_sd:
1268; X64: # BB#0:
1269; X64-NEXT: cvtsi2sdl %edi, %xmm1
1270; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1271; X64-NEXT: retq
1272 %cvt = sitofp i32 %a1 to double
1273 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1274 ret <2 x double> %res
1275}
1276
1277define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
1278; X32-LABEL: test_mm_cvtsi32_si128:
1279; X32: # BB#0:
1280; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1281; X32-NEXT: retl
1282;
1283; X64-LABEL: test_mm_cvtsi32_si128:
1284; X64: # BB#0:
1285; X64-NEXT: movd %edi, %xmm0
1286; X64-NEXT: retq
1287 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
1288 %res1 = insertelement <4 x i32> %res0, i32 0, i32 1
1289 %res2 = insertelement <4 x i32> %res1, i32 0, i32 2
1290 %res3 = insertelement <4 x i32> %res2, i32 0, i32 3
1291 %res = bitcast <4 x i32> %res3 to <2 x i64>
1292 ret <2 x i64> %res
1293}
1294
1295define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
1296; X32-LABEL: test_mm_cvtss_sd:
1297; X32: # BB#0:
1298; X32-NEXT: cvtss2sd %xmm1, %xmm1
1299; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1300; X32-NEXT: retl
1301;
1302; X64-LABEL: test_mm_cvtss_sd:
1303; X64: # BB#0:
1304; X64-NEXT: cvtss2sd %xmm1, %xmm1
1305; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1306; X64-NEXT: retq
1307 %ext = extractelement <4 x float> %a1, i32 0
1308 %cvt = fpext float %ext to double
1309 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1310 ret <2 x double> %res
1311}
1312
1313define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
1314; X32-LABEL: test_mm_cvttpd_epi32:
1315; X32: # BB#0:
1316; X32-NEXT: cvttpd2dq %xmm0, %xmm0
1317; X32-NEXT: retl
1318;
1319; X64-LABEL: test_mm_cvttpd_epi32:
1320; X64: # BB#0:
1321; X64-NEXT: cvttpd2dq %xmm0, %xmm0
1322; X64-NEXT: retq
1323 %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
1324 %bc = bitcast <4 x i32> %res to <2 x i64>
1325 ret <2 x i64> %bc
1326}
1327declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
1328
1329define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
1330; X32-LABEL: test_mm_cvttps_epi32:
1331; X32: # BB#0:
1332; X32-NEXT: cvttps2dq %xmm0, %xmm0
1333; X32-NEXT: retl
1334;
1335; X64-LABEL: test_mm_cvttps_epi32:
1336; X64: # BB#0:
1337; X64-NEXT: cvttps2dq %xmm0, %xmm0
1338; X64-NEXT: retq
Simon Pilgrim0ea8d272016-07-19 15:07:43 +00001339 %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001340 %bc = bitcast <4 x i32> %res to <2 x i64>
1341 ret <2 x i64> %bc
1342}
Simon Pilgrim0ea8d272016-07-19 15:07:43 +00001343declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001344
1345define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
1346; X32-LABEL: test_mm_cvttsd_si32:
1347; X32: # BB#0:
1348; X32-NEXT: cvttsd2si %xmm0, %eax
1349; X32-NEXT: retl
1350;
1351; X64-LABEL: test_mm_cvttsd_si32:
1352; X64: # BB#0:
1353; X64-NEXT: cvttsd2si %xmm0, %eax
1354; X64-NEXT: retq
Simon Pilgrim0ea8d272016-07-19 15:07:43 +00001355 %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001356 ret i32 %res
1357}
Simon Pilgrim0ea8d272016-07-19 15:07:43 +00001358declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001359
1360define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1361; X32-LABEL: test_mm_div_pd:
1362; X32: # BB#0:
1363; X32-NEXT: divpd %xmm1, %xmm0
1364; X32-NEXT: retl
1365;
1366; X64-LABEL: test_mm_div_pd:
1367; X64: # BB#0:
1368; X64-NEXT: divpd %xmm1, %xmm0
1369; X64-NEXT: retq
1370 %res = fdiv <2 x double> %a0, %a1
1371 ret <2 x double> %res
1372}
1373
1374define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1375; X32-LABEL: test_mm_div_sd:
1376; X32: # BB#0:
1377; X32-NEXT: divsd %xmm1, %xmm0
1378; X32-NEXT: retl
1379;
1380; X64-LABEL: test_mm_div_sd:
1381; X64: # BB#0:
1382; X64-NEXT: divsd %xmm1, %xmm0
1383; X64-NEXT: retq
1384 %ext0 = extractelement <2 x double> %a0, i32 0
1385 %ext1 = extractelement <2 x double> %a1, i32 0
1386 %fdiv = fdiv double %ext0, %ext1
1387 %res = insertelement <2 x double> %a0, double %fdiv, i32 0
1388 ret <2 x double> %res
1389}
1390
1391define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind {
1392; X32-LABEL: test_mm_extract_epi16:
1393; X32: # BB#0:
1394; X32-NEXT: pextrw $1, %xmm0, %eax
1395; X32-NEXT: movzwl %ax, %eax
1396; X32-NEXT: retl
1397;
1398; X64-LABEL: test_mm_extract_epi16:
1399; X64: # BB#0:
1400; X64-NEXT: pextrw $1, %xmm0, %eax
1401; X64-NEXT: movzwl %ax, %eax
1402; X64-NEXT: retq
1403 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1404 %ext = extractelement <8 x i16> %arg0, i32 1
1405 %res = zext i16 %ext to i32
1406 ret i32 %res
1407}
1408
1409define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
1410; X32-LABEL: test_mm_insert_epi16:
1411; X32: # BB#0:
1412; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
1413; X32-NEXT: pinsrw $1, %eax, %xmm0
1414; X32-NEXT: retl
1415;
1416; X64-LABEL: test_mm_insert_epi16:
1417; X64: # BB#0:
1418; X64-NEXT: pinsrw $1, %edi, %xmm0
1419; X64-NEXT: retq
1420 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1421 %res = insertelement <8 x i16> %arg0, i16 %a1,i32 1
1422 %bc = bitcast <8 x i16> %res to <2 x i64>
1423 ret <2 x i64> %bc
1424}
1425
1426define void @test_mm_lfence() nounwind {
1427; X32-LABEL: test_mm_lfence:
1428; X32: # BB#0:
1429; X32-NEXT: lfence
1430; X32-NEXT: retl
1431;
1432; X64-LABEL: test_mm_lfence:
1433; X64: # BB#0:
1434; X64-NEXT: lfence
1435; X64-NEXT: retq
1436 call void @llvm.x86.sse2.lfence()
1437 ret void
1438}
1439declare void @llvm.x86.sse2.lfence() nounwind readnone
1440
1441define <2 x double> @test_mm_load_pd(double* %a0) nounwind {
1442; X32-LABEL: test_mm_load_pd:
1443; X32: # BB#0:
1444; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1445; X32-NEXT: movaps (%eax), %xmm0
1446; X32-NEXT: retl
1447;
1448; X64-LABEL: test_mm_load_pd:
1449; X64: # BB#0:
1450; X64-NEXT: movaps (%rdi), %xmm0
1451; X64-NEXT: retq
1452 %arg0 = bitcast double* %a0 to <2 x double>*
1453 %res = load <2 x double>, <2 x double>* %arg0, align 16
1454 ret <2 x double> %res
1455}
1456
1457define <2 x double> @test_mm_load_sd(double* %a0) nounwind {
1458; X32-LABEL: test_mm_load_sd:
1459; X32: # BB#0:
1460; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1461; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1462; X32-NEXT: retl
1463;
1464; X64-LABEL: test_mm_load_sd:
1465; X64: # BB#0:
1466; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1467; X64-NEXT: retq
1468 %ld = load double, double* %a0, align 1
1469 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1470 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
1471 ret <2 x double> %res1
1472}
1473
1474define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind {
1475; X32-LABEL: test_mm_load_si128:
1476; X32: # BB#0:
1477; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1478; X32-NEXT: movaps (%eax), %xmm0
1479; X32-NEXT: retl
1480;
1481; X64-LABEL: test_mm_load_si128:
1482; X64: # BB#0:
1483; X64-NEXT: movaps (%rdi), %xmm0
1484; X64-NEXT: retq
1485 %res = load <2 x i64>, <2 x i64>* %a0, align 16
1486 ret <2 x i64> %res
1487}
1488
1489define <2 x double> @test_mm_load1_pd(double* %a0) nounwind {
1490; X32-LABEL: test_mm_load1_pd:
1491; X32: # BB#0:
1492; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1493; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1494; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1495; X32-NEXT: retl
1496;
1497; X64-LABEL: test_mm_load1_pd:
1498; X64: # BB#0:
1499; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1500; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1501; X64-NEXT: retq
1502 %ld = load double, double* %a0, align 8
1503 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1504 %res1 = insertelement <2 x double> %res0, double %ld, i32 1
1505 ret <2 x double> %res1
1506}
1507
1508define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind {
1509; X32-LABEL: test_mm_loadh_pd:
1510; X32: # BB#0:
1511; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1512; X32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1513; X32-NEXT: retl
1514;
1515; X64-LABEL: test_mm_loadh_pd:
1516; X64: # BB#0:
1517; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1518; X64-NEXT: retq
1519 %ld = load double, double* %a1, align 8
1520 %res = insertelement <2 x double> %a0, double %ld, i32 1
1521 ret <2 x double> %res
1522}
1523
1524define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind {
1525; X32-LABEL: test_mm_loadl_epi64:
1526; X32: # BB#0:
1527; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1528; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1529; X32-NEXT: retl
1530;
1531; X64-LABEL: test_mm_loadl_epi64:
1532; X64: # BB#0:
1533; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1534; X64-NEXT: retq
1535 %bc = bitcast <2 x i64>* %a1 to i64*
1536 %ld = load i64, i64* %bc, align 1
1537 %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
1538 %res1 = insertelement <2 x i64> %res0, i64 0, i32 1
1539 ret <2 x i64> %res1
1540}
1541
1542define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind {
1543; X32-LABEL: test_mm_loadl_pd:
1544; X32: # BB#0:
1545; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1546; X32-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1547; X32-NEXT: retl
1548;
1549; X64-LABEL: test_mm_loadl_pd:
1550; X64: # BB#0:
1551; X64-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1552; X64-NEXT: retq
1553 %ld = load double, double* %a1, align 8
1554 %res = insertelement <2 x double> %a0, double %ld, i32 0
1555 ret <2 x double> %res
1556}
1557
1558define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
1559; X32-LABEL: test_mm_loadr_pd:
1560; X32: # BB#0:
1561; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1562; X32-NEXT: movapd (%eax), %xmm0
1563; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1564; X32-NEXT: retl
1565;
1566; X64-LABEL: test_mm_loadr_pd:
1567; X64: # BB#0:
1568; X64-NEXT: movapd (%rdi), %xmm0
1569; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1570; X64-NEXT: retq
1571 %arg0 = bitcast double* %a0 to <2 x double>*
1572 %ld = load <2 x double>, <2 x double>* %arg0, align 16
1573 %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1574 ret <2 x double> %res
1575}
1576
1577define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind {
1578; X32-LABEL: test_mm_loadu_pd:
1579; X32: # BB#0:
1580; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1581; X32-NEXT: movups (%eax), %xmm0
1582; X32-NEXT: retl
1583;
1584; X64-LABEL: test_mm_loadu_pd:
1585; X64: # BB#0:
1586; X64-NEXT: movups (%rdi), %xmm0
1587; X64-NEXT: retq
1588 %arg0 = bitcast double* %a0 to <2 x double>*
1589 %res = load <2 x double>, <2 x double>* %arg0, align 1
1590 ret <2 x double> %res
1591}
1592
1593define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind {
1594; X32-LABEL: test_mm_loadu_si128:
1595; X32: # BB#0:
1596; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1597; X32-NEXT: movups (%eax), %xmm0
1598; X32-NEXT: retl
1599;
1600; X64-LABEL: test_mm_loadu_si128:
1601; X64: # BB#0:
1602; X64-NEXT: movups (%rdi), %xmm0
1603; X64-NEXT: retq
1604 %res = load <2 x i64>, <2 x i64>* %a0, align 1
1605 ret <2 x i64> %res
1606}
1607
1608define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1609; X32-LABEL: test_mm_madd_epi16:
1610; X32: # BB#0:
1611; X32-NEXT: pmaddwd %xmm1, %xmm0
1612; X32-NEXT: retl
1613;
1614; X64-LABEL: test_mm_madd_epi16:
1615; X64: # BB#0:
1616; X64-NEXT: pmaddwd %xmm1, %xmm0
1617; X64-NEXT: retq
1618 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1619 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1620 %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1)
1621 %bc = bitcast <4 x i32> %res to <2 x i64>
1622 ret <2 x i64> %bc
1623}
1624declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
1625
1626define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind {
1627; X32-LABEL: test_mm_maskmoveu_si128:
1628; X32: # BB#0:
1629; X32-NEXT: pushl %edi
1630; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
1631; X32-NEXT: maskmovdqu %xmm1, %xmm0
1632; X32-NEXT: popl %edi
1633; X32-NEXT: retl
1634;
1635; X64-LABEL: test_mm_maskmoveu_si128:
1636; X64: # BB#0:
1637; X64-NEXT: maskmovdqu %xmm1, %xmm0
1638; X64-NEXT: retq
1639 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1640 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1641 call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2)
1642 ret void
1643}
1644declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
1645
1646define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1647; X32-LABEL: test_mm_max_epi16:
1648; X32: # BB#0:
1649; X32-NEXT: pmaxsw %xmm1, %xmm0
1650; X32-NEXT: retl
1651;
1652; X64-LABEL: test_mm_max_epi16:
1653; X64: # BB#0:
1654; X64-NEXT: pmaxsw %xmm1, %xmm0
1655; X64-NEXT: retq
1656 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1657 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
Sanjay Patela6c6f092016-06-15 17:17:27 +00001658 %cmp = icmp sgt <8 x i16> %arg0, %arg1
1659 %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
1660 %bc = bitcast <8 x i16> %sel to <2 x i64>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001661 ret <2 x i64> %bc
1662}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001663
1664define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1665; X32-LABEL: test_mm_max_epu8:
1666; X32: # BB#0:
1667; X32-NEXT: pmaxub %xmm1, %xmm0
1668; X32-NEXT: retl
1669;
1670; X64-LABEL: test_mm_max_epu8:
1671; X64: # BB#0:
1672; X64-NEXT: pmaxub %xmm1, %xmm0
1673; X64-NEXT: retq
1674 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1675 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
Sanjay Patela6c6f092016-06-15 17:17:27 +00001676 %cmp = icmp ugt <16 x i8> %arg0, %arg1
1677 %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
1678 %bc = bitcast <16 x i8> %sel to <2 x i64>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001679 ret <2 x i64> %bc
1680}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001681
1682define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1683; X32-LABEL: test_mm_max_pd:
1684; X32: # BB#0:
1685; X32-NEXT: maxpd %xmm1, %xmm0
1686; X32-NEXT: retl
1687;
1688; X64-LABEL: test_mm_max_pd:
1689; X64: # BB#0:
1690; X64-NEXT: maxpd %xmm1, %xmm0
1691; X64-NEXT: retq
1692 %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
1693 ret <2 x double> %res
1694}
1695declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
1696
1697define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1698; X32-LABEL: test_mm_max_sd:
1699; X32: # BB#0:
1700; X32-NEXT: maxsd %xmm1, %xmm0
1701; X32-NEXT: retl
1702;
1703; X64-LABEL: test_mm_max_sd:
1704; X64: # BB#0:
1705; X64-NEXT: maxsd %xmm1, %xmm0
1706; X64-NEXT: retq
1707 %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
1708 ret <2 x double> %res
1709}
1710declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
1711
1712define void @test_mm_mfence() nounwind {
1713; X32-LABEL: test_mm_mfence:
1714; X32: # BB#0:
1715; X32-NEXT: mfence
1716; X32-NEXT: retl
1717;
1718; X64-LABEL: test_mm_mfence:
1719; X64: # BB#0:
1720; X64-NEXT: mfence
1721; X64-NEXT: retq
1722 call void @llvm.x86.sse2.mfence()
1723 ret void
1724}
1725declare void @llvm.x86.sse2.mfence() nounwind readnone
1726
1727define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1728; X32-LABEL: test_mm_min_epi16:
1729; X32: # BB#0:
1730; X32-NEXT: pminsw %xmm1, %xmm0
1731; X32-NEXT: retl
1732;
1733; X64-LABEL: test_mm_min_epi16:
1734; X64: # BB#0:
1735; X64-NEXT: pminsw %xmm1, %xmm0
1736; X64-NEXT: retq
1737 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1738 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
Sanjay Patela6c6f092016-06-15 17:17:27 +00001739 %cmp = icmp slt <8 x i16> %arg0, %arg1
1740 %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
1741 %bc = bitcast <8 x i16> %sel to <2 x i64>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001742 ret <2 x i64> %bc
1743}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001744
1745define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1746; X32-LABEL: test_mm_min_epu8:
1747; X32: # BB#0:
1748; X32-NEXT: pminub %xmm1, %xmm0
1749; X32-NEXT: retl
1750;
1751; X64-LABEL: test_mm_min_epu8:
1752; X64: # BB#0:
1753; X64-NEXT: pminub %xmm1, %xmm0
1754; X64-NEXT: retq
1755 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1756 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
Sanjay Patela6c6f092016-06-15 17:17:27 +00001757 %cmp = icmp ult <16 x i8> %arg0, %arg1
1758 %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
1759 %bc = bitcast <16 x i8> %sel to <2 x i64>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001760 ret <2 x i64> %bc
1761}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001762
1763define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1764; X32-LABEL: test_mm_min_pd:
1765; X32: # BB#0:
1766; X32-NEXT: minpd %xmm1, %xmm0
1767; X32-NEXT: retl
1768;
1769; X64-LABEL: test_mm_min_pd:
1770; X64: # BB#0:
1771; X64-NEXT: minpd %xmm1, %xmm0
1772; X64-NEXT: retq
1773 %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
1774 ret <2 x double> %res
1775}
1776declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
1777
1778define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1779; X32-LABEL: test_mm_min_sd:
1780; X32: # BB#0:
1781; X32-NEXT: minsd %xmm1, %xmm0
1782; X32-NEXT: retl
1783;
1784; X64-LABEL: test_mm_min_sd:
1785; X64: # BB#0:
1786; X64-NEXT: minsd %xmm1, %xmm0
1787; X64-NEXT: retq
1788 %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
1789 ret <2 x double> %res
1790}
1791declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
1792
Simon Pilgrim47825fa2016-05-19 11:59:57 +00001793define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind {
1794; X32-LABEL: test_mm_move_epi64:
1795; X32: # BB#0:
1796; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1797; X32-NEXT: retl
1798;
1799; X64-LABEL: test_mm_move_epi64:
1800; X64: # BB#0:
1801; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1802; X64-NEXT: retq
1803 %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
1804 ret <2 x i64> %res
1805}
1806
1807define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1808; X32-LABEL: test_mm_move_sd:
1809; X32: # BB#0:
1810; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1811; X32-NEXT: retl
1812;
1813; X64-LABEL: test_mm_move_sd:
1814; X64: # BB#0:
1815; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1816; X64-NEXT: retq
1817 %ext0 = extractelement <2 x double> %a1, i32 0
1818 %res0 = insertelement <2 x double> undef, double %ext0, i32 0
1819 %ext1 = extractelement <2 x double> %a0, i32 1
1820 %res1 = insertelement <2 x double> %res0, double %ext1, i32 1
1821 ret <2 x double> %res1
1822}
1823
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001824define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind {
1825; X32-LABEL: test_mm_movemask_epi8:
1826; X32: # BB#0:
1827; X32-NEXT: pmovmskb %xmm0, %eax
1828; X32-NEXT: retl
1829;
1830; X64-LABEL: test_mm_movemask_epi8:
1831; X64: # BB#0:
1832; X64-NEXT: pmovmskb %xmm0, %eax
1833; X64-NEXT: retq
1834 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1835 %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0)
1836 ret i32 %res
1837}
1838declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
1839
; _mm_movemask_pd: double sign-bit mask via the movmsk.pd intrinsic; expects MOVMSKPD.
1840define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind {
1841; X32-LABEL: test_mm_movemask_pd:
1842; X32: # BB#0:
1843; X32-NEXT: movmskpd %xmm0, %eax
1844; X32-NEXT: retl
1845;
1846; X64-LABEL: test_mm_movemask_pd:
1847; X64: # BB#0:
1848; X64-NEXT: movmskpd %xmm0, %eax
1849; X64-NEXT: retq
1850 %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
1851 ret i32 %res
1852}
1853declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
1854
; _mm_mul_epu32: unsigned 32x32->64 multiply of the even lanes via pmulu.dq; expects PMULUDQ.
1855define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) {
1856; X32-LABEL: test_mm_mul_epu32:
1857; X32: # BB#0:
1858; X32-NEXT: pmuludq %xmm1, %xmm0
1859; X32-NEXT: retl
1860;
1861; X64-LABEL: test_mm_mul_epu32:
1862; X64: # BB#0:
1863; X64-NEXT: pmuludq %xmm1, %xmm0
1864; X64-NEXT: retq
1865 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1866 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1867 %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %arg0, <4 x i32> %arg1)
1868 ret <2 x i64> %res
1869}
1870declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
1871
; _mm_mul_pd: plain IR fmul on <2 x double>; expects MULPD.
1872define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1873; X32-LABEL: test_mm_mul_pd:
1874; X32: # BB#0:
1875; X32-NEXT: mulpd %xmm1, %xmm0
1876; X32-NEXT: retl
1877;
1878; X64-LABEL: test_mm_mul_pd:
1879; X64: # BB#0:
1880; X64-NEXT: mulpd %xmm1, %xmm0
1881; X64-NEXT: retq
1882 %res = fmul <2 x double> %a0, %a1
1883 ret <2 x double> %res
1884}
1885
; _mm_mul_sd: scalar multiply of the low lanes, high lane of %a0 preserved; expects MULSD.
1886define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1887; X32-LABEL: test_mm_mul_sd:
1888; X32: # BB#0:
1889; X32-NEXT: mulsd %xmm1, %xmm0
1890; X32-NEXT: retl
1891;
1892; X64-LABEL: test_mm_mul_sd:
1893; X64: # BB#0:
1894; X64-NEXT: mulsd %xmm1, %xmm0
1895; X64-NEXT: retq
1896 %ext0 = extractelement <2 x double> %a0, i32 0
1897 %ext1 = extractelement <2 x double> %a1, i32 0
1898 %fmul = fmul double %ext0, %ext1
1899 %res = insertelement <2 x double> %a0, double %fmul, i32 0
1900 ret <2 x double> %res
1901}
1902
; _mm_mulhi_epi16: signed i16 multiply, high halves, via pmulh.w; expects PMULHW.
1903define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1904; X32-LABEL: test_mm_mulhi_epi16:
1905; X32: # BB#0:
1906; X32-NEXT: pmulhw %xmm1, %xmm0
1907; X32-NEXT: retl
1908;
1909; X64-LABEL: test_mm_mulhi_epi16:
1910; X64: # BB#0:
1911; X64-NEXT: pmulhw %xmm1, %xmm0
1912; X64-NEXT: retq
1913 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1914 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1915 %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1)
1916 %bc = bitcast <8 x i16> %res to <2 x i64>
1917 ret <2 x i64> %bc
1918}
1919declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
1920
; _mm_mulhi_epu16: unsigned i16 multiply, high halves, via pmulhu.w; expects PMULHUW.
1921define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) {
1922; X32-LABEL: test_mm_mulhi_epu16:
1923; X32: # BB#0:
1924; X32-NEXT: pmulhuw %xmm1, %xmm0
1925; X32-NEXT: retl
1926;
1927; X64-LABEL: test_mm_mulhi_epu16:
1928; X64: # BB#0:
1929; X64-NEXT: pmulhuw %xmm1, %xmm0
1930; X64-NEXT: retq
1931 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1932 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1933 %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1)
1934 %bc = bitcast <8 x i16> %res to <2 x i64>
1935 ret <2 x i64> %bc
1936}
1937declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
1938
; _mm_mullo_epi16: plain IR mul on <8 x i16> (low halves of the products); expects PMULLW.
1939define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1940; X32-LABEL: test_mm_mullo_epi16:
1941; X32: # BB#0:
1942; X32-NEXT: pmullw %xmm1, %xmm0
1943; X32-NEXT: retl
1944;
1945; X64-LABEL: test_mm_mullo_epi16:
1946; X64: # BB#0:
1947; X64-NEXT: pmullw %xmm1, %xmm0
1948; X64-NEXT: retq
1949 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1950 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1951 %res = mul <8 x i16> %arg0, %arg1
1952 %bc = bitcast <8 x i16> %res to <2 x i64>
1953 ret <2 x i64> %bc
1954}
1955
; _mm_or_pd: bitwise OR of double vectors done through an i32 bitcast; currently lowers to ORPS.
1956define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1957; X32-LABEL: test_mm_or_pd:
1958; X32: # BB#0:
1959; X32-NEXT: orps %xmm1, %xmm0
1960; X32-NEXT: retl
1961;
1962; X64-LABEL: test_mm_or_pd:
1963; X64: # BB#0:
1964; X64-NEXT: orps %xmm1, %xmm0
1965; X64-NEXT: retq
1966 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
1967 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
1968 %res = or <4 x i32> %arg0, %arg1
1969 %bc = bitcast <4 x i32> %res to <2 x double>
1970 ret <2 x double> %bc
1971}
1972
; _mm_or_si128: plain IR or on <2 x i64>; currently lowers to ORPS.
1973define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1974; X32-LABEL: test_mm_or_si128:
1975; X32: # BB#0:
1976; X32-NEXT: orps %xmm1, %xmm0
1977; X32-NEXT: retl
1978;
1979; X64-LABEL: test_mm_or_si128:
1980; X64: # BB#0:
1981; X64-NEXT: orps %xmm1, %xmm0
1982; X64-NEXT: retq
1983 %res = or <2 x i64> %a0, %a1
1984 ret <2 x i64> %res
1985}
1986
; _mm_packs_epi16: signed-saturating i16->i8 pack via packsswb.128; expects PACKSSWB.
1987define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1988; X32-LABEL: test_mm_packs_epi16:
1989; X32: # BB#0:
1990; X32-NEXT: packsswb %xmm1, %xmm0
1991; X32-NEXT: retl
1992;
1993; X64-LABEL: test_mm_packs_epi16:
1994; X64: # BB#0:
1995; X64-NEXT: packsswb %xmm1, %xmm0
1996; X64-NEXT: retq
1997 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1998 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1999 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
2000 %bc = bitcast <16 x i8> %res to <2 x i64>
2001 ret <2 x i64> %bc
2002}
2003declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
2004
; _mm_packs_epi32: signed-saturating i32->i16 pack via packssdw.128; expects PACKSSDW.
2005define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2006; X32-LABEL: test_mm_packs_epi32:
2007; X32: # BB#0:
2008; X32-NEXT: packssdw %xmm1, %xmm0
2009; X32-NEXT: retl
2010;
2011; X64-LABEL: test_mm_packs_epi32:
2012; X64: # BB#0:
2013; X64-NEXT: packssdw %xmm1, %xmm0
2014; X64-NEXT: retq
2015 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2016 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2017 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1)
2018 %bc = bitcast <8 x i16> %res to <2 x i64>
2019 ret <2 x i64> %bc
2020}
2021declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
2022
; _mm_packus_epi16: unsigned-saturating i16->i8 pack via packuswb.128; expects PACKUSWB.
2023define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2024; X32-LABEL: test_mm_packus_epi16:
2025; X32: # BB#0:
2026; X32-NEXT: packuswb %xmm1, %xmm0
2027; X32-NEXT: retl
2028;
2029; X64-LABEL: test_mm_packus_epi16:
2030; X64: # BB#0:
2031; X64-NEXT: packuswb %xmm1, %xmm0
2032; X64-NEXT: retq
2033 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2034 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2035 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
2036 %bc = bitcast <16 x i8> %res to <2 x i64>
2037 ret <2 x i64> %bc
2038}
2039declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
2040
; _mm_pause: spin-wait hint intrinsic, no operands or result; expects the PAUSE instruction.
2041define void @test_mm_pause() nounwind {
2042; X32-LABEL: test_mm_pause:
2043; X32: # BB#0:
2044; X32-NEXT: pause
2045; X32-NEXT: retl
2046;
2047; X64-LABEL: test_mm_pause:
2048; X64: # BB#0:
2049; X64-NEXT: pause
2050; X64-NEXT: retq
2051 call void @llvm.x86.sse2.pause()
2052 ret void
2053}
2054declare void @llvm.x86.sse2.pause() nounwind readnone
2055
; _mm_sad_epu8: sum of absolute byte differences via psad.bw; expects PSADBW.
2056define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2057; X32-LABEL: test_mm_sad_epu8:
2058; X32: # BB#0:
2059; X32-NEXT: psadbw %xmm1, %xmm0
2060; X32-NEXT: retl
2061;
2062; X64-LABEL: test_mm_sad_epu8:
2063; X64: # BB#0:
2064; X64-NEXT: psadbw %xmm1, %xmm0
2065; X64-NEXT: retq
2066 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2067 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2068 %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1)
2069 ret <2 x i64> %res
2070}
2071declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
2072
; _mm_set_epi8: builds a <16 x i8> from scalars, highest-index element first (a0 lands in
; lane 15, a15 in lane 0, matching the intrinsic's argument order). Lowered as a tree of
; MOVD + PUNPCKLBW merges; on X32 all args come from the stack, on X64 the first six come
; from registers (dil/sil/dl/cl/r8b/r9b) and the rest from the stack.
Simon Pilgrim01809e02016-05-19 10:58:54 +00002073define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
2074; X32-LABEL: test_mm_set_epi8:
2075; X32: # BB#0:
2076; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2077; X32-NEXT: movd %eax, %xmm0
2078; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2079; X32-NEXT: movd %eax, %xmm1
2080; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2081; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2082; X32-NEXT: movd %eax, %xmm0
2083; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2084; X32-NEXT: movd %eax, %xmm2
2085; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2086; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2087; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2088; X32-NEXT: movd %eax, %xmm0
2089; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2090; X32-NEXT: movd %eax, %xmm3
2091; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2092; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2093; X32-NEXT: movd %eax, %xmm0
2094; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2095; X32-NEXT: movd %eax, %xmm1
2096; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2097; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2098; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2099; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2100; X32-NEXT: movd %eax, %xmm0
2101; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2102; X32-NEXT: movd %eax, %xmm2
2103; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2104; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2105; X32-NEXT: movd %eax, %xmm0
2106; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2107; X32-NEXT: movd %eax, %xmm3
2108; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2109; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2110; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2111; X32-NEXT: movd %eax, %xmm0
2112; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2113; X32-NEXT: movd %eax, %xmm2
2114; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2115; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2116; X32-NEXT: movd %eax, %xmm4
2117; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2118; X32-NEXT: movd %eax, %xmm0
2119; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2120; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2121; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2122; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2123; X32-NEXT: retl
2124;
2125; X64-LABEL: test_mm_set_epi8:
2126; X64: # BB#0:
2127; X64-NEXT: movzbl %dil, %eax
2128; X64-NEXT: movd %eax, %xmm0
2129; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2130; X64-NEXT: movd %eax, %xmm1
2131; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2132; X64-NEXT: movzbl %r8b, %eax
2133; X64-NEXT: movd %eax, %xmm0
2134; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2135; X64-NEXT: movd %eax, %xmm2
2136; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2137; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2138; X64-NEXT: movzbl %dl, %eax
2139; X64-NEXT: movd %eax, %xmm0
2140; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2141; X64-NEXT: movd %eax, %xmm3
2142; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2143; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2144; X64-NEXT: movd %eax, %xmm0
2145; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2146; X64-NEXT: movd %eax, %xmm1
2147; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2148; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2149; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2150; X64-NEXT: movzbl %sil, %eax
2151; X64-NEXT: movd %eax, %xmm0
2152; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2153; X64-NEXT: movd %eax, %xmm2
2154; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2155; X64-NEXT: movzbl %r9b, %eax
2156; X64-NEXT: movd %eax, %xmm0
2157; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2158; X64-NEXT: movd %eax, %xmm3
2159; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2160; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2161; X64-NEXT: movzbl %cl, %eax
2162; X64-NEXT: movd %eax, %xmm0
2163; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2164; X64-NEXT: movd %eax, %xmm2
2165; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2166; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2167; X64-NEXT: movd %eax, %xmm4
2168; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2169; X64-NEXT: movd %eax, %xmm0
2170; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2171; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2172; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2173; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2174; X64-NEXT: retq
2175 %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0
2176 %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1
2177 %res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2
2178 %res3 = insertelement <16 x i8> %res2, i8 %a12, i32 3
2179 %res4 = insertelement <16 x i8> %res3, i8 %a11, i32 4
2180 %res5 = insertelement <16 x i8> %res4, i8 %a10, i32 5
2181 %res6 = insertelement <16 x i8> %res5, i8 %a9 , i32 6
2182 %res7 = insertelement <16 x i8> %res6, i8 %a8 , i32 7
2183 %res8 = insertelement <16 x i8> %res7, i8 %a7 , i32 8
2184 %res9 = insertelement <16 x i8> %res8, i8 %a6 , i32 9
2185 %res10 = insertelement <16 x i8> %res9, i8 %a5 , i32 10
2186 %res11 = insertelement <16 x i8> %res10, i8 %a4 , i32 11
2187 %res12 = insertelement <16 x i8> %res11, i8 %a3 , i32 12
2188 %res13 = insertelement <16 x i8> %res12, i8 %a2 , i32 13
2189 %res14 = insertelement <16 x i8> %res13, i8 %a1 , i32 14
2190 %res15 = insertelement <16 x i8> %res14, i8 %a0 , i32 15
2191 %res = bitcast <16 x i8> %res15 to <2 x i64>
2192 ret <2 x i64> %res
2193}
2194
; _mm_set_epi16: builds a <8 x i16> from scalars, highest-index element first (a0 lands in
; lane 7). Lowered as MOVD + PUNPCKLWD merge tree; X64 reads two stack args into r10w/ax
; and the rest from integer argument registers.
2195define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
2196; X32-LABEL: test_mm_set_epi16:
2197; X32: # BB#0:
2198; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2199; X32-NEXT: movd %eax, %xmm1
2200; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2201; X32-NEXT: movd %eax, %xmm2
2202; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2203; X32-NEXT: movd %eax, %xmm3
2204; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2205; X32-NEXT: movd %eax, %xmm4
2206; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2207; X32-NEXT: movd %eax, %xmm5
2208; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2209; X32-NEXT: movd %eax, %xmm6
2210; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2211; X32-NEXT: movd %eax, %xmm7
2212; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2213; X32-NEXT: movd %eax, %xmm0
2214; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2215; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2216; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2217; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
2218; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
2219; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
2220; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
2221; X32-NEXT: retl
2222;
2223; X64-LABEL: test_mm_set_epi16:
2224; X64: # BB#0:
2225; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
2226; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
2227; X64-NEXT: movd %edi, %xmm0
2228; X64-NEXT: movd %r8d, %xmm1
2229; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2230; X64-NEXT: movd %edx, %xmm0
2231; X64-NEXT: movd %eax, %xmm2
2232; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2233; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2234; X64-NEXT: movd %esi, %xmm0
2235; X64-NEXT: movd %r9d, %xmm1
2236; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2237; X64-NEXT: movd %ecx, %xmm3
2238; X64-NEXT: movd %r10d, %xmm0
2239; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
2240; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2241; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2242; X64-NEXT: retq
2243 %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0
2244 %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1
2245 %res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2
2246 %res3 = insertelement <8 x i16> %res2, i16 %a4, i32 3
2247 %res4 = insertelement <8 x i16> %res3, i16 %a3, i32 4
2248 %res5 = insertelement <8 x i16> %res4, i16 %a2, i32 5
2249 %res6 = insertelement <8 x i16> %res5, i16 %a1, i32 6
2250 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
2251 %res = bitcast <8 x i16> %res7 to <2 x i64>
2252 ret <2 x i64> %res
2253}
2254
; _mm_set_epi32: builds a <4 x i32> from scalars, highest-index element first (a0 lands in
; lane 3). Lowered as MOVD + PUNPCKLDQ merge tree.
2255define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
2256; X32-LABEL: test_mm_set_epi32:
2257; X32: # BB#0:
2258; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2259; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2260; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2261; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2262; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2263; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2264; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2265; X32-NEXT: retl
2266;
2267; X64-LABEL: test_mm_set_epi32:
2268; X64: # BB#0:
2269; X64-NEXT: movd %edi, %xmm0
2270; X64-NEXT: movd %edx, %xmm1
2271; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2272; X64-NEXT: movd %esi, %xmm2
2273; X64-NEXT: movd %ecx, %xmm0
2274; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2275; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2276; X64-NEXT: retq
2277 %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0
2278 %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1
2279 %res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2
2280 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
2281 %res = bitcast <4 x i32> %res3 to <2 x i64>
2282 ret <2 x i64> %res
2283}
2284
2285; TODO test_mm_set_epi64
2286
; _mm_set_epi64x: a1 in lane 0, a0 in lane 1. X64 uses MOVD from GPRs + PUNPCKLQDQ; X32
; assembles each i64 from two 32-bit stack loads with PUNPCKLDQ.
2287define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
2288; X32-LABEL: test_mm_set_epi64x:
2289; X32: # BB#0:
2290; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2291; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2292; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2293; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2294; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2295; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2296; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2297; X32-NEXT: retl
2298;
2299; X64-LABEL: test_mm_set_epi64x:
2300; X64: # BB#0:
2301; X64-NEXT: movd %rdi, %xmm1
2302; X64-NEXT: movd %rsi, %xmm0
2303; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2304; X64-NEXT: retq
2305 %res0 = insertelement <2 x i64> undef, i64 %a1, i32 0
2306 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
2307 ret <2 x i64> %res1
2308}
2309
; _mm_set_pd: a1 in lane 0, a0 in lane 1; expects UNPCKLPD (plus MOVSD loads on X32).
2310define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind {
2311; X32-LABEL: test_mm_set_pd:
2312; X32: # BB#0:
2313; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2314; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2315; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2316; X32-NEXT: retl
2317;
2318; X64-LABEL: test_mm_set_pd:
2319; X64: # BB#0:
2320; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2321; X64-NEXT: movapd %xmm1, %xmm0
2322; X64-NEXT: retq
2323 %res0 = insertelement <2 x double> undef, double %a1, i32 0
2324 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
2325 ret <2 x double> %res1
2326}
2327
; _mm_set_sd: a0 in lane 0, 0.0 in lane 1; expects MOVQ to zero the upper lane.
2328define <2 x double> @test_mm_set_sd(double %a0) nounwind {
2329; X32-LABEL: test_mm_set_sd:
2330; X32: # BB#0:
2331; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2332; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2333; X32-NEXT: retl
2334;
2335; X64-LABEL: test_mm_set_sd:
2336; X64: # BB#0:
2337; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2338; X64-NEXT: retq
2339 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2340 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
2341 ret <2 x double> %res1
2342}
2343
; _mm_set1_epi8: broadcast one i8 to all 16 lanes; expects MOVD + PUNPCKLBW + PSHUFLW/PSHUFD.
2344define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
2345; X32-LABEL: test_mm_set1_epi8:
2346; X32: # BB#0:
2347; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2348; X32-NEXT: movd %eax, %xmm0
2349; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2350; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2351; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2352; X32-NEXT: retl
2353;
2354; X64-LABEL: test_mm_set1_epi8:
2355; X64: # BB#0:
2356; X64-NEXT: movzbl %dil, %eax
2357; X64-NEXT: movd %eax, %xmm0
2358; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2359; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2360; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2361; X64-NEXT: retq
2362 %res0 = insertelement <16 x i8> undef, i8 %a0, i32 0
2363 %res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1
2364 %res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2
2365 %res3 = insertelement <16 x i8> %res2, i8 %a0, i32 3
2366 %res4 = insertelement <16 x i8> %res3, i8 %a0, i32 4
2367 %res5 = insertelement <16 x i8> %res4, i8 %a0, i32 5
2368 %res6 = insertelement <16 x i8> %res5, i8 %a0, i32 6
2369 %res7 = insertelement <16 x i8> %res6, i8 %a0, i32 7
2370 %res8 = insertelement <16 x i8> %res7, i8 %a0, i32 8
2371 %res9 = insertelement <16 x i8> %res8, i8 %a0, i32 9
2372 %res10 = insertelement <16 x i8> %res9, i8 %a0, i32 10
2373 %res11 = insertelement <16 x i8> %res10, i8 %a0, i32 11
2374 %res12 = insertelement <16 x i8> %res11, i8 %a0, i32 12
2375 %res13 = insertelement <16 x i8> %res12, i8 %a0, i32 13
2376 %res14 = insertelement <16 x i8> %res13, i8 %a0, i32 14
2377 %res15 = insertelement <16 x i8> %res14, i8 %a0, i32 15
2378 %res = bitcast <16 x i8> %res15 to <2 x i64>
2379 ret <2 x i64> %res
2380}
2381
; _mm_set1_epi16: broadcast one i16 to all 8 lanes; expects MOVD + PSHUFLW + PSHUFD.
2382define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind {
2383; X32-LABEL: test_mm_set1_epi16:
2384; X32: # BB#0:
2385; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2386; X32-NEXT: movd %eax, %xmm0
2387; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2388; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2389; X32-NEXT: retl
2390;
2391; X64-LABEL: test_mm_set1_epi16:
2392; X64: # BB#0:
2393; X64-NEXT: movd %edi, %xmm0
2394; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2395; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2396; X64-NEXT: retq
2397 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
2398 %res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1
2399 %res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2
2400 %res3 = insertelement <8 x i16> %res2, i16 %a0, i32 3
2401 %res4 = insertelement <8 x i16> %res3, i16 %a0, i32 4
2402 %res5 = insertelement <8 x i16> %res4, i16 %a0, i32 5
2403 %res6 = insertelement <8 x i16> %res5, i16 %a0, i32 6
2404 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
2405 %res = bitcast <8 x i16> %res7 to <2 x i64>
2406 ret <2 x i64> %res
2407}
2408
; _mm_set1_epi32: broadcast one i32 to all 4 lanes; expects MOVD + PSHUFD splat.
2409define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind {
2410; X32-LABEL: test_mm_set1_epi32:
2411; X32: # BB#0:
2412; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2413; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2414; X32-NEXT: retl
2415;
2416; X64-LABEL: test_mm_set1_epi32:
2417; X64: # BB#0:
2418; X64-NEXT: movd %edi, %xmm0
2419; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2420; X64-NEXT: retq
2421 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
2422 %res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1
2423 %res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2
2424 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
2425 %res = bitcast <4 x i32> %res3 to <2 x i64>
2426 ret <2 x i64> %res
2427}
2428
2429; TODO test_mm_set1_epi64
2430
; _mm_set1_epi64x: broadcast one i64 to both lanes. X64 uses MOVD + PSHUFD; X32 splats the
; two 32-bit halves separately and recombines with PUNPCKLDQ.
2431define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
2432; X32-LABEL: test_mm_set1_epi64x:
2433; X32: # BB#0:
2434; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2435; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2436; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2437; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
2438; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2439; X32-NEXT: retl
2440;
2441; X64-LABEL: test_mm_set1_epi64x:
2442; X64: # BB#0:
2443; X64-NEXT: movd %rdi, %xmm0
2444; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2445; X64-NEXT: retq
2446 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
2447 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
2448 ret <2 x i64> %res1
2449}
2450
; _mm_set1_pd: broadcast one double to both lanes; currently lowers to MOVLHPS.
2451define <2 x double> @test_mm_set1_pd(double %a0) nounwind {
2452; X32-LABEL: test_mm_set1_pd:
2453; X32: # BB#0:
2454; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2455; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2456; X32-NEXT: retl
2457;
2458; X64-LABEL: test_mm_set1_pd:
2459; X64: # BB#0:
2460; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2461; X64-NEXT: retq
2462 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2463 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
2464 ret <2 x double> %res1
2465}
2466
2467define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
2468; X32-LABEL: test_mm_setr_epi8:
2469; X32: # BB#0:
2470; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2471; X32-NEXT: movd %eax, %xmm0
2472; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2473; X32-NEXT: movd %eax, %xmm1
2474; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2475; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2476; X32-NEXT: movd %eax, %xmm0
2477; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2478; X32-NEXT: movd %eax, %xmm2
2479; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2480; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2481; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2482; X32-NEXT: movd %eax, %xmm0
2483; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2484; X32-NEXT: movd %eax, %xmm3
2485; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2486; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2487; X32-NEXT: movd %eax, %xmm0
2488; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2489; X32-NEXT: movd %eax, %xmm1
2490; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2491; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2492; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2493; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2494; X32-NEXT: movd %eax, %xmm0
2495; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2496; X32-NEXT: movd %eax, %xmm2
2497; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2498; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2499; X32-NEXT: movd %eax, %xmm0
2500; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2501; X32-NEXT: movd %eax, %xmm3
2502; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2503; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2504; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2505; X32-NEXT: movd %eax, %xmm0
2506; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2507; X32-NEXT: movd %eax, %xmm2
2508; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2509; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2510; X32-NEXT: movd %eax, %xmm4
2511; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2512; X32-NEXT: movd %eax, %xmm0
2513; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2514; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2515; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2516; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2517; X32-NEXT: retl
2518;
2519; X64-LABEL: test_mm_setr_epi8:
2520; X64: # BB#0:
2521; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2522; X64-NEXT: movd %eax, %xmm0
2523; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2524; X64-NEXT: movd %eax, %xmm1
2525; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2526; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2527; X64-NEXT: movd %eax, %xmm0
2528; X64-NEXT: movzbl %cl, %eax
2529; X64-NEXT: movd %eax, %xmm2
2530; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2531; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2532; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2533; X64-NEXT: movd %eax, %xmm0
2534; X64-NEXT: movzbl %r9b, %eax
2535; X64-NEXT: movd %eax, %xmm3
2536; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2537; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2538; X64-NEXT: movd %eax, %xmm0
2539; X64-NEXT: movzbl %sil, %eax
2540; X64-NEXT: movd %eax, %xmm1
2541; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2542; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2543; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2544; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2545; X64-NEXT: movd %eax, %xmm0
2546; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2547; X64-NEXT: movd %eax, %xmm2
2548; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2549; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2550; X64-NEXT: movd %eax, %xmm0
2551; X64-NEXT: movzbl %dl, %eax
2552; X64-NEXT: movd %eax, %xmm3
2553; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2554; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2555; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2556; X64-NEXT: movd %eax, %xmm0
2557; X64-NEXT: movzbl %r8b, %eax
2558; X64-NEXT: movd %eax, %xmm2
2559; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2560; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2561; X64-NEXT: movd %eax, %xmm4
2562; X64-NEXT: movzbl %dil, %eax
2563; X64-NEXT: movd %eax, %xmm0
2564; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2565; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2566; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2567; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2568; X64-NEXT: retq
2569 %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0
2570 %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1
2571 %res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2
2572 %res3 = insertelement <16 x i8> %res2, i8 %a3 , i32 3
2573 %res4 = insertelement <16 x i8> %res3, i8 %a4 , i32 4
2574 %res5 = insertelement <16 x i8> %res4, i8 %a5 , i32 5
2575 %res6 = insertelement <16 x i8> %res5, i8 %a6 , i32 6
2576 %res7 = insertelement <16 x i8> %res6, i8 %a7 , i32 7
2577 %res8 = insertelement <16 x i8> %res7, i8 %a8 , i32 8
2578 %res9 = insertelement <16 x i8> %res8, i8 %a9 , i32 9
2579 %res10 = insertelement <16 x i8> %res9, i8 %a10, i32 10
2580 %res11 = insertelement <16 x i8> %res10, i8 %a11, i32 11
2581 %res12 = insertelement <16 x i8> %res11, i8 %a12, i32 12
2582 %res13 = insertelement <16 x i8> %res12, i8 %a13, i32 13
2583 %res14 = insertelement <16 x i8> %res13, i8 %a14, i32 14
2584 %res15 = insertelement <16 x i8> %res14, i8 %a15, i32 15
2585 %res = bitcast <16 x i8> %res15 to <2 x i64>
2586 ret <2 x i64> %res
2587}
2588
2589define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
2590; X32-LABEL: test_mm_setr_epi16:
2591; X32: # BB#0:
2592; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2593; X32-NEXT: movd %eax, %xmm1
2594; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2595; X32-NEXT: movd %eax, %xmm2
2596; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2597; X32-NEXT: movd %eax, %xmm3
2598; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2599; X32-NEXT: movd %eax, %xmm4
2600; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2601; X32-NEXT: movd %eax, %xmm5
2602; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2603; X32-NEXT: movd %eax, %xmm6
2604; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2605; X32-NEXT: movd %eax, %xmm7
2606; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2607; X32-NEXT: movd %eax, %xmm0
2608; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2609; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2610; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2611; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
2612; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
2613; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
2614; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
2615; X32-NEXT: retl
2616;
2617; X64-LABEL: test_mm_setr_epi16:
2618; X64: # BB#0:
2619; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
2620; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
2621; X64-NEXT: movd %eax, %xmm0
2622; X64-NEXT: movd %ecx, %xmm1
2623; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2624; X64-NEXT: movd %r9d, %xmm0
2625; X64-NEXT: movd %esi, %xmm2
2626; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2627; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2628; X64-NEXT: movd %r10d, %xmm0
2629; X64-NEXT: movd %edx, %xmm1
2630; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2631; X64-NEXT: movd %r8d, %xmm3
2632; X64-NEXT: movd %edi, %xmm0
2633; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
2634; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2635; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2636; X64-NEXT: retq
2637 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
2638 %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1
2639 %res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2
2640 %res3 = insertelement <8 x i16> %res2, i16 %a3, i32 3
2641 %res4 = insertelement <8 x i16> %res3, i16 %a4, i32 4
2642 %res5 = insertelement <8 x i16> %res4, i16 %a5, i32 5
2643 %res6 = insertelement <8 x i16> %res5, i16 %a6, i32 6
2644 %res7 = insertelement <8 x i16> %res6, i16 %a7, i32 7
2645 %res = bitcast <8 x i16> %res7 to <2 x i64>
2646 ret <2 x i64> %res
2647}
2648
2649define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
2650; X32-LABEL: test_mm_setr_epi32:
2651; X32: # BB#0:
2652; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2653; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2654; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2655; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2656; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2657; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2658; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2659; X32-NEXT: retl
2660;
2661; X64-LABEL: test_mm_setr_epi32:
2662; X64: # BB#0:
2663; X64-NEXT: movd %ecx, %xmm0
2664; X64-NEXT: movd %esi, %xmm1
2665; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2666; X64-NEXT: movd %edx, %xmm2
2667; X64-NEXT: movd %edi, %xmm0
2668; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2669; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2670; X64-NEXT: retq
2671 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
2672 %res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1
2673 %res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2
2674 %res3 = insertelement <4 x i32> %res2, i32 %a3, i32 3
2675 %res = bitcast <4 x i32> %res3 to <2 x i64>
2676 ret <2 x i64> %res
2677}
2678
2679; TODO test_mm_setr_epi64
2680
2681define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
2682; X32-LABEL: test_mm_setr_epi64x:
2683; X32: # BB#0:
2684; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2685; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2686; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2687; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2688; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2689; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2690; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2691; X32-NEXT: retl
2692;
2693; X64-LABEL: test_mm_setr_epi64x:
2694; X64: # BB#0:
2695; X64-NEXT: movd %rsi, %xmm1
2696; X64-NEXT: movd %rdi, %xmm0
2697; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2698; X64-NEXT: retq
2699 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
2700 %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1
2701 ret <2 x i64> %res1
2702}
2703
2704define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind {
2705; X32-LABEL: test_mm_setr_pd:
2706; X32: # BB#0:
2707; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2708; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2709; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2710; X32-NEXT: retl
2711;
2712; X64-LABEL: test_mm_setr_pd:
2713; X64: # BB#0:
2714; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2715; X64-NEXT: retq
2716 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2717 %res1 = insertelement <2 x double> %res0, double %a1, i32 1
2718 ret <2 x double> %res1
2719}
2720
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00002721define <2 x double> @test_mm_setzero_pd() {
2722; X32-LABEL: test_mm_setzero_pd:
2723; X32: # BB#0:
2724; X32-NEXT: xorps %xmm0, %xmm0
2725; X32-NEXT: retl
2726;
2727; X64-LABEL: test_mm_setzero_pd:
2728; X64: # BB#0:
2729; X64-NEXT: xorps %xmm0, %xmm0
2730; X64-NEXT: retq
2731 ret <2 x double> zeroinitializer
2732}
2733
2734define <2 x i64> @test_mm_setzero_si128() {
2735; X32-LABEL: test_mm_setzero_si128:
2736; X32: # BB#0:
2737; X32-NEXT: xorps %xmm0, %xmm0
2738; X32-NEXT: retl
2739;
2740; X64-LABEL: test_mm_setzero_si128:
2741; X64: # BB#0:
2742; X64-NEXT: xorps %xmm0, %xmm0
2743; X64-NEXT: retq
2744 ret <2 x i64> zeroinitializer
2745}
2746
2747define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) {
2748; X32-LABEL: test_mm_shuffle_epi32:
2749; X32: # BB#0:
2750; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2751; X32-NEXT: retl
2752;
2753; X64-LABEL: test_mm_shuffle_epi32:
2754; X64: # BB#0:
2755; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2756; X64-NEXT: retq
2757 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2758 %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer
2759 %bc = bitcast <4 x i32> %res to <2 x i64>
2760 ret <2 x i64> %bc
2761}
2762
2763define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
2764; X32-LABEL: test_mm_shuffle_pd:
2765; X32: # BB#0:
2766; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
2767; X32-NEXT: retl
2768;
2769; X64-LABEL: test_mm_shuffle_pd:
2770; X64: # BB#0:
2771; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
2772; X64-NEXT: retq
2773 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
2774 ret <2 x double> %res
2775}
2776
2777define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) {
2778; X32-LABEL: test_mm_shufflehi_epi16:
2779; X32: # BB#0:
2780; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2781; X32-NEXT: retl
2782;
2783; X64-LABEL: test_mm_shufflehi_epi16:
2784; X64: # BB#0:
2785; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2786; X64-NEXT: retq
2787 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2788 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
2789 %bc = bitcast <8 x i16> %res to <2 x i64>
2790 ret <2 x i64> %bc
2791}
2792
2793define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) {
2794; X32-LABEL: test_mm_shufflelo_epi16:
2795; X32: # BB#0:
2796; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2797; X32-NEXT: retl
2798;
2799; X64-LABEL: test_mm_shufflelo_epi16:
2800; X64: # BB#0:
2801; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2802; X64-NEXT: retq
2803 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2804 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
2805 %bc = bitcast <8 x i16> %res to <2 x i64>
2806 ret <2 x i64> %bc
2807}
2808
2809define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2810; X32-LABEL: test_mm_sll_epi16:
2811; X32: # BB#0:
2812; X32-NEXT: psllw %xmm1, %xmm0
2813; X32-NEXT: retl
2814;
2815; X64-LABEL: test_mm_sll_epi16:
2816; X64: # BB#0:
2817; X64-NEXT: psllw %xmm1, %xmm0
2818; X64-NEXT: retq
2819 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2820 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2821 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1)
2822 %bc = bitcast <8 x i16> %res to <2 x i64>
2823 ret <2 x i64> %bc
2824}
2825declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
2826
2827define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2828; X32-LABEL: test_mm_sll_epi32:
2829; X32: # BB#0:
2830; X32-NEXT: pslld %xmm1, %xmm0
2831; X32-NEXT: retl
2832;
2833; X64-LABEL: test_mm_sll_epi32:
2834; X64: # BB#0:
2835; X64-NEXT: pslld %xmm1, %xmm0
2836; X64-NEXT: retq
2837 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2838 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2839 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1)
2840 %bc = bitcast <4 x i32> %res to <2 x i64>
2841 ret <2 x i64> %bc
2842}
2843declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
2844
2845define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) {
2846; X32-LABEL: test_mm_sll_epi64:
2847; X32: # BB#0:
2848; X32-NEXT: psllq %xmm1, %xmm0
2849; X32-NEXT: retl
2850;
2851; X64-LABEL: test_mm_sll_epi64:
2852; X64: # BB#0:
2853; X64-NEXT: psllq %xmm1, %xmm0
2854; X64-NEXT: retq
2855 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
2856 ret <2 x i64> %res
2857}
2858declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
2859
2860define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) {
2861; X32-LABEL: test_mm_slli_epi16:
2862; X32: # BB#0:
2863; X32-NEXT: psllw $1, %xmm0
2864; X32-NEXT: retl
2865;
2866; X64-LABEL: test_mm_slli_epi16:
2867; X64: # BB#0:
2868; X64-NEXT: psllw $1, %xmm0
2869; X64-NEXT: retq
2870 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2871 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1)
2872 %bc = bitcast <8 x i16> %res to <2 x i64>
2873 ret <2 x i64> %bc
2874}
2875declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
2876
2877define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) {
2878; X32-LABEL: test_mm_slli_epi32:
2879; X32: # BB#0:
2880; X32-NEXT: pslld $1, %xmm0
2881; X32-NEXT: retl
2882;
2883; X64-LABEL: test_mm_slli_epi32:
2884; X64: # BB#0:
2885; X64-NEXT: pslld $1, %xmm0
2886; X64-NEXT: retq
2887 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2888 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1)
2889 %bc = bitcast <4 x i32> %res to <2 x i64>
2890 ret <2 x i64> %bc
2891}
2892declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
2893
2894define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) {
2895; X32-LABEL: test_mm_slli_epi64:
2896; X32: # BB#0:
2897; X32-NEXT: psllq $1, %xmm0
2898; X32-NEXT: retl
2899;
2900; X64-LABEL: test_mm_slli_epi64:
2901; X64: # BB#0:
2902; X64-NEXT: psllq $1, %xmm0
2903; X64-NEXT: retq
2904 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1)
2905 ret <2 x i64> %res
2906}
2907declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
2908
2909define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind {
2910; X32-LABEL: test_mm_slli_si128:
2911; X32: # BB#0:
2912; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
2913; X32-NEXT: retl
2914;
2915; X64-LABEL: test_mm_slli_si128:
2916; X64: # BB#0:
2917; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
2918; X64-NEXT: retq
2919 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2920 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
2921 %bc = bitcast <16 x i8> %res to <2 x i64>
2922 ret <2 x i64> %bc
2923}
2924
2925define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind {
2926; X32-LABEL: test_mm_sqrt_pd:
2927; X32: # BB#0:
2928; X32-NEXT: sqrtpd %xmm0, %xmm0
2929; X32-NEXT: retl
2930;
2931; X64-LABEL: test_mm_sqrt_pd:
2932; X64: # BB#0:
2933; X64-NEXT: sqrtpd %xmm0, %xmm0
2934; X64-NEXT: retq
2935 %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
2936 ret <2 x double> %res
2937}
2938declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
2939
2940define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2941; X32-LABEL: test_mm_sqrt_sd:
2942; X32: # BB#0:
2943; X32-NEXT: sqrtsd %xmm0, %xmm1
2944; X32-NEXT: movaps %xmm1, %xmm0
2945; X32-NEXT: retl
2946;
2947; X64-LABEL: test_mm_sqrt_sd:
2948; X64: # BB#0:
2949; X64-NEXT: sqrtsd %xmm0, %xmm1
2950; X64-NEXT: movaps %xmm1, %xmm0
2951; X64-NEXT: retq
2952 %call = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
2953 %ext0 = extractelement <2 x double> %call, i32 0
2954 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
2955 %ext1 = extractelement <2 x double> %a1, i32 1
2956 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
2957 ret <2 x double> %ins1
2958}
2959declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
2960
2961define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2962; X32-LABEL: test_mm_sra_epi16:
2963; X32: # BB#0:
2964; X32-NEXT: psraw %xmm1, %xmm0
2965; X32-NEXT: retl
2966;
2967; X64-LABEL: test_mm_sra_epi16:
2968; X64: # BB#0:
2969; X64-NEXT: psraw %xmm1, %xmm0
2970; X64-NEXT: retq
2971 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2972 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2973 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1)
2974 %bc = bitcast <8 x i16> %res to <2 x i64>
2975 ret <2 x i64> %bc
2976}
2977declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
2978
2979define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2980; X32-LABEL: test_mm_sra_epi32:
2981; X32: # BB#0:
2982; X32-NEXT: psrad %xmm1, %xmm0
2983; X32-NEXT: retl
2984;
2985; X64-LABEL: test_mm_sra_epi32:
2986; X64: # BB#0:
2987; X64-NEXT: psrad %xmm1, %xmm0
2988; X64-NEXT: retq
2989 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2990 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2991 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1)
2992 %bc = bitcast <4 x i32> %res to <2 x i64>
2993 ret <2 x i64> %bc
2994}
2995declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
2996
2997define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) {
2998; X32-LABEL: test_mm_srai_epi16:
2999; X32: # BB#0:
3000; X32-NEXT: psraw $1, %xmm0
3001; X32-NEXT: retl
3002;
3003; X64-LABEL: test_mm_srai_epi16:
3004; X64: # BB#0:
3005; X64-NEXT: psraw $1, %xmm0
3006; X64-NEXT: retq
3007 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3008 %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1)
3009 %bc = bitcast <8 x i16> %res to <2 x i64>
3010 ret <2 x i64> %bc
3011}
3012declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
3013
3014define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) {
3015; X32-LABEL: test_mm_srai_epi32:
3016; X32: # BB#0:
3017; X32-NEXT: psrad $1, %xmm0
3018; X32-NEXT: retl
3019;
3020; X64-LABEL: test_mm_srai_epi32:
3021; X64: # BB#0:
3022; X64-NEXT: psrad $1, %xmm0
3023; X64-NEXT: retq
3024 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3025 %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1)
3026 %bc = bitcast <4 x i32> %res to <2 x i64>
3027 ret <2 x i64> %bc
3028}
3029declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
3030
3031define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
3032; X32-LABEL: test_mm_srl_epi16:
3033; X32: # BB#0:
3034; X32-NEXT: psrlw %xmm1, %xmm0
3035; X32-NEXT: retl
3036;
3037; X64-LABEL: test_mm_srl_epi16:
3038; X64: # BB#0:
3039; X64-NEXT: psrlw %xmm1, %xmm0
3040; X64-NEXT: retq
3041 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3042 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3043 %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1)
3044 %bc = bitcast <8 x i16> %res to <2 x i64>
3045 ret <2 x i64> %bc
3046}
3047declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
3048
3049define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
3050; X32-LABEL: test_mm_srl_epi32:
3051; X32: # BB#0:
3052; X32-NEXT: psrld %xmm1, %xmm0
3053; X32-NEXT: retl
3054;
3055; X64-LABEL: test_mm_srl_epi32:
3056; X64: # BB#0:
3057; X64-NEXT: psrld %xmm1, %xmm0
3058; X64-NEXT: retq
3059 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3060 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3061 %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1)
3062 %bc = bitcast <4 x i32> %res to <2 x i64>
3063 ret <2 x i64> %bc
3064}
3065declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
3066
3067define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
3068; X32-LABEL: test_mm_srl_epi64:
3069; X32: # BB#0:
3070; X32-NEXT: psrlq %xmm1, %xmm0
3071; X32-NEXT: retl
3072;
3073; X64-LABEL: test_mm_srl_epi64:
3074; X64: # BB#0:
3075; X64-NEXT: psrlq %xmm1, %xmm0
3076; X64-NEXT: retq
3077 %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
3078 ret <2 x i64> %res
3079}
3080declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
3081
3082define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) {
3083; X32-LABEL: test_mm_srli_epi16:
3084; X32: # BB#0:
3085; X32-NEXT: psrlw $1, %xmm0
3086; X32-NEXT: retl
3087;
3088; X64-LABEL: test_mm_srli_epi16:
3089; X64: # BB#0:
3090; X64-NEXT: psrlw $1, %xmm0
3091; X64-NEXT: retq
3092 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3093 %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1)
3094 %bc = bitcast <8 x i16> %res to <2 x i64>
3095 ret <2 x i64> %bc
3096}
3097declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
3098
3099define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) {
3100; X32-LABEL: test_mm_srli_epi32:
3101; X32: # BB#0:
3102; X32-NEXT: psrld $1, %xmm0
3103; X32-NEXT: retl
3104;
3105; X64-LABEL: test_mm_srli_epi32:
3106; X64: # BB#0:
3107; X64-NEXT: psrld $1, %xmm0
3108; X64-NEXT: retq
3109 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3110 %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1)
3111 %bc = bitcast <4 x i32> %res to <2 x i64>
3112 ret <2 x i64> %bc
3113}
3114declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
3115
3116define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) {
3117; X32-LABEL: test_mm_srli_epi64:
3118; X32: # BB#0:
3119; X32-NEXT: psrlq $1, %xmm0
3120; X32-NEXT: retl
3121;
3122; X64-LABEL: test_mm_srli_epi64:
3123; X64: # BB#0:
3124; X64-NEXT: psrlq $1, %xmm0
3125; X64-NEXT: retq
3126 %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1)
3127 ret <2 x i64> %res
3128}
3129declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
3130
3131define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind {
3132; X32-LABEL: test_mm_srli_si128:
3133; X32: # BB#0:
3134; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
3135; X32-NEXT: retl
3136;
3137; X64-LABEL: test_mm_srli_si128:
3138; X64: # BB#0:
3139; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
3140; X64-NEXT: retq
3141 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3142 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
3143 %bc = bitcast <16 x i8> %res to <2 x i64>
3144 ret <2 x i64> %bc
3145}
3146
3147define void @test_mm_store_pd(double *%a0, <2 x double> %a1) {
3148; X32-LABEL: test_mm_store_pd:
3149; X32: # BB#0:
3150; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3151; X32-NEXT: movaps %xmm0, (%eax)
3152; X32-NEXT: retl
3153;
3154; X64-LABEL: test_mm_store_pd:
3155; X64: # BB#0:
3156; X64-NEXT: movaps %xmm0, (%rdi)
3157; X64-NEXT: retq
3158 %arg0 = bitcast double* %a0 to <2 x double>*
3159 store <2 x double> %a1, <2 x double>* %arg0, align 16
3160 ret void
3161}
3162
Simon Pilgrim4ed0e072016-05-30 18:18:44 +00003163define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) {
3164; X32-LABEL: test_mm_store_pd1:
3165; X32: # BB#0:
3166; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3167; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3168; X32-NEXT: movaps %xmm0, (%eax)
3169; X32-NEXT: retl
3170;
3171; X64-LABEL: test_mm_store_pd1:
3172; X64: # BB#0:
3173; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3174; X64-NEXT: movaps %xmm0, (%rdi)
3175; X64-NEXT: retq
3176 %arg0 = bitcast double * %a0 to <2 x double>*
3177 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
3178 store <2 x double> %shuf, <2 x double>* %arg0, align 16
3179 ret void
3180}
3181
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003182define void @test_mm_store_sd(double *%a0, <2 x double> %a1) {
3183; X32-LABEL: test_mm_store_sd:
3184; X32: # BB#0:
3185; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3186; X32-NEXT: movsd %xmm0, (%eax)
3187; X32-NEXT: retl
3188;
3189; X64-LABEL: test_mm_store_sd:
3190; X64: # BB#0:
3191; X64-NEXT: movsd %xmm0, (%rdi)
3192; X64-NEXT: retq
3193 %ext = extractelement <2 x double> %a1, i32 0
3194 store double %ext, double* %a0, align 1
3195 ret void
3196}
3197
3198define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3199; X32-LABEL: test_mm_store_si128:
3200; X32: # BB#0:
3201; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3202; X32-NEXT: movaps %xmm0, (%eax)
3203; X32-NEXT: retl
3204;
3205; X64-LABEL: test_mm_store_si128:
3206; X64: # BB#0:
3207; X64-NEXT: movaps %xmm0, (%rdi)
3208; X64-NEXT: retq
3209 store <2 x i64> %a1, <2 x i64>* %a0, align 16
3210 ret void
3211}
3212
Simon Pilgrim4d1e2582016-05-25 09:42:29 +00003213define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) {
3214; X32-LABEL: test_mm_store1_pd:
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003215; X32: # BB#0:
3216; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
Simon Pilgrim4ed0e072016-05-30 18:18:44 +00003217; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3218; X32-NEXT: movaps %xmm0, (%eax)
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003219; X32-NEXT: retl
3220;
Simon Pilgrim4d1e2582016-05-25 09:42:29 +00003221; X64-LABEL: test_mm_store1_pd:
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003222; X64: # BB#0:
Simon Pilgrim4ed0e072016-05-30 18:18:44 +00003223; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3224; X64-NEXT: movaps %xmm0, (%rdi)
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003225; X64-NEXT: retq
Simon Pilgrim4ed0e072016-05-30 18:18:44 +00003226 %arg0 = bitcast double * %a0 to <2 x double>*
3227 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
3228 store <2 x double> %shuf, <2 x double>* %arg0, align 16
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003229 ret void
3230}
3231
3232define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) {
3233; X32-LABEL: test_mm_storeh_sd:
3234; X32: # BB#0:
3235; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
Simon Pilgrim2279e592016-08-22 12:56:54 +00003236; X32-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003237; X32-NEXT: movsd %xmm0, (%eax)
3238; X32-NEXT: retl
3239;
3240; X64-LABEL: test_mm_storeh_sd:
3241; X64: # BB#0:
Simon Pilgrim2279e592016-08-22 12:56:54 +00003242; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003243; X64-NEXT: movsd %xmm0, (%rdi)
3244; X64-NEXT: retq
3245 %ext = extractelement <2 x double> %a1, i32 1
3246 store double %ext, double* %a0, align 8
3247 ret void
3248}
3249
3250define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) {
3251; X32-LABEL: test_mm_storel_epi64:
3252; X32: # BB#0:
3253; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3254; X32-NEXT: movlps %xmm0, (%eax)
3255; X32-NEXT: retl
3256;
3257; X64-LABEL: test_mm_storel_epi64:
3258; X64: # BB#0:
3259; X64-NEXT: movd %xmm0, %rax
3260; X64-NEXT: movq %rax, (%rdi)
3261; X64-NEXT: retq
3262 %ext = extractelement <2 x i64> %a1, i32 0
3263 %bc = bitcast <2 x i64> *%a0 to i64*
3264 store i64 %ext, i64* %bc, align 8
3265 ret void
3266}
3267
3268define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) {
3269; X32-LABEL: test_mm_storel_sd:
3270; X32: # BB#0:
3271; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3272; X32-NEXT: movsd %xmm0, (%eax)
3273; X32-NEXT: retl
3274;
3275; X64-LABEL: test_mm_storel_sd:
3276; X64: # BB#0:
3277; X64-NEXT: movsd %xmm0, (%rdi)
3278; X64-NEXT: retq
3279 %ext = extractelement <2 x double> %a1, i32 0
3280 store double %ext, double* %a0, align 8
3281 ret void
3282}
3283
3284define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
3285; X32-LABEL: test_mm_storer_pd:
3286; X32: # BB#0:
3287; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3288; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3289; X32-NEXT: movapd %xmm0, (%eax)
3290; X32-NEXT: retl
3291;
3292; X64-LABEL: test_mm_storer_pd:
3293; X64: # BB#0:
3294; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3295; X64-NEXT: movapd %xmm0, (%rdi)
3296; X64-NEXT: retq
3297 %arg0 = bitcast double* %a0 to <2 x double>*
3298 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
3299 store <2 x double> %shuf, <2 x double>* %arg0, align 16
3300 ret void
3301}
3302
3303define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) {
3304; X32-LABEL: test_mm_storeu_pd:
3305; X32: # BB#0:
3306; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3307; X32-NEXT: movups %xmm0, (%eax)
3308; X32-NEXT: retl
3309;
3310; X64-LABEL: test_mm_storeu_pd:
3311; X64: # BB#0:
3312; X64-NEXT: movups %xmm0, (%rdi)
3313; X64-NEXT: retq
Simon Pilgrimd64af652016-05-30 18:42:51 +00003314 %arg0 = bitcast double* %a0 to <2 x double>*
3315 store <2 x double> %a1, <2 x double>* %arg0, align 1
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003316 ret void
3317}
3318
3319define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3320; X32-LABEL: test_mm_storeu_si128:
3321; X32: # BB#0:
3322; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3323; X32-NEXT: movups %xmm0, (%eax)
3324; X32-NEXT: retl
3325;
3326; X64-LABEL: test_mm_storeu_si128:
3327; X64: # BB#0:
3328; X64-NEXT: movups %xmm0, (%rdi)
3329; X64-NEXT: retq
Simon Pilgrimd64af652016-05-30 18:42:51 +00003330 store <2 x i64> %a1, <2 x i64>* %a0, align 1
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003331 ret void
3332}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003333
3334define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) {
3335; X32-LABEL: test_mm_stream_pd:
3336; X32: # BB#0:
3337; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3338; X32-NEXT: movntps %xmm0, (%eax)
3339; X32-NEXT: retl
3340;
3341; X64-LABEL: test_mm_stream_pd:
3342; X64: # BB#0:
3343; X64-NEXT: movntps %xmm0, (%rdi)
3344; X64-NEXT: retq
3345 %arg0 = bitcast double* %a0 to <2 x double>*
3346 store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0
3347 ret void
3348}
3349
3350define void @test_mm_stream_si32(i32 *%a0, i32 %a1) {
3351; X32-LABEL: test_mm_stream_si32:
3352; X32: # BB#0:
3353; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3354; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
3355; X32-NEXT: movntil %eax, (%ecx)
3356; X32-NEXT: retl
3357;
3358; X64-LABEL: test_mm_stream_si32:
3359; X64: # BB#0:
3360; X64-NEXT: movntil %esi, (%rdi)
3361; X64-NEXT: retq
3362 store i32 %a1, i32* %a0, align 1, !nontemporal !0
3363 ret void
3364}
3365
3366define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3367; X32-LABEL: test_mm_stream_si128:
3368; X32: # BB#0:
3369; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3370; X32-NEXT: movntps %xmm0, (%eax)
3371; X32-NEXT: retl
3372;
3373; X64-LABEL: test_mm_stream_si128:
3374; X64: # BB#0:
3375; X64-NEXT: movntps %xmm0, (%rdi)
3376; X64-NEXT: retq
3377 store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0
3378 ret void
3379}
3380
3381define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3382; X32-LABEL: test_mm_sub_epi8:
3383; X32: # BB#0:
3384; X32-NEXT: psubb %xmm1, %xmm0
3385; X32-NEXT: retl
3386;
3387; X64-LABEL: test_mm_sub_epi8:
3388; X64: # BB#0:
3389; X64-NEXT: psubb %xmm1, %xmm0
3390; X64-NEXT: retq
3391 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3392 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3393 %res = sub <16 x i8> %arg0, %arg1
3394 %bc = bitcast <16 x i8> %res to <2 x i64>
3395 ret <2 x i64> %bc
3396}
3397
3398define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3399; X32-LABEL: test_mm_sub_epi16:
3400; X32: # BB#0:
3401; X32-NEXT: psubw %xmm1, %xmm0
3402; X32-NEXT: retl
3403;
3404; X64-LABEL: test_mm_sub_epi16:
3405; X64: # BB#0:
3406; X64-NEXT: psubw %xmm1, %xmm0
3407; X64-NEXT: retq
3408 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3409 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3410 %res = sub <8 x i16> %arg0, %arg1
3411 %bc = bitcast <8 x i16> %res to <2 x i64>
3412 ret <2 x i64> %bc
3413}
3414
3415define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3416; X32-LABEL: test_mm_sub_epi32:
3417; X32: # BB#0:
3418; X32-NEXT: psubd %xmm1, %xmm0
3419; X32-NEXT: retl
3420;
3421; X64-LABEL: test_mm_sub_epi32:
3422; X64: # BB#0:
3423; X64-NEXT: psubd %xmm1, %xmm0
3424; X64-NEXT: retq
3425 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3426 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3427 %res = sub <4 x i32> %arg0, %arg1
3428 %bc = bitcast <4 x i32> %res to <2 x i64>
3429 ret <2 x i64> %bc
3430}
3431
3432define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3433; X32-LABEL: test_mm_sub_epi64:
3434; X32: # BB#0:
3435; X32-NEXT: psubq %xmm1, %xmm0
3436; X32-NEXT: retl
3437;
3438; X64-LABEL: test_mm_sub_epi64:
3439; X64: # BB#0:
3440; X64-NEXT: psubq %xmm1, %xmm0
3441; X64-NEXT: retq
3442 %res = sub <2 x i64> %a0, %a1
3443 ret <2 x i64> %res
3444}
3445
3446define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
3447; X32-LABEL: test_mm_sub_pd:
3448; X32: # BB#0:
3449; X32-NEXT: subpd %xmm1, %xmm0
3450; X32-NEXT: retl
3451;
3452; X64-LABEL: test_mm_sub_pd:
3453; X64: # BB#0:
3454; X64-NEXT: subpd %xmm1, %xmm0
3455; X64-NEXT: retq
3456 %res = fsub <2 x double> %a0, %a1
3457 ret <2 x double> %res
3458}
3459
3460define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3461; X32-LABEL: test_mm_sub_sd:
3462; X32: # BB#0:
3463; X32-NEXT: subsd %xmm1, %xmm0
3464; X32-NEXT: retl
3465;
3466; X64-LABEL: test_mm_sub_sd:
3467; X64: # BB#0:
3468; X64-NEXT: subsd %xmm1, %xmm0
3469; X64-NEXT: retq
3470 %ext0 = extractelement <2 x double> %a0, i32 0
3471 %ext1 = extractelement <2 x double> %a1, i32 0
3472 %fsub = fsub double %ext0, %ext1
3473 %res = insertelement <2 x double> %a0, double %fsub, i32 0
3474 ret <2 x double> %res
3475}
3476
3477define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3478; X32-LABEL: test_mm_subs_epi8:
3479; X32: # BB#0:
3480; X32-NEXT: psubsb %xmm1, %xmm0
3481; X32-NEXT: retl
3482;
3483; X64-LABEL: test_mm_subs_epi8:
3484; X64: # BB#0:
3485; X64-NEXT: psubsb %xmm1, %xmm0
3486; X64-NEXT: retq
3487 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3488 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3489 %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %arg0, <16 x i8> %arg1)
3490 %bc = bitcast <16 x i8> %res to <2 x i64>
3491 ret <2 x i64> %bc
3492}
3493declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
3494
3495define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3496; X32-LABEL: test_mm_subs_epi16:
3497; X32: # BB#0:
3498; X32-NEXT: psubsw %xmm1, %xmm0
3499; X32-NEXT: retl
3500;
3501; X64-LABEL: test_mm_subs_epi16:
3502; X64: # BB#0:
3503; X64-NEXT: psubsw %xmm1, %xmm0
3504; X64-NEXT: retq
3505 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3506 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3507 %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %arg0, <8 x i16> %arg1)
3508 %bc = bitcast <8 x i16> %res to <2 x i64>
3509 ret <2 x i64> %bc
3510}
3511declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
3512
3513define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3514; X32-LABEL: test_mm_subs_epu8:
3515; X32: # BB#0:
3516; X32-NEXT: psubusb %xmm1, %xmm0
3517; X32-NEXT: retl
3518;
3519; X64-LABEL: test_mm_subs_epu8:
3520; X64: # BB#0:
3521; X64-NEXT: psubusb %xmm1, %xmm0
3522; X64-NEXT: retq
3523 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3524 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3525 %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %arg0, <16 x i8> %arg1)
3526 %bc = bitcast <16 x i8> %res to <2 x i64>
3527 ret <2 x i64> %bc
3528}
3529declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
3530
3531define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3532; X32-LABEL: test_mm_subs_epu16:
3533; X32: # BB#0:
3534; X32-NEXT: psubusw %xmm1, %xmm0
3535; X32-NEXT: retl
3536;
3537; X64-LABEL: test_mm_subs_epu16:
3538; X64: # BB#0:
3539; X64-NEXT: psubusw %xmm1, %xmm0
3540; X64-NEXT: retq
3541 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3542 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3543 %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %arg0, <8 x i16> %arg1)
3544 %bc = bitcast <8 x i16> %res to <2 x i64>
3545 ret <2 x i64> %bc
3546}
3547declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
3548
3549define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3550; X32-LABEL: test_mm_ucomieq_sd:
3551; X32: # BB#0:
3552; X32-NEXT: ucomisd %xmm1, %xmm0
3553; X32-NEXT: setnp %al
3554; X32-NEXT: sete %cl
3555; X32-NEXT: andb %al, %cl
3556; X32-NEXT: movzbl %cl, %eax
3557; X32-NEXT: retl
3558;
3559; X64-LABEL: test_mm_ucomieq_sd:
3560; X64: # BB#0:
3561; X64-NEXT: ucomisd %xmm1, %xmm0
3562; X64-NEXT: setnp %al
3563; X64-NEXT: sete %cl
3564; X64-NEXT: andb %al, %cl
3565; X64-NEXT: movzbl %cl, %eax
3566; X64-NEXT: retq
3567 %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
3568 ret i32 %res
3569}
3570declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
3571
3572define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3573; X32-LABEL: test_mm_ucomige_sd:
3574; X32: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00003575; X32-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003576; X32-NEXT: ucomisd %xmm1, %xmm0
3577; X32-NEXT: setae %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003578; X32-NEXT: retl
3579;
3580; X64-LABEL: test_mm_ucomige_sd:
3581; X64: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00003582; X64-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003583; X64-NEXT: ucomisd %xmm1, %xmm0
3584; X64-NEXT: setae %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003585; X64-NEXT: retq
3586 %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
3587 ret i32 %res
3588}
3589declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
3590
3591define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3592; X32-LABEL: test_mm_ucomigt_sd:
3593; X32: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00003594; X32-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003595; X32-NEXT: ucomisd %xmm1, %xmm0
3596; X32-NEXT: seta %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003597; X32-NEXT: retl
3598;
3599; X64-LABEL: test_mm_ucomigt_sd:
3600; X64: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00003601; X64-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003602; X64-NEXT: ucomisd %xmm1, %xmm0
3603; X64-NEXT: seta %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003604; X64-NEXT: retq
3605 %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
3606 ret i32 %res
3607}
3608declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
3609
3610define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3611; X32-LABEL: test_mm_ucomile_sd:
3612; X32: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00003613; X32-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003614; X32-NEXT: ucomisd %xmm0, %xmm1
3615; X32-NEXT: setae %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003616; X32-NEXT: retl
3617;
3618; X64-LABEL: test_mm_ucomile_sd:
3619; X64: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00003620; X64-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003621; X64-NEXT: ucomisd %xmm0, %xmm1
3622; X64-NEXT: setae %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003623; X64-NEXT: retq
3624 %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
3625 ret i32 %res
3626}
3627declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
3628
3629define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3630; X32-LABEL: test_mm_ucomilt_sd:
3631; X32: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00003632; X32-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003633; X32-NEXT: ucomisd %xmm0, %xmm1
3634; X32-NEXT: seta %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003635; X32-NEXT: retl
3636;
3637; X64-LABEL: test_mm_ucomilt_sd:
3638; X64: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00003639; X64-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003640; X64-NEXT: ucomisd %xmm0, %xmm1
3641; X64-NEXT: seta %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003642; X64-NEXT: retq
3643 %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
3644 ret i32 %res
3645}
3646declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
3647
3648define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3649; X32-LABEL: test_mm_ucomineq_sd:
3650; X32: # BB#0:
3651; X32-NEXT: ucomisd %xmm1, %xmm0
3652; X32-NEXT: setp %al
3653; X32-NEXT: setne %cl
3654; X32-NEXT: orb %al, %cl
3655; X32-NEXT: movzbl %cl, %eax
3656; X32-NEXT: retl
3657;
3658; X64-LABEL: test_mm_ucomineq_sd:
3659; X64: # BB#0:
3660; X64-NEXT: ucomisd %xmm1, %xmm0
3661; X64-NEXT: setp %al
3662; X64-NEXT: setne %cl
3663; X64-NEXT: orb %al, %cl
3664; X64-NEXT: movzbl %cl, %eax
3665; X64-NEXT: retq
3666 %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
3667 ret i32 %res
3668}
3669declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
3670
3671define <2 x double> @test_mm_undefined_pd() {
3672; X32-LABEL: test_mm_undefined_pd:
3673; X32: # BB#0:
3674; X32-NEXT: retl
3675;
3676; X64-LABEL: test_mm_undefined_pd:
3677; X64: # BB#0:
3678; X64-NEXT: retq
3679 ret <2 x double> undef
3680}
3681
3682define <2 x i64> @test_mm_undefined_si128() {
3683; X32-LABEL: test_mm_undefined_si128:
3684; X32: # BB#0:
3685; X32-NEXT: retl
3686;
3687; X64-LABEL: test_mm_undefined_si128:
3688; X64: # BB#0:
3689; X64-NEXT: retq
3690 ret <2 x i64> undef
3691}
3692
3693define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
3694; X32-LABEL: test_mm_unpackhi_epi8:
3695; X32: # BB#0:
3696; X32-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3697; X32-NEXT: retl
3698;
3699; X64-LABEL: test_mm_unpackhi_epi8:
3700; X64: # BB#0:
3701; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3702; X64-NEXT: retq
3703 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3704 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3705 %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
3706 %bc = bitcast <16 x i8> %res to <2 x i64>
3707 ret <2 x i64> %bc
3708}
3709
3710define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
3711; X32-LABEL: test_mm_unpackhi_epi16:
3712; X32: # BB#0:
3713; X32-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3714; X32-NEXT: retl
3715;
3716; X64-LABEL: test_mm_unpackhi_epi16:
3717; X64: # BB#0:
3718; X64-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3719; X64-NEXT: retq
3720 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3721 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3722 %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
3723 %bc = bitcast <8 x i16> %res to <2 x i64>
3724 ret <2 x i64> %bc
3725}
3726
3727define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
3728; X32-LABEL: test_mm_unpackhi_epi32:
3729; X32: # BB#0:
3730; X32-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3731; X32-NEXT: retl
3732;
3733; X64-LABEL: test_mm_unpackhi_epi32:
3734; X64: # BB#0:
3735; X64-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3736; X64-NEXT: retq
3737 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3738 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3739 %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
3740 %bc = bitcast <4 x i32> %res to <2 x i64>
3741 ret <2 x i64> %bc
3742}
3743
3744define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
3745; X32-LABEL: test_mm_unpackhi_epi64:
3746; X32: # BB#0:
3747; X32-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
3748; X32-NEXT: retl
3749;
3750; X64-LABEL: test_mm_unpackhi_epi64:
3751; X64: # BB#0:
3752; X64-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
3753; X64-NEXT: retq
3754 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
3755 ret <2 x i64> %res
3756}
3757
3758define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
3759; X32-LABEL: test_mm_unpackhi_pd:
3760; X32: # BB#0:
3761; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
3762; X32-NEXT: retl
3763;
3764; X64-LABEL: test_mm_unpackhi_pd:
3765; X64: # BB#0:
3766; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
3767; X64-NEXT: retq
3768 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
3769 ret <2 x double> %res
3770}
3771
3772define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
3773; X32-LABEL: test_mm_unpacklo_epi8:
3774; X32: # BB#0:
3775; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3776; X32-NEXT: retl
3777;
3778; X64-LABEL: test_mm_unpacklo_epi8:
3779; X64: # BB#0:
3780; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3781; X64-NEXT: retq
3782 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3783 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3784 %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
3785 %bc = bitcast <16 x i8> %res to <2 x i64>
3786 ret <2 x i64> %bc
3787}
3788
3789define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
3790; X32-LABEL: test_mm_unpacklo_epi16:
3791; X32: # BB#0:
3792; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3793; X32-NEXT: retl
3794;
3795; X64-LABEL: test_mm_unpacklo_epi16:
3796; X64: # BB#0:
3797; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3798; X64-NEXT: retq
3799 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3800 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3801 %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
3802 %bc = bitcast <8 x i16> %res to <2 x i64>
3803 ret <2 x i64> %bc
3804}
3805
3806define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
3807; X32-LABEL: test_mm_unpacklo_epi32:
3808; X32: # BB#0:
3809; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3810; X32-NEXT: retl
3811;
3812; X64-LABEL: test_mm_unpacklo_epi32:
3813; X64: # BB#0:
3814; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3815; X64-NEXT: retq
3816 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3817 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3818 %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
3819 %bc = bitcast <4 x i32> %res to <2 x i64>
3820 ret <2 x i64> %bc
3821}
3822
3823define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
3824; X32-LABEL: test_mm_unpacklo_epi64:
3825; X32: # BB#0:
3826; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3827; X32-NEXT: retl
3828;
3829; X64-LABEL: test_mm_unpacklo_epi64:
3830; X64: # BB#0:
3831; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3832; X64-NEXT: retq
3833 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
3834 ret <2 x i64> %res
3835}
3836
3837define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
3838; X32-LABEL: test_mm_unpacklo_pd:
3839; X32: # BB#0:
3840; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3841; X32-NEXT: retl
3842;
3843; X64-LABEL: test_mm_unpacklo_pd:
3844; X64: # BB#0:
3845; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3846; X64-NEXT: retq
3847 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
3848 ret <2 x double> %res
3849}
3850
3851define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
3852; X32-LABEL: test_mm_xor_pd:
3853; X32: # BB#0:
3854; X32-NEXT: xorps %xmm1, %xmm0
3855; X32-NEXT: retl
3856;
3857; X64-LABEL: test_mm_xor_pd:
3858; X64: # BB#0:
3859; X64-NEXT: xorps %xmm1, %xmm0
3860; X64-NEXT: retq
3861 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
3862 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
3863 %res = xor <4 x i32> %arg0, %arg1
3864 %bc = bitcast <4 x i32> %res to <2 x double>
3865 ret <2 x double> %bc
3866}
3867
3868define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3869; X32-LABEL: test_mm_xor_si128:
3870; X32: # BB#0:
3871; X32-NEXT: xorps %xmm1, %xmm0
3872; X32-NEXT: retl
3873;
3874; X64-LABEL: test_mm_xor_si128:
3875; X64: # BB#0:
3876; X64-NEXT: xorps %xmm1, %xmm0
3877; X64-NEXT: retq
3878 %res = xor <2 x i64> %a0, %a1
3879 ret <2 x i64> %res
3880}
3881
3882!0 = !{i32 1}
3883