blob: 803f364a8248e46eb325519c98072b2341c43c4e [file] [log] [blame]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32
3; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X64
4
5; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c
6
7define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
8; X32-LABEL: test_mm_add_epi8:
9; X32: # BB#0:
10; X32-NEXT: paddb %xmm1, %xmm0
11; X32-NEXT: retl
12;
13; X64-LABEL: test_mm_add_epi8:
14; X64: # BB#0:
15; X64-NEXT: paddb %xmm1, %xmm0
16; X64-NEXT: retq
17 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
18 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
19 %res = add <16 x i8> %arg0, %arg1
20 %bc = bitcast <16 x i8> %res to <2 x i64>
21 ret <2 x i64> %bc
22}
23
24define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
25; X32-LABEL: test_mm_add_epi16:
26; X32: # BB#0:
27; X32-NEXT: paddw %xmm1, %xmm0
28; X32-NEXT: retl
29;
30; X64-LABEL: test_mm_add_epi16:
31; X64: # BB#0:
32; X64-NEXT: paddw %xmm1, %xmm0
33; X64-NEXT: retq
34 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
35 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
36 %res = add <8 x i16> %arg0, %arg1
37 %bc = bitcast <8 x i16> %res to <2 x i64>
38 ret <2 x i64> %bc
39}
40
41define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
42; X32-LABEL: test_mm_add_epi32:
43; X32: # BB#0:
44; X32-NEXT: paddd %xmm1, %xmm0
45; X32-NEXT: retl
46;
47; X64-LABEL: test_mm_add_epi32:
48; X64: # BB#0:
49; X64-NEXT: paddd %xmm1, %xmm0
50; X64-NEXT: retq
51 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
52 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
53 %res = add <4 x i32> %arg0, %arg1
54 %bc = bitcast <4 x i32> %res to <2 x i64>
55 ret <2 x i64> %bc
56}
57
58define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
59; X32-LABEL: test_mm_add_epi64:
60; X32: # BB#0:
61; X32-NEXT: paddq %xmm1, %xmm0
62; X32-NEXT: retl
63;
64; X64-LABEL: test_mm_add_epi64:
65; X64: # BB#0:
66; X64-NEXT: paddq %xmm1, %xmm0
67; X64-NEXT: retq
68 %res = add <2 x i64> %a0, %a1
69 ret <2 x i64> %res
70}
71
72define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
73; X32-LABEL: test_mm_add_pd:
74; X32: # BB#0:
75; X32-NEXT: addpd %xmm1, %xmm0
76; X32-NEXT: retl
77;
78; X64-LABEL: test_mm_add_pd:
79; X64: # BB#0:
80; X64-NEXT: addpd %xmm1, %xmm0
81; X64-NEXT: retq
82 %res = fadd <2 x double> %a0, %a1
83 ret <2 x double> %res
84}
85
86define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
87; X32-LABEL: test_mm_add_sd:
88; X32: # BB#0:
89; X32-NEXT: addsd %xmm1, %xmm0
90; X32-NEXT: retl
91;
92; X64-LABEL: test_mm_add_sd:
93; X64: # BB#0:
94; X64-NEXT: addsd %xmm1, %xmm0
95; X64-NEXT: retq
96 %ext0 = extractelement <2 x double> %a0, i32 0
97 %ext1 = extractelement <2 x double> %a1, i32 0
98 %fadd = fadd double %ext0, %ext1
99 %res = insertelement <2 x double> %a0, double %fadd, i32 0
100 ret <2 x double> %res
101}
102
103define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
104; X32-LABEL: test_mm_adds_epi8:
105; X32: # BB#0:
106; X32-NEXT: paddsb %xmm1, %xmm0
107; X32-NEXT: retl
108;
109; X64-LABEL: test_mm_adds_epi8:
110; X64: # BB#0:
111; X64-NEXT: paddsb %xmm1, %xmm0
112; X64-NEXT: retq
113 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
114 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
115 %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %arg0, <16 x i8> %arg1)
116 %bc = bitcast <16 x i8> %res to <2 x i64>
117 ret <2 x i64> %bc
118}
119declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
120
121define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
122; X32-LABEL: test_mm_adds_epi16:
123; X32: # BB#0:
124; X32-NEXT: paddsw %xmm1, %xmm0
125; X32-NEXT: retl
126;
127; X64-LABEL: test_mm_adds_epi16:
128; X64: # BB#0:
129; X64-NEXT: paddsw %xmm1, %xmm0
130; X64-NEXT: retq
131 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
132 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
133 %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %arg0, <8 x i16> %arg1)
134 %bc = bitcast <8 x i16> %res to <2 x i64>
135 ret <2 x i64> %bc
136}
137declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
138
139define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
140; X32-LABEL: test_mm_adds_epu8:
141; X32: # BB#0:
142; X32-NEXT: paddusb %xmm1, %xmm0
143; X32-NEXT: retl
144;
145; X64-LABEL: test_mm_adds_epu8:
146; X64: # BB#0:
147; X64-NEXT: paddusb %xmm1, %xmm0
148; X64-NEXT: retq
149 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
150 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
151 %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %arg0, <16 x i8> %arg1)
152 %bc = bitcast <16 x i8> %res to <2 x i64>
153 ret <2 x i64> %bc
154}
155declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
156
157define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
158; X32-LABEL: test_mm_adds_epu16:
159; X32: # BB#0:
160; X32-NEXT: paddusw %xmm1, %xmm0
161; X32-NEXT: retl
162;
163; X64-LABEL: test_mm_adds_epu16:
164; X64: # BB#0:
165; X64-NEXT: paddusw %xmm1, %xmm0
166; X64-NEXT: retq
167 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
168 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
169 %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %arg0, <8 x i16> %arg1)
170 %bc = bitcast <8 x i16> %res to <2 x i64>
171 ret <2 x i64> %bc
172}
173declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
174
175define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
176; X32-LABEL: test_mm_and_pd:
177; X32: # BB#0:
178; X32-NEXT: andps %xmm1, %xmm0
179; X32-NEXT: retl
180;
181; X64-LABEL: test_mm_and_pd:
182; X64: # BB#0:
183; X64-NEXT: andps %xmm1, %xmm0
184; X64-NEXT: retq
185 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
186 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
187 %res = and <4 x i32> %arg0, %arg1
188 %bc = bitcast <4 x i32> %res to <2 x double>
189 ret <2 x double> %bc
190}
191
192define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
193; X32-LABEL: test_mm_and_si128:
194; X32: # BB#0:
195; X32-NEXT: andps %xmm1, %xmm0
196; X32-NEXT: retl
197;
198; X64-LABEL: test_mm_and_si128:
199; X64: # BB#0:
200; X64-NEXT: andps %xmm1, %xmm0
201; X64-NEXT: retq
202 %res = and <2 x i64> %a0, %a1
203 ret <2 x i64> %res
204}
205
206define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
207; X32-LABEL: test_mm_andnot_pd:
208; X32: # BB#0:
209; X32-NEXT: andnps %xmm1, %xmm0
210; X32-NEXT: retl
211;
212; X64-LABEL: test_mm_andnot_pd:
213; X64: # BB#0:
214; X64-NEXT: andnps %xmm1, %xmm0
215; X64-NEXT: retq
216 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
217 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
218 %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
219 %res = and <4 x i32> %not, %arg1
220 %bc = bitcast <4 x i32> %res to <2 x double>
221 ret <2 x double> %bc
222}
223
224define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
225; X32-LABEL: test_mm_andnot_si128:
226; X32: # BB#0:
227; X32-NEXT: pcmpeqd %xmm2, %xmm2
228; X32-NEXT: pxor %xmm2, %xmm0
229; X32-NEXT: pand %xmm1, %xmm0
230; X32-NEXT: retl
231;
232; X64-LABEL: test_mm_andnot_si128:
233; X64: # BB#0:
234; X64-NEXT: pcmpeqd %xmm2, %xmm2
235; X64-NEXT: pxor %xmm2, %xmm0
236; X64-NEXT: pand %xmm1, %xmm0
237; X64-NEXT: retq
238 %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
239 %res = and <2 x i64> %not, %a1
240 ret <2 x i64> %res
241}
242
243define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
244; X32-LABEL: test_mm_avg_epu8:
245; X32: # BB#0:
246; X32-NEXT: pavgb %xmm1, %xmm0
247; X32-NEXT: retl
248;
249; X64-LABEL: test_mm_avg_epu8:
250; X64: # BB#0:
251; X64-NEXT: pavgb %xmm1, %xmm0
252; X64-NEXT: retq
253 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
254 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
255 %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1)
256 %bc = bitcast <16 x i8> %res to <2 x i64>
257 ret <2 x i64> %bc
258}
259declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1) nounwind readnone
260
261define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
262; X32-LABEL: test_mm_avg_epu16:
263; X32: # BB#0:
264; X32-NEXT: pavgw %xmm1, %xmm0
265; X32-NEXT: retl
266;
267; X64-LABEL: test_mm_avg_epu16:
268; X64: # BB#0:
269; X64-NEXT: pavgw %xmm1, %xmm0
270; X64-NEXT: retq
271 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
272 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
273 %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %arg0, <8 x i16> %arg1)
274 %bc = bitcast <8 x i16> %res to <2 x i64>
275 ret <2 x i64> %bc
276}
277declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
278
279define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
280; X32-LABEL: test_mm_bslli_si128:
281; X32: # BB#0:
282; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
283; X32-NEXT: retl
284;
285; X64-LABEL: test_mm_bslli_si128:
286; X64: # BB#0:
287; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
288; X64-NEXT: retq
289 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
290 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
291 %bc = bitcast <16 x i8> %res to <2 x i64>
292 ret <2 x i64> %bc
293}
294
295define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
296; X32-LABEL: test_mm_bsrli_si128:
297; X32: # BB#0:
298; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
299; X32-NEXT: retl
300;
301; X64-LABEL: test_mm_bsrli_si128:
302; X64: # BB#0:
303; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
304; X64-NEXT: retq
305 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
306 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
307 %bc = bitcast <16 x i8> %res to <2 x i64>
308 ret <2 x i64> %bc
309}
310
Simon Pilgrim01809e02016-05-19 10:58:54 +0000311define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
312; X32-LABEL: test_mm_castpd_ps:
313; X32: # BB#0:
314; X32-NEXT: retl
315;
316; X64-LABEL: test_mm_castpd_ps:
317; X64: # BB#0:
318; X64-NEXT: retq
319 %res = bitcast <2 x double> %a0 to <4 x float>
320 ret <4 x float> %res
321}
322
323define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
324; X32-LABEL: test_mm_castpd_si128:
325; X32: # BB#0:
326; X32-NEXT: retl
327;
328; X64-LABEL: test_mm_castpd_si128:
329; X64: # BB#0:
330; X64-NEXT: retq
331 %res = bitcast <2 x double> %a0 to <2 x i64>
332 ret <2 x i64> %res
333}
334
335define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
336; X32-LABEL: test_mm_castps_pd:
337; X32: # BB#0:
338; X32-NEXT: retl
339;
340; X64-LABEL: test_mm_castps_pd:
341; X64: # BB#0:
342; X64-NEXT: retq
343 %res = bitcast <4 x float> %a0 to <2 x double>
344 ret <2 x double> %res
345}
346
347define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
348; X32-LABEL: test_mm_castps_si128:
349; X32: # BB#0:
350; X32-NEXT: retl
351;
352; X64-LABEL: test_mm_castps_si128:
353; X64: # BB#0:
354; X64-NEXT: retq
355 %res = bitcast <4 x float> %a0 to <2 x i64>
356 ret <2 x i64> %res
357}
358
359define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
360; X32-LABEL: test_mm_castsi128_pd:
361; X32: # BB#0:
362; X32-NEXT: retl
363;
364; X64-LABEL: test_mm_castsi128_pd:
365; X64: # BB#0:
366; X64-NEXT: retq
367 %res = bitcast <2 x i64> %a0 to <2 x double>
368 ret <2 x double> %res
369}
370
371define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
372; X32-LABEL: test_mm_castsi128_ps:
373; X32: # BB#0:
374; X32-NEXT: retl
375;
376; X64-LABEL: test_mm_castsi128_ps:
377; X64: # BB#0:
378; X64-NEXT: retq
379 %res = bitcast <2 x i64> %a0 to <4 x float>
380 ret <4 x float> %res
381}
382
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000383define void @test_mm_clflush(i8* %a0) nounwind {
384; X32-LABEL: test_mm_clflush:
385; X32: # BB#0:
386; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
387; X32-NEXT: clflush (%eax)
388; X32-NEXT: retl
389;
390; X64-LABEL: test_mm_clflush:
391; X64: # BB#0:
392; X64-NEXT: clflush (%rdi)
393; X64-NEXT: retq
394 call void @llvm.x86.sse2.clflush(i8* %a0)
395 ret void
396}
397declare void @llvm.x86.sse2.clflush(i8*) nounwind readnone
398
399define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
400; X32-LABEL: test_mm_cmpeq_epi8:
401; X32: # BB#0:
402; X32-NEXT: pcmpeqb %xmm1, %xmm0
403; X32-NEXT: retl
404;
405; X64-LABEL: test_mm_cmpeq_epi8:
406; X64: # BB#0:
407; X64-NEXT: pcmpeqb %xmm1, %xmm0
408; X64-NEXT: retq
409 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
410 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
411 %cmp = icmp eq <16 x i8> %arg0, %arg1
412 %res = sext <16 x i1> %cmp to <16 x i8>
413 %bc = bitcast <16 x i8> %res to <2 x i64>
414 ret <2 x i64> %bc
415}
416
417define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
418; X32-LABEL: test_mm_cmpeq_epi16:
419; X32: # BB#0:
420; X32-NEXT: pcmpeqw %xmm1, %xmm0
421; X32-NEXT: retl
422;
423; X64-LABEL: test_mm_cmpeq_epi16:
424; X64: # BB#0:
425; X64-NEXT: pcmpeqw %xmm1, %xmm0
426; X64-NEXT: retq
427 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
428 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
429 %cmp = icmp eq <8 x i16> %arg0, %arg1
430 %res = sext <8 x i1> %cmp to <8 x i16>
431 %bc = bitcast <8 x i16> %res to <2 x i64>
432 ret <2 x i64> %bc
433}
434
435define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
436; X32-LABEL: test_mm_cmpeq_epi32:
437; X32: # BB#0:
438; X32-NEXT: pcmpeqd %xmm1, %xmm0
439; X32-NEXT: retl
440;
441; X64-LABEL: test_mm_cmpeq_epi32:
442; X64: # BB#0:
443; X64-NEXT: pcmpeqd %xmm1, %xmm0
444; X64-NEXT: retq
445 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
446 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
447 %cmp = icmp eq <4 x i32> %arg0, %arg1
448 %res = sext <4 x i1> %cmp to <4 x i32>
449 %bc = bitcast <4 x i32> %res to <2 x i64>
450 ret <2 x i64> %bc
451}
452
453define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
454; X32-LABEL: test_mm_cmpeq_pd:
455; X32: # BB#0:
456; X32-NEXT: cmpeqpd %xmm1, %xmm0
457; X32-NEXT: retl
458;
459; X64-LABEL: test_mm_cmpeq_pd:
460; X64: # BB#0:
461; X64-NEXT: cmpeqpd %xmm1, %xmm0
462; X64-NEXT: retq
Sanjay Patel74b40bd2016-06-15 21:22:15 +0000463 %fcmp = fcmp oeq <2 x double> %a0, %a1
464 %sext = sext <2 x i1> %fcmp to <2 x i64>
465 %res = bitcast <2 x i64> %sext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000466 ret <2 x double> %res
467}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000468
469define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
470; X32-LABEL: test_mm_cmpeq_sd:
471; X32: # BB#0:
472; X32-NEXT: cmpeqsd %xmm1, %xmm0
473; X32-NEXT: retl
474;
475; X64-LABEL: test_mm_cmpeq_sd:
476; X64: # BB#0:
477; X64-NEXT: cmpeqsd %xmm1, %xmm0
478; X64-NEXT: retq
479 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
480 ret <2 x double> %res
481}
482declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
483
484define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
485; X32-LABEL: test_mm_cmpge_pd:
486; X32: # BB#0:
487; X32-NEXT: cmplepd %xmm0, %xmm1
488; X32-NEXT: movapd %xmm1, %xmm0
489; X32-NEXT: retl
490;
491; X64-LABEL: test_mm_cmpge_pd:
492; X64: # BB#0:
493; X64-NEXT: cmplepd %xmm0, %xmm1
494; X64-NEXT: movapd %xmm1, %xmm0
495; X64-NEXT: retq
Sanjay Patel74b40bd2016-06-15 21:22:15 +0000496 %fcmp = fcmp ole <2 x double> %a1, %a0
497 %sext = sext <2 x i1> %fcmp to <2 x i64>
498 %res = bitcast <2 x i64> %sext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000499 ret <2 x double> %res
500}
501
502define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
503; X32-LABEL: test_mm_cmpge_sd:
504; X32: # BB#0:
505; X32-NEXT: cmplesd %xmm0, %xmm1
506; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
507; X32-NEXT: retl
508;
509; X64-LABEL: test_mm_cmpge_sd:
510; X64: # BB#0:
511; X64-NEXT: cmplesd %xmm0, %xmm1
512; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
513; X64-NEXT: retq
514 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
515 %ext0 = extractelement <2 x double> %cmp, i32 0
516 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
517 %ext1 = extractelement <2 x double> %a0, i32 1
518 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
519 ret <2 x double> %ins1
520}
521
522define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
523; X32-LABEL: test_mm_cmpgt_epi8:
524; X32: # BB#0:
525; X32-NEXT: pcmpgtb %xmm1, %xmm0
526; X32-NEXT: retl
527;
528; X64-LABEL: test_mm_cmpgt_epi8:
529; X64: # BB#0:
530; X64-NEXT: pcmpgtb %xmm1, %xmm0
531; X64-NEXT: retq
532 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
533 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
534 %cmp = icmp sgt <16 x i8> %arg0, %arg1
535 %res = sext <16 x i1> %cmp to <16 x i8>
536 %bc = bitcast <16 x i8> %res to <2 x i64>
537 ret <2 x i64> %bc
538}
539
540define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
541; X32-LABEL: test_mm_cmpgt_epi16:
542; X32: # BB#0:
543; X32-NEXT: pcmpgtw %xmm1, %xmm0
544; X32-NEXT: retl
545;
546; X64-LABEL: test_mm_cmpgt_epi16:
547; X64: # BB#0:
548; X64-NEXT: pcmpgtw %xmm1, %xmm0
549; X64-NEXT: retq
550 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
551 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
552 %cmp = icmp sgt <8 x i16> %arg0, %arg1
553 %res = sext <8 x i1> %cmp to <8 x i16>
554 %bc = bitcast <8 x i16> %res to <2 x i64>
555 ret <2 x i64> %bc
556}
557
558define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
559; X32-LABEL: test_mm_cmpgt_epi32:
560; X32: # BB#0:
561; X32-NEXT: pcmpgtd %xmm1, %xmm0
562; X32-NEXT: retl
563;
564; X64-LABEL: test_mm_cmpgt_epi32:
565; X64: # BB#0:
566; X64-NEXT: pcmpgtd %xmm1, %xmm0
567; X64-NEXT: retq
568 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
569 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
570 %cmp = icmp sgt <4 x i32> %arg0, %arg1
571 %res = sext <4 x i1> %cmp to <4 x i32>
572 %bc = bitcast <4 x i32> %res to <2 x i64>
573 ret <2 x i64> %bc
574}
575
576define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
577; X32-LABEL: test_mm_cmpgt_pd:
578; X32: # BB#0:
579; X32-NEXT: cmpltpd %xmm0, %xmm1
580; X32-NEXT: movapd %xmm1, %xmm0
581; X32-NEXT: retl
582;
583; X64-LABEL: test_mm_cmpgt_pd:
584; X64: # BB#0:
585; X64-NEXT: cmpltpd %xmm0, %xmm1
586; X64-NEXT: movapd %xmm1, %xmm0
587; X64-NEXT: retq
Sanjay Patel74b40bd2016-06-15 21:22:15 +0000588 %fcmp = fcmp olt <2 x double> %a1, %a0
589 %sext = sext <2 x i1> %fcmp to <2 x i64>
590 %res = bitcast <2 x i64> %sext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000591 ret <2 x double> %res
592}
593
594define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
595; X32-LABEL: test_mm_cmpgt_sd:
596; X32: # BB#0:
597; X32-NEXT: cmpltsd %xmm0, %xmm1
598; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
599; X32-NEXT: retl
600;
601; X64-LABEL: test_mm_cmpgt_sd:
602; X64: # BB#0:
603; X64-NEXT: cmpltsd %xmm0, %xmm1
604; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
605; X64-NEXT: retq
606 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
607 %ext0 = extractelement <2 x double> %cmp, i32 0
608 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
609 %ext1 = extractelement <2 x double> %a0, i32 1
610 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
611 ret <2 x double> %ins1
612}
613
614define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
615; X32-LABEL: test_mm_cmple_pd:
616; X32: # BB#0:
617; X32-NEXT: cmplepd %xmm1, %xmm0
618; X32-NEXT: retl
619;
620; X64-LABEL: test_mm_cmple_pd:
621; X64: # BB#0:
622; X64-NEXT: cmplepd %xmm1, %xmm0
623; X64-NEXT: retq
Sanjay Patel74b40bd2016-06-15 21:22:15 +0000624 %fcmp = fcmp ole <2 x double> %a0, %a1
625 %sext = sext <2 x i1> %fcmp to <2 x i64>
626 %res = bitcast <2 x i64> %sext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000627 ret <2 x double> %res
628}
629
630define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
631; X32-LABEL: test_mm_cmple_sd:
632; X32: # BB#0:
633; X32-NEXT: cmplesd %xmm1, %xmm0
634; X32-NEXT: retl
635;
636; X64-LABEL: test_mm_cmple_sd:
637; X64: # BB#0:
638; X64-NEXT: cmplesd %xmm1, %xmm0
639; X64-NEXT: retq
640 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
641 ret <2 x double> %res
642}
643
644define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
645; X32-LABEL: test_mm_cmplt_epi8:
646; X32: # BB#0:
647; X32-NEXT: pcmpgtb %xmm0, %xmm1
648; X32-NEXT: movdqa %xmm1, %xmm0
649; X32-NEXT: retl
650;
651; X64-LABEL: test_mm_cmplt_epi8:
652; X64: # BB#0:
653; X64-NEXT: pcmpgtb %xmm0, %xmm1
654; X64-NEXT: movdqa %xmm1, %xmm0
655; X64-NEXT: retq
656 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
657 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
658 %cmp = icmp sgt <16 x i8> %arg1, %arg0
659 %res = sext <16 x i1> %cmp to <16 x i8>
660 %bc = bitcast <16 x i8> %res to <2 x i64>
661 ret <2 x i64> %bc
662}
663
664define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
665; X32-LABEL: test_mm_cmplt_epi16:
666; X32: # BB#0:
667; X32-NEXT: pcmpgtw %xmm0, %xmm1
668; X32-NEXT: movdqa %xmm1, %xmm0
669; X32-NEXT: retl
670;
671; X64-LABEL: test_mm_cmplt_epi16:
672; X64: # BB#0:
673; X64-NEXT: pcmpgtw %xmm0, %xmm1
674; X64-NEXT: movdqa %xmm1, %xmm0
675; X64-NEXT: retq
676 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
677 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
678 %cmp = icmp sgt <8 x i16> %arg1, %arg0
679 %res = sext <8 x i1> %cmp to <8 x i16>
680 %bc = bitcast <8 x i16> %res to <2 x i64>
681 ret <2 x i64> %bc
682}
683
684define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
685; X32-LABEL: test_mm_cmplt_epi32:
686; X32: # BB#0:
687; X32-NEXT: pcmpgtd %xmm0, %xmm1
688; X32-NEXT: movdqa %xmm1, %xmm0
689; X32-NEXT: retl
690;
691; X64-LABEL: test_mm_cmplt_epi32:
692; X64: # BB#0:
693; X64-NEXT: pcmpgtd %xmm0, %xmm1
694; X64-NEXT: movdqa %xmm1, %xmm0
695; X64-NEXT: retq
696 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
697 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
698 %cmp = icmp sgt <4 x i32> %arg1, %arg0
699 %res = sext <4 x i1> %cmp to <4 x i32>
700 %bc = bitcast <4 x i32> %res to <2 x i64>
701 ret <2 x i64> %bc
702}
703
704define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
705; X32-LABEL: test_mm_cmplt_pd:
706; X32: # BB#0:
707; X32-NEXT: cmpltpd %xmm1, %xmm0
708; X32-NEXT: retl
709;
710; X64-LABEL: test_mm_cmplt_pd:
711; X64: # BB#0:
712; X64-NEXT: cmpltpd %xmm1, %xmm0
713; X64-NEXT: retq
Sanjay Patel74b40bd2016-06-15 21:22:15 +0000714 %fcmp = fcmp olt <2 x double> %a0, %a1
715 %sext = sext <2 x i1> %fcmp to <2 x i64>
716 %res = bitcast <2 x i64> %sext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000717 ret <2 x double> %res
718}
719
720define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
721; X32-LABEL: test_mm_cmplt_sd:
722; X32: # BB#0:
723; X32-NEXT: cmpltsd %xmm1, %xmm0
724; X32-NEXT: retl
725;
726; X64-LABEL: test_mm_cmplt_sd:
727; X64: # BB#0:
728; X64-NEXT: cmpltsd %xmm1, %xmm0
729; X64-NEXT: retq
730 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
731 ret <2 x double> %res
732}
733
734define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
735; X32-LABEL: test_mm_cmpneq_pd:
736; X32: # BB#0:
737; X32-NEXT: cmpneqpd %xmm1, %xmm0
738; X32-NEXT: retl
739;
740; X64-LABEL: test_mm_cmpneq_pd:
741; X64: # BB#0:
742; X64-NEXT: cmpneqpd %xmm1, %xmm0
743; X64-NEXT: retq
Sanjay Patel74b40bd2016-06-15 21:22:15 +0000744 %fcmp = fcmp une <2 x double> %a0, %a1
745 %sext = sext <2 x i1> %fcmp to <2 x i64>
746 %res = bitcast <2 x i64> %sext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000747 ret <2 x double> %res
748}
749
750define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
751; X32-LABEL: test_mm_cmpneq_sd:
752; X32: # BB#0:
753; X32-NEXT: cmpneqsd %xmm1, %xmm0
754; X32-NEXT: retl
755;
756; X64-LABEL: test_mm_cmpneq_sd:
757; X64: # BB#0:
758; X64-NEXT: cmpneqsd %xmm1, %xmm0
759; X64-NEXT: retq
760 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
761 ret <2 x double> %res
762}
763
764define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
765; X32-LABEL: test_mm_cmpnge_pd:
766; X32: # BB#0:
767; X32-NEXT: cmpnlepd %xmm0, %xmm1
768; X32-NEXT: movapd %xmm1, %xmm0
769; X32-NEXT: retl
770;
771; X64-LABEL: test_mm_cmpnge_pd:
772; X64: # BB#0:
773; X64-NEXT: cmpnlepd %xmm0, %xmm1
774; X64-NEXT: movapd %xmm1, %xmm0
775; X64-NEXT: retq
Sanjay Patel74b40bd2016-06-15 21:22:15 +0000776 %fcmp = fcmp ugt <2 x double> %a1, %a0
777 %sext = sext <2 x i1> %fcmp to <2 x i64>
778 %res = bitcast <2 x i64> %sext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000779 ret <2 x double> %res
780}
781
782define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
783; X32-LABEL: test_mm_cmpnge_sd:
784; X32: # BB#0:
785; X32-NEXT: cmpnlesd %xmm0, %xmm1
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000786; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000787; X32-NEXT: retl
788;
789; X64-LABEL: test_mm_cmpnge_sd:
790; X64: # BB#0:
791; X64-NEXT: cmpnlesd %xmm0, %xmm1
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000792; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000793; X64-NEXT: retq
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000794 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
795 %ext0 = extractelement <2 x double> %cmp, i32 0
796 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
797 %ext1 = extractelement <2 x double> %a0, i32 1
798 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
799 ret <2 x double> %ins1
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000800}
801
802define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
803; X32-LABEL: test_mm_cmpngt_pd:
804; X32: # BB#0:
805; X32-NEXT: cmpnltpd %xmm0, %xmm1
806; X32-NEXT: movapd %xmm1, %xmm0
807; X32-NEXT: retl
808;
809; X64-LABEL: test_mm_cmpngt_pd:
810; X64: # BB#0:
811; X64-NEXT: cmpnltpd %xmm0, %xmm1
812; X64-NEXT: movapd %xmm1, %xmm0
813; X64-NEXT: retq
Sanjay Patel74b40bd2016-06-15 21:22:15 +0000814 %fcmp = fcmp uge <2 x double> %a1, %a0
815 %sext = sext <2 x i1> %fcmp to <2 x i64>
816 %res = bitcast <2 x i64> %sext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000817 ret <2 x double> %res
818}
819
820define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
821; X32-LABEL: test_mm_cmpngt_sd:
822; X32: # BB#0:
823; X32-NEXT: cmpnltsd %xmm0, %xmm1
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000824; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000825; X32-NEXT: retl
826;
827; X64-LABEL: test_mm_cmpngt_sd:
828; X64: # BB#0:
829; X64-NEXT: cmpnltsd %xmm0, %xmm1
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000830; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000831; X64-NEXT: retq
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000832 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
833 %ext0 = extractelement <2 x double> %cmp, i32 0
834 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
835 %ext1 = extractelement <2 x double> %a0, i32 1
836 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
837 ret <2 x double> %ins1
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000838}
839
840define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
841; X32-LABEL: test_mm_cmpnle_pd:
842; X32: # BB#0:
843; X32-NEXT: cmpnlepd %xmm1, %xmm0
844; X32-NEXT: retl
845;
846; X64-LABEL: test_mm_cmpnle_pd:
847; X64: # BB#0:
848; X64-NEXT: cmpnlepd %xmm1, %xmm0
849; X64-NEXT: retq
Sanjay Patel74b40bd2016-06-15 21:22:15 +0000850 %fcmp = fcmp ugt <2 x double> %a0, %a1
851 %sext = sext <2 x i1> %fcmp to <2 x i64>
852 %res = bitcast <2 x i64> %sext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000853 ret <2 x double> %res
854}
855
856define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
857; X32-LABEL: test_mm_cmpnle_sd:
858; X32: # BB#0:
859; X32-NEXT: cmpnlesd %xmm1, %xmm0
860; X32-NEXT: retl
861;
862; X64-LABEL: test_mm_cmpnle_sd:
863; X64: # BB#0:
864; X64-NEXT: cmpnlesd %xmm1, %xmm0
865; X64-NEXT: retq
866 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
867 ret <2 x double> %res
868}
869
870define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
871; X32-LABEL: test_mm_cmpnlt_pd:
872; X32: # BB#0:
873; X32-NEXT: cmpnltpd %xmm1, %xmm0
874; X32-NEXT: retl
875;
876; X64-LABEL: test_mm_cmpnlt_pd:
877; X64: # BB#0:
878; X64-NEXT: cmpnltpd %xmm1, %xmm0
879; X64-NEXT: retq
Sanjay Patel74b40bd2016-06-15 21:22:15 +0000880 %fcmp = fcmp uge <2 x double> %a0, %a1
881 %sext = sext <2 x i1> %fcmp to <2 x i64>
882 %res = bitcast <2 x i64> %sext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000883 ret <2 x double> %res
884}
885
886define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
887; X32-LABEL: test_mm_cmpnlt_sd:
888; X32: # BB#0:
889; X32-NEXT: cmpnltsd %xmm1, %xmm0
890; X32-NEXT: retl
891;
892; X64-LABEL: test_mm_cmpnlt_sd:
893; X64: # BB#0:
894; X64-NEXT: cmpnltsd %xmm1, %xmm0
895; X64-NEXT: retq
896 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
897 ret <2 x double> %res
898}
899
900define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
901; X32-LABEL: test_mm_cmpord_pd:
902; X32: # BB#0:
903; X32-NEXT: cmpordpd %xmm1, %xmm0
904; X32-NEXT: retl
905;
906; X64-LABEL: test_mm_cmpord_pd:
907; X64: # BB#0:
908; X64-NEXT: cmpordpd %xmm1, %xmm0
909; X64-NEXT: retq
Sanjay Patel74b40bd2016-06-15 21:22:15 +0000910 %fcmp = fcmp ord <2 x double> %a0, %a1
911 %sext = sext <2 x i1> %fcmp to <2 x i64>
912 %res = bitcast <2 x i64> %sext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000913 ret <2 x double> %res
914}
915
916define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
917; X32-LABEL: test_mm_cmpord_sd:
918; X32: # BB#0:
919; X32-NEXT: cmpordsd %xmm1, %xmm0
920; X32-NEXT: retl
921;
922; X64-LABEL: test_mm_cmpord_sd:
923; X64: # BB#0:
924; X64-NEXT: cmpordsd %xmm1, %xmm0
925; X64-NEXT: retq
926 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
927 ret <2 x double> %res
928}
929
930define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
931; X32-LABEL: test_mm_cmpunord_pd:
932; X32: # BB#0:
933; X32-NEXT: cmpunordpd %xmm1, %xmm0
934; X32-NEXT: retl
935;
936; X64-LABEL: test_mm_cmpunord_pd:
937; X64: # BB#0:
938; X64-NEXT: cmpunordpd %xmm1, %xmm0
939; X64-NEXT: retq
Sanjay Patel74b40bd2016-06-15 21:22:15 +0000940 %fcmp = fcmp uno <2 x double> %a0, %a1
941 %sext = sext <2 x i1> %fcmp to <2 x i64>
942 %res = bitcast <2 x i64> %sext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000943 ret <2 x double> %res
944}
945
946define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
947; X32-LABEL: test_mm_cmpunord_sd:
948; X32: # BB#0:
949; X32-NEXT: cmpunordsd %xmm1, %xmm0
950; X32-NEXT: retl
951;
952; X64-LABEL: test_mm_cmpunord_sd:
953; X64: # BB#0:
954; X64-NEXT: cmpunordsd %xmm1, %xmm0
955; X64-NEXT: retq
956 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3)
957 ret <2 x double> %res
958}
959
960define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
961; X32-LABEL: test_mm_comieq_sd:
962; X32: # BB#0:
963; X32-NEXT: comisd %xmm1, %xmm0
964; X32-NEXT: setnp %al
965; X32-NEXT: sete %cl
966; X32-NEXT: andb %al, %cl
967; X32-NEXT: movzbl %cl, %eax
968; X32-NEXT: retl
969;
970; X64-LABEL: test_mm_comieq_sd:
971; X64: # BB#0:
972; X64-NEXT: comisd %xmm1, %xmm0
973; X64-NEXT: setnp %al
974; X64-NEXT: sete %cl
975; X64-NEXT: andb %al, %cl
976; X64-NEXT: movzbl %cl, %eax
977; X64-NEXT: retq
978 %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
979 ret i32 %res
980}
981declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
982
983define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
984; X32-LABEL: test_mm_comige_sd:
985; X32: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +0000986; X32-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000987; X32-NEXT: comisd %xmm1, %xmm0
988; X32-NEXT: setae %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000989; X32-NEXT: retl
990;
991; X64-LABEL: test_mm_comige_sd:
992; X64: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +0000993; X64-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000994; X64-NEXT: comisd %xmm1, %xmm0
995; X64-NEXT: setae %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000996; X64-NEXT: retq
997 %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
998 ret i32 %res
999}
1000declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
1001
1002define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1003; X32-LABEL: test_mm_comigt_sd:
1004; X32: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00001005; X32-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001006; X32-NEXT: comisd %xmm1, %xmm0
1007; X32-NEXT: seta %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001008; X32-NEXT: retl
1009;
1010; X64-LABEL: test_mm_comigt_sd:
1011; X64: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00001012; X64-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001013; X64-NEXT: comisd %xmm1, %xmm0
1014; X64-NEXT: seta %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001015; X64-NEXT: retq
1016 %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
1017 ret i32 %res
1018}
1019declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
1020
1021define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1022; X32-LABEL: test_mm_comile_sd:
1023; X32: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00001024; X32-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001025; X32-NEXT: comisd %xmm0, %xmm1
1026; X32-NEXT: setae %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001027; X32-NEXT: retl
1028;
1029; X64-LABEL: test_mm_comile_sd:
1030; X64: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00001031; X64-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001032; X64-NEXT: comisd %xmm0, %xmm1
1033; X64-NEXT: setae %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001034; X64-NEXT: retq
1035 %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
1036 ret i32 %res
1037}
1038declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
1039
1040define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1041; X32-LABEL: test_mm_comilt_sd:
1042; X32: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00001043; X32-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001044; X32-NEXT: comisd %xmm0, %xmm1
1045; X32-NEXT: seta %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001046; X32-NEXT: retl
1047;
1048; X64-LABEL: test_mm_comilt_sd:
1049; X64: # BB#0:
Michael Kuperstein3e3652a2016-07-07 22:50:23 +00001050; X64-NEXT: xorl %eax, %eax
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001051; X64-NEXT: comisd %xmm0, %xmm1
1052; X64-NEXT: seta %al
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001053; X64-NEXT: retq
1054 %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
1055 ret i32 %res
1056}
1057declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
1058
1059define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1060; X32-LABEL: test_mm_comineq_sd:
1061; X32: # BB#0:
1062; X32-NEXT: comisd %xmm1, %xmm0
1063; X32-NEXT: setp %al
1064; X32-NEXT: setne %cl
1065; X32-NEXT: orb %al, %cl
1066; X32-NEXT: movzbl %cl, %eax
1067; X32-NEXT: retl
1068;
1069; X64-LABEL: test_mm_comineq_sd:
1070; X64: # BB#0:
1071; X64-NEXT: comisd %xmm1, %xmm0
1072; X64-NEXT: setp %al
1073; X64-NEXT: setne %cl
1074; X64-NEXT: orb %al, %cl
1075; X64-NEXT: movzbl %cl, %eax
1076; X64-NEXT: retq
1077 %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
1078 ret i32 %res
1079}
1080declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
1081
1082define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
1083; X32-LABEL: test_mm_cvtepi32_pd:
1084; X32: # BB#0:
1085; X32-NEXT: cvtdq2pd %xmm0, %xmm0
1086; X32-NEXT: retl
1087;
1088; X64-LABEL: test_mm_cvtepi32_pd:
1089; X64: # BB#0:
1090; X64-NEXT: cvtdq2pd %xmm0, %xmm0
1091; X64-NEXT: retq
1092 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
Simon Pilgrim8a5ff3c2016-05-23 22:17:36 +00001093 %ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1>
1094 %res = sitofp <2 x i32> %ext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001095 ret <2 x double> %res
1096}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001097
1098define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
1099; X32-LABEL: test_mm_cvtepi32_ps:
1100; X32: # BB#0:
1101; X32-NEXT: cvtdq2ps %xmm0, %xmm0
1102; X32-NEXT: retl
1103;
1104; X64-LABEL: test_mm_cvtepi32_ps:
1105; X64: # BB#0:
1106; X64-NEXT: cvtdq2ps %xmm0, %xmm0
1107; X64-NEXT: retq
1108 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1109 %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %arg0)
1110 ret <4 x float> %res
1111}
1112declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
1113
1114define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
1115; X32-LABEL: test_mm_cvtpd_epi32:
1116; X32: # BB#0:
1117; X32-NEXT: cvtpd2dq %xmm0, %xmm0
1118; X32-NEXT: retl
1119;
1120; X64-LABEL: test_mm_cvtpd_epi32:
1121; X64: # BB#0:
1122; X64-NEXT: cvtpd2dq %xmm0, %xmm0
1123; X64-NEXT: retq
1124 %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
1125 %bc = bitcast <4 x i32> %res to <2 x i64>
1126 ret <2 x i64> %bc
1127}
1128declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
1129
1130define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind {
1131; X32-LABEL: test_mm_cvtpd_ps:
1132; X32: # BB#0:
1133; X32-NEXT: cvtpd2ps %xmm0, %xmm0
1134; X32-NEXT: retl
1135;
1136; X64-LABEL: test_mm_cvtpd_ps:
1137; X64: # BB#0:
1138; X64-NEXT: cvtpd2ps %xmm0, %xmm0
1139; X64-NEXT: retq
1140 %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
1141 ret <4 x float> %res
1142}
1143declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
1144
1145define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind {
1146; X32-LABEL: test_mm_cvtps_epi32:
1147; X32: # BB#0:
1148; X32-NEXT: cvtps2dq %xmm0, %xmm0
1149; X32-NEXT: retl
1150;
1151; X64-LABEL: test_mm_cvtps_epi32:
1152; X64: # BB#0:
1153; X64-NEXT: cvtps2dq %xmm0, %xmm0
1154; X64-NEXT: retq
1155 %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
1156 %bc = bitcast <4 x i32> %res to <2 x i64>
1157 ret <2 x i64> %bc
1158}
1159declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
1160
1161define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind {
1162; X32-LABEL: test_mm_cvtps_pd:
1163; X32: # BB#0:
1164; X32-NEXT: cvtps2pd %xmm0, %xmm0
1165; X32-NEXT: retl
1166;
1167; X64-LABEL: test_mm_cvtps_pd:
1168; X64: # BB#0:
1169; X64-NEXT: cvtps2pd %xmm0, %xmm0
1170; X64-NEXT: retq
Simon Pilgrim8a5ff3c2016-05-23 22:17:36 +00001171 %ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
1172 %res = fpext <2 x float> %ext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001173 ret <2 x double> %res
1174}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001175
1176define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
1177; X32-LABEL: test_mm_cvtsd_f64:
1178; X32: # BB#0:
1179; X32-NEXT: pushl %ebp
1180; X32-NEXT: movl %esp, %ebp
1181; X32-NEXT: andl $-8, %esp
1182; X32-NEXT: subl $8, %esp
1183; X32-NEXT: movlps %xmm0, (%esp)
1184; X32-NEXT: fldl (%esp)
1185; X32-NEXT: movl %ebp, %esp
1186; X32-NEXT: popl %ebp
1187; X32-NEXT: retl
1188;
1189; X64-LABEL: test_mm_cvtsd_f64:
1190; X64: # BB#0:
1191; X64-NEXT: retq
1192 %res = extractelement <2 x double> %a0, i32 0
1193 ret double %res
1194}
1195
1196define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
1197; X32-LABEL: test_mm_cvtsd_si32:
1198; X32: # BB#0:
1199; X32-NEXT: cvtsd2si %xmm0, %eax
1200; X32-NEXT: retl
1201;
1202; X64-LABEL: test_mm_cvtsd_si32:
1203; X64: # BB#0:
1204; X64-NEXT: cvtsd2si %xmm0, %eax
1205; X64-NEXT: retq
1206 %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
1207 ret i32 %res
1208}
1209declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
1210
Simon Pilgrim0ea8d272016-07-19 15:07:43 +00001211define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) {
1212; X32-LABEL: test_mm_cvtsd_ss:
1213; X32: # BB#0:
1214; X32-NEXT: cvtsd2ss %xmm1, %xmm0
1215; X32-NEXT: retl
1216;
1217; X64-LABEL: test_mm_cvtsd_ss:
1218; X64: # BB#0:
1219; X64-NEXT: cvtsd2ss %xmm1, %xmm0
1220; X64-NEXT: retq
1221 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
1222 ret <4 x float> %res
1223}
1224declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
1225
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001226define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
1227; X32-LABEL: test_mm_cvtsi128_si32:
1228; X32: # BB#0:
1229; X32-NEXT: movd %xmm0, %eax
1230; X32-NEXT: retl
1231;
1232; X64-LABEL: test_mm_cvtsi128_si32:
1233; X64: # BB#0:
1234; X64-NEXT: movd %xmm0, %eax
1235; X64-NEXT: retq
1236 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1237 %res = extractelement <4 x i32> %arg0, i32 0
1238 ret i32 %res
1239}
1240
1241define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
1242; X32-LABEL: test_mm_cvtsi32_sd:
1243; X32: # BB#0:
1244; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1245; X32-NEXT: cvtsi2sdl %eax, %xmm1
1246; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1247; X32-NEXT: retl
1248;
1249; X64-LABEL: test_mm_cvtsi32_sd:
1250; X64: # BB#0:
1251; X64-NEXT: cvtsi2sdl %edi, %xmm1
1252; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1253; X64-NEXT: retq
1254 %cvt = sitofp i32 %a1 to double
1255 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1256 ret <2 x double> %res
1257}
1258
1259define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
1260; X32-LABEL: test_mm_cvtsi32_si128:
1261; X32: # BB#0:
1262; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1263; X32-NEXT: retl
1264;
1265; X64-LABEL: test_mm_cvtsi32_si128:
1266; X64: # BB#0:
1267; X64-NEXT: movd %edi, %xmm0
1268; X64-NEXT: retq
1269 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
1270 %res1 = insertelement <4 x i32> %res0, i32 0, i32 1
1271 %res2 = insertelement <4 x i32> %res1, i32 0, i32 2
1272 %res3 = insertelement <4 x i32> %res2, i32 0, i32 3
1273 %res = bitcast <4 x i32> %res3 to <2 x i64>
1274 ret <2 x i64> %res
1275}
1276
1277define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
1278; X32-LABEL: test_mm_cvtss_sd:
1279; X32: # BB#0:
1280; X32-NEXT: cvtss2sd %xmm1, %xmm1
1281; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1282; X32-NEXT: retl
1283;
1284; X64-LABEL: test_mm_cvtss_sd:
1285; X64: # BB#0:
1286; X64-NEXT: cvtss2sd %xmm1, %xmm1
1287; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1288; X64-NEXT: retq
1289 %ext = extractelement <4 x float> %a1, i32 0
1290 %cvt = fpext float %ext to double
1291 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1292 ret <2 x double> %res
1293}
1294
1295define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
1296; X32-LABEL: test_mm_cvttpd_epi32:
1297; X32: # BB#0:
1298; X32-NEXT: cvttpd2dq %xmm0, %xmm0
1299; X32-NEXT: retl
1300;
1301; X64-LABEL: test_mm_cvttpd_epi32:
1302; X64: # BB#0:
1303; X64-NEXT: cvttpd2dq %xmm0, %xmm0
1304; X64-NEXT: retq
1305 %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
1306 %bc = bitcast <4 x i32> %res to <2 x i64>
1307 ret <2 x i64> %bc
1308}
1309declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
1310
1311define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
1312; X32-LABEL: test_mm_cvttps_epi32:
1313; X32: # BB#0:
1314; X32-NEXT: cvttps2dq %xmm0, %xmm0
1315; X32-NEXT: retl
1316;
1317; X64-LABEL: test_mm_cvttps_epi32:
1318; X64: # BB#0:
1319; X64-NEXT: cvttps2dq %xmm0, %xmm0
1320; X64-NEXT: retq
Simon Pilgrim0ea8d272016-07-19 15:07:43 +00001321 %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001322 %bc = bitcast <4 x i32> %res to <2 x i64>
1323 ret <2 x i64> %bc
1324}
Simon Pilgrim0ea8d272016-07-19 15:07:43 +00001325declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001326
1327define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
1328; X32-LABEL: test_mm_cvttsd_si32:
1329; X32: # BB#0:
1330; X32-NEXT: cvttsd2si %xmm0, %eax
1331; X32-NEXT: retl
1332;
1333; X64-LABEL: test_mm_cvttsd_si32:
1334; X64: # BB#0:
1335; X64-NEXT: cvttsd2si %xmm0, %eax
1336; X64-NEXT: retq
Simon Pilgrim0ea8d272016-07-19 15:07:43 +00001337 %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001338 ret i32 %res
1339}
Simon Pilgrim0ea8d272016-07-19 15:07:43 +00001340declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001341
1342define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1343; X32-LABEL: test_mm_div_pd:
1344; X32: # BB#0:
1345; X32-NEXT: divpd %xmm1, %xmm0
1346; X32-NEXT: retl
1347;
1348; X64-LABEL: test_mm_div_pd:
1349; X64: # BB#0:
1350; X64-NEXT: divpd %xmm1, %xmm0
1351; X64-NEXT: retq
1352 %res = fdiv <2 x double> %a0, %a1
1353 ret <2 x double> %res
1354}
1355
1356define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1357; X32-LABEL: test_mm_div_sd:
1358; X32: # BB#0:
1359; X32-NEXT: divsd %xmm1, %xmm0
1360; X32-NEXT: retl
1361;
1362; X64-LABEL: test_mm_div_sd:
1363; X64: # BB#0:
1364; X64-NEXT: divsd %xmm1, %xmm0
1365; X64-NEXT: retq
1366 %ext0 = extractelement <2 x double> %a0, i32 0
1367 %ext1 = extractelement <2 x double> %a1, i32 0
1368 %fdiv = fdiv double %ext0, %ext1
1369 %res = insertelement <2 x double> %a0, double %fdiv, i32 0
1370 ret <2 x double> %res
1371}
1372
1373define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind {
1374; X32-LABEL: test_mm_extract_epi16:
1375; X32: # BB#0:
1376; X32-NEXT: pextrw $1, %xmm0, %eax
1377; X32-NEXT: movzwl %ax, %eax
1378; X32-NEXT: retl
1379;
1380; X64-LABEL: test_mm_extract_epi16:
1381; X64: # BB#0:
1382; X64-NEXT: pextrw $1, %xmm0, %eax
1383; X64-NEXT: movzwl %ax, %eax
1384; X64-NEXT: retq
1385 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1386 %ext = extractelement <8 x i16> %arg0, i32 1
1387 %res = zext i16 %ext to i32
1388 ret i32 %res
1389}
1390
1391define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
1392; X32-LABEL: test_mm_insert_epi16:
1393; X32: # BB#0:
1394; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
1395; X32-NEXT: pinsrw $1, %eax, %xmm0
1396; X32-NEXT: retl
1397;
1398; X64-LABEL: test_mm_insert_epi16:
1399; X64: # BB#0:
1400; X64-NEXT: pinsrw $1, %edi, %xmm0
1401; X64-NEXT: retq
1402 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1403 %res = insertelement <8 x i16> %arg0, i16 %a1,i32 1
1404 %bc = bitcast <8 x i16> %res to <2 x i64>
1405 ret <2 x i64> %bc
1406}
1407
1408define void @test_mm_lfence() nounwind {
1409; X32-LABEL: test_mm_lfence:
1410; X32: # BB#0:
1411; X32-NEXT: lfence
1412; X32-NEXT: retl
1413;
1414; X64-LABEL: test_mm_lfence:
1415; X64: # BB#0:
1416; X64-NEXT: lfence
1417; X64-NEXT: retq
1418 call void @llvm.x86.sse2.lfence()
1419 ret void
1420}
1421declare void @llvm.x86.sse2.lfence() nounwind readnone
1422
1423define <2 x double> @test_mm_load_pd(double* %a0) nounwind {
1424; X32-LABEL: test_mm_load_pd:
1425; X32: # BB#0:
1426; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1427; X32-NEXT: movaps (%eax), %xmm0
1428; X32-NEXT: retl
1429;
1430; X64-LABEL: test_mm_load_pd:
1431; X64: # BB#0:
1432; X64-NEXT: movaps (%rdi), %xmm0
1433; X64-NEXT: retq
1434 %arg0 = bitcast double* %a0 to <2 x double>*
1435 %res = load <2 x double>, <2 x double>* %arg0, align 16
1436 ret <2 x double> %res
1437}
1438
1439define <2 x double> @test_mm_load_sd(double* %a0) nounwind {
1440; X32-LABEL: test_mm_load_sd:
1441; X32: # BB#0:
1442; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1443; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1444; X32-NEXT: retl
1445;
1446; X64-LABEL: test_mm_load_sd:
1447; X64: # BB#0:
1448; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1449; X64-NEXT: retq
1450 %ld = load double, double* %a0, align 1
1451 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1452 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
1453 ret <2 x double> %res1
1454}
1455
1456define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind {
1457; X32-LABEL: test_mm_load_si128:
1458; X32: # BB#0:
1459; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1460; X32-NEXT: movaps (%eax), %xmm0
1461; X32-NEXT: retl
1462;
1463; X64-LABEL: test_mm_load_si128:
1464; X64: # BB#0:
1465; X64-NEXT: movaps (%rdi), %xmm0
1466; X64-NEXT: retq
1467 %res = load <2 x i64>, <2 x i64>* %a0, align 16
1468 ret <2 x i64> %res
1469}
1470
1471define <2 x double> @test_mm_load1_pd(double* %a0) nounwind {
1472; X32-LABEL: test_mm_load1_pd:
1473; X32: # BB#0:
1474; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1475; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1476; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1477; X32-NEXT: retl
1478;
1479; X64-LABEL: test_mm_load1_pd:
1480; X64: # BB#0:
1481; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1482; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1483; X64-NEXT: retq
1484 %ld = load double, double* %a0, align 8
1485 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1486 %res1 = insertelement <2 x double> %res0, double %ld, i32 1
1487 ret <2 x double> %res1
1488}
1489
1490define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind {
1491; X32-LABEL: test_mm_loadh_pd:
1492; X32: # BB#0:
1493; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1494; X32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1495; X32-NEXT: retl
1496;
1497; X64-LABEL: test_mm_loadh_pd:
1498; X64: # BB#0:
1499; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1500; X64-NEXT: retq
1501 %ld = load double, double* %a1, align 8
1502 %res = insertelement <2 x double> %a0, double %ld, i32 1
1503 ret <2 x double> %res
1504}
1505
1506define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind {
1507; X32-LABEL: test_mm_loadl_epi64:
1508; X32: # BB#0:
1509; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1510; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1511; X32-NEXT: retl
1512;
1513; X64-LABEL: test_mm_loadl_epi64:
1514; X64: # BB#0:
1515; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1516; X64-NEXT: retq
1517 %bc = bitcast <2 x i64>* %a1 to i64*
1518 %ld = load i64, i64* %bc, align 1
1519 %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
1520 %res1 = insertelement <2 x i64> %res0, i64 0, i32 1
1521 ret <2 x i64> %res1
1522}
1523
1524define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind {
1525; X32-LABEL: test_mm_loadl_pd:
1526; X32: # BB#0:
1527; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1528; X32-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1529; X32-NEXT: retl
1530;
1531; X64-LABEL: test_mm_loadl_pd:
1532; X64: # BB#0:
1533; X64-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1534; X64-NEXT: retq
1535 %ld = load double, double* %a1, align 8
1536 %res = insertelement <2 x double> %a0, double %ld, i32 0
1537 ret <2 x double> %res
1538}
1539
1540define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
1541; X32-LABEL: test_mm_loadr_pd:
1542; X32: # BB#0:
1543; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1544; X32-NEXT: movapd (%eax), %xmm0
1545; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1546; X32-NEXT: retl
1547;
1548; X64-LABEL: test_mm_loadr_pd:
1549; X64: # BB#0:
1550; X64-NEXT: movapd (%rdi), %xmm0
1551; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1552; X64-NEXT: retq
1553 %arg0 = bitcast double* %a0 to <2 x double>*
1554 %ld = load <2 x double>, <2 x double>* %arg0, align 16
1555 %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1556 ret <2 x double> %res
1557}
1558
1559define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind {
1560; X32-LABEL: test_mm_loadu_pd:
1561; X32: # BB#0:
1562; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1563; X32-NEXT: movups (%eax), %xmm0
1564; X32-NEXT: retl
1565;
1566; X64-LABEL: test_mm_loadu_pd:
1567; X64: # BB#0:
1568; X64-NEXT: movups (%rdi), %xmm0
1569; X64-NEXT: retq
1570 %arg0 = bitcast double* %a0 to <2 x double>*
1571 %res = load <2 x double>, <2 x double>* %arg0, align 1
1572 ret <2 x double> %res
1573}
1574
1575define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind {
1576; X32-LABEL: test_mm_loadu_si128:
1577; X32: # BB#0:
1578; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1579; X32-NEXT: movups (%eax), %xmm0
1580; X32-NEXT: retl
1581;
1582; X64-LABEL: test_mm_loadu_si128:
1583; X64: # BB#0:
1584; X64-NEXT: movups (%rdi), %xmm0
1585; X64-NEXT: retq
1586 %res = load <2 x i64>, <2 x i64>* %a0, align 1
1587 ret <2 x i64> %res
1588}
1589
1590define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1591; X32-LABEL: test_mm_madd_epi16:
1592; X32: # BB#0:
1593; X32-NEXT: pmaddwd %xmm1, %xmm0
1594; X32-NEXT: retl
1595;
1596; X64-LABEL: test_mm_madd_epi16:
1597; X64: # BB#0:
1598; X64-NEXT: pmaddwd %xmm1, %xmm0
1599; X64-NEXT: retq
1600 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1601 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1602 %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1)
1603 %bc = bitcast <4 x i32> %res to <2 x i64>
1604 ret <2 x i64> %bc
1605}
1606declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
1607
1608define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind {
1609; X32-LABEL: test_mm_maskmoveu_si128:
1610; X32: # BB#0:
1611; X32-NEXT: pushl %edi
1612; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
1613; X32-NEXT: maskmovdqu %xmm1, %xmm0
1614; X32-NEXT: popl %edi
1615; X32-NEXT: retl
1616;
1617; X64-LABEL: test_mm_maskmoveu_si128:
1618; X64: # BB#0:
1619; X64-NEXT: maskmovdqu %xmm1, %xmm0
1620; X64-NEXT: retq
1621 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1622 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1623 call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2)
1624 ret void
1625}
1626declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
1627
1628define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1629; X32-LABEL: test_mm_max_epi16:
1630; X32: # BB#0:
1631; X32-NEXT: pmaxsw %xmm1, %xmm0
1632; X32-NEXT: retl
1633;
1634; X64-LABEL: test_mm_max_epi16:
1635; X64: # BB#0:
1636; X64-NEXT: pmaxsw %xmm1, %xmm0
1637; X64-NEXT: retq
1638 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1639 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
Sanjay Patela6c6f092016-06-15 17:17:27 +00001640 %cmp = icmp sgt <8 x i16> %arg0, %arg1
1641 %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
1642 %bc = bitcast <8 x i16> %sel to <2 x i64>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001643 ret <2 x i64> %bc
1644}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001645
1646define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1647; X32-LABEL: test_mm_max_epu8:
1648; X32: # BB#0:
1649; X32-NEXT: pmaxub %xmm1, %xmm0
1650; X32-NEXT: retl
1651;
1652; X64-LABEL: test_mm_max_epu8:
1653; X64: # BB#0:
1654; X64-NEXT: pmaxub %xmm1, %xmm0
1655; X64-NEXT: retq
1656 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1657 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
Sanjay Patela6c6f092016-06-15 17:17:27 +00001658 %cmp = icmp ugt <16 x i8> %arg0, %arg1
1659 %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
1660 %bc = bitcast <16 x i8> %sel to <2 x i64>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001661 ret <2 x i64> %bc
1662}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001663
1664define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1665; X32-LABEL: test_mm_max_pd:
1666; X32: # BB#0:
1667; X32-NEXT: maxpd %xmm1, %xmm0
1668; X32-NEXT: retl
1669;
1670; X64-LABEL: test_mm_max_pd:
1671; X64: # BB#0:
1672; X64-NEXT: maxpd %xmm1, %xmm0
1673; X64-NEXT: retq
1674 %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
1675 ret <2 x double> %res
1676}
1677declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
1678
1679define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1680; X32-LABEL: test_mm_max_sd:
1681; X32: # BB#0:
1682; X32-NEXT: maxsd %xmm1, %xmm0
1683; X32-NEXT: retl
1684;
1685; X64-LABEL: test_mm_max_sd:
1686; X64: # BB#0:
1687; X64-NEXT: maxsd %xmm1, %xmm0
1688; X64-NEXT: retq
1689 %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
1690 ret <2 x double> %res
1691}
1692declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
1693
1694define void @test_mm_mfence() nounwind {
1695; X32-LABEL: test_mm_mfence:
1696; X32: # BB#0:
1697; X32-NEXT: mfence
1698; X32-NEXT: retl
1699;
1700; X64-LABEL: test_mm_mfence:
1701; X64: # BB#0:
1702; X64-NEXT: mfence
1703; X64-NEXT: retq
1704 call void @llvm.x86.sse2.mfence()
1705 ret void
1706}
1707declare void @llvm.x86.sse2.mfence() nounwind readnone
1708
1709define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1710; X32-LABEL: test_mm_min_epi16:
1711; X32: # BB#0:
1712; X32-NEXT: pminsw %xmm1, %xmm0
1713; X32-NEXT: retl
1714;
1715; X64-LABEL: test_mm_min_epi16:
1716; X64: # BB#0:
1717; X64-NEXT: pminsw %xmm1, %xmm0
1718; X64-NEXT: retq
1719 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1720 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
Sanjay Patela6c6f092016-06-15 17:17:27 +00001721 %cmp = icmp slt <8 x i16> %arg0, %arg1
1722 %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
1723 %bc = bitcast <8 x i16> %sel to <2 x i64>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001724 ret <2 x i64> %bc
1725}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001726
1727define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1728; X32-LABEL: test_mm_min_epu8:
1729; X32: # BB#0:
1730; X32-NEXT: pminub %xmm1, %xmm0
1731; X32-NEXT: retl
1732;
1733; X64-LABEL: test_mm_min_epu8:
1734; X64: # BB#0:
1735; X64-NEXT: pminub %xmm1, %xmm0
1736; X64-NEXT: retq
1737 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1738 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
Sanjay Patela6c6f092016-06-15 17:17:27 +00001739 %cmp = icmp ult <16 x i8> %arg0, %arg1
1740 %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
1741 %bc = bitcast <16 x i8> %sel to <2 x i64>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001742 ret <2 x i64> %bc
1743}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001744
1745define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1746; X32-LABEL: test_mm_min_pd:
1747; X32: # BB#0:
1748; X32-NEXT: minpd %xmm1, %xmm0
1749; X32-NEXT: retl
1750;
1751; X64-LABEL: test_mm_min_pd:
1752; X64: # BB#0:
1753; X64-NEXT: minpd %xmm1, %xmm0
1754; X64-NEXT: retq
1755 %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
1756 ret <2 x double> %res
1757}
1758declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
1759
1760define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1761; X32-LABEL: test_mm_min_sd:
1762; X32: # BB#0:
1763; X32-NEXT: minsd %xmm1, %xmm0
1764; X32-NEXT: retl
1765;
1766; X64-LABEL: test_mm_min_sd:
1767; X64: # BB#0:
1768; X64-NEXT: minsd %xmm1, %xmm0
1769; X64-NEXT: retq
1770 %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
1771 ret <2 x double> %res
1772}
1773declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
1774
Simon Pilgrim47825fa2016-05-19 11:59:57 +00001775define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind {
1776; X32-LABEL: test_mm_move_epi64:
1777; X32: # BB#0:
1778; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1779; X32-NEXT: retl
1780;
1781; X64-LABEL: test_mm_move_epi64:
1782; X64: # BB#0:
1783; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1784; X64-NEXT: retq
1785 %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
1786 ret <2 x i64> %res
1787}
1788
1789define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1790; X32-LABEL: test_mm_move_sd:
1791; X32: # BB#0:
1792; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1793; X32-NEXT: retl
1794;
1795; X64-LABEL: test_mm_move_sd:
1796; X64: # BB#0:
1797; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1798; X64-NEXT: retq
1799 %ext0 = extractelement <2 x double> %a1, i32 0
1800 %res0 = insertelement <2 x double> undef, double %ext0, i32 0
1801 %ext1 = extractelement <2 x double> %a0, i32 1
1802 %res1 = insertelement <2 x double> %res0, double %ext1, i32 1
1803 ret <2 x double> %res1
1804}
1805
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001806define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind {
1807; X32-LABEL: test_mm_movemask_epi8:
1808; X32: # BB#0:
1809; X32-NEXT: pmovmskb %xmm0, %eax
1810; X32-NEXT: retl
1811;
1812; X64-LABEL: test_mm_movemask_epi8:
1813; X64: # BB#0:
1814; X64-NEXT: pmovmskb %xmm0, %eax
1815; X64-NEXT: retq
1816 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1817 %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0)
1818 ret i32 %res
1819}
1820declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
1821
1822define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind {
1823; X32-LABEL: test_mm_movemask_pd:
1824; X32: # BB#0:
1825; X32-NEXT: movmskpd %xmm0, %eax
1826; X32-NEXT: retl
1827;
1828; X64-LABEL: test_mm_movemask_pd:
1829; X64: # BB#0:
1830; X64-NEXT: movmskpd %xmm0, %eax
1831; X64-NEXT: retq
1832 %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
1833 ret i32 %res
1834}
1835declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
1836
; _mm_mul_epu32: unsigned 32x32->64 multiply of the even lanes via
; llvm.x86.sse2.pmulu.dq; expected to select to a single PMULUDQ.
define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_mul_epu32:
; X32: # BB#0:
; X32-NEXT: pmuludq %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mul_epu32:
; X64: # BB#0:
; X64-NEXT: pmuludq %xmm1, %xmm0
; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
 %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %arg0, <4 x i32> %arg1)
 ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
1853
; _mm_mul_pd: plain vector fmul; expected to select to a single MULPD.
define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_mul_pd:
; X32: # BB#0:
; X32-NEXT: mulpd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mul_pd:
; X64: # BB#0:
; X64-NEXT: mulpd %xmm1, %xmm0
; X64-NEXT: retq
 %res = fmul <2 x double> %a0, %a1
 ret <2 x double> %res
}
1867
; _mm_mul_sd: scalar multiply of the low doubles, high lane of %a0
; passed through; expected to fold to a single MULSD.
define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_mul_sd:
; X32: # BB#0:
; X32-NEXT: mulsd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mul_sd:
; X64: # BB#0:
; X64-NEXT: mulsd %xmm1, %xmm0
; X64-NEXT: retq
 %ext0 = extractelement <2 x double> %a0, i32 0
 %ext1 = extractelement <2 x double> %a1, i32 0
 %fmul = fmul double %ext0, %ext1
 %res = insertelement <2 x double> %a0, double %fmul, i32 0
 ret <2 x double> %res
}
1884
; _mm_mulhi_epi16: signed high-half 16-bit multiply via
; llvm.x86.sse2.pmulh.w; expected to select to a single PMULHW.
define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_mulhi_epi16:
; X32: # BB#0:
; X32-NEXT: pmulhw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mulhi_epi16:
; X64: # BB#0:
; X64-NEXT: pmulhw %xmm1, %xmm0
; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
 %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1)
 %bc = bitcast <8 x i16> %res to <2 x i64>
 ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
1902
; _mm_mulhi_epu16: unsigned high-half 16-bit multiply via
; llvm.x86.sse2.pmulhu.w; expected to select to a single PMULHUW.
define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_mulhi_epu16:
; X32: # BB#0:
; X32-NEXT: pmulhuw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mulhi_epu16:
; X64: # BB#0:
; X64-NEXT: pmulhuw %xmm1, %xmm0
; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
 %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1)
 %bc = bitcast <8 x i16> %res to <2 x i64>
 ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
1920
; _mm_mullo_epi16: low-half 16-bit multiply as a plain vector mul;
; expected to select to a single PMULLW.
define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_mullo_epi16:
; X32: # BB#0:
; X32-NEXT: pmullw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mullo_epi16:
; X64: # BB#0:
; X64-NEXT: pmullw %xmm1, %xmm0
; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
 %res = mul <8 x i16> %arg0, %arg1
 %bc = bitcast <8 x i16> %res to <2 x i64>
 ret <2 x i64> %bc
}
1937
; _mm_or_pd: bitwise OR done through an integer bitcast; the checked
; lowering uses ORPS (the FP-domain form) on both targets.
define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_or_pd:
; X32: # BB#0:
; X32-NEXT: orps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_or_pd:
; X64: # BB#0:
; X64-NEXT: orps %xmm1, %xmm0
; X64-NEXT: retq
 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
 %res = or <4 x i32> %arg0, %arg1
 %bc = bitcast <4 x i32> %res to <2 x double>
 ret <2 x double> %bc
}
1954
; _mm_or_si128: plain <2 x i64> OR; the checked lowering picks ORPS
; (FP-domain form) on both targets.
define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_or_si128:
; X32: # BB#0:
; X32-NEXT: orps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_or_si128:
; X64: # BB#0:
; X64-NEXT: orps %xmm1, %xmm0
; X64-NEXT: retq
 %res = or <2 x i64> %a0, %a1
 ret <2 x i64> %res
}
1968
; _mm_packs_epi16: signed-saturating i16->i8 pack via
; llvm.x86.sse2.packsswb.128; expected to select to a single PACKSSWB.
define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_packs_epi16:
; X32: # BB#0:
; X32-NEXT: packsswb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_packs_epi16:
; X64: # BB#0:
; X64-NEXT: packsswb %xmm1, %xmm0
; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
 %bc = bitcast <16 x i8> %res to <2 x i64>
 ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
1986
; _mm_packs_epi32: signed-saturating i32->i16 pack via
; llvm.x86.sse2.packssdw.128; expected to select to a single PACKSSDW.
define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_packs_epi32:
; X32: # BB#0:
; X32-NEXT: packssdw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_packs_epi32:
; X64: # BB#0:
; X64-NEXT: packssdw %xmm1, %xmm0
; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1)
 %bc = bitcast <8 x i16> %res to <2 x i64>
 ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
2004
; _mm_packus_epi16: unsigned-saturating i16->i8 pack via
; llvm.x86.sse2.packuswb.128; expected to select to a single PACKUSWB.
define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_packus_epi16:
; X32: # BB#0:
; X32-NEXT: packuswb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_packus_epi16:
; X64: # BB#0:
; X64-NEXT: packuswb %xmm1, %xmm0
; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
 %bc = bitcast <16 x i8> %res to <2 x i64>
 ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
2022
; _mm_pause: side-effect-only intrinsic; expected to emit the PAUSE
; spin-loop hint instruction on both targets.
define void @test_mm_pause() nounwind {
; X32-LABEL: test_mm_pause:
; X32: # BB#0:
; X32-NEXT: pause
; X32-NEXT: retl
;
; X64-LABEL: test_mm_pause:
; X64: # BB#0:
; X64-NEXT: pause
; X64-NEXT: retq
 call void @llvm.x86.sse2.pause()
 ret void
}
declare void @llvm.x86.sse2.pause() nounwind readnone
2037
; _mm_sad_epu8: sum of absolute byte differences via
; llvm.x86.sse2.psad.bw; expected to select to a single PSADBW.
define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_sad_epu8:
; X32: # BB#0:
; X32-NEXT: psadbw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sad_epu8:
; X64: # BB#0:
; X64-NEXT: psadbw %xmm1, %xmm0
; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
 %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1)
 ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
2054
; _mm_set_epi8: build a <16 x i8> from 16 scalar arguments. Note the set*
; intrinsics take arguments high-element-first, so %a15 lands in lane 0.
; With fast-isel the expected code is the full movzbl/movd/punpcklbw
; build tree (no cross-argument shuffling cleverness).
define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
; X32-LABEL: test_mm_set_epi8:
; X32: # BB#0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm1
; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm2
; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm3
; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm1
; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm2
; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm3
; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm2
; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm4
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_epi8:
; X64: # BB#0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-NEXT: movzbl %r8b, %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; X64-NEXT: movzbl %dl, %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-NEXT: movzbl %r9b, %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm4
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: retq
 %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0
 %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1
 %res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2
 %res3 = insertelement <16 x i8> %res2, i8 %a12, i32 3
 %res4 = insertelement <16 x i8> %res3, i8 %a11, i32 4
 %res5 = insertelement <16 x i8> %res4, i8 %a10, i32 5
 %res6 = insertelement <16 x i8> %res5, i8 %a9 , i32 6
 %res7 = insertelement <16 x i8> %res6, i8 %a8 , i32 7
 %res8 = insertelement <16 x i8> %res7, i8 %a7 , i32 8
 %res9 = insertelement <16 x i8> %res8, i8 %a6 , i32 9
 %res10 = insertelement <16 x i8> %res9, i8 %a5 , i32 10
 %res11 = insertelement <16 x i8> %res10, i8 %a4 , i32 11
 %res12 = insertelement <16 x i8> %res11, i8 %a3 , i32 12
 %res13 = insertelement <16 x i8> %res12, i8 %a2 , i32 13
 %res14 = insertelement <16 x i8> %res13, i8 %a1 , i32 14
 %res15 = insertelement <16 x i8> %res14, i8 %a0 , i32 15
 %res = bitcast <16 x i8> %res15 to <2 x i64>
 ret <2 x i64> %res
}
2176
; _mm_set_epi16: build a <8 x i16> from 8 scalars, high-element-first
; (%a7 in lane 0). Expected fast-isel code is the movw/movd load tree
; plus a punpcklwd merge cascade.
define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
; X32-LABEL: test_mm_set_epi16:
; X32: # BB#0:
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: movd %eax, %xmm1
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: movd %eax, %xmm2
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: movd %eax, %xmm3
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: movd %eax, %xmm4
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: movd %eax, %xmm5
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: movd %eax, %xmm6
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: movd %eax, %xmm7
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_epi16:
; X64: # BB#0:
; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: movd %r8d, %xmm1
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-NEXT: movd %edx, %xmm0
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X64-NEXT: movd %esi, %xmm0
; X64-NEXT: movd %r9d, %xmm1
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-NEXT: movd %ecx, %xmm3
; X64-NEXT: movd %r10d, %xmm0
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; X64-NEXT: retq
 %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0
 %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1
 %res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2
 %res3 = insertelement <8 x i16> %res2, i16 %a4, i32 3
 %res4 = insertelement <8 x i16> %res3, i16 %a3, i32 4
 %res5 = insertelement <8 x i16> %res4, i16 %a2, i32 5
 %res6 = insertelement <8 x i16> %res5, i16 %a1, i32 6
 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
 %res = bitcast <8 x i16> %res7 to <2 x i64>
 ret <2 x i64> %res
}
2236
; _mm_set_epi32: build a <4 x i32> from 4 scalars, high-element-first
; (%a3 in lane 0), merged with a punpckldq cascade.
define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
; X32-LABEL: test_mm_set_epi32:
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_epi32:
; X64: # BB#0:
; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: movd %edx, %xmm1
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: movd %esi, %xmm2
; X64-NEXT: movd %ecx, %xmm0
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: retq
 %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0
 %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1
 %res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2
 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
 %res = bitcast <4 x i32> %res3 to <2 x i64>
 ret <2 x i64> %res
}
2266
2267; TODO test_mm_set_epi64
2268
; _mm_set_epi64x: build a <2 x i64> from 2 scalars, high-element-first
; (%a1 in lane 0). On x86-64 this is two MOVDs plus PUNPCKLQDQ; on
; 32-bit each i64 is assembled from two 32-bit stack loads.
define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: test_mm_set_epi64x:
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_epi64x:
; X64: # BB#0:
; X64-NEXT: movd %rdi, %xmm1
; X64-NEXT: movd %rsi, %xmm0
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
 %res0 = insertelement <2 x i64> undef, i64 %a1, i32 0
 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
 ret <2 x i64> %res1
}
2291
; _mm_set_pd: build a <2 x double> with %a1 in lane 0 and %a0 in lane 1
; (high-element-first argument order), merged via UNPCKLPD.
define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind {
; X32-LABEL: test_mm_set_pd:
; X32: # BB#0:
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_pd:
; X64: # BB#0:
; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT: movapd %xmm1, %xmm0
; X64-NEXT: retq
 %res0 = insertelement <2 x double> undef, double %a1, i32 0
 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
 ret <2 x double> %res1
}
2309
; _mm_set_sd: %a0 in lane 0, 0.0 in lane 1; expected to lower to a
; MOVQ that zero-extends the low 64 bits.
define <2 x double> @test_mm_set_sd(double %a0) nounwind {
; X32-LABEL: test_mm_set_sd:
; X32: # BB#0:
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_sd:
; X64: # BB#0:
; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; X64-NEXT: retq
 %res0 = insertelement <2 x double> undef, double %a0, i32 0
 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
 ret <2 x double> %res1
}
2325
; _mm_set1_epi8: broadcast one i8 to all 16 lanes; expected lowering is
; movzbl/movd then a punpcklbw/pshuflw/pshufd splat sequence.
define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
; X32-LABEL: test_mm_set1_epi8:
; X32: # BB#0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set1_epi8:
; X64: # BB#0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X64-NEXT: retq
 %res0 = insertelement <16 x i8> undef, i8 %a0, i32 0
 %res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1
 %res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2
 %res3 = insertelement <16 x i8> %res2, i8 %a0, i32 3
 %res4 = insertelement <16 x i8> %res3, i8 %a0, i32 4
 %res5 = insertelement <16 x i8> %res4, i8 %a0, i32 5
 %res6 = insertelement <16 x i8> %res5, i8 %a0, i32 6
 %res7 = insertelement <16 x i8> %res6, i8 %a0, i32 7
 %res8 = insertelement <16 x i8> %res7, i8 %a0, i32 8
 %res9 = insertelement <16 x i8> %res8, i8 %a0, i32 9
 %res10 = insertelement <16 x i8> %res9, i8 %a0, i32 10
 %res11 = insertelement <16 x i8> %res10, i8 %a0, i32 11
 %res12 = insertelement <16 x i8> %res11, i8 %a0, i32 12
 %res13 = insertelement <16 x i8> %res12, i8 %a0, i32 13
 %res14 = insertelement <16 x i8> %res13, i8 %a0, i32 14
 %res15 = insertelement <16 x i8> %res14, i8 %a0, i32 15
 %res = bitcast <16 x i8> %res15 to <2 x i64>
 ret <2 x i64> %res
}
2363
; _mm_set1_epi16: broadcast one i16 to all 8 lanes via movd then
; pshuflw/pshufd splat.
define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind {
; X32-LABEL: test_mm_set1_epi16:
; X32: # BB#0:
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set1_epi16:
; X64: # BB#0:
; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X64-NEXT: retq
 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
 %res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1
 %res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2
 %res3 = insertelement <8 x i16> %res2, i16 %a0, i32 3
 %res4 = insertelement <8 x i16> %res3, i16 %a0, i32 4
 %res5 = insertelement <8 x i16> %res4, i16 %a0, i32 5
 %res6 = insertelement <8 x i16> %res5, i16 %a0, i32 6
 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
 %res = bitcast <8 x i16> %res7 to <2 x i64>
 ret <2 x i64> %res
}
2390
; _mm_set1_epi32: broadcast one i32 to all 4 lanes via movd + pshufd.
define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind {
; X32-LABEL: test_mm_set1_epi32:
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set1_epi32:
; X64: # BB#0:
; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: retq
 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
 %res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1
 %res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2
 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
 %res = bitcast <4 x i32> %res3 to <2 x i64>
 ret <2 x i64> %res
}
2410
2411; TODO test_mm_set1_epi64
2412
; _mm_set1_epi64x: broadcast one i64 to both lanes. x86-64 uses a
; 64-bit movd + pshufd; 32-bit rebuilds the i64 from two dword loads.
define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
; X32-LABEL: test_mm_set1_epi64x:
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set1_epi64x:
; X64: # BB#0:
; X64-NEXT: movd %rdi, %xmm0
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-NEXT: retq
 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
 ret <2 x i64> %res1
}
2432
; _mm_set1_pd: broadcast one double to both lanes; expected to lower
; to a MOVLHPS self-unpack.
define <2 x double> @test_mm_set1_pd(double %a0) nounwind {
; X32-LABEL: test_mm_set1_pd:
; X32: # BB#0:
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set1_pd:
; X64: # BB#0:
; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT: retq
 %res0 = insertelement <2 x double> undef, double %a0, i32 0
 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
 ret <2 x double> %res1
}
2448
2449define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
2450; X32-LABEL: test_mm_setr_epi8:
2451; X32: # BB#0:
2452; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2453; X32-NEXT: movd %eax, %xmm0
2454; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2455; X32-NEXT: movd %eax, %xmm1
2456; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2457; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2458; X32-NEXT: movd %eax, %xmm0
2459; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2460; X32-NEXT: movd %eax, %xmm2
2461; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2462; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2463; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2464; X32-NEXT: movd %eax, %xmm0
2465; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2466; X32-NEXT: movd %eax, %xmm3
2467; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2468; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2469; X32-NEXT: movd %eax, %xmm0
2470; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2471; X32-NEXT: movd %eax, %xmm1
2472; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2473; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2474; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2475; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2476; X32-NEXT: movd %eax, %xmm0
2477; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2478; X32-NEXT: movd %eax, %xmm2
2479; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2480; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2481; X32-NEXT: movd %eax, %xmm0
2482; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2483; X32-NEXT: movd %eax, %xmm3
2484; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2485; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2486; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2487; X32-NEXT: movd %eax, %xmm0
2488; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2489; X32-NEXT: movd %eax, %xmm2
2490; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2491; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2492; X32-NEXT: movd %eax, %xmm4
2493; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2494; X32-NEXT: movd %eax, %xmm0
2495; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2496; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2497; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2498; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2499; X32-NEXT: retl
2500;
2501; X64-LABEL: test_mm_setr_epi8:
2502; X64: # BB#0:
2503; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2504; X64-NEXT: movd %eax, %xmm0
2505; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2506; X64-NEXT: movd %eax, %xmm1
2507; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2508; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2509; X64-NEXT: movd %eax, %xmm0
2510; X64-NEXT: movzbl %cl, %eax
2511; X64-NEXT: movd %eax, %xmm2
2512; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2513; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2514; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2515; X64-NEXT: movd %eax, %xmm0
2516; X64-NEXT: movzbl %r9b, %eax
2517; X64-NEXT: movd %eax, %xmm3
2518; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2519; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2520; X64-NEXT: movd %eax, %xmm0
2521; X64-NEXT: movzbl %sil, %eax
2522; X64-NEXT: movd %eax, %xmm1
2523; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2524; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2525; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2526; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2527; X64-NEXT: movd %eax, %xmm0
2528; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2529; X64-NEXT: movd %eax, %xmm2
2530; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2531; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2532; X64-NEXT: movd %eax, %xmm0
2533; X64-NEXT: movzbl %dl, %eax
2534; X64-NEXT: movd %eax, %xmm3
2535; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2536; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2537; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2538; X64-NEXT: movd %eax, %xmm0
2539; X64-NEXT: movzbl %r8b, %eax
2540; X64-NEXT: movd %eax, %xmm2
2541; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2542; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2543; X64-NEXT: movd %eax, %xmm4
2544; X64-NEXT: movzbl %dil, %eax
2545; X64-NEXT: movd %eax, %xmm0
2546; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2547; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2548; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2549; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2550; X64-NEXT: retq
2551 %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0
2552 %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1
2553 %res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2
2554 %res3 = insertelement <16 x i8> %res2, i8 %a3 , i32 3
2555 %res4 = insertelement <16 x i8> %res3, i8 %a4 , i32 4
2556 %res5 = insertelement <16 x i8> %res4, i8 %a5 , i32 5
2557 %res6 = insertelement <16 x i8> %res5, i8 %a6 , i32 6
2558 %res7 = insertelement <16 x i8> %res6, i8 %a7 , i32 7
2559 %res8 = insertelement <16 x i8> %res7, i8 %a8 , i32 8
2560 %res9 = insertelement <16 x i8> %res8, i8 %a9 , i32 9
2561 %res10 = insertelement <16 x i8> %res9, i8 %a10, i32 10
2562 %res11 = insertelement <16 x i8> %res10, i8 %a11, i32 11
2563 %res12 = insertelement <16 x i8> %res11, i8 %a12, i32 12
2564 %res13 = insertelement <16 x i8> %res12, i8 %a13, i32 13
2565 %res14 = insertelement <16 x i8> %res13, i8 %a14, i32 14
2566 %res15 = insertelement <16 x i8> %res14, i8 %a15, i32 15
2567 %res = bitcast <16 x i8> %res15 to <2 x i64>
2568 ret <2 x i64> %res
2569}
2570
2571define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
2572; X32-LABEL: test_mm_setr_epi16:
2573; X32: # BB#0:
2574; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2575; X32-NEXT: movd %eax, %xmm1
2576; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2577; X32-NEXT: movd %eax, %xmm2
2578; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2579; X32-NEXT: movd %eax, %xmm3
2580; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2581; X32-NEXT: movd %eax, %xmm4
2582; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2583; X32-NEXT: movd %eax, %xmm5
2584; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2585; X32-NEXT: movd %eax, %xmm6
2586; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2587; X32-NEXT: movd %eax, %xmm7
2588; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2589; X32-NEXT: movd %eax, %xmm0
2590; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2591; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2592; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2593; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
2594; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
2595; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
2596; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
2597; X32-NEXT: retl
2598;
2599; X64-LABEL: test_mm_setr_epi16:
2600; X64: # BB#0:
2601; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
2602; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
2603; X64-NEXT: movd %eax, %xmm0
2604; X64-NEXT: movd %ecx, %xmm1
2605; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2606; X64-NEXT: movd %r9d, %xmm0
2607; X64-NEXT: movd %esi, %xmm2
2608; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2609; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2610; X64-NEXT: movd %r10d, %xmm0
2611; X64-NEXT: movd %edx, %xmm1
2612; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2613; X64-NEXT: movd %r8d, %xmm3
2614; X64-NEXT: movd %edi, %xmm0
2615; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
2616; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2617; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2618; X64-NEXT: retq
2619 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
2620 %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1
2621 %res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2
2622 %res3 = insertelement <8 x i16> %res2, i16 %a3, i32 3
2623 %res4 = insertelement <8 x i16> %res3, i16 %a4, i32 4
2624 %res5 = insertelement <8 x i16> %res4, i16 %a5, i32 5
2625 %res6 = insertelement <8 x i16> %res5, i16 %a6, i32 6
2626 %res7 = insertelement <8 x i16> %res6, i16 %a7, i32 7
2627 %res = bitcast <8 x i16> %res7 to <2 x i64>
2628 ret <2 x i64> %res
2629}
2630
2631define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
2632; X32-LABEL: test_mm_setr_epi32:
2633; X32: # BB#0:
2634; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2635; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2636; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2637; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2638; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2639; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2640; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2641; X32-NEXT: retl
2642;
2643; X64-LABEL: test_mm_setr_epi32:
2644; X64: # BB#0:
2645; X64-NEXT: movd %ecx, %xmm0
2646; X64-NEXT: movd %esi, %xmm1
2647; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2648; X64-NEXT: movd %edx, %xmm2
2649; X64-NEXT: movd %edi, %xmm0
2650; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2651; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2652; X64-NEXT: retq
2653 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
2654 %res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1
2655 %res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2
2656 %res3 = insertelement <4 x i32> %res2, i32 %a3, i32 3
2657 %res = bitcast <4 x i32> %res3 to <2 x i64>
2658 ret <2 x i64> %res
2659}
2660
2661; TODO test_mm_setr_epi64
2662
2663define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
2664; X32-LABEL: test_mm_setr_epi64x:
2665; X32: # BB#0:
2666; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2667; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2668; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2669; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2670; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2671; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2672; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2673; X32-NEXT: retl
2674;
2675; X64-LABEL: test_mm_setr_epi64x:
2676; X64: # BB#0:
2677; X64-NEXT: movd %rsi, %xmm1
2678; X64-NEXT: movd %rdi, %xmm0
2679; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2680; X64-NEXT: retq
2681 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
2682 %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1
2683 ret <2 x i64> %res1
2684}
2685
2686define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind {
2687; X32-LABEL: test_mm_setr_pd:
2688; X32: # BB#0:
2689; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2690; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2691; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2692; X32-NEXT: retl
2693;
2694; X64-LABEL: test_mm_setr_pd:
2695; X64: # BB#0:
2696; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2697; X64-NEXT: retq
2698 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2699 %res1 = insertelement <2 x double> %res0, double %a1, i32 1
2700 ret <2 x double> %res1
2701}
2702
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00002703define <2 x double> @test_mm_setzero_pd() {
2704; X32-LABEL: test_mm_setzero_pd:
2705; X32: # BB#0:
2706; X32-NEXT: xorps %xmm0, %xmm0
2707; X32-NEXT: retl
2708;
2709; X64-LABEL: test_mm_setzero_pd:
2710; X64: # BB#0:
2711; X64-NEXT: xorps %xmm0, %xmm0
2712; X64-NEXT: retq
2713 ret <2 x double> zeroinitializer
2714}
2715
2716define <2 x i64> @test_mm_setzero_si128() {
2717; X32-LABEL: test_mm_setzero_si128:
2718; X32: # BB#0:
2719; X32-NEXT: xorps %xmm0, %xmm0
2720; X32-NEXT: retl
2721;
2722; X64-LABEL: test_mm_setzero_si128:
2723; X64: # BB#0:
2724; X64-NEXT: xorps %xmm0, %xmm0
2725; X64-NEXT: retq
2726 ret <2 x i64> zeroinitializer
2727}
2728
2729define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) {
2730; X32-LABEL: test_mm_shuffle_epi32:
2731; X32: # BB#0:
2732; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2733; X32-NEXT: retl
2734;
2735; X64-LABEL: test_mm_shuffle_epi32:
2736; X64: # BB#0:
2737; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2738; X64-NEXT: retq
2739 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2740 %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer
2741 %bc = bitcast <4 x i32> %res to <2 x i64>
2742 ret <2 x i64> %bc
2743}
2744
2745define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
2746; X32-LABEL: test_mm_shuffle_pd:
2747; X32: # BB#0:
2748; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
2749; X32-NEXT: retl
2750;
2751; X64-LABEL: test_mm_shuffle_pd:
2752; X64: # BB#0:
2753; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
2754; X64-NEXT: retq
2755 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
2756 ret <2 x double> %res
2757}
2758
2759define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) {
2760; X32-LABEL: test_mm_shufflehi_epi16:
2761; X32: # BB#0:
2762; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2763; X32-NEXT: retl
2764;
2765; X64-LABEL: test_mm_shufflehi_epi16:
2766; X64: # BB#0:
2767; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2768; X64-NEXT: retq
2769 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2770 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
2771 %bc = bitcast <8 x i16> %res to <2 x i64>
2772 ret <2 x i64> %bc
2773}
2774
2775define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) {
2776; X32-LABEL: test_mm_shufflelo_epi16:
2777; X32: # BB#0:
2778; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2779; X32-NEXT: retl
2780;
2781; X64-LABEL: test_mm_shufflelo_epi16:
2782; X64: # BB#0:
2783; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2784; X64-NEXT: retq
2785 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2786 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
2787 %bc = bitcast <8 x i16> %res to <2 x i64>
2788 ret <2 x i64> %bc
2789}
2790
2791define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2792; X32-LABEL: test_mm_sll_epi16:
2793; X32: # BB#0:
2794; X32-NEXT: psllw %xmm1, %xmm0
2795; X32-NEXT: retl
2796;
2797; X64-LABEL: test_mm_sll_epi16:
2798; X64: # BB#0:
2799; X64-NEXT: psllw %xmm1, %xmm0
2800; X64-NEXT: retq
2801 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2802 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2803 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1)
2804 %bc = bitcast <8 x i16> %res to <2 x i64>
2805 ret <2 x i64> %bc
2806}
2807declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
2808
2809define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2810; X32-LABEL: test_mm_sll_epi32:
2811; X32: # BB#0:
2812; X32-NEXT: pslld %xmm1, %xmm0
2813; X32-NEXT: retl
2814;
2815; X64-LABEL: test_mm_sll_epi32:
2816; X64: # BB#0:
2817; X64-NEXT: pslld %xmm1, %xmm0
2818; X64-NEXT: retq
2819 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2820 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2821 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1)
2822 %bc = bitcast <4 x i32> %res to <2 x i64>
2823 ret <2 x i64> %bc
2824}
2825declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
2826
2827define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) {
2828; X32-LABEL: test_mm_sll_epi64:
2829; X32: # BB#0:
2830; X32-NEXT: psllq %xmm1, %xmm0
2831; X32-NEXT: retl
2832;
2833; X64-LABEL: test_mm_sll_epi64:
2834; X64: # BB#0:
2835; X64-NEXT: psllq %xmm1, %xmm0
2836; X64-NEXT: retq
2837 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
2838 ret <2 x i64> %res
2839}
2840declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
2841
2842define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) {
2843; X32-LABEL: test_mm_slli_epi16:
2844; X32: # BB#0:
2845; X32-NEXT: psllw $1, %xmm0
2846; X32-NEXT: retl
2847;
2848; X64-LABEL: test_mm_slli_epi16:
2849; X64: # BB#0:
2850; X64-NEXT: psllw $1, %xmm0
2851; X64-NEXT: retq
2852 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2853 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1)
2854 %bc = bitcast <8 x i16> %res to <2 x i64>
2855 ret <2 x i64> %bc
2856}
2857declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
2858
2859define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) {
2860; X32-LABEL: test_mm_slli_epi32:
2861; X32: # BB#0:
2862; X32-NEXT: pslld $1, %xmm0
2863; X32-NEXT: retl
2864;
2865; X64-LABEL: test_mm_slli_epi32:
2866; X64: # BB#0:
2867; X64-NEXT: pslld $1, %xmm0
2868; X64-NEXT: retq
2869 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2870 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1)
2871 %bc = bitcast <4 x i32> %res to <2 x i64>
2872 ret <2 x i64> %bc
2873}
2874declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
2875
2876define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) {
2877; X32-LABEL: test_mm_slli_epi64:
2878; X32: # BB#0:
2879; X32-NEXT: psllq $1, %xmm0
2880; X32-NEXT: retl
2881;
2882; X64-LABEL: test_mm_slli_epi64:
2883; X64: # BB#0:
2884; X64-NEXT: psllq $1, %xmm0
2885; X64-NEXT: retq
2886 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1)
2887 ret <2 x i64> %res
2888}
2889declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
2890
2891define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind {
2892; X32-LABEL: test_mm_slli_si128:
2893; X32: # BB#0:
2894; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
2895; X32-NEXT: retl
2896;
2897; X64-LABEL: test_mm_slli_si128:
2898; X64: # BB#0:
2899; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
2900; X64-NEXT: retq
2901 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2902 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
2903 %bc = bitcast <16 x i8> %res to <2 x i64>
2904 ret <2 x i64> %bc
2905}
2906
2907define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind {
2908; X32-LABEL: test_mm_sqrt_pd:
2909; X32: # BB#0:
2910; X32-NEXT: sqrtpd %xmm0, %xmm0
2911; X32-NEXT: retl
2912;
2913; X64-LABEL: test_mm_sqrt_pd:
2914; X64: # BB#0:
2915; X64-NEXT: sqrtpd %xmm0, %xmm0
2916; X64-NEXT: retq
2917 %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
2918 ret <2 x double> %res
2919}
2920declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
2921
2922define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2923; X32-LABEL: test_mm_sqrt_sd:
2924; X32: # BB#0:
2925; X32-NEXT: sqrtsd %xmm0, %xmm1
2926; X32-NEXT: movaps %xmm1, %xmm0
2927; X32-NEXT: retl
2928;
2929; X64-LABEL: test_mm_sqrt_sd:
2930; X64: # BB#0:
2931; X64-NEXT: sqrtsd %xmm0, %xmm1
2932; X64-NEXT: movaps %xmm1, %xmm0
2933; X64-NEXT: retq
2934 %call = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
2935 %ext0 = extractelement <2 x double> %call, i32 0
2936 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
2937 %ext1 = extractelement <2 x double> %a1, i32 1
2938 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
2939 ret <2 x double> %ins1
2940}
2941declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
2942
2943define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2944; X32-LABEL: test_mm_sra_epi16:
2945; X32: # BB#0:
2946; X32-NEXT: psraw %xmm1, %xmm0
2947; X32-NEXT: retl
2948;
2949; X64-LABEL: test_mm_sra_epi16:
2950; X64: # BB#0:
2951; X64-NEXT: psraw %xmm1, %xmm0
2952; X64-NEXT: retq
2953 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2954 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2955 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1)
2956 %bc = bitcast <8 x i16> %res to <2 x i64>
2957 ret <2 x i64> %bc
2958}
2959declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
2960
2961define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2962; X32-LABEL: test_mm_sra_epi32:
2963; X32: # BB#0:
2964; X32-NEXT: psrad %xmm1, %xmm0
2965; X32-NEXT: retl
2966;
2967; X64-LABEL: test_mm_sra_epi32:
2968; X64: # BB#0:
2969; X64-NEXT: psrad %xmm1, %xmm0
2970; X64-NEXT: retq
2971 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2972 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2973 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1)
2974 %bc = bitcast <4 x i32> %res to <2 x i64>
2975 ret <2 x i64> %bc
2976}
2977declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
2978
2979define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) {
2980; X32-LABEL: test_mm_srai_epi16:
2981; X32: # BB#0:
2982; X32-NEXT: psraw $1, %xmm0
2983; X32-NEXT: retl
2984;
2985; X64-LABEL: test_mm_srai_epi16:
2986; X64: # BB#0:
2987; X64-NEXT: psraw $1, %xmm0
2988; X64-NEXT: retq
2989 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2990 %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1)
2991 %bc = bitcast <8 x i16> %res to <2 x i64>
2992 ret <2 x i64> %bc
2993}
2994declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
2995
2996define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) {
2997; X32-LABEL: test_mm_srai_epi32:
2998; X32: # BB#0:
2999; X32-NEXT: psrad $1, %xmm0
3000; X32-NEXT: retl
3001;
3002; X64-LABEL: test_mm_srai_epi32:
3003; X64: # BB#0:
3004; X64-NEXT: psrad $1, %xmm0
3005; X64-NEXT: retq
3006 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3007 %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1)
3008 %bc = bitcast <4 x i32> %res to <2 x i64>
3009 ret <2 x i64> %bc
3010}
3011declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
3012
3013define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
3014; X32-LABEL: test_mm_srl_epi16:
3015; X32: # BB#0:
3016; X32-NEXT: psrlw %xmm1, %xmm0
3017; X32-NEXT: retl
3018;
3019; X64-LABEL: test_mm_srl_epi16:
3020; X64: # BB#0:
3021; X64-NEXT: psrlw %xmm1, %xmm0
3022; X64-NEXT: retq
3023 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3024 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3025 %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1)
3026 %bc = bitcast <8 x i16> %res to <2 x i64>
3027 ret <2 x i64> %bc
3028}
3029declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
3030
3031define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
3032; X32-LABEL: test_mm_srl_epi32:
3033; X32: # BB#0:
3034; X32-NEXT: psrld %xmm1, %xmm0
3035; X32-NEXT: retl
3036;
3037; X64-LABEL: test_mm_srl_epi32:
3038; X64: # BB#0:
3039; X64-NEXT: psrld %xmm1, %xmm0
3040; X64-NEXT: retq
3041 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3042 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3043 %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1)
3044 %bc = bitcast <4 x i32> %res to <2 x i64>
3045 ret <2 x i64> %bc
3046}
3047declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
3048
3049define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
3050; X32-LABEL: test_mm_srl_epi64:
3051; X32: # BB#0:
3052; X32-NEXT: psrlq %xmm1, %xmm0
3053; X32-NEXT: retl
3054;
3055; X64-LABEL: test_mm_srl_epi64:
3056; X64: # BB#0:
3057; X64-NEXT: psrlq %xmm1, %xmm0
3058; X64-NEXT: retq
3059 %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
3060 ret <2 x i64> %res
3061}
3062declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
3063
3064define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) {
3065; X32-LABEL: test_mm_srli_epi16:
3066; X32: # BB#0:
3067; X32-NEXT: psrlw $1, %xmm0
3068; X32-NEXT: retl
3069;
3070; X64-LABEL: test_mm_srli_epi16:
3071; X64: # BB#0:
3072; X64-NEXT: psrlw $1, %xmm0
3073; X64-NEXT: retq
3074 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3075 %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1)
3076 %bc = bitcast <8 x i16> %res to <2 x i64>
3077 ret <2 x i64> %bc
3078}
3079declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
3080
3081define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) {
3082; X32-LABEL: test_mm_srli_epi32:
3083; X32: # BB#0:
3084; X32-NEXT: psrld $1, %xmm0
3085; X32-NEXT: retl
3086;
3087; X64-LABEL: test_mm_srli_epi32:
3088; X64: # BB#0:
3089; X64-NEXT: psrld $1, %xmm0
3090; X64-NEXT: retq
3091 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3092 %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1)
3093 %bc = bitcast <4 x i32> %res to <2 x i64>
3094 ret <2 x i64> %bc
3095}
3096declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
3097
3098define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) {
3099; X32-LABEL: test_mm_srli_epi64:
3100; X32: # BB#0:
3101; X32-NEXT: psrlq $1, %xmm0
3102; X32-NEXT: retl
3103;
3104; X64-LABEL: test_mm_srli_epi64:
3105; X64: # BB#0:
3106; X64-NEXT: psrlq $1, %xmm0
3107; X64-NEXT: retq
3108 %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1)
3109 ret <2 x i64> %res
3110}
3111declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
3112
3113define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind {
3114; X32-LABEL: test_mm_srli_si128:
3115; X32: # BB#0:
3116; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
3117; X32-NEXT: retl
3118;
3119; X64-LABEL: test_mm_srli_si128:
3120; X64: # BB#0:
3121; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
3122; X64-NEXT: retq
3123 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3124 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
3125 %bc = bitcast <16 x i8> %res to <2 x i64>
3126 ret <2 x i64> %bc
3127}
3128
3129define void @test_mm_store_pd(double *%a0, <2 x double> %a1) {
3130; X32-LABEL: test_mm_store_pd:
3131; X32: # BB#0:
3132; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3133; X32-NEXT: movaps %xmm0, (%eax)
3134; X32-NEXT: retl
3135;
3136; X64-LABEL: test_mm_store_pd:
3137; X64: # BB#0:
3138; X64-NEXT: movaps %xmm0, (%rdi)
3139; X64-NEXT: retq
3140 %arg0 = bitcast double* %a0 to <2 x double>*
3141 store <2 x double> %a1, <2 x double>* %arg0, align 16
3142 ret void
3143}
3144
Simon Pilgrim4ed0e072016-05-30 18:18:44 +00003145define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) {
3146; X32-LABEL: test_mm_store_pd1:
3147; X32: # BB#0:
3148; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3149; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3150; X32-NEXT: movaps %xmm0, (%eax)
3151; X32-NEXT: retl
3152;
3153; X64-LABEL: test_mm_store_pd1:
3154; X64: # BB#0:
3155; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3156; X64-NEXT: movaps %xmm0, (%rdi)
3157; X64-NEXT: retq
3158 %arg0 = bitcast double * %a0 to <2 x double>*
3159 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
3160 store <2 x double> %shuf, <2 x double>* %arg0, align 16
3161 ret void
3162}
3163
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003164define void @test_mm_store_sd(double *%a0, <2 x double> %a1) {
3165; X32-LABEL: test_mm_store_sd:
3166; X32: # BB#0:
3167; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3168; X32-NEXT: movsd %xmm0, (%eax)
3169; X32-NEXT: retl
3170;
3171; X64-LABEL: test_mm_store_sd:
3172; X64: # BB#0:
3173; X64-NEXT: movsd %xmm0, (%rdi)
3174; X64-NEXT: retq
3175 %ext = extractelement <2 x double> %a1, i32 0
3176 store double %ext, double* %a0, align 1
3177 ret void
3178}
3179
3180define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3181; X32-LABEL: test_mm_store_si128:
3182; X32: # BB#0:
3183; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3184; X32-NEXT: movaps %xmm0, (%eax)
3185; X32-NEXT: retl
3186;
3187; X64-LABEL: test_mm_store_si128:
3188; X64: # BB#0:
3189; X64-NEXT: movaps %xmm0, (%rdi)
3190; X64-NEXT: retq
3191 store <2 x i64> %a1, <2 x i64>* %a0, align 16
3192 ret void
3193}
3194
Simon Pilgrim4d1e2582016-05-25 09:42:29 +00003195define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) {
3196; X32-LABEL: test_mm_store1_pd:
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003197; X32: # BB#0:
3198; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
Simon Pilgrim4ed0e072016-05-30 18:18:44 +00003199; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3200; X32-NEXT: movaps %xmm0, (%eax)
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003201; X32-NEXT: retl
3202;
Simon Pilgrim4d1e2582016-05-25 09:42:29 +00003203; X64-LABEL: test_mm_store1_pd:
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003204; X64: # BB#0:
Simon Pilgrim4ed0e072016-05-30 18:18:44 +00003205; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3206; X64-NEXT: movaps %xmm0, (%rdi)
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003207; X64-NEXT: retq
Simon Pilgrim4ed0e072016-05-30 18:18:44 +00003208 %arg0 = bitcast double * %a0 to <2 x double>*
3209 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
3210 store <2 x double> %shuf, <2 x double>* %arg0, align 16
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003211 ret void
3212}
3213
3214define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) {
3215; X32-LABEL: test_mm_storeh_sd:
3216; X32: # BB#0:
3217; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3218; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3219; X32-NEXT: movsd %xmm0, (%eax)
3220; X32-NEXT: retl
3221;
3222; X64-LABEL: test_mm_storeh_sd:
3223; X64: # BB#0:
3224; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3225; X64-NEXT: movsd %xmm0, (%rdi)
3226; X64-NEXT: retq
3227 %ext = extractelement <2 x double> %a1, i32 1
3228 store double %ext, double* %a0, align 8
3229 ret void
3230}
3231
3232define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) {
3233; X32-LABEL: test_mm_storel_epi64:
3234; X32: # BB#0:
3235; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3236; X32-NEXT: movlps %xmm0, (%eax)
3237; X32-NEXT: retl
3238;
3239; X64-LABEL: test_mm_storel_epi64:
3240; X64: # BB#0:
3241; X64-NEXT: movd %xmm0, %rax
3242; X64-NEXT: movq %rax, (%rdi)
3243; X64-NEXT: retq
3244 %ext = extractelement <2 x i64> %a1, i32 0
3245 %bc = bitcast <2 x i64> *%a0 to i64*
3246 store i64 %ext, i64* %bc, align 8
3247 ret void
3248}
3249
; Vector/scalar store tests. The _mm_storeu_* cases store with `align 1`, so
; llc must select an unaligned move (movups) rather than movaps/movapd;
; _mm_storer_pd reverses the two lanes (shufpd [1,0]) before an aligned store.
define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) {
; X32-LABEL: test_mm_storel_sd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movsd %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_storel_sd:
; X64: # BB#0:
; X64-NEXT: movsd %xmm0, (%rdi)
; X64-NEXT: retq
  %ext = extractelement <2 x double> %a1, i32 0
  store double %ext, double* %a0, align 8
  ret void
}

define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
; X32-LABEL: test_mm_storer_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; X32-NEXT: movapd %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_storer_pd:
; X64: # BB#0:
; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; X64-NEXT: movapd %xmm0, (%rdi)
; X64-NEXT: retq
  %arg0 = bitcast double* %a0 to <2 x double>*
  %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
  store <2 x double> %shuf, <2 x double>* %arg0, align 16
  ret void
}

define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) {
; X32-LABEL: test_mm_storeu_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movups %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_storeu_pd:
; X64: # BB#0:
; X64-NEXT: movups %xmm0, (%rdi)
; X64-NEXT: retq
  %arg0 = bitcast double* %a0 to <2 x double>*
  store <2 x double> %a1, <2 x double>* %arg0, align 1
  ret void
}

define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_storeu_si128:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movups %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_storeu_si128:
; X64: # BB#0:
; X64-NEXT: movups %xmm0, (%rdi)
; X64-NEXT: retq
  store <2 x i64> %a1, <2 x i64>* %a0, align 1
  ret void
}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003315
; Non-temporal store tests. The !nontemporal metadata (!0, defined at the end
; of this file) must make llc select streaming stores (movntps / movntil).
define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) {
; X32-LABEL: test_mm_stream_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movntps %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_stream_pd:
; X64: # BB#0:
; X64-NEXT: movntps %xmm0, (%rdi)
; X64-NEXT: retq
  %arg0 = bitcast double* %a0 to <2 x double>*
  store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0
  ret void
}

define void @test_mm_stream_si32(i32 *%a0, i32 %a1) {
; X32-LABEL: test_mm_stream_si32:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movntil %eax, (%ecx)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_stream_si32:
; X64: # BB#0:
; X64-NEXT: movntil %esi, (%rdi)
; X64-NEXT: retq
  store i32 %a1, i32* %a0, align 1, !nontemporal !0
  ret void
}

define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_stream_si128:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movntps %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_stream_si128:
; X64: # BB#0:
; X64-NEXT: movntps %xmm0, (%rdi)
; X64-NEXT: retq
  store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0
  ret void
}
3362
; Wrapping subtraction tests: integer psub[bwdq], packed subpd, and scalar
; subsd (_mm_sub_sd operates on lane 0 only and keeps the upper lane of %a0).
define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_sub_epi8:
; X32: # BB#0:
; X32-NEXT: psubb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_epi8:
; X64: # BB#0:
; X64-NEXT: psubb %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = sub <16 x i8> %arg0, %arg1
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_sub_epi16:
; X32: # BB#0:
; X32-NEXT: psubw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_epi16:
; X64: # BB#0:
; X64-NEXT: psubw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = sub <8 x i16> %arg0, %arg1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_sub_epi32:
; X32: # BB#0:
; X32-NEXT: psubd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_epi32:
; X64: # BB#0:
; X64-NEXT: psubd %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = sub <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_sub_epi64:
; X32: # BB#0:
; X32-NEXT: psubq %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_epi64:
; X64: # BB#0:
; X64-NEXT: psubq %xmm1, %xmm0
; X64-NEXT: retq
  %res = sub <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_sub_pd:
; X32: # BB#0:
; X32-NEXT: subpd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_pd:
; X64: # BB#0:
; X64-NEXT: subpd %xmm1, %xmm0
; X64-NEXT: retq
  %res = fsub <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_sub_sd:
; X32: # BB#0:
; X32-NEXT: subsd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_sd:
; X64: # BB#0:
; X64-NEXT: subsd %xmm1, %xmm0
; X64-NEXT: retq
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fsub = fsub double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fsub, i32 0
  ret <2 x double> %res
}
3458
; Saturating subtraction tests: signed (psubsb/psubsw) and unsigned
; (psubusb/psubusw), lowered from the llvm.x86.sse2.psubs*/psubus* intrinsics.
define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epi8:
; X32: # BB#0:
; X32-NEXT: psubsb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_subs_epi8:
; X64: # BB#0:
; X64-NEXT: psubsb %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epi16:
; X32: # BB#0:
; X32-NEXT: psubsw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_subs_epi16:
; X64: # BB#0:
; X64-NEXT: psubsw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epu8:
; X32: # BB#0:
; X32-NEXT: psubusb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_subs_epu8:
; X64: # BB#0:
; X64-NEXT: psubusb %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epu16:
; X32: # BB#0:
; X32-NEXT: psubusw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_subs_epu16:
; X64: # BB#0:
; X64-NEXT: psubusw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
3530
; Unordered scalar-double compare tests (ucomisd). eq/neq also check PF to
; handle NaNs; le/lt swap the ucomisd operands so setae/seta can be reused.
define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomieq_sd:
; X32: # BB#0:
; X32-NEXT: ucomisd %xmm1, %xmm0
; X32-NEXT: setnp %al
; X32-NEXT: sete %cl
; X32-NEXT: andb %al, %cl
; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomieq_sd:
; X64: # BB#0:
; X64-NEXT: ucomisd %xmm1, %xmm0
; X64-NEXT: setnp %al
; X64-NEXT: sete %cl
; X64-NEXT: andb %al, %cl
; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomige_sd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: ucomisd %xmm1, %xmm0
; X32-NEXT: setae %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomige_sd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomisd %xmm1, %xmm0
; X64-NEXT: setae %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomigt_sd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: ucomisd %xmm1, %xmm0
; X32-NEXT: seta %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomigt_sd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomisd %xmm1, %xmm0
; X64-NEXT: seta %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomile_sd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: ucomisd %xmm0, %xmm1
; X32-NEXT: setae %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomile_sd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomisd %xmm0, %xmm1
; X64-NEXT: setae %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomilt_sd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: ucomisd %xmm0, %xmm1
; X32-NEXT: seta %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomilt_sd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomisd %xmm0, %xmm1
; X64-NEXT: seta %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomineq_sd:
; X32: # BB#0:
; X32-NEXT: ucomisd %xmm1, %xmm0
; X32-NEXT: setp %al
; X32-NEXT: setne %cl
; X32-NEXT: orb %al, %cl
; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomineq_sd:
; X64: # BB#0:
; X64-NEXT: ucomisd %xmm1, %xmm0
; X64-NEXT: setp %al
; X64-NEXT: setne %cl
; X64-NEXT: orb %al, %cl
; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
3652
; _mm_undefined_* tests: returning undef must generate no instructions
; beyond the return.
define <2 x double> @test_mm_undefined_pd() {
; X32-LABEL: test_mm_undefined_pd:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm_undefined_pd:
; X64: # BB#0:
; X64-NEXT: retq
  ret <2 x double> undef
}

define <2 x i64> @test_mm_undefined_si128() {
; X32-LABEL: test_mm_undefined_si128:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm_undefined_si128:
; X64: # BB#0:
; X64-NEXT: retq
  ret <2 x i64> undef
}
3674
; High-lane interleave tests: shufflevector masks selecting the upper halves
; of both operands must lower to punpckh[bw|wd|dq|qdq] / unpckhpd.
define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi8:
; X32: # BB#0:
; X32-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi8:
; X64: # BB#0:
; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi16:
; X32: # BB#0:
; X32-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi16:
; X64: # BB#0:
; X64-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi32:
; X32: # BB#0:
; X32-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi32:
; X64: # BB#0:
; X64-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi64:
; X32: # BB#0:
; X32-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi64:
; X64: # BB#0:
; X64-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X64-NEXT: retq
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_unpackhi_pd:
; X32: # BB#0:
; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_pd:
; X64: # BB#0:
; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X64-NEXT: retq
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %res
}
3753
; Low-lane interleave tests: shufflevector masks selecting the lower halves
; of both operands must lower to punpckl[bw|wd|dq|qdq] / unpcklpd.
define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi8:
; X32: # BB#0:
; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi8:
; X64: # BB#0:
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi16:
; X32: # BB#0:
; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi16:
; X64: # BB#0:
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi32:
; X32: # BB#0:
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi32:
; X64: # BB#0:
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi64:
; X32: # BB#0:
; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi64:
; X64: # BB#0:
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_unpacklo_pd:
; X32: # BB#0:
; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_pd:
; X64: # BB#0:
; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %res
}
3832
; Bitwise XOR tests; with SSE2 the integer xor of both pd and si128 forms
; lowers to the float-domain xorps here.
define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_xor_pd:
; X32: # BB#0:
; X32-NEXT: xorps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_xor_pd:
; X64: # BB#0:
; X64-NEXT: xorps %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = xor <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_xor_si128:
; X32: # BB#0:
; X32-NEXT: xorps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_xor_si128:
; X64: # BB#0:
; X64-NEXT: xorps %xmm1, %xmm0
; X64-NEXT: retq
  %res = xor <2 x i64> %a0, %a1
  ret <2 x i64> %res
}
3863
3864!0 = !{i32 1}
3865