blob: cc9c2991e56acc2c024525564fd1f6cf522b4cc4 [file] [log] [blame]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32
3; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X64
4
5; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c
6
7define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
8; X32-LABEL: test_mm_add_epi8:
9; X32: # BB#0:
10; X32-NEXT: paddb %xmm1, %xmm0
11; X32-NEXT: retl
12;
13; X64-LABEL: test_mm_add_epi8:
14; X64: # BB#0:
15; X64-NEXT: paddb %xmm1, %xmm0
16; X64-NEXT: retq
17 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
18 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
19 %res = add <16 x i8> %arg0, %arg1
20 %bc = bitcast <16 x i8> %res to <2 x i64>
21 ret <2 x i64> %bc
22}
23
24define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
25; X32-LABEL: test_mm_add_epi16:
26; X32: # BB#0:
27; X32-NEXT: paddw %xmm1, %xmm0
28; X32-NEXT: retl
29;
30; X64-LABEL: test_mm_add_epi16:
31; X64: # BB#0:
32; X64-NEXT: paddw %xmm1, %xmm0
33; X64-NEXT: retq
34 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
35 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
36 %res = add <8 x i16> %arg0, %arg1
37 %bc = bitcast <8 x i16> %res to <2 x i64>
38 ret <2 x i64> %bc
39}
40
41define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
42; X32-LABEL: test_mm_add_epi32:
43; X32: # BB#0:
44; X32-NEXT: paddd %xmm1, %xmm0
45; X32-NEXT: retl
46;
47; X64-LABEL: test_mm_add_epi32:
48; X64: # BB#0:
49; X64-NEXT: paddd %xmm1, %xmm0
50; X64-NEXT: retq
51 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
52 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
53 %res = add <4 x i32> %arg0, %arg1
54 %bc = bitcast <4 x i32> %res to <2 x i64>
55 ret <2 x i64> %bc
56}
57
58define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
59; X32-LABEL: test_mm_add_epi64:
60; X32: # BB#0:
61; X32-NEXT: paddq %xmm1, %xmm0
62; X32-NEXT: retl
63;
64; X64-LABEL: test_mm_add_epi64:
65; X64: # BB#0:
66; X64-NEXT: paddq %xmm1, %xmm0
67; X64-NEXT: retq
68 %res = add <2 x i64> %a0, %a1
69 ret <2 x i64> %res
70}
71
72define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
73; X32-LABEL: test_mm_add_pd:
74; X32: # BB#0:
75; X32-NEXT: addpd %xmm1, %xmm0
76; X32-NEXT: retl
77;
78; X64-LABEL: test_mm_add_pd:
79; X64: # BB#0:
80; X64-NEXT: addpd %xmm1, %xmm0
81; X64-NEXT: retq
82 %res = fadd <2 x double> %a0, %a1
83 ret <2 x double> %res
84}
85
86define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
87; X32-LABEL: test_mm_add_sd:
88; X32: # BB#0:
89; X32-NEXT: addsd %xmm1, %xmm0
90; X32-NEXT: retl
91;
92; X64-LABEL: test_mm_add_sd:
93; X64: # BB#0:
94; X64-NEXT: addsd %xmm1, %xmm0
95; X64-NEXT: retq
96 %ext0 = extractelement <2 x double> %a0, i32 0
97 %ext1 = extractelement <2 x double> %a1, i32 0
98 %fadd = fadd double %ext0, %ext1
99 %res = insertelement <2 x double> %a0, double %fadd, i32 0
100 ret <2 x double> %res
101}
102
103define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
104; X32-LABEL: test_mm_adds_epi8:
105; X32: # BB#0:
106; X32-NEXT: paddsb %xmm1, %xmm0
107; X32-NEXT: retl
108;
109; X64-LABEL: test_mm_adds_epi8:
110; X64: # BB#0:
111; X64-NEXT: paddsb %xmm1, %xmm0
112; X64-NEXT: retq
113 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
114 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
115 %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %arg0, <16 x i8> %arg1)
116 %bc = bitcast <16 x i8> %res to <2 x i64>
117 ret <2 x i64> %bc
118}
119declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
120
121define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
122; X32-LABEL: test_mm_adds_epi16:
123; X32: # BB#0:
124; X32-NEXT: paddsw %xmm1, %xmm0
125; X32-NEXT: retl
126;
127; X64-LABEL: test_mm_adds_epi16:
128; X64: # BB#0:
129; X64-NEXT: paddsw %xmm1, %xmm0
130; X64-NEXT: retq
131 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
132 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
133 %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %arg0, <8 x i16> %arg1)
134 %bc = bitcast <8 x i16> %res to <2 x i64>
135 ret <2 x i64> %bc
136}
137declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
138
139define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
140; X32-LABEL: test_mm_adds_epu8:
141; X32: # BB#0:
142; X32-NEXT: paddusb %xmm1, %xmm0
143; X32-NEXT: retl
144;
145; X64-LABEL: test_mm_adds_epu8:
146; X64: # BB#0:
147; X64-NEXT: paddusb %xmm1, %xmm0
148; X64-NEXT: retq
149 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
150 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
151 %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %arg0, <16 x i8> %arg1)
152 %bc = bitcast <16 x i8> %res to <2 x i64>
153 ret <2 x i64> %bc
154}
155declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
156
157define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
158; X32-LABEL: test_mm_adds_epu16:
159; X32: # BB#0:
160; X32-NEXT: paddusw %xmm1, %xmm0
161; X32-NEXT: retl
162;
163; X64-LABEL: test_mm_adds_epu16:
164; X64: # BB#0:
165; X64-NEXT: paddusw %xmm1, %xmm0
166; X64-NEXT: retq
167 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
168 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
169 %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %arg0, <8 x i16> %arg1)
170 %bc = bitcast <8 x i16> %res to <2 x i64>
171 ret <2 x i64> %bc
172}
173declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
174
175define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
176; X32-LABEL: test_mm_and_pd:
177; X32: # BB#0:
178; X32-NEXT: andps %xmm1, %xmm0
179; X32-NEXT: retl
180;
181; X64-LABEL: test_mm_and_pd:
182; X64: # BB#0:
183; X64-NEXT: andps %xmm1, %xmm0
184; X64-NEXT: retq
185 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
186 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
187 %res = and <4 x i32> %arg0, %arg1
188 %bc = bitcast <4 x i32> %res to <2 x double>
189 ret <2 x double> %bc
190}
191
192define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
193; X32-LABEL: test_mm_and_si128:
194; X32: # BB#0:
195; X32-NEXT: andps %xmm1, %xmm0
196; X32-NEXT: retl
197;
198; X64-LABEL: test_mm_and_si128:
199; X64: # BB#0:
200; X64-NEXT: andps %xmm1, %xmm0
201; X64-NEXT: retq
202 %res = and <2 x i64> %a0, %a1
203 ret <2 x i64> %res
204}
205
206define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
207; X32-LABEL: test_mm_andnot_pd:
208; X32: # BB#0:
209; X32-NEXT: andnps %xmm1, %xmm0
210; X32-NEXT: retl
211;
212; X64-LABEL: test_mm_andnot_pd:
213; X64: # BB#0:
214; X64-NEXT: andnps %xmm1, %xmm0
215; X64-NEXT: retq
216 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
217 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
218 %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
219 %res = and <4 x i32> %not, %arg1
220 %bc = bitcast <4 x i32> %res to <2 x double>
221 ret <2 x double> %bc
222}
223
224define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
225; X32-LABEL: test_mm_andnot_si128:
226; X32: # BB#0:
227; X32-NEXT: pcmpeqd %xmm2, %xmm2
228; X32-NEXT: pxor %xmm2, %xmm0
229; X32-NEXT: pand %xmm1, %xmm0
230; X32-NEXT: retl
231;
232; X64-LABEL: test_mm_andnot_si128:
233; X64: # BB#0:
234; X64-NEXT: pcmpeqd %xmm2, %xmm2
235; X64-NEXT: pxor %xmm2, %xmm0
236; X64-NEXT: pand %xmm1, %xmm0
237; X64-NEXT: retq
238 %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
239 %res = and <2 x i64> %not, %a1
240 ret <2 x i64> %res
241}
242
243define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
244; X32-LABEL: test_mm_avg_epu8:
245; X32: # BB#0:
246; X32-NEXT: pavgb %xmm1, %xmm0
247; X32-NEXT: retl
248;
249; X64-LABEL: test_mm_avg_epu8:
250; X64: # BB#0:
251; X64-NEXT: pavgb %xmm1, %xmm0
252; X64-NEXT: retq
253 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
254 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
255 %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1)
256 %bc = bitcast <16 x i8> %res to <2 x i64>
257 ret <2 x i64> %bc
258}
259declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1) nounwind readnone
260
261define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
262; X32-LABEL: test_mm_avg_epu16:
263; X32: # BB#0:
264; X32-NEXT: pavgw %xmm1, %xmm0
265; X32-NEXT: retl
266;
267; X64-LABEL: test_mm_avg_epu16:
268; X64: # BB#0:
269; X64-NEXT: pavgw %xmm1, %xmm0
270; X64-NEXT: retq
271 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
272 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
273 %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %arg0, <8 x i16> %arg1)
274 %bc = bitcast <8 x i16> %res to <2 x i64>
275 ret <2 x i64> %bc
276}
277declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
278
279define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
280; X32-LABEL: test_mm_bslli_si128:
281; X32: # BB#0:
282; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
283; X32-NEXT: retl
284;
285; X64-LABEL: test_mm_bslli_si128:
286; X64: # BB#0:
287; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
288; X64-NEXT: retq
289 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
290 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
291 %bc = bitcast <16 x i8> %res to <2 x i64>
292 ret <2 x i64> %bc
293}
294
295define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
296; X32-LABEL: test_mm_bsrli_si128:
297; X32: # BB#0:
298; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
299; X32-NEXT: retl
300;
301; X64-LABEL: test_mm_bsrli_si128:
302; X64: # BB#0:
303; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
304; X64-NEXT: retq
305 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
306 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
307 %bc = bitcast <16 x i8> %res to <2 x i64>
308 ret <2 x i64> %bc
309}
310
Simon Pilgrim01809e02016-05-19 10:58:54 +0000311define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
312; X32-LABEL: test_mm_castpd_ps:
313; X32: # BB#0:
314; X32-NEXT: retl
315;
316; X64-LABEL: test_mm_castpd_ps:
317; X64: # BB#0:
318; X64-NEXT: retq
319 %res = bitcast <2 x double> %a0 to <4 x float>
320 ret <4 x float> %res
321}
322
323define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
324; X32-LABEL: test_mm_castpd_si128:
325; X32: # BB#0:
326; X32-NEXT: retl
327;
328; X64-LABEL: test_mm_castpd_si128:
329; X64: # BB#0:
330; X64-NEXT: retq
331 %res = bitcast <2 x double> %a0 to <2 x i64>
332 ret <2 x i64> %res
333}
334
335define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
336; X32-LABEL: test_mm_castps_pd:
337; X32: # BB#0:
338; X32-NEXT: retl
339;
340; X64-LABEL: test_mm_castps_pd:
341; X64: # BB#0:
342; X64-NEXT: retq
343 %res = bitcast <4 x float> %a0 to <2 x double>
344 ret <2 x double> %res
345}
346
347define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
348; X32-LABEL: test_mm_castps_si128:
349; X32: # BB#0:
350; X32-NEXT: retl
351;
352; X64-LABEL: test_mm_castps_si128:
353; X64: # BB#0:
354; X64-NEXT: retq
355 %res = bitcast <4 x float> %a0 to <2 x i64>
356 ret <2 x i64> %res
357}
358
359define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
360; X32-LABEL: test_mm_castsi128_pd:
361; X32: # BB#0:
362; X32-NEXT: retl
363;
364; X64-LABEL: test_mm_castsi128_pd:
365; X64: # BB#0:
366; X64-NEXT: retq
367 %res = bitcast <2 x i64> %a0 to <2 x double>
368 ret <2 x double> %res
369}
370
371define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
372; X32-LABEL: test_mm_castsi128_ps:
373; X32: # BB#0:
374; X32-NEXT: retl
375;
376; X64-LABEL: test_mm_castsi128_ps:
377; X64: # BB#0:
378; X64-NEXT: retq
379 %res = bitcast <2 x i64> %a0 to <4 x float>
380 ret <4 x float> %res
381}
382
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000383define void @test_mm_clflush(i8* %a0) nounwind {
384; X32-LABEL: test_mm_clflush:
385; X32: # BB#0:
386; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
387; X32-NEXT: clflush (%eax)
388; X32-NEXT: retl
389;
390; X64-LABEL: test_mm_clflush:
391; X64: # BB#0:
392; X64-NEXT: clflush (%rdi)
393; X64-NEXT: retq
394 call void @llvm.x86.sse2.clflush(i8* %a0)
395 ret void
396}
397declare void @llvm.x86.sse2.clflush(i8*) nounwind readnone
398
399define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
400; X32-LABEL: test_mm_cmpeq_epi8:
401; X32: # BB#0:
402; X32-NEXT: pcmpeqb %xmm1, %xmm0
403; X32-NEXT: retl
404;
405; X64-LABEL: test_mm_cmpeq_epi8:
406; X64: # BB#0:
407; X64-NEXT: pcmpeqb %xmm1, %xmm0
408; X64-NEXT: retq
409 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
410 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
411 %cmp = icmp eq <16 x i8> %arg0, %arg1
412 %res = sext <16 x i1> %cmp to <16 x i8>
413 %bc = bitcast <16 x i8> %res to <2 x i64>
414 ret <2 x i64> %bc
415}
416
417define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
418; X32-LABEL: test_mm_cmpeq_epi16:
419; X32: # BB#0:
420; X32-NEXT: pcmpeqw %xmm1, %xmm0
421; X32-NEXT: retl
422;
423; X64-LABEL: test_mm_cmpeq_epi16:
424; X64: # BB#0:
425; X64-NEXT: pcmpeqw %xmm1, %xmm0
426; X64-NEXT: retq
427 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
428 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
429 %cmp = icmp eq <8 x i16> %arg0, %arg1
430 %res = sext <8 x i1> %cmp to <8 x i16>
431 %bc = bitcast <8 x i16> %res to <2 x i64>
432 ret <2 x i64> %bc
433}
434
435define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
436; X32-LABEL: test_mm_cmpeq_epi32:
437; X32: # BB#0:
438; X32-NEXT: pcmpeqd %xmm1, %xmm0
439; X32-NEXT: retl
440;
441; X64-LABEL: test_mm_cmpeq_epi32:
442; X64: # BB#0:
443; X64-NEXT: pcmpeqd %xmm1, %xmm0
444; X64-NEXT: retq
445 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
446 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
447 %cmp = icmp eq <4 x i32> %arg0, %arg1
448 %res = sext <4 x i1> %cmp to <4 x i32>
449 %bc = bitcast <4 x i32> %res to <2 x i64>
450 ret <2 x i64> %bc
451}
452
453define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
454; X32-LABEL: test_mm_cmpeq_pd:
455; X32: # BB#0:
456; X32-NEXT: cmpeqpd %xmm1, %xmm0
457; X32-NEXT: retl
458;
459; X64-LABEL: test_mm_cmpeq_pd:
460; X64: # BB#0:
461; X64-NEXT: cmpeqpd %xmm1, %xmm0
462; X64-NEXT: retq
463 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
464 ret <2 x double> %res
465}
466declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
467
468define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
469; X32-LABEL: test_mm_cmpeq_sd:
470; X32: # BB#0:
471; X32-NEXT: cmpeqsd %xmm1, %xmm0
472; X32-NEXT: retl
473;
474; X64-LABEL: test_mm_cmpeq_sd:
475; X64: # BB#0:
476; X64-NEXT: cmpeqsd %xmm1, %xmm0
477; X64-NEXT: retq
478 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
479 ret <2 x double> %res
480}
481declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
482
483define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
484; X32-LABEL: test_mm_cmpge_pd:
485; X32: # BB#0:
486; X32-NEXT: cmplepd %xmm0, %xmm1
487; X32-NEXT: movapd %xmm1, %xmm0
488; X32-NEXT: retl
489;
490; X64-LABEL: test_mm_cmpge_pd:
491; X64: # BB#0:
492; X64-NEXT: cmplepd %xmm0, %xmm1
493; X64-NEXT: movapd %xmm1, %xmm0
494; X64-NEXT: retq
495 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 2)
496 ret <2 x double> %res
497}
498
499define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
500; X32-LABEL: test_mm_cmpge_sd:
501; X32: # BB#0:
502; X32-NEXT: cmplesd %xmm0, %xmm1
503; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
504; X32-NEXT: retl
505;
506; X64-LABEL: test_mm_cmpge_sd:
507; X64: # BB#0:
508; X64-NEXT: cmplesd %xmm0, %xmm1
509; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
510; X64-NEXT: retq
511 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
512 %ext0 = extractelement <2 x double> %cmp, i32 0
513 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
514 %ext1 = extractelement <2 x double> %a0, i32 1
515 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
516 ret <2 x double> %ins1
517}
518
519define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
520; X32-LABEL: test_mm_cmpgt_epi8:
521; X32: # BB#0:
522; X32-NEXT: pcmpgtb %xmm1, %xmm0
523; X32-NEXT: retl
524;
525; X64-LABEL: test_mm_cmpgt_epi8:
526; X64: # BB#0:
527; X64-NEXT: pcmpgtb %xmm1, %xmm0
528; X64-NEXT: retq
529 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
530 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
531 %cmp = icmp sgt <16 x i8> %arg0, %arg1
532 %res = sext <16 x i1> %cmp to <16 x i8>
533 %bc = bitcast <16 x i8> %res to <2 x i64>
534 ret <2 x i64> %bc
535}
536
537define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
538; X32-LABEL: test_mm_cmpgt_epi16:
539; X32: # BB#0:
540; X32-NEXT: pcmpgtw %xmm1, %xmm0
541; X32-NEXT: retl
542;
543; X64-LABEL: test_mm_cmpgt_epi16:
544; X64: # BB#0:
545; X64-NEXT: pcmpgtw %xmm1, %xmm0
546; X64-NEXT: retq
547 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
548 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
549 %cmp = icmp sgt <8 x i16> %arg0, %arg1
550 %res = sext <8 x i1> %cmp to <8 x i16>
551 %bc = bitcast <8 x i16> %res to <2 x i64>
552 ret <2 x i64> %bc
553}
554
555define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
556; X32-LABEL: test_mm_cmpgt_epi32:
557; X32: # BB#0:
558; X32-NEXT: pcmpgtd %xmm1, %xmm0
559; X32-NEXT: retl
560;
561; X64-LABEL: test_mm_cmpgt_epi32:
562; X64: # BB#0:
563; X64-NEXT: pcmpgtd %xmm1, %xmm0
564; X64-NEXT: retq
565 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
566 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
567 %cmp = icmp sgt <4 x i32> %arg0, %arg1
568 %res = sext <4 x i1> %cmp to <4 x i32>
569 %bc = bitcast <4 x i32> %res to <2 x i64>
570 ret <2 x i64> %bc
571}
572
573define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
574; X32-LABEL: test_mm_cmpgt_pd:
575; X32: # BB#0:
576; X32-NEXT: cmpltpd %xmm0, %xmm1
577; X32-NEXT: movapd %xmm1, %xmm0
578; X32-NEXT: retl
579;
580; X64-LABEL: test_mm_cmpgt_pd:
581; X64: # BB#0:
582; X64-NEXT: cmpltpd %xmm0, %xmm1
583; X64-NEXT: movapd %xmm1, %xmm0
584; X64-NEXT: retq
585 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 1)
586 ret <2 x double> %res
587}
588
589define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
590; X32-LABEL: test_mm_cmpgt_sd:
591; X32: # BB#0:
592; X32-NEXT: cmpltsd %xmm0, %xmm1
593; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
594; X32-NEXT: retl
595;
596; X64-LABEL: test_mm_cmpgt_sd:
597; X64: # BB#0:
598; X64-NEXT: cmpltsd %xmm0, %xmm1
599; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
600; X64-NEXT: retq
601 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
602 %ext0 = extractelement <2 x double> %cmp, i32 0
603 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
604 %ext1 = extractelement <2 x double> %a0, i32 1
605 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
606 ret <2 x double> %ins1
607}
608
609define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
610; X32-LABEL: test_mm_cmple_pd:
611; X32: # BB#0:
612; X32-NEXT: cmplepd %xmm1, %xmm0
613; X32-NEXT: retl
614;
615; X64-LABEL: test_mm_cmple_pd:
616; X64: # BB#0:
617; X64-NEXT: cmplepd %xmm1, %xmm0
618; X64-NEXT: retq
619 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 2)
620 ret <2 x double> %res
621}
622
623define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
624; X32-LABEL: test_mm_cmple_sd:
625; X32: # BB#0:
626; X32-NEXT: cmplesd %xmm1, %xmm0
627; X32-NEXT: retl
628;
629; X64-LABEL: test_mm_cmple_sd:
630; X64: # BB#0:
631; X64-NEXT: cmplesd %xmm1, %xmm0
632; X64-NEXT: retq
633 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
634 ret <2 x double> %res
635}
636
637define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
638; X32-LABEL: test_mm_cmplt_epi8:
639; X32: # BB#0:
640; X32-NEXT: pcmpgtb %xmm0, %xmm1
641; X32-NEXT: movdqa %xmm1, %xmm0
642; X32-NEXT: retl
643;
644; X64-LABEL: test_mm_cmplt_epi8:
645; X64: # BB#0:
646; X64-NEXT: pcmpgtb %xmm0, %xmm1
647; X64-NEXT: movdqa %xmm1, %xmm0
648; X64-NEXT: retq
649 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
650 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
651 %cmp = icmp sgt <16 x i8> %arg1, %arg0
652 %res = sext <16 x i1> %cmp to <16 x i8>
653 %bc = bitcast <16 x i8> %res to <2 x i64>
654 ret <2 x i64> %bc
655}
656
657define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
658; X32-LABEL: test_mm_cmplt_epi16:
659; X32: # BB#0:
660; X32-NEXT: pcmpgtw %xmm0, %xmm1
661; X32-NEXT: movdqa %xmm1, %xmm0
662; X32-NEXT: retl
663;
664; X64-LABEL: test_mm_cmplt_epi16:
665; X64: # BB#0:
666; X64-NEXT: pcmpgtw %xmm0, %xmm1
667; X64-NEXT: movdqa %xmm1, %xmm0
668; X64-NEXT: retq
669 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
670 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
671 %cmp = icmp sgt <8 x i16> %arg1, %arg0
672 %res = sext <8 x i1> %cmp to <8 x i16>
673 %bc = bitcast <8 x i16> %res to <2 x i64>
674 ret <2 x i64> %bc
675}
676
677define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
678; X32-LABEL: test_mm_cmplt_epi32:
679; X32: # BB#0:
680; X32-NEXT: pcmpgtd %xmm0, %xmm1
681; X32-NEXT: movdqa %xmm1, %xmm0
682; X32-NEXT: retl
683;
684; X64-LABEL: test_mm_cmplt_epi32:
685; X64: # BB#0:
686; X64-NEXT: pcmpgtd %xmm0, %xmm1
687; X64-NEXT: movdqa %xmm1, %xmm0
688; X64-NEXT: retq
689 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
690 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
691 %cmp = icmp sgt <4 x i32> %arg1, %arg0
692 %res = sext <4 x i1> %cmp to <4 x i32>
693 %bc = bitcast <4 x i32> %res to <2 x i64>
694 ret <2 x i64> %bc
695}
696
697define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
698; X32-LABEL: test_mm_cmplt_pd:
699; X32: # BB#0:
700; X32-NEXT: cmpltpd %xmm1, %xmm0
701; X32-NEXT: retl
702;
703; X64-LABEL: test_mm_cmplt_pd:
704; X64: # BB#0:
705; X64-NEXT: cmpltpd %xmm1, %xmm0
706; X64-NEXT: retq
707 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 1)
708 ret <2 x double> %res
709}
710
711define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
712; X32-LABEL: test_mm_cmplt_sd:
713; X32: # BB#0:
714; X32-NEXT: cmpltsd %xmm1, %xmm0
715; X32-NEXT: retl
716;
717; X64-LABEL: test_mm_cmplt_sd:
718; X64: # BB#0:
719; X64-NEXT: cmpltsd %xmm1, %xmm0
720; X64-NEXT: retq
721 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
722 ret <2 x double> %res
723}
724
725define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
726; X32-LABEL: test_mm_cmpneq_pd:
727; X32: # BB#0:
728; X32-NEXT: cmpneqpd %xmm1, %xmm0
729; X32-NEXT: retl
730;
731; X64-LABEL: test_mm_cmpneq_pd:
732; X64: # BB#0:
733; X64-NEXT: cmpneqpd %xmm1, %xmm0
734; X64-NEXT: retq
735 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 4)
736 ret <2 x double> %res
737}
738
739define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
740; X32-LABEL: test_mm_cmpneq_sd:
741; X32: # BB#0:
742; X32-NEXT: cmpneqsd %xmm1, %xmm0
743; X32-NEXT: retl
744;
745; X64-LABEL: test_mm_cmpneq_sd:
746; X64: # BB#0:
747; X64-NEXT: cmpneqsd %xmm1, %xmm0
748; X64-NEXT: retq
749 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
750 ret <2 x double> %res
751}
752
753define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
754; X32-LABEL: test_mm_cmpnge_pd:
755; X32: # BB#0:
756; X32-NEXT: cmpnlepd %xmm0, %xmm1
757; X32-NEXT: movapd %xmm1, %xmm0
758; X32-NEXT: retl
759;
760; X64-LABEL: test_mm_cmpnge_pd:
761; X64: # BB#0:
762; X64-NEXT: cmpnlepd %xmm0, %xmm1
763; X64-NEXT: movapd %xmm1, %xmm0
764; X64-NEXT: retq
765 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 6)
766 ret <2 x double> %res
767}
768
769define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
770; X32-LABEL: test_mm_cmpnge_sd:
771; X32: # BB#0:
772; X32-NEXT: cmpnlesd %xmm0, %xmm1
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000773; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000774; X32-NEXT: retl
775;
776; X64-LABEL: test_mm_cmpnge_sd:
777; X64: # BB#0:
778; X64-NEXT: cmpnlesd %xmm0, %xmm1
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000779; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000780; X64-NEXT: retq
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000781 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
782 %ext0 = extractelement <2 x double> %cmp, i32 0
783 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
784 %ext1 = extractelement <2 x double> %a0, i32 1
785 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
786 ret <2 x double> %ins1
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000787}
788
789define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
790; X32-LABEL: test_mm_cmpngt_pd:
791; X32: # BB#0:
792; X32-NEXT: cmpnltpd %xmm0, %xmm1
793; X32-NEXT: movapd %xmm1, %xmm0
794; X32-NEXT: retl
795;
796; X64-LABEL: test_mm_cmpngt_pd:
797; X64: # BB#0:
798; X64-NEXT: cmpnltpd %xmm0, %xmm1
799; X64-NEXT: movapd %xmm1, %xmm0
800; X64-NEXT: retq
801 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 5)
802 ret <2 x double> %res
803}
804
805define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
806; X32-LABEL: test_mm_cmpngt_sd:
807; X32: # BB#0:
808; X32-NEXT: cmpnltsd %xmm0, %xmm1
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000809; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000810; X32-NEXT: retl
811;
812; X64-LABEL: test_mm_cmpngt_sd:
813; X64: # BB#0:
814; X64-NEXT: cmpnltsd %xmm0, %xmm1
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000815; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000816; X64-NEXT: retq
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000817 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
818 %ext0 = extractelement <2 x double> %cmp, i32 0
819 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
820 %ext1 = extractelement <2 x double> %a0, i32 1
821 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
822 ret <2 x double> %ins1
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000823}
824
825define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
826; X32-LABEL: test_mm_cmpnle_pd:
827; X32: # BB#0:
828; X32-NEXT: cmpnlepd %xmm1, %xmm0
829; X32-NEXT: retl
830;
831; X64-LABEL: test_mm_cmpnle_pd:
832; X64: # BB#0:
833; X64-NEXT: cmpnlepd %xmm1, %xmm0
834; X64-NEXT: retq
835 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 6)
836 ret <2 x double> %res
837}
838
839define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
840; X32-LABEL: test_mm_cmpnle_sd:
841; X32: # BB#0:
842; X32-NEXT: cmpnlesd %xmm1, %xmm0
843; X32-NEXT: retl
844;
845; X64-LABEL: test_mm_cmpnle_sd:
846; X64: # BB#0:
847; X64-NEXT: cmpnlesd %xmm1, %xmm0
848; X64-NEXT: retq
849 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
850 ret <2 x double> %res
851}
852
853define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
854; X32-LABEL: test_mm_cmpnlt_pd:
855; X32: # BB#0:
856; X32-NEXT: cmpnltpd %xmm1, %xmm0
857; X32-NEXT: retl
858;
859; X64-LABEL: test_mm_cmpnlt_pd:
860; X64: # BB#0:
861; X64-NEXT: cmpnltpd %xmm1, %xmm0
862; X64-NEXT: retq
863 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 5)
864 ret <2 x double> %res
865}
866
867define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
868; X32-LABEL: test_mm_cmpnlt_sd:
869; X32: # BB#0:
870; X32-NEXT: cmpnltsd %xmm1, %xmm0
871; X32-NEXT: retl
872;
873; X64-LABEL: test_mm_cmpnlt_sd:
874; X64: # BB#0:
875; X64-NEXT: cmpnltsd %xmm1, %xmm0
876; X64-NEXT: retq
877 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
878 ret <2 x double> %res
879}
880
881define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
882; X32-LABEL: test_mm_cmpord_pd:
883; X32: # BB#0:
884; X32-NEXT: cmpordpd %xmm1, %xmm0
885; X32-NEXT: retl
886;
887; X64-LABEL: test_mm_cmpord_pd:
888; X64: # BB#0:
889; X64-NEXT: cmpordpd %xmm1, %xmm0
890; X64-NEXT: retq
891 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
892 ret <2 x double> %res
893}
894
895define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
896; X32-LABEL: test_mm_cmpord_sd:
897; X32: # BB#0:
898; X32-NEXT: cmpordsd %xmm1, %xmm0
899; X32-NEXT: retl
900;
901; X64-LABEL: test_mm_cmpord_sd:
902; X64: # BB#0:
903; X64-NEXT: cmpordsd %xmm1, %xmm0
904; X64-NEXT: retq
905 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
906 ret <2 x double> %res
907}
908
909define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
910; X32-LABEL: test_mm_cmpunord_pd:
911; X32: # BB#0:
912; X32-NEXT: cmpunordpd %xmm1, %xmm0
913; X32-NEXT: retl
914;
915; X64-LABEL: test_mm_cmpunord_pd:
916; X64: # BB#0:
917; X64-NEXT: cmpunordpd %xmm1, %xmm0
918; X64-NEXT: retq
919 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 3)
920 ret <2 x double> %res
921}
922
923define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
924; X32-LABEL: test_mm_cmpunord_sd:
925; X32: # BB#0:
926; X32-NEXT: cmpunordsd %xmm1, %xmm0
927; X32-NEXT: retl
928;
929; X64-LABEL: test_mm_cmpunord_sd:
930; X64: # BB#0:
931; X64-NEXT: cmpunordsd %xmm1, %xmm0
932; X64-NEXT: retq
933 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3)
934 ret <2 x double> %res
935}
936
937define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
938; X32-LABEL: test_mm_comieq_sd:
939; X32: # BB#0:
940; X32-NEXT: comisd %xmm1, %xmm0
941; X32-NEXT: setnp %al
942; X32-NEXT: sete %cl
943; X32-NEXT: andb %al, %cl
944; X32-NEXT: movzbl %cl, %eax
945; X32-NEXT: retl
946;
947; X64-LABEL: test_mm_comieq_sd:
948; X64: # BB#0:
949; X64-NEXT: comisd %xmm1, %xmm0
950; X64-NEXT: setnp %al
951; X64-NEXT: sete %cl
952; X64-NEXT: andb %al, %cl
953; X64-NEXT: movzbl %cl, %eax
954; X64-NEXT: retq
955 %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
956 ret i32 %res
957}
958declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
959
960define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
961; X32-LABEL: test_mm_comige_sd:
962; X32: # BB#0:
963; X32-NEXT: comisd %xmm1, %xmm0
964; X32-NEXT: setae %al
965; X32-NEXT: movzbl %al, %eax
966; X32-NEXT: retl
967;
968; X64-LABEL: test_mm_comige_sd:
969; X64: # BB#0:
970; X64-NEXT: comisd %xmm1, %xmm0
971; X64-NEXT: setae %al
972; X64-NEXT: movzbl %al, %eax
973; X64-NEXT: retq
974 %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
975 ret i32 %res
976}
977declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
978
979define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
980; X32-LABEL: test_mm_comigt_sd:
981; X32: # BB#0:
982; X32-NEXT: comisd %xmm1, %xmm0
983; X32-NEXT: seta %al
984; X32-NEXT: movzbl %al, %eax
985; X32-NEXT: retl
986;
987; X64-LABEL: test_mm_comigt_sd:
988; X64: # BB#0:
989; X64-NEXT: comisd %xmm1, %xmm0
990; X64-NEXT: seta %al
991; X64-NEXT: movzbl %al, %eax
992; X64-NEXT: retq
993 %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
994 ret i32 %res
995}
996declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
997
998define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
999; X32-LABEL: test_mm_comile_sd:
1000; X32: # BB#0:
1001; X32-NEXT: comisd %xmm0, %xmm1
1002; X32-NEXT: setae %al
1003; X32-NEXT: movzbl %al, %eax
1004; X32-NEXT: retl
1005;
1006; X64-LABEL: test_mm_comile_sd:
1007; X64: # BB#0:
1008; X64-NEXT: comisd %xmm0, %xmm1
1009; X64-NEXT: setae %al
1010; X64-NEXT: movzbl %al, %eax
1011; X64-NEXT: retq
1012 %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
1013 ret i32 %res
1014}
1015declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
1016
1017define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1018; X32-LABEL: test_mm_comilt_sd:
1019; X32: # BB#0:
1020; X32-NEXT: comisd %xmm0, %xmm1
1021; X32-NEXT: seta %al
1022; X32-NEXT: movzbl %al, %eax
1023; X32-NEXT: retl
1024;
1025; X64-LABEL: test_mm_comilt_sd:
1026; X64: # BB#0:
1027; X64-NEXT: comisd %xmm0, %xmm1
1028; X64-NEXT: seta %al
1029; X64-NEXT: movzbl %al, %eax
1030; X64-NEXT: retq
1031 %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
1032 ret i32 %res
1033}
1034declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
1035
1036define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1037; X32-LABEL: test_mm_comineq_sd:
1038; X32: # BB#0:
1039; X32-NEXT: comisd %xmm1, %xmm0
1040; X32-NEXT: setp %al
1041; X32-NEXT: setne %cl
1042; X32-NEXT: orb %al, %cl
1043; X32-NEXT: movzbl %cl, %eax
1044; X32-NEXT: retl
1045;
1046; X64-LABEL: test_mm_comineq_sd:
1047; X64: # BB#0:
1048; X64-NEXT: comisd %xmm1, %xmm0
1049; X64-NEXT: setp %al
1050; X64-NEXT: setne %cl
1051; X64-NEXT: orb %al, %cl
1052; X64-NEXT: movzbl %cl, %eax
1053; X64-NEXT: retq
1054 %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
1055 ret i32 %res
1056}
1057declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
1058
1059define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
1060; X32-LABEL: test_mm_cvtepi32_pd:
1061; X32: # BB#0:
1062; X32-NEXT: cvtdq2pd %xmm0, %xmm0
1063; X32-NEXT: retl
1064;
1065; X64-LABEL: test_mm_cvtepi32_pd:
1066; X64: # BB#0:
1067; X64-NEXT: cvtdq2pd %xmm0, %xmm0
1068; X64-NEXT: retq
1069 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
Simon Pilgrim8a5ff3c2016-05-23 22:17:36 +00001070 %ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1>
1071 %res = sitofp <2 x i32> %ext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001072 ret <2 x double> %res
1073}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001074
1075define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
1076; X32-LABEL: test_mm_cvtepi32_ps:
1077; X32: # BB#0:
1078; X32-NEXT: cvtdq2ps %xmm0, %xmm0
1079; X32-NEXT: retl
1080;
1081; X64-LABEL: test_mm_cvtepi32_ps:
1082; X64: # BB#0:
1083; X64-NEXT: cvtdq2ps %xmm0, %xmm0
1084; X64-NEXT: retq
1085 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1086 %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %arg0)
1087 ret <4 x float> %res
1088}
1089declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
1090
1091define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
1092; X32-LABEL: test_mm_cvtpd_epi32:
1093; X32: # BB#0:
1094; X32-NEXT: cvtpd2dq %xmm0, %xmm0
1095; X32-NEXT: retl
1096;
1097; X64-LABEL: test_mm_cvtpd_epi32:
1098; X64: # BB#0:
1099; X64-NEXT: cvtpd2dq %xmm0, %xmm0
1100; X64-NEXT: retq
1101 %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
1102 %bc = bitcast <4 x i32> %res to <2 x i64>
1103 ret <2 x i64> %bc
1104}
1105declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
1106
1107define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind {
1108; X32-LABEL: test_mm_cvtpd_ps:
1109; X32: # BB#0:
1110; X32-NEXT: cvtpd2ps %xmm0, %xmm0
1111; X32-NEXT: retl
1112;
1113; X64-LABEL: test_mm_cvtpd_ps:
1114; X64: # BB#0:
1115; X64-NEXT: cvtpd2ps %xmm0, %xmm0
1116; X64-NEXT: retq
1117 %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
1118 ret <4 x float> %res
1119}
1120declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
1121
1122define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind {
1123; X32-LABEL: test_mm_cvtps_epi32:
1124; X32: # BB#0:
1125; X32-NEXT: cvtps2dq %xmm0, %xmm0
1126; X32-NEXT: retl
1127;
1128; X64-LABEL: test_mm_cvtps_epi32:
1129; X64: # BB#0:
1130; X64-NEXT: cvtps2dq %xmm0, %xmm0
1131; X64-NEXT: retq
1132 %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
1133 %bc = bitcast <4 x i32> %res to <2 x i64>
1134 ret <2 x i64> %bc
1135}
1136declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
1137
1138define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind {
1139; X32-LABEL: test_mm_cvtps_pd:
1140; X32: # BB#0:
1141; X32-NEXT: cvtps2pd %xmm0, %xmm0
1142; X32-NEXT: retl
1143;
1144; X64-LABEL: test_mm_cvtps_pd:
1145; X64: # BB#0:
1146; X64-NEXT: cvtps2pd %xmm0, %xmm0
1147; X64-NEXT: retq
Simon Pilgrim8a5ff3c2016-05-23 22:17:36 +00001148 %ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
1149 %res = fpext <2 x float> %ext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001150 ret <2 x double> %res
1151}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001152
1153define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
1154; X32-LABEL: test_mm_cvtsd_f64:
1155; X32: # BB#0:
1156; X32-NEXT: pushl %ebp
1157; X32-NEXT: movl %esp, %ebp
1158; X32-NEXT: andl $-8, %esp
1159; X32-NEXT: subl $8, %esp
1160; X32-NEXT: movlps %xmm0, (%esp)
1161; X32-NEXT: fldl (%esp)
1162; X32-NEXT: movl %ebp, %esp
1163; X32-NEXT: popl %ebp
1164; X32-NEXT: retl
1165;
1166; X64-LABEL: test_mm_cvtsd_f64:
1167; X64: # BB#0:
1168; X64-NEXT: retq
1169 %res = extractelement <2 x double> %a0, i32 0
1170 ret double %res
1171}
1172
1173define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
1174; X32-LABEL: test_mm_cvtsd_si32:
1175; X32: # BB#0:
1176; X32-NEXT: cvtsd2si %xmm0, %eax
1177; X32-NEXT: retl
1178;
1179; X64-LABEL: test_mm_cvtsd_si32:
1180; X64: # BB#0:
1181; X64-NEXT: cvtsd2si %xmm0, %eax
1182; X64-NEXT: retq
1183 %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
1184 ret i32 %res
1185}
1186declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
1187
1188define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
1189; X32-LABEL: test_mm_cvtsi128_si32:
1190; X32: # BB#0:
1191; X32-NEXT: movd %xmm0, %eax
1192; X32-NEXT: retl
1193;
1194; X64-LABEL: test_mm_cvtsi128_si32:
1195; X64: # BB#0:
1196; X64-NEXT: movd %xmm0, %eax
1197; X64-NEXT: retq
1198 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1199 %res = extractelement <4 x i32> %arg0, i32 0
1200 ret i32 %res
1201}
1202
1203define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
1204; X32-LABEL: test_mm_cvtsi32_sd:
1205; X32: # BB#0:
1206; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1207; X32-NEXT: cvtsi2sdl %eax, %xmm1
1208; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1209; X32-NEXT: retl
1210;
1211; X64-LABEL: test_mm_cvtsi32_sd:
1212; X64: # BB#0:
1213; X64-NEXT: cvtsi2sdl %edi, %xmm1
1214; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1215; X64-NEXT: retq
1216 %cvt = sitofp i32 %a1 to double
1217 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1218 ret <2 x double> %res
1219}
1220
1221define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
1222; X32-LABEL: test_mm_cvtsi32_si128:
1223; X32: # BB#0:
1224; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1225; X32-NEXT: retl
1226;
1227; X64-LABEL: test_mm_cvtsi32_si128:
1228; X64: # BB#0:
1229; X64-NEXT: movd %edi, %xmm0
1230; X64-NEXT: retq
1231 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
1232 %res1 = insertelement <4 x i32> %res0, i32 0, i32 1
1233 %res2 = insertelement <4 x i32> %res1, i32 0, i32 2
1234 %res3 = insertelement <4 x i32> %res2, i32 0, i32 3
1235 %res = bitcast <4 x i32> %res3 to <2 x i64>
1236 ret <2 x i64> %res
1237}
1238
1239define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
1240; X32-LABEL: test_mm_cvtss_sd:
1241; X32: # BB#0:
1242; X32-NEXT: cvtss2sd %xmm1, %xmm1
1243; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1244; X32-NEXT: retl
1245;
1246; X64-LABEL: test_mm_cvtss_sd:
1247; X64: # BB#0:
1248; X64-NEXT: cvtss2sd %xmm1, %xmm1
1249; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1250; X64-NEXT: retq
1251 %ext = extractelement <4 x float> %a1, i32 0
1252 %cvt = fpext float %ext to double
1253 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1254 ret <2 x double> %res
1255}
1256
1257define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
1258; X32-LABEL: test_mm_cvttpd_epi32:
1259; X32: # BB#0:
1260; X32-NEXT: cvttpd2dq %xmm0, %xmm0
1261; X32-NEXT: retl
1262;
1263; X64-LABEL: test_mm_cvttpd_epi32:
1264; X64: # BB#0:
1265; X64-NEXT: cvttpd2dq %xmm0, %xmm0
1266; X64-NEXT: retq
1267 %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
1268 %bc = bitcast <4 x i32> %res to <2 x i64>
1269 ret <2 x i64> %bc
1270}
1271declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
1272
1273define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
1274; X32-LABEL: test_mm_cvttps_epi32:
1275; X32: # BB#0:
1276; X32-NEXT: cvttps2dq %xmm0, %xmm0
1277; X32-NEXT: retl
1278;
1279; X64-LABEL: test_mm_cvttps_epi32:
1280; X64: # BB#0:
1281; X64-NEXT: cvttps2dq %xmm0, %xmm0
1282; X64-NEXT: retq
1283 %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
1284 %bc = bitcast <4 x i32> %res to <2 x i64>
1285 ret <2 x i64> %bc
1286}
1287declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
1288
1289define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
1290; X32-LABEL: test_mm_cvttsd_si32:
1291; X32: # BB#0:
1292; X32-NEXT: cvttsd2si %xmm0, %eax
1293; X32-NEXT: retl
1294;
1295; X64-LABEL: test_mm_cvttsd_si32:
1296; X64: # BB#0:
1297; X64-NEXT: cvttsd2si %xmm0, %eax
1298; X64-NEXT: retq
1299 %ext = extractelement <2 x double> %a0, i32 0
1300 %res = fptosi double %ext to i32
1301 ret i32 %res
1302}
1303
1304define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1305; X32-LABEL: test_mm_div_pd:
1306; X32: # BB#0:
1307; X32-NEXT: divpd %xmm1, %xmm0
1308; X32-NEXT: retl
1309;
1310; X64-LABEL: test_mm_div_pd:
1311; X64: # BB#0:
1312; X64-NEXT: divpd %xmm1, %xmm0
1313; X64-NEXT: retq
1314 %res = fdiv <2 x double> %a0, %a1
1315 ret <2 x double> %res
1316}
1317
1318define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1319; X32-LABEL: test_mm_div_sd:
1320; X32: # BB#0:
1321; X32-NEXT: divsd %xmm1, %xmm0
1322; X32-NEXT: retl
1323;
1324; X64-LABEL: test_mm_div_sd:
1325; X64: # BB#0:
1326; X64-NEXT: divsd %xmm1, %xmm0
1327; X64-NEXT: retq
1328 %ext0 = extractelement <2 x double> %a0, i32 0
1329 %ext1 = extractelement <2 x double> %a1, i32 0
1330 %fdiv = fdiv double %ext0, %ext1
1331 %res = insertelement <2 x double> %a0, double %fdiv, i32 0
1332 ret <2 x double> %res
1333}
1334
1335define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind {
1336; X32-LABEL: test_mm_extract_epi16:
1337; X32: # BB#0:
1338; X32-NEXT: pextrw $1, %xmm0, %eax
1339; X32-NEXT: movzwl %ax, %eax
1340; X32-NEXT: retl
1341;
1342; X64-LABEL: test_mm_extract_epi16:
1343; X64: # BB#0:
1344; X64-NEXT: pextrw $1, %xmm0, %eax
1345; X64-NEXT: movzwl %ax, %eax
1346; X64-NEXT: retq
1347 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1348 %ext = extractelement <8 x i16> %arg0, i32 1
1349 %res = zext i16 %ext to i32
1350 ret i32 %res
1351}
1352
1353define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
1354; X32-LABEL: test_mm_insert_epi16:
1355; X32: # BB#0:
1356; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
1357; X32-NEXT: pinsrw $1, %eax, %xmm0
1358; X32-NEXT: retl
1359;
1360; X64-LABEL: test_mm_insert_epi16:
1361; X64: # BB#0:
1362; X64-NEXT: pinsrw $1, %edi, %xmm0
1363; X64-NEXT: retq
1364 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1365 %res = insertelement <8 x i16> %arg0, i16 %a1,i32 1
1366 %bc = bitcast <8 x i16> %res to <2 x i64>
1367 ret <2 x i64> %bc
1368}
1369
1370define void @test_mm_lfence() nounwind {
1371; X32-LABEL: test_mm_lfence:
1372; X32: # BB#0:
1373; X32-NEXT: lfence
1374; X32-NEXT: retl
1375;
1376; X64-LABEL: test_mm_lfence:
1377; X64: # BB#0:
1378; X64-NEXT: lfence
1379; X64-NEXT: retq
1380 call void @llvm.x86.sse2.lfence()
1381 ret void
1382}
1383declare void @llvm.x86.sse2.lfence() nounwind readnone
1384
1385define <2 x double> @test_mm_load_pd(double* %a0) nounwind {
1386; X32-LABEL: test_mm_load_pd:
1387; X32: # BB#0:
1388; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1389; X32-NEXT: movaps (%eax), %xmm0
1390; X32-NEXT: retl
1391;
1392; X64-LABEL: test_mm_load_pd:
1393; X64: # BB#0:
1394; X64-NEXT: movaps (%rdi), %xmm0
1395; X64-NEXT: retq
1396 %arg0 = bitcast double* %a0 to <2 x double>*
1397 %res = load <2 x double>, <2 x double>* %arg0, align 16
1398 ret <2 x double> %res
1399}
1400
1401define <2 x double> @test_mm_load_sd(double* %a0) nounwind {
1402; X32-LABEL: test_mm_load_sd:
1403; X32: # BB#0:
1404; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1405; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1406; X32-NEXT: retl
1407;
1408; X64-LABEL: test_mm_load_sd:
1409; X64: # BB#0:
1410; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1411; X64-NEXT: retq
1412 %ld = load double, double* %a0, align 1
1413 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1414 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
1415 ret <2 x double> %res1
1416}
1417
1418define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind {
1419; X32-LABEL: test_mm_load_si128:
1420; X32: # BB#0:
1421; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1422; X32-NEXT: movaps (%eax), %xmm0
1423; X32-NEXT: retl
1424;
1425; X64-LABEL: test_mm_load_si128:
1426; X64: # BB#0:
1427; X64-NEXT: movaps (%rdi), %xmm0
1428; X64-NEXT: retq
1429 %res = load <2 x i64>, <2 x i64>* %a0, align 16
1430 ret <2 x i64> %res
1431}
1432
1433define <2 x double> @test_mm_load1_pd(double* %a0) nounwind {
1434; X32-LABEL: test_mm_load1_pd:
1435; X32: # BB#0:
1436; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1437; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1438; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1439; X32-NEXT: retl
1440;
1441; X64-LABEL: test_mm_load1_pd:
1442; X64: # BB#0:
1443; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1444; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1445; X64-NEXT: retq
1446 %ld = load double, double* %a0, align 8
1447 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1448 %res1 = insertelement <2 x double> %res0, double %ld, i32 1
1449 ret <2 x double> %res1
1450}
1451
1452define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind {
1453; X32-LABEL: test_mm_loadh_pd:
1454; X32: # BB#0:
1455; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1456; X32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1457; X32-NEXT: retl
1458;
1459; X64-LABEL: test_mm_loadh_pd:
1460; X64: # BB#0:
1461; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1462; X64-NEXT: retq
1463 %ld = load double, double* %a1, align 8
1464 %res = insertelement <2 x double> %a0, double %ld, i32 1
1465 ret <2 x double> %res
1466}
1467
1468define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind {
1469; X32-LABEL: test_mm_loadl_epi64:
1470; X32: # BB#0:
1471; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1472; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1473; X32-NEXT: retl
1474;
1475; X64-LABEL: test_mm_loadl_epi64:
1476; X64: # BB#0:
1477; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1478; X64-NEXT: retq
1479 %bc = bitcast <2 x i64>* %a1 to i64*
1480 %ld = load i64, i64* %bc, align 1
1481 %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
1482 %res1 = insertelement <2 x i64> %res0, i64 0, i32 1
1483 ret <2 x i64> %res1
1484}
1485
1486define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind {
1487; X32-LABEL: test_mm_loadl_pd:
1488; X32: # BB#0:
1489; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1490; X32-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1491; X32-NEXT: retl
1492;
1493; X64-LABEL: test_mm_loadl_pd:
1494; X64: # BB#0:
1495; X64-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1496; X64-NEXT: retq
1497 %ld = load double, double* %a1, align 8
1498 %res = insertelement <2 x double> %a0, double %ld, i32 0
1499 ret <2 x double> %res
1500}
1501
1502define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
1503; X32-LABEL: test_mm_loadr_pd:
1504; X32: # BB#0:
1505; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1506; X32-NEXT: movapd (%eax), %xmm0
1507; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1508; X32-NEXT: retl
1509;
1510; X64-LABEL: test_mm_loadr_pd:
1511; X64: # BB#0:
1512; X64-NEXT: movapd (%rdi), %xmm0
1513; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1514; X64-NEXT: retq
1515 %arg0 = bitcast double* %a0 to <2 x double>*
1516 %ld = load <2 x double>, <2 x double>* %arg0, align 16
1517 %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1518 ret <2 x double> %res
1519}
1520
1521define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind {
1522; X32-LABEL: test_mm_loadu_pd:
1523; X32: # BB#0:
1524; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1525; X32-NEXT: movups (%eax), %xmm0
1526; X32-NEXT: retl
1527;
1528; X64-LABEL: test_mm_loadu_pd:
1529; X64: # BB#0:
1530; X64-NEXT: movups (%rdi), %xmm0
1531; X64-NEXT: retq
1532 %arg0 = bitcast double* %a0 to <2 x double>*
1533 %res = load <2 x double>, <2 x double>* %arg0, align 1
1534 ret <2 x double> %res
1535}
1536
1537define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind {
1538; X32-LABEL: test_mm_loadu_si128:
1539; X32: # BB#0:
1540; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1541; X32-NEXT: movups (%eax), %xmm0
1542; X32-NEXT: retl
1543;
1544; X64-LABEL: test_mm_loadu_si128:
1545; X64: # BB#0:
1546; X64-NEXT: movups (%rdi), %xmm0
1547; X64-NEXT: retq
1548 %res = load <2 x i64>, <2 x i64>* %a0, align 1
1549 ret <2 x i64> %res
1550}
1551
1552define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1553; X32-LABEL: test_mm_madd_epi16:
1554; X32: # BB#0:
1555; X32-NEXT: pmaddwd %xmm1, %xmm0
1556; X32-NEXT: retl
1557;
1558; X64-LABEL: test_mm_madd_epi16:
1559; X64: # BB#0:
1560; X64-NEXT: pmaddwd %xmm1, %xmm0
1561; X64-NEXT: retq
1562 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1563 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1564 %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1)
1565 %bc = bitcast <4 x i32> %res to <2 x i64>
1566 ret <2 x i64> %bc
1567}
1568declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
1569
1570define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind {
1571; X32-LABEL: test_mm_maskmoveu_si128:
1572; X32: # BB#0:
1573; X32-NEXT: pushl %edi
1574; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
1575; X32-NEXT: maskmovdqu %xmm1, %xmm0
1576; X32-NEXT: popl %edi
1577; X32-NEXT: retl
1578;
1579; X64-LABEL: test_mm_maskmoveu_si128:
1580; X64: # BB#0:
1581; X64-NEXT: maskmovdqu %xmm1, %xmm0
1582; X64-NEXT: retq
1583 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1584 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1585 call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2)
1586 ret void
1587}
1588declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
1589
1590define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1591; X32-LABEL: test_mm_max_epi16:
1592; X32: # BB#0:
1593; X32-NEXT: pmaxsw %xmm1, %xmm0
1594; X32-NEXT: retl
1595;
1596; X64-LABEL: test_mm_max_epi16:
1597; X64: # BB#0:
1598; X64-NEXT: pmaxsw %xmm1, %xmm0
1599; X64-NEXT: retq
1600 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1601 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1602 %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %arg0, <8 x i16> %arg1)
1603 %bc = bitcast <8 x i16> %res to <2 x i64>
1604 ret <2 x i64> %bc
1605}
1606declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
1607
1608define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1609; X32-LABEL: test_mm_max_epu8:
1610; X32: # BB#0:
1611; X32-NEXT: pmaxub %xmm1, %xmm0
1612; X32-NEXT: retl
1613;
1614; X64-LABEL: test_mm_max_epu8:
1615; X64: # BB#0:
1616; X64-NEXT: pmaxub %xmm1, %xmm0
1617; X64-NEXT: retq
1618 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1619 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1620 %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %arg0, <16 x i8> %arg1)
1621 %bc = bitcast <16 x i8> %res to <2 x i64>
1622 ret <2 x i64> %bc
1623}
1624declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
1625
1626define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1627; X32-LABEL: test_mm_max_pd:
1628; X32: # BB#0:
1629; X32-NEXT: maxpd %xmm1, %xmm0
1630; X32-NEXT: retl
1631;
1632; X64-LABEL: test_mm_max_pd:
1633; X64: # BB#0:
1634; X64-NEXT: maxpd %xmm1, %xmm0
1635; X64-NEXT: retq
1636 %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
1637 ret <2 x double> %res
1638}
1639declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
1640
1641define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1642; X32-LABEL: test_mm_max_sd:
1643; X32: # BB#0:
1644; X32-NEXT: maxsd %xmm1, %xmm0
1645; X32-NEXT: retl
1646;
1647; X64-LABEL: test_mm_max_sd:
1648; X64: # BB#0:
1649; X64-NEXT: maxsd %xmm1, %xmm0
1650; X64-NEXT: retq
1651 %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
1652 ret <2 x double> %res
1653}
1654declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
1655
1656define void @test_mm_mfence() nounwind {
1657; X32-LABEL: test_mm_mfence:
1658; X32: # BB#0:
1659; X32-NEXT: mfence
1660; X32-NEXT: retl
1661;
1662; X64-LABEL: test_mm_mfence:
1663; X64: # BB#0:
1664; X64-NEXT: mfence
1665; X64-NEXT: retq
1666 call void @llvm.x86.sse2.mfence()
1667 ret void
1668}
1669declare void @llvm.x86.sse2.mfence() nounwind readnone
1670
1671define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1672; X32-LABEL: test_mm_min_epi16:
1673; X32: # BB#0:
1674; X32-NEXT: pminsw %xmm1, %xmm0
1675; X32-NEXT: retl
1676;
1677; X64-LABEL: test_mm_min_epi16:
1678; X64: # BB#0:
1679; X64-NEXT: pminsw %xmm1, %xmm0
1680; X64-NEXT: retq
1681 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1682 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1683 %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %arg0, <8 x i16> %arg1)
1684 %bc = bitcast <8 x i16> %res to <2 x i64>
1685 ret <2 x i64> %bc
1686}
1687declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
1688
1689define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1690; X32-LABEL: test_mm_min_epu8:
1691; X32: # BB#0:
1692; X32-NEXT: pminub %xmm1, %xmm0
1693; X32-NEXT: retl
1694;
1695; X64-LABEL: test_mm_min_epu8:
1696; X64: # BB#0:
1697; X64-NEXT: pminub %xmm1, %xmm0
1698; X64-NEXT: retq
1699 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1700 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1701 %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %arg0, <16 x i8> %arg1)
1702 %bc = bitcast <16 x i8> %res to <2 x i64>
1703 ret <2 x i64> %bc
1704}
1705declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
1706
1707define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1708; X32-LABEL: test_mm_min_pd:
1709; X32: # BB#0:
1710; X32-NEXT: minpd %xmm1, %xmm0
1711; X32-NEXT: retl
1712;
1713; X64-LABEL: test_mm_min_pd:
1714; X64: # BB#0:
1715; X64-NEXT: minpd %xmm1, %xmm0
1716; X64-NEXT: retq
1717 %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
1718 ret <2 x double> %res
1719}
1720declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
1721
1722define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1723; X32-LABEL: test_mm_min_sd:
1724; X32: # BB#0:
1725; X32-NEXT: minsd %xmm1, %xmm0
1726; X32-NEXT: retl
1727;
1728; X64-LABEL: test_mm_min_sd:
1729; X64: # BB#0:
1730; X64-NEXT: minsd %xmm1, %xmm0
1731; X64-NEXT: retq
1732 %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
1733 ret <2 x double> %res
1734}
1735declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
1736
Simon Pilgrim47825fa2016-05-19 11:59:57 +00001737define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind {
1738; X32-LABEL: test_mm_move_epi64:
1739; X32: # BB#0:
1740; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1741; X32-NEXT: retl
1742;
1743; X64-LABEL: test_mm_move_epi64:
1744; X64: # BB#0:
1745; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1746; X64-NEXT: retq
1747 %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
1748 ret <2 x i64> %res
1749}
1750
1751define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1752; X32-LABEL: test_mm_move_sd:
1753; X32: # BB#0:
1754; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1755; X32-NEXT: retl
1756;
1757; X64-LABEL: test_mm_move_sd:
1758; X64: # BB#0:
1759; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1760; X64-NEXT: retq
1761 %ext0 = extractelement <2 x double> %a1, i32 0
1762 %res0 = insertelement <2 x double> undef, double %ext0, i32 0
1763 %ext1 = extractelement <2 x double> %a0, i32 1
1764 %res1 = insertelement <2 x double> %res0, double %ext1, i32 1
1765 ret <2 x double> %res1
1766}
1767
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001768define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind {
1769; X32-LABEL: test_mm_movemask_epi8:
1770; X32: # BB#0:
1771; X32-NEXT: pmovmskb %xmm0, %eax
1772; X32-NEXT: retl
1773;
1774; X64-LABEL: test_mm_movemask_epi8:
1775; X64: # BB#0:
1776; X64-NEXT: pmovmskb %xmm0, %eax
1777; X64-NEXT: retq
1778 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1779 %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0)
1780 ret i32 %res
1781}
1782declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
1783
1784define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind {
1785; X32-LABEL: test_mm_movemask_pd:
1786; X32: # BB#0:
1787; X32-NEXT: movmskpd %xmm0, %eax
1788; X32-NEXT: retl
1789;
1790; X64-LABEL: test_mm_movemask_pd:
1791; X64: # BB#0:
1792; X64-NEXT: movmskpd %xmm0, %eax
1793; X64-NEXT: retq
1794 %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
1795 ret i32 %res
1796}
1797declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
1798
1799define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) {
1800; X32-LABEL: test_mm_mul_epu32:
1801; X32: # BB#0:
1802; X32-NEXT: pmuludq %xmm1, %xmm0
1803; X32-NEXT: retl
1804;
1805; X64-LABEL: test_mm_mul_epu32:
1806; X64: # BB#0:
1807; X64-NEXT: pmuludq %xmm1, %xmm0
1808; X64-NEXT: retq
1809 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1810 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1811 %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %arg0, <4 x i32> %arg1)
1812 ret <2 x i64> %res
1813}
1814declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
1815
1816define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1817; X32-LABEL: test_mm_mul_pd:
1818; X32: # BB#0:
1819; X32-NEXT: mulpd %xmm1, %xmm0
1820; X32-NEXT: retl
1821;
1822; X64-LABEL: test_mm_mul_pd:
1823; X64: # BB#0:
1824; X64-NEXT: mulpd %xmm1, %xmm0
1825; X64-NEXT: retq
1826 %res = fmul <2 x double> %a0, %a1
1827 ret <2 x double> %res
1828}
1829
1830define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1831; X32-LABEL: test_mm_mul_sd:
1832; X32: # BB#0:
1833; X32-NEXT: mulsd %xmm1, %xmm0
1834; X32-NEXT: retl
1835;
1836; X64-LABEL: test_mm_mul_sd:
1837; X64: # BB#0:
1838; X64-NEXT: mulsd %xmm1, %xmm0
1839; X64-NEXT: retq
1840 %ext0 = extractelement <2 x double> %a0, i32 0
1841 %ext1 = extractelement <2 x double> %a1, i32 0
1842 %fmul = fmul double %ext0, %ext1
1843 %res = insertelement <2 x double> %a0, double %fmul, i32 0
1844 ret <2 x double> %res
1845}
1846
1847define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1848; X32-LABEL: test_mm_mulhi_epi16:
1849; X32: # BB#0:
1850; X32-NEXT: pmulhw %xmm1, %xmm0
1851; X32-NEXT: retl
1852;
1853; X64-LABEL: test_mm_mulhi_epi16:
1854; X64: # BB#0:
1855; X64-NEXT: pmulhw %xmm1, %xmm0
1856; X64-NEXT: retq
1857 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1858 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1859 %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1)
1860 %bc = bitcast <8 x i16> %res to <2 x i64>
1861 ret <2 x i64> %bc
1862}
1863declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
1864
1865define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) {
1866; X32-LABEL: test_mm_mulhi_epu16:
1867; X32: # BB#0:
1868; X32-NEXT: pmulhuw %xmm1, %xmm0
1869; X32-NEXT: retl
1870;
1871; X64-LABEL: test_mm_mulhi_epu16:
1872; X64: # BB#0:
1873; X64-NEXT: pmulhuw %xmm1, %xmm0
1874; X64-NEXT: retq
1875 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1876 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1877 %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1)
1878 %bc = bitcast <8 x i16> %res to <2 x i64>
1879 ret <2 x i64> %bc
1880}
1881declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
1882
1883define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1884; X32-LABEL: test_mm_mullo_epi16:
1885; X32: # BB#0:
1886; X32-NEXT: pmullw %xmm1, %xmm0
1887; X32-NEXT: retl
1888;
1889; X64-LABEL: test_mm_mullo_epi16:
1890; X64: # BB#0:
1891; X64-NEXT: pmullw %xmm1, %xmm0
1892; X64-NEXT: retq
1893 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1894 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1895 %res = mul <8 x i16> %arg0, %arg1
1896 %bc = bitcast <8 x i16> %res to <2 x i64>
1897 ret <2 x i64> %bc
1898}
1899
1900define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1901; X32-LABEL: test_mm_or_pd:
1902; X32: # BB#0:
1903; X32-NEXT: orps %xmm1, %xmm0
1904; X32-NEXT: retl
1905;
1906; X64-LABEL: test_mm_or_pd:
1907; X64: # BB#0:
1908; X64-NEXT: orps %xmm1, %xmm0
1909; X64-NEXT: retq
1910 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
1911 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
1912 %res = or <4 x i32> %arg0, %arg1
1913 %bc = bitcast <4 x i32> %res to <2 x double>
1914 ret <2 x double> %bc
1915}
1916
; _mm_or_si128: integer vector OR directly on <2 x i64>; CHECK lines show
; it is currently emitted as the float-domain ORPS.
1917define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1918; X32-LABEL: test_mm_or_si128:
1919; X32: # BB#0:
1920; X32-NEXT: orps %xmm1, %xmm0
1921; X32-NEXT: retl
1922;
1923; X64-LABEL: test_mm_or_si128:
1924; X64: # BB#0:
1925; X64-NEXT: orps %xmm1, %xmm0
1926; X64-NEXT: retq
1927 %res = or <2 x i64> %a0, %a1
1928 ret <2 x i64> %res
1929}
1930
; _mm_packs_epi16: signed-saturating pack of two <8 x i16> into <16 x i8>
; via llvm.x86.sse2.packsswb.128; CHECK lines show it selects PACKSSWB.
1931define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1932; X32-LABEL: test_mm_packs_epi16:
1933; X32: # BB#0:
1934; X32-NEXT: packsswb %xmm1, %xmm0
1935; X32-NEXT: retl
1936;
1937; X64-LABEL: test_mm_packs_epi16:
1938; X64: # BB#0:
1939; X64-NEXT: packsswb %xmm1, %xmm0
1940; X64-NEXT: retq
1941 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1942 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1943 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
1944 %bc = bitcast <16 x i8> %res to <2 x i64>
1945 ret <2 x i64> %bc
1946}
1947declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
1948
; _mm_packs_epi32: signed-saturating pack of two <4 x i32> into <8 x i16>
; via llvm.x86.sse2.packssdw.128; CHECK lines show it selects PACKSSDW.
1949define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) {
1950; X32-LABEL: test_mm_packs_epi32:
1951; X32: # BB#0:
1952; X32-NEXT: packssdw %xmm1, %xmm0
1953; X32-NEXT: retl
1954;
1955; X64-LABEL: test_mm_packs_epi32:
1956; X64: # BB#0:
1957; X64-NEXT: packssdw %xmm1, %xmm0
1958; X64-NEXT: retq
1959 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1960 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1961 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1)
1962 %bc = bitcast <8 x i16> %res to <2 x i64>
1963 ret <2 x i64> %bc
1964}
1965declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
1966
; _mm_packus_epi16: unsigned-saturating pack of two <8 x i16> into <16 x i8>
; via llvm.x86.sse2.packuswb.128; CHECK lines show it selects PACKUSWB.
1967define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1968; X32-LABEL: test_mm_packus_epi16:
1969; X32: # BB#0:
1970; X32-NEXT: packuswb %xmm1, %xmm0
1971; X32-NEXT: retl
1972;
1973; X64-LABEL: test_mm_packus_epi16:
1974; X64: # BB#0:
1975; X64-NEXT: packuswb %xmm1, %xmm0
1976; X64-NEXT: retq
1977 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1978 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1979 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
1980 %bc = bitcast <16 x i8> %res to <2 x i64>
1981 ret <2 x i64> %bc
1982}
1983declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
1984
; _mm_pause: spin-loop hint intrinsic with no operands or result; CHECK
; lines show it lowers to the PAUSE instruction on both targets.
1985define void @test_mm_pause() nounwind {
1986; X32-LABEL: test_mm_pause:
1987; X32: # BB#0:
1988; X32-NEXT: pause
1989; X32-NEXT: retl
1990;
1991; X64-LABEL: test_mm_pause:
1992; X64: # BB#0:
1993; X64-NEXT: pause
1994; X64-NEXT: retq
1995 call void @llvm.x86.sse2.pause()
1996 ret void
1997}
1998declare void @llvm.x86.sse2.pause() nounwind readnone
1999
; _mm_sad_epu8: sum of absolute byte differences via llvm.x86.sse2.psad.bw;
; CHECK lines show it selects PSADBW. The intrinsic already returns
; <2 x i64>, so no result bitcast is needed.
2000define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2001; X32-LABEL: test_mm_sad_epu8:
2002; X32: # BB#0:
2003; X32-NEXT: psadbw %xmm1, %xmm0
2004; X32-NEXT: retl
2005;
2006; X64-LABEL: test_mm_sad_epu8:
2007; X64: # BB#0:
2008; X64-NEXT: psadbw %xmm1, %xmm0
2009; X64-NEXT: retq
2010 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2011 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2012 %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1)
2013 ret <2 x i64> %res
2014}
2015declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
2016
; _mm_set_epi8: builds a <16 x i8> from sixteen scalar args, inserting the
; LAST argument (%a15) into element 0 — i.e. arguments are given
; highest-element-first, matching the intrinsic's documented order. The
; autogenerated CHECK lines pin the MOVZBL/MOVD loads plus the PUNPCKLBW
; merge tree fast-isel currently produces; regenerate with
; update_llc_test_checks.py rather than editing by hand.
Simon Pilgrim01809e02016-05-19 10:58:54 +00002017define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
2018; X32-LABEL: test_mm_set_epi8:
2019; X32: # BB#0:
2020; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2021; X32-NEXT: movd %eax, %xmm0
2022; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2023; X32-NEXT: movd %eax, %xmm1
2024; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2025; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2026; X32-NEXT: movd %eax, %xmm0
2027; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2028; X32-NEXT: movd %eax, %xmm2
2029; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2030; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2031; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2032; X32-NEXT: movd %eax, %xmm0
2033; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2034; X32-NEXT: movd %eax, %xmm3
2035; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2036; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2037; X32-NEXT: movd %eax, %xmm0
2038; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2039; X32-NEXT: movd %eax, %xmm1
2040; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2041; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2042; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2043; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2044; X32-NEXT: movd %eax, %xmm0
2045; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2046; X32-NEXT: movd %eax, %xmm2
2047; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2048; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2049; X32-NEXT: movd %eax, %xmm0
2050; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2051; X32-NEXT: movd %eax, %xmm3
2052; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2053; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2054; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2055; X32-NEXT: movd %eax, %xmm0
2056; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2057; X32-NEXT: movd %eax, %xmm2
2058; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2059; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2060; X32-NEXT: movd %eax, %xmm4
2061; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2062; X32-NEXT: movd %eax, %xmm0
2063; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2064; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2065; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2066; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2067; X32-NEXT: retl
2068;
2069; X64-LABEL: test_mm_set_epi8:
2070; X64: # BB#0:
2071; X64-NEXT: movzbl %dil, %eax
2072; X64-NEXT: movd %eax, %xmm0
2073; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2074; X64-NEXT: movd %eax, %xmm1
2075; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2076; X64-NEXT: movzbl %r8b, %eax
2077; X64-NEXT: movd %eax, %xmm0
2078; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2079; X64-NEXT: movd %eax, %xmm2
2080; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2081; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2082; X64-NEXT: movzbl %dl, %eax
2083; X64-NEXT: movd %eax, %xmm0
2084; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2085; X64-NEXT: movd %eax, %xmm3
2086; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2087; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2088; X64-NEXT: movd %eax, %xmm0
2089; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2090; X64-NEXT: movd %eax, %xmm1
2091; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2092; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2093; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2094; X64-NEXT: movzbl %sil, %eax
2095; X64-NEXT: movd %eax, %xmm0
2096; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2097; X64-NEXT: movd %eax, %xmm2
2098; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2099; X64-NEXT: movzbl %r9b, %eax
2100; X64-NEXT: movd %eax, %xmm0
2101; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2102; X64-NEXT: movd %eax, %xmm3
2103; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2104; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2105; X64-NEXT: movzbl %cl, %eax
2106; X64-NEXT: movd %eax, %xmm0
2107; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2108; X64-NEXT: movd %eax, %xmm2
2109; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2110; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2111; X64-NEXT: movd %eax, %xmm4
2112; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2113; X64-NEXT: movd %eax, %xmm0
2114; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2115; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2116; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2117; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2118; X64-NEXT: retq
2119 %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0
2120 %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1
2121 %res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2
2122 %res3 = insertelement <16 x i8> %res2, i8 %a12, i32 3
2123 %res4 = insertelement <16 x i8> %res3, i8 %a11, i32 4
2124 %res5 = insertelement <16 x i8> %res4, i8 %a10, i32 5
2125 %res6 = insertelement <16 x i8> %res5, i8 %a9 , i32 6
2126 %res7 = insertelement <16 x i8> %res6, i8 %a8 , i32 7
2127 %res8 = insertelement <16 x i8> %res7, i8 %a7 , i32 8
2128 %res9 = insertelement <16 x i8> %res8, i8 %a6 , i32 9
2129 %res10 = insertelement <16 x i8> %res9, i8 %a5 , i32 10
2130 %res11 = insertelement <16 x i8> %res10, i8 %a4 , i32 11
2131 %res12 = insertelement <16 x i8> %res11, i8 %a3 , i32 12
2132 %res13 = insertelement <16 x i8> %res12, i8 %a2 , i32 13
2133 %res14 = insertelement <16 x i8> %res13, i8 %a1 , i32 14
2134 %res15 = insertelement <16 x i8> %res14, i8 %a0 , i32 15
2135 %res = bitcast <16 x i8> %res15 to <2 x i64>
2136 ret <2 x i64> %res
2137}
2138
; _mm_set_epi16: builds a <8 x i16> from eight scalar args,
; highest-element-first (%a7 goes into element 0). CHECK lines pin the
; MOVD + PUNPCKLWD merge tree fast-isel currently produces.
2139define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
2140; X32-LABEL: test_mm_set_epi16:
2141; X32: # BB#0:
2142; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2143; X32-NEXT: movd %eax, %xmm1
2144; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2145; X32-NEXT: movd %eax, %xmm2
2146; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2147; X32-NEXT: movd %eax, %xmm3
2148; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2149; X32-NEXT: movd %eax, %xmm4
2150; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2151; X32-NEXT: movd %eax, %xmm5
2152; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2153; X32-NEXT: movd %eax, %xmm6
2154; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2155; X32-NEXT: movd %eax, %xmm7
2156; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2157; X32-NEXT: movd %eax, %xmm0
2158; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2159; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2160; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2161; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
2162; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
2163; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
2164; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
2165; X32-NEXT: retl
2166;
2167; X64-LABEL: test_mm_set_epi16:
2168; X64: # BB#0:
2169; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
2170; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
2171; X64-NEXT: movd %edi, %xmm0
2172; X64-NEXT: movd %r8d, %xmm1
2173; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2174; X64-NEXT: movd %edx, %xmm0
2175; X64-NEXT: movd %eax, %xmm2
2176; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2177; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2178; X64-NEXT: movd %esi, %xmm0
2179; X64-NEXT: movd %r9d, %xmm1
2180; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2181; X64-NEXT: movd %ecx, %xmm3
2182; X64-NEXT: movd %r10d, %xmm0
2183; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
2184; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2185; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2186; X64-NEXT: retq
2187 %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0
2188 %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1
2189 %res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2
2190 %res3 = insertelement <8 x i16> %res2, i16 %a4, i32 3
2191 %res4 = insertelement <8 x i16> %res3, i16 %a3, i32 4
2192 %res5 = insertelement <8 x i16> %res4, i16 %a2, i32 5
2193 %res6 = insertelement <8 x i16> %res5, i16 %a1, i32 6
2194 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
2195 %res = bitcast <8 x i16> %res7 to <2 x i64>
2196 ret <2 x i64> %res
2197}
2198
; _mm_set_epi32: builds a <4 x i32> from four scalar args,
; highest-element-first (%a3 goes into element 0); lowers to a
; MOVD + PUNPCKLDQ merge tree.
2199define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
2200; X32-LABEL: test_mm_set_epi32:
2201; X32: # BB#0:
2202; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2203; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2204; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2205; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2206; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2207; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2208; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2209; X32-NEXT: retl
2210;
2211; X64-LABEL: test_mm_set_epi32:
2212; X64: # BB#0:
2213; X64-NEXT: movd %edi, %xmm0
2214; X64-NEXT: movd %edx, %xmm1
2215; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2216; X64-NEXT: movd %esi, %xmm2
2217; X64-NEXT: movd %ecx, %xmm0
2218; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2219; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2220; X64-NEXT: retq
2221 %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0
2222 %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1
2223 %res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2
2224 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
2225 %res = bitcast <4 x i32> %res3 to <2 x i64>
2226 ret <2 x i64> %res
2227}
2228
2229; TODO test_mm_set_epi64
2230
; _mm_set_epi64x: builds a <2 x i64> from two scalars, %a1 into element 0.
; X64 uses 64-bit MOVD + PUNPCKLQDQ; X32 assembles the two i64 values from
; four 32-bit stack loads with a PUNPCKLDQ tree.
2231define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
2232; X32-LABEL: test_mm_set_epi64x:
2233; X32: # BB#0:
2234; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2235; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2236; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2237; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2238; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2239; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2240; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2241; X32-NEXT: retl
2242;
2243; X64-LABEL: test_mm_set_epi64x:
2244; X64: # BB#0:
2245; X64-NEXT: movd %rdi, %xmm1
2246; X64-NEXT: movd %rsi, %xmm0
2247; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2248; X64-NEXT: retq
2249 %res0 = insertelement <2 x i64> undef, i64 %a1, i32 0
2250 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
2251 ret <2 x i64> %res1
2252}
2253
; _mm_set_pd: builds a <2 x double> with %a1 in element 0 and %a0 in
; element 1; lowers to UNPCKLPD (plus MOVSD stack loads on X32).
2254define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind {
2255; X32-LABEL: test_mm_set_pd:
2256; X32: # BB#0:
2257; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2258; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2259; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2260; X32-NEXT: retl
2261;
2262; X64-LABEL: test_mm_set_pd:
2263; X64: # BB#0:
2264; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2265; X64-NEXT: movapd %xmm1, %xmm0
2266; X64-NEXT: retq
2267 %res0 = insertelement <2 x double> undef, double %a1, i32 0
2268 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
2269 ret <2 x double> %res1
2270}
2271
; _mm_set_sd: %a0 in element 0 with the upper double explicitly zeroed;
; the zeroing is what the MOVQ "xmm0[0],zero" pattern checks.
2272define <2 x double> @test_mm_set_sd(double %a0) nounwind {
2273; X32-LABEL: test_mm_set_sd:
2274; X32: # BB#0:
2275; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2276; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2277; X32-NEXT: retl
2278;
2279; X64-LABEL: test_mm_set_sd:
2280; X64: # BB#0:
2281; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2282; X64-NEXT: retq
2283 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2284 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
2285 ret <2 x double> %res1
2286}
2287
; _mm_set1_epi8: splat one i8 across all 16 lanes; lowers to a
; PUNPCKLBW + PSHUFLW + PSHUFD broadcast sequence.
2288define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
2289; X32-LABEL: test_mm_set1_epi8:
2290; X32: # BB#0:
2291; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2292; X32-NEXT: movd %eax, %xmm0
2293; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2294; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2295; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2296; X32-NEXT: retl
2297;
2298; X64-LABEL: test_mm_set1_epi8:
2299; X64: # BB#0:
2300; X64-NEXT: movzbl %dil, %eax
2301; X64-NEXT: movd %eax, %xmm0
2302; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2303; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2304; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2305; X64-NEXT: retq
2306 %res0 = insertelement <16 x i8> undef, i8 %a0, i32 0
2307 %res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1
2308 %res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2
2309 %res3 = insertelement <16 x i8> %res2, i8 %a0, i32 3
2310 %res4 = insertelement <16 x i8> %res3, i8 %a0, i32 4
2311 %res5 = insertelement <16 x i8> %res4, i8 %a0, i32 5
2312 %res6 = insertelement <16 x i8> %res5, i8 %a0, i32 6
2313 %res7 = insertelement <16 x i8> %res6, i8 %a0, i32 7
2314 %res8 = insertelement <16 x i8> %res7, i8 %a0, i32 8
2315 %res9 = insertelement <16 x i8> %res8, i8 %a0, i32 9
2316 %res10 = insertelement <16 x i8> %res9, i8 %a0, i32 10
2317 %res11 = insertelement <16 x i8> %res10, i8 %a0, i32 11
2318 %res12 = insertelement <16 x i8> %res11, i8 %a0, i32 12
2319 %res13 = insertelement <16 x i8> %res12, i8 %a0, i32 13
2320 %res14 = insertelement <16 x i8> %res13, i8 %a0, i32 14
2321 %res15 = insertelement <16 x i8> %res14, i8 %a0, i32 15
2322 %res = bitcast <16 x i8> %res15 to <2 x i64>
2323 ret <2 x i64> %res
2324}
2325
; _mm_set1_epi16: splat one i16 across all 8 lanes; lowers to
; PSHUFLW + PSHUFD.
2326define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind {
2327; X32-LABEL: test_mm_set1_epi16:
2328; X32: # BB#0:
2329; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2330; X32-NEXT: movd %eax, %xmm0
2331; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2332; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2333; X32-NEXT: retl
2334;
2335; X64-LABEL: test_mm_set1_epi16:
2336; X64: # BB#0:
2337; X64-NEXT: movd %edi, %xmm0
2338; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2339; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2340; X64-NEXT: retq
2341 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
2342 %res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1
2343 %res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2
2344 %res3 = insertelement <8 x i16> %res2, i16 %a0, i32 3
2345 %res4 = insertelement <8 x i16> %res3, i16 %a0, i32 4
2346 %res5 = insertelement <8 x i16> %res4, i16 %a0, i32 5
2347 %res6 = insertelement <8 x i16> %res5, i16 %a0, i32 6
2348 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
2349 %res = bitcast <8 x i16> %res7 to <2 x i64>
2350 ret <2 x i64> %res
2351}
2352
; _mm_set1_epi32: splat one i32 across all 4 lanes; a single PSHUFD
; broadcast.
2353define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind {
2354; X32-LABEL: test_mm_set1_epi32:
2355; X32: # BB#0:
2356; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2357; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2358; X32-NEXT: retl
2359;
2360; X64-LABEL: test_mm_set1_epi32:
2361; X64: # BB#0:
2362; X64-NEXT: movd %edi, %xmm0
2363; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2364; X64-NEXT: retq
2365 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
2366 %res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1
2367 %res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2
2368 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
2369 %res = bitcast <4 x i32> %res3 to <2 x i64>
2370 ret <2 x i64> %res
2371}
2372
2373; TODO test_mm_set1_epi64
2374
; _mm_set1_epi64x: splat one i64 into both lanes. X64 is a MOVD + PSHUFD
; pair; X32 rebuilds the i64 from two 32-bit halves before interleaving.
2375define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
2376; X32-LABEL: test_mm_set1_epi64x:
2377; X32: # BB#0:
2378; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2379; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2380; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2381; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
2382; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2383; X32-NEXT: retl
2384;
2385; X64-LABEL: test_mm_set1_epi64x:
2386; X64: # BB#0:
2387; X64-NEXT: movd %rdi, %xmm0
2388; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2389; X64-NEXT: retq
2390 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
2391 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
2392 ret <2 x i64> %res1
2393}
2394
; _mm_set1_pd: splat one double into both lanes; lowers to MOVLHPS.
2395define <2 x double> @test_mm_set1_pd(double %a0) nounwind {
2396; X32-LABEL: test_mm_set1_pd:
2397; X32: # BB#0:
2398; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2399; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2400; X32-NEXT: retl
2401;
2402; X64-LABEL: test_mm_set1_pd:
2403; X64: # BB#0:
2404; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2405; X64-NEXT: retq
2406 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2407 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
2408 ret <2 x double> %res1
2409}
2410
; _mm_setr_epi8: reversed-order counterpart of test_mm_set_epi8 — here %a0
; goes into element 0 (lowest-element-first). CHECK lines pin the same
; MOVZBL/MOVD + PUNPCKLBW merge tree, with register args consumed in the
; opposite order; regenerate with update_llc_test_checks.py rather than
; editing by hand.
2411define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
2412; X32-LABEL: test_mm_setr_epi8:
2413; X32: # BB#0:
2414; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2415; X32-NEXT: movd %eax, %xmm0
2416; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2417; X32-NEXT: movd %eax, %xmm1
2418; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2419; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2420; X32-NEXT: movd %eax, %xmm0
2421; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2422; X32-NEXT: movd %eax, %xmm2
2423; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2424; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2425; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2426; X32-NEXT: movd %eax, %xmm0
2427; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2428; X32-NEXT: movd %eax, %xmm3
2429; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2430; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2431; X32-NEXT: movd %eax, %xmm0
2432; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2433; X32-NEXT: movd %eax, %xmm1
2434; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2435; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2436; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2437; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2438; X32-NEXT: movd %eax, %xmm0
2439; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2440; X32-NEXT: movd %eax, %xmm2
2441; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2442; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2443; X32-NEXT: movd %eax, %xmm0
2444; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2445; X32-NEXT: movd %eax, %xmm3
2446; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2447; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2448; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2449; X32-NEXT: movd %eax, %xmm0
2450; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2451; X32-NEXT: movd %eax, %xmm2
2452; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2453; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2454; X32-NEXT: movd %eax, %xmm4
2455; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2456; X32-NEXT: movd %eax, %xmm0
2457; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2458; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2459; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2460; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2461; X32-NEXT: retl
2462;
2463; X64-LABEL: test_mm_setr_epi8:
2464; X64: # BB#0:
2465; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2466; X64-NEXT: movd %eax, %xmm0
2467; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2468; X64-NEXT: movd %eax, %xmm1
2469; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2470; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2471; X64-NEXT: movd %eax, %xmm0
2472; X64-NEXT: movzbl %cl, %eax
2473; X64-NEXT: movd %eax, %xmm2
2474; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2475; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2476; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2477; X64-NEXT: movd %eax, %xmm0
2478; X64-NEXT: movzbl %r9b, %eax
2479; X64-NEXT: movd %eax, %xmm3
2480; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2481; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2482; X64-NEXT: movd %eax, %xmm0
2483; X64-NEXT: movzbl %sil, %eax
2484; X64-NEXT: movd %eax, %xmm1
2485; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2486; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2487; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2488; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2489; X64-NEXT: movd %eax, %xmm0
2490; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2491; X64-NEXT: movd %eax, %xmm2
2492; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2493; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2494; X64-NEXT: movd %eax, %xmm0
2495; X64-NEXT: movzbl %dl, %eax
2496; X64-NEXT: movd %eax, %xmm3
2497; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2498; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2499; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2500; X64-NEXT: movd %eax, %xmm0
2501; X64-NEXT: movzbl %r8b, %eax
2502; X64-NEXT: movd %eax, %xmm2
2503; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2504; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2505; X64-NEXT: movd %eax, %xmm4
2506; X64-NEXT: movzbl %dil, %eax
2507; X64-NEXT: movd %eax, %xmm0
2508; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2509; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2510; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2511; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2512; X64-NEXT: retq
2513 %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0
2514 %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1
2515 %res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2
2516 %res3 = insertelement <16 x i8> %res2, i8 %a3 , i32 3
2517 %res4 = insertelement <16 x i8> %res3, i8 %a4 , i32 4
2518 %res5 = insertelement <16 x i8> %res4, i8 %a5 , i32 5
2519 %res6 = insertelement <16 x i8> %res5, i8 %a6 , i32 6
2520 %res7 = insertelement <16 x i8> %res6, i8 %a7 , i32 7
2521 %res8 = insertelement <16 x i8> %res7, i8 %a8 , i32 8
2522 %res9 = insertelement <16 x i8> %res8, i8 %a9 , i32 9
2523 %res10 = insertelement <16 x i8> %res9, i8 %a10, i32 10
2524 %res11 = insertelement <16 x i8> %res10, i8 %a11, i32 11
2525 %res12 = insertelement <16 x i8> %res11, i8 %a12, i32 12
2526 %res13 = insertelement <16 x i8> %res12, i8 %a13, i32 13
2527 %res14 = insertelement <16 x i8> %res13, i8 %a14, i32 14
2528 %res15 = insertelement <16 x i8> %res14, i8 %a15, i32 15
2529 %res = bitcast <16 x i8> %res15 to <2 x i64>
2530 ret <2 x i64> %res
2531}
2532
2533define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
2534; X32-LABEL: test_mm_setr_epi16:
2535; X32: # BB#0:
2536; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2537; X32-NEXT: movd %eax, %xmm1
2538; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2539; X32-NEXT: movd %eax, %xmm2
2540; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2541; X32-NEXT: movd %eax, %xmm3
2542; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2543; X32-NEXT: movd %eax, %xmm4
2544; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2545; X32-NEXT: movd %eax, %xmm5
2546; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2547; X32-NEXT: movd %eax, %xmm6
2548; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2549; X32-NEXT: movd %eax, %xmm7
2550; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2551; X32-NEXT: movd %eax, %xmm0
2552; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2553; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2554; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2555; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
2556; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
2557; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
2558; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
2559; X32-NEXT: retl
2560;
2561; X64-LABEL: test_mm_setr_epi16:
2562; X64: # BB#0:
2563; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
2564; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
2565; X64-NEXT: movd %eax, %xmm0
2566; X64-NEXT: movd %ecx, %xmm1
2567; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2568; X64-NEXT: movd %r9d, %xmm0
2569; X64-NEXT: movd %esi, %xmm2
2570; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2571; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2572; X64-NEXT: movd %r10d, %xmm0
2573; X64-NEXT: movd %edx, %xmm1
2574; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2575; X64-NEXT: movd %r8d, %xmm3
2576; X64-NEXT: movd %edi, %xmm0
2577; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
2578; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2579; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2580; X64-NEXT: retq
2581 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
2582 %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1
2583 %res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2
2584 %res3 = insertelement <8 x i16> %res2, i16 %a3, i32 3
2585 %res4 = insertelement <8 x i16> %res3, i16 %a4, i32 4
2586 %res5 = insertelement <8 x i16> %res4, i16 %a5, i32 5
2587 %res6 = insertelement <8 x i16> %res5, i16 %a6, i32 6
2588 %res7 = insertelement <8 x i16> %res6, i16 %a7, i32 7
2589 %res = bitcast <8 x i16> %res7 to <2 x i64>
2590 ret <2 x i64> %res
2591}
2592
2593define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
2594; X32-LABEL: test_mm_setr_epi32:
2595; X32: # BB#0:
2596; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2597; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2598; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2599; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2600; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2601; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2602; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2603; X32-NEXT: retl
2604;
2605; X64-LABEL: test_mm_setr_epi32:
2606; X64: # BB#0:
2607; X64-NEXT: movd %ecx, %xmm0
2608; X64-NEXT: movd %esi, %xmm1
2609; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2610; X64-NEXT: movd %edx, %xmm2
2611; X64-NEXT: movd %edi, %xmm0
2612; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2613; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2614; X64-NEXT: retq
2615 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
2616 %res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1
2617 %res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2
2618 %res3 = insertelement <4 x i32> %res2, i32 %a3, i32 3
2619 %res = bitcast <4 x i32> %res3 to <2 x i64>
2620 ret <2 x i64> %res
2621}
2622
2623; TODO test_mm_setr_epi64
2624
2625define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
2626; X32-LABEL: test_mm_setr_epi64x:
2627; X32: # BB#0:
2628; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2629; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2630; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2631; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2632; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2633; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2634; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2635; X32-NEXT: retl
2636;
2637; X64-LABEL: test_mm_setr_epi64x:
2638; X64: # BB#0:
2639; X64-NEXT: movd %rsi, %xmm1
2640; X64-NEXT: movd %rdi, %xmm0
2641; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2642; X64-NEXT: retq
2643 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
2644 %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1
2645 ret <2 x i64> %res1
2646}
2647
2648define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind {
2649; X32-LABEL: test_mm_setr_pd:
2650; X32: # BB#0:
2651; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2652; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2653; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2654; X32-NEXT: retl
2655;
2656; X64-LABEL: test_mm_setr_pd:
2657; X64: # BB#0:
2658; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2659; X64-NEXT: retq
2660 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2661 %res1 = insertelement <2 x double> %res0, double %a1, i32 1
2662 ret <2 x double> %res1
2663}
2664
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00002665define <2 x double> @test_mm_setzero_pd() {
2666; X32-LABEL: test_mm_setzero_pd:
2667; X32: # BB#0:
2668; X32-NEXT: xorps %xmm0, %xmm0
2669; X32-NEXT: retl
2670;
2671; X64-LABEL: test_mm_setzero_pd:
2672; X64: # BB#0:
2673; X64-NEXT: xorps %xmm0, %xmm0
2674; X64-NEXT: retq
2675 ret <2 x double> zeroinitializer
2676}
2677
2678define <2 x i64> @test_mm_setzero_si128() {
2679; X32-LABEL: test_mm_setzero_si128:
2680; X32: # BB#0:
2681; X32-NEXT: xorps %xmm0, %xmm0
2682; X32-NEXT: retl
2683;
2684; X64-LABEL: test_mm_setzero_si128:
2685; X64: # BB#0:
2686; X64-NEXT: xorps %xmm0, %xmm0
2687; X64-NEXT: retq
2688 ret <2 x i64> zeroinitializer
2689}
2690
2691define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) {
2692; X32-LABEL: test_mm_shuffle_epi32:
2693; X32: # BB#0:
2694; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2695; X32-NEXT: retl
2696;
2697; X64-LABEL: test_mm_shuffle_epi32:
2698; X64: # BB#0:
2699; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2700; X64-NEXT: retq
2701 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2702 %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer
2703 %bc = bitcast <4 x i32> %res to <2 x i64>
2704 ret <2 x i64> %bc
2705}
2706
2707define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
2708; X32-LABEL: test_mm_shuffle_pd:
2709; X32: # BB#0:
2710; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
2711; X32-NEXT: retl
2712;
2713; X64-LABEL: test_mm_shuffle_pd:
2714; X64: # BB#0:
2715; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
2716; X64-NEXT: retq
2717 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
2718 ret <2 x double> %res
2719}
2720
2721define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) {
2722; X32-LABEL: test_mm_shufflehi_epi16:
2723; X32: # BB#0:
2724; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2725; X32-NEXT: retl
2726;
2727; X64-LABEL: test_mm_shufflehi_epi16:
2728; X64: # BB#0:
2729; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2730; X64-NEXT: retq
2731 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2732 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
2733 %bc = bitcast <8 x i16> %res to <2 x i64>
2734 ret <2 x i64> %bc
2735}
2736
2737define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) {
2738; X32-LABEL: test_mm_shufflelo_epi16:
2739; X32: # BB#0:
2740; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2741; X32-NEXT: retl
2742;
2743; X64-LABEL: test_mm_shufflelo_epi16:
2744; X64: # BB#0:
2745; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2746; X64-NEXT: retq
2747 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2748 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
2749 %bc = bitcast <8 x i16> %res to <2 x i64>
2750 ret <2 x i64> %bc
2751}
2752
2753define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2754; X32-LABEL: test_mm_sll_epi16:
2755; X32: # BB#0:
2756; X32-NEXT: psllw %xmm1, %xmm0
2757; X32-NEXT: retl
2758;
2759; X64-LABEL: test_mm_sll_epi16:
2760; X64: # BB#0:
2761; X64-NEXT: psllw %xmm1, %xmm0
2762; X64-NEXT: retq
2763 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2764 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2765 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1)
2766 %bc = bitcast <8 x i16> %res to <2 x i64>
2767 ret <2 x i64> %bc
2768}
2769declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
2770
2771define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2772; X32-LABEL: test_mm_sll_epi32:
2773; X32: # BB#0:
2774; X32-NEXT: pslld %xmm1, %xmm0
2775; X32-NEXT: retl
2776;
2777; X64-LABEL: test_mm_sll_epi32:
2778; X64: # BB#0:
2779; X64-NEXT: pslld %xmm1, %xmm0
2780; X64-NEXT: retq
2781 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2782 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2783 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1)
2784 %bc = bitcast <4 x i32> %res to <2 x i64>
2785 ret <2 x i64> %bc
2786}
2787declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
2788
2789define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) {
2790; X32-LABEL: test_mm_sll_epi64:
2791; X32: # BB#0:
2792; X32-NEXT: psllq %xmm1, %xmm0
2793; X32-NEXT: retl
2794;
2795; X64-LABEL: test_mm_sll_epi64:
2796; X64: # BB#0:
2797; X64-NEXT: psllq %xmm1, %xmm0
2798; X64-NEXT: retq
2799 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
2800 ret <2 x i64> %res
2801}
2802declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
2803
2804define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) {
2805; X32-LABEL: test_mm_slli_epi16:
2806; X32: # BB#0:
2807; X32-NEXT: psllw $1, %xmm0
2808; X32-NEXT: retl
2809;
2810; X64-LABEL: test_mm_slli_epi16:
2811; X64: # BB#0:
2812; X64-NEXT: psllw $1, %xmm0
2813; X64-NEXT: retq
2814 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2815 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1)
2816 %bc = bitcast <8 x i16> %res to <2 x i64>
2817 ret <2 x i64> %bc
2818}
2819declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
2820
2821define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) {
2822; X32-LABEL: test_mm_slli_epi32:
2823; X32: # BB#0:
2824; X32-NEXT: pslld $1, %xmm0
2825; X32-NEXT: retl
2826;
2827; X64-LABEL: test_mm_slli_epi32:
2828; X64: # BB#0:
2829; X64-NEXT: pslld $1, %xmm0
2830; X64-NEXT: retq
2831 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2832 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1)
2833 %bc = bitcast <4 x i32> %res to <2 x i64>
2834 ret <2 x i64> %bc
2835}
2836declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
2837
2838define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) {
2839; X32-LABEL: test_mm_slli_epi64:
2840; X32: # BB#0:
2841; X32-NEXT: psllq $1, %xmm0
2842; X32-NEXT: retl
2843;
2844; X64-LABEL: test_mm_slli_epi64:
2845; X64: # BB#0:
2846; X64-NEXT: psllq $1, %xmm0
2847; X64-NEXT: retq
2848 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1)
2849 ret <2 x i64> %res
2850}
2851declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
2852
2853define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind {
2854; X32-LABEL: test_mm_slli_si128:
2855; X32: # BB#0:
2856; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
2857; X32-NEXT: retl
2858;
2859; X64-LABEL: test_mm_slli_si128:
2860; X64: # BB#0:
2861; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
2862; X64-NEXT: retq
2863 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2864 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
2865 %bc = bitcast <16 x i8> %res to <2 x i64>
2866 ret <2 x i64> %bc
2867}
2868
2869define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind {
2870; X32-LABEL: test_mm_sqrt_pd:
2871; X32: # BB#0:
2872; X32-NEXT: sqrtpd %xmm0, %xmm0
2873; X32-NEXT: retl
2874;
2875; X64-LABEL: test_mm_sqrt_pd:
2876; X64: # BB#0:
2877; X64-NEXT: sqrtpd %xmm0, %xmm0
2878; X64-NEXT: retq
2879 %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
2880 ret <2 x double> %res
2881}
2882declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
2883
2884define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2885; X32-LABEL: test_mm_sqrt_sd:
2886; X32: # BB#0:
2887; X32-NEXT: sqrtsd %xmm0, %xmm1
2888; X32-NEXT: movaps %xmm1, %xmm0
2889; X32-NEXT: retl
2890;
2891; X64-LABEL: test_mm_sqrt_sd:
2892; X64: # BB#0:
2893; X64-NEXT: sqrtsd %xmm0, %xmm1
2894; X64-NEXT: movaps %xmm1, %xmm0
2895; X64-NEXT: retq
2896 %call = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
2897 %ext0 = extractelement <2 x double> %call, i32 0
2898 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
2899 %ext1 = extractelement <2 x double> %a1, i32 1
2900 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
2901 ret <2 x double> %ins1
2902}
2903declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
2904
2905define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2906; X32-LABEL: test_mm_sra_epi16:
2907; X32: # BB#0:
2908; X32-NEXT: psraw %xmm1, %xmm0
2909; X32-NEXT: retl
2910;
2911; X64-LABEL: test_mm_sra_epi16:
2912; X64: # BB#0:
2913; X64-NEXT: psraw %xmm1, %xmm0
2914; X64-NEXT: retq
2915 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2916 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2917 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1)
2918 %bc = bitcast <8 x i16> %res to <2 x i64>
2919 ret <2 x i64> %bc
2920}
2921declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
2922
2923define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2924; X32-LABEL: test_mm_sra_epi32:
2925; X32: # BB#0:
2926; X32-NEXT: psrad %xmm1, %xmm0
2927; X32-NEXT: retl
2928;
2929; X64-LABEL: test_mm_sra_epi32:
2930; X64: # BB#0:
2931; X64-NEXT: psrad %xmm1, %xmm0
2932; X64-NEXT: retq
2933 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2934 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2935 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1)
2936 %bc = bitcast <4 x i32> %res to <2 x i64>
2937 ret <2 x i64> %bc
2938}
2939declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
2940
2941define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) {
2942; X32-LABEL: test_mm_srai_epi16:
2943; X32: # BB#0:
2944; X32-NEXT: psraw $1, %xmm0
2945; X32-NEXT: retl
2946;
2947; X64-LABEL: test_mm_srai_epi16:
2948; X64: # BB#0:
2949; X64-NEXT: psraw $1, %xmm0
2950; X64-NEXT: retq
2951 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2952 %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1)
2953 %bc = bitcast <8 x i16> %res to <2 x i64>
2954 ret <2 x i64> %bc
2955}
2956declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
2957
2958define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) {
2959; X32-LABEL: test_mm_srai_epi32:
2960; X32: # BB#0:
2961; X32-NEXT: psrad $1, %xmm0
2962; X32-NEXT: retl
2963;
2964; X64-LABEL: test_mm_srai_epi32:
2965; X64: # BB#0:
2966; X64-NEXT: psrad $1, %xmm0
2967; X64-NEXT: retq
2968 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2969 %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1)
2970 %bc = bitcast <4 x i32> %res to <2 x i64>
2971 ret <2 x i64> %bc
2972}
2973declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
2974
2975define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2976; X32-LABEL: test_mm_srl_epi16:
2977; X32: # BB#0:
2978; X32-NEXT: psrlw %xmm1, %xmm0
2979; X32-NEXT: retl
2980;
2981; X64-LABEL: test_mm_srl_epi16:
2982; X64: # BB#0:
2983; X64-NEXT: psrlw %xmm1, %xmm0
2984; X64-NEXT: retq
2985 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2986 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2987 %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1)
2988 %bc = bitcast <8 x i16> %res to <2 x i64>
2989 ret <2 x i64> %bc
2990}
2991declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
2992
2993define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2994; X32-LABEL: test_mm_srl_epi32:
2995; X32: # BB#0:
2996; X32-NEXT: psrld %xmm1, %xmm0
2997; X32-NEXT: retl
2998;
2999; X64-LABEL: test_mm_srl_epi32:
3000; X64: # BB#0:
3001; X64-NEXT: psrld %xmm1, %xmm0
3002; X64-NEXT: retq
3003 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3004 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3005 %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1)
3006 %bc = bitcast <4 x i32> %res to <2 x i64>
3007 ret <2 x i64> %bc
3008}
3009declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
3010
3011define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
3012; X32-LABEL: test_mm_srl_epi64:
3013; X32: # BB#0:
3014; X32-NEXT: psrlq %xmm1, %xmm0
3015; X32-NEXT: retl
3016;
3017; X64-LABEL: test_mm_srl_epi64:
3018; X64: # BB#0:
3019; X64-NEXT: psrlq %xmm1, %xmm0
3020; X64-NEXT: retq
3021 %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
3022 ret <2 x i64> %res
3023}
3024declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
3025
3026define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) {
3027; X32-LABEL: test_mm_srli_epi16:
3028; X32: # BB#0:
3029; X32-NEXT: psrlw $1, %xmm0
3030; X32-NEXT: retl
3031;
3032; X64-LABEL: test_mm_srli_epi16:
3033; X64: # BB#0:
3034; X64-NEXT: psrlw $1, %xmm0
3035; X64-NEXT: retq
3036 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3037 %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1)
3038 %bc = bitcast <8 x i16> %res to <2 x i64>
3039 ret <2 x i64> %bc
3040}
3041declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
3042
3043define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) {
3044; X32-LABEL: test_mm_srli_epi32:
3045; X32: # BB#0:
3046; X32-NEXT: psrld $1, %xmm0
3047; X32-NEXT: retl
3048;
3049; X64-LABEL: test_mm_srli_epi32:
3050; X64: # BB#0:
3051; X64-NEXT: psrld $1, %xmm0
3052; X64-NEXT: retq
3053 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3054 %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1)
3055 %bc = bitcast <4 x i32> %res to <2 x i64>
3056 ret <2 x i64> %bc
3057}
3058declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
3059
3060define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) {
3061; X32-LABEL: test_mm_srli_epi64:
3062; X32: # BB#0:
3063; X32-NEXT: psrlq $1, %xmm0
3064; X32-NEXT: retl
3065;
3066; X64-LABEL: test_mm_srli_epi64:
3067; X64: # BB#0:
3068; X64-NEXT: psrlq $1, %xmm0
3069; X64-NEXT: retq
3070 %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1)
3071 ret <2 x i64> %res
3072}
3073declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
3074
3075define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind {
3076; X32-LABEL: test_mm_srli_si128:
3077; X32: # BB#0:
3078; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
3079; X32-NEXT: retl
3080;
3081; X64-LABEL: test_mm_srli_si128:
3082; X64: # BB#0:
3083; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
3084; X64-NEXT: retq
3085 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3086 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
3087 %bc = bitcast <16 x i8> %res to <2 x i64>
3088 ret <2 x i64> %bc
3089}
3090
3091define void @test_mm_store_pd(double *%a0, <2 x double> %a1) {
3092; X32-LABEL: test_mm_store_pd:
3093; X32: # BB#0:
3094; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3095; X32-NEXT: movaps %xmm0, (%eax)
3096; X32-NEXT: retl
3097;
3098; X64-LABEL: test_mm_store_pd:
3099; X64: # BB#0:
3100; X64-NEXT: movaps %xmm0, (%rdi)
3101; X64-NEXT: retq
3102 %arg0 = bitcast double* %a0 to <2 x double>*
3103 store <2 x double> %a1, <2 x double>* %arg0, align 16
3104 ret void
3105}
3106
3107define void @test_mm_store_sd(double *%a0, <2 x double> %a1) {
3108; X32-LABEL: test_mm_store_sd:
3109; X32: # BB#0:
3110; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3111; X32-NEXT: movsd %xmm0, (%eax)
3112; X32-NEXT: retl
3113;
3114; X64-LABEL: test_mm_store_sd:
3115; X64: # BB#0:
3116; X64-NEXT: movsd %xmm0, (%rdi)
3117; X64-NEXT: retq
3118 %ext = extractelement <2 x double> %a1, i32 0
3119 store double %ext, double* %a0, align 1
3120 ret void
3121}
3122
3123define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3124; X32-LABEL: test_mm_store_si128:
3125; X32: # BB#0:
3126; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3127; X32-NEXT: movaps %xmm0, (%eax)
3128; X32-NEXT: retl
3129;
3130; X64-LABEL: test_mm_store_si128:
3131; X64: # BB#0:
3132; X64-NEXT: movaps %xmm0, (%rdi)
3133; X64-NEXT: retq
3134 store <2 x i64> %a1, <2 x i64>* %a0, align 16
3135 ret void
3136}
3137
3138define void @test_mm_store1_sd(double *%a0, <2 x double> %a1) {
3139; X32-LABEL: test_mm_store1_sd:
3140; X32: # BB#0:
3141; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3142; X32-NEXT: movsd %xmm0, (%eax)
3143; X32-NEXT: movsd %xmm0, 8(%eax)
3144; X32-NEXT: retl
3145;
3146; X64-LABEL: test_mm_store1_sd:
3147; X64: # BB#0:
3148; X64-NEXT: movsd %xmm0, (%rdi)
3149; X64-NEXT: movsd %xmm0, 8(%rdi)
3150; X64-NEXT: retq
3151 %ext = extractelement <2 x double> %a1, i32 0
3152 %ptr0 = getelementptr inbounds double, double* %a0, i32 0
3153 %ptr1 = getelementptr inbounds double, double* %a0, i32 1
3154 store double %ext, double* %ptr0, align 1
3155 store double %ext, double* %ptr1, align 1
3156 ret void
3157}
3158
3159define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) {
3160; X32-LABEL: test_mm_storeh_sd:
3161; X32: # BB#0:
3162; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3163; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3164; X32-NEXT: movsd %xmm0, (%eax)
3165; X32-NEXT: retl
3166;
3167; X64-LABEL: test_mm_storeh_sd:
3168; X64: # BB#0:
3169; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3170; X64-NEXT: movsd %xmm0, (%rdi)
3171; X64-NEXT: retq
3172 %ext = extractelement <2 x double> %a1, i32 1
3173 store double %ext, double* %a0, align 8
3174 ret void
3175}
3176
3177define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) {
3178; X32-LABEL: test_mm_storel_epi64:
3179; X32: # BB#0:
3180; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3181; X32-NEXT: movlps %xmm0, (%eax)
3182; X32-NEXT: retl
3183;
3184; X64-LABEL: test_mm_storel_epi64:
3185; X64: # BB#0:
3186; X64-NEXT: movd %xmm0, %rax
3187; X64-NEXT: movq %rax, (%rdi)
3188; X64-NEXT: retq
3189 %ext = extractelement <2 x i64> %a1, i32 0
3190 %bc = bitcast <2 x i64> *%a0 to i64*
3191 store i64 %ext, i64* %bc, align 8
3192 ret void
3193}
3194
3195define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) {
3196; X32-LABEL: test_mm_storel_sd:
3197; X32: # BB#0:
3198; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3199; X32-NEXT: movsd %xmm0, (%eax)
3200; X32-NEXT: retl
3201;
3202; X64-LABEL: test_mm_storel_sd:
3203; X64: # BB#0:
3204; X64-NEXT: movsd %xmm0, (%rdi)
3205; X64-NEXT: retq
3206 %ext = extractelement <2 x double> %a1, i32 0
3207 store double %ext, double* %a0, align 8
3208 ret void
3209}
3210
3211define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
3212; X32-LABEL: test_mm_storer_pd:
3213; X32: # BB#0:
3214; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3215; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3216; X32-NEXT: movapd %xmm0, (%eax)
3217; X32-NEXT: retl
3218;
3219; X64-LABEL: test_mm_storer_pd:
3220; X64: # BB#0:
3221; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3222; X64-NEXT: movapd %xmm0, (%rdi)
3223; X64-NEXT: retq
3224 %arg0 = bitcast double* %a0 to <2 x double>*
3225 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
3226 store <2 x double> %shuf, <2 x double>* %arg0, align 16
3227 ret void
3228}
3229
3230define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) {
3231; X32-LABEL: test_mm_storeu_pd:
3232; X32: # BB#0:
3233; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3234; X32-NEXT: movups %xmm0, (%eax)
3235; X32-NEXT: retl
3236;
3237; X64-LABEL: test_mm_storeu_pd:
3238; X64: # BB#0:
3239; X64-NEXT: movups %xmm0, (%rdi)
3240; X64-NEXT: retq
3241 %arg0 = bitcast double* %a0 to <2 x double>*
3242 store <2 x double> %a1, <2 x double>* %arg0, align 1
3243 ret void
3244}
3245
3246define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3247; X32-LABEL: test_mm_storeu_si128:
3248; X32: # BB#0:
3249; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3250; X32-NEXT: movups %xmm0, (%eax)
3251; X32-NEXT: retl
3252;
3253; X64-LABEL: test_mm_storeu_si128:
3254; X64: # BB#0:
3255; X64-NEXT: movups %xmm0, (%rdi)
3256; X64-NEXT: retq
3257 store <2 x i64> %a1, <2 x i64>* %a0, align 1
3258 ret void
3259}
3260
3261define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) {
3262; X32-LABEL: test_mm_stream_pd:
3263; X32: # BB#0:
3264; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3265; X32-NEXT: movntps %xmm0, (%eax)
3266; X32-NEXT: retl
3267;
3268; X64-LABEL: test_mm_stream_pd:
3269; X64: # BB#0:
3270; X64-NEXT: movntps %xmm0, (%rdi)
3271; X64-NEXT: retq
3272 %arg0 = bitcast double* %a0 to <2 x double>*
3273 store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0
3274 ret void
3275}
3276
3277define void @test_mm_stream_si32(i32 *%a0, i32 %a1) {
3278; X32-LABEL: test_mm_stream_si32:
3279; X32: # BB#0:
3280; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3281; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
3282; X32-NEXT: movntil %eax, (%ecx)
3283; X32-NEXT: retl
3284;
3285; X64-LABEL: test_mm_stream_si32:
3286; X64: # BB#0:
3287; X64-NEXT: movntil %esi, (%rdi)
3288; X64-NEXT: retq
3289 store i32 %a1, i32* %a0, align 1, !nontemporal !0
3290 ret void
3291}
3292
3293define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3294; X32-LABEL: test_mm_stream_si128:
3295; X32: # BB#0:
3296; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3297; X32-NEXT: movntps %xmm0, (%eax)
3298; X32-NEXT: retl
3299;
3300; X64-LABEL: test_mm_stream_si128:
3301; X64: # BB#0:
3302; X64-NEXT: movntps %xmm0, (%rdi)
3303; X64-NEXT: retq
3304 store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0
3305 ret void
3306}
3307
3308define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3309; X32-LABEL: test_mm_sub_epi8:
3310; X32: # BB#0:
3311; X32-NEXT: psubb %xmm1, %xmm0
3312; X32-NEXT: retl
3313;
3314; X64-LABEL: test_mm_sub_epi8:
3315; X64: # BB#0:
3316; X64-NEXT: psubb %xmm1, %xmm0
3317; X64-NEXT: retq
3318 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3319 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3320 %res = sub <16 x i8> %arg0, %arg1
3321 %bc = bitcast <16 x i8> %res to <2 x i64>
3322 ret <2 x i64> %bc
3323}
3324
3325define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3326; X32-LABEL: test_mm_sub_epi16:
3327; X32: # BB#0:
3328; X32-NEXT: psubw %xmm1, %xmm0
3329; X32-NEXT: retl
3330;
3331; X64-LABEL: test_mm_sub_epi16:
3332; X64: # BB#0:
3333; X64-NEXT: psubw %xmm1, %xmm0
3334; X64-NEXT: retq
3335 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3336 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3337 %res = sub <8 x i16> %arg0, %arg1
3338 %bc = bitcast <8 x i16> %res to <2 x i64>
3339 ret <2 x i64> %bc
3340}
3341
3342define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3343; X32-LABEL: test_mm_sub_epi32:
3344; X32: # BB#0:
3345; X32-NEXT: psubd %xmm1, %xmm0
3346; X32-NEXT: retl
3347;
3348; X64-LABEL: test_mm_sub_epi32:
3349; X64: # BB#0:
3350; X64-NEXT: psubd %xmm1, %xmm0
3351; X64-NEXT: retq
3352 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3353 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3354 %res = sub <4 x i32> %arg0, %arg1
3355 %bc = bitcast <4 x i32> %res to <2 x i64>
3356 ret <2 x i64> %bc
3357}
3358
3359define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3360; X32-LABEL: test_mm_sub_epi64:
3361; X32: # BB#0:
3362; X32-NEXT: psubq %xmm1, %xmm0
3363; X32-NEXT: retl
3364;
3365; X64-LABEL: test_mm_sub_epi64:
3366; X64: # BB#0:
3367; X64-NEXT: psubq %xmm1, %xmm0
3368; X64-NEXT: retq
3369 %res = sub <2 x i64> %a0, %a1
3370 ret <2 x i64> %res
3371}
3372
3373define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
3374; X32-LABEL: test_mm_sub_pd:
3375; X32: # BB#0:
3376; X32-NEXT: subpd %xmm1, %xmm0
3377; X32-NEXT: retl
3378;
3379; X64-LABEL: test_mm_sub_pd:
3380; X64: # BB#0:
3381; X64-NEXT: subpd %xmm1, %xmm0
3382; X64-NEXT: retq
3383 %res = fsub <2 x double> %a0, %a1
3384 ret <2 x double> %res
3385}
3386
3387define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3388; X32-LABEL: test_mm_sub_sd:
3389; X32: # BB#0:
3390; X32-NEXT: subsd %xmm1, %xmm0
3391; X32-NEXT: retl
3392;
3393; X64-LABEL: test_mm_sub_sd:
3394; X64: # BB#0:
3395; X64-NEXT: subsd %xmm1, %xmm0
3396; X64-NEXT: retq
3397 %ext0 = extractelement <2 x double> %a0, i32 0
3398 %ext1 = extractelement <2 x double> %a1, i32 0
3399 %fsub = fsub double %ext0, %ext1
3400 %res = insertelement <2 x double> %a0, double %fsub, i32 0
3401 ret <2 x double> %res
3402}
3403
; _mm_subs_epi8: signed saturating byte subtract via the target intrinsic
; must select PSUBSB.
define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epi8:
; X32:       # BB#0:
; X32-NEXT:    psubsb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_subs_epi8:
; X64:       # BB#0:
; X64-NEXT:    psubsb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
3421
; _mm_subs_epi16: signed saturating word subtract must select PSUBSW.
define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epi16:
; X32:       # BB#0:
; X32-NEXT:    psubsw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_subs_epi16:
; X64:       # BB#0:
; X64-NEXT:    psubsw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
3439
; _mm_subs_epu8: unsigned saturating byte subtract must select PSUBUSB.
define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epu8:
; X32:       # BB#0:
; X32-NEXT:    psubusb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_subs_epu8:
; X64:       # BB#0:
; X64-NEXT:    psubusb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
3457
; _mm_subs_epu16: unsigned saturating word subtract must select PSUBUSW.
define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epu16:
; X32:       # BB#0:
; X32-NEXT:    psubusw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_subs_epu16:
; X64:       # BB#0:
; X64-NEXT:    psubusw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
3475
; _mm_ucomieq_sd: unordered compare-equal. UCOMISD sets PF on a NaN operand,
; so "equal" is materialized as SETNP & SETE (equal AND not unordered),
; zero-extended to i32.
define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomieq_sd:
; X32:       # BB#0:
; X32-NEXT:    ucomisd %xmm1, %xmm0
; X32-NEXT:    setnp %al
; X32-NEXT:    sete %cl
; X32-NEXT:    andb %al, %cl
; X32-NEXT:    movzbl %cl, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_ucomieq_sd:
; X64:       # BB#0:
; X64-NEXT:    ucomisd %xmm1, %xmm0
; X64-NEXT:    setnp %al
; X64-NEXT:    sete %cl
; X64-NEXT:    andb %al, %cl
; X64-NEXT:    movzbl %cl, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
3498
; _mm_ucomige_sd: unordered greater-or-equal maps directly to UCOMISD + SETAE.
define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomige_sd:
; X32:       # BB#0:
; X32-NEXT:    ucomisd %xmm1, %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_ucomige_sd:
; X64:       # BB#0:
; X64-NEXT:    ucomisd %xmm1, %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
3517
; _mm_ucomigt_sd: unordered greater-than maps directly to UCOMISD + SETA.
define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomigt_sd:
; X32:       # BB#0:
; X32-NEXT:    ucomisd %xmm1, %xmm0
; X32-NEXT:    seta %al
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_ucomigt_sd:
; X64:       # BB#0:
; X64-NEXT:    ucomisd %xmm1, %xmm0
; X64-NEXT:    seta %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
3536
; _mm_ucomile_sd: less-or-equal is implemented by commuting the UCOMISD
; operands (compare %xmm1 against %xmm0) and reusing SETAE.
define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomile_sd:
; X32:       # BB#0:
; X32-NEXT:    ucomisd %xmm0, %xmm1
; X32-NEXT:    setae %al
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_ucomile_sd:
; X64:       # BB#0:
; X64-NEXT:    ucomisd %xmm0, %xmm1
; X64-NEXT:    setae %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
3555
; _mm_ucomilt_sd: less-than is implemented by commuting the UCOMISD operands
; and reusing SETA.
define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomilt_sd:
; X32:       # BB#0:
; X32-NEXT:    ucomisd %xmm0, %xmm1
; X32-NEXT:    seta %al
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_ucomilt_sd:
; X64:       # BB#0:
; X64-NEXT:    ucomisd %xmm0, %xmm1
; X64-NEXT:    seta %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
3574
; _mm_ucomineq_sd: unordered not-equal is SETP | SETNE (not-equal OR
; unordered), zero-extended to i32 — the dual of the ucomieq lowering.
define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomineq_sd:
; X32:       # BB#0:
; X32-NEXT:    ucomisd %xmm1, %xmm0
; X32-NEXT:    setp %al
; X32-NEXT:    setne %cl
; X32-NEXT:    orb %al, %cl
; X32-NEXT:    movzbl %cl, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_ucomineq_sd:
; X64:       # BB#0:
; X64-NEXT:    ucomisd %xmm1, %xmm0
; X64-NEXT:    setp %al
; X64-NEXT:    setne %cl
; X64-NEXT:    orb %al, %cl
; X64-NEXT:    movzbl %cl, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
3597
; _mm_undefined_pd: returning undef must emit no instructions at all, just the
; return.
define <2 x double> @test_mm_undefined_pd() {
; X32-LABEL: test_mm_undefined_pd:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_undefined_pd:
; X64:       # BB#0:
; X64-NEXT:    retq
  ret <2 x double> undef
}
3608
; _mm_undefined_si128: as above, undef generates no code.
define <2 x i64> @test_mm_undefined_si128() {
; X32-LABEL: test_mm_undefined_si128:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_undefined_si128:
; X64:       # BB#0:
; X64-NEXT:    retq
  ret <2 x i64> undef
}
3619
; _mm_unpackhi_epi8: the interleaving shuffle of high bytes (8,24,9,25,...)
; must select a single PUNPCKHBW.
define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi8:
; X32:       # BB#0:
; X32-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpackhi_epi8:
; X64:       # BB#0:
; X64-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
3636
; _mm_unpackhi_epi16: interleave of high words (4,12,5,13,...) must select
; PUNPCKHWD.
define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi16:
; X32:       # BB#0:
; X32-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpackhi_epi16:
; X64:       # BB#0:
; X64-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
3653
; _mm_unpackhi_epi32: interleave of high dwords (2,6,3,7) must select
; PUNPCKHDQ.
define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi32:
; X32:       # BB#0:
; X32-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpackhi_epi32:
; X64:       # BB#0:
; X64-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
3670
; _mm_unpackhi_epi64: shuffle <1,3> (high qword of each source) must select
; PUNPCKHQDQ.
define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi64:
; X32:       # BB#0:
; X32-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpackhi_epi64:
; X64:       # BB#0:
; X64-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X64-NEXT:    retq
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %res
}
3684
; _mm_unpackhi_pd: same <1,3> shuffle on doubles must use the FP-domain
; UNPCKHPD rather than the integer unpack.
define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_unpackhi_pd:
; X32:       # BB#0:
; X32-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpackhi_pd:
; X64:       # BB#0:
; X64-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X64-NEXT:    retq
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %res
}
3698
; _mm_unpacklo_epi8: interleave of low bytes (0,16,1,17,...) must select
; PUNPCKLBW.
define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi8:
; X32:       # BB#0:
; X32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpacklo_epi8:
; X64:       # BB#0:
; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
3715
; _mm_unpacklo_epi16: interleave of low words (0,8,1,9,...) must select
; PUNPCKLWD.
define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi16:
; X32:       # BB#0:
; X32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpacklo_epi16:
; X64:       # BB#0:
; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
3732
; _mm_unpacklo_epi32: interleave of low dwords (0,4,1,5) must select
; PUNPCKLDQ.
define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi32:
; X32:       # BB#0:
; X32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpacklo_epi32:
; X64:       # BB#0:
; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
3749
; _mm_unpacklo_epi64: shuffle <0,2> (low qword of each source) must select
; PUNPCKLQDQ.
define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi64:
; X32:       # BB#0:
; X32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpacklo_epi64:
; X64:       # BB#0:
; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT:    retq
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %res
}
3763
; _mm_unpacklo_pd: same <0,2> shuffle on doubles must use the FP-domain
; UNPCKLPD.
define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_unpacklo_pd:
; X32:       # BB#0:
; X32-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_unpacklo_pd:
; X64:       # BB#0:
; X64-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT:    retq
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %res
}
3777
; _mm_xor_pd: clang expands this as bitcast-to-i32x4 + xor; the backend is
; expected to pick XORPS here (what fast-isel currently emits), not PXOR.
define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_xor_pd:
; X32:       # BB#0:
; X32-NEXT:    xorps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_xor_pd:
; X64:       # BB#0:
; X64-NEXT:    xorps %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = xor <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}
3794
; _mm_xor_si128: integer vector xor; fast-isel currently selects XORPS for
; this as well (see the CHECK lines), which is bitwise-equivalent.
define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_xor_si128:
; X32:       # BB#0:
; X32-NEXT:    xorps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_xor_si128:
; X64:       # BB#0:
; X64-NEXT:    xorps %xmm1, %xmm0
; X64-NEXT:    retq
  %res = xor <2 x i64> %a0, %a1
  ret <2 x i64> %res
}
3808
3809!0 = !{i32 1}
3810