; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c

define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_add_epi8:
; X32:       # BB#0:
; X32-NEXT:    paddb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_add_epi8:
; X64:       # BB#0:
; X64-NEXT:    paddb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = add <16 x i8> %arg0, %arg1
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_add_epi16:
; X32:       # BB#0:
; X32-NEXT:    paddw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_add_epi16:
; X64:       # BB#0:
; X64-NEXT:    paddw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = add <8 x i16> %arg0, %arg1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_add_epi32:
; X32:       # BB#0:
; X32-NEXT:    paddd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_add_epi32:
; X64:       # BB#0:
; X64-NEXT:    paddd %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = add <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_add_epi64:
; X32:       # BB#0:
; X32-NEXT:    paddq %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_add_epi64:
; X64:       # BB#0:
; X64-NEXT:    paddq %xmm1, %xmm0
; X64-NEXT:    retq
  %res = add <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_add_pd:
; X32:       # BB#0:
; X32-NEXT:    addpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_add_pd:
; X64:       # BB#0:
; X64-NEXT:    addpd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = fadd <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_add_sd:
; X32:       # BB#0:
; X32-NEXT:    addsd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_add_sd:
; X64:       # BB#0:
; X64-NEXT:    addsd %xmm1, %xmm0
; X64-NEXT:    retq
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fadd = fadd double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fadd, i32 0
  ret <2 x double> %res
}
102
define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_adds_epi8:
; X32:       # BB#0:
; X32-NEXT:    paddsb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_adds_epi8:
; X64:       # BB#0:
; X64-NEXT:    paddsb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_adds_epi16:
; X32:       # BB#0:
; X32-NEXT:    paddsw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_adds_epi16:
; X64:       # BB#0:
; X64-NEXT:    paddsw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_adds_epu8:
; X32:       # BB#0:
; X32-NEXT:    paddusb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_adds_epu8:
; X64:       # BB#0:
; X64-NEXT:    paddusb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_adds_epu16:
; X32:       # BB#0:
; X32-NEXT:    paddusw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_adds_epu16:
; X64:       # BB#0:
; X64-NEXT:    paddusw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
174
define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_and_pd:
; X32:       # BB#0:
; X32-NEXT:    andps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_and_pd:
; X64:       # BB#0:
; X64-NEXT:    andps %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = and <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_and_si128:
; X32:       # BB#0:
; X32-NEXT:    andps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_and_si128:
; X64:       # BB#0:
; X64-NEXT:    andps %xmm1, %xmm0
; X64-NEXT:    retq
  %res = and <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_andnot_pd:
; X32:       # BB#0:
; X32-NEXT:    andnps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_andnot_pd:
; X64:       # BB#0:
; X64-NEXT:    andnps %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = and <4 x i32> %not, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_andnot_si128:
; X32:       # BB#0:
; X32-NEXT:    pcmpeqd %xmm2, %xmm2
; X32-NEXT:    pxor %xmm2, %xmm0
; X32-NEXT:    pand %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_andnot_si128:
; X64:       # BB#0:
; X64-NEXT:    pcmpeqd %xmm2, %xmm2
; X64-NEXT:    pxor %xmm2, %xmm0
; X64-NEXT:    pand %xmm1, %xmm0
; X64-NEXT:    retq
  %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
  %res = and <2 x i64> %not, %a1
  ret <2 x i64> %res
}
242
define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_avg_epu8:
; X32:       # BB#0:
; X32-NEXT:    pavgb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_avg_epu8:
; X64:       # BB#0:
; X64-NEXT:    pavgb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1) nounwind readnone

define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_avg_epu16:
; X32:       # BB#0:
; X32-NEXT:    pavgw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_avg_epu16:
; X64:       # BB#0:
; X64-NEXT:    pavgw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_bslli_si128:
; X32:       # BB#0:
; X32-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_bslli_si128:
; X64:       # BB#0:
; X64-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_bsrli_si128:
; X32:       # BB#0:
; X32-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_bsrli_si128:
; X64:       # BB#0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
310
define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm_castpd_ps:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_castpd_ps:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <2 x double> %a0 to <4 x float>
  ret <4 x float> %res
}

define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm_castpd_si128:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_castpd_si128:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <2 x double> %a0 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm_castps_pd:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_castps_pd:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <4 x float> %a0 to <2 x double>
  ret <2 x double> %res
}

define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm_castps_si128:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_castps_si128:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <4 x float> %a0 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_castsi128_pd:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_castsi128_pd:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <2 x i64> %a0 to <2 x double>
  ret <2 x double> %res
}

define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_castsi128_ps:
; X32:       # BB#0:
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_castsi128_ps:
; X64:       # BB#0:
; X64-NEXT:    retq
  %res = bitcast <2 x i64> %a0 to <4 x float>
  ret <4 x float> %res
}

define void @test_mm_clflush(i8* %a0) nounwind {
; X32-LABEL: test_mm_clflush:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    clflush (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_clflush:
; X64:       # BB#0:
; X64-NEXT:    clflush (%rdi)
; X64-NEXT:    retq
  call void @llvm.x86.sse2.clflush(i8* %a0)
  ret void
}
declare void @llvm.x86.sse2.clflush(i8*) nounwind readnone
398
define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpeq_epi8:
; X32:       # BB#0:
; X32-NEXT:    pcmpeqb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpeq_epi8:
; X64:       # BB#0:
; X64-NEXT:    pcmpeqb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp eq <16 x i8> %arg0, %arg1
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpeq_epi16:
; X32:       # BB#0:
; X32-NEXT:    pcmpeqw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpeq_epi16:
; X64:       # BB#0:
; X64-NEXT:    pcmpeqw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp eq <8 x i16> %arg0, %arg1
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpeq_epi32:
; X32:       # BB#0:
; X32-NEXT:    pcmpeqd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpeq_epi32:
; X64:       # BB#0:
; X64-NEXT:    pcmpeqd %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp eq <4 x i32> %arg0, %arg1
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpeq_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpeqpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpeq_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpeqpd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpeq_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpeqsd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpeq_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpeqsd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpge_pd:
; X32:       # BB#0:
; X32-NEXT:    cmplepd %xmm0, %xmm1
; X32-NEXT:    movapd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpge_pd:
; X64:       # BB#0:
; X64-NEXT:    cmplepd %xmm0, %xmm1
; X64-NEXT:    movapd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 2)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpge_sd:
; X32:       # BB#0:
; X32-NEXT:    cmplesd %xmm0, %xmm1
; X32-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpge_sd:
; X64:       # BB#0:
; X64-NEXT:    cmplesd %xmm0, %xmm1
; X64-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT:    retq
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}
518
define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpgt_epi8:
; X32:       # BB#0:
; X32-NEXT:    pcmpgtb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpgt_epi8:
; X64:       # BB#0:
; X64-NEXT:    pcmpgtb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp sgt <16 x i8> %arg0, %arg1
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpgt_epi16:
; X32:       # BB#0:
; X32-NEXT:    pcmpgtw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpgt_epi16:
; X64:       # BB#0:
; X64-NEXT:    pcmpgtw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg0, %arg1
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpgt_epi32:
; X32:       # BB#0:
; X32-NEXT:    pcmpgtd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpgt_epi32:
; X64:       # BB#0:
; X64-NEXT:    pcmpgtd %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp sgt <4 x i32> %arg0, %arg1
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpgt_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpltpd %xmm0, %xmm1
; X32-NEXT:    movapd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpgt_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpltpd %xmm0, %xmm1
; X64-NEXT:    movapd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 1)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpgt_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpltsd %xmm0, %xmm1
; X32-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpgt_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpltsd %xmm0, %xmm1
; X64-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT:    retq
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmple_pd:
; X32:       # BB#0:
; X32-NEXT:    cmplepd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmple_pd:
; X64:       # BB#0:
; X64-NEXT:    cmplepd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 2)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmple_sd:
; X32:       # BB#0:
; X32-NEXT:    cmplesd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmple_sd:
; X64:       # BB#0:
; X64-NEXT:    cmplesd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
  ret <2 x double> %res
}
636
define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmplt_epi8:
; X32:       # BB#0:
; X32-NEXT:    pcmpgtb %xmm0, %xmm1
; X32-NEXT:    movdqa %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmplt_epi8:
; X64:       # BB#0:
; X64-NEXT:    pcmpgtb %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp sgt <16 x i8> %arg1, %arg0
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmplt_epi16:
; X32:       # BB#0:
; X32-NEXT:    pcmpgtw %xmm0, %xmm1
; X32-NEXT:    movdqa %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmplt_epi16:
; X64:       # BB#0:
; X64-NEXT:    pcmpgtw %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg1, %arg0
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmplt_epi32:
; X32:       # BB#0:
; X32-NEXT:    pcmpgtd %xmm0, %xmm1
; X32-NEXT:    movdqa %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmplt_epi32:
; X64:       # BB#0:
; X64-NEXT:    pcmpgtd %xmm0, %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp sgt <4 x i32> %arg1, %arg0
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmplt_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpltpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmplt_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpltpd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 1)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmplt_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpltsd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmplt_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpltsd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpneq_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpneqpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpneq_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpneqpd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 4)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpneq_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpneqsd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpneq_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpneqsd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
  ret <2 x double> %res
}
752
define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnge_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpnlepd %xmm0, %xmm1
; X32-NEXT:    movapd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpnge_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpnlepd %xmm0, %xmm1
; X64-NEXT:    movapd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 6)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnge_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpnlesd %xmm0, %xmm1
; X32-NEXT:    movaps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpnge_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpnlesd %xmm0, %xmm1
; X64-NEXT:    movaps %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpngt_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpnltpd %xmm0, %xmm1
; X32-NEXT:    movapd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpngt_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpnltpd %xmm0, %xmm1
; X64-NEXT:    movapd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 5)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpngt_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpnltsd %xmm0, %xmm1
; X32-NEXT:    movaps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpngt_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpnltsd %xmm0, %xmm1
; X64-NEXT:    movaps %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnle_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpnlepd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpnle_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpnlepd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 6)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnle_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpnlesd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpnle_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpnlesd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnlt_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpnltpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpnlt_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpnltpd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 5)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnlt_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpnltsd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpnlt_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpnltsd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
  ret <2 x double> %res
}
872
define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpord_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpordpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpord_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpordpd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpord_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpordsd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpord_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpordsd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpunord_pd:
; X32:       # BB#0:
; X32-NEXT:    cmpunordpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpunord_pd:
; X64:       # BB#0:
; X64-NEXT:    cmpunordpd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 3)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpunord_sd:
; X32:       # BB#0:
; X32-NEXT:    cmpunordsd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpunord_sd:
; X64:       # BB#0:
; X64-NEXT:    cmpunordsd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3)
  ret <2 x double> %res
}
928
define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_comieq_sd:
; X32:       # BB#0:
; X32-NEXT:    comisd %xmm1, %xmm0
; X32-NEXT:    setnp %al
; X32-NEXT:    sete %cl
; X32-NEXT:    andb %al, %cl
; X32-NEXT:    movzbl %cl, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_comieq_sd:
; X64:       # BB#0:
; X64-NEXT:    comisd %xmm1, %xmm0
; X64-NEXT:    setnp %al
; X64-NEXT:    sete %cl
; X64-NEXT:    andb %al, %cl
; X64-NEXT:    movzbl %cl, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_comige_sd:
; X32:       # BB#0:
; X32-NEXT:    comisd %xmm1, %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_comige_sd:
; X64:       # BB#0:
; X64-NEXT:    comisd %xmm1, %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_comigt_sd:
; X32:       # BB#0:
; X32-NEXT:    comisd %xmm1, %xmm0
; X32-NEXT:    seta %al
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_comigt_sd:
; X64:       # BB#0:
; X64-NEXT:    comisd %xmm1, %xmm0
; X64-NEXT:    seta %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_comile_sd:
; X32:       # BB#0:
; X32-NEXT:    comisd %xmm0, %xmm1
; X32-NEXT:    setae %al
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_comile_sd:
; X64:       # BB#0:
; X64-NEXT:    comisd %xmm0, %xmm1
; X64-NEXT:    setae %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_comilt_sd:
; X32:       # BB#0:
; X32-NEXT:    comisd %xmm0, %xmm1
; X32-NEXT:    seta %al
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_comilt_sd:
; X64:       # BB#0:
; X64-NEXT:    comisd %xmm0, %xmm1
; X64-NEXT:    seta %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_comineq_sd:
; X32:       # BB#0:
; X32-NEXT:    comisd %xmm1, %xmm0
; X32-NEXT:    setp %al
; X32-NEXT:    setne %cl
; X32-NEXT:    orb %al, %cl
; X32-NEXT:    movzbl %cl, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_comineq_sd:
; X64:       # BB#0:
; X64-NEXT:    comisd %xmm1, %xmm0
; X64-NEXT:    setp %al
; X64-NEXT:    setne %cl
; X64-NEXT:    orb %al, %cl
; X64-NEXT:    movzbl %cl, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
1050
1051define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
1052; X32-LABEL: test_mm_cvtepi32_pd:
1053; X32: # BB#0:
1054; X32-NEXT: cvtdq2pd %xmm0, %xmm0
1055; X32-NEXT: retl
1056;
1057; X64-LABEL: test_mm_cvtepi32_pd:
1058; X64: # BB#0:
1059; X64-NEXT: cvtdq2pd %xmm0, %xmm0
1060; X64-NEXT: retq
1061 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1062 %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %arg0)
1063 ret <2 x double> %res
1064}
1065declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
1066
1067define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
1068; X32-LABEL: test_mm_cvtepi32_ps:
1069; X32: # BB#0:
1070; X32-NEXT: cvtdq2ps %xmm0, %xmm0
1071; X32-NEXT: retl
1072;
1073; X64-LABEL: test_mm_cvtepi32_ps:
1074; X64: # BB#0:
1075; X64-NEXT: cvtdq2ps %xmm0, %xmm0
1076; X64-NEXT: retq
1077 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1078 %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %arg0)
1079 ret <4 x float> %res
1080}
1081declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
1082
1083define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
1084; X32-LABEL: test_mm_cvtpd_epi32:
1085; X32: # BB#0:
1086; X32-NEXT: cvtpd2dq %xmm0, %xmm0
1087; X32-NEXT: retl
1088;
1089; X64-LABEL: test_mm_cvtpd_epi32:
1090; X64: # BB#0:
1091; X64-NEXT: cvtpd2dq %xmm0, %xmm0
1092; X64-NEXT: retq
1093 %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
1094 %bc = bitcast <4 x i32> %res to <2 x i64>
1095 ret <2 x i64> %bc
1096}
1097declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
1098
1099define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind {
1100; X32-LABEL: test_mm_cvtpd_ps:
1101; X32: # BB#0:
1102; X32-NEXT: cvtpd2ps %xmm0, %xmm0
1103; X32-NEXT: retl
1104;
1105; X64-LABEL: test_mm_cvtpd_ps:
1106; X64: # BB#0:
1107; X64-NEXT: cvtpd2ps %xmm0, %xmm0
1108; X64-NEXT: retq
1109 %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
1110 ret <4 x float> %res
1111}
1112declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
1113
1114define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind {
1115; X32-LABEL: test_mm_cvtps_epi32:
1116; X32: # BB#0:
1117; X32-NEXT: cvtps2dq %xmm0, %xmm0
1118; X32-NEXT: retl
1119;
1120; X64-LABEL: test_mm_cvtps_epi32:
1121; X64: # BB#0:
1122; X64-NEXT: cvtps2dq %xmm0, %xmm0
1123; X64-NEXT: retq
1124 %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
1125 %bc = bitcast <4 x i32> %res to <2 x i64>
1126 ret <2 x i64> %bc
1127}
1128declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
1129
1130define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind {
1131; X32-LABEL: test_mm_cvtps_pd:
1132; X32: # BB#0:
1133; X32-NEXT: cvtps2pd %xmm0, %xmm0
1134; X32-NEXT: retl
1135;
1136; X64-LABEL: test_mm_cvtps_pd:
1137; X64: # BB#0:
1138; X64-NEXT: cvtps2pd %xmm0, %xmm0
1139; X64-NEXT: retq
1140 %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
1141 ret <2 x double> %res
1142}
1143declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
1144
1145define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
1146; X32-LABEL: test_mm_cvtsd_f64:
1147; X32: # BB#0:
1148; X32-NEXT: pushl %ebp
1149; X32-NEXT: movl %esp, %ebp
1150; X32-NEXT: andl $-8, %esp
1151; X32-NEXT: subl $8, %esp
1152; X32-NEXT: movlps %xmm0, (%esp)
1153; X32-NEXT: fldl (%esp)
1154; X32-NEXT: movl %ebp, %esp
1155; X32-NEXT: popl %ebp
1156; X32-NEXT: retl
1157;
1158; X64-LABEL: test_mm_cvtsd_f64:
1159; X64: # BB#0:
1160; X64-NEXT: retq
1161 %res = extractelement <2 x double> %a0, i32 0
1162 ret double %res
1163}
1164
1165define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
1166; X32-LABEL: test_mm_cvtsd_si32:
1167; X32: # BB#0:
1168; X32-NEXT: cvtsd2si %xmm0, %eax
1169; X32-NEXT: retl
1170;
1171; X64-LABEL: test_mm_cvtsd_si32:
1172; X64: # BB#0:
1173; X64-NEXT: cvtsd2si %xmm0, %eax
1174; X64-NEXT: retq
1175 %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
1176 ret i32 %res
1177}
1178declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
1179
1180define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
1181; X32-LABEL: test_mm_cvtsi128_si32:
1182; X32: # BB#0:
1183; X32-NEXT: movd %xmm0, %eax
1184; X32-NEXT: retl
1185;
1186; X64-LABEL: test_mm_cvtsi128_si32:
1187; X64: # BB#0:
1188; X64-NEXT: movd %xmm0, %eax
1189; X64-NEXT: retq
1190 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1191 %res = extractelement <4 x i32> %arg0, i32 0
1192 ret i32 %res
1193}
1194
1195define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
1196; X32-LABEL: test_mm_cvtsi32_sd:
1197; X32: # BB#0:
1198; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1199; X32-NEXT: cvtsi2sdl %eax, %xmm1
1200; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1201; X32-NEXT: retl
1202;
1203; X64-LABEL: test_mm_cvtsi32_sd:
1204; X64: # BB#0:
1205; X64-NEXT: cvtsi2sdl %edi, %xmm1
1206; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1207; X64-NEXT: retq
1208 %cvt = sitofp i32 %a1 to double
1209 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1210 ret <2 x double> %res
1211}
1212
1213define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
1214; X32-LABEL: test_mm_cvtsi32_si128:
1215; X32: # BB#0:
1216; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1217; X32-NEXT: retl
1218;
1219; X64-LABEL: test_mm_cvtsi32_si128:
1220; X64: # BB#0:
1221; X64-NEXT: movd %edi, %xmm0
1222; X64-NEXT: retq
1223 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
1224 %res1 = insertelement <4 x i32> %res0, i32 0, i32 1
1225 %res2 = insertelement <4 x i32> %res1, i32 0, i32 2
1226 %res3 = insertelement <4 x i32> %res2, i32 0, i32 3
1227 %res = bitcast <4 x i32> %res3 to <2 x i64>
1228 ret <2 x i64> %res
1229}
1230
1231define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
1232; X32-LABEL: test_mm_cvtss_sd:
1233; X32: # BB#0:
1234; X32-NEXT: cvtss2sd %xmm1, %xmm1
1235; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1236; X32-NEXT: retl
1237;
1238; X64-LABEL: test_mm_cvtss_sd:
1239; X64: # BB#0:
1240; X64-NEXT: cvtss2sd %xmm1, %xmm1
1241; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1242; X64-NEXT: retq
1243 %ext = extractelement <4 x float> %a1, i32 0
1244 %cvt = fpext float %ext to double
1245 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1246 ret <2 x double> %res
1247}
1248
1249define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
1250; X32-LABEL: test_mm_cvttpd_epi32:
1251; X32: # BB#0:
1252; X32-NEXT: cvttpd2dq %xmm0, %xmm0
1253; X32-NEXT: retl
1254;
1255; X64-LABEL: test_mm_cvttpd_epi32:
1256; X64: # BB#0:
1257; X64-NEXT: cvttpd2dq %xmm0, %xmm0
1258; X64-NEXT: retq
1259 %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
1260 %bc = bitcast <4 x i32> %res to <2 x i64>
1261 ret <2 x i64> %bc
1262}
1263declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
1264
1265define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
1266; X32-LABEL: test_mm_cvttps_epi32:
1267; X32: # BB#0:
1268; X32-NEXT: cvttps2dq %xmm0, %xmm0
1269; X32-NEXT: retl
1270;
1271; X64-LABEL: test_mm_cvttps_epi32:
1272; X64: # BB#0:
1273; X64-NEXT: cvttps2dq %xmm0, %xmm0
1274; X64-NEXT: retq
1275 %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
1276 %bc = bitcast <4 x i32> %res to <2 x i64>
1277 ret <2 x i64> %bc
1278}
1279declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
1280
1281define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
1282; X32-LABEL: test_mm_cvttsd_si32:
1283; X32: # BB#0:
1284; X32-NEXT: cvttsd2si %xmm0, %eax
1285; X32-NEXT: retl
1286;
1287; X64-LABEL: test_mm_cvttsd_si32:
1288; X64: # BB#0:
1289; X64-NEXT: cvttsd2si %xmm0, %eax
1290; X64-NEXT: retq
1291 %ext = extractelement <2 x double> %a0, i32 0
1292 %res = fptosi double %ext to i32
1293 ret i32 %res
1294}
1295
1296define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1297; X32-LABEL: test_mm_div_pd:
1298; X32: # BB#0:
1299; X32-NEXT: divpd %xmm1, %xmm0
1300; X32-NEXT: retl
1301;
1302; X64-LABEL: test_mm_div_pd:
1303; X64: # BB#0:
1304; X64-NEXT: divpd %xmm1, %xmm0
1305; X64-NEXT: retq
1306 %res = fdiv <2 x double> %a0, %a1
1307 ret <2 x double> %res
1308}
1309
1310define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1311; X32-LABEL: test_mm_div_sd:
1312; X32: # BB#0:
1313; X32-NEXT: divsd %xmm1, %xmm0
1314; X32-NEXT: retl
1315;
1316; X64-LABEL: test_mm_div_sd:
1317; X64: # BB#0:
1318; X64-NEXT: divsd %xmm1, %xmm0
1319; X64-NEXT: retq
1320 %ext0 = extractelement <2 x double> %a0, i32 0
1321 %ext1 = extractelement <2 x double> %a1, i32 0
1322 %fdiv = fdiv double %ext0, %ext1
1323 %res = insertelement <2 x double> %a0, double %fdiv, i32 0
1324 ret <2 x double> %res
1325}
1326
1327define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind {
1328; X32-LABEL: test_mm_extract_epi16:
1329; X32: # BB#0:
1330; X32-NEXT: pextrw $1, %xmm0, %eax
1331; X32-NEXT: movzwl %ax, %eax
1332; X32-NEXT: retl
1333;
1334; X64-LABEL: test_mm_extract_epi16:
1335; X64: # BB#0:
1336; X64-NEXT: pextrw $1, %xmm0, %eax
1337; X64-NEXT: movzwl %ax, %eax
1338; X64-NEXT: retq
1339 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1340 %ext = extractelement <8 x i16> %arg0, i32 1
1341 %res = zext i16 %ext to i32
1342 ret i32 %res
1343}
1344
1345define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
1346; X32-LABEL: test_mm_insert_epi16:
1347; X32: # BB#0:
1348; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
1349; X32-NEXT: pinsrw $1, %eax, %xmm0
1350; X32-NEXT: retl
1351;
1352; X64-LABEL: test_mm_insert_epi16:
1353; X64: # BB#0:
1354; X64-NEXT: pinsrw $1, %edi, %xmm0
1355; X64-NEXT: retq
1356 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1357 %res = insertelement <8 x i16> %arg0, i16 %a1,i32 1
1358 %bc = bitcast <8 x i16> %res to <2 x i64>
1359 ret <2 x i64> %bc
1360}
1361
1362define void @test_mm_lfence() nounwind {
1363; X32-LABEL: test_mm_lfence:
1364; X32: # BB#0:
1365; X32-NEXT: lfence
1366; X32-NEXT: retl
1367;
1368; X64-LABEL: test_mm_lfence:
1369; X64: # BB#0:
1370; X64-NEXT: lfence
1371; X64-NEXT: retq
1372 call void @llvm.x86.sse2.lfence()
1373 ret void
1374}
1375declare void @llvm.x86.sse2.lfence() nounwind readnone
1376
1377define <2 x double> @test_mm_load_pd(double* %a0) nounwind {
1378; X32-LABEL: test_mm_load_pd:
1379; X32: # BB#0:
1380; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1381; X32-NEXT: movaps (%eax), %xmm0
1382; X32-NEXT: retl
1383;
1384; X64-LABEL: test_mm_load_pd:
1385; X64: # BB#0:
1386; X64-NEXT: movaps (%rdi), %xmm0
1387; X64-NEXT: retq
1388 %arg0 = bitcast double* %a0 to <2 x double>*
1389 %res = load <2 x double>, <2 x double>* %arg0, align 16
1390 ret <2 x double> %res
1391}
1392
1393define <2 x double> @test_mm_load_sd(double* %a0) nounwind {
1394; X32-LABEL: test_mm_load_sd:
1395; X32: # BB#0:
1396; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1397; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1398; X32-NEXT: retl
1399;
1400; X64-LABEL: test_mm_load_sd:
1401; X64: # BB#0:
1402; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1403; X64-NEXT: retq
1404 %ld = load double, double* %a0, align 1
1405 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1406 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
1407 ret <2 x double> %res1
1408}
1409
1410define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind {
1411; X32-LABEL: test_mm_load_si128:
1412; X32: # BB#0:
1413; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1414; X32-NEXT: movaps (%eax), %xmm0
1415; X32-NEXT: retl
1416;
1417; X64-LABEL: test_mm_load_si128:
1418; X64: # BB#0:
1419; X64-NEXT: movaps (%rdi), %xmm0
1420; X64-NEXT: retq
1421 %res = load <2 x i64>, <2 x i64>* %a0, align 16
1422 ret <2 x i64> %res
1423}
1424
1425define <2 x double> @test_mm_load1_pd(double* %a0) nounwind {
1426; X32-LABEL: test_mm_load1_pd:
1427; X32: # BB#0:
1428; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1429; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1430; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1431; X32-NEXT: retl
1432;
1433; X64-LABEL: test_mm_load1_pd:
1434; X64: # BB#0:
1435; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1436; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1437; X64-NEXT: retq
1438 %ld = load double, double* %a0, align 8
1439 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1440 %res1 = insertelement <2 x double> %res0, double %ld, i32 1
1441 ret <2 x double> %res1
1442}
1443
1444define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind {
1445; X32-LABEL: test_mm_loadh_pd:
1446; X32: # BB#0:
1447; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1448; X32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1449; X32-NEXT: retl
1450;
1451; X64-LABEL: test_mm_loadh_pd:
1452; X64: # BB#0:
1453; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1454; X64-NEXT: retq
1455 %ld = load double, double* %a1, align 8
1456 %res = insertelement <2 x double> %a0, double %ld, i32 1
1457 ret <2 x double> %res
1458}
1459
1460define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind {
1461; X32-LABEL: test_mm_loadl_epi64:
1462; X32: # BB#0:
1463; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1464; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1465; X32-NEXT: retl
1466;
1467; X64-LABEL: test_mm_loadl_epi64:
1468; X64: # BB#0:
1469; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1470; X64-NEXT: retq
1471 %bc = bitcast <2 x i64>* %a1 to i64*
1472 %ld = load i64, i64* %bc, align 1
1473 %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
1474 %res1 = insertelement <2 x i64> %res0, i64 0, i32 1
1475 ret <2 x i64> %res1
1476}
1477
1478define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind {
1479; X32-LABEL: test_mm_loadl_pd:
1480; X32: # BB#0:
1481; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1482; X32-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1483; X32-NEXT: retl
1484;
1485; X64-LABEL: test_mm_loadl_pd:
1486; X64: # BB#0:
1487; X64-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1488; X64-NEXT: retq
1489 %ld = load double, double* %a1, align 8
1490 %res = insertelement <2 x double> %a0, double %ld, i32 0
1491 ret <2 x double> %res
1492}
1493
1494define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
1495; X32-LABEL: test_mm_loadr_pd:
1496; X32: # BB#0:
1497; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1498; X32-NEXT: movapd (%eax), %xmm0
1499; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1500; X32-NEXT: retl
1501;
1502; X64-LABEL: test_mm_loadr_pd:
1503; X64: # BB#0:
1504; X64-NEXT: movapd (%rdi), %xmm0
1505; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1506; X64-NEXT: retq
1507 %arg0 = bitcast double* %a0 to <2 x double>*
1508 %ld = load <2 x double>, <2 x double>* %arg0, align 16
1509 %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1510 ret <2 x double> %res
1511}
1512
1513define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind {
1514; X32-LABEL: test_mm_loadu_pd:
1515; X32: # BB#0:
1516; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1517; X32-NEXT: movups (%eax), %xmm0
1518; X32-NEXT: retl
1519;
1520; X64-LABEL: test_mm_loadu_pd:
1521; X64: # BB#0:
1522; X64-NEXT: movups (%rdi), %xmm0
1523; X64-NEXT: retq
1524 %arg0 = bitcast double* %a0 to <2 x double>*
1525 %res = load <2 x double>, <2 x double>* %arg0, align 1
1526 ret <2 x double> %res
1527}
1528
1529define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind {
1530; X32-LABEL: test_mm_loadu_si128:
1531; X32: # BB#0:
1532; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1533; X32-NEXT: movups (%eax), %xmm0
1534; X32-NEXT: retl
1535;
1536; X64-LABEL: test_mm_loadu_si128:
1537; X64: # BB#0:
1538; X64-NEXT: movups (%rdi), %xmm0
1539; X64-NEXT: retq
1540 %res = load <2 x i64>, <2 x i64>* %a0, align 1
1541 ret <2 x i64> %res
1542}
1543
1544define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1545; X32-LABEL: test_mm_madd_epi16:
1546; X32: # BB#0:
1547; X32-NEXT: pmaddwd %xmm1, %xmm0
1548; X32-NEXT: retl
1549;
1550; X64-LABEL: test_mm_madd_epi16:
1551; X64: # BB#0:
1552; X64-NEXT: pmaddwd %xmm1, %xmm0
1553; X64-NEXT: retq
1554 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1555 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1556 %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1)
1557 %bc = bitcast <4 x i32> %res to <2 x i64>
1558 ret <2 x i64> %bc
1559}
1560declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
1561
1562define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind {
1563; X32-LABEL: test_mm_maskmoveu_si128:
1564; X32: # BB#0:
1565; X32-NEXT: pushl %edi
1566; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
1567; X32-NEXT: maskmovdqu %xmm1, %xmm0
1568; X32-NEXT: popl %edi
1569; X32-NEXT: retl
1570;
1571; X64-LABEL: test_mm_maskmoveu_si128:
1572; X64: # BB#0:
1573; X64-NEXT: maskmovdqu %xmm1, %xmm0
1574; X64-NEXT: retq
1575 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1576 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1577 call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2)
1578 ret void
1579}
1580declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
1581
1582define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1583; X32-LABEL: test_mm_max_epi16:
1584; X32: # BB#0:
1585; X32-NEXT: pmaxsw %xmm1, %xmm0
1586; X32-NEXT: retl
1587;
1588; X64-LABEL: test_mm_max_epi16:
1589; X64: # BB#0:
1590; X64-NEXT: pmaxsw %xmm1, %xmm0
1591; X64-NEXT: retq
1592 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1593 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1594 %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %arg0, <8 x i16> %arg1)
1595 %bc = bitcast <8 x i16> %res to <2 x i64>
1596 ret <2 x i64> %bc
1597}
1598declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
1599
1600define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1601; X32-LABEL: test_mm_max_epu8:
1602; X32: # BB#0:
1603; X32-NEXT: pmaxub %xmm1, %xmm0
1604; X32-NEXT: retl
1605;
1606; X64-LABEL: test_mm_max_epu8:
1607; X64: # BB#0:
1608; X64-NEXT: pmaxub %xmm1, %xmm0
1609; X64-NEXT: retq
1610 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1611 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1612 %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %arg0, <16 x i8> %arg1)
1613 %bc = bitcast <16 x i8> %res to <2 x i64>
1614 ret <2 x i64> %bc
1615}
1616declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
1617
1618define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1619; X32-LABEL: test_mm_max_pd:
1620; X32: # BB#0:
1621; X32-NEXT: maxpd %xmm1, %xmm0
1622; X32-NEXT: retl
1623;
1624; X64-LABEL: test_mm_max_pd:
1625; X64: # BB#0:
1626; X64-NEXT: maxpd %xmm1, %xmm0
1627; X64-NEXT: retq
1628 %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
1629 ret <2 x double> %res
1630}
1631declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
1632
1633define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1634; X32-LABEL: test_mm_max_sd:
1635; X32: # BB#0:
1636; X32-NEXT: maxsd %xmm1, %xmm0
1637; X32-NEXT: retl
1638;
1639; X64-LABEL: test_mm_max_sd:
1640; X64: # BB#0:
1641; X64-NEXT: maxsd %xmm1, %xmm0
1642; X64-NEXT: retq
1643 %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
1644 ret <2 x double> %res
1645}
1646declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
1647
1648define void @test_mm_mfence() nounwind {
1649; X32-LABEL: test_mm_mfence:
1650; X32: # BB#0:
1651; X32-NEXT: mfence
1652; X32-NEXT: retl
1653;
1654; X64-LABEL: test_mm_mfence:
1655; X64: # BB#0:
1656; X64-NEXT: mfence
1657; X64-NEXT: retq
1658 call void @llvm.x86.sse2.mfence()
1659 ret void
1660}
1661declare void @llvm.x86.sse2.mfence() nounwind readnone
1662
1663define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1664; X32-LABEL: test_mm_min_epi16:
1665; X32: # BB#0:
1666; X32-NEXT: pminsw %xmm1, %xmm0
1667; X32-NEXT: retl
1668;
1669; X64-LABEL: test_mm_min_epi16:
1670; X64: # BB#0:
1671; X64-NEXT: pminsw %xmm1, %xmm0
1672; X64-NEXT: retq
1673 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1674 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1675 %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %arg0, <8 x i16> %arg1)
1676 %bc = bitcast <8 x i16> %res to <2 x i64>
1677 ret <2 x i64> %bc
1678}
1679declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
1680
1681define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1682; X32-LABEL: test_mm_min_epu8:
1683; X32: # BB#0:
1684; X32-NEXT: pminub %xmm1, %xmm0
1685; X32-NEXT: retl
1686;
1687; X64-LABEL: test_mm_min_epu8:
1688; X64: # BB#0:
1689; X64-NEXT: pminub %xmm1, %xmm0
1690; X64-NEXT: retq
1691 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1692 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1693 %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %arg0, <16 x i8> %arg1)
1694 %bc = bitcast <16 x i8> %res to <2 x i64>
1695 ret <2 x i64> %bc
1696}
1697declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
1698
1699define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1700; X32-LABEL: test_mm_min_pd:
1701; X32: # BB#0:
1702; X32-NEXT: minpd %xmm1, %xmm0
1703; X32-NEXT: retl
1704;
1705; X64-LABEL: test_mm_min_pd:
1706; X64: # BB#0:
1707; X64-NEXT: minpd %xmm1, %xmm0
1708; X64-NEXT: retq
1709 %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
1710 ret <2 x double> %res
1711}
1712declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
1713
1714define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1715; X32-LABEL: test_mm_min_sd:
1716; X32: # BB#0:
1717; X32-NEXT: minsd %xmm1, %xmm0
1718; X32-NEXT: retl
1719;
1720; X64-LABEL: test_mm_min_sd:
1721; X64: # BB#0:
1722; X64-NEXT: minsd %xmm1, %xmm0
1723; X64-NEXT: retq
1724 %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
1725 ret <2 x double> %res
1726}
1727declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
1728
Simon Pilgrim47825fa2016-05-19 11:59:57 +00001729define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind {
1730; X32-LABEL: test_mm_move_epi64:
1731; X32: # BB#0:
1732; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1733; X32-NEXT: retl
1734;
1735; X64-LABEL: test_mm_move_epi64:
1736; X64: # BB#0:
1737; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1738; X64-NEXT: retq
1739 %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
1740 ret <2 x i64> %res
1741}
1742
1743define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1744; X32-LABEL: test_mm_move_sd:
1745; X32: # BB#0:
1746; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1747; X32-NEXT: retl
1748;
1749; X64-LABEL: test_mm_move_sd:
1750; X64: # BB#0:
1751; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1752; X64-NEXT: retq
1753 %ext0 = extractelement <2 x double> %a1, i32 0
1754 %res0 = insertelement <2 x double> undef, double %ext0, i32 0
1755 %ext1 = extractelement <2 x double> %a0, i32 1
1756 %res1 = insertelement <2 x double> %res0, double %ext1, i32 1
1757 ret <2 x double> %res1
1758}
1759
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001760define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind {
1761; X32-LABEL: test_mm_movemask_epi8:
1762; X32: # BB#0:
1763; X32-NEXT: pmovmskb %xmm0, %eax
1764; X32-NEXT: retl
1765;
1766; X64-LABEL: test_mm_movemask_epi8:
1767; X64: # BB#0:
1768; X64-NEXT: pmovmskb %xmm0, %eax
1769; X64-NEXT: retq
1770 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1771 %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0)
1772 ret i32 %res
1773}
1774declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
1775
1776define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind {
1777; X32-LABEL: test_mm_movemask_pd:
1778; X32: # BB#0:
1779; X32-NEXT: movmskpd %xmm0, %eax
1780; X32-NEXT: retl
1781;
1782; X64-LABEL: test_mm_movemask_pd:
1783; X64: # BB#0:
1784; X64-NEXT: movmskpd %xmm0, %eax
1785; X64-NEXT: retq
1786 %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
1787 ret i32 %res
1788}
1789declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
1790
1791define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) {
1792; X32-LABEL: test_mm_mul_epu32:
1793; X32: # BB#0:
1794; X32-NEXT: pmuludq %xmm1, %xmm0
1795; X32-NEXT: retl
1796;
1797; X64-LABEL: test_mm_mul_epu32:
1798; X64: # BB#0:
1799; X64-NEXT: pmuludq %xmm1, %xmm0
1800; X64-NEXT: retq
1801 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1802 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1803 %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %arg0, <4 x i32> %arg1)
1804 ret <2 x i64> %res
1805}
1806declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
1807
1808define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1809; X32-LABEL: test_mm_mul_pd:
1810; X32: # BB#0:
1811; X32-NEXT: mulpd %xmm1, %xmm0
1812; X32-NEXT: retl
1813;
1814; X64-LABEL: test_mm_mul_pd:
1815; X64: # BB#0:
1816; X64-NEXT: mulpd %xmm1, %xmm0
1817; X64-NEXT: retq
1818 %res = fmul <2 x double> %a0, %a1
1819 ret <2 x double> %res
1820}
1821
1822define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1823; X32-LABEL: test_mm_mul_sd:
1824; X32: # BB#0:
1825; X32-NEXT: mulsd %xmm1, %xmm0
1826; X32-NEXT: retl
1827;
1828; X64-LABEL: test_mm_mul_sd:
1829; X64: # BB#0:
1830; X64-NEXT: mulsd %xmm1, %xmm0
1831; X64-NEXT: retq
1832 %ext0 = extractelement <2 x double> %a0, i32 0
1833 %ext1 = extractelement <2 x double> %a1, i32 0
1834 %fmul = fmul double %ext0, %ext1
1835 %res = insertelement <2 x double> %a0, double %fmul, i32 0
1836 ret <2 x double> %res
1837}
1838
1839define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1840; X32-LABEL: test_mm_mulhi_epi16:
1841; X32: # BB#0:
1842; X32-NEXT: pmulhw %xmm1, %xmm0
1843; X32-NEXT: retl
1844;
1845; X64-LABEL: test_mm_mulhi_epi16:
1846; X64: # BB#0:
1847; X64-NEXT: pmulhw %xmm1, %xmm0
1848; X64-NEXT: retq
1849 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1850 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1851 %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1)
1852 %bc = bitcast <8 x i16> %res to <2 x i64>
1853 ret <2 x i64> %bc
1854}
1855declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
1856
1857define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) {
1858; X32-LABEL: test_mm_mulhi_epu16:
1859; X32: # BB#0:
1860; X32-NEXT: pmulhuw %xmm1, %xmm0
1861; X32-NEXT: retl
1862;
1863; X64-LABEL: test_mm_mulhi_epu16:
1864; X64: # BB#0:
1865; X64-NEXT: pmulhuw %xmm1, %xmm0
1866; X64-NEXT: retq
1867 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1868 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1869 %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1)
1870 %bc = bitcast <8 x i16> %res to <2 x i64>
1871 ret <2 x i64> %bc
1872}
1873declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
1874
1875define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1876; X32-LABEL: test_mm_mullo_epi16:
1877; X32: # BB#0:
1878; X32-NEXT: pmullw %xmm1, %xmm0
1879; X32-NEXT: retl
1880;
1881; X64-LABEL: test_mm_mullo_epi16:
1882; X64: # BB#0:
1883; X64-NEXT: pmullw %xmm1, %xmm0
1884; X64-NEXT: retq
1885 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1886 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1887 %res = mul <8 x i16> %arg0, %arg1
1888 %bc = bitcast <8 x i16> %res to <2 x i64>
1889 ret <2 x i64> %bc
1890}
1891
1892define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1893; X32-LABEL: test_mm_or_pd:
1894; X32: # BB#0:
1895; X32-NEXT: orps %xmm1, %xmm0
1896; X32-NEXT: retl
1897;
1898; X64-LABEL: test_mm_or_pd:
1899; X64: # BB#0:
1900; X64-NEXT: orps %xmm1, %xmm0
1901; X64-NEXT: retq
1902 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
1903 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
1904 %res = or <4 x i32> %arg0, %arg1
1905 %bc = bitcast <4 x i32> %res to <2 x double>
1906 ret <2 x double> %bc
1907}
1908
1909define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1910; X32-LABEL: test_mm_or_si128:
1911; X32: # BB#0:
1912; X32-NEXT: orps %xmm1, %xmm0
1913; X32-NEXT: retl
1914;
1915; X64-LABEL: test_mm_or_si128:
1916; X64: # BB#0:
1917; X64-NEXT: orps %xmm1, %xmm0
1918; X64-NEXT: retq
1919 %res = or <2 x i64> %a0, %a1
1920 ret <2 x i64> %res
1921}
1922
1923define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1924; X32-LABEL: test_mm_packs_epi16:
1925; X32: # BB#0:
1926; X32-NEXT: packsswb %xmm1, %xmm0
1927; X32-NEXT: retl
1928;
1929; X64-LABEL: test_mm_packs_epi16:
1930; X64: # BB#0:
1931; X64-NEXT: packsswb %xmm1, %xmm0
1932; X64-NEXT: retq
1933 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1934 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1935 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
1936 %bc = bitcast <16 x i8> %res to <2 x i64>
1937 ret <2 x i64> %bc
1938}
1939declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
1940
1941define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) {
1942; X32-LABEL: test_mm_packs_epi32:
1943; X32: # BB#0:
1944; X32-NEXT: packssdw %xmm1, %xmm0
1945; X32-NEXT: retl
1946;
1947; X64-LABEL: test_mm_packs_epi32:
1948; X64: # BB#0:
1949; X64-NEXT: packssdw %xmm1, %xmm0
1950; X64-NEXT: retq
1951 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1952 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1953 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1)
1954 %bc = bitcast <8 x i16> %res to <2 x i64>
1955 ret <2 x i64> %bc
1956}
1957declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
1958
; test_mm_packus_epi16: llvm.x86.sse2.packuswb.128 (unsigned-saturating pack of
; two <8 x i16> into <16 x i8>) must lower to a single packuswb.
1959define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1960; X32-LABEL: test_mm_packus_epi16:
1961; X32: # BB#0:
1962; X32-NEXT: packuswb %xmm1, %xmm0
1963; X32-NEXT: retl
1964;
1965; X64-LABEL: test_mm_packus_epi16:
1966; X64: # BB#0:
1967; X64-NEXT: packuswb %xmm1, %xmm0
1968; X64-NEXT: retq
1969 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1970 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1971 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
1972 %bc = bitcast <16 x i8> %res to <2 x i64>
1973 ret <2 x i64> %bc
1974}
1975declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
1976
; test_mm_pause: llvm.x86.sse2.pause must lower to the spin-loop hint
; instruction 'pause' on both targets.
1977define void @test_mm_pause() nounwind {
1978; X32-LABEL: test_mm_pause:
1979; X32: # BB#0:
1980; X32-NEXT: pause
1981; X32-NEXT: retl
1982;
1983; X64-LABEL: test_mm_pause:
1984; X64: # BB#0:
1985; X64-NEXT: pause
1986; X64-NEXT: retq
1987 call void @llvm.x86.sse2.pause()
1988 ret void
1989}
1990declare void @llvm.x86.sse2.pause() nounwind readnone
1991
; test_mm_sad_epu8: llvm.x86.sse2.psad.bw (sum of absolute byte differences)
; must lower to a single psadbw.
1992define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1993; X32-LABEL: test_mm_sad_epu8:
1994; X32: # BB#0:
1995; X32-NEXT: psadbw %xmm1, %xmm0
1996; X32-NEXT: retl
1997;
1998; X64-LABEL: test_mm_sad_epu8:
1999; X64: # BB#0:
2000; X64-NEXT: psadbw %xmm1, %xmm0
2001; X64-NEXT: retq
2002 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2003 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2004 %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1)
2005 ret <2 x i64> %res
2006}
2007declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
2008
; test_mm_set_epi8: builds <16 x i8> from 16 scalars inserted in REVERSE order
; (%a15 at element 0 ... %a0 at element 15), matching _mm_set_epi8 semantics.
; Checks the fast-isel movzbl/movd/punpcklbw build sequence.
Simon Pilgrim01809e02016-05-19 10:58:54 +00002009define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
2010; X32-LABEL: test_mm_set_epi8:
2011; X32: # BB#0:
2012; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2013; X32-NEXT: movd %eax, %xmm0
2014; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2015; X32-NEXT: movd %eax, %xmm1
2016; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2017; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2018; X32-NEXT: movd %eax, %xmm0
2019; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2020; X32-NEXT: movd %eax, %xmm2
2021; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2022; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2023; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2024; X32-NEXT: movd %eax, %xmm0
2025; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2026; X32-NEXT: movd %eax, %xmm3
2027; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2028; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2029; X32-NEXT: movd %eax, %xmm0
2030; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2031; X32-NEXT: movd %eax, %xmm1
2032; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2033; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2034; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2035; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2036; X32-NEXT: movd %eax, %xmm0
2037; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2038; X32-NEXT: movd %eax, %xmm2
2039; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2040; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2041; X32-NEXT: movd %eax, %xmm0
2042; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2043; X32-NEXT: movd %eax, %xmm3
2044; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2045; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2046; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2047; X32-NEXT: movd %eax, %xmm0
2048; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2049; X32-NEXT: movd %eax, %xmm2
2050; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2051; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2052; X32-NEXT: movd %eax, %xmm4
2053; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2054; X32-NEXT: movd %eax, %xmm0
2055; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2056; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2057; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2058; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2059; X32-NEXT: retl
2060;
2061; X64-LABEL: test_mm_set_epi8:
2062; X64: # BB#0:
2063; X64-NEXT: movzbl %dil, %eax
2064; X64-NEXT: movd %eax, %xmm0
2065; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2066; X64-NEXT: movd %eax, %xmm1
2067; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2068; X64-NEXT: movzbl %r8b, %eax
2069; X64-NEXT: movd %eax, %xmm0
2070; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2071; X64-NEXT: movd %eax, %xmm2
2072; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2073; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2074; X64-NEXT: movzbl %dl, %eax
2075; X64-NEXT: movd %eax, %xmm0
2076; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2077; X64-NEXT: movd %eax, %xmm3
2078; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2079; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2080; X64-NEXT: movd %eax, %xmm0
2081; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2082; X64-NEXT: movd %eax, %xmm1
2083; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2084; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2085; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2086; X64-NEXT: movzbl %sil, %eax
2087; X64-NEXT: movd %eax, %xmm0
2088; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2089; X64-NEXT: movd %eax, %xmm2
2090; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2091; X64-NEXT: movzbl %r9b, %eax
2092; X64-NEXT: movd %eax, %xmm0
2093; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2094; X64-NEXT: movd %eax, %xmm3
2095; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2096; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2097; X64-NEXT: movzbl %cl, %eax
2098; X64-NEXT: movd %eax, %xmm0
2099; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2100; X64-NEXT: movd %eax, %xmm2
2101; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2102; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2103; X64-NEXT: movd %eax, %xmm4
2104; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2105; X64-NEXT: movd %eax, %xmm0
2106; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2107; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2108; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2109; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2110; X64-NEXT: retq
2111 %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0
2112 %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1
2113 %res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2
2114 %res3 = insertelement <16 x i8> %res2, i8 %a12, i32 3
2115 %res4 = insertelement <16 x i8> %res3, i8 %a11, i32 4
2116 %res5 = insertelement <16 x i8> %res4, i8 %a10, i32 5
2117 %res6 = insertelement <16 x i8> %res5, i8 %a9 , i32 6
2118 %res7 = insertelement <16 x i8> %res6, i8 %a8 , i32 7
2119 %res8 = insertelement <16 x i8> %res7, i8 %a7 , i32 8
2120 %res9 = insertelement <16 x i8> %res8, i8 %a6 , i32 9
2121 %res10 = insertelement <16 x i8> %res9, i8 %a5 , i32 10
2122 %res11 = insertelement <16 x i8> %res10, i8 %a4 , i32 11
2123 %res12 = insertelement <16 x i8> %res11, i8 %a3 , i32 12
2124 %res13 = insertelement <16 x i8> %res12, i8 %a2 , i32 13
2125 %res14 = insertelement <16 x i8> %res13, i8 %a1 , i32 14
2126 %res15 = insertelement <16 x i8> %res14, i8 %a0 , i32 15
2127 %res = bitcast <16 x i8> %res15 to <2 x i64>
2128 ret <2 x i64> %res
2129}
2130
; test_mm_set_epi16: builds <8 x i16> from 8 scalars inserted in REVERSE order
; (%a7 at element 0 ... %a0 at element 7), matching _mm_set_epi16 semantics.
; Checks the movd/punpcklwd merge tree produced by fast-isel.
2131define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
2132; X32-LABEL: test_mm_set_epi16:
2133; X32: # BB#0:
2134; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2135; X32-NEXT: movd %eax, %xmm1
2136; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2137; X32-NEXT: movd %eax, %xmm2
2138; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2139; X32-NEXT: movd %eax, %xmm3
2140; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2141; X32-NEXT: movd %eax, %xmm4
2142; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2143; X32-NEXT: movd %eax, %xmm5
2144; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2145; X32-NEXT: movd %eax, %xmm6
2146; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2147; X32-NEXT: movd %eax, %xmm7
2148; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2149; X32-NEXT: movd %eax, %xmm0
2150; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2151; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2152; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2153; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
2154; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
2155; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
2156; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
2157; X32-NEXT: retl
2158;
2159; X64-LABEL: test_mm_set_epi16:
2160; X64: # BB#0:
2161; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
2162; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
2163; X64-NEXT: movd %edi, %xmm0
2164; X64-NEXT: movd %r8d, %xmm1
2165; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2166; X64-NEXT: movd %edx, %xmm0
2167; X64-NEXT: movd %eax, %xmm2
2168; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2169; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2170; X64-NEXT: movd %esi, %xmm0
2171; X64-NEXT: movd %r9d, %xmm1
2172; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2173; X64-NEXT: movd %ecx, %xmm3
2174; X64-NEXT: movd %r10d, %xmm0
2175; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
2176; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2177; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2178; X64-NEXT: retq
2179 %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0
2180 %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1
2181 %res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2
2182 %res3 = insertelement <8 x i16> %res2, i16 %a4, i32 3
2183 %res4 = insertelement <8 x i16> %res3, i16 %a3, i32 4
2184 %res5 = insertelement <8 x i16> %res4, i16 %a2, i32 5
2185 %res6 = insertelement <8 x i16> %res5, i16 %a1, i32 6
2186 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
2187 %res = bitcast <8 x i16> %res7 to <2 x i64>
2188 ret <2 x i64> %res
2189}
2190
; test_mm_set_epi32: builds <4 x i32> from 4 scalars inserted in REVERSE order
; (%a3 at element 0 ... %a0 at element 3), matching _mm_set_epi32 semantics.
; Checks the movd/punpckldq merge tree.
2191define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
2192; X32-LABEL: test_mm_set_epi32:
2193; X32: # BB#0:
2194; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2195; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2196; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2197; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2198; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2199; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2200; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2201; X32-NEXT: retl
2202;
2203; X64-LABEL: test_mm_set_epi32:
2204; X64: # BB#0:
2205; X64-NEXT: movd %edi, %xmm0
2206; X64-NEXT: movd %edx, %xmm1
2207; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2208; X64-NEXT: movd %esi, %xmm2
2209; X64-NEXT: movd %ecx, %xmm0
2210; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2211; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2212; X64-NEXT: retq
2213 %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0
2214 %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1
2215 %res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2
2216 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
2217 %res = bitcast <4 x i32> %res3 to <2 x i64>
2218 ret <2 x i64> %res
2219}
2220
2221; TODO test_mm_set_epi64
2222
; test_mm_set_epi64x: builds <2 x i64> with %a1 at element 0 and %a0 at
; element 1 (_mm_set_epi64x order). X64 uses two 64-bit movd + punpcklqdq;
; X32 assembles each i64 from two 32-bit halves with punpckldq.
2223define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
2224; X32-LABEL: test_mm_set_epi64x:
2225; X32: # BB#0:
2226; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2227; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2228; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2229; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2230; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2231; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2232; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2233; X32-NEXT: retl
2234;
2235; X64-LABEL: test_mm_set_epi64x:
2236; X64: # BB#0:
2237; X64-NEXT: movd %rdi, %xmm1
2238; X64-NEXT: movd %rsi, %xmm0
2239; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2240; X64-NEXT: retq
2241 %res0 = insertelement <2 x i64> undef, i64 %a1, i32 0
2242 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
2243 ret <2 x i64> %res1
2244}
2245
; test_mm_set_pd: builds <2 x double> with %a1 at element 0 and %a0 at
; element 1 (_mm_set_pd order); expects an unpcklpd merge.
2246define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind {
2247; X32-LABEL: test_mm_set_pd:
2248; X32: # BB#0:
2249; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2250; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2251; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2252; X32-NEXT: retl
2253;
2254; X64-LABEL: test_mm_set_pd:
2255; X64: # BB#0:
2256; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2257; X64-NEXT: movapd %xmm1, %xmm0
2258; X64-NEXT: retq
2259 %res0 = insertelement <2 x double> undef, double %a1, i32 0
2260 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
2261 ret <2 x double> %res1
2262}
2263
; test_mm_set_sd: %a0 in element 0, 0.0 in element 1 (_mm_set_sd); the
; upper-lane zeroing is expected to come from a movq.
2264define <2 x double> @test_mm_set_sd(double %a0) nounwind {
2265; X32-LABEL: test_mm_set_sd:
2266; X32: # BB#0:
2267; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2268; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2269; X32-NEXT: retl
2270;
2271; X64-LABEL: test_mm_set_sd:
2272; X64: # BB#0:
2273; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2274; X64-NEXT: retq
2275 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2276 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
2277 ret <2 x double> %res1
2278}
2279
; test_mm_set1_epi8: splats one i8 into all 16 lanes (_mm_set1_epi8);
; expects a punpcklbw + pshuflw + pshufd broadcast sequence.
2280define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
2281; X32-LABEL: test_mm_set1_epi8:
2282; X32: # BB#0:
2283; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2284; X32-NEXT: movd %eax, %xmm0
2285; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2286; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2287; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2288; X32-NEXT: retl
2289;
2290; X64-LABEL: test_mm_set1_epi8:
2291; X64: # BB#0:
2292; X64-NEXT: movzbl %dil, %eax
2293; X64-NEXT: movd %eax, %xmm0
2294; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2295; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2296; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2297; X64-NEXT: retq
2298 %res0 = insertelement <16 x i8> undef, i8 %a0, i32 0
2299 %res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1
2300 %res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2
2301 %res3 = insertelement <16 x i8> %res2, i8 %a0, i32 3
2302 %res4 = insertelement <16 x i8> %res3, i8 %a0, i32 4
2303 %res5 = insertelement <16 x i8> %res4, i8 %a0, i32 5
2304 %res6 = insertelement <16 x i8> %res5, i8 %a0, i32 6
2305 %res7 = insertelement <16 x i8> %res6, i8 %a0, i32 7
2306 %res8 = insertelement <16 x i8> %res7, i8 %a0, i32 8
2307 %res9 = insertelement <16 x i8> %res8, i8 %a0, i32 9
2308 %res10 = insertelement <16 x i8> %res9, i8 %a0, i32 10
2309 %res11 = insertelement <16 x i8> %res10, i8 %a0, i32 11
2310 %res12 = insertelement <16 x i8> %res11, i8 %a0, i32 12
2311 %res13 = insertelement <16 x i8> %res12, i8 %a0, i32 13
2312 %res14 = insertelement <16 x i8> %res13, i8 %a0, i32 14
2313 %res15 = insertelement <16 x i8> %res14, i8 %a0, i32 15
2314 %res = bitcast <16 x i8> %res15 to <2 x i64>
2315 ret <2 x i64> %res
2316}
2317
; test_mm_set1_epi16: splats one i16 into all 8 lanes (_mm_set1_epi16);
; expects a pshuflw + pshufd broadcast.
2318define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind {
2319; X32-LABEL: test_mm_set1_epi16:
2320; X32: # BB#0:
2321; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2322; X32-NEXT: movd %eax, %xmm0
2323; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2324; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2325; X32-NEXT: retl
2326;
2327; X64-LABEL: test_mm_set1_epi16:
2328; X64: # BB#0:
2329; X64-NEXT: movd %edi, %xmm0
2330; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2331; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2332; X64-NEXT: retq
2333 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
2334 %res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1
2335 %res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2
2336 %res3 = insertelement <8 x i16> %res2, i16 %a0, i32 3
2337 %res4 = insertelement <8 x i16> %res3, i16 %a0, i32 4
2338 %res5 = insertelement <8 x i16> %res4, i16 %a0, i32 5
2339 %res6 = insertelement <8 x i16> %res5, i16 %a0, i32 6
2340 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
2341 %res = bitcast <8 x i16> %res7 to <2 x i64>
2342 ret <2 x i64> %res
2343}
2344
; test_mm_set1_epi32: splats one i32 into all 4 lanes (_mm_set1_epi32);
; expects a single pshufd broadcast.
2345define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind {
2346; X32-LABEL: test_mm_set1_epi32:
2347; X32: # BB#0:
2348; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2349; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2350; X32-NEXT: retl
2351;
2352; X64-LABEL: test_mm_set1_epi32:
2353; X64: # BB#0:
2354; X64-NEXT: movd %edi, %xmm0
2355; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2356; X64-NEXT: retq
2357 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
2358 %res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1
2359 %res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2
2360 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
2361 %res = bitcast <4 x i32> %res3 to <2 x i64>
2362 ret <2 x i64> %res
2363}
2364
2365; TODO test_mm_set1_epi64
2366
; test_mm_set1_epi64x: splats one i64 into both lanes (_mm_set1_epi64x).
; X64: 64-bit movd + pshufd[0,1,0,1]; X32: splat each 32-bit half then
; interleave with punpckldq.
2367define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
2368; X32-LABEL: test_mm_set1_epi64x:
2369; X32: # BB#0:
2370; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2371; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2372; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2373; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
2374; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2375; X32-NEXT: retl
2376;
2377; X64-LABEL: test_mm_set1_epi64x:
2378; X64: # BB#0:
2379; X64-NEXT: movd %rdi, %xmm0
2380; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2381; X64-NEXT: retq
2382 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
2383 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
2384 ret <2 x i64> %res1
2385}
2386
; test_mm_set1_pd: splats one double into both lanes (_mm_set1_pd);
; expects a movlhps self-unpack.
2387define <2 x double> @test_mm_set1_pd(double %a0) nounwind {
2388; X32-LABEL: test_mm_set1_pd:
2389; X32: # BB#0:
2390; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2391; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2392; X32-NEXT: retl
2393;
2394; X64-LABEL: test_mm_set1_pd:
2395; X64: # BB#0:
2396; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2397; X64-NEXT: retq
2398 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2399 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
2400 ret <2 x double> %res1
2401}
2402
; test_mm_setr_epi8: builds <16 x i8> from 16 scalars inserted IN ORDER
; (%a0 at element 0 ... %a15 at element 15), matching _mm_setr_epi8 — the
; mirror of test_mm_set_epi8 above. Same movzbl/movd/punpcklbw build tree.
2403define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
2404; X32-LABEL: test_mm_setr_epi8:
2405; X32: # BB#0:
2406; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2407; X32-NEXT: movd %eax, %xmm0
2408; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2409; X32-NEXT: movd %eax, %xmm1
2410; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2411; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2412; X32-NEXT: movd %eax, %xmm0
2413; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2414; X32-NEXT: movd %eax, %xmm2
2415; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2416; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2417; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2418; X32-NEXT: movd %eax, %xmm0
2419; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2420; X32-NEXT: movd %eax, %xmm3
2421; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2422; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2423; X32-NEXT: movd %eax, %xmm0
2424; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2425; X32-NEXT: movd %eax, %xmm1
2426; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2427; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2428; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2429; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2430; X32-NEXT: movd %eax, %xmm0
2431; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2432; X32-NEXT: movd %eax, %xmm2
2433; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2434; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2435; X32-NEXT: movd %eax, %xmm0
2436; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2437; X32-NEXT: movd %eax, %xmm3
2438; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2439; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2440; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2441; X32-NEXT: movd %eax, %xmm0
2442; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2443; X32-NEXT: movd %eax, %xmm2
2444; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2445; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2446; X32-NEXT: movd %eax, %xmm4
2447; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2448; X32-NEXT: movd %eax, %xmm0
2449; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2450; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2451; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2452; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2453; X32-NEXT: retl
2454;
2455; X64-LABEL: test_mm_setr_epi8:
2456; X64: # BB#0:
2457; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2458; X64-NEXT: movd %eax, %xmm0
2459; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2460; X64-NEXT: movd %eax, %xmm1
2461; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2462; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2463; X64-NEXT: movd %eax, %xmm0
2464; X64-NEXT: movzbl %cl, %eax
2465; X64-NEXT: movd %eax, %xmm2
2466; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2467; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2468; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2469; X64-NEXT: movd %eax, %xmm0
2470; X64-NEXT: movzbl %r9b, %eax
2471; X64-NEXT: movd %eax, %xmm3
2472; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2473; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2474; X64-NEXT: movd %eax, %xmm0
2475; X64-NEXT: movzbl %sil, %eax
2476; X64-NEXT: movd %eax, %xmm1
2477; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2478; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2479; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2480; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2481; X64-NEXT: movd %eax, %xmm0
2482; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2483; X64-NEXT: movd %eax, %xmm2
2484; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2485; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2486; X64-NEXT: movd %eax, %xmm0
2487; X64-NEXT: movzbl %dl, %eax
2488; X64-NEXT: movd %eax, %xmm3
2489; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2490; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2491; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2492; X64-NEXT: movd %eax, %xmm0
2493; X64-NEXT: movzbl %r8b, %eax
2494; X64-NEXT: movd %eax, %xmm2
2495; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2496; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2497; X64-NEXT: movd %eax, %xmm4
2498; X64-NEXT: movzbl %dil, %eax
2499; X64-NEXT: movd %eax, %xmm0
2500; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2501; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2502; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2503; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2504; X64-NEXT: retq
2505 %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0
2506 %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1
2507 %res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2
2508 %res3 = insertelement <16 x i8> %res2, i8 %a3 , i32 3
2509 %res4 = insertelement <16 x i8> %res3, i8 %a4 , i32 4
2510 %res5 = insertelement <16 x i8> %res4, i8 %a5 , i32 5
2511 %res6 = insertelement <16 x i8> %res5, i8 %a6 , i32 6
2512 %res7 = insertelement <16 x i8> %res6, i8 %a7 , i32 7
2513 %res8 = insertelement <16 x i8> %res7, i8 %a8 , i32 8
2514 %res9 = insertelement <16 x i8> %res8, i8 %a9 , i32 9
2515 %res10 = insertelement <16 x i8> %res9, i8 %a10, i32 10
2516 %res11 = insertelement <16 x i8> %res10, i8 %a11, i32 11
2517 %res12 = insertelement <16 x i8> %res11, i8 %a12, i32 12
2518 %res13 = insertelement <16 x i8> %res12, i8 %a13, i32 13
2519 %res14 = insertelement <16 x i8> %res13, i8 %a14, i32 14
2520 %res15 = insertelement <16 x i8> %res14, i8 %a15, i32 15
2521 %res = bitcast <16 x i8> %res15 to <2 x i64>
2522 ret <2 x i64> %res
2523}
2524
; _mm_setr_epi16: build <8 x i16> from %a0..%a7 in argument order, then
; bitcast to <2 x i64>; checks the expected movd/punpcklwd assembly tree.
define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
; X32-LABEL: test_mm_setr_epi16:
; X32: # BB#0:
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: movd %eax, %xmm1
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: movd %eax, %xmm2
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: movd %eax, %xmm3
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: movd %eax, %xmm4
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: movd %eax, %xmm5
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: movd %eax, %xmm6
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: movd %eax, %xmm7
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_epi16:
; X64: # BB#0:
; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: movd %ecx, %xmm1
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-NEXT: movd %r9d, %xmm0
; X64-NEXT: movd %esi, %xmm2
; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X64-NEXT: movd %r10d, %xmm0
; X64-NEXT: movd %edx, %xmm1
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-NEXT: movd %r8d, %xmm3
; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; X64-NEXT: retq
  %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
  %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1
  %res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2
  %res3 = insertelement <8 x i16> %res2, i16 %a3, i32 3
  %res4 = insertelement <8 x i16> %res3, i16 %a4, i32 4
  %res5 = insertelement <8 x i16> %res4, i16 %a5, i32 5
  %res6 = insertelement <8 x i16> %res5, i16 %a6, i32 6
  %res7 = insertelement <8 x i16> %res6, i16 %a7, i32 7
  %res = bitcast <8 x i16> %res7 to <2 x i64>
  ret <2 x i64> %res
}

; _mm_setr_epi32: build <4 x i32> from %a0..%a3 in argument order; expect
; movd + punpckldq lowering on both targets.
define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
; X32-LABEL: test_mm_setr_epi32:
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_epi32:
; X64: # BB#0:
; X64-NEXT: movd %ecx, %xmm0
; X64-NEXT: movd %esi, %xmm1
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: movd %edx, %xmm2
; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: retq
  %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1
  %res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2
  %res3 = insertelement <4 x i32> %res2, i32 %a3, i32 3
  %res = bitcast <4 x i32> %res3 to <2 x i64>
  ret <2 x i64> %res
}

; TODO test_mm_setr_epi64

; _mm_setr_epi64x: build <2 x i64> from %a0, %a1 in order; x64 lowers to a
; single punpcklqdq of the two GPR moves.
define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: test_mm_setr_epi64x:
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_epi64x:
; X64: # BB#0:
; X64-NEXT: movd %rsi, %xmm1
; X64-NEXT: movd %rdi, %xmm0
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
  %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
  %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1
  ret <2 x i64> %res1
}

; _mm_setr_pd: build <2 x double> from %a0, %a1 in order; expect unpcklpd.
define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind {
; X32-LABEL: test_mm_setr_pd:
; X32: # BB#0:
; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_pd:
; X64: # BB#0:
; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
  %res0 = insertelement <2 x double> undef, double %a0, i32 0
  %res1 = insertelement <2 x double> %res0, double %a1, i32 1
  ret <2 x double> %res1
}

; _mm_setzero_pd / _mm_setzero_si128: both lower to a single xorps.
define <2 x double> @test_mm_setzero_pd() {
; X32-LABEL: test_mm_setzero_pd:
; X32: # BB#0:
; X32-NEXT: xorps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setzero_pd:
; X64: # BB#0:
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: retq
  ret <2 x double> zeroinitializer
}

define <2 x i64> @test_mm_setzero_si128() {
; X32-LABEL: test_mm_setzero_si128:
; X32: # BB#0:
; X32-NEXT: xorps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setzero_si128:
; X64: # BB#0:
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: retq
  ret <2 x i64> zeroinitializer
}

; _mm_shuffle_epi32 with a splat-of-element-0 mask: expect pshufd [0,0,0,0].
define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_shuffle_epi32:
; X32: # BB#0:
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_shuffle_epi32:
; X64: # BB#0:
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

; _mm_shuffle_pd selecting a0[1], a1[0]: expect shufpd.
define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_shuffle_pd:
; X32: # BB#0:
; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_shuffle_pd:
; X64: # BB#0:
; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; X64-NEXT: retq
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
  ret <2 x double> %res
}

; _mm_shufflehi_epi16 splatting element 4 into the high half: expect pshufhw.
define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_shufflehi_epi16:
; X32: # BB#0:
; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_shufflehi_epi16:
; X64: # BB#0:
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

; _mm_shufflelo_epi16 splatting element 0 into the low half: expect pshuflw.
define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_shufflelo_epi16:
; X32: # BB#0:
; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_shufflelo_epi16:
; X64: # BB#0:
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

; _mm_sll_epi16: variable shift via llvm.x86.sse2.psll.w -> psllw.
define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sll_epi16:
; X32: # BB#0:
; X32-NEXT: psllw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sll_epi16:
; X64: # BB#0:
; X64-NEXT: psllw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone

; _mm_sll_epi32: variable shift via llvm.x86.sse2.psll.d -> pslld.
define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sll_epi32:
; X32: # BB#0:
; X32-NEXT: pslld %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sll_epi32:
; X64: # BB#0:
; X64-NEXT: pslld %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone

; _mm_sll_epi64: variable shift via llvm.x86.sse2.psll.q -> psllq.
define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sll_epi64:
; X32: # BB#0:
; X32-NEXT: psllq %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sll_epi64:
; X64: # BB#0:
; X64-NEXT: psllq %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone

; _mm_slli_epi16: immediate shift via llvm.x86.sse2.pslli.w -> psllw $1.
define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_slli_epi16:
; X32: # BB#0:
; X32-NEXT: psllw $1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_slli_epi16:
; X64: # BB#0:
; X64-NEXT: psllw $1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone

; _mm_slli_epi32: immediate shift via llvm.x86.sse2.pslli.d -> pslld $1.
define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_slli_epi32:
; X32: # BB#0:
; X32-NEXT: pslld $1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_slli_epi32:
; X64: # BB#0:
; X64-NEXT: pslld $1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone

; _mm_slli_epi64: immediate shift via llvm.x86.sse2.pslli.q -> psllq $1.
define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_slli_epi64:
; X32: # BB#0:
; X32-NEXT: psllq $1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_slli_epi64:
; X64: # BB#0:
; X64-NEXT: psllq $1, %xmm0
; X64-NEXT: retq
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone

; _mm_slli_si128 by 5 bytes, expressed as a shuffle with zero: expect pslldq.
define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_slli_si128:
; X32: # BB#0:
; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_slli_si128:
; X64: # BB#0:
; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

; _mm_sqrt_pd: llvm.x86.sse2.sqrt.pd -> sqrtpd.
define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm_sqrt_pd:
; X32: # BB#0:
; X32-NEXT: sqrtpd %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sqrt_pd:
; X64: # BB#0:
; X64-NEXT: sqrtpd %xmm0, %xmm0
; X64-NEXT: retq
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone

; _mm_sqrt_sd: sqrt of a0's low lane merged with a1's high lane -> sqrtsd.
define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_sqrt_sd:
; X32: # BB#0:
; X32-NEXT: sqrtsd %xmm0, %xmm1
; X32-NEXT: movaps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sqrt_sd:
; X64: # BB#0:
; X64-NEXT: sqrtsd %xmm0, %xmm1
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
  %call = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
  %ext0 = extractelement <2 x double> %call, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone

; _mm_sra_epi16: arithmetic shift via llvm.x86.sse2.psra.w -> psraw.
define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sra_epi16:
; X32: # BB#0:
; X32-NEXT: psraw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sra_epi16:
; X64: # BB#0:
; X64-NEXT: psraw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone

; _mm_sra_epi32: arithmetic shift via llvm.x86.sse2.psra.d -> psrad.
define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sra_epi32:
; X32: # BB#0:
; X32-NEXT: psrad %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sra_epi32:
; X64: # BB#0:
; X64-NEXT: psrad %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone

; _mm_srai_epi16: immediate arithmetic shift -> psraw $1.
define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_srai_epi16:
; X32: # BB#0:
; X32-NEXT: psraw $1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_srai_epi16:
; X64: # BB#0:
; X64-NEXT: psraw $1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone

; _mm_srai_epi32: immediate arithmetic shift -> psrad $1.
define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_srai_epi32:
; X32: # BB#0:
; X32-NEXT: psrad $1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_srai_epi32:
; X64: # BB#0:
; X64-NEXT: psrad $1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone

; _mm_srl_epi16: logical shift via llvm.x86.sse2.psrl.w -> psrlw.
define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_srl_epi16:
; X32: # BB#0:
; X32-NEXT: psrlw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_srl_epi16:
; X64: # BB#0:
; X64-NEXT: psrlw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone

; _mm_srl_epi32: logical shift via llvm.x86.sse2.psrl.d -> psrld.
define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_srl_epi32:
; X32: # BB#0:
; X32-NEXT: psrld %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_srl_epi32:
; X64: # BB#0:
; X64-NEXT: psrld %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone

; _mm_srl_epi64: logical shift via llvm.x86.sse2.psrl.q -> psrlq.
define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_srl_epi64:
; X32: # BB#0:
; X32-NEXT: psrlq %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_srl_epi64:
; X64: # BB#0:
; X64-NEXT: psrlq %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone

; _mm_srli_epi16: immediate logical shift -> psrlw $1.
define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_srli_epi16:
; X32: # BB#0:
; X32-NEXT: psrlw $1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_srli_epi16:
; X64: # BB#0:
; X64-NEXT: psrlw $1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone

; _mm_srli_epi32: immediate logical shift -> psrld $1.
define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_srli_epi32:
; X32: # BB#0:
; X32-NEXT: psrld $1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_srli_epi32:
; X64: # BB#0:
; X64-NEXT: psrld $1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone

; _mm_srli_epi64: immediate logical shift -> psrlq $1.
define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_srli_epi64:
; X32: # BB#0:
; X32-NEXT: psrlq $1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_srli_epi64:
; X64: # BB#0:
; X64-NEXT: psrlq $1, %xmm0
; X64-NEXT: retq
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone

; _mm_srli_si128 by 5 bytes, expressed as a shuffle with zero: expect psrldq.
define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_srli_si128:
; X32: # BB#0:
; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; X32-NEXT: retl
;
; X64-LABEL: test_mm_srli_si128:
; X64: # BB#0:
; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

; _mm_store_pd: aligned 16-byte store -> movaps.
define void @test_mm_store_pd(double *%a0, <2 x double> %a1) {
; X32-LABEL: test_mm_store_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movaps %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_store_pd:
; X64: # BB#0:
; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: retq
  %arg0 = bitcast double* %a0 to <2 x double>*
  store <2 x double> %a1, <2 x double>* %arg0, align 16
  ret void
}

; _mm_store_sd: store only the low double -> movsd.
define void @test_mm_store_sd(double *%a0, <2 x double> %a1) {
; X32-LABEL: test_mm_store_sd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movsd %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_store_sd:
; X64: # BB#0:
; X64-NEXT: movsd %xmm0, (%rdi)
; X64-NEXT: retq
  %ext = extractelement <2 x double> %a1, i32 0
  store double %ext, double* %a0, align 1
  ret void
}

; _mm_store_si128: aligned 16-byte integer store -> movaps.
define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_store_si128:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movaps %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_store_si128:
; X64: # BB#0:
; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: retq
  store <2 x i64> %a1, <2 x i64>* %a0, align 16
  ret void
}

; _mm_store1_pd-style splat store: the low double is written to both slots.
define void @test_mm_store1_sd(double *%a0, <2 x double> %a1) {
; X32-LABEL: test_mm_store1_sd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movsd %xmm0, (%eax)
; X32-NEXT: movsd %xmm0, 8(%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_store1_sd:
; X64: # BB#0:
; X64-NEXT: movsd %xmm0, (%rdi)
; X64-NEXT: movsd %xmm0, 8(%rdi)
; X64-NEXT: retq
  %ext = extractelement <2 x double> %a1, i32 0
  %ptr0 = getelementptr inbounds double, double* %a0, i32 0
  %ptr1 = getelementptr inbounds double, double* %a0, i32 1
  store double %ext, double* %ptr0, align 1
  store double %ext, double* %ptr1, align 1
  ret void
}

; _mm_storeh_pd: store element 1 -> shufpd to move the high lane down, then movsd.
define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) {
; X32-LABEL: test_mm_storeh_sd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; X32-NEXT: movsd %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_storeh_sd:
; X64: # BB#0:
; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; X64-NEXT: movsd %xmm0, (%rdi)
; X64-NEXT: retq
  %ext = extractelement <2 x double> %a1, i32 1
  store double %ext, double* %a0, align 8
  ret void
}

; _mm_storel_epi64: store the low i64 lane (movlps on x86, movd+movq on x64).
define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_storel_epi64:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movlps %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_storel_epi64:
; X64: # BB#0:
; X64-NEXT: movd %xmm0, %rax
; X64-NEXT: movq %rax, (%rdi)
; X64-NEXT: retq
  %ext = extractelement <2 x i64> %a1, i32 0
  %bc = bitcast <2 x i64> *%a0 to i64*
  store i64 %ext, i64* %bc, align 8
  ret void
}

; _mm_storel_pd: store element 0 -> movsd.
define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) {
; X32-LABEL: test_mm_storel_sd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movsd %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_storel_sd:
; X64: # BB#0:
; X64-NEXT: movsd %xmm0, (%rdi)
; X64-NEXT: retq
  %ext = extractelement <2 x double> %a1, i32 0
  store double %ext, double* %a0, align 8
  ret void
}

; _mm_storer_pd: reverse the two lanes (shufpd) then aligned store (movapd).
define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
; X32-LABEL: test_mm_storer_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; X32-NEXT: movapd %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_storer_pd:
; X64: # BB#0:
; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; X64-NEXT: movapd %xmm0, (%rdi)
; X64-NEXT: retq
  %arg0 = bitcast double* %a0 to <2 x double>*
  %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
  store <2 x double> %shuf, <2 x double>* %arg0, align 16
  ret void
}

; _mm_storeu_pd: unaligned store (align 1) -> movups.
define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) {
; X32-LABEL: test_mm_storeu_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movups %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_storeu_pd:
; X64: # BB#0:
; X64-NEXT: movups %xmm0, (%rdi)
; X64-NEXT: retq
  %arg0 = bitcast double* %a0 to <2 x double>*
  store <2 x double> %a1, <2 x double>* %arg0, align 1
  ret void
}

; _mm_storeu_si128: unaligned integer store (align 1) -> movups.
define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_storeu_si128:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movups %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_storeu_si128:
; X64: # BB#0:
; X64-NEXT: movups %xmm0, (%rdi)
; X64-NEXT: retq
  store <2 x i64> %a1, <2 x i64>* %a0, align 1
  ret void
}

; _mm_stream_pd: aligned store with !nontemporal metadata -> movntps.
define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) {
; X32-LABEL: test_mm_stream_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movntps %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_stream_pd:
; X64: # BB#0:
; X64-NEXT: movntps %xmm0, (%rdi)
; X64-NEXT: retq
  %arg0 = bitcast double* %a0 to <2 x double>*
  store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0
  ret void
}

; _mm_stream_si32: scalar nontemporal store -> movnti.
define void @test_mm_stream_si32(i32 *%a0, i32 %a1) {
; X32-LABEL: test_mm_stream_si32:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movntil %eax, (%ecx)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_stream_si32:
; X64: # BB#0:
; X64-NEXT: movntil %esi, (%rdi)
; X64-NEXT: retq
  store i32 %a1, i32* %a0, align 1, !nontemporal !0
  ret void
}

; _mm_stream_si128: vector nontemporal store -> movntps.
define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_stream_si128:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movntps %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_stream_si128:
; X64: # BB#0:
; X64-NEXT: movntps %xmm0, (%rdi)
; X64-NEXT: retq
  store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0
  ret void
}

; _mm_sub_epi8: plain vector sub <16 x i8> -> psubb.
define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_sub_epi8:
; X32: # BB#0:
; X32-NEXT: psubb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_epi8:
; X64: # BB#0:
; X64-NEXT: psubb %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = sub <16 x i8> %arg0, %arg1
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

; _mm_sub_epi16: plain vector sub <8 x i16> -> psubw.
define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_sub_epi16:
; X32: # BB#0:
; X32-NEXT: psubw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_epi16:
; X64: # BB#0:
; X64-NEXT: psubw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = sub <8 x i16> %arg0, %arg1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

; _mm_sub_epi32: plain vector sub <4 x i32> -> psubd.
define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_sub_epi32:
; X32: # BB#0:
; X32-NEXT: psubd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_epi32:
; X64: # BB#0:
; X64-NEXT: psubd %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = sub <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

; _mm_sub_epi64: plain vector sub <2 x i64> -> psubq.
define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_sub_epi64:
; X32: # BB#0:
; X32-NEXT: psubq %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_epi64:
; X64: # BB#0:
; X64-NEXT: psubq %xmm1, %xmm0
; X64-NEXT: retq
  %res = sub <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

; _mm_sub_pd: packed double subtract -> subpd.
define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_sub_pd:
; X32: # BB#0:
; X32-NEXT: subpd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_pd:
; X64: # BB#0:
; X64-NEXT: subpd %xmm1, %xmm0
; X64-NEXT: retq
  %res = fsub <2 x double> %a0, %a1
  ret <2 x double> %res
}

; _mm_sub_sd: scalar subtract of the low lanes, high lane of %a0 kept -> subsd.
define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_sub_sd:
; X32: # BB#0:
; X32-NEXT: subsd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_sd:
; X64: # BB#0:
; X64-NEXT: subsd %xmm1, %xmm0
; X64-NEXT: retq
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fsub = fsub double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fsub, i32 0
  ret <2 x double> %res
}

3396define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3397; X32-LABEL: test_mm_subs_epi8:
3398; X32: # BB#0:
3399; X32-NEXT: psubsb %xmm1, %xmm0
3400; X32-NEXT: retl
3401;
3402; X64-LABEL: test_mm_subs_epi8:
3403; X64: # BB#0:
3404; X64-NEXT: psubsb %xmm1, %xmm0
3405; X64-NEXT: retq
3406 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3407 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3408 %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %arg0, <16 x i8> %arg1)
3409 %bc = bitcast <16 x i8> %res to <2 x i64>
3410 ret <2 x i64> %bc
3411}
3412declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
3413
3414define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3415; X32-LABEL: test_mm_subs_epi16:
3416; X32: # BB#0:
3417; X32-NEXT: psubsw %xmm1, %xmm0
3418; X32-NEXT: retl
3419;
3420; X64-LABEL: test_mm_subs_epi16:
3421; X64: # BB#0:
3422; X64-NEXT: psubsw %xmm1, %xmm0
3423; X64-NEXT: retq
3424 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3425 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3426 %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %arg0, <8 x i16> %arg1)
3427 %bc = bitcast <8 x i16> %res to <2 x i64>
3428 ret <2 x i64> %bc
3429}
3430declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
3431
3432define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3433; X32-LABEL: test_mm_subs_epu8:
3434; X32: # BB#0:
3435; X32-NEXT: psubusb %xmm1, %xmm0
3436; X32-NEXT: retl
3437;
3438; X64-LABEL: test_mm_subs_epu8:
3439; X64: # BB#0:
3440; X64-NEXT: psubusb %xmm1, %xmm0
3441; X64-NEXT: retq
3442 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3443 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3444 %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %arg0, <16 x i8> %arg1)
3445 %bc = bitcast <16 x i8> %res to <2 x i64>
3446 ret <2 x i64> %bc
3447}
3448declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
3449
; _mm_subs_epu16: the unsigned saturating word-subtract intrinsic should
; select a single psubusw on both targets.
define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epu16:
; X32: # BB#0:
; X32-NEXT: psubusw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_subs_epu16:
; X64: # BB#0:
; X64-NEXT: psubusw %xmm1, %xmm0
; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
 %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %arg0, <8 x i16> %arg1)
 %bc = bitcast <8 x i16> %res to <2 x i64>
 ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
3467
; _mm_ucomieq_sd: unordered compare-equal. Expects ucomisd followed by
; setnp/sete/andb so the result is 1 only when ZF is set and PF is clear
; (an ordered equal); a NaN operand sets PF and forces 0.
define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomieq_sd:
; X32: # BB#0:
; X32-NEXT: ucomisd %xmm1, %xmm0
; X32-NEXT: setnp %al
; X32-NEXT: sete %cl
; X32-NEXT: andb %al, %cl
; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomieq_sd:
; X64: # BB#0:
; X64-NEXT: ucomisd %xmm1, %xmm0
; X64-NEXT: setnp %al
; X64-NEXT: sete %cl
; X64-NEXT: andb %al, %cl
; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: retq
 %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
 ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
3490
; _mm_ucomige_sd: unordered compare greater-or-equal. Expects ucomisd with
; the operands in source order followed by setae/movzbl.
define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomige_sd:
; X32: # BB#0:
; X32-NEXT: ucomisd %xmm1, %xmm0
; X32-NEXT: setae %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomige_sd:
; X64: # BB#0:
; X64-NEXT: ucomisd %xmm1, %xmm0
; X64-NEXT: setae %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: retq
 %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
 ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
3509
; _mm_ucomigt_sd: unordered compare greater-than. Expects ucomisd with the
; operands in source order followed by seta/movzbl.
define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomigt_sd:
; X32: # BB#0:
; X32-NEXT: ucomisd %xmm1, %xmm0
; X32-NEXT: seta %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomigt_sd:
; X64: # BB#0:
; X64-NEXT: ucomisd %xmm1, %xmm0
; X64-NEXT: seta %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: retq
 %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
 ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
3528
; _mm_ucomile_sd: unordered compare less-or-equal. Note the CHECK lines show
; the compare with operands swapped (ucomisd %xmm0, %xmm1) so that "a0 <= a1"
; can be tested with setae on the reversed comparison.
define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomile_sd:
; X32: # BB#0:
; X32-NEXT: ucomisd %xmm0, %xmm1
; X32-NEXT: setae %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomile_sd:
; X64: # BB#0:
; X64-NEXT: ucomisd %xmm0, %xmm1
; X64-NEXT: setae %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: retq
 %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
 ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
3547
; _mm_ucomilt_sd: unordered compare less-than. As with ucomile, the CHECK
; lines show the compare with operands swapped (ucomisd %xmm0, %xmm1) so that
; "a0 < a1" is tested with seta on the reversed comparison.
define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomilt_sd:
; X32: # BB#0:
; X32-NEXT: ucomisd %xmm0, %xmm1
; X32-NEXT: seta %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomilt_sd:
; X64: # BB#0:
; X64-NEXT: ucomisd %xmm0, %xmm1
; X64-NEXT: seta %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: retq
 %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
 ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
3566
; _mm_ucomineq_sd: unordered compare not-equal. Expects ucomisd followed by
; setp/setne/orb so the result is 1 when ZF is clear or PF is set — i.e.
; not-equal also holds for an unordered (NaN) comparison.
define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomineq_sd:
; X32: # BB#0:
; X32-NEXT: ucomisd %xmm1, %xmm0
; X32-NEXT: setp %al
; X32-NEXT: setne %cl
; X32-NEXT: orb %al, %cl
; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomineq_sd:
; X64: # BB#0:
; X64-NEXT: ucomisd %xmm1, %xmm0
; X64-NEXT: setp %al
; X64-NEXT: setne %cl
; X64-NEXT: orb %al, %cl
; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: retq
 %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
 ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
3589
; _mm_undefined_pd: returning undef must generate no instructions at all —
; just the return.
define <2 x double> @test_mm_undefined_pd() {
; X32-LABEL: test_mm_undefined_pd:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm_undefined_pd:
; X64: # BB#0:
; X64-NEXT: retq
 ret <2 x double> undef
}
3600
; _mm_undefined_si128: same as test_mm_undefined_pd but for the integer
; vector type — no instructions other than the return.
define <2 x i64> @test_mm_undefined_si128() {
; X32-LABEL: test_mm_undefined_si128:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm_undefined_si128:
; X64: # BB#0:
; X64-NEXT: retq
 ret <2 x i64> undef
}
3611
; _mm_unpackhi_epi8: the shufflevector interleaving the high 8 bytes of each
; operand (indices 8..15 and 24..31) should select a single punpckhbw.
define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi8:
; X32: # BB#0:
; X32-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi8:
; X64: # BB#0:
; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
 %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
 %bc = bitcast <16 x i8> %res to <2 x i64>
 ret <2 x i64> %bc
}
3628
; _mm_unpackhi_epi16: the shufflevector interleaving the high 4 words of each
; operand (indices 4..7 and 12..15) should select a single punpckhwd.
define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi16:
; X32: # BB#0:
; X32-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi16:
; X64: # BB#0:
; X64-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
 %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 %bc = bitcast <8 x i16> %res to <2 x i64>
 ret <2 x i64> %bc
}
3645
; _mm_unpackhi_epi32: the shufflevector interleaving the high 2 dwords of
; each operand (indices 2,3 and 6,7) should select a single punpckhdq.
define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi32:
; X32: # BB#0:
; X32-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi32:
; X64: # BB#0:
; X64-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
 %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 %bc = bitcast <4 x i32> %res to <2 x i64>
 ret <2 x i64> %bc
}
3662
; _mm_unpackhi_epi64: the shufflevector taking the high qword of each operand
; (indices 1 and 3) should select a single punpckhqdq; no bitcasts needed
; since the argument type is already <2 x i64>.
define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi64:
; X32: # BB#0:
; X32-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi64:
; X64: # BB#0:
; X64-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X64-NEXT: retq
 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
 ret <2 x i64> %res
}
3676
; _mm_unpackhi_pd: same high-element shuffle as unpackhi_epi64 but on
; <2 x double>, so the floating-point unpckhpd form is expected instead.
define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_unpackhi_pd:
; X32: # BB#0:
; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_pd:
; X64: # BB#0:
; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X64-NEXT: retq
 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
 ret <2 x double> %res
}
3690
; _mm_unpacklo_epi8: the shufflevector interleaving the low 8 bytes of each
; operand (indices 0..7 and 16..23) should select a single punpcklbw.
define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi8:
; X32: # BB#0:
; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi8:
; X64: # BB#0:
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
 %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
 %bc = bitcast <16 x i8> %res to <2 x i64>
 ret <2 x i64> %bc
}
3707
; _mm_unpacklo_epi16: the shufflevector interleaving the low 4 words of each
; operand (indices 0..3 and 8..11) should select a single punpcklwd.
define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi16:
; X32: # BB#0:
; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi16:
; X64: # BB#0:
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
 %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 %bc = bitcast <8 x i16> %res to <2 x i64>
 ret <2 x i64> %bc
}
3724
; _mm_unpacklo_epi32: the shufflevector interleaving the low 2 dwords of each
; operand (indices 0,1 and 4,5) should select a single punpckldq.
define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi32:
; X32: # BB#0:
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi32:
; X64: # BB#0:
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
 %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 %bc = bitcast <4 x i32> %res to <2 x i64>
 ret <2 x i64> %bc
}
3741
; _mm_unpacklo_epi64: the shufflevector taking the low qword of each operand
; (indices 0 and 2) should select a single punpcklqdq; no bitcasts needed
; since the argument type is already <2 x i64>.
define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi64:
; X32: # BB#0:
; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi64:
; X64: # BB#0:
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
 ret <2 x i64> %res
}
3755
; _mm_unpacklo_pd: same low-element shuffle as unpacklo_epi64 but on
; <2 x double>, so the floating-point unpcklpd form is expected instead.
define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_unpacklo_pd:
; X32: # BB#0:
; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_pd:
; X64: # BB#0:
; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
 ret <2 x double> %res
}
3769
; _mm_xor_pd: clang emits this as bitcast-to-i32-vector + integer xor; the
; CHECK lines currently expect the xorps form to be selected for it.
define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_xor_pd:
; X32: # BB#0:
; X32-NEXT: xorps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_xor_pd:
; X64: # BB#0:
; X64-NEXT: xorps %xmm1, %xmm0
; X64-NEXT: retq
 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
 %res = xor <4 x i32> %arg0, %arg1
 %bc = bitcast <4 x i32> %res to <2 x double>
 ret <2 x double> %bc
}
3786
; _mm_xor_si128: a plain vector xor on <2 x i64>; the CHECK lines currently
; expect the xorps form to be selected for it.
define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_xor_si128:
; X32: # BB#0:
; X32-NEXT: xorps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_xor_si128:
; X64: # BB#0:
; X64-NEXT: xorps %xmm1, %xmm0
; X64-NEXT: retq
 %res = xor <2 x i64> %a0, %a1
 ret <2 x i64> %res
}
3800
3801!0 = !{i32 1}
3802