blob: b1e6e6c73b43de4e4bf7b3c50912a3540829a764 [file] [log] [blame]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32
3; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X64
4
5; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c
6
7define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
8; X32-LABEL: test_mm_add_epi8:
9; X32: # BB#0:
10; X32-NEXT: paddb %xmm1, %xmm0
11; X32-NEXT: retl
12;
13; X64-LABEL: test_mm_add_epi8:
14; X64: # BB#0:
15; X64-NEXT: paddb %xmm1, %xmm0
16; X64-NEXT: retq
17 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
18 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
19 %res = add <16 x i8> %arg0, %arg1
20 %bc = bitcast <16 x i8> %res to <2 x i64>
21 ret <2 x i64> %bc
22}
23
24define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
25; X32-LABEL: test_mm_add_epi16:
26; X32: # BB#0:
27; X32-NEXT: paddw %xmm1, %xmm0
28; X32-NEXT: retl
29;
30; X64-LABEL: test_mm_add_epi16:
31; X64: # BB#0:
32; X64-NEXT: paddw %xmm1, %xmm0
33; X64-NEXT: retq
34 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
35 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
36 %res = add <8 x i16> %arg0, %arg1
37 %bc = bitcast <8 x i16> %res to <2 x i64>
38 ret <2 x i64> %bc
39}
40
41define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
42; X32-LABEL: test_mm_add_epi32:
43; X32: # BB#0:
44; X32-NEXT: paddd %xmm1, %xmm0
45; X32-NEXT: retl
46;
47; X64-LABEL: test_mm_add_epi32:
48; X64: # BB#0:
49; X64-NEXT: paddd %xmm1, %xmm0
50; X64-NEXT: retq
51 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
52 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
53 %res = add <4 x i32> %arg0, %arg1
54 %bc = bitcast <4 x i32> %res to <2 x i64>
55 ret <2 x i64> %bc
56}
57
58define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
59; X32-LABEL: test_mm_add_epi64:
60; X32: # BB#0:
61; X32-NEXT: paddq %xmm1, %xmm0
62; X32-NEXT: retl
63;
64; X64-LABEL: test_mm_add_epi64:
65; X64: # BB#0:
66; X64-NEXT: paddq %xmm1, %xmm0
67; X64-NEXT: retq
68 %res = add <2 x i64> %a0, %a1
69 ret <2 x i64> %res
70}
71
72define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
73; X32-LABEL: test_mm_add_pd:
74; X32: # BB#0:
75; X32-NEXT: addpd %xmm1, %xmm0
76; X32-NEXT: retl
77;
78; X64-LABEL: test_mm_add_pd:
79; X64: # BB#0:
80; X64-NEXT: addpd %xmm1, %xmm0
81; X64-NEXT: retq
82 %res = fadd <2 x double> %a0, %a1
83 ret <2 x double> %res
84}
85
86define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
87; X32-LABEL: test_mm_add_sd:
88; X32: # BB#0:
89; X32-NEXT: addsd %xmm1, %xmm0
90; X32-NEXT: retl
91;
92; X64-LABEL: test_mm_add_sd:
93; X64: # BB#0:
94; X64-NEXT: addsd %xmm1, %xmm0
95; X64-NEXT: retq
96 %ext0 = extractelement <2 x double> %a0, i32 0
97 %ext1 = extractelement <2 x double> %a1, i32 0
98 %fadd = fadd double %ext0, %ext1
99 %res = insertelement <2 x double> %a0, double %fadd, i32 0
100 ret <2 x double> %res
101}
102
103define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
104; X32-LABEL: test_mm_adds_epi8:
105; X32: # BB#0:
106; X32-NEXT: paddsb %xmm1, %xmm0
107; X32-NEXT: retl
108;
109; X64-LABEL: test_mm_adds_epi8:
110; X64: # BB#0:
111; X64-NEXT: paddsb %xmm1, %xmm0
112; X64-NEXT: retq
113 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
114 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
115 %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %arg0, <16 x i8> %arg1)
116 %bc = bitcast <16 x i8> %res to <2 x i64>
117 ret <2 x i64> %bc
118}
119declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
120
121define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
122; X32-LABEL: test_mm_adds_epi16:
123; X32: # BB#0:
124; X32-NEXT: paddsw %xmm1, %xmm0
125; X32-NEXT: retl
126;
127; X64-LABEL: test_mm_adds_epi16:
128; X64: # BB#0:
129; X64-NEXT: paddsw %xmm1, %xmm0
130; X64-NEXT: retq
131 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
132 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
133 %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %arg0, <8 x i16> %arg1)
134 %bc = bitcast <8 x i16> %res to <2 x i64>
135 ret <2 x i64> %bc
136}
137declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
138
139define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
140; X32-LABEL: test_mm_adds_epu8:
141; X32: # BB#0:
142; X32-NEXT: paddusb %xmm1, %xmm0
143; X32-NEXT: retl
144;
145; X64-LABEL: test_mm_adds_epu8:
146; X64: # BB#0:
147; X64-NEXT: paddusb %xmm1, %xmm0
148; X64-NEXT: retq
149 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
150 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
151 %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %arg0, <16 x i8> %arg1)
152 %bc = bitcast <16 x i8> %res to <2 x i64>
153 ret <2 x i64> %bc
154}
155declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
156
157define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
158; X32-LABEL: test_mm_adds_epu16:
159; X32: # BB#0:
160; X32-NEXT: paddusw %xmm1, %xmm0
161; X32-NEXT: retl
162;
163; X64-LABEL: test_mm_adds_epu16:
164; X64: # BB#0:
165; X64-NEXT: paddusw %xmm1, %xmm0
166; X64-NEXT: retq
167 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
168 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
169 %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %arg0, <8 x i16> %arg1)
170 %bc = bitcast <8 x i16> %res to <2 x i64>
171 ret <2 x i64> %bc
172}
173declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
174
175define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
176; X32-LABEL: test_mm_and_pd:
177; X32: # BB#0:
178; X32-NEXT: andps %xmm1, %xmm0
179; X32-NEXT: retl
180;
181; X64-LABEL: test_mm_and_pd:
182; X64: # BB#0:
183; X64-NEXT: andps %xmm1, %xmm0
184; X64-NEXT: retq
185 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
186 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
187 %res = and <4 x i32> %arg0, %arg1
188 %bc = bitcast <4 x i32> %res to <2 x double>
189 ret <2 x double> %bc
190}
191
192define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
193; X32-LABEL: test_mm_and_si128:
194; X32: # BB#0:
195; X32-NEXT: andps %xmm1, %xmm0
196; X32-NEXT: retl
197;
198; X64-LABEL: test_mm_and_si128:
199; X64: # BB#0:
200; X64-NEXT: andps %xmm1, %xmm0
201; X64-NEXT: retq
202 %res = and <2 x i64> %a0, %a1
203 ret <2 x i64> %res
204}
205
206define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
207; X32-LABEL: test_mm_andnot_pd:
208; X32: # BB#0:
209; X32-NEXT: andnps %xmm1, %xmm0
210; X32-NEXT: retl
211;
212; X64-LABEL: test_mm_andnot_pd:
213; X64: # BB#0:
214; X64-NEXT: andnps %xmm1, %xmm0
215; X64-NEXT: retq
216 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
217 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
218 %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
219 %res = and <4 x i32> %not, %arg1
220 %bc = bitcast <4 x i32> %res to <2 x double>
221 ret <2 x double> %bc
222}
223
224define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
225; X32-LABEL: test_mm_andnot_si128:
226; X32: # BB#0:
227; X32-NEXT: pcmpeqd %xmm2, %xmm2
228; X32-NEXT: pxor %xmm2, %xmm0
229; X32-NEXT: pand %xmm1, %xmm0
230; X32-NEXT: retl
231;
232; X64-LABEL: test_mm_andnot_si128:
233; X64: # BB#0:
234; X64-NEXT: pcmpeqd %xmm2, %xmm2
235; X64-NEXT: pxor %xmm2, %xmm0
236; X64-NEXT: pand %xmm1, %xmm0
237; X64-NEXT: retq
238 %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
239 %res = and <2 x i64> %not, %a1
240 ret <2 x i64> %res
241}
242
243define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
244; X32-LABEL: test_mm_avg_epu8:
245; X32: # BB#0:
246; X32-NEXT: pavgb %xmm1, %xmm0
247; X32-NEXT: retl
248;
249; X64-LABEL: test_mm_avg_epu8:
250; X64: # BB#0:
251; X64-NEXT: pavgb %xmm1, %xmm0
252; X64-NEXT: retq
253 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
254 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
255 %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1)
256 %bc = bitcast <16 x i8> %res to <2 x i64>
257 ret <2 x i64> %bc
258}
259declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1) nounwind readnone
260
261define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
262; X32-LABEL: test_mm_avg_epu16:
263; X32: # BB#0:
264; X32-NEXT: pavgw %xmm1, %xmm0
265; X32-NEXT: retl
266;
267; X64-LABEL: test_mm_avg_epu16:
268; X64: # BB#0:
269; X64-NEXT: pavgw %xmm1, %xmm0
270; X64-NEXT: retq
271 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
272 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
273 %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %arg0, <8 x i16> %arg1)
274 %bc = bitcast <8 x i16> %res to <2 x i64>
275 ret <2 x i64> %bc
276}
277declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
278
279define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
280; X32-LABEL: test_mm_bslli_si128:
281; X32: # BB#0:
282; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
283; X32-NEXT: retl
284;
285; X64-LABEL: test_mm_bslli_si128:
286; X64: # BB#0:
287; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
288; X64-NEXT: retq
289 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
290 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
291 %bc = bitcast <16 x i8> %res to <2 x i64>
292 ret <2 x i64> %bc
293}
294
295define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
296; X32-LABEL: test_mm_bsrli_si128:
297; X32: # BB#0:
298; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
299; X32-NEXT: retl
300;
301; X64-LABEL: test_mm_bsrli_si128:
302; X64: # BB#0:
303; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
304; X64-NEXT: retq
305 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
306 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
307 %bc = bitcast <16 x i8> %res to <2 x i64>
308 ret <2 x i64> %bc
309}
310
Simon Pilgrim01809e02016-05-19 10:58:54 +0000311define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
312; X32-LABEL: test_mm_castpd_ps:
313; X32: # BB#0:
314; X32-NEXT: retl
315;
316; X64-LABEL: test_mm_castpd_ps:
317; X64: # BB#0:
318; X64-NEXT: retq
319 %res = bitcast <2 x double> %a0 to <4 x float>
320 ret <4 x float> %res
321}
322
323define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
324; X32-LABEL: test_mm_castpd_si128:
325; X32: # BB#0:
326; X32-NEXT: retl
327;
328; X64-LABEL: test_mm_castpd_si128:
329; X64: # BB#0:
330; X64-NEXT: retq
331 %res = bitcast <2 x double> %a0 to <2 x i64>
332 ret <2 x i64> %res
333}
334
335define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
336; X32-LABEL: test_mm_castps_pd:
337; X32: # BB#0:
338; X32-NEXT: retl
339;
340; X64-LABEL: test_mm_castps_pd:
341; X64: # BB#0:
342; X64-NEXT: retq
343 %res = bitcast <4 x float> %a0 to <2 x double>
344 ret <2 x double> %res
345}
346
347define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
348; X32-LABEL: test_mm_castps_si128:
349; X32: # BB#0:
350; X32-NEXT: retl
351;
352; X64-LABEL: test_mm_castps_si128:
353; X64: # BB#0:
354; X64-NEXT: retq
355 %res = bitcast <4 x float> %a0 to <2 x i64>
356 ret <2 x i64> %res
357}
358
359define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
360; X32-LABEL: test_mm_castsi128_pd:
361; X32: # BB#0:
362; X32-NEXT: retl
363;
364; X64-LABEL: test_mm_castsi128_pd:
365; X64: # BB#0:
366; X64-NEXT: retq
367 %res = bitcast <2 x i64> %a0 to <2 x double>
368 ret <2 x double> %res
369}
370
371define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
372; X32-LABEL: test_mm_castsi128_ps:
373; X32: # BB#0:
374; X32-NEXT: retl
375;
376; X64-LABEL: test_mm_castsi128_ps:
377; X64: # BB#0:
378; X64-NEXT: retq
379 %res = bitcast <2 x i64> %a0 to <4 x float>
380 ret <4 x float> %res
381}
382
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000383define void @test_mm_clflush(i8* %a0) nounwind {
384; X32-LABEL: test_mm_clflush:
385; X32: # BB#0:
386; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
387; X32-NEXT: clflush (%eax)
388; X32-NEXT: retl
389;
390; X64-LABEL: test_mm_clflush:
391; X64: # BB#0:
392; X64-NEXT: clflush (%rdi)
393; X64-NEXT: retq
394 call void @llvm.x86.sse2.clflush(i8* %a0)
395 ret void
396}
397declare void @llvm.x86.sse2.clflush(i8*) nounwind readnone
398
399define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
400; X32-LABEL: test_mm_cmpeq_epi8:
401; X32: # BB#0:
402; X32-NEXT: pcmpeqb %xmm1, %xmm0
403; X32-NEXT: retl
404;
405; X64-LABEL: test_mm_cmpeq_epi8:
406; X64: # BB#0:
407; X64-NEXT: pcmpeqb %xmm1, %xmm0
408; X64-NEXT: retq
409 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
410 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
411 %cmp = icmp eq <16 x i8> %arg0, %arg1
412 %res = sext <16 x i1> %cmp to <16 x i8>
413 %bc = bitcast <16 x i8> %res to <2 x i64>
414 ret <2 x i64> %bc
415}
416
417define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
418; X32-LABEL: test_mm_cmpeq_epi16:
419; X32: # BB#0:
420; X32-NEXT: pcmpeqw %xmm1, %xmm0
421; X32-NEXT: retl
422;
423; X64-LABEL: test_mm_cmpeq_epi16:
424; X64: # BB#0:
425; X64-NEXT: pcmpeqw %xmm1, %xmm0
426; X64-NEXT: retq
427 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
428 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
429 %cmp = icmp eq <8 x i16> %arg0, %arg1
430 %res = sext <8 x i1> %cmp to <8 x i16>
431 %bc = bitcast <8 x i16> %res to <2 x i64>
432 ret <2 x i64> %bc
433}
434
435define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
436; X32-LABEL: test_mm_cmpeq_epi32:
437; X32: # BB#0:
438; X32-NEXT: pcmpeqd %xmm1, %xmm0
439; X32-NEXT: retl
440;
441; X64-LABEL: test_mm_cmpeq_epi32:
442; X64: # BB#0:
443; X64-NEXT: pcmpeqd %xmm1, %xmm0
444; X64-NEXT: retq
445 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
446 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
447 %cmp = icmp eq <4 x i32> %arg0, %arg1
448 %res = sext <4 x i1> %cmp to <4 x i32>
449 %bc = bitcast <4 x i32> %res to <2 x i64>
450 ret <2 x i64> %bc
451}
452
453define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
454; X32-LABEL: test_mm_cmpeq_pd:
455; X32: # BB#0:
456; X32-NEXT: cmpeqpd %xmm1, %xmm0
457; X32-NEXT: retl
458;
459; X64-LABEL: test_mm_cmpeq_pd:
460; X64: # BB#0:
461; X64-NEXT: cmpeqpd %xmm1, %xmm0
462; X64-NEXT: retq
463 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
464 ret <2 x double> %res
465}
466declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
467
468define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
469; X32-LABEL: test_mm_cmpeq_sd:
470; X32: # BB#0:
471; X32-NEXT: cmpeqsd %xmm1, %xmm0
472; X32-NEXT: retl
473;
474; X64-LABEL: test_mm_cmpeq_sd:
475; X64: # BB#0:
476; X64-NEXT: cmpeqsd %xmm1, %xmm0
477; X64-NEXT: retq
478 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
479 ret <2 x double> %res
480}
481declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
482
483define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
484; X32-LABEL: test_mm_cmpge_pd:
485; X32: # BB#0:
486; X32-NEXT: cmplepd %xmm0, %xmm1
487; X32-NEXT: movapd %xmm1, %xmm0
488; X32-NEXT: retl
489;
490; X64-LABEL: test_mm_cmpge_pd:
491; X64: # BB#0:
492; X64-NEXT: cmplepd %xmm0, %xmm1
493; X64-NEXT: movapd %xmm1, %xmm0
494; X64-NEXT: retq
495 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 2)
496 ret <2 x double> %res
497}
498
499define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
500; X32-LABEL: test_mm_cmpge_sd:
501; X32: # BB#0:
502; X32-NEXT: cmplesd %xmm0, %xmm1
503; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
504; X32-NEXT: retl
505;
506; X64-LABEL: test_mm_cmpge_sd:
507; X64: # BB#0:
508; X64-NEXT: cmplesd %xmm0, %xmm1
509; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
510; X64-NEXT: retq
511 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
512 %ext0 = extractelement <2 x double> %cmp, i32 0
513 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
514 %ext1 = extractelement <2 x double> %a0, i32 1
515 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
516 ret <2 x double> %ins1
517}
518
519define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
520; X32-LABEL: test_mm_cmpgt_epi8:
521; X32: # BB#0:
522; X32-NEXT: pcmpgtb %xmm1, %xmm0
523; X32-NEXT: retl
524;
525; X64-LABEL: test_mm_cmpgt_epi8:
526; X64: # BB#0:
527; X64-NEXT: pcmpgtb %xmm1, %xmm0
528; X64-NEXT: retq
529 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
530 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
531 %cmp = icmp sgt <16 x i8> %arg0, %arg1
532 %res = sext <16 x i1> %cmp to <16 x i8>
533 %bc = bitcast <16 x i8> %res to <2 x i64>
534 ret <2 x i64> %bc
535}
536
537define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
538; X32-LABEL: test_mm_cmpgt_epi16:
539; X32: # BB#0:
540; X32-NEXT: pcmpgtw %xmm1, %xmm0
541; X32-NEXT: retl
542;
543; X64-LABEL: test_mm_cmpgt_epi16:
544; X64: # BB#0:
545; X64-NEXT: pcmpgtw %xmm1, %xmm0
546; X64-NEXT: retq
547 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
548 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
549 %cmp = icmp sgt <8 x i16> %arg0, %arg1
550 %res = sext <8 x i1> %cmp to <8 x i16>
551 %bc = bitcast <8 x i16> %res to <2 x i64>
552 ret <2 x i64> %bc
553}
554
555define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
556; X32-LABEL: test_mm_cmpgt_epi32:
557; X32: # BB#0:
558; X32-NEXT: pcmpgtd %xmm1, %xmm0
559; X32-NEXT: retl
560;
561; X64-LABEL: test_mm_cmpgt_epi32:
562; X64: # BB#0:
563; X64-NEXT: pcmpgtd %xmm1, %xmm0
564; X64-NEXT: retq
565 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
566 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
567 %cmp = icmp sgt <4 x i32> %arg0, %arg1
568 %res = sext <4 x i1> %cmp to <4 x i32>
569 %bc = bitcast <4 x i32> %res to <2 x i64>
570 ret <2 x i64> %bc
571}
572
573define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
574; X32-LABEL: test_mm_cmpgt_pd:
575; X32: # BB#0:
576; X32-NEXT: cmpltpd %xmm0, %xmm1
577; X32-NEXT: movapd %xmm1, %xmm0
578; X32-NEXT: retl
579;
580; X64-LABEL: test_mm_cmpgt_pd:
581; X64: # BB#0:
582; X64-NEXT: cmpltpd %xmm0, %xmm1
583; X64-NEXT: movapd %xmm1, %xmm0
584; X64-NEXT: retq
585 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 1)
586 ret <2 x double> %res
587}
588
589define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
590; X32-LABEL: test_mm_cmpgt_sd:
591; X32: # BB#0:
592; X32-NEXT: cmpltsd %xmm0, %xmm1
593; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
594; X32-NEXT: retl
595;
596; X64-LABEL: test_mm_cmpgt_sd:
597; X64: # BB#0:
598; X64-NEXT: cmpltsd %xmm0, %xmm1
599; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
600; X64-NEXT: retq
601 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
602 %ext0 = extractelement <2 x double> %cmp, i32 0
603 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
604 %ext1 = extractelement <2 x double> %a0, i32 1
605 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
606 ret <2 x double> %ins1
607}
608
609define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
610; X32-LABEL: test_mm_cmple_pd:
611; X32: # BB#0:
612; X32-NEXT: cmplepd %xmm1, %xmm0
613; X32-NEXT: retl
614;
615; X64-LABEL: test_mm_cmple_pd:
616; X64: # BB#0:
617; X64-NEXT: cmplepd %xmm1, %xmm0
618; X64-NEXT: retq
619 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 2)
620 ret <2 x double> %res
621}
622
623define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
624; X32-LABEL: test_mm_cmple_sd:
625; X32: # BB#0:
626; X32-NEXT: cmplesd %xmm1, %xmm0
627; X32-NEXT: retl
628;
629; X64-LABEL: test_mm_cmple_sd:
630; X64: # BB#0:
631; X64-NEXT: cmplesd %xmm1, %xmm0
632; X64-NEXT: retq
633 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
634 ret <2 x double> %res
635}
636
637define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
638; X32-LABEL: test_mm_cmplt_epi8:
639; X32: # BB#0:
640; X32-NEXT: pcmpgtb %xmm0, %xmm1
641; X32-NEXT: movdqa %xmm1, %xmm0
642; X32-NEXT: retl
643;
644; X64-LABEL: test_mm_cmplt_epi8:
645; X64: # BB#0:
646; X64-NEXT: pcmpgtb %xmm0, %xmm1
647; X64-NEXT: movdqa %xmm1, %xmm0
648; X64-NEXT: retq
649 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
650 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
651 %cmp = icmp sgt <16 x i8> %arg1, %arg0
652 %res = sext <16 x i1> %cmp to <16 x i8>
653 %bc = bitcast <16 x i8> %res to <2 x i64>
654 ret <2 x i64> %bc
655}
656
657define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
658; X32-LABEL: test_mm_cmplt_epi16:
659; X32: # BB#0:
660; X32-NEXT: pcmpgtw %xmm0, %xmm1
661; X32-NEXT: movdqa %xmm1, %xmm0
662; X32-NEXT: retl
663;
664; X64-LABEL: test_mm_cmplt_epi16:
665; X64: # BB#0:
666; X64-NEXT: pcmpgtw %xmm0, %xmm1
667; X64-NEXT: movdqa %xmm1, %xmm0
668; X64-NEXT: retq
669 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
670 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
671 %cmp = icmp sgt <8 x i16> %arg1, %arg0
672 %res = sext <8 x i1> %cmp to <8 x i16>
673 %bc = bitcast <8 x i16> %res to <2 x i64>
674 ret <2 x i64> %bc
675}
676
677define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
678; X32-LABEL: test_mm_cmplt_epi32:
679; X32: # BB#0:
680; X32-NEXT: pcmpgtd %xmm0, %xmm1
681; X32-NEXT: movdqa %xmm1, %xmm0
682; X32-NEXT: retl
683;
684; X64-LABEL: test_mm_cmplt_epi32:
685; X64: # BB#0:
686; X64-NEXT: pcmpgtd %xmm0, %xmm1
687; X64-NEXT: movdqa %xmm1, %xmm0
688; X64-NEXT: retq
689 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
690 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
691 %cmp = icmp sgt <4 x i32> %arg1, %arg0
692 %res = sext <4 x i1> %cmp to <4 x i32>
693 %bc = bitcast <4 x i32> %res to <2 x i64>
694 ret <2 x i64> %bc
695}
696
697define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
698; X32-LABEL: test_mm_cmplt_pd:
699; X32: # BB#0:
700; X32-NEXT: cmpltpd %xmm1, %xmm0
701; X32-NEXT: retl
702;
703; X64-LABEL: test_mm_cmplt_pd:
704; X64: # BB#0:
705; X64-NEXT: cmpltpd %xmm1, %xmm0
706; X64-NEXT: retq
707 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 1)
708 ret <2 x double> %res
709}
710
711define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
712; X32-LABEL: test_mm_cmplt_sd:
713; X32: # BB#0:
714; X32-NEXT: cmpltsd %xmm1, %xmm0
715; X32-NEXT: retl
716;
717; X64-LABEL: test_mm_cmplt_sd:
718; X64: # BB#0:
719; X64-NEXT: cmpltsd %xmm1, %xmm0
720; X64-NEXT: retq
721 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
722 ret <2 x double> %res
723}
724
725define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
726; X32-LABEL: test_mm_cmpneq_pd:
727; X32: # BB#0:
728; X32-NEXT: cmpneqpd %xmm1, %xmm0
729; X32-NEXT: retl
730;
731; X64-LABEL: test_mm_cmpneq_pd:
732; X64: # BB#0:
733; X64-NEXT: cmpneqpd %xmm1, %xmm0
734; X64-NEXT: retq
735 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 4)
736 ret <2 x double> %res
737}
738
739define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
740; X32-LABEL: test_mm_cmpneq_sd:
741; X32: # BB#0:
742; X32-NEXT: cmpneqsd %xmm1, %xmm0
743; X32-NEXT: retl
744;
745; X64-LABEL: test_mm_cmpneq_sd:
746; X64: # BB#0:
747; X64-NEXT: cmpneqsd %xmm1, %xmm0
748; X64-NEXT: retq
749 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
750 ret <2 x double> %res
751}
752
753define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
754; X32-LABEL: test_mm_cmpnge_pd:
755; X32: # BB#0:
756; X32-NEXT: cmpnlepd %xmm0, %xmm1
757; X32-NEXT: movapd %xmm1, %xmm0
758; X32-NEXT: retl
759;
760; X64-LABEL: test_mm_cmpnge_pd:
761; X64: # BB#0:
762; X64-NEXT: cmpnlepd %xmm0, %xmm1
763; X64-NEXT: movapd %xmm1, %xmm0
764; X64-NEXT: retq
765 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 6)
766 ret <2 x double> %res
767}
768
769define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
770; X32-LABEL: test_mm_cmpnge_sd:
771; X32: # BB#0:
772; X32-NEXT: cmpnlesd %xmm0, %xmm1
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000773; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000774; X32-NEXT: retl
775;
776; X64-LABEL: test_mm_cmpnge_sd:
777; X64: # BB#0:
778; X64-NEXT: cmpnlesd %xmm0, %xmm1
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000779; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000780; X64-NEXT: retq
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000781 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
782 %ext0 = extractelement <2 x double> %cmp, i32 0
783 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
784 %ext1 = extractelement <2 x double> %a0, i32 1
785 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
786 ret <2 x double> %ins1
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000787}
788
789define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
790; X32-LABEL: test_mm_cmpngt_pd:
791; X32: # BB#0:
792; X32-NEXT: cmpnltpd %xmm0, %xmm1
793; X32-NEXT: movapd %xmm1, %xmm0
794; X32-NEXT: retl
795;
796; X64-LABEL: test_mm_cmpngt_pd:
797; X64: # BB#0:
798; X64-NEXT: cmpnltpd %xmm0, %xmm1
799; X64-NEXT: movapd %xmm1, %xmm0
800; X64-NEXT: retq
801 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a1, <2 x double> %a0, i8 5)
802 ret <2 x double> %res
803}
804
805define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
806; X32-LABEL: test_mm_cmpngt_sd:
807; X32: # BB#0:
808; X32-NEXT: cmpnltsd %xmm0, %xmm1
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000809; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000810; X32-NEXT: retl
811;
812; X64-LABEL: test_mm_cmpngt_sd:
813; X64: # BB#0:
814; X64-NEXT: cmpnltsd %xmm0, %xmm1
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000815; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000816; X64-NEXT: retq
Simon Pilgrimb1ff2dd2016-05-19 16:49:53 +0000817 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
818 %ext0 = extractelement <2 x double> %cmp, i32 0
819 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
820 %ext1 = extractelement <2 x double> %a0, i32 1
821 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
822 ret <2 x double> %ins1
Simon Pilgrim5a0d7282016-05-18 18:00:43 +0000823}
824
825define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
826; X32-LABEL: test_mm_cmpnle_pd:
827; X32: # BB#0:
828; X32-NEXT: cmpnlepd %xmm1, %xmm0
829; X32-NEXT: retl
830;
831; X64-LABEL: test_mm_cmpnle_pd:
832; X64: # BB#0:
833; X64-NEXT: cmpnlepd %xmm1, %xmm0
834; X64-NEXT: retq
835 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 6)
836 ret <2 x double> %res
837}
838
839define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
840; X32-LABEL: test_mm_cmpnle_sd:
841; X32: # BB#0:
842; X32-NEXT: cmpnlesd %xmm1, %xmm0
843; X32-NEXT: retl
844;
845; X64-LABEL: test_mm_cmpnle_sd:
846; X64: # BB#0:
847; X64-NEXT: cmpnlesd %xmm1, %xmm0
848; X64-NEXT: retq
849 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
850 ret <2 x double> %res
851}
852
853define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
854; X32-LABEL: test_mm_cmpnlt_pd:
855; X32: # BB#0:
856; X32-NEXT: cmpnltpd %xmm1, %xmm0
857; X32-NEXT: retl
858;
859; X64-LABEL: test_mm_cmpnlt_pd:
860; X64: # BB#0:
861; X64-NEXT: cmpnltpd %xmm1, %xmm0
862; X64-NEXT: retq
863 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 5)
864 ret <2 x double> %res
865}
866
867define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
868; X32-LABEL: test_mm_cmpnlt_sd:
869; X32: # BB#0:
870; X32-NEXT: cmpnltsd %xmm1, %xmm0
871; X32-NEXT: retl
872;
873; X64-LABEL: test_mm_cmpnlt_sd:
874; X64: # BB#0:
875; X64-NEXT: cmpnltsd %xmm1, %xmm0
876; X64-NEXT: retq
877 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
878 ret <2 x double> %res
879}
880
881define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
882; X32-LABEL: test_mm_cmpord_pd:
883; X32: # BB#0:
884; X32-NEXT: cmpordpd %xmm1, %xmm0
885; X32-NEXT: retl
886;
887; X64-LABEL: test_mm_cmpord_pd:
888; X64: # BB#0:
889; X64-NEXT: cmpordpd %xmm1, %xmm0
890; X64-NEXT: retq
891 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
892 ret <2 x double> %res
893}
894
895define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
896; X32-LABEL: test_mm_cmpord_sd:
897; X32: # BB#0:
898; X32-NEXT: cmpordsd %xmm1, %xmm0
899; X32-NEXT: retl
900;
901; X64-LABEL: test_mm_cmpord_sd:
902; X64: # BB#0:
903; X64-NEXT: cmpordsd %xmm1, %xmm0
904; X64-NEXT: retq
905 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
906 ret <2 x double> %res
907}
908
909define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
910; X32-LABEL: test_mm_cmpunord_pd:
911; X32: # BB#0:
912; X32-NEXT: cmpunordpd %xmm1, %xmm0
913; X32-NEXT: retl
914;
915; X64-LABEL: test_mm_cmpunord_pd:
916; X64: # BB#0:
917; X64-NEXT: cmpunordpd %xmm1, %xmm0
918; X64-NEXT: retq
919 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 3)
920 ret <2 x double> %res
921}
922
923define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
924; X32-LABEL: test_mm_cmpunord_sd:
925; X32: # BB#0:
926; X32-NEXT: cmpunordsd %xmm1, %xmm0
927; X32-NEXT: retl
928;
929; X64-LABEL: test_mm_cmpunord_sd:
930; X64: # BB#0:
931; X64-NEXT: cmpunordsd %xmm1, %xmm0
932; X64-NEXT: retq
933 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3)
934 ret <2 x double> %res
935}
936
; _mm_comieq_sd: COMISD sets PF on an unordered compare, so EQ must be
; setnp AND sete - a NaN operand yields 0, matching the intrinsic contract.
937define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
938; X32-LABEL: test_mm_comieq_sd:
939; X32: # BB#0:
940; X32-NEXT: comisd %xmm1, %xmm0
941; X32-NEXT: setnp %al
942; X32-NEXT: sete %cl
943; X32-NEXT: andb %al, %cl
944; X32-NEXT: movzbl %cl, %eax
945; X32-NEXT: retl
946;
947; X64-LABEL: test_mm_comieq_sd:
948; X64: # BB#0:
949; X64-NEXT: comisd %xmm1, %xmm0
950; X64-NEXT: setnp %al
951; X64-NEXT: sete %cl
952; X64-NEXT: andb %al, %cl
953; X64-NEXT: movzbl %cl, %eax
954; X64-NEXT: retq
955 %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
956 ret i32 %res
957}
958declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
959
; _mm_comige_sd: unsigned-style setae after comisd %xmm1, %xmm0 implements a0 >= a1.
960define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
961; X32-LABEL: test_mm_comige_sd:
962; X32: # BB#0:
963; X32-NEXT: comisd %xmm1, %xmm0
964; X32-NEXT: setae %al
965; X32-NEXT: movzbl %al, %eax
966; X32-NEXT: retl
967;
968; X64-LABEL: test_mm_comige_sd:
969; X64: # BB#0:
970; X64-NEXT: comisd %xmm1, %xmm0
971; X64-NEXT: setae %al
972; X64-NEXT: movzbl %al, %eax
973; X64-NEXT: retq
974 %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
975 ret i32 %res
976}
977declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
978
; _mm_comigt_sd: seta after comisd %xmm1, %xmm0 implements a0 > a1.
979define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
980; X32-LABEL: test_mm_comigt_sd:
981; X32: # BB#0:
982; X32-NEXT: comisd %xmm1, %xmm0
983; X32-NEXT: seta %al
984; X32-NEXT: movzbl %al, %eax
985; X32-NEXT: retl
986;
987; X64-LABEL: test_mm_comigt_sd:
988; X64: # BB#0:
989; X64-NEXT: comisd %xmm1, %xmm0
990; X64-NEXT: seta %al
991; X64-NEXT: movzbl %al, %eax
992; X64-NEXT: retq
993 %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
994 ret i32 %res
995}
996declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
997
; _mm_comile_sd: note the swapped comisd operands (%xmm0, %xmm1) so that
; setae computes a1 >= a0, i.e. a0 <= a1.
998define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
999; X32-LABEL: test_mm_comile_sd:
1000; X32: # BB#0:
1001; X32-NEXT: comisd %xmm0, %xmm1
1002; X32-NEXT: setae %al
1003; X32-NEXT: movzbl %al, %eax
1004; X32-NEXT: retl
1005;
1006; X64-LABEL: test_mm_comile_sd:
1007; X64: # BB#0:
1008; X64-NEXT: comisd %xmm0, %xmm1
1009; X64-NEXT: setae %al
1010; X64-NEXT: movzbl %al, %eax
1011; X64-NEXT: retq
1012 %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
1013 ret i32 %res
1014}
1015declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
1016
; _mm_comilt_sd: swapped operands + seta computes a1 > a0, i.e. a0 < a1.
1017define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1018; X32-LABEL: test_mm_comilt_sd:
1019; X32: # BB#0:
1020; X32-NEXT: comisd %xmm0, %xmm1
1021; X32-NEXT: seta %al
1022; X32-NEXT: movzbl %al, %eax
1023; X32-NEXT: retl
1024;
1025; X64-LABEL: test_mm_comilt_sd:
1026; X64: # BB#0:
1027; X64-NEXT: comisd %xmm0, %xmm1
1028; X64-NEXT: seta %al
1029; X64-NEXT: movzbl %al, %eax
1030; X64-NEXT: retq
1031 %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
1032 ret i32 %res
1033}
1034declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
1035
; _mm_comineq_sd: dual of comieq - setp OR setne, so an unordered compare returns 1.
1036define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1037; X32-LABEL: test_mm_comineq_sd:
1038; X32: # BB#0:
1039; X32-NEXT: comisd %xmm1, %xmm0
1040; X32-NEXT: setp %al
1041; X32-NEXT: setne %cl
1042; X32-NEXT: orb %al, %cl
1043; X32-NEXT: movzbl %cl, %eax
1044; X32-NEXT: retl
1045;
1046; X64-LABEL: test_mm_comineq_sd:
1047; X64: # BB#0:
1048; X64-NEXT: comisd %xmm1, %xmm0
1049; X64-NEXT: setp %al
1050; X64-NEXT: setne %cl
1051; X64-NEXT: orb %al, %cl
1052; X64-NEXT: movzbl %cl, %eax
1053; X64-NEXT: retq
1054 %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
1055 ret i32 %res
1056}
1057declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
1058
; _mm_cvtepi32_pd: the intrinsic is expressed as generic IR - shuffle out the
; low two i32 lanes and sitofp them - and must still select to a single cvtdq2pd.
1059define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
1060; X32-LABEL: test_mm_cvtepi32_pd:
1061; X32: # BB#0:
1062; X32-NEXT: cvtdq2pd %xmm0, %xmm0
1063; X32-NEXT: retl
1064;
1065; X64-LABEL: test_mm_cvtepi32_pd:
1066; X64: # BB#0:
1067; X64-NEXT: cvtdq2pd %xmm0, %xmm0
1068; X64-NEXT: retq
1069 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
Simon Pilgrim8a5ff3c2016-05-23 22:17:36 +00001070 %ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1>
1071 %res = sitofp <2 x i32> %ext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001072 ret <2 x double> %res
1073}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001074
; _mm_cvtepi32_ps: packed i32 -> f32 conversion via the target intrinsic.
1075define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
1076; X32-LABEL: test_mm_cvtepi32_ps:
1077; X32: # BB#0:
1078; X32-NEXT: cvtdq2ps %xmm0, %xmm0
1079; X32-NEXT: retl
1080;
1081; X64-LABEL: test_mm_cvtepi32_ps:
1082; X64: # BB#0:
1083; X64-NEXT: cvtdq2ps %xmm0, %xmm0
1084; X64-NEXT: retq
1085 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1086 %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %arg0)
1087 ret <4 x float> %res
1088}
1089declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
1090
; _mm_cvtpd_epi32: rounding f64 -> i32 conversion (cvtpd2dq); result returned
; through the __m128i ABI type via bitcast.
1091define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
1092; X32-LABEL: test_mm_cvtpd_epi32:
1093; X32: # BB#0:
1094; X32-NEXT: cvtpd2dq %xmm0, %xmm0
1095; X32-NEXT: retl
1096;
1097; X64-LABEL: test_mm_cvtpd_epi32:
1098; X64: # BB#0:
1099; X64-NEXT: cvtpd2dq %xmm0, %xmm0
1100; X64-NEXT: retq
1101 %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
1102 %bc = bitcast <4 x i32> %res to <2 x i64>
1103 ret <2 x i64> %bc
1104}
1105declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
1106
; _mm_cvtpd_ps: narrowing f64 -> f32 conversion (cvtpd2ps).
1107define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind {
1108; X32-LABEL: test_mm_cvtpd_ps:
1109; X32: # BB#0:
1110; X32-NEXT: cvtpd2ps %xmm0, %xmm0
1111; X32-NEXT: retl
1112;
1113; X64-LABEL: test_mm_cvtpd_ps:
1114; X64: # BB#0:
1115; X64-NEXT: cvtpd2ps %xmm0, %xmm0
1116; X64-NEXT: retq
1117 %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
1118 ret <4 x float> %res
1119}
1120declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
1121
; _mm_cvtps_epi32: rounding f32 -> i32 conversion (cvtps2dq).
1122define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind {
1123; X32-LABEL: test_mm_cvtps_epi32:
1124; X32: # BB#0:
1125; X32-NEXT: cvtps2dq %xmm0, %xmm0
1126; X32-NEXT: retl
1127;
1128; X64-LABEL: test_mm_cvtps_epi32:
1129; X64: # BB#0:
1130; X64-NEXT: cvtps2dq %xmm0, %xmm0
1131; X64-NEXT: retq
1132 %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
1133 %bc = bitcast <4 x i32> %res to <2 x i64>
1134 ret <2 x i64> %bc
1135}
1136declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
1137
; _mm_cvtps_pd: expressed as generic IR (shuffle low two f32 lanes + fpext)
; and expected to select to a single cvtps2pd.
1138define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind {
1139; X32-LABEL: test_mm_cvtps_pd:
1140; X32: # BB#0:
1141; X32-NEXT: cvtps2pd %xmm0, %xmm0
1142; X32-NEXT: retl
1143;
1144; X64-LABEL: test_mm_cvtps_pd:
1145; X64: # BB#0:
1146; X64-NEXT: cvtps2pd %xmm0, %xmm0
1147; X64-NEXT: retq
Simon Pilgrim8a5ff3c2016-05-23 22:17:36 +00001148 %ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
1149 %res = fpext <2 x float> %ext to <2 x double>
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001150 ret <2 x double> %res
1151}
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001152
; _mm_cvtsd_f64: extract lane 0. On i386 the double is returned on the x87
; stack, hence the aligned stack slot + movlps/fldl sequence; on x86-64 the
; value is already in xmm0 so the function is a bare ret.
1153define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
1154; X32-LABEL: test_mm_cvtsd_f64:
1155; X32: # BB#0:
1156; X32-NEXT: pushl %ebp
1157; X32-NEXT: movl %esp, %ebp
1158; X32-NEXT: andl $-8, %esp
1159; X32-NEXT: subl $8, %esp
1160; X32-NEXT: movlps %xmm0, (%esp)
1161; X32-NEXT: fldl (%esp)
1162; X32-NEXT: movl %ebp, %esp
1163; X32-NEXT: popl %ebp
1164; X32-NEXT: retl
1165;
1166; X64-LABEL: test_mm_cvtsd_f64:
1167; X64: # BB#0:
1168; X64-NEXT: retq
1169 %res = extractelement <2 x double> %a0, i32 0
1170 ret double %res
1171}
1172
; _mm_cvtsd_si32: rounding scalar f64 -> i32 via the target intrinsic.
1173define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
1174; X32-LABEL: test_mm_cvtsd_si32:
1175; X32: # BB#0:
1176; X32-NEXT: cvtsd2si %xmm0, %eax
1177; X32-NEXT: retl
1178;
1179; X64-LABEL: test_mm_cvtsd_si32:
1180; X64: # BB#0:
1181; X64-NEXT: cvtsd2si %xmm0, %eax
1182; X64-NEXT: retq
1183 %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
1184 ret i32 %res
1185}
1186declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
1187
; _mm_cvtsi128_si32: extract the low i32 lane - selects to movd.
1188define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
1189; X32-LABEL: test_mm_cvtsi128_si32:
1190; X32: # BB#0:
1191; X32-NEXT: movd %xmm0, %eax
1192; X32-NEXT: retl
1193;
1194; X64-LABEL: test_mm_cvtsi128_si32:
1195; X64: # BB#0:
1196; X64-NEXT: movd %xmm0, %eax
1197; X64-NEXT: retq
1198 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1199 %res = extractelement <4 x i32> %arg0, i32 0
1200 ret i32 %res
1201}
1202
; _mm_cvtsi32_sd: sitofp into lane 0, preserving the upper lane of %a0.
1203define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
1204; X32-LABEL: test_mm_cvtsi32_sd:
1205; X32: # BB#0:
1206; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1207; X32-NEXT: cvtsi2sdl %eax, %xmm1
1208; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1209; X32-NEXT: retl
1210;
1211; X64-LABEL: test_mm_cvtsi32_sd:
1212; X64: # BB#0:
1213; X64-NEXT: cvtsi2sdl %edi, %xmm1
1214; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1215; X64-NEXT: retq
1216 %cvt = sitofp i32 %a1 to double
1217 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1218 ret <2 x double> %res
1219}
1220
; _mm_cvtsi32_si128: build <i32 x, 0, 0, 0> - a zero-extending movd.
1221define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
1222; X32-LABEL: test_mm_cvtsi32_si128:
1223; X32: # BB#0:
1224; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1225; X32-NEXT: retl
1226;
1227; X64-LABEL: test_mm_cvtsi32_si128:
1228; X64: # BB#0:
1229; X64-NEXT: movd %edi, %xmm0
1230; X64-NEXT: retq
1231 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
1232 %res1 = insertelement <4 x i32> %res0, i32 0, i32 1
1233 %res2 = insertelement <4 x i32> %res1, i32 0, i32 2
1234 %res3 = insertelement <4 x i32> %res2, i32 0, i32 3
1235 %res = bitcast <4 x i32> %res3 to <2 x i64>
1236 ret <2 x i64> %res
1237}
1238
; _mm_cvtss_sd: fpext the low f32 of %a1 into lane 0 of %a0 (cvtss2sd + movsd blend).
1239define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
1240; X32-LABEL: test_mm_cvtss_sd:
1241; X32: # BB#0:
1242; X32-NEXT: cvtss2sd %xmm1, %xmm1
1243; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1244; X32-NEXT: retl
1245;
1246; X64-LABEL: test_mm_cvtss_sd:
1247; X64: # BB#0:
1248; X64-NEXT: cvtss2sd %xmm1, %xmm1
1249; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1250; X64-NEXT: retq
1251 %ext = extractelement <4 x float> %a1, i32 0
1252 %cvt = fpext float %ext to double
1253 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1254 ret <2 x double> %res
1255}
1256
; _mm_cvttpd_epi32: truncating (round-toward-zero) f64 -> i32 conversion.
1257define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
1258; X32-LABEL: test_mm_cvttpd_epi32:
1259; X32: # BB#0:
1260; X32-NEXT: cvttpd2dq %xmm0, %xmm0
1261; X32-NEXT: retl
1262;
1263; X64-LABEL: test_mm_cvttpd_epi32:
1264; X64: # BB#0:
1265; X64-NEXT: cvttpd2dq %xmm0, %xmm0
1266; X64-NEXT: retq
1267 %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
1268 %bc = bitcast <4 x i32> %res to <2 x i64>
1269 ret <2 x i64> %bc
1270}
1271declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
1272
; _mm_cvttps_epi32: truncating f32 -> i32 conversion.
1273define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
1274; X32-LABEL: test_mm_cvttps_epi32:
1275; X32: # BB#0:
1276; X32-NEXT: cvttps2dq %xmm0, %xmm0
1277; X32-NEXT: retl
1278;
1279; X64-LABEL: test_mm_cvttps_epi32:
1280; X64: # BB#0:
1281; X64-NEXT: cvttps2dq %xmm0, %xmm0
1282; X64-NEXT: retq
1283 %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
1284 %bc = bitcast <4 x i32> %res to <2 x i64>
1285 ret <2 x i64> %bc
1286}
1287declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
1288
; _mm_cvttsd_si32: generic IR form (extract + fptosi) must select to cvttsd2si.
1289define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
1290; X32-LABEL: test_mm_cvttsd_si32:
1291; X32: # BB#0:
1292; X32-NEXT: cvttsd2si %xmm0, %eax
1293; X32-NEXT: retl
1294;
1295; X64-LABEL: test_mm_cvttsd_si32:
1296; X64: # BB#0:
1297; X64-NEXT: cvttsd2si %xmm0, %eax
1298; X64-NEXT: retq
1299 %ext = extractelement <2 x double> %a0, i32 0
1300 %res = fptosi double %ext to i32
1301 ret i32 %res
1302}
1303
; _mm_div_pd: plain IR fdiv selects to divpd.
1304define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1305; X32-LABEL: test_mm_div_pd:
1306; X32: # BB#0:
1307; X32-NEXT: divpd %xmm1, %xmm0
1308; X32-NEXT: retl
1309;
1310; X64-LABEL: test_mm_div_pd:
1311; X64: # BB#0:
1312; X64-NEXT: divpd %xmm1, %xmm0
1313; X64-NEXT: retq
1314 %res = fdiv <2 x double> %a0, %a1
1315 ret <2 x double> %res
1316}
1317
; _mm_div_sd: scalar extract/fdiv/insert pattern must fold to a single divsd,
; leaving the upper lane of %a0 untouched.
1318define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1319; X32-LABEL: test_mm_div_sd:
1320; X32: # BB#0:
1321; X32-NEXT: divsd %xmm1, %xmm0
1322; X32-NEXT: retl
1323;
1324; X64-LABEL: test_mm_div_sd:
1325; X64: # BB#0:
1326; X64-NEXT: divsd %xmm1, %xmm0
1327; X64-NEXT: retq
1328 %ext0 = extractelement <2 x double> %a0, i32 0
1329 %ext1 = extractelement <2 x double> %a1, i32 0
1330 %fdiv = fdiv double %ext0, %ext1
1331 %res = insertelement <2 x double> %a0, double %fdiv, i32 0
1332 ret <2 x double> %res
1333}
1334
; _mm_extract_epi16: pextrw of lane 1, zero-extended to i32.
1335define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind {
1336; X32-LABEL: test_mm_extract_epi16:
1337; X32: # BB#0:
1338; X32-NEXT: pextrw $1, %xmm0, %eax
1339; X32-NEXT: movzwl %ax, %eax
1340; X32-NEXT: retl
1341;
1342; X64-LABEL: test_mm_extract_epi16:
1343; X64: # BB#0:
1344; X64-NEXT: pextrw $1, %xmm0, %eax
1345; X64-NEXT: movzwl %ax, %eax
1346; X64-NEXT: retq
1347 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1348 %ext = extractelement <8 x i16> %arg0, i32 1
1349 %res = zext i16 %ext to i32
1350 ret i32 %res
1351}
1352
; _mm_insert_epi16: insertelement into lane 1 selects to pinsrw $1.
1353define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
1354; X32-LABEL: test_mm_insert_epi16:
1355; X32: # BB#0:
1356; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
1357; X32-NEXT: pinsrw $1, %eax, %xmm0
1358; X32-NEXT: retl
1359;
1360; X64-LABEL: test_mm_insert_epi16:
1361; X64: # BB#0:
1362; X64-NEXT: pinsrw $1, %edi, %xmm0
1363; X64-NEXT: retq
1364 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1365 %res = insertelement <8 x i16> %arg0, i16 %a1,i32 1
1366 %bc = bitcast <8 x i16> %res to <2 x i64>
1367 ret <2 x i64> %bc
1368}
1369
; _mm_lfence: the fence intrinsic lowers to the bare lfence instruction.
1370define void @test_mm_lfence() nounwind {
1371; X32-LABEL: test_mm_lfence:
1372; X32: # BB#0:
1373; X32-NEXT: lfence
1374; X32-NEXT: retl
1375;
1376; X64-LABEL: test_mm_lfence:
1377; X64: # BB#0:
1378; X64-NEXT: lfence
1379; X64-NEXT: retq
1380 call void @llvm.x86.sse2.lfence()
1381 ret void
1382}
1383declare void @llvm.x86.sse2.lfence() nounwind readnone
1384
; _mm_load_pd: 16-byte-aligned load; note it may select the int/float-domain
; movaps rather than movapd.
1385define <2 x double> @test_mm_load_pd(double* %a0) nounwind {
1386; X32-LABEL: test_mm_load_pd:
1387; X32: # BB#0:
1388; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1389; X32-NEXT: movaps (%eax), %xmm0
1390; X32-NEXT: retl
1391;
1392; X64-LABEL: test_mm_load_pd:
1393; X64: # BB#0:
1394; X64-NEXT: movaps (%rdi), %xmm0
1395; X64-NEXT: retq
1396 %arg0 = bitcast double* %a0 to <2 x double>*
1397 %res = load <2 x double>, <2 x double>* %arg0, align 16
1398 ret <2 x double> %res
1399}
1400
; _mm_load_sd: unaligned scalar load into lane 0 with the upper lane zeroed.
1401define <2 x double> @test_mm_load_sd(double* %a0) nounwind {
1402; X32-LABEL: test_mm_load_sd:
1403; X32: # BB#0:
1404; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1405; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1406; X32-NEXT: retl
1407;
1408; X64-LABEL: test_mm_load_sd:
1409; X64: # BB#0:
1410; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1411; X64-NEXT: retq
1412 %ld = load double, double* %a0, align 1
1413 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1414 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
1415 ret <2 x double> %res1
1416}
1417
; _mm_load_si128: aligned 128-bit integer load.
1418define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind {
1419; X32-LABEL: test_mm_load_si128:
1420; X32: # BB#0:
1421; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1422; X32-NEXT: movaps (%eax), %xmm0
1423; X32-NEXT: retl
1424;
1425; X64-LABEL: test_mm_load_si128:
1426; X64: # BB#0:
1427; X64-NEXT: movaps (%rdi), %xmm0
1428; X64-NEXT: retq
1429 %res = load <2 x i64>, <2 x i64>* %a0, align 16
1430 ret <2 x i64> %res
1431}
1432
; _mm_load1_pd: one scalar load broadcast into both lanes (movsd + movlhps).
1433define <2 x double> @test_mm_load1_pd(double* %a0) nounwind {
1434; X32-LABEL: test_mm_load1_pd:
1435; X32: # BB#0:
1436; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1437; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1438; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1439; X32-NEXT: retl
1440;
1441; X64-LABEL: test_mm_load1_pd:
1442; X64: # BB#0:
1443; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1444; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1445; X64-NEXT: retq
1446 %ld = load double, double* %a0, align 8
1447 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1448 %res1 = insertelement <2 x double> %res0, double %ld, i32 1
1449 ret <2 x double> %res1
1450}
1451
; _mm_loadh_pd: load into the high lane only (movhpd).
1452define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind {
1453; X32-LABEL: test_mm_loadh_pd:
1454; X32: # BB#0:
1455; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1456; X32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1457; X32-NEXT: retl
1458;
1459; X64-LABEL: test_mm_loadh_pd:
1460; X64: # BB#0:
1461; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1462; X64-NEXT: retq
1463 %ld = load double, double* %a1, align 8
1464 %res = insertelement <2 x double> %a0, double %ld, i32 1
1465 ret <2 x double> %res
1466}
1467
; _mm_loadl_epi64: unaligned 64-bit load zero-extended to 128 bits (movq).
1468define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind {
1469; X32-LABEL: test_mm_loadl_epi64:
1470; X32: # BB#0:
1471; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1472; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1473; X32-NEXT: retl
1474;
1475; X64-LABEL: test_mm_loadl_epi64:
1476; X64: # BB#0:
1477; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1478; X64-NEXT: retq
1479 %bc = bitcast <2 x i64>* %a1 to i64*
1480 %ld = load i64, i64* %bc, align 1
1481 %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
1482 %res1 = insertelement <2 x i64> %res0, i64 0, i32 1
1483 ret <2 x i64> %res1
1484}
1485
; _mm_loadl_pd: load into the low lane, preserving the high lane (movlpd).
1486define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind {
1487; X32-LABEL: test_mm_loadl_pd:
1488; X32: # BB#0:
1489; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1490; X32-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1491; X32-NEXT: retl
1492;
1493; X64-LABEL: test_mm_loadl_pd:
1494; X64: # BB#0:
1495; X64-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1496; X64-NEXT: retq
1497 %ld = load double, double* %a1, align 8
1498 %res = insertelement <2 x double> %a0, double %ld, i32 0
1499 ret <2 x double> %res
1500}
1501
; _mm_loadr_pd: aligned load followed by a lane swap (shufpd [1,0]).
1502define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
1503; X32-LABEL: test_mm_loadr_pd:
1504; X32: # BB#0:
1505; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1506; X32-NEXT: movapd (%eax), %xmm0
1507; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1508; X32-NEXT: retl
1509;
1510; X64-LABEL: test_mm_loadr_pd:
1511; X64: # BB#0:
1512; X64-NEXT: movapd (%rdi), %xmm0
1513; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1514; X64-NEXT: retq
1515 %arg0 = bitcast double* %a0 to <2 x double>*
1516 %ld = load <2 x double>, <2 x double>* %arg0, align 16
1517 %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1518 ret <2 x double> %res
1519}
1520
; _mm_loadu_pd: align-1 load selects the unaligned movups.
1521define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind {
1522; X32-LABEL: test_mm_loadu_pd:
1523; X32: # BB#0:
1524; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1525; X32-NEXT: movups (%eax), %xmm0
1526; X32-NEXT: retl
1527;
1528; X64-LABEL: test_mm_loadu_pd:
1529; X64: # BB#0:
1530; X64-NEXT: movups (%rdi), %xmm0
1531; X64-NEXT: retq
1532 %arg0 = bitcast double* %a0 to <2 x double>*
1533 %res = load <2 x double>, <2 x double>* %arg0, align 1
1534 ret <2 x double> %res
1535}
1536
; _mm_loadu_si128: unaligned 128-bit integer load (movups).
1537define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind {
1538; X32-LABEL: test_mm_loadu_si128:
1539; X32: # BB#0:
1540; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1541; X32-NEXT: movups (%eax), %xmm0
1542; X32-NEXT: retl
1543;
1544; X64-LABEL: test_mm_loadu_si128:
1545; X64: # BB#0:
1546; X64-NEXT: movups (%rdi), %xmm0
1547; X64-NEXT: retq
1548 %res = load <2 x i64>, <2 x i64>* %a0, align 1
1549 ret <2 x i64> %res
1550}
1551
; _mm_madd_epi16: multiply-add of adjacent i16 pairs (pmaddwd).
1552define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1553; X32-LABEL: test_mm_madd_epi16:
1554; X32: # BB#0:
1555; X32-NEXT: pmaddwd %xmm1, %xmm0
1556; X32-NEXT: retl
1557;
1558; X64-LABEL: test_mm_madd_epi16:
1559; X64: # BB#0:
1560; X64-NEXT: pmaddwd %xmm1, %xmm0
1561; X64-NEXT: retq
1562 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1563 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1564 %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1)
1565 %bc = bitcast <4 x i32> %res to <2 x i64>
1566 ret <2 x i64> %bc
1567}
1568declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
1569
; _mm_maskmoveu_si128: maskmovdqu implicitly addresses through (e/r)di, so the
; X32 sequence must spill/restore %edi and load the pointer into it first.
1570define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind {
1571; X32-LABEL: test_mm_maskmoveu_si128:
1572; X32: # BB#0:
1573; X32-NEXT: pushl %edi
1574; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
1575; X32-NEXT: maskmovdqu %xmm1, %xmm0
1576; X32-NEXT: popl %edi
1577; X32-NEXT: retl
1578;
1579; X64-LABEL: test_mm_maskmoveu_si128:
1580; X64: # BB#0:
1581; X64-NEXT: maskmovdqu %xmm1, %xmm0
1582; X64-NEXT: retq
1583 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1584 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1585 call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2)
1586 ret void
1587}
1588declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
1589
; _mm_max_epi16: signed i16 max (pmaxsw).
1590define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1591; X32-LABEL: test_mm_max_epi16:
1592; X32: # BB#0:
1593; X32-NEXT: pmaxsw %xmm1, %xmm0
1594; X32-NEXT: retl
1595;
1596; X64-LABEL: test_mm_max_epi16:
1597; X64: # BB#0:
1598; X64-NEXT: pmaxsw %xmm1, %xmm0
1599; X64-NEXT: retq
1600 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1601 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1602 %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %arg0, <8 x i16> %arg1)
1603 %bc = bitcast <8 x i16> %res to <2 x i64>
1604 ret <2 x i64> %bc
1605}
1606declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
1607
; _mm_max_epu8: unsigned i8 max (pmaxub).
1608define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1609; X32-LABEL: test_mm_max_epu8:
1610; X32: # BB#0:
1611; X32-NEXT: pmaxub %xmm1, %xmm0
1612; X32-NEXT: retl
1613;
1614; X64-LABEL: test_mm_max_epu8:
1615; X64: # BB#0:
1616; X64-NEXT: pmaxub %xmm1, %xmm0
1617; X64-NEXT: retq
1618 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1619 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1620 %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %arg0, <16 x i8> %arg1)
1621 %bc = bitcast <16 x i8> %res to <2 x i64>
1622 ret <2 x i64> %bc
1623}
1624declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
1625
; _mm_max_pd: packed double max via the target intrinsic (maxpd).
1626define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1627; X32-LABEL: test_mm_max_pd:
1628; X32: # BB#0:
1629; X32-NEXT: maxpd %xmm1, %xmm0
1630; X32-NEXT: retl
1631;
1632; X64-LABEL: test_mm_max_pd:
1633; X64: # BB#0:
1634; X64-NEXT: maxpd %xmm1, %xmm0
1635; X64-NEXT: retq
1636 %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
1637 ret <2 x double> %res
1638}
1639declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
1640
; _mm_max_sd: scalar double max in the low lane (maxsd).
1641define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1642; X32-LABEL: test_mm_max_sd:
1643; X32: # BB#0:
1644; X32-NEXT: maxsd %xmm1, %xmm0
1645; X32-NEXT: retl
1646;
1647; X64-LABEL: test_mm_max_sd:
1648; X64: # BB#0:
1649; X64-NEXT: maxsd %xmm1, %xmm0
1650; X64-NEXT: retq
1651 %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
1652 ret <2 x double> %res
1653}
1654declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
1655
; _mm_mfence: the fence intrinsic lowers to the bare mfence instruction.
1656define void @test_mm_mfence() nounwind {
1657; X32-LABEL: test_mm_mfence:
1658; X32: # BB#0:
1659; X32-NEXT: mfence
1660; X32-NEXT: retl
1661;
1662; X64-LABEL: test_mm_mfence:
1663; X64: # BB#0:
1664; X64-NEXT: mfence
1665; X64-NEXT: retq
1666 call void @llvm.x86.sse2.mfence()
1667 ret void
1668}
1669declare void @llvm.x86.sse2.mfence() nounwind readnone
1670
; _mm_min_epi16: signed i16 min (pminsw).
1671define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1672; X32-LABEL: test_mm_min_epi16:
1673; X32: # BB#0:
1674; X32-NEXT: pminsw %xmm1, %xmm0
1675; X32-NEXT: retl
1676;
1677; X64-LABEL: test_mm_min_epi16:
1678; X64: # BB#0:
1679; X64-NEXT: pminsw %xmm1, %xmm0
1680; X64-NEXT: retq
1681 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1682 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1683 %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %arg0, <8 x i16> %arg1)
1684 %bc = bitcast <8 x i16> %res to <2 x i64>
1685 ret <2 x i64> %bc
1686}
1687declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
1688
; _mm_min_epu8: unsigned i8 min (pminub).
1689define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1690; X32-LABEL: test_mm_min_epu8:
1691; X32: # BB#0:
1692; X32-NEXT: pminub %xmm1, %xmm0
1693; X32-NEXT: retl
1694;
1695; X64-LABEL: test_mm_min_epu8:
1696; X64: # BB#0:
1697; X64-NEXT: pminub %xmm1, %xmm0
1698; X64-NEXT: retq
1699 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1700 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1701 %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %arg0, <16 x i8> %arg1)
1702 %bc = bitcast <16 x i8> %res to <2 x i64>
1703 ret <2 x i64> %bc
1704}
1705declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
1706
; _mm_min_pd: packed double min via the target intrinsic (minpd).
1707define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1708; X32-LABEL: test_mm_min_pd:
1709; X32: # BB#0:
1710; X32-NEXT: minpd %xmm1, %xmm0
1711; X32-NEXT: retl
1712;
1713; X64-LABEL: test_mm_min_pd:
1714; X64: # BB#0:
1715; X64-NEXT: minpd %xmm1, %xmm0
1716; X64-NEXT: retq
1717 %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
1718 ret <2 x double> %res
1719}
1720declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
1721
; _mm_min_sd: scalar double min in the low lane (minsd).
1722define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1723; X32-LABEL: test_mm_min_sd:
1724; X32: # BB#0:
1725; X32-NEXT: minsd %xmm1, %xmm0
1726; X32-NEXT: retl
1727;
1728; X64-LABEL: test_mm_min_sd:
1729; X64: # BB#0:
1730; X64-NEXT: minsd %xmm1, %xmm0
1731; X64-NEXT: retq
1732 %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
1733 ret <2 x double> %res
1734}
1735declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
1736
; _mm_move_epi64: keep the low i64, zero the high one - shuffle with a zero
; vector selects to movq.
Simon Pilgrim47825fa2016-05-19 11:59:57 +00001737define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind {
1738; X32-LABEL: test_mm_move_epi64:
1739; X32: # BB#0:
1740; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1741; X32-NEXT: retl
1742;
1743; X64-LABEL: test_mm_move_epi64:
1744; X64: # BB#0:
1745; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1746; X64-NEXT: retq
1747 %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
1748 ret <2 x i64> %res
1749}
1750
; _mm_move_sd: low lane from %a1, high lane from %a0 - selects to movsd.
1751define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1752; X32-LABEL: test_mm_move_sd:
1753; X32: # BB#0:
1754; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1755; X32-NEXT: retl
1756;
1757; X64-LABEL: test_mm_move_sd:
1758; X64: # BB#0:
1759; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1760; X64-NEXT: retq
1761 %ext0 = extractelement <2 x double> %a1, i32 0
1762 %res0 = insertelement <2 x double> undef, double %ext0, i32 0
1763 %ext1 = extractelement <2 x double> %a0, i32 1
1764 %res1 = insertelement <2 x double> %res0, double %ext1, i32 1
1765 ret <2 x double> %res1
1766}
1767
; _mm_movemask_epi8: gather the 16 byte sign bits into a GPR (pmovmskb).
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00001768define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind {
1769; X32-LABEL: test_mm_movemask_epi8:
1770; X32: # BB#0:
1771; X32-NEXT: pmovmskb %xmm0, %eax
1772; X32-NEXT: retl
1773;
1774; X64-LABEL: test_mm_movemask_epi8:
1775; X64: # BB#0:
1776; X64-NEXT: pmovmskb %xmm0, %eax
1777; X64-NEXT: retq
1778 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1779 %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0)
1780 ret i32 %res
1781}
1782declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
1783
; _mm_movemask_pd: gather the two double sign bits (movmskpd).
1784define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind {
1785; X32-LABEL: test_mm_movemask_pd:
1786; X32: # BB#0:
1787; X32-NEXT: movmskpd %xmm0, %eax
1788; X32-NEXT: retl
1789;
1790; X64-LABEL: test_mm_movemask_pd:
1791; X64: # BB#0:
1792; X64-NEXT: movmskpd %xmm0, %eax
1793; X64-NEXT: retq
1794 %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
1795 ret i32 %res
1796}
1797declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
1798
; _mm_mul_epu32: widening unsigned multiply of the even i32 lanes (pmuludq).
1799define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) {
1800; X32-LABEL: test_mm_mul_epu32:
1801; X32: # BB#0:
1802; X32-NEXT: pmuludq %xmm1, %xmm0
1803; X32-NEXT: retl
1804;
1805; X64-LABEL: test_mm_mul_epu32:
1806; X64: # BB#0:
1807; X64-NEXT: pmuludq %xmm1, %xmm0
1808; X64-NEXT: retq
1809 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1810 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1811 %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %arg0, <4 x i32> %arg1)
1812 ret <2 x i64> %res
1813}
1814declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
1815
; _mm_mul_pd: plain IR fmul selects to mulpd.
1816define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1817; X32-LABEL: test_mm_mul_pd:
1818; X32: # BB#0:
1819; X32-NEXT: mulpd %xmm1, %xmm0
1820; X32-NEXT: retl
1821;
1822; X64-LABEL: test_mm_mul_pd:
1823; X64: # BB#0:
1824; X64-NEXT: mulpd %xmm1, %xmm0
1825; X64-NEXT: retq
1826 %res = fmul <2 x double> %a0, %a1
1827 ret <2 x double> %res
1828}
1829
; _mm_mul_sd: extract/fmul/insert on lane 0 must fold to a single mulsd.
1830define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1831; X32-LABEL: test_mm_mul_sd:
1832; X32: # BB#0:
1833; X32-NEXT: mulsd %xmm1, %xmm0
1834; X32-NEXT: retl
1835;
1836; X64-LABEL: test_mm_mul_sd:
1837; X64: # BB#0:
1838; X64-NEXT: mulsd %xmm1, %xmm0
1839; X64-NEXT: retq
1840 %ext0 = extractelement <2 x double> %a0, i32 0
1841 %ext1 = extractelement <2 x double> %a1, i32 0
1842 %fmul = fmul double %ext0, %ext1
1843 %res = insertelement <2 x double> %a0, double %fmul, i32 0
1844 ret <2 x double> %res
1845}
1846
; _mm_mulhi_epi16: high half of the signed i16 multiply (pmulhw).
1847define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1848; X32-LABEL: test_mm_mulhi_epi16:
1849; X32: # BB#0:
1850; X32-NEXT: pmulhw %xmm1, %xmm0
1851; X32-NEXT: retl
1852;
1853; X64-LABEL: test_mm_mulhi_epi16:
1854; X64: # BB#0:
1855; X64-NEXT: pmulhw %xmm1, %xmm0
1856; X64-NEXT: retq
1857 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1858 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1859 %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1)
1860 %bc = bitcast <8 x i16> %res to <2 x i64>
1861 ret <2 x i64> %bc
1862}
1863declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
1864
; _mm_mulhi_epu16: high half of the unsigned i16 multiply (pmulhuw).
1865define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) {
1866; X32-LABEL: test_mm_mulhi_epu16:
1867; X32: # BB#0:
1868; X32-NEXT: pmulhuw %xmm1, %xmm0
1869; X32-NEXT: retl
1870;
1871; X64-LABEL: test_mm_mulhi_epu16:
1872; X64: # BB#0:
1873; X64-NEXT: pmulhuw %xmm1, %xmm0
1874; X64-NEXT: retq
1875 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1876 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1877 %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1)
1878 %bc = bitcast <8 x i16> %res to <2 x i64>
1879 ret <2 x i64> %bc
1880}
1881declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
1882
; _mm_mullo_epi16: plain IR mul on <8 x i16> selects to pmullw.
1883define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1884; X32-LABEL: test_mm_mullo_epi16:
1885; X32: # BB#0:
1886; X32-NEXT: pmullw %xmm1, %xmm0
1887; X32-NEXT: retl
1888;
1889; X64-LABEL: test_mm_mullo_epi16:
1890; X64: # BB#0:
1891; X64-NEXT: pmullw %xmm1, %xmm0
1892; X64-NEXT: retq
1893 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1894 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1895 %res = mul <8 x i16> %arg0, %arg1
1896 %bc = bitcast <8 x i16> %res to <2 x i64>
1897 ret <2 x i64> %bc
1898}
1899
; _mm_or_pd: bitwise OR done via integer bitcasts; note it may select the
; float-domain orps rather than orpd/por.
1900define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1901; X32-LABEL: test_mm_or_pd:
1902; X32: # BB#0:
1903; X32-NEXT: orps %xmm1, %xmm0
1904; X32-NEXT: retl
1905;
1906; X64-LABEL: test_mm_or_pd:
1907; X64: # BB#0:
1908; X64-NEXT: orps %xmm1, %xmm0
1909; X64-NEXT: retq
1910 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
1911 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
1912 %res = or <4 x i32> %arg0, %arg1
1913 %bc = bitcast <4 x i32> %res to <2 x double>
1914 ret <2 x double> %bc
1915}
1916
; _mm_or_si128: plain IR or on <2 x i64>; also selects orps here.
1917define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1918; X32-LABEL: test_mm_or_si128:
1919; X32: # BB#0:
1920; X32-NEXT: orps %xmm1, %xmm0
1921; X32-NEXT: retl
1922;
1923; X64-LABEL: test_mm_or_si128:
1924; X64: # BB#0:
1925; X64-NEXT: orps %xmm1, %xmm0
1926; X64-NEXT: retq
1927 %res = or <2 x i64> %a0, %a1
1928 ret <2 x i64> %res
1929}
1930
; Verifies _mm_packs_epi16 (llvm.x86.sse2.packsswb.128): signed saturating
; pack of two <8 x i16> into <16 x i8>, selecting a single PACKSSWB.
1931define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1932; X32-LABEL: test_mm_packs_epi16:
1933; X32: # BB#0:
1934; X32-NEXT: packsswb %xmm1, %xmm0
1935; X32-NEXT: retl
1936;
1937; X64-LABEL: test_mm_packs_epi16:
1938; X64: # BB#0:
1939; X64-NEXT: packsswb %xmm1, %xmm0
1940; X64-NEXT: retq
1941 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1942 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1943 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
1944 %bc = bitcast <16 x i8> %res to <2 x i64>
1945 ret <2 x i64> %bc
1946}
1947declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
1948
; Verifies _mm_packs_epi32 (llvm.x86.sse2.packssdw.128): signed saturating
; pack of two <4 x i32> into <8 x i16>, selecting a single PACKSSDW.
1949define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) {
1950; X32-LABEL: test_mm_packs_epi32:
1951; X32: # BB#0:
1952; X32-NEXT: packssdw %xmm1, %xmm0
1953; X32-NEXT: retl
1954;
1955; X64-LABEL: test_mm_packs_epi32:
1956; X64: # BB#0:
1957; X64-NEXT: packssdw %xmm1, %xmm0
1958; X64-NEXT: retq
1959 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1960 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1961 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1)
1962 %bc = bitcast <8 x i16> %res to <2 x i64>
1963 ret <2 x i64> %bc
1964}
1965declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
1966
; Verifies _mm_packus_epi16 (llvm.x86.sse2.packuswb.128): unsigned saturating
; pack of two <8 x i16> into <16 x i8>, selecting a single PACKUSWB.
1967define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1968; X32-LABEL: test_mm_packus_epi16:
1969; X32: # BB#0:
1970; X32-NEXT: packuswb %xmm1, %xmm0
1971; X32-NEXT: retl
1972;
1973; X64-LABEL: test_mm_packus_epi16:
1974; X64: # BB#0:
1975; X64-NEXT: packuswb %xmm1, %xmm0
1976; X64-NEXT: retq
1977 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1978 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1979 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
1980 %bc = bitcast <16 x i8> %res to <2 x i64>
1981 ret <2 x i64> %bc
1982}
1983declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
1984
; Verifies _mm_pause (llvm.x86.sse2.pause) emits the PAUSE instruction.
1985define void @test_mm_pause() nounwind {
1986; X32-LABEL: test_mm_pause:
1987; X32: # BB#0:
1988; X32-NEXT: pause
1989; X32-NEXT: retl
1990;
1991; X64-LABEL: test_mm_pause:
1992; X64: # BB#0:
1993; X64-NEXT: pause
1994; X64-NEXT: retq
1995 call void @llvm.x86.sse2.pause()
1996 ret void
1997}
1998declare void @llvm.x86.sse2.pause() nounwind readnone
1999
; Verifies _mm_sad_epu8 (llvm.x86.sse2.psad.bw) on <16 x i8> inputs selects a
; single PSADBW; the intrinsic already returns <2 x i64>, so no result bitcast.
2000define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2001; X32-LABEL: test_mm_sad_epu8:
2002; X32: # BB#0:
2003; X32-NEXT: psadbw %xmm1, %xmm0
2004; X32-NEXT: retl
2005;
2006; X64-LABEL: test_mm_sad_epu8:
2007; X64: # BB#0:
2008; X64-NEXT: psadbw %xmm1, %xmm0
2009; X64-NEXT: retq
2010 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2011 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2012 %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1)
2013 ret <2 x i64> %res
2014}
2015declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
2016
; Verifies _mm_set_epi8: 16 insertelements place the arguments in REVERSE order
; (%a15 into lane 0 ... %a0 into lane 15), matching the intrinsic's big-to-little
; argument convention. The expected codegen is a tree of movzbl/movd scalar
; loads merged by punpcklbw; on X64 the first six values come from registers
; (dil/sil/dl/cl/r8b/r9b) and the rest from the stack.
Simon Pilgrim01809e02016-05-19 10:58:54 +00002017define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
2018; X32-LABEL: test_mm_set_epi8:
2019; X32: # BB#0:
2020; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2021; X32-NEXT: movd %eax, %xmm0
2022; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2023; X32-NEXT: movd %eax, %xmm1
2024; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2025; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2026; X32-NEXT: movd %eax, %xmm0
2027; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2028; X32-NEXT: movd %eax, %xmm2
2029; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2030; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2031; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2032; X32-NEXT: movd %eax, %xmm0
2033; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2034; X32-NEXT: movd %eax, %xmm3
2035; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2036; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2037; X32-NEXT: movd %eax, %xmm0
2038; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2039; X32-NEXT: movd %eax, %xmm1
2040; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2041; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2042; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2043; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2044; X32-NEXT: movd %eax, %xmm0
2045; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2046; X32-NEXT: movd %eax, %xmm2
2047; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2048; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2049; X32-NEXT: movd %eax, %xmm0
2050; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2051; X32-NEXT: movd %eax, %xmm3
2052; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2053; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2054; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2055; X32-NEXT: movd %eax, %xmm0
2056; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2057; X32-NEXT: movd %eax, %xmm2
2058; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2059; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2060; X32-NEXT: movd %eax, %xmm4
2061; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2062; X32-NEXT: movd %eax, %xmm0
2063; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2064; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2065; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2066; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2067; X32-NEXT: retl
2068;
2069; X64-LABEL: test_mm_set_epi8:
2070; X64: # BB#0:
2071; X64-NEXT: movzbl %dil, %eax
2072; X64-NEXT: movd %eax, %xmm0
2073; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2074; X64-NEXT: movd %eax, %xmm1
2075; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2076; X64-NEXT: movzbl %r8b, %eax
2077; X64-NEXT: movd %eax, %xmm0
2078; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2079; X64-NEXT: movd %eax, %xmm2
2080; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2081; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2082; X64-NEXT: movzbl %dl, %eax
2083; X64-NEXT: movd %eax, %xmm0
2084; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2085; X64-NEXT: movd %eax, %xmm3
2086; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2087; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2088; X64-NEXT: movd %eax, %xmm0
2089; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2090; X64-NEXT: movd %eax, %xmm1
2091; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2092; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2093; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2094; X64-NEXT: movzbl %sil, %eax
2095; X64-NEXT: movd %eax, %xmm0
2096; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2097; X64-NEXT: movd %eax, %xmm2
2098; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2099; X64-NEXT: movzbl %r9b, %eax
2100; X64-NEXT: movd %eax, %xmm0
2101; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2102; X64-NEXT: movd %eax, %xmm3
2103; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2104; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2105; X64-NEXT: movzbl %cl, %eax
2106; X64-NEXT: movd %eax, %xmm0
2107; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2108; X64-NEXT: movd %eax, %xmm2
2109; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2110; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2111; X64-NEXT: movd %eax, %xmm4
2112; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2113; X64-NEXT: movd %eax, %xmm0
2114; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2115; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2116; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2117; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2118; X64-NEXT: retq
2119 %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0
2120 %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1
2121 %res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2
2122 %res3 = insertelement <16 x i8> %res2, i8 %a12, i32 3
2123 %res4 = insertelement <16 x i8> %res3, i8 %a11, i32 4
2124 %res5 = insertelement <16 x i8> %res4, i8 %a10, i32 5
2125 %res6 = insertelement <16 x i8> %res5, i8 %a9 , i32 6
2126 %res7 = insertelement <16 x i8> %res6, i8 %a8 , i32 7
2127 %res8 = insertelement <16 x i8> %res7, i8 %a7 , i32 8
2128 %res9 = insertelement <16 x i8> %res8, i8 %a6 , i32 9
2129 %res10 = insertelement <16 x i8> %res9, i8 %a5 , i32 10
2130 %res11 = insertelement <16 x i8> %res10, i8 %a4 , i32 11
2131 %res12 = insertelement <16 x i8> %res11, i8 %a3 , i32 12
2132 %res13 = insertelement <16 x i8> %res12, i8 %a2 , i32 13
2133 %res14 = insertelement <16 x i8> %res13, i8 %a1 , i32 14
2134 %res15 = insertelement <16 x i8> %res14, i8 %a0 , i32 15
2135 %res = bitcast <16 x i8> %res15 to <2 x i64>
2136 ret <2 x i64> %res
2137}
2138
; Verifies _mm_set_epi16: eight insertelements in REVERSE argument order
; (%a7 into lane 0 ... %a0 into lane 7), merged with a punpcklwd tree.
; On X64 six values arrive in registers and two (r10w/ax) come from the stack.
2139define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
2140; X32-LABEL: test_mm_set_epi16:
2141; X32: # BB#0:
2142; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2143; X32-NEXT: movd %eax, %xmm1
2144; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2145; X32-NEXT: movd %eax, %xmm2
2146; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2147; X32-NEXT: movd %eax, %xmm3
2148; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2149; X32-NEXT: movd %eax, %xmm4
2150; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2151; X32-NEXT: movd %eax, %xmm5
2152; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2153; X32-NEXT: movd %eax, %xmm6
2154; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2155; X32-NEXT: movd %eax, %xmm7
2156; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2157; X32-NEXT: movd %eax, %xmm0
2158; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2159; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2160; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2161; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
2162; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
2163; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
2164; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
2165; X32-NEXT: retl
2166;
2167; X64-LABEL: test_mm_set_epi16:
2168; X64: # BB#0:
2169; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
2170; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
2171; X64-NEXT: movd %edi, %xmm0
2172; X64-NEXT: movd %r8d, %xmm1
2173; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2174; X64-NEXT: movd %edx, %xmm0
2175; X64-NEXT: movd %eax, %xmm2
2176; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2177; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2178; X64-NEXT: movd %esi, %xmm0
2179; X64-NEXT: movd %r9d, %xmm1
2180; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2181; X64-NEXT: movd %ecx, %xmm3
2182; X64-NEXT: movd %r10d, %xmm0
2183; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
2184; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2185; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2186; X64-NEXT: retq
2187 %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0
2188 %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1
2189 %res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2
2190 %res3 = insertelement <8 x i16> %res2, i16 %a4, i32 3
2191 %res4 = insertelement <8 x i16> %res3, i16 %a3, i32 4
2192 %res5 = insertelement <8 x i16> %res4, i16 %a2, i32 5
2193 %res6 = insertelement <8 x i16> %res5, i16 %a1, i32 6
2194 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
2195 %res = bitcast <8 x i16> %res7 to <2 x i64>
2196 ret <2 x i64> %res
2197}
2198
; Verifies _mm_set_epi32: four insertelements in REVERSE argument order
; (%a3 into lane 0 ... %a0 into lane 3), merged with a punpckldq tree.
2199define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
2200; X32-LABEL: test_mm_set_epi32:
2201; X32: # BB#0:
2202; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2203; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2204; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2205; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2206; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2207; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2208; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2209; X32-NEXT: retl
2210;
2211; X64-LABEL: test_mm_set_epi32:
2212; X64: # BB#0:
2213; X64-NEXT: movd %edi, %xmm0
2214; X64-NEXT: movd %edx, %xmm1
2215; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2216; X64-NEXT: movd %esi, %xmm2
2217; X64-NEXT: movd %ecx, %xmm0
2218; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2219; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2220; X64-NEXT: retq
2221 %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0
2222 %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1
2223 %res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2
2224 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
2225 %res = bitcast <4 x i32> %res3 to <2 x i64>
2226 ret <2 x i64> %res
2227}
2228
2229; TODO test_mm_set_epi64
2230
; Verifies _mm_set_epi64x: %a1 goes to lane 0 and %a0 to lane 1. X64 can use
; 64-bit GPR movd + punpcklqdq; X32 assembles each i64 from two 32-bit loads.
2231define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
2232; X32-LABEL: test_mm_set_epi64x:
2233; X32: # BB#0:
2234; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2235; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2236; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2237; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2238; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2239; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2240; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2241; X32-NEXT: retl
2242;
2243; X64-LABEL: test_mm_set_epi64x:
2244; X64: # BB#0:
2245; X64-NEXT: movd %rdi, %xmm1
2246; X64-NEXT: movd %rsi, %xmm0
2247; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2248; X64-NEXT: retq
2249 %res0 = insertelement <2 x i64> undef, i64 %a1, i32 0
2250 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
2251 ret <2 x i64> %res1
2252}
2253
; Verifies _mm_set_pd: %a1 into lane 0, %a0 into lane 1, merged by UNPCKLPD.
2254define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind {
2255; X32-LABEL: test_mm_set_pd:
2256; X32: # BB#0:
2257; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2258; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2259; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2260; X32-NEXT: retl
2261;
2262; X64-LABEL: test_mm_set_pd:
2263; X64: # BB#0:
2264; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2265; X64-NEXT: movapd %xmm1, %xmm0
2266; X64-NEXT: retq
2267 %res0 = insertelement <2 x double> undef, double %a1, i32 0
2268 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
2269 ret <2 x double> %res1
2270}
2271
; Verifies _mm_set_sd: %a0 into lane 0 with lane 1 zeroed, selected as a
; MOVQ (which zeroes the upper half of the register).
2272define <2 x double> @test_mm_set_sd(double %a0) nounwind {
2273; X32-LABEL: test_mm_set_sd:
2274; X32: # BB#0:
2275; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2276; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2277; X32-NEXT: retl
2278;
2279; X64-LABEL: test_mm_set_sd:
2280; X64: # BB#0:
2281; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2282; X64-NEXT: retq
2283 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2284 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
2285 ret <2 x double> %res1
2286}
2287
; Verifies _mm_set1_epi8: splat of one i8 into all 16 lanes, lowered as
; movzbl/movd then punpcklbw + pshuflw + pshufd to broadcast the byte.
2288define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
2289; X32-LABEL: test_mm_set1_epi8:
2290; X32: # BB#0:
2291; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2292; X32-NEXT: movd %eax, %xmm0
2293; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2294; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2295; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2296; X32-NEXT: retl
2297;
2298; X64-LABEL: test_mm_set1_epi8:
2299; X64: # BB#0:
2300; X64-NEXT: movzbl %dil, %eax
2301; X64-NEXT: movd %eax, %xmm0
2302; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2303; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2304; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2305; X64-NEXT: retq
2306 %res0 = insertelement <16 x i8> undef, i8 %a0, i32 0
2307 %res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1
2308 %res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2
2309 %res3 = insertelement <16 x i8> %res2, i8 %a0, i32 3
2310 %res4 = insertelement <16 x i8> %res3, i8 %a0, i32 4
2311 %res5 = insertelement <16 x i8> %res4, i8 %a0, i32 5
2312 %res6 = insertelement <16 x i8> %res5, i8 %a0, i32 6
2313 %res7 = insertelement <16 x i8> %res6, i8 %a0, i32 7
2314 %res8 = insertelement <16 x i8> %res7, i8 %a0, i32 8
2315 %res9 = insertelement <16 x i8> %res8, i8 %a0, i32 9
2316 %res10 = insertelement <16 x i8> %res9, i8 %a0, i32 10
2317 %res11 = insertelement <16 x i8> %res10, i8 %a0, i32 11
2318 %res12 = insertelement <16 x i8> %res11, i8 %a0, i32 12
2319 %res13 = insertelement <16 x i8> %res12, i8 %a0, i32 13
2320 %res14 = insertelement <16 x i8> %res13, i8 %a0, i32 14
2321 %res15 = insertelement <16 x i8> %res14, i8 %a0, i32 15
2322 %res = bitcast <16 x i8> %res15 to <2 x i64>
2323 ret <2 x i64> %res
2324}
2325
; Verifies _mm_set1_epi16: splat of one i16 into all 8 lanes via
; pshuflw + pshufd after the scalar movd.
2326define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind {
2327; X32-LABEL: test_mm_set1_epi16:
2328; X32: # BB#0:
2329; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2330; X32-NEXT: movd %eax, %xmm0
2331; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2332; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2333; X32-NEXT: retl
2334;
2335; X64-LABEL: test_mm_set1_epi16:
2336; X64: # BB#0:
2337; X64-NEXT: movd %edi, %xmm0
2338; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2339; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2340; X64-NEXT: retq
2341 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
2342 %res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1
2343 %res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2
2344 %res3 = insertelement <8 x i16> %res2, i16 %a0, i32 3
2345 %res4 = insertelement <8 x i16> %res3, i16 %a0, i32 4
2346 %res5 = insertelement <8 x i16> %res4, i16 %a0, i32 5
2347 %res6 = insertelement <8 x i16> %res5, i16 %a0, i32 6
2348 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
2349 %res = bitcast <8 x i16> %res7 to <2 x i64>
2350 ret <2 x i64> %res
2351}
2352
; Verifies _mm_set1_epi32: splat of one i32 into all 4 lanes via a single
; pshufd broadcast.
2353define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind {
2354; X32-LABEL: test_mm_set1_epi32:
2355; X32: # BB#0:
2356; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2357; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2358; X32-NEXT: retl
2359;
2360; X64-LABEL: test_mm_set1_epi32:
2361; X64: # BB#0:
2362; X64-NEXT: movd %edi, %xmm0
2363; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2364; X64-NEXT: retq
2365 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
2366 %res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1
2367 %res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2
2368 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
2369 %res = bitcast <4 x i32> %res3 to <2 x i64>
2370 ret <2 x i64> %res
2371}
2372
2373; TODO test_mm_set1_epi64
2374
; Verifies _mm_set1_epi64x: splat of one i64 into both lanes. X64 uses a
; 64-bit movd + pshufd; X32 builds each half from 32-bit pieces.
2375define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
2376; X32-LABEL: test_mm_set1_epi64x:
2377; X32: # BB#0:
2378; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2379; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2380; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2381; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
2382; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2383; X32-NEXT: retl
2384;
2385; X64-LABEL: test_mm_set1_epi64x:
2386; X64: # BB#0:
2387; X64-NEXT: movd %rdi, %xmm0
2388; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2389; X64-NEXT: retq
2390 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
2391 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
2392 ret <2 x i64> %res1
2393}
2394
; Verifies _mm_set1_pd: splat of one double into both lanes, selected as
; MOVLHPS duplicating the low quadword.
2395define <2 x double> @test_mm_set1_pd(double %a0) nounwind {
2396; X32-LABEL: test_mm_set1_pd:
2397; X32: # BB#0:
2398; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2399; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2400; X32-NEXT: retl
2401;
2402; X64-LABEL: test_mm_set1_pd:
2403; X64: # BB#0:
2404; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2405; X64-NEXT: retq
2406 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2407 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
2408 ret <2 x double> %res1
2409}
2410
; Verifies _mm_setr_epi8: unlike test_mm_set_epi8 above, the 16 insertelements
; are in ARGUMENT order (%a0 into lane 0 ... %a15 into lane 15). Same expected
; movzbl/movd + punpcklbw merge tree, with the X64 register operands
; (dil/sil/dl/cl/r8b/r9b) appearing at the mirrored positions.
2411define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
2412; X32-LABEL: test_mm_setr_epi8:
2413; X32: # BB#0:
2414; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2415; X32-NEXT: movd %eax, %xmm0
2416; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2417; X32-NEXT: movd %eax, %xmm1
2418; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2419; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2420; X32-NEXT: movd %eax, %xmm0
2421; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2422; X32-NEXT: movd %eax, %xmm2
2423; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2424; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2425; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2426; X32-NEXT: movd %eax, %xmm0
2427; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2428; X32-NEXT: movd %eax, %xmm3
2429; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2430; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2431; X32-NEXT: movd %eax, %xmm0
2432; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2433; X32-NEXT: movd %eax, %xmm1
2434; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2435; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2436; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2437; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2438; X32-NEXT: movd %eax, %xmm0
2439; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2440; X32-NEXT: movd %eax, %xmm2
2441; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2442; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2443; X32-NEXT: movd %eax, %xmm0
2444; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2445; X32-NEXT: movd %eax, %xmm3
2446; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2447; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2448; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2449; X32-NEXT: movd %eax, %xmm0
2450; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2451; X32-NEXT: movd %eax, %xmm2
2452; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2453; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2454; X32-NEXT: movd %eax, %xmm4
2455; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2456; X32-NEXT: movd %eax, %xmm0
2457; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2458; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2459; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2460; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2461; X32-NEXT: retl
2462;
2463; X64-LABEL: test_mm_setr_epi8:
2464; X64: # BB#0:
2465; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2466; X64-NEXT: movd %eax, %xmm0
2467; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2468; X64-NEXT: movd %eax, %xmm1
2469; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2470; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2471; X64-NEXT: movd %eax, %xmm0
2472; X64-NEXT: movzbl %cl, %eax
2473; X64-NEXT: movd %eax, %xmm2
2474; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2475; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2476; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2477; X64-NEXT: movd %eax, %xmm0
2478; X64-NEXT: movzbl %r9b, %eax
2479; X64-NEXT: movd %eax, %xmm3
2480; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2481; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2482; X64-NEXT: movd %eax, %xmm0
2483; X64-NEXT: movzbl %sil, %eax
2484; X64-NEXT: movd %eax, %xmm1
2485; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2486; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2487; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2488; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2489; X64-NEXT: movd %eax, %xmm0
2490; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2491; X64-NEXT: movd %eax, %xmm2
2492; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2493; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2494; X64-NEXT: movd %eax, %xmm0
2495; X64-NEXT: movzbl %dl, %eax
2496; X64-NEXT: movd %eax, %xmm3
2497; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2498; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2499; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2500; X64-NEXT: movd %eax, %xmm0
2501; X64-NEXT: movzbl %r8b, %eax
2502; X64-NEXT: movd %eax, %xmm2
2503; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2504; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2505; X64-NEXT: movd %eax, %xmm4
2506; X64-NEXT: movzbl %dil, %eax
2507; X64-NEXT: movd %eax, %xmm0
2508; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2509; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2510; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2511; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2512; X64-NEXT: retq
2513 %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0
2514 %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1
2515 %res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2
2516 %res3 = insertelement <16 x i8> %res2, i8 %a3 , i32 3
2517 %res4 = insertelement <16 x i8> %res3, i8 %a4 , i32 4
2518 %res5 = insertelement <16 x i8> %res4, i8 %a5 , i32 5
2519 %res6 = insertelement <16 x i8> %res5, i8 %a6 , i32 6
2520 %res7 = insertelement <16 x i8> %res6, i8 %a7 , i32 7
2521 %res8 = insertelement <16 x i8> %res7, i8 %a8 , i32 8
2522 %res9 = insertelement <16 x i8> %res8, i8 %a9 , i32 9
2523 %res10 = insertelement <16 x i8> %res9, i8 %a10, i32 10
2524 %res11 = insertelement <16 x i8> %res10, i8 %a11, i32 11
2525 %res12 = insertelement <16 x i8> %res11, i8 %a12, i32 12
2526 %res13 = insertelement <16 x i8> %res12, i8 %a13, i32 13
2527 %res14 = insertelement <16 x i8> %res13, i8 %a14, i32 14
2528 %res15 = insertelement <16 x i8> %res14, i8 %a15, i32 15
2529 %res = bitcast <16 x i8> %res15 to <2 x i64>
2530 ret <2 x i64> %res
2531}
2532
2533define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
2534; X32-LABEL: test_mm_setr_epi16:
2535; X32: # BB#0:
2536; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2537; X32-NEXT: movd %eax, %xmm1
2538; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2539; X32-NEXT: movd %eax, %xmm2
2540; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2541; X32-NEXT: movd %eax, %xmm3
2542; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2543; X32-NEXT: movd %eax, %xmm4
2544; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2545; X32-NEXT: movd %eax, %xmm5
2546; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2547; X32-NEXT: movd %eax, %xmm6
2548; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2549; X32-NEXT: movd %eax, %xmm7
2550; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2551; X32-NEXT: movd %eax, %xmm0
2552; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2553; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2554; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2555; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
2556; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
2557; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
2558; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
2559; X32-NEXT: retl
2560;
2561; X64-LABEL: test_mm_setr_epi16:
2562; X64: # BB#0:
2563; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
2564; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
2565; X64-NEXT: movd %eax, %xmm0
2566; X64-NEXT: movd %ecx, %xmm1
2567; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2568; X64-NEXT: movd %r9d, %xmm0
2569; X64-NEXT: movd %esi, %xmm2
2570; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2571; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2572; X64-NEXT: movd %r10d, %xmm0
2573; X64-NEXT: movd %edx, %xmm1
2574; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2575; X64-NEXT: movd %r8d, %xmm3
2576; X64-NEXT: movd %edi, %xmm0
2577; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
2578; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2579; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2580; X64-NEXT: retq
2581 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
2582 %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1
2583 %res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2
2584 %res3 = insertelement <8 x i16> %res2, i16 %a3, i32 3
2585 %res4 = insertelement <8 x i16> %res3, i16 %a4, i32 4
2586 %res5 = insertelement <8 x i16> %res4, i16 %a5, i32 5
2587 %res6 = insertelement <8 x i16> %res5, i16 %a6, i32 6
2588 %res7 = insertelement <8 x i16> %res6, i16 %a7, i32 7
2589 %res = bitcast <8 x i16> %res7 to <2 x i64>
2590 ret <2 x i64> %res
2591}
2592
2593define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
2594; X32-LABEL: test_mm_setr_epi32:
2595; X32: # BB#0:
2596; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2597; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2598; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2599; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2600; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2601; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2602; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2603; X32-NEXT: retl
2604;
2605; X64-LABEL: test_mm_setr_epi32:
2606; X64: # BB#0:
2607; X64-NEXT: movd %ecx, %xmm0
2608; X64-NEXT: movd %esi, %xmm1
2609; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2610; X64-NEXT: movd %edx, %xmm2
2611; X64-NEXT: movd %edi, %xmm0
2612; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2613; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2614; X64-NEXT: retq
2615 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
2616 %res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1
2617 %res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2
2618 %res3 = insertelement <4 x i32> %res2, i32 %a3, i32 3
2619 %res = bitcast <4 x i32> %res3 to <2 x i64>
2620 ret <2 x i64> %res
2621}
2622
2623; TODO test_mm_setr_epi64
2624
2625define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
2626; X32-LABEL: test_mm_setr_epi64x:
2627; X32: # BB#0:
2628; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2629; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2630; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2631; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2632; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2633; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2634; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2635; X32-NEXT: retl
2636;
2637; X64-LABEL: test_mm_setr_epi64x:
2638; X64: # BB#0:
2639; X64-NEXT: movd %rsi, %xmm1
2640; X64-NEXT: movd %rdi, %xmm0
2641; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2642; X64-NEXT: retq
2643 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
2644 %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1
2645 ret <2 x i64> %res1
2646}
2647
2648define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind {
2649; X32-LABEL: test_mm_setr_pd:
2650; X32: # BB#0:
2651; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2652; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2653; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2654; X32-NEXT: retl
2655;
2656; X64-LABEL: test_mm_setr_pd:
2657; X64: # BB#0:
2658; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2659; X64-NEXT: retq
2660 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2661 %res1 = insertelement <2 x double> %res0, double %a1, i32 1
2662 ret <2 x double> %res1
2663}
2664
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00002665define <2 x double> @test_mm_setzero_pd() {
2666; X32-LABEL: test_mm_setzero_pd:
2667; X32: # BB#0:
2668; X32-NEXT: xorps %xmm0, %xmm0
2669; X32-NEXT: retl
2670;
2671; X64-LABEL: test_mm_setzero_pd:
2672; X64: # BB#0:
2673; X64-NEXT: xorps %xmm0, %xmm0
2674; X64-NEXT: retq
2675 ret <2 x double> zeroinitializer
2676}
2677
2678define <2 x i64> @test_mm_setzero_si128() {
2679; X32-LABEL: test_mm_setzero_si128:
2680; X32: # BB#0:
2681; X32-NEXT: xorps %xmm0, %xmm0
2682; X32-NEXT: retl
2683;
2684; X64-LABEL: test_mm_setzero_si128:
2685; X64: # BB#0:
2686; X64-NEXT: xorps %xmm0, %xmm0
2687; X64-NEXT: retq
2688 ret <2 x i64> zeroinitializer
2689}
2690
2691define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) {
2692; X32-LABEL: test_mm_shuffle_epi32:
2693; X32: # BB#0:
2694; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2695; X32-NEXT: retl
2696;
2697; X64-LABEL: test_mm_shuffle_epi32:
2698; X64: # BB#0:
2699; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2700; X64-NEXT: retq
2701 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2702 %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer
2703 %bc = bitcast <4 x i32> %res to <2 x i64>
2704 ret <2 x i64> %bc
2705}
2706
2707define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
2708; X32-LABEL: test_mm_shuffle_pd:
2709; X32: # BB#0:
2710; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
2711; X32-NEXT: retl
2712;
2713; X64-LABEL: test_mm_shuffle_pd:
2714; X64: # BB#0:
2715; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
2716; X64-NEXT: retq
2717 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
2718 ret <2 x double> %res
2719}
2720
2721define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) {
2722; X32-LABEL: test_mm_shufflehi_epi16:
2723; X32: # BB#0:
2724; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2725; X32-NEXT: retl
2726;
2727; X64-LABEL: test_mm_shufflehi_epi16:
2728; X64: # BB#0:
2729; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2730; X64-NEXT: retq
2731 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2732 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
2733 %bc = bitcast <8 x i16> %res to <2 x i64>
2734 ret <2 x i64> %bc
2735}
2736
2737define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) {
2738; X32-LABEL: test_mm_shufflelo_epi16:
2739; X32: # BB#0:
2740; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2741; X32-NEXT: retl
2742;
2743; X64-LABEL: test_mm_shufflelo_epi16:
2744; X64: # BB#0:
2745; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2746; X64-NEXT: retq
2747 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2748 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
2749 %bc = bitcast <8 x i16> %res to <2 x i64>
2750 ret <2 x i64> %bc
2751}
2752
2753define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2754; X32-LABEL: test_mm_sll_epi16:
2755; X32: # BB#0:
2756; X32-NEXT: psllw %xmm1, %xmm0
2757; X32-NEXT: retl
2758;
2759; X64-LABEL: test_mm_sll_epi16:
2760; X64: # BB#0:
2761; X64-NEXT: psllw %xmm1, %xmm0
2762; X64-NEXT: retq
2763 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2764 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2765 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1)
2766 %bc = bitcast <8 x i16> %res to <2 x i64>
2767 ret <2 x i64> %bc
2768}
2769declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
2770
2771define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2772; X32-LABEL: test_mm_sll_epi32:
2773; X32: # BB#0:
2774; X32-NEXT: pslld %xmm1, %xmm0
2775; X32-NEXT: retl
2776;
2777; X64-LABEL: test_mm_sll_epi32:
2778; X64: # BB#0:
2779; X64-NEXT: pslld %xmm1, %xmm0
2780; X64-NEXT: retq
2781 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2782 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2783 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1)
2784 %bc = bitcast <4 x i32> %res to <2 x i64>
2785 ret <2 x i64> %bc
2786}
2787declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
2788
2789define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) {
2790; X32-LABEL: test_mm_sll_epi64:
2791; X32: # BB#0:
2792; X32-NEXT: psllq %xmm1, %xmm0
2793; X32-NEXT: retl
2794;
2795; X64-LABEL: test_mm_sll_epi64:
2796; X64: # BB#0:
2797; X64-NEXT: psllq %xmm1, %xmm0
2798; X64-NEXT: retq
2799 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
2800 ret <2 x i64> %res
2801}
2802declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
2803
2804define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) {
2805; X32-LABEL: test_mm_slli_epi16:
2806; X32: # BB#0:
2807; X32-NEXT: psllw $1, %xmm0
2808; X32-NEXT: retl
2809;
2810; X64-LABEL: test_mm_slli_epi16:
2811; X64: # BB#0:
2812; X64-NEXT: psllw $1, %xmm0
2813; X64-NEXT: retq
2814 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2815 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1)
2816 %bc = bitcast <8 x i16> %res to <2 x i64>
2817 ret <2 x i64> %bc
2818}
2819declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
2820
2821define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) {
2822; X32-LABEL: test_mm_slli_epi32:
2823; X32: # BB#0:
2824; X32-NEXT: pslld $1, %xmm0
2825; X32-NEXT: retl
2826;
2827; X64-LABEL: test_mm_slli_epi32:
2828; X64: # BB#0:
2829; X64-NEXT: pslld $1, %xmm0
2830; X64-NEXT: retq
2831 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2832 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1)
2833 %bc = bitcast <4 x i32> %res to <2 x i64>
2834 ret <2 x i64> %bc
2835}
2836declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
2837
2838define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) {
2839; X32-LABEL: test_mm_slli_epi64:
2840; X32: # BB#0:
2841; X32-NEXT: psllq $1, %xmm0
2842; X32-NEXT: retl
2843;
2844; X64-LABEL: test_mm_slli_epi64:
2845; X64: # BB#0:
2846; X64-NEXT: psllq $1, %xmm0
2847; X64-NEXT: retq
2848 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1)
2849 ret <2 x i64> %res
2850}
2851declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
2852
2853define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind {
2854; X32-LABEL: test_mm_slli_si128:
2855; X32: # BB#0:
2856; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
2857; X32-NEXT: retl
2858;
2859; X64-LABEL: test_mm_slli_si128:
2860; X64: # BB#0:
2861; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
2862; X64-NEXT: retq
2863 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2864 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
2865 %bc = bitcast <16 x i8> %res to <2 x i64>
2866 ret <2 x i64> %bc
2867}
2868
2869define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind {
2870; X32-LABEL: test_mm_sqrt_pd:
2871; X32: # BB#0:
2872; X32-NEXT: sqrtpd %xmm0, %xmm0
2873; X32-NEXT: retl
2874;
2875; X64-LABEL: test_mm_sqrt_pd:
2876; X64: # BB#0:
2877; X64-NEXT: sqrtpd %xmm0, %xmm0
2878; X64-NEXT: retq
2879 %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
2880 ret <2 x double> %res
2881}
2882declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
2883
2884define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2885; X32-LABEL: test_mm_sqrt_sd:
2886; X32: # BB#0:
2887; X32-NEXT: sqrtsd %xmm0, %xmm1
2888; X32-NEXT: movaps %xmm1, %xmm0
2889; X32-NEXT: retl
2890;
2891; X64-LABEL: test_mm_sqrt_sd:
2892; X64: # BB#0:
2893; X64-NEXT: sqrtsd %xmm0, %xmm1
2894; X64-NEXT: movaps %xmm1, %xmm0
2895; X64-NEXT: retq
2896 %call = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
2897 %ext0 = extractelement <2 x double> %call, i32 0
2898 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
2899 %ext1 = extractelement <2 x double> %a1, i32 1
2900 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
2901 ret <2 x double> %ins1
2902}
2903declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
2904
2905define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2906; X32-LABEL: test_mm_sra_epi16:
2907; X32: # BB#0:
2908; X32-NEXT: psraw %xmm1, %xmm0
2909; X32-NEXT: retl
2910;
2911; X64-LABEL: test_mm_sra_epi16:
2912; X64: # BB#0:
2913; X64-NEXT: psraw %xmm1, %xmm0
2914; X64-NEXT: retq
2915 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2916 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2917 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1)
2918 %bc = bitcast <8 x i16> %res to <2 x i64>
2919 ret <2 x i64> %bc
2920}
2921declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
2922
2923define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2924; X32-LABEL: test_mm_sra_epi32:
2925; X32: # BB#0:
2926; X32-NEXT: psrad %xmm1, %xmm0
2927; X32-NEXT: retl
2928;
2929; X64-LABEL: test_mm_sra_epi32:
2930; X64: # BB#0:
2931; X64-NEXT: psrad %xmm1, %xmm0
2932; X64-NEXT: retq
2933 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2934 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2935 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1)
2936 %bc = bitcast <4 x i32> %res to <2 x i64>
2937 ret <2 x i64> %bc
2938}
2939declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
2940
2941define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) {
2942; X32-LABEL: test_mm_srai_epi16:
2943; X32: # BB#0:
2944; X32-NEXT: psraw $1, %xmm0
2945; X32-NEXT: retl
2946;
2947; X64-LABEL: test_mm_srai_epi16:
2948; X64: # BB#0:
2949; X64-NEXT: psraw $1, %xmm0
2950; X64-NEXT: retq
2951 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2952 %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1)
2953 %bc = bitcast <8 x i16> %res to <2 x i64>
2954 ret <2 x i64> %bc
2955}
2956declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
2957
2958define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) {
2959; X32-LABEL: test_mm_srai_epi32:
2960; X32: # BB#0:
2961; X32-NEXT: psrad $1, %xmm0
2962; X32-NEXT: retl
2963;
2964; X64-LABEL: test_mm_srai_epi32:
2965; X64: # BB#0:
2966; X64-NEXT: psrad $1, %xmm0
2967; X64-NEXT: retq
2968 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2969 %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1)
2970 %bc = bitcast <4 x i32> %res to <2 x i64>
2971 ret <2 x i64> %bc
2972}
2973declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
2974
2975define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2976; X32-LABEL: test_mm_srl_epi16:
2977; X32: # BB#0:
2978; X32-NEXT: psrlw %xmm1, %xmm0
2979; X32-NEXT: retl
2980;
2981; X64-LABEL: test_mm_srl_epi16:
2982; X64: # BB#0:
2983; X64-NEXT: psrlw %xmm1, %xmm0
2984; X64-NEXT: retq
2985 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2986 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2987 %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1)
2988 %bc = bitcast <8 x i16> %res to <2 x i64>
2989 ret <2 x i64> %bc
2990}
2991declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
2992
2993define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2994; X32-LABEL: test_mm_srl_epi32:
2995; X32: # BB#0:
2996; X32-NEXT: psrld %xmm1, %xmm0
2997; X32-NEXT: retl
2998;
2999; X64-LABEL: test_mm_srl_epi32:
3000; X64: # BB#0:
3001; X64-NEXT: psrld %xmm1, %xmm0
3002; X64-NEXT: retq
3003 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3004 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3005 %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1)
3006 %bc = bitcast <4 x i32> %res to <2 x i64>
3007 ret <2 x i64> %bc
3008}
3009declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
3010
3011define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
3012; X32-LABEL: test_mm_srl_epi64:
3013; X32: # BB#0:
3014; X32-NEXT: psrlq %xmm1, %xmm0
3015; X32-NEXT: retl
3016;
3017; X64-LABEL: test_mm_srl_epi64:
3018; X64: # BB#0:
3019; X64-NEXT: psrlq %xmm1, %xmm0
3020; X64-NEXT: retq
3021 %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
3022 ret <2 x i64> %res
3023}
3024declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
3025
3026define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) {
3027; X32-LABEL: test_mm_srli_epi16:
3028; X32: # BB#0:
3029; X32-NEXT: psrlw $1, %xmm0
3030; X32-NEXT: retl
3031;
3032; X64-LABEL: test_mm_srli_epi16:
3033; X64: # BB#0:
3034; X64-NEXT: psrlw $1, %xmm0
3035; X64-NEXT: retq
3036 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3037 %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1)
3038 %bc = bitcast <8 x i16> %res to <2 x i64>
3039 ret <2 x i64> %bc
3040}
3041declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
3042
3043define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) {
3044; X32-LABEL: test_mm_srli_epi32:
3045; X32: # BB#0:
3046; X32-NEXT: psrld $1, %xmm0
3047; X32-NEXT: retl
3048;
3049; X64-LABEL: test_mm_srli_epi32:
3050; X64: # BB#0:
3051; X64-NEXT: psrld $1, %xmm0
3052; X64-NEXT: retq
3053 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3054 %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1)
3055 %bc = bitcast <4 x i32> %res to <2 x i64>
3056 ret <2 x i64> %bc
3057}
3058declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
3059
3060define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) {
3061; X32-LABEL: test_mm_srli_epi64:
3062; X32: # BB#0:
3063; X32-NEXT: psrlq $1, %xmm0
3064; X32-NEXT: retl
3065;
3066; X64-LABEL: test_mm_srli_epi64:
3067; X64: # BB#0:
3068; X64-NEXT: psrlq $1, %xmm0
3069; X64-NEXT: retq
3070 %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1)
3071 ret <2 x i64> %res
3072}
3073declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
3074
3075define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind {
3076; X32-LABEL: test_mm_srli_si128:
3077; X32: # BB#0:
3078; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
3079; X32-NEXT: retl
3080;
3081; X64-LABEL: test_mm_srli_si128:
3082; X64: # BB#0:
3083; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
3084; X64-NEXT: retq
3085 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3086 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
3087 %bc = bitcast <16 x i8> %res to <2 x i64>
3088 ret <2 x i64> %bc
3089}
3090
3091define void @test_mm_store_pd(double *%a0, <2 x double> %a1) {
3092; X32-LABEL: test_mm_store_pd:
3093; X32: # BB#0:
3094; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3095; X32-NEXT: movaps %xmm0, (%eax)
3096; X32-NEXT: retl
3097;
3098; X64-LABEL: test_mm_store_pd:
3099; X64: # BB#0:
3100; X64-NEXT: movaps %xmm0, (%rdi)
3101; X64-NEXT: retq
3102 %arg0 = bitcast double* %a0 to <2 x double>*
3103 store <2 x double> %a1, <2 x double>* %arg0, align 16
3104 ret void
3105}
3106
3107define void @test_mm_store_sd(double *%a0, <2 x double> %a1) {
3108; X32-LABEL: test_mm_store_sd:
3109; X32: # BB#0:
3110; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3111; X32-NEXT: movsd %xmm0, (%eax)
3112; X32-NEXT: retl
3113;
3114; X64-LABEL: test_mm_store_sd:
3115; X64: # BB#0:
3116; X64-NEXT: movsd %xmm0, (%rdi)
3117; X64-NEXT: retq
3118 %ext = extractelement <2 x double> %a1, i32 0
3119 store double %ext, double* %a0, align 1
3120 ret void
3121}
3122
3123define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3124; X32-LABEL: test_mm_store_si128:
3125; X32: # BB#0:
3126; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3127; X32-NEXT: movaps %xmm0, (%eax)
3128; X32-NEXT: retl
3129;
3130; X64-LABEL: test_mm_store_si128:
3131; X64: # BB#0:
3132; X64-NEXT: movaps %xmm0, (%rdi)
3133; X64-NEXT: retq
3134 store <2 x i64> %a1, <2 x i64>* %a0, align 16
3135 ret void
3136}
3137
Simon Pilgrim4d1e2582016-05-25 09:42:29 +00003138define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) {
3139; X32-LABEL: test_mm_store1_pd:
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003140; X32: # BB#0:
3141; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3142; X32-NEXT: movsd %xmm0, (%eax)
3143; X32-NEXT: movsd %xmm0, 8(%eax)
3144; X32-NEXT: retl
3145;
Simon Pilgrim4d1e2582016-05-25 09:42:29 +00003146; X64-LABEL: test_mm_store1_pd:
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003147; X64: # BB#0:
3148; X64-NEXT: movsd %xmm0, (%rdi)
3149; X64-NEXT: movsd %xmm0, 8(%rdi)
3150; X64-NEXT: retq
3151 %ext = extractelement <2 x double> %a1, i32 0
3152 %ptr0 = getelementptr inbounds double, double* %a0, i32 0
3153 %ptr1 = getelementptr inbounds double, double* %a0, i32 1
3154 store double %ext, double* %ptr0, align 1
3155 store double %ext, double* %ptr1, align 1
3156 ret void
3157}
3158
3159define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) {
3160; X32-LABEL: test_mm_storeh_sd:
3161; X32: # BB#0:
3162; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3163; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3164; X32-NEXT: movsd %xmm0, (%eax)
3165; X32-NEXT: retl
3166;
3167; X64-LABEL: test_mm_storeh_sd:
3168; X64: # BB#0:
3169; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3170; X64-NEXT: movsd %xmm0, (%rdi)
3171; X64-NEXT: retq
3172 %ext = extractelement <2 x double> %a1, i32 1
3173 store double %ext, double* %a0, align 8
3174 ret void
3175}
3176
3177define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) {
3178; X32-LABEL: test_mm_storel_epi64:
3179; X32: # BB#0:
3180; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3181; X32-NEXT: movlps %xmm0, (%eax)
3182; X32-NEXT: retl
3183;
3184; X64-LABEL: test_mm_storel_epi64:
3185; X64: # BB#0:
3186; X64-NEXT: movd %xmm0, %rax
3187; X64-NEXT: movq %rax, (%rdi)
3188; X64-NEXT: retq
3189 %ext = extractelement <2 x i64> %a1, i32 0
3190 %bc = bitcast <2 x i64> *%a0 to i64*
3191 store i64 %ext, i64* %bc, align 8
3192 ret void
3193}
3194
3195define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) {
3196; X32-LABEL: test_mm_storel_sd:
3197; X32: # BB#0:
3198; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3199; X32-NEXT: movsd %xmm0, (%eax)
3200; X32-NEXT: retl
3201;
3202; X64-LABEL: test_mm_storel_sd:
3203; X64: # BB#0:
3204; X64-NEXT: movsd %xmm0, (%rdi)
3205; X64-NEXT: retq
3206 %ext = extractelement <2 x double> %a1, i32 0
3207 store double %ext, double* %a0, align 8
3208 ret void
3209}
3210
3211define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
3212; X32-LABEL: test_mm_storer_pd:
3213; X32: # BB#0:
3214; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3215; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3216; X32-NEXT: movapd %xmm0, (%eax)
3217; X32-NEXT: retl
3218;
3219; X64-LABEL: test_mm_storer_pd:
3220; X64: # BB#0:
3221; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3222; X64-NEXT: movapd %xmm0, (%rdi)
3223; X64-NEXT: retq
3224 %arg0 = bitcast double* %a0 to <2 x double>*
3225 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
3226 store <2 x double> %shuf, <2 x double>* %arg0, align 16
3227 ret void
3228}
3229
3230define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) {
3231; X32-LABEL: test_mm_storeu_pd:
3232; X32: # BB#0:
3233; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3234; X32-NEXT: movups %xmm0, (%eax)
3235; X32-NEXT: retl
3236;
3237; X64-LABEL: test_mm_storeu_pd:
3238; X64: # BB#0:
3239; X64-NEXT: movups %xmm0, (%rdi)
3240; X64-NEXT: retq
Simon Pilgrim4d1e2582016-05-25 09:42:29 +00003241 %arg0 = bitcast double* %a0 to i8*
3242 call void @llvm.x86.sse2.storeu.pd(i8* %arg0, <2 x double> %a1)
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003243 ret void
3244}
Simon Pilgrim4d1e2582016-05-25 09:42:29 +00003245declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003246
3247define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3248; X32-LABEL: test_mm_storeu_si128:
3249; X32: # BB#0:
3250; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3251; X32-NEXT: movups %xmm0, (%eax)
3252; X32-NEXT: retl
3253;
3254; X64-LABEL: test_mm_storeu_si128:
3255; X64: # BB#0:
3256; X64-NEXT: movups %xmm0, (%rdi)
3257; X64-NEXT: retq
Simon Pilgrim4d1e2582016-05-25 09:42:29 +00003258 %arg0 = bitcast <2 x i64>* %a0 to i8*
3259 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3260 call void @llvm.x86.sse2.storeu.dq(i8* %arg0, <16 x i8> %arg1)
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003261 ret void
3262}
Simon Pilgrim4d1e2582016-05-25 09:42:29 +00003263declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
Simon Pilgrim5a0d7282016-05-18 18:00:43 +00003264
3265define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) {
3266; X32-LABEL: test_mm_stream_pd:
3267; X32: # BB#0:
3268; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3269; X32-NEXT: movntps %xmm0, (%eax)
3270; X32-NEXT: retl
3271;
3272; X64-LABEL: test_mm_stream_pd:
3273; X64: # BB#0:
3274; X64-NEXT: movntps %xmm0, (%rdi)
3275; X64-NEXT: retq
3276 %arg0 = bitcast double* %a0 to <2 x double>*
3277 store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0
3278 ret void
3279}
3280
3281define void @test_mm_stream_si32(i32 *%a0, i32 %a1) {
3282; X32-LABEL: test_mm_stream_si32:
3283; X32: # BB#0:
3284; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3285; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
3286; X32-NEXT: movntil %eax, (%ecx)
3287; X32-NEXT: retl
3288;
3289; X64-LABEL: test_mm_stream_si32:
3290; X64: # BB#0:
3291; X64-NEXT: movntil %esi, (%rdi)
3292; X64-NEXT: retq
3293 store i32 %a1, i32* %a0, align 1, !nontemporal !0
3294 ret void
3295}
3296
3297define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3298; X32-LABEL: test_mm_stream_si128:
3299; X32: # BB#0:
3300; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3301; X32-NEXT: movntps %xmm0, (%eax)
3302; X32-NEXT: retl
3303;
3304; X64-LABEL: test_mm_stream_si128:
3305; X64: # BB#0:
3306; X64-NEXT: movntps %xmm0, (%rdi)
3307; X64-NEXT: retq
3308 store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0
3309 ret void
3310}
3311
3312define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3313; X32-LABEL: test_mm_sub_epi8:
3314; X32: # BB#0:
3315; X32-NEXT: psubb %xmm1, %xmm0
3316; X32-NEXT: retl
3317;
3318; X64-LABEL: test_mm_sub_epi8:
3319; X64: # BB#0:
3320; X64-NEXT: psubb %xmm1, %xmm0
3321; X64-NEXT: retq
3322 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3323 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3324 %res = sub <16 x i8> %arg0, %arg1
3325 %bc = bitcast <16 x i8> %res to <2 x i64>
3326 ret <2 x i64> %bc
3327}
3328
3329define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3330; X32-LABEL: test_mm_sub_epi16:
3331; X32: # BB#0:
3332; X32-NEXT: psubw %xmm1, %xmm0
3333; X32-NEXT: retl
3334;
3335; X64-LABEL: test_mm_sub_epi16:
3336; X64: # BB#0:
3337; X64-NEXT: psubw %xmm1, %xmm0
3338; X64-NEXT: retq
3339 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3340 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3341 %res = sub <8 x i16> %arg0, %arg1
3342 %bc = bitcast <8 x i16> %res to <2 x i64>
3343 ret <2 x i64> %bc
3344}
3345
3346define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3347; X32-LABEL: test_mm_sub_epi32:
3348; X32: # BB#0:
3349; X32-NEXT: psubd %xmm1, %xmm0
3350; X32-NEXT: retl
3351;
3352; X64-LABEL: test_mm_sub_epi32:
3353; X64: # BB#0:
3354; X64-NEXT: psubd %xmm1, %xmm0
3355; X64-NEXT: retq
3356 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3357 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3358 %res = sub <4 x i32> %arg0, %arg1
3359 %bc = bitcast <4 x i32> %res to <2 x i64>
3360 ret <2 x i64> %bc
3361}
3362
3363define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3364; X32-LABEL: test_mm_sub_epi64:
3365; X32: # BB#0:
3366; X32-NEXT: psubq %xmm1, %xmm0
3367; X32-NEXT: retl
3368;
3369; X64-LABEL: test_mm_sub_epi64:
3370; X64: # BB#0:
3371; X64-NEXT: psubq %xmm1, %xmm0
3372; X64-NEXT: retq
3373 %res = sub <2 x i64> %a0, %a1
3374 ret <2 x i64> %res
3375}
3376
3377define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
3378; X32-LABEL: test_mm_sub_pd:
3379; X32: # BB#0:
3380; X32-NEXT: subpd %xmm1, %xmm0
3381; X32-NEXT: retl
3382;
3383; X64-LABEL: test_mm_sub_pd:
3384; X64: # BB#0:
3385; X64-NEXT: subpd %xmm1, %xmm0
3386; X64-NEXT: retq
3387 %res = fsub <2 x double> %a0, %a1
3388 ret <2 x double> %res
3389}
3390
; _mm_sub_sd: scalar subtract on element 0 only (extract/fsub/insert pattern,
; upper element of %a0 passed through); expects a single subsd.
define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_sub_sd:
; X32: # BB#0:
; X32-NEXT: subsd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_sd:
; X64: # BB#0:
; X64-NEXT: subsd %xmm1, %xmm0
; X64-NEXT: retq
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fsub = fsub double %ext0, %ext1
  ; Result keeps %a0's upper lane; only lane 0 is replaced.
  %res = insertelement <2 x double> %a0, double %fsub, i32 0
  ret <2 x double> %res
}
3407
; _mm_subs_epi8: signed saturating i8 subtraction via the
; llvm.x86.sse2.psubs.b intrinsic; expects a single psubsb.
define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epi8:
; X32: # BB#0:
; X32-NEXT: psubsb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_subs_epi8:
; X64: # BB#0:
; X64-NEXT: psubsb %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
3425
; _mm_subs_epi16: signed saturating i16 subtraction via the
; llvm.x86.sse2.psubs.w intrinsic; expects a single psubsw.
define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epi16:
; X32: # BB#0:
; X32-NEXT: psubsw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_subs_epi16:
; X64: # BB#0:
; X64-NEXT: psubsw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
3443
; _mm_subs_epu8: unsigned saturating i8 subtraction via the
; llvm.x86.sse2.psubus.b intrinsic; expects a single psubusb.
define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epu8:
; X32: # BB#0:
; X32-NEXT: psubusb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_subs_epu8:
; X64: # BB#0:
; X64-NEXT: psubusb %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
3461
; _mm_subs_epu16: unsigned saturating i16 subtraction via the
; llvm.x86.sse2.psubus.w intrinsic; expects a single psubusw.
define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epu16:
; X32: # BB#0:
; X32-NEXT: psubusw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_subs_epu16:
; X64: # BB#0:
; X64-NEXT: psubusw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
3479
; _mm_ucomieq_sd: unordered scalar-double equality compare. CHECK lines
; expect ucomisd plus the setnp/sete/andb sequence (EQ must also be
; "not unordered", since ZF is set for unordered operands too).
define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomieq_sd:
; X32: # BB#0:
; X32-NEXT: ucomisd %xmm1, %xmm0
; X32-NEXT: setnp %al
; X32-NEXT: sete %cl
; X32-NEXT: andb %al, %cl
; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomieq_sd:
; X64: # BB#0:
; X64-NEXT: ucomisd %xmm1, %xmm0
; X64-NEXT: setnp %al
; X64-NEXT: sete %cl
; X64-NEXT: andb %al, %cl
; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
3502
; _mm_ucomige_sd: unordered scalar-double >= compare; expects ucomisd + setae.
define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomige_sd:
; X32: # BB#0:
; X32-NEXT: ucomisd %xmm1, %xmm0
; X32-NEXT: setae %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomige_sd:
; X64: # BB#0:
; X64-NEXT: ucomisd %xmm1, %xmm0
; X64-NEXT: setae %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
3521
; _mm_ucomigt_sd: unordered scalar-double > compare; expects ucomisd + seta.
define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomigt_sd:
; X32: # BB#0:
; X32-NEXT: ucomisd %xmm1, %xmm0
; X32-NEXT: seta %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomigt_sd:
; X64: # BB#0:
; X64-NEXT: ucomisd %xmm1, %xmm0
; X64-NEXT: seta %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
3540
; _mm_ucomile_sd: unordered scalar-double <= compare. CHECK lines show the
; operands commuted (ucomisd %xmm0, %xmm1) so the result can use setae.
define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomile_sd:
; X32: # BB#0:
; X32-NEXT: ucomisd %xmm0, %xmm1
; X32-NEXT: setae %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomile_sd:
; X64: # BB#0:
; X64-NEXT: ucomisd %xmm0, %xmm1
; X64-NEXT: setae %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
3559
; _mm_ucomilt_sd: unordered scalar-double < compare. CHECK lines show the
; operands commuted (ucomisd %xmm0, %xmm1) so the result can use seta.
define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomilt_sd:
; X32: # BB#0:
; X32-NEXT: ucomisd %xmm0, %xmm1
; X32-NEXT: seta %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomilt_sd:
; X64: # BB#0:
; X64-NEXT: ucomisd %xmm0, %xmm1
; X64-NEXT: seta %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
3578
; _mm_ucomineq_sd: unordered scalar-double inequality compare. CHECK lines
; expect ucomisd plus setp/setne/orb (NE is true for unordered operands too,
; hence the parity-flag OR).
define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomineq_sd:
; X32: # BB#0:
; X32-NEXT: ucomisd %xmm1, %xmm0
; X32-NEXT: setp %al
; X32-NEXT: setne %cl
; X32-NEXT: orb %al, %cl
; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomineq_sd:
; X64: # BB#0:
; X64-NEXT: ucomisd %xmm1, %xmm0
; X64-NEXT: setp %al
; X64-NEXT: setne %cl
; X64-NEXT: orb %al, %cl
; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
3601
; _mm_undefined_pd: returning undef must generate no instructions beyond ret.
define <2 x double> @test_mm_undefined_pd() {
; X32-LABEL: test_mm_undefined_pd:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm_undefined_pd:
; X64: # BB#0:
; X64-NEXT: retq
  ret <2 x double> undef
}
3612
; _mm_undefined_si128: returning undef must generate no instructions beyond ret.
define <2 x i64> @test_mm_undefined_si128() {
; X32-LABEL: test_mm_undefined_si128:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm_undefined_si128:
; X64: # BB#0:
; X64-NEXT: retq
  ret <2 x i64> undef
}
3623
; _mm_unpackhi_epi8: interleave the high 8 bytes of each operand
; (shuffle mask 8,24,9,25,...); expects a single punpckhbw.
define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi8:
; X32: # BB#0:
; X32-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi8:
; X64: # BB#0:
; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
3640
; _mm_unpackhi_epi16: interleave the high 4 words of each operand
; (shuffle mask 4,12,5,13,...); expects a single punpckhwd.
define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi16:
; X32: # BB#0:
; X32-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi16:
; X64: # BB#0:
; X64-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
3657
; _mm_unpackhi_epi32: interleave the high 2 dwords of each operand
; (shuffle mask 2,6,3,7); expects a single punpckhdq.
define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi32:
; X32: # BB#0:
; X32-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi32:
; X64: # BB#0:
; X64-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
3674
; _mm_unpackhi_epi64: interleave the high qword of each operand
; (shuffle mask 1,3); expects a single punpckhqdq.
define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi64:
; X32: # BB#0:
; X32-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi64:
; X64: # BB#0:
; X64-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X64-NEXT: retq
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %res
}
3688
; _mm_unpackhi_pd: same shuffle mask (1,3) as unpackhi_epi64 but on doubles;
; expects the FP-domain unpckhpd instead of punpckhqdq.
define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_unpackhi_pd:
; X32: # BB#0:
; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_pd:
; X64: # BB#0:
; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X64-NEXT: retq
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %res
}
3702
; _mm_unpacklo_epi8: interleave the low 8 bytes of each operand
; (shuffle mask 0,16,1,17,...); expects a single punpcklbw.
define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi8:
; X32: # BB#0:
; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi8:
; X64: # BB#0:
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
3719
; _mm_unpacklo_epi16: interleave the low 4 words of each operand
; (shuffle mask 0,8,1,9,...); expects a single punpcklwd.
define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi16:
; X32: # BB#0:
; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi16:
; X64: # BB#0:
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
3736
; _mm_unpacklo_epi32: interleave the low 2 dwords of each operand
; (shuffle mask 0,4,1,5); expects a single punpckldq.
define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi32:
; X32: # BB#0:
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi32:
; X64: # BB#0:
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
3753
; _mm_unpacklo_epi64: interleave the low qword of each operand
; (shuffle mask 0,2); expects a single punpcklqdq.
define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi64:
; X32: # BB#0:
; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi64:
; X64: # BB#0:
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %res
}
3767
; _mm_unpacklo_pd: same shuffle mask (0,2) as unpacklo_epi64 but on doubles;
; expects the FP-domain unpcklpd instead of punpcklqdq.
define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_unpacklo_pd:
; X32: # BB#0:
; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_pd:
; X64: # BB#0:
; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %res
}
3781
; _mm_xor_pd: bitwise XOR of double vectors via <4 x i32> bitcasts. CHECK
; lines accept xorps (the lowering chose the SSE1 FP-domain form here).
define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_xor_pd:
; X32: # BB#0:
; X32-NEXT: xorps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_xor_pd:
; X64: # BB#0:
; X64-NEXT: xorps %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = xor <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}
3798
; _mm_xor_si128: bitwise XOR of integer vectors. CHECK lines accept xorps
; (the lowering chose the FP-domain form rather than pxor here).
define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_xor_si128:
; X32: # BB#0:
; X32-NEXT: xorps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_xor_si128:
; X64: # BB#0:
; X64-NEXT: xorps %xmm1, %xmm0
; X64-NEXT: retq
  %res = xor <2 x i64> %a0, %a1
  ret <2 x i64> %res
}
3812
3813!0 = !{i32 1}
3814